Reading & Writing Files¶
Plain Text¶
Read¶
# Load the demo text file as a list with one entry per line. str.strip
# drops the trailing newline together with any other leading/trailing
# whitespace, so a blank line becomes an empty string.
with open('prac/foo.txt', mode='r', encoding='utf-8') as src:
    file = list(map(str.strip, src))
file
['1. This is file foo.',
'2. Just for demo, no practical utilities.',
'3. hahahah',
'4. This is line 4',
'',
'6. Line 5 (previous line) is empty']
Write¶
# Write the previously loaded lines back out, re-appending the newline that
# was stripped when they were read.
with open('prac/write.txt', mode='w', encoding='utf-8') as out:
    out.writelines(line + '\n' for line in file)
JSON¶
Read JSON¶
Read from file¶
import json
# Parse the JSON document from disk into Python objects.
# The encoding is passed explicitly, as in the other examples in this file:
# without it open() falls back to the locale's preferred encoding, which is
# not guaranteed to decode the non-ASCII text stored in this file.
# NOTE(review): assumes the file is UTF-8 (the standard JSON interchange
# encoding) -- confirm against the actual data file.
with open("prac/dcard_forums.json", encoding="utf-8") as f:
    data = json.load(f)
print(len(data))
data[0]
373
{'id': '7f125e07-4460-4ea5-80b5-33f0e9aafa0c',
'alias': 'midnightlab',
'name': '午夜實驗室',
'description': '午夜實驗室10/6、10/7即將在華山登場!這裏提供大家交流活動資訊與討論,請大家要遵守 Dcard 板規喔!',
'subscriptionCount': 1837,
'subscribed': False,
'read': False,
'createdAt': '2016-05-14T19:15:15.698Z',
'updatedAt': '2018-11-05T03:24:32.914Z',
'canPost': False,
'ignorePost': False,
'invisible': True,
'isSchool': False,
'fullyAnonymous': False,
'canUseNickname': True,
'postThumbnail': {'size': 'small'},
'shouldCategorized': False,
'titlePlaceholder': '',
'subcategories': [],
'topics': ['午夜實驗室'],
'nsfw': False,
'mediaThreshold': {},
'limitCountries': [],
'limitStage': 0,
'availableLayouts': ['classic'],
'postCount': {'last30Days': 0}}
Read from string¶
# A JSON object kept in an ordinary Python string (the surrounding newlines
# are insignificant whitespace to the parser).
jsonData = """
{"a":1,
"b":2,
"c":3,
"d":4,
"e":5}
"""
# json.loads parses text that is already in memory; json.load is the
# counterpart that reads from an open file object.
json.loads(s=jsonData)
{'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}
CSV¶
Read: DictReader¶
import csv
import pprint as pp
from itertools import islice

fpath = 'prac/stock.tsv'
# `with` guarantees the file is closed even if decoding or parsing raises.
# newline='' follows the csv module's guidance so the reader handles line
# endings (including ones embedded in quoted fields) itself.
with open(fpath, "r", encoding='cp950', newline='') as file:
    # DictReader uses the first row as field names and yields one mapping
    # per subsequent row.
    csvFile = csv.DictReader(file, delimiter='\t')
    # Pull only the first three records rather than materializing every row
    # of the file just to slice off a preview.
    pp.pprint(list(islice(csvFile, 3)))
[OrderedDict([('證券代碼', '4938 '),
('簡稱', '和碩 '),
('年月日', '20110103'),
('開盤價(元)', ' 29.32'),
('最高價(元)', ' 29.42'),
('最低價(元)', ' 29.21'),
('收盤價(元)', ' 29.28'),
('成交值(千元)', ' 89508')]),
OrderedDict([('證券代碼', '4938 '),
('簡稱', '和碩 '),
('年月日', '20110104'),
('開盤價(元)', ' 29.42'),
('最高價(元)', ' 29.56'),
('最低價(元)', ' 29.28'),
('收盤價(元)', ' 29.42'),
('成交值(千元)', ' 107340')]),
OrderedDict([('證券代碼', '4938 '),
('簡稱', '和碩 '),
('年月日', '20110105'),
('開盤價(元)', ' 29.42'),
('最高價(元)', ' 29.42'),
('最低價(元)', ' 28.16'),
('收盤價(元)', ' 28.72'),
('成交值(千元)', ' 193077')])]
from itertools import islice

fpath = 'prac/stock.tsv'
# `with` closes the file even when int() raises on a malformed field, which
# the previous open()/close() pair did not. newline='' follows the csv
# module's guidance for files handed to a reader.
with open(fpath, "r", encoding='cp950', newline='') as file:
    csvFile = csv.DictReader(file, delimiter='\t')
    # islice caps the loop at five rows, replacing the manual counter/break.
    for row in islice(csvFile, 5):
        # Fields arrive as strings (possibly space-padded); int() tolerates
        # surrounding whitespace.
        print(int(row['證券代碼']), int(row['成交值(千元)']))
4938 89508
4938 107340
4938 193077
4938 259533
4938 330576
Read: reader¶
# Same file read with the plain csv.reader, which yields each row as a list
# of strings and does not treat the header row specially.
with open(fpath, 'r', encoding='cp950') as f:
    csvFile = csv.reader(f, delimiter='\t')
    # Consume the first row by hand to obtain the column names.
    header = next(csvFile)
    print('FIELDNAMES:', header)
    # Number the data rows from 1 so the loop can stop after the fifth one.
    for lineCount, row in enumerate(csvFile, start=1):
        # Remove the padding whitespace around every field.
        row = list(map(str.strip, row))
        print(row)
        if lineCount == 5:
            break
FIELDNAMES: ['證券代碼', '簡稱', '年月日', '開盤價(元)', '最高價(元)', '最低價(元)', '收盤價(元)', '成交值(千元)']
['4938', '和碩', '20110103', '29.32', '29.42', '29.21', '29.28', '89508']
['4938', '和碩', '20110104', '29.42', '29.56', '29.28', '29.42', '107340']
['4938', '和碩', '20110105', '29.42', '29.42', '28.16', '28.72', '193077']
['4938', '和碩', '20110106', '28.93', '29.00', '28.02', '28.20', '259533']
['4938', '和碩', '20110107', '28.02', '28.16', '27.22', '27.32', '330576']
Read: Pandas¶
import pandas as pd
# Load the tab-separated table into a DataFrame; the first row supplies the
# column labels.
df = pd.read_csv(
    filepath_or_buffer="prac/stock.tsv",
    sep="\t",
    encoding="cp950",
)
df
| | 證券代碼 | 簡稱 | 年月日 | 開盤價(元) | 最高價(元) | 最低價(元) | 收盤價(元) | 成交值(千元) |
|---|---|---|---|---|---|---|---|---|
| 0 | 4938 | 和碩 | 20110103 | 29.32 | 29.42 | 29.21 | 29.28 | 89508 |
| 1 | 4938 | 和碩 | 20110104 | 29.42 | 29.56 | 29.28 | 29.42 | 107340 |
| 2 | 4938 | 和碩 | 20110105 | 29.42 | 29.42 | 28.16 | 28.72 | 193077 |
| 3 | 4938 | 和碩 | 20110106 | 28.93 | 29.00 | 28.02 | 28.20 | 259533 |
| 4 | 4938 | 和碩 | 20110107 | 28.02 | 28.16 | 27.22 | 27.32 | 330576 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1229 | 4938 | 和碩 | 20151224 | 66.06 | 66.40 | 64.97 | 65.06 | 318025 |
| 1230 | 4938 | 和碩 | 20151225 | 64.89 | 65.64 | 64.89 | 65.64 | 108529 |
| 1231 | 4938 | 和碩 | 20151228 | 66.06 | 66.06 | 65.31 | 65.73 | 141813 |
| 1232 | 4938 | 和碩 | 20151229 | 65.23 | 65.23 | 62.54 | 63.05 | 731616 |
| 1233 | 4938 | 和碩 | 20151230 | 63.30 | 63.55 | 60.53 | 61.37 | 916988 |
1234 rows × 8 columns
Write: pandas DataFrame.to_csv()¶
# Save the table as comma-separated text; index=False leaves the row index
# out of the written file.
df.to_csv(path_or_buf="prac/stock.csv", index=False)
Write: pandas DataFrame.to_json()¶
# Positional slice: the first two rows with every column.
df.iloc[:2]
| | 證券代碼 | 簡稱 | 年月日 | 開盤價(元) | 最高價(元) | 最低價(元) | 收盤價(元) | 成交值(千元) |
|---|---|---|---|---|---|---|---|---|
| 0 | 4938 | 和碩 | 20110103 | 29.32 | 29.42 | 29.21 | 29.28 | 89508 |
| 1 | 4938 | 和碩 | 20110104 | 29.42 | 29.56 | 29.28 | 29.42 | 107340 |
# Write the first two rows as a JSON array with one object per row
# (orient="records"); force_ascii=False keeps non-ASCII text as-is instead
# of \u escapes.
df.iloc[:2].to_json(
    path_or_buf="prac/stock.json",
    orient="records",
    force_ascii=False,
)
Details of to_json()¶
See the pandas `DataFrame.to_json()` documentation for details on options such as `orient` and `force_ascii`.
# With no path argument, to_json returns the JSON text instead of writing
# it to a file.
json_str = df.iloc[:2].to_json(
    orient="records",
    force_ascii=False,
)
json_str
'[{"證券代碼":4938,"簡稱":"和碩 ","年月日":20110103,"開盤價(元)":29.32,"最高價(元)":29.42,"最低價(元)":29.21,"收盤價(元)":29.28,"成交值(千元)":89508},{"證券代碼":4938,"簡稱":"和碩 ","年月日":20110104,"開盤價(元)":29.42,"最高價(元)":29.56,"最低價(元)":29.28,"收盤價(元)":29.42,"成交值(千元)":107340}]'
# Parse the JSON text held in json_str back into Python objects.
json.loads(json_str)
[{'證券代碼': 4938,
'簡稱': '和碩 ',
'年月日': 20110103,
'開盤價(元)': 29.32,
'最高價(元)': 29.42,
'最低價(元)': 29.21,
'收盤價(元)': 29.28,
'成交值(千元)': 89508},
{'證券代碼': 4938,
'簡稱': '和碩 ',
'年月日': 20110104,
'開盤價(元)': 29.42,
'最高價(元)': 29.56,
'最低價(元)': 29.28,
'收盤價(元)': 29.42,
'成交值(千元)': 107340}]