Reading & Writing Files

Plain Text

Read

# Load the file as a list with one entry per line. str.strip removes the
# trailing newline (and any other surrounding whitespace) from each line,
# so an empty line in the file becomes an empty string in the list.
with open('prac/foo.txt', 'r', encoding='utf-8') as f:
    file = list(map(str.strip, f))

file
['1. This is file foo.',
 '2. Just for demo, no practical utilities.',
 '3. hahahah',
 '4. This is line 4',
 '',
 '6. Line 5 (previous line) is empty']

Write

# Write each entry of `file` on its own line; a newline is appended to
# every entry because the stored strings do not carry one.
with open('prac/write.txt', 'w', encoding='utf-8') as f:
    f.writelines(line + '\n' for line in file)

JSON

Read JSON

Read from file

import json

# Parse the JSON file into Python objects.
# The encoding is given explicitly: the file contains non-ASCII (Chinese)
# text, and without `encoding=` open() falls back to the platform's locale
# encoding (e.g. cp950 on a zh-TW Windows machine), which would raise a
# UnicodeDecodeError or silently produce mojibake for UTF-8 JSON.
with open("prac/dcard_forums.json", encoding="utf-8") as f:
    data = json.load(f)

# Number of top-level records, then the first record as a sample.
print(len(data))
data[0]
373
{'id': '7f125e07-4460-4ea5-80b5-33f0e9aafa0c',
 'alias': 'midnightlab',
 'name': '午夜實驗室',
 'description': '午夜實驗室10/6、10/7即將在華山登場!這裏提供大家交流活動資訊與討論,請大家要遵守 Dcard 板規喔!',
 'subscriptionCount': 1837,
 'subscribed': False,
 'read': False,
 'createdAt': '2016-05-14T19:15:15.698Z',
 'updatedAt': '2018-11-05T03:24:32.914Z',
 'canPost': False,
 'ignorePost': False,
 'invisible': True,
 'isSchool': False,
 'fullyAnonymous': False,
 'canUseNickname': True,
 'postThumbnail': {'size': 'small'},
 'shouldCategorized': False,
 'titlePlaceholder': '',
 'subcategories': [],
 'topics': ['午夜實驗室'],
 'nsfw': False,
 'mediaThreshold': {},
 'limitCountries': [],
 'limitStage': 0,
 'availableLayouts': ['classic'],
 'postCount': {'last30Days': 0}}

Read from string

# A JSON document held in a Python string. The newlines around and inside
# the object are insignificant whitespace to the JSON parser.
jsonData = """
{"a":1,
"b":2,
"c":3,
"d":4,
"e":5}
"""

# json.loads ("load string") parses JSON text into Python objects;
# a JSON object becomes a dict.
json.loads(jsonData)
{'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}

Write JSON

Write to file

# Serialize `data` to JSON text and write it out as UTF-8.
# ensure_ascii=False keeps non-ASCII characters as they are instead of
# emitting \uXXXX escapes.
with open("prac/afile.json", "w", encoding="utf-8") as f:
    f.write(json.dumps(data, ensure_ascii=False))
# json.dumps ("dump string") returns the JSON text instead of writing it
# to a file.
a_dict = dict(a=1, b=2)
json.dumps(a_dict)
'{"a": 1, "b": 2}'

CSV

Read: DictReader

import csv
import pprint as pp

from itertools import islice

fpath = 'prac/stock.tsv'

# Preview the first three records of the tab-separated file.
# - `with` guarantees the handle is closed even if reading or decoding
#   raises (a bare open()/close() pair leaks the handle in that case).
# - newline='' is what the csv module asks for, so that it can do its
#   own line-ending handling.
# - islice stops after three rows instead of loading the whole file
#   just to slice off the head.
with open(fpath, "r", encoding='cp950', newline='') as file:
    # DictReader takes the field names from the first row and yields
    # one mapping per data row.
    csvFile = csv.DictReader(file, delimiter='\t')
    pp.pprint(list(islice(csvFile, 3)))
[OrderedDict([('證券代碼', '4938   '),
              ('簡稱', '和碩         '),
              ('年月日', '20110103'),
              ('開盤價(元)', '   29.32'),
              ('最高價(元)', '   29.42'),
              ('最低價(元)', '   29.21'),
              ('收盤價(元)', '   29.28'),
              ('成交值(千元)', '       89508')]),
 OrderedDict([('證券代碼', '4938   '),
              ('簡稱', '和碩         '),
              ('年月日', '20110104'),
              ('開盤價(元)', '   29.42'),
              ('最高價(元)', '   29.56'),
              ('最低價(元)', '   29.28'),
              ('收盤價(元)', '   29.42'),
              ('成交值(千元)', '      107340')]),
 OrderedDict([('證券代碼', '4938   '),
              ('簡稱', '和碩         '),
              ('年月日', '20110105'),
              ('開盤價(元)', '   29.42'),
              ('最高價(元)', '   29.42'),
              ('最低價(元)', '   28.16'),
              ('收盤價(元)', '   28.72'),
              ('成交值(千元)', '      193077')])]
fpath = 'prac/stock.tsv'

# Print two columns, converted to int, for the first five data rows.
# `with` closes the file even if a conversion below raises; newline=''
# lets the csv module handle line endings itself.
with open(fpath, "r", encoding='cp950', newline='') as file:
    csvFile = csv.DictReader(file, delimiter='\t')

    # enumerate(start=1) replaces the hand-maintained counter.
    for lineCount, row in enumerate(csvFile, start=1):
        # The raw fields are space-padded; int() ignores surrounding
        # whitespace, so no explicit strip is needed here.
        print(int(row['證券代碼']), int(row['成交值(千元)']))
        if lineCount == 5:
            break
4938 89508
4938 107340
4938 193077
4938 259533
4938 330576

Read: reader

# Read the same file with the plain csv.reader, which yields each row as
# a list of strings. newline='' lets the csv module handle line endings
# itself.
with open(fpath, 'r', encoding='cp950', newline='') as f:
    csvFile = csv.reader(f, delimiter='\t')

    # csv.reader does not treat the first row specially, so pull the
    # header row off the iterator by hand.
    header = next(csvFile)
    print('FIELDNAMES:', header)

    # Show the first five data rows; enumerate(start=1) replaces the
    # hand-maintained counter.
    for lineCount, row in enumerate(csvFile, start=1):
        # Fields are space-padded in the file; strip each one.
        row = [ele.strip() for ele in row]
        print(row)

        if lineCount == 5:
            break
FIELDNAMES: ['證券代碼', '簡稱', '年月日', '開盤價(元)', '最高價(元)', '最低價(元)', '收盤價(元)', '成交值(千元)']
['4938', '和碩', '20110103', '29.32', '29.42', '29.21', '29.28', '89508']
['4938', '和碩', '20110104', '29.42', '29.56', '29.28', '29.42', '107340']
['4938', '和碩', '20110105', '29.42', '29.42', '28.16', '28.72', '193077']
['4938', '和碩', '20110106', '28.93', '29.00', '28.02', '28.20', '259533']
['4938', '和碩', '20110107', '28.02', '28.16', '27.22', '27.32', '330576']

Read: Pandas

import pandas as pd

# Load the tab-separated, cp950-encoded file into a DataFrame.
df = pd.read_csv(
    filepath_or_buffer="prac/stock.tsv",
    delimiter="\t",
    encoding="cp950",
)
df
證券代碼 簡稱 年月日 開盤價(元) 最高價(元) 最低價(元) 收盤價(元) 成交值(千元)
0 4938 和碩 20110103 29.32 29.42 29.21 29.28 89508
1 4938 和碩 20110104 29.42 29.56 29.28 29.42 107340
2 4938 和碩 20110105 29.42 29.42 28.16 28.72 193077
3 4938 和碩 20110106 28.93 29.00 28.02 28.20 259533
4 4938 和碩 20110107 28.02 28.16 27.22 27.32 330576
... ... ... ... ... ... ... ... ...
1229 4938 和碩 20151224 66.06 66.40 64.97 65.06 318025
1230 4938 和碩 20151225 64.89 65.64 64.89 65.64 108529
1231 4938 和碩 20151228 66.06 66.06 65.31 65.73 141813
1232 4938 和碩 20151229 65.23 65.23 62.54 63.05 731616
1233 4938 和碩 20151230 63.30 63.55 60.53 61.37 916988

1234 rows × 8 columns

Write: pandas DataFrame.to_csv()

# Save the DataFrame as CSV; index=False leaves the row index out of the
# output file.
df.to_csv(path_or_buf="prac/stock.csv", index=False)

Write: pandas DataFrame.to_json()

# First two rows, all columns.
df.iloc[:2]
證券代碼 簡稱 年月日 開盤價(元) 最高價(元) 最低價(元) 收盤價(元) 成交值(千元)
0 4938 和碩 20110103 29.32 29.42 29.21 29.28 89508
1 4938 和碩 20110104 29.42 29.56 29.28 29.42 107340
# Write the first two rows as a JSON array with one object per row
# (orient="records"); force_ascii=False keeps non-ASCII characters
# unescaped.
df.iloc[:2].to_json(
    path_or_buf="prac/stock.json",
    orient="records",
    force_ascii=False,
)

Details of to_json()

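See the pandas documentation for `DataFrame.to_json()` for details, e.g. how the `orient` parameter changes the layout of the output.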

# With no path given, to_json returns the JSON text as a string instead
# of writing a file.
json_str = df.iloc[:2].to_json(
    orient="records",
    force_ascii=False,
)
json_str
'[{"證券代碼":4938,"簡稱":"和碩         ","年月日":20110103,"開盤價(元)":29.32,"最高價(元)":29.42,"最低價(元)":29.21,"收盤價(元)":29.28,"成交值(千元)":89508},{"證券代碼":4938,"簡稱":"和碩         ","年月日":20110104,"開盤價(元)":29.42,"最高價(元)":29.56,"最低價(元)":29.28,"收盤價(元)":29.42,"成交值(千元)":107340}]'
# Parse the JSON text back into Python objects to inspect its structure.
json.loads(json_str)
[{'證券代碼': 4938,
  '簡稱': '和碩         ',
  '年月日': 20110103,
  '開盤價(元)': 29.32,
  '最高價(元)': 29.42,
  '最低價(元)': 29.21,
  '收盤價(元)': 29.28,
  '成交值(千元)': 89508},
 {'證券代碼': 4938,
  '簡稱': '和碩         ',
  '年月日': 20110104,
  '開盤價(元)': 29.42,
  '最高價(元)': 29.56,
  '最低價(元)': 29.28,
  '收盤價(元)': 29.42,
  '成交值(千元)': 107340}]