Reading & Writing Files

Plain Text

Read

# Read the file line by line; str.strip removes surrounding whitespace
# (including the trailing newline) from every line.
with open('prac/foo.txt', 'r', encoding='utf-8') as f:
    file = list(map(str.strip, f))

# Display the resulting list of lines.
file
Copy to clipboard
['1. This is file foo.',
 '2. Just for demo, no practical utilities.',
 '3. hahahah',
 '4. This is line 4',
 '',
 '6. Line 5 (previous line) is empty']
Copy to clipboard

Write

# Write every stored line back out, re-adding the newline that was stripped
# when the lines were read.
with open('prac/write.txt', 'w', encoding='utf-8') as f:
    f.writelines(line + '\n' for line in file)
Copy to clipboard

JSON

Read JSON

Read from file

import json

# Open the file explicitly as UTF-8: without `encoding`, open() falls back to
# the platform's locale encoding, which can fail on (or garble) the non-ASCII
# text stored in this file.
with open("prac/dcard_forums.json", encoding="utf-8") as f:
    data = json.load(f)

# Number of top-level entries, followed by the first entry.
print(len(data))
data[0]
Copy to clipboard
373
Copy to clipboard
{'id': '7f125e07-4460-4ea5-80b5-33f0e9aafa0c',
 'alias': 'midnightlab',
 'name': '午夜實驗室',
 'description': '午夜實驗室10/6、10/7即將在華山登場!這裏提供大家交流活動資訊與討論,請大家要遵守 Dcard 板規喔!',
 'subscriptionCount': 1837,
 'subscribed': False,
 'read': False,
 'createdAt': '2016-05-14T19:15:15.698Z',
 'updatedAt': '2018-11-05T03:24:32.914Z',
 'canPost': False,
 'ignorePost': False,
 'invisible': True,
 'isSchool': False,
 'fullyAnonymous': False,
 'canUseNickname': True,
 'postThumbnail': {'size': 'small'},
 'shouldCategorized': False,
 'titlePlaceholder': '',
 'subcategories': [],
 'topics': ['午夜實驗室'],
 'nsfw': False,
 'mediaThreshold': {},
 'limitCountries': [],
 'limitStage': 0,
 'availableLayouts': ['classic'],
 'postCount': {'last30Days': 0}}
Copy to clipboard

Read from string

# JSON text held in a Python string rather than in a file.
jsonData = """
{"a":1,
"b":2,
"c":3,
"d":4,
"e":5}
"""

# json.loads() parses a string (json.load() reads from a file object).
json.loads(jsonData)
Copy to clipboard
{'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}
Copy to clipboard

Write JSON

Write to file

# ensure_ascii=False writes non-ASCII characters as-is instead of \uXXXX
# escapes, so the file is opened with an explicit UTF-8 encoding to hold them.
with open("prac/afile.json", "w", encoding="utf-8") as f:
    json.dump(data, f, ensure_ascii=False)
Copy to clipboard
# Write to string: json.dumps() returns the JSON text instead of writing it
# to a file object.
a_dict = dict(a=1, b=2)
json.dumps(a_dict)
Copy to clipboard
'{"a": 1, "b": 2}'
Copy to clipboard

CSV

Read: DictReader

import csv
import pprint as pp
from itertools import islice

fpath = 'prac/stock.tsv'

# The context manager closes the file even if reading or decoding raises.
with open(fpath, "r", encoding='cp950') as file:
    # DictReader takes the field names from the first row of the file.
    csvFile = csv.DictReader(file, delimiter='\t')

    # islice() stops after three rows instead of loading the whole file into
    # a list just to show its first few entries.
    pp.pprint(list(islice(csvFile, 3)))
Copy to clipboard
[OrderedDict([('證券代碼', '4938   '),
              ('簡稱', '和碩         '),
              ('年月日', '20110103'),
              ('開盤價(元)', '   29.32'),
              ('最高價(元)', '   29.42'),
              ('最低價(元)', '   29.21'),
              ('收盤價(元)', '   29.28'),
              ('成交值(千元)', '       89508')]),
 OrderedDict([('證券代碼', '4938   '),
              ('簡稱', '和碩         '),
              ('年月日', '20110104'),
              ('開盤價(元)', '   29.42'),
              ('最高價(元)', '   29.56'),
              ('最低價(元)', '   29.28'),
              ('收盤價(元)', '   29.42'),
              ('成交值(千元)', '      107340')]),
 OrderedDict([('證券代碼', '4938   '),
              ('簡稱', '和碩         '),
              ('年月日', '20110105'),
              ('開盤價(元)', '   29.42'),
              ('最高價(元)', '   29.42'),
              ('最低價(元)', '   28.16'),
              ('收盤價(元)', '   28.72'),
              ('成交值(千元)', '      193077')])]
Copy to clipboard
from itertools import islice

fpath = 'prac/stock.tsv'

# The context manager closes the file even if a row fails to parse.
with open(fpath, "r", encoding='cp950') as file:
    csvFile = csv.DictReader(file, delimiter='\t')

    # Only the first five data rows are needed; islice() replaces the manual
    # line counter and break.
    for row in islice(csvFile, 5):
        # int() ignores surrounding whitespace, so the padded fields parse.
        print(int(row['證券代碼']), int(row['成交值(千元)']))
Copy to clipboard
4938 89508
4938 107340
4938 193077
4938 259533
4938 330576
Copy to clipboard

Read: reader

with open(fpath, 'r', encoding='cp950') as f:
    csvFile = csv.reader(f, delimiter='\t')

    # csv.reader yields plain lists, so the header row is consumed by hand.
    header = next(csvFile)
    print('FIELDNAMES:', header)

    # Show the first five data rows with the padding stripped from each field.
    for lineCount, row in enumerate(csvFile, start=1):
        row = list(map(str.strip, row))
        print(row)

        if lineCount == 5:
            break
Copy to clipboard
FIELDNAMES: ['證券代碼', '簡稱', '年月日', '開盤價(元)', '最高價(元)', '最低價(元)', '收盤價(元)', '成交值(千元)']
['4938', '和碩', '20110103', '29.32', '29.42', '29.21', '29.28', '89508']
['4938', '和碩', '20110104', '29.42', '29.56', '29.28', '29.42', '107340']
['4938', '和碩', '20110105', '29.42', '29.42', '28.16', '28.72', '193077']
['4938', '和碩', '20110106', '28.93', '29.00', '28.02', '28.20', '259533']
['4938', '和碩', '20110107', '28.02', '28.16', '27.22', '27.32', '330576']
Copy to clipboard

Read: Pandas

import pandas as pd

# Load the tab-separated file into a DataFrame, decoding it as cp950.
df = pd.read_csv("prac/stock.tsv", sep="\t", encoding="cp950")
df
Copy to clipboard
證券代碼 簡稱 年月日 開盤價(元) 最高價(元) 最低價(元) 收盤價(元) 成交值(千元)
0 4938 和碩 20110103 29.32 29.42 29.21 29.28 89508
1 4938 和碩 20110104 29.42 29.56 29.28 29.42 107340
2 4938 和碩 20110105 29.42 29.42 28.16 28.72 193077
3 4938 和碩 20110106 28.93 29.00 28.02 28.20 259533
4 4938 和碩 20110107 28.02 28.16 27.22 27.32 330576
... ... ... ... ... ... ... ... ...
1229 4938 和碩 20151224 66.06 66.40 64.97 65.06 318025
1230 4938 和碩 20151225 64.89 65.64 64.89 65.64 108529
1231 4938 和碩 20151228 66.06 66.06 65.31 65.73 141813
1232 4938 和碩 20151229 65.23 65.23 62.54 63.05 731616
1233 4938 和碩 20151230 63.30 63.55 60.53 61.37 916988

1234 rows × 8 columns

Write: pandas DataFrame.to_csv()

# index=False leaves the DataFrame's row index out of the written file.
df.to_csv("prac/stock.csv", index=False)
Copy to clipboard

Write: pandas DataFrame.to_json()

# Preview the first two rows (all columns) that the next cell serialises.
df.head(2)
Copy to clipboard
證券代碼 簡稱 年月日 開盤價(元) 最高價(元) 最低價(元) 收盤價(元) 成交值(千元)
0 4938 和碩 20110103 29.32 29.42 29.21 29.28 89508
1 4938 和碩 20110104 29.42 29.56 29.28 29.42 107340
# Write the first two rows as a JSON array of row objects; force_ascii=False
# keeps the non-ASCII column names and values unescaped.
df.head(2).to_json("prac/stock.json", force_ascii=False, orient="records")
Copy to clipboard

Details of to_json()

See the pandas documentation for `DataFrame.to_json()` for details on `orient`, `force_ascii`, and the other options.

# With no path argument, to_json() returns the JSON text as a string.
json_str = df.head(2).to_json(force_ascii=False, orient="records")
json_str
Copy to clipboard
'[{"證券代碼":4938,"簡稱":"和碩         ","年月日":20110103,"開盤價(元)":29.32,"最高價(元)":29.42,"最低價(元)":29.21,"收盤價(元)":29.28,"成交值(千元)":89508},{"證券代碼":4938,"簡稱":"和碩         ","年月日":20110104,"開盤價(元)":29.42,"最高價(元)":29.56,"最低價(元)":29.28,"收盤價(元)":29.42,"成交值(千元)":107340}]'
Copy to clipboard
# Parse the JSON string produced by to_json() back into Python objects.
json.loads(json_str)
Copy to clipboard
[{'證券代碼': 4938,
  '簡稱': '和碩         ',
  '年月日': 20110103,
  '開盤價(元)': 29.32,
  '最高價(元)': 29.42,
  '最低價(元)': 29.21,
  '收盤價(元)': 29.28,
  '成交值(千元)': 89508},
 {'證券代碼': 4938,
  '簡稱': '和碩         ',
  '年月日': 20110104,
  '開盤價(元)': 29.42,
  '最高價(元)': 29.56,
  '最低價(元)': 29.28,
  '收盤價(元)': 29.42,
  '成交值(千元)': 107340}]
Copy to clipboard