{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Reading & Writing Files"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Plain Text"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Read"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['1. This is file foo.',\n",
       " '2. Just for demo, no practical utilities.',\n",
       " '3. hahahah',\n",
       " '4. This is line 4',\n",
       " '',\n",
       " '6. Line 5 (previous line) is empty']"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "with open('prac/foo.txt', 'r', encoding='utf-8') as f:\n",
    "    file = [line.strip() for line in f]\n",
    "\n",
    "file"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Write"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('prac/write.txt', 'w', encoding='utf-8') as f:\n",
    "    for line in file:\n",
    "        f.write(line + '\\n')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## JSON"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Read JSON\n",
    "\n",
    "#### Read from file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "373\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'alias': 'midnightlab',\n",
       " 'availableLayouts': ['classic'],\n",
       " 'canPost': False,\n",
       " 'canUseNickname': True,\n",
       " 'createdAt': '2016-05-14T19:15:15.698Z',\n",
       " 'description': '午夜實驗室10/6、10/7即將在華山登場!這裏提供大家交流活動資訊與討論,請大家要遵守 Dcard 板規喔!',\n",
       " 'fullyAnonymous': False,\n",
       " 'id': '7f125e07-4460-4ea5-80b5-33f0e9aafa0c',\n",
       " 'ignorePost': False,\n",
       " 'invisible': True,\n",
       " 'isSchool': False,\n",
       " 'limitCountries': [],\n",
       " 'limitStage': 0,\n",
       " 'mediaThreshold': {},\n",
       " 'name': '午夜實驗室',\n",
       " 'nsfw': False,\n",
       " 'postCount': {'last30Days': 0},\n",
       " 'postThumbnail': {'size': 'small'},\n",
       " 'read': False,\n",
       " 'shouldCategorized': False,\n",
       " 'subcategories': [],\n",
       " 'subscribed': False,\n",
       " 'subscriptionCount': 1837,\n",
       " 'titlePlaceholder': '',\n",
       " 'topics': ['午夜實驗室'],\n",
       " 'updatedAt': '2018-11-05T03:24:32.914Z'}"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import json\n",
    "\n",
    "with open(\"prac/dcard_forums.json\") as f:\n",
    "    data = json.load(f)\n",
    "\n",
    "print(len(data))\n",
    "data[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Read from string"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "jsonData = \"\"\"\n",
    "{\"a\":1,\n",
    "\"b\":2,\n",
    "\"c\":3,\n",
    "\"d\":4,\n",
    "\"e\":5}\n",
    "\"\"\"\n",
    "\n",
    "json.loads(jsonData)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Write JSON\n",
    "\n",
    "#### Write to file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(\"prac/afile.json\", \"w\", encoding=\"utf-8\") as f:\n",
    "    json.dump(data, f, ensure_ascii=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'{\"a\": 1, \"b\": 2}'"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a_dict = {'a': 1, 'b': 2}\n",
    "json.dumps(a_dict)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## CSV"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Read: `DictReader`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[OrderedDict([('證券代碼', '4938   '),\n",
      "              ('簡稱', '和碩         '),\n",
      "              ('年月日', '20110103'),\n",
      "              ('開盤價(元)', '   29.32'),\n",
      "              ('最高價(元)', '   29.42'),\n",
      "              ('最低價(元)', '   29.21'),\n",
      "              ('收盤價(元)', '   29.28'),\n",
      "              ('成交值(千元)', '       89508')]),\n",
      " OrderedDict([('證券代碼', '4938   '),\n",
      "              ('簡稱', '和碩         '),\n",
      "              ('年月日', '20110104'),\n",
      "              ('開盤價(元)', '   29.42'),\n",
      "              ('最高價(元)', '   29.56'),\n",
      "              ('最低價(元)', '   29.28'),\n",
      "              ('收盤價(元)', '   29.42'),\n",
      "              ('成交值(千元)', '      107340')]),\n",
      " OrderedDict([('證券代碼', '4938   '),\n",
      "              ('簡稱', '和碩         '),\n",
      "              ('年月日', '20110105'),\n",
      "              ('開盤價(元)', '   29.42'),\n",
      "              ('最高價(元)', '   29.42'),\n",
      "              ('最低價(元)', '   28.16'),\n",
      "              ('收盤價(元)', '   28.72'),\n",
      "              ('成交值(千元)', '      193077')])]\n"
     ]
    }
   ],
   "source": [
    "import csv\n",
    "import pprint as pp\n",
    "\n",
    "fpath = 'prac/stock.tsv'\n",
    "file = open(fpath, \"r\", encoding='cp950')\n",
    "csvFile = csv.DictReader(file, delimiter='\\t')\n",
    "\n",
    "pp.pprint(list(csvFile)[:3])\n",
    "file.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "4938 89508\n",
      "4938 107340\n",
      "4938 193077\n",
      "4938 259533\n",
      "4938 330576\n"
     ]
    }
   ],
   "source": [
    "fpath = 'prac/stock.tsv'\n",
    "file = open(fpath, \"r\", encoding='cp950')\n",
    "csvFile = csv.DictReader(file, delimiter='\\t')\n",
    "\n",
    "lineCount = 1\n",
    "for row in csvFile:\n",
    "    print(int(row['證券代碼']), int(row['成交值(千元)']))\n",
    "    if lineCount == 5: break \n",
    "    lineCount += 1\n",
    "    \n",
    "file.close()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Read: `reader`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "FIELDNAMES: ['證券代碼', '簡稱', '年月日', '開盤價(元)', '最高價(元)', '最低價(元)', '收盤價(元)', '成交值(千元)']\n",
      "['4938', '和碩', '20110103', '29.32', '29.42', '29.21', '29.28', '89508']\n",
      "['4938', '和碩', '20110104', '29.42', '29.56', '29.28', '29.42', '107340']\n",
      "['4938', '和碩', '20110105', '29.42', '29.42', '28.16', '28.72', '193077']\n",
      "['4938', '和碩', '20110106', '28.93', '29.00', '28.02', '28.20', '259533']\n",
      "['4938', '和碩', '20110107', '28.02', '28.16', '27.22', '27.32', '330576']\n"
     ]
    }
   ],
   "source": [
    "with open(fpath, 'r', encoding='cp950') as f:\n",
    "    csvFile = csv.reader(f, delimiter='\\t')\n",
    "    header = next(csvFile)\n",
    "    print('FIELDNAMES:', header)\n",
    "    \n",
    "    lineCount = 1\n",
    "    for row in csvFile:\n",
    "        row = [ele.strip() for ele in row]\n",
    "        print(row)\n",
    "        \n",
    "        if lineCount == 5: break\n",
    "        lineCount += 1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Read: Pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>證券代碼</th>\n",
       "      <th>簡稱</th>\n",
       "      <th>年月日</th>\n",
       "      <th>開盤價(元)</th>\n",
       "      <th>最高價(元)</th>\n",
       "      <th>最低價(元)</th>\n",
       "      <th>收盤價(元)</th>\n",
       "      <th>成交值(千元)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>4938</td>\n",
       "      <td>和碩</td>\n",
       "      <td>20110103</td>\n",
       "      <td>29.32</td>\n",
       "      <td>29.42</td>\n",
       "      <td>29.21</td>\n",
       "      <td>29.28</td>\n",
       "      <td>89508</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4938</td>\n",
       "      <td>和碩</td>\n",
       "      <td>20110104</td>\n",
       "      <td>29.42</td>\n",
       "      <td>29.56</td>\n",
       "      <td>29.28</td>\n",
       "      <td>29.42</td>\n",
       "      <td>107340</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4938</td>\n",
       "      <td>和碩</td>\n",
       "      <td>20110105</td>\n",
       "      <td>29.42</td>\n",
       "      <td>29.42</td>\n",
       "      <td>28.16</td>\n",
       "      <td>28.72</td>\n",
       "      <td>193077</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4938</td>\n",
       "      <td>和碩</td>\n",
       "      <td>20110106</td>\n",
       "      <td>28.93</td>\n",
       "      <td>29.00</td>\n",
       "      <td>28.02</td>\n",
       "      <td>28.20</td>\n",
       "      <td>259533</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4938</td>\n",
       "      <td>和碩</td>\n",
       "      <td>20110107</td>\n",
       "      <td>28.02</td>\n",
       "      <td>28.16</td>\n",
       "      <td>27.22</td>\n",
       "      <td>27.32</td>\n",
       "      <td>330576</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1229</th>\n",
       "      <td>4938</td>\n",
       "      <td>和碩</td>\n",
       "      <td>20151224</td>\n",
       "      <td>66.06</td>\n",
       "      <td>66.40</td>\n",
       "      <td>64.97</td>\n",
       "      <td>65.06</td>\n",
       "      <td>318025</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1230</th>\n",
       "      <td>4938</td>\n",
       "      <td>和碩</td>\n",
       "      <td>20151225</td>\n",
       "      <td>64.89</td>\n",
       "      <td>65.64</td>\n",
       "      <td>64.89</td>\n",
       "      <td>65.64</td>\n",
       "      <td>108529</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1231</th>\n",
       "      <td>4938</td>\n",
       "      <td>和碩</td>\n",
       "      <td>20151228</td>\n",
       "      <td>66.06</td>\n",
       "      <td>66.06</td>\n",
       "      <td>65.31</td>\n",
       "      <td>65.73</td>\n",
       "      <td>141813</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1232</th>\n",
       "      <td>4938</td>\n",
       "      <td>和碩</td>\n",
       "      <td>20151229</td>\n",
       "      <td>65.23</td>\n",
       "      <td>65.23</td>\n",
       "      <td>62.54</td>\n",
       "      <td>63.05</td>\n",
       "      <td>731616</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1233</th>\n",
       "      <td>4938</td>\n",
       "      <td>和碩</td>\n",
       "      <td>20151230</td>\n",
       "      <td>63.30</td>\n",
       "      <td>63.55</td>\n",
       "      <td>60.53</td>\n",
       "      <td>61.37</td>\n",
       "      <td>916988</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1234 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      證券代碼           簡稱       年月日  開盤價(元)  最高價(元)  最低價(元)  收盤價(元)  成交值(千元)\n",
       "0     4938  和碩           20110103   29.32   29.42   29.21   29.28    89508\n",
       "1     4938  和碩           20110104   29.42   29.56   29.28   29.42   107340\n",
       "2     4938  和碩           20110105   29.42   29.42   28.16   28.72   193077\n",
       "3     4938  和碩           20110106   28.93   29.00   28.02   28.20   259533\n",
       "4     4938  和碩           20110107   28.02   28.16   27.22   27.32   330576\n",
       "...    ...          ...       ...     ...     ...     ...     ...      ...\n",
       "1229  4938  和碩           20151224   66.06   66.40   64.97   65.06   318025\n",
       "1230  4938  和碩           20151225   64.89   65.64   64.89   65.64   108529\n",
       "1231  4938  和碩           20151228   66.06   66.06   65.31   65.73   141813\n",
       "1232  4938  和碩           20151229   65.23   65.23   62.54   63.05   731616\n",
       "1233  4938  和碩           20151230   63.30   63.55   60.53   61.37   916988\n",
       "\n",
       "[1234 rows x 8 columns]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_csv(\"prac/stock.tsv\", sep=\"\\t\", encoding=\"cp950\")\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Write `PdDataFrame.to_csv()`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.to_csv(\"prac/stock.csv\", index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Write `PdDataFrame.to_json()`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>證券代碼</th>\n",
       "      <th>簡稱</th>\n",
       "      <th>年月日</th>\n",
       "      <th>開盤價(元)</th>\n",
       "      <th>最高價(元)</th>\n",
       "      <th>最低價(元)</th>\n",
       "      <th>收盤價(元)</th>\n",
       "      <th>成交值(千元)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>4938</td>\n",
       "      <td>和碩</td>\n",
       "      <td>20110103</td>\n",
       "      <td>29.32</td>\n",
       "      <td>29.42</td>\n",
       "      <td>29.21</td>\n",
       "      <td>29.28</td>\n",
       "      <td>89508</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4938</td>\n",
       "      <td>和碩</td>\n",
       "      <td>20110104</td>\n",
       "      <td>29.42</td>\n",
       "      <td>29.56</td>\n",
       "      <td>29.28</td>\n",
       "      <td>29.42</td>\n",
       "      <td>107340</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   證券代碼           簡稱       年月日  開盤價(元)  最高價(元)  最低價(元)  收盤價(元)  成交值(千元)\n",
       "0  4938  和碩           20110103   29.32   29.42   29.21   29.28    89508\n",
       "1  4938  和碩           20110104   29.42   29.56   29.28   29.42   107340"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.iloc[:2,:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.iloc[:2,:].to_json(\"prac/stock.json\", orient=\"records\", force_ascii=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Details of `to_json()`\n",
    "\n",
    "See [doc](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_json.html#pandas.DataFrame.to_json) for details."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'[{\"證券代碼\":4938,\"簡稱\":\"和碩         \",\"年月日\":20110103,\"開盤價(元)\":29.32,\"最高價(元)\":29.42,\"最低價(元)\":29.21,\"收盤價(元)\":29.28,\"成交值(千元)\":89508},{\"證券代碼\":4938,\"簡稱\":\"和碩         \",\"年月日\":20110104,\"開盤價(元)\":29.42,\"最高價(元)\":29.56,\"最低價(元)\":29.28,\"收盤價(元)\":29.42,\"成交值(千元)\":107340}]'"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "json_str = df.iloc[:2,:].to_json(orient=\"records\", force_ascii=False)\n",
    "json_str"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'年月日': 20110103,\n",
       "  '成交值(千元)': 89508,\n",
       "  '收盤價(元)': 29.28,\n",
       "  '最低價(元)': 29.21,\n",
       "  '最高價(元)': 29.42,\n",
       "  '簡稱': '和碩         ',\n",
       "  '證券代碼': 4938,\n",
       "  '開盤價(元)': 29.32},\n",
       " {'年月日': 20110104,\n",
       "  '成交值(千元)': 107340,\n",
       "  '收盤價(元)': 29.42,\n",
       "  '最低價(元)': 29.28,\n",
       "  '最高價(元)': 29.56,\n",
       "  '簡稱': '和碩         ',\n",
       "  '證券代碼': 4938,\n",
       "  '開盤價(元)': 29.42}]"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "json.loads(json_str)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}