# 물론 요즘 공공데이터에서는 XML, JSON 등 여러 형식의 파일을 지원해 주는 곳이 많기는 하다.
# 하지만 혹시 파일 형식이 제한되거나 여러 형식을 직접 다루게 될 경우를 위해 작성한다.
from urllib.parse import urlencode, quote_plus, unquote
import requests
# Endpoint of the odcloud "vaccine-stat" open API. The service key is appended
# directly after the trailing `serviceKey=`.
url = 'https://api.odcloud.kr/api/15077756/v1/vaccine-stat?serviceKey='
# NOTE(review): presumably a placeholder for the real service key — confirm.
key = 'key'
# Remaining query string, starting with '&' so it can follow the service key.
# urlencode() already percent-encodes keys and values, so the keys do not need
# a separate quote_plus() call; the resulting string is unchanged.
queryParams = '&' + urlencode({'page': '1', 'perPage': '1824', 'returnType': 'JSON'})
# A timeout keeps the script from blocking forever on a stalled connection, and
# raise_for_status() stops on an HTTP error instead of letting an error payload
# be processed as if it were data.
get_data = requests.get(url + key + unquote(queryParams), timeout=30)
get_data.raise_for_status()
import json
# Decode the HTTP response body as JSON.
result_data = get_data.json()
# Save the decoded payload to vaccin.json. The with-block guarantees the file
# is flushed and closed, which the bare open()/write() pair never did.
with open('vaccin.json', 'w', encoding='utf-8') as file:
    json.dump(result_data, file)
import csv
# Write the records under result_data['data'] to vaccin.csv, using the keys of
# the first record as the header row.
records = result_data['data']
# newline='' lets the csv module control line endings itself; without it,
# platforms that translate '\n' end up with an extra blank line per row.
with open('vaccin.csv', 'w', encoding='utf-8', newline='') as f:
    # With no records there is no first row to take the header from, so the
    # file is simply left empty instead of failing with IndexError.
    if records:
        wr = csv.DictWriter(f, fieldnames=list(records[0].keys()))
        wr.writeheader()
        wr.writerows(records)
from urllib.request import urlopen, Request
# Fetch the raw response body with urllib.
# The service key is included here, as in the requests-based call earlier in
# the script; without it the URL carries an empty `serviceKey=` value.
# NOTE(review): queryParams asks for returnType=JSON while the body below is
# parsed as XML — confirm the intended return type / endpoint.
request = Request(url + key + queryParams, method='GET')
# The with-block closes the HTTP response once the body has been read.
with urlopen(request, timeout=30) as response:
    response_body = response.read()

# Keep the raw bytes exactly as received.
with open('covid.xml', 'wb') as f:
    f.write(response_body)

import xmltodict

# Parse the XML into nested dicts, then serialize that structure as JSON.
tojson = xmltodict.parse(response_body)
json_obj = json.dumps(tojson)
# The with-block closes covid.json, which was previously left open.
with open('covid.json', 'w', encoding='utf-8') as covid_file:
    covid_file.write(json_obj)
import xml.etree.ElementTree as ET
import pandas as pd
# Download the XML payload and flatten its item elements into covid.csv.
# The service key is included, as in the requests-based call earlier in the
# script; without it the URL carries an empty `serviceKey=` value.
# NOTE(review): queryParams asks for returnType=JSON while the body is parsed
# as XML here — confirm the intended return type / endpoint.
xml_url = url + key + queryParams
# The with-block closes the HTTP response once the body has been read.
with urlopen(xml_url, timeout=30) as response:
    data = response.read()
xtree = ET.fromstring(data)

# Child element names copied from every item; the list also fixes the CSV
# column order. Names are looked up verbatim in the XML, so spellings such as
# 'resutlNegCnt' are kept exactly as they were.
columns = [
    'accDefRate', 'accExamCnt', 'accExamCompCnt', 'careCnt', 'clearCnt',
    'createDt', 'deathCnt', 'decideCnt', 'examCnt', 'resutlNegCnt',
    'seq', 'stateDt', 'stateTime', 'updateDt',
]


def _child_text(node, tag):
    """Return the text of the direct child *tag* of *node*, or None if it is absent."""
    child = node.find(tag)
    return child.text if child is not None else None


# xtree[1][0] is the first child of the root's second child; each element
# under it becomes one row.
# NOTE(review): presumably <response><header/><body><items>... — confirm
# against an actual API response.
rows = [{tag: _child_text(node, tag) for tag in columns} for node in xtree[1][0]]

# Build the table only after every row has been collected, then export it.
catalog_df = pd.DataFrame(rows, columns=columns)
catalog_df.to_csv('covid.csv', encoding='utf-8')