from urllib.request import urlopen
import requests as rq
from bs4 import BeautifulSoup as bs
from typing import final
from venv import create
import pandas as pd
from multiprocessing import Process
from typing import final
from venv import create
import requests
from decimal import Decimal
import re
import xml.etree.ElementTree as ET
import time
def get_kaptCode():
    """Fetch the total apartment list from the AptListService2 API.

    Returns:
        The raw response body as text (no parsing is done here).
    """
    endpoint = "http://apis.data.go.kr/1613000/AptListService2/getTotalAptList"
    # Paste the issued service key here. It is percent-decoded before being
    # handed to requests (presumably because the portal issues it already
    # URL-encoded and requests would encode it again -- confirm).
    encoded_key = ""
    query = {
        'serviceKey': requests.utils.unquote(encoded_key),
        'numOfRows': 18987,
    }
    return requests.get(endpoint, params=query).text
def get_energy(reqDate, kaptCode, *, timeout=30, retry_delay=2):
    """Fetch energy-usage data for one complex and month, retrying until HTTP 200.

    Args:
        reqDate: Value sent as the ``reqDate`` query parameter (the caller in
            this file passes strings such as ``"202201"``).
        kaptCode: Value sent as the ``kaptCode`` query parameter.
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            raises and is retried instead of blocking forever.
        retry_delay: Seconds to sleep after every failed attempt.

    Returns:
        The response body text of the first attempt that returned status 200.

    Note:
        There is no upper bound on the number of attempts; the function only
        returns once a request succeeds.
    """
    url = "http://apis.data.go.kr/1611000/ApHusEnergyUseInfoOfferService/getHsmpApHusUsgQtyInfoSearch"
    serviceKey = ""
    serviceKey = requests.utils.unquote(serviceKey)
    params = {'serviceKey': serviceKey, 'kaptCode': kaptCode, 'reqDate': reqDate}
    while True:
        try:
            response = requests.get(url, params=params, timeout=timeout)
        except requests.exceptions.RequestException:
            # Network-level failure (connection error, timeout, ...): fall
            # through to the shared back-off below. KeyboardInterrupt and
            # SystemExit are deliberately NOT caught so the job can be stopped.
            pass
        else:
            if response.status_code == 200:
                return response.text
        # Back off after *any* failed attempt, including non-200 answers, so
        # the loop never hammers the API without a pause.
        time.sleep(retry_delay)
def energy_parsing(energy_type, response):
    """Extract the integer contents of every ``<energy_type>`` element.

    Args:
        energy_type: Tag name to look for. It is matched literally; regex
            metacharacters in the name (e.g. ``.``) have no special meaning.
        response: Response text to search.

    Returns:
        A list of the matched values as strings (optional leading ``-``
        followed by digits), in order of appearance. Empty when the tag is
        absent or its content is not an integer.
    """
    tag = re.escape(energy_type)
    return re.findall(rf"<{tag}>(-?[0-9]+)</{tag}>", response)
# ---------------------------------------------------------------------------
# Batch driver: for every month in reqDate and every complex code read from
# kaptCode.txt, fetch the usage figures and append one comma-separated row to
# "<year>_kapt.txt".
# ---------------------------------------------------------------------------

# Response tags extracted for every complex, in output column order.
ENERGY_TYPES = ("helect", "hgas", "hheat", "hwaterCool", "hwaterHot")
# Pause between attempts when the API answer is not usable yet.
RETRY_DELAY_SECONDS = 2


def _fetch_readings(req_date, kapt_code):
    """Query the API until it returns resultCode '00' with a helect value.

    Returns a list with one entry per tag in ENERGY_TYPES; each entry is the
    list of matches produced by energy_parsing for that tag.

    NOTE: retries are unbounded, so a complex that never reports a helect
    value for the month blocks the batch at that complex.
    """
    while True:
        response = get_energy(req_date, kapt_code)
        result_code = energy_parsing("resultCode", response)
        # A body without any resultCode tag is treated like a failed call
        # instead of raising IndexError.
        if result_code and result_code[0] == '00':
            readings = [energy_parsing(tag, response) for tag in ENERGY_TYPES]
            print([kapt_code, *readings])
            if readings[0]:
                return readings
        time.sleep(RETRY_DELAY_SECONDS)


def _format_row(req_date, kapt_code, readings):
    """Build the output line: month, code, then one column per reading."""
    columns = [req_date, kapt_code]
    for matches in readings:
        value = matches[0] if matches else ''
        # Exactly one match is written bare. Anything else (missing value or
        # several matches) is flagged with a leading space; a missing value
        # therefore becomes a single-space column.
        columns.append(value if len(matches) == 1 else ' ' + value)
    return ','.join(columns) + ',\n'


with open("C:/Users/TSM/OneDrive/바탕 화면/kapt/kaptCode.txt", "r", encoding="cp949") as response_kaptCode:
    # The first 9 characters of each line are used as the complex code.
    kaptCode_list = [line[:9] for line in response_kaptCode]

# Resume controls: with stop == 1 the first month starts at index stopidx of
# kaptCode_list; every later month starts from the beginning.
stop = 1
stopidx = 0
reqDate = ["202201", "202202", "202203", "202204", "202205", "202206", "202207" ,"202208" , "202209", "202210", "202211", "202212"]

# All months go to one file named after the year of the first month.
des_path = "C:/Users/TSM/OneDrive/바탕 화면/kapt/" + reqDate[0][:4] + "_kapt.txt"

for month in reqDate:
    first_index = stopidx if stop == 1 else 0
    for kapt_code in kaptCode_list[first_index:]:
        readings = _fetch_readings(month, kapt_code)
        # Open/close per row so each finished row is on disk before the next
        # (possibly long-running) request starts.
        with open(des_path, "a", encoding="cp949") as des:
            des.write(_format_row(month, kapt_code, readings))
    stop = 0
- try/except 문에서 오류가 발생하면 sleep(2) 후 다시 요청을 시도하도록 구현하여 timeout error를 해결함 (기존에는 except 문 안에서 요청을 한 번 더 보냈기 때문에, 그 요청에서 다시 error가 발생하면 프로세스가 종료되었음)
- 해당 API에는 HTTP status code 외에도 resultCode라는 필수 response parameter가 있으며, 이 값이 '00'일 때만 정상적으로 응답을 받은 것임. 따라서 status code가 200이고 resultCode가 '00'인 경우에만 file에 write하도록 수정함
- cmd에서 `pythonw -u kaptget_20xx.py > log_20xx.log 2>&1` 명령어로 연도마다 pythonw 프로세스를 하나씩 생성하여 백그라운드에서 병렬로 실행시킴