수송에너지 일단락 -> 건물에너지로 합류
아직 진행중 + 그냥 기록
MySQL이용/ Python으로 query 및 pandas DataFrame 이용
import chunk
from typing import final
from venv import create
import pandas as pd
import multiprocessing
import base64
import chunk
import os
import sys
import pymysql
import requests
from sqlalchemy import create_engine
Chunksize=10**7
column_names2=['date','addr','electricity','count']
df2 = pd.DataFrame(columns=column_names2)
df2=df2.astype({'date':'str','addr':'str','electricity':'int','count':'int'})
df2=df2.sort_values(by=['date','addr'])
db_connection_str=''
db_connection=create_engine(db_connection_str)
conn = pymysql.connect(host="",
user='',
password='',
db='EnergyConsumption',
charset='utf8')
for chunk2 in pd.read_csv("",chunksize=Chunksize,sep =' ',encoding='CP949',names=['date','addr','electricity'],header=0,low_memory=False):
chunk2=chunk2.drop(chunk2[chunk2.date>=202201].index)
chunk2=chunk2.astype({'date':'str','addr':'str','electricity':'int'})
chunk2=chunk2.groupby(['date','addr'],as_index=False)['electricity'].agg({'electricity':'sum','count':'count'})
chunk2=chunk2.astype({'date':'str','addr':'str','electricity':'int','count':'int'})
df2=pd.concat([df2,chunk2])
print("1")
while(df2.duplicated(subset=['date','addr']).sum()>0):
print(df2.duplicated(subset=['date','addr']).sum())
df2=df2.groupby(['date','addr'],as_index=False).sum()
df2=df2.astype({'date':'str','addr':'str','electricity':'int','count':'int'})
print(df2.duplicated(subset=['date','addr']).sum())
print("end")
df2.to_sql(name='electricity',con=db_connection,if_exists='append',index=False)
+2022년도 data append 필요
+df.astype으로 column의 type 명확히 정의해주어야함
+astype은 inplace 없음/ 재정의 해주어야함