import chunk
from typing import final
from venv import create
import pandas as pd
import multiprocessing
import base64
import chunk
import os
import sys
import pymysql
import requests
from sqlalchemy import create_engine
import requests
import warnings
import fileinput
import glob
from multiprocessing import Process
import bisect
# Number of CSV rows parsed per chunk, so the input is streamed instead of
# being loaded into memory all at once.
Chunksize = 1000000
# Suppress FutureWarning output for the remainder of the script.
warnings.simplefilter(action='ignore', category=FutureWarning)

# ---------------------------------------------------------------------------
# Gas: sum of `gas` and number of source rows per (date, hjd).
#
# Each chunk is reduced to per-key partial results first; the partials are
# then merged so that keys spread over several chunks end up as a single row.
# ---------------------------------------------------------------------------
x = 0  # rows read so far (progress output only)
y = 0  # not read anywhere in this section; kept because it is a module-level name

gas_parts = []
for raw_chunk in pd.read_csv(
    "C:/Users/TSM/gas_trunc.csv",
    chunksize=Chunksize,
    sep=',',
    encoding='utf-8',
    names=['date', 'addr', 'hjd', 'gas'],
    header=0,  # the file's own header row is replaced by `names`
    low_memory=False,
):
    # int64 is spelled out because plain 'int' follows the platform C long
    # under NumPy 1.x (32-bit on Windows), which could overflow when summing.
    raw_chunk = raw_chunk.astype(
        {'date': 'str', 'addr': 'str', 'hjd': 'str', 'gas': 'int64'}
    )
    # Named aggregation: the dict-renaming form of SeriesGroupBy.agg raises
    # SpecificationError on pandas >= 1.0.
    part = raw_chunk.groupby(['date', 'hjd'], as_index=False).agg(
        gas=('gas', 'sum'),
        count=('gas', 'count'),
    )
    gas_parts.append(
        part.astype({'date': 'str', 'hjd': 'str', 'gas': 'int64', 'count': 'int64'})
    )
    # Count the rows actually parsed so the last (short) chunk is not
    # over-reported.
    x += len(raw_chunk)
    print("gas -> 읽음: ", x)

# Concatenate once after the loop; concatenating inside the loop copies the
# accumulated frame again for every chunk.
if gas_parts:
    df = pd.concat(gas_parts)
else:
    # No data rows at all: fall back to an empty frame with the same columns.
    df = pd.DataFrame(columns=['date', 'hjd', 'gas', 'count'])
df = df.astype({'date': 'str', 'hjd': 'str', 'gas': 'int64', 'count': 'int64'})

# A (date, hjd) key that occurs in more than one chunk appears once per chunk
# here. Summing the partial sums and partial counts merges them; one pass is
# enough because the result of a groupby has unique keys.
dup_count = df.duplicated(subset=['date', 'hjd']).sum()
if dup_count > 0:
    print(dup_count)
    df = df.groupby(['date', 'hjd'], as_index=False).sum()
    df = df.astype({'date': 'str', 'hjd': 'str', 'gas': 'int64', 'count': 'int64'})
# Sanity check: prints the number of duplicated keys left after merging.
print(df.duplicated(subset=['date', 'hjd']).sum())

# NOTE: mode='a' appends without a header row, so running the script again
# adds the same rows to an already existing gas_groupby.csv.
df.to_csv("gas_groupby.csv", mode='a', index=False, header=None)
# ---------------------------------------------------------------------------
# Electricity: sum of `electricity` and number of source rows per (date, hjd).
#
# Each chunk is reduced to per-key partial results first; the partials are
# then merged so that keys spread over several chunks end up as a single row.
# `Chunksize` is the module-level chunk length defined earlier in the file.
# ---------------------------------------------------------------------------
x = 0  # rows read so far (progress output only)
y = 0  # not read anywhere in this section; kept because it is a module-level name

electricity_parts = []
for raw_chunk in pd.read_csv(
    "C:/Users/TSM/electricity_trunc.csv",
    chunksize=Chunksize,
    sep=',',
    encoding='utf-8',
    names=['date', 'addr', 'hjd', 'electricity'],
    header=0,  # the file's own header row is replaced by `names`
    low_memory=False,
):
    # int64 is spelled out because plain 'int' follows the platform C long
    # under NumPy 1.x (32-bit on Windows), which could overflow when summing.
    raw_chunk = raw_chunk.astype(
        {'date': 'str', 'addr': 'str', 'hjd': 'str', 'electricity': 'int64'}
    )
    # Named aggregation: the dict-renaming form of SeriesGroupBy.agg raises
    # SpecificationError on pandas >= 1.0.
    part = raw_chunk.groupby(['date', 'hjd'], as_index=False).agg(
        electricity=('electricity', 'sum'),
        count=('electricity', 'count'),
    )
    electricity_parts.append(
        part.astype(
            {'date': 'str', 'hjd': 'str', 'electricity': 'int64', 'count': 'int64'}
        )
    )
    # Count the rows actually parsed so the last (short) chunk is not
    # over-reported.
    x += len(raw_chunk)
    print("electricity -> 읽음: ", x)

# Concatenate once after the loop; concatenating inside the loop copies the
# accumulated frame again for every chunk.
if electricity_parts:
    df = pd.concat(electricity_parts)
else:
    # No data rows at all: fall back to an empty frame with the same columns.
    df = pd.DataFrame(columns=['date', 'hjd', 'electricity', 'count'])
df = df.astype(
    {'date': 'str', 'hjd': 'str', 'electricity': 'int64', 'count': 'int64'}
)

# A (date, hjd) key that occurs in more than one chunk appears once per chunk
# here. Summing the partial sums and partial counts merges them; one pass is
# enough because the result of a groupby has unique keys.
dup_count = df.duplicated(subset=['date', 'hjd']).sum()
if dup_count > 0:
    print(dup_count)
    df = df.groupby(['date', 'hjd'], as_index=False).sum()
    df = df.astype(
        {'date': 'str', 'hjd': 'str', 'electricity': 'int64', 'count': 'int64'}
    )
# Sanity check: prints the number of duplicated keys left after merging.
print(df.duplicated(subset=['date', 'hjd']).sum())

# NOTE: mode='a' appends without a header row, so running the script again
# adds the same rows to an already existing electricity_groupby.csv.
df.to_csv("electricity_groupby.csv", mode='a', index=False, header=None)