import chunk
from typing import final
from venv import create
import pandas as pd
import multiprocessing
import base64
import chunk
import os
import sys
import pymysql
import requests
from sqlalchemy import create_engine
import requests
import warnings
import fileinput
import glob
from multiprocessing import Process
import bisect
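# -*- coding: cp949 -*-
# NOTE: the declaration above is inert here; per PEP 263 it is only honoured on
# line 1 or 2 of the file, so Python 3 decodes this source as UTF-8 (the default).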
# Rows per pandas chunk; the inputs are streamed chunk by chunk instead of
# being loaded whole.
Chunksize = 1000000
warnings.simplefilter(action='ignore', category=FutureWarning)


def _truncate_csv(src_path, dst_path, value_col, label):
    """Stream a usage CSV in chunks, drop unusable rows, append the rest to ``dst_path``.

    The source file's own header row is discarded (``header=0``) and the
    columns are renamed to ``date``, ``addr``, ``hjd`` and ``value_col``.
    For every chunk the following rows are removed:

    * rows whose ``addr`` is exactly a single space,
    * rows whose ``hjd`` is the string ``'False'``,
    * rows that are exact duplicates of an earlier row in the same chunk.

    Args:
        src_path: Path of the CSV file to read.
        dst_path: Path of the CSV file to append to (written without a header).
        value_col: Name given to the fourth column; it is cast to ``int``.
        label: Prefix used in the per-chunk progress message.

    Returns:
        The total number of data rows read from ``src_path``.

    Raises:
        ValueError: Propagated from ``astype`` if ``value_col`` holds values
            that cannot be cast to ``int`` (for example missing values).
    """
    columns = ['date', 'addr', 'hjd', value_col]
    dtypes = {'date': 'str', 'addr': 'str', 'hjd': 'str', value_col: 'int'}
    rows_read = 0

    reader = pd.read_csv(
        src_path,
        chunksize=Chunksize,
        sep=',',
        encoding='utf-8',
        names=columns,
        header=0,
        low_memory=False,
    )
    for frame in reader:
        # Count the rows actually read; the last chunk is usually shorter
        # than Chunksize, so adding Chunksize would over-report.
        rows_read += len(frame)

        frame = frame.astype(dtypes)
        # After the str cast a missing value becomes the text 'nan', so only
        # the literal ' ' / 'False' markers are filtered here.
        usable = (frame['addr'] != ' ') & (frame['hjd'] != 'False')
        # NOTE(review): duplicates are only detected within one chunk; equal
        # rows that land in different chunks are both kept.
        frame = frame[usable].drop_duplicates(keep='first')

        print(f"{label} -> 읽음: ", rows_read)
        # NOTE(review): mode='a' appends, so re-running the script adds the
        # same rows to an existing output file again -- confirm this is wanted.
        frame.to_csv(dst_path, mode='a', index=False, header=False)

    print("=====================읽음: ", rows_read, "=====================")
    return rows_read


_truncate_csv("C:/Users/TSM/h_gas.csv", "gas_trunc.csv", 'gas', "gas")
_truncate_csv(
    "C:/Users/TSM/h_electricity.csv",
    "electricity_trunc.csv",
    'electricity',
    "electricity",
)
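# For each of h_gas and h_electricity: drop rows whose administrative-dong code
# is 'False', drop rows that have no lot-number address, and also drop rows that
# are duplicated.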