h_gas, h_electricity truncate

ewillwin·2022년 7월 28일
0

TSMtech Record

목록 보기
14/39
import chunk
from typing import final
from venv import create
import pandas as pd
import multiprocessing
import base64
import chunk
import os
import sys
import pymysql
import requests
from sqlalchemy import create_engine
import requests
import warnings
import fileinput
import glob
from multiprocessing import Process
import bisect
#-*- coding: cp949 -*-

# Number of CSV rows pandas loads into memory per chunk.
Chunksize = 1_000_000
# Suppress FutureWarning messages for the remainder of the run.
warnings.simplefilter('ignore', category=FutureWarning)

# Clean the gas usage dump chunk by chunk and append the surviving rows to
# gas_trunc.csv. A row is discarded when its 'addr' is a single space, when
# its 'hjd' is the string 'False', or when it duplicates an earlier row in
# the same chunk.
x = 0  # running total of data rows read from the source file
gas_reader = pd.read_csv(
    "C:/Users/TSM/h_gas.csv",
    chunksize=Chunksize,
    sep=',',
    encoding='utf-8',
    names=['date', 'addr', 'hjd', 'gas'],
    header=0,  # the file's own header row is replaced by `names`
    low_memory=False,
)
# The loop variable is not called `chunk` so that it does not rebind the
# `chunk` module imported at the top of the file.
for gas_chunk in gas_reader:
    # Count the rows actually read. The final chunk may be shorter than
    # Chunksize, so adding Chunksize would overstate the total.
    x += len(gas_chunk)
    gas_chunk = gas_chunk.astype(
        {'date': 'str', 'addr': 'str', 'hjd': 'str', 'gas': 'int'}
    )
    # One boolean mask selects the same rows as dropping the two index sets.
    usable = (gas_chunk['addr'] != ' ') & (gas_chunk['hjd'] != 'False')
    # NOTE: duplicates are removed within each chunk only; identical rows
    # that land in different chunks are both written.
    gas_chunk = gas_chunk[usable].drop_duplicates(keep='first')
    print("gas -> 읽음: ", x)
    # NOTE: mode='a' appends, so an existing gas_trunc.csv keeps growing if
    # the script is run again.
    gas_chunk.to_csv("gas_trunc.csv", mode='a', index=False, header=None)

print("=====================읽음: ", x, "=====================")

# Clean the electricity usage dump chunk by chunk and append the surviving
# rows to electricity_trunc.csv. A row is discarded when its 'addr' is a
# single space, when its 'hjd' is the string 'False', or when it duplicates
# an earlier row in the same chunk.
x = 0  # running total of data rows read from the source file
electricity_reader = pd.read_csv(
    "C:/Users/TSM/h_electricity.csv",
    chunksize=Chunksize,
    sep=',',
    encoding='utf-8',
    names=['date', 'addr', 'hjd', 'electricity'],
    header=0,  # the file's own header row is replaced by `names`
    low_memory=False,
)
# The loop variable is not called `chunk` so that it does not rebind the
# `chunk` module imported at the top of the file.
for electricity_chunk in electricity_reader:
    # Count the rows actually read. The final chunk may be shorter than
    # Chunksize, so adding Chunksize would overstate the total.
    x += len(electricity_chunk)
    electricity_chunk = electricity_chunk.astype(
        {'date': 'str', 'addr': 'str', 'hjd': 'str', 'electricity': 'int'}
    )
    # One boolean mask selects the same rows as dropping the two index sets.
    usable = (
        (electricity_chunk['addr'] != ' ')
        & (electricity_chunk['hjd'] != 'False')
    )
    # NOTE: duplicates are removed within each chunk only; identical rows
    # that land in different chunks are both written.
    electricity_chunk = electricity_chunk[usable].drop_duplicates(keep='first')
    print("electricity -> 읽음: ", x)
    # NOTE: mode='a' appends, so an existing electricity_trunc.csv keeps
    # growing if the script is run again.
    electricity_chunk.to_csv(
        "electricity_trunc.csv", mode='a', index=False, header=None
    )

print("=====================읽음: ", x, "=====================")

h_gas와 h_electricity 각각에서 행정동코드가 'False'인 행과 지번주소가 없는 행을 제거하고, 중복된 행도 함께 제거한다.

profile
💼 Software Engineer @ LG Electronics | 🎓 SungKyunKwan Univ. CSE

0개의 댓글