아래는 DB insert 코드
#-*-coding:utf-8 -*-
from urllib.request import urlopen
import requests as rq
from bs4 import BeautifulSoup as bs
import time
from datetime import datetime
import zipfile
import os
import boto3
from typing import final
from venv import create
import pandas as pd
import chunk
import warnings
from multiprocessing import Process
import bisect
import pandas as pd
from typing import final
from venv import create
import requests
from decimal import Decimal
import json
import shutil
import boto3
import json
from boto3.dynamodb.conditions import Key, Attr
from decimal import Decimal
def mul424(x):
x = x * 424
return x
def addr_truncate_sido(addr):
new_addr = addr[:2] + '00000000'
return new_addr
def addr_truncate_sigungu(addr):
new_addr = addr[:5] + '00000'
return new_addr
warnings.simplefilter(action='ignore', category=FutureWarning)
dynamodb = boto3.resource('dynamodb', region_name='ap-northeast-2', aws_access_key_id='비', aws_secret_access_key='밀')
print("db insert start")
df0 = pd.read_csv("C:/Users/TSM/OneDrive/바탕 화면/energy/buildingenergy.csv",sep =',',encoding='utf-8',names=['date','addr','electricity','gas','count'],low_memory=False)
df0 = df0.astype({'date':'str','addr':'str','electricity':'int','gas':'int','count':'int'}, errors='ignore')
df0['gas'] = df0['gas'].astype('Int64')
df0 = df0.fillna(0)
df0['gas_co2'] = df0['gas'].apply(mul424)
df0['electricity_co2'] = df0['electricity'].apply(mul424)
df0['energy_sum'] = df0['gas'] + df0['electricity']
df0['co2_sum'] = df0['gas_co2'] + df0['electricity_co2']
df0 = df0.fillna(0)
df0 = df0.sort_values(by=['date','addr'])
df0.to_csv("C:/Users/TSM/OneDrive/바탕 화면/energy/energy.csv", mode='w', index=False, header=None)
table0 = dynamodb.Table('energy') ##### 읍면동단위 table #####
with table0.batch_writer() as batch:
for index, row in df0.iterrows():
batch.put_item(json.loads(row.to_json(), parse_float=Decimal))
df1 = pd.read_csv("C:/Users/TSM/OneDrive/바탕 화면/energy/energy.csv",sep =',',encoding='utf-8',names=['date','addr','electricity','gas','count','gas_co2','electricity_co2','energy_sum','co2_sum'],header=0,low_memory=False)
df1 = df1.astype({'date':'str','addr':'str','electricity':'int','gas':'int','count':'int','gas_co2':'int','electricity_co2':'int','energy_sum':'int','co2_sum':'int'}, errors='ignore')
df1['gas'] = df1['gas'].astype('Int64')
df1 = df1.fillna(0)
df1['addr'] = df1['addr'].apply(addr_truncate_sido)
df1 = df1.groupby(['date', 'addr'], as_index=False)['electricity', 'gas', 'count'].sum()
df1['count'] = df1['count'].astype('Int64')
df1['gas_co2'] = df1['gas'].apply(mul424)
df1['electricity_co2'] = df1['electricity'].apply(mul424)
df1['energy_sum'] = df1['gas'] + df1['electricity']
df1['co2_sum'] = df1['gas_co2'] + df1['electricity_co2']
df1 = df1.fillna(0)
df1 = df1.sort_values(by=['date','addr'])
df1.to_csv("C:/Users/TSM/OneDrive/바탕 화면/energy/energy_sido.csv", mode='w', index=False, header=None)
table1 = dynamodb.Table('energy_sido') ##### 시도단위 table #####
with table1.batch_writer() as batch:
for index, row in df1.iterrows():
batch.put_item(json.loads(row.to_json(), parse_float=Decimal))
df2 = pd.read_csv("C:/Users/TSM/OneDrive/바탕 화면/energy/energy.csv",sep =',',encoding='utf-8',names=['date','addr','electricity','gas','count','gas_co2','electricity_co2','energy_sum','co2_sum'],header=0,low_memory=False)
df2 = df2.astype({'date':'str','addr':'str','electricity':'int','gas':'int','count':'int','gas_co2':'int','electricity_co2':'int','energy_sum':'int','co2_sum':'int'}, errors='ignore')
df2['gas'] = df2['gas'].astype('Int64')
df2 = df2.fillna(0)
df2['addr'] = df2['addr'].apply(addr_truncate_sigungu)
df2 = df2.groupby(['date', 'addr'], as_index=False)['electricity', 'gas', 'count'].sum()
df2['count'] = df2['count'].astype('Int64')
df2['gas_co2'] = df2['gas'].apply(mul424)
df2['electricity_co2'] = df2['electricity'].apply(mul424)
df2['energy_sum'] = df2['gas'] + df2['electricity']
df2['co2_sum'] = df2['gas_co2'] + df2['electricity_co2']
df2 = df2.fillna(0)
df2 = df2.sort_values(by=['date','addr'])
df2.to_csv("C:/Users/TSM/OneDrive/바탕 화면/energy/energy_sigungu.csv", mode='w', index=False, header=None)
table2 = dynamodb.Table('energy_sigungu') ##### 시군구단위 table #####
with table2.batch_writer() as batch:
for index, row in df2.iterrows():
batch.put_item(json.loads(row.to_json(), parse_float=Decimal))
print("complete db insert")