#-*-coding:utf-8 -*-
from urllib.request import urlopen
import requests as rq
from selenium import webdriver
from bs4 import BeautifulSoup as bs
import time
from selenium.webdriver.support.ui import Select
from datetime import datetime
import zipfile
import os
import boto3
from selenium.webdriver.common.by import By
from typing import final
from venv import create
import pandas as pd
import chunk
import warnings
from multiprocessing import Process
import bisect
import pandas as pd
from typing import final
from venv import create
import requests
from decimal import Decimal
import json
import shutil
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
def mul424(x):
    """Scale *x* by 424 (the fixed conversion factor used for the *_co2 columns)."""
    return x * 424
def addr_truncate_sido(addr):
    """Collapse a 10-digit region code to sido level: keep the first 2 digits, zero the rest."""
    return addr[:2] + '0' * 8
def addr_truncate_sigungu(addr):
    """Collapse a 10-digit region code to sigungu level: keep the first 5 digits, zero the rest."""
    return addr[:5] + '0' * 5
def findfile(name, path):
    """Search *path* recursively for a file named *name*.

    Returns the full path of the first match. If nothing is found under
    *path*, falls back to a single scan from the filesystem root and returns
    None when the file truly does not exist. (The original recursed on "/"
    unconditionally after a miss, so a missing file caused infinite
    recursion / RecursionError.)
    """
    for dirpath, dirnames, filenames in os.walk(path):
        if name in filenames:
            return os.path.join(dirpath, name)
    # fall back to the root exactly once; never recurse again from "/"
    if path != "/":
        return findfile(name, "/")
    return None
def s3_connection():
    """Create a boto3 S3 client for region ap-northeast-2.

    Returns the client on success, or None when construction fails. (The
    original printed the exception and then fell through to `return s3`
    with `s3` unbound, raising a NameError on top of the real error.)
    Credentials are blank here and filled in at deploy time.
    """
    try:
        s3 = boto3.client(
            service_name="s3",
            region_name="ap-northeast-2",
            aws_access_key_id="",
            aws_secret_access_key="",
        )
    except Exception as e:
        print(e)
        return None
    print("s3 bucket connected!")
    return s3
def _format_energy_line(fields):
    """Build one 'date,address,region-code,usage' CSV line from a '|'-split record.

    Field layout (from the EAIS dump): fields[0]=YYYYMM, fields[2]+fields[3]
    = region code, fields[4..6] = sido/sigungu/dong names, fields[7] = 'san'
    (mountain lot) flag, fields[8]/fields[9] = main/sub lot numbers,
    fields[16] = usage value (keeps its trailing newline).
    """
    lot = str(int(fields[8]))
    if int(fields[9]) != 0:
        lot = lot + '-' + str(int(fields[9]))
        san = '산 ' if fields[7] != '0' else ''
    else:
        # NOTE: the legacy writer emitted a double space after '산' in this
        # branch; preserved so cached map2.csv addresses keep matching.
        san = '산  ' if fields[7] != '0' else ''
    address = fields[4] + ' ' + fields[5] + ' ' + fields[6] + ' ' + san + lot + '번지'
    return fields[0] + ',' + address + ',' + fields[2] + fields[3] + ',' + fields[16]


def _convert_file(src, dst):
    """Rewrite the '|'-delimited dump *src* as CSV *dst* (cp949 both ways)."""
    # `with` guarantees the handles close even if a malformed line raises
    with open(src, 'r', encoding='cp949') as fin, open(dst, 'w', encoding='cp949') as fout:
        for line in fin:
            fout.write(_format_energy_line(line.split('|')))


def parsing(addr, addr2, addr3, addr4):
    """Convert the electricity dump (addr -> addr2) and the gas dump (addr3 -> addr4).

    The two loops in the original were byte-identical copy-paste; both now
    share _convert_file / _format_energy_line.
    """
    _convert_file(addr, addr2)
    _convert_file(addr3, addr4)
# --- fetch the cached mapping tables from S3 ---
# map1.xlsx: legal-dong(bjd) -> administrative-dong(hjd) code table
# map2.csv : lot-address -> hjd cache built up by previous runs
s3=s3_connection()
s3.download_file('updatingdata','map1.xlsx','/home/ubuntu/auto_insert/map1.xlsx')
s3.download_file('updatingdata','map2.csv','/home/ubuntu/auto_insert/map2.csv')
# --- headless Chrome configured to download straight into the work directory ---
options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
prefs = {'download.default_directory' : '/home/ubuntu/auto_insert'}
options.add_experimental_option('prefs', prefs)
options.add_experimental_option("excludeSwitches", ["enable-logging"])
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
time.sleep(1)
# log in to the EAIS building-register portal (id/pw are blank here and
# filled in at deploy time)
driver.get("https://cloud.eais.go.kr/moct/awp/abb01/AWPABB01F05")
time.sleep(2)
driver.find_element(By.XPATH, '//*[@id="__layout"]/div/div[1]/div/div/div[1]/div[1]/dl/dd[1]/input').send_keys('')
driver.find_element("name","pwd").send_keys('')
driver.find_element("xpath",'//*[@id="__layout"]/div/div[1]/div/div/div[1]/div[1]/button').click()
time.sleep(1)
# open the open-data listing and read the publication date of the newest dataset
driver.get("https://open.eais.go.kr/nbemifm/OpenInfoList.do?viewType=10")
select=Select(driver.find_element(By.CSS_SELECTOR,"#searchPubDataGrpCd"))
select.select_by_index(1)
driver.find_element("xpath",'//*[@id="content"]/div/span/a/img').click()
xpath='//*[@id="content"]/div/div[3]/table/tbody/tr[2]/td[3]'
date=driver.find_element('xpath',xpath)
date=date.text
date=date.split('-')
year=datetime.today().year
month=datetime.today().month
# if(year is not int(date[0]) or month is not int(date[1])) :
# print("no update")
# quit()
# else :
# the datasets lag three months behind today: step back one quarter,
# borrowing from the year when the subtraction crosses January
if(month<=3):
    year=year-1
    month=month+9
else:
    month=month-3
if(month<10):
    month='0'+str(month)
else:
    month=str(month)
date=str(year)+month
################################
################################
date = '202206' # temp date set -- TODO: remove this hard-coded override
################################
################################
#driver.find_element('xpath','//*[@id="content"]/div/div[3]/table/tbody/tr[2]/td[5]/a[1]/span/span/img').click()
# NOTE(review): tr[6]/tr[7] row indices are hard-coded for the June dataset;
# the commented lines are the generic newest-row variants. Re-parameterise
# together with the `date` override above.
driver.find_element('xpath','//*[@id="content"]/div/div[3]/table/tbody/tr[6]/td[5]/a[1]/span/span/img').click() #temp for June
time.sleep(30)
#driver.find_element('xpath','//*[@id="content"]/div/div[3]/table/tbody/tr[3]/td[5]/a[1]/span/span/img').click()
driver.find_element('xpath','//*[@id="content"]/div/div[3]/table/tbody/tr[7]/td[5]/a[1]/span/span/img').click() #temp for June
time.sleep(30)
driver.quit()
# --- unpack the two downloaded archives and normalise each dump to CSV ---
gas = zipfile.ZipFile("/home/ubuntu/auto_insert/지번별에너지사용량_도시가스_"+date+".zip")
electricity = zipfile.ZipFile('/home/ubuntu/auto_insert/지번별에너지사용량_전기_'+date+'.zip')
gas.extractall('/home/ubuntu/auto_insert')
electricity.extractall('/home/ubuntu/auto_insert')
gas.close()
electricity.close()
electricity_file=findfile("eais_elcty_"+date+".txt",'/home/ubuntu/auto_insert')
new_electricity_file="/home/ubuntu/auto_insert/electricity_"+date+".txt"
gas_file=findfile("eais_gas_"+date+".txt",'/home/ubuntu/auto_insert')
new_gas_file="/home/ubuntu/auto_insert/gas_"+date+".txt"
parsing(electricity_file,new_electricity_file,gas_file,new_gas_file)
# ##############################################################
# '''sum_gas = 0; sum_electricity = 0; ccount = 0
# Chunksize = 1000000
# for chunk in pd.read_csv(new_gas_file,chunksize=Chunksize,sep =',',encoding='cp949',usecols=[3],names=['gas'],header=0,low_memory=False):
# chunk = chunk.astype({'gas':'int'})
# sum_gas += chunk['gas'].sum()
# for chunk in pd.read_csv(new_electricity_file,chunksize=Chunksize,sep =',',encoding='cp949',usecols=[3],names=['electricity'],header=0,low_memory=False):
# chunk = chunk.astype({'electricity':'int'})
# sum_electricity += chunk['electricity'].sum()
# ccount += len(chunk.index)
# dynamodb = boto3.resource('dynamodb', region_name='ap-northeast-2', aws_access_key_id='', aws_secret_access_key='')
# bjd_table = dynamodb.Table('WholeConsumption')
# print(date, sum_gas, sum_electricity, ccount)
# print(type(date))
# print(type(sum_gas))
# print(type(sum_electricity))
# print(type(ccount))
# bjd_table.put_item(
# Item={
# 'Date': date,
# 'gas': int(sum_gas),
# 'electricity': int(sum_electricity),
# 'count': int(ccount),
# }
# )'''
############################################################
# --- bjd -> hjd (administrative dong) conversion over the parsed CSVs ---
print("start hjd transform")
Chunksize = 10000
warnings.simplefilter(action='ignore', category=FutureWarning)
# rotating pool of Kakao REST API keys; empty here and filled in at deploy
# time. `index` points at the key currently in use.
kakao_key=[]
index=0
def get_hangjeongdong(addr):
    """Resolve a lot-number address to its hangjeongdong (administrative dong) code.

    Queries the Kakao local address-search API with the current key
    (kakao_key[index]). Sentinel return values (kept for the callers):
      100   -> response has no 'documents' key (key rejected / quota
               exhausted); the caller rotates to the next key,
      False -> the address could not be resolved to a non-empty h_code,
      str   -> the h_code on success.
    """
    global index
    global kakao_key
    url = "https://dapi.kakao.com/v2/local/search/address.json"
    params = {'query': addr}
    headers = {"Authorization": "KakaoAK " + kakao_key[index]}
    # timeout so one hung request cannot stall the whole batch run
    response = requests.get(url, params=params, headers=headers, timeout=10).json()
    if 'documents' not in response:
        return 100
    documents = response['documents']
    if len(documents) == 0:
        return False
    # 'address' is None for road-name-only matches; the original crashed
    # with a TypeError on documents[0]['address']['h_code'] in that case
    address = documents[0].get('address')
    if not address:
        return False
    h_code = address.get('h_code', '')
    if h_code == '':
        return False
    return h_code
# map1: legal-dong(bjd) code -> administrative-dong(hjd) code lookup
map1 = pd.read_excel("/home/ubuntu/auto_insert/map1.xlsx",names=['행정동코드','법정동코드'])
map1 = map1.astype({'행정동코드':'str','법정동코드':'str'})
map1_set = set(list(map1['법정동코드']))
# x: rows read so far, y: Kakao API requests sent
x = 0; y = 0
for chunk in pd.read_csv(new_gas_file,chunksize=Chunksize,sep =',',encoding='cp949',names=['date','addr','bjd','gas'],header=0,low_memory=False):
    chunk = chunk.astype({'date':'str','addr':'str','bjd':'str','gas':'int'})
    # map2: lot-address -> hjd cache; re-read every chunk because rows
    # resolved via the API are appended to it below
    map2 = pd.read_csv("/home/ubuntu/auto_insert/map2.csv",names=['지번주소','행정동코드','법정동코드'],dtype={'지번주소':'str','행정동코드':'str','법정동코드':'str'},encoding='cp949',low_memory=False)
    map2_set = set(list(map2['지번주소']))
    map2_list_addr = list(map2['지번주소'])
    map2_list_hjd = list(map2['행정동코드'])
    # idxarr maps a position in the sorted address list back to the
    # original row, so the matching hjd can be fetched after bisecting
    idxarr = sorted(range(len(map2_list_addr)),key=lambda k:map2_list_addr[k])
    map2_list_addr.sort()
    for i, row in chunk.iterrows():
        지번주소 = chunk.at[i, 'addr']  # lot-number address
        법정동코드 = chunk.at[i, 'bjd']  # legal dong code
        if 법정동코드 not in map1_set: # not in map1
            if 지번주소 not in map2_set: # not in map2 -> ask the Kakao API
                y+=1
                행정동코드 = get_hangjeongdong(지번주소)
                if 행정동코드 == 100:
                    # 100 = key rejected/exhausted: rotate keys and retry once
                    index += 1
                    if index == len(kakao_key):
                        # NOTE(review): all keys used up -- this only breaks
                        # the row loop, so the rest of this chunk (and all
                        # later chunks) are written with unconverted codes
                        break
                    행정동코드 = get_hangjeongdong(지번주소)
                if 행정동코드 is False:
                    chunk.drop(i,axis=0,inplace=True)
                else:
                    chunk.at[i, 'bjd'] = 행정동코드
                    # persist the fresh lookup into the on-disk cache
                    tmp_data = {'지번주소':[지번주소],'행정동코드':[행정동코드],'법정동코드':[법정동코드]}
                    tmp_df = pd.DataFrame(tmp_data)
                    tmp_df = tmp_df.astype({'지번주소':'str','행정동코드':'str','법정동코드':'str'})
                    tmp_df.to_csv('/home/ubuntu/auto_insert/map2.csv',mode='a',index=False,encoding='cp949',header=False)
            else: # exist in map2
                # binary-search the sorted copy, map the hit back to the
                # original row, and take its cached hjd
                idxx = bisect.bisect_left(map2_list_addr, 지번주소)
                idxxx = idxarr[idxx]
                temp = map2_list_hjd[idxxx]
                if (temp is not None) and (temp is not False):
                    chunk.at[i, 'bjd'] = temp
                else:
                    chunk.drop(i,axis=0,inplace=True)
        else: # exist in map1
            chunk.at[i, 'bjd'] = map1.loc[map1['법정동코드']==법정동코드]['행정동코드'].values.astype('str')[0]
    x += Chunksize
    print("읽음: ", x, " 요청 보냄: ", y)
    # append the converted chunk (bjd column now holds hjd codes)
    chunk.to_csv("/home/ubuntu/auto_insert/h_gas.csv", mode='a', index=False, header=None)
# same conversion as the gas loop above, for the electricity CSV
x = 0; y = 0
for chunk in pd.read_csv(new_electricity_file,chunksize=Chunksize,sep =',',encoding='cp949',names=['date','addr','bjd','electricity'],header=0,low_memory=False):
    chunk = chunk.astype({'date':'str','addr':'str','bjd':'str','electricity':'int'})
    # re-read the cache each chunk; the gas loop and this loop both append to it
    map2 = pd.read_csv("/home/ubuntu/auto_insert/map2.csv",names=['지번주소','행정동코드','법정동코드'],dtype={'지번주소':'str','행정동코드':'str','법정동코드':'str'},encoding='cp949',low_memory=False)
    map2_set = set(list(map2['지번주소']))
    map2_list_addr = list(map2['지번주소'])
    map2_list_hjd = list(map2['행정동코드'])
    idxarr = sorted(range(len(map2_list_addr)),key=lambda k:map2_list_addr[k])
    map2_list_addr.sort()
    for i, row in chunk.iterrows():
        지번주소 = chunk.at[i, 'addr']  # lot-number address
        법정동코드 = chunk.at[i, 'bjd']  # legal dong code
        if 법정동코드 not in map1_set: # not in map1
            if 지번주소 not in map2_set: # not in map2 -> ask the Kakao API
                y+=1
                행정동코드 = get_hangjeongdong(지번주소)
                if 행정동코드 == 100:
                    # rotate to the next Kakao key and retry once
                    index += 1
                    if index == len(kakao_key):
                        # NOTE(review): same issue as the gas loop -- break
                        # only exits the row loop, leaving rows unconverted
                        break
                    행정동코드 = get_hangjeongdong(지번주소)
                if 행정동코드 is False:
                    chunk.drop(i,axis=0,inplace=True)
                else:
                    chunk.at[i, 'bjd'] = 행정동코드
                    tmp_data = {'지번주소':[지번주소],'행정동코드':[행정동코드],'법정동코드':[법정동코드]}
                    tmp_df = pd.DataFrame(tmp_data)
                    tmp_df = tmp_df.astype({'지번주소':'str','행정동코드':'str','법정동코드':'str'})
                    tmp_df.to_csv('/home/ubuntu/auto_insert/map2.csv',mode='a',index=False,encoding='cp949',header=False)
            else: # exist in map2
                idxx = bisect.bisect_left(map2_list_addr, 지번주소)
                idxxx = idxarr[idxx]
                temp = map2_list_hjd[idxxx]
                if (temp is not None) and (temp is not False):
                    chunk.at[i, 'bjd'] = temp
                else:
                    chunk.drop(i,axis=0,inplace=True)
        else: # exist in map1
            chunk.at[i, 'bjd'] = map1.loc[map1['법정동코드']==법정동코드]['행정동코드'].values.astype('str')[0]
    x += Chunksize
    print("읽음: ", x, " 요청 보냄: ", y)
    chunk.to_csv("/home/ubuntu/auto_insert/h_electricity.csv", mode='a', index=False, header=None)
print("complete hjd trasnform")
##############################truncate & group by sum & join##############################
#truncate
print("start truncate")
Chunksize = 1000000
x = 0
# drop rows with a blank address or an unresolved hjd (the literal string
# 'False' written by the conversion step), then remove exact duplicates
for chunk in pd.read_csv("/home/ubuntu/auto_insert/h_gas.csv",chunksize=Chunksize,sep =',',encoding='utf-8',names=['date','addr','hjd','gas'],header=0,low_memory=False):
    chunk = chunk.astype({'date':'str','addr':'str','hjd':'str','gas':'int'})
    idx1 = chunk[chunk['addr'] == ' '].index
    chunk.drop(idx1,inplace=True)
    idx2 = chunk[chunk['hjd'] == 'False'].index
    chunk.drop(idx2,inplace=True)
    chunk = chunk.drop_duplicates(keep='first')
    x += Chunksize
    print("gas -> 읽음: ", x)
    chunk.to_csv("/home/ubuntu/auto_insert/gas_trunc.csv", mode='a', index=False, header=None)
print("=====================읽음: ", x, "=====================")
x = 0
# same cleanup for the electricity file
for chunk in pd.read_csv("/home/ubuntu/auto_insert/h_electricity.csv",chunksize=Chunksize,sep =',',encoding='utf-8',names=['date','addr','hjd','electricity'],header=0,low_memory=False):
    chunk = chunk.astype({'date':'str','addr':'str','hjd':'str','electricity':'int'})
    idx1 = chunk[chunk['addr'] == ' '].index
    chunk.drop(idx1,inplace=True)
    idx2 = chunk[chunk['hjd'] == 'False'].index
    chunk.drop(idx2,inplace=True)
    chunk = chunk.drop_duplicates(keep='first')
    x += Chunksize
    print("electricity -> 읽음: ", x)
    chunk.to_csv("/home/ubuntu/auto_insert/electricity_trunc.csv", mode='a', index=False, header=None)
print("=====================읽음: ", x, "=====================")
print("complete truncate")
#group by
print("groupby 시작")
# accumulate per-(date, hjd) gas totals and meter counts across all chunks
df = pd.DataFrame(columns=['date', 'hjd', 'gas', 'count'])
df = df.astype({'date':'str','hjd':'str','gas':'int','count':'int'})
df = df.sort_values(by=['date','hjd'])
x = 0; y = 0
for chunk in pd.read_csv("/home/ubuntu/auto_insert/gas_trunc.csv",chunksize=Chunksize,sep =',',encoding='utf-8',names=['date','addr','hjd','gas'],header=0,low_memory=False):
    chunk = chunk.astype({'date':'str','addr':'str','hjd':'str','gas':'int'})
    # named aggregation: the old dict form ['gas'].agg({'gas':'sum','count':'count'})
    # is a "nested renamer" and raises SpecificationError on pandas >= 1.0
    chunk = chunk.groupby(['date','hjd'],as_index=False).agg(gas=('gas','sum'),count=('gas','count'))
    chunk = chunk.astype({'date':'str','hjd':'str','gas':'int','count':'int'})
    df = pd.concat([df,chunk])
    x += Chunksize
    print("gas -> 읽음: ", x)
# groups can straddle chunk boundaries: keep re-aggregating until no
# (date, hjd) pair appears twice
while (df.duplicated(subset=['date','hjd']).sum() > 0):
    print(df.duplicated(subset=['date','hjd']).sum())
    df = df.groupby(['date','hjd'],as_index=False).sum()
    df = df.astype({'date':'str','hjd':'str','gas':'int','count':'int'})
print(df.duplicated(subset=['date','hjd']).sum())
df.to_csv("/home/ubuntu/auto_insert/gas_groupby.csv", mode='a', index=False, header=None)
# same aggregation for electricity
df = pd.DataFrame(columns=['date', 'hjd', 'electricity', 'count'])
df = df.astype({'date':'str','hjd':'str','electricity':'int','count':'int'})
df = df.sort_values(by=['date','hjd'])
x = 0; y = 0
for chunk in pd.read_csv("/home/ubuntu/auto_insert/electricity_trunc.csv",chunksize=Chunksize,sep =',',encoding='utf-8',names=['date','addr','hjd','electricity'],header=0,low_memory=False):
    chunk = chunk.astype({'date':'str','addr':'str','hjd':'str','electricity':'int'})
    chunk = chunk.groupby(['date','hjd'],as_index=False).agg(electricity=('electricity','sum'),count=('electricity','count'))
    chunk = chunk.astype({'date':'str','hjd':'str','electricity':'int','count':'int'})
    df = pd.concat([df,chunk])
    x += Chunksize
    print("electricity -> 읽음: ", x)
while (df.duplicated(subset=['date','hjd']).sum() > 0):
    print(df.duplicated(subset=['date','hjd']).sum())
    df = df.groupby(['date','hjd'],as_index=False).sum()
    df = df.astype({'date':'str','hjd':'str','electricity':'int','count':'int'})
print(df.duplicated(subset=['date','hjd']).sum())
df.to_csv("/home/ubuntu/auto_insert/electricity_groupby.csv", mode='a', index=False, header=None)
print("groupby 완료")
#join
print("join 시작")
# gas_groupby.csv columns are date,hjd,gas,count; the hjd code is read under
# the name 'addr' from here on
gas = pd.read_csv("/home/ubuntu/auto_insert/gas_groupby.csv",sep =',',encoding='utf-8',names=['date','addr','gas','count'],header=0,low_memory=False)
gas = gas.astype({'date':'str','addr':'str','gas':'int','count':'int'})
gas = gas.sort_values(by=['date','addr'])
# keep only the electricity-side meter count in the merged output
gas.drop(['count'], axis=1, inplace=True)
electricity = pd.read_csv("/home/ubuntu/auto_insert/electricity_groupby.csv",sep =',',encoding='utf-8',names=['date','addr','electricity','count'],header=0,low_memory=False)
electricity = electricity.astype({'date':'str','addr':'str','electricity':'int','count':'int'})
electricity = electricity.sort_values(by=['date','addr'])
# left join keeps every electricity row; 'gas' is NaN where a dong has no
# gas reading (filled with 0 in the insert step below)
df = pd.merge(left=electricity, right=gas, how='left', on=['date','addr'])
df = df[['date','addr','electricity','gas','count']]
df = df.astype({'date':'str','addr':'str','electricity':'int','gas':'int','count':'int'}, errors='ignore')
df.to_csv("/home/ubuntu/auto_insert/buildingenergy.csv", mode='a', index=False, header=None)
print("join 완료")
##############################db insert##############################
print("db insert start")
warnings.simplefilter(action='ignore', category=FutureWarning)
# AWS credentials are blank here and filled in at deploy time
dynamodb = boto3.resource('dynamodb', region_name='ap-northeast-2', aws_access_key_id='', aws_secret_access_key='')
table = dynamodb.Table('BUILDINGENERGY')
df = pd.read_csv("/home/ubuntu/auto_insert/buildingenergy.csv",sep =',',encoding='utf-8',names=['date','addr','electricity','gas','count'],header=0,low_memory=False)
df = df.astype({'date':'str','addr':'str','electricity':'int','gas':'int','count':'int'}, errors='ignore')
# nullable Int64 so rows without a gas reading survive, then zero-fill them
df['gas'] = df['gas'].astype('Int64')
df = df.fillna(0)
# derived columns: *_co2 scales usage by 424 (presumably the CO2 conversion
# factor -- confirm against the data spec) plus combined totals
df['gas_co2'] = df['gas'].apply(mul424)
df['electricity_co2'] = df['electricity'].apply(mul424)
df['energy_sum'] = df['gas'] + df['electricity']
df['co2_sum'] = df['gas_co2'] + df['electricity_co2']
df = df.fillna(0)
df = df.sort_values(by=['date','addr'])
df.to_csv("/home/ubuntu/auto_insert/" + date + ".csv", mode='a', index=False, header=None) #temporary
# batch-write each row; floats in the row JSON become Decimal as boto3 requires
with table.batch_writer() as batch:
    for index, row in df.iterrows():
        batch.put_item(json.loads(row.to_json(), parse_float=Decimal))
# --- sido (province) level aggregation ---
table1 = dynamodb.Table('BUILDING_ENERGY_SIDO')
table2 = dynamodb.Table('BUILDINGENERGY_SIGUNGU')
df1 = pd.read_csv("/home/ubuntu/auto_insert/" + date + ".csv",sep =',',encoding='utf-8',names=['date','addr','electricity','gas','count','gas_co2','electricity_co2','energy_sum','co2_sum'],header=0,low_memory=False)
df1 = df1.astype({'date':'str','addr':'str','electricity':'int','gas':'int','count':'int','gas_co2':'int','electricity_co2':'int','energy_sum':'int','co2_sum':'int'}, errors='ignore')
df1['gas'] = df1['gas'].astype('Int64')
df1 = df1.fillna(0)
# collapse every 10-digit hjd code to its 2-digit sido prefix, then re-aggregate
df1['addr'] = df1['addr'].apply(addr_truncate_sido)
# multi-column selection needs a list ([[...]]): the original tuple-style
# groupby indexing ['electricity', 'gas', 'count'] was removed in pandas 2.x
df1 = df1.groupby(['date', 'addr'], as_index=False)[['electricity', 'gas', 'count']].sum()
df1['count'] = df1['count'].astype('Int64')
# rebuild the derived columns at the aggregated level
df1['gas_co2'] = df1['gas'].apply(mul424)
df1['electricity_co2'] = df1['electricity'].apply(mul424)
df1['energy_sum'] = df1['gas'] + df1['electricity']
df1['co2_sum'] = df1['gas_co2'] + df1['electricity_co2']
df1 = df1.fillna(0)
df1 = df1.sort_values(by=['date','addr'])
df1.to_csv("/home/ubuntu/auto_insert/sido.csv", mode='w', index=False, header=None)
print(df1)
with table1.batch_writer() as batch:
    for index, row in df1.iterrows():
        batch.put_item(json.loads(row.to_json(), parse_float=Decimal))
# --- sigungu (city/county/district) level aggregation ---
dff = pd.read_csv("/home/ubuntu/auto_insert/" + date + ".csv",sep =',',encoding='utf-8',names=['date','addr','electricity','gas','count','gas_co2','electricity_co2','energy_sum','co2_sum'],header=0,low_memory=False)
dff = dff.astype({'date':'str','addr':'str','electricity':'int','gas':'int','count':'int','gas_co2':'int','electricity_co2':'int','energy_sum':'int','co2_sum':'int'}, errors='ignore')
dff['gas'] = dff['gas'].astype('Int64')
dff = dff.fillna(0)
# collapse the 10-digit hjd code to its 5-digit sigungu prefix, then re-aggregate
dff['addr'] = dff['addr'].apply(addr_truncate_sigungu)
# multi-column selection needs a list ([[...]]): the original tuple-style
# groupby indexing ['electricity', 'gas', 'count'] was removed in pandas 2.x
dff = dff.groupby(['date', 'addr'], as_index=False)[['electricity', 'gas', 'count']].sum()
dff['count'] = dff['count'].astype('Int64')
# rebuild the derived columns at the aggregated level
dff['gas_co2'] = dff['gas'].apply(mul424)
dff['electricity_co2'] = dff['electricity'].apply(mul424)
dff['energy_sum'] = dff['gas'] + dff['electricity']
dff['co2_sum'] = dff['gas_co2'] + dff['electricity_co2']
dff = dff.fillna(0)
dff = dff.sort_values(by=['date','addr'])
dff.to_csv("/home/ubuntu/auto_insert/sigungu.csv", mode='w', index=False, header=None)
print(dff)
with table2.batch_writer() as batch:
    for index, row in dff.iterrows():
        batch.put_item(json.loads(row.to_json(), parse_float=Decimal))
print("complete db insert")
# s3 = s3_connection()
# s3.delete_object(Bucket='updatingdata', Key='map2.csv') # file name should not be duplicated!
# --- push the month's merged CSV and the refreshed address cache back to S3 ---
s3 = s3_connection()
s3.upload_file("/home/ubuntu/auto_insert/"+date+".csv", "updatingdata", date+".csv")
s3.upload_file("/home/ubuntu/auto_insert/map2.csv", "updatingdata", "map2.csv")
# optional cleanup of the work directory, currently disabled:
# if os.path.exists("/home/ubuntu/auto_insert"):
# shutil.rmtree("/home/ubuntu/auto_insert")
# if not os.path.exists("/home/ubuntu/auto_insert"):
# os.mkdir("/home/ubuntu/auto_insert")
# --- Maintenance notes (were bare prose, which is a syntax error in a .py file) ---
# Beware of breakage when the Selenium version changes
#   -> use selenium.webdriver.chrome.service and the webdriver_manager.chrome package.
# IAM policy
#   -> grant DynamoDB full access including batch writes, then issue a key pair.
# S3 upload does not allow duplicate file names
#   -> delete the existing object before uploading.
# The `date` value in the code above is hard-coded
#   -> the date computation and the tr[...] row-indexing parts need to be fixed.