건물에너지 data는 이미 관계형 DB (RDS)로 구축했는데 data 분석의 다각화를 위해 비정형 데이터 처리가 필요해짐
-> DB migration 작업 진행 (from SQL(RDS) to NoSQL(DynamoDB))
import pandas as pd
from typing import final
from venv import create
import multiprocessing
import base64
import os
import sys
import pymysql
import requests
from sqlalchemy import create_engine
#from crawler import crawl
import boto3
import json
dynamodb = boto3.resource('dynamodb', region_name='ap-northeast-2', aws_access_key_id='비밀', aws_secret_access_key='ㅂㅁ')
table = dynamodb.Table('buildingenergy')
db_connection_str='ㅂㅁ'
db_connection=create_engine(db_connection_str)
conn = pymysql.connect(host="ㅂㅁ",
user='root',
password='ㅂㅁ',
db='EnergyConsumption',
charset='utf8')
cur = conn.cursor()
cur.execute("SELECT * FROM energy_temp")
i = 0
with table.batch_writer() as batch:
while(True):
row = cur.fetchone()
print(i)
i += 1
if row is None:
break
dict = {
'date': row[0],
'addr': row[1],
'electricity': row[2],
'gas': row[3],
'energy_sum': row[4],
'electricity_co2': row[5],
'gas_co2': row[6],
'co2_sum': row[7],
'count': row[8]
}
batch.put_item(
Item = dict
)
conn.close()
cursor에 "SELECT * FROM energy_temp" query 결과를 저장 후 row is None이 될 때까지 한 줄씩 읽으면서 DynamoDB에 row 하나씩 insert