하루 온종일 풀어본 SQL 유가 분석
ctrl+shift+i
import mysql.connector
conn = mysql.connector.connect(
host = "",
port = 3306,
user = "oneday",
password = "1234",
database = "oneday"
)
cursor = conn.cursor(buffered=True)
# gas_brand
sql_b = "CREATE TABLE GAS_BRAND(" + \
"id int not null auto_increment primary key, " + \
"name varchar(16) not null)"
cursor.execute(sql_b)
# gas_station
sql_s = "CREATE TABLE GAS_STATION(" + \
"id int auto_increment primary key, " +\
"brand int not null, " +\
"name varchar(64) not null, " +\
"city char(2) not null, " +\
"gu varchar(10) not null, " +\
"address varchar(128) not null, " +\
"gasoline int not null, " +\
"diesel int not null, " +\
"self boolean not null, " +\
"car_wash boolean not null, " +\
"charging_station boolean not null, " +\
"car_maintenance boolean not null, " +\
"convenience_store boolean not null, " +\
"24_hours boolean not null, " +\
"lat decimal(16,14) not null, " +\
"lng decimal(17,14) not null, " +\
"foreign key (brand) references GAS_BRAND(id));"
cursor.execute(sql_s)
queries1 = [
(1, 'SK에너지'),
(2, 'HD현대오일뱅크'),
(3, 'GS칼텍스'),
(4, 'S-OIL'),
(5, '알뜰주유소'),
(6, '자가상표')
]
query = "INSERT INTO GAS_BRAND VALUES (%s, %s)"
cursor.executemany(query, queries1)
conn.commit()
sql_result = "DESC GAS_STATION"
cursor.execute(sql_result)
result = cursor.fetchall()
for i in result:
print(i)
sql_result = "SELECT * FROM GAS_BRAND"
cursor.execute(sql_result)
result = cursor.fetchall()
for i in result:
print(i)
# 화폐단위 문자형 >> 숫자형
def stringToInt(s):
if s != '':
s = s.replace(',', '')
return int(s)
else:
return None
stringToInt('1,000')
# 주유소 브랜드를 입력하면 GAS_BRAND 데이터를 참고하여 ID 반환
def getID(brand):
sql_result = "SELECT * FROM GAS_BRAND"
cursor.execute(sql_result)
result = cursor.fetchall()
for i in result:
if i[1] == brand:
return i[0]
# 브랜드명이 '알뜰(ex)'인 경우 있음
elif brand == '알뜰(ex)':
return 5
getID('SK에너지')
# 주소를 입력받아 구 이름 반환
def getGu(add):
addList = add.split()
return addList[1]
getGu('서울시 강남구 헌릉로 730')
import googlemaps
gmaps_key = 'AIzaSyALyv5xMRzF_RJUIeJ84qh25GgNWoIJ8LM'
gmaps = googlemaps.Client(key = gmaps_key)
# 주소를 입력받아 위도, 경도 반환
def getLL(add):
tmp = gmaps.geocode(add, language='ko')
lat = tmp[0].get("geometry")["location"]["lat"]
lng = tmp[0].get("geometry")["location"]["lng"]
return lat, lng
getLL('서울시 강남구 헌릉로 730')
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
from tqdm import tqdm_notebook
# 오피넷 -> 구 정보 가져오기
url = 'https://www.opinet.co.kr/searRgSelect.do'
driver = webdriver.Chrome(executable_path='../driver/chromedriver.exe')
driver.get(url)
# 시/도
sido_list_raw = driver.find_element(By.ID, "SIDO_NM0")
sido_list = sido_list_raw.find_elements(By.TAG_NAME, "option")
# 서울 선택
seoul_select = sido_list[1].get_attribute("value")
sido_list_raw.send_keys(seoul_select)
# 구 리스트 만들기
gu_list_raw = driver.find_element(By.ID, "SIGUNGU_NM0")
gu_list = gu_list_raw.find_elements(By.TAG_NAME, "option")
gu_names = [option.get_attribute("value") for option in gu_list]
gu_names = gu_names[1:]
sql = "INSERT INTO GAS_STATION (brand, name, city, gu, address, gasoline, diesel, self, " +\
"car_wash, charging_station, car_maintenance, convenience_store, 24_hours, lat, lng) " +\
"VALUES (%s, %s, '서울', %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
def check(data, tag):
return 'off' not in data.select_one(tag)['src']
sqltmp = "ALTER TABLE GAS_STATION MODIFY diesel int NULL;"
cursor.execute(sqltmp)
conn.commit()
for gu in tqdm_notebook(gu_names):
element = driver.find_element(By.ID, 'SIGUNGU_NM0')
element.send_keys(gu)
time.sleep(0.5)
html = driver.page_source
soup = BeautifulSoup(html, 'html.parser')
# 검색할 주유소 개수
cnt = int(driver.find_element(By.ID, 'totCnt').text)
for i in range(1, cnt+1):
# 각 주유소 클릭
station = driver.find_element(By.CSS_SELECTOR, f'#body1 > tr:nth-child({i}) > td.rlist > a')
station.click()
html = driver.page_source
soup = BeautifulSoup(html, 'html.parser')
data = soup.select('#os_dtail_info')[0]
# brand
brand = getID(data.select_one('#poll_div_nm').text)
# name
name = data.select_one('.header').text.strip()
# address
address = data.select_one('#rd_addr').text
# gasoline
gasoline = stringToInt(data.select_one('#b027_p').text)
# diesel
diesel = stringToInt(data.select_one('#d047_p').text)
# self
slf = data.select_one('#SPAN_SELF_VLT_YN_ID')
if type(slf.find('img')) == type(None):
is_self = False
else:
is_self = True
# car_wash
car_wash = check(data, '#cwsh_yn')
# charging_station
charging_station = check(data, '#lpg_yn')
# car_maintenance
car_maintenance = check(data, '#maint_yn')
# convenience_store
convenience_store = check(data, '#cvs_yn')
# 24_hours
sel24 = check(data, '#sel24_yn')
tmp = gmaps.geocode(address, language='ko')
# lat
lat = tmp[0].get('geometry')['location']['lat']
# lng
lng = tmp[0].get('geometry')['location']['lng']
cursor.execute(sql, (brand, name, gu, address, gasoline, diesel,
is_self, car_wash, charging_station, car_maintenance, convenience_store, sel24, lat, lng))
conn.commit()
# 데이터 개수 확인
cursor.execute("select count(*) from GAS_STATION")
result = cursor.fetchall()
print(result[0])
# 데이터 상위 10개 출력
cursor.execute("select * from GAS_STATION limit 10")
result = cursor.fetchall()
for i in result:
print(i)
import pandas as pd
sql = "select s.id, b.name 'brand', s.name, s.city, s.gu, s.address, s.gasoline, s.diesel, s.self, " +\
"s.car_wash, s.charging_station, s.car_maintenance, s.convenience_store, s.24_hours, " +\
"s.lat, s.lng " +\
"from GAS_BRAND b, GAS_STATION s " +\
"where b.id = s.brand ORDER BY s.id"
cursor.execute(sql)
result = cursor.fetchall()
columns = [i[0] for i in cursor.description]
df = pd.DataFrame(data=result, columns=columns)
df.to_csv("[DS]sql2_oilstation_ohjaemin.csv", index=False, encoding='euc-kr')
df = pd.read_csv("[DS]sql2_chasuhui.csv", index_col=0, thousands=',', encoding='euc-kr')
df.head(10)
# 미왕빌딩 주소: 서울 강남구 강남대로 364
lat, lng = getLL('서울 강남구 강남대로 364')
lat, lng
# POINT(경도, 위도)
# SET @location = POINT(경도, 위도) : 기준이 되는 위치 설정
# ST_DISTANCE_SPHERE(POINT, POINT) : 두 좌표 간 거리(단위: m)
cursor.execute("SET @location = POINT(127.029340, 37.495599)")
cursor.execute("SELECT * FROM (SELECT s.id id, b.name brand, s.name name, address, \
ST_DISTANCE_SPHERE(@location, POINT(lng, lat))/1000 distance \
FROM GAS_BRAND b, GAS_STATION s WHERE b.id = s.brand) t \
WHERE distance*1000 <= 1000")
result = cursor.fetchall()
for row in result:
print(row)
cursor.execute("SELECT * FROM (SELECT s.id id, b.name brand, s.name name, address, \
gasoline, self, 24_hours, convenience_store, \
ST_DISTANCE_SPHERE(@location, POINT(lng, lat))/1000 distance \
FROM GAS_BRAND b, GAS_STATION s \
WHERE b.id = s.brand and self = 1 and 24_hours = 1 and convenience_store = 1 \
ORDER BY distance LIMIT 10) t \
ORDER BY gasoline")
result = cursor.fetchall()
for row in result:
print(row)
cursor.execute("SELECT gu, b.name brand, avg(gasoline) avg_price \
FROM GAS_BRAND b, GAS_STATION s \
WHERE b.id = s.brand GROUP BY gu, brand ORDER BY avg_price")
result = cursor.fetchall()
for row in result:
print(row)
conn.close()
driver.quit()