import mysql.connector
conn = mysql.connector.connect(
host = "__호스트 주소__",
port = 3306,
user = "oneday",
password = "1234",
database = 'oneday'
)
cursor = conn.cursor(buffered = True)
# GAS_BRAND 테이블
sql1 = "create table GAS_BRAND(id int not null auto_increment primary key, name varchar(16) not null)"
cursor.execute(sql1)
# GAS_STATION 테이블
sql2 = "CREATE TABLE GAS_STATION(id int not null auto_increment primary key, " + \
"brand int not null, " + \
"name varchar(64) not null, " + \
"city char(2) not null, " + \
"gu varchar(10) not null, " + \
"address varchar(128) not null, " + \
"gasoline int not null, " + \
"diesel int not null, " + \
"self boolean not null, " + \
"car_wash boolean not null, " + \
"charging_station boolean not null, " + \
"car_maintenance boolean not null, " + \
"convenience_store boolean not null, " + \
"24_hours boolean not null, " + \
"lat decimal(16,14) not null, " + \
"lng decimal(17,14) not null, " + \
"foreign key (brand) references GAS_BRAND(id));"
cursor.execute(sql2)
sql3 = "insert into GAS_BRAND (name) values ('SK에너지'), ('현대오일뱅크'), ('GS칼텍스'), ('S-OIL'), ('알뜰주유소'), ('자가상표')"
cursor.execute(sql3)
sql_result1 = "desc GAS_BRAND"
cursor.execute(sql_result1)
result = cursor.fetchall()
for i in result:
print(i)
sql_result2 = "desc GAS_STATION"
cursor.execute(sql_result2)
result = cursor.fetchall()
for i in result:
print(i)
sql_result3 = "select * from GAS_BRAND"
cursor.execute(sql_result3)
result = cursor.fetchall()
for i in result:
print(i)
def toInt(str):
return int(str.replace(',',''))
toInt('1,000')
def getBrandID(brandName):
dict_cursor = conn.cursor(dictionary = True)
dict_cursor.execute("select * from GAS_BRAND")
gas_brand = dict_cursor.fetchall()
if brandName == '알뜰(ex)':
brandName = '알뜰주유소'
for item in gas_brand:
if item['name'] == brandName:
return item['id']
return -1
getBrandID('SK에너지')
def getGuName(address):
gu = address.split()[1]
return gu
getGuName('서울시 강남구 헌릉로 730')
def getLocation(address):
import googlemaps
gmaps_key = "AIzaSyCBJzHf4bc80wFall-hH4sb_VlCeCkXEwI"
gmaps = googlemaps.Client(key = gmaps_key)
tmp = gmaps.geocode(address, language='ko')
lat = tmp[0].get("geometry")["location"]["lat"]
lng = tmp[0].get("geometry")["location"]["lng"]
return lat, lng
getLocation('서울시 강남구 헌릉로 730')
import time
from selenium import webdriver
from bs4 import BeautifulSoup
from tqdm import tqdm_notebook
def getOption(str):
return False if '_off' in driver.find_element_by_css_selector(str).get_attribute('src').split('/')[-1] else True
# 오피넷 접근
url = 'https://www.opinet.co.kr/searRgSelect.do'
driver = webdriver.Chrome('../driver/chromedriver.exe')
driver.get(url)
time.sleep(1)
driver.get(url)
# 서울 고정
sido_select = driver.find_element_by_css_selector('#SIDO_NM0')
sido_select.send_keys('서울')
## 부가정보 선택
# 세차장 선택
driver.find_element_by_css_selector('#CWSH_YN').click()
# 경정비 선택
driver.find_element_by_css_selector('#MAINT_YN').click()
# 편의점 선택
driver.find_element_by_css_selector('#CVS_YN').click()
# 24시간 선택
xpath_24h ='//*[@id="SEL24_YN"]'
driver.find_element_by_css_selector('#SEL24_YN').click()
# 구 데이터 가져오기
gu_select = driver.find_element_by_css_selector('#SIGUNGU_NM0')
gu_list = gu_select.find_elements_by_tag_name('option')
gu_list[1].get_attribute("value")
gu_names = []
for option in gu_list:
gu_names.append(option.get_attribute("value"))
gu_names = gu_names[1:]
import pandas as pd
import time
import googlemaps
sql = "insert into GAS_STATION (brand, name, city, gu, address, gasoline, diesel, self, car_wash, charging_station, car_maintenance, convenience_store, 24_hours, lat, lng) " + \
"values (%s, %s, '서울', %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
for i in tqdm_notebook(range(len(gu_names))):
gu_selector = f'#SIGUNGU_NM0 > option:nth-child({i+2})'
driver.find_element_by_css_selector(gu_selector).click()
station_items = driver.find_elements_by_css_selector('#body1 > tr') # 주유소 목록
for idx in range(len(station_items)):
detail_selector = f'#body1 > tr:nth-child({idx+1}) > td.rlist > a'
driver.find_element_by_css_selector(detail_selector).click()
name = driver.find_element_by_css_selector('.header #os_nm').get_attribute('innerText')
gasoline = toInt(driver.find_element_by_css_selector('#b027_p').get_attribute('innerText'))
diesel = toInt(driver.find_element_by_css_selector('#d047_p').get_attribute('innerText'))
address = driver.find_element_by_css_selector('#rd_addr').get_attribute('innerText')
brand = getBrandID(driver.find_element_by_css_selector('#poll_div_nm').get_attribute('innerText'))
cwsh_yn = getOption('.service #cwsh_yn')
lpg_yn = getOption('.service #lpg_yn')
maint_yn = getOption('.service #maint_yn')
cvs_yn = getOption('.service #cvs_yn')
sel24_yn = getOption('.service #sel24_yn')
try:
driver.find_element_by_css_selector('#self_icon').get_attribute('alt')
is_self = True
except:
is_self = False
# address
address = driver.find_element_by_css_selector('#rd_addr').get_attribute('innerText')
# gu
gu = getGuName(address)
# lat,lng
lat, lng = getLocation(address)
cursor.execute(sql, (brand, name, gu, address, gasoline, diesel, is_self, cwsh_yn, lpg_yn, maint_yn, cvs_yn, sel24_yn, lat, lng))
conn.commit()
time.sleep(0.2)
time.sleep(0.5)
driver.quit()
cursor.execute("select count(*) from GAS_STATION")
result = cursor.fetchall()
print(result[0])
cursor.execute("select * from GAS_STATION limit 10 ")
result = cursor.fetchall()
for row in result:
print(row)
import pandas as pd
sql4 = "select s.id, b.name brand, s.name, s.city, s.gu, s.address, s.gasoline, s.diesel, s.self, s.car_wash, s.charging_station, s.car_maintenance, s.convenience_store, s.24_hours, s.lat, s.lng " + \
"from GAS_BRAND b, GAS_STATION s " + \
"where b.id = s.brand order by s.id"
cursor.execute(sql4)
result = cursor.fetchall()
num_fields = len(cursor.description)
field_names = [i[0] for i in cursor.description]
df = pd.DataFrame(result)
df.columns = field_names
df.head()
df.to_csv('../data/SQL2_oil_station_data.csv', index=False, encoding = "euc-kr")
df_oil = pd.read_csv('../data/SQL2_oil_station_data.csv', index_col = 0, thousands = ',', encoding = "euc-kr")
df_oil.head(3)
1. 위도, 경도 정보를 이용하여 미왕빌딩에서 1킬로 이내에 위치한 주유소 정보 검색
(주유소 아이디, 주유소 브랜드명, 주유소 매장명, 주소, 미왕빌딩으로 부터의 거리 (km))
# 미왕빌딩 주소: 서울특별시 강남구 강남대로 364
lat, lng = getLocation("서울특별시 강남구 강남대로 364")
lat, lng
# 위도, 경도를 이용해서 반경(거리) 구하기
sql5 = "select s.id, b.name as brand, s.name, s.address, (6371*acos(cos(radians(37.4955366))*cos(radians(s.lat))*cos(radians(s.lng)-radians(127.0293521))+sin(radians(37.4955366))*sin(radians(s.lat)))) as distance " + \
"from GAS_BRAND as b, GAS_STATION as s " + \
"where s.brand = b.id " + \
"having distance <= 1 " + \
"order by distance"
cursor.execute(sql5)
result = cursor.fetchall()
for i in result:
print(i)
2. 위도, 경도 정보를 이용하여 미왕빌딩에서 셀프주유가 가능하고 24시간이면서 편의점이 있는 가장 가까운 주유소 10개를 휘발유 가격이 가장 저렴한 순으로 정렬하여 조회
주유소 아이디, 주유소 브랜드명, 주유소 매장명, 주소, 휘발유 가격, 부가정보 (셀프, 24시간, 편의점 여부), 미왕빌딩으로 부터의 거리 (km)
sql6 = "select t.id, t.brand, t.name, t.address, t.gasoline, t.self, t.24_hours, t.convenience_store, t.distance " + \
"from (select s.id id, b.name brand, s.name name, s.address, s.gasoline, s.self, s.24_hours, s.convenience_store, (6371*acos(cos(radians(37.4955366))*cos(radians(s.lat))*cos(radians(s.lng)-radians(127.0293521))+sin(radians(37.4955366))*sin(radians(s.lat)))) as distance " + \
"from GAS_BRAND b, GAS_STATION s " + \
"where b.id = s.brand and s.self = 'Y' and s.24_hours = 'Y' and s.convenience_store = 'Y' " + \
"order by distance limit 10) as t " + \
"order by t.gasoline"
cursor.execute(sql6)
result = cursor.fetchall()
for i in result:
print(i)
3. 구별로 주유소 브랜드 별 휘발유 평균가격을 조회하여 저렴한 순으로 출력
구 이름, 주유소 브랜드 이름, 휘발유 평균 가격
sql7 = "select s.gu, b.name, avg(s.gasoline) " + \
"from GAS_STATION as s, GAS_BRAND as b " + \
"where b.id = s.brand " + \
"group by s.gu, b.name " + \
"order by s.gu, avg(s.gasoline)"
cursor.execute(sql7)
result = cursor.fetchall()
for i in result:
print(i)
conn.close()
안녕하셍요-! 혹시 궁금한게 있는데 개인적으로 질문해도 괜찮을까요,,?