스타벅스 이디야 데이터 분석(SQL With Python) - 23.03.02

박무연·2023년 3월 2일
0

🟣EDA

목록 보기
9/9
post-thumbnail

핵심간 간략하게

Auto_increment 입력

cursor.execute('insert into coffee_brand (name) values ("starbucks"), ("Ediya")')
  • 괄후 두개로 따로 입력이 가능!

팝업창 예외처리

- My code

try:
    driver.find_element(By.CSS_SELECTOR,'.holiday_notice_close a').click()
except:
    pass
  • 팝업창이 없으면 에러가 나기때문에 예외처리 해줌
  • 팝업창이 없으면 예외문으로 넘어가 그냥 아무 것도 실행 하지 않음

- 예외 설명창 띄우기

from selenium.common import NoSuchElementException

try:
    driver.find_element(By.CSS_SELECTOR,'.holiday_notice_close a').click()

except NoSuchElementException as e:
    print(e)
  • 예외가 발생 할 경우 어떤 예외인지 보여준다

이디야 전체 코드

- beautifulSoup 사용X

url = 'https://www.starbucks.co.kr/index.do'
driver = webdriver.Chrome('../../driver/chromedriver_win32/chromedriver.exe')
driver.get(url)

try:
    driver.find_element(By.CSS_SELECTOR,'.holiday_notice_close a').click()
except:
    pass

driver.maximize_window()
storePage = driver.find_element(By.CSS_SELECTOR, '#gnb > div > nav > div > ul > li.gnb_nav03 > h2 > a')
action = ActionChains(driver)
action.move_to_element(storePage).perform()
time.sleep(0.5)

driver.find_element(By.CSS_SELECTOR,'#gnb > div > nav > div > ul > li.gnb_nav03 > div > div > div > ul:nth-child(1) > li.gnb_sub_ttl > a').click()
time.sleep(0.5)

# 지역검색 선택
driver.find_element(By.CSS_SELECTOR,'#container > div > form > fieldset > div > section > article.find_store_cont > article > header.loca_search > h3 > a').click()
time.sleep(0.5)

# 서울 선택
driver.find_element(By.CSS_SELECTOR, '#container > div > form > fieldset > div > section > article.find_store_cont > article > article:nth-child(4) > div.loca_step1 > div.loca_step1_cont > ul > li:nth-child(1) > a').click()
time.sleep(0.5)

# 전체 선택
driver.find_element(By.CSS_SELECTOR,'#mCSB_2_container > ul > li:nth-child(1) > a').click()
# 자동화 코드

contents = driver.find_elements(By.CSS_SELECTOR,'#mCSB_3_container > ul > li')
cur = oneday.cursor(buffered=True)
for content in contents:
    name = content.find_element(By.CSS_SELECTOR, 'strong').get_attribute('innerText')
    #print(name,end='')
    address = content.find_element(By.CSS_SELECTOR, 'p').get_attribute('innerText').split('\n')[0]
    #print(address,end='')
    gu = address.split(' ')[1]
    #print(gu,end='')
    lat = content.get_attribute('data-lat')
    #print(lat,end='')
    lng = content.get_attribute('data-long')
    #print(lng)
    
    sql = f"insert into coffee_store(brand, name, gu_name, address, lat, lng) values ('1','{name}', '{gu}','{address}', '{lat}','{lng}')"
    cur.execute(sql)
    oneday.commit()
    print(sql)
    print('--'*30)
  • %s, %s, %s, 를 사용하지 않음
  • f"~내용~" 사용
  • {내용} 후 반드시 콤마로 묶어줘야 한다 e.g) '{내용}'

스타벅스 코드

- beautifulSoup 사용X

- 구 정보 받아오기

cur.execute('select distinct(gu_name) from coffee_store;')
result = cur.fetchall()
gu_list = []
for item in result:
    gu_list.append(item[0])

len(gu_list)

- 전체 코드

  • 페이지 오픈은 제외
for gu in gu_list:
    driver.find_element(By.CSS_SELECTOR, '#keyword').clear()
    keyword = driver.find_element(By.CSS_SELECTOR, '#keyword').click()
    driver.find_element(By.CSS_SELECTOR, '#keyword').send_keys(f'서울 {gu}')
    time.sleep(0.5)
    driver.find_element(By.CSS_SELECTOR, '#keyword_div > form > button').click()

    time.sleep(0.5)
    contents = driver.find_elements(By.CSS_SELECTOR, '#placesList > li')
    len(contents)
    for content in contents:
        time.sleep(0.5)
        name = content.find_element(By.CSS_SELECTOR, 'a > dl > dt').get_attribute('innerText')
        #print(name)
        address = content.find_element(By.CSS_SELECTOR, 'a > dl > dd').get_attribute('innerText')
        if '(' in address:
            input_address = address.split('(')[0]
        else:
            input_address = address
        #print(address)
            
        tmp = gmaps.geocode(input_address, language="ko")
        lat = tmp[0].get("geometry")["location"]["lat"]
        #print(lat, end='')
        lng = tmp[0].get("geometry")["location"]["lng"]
        #print(lng, end='')
        #print(gu)
        
        sql = f"insert into coffee_store(brand, name, gu_name, address, lat, lng) values ('2','{name}', '{gu}','{address}', '{lat}','{lng}')"
        cur.execute(sql)
        oneday.commit()
        print(sql)
        print('-'*30)
        time.sleep(0.5)

SQL 분석하기

- 브랜드별 매장 분석하기

sql  = "select gu_name gu, count(gu_name) count , case brand when '1' then '스타벅스' else '이디야' end as brand from coffee_store where brand like '1' group by gu_name order by count(gu_name) desc limit 10;"
cur.execute(sql)
result =cur.fetchall()

for num, data in enumerate(result):
    print(f'{num} :{data}')
  • case 를 사용하여 브랜드명을 붙여 주었음
  • 사실 필요하진 않다!
  • 밑에 코드 처럼 작성해도 okay!
select s.gu_name, count(s.brand)  , s.brand(?) # 브랜드 명을 추가 하고 싶을때 넣어도 될 것 같다
from coffee_brand b , cofee_store s
where s.brand = b.id and b.name ='STARBUCKS' 
group by s.gu_name 
order by count(s.brand) desc

- 구별, 브랜드별 각각의 매장 조회

  • 구별, 브랜드별 이니깐 두번 group by 해주면 된다
  • gorup by를 어떻게 사용하는지 잘 기억 해두자!!
sql  = "select s.gu_name gu , b.name name , count(s.brand) count" + \
    "from coffee_store s, coffee_brand b" + \
    "where s.brand = b.id" + \
    "group by s.gu_name, s.brand order by gu"
cur.execute(sql)
result =cur.fetchall()

for num, data in enumerate(result):
    print(f'{num} :{data}')

- 이디야 매장 정보와 스타벅스 매칭하여 저장

sql = "select * \
    from (\
    select id, 'STARBUCKS', name, gu_name, address, lat, lng\
            from coffee_store \
            where brand like '1') s, \
    (\
    select id, 'EDIYA', name, gu_name, address, lat, lng \
        from coffee_store\
        where brand like '2' group by gu_name) e\
    \
    order by s.id , e.id ;"

cur.execute(sql)
result =cur.fetchall()

for num, data in enumerate(result):
    print(f'{num} :{data}')
  • 이렇게 하긴 했는데 사실 잘 모르겠다,,,
  • 아래 코드가 이해하기 쉬워보이고 설득력 있어보인다..
select * 
from 
(select s.id s_id, b.name s_brand, s.gu_name s_gu, s.address s_address, s.lat s_lat, s.lng s_lng from cofee_store s , coffee_brand b where b.id = s.brand and b.name like 'STARBUCKS') st,
(select s.id e_id, b.name e_name , s.gu_name e_gu, s.address e_address, s.lat e_lat, s.lng e_lng from cofee_store s , coffee_brand b where b.id = s.brand and b.name like 'STARBUCKS') ed
order by s_id e_id

- alias 컬럼명 저장

num_fields = len(cursor.description) 
field_name = [i[0] for i in cursor.description]

# 컬럼명 주기
df  = pd.DataFrame(result)
df.columns = field_name
  • cursor의 description에 컬러명을 지니고 있음!
profile
Easy day!

0개의 댓글