크롤링 (세션)

NaHyun Kim·2020년 5월 25일
0
import time
import requests
import csv
from bs4 import BeautifulSoup
from selenium import webdriver

# Scrape every drink on the Starbucks Korea menu: read the product codes from
# the list page, then visit each product's detail page to collect its name,
# description and image URL.
driver = webdriver.Chrome('./chromedriver')
driver.implicitly_wait(5)  # element lookups wait up to 5 seconds before giving up

url = 'https://www.starbucks.co.kr/menu/drink_list.do'

# Parallel lists: index i of each list describes the same product, so all
# three are always appended to together.
product_names = []
product_desc = []
product_images = []

try:
    driver.get(url)
    links = driver.find_elements_by_css_selector('#container > div.content > div.product_result_wrap.product_result_wrap01 > div > dl > dd:nth-child(2) > div.product_list > dl > dd > ul > li > dl > dt > a')

    # Read all product codes before navigating to any detail page, so the loop
    # below never touches elements of a page that has already been left.
    products = [link.get_attribute('prod') for link in links]

    for product in products:
        if not product:
            # get_attribute returns None when the link carries no 'prod'
            # attribute; there is no detail URL to build in that case.
            continue
        driver.get('https://www.starbucks.co.kr/menu/drink_view.do?product_cd=' + product)
        name = driver.find_elements_by_css_selector('#container > div.content02 > div.product_view_wrap1 > div.product_view_detail > div.myAssignZone > h4')
        desc = driver.find_elements_by_css_selector('#container > div.content02 > div.product_view_wrap1 > div.product_view_detail > div.myAssignZone > p')
        images = driver.find_elements_by_css_selector('#product_thum_wrap > ul > li > a')
        if not name:
            # No name heading on this page: skip the product instead of
            # aborting the whole crawl with an IndexError.
            continue
        product_names.append(name[0].text)
        # A missing description or thumbnail is recorded as an empty string so
        # the three lists stay aligned.
        product_desc.append(desc[0].text if desc else '')
        product_images.append(images[0].get_attribute('data-image') if images else '')
finally:
    # Always shut the browser down, even when a page load or lookup fails,
    # so no Chrome/chromedriver process is left running.
    driver.quit()

# Combine the three parallel lists into one record per product. zip() stops at
# the shortest list, so only complete (name, description, image) triples are kept.
starbucks = [
    {'name': title, 'description': summary, 'image': image_url}
    for title, summary, image_url in zip(product_names, product_desc, product_images)
]


# Write the scraped records to output.csv with a header row.
# newline='' lets the csv module control line endings itself (otherwise extra
# blank rows appear on Windows); an explicit UTF-8 encoding keeps the Korean
# text intact regardless of the platform's default encoding.
with open('output.csv', 'w', newline='', encoding='utf-8') as csvfile:
    csvout = csv.DictWriter(csvfile, ['name', 'description', 'image'])
    csvout.writeheader()
    csvout.writerows(starbucks)

0개의 댓글