네이버 웹툰 크롤링
import time

import pandas as pd
import requests as req
from bs4 import BeautifulSoup as bs
from selenium import webdriver as wb
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Naver Webtoon weekday listing page (crawl entry point).
url = 'https://comic.naver.com/webtoon/weekday'

# Crawl scope: how many tabs to visit and how many webtoons to open per tab.
DAY_TAB_COUNT = 7
TOONS_PER_DAY = 10

# Fixed pauses around list-page navigation. They are kept as plain sleeps
# because consecutive list pages match the same selectors, so an explicit
# wait on those selectors could be satisfied by the page being left.
TAB_SETTLE_SECONDS = 1
LIST_SETTLE_SECONDS = 0.5

# Upper bound for explicit waits on detail/viewer page elements.
ELEMENT_TIMEOUT_SECONDS = 10

driver = wb.Chrome()
driver.get(url)

# One list per scraped field; entry k of every list belongs to the same webtoon.
Toon_Ratings = []   # rating shown on the list page
Toon_Imgs = []      # thumbnail image URL
Toon_Titles = []    # title
Toon_Wrts = []      # author
Toon_Details = []   # description
Toon_Genres = []    # genre
Toon_Grades = []    # age rating
Toon_Frists = []    # serialization start date (name kept as-is for later cells)
Toon_News = []      # latest episode title
Toon_NewDates = []  # latest episode upload date


def _wait_for(css):
    """Return the first element matching *css* once it is present in the DOM.

    Raises selenium's TimeoutException if nothing matches within
    ELEMENT_TIMEOUT_SECONDS.
    """
    return WebDriverWait(driver, ELEMENT_TIMEOUT_SECONDS).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, css))
    )


def _scrape_open_toon():
    """Read every field of the webtoon whose detail page was just opened.

    Clicks one further link to read the start date, so the browser ends up
    two pages away from the list page; the caller navigates back.
    Returns a dict of the scraped strings.
    """
    thumbnail = _wait_for(
        "#content > div.comicinfo > div.thumb > a > img"
    ).get_attribute('src')
    fields = {
        'img': thumbnail,
        'title': _wait_for('h2 > span.title').text,
        'author': _wait_for('h2 > span.wrt_nm').text,
        'detail': _wait_for('div.detail>p').text,
        'genre': _wait_for('p.detail_info > span.genre').text,
        'grade': _wait_for('p.detail_info > span.age').text,
        # Second table row is read as the latest episode.
        'latest': _wait_for('tr:nth-child(2) > td.title').text,
        'latest_date': _wait_for('tr:nth-child(2) > td.num').text,
    }

    # Presumably the "first episode" link; its page carries the date that is
    # stored as the serialization start date -- TODO confirm against the site.
    _wait_for(
        '#content > div.comicinfo > div.detail > ul > li:nth-child(2) > a'
    ).click()
    fields['first_date'] = _wait_for('dl.rt>dd.date').text
    return fields


for day_index in range(DAY_TAB_COUNT):
    day_tabs = driver.find_elements(By.CSS_SELECTOR, '#content > .category_tab> li')
    # Tab 0 is skipped (presumably the "all days" tab -- TODO confirm).
    if day_index + 1 >= len(day_tabs):
        break  # fewer tabs than expected: stop instead of raising IndexError
    day_tabs[day_index + 1].click()
    time.sleep(TAB_SETTLE_SECONDS)

    for toon_index in range(TOONS_PER_DAY):
        time.sleep(LIST_SETTLE_SECONDS)

        # Elements are looked up again on every pass because the previous
        # references go stale after navigating away and back.
        rating_elems = driver.find_elements(By.CSS_SELECTOR, 'div.rating_type>strong')
        thumb_elems = driver.find_elements(By.CSS_SELECTOR, '.list_area.daily_img .thumb')
        if toon_index >= len(rating_elems) or toon_index >= len(thumb_elems):
            break  # this tab lists fewer webtoons than TOONS_PER_DAY

        # The rating is only read from the list page, so grab it before leaving.
        rating = rating_elems[toon_index].text
        thumb_elems[toon_index].click()

        toon = _scrape_open_toon()

        # Append only after every field was read so the lists stay aligned.
        Toon_Ratings.append(rating)
        Toon_Imgs.append(toon['img'])
        Toon_Frists.append(toon['first_date'])
        Toon_Titles.append(toon['title'])
        Toon_Wrts.append(toon['author'])
        Toon_Details.append(toon['detail'])
        Toon_Genres.append(toon['genre'])
        Toon_Grades.append(toon['grade'])
        Toon_News.append(toon['latest'])
        Toon_NewDates.append(toon['latest_date'])

        # Two clicks were made (detail page, then the start-date page),
        # so go back twice to return to the list page.
        time.sleep(LIST_SETTLE_SECONDS)
        driver.back()
        driver.back()
        time.sleep(LIST_SETTLE_SECONDS)
개선점
크롤링 범위를 수정하고, 다른 페이지를 크롤링할 때의 페이지 이동 처리와 제목, 작가, 설명, 장르, 등급 등의 수집을 제대로 할 것