프로젝트 크롤링

ParkJinYoung·2022년 9월 16일
0

네이버 웹툰 크롤링

from selenium import webdriver as wb
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
from bs4 import BeautifulSoup as bs
import requests as req
# Naver Webtoon: weekday listing page (crawl entry point).
url='https://comic.naver.com/webtoon/weekday'
driver = wb.Chrome()
driver.get(url)

# Crawl limits: how many day tabs to visit and how many webtoons to read per day.
DAY_TAB_COUNT = 7
TOONS_PER_DAY = 10

# Result accumulators. Each list receives one entry per crawled webtoon, in
# crawl order, so the same index across lists refers to the same webtoon.
Toon_Ratings =[]   # rating text shown on the day listing
Toon_Imgs=[]       # thumbnail image URL
Toon_Titles=[]     # title
Toon_Wrts=[]       # author
Toon_Details=[]    # description
Toon_Genres =[]    # genre
Toon_Grades=[]     # age grade
Toon_Frists=[]     # serialization start date
Toon_News=[]       # latest episode title
Toon_NewDates=[]   # latest episode upload date

for day_idx in range(DAY_TAB_COUNT):
    # The tabs are looked up again on every pass because the browser has
    # navigated to other pages since the previous lookup.
    day_tabs=driver.find_elements(By.CSS_SELECTOR,'#content > .category_tab> li')
    # The first tab (index 0) is skipped, as in the original indexing.
    # NOTE(review): presumably index 0 is a non-day tab - confirm on the page.
    if day_idx + 1 >= len(day_tabs):
        break  # fewer tabs than expected: stop instead of raising IndexError
    day_tabs[day_idx + 1].click()
    time.sleep(1)
    for toon_idx in range(TOONS_PER_DAY):
        time.sleep(0.5)
        # Ratings and thumbnails are both read from the day listing page.
        rating_elems = driver.find_elements(By.CSS_SELECTOR,'div.rating_type>strong')
        list_toon=driver.find_elements(By.CSS_SELECTOR,'.list_area.daily_img  .thumb')
        if toon_idx >= len(rating_elems) or toon_idx >= len(list_toon):
            break  # this day lists fewer webtoons than TOONS_PER_DAY
        # Rating
        Rating=rating_elems[toon_idx].text
        # Open the webtoon's page from the day listing
        list_toon[toon_idx].click()
        # Thumbnail
        Toon_Images = driver.find_element(By.CSS_SELECTOR,"#content > div.comicinfo > div.thumb > a > img")
        Toon_Img = Toon_Images.get_attribute('src')
        # Title
        Toon_Title = driver.find_element(By.CSS_SELECTOR,'h2 > span.title').text
        # Author
        Toon_Wrt = driver.find_element(By.CSS_SELECTOR,'h2 > span.wrt_nm').text
        # Description
        Toon_Detail = driver.find_element(By.CSS_SELECTOR,'div.detail>p').text
        # Genre
        Toon_Genre = driver.find_element(By.CSS_SELECTOR,'p.detail_info > span.genre').text
        # Age grade
        Toon_Grade = driver.find_element(By.CSS_SELECTOR,'p.detail_info > span.age').text
        # Latest episode title
        Toon_New = driver.find_element(By.CSS_SELECTOR,'tr:nth-child(2) > td.title').text
        # Latest episode upload date
        Toon_NewDate = driver.find_element(By.CSS_SELECTOR,'tr:nth-child(2) > td.num').text
        # Serialization start date: follow the second link in the info list
        # and read the date shown on the page it opens.
        list_move=driver.find_element(By.CSS_SELECTOR,'#content > div.comicinfo > div.detail > ul > li:nth-child(2) > a')
        list_move.click()
        Toon_Frist=driver.find_element(By.CSS_SELECTOR,'dl.rt>dd.date').text
        # Store every field only after all lookups succeeded, so the lists
        # stay the same length even if a lookup above raises.
        Toon_Ratings.append(Rating)
        Toon_Imgs.append(Toon_Img)
        Toon_Frists.append(Toon_Frist)
        Toon_Titles.append(Toon_Title)
        Toon_Wrts.append(Toon_Wrt)
        Toon_Details.append(Toon_Detail)
        Toon_Genres.append(Toon_Genre)
        Toon_Grades.append(Toon_Grade)
        Toon_News.append(Toon_New)
        Toon_NewDates.append(Toon_NewDate)
        time.sleep(0.5)
        # Two clicks were made from the day listing, so go back twice.
        driver.back()
        driver.back()
        time.sleep(0.5)

개선점
범위 수정, 다른 페이지 크롤링 시 페이지 이동 관련 처리, 제목·작가·설명·장르·등급 등의 항목을 제대로 수집할 것

profile
꾸준히

0개의 댓글