정적크롤링 - 나도코딩
크롤링 활용해 보기 - 인기 급상승 웹툰 목록 출력하기
# Print the trending ("rank") webtoon list from Naver's weekday page,
# demonstrating two scraping techniques over the same markup.
import requests as req
from bs4 import BeautifulSoup as BS

url = "https://comic.naver.com/webtoon/weekday"
res = req.get(url)
res.raise_for_status()  # abort immediately on HTTP 4xx/5xx
soup = BS(res.text, "lxml")

# Technique 1: find the first ranked <li>, then walk its <li> siblings.
# Guard against None — find() returns None when the class is absent
# (e.g. after a site redesign), and .get_text() would then crash.
rank1 = soup.find("li", attrs={"class": "rank01"})
if rank1 is not None:
    print(rank1.get_text(strip=True))
    for rank in rank1.find_next_siblings("li"):
        print(rank.get_text(strip=True))

# Technique 2: one CSS selector grabs the whole ranked list in a single call.
arrs = soup.select("ol#realTimeRankFavorite li")
for arr in arrs:
    print(arr.get_text(strip=True))
크롤링 활용해 보기 - 월요일 웹툰 목록 출력하기
# Print the Monday webtoon titles from Naver's weekday page.
import requests as req
from bs4 import BeautifulSoup as BS

url = "https://comic.naver.com/webtoon/weekday"
res = req.get(url)
res.raise_for_status()  # raise on a failed HTTP status
soup = BS(res.text, "lxml")

# "h4.mon + ul" selects the <ul> immediately following the Monday heading;
# "a.title" picks out each title link inside it.
title_tags = soup.select("h4.mon+ul a.title")
for title_tag in title_tags:
    print(title_tag.get_text(strip=True))
크롤링 활용해 보기 - 일주일 웹툰목록 출력하기
# Print the webtoon list for every day of the week.
import requests as req
from bs4 import BeautifulSoup as BS

url = "https://comic.naver.com/webtoon/weekday"
res = req.get(url)
res.raise_for_status()  # abort on HTTP errors
soup = BS(res.text, "lxml")

# FIX: the day-of-week CSS classes are mon/tue/wed/thu/fri/sat/sun.
# The original list had "wen", which matches no element, so Wednesday
# silently printed an empty list.
weeks = ["mon", "tue", "wed", "thu", "fri", "sat", "sun"]
for week in weeks:
    # <ul> immediately after the day's <h4> heading holds that day's titles.
    lsts = soup.select(f"h4.{week}+ul a.title")
    print(f"{week}의 웹툰 목록입니다.")
    for index, lst in enumerate(lsts, 1):
        print(index, ":", lst.get_text(strip=True), sep="")
크롤링 활용해 보기 - 사용자 입력_특정요일_웹툰목록 출력하기
# Ask the user for a day of the week and print that day's webtoon list.
import requests as req
from bs4 import BeautifulSoup as BS

url = "https://comic.naver.com/webtoon/weekday"
res = req.get(url)
res.raise_for_status()  # abort on HTTP errors
soup = BS(res.text, "lxml")

# FIX: "wed" (Wednesday) — the original had the typo "wen", which never
# matched the page's day class, so Wednesday produced an empty list.
daylist = ["mon", "tue", "wed", "thu", "fri", "sat", "sun"]
print(daylist)

# Loop until a valid day code is entered.
# NOTE: the original wrapped this in `try/except ValueError: pass`;
# input() never raises ValueError, so that handler was dead code and
# has been removed.
while True:
    day = input("요일을 입력하세요")
    if day in daylist:
        print(day)
        lsts = soup.select(f"h4.{day}+ul a.title")
        print(f"{day} 웹툰 목록입니다.")
        for index, lst in enumerate(lsts, 1):
            print(index, ":", lst.get_text(strip=True))
        break
    else:
        print("다시 입력하세요")
크롤링 활용해 보기 - 베스트 셀러 출력하기
# Print the top-10 bestseller books (title, author, link) from a Daum
# search-results page.
import requests as req
from bs4 import BeautifulSoup as BS
import re  # NOTE(review): imported but unused in this section — kept for safety

url = "https://search.daum.net/search?w=tot&DA=YZR&t__nil_searchbox=btn&sug=&sugo=&sq=&o=&q=%EC%B1%85"
res = req.get(url)
res.raise_for_status()  # abort on HTTP errors
soup = BS(res.text, "lxml")

arrs = soup.select("ul.list_thumb div.wrap_cont")
# Slice to the first 10 entries up front instead of breaking mid-loop
# (the original's `if index > 10: break` also printed exactly 10 items).
for index, arr in enumerate(arrs[:10], 1):
    link = arr.find("a")
    # find() returns None when the markup changes; guard each tag before
    # calling .get_text() to avoid an AttributeError crash.
    title_tag = arr.find("strong", attrs={"class": "tit_item"})
    author_tag = arr.find("span", attrs={"class": "txt_sub"})
    title = title_tag.get_text(strip=True) if title_tag else ""
    author = author_tag.get_text(strip=True) if author_tag else ""
    print(f"{index}위 제목:{title} 작가: {author}")
    if link is not None:
        print(link["href"])
    print()  # blank separator line between entries
print()