pip install beautifulsoup4
pip install lxml
※ lxml : BeautifulSoup이 HTML/XML 문서를 해석할 때 사용하는 구문 분석 파서
현재 네이버 웹툰은 JS 기반의 동적 사이트이기 때문에 웹 아카이빙을 이용하여 과거 버전의 네이버 웹툰으로 실습을 진행함
https://web.archive.org/web/20200401052025/https://comic.naver.com/webtoon/weekday.nhn

# Demo 1: basic BeautifulSoup element access and sibling navigation on the
# archived (Wayback Machine) Naver Webtoon weekday page.
import requests
from bs4 import BeautifulSoup

url = "https://web.archive.org/web/20200401052025/https://comic.naver.com/webtoon/weekday.nhn"
res = requests.get(url)
res.raise_for_status()  # raise immediately on HTTP 4xx/5xx
soup = BeautifulSoup(res.text, "lxml")

print(soup.title)             # first <title> element
print(soup.title.get_text())  # its text content only
print(soup.a)                 # first <a> element in the document
print(soup.a.attrs)           # all of its attributes as a dict
print(soup.a["href"])         # value of the href attribute

# Sibling navigation among the ranking <li> entries.
rank3 = soup.find("li", attrs={"class": "rank03"})
print(rank3.a.get_text())
# rank3.next_sibling / previous_sibling may land on whitespace text nodes,
# requiring two hops; find_next_sibling("li") / find_previous_sibling("li")
# skip straight to the neighboring <li> element.
rank4 = rank3.find_next_sibling("li")
print(rank4.a.get_text())
rank2 = rank3.find_previous_sibling("li")
print(rank2.a.get_text())
print(rank3.find_next_siblings("li"))  # every following <li> sibling at once
# string= replaces the deprecated text= keyword (BeautifulSoup >= 4.4).
print(soup.find("a", string="연놈-226화. 여전히 작은 아이 (희망)"))
print(rank3.parent)  # the element enclosing rank3

# Demo 2: collect every webtoon title on the archived weekday page.
import requests
from bs4 import BeautifulSoup

url = "https://web.archive.org/web/20200401052025/https://comic.naver.com/webtoon/weekday.nhn"
res = requests.get(url)
res.raise_for_status()  # raise immediately on HTTP 4xx/5xx
soup = BeautifulSoup(res.text, "lxml")

# Each webtoon title on this page is an <a class="title"> element.
cartoons = soup.find_all("a", attrs={"class": "title"})
for cartoon in cartoons:
    print(cartoon.get_text())
▶ class 속성이 title인 모든 "a" element들을 찾는 방식(find_all(element, attrs={"속성명" : "값"}))으로 가져옴

참고 : 2025.02.04일 기준 무료회차에 해당되는 부분임

# Demo 3: episode titles and links for one webtoon (archived list page).
import requests
from bs4 import BeautifulSoup

url = "https://web.archive.org/web/20201028205420/https://comic.naver.com/webtoon/list.nhn?titleId=641253&weekday=fri"
res = requests.get(url)
res.raise_for_status()  # raise immediately on HTTP 4xx/5xx
soup = BeautifulSoup(res.text, "lxml")

# Each episode row holds its title/link inside a <td class="title">.
cartoons = soup.find_all("td", attrs={"class": "title"})
for cartoon in cartoons:
    title = cartoon.a.get_text()
    # The href carries a web.archive.org prefix; slicing off the first 20
    # characters recovers the original path. NOTE(review): assumes the
    # prefix length is always exactly 20 — confirm across snapshots.
    link = cartoon.a["href"][20:]
    print(title, link)

# Demo 4: average the per-episode ratings on the archived list page.
import requests
from bs4 import BeautifulSoup

url = "https://web.archive.org/web/20201028205420/https://comic.naver.com/webtoon/list.nhn?titleId=641253&weekday=fri"
res = requests.get(url)
res.raise_for_status()  # raise immediately on HTTP 4xx/5xx
soup = BeautifulSoup(res.text, "lxml")

# Each <div class="rating_type"> wraps a <strong> holding the numeric score.
cartoons = soup.find_all("div", attrs={"class": "rating_type"})
total = 0.0  # renamed from "sum" so the builtin is not shadowed
count = 0
for cartoon in cartoons:
    total += float(cartoon.strong.get_text())
    count += 1
if count:  # guard against ZeroDivisionError when no ratings are found
    print(f"평균 평점 : {total/count}")

# Demo 5: scrape the KOSPI market-cap table (pages 1-4, 50 rows each = top
# 200 stocks) from Naver Finance into a CSV file.
import csv  # (was imported twice in the original; once is enough)
import requests
from bs4 import BeautifulSoup

url = "https://finance.naver.com/sise/sise_market_sum.nhn?sosok=0&page="
filename = "시가총액_1~200.csv"

# utf-8-sig writes a BOM so Excel opens Korean text correctly;
# newline="" stops the csv module emitting blank rows on Windows.
# "with" guarantees the file is closed even if a request fails mid-run.
with open(filename, "w", encoding="utf-8-sig", newline="") as f:
    writer = csv.writer(f)
    header = "N, 종목명, 현재가, 전일비, 등락률, 액면가, 시가총액, 상장주식수, 외국인비율, 거래량, PER, ROE, 토론실".split(',')
    writer.writerow(header)

    for page in range(1, 5):  # pages 1..4
        res = requests.get(url + str(page))
        res.raise_for_status()  # raise immediately on HTTP 4xx/5xx
        soup = BeautifulSoup(res.text, "lxml")

        data_rows = soup.find("table", attrs={"class": "type_2"}).find("tbody").find_all("tr")
        for row in data_rows:
            columns = row.find_all("td")
            if len(columns) <= 1:  # separator rows carry a single empty cell
                continue
            writer.writerow([column.get_text().strip() for column in columns])
