-- 04.네이버 웹툰.ipynb --
import requests
from bs4 import BeautifulSoup
import os
import urllib.parse as urlparser # url 다루기
from os.path import basename, splitext, split # 확장자 관련
# Fetch the webtoon detail page; the bare expressions below are notebook
# cell outputs kept for inspection.
url = 'https://comic.naver.com/webtoon/detail?titleId=478261&no=102&week=thu'
response = requests.get(url)
response

# Parse the HTML and read the text of the #subTitle_toolbar element.
dom = BeautifulSoup(response.text, 'html.parser')
title = dom.select_one("#subTitle_toolbar").get_text().strip()
title

# Every <img> inside the viewer container.
imgs = dom.select("#comic_view_area .wt_viewer img")
len(imgs)

# Collect the src attribute of each image tag.
img_urls = [img["src"] for img in imgs]
img_urls

# Use the first image for the single-download experiment that follows.
img_url = img_urls[0]
img_url
# First attempt: plain request for the image (binary payload, hence the
# stream option).
response = requests.get(img_url, stream=True)
response  # original note: this comes back as a 403 error

# Second attempt: send the page URL as Referer plus a browser-like
# User-Agent (presumably what the image server checks -- confirm).
headers = {
    'Referer': url,
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
}
response = requests.get(img_url, headers=headers, stream=True)
response

# Save the response body to a local file.
with open("navertoon.jpg", 'wb') as image_file:
    image_file.write(response.content)
def download_naver_webtoon(
    titleId,
    no,
    *,
    save_dir='/content/drive/MyDrive/05_crawl(AI2401)/download',
    timeout=30,
):
    """Download the viewer images of one webtoon detail page.

    Fetches the detail page for ``titleId``/``no``, collects the ``src`` of
    every ``<img>`` under ``#comic_view_area .wt_viewer`` and saves each one
    into ``save_dir`` under the file name taken from its URL path. Progress
    is printed for the page and for every image.

    Args:
        titleId: Value substituted into the ``titleId`` query parameter.
        no: Value substituted into the ``no`` query parameter.
        save_dir: Directory the images are written to; created when missing.
        timeout: Timeout in seconds passed to every HTTP request.

    Raises:
        requests.HTTPError: If the page or any image responds with an HTTP
            error status. Nothing is written for the failing image.
        requests.RequestException: On connection errors or timeouts.
        OSError: If ``save_dir`` cannot be created or a file cannot be
            written.
    """
    url = f'https://comic.naver.com/webtoon/detail?titleId={titleId}&no={no}'
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    dom = BeautifulSoup(response.text, 'html.parser')

    # select_one returns None when the element is absent; fall back to an
    # empty title instead of crashing with AttributeError.
    title_node = dom.select_one("#subTitle_toolbar")
    title = title_node.text.strip() if title_node is not None else ''
    imgs = dom.select("#comic_view_area .wt_viewer img")
    print(no, title, len(imgs), '개 이미지')

    # Skip tags without a src and resolve relative / protocol-relative
    # values against the page URL (absolute URLs pass through unchanged).
    img_urls = [
        urlparser.urljoin(url, img['src'])
        for img in imgs
        if img.get('src')
    ]
    if not img_urls:
        return

    # The page URL is sent as Referer together with a browser-like
    # User-Agent on every image request.
    headers = {
        'Referer': url,
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
    }

    os.makedirs(save_dir, exist_ok=True)

    for img_url in img_urls:
        # File name = last component of the URL path (query string dropped).
        filename = basename(urlparser.urlparse(img_url).path)
        save_path = os.path.join(save_dir, filename)
        print(f'다운로드 : {img_url} -> {filename}')

        # The context manager releases the streamed connection; the status
        # is checked before the file is opened so an error body is never
        # saved as an image.
        with requests.get(
            img_url, headers=headers, stream=True, timeout=timeout
        ) as response:
            response.raise_for_status()
            with open(save_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=64 * 1024):
                    f.write(chunk)
        print('성공')
# Run the downloader for titleId 478261, episode number 102.
titleId, no = 478261, 102
download_naver_webtoon(titleId, no)
-- 구글 드라이브 파일에 저장함. --