# Install the requests package (the leading "!" runs this line as a shell command in a notebook cell).
!pip install requests
# Use the function whose name matches the HTTP method of the request you want to send.
# NOTE: the calls below are illustrative placeholders; a real call passes the target URL.
response = requests.get() # GET request
response = requests.post() # POST request
response = requests.put() # PUT request
response = requests.delete() # DELETE request
# Fetch the page source.
import requests
page_no = 1
page_url = f"https://finance.naver.com/sise/sise_index_day.naver?code=KPI200&page={page_no}"
# A timeout keeps the request from blocking forever if the server never answers.
response = requests.get(page_url, timeout=10)
# Fail loudly on a 4xx/5xx answer instead of silently keeping an error page.
response.raise_for_status()
source = response.text
# Bare expression: displays the fetched HTML when run as the last line of a notebook cell.
source
๊ฐ์ ธ์จ source๋ฅผ ํ์ธํ๋ฉด ๋ค์๊ณผ ๊ฐ์ต๋๋ค.
'<html lang="ko">\n<head>\n<meta http-equiv="Content-Type" content="text/html; charset=euc-kr">\n<title>네이버 증권</title>\n\n<link rel="stylesheet" type="text/css" href="https://ssl.pstatic.net/imgstock/static.pc/20230519195543/css/newstock.css">\n<link rel="stylesheet" type="text/css" href="https://ssl.pstatic.net/imgstock/static.pc/20230519195543/css/common.css">\n<link rel="stylesheet" type="text/css" href="https://ssl.pstatic.net/imgstock/static.pc/20230519195543/css/layout.css">\n<link rel="stylesheet" type="text/css" href="https://ssl.pstatic.net/imgstock/static.pc/20230519195543/css/main.css">\n<link rel="stylesheet" type="text/css" href="https://ssl.pstatic.net/imgstock/static.pc/20230519195543/css/newstock2.css">\n<link rel="stylesheet" type="text/css" href="https://ssl.pstatic.net/imgstock/static.pc/20230519195543/css/newstock3.css">\n<link rel="stylesheet" type="text/css" href="https://ssl.pstatic.net/imgstock/static.pc/20230519195543/css/world.css">\n</head>
์ด๊ฒ์ bs4 ๋ผ์ด๋ธ๋ฌ๋ฆฌ๋ฅผ ์ฌ์ฉํด parse tree๋ก ๋ณํํ๋ฉด
# Load beautifulsoup4.
import bs4
# Parse the fetched HTML source with BeautifulSoup. Naming the "lxml" parser
# explicitly keeps the resulting tree the same on every machine and avoids
# bs4's "no parser was explicitly specified" warning (lxml must be installed).
source = bs4.BeautifulSoup(source, "lxml")
# prettify() renders the parse tree as an indented, human-readable string.
print(source.prettify())
<html lang="ko">
<head>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
<title>
네이버 증권
</title>
<link href="https://ssl.pstatic.net/imgstock/static.pc/20230519195543/css/newstock.css" rel="stylesheet" type="text/css"/>
<link href="https://ssl.pstatic.net/imgstock/static.pc/20230519195543/css/common.css" rel="stylesheet" type="text/css"/>
<link href="https://ssl.pstatic.net/imgstock/static.pc/20230519195543/css/layout.css" rel="stylesheet" type="text/css"/>
<link href="https://ssl.pstatic.net/imgstock/static.pc/20230519195543/css/main.css" rel="stylesheet" type="text/css"/>
<link href="https://ssl.pstatic.net/imgstock/static.pc/20230519195543/css/newstock2.css" rel="stylesheet" type="text/css"/>
<link href="https://ssl.pstatic.net/imgstock/static.pc/20230519195543/css/newstock3.css" rel="stylesheet" type="text/css"/>
<link href="https://ssl.pstatic.net/imgstock/static.pc/20230519195543/css/world.css" rel="stylesheet" type="text/css"/>
</head>