Goal
**`<span class="value">`**
# Imports for fetching and parsing the Naver market-index page.
from urllib.request import urlopen
from bs4 import BeautifulSoup

url = "https://finance.naver.com/marketindex/"
# Fetch the page; urlopen returns an http.client.HTTPResponse.
response = urlopen(url)
response
# Parse the response body into a navigable DOM tree.
# Fixed: the original called BeautiefulSoup(page, ...) -- a typo in the
# class name, and `page` was never defined (its assignment was commented out).
soup = BeautifulSoup(response, "html.parser")
print(soup.prettify())
Fetching the exchange rate (`<span class="value">`)
# 1
# All <span class="value"> tags (the exchange-rate figures) and how many there are.
soup.find_all("span", "value"), len(soup.find_all("span", "value"))
# .text, .string and .get_text() all return the same text for a simple tag.
soup.find_all("span", {"class":"value"})[0].text, soup.find_all("span", {"class":"value"})[0].string, soup.find_all("span", {"class":"value"})[0].get_text()
Output : ('1,171.10', '1,171.10', '1,171.10')
Summary
import requests # send the request, receive the response
# from urllib.request.Request
from bs4 import BeautifulSoup
url = "https://finance.naver.com/marketindex/"
# requests.get returns a requests.Response object.
response = requests.get(url)
# requests.get(), requests.post()
# response.text
# Parse the decoded response body (a str) into a DOM tree.
soup = BeautifulSoup(response.text, "html.parser")
print(soup.prettify())
Variable
.status_code
: a number that confirms whether the crawl's request and response completed normally (200 == OK).
url = "https://finance.naver.com/marketindex/"
response = requests.get(url)
# requests.Response exposes the HTTP status as `.status_code` (200 == OK).
# Fixed: the original read `response.status`, which does not exist on a
# requests.Response and raised AttributeError.
response.status_code
# soup.find_all("li", "on")
# CSS selector shorthand: "#name" selects by id, ".name" selects by class.
exchangeList = soup.select("#exchangeList > li")
len(exchangeList), exchangeList
# Pull the four fields of the first currency entry via class selectors.
first = exchangeList[0]
title = first.select_one(".h_lst").text
exchange = first.select_one(".value").text
change = first.select_one(".change").text
updown = first.select_one(".head_info.point_up > .blind").text
# link
title, exchange, change, updown
Output : ('미국 USD', '1,349.40', '4.40', '상승')
select
Using `select` makes it easier to move up and down the element tree than `find`/`find_all`.
findmethod = soup.find_all("ul", id="exchangeList")
# Equivalent lookup with find_all: the rate values inside the exchange list.
findmethod[0].find_all("span", "value")
# href of the first <a> tag -- a relative link to the currency's detail page.
exchangeList[0].select_one("a").get("href")
The output /marketindex/exchangeDetail.naver?marketindexCd=FX_USDKRW is a relative path (the page is viewed on Naver Finance), so the site's base URL must be prepended:
baseUrl = "https://finance.naver.com"
# Prepend the site root to turn the relative href into an absolute URL.
baseUrl + exchangeList[0].select_one("a").get("href")
Output : 'https://finance.naver.com/marketindex/exchangeDetail.naver?marketindexCd=FX_USDKRW'
exchange_datas
Storage format: a list of dictionaries.
import pandas as pd
# Collect the 4 data fields (plus detail link) for every currency in the list,
# then save them to an Excel file via pandas.
exchange_datas = []
baseUrl = "https://finance.naver.com"
for item in exchangeList:
    # The up/down indicator is the first direct ".blind" child of ".head_info".
    # Fixed: the original selector ".head_info.point_up > .blind" only matched
    # rising entries and raised AttributeError (None.text) on falling ones.
    updown_tag = item.select_one(".head_info > .blind")
    data = {
        "title": item.select_one(".h_lst").text,
        # Fixed key typo: "exchnage" -> "exchange" (was the Excel column name).
        "exchange": item.select_one(".value").text,
        "change": item.select_one(".change").text,
        "updown": updown_tag.text if updown_tag else "",
        "link": baseUrl + item.select_one("a").get("href"),
    }
    print(data)
    exchange_datas.append(data)
df = pd.DataFrame(exchange_datas)
df.to_excel("./naverfinance.xlsx")
Goal
Link: Eyes of Dawn (여명의 눈동자) — Wikipedia, the free encyclopedia (wikipedia.org)
ModuleLoad
The Korean page title must be UTF-8 percent-encoded in the URL.
import urllib
from urllib.parse import quote  # explicit: `import urllib` alone does not guarantee urllib.parse is loaded
from urllib.request import urlopen, Request

# URL template; {search_words} is filled with the percent-encoded page title.
html = "https://ko.wikipedia.org/wiki/{search_words}"
# https://ko.wikipedia.org/wiki/여명의_눈동자
# Percent-encode the Korean title so it is a valid URL path segment.
# (Restored the title string "여명의_눈동자", which was mangled by a broken
# character encoding in the original.)
req = Request(html.format(search_words=quote("여명의_눈동자")))
response = urlopen(req)
soup = BeautifulSoup(response, "html.parser")
print(soup.prettify())
# Print every <ul> on the page with its index so the cast list can be located.
for n, each in enumerate(soup.find_all("ul")):
    print("=>" + str(n) + "========================")
    print(each.get_text())
# The cast list is the 16th <ul>; strip edges, NBSPs and newlines.
soup.find_all("ul")[15].text.strip().replace("\xa0", "").replace("\n", "")
Output : '์ฑ์๋ผ: ์ค์ฌ์ฅ ์ญ (์์ญ: ๊น๋ฏผ์ )๋ฐ์์: ์ฅํ๋ฆผ(ํ๋ฆฌ๋ชจํ ๋์ธ ์ค) ์ญ (์์ญ: ๊นํ์ง)์ต์ฌ์ฑ: ์ต๋์น(์ฌ์นด์ด) ์ญ (์์ญ: ์ฅ๋์)โ