bs 크롤링 기본코드

고독한 키쓰차·2022년 5월 18일
0
import requests
from bs4 import BeautifulSoup
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
import time
import random
import re
import datetime
from dateutil.relativedelta import *
import os
# os.environ["HTTP_PROXY"]="http://168.219.61.252:8080"
# os.environ["HTTPS_PROXY"]="https://168.219.61.252:8080"


# read_html
# Naver Finance detail page for the USD/KRW exchange rate (FX_USDKRW).
# NOTE(review): not referenced anywhere in the visible part of this file —
# confirm it is still needed.
rootUrl = 'https://finance.naver.com/marketindex/exchangeDetail.nhn?marketindexCd=FX_USDKRW'

def read_html(url):
    """Download ``url`` and return the response body as text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded response body (``Response.text``). The HTTP status code
        is not inspected, so the body of an error page is returned as-is.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the server does not respond within the timeout.
    """
    # The header value must be the bare UA string; the previous value embedded
    # a literal "User-Agent: " prefix inside the value itself.
    headers = {
        "user-agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/90.0.4430.85 Safari/537.36"
        )
    }
    # This is a plain page fetch, so use GET rather than POST. The timeout
    # keeps a stalled connection from hanging the whole crawl.
    # NOTE(review): verify=False disables TLS certificate verification. It is
    # kept for compatibility (possibly needed behind an intercepting proxy);
    # remove it if the environment allows normal verification.
    response = requests.get(url, headers=headers, verify=False, timeout=10)
    return response.text
    
# Requested date window, as 'YYYY-MM-DD' strings.
start_date = '2020-01-01'
end_date = '2021-01-01'

# Current local date at midnight, as a pandas Timestamp.
today = pd.to_datetime(datetime.date.today().isoformat(), format='%Y-%m-%d')

# Time elapsed between start_date and today.
gap = today - pd.to_datetime(start_date)

# Page count used for paging: one page per 7 elapsed calendar days.
# NOTE(review): presumably assumes 7 rows per listing page — confirm.
pg_num = gap.days // 7

# Daily-quote listing page for each series to scrape, keyed by series name
# (original note: "dollar data"). A "&page=N" suffix is appended when paging.
URL = {
    'ALUMINUM': 'https://finance.naver.com/marketindex/worldDailyQuote.naver?fdtc=2&marketindexCd=CMDT_AAY',
    'COPPER': 'https://finance.naver.com/marketindex/worldDailyQuote.naver?fdtc=2&marketindexCd=CMDT_CDY',
    'WTI': 'https://finance.naver.com/marketindex/worldDailyQuote.naver?marketindexCd=OIL_CL&fdtc=2',
    'GOLD': 'https://finance.naver.com/marketindex/worldDailyQuote.naver?marketindexCd=CMDT_GC&fdtc=2',
    'USD': 'https://finance.naver.com/marketindex/exchangeDailyQuote.naver?marketindexCd=FX_USDKRW',
}

# Collected rows: [series name, date text with '.' replaced by '-', value text].
lst = []
for k, u in URL.items():
    # Requests pages 1 .. pg_num - 1 of the series' listing.
    for i in range(1, pg_num):  # paging
        fx = read_html(f'{u}&page={i}')
        fx_rows = BeautifulSoup(fx, 'html.parser').select('tbody > tr')

        for r in fx_rows:
            date = r.select('td.date')
            num = r.select('td.num')
            # Skip rows lacking a date or value cell (e.g. spacer/empty rows)
            # instead of failing with IndexError on date[0] / num[0].
            if not date or not num:
                continue
            # Only the first td.num cell of the row is recorded.
            lst.append([k, date[0].text.replace('.', '-').strip(), num[0].text.strip()])

profile
Data Scientist or Gourmet

0개의 댓글