import requests
from bs4 import BeautifulSoup
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
import time
import random
import re
import datetime
from dateutil.relativedelta import *
import os
# os.environ["HTTP_PROXY"]="http://168.219.61.252:8080"
# os.environ["HTTPS_PROXY"]="https://168.219.61.252:8080"
# read_html
# Naver Finance detail page for the USD/KRW exchange rate (marketindexCd=FX_USDKRW).
# NOTE(review): not referenced anywhere in the visible part of this script;
# confirm it is still needed before relying on it.
rootUrl = 'https://finance.naver.com/marketindex/exchangeDetail.nhn?marketindexCd=FX_USDKRW'
def read_html(url, timeout=30):
    """Request ``url`` and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The decoded response body (``Response.text``). The HTTP status is not
        checked, so an error page is returned as-is.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            ``timeout`` elapses.
    """
    # The header value must be the bare UA string; the previous value embedded
    # a literal "User-Agent: " prefix, producing a malformed header.
    headers = {
        "user-agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/90.0.4430.85 Safari/537.36"
        )
    }
    # NOTE(review): the request is sent as POST although it only reads a page;
    # kept as-is because the target endpoints are known to answer it.
    # SECURITY: verify=False disables TLS certificate validation. It is kept to
    # preserve existing behavior (presumably needed behind an intercepting
    # proxy; confirm) and should be removed when certificates validate.
    req = requests.post(url, headers=headers, verify=False, timeout=timeout)
    return req.text
# Date window of interest.
# NOTE(review): end_date is not used anywhere in the visible code, so the rows
# collected below are not limited to this window; confirm whether filtering
# happens later.
start_date = '2020-01-01'
end_date = '2021-01-01'

# Midnight of the current local day as a pandas Timestamp.
today = pd.Timestamp.today().normalize()
gap = today - pd.to_datetime(start_date)
# Whole weeks elapsed since start_date; used as the exclusive upper bound of the
# page numbers requested for every series.
pg_num = gap.days // 7

# Naver Finance daily-quote pages to scrape, keyed by series name.
URL = {
    'ALUMINUM': 'https://finance.naver.com/marketindex/worldDailyQuote.naver?fdtc=2&marketindexCd=CMDT_AAY',
    'COPPER': 'https://finance.naver.com/marketindex/worldDailyQuote.naver?fdtc=2&marketindexCd=CMDT_CDY',
    'WTI': 'https://finance.naver.com/marketindex/worldDailyQuote.naver?marketindexCd=OIL_CL&fdtc=2',
    'GOLD': 'https://finance.naver.com/marketindex/worldDailyQuote.naver?marketindexCd=CMDT_GC&fdtc=2',
    'USD': 'https://finance.naver.com/marketindex/exchangeDailyQuote.naver?marketindexCd=FX_USDKRW',
}

# One [series name, date string, first numeric cell] entry per scraped table row.
lst = []
for k, u in URL.items():
    for i in range(1, pg_num):  # paging: pages 1 .. pg_num - 1
        fx = read_html(f'{u}&page={i}')
        fx_rows = BeautifulSoup(fx, 'html.parser').select('tbody > tr')
        for r in fx_rows:
            date = r.select('td.date')
            num = r.select('td.num')
            # Skip rows that lack a date or numeric cell (e.g. spacer or
            # "no data" rows) instead of failing with IndexError.
            if not date or not num:
                continue
            # Dates are rendered with dots on the page; store them dash-separated.
            lst.append([k, date[0].text.replace('.', '-').strip(), num[0].text.strip()])