SW과정 머신러닝 1008(4)

JongseokLee·2021년 10월 8일
0
post-thumbnail

SW과정 머신러닝 1008(4)

1. 네이버 영화 평점[딕셔너리 저장]

# 1. Scrape Naver movie review pages 1-10 into a list of dicts.
import requests
import urllib.request
from bs4 import BeautifulSoup
import re

list_records = []

for index in range(1, 11):
    params = urllib.parse.urlencode({'page': index})
    url = f'https://movie.naver.com/movie/point/af/list.naver?{params}'
    res = urllib.request.urlopen(url)
    soup = BeautifulSoup(res, 'html.parser')
    table = soup.find('table', class_='list_netizen')
    # enumerate yields the index together with each row element
    for i, tr in enumerate(table.select('tbody tr')):
        # Reset per-row fields so a row that lacks the expected cells cannot
        # silently re-append the values parsed from the previous row
        # (bug in the original: these survived across loop iterations).
        recode1 = recode2 = recode3 = None
        for j, td in enumerate(tr.find_all('td')):
            if j == 0:
                # Review number; parsed but not stored in the record.
                recode = int(td.text.strip())
            elif j == 1:
                recode1 = td.select_one('td.title a').text.strip()
                print('제목:', recode1)
                recode2 = int(td.select_one('em').text.strip())
                print('점수:', recode2)
                # The cell text bundles title, score caption, review body and a
                # "report" link; strip everything but the review body.
                recode3 = td.text.strip()
                recode3 = recode3.replace(recode1, '')
                recode3 = recode3.replace('신고', '')
                recode3 = re.sub('별점 - 총 10점 중[0-9]{1,2}', ' ', recode3).strip()
                print('감상평:', recode3)
        if recode1 is not None:
            movie_dic = {'제목': recode1, '점수': recode2, '감상평': recode3}
            list_records.append(movie_dic)

# Print the collected records once, after scraping finishes
# (the original printed the growing list on every row: O(n^2) output).
print(list_records)

2. 스타벅스 맵 크롤링

# 2. Crawl the Starbucks store map with Selenium.
from selenium import webdriver
from bs4 import BeautifulSoup
import time

# Chrome driver version 94.0.4606.71
browser = webdriver.Chrome('./crawling/chromedriver.exe')
browser.get('https://www.starbucks.co.kr/store/store_map.do')

# Open the "search by region" panel.
region_search = browser.find_element_by_class_name('loca_search')
region_search.click()
time.sleep(5)

# Select the 6th city/province entry, then the 6th district entry.
# (The two list boxes share the same interaction pattern.)
for box_class in ('sido_arae_box', 'gugun_arae_box'):
    area_box = browser.find_element_by_class_name(box_class)
    entries = area_box.find_elements_by_tag_name('li')
    print(entries)
    entries[5].click()
    time.sleep(5)

# Parse the rendered store list from the final page source.
page = BeautifulSoup(browser.page_source, 'html.parser')
result_list = page.select_one('ul.quickSearchResultBoxSidoGugun')
for store in result_list.select('li'):
    print(store.select_one('strong').text)
    print(store.select_one('p').text)

3. 네이버 자동 로그인

# 3. Naver auto-login by setting the form fields via JavaScript.
# NOTE(review): fill in real credentials before running.
user_id = ''  # renamed from `id`/`pw`: those shadowed the builtins
user_pw = ''

from selenium import webdriver
from bs4 import BeautifulSoup

driver = webdriver.Chrome('./crawling/chromedriver')
driver.get('https://nid.naver.com/nidlogin.login')
driver.implicitly_wait(5)

# The fields are filled from JS rather than send_keys — presumably to dodge
# Naver's automation check (the direct send_keys calls were commented out
# in the original). Pass the values as script arguments instead of
# concatenating them into the JS source: the original broke (and allowed
# JS injection) if a credential contained a quote character.
driver.execute_script("document.getElementsByName('id')[0].value=arguments[0]", user_id)
driver.execute_script("document.getElementsByName('pw')[0].value=arguments[0]", user_pw)
driver.find_element_by_xpath('//*[@id="log.login"]').click()

4. 네이버 자동로그인/마일리지 가져오기

# 4. Naver auto-login via clipboard paste, then read the mileage balance.
# Typing with send_keys apparently trips Naver's automation check (the
# send_keys attempt was left commented out in the original — with a
# `send_Keys` capitalization typo), so each credential is copied to the
# clipboard and pasted with Ctrl-V instead.
# NOTE(review): fill in real credentials before running.
user_id = ' '  # renamed from `id`/`pw`: those shadowed the builtins
user_pw = ''

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
import pyperclip
# (removed dead code: an earlier fully-commented-out attempt, and an
# unused `import urllib`)

driver = webdriver.Chrome('./crawling/chromedriver')
driver.get('https://nid.naver.com/nidlogin.login')
driver.implicitly_wait(5)

tag_id = driver.find_element_by_id('id')
tag_pw = driver.find_element_by_id('pw')

# Paste the id: focus the field, copy to clipboard, Ctrl-V.
tag_id.click()
pyperclip.copy(user_id)
tag_id.send_keys(Keys.CONTROL, 'v')

# Same for the password.
tag_pw.click()
pyperclip.copy(user_pw)
tag_pw.send_keys(Keys.CONTROL, 'v')

driver.find_element_by_xpath('//*[@id="log.login"]').click()


# After login, scrape the N-point mileage from the pay home page.
driver.get('https://order.pay.naver.com/home')
html = driver.page_source  # source of the currently loaded page

soup = BeautifulSoup(html, 'html.parser')
point = soup.select_one('dl.my_npoint strong').text
print('마일리지는', point, '원 입니다.')

5. 네이버 오타 검사

# 5. Spell-check a query via the Naver "errata" search API.
import requests

# NOTE(review): API credentials are hard-coded in a public post —
# they should live in environment variables and be rotated.
client_id = "Ztm6dSWZEPhyt6Dfh2dB"
client_secret = "Gu4yevi6Gv"

query = input('검색할 단어 >>> ')

# Send the query with the client credentials in the request headers.
response = requests.get(
    'https://openapi.naver.com/v1/search/errata.json',
    params={'query': query},
    headers={
        "X-Naver-Client-Id": client_id,
        "X-Naver-Client-Secret": client_secret,
    },
)
print(response.text)

6. 카카오 API 지도검색

# 6. Kakao local-search API: look up an address. Equivalent curl:
#   curl -v -X GET "https://dapi.kakao.com/v2/local/search/address.json" \
#     -H "Authorization: KakaoAK {REST_API_KEY}" \
#     --data-urlencode "query=전북 삼성동 100"

# NOTE(review): REST API key is exposed in a public post — rotate it.
REST_API_KEY = 'b2c8a44ae2c65a9b39e5e979d05242c1'

import requests

address = input('주소 입력 >>> ')

# Authenticate with the "KakaoAK <key>" header scheme.
response = requests.get(
    'https://dapi.kakao.com/v2/local/search/address.json',
    params={'query': address},
    headers={"Authorization": f'KakaoAK {REST_API_KEY}'},
)
print(response.text)

Install

selenium
conda install selenium

pyperclip (둘 중 하나만 실행하면 됩니다)
conda install pyperclip
pip install pyperclip

profile
DataEngineer Lee.

0개의 댓글