[0220] Naver API

이아연·2024년 2월 20일
0

(0) import

import os
import sys
import urllib.parse
import urllib.request

# Blog search example: send one keyword query to the Naver blog-search
# endpoint and print the raw response body.
client_id = '# Your client_id' 
client_secret = '# Your client_secret' 
encText = urllib.parse.quote('파이썬')  # percent-encode the search keyword
url = "https://openapi.naver.com/v1/search/blog?query=" + encText
request = urllib.request.Request(url)
request.add_header("X-Naver-Client-Id", client_id)
request.add_header("X-Naver-Client-Secret", client_secret)

response = urllib.request.urlopen(request)
rescode = response.getcode()
if rescode == 200:
    response_body = response.read()
    print(response_body.decode('utf-8'))
else:
    # rescode is an int, so it must be converted before string concatenation.
    print('Error Code' + str(rescode))
response, response.getcode(), response.code, response.status

(<http.client.HTTPResponse at 0x28e40903e80>, 200, 200, 200)

# 글자로 읽을 경우 decode utf-8 설정
print(response_body.decode('utf-8'))

책 검색

import os
import sys
import urllib.parse
import urllib.request

# Book search example. Credentials are read from the environment so the
# secret is never committed to the source; export NAVER_CLIENT_ID and
# NAVER_CLIENT_SECRET before running.
client_id = os.environ['NAVER_CLIENT_ID']
client_secret = os.environ['NAVER_CLIENT_SECRET']
encText = urllib.parse.quote('파이썬')  # percent-encode the search keyword
url = "https://openapi.naver.com/v1/search/book?query=" + encText
request = urllib.request.Request(url)
request.add_header("X-Naver-Client-Id", client_id)
request.add_header("X-Naver-Client-Secret", client_secret)

response = urllib.request.urlopen(request)
rescode = response.getcode()
if rescode == 200:
    response_body = response.read()
    print(response_body.decode('utf-8'))
else:
    # rescode is an int, so it must be converted before string concatenation.
    print('Error Code' + str(rescode))

카페 검색

import os
import sys
import urllib.parse
import urllib.request

# Cafe-article search example. Credentials are read from the environment so
# the secret is never committed to the source; export NAVER_CLIENT_ID and
# NAVER_CLIENT_SECRET before running.
client_id = os.environ['NAVER_CLIENT_ID']
client_secret = os.environ['NAVER_CLIENT_SECRET']
encText = urllib.parse.quote('파이썬')  # percent-encode the search keyword
url = "https://openapi.naver.com/v1/search/cafearticle?query=" + encText
request = urllib.request.Request(url)
request.add_header("X-Naver-Client-Id", client_id)
request.add_header("X-Naver-Client-Secret", client_secret)

response = urllib.request.urlopen(request)
rescode = response.getcode()
if rescode == 200:
    response_body = response.read()
    print(response_body.decode('utf-8'))
else:
    # rescode is an int, so it must be converted before string concatenation.
    print('Error Code' + str(rescode))

쇼핑 검색

import os
import sys
import urllib.parse
import urllib.request

# Shopping search example. Credentials are read from the environment so the
# secret is never committed to the source; export NAVER_CLIENT_ID and
# NAVER_CLIENT_SECRET before running.
client_id = os.environ['NAVER_CLIENT_ID']
client_secret = os.environ['NAVER_CLIENT_SECRET']
encText = urllib.parse.quote('파이썬')  # percent-encode the search keyword
url = "https://openapi.naver.com/v1/search/shop?query=" + encText
request = urllib.request.Request(url)
request.add_header("X-Naver-Client-Id", client_id)
request.add_header("X-Naver-Client-Secret", client_secret)

response = urllib.request.urlopen(request)
rescode = response.getcode()
if rescode == 200:
    response_body = response.read()
    print(response_body.decode('utf-8'))
else:
    # rescode is an int, so it must be converted before string concatenation.
    print('Error Code' + str(rescode))

백과사전 검색

import os
import sys
import urllib.parse
import urllib.request

# Encyclopedia search example. Credentials are read from the environment so
# the secret is never committed to the source; export NAVER_CLIENT_ID and
# NAVER_CLIENT_SECRET before running.
client_id = os.environ['NAVER_CLIENT_ID']
client_secret = os.environ['NAVER_CLIENT_SECRET']
encText = urllib.parse.quote('파이썬')  # percent-encode the search keyword
url = "https://openapi.naver.com/v1/search/encyc?query=" + encText
request = urllib.request.Request(url)
request.add_header("X-Naver-Client-Id", client_id)
request.add_header("X-Naver-Client-Secret", client_secret)

response = urllib.request.urlopen(request)
rescode = response.getcode()
if rescode == 200:
    response_body = response.read()
    print(response_body.decode('utf-8'))
else:
    # rescode is an int, so it must be converted before string concatenation.
    print('Error Code' + str(rescode))

# Percent-encode the new keyword, then append it to the shopping-search
# endpoint to form the request URL.
encText = urllib.parse.quote('몰스킨')
url = "".join(("https://openapi.naver.com/v1/search/shop?query=", encText))

(1) gen_search_url()

url 얻기

def gen_search_url(api_node, search_text, start_num, disp_num):
    """Build a Naver Open API search URL that requests a JSON response.

    Args:
        api_node: Search category path segment, e.g. ``'shop'``.
        search_text: Raw keyword; it is percent-encoded here.
        start_num: Value for the ``start`` query parameter.
        disp_num: Value for the ``display`` query parameter.

    Returns:
        The full request URL as a string.
    """
    base = 'https://openapi.naver.com/v1/search/'
    # ``base`` already ends with '/', so the node must not contribute another
    # one; otherwise the path becomes '.../search//shop.json'.
    node = api_node.strip('/') + '.json'
    param_query = '?query=' + urllib.parse.quote(search_text)
    param_start = '&start=' + str(start_num)
    param_disp = '&display=' + str(disp_num)

    return base + node + param_query + param_start + param_disp

test

gen_search_url('shop','Test',10, 3)

결과 :
'https://openapi.naver.com/v1/search//shop.json?query=Test&start=10&display=3'

(2) get_result_onpage()

쇼핑 페이지 json 불러오기

import json 
import datetime 

def get_result_onpage(url):
    """Request ``url`` and return its body parsed as JSON.

    The module-level ``client_id`` and ``client_secret`` are sent as the
    ``X-Naver-Client-Id`` / ``X-Naver-Client-Secret`` headers. A timestamped
    success line is printed once the request has been opened.

    Args:
        url: Fully built request URL.

    Returns:
        The UTF-8 decoded response body parsed with ``json.loads``.

    Raises:
        Whatever ``urllib.request.urlopen`` raises for a failed request.
    """
    request = urllib.request.Request(url)
    request.add_header("X-Naver-Client-Id", client_id)
    request.add_header("X-Naver-Client-Secret", client_secret)
    # The context manager closes the HTTP response even if reading or JSON
    # parsing fails, so connections are not leaked across repeated calls.
    with urllib.request.urlopen(request) as response:
        print("[%s] Url Request Success" % datetime.datetime.now())
        return json.loads(response.read().decode("utf-8"))

url = gen_search_url('shop','몰스킨',1, 5) # search the 'shop' node for '몰스킨' with start=1 and display=5
one_result = get_result_onpage(url)

현재 날짜 불러오는 모듈

# Evaluates to the current local date and time; the value is not stored.
datetime.datetime.now()
url = gen_search_url('shop','몰스킨',1, 5) # search the 'shop' node for '몰스킨' with start=1 and display=5
one_result = get_result_onpage(url)

결과 : [2024-02-19 13:44:39.671358] Url Request Success

one_result

one_result['items'][0]

one_result['items'][0]['title']

one_result['items'][0]['link']

(3) get_fields()

데이터 프레임 생성 함수

import pandas as pd
def get_fields(json_data):
    """Collect selected fields of every entry in ``json_data['items']``.

    Returns a DataFrame with the columns ``title``, ``lprice``, ``link`` and
    ``mall`` (the last one taken from each item's ``mallName`` key).
    """
    items = json_data['items']
    # Output column name -> key to read from each item.
    source_keys = {
        'title': 'title',
        'link': 'link',
        'lprice': 'lprice',
        'mall': 'mallName',
    }
    gathered = {
        column: [item[key] for item in items]
        for column, key in source_keys.items()
    }
    return pd.DataFrame(gathered, columns=['title', 'lprice', 'link', 'mall'])
get_fields(one_result)

(4) delete_tag()

태그 삭제

def delete_tag(input_str):
    """Return ``input_str`` with every ``<b>`` and ``</b>`` marker removed."""
    cleaned = input_str
    # Opening markers are stripped first, then closing markers.
    for marker in ('<b>', '</b>'):
        cleaned = cleaned.replace(marker, '')
    return cleaned
import pandas as pd
def get_fields(json_data):
    """Build a DataFrame from ``json_data['items']`` with cleaned titles.

    Each item's ``title`` is passed through ``delete_tag``; ``link``,
    ``lprice`` and ``mallName`` are copied as-is.

    Args:
        json_data: Mapping with an ``'items'`` list of per-item mappings.

    Returns:
        A DataFrame with the columns ``title``, ``lprice``, ``link``, ``mall``.
    """
    title = [delete_tag(each['title']) for each in json_data['items']]
    link = [each['link'] for each in json_data['items']]
    lprice = [each['lprice'] for each in json_data['items']]
    mall_name = [each['mallName'] for each in json_data['items']]

    result_pd = pd.DataFrame(
        {'title':title,
        'link':link,
        'lprice':lprice,
        'mall':mall_name}, columns=['title','lprice','link','mall']
    )
    return result_pd
get_fields(one_result)

(5)actMain()

100개씩 10페이지, 총 1000개의 상품 url, 데이터 프레임 생성
result_mol에 append한 후 concat으로 합치기

# Collect one DataFrame per request, then merge them into a single frame.
result_mol = []

# start takes the values 1, 101, ..., 901 and each request asks for 100 items.
for n in range(1, 1000, 100):
    url = gen_search_url('shop','몰스킨', n, 100)
    json_result = get_result_onpage(url)
    pd_result = get_fields(json_result)

    result_mol.append(pd_result)
# From here on result_mol is the concatenated DataFrame, not the list.
result_mol = pd.concat(result_mol)
result_mol.info()

reset_index, 가격 데이터타입 float으로 바꾸기

# Replace the index with a fresh 0..N-1 range in place, discarding the old one.
result_mol.reset_index(drop=True, inplace=True)
result_mol.info()
# Cast the price column to float.
result_mol['lprice'] = result_mol['lprice'].astype('float')

(6) to_excel

엑셀로 데이터 옮기기
writer 지정
.to_excel(writer, sheet_name=)
worksheet.set_column
색 설정

# Export result_mol to an Excel workbook and format the sheet. The context
# manager closes (and thereby saves) the writer even if a formatting call
# raises, so the file handle is not left open.
with pd.ExcelWriter("../data/06_molskin_diary_in_naver_shop.xlsx", engine="xlsxwriter") as writer:
    result_mol.to_excel(writer, sheet_name="Sheet1")

    workbook = writer.book
    worksheet = writer.sheets["Sheet1"]
    # Column widths for columns A through F.
    worksheet.set_column("A:A", 4)
    worksheet.set_column("B:B", 60)
    worksheet.set_column("C:C", 10)
    worksheet.set_column("D:D", 10)
    worksheet.set_column("E:E", 50)
    worksheet.set_column("F:F", 10)

    # Three-colour scale over C2:C1001.
    worksheet.conditional_format("C2:C1001", {"type": "3_color_scale"})

(7) 시각화

import matplotlib.pyplot as plt
import seaborn as sns
# Count plot of the 'mall' column on a 15x6 figure.
plt.figure(figsize=(15, 6))
sns.countplot(
    x = result_mol['mall'],
    data = result_mol,
    # palette = 'RdYlGn',
    # Bars follow the order of value_counts() on the 'mall' column.
    order = result_mol['mall'].value_counts().index
)
# Rotate the x tick labels by 90 degrees.
plt.xticks(rotation=90)
plt.show()

자료 출처 : 제로베이스 데이터 취업 스쿨

profile
Hi Welcome

0개의 댓글