19_EDA 4

김정연·2023년 6월 30일
0

데이터스쿨

목록 보기
20/30

06.Naver API

1. 네이버 API 사용 등록


2. 네이버 검색 API 사용하기

  • urllib : HTTP 프로토콜에 따라 서버에 요청을 보내고 응답을 처리하기 위한 모듈(패키지)
  • urllib.request : 클라이언트의 요청을 만들고 전송하는 모듈
  • urllib.parse : URL 주소를 분석(파싱)하고 검색어를 URL 인코딩하는 모듈
# Naver Search API example - blog search.
# Sends one authenticated GET request and prints the JSON body on success.
import os
import sys
import urllib.parse  # imported explicitly because urllib.parse.quote is used below
import urllib.request


# NOTE(review): API credentials are hard-coded in the source. Anyone who can
# read this file can use them; consider loading them from the environment.
client_id = "8ZE_34gSct81y2DipGJF"
client_secret = "1H6O47Qvw9"


# The search term must be percent-encoded before it is placed in the URL.
encText = urllib.parse.quote("파이썬")
url = "https://openapi.naver.com/v1/search/blog?query=" + encText  # JSON result
# url = "https://openapi.naver.com/v1/search/blog.xml?query=" + encText # XML result
request = urllib.request.Request(url)
# The API authenticates the caller through these two request headers.
request.add_header("X-Naver-Client-Id", client_id)
request.add_header("X-Naver-Client-Secret", client_secret)
response = urllib.request.urlopen(request)
rescode = response.getcode()
if rescode == 200:
    response_body = response.read()
    print(response_body.decode('utf-8'))
else:
    # rescode is an int; convert it so the concatenation does not raise TypeError.
    print("Error Code:" + str(rescode))
# Display the response object and its status code read three different ways.
response, response.getcode(), response.code, response.status

# Output: (<http.client.HTTPResponse at 0x112da3e20>, 200, 200, 200)
# The body is raw bytes; decode it as UTF-8 to read it as text.
response_body.decode("utf-8")

3. "몰스킨"

  • 몰스킨
# Naver Search API example - shopping search for "몰스킨".
# Sends one authenticated GET request and prints the JSON body on success.
import os
import sys
import urllib.parse  # imported explicitly because urllib.parse.quote is used below
import urllib.request


# NOTE(review): API credentials are hard-coded in the source. Anyone who can
# read this file can use them; consider loading them from the environment.
client_id = "8ZE_34gSct81y2DipGJF"
client_secret = "1H6O47Qvw9"


# The search term must be percent-encoded before it is placed in the URL.
encText = urllib.parse.quote("몰스킨")
url = "https://openapi.naver.com/v1/search/shop?query=" + encText  # JSON result
# url = "https://openapi.naver.com/v1/search/shop.xml?query=" + encText # XML result
request = urllib.request.Request(url)
# The API authenticates the caller through these two request headers.
request.add_header("X-Naver-Client-Id", client_id)
request.add_header("X-Naver-Client-Secret", client_secret)
response = urllib.request.urlopen(request)
rescode = response.getcode()
if rescode == 200:
    response_body = response.read()
    print(response_body.decode('utf-8'))
else:
    # rescode is an int; convert it so the concatenation does not raise TypeError.
    print("Error Code:" + str(rescode))

(1) gen_search_url()

def gen_search_url(api_node, search_text, start_num, disp_num):
    """Build a Naver Search API request URL that returns JSON.

    Args:
        api_node: Search node placed in the URL path, e.g. "shop" or "blog".
        search_text: Raw search term; it is percent-encoded here.
        start_num: Value of the ``start`` query parameter.
        disp_num: Value of the ``display`` query parameter.

    Returns:
        The full request URL as a string.
    """
    base = "https://openapi.naver.com/v1/search"
    node = "/" + api_node + ".json"
    param_query = "?query=" + urllib.parse.quote(search_text)
    param_start = "&start=" + str(start_num)
    param_disp = "&display=" + str(disp_num)
    # The return must be indented into the function body; at column 0 it is
    # a SyntaxError ("'return' outside function").
    return base + node + param_query + param_start + param_disp
# Example: URL for the "shop" node, query "TEST", start=10, display=3.
gen_search_url("shop", "TEST", 10, 3)

# Output: 'https://openapi.naver.com/v1/search/shop.json?query=TEST&start=10&display=3'

(2) get_result_onpage()

import json
import datetime

def get_result_onpage(url):
    """Fetch one page of search results and return the parsed JSON.

    The request is authenticated with the module-level ``client_id`` and
    ``client_secret`` values, sent as request headers.

    Args:
        url: Full request URL, e.g. one built by ``gen_search_url``.

    Returns:
        The response body decoded as UTF-8 and parsed with ``json.loads``.
    """
    request = urllib.request.Request(url)
    request.add_header("X-Naver-Client-Id", client_id)
    request.add_header("X-Naver-Client-Secret", client_secret)
    # The context manager closes the HTTP response once the body has been
    # read, so repeated calls do not leave connections open.
    with urllib.request.urlopen(request) as response:
        print("[%s] Url Request Success" % datetime.datetime.now())
        return json.loads(response.read().decode("utf-8"))
# Show the current timestamp; get_result_onpage prints the same kind of value.
datetime.datetime.now()

# Output: datetime.datetime(2023, 6, 27, 21, 13, 12, 771051)
# Fetch the first 5 shop results for "몰스킨" and keep the parsed response.
url = gen_search_url("shop", "몰스킨", 1, 5)
one_result = get_result_onpage(url)

# Output: [2023-06-27 21:13:13.451638] Url Request Success
# Title of the first item (still contains <b> markers).
one_result["items"][0]["title"]

# Output: '[<b>몰스킨</b>] 2023-24년 18개월 다이어리(데일리, 위클리)'
# Link of the first item.
one_result["items"][0]["link"]

# Output: 'https://search.shopping.naver.com/gate.nhn?id=39859450686'
# Mall name of the first item.
one_result["items"][0]["mallName"]

# Output: '몰스킨공식온라인스토어'

(3) get_fields()

import pandas as pd

def get_fields(json_data):
    """Turn one search response into a DataFrame of selected item fields.

    Args:
        json_data: Parsed response; ``json_data["items"]`` must be a sequence
            of dicts with "title", "link", "lprice" and "mallName" keys.

    Returns:
        A DataFrame with columns "title", "lprice", "link", "mall", one row
        per item.
    """
    items = json_data["items"]
    # Output column name -> key to read from each item.
    source_keys = {
        "title": "title",
        "link": "link",
        "lprice": "lprice",
        "mall": "mallName",
    }
    columns_data = {
        column: [item[key] for item in items]
        for column, key in source_keys.items()
    }
    return pd.DataFrame(columns_data, columns=["title", "lprice", "link", "mall"])
# Display the table built from the single response stored in one_result.
get_fields(one_result)

(4) delete_tag()

def delete_tag(input_str):
    """Return *input_str* with every ``<b>`` and ``</b>`` marker removed."""
    # Opening markers are stripped first, then closing markers.
    for marker in ("<b>", "</b>"):
        input_str = input_str.replace(marker, "")
    return input_str
import pandas as pd

def get_fields(json_data):
    """Turn one search response into a DataFrame with ``<b>`` markers removed.

    Every selected field is passed through ``delete_tag`` before it is stored.

    Args:
        json_data: Parsed response; ``json_data["items"]`` must be a sequence
            of dicts with "title", "link", "lprice" and "mallName" keys.

    Returns:
        A DataFrame with columns "title", "lprice", "link", "mall", one row
        per item.
    """
    items = json_data["items"]

    def cleaned(key):
        # One cleaned value per item for the given source key.
        return [delete_tag(item[key]) for item in items]

    table = {
        "title": cleaned("title"),
        "link": cleaned("link"),
        "lprice": cleaned("lprice"),
        "mall": cleaned("mallName"),
    }
    return pd.DataFrame(table, columns=["title", "lprice", "link", "mall"])
# Display the cleaned table for the response stored in one_result.
get_fields(one_result)

# Run the three steps end to end: build URL -> fetch page -> extract fields.
url = gen_search_url("shop", "몰스킨", 1, 5)
json_result = get_result_onpage(url)
pd_result = get_fields(json_result)

# Output: [2023-06-27 21:34:31.314620] Url Request Success
pd_result

(5) actMain()

# Preview the start offsets used for paging: 1, 101, ..., 901.
for n in range(1, 1000, 100):
    print(n)
    
# 출력 :
# 1
# 101
# 201
# 301
# 401
# 501
# 601
# 701
# 801
# 901
# Fetch ten pages (start = 1, 101, ..., 901; 100 items each) and gather
# the per-page tables before combining them.
page_frames = []

for n in range(1, 1000, 100):
    url = gen_search_url("shop", "몰스킨", n, 100)
    json_result = get_result_onpage(url)
    pd_result = get_fields(json_result)
    page_frames.append(pd_result)

# ignore_index=True gives the combined table a fresh 0..N-1 index.
result_mol = pd.concat(page_frames, ignore_index=True)
result_mol.info()

# Convert the lowest-price column to float, then show the dtypes again.
result_mol["lprice"] = result_mol["lprice"].astype(float)
result_mol.info()

(5) to_excel()

# Write result_mol to an Excel file and format the sheet with xlsxwriter.
# ExcelWriter is used as a context manager so the file is saved and closed on
# exit; ExcelWriter.save() was removed in pandas 2.0.
with pd.ExcelWriter("../data/06_molskin_diary_in_naver_shop.xlsx", engine="xlsxwriter") as writer:
    result_mol.to_excel(writer, sheet_name="Sheet1")

    workbook = writer.book
    worksheet = writer.sheets["Sheet1"]
    # The second argument of set_column is the column width.
    worksheet.set_column("A:A", 4)
    worksheet.set_column("B:B", 60)
    worksheet.set_column("C:C", 10)
    worksheet.set_column("D:D", 10)
    worksheet.set_column("E:E", 50)
    worksheet.set_column("F:F", 10)

    # Colour-scale column C; with the index in column A and the column order
    # produced by get_fields, C is presumably "lprice" - verify.
    worksheet.conditional_format("C2:C1001", {"type": "3_color_scale"})

(6) 시각화

# Configure matplotlib so Hangul labels render, choosing a font per platform.
import platform
import matplotlib.pyplot as plt
from matplotlib import font_manager, rc

# Font file used on Windows only.
path = "c:/Windows/Fonts/malgun.ttf"

system_name = platform.system()
if system_name == "Windows":
    # Resolve the font family name from the font file at `path`.
    font_name = font_manager.FontProperties(fname=path).get_name()
    print("Hangul OK in your Windows!!!")
    rc("font", family=font_name)
elif system_name == "Darwin":
    print("Hangul OK in your MAC!!!")
    rc("font", family="Arial Unicode MS")
else:
    # No font is configured on other platforms.
    print("Unknown system.. sorry~~~")

plt.rcParams["axes.unicode_minus"] = False

#출력 : Hangul OK in your MAC!!!
import seaborn as sns

# Bar chart of how many items each mall has, most frequent mall first.
plt.figure(figsize=(15, 6))
sns.countplot(
    # Name the column through x= instead of passing the Series positionally:
    # in seaborn >= 0.12 the first positional parameter is `data`, so the
    # original call collides with the data= keyword and raises TypeError.
    x="mall",
    data=result_mol,
    palette="RdYlGn",
    order=result_mol["mall"].value_counts().index,
)
plt.xticks(rotation=90)  # rotate the tick labels so they do not overlap
plt.show()

0개의 댓글