https://developers.naver.com/docs/serviceapi/search/blog/blog.md#python
urllib : http 프로토콜에 따라서 서버의 요청/응답을 처리하기 위한 모듈
urllib.request : 클라이언트의 요청을 처리하기 위한 모듈
urllib.parse : URL 주소를 분석(파싱)하기 위한 모듈
# 네이버 검색 API 예제 - 블로그 검색
import os
import sys
import urllib.parse
import urllib.request

import pandas as pd
# Naver Open API credentials (placeholders; replace with your application's values).
client_id = "YOUR_CLIENT_ID"
client_secret = "YOUR_CLIENT_SECRET"

# Percent-encode the search keyword so it can be placed in the query string.
encText = urllib.parse.quote("딥러닝")
url = "https://openapi.naver.com/v1/search/blog?query=" + encText  # JSON result
# url = "https://openapi.naver.com/v1/search/blog.xml?query=" + encText  # XML result

request = urllib.request.Request(url)
request.add_header("X-Naver-Client-Id", client_id)
request.add_header("X-Naver-Client-Secret", client_secret)

# The context manager closes the HTTP response even if reading it fails.
with urllib.request.urlopen(request) as response:
    rescode = response.getcode()
    if rescode == 200:
        response_body = response.read()
        print(response_body.decode('utf-8'))
    else:
        # rescode is not a str; concatenating it directly raised TypeError
        # in this branch, so convert it explicitly.
        print("Error Code:" + str(rescode))
{
"lastBuildDate":"Fri, 07 Apr 2023 19:18:07 +0900",
"total":168381,
"start":1,
"display":10,
"items":[
{
"title":"AI 2041를 통해 읽은 <b>딥러닝<\/b>과 딥페이크의 미래",
"link":"https:\/\/nohji.com\/4914",
"description":"그렇다면 <b>딥러닝<\/b>의 단점과 그것이 가져올 북자용은 무엇일까? 우선 당신 자신보다 인공지능이 당신에... 인공지능이 데이터를 수집하고 학습하는 <b>딥러닝<\/b>은 광고 시장에서 천문학적인 금액의 투자가... ",
"bloggername":"노지의 소박한 이야기",
"bloggerlink":"https:\/\/nohji.com\/",
"postdate":"20230205"
},
{
"title":"캐논, <b>딥러닝<\/b> AI 기술로 주요 디지털 사진 문제 해결한다",
"link":"https:\/\/photohistory.tistory.com\/20503",
"description":"캐논 카메라 디지털 사진의 문제점을 <b>딥러닝<\/b> AI로 해결한다 카메라가 인기 없는 이유는 혁신성이... 캐논이 최첨단 기술 소개 코너에서 <b>딥 러닝<\/b> AI 기술을 이용해서 디지털 카메라로 촬영한 사진의 문제점을... ",
"bloggername":"사진은 권력이다",
"bloggerlink":"https:\/\/photohistory.tistory.com\/",
"postdate":"20230226"
},
import os
import sys
import urllib.request
# Naver Open API credentials (placeholders; replace with your application's values).
client_id = "YOUR_CLIENT_ID"
client_secret = "YOUR_CLIENT_SECRET"

# Percent-encode the search keyword so it can be placed in the query string.
encText = urllib.parse.quote("몰스킨")
url = "https://openapi.naver.com/v1/search/shop?query=" + encText  # JSON result
# url = "https://openapi.naver.com/v1/search/shop.xml?query=" + encText  # XML result

request = urllib.request.Request(url)
request.add_header("X-Naver-Client-Id", client_id)
request.add_header("X-Naver-Client-Secret", client_secret)

# The context manager closes the HTTP response even if reading it fails.
with urllib.request.urlopen(request) as response:
    rescode = response.getcode()
    if rescode == 200:
        response_body = response.read()
        print(response_body.decode('utf-8'))
    else:
        # rescode is not a str; concatenating it directly raised TypeError
        # in this branch, so convert it explicitly.
        print("Error Code:" + str(rescode))
{
"lastBuildDate":"Fri, 07 Apr 2023 19:23:39 +0900",
"total":33520,
"start":1,
"display":10,
"items":[
{
"title":"<b>몰스킨<\/b> 노트 가죽 하드커버 감성 고급 업무용 이쁜 심플",
"link":"https:\/\/search.shopping.naver.com\/gate.nhn?id=82526953942",
"image":"https:\/\/shopping-phinf.pstatic.net\/main_8252695\/82526953942.7.jpg",
"lprice":"28800",
"hprice":"",
"mallName":"베스트펜",
"productId":"82526953942",
"productType":"2",
"brand":"몰스킨",
"maker":"",
"category1":"생활\/건강",
"category2":"문구\/사무용품",
"category3":"노트\/수첩",
"category4":"노트"
},
{
"title":"2023년 <b>몰스킨<\/b> 하드커버 다이어리(데일리, 위클리, 한정판)",
"link":"https:\/\/search.shopping.naver.com\/gate.nhn?id=84904377827",
"image":"https:\/\/shopping-phinf.pstatic.net\/main_8490437\/84904377827.1.jpg",
"lprice":"20000",
"hprice":"",
"mallName":"안네프랑크",
"productId":"84904377827",
"productType":"2",
"brand":"몰스킨",
"maker":"몰스킨",
"category1":"생활\/건강",
"category2":"문구\/사무용품",
"category3":"다이어리\/플래너",
"category4":"다이어리"
},
- (1) getSearchUrl : 검색 URL 생성
- (2) getResultPage : Page 데이터 가져오기
- (3) getFields() : Pandas DataFrame 생성
- deleteTag() : <b> 태그 제거
- (4) actMain() : All Data Gathering
- (5) to_excel() : Export to Excel
def getSearchUrl(apiNode, searchText, startNum, dispNum):
    """Build the JSON search URL for one Naver search API node.

    Args:
        apiNode: API node name placed in the path (e.g. 'shop').
        searchText: Search keyword; it is percent-encoded before use.
        startNum: Value of the ``start`` query parameter.
        dispNum: Value of the ``display`` query parameter.

    Returns:
        The complete request URL as a string.
    """
    endpoint = "https://openapi.naver.com/v1/search" + "/" + apiNode + ".json"
    query_string = "&".join((
        "query=" + urllib.parse.quote(searchText),
        "start=" + str(startNum),
        "display=" + str(dispNum),
    ))
    return endpoint + "?" + query_string
getSearchUrl('shop','test',10,3)
'https://openapi.naver.com/v1/search/shop.json?query=test&start=10&display=3'
import json
import datetime
def getResultPage(url, client_id, client_secret):
    """Request one page of search results and decode the JSON body.

    Args:
        url: Complete request URL.
        client_id: Value sent in the ``X-Naver-Client-Id`` header.
        client_secret: Value sent in the ``X-Naver-Client-Secret`` header.

    Returns:
        The decoded JSON document when the status code is 200; otherwise
        ``None``, after printing the status code.
    """
    request = urllib.request.Request(url)
    request.add_header("X-Naver-Client-Id", client_id)
    request.add_header("X-Naver-Client-Secret", client_secret)
    # The context manager closes the response instead of leaving it open.
    with urllib.request.urlopen(request) as response:
        print("[%s] Url Request Success" % datetime.datetime.now())
        rescode = response.getcode()
        if rescode == 200:
            return json.loads(response.read().decode('utf-8'))
    # The status code is not a str; concatenating it directly raised
    # TypeError here, so convert it explicitly.
    print("Error Code:" + str(rescode))
    return None
# Naver Open API credentials (placeholders).
client_id = "YOUR_CLIENT_ID"
client_secret = "YOUR_CLIENT_SECRET"
# Build the URL for the 'shop' node: keyword "몰스킨", start=1, display=5.
url=getSearchUrl('shop','몰스킨',1,5)
# Fetch that page; the decoded response is kept in `result` for the cells below.
result=getResultPage(url,client_id,client_secret)
# Bare expression: displays the response when run as a notebook cell.
result
[2023-04-07 20:13:05.869195] Url Request Success
{'lastBuildDate': 'Fri, 07 Apr 2023 20:13:06 +0900',
'total': 33507,
'start': 1,
'display': 5,
'items': [{'title': '<b>몰스킨</b> 노트 가죽 하드커버 감성 고급 업무용 이쁜 심플',
'link': 'https://search.shopping.naver.com/gate.nhn?id=82526953942',
'image': 'https://shopping-phinf.pstatic.net/main_8252695/82526953942.7.jpg',
'lprice': '28800',
'hprice': '',
'mallName': '베스트펜',
'productId': '82526953942',
'productType': '2',
'brand': '몰스킨',
'maker': '',
'category1': '생활/건강',
'category2': '문구/사무용품',
'category3': '노트/수첩',
'category4': '노트'},
{'title': '2023년 <b>몰스킨</b> 하드커버 다이어리(데일리, 위클리, 한정판)',
'link': 'https://search.shopping.naver.com/gate.nhn?id=84904377827',
'image': 'https://shopping-phinf.pstatic.net/main_8490437/84904377827.1.jpg',
'lprice': '20000',
'hprice': '',
'mallName': '안네프랑크',
'productId': '84904377827',
'productType': '2',
'brand': '몰스킨',
'maker': '몰스킨',
'category1': '생활/건강',
'category2': '문구/사무용품',
'category3': '다이어리/플래너',
'category4': '다이어리'},
result['items'][0]['title']
'<b>몰스킨</b> 노트 가죽 하드커버 감성 고급 업무용 이쁜 심플'
result['items'][0]['link']
'https://search.shopping.naver.com/gate.nhn?id=82526953942'
result['items'][0]['lprice']
'28800'
result['items'][0]['mallName']
'베스트펜'
def deleteTag(input_str):
    """Return input_str with every "<b>" and "</b>" marker removed."""
    cleaned = input_str
    # Opening markers are removed first, then closing ones.
    for marker in ("<b>", "</b>"):
        cleaned = cleaned.replace(marker, "")
    return cleaned
# Uses deleteTag() to clean the titles.
def getFields(json_data):
    """Turn one search response into a DataFrame of selected item fields.

    Args:
        json_data: Decoded response; its "items" entry is a sequence of
            mappings with the keys 'title', 'link', 'lprice' and 'mallName'.

    Returns:
        A DataFrame with the columns title, lprice, link and mall (in that
        order), one row per item. Titles have their <b> markers removed.
    """
    items = json_data["items"]
    fields = {
        "title": [deleteTag(item['title']) for item in items],
        "link": [item['link'] for item in items],
        "lprice": [item['lprice'] for item in items],
        "mall": [item['mallName'] for item in items],
    }
    # `columns` fixes the column order of the resulting frame.
    return pd.DataFrame(fields, columns=["title", "lprice", "link", "mall"])
# Preview the DataFrame built from the single page fetched earlier.
getFields(result)

# Fetch ten pages of 100 items each (start = 1, 101, ..., 901) and keep one
# DataFrame per page.
result_mol = []
for n in range(1, 1000, 100):
    url = getSearchUrl("shop", "몰스킨", n, 100)
    json_result = getResultPage(url, client_id, client_secret)
    pd_result = getFields(json_result)
    result_mol.append(pd_result)

# Stack the per-page frames into one; each page keeps its own row labels.
result_mol = pd.concat(result_mol)
[2023-04-07 20:28:31.057909] Url Request Success
[2023-04-07 20:28:31.414691] Url Request Success
[2023-04-07 20:28:31.718005] Url Request Success
[2023-04-07 20:28:32.065504] Url Request Success
[2023-04-07 20:28:32.399301] Url Request Success
[2023-04-07 20:28:32.807164] Url Request Success
[2023-04-07 20:28:33.147453] Url Request Success
[2023-04-07 20:28:33.474379] Url Request Success
[2023-04-07 20:28:33.796276] Url Request Success
[2023-04-07 20:28:34.173313] Url Request Success
result_mol.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 1000 entries, 0 to 99
Data columns (total 4 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 title 1000 non-null object
1 lprice 1000 non-null object
2 link 1000 non-null object
3 mall 1000 non-null object
dtypes: object(4)
memory usage: 39.1+ KB
# Replace the repeated per-page row labels (0-99) with one continuous index;
# drop=True discards the old labels instead of keeping them as a column.
result_mol.reset_index(drop=True, inplace=True)
result_mol.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 4 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 title 1000 non-null object
1 lprice 1000 non-null object
2 link 1000 non-null object
3 mall 1000 non-null object
dtypes: object(4)
memory usage: 31.4+ KB
# Convert the price column from strings to floats so it can be used numerically.
result_mol['lprice']=result_mol['lprice'].astype(float)
result_mol.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 4 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 title 1000 non-null object
1 lprice 1000 non-null float64
2 link 1000 non-null object
3 mall 1000 non-null object
dtypes: float64(1), object(3)
memory usage: 31.4+ KB
!pip install xlsxwriter
# Export the collected results to an Excel workbook with the xlsxwriter engine.
# ExcelWriter is used as a context manager so the file is written and closed
# on exit; the former writer.save() call was removed in pandas 2.0.
with pd.ExcelWriter("../data/06_molskin_diary_in_naver_shop.xlsx", engine='xlsxwriter') as writer:
    result_mol.to_excel(writer, sheet_name="Sheet1")
    workbook = writer.book
    worksheet = writer.sheets["Sheet1"]
    worksheet.set_column('A:A', 4)  # column range, cell width
    worksheet.set_column('B:B', 60)
    worksheet.set_column('C:C', 10)
    worksheet.set_column('D:D', 10)
    worksheet.set_column('E:E', 50)
    worksheet.set_column('F:F', 10)
    # Colour the cells in C2:C1001 with a three-colour scale.
    worksheet.conditional_format("C2:C1001", {"type": "3_color_scale"})
# %load set_matplotlib_hangul
import matplotlib.pyplot as plt
from matplotlib import font_manager, rc
import seaborn as sns
import platform
# %matplotlib inline
# Programmatic form of the "%matplotlib inline" magic; needs an IPython session.
get_ipython().run_line_magic("matplotlib", "inline")

# Font file used to look up the font family name on Windows.
path = 'C:/Windows/Fonts/malgun.ttf'
# Draw minus signs with an ASCII hyphen rather than the Unicode minus glyph.
plt.rcParams['axes.unicode_minus'] = False

if platform.system() == "Darwin":  # macOS
    # The family name is "Arial Unicode MS"; the former "Arial Unicodes MS"
    # matched no installed font, so the requested font was never applied.
    rc("font", family='Arial Unicode MS')
    print('MAC Hangul OK')
elif platform.system() == "Windows":  # Windows
    # Read the family name from the font file and make it the default font.
    font_name = font_manager.FontProperties(fname=path).get_name()
    rc("font", family=font_name)
    print('WIndow Hangul OK')
else:
    print('Unknown System')
# Visualize how many of the collected search results come from each mall.
plt.figure(figsize=(15, 6))
# One bar per mall, ordered by the index of value_counts().
sns.countplot(
data=result_mol,
x=result_mol['mall'],
palette="RdYlGn",
order=result_mol["mall"].value_counts().index
)
# Rotate the x tick labels by 90 degrees.
plt.xticks(rotation=90)
plt.show()