A '#' marks a topic; when a '---' line is encountered, the keywords for that topic are considered finished (tags are generated on that basis).
When a '#' is recognized, that word becomes the TAG for the items that follow it, up until the next '---'.
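The actual search_keyword.txt is not shown here, but a hypothetical file that follows these rules could look like this:

#animal
#cat
grumpy cat
kitten
---
#dog
puppy
corgi
---

With this input, 'animal' stays attached to both topics, because the script below only drops the most recent tag at each '---' (the tag.pop() call).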
import json
import pymysql
from urllib import parse, request

############################ connect to MariaDB
conn = pymysql.connect(host="127.0.0.1", user="####", password="####", db="ZZALU", charset="utf8")
cur = conn.cursor()

############################ extract keywords from the file
# read the keyword file
keyword_file = open("./search_keyword.txt", "r", encoding='UTF8')
keyword_lists = keyword_file.readlines()
keyword_file.close()

search_dict = {}
tag = []
keyword = []
for keywords in keyword_lists:
    if keywords.strip():
        if '#' in keywords.strip():
            # a '#' line adds another tag for the items that follow
            tag.append(keywords.strip().split("#")[-1].strip())
        elif keywords.strip().startswith('---'):
            # '---' closes the current topic: store its keywords, then drop the most recent tag
            print(tag)
            print(keyword)
            tag_string = ','.join(tag)
            search_dict[tag_string] = keyword
            tag.pop()
            keyword = []
        else:
            keyword.append(keywords.strip())
############################
print(search_dict)
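# With the hypothetical search_keyword.txt shown earlier, search_dict would come out roughly as
# {'animal,cat': ['grumpy cat', 'kitten'], 'animal,dog': ['puppy', 'corgi']}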
############################ query the Giphy API with the extracted keywords
gif_dict = {}
for search in search_dict:
    # search with the most specific tag plus every keyword collected under it
    search_keywords = []
    search_keywords.append(search.split(',')[-1])
    search_keywords += search_dict[search]
    for keyword in search_keywords:
        url = "http://api.giphy.com/v1/gifs/search"
        params = parse.urlencode({
            "q": keyword,
            "api_key": "####",
            "limit": "10"
        })
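        # The request URL built here has the shape
        # http://api.giphy.com/v1/gifs/search?q=<keyword>&api_key=<api_key>&limit=10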
        with request.urlopen("".join((url, "?", params))) as response:
            data = json.loads(response.read())
            # print(json.dumps(data, sort_keys=True, indent=4))
            # print(data["data"][0]["embed_url"])
            for d in data["data"]:
                gif_path = d["images"]["480w_still"]["url"]
                search_tag_list = search.split(",")
                if keyword not in search_tag_list:
                    # include the search keyword itself as a tag
                    search_tag_list.append(keyword)
                if gif_path in gif_dict:
                    # gif already collected: only add the keyword if it is a new tag
                    if keyword not in gif_dict[gif_path]["tags"]:
                        gif_dict[gif_path]["tags"].append(keyword)
                else:
                    # first time we see this gif: add a new entry to gif_dict
                    gif_dict[gif_path] = {
                        "gif_path": gif_path,
                        "source": d["source"],
                        "source_post_url": d["source_post_url"],
                        "source_tld": d["source_tld"],
                        "title": d["title"],
                        "import_datetime": d["import_datetime"],
                        "tags": search_tag_list,
                    }
                print(gif_dict[gif_path])
                print("")

                exists_sql = ""
                insert_sql = ""
                update_sql = ""
                select_sql = ""
                try:
                    exists_sql = "SELECT EXISTS (SELECT * FROM GIPHY_GIF WHERE GIF_PATH='" + gif_path + "') AS SUCCESS"
                    cur.execute(exists_sql)
                    exists_sql_return = cur.fetchone()[0]
                    if exists_sql_return == 1:
                        # the gif already exists: merge the old and new tags, then update that row
                        select_sql = "SELECT TAGS FROM GIPHY_GIF WHERE GIF_PATH='" + gif_path + "'"
                        cur.execute(select_sql)
                        get_tags = cur.fetchone()[0]
                        get_tags_list = list(set(get_tags.split(",") + search_tag_list))
                        update_sql = "UPDATE GIPHY_GIF SET TAGS='" + ",".join(get_tags_list) + "' WHERE GIF_PATH='" + gif_path + "'"
                        cur.execute(update_sql)
                    else:
                        # new gif: insert a fresh row
                        insert_sql = "INSERT INTO GIPHY_GIF (GIF_PATH, TAGS, SOURCES, SOURCES_POST_URL, SOURCES_TLD, IMPORT_DATETIME) VALUES('" + gif_path + "','" + ','.join(gif_dict[gif_path]["tags"]) + "','" + d["source"] + "','" + d["source_post_url"] + "','" + d["source_tld"] + "','" + d["import_datetime"] + "')"
                        cur.execute(insert_sql)
                except pymysql.err.InternalError as e:
                    code, msg = e.args
                    print("=== sql execute failed ===")
                    print("Error code : ", code)
                    print("Error message : ", msg)
                    print(insert_sql)
                    print()
                else:
                    print("=== sql execute success ===")
                    print()

conn.commit()
conn.close()
print("=== DB update complete ===")
The DB table I created:
CREATE DATABASE IF NOT EXISTS ZZALU DEFAULT CHARACTER SET UTF8;
USE ZZALU;
DROP TABLE IF EXISTS GIPHY_GIF;
CREATE TABLE IF NOT EXISTS GIPHY_GIF (
    _ID INT PRIMARY KEY AUTO_INCREMENT,
    GIF_PATH VARCHAR(300) NOT NULL UNIQUE,
    USE_COUNT INT NOT NULL DEFAULT 0,
    DOWNLOAD_COUNT INT NOT NULL DEFAULT 0,
    LIKE_COUNT INT NOT NULL DEFAULT 0,
    TAGS VARCHAR(300),
    SOURCES VARCHAR(300),
    SOURCES_POST_URL VARCHAR(300),
    SOURCES_TLD VARCHAR(300),
    IMPORT_DATETIME DATETIME,
    RELATIONS_VIDEO VARCHAR(300),
    VISITED_COUNT INT NOT NULL DEFAULT 0,
    SCRAP_COUNT INT NOT NULL DEFAULT 0
);
Result
I confirmed that the output came out correctly.
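For reference, the stored rows can be spot-checked with a small query script like the one below. This is a hypothetical snippet I am adding for illustration, not part of the original run; the 'cat' tag comes from the hypothetical keyword file above.

import pymysql

conn = pymysql.connect(host="127.0.0.1", user="####", password="####", db="ZZALU", charset="utf8")
cur = conn.cursor()
# count the stored gifs and print a few rows that carry a given tag
cur.execute("SELECT COUNT(*) FROM GIPHY_GIF")
print("rows:", cur.fetchone()[0])
cur.execute("SELECT GIF_PATH, TAGS FROM GIPHY_GIF WHERE TAGS LIKE %s LIMIT 5", ("%cat%",))
for gif_path, tags in cur.fetchall():
    print(gif_path, "->", tags)
conn.close()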