import json
import pandas as pd
from konlpy.tag import Mecab
# Read the sentiment lexicon from disk. The 'utf-8-sig' codec transparently
# drops a leading byte-order mark if the file starts with one.
with open('SentiWord_info.json', mode='r', encoding='utf-8-sig') as f:
    SentiWord_info = json.loads(f.read())

# Tabular view of the lexicon; the scoring code below reads its 'word' and
# 'polarity' columns.
sentiword_dic = pd.DataFrame(SentiWord_info)
def calculate_sentiment_score(sentence, sentiword_dic):
    """Return the summed lexicon polarity of the tokens in *sentence*.

    The sentence is split on whitespace and every token that appears in the
    ``'word'`` column of *sentiword_dic* contributes the integer value of the
    matching ``'polarity'`` entry. Tokens that occur several times are counted
    each time. When a word is listed more than once in the lexicon, the first
    listed row is used.

    Args:
        sentence: Whitespace-separated tokens. Anything that is not a string
            (for example ``None`` or a NaN cell) scores 0.
        sentiword_dic: DataFrame with a ``'word'`` column and a ``'polarity'``
            column whose values are convertible with ``int()``.

    Returns:
        The total polarity as an ``int``; 0 when nothing matches.
    """
    if not isinstance(sentence, str):
        return 0

    tokens = sentence.split()
    if not tokens:
        return 0

    # One vectorised pass over the lexicon per sentence, instead of two full
    # scans for every single token.
    mask = sentiword_dic['word'].isin(set(tokens))
    if not mask.any():
        return 0

    matched = sentiword_dic[mask]
    polarity_by_word = {}
    for word, polarity in zip(matched['word'], matched['polarity']):
        # Keep the first listed polarity when the lexicon repeats a word.
        if word not in polarity_by_word:
            polarity_by_word[word] = int(polarity)

    return sum(polarity_by_word.get(token, 0) for token in tokens)
# Load the community comments and keep only the rows that actually have a
# processed comment; the copy makes the filtered frame independent of the
# raw one so later column assignments do not touch community_raw.
community_raw = pd.read_csv("개인주소")
community_df = community_raw[community_raw['processed_comment'].notna()].copy()

# Morphological analyser backed by the mecab-ko-dic dictionary at this path.
mecab = Mecab(dicpath='/usr/local/lib/mecab/dic/mecab-ko-dic')
def preprocess_text(text):
    """Split *text* into morphemes with Mecab and join them with spaces."""
    morphemes = mecab.morphs(text)
    return " ".join(morphemes)
# Morpheme-tokenise every comment, then score each tokenised string against
# the sentiment lexicon.
community_df["tagged_str"] = community_df["processed_comment"].apply(preprocess_text)
community_df["sentiment_score"] = community_df["tagged_str"].apply(
    calculate_sentiment_score, args=(sentiword_dic,)
)
def label_sentiment(score):
    """Map a numeric score to a label.

    Positive scores give "긍정", negative scores give "부정", and anything
    else (including 0) gives "중립".
    """
    if score > 0:
        return "긍정"
    return "부정" if score < 0 else "중립"
# Attach the label for each score and write the result out without the
# index column.
# NOTE(review): the output path literal is the same as the one passed to
# read_csv above — confirm this is a redacted placeholder and not an
# intentional overwrite of the input file.
community_df["sentiment_label"] = community_df["sentiment_score"].apply(
    label_sentiment
)
community_df.to_csv(path_or_buf='개인주소', index=False)
- 평가: 긍정이나 부정보다 중립으로 분류된 댓글이 대부분인 듯하다. 내 생각에는 감성 사전만으로는 부족한 것 같다. 다른 방법을 생각해 봐야겠다.
노션 페이지에서 다운로드 가능