Ex6) Kospi_Community_VIX

Jacob Kim·2024년 2월 1일
0

Naver Project Week4

목록 보기
7/16
!pip install diffusers transformers accelerate
Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->diffusers) (2.0.7)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->diffusers) (2023.11.17)
Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.10.0->accelerate) (2.1.3)
Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.10.0->accelerate) (1.3.0)
Installing collected packages: diffusers, accelerate
Successfully installed accelerate-0.26.1 diffusers-0.25.0
import requests
from bs4 import BeautifulSoup
from urllib import request
import time
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Browser-like User-Agent header sent with every request below.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.99 Safari/537.36'
}
url = 'https://www.dogdrip.net/stock/category/291984713'
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces an HTTP error here rather than as a confusing
# `None` from the table lookup further down.
res = requests.get(url, headers=headers, timeout=10)
res.raise_for_status()
# Name the parser explicitly so the parse tree does not depend on which
# optional parser libraries happen to be installed.
soup = BeautifulSoup(res.text, 'html.parser')
# Extract the tag that holds the whole board listing
whole_board = soup.find('table', class_ = 'ed table table-divider')
whole_board
<table class="ed table table-divider">
<thead>
<tr>
<th scope="col">번호</th> <th scope="col">제목</th> <th class="author ed" scope="col">글쓴이</th> <th scope="col">
				추천 수		      </th> <th scope="col">
		        날짜		      </th> <th scope="col">
		        조회 수		      </th> </tr>
</thead>
<tbody>
<tr class="notice">
<td class="ed notice text-small">
          공지        </td>
<td class="title">
<a class="ed link-reset" href="/stock/273533897?category=291984713&amp;page=1" style="">
<span class="ed title-link">
<span style="font-weight:bold">주식 / 재테크와 관련된 글을 올려주시기 바랍니다.</span> </span>
</a>
</td>
<td class="author" scope="row">
<a class="ed flex flex-left flex-middle link-reset member_13140068" href="#popup_menu_area" onclick="return false"><img alt="[레벨:50]" class="xe_point_level_icon" src="https://www.dogdrip.net/modules/point/icons/ddcoa/105.gif" style="vertical-align:middle;margin:0px 3px 2px 0px;" title="포인트:135318206드립력, 레벨:50/50"/>
		    overflow          </a>
</td>
<td class="ed voteNum text-primary">6		</td> <td class="time" scope="row">2020.08.07</td>
<td class="readNum">16850</td> </tr> <tr>
<td class="ed no text-xxsmall">
		            54046		  		          </td> <td class="title">
<span>
<a href="/stock/category/291984713">
<span class="ed margin-right-xxsmall text-muted" style="color:#444444">[국장]</span> </a> <a class="ed link-reset" href="/stock/534398188?category=291984713&amp;page=1" style="">
<span class="ed title-link">2차전지 곱버스는 없는겨?</span>
<span class="ed text-primary">4</span> <span class="ed print-icon margin-left-xxsmall">
</span> </a>
</span>
</td> <td class="author">
<a class="ed flex flex-left flex-middle link-reset member_524056370" href="#popup_menu_area" onclick="return false"><img alt="[레벨:2]" class="xe_point_level_icon" src="https://www.dogdrip.net/modules/point/icons/ddcoa/2.gif" style="vertical-align:middle;margin:0px 3px 2px 0px;" title="포인트:107드립력 (2%), 레벨:2/50"/>
		    랩뉴비          </a>
</td> <td class="ed voteNum text-primary">0		</td> <td class="time" scope="row">55 분 전</td> <td class="readNum">80</td> </tr><tr>
<td class="ed no text-xxsmall">
		            54045		  		          </td> <td class="title">
<span>
<a href="/stock/category/291984713">
<span class="ed margin-right-xxsmall text-muted" style="color:#444444">[국장]</span> </a> <a class="ed link-reset" href="/stock/534384446?category=291984713&amp;page=1" style="">
<span class="ed title-link">펀더멘털문제없다</span>
<span class="ed text-primary">6</span> <span class="ed print-icon margin-left-xxsmall">
<i class="fas fa-image ed margin-right-xxsmall"></i> </span> </a>
</span>
</td> <td class="author">
<a class="ed flex flex-left flex-middle link-reset member_236650705" href="#popup_menu_area" onclick="return false"><img alt="[레벨:6]" class="xe_point_level_icon" src="https://www.dogdrip.net/modules/point/icons/ddcoa/6.gif" style="vertical-align:middle;margin:0px 3px 2px 0px;" title="포인트:2597드립력 (3%), 레벨:6/50"/>
		    외노자주린이          </a>
</td> <td class="ed voteNum text-primary">2		</td> <td class="time" scope="row">2 시간 전</td> <td class="readNum">184</td> </tr><tr>
<td class="ed no text-xxsmall">
		            54044		  		          </td> <td class="title">
<span>
<a href="/stock/category/291984713">
<span class="ed margin-right-xxsmall text-muted" style="color:#444444">[국장]</span> </a> <a class="ed link-reset" href="/stock/534368123?category=291984713&amp;page=1" style="">
<span class="ed title-link">지구 망한듯?</span>
<span class="ed text-primary">1</span> <span class="ed print-icon margin-left-xxsmall">
<i class="fas fa-image ed margin-right-xxsmall"></i> </span> </a>
</span>
</td> <td class="author">
<a class="ed flex flex-left flex-middle link-reset member_79623105" href="#popup_menu_area" onclick="return false"><img alt="[레벨:6]" class="xe_point_level_icon" src="https://www.dogdrip.net/modules/point/icons/ddcoa/6.gif" style="vertical-align:middle;margin:0px 3px 2px 0px;" title="포인트:3999드립력 (51%), 레벨:6/50"/>
		    라루루          </a>
</td> <td class="ed voteNum text-primary">0		</td> <td class="time" scope="row">3 시간 전</td> <td class="readNum">412</td> </tr><tr>
<td class="ed no text-xxsmall">
		            54043		  		          </td> <td class="title">
<span>
<a href="/stock/category/291984713">
<span class="ed margin-right-xxsmall text-muted" style="color:#444444">[국장]</span> </a> <a class="ed link-reset" href="/stock/534367430?category=291984713&amp;page=1" style="">
<span class="ed title-link">오늘 좆스피말고 다른데도 다떨구는데 왜이래</span>
<span class="ed print-icon margin-left-xxsmall">
</span> </a>
</span>
</td> <td class="author">
<a class="ed flex flex-left flex-middle link-reset member_524056370" href="#popup_menu_area" onclick="return false"><img alt="[레벨:2]" class="xe_point_level_icon" src="https://www.dogdrip.net/modules/point/icons/ddcoa/2.gif" style="vertical-align:middle;margin:0px 3px 2px 0px;" title="포인트:107드립력 (2%), 레벨:2/50"/>
		    랩뉴비          </a>
</td> <td class="ed voteNum text-primary">1		</td> <td class="time" scope="row">3 시간 전</td> <td class="readNum">236</td> </tr><tr>
<td class="ed no text-xxsmall">
		            54042		  		          </td> <td class="title">
<span>
<a href="/stock/category/291984713">
<span class="ed margin-right-xxsmall text-muted" style="color:#444444">[국장]</span> </a> <a class="ed link-reset" href="/stock/534364396?category=291984713&amp;page=1" style="">
<span class="ed title-link">주식 복귀 1주일차</span>
<span class="ed text-primary">2</span> <span class="ed print-icon margin-left-xxsmall">
<i class="fas fa-image ed margin-right-xxsmall"></i> </span> </a>
</span>
</td> <td class="author">
<a class="ed flex flex-left flex-middle link-reset member_121846383" href="#popup_menu_area" onclick="return false"><img alt="[레벨:6]" class="xe_point_level_icon" src="https://www.dogdrip.net/modules/point/icons/ddcoa/6.gif" style="vertical-align:middle;margin:0px 3px 2px 0px;" title="포인트:4878드립력 (82%), 레벨:6/50"/>
		    기다린다          </a>
</td> <td class="ed voteNum text-primary">1		</td> <td class="time" scope="row">3 시간 전</td> <td class="readNum">332</td> </tr><tr>
<td class="ed no text-xxsmall">
		            54041		  		          </td> <td class="title">
<span>
<a href="/stock/category/291984713">
<span class="ed margin-right-xxsmall text-muted" style="color:#444444">[국장]</span> </a> <a class="ed link-reset" href="/stock/534360500?category=291984713&amp;page=1" style="">
<span class="ed title-link">와 오늘 개 살벌했네</span>
<span class="ed text-primary">2</span> <span class="ed print-icon margin-left-xxsmall">
</span> </a>
</span>
</td> <td class="author">
<a class="ed flex flex-left flex-middle link-reset member_432246775" href="#popup_menu_area" onclick="return false"><img alt="[레벨:3]" class="xe_point_level_icon" src="https://www.dogdrip.net/modules/point/icons/ddcoa/3.gif" style="vertical-align:middle;margin:0px 3px 2px 0px;" title="포인트:509드립력 (27%), 레벨:3/50"/>
		    파이어족은불타오...          </a>
</td> <td class="ed voteNum text-primary">1		</td> <td class="time" scope="row">3 시간 전</td> <td class="readNum">504</td> </tr><tr>
<td class="ed no text-xxsmall">
		            54040		  		          </td> <td class="title">
<span>
<a href="/stock/category/291984713">
<span class="ed margin-right-xxsmall text-muted" style="color:#444444">[국장]</span> </a> <a class="ed link-reset" href="/stock/534359690?category=291984713&amp;page=1" style="">
<span class="ed title-link">코리아 주식시장이 볍신인건 태생적인 시스템 한계지 머</span>
<span class="ed text-primary">4</span> <span class="ed print-icon margin-left-xxsmall">
</span> </a>
</span>
</td> <td class="author">
<a class="ed flex flex-left flex-middle link-reset member_534061172" href="#popup_menu_area" onclick="return false"><img alt="[레벨:1]" class="xe_point_level_icon" src="https://www.dogdrip.net/modules/point/icons/ddcoa/1.gif" style="vertical-align:middle;margin:0px 3px 2px 0px;" title="포인트:3드립력 (0%), 레벨:1/50"/>
		    토지          </a>
</td> <td class="ed voteNum text-primary">1		</td> <td class="time" scope="row">4 시간 전</td> <td class="readNum">259</td> </tr><tr>
<td class="ed no text-xxsmall">
		            54039		  		          </td> <td class="title">
<span>
<a href="/stock/category/291984713">
<span class="ed margin-right-xxsmall text-muted" style="color:#444444">[국장]</span> </a> <a class="ed link-reset" href="/stock/534359093?category=291984713&amp;page=1" style="">
<span class="ed title-link">개쓰레기 국장 우스갯소리로 하던</span>
<span class="ed text-primary">1</span> <span class="ed print-icon margin-left-xxsmall">
</span> </a>
</span>
</td> <td class="author">
<a class="ed flex flex-left flex-middle link-reset member_201466689" href="#popup_menu_area" onclick="return false"><img alt="[레벨:6]" class="xe_point_level_icon" src="https://www.dogdrip.net/modules/point/icons/ddcoa/6.gif" style="vertical-align:middle;margin:0px 3px 2px 0px;" title="포인트:4514드립력 (69%), 레벨:6/50"/>
		    팩트만말해요          </a>
</td> <td class="ed voteNum text-primary">1		</td> <td class="time" scope="row">4 시간 전</td> <td class="readNum">172</td> </tr><tr>
<td class="ed no text-xxsmall">
		            54038		  		          </td> <td class="title">
<span>
<a href="/stock/category/291984713">
<span class="ed margin-right-xxsmall text-muted" style="color:#444444">[국장]</span> </a> <a class="ed link-reset" href="/stock/534357480?category=291984713&amp;page=1" style="">
<span class="ed title-link">국장 왜 나락감?</span>
<span class="ed text-primary">7</span> <span class="ed print-icon margin-left-xxsmall">
</span> </a>
</span>
</td> <td class="author">
<a class="ed flex flex-left flex-middle link-reset member_508751389" href="#popup_menu_area" onclick="return false"><img alt="[레벨:2]" class="xe_point_level_icon" src="https://www.dogdrip.net/modules/point/icons/ddcoa/2.gif" style="vertical-align:middle;margin:0px 3px 2px 0px;" title="포인트:197드립력 (32%), 레벨:2/50"/>
		    댕드립넷          </a>
</td> <td class="ed voteNum text-primary">0		</td> <td class="time" scope="row">4 시간 전</td> <td class="readNum">476</td> </tr><tr>
<td class="ed no text-xxsmall">
		            54037		  		          </td> <td class="title">
<span>
<a href="/stock/category/291984713">
<span class="ed margin-right-xxsmall text-muted" style="color:#444444">[국장]</span> </a> <a class="ed link-reset" href="/stock/534357457?category=291984713&amp;page=1" style="">
<span class="ed title-link">역시 분산투자가 짱이네요</span>
<span class="ed text-primary">2</span> <span class="ed print-icon margin-left-xxsmall">
</span> </a>
</span>
</td> <td class="author">
<a class="ed flex flex-left flex-middle link-reset member_432609794" href="#popup_menu_area" onclick="return false"><img alt="[레벨:5]" class="xe_point_level_icon" src="https://www.dogdrip.net/modules/point/icons/ddcoa/5.gif" style="vertical-align:middle;margin:0px 3px 2px 0px;" title="포인트:1503드립력 (0%), 레벨:5/50"/>
		    초코에미친놈          </a>
</td> <td class="ed voteNum text-primary">3		</td> <td class="time" scope="row">4 시간 전</td> <td class="readNum">249</td> </tr><tr>
<td class="ed no text-xxsmall">
		            54036		  		          </td> <td class="title">
<span>
<a href="/stock/category/291984713">
<span class="ed margin-right-xxsmall text-muted" style="color:#444444">[국장]</span> </a> <a class="ed link-reset" href="/stock/534353027?category=291984713&amp;page=1" style="">
<span class="ed title-link">힘들때 웃는것이 일류다</span>
<span class="ed text-primary">8</span> <span class="ed print-icon margin-left-xxsmall">
</span> </a>
</span>
</td> <td class="author">
<a class="ed flex flex-left flex-middle link-reset member_292476305" href="#popup_menu_area" onclick="return false"><img alt="[레벨:4]" class="xe_point_level_icon" src="https://www.dogdrip.net/modules/point/icons/ddcoa/4.gif" style="vertical-align:middle;margin:0px 3px 2px 0px;" title="포인트:1075드립력 (39%), 레벨:4/50"/>
		    움몽          </a>
</td> <td class="ed voteNum text-primary">1		</td> <td class="time" scope="row">4 시간 전</td> <td class="readNum">217</td> </tr><tr>
<td class="ed no text-xxsmall">
		            54035		  		          </td> <td class="title">
<span>
<a href="/stock/category/291984713">
<span class="ed margin-right-xxsmall text-muted" style="color:#444444">[국장]</span> </a> <a class="ed link-reset" href="/stock/534347861?category=291984713&amp;page=1" style="">
<span class="ed title-link">나는 세종에 욕심이 너무컸다</span>
<span class="ed text-primary">5</span> <span class="ed print-icon margin-left-xxsmall">
</span> </a>
</span>
</td> <td class="author">
<a class="ed flex flex-left flex-middle link-reset member_349272838" href="#popup_menu_area" onclick="return false"><img alt="[레벨:4]" class="xe_point_level_icon" src="https://www.dogdrip.net/modules/point/icons/ddcoa/4.gif" style="vertical-align:middle;margin:0px 3px 2px 0px;" title="포인트:1275드립력 (67%), 레벨:4/50"/>
		    신한지주42</a>
</td> <td class="ed voteNum text-primary">1		</td> <td class="time" scope="row">4 시간 전</td> <td class="readNum">316</td> </tr><tr>
<td class="ed no text-xxsmall">
		            54034		  		          </td> <td class="title">
<span>
<a href="/stock/category/291984713">
<span class="ed margin-right-xxsmall text-muted" style="color:#444444">[국장]</span> </a> <a class="ed link-reset" href="/stock/534347032?category=291984713&amp;page=1" style="">
<span class="ed title-link">국장이나 루시드나 둘다 똑같은넘이요</span>
<span class="ed text-primary">1</span> <span class="ed print-icon margin-left-xxsmall">
</span> </a>
</span>
</td> <td class="author">
<a class="ed flex flex-left flex-middle link-reset member_443100374" href="#popup_menu_area" onclick="return false"><img alt="[레벨:2]" class="xe_point_level_icon" src="https://www.dogdrip.net/modules/point/icons/ddcoa/2.gif" style="vertical-align:middle;margin:0px 3px 2px 0px;" title="포인트:253드립력 (51%), 레벨:2/50"/>
		    인조인간18</a>
</td> <td class="ed voteNum text-primary">0		</td> <td class="time" scope="row">4 시간 전</td> <td class="readNum">129</td> </tr><tr>
<td class="ed no text-xxsmall">
		            54033		  		          </td> <td class="title">
<span>
<a href="/stock/category/291984713">
<span class="ed margin-right-xxsmall text-muted" style="color:#444444">[국장]</span> </a> <a class="ed link-reset" href="/stock/534337727?category=291984713&amp;page=1" style="">
<span class="ed title-link">국장 좆스피 좆스닥의 유일한 해결방법.</span>
<span class="ed text-primary">10</span> <span class="ed print-icon margin-left-xxsmall">
</span> </a>
</span>
</td> <td class="author">
<a class="ed flex flex-left flex-middle link-reset member_530272978" href="#popup_menu_area" onclick="return false"><img alt="[레벨:2]" class="xe_point_level_icon" src="https://www.dogdrip.net/modules/point/icons/ddcoa/2.gif" style="vertical-align:middle;margin:0px 3px 2px 0px;" title="포인트:264드립력 (54%), 레벨:2/50"/>
		    hellstock          </a>
</td> <td class="ed voteNum text-primary">1		</td> <td class="time" scope="row">5 시간 전</td> <td class="readNum">532</td> </tr><tr>
<td class="ed no text-xxsmall">
		            54032		  		          </td> <td class="title">
<span>
<a href="/stock/category/291984713">
<span class="ed margin-right-xxsmall text-muted" style="color:#444444">[국장]</span> </a> <a class="ed link-reset" href="/stock/534334055?category=291984713&amp;page=1" style="">
<span class="ed title-link">먹은만큼 다 뱉네 와</span>
<span class="ed text-primary">8</span> <span class="ed print-icon margin-left-xxsmall">
<i class="fas fa-image ed margin-right-xxsmall"></i> </span> </a>
</span>
</td> <td class="author">
<a class="ed flex flex-left flex-middle link-reset member_533170847" href="#popup_menu_area" onclick="return false"><img alt="[레벨:1]" class="xe_point_level_icon" src="https://www.dogdrip.net/modules/point/icons/ddcoa/1.gif" style="vertical-align:middle;margin:0px 3px 2px 0px;" title="포인트:8드립력 (5%), 레벨:1/50"/>
		    규롱          </a>
</td> <td class="ed voteNum text-primary">2		</td> <td class="time" scope="row">6 시간 전</td> <td class="readNum">601</td> </tr><tr>
<td class="ed no text-xxsmall">
		            54031		  		          </td> <td class="title">
<span>
<a href="/stock/category/291984713">
<span class="ed margin-right-xxsmall text-muted" style="color:#444444">[국장]</span> </a> <a class="ed link-reset" href="/stock/534333731?category=291984713&amp;page=1" style="">
<span class="ed title-link">좆스피 개미털고 상승중</span>
<span class="ed text-primary">6</span> <span class="ed print-icon margin-left-xxsmall">
</span> </a>
</span>
</td> <td class="author">
<a class="ed flex flex-left flex-middle link-reset member_530272978" href="#popup_menu_area" onclick="return false"><img alt="[레벨:2]" class="xe_point_level_icon" src="https://www.dogdrip.net/modules/point/icons/ddcoa/2.gif" style="vertical-align:middle;margin:0px 3px 2px 0px;" title="포인트:264드립력 (54%), 레벨:2/50"/>
		    hellstock          </a>
</td> <td class="ed voteNum text-primary">0		</td> <td class="time" scope="row">6 시간 전</td> <td class="readNum">525</td> </tr><tr>
<td class="ed no text-xxsmall">
		            54030		  		          </td> <td class="title">
<span>
<a href="/stock/category/291984713">
<span class="ed margin-right-xxsmall text-muted" style="color:#444444">[국장]</span> </a> <a class="ed link-reset" href="/stock/534333046?category=291984713&amp;page=1" style="">
<span class="ed title-link">코스피도 이정도면 숨 차지 않을까</span>
<span class="ed print-icon margin-left-xxsmall">
</span> </a>
</span>
</td> <td class="author">
<a class="ed flex flex-left flex-middle link-reset member_522312167" href="#popup_menu_area" onclick="return false"><img alt="[레벨:1]" class="xe_point_level_icon" src="https://www.dogdrip.net/modules/point/icons/ddcoa/1.gif" style="vertical-align:middle;margin:0px 3px 2px 0px;" title="포인트:97드립력 (96%), 레벨:1/50"/>
		    오위오위고          </a>
</td> <td class="ed voteNum text-primary">0		</td> <td class="time" scope="row">6 시간 전</td> <td class="readNum">159</td> </tr><tr>
<td class="ed no text-xxsmall">
		            54029		  		          </td> <td class="title">
<span>
<a href="/stock/category/291984713">
<span class="ed margin-right-xxsmall text-muted" style="color:#444444">[국장]</span> </a> <a class="ed link-reset" href="/stock/534332312?category=291984713&amp;page=1" style="">
<span class="ed title-link">궁금한데 공매도있었으면</span>
<span class="ed text-primary">5</span> <span class="ed print-icon margin-left-xxsmall">
</span> </a>
</span>
</td> <td class="author">
<a class="ed flex flex-left flex-middle link-reset member_2359008" href="#popup_menu_area" onclick="return false"><img alt="[레벨:9]" class="xe_point_level_icon" src="https://www.dogdrip.net/modules/point/icons/ddcoa/9.gif" style="vertical-align:middle;margin:0px 3px 2px 0px;" title="포인트:10307드립력 (66%), 레벨:9/50"/>
		    전자연필          </a>
</td> <td class="ed voteNum text-primary">0		</td> <td class="time" scope="row">6 시간 전</td> <td class="readNum">280</td> </tr><tr>
<td class="ed no text-xxsmall">
		            54028		  		          </td> <td class="title">
<span>
<a href="/stock/category/291984713">
<span class="ed margin-right-xxsmall text-muted" style="color:#444444">[국장]</span> </a> <a class="ed link-reset" href="/stock/534331402?category=291984713&amp;page=1" style="">
<span class="ed title-link">애미 시발 수익다 뱉어내게 생겼네</span>
<span class="ed text-primary">7</span> <span class="ed print-icon margin-left-xxsmall">
</span> </a>
</span>
</td> <td class="author">
<a class="ed flex flex-left flex-middle link-reset member_41424265" href="#popup_menu_area" onclick="return false"><img alt="[레벨:9]" class="xe_point_level_icon" src="https://www.dogdrip.net/modules/point/icons/ddcoa/9.gif" style="vertical-align:middle;margin:0px 3px 2px 0px;" title="포인트:9829드립력 (43%), 레벨:9/50"/>
		    저소음흑축써주세...          </a>
</td> <td class="ed voteNum text-primary">1		</td> <td class="time" scope="row">6 시간 전</td> <td class="readNum">322</td> </tr><tr>
<td class="ed no text-xxsmall">
		            54027		  		          </td> <td class="title">
<span>
<a href="/stock/category/291984713">
<span class="ed margin-right-xxsmall text-muted" style="color:#444444">[국장]</span> </a> <a class="ed link-reset" href="/stock/534331171?category=291984713&amp;page=1" style="">
<span class="ed title-link">국장 저점 시그널</span>
<span class="ed text-primary">3</span> <span class="ed print-icon margin-left-xxsmall">
<i class="fas fa-image ed margin-right-xxsmall"></i> </span> </a>
</span>
</td> <td class="author">
<a class="ed flex flex-left flex-middle link-reset member_21467343" href="#popup_menu_area" onclick="return false"><img alt="[레벨:8]" class="xe_point_level_icon" src="https://www.dogdrip.net/modules/point/icons/ddcoa/8.gif" style="vertical-align:middle;margin:0px 3px 2px 0px;" title="포인트:7635드립력 (31%), 레벨:8/50"/>
		    최후의광휘          </a>
</td> <td class="ed voteNum text-primary">2		</td> <td class="time" scope="row">6 시간 전</td> <td class="readNum">427</td> </tr> </tbody></table>
# Pull the title cell of every post out of the board table and reduce each
# one to a single clean line of text.
# NOTE(review): `.text` on the whole <td> also includes the comment-count
# <span>, so a title with comments ends in that number (e.g. '...?4').
title_cells = whole_board.find_all('td', class_ = 'title')
all_title = [
    cell.text.replace('\n', '').replace('[국장]', '').strip()
    for cell in title_cells
]
all_title
['주식 / 재테크와 관련된 글을 올려주시기 바랍니다.',
 '2차전지 곱버스는 없는겨?4',
 '펀더멘털문제없다6',
 '지구 망한듯?1',
 '오늘 좆스피말고 다른데도 다떨구는데 왜이래',
 '주식 복귀 1주일차2',
 '와 오늘 개 살벌했네2',
 '코리아 주식시장이 볍신인건 태생적인 시스템 한계지 머4',
 '개쓰레기 국장 우스갯소리로 하던1',
 '국장 왜 나락감?7',
 '역시 분산투자가 짱이네요2',
 '힘들때 웃는것이 일류다8',
 '나는 세종에 욕심이 너무컸다5',
 '국장이나 루시드나 둘다 똑같은넘이요1',
 '국장 좆스피 좆스닥의 유일한 해결방법.10',
 '먹은만큼 다 뱉네 와8',
 '좆스피 개미털고 상승중6',
 '코스피도 이정도면 숨 차지 않을까',
 '궁금한데 공매도있었으면5',
 '애미 시발 수익다 뱉어내게 생겼네7',
 '국장 저점 시그널3']

# Collect the link (href) of every post listed on the board
article_url = soup.find('table', class_ = 'ed table table-divider')
article_url_temp = article_url.find_all('a', class_ = 'ed link-reset')

all_article_url = [anchor['href'] for anchor in article_url_temp]
all_article_url
['/stock/273533897?category=291984713&page=1',
 '/stock/534398188?category=291984713&page=1',
 '/stock/534384446?category=291984713&page=1',
 '/stock/534368123?category=291984713&page=1',
 '/stock/534367430?category=291984713&page=1',
 '/stock/534364396?category=291984713&page=1',
 '/stock/534360500?category=291984713&page=1',
 '/stock/534359690?category=291984713&page=1',
 '/stock/534359093?category=291984713&page=1',
 '/stock/534357480?category=291984713&page=1',
 '/stock/534357457?category=291984713&page=1',
 '/stock/534353027?category=291984713&page=1',
 '/stock/534347861?category=291984713&page=1',
 '/stock/534347032?category=291984713&page=1',
 '/stock/534337727?category=291984713&page=1',
 '/stock/534334055?category=291984713&page=1',
 '/stock/534333731?category=291984713&page=1',
 '/stock/534333046?category=291984713&page=1',
 '/stock/534332312?category=291984713&page=1',
 '/stock/534331402?category=291984713&page=1',
 '/stock/534331171?category=291984713&page=1']
# Prefix each site-relative link with the site origin to get absolute URLs
all_article_url = [f'https://www.dogdrip.net{path}' for path in all_article_url]
all_article_url
['https://www.dogdrip.net/stock/273533897?category=291984713&page=1',
 'https://www.dogdrip.net/stock/534398188?category=291984713&page=1',
 'https://www.dogdrip.net/stock/534384446?category=291984713&page=1',
 'https://www.dogdrip.net/stock/534368123?category=291984713&page=1',
 'https://www.dogdrip.net/stock/534367430?category=291984713&page=1',
 'https://www.dogdrip.net/stock/534364396?category=291984713&page=1',
 'https://www.dogdrip.net/stock/534360500?category=291984713&page=1',
 'https://www.dogdrip.net/stock/534359690?category=291984713&page=1',
 'https://www.dogdrip.net/stock/534359093?category=291984713&page=1',
 'https://www.dogdrip.net/stock/534357480?category=291984713&page=1',
 'https://www.dogdrip.net/stock/534357457?category=291984713&page=1',
 'https://www.dogdrip.net/stock/534353027?category=291984713&page=1',
 'https://www.dogdrip.net/stock/534347861?category=291984713&page=1',
 'https://www.dogdrip.net/stock/534347032?category=291984713&page=1',
 'https://www.dogdrip.net/stock/534337727?category=291984713&page=1',
 'https://www.dogdrip.net/stock/534334055?category=291984713&page=1',
 'https://www.dogdrip.net/stock/534333731?category=291984713&page=1',
 'https://www.dogdrip.net/stock/534333046?category=291984713&page=1',
 'https://www.dogdrip.net/stock/534332312?category=291984713&page=1',
 'https://www.dogdrip.net/stock/534331402?category=291984713&page=1',
 'https://www.dogdrip.net/stock/534331171?category=291984713&page=1']
# Crawl the body of a single post
url = 'https://www.dogdrip.net/stock/475288748?category=291984713&page=1'
# The post ID is the last path segment: whatever sits between the final '/'
# and the query string. Splitting on '?' and '/' (instead of on the literal
# '?category=' and the full site prefix) keeps this working when the query
# parameters are ordered differently or missing.
article_id = url.split('?', 1)[0].rstrip('/').rsplit('/', 1)[-1]

# Timeout + status check: fail fast on a stalled or unsuccessful request.
res = requests.get(url, headers = headers, timeout=10)
res.raise_for_status()
# Explicit parser so the result does not depend on installed parser libraries.
soup = BeautifulSoup(res.text, 'html.parser')
# The post body is looked up by a div class that embeds the post ID
soup.find_all('div', class_ = 'document_' + article_id + '_0 rhymix_content xe_content')
[<div class="document_475288748_0 rhymix_content xe_content"><p>이제 빼야한다는게 너무 마음아프다.. 그래도 50프로 먹고 나왔으니 기분은 좋다</p>
 <p> </p>
 <p>남은 사람들은 계속 추매해서 집 사자! </p>
 <p> </p>
 <p>좀만 버티면 평수가 달라진다</p></div>]
# Gather the scraped titles and their URLs into one table
scraped_columns = {'title' : all_title, 'URL' : all_article_url}
df = pd.DataFrame(scraped_columns)
df

# Plain Python list of the titles, used as the model input further down
batch_inputs = df['title'].tolist()
# BASE_URL = "https://gall.dcinside.com/mgallery/board/lists/?id=kospi"
# ARTICLE_BASE_URL = "https://gall.dcinside.com"

# # 헤더 설정
# headers = [
# {
#     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.99 Safari/537.36'
# },
# ]
# res_list = []
# #몇 페이지부터 몇 페이지까지
# for i in range(1, 60):
#     # 파라미터 설정
#     params = {'id': 'kospi','page':i}

#     response = requests.get(BASE_URL, params=params, headers=headers[0])

#     soup = BeautifulSoup(response.content, 'html.parser')

#     #실질적 글 목록 부분
#     article_list = soup.find('gall_listwrap list').find_all('tr')

#     # 한 페이지에 있는 모든 게시물을 긁어오는 코드
#     for tr_item in article_list:
#         # # 이미지가 있는 게시물일 경우만 탐색 시작 start
#         # image_flag = tr_item.find('em', class_='icon_img icon_pic')

#         # if image_flag is None:
#         #     continue
#         # # 이미지가 있는 게시물일 경우만 탐색 시작 end

#         # print('+'*12)

#         # 제목 추출
#         title_tag = tr_item.find('a', href=True)
#         title = title_tag.text

#         print("T: ", title)
#         # print("주소: ", title_tag['href'])
#         res_list.append(title)

#         # # 이미지가 있는 게시물에 request
#         # article_response = requests.get(ARTICLE_BASE_URL + title_tag['href'], headers=headers[0])
#         # # print("url: ", article_response.url)

#         # article_id = (title_tag['href'].split('no=')[1]).split('&')[0]
#         # # print("게시물 ID : ", article_id)

#         # article_soup = BeautifulSoup(article_response.content, 'html.parser')

#         # # 게시물 부분의 태그
#         # article_contents = article_soup.find('div', class_='writing_view_box').find_all('div')
# `res_list` is only produced by the DC Inside crawler, which is commented out
# in this file, so on a fresh run the name does not exist. Fall back to "no
# extra titles" instead of crashing with NameError; if the crawler has been
# re-enabled and run, its titles (from index 7 on) are appended as before.
try:
    extra_titles = res_list[7:]
except NameError:
    extra_titles = []
batch_inputs.extend(extra_titles)

print(batch_inputs[1:])
print(len(batch_inputs[1:]))

# Index 0 is the board's pinned notice row rather than a user post, so it is
# left out of the data fed to the model.
k_vix_data = batch_inputs[1:]
['2차전지 곱버스는 없는겨?4', '펀더멘털문제없다6', '지구 망한듯?1', '오늘 좆스피말고 다른데도 다떨구는데 왜이래', '주식 복귀 1주일차2', '와 오늘 개 살벌했네2', '코리아 주식시장이 볍신인건 태생적인 시스템 한계지 머4', '개쓰레기 국장 우스갯소리로 하던1', '국장 왜 나락감?7', '역시 분산투자가 짱이네요2', '힘들때 웃는것이 일류다8', '나는 세종에 욕심이 너무컸다5', '국장이나 루시드나 둘다 똑같은넘이요1', '국장 좆스피 좆스닥의 유일한 해결방법.10', '먹은만큼 다 뱉네 와8', '좆스피 개미털고 상승중6', '코스피도 이정도면 숨 차지 않을까', '궁금한데 공매도있었으면5', '애미 시발 수익다 뱉어내게 생겼네7', '국장 저점 시그널3']
20
class k_vix_dataloader(Dataset):
    """Minimal map-style ``Dataset`` over an in-memory sequence of items.

    Despite the name, this class is a dataset rather than a loader: it only
    provides length and index access and is meant to be wrapped by
    ``torch.utils.data.DataLoader``.
    """

    def __init__(self, data_list):
        """Keep a reference to *data_list*; the sequence is not copied."""
        self.data = data_list

    def __len__(self):
        """Return the number of items in the wrapped sequence."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the item stored at position *index*, unchanged."""
        return self.data[index]
# Load the tokenizer and the sequence-classification model from the same
# checkpoint so that token IDs match what the model expects.
tokenizer = AutoTokenizer.from_pretrained("snunlp/KR-FinBert-SC")
model = AutoModelForSequenceClassification.from_pretrained("snunlp/KR-FinBert-SC")

dataset = k_vix_dataloader(k_vix_data)
# NOTE(review): shuffle=True is kept from the original. Only an average over
# all titles is taken afterwards, so order should not matter, but the logits
# in `get_results` do not line up with `k_vix_data` - confirm that is intended.
dataloader = DataLoader(dataset, batch_size=8, shuffle=True)
get_results = []
# Inference only: no_grad() stops autograd from recording a graph for each
# forward pass, so the stored logits do not keep that graph alive.
with torch.no_grad():
    for data in dataloader:
        print(data)
        # padding=True pads every title in the batch to the longest one so the
        # batch can be returned as a single tensor.
        inputs = tokenizer(data, return_tensors="pt", padding=True, truncation=False)
        outputs = model(**inputs)
        # Iterating the logits tensor yields one row per title.
        get_results.extend(outputs.logits)
['2차전지 곱버스는 없는겨?4', '지구 망한듯?1', '코스피도 이정도면 숨 차지 않을까', '코리아 주식시장이 볍신인건 태생적인 시스템 한계지 머4', '힘들때 웃는것이 일류다8', '국장 왜 나락감?7', '펀더멘털문제없다6', '오늘 좆스피말고 다른데도 다떨구는데 왜이래']
['국장 저점 시그널3', '국장 좆스피 좆스닥의 유일한 해결방법.10', '나는 세종에 욕심이 너무컸다5', '개쓰레기 국장 우스갯소리로 하던1', '와 오늘 개 살벌했네2', '궁금한데 공매도있었으면5', '먹은만큼 다 뱉네 와8', '국장이나 루시드나 둘다 똑같은넘이요1']
['역시 분산투자가 짱이네요2', '애미 시발 수익다 뱉어내게 생겼네7', '주식 복귀 1주일차2', '좆스피 개미털고 상승중6']
# Stack the collected per-title logit rows into a single tensor
all_pred = torch.stack(get_results)

# Convert each row of logits into probabilities that sum to 1
act = nn.Softmax(dim=1)
pred = act(all_pred)
# Average over all titles, leaving one mean probability per class
res = pred.mean(dim=0).tolist() # neg, neu, pos (author's note; confirm via model.config.id2label)
print(res)
[0.004312992095947266, 0.9470561146736145, 0.04863091558218002]
# Drop the middle (neutral) score and rescale the two outer scores so that
# they sum to 1.
polar_total = res[0] + res[2]
neg = res[0] / polar_total
pos = res[2] / polar_total

print(neg, pos)
# 0.08146342582357387 0.9185365741764261
profile
AI, Information and Communication, Electronics, Computer Science, Bio, Algorithms

0개의 댓글

관련 채용 정보