import streamlit as st  # minimal framework for building the app
import os
import torch
import nltk
import urllib.request
from models.model_builder import ExtSummarizer
from newspaper import Article
from ext_sum import summarize


def main():
    st.markdown("<h1 style='text-align: center;'>Extractive Summary✏️</h1>", unsafe_allow_html=True)

    # Download model
    if not os.path.exists('checkpoints/mobilebert_ext.pt'):
        download_model()

    # Load model
    model = load_model('mobilebert')

    # Input
    ## Radio button for the input type: run on raw text or a URL depending on which is selected
    input_type = st.radio("Input Type: ", ["URL", "Raw Text"])
    st.markdown("<h3 style='text-align: center;'>Input</h3>", unsafe_allow_html=True)

    if input_type == "Raw Text":
        with open("raw_data/input.txt") as f:
            sample_text = f.read()
        text = st.text_area("", sample_text, 200)
    else:
        url = st.text_input("", "https://www.cnn.com/2020/05/29/tech/facebook-violence-trump/index.html")
        st.markdown(f"[*Read Original News*]({url})")
        text = crawl_url(url)

    input_fp = "raw_data/input.txt"
    with open(input_fp, 'w') as file:
        file.write(text)

    # Summarize
    ## Choose the output length with a radio button
    sum_level = st.radio("Output Length: ", ["Short", "Medium"])
    ## Number of sentences for each choice
    max_length = 3 if sum_level == "Short" else 5
    result_fp = 'results/summary.txt'
    ## The actual summary
    summary = summarize(input_fp, result_fp, model, max_length=max_length)
    st.markdown("<h3 style='text-align: center;'>Summary</h3>", unsafe_allow_html=True)
    st.markdown(f"<p align='justify'>{summary}</p>", unsafe_allow_html=True)


def download_model():
    nltk.download('popular')
    url = 'https://www.googleapis.com/drive/v3/files/1umMOXoueo38zID_AKFSIOGxG9XjS5hDC?alt=media&key=AIzaSyCmo6sAQ37OK8DK4wnT94PoLx5lx-7VTDE'

    # These are handles to two visual elements to animate.
    weights_warning, progress_bar = None, None
    try:
        weights_warning = st.warning("Downloading checkpoint...")
        progress_bar = st.progress(0)
        with open('checkpoints/mobilebert_ext.pt', 'wb') as output_file:
            with urllib.request.urlopen(url) as response:
                length = int(response.info()["Content-Length"])
                counter = 0.0
                MEGABYTES = 2.0 ** 20.0
                while True:
                    data = response.read(8192)
                    if not data:
                        break
                    counter += len(data)
                    output_file.write(data)
                    # We perform animation by overwriting the elements.
                    weights_warning.warning("Downloading checkpoint... (%6.2f/%6.2f MB)" %
                                            (counter / MEGABYTES, length / MEGABYTES))
                    progress_bar.progress(min(counter / length, 1.0))
    # Finally, we remove these visual elements by calling .empty().
    finally:
        if weights_warning is not None:
            weights_warning.empty()
        if progress_bar is not None:
            progress_bar.empty()


@st.cache(suppress_st_warning=True)
def load_model(model_type):
    checkpoint = torch.load(f'checkpoints/{model_type}_ext.pt', map_location='cpu')
    model = ExtSummarizer(device="cpu", checkpoint=checkpoint, bert_type=model_type)
    return model


def crawl_url(url):
    article = Article(url)
    article.download()
    article.parse()
    return article.text


if __name__ == "__main__":
    main()
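To launch the app I go through the Streamlit CLI rather than plain python. A minimal run command, assuming the script above is saved as app.py (the filename is my assumption) and that the checkpoints/, raw_data/, and results/ directories exist next to it:

streamlit run app.py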
Reference: https://zzsza.github.io/mlops/2021/02/07/python-streamlit-dashboard/
First error
Traceback (most recent call last):
File "real_test.py", line 104, in <module>
main()
File "real_test.py", line 48, in main
summary = summarize(input_fp, result_fp, model, max_length=max_length)
File "/home/u7ryean/project-template/ai/bert-extractive-summarization/ext_sum.py", line 114, in summarize
test(model, input_data, result_fp, max_length, block_trigram=True)
File "/home/u7ryean/project-template/ai/bert-extractive-summarization/ext_sum.py", line 80, in test
sent_scores, mask = model(src, segs, clss, mask, mask_cls)
File "/home/u7ryean/anaconda3/envs/test/lib/python3.6/site-packages/torch/nn/modules/module.py", line 493, in __call__
result = self.forward(*input, **kwargs)
File "/home/u7ryean/project-template/ai/bert-extractive-summarization/models/model_builder.py", line 47, in forward
sents_vec = top_vec[torch.arange(top_vec.size(0)).unsqueeze(1), clss]
IndexError: tensors used as indices must be long, byte or bool tensors
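The IndexError says the tensor used to index top_vec must be long, byte, or bool, so clss is most likely being built with a non-integer dtype before it reaches the model. A minimal sketch of the workaround, assuming the indexing line in models/model_builder.py is the one from the traceback (this is my guess at the fix, not the repo's own patch):

# models/model_builder.py, inside forward() -- sketch of the dtype fix (assumption, not verified)
clss = clss.long()  # cast sentence-position indices to long so advanced indexing accepts them
sents_vec = top_vec[torch.arange(top_vec.size(0)).unsqueeze(1), clss]

The same cast could instead be done where clss is first created in ext_sum.py; either way the index tensor has to be an integer type on PyTorch 1.1.0.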
PyTorch version 1.1.0 available.
PyTorch version 1.1.0 available.
loading configuration file checkpoints/mobilebert/config.json
Model config MobileBertConfig {
"attention_probs_dropout_prob": 0.1,
"classifier_activation": false,
"embedding_size": 128,
"hidden_act": "relu",
"hidden_dropout_prob": 0.0,
"hidden_size": 512,
"initializer_range": 0.02,
"intermediate_size": 512,
"intra_bottleneck_size": 128,
"key_query_shared_bottleneck": true,
"layer_norm_eps": 1e-12,
"max_position_embeddings": 512,
"model_type": "bert",
"normalization_type": "no_norm",
"num_attention_heads": 4,
"num_feedforward_networks": 4,
"num_hidden_layers": 24,
"pad_token_id": 0,
"trigram_input": true,
"true_hidden_size": 128,
"type_vocab_size": 2,
"use_bottleneck": true,
"use_bottleneck_attention": false,
"vocab_size": 30522
}
loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /home/u7ryean/.cache/torch/transformers/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
# Show the branch I am currently on
git branch
# Create a new branch and switch to it at the same time
git checkout -b <branch-name>
# Delete a branch
git branch -d <branch-name>
git add .
git commit -m "commit message"
git config --global user.name "your name"
git push
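One note on the branch workflow above: for a branch just created with git checkout -b, a bare git push usually fails because the remote has no upstream for it yet, so the first push sets the upstream explicitly (the branch name is a placeholder):

git push -u origin <branch-name>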
If I completed everything I planned today, what made that possible?
If things did not go as planned, why? What should I improve so I do not repeat the same mistake?
Given the overall progress of the project, what should I pick up tomorrow, and what still needs to be done?
Is the current plan still valid, or does it need to be revised?