딥러닝(AI학습 47)

이유진·2024년 7월 8일

AI DL colab python

--28-2.순환신경망.ipynb--

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

import tensorflow as tf
from tensorflow import keras

# Fix the global random seed so repeated runs start from the same state.
keras.utils.set_random_seed(seed=42)
# Ask TensorFlow to run its ops deterministically.
tf.config.experimental.enable_op_determinism()

# Directory that holds the saved model file.
base_path = r'/content/drive/MyDrive/dataset'

# Drop any previously bound model before restoring the saved checkpoint.
model = None
checkpoint_path = os.path.join(base_path, 'best-simplernn-model.h5')
model = keras.models.load_model(checkpoint_path)

model.summary()

from tensorflow.keras.datasets import imdb

# Load IMDB with the vocabulary limited via num_words=500.
train_split, test_split = imdb.load_data(num_words=500)
train_input, train_target = train_split
test_input, test_target = test_split

from sklearn.model_selection import train_test_split

한 번만 실행! (train_input을 덮어쓰므로 다시 실행하면 훈련 세트가 한 번 더 분할됩니다.)

# Hold out 20% of the training reviews as a validation set (fixed seed).
# This rebinds train_input/train_target, so running it a second time would
# split the already-reduced training set again.
train_input, val_input, train_target, val_target = train_test_split(
    train_input,
    train_target,
    test_size=0.2,
    random_state=42,
)

# Shapes of the three splits.
(train_input.shape, val_input.shape, test_input.shape)

from tensorflow.keras.preprocessing.sequence import pad_sequences

# Length that every review is padded/truncated to.
max_len = 100

train_seq = pad_sequences(train_input, maxlen=max_len)

평가

# Pad the validation reviews to the same length as the training ones.
val_seq = pad_sequences(sequences=val_input, maxlen=max_len)

검증세트 확인

# Score the restored model on the validation split.
loss, accuracy = model.evaluate(x=val_seq, y=val_target, batch_size=64)

# <- same result as at best_epoch.
print(f'Val Loss: {loss}', f'Val Accuracy: {accuracy}', sep='\n')

테스트 세트

# Pad the test reviews exactly like the other splits.
test_seq = pad_sequences(sequences=test_input, maxlen=max_len)

test_seq.shape

test_seq[0]

# Score the restored model on the test split.
loss, accuracy = model.evaluate(x=test_seq, y=test_target, batch_size=64)

# NOTE(review): the original note here read "same result as at best_epoch";
# it looks copied from the validation cell -- confirm it applies to test data.
print(f'Test Loss: {loss}', f'Test Accuracy: {accuracy}', sep='\n')

예측하기

IMDB 데이터셋의 단어 인덱스 가져오기

# Word -> integer mapping that accompanies the IMDB dataset.
word_index = imdb.get_word_index()

# Number of entries in the mapping.
len(word_index)

# Example review text (positive in tone) used for a single prediction below.
sample_review = 'The best documentary I have watched in a very long time. This is definitely a must see for everyone. This family and their love and support for each other is truly amazing.'
sample_review

import nltk
from nltk.tokenize import word_tokenize
# word_tokenize needs the Punkt tokenizer data. NLTK 3.8.2 and later look for
# the 'punkt_tab' resource instead of the older pickled 'punkt' package, so
# fetch both to keep this cell working across NLTK versions.
nltk.download('punkt')
nltk.download('punkt_tab')

소문자 변환 뒤 토큰화

# Lower-case first, then tokenize.
lowered_review = sample_review.lower()
tokens = word_tokenize(lowered_review)
print(tokens)

리뷰를 인덱스로 전환

# Encode the tokens the same way imdb.load_data(num_words=500) encoded the
# training reviews. With its defaults, ids 0/1/2 are reserved for
# padding/start/unknown, every rank from get_word_index() is shifted by
# index_from (3), and any id at or above num_words becomes the unknown id.
# Using the raw ranks (and 0 for unknown words) would feed the model ids that
# mean different words than the ones it was trained on.
num_words, index_from, start_char, oov_char = 500, 3, 1, 2

sample_review_index = [start_char] + [
    word_index[word] + index_from
    if word in word_index and word_index[word] + index_from < num_words
    else oov_char
    for word in tokens
]

print(sample_review_index)

패딩

# pad_sequences works on a batch, so wrap the single review in a list.
single_review_batch = [sample_review_index]
sample_review_index_padded = pad_sequences(single_review_batch, maxlen=max_len)

sample_review_index_padded

sample_review_index_padded.shape

예측하기

# Run the model on the single padded review.
predict = model.predict(x=sample_review_index_padded)

def predict_review(review, num_words=500, index_from=3):
    """Print the model's prediction for a raw English review.

    The review is lower-cased, tokenized, and encoded the same way
    ``imdb.load_data`` encoded the training data: a start marker (1) is
    prepended, each word rank from ``word_index`` is shifted by
    ``index_from``, and unknown words or ids at or above ``num_words`` become
    the out-of-vocabulary id (2). The result is padded to ``max_len``.

    Args:
        review: Review text to score.
        num_words: Vocabulary size; must match the ``num_words`` passed to
            ``imdb.load_data`` when the model was trained.
        index_from: Offset applied to word ranks; 3 is the
            ``imdb.load_data`` default.

    Relies on the notebook globals ``word_index``, ``max_len`` and ``model``.
    """
    start_char, oov_char = 1, 2  # imdb.load_data defaults

    # Convert the review to ids in the training-time encoding.
    tokens = word_tokenize(review.lower())
    encoded = [start_char] + [
        word_index[word] + index_from
        if word in word_index and word_index[word] + index_from < num_words
        else oov_char
        for word in tokens
    ]
    # Padding (pad_sequences expects a batch, hence the wrapping list).
    padded = pad_sequences([encoded], maxlen=max_len)
    # Prediction: first (only) review, first output unit.
    # NOTE(review): presumably the probability of a positive review --
    # confirm against the model's output layer.
    prediction = model.predict(padded)
    print(f'Prediction: {prediction[0][0]}')

안 좋은 평점

# Score a critical (negative) review with the helper.
predict_review(
    review="This is a very one sided documentary about a woman who is sentenced to a 15 year mandatory prison sentence for dealing drugs. The documentary is made by her family and they want you to believe she doesn't deserve her sentence. However if you read the court reports you will see that she was a drug dealer, she lied to the police, and she was found guily after a trial. The tragic part of this documentary is that the woman left behind a husband and three little girls who will forever be damaged by not having their mother around. Yes you will feel sorry for the children because none of this was their fault. However their drug dealing mom got the sentence she deserved and now she has to spend the rest of her life making it up to these kids. The big lesson of this documentary is if you committ a crime in the US you will get locked up!",
)