--29.LSTM, GRU.ipynb--
Long Short-Term Memory
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import tensorflow as tf
from tensorflow import keras
tf.keras.utils.set_random_seed(42) # fix Python/NumPy/TF random seeds for reproducible runs
tf.config.experimental.enable_op_determinism()  # force deterministic TF kernels (same results every run, at some speed cost)
base_path = r'/content/drive/MyDrive/dataset'  # Google Drive folder (Colab) used for model checkpoints
from tensorflow.keras.datasets import imdb
from sklearn.model_selection import train_test_split
# IMDB reviews restricted to the 500 most frequent words; rarer words become the OOV token.
(train_input, train_target), (test_input, test_target) = imdb.load_data(num_words=500)

# Hold out 20% of the training split for validation.
train_input, val_input, train_target, val_target = train_test_split(
    train_input, train_target, test_size=0.2, random_state=42)

from tensorflow.keras.preprocessing.sequence import pad_sequences

# Pad/truncate every review to a fixed length so batches are rectangular.
max_len = 100
train_seq, val_seq, test_seq = (
    pad_sequences(split, maxlen=max_len)
    for split in (train_input, val_input, test_input)
)
# Baseline LSTM classifier for binary sentiment.
model = keras.Sequential()
# Input length must equal the padded sequence length; use max_len instead of a
# duplicated hard-coded 100 so changing max_len above keeps the model in sync.
model.add(keras.layers.Input(shape=(max_len,)))
# 500-word vocabulary -> 16-dim embeddings: 500 * 16 = 8,000 params.
model.add(keras.layers.Embedding(500, 16))
# 8-unit LSTM. An LSTM cell contains 4 gates, so params = 4 * (16*8 + 8*8 + 8) = 800.
model.add(keras.layers.LSTM(8))
# Single sigmoid unit for positive/negative probability.
model.add(keras.layers.Dense(1, activation='sigmoid'))
model.summary()
# Transcript of model.summary() from the original notebook run:
"""
embedding (Embedding) (None, 100, 16) 8000
lstm (LSTM) (None, 8) 800
# SimpleRNN 은 모델 파라미터 개수가 200개였다
# LSTM 셀에는 작은셀이 4개 있으므로 정확히 x4개가 되어 800개가 됨.
dense (Dense) (None, 1) 9
=================================================================
Total params: 8809 (34.41 KB)
Trainable params: 8809 (34.41 KB)
Non-trainable params: 0 (0.00 Byte)
"""
None  # notebook artifact: suppresses cell output of the bare string above
# A small learning rate (1e-4) trains recurrent nets more stably on this task.
rmsprop = keras.optimizers.RMSprop(learning_rate=1e-4)
model.compile(optimizer=rmsprop, loss='binary_crossentropy', metrics=['accuracy'])
# Persist only the best (lowest val_loss) weights to Drive, legacy HDF5 format.
checkpoint_cb = keras.callbacks.ModelCheckpoint(os.path.join(base_path, 'best-lstm-model.h5'),
save_best_only=True)
# Stop after 3 epochs without val_loss improvement and roll back to best weights.
early_stopping_cb = keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True)
history = model.fit(train_seq, train_target, epochs=100, batch_size=64,
validation_data=(val_seq, val_target),
callbacks=[checkpoint_cb, early_stopping_cb])
early_stopping_cb.best_epoch # epoch 27 was the best in the original run (bare expression: notebook displays it)
# Visualize training vs. validation loss for the baseline LSTM.
hist = history.history
plt.plot(hist['loss'])
plt.plot(hist['val_loss'])
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'val'])
plt.show()
"""
↑ train loss 와 val loss 의 '격차' 가 이전의 SimpleRNN 때 보다 훨씬 좁혀졌다
기본 순환층 보다 LSTM이 overfit 을 잘 억제하면서 훈련을 잘 수행했다.
"""
None
# Same LSTM architecture, now with 30% dropout on the layer inputs to curb overfitting.
model = keras.Sequential()
# Use max_len (not a hard-coded 100) so the input stays consistent with the padding above.
model.add(keras.layers.Input(shape=(max_len,)))
model.add(keras.layers.Embedding(500, 16))
# dropout= applies to the input connections; per Keras docs, recurrent_dropout
# would additionally disable the fused cuDNN kernel on GPU, so only dropout is used here.
model.add(keras.layers.LSTM(8, dropout=0.3))
model.add(keras.layers.Dense(1, activation='sigmoid'))
rmsprop = keras.optimizers.RMSprop(learning_rate=1e-4)
model.compile(optimizer=rmsprop, loss='binary_crossentropy', metrics=['accuracy'])
# Checkpoint the best dropout model separately from the baseline.
checkpoint_cb = keras.callbacks.ModelCheckpoint(os.path.join(base_path, 'best-dropout-model.h5'),
save_best_only=True)
early_stopping_cb = keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True)
history = model.fit(train_seq, train_target, epochs=100, batch_size=64,
validation_data=(val_seq, val_target),
callbacks=[checkpoint_cb, early_stopping_cb])
# Loss curves for the dropout model: train first, then validation.
curves = history.history
for key in ('loss', 'val_loss'):
    plt.plot(curves[key])
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train', 'val'])
plt.show()
# Stacked (2-layer) LSTM with dropout.
model = keras.Sequential()
# Keep the input length tied to max_len rather than repeating the literal 100.
model.add(keras.layers.Input(shape=(max_len,)))
model.add(keras.layers.Embedding(500, 16))
# return_sequences=True makes the first LSTM emit the hidden state at every
# timestep, which the second LSTM consumes as its input sequence.
model.add(keras.layers.LSTM(8, dropout=0.3, return_sequences=True))
# The final LSTM returns only the last timestep's hidden state.
model.add(keras.layers.LSTM(8, dropout=0.3))
model.add(keras.layers.Dense(1, activation='sigmoid'))
model.summary()
# Transcript of model.summary() from the original notebook run:
"""
embedding_2 (Embedding) (None, 100, 16) 8000
lstm_2 (LSTM) (None, 100, 8) 800
# 첫번째 LSTM 층이 모든 타임스텝(100개) 의 은닉상태를 출력하기 때문에
# 출력크기가 (?, 100, 8) 2차원
lstm_3 (LSTM) (None, 8) 544
# 마지막 LSTM 층의 출력크기는 마지막 타임 스텝의 은닉상태만 출력하기 떄문에
# 출력 크기가 (?, 8) 이다.
dense_2 (Dense) (None, 1) 9
=================================================================
Total params: 9353 (36.54 KB)
Trainable params: 9353 (36.54 KB)
Non-trainable params: 0 (0.00 Byte)
"""
None  # notebook artifact: suppresses cell output of the bare string above
# Train the stacked LSTM with the same recipe as the previous models.
rmsprop = keras.optimizers.RMSprop(learning_rate=1e-4)
model.compile(optimizer=rmsprop, loss='binary_crossentropy', metrics=['accuracy'])
# Best weights (by val_loss) are checkpointed for the final test evaluation below.
checkpoint_cb = keras.callbacks.ModelCheckpoint(os.path.join(base_path, 'best-2lstm-model.h5'),
save_best_only=True)
early_stopping_cb = keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True)
history = model.fit(train_seq, train_target, epochs=100, batch_size=64,
validation_data=(val_seq, val_target),
callbacks=[checkpoint_cb, early_stopping_cb])
# Loss curves for the stacked LSTM.
train_loss = history.history['loss']
val_loss = history.history['val_loss']
plt.plot(train_loss)
plt.plot(val_loss)
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train', 'val'])
plt.show()
Gated Recurrent Unit
# GRU classifier for comparison against the LSTM variants.
model = keras.Sequential()
# Tie the input length to max_len instead of hard-coding 100.
model.add(keras.layers.Input(shape=(max_len,)))
model.add(keras.layers.Embedding(500, 16))
# 8-unit GRU: 3 gates * (16*8 + 8*8 + 8) = 600, plus the extra per-gate bias
# TF adds (3 gates * 8 units = 24) -> 624 params, matching the summary below.
model.add(keras.layers.GRU(8))
model.add(keras.layers.Dense(1, activation='sigmoid'))
model.summary()
# Transcript of model.summary() from the original notebook run:
"""
embedding_4 (Embedding) (None, 100, 16) 8000
gru (GRU) (None, 8) 624
parmeter 개수
GRU 에는 작은 셀이 3개가 있다
입력에 곱하는 weights 16 8 = 128개
은닉상태에 곱하는 weight 8 8 = 64개
bias 가 뉴런마다 하나씩 = 8개
모두 더하면 128 + 64 + 8 = 200개
이런 셀이 3개가 있다 200 * 3개 => 600개
# TF GRU 는 내부적으로 작은 셀마다 하나의 bias 가 추가된다
# 작은셀 3개 x 8개 뉴런 = 24개
dense_4 (Dense) (None, 1) 9
=================================================================
Total params: 8633 (33.72 KB)
Trainable params: 8633 (33.72 KB)
Non-trainable params: 0 (0.00 Byte)
"""
None  # notebook artifact: suppresses cell output of the bare string above
# Train the GRU with the same optimizer/callback recipe as the LSTM models.
rmsprop = keras.optimizers.RMSprop(learning_rate=1e-4)
model.compile(optimizer=rmsprop, loss='binary_crossentropy', metrics=['accuracy'])
checkpoint_cb = keras.callbacks.ModelCheckpoint(os.path.join(base_path, 'best-gru-model.h5'),
save_best_only=True)
early_stopping_cb = keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True)
history = model.fit(train_seq, train_target, epochs=100, batch_size=64,
validation_data=(val_seq, val_target),
callbacks=[checkpoint_cb, early_stopping_cb])
# Loss curves for the GRU model.
gru_hist = history.history
plt.plot(gru_hist['loss'])
plt.plot(gru_hist['val_loss'])
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'val'])
plt.show()
# Reload the best stacked-LSTM checkpoint and score it on the held-out test set.
rnn_model = keras.models.load_model(os.path.join(base_path, 'best-2lstm-model.h5'))
rnn_model.evaluate(test_seq, test_target)