
[1] import
import tensorflow as tf
import numpy as np
import tensorflow_datasets as tfdf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
[2] Download and prepare the Dataset
이전 실습에서 사용했던 하위 단어 인코딩 세트와 달리, 이번에는 처음부터 어휘를 구축하고 패딩된 시퀀스를 생성해야 한다.
Tokenizer 클래스와 pad_sequences() 함수를 사용하여 이를 수행한다.
# Load the IMDB reviews dataset as (text, label) pairs together with its metadata.
imdb, info = tfdf.load('imdb_reviews', as_supervised=True, with_info=True)
train_data = imdb['train']
test_data = imdb['test']

training_sentences, training_labels = [], []
testing_sentences, testing_labels = [], []

# Walk each split once, decoding the review bytes to str and collecting the
# label values into plain Python lists.
for sentences, labels, split in (
    (training_sentences, training_labels, train_data),
    (testing_sentences, testing_labels, test_data),
):
    for text, label in split:
        sentences.append(text.numpy().decode('utf8'))
        labels.append(label.numpy())

# Keras expects array-like labels, so convert the collected lists.
training_labels_final = np.array(training_labels)
testing_labels_final = np.array(testing_labels)

# Build the vocabulary from the training reviews only; words outside the
# num_words limit are mapped to the '<OOV>' token.
tokenizer = Tokenizer(oov_token='<OOV>', num_words=10000)
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index

# Turn the reviews into integer sequences with the fitted tokenizer.
train_sequences = tokenizer.texts_to_sequences(training_sentences)
test_sequences = tokenizer.texts_to_sequences(testing_sentences)

# Bring every sequence to length 120; longer reviews are cut at the end.
pad_options = {'maxlen': 120, 'truncating': 'post'}
padded_train = pad_sequences(train_sequences, **pad_options)
padded_test = pad_sequences(test_sequences, **pad_options)
[3] Plot Utility
모델을 정의하기 전에, 학습 후 정확도와 손실 내역을 쉽게 확인할 수 있도록 시각화 함수를 먼저 정의한다.
import matplotlib.pyplot as plt
# Plot Utility
def plot_graphs(history, string):
    """Plot one metric and its validation counterpart against epochs.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch values (callers in this file pass the return value
            of ``model.fit``).
        string: Name of the metric to plot; the ``'val_' + string`` series
            is drawn on the same axes.
    """
    val_key = 'val_' + string
    for key in (string, val_key):
        plt.plot(history.history[key])
    plt.xlabel("Epochs")
    plt.ylabel(string)
    plt.legend([string, val_key])
    plt.show()
[4] Model build - (1) Flatten 레이어
# Baseline classifier: embed each of the 120 token ids into 16 dimensions,
# flatten the result into a single vector and classify with a small dense head.
model_flatten = tf.keras.models.Sequential()
model_flatten.add(tf.keras.layers.Embedding(10000, 16, input_length=120))
model_flatten.add(tf.keras.layers.Flatten())
model_flatten.add(tf.keras.layers.Dense(6, activation='relu'))
# Single sigmoid unit to pair with the binary cross-entropy loss below.
model_flatten.add(tf.keras.layers.Dense(1, activation='sigmoid'))

model_flatten.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model_flatten.summary()

# Train for 10 epochs, evaluating on the padded test split after each epoch.
history_flatten = model_flatten.fit(
    padded_train,
    training_labels_final,
    validation_data=(padded_test, testing_labels_final),
    epochs=10,
)

# Show the accuracy curve first, then the loss curve.
for metric in ('accuracy', 'loss'):
    plot_graphs(history_flatten, metric)

# [4] Model build - (2) LSTM layer
# Two stacked bidirectional LSTMs trained on the padded sequences prepared in
# the dataset section.
#
# The embedding's input_dim is 10000 to match Tokenizer(num_words=10000).
# The Keras Tokenizer has no `vocab_size` attribute (presumably a leftover
# from the subword-encoder lab), so referencing it raised AttributeError.
model = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(10000, 64),
    # return_sequences=True hands the next recurrent layer one vector per
    # timestep instead of only the final output.
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    tf.keras.layers.Dense(64, activation='relu'),
    # Single sigmoid unit to pair with the binary cross-entropy loss below.
    tf.keras.layers.Dense(1, activation='sigmoid')
])
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.summary()

# Train on the same padded arrays as the other models in this file; the
# previously referenced `train_dataset` / `test_dataset` are never defined
# here and raised NameError.
history = model.fit(padded_train, training_labels_final,
                    epochs=10,
                    validation_data=(padded_test, testing_labels_final))

# Plot the accuracy and loss curves, reusing the plot_graphs helper (and the
# matplotlib import) from the Plot Utility section instead of redefining them.
plot_graphs(history, "accuracy")
plot_graphs(history, "loss")


[4] Model build - (3) GRU 레이어
# model 3 - GRU
# Recurrent classifier: one bidirectional GRU reads the embedded sequence and
# feeds a small dense head.
gru_layers = [
    tf.keras.layers.Embedding(10000, 16, input_length=120),
    tf.keras.layers.Bidirectional(tf.keras.layers.GRU(32)),
    tf.keras.layers.Dense(6, activation='relu'),
    # Single sigmoid unit to pair with the binary cross-entropy loss below.
    tf.keras.layers.Dense(1, activation='sigmoid'),
]
model_gru = tf.keras.models.Sequential(gru_layers)

model_gru.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model_gru.summary()

# Train for 10 epochs, evaluating on the padded test split after each epoch.
history_gru = model_gru.fit(
    padded_train,
    training_labels_final,
    validation_data=(padded_test, testing_labels_final),
    epochs=10,
)

# Show the accuracy curve first, then the loss curve.
for metric in ('accuracy', 'loss'):
    plot_graphs(history_gru, metric)

[4] Model build - (4) Convolution 레이어
# model 4 - Convolution
# Convolutional classifier: 128 filters of width 5 slide over the embedded
# sequence, and global max pooling keeps each filter's largest value.
model_conv = tf.keras.models.Sequential()
model_conv.add(tf.keras.layers.Embedding(10000, 16, input_length=120))
model_conv.add(tf.keras.layers.Conv1D(filters=128, kernel_size=5, activation='relu'))
model_conv.add(tf.keras.layers.GlobalMaxPooling1D())
model_conv.add(tf.keras.layers.Dense(6, activation='relu'))
# Single sigmoid unit to pair with the binary cross-entropy loss below.
model_conv.add(tf.keras.layers.Dense(1, activation='sigmoid'))

model_conv.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model_conv.summary()

# Train for 10 epochs, evaluating on the padded test split after each epoch.
history_conv = model_conv.fit(
    padded_train,
    training_labels_final,
    validation_data=(padded_test, testing_labels_final),
    epochs=10,
)

# Show the accuracy curve first, then the loss curve.
for metric in ('accuracy', 'loss'):
    plot_graphs(history_conv, metric)
