The goal of this notebook is to use tf.data.Dataset so that the model can consume data efficiently.
The basic machine learning workflow is as follows:
Training
Evaluation
Once the model is built, we evaluate it with the chosen metric.
We will also try out several techniques for preventing overfitting (see the augmentation sketch right after this intro).
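One simple way to push back against overfitting on CIFAR-10 is light data augmentation inside the tf.data pipeline. The lines below are only a sketch; the augment function is hypothetical and is not applied anywhere later in this notebook.
import tensorflow as tf

def augment(image, label):
    # Cheap, label-preserving transforms: random horizontal flip + slight brightness jitter.
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_brightness(image, max_delta=0.1)
    return image, label

# Hypothetical usage on the training split only, before batching:
# train_dataset = train_dataset.map(augment, num_parallel_calls=tf.data.AUTOTUNE)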
# Branch for code compatibility (Colab vs. local)
use_colab = True
assert use_colab in [True, False]
from google.colab import drive
drive.mount('/content/drive')
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Buy a local machine (laptop, desktop)
Use a cloud service such as AWS or Naver Cloud
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
import os
tf.__version__
Datasets bundled with tf.keras: tf.keras.datasets.cifar10, tf.keras.datasets.cifar100, tf.keras.datasets.mnist, tf.keras.datasets.fashion_mnist, tf.keras.datasets.imdb
test_data_split, valid_data, test_labels_split, valid_labels = \
    train_test_split(test_data, test_labels, test_size=0.2, shuffle=True)
# Load training and eval data from tf.keras
(train_data, train_labels), (test_data, test_labels) = \
tf.keras.datasets.cifar10.load_data()
!cd /root/.keras/datasets/ && ls -al
#total 166520
#drwxr-xr-x 3 root root 4096 Dec 29 01:11 .
#drwxr-xr-x 1 root root 4096 Dec 29 01:11 ..
#drwxr-xr-x 2 2156 1103 4096 Jun 4 2009 cifar-10-batches-py
#-rw-r--r-- 1 root root 170498071 Dec 29 01:11 cifar-10-batches-py.tar.gz
print(train_data.shape, train_labels.shape)
print(test_data.shape, test_labels.shape)
print(test_labels[0])  # when building the dataset, the label shape must be changed from [3] to the scalar 3
#(50000, 32, 32, 3) (50000, 1)
#(10000, 32, 32, 3) (10000, 1)
#[3]
test_data, valid_data, test_labels, valid_labels = \
    train_test_split(test_data, test_labels, test_size=0.1, shuffle=True)  # test_size is a fraction between 0 and 1
# test_size=0.1 -> 10% of the original test set becomes the validation split
# test_size=0.2 -> 20%
# raw data normalization
# RGB values lie in the range 0~255, so divide by 255 to normalize
train_data = train_data / 255.
train_data = train_data.reshape([-1, 32, 32, 3])
train_labels = train_labels.reshape([-1])  # (N, 1) -> (N,)
valid_data = valid_data / 255.
valid_data = valid_data.reshape([-1, 32, 32, 3])
valid_labels = valid_labels.reshape([-1])
test_data = test_data / 255.
test_data = test_data.reshape([-1, 32, 32, 3])
test_labels = test_labels.reshape([-1])
# [num_examples, 32, 32, 3] [num_examples,]
print(train_data.shape, train_labels.shape)
print(valid_data.shape, valid_labels.shape)
print(test_data.shape, test_labels.shape)
#(50000, 32, 32, 3) (50000,)
#(1000, 32, 32, 3) (1000,)
#(9000, 32, 32, 3) (9000,)
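As an aside, an alternative to plain 0~1 scaling is per-channel standardization using statistics from the training split. The lines below are only a sketch and are not used in the rest of this notebook.
# Sketch (not used below): zero-mean / unit-variance per RGB channel.
channel_mean = train_data.mean(axis=(0, 1, 2), keepdims=True)  # shape (1, 1, 1, 3)
channel_std = train_data.std(axis=(0, 1, 2), keepdims=True)
train_data_standardized = (train_data - channel_mean) / (channel_std + 1e-7)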
def one_hot_label(image, label):  # convert the integer label into a one-hot vector
label = tf.one_hot(label, depth=10)
return image, label
tf.one_hot(
    indices, depth, on_value=None, off_value=None, axis=None, dtype=None, name=None
)
# indices = [0, 1, 2], depth = 3
# tf.one_hot(indices, depth)  # output: [3 x 3]
# [[1., 0., 0.],
#  [0., 1., 0.],
#  [0., 0., 1.]]
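As a quick check of what one_hot_label will produce, a CIFAR-10 label such as 3 becomes a length-10 vector with a 1 at index 3:
print(tf.one_hot(3, depth=10).numpy())
# [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]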
batch_size = 32
# for train
N = len(train_data)
train_dataset = tf.data.Dataset.from_tensor_slices((train_data, train_labels))
train_dataset = train_dataset.map(one_hot_label)  # pass the function by name (no parentheses)
train_dataset = train_dataset.shuffle(10000).repeat().batch(batch_size=batch_size)
print(train_dataset)
# for valid
valid_dataset = tf.data.Dataset.from_tensor_slices((valid_data, valid_labels))
valid_dataset = valid_dataset.map(one_hot_label)
valid_dataset = valid_dataset.batch(batch_size=batch_size)
print(valid_dataset)
# for test
test_dataset = tf.data.Dataset.from_tensor_slices((test_data, test_labels))
test_dataset = test_dataset.map(one_hot_label)
test_dataset = test_dataset.batch(batch_size=batch_size)
print(test_dataset)
#<_BatchDataset element_spec=(TensorSpec(shape=(None, 32, 32, 3), dtype=tf.float64, name=None), TensorSpec(shape=(None, 10), dtype=tf.float32, name=None))>
#<_BatchDataset element_spec=(TensorSpec(shape=(None, 32, 32, 3), dtype=tf.float64, name=None), TensorSpec(shape=(None, 10), dtype=tf.float32, name=None))>
#<_BatchDataset element_spec=(TensorSpec(shape=(None, 32, 32, 3), dtype=tf.float64, name=None), TensorSpec(shape=(None, 10), dtype=tf.float32, name=None))>
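Before training, it can help to pull a single batch from the pipeline and confirm its shapes; this optional check is not part of the original flow.
# Optional sanity check: one batch of (images, one-hot labels).
images, labels = next(iter(train_dataset))
print(images.shape, labels.shape)  # expected: (32, 32, 32, 3) (32, 10)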
# the save point
if use_colab:
checkpoint_dir = './drive/My Drive/train_ckpt/cifar10_classification/exp1'
else:
checkpoint_dir = 'cifar10_classification/exp1'
if not os.path.isdir(checkpoint_dir):
os.makedirs(checkpoint_dir)
To resume from a saved checkpoint later, instantiate the model you implemented and call model.load_weights(checkpoint_dir).
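A minimal sketch of that restore step, assuming the VGGlikeModel class defined further below and weights already saved at checkpoint_dir:
# Sketch: rebuild the model and load the best weights written by the checkpoint callback
# (assumes VGGlikeModel is defined and the checkpoint exists).
# model = VGGlikeModel()
# model.load_weights(checkpoint_dir)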
index = 219 # change the index to look at other images (0~49999)
print("label = {}".format(train_labels[index]))
plt.imshow(train_data[index].reshape(32, 32, 3))
plt.colorbar()
#plt.gca().grid(False)
plt.show()
class Conv(tf.keras.Model):
def __init__(self, num_filters, kernel_size=3):
super(Conv, self).__init__()  # __init__ is the "parts warehouse": the layers used in call() are created here
self.conv1 = layers.Conv2D(num_filters, kernel_size, padding='same')
self.conv2 = layers.Conv2D(num_filters, kernel_size, padding='same')
self.bn1 = layers.BatchNormalization()
self.bn2 = layers.BatchNormalization()
def call(self, inputs, skip=None, training=True):
x = self.conv1(inputs)
x = self.bn1(x)
x = layers.Activation('relu')(x)
x = self.conv2(x)
x = self.bn2(x)
if skip is not None:
x = tf.concat([x, inputs], -1)
x = layers.Activation('relu')(x)
return x
class Dense(tf.keras.Model):
def __init__(self, num_nodes=1024):
super(Dense, self).__init__()
self.dense1 = layers.Dense(num_nodes)
self.dense2 = layers.Dense(num_nodes)
self.bn1 = layers.BatchNormalization()
self.bn2 = layers.BatchNormalization()
def call(self, inputs, training=True):
x = self.dense1(inputs)
x = self.bn1(x)
x = layers.Activation("relu")(x)
x = layers.Dropout(0.5)(x)
x = self.dense2(x)
x = self.bn2(x)
x = layers.Activation("relu")(x)
return x
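To sanity-check the two building blocks before assembling the full model, one can push a small random batch through them; this is an optional check, not part of the original notebook.
# Quick shape check on random data (batch of 4 CIFAR-sized images).
dummy = tf.random.normal([4, 32, 32, 3])
conv_block = Conv(32)
print(conv_block(dummy).shape)             # (4, 32, 32, 32) without the skip connection
print(conv_block(dummy, skip=True).shape)  # (4, 32, 32, 35): 32 filters concatenated with the 3 input channels
fc_block = Dense()
print(fc_block(layers.Flatten()(dummy)).shape)  # (4, 1024)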
class VGGlikeModel(tf.keras.Model):
def __init__(self):
super(VGGlikeModel, self).__init__()
self.conv_block1 = Conv(32)
self.conv_block2 = Conv(64)
self.conv_block3 = Conv(128)
self.conv_block4 = Conv(256)
self.fc = Dense()
self.outputs = layers.Dense(10)
def call(self, inputs, training=True):
x = self.conv_block1(inputs, True)
x = layers.MaxPooling2D()(x)
x = self.conv_block2(x, True)
x = layers.MaxPooling2D()(x)
x = self.conv_block3(x, True)
x = layers.MaxPooling2D()(x)
x = self.conv_block4(x, True)
x = layers.Flatten()(x)
x = self.fc(x)
x = self.outputs(x)
return x
model = VGGlikeModel()
# input_tensor = layers.Input(shape=(32, 32, 3,))
# x = layers.Conv2D(32, 3, padding='same',
# kernel_initializer=tf.keras.initializers.HeUniform())(input_tensor)
# x = layers.BatchNormalization()(x)
# x = layers.Activation('relu')(x)
# x = layers.Conv2D(32, 3, padding='same',
# kernel_initializer=tf.keras.initializers.HeUniform())(x)
# x = layers.BatchNormalization()(x)
# x = layers.Activation('relu')(x)
# x_skip = layers.MaxPooling2D()(x) # 16, 16, 32
# x = layers.Conv2D(64, 3, padding='same',
# kernel_initializer=tf.keras.initializers.HeUniform())(x_skip)
# x = layers.BatchNormalization()(x)
# x = layers.Activation('relu')(x)
# x = layers.Conv2D(64, 3, padding='same',
# kernel_initializer=tf.keras.initializers.HeUniform())(x)
# x = layers.BatchNormalization()(x)
# x = layers.Activation('relu')(x)
# x = layers.Conv2D(64, 3, padding='same',
# kernel_initializer=tf.keras.initializers.HeUniform())(x)
# x = layers.BatchNormalization()(x) # 16, 16, 64
# x = tf.concat([x, x_skip], -1) # 16, 16, 96
# x = layers.Activation('relu')(x)
# x = layers.MaxPooling2D()(x)
# x = layers.Conv2D(128, 3, padding='same',
# kernel_initializer=tf.keras.initializers.HeUniform())(x)
# x = layers.BatchNormalization()(x)
# x = layers.Activation('relu')(x)
# x = layers.Conv2D(128, 3, padding='same',
# kernel_initializer=tf.keras.initializers.HeUniform())(x)
# x = layers.BatchNormalization()(x)
# x = layers.Activation('relu')(x)
# x = layers.Conv2D(128, 3, padding='same',
# kernel_initializer=tf.keras.initializers.HeUniform())(x)
# x = layers.BatchNormalization()(x)
# x = layers.Activation('relu')(x)
# x = layers.MaxPooling2D()(x)
# x = layers.Conv2D(256, 3, padding='same',
# kernel_initializer=tf.keras.initializers.HeUniform())(x)
# x = layers.BatchNormalization()(x)
# x = layers.Activation('relu')(x)
# x = layers.Conv2D(256, 3, padding='same',
# kernel_initializer=tf.keras.initializers.HeUniform())(x)
# x = layers.BatchNormalization()(x)
# x = layers.Activation('relu')(x)
# x = layers.Conv2D(256, 3, padding='same',
# kernel_initializer=tf.keras.initializers.HeUniform())(x)
# x = layers.BatchNormalization()(x)
# x = layers.Activation('relu')(x)
# x = layers.Flatten()(x)
# x = layers.Dense(1024,
# kernel_initializer=tf.keras.initializers.HeUniform())(x)
# x = layers.BatchNormalization()(x)
# x = layers.Activation('relu')(x)
# x = layers.Dropout(0.5)(x)
# x = layers.Dense(1024,
# kernel_initializer=tf.keras.initializers.HeUniform())(x)
# x = layers.BatchNormalization()(x)
# x = layers.Activation('relu')(x)
# output_tensor = layers.Dense(10)(x)
# model = tf.keras.Model(input_tensor, output_tensor)
# without training, just inference a model in eager execution:
predictions = model(train_data[0:1], training=False)
print("Predictions: ", predictions.numpy()) # 총 10개의 데이터가 나오면 모델이 잘 구성된겁니다.
#Predictions: [[ 0.01264782 0.03612096 0.06472357 0.18101807 0.05038511 0.01011162
# -0.13988994 0.00797083 -0.1587979 -0.04783075]]
model.compile(optimizer=tf.keras.optimizers.Adam(), # the learning rate defaults to 1e-3
loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
metrics=['accuracy'])
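Adam's learning rate defaults to 1e-3; to set it explicitly or decay it over training, a schedule can be passed instead. The lines below are just a sketch (the decay_steps and decay_rate values are arbitrary) and are not used in this run.
# Sketch: compile with an explicit learning-rate schedule instead of the default 1e-3.
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-3, decay_steps=5000, decay_rate=0.9)
# model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
#               loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
#               metrics=['accuracy'])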
model.summary()
Model: "vg_glike_model"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv (Conv) multiple 10400
conv_1 (Conv) multiple 57664
conv_2 (Conv) multiple 262784
conv_3 (Conv) multiple 1115392
dense (Dense) multiple 8972288
dense_3 (Dense) multiple 10250
=================================================================
Total params: 10428778 (39.78 MB)
Trainable params: 10422762 (39.76 MB)
Non-trainable params: 6016 (23.50 KB)
_________________________________________________________________
cp_callback = tf.keras.callbacks.ModelCheckpoint(checkpoint_dir, # save point dir
save_weights_only=True,
monitor='val_loss',
mode='auto',
save_best_only=True,
verbose=1)
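To stop training once the validation loss stops improving (one of the overfitting countermeasures mentioned at the top), an EarlyStopping callback could be added next to the checkpoint callback; this is a sketch and is not used in the run below.
# Sketch: stop when val_loss has not improved for 5 epochs and keep the best weights.
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                              patience=5,
                                              restore_best_weights=True)
# callbacks=[cp_callback, early_stop] would enable it in model.fit below.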
max_epochs = 20
# using `tf.data.Dataset`
history = model.fit(train_dataset,
steps_per_epoch=len(train_data) // batch_size, # number of training examples // batch size
epochs=max_epochs,
validation_data=valid_dataset,
validation_steps=len(valid_data) // batch_size,
callbacks=[cp_callback]
)
Epoch 1/20
1560/1562 [============================>.] - ETA: 0s - loss: 1.2029 - accuracy: 0.5797
Epoch 1: val_loss improved from inf to 1.20217, saving model to ./drive/My Drive/train_ckpt/cifar10_classification/exp1
1562/1562 [==============================] - 36s 15ms/step - loss: 1.2025 - accuracy: 0.5798 - val_loss: 1.2022 - val_accuracy: 0.5575
Epoch 2/20
1561/1562 [============================>.] - ETA: 0s - loss: 0.7655 - accuracy: 0.7331
Epoch 2: val_loss improved from 1.20217 to 0.67656, saving model to ./drive/My Drive/train_ckpt/cifar10_classification/exp1
1562/1562 [==============================] - 30s 19ms/step - loss: 0.7654 - accuracy: 0.7332 - val_loss: 0.6766 - val_accuracy: 0.7601
Epoch 3/20
1560/1562 [============================>.] - ETA: 0s - loss: 0.6093 - accuracy: 0.7895
Epoch 3: val_loss did not improve from 0.67656
1562/1562 [==============================] - 22s 14ms/step - loss: 0.6090 - accuracy: 0.7896 - val_loss: 0.7080 - val_accuracy: 0.7540
Epoch 4/20
1558/1562 [============================>.] - ETA: 0s - loss: 0.4966 - accuracy: 0.8274
Epoch 4: val_loss did not improve from 0.67656
1562/1562 [==============================] - 26s 16ms/step - loss: 0.4963 - accuracy: 0.8275 - val_loss: 0.7635 - val_accuracy: 0.7611
Epoch 5/20
1561/1562 [============================>.] - ETA: 0s - loss: 0.4116 - accuracy: 0.8564
Epoch 5: val_loss improved from 0.67656 to 0.48932, saving model to ./drive/My Drive/train_ckpt/cifar10_classification/exp1
1562/1562 [==============================] - 21s 13ms/step - loss: 0.4117 - accuracy: 0.8563 - val_loss: 0.4893 - val_accuracy: 0.8488
Epoch 6/20
1227/1562 [======================>.......] - ETA: 4s - loss: 0.3422 - accuracy: 0.8804
(Training was interrupted manually during epoch 6, so model.fit raised a KeyboardInterrupt; the full traceback is omitted.)
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs_range = range(len(acc))
plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Valid Accuracy')
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Valid Loss')
plt.show()
model.load_weights(checkpoint_dir)
results = model.evaluate(test_dataset, steps=len(test_data) // batch_size)
# loss
print("loss value: {:.3f}".format(results[0]))
# accuracy
print("accuracy value: {:.4f}%".format(results[1]*100))
test_batch_size = 16
batch_index = np.random.choice(len(test_data), size=test_batch_size, replace=False)
batch_xs = test_data[batch_index]
batch_ys = test_labels[batch_index]
y_pred_ = model(batch_xs, training=False)
fig = plt.figure(figsize=(16, 10))
for i, (px, py) in enumerate(zip(batch_xs, y_pred_)):
p = fig.add_subplot(4, 8, i+1)
if np.argmax(py) == batch_ys[i]:
p.set_title("y_pred: {}".format(np.argmax(py)), color='blue')
else:
p.set_title("y_pred: {}".format(np.argmax(py)), color='red')
p.imshow(px.reshape(32, 32, 3))
p.axis('off')
def final_score():
    print("Model params num : " + str(model.count_params()))
    print("Accuracy : " + str(results[1]))
    s = (model.count_params() * 32) / (1024 ** 2)  # model size in Mbit (32 bits per parameter)
    score = 50 * (results[1] + min((1/s), 1))      # accuracy plus a small-model bonus, scaled so the maximum is 100
    print("score : " + str(score))
final_score()
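For reference, plugging the parameter count reported by model.summary() above into this formula: 10,428,778 parameters × 32 bits ≈ 318.3 Mbit, so the size bonus min(1/s, 1) is only about 0.003 and the score is essentially 50 × accuracy.
# Worked example with the parameter count from the summary above.
params = 10_428_778
s = params * 32 / (1024 ** 2)  # ≈ 318.3 Mbit
print(min(1 / s, 1))           # ≈ 0.0031, a negligible size bonus for this model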
(train_data, train_labels), (test_data, test_labels) = \
    tf.keras.datasets.cifar100.load_data()