The goal of this notebook is to use ImageDataGenerator so that the model can consume the data efficiently.
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D, BatchNormalization, ReLU, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
import numpy as np
import matplotlib.pyplot as plt
tf.__version__
# 2.15.0
use_colab = True  # set to False when running outside Colab; checkpoints then go to a local folder
assert use_colab in [True, False]
_URL = 'https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip'
path_to_zip = tf.keras.utils.get_file('cats_and_dogs.zip', origin=_URL, extract=True)
PATH = os.path.join(os.path.dirname(path_to_zip), 'cats_and_dogs_filtered')
#Downloading data from https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip
#68606236/68606236 [==============================] - 0s 0us/step
PATH
#/root/.keras/datasets/cats_and_dogs_filtered
train_dir = os.path.join(PATH, 'train')
validation_dir = os.path.join(PATH, 'validation')
print(train_dir, validation_dir)
#/root/.keras/datasets/cats_and_dogs_filtered/train /root/.keras/datasets/cats_and_dogs_filtered/validation
# directory with our training cat pictures
train_cats_dir = os.path.join(train_dir, 'cats')
print(train_cats_dir)
# directory with our training dog pictures
train_dogs_dir = os.path.join(train_dir, 'dogs')
# directory with our validation cat pictures
validation_cats_dir = os.path.join(validation_dir, 'cats')
# directory with our validation dog pictures
validation_dogs_dir = os.path.join(validation_dir, 'dogs')
print(validation_dogs_dir)
#/root/.keras/datasets/cats_and_dogs_filtered/train/cats
#/root/.keras/datasets/cats_and_dogs_filtered/validation/dogs
num_cats_tr = len(os.listdir(train_cats_dir))
print('total training cat images:', num_cats_tr)
num_dogs_tr = len(os.listdir(train_dogs_dir))
print('total training dog images:', num_dogs_tr)
print("--")
num_cats_val = len(os.listdir(validation_cats_dir))
print('total validation cat images:', num_cats_val)
num_dogs_val = len(os.listdir(validation_dogs_dir))
print('total validation dog images:', num_dogs_val)
print("--")
total_train = num_cats_tr + num_dogs_tr
print("Total training images:", total_train)
total_val = num_cats_val + num_dogs_val
print("Total validation images:", total_val)
#total training cat images: 1000
#total training dog images: 1000
#--
#total validation cat images: 500
#total validation dog images: 500
#--
#Total training images: 2000
#Total validation images: 1000
batch_size = 32  # example value; any small power of two is a reasonable starting point
epochs = 20      # example value; raise it if the validation curves are still improving
IMG_HEIGHT = 224
IMG_WIDTH = 224
# Helper that displays images stored in the dataset;
# used below to visually inspect samples.
def plotImages(images_arr):
    fig, axes = plt.subplots(1, 5, figsize=(20, 20))
    axes = axes.flatten()
    for img, ax in zip(images_arr, axes):
        ax.imshow(img)
        ax.axis('off')
    plt.tight_layout()
    plt.show()
# Generator for our training data
image_gen_train = ImageDataGenerator(rescale=1./255,         # map pixel values into [0, 1]
                                     rotation_range=45,      # the augmentation settings below are example values; tune as needed
                                     width_shift_range=0.15,
                                     height_shift_range=0.15,
                                     zoom_range=0.5,
                                     horizontal_flip=True,
                                     vertical_flip=False)    # upside-down animals rarely help
train_data_gen = image_gen_train.flow_from_directory(batch_size=batch_size,
                                                     directory=train_dir,
                                                     shuffle=True,
                                                     target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                     class_mode='binary')
# Indexing train_data_gen[0] repeatedly re-applies fresh random transforms,
# so these five panels show different augmentations of the same source image.
augmented_images = [train_data_gen[0][0][0] for i in range(5)]
plotImages(augmented_images)
# Generator for our validation data: only rescaling, no augmentation,
# so the validation metrics reflect unmodified images.
image_gen_val = ImageDataGenerator(rescale=1./255)
val_data_gen = image_gen_val.flow_from_directory(batch_size=batch_size,
                                                 directory=validation_dir,
                                                 target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                 class_mode='binary')
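# flow_from_directory assigns class indices alphabetically by subdirectory name;
# a quick way to confirm the mapping that both generators share:
print(train_data_gen.class_indices)  # expected: {'cats': 0, 'dogs': 1}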
sample_training_images, _ = next(train_data_gen)
plotImages(sample_training_images[:5])
_[:5]  # the labels for this batch (0 = cat, 1 = dog)
model = Sequential()
model.add(Flatten(input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)))  # flatten pixels so Dense sees one vector per image
model.add(Dense(1, activation='linear'))                    # raw logits for binary classification
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),  # example value from the suggested 0.0001~0.000001 range
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),  # True because the output layer is linear
              metrics=['accuracy'])
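# The single Dense layer above is only a linear baseline. The imports at the top
# (Conv2D, MaxPooling2D, BatchNormalization, ReLU, GlobalAveragePooling2D) suggest
# a small CNN is the intended model. Here is one minimal sketch; the filter counts
# and depth are assumptions, not the notebook's official answer.
cnn_model = Sequential([
    Conv2D(32, 3, padding='same', input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
    BatchNormalization(),
    ReLU(),
    MaxPooling2D(),
    Conv2D(64, 3, padding='same'),
    BatchNormalization(),
    ReLU(),
    MaxPooling2D(),
    Conv2D(128, 3, padding='same'),
    BatchNormalization(),
    ReLU(),
    GlobalAveragePooling2D(),  # one value per channel, keeping the parameter count small
    Dense(1)                   # logits, to pair with from_logits=True
])
cnn_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
                  loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                  metrics=['accuracy'])
# To train this sketch instead, use cnn_model in place of model in the fit call below.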
# the save point
if use_colab:
    checkpoint_dir = './drive/My Drive/train_ckpt/catsanddogs/exp1'
    if not os.path.isdir(checkpoint_dir):
        os.makedirs(checkpoint_dir)
else:
    checkpoint_dir = 'catsanddogs/exp1'
cp_callback = tf.keras.callbacks.ModelCheckpoint(checkpoint_dir,
                                                 save_weights_only=True,
                                                 monitor='val_loss',
                                                 mode='auto',
                                                 save_best_only=True,
                                                 verbose=1)
#model.load_weights(checkpoint_dir) # works only when the model has exactly the same architecture as the saved one.
history = model.fit(
    train_data_gen,
    steps_per_epoch=total_train // batch_size,  # one pass over the training set per epoch
    epochs=epochs,
    validation_data=val_data_gen,
    validation_steps=total_val // batch_size,
    callbacks=[cp_callback]
)
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs_range = range(epochs)
plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()
model.load_weights(checkpoint_dir)
eval_result = model.evaluate(val_data_gen)  # renamed from `eval`, which shadows the Python built-in

def final_score():
    print("Model params num : " + str(model.count_params()))
    print("Accuracy : " + str(eval_result[1]))
    s = (model.count_params() * 32) / (1024 ** 2)  # model size in Mibit: 32 bits per float32 parameter
    score = 50 * (eval_result[1] + min((1 / s), 1))  # rewards accuracy plus a bonus for small models
    print("score : " + str(score))

final_score()
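# Quick sanity check on the formula: a hypothetical model with 1,000,000 float32
# parameters occupies s = 1000000 * 32 / 1024**2 ≈ 30.5 Mibit, so its size bonus is
# min(1/s, 1) ≈ 0.033; at 90% accuracy the score would be 50 * (0.9 + 0.033) ≈ 46.6.
# The linear baseline above (150,529 params, s ≈ 4.6) earns a size bonus of about 0.22.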