
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
import numpy as np
import matplotlib.pyplot as plt
import os
# Training hyperparameters.
learning_rate, training_epochs, batch_size = 0.001, 15, 100

# Checkpoints are written under <cwd>/checkpoints/mnist_cnn_seq/; create the
# directory up front so saving cannot fail on a missing path.
ckpt_dir_name, model_dir_name = "checkpoints", "mnist_cnn_seq"
cur_dir = os.getcwd()
checkpoint_dir = os.path.join(cur_dir, ckpt_dir_name, model_dir_name)
os.makedirs(checkpoint_dir, exist_ok=True)
# File prefix used when saving checkpoints: <checkpoint_dir>/mnist_cnn_seq
checkpoint_prefix = os.path.join(checkpoint_dir, model_dir_name)

# Load MNIST, already split into train and test sets.
mnist = keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Scale pixel values by 1/255 into the 0~1 range, then append a channel axis:
# (batch, width, height) => (batch, width, height, channel).
train_images = (train_images.astype(np.float32) / 255.0)[..., np.newaxis]
test_images = (test_images.astype(np.float32) / 255.0)[..., np.newaxis]

# One-hot encode the labels over 10 classes.
train_labels = to_categorical(train_labels, num_classes=10)
test_labels = to_categorical(test_labels, num_classes=10)

# Training data is shuffled before batching; test data is only batched.
train_dataset = tf.data.Dataset.from_tensor_slices((train_images, train_labels))
train_dataset = train_dataset.shuffle(buffer_size=100000).batch(batch_size)
test_dataset = tf.data.Dataset.from_tensor_slices((test_images, test_labels))
test_dataset = test_dataset.batch(batch_size)
def create_model():
    """Build the MNIST CNN with the Keras Sequential API.

    The network stacks three Conv2D (3x3, ReLU, "SAME" padding) + MaxPool2D
    stages with 32, 64 and 128 filters, flattens the result, and finishes
    with Dense(256, ReLU) -> Dropout(0.4) -> Dense(10).

    Returns:
        keras.Sequential: Model that takes (28, 28, 1) inputs and returns 10
        raw logits per example (the final Dense layer has no activation).
    """
    model = keras.Sequential()
    # Declare the input shape with an explicit Input object rather than the
    # `input_shape=` argument on the first layer.
    model.add(keras.Input(shape=(28, 28, 1)))
    # Layers are stacked one on top of another, like bricks.
    for filters in (32, 64, 128):
        model.add(
            keras.layers.Conv2D(
                filters=filters,
                kernel_size=3,
                activation=tf.nn.relu,
                padding="SAME",
            )
        )
        # MaxPool2D defaults: 2x2 window, stride 2.
        model.add(keras.layers.MaxPool2D(padding="SAME"))
    # Flatten the feature maps into a vector before the fully connected part.
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(256, activation=tf.nn.relu))
    model.add(keras.layers.Dropout(0.4))
    model.add(keras.layers.Dense(10))
    return model


model = create_model()
model.summary()
model.summary() 출력으로 모델 구조 확인

def loss_fn(model, images, labels):
    """Return the mean softmax cross-entropy of `model` on one batch.

    Args:
        model: Callable model that returns unnormalized logits.
        images: Input batch forwarded to the model.
        labels: Target distribution per example, same shape as the logits
            (the script builds these as one-hot vectors with to_categorical).

    Returns:
        Scalar tensor: cross-entropy averaged over the batch.
    """
    # training=True enables dropout during the forward pass.
    logits = model(images, training=True)
    # TF2 name of the op; the TF1-era `*_v2` alias is not available in TF2.
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
        labels=labels,
        logits=logits,
    )
    return tf.reduce_mean(per_example_loss)
def grad(model, images, labels):
    """Return the gradients of the batch loss w.r.t. `model.variables`.

    The loss is computed by `loss_fn` inside a GradientTape so that the
    forward pass is recorded for differentiation.
    """
    with tf.GradientTape() as tape:
        batch_loss = loss_fn(model, images, labels)
    # NOTE(review): differentiates w.r.t. model.variables so the result lines
    # up with the training loop's zip(grads, model.variables); consider
    # switching both sides to model.trainable_variables - confirm.
    gradients = tape.gradient(batch_loss, model.variables)
    return gradients
# Adam optimizer. tf.train.AdamOptimizer is a TF1-only API, so use the Keras
# optimizer, which is the TF2 equivalent.
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
def evaluate(model, images, labels):
    """Return the classification accuracy of `model` on one batch.

    The model runs with training=False, and the predicted class (argmax of
    the logits along axis 1) is compared with the argmax of `labels`.

    Returns:
        Scalar float32 tensor: fraction of examples predicted correctly.
    """
    logits = model(images, training=False)
    predicted_class = tf.argmax(logits, axis=1)
    expected_class = tf.argmax(labels, axis=1)
    hits = tf.cast(tf.equal(predicted_class, expected_class), tf.float32)
    return tf.reduce_mean(hits)
# Checkpoint object that tracks the model under the name "cnn".
checkpoint = tf.train.Checkpoint(cnn=model)

for epoch in range(training_epochs):
    # Per-epoch running sums; each is divided by its batch count below.
    avg_loss = avg_train_acc = avg_test_acc = 0.0
    train_step = test_step = 0

    # Training pass: one optimizer update per batch.
    for images, labels in train_dataset:
        grads = grad(model, images, labels)
        optimizer.apply_gradients(zip(grads, model.variables))
        # Loss and accuracy are measured on the same batch after the update.
        loss = loss_fn(model, images, labels)
        acc = evaluate(model, images, labels)
        avg_loss += loss
        avg_train_acc += acc
        train_step += 1
    avg_loss /= float(train_step)
    avg_train_acc /= float(train_step)

    # Evaluation pass over the test set (no weight updates).
    for images, labels in test_dataset:
        acc = evaluate(model, images, labels)
        avg_test_acc += acc
        test_step += 1
    avg_test_acc /= float(test_step)

    print(
        "Epoch: ",
        str(epoch + 1),
        "loss = ",
        format(avg_loss, ".8f"),
        "train accuracy = ",
        format(avg_train_acc, ".4f"),
        "test accuracy = ",
        format(avg_test_acc, ".4f"),
    )

    # Save a checkpoint at the end of every epoch.
    checkpoint.save(file_prefix=checkpoint_prefix)
다중 입력·다중 출력 모델, 레이어를 공유하는 모델, residual connection처럼 데이터 흐름이 순차적이지 않은 모델 구조는 Sequential API로 구현할 수 없다.

⇒ 이런 케이스를 다루기 위해 Functional API를 사용한다.
Sequential 대신 Functional API로 동일한 CNN을 구성한 예시이다.
def create_model():
    """Build the MNIST CNN with the Keras Functional API.

    Each layer is called on the tensor produced by the previous one, starting
    from an explicit Input: three Conv2D (3x3, ReLU, "SAME") + MaxPool2D
    stages with 32, 64 and 128 filters, then Flatten -> Dense(256, ReLU) ->
    Dropout(0.4) -> Dense(10).

    Returns:
        keras.Model: Model mapping (28, 28, 1) inputs to 10 raw logits.
    """
    # Declare the input layer.
    inputs = keras.Input(shape=(28, 28, 1))

    # Thread the running tensor through the convolution/pooling stages.
    net = inputs
    for filters in (32, 64, 128):
        net = keras.layers.Conv2D(
            filters=filters,
            kernel_size=3,
            padding="SAME",
            activation=tf.nn.relu,
        )(net)
        net = keras.layers.MaxPool2D(padding="SAME")(net)

    net = keras.layers.Flatten()(net)
    net = keras.layers.Dense(units=256, activation=tf.nn.relu)(net)
    net = keras.layers.Dropout(rate=0.4)(net)
    logits = keras.layers.Dense(units=10)(net)

    # A Functional model is defined by naming its inputs and outputs.
    return keras.Model(inputs=inputs, outputs=logits)


model = create_model()
model.summary()
model.summary() 출력으로 모델 구조 확인

ResNet 스타일의 Residual Block을 Functional API로 구현한 예시이다.

# Residual block built with the Functional API:
# 1x1 conv (64 filters) -> 3x3 conv (64 filters) -> 1x1 conv (256 filters),
# then the block input is added back (skip connection) and ReLU is applied.
# The incoming feature map is assumed to have 256 channels.
inputs = keras.Input(shape=(28, 28, 256))
# Use the relu function as the activation, like the other Conv2D layers in
# this file, instead of passing a ReLU layer instance as `activation`.
conv1 = keras.layers.Conv2D(
    filters=64,
    kernel_size=1,
    padding="SAME",
    activation=tf.nn.relu,
)(inputs)
conv2 = keras.layers.Conv2D(
    filters=64,
    kernel_size=3,
    padding="SAME",
    activation=tf.nn.relu,
)(conv1)
# No activation here: ReLU is applied after the skip connection is added.
conv3 = keras.layers.Conv2D(
    filters=256,
    kernel_size=1,
    padding="SAME",
)(conv2)
# Add the block input to the conv3 output to form the skip connection.
add3 = keras.layers.add([conv3, inputs])
relu3 = keras.layers.ReLU()(add3)
residual_block = keras.Model(inputs=inputs, outputs=relu3)
# Model subclassing: inherit from tf.keras.Model to build a fully customizable
# model. Layers are created in __init__ and stored as attributes of the
# instance; the forward pass is defined in call().
class MNISTModel(tf.keras.Model):
    """MNIST CNN written as a tf.keras.Model subclass.

    Three Conv2D (3x3, ReLU, "SAME") + MaxPool2D stages with 32, 64 and 128
    filters, followed by Flatten -> Dense(256, ReLU) -> Dropout(0.4) ->
    Dense(10). The output is raw logits (the last layer has no activation).
    """

    def __init__(self):
        """Create every layer once and keep it as an instance attribute."""
        super().__init__()
        self.conv1 = keras.layers.Conv2D(
            filters=32,
            kernel_size=3,
            padding="SAME",
            activation=tf.nn.relu,
        )
        self.pool1 = keras.layers.MaxPool2D(padding="SAME")
        self.conv2 = keras.layers.Conv2D(
            filters=64,
            kernel_size=3,
            padding="SAME",
            activation=tf.nn.relu,
        )
        self.pool2 = keras.layers.MaxPool2D(padding="SAME")
        self.conv3 = keras.layers.Conv2D(
            filters=128,
            kernel_size=3,
            padding="SAME",
            activation=tf.nn.relu,
        )
        self.pool3 = keras.layers.MaxPool2D(padding="SAME")
        self.pool3_flat = keras.layers.Flatten()
        self.dense4 = keras.layers.Dense(units=256, activation=tf.nn.relu)
        self.drop4 = keras.layers.Dropout(rate=0.4)
        self.dense5 = keras.layers.Dense(units=10)

    def call(self, inputs, training=False):
        """Run the forward pass.

        Args:
            inputs: Input batch fed to the first convolution.
            training: Whether the call is part of training; forwarded to the
                Dropout layer so that dropout is only active when True.

        Returns:
            Tensor of 10 raw logits per example.
        """
        net = self.conv1(inputs)
        net = self.pool1(net)
        net = self.conv2(net)
        net = self.pool2(net)
        net = self.conv3(net)
        net = self.pool3(net)
        net = self.pool3_flat(net)
        net = self.dense4(net)
        # Pass the flag explicitly instead of relying on implicit propagation.
        net = self.drop4(net, training=training)
        net = self.dense5(net)
        return net


model = MNISTModel()
# ============================================
# 0. Import Libraries
# ============================================
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2" # 0: INFO, 1: WARNING, 2: ERROR, 3: FATAL
import tensorflow as tf
tf.get_logger().setLevel('ERROR')
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
import numpy as np
import matplotlib.pyplot as plt
# ============================================
# 1. Hyperparameters
# ============================================
learning_rate, training_epochs, batch_size = 0.001, 15, 100

# Checkpoints are written under <cwd>/checkpoints/mnist_cnn_seq/; create the
# directory up front so saving cannot fail on a missing path.
ckpt_dir_name, model_dir_name = "checkpoints", "mnist_cnn_seq"
cur_dir = os.getcwd()
checkpoint_dir = os.path.join(cur_dir, ckpt_dir_name, model_dir_name)
os.makedirs(checkpoint_dir, exist_ok=True)
# File prefix used when saving checkpoints: <checkpoint_dir>/mnist_cnn_seq
checkpoint_prefix = os.path.join(checkpoint_dir, model_dir_name)

# ============================================
# 2. Data pipeline
# ============================================
# Load MNIST, already split into train and test sets.
mnist = keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Scale pixel values by 1/255 into the 0~1 range, then append a channel axis:
# (batch, width, height) => (batch, width, height, channel).
train_images = (train_images.astype(np.float32) / 255.0)[..., np.newaxis]
test_images = (test_images.astype(np.float32) / 255.0)[..., np.newaxis]

# One-hot encode the labels over 10 classes.
train_labels = to_categorical(train_labels, num_classes=10)
test_labels = to_categorical(test_labels, num_classes=10)

# Training data is shuffled before batching; test data is only batched.
train_dataset = tf.data.Dataset.from_tensor_slices((train_images, train_labels))
train_dataset = train_dataset.shuffle(buffer_size=100000).batch(batch_size)
test_dataset = tf.data.Dataset.from_tensor_slices((test_images, test_labels))
test_dataset = test_dataset.batch(batch_size)
# ============================================
# 3-1. Build the network (Sequential CNN)
# ============================================
def create_model():
    """Build the MNIST CNN with the Keras Sequential API.

    The network stacks three Conv2D (3x3, ReLU, "same" padding) + MaxPool2D
    stages with 32, 64 and 128 filters, flattens the result, and finishes
    with Dense(256, ReLU) -> Dropout(0.4) -> Dense(10).

    Returns:
        keras.Sequential: Model that takes (28, 28, 1) inputs and returns 10
        raw logits per example (the final Dense layer has no activation).
    """
    model = keras.Sequential()
    # Declare the input shape with an explicit Input object rather than the
    # `input_shape=` argument on the first layer.
    model.add(keras.Input(shape=(28, 28, 1)))
    # Layers are stacked one on top of another, like bricks.
    for filters in (32, 64, 128):
        model.add(
            keras.layers.Conv2D(
                filters=filters,
                kernel_size=3,
                activation=tf.nn.relu,
                padding="same",
            )
        )
        # MaxPool2D defaults: 2x2 window, stride 2.
        model.add(keras.layers.MaxPool2D(padding="same"))
    # Flatten the feature maps into a vector before the fully connected part.
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(256, activation=tf.nn.relu))
    model.add(keras.layers.Dropout(0.4))
    model.add(keras.layers.Dense(10))
    return model


model = create_model()
model.summary()
# ============================================
# 3-2. Build the network (Functional API)
# ============================================
def create_model_functional():
    """Build the MNIST CNN with the Keras Functional API.

    Each layer is called on the tensor produced by the previous one, starting
    from an explicit Input: three Conv2D (3x3, ReLU, "same") + MaxPool2D
    stages with 32, 64 and 128 filters, then Flatten -> Dense(256, ReLU) ->
    Dropout(0.4) -> Dense(10).

    Returns:
        keras.Model: Model mapping (28, 28, 1) inputs to 10 raw logits.
    """
    # Declare the input layer.
    inputs = keras.Input(shape=(28, 28, 1))

    # Thread the running tensor through the convolution/pooling stages.
    net = inputs
    for filters in (32, 64, 128):
        net = keras.layers.Conv2D(
            filters=filters,
            kernel_size=3,
            padding="same",
            activation=tf.nn.relu,
        )(net)
        net = keras.layers.MaxPool2D(padding="same")(net)

    net = keras.layers.Flatten()(net)
    net = keras.layers.Dense(units=256, activation=tf.nn.relu)(net)
    net = keras.layers.Dropout(rate=0.4)(net)
    logits = keras.layers.Dense(units=10)(net)

    # A Functional model is defined by naming its inputs and outputs.
    return keras.Model(inputs=inputs, outputs=logits)


functional_model = create_model_functional()
functional_model.summary()
# ============================================
# 3-3. Build the network (Model Subclassing)
# ============================================
class MNISTModel(tf.keras.Model):
    """MNIST CNN written as a tf.keras.Model subclass.

    Three Conv2D (3x3, ReLU, "same") + MaxPool2D stages with 32, 64 and 128
    filters, followed by Flatten -> Dense(256, ReLU) -> Dropout(0.4) ->
    Dense(10). The output is raw logits (the last layer has no activation).
    """

    def __init__(self):
        """Create every layer once and keep it as an instance attribute."""
        super().__init__()
        self.conv1 = keras.layers.Conv2D(
            filters=32,
            kernel_size=3,
            padding="same",
            activation=tf.nn.relu,
        )
        self.pool1 = keras.layers.MaxPool2D(padding="same")
        self.conv2 = keras.layers.Conv2D(
            filters=64,
            kernel_size=3,
            padding="same",
            activation=tf.nn.relu,
        )
        self.pool2 = keras.layers.MaxPool2D(padding="same")
        self.conv3 = keras.layers.Conv2D(
            filters=128,
            kernel_size=3,
            padding="same",
            activation=tf.nn.relu,
        )
        self.pool3 = keras.layers.MaxPool2D(padding="same")
        self.pool3_flat = keras.layers.Flatten()
        self.dense4 = keras.layers.Dense(units=256, activation=tf.nn.relu)
        self.drop4 = keras.layers.Dropout(rate=0.4)
        self.dense5 = keras.layers.Dense(units=10)

    def call(self, inputs, training=False):
        """Run the forward pass.

        Args:
            inputs: Input batch fed to the first convolution.
            training: Whether the call is part of training; forwarded to the
                Dropout layer so that dropout is only active when True.

        Returns:
            Tensor of 10 raw logits per example.
        """
        net = self.conv1(inputs)
        net = self.pool1(net)
        net = self.conv2(net)
        net = self.pool2(net)
        net = self.conv3(net)
        net = self.pool3(net)
        net = self.pool3_flat(net)
        net = self.dense4(net)
        # Pass the flag explicitly instead of relying on implicit propagation.
        net = self.drop4(net, training=training)
        net = self.dense5(net)
        return net


model = MNISTModel()
# ============================================
# 4. Loss function
# ============================================
def loss_fn(model, images, labels):
    """Return the mean softmax cross-entropy of `model` on one batch.

    Args:
        model: Callable model that returns unnormalized logits.
        images: Input batch forwarded to the model.
        labels: Target distribution per example, same shape as the logits
            (the script builds these as one-hot vectors with to_categorical).

    Returns:
        Scalar tensor: cross-entropy averaged over the batch.
    """
    # training=True enables dropout during the forward pass.
    predictions = model(images, training=True)
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
        labels=labels,
        logits=predictions,
    )
    return tf.reduce_mean(per_example_loss)
# ============================================
# 5. Gradient computation
# ============================================
def grad(model, images, labels):
    """Return the gradients of the batch loss w.r.t. `model.variables`.

    The loss is computed by `loss_fn` inside a GradientTape so that the
    forward pass is recorded for differentiation.
    """
    with tf.GradientTape() as tape:
        batch_loss = loss_fn(model, images, labels)
    # NOTE(review): differentiates w.r.t. model.variables so the result lines
    # up with the training loop's zip(grads, model.variables); consider
    # switching both sides to model.trainable_variables - confirm.
    gradients = tape.gradient(batch_loss, model.variables)
    return gradients
# ============================================
# 6. Choose the optimizer
# ============================================
# Adam through the Keras optimizer API; this replaces the earlier
# tf.train.AdamOptimizer(learning_rate=learning_rate) call.
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
# ============================================
# 7. Performance metric
# ============================================
def evaluate(model, images, labels):
    """Return the classification accuracy of `model` on one batch.

    The model runs with training=False, and the predicted class (argmax of
    the logits along axis 1) is compared with the argmax of `labels`.

    Returns:
        Scalar float32 tensor: fraction of examples predicted correctly.
    """
    logits = model(images, training=False)
    predicted_class = tf.argmax(logits, axis=1)
    expected_class = tf.argmax(labels, axis=1)
    hits = tf.cast(tf.equal(predicted_class, expected_class), tf.float32)
    return tf.reduce_mean(hits)
# ============================================
# 8. Checkpoint setup
# ============================================
# Checkpoint object that tracks the model under the name "cnn".
checkpoint = tf.train.Checkpoint(cnn=model)

# ============================================
# 9. Train and validate the network
# ============================================
for epoch in range(training_epochs):
    # Per-epoch running sums; each is divided by its batch count below.
    avg_loss = avg_train_acc = avg_test_acc = 0.0
    train_step = test_step = 0

    # Training pass: one optimizer update per batch.
    for images, labels in train_dataset:
        grads = grad(model, images, labels)
        optimizer.apply_gradients(zip(grads, model.variables))
        # Loss and accuracy are measured on the same batch after the update.
        loss = loss_fn(model, images, labels)
        acc = evaluate(model, images, labels)
        avg_loss += loss
        avg_train_acc += acc
        train_step += 1
    avg_loss /= float(train_step)
    avg_train_acc /= float(train_step)

    # Evaluation pass over the test set (no weight updates).
    for images, labels in test_dataset:
        acc = evaluate(model, images, labels)
        avg_test_acc += acc
        test_step += 1
    avg_test_acc /= float(test_step)

    print(
        "Epoch: ",
        str(epoch + 1),
        "loss = ",
        format(avg_loss, ".8f"),
        "train accuracy = ",
        format(avg_train_acc, ".4f"),
        "test accuracy = ",
        format(avg_test_acc, ".4f"),
    )

    # Save a checkpoint at the end of every epoch.
    checkpoint.save(file_prefix=checkpoint_prefix)
# ============================================
# 10. Residual Block (Functional API example)
# ============================================
# 1x1 conv (64 filters) -> 3x3 conv (64 filters) -> 1x1 conv (256 filters),
# then the block input is added back (skip connection) and ReLU is applied.
# The incoming feature map is assumed to have 256 channels.
inputs_res = keras.Input(shape=(28, 28, 256))
# Use the relu function as the activation, like the other Conv2D layers in
# this file, instead of passing a ReLU layer instance as `activation`.
conv1 = keras.layers.Conv2D(
    filters=64,
    kernel_size=1,
    padding="same",
    activation=tf.nn.relu,
)(inputs_res)
conv2 = keras.layers.Conv2D(
    filters=64,
    kernel_size=3,
    padding="same",
    activation=tf.nn.relu,
)(conv1)
# No activation here: ReLU is applied after the skip connection is added.
conv3 = keras.layers.Conv2D(
    filters=256,
    kernel_size=1,
    padding="same",
)(conv2)
# Add the block input (inputs_res) to the conv3 output to form the skip
# connection.
add3 = keras.layers.add([conv3, inputs_res])
relu3 = keras.layers.ReLU()(add3)
residual_block = keras.Model(inputs=inputs_res, outputs=relu3)



출처: 모두를 위한 딥러닝 강좌 2
https://www.youtube.com/watch?v=7eldOrjQVi0&list=PLQ28Nx3M4Jrguyuwg4xe9d9t2XE639e5C