실습
- CIFAR-10에 관련된 데이터를 가지고 학습을 한다.
필요한 라이브러리 + 데이터 불러오기
'''
라이브러리들을 불러오자.
'''
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random as rd
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow import keras
# Load the CIFAR-10 dataset through Keras and unpack it into
# (images, labels) pairs for the training and test splits.
(train_x, train_y), (test_x, test_y) = keras.datasets.cifar10.load_data()
데이터 전처리
# Standardize both splits with statistics taken from the training images
# only, so no information from the test split enters the scaling.
mean_n = train_x.mean()
std_n = train_x.std()
train_x, test_x = [(split - mean_n) / std_n for split in (train_x, test_x)]

from tensorflow.keras.utils import to_categorical

# Number of distinct class ids present in the training labels.
class_n = np.unique(train_y).size
class_n  # bare expression: echoes the value when run as a notebook cell

# One-hot encode the integer labels for use with categorical cross-entropy.
train_y, test_y = [to_categorical(y, class_n) for y in (train_y, test_y)]
모델
from tensorflow.keras.backend import clear_session
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Flatten, Conv2D, MaxPool2D
from tensorflow.keras.callbacks import EarlyStopping
from keras.src.engine.training import optimizer
# Reset Keras global state before building a fresh graph.
clear_session()


def _conv_stage(x, filters):
    """Apply one convolutional stage to tensor ``x``.

    The stage is two 3x3, stride-1, 'same'-padded ReLU convolutions with
    ``filters`` channels each, followed by batch normalization, a 2x2
    stride-2 max-pool and Dropout(0.25). Returns the resulting tensor.
    """
    for _ in range(2):
        x = Conv2D(
            filters=filters,
            kernel_size=(3, 3),
            strides=(1, 1),
            padding='same',
            activation='relu',
        )(x)
    x = keras.layers.BatchNormalization()(x)
    x = MaxPool2D(pool_size=(2, 2), strides=(2, 2))(x)
    return keras.layers.Dropout(0.25)(x)


# Functional-API CNN: two conv stages (32 then 64 filters) and a dense head.
il = Input(shape=(32, 32, 3))
hi = _conv_stage(il, 32)
hi = _conv_stage(hi, 64)

hi = keras.layers.Flatten()(hi)
hi = keras.layers.Dense(1024, activation='relu')(hi)
hi = keras.layers.BatchNormalization()(hi)
hi = keras.layers.Dropout(0.35)(hi)

# Size the softmax layer from class_n (the value used to one-hot encode the
# targets) rather than a literal 10, so the two can never drift apart.
ol = Dense(class_n, activation='softmax')(hi)

model = Model(il, ol)
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
# Stop when validation loss has not improved for 3 epochs in a row and
# restore the weights from the best epoch.
es = EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=3,
    verbose=1,
    restore_best_weights=True,
)

# Train exactly once. The original called model.fit twice back to back,
# which trained the same model a second time while the plots further down
# only show the first run's `history`; the evaluated model and the plotted
# curves therefore did not correspond.
# `epochs` is just an upper bound; early stopping is expected to end the run.
# validation_split=0.2 holds out 20% of the training data for validation.
history = model.fit(
    train_x,
    train_y,
    validation_split=0.2,
    epochs=100000,
    verbose=1,
    callbacks=[es],
)

# Alias kept so any later code that reads `hist` still finds a History object.
hist = history
성능평가
# Evaluate on the held-out test split; the result holds the loss first and
# the accuracy metric second.
performance_test = model.evaluate(test_x, test_y, batch_size=100)
test_loss = performance_test[0]
test_accuracy_pct = performance_test[1] * 100
print('Test Loss : {:.6f}, Test Accuracy : {:.3f}%'.format(test_loss, test_accuracy_pct))
# Work with the plain per-epoch metrics dict rather than the History object.
if not isinstance(history, dict):
    history = history.history

# One figure per metric: (training key, validation key, title, y-axis label).
curves = (
    ('accuracy', 'val_accuracy', 'Accuracy : Training vs Validation', 'Accuracy'),
    ('loss', 'val_loss', 'Loss : Training vs Validation', 'Loss'),
)
for train_key, val_key, title, y_label in curves:
    plt.plot(history[train_key])
    plt.plot(history[val_key])
    plt.title(title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Training', 'Validation'], loc=0)
    plt.show()
# Collapse the one-hot targets back to integer class ids for scoring.
train_y, test_y = train_y.argmax(axis=1), test_y.argmax(axis=1)

# Per-class probabilities for every image in each split.
pred_train = model.predict(train_x)
pred_test = model.predict(test_x)

# Most probable class per image.
single_pred_train = np.argmax(pred_train, axis=1)
single_pred_test = np.argmax(pred_test, axis=1)

logi_train_accuracy = accuracy_score(train_y, single_pred_train)
logi_test_accuracy = accuracy_score(test_y, single_pred_test)

print('CNN')
print(f'트레이닝 정확도 : {logi_train_accuracy*100:.2f}%')
print(f'테스트 정확도 : {logi_test_accuracy*100:.2f}%')
시각화
# Visual spot checks of the model's predictions.
# Re-run this section repeatedly (Ctrl+Enter in a notebook) to inspect
# different samples.

# `labels` is used below but never defined in the visible code, which would
# raise NameError. Provide the CIFAR-10 class names in label-index order as
# a fallback, without overriding a definition made elsewhere.
if 'labels' not in globals():
    labels = [
        'airplane', 'automobile', 'bird', 'cat', 'deer',
        'dog', 'frog', 'horse', 'ship', 'truck',
    ]


def _show_prediction(sample_id):
    """Report and display the model's prediction for one test image.

    Prints the sample index, the true and predicted class names, the
    per-class probabilities floored to whole percents, and whether the
    prediction is correct; then shows the image.

    Args:
        sample_id: index into ``test_x`` / ``test_y`` / ``pred_test``.
    """
    true_cls = int(test_y[sample_id])
    pred_cls = int(single_pred_test[sample_id])

    print(f'id = {sample_id}')
    print(f'다음 그림은 {labels[true_cls]} 입니다.')
    print(f'모델의 예측 : {labels[pred_cls]}')

    percents = np.floor(pred_test[sample_id] * 100).tolist()
    prob_dict = {labels[cls]: pct for cls, pct in enumerate(percents)}
    print('모델의 카테고리별 확률 : ')
    print(prob_dict)

    print('정답입니다' if true_cls == pred_cls else '틀렸어요')

    # test_x was standardized with mean_n / std_n; imshow clips float RGB
    # data to [0, 1], so undo the scaling and show real 0-255 pixel values.
    pixels = test_x[sample_id].reshape([32, 32, -1]) * std_n + mean_n
    plt.imshow(np.clip(np.rint(pixels), 0, 255).astype('uint8'))
    plt.show()


# 1) A random test sample. The range comes from the data rather than a
#    hard-coded 10000.
_show_prediction(rd.randrange(len(test_x)))

# 2) A random sample chosen only among the misclassified test images.
true_false = (test_y == single_pred_test)
f_id = np.flatnonzero(~true_false)
f_n = len(f_id)
if f_n:
    _show_prediction(f_id[rd.randrange(f_n)])
else:
    # randrange(0, 0) would raise ValueError when nothing was misclassified.
    print('틀린 예측이 없습니다.')