FLY AI 4기 7일차: 뉴럴네트워크 다중 분류

염지현·2024년 1월 3일

FLY AI 4기

목록 보기
8/16

7일차: 뉴럴네트워크 다중 분류

7일차 요약

  • 오전: 뉴럴네트워크 다중 분류
  • 오후: CNN을 활용한 뉴럴네트워크 다중 분류

fashion_MNIST 데이터를 뉴럴네트워크로 다중 분류하기

  • MNIST 데이터를 사용해서 같이 훈련을 시켜보고 후에 fashion_mnist로 실습을 진행하였다. (fashion_mnist는 처음 들어봄)

1. 데이터 준비

데이터 로드

  • 알아서 잘 갖춰진 데이터는 없다.
  • 반드시 shape, 데이터 수, 데이터 생김새를 확인해봐야 한다.
from keras.datasets import fashion_mnist

# Load Fashion-MNIST: 60k train / 10k test grayscale 28x28 images, labels 0-9.
# NOTE(review): np/pd/plt are used here but imported later in this page —
# presumably imported earlier in the original notebook.
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

# Always check shapes and class balance before modelling.
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

pd.Series(y_train).value_counts().sort_index()

# Preview 16 random training images. Use the actual dataset size instead of a
# hard-coded 60000 so the snippet still works on a different split/subset.
samples = np.random.randint(len(X_train), size=16)
plt.figure(figsize=(8, 8))
for i, idx in enumerate(samples):
    plt.subplot(4, 4, 1 + i)
    plt.imshow(X_train[idx], cmap='gray')
    plt.axis('off')
    plt.title(y_train[idx])
plt.show()

검증 데이터

from sklearn.model_selection import train_test_split

# Hold out 30% of the training data for validation. stratify=y_train keeps the
# 10 classes equally represented in both splits (a plain random split only
# approximates this).
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.3, random_state=42, stratify=y_train
)

X_train.shape, X_val.shape, y_train.shape, y_val.shape

최대 최소 정규화

# Min-max scale the 8-bit pixel values into [0, 1] as float32.
X_train_s, X_val_s = (a.astype('float32') / 255. for a in (X_train, X_val))

원핫 인코딩

from keras.utils import to_categorical

# One-hot encode the integer class labels (required by categorical_crossentropy).
y_train_o, y_val_o = to_categorical(y_train), to_categorical(y_val)

flatten

# Flatten each 28x28 image into a 784-dimensional vector for the Dense layers.
X_train_s = X_train_s.reshape(len(X_train_s), 28 * 28)
X_val_s = X_val_s.reshape(len(X_val_s), 28 * 28)

2. 모델만들기

import keras
from keras import layers

# Simple MLP classifier: 784 -> 64 -> 32 -> 10.
# keras.Input is used instead of the `input_shape=` layer argument, which is
# deprecated in Keras 3; the resulting model is identical.
model = keras.Sequential([
    keras.Input(shape=(28 * 28,)),
    layers.Dense(units=64, activation='relu'),
    layers.Dense(units=32, activation='relu'),
    # 10-way softmax over the Fashion-MNIST classes.
    layers.Dense(units=10, activation='softmax'),
])
model.summary()

3. 학습

# Adam optimizer + categorical cross-entropy for one-hot multi-class targets.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

EPOCHS = 15
BATCH_SIZE = 32

# Train while monitoring the held-out validation split after every epoch.
history = model.fit(
    X_train_s,
    y_train_o,
    validation_data=(X_val_s, y_val_o),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    verbose=1,
)
def plot_history(history):
    """Plot training-vs-validation loss and accuracy curves side by side."""
    df = pd.DataFrame(history.history)
    df['epoch'] = history.epoch

    plt.figure(figsize=(16, 8))
    # Left panel: loss; right panel: accuracy.
    for pos, metric in ((1, 'loss'), (2, 'accuracy')):
        plt.subplot(1, 2, pos)
        plt.xlabel('epoch')
        plt.ylabel(metric)
        plt.plot(df['epoch'], df[metric], label=f'train {metric}')
        plt.plot(df['epoch'], df[f'val_{metric}'], label=f'val {metric}')
        plt.legend()
    plt.show()

plot_history(history)

4. 평가

# Apply the same preprocessing to the test set as to the training data.
X_test_s = X_test.astype('float32') / 255.
X_test_s = X_test_s.reshape(-1, 28 * 28)

# model.predict returns an (N, 10) matrix of class probabilities, while
# sklearn's label metrics require hard class ids — take the argmax.
# (The original passed the raw probability matrix, which raises a ValueError.)
y_pred = np.argmax(model.predict(X_test_s), axis=1)

from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score

def print_metrics(y_test, y_pred):
    """Print accuracy plus macro-averaged recall/precision/F1 for class labels."""
    print(f'accuracy : {accuracy_score(y_test, y_pred)}')
    print(f'recall : {recall_score(y_test, y_pred, average="macro")}')
    print(f'precision : {precision_score(y_test, y_pred, average="macro")}')
    print(f'f1 : {f1_score(y_test, y_pred, average="macro")}')

print_metrics(y_test, y_pred)

오답 확인

# model.predict outputs probabilities; reduce to class ids if that has not
# already been done, so the element-wise comparison with y_test is meaningful.
pred_labels = y_pred if y_pred.ndim == 1 else y_pred.argmax(axis=1)

# Indices the model got wrong; sample up to 9 *distinct* ones to display
# (the original np.random.choice without replace=False could repeat images).
wrong = np.where(pred_labels != y_test)[0]
samples = np.random.choice(wrong, size=min(9, len(wrong)), replace=False)

samples

plt.figure(figsize=(8, 8))
for i, idx in enumerate(samples):
    plt.subplot(3, 3, 1 + i)
    plt.imshow(X_test[idx], cmap='gray')
    plt.axis('off')
    plt.title(f'{y_test[idx]}(pred={pred_labels[idx]})')
plt.show()

CNN으로 cats_vs_dogs 분류하기

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf

import os

# NOTE(review): `train_path` / `val_path` were referenced below but never
# defined in this snippet (NameError as written) — define them from the
# dataset root; adjust to your directory layout.
data_root = 'cats_and_dogs'  # TODO: set to the actual dataset directory
train_path = os.path.join(data_root, 'train')
val_path = os.path.join(data_root, 'validation')

# Per-class image directories for the train/validation splits.
train_cats_path = os.path.join(train_path, 'cats')
train_dogs_path = os.path.join(train_path, 'dogs')
val_cats_path = os.path.join(val_path, 'cats')
val_dogs_path = os.path.join(val_path, 'dogs')

from PIL import Image

train_imgs = os.listdir(train_cats_path) + os.listdir(train_dogs_path)
train_imgs.sort()

# Build arrays of 224x224 images and 0/1 labels (0 = cat, 1 = dog).
# BUG FIX: the original compared `label == 'cats'` while joining the path with
# `label + 's'` — those cannot both be right. With the usual 'cat.N.jpg' /
# 'dog.N.jpg' filenames the comparison was never true, so every image was
# labeled dog. startswith('cat') handles both singular and plural prefixes.
X_train = []
y_train = []
for imgname in train_imgs:
    label = imgname.split('.')[0]  # e.g. 'cat' from 'cat.0.jpg'
    num_label = 0 if label.startswith('cat') else 1
    subdir = 'cats' if num_label == 0 else 'dogs'
    img = Image.open(os.path.join(train_path, subdir, imgname))
    img_arr = np.array(img.resize((224, 224)))
    X_train.append(img_arr)
    y_train.append(num_label)

X_train = np.array(X_train)
y_train = np.array(y_train)


val_imgs = os.listdir(val_cats_path) + os.listdir(val_dogs_path)
val_imgs.sort()

# Same fix as for the training set: derive the label from the filename prefix
# ('cat...'/'dog...') instead of the never-true `label == 'cats'` comparison,
# and resolve the subdirectory from the label instead of `label + 's'`.
X_val = []
y_val = []
for imgname in val_imgs:
    label = imgname.split('.')[0]  # e.g. 'dog' from 'dog.0.jpg'
    num_label = 0 if label.startswith('cat') else 1
    subdir = 'cats' if num_label == 0 else 'dogs'
    img = Image.open(os.path.join(val_path, subdir, imgname))
    img_arr = np.array(img.resize((224, 224)))
    X_val.append(img_arr)
    y_val.append(num_label)

X_val = np.array(X_val)
y_val = np.array(y_val)

from keras.utils import to_categorical

# Scale pixel intensities into [0, 1] and one-hot encode the binary labels.
X_train_s, X_val_s = (a.astype('float32') / 255. for a in (X_train, X_val))
y_train_o, y_val_o = (to_categorical(a) for a in (y_train, y_val))

from keras import layers
import keras

# Small CNN: two conv/pool stages, then a dense head with a 2-way softmax.
# keras.Input replaces the `input_shape=` layer argument, which is deprecated
# in Keras 3; the resulting model is identical.
model = keras.Sequential([
    keras.Input(shape=(224, 224, 3)),

    layers.Conv2D(filters=16, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),

    layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),

    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    # Two-unit softmax to match the one-hot binary targets.
    layers.Dense(2, activation='softmax'),
])


EPOCHS = 10
BATCH_SIZE = 32

# Binary task trained with one-hot targets, hence categorical cross-entropy
# paired with the 2-unit softmax output.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

history = model.fit(
    X_train_s,
    y_train_o,
    validation_data=(X_val_s, y_val_o),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    verbose=1,
)


def plot_history(history):
    """Plot the loss and accuracy learning curves for train and validation."""
    df = pd.DataFrame(history.history)
    df['epoch'] = history.epoch

    plt.figure(figsize=(16, 8))
    # Panel 1: loss; panel 2: accuracy.
    for pos, metric in enumerate(('loss', 'accuracy'), start=1):
        plt.subplot(1, 2, pos)
        plt.xlabel('epoch')
        plt.ylabel(metric)
        plt.plot(df['epoch'], df[metric], label=f'train {metric}')
        plt.plot(df['epoch'], df[f'val_{metric}'], label=f'val {metric}')
        plt.legend()
    plt.show()

plot_history(history)

0개의 댓글