[Dacon/machine learning] Fashion Clothing Classification Practice

hottogi · November 1, 2022

Source: https://dacon.io/competitions/open/235594/codeshare/2297?page=1&dtype=recent

This is a study post based on the shared code above.

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import warnings
warnings.filterwarnings('ignore')

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # added to run this in PyCharm (hides TensorFlow INFO/WARNING log messages)

import tensorflow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.regularizers import *
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import StratifiedKFold

train = pd.read_csv('train.csv').iloc[:, 1:]
test = pd.read_csv('test.csv').iloc[:, 1:]
submission = pd.read_csv('sample_submission.csv')
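
# The first CSV column appears to be a row index, which iloc[:, 1:] drops.
# After that, train holds a 'label' column plus 784 pixel columns (28 x 28
# grayscale values in 0-255); test holds only the 784 pixel columns.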

print(train.head())
print(test.head())

X = np.array(train.drop('label', axis = 1), dtype = 'float32')
y = train.label
target = np.array(test, dtype='float32')

X /= 255          # scale pixel values to [0, 1]
target /= 255

image = X[2020,:].reshape(28,28)
print(f'Clothing of class {int(y[2020])}')
plt.imshow(image)
plt.show()

X = X.reshape(-1, 28, 28, 1)            # (samples, height, width, channels) as expected by Conv2D
target = target.reshape(-1, 28, 28, 1)

np.random.seed(1204)

skf = StratifiedKFold(n_splits = 15, random_state = 1011, shuffle = True)  # 15-fold stratified CV: each fold keeps the overall class proportions

reLR = ReduceLROnPlateau(patience = 5, verbose = 1, factor = 0.5)  # halve the learning rate when the validation loss plateaus for 5 epochs
es = EarlyStopping(patience = 5, verbose = 1, monitor = 'val_acc', mode = 'max')  # stop training early once val_acc stops improving

sub_pred = np.zeros((test.shape[0], 10))
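
# sub_pred accumulates the softmax outputs of all 15 folds; each fold's
# prediction is divided by 15 below, so the final array holds the mean
# class probabilities across folds.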

for i, (tr_idx, val_idx) in enumerate(skf.split(X, y)):
    print('=' * 25)
    print(f'Fold {i + 1}: training started')

    tr_x, tr_y = X[tr_idx], y[tr_idx]      # training split
    val_x, val_y = X[val_idx], y[val_idx]  # validation split

    mc = ModelCheckpoint(f'cv_study{i + 1}.h5', save_best_only=True, verbose=1, monitor='val_acc', mode='max',
                         save_weights_only=True)  # keep only this fold's best weights (highest val_acc)

    model = Sequential()
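    # Stacked Conv2D + BatchNormalization blocks with Dropout and two MaxPooling2D
    # stages, followed by a small dense head and a 10-way softmax output.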

    model.add(Conv2D(64, (3, 3), activation='relu', input_shape=(28, 28, 1), padding='same'))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))
    model.add(Conv2D(32, (3, 3), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(Conv2D(32, (5, 5), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(Conv2D(32, (5, 5), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(Conv2D(32, (5, 5), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D((3, 3)))
    model.add(Dropout(0.3))
    model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(Conv2D(64, (5, 5), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D((3, 3)))
    model.add(Dropout(0.3))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))
    model.add(Dense(64, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))
    model.add(Dense(10, activation='softmax'))

    model.compile(loss='sparse_categorical_crossentropy', optimizer=RMSprop(learning_rate=0.003), metrics=['acc'])

    history = model.fit(tr_x, tr_y, epochs=1000,
                        validation_data=(val_x, val_y), callbacks=[es, mc, reLR])

    model.load_weights(f'cv_study{i + 1}.h5')

    pred = model.predict(target) / 15   # this fold's share of the 15-fold average
    sub_pred += pred
    print(f'Fold {i + 1}: training finished\n')

submission['label'] = [np.argmax(x) for x in sub_pred]  # pick the class with the highest averaged probability
submission.to_csv('cv_study.csv', index = False)
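
One note on the imports: ImageDataGenerator is imported at the top but never used. If you want to experiment with data augmentation, a minimal sketch could replace the model.fit call inside the fold loop; the augmentation parameters below are illustrative only and not part of the shared code.

datagen = ImageDataGenerator(rotation_range=10,
                             width_shift_range=0.1,
                             height_shift_range=0.1,
                             zoom_range=0.1)

# random rotations, shifts, and zoom are applied on the fly to each training batch
history = model.fit(datagen.flow(tr_x, tr_y, batch_size=32),
                    epochs=1000,
                    validation_data=(val_x, val_y),
                    callbacks=[es, mc, reLR])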