# Source: https://dacon.io/competitions/open/235594/codeshare/2297?page=1&dtype=recent
# Educational write-up based on the shared code above.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # 파이참으로 구동하기 위해 추가
import tensorflow
from tensorflow.keras.models import *
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.regularizers import *
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import StratifiedKFold
# --- Load data and prepare image tensors --------------------------------
# The first CSV column is a row id, so it is dropped on read.
train = pd.read_csv('train.csv').iloc[:, 1:]
test = pd.read_csv('test.csv').iloc[:, 1:]
submission = pd.read_csv('sample_submission.csv')

# Peek at the frames (return values are discarded when run as a script;
# kept from the original notebook-style code).
train.head()
test.head()

# Pixel matrices as float32; labels stay a pandas Series.
X = train.drop('label', axis=1).to_numpy(dtype='float32')
y = train.label
target = test.to_numpy(dtype='float32')

# Normalize raw pixel intensities from [0, 255] into [0, 1] in place.
X /= 255
target /= 255

# Show one training sample (row 2020) while rows are still flat 784-vectors.
image = X[2020, :].reshape(28, 28)
print(f'{int(y[2020])}번 클래스 의류')
plt.imshow(image)
plt.show()

# Conv2D expects NHWC tensors: (samples, 28, 28, 1).
X = X.reshape(-1, 28, 28, 1)
target = target.reshape(-1, 28, 28, 1)
# --- Cross-validation setup and shared callbacks ------------------------
np.random.seed(1204)

# 15 stratified folds keep the class ratio roughly equal in every split.
skf = StratifiedKFold(n_splits=15, random_state=1011, shuffle=True)

# Halve the learning rate after 5 epochs without improvement
# (monitors the Keras default metric, presumably val_loss — confirm).
reLR = ReduceLROnPlateau(patience=5, verbose=1, factor=0.5)

# Stop a fold once validation accuracy has not improved for 5 epochs.
es = EarlyStopping(patience=5, verbose=1, monitor='val_acc', mode='max')

# Accumulator for the fold-averaged soft predictions (10 classes).
sub_pred = np.zeros((len(test), 10))
# --- K-fold training loop -----------------------------------------------
# Trains one fresh CNN per stratified fold, restores that fold's best
# (val_acc) checkpoint, and averages the softmax test predictions into
# `sub_pred`. Dividing each fold's prediction by the fold count turns the
# running sum into a mean.
n_folds = skf.get_n_splits()  # derived from `skf` instead of hard-coding 15
for i, (tr_idx, val_idx) in enumerate(skf.split(X, y)):
    print('=' * 25)
    print(f'{i + 1}번째 학습 FOLD 학습 시작')
    tr_x, tr_y = X[tr_idx], y[tr_idx]      # training split
    val_x, val_y = X[val_idx], y[val_idx]  # validation split

    # Save only the best weights (highest validation accuracy) of this fold.
    mc = ModelCheckpoint(f'cv_study{i + 1}.h5', save_best_only=True, verbose=1,
                         monitor='val_acc', mode='max', save_weights_only=True)

    # A fresh model each fold so no learned state leaks between folds.
    model = Sequential()
    model.add(Conv2D(64, (3, 3), activation='relu', input_shape=(28, 28, 1), padding='same'))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))
    model.add(Conv2D(32, (3, 3), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(Conv2D(32, (5, 5), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(Conv2D(32, (5, 5), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(Conv2D(32, (5, 5), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D((3, 3)))
    model.add(Dropout(0.3))
    model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(Conv2D(64, (5, 5), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D((3, 3)))
    model.add(Dropout(0.3))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))
    model.add(Dense(64, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))
    model.add(Dense(10, activation='softmax'))

    # Integer labels -> sparse categorical cross-entropy.
    # `learning_rate=` replaces the deprecated/removed `lr=` keyword.
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=RMSprop(learning_rate=0.003), metrics=['acc'])

    # EarlyStopping (patience=5 on val_acc) ends each fold well before 1000 epochs.
    history = model.fit(tr_x, tr_y, epochs=1000,
                        validation_data=(val_x, val_y), callbacks=[es, mc, reLR])

    # Reload the best checkpoint of this fold before predicting.
    model.load_weights(f'cv_study{i + 1}.h5')
    sub_pred += model.predict(target) / n_folds
    print(f'{i + 1}번째 학습 FOLD 학습 완료\n')
# --- Build the submission file ------------------------------------------
# Assign each test row the class with the highest averaged probability.
# Vectorized argmax over the class axis replaces the per-row Python loop.
submission['label'] = np.argmax(sub_pred, axis=1)
submission.to_csv('cv_study.csv', index=False)