0419 aws_ImageDataGenerator


1. Create the cat_dog_full folder structure (Linux)

# Create the folder structure and copy the cat and dog images into their respective folders

import os, shutil

original_dataset_dir = './data/cat_dog/train'

base_dir = './data/cat_dog_full'
os.mkdir(base_dir)

train_dir = os.path.join(base_dir,'train')
os.mkdir(train_dir)
validation_dir = os.path.join(base_dir,'validation')
os.mkdir(validation_dir)
test_dir = os.path.join(base_dir,'test')
os.mkdir(test_dir)


train_cats_dir = os.path.join(train_dir,'cats')
os.mkdir(train_cats_dir)
train_dogs_dir = os.path.join(train_dir,'dogs')
os.mkdir(train_dogs_dir)

validation_cats_dir = os.path.join(validation_dir,'cats')
os.mkdir(validation_cats_dir)
validation_dogs_dir = os.path.join(validation_dir,'dogs')
os.mkdir(validation_dogs_dir)

test_cats_dir = os.path.join(test_dir,'cats')
os.mkdir(test_cats_dir)
test_dogs_dir = os.path.join(test_dir,'dogs')
os.mkdir(test_dogs_dir)

## Copy the files ##
## 12,500 images each of cats and dogs
## train      : 7,000
## validation : 3,000
## test       : 2,500

fnames = ['cat.{}.jpg'.format(i) for i in range(7000)]
for fname in fnames:
    src = os.path.join(original_dataset_dir,fname)
    dst = os.path.join(train_cats_dir, fname)
    shutil.copyfile(src,dst)

fnames = ['cat.{}.jpg'.format(i) for i in range(7000,10000)]
for fname in fnames:
    src = os.path.join(original_dataset_dir,fname)
    dst = os.path.join(validation_cats_dir, fname)
    shutil.copyfile(src,dst)

fnames = ['cat.{}.jpg'.format(i) for i in range(10000,12500)]
for fname in fnames:
    src = os.path.join(original_dataset_dir,fname)
    dst = os.path.join(test_cats_dir, fname)
    shutil.copyfile(src,dst)
    
fnames = ['dog.{}.jpg'.format(i) for i in range(7000)]
for fname in fnames:
    src = os.path.join(original_dataset_dir,fname)
    dst = os.path.join(train_dogs_dir, fname)
    shutil.copyfile(src,dst)

fnames = ['dog.{}.jpg'.format(i) for i in range(7000,10000)]
for fname in fnames:
    src = os.path.join(original_dataset_dir,fname)
    dst = os.path.join(validation_dogs_dir, fname)
    shutil.copyfile(src,dst)

fnames = ['dog.{}.jpg'.format(i) for i in range(10000,12500)]
for fname in fnames:
    src = os.path.join(original_dataset_dir,fname)
    dst = os.path.join(test_dogs_dir, fname)
    shutil.copyfile(src,dst)
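
Before moving on, it is worth confirming that each split actually received the expected number of files. A minimal sanity-check sketch, using only the directory variables defined above:

# Verify the split: expected 7,000 / 3,000 / 2,500 images per class
for d in [train_cats_dir, train_dogs_dir,
          validation_cats_dir, validation_dogs_dir,
          test_cats_dir, test_dogs_dir]:
    print(d, len(os.listdir(d)))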

2. Using the ImageDataGenerator provided by Keras

import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt

train_dir = './data/cat_dog_full/train'    # cats folder, dogs folder
valid_dir = './data/cat_dog_full/validation'

# Create the ImageDataGenerator objects.
# Divide each pixel (RGB values 0~255) by 255 to normalize it (min-max scaling).
train_datagen = ImageDataGenerator(rescale=1/255)
validation_datagen = ImageDataGenerator(rescale=1/255)

train_generator = train_datagen.flow_from_directory(
    train_dir,    # target directory
    classes=['cats', 'dogs'], # folder names become the classes: cats -> 0, dogs -> 1
                              # if omitted, classes are assigned in alphabetical order
    target_size=(150,150),
    batch_size=20,            # load 20 images at a time (too many to load all at once)
    class_mode='binary'
)

validation_generator = validation_datagen.flow_from_directory(
    valid_dir,                 # target directory
    classes=['cats', 'dogs'],
    target_size=(150,150),
    batch_size=20,
    class_mode='binary'
)

# for x_data, t_data in train_generator:
#     print(x_data.shape)
#     print(t_data.shape)
#     break
    
figure = plt.figure()
ax = []

for i in range(20):
    ax.append(figure.add_subplot(4,5,i+1))

for x_data, t_data in train_generator:
    print(x_data.shape)   # (20, 150, 150, 3)
    print(t_data.shape)
    
    for idx, img_data in enumerate(x_data): # idx : 0~19, img_data : (150, 150, 3)
        ax[idx].imshow(img_data)
        
    break
    
plt.tight_layout()
plt.show()
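
Note that a flow_from_directory generator loops over the data indefinitely, which is why the loop above breaks after the first batch. The class mapping and image count can also be checked directly on the generator (a small sketch using only the train_generator defined above):

# The generator cycles forever, so break (or limit steps) when iterating manually.
print(train_generator.class_indices)   # expected: {'cats': 0, 'dogs': 1}
print(train_generator.samples)         # total number of training images found (14,000 here)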

3. Build the model

import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam, RMSprop

model = Sequential()

model.add(Conv2D(filters=32,
                 kernel_size=(3,3),
                 activation='relu',
                 input_shape=(150,150,3)))

model.add(MaxPooling2D(pool_size=(2,2)))

model.add(Conv2D(filters=64,
                 kernel_size=(3,3),
                 activation='relu'))

model.add(Conv2D(filters=128,
                 kernel_size=(3,3),
                 activation='relu'))

model.add(MaxPooling2D(pool_size=(2,2)))

model.add(Conv2D(filters=128,
                 kernel_size=(3,3),
                 activation='relu'))

model.add(MaxPooling2D(pool_size=(2,2)))

#### feature extraction above, classifier below ####

model.add(Flatten())

model.add(Dense(units=256,
                activation='relu'))

model.add(Dense(units=1,
                activation='sigmoid'))

print(model.summary())
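
For reference, every Conv2D layer here uses the default 'valid' padding, so the spatial size shrinks 150 -> 148 -> 74 -> 72 -> 70 -> 35 -> 33 -> 16 through the network, and the Flatten layer outputs 16 x 16 x 128 = 32,768 features feeding the Dense layers. model.summary() shows these shapes layer by layer.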

4. Train and save the model

![](https://velog.velcdn.com/images/wldnjswldnjs/post/69b54c90-93d8-43ac-81fc-65bc01c3dcb0/image.png)

model.compile(optimizer=Adam(learning_rate=1e-4),
              loss='binary_crossentropy',
              metrics=['accuracy'])

history = model.fit(train_generator,    # 14,000 training images, 20 per batch -> 700 steps
                   steps_per_epoch=700,
                   epochs=30,
                   validation_data=validation_generator,  # 6,000 validation images, 20 per batch -> 300 steps
                   validation_steps=300)

model.save('./data/cats_dogs_full_cnn_model.h5')
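
The test split created in step 1 is not used above. A minimal sketch of how it could be evaluated with the saved model (assuming the same 150x150 input size and binary class mode; the paths follow the ones defined earlier):

from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator

test_dir = './data/cat_dog_full/test'

test_datagen = ImageDataGenerator(rescale=1/255)
test_generator = test_datagen.flow_from_directory(
    test_dir,
    classes=['cats', 'dogs'],
    target_size=(150,150),
    batch_size=20,
    class_mode='binary',
    shuffle=False             # keep file order fixed for reproducible evaluation
)

saved_model = load_model('./data/cats_dogs_full_cnn_model.h5')
test_loss, test_acc = saved_model.evaluate(test_generator)
print('test accuracy :', test_acc)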

There is little difference in accuracy between training from the CSV data and training with the image generator.
