[딥러닝] 수어 동작 인식 모델 만들기 (CNN-LSTM) (1/4)

지현·2022년 4월 22일
0

같은 팀원(미서님)이 정리해준 자료에 따르면 참고하던 레퍼런스에는 GoogleNet 1개, VGG16 1개, Inception V3 2개였다. 이 팀원이 Inception v3을 사용하겠다고 하였고, 또 다른 논문을 참고하여 나는 VGG16 모델을 사용해보기로 했다.

이번에 참고하여 진행할 깃허브는 Video-Classification-CNN-and-LSTM이다.


train_CNN_RNN.py

from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
from keras import applications
from keras.optimizers import SGD
from sklearn.utils import shuffle
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.applications.vgg16 import VGG16
from keras.layers import LSTM
import numpy as np
import glob,os
from scipy.misc import imread,imresize

-> 필요한 라이브러리 import


# Number of frames per generator batch.
batch_size = 128


# Mount Google Drive so the dataset directories below are reachable in Colab.
from google.colab import drive
drive.mount('/content/drive')


# Frame directories: one sub-folder per class under each split.
train_dir='/content/drive/MyDrive/video_data/dataset_expt/dataset_expt(ver1.0)/3frame/traindata'
valid_dir='/content/drive/MyDrive/video_data/dataset_expt/dataset_expt(ver1.0)/3frame/validationdata'
print(train_dir)
print(valid_dir)

def bring_data_from_directory():
  """Build the train/validation image generators for the sign-language frames.

  Uses the module-level ``train_dir``/``valid_dir`` paths and ``batch_size``.

  Returns:
      (train_generator, validation_generator): Keras directory iterators.
      Because ``class_mode='categorical'`` each batch is an
      (images, one-hot labels) pair — the original comment claiming
      "no labels" was wrong.
  """
  datagen = ImageDataGenerator(rescale=1. / 255)  # scale pixels to [0, 1]

  def _flow(directory):
    # Both splits share identical preprocessing and the same class list.
    return datagen.flow_from_directory(
        directory,
        target_size=(224, 224),
        batch_size=batch_size,
        class_mode='categorical',  # yields one-hot labels with each batch
        shuffle=True,
        classes=['class_1', 'class_2', 'class_3'])

  return _flow(train_dir), _flow(valid_dir)

-> 데이터를 가져오는 코드인 것 같다.
directory 경로와 classes를 수정했다.


def load_VGG16_model():
  """Return the ImageNet-pretrained VGG16 convolutional base (no classifier top)."""
  vgg = VGG16(
      weights='imagenet',
      include_top=False,
      input_shape=(224, 224, 3),
  )
  print("Model loaded..!")
  print(vgg.summary())
  return vgg

-> VGG16 model을 load하는 코드


def extract_features_and_store(train_generator, validation_generator, base_model):
  """Load pre-extracted VGG16 feature arrays and reshape them for the LSTM.

  NOTE(review): despite its name, this function does not extract anything —
  ``train_generator``, ``validation_generator`` and ``base_model`` are unused
  and the features are expected to already exist as .npy files in the
  working directory (this is the source of the FileNotFoundError below).

  Returns:
      (train_data, train_labels, validation_data, validation_labels) where
      each data array is reshaped (n, h, w, c) -> (n, h*w, c), i.e. the two
      spatial axes become the LSTM's timestep axis.
  """
  # BUG FIX: np.load(open('f.npy')) opens the binary .npy file in TEXT mode
  # and fails on Python 3 — np.load accepts the path directly.
  train_data = np.load('video_x_VGG16.npy')
  train_labels = np.load('video_y_VGG16.npy')
  validation_data = np.load('video_x_validate_VGG16.npy')
  validation_labels = np.load('video_y_validate_VGG16.npy')

  # Shuffle each split with one shared permutation so data/label rows stay
  # paired (equivalent to sklearn.utils.shuffle, without the extra import).
  perm = np.random.permutation(len(train_data))
  train_data, train_labels = train_data[perm], train_labels[perm]
  perm = np.random.permutation(len(validation_data))
  validation_data, validation_labels = validation_data[perm], validation_labels[perm]

  # Collapse the spatial grid into a sequence: (n, h, w, c) -> (n, h*w, c).
  train_data = train_data.reshape(train_data.shape[0],
                                  train_data.shape[1] * train_data.shape[2],
                                  train_data.shape[3])
  validation_data = validation_data.reshape(validation_data.shape[0],
                                            validation_data.shape[1] * validation_data.shape[2],
                                            validation_data.shape[3])

  return train_data, train_labels, validation_data, validation_labels

->train_data와 validation_data, train_labels와 validation_labels를 수정해야 할 것 같은데 수정을 하는 게 맞는 걸까?


def train_model(train_data, train_labels, validation_data, validation_labels):
  """Train an LSTM classifier on pre-extracted VGG16 feature sequences.

  Uses fully connected layers on top of a single LSTM, SGD optimizer, early
  stopping, and a checkpoint that stores the best weights by val_loss.

  Returns:
      The trained keras Sequential model.
  """
  # Infer the class count from the one-hot labels instead of hard-coding 5:
  # the data generators are configured for 3 classes (class_1..class_3),
  # so the original Dense(5) head contradicted the labels.
  num_classes = train_labels.shape[1]

  model = Sequential()
  model.add(LSTM(256, dropout=0.2,
                 input_shape=(train_data.shape[1], train_data.shape[2])))
  model.add(Dense(1024, activation='relu'))
  model.add(Dropout(0.5))
  model.add(Dense(num_classes, activation='softmax'))
  # BUG FIX: gradient_descent_v2 is a module, not a callable optimizer —
  # use the SGD class already imported at the top of this file.
  sgd = SGD(lr=0.00005, decay=1e-6, momentum=0.9, nesterov=True)
  model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])
  #model.load_weights('video_1_LSTM_1_512.h5')
  callbacks = [
      EarlyStopping(monitor='val_loss', patience=10, verbose=0),
      ModelCheckpoint('video_1_LSTM_1_1024.h5', monitor='val_loss',
                      save_best_only=True, verbose=0),
  ]
  nb_epoch = 500
  # `nb_epoch=` was removed in Keras 2; the keyword is `epochs=`.
  model.fit(train_data, train_labels,
            validation_data=(validation_data, validation_labels),
            batch_size=batch_size, epochs=nb_epoch,
            callbacks=callbacks, shuffle=True, verbose=1)
  return model

-> LSTM 분류 모델을 생성하고 학습까지 수행하는 코드 (모델 정의 + compile + fit)


def test_on_whole_videos(train_data,train_labels,validation_data,validation_labels):
  """Evaluate per-video accuracy on a held-out test directory.

  Walks a test directory (one sub-folder per class, one sub-sub-folder per
  video), samples every 4th frame, pushes the frames through the VGG16 base
  and the trained LSTM, and counts a video as correct when at least half of
  its sampled frames are classified as the class index ``output``.

  NOTE(review): "/Users/.../video/test" is a placeholder path and must be
  replaced before this can run.
  NOTE(review): frames here are NOT rescaled by 1/255, unlike training
  (ImageDataGenerator(rescale=1./255)) — confirm whether that is intended.
  NOTE(review): this retrains the LSTM via train_model even though __main__
  already trained one — the trained model is not reused.
  """
  # [1:] presumably skips a hidden first entry such as .DS_Store — verify;
  # os.listdir order is not guaranteed, so class order may vary per run.
  parent = os.listdir("/Users/.../video/test")
  #.....................................Testing on whole videos.................................................................
  x = []               # sampled frames of the current video
  y = []               # expected class index for each sampled frame
  count = 0            # global frame counter (drives the every-4th sampling)
  output = 0           # class index, incremented once per class folder
  count_video = 0
  correct_video = 0    # videos where >= half the frames were right
  total_video = 0
  base_model = load_VGG16_model()
  model = train_model(train_data,train_labels,validation_data,validation_labels)
  for video_class in parent[1:]:
      print(video_class)
      child = os.listdir("/Users/.../video/test" + "/" + video_class)
      for class_i in child[1:]:
          sub_child = os.listdir("/Users/.../video/test" + "/" + video_class + "/" + class_i)
          for image_fol in sub_child[1:]:
              # NOTE(review): both branches below are identical — the
              # class_4 special case is dead weight left from the reference.
              if (video_class ==  'class_4' ):
                  # Keep only every 4th frame to thin out the sequence.
                  if(count%4 == 0):
                      image = imread("/Users/.../video/test" + "/" + video_class + "/" + class_i + "/" + image_fol)
                      image = imresize(image , (224,224))

                      x.append(image)
                      y.append(output)
                      #cv2.imwrite('/Users/.../video/validate/' + video_class + '/' + str(count) + '_' + image_fol,image)
                  count+=1

              else:
                  if(count%4 == 0):
                      image = imread("/Users/.../video/test" + "/" + video_class + "/" + class_i + "/" + image_fol)
                      image = imresize(image , (224,224))
                      x.append(image)
                      y.append(output)
                      #cv2.imwrite('/Users/.../video/validate/' + video_class + '/' + str(count) + '_' + image_fol,image)
                  count+=1
          #correct_video+=1
          # One whole video collected: extract CNN features, then classify
          # each frame with the LSTM model.
          x = np.array(x)
          y = np.array(y)
          x_features = base_model.predict(x)
          #np.save(open('feat_' + 'class_' + str(output) + '_' + str(count_video) +'_'  + '.npy','w'),x)

          correct = 0
          
          answer = model.predict(x_features)
          for i in range(len(answer)):
              if(y[i] == np.argmax(answer[i])):
                  correct+=1
          print(correct,"correct",len(answer))
          total_video+=1
          # Majority vote: the video counts as correct if at least half of
          # its frames were classified correctly.
          if(correct>= len(answer)/2):
              correct_video+=1
          x = []   # reset buffers for the next video
          y = []
          count_video+=1
      output+=1  # next class folder -> next expected label

  print("correct_video",correct_video,"total_video",total_video)
  print("The accuracy for video classification of ",total_video, " videos is ", (correct_video/total_video))

-> 학습 코드가 아니라, 전체 테스트 비디오에 대해 프레임 단위 예측 후 다수결로 비디오 분류 정확도를 평가하는 코드이다.


if __name__ == '__main__':
  # Pipeline: data generators -> VGG16 base -> cached .npy features -> LSTM.
  train_generator,validation_generator = bring_data_from_directory()
  base_model = load_VGG16_model()
  train_data,train_labels,validation_data,validation_labels = extract_features_and_store(train_generator,validation_generator,base_model)
  # NOTE(review): train_model is called here AND again inside
  # test_on_whole_videos, so the LSTM is trained twice and this first
  # model is discarded.
  train_model(train_data,train_labels,validation_data,validation_labels)
  test_on_whole_videos(train_data,train_labels,validation_data,validation_labels)

-> 현재 오류가 나는 부분, 아래 오류가 나는 것을 보니 위에서 data 경로를 수정해야 하는 것이 맞는 듯 하다.
전에 수어 인식 LSTM 학습시키기에서 좌표값으로 생성했던 데이터가 .npy였는데 이 확장자가 뜻하는 것이 무엇인지 다음번에 찾아봐야겠다.

FileNotFoundError: [Errno 2] No such file or directory: 'video_x_VGG16.npy'

Trouble Shooting


다음 시간에 좀 더 진행 할 부분

😎 머신 러닝 - epoch, batch size, iteration의 의미에 대해 공부하기
😎 해당 모델이 어디에 사용되었는지, dataset은 무엇이었는지 알아보기
😎 전체적인 코드에 대해서 공부하기 (자세하게 의미를 알기 보다는 한 셀이 의미하는 것이 무엇인지 생각해보기)
😎 데이터 셋을 Input하는 방법 알아보기
😎 .npy 파일에 대해서 알아보기
😎 아나콘다와 VSCode 연동하기
😎 위의 코드 VSCode에서 진행해보기

profile
화이팅!

0개의 댓글