학습한 내용
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import cv2
import torch
import torch.optim as optim
import torch.nn as nn
import torchvision
import seaborn as sns
from PIL import Image
from torchvision import datasets, models, transforms
from torch.utils.data import Dataset, DataLoader
from timeit import default_timer as timer
# Yoga pose class names. The functions below match these against dataset
# sub-directory names, and a sample's integer label is its position here.
classes = ['Downdog', 'Goddess', 'Plank', 'Tree', 'Warrior2']
"""1. 데이터 비율 확인 체크"""
def data_check(path="./dataset/YogaPoses", class_names=None):
    """Count the files in each class directory under *path*.

    Walks the tree rooted at ``path`` and, for every non-empty directory whose
    own name is a known class, records how many files it directly contains.

    Args:
        path: Root directory of the dataset.
        class_names: Directory names to treat as classes. Defaults to the
            module-level ``classes`` list.

    Returns:
        dict mapping class name -> number of files in that directory.
    """
    if class_names is None:
        class_names = classes
    count_dit = {}
    for root, _dirs, files in os.walk(path):
        # basename() honours the platform separator; splitting on "\\" only
        # produced the directory name on Windows and matched nothing elsewhere.
        label = os.path.basename(os.path.normpath(root))
        if files and label in class_names:
            count_dit[label] = len(files)
    return count_dit
# Per-class file counts for the default dataset root.
counts = data_check()
# Output recorded from one run:
# {'Downdog': 196, 'Goddess': 199, 'Plank': 197, 'Tree': 198, 'Warrior2': 198}
"""2. 데이터 train val"""
def data_split(path="./dataset/YogaPoses", split_predictions=0.1,
               class_names=None):
    """Split each class directory under *path* into train / validation files.

    For every non-empty directory whose name is a known class, a random
    ``split_predictions`` fraction of its files (rounded down) is held out for
    validation and the remaining files are used for training.

    Args:
        path: Root directory of the dataset.
        split_predictions: Fraction of each class's files to hold out for
            validation.
        class_names: Directory names to treat as classes. Defaults to the
            module-level ``classes`` list.

    Returns:
        ``(train_dict, val_dict)``; each maps class name -> list of file paths.
        Within one class the two lists are disjoint and together cover every
        file in the directory.
    """
    if class_names is None:
        class_names = classes
    train_dict = {}
    val_dict = {}
    for root, _dirs, files in os.walk(path):
        # basename() honours the platform separator; splitting on "\\" only
        # produced the directory name on Windows.
        label = os.path.basename(os.path.normpath(root))
        if not files or label not in class_names:
            continue
        file_paths = [os.path.join(root, name) for name in files]
        n_val = min(int(len(files) * split_predictions), len(files))
        # Sample WITHOUT replacement. randint() could draw the same index more
        # than once, which duplicated validation files and left more than the
        # intended share of files in the training set.
        val_idx = np.random.choice(len(files), size=n_val, replace=False).tolist()
        held_out = set(val_idx)
        train_dict[label] = [
            p for i, p in enumerate(file_paths) if i not in held_out
        ]
        val_dict[label] = [file_paths[i] for i in val_idx]
    return train_dict, val_dict
# Build the train / validation file lists once, when the module is executed.
train_split_data, val_split_data = data_split()
# Per-class sizes recorded from one run (kept verbatim in the string below).
"""
Train data size : [177, 181, 178, 180, 181]
Val data size : [19, 19, 19, 19, 19]
"""
"""3. custom dataset"""
class YogaPosesData(Dataset):
    """Map-style dataset over a ``{class_name: [image_path, ...]}`` mapping.

    Samples are laid out class by class, in the mapping's iteration order.
    Each item is ``(image, label_index)`` where ``label_index`` is the
    position of the class name in the module-level ``classes`` list.

    The mapping is flattened once at construction; changes made to
    ``data_dict`` afterwards are not reflected.

    Args:
        data_dict: Mapping of class name -> list of image file paths.
        transform: Optional callable applied to the RGB PIL image.
    """

    def __init__(self, data_dict, transform=None):
        self.data_dict = data_dict
        self.transform = transform
        # Flatten once so __getitem__ is a direct lookup instead of rebuilding
        # cumulative class sizes on every access.
        self._samples = [
            (path, label)
            for label, paths in data_dict.items()
            for path in paths
        ]

    def __getitem__(self, idx):
        """Return ``(image, label_index)`` for sample *idx*.

        Negative indices count from the end, as with a list.

        Raises:
            IndexError: If *idx* is outside ``[-len(self), len(self))``.
        """
        total = len(self._samples)
        if idx < 0:
            idx += total
        if not 0 <= idx < total:
            raise IndexError(f"index out of range for dataset of size {total}")
        path, label = self._samples[idx]
        img = Image.open(path).convert("RGB")
        if self.transform:
            img = self.transform(img)
        return img, classes.index(str(label))

    def __len__(self):
        """Return the total number of samples across all classes."""
        return len(self._samples)
"""4. data augmentation"""
# Training-time pipeline. Compose chains the listed transforms in order:
#   Resize                - rescale every image to 224x224
#   RandomVerticalFlip    - random top/bottom flip
#   RandomAdjustSharpness - random sharpness adjustment (factor 1.5)
#   RandomHorizontalFlip  - random left/right flip
#   ColorJitter           - random brightness / hue / saturation change
#   ToTensor              - convert the image to a tensor
# NOTE(review): ColorJitter() is constructed without arguments; if
# torchvision's defaults are all zero it leaves the image unchanged - confirm.
train_data_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomVerticalFlip(),
    transforms.RandomAdjustSharpness(sharpness_factor=1.5),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(),
    transforms.ToTensor()
])
# Validation pipeline: same resize and tensor conversion, without the random
# flips and sharpness adjustment.
val_data_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ColorJitter(),
    transforms.ToTensor()
])
"""5. data loader"""
# Wrap the train / validation file lists in datasets with their transforms.
data_train = YogaPosesData(train_split_data, transform=train_data_transform)
data_val = YogaPosesData(val_split_data, transform=val_data_transform)
# Sizes recorded from one run: 895 training images, 95 validation images.
"""6. train data val data cheek"""
# Draw one random index from each dataset ...
t_idx = np.random.randint(0, len(data_train))
v_idx = np.random.randint(0, len(data_val))
# ... and fetch the image and label stored at that index.
t_img, t_label = data_train[t_idx]
v_img, v_label = data_val[v_idx]
"""show train image cheek"""
# Disabled visual sanity check: shows the two sampled images side by side,
# each titled with its class name.
# plt.figure(figsize=(8, 5))
# plt.subplot(121)
# plt.imshow(t_img.numpy().transpose(1, 2, 0))
# plt.title(f"Train Data class = {classes[t_label]}")
# plt.subplot(122)
# plt.imshow(v_img.numpy().transpose(1, 2, 0))
# plt.title(f"Val data class = {classes[v_label]}")
# plt.show()
"""data loader"""
# Batches of 50 samples; only the training loader shuffles.
train_loader = DataLoader(data_train, batch_size=50, shuffle=True)
val_loader = DataLoader(data_val, batch_size=50, shuffle=False)
"""Loss Function"""
# Classification loss shared by the training code.
criterion = nn.CrossEntropyLoss()
"""device"""
# Use CUDA when it is available, otherwise fall back to the CPU.
device = torch.device(
    "cuda") if torch.cuda.is_available() else torch.device('cpu')
"""model chose"""
def base_model_build(device):
    """Build a pretrained VGG-11 with frozen features and a new output layer.

    The parameters of ``features`` are excluded from gradient computation, and
    the last classifier layer (index 6) is replaced by a fresh ``nn.Linear``
    with one output per entry in the module-level ``classes`` list.

    Args:
        device: Target device. When truthy, "training ... " is printed and the
            model is moved to it.

    Returns:
        The modified VGG-11 model.
    """
    net = models.vgg11(pretrained=True)

    # Freeze the feature extractor: no gradients are computed for it during
    # back-propagation.
    for weight in net.features.parameters():
        weight.requires_grad = False

    # Replace the final Linear layer with one sized to the number of classes,
    # keeping its input width.
    head_index = 6
    net.classifier[head_index] = nn.Linear(
        net.classifier[head_index].in_features, len(classes)
    )

    if not device:
        return net
    print("training ... ")
    net.to(device)
    return net
def loss_acc_visuaize(history, optim, path):
    """Plot train/validation loss and accuracy curves and save the figure.

    Args:
        history: Mapping with the keys 'train_loss', 'val_loss', 'train_acc'
            and 'val_acc'; each value is passed to ``plt.plot``.
        optim: Its ``str()`` is used as the figure's super-title.
        path: Prefix that 'loss_acc.png' is appended to (plain string
            concatenation, no separator is added).
    """
    # (subplot position, series to draw, panel title) for the two panels.
    panels = (
        (121, ('train_loss', 'val_loss'), "loss Curvers"),
        (122, ('train_acc', 'val_acc'), "ACC Curvers"),
    )

    plt.figure(figsize=(20, 10))
    plt.suptitle(str(optim))
    for position, series_names, heading in panels:
        plt.subplot(position)
        for series in series_names:
            plt.plot(history[series], label=series)
        plt.legend()
        plt.title(heading)
    plt.savefig(str(path) + 'loss_acc.png')
def grad_visuaize(history, optim, path, ylimit=10):
    """Plot gradient-norm density estimates for three linear layers and save them.

    Draws one KDE panel per layer (a weight curve and a bias curve in each)
    and writes the figure to ``str(path) + "grad_norms.png"``.

    Args:
        history: Not read anywhere in this function.
        optim: Its ``str()`` is used as the figure's super-title.
        path: Prefix that "grad_norms.png" is appended to (plain string
            concatenation, no separator is added).
        ylimit: Upper y-axis limit applied to every panel.

    NOTE(review): ``weight_grads1..3`` and ``bias_grads1..3`` are neither
    parameters nor locals, so they must exist as module-level names when this
    is called. They are not defined in the part of the file visible here -
    presumably they were meant to come from ``history``; confirm.
    """
    plt.figure(figsize=(20, 10))
    plt.suptitle(str(optim))
    plt.subplot(131)
    sns.kdeplot(weight_grads1, shade=True)
    sns.kdeplot(bias_grads1, shade=True)
    # Kernel density estimate of the values.
    plt.legend(['weight', 'bias'])
    plt.title("Linear layer 1")
    plt.ylim(0, ylimit)
    plt.subplot(132)
    sns.kdeplot(weight_grads2, shade=True)
    sns.kdeplot(bias_grads2, shade=True)
    plt.legend(['weight', 'bias'])
    plt.title("Linear layer 2")
    plt.ylim(0, ylimit)
    plt.subplot(133)
    sns.kdeplot(weight_grads3, shade=True)
    sns.kdeplot(bias_grads3, shade=True)
    plt.legend(['weight', 'bias'])
    plt.title("Linear layer 3")
    plt.ylim(0, ylimit)
    plt.savefig(str(path) + "grad_norms.png")
def visual_predict(model, data=data_val):
    """Show one random sample from *data* beside the model's class probabilities.

    Picks a random index, runs the model on that single image, prints the
    resulting probabilities and displays the image (titled with its true
    class) next to a horizontal bar chart of the per-class probabilities.

    Args:
        model: Classifier returning one score per entry of ``classes``. It is
            switched to eval mode and left in that mode.
        data: Indexable dataset yielding ``(image_tensor, label_index)``;
            defaults to the validation dataset.
    """
    c = np.random.randint(0, len(data))
    img, label = data[c]
    # no_grad() turns autograd off to save memory and time; eval() disables
    # training-only behaviour such as dropout.
    with torch.no_grad():
        model.eval()
        # unsqueeze(0) adds the batch dimension without assuming 3x224x224.
        out = model(img.unsqueeze(0).to(device))
        # The network ends in a plain Linear layer, so exp() of its output is
        # not a probability distribution. softmax() normalises raw scores and
        # is also correct if the model already emits log-probabilities.
        probs = torch.softmax(out, dim=1)
    print(probs)
    plt.figure(figsize=(10, 5))
    plt.subplot(121)
    plt.imshow(img.numpy().transpose((1, 2, 0)))
    plt.title(str(classes[label]))
    plt.subplot(122)
    plt.barh(classes, probs.cpu().numpy()[0])
    plt.show()
def class_accuracies(model, data_dict=val_split_data, classes=classes):
    """Compute and print the model's top-1 accuracy for each class.

    Every image listed in ``data_dict`` is loaded, resized to 224x224, run
    through the model one at a time, and counted as correct when the
    highest-scoring output index equals the class's index in ``classes``.

    Args:
        model: Classifier returning one score per entry of ``classes``. It is
            switched to eval mode and left in that mode.
        data_dict: Mapping of class name -> list of image file paths.
        classes: Ordered class names; a name's position is its label index.

    Returns:
        dict mapping class name -> accuracy in ``[0, 1]`` (``0.0`` for a class
        with no images).
    """
    # Resize the PIL image before converting it, the same order the
    # validation transform in this file uses.
    preprocess = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])
    accuracy_dic = {}
    model.eval()
    with torch.no_grad():
        for c, paths in data_dict.items():
            gt = classes.index(str(c))
            correct_count = 0
            for path in paths:
                im = preprocess(Image.open(path).convert('RGB'))
                # Move the INPUT to the model's device (the output was being
                # moved instead), and add the batch dimension without the
                # mistyped 244 width.
                out = model(im.unsqueeze(0).to(device))
                # Index of the highest score; exp() is monotonic, so it is
                # not needed to find the arg-max.
                pred = int(out.argmax(dim=1).item())
                if pred == gt:
                    correct_count += 1
            total_count = len(paths)
            # Avoid ZeroDivisionError for a class with no images.
            accuracy = correct_count / total_count if total_count else 0.0
            print(f"Acc for class {str(c)} : ", accuracy)
            accuracy_dic[str(c)] = accuracy
    return accuracy_dic
학습한 내용 중 어려웠던 점 또는 해결 못한 것들
클래스 부분 코드가 잘 이해가 안 된다.
해결방법 작성
내일 마저 코드를 완성하고 print해보며 알아봐야겠다.
학습 소감