We built a model that classifies images into 10 classes (butterfly, dog, spider, horse, sheep, cow, cat, squirrel, elephant, chicken), using the libraries provided by PyTorch.
Writing all of the code at once in a single main.py file hurts readability and modularity, so we create a custom dataset class that can be reused conveniently, like a ready-made recipe!
import os # download data
import pandas as pd # get label
from PIL import Image # process image
from torch.utils.data import Dataset # inherit Dataset to make CustomDataset
class CustomDataset(Dataset):  # inherit Dataset
    """Image-classification dataset driven by a CSV annotation file.

    Expects `<root>/<mode>_images/` to hold the image files and
    `<root>/<mode>.csv` to list the samples, with the image file name in the
    first column and the string label in the second.
    """

    ## 1. Constructor
    def __init__(self, root='./datasets', transform=None, mode='train'):
        # fix: the original default `transform=transform` referenced an
        # undefined name at definition time; `None` now means "no transform".
        self.root = root
        self.transform = transform
        self.mode = mode
        if self.mode == 'train':
            self.dataset_path = os.path.join(self.root, 'train_images')
            self.annotations_file = pd.read_csv(os.path.join(self.root, 'train.csv'))
        elif self.mode == 'val':
            self.dataset_path = os.path.join(self.root, 'val_images')
            # fix: the original line was missing its closing parenthesis
            self.annotations_file = pd.read_csv(os.path.join(self.root, 'val.csv'))
        else:
            raise NotImplementedError(f"Mode {self.mode} is not implemented yet...")
        # Image file names live in the first CSV column
        image_names = self.annotations_file.iloc[:, 0].tolist()
        # Full path to every image file
        self.image_paths = [os.path.join(self.dataset_path, name) for name in image_names]
        # Corresponding string labels from the second CSV column
        self.labels = self.annotations_file.iloc[:, 1].tolist()
        # Unique class names; sorted so the order is deterministic across runs
        # (the original `list(set(...))` order varied between interpreter runs)
        self.class_names = sorted(set(self.labels))

    ## 2. len
    def __len__(self):
        """Return the number of samples in the dataset."""
        return len(self.image_paths)

    ## 3. getitem
    def __getitem__(self, idx):
        """Return the (image, label) pair at index `idx`.

        The image is converted to RGB so grayscale files do not break
        3-channel transforms; it is passed through `self.transform` when one
        was supplied.
        """
        # fix: original called `PIL.open` (PIL was never imported as a module)
        # and stored into `images` while line below transformed `image`
        image = Image.open(self.image_paths[idx]).convert('RGB')
        label = self.labels[idx]
        # Transform image (e.g. to a normalized tensor) when a pipeline is given
        if self.transform:
            image = self.transform(image)
        return image, label
# NOTE(review): sanity-check instantiation with all defaults; this runs at
# import time and reads './datasets/train.csv' — confirm the line is
# intentional and not leftover scratch code.
dataset = CustomDataset()
import torch
from torch.utils.data import DataLoader
import torchvision
from torchvision import transforms
from utils.dataset import CustomDataset
from torchvision.models import vgg16
import argparse
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter
import numpy as np
# Define Transforms
# Baseline preprocessing: fixed 224x224 resize, then PIL-to-tensor conversion.
_base_steps = [
    transforms.Resize([224, 224]),
    transforms.ToTensor(),
]
transform = transforms.Compose(_base_steps)
# Define Dataset
# Build the train and validation splits from the same root with the shared
# baseline transform, one per split name.
train_dataset, val_dataset = (
    CustomDataset(root='./datasets', transform=transform, mode=split)
    for split in ('train', 'val')
)
print(f"Length of train, validation dataset: {len(train_dataset)}, {len(val_dataset)}")
# Get Mean, Standard Deviation of RGB Channel
def get_mean_std(dataset):
    """Return ([mean_R, mean_G, mean_B], [std_R, std_G, std_B]) over `dataset`.

    Each sample's image tensor is reduced to its per-channel mean and std over
    the spatial axes; those per-image statistics are then averaged across the
    dataset. The original implementation iterated (and decoded) the entire
    dataset twice — once for means, once for stds; this version makes a single
    pass.
    """
    per_image_means = []
    per_image_stds = []
    for image, _ in dataset:
        pixels = image.numpy()
        per_image_means.append(pixels.mean(axis=(1, 2)))
        per_image_stds.append(pixels.std(axis=(1, 2)))
    # Average the per-image channel statistics across the whole dataset
    mean_rgb = np.mean(per_image_means, axis=0)
    std_rgb = np.mean(per_image_stds, axis=0)
    return mean_rgb.tolist(), std_rgb.tolist()
# Per-channel statistics computed over the baseline (Resize + ToTensor)
# datasets; used below to Normalize the final pipelines.
train_mean_rgb, train_std_rgb = get_mean_std(train_dataset)
val_mean_rgb, val_std_rgb = get_mean_std(val_dataset)
# Data Augmentation
# Training pipeline: random crop + photometric jitter + random flips, then
# tensor conversion and per-channel normalization.
transform_train = transforms.Compose([
    transforms.Resize([256, 256]),
    transforms.RandomCrop([224, 224]),
    transforms.ColorJitter(brightness = 0.2, contrast = 0.2, saturation = 0.2, hue = 0.2),
    # fix: p=1 made both flips fire on EVERY image, deterministically rotating
    # each training image 180 degrees instead of providing random variety.
    transforms.RandomHorizontalFlip(p = 0.5),
    transforms.RandomVerticalFlip(p = 0.5),
    transforms.ToTensor(),
    transforms.Normalize(train_mean_rgb, train_std_rgb)
])
# Validation pipeline: deterministic center crop (RandomCrop would make the
# reported accuracy vary between evaluation runs).
transform_val = transforms.Compose([
    transforms.Resize([256, 256]),
    transforms.CenterCrop([224, 224]),
    # fix: ToTensor was missing — Normalize operates on tensors, not PIL
    # images, so the original pipeline raised at runtime.
    transforms.ToTensor(),
    transforms.Normalize(val_mean_rgb, val_std_rgb)
])
Using the CustomDataset class defined in the dataset.py module, we define the actual train and validation datasets.
That is, to feed the model, the dataset
is downloaded and converted into (input image, output label) pairs.
# Rebuild the datasets with the final pipelines: augmentation for training,
# deterministic preprocessing for validation.
train_dataset = CustomDataset(root = './datasets', transform = transform_train, mode = 'train')
val_dataset = CustomDataset(root = './datasets', transform = transform_val, mode = 'val')
We import the DataLoader class from the torch.utils.data library to load the train and validation datasets in batches. Here we can specify whether to shuffle (shuffle = True) and the number of subprocesses used for data loading (num_workers = 0) as parameters.
# Batch the datasets; shuffle only the training split so each epoch sees the
# samples in a different order, and keep validation order fixed.
train_dataloader = DataLoader(train_dataset, batch_size = 32, shuffle = True, num_workers = 0)
val_dataloader = DataLoader(val_dataset, batch_size = 32, shuffle = False, num_workers = 0)
To train the model, the data must be converted to tensors. Before converting to tensors, the string labels must be mapped to integer indices.
# Index to change labels
# Class order defines each label's integer index for CrossEntropyLoss.
CLASS_ORDER = ('butterfly', 'dog', 'spider', 'horse', 'sheep',
               'cow', 'cat', 'squirrel', 'elephant', 'chicken')
index_for_class = {name: idx for idx, name in enumerate(CLASS_ORDER)}
torch provides a device class. CUDA is a software environment designed to run on NVIDIA GPUs. Using CUDA, the parallel processing power of NVIDIA GPUs can be fully exploited to greatly shorten the training time of deep-learning networks.
# Device
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device('cuda' if use_cuda else 'cpu')
print(f"Using {device} for inference")
We create a model instance using a package provided by torch, then move the model to the device.
# Model
# VGG16 pretrained on ImageNet; replace the final classifier layer so its
# 4096-dim features map to this task's 10 animal classes.
model = vgg16(pretrained = True)
model.classifier[6] = torch.nn.Linear(in_features = 4096, out_features = 10)
model.to(device)
# Loss, Optimizer
# CrossEntropyLoss expects raw logits and integer class indices (see
# index_for_class above).
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 0.0001)
# Train Loop
# fix: `writer` was used below but a SummaryWriter was never instantiated,
# which raised a NameError on the first batch.
writer = SummaryWriter()
for ep in range(10):
    model.train()  # Set model in training mode (enables dropout etc.)
    for batch, (images, labels) in enumerate(tqdm(train_dataloader)):
        # Make string labels into integer class indices for CrossEntropyLoss
        label_to_index = [index_for_class[label] for label in labels]
        labels = torch.tensor(label_to_index).to(device)
        # Send images to device
        images = images.to(device)
        # Compute prediction and loss (leftover debug shape print removed)
        outputs = model(images)
        loss = criterion(outputs, labels)
        # fix: log with a globally increasing step; using `ep` alone made every
        # batch of an epoch overwrite the same point.
        writer.add_scalar("loss", loss.item(), ep * len(train_dataloader) + batch)
        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        if batch % 10 == 0:
            print(f"Epoch : {ep}, Loss : {loss.item()}")
We set the model to validation mode. Through the torch.no_grad() context manager, gradient computation is disabled during validation so no parameter updates happen (saving memory).
# Validation
# NOTE(review): in the full script this section runs once per epoch inside the
# training loop above (it reports "After training epoch {ep}") — confirm its
# placement when reassembling the file.
model.eval()  # Set model in validation mode (disables dropout etc.)
total = 0
correct = 0
with torch.no_grad():  # No gradients computed during validation mode
    for (images, labels) in val_dataloader:
        total += len(images)
        # Make string labels into integer class indices
        label_to_index = [index_for_class[label] for label in labels]
        labels = torch.tensor(label_to_index).to(device)
        # Send images to device
        images = images.to(device)
        # Forward pass : compute predictions
        outputs = model(images)
        _, pred = torch.max(outputs, dim = 1)
        # fix: Tensor has .item(), not .items(); the original raised an
        # AttributeError on the first validation batch.
        correct += (pred == labels).sum().item()
print(f"After training epoch {ep}, Validation Accuracy : {correct / total}")