๋ฐ์ดํฐ๋ฅผ ํธ๋ฆฌํ๊ฒ ๋ค๋ฃจ๊ธฐ ์ํ์ฌ ๋ชจ๋ธ ๋ถ๋ถ๊ณผ ๋
๋ฆฝ์์ผ์ ๋ค๋ค์ผํจ
PyTorch๋ ์ฌ์ฉ์๊ฐ ๊ตฌ์ถํ ๋ฐ์ดํฐ์ ๋ฏธ๋ฆฌ ๋ง๋ค์ด์ง ๋ฐ์ดํฐ๋ฅผ ์ฌ์ฉ๊ฐ๋ฅํ๋๋ก primitives(๊ธฐ์ด์์)๋ฅผ ์ ๊ณตํจ
์ฌ์ ์์ฑ๋ ๋ฐ์ดํฐ์ธํธ ๊ฐ์ ธ์ฌ ์ ์ฌ์ฉ
# Load the FashionMNIST training split through the ready-made dataset class.
training_data = datasets.FashionMNIST(
    root="data",  # directory where the train and test data are stored
    train=True,  # True selects the training split, False the test split
    # If the data cannot be loaded from `root`, download it from the internet.
    download=True,
    transform=transforms.ToTensor(),  # preprocessing applied to each input image
)
๋ฐ์ดํฐ (์ด๋ฏธ์ง์ ์ฉ) ์ ์ฒ๋ฆฌ ๋ชจ๋ ๋ชจ์
- Compose
- ๋ฐ์ดํฐ์ ์ฒ๋ฆฌ๊ณผ์ ์ฌ๋ฌ๊ฐ์ผ๋ ๋ฌถ์ด์ ์ฌ์ฉ
- ToTensor()
- PIL์ด๋ฏธ์ง or Numpy ๋ฐฐ์ด --> Tensor ํ์์ผ๋ก ๋ณํ
- "H x W x C" โ "C x H x W"
- ์ค์ผ์ผ๋ง๋ ๋์ด 0~1 ๋ก ๋ณํ๋๋ค. (ํญ์๋๋๊ฒ์์๋)
- numpy ๊ฒฝ์ฐ dtype = np.uint8 ์ผ๊ฒฝ์ฐ๋ง ์ค์ผ์ผ๋ง๋จ
- ์์ธํ ์ฌํญ์ Document ์ฐธ๊ณ
- Normalize(mean, std, inplace=False)
- ์ ๊ทํ์ํ (ํ๊ท ,ํ์คํธ์ฐจ,๋์ฒด์ฌ๋ถ)
- ToPILImage()
- csv ํ์ผ๋ก ๋ฐ์ดํฐ์ ์ ๋ฐ์ ๊ฒฝ์ฐ, PIL image๋ก ๋ณํ
- Resize((300, 300))
- ์ด๋ฏธ์ง ์ฌ์ด์ฆ๋ณํ
- RandomHorizontalFlip(p = ํ๋ฅ ๊ฐ๊ธฐ์ )
- ์ด๋ฏธ์ง ์ข์ฐ๋์นญ ( p์ default ๊ฐ 0.5 )
๋ฉ์๋ ๋๋ฌด๋ง์.. Document ์ฐธ๊ณ
์์ฑ๋ ๋ฐ์ดํฐ์ธํธ ํด๋์ค๊ฐ ๋ด๊ธด ๊ฐ์ฒด๋ฅผ ์ ๋ ฅ์ผ๋ก ๋ฐ์์, ๋ฐ์ดํฐ๋ฅผ iterableํ๊ฒ Load ํ ์ ์๋๋ก ํจ
# 1. When using a ready-made dataset class
from torchvision import datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# FashionMNIST training split stored under "data" (downloaded if missing),
# with ToTensor applied to every image.
train_dataset = datasets.FashionMNIST(
    "data", train=True, transform=transforms.ToTensor(), download=True
)
# Wrap the dataset so it can be iterated in shuffled mini-batches of 64.
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=64)
# 2. When using a user-defined dataset class
from torch.utils.data import Dataset
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
class CustomDataset(Dataset):
    """Placeholder for a user-defined dataset in this usage sketch.

    The body is intentionally elided here. A working implementation has to
    provide ``__init__``, ``__len__`` and ``__getitem__``.
    """
# Build the custom dataset from `train_path`; ToTensor (wrapped in a Compose
# pipeline) is applied to every sample.
train_dataset = CustomDataset(
    train_path,
    transform=transforms.Compose([transforms.ToTensor()]),
)
# Serve the dataset in shuffled mini-batches of 64 samples.
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=64)
# Draw one random sample index in the range [0, len(training_data)).
x = torch.randint(0, len(training_data), (1,))
print(x)  # the one-element tensor
print(x.item())  # the same index as a plain Python number
> tensor([12491])
> 12491
1. dataset ํด๋์ค๊ฐ ๋ด๊ธด ๊ฐ์ฒด์์ฑ
2. dataloader๋ฅผ ๋ด์ ๊ฐ์ฒด์์ฑ
from torchvision import datasets
import torchvision.transforms as transforms
# 1. Create the object that holds the dataset class (training split)
training_data = datasets.FashionMNIST(
    root="data",  # directory where the train and test data are stored
    train=True,  # True selects the training split, False the test split
    download=True,  # download from the internet if not loadable from `root`
    transform=transforms.ToTensor(),  # preprocessing applied to each input image
)
# Same dataset class, this time holding the test split.
test_data = datasets.FashionMNIST(
    root="data",  # directory where the train and test data are stored
    train=False,  # False selects the test split, True the training split
    download=True,  # download from the internet if not loadable from `root`
    transform=transforms.ToTensor(),  # preprocessing applied to each input image
)
# 2. Create the objects that hold the DataLoaders
from torch.utils.data import DataLoader

# Both loaders yield shuffled mini-batches of 64 samples.
train_dataloader = DataLoader(dataset=training_data, shuffle=True, batch_size=64)
test_dataloader = DataLoader(dataset=test_data, shuffle=True, batch_size=64)
# 3. The DataLoader is iterable: fetch one batch and plot a single sample.
# The batch is bound to `images`/`labels` rather than `input`/`output` so the
# builtin `input()` is not shadowed.
images, labels = next(iter(train_dataloader))
idx = 0
img = images[idx].squeeze()  # drop size-1 dimensions before plotting
label = labels[idx]
# NOTE(review): `plt` is presumably matplotlib.pyplot; its import is not
# visible in this part of the file - confirm.
plt.figure(figsize=[2, 2])
plt.imshow(img, cmap="gray")
plt.show()
print(f"Label: {label}")
1. dataset ํด๋์ค ์์ฑ
2. dataset ํด๋์ค๊ฐ ๋ด๊ธด ๊ฐ์ฒด์์ฑ
3. dataloader๋ฅผ ๋ด์ ๊ฐ์ฒด์์ฑ
from torch.utils.data import Dataset
# 1. Define the dataset class
class CustomDataset(Dataset):
    """Dataset backed by a CSV file of flattened images.

    The CSV must contain a label column (``label`` by default). All remaining
    columns of a row are taken as the pixel values of one image and reshaped
    to ``image_shape``.
    """

    def __init__(self, data_path, transform=None, target_transform=None,
                 *, label_column='label', image_shape=(28, 28, 1)):
        """Read the CSV once, when the object is created.

        Args:
            data_path: Location of the CSV, passed to ``pandas.read_csv``.
            transform: Optional callable applied to each image on access.
            target_transform: Optional callable applied to each label on
                access.
            label_column: Name of the column that holds the labels.
            image_shape: Per-sample shape the pixel columns are reshaped to.
        """
        data = pd.read_csv(data_path)
        self.Y_data = np.array(data[label_column])
        # Everything except the label column is pixel data.
        pixels = data.drop(columns=label_column)
        self.X_data = (
            np.array(pixels).reshape(-1, *image_shape).astype('float32')
        )
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the total number of samples in the dataset."""
        return len(self.Y_data)

    def __getitem__(self, idx):
        """Return the sample at ``idx`` as an ``(image, label)`` tuple."""
        # Select the raw sample.
        image = self.X_data[idx]
        label = self.Y_data[idx]
        # Preprocess the input. Compare against None so that a callable
        # which happens to be falsy is still applied.
        if self.transform is not None:
            image = self.transform(image)
        # Preprocess the target.
        if self.target_transform is not None:
            label = self.target_transform(label)
        return image, label
# 2. Create the objects that hold the dataset class
import torchvision.transforms as transforms

# CSV files for the two splits.
train_path, test_path = (
    "dataset/fashion-mnist_train.csv",
    "dataset/fashion-mnist_test.csv",
)
# Each dataset gets its own Compose pipeline containing only ToTensor.
train_dataset = CustomDataset(
    train_path, transform=transforms.Compose([transforms.ToTensor()])
)
test_dataset = CustomDataset(
    test_path, transform=transforms.Compose([transforms.ToTensor()])
)
# 3. Create the objects that hold the DataLoaders
from torch.utils.data import DataLoader

# Both loaders yield shuffled mini-batches of 64 samples.
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=64)
test_loader = DataLoader(dataset=test_dataset, shuffle=True, batch_size=64)
# 4. The DataLoader is iterable: fetch one batch and plot a single sample.
# Iterate `train_loader` (the loader over the custom CSV dataset), not
# `train_dataloader` from the built-in FashionMNIST example. The batch is
# bound to `images`/`labels` so the builtin `input()` is not shadowed.
images, labels = next(iter(train_loader))
idx = 0
img = images[idx].squeeze()  # drop size-1 dimensions before plotting
label = labels[idx]
# NOTE(review): `plt` is presumably matplotlib.pyplot; its import is not
# visible in this part of the file - confirm.
plt.figure(figsize=[2, 2])
plt.imshow(img, cmap="gray")
plt.show()
print(f"Label: {label}")