Dataset
- img 파일 저장 형식 : jpg & png
- img path name : "/content/drive/MyDrive/vision/OCR이미지분류/원천데이터/ABSTRACT_ILLUSTRATION/L1_1/L2_1/L3_1/i_0001_101_607536.jpg"
- 소분류 L3 카테고리 : 1
- 중분류 L2 카테고리 : 1
- 대분류 L1 카테고리 : 1
- 레이블(L1/L2/L3 카테고리)은 PATH NAME 으로 유추 가능 (예: 경로의 "L3_1" → 소분류 L3 카테고리 1)
class Dataset(torch.utils.data.Dataset):
    """Image dataset that loads files with OpenCV and derives labels from the path.

    Each sample is a dict with key ``'img'`` (the transformed RGB image) and,
    when ``mode == 'train'``, key ``'y'`` (a zero-based class index parsed from
    the ``L3_<n>`` component of the file path).

    Args:
        mode: Dataset mode; a label is produced only when it equals ``'train'``.
        img_path_np: Indexable, sized collection of image file paths.
        transform: Albumentations-style callable invoked as
            ``transform(image=img)['image']``. When ``None`` the RGB array is
            returned untransformed.
    """

    def __init__(self, mode, img_path_np, transform=None):
        self.mode = mode
        self.imgs = img_path_np
        self.transform = transform

    def __len__(self):
        """Return the number of image paths held by the dataset."""
        return len(self.imgs)

    def __getitem__(self, idx):
        """Load, colour-convert and transform the image at position ``idx``.

        Returns:
            dict with ``'img'`` and, in ``'train'`` mode, ``'y'``.

        Raises:
            FileNotFoundError: the path does not point to an existing file.
            ValueError: the file exists but OpenCV could not decode it.
        """
        # Local import keeps the block self-contained; the module is cached
        # after the first call so the per-sample cost is negligible.
        import os

        file_path = self.imgs[idx]

        # cv2.imread signals failure by returning None instead of raising,
        # which would otherwise surface as an opaque cvtColor error.
        if not os.path.isfile(file_path):
            raise FileNotFoundError(f"Image file not found: {file_path}")
        img = cv2.imread(file_path)
        if img is None:
            raise ValueError(f"OpenCV could not decode image: {file_path}")

        # OpenCV loads colour images in BGR channel order; convert to RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        item = {}
        if self.transform is None:
            item['img'] = img
        else:
            item['img'] = self.transform(image=img)['image']

        if self.mode == 'train':
            # ".../L3_<n>/..." -> n; shifted by one to get a zero-based index.
            category = int(file_path.split("L3_")[1].split('/')[0])
            item['y'] = torch.tensor(category - 1)
        return item
# Augmentation pipeline: resize to 224x224, apply colour jitter, normalise,
# then convert to a tensor.
# Normalize is placed last (right before tensor conversion) so that
# ColorJitter works on the raw pixel values rather than on mean/std
# standardised floats, which fall outside the range colour transforms expect.
transformer = albumentations.Compose([
    albumentations.Resize(height=224, width=224),
    albumentations.ColorJitter(p=1),
    albumentations.Normalize(),
    albumentations.pytorch.transforms.ToTensorV2(),
])
# Heavier augmentation pipeline: resize, then three OneOf groups (each group
# is entered with p=1 and applies one of its children), then normalise and
# convert to a tensor.
# Normalize is placed last (right before tensor conversion) so the noise,
# blur, distortion and colour transforms operate on raw pixel values instead
# of mean/std standardised floats.
transform_oneof = albumentations.Compose([
    albumentations.Resize(224, 224),
    # Geometric group: 90-degree rotation or vertical flip.
    albumentations.OneOf([
        albumentations.RandomRotate90(p=1),
        albumentations.VerticalFlip(p=1),
    ], p=1),
    # Degradation group: blur, lens-style distortion or additive noise.
    albumentations.OneOf([
        albumentations.MotionBlur(p=1),
        albumentations.OpticalDistortion(p=1),
        albumentations.GaussNoise(p=1),
    ], p=1),
    # Colour / degradation group.
    # NOTE(review): OpticalDistortion and GaussNoise repeat the previous
    # group's members; kept as in the original - confirm this is intended.
    albumentations.OneOf([
        albumentations.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
        albumentations.OpticalDistortion(p=1),
        albumentations.GaussNoise(p=1),
    ], p=1),
    albumentations.Normalize(),
    albumentations.pytorch.transforms.ToTensorV2(),
])
conv2d
class Conv2dNet(torch.nn.Module):
    """Single convolution stage: Conv2d, then ReLU, then 2x2 max pooling."""

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding):
        super().__init__()
        # Sub-module attribute names are part of the state_dict keys.
        self.conv = torch.nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
        )
        self.relu = torch.nn.ReLU()
        self.pool = torch.nn.MaxPool2d(kernel_size=2)

    def forward(self, x):
        """Apply convolution, activation and pooling to ``x`` in that order."""
        return self.pool(self.relu(self.conv(x)))
class Net(torch.nn.Module):
    """Four stacked ``Conv2dNet`` stages followed by a linear classifier.

    The channel count doubles at every stage (``out_channels`` up to
    ``out_channels * 8``); the feature map is then adaptively average-pooled
    to 2x2, flattened and projected to ``target_label`` outputs.

    Args:
        in_channels: Number of channels of the input image.
        out_channels: Channel count produced by the first stage.
        kernel_size: Convolution kernel size used by every stage.
        stride: Convolution stride used by every stage.
        padding: Convolution padding used by every stage.
        target_label: Number of output units of the final linear layer.
            Required despite the ``None`` default.

    Raises:
        TypeError: if ``target_label`` is left as ``None``.
    """

    def __init__(self, in_channels=3, out_channels=124, kernel_size=3,
                 stride=3, padding=3, target_label=None):
        super().__init__()
        # Fail fast with a readable message; otherwise the error only shows
        # up inside torch.nn.Linear when it tries to allocate its weight.
        if target_label is None:
            raise TypeError(
                "target_label must be given: it is the number of output "
                "classes of the final linear layer"
            )

        self.out_channels = out_channels
        # Spatial size after adaptive pooling; also determines linear_input.
        pooled_size = 2
        self.img_model = torch.nn.Sequential(
            Conv2dNet(in_channels, out_channels, kernel_size, stride, padding),
            Conv2dNet(out_channels, out_channels * 2, kernel_size, stride, padding),
            Conv2dNet(out_channels * 2, out_channels * 4, kernel_size, stride, padding),
            Conv2dNet(out_channels * 4, out_channels * 8, kernel_size, stride, padding),
            torch.nn.AdaptiveAvgPool2d(pooled_size),
            torch.nn.Flatten(),
        )
        # channels of the last stage x pooled height x pooled width
        self.linear_input = self.out_channels * 8 * pooled_size * pooled_size
        self.output_layer = torch.nn.Linear(self.linear_input, target_label)

    def forward(self, img):
        """Return the classifier outputs for the image batch ``img``."""
        out = self.img_model(img)
        x = self.output_layer(out)
        return x
vgg16
# Import the VGG-16 constructor from torchvision.
from torchvision.models import vgg16
# Flag forwarded to the constructor's `pretrained` argument below.
use_pretrained = True
# NOTE: this rebinds the module-level name `vgg16` from the constructor
# function to the model instance it returns, so after this line `vgg16` can no
# longer be called to build another model without re-running the import above.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour
# of `weights=` - confirm against the installed version.
vgg16 = vgg16(pretrained = use_pretrained)
class vggNet(torch.nn.Module):
    """Wrap a VGG-style model and replace its last classifier layer.

    Args:
        vgg: Model exposing an indexable ``classifier`` whose element 6 is the
            final layer. It is modified in place (not copied), so the object
            passed in also ends up with the new head.
        target_num: Number of output units of the new final layer.
        use_pretrained: Unused; kept so existing call sites keep working.
    """

    def __init__(self, vgg, target_num, use_pretrained=True):
        super().__init__()
        self.vgg = vgg
        # Take the input width from the layer being replaced instead of
        # hard-coding it; fall back to 4096 when that layer does not expose
        # `in_features`.
        old_head = self.vgg.classifier[6]
        in_features = getattr(old_head, "in_features", 4096)
        self.vgg.classifier[6] = torch.nn.Linear(
            in_features=in_features, out_features=target_num
        )

    def forward(self, x):
        """Run ``x`` through the wrapped model and return its output."""
        return self.vgg(x)