PyTorch로 이미지 분류 모델 구축하기

Mollang·2023년 3월 14일
0
post-thumbnail

Dataset

  • img 파일 저장 형식 : jpg & png
  • img path name : "/content/drive/MyDrive/vision/OCR이미지분류/원천데이터/ABSTRACT_ILLUSTRATION/L1_1/L2_1/L3_1/i_0001_101_607536.jpg"
  • 소분류 L3 카테고리 : 1
  • 중분류 L2 카테고리 : 1
  • 대분류 L1 카테고리 : 1
  • 각 카테고리 번호는 PATH NAME의 L1_, L2_, L3_ 뒤 숫자로 유추 가능 (예: ".../L3_1/..." → 소분류 1)
class Dataset(torch.utils.data.Dataset):
    """Image dataset that loads files by path and reads the label from the path.

    Each sample is a dict with key ``'img'`` (the RGB image, transformed when
    a transform is configured) and, when ``mode == 'train'``, key ``'y'``
    (the number following ``L3_`` in the file path, minus one, as a tensor).

    Args:
        mode: Samples carry a ``'y'`` label only when this equals ``'train'``.
        img_path_np: Indexable, sized collection of image file paths.
        transform: Optional callable invoked as ``transform(image=img)`` that
            returns a mapping with an ``'image'`` entry. When ``None`` the
            RGB array is returned untransformed.
    """

    def __init__(self, mode, img_path_np, transform=None):
        self.mode = mode
        self.imgs = img_path_np
        self.transform = transform

    def __len__(self):
        """Return the number of image paths."""
        return len(self.imgs)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Raises:
            OSError: If the image cannot be read from disk.
        """
        file_path = self.imgs[idx]

        img = cv2.imread(file_path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        if img is None:
            raise OSError(f"Could not read image (missing or undecodable): {file_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # transform defaults to None, so it must not be called unconditionally.
        if self.transform is not None:
            img = self.transform(image=img)['image']

        item = {'img': img}
        if self.mode == 'train':
            # Take the value after "L3_" up to the next '/', shifted to start at 0.
            item['y'] = torch.tensor(int(file_path.split("L3_")[1].split('/')[0])) - 1
        return item


# transformer
# Pipeline order: resize -> colour jitter -> normalize -> tensor conversion.
# Normalize() is placed after ColorJitter so the jitter operates on the image
# in its original pixel range; once Normalize() has standardised the values
# they no longer look like an image to photometric augmentations.
transformer = albumentations.Compose([
    albumentations.Resize(height=224, width=224),
    albumentations.ColorJitter(p=1),
    albumentations.Normalize(),
    albumentations.pytorch.transforms.ToTensorV2(),
])

# transform_oneof
# Pipeline order: resize -> one geometric op -> one blur/distortion/noise op
# -> one colour/distortion/noise op -> normalize -> tensor conversion.
# Normalize() runs last (just before ToTensorV2) so that the blur, noise and
# colour augmentations see the image in its original pixel range rather than
# mean/std-standardised values.
transform_oneof = albumentations.Compose([
    albumentations.Resize(224, 224),
    # Exactly one geometric augmentation.
    albumentations.OneOf([
        albumentations.RandomRotate90(p=1),
        albumentations.VerticalFlip(p=1),
    ], p=1),
    # Exactly one degradation.
    albumentations.OneOf([
        albumentations.MotionBlur(p=1),
        albumentations.OpticalDistortion(p=1),
        albumentations.GaussNoise(p=1),
    ], p=1),
    # NOTE(review): ColorJitter keeps its default p while its siblings use
    # p=1; OneOf presumably uses p as a selection weight, making ColorJitter
    # less likely to be picked - confirm whether equal weighting was intended.
    albumentations.OneOf([
        albumentations.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
        albumentations.OpticalDistortion(p=1),
        albumentations.GaussNoise(p=1),
    ], p=1),
    albumentations.Normalize(),
    albumentations.pytorch.transforms.ToTensorV2(),
])

conv2d

class Conv2dNet(torch.nn.Module):
    """Convolution stage: Conv2d, then ReLU, then max pooling with window 2.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by the convolution.
        kernel_size: Convolution kernel size.
        stride: Convolution stride.
        padding: Padding applied by the convolution.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding):
        super().__init__()
        self.conv = torch.nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
        )
        self.relu = torch.nn.ReLU()
        self.pool = torch.nn.MaxPool2d(kernel_size=2)

    def forward(self, x):
        """Apply convolution, activation and pooling to ``x`` in that order."""
        return self.pool(self.relu(self.conv(x)))


class Net(torch.nn.Module):
    """CNN classifier: four Conv2dNet stages, adaptive pooling, linear head.

    The channel count doubles at every stage (``out_channels`` up to
    ``out_channels * 8``). The last feature map is adaptively average-pooled
    to 2x2, flattened and passed to a single linear layer.

    Args:
        in_channels: Channels of the input image.
        out_channels: Channels produced by the first stage.
        kernel_size: Kernel size shared by all convolution stages.
        stride: Stride shared by all convolution stages.
        padding: Padding shared by all convolution stages.
        target_label: Number of output classes. Required despite the ``None``
            default in the signature.

    Raises:
        TypeError: If ``target_label`` is not provided.
    """

    def __init__(self, in_channels=3, out_channels=124, kernel_size=3, stride=3, padding=3, target_label=None):
        super().__init__()
        # Fail fast with a clear message instead of letting torch.nn.Linear
        # fail on out_features=None.
        if target_label is None:
            raise TypeError(
                "Net requires target_label (the number of output classes); got None"
            )

        self.out_channels = out_channels
        # Spatial size after adaptive pooling; the linear layer's input width
        # below must stay in sync with it.
        pooled_size = 2

        self.img_model = torch.nn.Sequential(
            Conv2dNet(in_channels, out_channels, kernel_size, stride, padding),
            Conv2dNet(out_channels, out_channels * 2, kernel_size, stride, padding),
            Conv2dNet(out_channels * 2, out_channels * 4, kernel_size, stride, padding),
            Conv2dNet(out_channels * 4, out_channels * 8, kernel_size, stride, padding),
            torch.nn.AdaptiveAvgPool2d(pooled_size),
            torch.nn.Flatten(),
        )

        self.linear_input = self.out_channels * 8 * pooled_size * pooled_size
        self.output_layer = torch.nn.Linear(self.linear_input, target_label)

    def forward(self, img):
        """Return the output of the linear head for a batch of images."""
        out = self.img_model(img)
        x = self.output_layer(out)
        return x

vgg16

from torchvision.models import vgg16
# Flag forwarded as the `pretrained` argument of the torchvision VGG16 factory.
use_pretrained = True 
# NOTE(review): this rebinds the name `vgg16` from the imported factory
# function to the model instance it returns, so the factory can no longer be
# called through this name afterwards (e.g. when re-running this statement).
# NOTE(review): recent torchvision releases reportedly deprecate `pretrained=`
# in favour of `weights=` - confirm against the installed version.
vgg16 = vgg16(pretrained = use_pretrained)


class vggNet(torch.nn.Module):
    """Wrap a VGG-style model and replace its final classifier layer.

    The layer at ``vgg.classifier[6]`` is swapped for a new linear layer with
    ``target_num`` outputs. The input width of the new layer is taken from
    the layer being replaced instead of being hard-coded.

    Note:
        The passed ``vgg`` module is modified in place and stored as
        ``self.vgg``; any other reference to it sees the new head as well.

    Args:
        vgg: Model exposing an indexable ``classifier`` whose element 6 has
            an ``in_features`` attribute.
        target_num: Number of output classes for the new head.
        use_pretrained: Unused; kept for backward compatibility.
    """

    def __init__(self, vgg, target_num, use_pretrained=True):
        super().__init__()
        self.vgg = vgg
        # Reuse the existing head's input width so the new layer always
        # matches the layer that feeds it.
        head_in_features = self.vgg.classifier[6].in_features
        self.vgg.classifier[6] = torch.nn.Linear(
            in_features=head_in_features, out_features=target_num
        )

    def forward(self, x):
        """Run ``x`` through the wrapped model and return its output."""
        return self.vgg(x)

0개의 댓글