PFLD model refactoring

이준우·2024년 11월 26일

기존의 PFLD model 코드에서 refactoring을 해보았다.

BEFORE

# model.py

import torch
from torch import nn

class CNNBlock(nn.Module):
    """Convolution -> batch normalisation -> ReLU6, applied in that order.

    The convolution is created without a bias term. Every other convolution
    option (``kernel_size``, ``stride``, ``padding``, ...) is taken from
    ``**kwargs`` and handed to ``nn.Conv2d`` unchanged.
    """

    def __init__(self, in_channels, out_channels, **kwargs):
        super().__init__()

        convolution = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
        normalisation = nn.BatchNorm2d(out_channels)
        activation = nn.ReLU6()

        # Positional order fixes the sub-module indices 0, 1 and 2.
        self.layers = nn.Sequential(convolution, normalisation, activation)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``x`` after it has passed through the three stacked layers."""
        result = self.layers(x)
        return result

class DepthwiseConv(nn.Module):
    """Grouped 3x3 convolution -> batch normalisation -> ReLU6.

    The convolution always uses ``groups=in_channels``, so ``out_channels``
    has to be a multiple of ``in_channels``.

    Args:
        in_channels: Number of channels of the input tensor.
        out_channels: Number of channels produced by the convolution.
        stride: Stride of the convolution.
        **kwargs: Extra ``nn.Conv2d`` options. They override the defaults
            ``kernel_size=3`` and ``padding=1``. These used to be accepted
            and then silently dropped; they are now honoured.
    """

    def __init__(self, in_channels, out_channels, stride, **kwargs):
        super().__init__()

        # Defaults reproduce the previous hard-coded configuration; explicit
        # keyword arguments from the caller take precedence.
        conv_options = {"kernel_size": 3, "padding": 1}
        conv_options.update(kwargs)

        self.DW_conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, stride=stride,
                      groups=in_channels, **conv_options),
            nn.BatchNorm2d(out_channels),
            nn.ReLU6(),
        )

    def forward(self, x):
        """Return ``x`` after the convolution, normalisation and activation."""
        return self.DW_conv(x)

class PointwiseConv(nn.Module):
    """1x1 convolution + batch normalisation, optionally followed by ReLU6.

    Only the branch selected by ``nonlinear`` is built. Previously both
    branches were always instantiated although ``forward`` uses exactly one,
    which doubled the parameters of every instance and left half of them
    without gradients.

    Args:
        in_channels: Number of channels of the input tensor.
        out_channels: Number of channels produced by the convolution.
        nonlinear: Truthy selects the activated branch (``R_PW_conv``);
            falsy, including the default ``None``, selects the linear branch
            (``L_PW_conv``). The choice is fixed at construction time.
    """

    def __init__(self, in_channels, out_channels, nonlinear=None):
        super().__init__()

        self.nonlinear = nonlinear

        if nonlinear:
            self.R_PW_conv = CNNBlock(in_channels, out_channels, kernel_size=1, stride=1)
            self.L_PW_conv = None
        else:
            self.R_PW_conv = None
            self.L_PW_conv = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1),
                nn.BatchNorm2d(out_channels),
            )

    def forward(self, x):
        """Apply the branch that was selected when the module was built."""
        if self.nonlinear:
            return self.R_PW_conv(x)
        else:
            return self.L_PW_conv(x)

class Bottleneck(nn.Module):
    """Bottleneck: 1x1 expansion -> 3x3 depthwise -> linear 1x1 projection.

    Args:
        in_channels: Number of channels of the input tensor.
        expansion: Multiplier applied to ``in_channels`` to get the width of
            the two inner layers.
        out_channels: Number of channels produced by the final projection.
        stride: Stride of the depthwise convolution.
        residual: When truthy, ``forward`` returns ``x + layers(x)``. The
            caller is responsible for choosing a configuration in which both
            tensors have matching shapes.
    """

    def __init__(self, in_channels, expansion, out_channels, stride, residual=None):
        super().__init__()

        hidden_channels = in_channels * expansion
        self.layers = nn.Sequential(
            PointwiseConv(in_channels, hidden_channels, nonlinear=True),
            DepthwiseConv(hidden_channels, hidden_channels, stride),
            PointwiseConv(hidden_channels, out_channels, nonlinear=False),
        )

        self.residual = residual

    def forward(self, x):
        """Run the three layers and add the input back when ``residual`` is set."""
        out = self.layers(x)
        if self.residual:
            return x + out
        return out

class PFLDBackbone(nn.Module):
    """Backbone that maps an image to a 196-value landmark vector.

    ``forward`` also returns the feature map produced by ``Bottleneck_1_5``.
    The shape comments below assume a 112x112 input; the fixed-size pooling
    layers and the 176-input linear layer only line up for that size.

    Args:
        in_channels: Number of channels of the input image.
    """

    def __init__(self, in_channels=3):
        super().__init__()
        # input : [-1, in_channels, 112, 112]
        self.conv1_layer = nn.Sequential(
            nn.Conv2d(in_channels, 64, kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU6()
        )
        self.DW_conv1_layer = DepthwiseConv(64, 64, stride=1, padding=1)  # output : [-1, 64, 56, 56]

        self.Bottleneck_1_1 = Bottleneck(64, 6, 64, 2, residual=False)
        self.Bottleneck_1_2 = Bottleneck(64, 6, 64, 1, residual=True)
        self.Bottleneck_1_3 = Bottleneck(64, 6, 64, 1, residual=True)
        self.Bottleneck_1_4 = Bottleneck(64, 6, 64, 1, residual=True)
        self.Bottleneck_1_5 = Bottleneck(64, 6, 64, 1, residual=True)  # output : [-1, 64, 28, 28]

        self.Bottleneck_2_1 = Bottleneck(64, 6, 128, 2, residual=False)  # output : [-1, 128, 14, 14]

        self.Bottleneck_3_1 = Bottleneck(128, 6, 128, 1, residual=False)
        self.Bottleneck_3_2 = Bottleneck(128, 6, 128, 1, residual=True)
        self.Bottleneck_3_3 = Bottleneck(128, 6, 128, 1, residual=True)
        self.Bottleneck_3_4 = Bottleneck(128, 6, 128, 1, residual=True)
        self.Bottleneck_3_5 = Bottleneck(128, 6, 128, 1, residual=True)
        self.Bottleneck_3_6 = Bottleneck(128, 6, 128, 1, residual=True)  # output : [-1, 128, 14, 14]

        self.Bottleneck_4_1 = Bottleneck(128, 6, 16, 1, residual=False)  # output : [-1, 16, 14, 14]

        self.conv2_layer = CNNBlock(16, 32, kernel_size=3, stride=2, padding=1)  # output : [-1, 32, 7, 7]
        self.conv3_layer = nn.Conv2d(32, 128, kernel_size=7, stride=1)  # output : [-1, 128, 1, 1]
        # NOTE(review): bn_l is registered but never applied in forward().
        # It is kept so that existing checkpoints keep loading.
        self.bn_l = nn.BatchNorm2d(128)

        self.avgpool1 = nn.AvgPool2d(14)
        self.avgpool2 = nn.AvgPool2d(7)
        # 16 + 32 + 128 = 176 concatenated features.
        self.fc = nn.Linear(176, 196)

    def forward(self, x):
        """Return ``(out1, landmarks)``.

        ``out1`` is the output of ``Bottleneck_1_5``; ``landmarks`` is the
        output of the final linear layer, with 196 values per sample.
        """
        x = self.conv1_layer(x)
        x = self.DW_conv1_layer(x)

        x = self.Bottleneck_1_1(x)
        x = self.Bottleneck_1_2(x)
        x = self.Bottleneck_1_3(x)
        x = self.Bottleneck_1_4(x)
        out1 = self.Bottleneck_1_5(x)

        x = self.Bottleneck_2_1(out1)

        x = self.Bottleneck_3_1(x)
        x = self.Bottleneck_3_2(x)
        x = self.Bottleneck_3_3(x)
        x = self.Bottleneck_3_4(x)
        x = self.Bottleneck_3_5(x)
        x = self.Bottleneck_3_6(x)

        x = self.Bottleneck_4_1(x)

        # Scale 1: pooled 16-channel map.
        s1 = self.avgpool1(x)
        s1 = s1.view(s1.size(0), -1)

        x = self.conv2_layer(x)

        # Scale 2: pooled 32-channel map.
        s2 = self.avgpool2(x)
        s2 = s2.view(s2.size(0), -1)

        # Scale 3: 7x7 convolution over the same 32-channel map.
        s3 = self.conv3_layer(x)
        s3 = s3.view(s3.size(0), -1)

        cat = torch.cat([s1, s2, s3], dim=1)
        landmarks = self.fc(cat)

        return out1, landmarks
    
# [-1, 64, 28, 28]    
class AuxiliaryBlock(nn.Module):
    """Auxiliary head producing three values per sample.

    Three ``CNNBlock`` stages and a plain 7x7 convolution are followed by
    global average pooling and two linear layers (128 -> 32 -> 3).
    """

    def __init__(self):
        super().__init__()

        downsample = dict(kernel_size=3, stride=2, padding=1)
        keep_size = dict(kernel_size=3, stride=1, padding=1)

        self.conv1 = CNNBlock(64, 128, **downsample)
        self.conv2 = CNNBlock(128, 128, **keep_size)
        self.conv3 = CNNBlock(128, 32, **downsample)
        self.conv4 = nn.Conv2d(32, 128, kernel_size=7, stride=1, padding=0)
        self.global_avg = nn.AdaptiveAvgPool2d(1)
        self.fc1 = nn.Linear(128, 32)
        self.fc2 = nn.Linear(32, 3)

    def forward(self, x):
        """Return a tensor with three values for every sample in ``x``."""
        feature_stages = (self.conv1, self.conv2, self.conv3,
                          self.conv4, self.global_avg)
        for stage in feature_stages:
            x = stage(x)

        # Collapse everything except the batch dimension.
        flat = x.view(x.size(0), -1)
        hidden = self.fc1(flat)
        return self.fc2(hidden)

# Smoke test, executed at module level: push one random 112x112 RGB image
# through the backbone and print the shapes of both returned tensors.
a = torch.randn(1, 3, 112, 112)
model = PFLDBackbone(in_channels=3)
out1, landmark = model(a)

print('out:', out1.shape)
print('landmark : ', landmark.shape)

# Same check for the auxiliary head, fed a random (1, 64, 28, 28) tensor.
# NOTE: `model` is rebound here, so the backbone instance above is dropped.
b = torch.randn(1, 64, 28, 28)
model = AuxiliaryBlock()
print(model(b).shape)

AFTER

import yaml
import torch
from torch import nn

class CNNBlock(nn.Module):
    """Convolution, batch normalisation and ReLU applied one after another.

    All keyword arguments are forwarded to ``nn.Conv2d`` unchanged.
    """

    def __init__(self, in_channels, out_channels, **kwargs):
        super().__init__()

        self.conv = nn.Conv2d(in_channels, out_channels, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels)
        self.act = nn.ReLU()

    def forward(self, x) -> torch.Tensor:
        """Return the activated, normalised convolution of ``x``.

        The result has ``out_channels`` channels; its spatial size is
        whatever the configured convolution produces.
        """
        for layer in (self.conv, self.bn, self.act):
            x = layer(x)
        return x


class CNNBlockNonact(nn.Module):
    """Convolution followed by batch normalisation, with no activation.

    All keyword arguments are forwarded to ``nn.Conv2d`` unchanged.
    """

    def __init__(self, in_channels, out_channels, **kwargs):
        super().__init__()

        self.conv = nn.Conv2d(in_channels, out_channels, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels)

    def forward(self, x) -> torch.Tensor:
        """Return the normalised convolution of ``x``.

        The result has ``out_channels`` channels; its spatial size is
        whatever the configured convolution produces.
        """
        convolved = self.conv(x)
        normalised = self.bn(convolved)
        return normalised


class DepthwiseBlock(nn.Module):
    """Grouped 3x3 convolution, batch normalisation and ReLU.

    The convolution uses ``padding=1`` and ``groups=in_channels``, so
    ``out_channels`` has to be a multiple of ``in_channels``.
    """

    def __init__(self, in_channels, out_channels, stride):
        super().__init__()

        conv_options = dict(kernel_size=3, stride=stride,
                            padding=1, groups=in_channels)
        self.Dw_conv = nn.Conv2d(in_channels, out_channels, **conv_options)
        self.bn = nn.BatchNorm2d(out_channels)
        self.act = nn.ReLU()

    def forward(self, x) -> torch.Tensor:
        """Return the activated, normalised grouped convolution of ``x``.

        The result has ``out_channels`` channels.
        """
        features = self.Dw_conv(x)
        features = self.bn(features)
        return self.act(features)


class InvertedResidualBlock(nn.Module):
    """Bottleneck block: 1x1 conv -> 3x3 depthwise conv -> linear 1x1 conv.

    The depthwise stage widens the tensor from ``in_channels`` to
    ``in_channels * expansion`` channels and the final 1x1 convolution
    projects to ``out_channels``. A ReLU is applied to the result, after the
    optional residual addition.

    Args:
        in_channels: Number of channels of the input tensor.
        out_channels: Number of channels of the output tensor.
        stride: Stride of the depthwise convolution.
        use_residual: When truthy the input is added to the block output
            before the final ReLU. The caller is responsible for choosing a
            configuration in which both tensors have matching shapes.
        expansion: Optional per-instance expansion factor. When omitted, the
            class attribute ``InvertedResidualBlock.expansion`` is read at
            construction time (see ``change_exp``).
    """
    expansion = 2

    def __init__(self, in_channels, out_channels, stride, use_residual, *, expansion=None):
        super().__init__()

        if expansion is None:
            expansion = InvertedResidualBlock.expansion
        hidden_channels = in_channels * expansion

        self.layers = nn.Sequential(
            CNNBlock(in_channels, in_channels, kernel_size=1, stride=1, padding=0),
            DepthwiseBlock(in_channels, hidden_channels, stride=stride),
            CNNBlockNonact(hidden_channels, out_channels,
                           kernel_size=1, stride=1, padding=0)
        )

        # NOTE(review): layers1 does not contribute to the output. It is kept
        # registered only so that checkpoints containing "layers1.*" keys keep
        # loading; forward() no longer spends a convolution on it.
        self.layers1 = DepthwiseBlock(in_channels, hidden_channels, stride=stride)
        self.act = nn.ReLU()

        self.use_residual = use_residual

    def forward(self, x):
        """Return the activated block output, with the input added when enabled."""
        out = self.layers(x)

        if self.use_residual:
            out = x + out

        return self.act(out)

    @classmethod
    def change_exp(cls, exp) -> None:
        """Set the class-level default expansion factor.

        Only blocks constructed after this call, and without an explicit
        ``expansion`` argument, are affected.
        """
        cls.expansion = exp



class PFLDBackbone(nn.Module):
    """PFLD backbone.

    Maps an image batch to a 196-value landmark vector and also returns the
    feature map produced by ``layers1``. The shape comments in ``forward``
    assume an input of shape (-1, 3, 112, 112); the fixed-size pooling layers
    and the 176-input linear layer only line up for that size.
    """

    def __init__(self):
        super().__init__()

        self.conv1 = CNNBlock(in_channels=3, out_channels=64, kernel_size=3, stride=2, padding=1)
        self.conv2 = CNNBlock(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1)

        self.in_channels = 64

        # Arguments: in_channels, expansion, out_channels, repeat, stride.
        self.layers1 = self._create_block(64, 2, 64, 5, 2)
        self.layers2 = self._create_block(64, 2, 128, 1, 2)
        self.layers3 = self._create_block(128, 4, 128, 6, 1)
        self.layers4 = self._create_block(128, 2, 16, 1, 1)

        self.conv3 = CNNBlockNonact(in_channels=16, out_channels=32,
                                    kernel_size=3, stride=2, padding=1)
        self.conv4 = nn.Conv2d(in_channels=32, out_channels=128,
                               kernel_size=7, stride=1, padding=0)

        self.avgpool1 = nn.AvgPool2d(14)
        self.avgpool2 = nn.AvgPool2d(7)

        # 16 + 32 + 128 = 176 concatenated features.
        self.fcs = nn.Linear(176, 196)  # landmark

    def forward(self, x):
        """Return ``(out1, landmark)``.

        ``out1`` is the output of ``layers1``; ``landmark`` is the output of
        the final linear layer, with 196 values per sample.
        """
        x = self.conv1(x)  # (-1, 64, 56, 56)
        x = self.conv2(x)  # (-1, 64, 56, 56)
        out1 = self.layers1(x)  # (-1, 64, 28, 28)
        x = self.layers2(out1)  # (-1, 128, 14, 14)
        x = self.layers3(x)  # (-1, 128, 14, 14)
        x = self.layers4(x)  # (-1, 16, 14, 14)

        s1 = self.avgpool1(x)  # (-1, 16, 1, 1)
        s1 = s1.view(s1.size(0), -1)

        x = self.conv3(x)  # (-1, 32, 7, 7)
        s2 = self.avgpool2(x)  # (-1, 32, 1, 1)
        s2 = s2.view(s2.size(0), -1)

        s3 = self.conv4(x)  # (-1, 128, 1, 1)
        s3 = s3.view(s3.size(0), -1)

        features = torch.cat([s1, s2, s3], dim=1)

        landmark = self.fcs(features)

        return out1, landmark

    def _create_block(self, in_channels, expansion, out_channels, repeat, stride):
        """Build ``repeat`` chained ``InvertedResidualBlock`` modules.

        Only the first block uses ``stride`` and the ``in_channels`` ->
        ``out_channels`` change, without a residual connection. Every later
        block keeps ``out_channels``, uses stride 1 and enables the residual
        connection.
        """
        # The block class reads its expansion factor from class state at
        # construction time, so it has to be set before the blocks are built.
        InvertedResidualBlock.change_exp(expansion)

        blocks = []
        for idx in range(repeat):
            is_first = idx == 0
            blocks.append(
                InvertedResidualBlock(
                    in_channels,
                    out_channels,
                    stride if is_first else 1,
                    not is_first,
                )
            )
            in_channels = out_channels

        return nn.Sequential(*blocks)


class AuxiliaryBlock(nn.Module):
    """Auxiliary head that regresses the Euler angles: pitch, yaw, roll.

    Expects a feature map of shape (-1, 64, 28, 28): the two stride-2
    convolutions reduce it to 7x7, the 7x7 convolution to 1x1, and the
    flattened 128 features feed the two linear layers (128 -> 32 -> 3).

    A ReLU follows each of the first three convolutions. Without it the
    whole head is a composition of affine layers and collapses to a single
    affine map. No normalisation is applied, and the parameter names are
    unchanged.
    """

    def __init__(self):
        super().__init__()

        self.conv1 = nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(128, 32, kernel_size=3, stride=2, padding=1)
        self.conv4 = nn.Conv2d(32, 128, kernel_size=7, stride=1, padding=0)

        # Parameter-free, so the state_dict keys stay as they were.
        self.act = nn.ReLU()

        self.linear1 = nn.Linear(128, 32)
        self.linear2 = nn.Linear(32, 3)

    def forward(self, x):
        """Return a (-1, 3) tensor of predicted Euler angles."""
        x = self.act(self.conv1(x))
        x = self.act(self.conv2(x))
        x = self.act(self.conv3(x))
        x = self.conv4(x)
        x = x.view(x.size(0), -1)

        x = self.linear1(x)
        euler_angle = self.linear2(x)

        return euler_angle


# NOTE(review): `user_input` and `model` are created here but are not used by
# any statement below.
user_input = torch.randn(1, 3, 112, 112)
model = PFLDBackbone()


# Disabled manual check of the auxiliary head:
# user_input2 = torch.randn(1, 64, 28, 28)
# model2 = AuxiliaryBlock()
# print(model2(user_input2))

# Third-party dependency, needed only for the summaries printed below.
from torchinfo import summary

# Print a layer-by-layer summary of the backbone for one 112x112 RGB image.
model1 = PFLDBackbone()
print(summary(model1, (1, 3, 112, 112)))

# Print the same summary for the auxiliary head, fed a (1, 64, 28, 28) input.
model2 = AuxiliaryBlock()
print(summary(model2, (1, 64, 28, 28)))

기존보다 좀 더 깔끔한 코드가 된 것 같다. 기존처럼 PFLDBackbone에 코드가 몰려 있으면 모델의 구조를 한눈에 파악하기에는 편하겠지만, pythonic한 코드와 객체지향의 느낌을 살려 다시 작성하는 것이 좋다고 판단하여 refactoring을 진행하였다.

github : https://github.com/JUNOO1026/Object_Detection/blob/main/landmark/PFLD/pfld_model.py

profile
멋진 인생을 살기 위한 footprint

0개의 댓글