[Week 2] 과제 #1

혜 콩·2022년 9월 27일
0
post-thumbnail

๐Ÿšฉ Model์˜ ๊ตฌ์กฐ (์‹คํ–‰ ์ˆœ์„œ)

import torch
from torch import nn
from torch.nn.parameter import Parameter

# Function
class Function_A(nn.Module):
    """Parameter-free module that doubles its input.

    Args:
        name: label kept on the instance as a plain Python attribute
            (it is neither a parameter nor a buffer).
    """

    def __init__(self, name):
        super().__init__()
        self.name = name

    def forward(self, x):
        """Return ``x`` multiplied by 2."""
        return x * 2

class Function_B(nn.Module):
    """Divide the input by two learnable one-element tensors in sequence.

    ``W1`` starts at 10 and ``W2`` at 2; both are registered as parameters,
    so they show up in ``parameters()`` and receive gradients.
    """

    def __init__(self):
        super().__init__()
        # Build the values with torch.tensor(): the legacy torch.Tensor(...)
        # constructor interprets a bare int as a *size*, which is an easy
        # mistake to make when editing these literals.
        self.W1 = Parameter(torch.tensor([10.0]))
        self.W2 = Parameter(torch.tensor([2.0]))

    def forward(self, x):
        """Return ``x / W1 / W2``."""
        x = x / self.W1
        x = x / self.W2

        return x

class Function_C(nn.Module):
    """Multiply the input by a fixed, non-learnable buffer named ``duck``."""

    def __init__(self):
        super().__init__()
        # torch.tensor() creates the value from data; the legacy
        # torch.Tensor(...) constructor treats a bare int as a size.
        # persistent=True keeps the buffer in the module's state_dict.
        self.register_buffer('duck', torch.tensor([7.0]), persistent=True)

    def forward(self, x):
        """Return ``x * duck``."""
        x = x * self.duck

        return x

class Function_D(nn.Module):
    """Add ``W1``, run the nested ``Function_C``, then divide by ``W2``.

    ``W1`` (initialised to 3) and ``W2`` (initialised to 5) are learnable
    parameters; ``c`` is a child ``Function_C`` module.
    """

    def __init__(self):
        super().__init__()
        # torch.tensor() creates the value from data; the legacy
        # torch.Tensor(...) constructor treats a bare int as a size.
        self.W1 = Parameter(torch.tensor([3.0]))
        self.W2 = Parameter(torch.tensor([5.0]))
        self.c = Function_C()

    def forward(self, x):
        """Return ``c(x + W1) / W2``."""
        x = x + self.W1
        x = self.c(x)
        x = x / self.W2

        return x


# Layer
class Layer_AB(nn.Module):
    """Layer that feeds ``Function_A`` (output divided by 5) into ``Function_B``."""

    def __init__(self):
        super().__init__()

        # Children are registered in this order: a, then b.
        self.a = Function_A('duck')
        self.b = Function_B()

    def forward(self, x):
        """Return ``b(a(x) / 5)``."""
        scaled = self.a(x) / 5
        return self.b(scaled)

class Layer_CD(nn.Module):
    """Layer that applies ``Function_C``, then ``Function_D``, then adds 1."""

    def __init__(self):
        super().__init__()

        # Children are registered in this order: c, then d.
        self.c = Function_C()
        self.d = Function_D()

    def forward(self, x):
        """Return ``d(c(x)) + 1``."""
        after_c = self.c(x)
        return self.d(after_c) + 1


# Model
class Model(nn.Module):
    """Top-level network: ``Layer_AB`` followed by ``Layer_CD``."""

    def __init__(self):
        super().__init__()

        # Children are registered in this order: ab, then cd.
        self.ab = Layer_AB()
        self.cd = Layer_CD()

    def forward(self, x):
        """Return ``cd(ab(x))``."""
        return self.cd(self.ab(x))

# Build the network first; creating the input below is independent of it.
model = Model()

# One-element integer tensor used as the sample input.
x = torch.tensor([7])

# Run a single forward pass through the whole model.
model(x)

[ ] = 실행 순서

x = tensor 7

  1. layer_ab(x)
    [1] duck์ด๋ผ๋Š” name์˜ function_a __init__ ์‹คํ–‰
    [2] function_b __init__ ์‹คํ–‰
    [5] x = function_a ์˜ forward() ์—ฐ์‚ฐ ์‹คํ–‰ / 5
    [6] x = function_b ์˜ forward() ์—ฐ์‚ฐ ์‹คํ–‰
  1. layer_cd(x)
    [3] duck ๋ฒ„ํผ ์ €์žฅํ•˜๋Š” function_a __init__ ์‹คํ–‰
    [4] function_d __init__ ์‹คํ–‰
    [7] x = function_c ์˜ forward() ์—ฐ์‚ฐ ์‹คํ–‰
    [8] x = function_d ์˜ forward() ์—ฐ์‚ฐ ์‹คํ–‰ + 1
  • ๊ฐ ํ•จ์ˆ˜๋“ค์ด ์ฒ˜์Œ ํ˜ธ์ถœ๋˜๋ฉด __init__์ด ์‹คํ–‰๋˜๊ณ  ๋‹ค์Œ ํ•จ์ˆ˜๋ฅผ ์‹คํ–‰ํ•˜๊ธฐ ์ „์ธ ๋Œ€๊ธฐ์ค‘ ์ƒํƒœ์— ๋“ค์–ด๊ฐ„๋‹ค.



๐Ÿ W์™€ b๋Š” ์™œ Parameter๋กœ ์ง€์ •ํ•ด์ค˜์š”?

  • W์™€ b๋ฅผ ํŒŒ๋ผ๋ฏธํ„ฐ๋กœ ์ง€์ •ํ•ด์ฃผ๋ฉด, ํ•„์š”ํ•  ๋•Œ ๊ฐ’์„ ๊ณ„์† ๊ฐ€์ ธ์™€ ์“ธ ์ˆ˜ ์žˆ๊ณ 
    ๋ฏธ๋ถ„๋„ ๊ฐ€๋Šฅํ•˜๋‹ค.
    ํ•˜์ง€๋งŒ Tensor๋กœ ์ง€์ •ํ•ด์ฃผ๋ฉด, ๊ณ„์‚ฐ์€ ํŒŒ๋ผ๋ฏธํ„ฐ์™€ ๋™์ผํ•˜๊ฒŒ ์ž˜ ์ˆ˜ํ–‰ํ•˜๊ฒ ์ง€๋งŒ ๋ฏธ๋ถ„์ด ๋ถˆ๊ฐ€๋Šฅํ•˜๊ณ  ๊ฐ’์ด ์—…๋ฐ์ดํŠธ๊ฐ€ ๋˜์ง€ ์•Š๋Š”๋‹ค. ๋˜ํ•œ, ๋ชจ๋ธ์„ ์ €์žฅํ•  ๋•Œ ํ…์„œ๊ฐ’์€ ํ•จ๊ป˜ ์ €์žฅ๋˜์ง€ ์•Š์•„ ๋ฌด์‹œ๋œ๋‹ค.

🚩 Buffer

  • Module์— ์ €์žฅํ•ด ๋†“๊ณ  ์‚ฌ์šฉํ•˜๋Š” Tensor์˜ ์ผ์ข…์œผ๋กœ ํ•™์Šต์„ ํ†ตํ•ด ๊ณ„์‚ฐ๋˜์ง€ ์•Š๋Š” Tensor

๐Ÿ 모듈 or 모델 안에 존재하는 파라미터 / 버퍼 알아내기

  • module.named_buffers()
  • model.buffers()
  • 이름을 통해 특정 buffer 가져오기: get_buffer("name")
# Print every buffer registered anywhere in the model, one entry per
# qualified name, followed by a 30-dash separator line.
for name, buffer in model.named_buffers():
    print(f"[ Name ] : {name}\n[ Buffer ] : {buffer}", "-" * 30, sep="\n")
    
>>>    
[ Name ] : cd.c.duck
[ Buffer ] : tensor([7.])
------------------------------
[ Name ] : cd.d.c.duck
[ Buffer ] : tensor([7.])
------------------------------


# TODO : Get the buffer that belongs to Function_C!
# "cd.c.duck" is the dotted path model -> cd -> c -> buffer "duck".
buffer = model.get_buffer("cd.c.duck")



๐Ÿ 내 모델 안에 어떤 모듈들이 있었는지 기억이 안 나요!

# Print every module in the model (the model itself comes first, with an
# empty name), each followed by a 30-dash separator line.
for name, module in model.named_modules():
    print(f"[ Name ] : {name}\n[ Module ]\n{module}", "-" * 30, sep="\n")
    
>>>
[ Name ] : 
[ Module ]
Model(
  (ab): Layer_AB(
    (a): Function_A()
    (b): Function_B()
  )
  (cd): Layer_CD(
    (c): Function_C()
    (d): Function_D(
      (c): Function_C()
    )
  )
)
------------------------------
[ Name ] : ab
[ Module ]
Layer_AB(
  (a): Function_A()
  (b): Function_B()
)
------------------------------
[ Name ] : ab.a
[ Module ]
Function_A()
------------------------------
[ Name ] : ab.b
[ Module ]
Function_B()
------------------------------
[ Name ] : cd
[ Module ]
Layer_CD(
  (c): Function_C()
  (d): Function_D(
    (c): Function_C()
  )
)
------------------------------
[ Name ] : cd.c
[ Module ]
Function_C()
------------------------------
[ Name ] : cd.d
[ Module ]
Function_D(
  (c): Function_C()
)
------------------------------
[ Name ] : cd.d.c
[ Module ]
Function_C()
------------------------------



๐Ÿ hook

  • 프로그램, 혹은 특정 함수 실행 후에 걸어놓는 경우
- forward_pre_hooks
- forward_hooks
- full_backward_hooks
- state_dict_hooks                  # used internally

๐Ÿ apply

import torch
from torch import nn

@torch.no_grad()
def init_weights(m):
    """Callback for ``nn.Module.apply``: log each module and set Linear weights to 1.

    Args:
        m: the module currently being visited.

    Every visited module is printed. ``nn.Linear`` layers (subclasses
    included, hence ``isinstance`` rather than an exact type comparison) have
    their weight filled in place with 1.0 and printed; ``nn.Sequential``
    containers only produce a log line. The ``torch.no_grad()`` decorator
    disables gradient tracking for the call, which is what permits the
    in-place fill on a parameter that requires grad.
    """
    print('module:', m)
    if isinstance(m, nn.Linear):
        m.weight.fill_(1.0)
        print('linear apply:', m.weight)
    elif isinstance(m, nn.Sequential):
        print('It is sequential')

# Two stacked linear layers (5 -> 2 -> 2) wrapped in a Sequential container.
net = nn.Sequential(
    nn.Linear(5, 2),
    nn.Linear(2, 2),
)

# Run init_weights over the container via Module.apply, bracketed by
# marker lines so the callback's output is easy to spot.
print('------apply start------')
net.apply(init_weights)
print('---------end----------')
>>>
------apply start------
module: Linear(in_features=5, out_features=2, bias=True)
linear apply: Parameter containing:
tensor([[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]], requires_grad=True)
module: Linear(in_features=2, out_features=2, bias=True)
linear apply: Parameter containing:
tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
module: Sequential(
  (0): Linear(in_features=5, out_features=2, bias=True)
  (1): Linear(in_features=2, out_features=2, bias=True)
)
It is sequential
---------end----------
  • Sequential 안의 nn.Linear(5, 2), nn.Linear(2, 2)에만 apply 함수가 적용되는 게 아니라 Sequential 그 자체(self)에도 적용하는 것을 확인할 수 있다.




✍🏻 회고

1๋ฒˆ์งธ ๊ณผ์ œ๊ฐ€ ์ดํ‹€์— ๊ฑธ์ณ ๋๋‚ฌ๋‹ค... Step by Step ์œผ๋กœ ์ฐจ๊ทผ์ฐจ๊ทผ ๊ณต๋ถ€ํ•  ์ˆ˜ ์žˆ์—ˆ์ง€๋งŒ ์˜ค๋กœ์ง€ docs๋งŒ ๋ณด๊ณ  ์ดํ•ดํ•˜๋ ค๋‹ˆ ๋งŽ์€ ์‹œ๊ฐ„์ด ๊ฑธ๋ ธ๋‹ค.
์™ธ์šฐ์ง€๋Š” ๋ชปํ•˜๋”๋ผ๋„ ์™„๋ฒฝํžˆ ์ดํ•ดํ•˜๊ณ  ๋„˜์–ด๊ฐ€๊ณ  ์‹ถ์–ด์„œ ์งˆ๋ฌธ๋„ ๋งŽ์ด ํ•˜๊ณ  ์ •๋ฆฌ๋„ ํ•˜๋ฉด์„œ ๋‚˜์˜ ์ดํ•ด๋ฅผ ๋„์™”๋‹ค. ์‹œ๊ฐ„์€ ์˜ค๋ž˜ ๊ฑธ๋ ธ์ง€๋งŒ ์ดํ‹€๋™์•ˆ ๋งŽ์ด ์„ฑ์žฅํ•œ ๊ธฐ๋ถ„์ด๋ผ ๋ฟŒ๋“ฏํ–ˆ๋‹ค!

profile
배우고 싶은게 많은 개발자📚

0개의 댓글