Reference
Because nn.Parameter is a low-level API, it is rarely used directly in real training.
import torch
from torch import nn
from torch import Tensor

class MyLinear(nn.Module):
    def __init__(self, in_features, out_features, bias=True):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weights = nn.Parameter(
            torch.randn(in_features, out_features)
        )
        self.bias = nn.Parameter(torch.randn(out_features))

    def forward(self, x: Tensor):
        return x @ self.weights + self.bias
x = torch.randn(5, 7)  # 5 samples, 7 features each
# returns a (5, 7) tensor of random numbers drawn from a standard normal distribution (the input x)
layer = MyLinear(7, 12)  # creates the (7, 12) weight tensor and the (12,) bias
print(layer(x).shape)  # torch.Size([5, 12])
# Q. How does putting an input into the object go straight to the forward function?
# A. Because the class inherits from nn.Module, it inherits the call machinery that passes the
#    input to the forward(input) method and returns the layer output, so simply calling the
#    object with an input runs forward automatically (small sketch below).
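A minimal sketch of that dispatch (the Verbose class is only an illustration, not the actual nn.Module internals, which also run hooks and other bookkeeping):

import torch
from torch import nn

class Verbose(nn.Module):
    def forward(self, x):
        print("forward was called")
        return x * 2

m = Verbose()
inp = torch.ones(3)
out1 = m(inp)            # goes through nn.Module.__call__ ...
out2 = m.forward(inp)    # ... which ends up invoking forward (direct call skips hooks)
print(torch.equal(out1, out2))  # True; "forward was called" is printed both times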
# these are the objects that get differentiated during backward propagation
for value in layer.parameters():
    print(value)
# Parameter containing: --> weight
# tensor([[-1.6870e+00, -1.6341e+00, -1.1871e-01, -1.3332e-01, 4.0530e-01,
# 1.4942e+00, 1.2132e-01, 3.0088e-01, 8.7858e-01, 1.2331e+00,
# -6.3245e-01, 1.3270e+00],
# [-1.6745e+00, -7.8123e-01, 3.2348e-01, -4.5228e-01, -4.1481e-01,
# 2.0434e-01, -1.9098e+00, -3.1609e-01, -1.8538e+00, 5.9098e-02,
# 8.3901e-03, -1.6366e+00],
# [-3.3777e-01, 1.7517e-01, -5.5396e-01, -2.0859e+00, 1.6113e-03,
# 9.9615e-01, -9.9338e-01, -2.5963e-01, 7.4590e-01, 8.2320e-02,
# 4.4123e-01, 3.2984e-01],
# [-1.0102e+00, -8.6517e-01, 5.9489e-01, 2.8615e+00, 3.1414e+00,
# -4.0385e-01, -3.4812e-01, -2.2696e-01, 1.0443e-01, 4.4085e-01,
# -1.7535e+00, 8.2616e-01],
# [ 7.5014e-01, -6.0464e-01, -3.4773e-01, -5.2418e-01, -9.8451e-01,
# -8.0645e-01, -1.1786e+00, 9.0820e-02, -4.9526e-01, 5.9909e-01,
# 9.5106e-01, -1.7665e+00],
# [ 9.5683e-01, -1.2164e+00, -1.8714e+00, -1.5876e+00, -1.0724e+00,
# -2.6991e-02, 1.0431e+00, 1.1481e+00, -1.0769e+00, 2.5158e+00,
# -6.1803e-01, 5.1032e-01],
# [-1.1782e+00, -2.8835e+00, 2.3490e-02, -1.6397e+00, -1.6549e+00,
# -5.8763e-01, -2.0034e+00, -1.0103e+00, 1.1630e+00, -1.9497e+00,
# 1.1685e-01, -3.7031e-01]], requires_grad=True)
# Parameter containing: --> bias
# tensor([ 1.2938, -0.9338, -0.3758, 0.2563, -0.1678, -0.0947, 1.4653, -2.4409,
#           0.9019, -0.1023, -0.9561, -0.0532], requires_grad=True)
Q.
Come to think of it, couldn't W and b just be plain Tensors?
Why use a separate Parameter class at all?
A.
Only tensors wrapped in the Parameter class get requires_grad=True and are registered with the
module, so autograd can build the computation graph (grad_fn) through them and parameters() /
the optimizer can see and update them; a plain Tensor attribute gets neither (quick check below).
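A quick check of that difference, before the Tensor-based version below:

import torch
from torch import nn

p = nn.Parameter(torch.randn(3))
t = torch.randn(3)
print(p.requires_grad)   # True  - Parameters track gradients by default
print(t.requires_grad)   # False - plain tensors do not
# and only Parameter attributes are registered by nn.Module, so only they
# show up in module.parameters() and are visible to the optimizer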
class MyLinear(nn.Module):
    def __init__(self, in_features, out_features, bias=True):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        # weight and bias defined as plain Tensors instead of Parameters
        self.weights = Tensor(
            torch.randn(in_features, out_features)
        )
        self.bias = Tensor(torch.randn(out_features))

    def forward(self, x: Tensor):
        return x @ self.weights + self.bias
layer = MyLinear(7, 12)
x = torch.randn(5, 7)
layer(x).shape

# here nothing is printed <- parameters() only shows what is subject to differentiation,
# and the plain Tensors were never registered
for value in layer.parameters():
    print(value, '2')
⇒ Built-in layers already have their parameters registered internally, so in practice there is rarely a need to declare nn.Parameter yourself (see the nn.Linear check below).
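A quick standalone check that nn.Linear already creates its weight and bias as registered Parameters:

import torch
from torch import nn

builtin = nn.Linear(7, 12)
for name, param in builtin.named_parameters():
    print(name, tuple(param.shape), param.requires_grad)
# weight (12, 7) True   <- note nn.Linear stores weight as (out_features, in_features)
# bias (12,) True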
for epoch in range(epochs):
    optimizer.zero_grad()            # clear the gradients left over from the previous epoch
    outputs = model(inputs)
    loss = criterion(outputs, label) # criterion == loss function
    loss.backward()                  # compute the gradients of the parameters
    optimizer.step()                 # update the parameters
import numpy as np

x_values = [i for i in range(11)]               # int type
x_train = np.array(x_values, dtype=np.float32)  # int -> float
x_train = x_train.reshape(-1, 1)                # reshape so each row holds one sample
y_values = [2 * i + 1 for i in x_values]
y_train = np.array(y_values, dtype=np.float32)
y_train = y_train.reshape(-1, 1)
import torch
from torch.autograd import Variable

class LinearRegression(torch.nn.Module):
    def __init__(self, inputSize, outputSize):
        super(LinearRegression, self).__init__()
        self.linear = torch.nn.Linear(inputSize, outputSize)

    def forward(self, x):
        out = self.linear(x)
        return out
# excerpt from the torch.nn.Linear source:
self.weight = Parameter(torch.empty((out_features, in_features), **factory_kwargs))
if bias:
    self.bias = Parameter(torch.empty(out_features, **factory_kwargs))
else:
    self.register_parameter('bias', None)
⇒ Looking at the source, nn.Parameter is already used inside the Linear layer.

inputDim = 1
outputDim = 1
learningRate = 0.01
epochs = 100
model = LinearRegression(inputDim, outputDim)
if torch.cuda.is_available():
    model.cuda()
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr= learningRate)
for epoch in range(epochs):
    if torch.cuda.is_available():
        inputs = Variable(torch.from_numpy(x_train).cuda())
        labels = Variable(torch.from_numpy(y_train).cuda())
    else:
        inputs = Variable(torch.from_numpy(x_train))
        labels = Variable(torch.from_numpy(y_train))

    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    print(loss)
    loss.backward()
    optimizer.step()
    print(epoch, loss.item())
epoch 1   - tensor(97.4232, grad_fn=...)
epoch 100 - tensor(0.0030, grad_fn=...)
with torch.no_grad():  # do not track gradients at test time (no parameter updates)
    if torch.cuda.is_available():
        predicted = model(Variable(torch.from_numpy(x_train).cuda())).cpu().data.numpy()
    else:
        predicted = model(Variable(torch.from_numpy(x_train))).data.numpy()
    print(predicted)
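Since PyTorch 0.4, Variable has been merged into Tensor, so the wrapper above is no longer needed. A minimal modern sketch of the same evaluation step (assumes the model and x_train defined above):

device = "cuda" if torch.cuda.is_available() else "cpu"
model.eval()
with torch.no_grad():
    # plain tensors are enough; .to(device) handles CPU/GPU placement
    predicted = model(torch.from_numpy(x_train).to(device)).cpu().numpy()
print(predicted)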
predict - y_hat
[[ 0.41637066]
[ 2.5004187 ]
[ 4.584467 ]
[ 6.6685147 ]
[ 8.7525625 ]
[10.836611 ]
[12.920658 ]
[15.004706 ]
[17.088755 ]
[19.172802 ]
[21.256851 ]]
label - y
array([[ 1.],
[ 3.],
[ 5.],
[ 7.],
[ 9.],
[11.],
[13.],
[15.],
[17.],
[19.],
[21.]], dtype=float32)
for p in model.parameters():
    if p.requires_grad:
        print(p.name, p.data)
####
None tensor([[2.0840]]) # weight
None tensor([0.4164]) # bias
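p.name is None here because plain parameters carry no name; named_parameters() yields the registered attribute names instead. A small sketch using the model above (output values taken from the run above):

for name, p in model.named_parameters():
    if p.requires_grad:
        print(name, p.data)
# linear.weight tensor([[2.0840]])
# linear.bias tensor([0.4164])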
: example of implementing backward by hand
⇒ forward : self.sigmoid(torch.mm(self.w.T, x))
  : applies the sigmoid to w.T @ x
self.grads["dw"] = (1/x.shape[1]) * torch.mm(x, (y_hat - y).T)
self.grads["db"] = (1/x.shape[1]) * torch.sum(y_hat - y)
  : plugging 1 in for x gives the derivative with respect to the bias
self.w = self.w - self.lr * self.grads["dw"]
self.b = self.b - self.lr * self.grads["db"]
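⇒ Where those formulas come from: with z = w.T @ x + b, yhat = sigmoid(z), and the binary cross-entropy averaged over m = x.shape[1] samples, the chain rule gives dL/dz = (1/m)(yhat - y), because the sigmoid derivative yhat(1 - yhat) cancels against the derivative of the log terms. Since dz/dw = x and dz/db = 1, this yields dL/dw = (1/m) x @ (yhat - y).T and dL/db = (1/m) sum(yhat - y), i.e. the same expression with the input replaced by 1.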
class LogisticRegression(nn.Module):
    def __init__(self, dim, lr=torch.scalar_tensor(0.01)):
        super(LogisticRegression, self).__init__()
        # initial values for the weight & bias
        self.w = torch.zeros(dim, 1, dtype=torch.float).to(device)
        self.b = torch.scalar_tensor(0).to(device)
        # this example implements the differentiation by hand instead of using autograd,
        # so there is no need to wrap w and b in nn.Parameter
        self.grads = {'dw': torch.zeros(dim, 1, dtype=torch.float).to(device),
                      'db': torch.scalar_tensor(0).to(device)}
        self.lr = lr.to(device)

    def forward(self, x):
        z = torch.mm(self.w.T, x) + self.b
        a = self.sigmoid(z)
        return a

    def sigmoid(self, z):
        return 1 / (1 + torch.exp(-z))

    def backward(self, x, yhat, y):
        self.grads['dw'] = (1/x.shape[1]) * torch.mm(x, (yhat - y).T)
        self.grads['db'] = (1/x.shape[1]) * torch.sum(yhat - y)

    def optimize(self):
        self.w = self.w - self.lr * self.grads['dw']
        self.b = self.b - self.lr * self.grads['db']
def loss(yhat, y):
    m = y.size()[1]
    return -(1/m) * torch.sum(y * torch.log(yhat) + (1 - y) * torch.log(1 - yhat))

def predict(yhat, y):
    y_prediction = torch.zeros(1, y.size()[1])
    for i in range(yhat.size()[1]):
        if yhat[0, i] <= 0.5:
            y_prediction[0, i] = 0
        else:
            y_prediction[0, i] = 1
    return 100 - torch.mean(torch.abs(y_prediction - y)) * 100
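A small self-contained check (my own sketch, not part of the original notes) that the manual backward() formulas match what autograd would compute on random data:

import torch

torch.manual_seed(0)
dim, m = 4, 8
x = torch.randn(dim, m)                      # columns are samples, matching forward()
y = (torch.rand(1, m) > 0.5).float()

w = torch.zeros(dim, 1, requires_grad=True)  # autograd-tracked copies of the parameters
b = torch.zeros(1, requires_grad=True)

yhat = torch.sigmoid(torch.mm(w.T, x) + b)
l = -(1 / m) * torch.sum(y * torch.log(yhat) + (1 - y) * torch.log(1 - yhat))
l.backward()                                 # autograd gradients in w.grad, b.grad

# manual gradients, exactly the backward() formulas above
dw = (1 / m) * torch.mm(x, (yhat.detach() - y).T)
db = (1 / m) * torch.sum(yhat.detach() - y)

print(torch.allclose(w.grad, dw))            # True
print(b.grad.item(), db.item())              # same value twice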
## hyperparams
costs = []
dim = x_flatten.shape[0]
learning_rate = torch.scalar_tensor(0.0001).to(device)
num_iterations = 100
lrmodel = LogisticRegression(dim, learning_rate)
lrmodel.to(device)
## transform the data
def transform_data(x, y):
    x_flatten = x.T
    y = y.unsqueeze(0)
    return x_flatten, y
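To make the shape convention concrete, here is a quick sketch with dummy stand-ins for a batch (the real train_dataset is not shown in these notes): forward() multiplies w.T of shape (1, dim) by x, so the samples have to sit in the columns.

import torch

x_batch = torch.randn(32, 64)                 # dummy (batch, features) batch
y_batch = torch.randint(0, 2, (32,)).float()  # dummy binary labels, shape (batch,)

x_flat, y_flat = transform_data(x_batch, y_batch)
print(x_flat.shape)  # torch.Size([64, 32]) -> (features, batch), fits torch.mm(w.T, x)
print(y_flat.shape)  # torch.Size([1, 32])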
## training the model
# loop over the iterations so the parameter updates accumulate from step to step
for i in range(num_iterations):
    x, y = next(iter(train_dataset))
    test_x, test_y = next(iter(test_dataset))
    x, y = transform_data(x, y)
    test_x, test_y = transform_data(test_x, test_y)

    # forward
    yhat = lrmodel.forward(x.to(device))
    cost = loss(yhat.data.cpu(), y)
    train_pred = predict(yhat, y)

    # backward
    lrmodel.backward(x.to(device),
                     yhat.to(device),
                     y.to(device))
    lrmodel.optimize()

    ## test
    yhat_test = lrmodel.forward(test_x.to(device))
    test_pred = predict(yhat_test, test_y)

    if i % 10 == 0:
        costs.append(cost)
    if i % 10 == 0:
        print("Cost after iteration {}: {} | Train Acc: {} | Test Acc: {}".format(i,
                                                                                  cost,
                                                                                  train_pred,
                                                                                  test_pred))

## the trend in the context of loss
import matplotlib.pyplot as plt

plt.plot(costs)
plt.show()
☝ super(LogisticRegression, self) == super() : from Python 3 onward the two forms mean the same thing.
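A tiny illustration of that equivalence (toy classes, just for this note):

class Base:
    def __init__(self):
        print("Base.__init__")

class ChildA(Base):
    def __init__(self):
        super().__init__()               # Python 3 shorthand

class ChildB(Base):
    def __init__(self):
        super(ChildB, self).__init__()   # explicit two-argument form, same effect

ChildA(); ChildB()                       # both print "Base.__init__"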