learning rate가 남의 이야기인 줄만 알았는데, 간단한 사례에서 직접 경험해 본 시행착오를 정리해 본다
def mygradient(x, y):
    """One full-batch gradient-descent step on the global linear model w*x + b.

    Updates the module-level ``w`` and ``b`` in place, prints the step for
    inspection, and appends the summed squared error to the global ``errors``
    list.  ``learning_rate`` is read from module scope as well.
    """
    global errors, w, b
    prediction = w * x + b
    residual = y - prediction
    grad_w = x.dot(residual)
    w += grad_w * learning_rate
    b += residual.sum() * learning_rate
    print(grad_w, w, b)
    errors.append((residual ** 2).sum())
X : 0 ~ 99 사이의 정수 100개
Y : X에서 -10 ~ +10 의 차이를 갖는 값
코드
import pandas as pd
from io import StringIO
import numpy as np
import matplotlib.pyplot as plt

# BUG FIX: np.random.RandomState(1) only constructs a *new* RNG object and
# immediately discards it -- it does NOT seed the global RNG, so runs were
# never actually reproducible.  Seed the global state explicitly instead.
np.random.seed(1)

# X: 100 random integers in [0, 100); Y: X plus integer noise in [-10, 10).
X = np.random.randint(100, size=100)
DY = np.random.randint(-10, 10, size=100)
Y = X + DY
plt.scatter(X, Y)
w와 b는 0 ~ 1 사이 실수
코드
# Random starting parameters, each drawn uniformly from [0, 1).
w = np.random.rand()
b = np.random.rand()
print(w, b)
0.8764259753937205 0.890855917746639
learning_rate는 0.1, 0.01, .... 10개
epochs는 모두 50개
코드
# Ten learning rates, 0.1 down by factors of ten; every run uses 50 epochs.
learning_rate = [0.1]
while len(learning_rate) < 10:
    learning_rate.append(learning_rate[-1] / 10)
epochs = [50] * 10
여러 번 실험을 하려고 클래스로 정리해 보았음
gradient를 계산해서 w, b를 갱신하는 부분과
epoch만큼 반복하는 부분
학습한 값으로 예측하는 부분임
class myperc:
    """Minimal linear 'perceptron': fits hy = w * x + b by gradient descent.

    Each call to ``mygradient`` performs one full-batch update with the
    (un-normalized) squared-error gradient; the summed squared error of
    every epoch is collected in ``errors_`` for later plotting.
    """

    def __init__(self, w, b, lr, epochs):
        self.w_ = w            # slope
        self.b_ = b            # intercept
        self.lr_ = lr          # learning rate
        self.epochs_ = epochs  # number of full-batch updates
        self.errors_ = []      # summed squared error per epoch

    def mygradient(self, x, y):
        # One full-batch gradient step over the whole data set.
        residual = y - (self.w_ * x + self.b_)
        self.w_ += x.dot(residual) * self.lr_
        self.b_ += residual.sum() * self.lr_
        self.errors_.append((residual ** 2).sum())

    def train(self, x, y):
        # Repeat the update for the configured number of epochs.
        for _ in range(self.epochs_):
            self.mygradient(x, y)

    def pred(self, x):
        # Predict with the learned parameters.
        return self.w_ * x + self.b_
# Five much smaller learning rates starting at 1e-8; the first run gets
# 1000 epochs, the remaining four get 50 each.
learning_rate = [0.00000001]
epochs = [1000]
while len(learning_rate) < 5:
    learning_rate.append(learning_rate[-1] / 10)
    epochs.append(50)

# Train one model per (learning rate, epoch count) pair, keeping both the
# fitted model and its predictions on the training inputs.
tr = []
YY = []
for lr, ep in zip(learning_rate, epochs):
    model = myperc(w, b, lr, ep)
    model.train(X, Y)
    tr.append(model)
    YY.append(model.pred(X))
learning rate를 1e-8 이하로 충분히 작게 잡으면 수렴하는 것처럼 보인다
import pandas as pd
from io import StringIO
import numpy as np
import matplotlib.pyplot as plt

# BUG FIX: np.random.RandomState(1) creates a fresh RNG object and throws
# it away; it never seeds the global RNG, so the "seeded" runs were not
# reproducible.  np.random.seed(1) actually seeds the global state.
# (Commented-out alternative data sets removed as dead code.)
np.random.seed(1)

# X: 100 random integers in [0, 100); Y adds integer noise in [-10, 10).
X = np.random.randint(100, size=100)
DY = np.random.randint(-10, 10, size=100)
Y = X + DY
plt.scatter(X, Y)
class myperc:
    """Linear model hy = w * x + b trained by full-batch gradient descent.

    The per-epoch summed squared errors accumulate in ``errors_`` so the
    convergence behaviour can be plotted afterwards.
    """

    def __init__(self, w, b, lr, epochs):
        self.w_ = w
        self.b_ = b
        self.lr_ = lr
        self.epochs_ = epochs
        self.errors_ = []

    def mygradient(self, x, y):
        # Single (un-normalized) squared-error gradient step on all samples.
        d = y - (self.w_ * x + self.b_)
        g = x.dot(d)
        self.w_ += g * self.lr_
        self.b_ += d.sum() * self.lr_
        self.errors_.append((d ** 2).sum())

    def train(self, x, y):
        # Run the configured number of epochs.
        for _ in range(self.epochs_):
            self.mygradient(x, y)

    def pred(self, x):
        # Apply the learned linear map to new inputs.
        return self.w_ * x + self.b_
# Random starting point in [0, 1) for both parameters.
w = np.random.rand()
b = np.random.rand()
print(w, b)

# Learning rates 0.1 down to 1e-10 (factors of ten apart), 50 epochs each.
learning_rate = [0.1]
while len(learning_rate) < 10:
    learning_rate.append(learning_rate[-1] / 10)
epochs = [50] * 10
# Fit one model per learning rate; remember each fitted model and its
# predictions on the training inputs for the plots below.
tr = []
YY = []
for lr, ep in zip(learning_rate, epochs):
    model = myperc(w, b, lr, ep)
    model.train(X, Y)
    tr.append(model)
    YY.append(model.pred(X))
# Two panels per learning rate: error-vs-epoch on the left, the raw data
# with the fitted line on the right.
plt.figure(figsize=(10, 40))
rows = len(learning_rate)
for i, lr in enumerate(learning_rate):
    title = "learning rate : " + str(lr)
    plt.subplot(rows, 2, 2 * i + 1)
    plt.title(title)
    plt.scatter(range(1, tr[i].epochs_ + 1), np.array(tr[i].errors_))
    plt.subplot(rows, 2, 2 * i + 2)
    plt.title(title)
    plt.scatter(X, Y)
    plt.plot(X, YY[i], "rs-")
# Much smaller learning rates, 1e-8 down to 1e-12; the first configuration
# trains for 1000 epochs, the remaining four for 50 each.
learning_rate = [0.00000001]
epochs = [1000]
while len(learning_rate) < 5:
    learning_rate.append(learning_rate[-1] / 10)
    epochs.append(50)
# Re-run the experiment with the smaller rates, collecting the fitted
# models and their predictions on the training inputs.
tr = []
YY = []
for lr, ep in zip(learning_rate, epochs):
    model = myperc(w, b, lr, ep)
    model.train(X, Y)
    tr.append(model)
    YY.append(model.pred(X))
# Same two-panel layout as above, sized for the five smaller-rate runs.
plt.figure(figsize=(10, 20))
rows = len(learning_rate)
for i, lr in enumerate(learning_rate):
    title = "learning rate : " + str(lr)
    plt.subplot(rows, 2, 2 * i + 1)
    plt.title(title)
    plt.scatter(range(1, tr[i].epochs_ + 1), np.array(tr[i].errors_))
    plt.subplot(rows, 2, 2 * i + 2)
    plt.title(title)
    plt.scatter(X, Y)
    plt.plot(X, YY[i], "rs-")