from sklearn.model_selection import train_test_split
# Split the length/weight arrays into training and test subsets.
# random_state=42 is passed so the split is reproducible across runs.
train_input, test_input, train_target, test_target = train_test_split(
    perch_length,
    perch_weight,
    random_state=42,
)

# Reshape each feature array into a single column; -1 lets the row count
# be inferred from the data.
train_input, test_input = (
    train_input.reshape(-1, 1),
    test_input.reshape(-1, 1),
)
from sklearn.neighbors import KNeighborsRegressor
# k-nearest-neighbours regressor with k=3, fitted on the training split.
# fit() returns the estimator itself, so construction and fitting are chained.
knr = KNeighborsRegressor(n_neighbors=3).fit(train_input, train_target)

# Prediction for a single sample whose feature value is 50.
print(knr.predict([[50]]))
[1033.33333333]
import matplotlib.pyplot as plt
# Find the training samples the regressor treats as the nearest neighbours
# of a sample whose feature value is 50.
distances, indexes = knr.kneighbors([[50]])

# All training samples, with the selected neighbours drawn as diamonds.
plt.scatter(train_input, train_target)
plt.scatter(train_input[indexes], train_target[indexes], marker='D')

# Mark the model's actual prediction for 50 rather than a rounded value
# copied from earlier printed output, so the marker stays correct if the
# data or the split changes.
plt.scatter([50], knr.predict([[50]]), marker='^')
plt.show()
👉 k-최근접 이웃 회귀는 가까운 이웃 샘플들의 타깃 평균으로 예측하므로, 훈련 세트 범위 밖의 데이터는 올바르게 예측할 수 없음 (길이가 더 길어져도 예측값이 변하지 않음)
from sklearn.linear_model import LinearRegression
# Fit a linear regression on the training split; fit() returns the
# estimator, so it can be chained onto the constructor.
lr = LinearRegression().fit(train_input, train_target)

# Prediction for a single sample whose feature value is 50.
print(lr.predict([[50]]))

# Learned coefficient array (slope) and intercept of the fitted line.
print(lr.coef_, lr.intercept_)
[1241.83860323]
[39.01714496] -709.0186449535477 // 기울기, y절편
# Training samples as the backdrop for the fitted line.
plt.scatter(train_input, train_target)

# Draw the fitted line between feature values 10 and 50. The endpoints come
# straight from the model, which is equivalent to coef_ * x + intercept_.
plt.plot([10, 50], lr.predict([[10], [50]]))

# Mark the model's actual prediction for 50 instead of a rounded constant
# copied from printed output, so the marker cannot drift from the model.
plt.scatter([50], lr.predict([[50]]), marker='^')
plt.show()
# Print the model's score (R^2) on the training split first, then on the
# test split, so the two can be compared for over/underfitting.
for split_input, split_target in (
    (train_input, train_target),
    (test_input, test_target),
):
    print(lr.score(split_input, split_target))
0.939846333997604 // 훈련 set R²
0.8247503123313558 // 테스트 set R²
-> 훈련 set 점수가 테스트 set 점수보다 뚜렷하게 높음 → 과대적합
-> 직선의 한계 : 길이가 너무 작아지면 음수로 떨어짐
# Build two-column feature matrices: the squared feature in the first
# column and the original feature in the second.
train_poly = np.column_stack([np.square(train_input), train_input])
test_poly = np.column_stack([np.square(test_input), test_input])

# Refit a linear regression on the expanded features; this rebinds `lr`
# to the new model.
lr = LinearRegression().fit(train_poly, train_target)

# Prediction for feature value 50, supplied in the same column order
# (squared value, raw value).
print(lr.predict([[50 ** 2, 50]]))

# Coefficients for the squared and linear terms, followed by the intercept.
print(lr.coef_, lr.intercept_)
[1573.98423528] // 예측 무게
[ 1.01433211 -21.55792498] 116.0502107827827 // a, b, c
# Integer feature values 15..49 at which the fitted curve is evaluated.
point = np.arange(15, 50)

# Training samples as the backdrop for the fitted curve.
plt.scatter(train_input, train_target)

# Evaluate the quadratic with the fitted parameters instead of rounded,
# hand-copied constants, so the curve always matches the current model.
# coef_ follows the feature column order: [squared term, linear term].
plt.plot(
    point,
    lr.coef_[0] * point ** 2 + lr.coef_[1] * point + lr.intercept_,
)

# Mark the model's actual prediction for feature value 50.
plt.scatter([50], lr.predict([[50 ** 2, 50]]), marker='^')
plt.show()
# Print the polynomial model's score (R^2) on the training features first,
# then on the test features.
for poly_input, poly_target in (
    (train_poly, train_target),
    (test_poly, test_target),
):
    print(lr.score(poly_input, poly_target))
0.9706807451768623 // 훈련 set R²
0.9775935108325122 // 테스트 set R²
-> 테스트 set 점수가 훈련 set 점수보다 약간 높음: 아직 과소적합이 남아 있으므로 조금 더 복잡한 모델이 필요