import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
import statsmodels.api as sm
# Fetch the raw UCI abalone table. header=None makes pandas read the file
# without a header row, so the columns are addressed by integer position.
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data'
pd_data = pd.read_csv(filepath_or_buffer=url, header=None)
# Optional sanity check of the first rows: print(pd_data.head())
# Convert to a NumPy array so the regression steps can slice columns by index.
np_data = pd_data.to_numpy()
.
.
» indep_var (독립변수, x) : Length
» dep_var (종속변수, y) : Diameter
# Simple linear regression of Diameter (y) on Length (x) using NumPy only.
x = np_data[:, 1].astype(np.float64)  # column 1: Length
y = np_data[:, 2].astype(np.float64)  # column 2: Diameter

# Estimate the regression line as a degree-1 least-squares polynomial fit;
# fit_line holds the coefficients as [slope, intercept].
fit_line = np.polyfit(x, y, deg=1)
# Wrap the coefficients in a poly1d object so the fitted line prints readably.
f = np.poly1d(fit_line)
print(f)
# result
# 0.8155 x - 0.01941

# Scatter the observations and overlay the fitted line in yellow.
axe = plt.subplots()[1]
axe.scatter(x, y)
slope, intercept = fit_line
axe.plot(x, slope * x + intercept, color="y")
# Ordinary least squares with statsmodels for the same Length -> Diameter fit.
# A column has to be provided for computing the constant (intercept) term,
# so add_constant is applied to x before the model is built.
x = sm.add_constant(x)
print(x)
# result
# [[1. 0.455]
# [1. 0.35 ]
# [1. 0.53 ]
# ...
# [1. 0.6 ]
# [1. 0.625]
# [1. 0.71 ]]
reg_model = sm.OLS(y, x)
reg_result = reg_model.fit()
# The results are printed explicitly, as is already done for x: a bare
# expression is only echoed by an interactive session and is silently
# discarded when the file runs as a script.
print(reg_result.summary())
print(reg_result.params)
# result
# [-0.01941371 0.81546069] : [y-intercept, x coefficient]
print(reg_result.rsquared)
# result : 0.9737971035056835
.
.
» indep_var (독립변수, x1, x2, x3) : Length(x1), Diameter(x2), Height(x3)
» dep_var (종속변수, y) : Rings
# Multiple linear regression: Rings explained by Length, Diameter and Height.
x = np_data[:, 1:4].astype(np.float64)  # columns 1-3: Length, Diameter, Height
y = np_data[:, -1].astype(np.float64)  # last column: Rings (age)
# Add the constant column so the model also estimates an intercept.
x = sm.add_constant(x)
reg_result = sm.OLS(y, x).fit()
# Print the table explicitly: a bare `reg_result.summary()` expression is only
# echoed by an interactive session and is discarded when run as a script.
print(reg_result.summary())
▣ 추정된 회귀식 :
y = 2.8365 - 11.9327×Length + 25.7661×Diameter + 20.3582×Height