
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
import statsmodels.api as sm
# Fetch the UCI abalone dataset. The file is read with header=None, so no
# row is consumed as column names and columns are addressed by position.
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data'
pd_data = pd.read_csv(filepath_or_buffer=url, header=None)
# print(pd_data.head())  # uncomment to preview the first rows
# NumPy form of the table, sliced by column index in the regressions below.
np_data = pd_data.to_numpy()

.
.
» indep_var (독립변수, x) : Length
» dep_var (종속변수, y) : Diameter
# Simple linear regression: Diameter (y) explained by Length (x).
x = np_data[:, 1].astype(np.float64)  # Length
y = np_data[:, 2].astype(np.float64)  # Diameter

# Estimate the regression line with a degree-1 least-squares fit;
# the coefficients come back as [slope, intercept].
fit_line = np.polyfit(x, y, deg=1)
f = np.poly1d(fit_line)  # callable polynomial built from the fitted coefficients
print(f)
# result
# 0.8155 x - 0.01941

# Scatter the observations and overlay the fitted line in yellow.
_, axe = plt.subplots()
axe.scatter(x, y)
slope, intercept = fit_line
axe.plot(x, slope * x + intercept, color="y")
# Same simple regression, this time with statsmodels OLS.
# A column of ones has to be prepended so that the model has a slot in which
# to estimate the constant (intercept) term.
x = sm.add_constant(x)
print(x)
# result
# [[1.    0.455]
#  [1.    0.35 ]
#  [1.    0.53 ]
#  ...
#  [1.    0.6  ]
#  [1.    0.625]
#  [1.    0.71 ]]

# Fit y on [const, x] by ordinary least squares.
reg_model = sm.OLS(y, x)
reg_result = reg_model.fit()

# Bare expressions are silently discarded when this runs as a script, so the
# results are printed explicitly.
print(reg_result.summary())
print(reg_result.params)
# result
# [-0.01941371  0.81546069] : [y-intercept, x coefficient]
print(reg_result.rsquared)
# result : 0.9737971035056835
.
.
» indep_var (독립변수, x1, x2, x3) : Length(x1), Diameter(x2), Height(x3)
» dep_var (종속변수, y) : Rings
# Multiple linear regression: Rings (y) explained by Length, Diameter and
# Height (x1, x2, x3).
x = np_data[:, 1:4].astype(np.float64)  # Length, Diameter, Height
y = np_data[:, -1].astype(np.float64)  # Rings (age)
x = sm.add_constant(x)  # prepend the column of ones for the intercept
reg_result = sm.OLS(y, x).fit()
# Print the summary explicitly; a bare expression shows nothing when this
# runs as a script.
print(reg_result.summary())
‣ 추정된 회귀식 :
y = 2.8365 - 11.9327×Length + 25.7661×Diameter + 20.3582×Height