import pandas as pd
# Load the fish dataset from the remote CSV and peek at its first rows.
fish = pd.read_csv('https://bit.ly/fish_csv_data')
fish.head()

# Species is the label to predict; the five measurement columns are the inputs.
fish_target = fish['Species'].to_numpy()
fish_input = fish[
    ['Weight', 'Length', 'Diagonal', 'Height', 'Width']
].to_numpy()
from sklearn.model_selection import train_test_split

# Split inputs and labels together; the fixed seed keeps the split repeatable.
split_parts = train_test_split(fish_input, fish_target, random_state=42)
train_input, test_input, train_target, test_target = split_parts
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training inputs only, then apply that same
# transformation to both the training and the test inputs.
ss = StandardScaler().fit(train_input)
test_scaled = ss.transform(test_input)
train_scaled = ss.transform(train_input)
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier configured with 3 neighbours, trained on
# the scaled training data.
kn = KNeighborsClassifier(n_neighbors=3).fit(train_scaled, train_target)

# Class labels as stored on the fitted classifier.
print(kn.classes_)
['Bream' 'Parkki' 'Perch' 'Pike' 'Roach' 'Smelt' 'Whitefish']
# Predicted species for the first five scaled test samples.
knn_first_five = test_scaled[:5]
print(kn.predict(knn_first_five))
['Perch' 'Smelt' 'Pike' 'Perch' 'Perch']
import numpy as np

# Per-class probabilities from the KNN model for the first five test
# samples, printed rounded to 4 decimal places.
proba = kn.predict_proba(test_scaled[:5])
print(np.round(proba, 4))
[[0. 0. 1. 0. 0. 0. 0. ],
[0. 0. 0. 0. 0. 1. 0. ],
[0. 0. 0. 1. 0. 0. 0. ],
[0. 0. 0.6667 0. 0.3333 0. 0. ],
[0. 0. 0.6667 0. 0.3333 0. 0. ]]
-> 이웃을 3개로 정해놓아서 확률 = 0, 1/3, 2/3, 3/3
from sklearn.linear_model import LogisticRegression

# Boolean mask that keeps only the Bream and Smelt rows of the training set.
is_bream = train_target == 'Bream'
is_smelt = train_target == 'Smelt'
bream_smelt_indexes = is_bream | is_smelt
target_bream_smelt = train_target[bream_smelt_indexes]
train_bream_smelt = train_scaled[bream_smelt_indexes]

# Logistic regression trained on the two-species subset; show its
# predictions for the first five samples of that subset.
lr = LogisticRegression().fit(train_bream_smelt, target_bream_smelt)
print(lr.predict(train_bream_smelt[:5]))
['Bream' 'Smelt' 'Bream' 'Bream' 'Bream']
# Class probabilities for the first five Bream/Smelt training samples.
bream_smelt_proba = lr.predict_proba(train_bream_smelt[:5])
print(bream_smelt_proba)
[[0.99759855 0.00240145],
[0.02735183 0.97264817],
[0.99486072 0.00513928],
[0.98584202 0.01415798],
[0.99767269 0.00232731]]
-> 도미: 음성, 빙어: 양성
# Coefficients and intercept learned by the fitted logistic regression.
learned_coef, learned_intercept = lr.coef_, lr.intercept_
print(learned_coef, learned_intercept)
[[-0.4037798 -0.57620209 -0.66280298 -1.01290277 -0.73168947]] [-2.16155132]
from scipy.special import expit

# z values from the model's decision function for the first five samples.
decisions = lr.decision_function(train_bream_smelt[:5])
print(decisions)

# Pass the z values through expit (the logistic sigmoid) and print the result.
sigmoid_of_z = expit(decisions)
print(sigmoid_of_z)
-> 양성 클래스에 대하여 z 값을 계산함
# Rebind lr to a new model trained on the full scaled training set
# (all species), with C and max_iter set explicitly.
lr = LogisticRegression(max_iter=1000, C=20)
lr.fit(train_scaled, train_target)

# Print the score on the training data first, then on the test data.
for inputs, targets in ((train_scaled, train_target), (test_scaled, test_target)):
    print(lr.score(inputs, targets))
0.9327731092436975 // 정확도
0.925
# Per-class probabilities from the logistic regression model for the first
# five test samples, printed rounded to 3 decimal places.
proba = lr.predict_proba(test_scaled[:5])
print(np.round(proba, 3))
[[0. 0.014 0.841 0. 0.136 0.007 0.003],
[0. 0.003 0.044 0. 0.007 0.946 0. ],
[0. 0. 0.034 0.935 0.015 0.016 0. ],
[0.011 0.034 0.306 0.007 0.567 0. 0.076],
[0. 0. 0.904 0.002 0.089 0.002 0.001]]
# Shapes of the learned coefficient array and intercept array.
coef_shape = lr.coef_.shape
intercept_shape = lr.intercept_.shape
print(coef_shape, intercept_shape)
(7, 5) (7,)
-> 7개 행은 각 클래스를 의미
-> 5개 열은 각 특성에 곱해지는 계수
-> y 절편이 7개이므로 z가 7개
클래스마다 선형 함수가 하나씩 생성 (z가 7개)
따라서 위에서 예측한 확률의 한 행은...
= 샘플마다 7개의 선형 함수를 적용 시킨 결과의 확률
이진 분류를 7번 훈련해서 선형 함수를 만든다
각 샘플마다 클래스 하나를 양성, 나머지는 음성으로 두고 이진 분류
-> 하나의 샘플 당 7번 반복 -> 7개 z값
가장 큰 확률값 = 예측 클래스
다중 분류를 확률로 표현
z값 7개 출력
# z values from the decision function for the first five test samples,
# printed rounded to 2 decimal places.
decision = lr.decision_function(test_scaled[:5])
print(np.round(decision, 2))
[[ -6.5 1.03 5.16 -2.73 3.34 0.33 -0.63],
[-10.86 1.93 4.77 -2.4 2.98 7.84 -4.26],
[ -4.34 -6.23 3.17 6.49 2.36 2.42 -3.87],
[ -0.68 0.45 2.65 -1.19 3.26 -5.75 1.26],
[ -6.4 -1.99 5.82 -0.11 3.5 -0.11 -0.71]]
from scipy.special import softmax

# Apply softmax along axis 1 (across each sample's z values) and print the
# result rounded to 3 decimal places.
proba = softmax(decision, axis=1)
print(np.round(proba, 3))
[[0. 0.014 0.841 0. 0.136 0.007 0.003],
[0. 0.003 0.044 0. 0.007 0.946 0. ],
[0. 0. 0.034 0.935 0.015 0.016 0. ],
[0.011 0.034 0.306 0.007 0.567 0. 0.076],
[0. 0. 0.904 0.002 0.089 0.002 0.001]]
-> 위에서 출력한 확률값과 동일
혼공 MLDL-9