import pandas as pd

# Common location of the three CSV files read below.
_LPP_DATA_URL = (
    'https://raw.githubusercontent.com/Bob-Cheol/bobtory/master/'
    'study/HongongMachine/2nd_study/etc/'
)


def _read_first_row(file_name):
    """Return the first row of the header-less CSV `file_name` as a list.

    Args:
        file_name: Name of a CSV file located under `_LPP_DATA_URL`.

    Returns:
        A list holding the values of the file's first row.
    """
    frame = pd.read_csv(_LPP_DATA_URL + file_name, header=None)
    return list(frame.values[0])


# Three parallel lists: entry i of each list is read from the same column
# position of its file.
lot_pnilp = _read_first_row('lpp_pnilp.csv')
lot_parea = _read_first_row('lpp_parea.csv')
lot_is_gangnam = _read_first_row('lpp_is_gangnam.csv')
# Report the length of each of the three loaded lists.
print(len(lot_pnilp))
print(len(lot_parea))
print(len(lot_is_gangnam))

# Peek at both ends of the label list, then count each label value.
print(lot_is_gangnam[:10])
# Use [-10:] rather than [-10:-1]: the latter stops one short and hides the
# final label, showing only nine items.
print(lot_is_gangnam[-10:])
print(lot_is_gangnam.count(0))
print(lot_is_gangnam.count(1))
# Cut every list at the same position so the "_F" and "_T" pieces stay
# aligned element by element.
# NOTE(review): 14_511 is presumably the number of label-0 lots, with the data
# ordered so that those come first -- confirm against lot_is_gangnam.count(0).
_first_part = slice(None, 14_511)
_second_part = slice(14_511, None)

lot_pnilp_F, lot_pnilp_T = lot_pnilp[_first_part], lot_pnilp[_second_part]
lot_parea_F, lot_parea_T = lot_parea[_first_part], lot_parea[_second_part]
lot_is_gangnam_F, lot_is_gangnam_T = (
    lot_is_gangnam[_first_part],
    lot_is_gangnam[_second_part],
)
import matplotlib.pyplot as plt


def _select(mask, values):
    """Return the items of `values` whose matching `mask` entry is truthy."""
    return [v for keep, v in zip(mask, values) if keep]


def _scatter_lots(parea_t, pnilp_t, parea_f, pnilp_f):
    """Show a scatter plot of pnilp against parea for the two lot groups.

    The "_T" group is drawn first and the "_F" group second, so the legend
    entries ['True', 'False'] line up with the plotted series.

    Args:
        parea_t: x values of the "_T" group.
        pnilp_t: y values of the "_T" group.
        parea_f: x values of the "_F" group.
        pnilp_f: y values of the "_F" group.
    """
    plt.figure(figsize=[10, 7])
    plt.scatter(parea_t, pnilp_t)  # Nonhyeon-dong, Gangnam-gu
    plt.scatter(parea_f, pnilp_f)  # Suyu-dong, Gangbuk-gu
    plt.legend(['True', 'False'])
    plt.xlabel('parea')
    plt.ylabel('pnilp')
    plt.show()


# 1) Every lot.
_scatter_lots(lot_parea_T, lot_pnilp_T, lot_parea_F, lot_pnilp_F)

# 2) Only lots whose parea is below 10_000: build one boolean mask per group,
#    report how many lots pass, then plot the survivors.
index_parea_under_10000_T = [a < 10_000 for a in lot_parea_T]
index_parea_under_10000_F = [a < 10_000 for a in lot_parea_F]
print(sum(index_parea_under_10000_T), '/', len(lot_parea_T))
print(sum(index_parea_under_10000_F), '/', len(lot_parea_F))

_scatter_lots(
    _select(index_parea_under_10000_T, lot_parea_T),
    _select(index_parea_under_10000_T, lot_pnilp_T),
    _select(index_parea_under_10000_F, lot_parea_F),
    _select(index_parea_under_10000_F, lot_pnilp_F),
)

# 3) Same again with the tighter bound of 1_000.
index_parea_under_1000_T = [a < 1_000 for a in lot_parea_T]
index_parea_under_1000_F = [a < 1_000 for a in lot_parea_F]
print(sum(index_parea_under_1000_T), '/', len(lot_parea_T))
print(sum(index_parea_under_1000_F), '/', len(lot_parea_F))

_scatter_lots(
    _select(index_parea_under_1000_T, lot_parea_T),
    _select(index_parea_under_1000_T, lot_pnilp_T),
    _select(index_parea_under_1000_F, lot_parea_F),
    _select(index_parea_under_1000_F, lot_pnilp_F),
)
# Pair the two feature lists position by position, producing one
# [pnilp, parea] row per lot.
lot_data = list(map(list, zip(lot_pnilp, lot_parea)))

# Show the first ten rows.
print(lot_data[:10])
from sklearn.neighbors import KNeighborsClassifier
# Build a k-nearest-neighbours classifier with the library's default settings.
kn = KNeighborsClassifier()
# Fit on every [pnilp, parea] row, using lot_is_gangnam as the target labels.
kn.fit(lot_data, lot_is_gangnam)
# Scored on the same data the model was fitted on, so this is a training-set
# score rather than a held-out estimate.
kn.score(lot_data, lot_is_gangnam)
# Classify two hypothetical lots; each row is [pnilp, parea], matching lot_data.
kn.predict([[5_000_000, 100]])
kn.predict([[10_000_000, 100]])
# NOTE(review): _fit_X and _y are underscore-prefixed (private) attributes of
# the estimator -- presumably the stored training features and labels. They
# are not part of the public API and may change between library versions.
print(kn._fit_X)
print(kn._y)
# Refit with the neighbour count set to the full sample count, then score on
# the same data. fit() hands back the estimator, so it can be chained directly.
kn = KNeighborsClassifier(n_neighbors=len(lot_data)).fit(lot_data, lot_is_gangnam)
kn.score(lot_data, lot_is_gangnam)

# Share of label-0 lots in the whole data set (presumably printed for
# comparison with the score above).
zero_label_total = lot_is_gangnam.count(0)
print(zero_label_total / len(lot_is_gangnam))