import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
차례대로 판다스(pandas), 넘파이(numpy), 매트플롯립(matplotlib), 씨본(seaborn).
# Load the dataset; '파일명' is a placeholder for the CSV file path.
df = pd.read_csv('파일명')

# Bar chart of the number of rows per category of the chosen column.
sns.countplot(data=df, x='column name')

# Pairwise correlation over the numeric columns only.
# To restrict the matrix to a hand-picked subset instead, use
# df[num_cols].corr(), where num_cols is a list of column names.
df_corr = df.select_dtypes(include='number').corr()

# annot=True writes each coefficient into its cell; fmt='.2f' shows two decimals.
sns.heatmap(df_corr, fmt='.2f', annot=True)
annot = False 로 지정하면 matrix(히트맵) 각 칸 안에 수치가 표시되지 않음.
fmt(Formatting의 약자) = '.2f' 는 수치를 소수 둘째 자리까지 표시한다는 의미.
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, roc_curve, roc_auc_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import plot_tree
from sklearn.tree import export_graphviz
StandardScaler : 피쳐들의 표준화.
train_test_split : 데이터셋 나누는 용도.
accuracy_score, classification_report, roc_curve, roc_auc_score : 모델 평가 지표.
DecisionTreeClassifier : 결정트리 분류기
plot_tree : 결정트리 visualize
export_graphviz : 결정트리 visualize (Graphviz 형식으로 내보내기)
def sigmoid(z):
    """Return the logistic sigmoid 1 / (1 + exp(-z)), applied element-wise.

    Args:
        z: A scalar, a NumPy array, or any sequence of numbers.

    Returns:
        The sigmoid of ``z``: a NumPy float for scalar input, otherwise an
        array with the same shape as ``z``.
    """
    # Converting first lets plain Python lists/tuples be negated element-wise.
    z = np.asarray(z, dtype=float)
    return 1 / (1 + np.exp(-z))
# Values from -10 up to (but not including) 10 in steps of 0.1.
x = np.arange(-10, 10, 0.1)

# Show the array type, then the values themselves.
print(type(x))
print(x)

# Plot the sigmoid curve over that range.
plt.plot(x, sigmoid(x))
plt.show()
def BCE(y, y_pred):
    """Return the mean binary cross-entropy between labels and predictions.

    Computes ``-mean(y * log(y_pred + d) + (1 - y) * log(1 - y_pred + d))``
    with a small constant ``d`` added inside each logarithm.

    Args:
        y: Sequence or array of target labels (0 or 1).
        y_pred: Sequence or array of predicted probabilities, same length
            as ``y``.

    Returns:
        The averaged loss as a float.

    Raises:
        ValueError: If ``y`` is empty (the mean would be undefined).
    """
    y = np.asarray(y, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    n_samples = len(y)
    if n_samples == 0:
        raise ValueError("BCE requires at least one sample")

    delta = 1e-7  # keeps log() away from log(0) when a prediction is exactly 0 or 1
    log_likelihood = y * np.log(y_pred + delta) + (1 - y) * np.log(1 - y_pred + delta)
    return -np.sum(log_likelihood) / n_samples
ex)
# Six samples: 0/1 target labels and the matching predicted probabilities.
y = np.array([1, 0, 0, 1, 1, 0])
y_pred = np.array([0.6, 0.1, 0.5, 0.8, 0.7, 0.45])
BCE(y, y_pred)  # evaluates to roughly 0.4145
Out : 0.4144979814
Logistic Regression model
Euclidean distance, Manhattan distance
def euclidean_distance(x1, x2):
    """Return the Euclidean (straight-line) distance between two points.

    Args:
        x1: Sequence of coordinates of the first point.
        x2: Sequence of coordinates of the second point; same length as ``x1``.

    Returns:
        The square root of the summed squared coordinate differences, as a float.

    Raises:
        ValueError: If the two points do not have the same number of
            coordinates.
    """
    # Without this check, extra trailing coordinates in x2 would be ignored.
    if len(x1) != len(x2):
        raise ValueError("x1 and x2 must have the same length")
    return sum((a - b) ** 2 for a, b in zip(x1, x2)) ** 0.5
def manhattan_distance(x1, x2):
    """Return the Manhattan (city-block) distance between two points.

    Args:
        x1: Sequence of coordinates of the first point.
        x2: Sequence of coordinates of the second point; same length as ``x1``.

    Returns:
        The sum of the absolute coordinate differences.

    Raises:
        ValueError: If the two points do not have the same number of
            coordinates.
    """
    # Without this check, extra trailing coordinates in x2 would be ignored.
    if len(x1) != len(x2):
        raise ValueError("x1 and x2 must have the same length")
    return sum(abs(a - b) for a, b in zip(x1, x2))
ex)
# Worked example: the two points differ by 3 and 4 along the two axes,
# so the expected output is 5.0 (Euclidean) followed by 7 (Manhattan).
point_a, point_b = [3, 5], [6, 9]
print(euclidean_distance(x1=point_a, x2=point_b))
print(manhattan_distance(x1=point_a, x2=point_b))
Out : 5.0 7
KNN Classifier
plt plot 설명
# Use predicted probabilities rather than hard labels so the ROC curve can
# sweep over thresholds. Column 1 is presumably the positive class
# (NOTE(review): confirm against DTC.classes_).
class_probabilities = DTC.predict_proba(X_test)
y_pred_prob = class_probabilities[:, 1]

# ROC curve: false-positive and true-positive rates at each threshold.
fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob)

# Diagonal from (0, 0) to (1, 1), drawn first as a reference line.
plt.plot([0, 1], [0, 1])
plt.plot(fpr, tpr, label='ROC Curve')

plt.xlabel('FPR')
plt.ylabel('TPR')
plt.title('DecisionTreeClassifier ROC')
plt.grid()
plt.legend(loc='upper left')
plt.show()

# AUC: area under the ROC curve, reported once the figure has been shown.
auc_value = roc_auc_score(y_test, y_pred_prob)
print('AUC : {}'.format(auc_value))
정보 감사합니다.