from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import numpy as np
X = titanic[['pclass', 'age', 'sibsp', 'parch', 'fare', 'gender']]
y = titanic['survived']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=13)
dt = DecisionTreeClassifier(max_depth=2, random_state=13)
dt.fit(X_train, y_train)
pred = dt.predict(X_test)
print(accuracy_score(y_test, pred))
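Because the tree is only two levels deep, it is easy to check which splits it actually learned. A minimal sketch, assuming the training columns above: scikit-learn's export_text prints the split rules of the fitted tree using the column names of X.

from sklearn.tree import export_text
# dump the learned split rules; feature names follow the column order of X
print(export_text(dt, feature_names=list(X.columns)))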
# DiCaprio's survival probability (Decaprio: 0.1507537688442211)
# feature order: [pclass, age, sibsp, parch, fare, gender]
decaprio = np.array([[3, 18, 0, 0, 5, 1]])
# predict_proba returns a (1, 2) array, so index [0, 1] for the probability of class 1 (survived)
print('Decaprio: ', dt.predict_proba(decaprio)[0,1])
# Winslet's survival probability (winslet: 0.9326424870466321)
winslet = np.array([[1, 16, 1, 1, 100, 0]])
print('winslet: ', dt.predict_proba(winslet)[0,1])
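The two passengers can also be scored in one call. A minimal sketch, assuming survived is coded 0/1 so that column 1 of predict_proba (dt.classes_[1]) is the survival probability; wrapping the rows in a DataFrame with the training column names keeps the feature order explicit and avoids scikit-learn's feature-name warning when the model was fitted on a DataFrame.

import pandas as pd
# rows follow the training feature order: pclass, age, sibsp, parch, fare, gender
passengers = pd.DataFrame(
    [[3, 18, 0, 0, 5, 1],     # DiCaprio
     [1, 16, 1, 1, 100, 0]],  # Winslet
    columns=X.columns,
)
# column 1 of predict_proba corresponds to dt.classes_[1], i.e. survived = 1
print(dt.predict_proba(passengers)[:, 1])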
Reference
1) 제로베이스 데이터스쿨 (Zerobase Data School) lecture materials