
# Model: Decision Tree (명칭)
# Library: sklearn.tree.DecisionTreeClassifier, sklearn.tree.DecisionTreeRegressor
# 라이브러리
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier,plot_tree
# 데이터
# Load the Titanic training data and show dtypes / non-null counts.
titaninc_df = pd.read_csv('C:/Users/user/Documents/ML/titanic/train.csv')
titaninc_df.info()

# Feature columns used for modelling.
X_features = ['Pclass','Sex','Age','Fare','Embarked']

# Encoding and missing-value handling.
# One LabelEncoder per column so each fitted encoder (classes_) stays available.
le = LabelEncoder()
titaninc_df['Sex'] = le.fit_transform(titaninc_df['Sex'])
le2 = LabelEncoder()
titaninc_df['Pclass'] = le2.fit_transform(titaninc_df['Pclass'])
# Fill missing ages with the column mean.
age_mean = titaninc_df['Age'].mean()
titaninc_df['Age'] = titaninc_df['Age'].fillna(age_mean)
le3 = LabelEncoder()
# Missing embarkation port filled with 'S' (presumably the most frequent
# value — confirm against the data) before encoding.
titaninc_df['Embarked'] = titaninc_df['Embarked'].fillna('S')
titaninc_df['Embarked'] = le3.fit_transform(titaninc_df['Embarked'])

# Train a decision tree on the full training set (no train/test split here).
X = titaninc_df[X_features]
y = titaninc_df['Survived']
# random_state pinned for reproducibility, consistent with the seeded
# DecisionTreeClassifier used later in this file.
model_dt = DecisionTreeClassifier(random_state=42)
model_dt.fit(X, y)

# Visualize the fitted tree.
plt.figure(figsize=(10, 5))
plot_tree(model_dt, feature_names=X_features, class_names=['Not Survived','Survived'], filled=True)
plt.show()


# 라이브러리
from sklearn.linear_model import LogisticRegression # 로지스틱회귀
from sklearn.tree import DecisionTreeClassifier # 의사결정나무
from sklearn.ensemble import RandomForestClassifier # 랜덤포레스트
from sklearn.metrics import accuracy_score,f1_score # 평가
# 모델 정리
# The three classifiers to compare (trees seeded for reproducibility).
model_lor = LogisticRegression()
model_dt = DecisionTreeClassifier(random_state=42)
model_rf = RandomForestClassifier(random_state=42)

# Reuse the preprocessed Titanic features and target.
X_features = ['Pclass','Sex','Age','Fare','Embarked']
X, y = titaninc_df[X_features], titaninc_df['Survived']

# Fit every model on the full training set.
for _model in (model_lor, model_dt, model_rf):
    _model.fit(X, y)

# In-sample predictions (no held-out split in this exercise).
y_lor_pred = model_lor.predict(X)
y_dt_pred = model_dt.predict(X)
y_rf_pred = model_rf.predict(X)
# 평가 함수 정의
def get_score(model_name, y_true, y_pred):
    """Print accuracy and F1 score for one model's predictions.

    Parameters
    ----------
    model_name : str
        Label printed in front of the scores.
    y_true, y_pred : array-like
        Binary ground-truth labels and predicted labels.
    """
    # Builtin round() works whether the metric returns a NumPy scalar or a
    # plain Python float (a plain float has no .round() method).
    acc = round(accuracy_score(y_true, y_pred), 3)
    f1 = round(f1_score(y_true, y_pred), 3)
    print(model_name, 'acc 스코어 : ', acc, 'f1_score : ', f1)

# Evaluate all three models (scores are on the training data itself).
get_score('lor', y, y_lor_pred)
get_score('dt ', y, y_dt_pred)
get_score('rf ', y, y_rf_pred)

# Feature importances: a bare expression only auto-displays in a notebook;
# in a plain .py script it does nothing, so print it, paired with names.
print(dict(zip(X_features, model_rf.feature_importances_)))

# Model: K-Nearest Neighbors
# Library: sklearn.neighbors.KNeighborsClassifier, sklearn.neighbors.KNeighborsRegressor
# 라이브러리
from sklearn.neighbors import KNeighborsClassifier
# 모델링
# K-nearest-neighbours classifier with default hyperparameters.
model_knn = KNeighborsClassifier()

# Same feature set and target as the other models in this file.
X_features = ['Pclass','Sex','Age','Fare','Embarked']
X, y = titaninc_df[X_features], titaninc_df['Survived']

# Fit on the full training set, then predict in-sample.
model_knn.fit(X, y)
y_knn_pred = model_knn.predict(X)
# 평가 함수 정의
def get_score(model_name, y_true, y_pred):
    """Print accuracy and F1 score for one model's predictions."""
    # Builtin round() is safe for both NumPy scalars and plain floats,
    # unlike the .round() method which only NumPy scalars provide.
    acc = round(accuracy_score(y_true, y_pred), 3)
    f1 = round(f1_score(y_true, y_pred), 3)
    print(model_name, 'acc 스코어 : ', acc, 'f1_score : ', f1)

# Evaluate KNN (in-sample, same as the other sections).
get_score('knn', y, y_knn_pred)

# Model: Gradient Boosting family
# Library: sklearn.ensemble.GradientBoostingClassifier, sklearn.ensemble.GradientBoostingRegressor
# Library: xgboost.XGBRegressor
# Library: lightgbm.LGBMClassifier, lightgbm.LGBMRegressor
# 라이브러리
!pip install xgboost
!pip install lightgbm
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
# 모델링
# Three boosting-family ensembles, each seeded for reproducibility.
model_gbm = GradientBoostingClassifier(random_state=42)
model_xgb = XGBClassifier(random_state=42)
model_lgb = LGBMClassifier(random_state=42)

# Same preprocessed features and target as the earlier sections.
X_features = ['Pclass','Sex','Age','Fare','Embarked']
X, y = titaninc_df[X_features], titaninc_df['Survived']

# Train each model on the full training set.
for _m in (model_gbm, model_xgb, model_lgb):
    _m.fit(X, y)

# In-sample predictions for evaluation below.
y_gbm_pred = model_gbm.predict(X)
y_xgb_pred = model_xgb.predict(X)
y_lgb_pred = model_lgb.predict(X)
# 평가 함수 정의
def get_score(model_name, y_true, y_pred):
    """Print accuracy and F1 score for one model's predictions."""
    # Builtin round() works for NumPy scalars and plain floats alike;
    # the .round() method exists only on NumPy scalars.
    acc = round(accuracy_score(y_true, y_pred), 3)
    f1 = round(f1_score(y_true, y_pred), 3)
    print(model_name, 'acc 스코어 : ', acc, 'f1_score : ', f1)

# Compare the three boosted models (in-sample scores).
get_score('gbm ', y, y_gbm_pred)
get_score('xgb ', y, y_xgb_pred)
get_score('lgb ', y, y_lgb_pred)