목 차
1. 의사결정나무 (DecisionTree)
2. 랜덤포레스트 (RandomForest)
3. xgboost
4. 교차검증 (CrossValidation: CV)
1) 활용 해보기
import pandas as pd

from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
def make_dataset():
    """Load the breast-cancer dataset and return a 50/50 train/test split.

    Returns:
        x_train, x_test, y_train, y_test: feature DataFrames and target
        Series, split with a fixed random_state for reproducibility.
    """
    cancer = load_breast_cancer()  # renamed from "iris" -- this is breast-cancer data
    # Build a feature DataFrame and append the binary target column.
    df = pd.DataFrame(data=cancer.data, columns=cancer.feature_names)
    df['target'] = cancer.target
    # BUG FIX: train_test_split returns (X_train, X_test, y_train, y_test) in
    # that order. The original unpacked it as (x_train, y_train, x_test, y_test),
    # which put the test features into y_train and the training labels into
    # x_test, breaking every model.fit()/predict() call downstream.
    x_train, x_test, y_train, y_test = train_test_split(
        df.drop('target', axis=1), df['target'],
        test_size=0.5, random_state=1004)
    return x_train, x_test, y_train, y_test

x_train, x_test, y_train, y_test = make_dataset()
from sklearn.tree import DecisionTreeClassifier

# Baseline decision tree: every hyperparameter left at its default, so the
# tree grows until the leaves are pure. fit() returns the estimator itself,
# which lets training and prediction be chained in one expression.
model = DecisionTreeClassifier(random_state=0)
pred = model.fit(x_train, y_train).predict(x_test)
# Accuracy on the held-out half of the data.
accuracy_score(y_test, pred)
from sklearn.tree import DecisionTreeClassifier

# Regularized decision tree: these hyperparameters cap the tree's growth
# to reduce overfitting relative to the fully-grown default tree.
model = DecisionTreeClassifier(
    criterion='entropy',   # split quality measured by information gain
    max_depth=3,           # maximum depth of the tree
    min_samples_split=5,   # samples required to split an internal node
    min_samples_leaf=2,    # samples required at each leaf
    random_state=0)
pred = model.fit(x_train, y_train).predict(x_test)
# Accuracy on the held-out half of the data.
accuracy_score(y_test, pred)
# BUG FIX: the class is spelled RandomForestClassifier; the original
# "RandomForestClassfier" (missing the second "i") raises ImportError.
from sklearn.ensemble import RandomForestClassifier

# Baseline random forest with default hyperparameters.
model = RandomForestClassifier(random_state=0)
model.fit(x_train, y_train)
pred = model.predict(x_test)
# Accuracy on the held-out half of the data.
accuracy_score(y_test, pred)
# BUG FIX: corrected the misspelled class name "RandomForestClassfier",
# which raises ImportError, to RandomForestClassifier.
from sklearn.ensemble import RandomForestClassifier

# Tuned random forest: shallow trees with a stricter split threshold,
# but more trees than the default (100 -> 200).
model = RandomForestClassifier(
    max_depth=3,           # maximum depth of each tree
    n_estimators=200,      # number of trees in the forest
    min_samples_split=5,   # samples required to split an internal node
    random_state=0)        # reproducibility
model.fit(x_train, y_train)
pred = model.predict(x_test)
accuracy_score(y_test, pred)
# ~0.9474 accuracy on this split. Record each hyperparameter configuration
# alongside its validation score so runs can be compared later.