이미지 출처
Key Python Libraries for Data Analysis and Code Examples
이미지 출처
from sklearn.model_selection import train_test_split

# Hold-out split: test_size=0.2 reserves 20% of the samples for testing and
# random_state=42 makes the shuffle reproducible.
# NOTE(review): X and y are not defined in this snippet - presumably supplied
# by the surrounding context; confirm before running it standalone.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

from sklearn.model_selection import cross_val_score

# Cross-validation with cv=5: the result holds one score per fold.
# NOTE(review): `model` is not defined in this snippet - confirm it is an
# estimator created elsewhere.
scores = cross_val_score(estimator=model, X=X, y=y, cv=5)

# Same 80/20 hold-out split, but stratify=y asks the splitter to keep the
# class proportions of y in both the train and the test part.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, ConfusionMatrixDisplay
# Build a synthetic example dataset: 1000 samples with 20 features
# (10 informative + 10 redundant); random_state fixes the generated data.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=10,
    n_redundant=10,
    random_state=11,
)

# 1. Result when the data is NOT split into train and test sets
model = DecisionTreeClassifier(random_state=111)
model.fit(X, y)
y_pred_no_split = model.predict(X)

# Measure accuracy on the very same samples the model was fitted on.
acc_no_split = accuracy_score(y, y_pred_no_split)
# Printed explicitly: a bare `acc_no_split` expression shows nothing when the
# file is run as a script, and the split experiment below prints its scores.
print(acc_no_split)  # overfitting: the score is measured on the training data itself
# 2. Result when the data IS split into train and test sets
# test_size=0.3 holds out 30% of the samples; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=111
)

# Fit on the training part only, so the test part stays unseen by the model.
model_split = DecisionTreeClassifier(random_state=111)
model_split.fit(X_train, y_train)

# Predictions for both parts; without these the accuracy calls below would
# fail with NameError because y_pred_train / y_pred_test were never defined.
y_pred_train = model_split.predict(X_train)
y_pred_test = model_split.predict(X_test)

acc_train = accuracy_score(y_train, y_pred_train)
acc_test = accuracy_score(y_test, y_pred_test)
print(acc_train)  # overfitting: scored on the data the tree was fitted on
print(acc_test)  # score on the held-out test samples