- 여기서는 머신러닝의 극히 일부만 간단하게 다룬다.
- 자세한 것은 별도의 글로 정리할 예정.
모델링: 기본 형태
# Basic modeling pattern: instantiate -> fit on train data -> predict on test data.
# sklearn's fit() returns the estimator itself, so instantiation and fitting chain.
model = LogisticRegression().fit(x_train, y_train)
pred = model.predict(x_test)
회귀
모델들
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
!pip install xgboost
from xgboost import XGBRegressor
평가
import
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
출력해 보자.
# Regression evaluation: print MSE, RMSE, MAE, MAPE (lower is better for all).
print(mean_squared_error(y_test, pred))               # MSE
# FIX: `squared=False` was deprecated in scikit-learn 1.4 and removed in 1.6;
# taking the square root of MSE gives the same RMSE on every version.
print(mean_squared_error(y_test, pred) ** 0.5)        # RMSE
print(mean_absolute_error(y_test, pred))              # MAE
print(mean_absolute_percentage_error(y_test, pred))   # MAPE
분류
모델들
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
!pip install xgboost
from xgboost import XGBClassifier
평가
import
# Classification evaluation metrics.
from sklearn.metrics import confusion_matrix
# FIX: typo "metircs" -> "metrics" (the original line raised ModuleNotFoundError).
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import classification_report
출력해 보자.
# Classification evaluation: confusion matrix first, then the four scalar
# scores (accuracy, precision, recall, F1), each printed on its own line.
print(confusion_matrix(y_test, pred))
for score_fn in (accuracy_score, precision_score, recall_score, f1_score):
    print(score_fn(y_test, pred))
Feature importance 그래프
# Top-10 feature-importance bar chart for a fitted tree/ensemble model.
# FIX: the sklearn attribute is `feature_importances_` (plural);
# `feature_importance_` raises AttributeError.
feature_importance_series = pd.Series(model.feature_importances_, index=df.drop(columns=[target]).columns)
feature_importance_top10 = feature_importance_series.sort_values(ascending=False)[:10]
# FIX: typo `plt.fiure` -> `plt.figure`.
plt.figure(figsize=(10, 10))
sns.barplot(x=feature_importance_top10, y=feature_importance_top10.index)
plt.title('Top 10 Feature Importance')
plt.show()