scikit-learn Cheat Sheet

AIVILLAIN·2023년 3월 8일
0

Importing scikit-learn

import sklearn

Data Preparation

Load Dataset

from sklearn.datasets import load_iris  # or load_wine, load_digits, load_breast_cancer, ...
data = load_iris()
X, y = data.data, data.target

Split Dataset

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

Scaling Data

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

Model Selection

Linear Regression

from sklearn.linear_model import LinearRegression
reg = LinearRegression()
reg.fit(X_train, y_train)
y_pred = reg.predict(X_test)

Logistic Regression

from sklearn.linear_model import LogisticRegression
clf = LogisticRegression()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

Decision Tree

from sklearn.tree import DecisionTreeClassifier
clf = DecisionTreeClassifier()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

Random Forest

from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier(n_estimators=100)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

K-Nearest Neighbors

from sklearn.neighbors import KNeighborsClassifier
clf = KNeighborsClassifier(n_neighbors=5)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

Support Vector Machines

from sklearn.svm import SVC
clf = SVC(kernel='linear')
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

Naive Bayes

from sklearn.naive_bayes import GaussianNB
clf = GaussianNB()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

Neural Network

from sklearn.neural_network import MLPClassifier
clf = MLPClassifier(hidden_layer_sizes=(100,), max_iter=1000)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

Model Evaluation

Accuracy

from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_test, y_pred)

Confusion Matrix

from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, y_pred)

Classification Report

from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

Cross Validation

from sklearn.model_selection import cross_val_score
scores = cross_val_score(clf, X, y, cv=5)

Model Tuning

from sklearn.model_selection import GridSearchCV
parameters = {'parameter1': [value1, value2], 'parameter2': [value3, value4]}
clf = GridSearchCV(clf, parameters)
clf.fit(X_train, y_train)
profile
소신있는 오픈마인드

0개의 댓글