import pandas as pd
red_url = 'https://raw.githubusercontent.com/PinkWink/ML_tutorial/master/dataset/winequality-red.csv'
white_url = 'https://raw.githubusercontent.com/PinkWink/ML_tutorial/master/dataset/winequality-white.csv'
red_wine = pd.read_csv(red_url, sep=';')
white_wine = pd.read_csv(white_url, sep=';')
red_wine.head()
white_wine.head()
white_wine.columns
red_wine['color'] = 1.
white_wine['color'] = 0.
wine = pd.concat([red_wine, white_wine])
wine.info()
wine['quality'].unique()
import plotly.express as px
fig = px.histogram(wine, x='quality')
fig.show()
wine['quality'].value_counts()
fig = px.histogram(wine, x='quality', color='color')
fig.show()
➔ plotly.exptress는 아주 간편하고 강력한 기능을 제공한다.
X = wine.drop(['color'], axis=1)
y = wine['color']
from sklearn.model_selection import train_test_split
import numpy as np
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=13)
np.unique(y_train, return_counts=True)
import plotly.graph_objects as go
fig = go.Figure()
fig.add_trace(go.Histogram(x=X_train['quality'], name='Train'))
fig.add_trace(go.Histogram(x=X_test['quality'], name='Test'))
fig.update_layout(barmode='overlay')
fig.update_traces(opacity=0.7)
fig.show()
from sklearn.tree import DecisionTreeClassifier
wine_tree = DecisionTreeClassifier(max_depth=2, random_state=13)
wine_tree.fit(X_train, y_train)
from sklearn.metrics import accuracy_score
y_pred_tr = wine_tree.predict(X_train)
y_pred_test = wine_tree.predict(X_test)
print('Train Acc: ', accuracy_score(y_train, y_pred_tr))
print('Test Acc: ', accuracy_score(y_test, y_pred_test))
fig = go.Figure()
fig.add_trace(go.Box(y=X['fixed acidity'], name='fixed adidity'))
fig.add_trace(go.Box(y=X['chlorides'], name='chlorides'))
fig.add_trace(go.Box(y=X['quality'], name='quality'))
fig.show()
from sklearn.preprocessing import MinMaxScaler, StandardScaler
MMS = MinMaxScaler()
SS = StandardScaler()
MMS.fit(X)
SS.fit(X)
X_ss = SS.transform(X)
X_mms = MMS.transform(X)
X_ss_pd = pd.DataFrame(X_ss, columns=X.columns)
X_mms_pd = pd.DataFrame(X_mms, columns=X.columns)
def px_box(target_df):
fig = go.Figure()
fig.add_trace(go.Box(y=target_df['fixed acidity'], name='fixed adidity'))
fig.add_trace(go.Box(y=target_df['chlorides'], name='chlorides'))
fig.add_trace(go.Box(y=target_df['quality'], name='quality'))
fig.show()
px_box(X_mms_pd)
px_box(X_ss_pd)
X_train, X_test, y_train, y_test = train_test_split(X_mms_pd, y, test_size=0.2, random_state=13)
wine_tree = DecisionTreeClassifier(max_depth=2, random_state=13)
wine_tree.fit(X_train, y_train)
print('Tran Acc : ', accuracy_score(y_train, y_pred_tr))
print('Test Acc : ', accuracy_score(y_test, y_pred_test))
dict(zip(X_train.columns, wine_tree.feature_importances_))
wine['taste'] = [1. if grade > 5 else 0. for grade in wine['quality']]
wine.head()
X = wine.drop(['taste'], axis=1)
y = wine['taste']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=13)
wine_tree = DecisionTreeClassifier(max_depth=2, random_state=13)
wine_tree.fit(X_train, y_train)
y_pred_tr = wine_tree.predict(X_train)
y_pred_test = wine_tree.predict(X_test)
print('Train Acc : ', accuracy_score(y_train, y_pred_tr))
print('Test Acc : ', accuracy_score(y_test, y_pred_test))
import matplotlib.pyplot as plt
import sklearn.tree as tree
plt.figure(figsize=(12,8))
tree.plot_tree(wine_tree, feature_names=X.columns);
X = wine.drop(['taste','quality'], axis=1)
y = wine['taste']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=13)
wine_tree = DecisionTreeClassifier(max_depth=2, random_state=13)
wine_tree.fit(X_train, y_train)
y_pred_tr = wine_tree.predict(X_train)
y_pred_test = wine_tree.predict(X_test)
print('Train Acc : ', accuracy_score(y_train, y_pred_tr))
print('Test Acc : ', accuracy_score(y_test, y_pred_test))
plt.figure(figsize=(12,8))
tree.plot_tree(wine_tree, feature_names=X.columns,
rounded=True,
filled=True);
plt.show()