1. Lib
import numpy as np
import pandas as pd
import seaborn as sns
%matplotlib inline
import plotly.offline as py
py.init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.tools as tls
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (accuracy_score, log_loss, classification_report)
from imblearn.over_sampling import SMOTE
import xgboost
from scipy.stats import pearsonr, chi2_contingency
from itertools import combinations
from statsmodels.stats.proportion import proportion_confint
import warnings
warnings.filterwarnings("ignore")
2. Code
trace = go.Scatter(
y = rf.feature_importances_,
x = 데이터 컬럼,
mode='markers',
marker=dict(
sizemode = 'diameter',
sizeref = 1,
size = 13,
color = rf.feature_importances_,
colorscale='Portland',
showscale=True
),
text = 데이터 컬럼
)
data = [trace]
layout= go.Layout(
autosize= True,
title= 'Random Forest Feature Importance',
hovermode= 'closest',
xaxis= dict(
ticklen= 5,
showgrid=False,
zeroline=False,
showline=False
),
yaxis=dict(
title= 'Feature Importance',
showgrid=False,
zeroline=False,
ticklen= 5,
gridwidth= 2
),
showlegend= False
)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig,filename='RF Feature Importances')
3. 다른 방법
feat = 데이터 컬럼
imp = gb.feature_importances_
df = pd.DataFrame({'Feature': feat, 'Importance': imp})
df = df.sort_values('Importance', ascending=False)[:10]
sns.barplot(x='Importance', y='Feature', data=df);