Week.8 Porto Seguro’s Safe Driver Prediction

최두희·2024년 6월 10일

!pip install plotly

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import plotly.offline as py
py.init_notebook_mode(connected = True)
import plotly.graph_objs as go
import plotly.tools as tls
import warnings
from collections import Counter
from sklearn.feature_selection import mutual_info_classif
warnings.filterwarnings('ignore')

# Load the training set from the data directory one level above the notebook.
train = pd.read_csv("../data/train.csv")
train.head()

# Report the dimensions of the loaded frame.
rows, columns = train.shape
print("The train dataset contains {0} rows and {1} columns".format(rows, columns))

# Evaluates to True if any cell in the frame is null.
train.isnull().any().any()

!pip install missingno

import missingno as msno

# `train_copy` was referenced without ever being defined, which raised a
# NameError. Build it from `train` here so this cell stands on its own.
# NOTE(review): the Porto Seguro data description states that missing values
# are encoded as -1; they are mapped to NaN so that missingno can see them.
# Confirm against the competition's data page.
train_copy = train.replace(-1, np.nan)

# Visualise the nullity pattern of columns 2..38 (positional slice 2:39).
msno.matrix(df=train_copy.iloc[:, 2:39], figsize=(20, 14), color=(0.42, 0.1, 0.05))

# Count the occurrences of each distinct target value once and reuse the result
# for both bar-chart axes.
target_counts = train['target'].value_counts()

layout = go.Layout(title='Target variable distribution')
data = [
    go.Bar(
        x=target_counts.index.values,
        y=target_counts.values,
        text="Distribution of target variable",
    )
]

# Assemble the figure and render it inline in the notebook.
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='basic-bar')

# Tally how many columns share each dtype.
Counter(train.dtypes.values)

# Split the frame into its float64 columns and its int64 columns.
train_float, train_int = (
    train.select_dtypes(include=[dtype_name])
    for dtype_name in ('float64', 'int64')
)

# Pairwise correlation matrix of the float-typed columns, drawn as an
# annotated seaborn heatmap.
colormap = plt.cm.magma
float_corr = train_float.corr()

plt.figure(figsize=(16, 12))
plt.title("Pearson correlation of continuous features", y=1.05, size=15)
sns.heatmap(
    data=float_corr,
    linewidths=0.1,
    vmax=1.0,
    square=True,
    cmap=colormap,
    linecolor='white',
    annot=True,
)

# Interactive plotly heatmap of the correlation matrix of the integer-typed
# columns; the same column labels are used on both axes.
int_columns = train_int.columns.values
int_corr_heatmap = go.Heatmap(
    x=int_columns,
    y=int_columns,
    z=train_int.corr().values,
    colorscale='Viridis',
    reversescale=False,
    opacity=1.0,
)
data = [int_corr_heatmap]

# Hide tick marks on both axes and fix the canvas size.
layout = go.Layout(
    title='Pearson Correlation of Integer-type features',
    xaxis=dict(ticks='', nticks=36),
    yaxis=dict(ticks=''),
    width=900,
    height=700,
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='labelled-heatmap')

profile
안녕하세요!

0개의 댓글