# Point plot of the mean of `Survived` for each value of `Embarked`.
# `factorplot` was renamed to `catplot` in seaborn 0.9 and later removed;
# kind='point' reproduces factorplot's default plot type. x/y are passed by
# keyword because recent seaborn releases reject them as positional arguments.
sns.catplot(x='Embarked', y='Survived', kind='point', data=data)
fig = plt.gcf()
fig.set_size_inches(5, 3)
plt.show()
# 2x2 grid of count plots for `Embarked`: overall, then split by Sex,
# Survived and Pclass. The column is passed as x= by keyword because recent
# seaborn releases treat the first positional argument as `data`.
f, ax = plt.subplots(2, 2, figsize=(20, 15))
sns.countplot(x='Embarked', data=data, ax=ax[0, 0])
ax[0, 0].set_title('No. Of Passengers Boarded')
sns.countplot(x='Embarked', hue='Sex', data=data, ax=ax[0, 1])
ax[0, 1].set_title('Male-Female Split for Embarked')
sns.countplot(x='Embarked', hue='Survived', data=data, ax=ax[1, 0])
ax[1, 0].set_title('Embarked vs Survived')
sns.countplot(x='Embarked', hue='Pclass', data=data, ax=ax[1, 1])
ax[1, 1].set_title('Embarked vs Pclass')
plt.subplots_adjust(wspace=0.2, hspace=0.5)
# The original referenced `plt.show` without calling it, so nothing was shown.
plt.show()
# Point plots of the mean of `Survived` per `Pclass`, coloured by `Sex`, with
# one facet column per `Embarked` value. `catplot(kind='point')` is the
# replacement for the removed `factorplot`; x/y are passed by keyword.
sns.catplot(x='Pclass', y='Survived', hue='Sex', col='Embarked',
            kind='point', data=data)
plt.show()
S 항구에서 가장 많은 승객이 탑승했으며, 그중 대다수는 Pclass 3에 속합니다.
Pclass 1과 Pclass 2 승객은 구조되기에 유리한 위치에 있었습니다.
# Fill missing `Embarked` values with 'S'. Assign the result back instead of
# calling fillna(inplace=True) on a column selection: that chained in-place
# form is deprecated in pandas 2.x and may not update `data` at all.
data['Embarked'] = data['Embarked'].fillna('S')
# `.any` must be called; without the parentheses the expression is just the
# bound method, not the boolean result.
data.Embarked.isnull().any()  # expected to be False: no NaN values remain
Sibling = brother, sister, stepbrother, stepsister
Spouse = husband, wife
# Cross-tabulate SibSp against Survived and render it with a colour gradient.
# The final line is left as a bare expression so a notebook still displays it.
sibsp_by_survival = pd.crosstab([data.SibSp], data.Survived)
sibsp_by_survival.style.background_gradient(cmap='summer_r')
# Side-by-side bar plot and point plot of `Survived` against `SibSp`.
f, ax = plt.subplots(1, 2, figsize=(20, 8))
sns.barplot(x='SibSp', y='Survived', data=data, ax=ax[0])
ax[0].set_title('SibSp vs Survived')
# `factorplot` is figure-level and builds its own figure, so passing ax= left
# a stray empty figure behind. The axes-level `pointplot` draws the same point
# plot directly on the requested subplot.
sns.pointplot(x='SibSp', y='Survived', data=data, ax=ax[1])
ax[1].set_title('SibSp vs Survived')
plt.show()
# Cross-tabulations of SibSp and Parch against Pclass, each rendered with the
# same colour gradient. Both are left as bare expressions, as in the original.
gradient_cmap = 'summer_r'
pd.crosstab(data.SibSp, data.Pclass).style.background_gradient(cmap=gradient_cmap)
pd.crosstab(data.Parch, data.Pclass).style.background_gradient(cmap=gradient_cmap)
# Side-by-side bar plot and point plot of `Survived` against `Parch`.
f, ax = plt.subplots(1, 2, figsize=(20, 8))
sns.barplot(x='Parch', y='Survived', data=data, ax=ax[0])
ax[0].set_title('Parch vs Survived')
# The axes-level `pointplot` replaces the figure-level `factorplot`, which
# does not draw cleanly into an existing subplot. With no stray figure
# created, the original `plt.close(0)` is no longer needed; pyplot numbers
# figures from 1, so it did not target any auto-created figure anyway.
sns.pointplot(x='Parch', y='Survived', data=data, ax=ax[1])
ax[1].set_title('Parch vs Survived')
plt.show()
# Print the maximum, minimum and mean of the `Fare` column.
fare = data['Fare']
print('Highest Fare was:', fare.max())
print('Lowest Fare was:', fare.min())
# Label punctuation fixed (';' -> ':') to match the two lines above.
print('Average Fare was:', fare.mean())
# One fare-distribution panel per passenger class (1, 2, 3), left to right.
# NOTE(review): `distplot` is deprecated in newer seaborn releases; it is kept
# here so the rendered output stays exactly as before.
f, ax = plt.subplots(1, 3, figsize=(20, 8))
for panel, pclass in zip(ax, (1, 2, 3)):
    class_fares = data[data['Pclass'] == pclass].Fare
    sns.distplot(class_fares, ax=panel)
    panel.set_title(f'Fares in Pclass {pclass}')
plt.show()
# Annotated heatmap of the pairwise correlation matrix.
# Since pandas 2.0, DataFrame.corr() no longer silently drops non-numeric
# columns and raises on string data, so the numeric/bool columns are selected
# explicitly. This matches what older pandas did implicitly and works on both
# old and new versions.
numeric_data = data.select_dtypes(include=['number', 'bool'])
sns.heatmap(numeric_data.corr(), annot=True, cmap='RdYlGn', linewidths=0.2)
fig = plt.gcf()
fig.set_size_inches(10, 8)
plt.show()