import pandas as pd
import seaborn as sns
# Ipython 디스플레이 설정 변경
pd.set_option('display.max_columns',10)
pd.set_option('display.max_colwidth',20)
titanic = sns.load_dataset('titanic')
# titanic 데이터셋에서 age,sex 등 5개 열을 설택하여 데이터프레임 만들기
df = titanic.loc[:,['age','sex','class','fare','survived']]
# df기반 pivot_table 만들기
# (행인덱스로 class 열 사용, 열인덱스로 sex열 사용)
# (value로 age값 사용, 집계함수로 mean()-> 평균 사용)
pdf1 = pd.pivot_table(df,
index='class',
columns='sex',
values='age',
aggfunc='mean')
print(pdf1.head())
🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽
sex female male class First 34.611765 41.281386 Second 28.722973 30.740707 Third 21.750000 26.507589 <class 'pandas.core.frame.DataFrame'>
pdf2 = pd.pivot_table(df,index='class',columns='sex',values='survived',aggfunc=['mean','sum'])
🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽
mean sum sex female male female male class First 0.968085 0.368852 91 45 Second 0.921053 0.157407 70 17 Third 0.500000 0.135447 72 47
print(pdf2[ ('mean','female') ])
🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽
class First 0.968085 Second 0.921053 Third 0.500000
pdf3 = pd.pivot_table(df, index=['class','sex'],
columns='survived',
values=['age','fare'],
aggfunc=['mean','max'])
🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽
mean max \ age fare age survived 0 1 0 1 0 1 class sex First female 25.666667 34.939024 110.604167 105.978159 50.0 63.0 male 44.581967 36.248000 62.894910 74.637320 71.0 80.0 Second female 36.000000 28.080882 18.250000 22.288989 57.0 55.0 male 33.369048 16.022000 19.488965 21.095100 70.0 62.0 Third female 23.818182 19.329787 19.773093 12.464526 48.0 63.0
fare survived 0 1 class sex First female 151.55 512.3292 male 263.00 512.3292 Second female 26.00 65.0000 male 73.50 39.0000
print(pdf3.index)
print(pdf3.columns)
🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽
MultiIndex([( 'First', 'female'), ( 'First', 'male'), ('Second', 'female'), ('Second', 'male'), ( 'Third', 'female'), ( 'Third', 'male')], names=['class', 'sex']) MultiIndex([('mean', 'age', 0), ('mean', 'age', 1), ('mean', 'fare', 0), ('mean', 'fare', 1), ( 'max', 'age', 0), ( 'max', 'age', 1), ( 'max', 'fare', 0), ( 'max', 'fare', 1)], names=[None, None, 'survived'])
# xs 인덱서 사용 - default: axis=0
print(pdf3.xs('First',level='class'))
🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽
mean max \ age fare age fare survived 0 1 0 1 0 1 0 sex female 25.666667 34.939024 110.604167 105.978159 50.0 63.0 151.55 male 44.581967 36.248000 62.894910 74.637320 71.0 80.0 263.00
survived 1 sex female 512.3292 male 512.3292
# xs 인덱서 사용 - 멀티인덱싱처럼 레벨 순서대로 튜플형태로 전달
print(pdf3.xs( ('First', 'female') ) )
🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽
survived mean age 0 25.666667 1 34.939024 fare 0 110.604167 1 105.978159 max age 0 50.000000 1 63.000000 fare 0 151.550000 1 512.329200
# xs 인덱서 사용 - 행인덱스 레벨을 직접 지정하는 방법
print(pdf3.xs('male',level='sex')
🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽
mean max age fare age fare survived 0 1 0 1 0 1 0 class First 44.581967 36.248000 62.894910 74.637320 71.0 80.0 263.00 Second 33.369048 16.022000 19.488965 21.095100 70.0 62.0 73.50 Third 27.255814 22.274211 12.204469 15.579696 74.0 45.0 69.55
survived 1 class First 512.3292 Second 39.0000 Third 56.4958
# xs인덱서 사용 - 여러개의 행인덱스를 선택할때 레벨을 지정하는 방식
print(pdf3.xs( ('Second','male'), level=([0,'sex']))
🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽
mean max age fare age fare survived 0 1 0 1 0 1 0 1 class sex Second male 33.369048 16.022 19.488965 21.0951 70.0 62.0 73.5 39.0
print(pdf3.xs('mean',axis=1))
🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽
age fare survived 0 1 0 1 class sex First female 25.666667 34.939024 110.604167 105.978159 male 44.581967 36.248000 62.894910 74.637320 Second female 36.000000 28.080882 18.250000 22.288989 male 33.369048 16.022000 19.488965 21.095100 Third female 23.818182 19.329787 19.773093 12.464526 male 27.255814 22.274211 12.204469 15.579696
# survived 레벨이 1인 데이터 선택
print(pdf3.xs(1,level='survived',axis=1))
🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽
mean max age fare age fare class sex First female 34.939024 105.978159 63.0 512.3292 male 36.248000 74.637320 80.0 512.3292 Second female 28.080882 22.288989 55.0 65.0000 male 16.022000 21.095100 62.0 39.0000 Third female 19.329787 12.464526 63.0 31.3875 male 22.274211 15.579696 45.0 56.4958
# max,fare, survived=0 인 데이터 선택
print(pdf3.xs(('max','fare',0), level = [0,1,2], axis=1))
🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽🔽
max fare survived 0 class sex First female 151.55 male 263.00 Second female 26.00 male 73.50 Third female 69.55 male 69.55