import pandas as pd
# Data source: https://www.kaggle.com/hesh97/titanicdataset-traincsv/data
# Read the training CSV (one directory up) into a DataFrame and preview
# its first five rows.
train_data = pd.read_csv(filepath_or_buffer='../train.csv')
train_data.head(n=5)
# boolean selection으로 row 선택하기
# 30대이면서 1등석에 탄 사람 선택하기
# Build two boolean masks: passengers whose Pclass is 1, and passengers
# whose Age is at least 30 but below 40 (missing ages compare as False).
class_ = train_data['Pclass'].eq(1)
age_ = train_data['Age'].ge(30) & train_data['Age'].lt(40)
# Keep only the rows for which both masks are True.
train_data.loc[class_ & age_]
# 새 column 추가하기
# Assigning to a new key appends the column at the end of the frame.
# Age_double holds twice each passenger's age.
train_data['Age_double'] = train_data['Age'].mul(2)
train_data.head(n=5)
# Age_double + Age, i.e. three times the age.
train_data['Age_tripple'] = train_data['Age_double'].add(train_data['Age'])
train_data.head(n=5)
# insert() places the new column at an explicit position (index 3)
# instead of appending it; Fare10 is the fare divided by ten.
train_data.insert(loc=3, column='Fare10', value=train_data['Fare'].div(10))
train_data.head(n=5)
# column 삭제하기
# Without inplace=True, drop() returns a new DataFrame and leaves
# train_data untouched; the result is not stored here, so the head()
# calls that follow still show the original columns.
train_data.drop(columns='Age_tripple')
train_data.head(n=5)
train_data.drop(columns='Age_double')
train_data.head(n=5)
# inplace=True removes both helper columns from train_data itself.
train_data.drop(columns=['Age_double', 'Age_tripple'], inplace=True)
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
# Data source: https://www.kaggle.com/hesh97/titanicdataset-traincsv/data
# Read the training CSV from the current directory into a fresh
# DataFrame and preview its first five rows.
train_data = pd.read_csv(filepath_or_buffer='./train.csv')
train_data.head(n=5)
# 변수(column) 사이의 상관계수(correlation)
# Pairwise correlation between the numeric columns.
# Since pandas 2.0, DataFrame.corr() no longer silently skips non-numeric
# columns and raises ValueError when it meets strings, so restrict the
# frame to numeric dtypes explicitly (this also works on older pandas).
corr_matrix = train_data.select_dtypes(include='number').corr()
# Bare expression: shown as cell output when it ends a notebook cell.
corr_matrix
# Render the matrix as an image; reuse the result instead of recomputing.
plt.matshow(corr_matrix)
# 머신러닝과 데이터 분석 A-Z 올인원 패키지 Online. 👉 https://bit.ly/3cB3C8y