CSV 데이터로부터 DataFrame 생성
import pandas as pd
# Data source: https://www.kaggle.com/hesh97/titanicdataset-traincsv/data
# Load the CSV file (path is relative to the working directory) into a DataFrame.
train_data = pd.read_csv('../train.csv')
# Preview the first rows of the DataFrame (head() returns 5 rows by default).
train_data.head()
read_csv 함수 파라미터
# Load only the columns listed in usecols, and use 'PassengerId' as the row
# index (index_col) instead of the default integer index.
train_data = pd.read_csv('../train.csv', index_col='PassengerId', usecols=['PassengerId', 'Survived', 'Pclass', 'Name'])
# Display the resulting DataFrame.
train_data
# Column labels; 'PassengerId' is not among them because it became the index.
train_data.columns
>>> Index(['Survived', 'Pclass', 'Name'], dtype='object')
column 선택하기
import pandas as pd
# Data source: https://www.kaggle.com/hesh97/titanicdataset-traincsv/data
# Reload the full CSV (all columns, default integer index) for the column
# selection examples.
train_data = pd.read_csv('../train.csv')
# Preview the first rows of the DataFrame (head() returns 5 rows by default).
train_data.head()
하나의 컬럼 선택하기
# Indexing with a single column label returns that column as a Series.
train_data['Survived']
# datatype : Series
>>> 0 0
1 1
2 1
3 1
4 0
..
886 0
887 1
888 0
889 1
890 0
Name: Survived, Length: 891, dtype: int64
복수의 컬럼 선택하기
# Indexing with a list of column labels returns a DataFrame containing
# those columns in the listed order.
train_data[['Survived', 'Name', 'Age', 'Embarked']]
# datatype : DataFrame
import numpy as np
import pandas as pd
# Data source: https://www.kaggle.com/hesh97/titanicdataset-traincsv/data
# Reload the full CSV (all columns, default integer index) for the row
# selection examples.
train_data = pd.read_csv('../train.csv')
# Preview the first rows of the DataFrame (head() returns 5 rows by default).
train_data.head()
DataFrame slicing
# Integer slicing on a DataFrame selects rows by position: rows 7, 8 and 9
# (the end of the slice is exclusive).
train_data[7:10]
row 선택하기
# Replace the default 0-based index with integer labels starting at 100,
# presumably so that label-based (loc) and position-based (iloc) selection
# give visibly different results. The upper bound is derived from the row
# count instead of being hard-coded, so the assignment does not fail with a
# length mismatch if the CSV has a different number of rows.
train_data.index = np.arange(100, 100 + len(train_data))
# Show the last rows to confirm the new index labels.
train_data.tail()
# loc selects by index label: label 986 is the row at position 886.
train_data.loc[986]
>>> PassengerId 887
Survived 0
Pclass 2
Name Montvila, Rev. Juozas
Sex male
Age 27.0
SibSp 0
Parch 0
Ticket 211536
Fare 13.0
Cabin NaN
Embarked S
Name: 986, dtype: object
# loc with a list of index labels returns those rows in the listed order.
train_data.loc[[986, 100, 110, 990]]
# iloc with a list of 0-based integer positions returns rows by position,
# regardless of what the index labels are.
train_data.iloc[[0, 100, 200, 2]]
row, column 동시에 선택하기
# loc[row_labels, column_labels]: select rows and columns together by label.
train_data.loc[[986, 100, 110, 990], ['Survived', 'Name', 'Sex', 'Age']]
# iloc[row_positions, column_positions]: select rows and columns together
# by 0-based integer position.
train_data.iloc[[101, 100, 200, 102], [1, 4, 5]]
머신러닝과 데이터 분석 A-Z 올인원 패키지 Online. 👉 https://bit.ly/3cB3C8y