파이썬 데이터 핸들링 기본(3)
1. Series & DataFrame
DataFrame은 행과 열로 이루어진 2차원 배열(표) 형태이고, Series는 DataFrame의 컬럼 하나와 비슷한 1차원 구조이다.
# Small sample table: one row per Pokémon, one dict key per column.
# Column order follows the insertion order of the dict below.
df = pd.DataFrame(
    data={
        "name": ["Bulbasaur", "Charmander", "Squirtle", "Caterpie"],
        "evolution": ["Ivysaur", "Charmeleon", "Wartortle", "Metapod"],
        "type": ["grass", "fire", "water", "bug"],
        "hp": [45, 39, 44, 45],
        "pokedex": ["yes", "no", "yes", "no"],
    }
)
# Work on an independent (deep) copy so later edits to df2 do not touch df.
df2 = df.copy(deep=True)
# Export to CSV; with pandas' default index=True the row index is written
# as the first column of the file.
df2.to_csv(path_or_buf='sample.csv')
# Stack new_df underneath df (row-wise) and renumber the rows 0..n-1.
# The result is not assigned; in a notebook it is only displayed.
pd.concat([df, new_df], axis=0, ignore_index=True)
import numpy as np
# Mark the 'place' value of the rows labelled 0 and 2 as missing (NaN).
Ans.loc[[0, 2], 'place'] = np.nan
2. Visualization
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# Plotting examples with seaborn / matplotlib.
# NOTE(review): `df` here is presumably a dataset loaded in a cell that is not
# shown (columns total_bill / tip / day / sex / time are used below) — confirm.
# A trailing ';' is the notebook idiom for hiding the echoed return value.

# Use seaborn's plain "white" theme for the figures that follow.
sns.set_style("white")
# Histogram of the total_bill column.
sns.histplot(df.total_bill);
# Joint plot of tip against total_bill, with marginal distributions.
sns.jointplot(x ="total_bill", y ="tip", data = df);
# Grid of pairwise relationships between the columns of df.
sns.pairplot(df);
# Categorical scatter of total_bill per day, coloured by sex.
sns.stripplot(x = "day", y = "total_bill", hue = "sex", data = df);
# One subplot column per distinct value of "time", each showing a histogram of tip.
g = sns.FacetGrid(df, col = "time")
g.map(plt.hist, "tip");
g.add_legend();
# Scatter of Fare against Age coloured by Sex; fit_reg=False omits the regression line.
# NOTE(review): the Age / Fare / Sex columns suggest a different dataset than
# the one used above — presumably df was reloaded in a cell not shown; confirm.
sns.lmplot(x = 'Age', y = 'Fare', data = df, hue = 'Sex', fit_reg=False)
# Histogram bin edges: 0, 10, 20, ..., 590.
binsVal = np.arange(0,600,10)
# NOTE(review): df2 as defined earlier in this file holds string columns and
# would not plot; presumably it was reassigned to numeric data — confirm.
plt.hist(df2, bins = binsVal)
3. Deleting
# Remove the 'class' column from df in place.
del df['class']
# Drop every row that contains at least one missing value. dropna returns a
# new DataFrame; df itself is left unchanged because the result is not assigned.
df.dropna(axis=0, how='any')