import numpy as np
import pandas as pd
# A Series built from a plain list gets the default positional labels 0..n-1.
data = pd.Series([1, 2, 3, 4])
data
# Rebuild it with explicit string labels so values can be looked up by name.
# (The original call closed the parenthesis before `index=`, which is a
# syntax error.)
data = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
data['b']  # 2
# Population per country; the dict keys become the Series index labels.
# Entries must be separated by commas (two of them were typed as periods,
# which made the literal unparsable).
population_dict = {
    'korea': 5180,
    'japan': 12718,
    'china': 141500,
    'usa': 32676,
}
population = pd.Series(population_dict)
# GDP per country, keyed by the same country labels as the population data.
# Entries must be separated by commas (two of them were typed as periods,
# which made the literal unparsable).
gdp_dict = {
    'korea': 169320000,
    'japan': 51670000,
    'china': 1409250000,
    'usa': 2041280000,
}
gdp = pd.Series(gdp_dict)
# Combine the two Series into one DataFrame: each Series becomes a column
# and rows are aligned on the Series' index labels.
country = pd.DataFrame({
    'population': population,
    'gdp': gdp,
})
country.index    # row labels: the country names
# Column labels are exposed through `.columns`; a DataFrame has no `.object`
# attribute, so the original call raised AttributeError.
country.columns  # the 'population' and 'gdp' labels
# Selecting a single column by name yields a Series.
country['gdp']
type(country['gdp'])
# pandas.core.series.Series
# Element-wise division of two columns, stored back as a new column.
gdp_per_capita = country['gdp'] / country['population']
country['gdp per capita'] = gdp_per_capita
# Write the table to disk in both formats; the row index is written as the
# first column of each file.
country.to_csv("./country.csv")
country.to_excel("./country.xlsx")
# Load it back. index_col=0 turns that first column back into the row index;
# without it the country names come back as an ordinary column and the
# label lookups below raise KeyError.
country = pd.read_csv("./country.csv", index_col=0)
country = pd.read_excel("./country.xlsx", index_col=0)
# .loc selects by label; label slices include both endpoints and follow the
# order of the index. Labels are case-sensitive, so it must be 'korea'.
country.loc['china']
country.loc['japan':'korea', :'population']
# .iloc selects by integer position; the stop position is excluded.
country.iloc[0]
country.iloc[1:3, :2]
# Start from an empty table that only declares its columns
# (이름 = name, 나이 = age, 주소 = address).
dataframe = pd.DataFrame(columns=['이름', '나이', '주소'])
# Assigning to a row label that does not exist yet appends that row; the
# values may be given as a list (in column order) or as a dict keyed by column.
dataframe.loc[0] = ['임원균', '26', '서울']
dataframe.loc[1] = {'이름': '철수', '나이': '25', '주소': '인천'}
# Overwrite a single cell addressed by (row label, column label).
dataframe.loc[1, '이름'] = '영희'
# Add a phone-number column (전화번호) with every value missing. It is created
# with object dtype because strings are stored in it below; a float NaN
# column would need a silent dtype upcast, which pandas has deprecated.
dataframe['전화번호'] = pd.Series(np.nan, index=dataframe.index, dtype=object)
dataframe.loc[0, '전화번호'] = '01012341234'
len(dataframe)  # 2
# A single label selects one column as a Series; a list of labels selects a
# sub-frame with the columns in the requested order.
dataframe["이름"]
dataframe[["이름", "주소", "나이"]]
# Boolean masks marking missing / present cells.
dataframe.isnull()
dataframe.notnull()
# Returns a copy without the rows that contain missing values; the original
# frame is left untouched.
dataframe.dropna()
# Replace the remaining missing phone numbers with a placeholder string.
dataframe['전화번호'] = dataframe['전화번호'].fillna('전화번호 없음')
# Two Series whose labels only partly overlap (labels 1 and 2 are shared).
A = pd.Series({0: 2, 1: 4, 2: 6})
B = pd.Series({1: 1, 2: 3, 3: 5})
# The operator aligns on labels; labels present on one side only give NaN.
A + B
# add() with fill_value=0 substitutes 0 for the side that lacks a label, so
# every label gets a number.
A.add(B, fill_value=0)
# Two frames of random digits (0-9) with different shapes and column sets:
# A is 2x2 with columns A, B; B is 3x3 with columns B, A, C.
A = pd.DataFrame(np.random.randint(0, 10, (2, 2)), columns=list("AB"))
# Bind only B here. The original `B = A = ...` chained assignment replaced A
# as well, leaving both names on the same 3x3 frame.
B = pd.DataFrame(np.random.randint(0, 10, (3, 3)), columns=list("BAC"))
# Addition aligns on both row and column labels; cells that exist in only
# one of the frames become NaN.
A + B
# With fill_value=0 a cell missing from one frame is treated as 0 instead.
A.add(B, fill_value=0)
# Column A holds 5, 6, 7 and column B holds the squares 0, 1, 4.
data = dict(
    A=list(range(5, 8)),
    B=[n * n for n in range(3)],
)
df = pd.DataFrame(data)
# Aggregating one column gives a scalar ...
df['A'].sum()  # 18
# ... while aggregating the frame gives one value per column.
df.sum()
df.mean()
# Sample frame for the sorting examples: two integer columns and one label
# column that contains a missing value.
df = pd.DataFrame(
    data=dict(
        col1=[2, 1, 9, 8, 7, 4],
        col2=['A', 'A', 'B', np.nan, 'D', 'C'],
        col3=[0, 1, 9, 4, 2, 3],
    )
)
#df(수정 전)
#    col1 col2 col3
# ------------------
# 0     2    A    0
# 1     1    A    1
# 2     9    B    9
# 3     8  NaN    4
# 4     7    D    2
# 5     4    C    3
#df(수정 후): df.sort_values('col1')
#    col1 col2 col3
# ------------------
# 1     1    A    1
# 0     2    A    0
# 5     4    C    3
# 4     7    D    2
# 3     8  NaN    4
# 2     9    B    9
#df(수정 전)
#    col1 col2 col3
# ------------------
# 0     2    A    0
# 1     1    A    1
# 2     9    B    9
# 3     8  NaN    4
# 4     7    D    2
# 5     4    C    3
#df(수정 후): df.sort_values('col1', ascending=False)
#    col1 col2 col3
# ------------------
# 2     9    B    9
# 3     8  NaN    4
# 4     7    D    2
# 5     4    C    3
# 0     2    A    0
# 1     1    A    1
#df(수정 전)
#    col1 col2 col3
# ------------------
# 0     2    A    0
# 1     1    A    1
# 2     9    B    9
# 3     8  NaN    4
# 4     7    D    2
# 5     4    C    3
#df(수정 후): df.sort_values(['col2', 'col1'])
#    col1 col2 col3
# ------------------
# 1     1    A    1
# 0     2    A    0
# 2     9    B    9
# 5     4    C    3
# 4     7    D    2
# 3     8  NaN    4
# csv 데이터를 읽고, 정렬해서 height가 큰 순서대로 상위 5개 나무의 정보를 확인
# height 변수를 큰 순서대로 정렬하고, 상위 5개의 나무를 출력하라
import pandas as pd
# Load ./data/tree_data.csv and work from that table.
tree = pd.read_csv("./data/tree_data.csv")
# Order the rows from the largest 'height' value to the smallest.
top_tree = tree.sort_values(by='height', ascending=False)
# Show the first five rows of the ordered table.
top_five = top_tree.iloc[:5]
print(top_five)
#     beans  circumference  height
# --------------------------------
# 0   72459           2.30  994.98
# 17  27374           1.92  986.82
# 31  91643           2.00  982.68
# 90  21681           0.51  953.62
# 92   5831           1.37  946.11