import pandas as pd
import numpy as np
from pandas import Series
# A lambda is an anonymous function expression; binding it to a name lets it
# be called like any other function. This one adds its two arguments.
f = lambda x, y: x + y
f(1, 4)
5
# Lambda that takes a single argument and halves it (true division, so the
# result is a float even for integer input).
f = lambda x: x / 2
f(3)
1.5
# Single-argument lambda that squares its input.
f = lambda x: x ** 2
f(3)
9
# A lambda does not need a name: it can be defined and called in one expression.
(lambda x: x+1)(5)
6
# map() applies f to each element of ex and returns a lazy iterator;
# list() materializes the results.
ex = [1, 2, 3, 4, 5]
f = lambda x: x ** 2
list(map(f, ex))
[1, 4, 9, 16, 25]
# When the function takes two or more arguments, map() needs one sequence
# per argument; elements are paired up position by position.
f = lambda x, y: x + y
list(map(f, ex, ex))
[2, 4, 6, 8, 10]
# The lambda can also be passed to map() directly, without binding it to a name.
list(map(lambda x: x + x, ex))
[2, 4, 6, 8, 10]
# Build a Series holding the integers 0..9 and preview its first five rows.
s1 = Series(np.arange(10))
s1.head(5)
0 0
1 1
2 2
3 3
4 4
dtype: int32
# Series.map with a callable applies it to every element of the Series.
s1.map(lambda x: x ** 2).head(5)
0 0
1 1
2 4
3 9
4 16
dtype: int64
# Series.map with a dict replaces each value by its dict entry;
# values that have no matching key become NaN.
d1 = {1: 'A', 2: 'B', 3: 'C'}
s1.map(d1).head(5)
0 NaN
1 A
2 B
3 C
4 NaN
dtype: object
# Series.map with another Series looks each value of s1 up as an index label
# in s2 (a label lookup, not a positional one). Here s1's values 0..9 happen
# to coincide with s2's default index labels, so the result looks positional.
s2 = Series(np.arange(10, 20))
s1.map(s2).head(5)
0 10
1 11
2 12
3 13
4 14
dtype: int32
# Load the wages dataset; "wages.csv" is resolved relative to the working directory.
df = pd.read_csv("wages.csv")
df.head()
earn | height | sex | race | ed | age | |
---|---|---|---|---|---|---|
0 | 79571.299011 | 73.89 | male | white | 16 | 49 |
1 | 96396.988643 | 66.23 | female | white | 16 | 62 |
2 | 48710.666947 | 63.77 | female | white | 16 | 33 |
3 | 80478.096153 | 63.22 | female | other | 16 | 95 |
4 | 82089.345498 | 63.08 | female | white | 17 | 43 |
# List the distinct values in the sex column before encoding them.
df.sex.unique()
array(['male', 'female'], dtype=object)
# Encode the sex strings as integer codes in a new column
# (male -> 0, female -> 1); the original sex column is left as is.
df["sex_code"] = df["sex"].map({"male": 0, "female": 1})
df.head(5)
earn | height | sex | race | ed | age | sex_code | |
---|---|---|---|---|---|---|---|
0 | 79571.299011 | 73.89 | male | white | 16 | 49 | 0 |
1 | 96396.988643 | 66.23 | female | white | 16 | 62 | 1 |
2 | 48710.666947 | 63.77 | female | white | 16 | 33 | 1 |
3 | 80478.096153 | 63.22 | female | other | 16 | 95 | 1 |
4 | 82089.345498 | 63.08 | female | white | 17 | 43 | 1 |
# replace() also accepts a dict of old -> new values. Without inplace it
# returns a new Series and leaves df untouched.
df["sex"].replace({"male": 0, "female": 1}).head()
0 0
1 1
2 1
3 1
4 1
Name: sex, dtype: int64
# Recode the sex column itself: replace() with two parallel lists maps
# "male" -> 0 and "female" -> 1.
# The result is assigned back rather than using
# df.sex.replace(..., inplace=True): that form mutates an intermediate Series
# (chained assignment), which warns in pandas 2.x and leaves df unchanged
# once Copy-on-Write is enabled.
df["sex"] = df["sex"].replace(["male", "female"], [0, 1])
df.head(5)
earn | height | sex | race | ed | age | sex_code | |
---|---|---|---|---|---|---|---|
0 | 79571.299011 | 73.89 | 0 | white | 16 | 49 | 0 |
1 | 96396.988643 | 66.23 | 1 | white | 16 | 62 | 1 |
2 | 48710.666947 | 63.77 | 1 | white | 16 | 33 | 1 |
3 | 80478.096153 | 63.22 | 1 | other | 16 | 95 | 1 |
4 | 82089.345498 | 63.08 | 1 | white | 17 | 43 | 1 |
# Keep only the numeric columns used in the apply/applymap examples below.
df_info = df[["earn", "height", "age"]]
df_info.head()
earn | height | age | |
---|---|---|---|
0 | 79571.299011 | 73.89 | 49 |
1 | 96396.988643 | 66.23 | 62 |
2 | 48710.666947 | 63.77 | 33 |
3 | 80478.096153 | 63.22 | 95 |
4 | 82089.345498 | 63.08 | 43 |
# f computes the range (max - min) of whatever Series it receives.
# DataFrame.apply calls it once per column and collects one result per column.
f = lambda x: x.max() - x.min()
df_info.apply(f)
earn 318047.708444
height 19.870000
age 73.000000
dtype: float64
# Built-in column-wise aggregation: sum of every column.
df_info.sum()
earn 4.474344e+07
height 9.183125e+04
age 6.250800e+04
dtype: float64
# Same totals via apply: Python's builtin sum is called once per column.
df_info.apply(sum)
earn 4.474344e+07
height 9.183125e+04
age 6.250800e+04
dtype: float64
def f(x):
    """Return the minimum and maximum of *x* as a two-element Series.

    The result is indexed by the labels "min" and "max", so applying this
    function column-wise yields one row per label.
    """
    return Series([x.min(), x.max()], index=["min", "max"])
# When the applied function returns a Series, apply() produces a DataFrame:
# one column per input column and one row per label of the returned Series.
df_info.apply(f)
earn | height | age | |
---|---|---|---|
min | -98.580489 | 57.34 | 22 |
max | 317949.127955 | 77.21 | 95 |
# Negate every individual cell of the frame (element-wise, not column-wise).
f = lambda x: -x
# DataFrame.applymap is deprecated since pandas 2.1 in favour of the
# identically-behaving DataFrame.map, so use map when this pandas version
# provides it and fall back to applymap on older releases. The conditional
# expression keeps the applymap lookup lazy in case it no longer exists.
elementwise = df_info.map if hasattr(df_info, "map") else df_info.applymap
elementwise(f).head(5)
earn | height | age | |
---|---|---|---|
0 | -79571.299011 | -73.89 | -49 |
1 | -96396.988643 | -66.23 | -62 |
2 | -48710.666947 | -63.77 | -33 |
3 | -80478.096153 | -63.22 | -95 |
4 | -82089.345498 | -63.08 | -43 |
# On a single column (a Series), apply() calls f on each element.
f = lambda x: -x
df_info["earn"].apply(f).head(5)
0 -79571.299011
1 -96396.988643
2 -48710.666947
3 -80478.096153
4 -82089.345498
Name: earn, dtype: float64