1.query[str.startwith, str.endwith, str.contain, filter**]**
2.concat
예시문제
data = [
{"cd":"A060310", "nm":"3S", "open":2920, "close":2800},
{"cd":"A095570", "nm":"AJ네트웍스", "open":1920, "close":1900},
{"cd":"A006840", "nm":"AK홀딩스", "open":2020, "close":2010},
{"cd":"A054620", "nm":"APS홀딩스", "open":3120, "close":3200}
]
df = DataFrame(data=data)
df = df.set_index('cd')
>>
nm open close
cd
A060310 3S 2920 2800
A095570 AJ네트웍스 1920 1900
A006840 AK홀딩스 2020 2010
A054620 APS홀딩스 3120 3200
df.query("close > open")
str = ("close > open")
df.query(str)
>>
nm open close
cd
A054620 APS홀딩스 3120 3200
df.query("close > open" and "open > 2000")
>>
nm open close
cd
A060310 3S 2920 2800
A006840 AK홀딩스 2020 2010
A054620 APS홀딩스 3120 3200
print(df.query("nm.str.startswith('A')"))
>>
nm open close
cd
A095570 AJ네트웍스 1920 1900
A006840 AK홀딩스 2020 2010
A054620 APS홀딩스 3120 3200
print(df.query("nm.str.contains('홀딩스')"))
>>
nm open close
cd
A006840 AK홀딩스 2020 2010
A054620 APS홀딩스 3120 3200
---------------------------------------
data = {
"PER": [6.42, 17.63, 21.09, 13.93],
"PBR": [1.10, 1.49, 2.06, 1.88],
"DPS": [1416, 1716, 2994, 1755]
}
index = ["2019/12", "2020/12", "2021/12", "2022/12(E)"]
df = pd.DataFrame(data=data, index=index)
>>
PER PBR DPS
2019/12 6.42 1.10 1416
2020/12 17.63 1.49 1716
2021/12 21.09 2.06 2994
2022/12(E) 13.93 1.88 1755
df.filter(items=["PER","PBR"])
>>
PER PBR
2019/12 6.42 1.10
2020/12 17.63 1.49
2021/12 121.09 2.06
2022/12(E) 13.93 1.88
df.filter(regex="P.R", axis=1)
>>
PER PBR
2019/12 6.42 1.10
2020/12 17.63 1.49
2021/12 21.09 2.06
2022/12(E) 13.93 1.88
df.filter(regex="12$", axis=0)
>>
PER PBR DPS
2019/12 6.42 1.10 1416
2020/12 17.63 1.49 1716
2021/12 21.09 2.06 2994
df.filter(like="2022", axis=0)
---------------------
columns = ['open', 'high', 'low', 'close']
index1 = [
datetime(2022, 9, 8),
datetime(2022, 9, 7),
datetime(2022, 9, 6),
datetime(2022, 9, 5),
datetime(2022, 9, 2)
]
index2 = [
datetime(2022, 9, 1),
datetime(2022, 8, 31),
datetime(2022, 8, 30),
datetime(2022, 8, 29),
datetime(2022, 8, 26)
]
df1 = pd.DataFrame(data=np.arange(0, 20).reshape(5,4), index=index1, columns=columns)
df2 = pd.DataFrame(data=np.arange(20,40).reshape(5,4), index=index2, columns=columns)
open high low close
2022-09-08 0 1 2 3
2022-09-07 4 5 6 7
2022-09-06 8 9 10 11
2022-09-05 12 13 14 15
2022-09-02 16 17 18 19
pd.concat([df1, df2], axis=0)
>>
open high low close
2022-09-08 0 1 2 3
2022-09-07 4 5 6 7
2022-09-06 8 9 10 11
2022-09-05 12 13 14 15
2022-09-02 16 17 18 19
2022-09-01 20 21 22 23
2022-08-31 24 25 26 27
2022-08-30 28 29 30 31
2022-08-29 32 33 34 35
2022-08-26 36 37 38 39
index = [
datetime(2022, 9, 8),
datetime(2022, 9, 7),
datetime(2022, 9, 6),
datetime(2022, 9, 5),
datetime(2022, 9, 2)
]
df1 = pd.DataFrame(data=np.arange(0, 10).reshape(5,2), index=index, columns=['open', 'high'])
df2 = pd.DataFrame(data=np.arange(10,20).reshape(5,2), index=index, columns=['low', 'close'])
pd.concat([df1, df2], axis=1)
--------------------------
index1 = [
datetime(2022, 9, 8),
datetime(2022, 9, 7)
]
index2 = [
datetime(2022, 9, 6),
datetime(2022, 9, 5)
]
df1 = pd.DataFrame(np.arange(0, 6).reshape(2,3), index=index1, columns=['open', 'high', 'low'])
df2 = pd.DataFrame(np.arange(6,12).reshape(2,3), index=index2, columns=['high', 'low', 'close'])
pd.concat([df1, df2], axis=0, join='outer')
>>
open high low close
2022-09-08 0.0 1.0 NaN NaN
2022-09-07 2.0 3.0 NaN NaN
2022-09-06 4.0 5.0 NaN NaN
2022-09-05 6.0 7.0 NaN NaN
2022-09-02 8.0 9.0 NaN NaN
2022-09-08 NaN NaN 10.0 11.0
2022-09-07 NaN NaN 12.0 13.0
2022-09-06 NaN NaN 14.0 15.0
2022-09-05 NaN NaN 16.0 17.0
2022-09-02 NaN NaN 18.0 19.0
pd.concat([df1, df2], axis=0, join='inner')
>>
2022-09-08
2022-09-07
2022-09-06
2022-09-05
2022-09-02
2022-09-08
2022-09-07
2022-09-06
2022-09-05
2022-09-02
df1 = pd.DataFrame(np.arange(0, 4).reshape(2,2))
df2 = pd.DataFrame(np.arange(4, 8).reshape(2,2))
pd.concat([df1, df2], axis= False)
>>
0 1
0 0 1
1 2 3
0 4 5
1 6 7
index = ["2020", "2021"]
data1 = {
"PER": [21.09, 13.55],
"PBR": [2.06, 1.80]
}
data2 = {
"PER": [12.40, 24.19],
"PBR": [1.58, 1.44]
}
df1 = pd.DataFrame(data=data1, index=index)
df2 = pd.DataFrame(data=data2, index=index)
>>
PER PBR
2020 21.09 2.06
2021 13.55 1.80
PER PBR
2020 12.40 1.58
2021 24.19 1.44
combined = pd.concat([df1, df2], keys=["삼성전자", "LG전자"])
print(combined)
>>
PER PBR
삼성전자 2020 21.09 2.06
2021 13.55 1.80
LG전자 2020 12.40 1.58
2021 24.19 1.44
combined = pd.concat([df1, df2], keys=["삼성전자", "LG전자"])
combined.index.names = ["회사", "연도"]
print(combined)
**>>
PER PBR
회사 연도
삼성전자 2020 21.09 2.06
2021 13.55 1.80
LG전자 2020 12.40 1.58
2021 24.19 1.44
------------------------------
data = [
["2차전지(생산)", "SK이노베이션", 10.19, 1.29],
["해운", "팬오션", 21.23, 0.95],
["시스템반도체", "티엘아이", 35.97, 1.12],
["해운", "HMM", 21.52, 3.20],
["시스템반도체", "아이에이", 37.32, 3.55],
["2차전지(생산)", "LG화학", 83.06, 3.75]
]
columns = ["테마", "종목명", "PER", "PBR"]
df = pd.DataFrame(data=data, columns=columns)
>>**
테마 종목명 PER PBR
0 2차전지(생산) SK이노베이션 10.19 1.29
1 해운 팬오션 21.23 0.95
2 시스템반도체 티엘아이 35.97 1.12
3 해운 HMM 21.52 3.20
4 시스템반도체 아이에이 37.32 3.55
5 2차전지(생산) LG화학 83.06 3.75
df.groupby(by="테마")[["PER","PBR"]].mean