- t-test 는 2개 그룹의 평균 비교
- ANOVA 는 3개 이상 그룹의 평균 비교
▣ 데이터 : 주야별 교통사고 - 사망자수
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
import statsmodels.api as sm
# Day/night traffic-accident fatalities CSV. The file name was mis-decoded
# (UTF-8 bytes shown through a single-byte codepage); restored to the Korean
# name so it can match the file on disk. The export itself is CP949-encoded.
file_name = '주야별_교통사고_20230320214703.csv'
# Every cell is kept as a string at this point; the first data row still
# holds column labels (see the printed sample).
death_2011_2021 = pd.read_csv(file_name, encoding='cp949').to_numpy()
print(death_2011_2021[:5])
# result
# [['시점' '주야별(1)' '사망자수 (명)' '사망자수 (명)' '사망자수 (명)' '사망자수 (명)' '사망자수 (명)'
# '사망자수 (명)' '사망자수 (명)' '사망자수 (명)' '사망자수 (명)' '사망자수 (명)' '사망자수 (명)'
# '사망자수 (명)']
# ['2011' '주' '186' '156' '143' '182' '185' '202' '225' '227' '217' '233'
# '228' '250']
# ['2011' '야' '209' '183' '195' '247' '208' '213' '237' '245' '257' '287'
# '286' '228']
# ['2012' '주' '206' '194' '179' '233' '218' '238' '198' '193' '233' '261'
# '238' '196']
# ['2012' '야' '212' '199' '224' '250' '226' '238' '218' '216' '253' '272'
# '270' '227']]
1. 2021년 주야 사망자수 평균의 차이가 있는가 ?
2. 2011 ~ 2021년 사망자수의 월별 차이가 있는가 ?
def do_mean_comparison(g1, g2, *, alpha=0.05):
    """Print normality, equal-variance and mean-difference tests for two samples.

    Runs Shapiro-Wilk on each group, Levene's test across both groups, and an
    independent two-sample t-test. The t-test honours Levene's result: when
    its p-value is below ``alpha`` the equal-variance assumption is dropped
    (Welch's t-test); otherwise the pooled-variance Student test is used.

    Args:
        g1: First sample, a 1-D sequence of numbers.
        g2: Second sample, a 1-D sequence of numbers.
        alpha: Significance level applied to Levene's test when choosing
            between the Student and Welch variants.

    Returns:
        None. All results are printed.
    """
    print('*************************')
    print('g1 shapiro :', stats.shapiro(g1))
    print('g2 shapiro :', stats.shapiro(g2))
    levene_result = stats.levene(g1, g2)
    print('levene :', levene_result)
    # Written as "not (p < alpha)" so an undefined (NaN) p-value keeps the
    # pooled-variance default instead of silently switching to Welch.
    equal_var = not (levene_result.pvalue < alpha)
    print('ttest_ind :', stats.ttest_ind(g1, g2, equal_var=equal_var))
    print('*************************')
1. 2021년 주야 사망자수 평균의 차이가 있는가 ?
# 2021년도 데이터 추출하기
# Two equivalent ways to pull out the 2021 rows are shown; the second result
# overwrites the first.
# 1. By position: the 2021 day/night pair is the last two rows.
death_2021 = death_2011_2021[-2:]
# 2. By label: boolean mask on the year column. Named is_2021 rather than
#    `filter` so the builtin filter() is not shadowed at module level.
is_2021 = death_2011_2021[:, 0] == '2021'
death_2021 = death_2011_2021[is_2021]
print(death_2021)
# result
# [['2021' '주' '107' '107' '128' '119' '154' '124' '139' '129' '131' '156'
# '163' '149']
# ['2021' '야' '90' '96' '81' '93' '101' '103' '115' '118' '115' '156'
# '122' '120']]
# Row 0 is the day (주) series and row 1 the night (야) series; columns 0-1
# are the year and day/night labels, so the counts start at column 2.
death_2021_day = death_2021[0, 2:].astype(np.float64)
print('day:', death_2021_day)
# day: [107. 107. 128. 119. 154. 124. 139. 129. 131. 156. 163. 149.]
death_2021_night = death_2021[1, 2:].astype(np.float64)
print('night:', death_2021_night)
# night: [ 90. 96. 81. 93. 101. 103. 115. 118. 115. 156. 122. 120.]
do_mean_comparison(death_2021_day, death_2021_night)
def get_95_ci(vals):
    """Print and return the 95% confidence interval for the mean of ``vals``.

    The interval is ``mean ± t * s / sqrt(n)``, where ``s`` is the sample
    standard deviation (``ddof=1``) and ``t`` is the 97.5th percentile of
    Student's t distribution with ``n - 1`` degrees of freedom. Using the
    standard error (rather than the raw standard deviation) is what makes
    this an interval for the mean instead of a range covering most of the
    individual observations.

    Args:
        vals: Sequence of at least two numeric observations.

    Returns:
        Tuple ``(lower, upper)`` with the interval bounds.

    Raises:
        ValueError: If fewer than two observations are supplied, because the
            sample standard deviation is undefined.
    """
    vals = np.asarray(vals, dtype=np.float64)
    n_obs = vals.size
    if n_obs < 2:
        raise ValueError('get_95_ci needs at least two observations')
    vals_mean = np.mean(vals)
    std_err = np.std(vals, ddof=1) / np.sqrt(n_obs)
    # Two-sided 95% interval -> 2.5% in each tail.
    margin = stats.t.ppf(0.975, df=n_obs - 1) * std_err
    upper = vals_mean + margin
    lower = vals_mean - margin
    print('upper : ', upper)
    print('lower : ', lower)
    return lower, upper
# get_95_ci prints the bounds itself, so it is called directly; wrapping the
# call in print() only appended a stray extra line after each pair of bounds.
get_95_ci(death_2021_day)
get_95_ci(death_2021_night)
# Recorded output of a run that used mean ± 1.96 * np.std(vals)
# (day first, then night):
# upper : 168.83852850332045
# lower : 98.82813816334624
# upper : 146.36222494390665
# lower : 71.97110838942669
2. 2011 ~ 2021년 월별 차이가 있는가 ?
# Drop row 0, which holds column labels rather than counts.
# NOTE: this rebinds death_2011_2021, so re-running it drops another row.
death_2011_2021 = death_2011_2021[1:]
death_2011_2021[:5]
# result
# [['2011' '주' '186' '156' '143' '182' '185' '202' '225' '227' '217' '233'
# '228' '250']
# ['2011' '야' '209' '183' '195' '247' '208' '213' '237' '245' '257' '287'
# '286' '228']
# ['2012' '주' '206' '194' '179' '233' '218' '238' '198' '193' '233' '261'
# '238' '196']
# ['2012' '야' '212' '199' '224' '250' '226' '238' '218' '216' '253' '272'
# '270' '227']
# ['2013' '주' '190' '136' '198' '164' '215' '195' '221' '192' '211' '236'
# '223' '209']]

# Discard the year and day/night label columns and parse the remaining
# count strings as integers.
death_2011_2021 = death_2011_2021[:, 2:].astype(np.int64)
print(death_2011_2021[:5])
# result
# [[186 156 143 182 185 202 225 227 217 233 228 250]
# [209 183 195 247 208 213 237 245 257 287 286 228]
# [206 194 179 233 218 238 198 193 233 261 238 196]
# [212 199 224 250 226 238 218 216 253 272 270 227]
# [190 136 198 164 215 195 221 192 211 236 223 209]]

# Column-wise mean: one value per month, averaged over all day and night rows.
mon_mean = death_2011_2021.mean(axis=0)
print(mon_mean)
# [169.77272727 143.77272727 159.13636364 165.77272727 176.36363636 170.27272727
# 175.13636364 177.18181818 190.81818182 212.86363636 196.5 185.13636364]
3개 이상 그룹의 평균 차이를 검정해야 하기 때문에, ANOVA 를 사용해야 한다.
3개 그룹 이상의 집단의 평균 차이 검정
.
.
.
2011~2021년 동안 월별 평균에는 차이가 있을까 ?
# One sample per month: column `month` holds that month's counts for every
# row, day and night together. The twelve samples go into a one-way ANOVA.
lista = [death_2011_2021[:, month] for month in range(12)]
stats.f_oneway(*lista)
» p-value < 0.05 : 귀무가설 기각
» 즉, 월별 평균값은 차이가 있다고 볼 수 있다.
2011 ~ 2021년 동안 야간 사망자수는 월별 차이가 있을까 ?
# Rows alternate day, night, day, night, ... so the odd-indexed rows are the
# night series, one row per year.
death_night = death_2011_2021[1::2]
print(death_night)
# [[209 183 195 247 208 213 237 245 257 287 286 228]
# [212 199 224 250 226 238 218 216 253 272 270 227]
# [199 199 211 216 205 241 223 220 219 263 256 250]
# [200 175 205 188 172 182 211 176 245 247 227 218]
# [207 167 203 186 187 188 180 215 188 229 221 198]
# [177 157 156 193 137 159 171 161 188 245 201 212]
# [170 151 168 146 173 144 187 165 214 222 176 193]
# [155 153 160 165 138 118 144 188 179 169 135 159]
# [144 102 128 134 131 134 109 122 137 169 128 161]
# [132 115 115 96 118 143 124 134 137 147 116 86]
# [ 90 96 81 93 101 103 115 118 115 156 122 120]]

# One-way ANOVA across the twelve monthly columns of the night rows.
lista = [death_night[:, month] for month in range(12)]
stats.f_oneway(*lista)
» p-value = 0.15835 > 0.05 : 귀무가설 채택 (귀무가설을 기각할 수 없음)
» 2011~2021년도 야간 사망자수는 월별 차이가 없다.
2011 ~ 2021년 동안 주간 사망자수는 월별 차이가 있을까 ?
# Even-indexed rows are the day series (rows alternate day, night).
death_day = death_2011_2021[0::2]
print(death_day)
#
# One-way ANOVA across the twelve monthly columns of the day rows.
listb = [death_day[:, month] for month in range(12)]
stats.f_oneway(*listb)
» p-value < 0.05 : 귀무가설 기각
» 즉, 2011 ~ 2021년 동안 주간 사망자수는 월별 차이가 있다.