# Central Limit Theorem (중심 극한 정리)
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Central Limit Theorem demonstration: build a uniform discrete population,
# plot it, then plot the distribution of sample means for increasing sample
# sizes so the histograms can be compared visually.

# --- Simulation parameters ---
population_size = 10000  # number of values in the simulated population
sample_sizes = [5, 10, 20, 50, 100, 500]  # observations per sample (n)
num_samples = 1000  # how many samples are drawn for each sample size

# Seed NumPy's global RNG so the population and every sample are reproducible.
np.random.seed(0)

# Integers in 1..12 (randint's upper bound is exclusive), i.e. 12-sided die rolls.
population = np.random.randint(1, 13, population_size)

# Histogram (with KDE overlay) of the population itself.
plt.figure(figsize=(8, 4))
sns.histplot(population, bins=12, kde=True, color='purple')
plt.title("Population Distribution (12-sided Die)")
plt.xlabel("Value")
plt.ylabel("Frequency")
plt.show()

# One figure per sample size: draw `num_samples` samples with replacement,
# record each sample's mean, and plot the histogram of those means.
for sample_size in sample_sizes:
    sample_means = []
    for _ in range(num_samples):
        sample = np.random.choice(population, size=sample_size, replace=True)
        sample_means.append(np.mean(sample))

    # The plotting calls live inside the outer loop because the title
    # reports the current sample size.
    plt.figure(figsize=(8, 4))
    sns.histplot(sample_means, bins=30, kde=True, color='skyblue')
    plt.title(f"Distribution of Sample Means (Sample Size = {sample_size})")
    plt.xlabel("Sample Mean")
    plt.ylabel("Frequency")
    plt.show()
# Shapiro-Wilk test (Shapiro-Wilk 테스트)
import numpy as np
import pandas as pd
from scipy.stats import shapiro
# Shapiro-Wilk normality check on the distribution of sample means, run once
# per sample size and summarized in a table.

# Seed NumPy's global RNG so the population and every sample are reproducible.
np.random.seed(0)

# --- Simulation parameters ---
population_size = 10000  # number of values in the simulated population
num_samples = 1000  # how many samples are drawn for each sample size
sample_sizes = [5, 10, 20, 50, 100, 500]  # observations per sample (n)

# Integers in 1..12 (randint's upper bound is exclusive).
population = np.random.randint(1, 13, population_size)

# Collects one (sample size, W statistic, p-value) tuple per sample size.
shapiro_results = []

for sample_size in sample_sizes:
    # Means of `num_samples` samples drawn with replacement from the population.
    sample_means = []
    for _ in range(num_samples):
        sample = np.random.choice(population, size=sample_size, replace=True)
        sample_means.append(np.mean(sample))

    # Test the collected means for normality; this runs once per sample size,
    # so it belongs to the outer loop, after all means have been gathered.
    stat, p_value = shapiro(sample_means)
    shapiro_results.append((sample_size, stat, p_value))

# Tabulate the results (one row per sample size) and print the table.
shapiro_df = pd.DataFrame(shapiro_results, columns=["Sample Size", "W Statistic", "p-value"])
print(shapiro_df)