<수업 내용>
Iris 데이터의 종(Species)별 각 feature의 평균(mean)과 표준편차(std) 구하기
import pandas as pd
# Load the Iris table; the path is resolved against the current working directory.
df = pd.read_csv('./Iris.csv')

# Bare expressions carried over from the notebook: they only render output
# when evaluated interactively and have no effect in a plain script.
df
df['Species'].unique()
import numpy as np
# Iris-setosa: select the species' rows once, then pick each measurement column.
_setosa = df[df['Species'] == 'Iris-setosa']
df_sepal_length_setosa = _setosa['SepalLengthCm']
df_sepal_width_setosa = _setosa['SepalWidthCm']
df_petal_length_setosa = _setosa['PetalLengthCm']
df_petal_width_setosa = _setosa['PetalWidthCm']

# Per-feature mean and population standard deviation (ddof=0, i.e. the same
# estimator np.std applies by default).
df_sepal_length_setosa_mean = df_sepal_length_setosa.mean()
df_sepal_length_setosa_std = df_sepal_length_setosa.std(ddof=0)
df_sepal_width_setosa_mean = df_sepal_width_setosa.mean()
df_sepal_width_setosa_std = df_sepal_width_setosa.std(ddof=0)
df_petal_length_setosa_mean = df_petal_length_setosa.mean()
df_petal_length_setosa_std = df_petal_length_setosa.std(ddof=0)
df_petal_width_setosa_mean = df_petal_width_setosa.mean()
df_petal_width_setosa_std = df_petal_width_setosa.std(ddof=0)

# One "mean std" line per feature, in the order sepal length/width, petal length/width.
for _mean, _std in (
    (df_sepal_length_setosa_mean, df_sepal_length_setosa_std),
    (df_sepal_width_setosa_mean, df_sepal_width_setosa_std),
    (df_petal_length_setosa_mean, df_petal_length_setosa_std),
    (df_petal_width_setosa_mean, df_petal_width_setosa_std),
):
    print(_mean, _std)
# Iris-versicolor: select the species' rows once, then pick each measurement column.
_versicolor = df[df['Species'] == 'Iris-versicolor']
df_sepal_length_versicolor = _versicolor['SepalLengthCm']
df_sepal_width_versicolor = _versicolor['SepalWidthCm']
df_petal_length_versicolor = _versicolor['PetalLengthCm']
df_petal_width_versicolor = _versicolor['PetalWidthCm']

# Per-feature mean and population standard deviation (ddof=0, i.e. the same
# estimator np.std applies by default).
df_sepal_length_versicolor_mean = df_sepal_length_versicolor.mean()
df_sepal_length_versicolor_std = df_sepal_length_versicolor.std(ddof=0)
df_sepal_width_versicolor_mean = df_sepal_width_versicolor.mean()
df_sepal_width_versicolor_std = df_sepal_width_versicolor.std(ddof=0)
df_petal_length_versicolor_mean = df_petal_length_versicolor.mean()
df_petal_length_versicolor_std = df_petal_length_versicolor.std(ddof=0)
df_petal_width_versicolor_mean = df_petal_width_versicolor.mean()
df_petal_width_versicolor_std = df_petal_width_versicolor.std(ddof=0)

# One "mean std" line per feature, in the order sepal length/width, petal length/width.
for _mean, _std in (
    (df_sepal_length_versicolor_mean, df_sepal_length_versicolor_std),
    (df_sepal_width_versicolor_mean, df_sepal_width_versicolor_std),
    (df_petal_length_versicolor_mean, df_petal_length_versicolor_std),
    (df_petal_width_versicolor_mean, df_petal_width_versicolor_std),
):
    print(_mean, _std)
# Iris-virginica: select the species' rows once, then pick each measurement column.
_virginica = df[df['Species'] == 'Iris-virginica']
df_sepal_length_virginica = _virginica['SepalLengthCm']
df_sepal_width_virginica = _virginica['SepalWidthCm']
df_petal_length_virginica = _virginica['PetalLengthCm']
df_petal_width_virginica = _virginica['PetalWidthCm']

# Per-feature mean and population standard deviation (ddof=0, i.e. the same
# estimator np.std applies by default).
df_sepal_length_virginica_mean = df_sepal_length_virginica.mean()
df_sepal_length_virginica_std = df_sepal_length_virginica.std(ddof=0)
df_sepal_width_virginica_mean = df_sepal_width_virginica.mean()
df_sepal_width_virginica_std = df_sepal_width_virginica.std(ddof=0)
df_petal_length_virginica_mean = df_petal_length_virginica.mean()
df_petal_length_virginica_std = df_petal_length_virginica.std(ddof=0)
df_petal_width_virginica_mean = df_petal_width_virginica.mean()
df_petal_width_virginica_std = df_petal_width_virginica.std(ddof=0)

# One "mean std" line per feature, in the order sepal length/width, petal length/width.
for _mean, _std in (
    (df_sepal_length_virginica_mean, df_sepal_length_virginica_std),
    (df_sepal_width_virginica_mean, df_sepal_width_virginica_std),
    (df_petal_length_virginica_mean, df_petal_length_virginica_std),
    (df_petal_width_virginica_mean, df_petal_width_virginica_std),
):
    print(_mean, _std)
정규분포 함수
def normal_distribution(data, mean, std):
    """Evaluate the normal (Gaussian) probability density at ``data``.

    Computes ``1 / sqrt(2*pi*std**2) * exp(-0.5 * ((data - mean) / std)**2)``.

    Args:
        data: Point(s) at which to evaluate the density: a scalar or any
            object that supports NumPy arithmetic (e.g. an ndarray).
        mean: Mean of the distribution.
        std: Standard deviation of the distribution. Must be non-zero, since
            the formula divides by it.

    Returns:
        The density value(s); array inputs yield an array of the same shape.
    """
    variance = std ** 2
    normaliser = 1 / np.sqrt(2 * np.pi * variance)
    z_score = (data - mean) / std
    return normaliser * np.exp((-1 / 2) * z_score ** 2)
3개 종 × 4개 feature = 12가지 조합 각각의 정규분포 확률밀도함수 구하기
def _feature_grid(column, num=1000):
    """Return ``num`` evenly spaced points spanning the observed range of ``df[column]``."""
    values = df[column]
    # Series.min()/max() are vectorised and skip NaN, unlike the Python
    # builtins, which iterate element by element.
    return np.linspace(values.min(), values.max(), num)


# For every feature the three species share the same x-range (the feature's
# overall min..max), so their fitted Gaussian curves can be drawn on one axis.

# Sepal length
data_sepal_length_setosa = _feature_grid('SepalLengthCm')
data_sepal_length_versicolor = _feature_grid('SepalLengthCm')
data_sepal_length_virginica = _feature_grid('SepalLengthCm')
setosa_sepal_length_normal = normal_distribution(
    data_sepal_length_setosa, df_sepal_length_setosa_mean, df_sepal_length_setosa_std)
versicolor_sepal_length_normal = normal_distribution(
    data_sepal_length_versicolor, df_sepal_length_versicolor_mean, df_sepal_length_versicolor_std)
virginica_sepal_length_normal = normal_distribution(
    data_sepal_length_virginica, df_sepal_length_virginica_mean, df_sepal_length_virginica_std)

# Sepal width
data_sepal_width_setosa = _feature_grid('SepalWidthCm')
data_sepal_width_versicolor = _feature_grid('SepalWidthCm')
data_sepal_width_virginica = _feature_grid('SepalWidthCm')
setosa_sepal_width_normal = normal_distribution(
    data_sepal_width_setosa, df_sepal_width_setosa_mean, df_sepal_width_setosa_std)
versicolor_sepal_width_normal = normal_distribution(
    data_sepal_width_versicolor, df_sepal_width_versicolor_mean, df_sepal_width_versicolor_std)
virginica_sepal_width_normal = normal_distribution(
    data_sepal_width_virginica, df_sepal_width_virginica_mean, df_sepal_width_virginica_std)

# Petal length
data_petal_length_setosa = _feature_grid('PetalLengthCm')
data_petal_length_versicolor = _feature_grid('PetalLengthCm')
data_petal_length_virginica = _feature_grid('PetalLengthCm')
setosa_petal_length_normal = normal_distribution(
    data_petal_length_setosa, df_petal_length_setosa_mean, df_petal_length_setosa_std)
versicolor_petal_length_normal = normal_distribution(
    data_petal_length_versicolor, df_petal_length_versicolor_mean, df_petal_length_versicolor_std)
virginica_petal_length_normal = normal_distribution(
    data_petal_length_virginica, df_petal_length_virginica_mean, df_petal_length_virginica_std)

# Petal width
data_petal_width_setosa = _feature_grid('PetalWidthCm')
data_petal_width_versicolor = _feature_grid('PetalWidthCm')
data_petal_width_virginica = _feature_grid('PetalWidthCm')
setosa_petal_width_normal = normal_distribution(
    data_petal_width_setosa, df_petal_width_setosa_mean, df_petal_width_setosa_std)
versicolor_petal_width_normal = normal_distribution(
    data_petal_width_versicolor, df_petal_width_versicolor_mean, df_petal_width_versicolor_std)
virginica_petal_width_normal = normal_distribution(
    data_petal_width_virginica, df_petal_width_virginica_mean, df_petal_width_virginica_std)
첫 행 데이터의 likelihood 구하기
# Observation to classify: the first row of the table. Each measurement is
# read by column label (not by position) so the lookup does not depend on the
# CSV's column order.
_obs_sepal_length = df['SepalLengthCm'].iloc[0]
_obs_sepal_width = df['SepalWidthCm'].iloc[0]
_obs_petal_length = df['PetalLengthCm'].iloc[0]
_obs_petal_width = df['PetalWidthCm'].iloc[0]

# The same observation is scored under every species; the per-species names
# all hold that one observation and are kept so existing references keep working.
sepal_length_setosa_like = sepal_length_versicolor_like = sepal_length_virginica_like = _obs_sepal_length
sepal_width_setosa_like = sepal_width_versicolor_like = sepal_width_virginica_like = _obs_sepal_width
petal_length_setosa_like = petal_length_versicolor_like = petal_length_virginica_like = _obs_petal_length
petal_width_setosa_like = petal_width_versicolor_like = petal_width_virginica_like = _obs_petal_width

# Iris-setosa: density of each measurement under the species' fitted Gaussian.
sls = normal_distribution(sepal_length_setosa_like, df_sepal_length_setosa_mean, df_sepal_length_setosa_std)
sws = normal_distribution(sepal_width_setosa_like, df_sepal_width_setosa_mean, df_sepal_width_setosa_std)
pls = normal_distribution(petal_length_setosa_like, df_petal_length_setosa_mean, df_petal_length_setosa_std)
pws = normal_distribution(petal_width_setosa_like, df_petal_width_setosa_mean, df_petal_width_setosa_std)
# Despite the "_evidence" name this is the product of the four per-feature
# densities, i.e. the likelihood of the row if features are treated as independent.
setosa_evidence = sls * sws * pls * pws

# Iris-versicolor
slv = normal_distribution(sepal_length_versicolor_like, df_sepal_length_versicolor_mean, df_sepal_length_versicolor_std)
swv = normal_distribution(sepal_width_versicolor_like, df_sepal_width_versicolor_mean, df_sepal_width_versicolor_std)
plv = normal_distribution(petal_length_versicolor_like, df_petal_length_versicolor_mean, df_petal_length_versicolor_std)
pwv = normal_distribution(petal_width_versicolor_like, df_petal_width_versicolor_mean, df_petal_width_versicolor_std)
versicolor_evidence = slv * swv * plv * pwv

# Iris-virginica
slvv = normal_distribution(sepal_length_virginica_like, df_sepal_length_virginica_mean, df_sepal_length_virginica_std)
swvv = normal_distribution(sepal_width_virginica_like, df_sepal_width_virginica_mean, df_sepal_width_virginica_std)
plvv = normal_distribution(petal_length_virginica_like, df_petal_length_virginica_mean, df_petal_length_virginica_std)
pwvv = normal_distribution(petal_width_virginica_like, df_petal_width_virginica_mean, df_petal_width_virginica_std)
virginica_evidence = slvv * swvv * plvv * pwvv
시각화
import matplotlib.pyplot as plt

# One panel per feature: (title, observed value of the first row, and for each
# species its legend label, x-grid, density curve and density at the observation).
_panels = [
    (
        "sepal length (cm)",
        sepal_length_setosa_like,
        [
            ("setosa", data_sepal_length_setosa, setosa_sepal_length_normal, sls),
            ("versicolor", data_sepal_length_versicolor, versicolor_sepal_length_normal, slv),
            ("virginica", data_sepal_length_virginica, virginica_sepal_length_normal, slvv),
        ],
    ),
    (
        "sepal width (cm)",
        sepal_width_setosa_like,
        [
            ("setosa", data_sepal_width_setosa, setosa_sepal_width_normal, sws),
            ("versicolor", data_sepal_width_versicolor, versicolor_sepal_width_normal, swv),
            ("virginica", data_sepal_width_virginica, virginica_sepal_width_normal, swvv),
        ],
    ),
    (
        "petal length (cm)",
        petal_length_setosa_like,
        [
            ("setosa", data_petal_length_setosa, setosa_petal_length_normal, pls),
            ("versicolor", data_petal_length_versicolor, versicolor_petal_length_normal, plv),
            ("virginica", data_petal_length_virginica, virginica_petal_length_normal, plvv),
        ],
    ),
    (
        "petal width (cm)",
        petal_width_setosa_like,
        [
            ("setosa", data_petal_width_setosa, setosa_petal_width_normal, pws),
            ("versicolor", data_petal_width_versicolor, versicolor_petal_width_normal, pwv),
            ("virginica", data_petal_width_virginica, virginica_petal_width_normal, pwvv),
        ],
    ),
]

fig, ax = plt.subplots(4, 1, figsize=(10, 10))
for _axis, (_title, _observed, _species_curves) in zip(ax, _panels):
    _axis.set_title(_title, size=10)
    for _label, _grid, _density, _point in _species_curves:
        # Label every curve so legend() has entries to show.
        (_line,) = _axis.plot(_grid, _density, label=_label)
        # Mark the observation on the curve, in the same colour as that curve.
        _axis.scatter(_observed, _point, marker='o', color=_line.get_color())
    _axis.legend()

# Four stacked panels with titles: let matplotlib space them so they don't overlap.
fig.tight_layout()
posterior에 비례하는 값 (종별 likelihood의 곱; 세 종의 사전확률이 같다고 가정하며 정규화는 하지 않음)
# Per-species scores for the first row (products of the per-feature densities),
# printed on one line in the order setosa, versicolor, virginica.
_class_scores = (setosa_evidence, versicolor_evidence, virginica_evidence)
print(*_class_scores)
위 값이 setosa에서 가장 크므로 첫 번째 데이터는 setosa로 판별할 수 있다