[청년취업사관학교 새싹]핀테커스 수업 8주차(10/20)

장민정·2023년 10월 20일
0
post-thumbnail

<수업 내용>

Iris 데이터의 종별 feature 평균(mean)과 표준편차(std)

import pandas as pd

# Load the Iris measurements from a CSV in the current working directory.
df = pd.read_csv('./Iris.csv')
df

# Distinct labels found in the 'Species' column.
df['Species'].unique()

##1

import numpy as np

# Iris-setosa: select the species' rows once, then take each feature column.
setosa_rows = df[df['Species'] == 'Iris-setosa']

df_sepal_length_setosa = setosa_rows['SepalLengthCm']
df_sepal_width_setosa = setosa_rows['SepalWidthCm']
df_petal_length_setosa = setosa_rows['PetalLengthCm']
df_petal_width_setosa = setosa_rows['PetalWidthCm']

# Mean and standard deviation of every feature, computed with np.mean / np.std.
df_sepal_length_setosa_mean = np.mean(df_sepal_length_setosa)
df_sepal_length_setosa_std = np.std(df_sepal_length_setosa)

df_sepal_width_setosa_mean = np.mean(df_sepal_width_setosa)
df_sepal_width_setosa_std = np.std(df_sepal_width_setosa)

df_petal_length_setosa_mean = np.mean(df_petal_length_setosa)
df_petal_length_setosa_std = np.std(df_petal_length_setosa)

df_petal_width_setosa_mean = np.mean(df_petal_width_setosa)
df_petal_width_setosa_std = np.std(df_petal_width_setosa)

# One "mean std" line per feature, in the order:
# sepal length, sepal width, petal length, petal width.
for feature_mean, feature_std in (
    (df_sepal_length_setosa_mean, df_sepal_length_setosa_std),
    (df_sepal_width_setosa_mean, df_sepal_width_setosa_std),
    (df_petal_length_setosa_mean, df_petal_length_setosa_std),
    (df_petal_width_setosa_mean, df_petal_width_setosa_std),
):
    print(feature_mean, feature_std)

##2

# Iris-versicolor: select the species' rows once, then take each feature column.
versicolor_rows = df[df['Species'] == 'Iris-versicolor']

df_sepal_length_versicolor = versicolor_rows['SepalLengthCm']
df_sepal_width_versicolor = versicolor_rows['SepalWidthCm']
df_petal_length_versicolor = versicolor_rows['PetalLengthCm']
df_petal_width_versicolor = versicolor_rows['PetalWidthCm']

# Mean and standard deviation of every feature, computed with np.mean / np.std.
df_sepal_length_versicolor_mean = np.mean(df_sepal_length_versicolor)
df_sepal_length_versicolor_std = np.std(df_sepal_length_versicolor)

df_sepal_width_versicolor_mean = np.mean(df_sepal_width_versicolor)
df_sepal_width_versicolor_std = np.std(df_sepal_width_versicolor)

df_petal_length_versicolor_mean = np.mean(df_petal_length_versicolor)
df_petal_length_versicolor_std = np.std(df_petal_length_versicolor)

df_petal_width_versicolor_mean = np.mean(df_petal_width_versicolor)
df_petal_width_versicolor_std = np.std(df_petal_width_versicolor)

# One "mean std" line per feature, in the order:
# sepal length, sepal width, petal length, petal width.
for feature_mean, feature_std in (
    (df_sepal_length_versicolor_mean, df_sepal_length_versicolor_std),
    (df_sepal_width_versicolor_mean, df_sepal_width_versicolor_std),
    (df_petal_length_versicolor_mean, df_petal_length_versicolor_std),
    (df_petal_width_versicolor_mean, df_petal_width_versicolor_std),
):
    print(feature_mean, feature_std)

##3

# Iris-virginica: select the species' rows once, then take each feature column.
virginica_rows = df[df['Species'] == 'Iris-virginica']

df_sepal_length_virginica = virginica_rows['SepalLengthCm']
df_sepal_width_virginica = virginica_rows['SepalWidthCm']
df_petal_length_virginica = virginica_rows['PetalLengthCm']
df_petal_width_virginica = virginica_rows['PetalWidthCm']

# Mean and standard deviation of every feature, computed with np.mean / np.std.
df_sepal_length_virginica_mean = np.mean(df_sepal_length_virginica)
df_sepal_length_virginica_std = np.std(df_sepal_length_virginica)

df_sepal_width_virginica_mean = np.mean(df_sepal_width_virginica)
df_sepal_width_virginica_std = np.std(df_sepal_width_virginica)

df_petal_length_virginica_mean = np.mean(df_petal_length_virginica)
df_petal_length_virginica_std = np.std(df_petal_length_virginica)

df_petal_width_virginica_mean = np.mean(df_petal_width_virginica)
df_petal_width_virginica_std = np.std(df_petal_width_virginica)

# One "mean std" line per feature, in the order:
# sepal length, sepal width, petal length, petal width.
for feature_mean, feature_std in (
    (df_sepal_length_virginica_mean, df_sepal_length_virginica_std),
    (df_sepal_width_virginica_mean, df_sepal_width_virginica_std),
    (df_petal_length_virginica_mean, df_petal_length_virginica_std),
    (df_petal_width_virginica_mean, df_petal_width_virginica_std),
):
    print(feature_mean, feature_std)

정규분포 확률밀도함수(PDF)

def normal_distribution(data, mean, std):
    """Evaluate the normal (Gaussian) probability density at ``data``.

    Computes ``exp(-((data - mean) / std) ** 2 / 2) / sqrt(2 * pi * std ** 2)``.

    Args:
        data: Point or NumPy array of points at which to evaluate the density.
        mean: Mean of the distribution.
        std: Standard deviation of the distribution.

    Returns:
        The density value(s); the result has the broadcast shape of the inputs.
    """
    z_score = (data - mean) / std
    scale = np.sqrt(2 * np.pi * std ** 2)
    return np.exp(-0.5 * z_score ** 2) / scale

12가지 조합(3개 종 × 4개 feature)의 정규분포 확률밀도함수 구하기

# Bare look-ups of the four feature columns. Nothing is assigned, so they do
# not affect any variable used below (presumably notebook-style inspection).
df['SepalLengthCm']
df['SepalWidthCm']
df['PetalLengthCm']
df['PetalWidthCm']

# For every feature, build ONE evaluation grid of 1000 evenly spaced points
# spanning the feature's observed range over the whole dataset, then evaluate
# each species' fitted normal density on it.
#
# The grid is computed once per feature with Series.min()/Series.max() instead
# of three times with the builtin min()/max(), which iterate the Series in
# Python. The three per-species names are kept for the code that follows and
# refer to the same array; copy before mutating any of them in place.

# 1) Sepal length
sepal_length_grid = np.linspace(df['SepalLengthCm'].min(), df['SepalLengthCm'].max(), 1000)
data_sepal_length_setosa = sepal_length_grid
data_sepal_length_versicolor = sepal_length_grid
data_sepal_length_virginica = sepal_length_grid

setosa_sepal_length_normal = normal_distribution(
    data_sepal_length_setosa, df_sepal_length_setosa_mean, df_sepal_length_setosa_std)
versicolor_sepal_length_normal = normal_distribution(
    data_sepal_length_versicolor, df_sepal_length_versicolor_mean, df_sepal_length_versicolor_std)
virginica_sepal_length_normal = normal_distribution(
    data_sepal_length_virginica, df_sepal_length_virginica_mean, df_sepal_length_virginica_std)

# 2) Sepal width
sepal_width_grid = np.linspace(df['SepalWidthCm'].min(), df['SepalWidthCm'].max(), 1000)
data_sepal_width_setosa = sepal_width_grid
data_sepal_width_versicolor = sepal_width_grid
data_sepal_width_virginica = sepal_width_grid

setosa_sepal_width_normal = normal_distribution(
    data_sepal_width_setosa, df_sepal_width_setosa_mean, df_sepal_width_setosa_std)
versicolor_sepal_width_normal = normal_distribution(
    data_sepal_width_versicolor, df_sepal_width_versicolor_mean, df_sepal_width_versicolor_std)
virginica_sepal_width_normal = normal_distribution(
    data_sepal_width_virginica, df_sepal_width_virginica_mean, df_sepal_width_virginica_std)

# 3) Petal length
petal_length_grid = np.linspace(df['PetalLengthCm'].min(), df['PetalLengthCm'].max(), 1000)
data_petal_length_setosa = petal_length_grid
data_petal_length_versicolor = petal_length_grid
data_petal_length_virginica = petal_length_grid

setosa_petal_length_normal = normal_distribution(
    data_petal_length_setosa, df_petal_length_setosa_mean, df_petal_length_setosa_std)
versicolor_petal_length_normal = normal_distribution(
    data_petal_length_versicolor, df_petal_length_versicolor_mean, df_petal_length_versicolor_std)
virginica_petal_length_normal = normal_distribution(
    data_petal_length_virginica, df_petal_length_virginica_mean, df_petal_length_virginica_std)

# 4) Petal width
petal_width_grid = np.linspace(df['PetalWidthCm'].min(), df['PetalWidthCm'].max(), 1000)
data_petal_width_setosa = petal_width_grid
data_petal_width_versicolor = petal_width_grid
data_petal_width_virginica = petal_width_grid

setosa_petal_width_normal = normal_distribution(
    data_petal_width_setosa, df_petal_width_setosa_mean, df_petal_width_setosa_std)
versicolor_petal_width_normal = normal_distribution(
    data_petal_width_versicolor, df_petal_width_versicolor_mean, df_petal_width_versicolor_std)
virginica_petal_width_normal = normal_distribution(
    data_petal_width_virginica, df_petal_width_virginica_mean, df_petal_width_virginica_std)

첫 번째 행 데이터의 likelihood 구하기

# Feature values of the first row, read once and looked up by column name so
# the result does not depend on the positional layout of the CSV (the original
# positional indices 1-4 only work when the features sit in those columns).
first_sepal_length = df['SepalLengthCm'].iloc[0]
first_sepal_width = df['SepalWidthCm'].iloc[0]
first_petal_length = df['PetalLengthCm'].iloc[0]
first_petal_width = df['PetalWidthCm'].iloc[0]

# NOTE: despite the `_evidence` suffix, each product below is the joint
# likelihood of the first row under one species (the product of the four
# per-feature normal densities), not the Bayesian evidence term.

# Setosa: per-feature densities of the first row and their product.
sepal_length_setosa_like = first_sepal_length
sepal_width_setosa_like = first_sepal_width
petal_length_setosa_like = first_petal_length
petal_width_setosa_like = first_petal_width
sls = normal_distribution(sepal_length_setosa_like, df_sepal_length_setosa_mean, df_sepal_length_setosa_std)
sws = normal_distribution(sepal_width_setosa_like, df_sepal_width_setosa_mean, df_sepal_width_setosa_std)
pls = normal_distribution(petal_length_setosa_like, df_petal_length_setosa_mean, df_petal_length_setosa_std)
pws = normal_distribution(petal_width_setosa_like, df_petal_width_setosa_mean, df_petal_width_setosa_std)
setosa_evidence = sls * sws * pls * pws

# Versicolor: same first-row values scored against the versicolor parameters.
sepal_length_versicolor_like = first_sepal_length
sepal_width_versicolor_like = first_sepal_width
petal_length_versicolor_like = first_petal_length
petal_width_versicolor_like = first_petal_width
slv = normal_distribution(sepal_length_versicolor_like, df_sepal_length_versicolor_mean, df_sepal_length_versicolor_std)
swv = normal_distribution(sepal_width_versicolor_like, df_sepal_width_versicolor_mean, df_sepal_width_versicolor_std)
plv = normal_distribution(petal_length_versicolor_like, df_petal_length_versicolor_mean, df_petal_length_versicolor_std)
pwv = normal_distribution(petal_width_versicolor_like, df_petal_width_versicolor_mean, df_petal_width_versicolor_std)
versicolor_evidence = slv * swv * plv * pwv

# Virginica: same first-row values scored against the virginica parameters.
sepal_length_virginica_like = first_sepal_length
sepal_width_virginica_like = first_sepal_width
petal_length_virginica_like = first_petal_length
petal_width_virginica_like = first_petal_width
slvv = normal_distribution(sepal_length_virginica_like, df_sepal_length_virginica_mean, df_sepal_length_virginica_std)
swvv = normal_distribution(sepal_width_virginica_like, df_sepal_width_virginica_mean, df_sepal_width_virginica_std)
plvv = normal_distribution(petal_length_virginica_like, df_petal_length_virginica_mean, df_petal_length_virginica_std)
pwvv = normal_distribution(petal_width_virginica_like, df_petal_width_virginica_mean, df_petal_width_virginica_std)
virginica_evidence = slvv * swvv * plvv * pwvv

시각화

import matplotlib.pyplot as plt

fig, ax = plt.subplots(4, 1, figsize=(10, 10))

# One entry per subplot: (title, first-row feature value, per-species series).
# Each per-species series is (legend label, x grid, density curve, density of
# the first row under that species). The first-row value is looked up by
# column name, consistent with how the feature columns are accessed elsewhere.
panels = [
    (
        "sepal length (cm)",
        df['SepalLengthCm'].iloc[0],
        [
            ("setosa", data_sepal_length_setosa, setosa_sepal_length_normal, sls),
            ("versicolor", data_sepal_length_versicolor, versicolor_sepal_length_normal, slv),
            ("virginica", data_sepal_length_virginica, virginica_sepal_length_normal, slvv),
        ],
    ),
    (
        "sepal width (cm)",
        df['SepalWidthCm'].iloc[0],
        [
            ("setosa", data_sepal_width_setosa, setosa_sepal_width_normal, sws),
            ("versicolor", data_sepal_width_versicolor, versicolor_sepal_width_normal, swv),
            ("virginica", data_sepal_width_virginica, virginica_sepal_width_normal, swvv),
        ],
    ),
    (
        "petal length (cm)",
        df['PetalLengthCm'].iloc[0],
        [
            ("setosa", data_petal_length_setosa, setosa_petal_length_normal, pls),
            ("versicolor", data_petal_length_versicolor, versicolor_petal_length_normal, plv),
            ("virginica", data_petal_length_virginica, virginica_petal_length_normal, plvv),
        ],
    ),
    (
        "petal width (cm)",
        df['PetalWidthCm'].iloc[0],
        [
            ("setosa", data_petal_width_setosa, setosa_petal_width_normal, pws),
            ("versicolor", data_petal_width_versicolor, versicolor_petal_width_normal, pwv),
            ("virginica", data_petal_width_virginica, virginica_petal_width_normal, pwvv),
        ],
    ),
]

for axis, (title, first_value, species_series) in zip(ax, panels):
    axis.set_title(title, size=10)
    for index, (label, grid, density, first_density) in enumerate(species_series):
        # "C0".."C2" are the colours the curves got by default; passing them
        # explicitly lets each marker share the colour of its own curve.
        colour = f"C{index}"
        axis.plot(grid, density, color=colour, label=label)
        axis.scatter(first_value, first_density, marker='o', color=colour)
    # The curves now carry labels, so the legend has entries to show
    # (the original legend() call had no labelled artists).
    axis.legend()

# Keep subplot titles from colliding with the tick labels of the panel above.
fig.tight_layout()

posterior 값

# The *_evidence values are unnormalised class likelihoods of the first row.
# Weight each by its class prior (relative frequency of the species in the
# data) and divide by the total so the three printed posteriors sum to 1.
class_priors = df['Species'].value_counts(normalize=True)
setosa_weighted = class_priors['Iris-setosa'] * setosa_evidence
versicolor_weighted = class_priors['Iris-versicolor'] * versicolor_evidence
virginica_weighted = class_priors['Iris-virginica'] * virginica_evidence
posterior_total = setosa_weighted + versicolor_weighted + virginica_weighted

print(
    setosa_weighted / posterior_total,
    versicolor_weighted / posterior_total,
    virginica_weighted / posterior_total,
)

첫 번째 데이터는 setosa로 판별할 수 있다.

0개의 댓글