Z-score 데이터 표준화 방법에 대해서 살펴보자.
import pandas as pd
from sklearn.datasets import load_iris
# Load the iris bundle and build a DataFrame with one column per feature,
# then append iris.target as a trailing 'target' column.
iris = load_iris()
df = pd.DataFrame(
    iris.data,
    columns=iris.feature_names,
).assign(target=iris.target)
# Manual z-score: z = (x - mean) / std, computed for each float column and
# stored next to the original as '<column>_zscore'.
# The column list is evaluated once, before the loop starts, so the
# '_zscore' columns added inside the loop are not themselves re-processed.
for col in df.select_dtypes('float').columns:
    # ddof=0 selects the population standard deviation; pandas' default
    # (ddof=1) would give the sample estimate instead.
    df[col + '_zscore'] = (df[col] - df[col].mean()) / df[col].std(ddof=0)
from scipy.stats import zscore
# Variant 1: hand the float columns straight to scipy.
# NOTE(review): scipy presumably returns a plain array here, without the
# column labels - confirm against the installed scipy version.
zscore(df.select_dtypes(include='float'))

# Variant 2: same computation, wrapped back into a DataFrame that reuses the
# float column names.
pd.DataFrame(
    data=zscore(df.select_dtypes(include='float')),
    columns=df.select_dtypes(include='float').columns,
)

# Variant 3: let pandas call zscore once per float column.
df.select_dtypes(include='float').apply(zscore, axis=0)
from sklearn.preprocessing import StandardScaler
# scikit-learn variant: learn the scaling statistics from the float columns,
# then apply them to the same columns.
scaler = StandardScaler()
scaler.fit(df.select_dtypes(include='float'))

# Wrap the transformed values in a DataFrame that reuses the float column
# names.
pd.DataFrame(
    data=scaler.transform(df.select_dtypes(include='float')),
    columns=df.select_dtypes(include='float').columns,
)