# pandas is required for the DataFrame below. The import is placed here so
# this cell runs on its own; it is a harmless no-op if pandas was already
# imported earlier in the file.
import pandas as pd

# Small two-column demo frame: column A contains a negative value and a zero,
# column B is a small non-negative integer column.
df = pd.DataFrame(
    {
        'A': [10, 20, -10, 0, 25],
        'B': [1, 2, 3, 1, 0],
    }
)
df  # notebook-style display of the frame
from sklearn.preprocessing import MinMaxScaler

# Min-max scaling demo: fit() records each column's minimum, maximum and range
# from df; transform() rescales df using those statistics.
mms = MinMaxScaler()
mms.fit(df)

# Per-column statistics learned by fit().
mms.data_max_, mms.data_min_, mms.data_range_

# Keep the scaled values in a variable. The original discarded the result of
# transform() and then passed an undefined name `df_mms` to
# inverse_transform(), which raised NameError.
df_mms = mms.transform(df)
df_mms

# Round trip: inverse_transform() maps the scaled values back to the original
# units of df.
mms.inverse_transform(df_mms)
from sklearn.preprocessing import StandardScaler

# Standard scaling demo: fit() returns the fitted scaler itself, so creation
# and fitting are chained into a single statement.
ss = StandardScaler().fit(df)

# Scaled version of df, computed from the statistics learned above.
ss.transform(df)

# Per-column mean and scale learned during fit().
ss.mean_, ss.scale_
# Single-column frame whose last value (5.0) lies far from the rest; it is
# used below to compare how each scaler reacts to an outlier.
df = pd.DataFrame(
    [-0.1, 0, 0.1, 0.2, 0.3, 0.4, 1.0, 1.1, 5.0],
    columns=['A'],
)
df  # notebook-style display of the frame
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler

# One scaler of each kind. Each is fitted on the raw frame `df`, never on the
# growing comparison frame.
mm, ss, rs = MinMaxScaler(), StandardScaler(), RobustScaler()

# Side-by-side comparison: start from a copy of the raw data and append one
# scaled column per scaler.
df_scaler = df.copy()
for column, scaler in (('MinMax', mm), ('Standard', ss), ('Robust', rs)):
    df_scaler[column] = scaler.fit_transform(df)
df_scaler  # notebook-style display of the comparison table
import seaborn as sns
import matplotlib.pyplot as plt

# Horizontal box plot with one box per column of df_scaler (raw values plus
# each scaled variant), drawn on an explicitly created 16x6 axes.
sns.set_theme(style='whitegrid')
fig, ax = plt.subplots(figsize=(16, 6))
sns.boxplot(data=df_scaler, orient='h', ax=ax)
# MinMax and Standard scaling are sensitive to outliers.
# Robust scaling is less affected by outliers.