# --- LabelEncoder walkthrough ------------------------------------------------
# Toy frame: a string-valued column "A" and an integer column "B".
df = pd.DataFrame({"A": list("abcab"), "B": [1, 2, 3, 1, 0]})
df

from sklearn.preprocessing import LabelEncoder

# fit() returns the fitted encoder itself, so construction and fitting can be
# chained; the labels it learned are exposed through classes_.
le = LabelEncoder().fit(df["A"])
le.classes_

# transform() maps every label to its integer code; fit_transform() performs
# the fit and the mapping in a single call.
le.transform(df["A"])
le.fit_transform(df["A"])

# Store the integer codes next to the original labels.
df["le_A"] = le.transform(df["A"])
df

# A single label can be encoded as well, and inverse_transform() maps the
# integer codes back to the original labels.
le.transform(["c"])
le.inverse_transform(df["le_A"])
# --- MinMaxScaler walkthrough ------------------------------------------------
# Rebuild the toy frame: column 'A' holds strings, column 'B' holds integers.
df = pd.DataFrame({
    'A': ['a', 'b', 'c', 'a', 'b'],
    'B': [1, 2, 3, 1, 0]
})
df
from sklearn.preprocessing import MinMaxScaler
# Scalers validate their input as floating-point data, so fitting on the whole
# frame raises "ValueError: could not convert string to float" because of the
# string column 'A'. Restrict the scaler to the numeric columns instead.
df_numeric = df.select_dtypes(include='number')
mms = MinMaxScaler()
mms.fit(df_numeric)
# Per-column statistics learned by fit().
mms.data_max_, mms.data_min_, mms.data_range_
# transform() returns a NumPy array holding the scaled numeric columns.
df_mms = mms.transform(df_numeric)
df_mms
# inverse_transform() maps the scaled values back to the original scale.
mms.inverse_transform(df_mms)
# fit_transform() combines fit() and transform() in one call.
mms.fit_transform(df_numeric)
# The source frame itself is left untouched by the scaler.
df
# --- StandardScaler walkthrough ----------------------------------------------
from sklearn.preprocessing import StandardScaler
# `df` still contains the string column 'A' at this point, and StandardScaler
# raises ValueError on values it cannot convert to float. Standardize only the
# numeric columns.
df_numeric = df.select_dtypes(include='number')
ss = StandardScaler()
ss.fit(df_numeric)
# Per-column mean and scale learned by fit().
ss.mean_, ss.scale_
# transform() returns a NumPy array holding the standardized numeric columns.
df_ss = ss.transform(df_numeric)
df_ss
# --- Comparing scalers on the same column ------------------------------------
# Single numeric column whose last value (5.0) lies far from the rest.
df = pd.DataFrame({"A": [-0.1, 0.0, 0.1, 0.2, 0.3, 0.4, 1.0, 1.1, 5.0]})
df

from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler

# One instance of each scaler under comparison.
mm, ss, rs = MinMaxScaler(), StandardScaler(), RobustScaler()

# Every scaler is fitted on the original single-column frame (`df`), never on
# the growing result frame, so the added columns do not influence one another.
df_scaler = df.copy()
df_scaler["MinMax"] = mm.fit_transform(df)
df_scaler["Standard"] = ss.fit_transform(df)
df_scaler["Robust"] = rs.fit_transform(df)
df_scaler
# --- Visual comparison of the scaled columns ---------------------------------
import matplotlib.pyplot as plt
import seaborn as sns

sns.set_theme(style="whitegrid")

# Wide figure; one horizontal box per column of `df_scaler`.
plt.figure(figsize=(16, 6))
sns.boxplot(data=df_scaler, orient="h");