OneHotEncoder를 사용
OneHotEncoder: n개의 범주형 데이터를 n개의 비트(0, 1) 벡터로 표현
from sklearn.datasets import load_iris
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
# Load the iris dataset: feature matrix and integer class labels
iris = load_iris()
X = iris.data
y = iris.target
# One-hot encode the labels. OneHotEncoder expects a 2-D input, so the 1-D
# label vector is reshaped into a single column first.
# The `sparse=False` argument was deprecated in scikit-learn 1.2 and removed
# in 1.4 (renamed to `sparse_output`); keeping the default sparse output and
# densifying it with `.toarray()` works on both old and new releases.
enc = OneHotEncoder(handle_unknown='ignore')
y_onehot = enc.fit_transform(y.reshape(-1, 1)).toarray()
# Hold out 20% of the samples for testing; fixed seed for reproducibility
X_train, X_test, y_train, y_test = train_test_split(X, y_onehot, test_size=0.2, random_state=13)
import tensorflow as tf
# Fully connected classifier: 4 input features -> three 32-unit ReLU hidden
# layers -> 3-way softmax output.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(32, input_shape=(4, ), activation='relu'))
for _ in range(2):
    model.add(tf.keras.layers.Dense(32, activation='relu'))
model.add(tf.keras.layers.Dense(3, activation='softmax'))

# categorical_crossentropy is the loss used with one-hot encoded targets
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
# Train for 100 epochs and keep the per-epoch history for plotting
hist = model.fit(X_train, y_train, epochs=100)
# Evaluate on the held-out split
model.evaluate(X_test, y_test, verbose=2)
'''
1/1 - 0s - loss: 0.0952 - accuracy: 1.0000 - 123ms/epoch - 123ms/step
[0.09515346586704254, 1.0]
'''
import matplotlib.pyplot as plt
%matplotlib inline
# Plot the per-epoch training loss and accuracy recorded by model.fit.
# Both curves share one axis, so each gets a legend label and the title/axis
# text covers both metrics instead of mentioning only the loss.
plt.plot(hist.history['loss'], label='loss')
plt.plot(hist.history['accuracy'], label='accuracy')
plt.title('model loss and accuracy')
plt.xlabel('epochs')
plt.ylabel('loss / accuracy')
plt.legend()
plt.show()
Reference
1) 제로베이스 데이터스쿨 강의자료
2) https://steadiness-193.tistory.com/244