# Plot histograms of df's columns in an 8x8 figure.
df.hist(figsize=(8, 8))
# Name of the column used as the target label.
label_name = 'Outcome'
# Show the column names of df.
df.columns
>>>>
Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'],
dtype='object')
# Build the feature matrix X (every column except the label) and the
# target vector y (the label column only).
X = df.drop(label_name, axis=1)
y = df.loc[:, label_name]
X.shape, y.shape
>>>>
((768, 8), (768,))
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows as a test set; random_state=42 fixes the shuffle seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Shapes of the four resulting splits.
X_train.shape, X_test.shape, y_train.shape, y_test.shape
>>>>
((614, 8), (154, 8), (614,), (154,))
import tensorflow as tf
# Print the first 10 attribute names of the Keras activations module.
# The module is `tf.keras.activations` (plural); `tf.keras.activation`
# does not exist and raises AttributeError.
print(dir(tf.keras.activations)[:10])
>>>>
['deserialize', 'elu', 'exponential', 'gelu', 'get',
'hard_sigmoid', 'linear', 'relu', 'selu', 'serialize',
'sigmoid', 'softmax', 'softplus', 'softsign', 'swish', 'tanh']
sigmoid: 이진분류문제(출력층)
# Plot sigmoid(x) as a dashed line; x is defined outside this snippet.
plt.plot(x, tf.keras.activations.sigmoid(x), linestyle='--', label='sigmoid')
# Mark x = 0 with a vertical line.
plt.axvline(0)
plt.legend()
# Plot tanh(x) as a dashed line; x is defined outside this snippet.
plt.plot(x, tf.keras.activations.tanh(x), linestyle='--', label='tanh')
# Mark x = 0 with a vertical line.
plt.axvline(0)
plt.legend()
# Plot swish(x) as a dashed line; x is defined outside this snippet.
plt.plot(x, tf.keras.activations.swish(x), linestyle='--', label="swish")
# Mark x = 0 with a vertical line.
plt.axvline(0)
plt.legend()
relu: 은닉층에 주로 사용
# Plot relu(x) as a dashed line; x is defined outside this snippet.
plt.plot(x, tf.keras.activations.relu(x), linestyle='--', label="relu")
# Mark x = 0 with a vertical line.
plt.axvline(0)
plt.legend()
softmax: 다중 클래스 분류 문제 (출력층)
# Plot softplus(x) as a dashed line; x is defined outside this snippet.
plt.plot(x, tf.keras.activations.softplus(x), linestyle='--', label="softplus")
# Mark x = 0 with a vertical line.
plt.axvline(0)
plt.legend()
# Number of input features, taken from the column count of X.
input_shape = X.shape[1]
# Binary classification -> sigmoid output
# Multiclass classification -> softmax output
model = tf.keras.models.Sequential([
    # Fixed: this layer was written as `tk.keras...`; `tk` is undefined and
    # raised NameError. No activation is given, so this first layer is linear.
    tf.keras.layers.Dense(units=128, input_shape=[input_shape]),
    tf.keras.layers.Dense(128, activation='selu'),
    tf.keras.layers.Dense(128, activation='selu'),
    tf.keras.layers.Dense(128, activation='selu'),
    tf.keras.layers.Dropout(0.2),
    # Single sigmoid unit, matching the binary-classification note above.
    tf.keras.layers.Dense(1, activation='sigmoid'),
])
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric.
model.compile(optimizer='adam',
loss='binary_crossentropy',
metrics=['accuracy'])
# Print the layer-by-layer summary of the model.
model.summary()
class PrintDot(tf.keras.callbacks.Callback):
    """Keras callback that prints one dot per finished epoch.

    A line break is printed first on every 100th epoch (0, 100, 200, ...),
    so the dots wrap into rows of 100.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Print a progress dot for ``epoch``; ``logs`` is accepted but unused."""
        if epoch % 100 == 0:
            print('')
        print('.', end='')
# Stop training once 'val_loss' has not improved for 30 consecutive epochs.
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=30)
# Train for up to 1000 epochs, holding out 20% of the training data for
# validation; verbose=0 silences the built-in progress output, leaving
# PrintDot to report progress.
history = model.fit(X_train, y_train, epochs=1000, validation_split=0.2, callbacks=[early_stop, PrintDot()], verbose=0)
# 학습 결과의 history 값을 가져와서 비교하기 위해 데이터프레임으로 변환
# Turn the per-epoch metrics recorded in history.history into a DataFrame.
df_hist = pd.DataFrame(history.history)
# Show the last rows (the final epochs).
df_hist.tail()
# Plot training vs. validation loss.
df_hist[['loss', 'val_loss']].plot()
# Plot training vs. validation accuracy.
df_hist[['accuracy', 'val_accuracy']].plot()
# Run the trained model on the held-out test features.
y_pred = model.predict(X_test)
# Shape of the prediction array.
y_pred.shape
>>>> (154, 1)
# Apply a 0.5 threshold: predictions greater than 0.5 become 1, the rest 0.
# flatten() first collapses the predictions into a one-dimensional array.
flat_pred = y_pred.flatten()
y_predict = (flat_pred > 0.5).astype(int)
y_predict
# Evaluate on the test set; unpacks the loss and the accuracy metric set in compile().
test_loss, test_acc = model.evaluate(X_test, y_test)
test_loss, test_acc
>>>> loss: 0.6754 - accuracy: 0.7403
# Compute the accuracy by hand: the fraction of test labels that equal the
# thresholded predictions.
(y_test == y_predict).mean()
>>>> 0.7402597402597403