import tensorflow as tf
from tensorflow.keras import layers
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import librosa
import librosa.display
import matplotlib.pyplot as plt
import os
from os.path import isdir, join
import random
import copy
# Show which TensorFlow build is active (a bare expression only displays
# its value inside a notebook cell, so print it explicitly).
print(tf.__version__)

# Toggle between a Google Colab run (data on Drive) and a local run.
use_colab = True
assert use_colab in [True, False]

if use_colab:
    # Imported here rather than at module level: the import and the mount
    # are only meaningful for the Colab branch, and running them
    # unconditionally breaks local runs even when use_colab is False.
    from google.colab import drive

    drive.mount('/content/drive')
    # Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
    DATASET_PATH = "/content/drive/MyDrive/Datasets"
else:
    DATASET_PATH = "./"
# Load the precomputed spectrogram array from the dataset directory.
spec_path = os.path.join(DATASET_PATH, "speech_spec_8000.npy")
data_wav = np.load(spec_path)
data_wav.shape
# recorded output: (50620, 130, 126)

# Preview a single example (index 219) on a decibel scale, using the
# sample's own maximum as the 0 dB reference.
sample_db = librosa.amplitude_to_db(data_wav[219], ref=np.max)
librosa.display.specshow(sample_db, x_axis='time')
plt.title('Power spectrogram')
plt.colorbar(format='%+2.0f dB')
plt.tight_layout()
plt.show()

# Load the label array stored alongside the spectrograms.
label_path = os.path.join(DATASET_PATH, "speech_label_8000.npy")
data_label = np.load(label_path)
print(data_label.shape)
# recorded output: (50620, 1)
# Target classes; a name's position in this list becomes its integer class id.
label_value = ['yes', 'no', 'up', 'down', 'left', 'right', 'on', 'off', 'stop', 'go', 'unknown', 'silence']

# Replace the list with a name -> id lookup table.
new_label_value = {name: idx for idx, name in enumerate(label_value)}
label_value = new_label_value

# The first element of each data_label row is looked up as a class name;
# convert the rows into a flat array of integer class ids.
data_label = np.array([label_value[row[0]] for row in data_label])

# Spot-check one converted label.
print(data_label[100])
# 3
# Hold out 10% of the shuffled samples for evaluation. With the 50620
# examples recorded earlier this yields the 45558 / 5062 split shown in the
# recorded output at the end of this block.
train_wav, test_wav, train_label, test_label = train_test_split(
    data_wav, data_label, test_size=0.1, shuffle=True
)

# Conv2D layers expect a trailing channel axis, so append a single channel:
# (N, 130, 126) -> (N, 130, 126, 1).
train_wav = train_wav.reshape(train_wav.shape + (1,))
test_wav = test_wav.reshape(test_wav.shape + (1,))

print(train_wav.shape)
print(test_wav.shape)
print(train_label.shape)
print(test_label.shape)

# Drop the unsplit arrays to keep memory usage down.
del data_wav
del data_label
# (45558, 130, 126, 1)
# (5062, 130, 126, 1)
# (45558,)
# (5062,)
# Summarize the split: array shapes for both partitions plus the class count.
print(f'Train_Wav Demension : {np.shape(train_wav)}')
print(f'Train_Label Demension : {np.shape(train_label)}')
print(f'Test_Wav Demension : {np.shape(test_wav)}')
print(f'Test_Label Demension : {np.shape(test_label)}')
print(f'Number Of Labels : {len(label_value)}')
# Recorded output:
# Train_Wav Demension : (45558, 130, 126, 1)
# Train_Label Demension : (45558,)
# Test_Wav Demension : (5062, 130, 126, 1)
# Test_Label Demension : (5062,)
# Number Of Labels : 12
# Training hyperparameters.
batch_size = 32
max_epochs = 10

# the save point
# NOTE(review): the Colab path is relative and spells the Drive folder
# "My Drive", whereas DATASET_PATH uses the absolute "/content/drive/MyDrive";
# confirm both resolve to the same Drive mount.
if use_colab:
    checkpoint_dir = './drive/My Drive/train_ckpt/spectrogram/exp1'
else:
    checkpoint_dir = 'spectrogram/exp1'

# exist_ok=True replaces the check-then-create pair (no race, safe to rerun),
# and the directory is now created for the local branch as well.
os.makedirs(checkpoint_dir, exist_ok=True)
def one_hot_label(wav, label, depth=12):
    """Pair an example with the one-hot encoding of its integer label.

    Intended for use with `tf.data.Dataset.map`, which calls it with the
    two components of each dataset element.

    Args:
        wav: Input features of the example; passed through unchanged.
        label: Integer class id to encode.
        depth: Length of the one-hot vector (number of classes). Defaults
            to 12, the value that was previously hard-coded.

    Returns:
        A `(wav, one_hot_label)` tuple.
    """
    return wav, tf.one_hot(label, depth=depth)
# for train
# Labels are one-hot encoded, then the data is shuffled over the whole
# training set, repeated indefinitely and batched; the epoch length is
# therefore set by steps_per_epoch when fitting.
train_dataset = tf.data.Dataset.from_tensor_slices((train_wav, train_label))
train_dataset = train_dataset.map(one_hot_label)
train_dataset = train_dataset.shuffle(len(train_wav)).repeat().batch(batch_size=batch_size)
print(train_dataset)
# <BatchDataset shapes: ((None, 130, 126, 1), (None, 12)), types: (tf.float32, tf.float32)>

# for test
# This pipeline was missing although later code validates and evaluates on
# `test_dataset`. It is neither shuffled nor repeated, so one pass covers
# every held-out example exactly once.
test_dataset = tf.data.Dataset.from_tensor_slices((test_wav, test_label))
test_dataset = test_dataset.map(one_hot_label)
test_dataset = test_dataset.batch(batch_size=batch_size)
print(test_dataset)
# <BatchDataset shapes: ((None, 130, 126, 1), (None, 12)), types: (tf.float32, tf.float32)>
def _conv3x3(filters, strides=1, activation='relu'):
    """Create a 3x3, 'same'-padded Conv2D layer (an int kernel size means a square kernel)."""
    return layers.Conv2D(filters, 3, strides=strides, padding='same', activation=activation)


# Network input: one single-channel 130 x 126 spectrogram.
input_tensor = layers.Input(shape=(130, 126, 1))

# Stem convolution: 16 filters of 3x3 over one channel (16 * 9 weights + biases).
x = _conv3x3(16)(input_tensor)

# Stage 1: a strided conv halves the resolution; its dropout-regularized
# output is both processed further and concatenated back on the channel axis.
skip_1 = _conv3x3(16, strides=2)(x)
skip_1 = layers.Dropout(0.3)(skip_1)
x = _conv3x3(32)(skip_1)
x = tf.concat([x, skip_1], -1)

# Stage 2: same pattern with more filters.
skip_2 = _conv3x3(32, strides=2)(x)
x = _conv3x3(64)(skip_2)
x = tf.concat([x, skip_2], -1)

# Stage 3: the inner conv has no built-in activation because batch
# normalization is applied before the ReLU.
skip_3 = _conv3x3(64, strides=2)(x)
x = _conv3x3(128, activation=None)(skip_3)
x = layers.BatchNormalization()(x)
x = layers.Activation('relu')(x)
x = tf.concat([x, skip_3], -1)

# Final strided conv, then flatten into the classifier head.
x = _conv3x3(128, strides=2)(x)
x = layers.Flatten()(x)
x = layers.Dense(64)(x)
x = layers.BatchNormalization()(x)
x = layers.Activation('relu')(x)

# One raw score (logit) per class; no softmax is applied in the model itself.
# (The original author also noted a single-unit alternative, layers.Dense(1).)
output_tensor = layers.Dense(12)(x)

model = tf.keras.Model(input_tensor, output_tensor)
# Adam with a learning rate of 1e-3.
optimizer = tf.keras.optimizers.Adam(1e-3)

# The loss is configured with from_logits=True, i.e. it is given the model's
# raw outputs together with the one-hot labels.
# Alternatives noted by the original author: BinaryCrossentropy(), or
# SparseCategoricalCrossentropy(from_logits=True) with the same optimizer
# and metrics.
loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

model.compile(
    optimizer=optimizer,
    loss=loss_fn,
    metrics=['accuracy'],
)
# Sanity check before any training: run the first training example through
# the model in inference mode and show the raw outputs.
first_example = train_wav[0:1]
predictions = model(first_example, training=False)
print("Predictions: ", predictions.numpy())
# Recorded output:
# Predictions: [[-0.00723778 -0.01634607 0.00220009 -0.04531168 -0.05090157 -0.05140428
# -0.00673903 0.01359626 -0.00684556 -0.0425383 -0.0102446 0.03124363]]

# Print the layer-by-layer architecture and parameter counts.
model.summary()
# Recorded output of model.summary():
# Model: "model"
# __________________________________________________________________________________________________
# Layer (type) Output Shape Param # Connected to
# ==================================================================================================
# input_1 (InputLayer) [(None, 130, 126, 1) 0
# __________________________________________________________________________________________________
# conv2d (Conv2D) (None, 130, 126, 16) 160 input_1[0][0]
# __________________________________________________________________________________________________
# conv2d_1 (Conv2D) (None, 65, 63, 16) 2320 conv2d[0][0]
# __________________________________________________________________________________________________
# dropout (Dropout) (None, 65, 63, 16) 0 conv2d_1[0][0]
# __________________________________________________________________________________________________
# conv2d_2 (Conv2D) (None, 65, 63, 32) 4640 dropout[0][0]
# __________________________________________________________________________________________________
# tf.concat (TFOpLambda) (None, 65, 63, 48) 0 conv2d_2[0][0]
# dropout[0][0]
# __________________________________________________________________________________________________
# conv2d_3 (Conv2D) (None, 33, 32, 32) 13856 tf.concat[0][0]
# __________________________________________________________________________________________________
# conv2d_4 (Conv2D) (None, 33, 32, 64) 18496 conv2d_3[0][0]
# __________________________________________________________________________________________________
# tf.concat_1 (TFOpLambda) (None, 33, 32, 96) 0 conv2d_4[0][0]
# conv2d_3[0][0]
# __________________________________________________________________________________________________
# conv2d_5 (Conv2D) (None, 17, 16, 64) 55360 tf.concat_1[0][0]
# __________________________________________________________________________________________________
# conv2d_6 (Conv2D) (None, 17, 16, 128) 73856 conv2d_5[0][0]
# __________________________________________________________________________________________________
# batch_normalization (BatchNorma (None, 17, 16, 128) 512 conv2d_6[0][0]
# __________________________________________________________________________________________________
# activation (Activation) (None, 17, 16, 128) 0 batch_normalization[0][0]
# __________________________________________________________________________________________________
# tf.concat_2 (TFOpLambda) (None, 17, 16, 192) 0 activation[0][0]
# conv2d_5[0][0]
# __________________________________________________________________________________________________
# conv2d_7 (Conv2D) (None, 9, 8, 128) 221312 tf.concat_2[0][0]
# __________________________________________________________________________________________________
# flatten (Flatten) (None, 9216) 0 conv2d_7[0][0]
# __________________________________________________________________________________________________
# dense (Dense) (None, 64) 589888 flatten[0][0]
# __________________________________________________________________________________________________
# batch_normalization_1 (BatchNor (None, 64) 256 dense[0][0]
# __________________________________________________________________________________________________
# activation_1 (Activation) (None, 64) 0 batch_normalization_1[0][0]
# __________________________________________________________________________________________________
# dense_1 (Dense) (None, 12) 780 activation_1[0][0]
# ==================================================================================================
# Total params: 981,436
# Trainable params: 981,052
# Non-trainable params: 384
# __________________________________________________________________________________________________
# Draw the model graph, including tensor shapes.
tf.keras.utils.plot_model(model, show_shapes=True, dpi=64)

# Save weights only, and only when the monitored validation loss improves.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_dir,
    save_weights_only=True,
    monitor='val_loss',
    mode='auto',  # max, min, auto
    save_best_only=True,
    verbose=1,
)

# using `tf.data.Dataset`
# The training pipeline repeats indefinitely, so steps_per_epoch defines
# what counts as one epoch. validation_steps is intentionally not passed:
# len(test_wav) // batch_size would skip the final partial batch, making the
# monitored val_loss cover fewer examples than model.evaluate(test_dataset).
# Without it, validation runs until the validation dataset is exhausted.
history = model.fit(
    train_dataset,
    epochs=max_epochs,
    steps_per_epoch=len(train_wav) // batch_size,
    validation_data=test_dataset,
    callbacks=[cp_callback],
)
# Recorded training log:
# Epoch 1/10
# 1423/1423 [==============================] - 100s 64ms/step - loss: 0.7321 - accuracy: 0.7664 - val_loss: 0.3276 - val_accuracy: 0.8920
# Epoch 00001: val_loss improved from inf to 0.32763, saving model to ./drive/My Drive/train_ckpt/spectrogram/exp1
# Epoch 2/10
# 1423/1423 [==============================] - 84s 59ms/step - loss: 0.2894 - accuracy: 0.9069 - val_loss: 0.2339 - val_accuracy: 0.9258
# Epoch 00002: val_loss improved from 0.32763 to 0.23386, saving model to ./drive/My Drive/train_ckpt/spectrogram/exp1
# Epoch 3/10
# 1423/1423 [==============================] - 83s 59ms/step - loss: 0.2005 - accuracy: 0.9344 - val_loss: 0.2009 - val_accuracy: 0.9343
# Epoch 00003: val_loss improved from 0.23386 to 0.20087, saving model to ./drive/My Drive/train_ckpt/spectrogram/exp1
# Epoch 4/10
# 1423/1423 [==============================] - 85s 60ms/step - loss: 0.1564 - accuracy: 0.9482 - val_loss: 0.2115 - val_accuracy: 0.9270
# Epoch 00004: val_loss did not improve from 0.20087
# Epoch 5/10
# 1423/1423 [==============================] - 83s 58ms/step - loss: 0.1274 - accuracy: 0.9570 - val_loss: 0.1684 - val_accuracy: 0.9415
# Epoch 00005: val_loss improved from 0.20087 to 0.16836, saving model to ./drive/My Drive/train_ckpt/spectrogram/exp1
# Epoch 6/10
# 1423/1423 [==============================] - 83s 58ms/step - loss: 0.1150 - accuracy: 0.9613 - val_loss: 0.2502 - val_accuracy: 0.9219
# Epoch 00006: val_loss did not improve from 0.16836
# Epoch 7/10
# 1423/1423 [==============================] - 83s 58ms/step - loss: 0.0967 - accuracy: 0.9675 - val_loss: 0.1555 - val_accuracy: 0.9492
# Epoch 00007: val_loss improved from 0.16836 to 0.15549, saving model to ./drive/My Drive/train_ckpt/spectrogram/exp1
# Epoch 8/10
# 1423/1423 [==============================] - 83s 58ms/step - loss: 0.0855 - accuracy: 0.9710 - val_loss: 0.1529 - val_accuracy: 0.9466
# Epoch 00008: val_loss improved from 0.15549 to 0.15287, saving model to ./drive/My Drive/train_ckpt/spectrogram/exp1
# Epoch 9/10
# 1423/1423 [==============================] - 82s 58ms/step - loss: 0.0874 - accuracy: 0.9704 - val_loss: 0.1443 - val_accuracy: 0.9525
# Epoch 00009: val_loss improved from 0.15287 to 0.14430, saving model to ./drive/My Drive/train_ckpt/spectrogram/exp1
# Epoch 10/10
# 1423/1423 [==============================] - 82s 58ms/step - loss: 0.0713 - accuracy: 0.9758 - val_loss: 0.1330 - val_accuracy: 0.9561
# Epoch 00010: val_loss improved from 0.14430 to 0.13299, saving model to ./drive/My Drive/train_ckpt/spectrogram/exp1
# Per-epoch metrics collected by model.fit.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs_range = range(len(acc))

# Two side-by-side panels: accuracy on the left, loss on the right.
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(8, 8))

acc_ax.plot(epochs_range, acc, label='Training Accuracy')
acc_ax.plot(epochs_range, val_acc, label='Validation Accuracy')
acc_ax.legend(loc='lower right')
acc_ax.set_title('Training and Validation Accuracy')

loss_ax.plot(epochs_range, loss, label='Training Loss')
loss_ax.plot(epochs_range, val_loss, label='Validation Loss')
loss_ax.legend(loc='upper right')
loss_ax.set_title('Training and Validation Loss')

plt.show()
# Restore the weights written to checkpoint_dir by the checkpoint callback
# before evaluating.
model.load_weights(checkpoint_dir)
# <tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fd2542d0cd0>

# Evaluate on the held-out set; results[1] is read as the accuracy below.
results = model.evaluate(test_dataset)
# 159/159 [==============================] - 4s 21ms/step - loss: 0.1329 - accuracy: 0.9561
def final_score(num_params=None, accuracy=None):
    """Print and return a score combining accuracy with a model-size term.

    The score is ``50 * (accuracy + min(1 / s, 1))`` where
    ``s = num_params * 32 / 1024 ** 2`` (presumably the model size at
    32 bits per parameter, in units of 2**20 bits -- confirm).

    Args:
        num_params: Number of model parameters. When omitted, it is read
            from the module-level ``model`` via ``model.count_params()``.
        accuracy: Accuracy of the model. When omitted, ``results[1]`` of the
            module-level evaluation results is used.

    Returns:
        The computed score as a float.
    """
    # Fall back to the module-level objects so that final_score() with no
    # arguments keeps working exactly as before.
    if num_params is None:
        num_params = model.count_params()
    if accuracy is None:
        accuracy = results[1]

    print("Model params num : " + str(num_params))
    print("Accuracy : " + str(accuracy))

    s = (num_params * 32) / (1024 ** 2)
    # Same value as min(1 / s, 1) for every positive s, but it does not divide
    # by zero when the parameter count is 0.
    size_term = 1.0 if s <= 1 else 1 / s
    score = 50 * (accuracy + size_term)
    print("score : " + str(score))
    return score
# Print the parameter count, the accuracy and the combined score.
final_score()
# Recorded output:
# Model params num : 981436
# Accuracy : 0.9561437964439392
# score : 49.47658038867302