Simple speech recognition
Let's learn how to work with audio data and train a model on it.
The steps for training the machine-learning model are laid out below.
Dataset
# Default dataset path
DATASET_PATH = "/content/drive/MyDrive/Datasets"
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import librosa
import librosa.display
import matplotlib.pyplot as plt
import os
from os.path import isdir, join
import random
import copy
import sys
tf.__version__
#2.15.0
use_colab = True
assert use_colab in [True, False]
from google.colab import drive
drive.mount('/content/drive')
# Mounted at /content/drive
if use_colab:
    DATASET_PATH = "/content/drive/MyDrive/dataset/wave_cls"
else:
    DATASET_PATH = "./Datasets"
if not os.path.isdir(DATASET_PATH):
    os.makedirs(DATASET_PATH)
speech_data = np.load(os.path.join(DATASET_PATH, "speech_wav_8000.npz"), allow_pickle=True) #npy, npz
print(speech_data.files)
#['wav_vals', 'label_vals']
print(speech_data["wav_vals"].shape, speech_data["label_vals"].shape)
# (50620, 8000) (50620, 1)
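Each row of wav_vals is a one-second clip sampled at 8 kHz, which is why every example is 8000 samples long; label_vals holds the matching word label for each clip.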
idx = 219  # there are 50,620 audio clips in total
test_audio = speech_data["wav_vals"][idx]
test_labels = speech_data["label_vals"][idx]
import IPython.display as ipd
sr = 8000  # number of samples per second of audio
data = test_audio
print(data[2990:3000])
plt.plot(data)
print(test_labels)
ipd.Audio(data, rate=sr)
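librosa and librosa.display are imported above but never used afterwards; purely as an optional aside (the STFT parameters below are arbitrary choices, not part of the original notebook), the same clip can also be inspected as a spectrogram:
D = librosa.stft(data.astype(np.float32), n_fft=256, hop_length=128)
S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)
img = librosa.display.specshow(S_db, sr=sr, hop_length=128, x_axis='time', y_axis='hz')
plt.colorbar(img, format='%+2.0f dB')
plt.title(str(test_labels))
plt.show()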
sklearn.model_selection.train_test_split(train, label, test_size=None, shuffle=True)
sr = 8000
train_wav, test_wav, train_label, test_label = train_test_split(speech_data["wav_vals"],
                                                                speech_data["label_vals"],
                                                                test_size=0.1,
                                                                shuffle=True)
# for convolution layers: add a channel dimension (each clip is 8000 samples long)
train_wav = train_wav.reshape([-1, 8000, 1])
test_wav = test_wav.reshape([-1, 8000, 1])
print(train_wav.shape)
print(test_wav.shape)
print(train_label.shape)
print(test_label.shape)
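With test_size=0.1, the four shapes printed above should come out to roughly (45558, 8000, 1), (5062, 8000, 1), (45558, 1) and (5062, 1), i.e. a 90/10 split of the 50,620 clips.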
set(speech_data["label_vals"].flatten())
{'down',
'go',
'left',
'no',
'off',
'on',
'right',
'silence',
'stop',
'unknown',
'up',
'yes'}
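That is 12 classes in total: the ten command words plus the two catch-all classes 'unknown' and 'silence'.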
# del raw dataset for memory
del speech_data
#target_list
label_value = ['yes', 'no', 'up', 'down', 'left', 'right', 'on', 'off', 'stop', 'go', 'unknown', 'silence']
new_label_value = dict()
for i, l in enumerate(label_value):
    new_label_value[l] = i
label_value = new_label_value
label_value
{'down': 3,
'go': 9,
'left': 4,
'no': 1,
'off': 7,
'on': 6,
'right': 5,
'silence': 11,
'stop': 8,
'unknown': 10,
'up': 2,
'yes': 0}
temp = []
for v in train_label:
    temp.append(label_value[v[0]])  # ["down"] => "down" => class index
train_label = np.array(temp)
temp = []
for v in test_label:
    temp.append(label_value[v[0]])
test_label = np.array(temp)
del temp
print('Train_Wav Dimension : ' + str(np.shape(train_wav)))
print('Train_Label Dimension : ' + str(np.shape(train_label)))
print('Test_Wav Dimension : ' + str(np.shape(test_wav)))
print('Test_Label Dimension : ' + str(np.shape(test_label)))
print('Number Of Labels : ' + str(len(label_value)))
train_label[0:10]
# the save point
if use_colab:
    checkpoint_dir = './drive/MyDrive/train_ckpt/wave/exp1'
    if not os.path.isdir(checkpoint_dir):
        os.makedirs(checkpoint_dir)
else:
    checkpoint_dir = 'wave/exp1'
tf.one_hot( indices, depth, on_value=None, off_value=None, axis=None, dtype=None, name=None )
def one_hot_label(wav, label):
    label = tf.one_hot(label, depth=12)  # 12 target classes (see label_value above)
    return wav, label
# [0, 1, 2]
# 0 => [1, 0, 0]
# 1 => [0, 1, 0]
# 2 => [0, 0, 1]
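A quick sanity check of what tf.one_hot produces for the three-class case sketched in the comment above:
print(tf.one_hot([0, 1, 2], depth=3).numpy())
# [[1. 0. 0.]
#  [0. 1. 0.]
#  [0. 0. 1.]]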
batch_size = 16
# for train
train_dataset = tf.data.Dataset.from_tensor_slices((train_wav, train_label))
train_dataset = train_dataset.map(one_hot_label)
train_dataset = train_dataset.shuffle(10000).repeat().batch(batch_size)
print(train_dataset)
# for test
test_dataset = tf.data.Dataset.from_tensor_slices((test_wav, test_label))
test_dataset = test_dataset.map(one_hot_label)
test_dataset = test_dataset.batch(batch_size)
print(test_dataset)
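Note that shuffle(10000) keeps a 10,000-element shuffle buffer and repeat() with no argument makes the training stream infinite, so steps_per_epoch has to be passed to model.fit below; the test pipeline is neither shuffled nor repeated, so one pass over it is one full evaluation.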
for t, l in train_dataset.take(1):
    print(l)
<BatchDataset shapes: ((None, 8000, 1), (None, 12)), types: (tf.float32, tf.float32)>
<BatchDataset shapes: ((None, 8000, 1), (None, 12)), types: (tf.float32, tf.float32)>
# TODO
# layers.Dense(12, activation='softmax') -> tf.keras.losses.CategoricalCrossentropy(from_logits=False)
# layers.Dense(12) -> tf.keras.losses.CategoricalCrossentropy(from_logits=True)
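The notebook leaves the model itself undefined. Below is a minimal sketch, assuming a small 1-D CNN over the raw waveform; the layer choices are illustrative assumptions, not the original architecture. It ends in Dense(12, activation='softmax'), which is why from_logits=False is used in the compile step that follows.
# Minimal 1-D CNN sketch (assumed architecture, not the original notebook's answer)
input_tensor = layers.Input(shape=(8000, 1))
x = layers.Conv1D(16, 9, padding='same', activation='relu')(input_tensor)
x = layers.MaxPool1D(pool_size=4)(x)
x = layers.Conv1D(32, 9, padding='same', activation='relu')(x)
x = layers.MaxPool1D(pool_size=4)(x)
x = layers.Conv1D(64, 9, padding='same', activation='relu')(x)
x = layers.GlobalAveragePooling1D()(x)
x = layers.Dense(64, activation='relu')(x)
output_tensor = layers.Dense(12, activation='softmax')(x)  # softmax output => from_logits=False
model = tf.keras.Model(inputs=input_tensor, outputs=output_tensor)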
model.compile(loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),  # False here because the model ends in a softmax layer
              optimizer="adam",  # default learning rate 1e-3; use e.g. tf.keras.optimizers.Adam(1e-4) to change it
              metrics=['accuracy'])
# run inference with the untrained model, just to check the forward pass
predictions = model(train_wav[0:1], training=False)
print("Predictions: ", predictions.numpy())
model.summary()
cp_callback = tf.keras.callbacks.ModelCheckpoint(checkpoint_dir,
save_weights_only=True,
monitor='val_loss',
mode='auto',
save_best_only=True,
verbose=1)
tf.keras.optimizers.schedules.CosineDecay(initial_learning_rate, decay_steps, alpha=0.0, name=None)

def decayed_learning_rate(step):
    step = min(step, decay_steps)
    cosine_decay = 0.5 * (1 + cos(pi * step / decay_steps))
    decayed = (1 - alpha) * cosine_decay + alpha
    return initial_learning_rate * decayed

decay_steps = 100
lr_decayed_fn = tf.keras.optimizers.schedules.CosineDecay(initial_learning_rate, decay_steps)
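The number of training epochs is never pinned down in the notebook; the single assumed value below is used for the cosine schedule and again for model.fit further down.
max_epochs = 50  # assumed training length; not specified in the original notebook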
cos_decay = tf.keras.optimizers.schedules.CosineDecay(1e-3,        # initial learning rate
                                                      max_epochs)  # epochs after which the rate has decayed to 0
lr_callback = tf.keras.callbacks.LearningRateScheduler(cos_decay, verbose=1)
# with a `tf.data.Dataset`, model.fit can be used directly (model.fit_generator is the older, deprecated equivalent)
history = model.fit(train_dataset,
                    steps_per_epoch=len(train_wav) // batch_size,
                    epochs=max_epochs,
                    callbacks=[cp_callback, lr_callback],
                    validation_data=test_dataset,
                    validation_steps=len(test_wav) // batch_size)
# fit(
# x=None, y=None, batch_size=None, epochs=1, verbose='auto',
# callbacks=None, validation_split=0.0, validation_data=None, shuffle=True,
# class_weight=None, sample_weight=None, initial_epoch=0, steps_per_epoch=None,
# validation_steps=None, validation_batch_size=None, validation_freq=1,
# max_queue_size=10, workers=1, use_multiprocessing=False
# )
# fit_generator(
# generator, steps_per_epoch=None, epochs=1, verbose=1, callbacks=None,
# validation_data=None, validation_steps=None, validation_freq=1,
# class_weight=None, max_queue_size=10, workers=1, use_multiprocessing=False,
# shuffle=True, initial_epoch=0
# )
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss=history.history['loss']
val_loss=history.history['val_loss']
epochs_range = range(len(acc))
plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()
model.load_weights(checkpoint_dir)
results = model.evaluate(test_dataset)
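Since the model was compiled with metrics=['accuracy'], results is a [loss, accuracy] pair, which is why results[1] is read as the test accuracy below.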
def final_score():
    print("Model params num : " + str(model.count_params()))
    print("Accuracy : " + str(results[1]))
    s = (model.count_params() * 32) / (1024 ** 2)
    score = 50 * (results[1] + min((1/s), 1))
    print("score : " + str(score))
final_score()
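To get a feel for the formula (the parameter count here is a made-up example): a model with 500,000 parameters gives s = 500000 * 32 / 1024**2 ≈ 15.3, i.e. its float32 weights measured in units of 2^20 bits, so 1/s ≈ 0.065 and the score becomes 50 * (accuracy + 0.065); smaller models earn a larger size bonus, capped at 1.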