우리가 해야 될 일은 다음과 같다.
1. librosa로 음원을 load하고 전체 음원을 구간별로 나누는 클래스
2. 저장할 폴더를 만들고 폴더에서 wav 파일 경로를 가져오는 클래스
3. 단일 wav 음원을 mel-spectrogram으로 바꾸는 클래스
4. 1, 2, 3을 모두 합쳐 폴더 안의 wav 파일을 mel-spectrogram으로 바꾸는 클래스
import librosa
class AudioProcessor:
    """
    Load audio files and cut the loaded signal into fixed-length segments.

    Attributes
    ----------
    sr : int
        The sample rate to use when loading audio files.
    duration : float
        The duration (in seconds) of each segment produced by `split_audio`.

    Methods
    -------
    load_audio(audio_path):
        Loads an audio file using the specified sample rate.
    split_audio(y):
        Splits the loaded audio into segments of the specified duration.
    """

    def __init__(self, sr, duration):
        """
        Store the loading and splitting settings.

        Parameters
        ----------
        sr : int
            The sample rate to use when loading audio files.
        duration : float
            The duration (in seconds) to split the audio files into segments.
        """
        self.sr = sr
        self.duration = duration

    def load_audio(self, audio_path):
        """
        Load an audio file with `librosa.load` at the configured sample rate.

        Parameters
        ----------
        audio_path : str
            The path to the audio file to be loaded.

        Returns
        -------
        tuple
            A tuple (y, sr) where y is the audio time series and sr is the
            sampling rate.
        """
        return librosa.load(audio_path, sr=self.sr)

    def split_audio(self, y):
        """
        Split an audio signal into consecutive segments of `duration` seconds.

        Every segment holds ``int(sr * duration)`` samples except possibly the
        last one, which keeps whatever samples remain.

        Parameters
        ----------
        y : np.ndarray
            The audio time series data.

        Returns
        -------
        list
            A list of audio segments, each one a slice of `y`. Empty when `y`
            has no samples.

        Raises
        ------
        ValueError
            If ``sr * duration`` truncates to zero or fewer samples.
        """
        n_samples = int(self.sr * self.duration)
        # A zero step makes range() fail with an unrelated-looking message and
        # a negative step silently yields no segments, so reject both up front.
        if n_samples <= 0:
            raise ValueError(
                "sr * duration must cover at least one sample, "
                f"got sr={self.sr!r} and duration={self.duration!r}"
            )
        return [y[start:start + n_samples] for start in range(0, len(y), n_samples)]
import unittest
from unittest.mock import patch
import numpy as np
from preprocessing.audio_processor import AudioProcessor
class TestAudioProcessor(unittest.TestCase):
    """Unit tests for AudioProcessor.load_audio and AudioProcessor.split_audio."""

    def setUp(self):
        """Build a processor and a 440 Hz sine exactly one segment long."""
        self.sr = 22050
        self.duration = 2.0
        self.processor = AudioProcessor(self.sr, self.duration)
        t = np.linspace(0, self.duration, int(self.sr * self.duration), endpoint=False)
        self.test_tone = 0.5 * np.sin(2 * np.pi * 440 * t)

    @patch('librosa.load')
    def test_load_audio(self, mock_load):
        """load_audio should forward the path and sample rate to librosa.load."""
        mock_load.return_value = (self.test_tone, self.sr)
        y, sr = self.processor.load_audio("dummy_path.wav")
        mock_load.assert_called_once_with('dummy_path.wav', sr=self.sr)
        self.assertEqual(sr, self.sr)
        np.testing.assert_array_equal(y, self.test_tone)

    def test_split_audio(self):
        """A signal of 2.5 segments yields two full segments and a short tail."""
        segment_len = int(self.sr * self.duration)
        tail_len = segment_len // 2
        # The one-segment tone alone would never exercise the full-segment
        # loop below, so build a longer signal with a partial last segment.
        y = np.concatenate([self.test_tone, self.test_tone, self.test_tone[:tail_len]])
        segments = self.processor.split_audio(y)
        expected_segments = int(np.ceil(len(y) / (self.sr * self.duration)))
        self.assertEqual(expected_segments, 3)
        self.assertEqual(len(segments), expected_segments)
        for segment in segments[:-1]:
            self.assertEqual(len(segment), segment_len)
        self.assertEqual(len(segments[-1]), tail_len)
        # Splitting must neither drop nor reorder samples.
        np.testing.assert_array_equal(np.concatenate(segments), y)

    def test_split_audio_exact_fit(self):
        """A signal exactly one segment long comes back as a single segment."""
        segments = self.processor.split_audio(self.test_tone)
        self.assertEqual(len(segments), 1)
        self.assertLessEqual(len(segments[-1]), int(self.sr * self.duration))
        np.testing.assert_array_equal(segments[0], self.test_tone)


if __name__ == '__main__':
    unittest.main()
import os
class FileSystemHelper:
    """
    A helper class for filesystem operations related to WAV files.

    Methods
    -------
    ensure_directory_exists(path):
        Creates a directory (and missing parents) if it doesn't already exist.
    make_dir(path):
        Same as `ensure_directory_exists`.
    get_files_by_extension(folder_path, extension):
        Retrieves the paths of files in a folder that have a given extension.
    get_wav_files(folder_path):
        Retrieves a list of paths to WAV files within a specified folder.
    """

    @staticmethod
    def ensure_directory_exists(path):
        """
        Create a directory, including missing parents, if it is not there yet.

        Parameters
        ----------
        path : str
            The path of the directory to be created.

        Raises
        ------
        FileExistsError
            If `path` already exists but is not a directory.
        """
        # exist_ok avoids the check-then-create race of testing
        # os.path.exists() before calling makedirs().
        os.makedirs(path, exist_ok=True)

    @staticmethod
    def make_dir(path):
        """
        Creates a directory if it doesn't already exist.

        Parameters
        ----------
        path : str
            The path of the directory to be created.
        """
        FileSystemHelper.ensure_directory_exists(path)

    @staticmethod
    def get_files_by_extension(folder_path, extension):
        """
        Retrieve the paths of entries in a folder that end with an extension.

        The match is case-insensitive and requires the dot, so 'wav' matches
        'a.wav' and 'a.WAV' but not 'notawav'.

        Parameters
        ----------
        folder_path : str
            The path of the folder to scan (not recursive).
        extension : str
            The extension to look for, with or without the leading dot.

        Returns
        -------
        list
            Sorted paths, each one `folder_path` joined with the entry name.
        """
        suffix = '.' + extension.lstrip('.').lower()
        # os.listdir order is arbitrary; sort so callers get a stable order.
        return sorted(
            os.path.join(folder_path, name)
            for name in os.listdir(folder_path)
            if name.lower().endswith(suffix)
        )

    @staticmethod
    def get_wav_files(folder_path):
        """
        Retrieves a list of paths to WAV files within a specified folder.

        Parameters
        ----------
        folder_path : str
            The path of the folder where WAV files are located.

        Returns
        -------
        list
            Sorted paths to the WAV files, each one `folder_path` joined with
            the file name (absolute only if `folder_path` is absolute).
        """
        return FileSystemHelper.get_files_by_extension(folder_path, 'wav')
import unittest
import os
from tempfile import TemporaryDirectory
from preprocessing.file_system_helper import FileSystemHelper
class TestFileSystemHelper(unittest.TestCase):
    """Exercise FileSystemHelper against throwaway temporary directories."""

    def test_make_dir(self):
        """make_dir should leave a real directory at the requested path."""
        with TemporaryDirectory() as sandbox:
            target = os.path.join(sandbox, 'test_directory')

            FileSystemHelper.make_dir(target)

            # The path must now exist and be a directory, not a file.
            self.assertTrue(os.path.exists(target))
            self.assertTrue(os.path.isdir(target))

    def test_get_wav_files(self):
        """get_wav_files should return only the .wav entries of the folder."""
        with TemporaryDirectory() as sandbox:
            folder = os.path.join(sandbox, 'test_folder')
            os.makedirs(folder, exist_ok=True)

            # Two WAV files that must be found plus one decoy text file.
            wav_names = ['test1.wav', 'test2.wav']
            for name in wav_names + ['test3.txt']:
                with open(os.path.join(folder, name), 'a'):
                    pass

            found = FileSystemHelper.get_wav_files(folder)

            # Order is not part of the contract, so compare sorted lists.
            wanted = [os.path.join(folder, name) for name in wav_names]
            self.assertEqual(sorted(found), sorted(wanted))


if __name__ == '__main__':
    unittest.main()
import librosa
import matplotlib.pyplot as plt
import numpy as np
import librosa.display
from preprocessing.file_system_helper import FileSystemHelper
class MelSpectrogramGenerator:
    """
    A class to generate and save Mel spectrograms from audio data.

    Attributes
    ----------
    save_path : str
        The path where the generated spectrograms will be saved.
    sr : int
        The sample rate of the audio data.
    n_fft : int
        The `n_fft` value passed to `librosa.feature.melspectrogram`.
    n_mels : int
        Number of Mel bands to generate.
    hop_length : int
        Hop length (in samples) between successive frames.

    Methods
    -------
    generate_and_save(y, file_name):
        Generates a Mel spectrogram from audio data `y` and saves it as an image.
    """

    def __init__(self, save_path, sr, n_fft, n_mels, hop_length):
        """
        Initializes the MelSpectrogramGenerator instance.

        The output directory is created if it does not exist yet.

        Parameters
        ----------
        save_path : str
            The path where the generated spectrograms will be saved.
        sr : int
            The sample rate of the audio data.
        n_fft : int
            The `n_fft` value passed to `librosa.feature.melspectrogram`.
        n_mels : int
            Number of Mel bands to generate.
        hop_length : int
            Hop length (in samples) between successive frames.
        """
        self.save_path = save_path
        self.sr = sr
        self.n_fft = n_fft
        self.n_mels = n_mels
        self.hop_length = hop_length
        # FileSystemHelper exposes directory creation as make_dir.
        FileSystemHelper.make_dir(save_path)

    def generate_and_save(self, y, file_name):
        """
        Generates a Mel spectrogram from audio data `y` and saves it as an image.

        Parameters
        ----------
        y : np.ndarray
            Audio time series.
        file_name : str
            File name (used as given, no extension is appended) under
            `save_path` for the generated spectrogram image.
        """
        # Generate Mel spectrogram and convert to log scale
        mel = librosa.feature.melspectrogram(
            y=y,
            sr=self.sr,
            n_fft=self.n_fft,
            n_mels=self.n_mels,
            hop_length=self.hop_length,
        )
        log_mel = librosa.power_to_db(mel, ref=np.max)

        # Plot and save the spectrogram as an image
        fig = plt.figure(figsize=(20, 10))
        try:
            librosa.display.specshow(
                log_mel,
                sr=self.sr,
                hop_length=self.hop_length,
                cmap='viridis',
                x_axis='time',
                y_axis='mel',
            )
            plt.colorbar(format='%+2.0f dB')  # add the color bar
            plt.savefig(f"{self.save_path}/{file_name}", bbox_inches='tight', pad_inches=0)
        finally:
            # Always release the figure: this runs once per audio slice, so a
            # failed plot or save must not leave figures accumulating.
            plt.close(fig)
import unittest
import numpy as np
from unittest.mock import patch, MagicMock
from preprocessing.mel_spectrogram_generator import MelSpectrogramGenerator
import tempfile
import os
import warnings
class TestMelSpectrogramGenerator(unittest.TestCase):
    """Unit tests for MelSpectrogramGenerator.generate_and_save."""

    def setUp(self):
        """Set up the test case."""
        self.temp_dir = tempfile.TemporaryDirectory()
        # Registered before anything else can fail so the directory is removed
        # even when the rest of setUp raises.
        self.addCleanup(self.temp_dir.cleanup)
        self.save_path = self.temp_dir.name  # Use temporary directory
        self.sr = 22050
        self.n_fft = 2048
        self.n_mels = 128
        self.hop_length = 512
        # The constructor takes n_fft between sr and n_mels.
        self.generator = MelSpectrogramGenerator(
            self.save_path, self.sr, self.n_fft, self.n_mels, self.hop_length
        )

    # patch decorators are applied bottom-up, so the lowest one supplies the
    # first mock argument.
    @patch('matplotlib.pyplot.colorbar')
    @patch('matplotlib.pyplot.savefig')
    @patch('librosa.display.specshow')
    @patch('librosa.power_to_db')
    @patch('librosa.feature.melspectrogram')
    def test_generate_and_save(self, mock_melspectrogram, mock_power_to_db, mock_specshow,
                               mock_savefig, mock_colorbar):
        """Test the generate_and_save method."""
        # Mock return values
        mock_mel = np.zeros((self.n_mels, 100))  # Mock Mel spectrogram
        mock_melspectrogram.return_value = mock_mel  # Ensure consistent shapes
        mock_power_to_db.return_value = mock_mel

        # Call the method under test
        test_y = np.random.randn(1000)
        test_file_name = 'test_spec.png'
        self.generator.generate_and_save(test_y, test_file_name)

        # The generator passes everything to melspectrogram by keyword. The
        # audio array is checked by identity to avoid numpy's ambiguous `==`.
        mock_melspectrogram.assert_called_once()
        mel_args, mel_kwargs = mock_melspectrogram.call_args
        self.assertEqual(mel_args, ())
        self.assertIs(mel_kwargs['y'], test_y)
        self.assertEqual(
            {key: value for key, value in mel_kwargs.items() if key != 'y'},
            {'sr': self.sr, 'n_fft': self.n_fft, 'n_mels': self.n_mels,
             'hop_length': self.hop_length},
        )
        mock_power_to_db.assert_called_once_with(mock_mel, ref=np.max)
        mock_specshow.assert_called_once()
        # colorbar is patched because the mocked specshow draws nothing for it
        # to attach to.
        mock_colorbar.assert_called_once()
        # The file name is used as given; no extension is appended.
        mock_savefig.assert_called_once_with(f"{self.save_path}/{test_file_name}", bbox_inches='tight',
                                             pad_inches=0)


if __name__ == '__main__':
    unittest.main()
class ConvertToMelSpectrogram:
    """
    A class to convert audio files in a folder to mel spectrogram images.

    Attributes:
        folder_path (str): Path to the folder containing audio files.
        save_path (str): Path to the folder where spectrogram images will be saved.
        sr (int): Sample rate for audio processing.
        n_fft (int): The n_fft value forwarded to MelSpectrogramGenerator.
        duration (float): Duration of the audio clips to be processed.
        n_mels (int): Number of mel bands to generate.
        hop_length (int): Number of samples between successive frames.
        audio_processor (AudioProcessor): Instance of AudioProcessor to handle audio loading and processing.
        mel_generator (MelSpectrogramGenerator): Instance of MelSpectrogramGenerator to generate and save mel spectrograms.
    """

    def __init__(self, folder_path, save_path, sr, n_fft, duration, n_mels, hop_length):
        """
        Initializes the ConvertToMelSpectrogram class with provided parameters.

        Args:
            folder_path (str): Path to the folder containing audio files.
            save_path (str): Path to the folder where spectrogram images will be saved.
            sr (int): Sample rate for audio processing.
            n_fft (int): The n_fft value forwarded to MelSpectrogramGenerator.
            duration (float): Duration of the audio clips to be processed.
            n_mels (int): Number of mel bands to generate.
            hop_length (int): Number of samples between successive frames.
        """
        self.folder_path = folder_path
        self.save_path = save_path
        self.sr = sr
        self.n_fft = n_fft
        self.duration = duration
        self.n_mels = n_mels
        self.hop_length = hop_length
        self.audio_processor = AudioProcessor(sr, duration)
        self.mel_generator = MelSpectrogramGenerator(save_path, sr, n_fft, n_mels, hop_length)

    def convert_folder_to_mel_spectrogram(self):
        """
        Converts all audio files in the specified folder to mel spectrogram images.

        Each file is split into slices of `duration` seconds and every slice is
        saved in the save_path directory as "<file stem>_part<index>.png".
        """
        # FileSystemHelper exposes the WAV lookup as get_wav_files.
        wav_files = FileSystemHelper.get_wav_files(self.folder_path)
        for audio_path in wav_files:
            # The rate returned by the loader is not needed here.
            y, _ = self.audio_processor.load_audio(audio_path)
            audio_slices = self.audio_processor.split_audio(y)
            base_name = os.path.splitext(os.path.basename(audio_path))[0]
            for i, y_slice in enumerate(audio_slices):
                file_name = f"{base_name}_part{i}.png"
                self.mel_generator.generate_and_save(y_slice, file_name)
import unittest
import os
import tempfile
import numpy as np
import soundfile as sf
import librosa
from unittest.mock import patch
from preprocessing.convert_to_mel_spectrogram import ConvertToMelSpectrogram
from preprocessing.audio_processor import AudioProcessor
from preprocessing.mel_spectrogram_generator import MelSpectrogramGenerator
class TestConvertToMelSpectrogram(unittest.TestCase):
    """Unit tests for ConvertToMelSpectrogram.convert_folder_to_mel_spectrogram."""

    def setUp(self):
        """Create a temporary folder with three WAV files and a converter on it."""
        # TemporaryDirectory plus addCleanup removes the folder itself, not
        # just the files in it, even when a later setUp step fails.
        self._temp_dir_handle = tempfile.TemporaryDirectory()
        self.addCleanup(self._temp_dir_handle.cleanup)
        self.temp_dir = self._temp_dir_handle.name
        self.audio_files = [
            self.create_temp_audio_file('audio1.wav', duration=5, sr=22050),
            self.create_temp_audio_file('audio2.wav', duration=3, sr=22050),
            self.create_temp_audio_file('audio3.wav', duration=7, sr=22050)
        ]
        self.converter = ConvertToMelSpectrogram(
            self.temp_dir,  # read the audio from the temporary directory
            self.temp_dir,  # save the images into the same temporary directory
            sr=22050,
            n_fft=2048,  # required by the constructor
            duration=5.0,
            n_mels=128,
            hop_length=512
        )

    def create_temp_audio_file(self, filename, duration, sr):
        """Write `duration` seconds of random noise as a WAV file and return its path."""
        audio_path = os.path.join(self.temp_dir, filename)
        y = np.random.randn(int(duration * sr))
        sf.write(audio_path, y, sr)  # create the WAV file with soundfile
        return audio_path

    @patch('librosa.load')
    @patch.object(MelSpectrogramGenerator, 'generate_and_save')
    def test_convert_folder_to_mel_spectrogram(self, mock_generate_and_save, mock_librosa_load):
        """Each one-sample mocked file should produce exactly one '_part0' image."""
        # Route librosa.load to the stub below
        mock_librosa_load.side_effect = self.mock_librosa_load

        # Call the method under test
        self.converter.convert_folder_to_mel_spectrogram()

        self.assertEqual(mock_librosa_load.call_count, len(self.audio_files))
        # Compare file names and slice contents explicitly instead of through
        # mock.call equality, which depends on numpy `==` truthiness.
        recorded = mock_generate_and_save.call_args_list
        saved_names = sorted(recorded_call[0][1] for recorded_call in recorded)
        self.assertEqual(saved_names, ['audio1_part0.png', 'audio2_part0.png', 'audio3_part0.png'])
        for recorded_call in recorded:
            np.testing.assert_array_equal(recorded_call[0][0], np.array([0.0]))

    def mock_librosa_load(self, audio_path, sr=None):
        """Stand-in for librosa.load that returns a single silent sample."""
        return np.array([0.0]), 22050


if __name__ == '__main__':
    unittest.main()
메인 파일을 2개 만들어서 관리하자.
preprocessing_main과 model_main을 만들어서 실제로 실행할 것이다.
import sys
import os
sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
from preprocessing.convert_to_mel_spectrogram import ConvertToMelSpectrogram
def main(
    folder_path='/Users/seong-gyeongjun/Downloads/vocal artist/stone/wav',
    save_path='/Users/seong-gyeongjun/Downloads/vocal artist/stone/mel',
    sr=22050,
    n_fft=2048,
    duration=3.0,
    n_mels=128,
    hop_length=None,
):
    """
    Convert every WAV file in a folder into mel spectrogram images.

    Calling `main()` with no arguments uses the defaults below; each setting
    can be overridden by keyword for other folders or machines.

    Parameters
    ----------
    folder_path : str
        Folder containing the WAV files to convert.
    save_path : str
        Folder where the spectrogram images are written.
    sr : int
        Sample rate used when loading audio.
    n_fft : int
        The n_fft value forwarded to the converter.
    duration : float
        Length in seconds of each audio slice.
    n_mels : int
        Number of mel bands.
    hop_length : int or None
        Samples between successive frames; `n_fft // 4` when None.
    """
    if hop_length is None:
        hop_length = n_fft // 4
    converter = ConvertToMelSpectrogram(folder_path, save_path, sr, n_fft, duration, n_mels, hop_length)
    converter.convert_folder_to_mel_spectrogram()


if __name__ == '__main__':
    main()
이런 이미지 파일이 save_path 폴더에 수백 장 저장된 걸 볼 수 있다.