우리가 해야 할 일은 다음과 같다.
1. librosa로 음원을 load 하고 전체 음원을 구간별로 나누는 클래스
2. 폴더를 생성하고 폴더에서 wav 파일을 가져오는 클래스
3. 단일 wav 음원을 mel-spectrogram으로 바꾸는 클래스
4. 1, 2, 3을 모두 합쳐 폴더 안의 wav 파일을 mel-spectrogram으로 바꾸는 클래스
import librosa
class AudioProcessor:
    """Load audio files and split them into fixed-length segments.

    Attributes
    ----------
    sr : int
        The sample rate to use when loading audio files.
    duration : float
        The duration (in seconds) to split the audio files into segments.

    Methods
    -------
    load_audio(audio_path)
        Loads an audio file using the specified sample rate.
    split_audio(y)
        Splits the loaded audio into segments of the specified duration.
    """

    def __init__(self, sr, duration):
        """Store the sample rate and the segment duration.

        Parameters
        ----------
        sr : int
            The sample rate to use when loading audio files.
        duration : float
            The duration (in seconds) to split the audio files into segments.
        """
        self.sr = sr
        self.duration = duration

    def load_audio(self, audio_path):
        """Load an audio file using the configured sample rate.

        Parameters
        ----------
        audio_path : str
            The path to the audio file to be loaded.

        Returns
        -------
        tuple
            Whatever ``librosa.load`` returns: a tuple ``(y, sr)`` where ``y``
            is the audio time series and ``sr`` is the sampling rate.
        """
        return librosa.load(audio_path, sr=self.sr)

    def split_audio(self, y):
        """Split audio samples into consecutive segments of ``duration`` seconds.

        Parameters
        ----------
        y : np.ndarray
            The audio time series data (any sliceable sequence works).

        Returns
        -------
        list
            Slices of ``y``; every slice holds ``int(sr * duration)`` samples
            except the last one, which may be shorter.

        Raises
        ------
        ValueError
            If ``int(sr * duration)`` is not a positive number of samples.
        """
        n_samples = int(self.sr * self.duration)
        # A zero step makes range() fail with an unrelated message and a
        # negative step silently yields no segments, so reject both up front.
        if n_samples <= 0:
            raise ValueError(
                f"sr * duration must give at least one sample per segment, got {n_samples}"
            )
        return [y[i:i + n_samples] for i in range(0, len(y), n_samples)]
import unittest
from unittest.mock import patch
import numpy as np
from preprocessing.audio_processor import AudioProcessor
class TestAudioProcessor(unittest.TestCase):
    """Unit tests for AudioProcessor.load_audio and AudioProcessor.split_audio."""

    def setUp(self):
        """Create a processor and a 440 Hz sine tone exactly one segment long."""
        self.sr = 22050
        self.duration = 2.0
        self.processor = AudioProcessor(self.sr, self.duration)
        t = np.linspace(0, self.duration, int(self.sr * self.duration), endpoint=False)
        self.test_tone = 0.5 * np.sin(2 * np.pi * 440 * t)

    # Without this decorator the test runner cannot supply `mock_load`
    # and the test fails with a missing-argument TypeError.
    @patch('librosa.load')
    def test_load_audio(self, mock_load):
        """load_audio forwards the path and sample rate to librosa.load."""
        mock_load.return_value = (self.test_tone, self.sr)
        y, sr = self.processor.load_audio("dummy_path.wav")
        mock_load.assert_called_once_with('dummy_path.wav', sr=self.sr)
        self.assertEqual(sr, self.sr)
        np.testing.assert_array_equal(y, self.test_tone)

    def test_split_audio(self):
        """split_audio yields full-length segments plus an optional shorter tail."""
        y = self.test_tone
        segments = self.processor.split_audio(y)
        segment_length = int(self.sr * self.duration)
        expected_segments = int(np.ceil(len(y) / (self.sr * self.duration)))
        self.assertEqual(len(segments), expected_segments)
        for segment in segments[:-1]:
            self.assertEqual(len(segment), segment_length)
        self.assertLessEqual(len(segments[-1]), segment_length)


if __name__ == '__main__':
    unittest.main()
import os
class FileSystemHelper:
    """A helper class for filesystem operations related to WAV files.

    Methods
    -------
    make_dir(path)
        Creates a directory if it doesn't already exist.
    get_files_by_extension(folder_path, extension)
        Retrieves a sorted list of paths to files with the given extension.
    get_wav_files(folder_path)
        Retrieves a list of paths to WAV files within a specified folder.
    """

    @staticmethod
    def make_dir(path):
        """Create a directory (and missing parents) if it doesn't already exist.

        Parameters
        ----------
        path : str
            The path of the directory to be created.
        """
        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(path, exist_ok=True)

    @staticmethod
    def get_files_by_extension(folder_path, extension):
        """List the files directly inside ``folder_path`` with ``extension``.

        Parameters
        ----------
        folder_path : str
            The path of the folder to scan (not recursive).
        extension : str
            File extension, with or without the leading dot; matched
            case-insensitively.

        Returns
        -------
        list of str
            Sorted paths built as ``os.path.join(folder_path, name)``; they
            are absolute only when ``folder_path`` itself is absolute.
        """
        suffix = '.' + extension.lstrip('.').lower()
        candidates = (
            os.path.join(folder_path, name)
            for name in os.listdir(folder_path)
            if name.lower().endswith(suffix)
        )
        # Skip directories that merely carry the extension in their name.
        return sorted(path for path in candidates if os.path.isfile(path))

    @staticmethod
    def get_wav_files(folder_path):
        """Retrieve the paths to the WAV files within a specified folder.

        Parameters
        ----------
        folder_path : str
            The path of the folder where WAV files are located.

        Returns
        -------
        list of str
            Sorted paths to the ``.wav`` files, each prefixed with
            ``folder_path``.
        """
        return FileSystemHelper.get_files_by_extension(folder_path, 'wav')
import unittest
import os
from tempfile import TemporaryDirectory
from preprocessing.file_system_helper import FileSystemHelper
class TestFileSystemHelper(unittest.TestCase):
    """Unit tests for FileSystemHelper.make_dir and FileSystemHelper.get_wav_files."""

    def test_make_dir(self):
        """Test the make_dir method."""
        with TemporaryDirectory() as tempdir:
            test_path = os.path.join(tempdir, 'test_directory')
            # Call the method under test
            FileSystemHelper.make_dir(test_path)
            # Verify that the directory was created
            self.assertTrue(os.path.isdir(test_path))

    def test_get_wav_files(self):
        """Test the get_wav_files method."""
        with TemporaryDirectory() as tempdir:
            test_folder = os.path.join(tempdir, 'test_folder')
            os.makedirs(test_folder, exist_ok=True)
            # Create test files
            test_files = ['test1.wav', 'test2.wav', 'test3.txt']
            for filename in test_files:
                with open(os.path.join(test_folder, filename), 'a'):
                    pass
            # Call the method under test
            wav_files = FileSystemHelper.get_wav_files(test_folder)
            # Verify the result
            expected_files = [os.path.join(test_folder, file) for file in ['test1.wav', 'test2.wav']]
            self.assertEqual(sorted(wav_files), sorted(expected_files))


if __name__ == '__main__':
    unittest.main()
import librosa
import matplotlib.pyplot as plt
import numpy as np
import librosa.display
from preprocessing.file_system_helper import FileSystemHelper
class MelSpectrogramGenerator:
    """A class to generate and save Mel spectrograms from audio data.

    Attributes
    ----------
    save_path : str
        The path where the generated spectrograms will be saved.
    sr : int
        The sample rate of the audio data.
    n_fft : int
        FFT window size passed to ``librosa.feature.melspectrogram``.
    n_mels : int
        Number of Mel bands to generate.
    hop_length : int
        Hop length (in samples) between successive frames.

    Methods
    -------
    generate_and_save(y, file_name)
        Generates a Mel spectrogram from audio data `y` and saves it as an image.
    """

    def __init__(self, save_path, sr, n_fft, n_mels, hop_length):
        """Initialize the MelSpectrogramGenerator instance.

        Parameters
        ----------
        save_path : str
            The path where the generated spectrograms will be saved.
        sr : int
            The sample rate of the audio data.
        n_fft : int
            FFT window size passed to ``librosa.feature.melspectrogram``.
        n_mels : int
            Number of Mel bands to generate.
        hop_length : int
            Hop length (in samples) between successive frames.
        """
        self.save_path = save_path
        self.sr = sr
        self.n_fft = n_fft
        self.n_mels = n_mels
        self.hop_length = hop_length

    def generate_and_save(self, y, file_name):
        """Generate a Mel spectrogram from audio data `y` and save it as an image.

        Parameters
        ----------
        y : np.ndarray
            Audio time series.
        file_name : str
            File name to save the generated spectrogram image; it is appended
            to ``save_path`` unchanged, so include the extension.
        """
        # Generate Mel spectrogram and convert to log scale
        mel = librosa.feature.melspectrogram(
            y=y,
            sr=self.sr,
            n_fft=self.n_fft,
            n_mels=self.n_mels,
            hop_length=self.hop_length,
        )
        log_mel = librosa.power_to_db(mel, ref=np.max)

        # Plot and save the spectrogram as an image
        fig = plt.figure(figsize=(20, 10))
        try:
            # NOTE(review): the original call was cut off after x_axis='time';
            # y_axis='mel' is the assumed continuation -- confirm.
            librosa.display.specshow(
                log_mel,
                sr=self.sr,
                hop_length=self.hop_length,
                cmap='viridis',
                x_axis='time',
                y_axis='mel',
            )
            plt.colorbar(format='%+2.0f dB')  # add a colour bar
            # savefig does not create missing directories.
            FileSystemHelper.make_dir(self.save_path)
            plt.savefig(f"{self.save_path}/{file_name}", bbox_inches='tight', pad_inches=0)
        finally:
            # One figure per segment: close it so that converting a whole
            # folder does not keep hundreds of figures alive.
            plt.close(fig)
import unittest
import numpy as np
from unittest.mock import patch, MagicMock
from preprocessing.mel_spectrogram_generator import MelSpectrogramGenerator
import tempfile
import os
import warnings
class TestMelSpectrogramGenerator(unittest.TestCase):
    """Unit tests for MelSpectrogramGenerator.generate_and_save."""

    def setUp(self):
        """Set up the test case."""
        self.temp_dir = tempfile.TemporaryDirectory()
        self.save_path = self.temp_dir.name  # Use temporary directory
        self.sr = 22050
        self.n_fft = 2048
        self.n_mels = 128
        self.hop_length = 512
        # The constructor takes n_fft between sr and n_mels.
        self.generator = MelSpectrogramGenerator(
            self.save_path, self.sr, self.n_fft, self.n_mels, self.hop_length
        )

    def tearDown(self):
        """Clean up the temporary directory."""
        self.temp_dir.cleanup()

    # Decorators apply bottom-up, so the mock arguments arrive in the order
    # savefig, specshow, power_to_db, melspectrogram.
    @patch('librosa.feature.melspectrogram')
    @patch('librosa.power_to_db')
    @patch('librosa.display.specshow', new_callable=MagicMock)
    @patch('matplotlib.pyplot.savefig', new_callable=MagicMock)
    def test_generate_and_save(self, mock_savefig, mock_specshow, mock_power_to_db, mock_melspectrogram):
        """Test the generate_and_save method."""
        # Mock return values
        mock_mel = np.zeros((self.n_mels, 100))  # Mock Mel spectrogram
        mock_melspectrogram.return_value = mock_mel  # Ensure consistent shapes
        mock_power_to_db.return_value = mock_mel

        # Call the method under test
        test_y = np.random.randn(1000)
        test_file_name = 'test_spec.png'
        # specshow is mocked, so nothing is drawn and a real colorbar call
        # would find no mappable; stub it out as well.
        with patch('matplotlib.pyplot.colorbar'):
            self.generator.generate_and_save(test_y, test_file_name)

        # Assertions to verify the behavior
        mock_melspectrogram.assert_called_once_with(
            y=test_y,
            sr=self.sr,
            n_fft=self.n_fft,
            n_mels=self.n_mels,
            hop_length=self.hop_length,
        )
        mock_power_to_db.assert_called_once_with(mock_mel, ref=np.max)
        mock_specshow.assert_called_once()
        mock_savefig.assert_called_once_with(
            f"{self.save_path}/{test_file_name}", bbox_inches='tight', pad_inches=0
        )


if __name__ == '__main__':
    unittest.main()
class ConvertToMelSpectrogram:
    """A class to convert audio files in a folder to mel spectrogram images.

    Attributes:
        folder_path (str): Path to the folder containing audio files.
        save_path (str): Path to the folder where spectrogram images will be saved.
        sr (int): Sample rate for audio processing.
        n_fft (int): FFT window size forwarded to the spectrogram generator.
        duration (float): Duration of the audio clips to be processed.
        n_mels (int): Number of mel bands to generate.
        hop_length (int): Number of samples between successive frames.
        audio_processor (AudioProcessor): Instance of AudioProcessor to handle audio loading and processing.
        mel_generator (MelSpectrogramGenerator): Instance of MelSpectrogramGenerator to generate and save mel spectrograms.
    """

    def __init__(self, folder_path, save_path, sr, n_fft, duration, n_mels, hop_length):
        """Initialize the ConvertToMelSpectrogram class with provided parameters.

        Args:
            folder_path (str): Path to the folder containing audio files.
            save_path (str): Path to the folder where spectrogram images will be saved.
            sr (int): Sample rate for audio processing.
            n_fft (int): FFT window size forwarded to the spectrogram generator.
            duration (float): Duration of the audio clips to be processed.
            n_mels (int): Number of mel bands to generate.
            hop_length (int): Number of samples between successive frames.
        """
        self.folder_path = folder_path
        self.save_path = save_path
        self.sr = sr
        self.n_fft = n_fft
        self.duration = duration
        self.n_mels = n_mels
        self.hop_length = hop_length
        self.audio_processor = AudioProcessor(sr, duration)
        self.mel_generator = MelSpectrogramGenerator(save_path, sr, n_fft, n_mels, hop_length)

    def convert_folder_to_mel_spectrogram(self):
        """Convert all WAV files in ``folder_path`` to mel spectrogram images.

        Each file is split into segments by the audio processor and every
        segment is saved as ``<file stem>_part<index>.png`` through the mel
        generator, which writes into ``save_path``.
        """
        # FileSystemHelper exposes get_wav_files; the previously called
        # get_files_by_extension is not defined on it.
        wav_files = FileSystemHelper.get_wav_files(self.folder_path)
        for audio_path in wav_files:
            # The returned sample rate is not needed here.
            y, _ = self.audio_processor.load_audio(audio_path)
            base_name = os.path.splitext(os.path.basename(audio_path))[0]
            audio_slices = self.audio_processor.split_audio(y)
            for i, y_slice in enumerate(audio_slices):
                file_name = f"{base_name}_part{i}.png"
                self.mel_generator.generate_and_save(y_slice, file_name)
import unittest
import os
import tempfile
import numpy as np
import soundfile as sf
import librosa
from unittest.mock import patch
from preprocessing.convert_to_mel_spectrogram import ConvertToMelSpectrogram
from preprocessing.audio_processor import AudioProcessor
from preprocessing.mel_spectrogram_generator import MelSpectrogramGenerator
class TestConvertToMelSpectrogram(unittest.TestCase):
    """Unit tests for ConvertToMelSpectrogram.convert_folder_to_mel_spectrogram."""

    def setUp(self):
        """Create a temporary folder with three WAV files and a converter over it."""
        self.temp_dir = tempfile.mkdtemp()  # create a temporary directory
        self.audio_files = [
            self.create_temp_audio_file('audio1.wav', duration=5, sr=22050),
            self.create_temp_audio_file('audio2.wav', duration=3, sr=22050),
            self.create_temp_audio_file('audio3.wav', duration=7, sr=22050),
        ]
        # NOTE(review): the original constructor call was cut off after the
        # two directory arguments; the remaining values are assumed -- confirm.
        self.converter = ConvertToMelSpectrogram(
            self.temp_dir,  # read from the temporary directory
            self.temp_dir,  # save into the same temporary directory
            sr=22050,
            n_fft=2048,
            duration=3.0,
            n_mels=128,
            hop_length=512,
        )

    def tearDown(self):
        """Delete the temporary files and directory after each test."""
        for audio_file in self.audio_files:
            if os.path.exists(audio_file):
                os.remove(audio_file)
        os.rmdir(self.temp_dir)

    def create_temp_audio_file(self, filename, duration, sr):
        """Write a random-noise WAV file into the temporary directory and return its path."""
        audio_path = os.path.join(self.temp_dir, filename)
        y = np.random.randn(int(duration * sr))
        sf.write(audio_path, y, sr)  # write the WAV file with soundfile
        return audio_path

    # Decorators apply bottom-up: generate_and_save is the first mock
    # argument and librosa.load the second.
    @patch('librosa.load')
    @patch.object(MelSpectrogramGenerator, 'generate_and_save')
    def test_convert_folder_to_mel_spectrogram(self, mock_generate_and_save, mock_librosa_load):
        """Every WAV file yields a generate_and_save call named <stem>_part0.png."""
        # Route librosa.load to the stub below
        mock_librosa_load.side_effect = self.mock_librosa_load

        # Call the method under test
        self.converter.convert_folder_to_mel_spectrogram()

        # Check that generate_and_save was called with the expected file names
        expected_calls = [
            unittest.mock.call(np.array([0.0]), 'audio1_part0.png'),
            unittest.mock.call(np.array([0.0]), 'audio2_part0.png'),
            unittest.mock.call(np.array([0.0]), 'audio3_part0.png'),
        ]
        mock_generate_and_save.assert_has_calls(expected_calls, any_order=True)

    def mock_librosa_load(self, audio_path, sr=None):
        """Stand-in for librosa.load returning a single-sample signal."""
        return np.array([0.0]), 22050


if __name__ == '__main__':
    unittest.main()
메인 파일을 2개 만들어서 관리하자.
preprocessing_main과 model_main을 만들어서 실제로 실행할 것이다.
import sys
import os
sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
from preprocessing.convert_to_mel_spectrogram import ConvertToMelSpectrogram
def main(folder_path=None, save_path=None):
    """Convert every WAV file in a folder into mel spectrogram images.

    Parameters
    ----------
    folder_path : str, optional
        Folder containing the WAV files; defaults to the author's local
        dataset path.
    save_path : str, optional
        Folder the spectrogram images are written to; defaults to the
        author's local output path.
    """
    if folder_path is None:
        folder_path = '/Users/seong-gyeongjun/Downloads/vocal artist/stone/wav'
    if save_path is None:
        save_path = '/Users/seong-gyeongjun/Downloads/vocal artist/stone/mel'
    sr = 22050
    n_fft = 2048
    duration = 3.0
    n_mels = 128
    hop_length = n_fft // 4  # 75% overlap between successive frames
    converter = ConvertToMelSpectrogram(folder_path, save_path, sr, n_fft, duration, n_mels, hop_length)
    # Constructing the converter does no work; the conversion must be invoked.
    converter.convert_folder_to_mel_spectrogram()


if __name__ == '__main__':
    main()
이런 이미지 파일이 save_path 폴더에 수백 장 저장된 걸 볼 수 있다.