Python Librosa: 生成音频信号的语谱图、MFCC 图和对数梅尔谱图
这个任务可以使用 Python 中的 Librosa 库来完成。首先,我们需要导入所需的库和模块:
import os
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
然后,我们需要定义一个函数来计算语谱图、MFCC 和对数梅尔谱图。该函数接受一个音频文件的路径作为参数,并返回用于绘图的三个特征矩阵。
def generate_features(audio_path):
    """Compute the spectrogram, MFCCs and log-mel spectrogram of an audio file.

    Args:
        audio_path: Path of the audio file to analyse.

    Returns:
        A tuple ``(spectrogram_db, mfccs, mel_spectrogram_db)`` of 2-D arrays:
        the STFT magnitude in dB, 20 MFCCs per frame, and the mel power
        spectrogram in dB. Both dB arrays are referenced to their own peak
        (``ref=np.max``), so their maximum value is 0 dB.
    """
    # sr=None keeps the file's native sampling rate instead of resampling.
    y, sr = librosa.load(audio_path, sr=None)

    # Linear-frequency spectrogram: STFT magnitude is an amplitude quantity.
    spectrogram = np.abs(librosa.stft(y))
    spectrogram_db = librosa.amplitude_to_db(spectrogram, ref=np.max)

    # MFCCs are signed cepstral coefficients already derived from a log-mel
    # spectrogram. A dB conversion would take their magnitude and discard the
    # sign, so they are returned unchanged.
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)

    # melspectrogram yields a *power* spectrogram by default (power=2.0), so
    # power_to_db (10*log10) is the matching conversion; amplitude_to_db
    # (20*log10) would double every dB value.
    mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr)
    mel_spectrogram_db = librosa.power_to_db(mel_spectrogram, ref=np.max)

    return spectrogram_db, mfccs, mel_spectrogram_db
接下来,我们需要遍历数据集中的每个音频文件,生成对应的图像,并将它们保存到新的文件夹中。
# Dataset path: one sub-directory per class, each holding .wav files.
dataset_path = 'D:/论文代码/data/'
# Output path: receives one sub-directory per dataset sub-directory.
output_path = 'D:/论文代码/outputs/'


def _save_feature_plot(data, out_file, title, sr, colorbar_format=None, **axes):
    """Draw one feature matrix with ``specshow`` and save it as an image.

    Args:
        data: 2-D feature matrix to display.
        out_file: Destination image path.
        title: Figure title.
        sr: Sampling rate of the analysed audio, used to scale the axes.
        colorbar_format: Optional format string for the colorbar ticks.
        **axes: Axis options forwarded to ``specshow`` (``x_axis``, ``y_axis``).
    """
    plt.figure(figsize=(10, 4))
    try:
        librosa.display.specshow(data, sr=sr, **axes)
        plt.colorbar(format=colorbar_format)
        plt.title(title)
        plt.savefig(out_file)
    finally:
        # Always release the figure, even if drawing or saving fails, so a
        # long batch run does not accumulate open figures.
        plt.close()


# Get list of subdirectories in dataset path (sorted for a stable order).
subdirs = [
    os.path.join(dataset_path, d)
    for d in sorted(os.listdir(dataset_path))
    if os.path.isdir(os.path.join(dataset_path, d))
]

for subdir in subdirs:
    print('Processing directory:', subdir)

    # Match the extension case-insensitively so '.WAV' files are not skipped.
    audio_files = [
        os.path.join(subdir, f)
        for f in sorted(os.listdir(subdir))
        if f.lower().endswith('.wav')
    ]

    # Mirror the dataset's sub-directory name under the output path.
    output_dir = os.path.join(output_path, os.path.basename(subdir))
    os.makedirs(output_dir, exist_ok=True)

    for audio_file in audio_files:
        print('Processing file:', audio_file)
        spectrogram, mfccs, mel_spectrogram = generate_features(audio_file)

        # The features are computed at the file's native sampling rate, so
        # specshow needs that rate; its default of 22050 Hz would mislabel
        # the frequency and time axes of files sampled at any other rate.
        sr = librosa.get_samplerate(audio_file)
        stem = os.path.splitext(os.path.basename(audio_file))[0]

        _save_feature_plot(
            spectrogram,
            os.path.join(output_dir, stem + '_spectrogram.png'),
            'Log-frequency power spectrogram',
            sr,
            colorbar_format='%+2.0f dB',
            x_axis='time',
            y_axis='log',
        )
        # Cepstral coefficients are not a dB quantity, so the colorbar
        # carries no unit.
        _save_feature_plot(
            mfccs,
            os.path.join(output_dir, stem + '_mfccs.png'),
            'Mel-frequency cepstral coefficients (MFCCs)',
            sr,
            x_axis='time',
        )
        _save_feature_plot(
            mel_spectrogram,
            os.path.join(output_dir, stem + '_mel_spectrogram.png'),
            'Mel spectrogram',
            sr,
            colorbar_format='%+2.0f dB',
            x_axis='time',
            y_axis='mel',
        )

print('Done')
以上代码将会生成语谱图、MFCC 图和对数梅尔谱图,并将它们保存到新的文件夹中。输出文件夹中的子目录划分与数据集的子目录保持一致(音频文件位于数据集下的各个子目录中)。
原文地址: https://www.cveoy.top/t/topic/nMQH 著作权归作者所有。请勿转载和采集!