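"""Convert audio signals to MFCC, spectrogram and mel-spectrogram images,
then combine them into a new fused feature image."""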

import os

import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

# Root directory of the input dataset (walked recursively for .wav files).
data_path = 'D:/论文代码/casia汉语情感语料库/'

# Output directory for the rendered MFCC images.
mfcc_path = 'D:/论文代码/MFCC/'

# Output directory for the rendered spectrogram images.
spectrogram_path = 'D:/论文代码/语谱图/'

# Output directory for the rendered mel-spectrogram images.
mel_path = 'D:/论文代码/梅尔谱图/'

# Output directory for the fused (element-wise product) feature images.
new_path = 'D:/论文代码/新特征图/'

# Feature-extraction parameters.
n_mfcc = 13
hop_length = 256


def output_png_path(out_root, rel_dir, wav_name):
    """Return the PNG path for ``wav_name`` under ``out_root/rel_dir``.

    The target directory is created if it does not exist yet, so the
    returned path can be written to immediately.
    """
    out_dir = os.path.join(out_root, rel_dir)
    os.makedirs(out_dir, exist_ok=True)
    return os.path.join(out_dir, os.path.splitext(wav_name)[0] + '.png')


def save_feature_image(feature, png_path, sr, **axis_kwargs):
    """Render ``feature`` with ``librosa.display.specshow`` and save it as PNG.

    ``sr`` and the module-level ``hop_length`` are forwarded so that the axis
    ticks match the parameters the feature was computed with instead of the
    library defaults. ``axis_kwargs`` carries ``x_axis`` / ``y_axis``.
    """
    try:
        librosa.display.specshow(feature, sr=sr, hop_length=hop_length, **axis_kwargs)
        plt.savefig(png_path, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure; otherwise figures pile up over a long
        # batch run when a save fails.
        plt.close()


def load_image_arrays(png_paths):
    """Load the images as RGBA ``uint8`` arrays of identical shape.

    Images saved with ``bbox_inches='tight'`` are cropped to their own
    content, so their pixel sizes can differ. Every image after the first is
    resized to the first image's size so the arrays can be multiplied
    element-wise.
    """
    arrays = []
    target_size = None
    for png_path in png_paths:
        # The context manager closes the underlying file handle.
        with Image.open(png_path) as raw:
            img = raw.convert('RGBA')
        if target_size is None:
            target_size = img.size
        elif img.size != target_size:
            img = img.resize(target_size)
        arrays.append(np.array(img))
    return arrays


def multiply_feature_maps(arrays):
    """Return the element-wise product of same-shaped 8-bit image arrays.

    Multiplying ``uint8`` arrays directly wraps around modulo 256, which
    turns the result into noise. Here every array is scaled to [0, 1], the
    product is taken in floating point, and the result is scaled back to
    0..255 and rounded.

    Raises:
        ValueError: if ``arrays`` is empty or the shapes differ.
    """
    arrays = [np.asarray(arr) for arr in arrays]
    if not arrays:
        raise ValueError('at least one feature map is required')
    shape = arrays[0].shape
    product = np.ones(shape, dtype=np.float64)
    for arr in arrays:
        if arr.shape != shape:
            raise ValueError(
                'feature maps must share one shape, got %s and %s' % (shape, arr.shape)
            )
        product *= arr.astype(np.float64) / 255.0
    return np.rint(product * 255.0).astype(np.uint8)


def process_audio_file(audio_path, rel_dir, file_name):
    """Render the three feature images for one audio file and fuse them.

    ``rel_dir`` is the file's directory relative to ``data_path``; it is
    mirrored below each output root.
    """
    # sr=None keeps the file's native sampling rate instead of resampling.
    y, sr = librosa.load(audio_path, sr=None)

    # MFCC image.
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc, hop_length=hop_length)
    mfcc_png = output_png_path(mfcc_path, rel_dir, file_name)
    save_feature_image(mfcc, mfcc_png, sr, x_axis='time')

    # "Spectrogram" image.
    # NOTE(review): this and the mel spectrogram below are both computed with
    # librosa.feature.melspectrogram; they differ only in the number of mel
    # bands (library default here, n_mfcc below). Confirm whether a plain
    # STFT spectrogram was intended here.
    spectrogram = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=hop_length)
    spectrogram_db = librosa.power_to_db(spectrogram, ref=np.max)
    spectrogram_png = output_png_path(spectrogram_path, rel_dir, file_name)
    save_feature_image(spectrogram_db, spectrogram_png, sr, x_axis='time', y_axis='mel')

    # Mel-spectrogram image (n_mfcc mel bands).
    mel = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mfcc, hop_length=hop_length)
    mel_db = librosa.power_to_db(mel, ref=np.max)
    mel_png = output_png_path(mel_path, rel_dir, file_name)
    save_feature_image(mel_db, mel_png, sr, x_axis='time', y_axis='mel')

    # Fuse the three rendered images by element-wise multiplication.
    fused = multiply_feature_maps(load_image_arrays([mfcc_png, spectrogram_png, mel_png]))
    Image.fromarray(fused).save(output_png_path(new_path, rel_dir, file_name))


def main():
    """Walk ``data_path`` and process every ``.wav`` file found."""
    for subdir, _dirs, files in os.walk(data_path):
        # relpath does not depend on data_path having a trailing separator,
        # unlike slicing the prefix off by length.
        rel_dir = os.path.relpath(subdir, data_path)
        if rel_dir == os.curdir:
            rel_dir = ''
        for file in files:
            if not file.lower().endswith('.wav'):
                continue
            process_audio_file(os.path.join(subdir, file), rel_dir, file)


if __name__ == '__main__':
    main()