这里是一个CNN音频降噪模型的PyTorch实现的完整代码。其中包括数据加载、数据处理、模型、存储降噪后音频、评估指标。

首先,我们需要导入必要的库:

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchaudio
import os
import numpy as np
import matplotlib.pyplot as plt

接下来,我们定义一些超参数:

# Global hyperparameters, referenced by the training/testing code below.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # use the GPU when available, otherwise fall back to CPU
learning_rate = 0.001  # Adam step size
batch_size = 32        # samples per mini-batch
num_epochs = 20        # full passes over the training set

然后,我们定义一个数据集类,用于加载和处理音频数据:

class AudioDataset(Dataset):
    """Dataset that mixes clean speech with a noise recording at a random
    SNR and returns the log-mel spectrogram of the noisy signal.

    Expects ``root_dir`` (clean audio) and ``noise_dir`` (noise) to contain
    files with matching names, all sampled at 16 kHz.
    # NOTE(review): assumes each noise file has the same length as its
    # matching clean file — TODO confirm; mismatched lengths would fail to
    # broadcast in the mixing step.
    """

    def __init__(self, root_dir, noise_dir):
        self.root_dir = root_dir
        self.noise_dir = noise_dir
        self.audio_files = os.listdir(root_dir)

        # Build the (stateless) transforms once here instead of on every
        # __getitem__ call — the original re-instantiated them per item.
        self.mel_transform = torchaudio.transforms.MelSpectrogram(
            sample_rate=16000, n_fft=512, hop_length=160, n_mels=128
        )
        self.to_db = torchaudio.transforms.AmplitudeToDB()

    def __len__(self):
        return len(self.audio_files)

    def __getitem__(self, idx):
        audio_path = os.path.join(self.root_dir, self.audio_files[idx])
        noise_path = os.path.join(self.noise_dir, self.audio_files[idx])

        waveform, sample_rate = torchaudio.load(audio_path)
        noise_waveform, _ = torchaudio.load(noise_path)

        # Validate explicitly: `assert` is stripped under `python -O`.
        if sample_rate != 16000:
            raise ValueError("Sample rate must be 16kHz")

        # Down-mix stereo to mono, keeping the channel dimension.
        waveform = waveform.mean(0, keepdim=True)
        noise_waveform = noise_waveform.mean(0, keepdim=True)

        # Mix in the noise at a random SNR in [0, 5] dB.  np.random.randint's
        # upper bound is exclusive, hence high=6 (the original high=5 could
        # never draw the advertised 5 dB).
        snr = np.random.randint(low=0, high=6)
        signal_power = waveform.pow(2).mean()
        noise_power = noise_waveform.pow(2).mean()
        factor = (signal_power / noise_power * 10 ** (-snr / 10)).sqrt()
        waveform = waveform + noise_waveform * factor

        # Log-scale mel spectrogram.  The +10 dB offset simply shifts the
        # values upward (AmplitudeToDB already clamps away from log(0)).
        spectrogram = self.to_db(self.mel_transform(waveform))
        spectrogram = spectrogram + 10

        return spectrogram

接下来,我们定义一个卷积神经网络模型:

class CNN(nn.Module):
    """Fully convolutional denoiser.

    Seven conv -> batch-norm -> ReLU stages followed by a single-channel
    conv with tanh output.  Input and output share the same (N, 1, H, W)
    shape, so the network maps a spectrogram to a spectrogram.
    """

    def __init__(self):
        super(CNN, self).__init__()

        # Channel widths of the seven conv/BN/ReLU stages:
        # 1 -> 32 -> 64 -> 128 -> 128 -> 128 -> 128 -> 64
        widths = [1, 32, 64, 128, 128, 128, 128, 64]
        for i in range(1, 8):
            setattr(
                self,
                f"conv{i}",
                nn.Conv2d(widths[i - 1], widths[i],
                          kernel_size=3, stride=1, padding=1),
            )
            setattr(self, f"bn{i}", nn.BatchNorm2d(widths[i]))
            setattr(self, f"relu{i}", nn.ReLU())

        # Final projection back to one channel, squashed into [-1, 1].
        self.conv8 = nn.Conv2d(64, 1, kernel_size=3, stride=1, padding=1)
        self.tanh8 = nn.Tanh()

    def forward(self, x):
        # Run the seven conv -> batch-norm -> ReLU stages in order.
        for i in range(1, 8):
            x = getattr(self, f"conv{i}")(x)
            x = getattr(self, f"bn{i}")(x)
            x = getattr(self, f"relu{i}")(x)
        return self.tanh8(self.conv8(x))

然后,我们定义一个函数来计算评估指标——信噪比(SNR)和峰值信噪比(PSNR):

def calculate_metrics(original_signal, denoised_signal):
    """Compute SNR and PSNR (in dB) between a reference signal and its
    denoised estimate.

    Args:
        original_signal: reference numpy array.
        denoised_signal: estimate with the same shape as ``original_signal``.

    Returns:
        Tuple ``(snr, psnr)`` in decibels.
    """
    # Use numpy operations throughout: the caller (`test`) passes numpy
    # arrays obtained via .cpu().numpy(), and ndarrays have no .pow()
    # method — the original `original_signal.pow(2)` raised AttributeError.
    mse = np.mean((original_signal - denoised_signal) ** 2)
    snr = 10 * np.log10(np.mean(original_signal ** 2) / mse)
    psnr = 10 * np.log10(np.max(original_signal) ** 2 / mse)

    return snr, psnr

接下来,我们定义训练和测试函数:

def train(model, train_loader, criterion, optimizer):
    """Run one training epoch and return the mean per-batch loss."""
    model.train()
    total_loss = 0.0

    for batch in train_loader:
        batch = batch.to(device)

        optimizer.zero_grad()
        reconstruction = model(batch)
        batch_loss = criterion(reconstruction, batch)
        batch_loss.backward()
        optimizer.step()

        total_loss += batch_loss.item()

    return total_loss / len(train_loader)


def test(model, test_loader, criterion):
    """Evaluate the model and return (mean loss, mean SNR, mean PSNR)."""
    model.eval()
    total_loss = 0.0
    snrs = []
    psnrs = []

    with torch.no_grad():
        for batch in test_loader:
            batch = batch.to(device)

            denoised = model(batch)
            total_loss += criterion(denoised, batch).item()

            # Drop the channel dimension and move to numpy for the metrics.
            denoised_np = denoised.cpu().numpy().squeeze(1)
            reference_np = batch.cpu().numpy().squeeze(1)

            for reference, estimate in zip(reference_np, denoised_np):
                snr_val, psnr_val = calculate_metrics(reference, estimate)
                snrs.append(snr_val)
                psnrs.append(psnr_val)

    return total_loss / len(test_loader), np.mean(snrs), np.mean(psnrs)

最后,我们加载数据集,定义模型、损失函数和优化器,并进行训练和测试:

# Load datasets: clean audio plus matching noise files for train and test.
train_dataset = AudioDataset(root_dir="train/audio", noise_dir="train/noise")
test_dataset = AudioDataset(root_dir="test/audio", noise_dir="test/noise")

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Model, loss, and optimizer.
# NOTE(review): the loss target is the noisy spectrogram itself (the dataset
# returns only the noisy input), so this trains a plain autoencoder rather
# than a supervised denoiser — confirm whether a clean target was intended.
model = CNN().to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training/evaluation loop; per-epoch curves are collected for later plotting.
train_losses = []
test_losses = []
snrs = []
psnrs = []

for epoch in range(num_epochs):
    train_loss = train(model, train_loader, criterion, optimizer)
    test_loss, snr, psnr = test(model, test_loader, criterion)

    train_losses.append(train_loss)
    test_losses.append(test_loss)
    snrs.append(snr)
    psnrs.append(psnr)

    print(f"Epoch {epoch+1}/{num_epochs}: Train Loss: {train_loss:.6f}, Test Loss: {test_loss:.6f}, SNR: {snr:.2f}dB, PSNR: {psnr:.2f}dB")

# Persist the trained weights (state_dict only, not the full module).
torch.save(model.state_dict(), "model.pt")

现在,我们已经完成了CNN音频降噪模型的PyTorch实现。我们可以使用存储的模型对新的音频进行降噪,并计算评估指标。

CNN音频降噪模型PyTorch实现完整代码,包括数据加载、数据处理、模型、存储降噪后音频、评估指标

原文地址: https://www.cveoy.top/t/topic/ciwO 著作权归作者所有。请勿转载和采集!

免费AI点我,无需注册和登录