Complete PyTorch implementation of a CNN audio denoising model, including data loading, data processing, the model, saving denoised audio, and evaluation metrics
Below is a complete PyTorch implementation of a CNN-based audio denoising model. It covers data loading, data preprocessing, the model itself, saving denoised audio, and evaluation metrics.
First, import the required libraries:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchaudio
import os
import numpy as np
import matplotlib.pyplot as plt
Next, define a few hyperparameters:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # use the GPU if available, otherwise the CPU
learning_rate = 0.001
batch_size = 32
num_epochs = 20
Then define a dataset class for loading and preprocessing the audio. It pairs each clean recording with a noise recording of the same file name, mixes them at a random SNR, and returns the noisy and clean log-mel spectrograms:
class AudioDataset(Dataset):
    def __init__(self, root_dir, noise_dir):
        self.root_dir = root_dir
        self.noise_dir = noise_dir
        self.audio_files = os.listdir(root_dir)
        # Reuse the same transforms for every item
        self.mel = torchaudio.transforms.MelSpectrogram(
            sample_rate=16000, n_fft=512, hop_length=160, n_mels=128
        )
        self.to_db = torchaudio.transforms.AmplitudeToDB()

    def __len__(self):
        return len(self.audio_files)

    def __getitem__(self, idx):
        audio_path = os.path.join(self.root_dir, self.audio_files[idx])
        noise_path = os.path.join(self.noise_dir, self.audio_files[idx])
        waveform, sample_rate = torchaudio.load(audio_path)
        noise_waveform, _ = torchaudio.load(noise_path)
        assert sample_rate == 16000, "Sample rate must be 16kHz"
        # Mix stereo down to mono
        waveform = waveform.mean(0, keepdim=True)
        noise_waveform = noise_waveform.mean(0, keepdim=True)
        # Trim or tile the noise so it matches the length of the clean signal
        if noise_waveform.shape[1] < waveform.shape[1]:
            repeats = waveform.shape[1] // noise_waveform.shape[1] + 1
            noise_waveform = noise_waveform.repeat(1, repeats)
        noise_waveform = noise_waveform[:, : waveform.shape[1]]
        # Add noise at a random SNR between 0 and 5 dB:
        # scale the noise so that signal_power / (factor**2 * noise_power) == 10**(snr/10)
        snr = np.random.randint(low=0, high=6)
        signal_power = waveform.pow(2).mean()
        noise_power = noise_waveform.pow(2).mean()
        factor = (signal_power / noise_power * 10 ** (-snr / 10)).sqrt()
        noisy_waveform = waveform + factor * noise_waveform
        # Convert both the noisy mixture and the clean signal to log-mel spectrograms
        noisy_spec = self.to_db(self.mel(noisy_waveform))
        clean_spec = self.to_db(self.mel(waveform))
        return noisy_spec, clean_spec
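One practical point not addressed in the original code: DataLoader can only stack spectrograms of identical size, so the loaders used later implicitly assume that all clips have the same duration. If your clips vary in length, one workaround is a small collate function that crops every item in a batch to the shortest one. The helper below is a hypothetical sketch, not part of the original post:

# Hypothetical helper: crop all spectrograms in a batch to a common number of frames
# so that variable-length clips can still be stacked into one tensor.
def crop_collate(batch):
    min_frames = min(noisy.shape[-1] for noisy, _ in batch)
    noisy_batch = torch.stack([n[..., :min_frames] for n, _ in batch])
    clean_batch = torch.stack([c[..., :min_frames] for _, c in batch])
    return noisy_batch, clean_batch

# Usage (optional): DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=crop_collate)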
Next, define the convolutional neural network. It is a plain stack of 2D convolutions over the spectrogram; with stride 1 and padding 1, every layer preserves the time and frequency resolution, so the output has the same shape as the input:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.relu2 = nn.ReLU()
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.relu3 = nn.ReLU()
        self.conv4 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
        self.bn4 = nn.BatchNorm2d(128)
        self.relu4 = nn.ReLU()
        self.conv5 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
        self.bn5 = nn.BatchNorm2d(128)
        self.relu5 = nn.ReLU()
        self.conv6 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
        self.bn6 = nn.BatchNorm2d(128)
        self.relu6 = nn.ReLU()
        self.conv7 = nn.Conv2d(128, 64, kernel_size=3, stride=1, padding=1)
        self.bn7 = nn.BatchNorm2d(64)
        self.relu7 = nn.ReLU()
        self.conv8 = nn.Conv2d(64, 1, kernel_size=3, stride=1, padding=1)
        # No activation after the last convolution: the target log-mel spectrogram
        # is in dB and not confined to [-1, 1], so a Tanh here would saturate.

    def forward(self, x):
        x = self.relu1(self.bn1(self.conv1(x)))
        x = self.relu2(self.bn2(self.conv2(x)))
        x = self.relu3(self.bn3(self.conv3(x)))
        x = self.relu4(self.bn4(self.conv4(x)))
        x = self.relu5(self.bn5(self.conv5(x)))
        x = self.relu6(self.bn6(self.conv6(x)))
        x = self.relu7(self.bn7(self.conv7(x)))
        x = self.conv8(x)
        return x
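As a quick sanity check (not part of the original post), a dummy batch can be pushed through the model to confirm that the output spectrogram keeps the input shape:

# Hypothetical smoke test: a batch of 4 spectrograms with 128 mel bins and 200 frames
dummy = torch.randn(4, 1, 128, 200)
out = CNN()(dummy)
print(out.shape)  # expected: torch.Size([4, 1, 128, 200])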
Then define a function that computes the evaluation metrics, signal-to-noise ratio (SNR) and peak signal-to-noise ratio (PSNR), between a reference and a denoised signal. Note that here they are computed on the log-mel spectrograms rather than on raw waveforms:
def calculate_metrics(original_signal, denoised_signal):
    # Both arguments are NumPy arrays (log-mel spectrograms in this pipeline)
    mse = ((original_signal - denoised_signal) ** 2).mean()
    snr = 10 * np.log10((original_signal ** 2).mean() / mse)
    psnr = 10 * np.log10(original_signal.max() ** 2 / mse)
    return snr, psnr
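A tiny worked check (illustrative only): with reference [1, 2, 3] and estimate [1.1, 1.9, 3.2], the MSE is about 0.02 and the mean squared reference is about 4.67, giving an SNR of roughly 23.7 dB and a PSNR of roughly 26.5 dB:

# Illustrative check of calculate_metrics on a toy signal
ref = np.array([1.0, 2.0, 3.0])
est = np.array([1.1, 1.9, 3.2])
snr, psnr = calculate_metrics(ref, est)
print(f"SNR: {snr:.1f} dB, PSNR: {psnr:.1f} dB")  # roughly 23.7 dB and 26.5 dB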
Next, define the training and test functions. Each batch is a pair of noisy and clean spectrograms; the model takes the noisy spectrogram as input and is optimized to reconstruct the clean one:
def train(model, train_loader, criterion, optimizer):
    model.train()
    train_loss = 0
    for batch_idx, (noisy, clean) in enumerate(train_loader):
        noisy, clean = noisy.to(device), clean.to(device)
        optimizer.zero_grad()
        output = model(noisy)
        loss = criterion(output, clean)  # learn to map the noisy spectrogram to the clean one
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    train_loss /= len(train_loader)
    return train_loss
def test(model, test_loader, criterion):
    model.eval()
    test_loss = 0
    snr_list = []
    psnr_list = []
    with torch.no_grad():
        for batch_idx, (noisy, clean) in enumerate(test_loader):
            noisy, clean = noisy.to(device), clean.to(device)
            output = model(noisy)
            loss = criterion(output, clean)
            test_loss += loss.item()
            output = output.cpu().numpy().squeeze(1)
            clean = clean.cpu().numpy().squeeze(1)
            for i in range(len(output)):
                snr, psnr = calculate_metrics(clean[i], output[i])
                snr_list.append(snr)
                psnr_list.append(psnr)
    test_loss /= len(test_loader)
    snr = np.mean(snr_list)
    psnr = np.mean(psnr_list)
    return test_loss, snr, psnr
Finally, load the datasets, define the model, loss function, and optimizer, and run training and evaluation:
# Load the datasets
train_dataset = AudioDataset(root_dir="train/audio", noise_dir="train/noise")
test_dataset = AudioDataset(root_dir="test/audio", noise_dir="test/noise")
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
# Define the model, loss function, and optimizer
model = CNN().to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# Train and evaluate
train_losses = []
test_losses = []
snrs = []
psnrs = []
for epoch in range(num_epochs):
    train_loss = train(model, train_loader, criterion, optimizer)
    test_loss, snr, psnr = test(model, test_loader, criterion)
    train_losses.append(train_loss)
    test_losses.append(test_loss)
    snrs.append(snr)
    psnrs.append(psnr)
    print(f"Epoch {epoch+1}/{num_epochs}: Train Loss: {train_loss:.6f}, Test Loss: {test_loss:.6f}, SNR: {snr:.2f}dB, PSNR: {psnr:.2f}dB")
# Save the trained model
torch.save(model.state_dict(), "model.pt")
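Since matplotlib is imported above and the per-epoch losses and metrics are collected, one optional way to visualize training (not shown in the original post) is:

# Optional: plot the loss curves and evaluation metrics per epoch
epochs = range(1, num_epochs + 1)
plt.figure(figsize=(10, 4))
plt.subplot(1, 2, 1)
plt.plot(epochs, train_losses, label="train loss")
plt.plot(epochs, test_losses, label="test loss")
plt.xlabel("epoch")
plt.ylabel("MSE loss")
plt.legend()
plt.subplot(1, 2, 2)
plt.plot(epochs, snrs, label="SNR (dB)")
plt.plot(epochs, psnrs, label="PSNR (dB)")
plt.xlabel("epoch")
plt.legend()
plt.tight_layout()
plt.savefig("training_curves.png")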
This completes the PyTorch implementation of the CNN audio denoising model. The saved model can now be used to denoise new audio and to compute the evaluation metrics.
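The original post stops at saving the model weights, so here is a rough, hedged sketch of how denoised audio could be written back to disk: run a noisy file through the trained network, convert the predicted log-mel spectrogram back to a waveform with InverseMelScale and Griffin-Lim, and save it with torchaudio.save. The file names are placeholders, and mel inversion plus Griffin-Lim phase estimation is lossy, so treat this as an approximation rather than the author's exact method:

# Inference sketch (assumptions: 16 kHz audio, the mel parameters used above,
# and Griffin-Lim for phase reconstruction; quality is limited by the mel inversion).
model = CNN().to(device)
model.load_state_dict(torch.load("model.pt", map_location=device))
model.eval()

mel = torchaudio.transforms.MelSpectrogram(sample_rate=16000, n_fft=512, hop_length=160, n_mels=128)
to_db = torchaudio.transforms.AmplitudeToDB()
inv_mel = torchaudio.transforms.InverseMelScale(n_stft=257, n_mels=128, sample_rate=16000)
griffin_lim = torchaudio.transforms.GriffinLim(n_fft=512, hop_length=160)

waveform, sr = torchaudio.load("noisy_example.wav")  # hypothetical input file
waveform = waveform.mean(0, keepdim=True)
noisy_spec = to_db(mel(waveform)).unsqueeze(0).to(device)  # shape: (1, 1, 128, frames)

with torch.no_grad():
    denoised_db = model(noisy_spec).squeeze(0).cpu()

# Convert dB back to a power mel spectrogram, invert the mel scale, then run Griffin-Lim
denoised_power = torchaudio.functional.DB_to_amplitude(denoised_db, ref=1.0, power=1.0)
linear_spec = inv_mel(denoised_power.squeeze(0))
denoised_waveform = griffin_lim(linear_spec)

torchaudio.save("denoised_example.wav", denoised_waveform.unsqueeze(0), 16000)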