ConvTasNet 模型完整代码讲解:语音分离神经网络
ConvTasNet是一种用于语音分离的神经网络模型,它直接在时域波形上工作,由编码器(Encoder)、分离模块(Separator)和解码器(Decoder)三个卷积子网络组成。该模型可以有效地从混合语音信号中分离出各个说话者的语音。
下面是ConvTasNet的完整代码讲解:
- 导入所需库和模块
import torch
import torch.nn as nn
import torch.nn.functional as F
- 定义ConvTasNet模型
class ConvTasNet(nn.Module):
    """Pipeline that chains an encoder, a separator and a decoder.

    Args:
        N: First argument given to both ``Encoder`` and ``Decoder``.
        L: Second argument given to both ``Encoder`` and ``Decoder``.
        B, H, P, X, R, C: Forwarded unchanged to ``Separator``.
        norm_type: Normalisation selector forwarded to ``Separator``
            (defaults to ``'gLN'``).
    """

    def __init__(self, N, L, B, H, P, X, R, C, norm_type='gLN'):
        super().__init__()

        # Keep every hyper-parameter available on the instance.
        self.N = N
        self.L = L
        self.B = B
        self.H = H
        self.P = P
        self.X = X
        self.R = R
        self.C = C
        self.norm_type = norm_type

        # Submodules, created in the order in which forward() applies them.
        self.encoder = Encoder(N, L)
        self.separator = Separator(B, H, P, X, R, C, norm_type)
        self.decoder = Decoder(N, L)

    def forward(self, x):
        """Run ``x`` through the encoder, the separator and the decoder.

        Args:
            x: Input waveform batch of shape (batch, samples).

        Returns:
            The decoder output computed from the separator output.
        """
        encoded = self.encoder(x)
        separated = self.separator(encoded)
        return self.decoder(separated)
- 定义Encoder模块
class Encoder(nn.Module):
    """Split a waveform into frames with a strided 1-D convolution + PReLU.

    Args:
        N: Number of convolution filters (output channels).
        L: Kernel size and stride in samples, so frames do not overlap.
    """

    def __init__(self, N, L):
        super().__init__()
        self.N = N
        self.L = L
        # A single input channel is expanded to N channels, one frame per L samples.
        self.embedding = nn.Conv1d(in_channels=1, out_channels=N, kernel_size=L, stride=L)
        self.prelu = nn.PReLU()

    def forward(self, x):
        """Encode a batch of waveforms.

        Args:
            x: Tensor of shape (batch, samples).

        Returns:
            Tensor of shape (batch, N, frames) where
            ``frames = (samples - L) // L + 1``.
        """
        # Conv1d wants an explicit channel axis: (batch, 1, samples).
        with_channel = torch.unsqueeze(x, 1)
        return self.prelu(self.embedding(with_channel))
- 定义Separator模块
class Separator(nn.Module):
    """Convolutional separation stack operating on encoded frames.

    The module keeps the time axis intact end to end: a 1x1 convolution
    lifts the ``B`` input channels to ``H`` hidden channels, a
    length-preserving convolution of kernel size ``P`` follows, and a
    two-layer convolutional block of kernel size ``X`` is then applied
    ``R`` times (sharing weights) with a skip connection from the lifted
    input.  A final 1x1 convolution produces ``C * B`` output channels.

    Args:
        B: Number of input channels expected by ``forward``.
        H: Number of hidden channels.
        P: Kernel size of the first temporal convolution.
        X: Kernel size of the convolutions inside the repeated block.
        R: How many times the repeated block is applied.
        C: Number of sources; the output has ``C * B`` channels.
        norm_type: ``'gLN'`` or ``'cLN'`` to normalise the input with the
            corresponding layer; any other value disables normalisation.
    """

    def __init__(self, B, H, P, X, R, C, norm_type):
        super(Separator, self).__init__()
        # Hyper-parameters
        self.B, self.H, self.P, self.X, self.R, self.C = B, H, P, X, R, C
        self.norm_type = norm_type

        # Submodules
        self.conv1d_BN_PReLU = nn.Sequential(
            nn.Conv1d(self.B, self.H, 1),
            nn.BatchNorm1d(self.H),
            nn.PReLU(),
        )
        # stride=1 with 'same' padding keeps the number of frames unchanged;
        # the skip connection in forward() needs equal lengths on both sides.
        self.conv1d_PReLU = nn.Sequential(
            nn.Conv1d(self.H, self.H, self.P, stride=1, padding='same'),
            nn.PReLU(),
        )
        self.recurrent = nn.Sequential(
            nn.Conv1d(self.H, self.H, self.X, stride=1, padding='same'),
            nn.BatchNorm1d(self.H),
            nn.PReLU(),
            nn.Conv1d(self.H, self.H, self.X, stride=1, padding='same'),
            nn.BatchNorm1d(self.H),
            nn.PReLU(),
        )
        self.conv1d_out = nn.Conv1d(self.H, self.C * self.B, 1)

        # The normalisation acts on the *input*, which has B channels, so its
        # affine parameters must be sized by B rather than by H.
        if self.norm_type == 'gLN':
            self.norm = gLN(self.B)
        elif self.norm_type == 'cLN':
            self.norm = cLN(self.B)
        else:
            self.norm = None

    def forward(self, z):
        """Compute per-source feature maps from encoded frames.

        Args:
            z: Tensor of shape (batch, B, frames).

        Returns:
            Tensor of shape (batch, C * B, frames).  Channels are grouped by
            source: channel ``c * B + b`` is feature ``b`` of source ``c``.

        Raises:
            ValueError: If ``z`` is not 3-D or does not have ``B`` channels.
        """
        if z.dim() != 3 or z.size(1) != self.B:
            # Reshaping a mismatched input would fold channels into the time
            # axis and silently corrupt the frame layout, so fail loudly.
            raise ValueError(
                'Separator expects input of shape (batch, %d, frames), got %s'
                % (self.B, tuple(z.shape))
            )

        if self.norm is not None:
            z = self.norm(z)  # (batch, B, frames)

        z = self.conv1d_BN_PReLU(z)  # (batch, H, frames)
        y = self.conv1d_PReLU(z)  # (batch, H, frames)
        for _ in range(self.R):
            # Skip connection from the lifted input on every pass.
            y = self.recurrent(y + z)  # (batch, H, frames)
        return self.conv1d_out(y)  # (batch, C * B, frames)
- 定义Decoder模块
class Decoder(nn.Module):
    """Turn encoded frames back into waveforms with a transposed convolution.

    A strided ``Conv1d`` would shrink the frame axis a second time; mapping
    frames back to samples requires the transposed operation, which emits
    ``L`` samples per frame.

    Args:
        N: Number of channels per source in the encoded representation.
        L: Kernel size and stride in samples (samples produced per frame).
    """

    def __init__(self, N, L):
        super(Decoder, self).__init__()
        # Hyper-parameters
        self.N, self.L = N, L
        # Submodules
        # The attribute keeps its original name so existing references to
        # ``decoder.conv1d`` still resolve.
        self.conv1d = nn.ConvTranspose1d(self.N, 1, self.L, stride=self.L)

    def forward(self, s):
        """Decode one or several sources.

        Args:
            s: Tensor of shape (batch, k * N, frames) with ``k >= 1``.  Each
                consecutive group of ``N`` channels is decoded as one source.

        Returns:
            Tensor of shape (batch, frames * L) when ``k == 1``, otherwise
            (batch, k, frames * L).

        Raises:
            ValueError: If ``s`` is not 3-D or its channel count is not a
                positive multiple of ``N``.
        """
        if s.dim() != 3:
            raise ValueError(
                'Decoder expects a 3-D tensor (batch, channels, frames), got %s'
                % (tuple(s.shape),)
            )
        batch, channels, frames = s.shape
        groups, remainder = divmod(channels, self.N)
        if groups == 0 or remainder != 0:
            raise ValueError(
                'Decoder expects a positive multiple of %d channels, got %d'
                % (self.N, channels)
            )

        # Fold the source axis into the batch axis so a single transposed
        # convolution decodes every source with shared weights.
        s = s.reshape(batch * groups, self.N, frames)
        x_hat = self.conv1d(s)  # (batch * k, 1, frames * L)
        x_hat = x_hat.squeeze(1)  # (batch * k, frames * L)
        if groups == 1:
            return x_hat
        return x_hat.reshape(batch, groups, -1)
- 定义gLN和cLN模块
class gLN(nn.Module):
    """Global layer normalisation with a learnable per-channel affine.

    Statistics are computed jointly over the channel and time axes of each
    batch item, so every item is normalised by a single mean and a single
    variance.  Taking them over time alone would normalise each channel
    separately and erase the relative level between channels.

    Args:
        C: Number of channels of the tensors passed to ``forward``.
    """

    def __init__(self, C):
        super(gLN, self).__init__()
        # Submodules
        self.gamma = nn.Parameter(torch.ones(1, C, 1))
        self.beta = nn.Parameter(torch.zeros(1, C, 1))

    def forward(self, x):
        """Normalise ``x``.

        Args:
            x: Tensor of shape (batch, channels, samples).

        Returns:
            Tensor of the same shape as ``x``.
        """
        # One mean / biased variance per batch item: shape (batch, 1, 1).
        mean = x.mean(dim=(1, 2), keepdim=True)
        var = x.var(dim=(1, 2), keepdim=True, unbiased=False)
        x = (x - mean) / torch.sqrt(var + 1e-12)
        x = self.gamma * x + self.beta
        return x
class cLN(nn.Module):
    """Channel-wise layer normalisation with a learnable per-channel affine.

    Every (batch, time) position is normalised on its own, using the mean
    and the biased variance taken across the channel axis.

    Args:
        C: Number of channels of the tensors passed to ``forward``.
    """

    def __init__(self, C):
        super().__init__()
        affine_shape = (1, C, 1)
        self.gamma = nn.Parameter(torch.ones(affine_shape))
        self.beta = nn.Parameter(torch.zeros(affine_shape))

    def forward(self, x):
        """Normalise ``x``.

        Args:
            x: Tensor of shape (batch, channels, samples).

        Returns:
            Tensor of the same shape as ``x``.
        """
        channel_mean = x.mean(dim=1, keepdim=True)
        channel_var = x.var(dim=1, keepdim=True, unbiased=False)
        normalised = (x - channel_mean) / torch.sqrt(channel_var + 1e-12)
        return self.gamma * normalised + self.beta
以上就是ConvTasNet模型的完整代码讲解。
原文地址: https://www.cveoy.top/t/topic/nnpv 著作权归作者所有。请勿转载和采集!