Real-time Speech Enhancement with PyTorch
import argparse
import os
import time

import numpy as np
import pyaudio
import torch
import torch.nn as nn
from scipy.io import wavfile
from torch.autograd import Variable
from tqdm import tqdm

from data_preprocess import slice_signal, window_size, sample_rate
from model import Generator
from utils import emphasis
# Audio stream configuration.
CHUNK_SIZE = 1024          # frames per buffer handed to the stream callback
FORMAT = pyaudio.paInt16   # 16-bit signed PCM samples
CHANNELS = 1               # mono
RATE = sample_rate         # stream rate follows the rate imported from data_preprocess

p = pyaudio.PyAudio()

# Build the generator and restore its trained weights. The checkpoint is
# mapped to the CPU first so it loads on machines without a GPU; the model is
# moved to the GPU afterwards when one is available.
# NOTE: torch.load unpickles the file, so only load checkpoints you trust.
# NOTE(review): generator.eval() is not called here; if Generator contains
# dropout or batch-norm layers it probably should be -- confirm against model.py.
generator = Generator()
generator.load_state_dict(torch.load('epochs/generator-9(1).pkl', map_location='cpu'))
if torch.cuda.is_available():
    generator.cuda()

# Latent noise passed to the generator as its second argument. It is sampled
# once from a standard normal distribution and reused for every audio buffer.
# torch.randn replaces the deprecated nn.init.normal(torch.Tensor(...)) call,
# and the deprecated Variable wrapper is no longer needed.
z = torch.randn(1, 1024, 8)
if torch.cuda.is_available():
    z = z.cuda()
# Rolling window holding the most recent `window_size` input samples as
# floats. It is created on the first callback invocation and is only touched
# from inside `callback`.
_context = None

# Full-scale magnitude of 16-bit PCM, used to convert int16 <-> float.
_INT16_SCALE = 32768.0


def callback(in_data, frame_count, time_info, status):
    """Enhance one buffer of microphone audio and return it for playback.

    The incoming int16 samples are scaled to floats and appended to a rolling
    window of `window_size` samples. The whole window is pre-emphasised, run
    through the generator together with the fixed noise tensor `z`,
    de-emphasised, and the newest `frame_count` samples of the result are
    converted back to int16 PCM.

    The samples are processed in memory instead of being sent through
    `slice_signal`: per the recorded traceback, `slice_signal` forwards its
    first argument to `librosa.load`, which loads audio from a file, so
    handing it a sample array raises TypeError.

    Args:
        in_data: Raw bytes recorded from the input device (16-bit mono PCM).
        frame_count: Number of frames PortAudio expects back.
        time_info: Timing information supplied by PyAudio (unused).
        status: PortAudio status flags (unused).

    Returns:
        A ``(out_data, flag)`` tuple where ``out_data`` is a bytes object of
        ``frame_count`` int16 samples and ``flag`` is ``pyaudio.paContinue``.
    """
    global _context
    if _context is None:
        _context = np.zeros(window_size, dtype=np.float32)

    # Scale int16 PCM to floats in [-1, 1).
    # NOTE(review): assumes the model was trained on librosa-style normalised
    # floats (slice_signal loads its audio with librosa.load) -- confirm.
    new_samples = np.frombuffer(in_data, dtype=np.int16).astype(np.float32) / _INT16_SCALE

    # Slide the window: drop the oldest samples, keep the newest window_size.
    _context = np.concatenate((_context, new_samples))[-window_size:]

    # Shape (1, 1, window_size): one batch item, one channel.
    noisy_slice = torch.from_numpy(emphasis(_context[np.newaxis, np.newaxis, :])).type(torch.FloatTensor)
    if torch.cuda.is_available():
        noisy_slice = noisy_slice.cuda()

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        generated_speech = generator(noisy_slice, z).cpu().numpy()
    generated_speech = emphasis(generated_speech, emph_coeff=0.95, pre=False).reshape(-1)

    # Only the newest frame_count samples correspond to this buffer.
    tail = generated_speech[max(generated_speech.size - frame_count, 0):]
    if tail.size < frame_count:
        # Returning fewer frames than requested would end the stream, so
        # left-pad with silence in the unlikely case the window is shorter.
        tail = np.pad(tail, (frame_count - tail.size, 0), mode='constant')

    # Back to int16 PCM; clip first so loud samples saturate instead of
    # wrapping around. PyAudio expects a bytes object, not an ndarray.
    pcm = np.clip(np.rint(tail * _INT16_SCALE), -32768, 32767).astype(np.int16)
    return (pcm.tobytes(), pyaudio.paContinue)
# Open a full-duplex stream: every CHUNK_SIZE frames recorded from the input
# device are handed to `callback`, and the bytes it returns are played back.
stream = p.open(format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                output=True,
                frames_per_buffer=CHUNK_SIZE,
                stream_callback=callback)
stream.start_stream()

try:
    # PyAudio invokes the callback on its own thread; the main thread only
    # has to stay alive. Sleep between checks instead of spinning so the
    # loop does not burn a CPU core that the model needs.
    while stream.is_active():
        time.sleep(0.1)
except KeyboardInterrupt:
    # Ctrl+C is the expected way to stop the program.
    pass
finally:
    # Always release the audio device, even if the loop exits on an error.
    stream.stop_stream()
    stream.close()
    p.terminate()
# The following error is raised: Traceback (most recent call last):
# File "D:\graduation_design\real_time_processing.py", line 38, in callback
# noisy_slice = slice_signal(noisy_slice, window_size, 1, sample_rate)[0]
# File "D:\graduation_design\data_preprocess.py", line 33, in slice_signal
# wav, sr = librosa.load(file, sr=sample_rate)
# File "D:\python\lib\site-packages\librosa\core\audio.py", line 129, in load
# return pa.is_stream_active(self._stream)
# TypeError
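# Error analysis: the exception is raised inside the callback and crashes the program.
# The traceback shows the callback calling slice_signal (data_preprocess.py, line 33),
# which passes its first argument to librosa.load(file, sr=sample_rate). librosa.load
# reads audio from a file, but the callback hands it a NumPy array of samples taken from
# the input buffer, hence the TypeError. The samples are already in memory, so the
# callback should convert and window them directly instead of calling slice_signal.
# It is still worth checking the data preprocessing, model loading, and variable types
# while debugging.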
The provided code demonstrates real-time speech enhancement using a PyTorch generator model and PyAudio for audio input and output. It processes audio in chunks, and a stream callback runs each chunk through the generator for noise reduction. The traceback points to the callback's call to slice_signal in data_preprocess.py: that function passes its first argument to librosa.load, which loads audio from a file, whereas the callback supplies a NumPy array of samples read from the input buffer. To fix the error, process the in-memory samples in the callback directly instead of calling slice_signal, and make sure their data type, scaling, and length match what the model expects. Additional debugging techniques, such as print statements or using a debugger, can help confirm the root cause of the error.
原文地址: https://www.cveoy.top/t/topic/nZEw 著作权归作者所有。请勿转载和采集!