Real-time Speech Enhancement with PyTorch

import argparse
import os
import time

import numpy as np
import pyaudio
import torch
import torch.nn as nn
from scipy.io import wavfile
from torch.autograd import Variable
from tqdm import tqdm

from data_preprocess import slice_signal, window_size, sample_rate
from model import Generator
from utils import emphasis

# Audio stream configuration.
CHUNK_SIZE = 1024  # frames requested per stream buffer (frames_per_buffer)
FORMAT = pyaudio.paInt16  # 16-bit signed PCM; callback parses buffers as int16
CHANNELS = 1
RATE = sample_rate  # stream sample rate, taken from data_preprocess

p = pyaudio.PyAudio()

# Load the trained generator weights on CPU first, then move the model to the
# GPU when one is available.
generator = Generator()
generator.load_state_dict(torch.load('epochs/generator-9(1).pkl', map_location='cpu'))
if torch.cuda.is_available():
    generator.cuda()
# NOTE(review): if Generator contains dropout / batch-norm layers, it should
# probably be switched to inference mode with generator.eval() — confirm
# against model.py, which is not visible here.

# Latent noise fed to the generator together with each noisy window.
# torch.randn draws from N(0, 1) directly, replacing the deprecated
# nn.init.normal(torch.Tensor(...)) + Variable(...) combination.
z = torch.randn(1, 1024, 8)
if torch.cuda.is_available():
    z = z.cuda()

# Rolling buffer holding the most recent `window_size` input samples as
# floats.  The generator is run on this full window for every incoming chunk,
# so short PyAudio buffers still give the model a full-length input.
_context = np.zeros(window_size, dtype=np.float32)


def callback(in_data, frame_count, time_info, status):
    """PyAudio stream callback: enhance one captured chunk and play it back.

    The original implementation passed the raw sample array to
    ``slice_signal``, which (per the traceback recorded in this file) hands
    its first argument to ``librosa.load`` and therefore needs a file path.
    The window is now built directly from the captured buffer instead.

    Args:
        in_data: Raw bytes captured from the input device (int16 PCM).
        frame_count: Number of frames PyAudio expects in the returned buffer.
        time_info: Timing information supplied by PyAudio (unused).
        status: PortAudio status flags (unused).

    Returns:
        A ``(bytes, flag)`` tuple: ``frame_count`` int16 samples of enhanced
        audio and ``pyaudio.paContinue`` to keep the stream running.
    """
    global _context

    # int16 PCM -> float32.  Scaling to [-1, 1) assumes the model was trained
    # on librosa-loaded audio, which is float in that range — TODO confirm.
    chunk = np.frombuffer(in_data, dtype=np.int16).astype(np.float32) / 32768.0

    # Slide the window: append the new samples and keep the latest window_size.
    _context = np.concatenate((_context, chunk))[-window_size:]

    noisy_slice = torch.from_numpy(
        emphasis(_context[np.newaxis, np.newaxis, :])
    ).type(torch.FloatTensor)
    if torch.cuda.is_available():
        noisy_slice = noisy_slice.cuda()

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        generated_speech = generator(noisy_slice, z).cpu().numpy()
    generated_speech = emphasis(generated_speech, emph_coeff=0.95, pre=False)
    generated_speech = generated_speech.reshape(-1)

    # Emit exactly frame_count samples: the tail of the enhanced window lines
    # up with the chunk that was just captured.  If the window is shorter than
    # frame_count, the missing leading samples stay silent.
    out = np.zeros(frame_count, dtype=np.float32)
    n = min(frame_count, generated_speech.size)
    if n:
        out[frame_count - n:] = generated_speech[generated_speech.size - n:]

    # float -> int16 PCM; clip first so overshoot does not wrap around.
    pcm = (np.clip(out, -1.0, 1.0) * 32767.0).astype(np.int16)
    return (pcm.tobytes(), pyaudio.paContinue)

# Open a full-duplex stream in callback mode: PyAudio calls `callback` on its
# own thread for every buffer of CHUNK_SIZE frames.
stream = p.open(format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                output=True,
                frames_per_buffer=CHUNK_SIZE,
                stream_callback=callback)

stream.start_stream()

try:
    # Keep the main thread alive while audio is processed in the callback.
    # Sleeping (instead of spinning on `continue`) avoids pegging a CPU core
    # and competing with the callback thread for the interpreter lock.
    while stream.is_active():
        time.sleep(0.1)
except KeyboardInterrupt:
    # Ctrl+C is the normal way to stop; fall through to cleanup.
    pass
finally:
    # Always release the audio device, even if the loop exits with an error.
    stream.stop_stream()
    stream.close()
    p.terminate()

# 出现报错Traceback (most recent call last):
#   File "D:\graduation_design\real_time_processing.py", line 38, in callback
#   File "D:\graduation_design\data_preprocess.py", line 33, in slice_signal
#     noisy_slice = slice_signal(noisy_slice, window_size, 1, sample_rate)[0]
#     wav, sr = librosa.load(file, sr=sample_rate)
#   File "D:\python\lib\site-packages\librosa\core\audio.py", line 129, in load
#     return pa.is_stream_active(self._stream)
# TypeError

# 错误内容：这个错误是由于在回调函数中的某个地方出现了另一个错误，导致程序崩溃。具体错误信息不足以确定问题，建议检查代码并进行调试，以确定问题所在。可能需要检查数据预处理、模型加载、变量类型等方面的问题。

The provided code demonstrates real-time speech enhancement using a PyTorch generator model, with PyAudio handling audio input and output. Audio is processed in chunks by a stream callback that runs the generator for noise reduction. The traceback points into that callback: it passes a NumPy array of captured samples to `slice_signal`, but `slice_signal` in data_preprocess.py forwards its first argument to `librosa.load(file, sr=sample_rate)`, which expects an audio file path rather than an array, and this appears to be what raises the `TypeError`. To troubleshoot the error, review the callback together with the `slice_signal` function in data_preprocess.py — the model input should be built directly from the captured buffer instead of being loaded from a file — and ensure that the data types and model loading are correct. Additional debugging techniques, such as print statements or using a debugger, can help confirm the root cause of the error.

Real-time Speech Enhancement with PyTorch