import librosa
import librosa.display
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from scipy.fft import fft

# Render every figure created from here on at 600 dpi. This is set through
# rcParams because plt.figure(dpi=600) would only open one stray, empty
# figure and leave the figures created later at the default resolution.
matplotlib.rcParams['figure.dpi'] = 600
# Use the SimHei font so the Chinese titles and axis labels can be displayed.
matplotlib.rc('font', family='SimHei')
# Draw negative tick labels with an ASCII hyphen instead of the Unicode minus.
matplotlib.rcParams['axes.unicode_minus'] = False

def displayWaveform(sample1, sample2):
    """Plot the time-domain waveforms of two audio files, one above the other.

    Each file is read with ``librosa.load(..., sr=16000)``. The sample count
    and sampling rate of both signals are printed, then the two waveforms are
    drawn in a 2-row figure whose y axes are both fixed to [-0.5, 0.5].

    :param sample1: audio file passed to ``librosa.load`` (upper subplot)
    :param sample2: audio file passed to ``librosa.load`` (lower subplot)
    :return: None; the figure is displayed with ``plt.show()``
    """
    samples1, sr1 = librosa.load(sample1, sr=16000)
    samples2, sr2 = librosa.load(sample2, sr=16000)

    print(len(samples1), sr1)
    print(len(samples2), sr2)
    # Time stamp of every sample in seconds: sample index * sampling period.
    time1 = np.arange(0, len(samples1)) * (1.0 / sr1)
    time2 = np.arange(0, len(samples2)) * (1.0 / sr2)

    plt.figure(figsize=(18, 8))
    plt.subplot(211)
    plt.plot(time1, samples1)
    plt.title('语音信号1时域波形')
    plt.xlabel('时长(秒)')
    plt.ylabel('振幅')
    plt.ylim(-0.5, 0.5)  # same fixed y range on both subplots so they compare directly
    plt.subplot(212)
    plt.plot(time2, samples2)
    plt.title('语音信号2时域波形')
    plt.xlabel('时长(秒)')
    plt.ylabel('振幅')
    plt.ylim(-0.5, 0.5)  # plt.ylim only affects the current subplot, hence the repeat
    plt.subplots_adjust(hspace=0.5)  # extra vertical gap so titles and x labels do not overlap
    plt.show()

def displaySpectrum(sample1, sample2):
    """Plot the FFT magnitude spectra of two audio files, one above the other.

    Each file is read with ``librosa.load(..., sr=16000)`` and transformed
    with a single full-length FFT. Array lengths, types and value ranges are
    printed as diagnostics before plotting.

    :param sample1: audio file passed to ``librosa.load`` (upper subplot)
    :param sample2: audio file passed to ``librosa.load`` (lower subplot)
    :return: None; the figure is displayed with ``plt.show()``
    """
    x1, sr1 = librosa.load(sample1, sr=16000)
    x2, sr2 = librosa.load(sample2, sr=16000)
    print(len(x1), len(x2))

    # Magnitude is the modulus of each complex FFT coefficient.
    magnitude1 = np.absolute(fft(x1))
    magnitude2 = np.absolute(fft(x2))
    # Bin k of an N-point FFT lies at k * sr / N, so the grid has to stop one
    # step short of sr (endpoint=False). Including sr itself would stretch
    # every frequency by a factor of N / (N - 1).
    frequency1 = np.linspace(0, sr1, len(magnitude1), endpoint=False)
    frequency2 = np.linspace(0, sr2, len(magnitude2), endpoint=False)

    print(len(magnitude1), type(magnitude1), np.max(magnitude1), np.min(magnitude1))
    print(len(frequency1), type(frequency1), np.max(frequency1), np.min(frequency1))
    print(len(magnitude2), type(magnitude2), np.max(magnitude2), np.min(magnitude2))
    print(len(frequency2), type(frequency2), np.max(frequency2), np.min(frequency2))

    # Show at most the first 40000 bins and never go past the Nyquist bin:
    # for a real-valued signal the upper half of the FFT only mirrors the
    # lower half, so plotting it would add a misleading reflected spectrum.
    shown1 = min(40000, len(magnitude1) // 2 + 1)
    shown2 = min(40000, len(magnitude2) // 2 + 1)

    plt.figure(figsize=(18, 8))
    plt.subplot(211)
    plt.plot(frequency1[:shown1], magnitude1[:shown1])
    plt.title('语音信号1频域谱线')
    plt.xlabel('频率(赫兹)')
    plt.ylabel('幅度')
    plt.subplot(212)
    plt.plot(frequency2[:shown2], magnitude2[:shown2])
    plt.title('语音信号2频域谱线')
    plt.xlabel('频率(赫兹)')
    plt.ylabel('幅度')
    plt.subplots_adjust(hspace=0.5)  # extra vertical gap between the two subplots
    plt.show()

def displaySpectrogram(sample1, sample2):
    """Plot log-frequency dB spectrograms of two audio files, one above the other.

    Each file is read with ``librosa.load(..., sr=16000)``; the STFT magnitude
    is converted to decibels with ``librosa.amplitude_to_db`` and drawn with
    ``librosa.display.specshow``.

    :param sample1: audio file passed to ``librosa.load`` (upper subplot)
    :param sample2: audio file passed to ``librosa.load`` (lower subplot)
    :return: None; the figure is displayed with ``plt.show()``
    """
    x1, sr1 = librosa.load(sample1, sr=16000)
    x2, sr2 = librosa.load(sample2, sr=16000)

    # The STFT is complex-valued; take the modulus explicitly before the dB
    # conversion instead of handing complex data to amplitude_to_db.
    spectrogram1 = librosa.amplitude_to_db(np.abs(librosa.stft(x1)))
    spectrogram2 = librosa.amplitude_to_db(np.abs(librosa.stft(x2)))

    plt.figure(figsize=(18, 8))
    plt.subplot(211)
    # Pass the real sampling rate and request a time axis: without them
    # specshow falls back to its own default rate for the frequency ticks and
    # draws no seconds scale, contradicting the axis labels set below.
    librosa.display.specshow(spectrogram1, sr=sr1, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title('语音信号1对数谱图')
    plt.xlabel('时长(秒)')
    plt.ylabel('频率(赫兹)')
    plt.subplot(212)
    librosa.display.specshow(spectrogram2, sr=sr2, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title('语音信号2对数谱图')
    plt.xlabel('时长(秒)')
    plt.ylabel('频率(赫兹)')
    plt.subplots_adjust(hspace=0.5)  # extra vertical gap between the two subplots
    plt.show()

# Script entry point: draw the waveform, spectrum and spectrogram figures for
# the same pair of wav files. The guard must compare __name__ with
# '__main__'; the bare names `name` / 'main' raise NameError.
if __name__ == '__main__':
    sample1 = r'p232_036.wav'
    sample2 = r'enhanced1_p232_036.wav'
    displayWaveform(sample1, sample2)
    displaySpectrum(sample1, sample2)
    displaySpectrogram(sample1, sample2)

修改纵坐标刻度范围：可以使用plt.ylim()函数来设置纵坐标的显示范围（如需修改刻度的位置或标签，应使用plt.yticks()），例如：

plt.ylim(-0.5, 0.5) # 将纵坐标刻度范围限定在[-0.5, 0.5]之间

plt.ylim()只作用于当前子图，因此需要在displayWaveform()函数中每个子图的绘图代码之后各调用一次，即可实现对纵坐标刻度范围的修改。完整代码如下：

import librosa
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
from scipy.fft import fft
import librosa.display



# Default resolution for all figures created later. plt.figure(dpi=600) is
# not used here because it would open one empty figure at import time and
# would not change the resolution of the figures the functions create.
matplotlib.rcParams['figure.dpi'] = 600
matplotlib.rc('font', family='SimHei')  # font used to display the Chinese labels
matplotlib.rcParams['axes.unicode_minus'] = False  # ASCII hyphen for negative tick labels


def displayWaveform(sample1, sample2):
    """Draw the time-domain waveforms of two audio files in stacked subplots.

    Both files are read with ``librosa.load(..., sr=16000)``. The number of
    samples and the sampling rate of each signal are printed first; the
    waveforms are then plotted against time in seconds with the y axis of
    each subplot fixed to [-0.5, 0.5].

    :param sample1: audio file passed to ``librosa.load`` (upper subplot)
    :param sample2: audio file passed to ``librosa.load`` (lower subplot)
    :return: None; the figure is displayed with ``plt.show()``
    """
    loaded = [librosa.load(source, sr=16000) for source in (sample1, sample2)]

    for signal, rate in loaded:
        print(len(signal), rate)

    plt.figure(figsize=(18, 8))
    for row, (signal, rate) in enumerate(loaded, start=1):
        # Sample index times the sampling period gives the time in seconds.
        seconds = np.arange(0, len(signal)) * (1.0 / rate)
        plt.subplot(2, 1, row)
        plt.plot(seconds, signal)
        plt.title(f'语音信号{row}时域波形')
        plt.xlabel('时长(秒)')
        plt.ylabel('振幅')
        # plt.ylim acts on the current subplot only, so it is set per row.
        plt.ylim(-0.5, 0.5)

    # Widen the vertical gap between the two subplots.
    plt.subplots_adjust(hspace=0.5)
    plt.show()

def _magnitude_spectrum(signal, sr):
    """Return ``(frequency, magnitude)`` of the full-length FFT of *signal*.

    ``magnitude`` is the modulus of every FFT coefficient. ``frequency`` holds
    the matching bin frequencies ``k * sr / N``; the grid therefore excludes
    ``sr`` itself (``endpoint=False``), otherwise every value would be
    stretched by ``N / (N - 1)``.
    """
    magnitude = np.absolute(fft(signal))
    frequency = np.linspace(0, sr, len(magnitude), endpoint=False)
    return frequency, magnitude


def displaySpectrum(sample1, sample2):
    """Draw the FFT magnitude spectra of two audio files in stacked subplots.

    Both files are read with ``librosa.load(..., sr=16000)``. Lengths, types
    and value ranges of the spectra are printed as diagnostics, then at most
    the first 40000 bins of each spectrum are plotted.

    :param sample1: audio file passed to ``librosa.load`` (upper subplot)
    :param sample2: audio file passed to ``librosa.load`` (lower subplot)
    :return: None; the figure is displayed with ``plt.show()``
    """
    x1, sr1 = librosa.load(sample1, sr=16000)
    x2, sr2 = librosa.load(sample2, sr=16000)
    print(len(x1), len(x2))

    spectra = [_magnitude_spectrum(x1, sr1), _magnitude_spectrum(x2, sr2)]

    for frequency, magnitude in spectra:
        print(len(magnitude), type(magnitude), np.max(magnitude), np.min(magnitude))
        print(len(frequency), type(frequency), np.max(frequency), np.min(frequency))

    plt.figure(figsize=(18, 8))
    for row, (frequency, magnitude) in enumerate(spectra, start=1):
        # Cap the plot at 40000 bins and at the Nyquist bin: above it the FFT
        # of a real signal is only a mirror image of the lower half.
        shown = min(40000, len(magnitude) // 2 + 1)
        plt.subplot(2, 1, row)
        plt.plot(frequency[:shown], magnitude[:shown])
        plt.title(f'语音信号{row}频域谱线')
        plt.xlabel('频率(赫兹)')
        plt.ylabel('幅度')

    # Widen the vertical gap between the two subplots.
    plt.subplots_adjust(hspace=0.5)
    plt.show()


def displaySpectrogram(sample1, sample2):
    """Draw log-frequency dB spectrograms of two audio files in stacked subplots.

    Both files are read with ``librosa.load(..., sr=16000)``. For each signal
    the STFT magnitude is converted to decibels with
    ``librosa.amplitude_to_db`` and rendered by ``librosa.display.specshow``
    with a colour bar.

    :param sample1: audio file passed to ``librosa.load`` (upper subplot)
    :param sample2: audio file passed to ``librosa.load`` (lower subplot)
    :return: None; the figure is displayed with ``plt.show()``
    """
    loaded = [librosa.load(source, sr=16000) for source in (sample1, sample2)]

    plt.figure(figsize=(18, 8))
    for row, (signal, rate) in enumerate(loaded, start=1):
        # librosa.stft returns complex values; take the modulus explicitly so
        # amplitude_to_db receives real amplitudes rather than complex input.
        level_db = librosa.amplitude_to_db(np.abs(librosa.stft(signal)))

        plt.subplot(2, 1, row)
        # Give specshow the actual sampling rate and ask for a time axis;
        # otherwise the frequency ticks are computed for specshow's default
        # rate and the x axis carries no seconds scale despite its label.
        librosa.display.specshow(level_db, sr=rate, x_axis='time', y_axis='log')
        plt.colorbar(format='%+2.0f dB')
        plt.title(f'语音信号{row}对数谱图')
        plt.xlabel('时长(秒)')
        plt.ylabel('频率(赫兹)')

    # Widen the vertical gap between the two subplots.
    plt.subplots_adjust(hspace=0.5)
    plt.show()


# Script entry point: produce the three figures for the same pair of wav files.
if __name__ == '__main__':
    sample1, sample2 = r'p232_036.wav', r'enhanced1_p232_036.wav'
    for render in (displayWaveform, displaySpectrum, displaySpectrogram):
        render(sample1, sample2)

Python语音信号处理:时域波形、频域谱线和对数谱图可视化

原文地址: https://www.cveoy.top/t/topic/nvYI 著作权归作者所有。请勿转载和采集!

免费AI点我,无需注册和登录