Python语音信号处理:波形、频谱和声谱图可视化
import librosa
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
from scipy.fft import fft
import librosa.display
# Raise the default resolution of every figure created from here on.
# Calling plt.figure(dpi=600) at this point would only open one extra, empty
# 600-dpi figure; figures created later would still use the default dpi.
# NOTE: at 600 dpi an 18x8 inch figure is 10800x4800 pixels - lower the value
# if the windows become too large for on-screen viewing.
matplotlib.rcParams['figure.dpi'] = 600
matplotlib.rc("font", family='SimHei')  # font used so the Chinese titles/labels render
matplotlib.rcParams['axes.unicode_minus'] = False  # draw minus signs as ASCII '-' so they render with this font
def _plot_waveform(position, samples, sr, title):
    """Draw one waveform against time in subplot *position* of the current figure."""
    # Sample index -> seconds.
    time = np.arange(0, len(samples)) * (1.0 / sr)
    plt.subplot(position)
    plt.plot(time, samples)
    plt.title(title)
    plt.xlabel("时长(秒)")
    plt.ylabel("振幅")


def displayWaveform(sample1, sample2, sr=16000):  # show time-domain waveforms
    """Plot the time-domain waveforms of two audio files, one above the other.

    :param sample1: path of the first audio file (top subplot)
    :param sample2: path of the second audio file (bottom subplot)
    :param sr: sample rate handed to ``librosa.load`` for both files
        (default 16000, the value that used to be hard-coded)
    :return: None; the figure is displayed with ``plt.show()``
    """
    samples1, sr1 = librosa.load(sample1, sr=sr)
    samples2, sr2 = librosa.load(sample2, sr=sr)
    # Report the number of samples and the sample rate of each signal.
    print(len(samples1), sr1)
    print(len(samples2), sr2)

    plt.figure(figsize=(18, 8))
    _plot_waveform(211, samples1, sr1, "语音信号1时域波形")
    _plot_waveform(212, samples2, sr2, "语音信号2时域波形")
    plt.subplots_adjust(hspace=0.5)  # vertical gap so titles and x labels do not overlap
    # To write the figure to disk, call plt.savefig(<path>, dpi=600) before plt.show().
    plt.show()
def _magnitude_spectrum(signal, sr):
    """Return ``(frequency, magnitude)`` for the full-length FFT of *signal*.

    Bin k of an N-point FFT lies at k * sr / N, so the frequency axis has to
    stop one bin short of ``sr``.  Including the endpoint would place every
    bin at k * sr / (N - 1) instead.
    """
    magnitude = np.absolute(fft(signal))  # modulus of the complex FFT output
    frequency = np.linspace(0, sr, len(magnitude), endpoint=False)
    return frequency, magnitude


def displaySpectrum(sample1, sample2, sr=16000, max_bins=40000):  # show frequency-domain spectra
    """Plot the FFT magnitude spectra of two audio files on one shared set of axes.

    Drawing both curves in the same axes gives them identical x and y scales,
    so they can be compared directly.

    :param sample1: path of the first audio file (legend entry '原始语音')
    :param sample2: path of the second audio file (legend entry '增强语音')
    :param sr: sample rate handed to ``librosa.load`` for both files
        (default 16000, the value that used to be hard-coded)
    :param max_bins: number of leading FFT bins to draw for each signal
        (default 40000, the value that used to be hard-coded); ``None`` draws all bins
    :return: None; the figure is displayed with ``plt.show()``
    """
    x1, sr1 = librosa.load(sample1, sr=sr)
    x2, sr2 = librosa.load(sample2, sr=sr)
    print(len(x1), len(x2))

    frequency1, magnitude1 = _magnitude_spectrum(x1, sr1)
    frequency2, magnitude2 = _magnitude_spectrum(x2, sr2)
    # Length, type and value range of each array, in the order
    # magnitude1, frequency1, magnitude2, frequency2.
    for values in (magnitude1, frequency1, magnitude2, frequency2):
        print(len(values), type(values), np.max(values), np.min(values))

    plt.figure(figsize=(18, 8))
    plt.plot(frequency1[:max_bins], magnitude1[:max_bins], label='原始语音')  # magnitude spectrum
    plt.plot(frequency2[:max_bins], magnitude2[:max_bins], label='增强语音')  # magnitude spectrum
    plt.title("语音信号频域谱线")
    plt.xlabel("频率(赫兹)")
    plt.ylabel("幅度")
    plt.legend()
    # To write the figure to disk, call plt.savefig(<path>, dpi=600) before plt.show().
    plt.show()
def _plot_spectrogram(position, spectrogram_db, sr, title):
    """Draw one dB-scaled spectrogram in subplot *position* of the current figure."""
    plt.subplot(position)
    # Pass the real sample rate and request a time axis: without ``sr``
    # specshow scales the frequency axis for its own default rate (22050 Hz),
    # and without ``x_axis='time'`` the x axis carries no time ticks even
    # though it is labelled in seconds below.
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(title)
    plt.xlabel('时长(秒)')
    plt.ylabel('频率(赫兹)')


def displaySpectrogram(sample1, sample2, sr=16000):  # show log-frequency spectrograms
    """Plot the STFT magnitude spectrograms (in dB) of two audio files, one above the other.

    :param sample1: path of the first audio file (top subplot)
    :param sample2: path of the second audio file (bottom subplot)
    :param sr: sample rate handed to ``librosa.load`` for both files
        (default 16000, the value that used to be hard-coded)
    :return: None; the figure is displayed with ``plt.show()``
    """
    x1, sr1 = librosa.load(sample1, sr=sr)
    x2, sr2 = librosa.load(sample2, sr=sr)

    # Short-time Fourier transform -> magnitude -> dB.  The modulus is taken
    # explicitly because amplitude_to_db expects a real-valued magnitude
    # rather than the complex STFT output.
    spectrogram1 = librosa.amplitude_to_db(np.abs(librosa.stft(x1)))
    spectrogram2 = librosa.amplitude_to_db(np.abs(librosa.stft(x2)))

    plt.figure(figsize=(18, 8))
    _plot_spectrogram(211, spectrogram1, sr1, '语音信号1对数谱图')
    _plot_spectrogram(212, spectrogram2, sr2, '语音信号2对数谱图')
    plt.subplots_adjust(hspace=0.5)  # vertical gap so titles and x labels do not overlap
    plt.show()
if __name__ == '__main__':
    # The recording and its enhanced version, shown in every view in turn:
    # waveform, magnitude spectrum, spectrogram.
    wav_pair = (r'p376_295.wav', r'enhanced_p376_295.wav')
    for render in (displayWaveform, displaySpectrum, displaySpectrogram):
        render(*wav_pair)
让对比图中的横纵坐标保持一致,使第二个图与第一个图一致。做法:将 displaySpectrum 函数中的以下两行代码:
plt.subplot(211)
plt.plot(frequency1[:40000], magnitude1[:40000]) # magnitude spectrum
修改为(即上文 displaySpectrum 中已采用的写法:把两条谱线画在同一坐标系中,使它们共用横纵坐标):
plt.figure(figsize=(18, 8))
plt.plot(frequency1[:40000], magnitude1[:40000], label='原始语音') # magnitude spectrum
plt.plot(frequency2[:40000], magnitude2[:40000], label='增强语音') # magnitude spectrum
plt.title("语音信号频域谱线")
plt.xlabel("频率(赫兹)")
plt.ylabel("幅度")
plt.legend()
# plt.savefig("your dir\语音信号频谱图", dpi=600)
plt.show()
原文地址: https://www.cveoy.top/t/topic/nCDJ 著作权归作者所有。请勿转载和采集!