导入相关包
import librosa
import numpy as np
import matplotlib.pyplot as plt
from playsound import playsound
import soundfile as sf
加载音源
# 干净的声音
clean_wav_file = 'sf1_cln.wav'
clean, fs = librosa.load(clean_wav_file, sr=None)
# 带噪音的声音
noise_wav_file = 'sf1_n0L.wav'
noise, fs = librosa.load(noise_wav_file, sr=None)
# 播放对比
playsound(clean_wav_file)
playsound(noise_wav_file)
谱减法-简单减
# 计算noise信号的频谱
S_noisy = librosa.stft(noise, n_fft=256, hop_length=128, win_length=256)
D,T = S_noisy.shape
Mag_noisy = np.abs(S_noisy)
Phase_noisy = np.angle(S_noisy)
# 能量谱
Power_noisy = Mag_noisy**2
# 估计噪声信号的能量,由于噪声信号未知,这里假设含噪信号的前30帧为噪声
Mag_noise = np.mean(np.abs(S_noisy[:,:30]), axis=1, keepdims=True)
Power_noise = Mag_noise**2
Power_noise = np.tile(Power_noise, [1,T])
# 能量谱减或者幅度谱减都可以
Power_enhance = Power_noisy - Power_noise
# 保证能量大于0
Power_enhance[Power_enhance < 0] = 0
Mag_enhance = np.sqrt(Power_enhance)
# 对信号进行恢复
S_enhance = Mag_enhance * np.exp(1j * Phase_noisy)
enhance = librosa.istft(S_enhance, hop_length=128, win_length=256)
sf.write('enhance1.wav', enhance, fs)
# 播放对比
playsound(noise_wav_file)
playsound('enhance1.wav')
# 绘制频谱
plt.subplot(3, 1, 1)
plt.specgram(clean, NFFT=256, Fs=fs)
plt.xlabel('clean specgram')
plt.subplot(3, 1, 2)
plt.specgram(noise, NFFT=256, Fs=fs)
plt.xlabel('noise specgram')
plt.subplot(3, 1, 3)
plt.specgram(enhance, NFFT=256, Fs=fs)
plt.xlabel('enhance specgram')
谱减法-过减法
S_noisy = librosa.stft(noise, n_fft=256, hop_length=128, win_length=256)
D,T = S_noisy.shape
Mag_noisy = np.abs(S_noisy)
Phase_noisy = np.angle(S_noisy)
Power_noisy = Mag_noisy**2
Mag_noise = np.mean(np.abs(S_noisy[:,:30]), axis=1, keepdims=True)
Power_noise = Mag_noise**2
Power_noise = np.tile(Power_noise, [1,T])
alpha = 4
gamma = 1
# 多减点
Power_enhance = np.power(Power_noisy, gamma) - alpha * np.power(Power_noise, gamma)
Power_enhance = np.power(Power_enhance, 1 / gamma)
# 对于过小的值用beta*Power_noise替代
beta= 0.0001
mask= (Power_enhance >= beta * Power_noise) - 0
Power_enhance = mask * Power_enhance + beta * (1 - mask) * Power_noise
Mag_enhance = np.sqrt(Power_enhance)
# 信号恢复
S_enhance = Mag_enhance * np.exp(1j * Phase_noisy)
enhance = librosa.istft(S_enhance, hop_length=128, win_length=256)
sf.write('enhance2.wav', enhance, fs)
plt.specgram(enhance, NFFT=256, Fs=fs)
plt.xlabel('enhance specgram')
谱减法-引入平滑机制(能量谱平滑)
S_noisy = librosa.stft(noise, n_fft=256, hop_length=128, win_length=256)
D,T = S_noisy.shape
Mag_noisy = np.abs(S_noisy)
Phase_noisy = np.angle(S_noisy)
Power_noisy = Mag_noisy**2
Mag_noise = np.mean(np.abs(S_noisy[:,:30]), axis=1, keepdims=True)
Power_noise = Mag_noise**2
Power_noise = np.tile(Power_noise, [1,T])
# 先对幅度谱平滑一次
Mag_noisy_new = np.copy(Mag_noisy)
k = 1
for t in range(k, T-k):
Mag_noisy_new[:,t] = np.mean(Mag_noisy[:, t-k:t+k+1], axis=1)
Power_noisy = Mag_noisy_new**2
# 超减法去噪
alpha = 4
gamma = 1
Power_enhance = np.power(Power_noisy, gamma) - alpha * np.power(Power_noise, gamma)
Power_enhance = np.power(Power_enhance, 1 / gamma)
# 对于过小的值用beta*Power_noise替代
beta= 0.0001
mask= (Power_enhance >= beta * Power_noise) - 0
Power_enhance = mask * Power_enhance + beta * (1 - mask) * Power_noise
Mag_enhance = np.sqrt(Power_enhance)
Mag_enhance_new = np.copy(Mag_enhance)
# 计算最大噪声残差
maxnr = np.max(np.abs(S_noisy[:,:31]) - Mag_noise, axis=1)
# 平滑
k = 1
for t in range(k, T-k):
index = np.where(Mag_enhance[:, t] < maxnr)[0]
temp = np.min(Mag_enhance[:, t-k:t+k+1], axis=1)
Mag_enhance_new[index, t] = temp[index]
# 信号恢复
S_enhance = Mag_enhance * np.exp(1j * Phase_noisy)
enhance = librosa.istft(S_enhance, hop_length=128, win_length=256)
sf.write('enhance3.wav', enhance, fs)
plt.specgram(enhance, NFFT=256, Fs=fs)
plt.xlabel('enhance specgram')
课件及资料:SpeechProcessing: 语音处理 - Gitee.com