Matlab实现WAV音频文件计算声品质参数：dBA、响度、粗糙度、尖锐度、波动度

1.dBA

首先读取WAV文件

[x,Fs] = audioread('pink.wav');   %读取音频文件

对时域信号进行加窗划分

function [dBA,dBZ,t,windowTime] = analyzeSignal(x,Fs)
responseType = 'fast';
C = 55;
t = 1/Fs:1/Fs:length(x)/Fs;
%% 确定傅里叶窗的大小
if strcmp(responseType,'slow')
   duration = 1.0;
else
   duration = 0.125;
end
N = ceil(duration*Fs);
N = 2^nextpow2(N);
%% 确定信号的dBA
windowStart = [1:N:(length(x)-N)];
dBA = zeros(length(windowStart),1);
dBZ = zeros(length(windowStart),1);
windowTime = t(windowStart+round((N-1)/2));
for i = [1:length(windowStart)]
   [dBA(i),dBZ(i)] = estimateLevel(x(windowStart(i)-1+[1:N]),Fs,C);
end

调用子函数计算dBA

function [dBA,dBZ] = estimateLevel(x,Fs,C)
X = abs(fft(x));
%% 避免产生对数取0的情况
X(find(X == 0)) = 1e-17;
%% 保证奈奎斯特采样定律
f = (Fs/length(X))*[0:(length(X)-1)];
ind = find(f<Fs/2); f = f(ind); X = X(ind);
%% 通过A-weighting滤波器实现dBA计算
A = filterA(f);
XA = A'.*X;
%Z-weighting filter
Z = zeros(length(f),1);
Z(1:end) = 1;
XZ = Z.*X;
%% 用能量计算dBA
totalEnergy = sum(XA.^2)/length(XA);
meanEnergy = totalEnergy/((1/Fs)*length(x));
dBA = 10*log10(meanEnergy)+C;

totalEnergy1 = sum(XZ.^2)/length(XZ);
meanEnergy1 = totalEnergy1/((1/Fs)*length(x));
dBZ = 10*log10(meanEnergy1)+C;

A-weighted滤波器子函数

function A = filterA(f,plotFilter)
%% Define filter coefficients.
c1 = 3.5041384e16;
c2 = 20.598997^2;
c3 = 107.65265^2;
c4 = 737.86223^2;
c5 = 12194.217^2;

%% Evaluate A-weighting filter.
f(find(f == 0)) = 1e-17;
f = f.^2; num = c1*f.^4;
den = ((c2+f).^2) .* (c3+f) .* (c4+f) .* ((c5+f).^2);
A = num./den;

%% 画dBA计权图.
if exist('plotFilter') & plotFilter
    
   % Plot using dB scale.
   figure(2); clf;
   semilogx(sqrt(f),10*log10(A));
   title('A-weighting Filter');
   xlabel('Frequency (Hz)');
   ylabel('Magnitude (dB)');
   xlim([10 100e3]); grid on;
   ylim([-70 10]);
   
   % Plot using linear scale.
   figure(3); plot(sqrt(f),A);
   title('A-weighting Filter');
   xlabel('Frequency (Hz)');
   ylabel('Amplitude');
   xlim([0 44.1e3/2]); grid on;

end

2.响度

首先确定窗的大小

function [Loudness,Sharpness,Roughness,Fluctuation] = frameToCalculate(x,Fs);
%% 划分时域帧的长度
responseType = 'fast';
%避免输入信号不存在的情况
if ~exist('x')
   [x,Fs] = audioread();
   t = (1/Fs)*[0:(length(x)-1)]; t = t+81;
else
   t = (1/Fs)*[0:(length(x)-1)];
end
%根据fast和slow进行时间帧的划分
if strcmp(responseType,'slow')
   duration = 1;
else
   duration = 0.125;
end
N = ceil(duration*Fs);
N = 2^nextpow2(N);
% Estimate signal level (within each windowed segment).
windowStart = [1:N:(length(x)-N)];
Loudness = zeros(length(windowStart),1);
Sharpness = zeros(length(windowStart),1);
Roughness = zeros(length(windowStart),1);
Fluctuation = zeros(length(windowStart),1);
windowTime = t(windowStart+round((N-1)/2));
%% 按照划分的时间帧，分帧计算每帧的平均响度
for i = [1:length(windowStart)]
   Loudness(i) = estimateLoudness(x(windowStart(i)-1+[1:N]),Fs);
   Sharpness(i) = estimateSharpness(x(windowStart(i)-1+[1:N]),Fs);
   Roughness(i) = estimateRoughness(x(windowStart(i)-1+[1:N]),Fs);
   Fluctuation(i) = estimateFluctuation(x(windowStart(i)-1+[1:N]),Fs);
end
end

本文采用Zwicker模型，通过24Barks的特征响度进行总响度计算：

function loudness = estimateLoudness(x,Fs)
%% 24Barks划分
fc = [20 100 200 300 400 510 630 770 920 1080 1270 1480 1720 2000 2320 2700 3150 3700 4400 5300 6400 7700 9500 12000;
    50 150 250 350 450 570 700 840 1000 1170 1370 1600 1850 2150 2500 2900 3400 4000 4800 5800 7000 8500 10500 13500;
    100 200 300 400 510 630 770 920 1080 1270 1480 1720 2000 2320 2700 3150 3700 4400 5300 6400 7700 9500 12000 15500];
BandWidth = [80 100 100 100 110 120 140 150 160 190 210 240 280 320 380 450 550 700 900 1100 1300 1800 2500 3500];
%% 中心频带率，有何意义
z = zeros(1,length(fc(2,:)));
for i = 1:length(z)
    z(i) = 13.*atan(0.76*fc(2,i)/1000)+3.5.*(atan(fc(2,i)/1000)).^2; %?
end
%% 听阈下限声压级，用getData在听阈曲线上估计出来
Lspl = [48.72457794 28.15401982 16.78690581 10.97541703 7.18374514 3.645302897 1.116392826 -0.152013044 0.093691208 -0.165558761 -0.9278816 -1.94268155 -2.95710523 -4.475730591 -6.751034742 -7.765458422 -8.528157532 -8.030352439 -5.516869434 -1.490781387 4.047911702 6.813119278 8.569170952 11.83632259];
%% Calculate magnitude of FFT.
X = abs(fft(x));
X(find(X == 0)) = 1e-17;%避免产生对数取0的情况
% Retain frequencies below Nyquist rate.
f = (Fs/length(X))*[0:(length(X)-1)];
ind = find(f<Fs/2); f = f(ind); X = X(ind);
%% 按bark进行声压级计算
SPL = zeros(1,24);
for i = 1:24
    location = find(f>=fc(1,i)&f<=fc(3,i));
    if ~isempty(location)
        totalEnergy = sum(X(location).^2)/length(location);
        SPL(i) = 10*log10(totalEnergy/((1/Fs)*length(x)))+55;
    else
        SPL(i) = 0;
    end
end
%% 按Bark进行响度计算
N = zeros(1,length(Lspl));
N = 0.08.*((10.^(Lspl/10)).^0.23).*((0.5+0.5.*10.^((SPL-Lspl)/10)).^0.23-1);%得到24bark响度分布图
loudness = sum(N);
end

3.尖锐度Sharpness

同样选择Zwicker模型

function sharpness = estimateSharpness(x,Fs)
%% 24Barks划分
fc = [20 100 200 300 400 510 630 770 920 1080 1270 1480 1720 2000 2320 2700 3150 3700 4400 5300 6400 7700 9500 12000;
    50 150 250 350 450 570 700 840 1000 1170 1370 1600 1850 2150 2500 2900 3400 4000 4800 5800 7000 8500 10500 13500;
    100 200 300 400 510 630 770 920 1080 1270 1480 1720 2000 2320 2700 3150 3700 4400 5300 6400 7700 9500 12000 15500];
BandWidth = [80 100 100 100 110 120 140 150 160 190 210 240 280 320 380 450 550 700 900 1100 1300 1800 2500 3500];
%% 中心频带率，有何意义？
z = zeros(1,length(fc(2,:)));
for i = 1:length(z)
    z(i) = 13.*atan(0.76*fc(2,i)/1000)+3.5.*(atan(fc(2,i)/1000)).^2; %?
end
%% 根据中心频带率z加上响度计权函数
gz = zeros(1,length(z));
for i = 1:length(z)
    if z(i)<=16
        gz(i) = z(i);
    else
        gz(i) = 0.06.*exp(0.17*z(i));
    end
end
%% 听阈下限声压级，用getData在听阈曲线上估计出来
Lspl = [48.72457794 28.15401982 16.78690581 10.97541703 7.18374514 3.645302897 1.116392826 -0.152013044 0.093691208 -0.165558761 -0.9278816 -1.94268155 -2.95710523 -4.475730591 -6.751034742 -7.765458422 -8.528157532 -8.030352439 -5.516869434 -1.490781387 4.047911702 6.813119278 8.569170952 11.83632259];
%% Calculate magnitude of FFT.
X = abs(fft(x));
X(find(X == 0)) = 1e-17;%避免产生对数取0的情况
% Retain frequencies below Nyquist rate.
f = (Fs/length(X))*[0:(length(X)-1)];
ind = find(f<Fs/2); f = f(ind); X = X(ind);
%% 按bark进行声压级计算
SPL = zeros(1,24);
for i = 1:24
    location = find(f>=fc(1,i)&f<=fc(3,i));
    if ~isempty(location)
        totalEnergy = sum(X(location).^2)/length(location);
        SPL(i) = 10*log10(totalEnergy/((1/Fs)*length(x)))+55;
    else
        SPL(i) = 0;
    end
end
%% 按Bark进行响度计算
N = zeros(1,length(Lspl));
N = 0.08.*((10.^(Lspl/10)).^0.23).*((0.5+0.5.*10.^((SPL-Lspl)/10)).^0.23-1);%得到24bark响度分布图
%% 计算sharpness
sharpness = 0.104.*sum(N.*gz.*z)./sum(N);
end

4.粗糙度

粗糙度计算公式

由于掩蔽深度无法用数学公式进行描述，在这里用一个系数乘以响度简单代替（希望各位能教教我更准确的数值计算方法）

function roughness = estimateRoughness(x,Fs)
%% 24Barks划分
fc = [20 100 200 300 400 510 630 770 920 1080 1270 1480 1720 2000 2320 2700 3150 3700 4400 5300 6400 7700 9500 12000;
    50 150 250 350 450 570 700 840 1000 1170 1370 1600 1850 2150 2500 2900 3400 4000 4800 5800 7000 8500 10500 13500;
    100 200 300 400 510 630 770 920 1080 1270 1480 1720 2000 2320 2700 3150 3700 4400 5300 6400 7700 9500 12000 15500];
BandWidth = [80 100 100 100 110 120 140 150 160 190 210 240 280 320 380 450 550 700 900 1100 1300 1800 2500 3500];
%% 中心频带率
z = zeros(1,length(fc(2,:)));
for i = 1:length(z)
    z(i) = 13.*atan(0.76*fc(2,i)/1000)+3.5.*(atan(fc(2,i)/1000)).^2; %
end
%% 听阈下限声压级，用getData在听阈曲线上估计出来
Lspl = [48.72457794 28.15401982 16.78690581 10.97541703 7.18374514 3.645302897 1.116392826 -0.152013044 0.093691208 -0.165558761 -0.9278816 -1.94268155 -2.95710523 -4.475730591 -6.751034742 -7.765458422 -8.528157532 -8.030352439 -5.516869434 -1.490781387 4.047911702 6.813119278 8.569170952 11.83632259];
%% Calculate magnitude of FFT.
X = abs(fft(x));
X(find(X == 0)) = 1e-17;%避免产生对数取0的情况
% Retain frequencies below Nyquist rate.
f = (Fs/length(X))*[0:(length(X)-1)];
ind = find(f<Fs/2); f = f(ind); X = X(ind);
%% 按bark进行声压级计算
SPL = zeros(1,24);
for i = 1:24
    location = find(f>=fc(1,i)&f<=fc(3,i));
    if ~isempty(location)
        totalEnergy = sum(X(location).^2)/length(location);
        SPL(i) = 10*log10(totalEnergy/((1/Fs)*length(x)))+55;
    else
        SPL(i) = 0;
    end
end
%% 按Bark进行粗糙度计算
N = zeros(1,length(Lspl));
N = 0.08.*((10.^(Lspl/10)).^0.23).*((0.5+0.5.*10.^((SPL-Lspl)/10)).^0.23-1);%得到24bark响度分布图
fmod = 300/1000;  %调制声频率
k = 0.01;   %掩蔽深度转换为响度的系数
L = k.*N;   %掩蔽深度
roughness = 0.3.*fmod.*sum(L);
end

5.波动度

根据计算模型

function fluctuation = estimateFluctuation(x,Fs)
%% 24Barks划分
fc = [20 100 200 300 400 510 630 770 920 1080 1270 1480 1720 2000 2320 2700 3150 3700 4400 5300 6400 7700 9500 12000;
    50 150 250 350 450 570 700 840 1000 1170 1370 1600 1850 2150 2500 2900 3400 4000 4800 5800 7000 8500 10500 13500;
    100 200 300 400 510 630 770 920 1080 1270 1480 1720 2000 2320 2700 3150 3700 4400 5300 6400 7700 9500 12000 15500];
BandWidth = [80 100 100 100 110 120 140 150 160 190 210 240 280 320 380 450 550 700 900 1100 1300 1800 2500 3500];
%% 中心频带率
z = zeros(1,length(fc(2,:)));
for i = 1:length(z)
    z(i) = 13.*atan(0.76*fc(2,i)/1000)+3.5.*(atan(fc(2,i)/1000)).^2; %?
end
%% 听阈下限声压级，用getData在听阈曲线上估计出来
Lspl = [48.72457794 28.15401982 16.78690581 10.97541703 7.18374514 3.645302897 1.116392826 -0.152013044 0.093691208 -0.165558761 -0.9278816 -1.94268155 -2.95710523 -4.475730591 -6.751034742 -7.765458422 -8.528157532 -8.030352439 -5.516869434 -1.490781387 4.047911702 6.813119278 8.569170952 11.83632259];
%% Calculate magnitude of FFT.
dBZ(dBZ == 0) = 1e-17;%避免产生对数取0的情况
%% 按bark进行声压级计算
SPL = zeros(1,24);
for i = 1:24
    location = find(f>=fc(1,i)&f<=fc(3,i));
    if ~isempty(location)
        SPL(i) = sum(X(location))/length(location);
    else
        SPL(i) = 0;
    end
end
%% 按Bark进行响度计算
N = zeros(1,length(Lspl));
N = 0.08.*((10.^(Lspl/10)).^0.23).*((0.5+0.5.*10.^((SPL-Lspl)/10)).^0.23-1);%得到24bark响度分布图
k = 0.01;   %掩蔽深度转换为响度的系数
L = k.*N;   %掩蔽深度
fmod = 300/1000;  %调制频率
fluctuation = 0.008.*sum(L)/((fmod/4)+(4/fmod));
end

（对于掩蔽深度的计算暂未找到合适的数学模型）

Matlab实现WAV音频文件计算声品质参数：dBA、响度、粗糙度、尖锐度、波动度

猜你喜欢