This documentation page applies to an outdated AMT version (1.5.0). Click here for the most recent page.
function [diff, avgDiff_ERB, norm] = mckenzie2022( A, B, domFlag, f, norm, w, plotFlag, lim)
%MCKENZIE2022 Binaural perceptual similarity (Predicted Binaural Colouration)
% Usage: [diff, avgDiff_ERB, norm] = mckenzie2022( A, B, domFlag, f);
%
% Input parameters:
% A : Reference input data matrix
% B : Test input data matrix
% domFlag : Specifies whether input data is in time (0) / frequency (1) / frequency_dB (2) domain
% f : struct, if (domFlag == 0) it contains fs, nfft, minFreq, maxFreq; if (domFlag == 1 or 2), it specifies the FFT sample frequencies
%
%
% This function compares the spectra of A and B in terms of PERCEPTUALLY
% WEIGHTED error. For multi-dimensional inputs the comparison is made along
% the first dimension. Averages are output for each column of data.
% Perceptual error takes into account the lesser importance of quieter sounds
% and less sensitive frequencies. Frequency bins are weighted with respect to
% the ISO 226 loudness curves for an average listening level of 75 dB SPL by
% default. A contribution half as loud is deemed half as important using the
% Sones scale.
% The perceptual average difference is further weighted with respect to ERB
% bandwidth. Simply, this reduces the contribution to the average calculation
% of higher frequency components where our ears are less sensitive. It's like
% a logarithmic type average.
% An iterative optimisation process is used to find the input normalisation
% which results in the lowest error metric. This is generally somewhere
% around the point at which the two input signals have the same mean value -
% but can easily vary by a few dB. The optimum normalisation is different for
% perceptual / absolute error metrics.
%
% Optional input parameters:
%
% 'norm' if empty (DEFAULT), Iterate to find optimal normalisation
% else, apply norm dB or normalisation to input B
%
% 'w' (DEFAULT=1) Sample point weightings
%
% 'plotFlag' (DEFAULT=0) Don't show (0) or show (1) plot normalisation curve
%
% 'lim' (DEFAULT=0.05) if lim >= 1 it specififies the
% number of normalisation iterations. If lim < 1 it
% specifies the resolution/change in PBC that must be
% reached by sequential normalisation steps i.e.
% iterations will stop once they result in changes
% to the PBC value that are less than lim (or 100000
% iterations)
%
% 'SPL' (DEFAULT=75) the average dB SPL value at which comparisons are made
%
% 'initInc' (DEFAULT=0.2) The starting offset increment
%
% 'scale' (DEFAULT=0.4) The absolute scaling value by which
% the offset increment is adjusted each time an
% increase in PBC in detected
%
%
%
% References:
% T. McKenzie, C. Armstrong, L. Ward, D. Murphy, and G. Kearney.
% Predicting the colouration between binaural signals. Appl. Sci.,
% 12(2441), 2022.
%
%
% See also: plot_mckenzie2022 demo_mckenzie2022 exp_mckenzie2022
%
% Url: http://amtoolbox.org/amt-1.5.0/doc/models/mckenzie2022.php
% #StatusDoc: Good
% #StatusCode: Good
% #Verification: Unknown
% #Requirements: MATLAB M-Stats M-Curvefit
% #Author: Thomas McKenzie (2022): initial implementation
% #Author: Cal Armstrong (2022): initial implementation
% #Author: Lauren Ward (2022): initial implementation
% #Author: Damian Murphy (2022): initial implementation
% #Author: Gavin Kearney (2022): initial implementation
% #Author: Piotr Majdak (2023): documentation and structure adaptations
% This file is licensed unter the GNU General Public License (GPL) either
% version 3 of the license, or any later version as published by the Free Software
% Foundation. Details of the GPLv3 can be found in the AMT directory "licences" and
% at <https://www.gnu.org/licenses/gpl-3.0.html>.
% You can redistribute this file and/or modify it under the terms of the GPLv3.
% This file is distributed without any warranty; without even the implied warranty
% of merchantability or fitness for a particular purpose.
%% DEFAULT OPTIONAL INPUTS
if nargin<8; lim = 0.05; end
if nargin<7; plotFlag = 0; end
if nargin<6; w = 1; end
if nargin<5; norm = []; end
%% FUNCTION SETTINGS
SPL = 75; % Sound Pressure Level
initInc = 0.2; % Initial Increment
scale = 0.6;
% Factors control the impact of each perceptual modification made to the
% input signals. Selecting 0 for all factors will compute the same as a
% Basic Spectral Difference (A-B). Selecting 1 for each factor will
% compute the basic SD. Factions are also accepted, which linearly vary
% the results inbetween an Actual and Predicted Binaural Colouration
% calculation. The factors should remain between -1 and 1 for 'meaningful'
% results.
pFac = 1; % Phons influence Factor
sFac = 1; % Sones influence Factor
erbFac = 1; % ERB influence Factor
% The Sones scale considers an increase in 10 Phons to be a doubling of
% Sones such that 40 Phons=1Sone. This factor allows that scale to be
% skewed within the PBC model such that an increase of sSkw phones results
% in a doubling of skewed Sones.
sSkw = 10; % Sones skew factor
%% CONVERT INPUTS TO FREQUENCY (dB) DOMAIN IF REQUIRED
switch domFlag
% Time domain -> Frequency (dB) domain
case 0
minfftSample = round(f.minFreq / f.fs * f.nfft)+1;
maxfftSample = round(f.maxFreq / f.fs * f.nfft)+1;
freq = (0 : f.fs/f.nfft : f.fs-(f.fs/f.nfft))';
freq = freq(minfftSample:maxfftSample);
A = 20*log10(abs(fft( A, f.nfft )));
A = A(minfftSample:maxfftSample, :, :);
B = 20*log10(abs(fft( B, f.nfft )));
B = B(minfftSample:maxfftSample, :, :);
% Frequency domain -> Frequency (dB) domain
case 1
A = 20*log10(abs(A));
B = 20*log10(abs(B));
freq = f(:);
% Frequency (dB) domain
case 2
freq = f(:);
otherwise
error('Unrecognised integer input: domFlag. Must be 0, 1 or 2.');
end
%% HANDLE INPUTS
sizeA = size(A);
% Map sample point weightings
if size(w) == 1
w = repmat(w, [1, sizeA(2:end)]);
end
% Define maximum iterations / resolution limit
if lim >= 1
maxIt = limit; % Maximum Iterations
minRes = 0; % Minimum Resolution
elseif lim > 0
maxIt = 100000;
minRes = lim;
else
error('Positive numeric value expected for input variable "limit"');
end
offset = NaN(maxIt, 1);
err = NaN(maxIt, 1); % Error
%% FUNCTION SETUP
% The code in setupPBC() is separated for convenience and computational
% efficiency. If mckenzie2022() is called repeatedly with the same
% frequency point analysis then is may be benificial to extract setupPBC(),
% store the output variables and adapt mckenzie2022() to accept the data as
% input variables.
[EL, ERBWeights] = local_setupPBC(sizeA, freq);
%% FIND INPUT NORMALISATION
% Iterate to find perceptually optimum value
if isempty(norm)
% Initial comparison to find jump value
[~, temp, jump] = local_calculatePBC(A, B, 0);
temp_weighted = temp .* w;
err(1) = sum(temp_weighted(:)) ./ sum(w(:));
% For each iteration...
for i = 2:maxIt
% Find offset for this iteration
switch i
% Second comparison (No offset)
case 2
offset(i) = jump;
% Third comparison (Try out positive offset)
case 3
offset(i) = jump + initInc;
% All other comparisons
otherwise
% If there was a reduction in perceptual error with respect
% to the previous offset, carry on increasing the offset in
% that direction (up or down)
if err(i-1) < err(i-2)
offset(i) = offset(i-1) + initInc;
% If there was no improvement / an increase in error
else
% If this is the third iteration, switch the offset
% direction, but keep the offset resolution the same
if i == 4
initInc = initInc * -1;
offset(i) = jump + initInc;
% Otherwise, switch the offset direction and reduce the
% offset resolution
else
% If the resolution has got too fine, meaning
% reducing the offset resolution would make minimal
% difference to the PBC, quit
if abs(err(i-1)-err(i-2)) < minRes ||...
abs(err(i-2)-err(i-3)) < minRes
break;
else
initInc = initInc * -scale;
offset(i) = offset(i-1) + initInc;
end
end
end
end
% Calculate PBC with calculated offset and save result
[~, temp, ~] = local_calculatePBC(A, B, offset(i));
temp_weighted = temp .* w;
err(i) = sum(temp_weighted(:)) ./ sum(w(:));
end
if plotFlag
figure; hold on; set(gca,'FontSize', 15);
colorNormalisation = flipud(parula(i));
ylabel('Average PTD (sones)')
xlabel('Test dataset normalisation (dB)')
set(gcf, 'Color', 'w');
pbaspect([1.7 1 1]);
grid on; box on;
scatter(0, err(1),100,'x','LineWidth',1.5,'MarkerEdgeColor',colorNormalisation(1,:));
for plotCounter = 2:i
scatter(offset(plotCounter), err(plotCounter),100,'x',...
'LineWidth',1.5,'MarkerEdgeColor',colorNormalisation(plotCounter,:));
end
end
% Find optimal offset
[~, minIndex] = min(err);
norm = offset(minIndex);
end
%% CALCULATE PERCEPTUAL SPECTRAL DIFFERENCE
[diff, avgDiff_ERB, ~] = local_calculatePBC(A, B, norm);
%% INTERNAL FUNCTIONS
function [EL, ERBWeights] = local_setupPBC(sizeA, freq)
% ISO 226 Declarations
iso226SPL = zeros(30, 91);
iso226Freq = zeros(30, 91);
Y = zeros(length(freq), 91);
EL = zeros(length(freq), 91);
% For all ISO standardised listening levels (ISO 226: 0-90dB SPL)
for l = 1:91
% Save equal loudness contour
[iso226SPL(1:29, l), iso226Freq(1:29, l)] = local_iso226(l-1);
iso226Freq(30, l) = 20000;
iso226SPL(30, l) = iso226SPL(1, l);
% Fit curve to equal loudness contour
iso226Fit = fit(iso226Freq(:, l), iso226SPL(:, l), 'pchip');
% Interpolate to input frequency bins and remove equivalent 1KHz
% loudness offset
Y(:, l) = iso226Fit(freq) - (l - 1);
% Save the offset required in dB to equate the loudness of any
% frequency bin to that of 1KHz for a given absolute loudness
% (0-90dB SPL)
% ... A.K.A. flip the equal loudness contour 1KHz offset!
EL(:, l) = -Y(:, l) ;
end
% Calculate ERB bandwidths
ERB = 0.108.*freq + 24.7;
% Calculate ERB weights and repeat for input matrix multiplication
ERBWeights_temp = (1./ERB) ./ max(1./ERB);
ERBWeights = repmat(ERBWeights_temp, [1, sizeA(2:end)]);
end
function [pDiff, avgPDiff_ERB, jump] = local_calculatePBC(A, B, norm)
%% APPLY OFFSET AND NORMALISE INPUTS (75dB)
B = B + norm;
meanValue = mean([A(:); B(:)]);
A = A + (SPL-meanValue);
B = B + (SPL-meanValue);
%% EQUAL LOUDNESS (PHONS)
% Save matrices that select the correct loudness contour offsets to use
% for each frequency bin for each input spectrum. This is calculated by
% linear interpolation of the integer rounded input matricies with a
% maximum value of 90 and a minimum value of 0.
% Round to hi/lo intergers and save fractional component
LC_A_hi = min(ceil(A), 90); LC_A_hi = max(LC_A_hi, 0);
LC_A_lo = min(floor(A), 90); LC_A_lo = max(LC_A_lo, 0);
A_frac = rem(A, 1);
LC_B_hi = min(ceil(B), 90); LC_B_hi = max(LC_B_hi, 0);
LC_B_lo = min(floor(B), 90); LC_B_lo = max(LC_B_lo, 0);
B_frac = rem(B, 1);
% find loudness contour offsets for integer values
EL_A_hi = EL(sub2ind(size(EL),...
repmat((1:sizeA(1))', [1, sizeA(2:end)]),...
LC_A_hi+1));
EL_A_lo = EL(sub2ind(size(EL),...
repmat((1:sizeA(1))', [1, sizeA(2:end)]),...
LC_A_lo+1));
EL_B_hi = EL(sub2ind(size(EL),...
repmat((1:sizeA(1))', [1, sizeA(2:end)]),...
LC_B_hi+1));
EL_B_lo = EL(sub2ind(size(EL),...
repmat((1:sizeA(1))', [1, sizeA(2:end)]),...
LC_B_lo+1));
% Interpoloate between integer based loudness contour offsets
EL_A = EL_A_lo + (A_frac.*(EL_A_hi - EL_A_lo));
EL_B = EL_B_lo + (B_frac.*(EL_B_hi - EL_B_lo));
% Account for equal loudness / convert input data to phones scale
A_EL = A + EL_A;
B_EL = B + EL_B;
% Apply Factor
A_EL = ((1-pFac) .* A) + (pFac .* A_EL);
B_EL = ((1-pFac) .* B) + (pFac .* B_EL);
%% CONVERT TO SONES
A_EL_Sones = 2.^((A_EL-40)/sSkw);
B_EL_Sones = 2.^((B_EL-40)/sSkw);
% Apply Factor
A_EL_Sones = ((1-sFac) .* A_EL) + (sFac .* A_EL_Sones);
B_EL_Sones = ((1-sFac) .* B_EL) + (sFac .* B_EL_Sones);
%% FIND PERCEPTUAL AVERAGE DIFFERENCE WITH ERB WEIGHTING
pDiff = (B_EL_Sones - A_EL_Sones);
avgPDiff_ERB = sum(ERBWeights .* abs(pDiff)) ./ sum(ERBWeights);
% Apply Factor
avgPDiff_ERB = ((1-erbFac) .* mean(abs(pDiff))) + (erbFac .* avgPDiff_ERB);
% Jump provides a rough idea of the level difference required to make
% to two inputs sound more similar
A_level = sum(ERBWeights .* (A_EL_Sones)) ./ sum(ERBWeights);
B_level = sum(ERBWeights .* (B_EL_Sones)) ./ sum(ERBWeights);
jump = mean(A_level(:)) - mean(B_level(:));
end
end
function [spl, freq] = local_iso226(phon)
% Equal Loudness Curves from ISO 226
f = [20 25 31.5 40 50 63 80 100 125 160 200 250 315 400 500 630 800 ...
1000 1250 1600 2000 2500 3150 4000 5000 6300 8000 10000 12500];
af = [0.532 0.506 0.480 0.455 0.432 0.409 0.387 0.367 0.349 0.330 0.315 ...
0.301 0.288 0.276 0.267 0.259 0.253 0.250 0.246 0.244 0.243 0.243 ...
0.243 0.242 0.242 0.245 0.254 0.271 0.301];
Lu = [-31.6 -27.2 -23.0 -19.1 -15.9 -13.0 -10.3 -8.1 -6.2 -4.5 -3.1 ...
-2.0 -1.1 -0.4 0.0 0.3 0.5 0.0 -2.7 -4.1 -1.0 1.7 ...
2.5 1.2 -2.1 -7.1 -11.2 -10.7 -3.1];
Tf = [ 78.5 68.7 59.5 51.1 44.0 37.5 31.5 26.5 22.1 17.9 14.4 ...
11.4 8.6 6.2 4.4 3.0 2.2 2.4 3.5 1.7 -1.3 -4.2 ...
-6.0 -5.4 -1.5 6.0 12.6 13.9 12.3];
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Error Trapping
if((phon < 0) || (phon > 90))
disp('Phon value out of bounds!')
spl = 0;
freq = 0;
else
%Setup user-defined values for equation
Ln = phon;
%Deriving sound pressure level from loudness level (iso226 sect 4.1)
Af=4.47E-3 * (10.^(0.025*Ln) - 1.15) + (0.4*10.^(((Tf+Lu)/10)-9 )).^af;
Lp=((10./af).*log10(Af)) - Lu + 94;
%Return user data
spl = Lp;
freq = f;
end
end