This documentation page applies to an outdated AMT version (1.0.0). Click here for the most recent page.
function [doa, params] = mclachlan2021(template,target,varargin)
%MCLACHLAN2021 Bayesian dynamic sound localization model
% Usage: [results,template,target] = mclachlan2014(template,target,'num_exp',20,'sig_S',4.2);
%
% Input parameters:
%
% template.fs : sampling rate (Hz)
% template.fc : ERB frequency channels (Hz)
% template.itd0 : itd computed for each hrir (samples)
% template.H : Matrix containing absolute values of HRTFS for all grid points
% template.coords : Matrix containing cartesian coordinates of all grid points, normed to radius 1m
% template.T : angular template for each coordinate
% target.fs : sampling rate
% target.fc : ERB frequency channels
% target.itd0 : itd corresponding to source position
% target.S : sound source spectrum
% target.H : Matrix containing absolute values of HRTFS for all
% source directions
%
% target.coords : Matrix containing cartesian coordinates of all
% source positions to be estimated, normed to radius 1m
%
% target.T : angular template for each coordinate
%
% Output parameters:
%
% doa : directions of arrival in spherical coordinates,
% contains the fields '.est' (estimated DOA
% [num_sources, num_repetitions, 3]) and '.real'
% (actual DOA [num_sources, 3])
%
% params : additional model's data computerd for estimations
%
%
% 'params' contains the following fields:
%
% '.est_idx' Indices corresponding to template direction where
% the maximum probability density for each source
% position is found
%
% '.est_loglik' Log-likelihood of each estimated direction
%
% '.post_prob' Maximum posterior probability density for each target source
%
% '.freq_channels' Number of auditory channels
%
% '.T_template' Struct with template data elaborated by the model
%
% '.T_target' Struct with target data elaborated by the model
%
% '.Tidx' Helper with indexes to parse
% the features from T and X
%
% MCLACHLAN2021 accepts the following optional parameters:
%
% 'num_exp',num_exp Set the number of localization trials.
% Default is num_exp = 500.
%
% 'SNR',SNR Set the signal to noise ratio corresponding to
% different sound source intensities.
% Default value is SNR = 75 [dB]
%
% 'dt',dt Time between each acoustic measurement in seconds.
% Default value is dt = 0.005.
%
% 'sig_itd0',sig Set standard deviation for the noise on the initial
% itd. Default value is sig_itd0 = 0.569.
%
% 'sig_itdi',sig Set standard deviation for the noise on the itd
% change per time step. Default value is sig_itdi = 1.
%
% 'sig_I',sig Set standard deviation for the internal noise.
% Default value is sig_I = 3.5.
%
% 'sig_S',sig Set standard deviation for the variation on the
% source spectrum. Default value is sig_S = 3.5.
%
% 'rot_type',type Set rotation type. Options are 'yaw', 'pitch' and
% 'roll'. Default value is 'yaw'.
%
% 'rot_size',size Set rotation amount in degrees. Default value is
% rot_size = 0.
%
% 'stim_dur',dur Set stimulus duration in seconds. Default value is
% stim_dur = 0.1.
%
% Further, cache flags (see amt_cache) can be specified.
%
%
% Description:
% ------------
%
% MCLACHLAN2021(...) is a dynamic ideal-observer model of human sound
% localization, by which we mean a model that performs optimal
% information processing within a Bayesian context. The model considers
% all available spatial information contained within the acoustic
% signals encoded by each ear over a specified hear rotation. Parameters
% for the optimal Bayesian model are determined based on psychoacoustic
% discrimination experiments on interaural time difference and sound
% intensity.
%
%
% Requirements:
% -------------
%
% 1) SOFA API v1.1 or higher from
% http://sourceforge.net/projects/sofacoustics for Matlab (e.g. in
% thirdparty/SOFA)
%
% See also: plot_reijniers2014 reijniers2014
% reijniers2014_metrics baumgartner2013
% demo_mclachlan2021
% mclachlan2021_metrics
% mclachlan2021_featureextraction
% mclachlan2021_rotatedirs
%
%
% References:
% R. Barumerli, P. Majdak, R. Baumgartner, J. Reijniers, M. Geronazzo,
% and F. Avanzini. Predicting directional sound-localization of human
% listeners in both horizontal and vertical dimensions. In Audio
% Engineering Society Convention 148. Audio Engineering Society, 2020.
%
% R. Barumerli, P. Majdak, R. Baumgartner, M. Geronazzo, and F. Avanzini.
% Evaluation of a human sound localization model based on bayesian
% inference. In Forum Acusticum, 2020.
%
% J. Reijniers, D. Vanderleist, C. Jin, C. S., and H. Peremans. An
% ideal-observer model of human sound localization. Biological
% Cybernetics, 108:169--181, 2014.
%
% G. McLachlan, P. Majdak, J. Reijniers, and H. Peremans. Towards
% modelling active dynamic sound localisation based on Bayesian
% inference. Acta Acustica, 2021.
%
%
% Url: http://amtoolbox.sourceforge.net/amt-0.10.0/doc/models/mclachlan2021.php
% Copyright (C) 2009-2020 Piotr Majdak and the AMT team.
% This file is part of Auditory Modeling Toolbox (AMT) version 1.0.0
%
% This program is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program. If not, see <http://www.gnu.org/licenses/>.
% #StatusDoc: Good
% #StatusCode: Good
% #Verification: Unknown
% #Requirements: MATLAB SOFA M-Stats
% #Author: Michael Sattler
% #Author: Roberto Barumerli
% (adapted from code provided by Jonas Reijniers)
%% Check input options
definput.import={'amt_cache'};
%definput.flags.type = {'fig2'};
definput.keyvals.num_exp = 500;
definput.keyvals.SNR = 75;
definput.keyvals.dt = 0.005;
definput.keyvals.rot_type = 'yaw';
definput.keyvals.rot_size = 0.1;
definput.keyvals.stim_dur = 0.1;
% parameters of the model computed in the supplementary material
definput.keyvals.sig_itd0 = 0.569;
definput.keyvals.sig_itdi = 2*0.569;
definput.keyvals.sig_I = 3.5;
definput.keyvals.sig_S = 3.5;
definput.keyvals.sig = 5;
[flags,kv] = ltfatarghelper({'num_exp','SNR','dt','rot_type','rot_size',...
'stim_dur','sig_itd0','sig_itdi','sig_I',...
'sig_S','sig'}, definput, varargin);
% prevent infinite matrix if rot_size==0
if kv.rot_size == 0
kv.rot_size = 0.1;
end
%% derived parameters
rot_speed = kv.rot_size/kv.stim_dur; % rotation speed
steps = floor(kv.stim_dur/kv.dt); % amount of measurements steps during stimulus
alpha = linspace(0,kv.rot_size,steps); % vector with angles at each measurement step
%% sample uniformly over sphere with N is number of directions
% NOTE: amt_load('reijniers2014', 'dirs.mat') contains the sampled point on a unitary
% sphere
dirs=amt_load('reijniers2014','dirs.mat');
dirs=dirs.dirs;
if(isempty(dirs))
error('New directions grid not available. Please check your internet connection!')
end
%% remove the points from the unitary sphere below HRTF lowest elevation
idx = find(dirs(:,3) > min(template.coords(:, 3)));
dirs = dirs(idx,:);
num_dirs = length(idx);
%% rotate coordinate system according to yaw, pitch and roll values
dangle = 1; % rotation in degrees to compute dITD
rot_type = kv.rot_type;
rot_size = kv.rot_size;
dirs_rot = mclachlan2021_rotatedirs(dirs,dangle,rot_type); % rotated uniform grid
target_rot = mclachlan2021_rotatedirs(target.coords,dangle,rot_type); % rotated target coordinates
%% interpolate at uniformly distributed directions and update H and itd
% calculate spherical harmonic coefficients of H and itd, using tikonov regularization
SHorder = 15; % spherical harmonic order
[AZ,EL] = cart2sph(template.coords(:,1),template.coords(:,2),template.coords(:,3));
Y_N = SH(SHorder, [AZ EL]);
% tikonov regularisation
lambda = 4.;
SIG = eye((SHorder+1)^2);
SIG(1:(2+1)^2,1:(2+1)^2) = 0;
cH(:,:,1) = transpose((Y_N'*Y_N+lambda*SIG)\Y_N'*squeeze(template.H(:,:,1))');
cH(:,:,2) = transpose((Y_N'*Y_N+lambda*SIG)\Y_N'*squeeze(template.H(:,:,2))');
citd = (Y_N'*Y_N+lambda*SIG)\Y_N'*template.itd0(:);
% interpolate at uniformly distributed directions and update
[AZ,EL] = cart2sph(dirs(:,1),dirs(:,2),dirs(:,3));
Y_N = SH(SHorder, [AZ EL]);
template.H = [];
template.itd0 = [];
template.H(:,:,1) = transpose(Y_N*squeeze(cH(:,:,1))');
template.H(:,:,2) = transpose(Y_N*squeeze(cH(:,:,2))');
template.itd0 = Y_N*citd;
template.coords = dirs;
% interpolate at uniformly distributed directions and update
[AZ,EL] = cart2sph(target.coords(:,1),target.coords(:,2),target.coords(:,3));
Y_N = SH(SHorder, [AZ EL]);
target.H = [];
target.itd0 = [];
target.H(:,:,1) = transpose(Y_N*squeeze(cH(:,:,1))');
target.H(:,:,2) = transpose(Y_N*squeeze(cH(:,:,2))');
target.itd0 = Y_N*citd;
% interpolate template to rotated matrix
[AZ,EL] = cart2sph(dirs_rot(:,1),dirs_rot(:,2),dirs_rot(:,3));
Y_N = SH(SHorder, [AZ EL]);
template.itdt = Y_N*citd;
% interpolate target to rotated coordinates
[AZ,EL] = cart2sph(target_rot(:,1),target_rot(:,2),target_rot(:,3));
Y_N = SH(SHorder, [AZ EL]);
target.itdt = Y_N*citd;
%% transform HRTF and itd to perceptually relevant units
% itd trasformation through jnd - see supplementary material for parameters
a = 32.5e-6;
b = 0.095;
% itd at start of rotation
template.itd0 = sign(template.itd0) .* ((log(a + b * abs(template.itd0)) - log(a)) / b);
target.itd0 = sign(target.itd0) .* ((log(a + b * abs(target.itd0)) - log(a)) / b);
% itd after rotation
template.itdt = sign(template.itdt) .* ((log(a + b * abs(template.itdt)) - log(a)) / b);
target.itdt = sign(target.itdt) .* ((log(a + b * abs(target.itdt)) - log(a)) / b);
% itd change per degree
template.itdd = (template.itdt-template.itd0)/(dangle);
target.itdd = (target.itdt-target.itd0)/(dangle);
% account for SNR and frequency-dependent hearing sensitivity (see section 2.1 in SI)
% add source spectrum to target and to template
temp_H = template.H + repmat(target.S(:), 1, size(template.H, 2), 2);
targ_H = target.H + repmat(target.S(:), 1, size(target.H, 2), 2);
SNR = kv.SNR; % defined as maximal SNR (in interval 2kHz-7kHz)
% see last formula in the supplementary materials
temp_H = max(temp_H ,-SNR);
temp_H(template.fc<=2000,:,:) = max(temp_H(template.fc<=2000,:,:),-SNR + 10);
temp_H(template.fc>=7000,:,:) = max(temp_H(template.fc>=7000,:,:),-SNR + 20);
targ_H = max(targ_H,-SNR);
targ_H(target.fc<=2000,:,:) = max(targ_H(target.fc<=2000,:,:),-SNR + 10);
targ_H(target.fc>=7000,:,:) = max(targ_H(target.fc>=7000,:,:),-SNR + 20);
%% define templates
% T_template has size [Q x (2xfc+1)], where Q is number of sampled points
% and fc = number of frequency channels
T_template=[template.itd0, template.itdd,...
squeeze(temp_H(:,:,1)-temp_H(:,:,2))', ...
0.5.*squeeze(temp_H(:,:,1)+temp_H(:,:,2))'];
T_target=[target.itd0, target.itdd, ...
squeeze(targ_H(:,:,1)-targ_H(:,:,2))', ...
0.5.*squeeze(targ_H(:,:,1)+targ_H(:,:,2))'];
%% define error covariance matrix
sig_itd0 = kv.sig_itd0; %0.569;
sig_itdi = kv.sig_itdi; %still unknown, currently set to 1
sig_I = kv.sig_I; % 3.5; Intensity discrimination for broadband signal
sig_S = kv.sig_S; %3.5; Source's template error
sig = kv.sig; % Expected variance on the source strength - interchannel noise communication
sig_i = [sig_itd0, repmat(sig_itdi,1,steps-1)]; % var on ITD at each time step
% create M_beta covariance matrix
W = diag(1./sig_i.^2); % weight matrix
X = ones(steps,2); % each column a slope of a parameter beta
X(:,2) = alpha;
M_beta = inv(X.'*W*X); % covariance matrix of ITD0 and ITDd
Sig = blkdiag(M_beta, 2*sig_I^2*eye(length(template.fc)), ...
(sig_I^2/2 + sig_S^2)*eye(length(template.fc)) + sig^2); % full covariance matrix
%% simulate num_exp experimental trials
num_exp = kv.num_exp;
invSig = inv(Sig);
num_src = size(target.coords,1);
log_lik = zeros(num_src, num_exp);
doa_idx = zeros(num_src, num_exp);
post_prob = zeros(num_src, num_exp, num_dirs);
doa_estimations = zeros(num_src, num_exp, size(template.coords, 2));
entropy = zeros(num_src,1); % entropy in bits
Entropy = zeros(num_src,1);
if nargout > 1
X_all = zeros(num_src, num_exp, size(T_target, 2));
end
for e = 1:num_exp
amt_disp(sprintf('experiment %i', e),'volatile');
X = mvnrnd(T_target,Sig);
if nargout > 1
X_all(:,e,:) = X;
end
for s = 1:num_src
for d = 1:num_dirs
% Formula R
u_diff = (X(s,:)-T_template(d,:));
post_prob(s,e,d) = abs(exp(-0.5* u_diff*invSig*transpose(u_diff)));
end
% normalize
post_prob(s,e,:) = post_prob(s,e,:)/sum(post_prob(s,e,:) + eps);
% maximum a posteriori
[log_lik(s,e), doa_idx(s,e)] = max(post_prob(s,e,:));
entropy(s)= - squeeze(post_prob(s,e,:))'*log2(squeeze(post_prob(s,e,:)) + eps);
doa_estimations(s,e,:) = template.coords(doa_idx(s,e), :);
end
Entropy = Entropy + entropy; %cumulative entropy over experiments
end
amt_disp();
Entropy = Entropy/num_exp;
Information = log2(num_dirs) - Entropy;
%% results
doa.est = doa_estimations;
doa.real = target.coords;
% user required more than the estimations
if nargout > 1
params.template_coords = template.coords;
params.post_prob = post_prob;
params.entropy = Entropy;
params.information = Information;
params.est_idx = doa_idx;
params.est_loglik = log_lik;
params.X = X_all;
params.T_template = T_template;
params.T_target = T_target;
params.freq_channels = template.fc;
params.Tidx.itd = 1;
assert(length(target.fc)==length(template.fc))
params.Tidx.Hp = params.Tidx.itd + (1:length(target.fc));
params.Tidx.Hm = params.Tidx.Hp(end) + (1:length(target.fc));
else
clear X_all post_prob doa_idx log_lik
end
end
function Y_N = SH(N, dirs)
% calculate spherical harmonics up to order N for directions dirs [azi ele;...] (in radiant)
%
N_dirs = size(dirs, 1);
N_SH = (N+1)^2;
dirs(:,2) = pi/2 - dirs(:,2); % convert to inclinations
Y_N = zeros(N_SH, N_dirs);
% n = 0
Lnm = legendre(0, cos(dirs(:,2)'));
Nnm = sqrt(1./(4*pi)) * ones(1,N_dirs);
CosSin = zeros(1,N_dirs);
CosSin(1,:) = ones(1,size(dirs,1));
Y_N(1, :) = Nnm .* Lnm .* CosSin;
% n > 0
idx = 1;
for n=1:N
m = (0:n)';
Lnm = legendre(n, cos(dirs(:,2)'));
condon = (-1).^[m(end:-1:2);m] * ones(1,N_dirs);
Lnm = condon .* [Lnm(end:-1:2, :); Lnm];
mag = sqrt( (2*n+1)*factorial(n-m) ./ (4*pi*factorial(n+m)) );
Nnm = mag * ones(1,N_dirs);
Nnm = [Nnm(end:-1:2, :); Nnm];
CosSin = zeros(2*n+1,N_dirs);
% m=0
CosSin(n+1,:) = ones(1,size(dirs,1));
% m>0
CosSin(m(2:end)+n+1,:) = sqrt(2)*cos(m(2:end)*dirs(:,1)');
% m<0
CosSin(-m(end:-1:2)+n+1,:) = sqrt(2)*sin(m(end:-1:2)*dirs(:,1)');
Ynm = Nnm .* Lnm .* CosSin;
Y_N(idx+1:idx+(2*n+1), :) = Ynm;
idx = idx + 2*n+1;
end
Y_N = Y_N.';
end