This documentation page applies to an outdated AMT version (1.0.0). Click here for the most recent page.
function [template, target] = mclachlan2021_featureextraction(SOFAtemplate, varargin)
%MCLACHLAN2021_FEATUREEXTRACTION - extract HRTF using gammatone frequency bands and ITDs from SOFA object
%
% Usage: [template, target] = mclachlan2021_featureextraction(SOFAobj)
%
% Input parameters:
% SOFAtemplate: Struct in SOFA format with HRTFs
%
% Output parameters:
% template : template struct with spectral components
% target : template struct with spectral components
%
% MCLACHLAN2021_FEATUREEXTRACTION(...) computes temporally integrated
% spectral magnitude profiles and itd.
%
% MCLACHLAN2021_FEATUREEXTRACTION accepts the following optional parameters:
%
% 'source_ir',source_ir Set the sound source's impulse reponse.
% Default value a broadband sound source
% with 0dB amplitude.
%
% 'fs',fs Set the sampling rate to fs.
% Default value takes the fs of SOFA template object.
%
% 'fb_ch',fb_ch Set the number of channels for the gammatone
% filterbank to fb_ch. Default value is 30.
%
% 'fb_low',fb_low Set the lowest frequency in the filterbank to
% fb_low. Default value is 300 Hz.
%
% 'fb_high',fb_high Set the highest frequency in the filterbank to
% fhigh. Default value is 15000 Hz.
%
% 'ir_pad',len Define the padding length for the impulse responses
% before being convolved with gammatone filters.
% Default value is 0.05 s.
%
% 'targ_az',targ_az Set the azimuth of a set of sound sources
% to targ_el. It can be a scalar or a column vector
% Default value is []: all target azimuths are
% used. Must have the same size of targ_el.
%
% 'targ_el',targ_el Set the elevation of a set of sound sources
% to targ_el. It can be a scalar or a column vector
% Default value is []: all target elevations are
% used. Must have the same size of targ_az.
%
% See also: exp_reijniers2014 reijniers2014
%
% References:
% r. barumerli, p. majdak, j. reijniers, r. baumgartner, m. geronazzo,
% and f. avanzini. predicting directional sound-localization of human
% listeners in both horizontal and vertical dimensions. journal of the
% audio engineering society, may 2020.
%
% J. Reijniers, D. Vanderleist, C. Jin, C. S., and H. Peremans. An
% ideal-observer model of human sound localization. Biological
% Cybernetics, 108:169--181, 2014.
%
%
% Url: http://amtoolbox.sourceforge.net/amt-0.10.0/doc/modelstages/mclachlan2021_featureextraction.php
% Copyright (C) 2009-2020 Piotr Majdak and the AMT team.
% This file is part of Auditory Modeling Toolbox (AMT) version 1.0.0
%
% This program is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program. If not, see <http://www.gnu.org/licenses/>.
% R. Barumerli, P. Majdak, R. Baumgartner, J. Reijniers, M. Geronazzo,
% and F. Avanzini. Predicting directional sound-localization of human
% listeners in both horizontal and vertical dimensions. In Audio
% Engineering Society Convention 148. Audio Engineering Society, 2020.
%
%
% Url: http://amtoolbox.sourceforge.net/amt-0.10.0/doc/modelstages/reijniers2014_featureextraction.php
%
% AUTHOR: Michael Sattler and Roberto Barumerli (adapted from code provided by Jonas Reijniers)
definput.import={'amt_cache'};
% sampling rate
definput.keyvals.fs = SOFAtemplate.Data.SamplingRate;
% gammatone filterbank parameters
definput.keyvals.fb_ch = 30;
definput.keyvals.fb_low = 300;
definput.keyvals.fb_high = 15e3;
% 50ms' pad
definput.keyvals.ir_pad = 0.05*definput.keyvals.fs;
definput.keyvals.targ_az = [];
definput.keyvals.targ_el = [];
% sound source
% broad band sound source at 0dB
definput.keyvals.source_ir = 0;
[~, kv] = ltfatarghelper({'source_ir', 'fs', 'fb_ch','fb_low','fb_high', ...
'ir_pad', 'targ_az','targ_el'}, definput, varargin);
if(kv.fs ~= SOFAtemplate.Data.SamplingRate)
assert(kv.fs ~= SOFAtemplate.Data.SamplingRate)
end
% Get directions from SOFA file and convert them in cartesian
% coordinates
coords = SOFAcalculateAPV(SOFAtemplate);
% assume position on a sphere with radius of 1 meter
coords(:, 3) = 1;
% convert polar (radians) to cartesian
template_coords = zeros(size(SOFAtemplate.SourcePosition));
[template_coords(:,1), template_coords(:,2), template_coords(:,3)] = ...
sph2cart(coords(:,1)*pi/180,coords(:,2)*pi/180,coords(:,3));
%% ITD computation
% do alignment as was performed by Katz 2014
template_itd = itdestimator(SOFAtemplate,'Threshold', 'lp', ...
'upper_cutfreq', 3000, 'butterpoly', 10, 'threshlvl', -10);
%% Pad HRIR vector
time_idx = find(SOFAtemplate.API.Dimensions.Data.IR == 'N');
dir_idx = find(SOFAtemplate.API.Dimensions.Data.IR == 'M');
ear_idx = find(SOFAtemplate.API.Dimensions.Data.IR == 'R');
% permute in order to use ufilterbankz
hrir = permute(double(SOFAtemplate.Data.IR),[time_idx, dir_idx, ear_idx]);
% pad to account for longer filters in the filterbank
pad_mat = zeros(kv.ir_pad - SOFAtemplate.API.('N'), SOFAtemplate.API.('M'), SOFAtemplate.API.('R'));
hrir = cat(1, hrir, pad_mat);
%% Gammatone filterbank
fc = fc_ERB(kv.fb_ch, kv.fb_low, kv.fb_high);
% if the number of channels exceed the fb_high
% the vector will be shorter than kv.fb_ch
kv.fb_ch = length(fc);
[bgt,agt] = gammatone(fc,kv.fs,'complex');
%% H_L and H_R generation
template_hrtf = 2*real(ufilterbankz(bgt,agt,hrir(:,:)));
hrtf_size = size(hrir);
template_hrtf = reshape(template_hrtf,[hrtf_size(1),kv.fb_ch,hrtf_size(2),hrtf_size(3)]);
clear hrir
% Averaging over time (RMS)
template_hrtf = 20*log10(squeeze(rms(template_hrtf, 'dim', 1))+eps); % in dB
% normalize
template_hrtf = template_hrtf - max(template_hrtf(:));
%% S computation
if isequal(kv.source_ir, definput.keyvals.source_ir)
S = zeros(kv.fb_ch, 1);
else
% if S is not the default value compute its spectrum
% pad to account for longer filters in the filterbank
kv.source_ir = padarray(kv.source_ir(:), ...
[abs(kv.ir_pad - length(kv.source_ir)) 0],'post');
S = 2*real(ufilterbankz(bgt,agt,kv.source_ir));
% Averaging over time (RMS)
S = 20*log10(squeeze(rms(S, 'dim', 1))+eps);
end
% create struct
template.fs = kv.fs;
template.fc = fc;
template.itd0 = template_itd;
template.H = template_hrtf;
template.coords = template_coords;
% if target required
if nargout > 1
%% target computation
if(~isempty(kv.targ_az) || ~isempty(kv.targ_el))
assert(numel(kv.targ_az)==numel(kv.targ_el))
target_idx = SOFAfind(SOFAtemplate, kv.targ_az, kv.targ_el);
if(numel(target_idx) ~= numel(kv.targ_az))
amt_disp(['Requested HRTF''s points: ',numel(kv.targ_az)*numel(kv.targ_el),' Found:',numel(target_idx)]);
end
target_hrtf = template_hrtf(:,target_idx,:);
target_itd = template_itd(target_idx);
target_coords = template_coords(target_idx, :);
else
target_hrtf = template_hrtf;
target_itd = template_itd;
target_coords = template_coords;
end
target.fs = kv.fs;
target.fc = fc;
target.itd0 = target_itd;
target.S = S;
target.H = target_hrtf;
target.coords = target_coords;
end
function fc = fc_ERB(n_channels, freq_start, freq_end)
% ERB computation according to Moore and Glasberg 1983
c = 1;
fc = zeros(n_channels, 1);
fc(1) = freq_start;
while (c < n_channels)
c =c + 1;
fc(c) = fc(c-1) + 6.23 * (fc(c-1)/1000)^2 + 93.39 * (fc(c-1)/1000) + 28.52;
end
fc(fc > freq_end) = [];