THE AUDITORY MODELING TOOLBOX

Applies to version: 0.9.5

View the help

Go to function

BAUMGARTNER2014 - Model for localization in saggital planes

Program code:

function varargout = baumgartner2014( target,template,varargin )
%BAUMGARTNER2014 Model for localization in saggital planes
%   Usage:    [p,respang] = baumgartner2014( target,template )
%             [p,respang,tang] = baumgartner2014( target,template )
%             [p,respang,tang] = baumgartner2014( target,template,fs,S,lat,stim,fsstim )
%
%   Input parameters:
%     target  : binaural impulse response(s) referring to the directional 
%               transfer function(s) (DFTs) of the target sound(s).
%     template: binaural impulse responses of all available
%               listener-specific DTFs of the sagittal plane referring to
%               the perceived lateral angle of the target sound
%
%   Output parameters:
%     p       : predicted probability mass vectors for response angles 
%               with respect to target positions
%               1st dim: response angle
%               2nd dim: target angle
%     respang : polar response angles (after regularization of angular 
%               sampling)
%     tang    : polar target angles (usefull if sagittal-plane HRTFs are
%               extracted directly from SOFA object)
%
%   BAUMGARTNER2014(...) is a model for sound-source localization
%   in sagittal planes (SPs). It bases on the comparison of internal sound 
%   representation with a template and results in a probabilistic
%   prediction of polar angle response.
%
%   BAUMGARTNER2014 accepts the following optional parameters:
%
%     'fs',fs        Define the sampling rate of the impulse responses. 
%                    Default value is 48000 Hz.
%
%     'S',S          Set the listener-specific sensitivity threshold 
%                    (threshold of the sigmoid link function representing 
%                    the psychometric link between transformation from the
%                    distance metric and similarity index) to S. 
%                    Default value is 1.
%
%     'lat',lat      Set the apparent lateral angle of the target sound to
%                    lat. Default value is 0° (median SP).
%
%     'stim',stim    Define the stimulus (source signal without directional
%                    features). As default an impulse is used.
%
%     'fsstim',fss   Define the sampling rate of the stimulus. 
%                    Default value is 48000 Hz.
%
%     'flow',flow    Set the lowest frequency in the filterbank to
%                    flow. Default value is 700 Hz.
%
%     'fhigh',fhigh  Set the highest frequency in the filterbank to
%                    fhigh. Default value is 18000 Hz.
%
%     'space',sp     Set spacing of auditory filter bands (i.e., distance 
%                    between neighbouring bands) to sp in number of
%                    equivalent rectangular bandwidths (ERBs). 
%                    Default value is 1 ERB.
%
%     'conGain',cg   Set the contralateral gain cg of the sigmoid function 
%                    applied for binaural weighting of monaural similarity 
%                    indices. Default value is 13 degrees.
%
%     'polsamp',ps   Define the the polar angular sampling of the current
%                    SP. As default the sampling of ARI's HRTF format at
%                    the median SP is used, i.e.,
%                    ps = [-30:5:70,80,100,110:5:210] degrees.
%
%     'mrsmsp',mrs   Set the motoric response scatter mrs within the median 
%                    sagittal plane. Default value is 17° in accordance
%                    with scatter of unimodal response distribution
%                    proposed in Langendijk and Bronkhorst (2002).
%
%   BAUMGARTNER2014 accepts the following flags:
%
%     'gammatone'    Use the Gammatone filterbank for peripheral processing. 
%                    This is the default.
%
%     'cqdft'        Use a filterbank approximation based on DFT with 
%                    constant relative bandwidth for peripheral processing. 
%                    This was used by Langendijk and Bronkhorst (2002).
%
%     'ihc'          Incorporate the transduction model of inner hair 
%                    cells used by Dau et al. (1996). This is the default.
%
%     'noihc'        Do not incorporate the IHC stage.
%
%     'regular'      Apply spline interpolation in order to regularize the 
%                    angular sampling of the polar response angle. 
%                    This is the default.
%
%     'noregular'    Disable regularization of angular sampling.
%
%   Requirements: 
%   1) SOFA API from http://sourceforge.net/projects/sofacoustics for Matlab (in e.g. thirdparty/SOFA)
% 
%   2) Data in hrtf/baumgartner2014
%
%
%   See also: plotbaumgartner2013, data_baumgartner2014
%
%   References:
%     R. Baumgartner. Modeling sagittal-plane sound localization with the
%     application to subband-encoded head related transfer functions.
%     Master's thesis, University of Music and Performing Arts, Graz, June
%     2012.
%     
%     R. Baumgartner, P. Majdak, and B. Laback. Assessment of Sagittal-Plane
%     Sound Localization Performance in Spatial-Audio Applications,
%     chapter 4, page expected print date. Springer-Verlag GmbH, accepted for
%     publication, 2013.
%     
%     E. Langendijk and A. Bronkhorst. Contribution of spectral cues to human
%     sound localization. J. Acoust. Soc. Am., 112:1583-1596, 2002.
%     
%     R. Patterson, I. Nimmo-Smith, J. Holdsworth, and P. Rice. An efficient
%     auditory filterbank based on the gammatone function. APU report, 2341,
%     1988.
%     
%
%   Url: http://amtoolbox.sourceforge.net/amt-0.9.5/doc/monaural/baumgartner2014.php

% Copyright (C) 2009-2014 Peter L. Søndergaard and Piotr Majdak.
% This file is part of AMToolbox version 0.9.5
%
% This program is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program.  If not, see <http://www.gnu.org/licenses/>.

    
% AUTHOR: Robert Baumgartner, Acoustics Research Institute, Vienna, Austria

%% Check input options 

definput.flags.fbank = {'gammatone','cqdft','drnl','zilany2007humanized','zilany5'};   % disp('zilany2007humanized used!')
definput.flags.headphonefilter = {'','headphone'};
definput.flags.middleearfilter = {'','middleear'};
definput.flags.ihc = {'noihc','ihc'};    
definput.flags.Ifw = {'nointensityweighting','intensityweighting'};
definput.flags.regularization = {'regular','noregular'};
definput.flags.motoricresponsescatter = {'mrs','nomrs'};
definput.flags.sensitivitymapping = {'sigmoidmapping','normstdmapping'};
definput.flags.settings = {'notprint','print'};

% CP-Falgs:
definput.flags.cp={'fwstd','std','xcorr'};

definput.keyvals.fs=48000;      % Hz
definput.keyvals.S=0.5;         % listener-specific sensitivity parameter
definput.keyvals.lat=0;         % deg
definput.keyvals.stim=[];
definput.keyvals.fsstim=[];
definput.keyvals.space=1;       % No. of ERBs (Cams) 
definput.keyvals.do=1;
definput.keyvals.flow=700;      % Hz
definput.keyvals.fhigh=18000;   % Hz
definput.keyvals.lvltar = 50; 	% dBSPL
definput.keyvals.lvltem = 60;  	% dBSPL
definput.keyvals.SL = [];       % db/ERB; spectral density of target sound re absolut detection threshold
definput.keyvals.conGain=13;    % steepness in degrees of binaural weighting function
definput.keyvals.polsamp=[-30:5:70 80 100 110:5:210];  % polar sampling (for regular)
definput.keyvals.mrsmsp=19;     % degrees
definput.keyvals.gamma=6;       % slope of psychometric function

[flags,kv]=ltfatarghelper(...
  {'fs','S','lat','stim','fsstim','space','do','flow','fhigh',...
  'lvltar','lvltem','SL','conGain','polsamp','mrsmsp','gamma'},definput,varargin);

% Print Settings
if flags.do_print 
  if flags.do_mrs
    fprintf('Settings: DCN = %1.0f; Gamma = %1.0u; Epsilon = %1.0f deg \n',kv.do,kv.gamma,kv.mrsmsp)
  else
    fprintf('Settings: DCN = %1.0f; Gamma = %1.0u; Epsilon = 0 deg \n',kv.do,kv.gamma)
  end
end

% HRTF format
if isstruct(target) % Targets given in SOFA format
  kv.fs = target.Data.SamplingRate;
  [target,tang] = extractsp( kv.lat,target );
end

if isstruct(template) % Template given in SOFA format
  [template,kv.polsamp] = extractsp( kv.lat,template );
end


%% Error handling
if size(template,2) ~= length(kv.polsamp)
  fprintf('\n Error: Second dimension of template and length of polsamp need to be of the same size! \n')
  return
end

if kv.S <= 0
  fprintf('\n Error: Listener-specific uncertainty has to be larger than zero! \n')
  return
end


%% Stimulus 
if isempty(kv.stim) 
    kv.stim = [1;0];%[1;zeros(size(target,1),1)];    % impulse
    kv.fsstim = kv.fs;
elseif isempty(kv.fsstim) 
    kv.fsstim = kv.fs;
end

if flags.do_headphone% || flags.do_drnl
    hpfilt = headphonefilter(kv.fs);
    kv.stim = convolve(kv.stim,hpfilt(:));
end

if flags.do_middleear% || flags.do_drnl
    miearfilt = middleearfilter(kv.fs);
    kv.stim = convolve(kv.stim,miearfilt(:));
end


%% DTF filtering
if ~isequal(kv.fs,kv.fsstim)
    disp('Sorry, sampling rate of stimulus and HRIRs must be equal!')
    return
end

tmp = convolve(target,kv.stim);
target = reshape(tmp,[size(tmp,1),size(target,2),size(target,3)]);


%% Set level
idnztar = target~=0;    % to ignore pausings
idnztem = template~=0;  % to ignore pausings
% aht = setdbspl([1;0],kv.lvltar-kv.SL);  % absolut hearing threshold
for ch = 1:size(template,3)
    target(idnztar(:,:,ch)) = setdbspl(target(idnztar(:,:,ch)),kv.lvltar);
% aht(idnztar(:,:,ch)) = setdbspl(target(idnztar(:,:,ch)),kv.lvltar-kv.SL);
    template(idnztem(:,:,ch)) = setdbspl(template(idnztem(:,:,ch)),kv.lvltem);
end
    
    
%% Cochlear filter bank -> internal representations

if kv.space == 1
  [ireptar,fc] = auditoryfilterbank(target(:,:),kv.fs,...
      'flow',kv.flow,'fhigh',kv.fhigh);
  ireptem = auditoryfilterbank(template(:,:),kv.fs,...
      'flow',kv.flow,'fhigh',kv.fhigh);
else
  fc = audspacebw(kv.flow,kv.fhigh,kv.space,'erb');
  [bgt,agt] = gammatone(fc,kv.fs,'complex');
  ireptar = 2*real(ufilterbankz(bgt,agt,target(:,:)));  % channel (3rd) dimension resolved!
  ireptem = 2*real(ufilterbankz(bgt,agt,template(:,:)));
end
Nfc = length(fc);   % # of bands

% Set back the channel dimension
ireptar = reshape(ireptar,[size(target,1),Nfc,size(target,2),size(target,3)]);
ireptem = reshape(ireptem,[size(template,1),Nfc,size(template,2),size(template,3)]);

% Averaging over time (RMS)
ireptar = 20*log10(squeeze(rms(ireptar)));      % in dB!
ireptem = 20*log10(squeeze(rms(ireptem)));

if size(ireptar,2) ~= size(target,2) % retreive polar dimension if squeezed out
    ireptar = reshape(ireptar,[size(ireptar,1),size(target,2),size(target,3)]);
end


%% Comparison process -> monaural similarity indices (SIs)

si=zeros(size(ireptem,2),size(ireptar,2),size(ireptem,3)); % initialisation
for ch = 1:size(ireptar,3)

  if kv.do == 1 % DCN model
      nrep.tem = dcn(ireptem(:,:,ch),kv);
      nrep.tar = dcn(ireptar(:,:,ch),kv);
  elseif kv.do == 2 
      nrep.tem = diff(ireptem(:,:,ch),kv.do);
      nrep.tar = diff(ireptar(:,:,ch),kv.do);
  else
      nrep.tem = ireptem(:,:,ch);
      nrep.tar = ireptar(:,:,ch);
  end

  for it = 1:size(ireptar,2)

    % Distance Metric
    isd = repmat(nrep.tar(:,it),[1,size(nrep.tem,2),1]) - nrep.tem; 
    if kv.do == 0
      sigma = sqrt(squeeze(var(isd)));
    else
%         pnorm = 1;
%         sigma = sum( abs(isd).^pnorm .* repmat(fw,1,length(kv.polsamp)) ).^(1/pnorm);
      sigma = mean(abs(isd));
    end

    % Similarity Percept
    if not(exist('flags','var')) || flags.do_normstdmapping
      si(:,it,ch) = normpdf(sigma,0,kv.S);
    else % flags.do_sigmoidmapping
      si(:,it,ch) = 1+eps - (1+exp(-kv.gamma*(sigma-kv.S))).^-1;
    end
  end
end


%% Binaural weighting -> binaural SIs
if size(si,3) == 2
    binw = 1./(1+exp(-kv.lat/kv.conGain)); % weight of left ear signal with 0 <= binw <= 1
    si = binw * si(:,:,1) + (1-binw) * si(:,:,2);
end


%% Interpolation (regularize polar angular sampling)
if flags.do_regular
    respang0 = ceil(min(kv.polsamp)*0.2)*5;    % ceil to 5°
    respangs = respang0:5:max(kv.polsamp);
    siint = zeros(length(respangs),size(si,2));
    for tt = 1:size(si,2)
        siint(:,tt) = interp1(kv.polsamp,si(:,tt),respangs,'spline');
    end
    si = siint;
    si(si<0) = 0; % SIs must be positive (necessary due to spline interp)
else
    respangs = kv.polsamp;
end


%% Motoric response scatter
if flags.do_mrs && flags.do_regular && kv.mrsmsp > 0
  
    angbelow = -90:5:min(respangs)-5;
    angabove = max(respangs)+5:5:265;
    respangs = [angbelow,respangs,angabove];
    si = [zeros(length(angbelow),size(si,2)) ; si ; zeros(length(angabove),size(si,2))];
    
    mrs = kv.mrsmsp/cos(deg2rad(kv.lat)); % direction dependent scatter (derivation: const. length rel. to the circumferences of circles considered as cross sections of a unit sphere)
    
    x = 0:2*pi/72:2*pi-2*pi/72;
    kappa = 1/deg2rad(mrs)^2; % concentration parameter (~1/sigma^2 of normpdf)
    mrspdf = exp(kappa*cos(x)) / (2*pi*besseli(0,kappa)); % von Mises PDF 
    for tt = 1:size(si,2)
      %si(:,tt) = circonv(si(:,tt),mrspdf,360/5);
      si(:,tt) = pconv(si(:,tt),mrspdf(:));
    end
    
end


%% Normalization to PMV
p = si ./ repmat(sum(si),size(si,1),1);


%% Output
varargout{1} = p;
if nargout >= 2
    varargout{2} = respangs;
    if nargout >= 3
        varargout{3} = tang;
    end
end
  
end

function t4 = dcn(an,kv)
%DCN Phenomenological model of dorsal cochlear nucleus (DCN)
%   Usage:      out = dcn(in)
%
%   Input parameters:
%     an      : spectral profile in dB
%
%   Output parameters:
%     t4      : activity of type IV unit

%% Parameter Settings
c2 = 1; % inhibitory coupling between type II and type IV neurons
c4 = 1; % coupling between an and type IV neuron
dilatation = 1; % of tonotopical 1-ERB-spacing between type IV and II neurons

%% Calculations
Nb = size(an,1); % # auditory bands
dt4t2 = round(dilatation/kv.space); % tonotopical distance between type IV and II neurons
t4 = zeros(Nb-dt4t2,size(an,2),size(an,3)); % type IV output
for b = 1:Nb-dt4t2
  t4(b,:,:) = c4 * an(b+dt4t2,:,:) - c2 * an(b,:,:);
end

t4 = max(t4,0); %disp('only rising edges')
end