function out = may2011(input,fs)
%may2011 Azimuthal localization of concurrent talkers
% Usage: out = may2011(input,fs);
%
% Input parameters:
% input : Binaural audio signal. Size: (*time ear*).
%
% fs : Sampling frequency (in Hz).
%
% Output parameters:
% out : Structure containing the results:
%
% - param*: Processing parameters.
%
% - azFrames*: Vector with the frame-based azimuth estimates. Size: nFrames.
%
% - azimuth*: Matrix with the time-frequency azimuth estimates. Size: (*nFilter x nFrames*).
%
% - rangeAZ*: Vector with the azimuth grid. Size: nAzDir.
%
% - prob*: Matrix with the 3D probability map. Size: (*nFilter x nFrames x nAzDir*).
%
% - loglik*: Matrix with the 3D log-likelihood map. Size: (*nFilter x nFrames x nAzDir*).
%
% - itd*: Matrix with the interaural time differences. Size: (*nFilter x nFrames*).
%
% - ild*: Matrix with the interaural level differences. Size (*nFilter x nFrames*).
%
% - ic*: Matrix with the interaural coherences. Size (*nFilter x nFrames*).
%
%
% MAY2011 is a probabilistic model of azimuthal direction estimation with concurrent talkers.
% Note that in the current implementation, all model parameters are hard-coded
% in the auxdata modeldata.mat.
%
% nFrames is the approximate number of 10-ms segments in input.
%
% nFilter is 32 unless otherwise specified in the code.
%
% nAzDir is 37 unless otherwise specified in the code.
%
% References:
% T. May, S. van de Par, and A. Kohlrausch. A probabilistic model for
% robust localization based on a binaural auditory front-end. IEEE Trans
% Audio Speech Lang Proc, 19:1--13, 2011.
%
% N. Roman, D. L. Wang, and G. J. Brown. Speech segregation based on
% sound localization. The Journal of the Acoustical Society of America,
% 114(4):2236--2252, 2003.
%
%
% See also: may2011_interpolateparabolic
% may2011_classifygmm
% may2011_xcorrnorm
% may2011_findlocalpeaks
% may2011_cbarlabel
% may2011_gammatone
% may2011_framedata
% may2011_neuraltransduction
% may2011_estazimuthgmm
% may2011_fireprint
% may2011_gammatoneinit
% demo_may2011
%
% Url: http://amtoolbox.org/amt-1.6.0/doc/models/may2011.php
% #StatusDoc: Good
% #StatusCode: Good
% #Verification: Unknown
% #Requirements: MATLAB M-Signal
% #Author: Tobias May (2014)
% #Author: Michael Mihocic (2024): check if Octave is used -> return warning
% This file is licensed unter the GNU General Public License (GPL) either
% version 3 of the license, or any later version as published by the Free Software
% Foundation. Details of the GPLv3 can be found in the AMT directory "licences" and
% at <https://www.gnu.org/licenses/gpl-3.0.html>.
% You can redistribute this file and/or modify it under the terms of the GPLv3.
% This file is distributed without any warranty; without even the implied warranty
% of merchantability or fitness for a particular purpose.
% ***********************************************************************
if isoctave
warning([mfilename ' is not supported in Octave.']);
return;
end
% Initialize persistent memory
persistent AMT_may2011PERC AMT_may2011PERmethod
%% *********************** CHECK INPUT ARGUMENTS ************************
%
%
% Check for proper input arguments
if nargin ~= 2
help(mfilename);
error('Wrong number of input arguments!');
end
%% ******************* INITIALIZE AZIMUTH CLASSIFIER ********************
%
%
% Block parameter
blockSec = 20e-3; % Block size in seconds
hopSec = 10e-3; % Step size in seconds
winType = 'rectwin';
methodGMM = 'default';
% Select GMM preset
switch lower(methodGMM)
case 'default'
% Room 5 -90:5:90
A.gmmModel = 'modeldata.mat';
otherwise
error(['GMM module ''',lower(methodGMM),'''is not available.'])
end
% Check if localization module can be re-loaded from persistent memory
if ~isempty(AMT_may2011PERC) && ~isempty(AMT_may2011PERmethod) && isequal(methodGMM,AMT_may2011PERmethod)
% Re-load GMMs
C = AMT_may2011PERC;
else
% Load localization module
x=amt_load('may2011',A.gmmModel);
C=x.C;
% Store classifier to persistent memory
AMT_may2011PERC = C; AMT_may2011PERmethod = methodGMM;
end
% Store classifier
A.GMM = C;
% Extract gammatone filterbank settings from the classifier
GFB = A.GMM.featSpace(1).GFB;
% Adapt fs in GFB structure according to input signal
if ~isequal(GFB.fs,fs)
GFB.fs = fs;
end
% Haircell processing
if isfield(C.featSpace(1),'neuralTransduction')
A.haircellModel = C.featSpace(1).neuralTransduction;
else
A.haircellModel = 'roman';
end
% Cross-correlation method
if isfield(C.featSpace(1),'xcorrMethod') && ...
~isempty(C.featSpace(1).xcorrMethod)
A.xcorrMethod = C.featSpace(1).xcorrMethod;
else
A.xcorrMethod = 'power';
end
% ITD parameter
A.bXcorrNorm = true;
A.bXcorrDetrend = true;
A.maxDelay = round(1e-3 * fs);
A.lags = (-A.maxDelay:1:A.maxDelay).';
A.domain = 'time';
% Resolution of GMM models
stepSizeGMM = 1;
% For short access
gmmFinal = C.classifier.gmmFinal;
% Build azimuth grid
azIdx = 1:stepSizeGMM:length(C.azimuth);
azGrid = C.azimuth(1:stepSizeGMM:end);
azStep = diff(azGrid(1:2));
% Number of different azimuth directions
nAz = length(azGrid);
% Determine length of input signal
nSamples = size(input,1);
% Block processing parameters
blockSamples = 2 * round(fs * blockSec / 2);
hopSamples = 2 * round(fs * hopSec / 2);
overSamples = blockSamples - hopSamples;
% Calculate number of frames
nFrames = fix((nSamples-overSamples)/hopSamples);
% Allocate memory
prob = zeros(GFB.nFilter,nFrames,nAz);
loglik = zeros(GFB.nFilter,nFrames,nAz);
feat = zeros(nFrames,2);
itd = zeros(GFB.nFilter,nFrames);
ild = zeros(GFB.nFilter,nFrames);
ic = zeros(GFB.nFilter,nFrames);
%% ************************ GAMMATONE FILTERING *************************
%
%
% Perform gammatone filtering
bm = may2011_gammatone(input,GFB);
%% ******************** BINAURAL FEATURE EXTRACTION *********************
%
%
% Loop over number of channels
for ii = 1 : GFB.nFilter
% Neural transduction
left = may2011_neuraltransduction(bm(:,ii,1), fs, A.haircellModel);
right = may2011_neuraltransduction(bm(:,ii,2), fs, A.haircellModel);
% Framing
frameL = may2011_framedata(left, blockSamples, hopSamples, winType);
frameR = may2011_framedata(right,blockSamples, hopSamples, winType);
% =====================================================================
% ITD ESTIMATE
% =====================================================================
%
% Cross-correlation analysis
switch lower(A.xcorrMethod)
case 'power'
% Time-based
xcorr = may2011_xcorrnorm(frameL,frameR,A.maxDelay,A.bXcorrDetrend,...
A.bXcorrNorm);
end
% Maximum search per frame
bM = transpose(argmax(xcorr,1));
% Refine lag position by applying parabolic interpolation
[delta,currIC] = may2011_interpolateparabolic(xcorr,bM);
% Calculate ITD with fractional part
feat(:,1) = A.lags(bM)/fs + delta * (1/fs);
% =====================================================================
% ILD ESTIMATE
% =====================================================================
%
% Calculate energy per frame
bmEnergyL = sum(abs(frameL).^2)/blockSamples;
bmEnergyR = sum(abs(frameR).^2)/blockSamples;
% Compute ILD
feat(:,2) = 10 * (log10(bmEnergyR + eps) - log10(bmEnergyL + eps));
% Compensate for square-root compression
if isequal(A.haircellModel,'roman')
% Reverse effect of sqrt() in decibel domain
feat(:,2) = 2 * feat(:,2);
end
% =====================================================================
% CREATE 3-DIMENSIONAL AZIMUTH-DEPENDENT LIKELIHOOD FUNCTION
% =====================================================================
%
% Classify azimuth
prob(ii,:,:) = may2011_classifygmm(feat,gmmFinal{ii}(azIdx));
% Normalize
prob(ii,:,:) = prob(ii,:,:) ./ repmat(sum(prob(ii,:,:),3),[1 1 nAz]);
% Store log-likelihood
loglik(ii,:,:) = log(prob(ii,:,:));
% Store ITD
itd(ii,:) = feat(:,1);
% Store ILD
ild(ii,:) = feat(:,2);
% Store IC
ic(ii,:) = currIC;
end
%% *********************** LOCALIZATION ESTIMATE ************************
%
%
% =========================================================================
% TIME/FREQUENCY-BASED AZIMUTH ESTIMATE
% =========================================================================
%
%
% Maximum search
[tmp,maxIdx] = max(loglik,[],3); %#ok
% Warp to azimuth indices
azimuth = azGrid(maxIdx);
% =========================================================================
% FRAME-BASED AZIMUTH ESTIMATE
% =========================================================================
%
%
% Integrate log-likelihoods across channels
intChanGMM = squeeze(nanmean(loglik,1));
% Maximum search
[tmp,maxIdx] = max(intChanGMM,[],2); %#ok
% Warp to azimuth indices
azFrames = azGrid(maxIdx);
% Apply interpolation to increase azimuth resolution
delta = may2011_interpolateparabolic(exp(intChanGMM).',maxIdx);
% Find overall azimuth estimate
azFrames(:) = azFrames(:) + (azStep*delta(:));
%% ************************ CREATE RESULT STRUCT ************************
%
%
% Create output structure
out.param = A;
out.azFrames = azFrames;
out.azimuth = azimuth;
out.rangeAZ = azGrid;
out.prob = prob;
out.loglik = loglik;
out.itd = itd;
out.ild = ild;
out.ic = ic;
function maxidx = argmax(input, dim)
if nargin < 2 || isempty(dim)
[temp, maxidx] = max(input);
else
[temp, maxidx] = max(input,[],dim);
end
function m=nanmean(x,dim)
m=mean(x(~isnan(x)),dim);