This documentation page applies to an outdated major AMT version. We show it for archival purposes only.
Click here for the documentation menu and here to download the latest AMT (1.6.0).
function out = may2011(input,fs)
%may2011 GMM-based estimation of azimuth direction for concurrent speakers
% Usage: out = may2011(input,fs);
%
% Input parameters:
% input : binaural input signal [nSamples x 2 channels]
%
% fs : sampling frequency in Hertz
%
% Output parameters:
% out : localization structure containing the following fields:
%
% .param = processing parameters
%
% .azFrames = frame-based azimuth estimate [Frames x 1]
%
% .azimuth = time-frequency azimuth estimate [nFilter x nFrames]
%
% .rangeAZ = azimuth grid [nAzDir x 1]
%
% .prob = 3D probability map [nFilter x nFrames x nAzDir]
%
% .loglik = 3D log-likelihood map [nFilter x nFrames x nAzDir]
%
% .itd = interaural time difference [nFilter x nFrames]
%
% .ild = interaural level difference [nFilter x nFrames]
%
% .ic = interaural coherence [nFilter x nFrames]
%
% References:
% T. May, S. van de Par, and A. Kohlrausch. A probabilistic model for
% robust localization based on a binaural auditory front-end. IEEE Trans
% Audio Speech Lang Proc, 19:1-13, 2011.
%
%
% Url: http://amtoolbox.sourceforge.net/amt-0.9.7/doc/binaural/may2011.php
% Copyright (C) 2009-2014 Peter L. Søndergaard and Piotr Majdak.
% This file is part of AMToolbox version 0.9.7
%
% This program is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program. If not, see <http://www.gnu.org/licenses/>.
% Developed with Matlab 7.8.0.347 (R2009a). Please send bug reports to:
%
% Author : Tobias May 2009-2014
% University of Oldenburg and TU/e Eindhoven
% tobias.may@uni-oldenburg.de t.may@tue.nl
%
% History :
% v.0.1 2009/04/05
% v.0.2 2014/02/27 adopted model to run in the AMToolbox
% v.0.3 2015/01/20 integrated in the AMT
% ***********************************************************************
% Initialize persistent memory
persistent AMT_may2011PERC AMT_may2011PERmethod
%% *********************** CHECK INPUT ARGUMENTS ************************
%
%
% Check for proper input arguments
if nargin ~= 2
help(mfilename);
error('Wrong number of input arguments!');
end
%% ******************* INITIALIZE AZIMUTH CLASSIFIER ********************
%
%
% Block parameter
blockSec = 20e-3; % Block size in seconds
hopSec = 10e-3; % Step size in seconds
winType = 'rectwin';
methodGMM = 'default';
% Select GMM preset
switch lower(methodGMM)
case 'default'
% Room 5 -90:5:90
A.gmmModel = 'modeldata.mat';
otherwise
error(['GMM module ''',lower(methodGMM),'''is not available.'])
end
% Check if localization module can be re-loaded from persistent memory
if ~isempty(AMT_may2011PERC) && ~isempty(AMT_may2011PERmethod) && isequal(methodGMM,AMT_may2011PERmethod)
% Re-load GMMs
C = AMT_may2011PERC;
else
% Load localization module
x=amtload('may2011',A.gmmModel);
C=x.C;
% Store classifier to persistent memory
AMT_may2011PERC = C; AMT_may2011PERmethod = methodGMM;
end
% Store classifier
A.GMM = C;
% Extract gammatone filterbank settings from the classifier
GFB = A.GMM.featSpace(1).GFB;
% Adapt fs in GFB structure according to input signal
if ~isequal(GFB.fs,fs)
GFB.fs = fs;
end
% Haircell processing
if isfield(C.featSpace(1),'neuralTransduction')
A.haircellModel = C.featSpace(1).neuralTransduction;
else
A.haircellModel = 'roman';
end
% Cross-correlation method
if isfield(C.featSpace(1),'xcorrMethod') && ...
~isempty(C.featSpace(1).xcorrMethod)
A.xcorrMethod = C.featSpace(1).xcorrMethod;
else
A.xcorrMethod = 'power';
end
% ITD parameter
A.bXcorrNorm = true;
A.bXcorrDetrend = true;
A.maxDelay = round(1e-3 * fs);
A.lags = (-A.maxDelay:1:A.maxDelay).';
A.domain = 'time';
% Resolution of GMM models
stepSizeGMM = 1;
% For short access
gmmFinal = C.classifier.gmmFinal;
% Build azimuth grid
azIdx = 1:stepSizeGMM:length(C.azimuth);
azGrid = C.azimuth(1:stepSizeGMM:end);
azStep = diff(azGrid(1:2));
% Number of different azimuth directions
nAz = length(azGrid);
% Determine length of input signal
nSamples = size(input,1);
% Block processing parameters
blockSamples = 2 * round(fs * blockSec / 2);
hopSamples = 2 * round(fs * hopSec / 2);
overSamples = blockSamples - hopSamples;
% Calculate number of frames
nFrames = fix((nSamples-overSamples)/hopSamples);
% Allocate memory
prob = zeros(GFB.nFilter,nFrames,nAz);
loglik = zeros(GFB.nFilter,nFrames,nAz);
feat = zeros(nFrames,2);
itd = zeros(GFB.nFilter,nFrames);
ild = zeros(GFB.nFilter,nFrames);
ic = zeros(GFB.nFilter,nFrames);
%% ************************ GAMMATONE FILTERING *************************
%
%
% Perform gammatone filtering
bm = may2011gammatone(input,GFB);
%% ******************** BINAURAL FEATURE EXTRACTION *********************
%
%
% Loop over number of channels
for ii = 1 : GFB.nFilter
% Neural transduction
left = may2011neuraltransduction(bm(:,ii,1), fs, A.haircellModel);
right = may2011neuraltransduction(bm(:,ii,2), fs, A.haircellModel);
% Framing
frameL = may2011frameData(left, blockSamples, hopSamples, winType);
frameR = may2011frameData(right,blockSamples, hopSamples, winType);
% =====================================================================
% ITD ESTIMATE
% =====================================================================
%
% Cross-correlation analysis
switch lower(A.xcorrMethod)
case 'power'
% Time-based
xcorr = may2011xcorrNorm(frameL,frameR,A.maxDelay,A.bXcorrDetrend,...
A.bXcorrNorm);
end
% Maximum search per frame
bM = transpose(argmax(xcorr,1));
% Refine lag position by applying parabolic interpolation
[delta,currIC] = may2011interpolateParabolic(xcorr,bM);
% Calculate ITD with fractional part
feat(:,1) = A.lags(bM)/fs + delta * (1/fs);
% =====================================================================
% ILD ESTIMATE
% =====================================================================
%
% Calculate energy per frame
bmEnergyL = sum(abs(frameL).^2)/blockSamples;
bmEnergyR = sum(abs(frameR).^2)/blockSamples;
% Compute ILD
feat(:,2) = 10 * (log10(bmEnergyR + eps) - log10(bmEnergyL + eps));
% Compensate for square-root compression
if isequal(A.haircellModel,'roman')
% Reverse effect of sqrt() in decibel domain
feat(:,2) = 2 * feat(:,2);
end
% =====================================================================
% CREATE 3-DIMENSIONAL AZIMUTH-DEPENDENT LIKELIHOOD FUNCTION
% =====================================================================
%
% Classify azimuth
prob(ii,:,:) = may2011classifyGMM(feat,gmmFinal{ii}(azIdx));
% Normalize
prob(ii,:,:) = prob(ii,:,:) ./ repmat(sum(prob(ii,:,:),3),[1 1 nAz]);
% Store log-likelihood
loglik(ii,:,:) = log(prob(ii,:,:));
% Store ITD
itd(ii,:) = feat(:,1);
% Store ILD
ild(ii,:) = feat(:,2);
% Store IC
ic(ii,:) = currIC;
end
%% *********************** LOCALIZATION ESTIMATE ************************
%
%
% =========================================================================
% TIME/FREQUENCY-BASED AZIMUTH ESTIMATE
% =========================================================================
%
%
% Maximum search
[tmp,maxIdx] = max(loglik,[],3); %#ok
% Warp to azimuth indices
azimuth = azGrid(maxIdx);
% =========================================================================
% FRAME-BASED AZIMUTH ESTIMATE
% =========================================================================
%
%
% Integrate log-likelihoods across channels
intChanGMM = squeeze(nanmean(loglik,1));
% Maximum search
[tmp,maxIdx] = max(intChanGMM,[],2); %#ok
% Warp to azimuth indices
azFrames = azGrid(maxIdx);
% Apply interpolation to increase azimuth resolution
delta = may2011interpolateParabolic(exp(intChanGMM).',maxIdx);
% Find overall azimuth estimate
azFrames(:) = azFrames(:) + (azStep*delta(:));
%% ************************ CREATE RESULT STRUCT ************************
%
%
% Create output structure
out.param = A;
out.azFrames = azFrames;
out.azimuth = azimuth;
out.rangeAZ = azGrid;
out.prob = prob;
out.loglik = loglik;
out.itd = itd;
out.ild = ild;
out.ic = ic;
function maxidx = argmax(input, dim)
if nargin < 2 || isempty(dim)
[temp, maxidx] = max(input);
else
[temp, maxidx] = max(input,[],dim);
end
function m=nanmean(x,dim)
m=mean(x(~isnan(x)),dim);