THE AUDITORY MODELING TOOLBOX

Applies to version: 1.0.0

View the help

Go to function

barumerli2021_featureextraction - extract HRTF using gammatone, frequency bands and ITDs from SOFA object

Program code:

function [varargout] = barumerli2021_featureextraction(sofa_obj, varargin)
%barumerli2021_featureextraction extract HRTF using gammatone, frequency bands and ITDs from SOFA object
%
%   Usage: [template, target] = barumerli2021_featureextraction(SOFAobj)
%
%   Input parameters:
%       hrtf_sbj: Struct in SOFA format with DTFs
%
%   Output parameters:
%     template : internal templates with specific feature points
%     target   : (optional) preprocessed target struct
%
%   BARUMERLI2021_FEATUREEXTRACTION(...) computes temporally integrated
%   spectral magnitude profiles and itd.
%
%   BARUMERLI2021_FEATUREEXTRACTION accepts the following optional parameters:
% 
%   See also: barumerli2021
%
%   Url: http://amtoolbox.sourceforge.net/amt-0.10.0/doc/modelstages/barumerli2021_featureextraction.php

% Copyright (C) 2009-2020 Piotr Majdak and the AMT team.
% This file is part of Auditory Modeling Toolbox (AMT) version 1.0.0
%
% This program is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program.  If not, see <http://www.gnu.org/licenses/>.

%   References: 
%   AUTHOR: Roberto Barumerli
%   Information Engineering Dept., University of Padova, Italy, 2020

    definput.import={'amt_cache', 'barumerli2021_featureextraction'};
    definput.keyvals.fs = sofa_obj.Data.SamplingRate;
    
    [flags, kv]  = ltfatarghelper({}, definput, varargin);
    % some checks... please if you need to change them be careful... 
    assert(~xor(flags.do_source, ~isempty(kv.source_ir)), ...
        'Please add the flag source to enable the source computation!')
    assert(xor(flags.do_all, flags.do_template | flags.do_target))
    if(flags.do_source)
        assert(flags.do_target, ...
        'If you need to add a source, please specify the target flag!')
    end
    
    %% extract coordinates
    % Get directions from SOFA file
    coords = barumerli2021_coordinates(sofa_obj);
    % NOTE: assume position on a sphere with radius of 1 meter
    % if you change it there will be a problem with the cached points
    % for the sphere interpolation! 
    coords.normalize_distance();
    
    % do_all, do_template and do_target allow to reduce the number of times
    % that the features require to be computed
    
    if (flags.do_all || flags.do_template)
        features = local_computefeatures(sofa_obj, coords, kv, flags);
    end
    
    %% TEMPLATE
    if flags.do_template || flags.do_all
        %% SPHERICAL HARMONIC INTERPOLATION
        if flags.do_monoaural
            % split polar
            pl = size(features.polar, 2);
            polar1 = features.polar(:, 1:pl/2);
            polar1 = local_resamplefeatures(polar1, features.coords);
            polar2 = features.polar(:, (pl/2+1):end);
            polar2 = local_resamplefeatures(polar2, features.coords);

            template.polar = [polar1, polar2];
        else
            template.polar = local_resamplefeatures(features.polar, ...
                                                    features.coords);
        end
        [template.lateral, template.coords] = ...
            local_resamplefeatures(features.lateral, features.coords);
        template.lateral_bis = ...
            local_resamplefeatures(features.lateral_bis, features.coords);

        template.fc = features.fc;
    end

    %% TARGET
    if flags.do_target
        target = local_computefeatures(sofa_obj, coords, kv, flags);
    elseif flags.do_all 
        assert(logical(flags.do_source_broadband))
       assert(exist('template', 'var') == 1)
        
        target = template;
        if ~isempty(kv.targ_az)
            coords_search = barumerli2021_coordinates([kv.targ_az, kv.targ_el, ones(size(kv.targ_el))], 'spherical');
            [coords_new, idx] = extract_directions_from_coords(target.coords, coords_search);
	    target.coords = coords_new;
	    target.lateral = target.lateral(idx,:);
	    
            if ~isempty(target.lateral_bis)
                target.lateral_bis = target.lateral_bis(idx,:);
            end
            
            if ~isempty(target.polar)
                target.polar = target.polar(idx,:);
            end
        end
    end

    % output parameters
    if flags.do_template
        varargout{1} = template;
    elseif flags.do_target
        varargout{1} = target;
    else
        varargout{1} = template;
        varargout{2} = target;
    end
end

function [feature] = local_computefeatures(sofa_obj, coords, kv, flags)  
    % normalize HRTF 
    sofa_frontal = local_extractdirections(sofa_obj, barumerli2021_coordinates([0,0,1], 'spherical'));
    sofaFData = sofa_frontal.Data.IR(1, :, :);
    sofa_obj.Data.IR = sofa_obj.Data.IR ./ (max(abs(sofaFData(:)))+eps);

    stimulus = sofa_obj.Data.IR;
    
    if flags.do_target
        coords_search = barumerli2021_coordinates([kv.targ_az, kv.targ_el, ones(size(kv.targ_el))], 'spherical');
        
        [sofa_filtered, coords] = local_extractdirections(sofa_obj, coords_search);
        stimulus = sofa_filtered.Data.IR;
        
        if flags.do_source
            % normalize sound source
            kv.source_ir = kv.source_ir ./ max(abs(kv.source_ir));
            stimulus = local_convolvesource(sofa_filtered.Data.IR, sofa_filtered.Data.SamplingRate, kv.source_ir, kv.source_fs);
        end
    end
        
    %% compute LATERAL
    % parameters to transform into the jnd scale
    % check Reijniers2014 for these magic numbers
    a = 32.5e-6;
    b = 0.095;
    
    if strcmp(flags.feature_lateral, 'itd_broadband')
        itd = itdestimator(stimulus, 'MaxIACCe', 'fs', kv.fs, flags.disp);    
        % transform into the jnd scale - check Reijniers2014
        itd = sign(itd) .* ((log(a + b * abs(itd)) - log(a)) / b); 
    else
        error(['The lateral feature ', ...
            flags.lateral_feature, ' was not recognized'])
    end
    
    % ILD
    ild_broad = (mag2db(squeeze(rms(stimulus(:,1,:), 'dim', 3))) - ...
                    mag2db(squeeze(rms(stimulus(:,2,:),'dim', 3))));
    feature.lateral = itd;
    feature.lateral_bis = ild_broad;
    
    %% compute POLAR
    % compute spectral analysis
    [dtf, fc] = local_spectralanalysis(stimulus, kv);
    for ch=1:size(dtf, 2)
        for side=1:2
            dtf(:,ch,side,:) = sqrt(max(squeeze(dtf(:,ch,side,:)),0));
        end
    end
    % Averaging over time (RMS)
    dtf = (rms(dtf, 'dim', 4)); 
    
    % convert in dB
    dtf = 20*log10(dtf);
    
    if strcmp(flags.feature_polar, 'dtf')
        feature.polar = dtf;
    elseif strcmp(flags.feature_polar, 'pge')
        % compute Positive Gradient Extraction (PGE)
        pge = zeros(size(dtf, 1), size(dtf, 2)-1, size(dtf, 3));
        for i=1:size(dtf,1)
            [pge(i, :, :), gfc] = ...
                baumgartner2014_gradientextraction(squeeze(dtf(i,:,:)), fc);
        end
        fc = gfc;
        feature.polar = pge;
    else
        error(['The polar feature ', ...
                flags.polar_feature, ' was not recognized'])
    end
    
    feature.fc = fc;
    
    %% combine SPECTRAL
    if flags.do_reijniers
        feature.lateral_bis = [squeeze(dtf(:,:,1)) + squeeze(dtf(:,:,2))];
        feature.polar = [squeeze(dtf(:,:,1)) - squeeze(dtf(:,:,2))];
    elseif flags.do_interaural_difference
        feature.polar = [];
    elseif flags.do_monoaural
        polar_idx = find(fc>kv.monoaural_bw(1) & fc<kv.monoaural_bw(2));
        assert(~isempty(polar_idx), 'monoaural frequency bands empty')
        feature.fc = fc(polar_idx);
        feature.polar = reshape(feature.polar(:,polar_idx,:), ...
                                size(feature.polar(:,polar_idx,:), 1),[]);
    else
        error(['The polar combination ', ...
                flags.feature_polar_combine, ' was not recognized'])
    end
    
    feature.coords = coords;
end

function [sofa_obj, coords_new, idx] = local_extractdirections(sofa_obj, coords_search)
    coords = barumerli2021_coordinates(sofa_obj);
    coords.normalize_distance();

    [coords_new, idx] = extract_directions_from_coords(coords, coords_search);

    sofa_obj.API.('M') = length(idx);
    sofa_obj.Data.IR = sofa_obj.Data.IR(idx,:,:);
    sofa_obj.SourcePosition = sofa_obj.SourcePosition(idx,:);
end
    

function [coords_new, idx] = extract_directions_from_coords(coords, coords_search)
    if(coords_search.count_pos() ~= 0)
        % TODO: warning... some points are avoided because of numerical
        [idx, coords_new] = coords.find_positions(coords_search);
        
        if(numel(idx) ~= coords_search.count_pos())
            amt_disp(sprintf('Requested HRTF''s points: %i\nFound: %i', ...
                coords_search.count(), numel(idx)))
        end

    else
        coords_new = coords;
        idx = 1:coords_search.count_pos();
    end
end

function stimulus = local_convolvesource(hrir, hrir_fs, source, source_fs)
    if source_fs <= 0
        error('source_fs is zero')
    end
    if source_fs ~= hrir_fs
        fsgcd = gcd(hrir_fs, source_fs);
        source = resample(source, hrir_fs/fsgcd, source_fs/fsgcd);
    end
    
    stimulus = zeros(size(hrir,1), ...
                     size(hrir,2), ...
                     size(hrir,3) + length(source) - 1);
    for i = 1:size(hrir, 1)
        stimulus(i,:,:) = lconv(squeeze(hrir(i,:,:))',source)';
    end
end

function [dtf, fc] = local_spectralanalysis(stimulus, kv)
    % this function expect the stimulus organized as 
    % [directions, ear_channel, time_index]
    assert(size(stimulus, 2) == 2);
    [dir_len, ear_len, time_len] = size(stimulus);
    dir_idx = 1;
    ear_idx = 2;
    time_idx = 3;
    
    % permute in order to use ufilterbankz
    stimulus = permute(double(stimulus),[time_idx, dir_idx, ear_idx]);
    % pad to account for longer filters in the filterbank
    pad_len = 0.05; % secs
    pad_mat = zeros(pad_len*kv.fs - time_len, dir_len, ear_len);
    stimulus = cat(1, stimulus, pad_mat);
    
    % compute templates features
    if kv.space == 1 % Standard spacing of 1 ERB
      [dtf,fc] = auditoryfilterbank(stimulus(:,:), kv.fs, 'flow', ...
                                            kv.flow, 'fhigh', kv.fhigh);
    else
      fc = audspacebw(kv.flow, kv.fhigh, kv.space, 'erb');
      [bgt,agt] = gammatone(fc, kv.fs, 'complex');
      % channel (3rd) dimension resolved
      dtf = 2*real(ufilterbankz(bgt,agt, stimulus(:,:)));  
    end
    
    % restore 2 channels
    dtf_size = size(stimulus);
    dtf = reshape(dtf,[dtf_size(1),length(fc),dtf_size(2),dtf_size(3)]);
    
    dtf = permute(dtf, [3 2 4 1]);
end

function [feature_interp, coords_interp] = local_resamplefeatures(feature, coords)
    if isempty(feature)
        feature_interp = zeros(0);
        coords_interp = zeros(0);
        return
    end
    
    % sample uniformly over sphere with N is number of directions
    % NOTE: amt_cache('get', 'dirs') contains the sampled point on a unitary
    % sphere
    %dirs = amt_cache('get', 'dirs');
    dirs = amt_load('barumerli2021','dirs.mat');

    % remove the points from the unitary sphere below HRTF lowest elevation
    coords_init = coords.return_positions('cartesian');
    dirs = dirs.cache.value;
    dirs = dirs(dirs(:,3) > min(coords_init(:, 3)),:);
    coords_interp = barumerli2021_coordinates(dirs, 'cartesian');

    %% interpolate at uniformly distributed directions and update feature
    % calculate spherical harmonic coefficients of H and itd, using tikonov regularization
    sh_order = 15; % spherical harmonic order 
    Y_N_tik = local_SH(sh_order, coords); 
    
    % calculate SH coefficients of H and ITD, using tikonov regularization
    lambda = 4.;
    SIG = eye((sh_order+1)^2);
    SIG(1:(2+1)^2,1:(2+1)^2) = 0;
    
    % interpolate at uniformly distributed directions and update
    Y_N_interp = local_SH(sh_order, coords_interp); 
    
    for c = 1:size(feature, 3)
        c_feat = (Y_N_tik'*Y_N_tik+lambda*SIG)\Y_N_tik'*squeeze(feature(:,:,c));
        feature_interp(:,:,c) = Y_N_interp*c_feat;
    end
end

function Y_N = local_SH(N, coords)
% calculate spherical harmonics up to order N for directions dirs [azi ele;...] (in radiant)
% 
    dirs = coords.return_positions('spherical');
    dirs = [deg2rad(dirs(:,1)), deg2rad(dirs(:,2))];
    
    N_dirs = size(dirs, 1);
    N_SH = (N+1)^2;
	dirs(:,2) = pi/2 - dirs(:,2); % convert to inclinations

    assert(N_SH < N_dirs, ...
        ['Spherical harmonics: beware that the number of provided ',...
        'coordinates is too low to obtain a precise interpolation'])
    
    Y_N = zeros(N_SH, N_dirs);

	  % n = 0
	Lnm = legendre(0, cos(dirs(:,2)'));
	Nnm = sqrt(1./(4*pi)) * ones(1,N_dirs);
	CosSin = zeros(1,N_dirs);
	CosSin(1,:) = ones(1,size(dirs,1));
	Y_N(1, :) = Nnm .* Lnm .* CosSin;
	
	  % n > 0
	idx = 1;
    for n=1:N
        
        m = (0:n)';            

		Lnm = legendre(n, cos(dirs(:,2)'));
		condon = (-1).^[m(end:-1:2);m] * ones(1,N_dirs);
		Lnm = condon .* [Lnm(end:-1:2, :); Lnm];
		
		mag = sqrt( (2*n+1)*factorial(n-m) ./ (4*pi*factorial(n+m)) );
		Nnm = mag * ones(1,N_dirs);
		Nnm = [Nnm(end:-1:2, :); Nnm];
		
		CosSin = zeros(2*n+1,N_dirs);
			% m=0
		CosSin(n+1,:) = ones(1,size(dirs,1));
			% m>0
		CosSin(m(2:end)+n+1,:) = sqrt(2)*cos(m(2:end)*dirs(:,1)');
			% m<0
		CosSin(-m(end:-1:2)+n+1,:) = sqrt(2)*sin(m(end:-1:2)*dirs(:,1)');

		Ynm = Nnm .* Lnm .* CosSin;
        Y_N(idx+1:idx+(2*n+1), :) = Ynm;
        idx = idx + 2*n+1;
    end
    
    Y_N = Y_N.';
end