This documentation page applies to an outdated major AMT version. We show it for archival purposes only.
Click here for the documentation menu and here to download the latest AMT (1.6.0).
EXP_DIETZ2011 - Experiments from Dietz et al. 2011

Program code:

function example_output = exp_dietz2011(varargin)
%EXP_DIETZ2011 Experiments from Dietz et al. 2011
%
%   EXP_DIETZ2011(fig) reproduce Fig. no. fig from the Dietz et
%   al. 2011 paper.
%
%   *Note**: The input signals used in this routine are not identical to
%   the ones used in the original paper.
%
%   The following flags can be specified;
%
%     'plot'    Plot the output of the experiment. This is the default.
%
%     'noplot'  Don't plot, only return data.
%
%     'fig3'    Reproduce Fig. 3 panels a + b.
%
%     'fig4'    Reproduce Fig. 4.
%
%     'fig5'    Reproduce Fig. 5.
%
%     'fig6'    Reproduce Fig. 6.
%
%   Examples:
%   ---------
%
%   To display Fig. 3 use :
%
%     exp_dietz2011('fig3');
%
%   To display Fig. 4 use :
%
%     exp_dietz2011('fig4');
%
%   To display Fig. 5 use :
%
%     exp_dietz2011('fig5');
%
%   To display Fig. 6 use :
%
%     exp_dietz2011('fig6');
%
%   References:
%     M. Dietz, S. D. Ewert, and V. Hohmann. Auditory model based direction
%     estimation of concurrent speakers from binaural signals. Speech
%     Communication, 53(5):592-605, 2011. [1]http ]
%     
%     References
%     
%     1. http://www.sciencedirect.com/science/article/pii/S016763931000097X
%     
%
%   Url: http://amtoolbox.sourceforge.net/amt-0.9.5/doc/experiments/exp_dietz2011.php

% Copyright (C) 2009-2014 Peter L. Søndergaard.
% This file is part of AMToolbox version 1.0.0
%
% This program is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program.  If not, see <http://www.gnu.org/licenses/>.

% AUTHOR: Mathias Dietz

definput.flags.type = {'missingflag','fig3','fig4','fig5','fig6'};
definput.flags.plot = {'plot','noplot'};

[flags,keyvals]  = ltfatarghelper({},definput,varargin);


if flags.do_missingflag
    flagnames=[sprintf('%s, ',definput.flags.type{2:end-2}),...
               sprintf('%s or %s',definput.flags.type{end-1},definput.flags.type{end})];
    error('%s: You must specify one of the following flags: %s.',upper(mfilename),flagnames);
end;

% Load polynomial lookup data for converting ITD to azimuth
lookup = load('dietz2011itd2anglelookup.mat');

if flags.do_fig3

    signal=competingtalkers('five_speakers');
    fs = 16000;
    s_pos = [-80 -30 0 30 80];
    ic_threshold=0.98;
    panellabel = 'ab';

    % run IPD model on signal
    [hairc_fine, hairc_mod, fc, hairc_ild]=dietz2011(signal,fs);

    % convert interaural information into azimuth
    itd_unwrapped = ...
        dietz2011unwrapitd(hairc_fine.itd_lp,hairc_ild(:,1:12),hairc_fine.f_inst,2.5);
    angl=itd2angle(itd_unwrapped,lookup);

    h_ic=zeros(91,12);
    h_all=histc(angl,-90:2:90);
    for n=1:12
        h_ic(:,n)=histc(angl(hairc_fine.ic(:,n)>ic_threshold&[diff(hairc_fine.ic(:,n))>0; 0],n),-90:2:90);
    end
    example_output.angle_fine = angl;
    example_output.IVS_fine = hairc_fine.ic;
    example_output.histogram_angle_label = -90:2:90;
    example_output.histograms_with_IVS = h_ic;
    example_output.histograms_without_IVS = h_all;

    if flags.do_plot
        figure;
        fontsize = 14;
        set(gcf,'Position',[100 100 1170 700])
        
        for panel = 1:2
            subplot(1,2,panel)
            switch panel
                
              case 1
                bar(-90:2:90,sum(h_all,2),'r')
                title('Mean histogram of all fine-structure channels','Fontsize',fontsize)
								axis([-90 90 0 21900]);
								set(gca,'YTick',[5000 10000 15000 20000],'YTickLabel',{'5k','10k','15k','20k'});
                ymax = max(sum(h_all,2));
              case 2
								bar(-90:2:90,sum(h_ic,2))
								title('Mean histogram with VS filter','Fontsize',fontsize)
								axis([-90 90 0 5600]);
								set(gca,'YTick',[1000:1000:5000],'YTickLabel',{'1k','2k','3k','4k','5k'});
								ymax = max(sum(h_ic,2));
            end
            set(gca,'Fontsize',fontsize)
            set(gca,'XTick',s_pos)
%             xlim([-93 93])
%             ylim([0 ymax*1.1])
            xlabel('Azimuth [deg]','Fontsize',fontsize)
            ylabel('Frequency of occurence','Fontsize',fontsize)
            text (-80,ymax*.95,panellabel(panel),'Fontsize',fontsize+1,'FontWeight','bold')
        end
    end;
end;


if flags.do_fig4
    
    % This reproduces Figure 4 from Dietz et al Speech Comm. 2011

    signal=competingtalkers('two_speakers');
    fs = 16000;
    s_pos = [-80 -30 0 30 80];
    ic_threshold=0.98;
    cn = [10 1]; % channel numbers for separate plots (1st entry also for time plot)
    panellabel = 'abc';

    % run IPD model on signal
    [hairc_fine, hairc_mod, fc, hairc_ild]=dietz2011(signal,fs,'mod_center_frequency_hz',216);
    % convert interaural information into azimuth
    itd_unwrapped = ...
        dietz2011unwrapitd(hairc_fine.itd_lp,hairc_ild(:,1:12),hairc_fine.f_inst,2.5);
    angl=itd2angle(itd_unwrapped,lookup);
    angl_fmod216=hairc_mod.itd_lp(:,13:23)*140000; %linear approximation. paper version is better than this
    [hairc_fine, hairc_mod, fc, hairc_ild]=dietz2011(signal,fs,'mod_center_frequency_hz',135);
    angl_fmod135=hairc_mod.itd_lp(:,13:23)*140000; %linear approximation. paper version is better than this

    h_ic=zeros(61,12);
    h_all=histc(angl,-60:2:60);
    h_fmod216=histc(nonzeros(angl_fmod216),-60:2:60);
    h_fmod135=histc(nonzeros(angl_fmod135),-60:2:60);
    for n=1:12
        h_ic(:,n)=histc(angl(hairc_fine.ic(:,n)>ic_threshold&[diff(hairc_fine.ic(:,n))>0; 0],n),-60:2:60);
    end
    example_output.angle_fine = angl;
    example_output.IVS_fine = hairc_fine.ic;
    example_output.histogram_angle_label = -60:2:60;
    example_output.histogram_panel1 = h_fmod216;
    example_output.histogram_panel2 = h_fmod135;
    example_output.histogram_panel3 = sum(h_ic,2);

    if flags.do_plot
        figure;
        fontsize = 14;
        set(gcf,'Position',[100 100 1170 400])
        
        for panel = 1:3
            subplot(1,3,panel)
            switch panel
                
              case 1
                bar(-60:2:60,sum(h_fmod216,2))
                title('histogram of mod ITD channels 13-23','Fontsize',fontsize)
								axis([-50 50 0 7600]);
								set(gca,'YTick',[2500 5000 7500],'YTickLabel',{'2.5k','5k','7.5k'});
                ymax = max(sum(h_fmod216,2));
%                 ylim([0 ymax*1.15])
              case 2
                bar(-60:2:60,sum(h_fmod135,2))
								axis([-50 50 0 7600]);
								set(gca,'YTick',[2500 5000 7500],'YTickLabel',{'2.5k','5k','7.5k'});
                title('histogram of mod ITD channels 13-23','Fontsize',fontsize)
                ymax = max(sum(h_fmod135,2));
%                 ylim([0 ymax*1.15])
              case 3
                bar(-60:2:60,sum(h_ic,2))
                title('histogram of fine ITD channels 1-12','Fontsize',fontsize)
								axis([-50 50 0 76000]);
								set(gca,'YTick',[25000 50000 75000],'YTickLabel',{'25k','50k','75k'});
                ymax = max(sum(h_ic,2));
%                 ylim([0 ymax*1.15])
            end
            set(gca,'Fontsize',fontsize)
            set(gca,'XTick',s_pos)
%             xlim([-63 63])
            
            xlabel('Azimuth [deg]','Fontsize',fontsize)
            ylabel('Frequency of occurence','Fontsize',fontsize)
            text (-40,ymax*1.2,panellabel(panel),'Fontsize',fontsize+1,'FontWeight','bold')
            
        end
    end;
end;

if flags.do_fig5
    % mix signals
    signal1=competingtalkers('one_of_three');
    signal2=competingtalkers('two_of_three');
    signal3=competingtalkers('three_of_three');
    noise  =competingtalkers('bnoise');
    noise = noise(1:40000,:);
    fs = 16000;

    % derive histograms
    ic_threshold=0.98;
    k = zeros(14,38);
    for n = 1:7
        switch n
          case 1
            signal = signal1+noise;
            
          case 2
            signal = signal2+noise;
          case 3
            signal = signal3+noise;
          case 4
            signal = signal1+2*noise;
          case 5
            signal = signal2+2*noise;
          case 6
            signal = signal3+2*noise;
          case 7
            signal = noise;
        end
        % run IPD model on signal
        [hairc_fine, hairc_mod, fc, hairc_ild]=dietz2011(signal,fs);
        % convert interaural information into azimuth
        itd_unwrapped = ...
            dietz2011unwrapitd(hairc_fine.itd_lp,hairc_ild(:,1:12),hairc_fine.f_inst,2.5);
        angl=itd2angle(itd_unwrapped,lookup);

        h_ic=zeros(38,12);
        k(2*n,:)=sum(histc(angl,-92.5:5:92.5),2);
        for erb=1:12
            h_ic(:,erb)=histc(angl(hairc_fine.ic(:,erb)>ic_threshold&[diff(hairc_fine.ic(:,erb))>0; 0],erb),-92.5:5:92.5);
        end
        k(2*n-1,:)=sum(h_ic,2);
        example_output.angle_fine(:,:,n)=angl;
        example_output.IVS_fine = hairc_fine.ic;
        example_output.histogram_angle_label = -92.5:5:92.5;
        example_output.histograms_with_IVS(:,n)=sum(h_ic,2);
        example_output.histograms_without_IVS(:,n)=sum(histc(angl,-92.5:5:92.5),2);
    end

    if flags.do_plot
        % plot
        figure;
        set(gcf,'Position',[100 100 990 500])
        y=[0.14 0.57];
        x=[0.1 0.22 0.34 0.49 0.61 0.73 0.88];
        cols = 2;
        condi={'1S 0dB','','2S 0dB','',...
               '3S 0dB','','1S -6dB','',...
               '2S -6dB','','3S -6dB','','noise',''};
        
        for n = 1:14
            axes('position',[x(ceil(n/cols)) y(mod(n-1,cols)+1) 0.11 0.42],...
                 'box','on','fontSize',12)
            
            if mod(n,2)==0
                set(gca,'YTick',[10 20 30],'YTickLabel',{'','',''},'ylim',[0 32])
                set(gca,'Visible','on','XTick',[-30 0 30],...
                        'xlim',[-50 50],'XTickLabel',{'','','',''})
            else
                set(gca,'YTick',[1 2 3],'YTickLabel',{'','',''},'ylim',[0 3.33])
                set(gca,'Visible','on','XTick',[-30 0 30],...
                        'xlim',[-50 50],'XTickLabel',{'-30','0','+30'})
                xlabel(condi(n))
            end
            
            if n==2
                set(gca,'YTick',[10 20 30],'YTickLabel',{'10k','20k','30k'})
                ylabel('no IVS mask')
            elseif n==1        
                set(gca,'YTick',[1 2 3],'YTickLabel',{'1k','2k','3k'})
                ylabel('with IVS mask')
            end
            hold on
            bar(-92.5:5:92.5,k(n,:)/1000)
            colormap gray 
            
        end
        hold off
    end;
end;

if flags.do_fig6

    signal=competingtalkers('one_speaker_reverb');
    fs = 16000;
    s_pos = [-45 0 45];
    ic_threshold=0.98;
    cn = [10 1]; % channel numbers for separate plots (1st entry also for time plot)
    panellabel = 'abcd';

    % run IPD model on signal
    [hairc_fine, hairc_mod, fc, hairc_ild]=dietz2011(signal,fs);
    % convert interaural information into azimuth
    itd_unwrapped = ...
        dietz2011unwrapitd(hairc_fine.itd_lp,hairc_ild(:,1:12),hairc_fine.f_inst,2.5);
    angl=itd2angle(itd_unwrapped,lookup);
    angl_fmod=hairc_mod.itd_lp(:,13:23)*140000; %linear approximation. paper version is better than this

    h_ic=zeros(71,12);
    h_all=histc(angl,-70:2:70);
    h_fmod=histc(nonzeros(angl_fmod),-70:2:70);
    for n=1:12
        h_ic(:,n)=histc(angl(hairc_fine.ic(:,n)>ic_threshold&[diff(hairc_fine.ic(:,n))>0; 0],n),-70:2:70);
    end
    example_output.angle_fine = angl;
    example_output.IVS_fine = hairc_fine.ic;
    example_output.histogram_angle_label = -70:2:70;
    example_output.histograms_with_IVS = h_ic;
    example_output.histograms_without_IVS = h_all;
    example_output.histogram_panel1 = sum(h_ic,2);
    example_output.histogram_panel2 = sum(h_ic(:,6:12),2);
    example_output.histogram_panel3 = sum(h_ic(:,1:3),2);
    example_output.histogram_panel4 = sum(h_fmod,2);

    if flags.do_plot
        figure;
        fontsize = 14;
        set(gcf,'Position',[60 100 1370 350])
        
        for panel = 1:4
            subplot(1,4,panel)
            switch panel
                
              case 1
                bar(-70:2:70,sum(h_ic,2))
                title('fine ITD channels 200-1400 Hz','Fontsize',fontsize)
                ymax = max(sum(h_ic,2));
              case 2
                bar(-70:2:70,sum(h_ic(:,6:12),2))
                title('fine ITD channels 500-1400 Hz','Fontsize',fontsize)
                ymax = max(sum(h_ic,2));
              case 3
                bar(-70:2:70,sum(h_ic(:,1:3),2))
                title('fine ITD channels 200-400 Hz','Fontsize',fontsize)
                ymax = max(sum(h_ic,2));
              case 4
                bar(-70:2:70,sum(h_fmod,2))
                title('mod ITD channels 1400-5000 Hz','Fontsize',fontsize)
                ymax = max(sum(h_ic,2));
            end
            set(gca,'Fontsize',fontsize)
            set(gca,'XTick',s_pos)
%             xlim([-73 73])
%             ylim([0 ymax*1.1])
						axis([-73 73 0 5500]);
						set(gca,'YTick',[1000:1000:5000],'YTickLabel',{'1k','2k','3k','4k','5k'});

            xlabel('Azimuth [deg]','Fontsize',fontsize)
            ylabel('Frequency of occurence','Fontsize',fontsize)
            text (-50,ymax*.97,panellabel(panel),'Fontsize',fontsize+1,'FontWeight','bold')
            
        end        
    end;    
end;
THE AUDITORY MODELING TOOLBOX

EXP_DIETZ2011 - Experiments from Dietz et al. 2011

Program code: