This documentation page applies to an outdated major AMT version. We show it for archival purposes only.
Click here for the documentation menu and here to download the latest AMT (1.6.0).
function example_output = exp_dietz2011(varargin)
%EXP_DIETZ2011 Experiments from Dietz et al. 2011
%
% EXP_DIETZ2011(fig) reproduces Fig. no. fig from the Dietz et
% al. 2011 paper.
%
% example_output = EXP_DIETZ2011(fig) additionally returns a struct
% with the data computed for the selected figure. The fields that are
% set depend on the figure flag.
%
% **Note**: The input signals used in this routine are not identical to
% the ones used in the original paper.
%
% Exactly one of the figure flags must be given; calling the function
% without one raises an error.
%
% The following flags can be specified:
%
% 'plot' Plot the output of the experiment. This is the default.
%
% 'noplot' Don't plot, only return data.
%
% 'fig3' Reproduce Fig. 3 panels a + b.
%
% 'fig4' Reproduce Fig. 4.
%
% 'fig5' Reproduce Fig. 5.
%
% 'fig6' Reproduce Fig. 6.
%
% Examples:
% ---------
%
% To display Fig. 3 use :
%
% exp_dietz2011('fig3');
%
% To display Fig. 4 use :
%
% exp_dietz2011('fig4');
%
% To display Fig. 5 use :
%
% exp_dietz2011('fig5');
%
% To display Fig. 6 use :
%
% exp_dietz2011('fig6');
%
% References:
% M. Dietz, S. D. Ewert, and V. Hohmann. Auditory model based direction
% estimation of concurrent speakers from binaural signals. Speech
% Communication, 53(5):592-605, 2011. [1]
%
% References
%
% 1. http://www.sciencedirect.com/science/article/pii/S016763931000097X
%
%
% Url: http://amtoolbox.sourceforge.net/amt-0.9.5/doc/experiments/exp_dietz2011.php
% Copyright (C) 2009-2014 Peter L. Søndergaard.
% This file is part of AMToolbox version 1.0.0
%
% This program is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program. If not, see <http://www.gnu.org/licenses/>.
% AUTHOR: Mathias Dietz

% Flag groups understood by this function. 'missingflag' leads the figure
% group so that an omitted figure flag can be detected below (presumably
% the first entry of a group is its default, as documented for 'plot').
definput.flags.type = {'missingflag','fig3','fig4','fig5','fig6'};
definput.flags.plot = {'plot','noplot'};
[flags,keyvals] = ltfatarghelper({},definput,varargin);

% Refuse to run without a figure flag and list the valid choices as
% "a, b, c or d".
if flags.do_missingflag
  figflags = definput.flags.type(2:end);
  leading = sprintf('%s, ',figflags{1:end-2});
  trailing = sprintf('%s or %s',figflags{end-1},figflags{end});
  error('%s: You must specify one of the following flags: %s.',upper(mfilename),[leading,trailing]);
end

% Load polynomial lookup data for converting ITD to azimuth
lookup = load('dietz2011itd2anglelookup.mat');
if flags.do_fig3
  % Fig. 3: azimuth histograms of the 'five_speakers' signal, once over all
  % fine-structure samples (panel a) and once restricted to the samples
  % that pass the IVS mask (panel b).
  fs = 16000;
  signal = competingtalkers('five_speakers');
  s_pos = [-80 -30 0 30 80];   % used as x-axis tick positions
  ic_threshold = 0.98;
  panellabel = 'ab';
  az_bins = -90:2:90;          % histogram bin edges in degrees
  nchan = 12;                  % number of fine-structure channels evaluated

  % run IPD model on signal
  [hairc_fine, hairc_mod, fc, hairc_ild] = dietz2011(signal,fs);

  % convert interaural information into azimuth
  itd_unwrapped = dietz2011unwrapitd(hairc_fine.itd_lp, ...
                                     hairc_ild(:,1:nchan), ...
                                     hairc_fine.f_inst,2.5);
  angl = itd2angle(itd_unwrapped,lookup);

  % histogram of every sample, one column per channel
  h_all = histc(angl,az_bins);

  % histogram of the masked samples: a sample is kept when the channel's
  % ic value exceeds the threshold and is larger at the next sample
  h_ic = zeros(numel(az_bins),nchan);
  for ch = 1:nchan
    ic_ch = hairc_fine.ic(:,ch);
    keep = (ic_ch > ic_threshold) & [diff(ic_ch) > 0; 0];
    h_ic(:,ch) = histc(angl(keep,ch),az_bins);
  end

  example_output.angle_fine = angl;
  example_output.IVS_fine = hairc_fine.ic;
  example_output.histogram_angle_label = az_bins;
  example_output.histograms_with_IVS = h_ic;
  example_output.histograms_without_IVS = h_all;

  if flags.do_plot
    figure;
    fontsize = 14;
    set(gcf,'Position',[100 100 1170 700])
    for panel = 1:2
      subplot(1,2,panel)
      if panel == 1
        counts = sum(h_all,2);
        bar(az_bins,counts,'r')
        title('Mean histogram of all fine-structure channels','Fontsize',fontsize)
        axis([-90 90 0 21900]);
        set(gca,'YTick',[5000 10000 15000 20000],'YTickLabel',{'5k','10k','15k','20k'});
      else
        counts = sum(h_ic,2);
        bar(az_bins,counts)
        title('Mean histogram with VS filter','Fontsize',fontsize)
        axis([-90 90 0 5600]);
        set(gca,'YTick',1000:1000:5000,'YTickLabel',{'1k','2k','3k','4k','5k'});
      end
      % the panel letter is placed just below the tallest bar
      ymax = max(counts);
      set(gca,'Fontsize',fontsize,'XTick',s_pos)
      xlabel('Azimuth [deg]','Fontsize',fontsize)
      ylabel('Frequency of occurence','Fontsize',fontsize)
      text(-80,ymax*.95,panellabel(panel),'Fontsize',fontsize+1,'FontWeight','bold')
    end
  end
end
if flags.do_fig4
  % This reproduces Figure 4 from Dietz et al Speech Comm. 2011:
  % azimuth histograms of the 'two_speakers' signal from the modulation
  % ITD channels 13-23 (two model runs with different
  % 'mod_center_frequency_hz') and from the IVS-masked fine-structure
  % channels 1-12.
  signal = competingtalkers('two_speakers');
  fs = 16000;
  s_pos = [-80 -30 0 30 80];   % used as x-axis tick positions
  ic_threshold = 0.98;
  panellabel = 'abc';
  az_bins = -60:2:60;          % histogram bin edges in degrees

  % run IPD model on signal (mod_center_frequency_hz = 216)
  [hairc_fine, hairc_mod, fc, hairc_ild] = ...
      dietz2011(signal,fs,'mod_center_frequency_hz',216);

  % convert interaural information into azimuth
  itd_unwrapped = ...
      dietz2011unwrapitd(hairc_fine.itd_lp,hairc_ild(:,1:12),hairc_fine.f_inst,2.5);
  angl = itd2angle(itd_unwrapped,lookup);
  angl_fmod216 = hairc_mod.itd_lp(:,13:23)*140000; %linear approximation. paper version is better than this

  % Second model run (mod_center_frequency_hz = 135). Only its modulation
  % ITDs are used. The outputs get their own names so that hairc_fine keeps
  % referring to the run that produced angl; the IVS mask below is
  % therefore taken from the same run as the angles it selects.
  [hairc_fine135, hairc_mod135, fc135, hairc_ild135] = ...
      dietz2011(signal,fs,'mod_center_frequency_hz',135);
  angl_fmod135 = hairc_mod135.itd_lp(:,13:23)*140000; %linear approximation. paper version is better than this

  % modulation histograms skip entries that are exactly zero
  h_fmod216 = histc(nonzeros(angl_fmod216),az_bins);
  h_fmod135 = histc(nonzeros(angl_fmod135),az_bins);

  % fine-structure histogram of the masked samples: a sample is kept when
  % the channel's ic value exceeds the threshold and is larger at the next
  % sample
  h_ic = zeros(numel(az_bins),12);
  for n = 1:12
    ic_n = hairc_fine.ic(:,n);
    keep = (ic_n > ic_threshold) & [diff(ic_n) > 0; 0];
    h_ic(:,n) = histc(angl(keep,n),az_bins);
  end

  example_output.angle_fine = angl;
  example_output.IVS_fine = hairc_fine.ic;
  example_output.histogram_angle_label = az_bins;
  example_output.histogram_panel1 = h_fmod216;
  example_output.histogram_panel2 = h_fmod135;
  example_output.histogram_panel3 = sum(h_ic,2);

  if flags.do_plot
    % per-panel data and axis settings
    panel_counts = {sum(h_fmod216,2), sum(h_fmod135,2), sum(h_ic,2)};
    panel_title = {'histogram of mod ITD channels 13-23', ...
                   'histogram of mod ITD channels 13-23', ...
                   'histogram of fine ITD channels 1-12'};
    panel_ytop = [7600 7600 76000];
    panel_ytick = {[2500 5000 7500],[2500 5000 7500],[25000 50000 75000]};
    panel_yticklabel = {{'2.5k','5k','7.5k'},{'2.5k','5k','7.5k'},{'25k','50k','75k'}};

    figure;
    fontsize = 14;
    set(gcf,'Position',[100 100 1170 400])
    for panel = 1:3
      subplot(1,3,panel)
      bar(az_bins,panel_counts{panel})
      title(panel_title{panel},'Fontsize',fontsize)
      axis([-50 50 0 panel_ytop(panel)]);
      set(gca,'YTick',panel_ytick{panel},'YTickLabel',panel_yticklabel{panel});
      % the panel letter is positioned relative to the tallest bar
      ymax = max(panel_counts{panel});
      set(gca,'Fontsize',fontsize)
      set(gca,'XTick',s_pos)
      xlabel('Azimuth [deg]','Fontsize',fontsize)
      ylabel('Frequency of occurence','Fontsize',fontsize)
      text (-40,ymax*1.2,panellabel(panel),'Fontsize',fontsize+1,'FontWeight','bold')
    end
  end
end
if flags.do_fig5
  % Fig. 5: azimuth histograms with and without IVS mask for seven
  % conditions: three talker signals mixed with noise, the same three
  % mixed with doubled noise, and the noise alone.

  % mix signals
  signal1 = competingtalkers('one_of_three');
  signal2 = competingtalkers('two_of_three');
  signal3 = competingtalkers('three_of_three');
  noise = competingtalkers('bnoise');
  noise = noise(1:40000,:);
  fs = 16000;

  % derive histograms
  ic_threshold = 0.98;
  az_bins = -92.5:5:92.5;        % histogram bin edges in degrees
  % Plot data, two rows per condition: row 2*n-1 holds the masked
  % histogram, row 2*n the unmasked one.
  k = zeros(14,numel(az_bins));
  for n = 1:7
    switch n
      case 1
        signal = signal1+noise;
      case 2
        signal = signal2+noise;
      case 3
        signal = signal3+noise;
      case 4
        signal = signal1+2*noise;
      case 5
        signal = signal2+2*noise;
      case 6
        signal = signal3+2*noise;
      case 7
        signal = noise;
    end

    % run IPD model on signal
    [hairc_fine, hairc_mod, fc, hairc_ild] = dietz2011(signal,fs);

    % convert interaural information into azimuth
    itd_unwrapped = ...
        dietz2011unwrapitd(hairc_fine.itd_lp,hairc_ild(:,1:12),hairc_fine.f_inst,2.5);
    angl = itd2angle(itd_unwrapped,lookup);

    % unmasked histogram, summed over channels (computed once and reused
    % for both the plot data and the returned struct)
    hist_all = sum(histc(angl,az_bins),2);

    % masked histogram: a sample is kept when the channel's ic value
    % exceeds the threshold and is larger at the next sample
    h_ic = zeros(numel(az_bins),12);
    for erb = 1:12
      ic_erb = hairc_fine.ic(:,erb);
      keep = (ic_erb > ic_threshold) & [diff(ic_erb) > 0; 0];
      h_ic(:,erb) = histc(angl(keep,erb),az_bins);
    end
    hist_ivs = sum(h_ic,2);

    k(2*n,:) = hist_all;
    k(2*n-1,:) = hist_ivs;

    example_output.angle_fine(:,:,n) = angl;
    % NOTE(review): overwritten on every pass, so only the values of the
    % last condition (n = 7) are returned, whereas angle_fine is stacked
    % per condition. Kept as is to preserve the returned shape - confirm
    % whether a per-condition stack is wanted.
    example_output.IVS_fine = hairc_fine.ic;
    example_output.histogram_angle_label = az_bins;
    example_output.histograms_with_IVS(:,n) = hist_ivs;
    example_output.histograms_without_IVS(:,n) = hist_all;
  end

  if flags.do_plot
    % plot: 7 columns (conditions) x 2 rows of manually placed axes
    figure;
    set(gcf,'Position',[100 100 990 500])
    y = [0.14 0.57];
    x = [0.1 0.22 0.34 0.49 0.61 0.73 0.88];
    cols = 2;
    condi = {'1S 0dB','','2S 0dB','',...
             '3S 0dB','','1S -6dB','',...
             '2S -6dB','','3S -6dB','','noise',''};
    for n = 1:14
      axes('position',[x(ceil(n/cols)) y(mod(n-1,cols)+1) 0.11 0.42],...
           'box','on','fontSize',12)
      if mod(n,2)==0
        % even n: unmasked histogram, no x tick labels
        set(gca,'YTick',[10 20 30],'YTickLabel',{'','',''},'ylim',[0 32])
        set(gca,'Visible','on','XTick',[-30 0 30],...
            'xlim',[-50 50],'XTickLabel',{'','',''})
      else
        % odd n: masked histogram, carries the condition name
        set(gca,'YTick',[1 2 3],'YTickLabel',{'','',''},'ylim',[0 3.33])
        set(gca,'Visible','on','XTick',[-30 0 30],...
            'xlim',[-50 50],'XTickLabel',{'-30','0','+30'})
        xlabel(condi{n})
      end
      % only the first two axes get y tick labels and a y-axis label
      if n==2
        set(gca,'YTick',[10 20 30],'YTickLabel',{'10k','20k','30k'})
        ylabel('no IVS mask')
      elseif n==1
        set(gca,'YTick',[1 2 3],'YTickLabel',{'1k','2k','3k'})
        ylabel('with IVS mask')
      end
      % hold on is issued before bar, after the axes properties are set
      hold on
      bar(az_bins,k(n,:)/1000)
      colormap gray
    end
    hold off
  end
end
if flags.do_fig6
  % Fig. 6: azimuth histograms of the 'one_speaker_reverb' signal for
  % three groups of IVS-masked fine-structure channels and for the
  % modulation ITD channels 13-23.
  signal = competingtalkers('one_speaker_reverb');
  fs = 16000;
  s_pos = [-45 0 45];          % used as x-axis tick positions
  ic_threshold = 0.98;
  panellabel = 'abcd';
  az_bins = -70:2:70;          % histogram bin edges in degrees

  % run IPD model on signal
  [hairc_fine, hairc_mod, fc, hairc_ild] = dietz2011(signal,fs);

  % convert interaural information into azimuth
  itd_unwrapped = ...
      dietz2011unwrapitd(hairc_fine.itd_lp,hairc_ild(:,1:12),hairc_fine.f_inst,2.5);
  angl = itd2angle(itd_unwrapped,lookup);
  angl_fmod = hairc_mod.itd_lp(:,13:23)*140000; %linear approximation. paper version is better than this

  h_all = histc(angl,az_bins);
  % modulation histogram skips entries that are exactly zero
  h_fmod = histc(nonzeros(angl_fmod),az_bins);

  % fine-structure histogram of the masked samples: a sample is kept when
  % the channel's ic value exceeds the threshold and is larger at the next
  % sample
  h_ic = zeros(numel(az_bins),12);
  for n = 1:12
    ic_n = hairc_fine.ic(:,n);
    keep = (ic_n > ic_threshold) & [diff(ic_n) > 0; 0];
    h_ic(:,n) = histc(angl(keep,n),az_bins);
  end

  % One histogram per panel, computed once and shared by the returned
  % struct and the plot.
  panel_counts = {sum(h_ic,2), ...
                  sum(h_ic(:,6:12),2), ...
                  sum(h_ic(:,1:3),2), ...
                  sum(h_fmod,2)};

  example_output.angle_fine = angl;
  example_output.IVS_fine = hairc_fine.ic;
  example_output.histogram_angle_label = az_bins;
  example_output.histograms_with_IVS = h_ic;
  example_output.histograms_without_IVS = h_all;
  example_output.histogram_panel1 = panel_counts{1};
  example_output.histogram_panel2 = panel_counts{2};
  example_output.histogram_panel3 = panel_counts{3};
  example_output.histogram_panel4 = panel_counts{4};

  if flags.do_plot
    panel_title = {'fine ITD channels 200-1400 Hz', ...
                   'fine ITD channels 500-1400 Hz', ...
                   'fine ITD channels 200-400 Hz', ...
                   'mod ITD channels 1400-5000 Hz'};
    % All panels share the same y-axis range, and the panel letter is placed
    % at the same height in each of them, derived from the all-channel
    % masked histogram.
    ymax = max(panel_counts{1});

    figure;
    fontsize = 14;
    set(gcf,'Position',[60 100 1370 350])
    for panel = 1:4
      subplot(1,4,panel)
      bar(az_bins,panel_counts{panel})
      title(panel_title{panel},'Fontsize',fontsize)
      set(gca,'Fontsize',fontsize)
      set(gca,'XTick',s_pos)
      axis([-73 73 0 5500]);
      set(gca,'YTick',1000:1000:5000,'YTickLabel',{'1k','2k','3k','4k','5k'});
      xlabel('Azimuth [deg]','Fontsize',fontsize)
      ylabel('Frequency of occurence','Fontsize',fontsize)
      text (-50,ymax*.97,panellabel(panel),'Fontsize',fontsize+1,'FontWeight','bold')
    end
  end
end