[ file 2005-SAM-SPARSE-MEAN/README ]
% This file contains an example MATLAB implementation of the
% SAM-SPARSE-MEAN approach for sector-based detection-localization.
%
% "Sector-based" means that the space around a microphone array is
% divided into sectors = volumes of space. In this example, around a
% circular 8-microphone array, we use eighteen 20-degree sectors.
%
% "detection-localization" means "jointly": for each time frame, for
% each sector of space, we evaluate how much frequency bandwidth is
% occupied by active source(s) in that sector.
%
% - "SAM" = Sector Activity Measure
% - "SPARSE" = sparsity is assumed: within each frequency bin, at most one active sector.
% - "MEAN" = within each sector, the average delay-sum power across all locations in that sector.
% This is done with NO additional cost compared to classical delay-sum.
% Only the A,B,Z parameters are changed compared to classical delay-sum.
%
%
% For a practical example, please refer to the 4-page article:
%
% "A Sector-Based, Frequency-Domain Approach to Detection and Localization of Multiple Speakers"
% G. Lathoud and M. Magimai.-Doss
% ICASSP 2005
%
%
% For a journal version with complete details on SAM-SPARSE-MEAN:
%
% "Sector-Based Detection for Hands-Free Speech Enhancement in Cars"
% G. Lathoud, J. Bourgeois and J. Freudenberger
% EURASIP 2006, special issue on Advances in Microphone Array Processing
% ( we are implementing Eq. (15) )
%
% You can also access its draft online: IDIAP RR-04-67 (there it is Eq. (19)).
%
% ---------
%
% IMPLEMENTATION
%
% Below is an example where:
% - first, the A,B,Z parameters are precomputed.
% [ main file: compute_ssm_parameters.m ]
%
% - second, SAM-SPARSE-MEAN is run on five example recordings,
% made in a meeting room with loudspeakers or humans.
% [ main file: extract_ssm.m ]
%
% These five recordings are fully described and downloadable at:
% http://mmm.idiap.ch/Lathoud/05-ICASSP
% ( They belong to the (larger) AV16.3 corpus. )
%
% - third, the exact same example, with a C implementation that is
% about 20 times faster. The C code is integrated into MATLAB
% through MEX files, so you need to compile them on your platform
% before use:
%
% mex s051115_compute_gccphat.c
% mex s051114_get_ssm_activeness.c
%
% [ The MATLAB wrapper for the two C files is extract_ssm_fast.m ]
%
% ----------
%
% About MEX compilation options: the two C files proved to be
% compatible with the "-O3", "-march=pentium4" and "-ffast-math"
% options. Below is a step-by-step guide on choosing compilation
% options. This is useful if you want an additional speedup.
%
%
% First type
%
% mex -setup
%
% and select the appropriate compiler. For example, on an Intel Linux box,
% I used the GCC one:
%
% 2: /com/softs/matlab6.5.1/bin/gccopts.sh :
% Template Options file for building gcc MEX-files
%
% This creates a compilation option file, which you can now edit.
% For example, in ~/.matlab/R13/mexopts.sh, I replaced the line:
%
% COPTIMFLAGS='-O -DNDEBUG'
%
% with the line:
%
% COPTIMFLAGS='-O3 -DNDEBUG -march=pentium4 -ffast-math'
%
%
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% ( 1 ) Precomputation of A, B, Z = A * exp( j B ) %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% ( 1.1 ) Define the geometry of the mic. array, then build the
%         set of sectors around it (20 degrees each) and cover
%         every sector with a grid of 80 x 80 x 80 points.
%
%         These grids are what ( 1.2 ) uses to approximate the
%         integral that gives A, B, Z.
%
%         The mic. array geometry, the sectors and the grids are
%         saved into a MATLAB file in the "DATA" subdirectory.
%         Running time: short (less than a minute).
create_pointset(80);

% ( 1.2 ) Compute the A,B,Z parameters from the set of points
%         defined in ( 1.1 ), and store them in a file in the
%         "DATA" subdirectory.
%         Running time: long (about 50000 seconds).
compute_ssm_parameters('DATA/pointset_np80.mat');
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% ( 2 ) SAM-SPARSE-MEAN computation on recordings  %
% made with an 8-microphone uniform circular array %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% ( 2.1 ) Warm-up: a simple recording with a single speaker.
%         The wildcard in the template stands for the per-microphone
%         WAV files of the recording.
wavefilename_template   = 'DATA/seq01-1p-0000_array1_mic*.wav';
ssm_parameters_filename = 'DATA/ssm_parameters_512_16000_342-pointset_np80.mat';
ssm_result = extract_ssm(wavefilename_template, ssm_parameters_filename);

% Write the result to disk ...
save('DATA/seq01-1p-0000_ssm_result.mat', 'ssm_result');

% ... and read it back to display its "activeness" field as an image.
load('DATA/seq01-1p-0000_ssm_result.mat', 'ssm_result');
figure;
imagesc(ssm_result.activeness);
title('"seq01-1p-0000"');

% ( 2.2 ) Multiple sources: humans (seq37) and loudspeakers
%         (synthmultisource). Each entry is a file-name prefix shared
%         by the input WAV files and by the result file.
a_list = { ...
    'DATA/seq37-3p-0001_array1_', ...
    'DATA/synthmultisource-setup1-', ...
    'DATA/synthmultisource-setup2-', ...
    'DATA/synthmultisource-setup3-' };

% First pass: compute SSM for every recording and save each result
% next to its input files, as "<prefix>result.mat".
ssm_parameters_filename = 'DATA/ssm_parameters_512_16000_342-pointset_np80.mat';
for a = 1:numel(a_list)
    a_string = a_list{a};
    wavefilename_template = [a_string, 'mic*.wav'];
    ssm_result = extract_ssm(wavefilename_template, ssm_parameters_filename);
    save([a_string, 'result.mat'], 'ssm_result');
end

% Second pass: reload every saved result and show it in its own figure.
for a = 1:numel(a_list)
    a_string = a_list{a};
    load([a_string, 'result.mat'], 'ssm_result');
    figure;
    imagesc(ssm_result.activeness);
    % The title is the prefix without its trailing separator character,
    % with underscores escaped as '\_' (presumably so that the default
    % text interpreter prints them literally).
    title(['"', strrep(a_string(1:end-1), '_', '\_'), '"']);
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% ( 3 ) SAM-SPARSE-MEAN computation on recordings  %
% made with an 8-microphone uniform circular array %
% C implementation                                 %
% Don't forget to MEX-compile beforehand.          %
% ( see detailed explanations above )              %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% ( 3.1 ) Warm-up: a simple recording with a single speaker,
%         processed through the MATLAB wrapper of the C code
%         ( extract_ssm_fast ).
wavefilename_template   = 'DATA/seq01-1p-0000_array1_mic*.wav';
ssm_parameters_filename = 'DATA/ssm_parameters_512_16000_342-pointset_np80.mat';
ssm_result = extract_ssm_fast(wavefilename_template, ssm_parameters_filename);

% Write the result to disk ...
save('DATA/seq01-1p-0000_ssm_result_fast.mat', 'ssm_result');

% ... and read it back to display its "activeness" field as an image.
load('DATA/seq01-1p-0000_ssm_result_fast.mat', 'ssm_result');
figure;
imagesc(ssm_result.activeness);
title('"seq01-1p-0000"');

% ( 3.2 ) Multiple sources: humans (seq37) and loudspeakers
%         (synthmultisource). Each entry is a file-name prefix shared
%         by the input WAV files and by the result file.
a_list = { ...
    'DATA/seq37-3p-0001_array1_', ...
    'DATA/synthmultisource-setup1-', ...
    'DATA/synthmultisource-setup2-', ...
    'DATA/synthmultisource-setup3-' };

% First pass: compute SSM with the C implementation for every
% recording and save each result as "<prefix>result_fast.mat".
ssm_parameters_filename = 'DATA/ssm_parameters_512_16000_342-pointset_np80.mat';
for a = 1:numel(a_list)
    a_string = a_list{a};
    wavefilename_template = [a_string, 'mic*.wav'];
    ssm_result = extract_ssm_fast(wavefilename_template, ssm_parameters_filename);
    save([a_string, 'result_fast.mat'], 'ssm_result');
end

% Second pass: reload every saved result and show it in its own figure.
for a = 1:numel(a_list)
    a_string = a_list{a};
    load([a_string, 'result_fast.mat'], 'ssm_result');
    figure;
    imagesc(ssm_result.activeness);
    % The title is the prefix without its trailing separator character,
    % with underscores escaped as '\_' (presumably so that the default
    % text interpreter prints them literally).
    title(['"', strrep(a_string(1:end-1), '_', '\_'), '"']);
end