% FUNCTION [ BINSEG, MODEL, P ] = SILENCE_SEG( ENERGY, IN_P )
%
% Apply unsupervised activity/silence segmentation to a sequence of
% energies (typically log energies).
%
% The frames are first split at the mid-point between the smallest and the
% largest finite energy. The silence/activity models are then re-estimated
% and the frames re-classified until the fraction of frames changing class
% falls below P.STOP_CRITERION, or until all frames end up in one class.
% NaN and Inf values are ignored when estimating the models.
%
% If ENERGY has more than one non-singleton dimension, then a minimum
% duration constraint is not allowed (IN_P.MINDUR_TYPE must be 'none').
%
% 'GG' means activity and silence are each modeled with a Gaussian
% distribution.
%
% 'GU' means silence is modeled with a Gaussian distribution
% (sil_mu, sil_std), while activity is modeled with a uniform distribution
% on [ sil_mu, max( energy ) ].
%
% A minimum duration constraint can be imposed on either activity or
% silence (but not on both).
%
% %%%%%%%%%%
% INPUT:
%
% ENERGY = array of energies.
% IN_P   = optional structure; missing fields receive these defaults
%          (through the external helper FILL_DEFAULT):
%   IN_P.MODEL_TYPE      'GU'    ('GG' or 'GU')
%   IN_P.MINDUR_TYPE     'none'  ('none' or 'activity' or 'silence')
%   IN_P.MINDUR          []      (required when MINDUR_TYPE is not 'none';
%                                 presumably a number of frames - confirm
%                                 against EROSION / DILATION)
%   IN_P.STOP_CRITERION  0.01    (fraction of frames changing class)
%   IN_P.VERBOSE         1       (display the parameters)
%
% %%%%%%%%%%
% OUTPUT:
%
% BINSEG = matrix of zeros and ones, same size as ENERGY.
% P      = IN_P, enriched with default values.
% MODEL  = structure with various fields.
%
% MODEL.THRESHOLD: where the two pdfs intersect (activity and silence).
% If the initial mid-range split already puts every frame in the same
% class, MODEL contains only THRESHOLD (the initial mid-range value).
%
% If P.MODEL_TYPE is 'GG':
%   MODEL.SIL_MU   mean of the silence Gaussian distribution
%   MODEL.SIL_STD  std dev of the silence Gaussian distribution
%   MODEL.ACT_MU   mean of the activity Gaussian distribution
%   MODEL.ACT_STD  std dev of the activity Gaussian distribution
%
% If P.MODEL_TYPE is 'GU':
%   MODEL.SIL_MU   mean of the silence Gaussian distribution
%   MODEL.SIL_STD  std dev of the silence Gaussian distribution
%   MODEL.ACT_PDF  1 / ( MAX( ENERGY ) - MODEL.SIL_MU )
%
% Relies on FILL_DEFAULT, EROSION and DILATION, which are not defined in
% this file.
%
% By G. Lathoud 2004
% lathoud@idiap.ch

function [ binseg, model, p ] = silence_seg( energy, in_p )

if nargin < 1
    error( 'silence_seg needs at least one parameter' )
end
if nargin < 2
    in_p = [];
end

p = in_p;

p_default.model_type     = 'GU';   % 'GG' or 'GU'
p_default.mindur_type    = 'none'; % 'none' or 'activity' or 'silence'
p_default.mindur         = [];
p_default.stop_criterion = 0.01;
p_default.verbose        = 1;

p = fill_default( p, p_default );

% ---- Parameter validation ----

if ~any( ismember( { 'GG', 'GU' }, p.model_type ) )
    error( 'silence_seg: p.model_type = ''GG'' or ''GU''' );
end

if ~any( ismember( { 'none', 'activity', 'silence' }, p.mindur_type ) )
    error( 'silence_seg: p.mindur_type = ''none'' or ''activity'' or ''silence''' );
end

if any( ismember( { 'activity', 'silence' }, p.mindur_type ) ) && ...
        isempty( p.mindur )
    error( 'silence_seg: needs nonempty p.mindur' );
end

if ~strcmp( p.mindur_type, 'none' ) && ( sum( size( energy ) > 1 ) > 1 )
    error( ['silence_seg: can apply minimum duration constraint on' ...
            ' 1-dimensional data only'] );
end

if p.verbose
    disp( 'silence_seg parameters:' );
    disp( p );
end

% ---- Initialization ----

% Drop "NaN" and "Inf" values when initializing the threshold.
% The same mask is reused for every model re-estimation below.
energy_vec = energy(:);
usable     = isfinite( energy_vec );

max_energy = max( energy_vec( usable ) );
min_energy = min( energy_vec( usable ) );

init_threshold = ( max_energy + min_energy ) / 2;

% Use this initial threshold to initialize the activity/silence classification
binseg = energy > init_threshold;
model.threshold = init_threshold;

% ---- Iterative re-estimation ----

finished = 0;
while ~finished

    % Nothing to train on if every frame fell into a single class
    if all( binseg(:) ) || ~any( binseg(:) )
        break;
    end

    old_binseg = binseg;

    % Retrain models on the finite-valued frames of each class
    sil_values = energy_vec( usable & ~binseg(:) );
    model.sil_mu  = mean( sil_values );
    model.sil_std = std( sil_values );

    if strcmp( p.model_type, 'GG' )
        act_values = energy_vec( usable & binseg(:) );
        model.act_mu  = mean( act_values );
        model.act_std = std( act_values );
    elseif ~strcmp( p.model_type, 'GU' )
        error( 'silence_seg: this cannot happen' );
    end

    % Determine threshold
    if strcmp( p.model_type, 'GG' )

        % The two Gaussian pdfs are equal where A*x^2 + B*x + C = 0
        A = 1 / model.sil_std^2 - 1 / model.act_std^2;
        B = -2 * model.sil_mu / (model.sil_std^2) + 2 * model.act_mu / (model.act_std^2);
        C = ( model.sil_mu / model.sil_std ) ^2 - ( model.act_mu / model.act_std ) ^2 ...
            + 2 * log( model.sil_std / model.act_std );

        delta = B^2 - 4 * A * C;

        if delta < 0
            error( 'silence_seg: delta < 0!' ); % can't happen!
        end

        % Cancellation-free form of the quadratic formula. Unlike
        % ( -B +/- sqrt( delta ) ) / ( 2 * A ), it stays valid when both
        % classes have the same standard deviation (A == 0): root_b is
        % then the linear solution -C / B, and root_a is infinite and
        % rejected by the range test below.
        if B >= 0
            q = -0.5 * ( B + sqrt( delta ) );
        else
            q = -0.5 * ( B - sqrt( delta ) );
        end
        root_a = q / A;
        root_b = C / q;

        % Keep the root lying strictly between the two means.
        % NOTE(review): a zero standard deviation in either class still
        % produces NaN coefficients and ends in the error below.
        if (model.sil_mu < root_a) && (root_a < model.act_mu)
            model.threshold = root_a;
        elseif (model.sil_mu < root_b) && (root_b < model.act_mu)
            model.threshold = root_b;
        else
            error( 'silence_seg: can''t find a threshold!' ); % can't happen!
        end

    elseif strcmp( p.model_type, 'GU' )

        model.act_pdf = 1 / ( max_energy - model.sil_mu );

        if model.sil_std == 0
            % Degenerate silence Gaussian (all silence frames identical):
            % the general formula would evaluate 0 * Inf = NaN. Its limit
            % for a vanishing standard deviation is the silence mean.
            model.threshold = model.sil_mu;
        else
            x = -2 * log( model.act_pdf * model.sil_std * sqrt( 2 * pi ) );

            if x < 0
                error( 'silence_seg: x < 0!' ); % should not happen...
            end

            model.threshold = model.sil_mu + model.sil_std * sqrt( x );
        end

    else
        error( 'silence_seg: this should not happen' );
    end

    % Resegment
    binseg = ( energy > model.threshold );

    if strcmp( p.mindur_type, 'activity' )

        % Erosion followed by dilation; the result masks the activity frames
        half_mindur = floor( p.mindur / 2 );
        mask = dilation( erosion( binseg, half_mindur ), half_mindur );
        binseg = binseg & mask;

    elseif strcmp( p.mindur_type, 'silence' )

        % Dilation followed by erosion; the result is added to the activity frames
        half_mindur = floor( p.mindur / 2 );
        mask = erosion( dilation( binseg, half_mindur ), half_mindur );
        binseg = binseg | mask;

    elseif ~strcmp( p.mindur_type, 'none' )
        error( 'silence_seg: this cannot happen either' );
    end

    % Convergence? Stop once the fraction of relabeled frames is small enough
    finished = ( sum( old_binseg(:) ~= binseg(:) ) / length( binseg(:) ) < p.stop_criterion );

end