function big_param = em_big_train( feature, verbose, delta1_threshold, delta2_threshold, insanity_threshold ) % FUNCTION BIG_PARAM = EM_BIG_TRAIN( FEATURE ) % FUNCTION BIG_PARAM = EM_BIG_TRAIN( FEATURE, VERBOSE ) % FUNCTION BIG_PARAM = EM_BIG_TRAIN( FEATURE, VERBOSE, DELTA1_THRESHOLD ) % FUNCTION BIG_PARAM = EM_BIG_TRAIN( FEATURE, VERBOSE, DELTA1_THRESHOLD, DELTA2_THRESHOLD ) % FUNCTION BIG_PARAM = EM_BIG_TRAIN( FEATURE, VERBOSE, DELTA1_THRESHOLD, DELTA2_THRESHOLD, INSANITY_THRESHOLD ) % % 1-dimensional EM bi-Gaussian training. % This can be useful e.g. to model distribution of speech energy (speech and silence). % % Input: % % some data in FEATURE (Inf and NaN values will be ignored) % dimensionality of the FEATURE matrix/vector does not matter. % % optionally VERBOSE (0 or 1) % optionally DELTA1_THRESHOLD (small positive value for termination test, should not be needed) % optionally DELTA2_THRESHOLD (small positive value for termination test, should not be needed) % optionally INSANITY_THRESHOLD (a very small positive value, should not be needed) % % Output: a structure with 6 fields: % % BIG_PARAM.P0 Prior of the "small value" Gaussian % BIG_PARAM.MU0 Mean of the "small value" Gaussian % BIG_PARAM.STD0 Std of the "small value" Gaussian % % BIG_PARAM.P1 Prior of the "large value" Gaussian % BIG_PARAM.MU1 Mean of the "large value" Gaussian % BIG_PARAM.STD1 Std of the "large value" Gaussian % % % Guillaume Lathoud - lathoud@idiap.ch if nargin < 1 error( [ mfilename ' needs at least 1 input argument' ] ); end if ~exist( 'verbose', 'var' ) verbose = 0; end if ~exist( 'delta1_threshold', 'var' ) | isempty( delta1_threshold ) delta1_threshold = 0.01; end if ~exist( 'delta2_threshold', 'var' ) | isempty( delta2_threshold ) delta2_threshold = 1e-4; end if ~exist( 'insanity_threshold', 'var' ) | isempty( insanity_threshold ) insanity_threshold = exp( log( eps ) / 2 ); end feature = feature(:); % ( 1 ) EM training of a bi-Gaussian model ind = find( ~isnan( feature ) & ~isinf( feature ) ); feature = feature( ind ); % Initialization: % Try many different splits of the data % use small values to initialize Gaussian #0 (e.g. energy during silence), % use large values to initialize Gaussian #1 (e.g. energy during speech). n_split = 15; the_min = min( feature ); the_max = max( feature ); % Linearly between min and max thr_list = the_min + (1:n_split)/(n_split+1) * ( the_max - the_min ); % Linearly along the ordered data tmp = sort( feature ); thr_list = sort( [ thr_list tmp( 1 + round( ( 0.5:n_split) / n_split * ( numel( tmp ) - 1 ) ) ).' ] ); log_lkld_list = repmat( NaN, size( thr_list ) ); for a = 1:numel( thr_list ) threshold = thr_list( a ); label = ( feature > threshold ); % Estimate the two Gaussians mu0 = mean( feature( find( ~label ) ) ); std0 = std( feature( find( ~label ) ) ); mu1 = mean( feature( find( label ) ) ); std1 = std( feature( find( label ) ) ); % Initialize the priors p1 = min( 0.9, max( 0.1, mean( label ) ) ); p0 = 1-p1; % Compute overall lkld try log_lkld_list( a ) = sum( my_logsum_fast( log( p0 ) + log_normpdf( feature, mu0, std0 ), log( p1 ) + log_normpdf( feature, mu1, std1 ) ) ); % Store the model init_model( a ).p0 = p0; init_model( a ).mu0 = mu0; init_model( a ).std0 = std0; init_model( a ).p1 = p1; init_model( a ).mu1 = mu1; init_model( a ).std1 = std1; catch log_lkld_list( a ) = -Inf; end end [ tmp, ind_max ] = max( log_lkld_list ); if verbose figure; plot( thr_list, log_lkld_list, '.-' ); hold on; plot( thr_list( ind_max ), log_lkld_list( ind_max ), 'ro-' ); end % Select the most likely model p0 = init_model( ind_max ).p0; mu0 = init_model( ind_max ).mu0; std0 = init_model( ind_max ).std0; p1 = init_model( ind_max ).p1; mu1 = init_model( ind_max ).mu1; std1 = init_model( ind_max ).std1; threshold = thr_list( ind_max ); label = ( feature > threshold ); % EM training n = length( feature ); finished = 0; while ~finished old_label = label; old_param = [ p0 mu0 std0 p1 mu1 std1 ]; % E-step: estimate the posteriors tmp0 = log( p0 ) + log_normpdf( feature, mu0, std0 ); tmp1 = log( p1 ) + log_normpdf( feature, mu1, std1 ); tmp = my_logsum_fast( [ tmp0.'; tmp1.' ] ).'; log_post0 = tmp0 - tmp; log_post1 = tmp1 - tmp; post0 = exp( log_post0 ); post1 = exp( log_post1 ); sum_post0 = exp( my_logsum_fast( log_post0 ) ); sum_post1 = exp( my_logsum_fast( log_post1 ) ); % "label" is used for the termination test only label = ( log_post1 > log_post0 ); % M-step: update the parameters mu0 = sum( feature .* post0 ) / sum_post0; mu1 = sum( feature .* post1 ) / sum_post1; std0 = sqrt( sum( ( ( feature - mu0 ) .^ 2 ) .* post0 ) / sum_post0 ); std1 = sqrt( sum( ( ( feature - mu1 ) .^ 2 ) .* post1 ) / sum_post1 ); p0 = sum_post0 / n; p1 = sum_post1 / n; % Sanity check if abs( p0+p1-1 ) > insanity_threshold error( 'insanity' ); end p1 = 1 - p0; % Termination test param = [ p0 mu0 std0 p1 mu1 std1 ]; delta1 = mean( label(:) ~= old_label(:) ); delta2 = max( abs( old_param - param ) ./ max( eps, abs( old_param ) ) ); %if verbose; disp( sprintf( [ mfilename ': delta1:%g delta2:%g' ], delta1, delta2 ) ); end; finished = ( delta1 < delta1_threshold ) & ( delta2 < delta2_threshold ); end % ( 2 ) Store the parameters and return big_param.p0 = p0; big_param.mu0 = mu0; big_param.std0 = std0; big_param.p1 = p1; big_param.mu1 = mu1; big_param.std1 = std1;