function cbigObject = train_cbig_on_hist( hist_bins, hist_data, cbigInitObject, maxIter ) % function cbigObject = train_cbig_on_hist( hist_bins, hist_data, [ cbigInitObject ], [ maxIter ] ) % % EM training of a "constrained" bi-Gaussian distribution % Note that we constrain the first gaussian (number "0") % to fit only data below the mean of the second gaussian % Note that we constrain the second gaussian (number "1") % to fit only data above the mean of the first gaussian % % "hist_data" must sum to one % % Optional argument: cbigInitObject. % If it is NOT passed, then the default initialization is used. % % The returned value, "cbigObject", has four fields: % "mean_v": vector of 2 values (mean of gaussian #0, mean of gaussian #1) % "std_v" : vector of 2 values (std dev of gaussian #0, std dev of gaussian #1) % "prior0": scalar between 0 and 1 % "prior1": scalar between 0 and 1 % % 2005-01-27 - lathoud@idiap.ch A_FLOOR = 1e-10; if nargin < 2 error( [ mfilename '.train_g2_on_hist() needs at least two input arguments.' ] ); end if ~exist( 'cbigInitObject', 'var' ) cbigInitObject = []; end if ~exist( 'maxIter', 'var' ) maxIter = +Inf; end hist_bins = hist_bins(:); hist_data = hist_data(:); % Sanity check if abs( sum( hist_data ) - 1 ) > 1e-10 error( 'insane #1' ); end hist_data = hist_data / sum( hist_data ); hist_nbins = length( hist_bins ); delta_bin = mean( diff( hist_bins ) ); data_max = max( hist_bins ) + delta_bin / 2; data_min = min( hist_bins ) - delta_bin / 2; % Initialize w = hist_data; x = hist_bins; old_mean = [ Inf Inf ]; old_std = [ Inf Inf ]; if ~isempty( cbigInitObject ) mean_v = cbigInitObject.mean_v; std_v = cbigInitObject.std_v; prior0 = cbigInitObject.prior_v( 1 ); prior1 = cbigInitObject.prior_v( 2 ); else % Initialize distribution tmp_mean = w(:).' * x(:); tmp_std = sqrt( w(:).' * ( ( x(:) - tmp_mean ) .^ 2 ) ); mean_v = [ ( data_min + tmp_mean ) / 2 ( tmp_mean + data_max ) / 2 ]; std_v = [ tmp_std tmp_std ] / 2; % Initialize priors prior0 = 0.5; prior1 = 1 - prior0; end % EM iter=0; while any( ( iter < maxIter ) & ( abs( [ old_mean - mean_v, old_std - std_v ] ) > 1e-10 ) ) iter = iter+1; old_mean = mean_v; old_std = std_v; % E: estimate the posteriors cbigObject.mean_v = mean_v; cbigObject.std_v = std_v; cbigObject.prior_v = [ prior0 prior1 ]; [ post0, post1 ] = compute_post_cbig( x, cbigObject ); % M: update the parameters wpost0 = w(:) .* post0(:); wpost1 = w(:) .* post1(:); prior0 = sum( wpost0 ); prior1 = sum( wpost1 ); mean_v( 1 ) = wpost0(:).' * x(:) / prior0; mean_v( 2 ) = wpost1(:).' * x(:) / prior1; % Sanity check if mean_v( 1 ) >= mean_v( 2 ) error( 'insane #2' ); end std_v( 1 ) = sqrt( ( x(:).' - mean_v( 1 ) ) .^ 2 * wpost0(:) / prior0 ); std_v( 2 ) = sqrt( ( x(:).' - mean_v( 2 ) ) .^ 2 * wpost1(:) / prior1 ); % Variance flooring to avoid unstabilities std_v = max( A_FLOOR, std_v ); end disp( sprintf( 'iter:%d', iter ) ); % Return the results cbigObject.mean_v = mean_v; cbigObject.std_v = std_v; cbigObject.prior_v = [ prior0 prior1 ];