% FUNCTION [ PERF, PERF_OVERLAP ] = EVAL_SEGMAT( RESULTSEG, TRUTHSEG, TOTAL_TIME, [ SMALL_TIME ] )
%
% To evaluate speech/silence classification performance
% on multichannel recordings. This is purely based on 
% time boundaries, NOT on frames.
%
% EVAL_SEGMAT is particularly useful on spontaneous
% multichannel speech, as there are many overlaps.
%
%
% RESULTSEG and TRUTHSEG are two structures of length NCHANNEL.
% We assume the order of channels is the same in both.
%
% RESULTSEG( a ).seg is a 2 x * matrix of values in seconds,
% describing RESULT speech segment boundaries for channel "a".
%
% TRUTHSEG( a ).seg is a 2 x * matrix of values in seconds,
% describing TRUE speech segment boundaries for channel "a".
%
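% For both matrices, for each column:
% - row 1: start of speech segment
% - row 2: end of speech segment
%
% A 3 x * matrix is also accepted. In that case each column is a
% segment (speech or silence) and row 3 gives its classification:
% 0 means silent segment, 1 means speech.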
%
% TOTAL_TIME is the total duration of the recording in seconds.
%
% SMALL_TIME is a small duration constant in seconds used to decide
% whether two time values are equal or not ( default 1e-10 ).
%
%
% PERF is a structure.
%
% PERF.CHANNEL( a ) is a structure with several fields 
% (correctly detected speech, FAR, FRR, PRC, RCL and so on).
%
% PERF.OVERALL is a structure with several fields 
% (obtained from early concatenation of all channels).
%
% PERF_OVERLAP is a structure similar to PERF,
% but evaluation is restricted to segments with
% more than one concurrent speaker (found in either
% result or truth or both).
%
% Therefore the overall performance figures are not necessarily the
% same as the average of the performance figures obtained for each
% channel.
%
% It is easy to find the segments containing errors by looking
% at the PERF.CHANNEL( a ).RGTMAT matrix and the related
% PERF.CHANNEL( a ).XXX_IND fields.
%
% For each column of a RGTMAT matrix:
% - row 1: start time of the segment in seconds
% - row 2: end time of the segment in seconds
% - row 3: RESULT classification: 0 means silent segment, 1 means speech.
% - row 4: TRUE classification: 0 means silent segment, 1 means speech.
%
% All performance figures are derived from those RGTMAT matrices.
%
% For more info... look at the code:
%   dbtype eval_segmat
%
%
% By Guillaume LATHOUD, 2004 - lathoud@idiap.ch

function [ perf, perf_overlap ] = eval_segmat( in_resultseg, in_truthseg, total_time, small_time )

  % Implementation overview
  %
  %  (0) validate the inputs and convert 2-row segment matrices
  %      (speech only) to 3-row matrices (speech AND silence);
  %  (1)-(5) per channel: build "rgtmat" (common time partition with the
  %      result and truth classification of each piece), sum durations
  %      and count segments;
  %  (6)-(8) derive per-channel and overall performance measures;
  %  (9) optionally (second output requested) restrict the evaluation to
  %      the overlap regions by concatenating them into a shorter
  %      timeline and calling eval_segmat recursively on it.
  %
  % Raises an error on invalid input or when an internal sanity check fails.

  % ( 0 ) Deal with parameters

  if nargin < 3
    error( 'eval_segmat: needs at least 3 input parameters' );
  end

  if nargout < 1
    error( 'eval_segmat: needs at least one output parameter' );
  end

  % An omitted or empty tolerance falls back to the default
  if nargin < 4 || isempty( small_time )
    small_time = 1e-10;
  end

  % We will also return the input arguments

  perf.resultseg  = in_resultseg;
  perf.truthseg   = in_truthseg;
  perf.total_time = total_time;

  % Working copies, modified below (definition of silent segments)
  resultseg = in_resultseg;
  truthseg  = in_truthseg;

  % Check them

  if ~isstruct( resultseg )
    error( 'eval_segmat: needs a structure for "resultseg"' );
  end

  if ~isstruct( truthseg )
    error( 'eval_segmat: needs a structure for "truthseg"' );
  end

  % Check same number of channels

  if length( resultseg ) ~= length( truthseg )
    error( 'eval_segmat: inconsistent number of channels!' );
  end

  nchannel = length( resultseg );

  for a = 1:nchannel

    if ~isempty( resultseg( a ).seg ) && ~ismember( size( resultseg( a ).seg, 1 ), [ 2 3 ] )
      error( 'eval_segmat: needs a 2 x N1 or 3 x N1 matrix for "resultseg(%d).seg"', a );
    end

    if ~isempty( truthseg( a ).seg ) && ~ismember( size( truthseg( a ).seg, 1 ), [ 2 3 ] )
      error( 'eval_segmat: needs a 2 x N2 or 3 x N2 matrix for "truthseg(%d).seg"', a );
    end

  end

  % The message always asked for a scalar; actually enforce it
  if ~isnumeric( total_time ) || numel( total_time ) ~= 1
    error( 'eval_segmat: needs a scalar value for "total_time"' );
  end


  % If needed, convert 2-row format (speech segments only)
  % to 3-row format (speech segments AND silence segments)

  for a = 1:nchannel

    % An empty matrix that has neither 2 nor 3 rows (e.g. "[]") passes
    % the checks above but cannot be indexed by row below. Treat it as
    % "no speech segment", i.e. an empty 2-row matrix.
    % NOTE(review): assumes define_silence_in_seg accepts a 2 x 0 input -- confirm.
    if isempty( resultseg( a ).seg ) && ~ismember( size( resultseg( a ).seg, 1 ), [ 2 3 ] )
      resultseg( a ).seg = zeros( 2, 0 );
    end
    if isempty( truthseg( a ).seg ) && ~ismember( size( truthseg( a ).seg, 1 ), [ 2 3 ] )
      truthseg( a ).seg = zeros( 2, 0 );
    end

    if size( resultseg( a ).seg, 1 ) == 2
      % Define silence segments, then remove empty segments
      resultseg( a ).seg = define_silence_in_seg( resultseg( a ).seg, total_time );
      resultseg( a ).seg = remove_empty_in_seg( resultseg( a ).seg );
    end

    if size( truthseg( a ).seg, 1 ) == 2
      % Define silence segments, then remove empty segments
      truthseg( a ).seg = define_silence_in_seg( truthseg( a ).seg, total_time );
      truthseg( a ).seg = remove_empty_in_seg( truthseg( a ).seg );
    end

  end

  % To store accuracy values

  channel = [];  % Structure array, one element per channel
  overall = [];  % Structure

  for a = 1:nchannel

    rseg_in = resultseg( a ).seg;
    tseg_in = truthseg( a ).seg;

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % 1) Take care of segments: build the common time partition

    % All boundaries of result and truth ("unique" returns them sorted)
    bounds = unique( [ rseg_in( 1, : )  tseg_in( 1, : )  rseg_in( 2, : )  tseg_in( 2, : ) ] );

    % "unique" is not enough to suppress duplicates:
    % minor errors may appear (e.g. 1e-14)
    bounds = bounds( diff( [ -Inf bounds ] ) > small_time );

    % The partition must start at 0...
    if isempty( bounds ) || abs( bounds( 1 ) ) > small_time
      error( 'eval_segmat: error #123!' );
    end
    bounds( 1 ) = 0;

    % ... and stop at total_time
    if abs( bounds( end ) - total_time ) > small_time
      error( 'eval_segmat: error #456!' );
    end
    bounds( end ) = total_time;

    % Row 1: start times, row 2: end times
    rgtmat = [ bounds( 1:end-1 ); bounds( 2:end ) ];

    % Check the matrix: consecutive segments must be contiguous
    if any( abs( rgtmat( 1, 2:end ) - rgtmat( 2, 1:end-1 ) ) > 0 )
      error( 'eval_segmat: error in rgtmat!' );
    end


    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % 2) Take care of activity
    %
    % Each piece of the partition is classified by looking up
    % its middle point in the result and in the truth.

    nseg = size( rgtmat, 2 );

    % Row 3: result classification, row 4: true classification.
    % Default is 0 (silence) when no segment contains the middle point.
    rgtmat = [ rgtmat; zeros( 2, nseg ) ];

    seg_middle = mean( rgtmat( 1:2, : ), 1 );

    for b = 1:nseg

      tm = seg_middle( b );

      % ( 2.1 ) Copy result activity
      ind = find( ( rseg_in( 1, : ) <= tm ) & ( tm <= rseg_in( 2, : ) ) );

      if ~isempty( ind )
        % Sanity check
        if length( ind ) > 1
          error( 'eval_segmat: insanity #3!' );
        end
        rgtmat( 3, b ) = rseg_in( 3, ind );
      end

      % ( 2.2 ) Copy GT activity
      ind = find( ( tseg_in( 1, : ) <= tm ) & ( tm <= tseg_in( 2, : ) ) );

      if ~isempty( ind )
        % Sanity check (same as for the result)
        if length( ind ) > 1
          error( 'eval_segmat: insanity #4!' );
        end
        rgtmat( 4, b ) = tseg_in( 3, ind );
      end

    end

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % 3) Use "rgtmat" to sum:
    % - total duration of correctly detected speech
    % - total duration of missed speech
    % - total duration of correctly detected silence
    % - total duration of missed silence

    seg_dur = rgtmat( 2, : ) - rgtmat( 1, : );

    % Correctly detected speech
    correct_speech_ind = find( rgtmat( 3, : ) & rgtmat( 4, : ) );
    correct_speech     = sum( seg_dur( correct_speech_ind ) );

    % Missed speech
    missed_speech_ind = find( (~rgtmat( 3, : )) & rgtmat( 4, : ) );
    missed_speech     = sum( seg_dur( missed_speech_ind ) );

    % Correctly detected silence
    correct_silence_ind = find( (~rgtmat( 3, : )) & (~rgtmat( 4, : )) );
    correct_silence     = sum( seg_dur( correct_silence_ind ) );

    % Missed silence
    missed_silence_ind = find( rgtmat( 3, : ) & (~rgtmat( 4, : )) );
    missed_silence     = sum( seg_dur( missed_silence_ind ) );

    % Sanity check: the four durations must add up to the total time.
    % The tolerance is never tighter than the historical 1e-10, but
    % follows "small_time" when the caller asked for a looser one.
    if abs( correct_speech + missed_speech + correct_silence + missed_silence - total_time ) > max( small_time, 1e-10 )
      error( 'eval_segmat: insanity #6 !' );
    end

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % 4) Count speech segments (rather than durations)

    rseg = simplify_seg( rseg_in, small_time );
    result_speech_segments = sum( rseg( 3, : ) );

    tseg = simplify_seg( tseg_in, small_time );
    truth_speech_segments = sum( tseg( 3, : ) );

    iseg = intersect_seg( { rseg, tseg }, small_time );

    % Middle points of the "iseg" segments flagged in row 3
    % (explicit dimension: always average start and end of each column)
    hit_middle = mean( iseg( 1:2, find( iseg( 3, : ) ) ), 1 );

    % A speech segment (of the result or of the truth) is "correct"
    % when it contains at least one of those middle points.
    result_hit = zeros( 1, size( rseg, 2 ) );
    truth_hit  = zeros( 1, size( tseg, 2 ) );

    for tm = hit_middle
      result_hit( rseg( 3, : ) & ( rseg( 1, : ) <= tm ) & ( tm <= rseg( 2, : ) ) ) = 1;
      truth_hit(  tseg( 3, : ) & ( tseg( 1, : ) <= tm ) & ( tm <= tseg( 2, : ) ) ) = 1;
    end

    result_correct_speech_segments = sum( result_hit );
    truth_correct_speech_segments  = sum( truth_hit );

    % Store the list of missed truth SPEECH segments (columns of "tseg").
    % Columns with row 3 equal to 0 are excluded: they can never be
    % marked as hit, so "find( ~truth_hit )" alone would list them too.
    truth_missed_speech_segments = find( tseg( 3, : ) & ~truth_hit );

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % 5) Store values for this channel

    channel( a ).rgtmat = rgtmat;

    % Total durations in seconds

    channel( a ).correct_speech  = correct_speech;
    channel( a ).missed_speech   = missed_speech;
    channel( a ).correct_silence = correct_silence;
    channel( a ).missed_silence  = missed_silence;

    % List of the corresponding columns in "channel( a ).rgtmat"

    channel( a ).correct_speech_ind  = correct_speech_ind;
    channel( a ).missed_speech_ind   = missed_speech_ind;
    channel( a ).correct_silence_ind = correct_silence_ind;
    channel( a ).missed_silence_ind  = missed_silence_ind;

    % Number of segments

    channel( a ).result_speech_segments = result_speech_segments;
    channel( a ).truth_speech_segments  = truth_speech_segments;

    channel( a ).result_correct_speech_segments = result_correct_speech_segments;
    channel( a ).truth_correct_speech_segments  = truth_correct_speech_segments;

    % List of column indices (not a count)
    channel( a ).truth_missed_speech_segments = truth_missed_speech_segments;

  end

  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  % 6) Overall durations: concatenate the channels

  overall.total_channel_time = nchannel * total_time;

  overall.correct_speech  = sum( [ channel.correct_speech ] );
  overall.missed_speech   = sum( [ channel.missed_speech ] );
  overall.correct_silence = sum( [ channel.correct_silence ] );
  overall.missed_silence  = sum( [ channel.missed_silence ] );

  overall.result_speech_segments  = sum( [ channel.result_speech_segments ] );
  overall.truth_speech_segments   = sum( [ channel.truth_speech_segments ] );

  overall.result_correct_speech_segments  = sum( [ channel.result_correct_speech_segments ] );
  overall.truth_correct_speech_segments   = sum( [ channel.truth_correct_speech_segments ] );

  % Per channel this field is a list of indices: the overall figure is
  % the total NUMBER of missed segments, not the sum of the indices.
  overall.truth_missed_speech_segments = 0;
  for a = 1:nchannel
    overall.truth_missed_speech_segments = overall.truth_missed_speech_segments + ...
        numel( channel( a ).truth_missed_speech_segments );
  end

  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  % 7) Compute perf. measures for each channel

  for a = 1:nchannel

    % ( 7.1 ) Based on durations

    % Explicit naming, to be safe
    tp = channel( a ).correct_speech;
    fp = channel( a ).missed_silence;
    tn = channel( a ).correct_silence;
    fn = channel( a ).missed_speech;

    [ far, frr, hter ] = far_frr( tp, fp, tn, fn );
    [ prc, rcl, F    ] = prc_rcl( tp, fp, tn, fn );

    % Store the results

    channel( a ).far  = far;
    channel( a ).frr  = frr;
    channel( a ).hter = hter;

    channel( a ).prc = prc;
    channel( a ).rcl = rcl;
    channel( a ).F   = F;

    % ( 7.2 ) Based on number of segments ("eps" avoids 0/0)

    channel( a ).seg_prc = channel( a ).result_correct_speech_segments / ( eps + channel( a ).result_speech_segments );
    channel( a ).seg_rcl = channel( a ).truth_correct_speech_segments / ( eps + channel( a ).truth_speech_segments );
    channel( a ).seg_F   = 2 * channel( a ).seg_prc * channel( a ).seg_rcl / ( eps + channel( a ).seg_prc + channel( a ).seg_rcl );

  end

  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  % 8) Compute overall perf. measures

  % ( 8.1 ) Based on durations

  % Explicit naming, to be safe
  tp = overall.correct_speech;
  fp = overall.missed_silence;
  tn = overall.correct_silence;
  fn = overall.missed_speech;

  [ far, frr, hter ] = far_frr( tp, fp, tn, fn );
  [ prc, rcl, F    ] = prc_rcl( tp, fp, tn, fn );

  % Store the results

  overall.far  = far;
  overall.frr  = frr;
  overall.hter = hter;

  overall.prc = prc;
  overall.rcl = rcl;
  overall.F   = F;

  % ( 8.2 ) Based on number of segments
  % Same "eps" guard as for the channels, to avoid NaN when there is
  % no speech segment at all.

  overall.seg_prc = overall.result_correct_speech_segments / ( eps + overall.result_speech_segments );
  overall.seg_rcl = overall.truth_correct_speech_segments / ( eps + overall.truth_speech_segments );
  overall.seg_F   = 2 * overall.seg_prc * overall.seg_rcl / ( eps + overall.seg_prc + overall.seg_rcl );

  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  % Return values

  perf.channel = channel;
  perf.overall = overall;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% ( 9 ) Optional overlap evaluation

if nargout > 1

  %%%

  % ( 9.1 ) Find overlap segments

  % In the result: keep the speech segments of each channel

  a_seglist = {};
  for a = 1:nchannel
    seg = resultseg( a ).seg;
    a_seglist{ a } = seg( 1:2, find( seg( 3, : ) ) );
  end

  % The overlap timeline spans the whole recording: use "total_time"
  % directly rather than the last boundary of the first truth channel.
  rov_seg = define_silence_in_seg( find_overlap( a_seglist ), total_time );

  % In the truth

  a_seglist = {};
  for a = 1:nchannel
    seg = truthseg( a ).seg;
    a_seglist{ a } = seg( 1:2, find( seg( 3, : ) ) );
  end

  tov_seg = define_silence_in_seg( find_overlap( a_seglist ), total_time );

  % Merge them
  ov_seg = merge_seg( { rov_seg, tov_seg }, small_time );

  %%%

  % ( 9.2 ) For each channel, extract the "ov_seg"
  % time subset of the result and the truth
  %
  % -> store it as is
  % -> also store a (temporary) concatenated version

  ov_subset = ov_seg( 1:2, find( ov_seg( 3, : ) ) );

  if isempty( ov_subset )
    % Issued through "warning" so that the caller's warning state
    % (e.g. "warning off") decides whether it is displayed.
    warning( 'eval_segmat: empty overlap subset. You may want to check the ground-truth.' );
    perf_overlap = [];
    perf_overlap.ov_subset = [];
    return;
  end

  ov_resultseg = [];
  ov_truthseg  = [];

  for a = 1:nchannel

    % Intersection between "ov_seg" and result
    ov_resultseg( a ).seg = extract_subset_from_seg( resultseg( a ).seg, ov_subset, small_time );

    % Intersection between "ov_seg" and truth
    ov_truthseg( a ).seg  = extract_subset_from_seg( truthseg( a ).seg, ov_subset, small_time );

  end

  % Concatenate the overlaps to be able to call "eval_segmat"

  ovc_total_time = sum( ov_subset( 2, : ) - ov_subset( 1, : ) );

  ovc_resultseg = [];
  ovc_truthseg  = [];

  ov_result_timelist  = [];
  ovc_result_timelist = [];
  ov_truth_timelist   = [];
  ovc_truth_timelist  = [];

  for a = 1:nchannel

    % Process result matrix and store it:
    % segments are laid end to end, starting from time 0

    dur = ov_resultseg( a ).seg( 2, : ) - ov_resultseg( a ).seg( 1, : );
    tc  = [ 0 cumsum( dur ) ];

    % Sanity check
    if abs( tc( end ) - ovc_total_time ) > small_time
      error( 'eval_segmat: insanity #7 !' );
    end

    ovc_resultseg( a ).seg = [ tc( 1:end-1 ); tc( 2:end ); ov_resultseg( a ).seg( 3, : ) ];

    % Store the time values for later processing of the "rgtmat" matrix
    % (all start times followed by the last end time)

    ov_result_timelist( a ).t  = [ ov_resultseg( a ).seg( 1, : )  ov_resultseg( a ).seg( 2, end ) ];
    ovc_result_timelist( a ).t = [ ovc_resultseg( a ).seg( 1, : ) ovc_resultseg( a ).seg( 2, end ) ];

    % Format for "eval_segmat" : list speech segments only

    ovc_resultseg( a ).seg = ovc_resultseg( a ).seg( 1:2, find( ovc_resultseg( a ).seg( 3, : ) ) );

    %%%

    % Process truth matrix and store it

    dur = ov_truthseg( a ).seg( 2, : ) - ov_truthseg( a ).seg( 1, : );
    tc  = [ 0 cumsum( dur ) ];

    % Sanity check
    if abs( tc( end ) - ovc_total_time ) > small_time
      error( 'eval_segmat: insanity #7 !' );
    end

    ovc_truthseg( a ).seg = [ tc( 1:end-1 ); tc( 2:end ); ov_truthseg( a ).seg( 3, : ) ];

    % Store the time values for later processing of the "rgtmat" matrix

    ov_truth_timelist( a ).t  = [ ov_truthseg( a ).seg( 1, : )  ov_truthseg( a ).seg( 2, end ) ];
    ovc_truth_timelist( a ).t = [ ovc_truthseg( a ).seg( 1, : ) ovc_truthseg( a ).seg( 2, end ) ];

    % Format for "eval_segmat" : list speech segments only

    ovc_truthseg( a ).seg = ovc_truthseg( a ).seg( 1:2, find( ovc_truthseg( a ).seg( 3, : ) ) );

  end

  %%%

  % ( 9.3 ) Call eval_segmat with the concatenated version
  % (single output: the recursive call does not evaluate overlaps again)

  a_perf = eval_segmat( ovc_resultseg, ovc_truthseg, ovc_total_time, small_time );

  %%%

  % ( 9.4 ) Put the true time values back in

  a_perf.resultseg = ov_resultseg;
  a_perf.truthseg  = ov_truthseg;

  % Fix the rgtmat matrix of each channel: every concatenated time value
  % is replaced by the true time value stored at the same position.
  % NOTE(review): the lookup lists hold segment START times (plus the
  % last end time), so an END time falling on a junction between two
  % overlap regions presumably maps to the start of the next region --
  % confirm whether this is intended.

  for a = 1:nchannel

    ovc_timelist( a ).t = [ ovc_result_timelist( a ).t  ovc_truth_timelist( a ).t ];
    ov_timelist( a ).t  = [ ov_result_timelist( a ).t   ov_truth_timelist( a ).t  ];

    for b = 1:2
      for c = 1:size( a_perf.channel( a ).rgtmat, 2 )

        tc  = a_perf.channel( a ).rgtmat( b, c );
        ind = find( abs( ovc_timelist( a ).t - tc ) < small_time );

        % Sanity check: the time value must be a known boundary
        if isempty( ind )
          error( 'eval_segmat: insanity #8!' );
        end

        % Put true time value back in
        a_perf.channel( a ).rgtmat( b, c ) = ov_timelist( a ).t( ind( 1 ) );

      end
    end

  end

  %%%

  % ( 9.5 ) Store the result

  perf_overlap = a_perf;

end  % if nargout > 1