% Merge two sounds (vectors of samples) to reach a desired SNR ratio.
% Typically sound1 is a clean speech signal whereas sound2 is a noise signal.
%
% NOTE: whatever the input scale is (typically [-1 +1] or [-32768 +32767]),
% the output scale is [-os +os] where os = 2 ^ (nBits - 1).
% - use nBits = 1 for output in [-1 +1].
% - use nBits = 16 for output in [-32767 +32767].
% Output values are floats.
% 
% sound1 = N1-by-1
% sound2 = N2-by-1
% desiredSNRdB = desired SNR ratio in dB
% speechthreshold = ratio determining speech/non-speech segments (typically 1e-3).
% samplerate = sample rate in Hz, e.g. 8000 or 16000 (determines window length in segmental energy calculation)
% nBits = size of samples (typically 16 bits)
%
% N1>N2 or N1<N2 is ok: sound2 will be randomly cut/repeated to reach length N1
%
% soundMerged = N1-by-1, sum of alpha1 * sound1 + alpha2 * (cut/repeated sound2)
%
% by Guillaume LATHOUD at IDIAP (lathoud@idiap.ch)


function [soundMerged, alpha1, alpha2] = mergeSounds(sound1, sound2, desiredSNRdB, speechthreshold, samplerate, nBits)
    % Mix sound1 with a randomly cut/repeated copy of sound2 so that the
    % mixture reaches desiredSNRdB.
    %
    % Inputs:
    %   sound1, sound2   column vectors of samples (sound2 must be non-empty)
    %   desiredSNRdB     target SNR in dB
    %   speechthreshold  ratio applied to the peak segmental energy when
    %                    selecting the segments to average
    %   samplerate       sample rate in Hz (the window length below is
    %                    256 * floor(samplerate/8000), at least 1)
    %   nBits            output scale is 2^(nBits-1)
    % Outputs:
    %   soundMerged      alpha1 * sound1 + alpha2 * (cut/repeated sound2)
    %   alpha1, alpha2   gains returned by changeSNR

    n1 = length(sound1); n2 = length(sound2);

    % An empty sound2 can never be repeated up to length n1; fail
    % explicitly instead of looping forever.
    if n2 == 0
        error('mergeSounds:emptySound2', 'sound2 must contain at least one sample.');
    end

    % Cut/repeat sound2 so that both sound vectors have the same length.
    % Start from a random position in 1..n2 (randi replaces the obsolete
    % Communications Toolbox function randint).
    randomcut = randi(n2);
    aux = sound2(randomcut:n2);

    % Append as many whole copies of sound2 as needed to reach n1 samples.
    nMissing = n1 - length(aux);
    if nMissing > 0
        aux = [aux; repmat(sound2, ceil(nMissing / n2), 1)];
    end

    % In all cases trim to exactly n1 samples.
    sound2 = aux(1:n1);

    % window width (256 for 8 kHz, 512 for 16 kHz sample rate)
    windowlength = max(1, 256 * floor(samplerate/8000));

    % calculate the segmental energy vectors
    segE1_vector = segmentalEnergy(sound1, windowlength);
    segE2_vector = segmentalEnergy(sound2, windowlength);

    % average values on non-silent segments
    max1 = max(abs(sound1)); max2 = max(abs(sound2));
    segE1 = selectiveMean(segE1_vector, max(max1, max(segE1_vector) * speechthreshold));
    segE2 = selectiveMean(segE2_vector, max(max2, max(segE2_vector) * speechthreshold));
    currentSNR = segE1/segE2;

    % calculate the coefficients to apply to the sounds
    % (the scale is NOT stored in a variable called "max": that would
    % shadow the builtin max() used throughout this function)
    outputScale = 2^(nBits-1);
    [alpha1, alpha2] = changeSNR(desiredSNRdB, 10*log10(currentSNR), outputScale, max1, max2);

    % Merge
    soundMerged = alpha1 * sound1 + alpha2 * sound2;