function par = Wiener_iter_mel( par ) % function par = Wiener_iter_mel( par ) % % Mandatory input: % par.in_filename (one WAV file, containing possibly multiple channels) % par.out_filename (one WAV file, exact same dimensions as input file) % % Iterative Wiener filtering implementation. % % I used geometric mean-based tricks to determine silent frames/bins, % in two steps: % % - rough determination of silent frequency bins, a bit inspired from % the approximation for white noise in Annex B of (Lathoud et al., % IDIAP RR 06-09). % % - rough determination of silent frames, a bit inspired from the % detection-localization where we count the number of active bins % (Lathoud et al., ICASSP 2005, 2006 and IDIAP RR 05-52). Speech is a % wideband signal -> supposed to be above noise on many bins, % otherwise anyway it would be silence, or too narrowband to be % denoised. % % By G. Lathoud 2006, lathoud@idiap.ch if nargin < 1 error( [ mfilename ' needs 1 input argument.' ] ); end % Check for mandatory parameters required_param = { 'in_filename', 'out_filename' }; check_param( required_param, fieldnames( par ) ); % Fill in default parameters par_default = []; par_default.framelength_sec = 0.016; % In seconds par_default.frameshift_sec = 0.008; % In seconds par_default.blocksize_sec = 10.0; % In seconds par_default.preemp = 0.97; % Pre-emphasis coefficient par_default.silbuffer_sec = 5.0; % In seconds par_default.silmindur_sec = 0.050; par_default.nmelband = 40; % To prepare the H filter in MEL space par_default.bandHz = [ 75 +Inf ]; par_default.T_smooth_nframe = 1; par_default.noise_est_factor = 3; % Positive factor par_default.niter = 1; % Use "2" to remove more noise (but then the speech may sound a bit muffled). % The output signal will be scaled to [ -new_M +new_M ] where 0 < new_M <=1 par_default.new_M = 0.9; par_default.verbose = 1; par = fill_default( par, par_default ); if par.verbose disp( ' ' ); disp( '----------------------------------------------------------------------' ); disp( [ mfilename ' parameters:' ] ); disp( par ); end % Load the input signal [ siz, in.fs, in.nbits ] = wavread( par.in_filename, 'size' ); par.nsample = siz( 1 ); par.nchannel = siz( 2 ); if par.verbose disp( [ mfilename ' read "' par.in_filename '".' ] ); disp( sprintf( [ mfilename ': in.fs: %d, in.nbits: %d, par.nsample: %d, par.nchannel: %d' ], in.fs, in.nbits, par.nsample, par.nchannel ) ); end % Prepare the output out = in; out.x = zeros( par.nsample, par.nchannel ); % Prepare integer parameters par.framelength = round( par.framelength_sec * in.fs ); par.frameshift = round( par.frameshift_sec * in.fs ); par.nframe = 1 + ceil( ( par.nsample - par.framelength ) / par.frameshift ); % ceil: we will append zeroes to the last frame, if necessary par.blocksize_nframe = 1 + floor( ( par.blocksize_sec - par.framelength_sec ) / par.frameshift_sec ); par.silbuffer_nframe = 1 + floor( ( par.silbuffer_sec - par.framelength_sec ) / par.frameshift_sec ); par.silmindur_nframe = max( 0, 1 + floor( ( par.silmindur_sec - par.framelength_sec ) / par.frameshift_sec ) ); if par.verbose disp( sprintf( [ mfilename ': par.framelength: %d, par.frameshift: %d, par.nframe: %d, par.blocksize_nframe: %d, par.silbuffer_nframe: %d, par.silmindur_nframe: %d' ], par.framelength, par.frameshift, par.nframe, par.blocksize_nframe, par.silbuffer_nframe, par.silmindur_nframe ) ); end % Prepare the stuff for MEL-domain filter design tmppar = []; tmppar.fs = in.fs; tmppar.framelength = par.framelength; tmppar.band_hz = par.bandHz; tmppar.B = par.nmelband; fbank_mat = prepare_fbank_mat( tmppar ); % Prepare the "inverse" transformation inv_fbank_mat = fbank_mat .'; % Normalize both tmp = sum( fbank_mat, 2 ); ind = find( tmp > 0 ); fbank_mat( ind, : ) = fbank_mat( ind, : ) ./ repmat( tmp( ind ), 1, size( fbank_mat, 2 ) ); tmp = sum( inv_fbank_mat, 2 ); ind = find( tmp > 0 ); inv_fbank_mat( ind, : ) = inv_fbank_mat( ind, : ) ./ repmat( tmp( ind ), 1, size( inv_fbank_mat, 2 ) ); % Save the result par.H_smooth_mat = inv_fbank_mat * fbank_mat; % Make sure it is hermitian symetric ind = 1:par.framelength-1; ind1 = 1 + ind; ind2 = 2 * par.framelength + 1 - ind; par.H_smooth_mat( ind2, ind2 ) = conj( par.H_smooth_mat( ind1, ind1 ) ); % Prepare window par.win = diag( sparse( hamming( par.framelength, 'periodic' ) ) ); % Where we will accumulate the magnitude data from silent frames (if any) silbufferM = []; % Sanity check: make sure that the whole signal will be processed if par.framelength + ( par.nframe - 1 ) * par.frameshift < par.nsample error( [ mfilename ' is insane.' ] ); end % Save memory as best as possible % (another way is to get rid of most of "out.x" and output it to a file, instead) % (I have no time to do this right now) pack; % Wiener filtering on each channel for ichannel = 1:par.nchannel if par.verbose disp( ' ' ); disp( sprintf( [ mfilename ' is starting to filter channel %d/%d' ], ichannel, par.nchannel ) ); chrono = chrono_start( par.nframe, 20 ); end % Loop through blocks for iframe = 1:par.blocksize_nframe:par.nframe block.start_frame = iframe; block.stop_frame = min( par.nframe, iframe + par.blocksize_nframe - 1 ); block.nframe = block.stop_frame - block.start_frame + 1; block.start_sample = 1 + ( block.start_frame - 1 ) * par.frameshift; block.stop_sample = par.framelength + ( block.stop_frame - 1 ) * par.frameshift; block.nsample = block.stop_sample - block.start_sample + 1; % Get the signal for this channel and this block ("-1" for pre-emphasis) % tmp = in.x( max( 1, ( block.start_sample - 1 ):min( par.nsample, block.stop_sample ) ), ichannel ); tmp_start = max( 1, ( block.start_sample - 1 ) ); tmp_stop = min( par.nsample, block.stop_sample ); tmp = wavread( par.in_filename, [ tmp_start tmp_stop ] ); tmp = tmp( :, ichannel ); % pre-emphasis tmp = tmp( 2:end ) - par.preemp * tmp( 1:end-1 ); % bufferize block.ybuf = buffer( tmp, par.framelength, par.framelength - par.frameshift, 'nodelay' ); % extract the complex spectrogram from the zero-padded signal block.Sbuf = fft( full( par.win * block.ybuf ), 2 * par.framelength, 1 ); % derive magnitude and energy spectrograms block.Mbuf = abs( block.Sbuf ); block.Ebuf = abs( block.Sbuf ) .^ 2; % rough determination of silent frequency bins % A bit inspired from the approximation for white noise % in Annex B of (Lathoud et al., IDIAP RR 06-09) block.is_sil_bin = zeros( size( block.Mbuf ) ); tmp = block.Mbuf; tmp( ( tmp <= 0 ) | isnan( tmp ) | isinf( tmp ) ) = NaN; gmean = exp( nanmean( log( tmp.' ) ).' ); tmp( tmp >= repmat( gmean, 1, block.nframe ) ) = NaN; gmean2 = exp( nanmean( log( tmp.' ) ).' ); threshold = exp( 2 * log( gmean ) - log( gmean2 ) ); ind = setdiff( find( ~isnan( threshold ) ), 1 ); if ~isempty( ind ) block.is_sil_bin( ind, : ) = block.Mbuf( ind, : ) < repmat( threshold( ind ), 1, block.nframe ); end % rough determination of silent frames % A bit inspired from the detection-localization % where we count the number of active bins % (Lathoud et al., ICASSP 2005, 2006 and IDIAP RR 05-52) % % (speech is a wideband signal -> supposed to be above noise on many bins, % otherwise anyway it would be silence, or too narrowband to be denoised). block.n_sil_bin = sum( block.is_sil_bin ); block.is_sil_frame = zeros( size( block.n_sil_bin ) ); tmp = block.n_sil_bin( block.n_sil_bin > 0 ); if ~isempty( tmp ) gmean = exp( mean( log( tmp ) ) ); if any( tmp(:) > gmean ) gmean2 = exp( mean( log( tmp( tmp > gmean ) ) ) ); threshold = gmean2; exp( 2 * log( gmean2 ) - log( max( tmp ) ) ); block.is_sil_frame = block.n_sil_bin > threshold; end end % Try to improve the frame-level decision a bit block.is_sil_frame = closure( block.is_sil_frame, par.silmindur_nframe ); % Append the new noise data to the past noise data ind = find( block.is_sil_frame ); tmp = block.Mbuf( :, ind ); silbufferM = [ silbufferM tmp ]; if size( silbufferM, 2 ) > par.silbuffer_nframe silbufferM = silbufferM( :, end-par.silbuffer_nframe+1:end ); end % Estimate the noise energy spectrum if isempty( silbufferM ) % Supposed to be rare % Fallback: estimate a white noise if par.verbose disp( sprintf( [ mfilename ': warning! Could not determine silent frames (block.stop_frame: %d). Using conservative fallback.' ], block.stop_frame ) ); end En = zeros( 2 * par.framelength, 1 ); % Fallback of the fallback: remove no noise! tmp = block.Ebuf; tmp = tmp( ( tmp > 0 ) & ( ~isnan( tmp ) ) & ( ~isinf( tmp ) ) ); if ~isempty( tmp ); gmean = exp( mean( log( tmp ) ) ); tmp = tmp( tmp < gmean ); if ~isempty( tmp ) En(:) = exp( mean( log( tmp(:) ) ) ); end end else % Supposed to be the general case % (silbufferM not empty) % -> estimate the noise spectrum as the geometric mean of all silent frames tmp = silbufferM; tmp( ( tmp <= 0 ) | isnan( tmp ) | isinf( tmp ) ) = NaN; tmp = tmp .^ 2; En = par.noise_est_factor * exp( nanmean( log( tmp.' ) ).' ); % Very rare case: cannot estimate the noise if sum( ~isnan( En ) ) < 2 En = zeros( 2 * par.framelength, 1 ); else % Rare case: a few NaN left in the noise spectrum ind = find( isnan( En ) ); if ~isempty( ind ) % Fallback: interpolation and extrapolation ind2 = find( ~isnan( En ) ); En( ind ) = exp( interp1( ind2, log( En( ind2 ) ), ind, 'linear', 'extrap' ) ); end end end % Sanity check (not supposed to happen at all) if any( isnan( En ) ) error( [ mfilename ' failed during the estimation of En.' ] ); end % Don't modify the DC component En( 1 ) = 0; % Estimate the speech energy Es = max( 0, block.Ebuf - repmat( En, 1, block.nframe ) ); % Define the Wiener filter in the frequency domain H = Es ./ max( eps, Es + repmat( En, 1, block.nframe ) ); % Smooth it, by going to Mel domain, and back H = smooth_H( H, par ); % Now do a few iterations (iter #1 already done) for iter = 2:par.niter % Current clean speech signal estimate Es = ( H.^2 ) .* block.Ebuf; % Define the Wiener filter in the frequency domain H = Es ./ max( eps, Es + repmat( En, 1, block.nframe ) ); % Smooth it, by going to Mel domain, and back H = smooth_H( H, par ); end % Apply the filter and reconstruct the signal new_x = overlap_add( block.Sbuf, H, par.framelength, par.frameshift, par.preemp ); % Prepare for update: smooth the beginning of the block, % where it overlaps with the previous block. if block.start_frame > 1 s = 1 + ( block.start_frame - 1 ) * par.frameshift; e = par.framelength + ( block.start_frame - 2 ) * par.frameshift; if s < e ind = ( s:e ) - block.start_sample + 1; weight = sin( ( ind - ind(1) ) / ( ind(end) - ind(1) ) * pi / 2 ) .^ 2; new_x( ind ) = new_x( ind ) .* weight(:); end end % Prepare for update: smooth the end of the block, % where it overlaps with the next block. if block.stop_frame < par.nframe s = 1 + block.stop_frame * par.frameshift; e = par.framelength + ( block.stop_frame - 1 ) * par.frameshift; if s < e ind = ( s:e ) - block.start_sample + 1; weight = cos( ( ind - ind(1) ) / ( ind(end) - ind(1) ) * pi / 2 ) .^ 2; new_x( ind ) = new_x( ind ) .* weight(:); end end % Update the output signal for this block ind = block.start_sample:block.stop_sample; ind2 = find( ( 1 <= ind ) & ( ind < par.nsample ) ); out.x( ind( ind2 ), ichannel ) = out.x( ind( ind2 ), ichannel ) + new_x( ind2 ); if par.verbose chrono = chrono_check( chrono, iframe ); end end % loop through the frames of this channel if par.verbose chrono_stop( chrono ); end end % Loop through channels % Write out a_max = max( max( out.x ), -min( out.x ) ); out.x = out.x * par.new_M / ( eps + a_max ); wavwrite( out.x, out.fs, out.nbits, par.out_filename ); if par.verbose disp( ' ' ); disp( [ mfilename ' wrote out "' par.out_filename '".' ] ); disp( [ mfilename ': done.' ] ); end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function fbank_mat = prepare_fbank_mat( par ) if nargin < 1 error( [ mfilename ' needs 1 input argument.' ] ); end % Check for mandatory parameters required_fields = { 'fs', 'framelength', 'band_hz', 'B' }; check_param( required_fields, fieldnames( par ) ); % Define the MEL scale, following HTK book 3.3 mel_from_f = inline( '2595 * log10( 1 + f / 700 )', 'f' ); f_from_mel = inline( '700 * ( 10 .^ ( mel / 2595 ) - 1 )', 'mel' ); % Sanity check f = 10000; if abs( f_from_mel( mel_from_f( f ) ) - f ) > 1e-8 error( [ mfilename '.prepare_fbank_mat() is insane.' ] ); end melband = mel_from_f( max( 0, min( par.fs / 2, par.band_hz ) ) ); % Equal spacing in MEL space along the band n = par.B + 1; mel_list = melband( 1 ) + diff( melband ) * ( 0:n ) / n; % Back to frequency scale f_list = max( par.band_hz( 1 ), min( par.band_hz( 2 ), f_from_mel( mel_list ) ) ); % Define the triangular filters fbank_mat = zeros( par.B, 2 * par.framelength ); f_k = ( 0:( 2 * par.framelength-1 ) ) / ( 2 * par.framelength ) * par.fs; for a = 1:par.B [ tmp, k_low ] = min( abs( f_k - f_list( a ) ) ); [ tmp, k_center ] = min( abs( f_k - f_list( a + 1 ) ) ); [ tmp, k_high ] = min( abs( f_k - f_list( a + 2 ) ) ); fbank_mat( a, k_low:k_high ) = interp1( [ k_low k_center k_high ], [ 0 1 0 ], k_low:k_high, 'linear', NaN ); end % Sanity check if any( isnan( fbank_mat(:) ) ) error( [ mfilename '.prepare_fbank_mat() is still insane.' ] ); end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function new_H = smooth_H( H, par ) if nargin < 2 disp( [ mfilename ' needs 2 input arguments.' ] ); end required_fields = { 'H_smooth_mat', 'T_smooth_nframe' }; check_param( required_fields, fieldnames( par ) ); % Frequency domain smoothing new_H = par.H_smooth_mat * H; % Time domain smoothing if par.T_smooth_nframe > 0 n = size( new_H,1 ); z = zeros( n, 1 ); % Erosion for a = 1:par.T_smooth_nframe new_H = min( new_H, [ z new_H( :, 1:end-1 ) ] ); new_H = min( new_H, [ new_H( :, 2:end ) z ] ); end % Dilation for a = 1:par.T_smooth_nframe new_H = max( new_H, [ z new_H( :, 1:end-1 ) ] ); new_H = max( new_H, [ new_H( :, 2:end ) z ] ); end end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % FUNCTION CHECK_PARAM(REQUIRED_PARAM, GIVEN_PARAM) function check_param(required_param, given_param) aux = ismember(required_param, given_param); if ~all(aux) s = sprintf('Some of the required parameters are missing:'); v = required_param( ~aux ); for a = 1:length( v ) s = [ s sprintf( [ '\n ' v{ a } ] ) ]; end s = [ s sprintf( '\nAborted.') ]; error( s ); end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % FUNCTION Y = FILL_DEFAULT(X, X_DEFAULT) % % Fill absent or empty fields of X with default values in % X_DEFAULT. Resulting structure is Y. % % Example: % % >> x_default.a = 1; % >> x_default.b = 2; % >> x_default.c = 3; % >> x.a = 'myvalue'; % >> x.b = []; % >> y = fill_default(x, x_default); % >> disp(y); % a: 'myvalue' % b: 2 % c: 3 function y = fill_default(x, x_default) if nargin < 2 error('fill_default() needs 2 parameters'); end if isempty(x) y = x_default; else y = x; fields = fieldnames(x_default); for a = 1:length(fields) field = fields{ a }; replace = ~isfield(y, field); if ~replace % if field present but has empty value replace = isempty( eval( [ 'y.' field ] ) ); end if replace eval( [ 'y.' field ' = x_default.' field ';' ] ); end end end y = orderfields( y ); %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % FUNCTION CHRONO = CHRONO_START(N, [INTERVAL], [LOGFILE_FID]) % % To create a chrono object, used to chronometer a task. % % n: integer, total number of iterations to accomplish task. % interval: float, minimum interval between two printouts, default 10 sec % % chrono: chrono object to be reused with chrono_check(). function chrono = chrono_start(n, interval, logfile_fid) if nargin < 1 error('Need at least 1 parameter'); end if nargin < 2 interval = []; end if isempty( interval ) interval = 10; end if nargin < 3 logfile_fid = []; % By default verbosity will go on the standard output end chrono.n = n; chrono.interval = interval; chrono.start_time = 1e5 * now; chrono.previous_time = chrono.start_time; chrono.logfile_fid = logfile_fid; %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % FUNCTION CHRONO = CHRONO_CHECK(CHRONO, ITER) % % Print out for the user (how much time left). % % In: % chrono: object created by CHRONO_START % iter: integer, current iteration (from 1 to chrono.n) % % Out: updated chrono object. function chrono = chrono_check(chrono, iter) if nargin < 2 error('Need at least 2 parameters'); end current_time = 1e5 * now; elapsed_time = current_time - chrono.start_time; chrono.n = max(iter, chrono.n); if current_time - chrono.previous_time > chrono.interval % Estimate average speed & time left speed = iter / elapsed_time; time_left = (chrono.n - iter) / speed; % Print out s = ['Iter %d/%d - Speed: %.3f iter per sec /' ... ' Time elapsed: %.0f sec, left: %.0f sec']; s = sprintf(s, iter, chrono.n, speed, elapsed_time, time_left); if isempty( chrono.logfile_fid ) disp( s ); else fprintf( chrono.logfile_fid, [ s '\n' ] ); end % Update chrono object chrono.previous_time = current_time; end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % FUNCTION CHRONO_STOP(CHRONO) function chrono_stop(chrono) finish_time = 1e5 * now; elapsed_time = finish_time - chrono.start_time; speed = chrono.n / elapsed_time; s = '%d iterations in %.3f sec: %.3f iterations per sec'; disp(sprintf(s, chrono.n, elapsed_time, speed)); %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % FUNCTION Y = DILATION(X, N_TIMES) function y = dilation(x, n_times) if n_times < 0 error( 'dilation needs n_times >= 0' ); end siz = size( x ); if sum( siz>1 ) > 1 disp( 'dilation: WARNING! Input has more than one dimension.' ); end if n_times < 1 y = x; return; end x = x(:); % Mirror-replicate beginning and end of data vector n_left = min( length( x ), n_times ); fill_left = x( n_left:-1:1 ); fill_left = [ repmat( fill_left( 1 ), n_times-n_left, 1 ); fill_left ]; n_right = min( length( x ), n_times ); fill_right = x( end:-1:end-n_right+1 ); fill_right = [ fill_right; repmat( fill_right( end ), n_times-n_right, 1 ) ]; aux = [fill_left; x(:); fill_right]; for a = 1:n_times % These Inf values won't go in the result anyway aux = max( [ aux, [aux(2:end); -Inf], [-Inf; aux(1:(end-1))] ], [], 2 ); end y = aux((1+n_times):(end-n_times)); y = reshape( y, siz ); %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % FUNCTION Y = EROSION(X, N_TIMES) function y = erosion(x, n_times) if n_times < 0 error( 'erosion needs n_times >= 0' ); end siz = size( x ); if sum( siz>1 ) > 1 disp( 'erosion: WARNING! Input has more than one dimension.' ); end if n_times < 1 y = x; return; end x = x(:); % Mirror-replicate beginning and end of data vector n_left = min( length( x ), n_times ); fill_left = x( n_left:-1:1 ); fill_left = [ repmat( fill_left( 1 ), n_times-n_left, 1 ); fill_left ]; n_right = min( length( x ), n_times ); fill_right = x( end:-1:end-n_right+1 ); fill_right = [ fill_right; repmat( fill_right( end ), n_times-n_right, 1 ) ]; aux = [fill_left; x(:); fill_right]; for a = 1:n_times % These Inf values won't go in the result anyway aux = min( [ aux, [aux(2:end); -Inf], [-Inf; aux(1:(end-1))] ], [], 2 ); end y = aux((1+n_times):(end-n_times)); y = reshape( y, siz ); %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function y = closure( x, ntimes ) % FUNCTION Y = CLOSURE( X, NTIMES ) if nargin < 2 error( [ mfilename '.closure() needs 2 input arguments.' ] ); end y = erosion( dilation( x, ntimes ), ntimes ); %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function new_x = overlap_add( X, filter_mat, framelength, frameshift, preemp ) % function new_x = overlap_add( X, filter_mat, framelength, frameshift, preemp ) % % overlap-add reconstruction of a signal, from its COMPLEX-domain spectrogram "X". % % Inputs: % % X (mandatory): 2*framelength by nframes matrix of complex values, % symmetric spectrogram (as given e.g. by a FFT). % % filter_mat (optional): 2*framelength by nframes matrix of real or % complex values. It defines a filter that you want to apply on % X. For example you may give a matrix of ones and zeroes, if you % only want to keep certain (time, frequency) points of your % spectrogram. % % framelength (mandatory): integer, length of your time frame, in samples. % % frameshift (mandatory): integer, shift between two time frames, in samples. % % preemp (optional, default 0.97): pre-emphasis coefficient. % % % Output: % % new_x: vector of real values, time-domain waveform reconstructed % from the (optionally filtered) complex spectrogram X. % % By Guillaume Lathoud 2005 - lathoud@idiap.ch % % Thanks to Oliver Masson for all the explanations. if nargin < 4 error( [ mfilename '.overlap_add() needs at least 4 input arguments.' ] ); end if ~exist( 'preemp', 'var' ) preemp = 0.97; end % XXX slightly better this way % (on tests we found it slightly closer to the original signal) if isempty( filter_mat ) filter_mat = ones( size( X ) ); end if ~isempty( filter_mat ) if numel( unique( [ size( X, 1 ) size( filter_mat, 1 ) 2*framelength ] ) ) > 1 error( [ mfilename ': inconsistent frame length.' ] ); end nframes = unique( [ size( X, 2 ) size( filter_mat, 2 ) ] ); if numel( nframes ) > 1 error( [ mfilename ': inconsistent number of frames.' ] ); end else if numel( unique( [ size( X, 1 ) 2*framelength ] ) ) > 1 error( [ mfilename ': inconsistent frame length.' ] ); end nframes = size( X,2 ); end % overlap-add construction of a signal % scaling + masking if ~isempty( filter_mat ) new_filter = ifft( filter_mat, 2*framelength, 1 ); new_filter = new_filter( [ end-framelength/2+1:end 1:framelength/2 ], : ); new_filter = diag( sparse( blackman( framelength, 'periodic' ) ) ) * new_filter; new_filter( framelength+1:2*framelength, : ) = 0; % Apply the filter in a correct manner, in the frequency domain new_X = X .* fft( new_filter, 2*framelength, 1 ); else new_X = X; end % overlap-add nsamples = framelength + ( nframes-1 ) * frameshift; new_x = zeros( nsamples+4*framelength, 1 ); the_long_win = hamming( 2*framelength, 'periodic' ); for a = 1:nframes % The "real" is for very very small numerical imprecision % in order to remove some 1e-10-like imaginary part a_frame = real( ifft( new_X( :,a ), 2 * framelength )) ./ the_long_win; start_sample = 1 + (a-1) * frameshift; end_sample = start_sample + 2 * framelength - 1; ind = start_sample:end_sample; new_x( ind ) = new_x( ind ) + a_frame; end % Finally remove pre-emphasis new_x = filter( 1, [ 1 -preemp], new_x ); if ~isempty( filter_mat ) % Compensate the filter's delay: cut the crap at the beginning new_x = new_x( framelength/2+1:end ); end % Cut off the crap at the end new_x = new_x( 1:nsamples ); % Return a column vector new_x = new_x(:);