function main_separation()
% main_separation
% This function runs audio source separation on the noisy part of the
% corpus with parameters used in the paper.
% Separated data will be generated in $outputPath folder (one folder per
% setting). Output files with *_est1.wav (resp. *_est2.wav) suffix refer
% to separated speaker (resp. noise) audio files.
%
% Before you start to use this code :
% - Run the localization baseline (main_compute_est_loc.m) to compute
%   estimated source localizations
% - Run main_compute_speakers_models.m to compute clean speaker models
% - Update paths in the "USER PARAMS" section
%
% Every combination of transform type, Wiener time smoothing, speaker
% learning mode and TDOA mode listed in the "PAPER'S PARAMS" section is
% processed by one call to the local function separation().
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Copyright 2017 Ewen Camberlein and Romain Lebarbenchon
% This software is distributed under the terms of the GNU Public License
% version 3 (http://www.gnu.org/licenses/gpl.txt)
% If you find it useful, please cite the following reference:
% - Nancy Bertin, Ewen Camberlein, Romain Lebarbenchon, Emmanuel Vincent,
%   Sunit Sivasankaran, Irina Illina, Frédéric Bimbot
%   "VoiceHome-2, an extended corpus for multichannel speech processing in
%   real homes", submitted to Speech Communication, Elsevier, 2017
%
% Contact : nancy.bertin[at]irisa.fr
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% USER PARAMS
corpusPath       = 'C:/VBox_Partage/interspeech2/voiceHome-2_corpus_v1.0/'; % corpus path on your computer
outputPath       = 'C:\VBox_Partage\interspeech2\sep_resTEST\';            % Path where separated data will be generated
fasst_matlab_dir = 'C:/Program Files/fasst 2.1.0/scripts/MATLAB/';         % Path to FASST Matlab scripts on your computer

%% ADD PATHS
addpath(genpath('./FASST_Framework/'));
addpath(genpath('./../common/'));
addpath(fasst_matlab_dir);

%% PAPER'S PARAMS
transformType       = {'ERB','STFT'};          % STFT / ERB
wienerTimeSmoothing = {'0','2','4','8'};       % Number of 1/2 frames (except current frame): 0/2/4/8
spk_learning_mode   = {'spk_dep','spk_nodep'}; % spk_dep/spk_nodep
TDOA_mode           = {'true','estimated'};

%% LOOP ON PARAMS
% The range must be written with ':' ("1:length(...)"). Written with ';'
% the loop runs for the first transform type only and the remaining
% settings are silently skipped.
for iTransform = 1:length(transformType)
    for iSmooth = 1:length(wienerTimeSmoothing)
        for iLearn = 1:length(spk_learning_mode)
            for iTdoa = 1:length(TDOA_mode)
                separation(corpusPath, ...
                    spk_learning_mode{iLearn}, ...
                    outputPath, ...
                    TDOA_mode{iTdoa}, ...
                    transformType{iTransform}, ...
                    wienerTimeSmoothing{iSmooth});
            end
        end
    end
end
end

function [] = separation(corpusPath,spk_learning_mode,outputPath,TDOA_mode,transformType,wienerTimeSmoothing)
% separation
% Run the separation of every selected noisy utterance of the corpus for
% ONE experimental setting.
%
% separation(corpusPath,spk_learning_mode,outputPath,TDOA_mode,transformType,wienerTimeSmoothing)
%
% INPUTS (all character vectors):
% corpusPath          : root folder of the corpus
% spk_learning_mode   : 'spk_dep' / 'spk_nodep' (used in folder and speaker
%                       model file names)
% outputPath          : root folder where results are written
% TDOA_mode           : 'true' / 'estimated', applied to both the speaker
%                       and the background source
% transformType       : 'ERB' / 'STFT'
% wienerTimeSmoothing : number of 1/2 frames, given as text (e.g. '4')
%
% For each utterance, the background and isolated segments are written as
% wav files in a temporary "isol" folder, then processNoisyData (project
% function, not defined in this file) is called. The "tmp" and "isol"
% folders of the setting are removed at the end.

%% Get function name (only used to prefix an error message)
functionName = mfilename;

%% Check inputs
% An error() call with an empty message does nothing in MATLAB, so the
% mode has to be rejected with a real message.
if(~any(strcmp(TDOA_mode,{'true','estimated'})))
    error('separation:unknownTDOAMode', ...
        'Unknown TDOA mode ''%s'' (expected ''true'' or ''estimated'').',TDOA_mode);
end
smoothingValue = str2double(wienerTimeSmoothing);
if(isnan(smoothingValue))
    error('separation:invalidWienerTimeSmoothing', ...
        'wienerTimeSmoothing must be a numeric text such as ''4'' (got ''%s'').',wienerTimeSmoothing);
end

%% FIXED PARAMS
% Values kept as text are the ones getParamStruct expects as text (it
% evaluates them itself).
wlen              = 1024;          % window length (frame length in time domain) -
                                   % should be multiple of 4 for STFT and multiple of 2 for ERB
nbin_ERB          = 8;             % number of frequency coefficient for ERB transform type
speech_model_type = 'close_field'; % close_field / clean_reverberated / clean_reverberated_1chanMax
chan2keep         = 1:8;
spk_NMF_order     = 32;
spk_iters         = 50;
homeList          = [1,2,3,4];
roomList          = [1,2,3];
spkList           = [1,2,3];
posList           = 1:5;
noiseList         = 1:4;
uttList           = 1:2;
RT60                        = '0.25';
nsrc_max                    = [1,1];
back_multiple_sources       = '0';
spat_back_use_diffuse_model = '0';
spat_spk_use_diffuse_model  = '0';
spat_back_meth              = 'Direction'; % Position / Direction
spat_spk_meth               = 'Direction'; % Position / Direction
back_NMF_order              = '16';

% Last EM iteration numbers (joint model adaptation)
switch transformType
    case 'ERB'
        sep_iters = '{''50''}';
    case 'STFT'
        sep_iters = '{''100''}';
    otherwise
        error('separation:unknownTransformType', ...
            'Unknown transform type ''%s'' (expected ''ERB'' or ''STFT'').',transformType);
end

% duration and background interval (before 'ok vesta + command')
backDuration_Before   = 4;        % between ]0;4]
strBackIntervalUsed   = 'before'; % only 'before' is implemented
back_iters            = '100';
spec_back_meth        = 'VQ';     % VQ / random
spec_spk_meth         = 'VQ';     % VQ / random
strGenericConfigAdapt = 'single8';

% Other params
fs             = 16000;
sound_velocity = 343;

% Corpus dimensions forwarded to the file-name / noise-id helpers
nHouse = 4;
nRoom  = 3;
nSpk   = 3;
nPos   = 5;
nNoise = 4;
nUtt   = 2;
backOffset = 1;

% Same mode for the speaker and the background source
spk_TDOA_mode  = TDOA_mode;
back_TDOA_mode = TDOA_mode;
spk_nsrc_max   = nsrc_max(1);
back_nsrc_max  = nsrc_max(2);

%% Set paths
% Accept both separators as "already terminated" so that a Windows path
% ending with '\' does not become '...\/'.
if(~any(outputPath(end) == '/\'))
    outputPath = [outputPath, '/'];
end

% Mandatory paths needed by processNoisyData
dirPaths     = struct;
dirPaths.res = []; % Initialized into the loop (depends on noise condition)
resDirName   = [transformType '_' spk_learning_mode '_Dir_' TDOA_mode '_smooth' wienerTimeSmoothing '/'];
dirPaths.tmp = [outputPath resDirName 'tmp/']; % Temporary directory

% Other paths needed by this calling script
spk_models_dir    = './Models_spk/'; % Path to clean speaker models
est_direction_dir = './../Localization_Baseline/Estimated_Localization/'; % Path to estimated source localizations (generated by main_compute_est_loc.m - Localization baseline)
isolated_dir      = [outputPath resDirName 'isol/']; % Directory for isolated utterance to separate

%% Result tree folders
% NOTE(review): outputDir is indexed by noise condition only, so every
% (home, room) pass overwrites the entries of the previous one. All the
% noiseCond folders get created, but after these loops outputDir holds the
% paths computed for the LAST home/room, and dirPaths.res in the main loop
% always points to them. If getNoiseId depends on the home and room, the
% results of every room end up in the last room's folders - confirm intent.
nEMStage  = 1; % TO BE MODIFIED FOR MULTI STAGE EM
outputDir = cell(length(noiseList),nEMStage);
for h = homeList
    for r = roomList
        for iNoise = 1:length(noiseList)
            for idEm = 1:nEMStage
                currentNoiseId = getNoiseId(h,r,iNoise,nRoom,nNoise);
                outputDir{iNoise,idEm} = [outputPath resDirName 'stage' num2str(idEm) '/sep_res/noiseCond' num2str(currentNoiseId) '/'];
                if(~exist(outputDir{iNoise,idEm},'dir'))
                    mkdir(outputDir{iNoise,idEm});
                end
            end
        end
    end
end

%% Create tmp and iso folders
if(~exist(dirPaths.tmp,'dir'))
    mkdir(dirPaths.tmp)
end
if(~exist(isolated_dir,'dir'))
    mkdir(isolated_dir)
end

%% Get structs from params for processNoisyData
[spat_back_adapt,spat_spk_adapt,spec_back_Wex_adapt,spec_spk_Wex_adapt,spec_back_Hex_adapt,spec_spk_Hex_adapt] = ...
    getAdaptParameters(strGenericConfigAdapt);

%% Main loop
% Read array_geometry
[~,micPosOnArray] = load_arrayGeo(corpusPath,1);

for h = homeList
    for r = roomList
        [~,arrayCentroid,arrayOrientation] = load_arrayPos(corpusPath,h,r,1);
        % Only the case where exactly two orientation components are zero
        % is handled: microphone positions are then a plain translation of
        % the array geometry. Other orientations are not implemented.
        if(sum(arrayOrientation == 0)~=2)
            error('TODO');
        else
            Position_xyz_Sensors_all = bsxfun(@plus,micPosOnArray,arrayCentroid);
        end

        % Load estimated direction matrices
        % For speaker : estimated localization on the speech interval
        locSpk = load([est_direction_dir 'GCC-PHAT_wuw_cmd/' 'home' num2str(h) '_room' num2str(r) '_wuw_cmd.mat'],'az_est','el_est');
        az_est_spk = locSpk.az_est;
        el_est_spk = locSpk.el_est;
        % For background : estimated localization on noise-only interval
        locBack = load([est_direction_dir 'GCC-PHAT_noise_before/' 'home' num2str(h) '_room' num2str(r) '_noise_before.mat'],'az_est','el_est');
        az_est_back = locBack.az_est;
        el_est_back = locBack.el_est;
        clear locSpk locBack;

        for spk = spkList
            [spkGenre,spkId] = getSpkId(h,spk);

            for pos = posList
                % Load true speaker position & localization
                [~,spkPos,~] = load_spkPos(corpusPath,h,r,spk,pos);

                for noise = noiseList
                    % Load true noise localization
                    if(noise == 1)
                        backPos = [0 0 0]; % Computed TDOA at this position must not be used
                        % USE A RANDOM SPATIAL MODEL
                        current_spat_back_meth = 'random';
                        current_spat_back_use_diffuse_model = '0';
                    else
                        [~,backPos] = load_noisePos(corpusPath,h,r,noise,nRoom,nNoise);
                        current_spat_back_meth = spat_back_meth;
                        current_spat_back_use_diffuse_model = spat_back_use_diffuse_model;
                    end

                    [spk_params,back_params,global_params] = getParamStruct( ...
                        chan2keep,[],[],RT60,back_multiple_sources, ...
                        current_spat_back_use_diffuse_model,spat_spk_use_diffuse_model, ...
                        current_spat_back_meth,spat_spk_meth, ...
                        spec_back_meth,spec_spk_meth, ...
                        back_NMF_order,spk_NMF_order, ...
                        back_iters,spk_iters,sep_iters, ...
                        spat_back_adapt,spat_spk_adapt, ...
                        spec_back_Wex_adapt,spec_spk_Wex_adapt, ...
                        spec_back_Hex_adapt,spec_spk_Hex_adapt, ...
                        transformType,wlen,nbin_ERB,fs,sound_velocity, ...
                        Position_xyz_Sensors_all',smoothingValue);

                    for utt = uttList
                        fprintf('\n*** Processing file || Home = %d || Room = %d || Spk = %d || Pos = %d || Noise = %d || Utt. = %d ***\n',h,r,spk,pos,noise,utt);

                        % Result folder(s) of the current noise condition
                        % (one cell per EM stage)
                        dirPaths.res = outputDir(noise==noiseList,:);

                        % get current file name
                        [fname_noisy,~] = getNoisyFileNames(h,r,spk,pos,noise,utt,nHouse,nRoom,nSpk,nPos,nNoise,nUtt);

                        % Extract isolated samples
                        iso_samples = wavOpening(corpusPath,fname_noisy,'noisy','wuw_cmd',[0.5 0.5],NaN,fs);

                        % Extract background samples (before command)
                        back_samples_before = wavOpening(corpusPath,fname_noisy,'noisy','noise_before',backOffset,backDuration_Before,fs);

                        % Select part of background to use
                        switch(strBackIntervalUsed)
                            case 'before'
                                back_samples = back_samples_before;
                                clear back_samples_before;
                            otherwise
                                error(['[' functionName ']' 'back_interval_used variable is unknown']);
                        end

                        % Estimated directions for the current utterance,
                        % arranged as one row [azimuth, elevation] per estimate
                        spkEstDir  = [shiftdim(az_est_spk(spk,pos,noise,utt,:))';shiftdim(el_est_spk(spk,pos,noise,utt,:))']';
                        backEstDir = [shiftdim(az_est_back(spk,pos,noise,utt,:))';shiftdim(el_est_back(spk,pos,noise,utt,:))']';

                        % Init data struct (depending on each file)
                        spk_data  = struct; % TDOAs / spatGain / nsrc / specModelName
                        back_data = struct; % TDOAs / spatGain / nsrc

                        % init TDOAs
                        [spk_data.TDOAs, back_data.TDOAs,back_data.nsrc,spk_data.nsrc] = initTDOAs( ...
                            Position_xyz_Sensors_all',spkPos',backPos',spkEstDir,backEstDir, ...
                            sound_velocity,spk_nsrc_max,back_nsrc_max,chan2keep,spk_TDOA_mode,back_TDOA_mode);

                        % init Gains
                        [spk_data.spatGain, back_data.spatGain] = initGains(Position_xyz_Sensors_all',chan2keep,spkPos',backPos');

                        % Clean speaker spectral model path (the ERB file
                        % name carries the number of ERB bins in addition)
                        specModelStem = [spk_models_dir spkGenre num2str(spkId) '_VQ' num2str(spk_NMF_order) '_' speech_model_type '_' num2str(spk_iters) 'iters_' spk_learning_mode '_' transformType '_' num2str(wlen)];
                        switch transformType
                            case 'STFT'
                                spk_data.specModelName = [specModelStem '.mat'];
                            case 'ERB'
                                spk_data.specModelName = [specModelStem '_' num2str(nbin_ERB) '.mat'];
                        end

                        % Save background segment on disk
                        fname_back = [isolated_dir fname_noisy '_back.wav'];
                        audiowrite(fname_back,back_samples(:,chan2keep),fs);

                        % Save isolated segment on disk
                        fname_iso = [isolated_dir fname_noisy '_iso.wav'];
                        audiowrite(fname_iso,iso_samples(:,chan2keep),fs);

                        % Call processNoisyData
                        [~] = processNoisyData(fname_noisy,fname_iso,fname_back,dirPaths,global_params,spk_params,spk_data,back_params,back_data);

                        % Clear unused variables
                        clear iso_samples;
                    end
                end
            end
        end
    end
end

% Remove the temporary folders of this setting
rmdir(dirPaths.tmp,'s');
rmdir(isolated_dir,'s');
end

function [spk_params,back_params,global_params] = getParamStruct(chan2keep,pfRoomDimensions,RT60_default,RT60,back_multiple_sources,spat_back_use_diffuse_model,spat_spk_use_diffuse_model,spat_back_meth,spat_spk_meth,spec_back_meth,spec_spk_meth,back_NMF_order,spk_NMF_order,back_iters,spk_iters,sep_iters,spat_back_adapt,spat_spk_adapt,spec_back_Wex_adapt,spec_spk_Wex_adapt,spec_back_Hex_adapt,spec_spk_Hex_adapt,transformType,wlen,nbin_ERB,fs,sound_velocity,Position_xyz_Sensors_all,wienerTimeSmoothing)
% getParamStruct
% Params to structure of params
%
% [spk_params,back_params,global_params] = getParamStruct(chan2keep,pfRoomDimensions,RT60_default,RT60,back_multiple_sources,spat_back_use_diffuse_model,spat_spk_use_diffuse_model,spat_back_meth,spat_spk_meth,spec_back_meth,spec_spk_meth,back_NMF_order,spk_NMF_order,back_iters,spk_iters,sep_iters,spat_back_adapt,spat_spk_adapt,spec_back_Wex_adapt,spec_spk_Wex_adapt,spec_back_Hex_adapt,spec_spk_Hex_adapt,transformType,wlen,nbin_ERB,fs,sound_velocity,Position_xyz_Sensors_all,wienerTimeSmoothing)
%
% The following inputs are MATLAB expressions given as text and are
% evaluated here: RT60, sep_iters, back_multiple_sources,
% spat_back_use_diffuse_model, spat_spk_use_diffuse_model, back_NMF_order,
% back_iters and the six *_adapt inputs. The other inputs are stored as
% they are. Position_xyz_Sensors_all is indexed as (coordinate, sensor).
% RT60_default is used when RT60 evaluates to an empty value.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Copyright 2017 Ewen Camberlein and Romain Lebarbenchon
% This software is distributed under the terms of the GNU Public License
% version 3 (http://www.gnu.org/licenses/gpl.txt)
% If you find it useful, please cite the following reference:
% - Nancy Bertin, Ewen Camberlein, Romain Lebarbenchon, Emmanuel Vincent,
%   Sunit Sivasankaran, Irina Illina, Frédéric Bimbot
%   "VoiceHome-2, an extended corpus for multichannel speech processing in
%   real homes", submitted to Speech Communication, Elsevier, 2017
%
% Contact : nancy.bertin[at]irisa.fr
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

spk_params    = struct;
back_params   = struct;
global_params = struct;
nKept = length(chan2keep);

%% Set global_params
global_params.transformType = transformType;
global_params.wlen          = wlen;
global_params.nbin_ERB      = nbin_ERB;
global_params.fs            = fs;
global_params.nchan         = nKept;
global_params.RT60          = eval(RT60);
if(isempty(global_params.RT60))
    global_params.RT60 = RT60_default;
end
global_params.sep_iters        = eval(sep_iters);
global_params.sound_velocity   = sound_velocity;
global_params.pfRoomDimensions = pfRoomDimensions;
global_params.micDist          = zeros(nKept);
global_params.wiener.a  = 0;    % Default
global_params.wiener.b  = 0;    % Default
global_params.wiener.c1 = wienerTimeSmoothing;
global_params.wiener.c2 = 0;    % Default
global_params.wiener.d  = -Inf; % Default

% Euclidean distance between every pair of kept microphones
for i1 = 1:nKept
    for i2 = 1:nKept
        posDiff = Position_xyz_Sensors_all(:,chan2keep(i1)) - Position_xyz_Sensors_all(:,chan2keep(i2));
        global_params.micDist(i1,i2) = sqrt(sum(posDiff.^2));
    end
end
global_params.Position_xyz_Sensors = Position_xyz_Sensors_all(:,chan2keep);

%% Set spk_params
spk_params.multiple_sources   = 0;
spk_params.spat_diffuse_model = eval(spat_spk_use_diffuse_model);
spk_params.spat_meth          = spat_spk_meth;
spk_params.spec_meth          = spec_spk_meth;
spk_params.NMF_order          = spk_NMF_order;
spk_params.nIters             = spk_iters;
spk_params.spat_adapt         = eval(spat_spk_adapt);
spk_params.spec_Wex_adapt     = eval(spec_spk_Wex_adapt);
spk_params.spec_Hex_adapt     = eval(spec_spk_Hex_adapt);

%% Set back_params
back_params.multiple_sources   = eval(back_multiple_sources);
back_params.spat_diffuse_model = eval(spat_back_use_diffuse_model);
back_params.spat_meth          = spat_back_meth;
back_params.spec_meth          = spec_back_meth;
back_params.NMF_order          = eval(back_NMF_order);
back_params.nIters             = eval(back_iters);
back_params.spat_adapt         = eval(spat_back_adapt);
back_params.spec_Wex_adapt     = eval(spec_back_Wex_adapt);
back_params.spec_Hex_adapt     = eval(spec_back_Hex_adapt);
end

function [spk_TDOAs, back_TDOAs,back_nsrc,spk_nsrc] = initTDOAs(micPos,spkPos,backPos,spkEstDir,backEstDir,sound_velocity,spk_nsrc_max,back_nsrc_max,chan2keep,spk_TDOA_mode,back_TDOA_mode)
% initTDOAs
% This function computes TDOAs between source and microphones according to
% spk_TDOA_mode/back_TDOA_mode (true or estimated localization)
%
% [spk_TDOAs, back_TDOAs,back_nsrc,spk_nsrc] = initTDOAs(micPos,spkPos,backPos,spkEstDir,backEstDir,sound_velocity,spk_nsrc_max,back_nsrc_max,chan2keep,spk_TDOA_mode,back_TDOA_mode)
%
% INPUTS:
% micPos         : 3 x nChan, positions of ALL sensors
% spkPos/backPos : 3 x 1, true source positions ('true' mode)
% spkEstDir/backEstDir : nEstSrc x 2, estimated (azimuth, elevation)
%                  ('estimated' mode, only the first row is used)
% sound_velocity : sound velocity
% spk_nsrc_max/back_nsrc_max : accepted for interface compatibility, not
%                  used by this function
% chan2keep      : index of used channels
% spk_TDOA_mode/back_TDOA_mode : 'true' or 'estimated'
%
% OUTPUTS:
% spk_TDOAs : I x 1 matrix, TDOA between i-th microphone and speaker source
% back_TDOAs : I x 1 matrix, TDOA between i-th microphone and background source
% spk_nsrc : number of speaker source (set to "1")
% back_nsrc : number of background source (set to "1")
%
% NOTE(review): in 'true' mode the TDOAs are relative to the first of ALL
% sensors, while in 'estimated' mode they are relative to chan2keep(1).
% Both agree only when chan2keep(1) is 1 - confirm before using a channel
% subset that does not start at the first sensor.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Copyright 2017 Ewen Camberlein and Romain Lebarbenchon
% This software is distributed under the terms of the GNU Public License
% version 3 (http://www.gnu.org/licenses/gpl.txt)
% If you find it useful, please cite the following reference:
% - Nancy Bertin, Ewen Camberlein, Romain Lebarbenchon, Emmanuel Vincent,
%   Sunit Sivasankaran, Irina Illina, Frédéric Bimbot
%   "VoiceHome-2, an extended corpus for multichannel speech processing in
%   real homes", submitted to Speech Communication, Elsevier, 2017
%
% Contact : nancy.bertin[at]irisa.fr
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Init spk TDOAs
switch spk_TDOA_mode
    case 'true'
        [spk_TDOAs,~] = computeTrueTDOA(micPos,spkPos,sound_velocity);
        spk_TDOAs = spk_TDOAs(chan2keep,:);
    case 'estimated'
        [~,~,spk_TDOAs] = computeEstTDOA(micPos,spkEstDir,chan2keep,sound_velocity);
    otherwise
        % Without this branch the outputs would be left unassigned
        error('initTDOAs:unknownMode', ...
            'Unknown spk_TDOA_mode ''%s'' (expected ''true'' or ''estimated'').',spk_TDOA_mode);
end
spk_nsrc = 1;

%% Init back TDOAs
switch back_TDOA_mode
    case 'true'
        [back_TDOAs,~] = computeTrueTDOA(micPos,backPos,sound_velocity);
        back_TDOAs = back_TDOAs(chan2keep,:);
    case 'estimated'
        [~,~,back_TDOAs] = computeEstTDOA(micPos,backEstDir,chan2keep,sound_velocity);
    otherwise
        error('initTDOAs:unknownMode', ...
            'Unknown back_TDOA_mode ''%s'' (expected ''true'' or ''estimated'').',back_TDOA_mode);
end
back_nsrc = 1;
end

function [trueTDOA,trueDir] = computeTrueTDOA(Position_xyz_Sensors,Position_xyz_Sources,c)
% computeTrueTDOA
% This function computes true TDOAs and true direction of source
%
% [trueTDOA,trueDir] = computeTrueTDOA(Position_xyz_Sensors,Position_xyz_Sources,c)
%
% INPUTS :
% Position_xyz_Sensors : 3 x I, microphone positions
% Position_xyz_Sources : 3 x J, source positions
% c : 1x1, sound velocity
%
% OUTPUTS :
% trueTDOA : I x J, TDOAs (propagation delay to each microphone minus the
%            delay to the first microphone)
% trueDir : J x 2, Source directions (azimuth, elevation) in degrees,
%           relative to the centroid of the sensors
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Copyright 2017 Ewen Camberlein and Romain Lebarbenchon
% This software is distributed under the terms of the GNU Public License
% version 3 (http://www.gnu.org/licenses/gpl.txt)
% If you find it useful, please cite the following reference:
% - Nancy Bertin, Ewen Camberlein, Romain Lebarbenchon, Emmanuel Vincent,
%   Sunit Sivasankaran, Irina Illina, Frédéric Bimbot
%   "VoiceHome-2, an extended corpus for multichannel speech processing in
%   real homes", submitted to Speech Communication, Elsevier, 2017
%
% Contact : nancy.bertin[at]irisa.fr
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

nSensors = size(Position_xyz_Sensors,2);
nSources = size(Position_xyz_Sources,2);

trueTDOA = zeros(nSensors,nSources);
delay    = zeros(nSensors,nSources);

for iSrc = 1:nSources
    for iMic = 1:nSensors
        srcToMic = sqrt(sum((Position_xyz_Sources(:,iSrc) - Position_xyz_Sensors(:,iMic)).^2));
        delay(iMic,iSrc) = srcToMic/c;
    end
    trueTDOA(:,iSrc) = delay(:,iSrc) - delay(1,iSrc);
end

% True DOA (azimuth, elevation)
micPosCentroid = mean(Position_xyz_Sensors,2);

% Express the source positions in the microphone array referential
srcPos = bsxfun(@minus,Position_xyz_Sources,micPosCentroid);
[thetaRef,phiRef,~] = cart2sph(srcPos(1,:),srcPos(2,:),srcPos(3,:));

% Convert to degrees. cart2sph returns 1 x J rows here, so they are turned
% into columns to get the documented J x 2 layout (a plain [thetaRef,phiRef]
% would give 1 x 2J as soon as J > 1).
trueDir = [thetaRef(:),phiRef(:)].*180/pi;
end

function [theta_est,phi_est,TDOAs_est] = computeEstTDOA(micPos,estDir,chan2keep,c)
% function computeEstTDOA
% This function keeps the first DOA (azimuth + elevation) and converts it
% to TDOAs
%
% [theta_est,phi_est,TDOAs_est] = computeEstTDOA(micPos,estDir,chan2keep,c)
%
% INPUTS
% micPos : 3 x nChan, Position of ALL sensors (not only used sensors
% because it is needed to compute the barycenter of the array ==> the oracle
% angles are given in the array referential)
% estDir : nEstSrc x 2, Estimated directions (azimuth, elevation)
% chan2keep : Index of used channel
% c : sound velocity
%
% OUTPUTS
% theta_est : estimated azimuth
% phi_est : estimated elevation
% TDOAs_est : estimated TDOAs
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Copyright 2017 Ewen Camberlein and Romain Lebarbenchon
% This software is distributed under the terms of the GNU Public License
% version 3 (http://www.gnu.org/licenses/gpl.txt)
% If you find it useful, please cite the following reference:
% - Nancy Bertin, Ewen Camberlein, Romain Lebarbenchon, Emmanuel Vincent,
%   Sunit Sivasankaran, Irina Illina, Frédéric Bimbot
%   "VoiceHome-2, an extended corpus for multichannel speech processing in
%   real homes", submitted to Speech Communication, Elsevier, 2017
%
% Contact : nancy.bertin[at]irisa.fr
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Only the first estimated direction is kept
theta_est = estDir(1,1);
phi_est   = estDir(1,2);

%% Compute the time tau between the estimated direction projected on a unit range sphere and each mic of the probe
TDOAs_est = dir2tdoa(micPos,theta_est,phi_est,chan2keep,c);
end

function tdoas = dir2tdoa(micPos,theta,phi,chan2keep,c)
% dir2tdoa
% This function converts a direction expressed in (azimuth,elevation) to
% TDOAs
%
% tdoas = dir2tdoa(micPos,theta,phi,chan2keep,c)
%
% INPUTS
% micPos : 3 x nChan, position of ALL sensors
% theta, phi : azimuth(s) and elevation(s), same number of elements
%              (forwarded to tau_spherical, which expects degrees)
% chan2keep : index of used channels, the first one is the reference
% c : sound velocity
%
% OUTPUT
% tdoas : numel(chan2keep) x numel(theta), delay at each used channel
%         minus the delay at channel chan2keep(1)
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Copyright 2017 Ewen Camberlein and Romain Lebarbenchon
% This software is distributed under the terms of the GNU Public License
% version 3 (http://www.gnu.org/licenses/gpl.txt)
% If you find it useful, please cite the following reference:
% - Nancy Bertin, Ewen Camberlein, Romain Lebarbenchon, Emmanuel Vincent,
%   Sunit Sivasankaran, Irina Illina, Frédéric Bimbot
%   "VoiceHome-2, an extended corpus for multichannel speech processing in
%   real homes", submitted to Speech Communication, Elsevier, 2017
%
% Contact : nancy.bertin[at]irisa.fr
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

nMic = size(micPos,2);
nDir = length(theta);
tau  = zeros(nMic,nDir);
radius = 1; % sphere of radius 1

for iDir = 1:nDir
    tau(:,iDir) = tau_spherical(micPos', radius, theta(iDir), phi(iDir), c);
end

%% Compute estimated TDOAs for chan2keep
% The result has one row per USED channel. Preallocating one row per
% sensor fails with a size mismatch as soon as chan2keep is a strict
% subset of the sensors.
micRef = chan2keep(1);
tdoas  = bsxfun(@minus,tau(chan2keep,:),tau(micRef,:));
end

function tau=tau_spherical(mic_pos, radius, azimuth, elevation, c)
% This function computes the delay vector between a source and a microphone array, for a given geometry
%
% mic_pos: microphones locations, in cartesian coordinates, in meters
%          (one row per microphone)
% radius: radius of source location, in spherical coordinates, in meters
% azimuth: azimuth of source location, in spherical coordinates, in degrees
% elevation: elevation of source location, in spherical coordinates, in degrees
% c: sound speed, in m/s
%
% tau: the delays, in seconds, relative to each microphone
%
% The source position is expressed relative to the mean microphone
% position.

% Degrees to radians
azimuth   = azimuth / 180 * pi;
elevation = elevation / 180 * pi;

% Source position in cartesian coordinates, i.e.
%   x = radius .* cos(elevation) .* cos(azimuth);
%   y = radius .* cos(elevation) .* sin(azimuth);
%   z = radius .* sin(elevation);
[x, y, z] = sph2cart(azimuth, elevation, radius);

% Microphone positions relative to the array centre, then relative to the source
centred = bsxfun(@minus, mic_pos, mean(mic_pos,1));
offset  = bsxfun(@minus, centred, [x, y, z]);

tau = sqrt(sum(offset.^2, 2)); % euclidean distances between the source and each mic
tau = tau / c;                 % compute delays
end

function [spk_spat_gain,back_spat_gain] = initGains(Position_xyz_Sensors_all,chan2keep,spkPos,backPos)
% initGains
% This function computes gains between sources and microphones following the
% rank-1 anechoic model.
% Ref : section 2.3 of "N. Duong, E. Vincent, R. Gribonval,
% Under-determined reverberant audio source separation using a full-rank
% spatial covariance model"
%
% [spk_spat_gain,back_spat_gain] = initGains(Position_xyz_Sensors_all,chan2keep,spkPos,backPos)
%
% INPUTS
% Position_xyz_Sensors_all : positions of ALL sensors, indexed as
%                            (coordinate, sensor)
% chan2keep : index of used channels
% spkPos, backPos : source positions, same layout as one column of
%                   Position_xyz_Sensors_all
%
% OUTPUTS
% spk_spat_gain, back_spat_gain : numel(chan2keep) x 1, gain
%                   1/(sqrt(4*pi)*distance) for each used channel
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Copyright 2017 Ewen Camberlein and Romain Lebarbenchon
% This software is distributed under the terms of the GNU Public License
% version 3 (http://www.gnu.org/licenses/gpl.txt)
% If you find it useful, please cite the following reference:
% - Nancy Bertin, Ewen Camberlein, Romain Lebarbenchon, Emmanuel Vincent,
%   Sunit Sivasankaran, Irina Illina, Frédéric Bimbot
%   "VoiceHome-2, an extended corpus for multichannel speech processing in
%   real homes", submitted to Speech Communication, Elsevier, 2017
%
% Contact : nancy.bertin[at]irisa.fr
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

nKept = length(chan2keep);

% Gains are computed only for used mics
spk_spat_gain  = ones(nKept,1);
back_spat_gain = ones(nKept,1);

for iChan = 1:nKept
    micPos = Position_xyz_Sensors_all(:,chan2keep(iChan));
    % Speaker gain
    spk_spat_gain(iChan,1)  = 1/ ( sqrt(4*pi) * sqrt(sum((spkPos - micPos).^2)) );
    % Background gain
    back_spat_gain(iChan,1) = 1/ ( sqrt(4*pi) * sqrt(sum((backPos - micPos).^2)) );
end
end

function [spat_back_adapt,spat_spk_adapt,spec_back_Wex_adapt,spec_spk_Wex_adapt,spec_back_Hex_adapt,spec_spk_Hex_adapt] = getAdaptParameters(strConfig)
% getAdaptParameters
% Get adaptation parameters of each model (spat/spec) of source (spk/back)
% Note : spat = spatial / spec = spectral
% Note2 : spectral model is only represented by the Wex matrix, the Hex
% matrix is always freely adapted.
%
% [spat_back_adapt,spat_spk_adapt,spec_back_Wex_adapt,spec_spk_Wex_adapt,spec_back_Hex_adapt,spec_spk_Hex_adapt] = getAdaptParameters(strConfig)
%
% INPUT
% strConfig : configuration name, 'single0' ... 'single15'
%
% OUTPUTS
% Each output is the text of a one-element cell array, either {'free'} or
% {'fixed'}. An error is raised for an unknown configuration name.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Copyright 2017 Ewen Camberlein and Romain Lebarbenchon
% This software is distributed under the terms of the GNU Public License
% version 3 (http://www.gnu.org/licenses/gpl.txt)
% If you find it useful, please cite the following reference:
% - Nancy Bertin, Ewen Camberlein, Romain Lebarbenchon, Emmanuel Vincent,
%   Sunit Sivasankaran, Irina Illina, Frédéric Bimbot
%   "VoiceHome-2, an extended corpus for multichannel speech processing in
%   real homes", submitted to Speech Communication, Elsevier, 2017
%
% Contact : nancy.bertin[at]irisa.fr
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% One row per configuration:
%   name        spat_back  spec_back_Wex  spat_spk  spec_spk_Wex
configTable = { ...
    'single0',  'free',    'fixed',       'free',   'fixed'; ...
    'single1',  'fixed',   'fixed',       'free',   'fixed'; ...
    'single2',  'free',    'fixed',       'fixed',  'fixed'; ...
    'single3',  'fixed',   'fixed',       'fixed',  'fixed'; ...
    'single4',  'free',    'free',        'fixed',  'free';  ...
    'single5',  'free',    'free',        'free',   'free';  ...
    'single6',  'fixed',   'free',        'free',   'free';  ...
    'single7',  'fixed',   'fixed',       'free',   'free';  ...
    'single8',  'fixed',   'free',        'free',   'fixed'; ...
    'single9',  'fixed',   'free',        'fixed',  'free';  ...
    'single10', 'free',    'free',        'fixed',  'fixed'; ...
    'single11', 'free',    'free',        'free',   'fixed'; ...
    'single12', 'fixed',   'free',        'fixed',  'fixed'; ...
    'single13', 'fixed',   'fixed',       'fixed',  'free';  ...
    'single14', 'free',    'fixed',       'fixed',  'free';  ...
    'single15', 'free',    'fixed',       'free',   'free'   ...
    };

iConfig = find(strcmp(strConfig,configTable(:,1)),1);
if(isempty(iConfig))
    error('Configuration pas encore ajoutée à la liste');
end

% Turn 'free' / 'fixed' into the text of a one-element cell array,
% i.e. {'free'} / {'fixed'}
asCellText = @(mode) ['{''' mode '''}'];

spat_back_adapt     = asCellText(configTable{iConfig,2});
spec_back_Wex_adapt = asCellText(configTable{iConfig,3});
spat_spk_adapt      = asCellText(configTable{iConfig,4});
spec_spk_Wex_adapt  = asCellText(configTable{iConfig,5});

% Hex matrices are always freely adapted
spec_back_Hex_adapt = asCellText('free');
spec_spk_Hex_adapt  = asCellText('free');
end