function main_separation()
% main_separation
% This function runs audio source separation on the noisy part of the
% corpus with parameters used in the paper.
% Separated data will be generated in $outputPath folder (one folder per
% setting). Output files with *_est1.wav (resp. *_est2.wav) suffix refer
% to separated speaker (resp. noise) audio files.
%
% Before you start to use this code :
% - Run the localization baseline (main_compute_est_loc.m) to compute
% estimated source localizations
% - Run main_compute_speakers_models.m to compute clean speaker models
% - Update paths in the "USER PARAMS" section
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Copyright 2017 Ewen Camberlein and Romain Lebarbenchon
% This software is distributed under the terms of the GNU Public License
% version 3 (http://www.gnu.org/licenses/gpl.txt)
% If you find it useful, please cite the following reference:
% - Nancy Bertin, Ewen Camberlein, Romain Lebarbenchon, Emmanuel Vincent,
% Sunit Sivasankaran, Irina Illina, Frédéric Bimbot
% "VoiceHome-2, an extended corpus for multichannelspeech processing in
% real homes", submitted to Speech Communication, Elsevier, 2017
%
% Contact : nancy.bertin[at]irisa.fr
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% USER PARAMS
corpusPath = 'C:/VBox_Partage/interspeech2/voiceHome-2_corpus_v1.0/';% corpus path on your computer
outputPath = 'C:\VBox_Partage\interspeech2\sep_resTEST\'; % Path where separated data will be generated
fasst_matlab_dir = 'C:/Program Files/fasst 2.1.0/scripts/MATLAB/'; % Path to FASST Matlab scripts on your computer
%% ADD PATHS
addpath(genpath('./FASST_Framework/'));
addpath(genpath('./../common/'));
addpath(fasst_matlab_dir);
%% PAPER'S PARAMS
transformType = {'ERB','STFT'}; % STFT / ERB
wienerTimeSmoothing = {'0','2','4','8'}; % Number of 1/2 frames (except current frame): 0/2/4/8
spk_learning_mode = {'spk_dep','spk_nodep'}; % spk_dep/spk_nodep
TDOA_mode = {'true','estimated'}; % use true or estimated source localization
%% LOOP ON PARAMS
% Run separation for the full cartesian product of the parameter grid.
% BUGFIX: the loop header previously read "for i = 1;length(transformType)"
% (semicolon instead of colon), which made the loop execute only for i = 1,
% silently skipping the 'STFT' transform entirely.
for i = 1:length(transformType)
    for j = 1:length(wienerTimeSmoothing)
        for k = 1:length(spk_learning_mode)
            for l = 1:length(TDOA_mode)
                separation(corpusPath,spk_learning_mode{k},outputPath,TDOA_mode{l},transformType{i},wienerTimeSmoothing{j});
            end
        end
    end
end
end
function [] = separation(corpusPath,spk_learning_mode,outputPath,TDOA_mode,transformType,wienerTimeSmoothing)
% separation
% Runs FASST-based speaker/background separation over the whole corpus for
% ONE setting of (transform, speaker-model mode, TDOA mode, Wiener time
% smoothing). Results are written in a dedicated result tree under
% outputPath (one folder per EM stage and per noise condition).
%
% INPUTS (all strings):
% corpusPath          : path to the voiceHome-2 corpus root
% spk_learning_mode   : 'spk_dep' / 'spk_nodep' speaker model training mode
% outputPath          : folder where separated data are generated
% TDOA_mode           : 'true' / 'estimated' source localization for TDOAs
% transformType       : 'STFT' / 'ERB'
% wienerTimeSmoothing : Wiener time smoothing, e.g. '4'
%% Get function name
% Keep only the base file name (used to tag error messages below)
functionName = mfilename('fullpath');
idSlash = find(functionName == '/');
if(isempty(idSlash))
idSlash = find(functionName == '\');
end
functionName = functionName((idSlash(end)+1):end);
%% FIXED PARAMS
% NOTE: most parameters below are written as strings on purpose: they are
% eval'd in the "Eval params" section or forwarded as strings to
% getParamStruct (which evals them itself).
wlen = '1024'; % window length (frame length in time domain) - % should be multiple of 4 for STFT and multiple of 2 for ERB
nbin_ERB = '8'; % number of frequency coefficient for ERB transform type
speech_model_type = 'close_field'; % close_field / clean_reverberated / clean_reverberated_1chanMax
chan2keep = '[1:8]'; % microphone channels used for separation
spk_NMF_order = '32'; % NMF order of the speaker spectral model
spk_iters = '50'; % EM iterations for speaker model training
homeList = '[1,2,3,4]'; % homes to process
roomList = '[1,2,3]'; % rooms to process
spkList = '[1,2,3]'; % speakers to process
posList = '[1:5]'; % speaker positions to process
noiseList = '[1:4]'; % noise conditions to process
uttList = '[1:2]'; % utterances to process
RT60 = '0.25'; % reverberation time (s)
nsrc_max = '[1,1]'; % [speaker, background] maximum number of sources
back_multiple_sources = '0';
spat_back_use_diffuse_model = '0';
spat_spk_use_diffuse_model = '0';
spat_back_meth = 'Direction'; % Position / Direction
spat_spk_meth = 'Direction'; % Position / Direction
back_NMF_order = '16'; % NMF order of the background spectral model
% Last EM iteration numbers (joint model adaptation)
switch transformType
case 'ERB'
sep_iters = '{''50''}';
case 'STFT'
sep_iters = '{''100''}';
end
% duration and background interval (after and/or before 'ok vesta +
% command')
backgroundDuration_Before = '4'; % between ]0;4]
strBackIntervalUsed = 'before'; %'before' => only the noise-only interval preceding the command is used
back_iters = '100'; % EM iterations for background model training
spec_back_meth = 'VQ'; % VQ / random
spec_spk_meth = 'VQ'; % VQ / random
strGenericConfigAdapt = 'single8'; % adaptation configuration (see getAdaptParameters)
% Other params
fs = 16000; % sampling frequency (Hz)
sound_velocity = 343; % sound speed (m/s)
%% Set paths
% Ensure outputPath ends with a path separator
if(strcmp(outputPath(end), '/')==0)
outputPath = [outputPath, '/'];
end
% Mandatory paths needed by processNoisyData
dirPaths = struct;
dirPaths.res = []; % Initialized inside the loop (depends on noise condition)
resDirName = [transformType '_' spk_learning_mode '_Dir_' TDOA_mode '_smooth' wienerTimeSmoothing '/'];
dirPaths.tmp = [outputPath '/' resDirName 'tmp/']; % Temporary directory
% Others paths needed by this calling script
spk_models_dir = './Models_spk/'; % Path to clean speaker models
est_direction_dir = './../Localization_Baseline/Estimated_Localization/'; % Path to estimated source localizations (generated by main_compute_est_loc.m - Localization baseline)
isolated_dir = [outputPath '/' resDirName 'isol/']; % Directory for isolated utterance to separate
%% Eval params
% Expand the single TDOA_mode flag into a {speaker_mode, background_mode} pair
switch TDOA_mode
case 'estimated'
TDOA_mode = '{''estimated'',''estimated''}';
case 'true'
TDOA_mode = '{''true'',''true''}';
otherwise
error('');
end
% Turn the string parameters into numeric / cell values
homeList = eval(homeList);
roomList = eval(roomList);
spkList = eval(spkList);
posList = eval(posList);
noiseList = eval(noiseList);
uttList = eval(uttList);
chan2keep = eval(chan2keep);
TDOA_mode = eval(TDOA_mode);
spk_TDOA_mode = TDOA_mode{1};
back_TDOA_mode = TDOA_mode{2};
nsrc_max = eval(nsrc_max);
spk_nsrc_max = nsrc_max(1);
back_nsrc_max = nsrc_max(2);
backDuration_Before = eval(backgroundDuration_Before);
wienerTimeSmoothing = eval(wienerTimeSmoothing);
spk_NMF_order = eval(spk_NMF_order);
spk_iters = eval(spk_iters);
wlen = eval(wlen);
nbin_ERB = eval(nbin_ERB);
% Corpus dimensions (number of homes/rooms/speakers/positions/noises/utterances)
nHouse =4;
nRoom = 3;
nSpk = 3;
nPos = 5;
nNoise = 4;
nUtt = 2;
backOffset = 1; % offset (s) used when extracting the background interval
%% Result tree folders
% Create one result folder per (EM stage, noise condition)
nEMStage = 1; % TO BE MODIFIED FOR MULTI STAGE EM
outputDir = cell(length(noiseList),nEMStage);
for h = homeList
for r = roomList
for noise = 1:length(noiseList)
for idEm = 1:nEMStage
% NOTE(review): getNoiseId presumably maps (home,room,noise) to a
% corpus-wide noise-condition index - confirm in getNoiseId
currentNoiseId = getNoiseId(h,r,noise,nRoom,nNoise);
outputDir{noise,idEm} = [outputPath resDirName 'stage' num2str(idEm) '/sep_res/noiseCond' num2str(currentNoiseId) '/'];
if(~exist(outputDir{noise,idEm},'dir'))
mkdir(outputDir{noise,idEm});
end
end
end
end
end
%% Create tmp and iso folders
if(~exist(dirPaths.tmp,'dir'))
mkdir(dirPaths.tmp)
end
if(~exist(isolated_dir,'dir'))
mkdir(isolated_dir)
end
%% Get structs from params for processNoisyData
% Adaptation flags (fixed/free) for the spatial and spectral models
[spat_back_adapt,spat_spk_adapt,spec_back_Wex_adapt,spec_spk_Wex_adapt,spec_back_Hex_adapt,spec_spk_Hex_adapt] = getAdaptParameters(strGenericConfigAdapt);
%% Main loop
% Read array_geometry
[~,micPosOnArray] = load_arrayGeo(corpusPath,1);
for h = homeList
for r = roomList
[~,arrayCentroid,arrayOrientation] = load_arrayPos(corpusPath,h,r,1);
% Only arrays with two zero orientation angles are supported here:
% otherwise the microphone positions would need a rotation (not implemented)
if(sum(arrayOrientation == 0)~=2)
error('TODO');
else
% Absolute microphone positions = positions on array + array centroid
Position_xyz_Sensors_all = bsxfun(@plus,micPosOnArray,arrayCentroid);
end
% Load estimated direction matrices
% For speaker : estimated localization on the speech interval
load([est_direction_dir 'GCC-PHAT_wuw_cmd/' 'home' num2str(h) '_room' num2str(r) '_wuw_cmd.mat'],'az_est','el_est');
az_est_spk = az_est;
el_est_spk = el_est;
% For background : estimated localization on noise-only interval
load([est_direction_dir 'GCC-PHAT_noise_before/' 'home' num2str(h) '_room' num2str(r) '_noise_before.mat'],'az_est','el_est');
az_est_back = az_est;
el_est_back = el_est;
clear az_est el_est;
for spk = spkList
[spkGenre,spkId] = getSpkId(h,spk);
for pos = posList
% Load true speaker position & localization
[~,spkPos,~] = load_spkPos(corpusPath,h,r,spk,pos);
for noise = noiseList
% Load true noise localization
if(noise == 1)
% Noise condition 1 has no localized noise source
backPos = [0 0 0]; % Computed TDOA at this position must not be used
% USE A RANDOM SPATIAL MODEL
current_spat_back_meth = 'random';
current_spat_back_use_diffuse_model = '0';
else
[~,backPos] = load_noisePos(corpusPath,h,r,noise,nRoom,nNoise);
current_spat_back_meth = spat_back_meth;
current_spat_back_use_diffuse_model = spat_back_use_diffuse_model;
end
% Pack parameters into the structs expected by processNoisyData
[spk_params,back_params,global_params] = ...
getParamStruct(chan2keep,[],[],RT60,back_multiple_sources,current_spat_back_use_diffuse_model,spat_spk_use_diffuse_model,current_spat_back_meth,spat_spk_meth,spec_back_meth,spec_spk_meth,back_NMF_order,spk_NMF_order,back_iters,spk_iters,sep_iters,spat_back_adapt,spat_spk_adapt,spec_back_Wex_adapt,spec_spk_Wex_adapt,spec_back_Hex_adapt,spec_spk_Hex_adapt,transformType,wlen,nbin_ERB,fs,sound_velocity,Position_xyz_Sensors_all',wienerTimeSmoothing);
for utt = uttList
fprintf('\n*** Processing file || Home = %d || Room = %d || Spk = %d || Pos = %d || Noise = %d || Utt. = %d ***\n',h,r,spk,pos,noise,utt);
% Result folders (one per EM stage) for the current noise condition
dirPaths.res = outputDir(find(noise==noiseList),:);
% get current file name
[fname_noisy,~] = getNoisyFileNames(h,r,spk,pos,noise,utt,nHouse,nRoom,nSpk,nPos,nNoise,nUtt);
% Extract isolated samples (wake-up word + command interval;
% [0.5 0.5] presumably a margin in seconds - confirm in wavOpening)
iso_samples = wavOpening(corpusPath,fname_noisy,'noisy','wuw_cmd',[0.5 0.5],NaN,fs);
% Extract background samples (before and after command)
back_samples_before = wavOpening(corpusPath,fname_noisy,'noisy','noise_before',backOffset,backDuration_Before,fs);
% Select part of background to use
switch(strBackIntervalUsed)
case 'before'
back_samples = back_samples_before;
clear back_samples_before;
otherwise
error(['[' functionName ']' 'back_interval_used variable is unknown']);
end
% Assign estimated direction for the current utterance
% (stack azimuth/elevation estimates into an nSrc x 2 matrix)
spkEstDir = [shiftdim(az_est_spk(spk,pos,noise,utt,:))';shiftdim(el_est_spk(spk,pos,noise,utt,:))']';
backEstDir = [shiftdim(az_est_back(spk,pos,noise,utt,:))';shiftdim(el_est_back(spk,pos,noise,utt,:))']';
% Init data struct (depending on each file)
spk_data = struct; % TDOAs / spatGain / nsrc / specModelName
back_data = struct; % TDOAs / spatGain / nsrc
% init TDOAs (from true positions or estimated directions)
[spk_data.TDOAs, back_data.TDOAs,back_data.nsrc,spk_data.nsrc] = initTDOAs(Position_xyz_Sensors_all',spkPos',backPos',spkEstDir,backEstDir,sound_velocity,spk_nsrc_max,back_nsrc_max,chan2keep,spk_TDOA_mode,back_TDOA_mode);
% init Gains (rank-1 anechoic model)
[spk_data.spatGain, back_data.spatGain] = initGains(Position_xyz_Sensors_all',chan2keep,spkPos',backPos');
% Assign clean speaker spectral model path (ERB models also carry
% the number of ERB bins in their file name)
switch transformType
case 'STFT'
spk_data.specModelName = [spk_models_dir spkGenre num2str(spkId) '_VQ' num2str(spk_NMF_order) '_' speech_model_type '_' num2str(spk_iters) 'iters_' spk_learning_mode '_' transformType '_' num2str(wlen) '.mat'];
case 'ERB'
spk_data.specModelName = [spk_models_dir spkGenre num2str(spkId) '_VQ' num2str(spk_NMF_order) '_' speech_model_type '_' num2str(spk_iters) 'iters_' spk_learning_mode '_' transformType '_' num2str(wlen) '_' num2str(nbin_ERB) '.mat'];
end
% Init models (No model available at this point)
modelsOutput.rough = []; % models initialized on unique source signal
modelsOutput.refined = []; % models refined on mixture signal
% Save background segment on disk
fname_back = [isolated_dir fname_noisy '_back.wav'];
audiowrite(fname_back,back_samples(:,chan2keep),fs);
% Clear unused variables
clear back_samples_before back_samples_after wuw_samples ;
% Save isolated segment on disk
fname_iso = [isolated_dir fname_noisy '_iso.wav'];
audiowrite(fname_iso,iso_samples(:,chan2keep),fs);
% Call processNoisyData
[~] = processNoisyData(fname_noisy,fname_iso,fname_back,dirPaths,global_params,spk_params,spk_data,back_params,back_data);
% Clear unused variables
clear iso_samples;
end
end
end
end
end
end
% Remove the temporary and isolated-segment working directories
rmdir(dirPaths.tmp,'s');
rmdir(isolated_dir,'s');
end
function [spk_params,back_params,global_params] = getParamStruct(chan2keep,pfRoomDimensions,RT60_default,RT60,back_multiple_sources,spat_back_use_diffuse_model,spat_spk_use_diffuse_model,spat_back_meth,spat_spk_meth,spec_back_meth,spec_spk_meth,back_NMF_order,spk_NMF_order,back_iters,spk_iters,sep_iters,spat_back_adapt,spat_spk_adapt,spec_back_Wex_adapt,spec_spk_Wex_adapt,spec_back_Hex_adapt,spec_spk_Hex_adapt,transformType,wlen,nbin_ERB,fs,sound_velocity,Position_xyz_Sensors_all,wienerTimeSmoothing)
% getParamStruct
% Packs the loose separation parameters into the three structures consumed
% by processNoisyData: speaker-model params, background-model params and
% global (mixture-level) params.
%
% [spk_params,back_params,global_params] = getParamStruct(chan2keep,pfRoomDimensions,RT60_default,RT60,back_multiple_sources,spat_back_use_diffuse_model,spat_spk_use_diffuse_model,spat_back_meth,spat_spk_meth,spec_back_meth,spec_spk_meth,back_NMF_order,spk_NMF_order,back_iters,spk_iters,sep_iters,spat_back_adapt,spat_spk_adapt,spec_back_Wex_adapt,spec_spk_Wex_adapt,spec_back_Hex_adapt,spec_spk_Hex_adapt,transformType,wlen,nbin_ERB,fs,sound_velocity,Position_xyz_Sensors_all,wienerTimeSmoothing)
%
% NOTE: several inputs (RT60, back_NMF_order, back_iters, the *_adapt
% flags, ...) are MATLAB expressions encoded as strings and are eval'd
% here; the remaining inputs are already numeric / cell values.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Copyright 2017 Ewen Camberlein and Romain Lebarbenchon
% This software is distributed under the terms of the GNU Public License
% version 3 (http://www.gnu.org/licenses/gpl.txt)
% If you find it useful, please cite the following reference:
% - Nancy Bertin, Ewen Camberlein, Romain Lebarbenchon, Emmanuel Vincent,
% Sunit Sivasankaran, Irina Illina, Frédéric Bimbot
% "VoiceHome-2, an extended corpus for multichannelspeech processing in
% real homes", submitted to Speech Communication, Elsevier, 2017
%
% Contact : nancy.bertin[at]irisa.fr
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
nKept = length(chan2keep);
keptPos = Position_xyz_Sensors_all(:,chan2keep); % positions of the used mics only
%% Global (mixture-level) params
global_params = struct;
global_params.transformType = transformType;
global_params.wlen = wlen;
global_params.nbin_ERB = nbin_ERB;
global_params.fs = fs;
global_params.nchan = nKept;
% Reverberation time: fall back to the default when the string evals empty
global_params.RT60 = eval(RT60);
if(isempty(global_params.RT60))
    global_params.RT60 = RT60_default;
end
global_params.sep_iters = eval(sep_iters);
global_params.sound_velocity = sound_velocity;
global_params.pfRoomDimensions = pfRoomDimensions;
% Wiener filter settings: only the time-smoothing constant c1 is tuned
global_params.wiener.a = 0; % Default
global_params.wiener.b = 0; % Default
global_params.wiener.c1 = wienerTimeSmoothing;
global_params.wiener.c2 = 0; % Default
global_params.wiener.d = -Inf; % Default
% Pairwise Euclidean distances between the kept microphones
global_params.micDist = zeros(nKept);
for m1 = 1:nKept
    for m2 = 1:nKept
        global_params.micDist(m1,m2) = sqrt(sum((keptPos(:,m1) - keptPos(:,m2)).^2));
    end
end
global_params.Position_xyz_Sensors = keptPos;
%% Speaker params
spk_params = struct;
spk_params.multiple_sources = 0;
spk_params.spat_diffuse_model = eval(spat_spk_use_diffuse_model);
spk_params.spat_meth = spat_spk_meth;
spk_params.spec_meth = spec_spk_meth;
spk_params.NMF_order = spk_NMF_order; % already numeric (eval'd by the caller)
spk_params.nIters = spk_iters; % already numeric (eval'd by the caller)
spk_params.spat_adapt = eval(spat_spk_adapt);
spk_params.spec_Wex_adapt = eval(spec_spk_Wex_adapt);
spk_params.spec_Hex_adapt = eval(spec_spk_Hex_adapt);
%% Background params
back_params = struct;
back_params.multiple_sources = eval(back_multiple_sources);
back_params.spat_diffuse_model = eval(spat_back_use_diffuse_model);
back_params.spat_meth = spat_back_meth;
back_params.spec_meth = spec_back_meth;
back_params.NMF_order = eval(back_NMF_order);
back_params.nIters = eval(back_iters);
back_params.spat_adapt = eval(spat_back_adapt);
back_params.spec_Wex_adapt = eval(spec_back_Wex_adapt);
back_params.spec_Hex_adapt = eval(spec_back_Hex_adapt);
end
function [spk_TDOAs, back_TDOAs,back_nsrc,spk_nsrc] = initTDOAs(micPos,spkPos,backPos,spkEstDir,backEstDir,sound_velocity,spk_nsrc_max,back_nsrc_max,chan2keep,spk_TDOA_mode,back_TDOA_mode)
% initTDOAs
% This function computes TDOAs between source and microphones according to
% spk_TDOA_mode/back_TDOA_mode (true or estimated localization)
%
% [spk_TDOAs, back_TDOAs,back_nsrc,spk_nsrc] = initTDOAs(micPos,spkPos,backPos,spkEstDir,backEstDir,sound_velocity,spk_nsrc_max,back_nsrc_max,chan2keep,spk_TDOA_mode,back_TDOA_mode)
%
% INPUTS:
% micPos : 3 x I, positions of ALL microphones
% spkPos / backPos : 3 x 1, true speaker / background positions
% spkEstDir / backEstDir : nEstSrc x 2, estimated directions (azimuth, elevation)
% sound_velocity : 1 x 1, sound speed (m/s)
% spk_nsrc_max / back_nsrc_max : currently unused (number of sources is fixed to 1)
% chan2keep : indices of the channels actually used
% spk_TDOA_mode / back_TDOA_mode : 'true' or 'estimated'
%
% OUTPUTS:
% spk_TDOAs : I x 1 matrix, TDOA between i-th microphone and speaker source
% back_TDOAs : I x 1 matrix, TDOA between i-th microphone and background source
% spk_nsrc : number of speaker source (set to "1")
% back_nsrc : number of background source (set to "1")
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Copyright 2017 Ewen Camberlein and Romain Lebarbenchon
% This software is distributed under the terms of the GNU Public License
% version 3 (http://www.gnu.org/licenses/gpl.txt)
% If you find it useful, please cite the following reference:
% - Nancy Bertin, Ewen Camberlein, Romain Lebarbenchon, Emmanuel Vincent,
% Sunit Sivasankaran, Irina Illina, Frédéric Bimbot
% "VoiceHome-2, an extended corpus for multichannelspeech processing in
% real homes", submitted to Speech Communication, Elsevier, 2017
%
% Contact : nancy.bertin[at]irisa.fr
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Init spk TDOAs
switch spk_TDOA_mode
case 'true'
[spk_TDOAs,~] = computeTrueTDOA(micPos,spkPos,sound_velocity);
spk_TDOAs = spk_TDOAs(chan2keep,:);
spk_nsrc = 1;
case 'estimated'
[~,~,spk_TDOAs] = computeEstTDOA(micPos,spkEstDir,chan2keep,sound_velocity);
spk_nsrc = 1;
otherwise
% BUGFIX: an unknown mode previously fell through and left the output
% arguments unassigned, causing a cryptic error at the call site.
error('initTDOAs: unknown spk_TDOA_mode ''%s'' (expected ''true'' or ''estimated'')',spk_TDOA_mode);
end
%% Init back TDOAs
switch back_TDOA_mode
case 'true'
[back_TDOAs,~] = computeTrueTDOA(micPos,backPos,sound_velocity);
back_TDOAs = back_TDOAs(chan2keep,:);
back_nsrc = 1;
case 'estimated'
[~,~,back_TDOAs] = computeEstTDOA(micPos,backEstDir,chan2keep,sound_velocity);
back_nsrc = 1;
otherwise
% BUGFIX: same undefined-output hazard as above for the background mode.
error('initTDOAs: unknown back_TDOA_mode ''%s'' (expected ''true'' or ''estimated'')',back_TDOA_mode);
end
end
function [trueTDOA,trueDir] = computeTrueTDOA(Position_xyz_Sensors,Position_xyz_Sources,c)
% computeTrueTDOA
% This function computes true TDOAs and true direction of source
%
% [trueTDOA,trueDir] = computeTrueTDOA(Position_xyz_Sensors,Position_xyz_Sources,c)
%
% INPUTS :
% Position_xyz_Sensors : 3 x I, microphone positions
% Position_xyz_Sources : 3 x J, source positions
% c : 1x1, sound velocity
%
% OUTPUTS :
% trueTDOA : I x J, TDOAs (relative to the first microphone)
% trueDir : J x 2, Source directions (azimuth, elevation) in degrees,
%           in the referential of the microphone array centroid
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Copyright 2017 Ewen Camberlein and Romain Lebarbenchon
% This software is distributed under the terms of the GNU Public License
% version 3 (http://www.gnu.org/licenses/gpl.txt)
% If you find it useful, please cite the following reference:
% - Nancy Bertin, Ewen Camberlein, Romain Lebarbenchon, Emmanuel Vincent,
% Sunit Sivasankaran, Irina Illina, Frédéric Bimbot
% "VoiceHome-2, an extended corpus for multichannelspeech processing in
% real homes", submitted to Speech Communication, Elsevier, 2017
%
% Contact : nancy.bertin[at]irisa.fr
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
I = size(Position_xyz_Sensors,2);
J = size(Position_xyz_Sources,2);
trueTDOA = zeros(I,J);
fDist = zeros(I,J);
Tau = zeros(I,J);
for j=1:J
    for i =1:I
        % Absolute propagation time from source j to microphone i
        fDist(i,j) = sqrt(sum((Position_xyz_Sources(:,j) - Position_xyz_Sensors(:,i)).^2));
        Tau(i,j) = fDist(i,j)/c;
    end
    % TDOA relative to the first microphone
    trueTDOA(:,j) = Tau(:,j) - Tau(1,j);
end
% True DOA (azimuth, elevation)
micPosCentroid = mean(Position_xyz_Sensors,2);
% Express Current_xyz_Source in the microphone array referential
srcPos = bsxfun(@minus,Position_xyz_Sources,micPosCentroid);
[thetaRef,phiRef,~] = cart2sph(srcPos(1,:),srcPos(2,:),srcPos(3,:));
% Convert to degrees
% BUGFIX: [thetaRef,phiRef] concatenated the two 1 x J row vectors into a
% single 1 x 2J row, not the documented J x 2 matrix (it only looked right
% for J = 1). Stacking and transposing yields the documented shape.
trueDir = ([thetaRef;phiRef].')*180/pi;
end
function [theta_est,phi_est,TDOAs_est] = computeEstTDOA(micPos,estDir,chan2keep,c)
% function computeEstTDOA
% This function keeps the first DOA (azimuth + elevation) and converts it
% to TDOAs
%
% [theta_est,phi_est,TDOAs_est] = computeEstTDOA(micPos,estDir,chan2keep,c)
%
% INPUTS
% micPos : 3 x nChan, Position of ALL sensors (not only used sensors
% because it is needed to compute the barycenter of the array ==> the oracle
% angles are given in the array referential)
% estDir : nEstSrc x 2, Estimated directions (azimuth, elevation)
% chan2keep : Index of used channel
% c : sound velocity
%
% OUTPUTS
% theta_est : estimated azimuth
% phi_est : estimated elevation
% TDOAs_est : estimated TDOAs
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Copyright 2017 Ewen Camberlein and Romain Lebarbenchon
% This software is distributed under the terms of the GNU Public License
% version 3 (http://www.gnu.org/licenses/gpl.txt)
% If you find it useful, please cite the following reference:
% - Nancy Bertin, Ewen Camberlein, Romain Lebarbenchon, Emmanuel Vincent,
% Sunit Sivasankaran, Irina Illina, Frédéric Bimbot
% "VoiceHome-2, an extended corpus for multichannelspeech processing in
% real homes", submitted to Speech Communication, Elsevier, 2017
%
% Contact : nancy.bertin[at]irisa.fr
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% (Removed dead code: the function name was extracted via mfilename but
% never used, and the extraction errored on paths without any separator.)
% We load estimatedDirections : only the first estimated source is kept
theta_est = estDir(1,1);
phi_est = estDir(1,2);
%% Compute the time tau between the estimated direction projected on a unit range sphere and each mic of the probe
TDOAs_est = dir2tdoa(micPos,theta_est,phi_est,chan2keep,c);
end
function tdoas = dir2tdoa(micPos,theta,phi,chan2keep,c)
% dir2tdoa
% This function converts a direction expressed in (azimuth,elevation) to
% TDOAs, relative to the first kept microphone
%
% tdoas = dir2tdoa(micPos,theta,phi,chan2keep,c)
%
% INPUTS
% micPos : 3 x I, positions of ALL microphones
% theta / phi : 1 x J, azimuth / elevation (degrees)
% chan2keep : indices of the channels actually used
% c : sound velocity (m/s)
%
% OUTPUT
% tdoas : length(chan2keep) x J, TDOAs of the kept channels
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Copyright 2017 Ewen Camberlein and Romain Lebarbenchon
% This software is distributed under the terms of the GNU Public License
% version 3 (http://www.gnu.org/licenses/gpl.txt)
% If you find it useful, please cite the following reference:
% - Nancy Bertin, Ewen Camberlein, Romain Lebarbenchon, Emmanuel Vincent,
% Sunit Sivasankaran, Irina Illina, Frédéric Bimbot
% "VoiceHome-2, an extended corpus for multichannelspeech processing in
% real homes", submitted to Speech Communication, Elsevier, 2017
%
% Contact : nancy.bertin[at]irisa.fr
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
I = size(micPos,2);
J = length(theta);
tau = zeros(I,J);
radius = 1; % direction projected on a sphere of radius 1 m
for j = 1:J
    % Absolute delay from the projected source to every microphone
    tau(:,j) = tau_spherical(micPos', radius, theta(j), phi(j), c);
end
%% Compute estimated TDOAs for chan2keep
micRef = chan2keep(1);
% BUGFIX: preallocation used zeros(I,J); when chan2keep is a strict subset
% of 1:I the column assignment below has length(chan2keep) elements and
% would fail with a size mismatch. Preallocate with the kept-channel count.
tdoas = zeros(length(chan2keep),J);
for j = 1:J
    tdoas(:,j) = tau(chan2keep,j) - tau(micRef,j);
end
end
function tau=tau_spherical(mic_pos, radius, azimuth, elevation, c)
% tau_spherical
% Computes the propagation delay from a source (given in spherical
% coordinates relative to the array centroid) to every microphone.
%
% mic_pos: microphones locations, in cartesian coordinates, in meters (I x 3)
% radius: radius of source location, in spherical coordinates, in meters
% azimuth: azimuth of source location, in spherical coordinates, in degrees
% elevation: elevation of source location, in spherical coordinates, in degrees
% c: sound speed, in m/s
%
% tau: the delays, in seconds, relative to each microphone (I x 1)
% Degrees -> radians
azRad = azimuth / 180 * pi;
elRad = elevation / 180 * pi;
% Source position in cartesian coordinates (relative to the array centroid)
[srcX, srcY, srcZ] = sph2cart(azRad, elRad, radius);
centroid = mean(mic_pos,1);
% Vector from the (centroid-referenced) source to each microphone
relPos = bsxfun(@minus, mic_pos, centroid + [srcX, srcY, srcZ]);
% Euclidean distance to each mic, converted to a delay
tau = sqrt(sum(relPos.^2, 2)) / c;
end
function [spk_spat_gain,back_spat_gain] = initGains(Position_xyz_Sensors_all,chan2keep,spkPos,backPos)
% initGains
% Computes the gains between the two sources (speaker, background) and the
% kept microphones, following the rank-1 anechoic model:
%   gain = 1 / (sqrt(4*pi) * distance(source, mic))
% Ref : section 2.3 of "N. Duong, E. Vincent, R. Gribonval,
% Under-determined reverberant audio source separation using a full-rank
% spatial covariance model"
%
% [spk_spat_gain,back_spat_gain] = initGains(Position_xyz_Sensors_all,chan2keep,spkPos,backPos)
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Copyright 2017 Ewen Camberlein and Romain Lebarbenchon
% This software is distributed under the terms of the GNU Public License
% version 3 (http://www.gnu.org/licenses/gpl.txt)
% If you find it useful, please cite the following reference:
% - Nancy Bertin, Ewen Camberlein, Romain Lebarbenchon, Emmanuel Vincent,
% Sunit Sivasankaran, Irina Illina, Frédéric Bimbot
% "VoiceHome-2, an extended corpus for multichannelspeech processing in
% real homes", submitted to Speech Communication, Elsevier, 2017
%
% Contact : nancy.bertin[at]irisa.fr
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
nKept = length(chan2keep);
% Rank-1 anechoic attenuation between one source and one microphone
attenuation = @(srcPos,micPos) 1/( sqrt(4*pi) * sqrt(sum((srcPos - micPos).^2)) );
spk_spat_gain = ones(nKept,1);
back_spat_gain = ones(nKept,1);
for m = 1:nKept
    micPos = Position_xyz_Sensors_all(:,chan2keep(m));
    spk_spat_gain(m,1) = attenuation(spkPos,micPos);
    back_spat_gain(m,1) = attenuation(backPos,micPos);
end
end
function [spat_back_adapt,spat_spk_adapt,spec_back_Wex_adapt,spec_spk_Wex_adapt,spec_back_Hex_adapt,spec_spk_Hex_adapt] = getAdaptParameters(strConfig)
% getAdaptParameters
% Get adaptation parameters of each model (spat/spec) of source (spk/back)
% Note : spat = spatial / spec = spectral
% Note2 : spectral model is only represented by the Wex matrix, the Hex
% matrix is always freely adapted.
%
% INPUT:
% strConfig : configuration name, 'single0' ... 'single15'
%
% OUTPUTS (all strings encoding a 1x1 cell array, eval'd by the caller):
% spat_back_adapt / spat_spk_adapt : spatial model adaptation flag
% spec_back_Wex_adapt / spec_spk_Wex_adapt : spectral (Wex) adaptation flag
% spec_back_Hex_adapt / spec_spk_Hex_adapt : always '{''free''}'
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Copyright 2017 Ewen Camberlein and Romain Lebarbenchon
% This software is distributed under the terms of the GNU Public License
% version 3 (http://www.gnu.org/licenses/gpl.txt)
% If you find it useful, please cite the following reference:
% - Nancy Bertin, Ewen Camberlein, Romain Lebarbenchon, Emmanuel Vincent,
% Sunit Sivasankaran, Irina Illina, Frédéric Bimbot
% "VoiceHome-2, an extended corpus for multichannelspeech processing in
% real homes", submitted to Speech Communication, Elsevier, 2017
%
% Contact : nancy.bertin[at]irisa.fr
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% The former 16-case switch only differed in four fixed/free flags; the
% configurations are now data in a lookup table.
% Each row: {config name, spat_back, spec_back_Wex, spat_spk, spec_spk_Wex}
cfgTable = { ...
    'single0' ,'free' ,'fixed','free' ,'fixed'; ...
    'single1' ,'fixed','fixed','free' ,'fixed'; ...
    'single2' ,'free' ,'fixed','fixed','fixed'; ...
    'single3' ,'fixed','fixed','fixed','fixed'; ...
    'single4' ,'free' ,'free' ,'fixed','free' ; ...
    'single5' ,'free' ,'free' ,'free' ,'free' ; ...
    'single6' ,'fixed','free' ,'free' ,'free' ; ...
    'single7' ,'fixed','fixed','free' ,'free' ; ...
    'single8' ,'fixed','free' ,'free' ,'fixed'; ...
    'single9' ,'fixed','free' ,'fixed','free' ; ...
    'single10','free' ,'free' ,'fixed','fixed'; ...
    'single11','free' ,'free' ,'free' ,'fixed'; ...
    'single12','fixed','free' ,'fixed','fixed'; ...
    'single13','fixed','fixed','fixed','free' ; ...
    'single14','free' ,'fixed','fixed','free' ; ...
    'single15','free' ,'fixed','free' ,'free' };
idx = find(strcmp(strConfig, cfgTable(:,1)), 1);
if(isempty(idx))
    error('Configuration pas encore ajoutée ŕ la liste');
end
% Wrap each flag as the string form of a 1x1 cell array, e.g. '{''free''}'
% (the caller evals these strings)
wrapFlag = @(s) ['{''' s '''}'];
spat_back_adapt     = wrapFlag(cfgTable{idx,2});
spec_back_Wex_adapt = wrapFlag(cfgTable{idx,3});
spat_spk_adapt      = wrapFlag(cfgTable{idx,4});
spec_spk_Wex_adapt  = wrapFlag(cfgTable{idx,5});
% The Hex (activation) matrices are always freely adapted
spec_back_Hex_adapt = '{''free''}';
spec_spk_Hex_adapt = '{''free''}';
end