function main_separation()
% main_separation
% This function runs audio source separation on the noisy part of the
% corpus with the parameters used in the paper.
% Separated data are generated in the $outputPath folder (one folder per
% setting). Output files with the *_est1.wav (resp. *_est2.wav) suffix
% contain the separated speaker (resp. noise) audio.
%
% Every combination of transformType x wienerTimeSmoothing x
% spk_learning_mode x TDOA_mode listed in the "PAPER'S PARAMS" section is
% processed by one call to the local function separation().
%
% Before you start to use this code :
% - Run the localization baseline (main_compute_est_loc.m) to compute
% estimated source localizations
% - Run main_compute_speakers_models.m to compute clean speaker models
% - Update paths in the "USER PARAMS" section
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Copyright 2017 Ewen Camberlein and Romain Lebarbenchon
% This software is distributed under the terms of the GNU Public License
% version 3 (http://www.gnu.org/licenses/gpl.txt)
% If you find it useful, please cite the following reference:
% - Nancy Bertin, Ewen Camberlein, Romain Lebarbenchon, Emmanuel Vincent,
%   Sunit Sivasankaran, Irina Illina, Frédéric Bimbot
%   "VoiceHome-2, an extended corpus for multichannel speech processing in
%    real homes", submitted to Speech Communication, Elsevier, 2017
%
% Contact : nancy.bertin[at]irisa.fr
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% USER PARAMS
corpusPath = 'C:/VBox_Partage/interspeech2/voiceHome-2_corpus_v1.0/';% corpus path on your computer
outputPath = 'C:\VBox_Partage\interspeech2\sep_resTEST\'; % Path where separated data will be generated
fasst_matlab_dir = 'C:/Program Files/fasst 2.1.0/scripts/MATLAB/'; % Path to FASST Matlab scripts on your computer

%% ADD PATHS
addpath(genpath('./FASST_Framework/'));
addpath(genpath('./../common/'));
addpath(fasst_matlab_dir);

%% PAPER'S PARAMS
transformType = {'ERB','STFT'};   % STFT / ERB
wienerTimeSmoothing = {'0','2','4','8'}; % Number of 1/2 frames (except current frame): 0/2/4/8
spk_learning_mode = {'spk_dep','spk_nodep'}; % spk_dep/spk_nodep
TDOA_mode = {'true','estimated'};

%% LOOP ON PARAMS
% The range must be written with a colon: "1;numel(...)" would iterate over
% the single value 1 and silently skip every transform type but the first.
for iTransform = 1:numel(transformType)
    for iSmooth = 1:numel(wienerTimeSmoothing)
        for iLearn = 1:numel(spk_learning_mode)
            for iTdoa = 1:numel(TDOA_mode)
                separation(corpusPath,spk_learning_mode{iLearn},outputPath,TDOA_mode{iTdoa},transformType{iTransform},wienerTimeSmoothing{iSmooth});
            end
        end
    end
end
end
function [] = separation(corpusPath,spk_learning_mode,outputPath,TDOA_mode,transformType,wienerTimeSmoothing)
% separation
% Runs the separation pipeline on every selected noisy utterance of the
% corpus for ONE parameter setting. Results are written under
% [outputPath resDirName], where resDirName is built from the setting.
%
% separation(corpusPath,spk_learning_mode,outputPath,TDOA_mode,transformType,wienerTimeSmoothing)
%
% INPUTS :
% corpusPath : char, corpus root folder (forwarded to the corpus loaders)
% spk_learning_mode : char, used in the result folder name and in the
%                     speaker model file name
% outputPath : char, root folder for generated data
% TDOA_mode : char, 'true' or 'estimated' (applied to speaker and background)
% transformType : char, 'ERB' or 'STFT'
% wienerTimeSmoothing : char holding a numeric expression (evaluated with eval)
%
% SIDE EFFECTS :
% Creates the result, tmp and isol folders, writes the isolated and
% background wav segments, calls processNoisyData for each utterance and
% finally removes the tmp and isol folders.

%% Get function name
functionName = mfilename('fullpath');
idSlash = find(functionName == '/');
if(isempty(idSlash))
    idSlash = find(functionName == '\');
end
functionName = functionName((idSlash(end)+1):end);

%% FIXED PARAMS
% Most parameters are kept as strings: some are evaluated below, the others
% are evaluated inside getParamStruct.
wlen          = '1024';   % window length (frame length in time domain) - % should be multiple of 4 for STFT and multiple of 2 for ERB
nbin_ERB      = '8';    % number of frequency coefficient for ERB transform type
speech_model_type = 'close_field'; % close_field / clean_reverberated / clean_reverberated_1chanMax
chan2keep = '[1:8]';
spk_NMF_order = '32';
spk_iters = '50';
homeList = '[1,2,3,4]';
roomList = '[1,2,3]';
spkList = '[1,2,3]';
posList = '[1:5]';
noiseList = '[1:4]';
uttList = '[1:2]';
RT60 = '0.25';
nsrc_max = '[1,1]';
back_multiple_sources = '0';
spat_back_use_diffuse_model = '0';
spat_spk_use_diffuse_model = '0';
spat_back_meth = 'Direction'; % Position / Direction
spat_spk_meth = 'Direction';  % Position / Direction
back_NMF_order = '16';

% Last EM iteration numbers (joint model adaptation)
switch transformType
    case 'ERB'
        sep_iters = '{''50''}';
    case 'STFT'
        sep_iters = '{''100''}';
    otherwise
        % Fail early instead of leaving sep_iters undefined until it is used.
        error('separation:unknownTransformType','[%s] Unknown transformType ''%s'' (expected ''ERB'' or ''STFT'')',functionName,transformType);
end

% duration and background interval (after and/or before 'ok vesta +
% command')
backgroundDuration_Before = '4'; % between ]0;4]
strBackIntervalUsed = 'before'; %'before' => only

back_iters = '100';
spec_back_meth = 'VQ'; % VQ / random
spec_spk_meth = 'VQ';  % VQ / random
strGenericConfigAdapt = 'single8';

% Other params
fs = 16000;
sound_velocity = 343;

%% Set paths
if(strcmp(outputPath(end), '/')==0)
    outputPath = [outputPath, '/'];
end

% Mandatory paths needed by processNoisyData
dirPaths = struct;
dirPaths.res = []; % Initialized inside the main loop (depends on noise condition)
resDirName = [transformType '_' spk_learning_mode '_Dir_' TDOA_mode '_smooth' wienerTimeSmoothing '/'];
dirPaths.tmp = [outputPath  '/' resDirName 'tmp/']; % Temporary directory

% Others paths needed by this calling script
spk_models_dir = './Models_spk/'; % Path to clean speaker models
est_direction_dir = './../Localization_Baseline/Estimated_Localization/'; % Path to estimated source localizations (generated by main_compute_est_loc.m - Localization baseline)
isolated_dir = [outputPath '/' resDirName 'isol/']; % Directory for isolated utterance to separate

%% Eval params
% The same localization mode is used for the speaker and the background.
% NB: error('') with an empty message is a no-op in MATLAB, so an explicit
% message is required to actually reject an unknown mode.
switch TDOA_mode
    case {'estimated','true'}
        spk_TDOA_mode  = TDOA_mode;
        back_TDOA_mode = TDOA_mode;
    otherwise
        error('separation:unknownTDOAMode','[%s] Unknown TDOA_mode ''%s'' (expected ''true'' or ''estimated'')',functionName,TDOA_mode);
end

homeList        = eval(homeList);
roomList        = eval(roomList);
spkList         = eval(spkList);
posList         = eval(posList);
noiseList        = eval(noiseList);
uttList         = eval(uttList);
chan2keep       = eval(chan2keep);
nsrc_max        = eval(nsrc_max);
spk_nsrc_max    = nsrc_max(1);
back_nsrc_max   = nsrc_max(2);
backDuration_Before = eval(backgroundDuration_Before);
wienerTimeSmoothing = eval(wienerTimeSmoothing);

spk_NMF_order   = eval(spk_NMF_order);
spk_iters       = eval(spk_iters);
wlen            = eval(wlen);
nbin_ERB        = eval(nbin_ERB);

% Corpus dimensions forwarded to getNoiseId / load_noisePos / getNoisyFileNames
nHouse =4;
nRoom = 3;
nSpk = 3;
nPos = 5;
nNoise = 4;
nUtt = 2;
backOffset = 1;

%% Result tree folders
% NOTE(review): outputDir is indexed by noise (and EM stage) only, so each
% (home,room) pass overwrites the entries of the previous one. Every folder
% is still created, but after this loop outputDir holds the folders computed
% for the LAST home/room pair, and those are the ones later handed to
% processNoisyData through dirPaths.res. Confirm that this is intended.
nEMStage = 1; % TO BE MODIFIED FOR MULTI STAGE EM
outputDir = cell(length(noiseList),nEMStage);
for h = homeList
    for r = roomList
        for noise = 1:length(noiseList)
            for idEm = 1:nEMStage
                currentNoiseId = getNoiseId(h,r,noise,nRoom,nNoise);
                outputDir{noise,idEm} = [outputPath resDirName 'stage' num2str(idEm) '/sep_res/noiseCond' num2str(currentNoiseId) '/'];
                if(~exist(outputDir{noise,idEm},'dir'))
                    mkdir(outputDir{noise,idEm});
                end
            end
        end
    end
end

%% Create tmp and iso folders
if(~exist(dirPaths.tmp,'dir'))
    mkdir(dirPaths.tmp)
end

if(~exist(isolated_dir,'dir'))
    mkdir(isolated_dir)
end

%% Get structs from params for processNoisyData
[spat_back_adapt,spat_spk_adapt,spec_back_Wex_adapt,spec_spk_Wex_adapt,spec_back_Hex_adapt,spec_spk_Hex_adapt] = getAdaptParameters(strGenericConfigAdapt);


%% Main loop
% Read array_geometry
[~,micPosOnArray] = load_arrayGeo(corpusPath,1);

for h = homeList
    for r = roomList
        [~,arrayCentroid,arrayOrientation] = load_arrayPos(corpusPath,h,r,1);
        % Only the case where exactly two orientation components are zero is
        % handled: microphone positions are then a pure translation of the
        % array geometry by the array centroid.
        if(sum(arrayOrientation == 0)~=2)
            error('TODO');
        else
            Position_xyz_Sensors_all = bsxfun(@plus,micPosOnArray,arrayCentroid);
        end

        % Load estimated direction matrices
        % For speaker : estimated localization on the speech interval
        load([est_direction_dir 'GCC-PHAT_wuw_cmd/' 'home' num2str(h) '_room' num2str(r) '_wuw_cmd.mat'],'az_est','el_est');
        az_est_spk = az_est;
        el_est_spk = el_est;

        % For background : estimated localization on noise-only interval
        load([est_direction_dir 'GCC-PHAT_noise_before/' 'home' num2str(h) '_room' num2str(r) '_noise_before.mat'],'az_est','el_est');
        az_est_back = az_est;
        el_est_back = el_est;
        clear az_est el_est;

        for spk = spkList
            [spkGenre,spkId] = getSpkId(h,spk);

            for pos = posList
                % Load true speaker position & localization
                [~,spkPos,~] = load_spkPos(corpusPath,h,r,spk,pos);

                for noise = noiseList
                    % Load true noise localization
                    if(noise == 1)
                        backPos = [0 0 0]; % Computed TDOA at this position must not be used

                        % USE A RANDOM SPATIAL MODEL
                        current_spat_back_meth = 'random';
                        current_spat_back_use_diffuse_model = '0';

                    else

                        [~,backPos] = load_noisePos(corpusPath,h,r,noise,nRoom,nNoise);
                        current_spat_back_meth = spat_back_meth;
                        current_spat_back_use_diffuse_model = spat_back_use_diffuse_model;
                    end

                    [spk_params,back_params,global_params] = ...
                        getParamStruct(chan2keep,[],[],RT60,back_multiple_sources,current_spat_back_use_diffuse_model,spat_spk_use_diffuse_model,current_spat_back_meth,spat_spk_meth,spec_back_meth,spec_spk_meth,back_NMF_order,spk_NMF_order,back_iters,spk_iters,sep_iters,spat_back_adapt,spat_spk_adapt,spec_back_Wex_adapt,spec_spk_Wex_adapt,spec_back_Hex_adapt,spec_spk_Hex_adapt,transformType,wlen,nbin_ERB,fs,sound_velocity,Position_xyz_Sensors_all',wienerTimeSmoothing);

                    for utt = uttList

                        fprintf('\n*** Processing file || Home = %d || Room = %d || Spk = %d || Pos = %d || Noise = %d || Utt. = %d ***\n',h,r,spk,pos,noise,utt);
                        dirPaths.res = outputDir(find(noise==noiseList),:);

                        % get current file name
                        [fname_noisy,~] = getNoisyFileNames(h,r,spk,pos,noise,utt,nHouse,nRoom,nSpk,nPos,nNoise,nUtt);

                        % Extract isolated samples
                        iso_samples = wavOpening(corpusPath,fname_noisy,'noisy','wuw_cmd',[0.5 0.5],NaN,fs);

                        % Extract background samples (noise-only interval before the command)
                        back_samples_before = wavOpening(corpusPath,fname_noisy,'noisy','noise_before',backOffset,backDuration_Before,fs);

                        % Select part of background to use
                        switch(strBackIntervalUsed)
                            case 'before'
                                back_samples = back_samples_before;
                                clear back_samples_before;
                            otherwise
                                error(['[' functionName ']' 'back_interval_used variable is unknown']);
                        end

                        % Assign estimated directions for the current utterance
                        spkEstDir = [shiftdim(az_est_spk(spk,pos,noise,utt,:))';shiftdim(el_est_spk(spk,pos,noise,utt,:))']';
                        backEstDir = [shiftdim(az_est_back(spk,pos,noise,utt,:))';shiftdim(el_est_back(spk,pos,noise,utt,:))']';

                        % Init data struct (depending on each file)
                        spk_data  = struct; %  TDOAs / spatGain / nsrc / specModelName
                        back_data = struct; %  TDOAs / spatGain / nsrc

                        % init TDOAs
                        [spk_data.TDOAs, back_data.TDOAs,back_data.nsrc,spk_data.nsrc] = initTDOAs(Position_xyz_Sensors_all',spkPos',backPos',spkEstDir,backEstDir,sound_velocity,spk_nsrc_max,back_nsrc_max,chan2keep,spk_TDOA_mode,back_TDOA_mode);
                        % init Gains
                        [spk_data.spatGain, back_data.spatGain] = initGains(Position_xyz_Sensors_all',chan2keep,spkPos',backPos');

                        % Assign clean spk spectral model path
                        % (the ERB file name additionally carries nbin_ERB)
                        switch transformType
                            case 'STFT'
                                spk_data.specModelName = [spk_models_dir spkGenre num2str(spkId) '_VQ' num2str(spk_NMF_order) '_' speech_model_type '_' num2str(spk_iters) 'iters_' spk_learning_mode '_' transformType '_' num2str(wlen) '.mat'];
                            case 'ERB'
                                spk_data.specModelName = [spk_models_dir spkGenre num2str(spkId) '_VQ' num2str(spk_NMF_order) '_' speech_model_type '_' num2str(spk_iters) 'iters_' spk_learning_mode '_' transformType '_' num2str(wlen) '_' num2str(nbin_ERB) '.mat'];
                        end

                        % Init models (No model available at this point)
                        modelsOutput.rough   = []; % models initialized on unique source signal
                        modelsOutput.refined = []; % models refined on mixture signal

                        % Save background segment on disk
                        fname_back = [isolated_dir fname_noisy '_back.wav'];
                        audiowrite(fname_back,back_samples(:,chan2keep),fs);

                        % Background samples are no longer needed once written
                        clear back_samples;


                        % Save isolated segment on disk
                        fname_iso = [isolated_dir fname_noisy '_iso.wav'];
                        audiowrite(fname_iso,iso_samples(:,chan2keep),fs);

                        % Call processNoisyData
                        [~] = processNoisyData(fname_noisy,fname_iso,fname_back,dirPaths,global_params,spk_params,spk_data,back_params,back_data);

                        % Clear unused variables
                        clear iso_samples;

                    end

                end
            end
        end
    end
end

% Remove the working folders (and everything inside them)
rmdir(dirPaths.tmp,'s');
rmdir(isolated_dir,'s');

end

function [spk_params,back_params,global_params] = getParamStruct(chan2keep,pfRoomDimensions,RT60_default,RT60,back_multiple_sources,spat_back_use_diffuse_model,spat_spk_use_diffuse_model,spat_back_meth,spat_spk_meth,spec_back_meth,spec_spk_meth,back_NMF_order,spk_NMF_order,back_iters,spk_iters,sep_iters,spat_back_adapt,spat_spk_adapt,spec_back_Wex_adapt,spec_spk_Wex_adapt,spec_back_Hex_adapt,spec_spk_Hex_adapt,transformType,wlen,nbin_ERB,fs,sound_velocity,Position_xyz_Sensors_all,wienerTimeSmoothing)
% getParamStruct
% Gathers the flat list of settings into three structures: one for the
% speaker source, one for the background source and one for global settings.
%
% [spk_params,back_params,global_params] = getParamStruct(chan2keep,pfRoomDimensions,RT60_default,RT60,back_multiple_sources,spat_back_use_diffuse_model,spat_spk_use_diffuse_model,spat_back_meth,spat_spk_meth,spec_back_meth,spec_spk_meth,back_NMF_order,spk_NMF_order,back_iters,spk_iters,sep_iters,spat_back_adapt,spat_spk_adapt,spec_back_Wex_adapt,spec_spk_Wex_adapt,spec_back_Hex_adapt,spec_spk_Hex_adapt,transformType,wlen,nbin_ERB,fs,sound_velocity,Position_xyz_Sensors_all,wienerTimeSmoothing)
%
% NOTES :
% - The following inputs are strings evaluated here with eval : RT60,
%   sep_iters, back_multiple_sources, both *_use_diffuse_model flags,
%   back_NMF_order, back_iters and the six *_adapt settings.
% - spk_NMF_order, spk_iters, wlen, nbin_ERB, fs, sound_velocity and
%   wienerTimeSmoothing are stored as received.
% - Position_xyz_Sensors_all is indexed column-wise by chan2keep (one column
%   per sensor).
% - RT60_default is used only when RT60 evaluates to an empty value.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Copyright 2017 Ewen Camberlein and Romain Lebarbenchon
% This software is distributed under the terms of the GNU Public License
% version 3 (http://www.gnu.org/licenses/gpl.txt)
% If you find it useful, please cite the following reference:
% - Nancy Bertin, Ewen Camberlein, Romain Lebarbenchon, Emmanuel Vincent,
%   Sunit Sivasankaran, Irina Illina, Frédéric Bimbot
%   "VoiceHome-2, an extended corpus for multichannelspeech processing in
%    real homes", submitted to Speech Communication, Elsevier, 2017
%
% Contact : nancy.bertin[at]irisa.fr
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

nKept = length(chan2keep);
keptSensors = Position_xyz_Sensors_all(:,chan2keep);

%% Reverberation time : fall back on the default when nothing is given
rt60Value = eval(RT60);
if(isempty(rt60Value))
    rt60Value = RT60_default;
end

%% Pairwise Euclidean distances between the kept sensors
pairDist = zeros(nKept);
for m = 1:nKept
    offsets = bsxfun(@minus,keptSensors,keptSensors(:,m));
    pairDist(m,:) = sqrt(sum(offsets.^2,1));
end

%% Global parameters
global_params = struct;
global_params.transformType     = transformType;
global_params.wlen              = wlen;
global_params.nbin_ERB          = nbin_ERB;
global_params.fs                = fs;
global_params.nchan             = nKept;
global_params.RT60              = rt60Value;
global_params.sep_iters         = eval(sep_iters);
global_params.sound_velocity    = sound_velocity;
global_params.pfRoomDimensions  = pfRoomDimensions;
global_params.micDist           = pairDist;

% Wiener filter settings : only c1 comes from the caller
global_params.wiener.a          = 0;
global_params.wiener.b          = 0;
global_params.wiener.c1         = wienerTimeSmoothing;
global_params.wiener.c2         = 0;
global_params.wiener.d          = -Inf;

global_params.Position_xyz_Sensors = keptSensors;

%% Speaker parameters
spk_params = struct;
spk_params.multiple_sources     = 0;
spk_params.spat_diffuse_model   = eval(spat_spk_use_diffuse_model);
spk_params.spat_meth            = spat_spk_meth;
spk_params.spec_meth            = spec_spk_meth;
spk_params.NMF_order            = spk_NMF_order;
spk_params.nIters               = spk_iters;
spk_params.spat_adapt           = eval(spat_spk_adapt);
spk_params.spec_Wex_adapt       = eval(spec_spk_Wex_adapt);
spk_params.spec_Hex_adapt       = eval(spec_spk_Hex_adapt);

%% Background parameters
back_params = struct;
back_params.multiple_sources    = eval(back_multiple_sources);
back_params.spat_diffuse_model  = eval(spat_back_use_diffuse_model);
back_params.spat_meth           = spat_back_meth;
back_params.spec_meth           = spec_back_meth;
back_params.NMF_order           = eval(back_NMF_order);
back_params.nIters              = eval(back_iters);
back_params.spat_adapt          = eval(spat_back_adapt);
back_params.spec_Wex_adapt      = eval(spec_back_Wex_adapt);
back_params.spec_Hex_adapt      = eval(spec_back_Hex_adapt);
end

function [spk_TDOAs, back_TDOAs,back_nsrc,spk_nsrc] = initTDOAs(micPos,spkPos,backPos,spkEstDir,backEstDir,sound_velocity,spk_nsrc_max,back_nsrc_max,chan2keep,spk_TDOA_mode,back_TDOA_mode)
% initTDOAs
% Computes the TDOAs of the speaker source and of the background source,
% each one either from its true position ('true') or from its estimated
% direction ('estimated').
%
% [spk_TDOAs, back_TDOAs,back_nsrc,spk_nsrc] = initTDOAs(micPos,spkPos,backPos,spkEstDir,backEstDir,sound_velocity,spk_nsrc_max,back_nsrc_max,chan2keep,spk_TDOA_mode,back_TDOA_mode)
%
% INPUTS:
% micPos : positions of all microphones (forwarded to computeTrueTDOA /
%          computeEstTDOA)
% spkPos, backPos : true source positions (used in 'true' mode)
% spkEstDir, backEstDir : estimated directions (used in 'estimated' mode)
% sound_velocity : sound velocity
% spk_nsrc_max, back_nsrc_max : not used by this function
% chan2keep : indices of the channels to keep
% spk_TDOA_mode, back_TDOA_mode : 'true' or 'estimated'
%
% OUTPUTS:
% spk_TDOAs : TDOAs of the speaker source for the kept channels
% back_TDOAs : TDOAs of the background source for the kept channels
% spk_nsrc : number of speaker sources (set to 1)
% back_nsrc : number of background sources (set to 1)
%
% For any other mode value the corresponding outputs are left unassigned.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Copyright 2017 Ewen Camberlein and Romain Lebarbenchon
% This software is distributed under the terms of the GNU Public License
% version 3 (http://www.gnu.org/licenses/gpl.txt)
% If you find it useful, please cite the following reference:
% - Nancy Bertin, Ewen Camberlein, Romain Lebarbenchon, Emmanuel Vincent,
%   Sunit Sivasankaran, Irina Illina, Frédéric Bimbot
%   "VoiceHome-2, an extended corpus for multichannelspeech processing in
%    real homes", submitted to Speech Communication, Elsevier, 2017
%
% Contact : nancy.bertin[at]irisa.fr
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Speaker source
if(strcmp(spk_TDOA_mode,'true'))
    % True TDOAs are computed for every microphone, then restricted
    [allMicsSpk,~] = computeTrueTDOA(micPos,spkPos,sound_velocity);
    spk_TDOAs = allMicsSpk(chan2keep,:);
    spk_nsrc = 1;
elseif(strcmp(spk_TDOA_mode,'estimated'))
    [~,~,spk_TDOAs] = computeEstTDOA(micPos,spkEstDir,chan2keep,sound_velocity);
    spk_nsrc = 1;
end

%% Background source
if(strcmp(back_TDOA_mode,'true'))
    [allMicsBack,~] = computeTrueTDOA(micPos,backPos,sound_velocity);
    back_TDOAs = allMicsBack(chan2keep,:);
    back_nsrc = 1;
elseif(strcmp(back_TDOA_mode,'estimated'))
    [~,~,back_TDOAs] = computeEstTDOA(micPos,backEstDir,chan2keep,sound_velocity);
    back_nsrc = 1;
end
end
function [trueTDOA,trueDir] = computeTrueTDOA(Position_xyz_Sensors,Position_xyz_Sources,c)
% computeTrueTDOA
% This function computes true TDOAs and true directions of the sources.
%
% [trueTDOA,trueDir] = computeTrueTDOA(Position_xyz_Sensors,Position_xyz_Sources,c)
%
% INPUTS :
% Position_xyz_Sensors : 3 x I, microphone positions
% Position_xyz_Sources : 3 x J, source positions
% c : 1x1, sound velocity
%
% OUTPUTS :
% trueTDOA : I x J, TDOAs relative to the first microphone (row 1 is zero)
% trueDir : J x 2, source directions [azimuth, elevation] in degrees,
%           expressed relative to the centroid of the microphones
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Copyright 2017 Ewen Camberlein and Romain Lebarbenchon
% This software is distributed under the terms of the GNU Public License
% version 3 (http://www.gnu.org/licenses/gpl.txt)
% If you find it useful, please cite the following reference:
% - Nancy Bertin, Ewen Camberlein, Romain Lebarbenchon, Emmanuel Vincent,
%   Sunit Sivasankaran, Irina Illina, Frédéric Bimbot
%   "VoiceHome-2, an extended corpus for multichannelspeech processing in
%    real homes", submitted to Speech Communication, Elsevier, 2017
%
% Contact : nancy.bertin[at]irisa.fr
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
I = size(Position_xyz_Sensors,2);
J = size(Position_xyz_Sources,2);

trueTDOA = zeros(I,J);

for j = 1:J
    % Propagation time from source j to every microphone
    offsets = bsxfun(@minus,Position_xyz_Sensors,Position_xyz_Sources(:,j));
    tau = sqrt(sum(offsets.^2,1)).'/c;
    % TDOA : delay relative to the first microphone
    trueTDOA(:,j) = tau - tau(1);
end

% True DOA (azimuth, elevation)
micPosCentroid = mean(Position_xyz_Sensors,2);
% Express the source positions in the microphone array referential
srcPos = bsxfun(@minus,Position_xyz_Sources,micPosCentroid);
[thetaRef,phiRef,~] = cart2sph(srcPos(1,:),srcPos(2,:),srcPos(3,:));

% Convert to degrees. thetaRef/phiRef are 1 x J rows: stack them as columns
% so that the result is J x 2 for any J (a plain [thetaRef,phiRef] would
% give 1 x 2J as soon as J > 1).
trueDir = [thetaRef(:),phiRef(:)].*180/pi;
end

function [theta_est,phi_est,TDOAs_est] = computeEstTDOA(micPos,estDir,chan2keep,c)

% function computeEstTDOA
% This function keeps the first DOA (azimuth + elevation) and converts it
% to TDOAs
%
% [theta_est,phi_est,TDOAs_est] = computeEstTDOA(micPos,estDir,chan2keep,c)
%
% INPUTS
% micPos : 3 x nChan, Position of ALL sensors (not only used sensors
% because it is needed to compute the barycenter of the array ==> the oracle
% angles are given in the array referential)
% estDir : nEstSrc x 2, Estimated directions (azimuth, elevation); only the
% first row is used
% chan2keep : Index of used channel
% c : sound velocity
%
% OUTPUTS
% theta_est : estimated azimuth (first row of estDir)
% phi_est : estimated elevation (first row of estDir)
% TDOAs_est : estimated TDOAs, as returned by dir2tdoa
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Copyright 2017 Ewen Camberlein and Romain Lebarbenchon
% This software is distributed under the terms of the GNU Public License
% version 3 (http://www.gnu.org/licenses/gpl.txt)
% If you find it useful, please cite the following reference:
% - Nancy Bertin, Ewen Camberlein, Romain Lebarbenchon, Emmanuel Vincent,
%   Sunit Sivasankaran, Irina Illina, Frédéric Bimbot
%   "VoiceHome-2, an extended corpus for multichannelspeech processing in
%    real homes", submitted to Speech Communication, Elsevier, 2017
%
% Contact : nancy.bertin[at]irisa.fr
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Fail with an explicit message rather than an indexing error
if(isempty(estDir) || size(estDir,2) < 2)
    error('computeEstTDOA:invalidEstDir','estDir must contain at least one [azimuth, elevation] row');
end

% Keep the first estimated direction only
theta_est = estDir(1,1);
phi_est = estDir(1,2);

%% Compute the time tau between the estimated direction projected on a unit range sphere and each mic of the probe
TDOAs_est = dir2tdoa(micPos,theta_est,phi_est,chan2keep,c);

end
function tdoas = dir2tdoa(micPos,theta,phi,chan2keep,c)
% dir2tdoa
% This function converts directions expressed in (azimuth,elevation) to
% TDOAs
%
% tdoas = dir2tdoa(micPos,theta,phi,chan2keep,c)
%
% INPUTS
% micPos : 3 x I, positions of all microphones (one column per microphone)
% theta : azimuth(s), in degrees (see tau_spherical)
% phi : elevation(s), in degrees, same length as theta
% chan2keep : indices of the channels to keep; the first one is the
%             reference channel
% c : sound velocity
%
% OUTPUT
% tdoas : numel(chan2keep) x numel(theta), delay of each kept channel
%         relative to the reference channel (its own row is zero)
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Copyright 2017 Ewen Camberlein and Romain Lebarbenchon
% This software is distributed under the terms of the GNU Public License
% version 3 (http://www.gnu.org/licenses/gpl.txt)
% If you find it useful, please cite the following reference:
% - Nancy Bertin, Ewen Camberlein, Romain Lebarbenchon, Emmanuel Vincent,
%   Sunit Sivasankaran, Irina Illina, Frédéric Bimbot
%   "VoiceHome-2, an extended corpus for multichannelspeech processing in
%    real homes", submitted to Speech Communication, Elsevier, 2017
%
% Contact : nancy.bertin[at]irisa.fr
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
I = size(micPos,2);
J = length(theta);

% Delays are computed for ALL microphones: tau_spherical centres the array
% on the mean of the positions it receives.
tau = zeros(I,J);
radius = 1; % sphere of radius 1
for j = 1:J
    tau(:,j) = tau_spherical(micPos', radius, theta(j), phi(j), c);
end

%% Compute estimated TDOAs for chan2keep
% The output has one row per KEPT channel. Sizing it with I rows fails with
% a dimension mismatch as soon as chan2keep is a strict subset of the mics.
micRef = chan2keep(1);
tdoas = zeros(numel(chan2keep),J);
for j = 1:J
    keptTau = tau(chan2keep,j);
    tdoas(:,j) = keptTau(:) - tau(micRef,j);
end
end

function tau=tau_spherical(mic_pos, radius, azimuth, elevation, c)

% Delay vector between a source given in spherical coordinates and each
% microphone of an array, the array being centred on the mean of its
% microphone positions.
%
% mic_pos: microphones locations, in cartesian coordinates, in meters
%          (one row per microphone)
% radius: radius of source location, in spherical coordinates, in meters
% azimuth: azimuth of source location, in spherical coordinates, in degrees
% elevation: elevation of source location, in spherical coordinates, in degrees
% c: sound speed, in m/s
%
% tau: the delays, in seconds, relative to each microphone (one row per
%      microphone)

    % Degrees -> radians
    azRad = azimuth / 180 * pi;
    elRad = elevation / 180 * pi;

    % Source position in cartesian coordinates
    [srcX, srcY, srcZ] = sph2cart(azRad, elRad, radius);

    % Microphone positions relative to the array centre, then to the source
    centredMics = bsxfun(@minus, mic_pos, mean(mic_pos,1));
    micToSrc = bsxfun(@minus, centredMics, [srcX, srcY, srcZ]);

    % Euclidean distance to the source, converted to a propagation time
    tau = sqrt(sum(micToSrc.^2, 2)) / c;
end

function [spk_spat_gain,back_spat_gain] = initGains(Position_xyz_Sensors_all,chan2keep,spkPos,backPos)
% initGains
% Computes the gains between each source (speaker, background) and each kept
% microphone following the rank-1 anechoic model :
%   gain = 1 / ( sqrt(4*pi) * distance(source, microphone) )
% Ref : section 2.3 of "N. Duong, E. Vincent, R. Gribonval,
% Under-determined reverberant audio source separation using a full-rank
% spatial covariance model"
%
% [spk_spat_gain,back_spat_gain] = initGains(Position_xyz_Sensors_all,chan2keep,spkPos,backPos)
%
% INPUTS
% Position_xyz_Sensors_all : positions of all microphones, one column each
% chan2keep : indices of the microphones to use
% spkPos, backPos : source positions, as columns comparable with one column
%                   of Position_xyz_Sensors_all
%
% OUTPUTS
% spk_spat_gain, back_spat_gain : length(chan2keep) x 1 gain vectors
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Copyright 2017 Ewen Camberlein and Romain Lebarbenchon
% This software is distributed under the terms of the GNU Public License
% version 3 (http://www.gnu.org/licenses/gpl.txt)
% If you find it useful, please cite the following reference:
% - Nancy Bertin, Ewen Camberlein, Romain Lebarbenchon, Emmanuel Vincent,
%   Sunit Sivasankaran, Irina Illina, Frédéric Bimbot
%   "VoiceHome-2, an extended corpus for multichannelspeech processing in
%    real homes", submitted to Speech Communication, Elsevier, 2017
%
% Contact : nancy.bertin[at]irisa.fr
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

nUsed = length(chan2keep);
spreading = sqrt(4*pi);

% Gains default to 1 and are overwritten for every used microphone
spk_spat_gain  = ones(nUsed,1);
back_spat_gain = ones(nUsed,1);

for m = 1:nUsed
    micXYZ = Position_xyz_Sensors_all(:,chan2keep(m));

    spkDist = sqrt(sum((spkPos - micXYZ).^2));
    spk_spat_gain(m,1) = 1/ ( spreading * spkDist );
end

for m = 1:nUsed
    micXYZ = Position_xyz_Sensors_all(:,chan2keep(m));

    backDist = sqrt(sum((backPos - micXYZ).^2));
    back_spat_gain(m,1) = 1/ ( spreading * backDist );
end

end

function [spat_back_adapt,spat_spk_adapt,spec_back_Wex_adapt,spec_spk_Wex_adapt,spec_back_Hex_adapt,spec_spk_Hex_adapt] = getAdaptParameters(strConfig)
% getAdaptParameters
% Get adaptation parameters of each model (spat/spec) of source (spk/back)
% Note : spat = spatial / spec = spectral
% Note2 : spectral model is only represented by the Wex matrix, the Hex
% matrix is always freely adapted.
%
% [spat_back_adapt,spat_spk_adapt,spec_back_Wex_adapt,spec_spk_Wex_adapt,spec_back_Hex_adapt,spec_spk_Hex_adapt] = getAdaptParameters(strConfig)
%
% INPUT
% strConfig : char, configuration name, 'single0' to 'single15'
%
% OUTPUTS
% Six strings, each either '{''free''}' or '{''fixed''}' (i.e. the text of
% a one-element cell array, meant to be evaluated by the caller).
% An error is raised for an unknown configuration name.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Copyright 2017 Ewen Camberlein and Romain Lebarbenchon
% This software is distributed under the terms of the GNU Public License
% version 3 (http://www.gnu.org/licenses/gpl.txt)
% If you find it useful, please cite the following reference:
% - Nancy Bertin, Ewen Camberlein, Romain Lebarbenchon, Emmanuel Vincent,
%   Sunit Sivasankaran, Irina Illina, Frédéric Bimbot
%   "VoiceHome-2, an extended corpus for multichannelspeech processing in
%    real homes", submitted to Speech Communication, Elsevier, 2017
%
% Contact : nancy.bertin[at]irisa.fr
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

configNames = {'single0','single1','single2','single3', ...
               'single4','single5','single6','single7', ...
               'single8','single9','single10','single11', ...
               'single12','single13','single14','single15'};

% One row per configuration (same order as configNames).
% Columns : spat_back | spec_back_Wex | spat_spk | spec_spk_Wex
% 1 = free, 0 = fixed
isFree = [ ...
    1 0 1 0; ... single0
    0 0 1 0; ... single1
    1 0 0 0; ... single2
    0 0 0 0; ... single3
    1 1 0 1; ... single4
    1 1 1 1; ... single5
    0 1 1 1; ... single6
    0 0 1 1; ... single7
    0 1 1 0; ... single8
    0 1 0 1; ... single9
    1 1 0 0; ... single10
    1 1 1 0; ... single11
    0 1 0 0; ... single12
    0 0 0 1; ... single13
    1 0 0 1; ... single14
    1 0 1 1];  % single15

row = find(strcmp(strConfig,configNames),1);
if(isempty(row))
    error('getAdaptParameters:unknownConfig','Configuration pas encore ajoutée à la liste : %s',strConfig);
end

% labels{1} = fixed, labels{2} = free
labels = {'{''fixed''}','{''free''}'};

spat_back_adapt     = labels{isFree(row,1)+1};
spec_back_Wex_adapt = labels{isFree(row,2)+1};
spat_spk_adapt      = labels{isFree(row,3)+1};
spec_spk_Wex_adapt  = labels{isFree(row,4)+1};

% Hex matrices are always freely adapted
spec_back_Hex_adapt = '{''free''}';
spec_spk_Hex_adapt = '{''free''}';

end