https://hal.archives-ouvertes.fr/hal-02963528
Raw File
Tip revision: 282551cd4868b7b38f2c72e9b0ac84a22e7b8411 authored by Software Heritage on 01 January 2017, 00:00:00 UTC
hal: Deposit 1043 in collection hal
Tip revision: 282551c
main_compute_est_loc.m
% main_compute_est_loc
% This script computes the estimated DOAs (directions of arrival) on the whole corpus.
%
% Before you start to use this code :
% - Download and extract voiceHome-2 corpus (https://doi.org/10.5281/zenodo.1252143)
% - Download and extract MBSS-Locate Basic toolbox (https://gitlab.inria.fr/bass-db/mbss_locate)
% - Update paths in the "USER PARAMS" section
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Copyright 2017 Ewen Camberlein and Romain Lebarbenchon
% This software is distributed under the terms of the GNU Public License
% version 3 (http://www.gnu.org/licenses/gpl.txt)
% If you find it useful, please cite the following reference:
% - Nancy Bertin, Ewen Camberlein, Romain Lebarbenchon, Emmanuel Vincent,
%   Sunit Sivasankaran, Irina Illina, Frédéric Bimbot
%   "VoiceHome-2, an extended corpus for multichannel speech processing in
%    real homes", submitted to Speech Communication, Elsevier, 2017
%
% Contact : nancy.bertin[at]irisa.fr
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% USER PARAMS
% Edit these two paths to match the local installation before running.
corpusPath = 'C:\VBox_Partage\interspeech2\voiceHome-2_corpus_v1.0\'; % Path to voiceHome-2 corpus
mbssBasicPath = './../MultiChannel_BSS_Locate_Basic_v1.3/'; % Path to MBSS locate basic toolbox

%% ADD paths
addpath(genpath('./../common/')); % Common functions
addpath(genpath([mbssBasicPath '/mFiles/localization_tools/'])); % mbss localization functions

%% PAPER'S PARAMS
local_meth = 'GCC-PHAT'; % localization method name passed to MBSS_locate_spec
modeList = {'wuw_cmd','noise_before'}; % processing modes looped over by the main loop

% Index lists iterated by the main loop
homeList = [1 2 3 4];
roomList = [1 2 3];
spkList = [1 2 3];
posList = [1 2 3 4 5];
noiseList = [1 2 3 4];
uttlist = [1 2];

% Counts used to size the result arrays and passed to getNoisyFileNames /
% load_noisePos. They are kept separate from the lists above, so
% restricting a list does not change these counts.
nHouse = 4; % number of houses
nRoom = 3; % number of rooms per house
nSpk = 3; % number of speakers per house
nPos = 5; % number of speakers positions
nNoise = 4; % number of noise conditions per room
nUtt = 2; % number of utterances per {spk,pos,room,house,noise}

geoId = 1; % probe geometry (fixed)

% MBSS params
pooling = 'max';
normalizeSpecInst = 0; %normalize instantaneous local angular spectra (1:normalization 0:no normalization)
c = 343;             % sound velocity
gridRes = 1;         % resolution of the azimuth/elevation grids
AlphaResolution = 5; % resolution of alpha grid => resolution of tdoa grid
min_angle = 15;      % min distance between two peaks
nEstSources = 8;     % Number of peaks kept for evaluation (last dimension of az_est/el_est)
specDisplay = 1;     % display flag passed to MBSS_locate_spec
fs = 16000;          % sampling frequency passed to wavOpening and MBSS_locate_spec
% Number of sources requested from MBSS_locate_spec. Derived from
% nEstSources so the requested number of sources can never disagree with
% the size of the arrays the estimates are stored in.
nEstSrc = nEstSources;

% azimuth and elevation grids parameters
thetaBound = [-179 180];
phiBound = [-90 90];

% Read array_geometry
[~,micPosOnArray] = load_arrayGeo(corpusPath,geoId);

%% MAIN LOOP
% For every processing mode, home and room: run MBSS_locate_spec on each
% noisy recording {speaker, position, noise, utterance} and store the
% estimated directions together with the true speaker / noise directions
% in one .mat file per {home, room, mode}.
for m = 1:length(modeList)
    mode = modeList{m};

    % Segment selection settings forwarded to wavOpening
    switch(mode)
        case {'wuw_cmd','wuw','cmd'}
            fprintf('>>>>>>>>> Compute estimated directions on utterence segment <<<<<<<<<\n');
            nSec = NaN;
            offset = [0 0];
        case 'noise_before'
            fprintf('>>>>>>>>> Compute estimated directions on noise segment <<<<<<<<<\n');
            nSec = 4;
            offset = 1;
        otherwise
            % Fail early instead of silently reusing nSec/offset from a
            % previous mode (or hitting an undefined variable later).
            error('main_compute_est_loc:unknownMode','Unsupported mode ''%s''.',mode);
    end

    outputDir = ['./Estimated_Localization/' local_meth '_' mode '/'];
    if ~exist(outputDir,'dir') % avoid the "directory already exists" warning on re-runs
        mkdir(outputDir);
    end

    for h = homeList
        for r = roomList
            [~,arrayCentroid,~] = load_arrayPos(corpusPath,h,r,geoId);

            fprintf('Process home %d/%d || room %d/%d\n',h,nHouse,r,nRoom);

            % Result file for this {home, room, mode}; built once per room
            outputFile = [outputDir 'home' num2str(h) '_room' num2str(r) '_' mode '.mat'];

            % Outputs (NaN = not computed / no peak found)
            az_est = nan(nSpk,nPos,nNoise,nUtt,nEstSources);
            el_est = nan(nSpk,nPos,nNoise,nUtt,nEstSources);

            az_spk_true = nan(nSpk,nPos,nNoise,nUtt);
            az_noise_true = nan(nSpk,nPos,nNoise,nUtt);

            el_spk_true = nan(nSpk,nPos,nNoise,nUtt);
            el_noise_true = nan(nSpk,nPos,nNoise,nUtt);

            % Number of estimates stored per recording, taken from the
            % array itself so padding always matches the allocation.
            nKept = size(az_est,5);

            for spk = spkList
                for pos = posList
                    % True speaker localization: direction from the array
                    % centroid to the speaker position
                    [~,spkTruePos,~] = load_spkPos(corpusPath,h,r,spk,pos);
                    dirVector = (spkTruePos - arrayCentroid);
                    [spkTrueTheta,spkTruePhi] = cart2sph(dirVector(1),dirVector(2),dirVector(3));
                    spkTrueLoc = [spkTrueTheta,spkTruePhi].*180/pi; % back to degree;

                    for noise = noiseList
                        % True noise localization: direction from the array
                        % centroid to the noise source position
                        [~,noiseTruePos] = load_noisePos(corpusPath,h,r,noise,nRoom,nNoise);
                        dirVector = noiseTruePos - arrayCentroid;
                        [noiseTrueTheta,noiseTruePhi] = cart2sph(dirVector(1),dirVector(2),dirVector(3));
                        noiseTrueLoc = [noiseTrueTheta,noiseTruePhi].*180/pi; % back to degree;

                        for utt = uttlist

                            % Load wav file with the specified mode
                            [fname,~] = getNoisyFileNames(h,r,spk,pos,noise,utt,nHouse,nRoom,nSpk,nPos,nNoise,nUtt);

                            y = wavOpening(corpusPath,fname,'noisy',mode,offset,nSec,fs);

                            [theta_e,phi_e] = MBSS_locate_spec(y, fs, nEstSrc , micPosOnArray, c, local_meth, pooling, thetaBound, phiBound, gridRes, AlphaResolution, min_angle, normalizeSpecInst, specDisplay);

                            % Estimated locations assignment: force row
                            % vectors, then pad with NaN (or truncate) to
                            % exactly nKept entries.
                            theta_e = theta_e(:).';
                            nTheta = min(numel(theta_e),nKept);
                            theta_e = [theta_e(1:nTheta), nan(1,nKept-nTheta)];

                            phi_e = phi_e(:).';
                            nPhi = min(numel(phi_e),nKept);
                            phi_e = [phi_e(1:nPhi), nan(1,nKept-nPhi)];

                            az_est(spk,pos,noise,utt,:) = theta_e;
                            el_est(spk,pos,noise,utt,:) = phi_e;

                            % True locations assignment for speaker and noise.
                            % The speaker direction is left as NaN in
                            % 'noise_before' mode.
                            if(~strcmp(mode,'noise_before'))
                                az_spk_true(spk,pos,noise,utt) = spkTrueLoc(1);
                                el_spk_true(spk,pos,noise,utt) = spkTrueLoc(2);
                            end

                            az_noise_true(spk,pos,noise,utt) = noiseTrueLoc(1);
                            el_noise_true(spk,pos,noise,utt) = noiseTrueLoc(2);

                            % Close all open figures (presumably those
                            % opened by MBSS_locate_spec when specDisplay
                            % is set - TODO confirm)
                            close all;

                            % Saved after every recording, so the file on
                            % disk always holds the results computed so far.
                            save(outputFile,'az_est','el_est','az_spk_true','az_noise_true','el_spk_true','el_noise_true');

                        end
                    end
                end
            end
        end
    end
end
fprintf('>>> END OF PROCESSING <<<\n');
fprintf('Results are saved in ./Estimated_Localization/ folder \n');

back to top