https://hal.archives-ouvertes.fr/hal-02963528
Tip revision: 282551cd4868b7b38f2c72e9b0ac84a22e7b8411 authored by Software Heritage on 01 January 2017, 00:00:00 UTC
hal: Deposit 1043 in collection hal
hal: Deposit 1043 in collection hal
Tip revision: 282551c
main_compute_est_loc.m
% main_compute_est_loc
% This script computes the estimated DOAs (directions of arrival) over the whole corpus.
%
% Before you start to use this code :
% - Download and extract voiceHome-2 corpus (https://doi.org/10.5281/zenodo.1252143)
% - Download and extract MBSS-Locate Basic toolbox (https://gitlab.inria.fr/bass-db/mbss_locate)
% - Update paths in the "USER PARAMS" section
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Copyright 2017 Ewen Camberlein and Romain Lebarbenchon
% This software is distributed under the terms of the GNU Public License
% version 3 (http://www.gnu.org/licenses/gpl.txt)
% If you find it useful, please cite the following reference:
% - Nancy Bertin, Ewen Camberlein, Romain Lebarbenchon, Emmanuel Vincent,
% Sunit Sivasankaran, Irina Illina, Frédéric Bimbot
% "VoiceHome-2, an extended corpus for multichannel speech processing in
% real homes", submitted to Speech Communication, Elsevier, 2017
%
% Contact : nancy.bertin[at]irisa.fr
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% USER PARAMS
% Both paths must be adapted to the local installation before running.
corpusPath = 'C:\VBox_Partage\interspeech2\voiceHome-2_corpus_v1.0\'; % Path to voiceHome-2 corpus
mbssBasicPath = './../MultiChannel_BSS_Locate_Basic_v1.3/'; % Path to MBSS locate basic toolbox

%% ADD paths
addpath(genpath('./../common/')); % Common functions
addpath(genpath([mbssBasicPath '/mFiles/localization_tools/'])); % mbss localization functions

%% PAPER'S PARAMS
local_meth = 'GCC-PHAT';                % localization method handed to MBSS_locate_spec
modeList = {'wuw_cmd','noise_before'};  % signal segments processed by the main loop

% Subsets of the corpus that are iterated over by the main loop
homeList  = [1 2 3 4];
roomList  = [1 2 3];
spkList   = [1 2 3];
posList   = [1 2 3 4 5];
noiseList = [1 2 3 4];
uttlist   = [1 2];

% Corpus dimensions (used to size the result arrays and to build file names)
nHouse = 4; % number of houses
nRoom  = 3; % number of rooms per house
nSpk   = 3; % number of speakers per house
nPos   = 5; % number of speakers positions
nNoise = 4; % number of noise conditions per room
nUtt   = 2; % number of utterances per {spk,pos,room,house,noise}
geoId  = 1; % probe geometry (fixed)

% MBSS params
pooling = 'max';
normalizeSpecInst = 0; % normalize instantaneous local angular spectra (1:normalization 0:no normalization)
c = 343;               % sound velocity
gridRes = 1;           % resolution of the azimuth/elevation grids
AlphaResolution = 5;   % resolution of alpha grid => resolution of tdoa grid
min_angle = 15;        % min distance between two peaks
specDisplay = 1;
fs = 16000;
nEstSrc = 8;           % Number of estimated sources
% Number of peaks kept for evaluation. The main loop allocates the result
% arrays with nEstSources and pads each estimate to nEstSrc entries, so both
% values must be equal: derive one from the other instead of repeating the
% literal.
nEstSources = nEstSrc;

% azimuth and elevation grids parameters
thetaBound = [-179 180];
phiBound = [-90 90];

% Read array_geometry
[~,micPosOnArray] = load_arrayGeo(corpusPath,geoId);
%% MAIN LOOP
% For every mode, home and room: run MBSS_locate_spec on each noisy recording
% (speaker x position x noise condition x utterance), store the estimated
% azimuths/elevations together with the true speaker and noise directions,
% and save them in one .mat file per {home, room, mode}.
for m = 1:length(modeList)
    mode = modeList{m};

    % Segment selection parameters forwarded to wavOpening
    switch(mode)
        case {'wuw_cmd','wuw','cmd'}
            fprintf('>>>>>>>>> Compute estimated directions on utterence segment <<<<<<<<<\n');
            nSec = NaN;
            offset = [0 0];
        case 'noise_before'
            fprintf('>>>>>>>>> Compute estimated directions on noise segment <<<<<<<<<\n');
            nSec = 4;
            offset = 1;
        otherwise
            % Without this branch an unknown mode would silently reuse the
            % nSec/offset values of the previous mode.
            error('main_compute_est_loc:unknownMode','Unknown mode ''%s'' in modeList.',mode);
    end

    outputDir = ['./Estimated_Localization/' local_meth '_' mode '/'];
    % mkdir issues a warning when the folder already exists (e.g. on a rerun)
    if ~exist(outputDir,'dir')
        mkdir(outputDir);
    end

    for h = homeList
        for r = roomList
            [~,arrayCentroid,~] = load_arrayPos(corpusPath,h,r,geoId);
            fprintf('Process home %d/%d || room %d/%d\n',h,nHouse,r,nRoom);

            % One result file per {home, room, mode}; the name does not
            % depend on the inner loop variables, so build it once here.
            outputFile = [outputDir 'home' num2str(h) '_room' num2str(r) '_' mode '.mat'];

            % Outputs (NaN marks entries that are not filled)
            az_est = nan(nSpk,nPos,nNoise,nUtt,nEstSources);
            el_est = nan(nSpk,nPos,nNoise,nUtt,nEstSources);
            az_spk_true = nan(nSpk,nPos,nNoise,nUtt);
            az_noise_true = nan(nSpk,nPos,nNoise,nUtt);
            el_spk_true = nan(nSpk,nPos,nNoise,nUtt);
            el_noise_true = nan(nSpk,nPos,nNoise,nUtt);

            for spk = spkList
                for pos = posList
                    % True speaker localization: direction from the array
                    % centroid to the speaker, as azimuth/elevation
                    [~,spkTruePos,~] = load_spkPos(corpusPath,h,r,spk,pos);
                    dirVector = (spkTruePos - arrayCentroid);
                    [spkTrueTheta,spkTruePhi] = cart2sph(dirVector(1),dirVector(2),dirVector(3));
                    spkTrueLoc = [spkTrueTheta,spkTruePhi].*180/pi; % back to degree;

                    for noise = noiseList
                        % True noise localization (same convention as above)
                        [~,noiseTruePos] = load_noisePos(corpusPath,h,r,noise,nRoom,nNoise);
                        dirVector = noiseTruePos - arrayCentroid;
                        [noiseTrueTheta,noiseTruePhi] = cart2sph(dirVector(1),dirVector(2),dirVector(3));
                        noiseTrueLoc = [noiseTrueTheta,noiseTruePhi].*180/pi; % back to degree;

                        for utt = uttlist
                            % Load wav file with the specified mode
                            [fname,~] = getNoisyFileNames(h,r,spk,pos,noise,utt,nHouse,nRoom,nSpk,nPos,nNoise,nUtt);
                            y = wavOpening(corpusPath,fname,'noisy',mode,offset,nSec,fs);
                            [theta_e,phi_e] = MBSS_locate_spec(y, fs, nEstSrc , micPosOnArray, c, local_meth, pooling, thetaBound, phiBound, gridRes, AlphaResolution, min_angle, normalizeSpecInst, specDisplay);

                            % Estimated locations assignment: pad with NaN
                            % when fewer than nEstSrc estimates are returned
                            theta_e = [theta_e, nan(1,nEstSrc-length(theta_e))];
                            phi_e = [phi_e, nan(1,nEstSrc-length(phi_e))];
                            az_est(spk,pos,noise,utt,:) = theta_e;
                            el_est(spk,pos,noise,utt,:) = phi_e;

                            % True locations assignment for speaker and noise
                            % (speaker direction is left as NaN in 'noise_before' mode)
                            if(~strcmp(mode,'noise_before'))
                                az_spk_true(spk,pos,noise,utt) = spkTrueLoc(1);
                                el_spk_true(spk,pos,noise,utt) = spkTrueLoc(2);
                            end
                            az_noise_true(spk,pos,noise,utt) = noiseTrueLoc(1);
                            el_noise_true(spk,pos,noise,utt) = noiseTrueLoc(2);

                            % Close any open figure windows before the next file
                            close all;

                            % The result file is rewritten after every
                            % utterance, so the results computed so far are
                            % on disk if the run is interrupted.
                            % NOTE(review): could be saved once per room if
                            % that is not required.
                            save(outputFile,'az_est','el_est','az_spk_true','az_noise_true','el_spk_true','el_noise_true');
                        end
                    end
                end
            end
        end
    end
end
% Terminate each message with a newline so they do not run together
fprintf('>>> END OF PROCESSING <<<\n');
fprintf('Results are saved in ./Estimated_Localization/ folder\n');