https://hal.archives-ouvertes.fr/hal-02963528
Tip revision: 282551cd4868b7b38f2c72e9b0ac84a22e7b8411 authored by Software Heritage on 01 January 2017, 00:00:00 UTC
hal: Deposit 1043 in collection hal
hal: Deposit 1043 in collection hal
Tip revision: 282551c
main_eval_perf.m
function main_eval_perf()
% main_eval_perf
% This function computes DOA performances in terms of recall and
% accuracy by using three different approaches :
% "azimuth only" (az), "elevation only" (el), "both azimuth and elevation"
% (both).
% This function creates the two following folders and associated performance
% files :
%
% 1) perf_NoiseLoc : Folder containing noise source localization performances
% - R_wuw_cmd_noisyCondition.txt / Acc_wuw_cmd_noisyCondition.txt :
% Performances (Recall / Accuracy) on noisy speech interval
% - R_noise_before_noisyCondition.txt / Acc_noise_before_noisyCondition.txt :
% Performances (Recall / Accuracy) on noise only interval
%
% 2) perf_SpkLoc : Folder containing speaker source localization performances
% - R_wuw_cmd_quietCondition.txt / Acc_wuw_cmd_quietCondition.txt :
% Performances (Recall / Accuracy) on quiet speech interval
% - R_wuw_cmd_noisyCondition.txt / Acc_wuw_cmd_noisyCondition.txt :
% Performances (Recall / Accuracy) on noisy speech interval
%
% Each file holds a header line followed by one line per approach
% (az / el / both); columns are indexed by the number of estimated
% sources that were kept (1 to 8).
%
% Before you start to use this code :
% - Run main_compute_est_loc to compute estimated source localizations
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Copyright 2017 Ewen Camberlein and Romain Lebarbenchon
% This software is distributed under the terms of the GNU Public License
% version 3 (http://www.gnu.org/licenses/gpl.txt)
% If you find it useful, please cite the following reference:
% - Nancy Bertin, Ewen Camberlein, Romain Lebarbenchon, Emmanuel Vincent,
% Sunit Sivasankaran, Irina Illina, Frédéric Bimbot
% "VoiceHome-2, an extended corpus for multichannel speech processing in
% real homes", submitted to Speech Communication, Elsevier, 2017
%
% Contact : nancy.bertin[at]irisa.fr
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% USER PARAMS
% none

%% PATHS AND PARAMS
addpath(genpath('./../common/'));
modeList = {'noise_before','wuw_cmd'};
homeList = [1 2 3 4];
roomList = [1 2 3];
nRoom = 3;
nHouse = 4;
nSrcEst = 1:8;      % Number of estimated sources
thres = 10;         % threshold in degrees
idC1 = 1;           % noise index reported in the "quietCondition" files
idnoC1 = [2 3 4];   % noise indices reported in the "noisyCondition" files
srcList = {'spk','noise'};
angList = {'az','el','both'};
varsToLoad = {'az_est','el_est','az_spk_true','el_spk_true','az_noise_true','el_noise_true'};

% Field names used to store the per-utterance results: '<src>_<approach>'
keys = cell(numel(srcList),numel(angList));
for s = 1:numel(srcList)
    for a = 1:numel(angList)
        keys{s,a} = [srcList{s} '_' angList{a}];
    end
end

make_output_folder('perf_NoiseLoc');
make_output_folder('perf_SpkLoc');

%% MAIN LOOP
for m = 1:length(modeList)
    curMode = modeList{m};
    folderToProcess = ['./Estimated_Localization/GCC-PHAT_' curMode '/'];

    % To know dimensions
    ref = load([folderToProcess 'home' num2str(1) '_room' num2str(1) '_' curMode '.mat'],'az_est');
    [nSpk,nPos,nNoise,nUtt,~] = size(ref.az_est);
    saveSize = [nHouse,nRoom,nSpk,nPos,nNoise,nUtt,length(nSrcEst)];

    % One recall array and one accuracy array per (source, approach) pair
    Rsave = struct();
    AccSave = struct();
    for k = 1:numel(keys)
        Rsave.(keys{k}) = zeros(saveSize);
        AccSave.(keys{k}) = zeros(saveSize);
    end

    for h = homeList
        for r = roomList
            % Load into a struct so that no variable is silently injected
            % into (or left over in) the function workspace.
            d = load([folderToProcess 'home' num2str(h) '_room' num2str(r) '_' curMode '.mat'],varsToLoad{:});
            for spk = 1:nSpk
                for pos = 1:nPos
                    for noise = 1:nNoise
                        for utt = 1:nUtt
                            azEst = shiftdim(d.az_est(spk,pos,noise,utt,:));
                            elEst = shiftdim(d.el_est(spk,pos,noise,utt,:));
                            for s = 1:numel(srcList)
                                azTrue = d.(['az_' srcList{s} '_true'])(spk,pos,noise,utt);
                                elTrue = d.(['el_' srcList{s} '_true'])(spk,pos,noise,utt);
                                [Rcur,AccCur] = eval_one_source(azEst,elEst,azTrue,elTrue,nSrcEst,thres);
                                for a = 1:numel(angList)
                                    Rsave.(keys{s,a})(h,r,spk,pos,noise,utt,:) = Rcur(a,:);
                                    AccSave.(keys{s,a})(h,r,spk,pos,noise,utt,:) = AccCur(a,:);
                                end
                            end
                        end
                    end
                end
            end
        end
    end

    %% Average results and write them out
    % Noise source localization, noisy condition
    write_condition_perf('./perf_NoiseLoc/',curMode,'noisyCondition','noise','noC1',idnoC1,Rsave,AccSave,angList,nSrcEst);

    if(strcmp(curMode,'wuw_cmd'))
        % Speaker source localization, quiet condition
        write_condition_perf('./perf_SpkLoc/',curMode,'quietCondition','spk','C1',idC1,Rsave,AccSave,angList,nSrcEst);
        % Speaker source localization, noisy condition
        write_condition_perf('./perf_SpkLoc/',curMode,'noisyCondition','spk','noC1',idnoC1,Rsave,AccSave,angList,nSrcEst);
    end
end
end

function make_output_folder(folderName)
% Create folderName if needed. Requesting the outputs of mkdir avoids the
% "already exists" warning on re-runs and lets real failures be reported.
[ok,msg] = mkdir(folderName);
if(~ok)
    error('main_eval_perf:cannotCreateFolder','Cannot create folder %s: %s',folderName,msg);
end
end

function [R,Acc] = eval_one_source(azEst,elEst,azTrue,elTrue,nSrcEst,thres)
% Recall and best accuracy of one true source for each number of kept
% estimates. Rows of R and Acc: 1 = az, 2 = el, 3 = both.
R = zeros(3,length(nSrcEst));
Acc = zeros(3,length(nSrcEst));
for i = nSrcEst
    % Only the i first estimated sources are taken into account
    [R(1,i),accAz] = eval_angle_cart(azEst(1:i),azTrue,thres);
    [R(2,i),accEl] = eval_angle_cart(elEst(1:i),elTrue,thres);
    [R(3,i),accBoth] = eval_angle_both([azEst(1:i)';elEst(1:i)'],[azTrue;elTrue],thres);
    % Keep the error of the closest estimate (inf when none is within thres)
    Acc(1,i) = min(accAz);
    Acc(2,i) = min(accEl);
    Acc(3,i) = min(accBoth);
end
end

function write_condition_perf(outFolder,curMode,condTag,src,condSuffix,idNoise,Rsave,AccSave,angList,nSrcEst)
% Average the results of source src over the noise indices idNoise and
% write the recall file and the accuracy file of that condition.
nEst = length(nSrcEst);
RMean = zeros(numel(angList),nEst);
AccMean = zeros(numel(angList),nEst);
labels = cell(1,numel(angList));
for a = 1:numel(angList)
    key = [src '_' angList{a}];
    labels{a} = ['Mean_' key '_' condSuffix];
    for i = 1:nEst
        [RMean(a,i),AccMean(a,i)] = R_Acc_avg(Rsave.(key),AccSave.(key),idNoise,i,[]);
    end
end
write_perf_file([outFolder 'R_' curMode '_' condTag '.txt'],strcat('R',labels),RMean,nSrcEst);
write_perf_file([outFolder 'Acc_' curMode '_' condTag '.txt'],strcat('Acc',labels),AccMean,nSrcEst);
end

function write_perf_file(filePath,rowLabels,rowValues,nSrcEst)
% Write one tab-separated performance table: a header line listing the
% numbers of estimated sources, then one labelled line per row of rowValues.
fid = fopen(filePath,'w+');
if(fid == -1)
    error('main_eval_perf:cannotOpenFile','Cannot open %s for writing.',filePath);
end
closeFile = onCleanup(@() fclose(fid)); % file is closed even if a write fails
fprintf(fid,'nEstSrc');
fprintf(fid,'\t%d',nSrcEst);
fprintf(fid,'\n');
for k = 1:numel(rowLabels)
    fprintf(fid,'%s',rowLabels{k});
    for j = nSrcEst
        fprintf(fid,'\t%s',num2str(rowValues(k,j)));
    end
    fprintf(fid,'\n');
end
end
function [R,Acc] = eval_angle_cart(loc_e, loc_true,thres)
% eval_angle_cart
% Evaluation of Angle estimation in terms of recall and accuracy
%
% [R,Acc] = eval_angle_cart(loc_e, loc_true, thres)
%
% Inputs:
% loc_e: vector (row or column) of nsrce estimated angles in degrees
% (azimuth or elevation)
% loc_true: 1 x 1 true angle in degrees (azimuth or elevation)
% thres: correctness threshold in degrees
%
% Outputs:
% R: recall (true if at least one estimated angle is within thres of the
% true angle)
% Acc: 1 x nsrce vector of estimated source accuracies in degrees (+inf
% if above the threshold)
%
% The angular error is the shortest distance on the circle, so it always
% lies in [0,180] whatever the angle convention of the inputs
% ([-180,180], [0,360], ...).
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Copyright 2017 Ewen Camberlein and Romain Lebarbenchon
% This software is distributed under the terms of the GNU Public License
% version 3 (http://www.gnu.org/licenses/gpl.txt)
% If you find it useful, please cite the following reference:
% - Nancy Bertin, Ewen Camberlein, Romain Lebarbenchon, Emmanuel Vincent,
% Sunit Sivasankaran, Irina Illina, Frédéric Bimbot
% "VoiceHome-2, an extended corpus for multichannel speech processing in
% real homes", submitted to Speech Communication, Elsevier, 2017
%
% Contact : nancy.bertin[at]irisa.fr
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if(size(loc_true,2) > 1 )
    error('Only evaluation of one real source is available');
end
nsrce = length(loc_e);

% Reduce the absolute difference modulo 360 before folding it: without
% this step a raw difference above 360 degrees (or an infinite estimate)
% gives a negative "error" that is wrongly accepted as correct.
angleErr = mod(abs(bsxfun(@minus,loc_e,loc_true)),360);
angleErr = min(angleErr,360-angleErr);

correct = (angleErr <= thres);
posCorrect = find(correct);

% Recall
R = any(correct(:));

% Accuracy: error of the correct estimates, +inf for all the others
Acc = inf(1,nsrce);
Acc(posCorrect) = angleErr(posCorrect);
end
function [R, Acc] = eval_angle_both(loc_e, loc_true,thres)
% eval_angle_both
% Evaluation of Angle estimation in terms of recall and accuracy with
% curvilinear abscissa error computation
%
% [R,Acc] = eval_angle_both(loc_e, loc_true, thres)
%
% Inputs:
% loc_e: 2 x nsrce matrix of estimated angles in degrees (row 1: azimuth,
% row 2: elevation)
% loc_true: 2 x 1 vector of true angles in degrees (azimuth; elevation)
% thres: correctness threshold in degrees
%
% Outputs:
% R: recall (true if at least one estimated direction is within thres of
% the true direction)
% Acc: 1 x nsrce vector of estimated source accuracies in degrees (+inf
% if above the threshold)
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Copyright 2017 Ewen Camberlein and Romain Lebarbenchon
% This software is distributed under the terms of the GNU Public License
% version 3 (http://www.gnu.org/licenses/gpl.txt)
% If you find it useful, please cite the following reference:
% - Nancy Bertin, Ewen Camberlein, Romain Lebarbenchon, Emmanuel Vincent,
% Sunit Sivasankaran, Irina Illina, Frédéric Bimbot
% "VoiceHome-2, an extended corpus for multichannel speech processing in
% real homes", submitted to Speech Communication, Elsevier, 2017
%
% Contact : nancy.bertin[at]irisa.fr
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if(size(loc_true,2) > 1 )
    error('Only evaluation of one real source is available');
end
if(size(loc_e,1) ~= 2 || size(loc_true,1) ~=2)
    error('Each localization must have two components (theta and phi)');
end
nsrce = size(loc_e,2);

azEst = loc_e(1,:);
elEst = loc_e(2,:);
azTrue = loc_true(1);
elTrue = loc_true(2);

% Cosine of the angle between the estimated and the true directions
cosErr = sind(elEst).*sind(elTrue) + cosd(elEst).*cosd(elTrue).*cosd(azTrue-azEst);

% Rounding can push the cosine slightly outside [-1,1], in which case
% acosd returns a complex value. Clamp with logical masks rather than
% min/max so that NaN entries stay NaN (min/max would discard them).
cosErr(cosErr > 1) = 1;
cosErr(cosErr < -1) = -1;

% Curvilinear abscissa error, in degrees
angleErr = acosd(cosErr);

% correct position vs distance threshold is computed using curvilinear abscissa distance
correct = (angleErr <= thres);

% Recall
R = any(correct);

% Accuracy: error of the correct estimates, +inf for all the others
Acc = inf(1,nsrce);
Acc(correct) = angleErr(correct);
end
function [Rmean,AccMean] = R_Acc_avg(R,Acc,idNoise,estSrcId,idFilt)
% R_Acc_avg
% Recall/Accuracy averaging function
%
% [Rmean,AccMean] = R_Acc_avg(R,Acc,idNoise,estSrcId,idFilt)
%
% Inputs:
% R: array of recall values, indexed with 7 subscripts; the 5th one
% selects the noise index and the 7th one the number of estimated sources
% Acc: array of accuracies, same layout as R
% idNoise: indices kept along the 5th dimension
% estSrcId: index kept along the 7th dimension
% idFilt: optional index applied to the selected values; when empty, all
% selected values are used
%
% Outputs:
% Rmean: mean of the selected recall values
% AccMean: mean of the selected accuracies restricted to the entries
% whose recall is non-zero (NaN when there is no such entry)
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Copyright 2017 Ewen Camberlein and Romain Lebarbenchon
% This software is distributed under the terms of the GNU Public License
% version 3 (http://www.gnu.org/licenses/gpl.txt)
% If you find it useful, please cite the following reference:
% - Nancy Bertin, Ewen Camberlein, Romain Lebarbenchon, Emmanuel Vincent,
% Sunit Sivasankaran, Irina Illina, Frédéric Bimbot
% "VoiceHome-2, an extended corpus for multichannelspeech processing in
% real homes", submitted to Speech Communication, Elsevier, 2017
%
% Contact : nancy.bertin[at]irisa.fr
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Slice out the requested noise indices and number of estimated sources
recallSel = shiftdim(R(:,:,:,:,idNoise,:,estSrcId));
accSel = shiftdim(Acc(:,:,:,:,idNoise,:,estSrcId));

% Either keep the filtered subset or flatten everything
if(~isempty(idFilt))
    recallSel = recallSel(idFilt);
    accSel = accSel(idFilt);
else
    recallSel = recallSel(:);
    accSel = accSel(:);
end

% Entries where the source has been found
found = (recallSel ~= 0);

Rmean = mean(recallSel);
% Accuracy is averaged over found entries only
AccMean = sum(accSel(found))/sum(found);
end