https://hal.archives-ouvertes.fr/hal-02963528
Raw File
Tip revision: 282551cd4868b7b38f2c72e9b0ac84a22e7b8411 authored by Software Heritage on 01 January 2017, 00:00:00 UTC
hal: Deposit 1043 in collection hal
Tip revision: 282551c
main_compute_speaker_models.m
function main_compute_speaker_models
% main_compute_speaker_models
%
% Main function to compute clean speaker models with paper's parameters.
% One set of models is learned for every combination of time-frequency
% transform ('ERB', 'STFT') and speaker learning mode ('spk_nodep',
% 'spk_dep') by calling computeSpkModels_corpora.
%
% Before you start to use this code :
% - Download and install FASST v2 toolbox
% (https://gitlab.inria.fr/bass-db/fasst)
% - Download and extract voiceHome-2 corpus (https://doi.org/10.5281/zenodo.1252143)
% - Update paths in the "USER PARAMS" section
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Copyright 2017 Ewen Camberlein and Romain Lebarbenchon
% This software is distributed under the terms of the GNU Public License
% version 3 (http://www.gnu.org/licenses/gpl.txt)
% If you find it useful, please cite the following reference:
% - Nancy Bertin, Ewen Camberlein, Romain Lebarbenchon, Emmanuel Vincent,
%   Sunit Sivasankaran, Irina Illina, Frédéric Bimbot 
%   "VoiceHome-2, an extended corpus for multichannel speech processing in
%    real homes", submitted to Speech Communication, Elsevier, 2017
%
% Contact : nancy.bertin[at]irisa.fr
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% USER PARAMS
fasst_matlab_dir = 'C:/Program Files/fasst 2.1.0/scripts/MATLAB/'; % Path to FASST Matlab scripts on your computer
corpusPath = 'C:\VBox_Partage\interspeech2\voiceHome-2_corpus_v1.0\'; % Corpus path (must end with a file separator, it is concatenated as-is)

%% Add FASST directory to PATH
addpath(fasst_matlab_dir);
addpath('./FASST_Framework/'); % FASST framework functions

% Output directory of the learned models. Only create it when it is
% missing: an unconditional mkdir raises a "Directory already exists"
% warning on every re-run.
if ~exist('./Models_Spk/', 'dir')
    mkdir('./Models_Spk/');
end

%% PAPER'S PARAMS
% Numeric parameters are given as strings; computeSpkModels_corpora
% converts them to numbers.
transformType = {'ERB','STFT'};   % STFT / ERB
spk_learning_mode = {'spk_nodep','spk_dep'}; % spk_dep/spk_nodep
spkIdList     = {'F1','F2','F3','F4','M1','M2','M3','M4','M5','M6','M7','M8'}'; % speakers id list
wlen          = '1024';   % window length (frame length in time domain) - % should be multiple of 4 for STFT and multiple of 2 for ERB
nbin_ERB      = '8';    % number of frequency coefficient for ERB transform type
speech_model_type = 'close_field'; 
spk_NMF_order = '32';
spk_iters = '50';

%% Learn speaker's models
% One call per (transform, learning mode) pair; each call loops over all
% speakers of spkIdList.
for i = 1:numel(transformType)
    for j = 1:numel(spk_learning_mode)
        computeSpkModels_corpora(corpusPath,speech_model_type,spk_learning_mode{j},spk_iters,spk_NMF_order,transformType{i},wlen,nbin_ERB,spkIdList);
    end
end
end

function [] = computeSpkModels_corpora(corpusPath,speech_model_type,spk_learning_mode,spk_iters,spk_NMF_order,transformType,wlen,nbin_ERB,spkIdList)
% computeSpkModels_corpora
%
% This function computes clean speaker models for each element of
% spkIdList.
%
% Models are saved in ./Models_Spk/ 
%
% [] = computeSpkModels_corpora(corpusPath,speech_model_type,spk_learning_mode,spk_iters,spk_NMF_order,transformType,wlen,nbin_ERB,spkIdList)
%
% INPUTS:
%
% corpusPath : String, corpus root directory. It is concatenated as-is
%   with 'audio/clean/', so it must end with a file separator.
% speech_model_type : String, 'close_field'
% spk_learning_mode : String, 'spk_dep', 'spk_nodep'
%   spk_dep : Use only utterance of the speaker
%   spk_nodep : Use all utterances except for the current speaker
% spk_iters : String, number of EM iterations
% spk_NMF_order : String, NMF order for the spectral model init
% transformType : String,Time-Frequency transform
%   Choices : 'ERB' or 'STFT'
% wlen : String, time segment length (in samples)
% nbin_ERB : String, number of bins for the ERB Transform
% spkIdList : Cell of strings, index of speakers (row or column cell; a
%   single char id is also accepted)
%
% The numeric parameters (spk_iters, spk_NMF_order, wlen, nbin_ERB) may
% also be passed directly as numbers.
%
% ERRORS:
%
% An error is raised when speech_model_type or spk_learning_mode is not
% one of the supported values, or when a numeric parameter cannot be
% converted to a number.
%
% A temporary directory ./tmp_<speech_model_type><id>/ is created for the
% duration of the call and removed when the function exits, including
% when it exits because of an error.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Copyright 2017 Ewen Camberlein and Romain Lebarbenchon
% This software is distributed under the terms of the GNU Public License
% version 3 (http://www.gnu.org/licenses/gpl.txt)
% If you find it useful, please cite the following reference:
% - Nancy Bertin, Ewen Camberlein, Romain Lebarbenchon, Emmanuel Vincent,
%   Sunit Sivasankaran, Irina Illina, Frédéric Bimbot 
%   "VoiceHome-2, an extended corpus for multichannel speech processing in
%    real homes", submitted to Speech Communication, Elsevier, 2017
%
% Contact : nancy.bertin[at]irisa.fr
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Get function name (error management)
% mfilename without argument already returns the file name without path
% and extension, which is what the error messages need.
functionName = mfilename;

%% Input validation
% Reject an unknown learning mode up front, before any directory is
% created, instead of failing later on an undefined selection mask.
if ~any(strcmp(spk_learning_mode, {'spk_dep','spk_nodep'}))
    error(['[Error:' functionName '.m]' 'speaker learning mode unknown:' spk_learning_mode]);
end

% Accept a single speaker id given as a char, and normalise the list to a
% row cell so that the speaker loop yields one 1x1 cell per iteration
% whatever the orientation passed by the caller.
if ischar(spkIdList)
    spkIdList = {spkIdList};
end
spkIdList = spkIdList(:)';

%% USER PARAMETERS
wlen          = parseNumericParam(wlen, 'wlen');
nbin_ERB      = parseNumericParam(nbin_ERB, 'nbin_ERB');
spk_NMF_order = parseNumericParam(spk_NMF_order, 'spk_NMF_order'); % number of VQ states (32 spk)
spk_iters     = parseNumericParam(spk_iters, 'spk_iters'); % number of EM iterations

sUserParams = ...
    struct(...
    ... % static
    'wlen',               wlen,              ...
    'nbin_ERB',           nbin_ERB,          ...
    'spk_NMF_order',      spk_NMF_order,     ...
    'spk_iters',          spk_iters,         ...
    'transformType',      transformType,     ...
    'speech_model_type',  speech_model_type, ...
    'spk_learning_mode',  spk_learning_mode, ...
    ... % dynamic
    'spkId',            []       ... % set for each speaker in the loop below
    );

%% CAMPAIGN PARAMETERS
sCampaignParams = ...
    struct(...
    ... % static
    'iFs',                   16000,       ...
    'nspk',                  1,           ... % number of speakers in each file
    'spat_meth_default',     'Direction', ... 'Direction' / 'Position'
    'use_diffuse_model',     0,           ...
    'back_multiple_sources', 0,            ...
    ... % dynamic
    'iNbChannels',           0,           ... % init value
    'pfRoomDimensions',      [],          ...
    'fRT60',                 [],          ...
    'sound_velocity',        343          ...  
    );

%% PATHS management
rootProjectDir='./';

sPaths = ...
    struct(...
    'spk_models_dir',   [rootProjectDir 'Models_Spk/'],       ... %output directory
    'learning_data_dir',     '',       ... % init value
    'tmp_dir',               ''        ... % init value
    );

% Settings that depend on the speech model type: number of channels and
% directory of the audio files used for learning.
switch speech_model_type
    case 'close_field'
        sCampaignParams.iNbChannels = 1;
        sPaths.learning_data_dir = [corpusPath 'audio/clean/']; % close-field condition data
    otherwise
        error(['[Error:' functionName '.m]' 'speech model type unknown:' speech_model_type]);
end

% Manage tmp directories : create a new tmp directory at each function call.
% Existing directories are looked up in the same location where the new
% one is created, so that the numbering really avoids collisions.
tmpPrefix    = ['tmp_' speech_model_type];
tempDirInfos = dir([rootProjectDir tmpPrefix '*']);
tempDirInfos = tempDirInfos([tempDirInfos.isdir]); % ignore plain files matching the pattern
existingId   = [];
for t = 1:numel(tempDirInfos)
    idSuffix = tempDirInfos(t).name(length(tmpPrefix)+1:end);
    % Only purely numeric suffixes are ids; anything else is skipped
    % instead of breaking the numbering.
    if ~isempty(regexp(idSuffix, '^\d+$', 'once'))
        existingId(end+1) = str2double(idSuffix); %#ok<AGROW>
    end
end
if isempty(existingId)
    idTempDir = 1;
else
    idTempDir = max(existingId)+1;
end
sPaths.tmp_dir = [rootProjectDir tmpPrefix num2str(idTempDir) '/']; % temporary directory

if ~exist(sPaths.tmp_dir, 'dir')
    mkdir(sPaths.tmp_dir);
end
% Remove the temporary directory when the function exits, whether it
% returns normally or through an error.
tmpDirToRemove = sPaths.tmp_dir;
tmpDirCleanup  = onCleanup(@() rmdir(tmpDirToRemove,'s')); %#ok<NASGU>

%% PROCESS
% list wav files in learning dir (same for every speaker, hence done once)
Infos_wavLearningDir     = dir([sPaths.learning_data_dir '*.wav']);
list_wavFilesLearningDir = {Infos_wavLearningDir.name};

%% LOOP on spkIdList 
for spkId = spkIdList
        currentSpkId = spkId{1};
        fprintf('Speaker %s model generation\n',currentSpkId);
        
        % Files whose name contains 'speaker<id>' belong to the current speaker
        isCurrentSpkFile = ~cellfun(@isempty,regexp(list_wavFilesLearningDir,['speaker' currentSpkId]));
        if strcmp(spk_learning_mode,'spk_dep')
            % Use only utterances of speaker currentSpkId
            b = isCurrentSpkFile;
        else % 'spk_nodep' (validated above)
            % Use utterances of all speakers except speaker currentSpkId
            b = ~isCurrentSpkFile;
        end
        list_wavFilesForLearning = list_wavFilesLearningDir(b);
        
        nFiles = length(list_wavFilesForLearning);
        if nFiles == 0
            warning('[Warning:%s.m] no learning wav file selected for speaker %s in %s', ...
                functionName, currentSpkId, sPaths.learning_data_dir);
        end
                
        % compute model from selected list of wav files
        sUserParams.spkId = currentSpkId;
        
        audio_segments = cell(1,nFiles);
        for n=1:nFiles
            fname_clean = list_wavFilesForLearning{n};
            fname_clean = fname_clean(1:end-4); % strip the '.wav' extension
            audio_segments{n} = wavOpening(corpusPath,fname_clean,'clean','wuw_cmd',[0 0],NaN,sCampaignParams.iFs);
        end
        computeSpkModels(sUserParams, sCampaignParams, sPaths, audio_segments);
                        
end

end

function value = parseNumericParam(param, paramName)
% parseNumericParam
%
% Convert a parameter given as a string into a number.
%
% INPUTS:
%
% param : String holding a number (e.g. '1024'), or a numeric value which
%   is returned unchanged
% paramName : String, name of the parameter (used in the error message)
%
% OUTPUT:
%
% value : numeric value of param
%
% An error is raised when param cannot be converted to a number.

if isnumeric(param)
    value = param;
    return;
end
if ~ischar(param)
    error('[Error:parseNumericParam] parameter %s must be a string or a number', paramName);
end

value = str2double(param);
if isnan(value)
    % Not a plain numeric literal: fall back to str2num so that expression
    % strings accepted by the former eval-based conversion (e.g. '2*512')
    % keep working.
    value = str2num(param); %#ok<ST2NM>
end
if isempty(value) || any(isnan(value(:)))
    error('[Error:parseNumericParam] parameter %s is not a valid number: %s', paramName, param);
end
end

back to top