https://github.com/Microsoft/CNTK
Tip revision: f0d63b2c531018ec206a05629ad1a64aeb9c42fb authored by Eldar Akchurin on 30 November 2017, 12:33:49 UTC
Exposing EvaluatorWrapper interface and restructuring the code so that most of it can be reused in the Halide inference engine
Tip revision: f0d63b2
EvalActions.cpp
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
// EvalActions.cpp -- CNTK evaluation-related actions
//
#define _CRT_NONSTDC_NO_DEPRECATE // make VS accept POSIX functions without _
#include "stdafx.h"
#include "Basics.h"
#include "Actions.h"
#include "ComputationNetwork.h"
#include "ComputationNode.h"
#include "DataReader.h"
#include "DataWriter.h"
#include "Config.h"
#include "SimpleEvaluator.h"
#include "SimpleOutputWriter.h"
#include "Criterion.h"
#include "BestGpu.h"
#include "ScriptableObjects.h"
#include "BrainScriptEvaluator.h"
#include <string>
#include <chrono>
#include <algorithm>
#include <vector>
#include <iostream>
#include <queue>
#include <set>
#include <memory>
#ifndef let
#define let const auto
#endif
using namespace std;
using namespace Microsoft::MSR;
using namespace Microsoft::MSR::CNTK;
// Default for the 'distributedMBReading' switch: distributed minibatch reading
// is only sensible when running parallel (MPI) training with a v2 reader.
bool GetDistributedMBReadingDefaultValue(const ConfigParameters& config, const IDataReader& reader)
{
    bool runningParallel = (MPIWrapper::GetInstance() != nullptr);
    bool isV2Reader = !reader.IsLegacyReader();
    return runningParallel && isV2Reader;
}
// ===========================================================================
// DoEvalBase() - implements CNTK "eval" command
// ===========================================================================
// Shared implementation of the "eval" command: reads evaluation parameters from
// 'config', loads the model, and runs SimpleEvaluator over 'reader'.
template <typename ElemType>
static void DoEvalBase(const ConfigParameters& config, IDataReader& reader)
{
    // Minibatch/epoch sizing. epochSize == 0 means "evaluate everything the reader provides".
    ConfigArray minibatchSize = config(L"minibatchSize", "40960");
    size_t epochSize = config(L"epochSize", "0");
    if (epochSize == 0)
    {
        epochSize = requestDataSize;
    }
    intargvector mbSize = minibatchSize;

    // Progress-reporting and memory-control knobs.
    int traceLevel = config(L"traceLevel", 0);
    size_t numMBsToShowResult = config(L"numMBsToShowResult", "100");
    size_t firstMBsToShowResult = config(L"firstMBsToShowResult", "0");
    size_t maxSamplesInRAM = config(L"maxSamplesInRAM", (size_t)SIZE_MAX);
    size_t numSubminiBatches = config(L"numSubminibatches", (size_t)1);

    bool enableDistributedMBReading = config(L"distributedMBReading", GetDistributedMBReadingDefaultValue(config, reader));

    // Load the model and resolve which nodes to evaluate. Note: GetModelFromConfig()
    // reads L"modelPath" from 'config' itself, so the previously-declared (and unused)
    // 'modelPath' local has been removed.
    vector<wstring> evalNodeNamesVector;
    let net = GetModelFromConfig<ConfigParameters, ElemType>(config, L"evalNodeNames", evalNodeNamesVector);

    // set tracing flags
    net->EnableNodeTracing(config(L"traceNodeNamesReal", ConfigParameters::Array(stringargvector())),
                           config(L"traceNodeNamesCategory", ConfigParameters::Array(stringargvector())),
                           config(L"traceNodeNamesSparse", ConfigParameters::Array(stringargvector())));

    SimpleEvaluator<ElemType> eval(net, MPIWrapper::GetInstance(), enableDistributedMBReading, numMBsToShowResult,
                                   firstMBsToShowResult, traceLevel, maxSamplesInRAM, numSubminiBatches);
    eval.Evaluate(&reader, evalNodeNamesVector, mbSize[0], epochSize);
}
// Implements the CNTK "eval" command: constructs the test-data reader from the
// 'reader' sub-config and delegates to DoEvalBase().
template <typename ElemType>
void DoEval(const ConfigParameters& config)
{
    // Propagate the trace level into the reader config and default to
    // non-randomized reading unless the user asked for something else.
    ConfigParameters readerConfig(config(L"reader"));
    readerConfig.Insert("traceLevel", config(L"traceLevel", "0"));

    bool userSpecifiedRandomize = readerConfig.ExistsCurrent(L"randomize");
    if (!userSpecifiedRandomize)
        readerConfig.Insert("randomize", "None");

    DataReader testDataReader(readerConfig);
    DoEvalBase<ElemType>(config, testDataReader);
}

template void DoEval<double>(const ConfigParameters& config);
template void DoEval<float>(const ConfigParameters& config);
// ===========================================================================
// DoCrossValidate() - implements CNTK "cv" command
// ===========================================================================
template <typename ElemType>
void DoCrossValidate(const ConfigParameters& config)
{
// test
ConfigParameters readerConfig(config(L"reader"));
readerConfig.Insert("traceLevel", config(L"traceLevel", "0"));
DEVICEID_TYPE deviceId = DeviceFromConfig(config);
ConfigArray minibatchSize = config(L"minibatchSize", "40960");
size_t epochSize = config(L"epochSize", "0");
if (epochSize == 0)
{
epochSize = requestDataSize;
}
wstring modelPath = config(L"modelPath");
intargvector mbSize = minibatchSize;
ConfigArray cvIntervalConfig = config(L"crossValidationInterval");
intargvector cvInterval = cvIntervalConfig;
size_t sleepSecondsBetweenRuns = config(L"sleepTimeBetweenRuns", "0");
int traceLevel = config(L"traceLevel", 0);
size_t numMBsToShowResult = config(L"numMBsToShowResult", "100");
size_t firstMBsToShowResult = config(L"firstMBsToShowResult", "0");
size_t maxSamplesInRAM = config(L"maxSamplesInRAM", (size_t)SIZE_MAX);
size_t numSubminiBatches = config(L"numSubminibatches", (size_t)1);
ConfigArray evalNodeNames = config(L"evalNodeNames", "");
vector<wstring> evalNodeNamesVector;
for (int i = 0; i < evalNodeNames.size(); ++i)
{
evalNodeNamesVector.push_back(evalNodeNames[i]);
}
std::vector<std::vector<EpochCriterion>> cvErrorResults;
std::vector<std::wstring> cvModels;
DataReader cvDataReader(readerConfig);
bool enableDistributedMBReading = config(L"distributedMBReading", GetDistributedMBReadingDefaultValue(config, cvDataReader));
bool finalModelEvaluated = false;
for (size_t i = cvInterval[0]; i <= cvInterval[2]; i += cvInterval[1])
{
wstring cvModelPath = msra::strfun::wstrprintf(L"%ls.%lld", modelPath.c_str(), i);
if (!fexists(cvModelPath))
{
fprintf(stderr, "Model %ls does not exist.\n", cvModelPath.c_str());
if (finalModelEvaluated || !fexists(modelPath))
continue; // file missing
else
{
cvModelPath = modelPath;
finalModelEvaluated = true;
}
}
cvModels.push_back(cvModelPath);
auto net = ComputationNetwork::CreateFromFile<ElemType>(deviceId, cvModelPath);
// BUGBUG: ^^ Should use GetModelFromConfig()
SimpleEvaluator<ElemType> eval(net, MPIWrapper::GetInstance(), enableDistributedMBReading, numMBsToShowResult,
firstMBsToShowResult, traceLevel, maxSamplesInRAM, numSubminiBatches);
fprintf(stderr, "Model %ls --> \n", cvModelPath.c_str());
auto evalErrors = eval.Evaluate(&cvDataReader, evalNodeNamesVector, mbSize[0], epochSize);
cvErrorResults.push_back(evalErrors);
::Sleep(1000 * sleepSecondsBetweenRuns);
}
// find best model
if (cvErrorResults.size() == 0)
LogicError("No model is evaluated.");
vector<double> minErrors;
vector<int> minErrIds;
vector<EpochCriterion> evalErrors = cvErrorResults[0];
for (int i = 0; i < evalErrors.size(); ++i)
{
minErrors.push_back(evalErrors[i].Average());
minErrIds.push_back(0);
}
for (int i = 0; i < cvErrorResults.size(); i++)
{
evalErrors = cvErrorResults[i];
for (int j = 0; j < evalErrors.size(); j++)
{
if (evalErrors[j].Average() < minErrors[j])
{
minErrors[j] = evalErrors[j].Average();
minErrIds[j] = i;
}
}
}
fprintf(stderr, "Best models:\n");
fprintf(stderr, "------------\n");
for (int i = 0; i < minErrors.size(); ++i)
fprintf(stderr, "Based on Err[%d]: Best model = %ls with min err %.8g\n", i, cvModels[minErrIds[i]].c_str(), minErrors[i]);
}
template void DoCrossValidate<float>(const ConfigParameters& config);
template void DoCrossValidate<double>(const ConfigParameters& config);
// ===========================================================================
// DoWriteOutput() - implements CNTK "write" command
// ===========================================================================
template <typename ElemType>
void DoWriteOutput(const ConfigParameters& config)
{
ConfigParameters readerConfig(config(L"reader"));
readerConfig.Insert("randomize", "None"); // we don't want randomization when output results
DataReader testDataReader(readerConfig);
ConfigArray minibatchSize = config(L"minibatchSize", "2048");
intargvector mbSize = minibatchSize;
size_t epochSize = config(L"epochSize", "0");
if (epochSize == 0)
{
epochSize = requestDataSize;
}
vector<wstring> outputNodeNamesVector;
let net = GetModelFromConfig<ConfigParameters, ElemType>(config, L"outputNodeNames", outputNodeNamesVector);
// set tracing flags
net->EnableNodeTracing(config(L"traceNodeNamesReal", ConfigParameters::Array(stringargvector())),
config(L"traceNodeNamesCategory", ConfigParameters::Array(stringargvector())),
config(L"traceNodeNamesSparse", ConfigParameters::Array(stringargvector())));
SimpleOutputWriter<ElemType> writer(net, 1);
if (config.Exists("writer"))
{
ConfigParameters writerConfig(config(L"writer"));
bool writerUnittest = writerConfig(L"unittest", "false");
DataWriter testDataWriter(writerConfig);
writer.WriteOutput(testDataReader, mbSize[0], testDataWriter, outputNodeNamesVector, epochSize, writerUnittest);
}
else if (config.Exists("outputPath"))
{
wstring outputPath = config(L"outputPath");
bool writeSequenceKey = config(L"writeSequenceKey", false);
WriteFormattingOptions formattingOptions(config);
bool nodeUnitTest = config(L"nodeUnitTest", "false");
writer.WriteOutput(testDataReader, mbSize[0], outputPath, outputNodeNamesVector, formattingOptions, epochSize, nodeUnitTest, writeSequenceKey);
}
else
InvalidArgument("write command: You must specify either 'writer'or 'outputPath'");
}
template void DoWriteOutput<float>(const ConfigParameters& config);
template void DoWriteOutput<double>(const ConfigParameters& config);