https://github.com/Microsoft/CNTK
Tip revision: a05c3c642648373f4ede0956e4286257c3d59a61 authored by liqfu on 24 August 2018, 17:46:51 UTC
CNTK splice allows broadcast. This case is handled in the change. For noop (identity) ops, its inputs and outputs types shall be set according to upstream ops. ToBatch/ToSequence and Unpack Batch/Sequence ops added during model importing need tp be skipped. Model import need to handle ops with multiple outputs.
CNTK splice allows broadcast. This case is handled in the change. For noop (identity) ops, its inputs and outputs types shall be set according to upstream ops. ToBatch/ToSequence and Unpack Batch/Sequence ops added during model importing need tp be skipped. Model import need to handle ops with multiple outputs.
Tip revision: a05c3c6
TrainingNodes.cpp
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#include "TrainingNodes.h"
#include <boost/random/uniform_real_distribution.hpp>
namespace Microsoft { namespace MSR { namespace CNTK {
template <class ElemType>
/*virtual*/ void RandomDistributionNode<ElemType>::ForwardProp(const FrameRange& fr) /*override*/
{
auto&& result = ValueFor(fr);
switch (m_type)
{
case RandomDistributionType::Uniform:
result.SetUniformRandomValue(GetRNGHandle(), m_args[0], m_args[1]);
UpdateRngOffset(GetRngOffset() + result.GetNumElements());
break;
case RandomDistributionType::Normal:
result.SetGaussianRandomValue(GetRNGHandle(), m_args[0], m_args[1]);
UpdateRngOffset(GetRngOffset() + AsMultipleOf(result.GetNumElements(), 2));
break;
case RandomDistributionType::Gumbel:
result.SetGumbelRandomValue(GetRNGHandle(), m_args[0], m_args[1]);
UpdateRngOffset(GetRngOffset() + result.GetNumElements());
break;
case RandomDistributionType::Bernoulli:
result.SetUniformRandomMask(ElemType(1 - m_args[0]), ElemType(1), GetRNGHandle());
UpdateRngOffset(GetRngOffset() + result.GetNumElements());
break;
default:
RuntimeError("RandomDistributionNode::ForwardProp: Unknown random distribution type code %d", m_type);
}
}
template <class ElemType>
/*virtual*/ void RandomDistributionNode<ElemType>::BackpropTo(const size_t /*inputIndex*/, const FrameRange&) /*override*/
{
/* Do nothing. The proper fix is for this Node to have it say it does not need gradient. */
}
template <class ElemType>
/*virtual*/ bool RandomDistributionNode<ElemType>::IsOutOfDateWrtInputs() const /*override*/ { return true; }
template class RandomDistributionNode<float>;
template class RandomDistributionNode<double>;
template class RandomDistributionNode<half>;
template<class ElemType>
void RandomSampleNodeBase<ElemType>::Validate(bool isFinalValidationPass)
{
if (m_sizeOfSampledSet == 0)
{
InvalidArgument("Number of requested samples is zero.");
}
if (isFinalValidationPass)
{
// Sampling without replacement does only work when the number of requested classes is <= number of classes.
let& shape = Input(0)->GetSampleLayout();
let dims = shape.GetDims();
size_t nClasses = dims[0];
if (!m_allowDuplicates && nClasses <= m_sizeOfSampledSet)
InvalidArgument("For sampling without duplicates the number of requested samples (%lu) needs to be less than the number of classes (%lu).", m_sizeOfSampledSet, nClasses);
}
}
template<class ElemType>
void RandomSampleNodeBase<ElemType>::CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const
{
Base::CopyTo(nodeP, newName, flags);
if (flags & CopyNodeFlags::copyNodeValue)
{
auto node = dynamic_pointer_cast<RandomSampleNodeBase<ElemType>>(nodeP);
node->m_allowDuplicates = m_allowDuplicates;
node->m_sizeOfSampledSet = m_sizeOfSampledSet;
node->SetRngState(GetRngSeed(), GetRngOffset());
}
}
template<class ElemType>
void RandomSampleNodeBase<ElemType>::Save(File& fstream) const
{
Base::Save(fstream);
fstream << m_allowDuplicates;
fstream << m_sizeOfSampledSet;
RngUser::Save(fstream);
}
template<class ElemType>
void RandomSampleNodeBase<ElemType>::Load(File& fstream, size_t modelVersion)
{
Base::Load(fstream, modelVersion);
fstream >> m_allowDuplicates;
fstream >> m_sizeOfSampledSet;
RngUser::Load(fstream, modelVersion);
}
template<class ElemType>
void RandomSampleNodeBase<ElemType>::UpdateWeightsPrefixSum()
{
const Matrix<ElemType>& samplingWeights = Input(0)->ValueAsMatrix();
m_samplingWeightsPrefixSum.clear();
double runningWeightsSum = 0;
for (int iClass = 0; iClass < samplingWeights.GetNumRows(); iClass++)
{
ElemType currentWeight = samplingWeights.GetValue(iClass, 0);
if (currentWeight < 0)
InvalidArgument("Sampling weights contain negative number %f.", (float)currentWeight);
runningWeightsSum += (double)currentWeight;
m_samplingWeightsPrefixSum.push_back(runningWeightsSum);
}
}
// Runs the sampling returning a vector with the id's of the samples. The parameter nTries is used to return the number of draws that was needed
// to get the expected number of samples.
template<class ElemType>
const std::vector<size_t> RandomSampleNodeBase<ElemType>::RunSampling(size_t& nTries)
{
boost::random::uniform_real_distribution<double> r(0, m_samplingWeightsPrefixSum.back());
std::unordered_set<int> alreadySampled;
std::vector<size_t> samples;
CPURNGHandle* cpuRNGHandle = dynamic_cast<CPURNGHandle*>(&GetRNGHandle(CPUDEVICE));
// find random samples using the specified weight
if (m_allowDuplicates)
nTries = m_sizeOfSampledSet;
else
nTries = 0; // just initialize and count how many tries we need.
auto offset = GetRngOffset();
while (samples.size() < m_sizeOfSampledSet)
{
double randomValue = r(cpuRNGHandle->Generator());
offset++;
// Find the first index where value[idx] >= randomValue.
auto lower = std::lower_bound(m_samplingWeightsPrefixSum.begin(), m_samplingWeightsPrefixSum.end(), randomValue);
int idx = (int)(lower - m_samplingWeightsPrefixSum.begin());
if (m_allowDuplicates)
samples.push_back(idx);
else
{
// Sampling without replacement: each value can be sampled at most once.
// The implementation below using rejection sampling is problematic.
// E.g if first class has probability p = 0.999 we typically will have to sample 1000 times or more to hit another class.
// BUGBUG Alternative implementions, e.g:
// * Weighted Random Sampling with Reservoir: http://utopia.duth.gr/~pefraimi/research/data/2007EncOfAlg.pdf
// * Binary tree with classes as leafes and branch probs on non-leafes.
// * As in numpy: https://github.com/numpy/numpy/blob/master/numpy/random/mtrand/mtrand.pyx#L1440
nTries++;
if (alreadySampled.find(idx) != alreadySampled.end()) continue;
else
{
samples.push_back(idx);
alreadySampled.insert(idx);
}
}
}
UpdateRngOffset(offset);
return samples;
}
template<class ElemType>
void RandomSampleNode<ElemType>::ForwardPropNonLooping()
{
Base::UpdateWeightsPrefixSum();
if (ValueAsMatrix().GetMatrixType() != SPARSE)
{
// BUGBUG: matrix type should be configured during validation
// Note: We allocate a new one instead of switching the type in place since switching in place may
// affect other nodes who share this matrix due to memory sharing
auto newSparseValueMatrix = std::make_shared<Matrix<ElemType>>(ValueAsMatrix().GetNumRows(), ValueAsMatrix().GetNumCols(), CPUDEVICE, SPARSE, matrixFormatSparseCSC);
#ifdef _MSC_VER
ValuePtrRef() = newSparseValueMatrix;
#else
this->template ValuePtrRef() = newSparseValueMatrix;
#endif
}
Matrix<ElemType>& valueMatrix = ValueAsMatrix();
// TODO: Should we prepare the CSC data directly on the CPU and move it in one go?
// Currently the reader will place the data onto the GPU. It will then be pulled on-demand to the CPU once (and cached there).
valueMatrix.TransferToDeviceIfNotThere(CPUDEVICE, /*ismoved =*/ true/*means: BOTH state not ok */, /*emptyTransfer =*/ true, /*updatePreferredDevice =*/ true);
valueMatrix.Reset();
// Get vector with indices of randomly sampled classes
const std::vector<size_t> samples = GetWeightedSamples();
// Set columns of (sparse) result matrix as indicator vectors
for (size_t i = 0; i < Base::m_sizeOfSampledSet; i++)
{
int sample = samples[i];
valueMatrix.SetValue(sample, i, 1);
}
}
template<class ElemType>
const std::vector<size_t> RandomSampleNode<ElemType>::GetWeightedSamples()
{
size_t dummy;
// Here we are not interested in the number of sampling tries needed, which is returned in the parameter.
return Base::RunSampling(dummy);
}
template<class ElemType>
void RandomSampleNode<ElemType>::Validate(bool isFinalValidationPass)
{
Base::Validate(isFinalValidationPass);
m_pMBLayout = nullptr;
let& shape = Input(0)->GetSampleLayout();
let dims = shape.GetDims();
size_t numClasses = dims[0];
// Output: a (sparse) matrix containing m_sizeOfSampledSet columns of 1-hot vectors specifiying the sampled classes.
SetDims(TensorShape(numClasses, Base::m_sizeOfSampledSet), false);
}
template<class ElemType>
bool RandomSampleNode<ElemType>::IsOutOfDateWrtInputs() const
{
// We need to recompute the result for each mini-batch even if the weight vector didn't change.
return true;
}
template class RandomSampleNode<float>;
template class RandomSampleNode<double>;
template class RandomSampleNode<half>;
template<class ElemType>
double RandomSampleInclusionFrequencyNode<ElemType>::EstimateNumberOfTries()
{
// We estimate the average numver of tries by repeating a fixed number of experiments
const size_t numExperiments = 10; // We choose 10 without any deep justification.
long totalTries = 0;
for (int i = 0; i < numExperiments; i++)
{
size_t nTries;
Base::RunSampling(nTries);
totalTries += nTries;
}
return totalTries / (double)numExperiments;
}
// Estimates the expected number of occurrences of each class in the sampled set.
// For sampling without replacement we use estimate using average number of tries. (Inspired by TensorFlow)
// BUGBUG: Consider to reimplement using a less biased estimate as proposed by Nikos.
template<class ElemType>
double RandomSampleInclusionFrequencyNode<ElemType>::EstimateInSampleFrequency(double p, double estimatedNumTries) const
{
if (Base::m_allowDuplicates)
{
return p * Base::m_sizeOfSampledSet;
}
else /* No duplicates allowed. Estimated count is same as probability of inclusion. */
{
return -expm1(estimatedNumTries * log1p(-p));
}
}
template<class ElemType>
void RandomSampleInclusionFrequencyNode<ElemType>::ForwardPropNonLooping()
{
Base::UpdateWeightsPrefixSum();
Matrix<ElemType>& valueMatrix = ValueAsMatrix();
valueMatrix.TransferToDeviceIfNotThere(CPUDEVICE, /*ismoved =*/ true/*means: BOTH state not ok */, /*emptyTransfer =*/ true, /*updatePreferredDevice =*/ false);
valueMatrix.SetDevice(CPUDEVICE);
// BUGBUG: matrix type should be configured during validation
valueMatrix.SwitchToMatrixType(DENSE, matrixFormatDense, false);
double sumOfWeights = Base::m_samplingWeightsPrefixSum.back();
const Matrix<ElemType>& samplingWeights = Input(0)->ValueAsMatrix();
double estimatedNumTries = EstimateNumberOfTries();
for (int i = 0; i < Base::m_samplingWeightsPrefixSum.size(); i++)
{
// Get the sampling probablility for from the weights for i-th class.
double samplingProb = samplingWeights.GetValue(i, 0) / sumOfWeights;
double estimatedCount = EstimateInSampleFrequency(samplingProb, estimatedNumTries);
valueMatrix.SetValue(i, 0, (ElemType)estimatedCount);
}
}
template<class ElemType>
void RandomSampleInclusionFrequencyNode<ElemType>::Validate(bool isFinalValidationPass)
{
Base::Validate(isFinalValidationPass);
m_pMBLayout = nullptr;
let& shape = Input(0)->GetSampleLayout();
let dims = shape.GetDims();
size_t nClasses = dims[0];
// Output: one vector containing the estimated in sample frequency for each class.
SetDims(TensorShape(nClasses, 1), false);
}
template class RandomSampleInclusionFrequencyNode<float>;
template class RandomSampleInclusionFrequencyNode<double>;
template class RandomSampleInclusionFrequencyNode<half>;
template<class ElemType>
void DropoutNode<ElemType>::Save(File& fstream) const
{
Base::Save(fstream);
RngUser::Save(fstream);
}
template<class ElemType>
void DropoutNode<ElemType>::Load(File& fstream, size_t modelVersion)
{
Base::Load(fstream, modelVersion);
RngUser::Load(fstream, modelVersion);
}
#if 0 // outdated version
template<class ElemType>
void BatchNormalizationNode<ElemType>::AttachInputs(const std::vector<ComputationNodeBasePtr>& inputs)
{
Base::AttachInputs(inputs);
if (m_pre19SampleCount != 0)
{
// copy the sample count loaded from a pre-cntk-19 model into the input parameter.
Input(RUN_COUNT)->Value().SetValue(ElemType(m_pre19SampleCount));
// reset the legacy sample count.
m_pre19SampleCount = 0;
}
}
#endif
template class DropoutNode<float>;
template class DropoutNode<double>;
template class DropoutNode<half>;
template class BatchNormalizationNode<float>;
template class BatchNormalizationNode<double>;
template class BatchNormalizationNode<half>;
}}}