// // Copyright (c) Microsoft. All rights reserved. // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. // #pragma once #include #include "ComputationNetwork.h" namespace Microsoft { namespace MSR { namespace CNTK { // ----------------------------------------------------------------------- // class AdjustLearningRateAtBeginning // Providing option for DataParallelASGD training. so that every nodes // could adjust learning rate every minibatch at first N epochs. // ----------------------------------------------------------------------- // TODO: We can removed these options once we can adjust learning rate at minibatchs level enum class AdjustLearningRateAtBeginning : int { None = 0, // default, don't adjust learning rate Linearly = 1, // using linear adjustment, learning rate will from 0 to learningRatesPerMB Staircase = (1 << 1), // using staircased adjustment, learning rate will from 0 to learningRatesPerMB every adjustNbMinibatch }; template class ASGDHelper { public: virtual ~ASGDHelper() { } // ----------------------------------------------------------------------- // InitModel() -- Upload initialized model (, which was pre-computed by CNTK logic) . // to the parameter servers, so that every node could start training from same model // ----------------------------------------------------------------------- virtual void InitModel(const std::list & learnableNodes) = 0; // ----------------------------------------------------------------------- // PushAndPullModel() -- Push parameters of learnableNodes to parameter servers, then get the latests model back. // ----------------------------------------------------------------------- virtual bool PushAndPullModel(const std::list & learnableNodes, size_t sampleSinceLastSynced = 0) = 0; // ----------------------------------------------------------------------- // WaitAll() -- Wait(Barrier) all the other nodes to process // ----------------------------------------------------------------------- virtual void WaitAll() = 0; // ----------------------------------------------------------------------- // WaitAsyncBuffer() -- Wait pipeline thread to finish job when useAsyncBuffer is true // ----------------------------------------------------------------------- virtual void WaitAsyncBuffer() = 0; }; // Class ASGDHelper // Factory method to create a ASGDHelper instance template ASGDHelper* NewASGDHelper( const std::list & learnableNodes, // Parameters that needs to be train size_t nodeNumRanks, // Number of working nodes bool useAsyncBuffered = true, // Using asynchonous buffer to hide communication cost bool isSimulatedModelAveragingSGD = false, // Using parameter server-based MA rather than ASGD AdjustLearningRateAtBeginning adjusttype = AdjustLearningRateAtBeginning::None, // Adjust learning per minibatches at very begining of training process double adjustCoef = 0.2, // see in DecayCoefficient() size_t adjustPerMinibatches = 600, // int traceLevel = 0, // log level int syncPerfStats = 0); // shown perf data every syncPerfStats }}}