// IDistGradAggregator.h
#pragma once

#include "DistGradHeader.h"
#include "MPIWrapper.h"

namespace Microsoft { namespace MSR { namespace CNTK {

template <class ElemType>
class IDistGradAggregator
{
public:
    IDistGradAggregator(const MPIWrapperPtr& mpi)
        : m_mpi(mpi)
    {
    }

    virtual ~IDistGradAggregator()
    {
    }

    // Aggregates the given gradient matrices and the accompanying header across all parallel workers.
    // Returns a boolean indicating whether any samples were processed.
    virtual bool AggregateGradients(const std::vector<Matrix<ElemType>*>& gradients, DistGradHeader* headerCPU, int epochNumber) = 0;

    // Number of MPI worker processes participating in aggregation.
    size_t NumProc()
    {
        return m_mpi->NumNodesInUse();
    }

    // Rank of this worker among the participating processes.
    size_t MyRank()
    {
        return m_mpi->CurrentNodeRank();
    }

    // Waits for all pending asynchronous MPI requests to complete.
    void WaitAll()
    {
        m_mpi->WaitAll();
    }

protected:
    MPIWrapperPtr m_mpi;
};

#define UsingIDistGradAggregatorMembers           \
    \
protected:                                        \
    using IDistGradAggregator<ElemType>::m_mpi;   \
    using IDistGradAggregator<ElemType>::NumProc; \
    using IDistGradAggregator<ElemType>::MyRank
} } }
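
// Illustrative sketch (not part of the original CNTK header): a minimal derived
// aggregator showing how IDistGradAggregator and the UsingIDistGradAggregatorMembers
// macro are meant to be used. The class name NaiveDistGradAggregator and its stub
// body are hypothetical; the actual all-reduce step is only described in comments,
// and the DistGradHeader::numSamples field is assumed from DistGradHeader.h.
namespace Microsoft { namespace MSR { namespace CNTK {

template <class ElemType>
class NaiveDistGradAggregator : public IDistGradAggregator<ElemType>
{
    UsingIDistGradAggregatorMembers;

public:
    NaiveDistGradAggregator(const MPIWrapperPtr& mpi)
        : IDistGradAggregator<ElemType>(mpi)
    {
    }

    bool AggregateGradients(const std::vector<Matrix<ElemType>*>& gradients, DistGradHeader* headerCPU, int epochNumber) override
    {
        // A real implementation would all-reduce headerCPU (sample counts, criterion,
        // evaluation errors) and every matrix in 'gradients' across the NumProc()
        // workers, writing the aggregated values back in place.
        (void) gradients;
        (void) epochNumber;

        // After aggregation, report whether any worker processed samples in this minibatch.
        return headerCPU->numSamples != 0;
    }
};

} } }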