//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#pragma once
#ifdef _WIN32
#ifdef MATH_EXPORTS
#define MATH_API __declspec(dllexport)
#else
#define MATH_API __declspec(dllimport)
#endif
#else // no DLLs on Linux
#define MATH_API
#endif

#include "Basics.h"
#include "basetypes.h"
#include <string>
#include <stdint.h>
#include <memory>
#include <unordered_map>
#include <map>
#include <mutex>

#pragma warning( disable: 4251 )
typedef unsigned char byte;

#define DEVICEID_TYPE int
// and the following magic values
#define CPUDEVICE (DEVICEID_TYPE) - 1                 // device is the CPU
#define DEVICEID_NOTYETDETERMINED (DEVICEID_TYPE) - 3 // not yet set
#define DEVICEID_AUTO (DEVICEID_TYPE) - 4             // device should be picked automatically

#define EPS_IN_INVERSE 1e-30f    // 1e-37 is the only guaranteed precision
#define EPS_IN_LOG 1e-37f        // 1e-37 is the only guaranteed precision
#define LOG_OF_EPS_IN_LOG -85.1f // log(EPS_IN_LOG)
#define LOG10_OF_EPS_IN_LOG -37  // log_10(EPS_IN_LOG)
#define LZERO -10e10
#define MINLOGEXP -9.2103
#define LSMALL -0.5E10
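
// Illustrative sketch (not part of the original header): these guard constants
// typically clamp the argument of log() so it never reaches zero or the
// denormal range. The helper name 'SafeLog' is hypothetical:
//
//     static inline float SafeLog(float x)
//     {
//         return (x <= EPS_IN_LOG) ? (float) LZERO : logf(x); // map near-zero inputs to the "log zero" sentinel
//     }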

#define GPUSPARSE_INDEX_TYPE int // cuSparse only supports int array indexes
#define CPUSPARSE_INDEX_TYPE int // to be consistent with cuSparse, but this limits the possible size of the matrix.

#define MEM_MAX_LIMIT_TIMES 2 // the maximum factor by which a cached memory block may exceed a request and still be reused for it

namespace Microsoft { namespace MSR { namespace CNTK {

MATH_API void SetMathLibTraceLevel(int traceLevel);
MATH_API int GetMathLibTraceLevel();

class MATH_API TracingGPUMemoryAllocator
{
private:
    static int m_traceLevel;

public:
    static void SetTraceLevel(int traceLevel);
    static bool IsTraceEnabled();

    template <typename AllocatedElemType>
    static AllocatedElemType* Allocate(int deviceId, size_t numRows, size_t numCols);

    template <typename AllocatedElemType>
    static AllocatedElemType* Allocate(int deviceId, size_t numElements);

    template <typename AllocatedElemType>
    static void Free(int deviceId, AllocatedElemType* bufferPtr, bool ignoreCUDARetCode = false);

    // This method is public so that the memory manager can check the total free memory and decide whether to
    // physically release all the cached memory.
    static std::pair<size_t, size_t> GetFreeAndTotalMemoryInMBs(int deviceId);

private:
    template <typename AllocatedElemType>
    static AllocatedElemType* AllocateNoTrace(int deviceId, size_t numElements);
};
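
// Usage sketch (illustrative only, assuming a GPU build where the allocator is
// compiled in): a simple allocate/use/free round trip on device 0.
//
//     float* p = TracingGPUMemoryAllocator::Allocate<float>(/*deviceId=*/0, /*numRows=*/128, /*numCols=*/64);
//     // ... launch kernels that read/write p ...
//     TracingGPUMemoryAllocator::Free<float>(/*deviceId=*/0, p);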

// -----------------------------------------------------------------------
// ElementWiseOperator -- This enum represents which function to apply.
// This is shared between all matrix types and tensors.
// -----------------------------------------------------------------------

enum ElementWiseOperator
{
    // nullary
    opConstOne, opNone,
    // unary (or binary with constant parameter)
    opCopy,
    opNegate, opNot, opAbs, opFloor, opReciprocal,
    opSigmoid, opTanh, opSqr, opSqrt, opExp, opLog, opLinearRectifier, opCosine, opSin,
    // unary ops for use by Matrix class only (there is no TensorView implementation)
    opSigmoidDerivative, opLinearRectifierDerivative, opNegativeSine,
    // binary
    opCopyIf, opCopyIfNot, opSum, opDifference, opElementwiseProduct, opElementwiseQuotient, opLogSum,
    opMax, opMin,
    opLess, opEqual, opGreater, opGreaterEqual, opNotEqual, opLessEqual, // Note: must obey this order: (sgn(a-b) == -1, 0, +1), (sgn(a-b) != -1, 0, +1)
    opAnd, opOr, opXor, opMaskNegative,
    opElementwiseProductWithSigmoidDerivativeFromOutput, opElementwiseProductWithTanhDerivativeFromOutput,
    opElementwiseProductWithLinearRectifierDerivativeFromOutput, opElementwiseProductWithLogDerivativeFromOutput,
    opElementwiseProductWithCosDerivative, opElementwiseProductWithSinDerivative,
    opElementwiseProductWithAbsDerivative, opElementwiseProductWithSqrtDerivative,
    opElementwiseProductWithReciprocalDerivative, opSqrOfDifference,
    // binary ops for indexing
    // opIndex,
    // ternary
    opCond /*a ? b : c*/,
    opClip, /*clip a within interval b..c*/
    opElementwiseProductWithLogSumDerivative,
    opCopyIfEqual,
    opElementwiseProductWithExpOfDiff, /* a * exp(b - c) */
    // Note: not all that's implemented in CNTK ComputationNodes has an opcode yet.
};
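
// Illustrative sketch (not from the original source): the ordering constraint
// noted above means a comparison opcode can be derived from sgn(a - b), and
// each comparison's negation sits exactly three entries later. The helper
// names below are hypothetical:
//
//     inline ElementWiseOperator OpFromSign(int sgn) // sgn(a - b) in {-1, 0, +1}
//     {
//         return (ElementWiseOperator)(opLess + sgn + 1); // -> opLess, opEqual, opGreater
//     }
//     inline ElementWiseOperator NegateComparison(ElementWiseOperator op)
//     {
//         return (ElementWiseOperator)(op + 3); // e.g. opLess -> opGreaterEqual
//     }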

// helper to apply a C macro for all operations of each kind
#define ForAllNullaryOps(Macro) \
    Macro(ConstOne);

#define ForAllUnaryOps(Macro) \
    Macro(Copy);              \
    Macro(Negate);            \
    Macro(Not);               \
    Macro(Abs);               \
    Macro(Floor);             \
    Macro(Reciprocal);        \
    Macro(Sigmoid);           \
    Macro(Tanh);              \
    Macro(Sqr);               \
    Macro(Sqrt);              \
    Macro(Exp);               \
    Macro(Log);               \
    Macro(LinearRectifier);   \
    Macro(Cosine);            \
    Macro(Sin);

#define ForAllBinaryOps(Macro)                                        \
    Macro(CopyIf);                                                    \
    Macro(CopyIfNot);                                                 \
    Macro(Sum);                                                       \
    Macro(Difference);                                                \
    Macro(ElementwiseProduct);                                        \
    Macro(ElementwiseQuotient);                                       \
    Macro(LogSum);                                                    \
    Macro(Max);                                                       \
    Macro(Min);                                                       \
    Macro(Equal);                                                     \
    Macro(NotEqual);                                                  \
    Macro(Greater);                                                   \
    Macro(Less);                                                      \
    Macro(GreaterEqual);                                              \
    Macro(LessEqual);                                                 \
    Macro(And);                                                       \
    Macro(Or);                                                        \
    Macro(Xor);                                                       \
    Macro(MaskNegative);                                              \
    Macro(ElementwiseProductWithSigmoidDerivativeFromOutput);         \
    Macro(ElementwiseProductWithTanhDerivativeFromOutput);            \
    Macro(ElementwiseProductWithLinearRectifierDerivativeFromOutput); \
    Macro(ElementwiseProductWithLogDerivativeFromOutput);             \
    Macro(ElementwiseProductWithCosDerivative);                       \
    Macro(ElementwiseProductWithSinDerivative);                       \
    Macro(ElementwiseProductWithAbsDerivative);                       \
    Macro(ElementwiseProductWithReciprocalDerivative);                \
    Macro(ElementwiseProductWithSqrtDerivative);                      \
    Macro(SqrOfDifference);                                           \
    //Macro(Index);

#define ForAllTernaryOps(Macro)                         \
    Macro(Cond);                                        \
    Macro(CopyIfEqual);                                 \
    Macro(Clip);                                        \
    Macro(ElementwiseProductWithLogSumDerivative);      \
    Macro(ElementwiseProductWithExpOfDiff);
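
// Illustrative sketch: the ForAll*Ops lists are X-macros; a caller supplies the
// per-op body and the list expands it once per opcode. For example, a
// hypothetical name-lookup function for the unary ops:
//
//     #define DefineOpCase(oper) \
//         case op##oper:         \
//             return #oper
//     inline const char* UnaryOpName(ElementWiseOperator op)
//     {
//         switch (op)
//         {
//             ForAllUnaryOps(DefineOpCase);
//         default:
//             return "(unknown)";
//         }
//     }
//     #undef DefineOpCase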

// -----------------------------------------------------------------------
// various enums to describe matrix storage formats and flags
// -----------------------------------------------------------------------

enum MatrixFlagBitPosition
{
    // TODO: remove all formats that are actually not supported
    bitPosRowMajor = 0,         // row major matrix
    bitPosSparse = 1,           // sparse matrix (COO if uncompressed)
    bitPosCompressed = 2,       // a compressed sparse format (CSC/CSR)
    bitPosDontOwnBuffer = 3,    // buffer is not owned by this matrix
    bitPosSetValueOnDevice = 4, // in a setValue situation, the copy from buffer is already on the device
};

enum MatrixFormat
{
    // TODO: remove all formats that are actually not supported
    matrixFormatDense = 0,                          // default is dense
    matrixFormatColMajor = 0,                       // default is column major
    matrixFormatRowMajor = 1 << bitPosRowMajor,     // row major matrix
    matrixFormatSparse = 1 << bitPosSparse,         // sparse matrix
    matrixFormatCompressed = 1 << bitPosCompressed, // a compressed sparse format (CSC/CSR/COO)
    matrixFormatDenseColMajor = matrixFormatDense + matrixFormatColMajor,
    matrixFormatDenseRowMajor = matrixFormatDense + matrixFormatRowMajor,
    matrixFormatSparseCSC = matrixFormatSparse + matrixFormatColMajor + matrixFormatCompressed,
    matrixFormatSparseCSR = matrixFormatSparse + matrixFormatRowMajor + matrixFormatCompressed,
    matrixFormatSparseOther = matrixFormatSparse + matrixFormatRowMajor,                   // currently used for CPU sparse format, will change to CSC/CSR eventually
    matrixFormatMask = matrixFormatRowMajor + matrixFormatSparse + matrixFormatCompressed, // mask that covers all the format bits above
    matrixFormatSparseBlockCol,                                                            // col block based sparse matrix
    matrixFormatSparseBlockRow,                                                            // row block based sparse matrix
};

// common matrix flags for use on all matrices
enum MatrixFlags
{
    // first bits of matrix flags are MatrixFormat
    matrixFlagNormal = 0,
    matrixFlagDontOwnBuffer = 1 << bitPosDontOwnBuffer,       // the matrix memory pointers are externally managed, don't allocate/free or attempt to copy to another location
    matrixFlagSetValueOnDevice = 1 << bitPosSetValueOnDevice, // SetValue() call has a buffer that is already on the device
};
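
// Illustrative sketch: MatrixFormat values are composed from the bit positions
// above, so storage properties can be tested with ordinary masks. (Note that
// the block formats, declared after matrixFormatMask, take the next enum
// values and do not follow this bit scheme.)
//
//     MatrixFormat fmt = matrixFormatSparseCSC;
//     bool isSparse   = (fmt & matrixFormatSparse) != 0;                   // true
//     bool isColMajor = (fmt & matrixFormatRowMajor) == 0;                 // true: column-major is the default (0)
//     bool isCSC      = (fmt & matrixFormatMask) == matrixFormatSparseCSC; // true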


// -----------------------------------------------------------------------
// BufferManagement -- controls the allocation and release of memory
//
// 1. The goal of buffer management
// The most memory-frugal policy is to release each buffer as soon as it is no longer needed within the mini-batch,
// but the extra memory operations slow execution down. One way around this is to build a static dependency graph over
// all nodes in a pre-computing pass and reuse memory at runtime, implemented in CNTK as shared node value matrices.
// The other way is a buffer pool that takes over allocation and release requests: physical memory operations are
// replaced by logical ones that cost nearly nothing. The second option, implemented as BufferManagement below, is
// usually more powerful than the first, because it controls every memory operation, including trivial ones such as
// the workspace in convolutions, and it is more flexible: it allocates by size, which makes new allocation policies
// easy to implement.
// 2. How it works
// BufferManagement is a singleton per deviceId, obtained via GetManagerInstance(). Resize() should call
// RequestBuffer() and LogicalReleaseBuffer() in place of the original allocation and release calls. Resize() also has
// a growOnly flag, which normally avoids reallocation unless the size increases; since allocation from the buffer
// pool costs nearly nothing, growOnly is disabled in BufferManagement mode.
// -----------------------------------------------------------------------
class BufferManagement
{
private:
    BufferManagement() = default;

    // Disable copy & move so the singleton instances cannot be duplicated
    DISABLE_COPY_AND_MOVE(BufferManagement);

public:
    static BufferManagement& GetManagerInstance(DEVICEID_TYPE deviceId)
    {
        static std::mutex instanceLock;
        auto instance = m_instances.find(deviceId);
        if (instance == m_instances.end())
        {
            std::lock_guard<std::mutex> lock(instanceLock);
            // re-check under the lock; another thread may have created the instance in the meantime
            instance = m_instances.find(deviceId);
            if (instance == m_instances.end())
            {
                instance = m_instances.insert(std::make_pair(deviceId, std::unique_ptr<BufferManagement>(
                    new BufferManagement()))).first;
                instance->second->m_deviceId = deviceId;
                instance->second->m_totalManageSize = 0;
                instance->second->m_totalAllocSize = 0;
            }
        }
        return *(instance->second);
    }

    // For a request, look in the buffer container first; if nothing suitable is found, allocate a new buffer.
    // When a cached buffer is handed out, 'size' is updated to that buffer's real size.
    template<class ElemType>
    ElemType* RequestBuffer(size_t& size)
    {
        ElemType* bufferPtr = nullptr;
        auto& bufferContainer = BufferContainer<ElemType>();

        // simple size-based lookup; a more efficient or elaborate policy could be implemented here
        auto bufferHint = bufferContainer.lower_bound(size);
        if (bufferHint != bufferContainer.end() && bufferHint->first < size * MEM_MAX_LIMIT_TIMES) 
        {
            bufferPtr = bufferHint->second;
            size = bufferHint->first;
            m_totalManageSize -= size;
            bufferContainer.erase(bufferHint);
            return bufferPtr;
        }

        if (m_deviceId >= 0) {
#ifndef CPUONLY
            auto deviceSize = TracingGPUMemoryAllocator::GetFreeAndTotalMemoryInMBs(m_deviceId);
            float freeMemoryRatio = (float)deviceSize.first / deviceSize.second;
            if (freeMemoryRatio < 0.05f || (deviceSize.first << 20) / sizeof(ElemType) < size) 
            {
                PhysicalReleaseAllBuffer<ElemType>();
            }
            bufferPtr = TracingGPUMemoryAllocator::Allocate<ElemType>(m_deviceId, size);
            m_totalAllocSize += size;
#endif
        }
        else 
        {
            // First, try a no-throw allocation.
            // If it fails, empty the buffer pool and retry with a throwing allocation;
            // if that fails too, let the system throw the bad_alloc exception.
            bufferPtr = new (std::nothrow) ElemType[size];
            if (!bufferPtr) 
            {
                PhysicalReleaseAllBuffer<ElemType>();
                bufferPtr = new ElemType[size];
            }
            m_totalAllocSize += size;
        }

        return bufferPtr;
    }

    // return the buffer to the pool by inserting it, keyed by its size, into the buffer container
    template<class ElemType>
    void LogicalReleaseBuffer(ElemType* buffer, size_t size)
    {
        auto& bufferContainer = BufferContainer<ElemType>();
        bufferContainer.insert(std::make_pair(size, buffer));
        m_totalManageSize += size;
    }

    // physically release the buffer
    template<class ElemType>
    void PhysicalReleaseBuffer(ElemType* buffer)
    {
        if (m_deviceId >= 0) 
        {
#ifndef CPUONLY
            TracingGPUMemoryAllocator::Free<ElemType>(m_deviceId, buffer, false);
#endif
        }
        else {
            delete[] buffer;
        }
    }

    // physically release all cached buffers
    template<class ElemType>
    void PhysicalReleaseAllBuffer()
    {
        auto& bufferContainer = BufferContainer<ElemType>();

        for (auto& iter : bufferContainer) 
        {
            PhysicalReleaseBuffer<ElemType>(iter.second);
        }

        bufferContainer.clear();
        m_totalManageSize = 0;
    }

private:
    static std::unordered_map<DEVICEID_TYPE, std::unique_ptr<BufferManagement>> m_instances;

    template <class ElemType>
    std::multimap<size_t, ElemType*>& BufferContainer();
    DEVICEID_TYPE m_deviceId;
    size_t m_totalManageSize;
    size_t m_totalAllocSize;

    // per-element-type maps of cached buffers, keyed by size
    std::multimap<size_t, float*> m_bufferFloatContainer;
    std::multimap<size_t, double*> m_bufferDoubleContainer;
    std::multimap<size_t, char*> m_bufferCharContainer;
    std::multimap<size_t, short*> m_bufferShortContainer;
    std::multimap<size_t, int*> m_bufferIntContainer;
};
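
// Usage sketch (illustrative only): a request/release round trip through the
// per-device buffer pool. RequestBuffer() may round 'size' up to the size of
// the cached block it returns, and that (possibly updated) size must be passed
// back to LogicalReleaseBuffer() so the block is re-indexed correctly.
//
//     auto& mgr = BufferManagement::GetManagerInstance(CPUDEVICE);
//     size_t size = 1024;                         // element count; may be rounded up on return
//     float* buf = mgr.RequestBuffer<float>(size);
//     // ... use buf[0 .. size) ...
//     mgr.LogicalReleaseBuffer<float>(buf, size); // returns the block to the pool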


// -----------------------------------------------------------------------
// BaseMatrixStorage -- base class for all matrix types (CPU, GPU) x (dense, sparse)
// -----------------------------------------------------------------------

template <class ElemType>
class BaseMatrixStorage : public enable_shared_from_this<BaseMatrixStorage<ElemType>>
{
    template <class ElemType2> friend class BaseMatrix;

private:
    BaseMatrixStorage<ElemType>(const BaseMatrixStorage<ElemType>& ) = delete;
    BaseMatrixStorage<ElemType>& operator=(const BaseMatrixStorage<ElemType>& ) = delete;
public:

    BaseMatrixStorage() 
    {
        ZeroInit(matrixFormatDense, CPUDEVICE);
    }

    BaseMatrixStorage(MatrixFormat format, DEVICEID_TYPE computeDevice)
    {
        ZeroInit(format, computeDevice);
    }

    ~BaseMatrixStorage()
    {
        ReleaseMemory();
        m_numRows = 0;
        m_numCols = 0;
    }

    void ReleaseMemory()
    {
        if (!m_externalBuffer)
        {
            if (m_computeDevice < 0)
            {
                delete[] m_pArray;
                m_pArray = nullptr;
                m_nzValues = nullptr;

                delete[] m_unCompIndex;
                m_unCompIndex = nullptr;

                delete[] m_compIndex;
                m_compIndex = nullptr;

                delete[] m_blockIds;
                m_blockIds = nullptr;
            }
            else
            {
#ifndef CPUONLY
                if (m_pArray != nullptr)
                    TracingGPUMemoryAllocator::Free<ElemType>(m_computeDevice, m_pArray, true);
                m_pArray = nullptr;

                if (m_tempDeviceBuffer != nullptr)
                    TracingGPUMemoryAllocator::Free<GPUSPARSE_INDEX_TYPE>(m_computeDevice, m_tempDeviceBuffer, true);
                m_tempDeviceBuffer = nullptr;
                m_tempDeviceBufferSize = 0;
#endif

                delete[](byte*) m_tempHostBuffer;
                m_tempHostBuffer = nullptr;
            }
            m_elemSizeAllocated = 0;
            m_totalBufferSizeAllocated = 0;
        }
    }

protected:
    MatrixFormat GetFormat() const { return m_format; }
    void SetFormat(MatrixFormat format) { m_format = format; }

    bool HasExternalBuffer() const { return m_externalBuffer; }

    DEVICEID_TYPE GetComputeDeviceId() const { return m_computeDevice; }
    void SetComputeDeviceId(const DEVICEID_TYPE computeId) const { m_computeDevice = computeId; }

    size_t GetNumStorageRows() const { return m_numRows; }
    void SetNumStorageRows(size_t rows) { m_numRows = rows; }

    size_t GetNumStorageCols() const { return m_numCols; }
    void SetNumStorageCols(size_t cols) { m_numCols = cols; }

    size_t GetSizeAllocated() const { return m_elemSizeAllocated; }
    void SetSizeAllocated(size_t alloc) { m_elemSizeAllocated = alloc; }

    size_t GetNumStorageElements() const { return m_numRows * m_numCols; }
    bool IsEmpty() const { return m_numRows == 0 || m_numCols == 0; }

    ElemType* Buffer() const { return m_pArray; }
    void SetBuffer(ElemType* pArray, size_t alloc, bool external = false) { m_pArray = pArray; m_totalBufferSizeAllocated = alloc; m_externalBuffer = external; }

    size_t BufferSizeAllocated() const { return m_totalBufferSizeAllocated; }
    
    size_t GetBlockSize() const { return m_blockSize; }
    void SetBlockSize(size_t blockSize) { m_blockSize = blockSize; }

    GPUSPARSE_INDEX_TYPE* GetTempDeviceBuffer() const { return m_tempDeviceBuffer; }
    void ReserveTempDeviceBuffer(const size_t minSize) const
    { 
        BaseMatrixStorage<ElemType>* nonConstThis = const_cast<BaseMatrixStorage<ElemType>*>(this);
        if (minSize > m_tempDeviceBufferSize)
        {
            TracingGPUMemoryAllocator::Free<GPUSPARSE_INDEX_TYPE>(GetComputeDeviceId(), nonConstThis->m_tempDeviceBuffer);
            nonConstThis->m_tempDeviceBuffer = TracingGPUMemoryAllocator::Allocate<GPUSPARSE_INDEX_TYPE>(GetComputeDeviceId(), minSize);
            nonConstThis->m_tempDeviceBufferSize = minSize;
        }
    }

    void* GetTempHostBuffer() const { return m_tempHostBuffer; }
    void SetTempHostBuffer(void* buffer) const { m_tempHostBuffer = buffer; }

    size_t GetTempHostBufferSize() const { return m_tempHostBufferSize; }
    void SetTempHostBufferSize(size_t bufferSize) const { m_tempHostBufferSize = bufferSize; }

    int GetColIdx() const { return m_colIdx; }
    void SetColIdx(int idx) { m_colIdx = idx; }

    size_t GetCompIndexSize() const { return m_compIndexSize; }
    void SetCompIndexSize(size_t indexSize) { m_compIndexSize = indexSize; }

    ElemType* GetNzValues() { return m_nzValues; }
    void SetNzValues(ElemType* values) { m_nzValues = values; }

    size_t* GetBlockIds() const { return m_blockIds; }
    void SetBlockIds(size_t* blockIds) { m_blockIds = blockIds; }

    size_t GetBlockIdShift() const { return m_blockIdShift; }
    void SetBlockIdShift(size_t blockIdShift) { m_blockIdShift = blockIdShift; }

    CPUSPARSE_INDEX_TYPE* GetUnCompIndex() const { return m_unCompIndex; }
    void SetUnCompIndex(CPUSPARSE_INDEX_TYPE* parray) { m_unCompIndex = parray; }
    
    CPUSPARSE_INDEX_TYPE* GetCompIndex() const { return m_compIndex; }
    void SetCompIndex(CPUSPARSE_INDEX_TYPE* parray) { m_compIndex = parray; }

    void ZeroInit(const MatrixFormat matrixFormat = matrixFormatDense, const DEVICEID_TYPE computeDevice = -1)
    {
        m_externalBuffer           = false;
        m_format                   = matrixFormat;
        m_computeDevice            = computeDevice;
        m_numRows                  = 0;
        m_numCols                  = 0;
        m_pArray                   = nullptr;
        m_elemSizeAllocated        = 0;
        m_totalBufferSizeAllocated = 0;
        m_blockSize                = 0; // block size
        m_tempDeviceBuffer         = nullptr;
        m_tempDeviceBufferSize     = 0;
        m_tempHostBuffer           = nullptr; // used to copy values.
        m_tempHostBufferSize       = 0;
        m_colIdx                   = 0; // used to SetValue()
        m_compIndexSize            = 0;
        m_nzValues                 = nullptr;
        m_unCompIndex              = nullptr; // row/col ids in CSC/CSR format
        m_compIndex                = nullptr; // begin ids of col/row in CSC/CSR format
        m_blockIds                 = nullptr; // block ids
        m_blockIdShift             = 0; // used to get efficient slice, actual col = blockIds[j] - m_blockIdShift
    }

protected:
    // **************************
    // Variables required by all matrices
    // **************************
    MatrixFormat m_format;
    mutable DEVICEID_TYPE m_computeDevice; // current GPU device Id or CPUDEVICE
    bool m_externalBuffer; // true if the buffer is externally owned and must not be allocated/freed by this matrix

    // m_numRows and m_numCols should be removed
    size_t m_numRows;
    size_t m_numCols;
    size_t m_elemSizeAllocated;
    ElemType* m_pArray;

    // **************************
    // GPUSparseMatrix variables
    // **************************

    size_t m_totalBufferSizeAllocated;

    // used by the blockCol and blockRow format
    size_t m_blockSize;                      // block size
    mutable GPUSPARSE_INDEX_TYPE* m_tempDeviceBuffer;
    mutable size_t m_tempDeviceBufferSize;

    mutable void* m_tempHostBuffer; // used to copy values.
    mutable size_t m_tempHostBufferSize;

    // **************************
    // CPUSparseMatrix variables
    // **************************

    int m_colIdx; // used to SetValue()
    size_t m_compIndexSize;
    ElemType* m_nzValues;

    // non-zero values are stored in m_pArray
    CPUSPARSE_INDEX_TYPE* m_unCompIndex; // row/col ids in CSC/CSR format
    CPUSPARSE_INDEX_TYPE* m_compIndex;   // begin ids of col/row in CSC/CSR format

    size_t* m_blockIds;    // block ids
    size_t m_blockIdShift; // used to get efficient slice, actual col = blockIds[j] - m_blockIdShift

};

// -----------------------------------------------------------------------
// BaseMatrix -- base class for all matrix types (CPU, GPU) x (dense, sparse)
// -----------------------------------------------------------------------

template <class ElemType>
class MATH_API BaseMatrix
{
protected:    
    // Default constructor. Copy/Move constructors might set doNotInitialize to true to avoid double initialization.
    BaseMatrix(bool doNotInitializeFields = false)
    {
        if (!doNotInitializeFields)
            ZeroInit();
    }

    virtual ~BaseMatrix()
    {
        ZeroValues();
    }
public:
    void VerifyResizable(const char* function) const 
    { 
        if (!m_sob.unique())
            LogicError("%s: Cannot resize the matrix because it is a view.", function);
        else if (m_sob->HasExternalBuffer())
            LogicError("%s: Cannot resize the matrix because it is externally owned.", function);
    }

    // same as VerifyResizable() except for the error message. Could be folded into one.
    void VerifyMigratable(const char* function) const
    {
        if (!m_sob.unique())
            LogicError("%s: Cannot migrate the matrix between devices because it is a view.", function);
        else if (m_sob->HasExternalBuffer())
            LogicError("%s: Cannot migrate the matrix between devices because it is externally owned.", function);
    }

    // This is needed for Sparse Matrices to ensure they can write to the matrix. Note: writing to slices is not currently supported
    void VerifyWritable(const char* function) const 
    {
        if (!(m_sob->GetNumStorageRows() == m_numRows && m_sob->GetNumStorageCols() == m_numCols))
        {
            LogicError("%s: Cannot write to the matrix because it is a slice.", function);
        }
    }

    bool IsView() const { return (GetNumRows() != m_sob->GetNumStorageRows() || GetNumCols() != m_sob->GetNumStorageCols() || m_sliceViewOffset != 0); }

    void VerifySize(const size_t rows, const size_t cols)
    {
        if (rows != GetNumRows() || cols != GetNumCols())
            LogicError("VerifySize: expected matrix size %lu x %lu, but it is %lu x %lu",
                       rows, cols, GetNumRows(), GetNumCols());
    }

    MatrixFormat GetFormat() const { return m_sob->GetFormat(); }

    bool OwnBuffer() const { return !HasExternalBuffer(); }

    bool IsEmpty() const { return m_numRows == 0 || m_numCols == 0; }

    size_t GetSizeAllocated() const { return m_sob->GetSizeAllocated(); }

    size_t BufferSizeAllocated() const { return m_sob->BufferSizeAllocated(); }

    size_t GetNumRows() const { return m_numRows; }
    size_t GetNumCols() const { return m_numCols; }

protected:

    void SetFormat(MatrixFormat format) { m_sob->SetFormat(format); }

    bool HasExternalBuffer() const { return m_sob->HasExternalBuffer(); }

    DEVICEID_TYPE GetComputeDeviceId() const { return m_sob->GetComputeDeviceId(); }
    void SetComputeDeviceId(const DEVICEID_TYPE computeId) const { m_sob->SetComputeDeviceId(computeId); }

    // TODO: Some of these accessors should be merged into single methods like SetBuffer. 
    size_t GetNumStorageRows() const { return m_sob->GetNumStorageRows(); }
    void SetNumStorageRows(size_t rows) { m_sob->SetNumStorageRows(rows); }

    size_t GetNumStorageCols() const { return m_sob->GetNumStorageCols(); }
    void SetNumStorageCols(size_t cols) { m_sob->SetNumStorageCols(cols); }

    void SetSizeAllocated(size_t alloc) { m_sob->SetSizeAllocated(alloc); }

    ElemType* Buffer() const { return m_sob->Buffer(); }
    void SetBuffer(ElemType* parray, size_t alloc, bool external = false) { m_sob->SetBuffer(parray, alloc, external); }

    
    size_t GetBlockSize() const { return m_sob->GetBlockSize(); }
    void SetBlockSize(size_t blockSize) { m_sob->SetBlockSize(blockSize); }

    GPUSPARSE_INDEX_TYPE* GetTempDeviceBuffer() const { return m_sob->GetTempDeviceBuffer(); }
    void ReserveTempDeviceBuffer(const size_t minSize) const { m_sob->ReserveTempDeviceBuffer(minSize); }

    void* GetTempHostBuffer() const { return m_sob->GetTempHostBuffer(); }
    void SetTempHostBuffer(void* buffer) const { m_sob->SetTempHostBuffer(buffer); }

    size_t GetTempHostBufferSize() const { return m_sob->GetTempHostBufferSize(); }
    void SetTempHostBufferSize(size_t bufferSize) const { m_sob->SetTempHostBufferSize(bufferSize); }

    int GetColIdx() const { return m_sob->GetColIdx(); }
    void SetColIdx(int idx) { m_sob->SetColIdx(idx); }

    size_t GetCompIndexSize() const { return m_sob->GetCompIndexSize(); }
    void SetCompIndexSize(size_t indexSize) { m_sob->SetCompIndexSize(indexSize); }

    ElemType* GetNzValues() { return m_sob->GetNzValues(); }
    void SetNzValues(ElemType* values) { m_sob->SetNzValues(values); }

    size_t* GetBlockIds() const { return m_sob->GetBlockIds(); }
    void SetBlockIds(size_t* blockIds) const { m_sob->SetBlockIds(blockIds); }

    size_t GetBlockIdShift() const { return m_sob->GetBlockIdShift(); }
    void SetBlockIdShift(size_t blockIdShift) { m_sob->SetBlockIdShift(blockIdShift); }

    CPUSPARSE_INDEX_TYPE* GetUnCompIndex() const { return m_sob->GetUnCompIndex(); }
    void SetUnCompIndex(CPUSPARSE_INDEX_TYPE* parray) { m_sob->SetUnCompIndex(parray); }
    
    CPUSPARSE_INDEX_TYPE* GetCompIndex() const { return m_sob->GetCompIndex(); }
    void SetCompIndex(CPUSPARSE_INDEX_TYPE* parray) { m_sob->SetCompIndex(parray); }

    void SetNumRows(size_t numRows) { m_numRows = numRows; }
    void SetNumCols(size_t numCols) { m_numCols = numCols; }

    size_t GetNumElements() const { return m_numRows * m_numCols; }


    void ZeroInit()
    {
        MatrixFormat defFmt = matrixFormatDense;
        DEVICEID_TYPE compDev = -1;
        if (m_sob != nullptr)
        {
            defFmt = m_sob->GetFormat();
            compDev = m_sob->GetComputeDeviceId();

        }
        ZeroInit(defFmt, compDev);
    }

    void ZeroValues()
    {
        m_numRows           = 0;
        m_numCols           = 0;
        m_sliceViewOffset   = 0;
        m_sob               = nullptr;
    }
    void ZeroInit(const MatrixFormat matrixFormat, const DEVICEID_TYPE computeDevice )
    {
        ZeroValues();
        m_sob = make_shared<BaseMatrixStorage<ElemType>>(matrixFormat, computeDevice);
    }

protected:
    //void Clear() {}

    void ZeroStorageInit() { m_sob->ZeroInit(); }
    void ReleaseStorageMemory() { m_sob->ReleaseMemory(); }

    // copy all metadata (but not content that m_sob points to)
    void ShallowCopyFrom(const BaseMatrix& other) 
    {
        *this = other;
    }

protected:

    size_t m_numRows;
    size_t m_numCols;
    // TODO: m_sliceViewOffset has a different meaning in sparse (column offset) versus dense (byte offset to start of pointer). This should perhaps be fixed.
    size_t m_sliceViewOffset; // this is the slice view of a matrix
    // TODO: implement m_colStride
    size_t m_colStride;

    // Storage OBject containing the underlying data used by this matrix
    shared_ptr<BaseMatrixStorage<ElemType>> m_sob;
};
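
// Illustrative sketch (not part of the original header) of why
// VerifyResizable() tests m_sob.unique(): views such as column slices share
// the storage object, so more than one matrix holds the same shared_ptr, and
// resizing through either alias would invalidate the other. Assuming the
// Matrix<ElemType> wrapper built on top of this class:
//
//     Matrix<float> full(128, 64, deviceId);   // sole owner of its storage -> resizable
//     auto slice = full.ColumnSlice(0, 32);    // both matrices now share one storage object
//     slice.Resize(256, 32);                   // VerifyResizable() fails: "Cannot resize ... it is a view."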

}}}