//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
// Matrix.cpp -- main CPP file that contains all Matrix functions exported by the Cntk.Math.dll
//
#include "stdafx.h"
#include "Basics.h"
#include "Matrix.h"
#include "CPUMatrix.h"
#include "CPUSparseMatrix.h"
#include "GPUMatrix.h"
#include "GPUSparseMatrix.h"
#include "File.h"
#include <assert.h>
#include <math.h>
#include "GPUWatcher.h" // bring in this class as well so that it gets exported from this DLL
#include <memory>
#include <atomic>
#include "Quantizers.h"
#ifndef CPUONLY
#define ANAMEFORLIB "Cntk.Math.Cuda-" ## CNTK_COMPONENT_VERSION ## ".lib"
#pragma comment(lib, ANAMEFORLIB) // built by MathCUDA project
#undef ANAMEFORLIB
#endif
#pragma warning(disable : 4127) // conditional expression is constant; "if (sizeof(ElemType)==sizeof(float))" triggers this
#pragma warning(disable : 4239) // nonstandard extension; triggered by this pattern: "auto& second = transposeB ? b.m_GPUMatrix->Transpose() : *b.m_GPUMatrix;"
#pragma warning(disable : 4702) // unreachable code; triggered for unknown reasons
// Helper to dispath matrix calls to the 4 underlying matrix libraries (CPU,GPU) x (DENSE,SPARSE)
// 'MatrixPointerToCheck' determines where the operation takes place.
// 'MatrixPointerToSetFlag' is the output. If not null and its location is BOTH, we collapse it to one.
#pragma warning(disable : 4456) // declaration of curLocation hides previous local declaration
#define DISPATCH_MATRIX_ON_FLAG(MatrixPointerToCheck, MatrixPointerToSetFlag, CPUDense, GPUDense, CPUSparse, GPUSparse) \
{ \
CurrentDataLocation curLocation = (MatrixPointerToCheck)->GetCurrentMatrixLocation(); \
if (curLocation == CurrentDataLocation::GPU || curLocation == CurrentDataLocation::BOTH) \
{ \
if ((MatrixPointerToCheck)->GetMatrixType() != MatrixType::SPARSE) \
{ \
GPUDense; \
if (MatrixPointerToSetFlag != nullptr) \
((Matrix*) MatrixPointerToSetFlag)->SetDataLocation(CurrentDataLocation::GPU, MatrixType::DENSE); \
} \
else \
{ \
GPUSparse; \
if (MatrixPointerToSetFlag != nullptr) \
((Matrix*) MatrixPointerToSetFlag)->SetDataLocation(CurrentDataLocation::GPU, MatrixType::SPARSE); \
} \
} \
else if (curLocation == CurrentDataLocation::CPU) \
{ \
if ((MatrixPointerToCheck)->GetMatrixType() != MatrixType::SPARSE) \
{ \
CPUDense; \
if (MatrixPointerToSetFlag != nullptr) \
((Matrix*) MatrixPointerToSetFlag)->SetDataLocation(CurrentDataLocation::CPU, MatrixType::DENSE); \
} \
else \
{ \
CPUSparse; \
if (MatrixPointerToSetFlag != nullptr) \
((Matrix*) MatrixPointerToSetFlag)->SetDataLocation(CurrentDataLocation::CPU, MatrixType::SPARSE); \
} \
} \
else \
{ \
RuntimeError("Matrices do not exist in either CPU or GPU."); \
} \
}
// version of dispatch macro that prefers the CPU if the 'MatrixPointerToCheck' location is BOTH
#define DISPATCH_MATRIX_ON_FLAG_USECPU_4BOTH(MatrixPointerToCheck, MatrixPointerToSetFlag, CPUDense, GPUDense, CPUSparse, GPUSparse) \
{ \
CurrentDataLocation curLocation = (MatrixPointerToCheck)->GetCurrentMatrixLocation(); \
if (curLocation == CurrentDataLocation::GPU) \
{ \
if ((MatrixPointerToCheck)->GetMatrixType() != MatrixType::SPARSE) \
{ \
GPUDense; \
if (MatrixPointerToSetFlag != nullptr) \
((Matrix*) MatrixPointerToSetFlag)->SetDataLocation(CurrentDataLocation::GPU, MatrixType::DENSE); \
} \
else \
{ \
GPUSparse; \
if (MatrixPointerToSetFlag != nullptr) \
((Matrix*) MatrixPointerToSetFlag)->SetDataLocation(CurrentDataLocation::GPU, MatrixType::SPARSE); \
} \
} \
else if (curLocation == CurrentDataLocation::CPU || curLocation == CurrentDataLocation::BOTH) \
{ \
if ((MatrixPointerToCheck)->GetMatrixType() != MatrixType::SPARSE) \
{ \
CPUDense; \
if (MatrixPointerToSetFlag != nullptr) \
((Matrix*) MatrixPointerToSetFlag)->SetDataLocation(CurrentDataLocation::CPU, MatrixType::DENSE); \
} \
else \
{ \
CPUSparse; \
if (MatrixPointerToSetFlag != nullptr) \
((Matrix*) MatrixPointerToSetFlag)->SetDataLocation(CurrentDataLocation::CPU, MatrixType::SPARSE); \
} \
} \
else \
{ \
RuntimeError("Matrices do not exist in either CPU or GPU."); \
} \
}
// version of helper macro that executes both CPU and GPU macros if 'matrixPointer' location is BOTH
#define DISPATCH_MATRIX_ON_FLAG_USEBOTH_4BOTH(matrixPointer, CPUDense, GPUDense, CPUSparse, GPUSparse) \
{ \
auto curLocation = (matrixPointer)->GetCurrentMatrixLocation(); \
auto curMatrixType = (matrixPointer)->GetMatrixType(); \
if (curLocation == CurrentDataLocation::NONE) \
LogicError("Matrices do not exist in either CPU or GPU."); \
if (curMatrixType == MatrixType::UNDETERMINED) \
LogicError("Matrices must be SPARSE or DENSE."); \
if (curLocation != CurrentDataLocation::CPU) /*GPU or BOTH*/ \
{ \
if (curMatrixType == MatrixType::DENSE) \
{ \
GPUDense; \
} \
else \
{ \
GPUSparse; \
} \
} \
if (curLocation != CurrentDataLocation::GPU) /*CPU or BOTH*/ \
{ \
if (curMatrixType == MatrixType::DENSE) \
{ \
CPUDense; \
} \
else \
{ \
CPUSparse; \
} \
} \
}
namespace Microsoft { namespace MSR { namespace CNTK {
std::atomic<int> m_mathLibTraceLevel(0);
void SetMathLibTraceLevel(int traceLevel)
{
m_mathLibTraceLevel.store(traceLevel);
}
int GetMathLibTraceLevel()
{
return m_mathLibTraceLevel.load();
}
MatrixBase::~MatrixBase() { }
#pragma region Constructors, destructors and other static matrix builders
// TODO: Reformat DISPATCH... macros to the following form:
// DISPATCH..(p1, p2,
// { Cpu code },
// { GPU code },
// ...
// Initialize members
template <class ElemType>
void Matrix<ElemType>::Init(DEVICEID_TYPE deviceId)
{
ReleaseMemory();
m_preferredDeviceId = deviceId;
m_numTimesDeviceChanged = 0;
m_numTimesMatrixTypeChanged = 0;
m_devicesTransferedTo[1] = m_devicesTransferedTo[0] = CPUDEVICE - 1; // (some value that is different from any valid value)
}
// shallow-copy all members
template <class ElemType>
void Matrix<ElemType>::ShallowCopyFrom(const Matrix<ElemType>& other)
{
m_baseMatrix = other.m_baseMatrix;
m_GPUMatrix = other.m_GPUMatrix;
m_CPUMatrix = other.m_CPUMatrix;
m_GPUSparseMatrix = other.m_GPUSparseMatrix;
m_CPUSparseMatrix = other.m_CPUSparseMatrix;
m_matrixType = other.m_matrixType;
m_currentDataLocation = other.m_currentDataLocation;
m_preferredDeviceId = other.m_preferredDeviceId;
m_numTimesDeviceChanged = other.m_numTimesDeviceChanged;
m_numTimesMatrixTypeChanged = other.m_numTimesMatrixTypeChanged;
m_devicesTransferedTo[0] = other.m_devicesTransferedTo[0]; // TODO: spelling
m_devicesTransferedTo[1] = other.m_devicesTransferedTo[1];
}
// Call this function after an update operation has created/set/updated the respective pointers.
// - location: BOTH|CPU|GPU
// - pass BOTH only if object will be read from; it is not allowed to write to both and then call this function.
// - if CPU/GPU and current is BOTH, then object was written to
// What gets updated:
// - m_currentDataLocation: from function argument
// - m_matrixType: from function argument unless UNDETERMINED in which case m_matrixType remains unmodified
// - m_baseMatrix: to one of current values of m_[GC]PU{Sparse,}Matrix
// This function is heavily overloaded in its responsibility.
// - first-time initialization, e.g. of a ColumnSlice (NONE->!NONE)
// - after creating a temp copy for reading
// - collapse temp copies after writing to one of them
// - setting matrixType if not set yet
template <class ElemType>
void Matrix<ElemType>::SetDataLocation(CurrentDataLocation location, MatrixType type) const
{
assert(location == CurrentDataLocation::CPU || location == CurrentDataLocation::GPU || location == CurrentDataLocation::BOTH);
// if the object used to live on BOTH, this will collapse it to 'location' (unless we actually wrote into BOTH)
// In that case, we do a sanity check here that the object is a singleton view,
// since otherwise the collapsing would go unnoticed by the other views.
// The cases to cover:
// - everything is allowed on a singleton view
// - if the current state is BOTH:
// -> The result was written to 'location' so we should collapse it to there.
// - multiple views: much is forbidden since we cannot notify the other views on which one was written to
// - CPU <-> GPU: FORBIDDEN
// - BOTH -> CPU or GPU: current state is BOTH: location says which side was written to
// -> FORBIDDEN to write into
// - CPU or GPU -> BOTH: current state is CPU or GPU
// and a view onto it is put into BOTH state
// -> OK but inefficent to read, since this is likely happening over again; but we cannot put all views into BOTH state
// - BOTH -> BOTH:
// - read case: OK
// - write case: forbidden to call this function in this way
// - NONE -> !NONE: FORBIDDEN
if (m_currentDataLocation != location && // it is attempted to change location
m_currentDataLocation != CurrentDataLocation::NONE && // from a valid object (NONE means we are a fresh object from ColumnSlice())
location != CurrentDataLocation::BOTH) // and we are changing it not into a temporary copy for reading
{
// we get here if we wrote into this object that was BOTH but is no longer, or if we move between CPU and GPU
// Both is forbidden on shared views since we cannot inform other views of this change.
// We will now check any *valid* pointer will now be checked for uniqueness. There may be mismatching left-over pointers kept around in case they should be revived.
if (m_matrixType == MatrixType::DENSE) // note: this checks the current type, not the new one passed in. Asssumption: this tells us which pointers are valid.
{
assert(m_currentDataLocation == CurrentDataLocation::GPU || m_CPUMatrix);
assert(m_currentDataLocation == CurrentDataLocation::CPU || m_GPUMatrix);
if (m_currentDataLocation != CurrentDataLocation::GPU) ((BaseMatrix<ElemType>*)m_CPUMatrix.get())->VerifyMigratable("SetDataLocation [CPUMatrix]");
if (m_currentDataLocation != CurrentDataLocation::CPU) ((BaseMatrix<ElemType>*)m_GPUMatrix.get())->VerifyMigratable("SetDataLocation [GPUMatrix]");
}
else if (m_matrixType == MatrixType::SPARSE)
{
assert(m_currentDataLocation == CurrentDataLocation::GPU || m_CPUSparseMatrix);
assert(m_currentDataLocation == CurrentDataLocation::CPU || m_GPUSparseMatrix);
if (m_currentDataLocation != CurrentDataLocation::GPU) ((BaseMatrix<ElemType>*)m_CPUSparseMatrix.get())->VerifyMigratable("SetDataLocation [CPUSparseMatrix]");
if (m_currentDataLocation != CurrentDataLocation::CPU) ((BaseMatrix<ElemType>*)m_GPUSparseMatrix.get())->VerifyMigratable("SetDataLocation [GPUSparseMatrix]");
}
// TODO: Why do we need these typecasts? (without it will fail with "cannot access private member declared in class 'Microsoft::MSR::CNTK::CPUMatrix<float>'")
if (m_baseMatrix && !OwnBuffer()) // same arguments for externally owned matrices: Can read a temp but not write.
LogicError("SetDataLocation: A non-owning object cannot be written to in BOTH state.");
}
// passed validation: we can now update the state
m_currentDataLocation = location;
// update the matrix type if passed in
if (type != MatrixType::UNDETERMINED)
m_matrixType = type;
// set m_baseMatrix (if location is unchanged, this will not change the pointer)
// Note: m_currentDataLocation may also be CurrentDataLocation::BOTH, in which case the base matrix will be GPU.
if (m_matrixType == MatrixType::DENSE)
m_baseMatrix = ((m_currentDataLocation == CurrentDataLocation::CPU) ? dynamic_cast<BaseMatrix<ElemType>*>(m_CPUMatrix.get()) : dynamic_cast<BaseMatrix<ElemType>*>(m_GPUMatrix.get()));
else if (m_matrixType == MatrixType::SPARSE)
m_baseMatrix = ((m_currentDataLocation == CurrentDataLocation::CPU) ? dynamic_cast<BaseMatrix<ElemType>*>(m_CPUSparseMatrix.get()) : dynamic_cast<BaseMatrix<ElemType>*>(m_GPUSparseMatrix.get()));
// Note: Typecasts are necessary since C++ cannot figure out the common base type (probably due to shared_ptr).
// sanity check
if (!m_baseMatrix && m_matrixType != MatrixType::UNDETERMINED)
LogicError("SetDataLocation: New m_baseMatrix must not be NULL.");
}
//this is a private constructor only used internally to initialize a blank matrix
template <class ElemType>
Matrix<ElemType>::Matrix(const MatrixFlags matrixFlags, const MatrixType matrixType, const MatrixFormat matrixFormat, DEVICEID_TYPE deviceID)
{
Init(deviceID);
if (!(matrixFlags & matrixFlagDontOwnBuffer))
SwitchToMatrixType(matrixType, matrixFormat, false);
}
//this is a private constructor only used internally to initialize a blank matrix
template <class ElemType>
Matrix<ElemType>::Matrix(const MatrixFlags matrixFlags, const MatrixType matrixType, DEVICEID_TYPE deviceID)
{
Init(deviceID);
if (!(matrixFlags & matrixFlagDontOwnBuffer))
SwitchToMatrixType(matrixType, matrixType == MatrixType::DENSE ? MatrixFormat::matrixFormatDense : MatrixFormat::matrixFormatSparseCSC, false);
}
//this is a private constructor only used internally to initialize a blank matrix
template <class ElemType>
Matrix<ElemType>::Matrix(const MatrixFlags matrixFlags, DEVICEID_TYPE deviceID)
{
Init(deviceID);
if (!(matrixFlags & matrixFlagDontOwnBuffer))
SwitchToMatrixType(MatrixType::DENSE, MatrixFormat::matrixFormatDense, false);
}
template <class ElemType>
Matrix<ElemType>::Matrix(DEVICEID_TYPE deviceID)
{
Init(deviceID);
SwitchToMatrixType(MatrixType::DENSE, MatrixFormat::matrixFormatDense, false);
}
// constructor for Matrix class to wrap an externally managed BaseMatrix, indicated by the use of shared_ptr.
// The appropriate destructor should be passed in by the caller.
// baseMatrix - base matrix for this element
// pArray - pointer to current data array, will replace existing pointer in baseMatrix if != NULL
// deviceId - deviceId where the pArray exists
#if 0
template <class ElemType>
Matrix<ElemType>::Matrix(shared_ptr<BaseMatrix<ElemType>> baseMatrix, ElemType* pArray, DEVICEID_TYPE deviceId) // constructor for setting Matrix from a base matrix
{
Init(deviceId);
if (baseMatrix->GetFormat() & matrixFormatSparse)
{
if (m_preferredDeviceId == CPUDEVICE)
{
m_CPUSparseMatrix = DownCast<CPUSparseMatrix<ElemType>>(baseMatrix);
SetDataLocation(CPU, SPARSE);
}
else
{
m_GPUSparseMatrix = DownCast<GPUSparseMatrix<ElemType>>(baseMatrix);
SetDataLocation(GPU, SPARSE);
}
}
else
{
if (m_preferredDeviceId == CPUDEVICE)
{
m_CPUMatrix = DownCast<CPUMatrix<ElemType>>(baseMatrix);
SetDataLocation(CPU, DENSE);
}
else
{
m_GPUMatrix = DownCast<GPUMatrix<ElemType>>(baseMatrix);
SetDataLocation(GPU, DENSE);
}
}
m_baseMatrix = baseMatrix;
m_baseMatrix->SetBuffer(pArray,0);
}
#endif
template <class ElemType>
Matrix<ElemType>::Matrix(const size_t numRows, const size_t numCols, DEVICEID_TYPE deviceId, const MatrixType matrixType, const MatrixFormat matrixFormat, const size_t nnz)
{
Init(deviceId);
if (matrixType == MatrixType::SPARSE)
{
if (m_preferredDeviceId == CPUDEVICE)
{
m_CPUSparseMatrix = make_shared<CPUSparseMatrix<ElemType>>(matrixFormat, numRows, numCols, nnz);
SetDataLocation(CPU, SPARSE);
}
else
{
m_GPUSparseMatrix = make_shared<GPUSparseMatrix<ElemType>>(numRows, numCols, nnz, m_preferredDeviceId, matrixFormat);
SetDataLocation(GPU, SPARSE);
}
}
else
{
if (matrixFormat != matrixFormatDense)
{
NOT_IMPLEMENTED;
}
if (m_preferredDeviceId == CPUDEVICE)
{
m_CPUMatrix = make_shared<CPUMatrix<ElemType>>(numRows, numCols);
SetDataLocation(CPU, DENSE);
}
else
{
m_GPUMatrix = make_shared<GPUMatrix<ElemType>>(numRows, numCols, m_preferredDeviceId);
SetDataLocation(GPU, DENSE);
}
SetValue(0);
}
}
template <class ElemType>
Matrix<ElemType>::Matrix(const size_t numRows, const size_t numCols, ElemType* pArray, DEVICEID_TYPE deviceId, const size_t matrixFlags, const size_t nnz)
{
Init(deviceId);
if (m_preferredDeviceId == CPUDEVICE)
{
if (matrixFlags & matrixFormatSparse)
{
// WARNING: matrixFlag is not passed in and externally managed array cannot be passed in
m_CPUSparseMatrix = make_shared<CPUSparseMatrix<ElemType>>(matrixFormatSparseCSC, numRows, numCols, nnz);
SetDataLocation(CPU, SPARSE);
}
else
{
m_CPUMatrix = make_shared<CPUMatrix<ElemType>>(numRows, numCols, pArray, matrixFlags);
SetDataLocation(CPU, DENSE);
}
}
else
{
if (matrixFlags & matrixFormatSparse)
{
// m_GPUSparseMatrix = new GPUSparseMatrix<ElemType>(numRows,numCols,nnz, pArray,matrixFlags,m_preferredDeviceId);
m_GPUSparseMatrix = make_shared<GPUSparseMatrix<ElemType>>(m_preferredDeviceId, MatrixFormat(matrixFlags & MatrixFormat::matrixFormatMask));
m_GPUSparseMatrix->RequireSizeAndAllocate(numRows, numCols, nnz, true, false);
SetDataLocation(GPU, SPARSE);
}
else
{
m_GPUMatrix = make_shared<GPUMatrix<ElemType>>(numRows, numCols, m_preferredDeviceId, pArray, matrixFlags);
SetDataLocation(GPU, DENSE);
}
}
// Why is this here??
/*
if (matrixFlagDontOwnBuffer & matrixFlags)
m_baseMatrix->SetOwnBuffer(false);
*/
}
//copy constructor, deep copy
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::DeepClone() const
{
return Matrix<ElemType>(*this, GetDeviceId());
}
template <class ElemType>
Matrix<ElemType>::Matrix(const Matrix<ElemType>& deepCopyFrom, DEVICEID_TYPE deviceId)
{
int origCopyFromDeviceId = deepCopyFrom.GetDeviceId();
Init(deviceId); // will set m_preferredDeviceId
deepCopyFrom._transferToDevice(m_preferredDeviceId, true);
DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom,
this,
m_CPUMatrix = make_shared<CPUMatrix<ElemType>>(*(deepCopyFrom.m_CPUMatrix)),
m_GPUMatrix = make_shared<GPUMatrix<ElemType>>(*(deepCopyFrom.m_GPUMatrix)),
m_CPUSparseMatrix = make_shared<CPUSparseMatrix<ElemType>>(*(deepCopyFrom.m_CPUSparseMatrix)),
m_GPUSparseMatrix = make_shared<GPUSparseMatrix<ElemType>>(*(deepCopyFrom.m_GPUSparseMatrix)));
// should we move back?
deepCopyFrom._transferToDevice(origCopyFromDeviceId, true);
m_preferredDeviceId = deepCopyFrom.m_preferredDeviceId;
}
//move constructor, shallow copy
template <class ElemType>
Matrix<ElemType>::Matrix(Matrix<ElemType>&& moveFrom)
{
Init((DEVICEID_TYPE)moveFrom.GetDeviceId());
#if 1
operator=(move(moveFrom));
#else
DISPATCH_MATRIX_ON_FLAG(&moveFrom,
this,
m_CPUMatrix = new CPUMatrix<ElemType>(static_cast<CPUMatrix<ElemType>&&>(*(moveFrom.m_CPUMatrix))),
m_GPUMatrix = new GPUMatrix<ElemType>(static_cast<GPUMatrix<ElemType>&&>(*(moveFrom.m_GPUMatrix))),
m_CPUSparseMatrix = new CPUSparseMatrix<ElemType>(static_cast<CPUSparseMatrix<ElemType>&&>(*(moveFrom.m_CPUSparseMatrix))),
m_GPUSparseMatrix = new GPUSparseMatrix<ElemType>(static_cast<GPUSparseMatrix<ElemType>&&>(*(moveFrom.m_GPUSparseMatrix))));
m_preferredDeviceId = moveFrom.m_preferredDeviceId;
#endif
}
//move assignment operator, shallow copy
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::operator=(Matrix<ElemType>&& moveFrom)
{
if (this == &moveFrom)
LogicError("Matrix: Move assignment into itself is forbidden.");
#if 1
// shallow-copy all members
ShallowCopyFrom(moveFrom);
// virgin-init the source
moveFrom.Init(CPUDEVICE);
#else
m_preferredDeviceId = moveFrom.m_preferredDeviceId;
DISPATCH_MATRIX_ON_FLAG(&moveFrom,
this,
if (m_CPUMatrix != nullptr) m_CPUMatrix->operator=(static_cast<CPUMatrix<ElemType>&&>(*(moveFrom.m_CPUMatrix)));
else m_CPUMatrix = new CPUMatrix<ElemType>(static_cast<CPUMatrix<ElemType>&&>(*(moveFrom.m_CPUMatrix))),
if (m_GPUMatrix != nullptr) m_GPUMatrix->operator=(static_cast<GPUMatrix<ElemType>&&>(*(moveFrom.m_GPUMatrix)));
else m_GPUMatrix = new GPUMatrix<ElemType>(static_cast<GPUMatrix<ElemType>&&>(*(moveFrom.m_GPUMatrix))),
if (m_CPUSparseMatrix != nullptr) m_CPUSparseMatrix->operator=(static_cast<CPUSparseMatrix<ElemType>&&>(*(moveFrom.m_CPUSparseMatrix)));
else m_CPUSparseMatrix = new CPUSparseMatrix<ElemType>(static_cast<CPUSparseMatrix<ElemType>&&>(*(moveFrom.m_CPUSparseMatrix))),
if (m_GPUSparseMatrix != nullptr) m_GPUSparseMatrix->operator=(static_cast<GPUSparseMatrix<ElemType>&&>(*(moveFrom.m_GPUSparseMatrix)));
else m_GPUSparseMatrix = new GPUSparseMatrix<ElemType>(static_cast<GPUSparseMatrix<ElemType>&&>(*(moveFrom.m_GPUSparseMatrix))));
#endif
return *this;
}
template <class ElemType>
void Matrix<ElemType>::ReleaseMemory()
{
m_baseMatrix = nullptr;
// Perf: Avoid assignments to shared_ptr unless necessary. In certain versions of the standard library
// they cause ref counting, and this piece of code is called often..
if (m_GPUMatrix.get() != nullptr)
m_GPUMatrix = nullptr;
if (m_CPUMatrix.get() != nullptr)
m_CPUMatrix = nullptr;
if (m_GPUSparseMatrix.get() != nullptr)
m_GPUSparseMatrix = nullptr;
if (m_CPUSparseMatrix.get() != nullptr)
m_CPUSparseMatrix = nullptr;
m_matrixType = MatrixType::UNDETERMINED;
m_currentDataLocation = CurrentDataLocation::NONE;
}
template <class ElemType>
Matrix<ElemType>::~Matrix(void)
{
ReleaseMemory();
}
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::Ones(const size_t rows, const size_t cols, DEVICEID_TYPE deviceId)
{
Matrix<ElemType> c(rows, cols, deviceId); // will initialize to 0
c.SetValue(1);
return c;
}
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::Zeros(const size_t rows, const size_t cols, DEVICEID_TYPE deviceId)
{
Matrix<ElemType> c(rows, cols, deviceId); // will initialize to 0
c.SetValue(0);
return c;
}
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::Eye(const size_t rows, DEVICEID_TYPE deviceId)
{
Matrix<ElemType> c(rows, rows, deviceId); // will initialize to 0
c.SetDiagonalValue(1);
return c;
}
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::RandomUniform(const size_t rows, const size_t cols, DEVICEID_TYPE deviceId, const ElemType low, const ElemType high, unsigned long seed)
{
Matrix<ElemType> c(rows, cols, deviceId); // will initialize to 0
c.SetUniformRandomValue(low, high, seed);
return c;
}
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::RandomGaussian(const size_t rows, const size_t cols, DEVICEID_TYPE deviceId, const ElemType mean, const ElemType sigma, unsigned long seed)
{
Matrix<ElemType> c(rows, cols, deviceId); // will initialize to 0
c.SetGaussianRandomValue(mean, sigma, seed);
return c;
}
template <class ElemType>
void Matrix<ElemType>::SetDevice(DEVICEID_TYPE deviceId)
{
if (deviceId >= 0)
GPUMatrix<ElemType>::SetDevice(deviceId);
}
template <class ElemType>
void Matrix<ElemType>::Read(File& stream)
{
Matrix<ElemType>& M = *this;
char type;
stream >> type;
if (type == 'd')
{
if (M.GetDeviceId() < 0)
{
if (!M.m_CPUMatrix)
M.m_CPUMatrix = make_shared<CPUMatrix<ElemType>>();
stream >> (*M.m_CPUMatrix);
M.SetDataLocation(CPU, DENSE);
}
else
{
if (!M.m_GPUMatrix)
M.m_GPUMatrix = make_shared<GPUMatrix<ElemType>>(M.GetDeviceId());
stream >> (*M.m_GPUMatrix);
M.SetDataLocation(GPU, DENSE);
}
}
else if (type == 's')
{
if (M.GetDeviceId() < 0)
{
NOT_IMPLEMENTED; // You might want to tranfer your matrix to GPU
}
else
{
if (M.m_GPUSparseMatrix == NULL)
M.m_GPUSparseMatrix = make_shared<GPUSparseMatrix<ElemType>>(M.GetDeviceId());
stream >> (*M.m_GPUSparseMatrix);
M.SetDataLocation(GPU, SPARSE);
}
}
else
LogicError("Read: Input file corrupt (invalid matrix type field 0x%02d, should be 'f' or 'd').", type);
}
template <class ElemType>
void Matrix<ElemType>::Write(File& stream) const
{
const Matrix<ElemType>& M = *this;
if (M.GetMatrixType() == MatrixType::DENSE)
{
stream << 'd';
if (M.GetDeviceId() < 0)
stream << (*M.m_CPUMatrix);
else
stream << (*M.m_GPUMatrix);
}
else
{
stream << 's';
if (M.GetDeviceId() < 0)
NOT_IMPLEMENTED // stream<<(*M.m_CPUMatrix);
else stream
<< (*M.m_GPUSparseMatrix);
}
}
#pragma endregion Constructors, destructors and other static matrix builders
#pragma region Basic Operators
template <class ElemType>
size_t Matrix<ElemType>::BufferSize() const
{
DISPATCH_MATRIX_ON_FLAG(this,
nullptr,
return m_baseMatrix->GetSizeAllocated() * sizeof(ElemType),
return m_baseMatrix->GetSizeAllocated() * sizeof(ElemType),
return m_CPUSparseMatrix->BufferSize(),
return m_GPUSparseMatrix->BufferSizeAllocated());
}
// BUGBUG: This is ugly code. The outside world should not have access to the raw data pointers.
// if this is to be used, then at least it should also return a number of bytes as well.
template <class ElemType>
ElemType* Matrix<ElemType>::Data() const
{
DISPATCH_MATRIX_ON_FLAG(this,
nullptr,
return m_CPUMatrix->Data(),
return m_GPUMatrix->Data(),
return m_CPUSparseMatrix->Data(),
return m_GPUSparseMatrix->Data());
}
template <class ElemType>
ElemType* Matrix<ElemType>::CopyToArray() const
{
DISPATCH_MATRIX_ON_FLAG(this,
nullptr,
return m_CPUMatrix->CopyToArray(),
return m_GPUMatrix->CopyToArray(),
{ CPUMatrix<ElemType> tmpDense(m_CPUSparseMatrix->GetNumRows(), m_CPUSparseMatrix->GetNumCols()); tmpDense.SetValue((ElemType)0); CPUSparseMatrix<ElemType>::ScaleAndAdd((ElemType)1, *m_CPUSparseMatrix, tmpDense); return tmpDense.CopyToArray(); },
return m_GPUSparseMatrix->CopyToDenseMatrix().CopyToArray());
}
//memory will be allocated by the callee if not enough but need to be deleted by the caller after it's done
//return number of elements copied
template <class ElemType>
size_t Matrix<ElemType>::CopyToArray(ElemType*& arrayCopyTo, size_t& currentArraySize) const
{
DISPATCH_MATRIX_ON_FLAG(this,
nullptr,
return m_CPUMatrix->CopyToArray(arrayCopyTo, currentArraySize),
return m_GPUMatrix->CopyToArray(arrayCopyTo, currentArraySize),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::CopySection(size_t numRows, size_t numCols, ElemType* dst, size_t colStride) const
{
DISPATCH_MATRIX_ON_FLAG(this,
nullptr,
m_CPUMatrix->CopySection(numRows, numCols, dst, colStride),
m_GPUMatrix->CopySection(numRows, numCols, dst, colStride),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
// BUGBUG: Some code checks before calling here whether one of the dimensions is 0.
// This function must handle that case properly, that is, preserving the non-zero dimension.
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::ColumnSlice(size_t startColumn, size_t numCols) const
{
int devId = GetDeviceId();
Matrix<ElemType> slice(matrixFlagDontOwnBuffer, (DEVICEID_TYPE) devId); // this already creates pointers
slice.m_preferredDeviceId = m_preferredDeviceId;
// create slices for the underlying object
// Note: In case of data location == BOTH, this creates two objects just like in the source.
if (GetMatrixType() == MatrixType::DENSE)
{
if (GetCurrentMatrixLocation() == CPU || GetCurrentMatrixLocation() == BOTH)
{
if (slice.m_CPUMatrix)
slice.m_CPUMatrix->operator=(static_cast<CPUMatrix<ElemType>&&>(m_CPUMatrix->ColumnSlice(startColumn, numCols)));
else
slice.m_CPUMatrix = make_shared<CPUMatrix<ElemType>>(static_cast<CPUMatrix<ElemType>&&>(m_CPUMatrix->ColumnSlice(startColumn, numCols)));
}
if (GetCurrentMatrixLocation() == GPU || GetCurrentMatrixLocation() == BOTH)
{
if (slice.m_GPUMatrix)
slice.m_GPUMatrix->operator=(static_cast<GPUMatrix<ElemType>&&>(m_GPUMatrix->ColumnSlice(startColumn, numCols)));
else
slice.m_GPUMatrix = make_shared<GPUMatrix<ElemType>>(static_cast<GPUMatrix<ElemType>&&>(m_GPUMatrix->ColumnSlice(startColumn, numCols)));
}
}
else if (GetMatrixType() == MatrixType::SPARSE)
{
if (GetCurrentMatrixLocation() == CPU || GetCurrentMatrixLocation() == BOTH)
{
if (slice.m_CPUSparseMatrix)
slice.m_CPUSparseMatrix->operator=(static_cast<CPUSparseMatrix<ElemType>&&>(m_CPUSparseMatrix->ColumnSlice(startColumn, numCols)));
else
slice.m_CPUSparseMatrix = make_shared<CPUSparseMatrix<ElemType>>(static_cast<CPUSparseMatrix<ElemType>&&>(m_CPUSparseMatrix->ColumnSlice(startColumn, numCols)));
}
if (GetCurrentMatrixLocation() == GPU || GetCurrentMatrixLocation() == BOTH)
{
if (slice.m_GPUSparseMatrix)
slice.m_GPUSparseMatrix->operator=(static_cast<GPUSparseMatrix<ElemType>&&>(m_GPUSparseMatrix->ColumnSlice(startColumn, numCols)));
else
slice.m_GPUSparseMatrix = make_shared<GPUSparseMatrix<ElemType>>(static_cast<GPUSparseMatrix<ElemType>&&>(m_GPUSparseMatrix->ColumnSlice(startColumn, numCols)));
}
}
else
LogicError("Undetermined matrix type");
// update the slice's m_currentDataLocation, m_matrixType, and m_baseMatrix
// This will work for CPU, GPU, and BOTH.
slice.SetDataLocation(GetCurrentMatrixLocation(), GetMatrixType());
return slice;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignColumnSlice(const Matrix<ElemType>& fromMatrix, size_t startColumn, size_t numCols)
{
ReleaseMemory();
m_preferredDeviceId = fromMatrix.m_preferredDeviceId;
DISPATCH_MATRIX_ON_FLAG(&fromMatrix,
this,
if (m_CPUMatrix) m_CPUMatrix->AssignColumnSlice(*fromMatrix.m_CPUMatrix, startColumn, numCols);
else m_CPUMatrix = make_shared<CPUMatrix<ElemType>>(fromMatrix.m_CPUMatrix->ColumnSlice(startColumn, numCols)),
if (m_GPUMatrix) m_GPUMatrix->AssignColumnSlice(*fromMatrix.m_GPUMatrix, startColumn, numCols);
else m_GPUMatrix = make_shared<GPUMatrix<ElemType>>(fromMatrix.m_GPUMatrix->ColumnSlice(startColumn, numCols)),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::SetColumnSlice(const Matrix<ElemType>& fromMatrix, size_t startColumn, size_t numCols)
{
assert(m_CPUMatrix || m_GPUMatrix);
// must already been allocated
DISPATCH_MATRIX_ON_FLAG(&fromMatrix,
this,
m_CPUMatrix->SetColumnSlice(*fromMatrix.m_CPUMatrix, startColumn, numCols),
m_GPUMatrix->SetColumnSlice(*fromMatrix.m_GPUMatrix, startColumn, numCols),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
void Matrix<ElemType>::CopyColumnsStrided(const Matrix<ElemType>& fromMatrix, size_t numCols, size_t srcNumColsStride, size_t destNumColsStride)
{
assert(m_CPUMatrix || m_GPUMatrix);
DISPATCH_MATRIX_ON_FLAG(&fromMatrix,
this,
m_CPUMatrix->CopyColumnsStrided(*fromMatrix.m_CPUMatrix, numCols, srcNumColsStride, destNumColsStride),
m_GPUMatrix->CopyColumnsStrided(*fromMatrix.m_GPUMatrix, numCols, srcNumColsStride, destNumColsStride),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::Diagonal() const
{
int devId = GetDeviceId();
Matrix<ElemType> diag(matrixFlagDontOwnBuffer, (DEVICEID_TYPE) devId);
diag.m_preferredDeviceId = m_preferredDeviceId;
AssignDiagonalValuesTo(diag);
return diag;
}
template <class ElemType>
void Matrix<ElemType>::AssignDiagonalValuesTo(Matrix<ElemType>& diag) const
{
int devId = GetDeviceId();
DecideAndMoveToRightDevice(*this, diag);
if (GetMatrixType() == MatrixType::DENSE)
{
if (devId == CPUDEVICE)
{
if (diag.m_CPUMatrix)
diag.m_CPUMatrix->operator=(static_cast<CPUMatrix<ElemType>&&>(m_CPUMatrix->Diagonal()));
else
diag.m_CPUMatrix = make_shared<CPUMatrix<ElemType>>(static_cast<CPUMatrix<ElemType>&&>(m_CPUMatrix->Diagonal()));
diag.SetDataLocation(CPU, DENSE);
}
else
{
if (diag.m_GPUMatrix)
diag.m_GPUMatrix->operator=(static_cast<GPUMatrix<ElemType>&&>(m_GPUMatrix->Diagonal()));
else
diag.m_GPUMatrix = make_shared<GPUMatrix<ElemType>>(static_cast<GPUMatrix<ElemType>&&>(m_GPUMatrix->Diagonal()));
diag.SetDataLocation(GPU, DENSE);
}
}
else if (GetMatrixType() == MatrixType::SPARSE)
{
// TODO: Implement optimized diagonal functions for sparse matrices. For now use the DiagonalToDense instead.
if (devId == CPUDEVICE)
{
if (diag.m_CPUMatrix)
diag.m_CPUMatrix->operator=(static_cast<CPUMatrix<ElemType>&&>(m_CPUSparseMatrix->DiagonalToDense()));
else
diag.m_CPUMatrix = make_shared<CPUMatrix<ElemType>>(static_cast<CPUMatrix<ElemType>&&>(m_CPUSparseMatrix->DiagonalToDense()));
diag.SetDataLocation(CPU, DENSE);
}
else
{
if (diag.m_GPUMatrix)
diag.m_GPUMatrix->operator=(static_cast<GPUMatrix<ElemType>&&>(m_GPUSparseMatrix->DiagonalToDense()));
else
diag.m_GPUMatrix = make_shared<GPUMatrix<ElemType>>(static_cast<GPUMatrix<ElemType>&&>(m_GPUSparseMatrix->DiagonalToDense()));
diag.SetDataLocation(GPU, DENSE);
}
}
else
LogicError("Undetermined matrix type");
}
// This function will change the matrix type between DENSE and SPARSE.
// WARNING: The correct implementation is to copy the matrix between DENSE and SPARSE
// However, the conversion functions are not implemented yet and so it will always create
// a new blank matrix and destroy all info in the original matrix if different matrix type is asked.
// In case of !keepValues, the matrix content will be undefined.
template <class ElemType>
void Matrix<ElemType>::SwitchToMatrixType(MatrixType newMatrixType, MatrixFormat newMatrixFormat, bool keepValues)
{
// This check should be uncommented but unfortunately there are still places
// this function is being called with incorrect "default" format value
/*if (m_matrixType == newMatrixType && GetFormat() != newMatrixFormat)
NOT_IMPLEMENTED;*/
if (m_matrixType == newMatrixType)
return;
if (!m_baseMatrix)
keepValues = false;
#define NUM_MATRIXTYPE_CHANGED_WARN 20
m_numTimesMatrixTypeChanged++;
if ((GetMathLibTraceLevel() > 0) && (m_numTimesMatrixTypeChanged == NUM_MATRIXTYPE_CHANGED_WARN))
fprintf(stderr, "WARNING: The same matrix with dim [%lu, %lu] has been transferred between different devices for %d times.\n", (unsigned long) GetNumRows(), (unsigned long) GetNumCols(), NUM_MATRIXTYPE_CHANGED_WARN);
if (GetDeviceId() < 0) // CPU
{
if (newMatrixType == MatrixType::SPARSE)
{
if (!m_baseMatrix)
m_CPUSparseMatrix = make_shared<CPUSparseMatrix<ElemType>>(newMatrixFormat);
else
m_CPUSparseMatrix = make_shared<CPUSparseMatrix<ElemType>>(newMatrixFormat, GetNumRows(), GetNumCols(), 1);
if (keepValues)
CopyElementsFromDenseToSparse(*m_CPUMatrix, *m_CPUSparseMatrix);
SetDataLocation(CPU, SPARSE);
m_CPUMatrix = nullptr;
}
else if (newMatrixType == MatrixType::DENSE)
{
if (!m_baseMatrix)
m_CPUMatrix = make_shared<CPUMatrix<ElemType>>();
else
m_CPUMatrix = make_shared<CPUMatrix<ElemType>>(GetNumRows(), GetNumCols());
if (keepValues)
m_CPUMatrix->SetValue(m_CPUSparseMatrix->CopyColumnSliceToDense(0, GetNumCols()));
SetDataLocation(CPU, DENSE);
m_CPUSparseMatrix = nullptr;
}
else
LogicError("SwitchToMatrixType: Unexpected/invalid new matrix type");
}
else // GPU
{
if (newMatrixType == MatrixType::SPARSE)
{
if (!m_baseMatrix)
m_GPUSparseMatrix = make_shared<GPUSparseMatrix<ElemType>>(GetDeviceId(), newMatrixFormat);
else
m_GPUSparseMatrix = make_shared<GPUSparseMatrix<ElemType>>(GetNumRows(), GetNumCols(), 0, GetDeviceId(), newMatrixFormat);
if (keepValues)
m_GPUSparseMatrix->SetValue(*m_GPUMatrix);
SetDataLocation(GPU, SPARSE);
m_GPUMatrix = nullptr;
}
else if (newMatrixType == MatrixType::DENSE)
{
if (!m_baseMatrix)
m_GPUMatrix = make_shared<GPUMatrix<ElemType>>(GetDeviceId());
else
m_GPUMatrix = make_shared<GPUMatrix<ElemType>>(GetNumRows(), GetNumCols(), GetDeviceId());
if (keepValues)
m_GPUSparseMatrix->CopyToDenseMatrix(*m_GPUMatrix);
SetDataLocation(GPU, DENSE);
m_GPUSparseMatrix = nullptr;
}
else
LogicError("SwitchToMatrixType: Unexpected/invalid new matrix type");
}
}
template <class ElemType>
void Matrix<ElemType>::CopyElementsFromDenseToSparse(CPUMatrix<ElemType>& from, CPUSparseMatrix<ElemType>& dest)
{
foreach_coord (row, col, from)
{
auto val = from(row, col);
dest.SetValue(row, col, val);
}
}
template <class ElemType>
ElemType Matrix<ElemType>::Get00Element() const
{
DISPATCH_MATRIX_ON_FLAG(this, nullptr,
{ return m_CPUMatrix->Get00Element(); },
{ return m_GPUMatrix->Get00Element(); },
{ NOT_IMPLEMENTED; },
{ NOT_IMPLEMENTED; });
}
// const operator(,)
template <class ElemType>
const ElemType Matrix<ElemType>::operator()(const size_t row, const size_t col) const
{
DISPATCH_MATRIX_ON_FLAG_USECPU_4BOTH(this, nullptr,
{ return m_CPUMatrix->operator()(row, col); },
{ _transferFromDeviceToDevice(GetDeviceId(), CPUDEVICE, false); return m_CPUMatrix->operator()(row, col); },
{ NOT_IMPLEMENTED; },
{ NOT_IMPLEMENTED; });
}
// non-const operator(,)
//WARNING: This function is very slow for GPUs since it requires copying values between CPUs and GPUs.
//In addition, if ColumnSlice is used after this function but before the values are copied back to GPU
//the operation will fail since the memory is not managed by the slice.
// If you don't need to modify the values, to call the const version above, or GetValue(row,col) which does that for you unambiguously.
// TODO: Can we remove this, and have users use SetValue() instead? To avoid this potential error?
template <class ElemType>
ElemType& Matrix<ElemType>::operator()(const size_t row, const size_t col)
{
DISPATCH_MATRIX_ON_FLAG_USECPU_4BOTH(this, nullptr,
{ return m_CPUMatrix->operator()(row, col); },
{
_transferFromDeviceToDevice(GetDeviceId(), CPUDEVICE, false);
SetDataLocation(CPU, DENSE);
return m_CPUMatrix->operator()(row, col);
},
{ NOT_IMPLEMENTED; },
{ NOT_IMPLEMENTED; });
}
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::Transpose()
{
if (IsEmpty())
LogicError("Transpose: Matrix is empty.");
Matrix<ElemType> c(GetNumCols(), GetNumRows(), (DEVICEID_TYPE) GetDeviceId(), this->GetMatrixType(), this->GetFormat());
c.AssignTransposeOf(*this);
return c;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignTransposeOf(const Matrix<ElemType>& a)
{
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a, this,
{ m_CPUMatrix->AssignTransposeOf(*a.m_CPUMatrix); },
{ m_GPUMatrix->AssignTransposeOf(*a.m_GPUMatrix); },
{ NOT_IMPLEMENTED; },
{ m_GPUSparseMatrix->AssignTransposeOf(*a.m_GPUSparseMatrix); });
return *this;
}
// *this[:,j] = a[:,idx[j]] * alpha + *this[:,j] * beta
// idx has width of 'this' and contains values w.r.t. 'a'
// Invalid entries (gap columns) are denoted by idx(0,j) == -1.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::DoGatherColumnsOf(ElemType beta, const Matrix<ElemType>& idx, const Matrix<ElemType>& a, ElemType alpha)
{
DecideAndMoveToRightDevice(*this, idx, a); // TODO: only move target if beta != 0
if (a.GetMatrixType() != this->GetMatrixType())
RuntimeError("Matrix::DoGatherColumnsOf: The source and target matrices must have same storage type (SPARSE/DENSE).");
DISPATCH_MATRIX_ON_FLAG(&a, this,
{ m_CPUMatrix->DoGatherColumnsOf(beta, *idx.m_CPUMatrix, *a.m_CPUMatrix, alpha); },
{ m_GPUMatrix->DoGatherColumnsOf(beta, *idx.m_GPUMatrix, *a.m_GPUMatrix, alpha); },
{ m_CPUSparseMatrix->DoGatherColumnsOf(beta, *idx.m_CPUMatrix, *a.m_CPUSparseMatrix, alpha); },
{
// TODO replace by more performant version directly on GPU that does not require the round-trip over CPU.
Matrix<ElemType> tempIdx(CPUDEVICE); tempIdx.AssignValuesOf(idx);
CPUSparseMatrix<ElemType> tempA(a.GetFormat(), a.GetNumRows(), a.GetNumCols(), a.m_GPUSparseMatrix->GetNumNZElements());
a.m_GPUSparseMatrix->CopyToCPUSparseMatrix(tempA);
CPUSparseMatrix<ElemType> tempThis(m_GPUSparseMatrix->GetFormat(), m_GPUSparseMatrix->GetNumRows(), m_GPUSparseMatrix->GetNumCols(), m_GPUSparseMatrix->GetNumNZElements());
m_GPUSparseMatrix->CopyToCPUSparseMatrix(tempThis);
tempThis.DoGatherColumnsOf(beta, *tempIdx.m_CPUMatrix, tempA, alpha);
m_GPUSparseMatrix->SetValue(tempThis);
});
return *this;
}
// *this[:,idx[j]] = a[:,j] * alpha + *this[:,idx[j]] * beta
// idx has width of 'a' and contains values w.r.t. 'this'
// Unlike gather, for scatter, 'this' must have been sized already.
// Invalid entries (gap columns) are denoted by idx(0,j) == -1.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::DoScatterColumnsOf(ElemType beta, const Matrix<ElemType>& idx, const Matrix<ElemType>& a, ElemType alpha)
{
DecideAndMoveToRightDevice(*this, idx, a); // TODO: only move target if beta != 0
if (a.GetMatrixType() != this->GetMatrixType())
RuntimeError("Matrix::DoScatterColumnsOf: The source and target matrices must have same storage type (SPARSE/DENSE).");
DISPATCH_MATRIX_ON_FLAG(&a, this,
{ m_CPUMatrix->DoScatterColumnsOf(beta, *idx.m_CPUMatrix, *a.m_CPUMatrix, alpha); },
{ m_GPUMatrix->DoScatterColumnsOf(beta, *idx.m_GPUMatrix, *a.m_GPUMatrix, alpha); },
{ m_CPUSparseMatrix->DoScatterColumnsOf(beta, *idx.m_CPUMatrix, *a.m_CPUSparseMatrix, alpha); },
{
// TODO replace by more performant version directly on GPU that does not require the round-trip over CPU.
Matrix<ElemType> tempIdx(CPUDEVICE); tempIdx.AssignValuesOf(idx);
CPUSparseMatrix<ElemType> tempA(a.GetFormat(), a.GetNumRows(), a.GetNumCols(), a.m_GPUSparseMatrix->GetNumNZElements());
a.m_GPUSparseMatrix->CopyToCPUSparseMatrix(tempA);
CPUSparseMatrix<ElemType> tempThis(m_GPUSparseMatrix->GetFormat(), m_GPUSparseMatrix->GetNumRows(), m_GPUSparseMatrix->GetNumCols(), m_GPUSparseMatrix->GetNumNZElements());
m_GPUSparseMatrix->CopyToCPUSparseMatrix(tempThis);
tempThis.DoScatterColumnsOf(beta, *tempIdx.m_CPUMatrix, tempA, alpha);
m_GPUSparseMatrix->SetValue(tempThis);
});
return *this;
}
// set all elements of a matrix to a scalar value
// For sparse matrices, the only allowed value is 0.
template <class ElemType>
void Matrix<ElemType>::SetValue(const ElemType v)
{
if (IsEmpty()) // if empty then we are done
return;
if (v == 0 && GetMatrixType() == MatrixType::SPARSE) // if sparse, setting it to 0 is special
{
Reset();
return;
}
DISPATCH_MATRIX_ON_FLAG(this, this,
{ m_CPUMatrix->SetValue(v); },
{ m_GPUMatrix->SetValue(v); },
{ NOT_IMPLEMENTED; },
{ NOT_IMPLEMENTED; });
}
template <class ElemType>
void Matrix<ElemType>::SetValue(const DeviceBoundNumber<ElemType>& db_number)
{
if (IsEmpty()) // if empty then we are done
return;
DISPATCH_MATRIX_ON_FLAG(this, this,
{ m_CPUMatrix->SetValue(*db_number.ExposePointer2Value()); },
{
if (GetDeviceId() != db_number.GetDeviceId())
RuntimeError("Matrix and device bound number must be on the same device");
m_GPUMatrix->SetValue(db_number.ExposePointer2Value());
},
{ NOT_IMPLEMENTED; },
{ NOT_IMPLEMENTED; });
}
template <>
/*static*/ float Matrix<float>::MakeNan(size_t /*payload*/)
{
return nanf("");
}
template <>
/*static*/ double Matrix<double>::MakeNan(size_t /*payload*/)
{
return nan("");
}
template <>
/*static*/ char Matrix<char>::MakeNan(size_t)
{
return 0;
} // (needed for completeness and to pass unit tests)
template <>
/*static*/ short Matrix<short>::MakeNan(size_t)
{
return 0;
} // (needed for completeness and to pass unit tests)
template <class ElemType>
void Matrix<ElemType>::MaskColumnsValue(const Matrix<char>& columnsMask, ElemType val, size_t numColsPerMaskEntry)
{
if (GetNumCols() != (columnsMask.GetNumCols() * numColsPerMaskEntry))
RuntimeError("MaskColumnsValue: Matrix number of columns must equal [column mask * numColsPerMaskEntry].");
if (GetCurrentMatrixLocation() == CPU && (columnsMask.GetCurrentMatrixLocation() == CPU || columnsMask.GetCurrentMatrixLocation() == BOTH))
; // OK
else if (GetDeviceId() != columnsMask.GetDeviceId() && columnsMask.GetCurrentMatrixLocation() != BOTH)
RuntimeError("MaskColumnsValue: Matrix and column mask must be on the same device.");
DISPATCH_MATRIX_ON_FLAG(this, this,
{ m_CPUMatrix->MaskColumnsValue(*columnsMask.m_CPUMatrix, val, numColsPerMaskEntry); },
{ m_GPUMatrix->MaskColumnsValue(*columnsMask.m_GPUMatrix, val, numColsPerMaskEntry); },
{ m_CPUSparseMatrix->MaskColumnsValue(*columnsMask.m_CPUMatrix, val, numColsPerMaskEntry); },
{ m_GPUSparseMatrix->MaskColumnsValue(*columnsMask.m_GPUMatrix, val, numColsPerMaskEntry); });
}
template <class ElemType>
void Matrix<ElemType>::SetColumn(const ElemType* colPointer, size_t colInd)
{
if (colPointer == nullptr)
InvalidArgument("SetColumn: colPointer is null.");
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->SetColumn(colPointer, colInd),
m_GPUMatrix->SetColumn(colPointer, colInd),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::SetColumn(const ElemType val, size_t colInd)
{
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->SetColumn(val, colInd),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::SetColumn(const Matrix<ElemType>& colMat, size_t colInd)
{
DecideAndMoveToRightDevice(*this, colMat);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->SetColumn(*colMat.m_CPUMatrix, colInd),
m_GPUMatrix->SetColumn(*colMat.m_GPUMatrix, colInd),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::SetValue(const Matrix<ElemType>& deepCopyFrom)
{
if (this == &deepCopyFrom)
return;
m_preferredDeviceId = deepCopyFrom.m_preferredDeviceId;
DecideAndMoveToRightDevice(deepCopyFrom, *this);
SwitchToMatrixType(deepCopyFrom.GetMatrixType(), deepCopyFrom.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom, this,
{ m_CPUMatrix->SetValue(*deepCopyFrom.m_CPUMatrix); },
{ m_GPUMatrix->SetValue(*deepCopyFrom.m_GPUMatrix); },
{ m_CPUSparseMatrix->SetValue(*deepCopyFrom.m_CPUSparseMatrix); },
{ m_GPUSparseMatrix->SetValue(*deepCopyFrom.m_GPUSparseMatrix); });
}
template <class ElemType>
void Matrix<ElemType>::AssignValuesOf(const Matrix<ElemType>& deepCopyFrom)
{
if (this == &deepCopyFrom)
return;
// TODO: do we need all these 'this->'?
DISPATCH_MATRIX_ON_FLAG(this, this,
{
// Set CPUMatrix from:
DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom, nullptr,
{ m_CPUMatrix->SetValue(*deepCopyFrom.m_CPUMatrix); },
{ this->Resize(deepCopyFrom.GetNumRows(), deepCopyFrom.GetNumCols()); deepCopyFrom.CopySection(deepCopyFrom.GetNumRows(), deepCopyFrom.GetNumCols(), m_CPUMatrix->Data(), this->GetNumRows()); },
{ deepCopyFrom.m_CPUSparseMatrix->AssignColumnSliceToDense(*m_CPUMatrix, 0, deepCopyFrom.GetNumCols()); },
{ CPUSparseMatrix<ElemType> tempCPUSparseMatrix(deepCopyFrom.GetFormat(), deepCopyFrom.GetNumRows(), deepCopyFrom.GetNumCols(), deepCopyFrom.m_GPUSparseMatrix->GetNumNZElements()); deepCopyFrom.m_GPUSparseMatrix->CopyToCPUSparseMatrix(tempCPUSparseMatrix); tempCPUSparseMatrix.AssignColumnSliceToDense(*m_CPUMatrix, 0, deepCopyFrom.GetNumCols()); });
},
{
// Set GPUMatrix from:
DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom, nullptr,
{ m_GPUMatrix->SetValue(deepCopyFrom.GetNumRows(), deepCopyFrom.GetNumCols(), this->GetDeviceId(), deepCopyFrom.m_CPUMatrix->Data()); },
{ m_GPUMatrix->SetValue(*deepCopyFrom.m_GPUMatrix); },
{
CPUMatrix<ElemType> tempCPUDenseMatrix(deepCopyFrom.GetNumRows(), deepCopyFrom.GetNumCols());
deepCopyFrom.m_CPUSparseMatrix->AssignColumnSliceToDense(tempCPUDenseMatrix, 0, deepCopyFrom.GetNumCols());
m_GPUMatrix->SetValue(deepCopyFrom.GetNumRows(), deepCopyFrom.GetNumCols(), this->GetDeviceId(), tempCPUDenseMatrix.Data());
},//{ m_GPUMatrix->SetValue(*deepCopyFrom.m_CPUSparseMatrix); },
{ deepCopyFrom.m_GPUSparseMatrix->AssignColumnSliceToDense(*m_GPUMatrix, 0, deepCopyFrom.GetNumCols()); });
},
{
// Set CPUSparseMatrix from:
DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom, nullptr,
{ auto matrixType = GetMatrixType(); auto matrixFormat = GetFormat(); *this = deepCopyFrom.DeepClone(); SwitchToMatrixType(matrixType, matrixFormat, true); },
{ LogicError("AssignValuesOf: Assigning a GPUMatrix to a CPUSparseMatrix is not yet implemented."); },//{ m_CPUSparseMatrix->SetValue(*deepCopyFrom.m_GPUMatrix); },
{ m_CPUSparseMatrix->SetValue(*deepCopyFrom.m_CPUSparseMatrix); },
{ LogicError("AssignValuesOf: Assigning a GPUSparseMatrix to a CPUSparseMatrix is not yet implemented."); });//{ m_CPUSparseMatrix->SetValue(*deepCopyFrom.m_GPUSparseMatrix); });
},
{
// Set GPUSparseMatrix from:
DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom, nullptr,
{ Matrix<ElemType> tempCPUSparseMatrix(deepCopyFrom.DeepClone()); tempCPUSparseMatrix.SwitchToMatrixType(GetMatrixType(), GetFormat(), true); m_GPUSparseMatrix->SetValue(*tempCPUSparseMatrix.m_CPUSparseMatrix); },
{ m_GPUSparseMatrix->SetValue(*deepCopyFrom.m_GPUMatrix); },
{ m_GPUSparseMatrix->SetValue(*deepCopyFrom.m_CPUSparseMatrix); },
{ m_GPUSparseMatrix->SetValue(*deepCopyFrom.m_GPUSparseMatrix); });
});
}
// CastAssignValuesOf() -- assign a matrix with type conversion, needed for feeding 'float' data to 'double' inputs in V2
// This version is a stop-gap for debugging and testing. If any conversion is done, it will be slow.
// If this is ever used for something that needs performance, it should not be too hard (but labor) to implement this efficiently.
static void DoCastAssignValuesOf(Matrix<float>& target, const Matrix<float>& other) { target.AssignValuesOf(other); }
static void DoCastAssignValuesOf(Matrix<double>& target, const Matrix<double>& other) { target.AssignValuesOf(other); }
template<class ElemType>
static void CopyToVector(const Matrix<ElemType>& source, vector<ElemType>& sourceData)
{
sourceData.resize(source.GetNumElements());
ElemType* datap = sourceData.data();
size_t datasz = sourceData.size();
source.CopyToArray(datap, datasz);
assert(datap == sourceData.data() && datasz == sourceData.size()); // (make sure it used my buffer; a somewhat awkward API)
}
template<>
void Matrix<int>::AssignValuesOf(const Matrix<int>&) { NOT_IMPLEMENTED; }
template<class ElemType, class ElemTypeOther>
static void DoCastAssignValuesOf(Matrix<ElemType>& target, const Matrix<ElemTypeOther>& source)
{
target; source;
// this is implemented in a rather tedious way:
// - copy to a CPU-side STL vector
// - type-cast
// - copy to target
vector<ElemTypeOther> sourceData;
if (source.GetMatrixType() == MatrixType::SPARSE) // if sparse then convert it over (V2 cannot read sparse data into dense input_variables)
{
Matrix<ElemTypeOther> temp(source.GetNumRows(), source.GetNumCols(), CPUDEVICE, DENSE);
temp.AssignValuesOf(source);
CopyToVector(temp, sourceData);
}
else
{
CopyToVector(source, sourceData);
}
// cast all values
vector<ElemType> targetData(sourceData.size());
transform(sourceData.begin(), sourceData.end(), targetData.begin(), [](ElemTypeOther v){ return (ElemType)v; });
// set the target
if (target.GetMatrixType() == MatrixType::SPARSE) // if target is sparse then we cannot assign from a vector directly, but we can from a matrix object
{
Matrix<ElemType> temp(source.GetNumRows(), source.GetNumCols(), targetData.data(), CPUDEVICE);
target.AssignValuesOf(temp);
}
else
{
target.SetValue(source.GetNumRows(), source.GetNumCols(), target.GetDeviceId(), targetData.data());
}
}
template <class ElemType>
void Matrix<ElemType>::CastAssignValuesOf(const MatrixBase& other) /*override*/ // allows for mixed assignment with conversion
{
const Matrix<float> * otherf = dynamic_cast<const Matrix<float>*>(&other);
if (otherf)
return DoCastAssignValuesOf(*this, *otherf);
const Matrix<double> * otherd = dynamic_cast<const Matrix<double>*>(&other);
if (otherd)
return DoCastAssignValuesOf(*this, *otherd);
LogicError("CastAssignValuesOf: Only accepts float and double matrices.");
}
template<>
void Matrix<int>::SetValue(const size_t, const size_t, int, int*, const size_t, DataTransferer*) { NOT_IMPLEMENTED; }
template <class ElemType>
void Matrix<ElemType>::SetValue(const size_t numRows, const size_t numCols, int deviceId, ElemType* pArray, const size_t matrixFlags, DataTransferer* transferer)
{
if (((numRows * numCols) > 0) && (pArray == nullptr))
InvalidArgument("Invalid pArray.");
// Only gpu matrix supports async data transfers, so data transferer passed only to gpu matrix.
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->SetValue(numRows, numCols, pArray, matrixFlags),
m_GPUMatrix->SetValue(numRows, numCols, deviceId, pArray, matrixFlags, transferer),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::SetValue(const size_t rIdx, const size_t cIdx, ElemType val)
{
DISPATCH_MATRIX_ON_FLAG_USECPU_4BOTH(this,
this,
(*m_CPUMatrix)(rIdx, cIdx) = val,
NOT_IMPLEMENTED,
m_CPUSparseMatrix->SetValue(rIdx, cIdx, val),
NOT_IMPLEMENTED);
}
// read features
template <class ElemType>
void Matrix<ElemType>::SetMatrixFromCSCFormat(const CPUSPARSE_INDEX_TYPE* h_CSCCol, const CPUSPARSE_INDEX_TYPE* h_Row, const ElemType* h_Val,
const size_t nz, const size_t numRows, const size_t numCols, DataTransferer* transferer)
{
// Note: The current implementation uses the xPUSparseMatrix as temporary space. This allows for memory sharing between calls. If
// xPUSparseMatrix is a view, this code will cause an error during runtime stating that the view is not writable nor resizable.
// Only gpu matrix supports async data transfers, so data transferer passed only to gpu matrix in case we do not need to reassign to dense.
// When we have to reassign sparse to dense we cannot use async operation, because at the time when AssignColumnSliceToDense is called the
// data should already be copied to destination.
DISPATCH_MATRIX_ON_FLAG(this, this,
{
if (!m_CPUSparseMatrix) m_CPUSparseMatrix = make_shared<CPUSparseMatrix<ElemType>>(matrixFormatSparseCSC, numRows, numCols, nz);
m_CPUSparseMatrix->SetMatrixFromCSCFormat(h_CSCCol, h_Row, h_Val, nz, numRows, numCols);
m_CPUSparseMatrix->AssignColumnSliceToDense(*m_CPUMatrix, 0, numCols);
},
{
if (!m_GPUSparseMatrix) m_GPUSparseMatrix = make_shared<GPUSparseMatrix<ElemType>>(numRows, numCols, nz, GetDeviceId(), matrixFormatSparseCSC);
m_GPUSparseMatrix->SetMatrixFromCSCFormat(h_CSCCol, h_Row, h_Val, nz, numRows, numCols);
m_GPUSparseMatrix->AssignColumnSliceToDense(*m_GPUMatrix, 0, numCols);
},
{ m_CPUSparseMatrix->SetMatrixFromCSCFormat(h_CSCCol, h_Row, h_Val, nz, numRows, numCols); },
{ m_GPUSparseMatrix->SetMatrixFromCSCFormat(h_CSCCol, h_Row, h_Val, nz, numRows, numCols, false, -1, transferer); });
}
template <class ElemType>
void Matrix<ElemType>::SetDiagonalValue(const ElemType v)
{
if (IsEmpty())
LogicError("SetDiagonalValue: Matrix is empty.");
if (GetNumRows() != GetNumCols())
LogicError("SetDiagonalValue: NumRows and NumCols do not agree.");
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->SetDiagonalValue(v),
m_GPUMatrix->SetDiagonalValue(v),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::SetDiagonalValue(const Matrix<ElemType>& vector)
{
if (GetNumRows() != GetNumCols())
LogicError("SetDiagonalValue: NumRows and NumCols do not agree.");
if (vector.GetNumRows() != 1 && vector.GetNumCols() != 1)
LogicError("SetDiagonalValue: Input vector must be a vector.");
if (vector.GetNumRows() * vector.GetNumCols() != GetNumRows())
LogicError("SetDiagonalValue: Input vector must match matrix dimension.");
if (IsEmpty())
return;
DecideAndMoveToRightDevice(*this, vector);
if (vector.GetNumElements() == 1) // reduce to simple form
{
DISPATCH_MATRIX_ON_FLAG(&vector,
nullptr,
SetDiagonalValue(vector(0, 0)),
SetDiagonalValue(vector.m_GPUMatrix->Get00Element()), // BUGBUG: efficiency
SetDiagonalValue(vector(0, 0)),
SetDiagonalValue(vector.m_GPUMatrix->Get00Element()) // BUGBUG: efficiency
);
}
else if (vector.GetNumRows() != GetNumRows() && vector.GetNumCols() != GetNumRows())
LogicError("SetDiagonalValue: input vector's dimension does not agree with [this].");
else
{
// WARNING: we use this pointer to decide which function to call. However, vector may be stored in a different matrix type (DENSE, SPARSE)
DISPATCH_MATRIX_ON_FLAG(this,
this,
assert(vector.m_CPUMatrix);
m_CPUMatrix->SetDiagonalValue(*vector.m_CPUMatrix),
assert(vector.m_GPUMatrix);
m_GPUMatrix->SetDiagonalValue(*vector.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
}
template <class ElemType>
void Matrix<ElemType>::SetUniformRandomValue(const ElemType low, const ElemType high, unsigned long seed)
{
if (IsEmpty())
return;
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->SetUniformRandomValue(low, high, seed),
m_GPUMatrix->SetUniformRandomValue(low, high, seed),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::SetUniformRandomValue(RNGHandle& rngHandle, const ElemType low, const ElemType high)
{
if (IsEmpty())
return;
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->SetUniformRandomValue(rngHandle, low, high),
m_GPUMatrix->SetUniformRandomValue(rngHandle, low, high),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::SetGaussianRandomValue(RNGHandle& rngHandle, const ElemType mean, const ElemType stdev)
{
if (IsEmpty())
return;
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->SetGaussianRandomValue(rngHandle, mean, stdev),
m_GPUMatrix->SetGaussianRandomValue(rngHandle, mean, stdev),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::SetGumbelRandomValue(RNGHandle& rngHandle, const ElemType loc, const ElemType scale)
{
if (IsEmpty())
return;
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->SetGumbelRandomValue(rngHandle, loc, scale),
m_GPUMatrix->SetGumbelRandomValue(rngHandle, loc, scale),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::SetGaussianRandomValue(const ElemType mean, const ElemType sigma, unsigned long seed)
{
if (sigma <= 0)
InvalidArgument("SetGaussianRandomValue: sigma must be a positive value.");
if (IsEmpty())
return;
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->SetGaussianRandomValue(mean, sigma, seed),
m_GPUMatrix->SetGaussianRandomValue(mean, sigma, seed),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::SetTruncatedNormalRandomValue(const ElemType mean, const ElemType sigma, unsigned long seed)
{
if (sigma <= 0)
InvalidArgument("SetTruncatedNormalRandomValue: sigma must be a positive value.");
if (IsEmpty())
return;
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->SetTruncatedNormalRandomValue(mean, sigma, seed),
m_GPUMatrix->SetTruncatedNormalRandomValue(mean, sigma, seed),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::AddGaussianRandomValue(const ElemType mean, const ElemType sigma, unsigned long seed)
{
if (sigma <= 0)
InvalidArgument("AddGaussianRandomValue: sigma must be a positive value.");
if (IsEmpty())
return;
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->AddGaussianRandomValue(mean, sigma, seed),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
//maskRate: percentage of values masked out (similar to dropout rate)
//scaleValue: which scale value to set to the left ones (unmasked items).
template <class ElemType>
void Matrix<ElemType>::SetUniformRandomMask(const ElemType maskRate, const ElemType scaleValue, RNGHandle& rngHandle)
{
if (IsEmpty())
return;
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->SetUniformRandomMask(maskRate, scaleValue, rngHandle),
m_GPUMatrix->SetUniformRandomMask(maskRate, scaleValue, rngHandle),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
// Vanilla SGD update.
// Modifies "this" parameter matrix, on which this method is invoked.
template <class ElemType>
void Matrix<ElemType>::SGDUpdate(Matrix<ElemType>& gradients, ElemType learnRatePerSample)
{
DecideAndMoveToRightDevice(gradients, *this);
DISPATCH_MATRIX_ON_FLAG(&gradients, nullptr,
{
// w_t = w_{t-1} - learnRatePerSample * g_{t-1},
ScaleAndAdd(ElemType(-learnRatePerSample), gradients, *this);
},
{
// BUGBUG: cannot call ScaleAndAdd(ElemType(-learnRatePerSample), gradients, *this) here,
// it produces different results from the scale and add below.
// g'_{t-1} = learnRatePerSample * g_{t-1}
// w_t = w_{t-1} - g'_{t-1}
Scale(ElemType(learnRatePerSample), gradients);
*this -= gradients;
},
{
ScaleAndAdd(ElemType(-learnRatePerSample), gradients, *this);
},
{
ScaleAndAdd(ElemType(-learnRatePerSample), gradients, *this);
});
}
// SGD update with momentum.
// Modifies "this" parameter matrix, on which this method is invoked.
template <class ElemType>
void Matrix<ElemType>::MomentumSGDUpdate(Matrix<ElemType>& gradients,
Matrix<ElemType>& smoothedGradients,
ElemType learnRatePerSample,
ElemType momentum,
bool unitGainMomentum)
{
DecideAndMoveToRightDevice(smoothedGradients, gradients, *this);
const auto unitGainFactor = ElemType(unitGainMomentum ? (1.0 - momentum) : 1.0);
DISPATCH_MATRIX_ON_FLAG(&gradients, nullptr,
{
// Classic momentum (unitGainFactor == 1.0):
// 1) sg_t = momentum * sg_{t-1} + learnRatePerSample * g_{t-1}
// Unit-gain momentum (unitGainFactor == 1.0 - momentum):
// 1) sg_t = momentum * sg_{t-1} + learnRatePerSample * (1.0 - momentum) * g_{t-1}
// 2) w_t = w_{t-1} - sg_t
ScaleAndAdd(unitGainFactor * learnRatePerSample, gradients, momentum, smoothedGradients);
*this -= smoothedGradients;
},
{
ScaleAndAdd(unitGainFactor * learnRatePerSample, gradients, momentum, smoothedGradients);
*this -= smoothedGradients;
},
{
// The sparse update is slightly different from the dense implementation above:
// Classic momentum (unitGainFactor == 1.0):
// 1) sg_t = momentum * sg_{t-1} + g_{t-1}
// Unit-gain momentum (unitGainFactor == 1.0 - momentum):
// 1) sg_t = momentum * sg_{t-1} + (1.0 - momentum) * g_{t-1}
// 2) g'_{t-1} = sg_t
// 3) w_t = w_{t-1} - learnRatePerSample * g'_{t-1}
if (momentum != 0)
{
gradients.m_CPUSparseMatrix->NormalGrad(*smoothedGradients.m_CPUMatrix, momentum, unitGainMomentum);
}
ScaleAndAdd(-learnRatePerSample, gradients, *this);
},
{
if (momentum != 0)
{
gradients.m_GPUSparseMatrix->NormalGrad(*smoothedGradients.m_GPUMatrix, momentum, unitGainMomentum);
}
ScaleAndAdd(-learnRatePerSample, gradients, *this);
});
}
// Nesterov accelerated SGD update.
// Modifies "this" parameter matrix, on which this method is invoked.
template <class ElemType>
void Matrix<ElemType>::NesterovAcceleratedMomentumSGDUpdate(Matrix<ElemType>& gradients,
Matrix<ElemType>& smoothedGradients,
ElemType learnRatePerSample,
ElemType momentum,
bool unitGainMomentum)
{
DecideAndMoveToRightDevice(smoothedGradients, gradients, *this);
const auto unitGainFactor = ElemType(unitGainMomentum ? (1.0 - momentum) : 1.0);
DISPATCH_MATRIX_ON_FLAG(&gradients, nullptr,
{ /* CPU dense */
// 1) sg_t = momentum * sg_{t-1} + learnRatePerSample * unitGainFactor * g_{t-1}
// 2) w'_t = w_{t-1} - momentum * sg_t
// 3) w_t = w'_t - learnRatePerSample * unitGainFactor * g_{t-1}
// The end result:
// w_t = w_{t-1} - momentum^2 * sg_{t-1} - learnRatePerSample * unitGainFactor * (1 + momentum) * g_{t-1}
// sg_t = momentum * sg_{t-1} + learnRatePerSample * unitGainFactor * g_{t-1}
ScaleAndAdd( unitGainFactor * learnRatePerSample, gradients, momentum, smoothedGradients);
ScaleAndAdd(-momentum, smoothedGradients, *this);
ScaleAndAdd(-unitGainFactor * learnRatePerSample, gradients, *this);
},
{ /* GPU dense */
ScaleAndAdd(unitGainFactor * learnRatePerSample, gradients, momentum, smoothedGradients);
ScaleAndAdd(-momentum, smoothedGradients, *this);
ScaleAndAdd(-unitGainFactor * learnRatePerSample, gradients, *this);
},
{ /* CPU sparse */
if (momentum != 0)
{
// Identical to the above, except that as a side effect "NormalGrad" modifies
// gradient values in place, so that gradientCache is needed to store the original values.
Matrix<ElemType> gradientCache(gradients.GetDeviceId());
gradientCache.AssignValuesOf(gradients);
gradients.m_CPUSparseMatrix->NormalGrad(*smoothedGradients.m_CPUMatrix, momentum, unitGainMomentum);
ScaleAndAdd(-momentum, smoothedGradients, *this);
ScaleAndAdd(-unitGainFactor * learnRatePerSample, gradientCache, *this);
}
},
{ /* GPU sparse */
if (momentum != 0)
{
Matrix<ElemType> gradientCache(gradients.GetDeviceId());
gradientCache.AssignValuesOf(gradients);
gradients.m_GPUSparseMatrix->NormalGrad(*smoothedGradients.m_GPUMatrix, momentum, unitGainMomentum);
ScaleAndAdd(-momentum, smoothedGradients, *this);
ScaleAndAdd(-unitGainFactor * learnRatePerSample, gradientCache, *this);
}
});
}
// both 'this' and gradients will be changed
template <class ElemType>
ElemType Matrix<ElemType>::Adagrad(Matrix<ElemType>& gradients, const bool needAveMultiplier)
{
DecideAndMoveToRightDevice(*this, gradients);
DISPATCH_MATRIX_ON_FLAG(&gradients, &gradients,
{ return m_CPUMatrix->Adagrad(*gradients.m_CPUMatrix, needAveMultiplier); SetDataLocation(CPU); },
{ return m_GPUMatrix->Adagrad(*gradients.m_GPUMatrix, needAveMultiplier); SetDataLocation(GPU); },
{ return gradients.m_CPUSparseMatrix->Adagrad(*m_CPUMatrix, needAveMultiplier); SetDataLocation(CPU); },
{ return gradients.m_GPUSparseMatrix->Adagrad(*m_GPUMatrix, needAveMultiplier); SetDataLocation(GPU); });
// Note: Since both 'this' and gradients are changed, we must call SetDataLocation() on 'this' as well.
}
// FSAdaGrad update -- Frank's "fix" of AdaGrad, very similar to what became later known as Adam
// updates
// - momentum accumulator
// - var momentum accumulator
// - denominator
// then
// - the model itself
template <class ElemType>
void Matrix<ElemType>::FSAdagradUpdate(size_t mbSize,
Matrix<ElemType>& gradients, Matrix<ElemType>& functionValues, double& smoothedCount,
const double learnRatePerSample, const double targetAdagradAvDenom,
const double meanMomentum, const double varMomentum, bool unitGainMomentum)
{
// keep track on how many samples have been accumulated into the g^2 accumulator
smoothedCount = varMomentum * smoothedCount + (1.0 - varMomentum) * mbSize;
// update the numerator and then do the meanMomentum-based model update
// Each AdaGrad-normalized gradient value is multiplied by the following, which
// - makes up for general scaling (targetAdagradAvDenom, a constant chosen by the user that should resemble the typical value range of gradients)
// - sqrt(1/#samples accumulated) to turn the sqr sum into an average
let targetAdagradAvDenom_x_sqrtAdagradSqrFrames = (ElemType)(targetAdagradAvDenom * sqrt(smoothedCount));
DISPATCH_MATRIX_ON_FLAG(&gradients, &gradients,
{
m_CPUMatrix->FSAdagrad(*gradients.m_CPUMatrix, *functionValues.m_CPUMatrix,
(ElemType)learnRatePerSample, (ElemType)meanMomentum, (ElemType)varMomentum,
targetAdagradAvDenom_x_sqrtAdagradSqrFrames, unitGainMomentum);
SetDataLocation(CPU);
},
{
m_GPUMatrix->FSAdagrad(*gradients.m_GPUMatrix, *functionValues.m_GPUMatrix,
(ElemType)learnRatePerSample, (ElemType)meanMomentum, (ElemType)varMomentum,
targetAdagradAvDenom_x_sqrtAdagradSqrFrames, unitGainMomentum);
SetDataLocation(GPU);
},
{ NOT_IMPLEMENTED; },
{ gradients.m_GPUSparseMatrix->FSAdagrad(*m_GPUMatrix, *functionValues.m_GPUMatrix, (ElemType)learnRatePerSample, (ElemType)meanMomentum, (ElemType)varMomentum, targetAdagradAvDenom_x_sqrtAdagradSqrFrames, unitGainMomentum); SetDataLocation(GPU); });
// Note: Since both 'this' and gradients are changed, we must call SetDataLocation() on 'this' as well.
}
///
// Implement the original adam algorithm according to the paper
// Ref: ADAM: A METHOD FOR STOCHASTIC OPTIMIZATION, https://arxiv.org/pdf/1412.6980.pdf
///
template <class ElemType>
void Matrix<ElemType>::AdamUpdate(Matrix<ElemType>& gradients, Matrix<ElemType>& functionValues, double& smoothedCount,
const double learnRatePerSample, const double meanMomentum, const double varMomentum, const double epsilon, bool unitGainMomentum, bool adamax)
{
smoothedCount++;
// Bias correction
let biasCorrection = adamax? (ElemType)(1. / (1- pow(meanMomentum, smoothedCount))) : (ElemType)(sqrt(1- pow(varMomentum, smoothedCount))/(1- pow(meanMomentum, smoothedCount)));
DISPATCH_MATRIX_ON_FLAG(&gradients, &gradients,
{
m_CPUMatrix->Adam(*gradients.m_CPUMatrix, *functionValues.m_CPUMatrix,
(ElemType)learnRatePerSample, (ElemType)meanMomentum, (ElemType)varMomentum,
biasCorrection, (ElemType)epsilon, unitGainMomentum, adamax);
SetDataLocation(CPU);
},
{
m_GPUMatrix->Adam(*gradients.m_GPUMatrix, *functionValues.m_GPUMatrix,
(ElemType)learnRatePerSample, (ElemType)meanMomentum, (ElemType)varMomentum,
biasCorrection, (ElemType)epsilon, unitGainMomentum, adamax);
SetDataLocation(GPU);
},
{ NOT_IMPLEMENTED; },
{ gradients.m_GPUSparseMatrix->Adam(*m_GPUMatrix, *functionValues.m_GPUMatrix,
(ElemType)learnRatePerSample, (ElemType)meanMomentum,
(ElemType)varMomentum, biasCorrection, (ElemType)epsilon, unitGainMomentum, adamax);
SetDataLocation(GPU); });
// Note: Since both 'this' and gradients are changed, we must call SetDataLocation() on 'this' as well.
}
template <class ElemType>
ElemType Matrix<ElemType>::RmsProp(Matrix<ElemType>& gradients,
ElemType RMS_GAMMA,
ElemType RMS_WGT_INC,
ElemType RMS_WGT_MAX,
ElemType RMS_WGT_DEC,
ElemType RMS_WGT_MIN,
const bool needAveMultiplier)
{
DecideAndMoveToRightDevice(*this, gradients);
DISPATCH_MATRIX_ON_FLAG(&gradients, &gradients,
{ return m_CPUMatrix->RmsProp(*gradients.m_CPUMatrix, RMS_GAMMA, RMS_WGT_INC, RMS_WGT_MAX, RMS_WGT_DEC, RMS_WGT_MIN, needAveMultiplier); SetDataLocation(CPU); },
{ return m_GPUMatrix->RmsProp(*gradients.m_GPUMatrix, RMS_GAMMA, RMS_WGT_INC, RMS_WGT_MAX, RMS_WGT_DEC, RMS_WGT_MIN, needAveMultiplier); SetDataLocation(GPU); },
{ NOT_IMPLEMENTED; },
{ return gradients.m_GPUSparseMatrix->RmsProp(*m_GPUMatrix, RMS_GAMMA, RMS_WGT_INC, RMS_WGT_MAX, RMS_WGT_DEC, RMS_WGT_MIN, needAveMultiplier); SetDataLocation(GPU); });
// Note: Since both 'this' and gradients are changed, we must call SetDataLocation() on 'this' as well.
}
template <class ElemType>
void Matrix<ElemType>::AdaDeltaUpdate(Matrix<ElemType>& gradients,
Matrix<ElemType>& functionValues,
ElemType learningRate, ElemType rho, ElemType epsilon)
{
DecideAndMoveToRightDevice(*this, gradients);
DISPATCH_MATRIX_ON_FLAG(&gradients, &gradients,
{ return m_CPUMatrix->AdaDelta(*gradients.m_CPUMatrix, *functionValues.m_CPUMatrix, learningRate, rho, epsilon); SetDataLocation(CPU); },
{ return m_GPUMatrix->AdaDelta(*gradients.m_GPUMatrix, *functionValues.m_GPUMatrix, learningRate, rho, epsilon); SetDataLocation(GPU); },
{ return gradients.m_CPUSparseMatrix->AdaDelta(*m_CPUMatrix, *functionValues.m_CPUMatrix, learningRate, rho, epsilon); SetDataLocation(CPU); },
{ return gradients.m_GPUSparseMatrix->AdaDelta(*m_GPUMatrix, *functionValues.m_GPUMatrix, learningRate, rho, epsilon); SetDataLocation(GPU); });
}
template <class ElemType>
void Matrix<ElemType>::Reshape(const size_t numRows, const size_t numCols)
{
if (numRows != GetNumRows() || numCols != GetNumCols())
{
DISPATCH_MATRIX_ON_FLAG(this, this,
{ m_CPUMatrix->Reshape(numRows, numCols); },
{ m_GPUMatrix->Reshape(numRows, numCols); },
{ NOT_IMPLEMENTED; },
{ m_GPUSparseMatrix->Reshape(numRows, numCols); });
}
}
// Note: Resize() will leave the matrix content undefined.
// Note: Resize calls RequireSizeAndAllocate on the sparse versions in for performance reasons. If the external caller knows the nz, then we should set it.
template <class ElemType>
void Matrix<ElemType>::Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve /*=0*/, bool growOnly /*=true*/)
{
// TODO: should this function test whether the size is changing, and skip if it isn't? We have at least one explicit test for this code calling this (recurrent node)
DISPATCH_MATRIX_ON_FLAG_USEBOTH_4BOTH(this,
{ m_CPUMatrix->Resize(numRows, numCols, growOnly); },
{ m_GPUMatrix->Resize(numRows, numCols, growOnly); },
{ m_CPUSparseMatrix->RequireSizeAndAllocate(numRows, numCols, numNZElemToReserve, growOnly, false); },
{ m_GPUSparseMatrix->RequireSizeAndAllocate(numRows, numCols, numNZElemToReserve, growOnly, false); });
#ifdef _DEBUG
if (GetMatrixType() != MatrixType::SPARSE)
Invalidate(); // Fill the matrix with NaNs to detect using the content which is undefined. Unfortunately this won't work for sparse matrices.
#endif
}
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::RepMat(const Matrix<ElemType>& frmMat, const size_t rowRatio, const size_t colRatio)
{
size_t nCols = frmMat.GetNumCols();
size_t nRows = frmMat.GetNumRows();
if (rowRatio > 1)
RuntimeError("RepMat not yet supporting raw ratio larger than 1");
size_t newCols = colRatio * nCols;
Matrix<ElemType> c(nRows, newCols, frmMat.GetDeviceId());
for (size_t i = 0; i < colRatio; i++)
{
c.ColumnSlice(i * nCols, nCols).AssignValuesOf(frmMat);
}
return c;
}
template <class ElemType>
size_t Matrix<ElemType>::GetAllocatedSize() const
{
return m_baseMatrix->GetSizeAllocated();
}
// reset for sparse matrix. Semantically the same as setting all values to 0.
template <class ElemType>
void Matrix<ElemType>::Reset()
{
DISPATCH_MATRIX_ON_FLAG_USEBOTH_4BOTH(this,
{ NOT_IMPLEMENTED; },
{ NOT_IMPLEMENTED; },
{ m_CPUSparseMatrix->Reset(); },
{ m_GPUSparseMatrix->Reset(); });
}
template <class ElemType>
size_t Matrix<ElemType>::GetNumRows() const
{
return m_baseMatrix->GetNumRows();
}
template <class ElemType>
size_t Matrix<ElemType>::GetNumCols() const
{
return m_baseMatrix->GetNumCols();
}
template <class ElemType>
size_t Matrix<ElemType>::GetNumElements() const
{
return GetNumRows() * GetNumCols();
}
template <class ElemType>
bool Matrix<ElemType>::IsEmpty() const
{
return m_baseMatrix->IsEmpty();
}
#pragma endregion Basic Operators
#pragma region Member BLAS Functions
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::operator+=(ElemType alpha)
{
return AssignSumOf(alpha, *this);
}
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::operator+(ElemType alpha) const
{
Matrix<ElemType> c(GetNumRows(), GetNumCols(), GetDeviceId());
c.AssignSumOf(alpha, *this);
return c;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignSumOf(const ElemType alpha, const Matrix<ElemType>& a)
{
if (a.IsEmpty())
LogicError("AssignSumOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignSumOf(alpha, *a.m_CPUMatrix),
m_GPUMatrix->AssignSumOf(alpha, *a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
//if [this] and a have same dimension then [this]=[this]+a
//if a is a column vector, add to all columns of [this]
//if a is a row vector, add to all rows of [this]
//if a is a scalar, add it to all elements.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::operator+=(const Matrix<ElemType>& a)
{
DecideAndMoveToRightDevice(*this, a);
if (!(GetMatrixType() == a.GetMatrixType()))
NOT_IMPLEMENTED;
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->operator+=(*a.m_CPUMatrix),
m_GPUMatrix->operator+=(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
//if [this] and a have same dimension then OUTPUT=[this]+a
//if a is a column vector, add to all columns of [this]
//if a is a row vector, add to all rows of [this]
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::operator+(const Matrix<ElemType>& a) const
{
if (GetNumElements() == 1)
{
Matrix<ElemType> c(a.DeepClone());
DISPATCH_MATRIX_ON_FLAG(this,
&c,
c += (*this)(0, 0),
c += (m_GPUMatrix->Get00Element()), // BUGBUG: efficiency
c += (*this)(0, 0),
NOT_IMPLEMENTED);
return c;
}
else if (a.GetNumElements() == 1)
{
Matrix<ElemType> c(this->DeepClone());
DISPATCH_MATRIX_ON_FLAG(&a,
&c,
c += a(0, 0),
c += (a.m_GPUMatrix->Get00Element()), // BUGBUG: efficiency
c += a(0, 0),
NOT_IMPLEMENTED);
return c;
}
else
{
Matrix<ElemType> c(this->DeepClone()); // this implementation will introduce a copy overhead. but make resue of the code
c += a;
return c;
}
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignSumOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b)
{
if (this == &a)
{
*this += b;
return *this;
}
if (this == &b)
{
*this += a;
return *this;
}
if (a.GetNumElements() == 1)
{
SetValue(b);
(*this) += a;
}
else
{
SetValue(a);
(*this) += b;
}
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::operator-=(ElemType alpha)
{
return AssignDifferenceOf(*this, alpha);
}
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::operator-(ElemType alpha) const
{
Matrix<ElemType> c(GetNumRows(), GetNumCols(), GetDeviceId());
c.AssignDifferenceOf(*this, alpha);
return c;
}
//for each column of a, we assign numRows starting from startIndex to this
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignRowSliceValuesOf(const Matrix<ElemType>& a, const size_t startIndex, const size_t numRows)
{
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->AssignRowSliceValuesOf(*a.m_CPUMatrix, startIndex, numRows),
m_GPUMatrix->AssignRowSliceValuesOf(*a.m_GPUMatrix, startIndex, numRows),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
//for each column of a, we assign all rows of a to this starting from startIndex
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignToRowSliceValuesOf(const Matrix<ElemType>& a, const size_t startIndex, const size_t numRows)
{
DecideAndMoveToRightDevice(*this, a);
// WARNING: a and this must have same type
if (!(GetMatrixType() == a.GetMatrixType()))
NOT_IMPLEMENTED;
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->AssignToRowSliceValuesOf(*a.m_CPUMatrix, startIndex, numRows),
m_GPUMatrix->AssignToRowSliceValuesOf(*a.m_GPUMatrix, startIndex, numRows),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
//for the row slice of this starting from startIndex we add a to it.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AddToRowSliceValuesOf(const Matrix<ElemType>& a, const size_t startIndex, const size_t numRows)
{
DecideAndMoveToRightDevice(*this, a);
// WARNING: a and this must have same type
if (!(GetMatrixType() == a.GetMatrixType()))
NOT_IMPLEMENTED;
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->AddToRowSliceValuesOf(*a.m_CPUMatrix, startIndex, numRows),
m_GPUMatrix->AddToRowSliceValuesOf(*a.m_GPUMatrix, startIndex, numRows),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
//for each column of this, we add row slice of a starting from startIndex
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AddWithRowSliceValuesOf(const Matrix<ElemType>& a, const size_t startIndex, const size_t numRows)
{
DecideAndMoveToRightDevice(*this, a);
// WARNING: a and this must have same type
if (!(GetMatrixType() == a.GetMatrixType()))
NOT_IMPLEMENTED;
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->AddWithRowSliceValuesOf(*a.m_CPUMatrix, startIndex, numRows),
m_GPUMatrix->AddWithRowSliceValuesOf(*a.m_GPUMatrix, startIndex, numRows),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignRepeatOf(const Matrix<ElemType>& a, const size_t numRowRepeats, const size_t numColRepeats)
{
DecideAndMoveToRightDevice(*this, a);
// WARNING: a and this must have same type
if (!(GetMatrixType() == a.GetMatrixType()))
NOT_IMPLEMENTED;
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->AssignRepeatOf(*a.m_CPUMatrix, numRowRepeats, numColRepeats),
m_GPUMatrix->AssignRepeatOf(*a.m_GPUMatrix, numRowRepeats, numColRepeats),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AddToRowRepeatValuesOf(const Matrix<ElemType>& a, const size_t numRepeats)
{
DecideAndMoveToRightDevice(*this, a);
// WARNING: a and this must have same type
if (!(GetMatrixType() == a.GetMatrixType()))
NOT_IMPLEMENTED;
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->AddToRowRepeatValuesOf(*a.m_CPUMatrix, numRepeats),
m_GPUMatrix->AddToRowRepeatValuesOf(*a.m_GPUMatrix, numRepeats),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
//used in the DSSM model. The resulted *this is a [a.GetRows()*(negNumber+1), a.GetCols()] matrix
//each column contains posNumber of positive samples (original) and negNumber negative samples generated by copying
//sample shifted by shiftNumber columns
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignPositiveAndShiftedNegSample(const Matrix<ElemType>& a, const size_t posNumber, const size_t negNumber, const size_t shiftNumber)
{
DecideAndMoveToRightDevice(*this, a);
// WARNING: a and this must have same type
if (!(GetMatrixType() == a.GetMatrixType()))
NOT_IMPLEMENTED;
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->AssignPositiveAndShiftedNegSample(*a.m_CPUMatrix, posNumber, negNumber, shiftNumber),
m_GPUMatrix->AssignPositiveAndShiftedNegSample(*a.m_GPUMatrix, posNumber, negNumber, shiftNumber),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
//used in the DSSM model. *this = *this + positive and negative samples folded back to the right place
//each column of a contains posNumber of positive samples (original) and negNumber negative samples generated by copying
//sample shifted by shiftNumber columns
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AddFoldedPositiveAndShiftedNegSample(const Matrix<ElemType>& a, const size_t posNumber, const size_t negNumber, const size_t shiftNumber)
{
DecideAndMoveToRightDevice(*this, a);
// WARNING: a and this must have same type
if (!(GetMatrixType() == a.GetMatrixType()))
NOT_IMPLEMENTED;
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->AddFoldedPositiveAndShiftedNegSample(*a.m_CPUMatrix, posNumber, negNumber, shiftNumber),
m_GPUMatrix->AddFoldedPositiveAndShiftedNegSample(*a.m_GPUMatrix, posNumber, negNumber, shiftNumber),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignDifferenceOf(const ElemType alpha, const Matrix<ElemType>& a)
{
if (a.IsEmpty())
LogicError("AssignDifferenceOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->AssignDifferenceOf(alpha, *a.m_CPUMatrix),
m_GPUMatrix->AssignDifferenceOf(alpha, *a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignDifferenceOf(const Matrix<ElemType>& a, const ElemType alpha)
{
if (a.IsEmpty())
LogicError("AssignDifferenceOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->AssignDifferenceOf(*a.m_CPUMatrix, alpha),
m_GPUMatrix->AssignDifferenceOf(*a.m_GPUMatrix, alpha),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
//if [this] and a have same dimension then [this]=[this]-a
//if a is a column vector, minus it from all columns of [this]
//if a is a row vector, minus it from all rows of [this]
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::operator-=(const Matrix<ElemType>& a)
{
if (a.IsEmpty())
LogicError("Minus Operation: Matrix a is empty.");
DecideAndMoveToRightDevice(*this, a);
DISPATCH_MATRIX_ON_FLAG(this,
this,
*m_CPUMatrix -= *a.m_CPUMatrix,
*m_GPUMatrix -= *a.m_GPUMatrix,
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
//if [this] and a have same dimension then output=[this]-a
//if a is a column vector, minus it from all columns of [this]
//if a is a row vector, minus it from all rows of [this]
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::operator-(const Matrix<ElemType>& a) const
{
Matrix<ElemType> c(this->DeepClone()); // this implementation will introduce a copy overhead. but make resue of the code
ScaleAndAdd(-1, a, c);
return c;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignDifferenceOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b)
{
// if first arg broadcasts, we swap first and the flip the sign
// This is because there is no equivalent to operator-=() that works the other way round.
// TODO: We need ternary ops where the output storage is separate.
if (a.GetNumRows() < b.GetNumRows() || a.GetNumCols() < b.GetNumCols())
{
if (a.GetNumRows() > b.GetNumRows() || a.GetNumCols() > b.GetNumCols())
LogicError("AssignDifferenceOf: Invalid dimensions.");
AssignDifferenceOf(b, a);
*this *= -1;
return *this;
}
if (this != &a)
SetValue(a);
(*this) -= b;
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::operator*=(ElemType alpha)
{
Scale(alpha, *this);
return *this;
}
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::operator*(ElemType alpha) const
{
Matrix<ElemType> c(GetNumRows(), GetNumCols(), (DEVICEID_TYPE) m_preferredDeviceId);
Scale(alpha, *this, c);
return c;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignProductOf(const ElemType alpha, const Matrix<ElemType>& a)
{
Scale(alpha, a, *this);
return *this;
}
// [this]=a*b
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignProductOf(const Matrix<ElemType>& a, const bool transposeA, const Matrix<ElemType>& b, const bool transposeB)
{
if (a.GetNumElements() == 1)
{
if (transposeB)
AssignTransposeOf(b);
else
this->SetValue(b);
DISPATCH_MATRIX_ON_FLAG(this,
nullptr,
(*this) *= a(0, 0),
(*this) *= a.m_GPUMatrix->Get00Element(),
(*this) *= a(0, 0),
NOT_IMPLEMENTED);
}
else if (b.GetNumElements() == 1)
{
if (transposeA)
AssignTransposeOf(a);
else
this->SetValue(a);
DISPATCH_MATRIX_ON_FLAG(this,
nullptr,
(*this) *= b(0, 0),
(*this) *= b.m_GPUMatrix->Get00Element(),
(*this) *= b(0, 0),
NOT_IMPLEMENTED);
}
else
Multiply(a, transposeA, b, transposeB, *this);
return *this;
}
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::operator*(const Matrix<ElemType>& a) const
{
if (GetNumElements() == 1)
{
Matrix<ElemType> c((DEVICEID_TYPE) a.GetPreferredDeviceId());
DISPATCH_MATRIX_ON_FLAG(this,
nullptr,
c.AssignProductOf((*this)(0, 0), a),
c.AssignProductOf(m_GPUMatrix->Get00Element(), a), // BUGBUG: efficiency
c.AssignProductOf((*this)(0, 0), a),
NOT_IMPLEMENTED);
return c;
}
else if (a.GetNumElements() == 1)
{
Matrix<ElemType> c((DEVICEID_TYPE) GetPreferredDeviceId());
DISPATCH_MATRIX_ON_FLAG(&a,
nullptr,
c.AssignProductOf(a(0, 0), (*this)),
c.AssignProductOf(a.m_GPUMatrix->Get00Element(), (*this)), // BUGBUG: efficiency
c.AssignProductOf(a(0, 0), (*this)),
NOT_IMPLEMENTED);
return c;
}
else
{
Matrix<ElemType> c(GetNumRows(), a.GetNumCols(), (DEVICEID_TYPE) GetPreferredDeviceId());
Multiply(*this, a, c);
return c;
}
}
// [this]=a*b where a is a 1x1 scalar
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::Assign1x1ProductOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b)
{
Multiply1x1AndWeightedAdd(+1, a, b, 0.0f, *this);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::operator/=(ElemType alpha)
{
(*this) *= 1 / alpha;
return (*this);
}
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::operator/(ElemType alpha) const
{
return ((*this) * (1 / alpha));
}
//element-wise power
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::operator^=(ElemType alpha)
{
auto& us = *this;
ElementWisePower(alpha, us, us);
return us;
}
//element-wise power
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::operator^(ElemType alpha) const
{
Matrix<ElemType> c(GetNumRows(), GetNumCols(), (DEVICEID_TYPE) GetDeviceId());
ElementWisePower(alpha, *this, c);
return c;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignElementPowerOf(const Matrix<ElemType>& a, const ElemType power)
{
ElementWisePower(power, a, *this);
return *this;
}
//[this]=[this] .* a (we cannot override operator .* in c++)
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::ElementMultiplyWith(const Matrix<ElemType>& a)
{
return AssignElementProductOf(*this, a);
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::ElementDivideBy(const Matrix<ElemType>& a)
{
return AssignElementDivisionOf(*this, a);
}
//[this]=a .* b
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignElementProductOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b)
{
if (a.IsEmpty() || b.IsEmpty())
LogicError("AssignElementProductOf: Matrix is empty.");
assert(a.GetNumRows() == b.GetNumRows() && a.GetNumCols() == b.GetNumCols());
if (!(a.GetNumRows() == b.GetNumRows() && a.GetNumCols() == b.GetNumCols()))
InvalidArgument("The input matrix dimensions do not match.");
DecideAndMoveToRightDevice(a, b, *this);
if (!(a.GetMatrixType() == b.GetMatrixType()))
NOT_IMPLEMENTED;
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->AssignElementProductOf(*a.m_CPUMatrix, *b.m_CPUMatrix),
m_GPUMatrix->AssignElementProductOf(*a.m_GPUMatrix, *b.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AddElementProductOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b)
{
if (a.IsEmpty() || b.IsEmpty())
LogicError("AddElementProductOf: Matrix is empty.");
assert(a.GetNumRows() == b.GetNumRows() && a.GetNumCols() == b.GetNumCols());
if (!(a.GetNumRows() == b.GetNumRows() && a.GetNumCols() == b.GetNumCols()))
InvalidArgument("The input matrix dimensions do not match.");
if (!(a.GetNumRows() == GetNumRows() && a.GetNumCols() == GetNumCols()))
InvalidArgument("The input matrix dimensions do not match [this].");
DecideAndMoveToRightDevice(*this, a, b);
if (!(a.GetMatrixType() == b.GetMatrixType() && GetMatrixType() == b.GetMatrixType()))
NOT_IMPLEMENTED;
DISPATCH_MATRIX_ON_FLAG(this,
nullptr,
m_CPUMatrix->AddElementProductOf(*a.m_CPUMatrix, *b.m_CPUMatrix),
m_GPUMatrix->AddElementProductOf(*a.m_GPUMatrix, *b.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
//[this]=a ./ b
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignElementDivisionOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b)
{
if (a.IsEmpty() || b.IsEmpty())
LogicError("AssignElementDivisionOf: Matrix is empty.");
assert(a.GetNumRows() == b.GetNumRows() && a.GetNumCols() == b.GetNumCols());
if (!(a.GetNumRows() == b.GetNumRows() && a.GetNumCols() == b.GetNumCols()))
InvalidArgument("The input matrix dimensions do not match.");
DecideAndMoveToRightDevice(a, b, *this);
// WARNING: a and b must have same type
if (!(a.GetMatrixType() == b.GetMatrixType()))
NOT_IMPLEMENTED;
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->AssignElementDivisionOf(*a.m_CPUMatrix, *b.m_CPUMatrix),
m_GPUMatrix->AssignElementDivisionOf(*a.m_GPUMatrix, *b.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::ColumnElementMultiplyWith(const Matrix<ElemType>& a)
{
if (a.IsEmpty() || IsEmpty())
LogicError("ColumnElementMultiplyWith: Matrix is empty.");
if (!(a.GetNumRows() == GetNumRows() && a.GetNumCols() == 1))
InvalidArgument("ColumnElementMultiplyWith: The input matrix should be a col vector and match [this]'s rows.");
DecideAndMoveToRightDevice(*this, a);
// WARNING: a and this must have same type
if (!(GetMatrixType() == a.GetMatrixType()))
NOT_IMPLEMENTED;
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->ColumnElementMultiplyWith(*a.m_CPUMatrix),
m_GPUMatrix->ColumnElementMultiplyWith(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::RowElementMultiplyWith(const Matrix<ElemType>& a)
{
if (a.IsEmpty() || IsEmpty())
LogicError("RowElementMultiplyWith: Matrix is empty.");
if (!(a.GetNumCols() == GetNumCols() && a.GetNumRows() == 1))
InvalidArgument("RowElementMultiplyWith: The input matrix should be a row vector and match [this]'s columns.");
// WARNING: a and this must have same type
if (!(GetMatrixType() == a.GetMatrixType()))
NOT_IMPLEMENTED;
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->RowElementMultiplyWith(*a.m_CPUMatrix),
m_GPUMatrix->RowElementMultiplyWith(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::RowElementDivideBy(const Matrix<ElemType>& a)
{
if (a.IsEmpty() || IsEmpty())
LogicError("RowElementDivideBy: Matrix is empty.");
if (!(a.GetNumCols() == GetNumCols() && a.GetNumRows() == 1))
InvalidArgument("RowElementDivideBy: The input matrix should be a row vector and match [this]'s columns.");
// WARNING: a and this must have same type
if (!(GetMatrixType() == a.GetMatrixType()))
NOT_IMPLEMENTED;
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->RowElementDivideBy(*a.m_CPUMatrix),
m_GPUMatrix->RowElementDivideBy(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::ColumnElementDivideBy(const Matrix<ElemType>& a)
{
if (a.IsEmpty() || IsEmpty())
LogicError("ColumnElementDivideBy: Matrix is empty.");
if (!(a.GetNumRows() == GetNumRows() && a.GetNumCols() == 1))
InvalidArgument("ColumnElementDivideBy: The input matrix should be a col vector and match [this]'s rows.");
DecideAndMoveToRightDevice(*this, a);
// WARNING: a and this must have same type
if (!(GetMatrixType() == a.GetMatrixType()))
NOT_IMPLEMENTED;
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->ColumnElementDivideBy(*a.m_CPUMatrix),
m_GPUMatrix->ColumnElementDivideBy(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
//[this]=1 ./ a
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::ElementInverse()
{
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->ElementInverse(),
m_GPUMatrix->ElementInverse(),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->ElementInverse());
return (*this);
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignElementInverseOf(const Matrix<ElemType>& a)
{
if (a.IsEmpty())
LogicError("AssignElementInverseOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignElementInverseOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignElementInverseOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->AssignElementInverseOf(*a.m_GPUSparseMatrix));
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceSigmoid()
{
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceSigmoid(),
m_GPUMatrix->InplaceSigmoid(),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->InplaceSigmoid());
return (*this);
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignSigmoidOf(const Matrix<ElemType>& a)
{
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignSigmoidOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignSigmoidOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->AssignSigmoidOf(*a.m_GPUSparseMatrix));
return *this;
}
//[this]=sigmoid([this]) element wise
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceLinearRectifierDerivative()
{
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceLinearRectifierDerivative(),
m_GPUMatrix->InplaceLinearRectifierDerivative(),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->InplaceLinearRectifierDerivative());
return (*this);
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignLinearRectifierDerivativeOf(const Matrix<ElemType>& a)
{
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignLinearRectifierDerivativeOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignLinearRectifierDerivativeOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->AssignLinearRectifierDerivativeOf(*a.m_GPUSparseMatrix));
return *this;
}
//[this]=sigmoid([this]) element wise
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceSigmoidDerivative()
{
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceSigmoidDerivative(),
m_GPUMatrix->InplaceSigmoidDerivative(),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return (*this);
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignSigmoidDerivativeOf(const Matrix<ElemType>& a)
{
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignSigmoidDerivativeOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignSigmoidDerivativeOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignNumOfDiff(const Matrix<ElemType>& a, const Matrix<ElemType>& b, bool searchInCol)
{
DecideAndMoveToRightDevice(a, b, *this);
// WARNING: a and b must have same type
if (!(a.GetMatrixType() == b.GetMatrixType()))
NOT_IMPLEMENTED;
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->AssignNumOfDiff(*a.m_CPUMatrix, *b.m_CPUMatrix, searchInCol),
m_GPUMatrix->AssignNumOfDiff(*a.m_GPUMatrix, *b.m_GPUMatrix, searchInCol),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
//[this]=tanh([this]) element wise
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceTanh()
{
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceTanh(),
m_GPUMatrix->InplaceTanh(),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->InplaceTanh());
return (*this);
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignTanhOf(const Matrix<ElemType>& a)
{
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignTanhOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignTanhOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->AssignTanhOf(*a.m_GPUSparseMatrix));
return *this;
}
//[this]=softmax([this]) element wise
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceLogSoftmax(const bool isColWise)
{
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceLogSoftmax(isColWise),
m_GPUMatrix->InplaceLogSoftmax(isColWise),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignLogSoftmaxOf(const Matrix<ElemType>& a, const bool isColWise)
{
if (a.IsEmpty())
LogicError("AssignLogSoftmaxOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignLogSoftmaxOf(*a.m_CPUMatrix, isColWise),
m_GPUMatrix->AssignLogSoftmaxOf(*a.m_GPUMatrix, isColWise),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
//[this]=softmax([this]) element wise
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceHardmax(const bool isColWise)
{
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceHardmax(isColWise),
m_GPUMatrix->InplaceHardmax(isColWise),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignHardmaxOf(const Matrix<ElemType>& a, const bool isColWise)
{
if (a.IsEmpty())
LogicError("AssignHardmaxOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignHardmaxOf(*a.m_CPUMatrix, isColWise),
m_GPUMatrix->AssignHardmaxOf(*a.m_GPUMatrix, isColWise),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceSqrt()
{
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceSqrt(),
m_GPUMatrix->InplaceSqrt(),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->InplaceSqrt());
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignSqrtOf(const Matrix<ElemType>& a)
{
if (a.IsEmpty())
LogicError("AssignSqrtOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignSqrtOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignSqrtOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->AssignSqrtOf(*a.m_GPUSparseMatrix));
return *this;
}
//[this]=exp([this]) element wise
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceExp()
{
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceExp(),
m_GPUMatrix->InplaceExp(),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->InplaceExp());
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignExpOf(const Matrix<ElemType>& a)
{
if (a.IsEmpty())
LogicError("AssignExpOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignExpOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignExpOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->AssignExpOf(*a.m_GPUSparseMatrix));
return *this;
}
//[this]=exp([this]) element wise
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceAbs()
{
DISPATCH_MATRIX_ON_FLAG(this,
nullptr,
m_CPUMatrix->InplaceAbs(),
m_GPUMatrix->InplaceAbs(),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->InplaceAbs());
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignAbsOf(const Matrix<ElemType>& a)
{
if (a.IsEmpty())
LogicError("AssignAbsOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignAbsOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignAbsOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->AssignAbsOf(*a.m_GPUSparseMatrix));
return *this;
}
//[this]=log([this]) element wise
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceLog()
{
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceLog(),
m_GPUMatrix->InplaceLog(),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->InplaceLog());
return *this;
}
//[this]=log([this]) element wise
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceLog10()
{
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceLog10(),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignLogOf(const Matrix<ElemType>& a)
{
if (a.IsEmpty())
LogicError("AssignLogOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignLogOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignLogOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->AssignLogOf(*a.m_GPUSparseMatrix));
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignLog10Of(const Matrix<ElemType>& a)
{
if (a.IsEmpty())
LogicError("AssignLogOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignLog10Of(*a.m_CPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED,
m_GPUSparseMatrix->AssignLogOf(*a.m_GPUSparseMatrix));
return *this;
}
//[this]=cos([this]) element wise
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceCosine()
{
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceCosine(),
m_GPUMatrix->InplaceCosine(),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignCosineOf(const Matrix<ElemType>& a)
{
if (a.IsEmpty())
LogicError("AssignCosineOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignCosineOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignCosineOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
//[this]= -sin([this]) element wise
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceNegativeSine()
{
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceNegativeSine(),
m_GPUMatrix->InplaceNegativeSine(),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignNegativeSineOf(const Matrix<ElemType>& a)
{
if (a.IsEmpty())
LogicError("AssignNegativeSineOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignNegativeSineOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignNegativeSineOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceTruncate(const ElemType threshold)
{
if (IsEmpty())
LogicError("InplaceTruncate: Matrix is empty.");
if (sizeof(ElemType) == sizeof(float))
{
if (!isfinite((float) threshold))
return *this;
}
else
{
if (!isfinite(threshold))
return *this;
}
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceTruncate(threshold),
m_GPUMatrix->InplaceTruncate(threshold),
m_CPUSparseMatrix->InplaceTruncate(threshold),
m_GPUSparseMatrix->InplaceTruncate(threshold));
return *this;
}
template <class ElemType>
void Matrix<ElemType>::InplaceTranspose()
{
if (IsEmpty())
return;
DISPATCH_MATRIX_ON_FLAG(this,
this,
NOT_IMPLEMENTED,
NOT_IMPLEMENTED,
NOT_IMPLEMENTED,
m_GPUSparseMatrix->InplaceTranspose());
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceSoftThreshold(const ElemType threshold)
{
assert(threshold >= 0);
if (IsEmpty())
LogicError("InplaceSoftThreshold: Matrix is empty.");
if (threshold == 0)
return *this;
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceSoftThreshold(threshold),
m_GPUMatrix->InplaceSoftThreshold(threshold),
m_CPUSparseMatrix->InplaceSoftThreshold(threshold),
m_GPUSparseMatrix->InplaceSoftThreshold(threshold));
return *this;
}
//Threshold truncating: this[i] = max( this[i], threshold )
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceTruncateBottom(const ElemType threshold)
{
if (IsEmpty())
LogicError("InplaceTruncateBottom: Matrix is empty.");
if (sizeof(ElemType) == sizeof(float))
{
if (!isfinite((float) threshold))
return *this;
}
else
{
if (!isfinite(threshold))
return *this;
}
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceTruncateBottom(threshold),
m_GPUMatrix->InplaceTruncateBottom(threshold),
m_CPUSparseMatrix->InplaceTruncateBottom(threshold),
m_GPUSparseMatrix->InplaceTruncateBottom(threshold));
return *this;
}
//Threshold truncating: this[i] = max( a[i], threshold )
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignTruncateBottomOf(const Matrix<ElemType>& a, const ElemType threshold)
{
if (a.IsEmpty())
LogicError("AssignTruncateBottomOf: Matrix a is empty.");
if (sizeof(ElemType) == sizeof(float))
{
if (!isfinite((float) threshold))
{
this->SetValue(a);
return *this;
}
}
else
{
if (!isfinite(threshold))
{
this->SetValue(a);
return *this;
}
}
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignTruncateBottomOf(*a.m_CPUMatrix, threshold),
m_GPUMatrix->AssignTruncateBottomOf(*a.m_GPUMatrix, threshold),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->AssignTruncateBottomOf(*a.m_GPUSparseMatrix, threshold));
return *this;
}
//Threshold truncating: this[i] = min( this[i], threshold )
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceTruncateTop(const ElemType threshold)
{
if (IsEmpty())
LogicError("InplaceTruncateTop: Matrix is empty.");
if (sizeof(ElemType) == sizeof(float))
{
if (!isfinite((float) threshold))
return *this;
}
else
{
if (!isfinite(threshold))
return *this;
}
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceTruncateTop(threshold),
m_GPUMatrix->InplaceTruncateTop(threshold),
m_CPUSparseMatrix->InplaceTruncateTop(threshold),
m_GPUSparseMatrix->InplaceTruncateTop(threshold));
return *this;
}
//Threshold truncating: this[i] = min( a[i], threshold )
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignTruncateTopOf(const Matrix<ElemType>& a, const ElemType threshold)
{
if (a.IsEmpty())
LogicError("AssignTruncateTopOf: Matrix a is empty.");
if (sizeof(ElemType) == sizeof(float))
{
if (!isfinite((float) threshold))
{
this->SetValue(a);
return *this;
}
}
else
{
if (!isfinite(threshold))
{
this->SetValue(a);
return *this;
}
}
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignTruncateTopOf(*a.m_CPUMatrix, threshold),
m_GPUMatrix->AssignTruncateTopOf(*a.m_GPUMatrix, threshold),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->AssignTruncateTopOf(*a.m_GPUSparseMatrix, threshold));
return *this;
}
//Threshold truncating: this[i] = 0 if abs(this[i]<threshold).
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::SetToZeroIfAbsLessThan(const ElemType threshold)
{
if (IsEmpty())
LogicError("SetToZeroIfAbsLessThan: Matrix is empty.");
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->SetToZeroIfAbsLessThan(threshold),
m_GPUMatrix->SetToZeroIfAbsLessThan(threshold),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->SetToZeroIfAbsLessThan(threshold));
return *this;
}
//sum of all elements
template <class ElemType>
ElemType Matrix<ElemType>::SumOfElements() const
{
if (IsEmpty())
LogicError("SumOfElements: Matrix is empty.");
DISPATCH_MATRIX_ON_FLAG(this,
nullptr,
return m_CPUMatrix->SumOfElements(),
return m_GPUMatrix->SumOfElements(),
return m_CPUSparseMatrix->SumOfElements(),
return m_GPUSparseMatrix->SumOfElements());
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignOneHot(const Matrix<ElemType>& a, vector<size_t>& shape, size_t axis, bool is_sparse)
{
if (a.IsEmpty())
LogicError("AssignOneHot: Matrix a is empty.");
if (axis >= shape.size())
LogicError("AssignOneHot: axis is not correct");
if (a.GetMatrixType() == SPARSE)
NOT_IMPLEMENTED;
//todo: sparse matrix type
if (is_sparse && GetMatrixType() != SPARSE)
{
SwitchToMatrixType(SPARSE, matrixFormatSparseCSC, false);
}
DISPATCH_MATRIX_ON_FLAG_USECPU_4BOTH(this,
this,
m_CPUMatrix->AssignOneHot(*a.m_CPUMatrix, shape, axis),
m_GPUMatrix->AssignOneHot(*a.m_GPUMatrix, shape, axis),
m_CPUSparseMatrix->AssignOneHot(*a.m_CPUMatrix, shape, axis),
m_GPUSparseMatrix->AssignOneHot(*a.m_GPUMatrix, shape, axis)
);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::GatherFromTarget(const Matrix<ElemType>& indices, const Matrix<ElemType>& target, size_t row_elements)
{
if (indices.IsEmpty() || target.IsEmpty())
LogicError("GatherFromTarget: Input matrix is empty.");
DISPATCH_MATRIX_ON_FLAG(&indices,
this,
m_CPUMatrix->GatherFromTarget(*indices.m_CPUMatrix, *target.m_CPUMatrix, row_elements),
m_GPUMatrix->GatherFromTarget(*indices.m_GPUMatrix, *target.m_GPUMatrix, row_elements),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::ScatterToIndices(const Matrix<ElemType>& values, const Matrix<ElemType>& indices, size_t row_elements)
{
if (indices.IsEmpty() || values.IsEmpty())
LogicError("ScatterAccordingIndices: input matrix is empty.");
DISPATCH_MATRIX_ON_FLAG(&values,
this,
m_CPUMatrix->ScatterToIndices(*values.m_CPUMatrix, *indices.m_CPUMatrix, row_elements),
m_GPUMatrix->ScatterToIndices(*values.m_GPUMatrix, *indices.m_GPUMatrix, row_elements),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignSumOfElements(const Matrix<ElemType>& a)
{
if (a.IsEmpty())
LogicError("AssignSumOfElements: Matrix a is empty.");
// WARNING: a and this must have same type
if (!(GetMatrixType() == a.GetMatrixType()))
NOT_IMPLEMENTED;
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignSumOfElements(*a.m_CPUMatrix),
m_GPUMatrix->AssignSumOfElements(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
DeviceBoundNumber<ElemType> Matrix<ElemType>::Sum_AsDeviceBoundNum() const
{
DeviceBoundNumber<ElemType> result;
DISPATCH_MATRIX_ON_FLAG(this,
nullptr,
ElemType* val = new ElemType;
* val = m_CPUMatrix->SumOfElements(); result.ShallowCopyFrom(val, -1); return result,
return m_GPUMatrix->Sum_AsDeviceBoundNum(),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
//sum of all elements
template <class ElemType>
ElemType Matrix<ElemType>::SumOfAbsElements() const
{
if (IsEmpty())
LogicError("SumOfAbsElements: Matrix is empty.");
DISPATCH_MATRIX_ON_FLAG(this, nullptr,
{ return m_CPUMatrix->SumOfAbsElements(); },
{ return m_GPUMatrix->SumOfAbsElements(); },
{ NOT_IMPLEMENTED; },
{ return m_GPUSparseMatrix->SumOfAbsElements(); });
}
//sum of all elements
template <class ElemType>
ElemType Matrix<ElemType>::LogSumOfElements() const
{
if (IsEmpty())
LogicError("LogSumOfElements: Matrix is empty.");
DISPATCH_MATRIX_ON_FLAG(this, nullptr,
{ return m_CPUMatrix->LogSumOfElements(); },
{ return m_GPUMatrix->LogSumOfElements(); },
{ NOT_IMPLEMENTED},
{ NOT_IMPLEMENTED });
}
template <class ElemType>
bool Matrix<ElemType>::IsValid() const
{
if (m_currentDataLocation == CurrentDataLocation::GPU && GetMatrixType() == MatrixType::SPARSE)
{
return this->m_GPUSparseMatrix->IsValid();
}
else
{
NOT_IMPLEMENTED;
}
return false;
}
template <class ElemType>
bool Matrix<ElemType>::IsEqualTo(const Matrix<ElemType>& a, const ElemType threshold /*= 1e-8*/) const
{
return AreEqual(*this, a, threshold);
}
template <class ElemType>
void Matrix<ElemType>::VectorSum(const Matrix<ElemType>& a, Matrix<ElemType>& c, const bool isColWise)
{
DecideAndMoveToRightDevice(c, a);
if (!(a.GetMatrixType() == c.GetMatrixType()))
NOT_IMPLEMENTED;
DISPATCH_MATRIX_ON_FLAG(&c,
&c,
CPUMatrix<ElemType>::VectorSum(*a.m_CPUMatrix, *c.m_CPUMatrix, isColWise),
GPUMatrix<ElemType>::VectorSum(*a.m_GPUMatrix, *c.m_GPUMatrix, isColWise),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::VectorNorm1(Matrix<ElemType>& c, const bool isColWise) const
{
if (IsEmpty())
LogicError("VectorNormInf: Matrix is empty.");
DecideAndMoveToRightDevice(*this, c);
c.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this,
&c,
m_CPUMatrix->VectorNorm1(*c.m_CPUMatrix, isColWise),
m_GPUMatrix->VectorNorm1(*c.m_GPUMatrix, isColWise),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignVectorNorm1Of(Matrix<ElemType>& a, const bool isColWise)
{
a.VectorNorm1(*this, isColWise);
return *this;
}
template <class ElemType>
void Matrix<ElemType>::VectorNorm2(Matrix<ElemType>& c, const bool isColWise) const
{
if (IsEmpty())
LogicError("VectorNorm2: Matrix is empty.");
DecideAndMoveToRightDevice(*this, c);
c.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this,
&c,
m_CPUMatrix->VectorNorm2(*c.m_CPUMatrix, isColWise),
m_GPUMatrix->VectorNorm2(*c.m_GPUMatrix, isColWise),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignVectorNorm2Of(Matrix<ElemType>& a, const bool isColWise)
{
a.VectorNorm2(*this, isColWise);
return *this;
}
template <class ElemType>
void Matrix<ElemType>::VectorNormInf(Matrix<ElemType>& c, const bool isColWise) const
{
if (IsEmpty())
LogicError("VectorNormInf: Matrix is empty.");
DecideAndMoveToRightDevice(*this, c);
c.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this,
&c,
m_CPUMatrix->VectorNormInf(*c.m_CPUMatrix, isColWise),
m_GPUMatrix->VectorNormInf(*c.m_GPUMatrix, isColWise),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignVectorNormInfOf(Matrix<ElemType>& a, const bool isColWise)
{
a.VectorNormInf(*this, isColWise);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignInnerProductOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b, const bool isColWise)
{
InnerProduct(a, b, *this, isColWise);
return *this;
}
//column-wise crossproduct
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignKhatriRaoProductOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b)
{
if (a.IsEmpty() || b.IsEmpty())
LogicError("AssignKhatriRaoProductOf: Matrix is empty.");
assert(a.GetNumCols() == b.GetNumCols());
if (!(a.GetNumCols() == b.GetNumCols()))
InvalidArgument("AssignKhatriRaoProductOf: The input matrix dimensions do not match.");
DecideAndMoveToRightDevice(a, b, *this);
// WARNING: a and b must have same type
if (!(a.GetMatrixType() == b.GetMatrixType()))
NOT_IMPLEMENTED;
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->AssignKhatriRaoProductOf(*a.m_CPUMatrix, *b.m_CPUMatrix),
m_GPUMatrix->AssignKhatriRaoProductOf(*a.m_GPUMatrix, *b.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
//column-wise reshaped product. Used to compute KhatriRaoProduct Gradient
// this = reshape each column of a from (K1xK2,1) to (K1, K2)
// if each column of a is not transposed, each (K1, K2) times each column of b (K2, frames).
// the output is a (K1, frames) matrix
// if each column of a is tranposed, each (K1, K2)^T times each column of b(K1, frames) and output is (K2, frames)
//column-wise crossproduct
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AddColumnReshapeProductOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b, const bool transposeAColumn)
{
if (a.IsEmpty() || b.IsEmpty())
LogicError("AddColumnReshapeProductOf: Matrix is empty.");
assert(a.GetNumCols() == b.GetNumCols());
if (!(a.GetNumCols() == b.GetNumCols()))
InvalidArgument("AddColumnReshapeProductOf: The input matrix dimensions do not match.");
DecideAndMoveToRightDevice(*this, a, b);
// WARNING: a and b must have same type
if (!(a.GetMatrixType() == b.GetMatrixType() && GetMatrixType() == b.GetMatrixType()))
NOT_IMPLEMENTED;
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->AddColumnReshapeProductOf(*a.m_CPUMatrix, *b.m_CPUMatrix, transposeAColumn),
m_GPUMatrix->AddColumnReshapeProductOf(*a.m_GPUMatrix, *b.m_GPUMatrix, transposeAColumn),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AddWithScaleOf(ElemType alpha, const Matrix<ElemType>& a)
{
ScaleAndAdd(alpha, a, *this);
return *this;
}
template <class ElemType>
ElemType Matrix<ElemType>::FrobeniusNorm() const
{
if (IsEmpty())
LogicError("FrobeniusNorm: Matrix is empty.");
DISPATCH_MATRIX_ON_FLAG(this,
nullptr,
return m_CPUMatrix->FrobeniusNorm(),
return m_GPUMatrix->FrobeniusNorm(),
return m_CPUSparseMatrix->FrobeniusNorm(),
return m_GPUSparseMatrix->FrobeniusNorm());
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignFrobeniusNormOf(const Matrix<ElemType>& a)
{
if (a.IsEmpty())
LogicError("AssignFrobeniusNormOf: Matrix a is empty.");
Resize(1, 1);
// WARNING: a and this must have same type
if (!(GetMatrixType() == a.GetMatrixType()))
NOT_IMPLEMENTED;
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignFrobeniusNormOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignFrobeniusNormOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
ElemType Matrix<ElemType>::MatrixNormInf() const
{
if (IsEmpty())
LogicError("MatrixNormInf: Matrix is empty.");
DISPATCH_MATRIX_ON_FLAG(this,
nullptr,
return m_CPUMatrix->MatrixNormInf(),
return m_GPUMatrix->MatrixNormInf(),
NOT_IMPLEMENTED,
return m_GPUSparseMatrix->MatrixNormInf());
}
template <class ElemType>
ElemType Matrix<ElemType>::MatrixNorm1() const
{
if (IsEmpty())
LogicError("MatrixNorm1: Matrix is empty.");
DISPATCH_MATRIX_ON_FLAG(this,
nullptr,
return m_CPUMatrix->MatrixNorm1(),
return m_GPUMatrix->MatrixNorm1(),
NOT_IMPLEMENTED,
return m_GPUSparseMatrix->MatrixNorm1());
}
template <class ElemType>
ElemType Matrix<ElemType>::MatrixNorm0() const
{
if (IsEmpty())
LogicError("MatrixNorm0: Matrix is empty.");
DISPATCH_MATRIX_ON_FLAG(this,
nullptr,
return m_CPUMatrix->MatrixNorm0(),
return m_GPUMatrix->MatrixNorm0(),
NOT_IMPLEMENTED,
return m_GPUSparseMatrix->MatrixNorm0());
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignSignOf(const Matrix<ElemType>& a)
{
if (a.IsEmpty())
LogicError("AssignSignOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
// WARNING: a and this must have same type
if (!(GetMatrixType() == a.GetMatrixType()))
NOT_IMPLEMENTED;
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignSignOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignSignOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AddSignOf(const Matrix<ElemType>& a)
{
if (a.IsEmpty())
LogicError("AddSignOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
if (!(GetMatrixType() == a.GetMatrixType()))
NOT_IMPLEMENTED;
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AddSignOf(*a.m_CPUMatrix),
m_GPUMatrix->AddSignOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
// I decided to use Matrix<ElemType>& maxIndices instead of integer vector because the result may be used to do additional calculation
template <class ElemType>
void Matrix<ElemType>::VectorMax(Matrix<ElemType>& maxIndices, Matrix<ElemType>& maxValues, const bool isColWise) const
{
if (IsEmpty())
LogicError("VectorMax: Matrix is empty.");
DecideAndMoveToRightDevice(*this, maxIndices, maxValues);
maxIndices.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);
maxValues.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this, &maxValues,
{ m_CPUMatrix->VectorMax(*maxIndices.m_CPUMatrix, *maxValues.m_CPUMatrix, isColWise); maxIndices.SetDataLocation(CPU, DENSE); },
{ m_GPUMatrix->VectorMax(*maxIndices.m_GPUMatrix, *maxValues.m_GPUMatrix, isColWise); maxIndices.SetDataLocation(GPU, DENSE); },
{ NOT_IMPLEMENTED; },
{ NOT_IMPLEMENTED; });
// Note: must SetDataLocation() also on maxIndices, since both maxValues and maxIndices are written.
}
template <class ElemType>
void Matrix<ElemType>::VectorMax(Matrix<ElemType>& maxIndices, Matrix<ElemType>& maxValues, const bool isColWise, int topK) const
{
if (IsEmpty())
LogicError("VectorMax: Matrix is empty.");
DecideAndMoveToRightDevice(*this, maxIndices, maxValues);
maxIndices.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);
maxValues.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this, &maxValues,
{ m_CPUMatrix->VectorMax(*maxIndices.m_CPUMatrix, *maxValues.m_CPUMatrix, isColWise, topK); maxIndices.SetDataLocation(CPU, DENSE); },
{ m_GPUMatrix->VectorMax(*maxIndices.m_GPUMatrix, *maxValues.m_GPUMatrix, isColWise, topK); maxIndices.SetDataLocation(GPU, DENSE); },
{ NOT_IMPLEMENTED; },
{ NOT_IMPLEMENTED; });
}
template <class ElemType>
void Matrix<ElemType>::VectorMin(Matrix<ElemType>& minIndices, Matrix<ElemType>& minValues, const bool isColWise) const
{
if (IsEmpty())
LogicError("VectorMin: Matrix is empty.");
DecideAndMoveToRightDevice(*this, minIndices, minValues);
minIndices.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);
minValues.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this, &minValues,
{ m_CPUMatrix->VectorMin(*minIndices.m_CPUMatrix, *minValues.m_CPUMatrix, isColWise); minIndices.SetDataLocation(CPU, DENSE); },
{ m_GPUMatrix->VectorMin(*minIndices.m_GPUMatrix, *minValues.m_GPUMatrix, isColWise); minIndices.SetDataLocation(GPU, DENSE); },
{ NOT_IMPLEMENTED; },
{ NOT_IMPLEMENTED; });
}
#pragma endregion Member BLAS Functions
#pragma region Other helper Functions
template <class ElemType>
int Matrix<ElemType>::GetDeviceId() const
{
if (m_currentDataLocation == CurrentDataLocation::NONE)
return m_preferredDeviceId;
DISPATCH_MATRIX_ON_FLAG(this, nullptr,
{ return CPUDEVICE; },
{ return m_GPUMatrix->GetComputeDeviceId(); },
{ return CPUDEVICE; },
{ return m_GPUSparseMatrix->GetComputeDeviceId(); });
}
template <class ElemType>
MatrixType Matrix<ElemType>::GetMatrixType() const
{
return m_matrixType;
}
template <class ElemType>
MatrixFormat Matrix<ElemType>::GetFormat() const
{
return m_baseMatrix->GetFormat();
}
// TODO: Comment why we need a second ElemType.
// TODO: Move the shared core functions to the front of this source file.
// BUGBUG: This performs a copy operation even for the output matrix that gets overwritten right away.
// We should (1) define which is the output and (2) whether it will be completely overwritten (so we won't actually copy it).
// bring two matrices onto the same device
// If different and prefered devices are the same, move to preferred device.
// Otherwise GPU takes precedence over CPU, and if both are GPU move to a's device.
// The inputs are only distinguished in that a's GPU takes precedence over b's in case they differ.
// TODO: This is called somewhat inconsistently, sometimes with a=*this, sometimes with b=*this.
template <class ElemType>
template <class ElemType2>
void Matrix<ElemType>::DecideAndMoveToRightDevice(const Matrix<ElemType>& a, const Matrix<ElemType2>& b)
{
int deviceIdA = a.GetDeviceId(), deviceIdB = b.GetDeviceId();
if (deviceIdA == deviceIdB)
return;
if (!a.OwnBuffer() && b.OwnBuffer())
b._transferToDevice(deviceIdA);
else if (a.OwnBuffer() && !b.OwnBuffer())
a._transferToDevice(deviceIdB);
else
{
int preferredDeviceIdA = a.GetPreferredDeviceId(), preferredDeviceIdB = b.GetPreferredDeviceId();
if (preferredDeviceIdA == preferredDeviceIdB) // both prefer the same device: move to preferred
{
a._transferToDevice(preferredDeviceIdA);
b._transferToDevice(preferredDeviceIdA);
}
else if (deviceIdA != CPUDEVICE) // one of them lives on GPU: use that
{
b._transferToDevice(deviceIdA);
}
else
{
a._transferToDevice(deviceIdB);
}
}
}
// same but for 3 matrices
// If b and c are both on the same GPU then a will be forced to go there; otherwise a's GPU takes precedence, then b's.
template <class ElemType>
void Matrix<ElemType>::DecideAndMoveToRightDevice(const Matrix<ElemType>& a, const Matrix<ElemType>& b, const Matrix<ElemType>& c)
{
int deviceIdA = a.GetDeviceId(), deviceIdB = b.GetDeviceId(), deviceIdC = c.GetDeviceId();
if (deviceIdA == deviceIdB && deviceIdA == deviceIdC)
return;
int preferredDeviceIdA = a.GetPreferredDeviceId(), preferredDeviceIdB = b.GetPreferredDeviceId(), preferredDeviceIdC = c.GetPreferredDeviceId();
if (preferredDeviceIdA == preferredDeviceIdB && preferredDeviceIdA == preferredDeviceIdC)
{
a._transferToDevice(preferredDeviceIdA);
b._transferToDevice(preferredDeviceIdA);
c._transferToDevice(preferredDeviceIdA);
}
else if (deviceIdB == deviceIdC && deviceIdB != CPUDEVICE) // TODO: why not the other two combinations?
{
a._transferToDevice(deviceIdB); // 'a' is outvoted
}
else if (deviceIdA != CPUDEVICE) // one of them lives on GPU: use that
{
b._transferToDevice(deviceIdA);
c._transferToDevice(deviceIdA);
}
else if (deviceIdB != CPUDEVICE)
{
a._transferToDevice(deviceIdB);
c._transferToDevice(deviceIdB);
}
else
{
a._transferToDevice(deviceIdC);
b._transferToDevice(deviceIdC);
}
}
// same but for 4 matrices
template <class ElemType>
void Matrix<ElemType>::DecideAndMoveToRightDevice(const Matrix<ElemType>& a, const Matrix<ElemType>& b, const Matrix<ElemType>& c, const Matrix<ElemType>& d)
{
// this function is only called for one operator, so for now we keep it simple
DecideAndMoveToRightDevice(a, b, c);
d._transferToDevice(a.GetDeviceId()); // BUGBUG: Is this correct in case a,b,c share the same preferredDevice?
}
template <class ElemType>
void Matrix<ElemType>::_transferToDevice(int to_id, bool isBeingMoved /*= true*/, bool emptyTransfer /* = false*/) const
{
int from_id = GetDeviceId();
if (to_id == from_id) // nothing to do
return;
if (OwnBuffer())
_transferFromDeviceToDevice(from_id, to_id, isBeingMoved, emptyTransfer);
else
RuntimeError("Cannot move externally owned matrices to the preferred device.");
}
// this function performs data transfer and updates data location, but not the device that is stored with it
template <class ElemType>
void Matrix<ElemType>::_transferFromDeviceToDevice(int from_id, int to_id, bool isBeingMoved /*= true*/, bool emptyTransfer /* = false*/) const
{
if (from_id < 0)
from_id = CPUDEVICE;
if (to_id < 0)
to_id = CPUDEVICE;
if (from_id == to_id)
{
if (from_id != GetDeviceId())
RuntimeError("Trying to transfer matrix from device to the same device while the matrix does not live in the from device.");
return;
}
// warn about device change
#define NUM_DEVICE_CHANGED_WARN 20
if (m_numTimesDeviceChanged <= NUM_DEVICE_CHANGED_WARN &&
(!emptyTransfer || (from_id >= 0 && to_id >= 0)))
{
m_numTimesDeviceChanged++;
if (m_devicesTransferedTo[0] < CPUDEVICE)
m_devicesTransferedTo[0] = to_id;
else if (m_devicesTransferedTo[0] != to_id)
m_devicesTransferedTo[1] = to_id;
}
if ((GetMathLibTraceLevel() > 0) && (m_numTimesDeviceChanged == NUM_DEVICE_CHANGED_WARN && m_devicesTransferedTo[1] >= CPUDEVICE))
fprintf(stderr, "WARNING: The same matrix with dim [%lu, %lu] has been transferred between different devices for %d times.\n", (unsigned long) GetNumRows(), (unsigned long) GetNumCols(), NUM_DEVICE_CHANGED_WARN);
// do the transfer
if (m_matrixType == MatrixType::SPARSE)
{
if (from_id == CPUDEVICE) // from CPU to GPU
{
if (!m_CPUSparseMatrix)
LogicError("Can't move from CPU because I'm not there!");
if (emptyTransfer)
{
if (m_GPUSparseMatrix && m_GPUSparseMatrix->GetComputeDeviceId() == to_id)
m_GPUSparseMatrix->Resize(m_CPUSparseMatrix->GetNumRows(), m_CPUSparseMatrix->GetNumCols(), m_CPUSparseMatrix->NzCount());
else
m_GPUSparseMatrix = make_shared<GPUSparseMatrix<ElemType>>(m_CPUSparseMatrix->GetNumRows(), m_CPUSparseMatrix->GetNumCols(), m_CPUSparseMatrix->NzCount(), to_id, m_CPUSparseMatrix->GetFormat());
}
else
{
if (!m_GPUSparseMatrix || m_GPUSparseMatrix->GetComputeDeviceId() != to_id)
m_GPUSparseMatrix = make_shared<GPUSparseMatrix<ElemType>>(to_id);
m_GPUSparseMatrix->SetValue(*m_CPUSparseMatrix);
}
if (isBeingMoved)
{
SetDataLocation(GPU, SPARSE);
m_CPUSparseMatrix = nullptr;
}
else
{
SetDataLocation(BOTH, SPARSE);
}
}
else // from GPU
{
if (!m_GPUSparseMatrix || m_GPUSparseMatrix->GetComputeDeviceId() != from_id)
LogicError("This matrix isn't on this (or any?) GPU");
if (to_id < 0) // to CPU
{
if (!m_CPUSparseMatrix)
m_CPUSparseMatrix = make_shared<CPUSparseMatrix<ElemType>>(m_GPUSparseMatrix->GetFormat());
if (emptyTransfer)
m_CPUSparseMatrix->Resize(m_GPUSparseMatrix->GetNumRows(), m_GPUSparseMatrix->GetNumCols(), m_GPUSparseMatrix->NzCount(), true);
else
m_GPUSparseMatrix->CopyToCPUSparseMatrix(*m_CPUSparseMatrix);
if (isBeingMoved)
{
SetDataLocation(CPU, SPARSE);
m_GPUSparseMatrix = nullptr;
}
else
{
SetDataLocation(BOTH, SPARSE);
}
}
else // to another GPU
{
m_GPUSparseMatrix->ChangeDeviceTo(to_id);
}
}
}
else
// #pragma omp critical // causes a build error on gcc; not clear why this is here
{
if (from_id == CPUDEVICE) // from CPU to GPU
{
if (!m_CPUMatrix)
LogicError("Can't move from CPU because I'm not there!");
if (emptyTransfer)
{
if (m_GPUMatrix && m_GPUMatrix->GetComputeDeviceId() == to_id)
m_GPUMatrix->Resize(m_CPUMatrix->GetNumRows(), m_CPUMatrix->GetNumCols());
else
m_GPUMatrix = make_shared<GPUMatrix<ElemType>>(m_CPUMatrix->GetNumRows(), m_CPUMatrix->GetNumCols(), to_id);
}
else
{
if (m_GPUMatrix && m_GPUMatrix->GetComputeDeviceId() == to_id)
m_GPUMatrix->SetValue(m_CPUMatrix->GetNumRows(), m_CPUMatrix->GetNumCols(), to_id, m_CPUMatrix->Data());
else
m_GPUMatrix = make_shared<GPUMatrix<ElemType>>(m_CPUMatrix->GetNumRows(), m_CPUMatrix->GetNumCols(), to_id, m_CPUMatrix->Data());
}
if (isBeingMoved)
{
SetDataLocation(GPU, DENSE);
m_CPUMatrix = nullptr;
}
else
SetDataLocation(BOTH, DENSE);
}
else // from GPU
{
if (!m_GPUMatrix || m_GPUMatrix->GetComputeDeviceId() != from_id)
LogicError("This matrix isn't on this (or any?) GPU");
if (to_id < 0) // to CPU
{
if (emptyTransfer)
{
if (m_CPUMatrix)
m_CPUMatrix->Resize(m_GPUMatrix->GetNumRows(), m_GPUMatrix->GetNumCols());
else
m_CPUMatrix = make_shared<CPUMatrix<ElemType>>(m_GPUMatrix->GetNumRows(), m_GPUMatrix->GetNumCols());
}
else
{
ElemType* arr = m_GPUMatrix->CopyToArray(); // TODO: unnecessary allocation/copy; why not make this a vector that we move over as an rvalue ref?
if (m_CPUMatrix)
m_CPUMatrix->SetValue(m_GPUMatrix->GetNumRows(), m_GPUMatrix->GetNumCols(), arr);
else
m_CPUMatrix = make_shared<CPUMatrix<ElemType>>(m_GPUMatrix->GetNumRows(), m_GPUMatrix->GetNumCols(), arr, matrixFlagNormal);
delete[] arr;
}
if (isBeingMoved)
{
SetDataLocation(CPU, DENSE);
m_GPUMatrix = nullptr;
}
else
{
SetDataLocation(BOTH, DENSE);
}
}
else // to another GPU
{
m_GPUMatrix->ChangeDeviceTo(to_id);
}
}
} // and of omp critical section
}
template <class ElemType>
void Matrix<ElemType>::TransferFromDeviceToDevice(int from_id, int to_id, bool isBeingMoved, bool emptyTransfer/* = false*/, bool updatePreferredDevice/* = true*/) const
{
_transferFromDeviceToDevice(from_id, to_id, isBeingMoved, emptyTransfer);
if (updatePreferredDevice)
m_preferredDeviceId = GetDeviceId();
}
template <class ElemType>
void Matrix<ElemType>::TransferToDeviceIfNotThere(int to_id, bool isBeingMoved/*false: may leave in BOTH state*/, bool emptyTransfer/* = false*/, bool updatePreferredDevice/* = true*/) const
{
int from_id = GetDeviceId();
if (from_id == to_id) // already at the right place
return;
if (GetCurrentMatrixLocation() == BOTH && // if currently in BOTH state
!isBeingMoved && // and leaving in BOTH state is OK
(from_id < 0 || to_id < 0)) // and this is not about changing GPUs
{
return; // then we are good
}
TransferFromDeviceToDevice(from_id, to_id, isBeingMoved, emptyTransfer, updatePreferredDevice);
}
template <class ElemType>
void Matrix<ElemType>::Print(const char* matrixName, ptrdiff_t rowStart, ptrdiff_t rowEnd, ptrdiff_t colStart, ptrdiff_t colEnd) const
{
DEVICEID_TYPE orgdevice = GetDeviceId();
DISPATCH_MATRIX_ON_FLAG(this,
nullptr,
// CPU:
m_CPUMatrix->Print(matrixName, rowStart, rowEnd, colStart, colEnd),
// GPU;
{
_transferToDevice(CPUDEVICE, false, false);
m_CPUMatrix->Print(matrixName, rowStart, rowEnd, colStart, colEnd);
_transferToDevice(orgdevice, false, false);
},
// CPU, sparse:
m_CPUSparseMatrix->Print(matrixName),
// GPU, sparse:
{
_transferToDevice(CPUDEVICE, false, false);
m_CPUSparseMatrix->Print(matrixName);
_transferToDevice(orgdevice, false, false);
});
}
template <class ElemType>
void Matrix<ElemType>::Print(const char* matrixName /*=nullptr*/) const
{
Print(matrixName, 0, GetNumRows() - 1, 0, GetNumCols() - 1);
}
//helpfer function used for convolution neural network
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignPackedConvolutionInput(const Matrix<ElemType>& inputSubBatch,
const size_t inputWidth, const size_t inputHeight, const size_t inputChannels,
const size_t outputWidth, const size_t outputHeight, const size_t outputChannels,
const size_t kernelWidth, const size_t kernelHeight, const size_t horizontalSubsample, const size_t verticalSubsample,
const bool zeroPadding)
{
DecideAndMoveToRightDevice(inputSubBatch, *this);
SwitchToMatrixType(inputSubBatch.GetMatrixType(), inputSubBatch.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&inputSubBatch,
this,
m_CPUMatrix->AssignPackedConvolutionInput(*(inputSubBatch.m_CPUMatrix),
inputWidth, inputHeight, inputChannels,
outputWidth, outputHeight, outputChannels,
kernelWidth, kernelHeight, horizontalSubsample, verticalSubsample,
zeroPadding),
m_GPUMatrix->AssignPackedConvolutionInput(*(inputSubBatch.m_GPUMatrix),
inputWidth, inputHeight, inputChannels,
outputWidth, outputHeight, outputChannels,
kernelWidth, kernelHeight, horizontalSubsample, verticalSubsample,
zeroPadding),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
//helpfer function used for convolution neural network
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::UnpackConvolutionInput(Matrix<ElemType>& inputSubBatch,
const size_t inputWidth, const size_t inputHeight, const size_t inputChannels,
const size_t outputWidth, const size_t outputHeight, const size_t outputChannels,
const size_t kernelWidth, const size_t kernelHeight, const size_t horizontalSubsample, const size_t verticalSubsample,
const bool zeroPadding) const
{
DecideAndMoveToRightDevice(*this, inputSubBatch);
inputSubBatch.SwitchToMatrixType(GetMatrixType(), inputSubBatch.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this,
&inputSubBatch,
m_CPUMatrix->UnpackConvolutionInput(*(inputSubBatch.m_CPUMatrix),
inputWidth, inputHeight, inputChannels,
outputWidth, outputHeight, outputChannels,
kernelWidth, kernelHeight, horizontalSubsample, verticalSubsample,
zeroPadding),
m_GPUMatrix->UnpackConvolutionInput(*(inputSubBatch.m_GPUMatrix),
inputWidth, inputHeight, inputChannels,
outputWidth, outputHeight, outputChannels,
kernelWidth, kernelHeight, horizontalSubsample, verticalSubsample,
zeroPadding),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return inputSubBatch;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignMaxPoolingResult(const Matrix<ElemType>& inputBatch, const size_t channels,
const size_t inputWidth, const size_t inputHeight, const size_t inputSizePerSample,
const size_t outputWidth, const size_t outputHeight, const size_t outputSizePerSample,
const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample)
{
DecideAndMoveToRightDevice(inputBatch, *this);
SwitchToMatrixType(inputBatch.GetMatrixType(), inputBatch.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&inputBatch,
this,
m_CPUMatrix->AssignMaxPoolingResult(*(inputBatch.m_CPUMatrix), channels,
inputWidth, inputHeight, inputSizePerSample,
outputWidth, outputHeight, outputSizePerSample,
windowWidth, windowHeight, horizontalSubsample, verticalSubsample),
m_GPUMatrix->AssignMaxPoolingResult(*(inputBatch.m_GPUMatrix), channels,
inputWidth, inputHeight, inputSizePerSample,
outputWidth, outputHeight, outputSizePerSample,
windowWidth, windowHeight, horizontalSubsample, verticalSubsample),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AddMaxPoolingGradient(const Matrix<ElemType>& outputGradientBatch, const Matrix<ElemType>& inputBatch, const Matrix<ElemType>& outputBatch,
const size_t channels,
const size_t inputWidth, const size_t inputHeight, const size_t inputSizePerSample,
const size_t outputWidth, const size_t outputHeight, const size_t outputSizePerSample,
const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample)
{
DecideAndMoveToRightDevice(*this, outputGradientBatch, inputBatch);
outputBatch._transferToDevice(GetDeviceId());
if (!(GetMatrixType() == outputGradientBatch.GetMatrixType() && GetMatrixType() == inputBatch.GetMatrixType() && GetMatrixType() == outputBatch.GetMatrixType()))
NOT_IMPLEMENTED;
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->AddMaxPoolingGradient(*(outputGradientBatch.m_CPUMatrix), *(inputBatch.m_CPUMatrix), *(outputBatch.m_CPUMatrix), channels,
inputWidth, inputHeight, inputSizePerSample,
outputWidth, outputHeight, outputSizePerSample,
windowWidth, windowHeight, horizontalSubsample, verticalSubsample),
m_GPUMatrix->AddMaxPoolingGradient(*(outputGradientBatch.m_GPUMatrix), *(inputBatch.m_GPUMatrix), *(outputBatch.m_GPUMatrix), channels,
inputWidth, inputHeight, inputSizePerSample,
outputWidth, outputHeight, outputSizePerSample,
windowWidth, windowHeight, horizontalSubsample, verticalSubsample);
,
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignAveragePoolingResult(const Matrix<ElemType>& inputBatch, const size_t channels,
const size_t inputWidth, const size_t inputHeight, const size_t inputSizePerSample,
const size_t outputWidth, const size_t outputHeight, const size_t outputSizePerSample,
const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample)
{
DecideAndMoveToRightDevice(inputBatch, *this);
SwitchToMatrixType(inputBatch.GetMatrixType(), inputBatch.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&inputBatch,
this,
m_CPUMatrix->AssignAveragePoolingResult(*(inputBatch.m_CPUMatrix), channels,
inputWidth, inputHeight, inputSizePerSample,
outputWidth, outputHeight, outputSizePerSample,
windowWidth, windowHeight, horizontalSubsample, verticalSubsample),
m_GPUMatrix->AssignAveragePoolingResult(*(inputBatch.m_GPUMatrix), channels,
inputWidth, inputHeight, inputSizePerSample,
outputWidth, outputHeight, outputSizePerSample,
windowWidth, windowHeight, horizontalSubsample, verticalSubsample),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignSoftmaxSum(const Matrix<ElemType>& a, const Matrix<ElemType>& softmax)
{
Resize(1, 1);
if (GetDeviceId() < 0)
a.m_CPUMatrix->AssignSoftmaxSum(*softmax.m_CPUMatrix, *m_CPUMatrix);
else
a.m_GPUMatrix->AssignSoftmaxSum(*softmax.m_GPUMatrix, *m_GPUMatrix);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignNceUnnormalizedEval(const Matrix<ElemType>& a, const Matrix<ElemType>& b, const Matrix<ElemType>& c, const Matrix<ElemType>& bias)
{
// if (a.GetMatrixType() != MatrixType::SPARSE)
// NOT_IMPLEMENTED;
Resize(1, 1);
if (GetDeviceId() < 0)
a.m_CPUMatrix->AssignNCEUnnormalizedEval(*b.m_CPUMatrix, *c.m_CPUMatrix, *bias.m_CPUMatrix, *m_CPUMatrix);
else
a.m_GPUMatrix->AssignNCEUnnormalizedEval(*b.m_GPUMatrix, *c.m_GPUMatrix, *m_GPUMatrix);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignNoiseContrastiveEstimation(const Matrix<ElemType>& a, const Matrix<ElemType>& b, const Matrix<ElemType>& c, const Matrix<ElemType>& bias, Matrix<ElemType>& tmp)
{
if (a.IsEmpty() || b.IsEmpty() || c.IsEmpty())
LogicError("AssignNoiseContrastiveEstimation: one of the input matrices is empty.");
if (a.GetDeviceId() != b.GetDeviceId() || b.GetDeviceId() != c.GetDeviceId() || c.GetDeviceId() != GetDeviceId())
NOT_IMPLEMENTED;
Resize(1, 1);
if (GetDeviceId() < 0)
{
size_t sampleCount = a.m_CPUMatrix->GetNumElements() / a.m_CPUMatrix->GetNumRows();
tmp.Resize(a.GetNumRows() / 2, sampleCount);
a.m_CPUMatrix->AssignNoiseContrastiveEstimation(*b.m_CPUMatrix, *c.m_CPUMatrix,
*bias.m_CPUMatrix, *tmp.m_CPUMatrix, *m_CPUMatrix);
}
else
{
size_t sampleCount = a.m_GPUMatrix->GetNumElements() / a.m_GPUMatrix->GetNumRows();
tmp.Resize(a.GetNumRows() / 2, sampleCount);
a.m_GPUMatrix->AssignNoiseContrastiveEstimation(*b.m_GPUMatrix, *c.m_GPUMatrix,
*bias.m_GPUMatrix, sampleCount, *tmp.m_GPUMatrix, *m_GPUMatrix);
}
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignNCEDerivative(const Matrix<ElemType>& tmp, const Matrix<ElemType>& a, const Matrix<ElemType>& b, const Matrix<ElemType>& c, size_t inputIndex)
{
if (a.IsEmpty() || b.IsEmpty() || c.IsEmpty())
LogicError("AssignNoiseContrastiveEstimation: one of the input matrices is empty.");
if (a.GetDeviceId() != b.GetDeviceId() || b.GetDeviceId() != c.GetDeviceId() || c.GetDeviceId() != GetDeviceId())
NOT_IMPLEMENTED;
assert(tmp.GetNumRows() == a.GetNumRows() / 2);
if (GetDeviceId() < 0)
{
// samples gradient hidden embedding embedding/hidden
a.m_CPUMatrix->AssignNCEDerivative(*tmp.m_CPUMatrix, *b.m_CPUMatrix, *c.m_CPUMatrix, inputIndex, *m_CPUMatrix);
}
else
{
a.m_GPUMatrix->AssignNCEDerivative(*tmp.m_GPUMatrix, *b.m_GPUMatrix, *c.m_GPUMatrix, inputIndex, *m_GPUMatrix);
}
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AddAveragePoolingGradient(const Matrix<ElemType>& outputGradientBatch,
const size_t channels,
const size_t inputWidth, const size_t inputHeight, const size_t inputSizePerSample,
const size_t outputWidth, const size_t outputHeight, const size_t outputSizePerSample,
const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample)
{
DecideAndMoveToRightDevice(*this, outputGradientBatch);
if (!(GetMatrixType() == outputGradientBatch.GetMatrixType()))
NOT_IMPLEMENTED;
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->AddAveragePoolingGradient(*(outputGradientBatch.m_CPUMatrix), channels,
inputWidth, inputHeight, inputSizePerSample,
outputWidth, outputHeight, outputSizePerSample,
windowWidth, windowHeight, horizontalSubsample, verticalSubsample),
m_GPUMatrix->AddAveragePoolingGradient(*(outputGradientBatch.m_GPUMatrix), channels,
inputWidth, inputHeight, inputSizePerSample,
outputWidth, outputHeight, outputSizePerSample,
windowWidth, windowHeight, horizontalSubsample, verticalSubsample),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
#pragma endregion Other Helper Functions
template <class ElemType>
void Matrix<ElemType>::ConvolutionForward(const Matrix<ElemType>& kernel, const Matrix<int>& mpRowCol, const Matrix<int>& mpRowIwht,
const Matrix<int>& mpRowRun, const Matrix<int>& runs, Matrix<ElemType>& output) const
{
assert(mpRowCol.GetNumCols() == 1);
assert(mpRowIwht.GetNumCols() == 1);
assert(mpRowRun.GetNumCols() == 1);
assert(runs.GetNumCols() == 1);
DecideAndMoveToRightDevice(*this, output);
// REVIEW alexeyk: add sparse version.
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->ConvolutionForward(*(kernel.m_CPUMatrix), *(mpRowCol.m_CPUMatrix), *(mpRowIwht.m_CPUMatrix),
*(mpRowRun.m_CPUMatrix), *(runs.m_CPUMatrix), *(output.m_CPUMatrix)),
m_GPUMatrix->ConvolutionForward(*(kernel.m_GPUMatrix), *(mpRowCol.m_GPUMatrix), *(mpRowIwht.m_GPUMatrix),
*(mpRowRun.m_GPUMatrix), *(runs.m_GPUMatrix), *(output.m_GPUMatrix)),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::ConvolutionBackwardData(const Matrix<ElemType>& kernel, const Matrix<int>& mpRowCol, const Matrix<int>& mpRowIwht,
const Matrix<int>& mpRowRun, const Matrix<int>& runs, Matrix<ElemType>& grad) const
{
assert(mpRowCol.GetNumCols() == 1);
assert(mpRowIwht.GetNumCols() == 1);
assert(mpRowRun.GetNumCols() == 1);
assert(runs.GetNumCols() == 1);
DecideAndMoveToRightDevice(*this, grad);
// REVIEW alexeyk: add sparse version.
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->ConvolutionBackwardData(*(kernel.m_CPUMatrix), *(mpRowCol.m_CPUMatrix), *(mpRowIwht.m_CPUMatrix),
*(mpRowRun.m_CPUMatrix), *(runs.m_CPUMatrix), *(grad.m_CPUMatrix)),
m_GPUMatrix->ConvolutionBackwardData(*(kernel.m_GPUMatrix), *(mpRowCol.m_GPUMatrix), *(mpRowIwht.m_GPUMatrix),
*(mpRowRun.m_GPUMatrix), *(runs.m_GPUMatrix), *(grad.m_GPUMatrix)),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::ConvolutionBackwardKernel(const Matrix<ElemType>& in, const Matrix<int>& mpRowCol, const Matrix<int>& mpRowIwht,
const Matrix<int>& mpRowRun, const Matrix<int>& runs, Matrix<ElemType>& kernelGrad) const
{
assert(mpRowCol.GetNumCols() == 1);
assert(mpRowIwht.GetNumCols() == 1);
assert(mpRowRun.GetNumCols() == 1);
assert(runs.GetNumCols() == 1);
DecideAndMoveToRightDevice(*this, kernelGrad);
// REVIEW alexeyk: add sparse version.
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->ConvolutionBackwardKernel(*(in.m_CPUMatrix), *(mpRowCol.m_CPUMatrix), *(mpRowIwht.m_CPUMatrix),
*(mpRowRun.m_CPUMatrix), *(runs.m_CPUMatrix), *(kernelGrad.m_CPUMatrix)),
m_GPUMatrix->ConvolutionBackwardKernel(*(in.m_GPUMatrix), *(mpRowCol.m_GPUMatrix), *(mpRowIwht.m_GPUMatrix),
*(mpRowRun.m_GPUMatrix), *(runs.m_GPUMatrix), *(kernelGrad.m_GPUMatrix)),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::UnrollConvolutionInput(size_t unrollCols, size_t mapOutSize, const Matrix<int>& mpRowCol,
const Matrix<int>& mpRowRun, const Matrix<int>& runs, Matrix<ElemType>& output) const
{
assert(mpRowCol.GetNumCols() == 1);
assert(mpRowRun.GetNumCols() == 1);
assert(runs.GetNumCols() == 1);
DecideAndMoveToRightDevice(*this, output);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->UnrollConvolutionInput(unrollCols, mapOutSize, *(mpRowCol.m_CPUMatrix),
*(mpRowRun.m_CPUMatrix), *(runs.m_CPUMatrix), *(output.m_CPUMatrix)),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::UnrollConvolutionOutput(size_t unrollCols, size_t mapInCount, size_t mapOutCount, const Matrix<int>& mpRowCol,
const Matrix<int>& mpRowRun, const Matrix<int>& runs, Matrix<ElemType>& output) const
{
assert(mpRowCol.GetNumCols() == 1);
assert(mpRowRun.GetNumCols() == 1);
assert(runs.GetNumCols() == 1);
DecideAndMoveToRightDevice(*this, output);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->UnrollConvolutionOutput(unrollCols, mapInCount, mapOutCount, *(mpRowCol.m_CPUMatrix),
*(mpRowRun.m_CPUMatrix), *(runs.m_CPUMatrix), *(output.m_CPUMatrix)),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::UnrollConvolutionInputForKernelBackprop(size_t mapOutSize, const Matrix<int>& mpRowCol,
const Matrix<int>& mpRowRun, const Matrix<int>& runs, Matrix<ElemType>& output) const
{
assert(mpRowCol.GetNumCols() == 1);
assert(mpRowRun.GetNumCols() == 1);
assert(runs.GetNumCols() == 1);
DecideAndMoveToRightDevice(*this, output);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->UnrollConvolutionInputForKernelBackprop(mapOutSize, *(mpRowCol.m_CPUMatrix),
*(mpRowRun.m_CPUMatrix), *(runs.m_CPUMatrix), *(output.m_CPUMatrix)),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::MaxPoolingForward(const Matrix<int>& mpRowCol, const Matrix<int>& mpRowIndices, const Matrix<int>& indices, Matrix<ElemType>& output) const
{
assert(mpRowCol.GetNumCols() == 1);
assert(mpRowIndices.GetNumCols() == 1);
assert(indices.GetNumCols() == 1);
DecideAndMoveToRightDevice(*this, output);
// REVIEW alexeyk: add sparse version.
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->MaxPoolingForward(*(mpRowCol.m_CPUMatrix), *(mpRowIndices.m_CPUMatrix), *(indices.m_CPUMatrix), *(output.m_CPUMatrix)),
m_GPUMatrix->MaxPoolingForward(*(mpRowCol.m_GPUMatrix), *(mpRowIndices.m_GPUMatrix), *(indices.m_GPUMatrix), *(output.m_GPUMatrix)),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::MaxPoolingBackward(const Matrix<ElemType>& out, const Matrix<ElemType>& in,
const Matrix<int>& mpRowCol, const Matrix<int>& mpRowIndices, const Matrix<int>& indices,
Matrix<ElemType>& grad) const
{
assert(mpRowCol.GetNumCols() == 1);
assert(mpRowIndices.GetNumCols() == 1);
assert(indices.GetNumCols() == 1);
DecideAndMoveToRightDevice(*this, grad);
// REVIEW alexeyk: add sparse version.
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->MaxPoolingBackward(*(out.m_CPUMatrix), *(in.m_CPUMatrix),
*(mpRowCol.m_CPUMatrix), *(mpRowIndices.m_CPUMatrix), *(indices.m_CPUMatrix),
*(grad.m_CPUMatrix)),
m_GPUMatrix->MaxPoolingBackward(*(out.m_GPUMatrix), *(in.m_GPUMatrix),
*(mpRowCol.m_GPUMatrix), *(mpRowIndices.m_GPUMatrix), *(indices.m_GPUMatrix),
*(grad.m_GPUMatrix)),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::ROIPoolingForward(const size_t numRois, const size_t numImg, const size_t channels, const size_t width, const size_t height,
const size_t pooledWidth, const size_t pooledHeight, const Matrix<ElemType>& roiData, Matrix<ElemType>& output,
Matrix<ElemType>& argmax) const
{
DecideAndMoveToRightDevice(*this, output);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->ROIPoolingForward(numRois, numImg, channels, width, height, pooledWidth, pooledHeight, *(roiData.m_CPUMatrix), *(output.m_CPUMatrix), *(argmax.m_CPUMatrix)),
m_GPUMatrix->ROIPoolingForward(numRois, numImg, channels, width, height, pooledWidth, pooledHeight, *(roiData.m_GPUMatrix), *(output.m_GPUMatrix), *(argmax.m_GPUMatrix)),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::ROIPoolingBackward(const size_t numRois, const size_t numImg, const size_t channels, const size_t width, const size_t height,
const size_t pooledWidth, const size_t pooledHeight, const Matrix<ElemType>& roiData, Matrix<ElemType>& grad,
Matrix<ElemType>& argmax) const
{
DecideAndMoveToRightDevice(*this, grad);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->ROIPoolingBackward(numRois, numImg, channels, width, height, pooledWidth, pooledHeight, *(roiData.m_CPUMatrix), *(grad.m_CPUMatrix), *(argmax.m_CPUMatrix)),
m_GPUMatrix->ROIPoolingBackward(numRois, numImg, channels, width, height, pooledWidth, pooledHeight, *(roiData.m_GPUMatrix), *(grad.m_GPUMatrix), *(argmax.m_GPUMatrix)),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::MaxUnpooling(const Matrix<int>& mpRowCol, const Matrix<int>& mpRowIndices, const Matrix<int>& indices, const Matrix<ElemType>& poolInput, Matrix<ElemType>& input) const
{
assert(mpRowCol.GetNumCols() == 1);
assert(mpRowIndices.GetNumCols() == 1);
assert(indices.GetNumCols() == 1);
DecideAndMoveToRightDevice(*this, input);
// REVIEW alexeyk: setting values to zero may cause inconsistency when negative values are unpooled.
// To see why, let's assume we have just one input with negative value and output of, for example, 2x2.
// As a result of unpooling, there will be 3 zero values and one negative. If we now apply max pooling
// operation to the output then we get 0 as the output, not the original negative value.
// In practice this will not happen as pooling layers usually go right after ReLU layer.
input.SetValue(0);
// REVIEW alexeyk: add sparse version.
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->MaxUnpooling(*(mpRowCol.m_CPUMatrix), *(mpRowIndices.m_CPUMatrix), *(indices.m_CPUMatrix), *(poolInput.m_CPUMatrix), *(input.m_CPUMatrix)),
m_GPUMatrix->MaxUnpooling(*(mpRowCol.m_GPUMatrix), *(mpRowIndices.m_GPUMatrix), *(indices.m_GPUMatrix), *(poolInput.m_GPUMatrix), *(input.m_GPUMatrix)),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::AveragePoolingForward(const Matrix<int>& mpRowCol, const Matrix<int>& mpRowIndices, const Matrix<int>& indices, Matrix<ElemType>& output, const bool poolIncludePad) const
{
assert(mpRowCol.GetNumCols() == 1);
assert(mpRowIndices.GetNumCols() == 1);
assert(indices.GetNumCols() == 1);
DecideAndMoveToRightDevice(*this, output);
// REVIEW alexeyk: add sparse version.
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->AveragePoolingForward(*(mpRowCol.m_CPUMatrix), *(mpRowIndices.m_CPUMatrix), *(indices.m_CPUMatrix), *(output.m_CPUMatrix), poolIncludePad),
m_GPUMatrix->AveragePoolingForward(*(mpRowCol.m_GPUMatrix), *(mpRowIndices.m_GPUMatrix), *(indices.m_GPUMatrix), *(output.m_GPUMatrix)),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::AveragePoolingBackward(const Matrix<int>& mpRowCol, const Matrix<int>& mpRowIndices, const Matrix<int>& indices, Matrix<ElemType>& grad, const bool poolIncludePad) const
{
assert(mpRowCol.GetNumCols() == 1);
assert(mpRowIndices.GetNumCols() == 1);
assert(indices.GetNumCols() == 1);
DecideAndMoveToRightDevice(*this, grad);
// REVIEW alexeyk: add sparse version.
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->AveragePoolingBackward(*(mpRowCol.m_CPUMatrix), *(mpRowIndices.m_CPUMatrix), *(indices.m_CPUMatrix), *(grad.m_CPUMatrix), poolIncludePad),
m_GPUMatrix->AveragePoolingBackward(*(mpRowCol.m_GPUMatrix), *(mpRowIndices.m_GPUMatrix), *(indices.m_GPUMatrix), *(grad.m_GPUMatrix)),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::BatchNormalizationForward(const Matrix<ElemType>& scale, const Matrix<ElemType>& bias, bool inferenceOnly, double expAvgFactor, double blendFactor,
Matrix<ElemType>& runMean, Matrix<ElemType>& runVariance, Matrix<ElemType>& out, double epsilon,
Matrix<ElemType>& saveMean, Matrix<ElemType>& saveInvStdDev) const
{
DecideAndMoveToRightDevice(*this, out);
// REVIEW alexeyk: add sparse version.
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->BatchNormalizationForward(*(scale.m_CPUMatrix), *(bias.m_CPUMatrix), inferenceOnly, expAvgFactor, blendFactor,
*(runMean.m_CPUMatrix), *(runVariance.m_CPUMatrix),
*(out.m_CPUMatrix), epsilon, *(saveMean.m_CPUMatrix), *(saveInvStdDev.m_CPUMatrix)),
m_GPUMatrix->BatchNormalizationForward(*(scale.m_GPUMatrix), *(bias.m_GPUMatrix), inferenceOnly, expAvgFactor, blendFactor,
*(runMean.m_GPUMatrix), *(runVariance.m_GPUMatrix),
*(out.m_GPUMatrix), epsilon, *(saveMean.m_GPUMatrix), *(saveInvStdDev.m_GPUMatrix)),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::BatchNormalizationBackward(const Matrix<ElemType>& in, Matrix<ElemType>& grad, const Matrix<ElemType>& scale, double blendFactor,
const Matrix<ElemType>& saveMean, const Matrix<ElemType>& saveInvStdDev,
Matrix<ElemType>& scaleGrad, Matrix<ElemType>& biasGrad) const
{
DecideAndMoveToRightDevice(*this, grad);
// REVIEW alexeyk: add sparse version.
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->BatchNormalizationBackward(*(in.m_CPUMatrix), *(grad.m_CPUMatrix), *(scale.m_CPUMatrix), blendFactor,
*(saveMean.m_CPUMatrix), *(saveInvStdDev.m_CPUMatrix),
*(scaleGrad.m_CPUMatrix), *(biasGrad.m_CPUMatrix)),
m_GPUMatrix->BatchNormalizationBackward(*(in.m_GPUMatrix), *(grad.m_GPUMatrix), *(scale.m_GPUMatrix), blendFactor,
*(saveMean.m_GPUMatrix), *(saveInvStdDev.m_GPUMatrix),
*(scaleGrad.m_GPUMatrix), *(biasGrad.m_GPUMatrix)),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::RNNForward(const Matrix<ElemType> &inputX, const Matrix<ElemType> ¶mW, size_t xDim, size_t yDim, const vector<size_t>& numSequencesForFrame, const RnnAttributes& rnnAttributes, Matrix<ElemType>& reserve, Matrix<ElemType>& workspace)
{
DecideAndMoveToRightDevice(*this, inputX, paramW);
// move reserve/workspace to the consensus device
reserve._transferToDevice(GetDeviceId());
workspace._transferToDevice(GetDeviceId());
DISPATCH_MATRIX_ON_FLAG(this,
this,
NOT_IMPLEMENTED,
m_GPUMatrix->RNNForward(*(inputX.m_GPUMatrix), *(paramW.m_GPUMatrix), xDim, yDim, numSequencesForFrame, rnnAttributes, *(reserve.m_GPUMatrix), *(workspace.m_GPUMatrix)),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::RNNBackwardData(const Matrix<ElemType>& outputDY, const Matrix<ElemType>& paramW, Matrix<ElemType>& outputDX, const RnnAttributes& rnnAttributes, Matrix<ElemType>& reserve, Matrix<ElemType>& workspace)
{
DecideAndMoveToRightDevice(*this, outputDY, paramW, outputDX);
// move reserve/workspace to the consensus device
reserve._transferToDevice(GetDeviceId());
workspace._transferToDevice(GetDeviceId());
DISPATCH_MATRIX_ON_FLAG(this,
this,
NOT_IMPLEMENTED,
m_GPUMatrix->RNNBackwardData(*(outputDY.m_GPUMatrix), *(paramW.m_GPUMatrix), *(outputDX.m_GPUMatrix), rnnAttributes, *(reserve.m_GPUMatrix), *(workspace.m_GPUMatrix)),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::RNNBackwardWeights(const Matrix<ElemType>& inputX, const Matrix<ElemType>& outputY, Matrix<ElemType>& dw, const RnnAttributes& rnnAttributes, Matrix<ElemType>& reserve, Matrix<ElemType>& workspace)
{
DecideAndMoveToRightDevice(*this, inputX, outputY, dw);
// move reserve/workspace to the consensus device
reserve._transferToDevice(GetDeviceId());
workspace._transferToDevice(GetDeviceId());
DISPATCH_MATRIX_ON_FLAG(this,
this,
NOT_IMPLEMENTED,
m_GPUMatrix->RNNBackwardWeights(*(inputX.m_GPUMatrix), *(outputY.m_GPUMatrix), *(dw.m_GPUMatrix), rnnAttributes, *(reserve.m_GPUMatrix), *(workspace.m_GPUMatrix)),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
#pragma region Static BLAS Functions
template <class ElemType>
void Matrix<ElemType>::SVD(const Matrix<ElemType>& A, Matrix<ElemType>& SIGMA, Matrix<ElemType>& U, Matrix<ElemType>& VT, Matrix<ElemType>& W)
{
if (A.IsEmpty())
LogicError("SVD: the input matrix is empty.");
DecideAndMoveToRightDevice(A, SIGMA, U);
VT._transferToDevice(A.GetDeviceId());
W._transferToDevice(A.GetDeviceId());
SIGMA.SwitchToMatrixType(A.GetMatrixType(), A.GetFormat(), false);
U.SwitchToMatrixType(A.GetMatrixType(), A.GetFormat(), false);
VT.SwitchToMatrixType(A.GetMatrixType(), A.GetFormat(), false);
W.SwitchToMatrixType(A.GetMatrixType(), A.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&A, nullptr,
{
Matrix<ElemType> tA = A.DeepClone();
CPUMatrix<ElemType>::SVD(*tA.m_CPUMatrix, *SIGMA.m_CPUMatrix, *U.m_CPUMatrix, *VT.m_CPUMatrix, *W.m_CPUMatrix);
SIGMA.SetDataLocation(CPU);
U.SetDataLocation(CPU);
VT.SetDataLocation(CPU);
W.SetDataLocation(CPU);
// need to SetDataLocation() on all matrices we write to
},
{ NOT_IMPLEMENTED; },
{ NOT_IMPLEMENTED; },
{ NOT_IMPLEMENTED; });
}
/// <summary>Matrix-matrix multiply with col-major matrices (a and b may be transposed): c = alpha * op(a) * op(b) + beta*c</summary>
/// <param name="alpha">Scalar</param>
/// <param name="a">Input matrix</param>
/// <param name="transposeA">Whether matrix a is transposed</param>
/// <param name="b">Input matrix</param>
/// <param name="transposeB">Whether matrix b is transposed</param>
/// <param name="beta">Scalar</param>
/// <param name="c">Resulting matrix, user is responsible for allocating this</param>
template <class ElemType>
void Matrix<ElemType>::MultiplyAndWeightedAdd(ElemType alpha, const Matrix<ElemType>& a, const bool transposeA, const Matrix<ElemType>& b, const bool transposeB,
ElemType beta, Matrix<ElemType>& c, shared_ptr<QuantizedMultiplier<ElemType>> pQuantizedMultiplier)
{
DecideAndMoveToRightDevice(a, b, c);
if (c.GetDeviceId() < 0) // CPU
{
if (a.GetMatrixType() == MatrixType::SPARSE) // CPU, SPARSE * ANY -> ANY
{
if (b.GetMatrixType() == MatrixType::DENSE && c.GetMatrixType() == MatrixType::DENSE) // CPU, SPARSE * DENSE -> DENSE
{
CPUSparseMatrix<ElemType>::MultiplyAndWeightedAdd(alpha, *a.m_CPUSparseMatrix, transposeA, *b.m_CPUMatrix, transposeB, beta, *c.m_CPUMatrix);
c.SetDataLocation(CPU, DENSE);
}
else if (b.GetMatrixType() == MatrixType::SPARSE && c.GetMatrixType() == MatrixType::DENSE) // CPU, SPARSE * SPARSE -> DENSE
{
NOT_IMPLEMENTED;
}
else if (b.GetMatrixType() == MatrixType::DENSE && c.GetMatrixType() == MatrixType::SPARSE)// CPU, SPARSE * DENSE -> SPARSE
{
NOT_IMPLEMENTED;
}
else if (b.GetMatrixType() == MatrixType::SPARSE && c.GetMatrixType() == MatrixType::SPARSE)// CPU, SPARSE * SPARSE -> SPARSE
{
NOT_IMPLEMENTED;
}
else
{
NOT_IMPLEMENTED;
}
}
else // CPU, DENSE * ANY -> ANY
{
if (b.GetMatrixType() == MatrixType::SPARSE) // CPU, DENSE * SPARSE -> ANY
{
if (c.GetMatrixType() == MatrixType::DENSE) // CPU, DENSE * SPARSE -> DENSE
{
CPUSparseMatrix<ElemType>::MultiplyAndWeightedAdd(alpha, *a.m_CPUMatrix, transposeA, *b.m_CPUSparseMatrix, transposeB, beta, *c.m_CPUMatrix);
c.SetDataLocation(CPU, DENSE);
}
else if (c.GetMatrixType() == MatrixType::SPARSE) // CPU, DENSE * SPARSE -> SPARSE
{
if (beta != 0 && beta != 1)
{
NOT_IMPLEMENTED;
}
else
{
if (beta == 0)
{
c.Reset();
}
CPUSparseMatrix<ElemType>::MultiplyAndAdd(alpha, *a.m_CPUMatrix, transposeA, *b.m_CPUSparseMatrix, transposeB, *c.m_CPUSparseMatrix);
}
c.SetDataLocation(CPU, SPARSE);
}
else
NOT_IMPLEMENTED; // CPU, DENSE * SPARSE -> UNDETERMINED ?
}
else // CPU, DENSE * DENSE -> DENSE (matrix c enforced to be DENSE)
{
c.SwitchToMatrixType(MatrixType::DENSE, matrixFormatDense, false);
CPUMatrix<ElemType>::MultiplyAndWeightedAdd(alpha, *a.m_CPUMatrix, transposeA, *b.m_CPUMatrix, transposeB, beta, *c.m_CPUMatrix, pQuantizedMultiplier);
c.SetDataLocation(CPU, DENSE);
}
}
}
else // GPU operations
{
if (a.m_matrixType == MatrixType::DENSE && b.m_matrixType == MatrixType::DENSE && c.m_matrixType == MatrixType::DENSE) // GPU, DENSE * DENSE -> DENSE
{
GPUMatrix<ElemType>::MultiplyAndWeightedAdd(alpha, *a.m_GPUMatrix, transposeA, *b.m_GPUMatrix, transposeB, beta, *c.m_GPUMatrix);
c.SetDataLocation(GPU, DENSE);
}
else if (a.m_matrixType == MatrixType::SPARSE && b.m_matrixType == MatrixType::DENSE && c.m_matrixType == MatrixType::DENSE) // GPU, SPARSE * DENSE -> DENSE
{
GPUMatrix<ElemType> second = transposeB ? b.m_GPUMatrix->Transpose() : *b.m_GPUMatrix;
GPUSparseMatrix<ElemType>::MultiplyAndWeightedAdd(alpha, *a.m_GPUSparseMatrix, transposeA, second, false, beta, *c.m_GPUMatrix);
c.SetDataLocation(GPU, DENSE);
}
else if (a.m_matrixType == MatrixType::DENSE && b.m_matrixType == MatrixType::SPARSE && c.m_matrixType == MatrixType::DENSE) // GPU, DENSE * SPARSE -> DENSE
{
GPUSparseMatrix<ElemType>::MultiplyAndWeightedAdd(alpha, *a.m_GPUMatrix, transposeA, *b.m_GPUSparseMatrix, transposeB, beta, *c.m_GPUMatrix);
c.SetDataLocation(GPU, DENSE);
}
else if (a.m_matrixType == MatrixType::DENSE && b.m_matrixType == MatrixType::SPARSE && c.m_matrixType == MatrixType::SPARSE) // GPU, DENSE * SPARSE -> SPARSE
{
if (beta != 0 && beta != 1)
{
NOT_IMPLEMENTED;
}
else
{
if (beta == 0)
{
c.Reset();
}
GPUSparseMatrix<ElemType>::MultiplyAndAdd(alpha, *a.m_GPUMatrix, transposeA, *b.m_GPUSparseMatrix, transposeB, *c.m_GPUSparseMatrix);
}
c.SetDataLocation(GPU, SPARSE);
}
else if (a.m_matrixType == MatrixType::SPARSE && b.m_matrixType == MatrixType::SPARSE && c.m_matrixType == MatrixType::SPARSE) // GPU, SPARSE * SPARSE -> SPARSE
{
GPUSparseMatrix<ElemType> firstDummy = alpha == 1 ? *a.m_GPUSparseMatrix : (*a.m_GPUSparseMatrix) * alpha;
GPUSparseMatrix<ElemType>& first = firstDummy; // By Malcolm.. gcc doesn't support auto
if (beta == 0)
{
GPUSparseMatrix<ElemType>::Multiply(first, transposeA, *b.m_GPUSparseMatrix, transposeB, *c.m_GPUSparseMatrix);
c.SetDataLocation(GPU, SPARSE);
}
else
{
GPUSparseMatrix<ElemType> tmp(b.m_GPUSparseMatrix->GetComputeDeviceId());
GPUSparseMatrix<ElemType>::Multiply(first, transposeA, *b.m_GPUSparseMatrix, transposeB, tmp);
*c.m_GPUSparseMatrix = tmp + (*c.m_GPUSparseMatrix) * beta;
c.SetDataLocation(GPU, SPARSE);
}
}
else if (a.m_matrixType == MatrixType::DENSE && b.m_matrixType == MatrixType::DENSE && c.m_matrixType == MatrixType::SPARSE) // GPU, DENSE * DENSE -> SPARSE
{
GPUMatrix<ElemType> tmp(a.m_GPUMatrix->GetComputeDeviceId());
GPUMatrix<ElemType>::MultiplyAndWeightedAdd(alpha, *a.m_GPUMatrix, transposeA, *b.m_GPUMatrix, transposeB, (ElemType)0.0, tmp);
if (beta != 0)
{
GPUSparseMatrix<ElemType> tmpSparse(a.m_GPUMatrix->GetComputeDeviceId());
tmpSparse.SetValue(tmp);
*c.m_GPUSparseMatrix = tmpSparse + (*c.m_GPUSparseMatrix) * beta;
}
else
{
c.m_GPUSparseMatrix->SetValue(tmp);
}
c.SetDataLocation(GPU, SPARSE);
}
else if (a.m_matrixType == MatrixType::SPARSE && b.m_matrixType == MatrixType::SPARSE && c.m_matrixType == MatrixType::DENSE) // GPU, SPARSE * SPARSE -> DENSE
{
NOT_IMPLEMENTED;
}
else if (a.m_matrixType == MatrixType::SPARSE && b.m_matrixType == MatrixType::DENSE && c.m_matrixType == MatrixType::SPARSE) // GPU, SPARSE * DENSE -> SPARSE
{
NOT_IMPLEMENTED;
}
else // No combination left
{
NOT_IMPLEMENTED;
}
}
}
template <class ElemType>
/*static*/ void Matrix<ElemType>::Multiply1x1AndWeightedAdd(ElemType alpha, const Matrix<ElemType>& a, const Matrix<ElemType>& b, ElemType beta, Matrix<ElemType>& c)
{
// special case: a is a 1x1 matrix
// The only alternative is to Get00Elements(), which makes things inefficient.
if (a.GetNumElements() != 1)
InvalidArgument("Multiply1x1AndWeightedAdd: first arg must be a scalar.");
DISPATCH_MATRIX_ON_FLAG(&c,
nullptr,
CPUMatrix<ElemType>::Multiply1x1AndWeightedAdd(alpha, *a.m_CPUMatrix, *b.m_CPUMatrix, beta, *c.m_CPUMatrix),
GPUMatrix<ElemType>::Multiply1x1AndWeightedAdd(alpha, *a.m_GPUMatrix, *b.m_GPUMatrix, beta, *c.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
/// <summary>Matrix-matrix multiply with col-major matrices (a and b may be transposed): c = op(a) * op(b) + c</summary>
/// <param name="a">Input matrix</param>
/// <param name="transposeA">Whether matrix a is transposed</param>
/// <param name="b">Input matrix</param>
/// <param name="transposeB">Whether matrix b is transposed</param>
/// <param name="c">Resulting matrix, user is responsible for allocating this</param>
template <class ElemType>
void Matrix<ElemType>::MultiplyAndAdd(const Matrix<ElemType>& a, const bool transposeA, const Matrix<ElemType>& b, const bool transposeB,
Matrix<ElemType>& c)
{
return Matrix<ElemType>::MultiplyAndWeightedAdd(1.0, a, transposeA, b, transposeB, 1.0, c);
}
/// <summary>Matrix-matrix multiply with col-major matrices (a and b may be transposed): c = op(a) * op(b)</summary>
/// <param name="a">Input matrix</param>
/// <param name="transposeA">Whether matrix a is transposed</param>
/// <param name="b">Input matrix</param>
/// <param name="transposeB">Whether matrix b is transposed</param>
/// <param name="c">Resulting matrix, user is responsible for allocating this</param>
template <class ElemType>
void Matrix<ElemType>::Multiply(const Matrix<ElemType>& a, const bool transposeA, const Matrix<ElemType>& b, const bool transposeB,
Matrix<ElemType>& c)
{
return Matrix<ElemType>::MultiplyAndWeightedAdd(1.0, a, transposeA, b, transposeB, 0.0, c);
}
/// <summary>Matrix-matrix multiply with col-major matrices (a and b are not transposed): c = a * b</summary>
/// <param name="a">Input matrix</param>
/// <param name="b">Input matrix</param>
/// <param name="c">Resulting matrix, user is responsible for allocating this</param>
template <class ElemType>
void Matrix<ElemType>::Multiply(const Matrix<ElemType>& a, const Matrix<ElemType>& b, Matrix<ElemType>& c)
{
return Matrix<ElemType>::MultiplyAndWeightedAdd(1.0, a, false, b, false, 0.0, c);
}
/// <summary>1-D Convolution with col-major matrices (a and b may be transposed): c = alpha * op(a) * op(b) + beta*c. MultiplyAndWeightedAdd is just a special case of this.</summary>
/// <param name="alpha">Scalar</param>
/// <param name="a">Input matrix</param>
/// <param name="transposeA">Whether matrix a is transposed</param>
/// <param name="b">Input matrix</param>
/// <param name="transposeB">Whether matrix b is transposed</param>
/// <param name="beta">Scalar</param>
/// <param name="c">Resulting matrix, user is responsible for allocating this</param>
template <class ElemType>
void Matrix<ElemType>::ConvolveAndWeightedAdd(ElemType alpha, const Matrix<ElemType>& a, const bool transposeA, const Matrix<ElemType>& b, const bool transposeB,
ElemType beta, Matrix<ElemType>& c, size_t numChannels, size_t horizontalSubsample, bool padding, bool channelwise)
{
DecideAndMoveToRightDevice(a, b, c);
if (c.GetDeviceId() >= 0 /*GPU*/ && a.GetMatrixType() == MatrixType::DENSE && b.GetMatrixType() == MatrixType::SPARSE && c.GetMatrixType() == MatrixType::DENSE)
{
GPUSparseMatrix<ElemType>::ConvolveAndWeightedAdd(alpha, *a.m_GPUMatrix, transposeA, *b.m_GPUSparseMatrix, transposeB, beta, *c.m_GPUMatrix, numChannels, horizontalSubsample, padding, channelwise);
}
else
{
NOT_IMPLEMENTED;
}
}
/// <summary>Columnwise scale with col-major matrix and accumulate.</summary>
/// <param name="alpha">Scalar</param>
/// <param name="a">Input matrix</param>
/// <param name="v">Input scale vector for each column of a</param>
/// <param name="beta">Scalar</param>
/// <param name="c">Resulting matrix, the same shape as a</param>
template <class ElemType>
void Matrix<ElemType>::ColumnwiseScaleAndWeightedAdd(ElemType alpha, const Matrix<ElemType>& a, const Matrix<ElemType>& v, ElemType beta, Matrix<ElemType>& c)
{
DecideAndMoveToRightDevice(a, v, c);
DISPATCH_MATRIX_ON_FLAG(&a,
nullptr,
CPUMatrix<ElemType>::ColumnwiseScaleAndWeightedAdd(alpha, *a.m_CPUMatrix, *v.m_CPUMatrix, beta, *c.m_CPUMatrix),
GPUMatrix<ElemType>::ColumnwiseScaleAndWeightedAdd(alpha, *a.m_GPUMatrix, *v.m_GPUMatrix, beta, *c.m_GPUMatrix),
CPUSparseMatrix<ElemType>::ColumnwiseScaleAndWeightedAdd(alpha, *a.m_CPUSparseMatrix, *v.m_CPUMatrix, beta, *c.m_CPUMatrix),
GPUSparseMatrix<ElemType>::ColumnwiseScaleAndWeightedAdd(alpha, *a.m_GPUSparseMatrix, *v.m_GPUMatrix, beta, *c.m_GPUMatrix));
}
/// <summary>Matrix-scalar multiply with col-major matrices: c = alpha * a + c</summary>
/// if a is a column vector, add to all columns of c
/// if a is a row vector, add to all rows of c
/// <param name="alpha">Scalar</param>
/// <param name="a">Input matrix</param>
/// <param name="c">Resulting matrix, user is responsible for allocating this</param>
template <class ElemType>
/*static*/ void Matrix<ElemType>::ScaleAndAdd(ElemType alpha, const Matrix<ElemType>& a, Matrix<ElemType>& c)
{
if (a.IsEmpty() || c.IsEmpty())
LogicError("ScaleAndAdd: one of the input matrices is empty.");
DecideAndMoveToRightDevice(c, a);
if (a.GetMatrixType() == c.GetMatrixType())
{
DISPATCH_MATRIX_ON_FLAG(&c, &c,
{ CPUMatrix<ElemType>::ScaleAndAdd(alpha, *a.m_CPUMatrix, *c.m_CPUMatrix); },
{ GPUMatrix<ElemType>::ScaleAndAdd(alpha, *a.m_GPUMatrix, *c.m_GPUMatrix); },
{ NOT_IMPLEMENTED; },
{ GPUSparseMatrix<ElemType> b = move(*c.m_GPUSparseMatrix); GPUSparseMatrix<ElemType>::ScaleAndAdd(alpha, *a.m_GPUSparseMatrix, 1, b, *c.m_GPUSparseMatrix); });
}
else
{
DISPATCH_MATRIX_ON_FLAG(&c, nullptr,
{
CPUSparseMatrix<ElemType>::ScaleAndAdd(alpha, *a.m_CPUSparseMatrix, *c.m_CPUMatrix);
c.SetDataLocation(CPU);
},
{
if (a.m_GPUSparseMatrix->GetFormat() == MatrixFormat::matrixFormatSparseCSC)
GPUSparseMatrix<ElemType>::ScaleAndAdd(alpha, *a.m_GPUSparseMatrix, 1, *c.m_GPUMatrix, *c.m_GPUMatrix);
else // new GPU sparse matrix code
GPUSparseMatrix<ElemType>::ScaleAndAdd(alpha, *a.m_GPUSparseMatrix, *c.m_GPUMatrix);
c.SetDataLocation(GPU);
},
{ NOT_IMPLEMENTED; },
{
c.m_GPUMatrix = make_shared<GPUMatrix<ElemType>>(c.m_GPUSparseMatrix->CopyToDenseMatrix());
GPUSparseMatrix<ElemType>::ScaleAndAdd(alpha, *a.m_GPUMatrix, 1, *c.m_GPUSparseMatrix, *c.m_GPUMatrix);
c.SetDataLocation(GPU, DENSE);
c.m_GPUSparseMatrix = nullptr;
});
}
}
/// <summary>Matrix-scalar multiply with col-major matrices: c = alpha * a + beta * c</summary>
/// if a is a column vector, add to all columns of c
/// if a is a row vector, add to all rows of c
/// <param name="alpha">Scalar</param>
/// <param name="a">Input matrix</param>
/// <param name="beta">Scalar</param>
/// <param name="c">Resulting matrix, caller is responsible for allocating this</param>
template <class ElemType>
/*static*/ void Matrix<ElemType>::ScaleAndAdd(ElemType alpha, const Matrix<ElemType>& a, ElemType beta, Matrix<ElemType>& c)
{
if (beta == 1)
ScaleAndAdd(alpha, a, c);
else if (beta == 0)
Scale(alpha, a, c);
else
{
ScaleAndAdd(alpha / beta, a, c); // c1=alpha/beta * a + c
Scale(beta, c); // c/beta * beta
// TODO: two lines above should be changed as follows:
// Scale(beta, c); // c1 = c * beta
// ScaleAndAdd(alpha, a, c); // c=alpha * a + c1 = alpha * a + beta * c
}
}
// tensor swapping and addition: c <- keepWeight * b + scaleFactor * swap_dimensions(a, S, K)
// where
// - a is interpreted as a tensor of dimension (D x S x M x K x T) // column-major, as usual
// - b and c as a tensor of dimension (D x K x M x S x T) // note: K and S swapped
// The main point of this function is to reshuffle a tensor w.r.t. two dimensions that get swapped in memory,
// but for gradients, we will need to add, hence the keepWeight.
// Notes:
// - c and b may be the same (in-place operation is expressly allowed).
// - D, M, and/or T may be 1. For example, D == M == T == 1 implements a 2D matrix transpose from (S x K) to (K x S).
// - If keepWeight == 0, then b will just get overwritten (straight assignment, b may be uninitialized or contain NaNs).
// - The original matrix dimensions are ignored except that sizes must match (rows x cols == D x S x M x K x T).
// For diagnostics purposes, this function also enforces the rows % D == 0 and cols % T == 0, but this is not a functional requirement and can be removed if that helps.
// - Dense matrices only.
// TODO: Handle these cases:
// - no swapping happening --just do a block copy
// - swapping can be implemented by cuDNN --do so
template <class ElemType>
/*static*/ void Matrix<ElemType>::TensorShuffleScaleAndAdd(ElemType keepWeight, const Matrix<ElemType>& a, size_t D, size_t S, size_t M, size_t K, size_t T, ElemType scaleFactor, const Matrix<ElemType>& b, Matrix<ElemType>& c)
{
if (a.GetNumElements() != c.GetNumElements() || b.GetNumElements() != c.GetNumElements()) // allocations must match (but not dimensions, since we reinterpret the dimensions anyway)
InvalidArgument("TensorShuffleScaleAndAdd: a, b, and c must have same number of elements.");
if (c.IsEmpty()) // operating on empty minibatch slices is perfectly cromulent
return;
// sanity checks for current use cases--these are not strictly necessary and can be deleted
if (a.GetNumRows() % D != 0 || b.GetNumRows() % D != 0 || c.GetNumRows() % D != 0)
InvalidArgument("TensorShuffleScaleAndAdd: a, b, and c are meant to have a row dimension that is a multiple of D.");
if (a.GetNumCols() % T != 0 || b.GetNumCols() % T != 0 || c.GetNumCols() % T != 0)
InvalidArgument("TensorShuffleScaleAndAdd: a, b, and c are meant to have a column dimension that is a multiple of T.");
DecideAndMoveToRightDevice(a, b, c);
DISPATCH_MATRIX_ON_FLAG(&c,
nullptr,
CPUMatrix<ElemType>::TensorShuffleScaleAndAdd(keepWeight, *a.m_CPUMatrix, D, S, M, K, T, scaleFactor, *b.m_CPUMatrix, *c.m_CPUMatrix),
GPUMatrix<ElemType>::TensorShuffleScaleAndAdd(keepWeight, *a.m_GPUMatrix, D, S, M, K, T, scaleFactor, *b.m_GPUMatrix, *c.m_GPUMatrix),
NOT_IMPLEMENTED,
GPUSparseMatrix<ElemType>::TensorShuffleScaleAndAdd(keepWeight, *a.m_GPUSparseMatrix, D, S, M, K, T, scaleFactor, *b.m_GPUSparseMatrix, *c.m_GPUSparseMatrix));
}
/// <summary>c += alpha * (a-b)</summary>
/// if a, b, c must have same dim
/// <param name="alpha">Scalar</param>
/// <param name="a">Input matrix</param>
/// <param name="b">Input matrix</param>
/// <param name="c">Resulting matrix, user is responsible for allocating this</param>
template <class ElemType>
void Matrix<ElemType>::AddScaledDifference(const ElemType alpha, const Matrix<ElemType>& a, const Matrix<ElemType>& b, Matrix<ElemType>& c)
{
DecideAndMoveToRightDevice(c, a, b);
if (!(a.GetMatrixType() == b.GetMatrixType() && a.GetMatrixType() == c.GetMatrixType()))
NOT_IMPLEMENTED;
DISPATCH_MATRIX_ON_FLAG(&c,
&c,
CPUMatrix<ElemType>::AddScaledDifference(alpha, *a.m_CPUMatrix, *b.m_CPUMatrix, *c.m_CPUMatrix),
GPUMatrix<ElemType>::AddScaledDifference(alpha, *a.m_GPUMatrix, *b.m_GPUMatrix, *c.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
/// <summary> c = alpha * (a-b)</summary>
/// if a, b, c must have same dim
/// <param name="alpha">Scalar</param>
/// <param name="a">Input matrix</param>
/// <param name="b">Input matrix</param>
/// <param name="c">Resulting matrix, user is responsible for allocating this</param>
template <class ElemType>
void Matrix<ElemType>::AssignScaledDifference(const ElemType alpha, const Matrix<ElemType>& a, const Matrix<ElemType>& b, Matrix<ElemType>& c)
{
DecideAndMoveToRightDevice(a, b, c);
if (!(a.GetMatrixType() == b.GetMatrixType()))
NOT_IMPLEMENTED;
c.SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&c,
&c,
CPUMatrix<ElemType>::AssignScaledDifference(alpha, *a.m_CPUMatrix, *b.m_CPUMatrix, *c.m_CPUMatrix),
GPUMatrix<ElemType>::AssignScaledDifference(alpha, *a.m_GPUMatrix, *b.m_GPUMatrix, *c.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
/// <summary>c += alpha * (a-b)</summary>
/// if a, b, c must have same dim
/// <param name="alpha">Scalar</param>
/// <param name="a">Input matrix</param>
/// <param name="b">Input matrix</param>
/// <param name="c">Resulting matrix, user is responsible for allocating this</param>
template <class ElemType>
void Matrix<ElemType>::AddScaledDifference(const Matrix<ElemType>& alpha, const Matrix<ElemType>& a, const Matrix<ElemType>& b, Matrix<ElemType>& c)
{
DecideAndMoveToRightDevice(c, a, b);
alpha._transferToDevice(c.GetDeviceId());
if (!(a.GetMatrixType() == b.GetMatrixType() && a.GetMatrixType() == c.GetMatrixType() && a.GetMatrixType() == alpha.GetMatrixType()))
NOT_IMPLEMENTED;
DISPATCH_MATRIX_ON_FLAG(&c,
&c,
CPUMatrix<ElemType>::AddScaledDifference(*alpha.m_CPUMatrix, *a.m_CPUMatrix, *b.m_CPUMatrix, *c.m_CPUMatrix),
GPUMatrix<ElemType>::AddScaledDifference(*alpha.m_GPUMatrix, *a.m_GPUMatrix, *b.m_GPUMatrix, *c.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
/// <summary> c = alpha * (a-b)</summary>
/// if a, b, c must have same dim
/// <param name="alpha">Scalar</param>
/// <param name="a">Input matrix</param>
/// <param name="b">Input matrix</param>
/// <param name="c">Resulting matrix, user is responsible for allocating this</param>
template <class ElemType>
void Matrix<ElemType>::AssignScaledDifference(const Matrix<ElemType>& alpha, const Matrix<ElemType>& a, const Matrix<ElemType>& b, Matrix<ElemType>& c)
{
DecideAndMoveToRightDevice(a, b, alpha);
c._transferToDevice(a.GetDeviceId());
if (!(a.GetMatrixType() == b.GetMatrixType() && a.GetMatrixType() == alpha.GetMatrixType()))
NOT_IMPLEMENTED;
c.SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&c,
nullptr,
CPUMatrix<ElemType>::AssignScaledDifference(*alpha.m_CPUMatrix, *a.m_CPUMatrix, *b.m_CPUMatrix, *c.m_CPUMatrix),
GPUMatrix<ElemType>::AssignScaledDifference(*alpha.m_GPUMatrix, *a.m_GPUMatrix, *b.m_GPUMatrix, *c.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
//c[ci,cj] += a[ai,aj]
template <class ElemType>
void Matrix<ElemType>::AddElementToElement(const Matrix<ElemType>& a, const size_t ai, const size_t aj, Matrix<ElemType>& c, const size_t ci, const size_t cj)
{
DecideAndMoveToRightDevice(c, a);
if (c.GetMatrixType() != a.GetMatrixType())
NOT_IMPLEMENTED;
DISPATCH_MATRIX_ON_FLAG(&c,
&c,
CPUMatrix<ElemType>::AddElementToElement(1, *a.m_CPUMatrix, ai, aj, *c.m_CPUMatrix, ci, cj),
GPUMatrix<ElemType>::AddElementToElement(1, *a.m_GPUMatrix, ai, aj, *c.m_GPUMatrix, ci, cj),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
//c[ci,cj] = a[ai,aj]
template <class ElemType>
void Matrix<ElemType>::AssignElementToElement(const Matrix<ElemType>& a, const size_t ai, const size_t aj, Matrix<ElemType>& c, const size_t ci, const size_t cj)
{
DecideAndMoveToRightDevice(c, a);
if (c.GetMatrixType() != a.GetMatrixType())
NOT_IMPLEMENTED;
DISPATCH_MATRIX_ON_FLAG(&c,
&c,
CPUMatrix<ElemType>::AddElementToElement(0, *a.m_CPUMatrix, ai, aj, *c.m_CPUMatrix, ci, cj),
GPUMatrix<ElemType>::AddElementToElement(0, *a.m_GPUMatrix, ai, aj, *c.m_GPUMatrix, ci, cj),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
//for each column of this, we add row slice of a starting from startIndex
template <class ElemType>
void Matrix<ElemType>::MinusOneAt(Matrix<ElemType>& a, const size_t position)
{
DISPATCH_MATRIX_ON_FLAG(&a,
&a,
CPUMatrix<ElemType>::MinusOneAt(*a.m_CPUMatrix, position),
GPUMatrix<ElemType>::MinusOneAt(*a.m_GPUMatrix, position),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
/// <summary>Matrix-scalar multiply with col-major matrices: c = alpha * a</summary>
/// <param name="alpha">Scalar</param>
/// <param name="a">Input matrix</param>
/// <param name="c">Resulting matrix, user is responsible for allocating this</param>
template <class ElemType>
void Matrix<ElemType>::Scale(ElemType alpha, const Matrix<ElemType>& a, Matrix<ElemType>& c)
{
DecideAndMoveToRightDevice(c, a);
c.SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
if (alpha == 0)
{
c.Resize(a);
c.SetValue(0); // this is a little faster, and also does not propagate NaNs, which we'd expect from 'beta' parameters
return;
}
else
DISPATCH_MATRIX_ON_FLAG(&c,
&c,
CPUMatrix<ElemType>::Scale(alpha, *a.m_CPUMatrix, *c.m_CPUMatrix),
GPUMatrix<ElemType>::Scale(alpha, *a.m_GPUMatrix, *c.m_GPUMatrix),
NOT_IMPLEMENTED, * c.m_GPUSparseMatrix = (*a.m_GPUSparseMatrix) * alpha);
}
/// <summary>Matrix-scalar multiply with col-major matrices: a = alpha * a</summary>
/// <param name="alpha">Scalar</param>
/// <param name="a">Input matrix</param>
template <class ElemType>
void Matrix<ElemType>::Scale(ElemType alpha, Matrix<ElemType>& a)
{
if (alpha == 0)
a.SetValue(0); // this is a little faster, and also does not propagate NaNs, which we'd expect from 'beta' parameters
else if (a.IsEmpty())
return;
else
DISPATCH_MATRIX_ON_FLAG(&a,
&a,
CPUMatrix<ElemType>::Scale(alpha, *a.m_CPUMatrix),
GPUMatrix<ElemType>::Scale(alpha, *a.m_GPUMatrix),
NOT_IMPLEMENTED,
GPUSparseMatrix<ElemType>::Scale(alpha, *a.m_GPUSparseMatrix));
}
/// <summary>Matrix scalar matrix multiply with col-major matrices: a = alpha[0,0] * a</summary>
/// <param name="alpha">1x1 matrix</param>
/// <param name="a">Input matrix</param>
template <class ElemType>
void Matrix<ElemType>::Scale(const Matrix<ElemType>& alpha, Matrix<ElemType>& a)
{
if (a.IsEmpty())
return;
DecideAndMoveToRightDevice(a, alpha);
if (a.GetMatrixType() != alpha.GetMatrixType())
NOT_IMPLEMENTED;
DISPATCH_MATRIX_ON_FLAG(&a,
nullptr,
CPUMatrix<ElemType>::Scale(*alpha.m_CPUMatrix, *a.m_CPUMatrix),
GPUMatrix<ElemType>::Scale(*alpha.m_GPUMatrix, *a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::InnerProduct(const Matrix<ElemType>& a, const Matrix<ElemType>& b, Matrix<ElemType>& c, const bool isColWise)
{
if (a.IsEmpty() || b.IsEmpty())
LogicError("InnerProduct: one of the input matrix is empty.");
DecideAndMoveToRightDevice(a, b, c);
// TODO: consider swapping the arguments in this case
if (b.GetMatrixType() != DENSE) // only support a being sparse/dense. Both b and c should be dense
NOT_IMPLEMENTED;
c.SwitchToMatrixType(b.GetMatrixType(), b.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
&a,
CPUMatrix<ElemType>::InnerProduct(*a.m_CPUMatrix, *b.m_CPUMatrix, *c.m_CPUMatrix, isColWise),
GPUMatrix<ElemType>::InnerProduct(*a.m_GPUMatrix, *b.m_GPUMatrix, *c.m_GPUMatrix, isColWise),
CPUSparseMatrix<ElemType>::InnerProduct(*a.m_CPUSparseMatrix, *b.m_CPUMatrix, *c.m_CPUMatrix, isColWise),
GPUSparseMatrix<ElemType>::InnerProduct(*a.m_GPUSparseMatrix, *b.m_GPUMatrix, *c.m_GPUMatrix, isColWise));
}
template <class ElemType>
ElemType Matrix<ElemType>::InnerProductOfMatrices(const Matrix<ElemType>& a, const Matrix<ElemType>& b)
{
if (a.IsEmpty() || b.IsEmpty())
LogicError("InnerProductOfMatrices: one of the input matrices is empty.");
DecideAndMoveToRightDevice(a, b);
if (a.GetMatrixType() == b.GetMatrixType())
{
DISPATCH_MATRIX_ON_FLAG(&a,
nullptr,
return CPUMatrix<ElemType>::InnerProductOfMatrices(*a.m_CPUMatrix, *b.m_CPUMatrix),
return GPUMatrix<ElemType>::InnerProductOfMatrices(*a.m_GPUMatrix, *b.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
else
{
DISPATCH_MATRIX_ON_FLAG(&a,
nullptr,
NOT_IMPLEMENTED,
return GPUSparseMatrix<ElemType>::InnerProductOfMatrices(*a.m_GPUMatrix, *b.m_GPUSparseMatrix),
NOT_IMPLEMENTED,
return GPUSparseMatrix<ElemType>::InnerProductOfMatrices(*a.m_GPUSparseMatrix, *b.m_GPUMatrix));
}
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignInnerProductOfMatrices(const Matrix<ElemType>& a, const Matrix<ElemType>& b)
{
if (a.IsEmpty() || b.IsEmpty())
LogicError("InnerProductOfMatrices: one of the input matrices is empty.");
Resize(1, 1);
DecideAndMoveToRightDevice(a, b, *this);
if (a.GetMatrixType() == b.GetMatrixType())
{
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->SetValue(CPUMatrix<ElemType>::InnerProductOfMatrices(*a.m_CPUMatrix, *b.m_CPUMatrix)),
m_GPUMatrix->AssignInnerProductOfMatrices(*a.m_GPUMatrix, *b.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
else
{
NOT_IMPLEMENTED;
}
return *this;
}
template <class ElemType>
void Matrix<ElemType>::ElementWisePower(ElemType alpha, const Matrix<ElemType>& a, Matrix<ElemType>& c)
{
if (a.IsEmpty())
return;
DecideAndMoveToRightDevice(a, c);
c.SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&c,
nullptr,
CPUMatrix<ElemType>::ElementWisePower(alpha, *a.m_CPUMatrix, *c.m_CPUMatrix),
GPUMatrix<ElemType>::ElementWisePower(alpha, *a.m_GPUMatrix, *c.m_GPUMatrix),
NOT_IMPLEMENTED,
GPUSparseMatrix<ElemType>::ElementWisePower(alpha, *a.m_GPUSparseMatrix, *c.m_GPUSparseMatrix));
}
template <class ElemType>
bool Matrix<ElemType>::AreEqual(const Matrix<ElemType>& a, const Matrix<ElemType>& b, const ElemType threshold /*= 1e-8*/)
{
if (a.GetNumRows() != b.GetNumRows() || a.GetNumCols() != b.GetNumCols())
return false;
DecideAndMoveToRightDevice(a, b);
if (a.GetMatrixType() == b.GetMatrixType())
{
DISPATCH_MATRIX_ON_FLAG(&a,
nullptr,
return CPUMatrix<ElemType>::AreEqual(*a.m_CPUMatrix, *b.m_CPUMatrix, threshold),
return GPUMatrix<ElemType>::AreEqual(*a.m_GPUMatrix, *b.m_GPUMatrix, threshold),
return CPUSparseMatrix<ElemType>::AreEqual(*a.m_CPUSparseMatrix, *b.m_CPUSparseMatrix, threshold),
return GPUSparseMatrix<ElemType>::AreEqual(*a.m_GPUSparseMatrix, *b.m_GPUSparseMatrix, threshold));
}
else
{
DISPATCH_MATRIX_ON_FLAG(&a,
nullptr,
NOT_IMPLEMENTED;
return false,
return GPUSparseMatrix<ElemType>::AreEqual(*a.m_GPUMatrix, *b.m_GPUSparseMatrix, threshold),
NOT_IMPLEMENTED;
return false,
return GPUSparseMatrix<ElemType>::AreEqual(*a.m_GPUSparseMatrix, *b.m_GPUMatrix, threshold));
}
}
template <class ElemType>
bool Matrix<ElemType>::HasElement(const Matrix<ElemType>& a, const ElemType value)
{
if (a.IsEmpty())
return false;
DISPATCH_MATRIX_ON_FLAG(&a,
&a,
return CPUMatrix<ElemType>::HasElement(*a.m_CPUMatrix, value),
return GPUMatrix<ElemType>::HasElement(*a.m_GPUMatrix, value),
NOT_IMPLEMENTED;
return false,
NOT_IMPLEMENTED;
return false);
}
// diagnostics helper to check if matrix has a NaN
// This is very slow.
template <class ElemType>
bool Matrix<ElemType>::HasNan(const char* name) const
{
// Not implemented for sparse matrices.
// Return false as a workaround to at
// least evaluate the dense matrices.
if (m_matrixType == MatrixType::SPARSE)
return false;
if (IsEmpty())
return false;
// if GPU then first detect NaN there, will be faster
if (GetDeviceId() != CPUDEVICE)
{
Matrix<ElemType> sum(GetDeviceId());
sum.AssignSumOfElements(*this);
auto x = sum.Get00Element();
if (!std::isnan(x))
return false;
}
// const auto & us = *this;
const Matrix<ElemType>& us = *this;
foreach_coord (i, j, us)
if (std::isnan(us(i, j)))
{
fprintf(stderr, "HasNan: NaN detected at %s (%ld,%ld) in (%d,%d) matrix\n", name, i, j, (int) GetNumRows(), (int) GetNumCols());
return true;
}
return false;
}
#define CheckNan(m) m.HasNan(#m)
// another diagnostics helper to check if matrix has a NaN
// This is used at load and save time. This test is slow.
template <class ElemType>
size_t Matrix<ElemType>::CountNanInf() const
{
const auto& us = *this;
size_t n = 0; // number of NaNs/INF found
foreach_coord (i, j, us)
{
auto val = us(i, j);
if (std::isnan(val) || !std::isfinite(val))
n++;
}
return n;
}
// TODO: these are scalar operations--why are they in Matrix?
template <class ElemType>
ElemType Matrix<ElemType>::Exp10(ElemType num)
{
return (ElemType) exp(num * 2.302585093);
}
template <class ElemType>
ElemType Matrix<ElemType>::Mod(ElemType x, ElemType y)
{
assert(y > 0);
if (y <= 0)
LogicError("y is smaller than zero");
return x - y * floor(x / y);
}
// TODO: use static LogAdd() as defined in TensorOps.h
// Not doing this currently because that one uses ElemType for all ops, while this one uses double inside. Must compare before making this change.
template <class ElemType>
ElemType Matrix<ElemType>::LogAdd(ElemType x, ElemType y)
{
ElemType temp, diff, z;
if (x < y)
{
temp = x;
x = y;
y = temp; // TODO: ::swap(x,y)?
}
diff = y - x;
if (diff < MINLOGEXP)
{
return (ElemType)((x < LSMALL) ? LZERO : x);
}
else
{
z = exp(diff);
return (ElemType)(x + log(1.0 + z));
}
}
//Matrix<ElemType>& Matrix<ElemType>::Shift(const Matrix<ElemType>& a, size_t shift)
//[this]= (a right shift by n), padded with zeros
// shift left, shift needs to be negative value
// shift right, shift needs to be positive value
// BUGBUG: Leaves uninitialized values in the opened-up columns.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::Shift(const Matrix<ElemType>& a, int shift)
{
if (a.IsEmpty())
LogicError("Shift: Matrix is empty.");
else
LogicError("Shift: BUGBUG This function currently leaves uninitialized values. Fix the code or contact fseide@microsoft.com.");
auto& us = *this;
if (this != &a)
{
Resize(a.GetNumRows(), a.GetNumCols());
}
long n = (long) GetNumCols();
if (shift >= 0 && shift < n)
us.ColumnSlice(shift, n - shift).AssignValuesOf(a.ColumnSlice(0, n - shift));
if (shift < 0 && shift > -n)
us.ColumnSlice(0, n + shift).AssignValuesOf(a.ColumnSlice(-shift, n + shift));
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignElementProductOfWithShiftNeg(const Matrix<ElemType>& a, const Matrix<ElemType>& b, size_t shift, size_t negnumber)
{
if (a.IsEmpty() || b.IsEmpty())
LogicError("AssignElementProductOfWithShiftNeg: Matrix is empty.");
assert(a.GetNumRows() == b.GetNumRows() && a.GetNumCols() == b.GetNumCols());
if (!(a.GetNumRows() == b.GetNumRows() && a.GetNumCols() == b.GetNumCols()))
InvalidArgument("The input matrix dimensions do not match.");
if (a.GetNumRows() != 1)
InvalidArgument("AssignElementProductOfWithShiftNeg: The input matrix must be a row vector.");
DecideAndMoveToRightDevice(a, b, *this);
if (!(a.GetMatrixType() == b.GetMatrixType()))
NOT_IMPLEMENTED;
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->AssignElementProductOfWithShiftNeg(*a.m_CPUMatrix, *b.m_CPUMatrix, shift, negnumber),
m_GPUMatrix->AssignElementProductOfWithShiftNeg(*a.m_GPUMatrix, *b.m_GPUMatrix, shift, negnumber),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignInnerProductOfWithShiftNeg(const Matrix<ElemType>& a, const Matrix<ElemType>& b, const bool isColWise, size_t shift, size_t negnumber)
{
InnerProductWithShiftNeg(a, b, *this, isColWise, shift, negnumber);
return *this;
}
template <class ElemType>
void Matrix<ElemType>::InnerProductWithShiftNeg(const Matrix<ElemType>& a, const Matrix<ElemType>& b, Matrix<ElemType>& c, const bool isColWise, size_t shift, size_t negnumber)
{
if (a.IsEmpty() || b.IsEmpty())
LogicError("InnerProduct: one of the input matrix is empty.");
DecideAndMoveToRightDevice(a, b, c);
if (a.GetMatrixType() != b.GetMatrixType())
NOT_IMPLEMENTED;
c.SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&c,
&c,
CPUMatrix<ElemType>::InnerProductWithShiftNeg(*a.m_CPUMatrix, *b.m_CPUMatrix, *c.m_CPUMatrix, isColWise, shift, negnumber),
GPUMatrix<ElemType>::InnerProductWithShiftNeg(*a.m_GPUMatrix, *b.m_GPUMatrix, *c.m_GPUMatrix, shift, negnumber),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::GetARowByIndex(const Matrix<ElemType>& a, size_t index)
{
if (a.IsEmpty())
LogicError("GetARowByIndex: Matrix is empty.");
// WARNING: a and this must have same type
if (!(GetMatrixType() == a.GetMatrixType()))
NOT_IMPLEMENTED;
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->GetARowByIndex(*a.m_CPUMatrix, index),
m_GPUMatrix->GetARowByIndex(*a.m_GPUMatrix, index),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
void Matrix<ElemType>::ConductRowElementMultiplyWithShift(const Matrix<ElemType>& a, const Matrix<ElemType>& b, Matrix<ElemType>& c, size_t shift, bool bFirstmatrixfixed)
{
if (a.IsEmpty() || b.IsEmpty())
LogicError("InnerProduct: one of the input matrix is empty.");
DecideAndMoveToRightDevice(a, b, c);
if (a.GetMatrixType() != b.GetMatrixType())
NOT_IMPLEMENTED;
c.SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&c,
&c,
CPUMatrix<ElemType>::ConductRowElementMultiplyWithShift(*a.m_CPUMatrix, *b.m_CPUMatrix, *c.m_CPUMatrix, shift, bFirstmatrixfixed),
GPUMatrix<ElemType>::ConductRowElementMultiplyWithShift(*a.m_GPUMatrix, *b.m_GPUMatrix, *c.m_GPUMatrix, shift, bFirstmatrixfixed),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignElementProductOfWithShift(const Matrix<ElemType>& a, const Matrix<ElemType>& b, size_t shift)
{
if (a.IsEmpty() || b.IsEmpty())
LogicError("AssignElementProductOfWithShift: Matrix is empty.");
assert(a.GetNumRows() == b.GetNumRows() && a.GetNumCols() == b.GetNumCols());
if (!(a.GetNumRows() == b.GetNumRows() && a.GetNumCols() == b.GetNumCols()))
InvalidArgument("The input matrix dimensions do not match.");
if (a.GetNumRows() != 1)
InvalidArgument("AssignElementProductOfWithShiftNeg: The input matrix must be a row vector.");
DecideAndMoveToRightDevice(a, b, *this);
if (!(a.GetMatrixType() == b.GetMatrixType()))
NOT_IMPLEMENTED;
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->AssignElementProductOfWithShift(*a.m_CPUMatrix, *b.m_CPUMatrix, shift),
m_GPUMatrix->AssignElementProductOfWithShift(*a.m_GPUMatrix, *b.m_GPUMatrix, shift),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
void Matrix<ElemType>::RCRFBackwardCompute(const Matrix<ElemType>& alpha, Matrix<ElemType>& beta,
Matrix<ElemType>& functionValues, const Matrix<ElemType>& lbls,
const Matrix<ElemType>& pos_scores, const Matrix<ElemType>& pair_scores, const int shift)
{
DecideAndMoveToRightDevice(alpha, beta);
functionValues._transferToDevice(alpha.GetDeviceId());
beta._transferToDevice(alpha.GetDeviceId());
DISPATCH_MATRIX_ON_FLAG(&alpha,
&beta,
CPUMatrix<ElemType>::RCRFBackwardCompute(
*alpha.m_CPUMatrix,
*beta.m_CPUMatrix,
*lbls.m_CPUMatrix,
*pair_scores.m_CPUMatrix),
GPUMatrix<ElemType>::RCRFBackwardCompute(
*alpha.m_GPUMatrix,
*beta.m_GPUMatrix,
*lbls.m_GPUMatrix,
*pos_scores.m_GPUMatrix,
*pair_scores.m_GPUMatrix, shift),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::RCRFTransGrdCompute(const Matrix<ElemType>& lbls,
const Matrix<ElemType>& alpha,
const Matrix<ElemType>& beta,
const Matrix<ElemType>& pair_scores,
Matrix<ElemType>& grd,
const int startLbl,
const int shift)
{
DecideAndMoveToRightDevice(alpha, grd);
grd._transferToDevice(alpha.GetDeviceId());
DISPATCH_MATRIX_ON_FLAG(&alpha,
&grd,
CPUMatrix<ElemType>::RCRFTransGrdCompute(
*lbls.m_CPUMatrix,
*alpha.m_CPUMatrix,
*beta.m_CPUMatrix,
*pair_scores.m_CPUMatrix,
*grd.m_CPUMatrix),
GPUMatrix<ElemType>::RCRFTransGrdCompute(
*lbls.m_GPUMatrix,
*alpha.m_GPUMatrix,
*beta.m_GPUMatrix,
*pair_scores.m_GPUMatrix,
*grd.m_GPUMatrix,
startLbl,
shift),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::DropFrame(const Matrix<ElemType>& label, const Matrix<ElemType>& gamma, const ElemType& threshhold)
{
DecideAndMoveToRightDevice(*this, label, gamma);
if (label.GetNumCols() != gamma.GetNumCols() || label.GetNumRows() != gamma.GetNumRows())
LogicError("DropFrame: label matrix is not in the same size as gamm matrix.");
SwitchToMatrixType(label.GetMatrixType(), label.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->DropFrame(*label.m_CPUMatrix, *gamma.m_CPUMatrix, threshhold),
m_GPUMatrix->DropFrame(*label.m_GPUMatrix, *gamma.m_GPUMatrix, threshhold),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
/// <summary> c = alpha * (a-b)</summary>
/// if a, b, c must have same dim
/// <param name="alpha">Scalar</param>
/// <param name="a">Input matrix</param>
/// <param name="b">Input matrix</param>
/// <param name="c">Resulting matrix, user is responsible for allocating this</param>
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignSequenceError(const ElemType hsmoothingWeight, const Matrix<ElemType>& label,
const Matrix<ElemType>& dnnoutput, const Matrix<ElemType>& gamma, ElemType alpha)
{
DecideAndMoveToRightDevice(label, dnnoutput, gamma);
if (!(label.GetMatrixType() == gamma.GetMatrixType()))
NOT_IMPLEMENTED;
SwitchToMatrixType(label.GetMatrixType(), label.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->AssignSequenceError(hsmoothingWeight, *label.m_CPUMatrix, *dnnoutput.m_CPUMatrix, *gamma.m_CPUMatrix, alpha),
m_GPUMatrix->AssignSequenceError(hsmoothingWeight, *label.m_GPUMatrix, *dnnoutput.m_GPUMatrix, *gamma.m_GPUMatrix, alpha),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
// Calculate CTC score
// prob (input): the posterior output from the network
// alpha, beta (output): alpha and beta for forward-backward calculation.
// phoneSeq (input): phone ID sequence for each utterance in this minibatch, each col is one utterance
// phoneBound (input): phone boundary (frame index) of each phone for each utterance in this minibatch, each col is one utterance
// totalScore (output): total CTC score
// uttToChanInd (input): map from utterance ID to minibatch channel ID. We need this because each channel may contain more than one utterance.
// uttBeginFrame(input): the positon of the first frame of each utterance in the minibatch channel. We need this because each channel may contain more than one utterance.
// uttFrameNum (input): the frame number of each utterance. The size of this vector = the number of all utterances in this minibatch
// uttPhoneNum (input): the phone number of each utterance. The size of this vector = the number of all utterances in this minibatch
// numParallelSequences (input): num of parallel sequences
// mbsize (input): the maximum channel frame number
// blankTokenId (input): id of the CTC blank token
// delayConstraint -- label output delay constraint introduced during training that allows to have shorter delay during inference. This using the original time information to enforce that CTC tokens only get aligned within a time margin.
// Setting this parameter smaller will result in shorted delay between label output during decoding, yet may hurt accuracy.
// delayConstraint=-1 means no constraint
template<class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignCTCScore(const Matrix<ElemType>& prob, Matrix<ElemType>& alpha, Matrix<ElemType>& beta,
const Matrix<ElemType>& phoneSeq, const Matrix<ElemType>& phoneBound, Matrix<ElemType> &totalScore, const std::vector<size_t> & uttToChanInd,
const std::vector<size_t> & uttBeginFrame, const std::vector<size_t> & uttFrameNum, const std::vector<size_t> & uttPhoneNum,
const size_t numParallelSequences, const size_t mbsize, const size_t blankTokenId, const int delayConstraint, const bool isColWise)
{
DecideAndMoveToRightDevice(prob, *this);
alpha.Resize(phoneSeq.GetNumRows(), prob.GetNumCols());
beta.Resize(phoneSeq.GetNumRows(), prob.GetNumCols());
Resize(prob.GetNumRows(), prob.GetNumCols());
alpha.SetValue(LZERO);
beta.SetValue(LZERO);
SetValue(LZERO);
SwitchToMatrixType(prob.GetMatrixType(), prob.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&prob,
this,
this->m_CPUMatrix->AssignCTCScore(*prob.m_CPUMatrix, *alpha.m_CPUMatrix, *beta.m_CPUMatrix, *phoneSeq.m_CPUMatrix, *phoneBound.m_CPUMatrix, *totalScore.m_CPUMatrix,
uttToChanInd, uttBeginFrame, uttFrameNum, uttPhoneNum, numParallelSequences, mbsize, blankTokenId, delayConstraint, isColWise),
this->m_GPUMatrix->AssignCTCScore(*prob.m_GPUMatrix, *alpha.m_GPUMatrix, *beta.m_GPUMatrix, *phoneSeq.m_GPUMatrix, *phoneBound.m_GPUMatrix, *totalScore.m_GPUMatrix,
uttToChanInd, uttBeginFrame, uttFrameNum, uttPhoneNum, numParallelSequences, mbsize, blankTokenId, delayConstraint, isColWise),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED
);
return *this;
}
#pragma endregion Static BLAS Functions
// TensorView currently does not interface with sparse matrices. For now, we just catch this and throw.
template <class ElemType>
static bool VerifyIsDense(const Matrix<ElemType>& a)
{
if (a.GetMatrixType() != DENSE)
RuntimeError("TensorOp: Tensor operations are currently not supported for sparse matrices.");
return true;
}
template <class ElemType>
void Matrix<ElemType>::TensorOp(ElemType beta, const Matrix<ElemType>& a, ElemType alpha, ElementWiseOperator op, ElementWiseOperator reductionOp,
const array<size_t, 2>& offsets,
const SmallVector<size_t>& regularOpDims, const array<SmallVector<ptrdiff_t>, 2>& regularStrides,
const SmallVector<size_t>& reducingOpDims, const array<SmallVector<ptrdiff_t>, 2>& reducingStrides)
{
VerifyIsDense(*this) && VerifyIsDense(a);
DecideAndMoveToRightDevice(*this, a);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->TensorOp(beta, *a.m_CPUMatrix, alpha, op, reductionOp, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides),
m_GPUMatrix->TensorOp(beta, *a.m_GPUMatrix, alpha, op, reductionOp, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::TensorOp(ElemType beta, const Matrix<ElemType>& a, const Matrix<ElemType>& b, ElemType alpha, ElementWiseOperator op, ElementWiseOperator reductionOp,
const array<size_t, 3>& offsets,
const SmallVector<size_t>& regularOpDims, const array<SmallVector<ptrdiff_t>, 3>& regularStrides,
const SmallVector<size_t>& reducingOpDims, const array<SmallVector<ptrdiff_t>, 3>& reducingStrides)
{
VerifyIsDense(*this) && VerifyIsDense(a) && VerifyIsDense(b);
DecideAndMoveToRightDevice(*this, a, b);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->TensorOp(beta, *a.m_CPUMatrix, *b.m_CPUMatrix, alpha, op, reductionOp, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides),
m_GPUMatrix->TensorOp(beta, *a.m_GPUMatrix, *b.m_GPUMatrix, alpha, op, reductionOp, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::TensorOp(ElemType beta, const Matrix<ElemType>& a, const Matrix<ElemType>& b, const Matrix<ElemType>& c, ElemType alpha, ElementWiseOperator op, ElementWiseOperator reductionOp,
const array<size_t, 4>& offsets,
const SmallVector<size_t>& regularOpDims, const array<SmallVector<ptrdiff_t>, 4>& regularStrides,
const SmallVector<size_t>& reducingOpDims, const array<SmallVector<ptrdiff_t>, 4>& reducingStrides)
{
VerifyIsDense(*this) && VerifyIsDense(a) && VerifyIsDense(b) && VerifyIsDense(c);
DecideAndMoveToRightDevice(*this, a, b, c);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->TensorOp(beta, *a.m_CPUMatrix, *b.m_CPUMatrix, *c.m_CPUMatrix, alpha, op, reductionOp, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides),
m_GPUMatrix->TensorOp(beta, *a.m_GPUMatrix, *b.m_GPUMatrix, *c.m_GPUMatrix, alpha, op, reductionOp, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::TensorArgOp(const Matrix<ElemType>& a, ElementWiseOperator reductionOp,
const array<size_t, 2>& offsets,
const SmallVector<size_t>& regularOpDims, const array<SmallVector<ptrdiff_t>, 2>& regularStrides,
const SmallVector<size_t>& reducingOpDims, const array<SmallVector<ptrdiff_t>, 2>& reducingStrides)
{
VerifyIsDense(*this) && VerifyIsDense(a);
DecideAndMoveToRightDevice(*this, a);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->TensorArgOp(*a.m_CPUMatrix, reductionOp, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides),
m_GPUMatrix->TensorArgOp(*a.m_GPUMatrix, reductionOp, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
//template class Matrix<short>;
template class Matrix<float>;
template class Matrix<double>;
// We use Matrix<char> as the backing store for QuantizedMatrix, and also as a flag matrix.
// Let's explicitly instantiate the methods we need for that purpose
template Matrix<char>::Matrix(DEVICEID_TYPE);
template Matrix<char>::Matrix(Matrix<char>&&);
template Matrix<char>::Matrix(const size_t numRows, const size_t numCols, DEVICEID_TYPE deviceId, const MatrixType matrixType, const MatrixFormat matrixFormat, const size_t nnz);
template Matrix<char>::Matrix(const size_t numRows, const size_t numCols, char* pArray, DEVICEID_TYPE deviceId, const size_t matrixFlags, const size_t nnz);
template Matrix<char>::~Matrix();
template Matrix<char>& Matrix<char>::operator=(Matrix<char>&& moveFrom);
template char* Matrix<char>::Data() const;
template int Matrix<char>::GetDeviceId() const;
template size_t Matrix<char>::GetNumElements() const;
template Matrix<char> Matrix<char>::ColumnSlice(size_t startColumn, size_t numCols) const;
template void Matrix<char>::_transferToDevice(int id_to, bool isBeingMoved, bool emptyTransfer) const;
template void Matrix<char>::TransferToDeviceIfNotThere(int id_to, bool isBeingMoved, bool emptyTransfer, bool updatePreferredDevice) const;
template size_t Matrix<char>::GetNumRows() const;
template size_t Matrix<char>::GetNumCols() const;
template void Matrix<char>::SetValue(const char);
template void Matrix<char>::SetValue(size_t numRows, const size_t numCols, int deviceId, char* pArray, size_t matrixFlags, DataTransferer* transferer);
//template void Matrix<char>::SetValue(const Matrix<char>&, MatrixFormat);
template void Matrix<char>::SetValue(const Matrix<char>&);
template void Matrix<char>::AssignValuesOf(const Matrix<char>&);
template void Matrix<char>::CastAssignValuesOf(const MatrixBase& other);
template bool Matrix<char>::IsEmpty() const;
template void Matrix<char>::Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, bool growOnly);
template void Matrix<char>::Reshape(const size_t, const size_t);
template char* Matrix<char>::CopyToArray(void) const;
// Matrix<short> methods
template Matrix<short>::Matrix(DEVICEID_TYPE);
template Matrix<short>::Matrix(Matrix<short>&&);
template Matrix<short>::Matrix(const size_t numRows, const size_t numCols, DEVICEID_TYPE deviceId, const MatrixType matrixType, const MatrixFormat matrixFormat, const size_t nnz);
template Matrix<short>::Matrix(const size_t numRows, const size_t numCols, short* pArray, DEVICEID_TYPE deviceId, const size_t matrixFlags, const size_t nnz);
template Matrix<short>::~Matrix();
template Matrix<short>& Matrix<short>::operator=(Matrix<short>&& moveFrom);
template short* Matrix<short>::Data() const;
template int Matrix<short>::GetDeviceId() const;
template size_t Matrix<short>::GetNumElements() const;
template Matrix<short> Matrix<short>::ColumnSlice(size_t startColumn, size_t numCols) const;
template void Matrix<short>::_transferToDevice(int id_to, bool isBeingMoved, bool emptyTransfer) const;
template void Matrix<short>::TransferToDeviceIfNotThere(int id_to, bool isBeingMoved, bool emptyTransfer, bool updatePreferredDevice) const;
template size_t Matrix<short>::GetNumRows() const;
template size_t Matrix<short>::GetNumCols() const;
template void Matrix<short>::SetValue(const short);
template void Matrix<short>::SetValue(size_t numRows, const size_t numCols, int deviceId, short* pArray, size_t matrixFlags, DataTransferer* transferer);
//template void Matrix<short>::SetValue(const Matrix<short>&, MatrixFormat);
template void Matrix<short>::SetValue(const Matrix<short>&);
template void Matrix<short>::AssignValuesOf(const Matrix<short>&);
template void Matrix<short>::CastAssignValuesOf(const MatrixBase& other);
template bool Matrix<short>::IsEmpty() const;
template void Matrix<short>::Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, bool growOnly);
template void Matrix<short>::Reshape(const size_t, const size_t);
template short* Matrix<short>::CopyToArray(void) const;
template Matrix<int>::Matrix(const size_t, const size_t, int*, DEVICEID_TYPE, const size_t, const size_t);
}}}