https://github.com/Microsoft/CNTK
Tip revision: a05c3c642648373f4ede0956e4286257c3d59a61 authored by liqfu on 24 August 2018, 17:46:51 UTC
CNTK splice allows broadcast. This case is handled in the change. For noop (identity) ops, their input and output types shall be set according to upstream ops. ToBatch/ToSequence and Unpack Batch/Sequence ops added during model importing need to be skipped. Model import needs to handle ops with multiple outputs.
Tip revision: a05c3c6
Matrix.cpp
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
// Matrix.cpp -- main CPP file that contains all Matrix functions exported by the Cntk.Math.dll
//
#include "stdafx.h"
#include "Basics.h"
#include "Matrix.h"
#include "CPUMatrix.h"
#include "CPUSparseMatrix.h"
#include "GPUMatrix.h"
#include "GPUSparseMatrix.h"
#include "File.h"
#include <assert.h>
#include <math.h>
#include "GPUWatcher.h" // bring in this class as well so that it gets exported from this DLL
#include <memory>
#include <atomic>
#include "Quantizers.h"
#include "half.hpp"
#ifndef CPUONLY
// Link against the CUDA math library built by the MathCUDA project.
// Note: adjacent string literals concatenate in standard C++ (translation phase 6);
// the previous token-pasting with '##' between string literals relied on an MSVC
// preprocessor extension and is ill-formed in standard C++.
#define ANAMEFORLIB "Cntk.Math.Cuda-" CNTK_COMPONENT_VERSION ".lib"
#pragma comment(lib, ANAMEFORLIB) // built by MathCUDA project
#undef ANAMEFORLIB
#endif
#pragma warning(disable : 4127) // conditional expression is constant; "if (sizeof(ElemType)==sizeof(float))" triggers this
#pragma warning(disable : 4239) // nonstandard extension; triggered by this pattern: "auto& second = transposeB ? b.m_GPUMatrix->Transpose() : *b.m_GPUMatrix;"
#pragma warning(disable : 4702) // unreachable code; triggered for unknown reasons
// Helper to dispatch matrix calls to the 4 underlying matrix libraries (CPU,GPU) x (DENSE,SPARSE)
// 'MatrixPointerToCheck' determines where the operation takes place.
// 'MatrixPointerToSetFlag' is the output. If not null and its location is BOTH, we collapse it to one.
#pragma warning(disable : 4456) // declaration of curLocation hides previous local declaration
// Runs exactly one of the four statements (CPUDense/GPUDense/CPUSparse/GPUSparse),
// chosen by where '(MatrixPointerToCheck)' currently lives and whether its matrix
// type is SPARSE. A location of BOTH is treated as GPU here. Afterwards, if
// 'MatrixPointerToSetFlag' is non-null, its location/type flags are collapsed to
// the branch that was taken (see SetDataLocation). Raises RuntimeError when the
// matrix exists on neither device.
#define DISPATCH_MATRIX_ON_FLAG(MatrixPointerToCheck, MatrixPointerToSetFlag, CPUDense, GPUDense, CPUSparse, GPUSparse) \
    { \
        CurrentDataLocation curLocation = (MatrixPointerToCheck)->GetCurrentMatrixLocation(); \
        if (curLocation == CurrentDataLocation::GPU || curLocation == CurrentDataLocation::BOTH) \
        { \
            if ((MatrixPointerToCheck)->GetMatrixType() != MatrixType::SPARSE) \
            { \
                GPUDense; \
                if (MatrixPointerToSetFlag != nullptr) \
                    ((Matrix*) MatrixPointerToSetFlag)->SetDataLocation(CurrentDataLocation::GPU, MatrixType::DENSE); \
            } \
            else \
            { \
                GPUSparse; \
                if (MatrixPointerToSetFlag != nullptr) \
                    ((Matrix*) MatrixPointerToSetFlag)->SetDataLocation(CurrentDataLocation::GPU, MatrixType::SPARSE); \
            } \
        } \
        else if (curLocation == CurrentDataLocation::CPU) \
        { \
            if ((MatrixPointerToCheck)->GetMatrixType() != MatrixType::SPARSE) \
            { \
                CPUDense; \
                if (MatrixPointerToSetFlag != nullptr) \
                    ((Matrix*) MatrixPointerToSetFlag)->SetDataLocation(CurrentDataLocation::CPU, MatrixType::DENSE); \
            } \
            else \
            { \
                CPUSparse; \
                if (MatrixPointerToSetFlag != nullptr) \
                    ((Matrix*) MatrixPointerToSetFlag)->SetDataLocation(CurrentDataLocation::CPU, MatrixType::SPARSE); \
            } \
        } \
        else \
        { \
            RuntimeError("Matrices do not exist in either CPU or GPU."); \
        } \
    }
// version of dispatch macro that prefers the CPU if the 'MatrixPointerToCheck' location is BOTH
// Same dispatch as DISPATCH_MATRIX_ON_FLAG, except that a location of BOTH
// prefers the CPU branch instead of the GPU branch.
#define DISPATCH_MATRIX_ON_FLAG_USECPU_4BOTH(MatrixPointerToCheck, MatrixPointerToSetFlag, CPUDense, GPUDense, CPUSparse, GPUSparse) \
    { \
        CurrentDataLocation curLocation = (MatrixPointerToCheck)->GetCurrentMatrixLocation(); \
        if (curLocation == CurrentDataLocation::GPU) \
        { \
            if ((MatrixPointerToCheck)->GetMatrixType() != MatrixType::SPARSE) \
            { \
                GPUDense; \
                if (MatrixPointerToSetFlag != nullptr) \
                    ((Matrix*) MatrixPointerToSetFlag)->SetDataLocation(CurrentDataLocation::GPU, MatrixType::DENSE); \
            } \
            else \
            { \
                GPUSparse; \
                if (MatrixPointerToSetFlag != nullptr) \
                    ((Matrix*) MatrixPointerToSetFlag)->SetDataLocation(CurrentDataLocation::GPU, MatrixType::SPARSE); \
            } \
        } \
        else if (curLocation == CurrentDataLocation::CPU || curLocation == CurrentDataLocation::BOTH) \
        { \
            if ((MatrixPointerToCheck)->GetMatrixType() != MatrixType::SPARSE) \
            { \
                CPUDense; \
                if (MatrixPointerToSetFlag != nullptr) \
                    ((Matrix*) MatrixPointerToSetFlag)->SetDataLocation(CurrentDataLocation::CPU, MatrixType::DENSE); \
            } \
            else \
            { \
                CPUSparse; \
                if (MatrixPointerToSetFlag != nullptr) \
                    ((Matrix*) MatrixPointerToSetFlag)->SetDataLocation(CurrentDataLocation::CPU, MatrixType::SPARSE); \
            } \
        } \
        else \
        { \
            RuntimeError("Matrices do not exist in either CPU or GPU."); \
        } \
    }
// version of helper macro that executes both CPU and GPU macros if 'matrixPointer' location is BOTH
// Dispatch variant that, for a location of BOTH, executes the GPU branch AND
// then the CPU branch (for a single location only the matching branch runs).
// Unlike the other two dispatch macros it does not update any location flags,
// and it rejects NONE location / UNDETERMINED type up front via LogicError.
#define DISPATCH_MATRIX_ON_FLAG_USEBOTH_4BOTH(matrixPointer, CPUDense, GPUDense, CPUSparse, GPUSparse) \
    { \
        auto curLocation = (matrixPointer)->GetCurrentMatrixLocation(); \
        auto curMatrixType = (matrixPointer)->GetMatrixType(); \
        if (curLocation == CurrentDataLocation::NONE) \
            LogicError("Matrices do not exist in either CPU or GPU."); \
        if (curMatrixType == MatrixType::UNDETERMINED) \
            LogicError("Matrices must be SPARSE or DENSE."); \
        if (curLocation != CurrentDataLocation::CPU) /*GPU or BOTH*/ \
        { \
            if (curMatrixType == MatrixType::DENSE) \
            { \
                GPUDense; \
            } \
            else \
            { \
                GPUSparse; \
            } \
        } \
        if (curLocation != CurrentDataLocation::GPU) /*CPU or BOTH*/ \
        { \
            if (curMatrixType == MatrixType::DENSE) \
            { \
                CPUDense; \
            } \
            else \
            { \
                CPUSparse; \
            } \
        } \
    }
namespace Microsoft { namespace MSR { namespace CNTK {
// Global math-library trace verbosity; atomic because it may be read/written
// from multiple threads.
std::atomic<int> m_mathLibTraceLevel(0);

// Sets the global math-library trace level.
void SetMathLibTraceLevel(int traceLevel)
{
    m_mathLibTraceLevel = traceLevel;
}

// Returns the current global math-library trace level.
int GetMathLibTraceLevel()
{
    return m_mathLibTraceLevel;
}
MatrixBase::~MatrixBase() { }
#pragma region Constructors, destructors and other static matrix builders
// TODO: Reformat DISPATCH... macros to the following form:
// DISPATCH..(p1, p2,
// { Cpu code },
// { GPU code },
// ...
// Initialize members
// Resets this object to a blank matrix (no storage) whose preferred device is
// 'deviceId', and clears the counters used to diagnose device/type thrashing.
template <class ElemType>
void Matrix<ElemType>::Init(DEVICEID_TYPE deviceId)
{
    ReleaseMemory();
    m_preferredDeviceId = deviceId;
    m_numTimesDeviceChanged = 0;
    m_numTimesMatrixTypeChanged = 0;
    m_devicesTransferedTo[1] = m_devicesTransferedTo[0] = CPUDEVICE - 1; // (some value that is different from any valid value)
}
// shallow-copy all members
// shallow-copy all members
// Copies every member pointer/flag from 'other' without duplicating any
// underlying storage; afterwards both objects reference the same matrices.
template <class ElemType>
void Matrix<ElemType>::ShallowCopyFrom(const Matrix<ElemType>& other)
{
    m_baseMatrix = other.m_baseMatrix;
    m_GPUMatrix = other.m_GPUMatrix;
    m_CPUMatrix = other.m_CPUMatrix;
    m_GPUSparseMatrix = other.m_GPUSparseMatrix;
    m_CPUSparseMatrix = other.m_CPUSparseMatrix;

    m_matrixType = other.m_matrixType;
    m_currentDataLocation = other.m_currentDataLocation;

    m_preferredDeviceId = other.m_preferredDeviceId;
    m_numTimesDeviceChanged = other.m_numTimesDeviceChanged;
    m_numTimesMatrixTypeChanged = other.m_numTimesMatrixTypeChanged;
    m_devicesTransferedTo[0] = other.m_devicesTransferedTo[0]; // TODO: spelling
    m_devicesTransferedTo[1] = other.m_devicesTransferedTo[1];
}
// Call this function after an update operation has created/set/updated the respective pointers.
// - location: BOTH|CPU|GPU
// - pass BOTH only if object will be read from; it is not allowed to write to both and then call this function.
// - if CPU/GPU and current is BOTH, then object was written to
// What gets updated:
// - m_currentDataLocation: from function argument
// - m_matrixType: from function argument unless UNDETERMINED in which case m_matrixType remains unmodified
// - m_baseMatrix: to one of current values of m_[GC]PU{Sparse,}Matrix
// This function is heavily overloaded in its responsibility.
// - first-time initialization, e.g. of a ColumnSlice (NONE->!NONE)
// - after creating a temp copy for reading
// - collapse temp copies after writing to one of them
// - setting matrixType if not set yet
// Updates m_currentDataLocation/m_matrixType/m_baseMatrix after an update
// operation; see the detailed contract in the comment block above.
template <class ElemType>
void Matrix<ElemType>::SetDataLocation(CurrentDataLocation location, MatrixType type) const
{
    assert(location == CurrentDataLocation::CPU || location == CurrentDataLocation::GPU || location == CurrentDataLocation::BOTH);

    // if the object used to live on BOTH, this will collapse it to 'location' (unless we actually wrote into BOTH)
    // In that case, we do a sanity check here that the object is a singleton view,
    // since otherwise the collapsing would go unnoticed by the other views.
    // The cases to cover:
    //  - everything is allowed on a singleton view
    //    - if the current state is BOTH:
    //      -> The result was written to 'location' so we should collapse it to there.
    //  - multiple views: much is forbidden since we cannot notify the other views on which one was written to
    //    - CPU <-> GPU: FORBIDDEN
    //    - BOTH -> CPU or GPU: current state is BOTH: location says which side was written to
    //      -> FORBIDDEN to write into
    //    - CPU or GPU -> BOTH: current state is CPU or GPU
    //      and a view onto it is put into BOTH state
    //      -> OK but inefficient to read, since this is likely happening over again; but we cannot put all views into BOTH state
    //    - BOTH -> BOTH:
    //      - read case: OK
    //      - write case: forbidden to call this function in this way
    //    - NONE -> !NONE: FORBIDDEN
    if (m_currentDataLocation != location &&                  // it is attempted to change location
        m_currentDataLocation != CurrentDataLocation::NONE && // from a valid object (NONE means we are a fresh object from ColumnSlice())
        location != CurrentDataLocation::BOTH)                // and we are changing it not into a temporary copy for reading
    {
        // we get here if we wrote into this object that was BOTH but is no longer, or if we move between CPU and GPU
        // Both is forbidden on shared views since we cannot inform other views of this change.
        // We will now check any *valid* pointer will now be checked for uniqueness. There may be mismatching left-over pointers kept around in case they should be revived.
        if (m_matrixType == MatrixType::DENSE) // note: this checks the current type, not the new one passed in. Asssumption: this tells us which pointers are valid.
        {
            assert(m_currentDataLocation == CurrentDataLocation::GPU || m_CPUMatrix);
            assert(m_currentDataLocation == CurrentDataLocation::CPU || m_GPUMatrix);
            if (m_currentDataLocation != CurrentDataLocation::GPU) ((BaseMatrix<ElemType>*)m_CPUMatrix.get())->VerifyMigratable("SetDataLocation [CPUMatrix]");
            if (m_currentDataLocation != CurrentDataLocation::CPU) ((BaseMatrix<ElemType>*)m_GPUMatrix.get())->VerifyMigratable("SetDataLocation [GPUMatrix]");
        }
        else if (m_matrixType == MatrixType::SPARSE)
        {
            assert(m_currentDataLocation == CurrentDataLocation::GPU || m_CPUSparseMatrix);
            assert(m_currentDataLocation == CurrentDataLocation::CPU || m_GPUSparseMatrix);
            if (m_currentDataLocation != CurrentDataLocation::GPU) ((BaseMatrix<ElemType>*)m_CPUSparseMatrix.get())->VerifyMigratable("SetDataLocation [CPUSparseMatrix]");
            if (m_currentDataLocation != CurrentDataLocation::CPU) ((BaseMatrix<ElemType>*)m_GPUSparseMatrix.get())->VerifyMigratable("SetDataLocation [GPUSparseMatrix]");
        }
        // TODO: Why do we need these typecasts? (without it will fail with "cannot access private member declared in class 'Microsoft::MSR::CNTK::CPUMatrix<float>'")

        if (m_baseMatrix && !OwnBuffer()) // same arguments for externally owned matrices: Can read a temp but not write.
            LogicError("SetDataLocation: A non-owning object cannot be written to in BOTH state.");
    }
    // passed validation: we can now update the state
    m_currentDataLocation = location;

    // update the matrix type if passed in
    if (type != MatrixType::UNDETERMINED)
        m_matrixType = type;

    // set m_baseMatrix (if location is unchanged, this will not change the pointer)
    // Note: m_currentDataLocation may also be CurrentDataLocation::BOTH, in which case the base matrix will be GPU.
    if (m_matrixType == MatrixType::DENSE)
        m_baseMatrix = ((m_currentDataLocation == CurrentDataLocation::CPU) ? dynamic_cast<BaseMatrix<ElemType>*>(m_CPUMatrix.get()) : dynamic_cast<BaseMatrix<ElemType>*>(m_GPUMatrix.get()));
    else if (m_matrixType == MatrixType::SPARSE)
        m_baseMatrix = ((m_currentDataLocation == CurrentDataLocation::CPU) ? dynamic_cast<BaseMatrix<ElemType>*>(m_CPUSparseMatrix.get()) : dynamic_cast<BaseMatrix<ElemType>*>(m_GPUSparseMatrix.get()));
    // Note: Typecasts are necessary since C++ cannot figure out the common base type (probably due to shared_ptr).

    // sanity check
    if (!m_baseMatrix && m_matrixType != MatrixType::UNDETERMINED)
        LogicError("SetDataLocation: New m_baseMatrix must not be NULL.");
}
//this is a private constructor only used internally to initialize a blank matrix
// Private blank-matrix ctor: set up an empty matrix of the given type/format on
// 'deviceID'. When matrixFlagDontOwnBuffer is set, no storage object is created;
// the caller is expected to attach/manage the buffer itself.
template <class ElemType>
Matrix<ElemType>::Matrix(const MatrixFlags matrixFlags, const MatrixType matrixType, const MatrixFormat matrixFormat, DEVICEID_TYPE deviceID)
{
    Init(deviceID);

    const bool ownsBuffer = (matrixFlags & matrixFlagDontOwnBuffer) == 0;
    if (ownsBuffer)
        SwitchToMatrixType(matrixType, matrixFormat, false);
}
//this is a private constructor only used internally to initialize a blank matrix
// Private blank-matrix ctor without an explicit format: dense matrices get the
// dense format, sparse matrices default to CSC.
template <class ElemType>
Matrix<ElemType>::Matrix(const MatrixFlags matrixFlags, const MatrixType matrixType, DEVICEID_TYPE deviceID)
{
    Init(deviceID);

    if (matrixFlags & matrixFlagDontOwnBuffer)
        return; // caller manages the buffer; leave the object blank

    const MatrixFormat defaultFormat =
        (matrixType == MatrixType::DENSE) ? MatrixFormat::matrixFormatDense : MatrixFormat::matrixFormatSparseCSC;
    SwitchToMatrixType(matrixType, defaultFormat, false);
}
//this is a private constructor only used internally to initialize a blank matrix
// Private blank-matrix ctor defaulting to a dense matrix on 'deviceID'.
template <class ElemType>
Matrix<ElemType>::Matrix(const MatrixFlags matrixFlags, DEVICEID_TYPE deviceID)
{
    Init(deviceID);

    const bool ownsBuffer = (matrixFlags & matrixFlagDontOwnBuffer) == 0;
    if (ownsBuffer)
        SwitchToMatrixType(MatrixType::DENSE, MatrixFormat::matrixFormatDense, false);
}
// Default ctor for a given device: a plain Matrix always starts life as an
// owned dense matrix.
template <class ElemType>
Matrix<ElemType>::Matrix(DEVICEID_TYPE deviceID)
{
    Init(deviceID);

    SwitchToMatrixType(MatrixType::DENSE, MatrixFormat::matrixFormatDense, false);
}
// constructor for Matrix class to wrap an externally managed BaseMatrix, indicated by the use of shared_ptr.
// The appropriate destructor should be passed in by the caller.
// baseMatrix - base matrix for this element
// pArray - pointer to current data array, will replace existing pointer in baseMatrix if != NULL
// deviceId - deviceId where the pArray exists
// NOTE: This constructor is compiled out (#if 0). Kept for reference only; if
// revived, it wraps an externally managed BaseMatrix as described above.
#if 0
template <class ElemType>
Matrix<ElemType>::Matrix(shared_ptr<BaseMatrix<ElemType>> baseMatrix, ElemType* pArray, DEVICEID_TYPE deviceId) // constructor for setting Matrix from a base matrix
{
    Init(deviceId);
    if (baseMatrix->GetFormat() & matrixFormatSparse)
    {
        if (m_preferredDeviceId == CPUDEVICE)
        {
            m_CPUSparseMatrix = DownCast<CPUSparseMatrix<ElemType>>(baseMatrix);
            SetDataLocation(CPU, SPARSE);
        }
        else
        {
            m_GPUSparseMatrix = DownCast<GPUSparseMatrix<ElemType>>(baseMatrix);
            SetDataLocation(GPU, SPARSE);
        }
    }
    else
    {
        if (m_preferredDeviceId == CPUDEVICE)
        {
            m_CPUMatrix = DownCast<CPUMatrix<ElemType>>(baseMatrix);
            SetDataLocation(CPU, DENSE);
        }
        else
        {
            m_GPUMatrix = DownCast<GPUMatrix<ElemType>>(baseMatrix);
            SetDataLocation(GPU, DENSE);
        }
    }
    m_baseMatrix = baseMatrix;
    m_baseMatrix->SetBuffer(pArray,0);
}
#endif
// Allocating ctor: creates a numRows x numCols matrix of the requested type and
// format on 'deviceId'. 'nnz' pre-sizes sparse storage; dense matrices only
// support matrixFormatDense and are zero-initialized via SetValue(0).
template <class ElemType>
Matrix<ElemType>::Matrix(const size_t numRows, const size_t numCols, DEVICEID_TYPE deviceId, const MatrixType matrixType, const MatrixFormat matrixFormat, const size_t nnz)
{
    Init(deviceId);

    if (matrixType == MatrixType::SPARSE)
    {
        if (m_preferredDeviceId == CPUDEVICE)
        {
            m_CPUSparseMatrix = make_shared<CPUSparseMatrix<ElemType>>(matrixFormat, numRows, numCols, nnz);
            SetDataLocation(CPU, SPARSE);
        }
        else
        {
            m_GPUSparseMatrix = make_shared<GPUSparseMatrix<ElemType>>(numRows, numCols, nnz, m_preferredDeviceId, matrixFormat);
            SetDataLocation(GPU, SPARSE);
        }
    }
    else
    {
        if (matrixFormat != matrixFormatDense)
        {
            NOT_IMPLEMENTED;
        }

        if (m_preferredDeviceId == CPUDEVICE)
        {
            m_CPUMatrix = make_shared<CPUMatrix<ElemType>>(numRows, numCols);
            SetDataLocation(CPU, DENSE);
        }
        else
        {
            m_GPUMatrix = make_shared<GPUMatrix<ElemType>>(numRows, numCols, m_preferredDeviceId);
            SetDataLocation(GPU, DENSE);
        }

        SetValue(0);
    }
}
// Ctor from a raw array: wraps/copies 'pArray' (interpretation governed by
// 'matrixFlags') into a numRows x numCols matrix on 'deviceId'. For sparse
// matrices the external array cannot be used; fresh storage of 'nnz' elements
// is allocated instead (see warning below).
template <class ElemType>
Matrix<ElemType>::Matrix(const size_t numRows, const size_t numCols, ElemType* pArray, DEVICEID_TYPE deviceId, const size_t matrixFlags, const size_t nnz)
{
    Init(deviceId);

    if (m_preferredDeviceId == CPUDEVICE)
    {
        if (matrixFlags & matrixFormatSparse)
        {
            // WARNING: matrixFlag is not passed in and externally managed array cannot be passed in
            m_CPUSparseMatrix = make_shared<CPUSparseMatrix<ElemType>>(matrixFormatSparseCSC, numRows, numCols, nnz);
            SetDataLocation(CPU, SPARSE);
        }
        else
        {
            m_CPUMatrix = make_shared<CPUMatrix<ElemType>>(numRows, numCols, pArray, matrixFlags);
            SetDataLocation(CPU, DENSE);
        }
    }
    else
    {
        if (matrixFlags & matrixFormatSparse)
        {
            // m_GPUSparseMatrix = new GPUSparseMatrix<ElemType>(numRows,numCols,nnz, pArray,matrixFlags,m_preferredDeviceId);
            m_GPUSparseMatrix = make_shared<GPUSparseMatrix<ElemType>>(m_preferredDeviceId, MatrixFormat(matrixFlags & MatrixFormat::matrixFormatMask));
            m_GPUSparseMatrix->RequireSizeAndAllocate(numRows, numCols, nnz, true, false);
            SetDataLocation(GPU, SPARSE);
        }
        else
        {
            m_GPUMatrix = make_shared<GPUMatrix<ElemType>>(numRows, numCols, m_preferredDeviceId, pArray, matrixFlags);
            SetDataLocation(GPU, DENSE);
        }
    }

    // Why is this here??
    /*
    if (matrixFlagDontOwnBuffer & matrixFlags)
        m_baseMatrix->SetOwnBuffer(false);
        */
}
//copy constructor, deep copy
// Returns a deep copy of this matrix on the same device (delegates to the
// deep-copy constructor).
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::DeepClone() const
{
    return Matrix<ElemType>(*this, GetDeviceId());
}
// Deep-copy ctor: copies 'deepCopyFrom' onto 'deviceId'. The source is
// temporarily transferred to the target device for the copy, then moved back
// to its original device afterwards.
template <class ElemType>
Matrix<ElemType>::Matrix(const Matrix<ElemType>& deepCopyFrom, DEVICEID_TYPE deviceId)
{
    int origCopyFromDeviceId = deepCopyFrom.GetDeviceId();

    Init(deviceId); // will set m_preferredDeviceId

    deepCopyFrom._transferToDevice(m_preferredDeviceId, true);

    DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom,
                            this,
                            m_CPUMatrix = make_shared<CPUMatrix<ElemType>>(*(deepCopyFrom.m_CPUMatrix)),
                            m_GPUMatrix = make_shared<GPUMatrix<ElemType>>(*(deepCopyFrom.m_GPUMatrix)),
                            m_CPUSparseMatrix = make_shared<CPUSparseMatrix<ElemType>>(*(deepCopyFrom.m_CPUSparseMatrix)),
                            m_GPUSparseMatrix = make_shared<GPUSparseMatrix<ElemType>>(*(deepCopyFrom.m_GPUSparseMatrix)));

    // should we move back?
    deepCopyFrom._transferToDevice(origCopyFromDeviceId, true);

    m_preferredDeviceId = deepCopyFrom.m_preferredDeviceId;
}
//move constructor, shallow copy
// Move ctor: shallow-takes all state from 'moveFrom' via the move-assignment
// operator; 'moveFrom' is left re-initialized (see operator=). The #else branch
// is an older, compiled-out implementation kept for reference.
template <class ElemType>
Matrix<ElemType>::Matrix(Matrix<ElemType>&& moveFrom)
{
    Init((DEVICEID_TYPE)moveFrom.GetDeviceId());

#if 1
    operator=(move(moveFrom));
#else
    DISPATCH_MATRIX_ON_FLAG(&moveFrom,
                            this,
                            m_CPUMatrix = new CPUMatrix<ElemType>(static_cast<CPUMatrix<ElemType>&&>(*(moveFrom.m_CPUMatrix))),
                            m_GPUMatrix = new GPUMatrix<ElemType>(static_cast<GPUMatrix<ElemType>&&>(*(moveFrom.m_GPUMatrix))),
                            m_CPUSparseMatrix = new CPUSparseMatrix<ElemType>(static_cast<CPUSparseMatrix<ElemType>&&>(*(moveFrom.m_CPUSparseMatrix))),
                            m_GPUSparseMatrix = new GPUSparseMatrix<ElemType>(static_cast<GPUSparseMatrix<ElemType>&&>(*(moveFrom.m_GPUSparseMatrix))));

    m_preferredDeviceId = moveFrom.m_preferredDeviceId;
#endif
}
//move assignment operator, shallow copy
// Move assignment: shallow-copies all members from 'moveFrom' and then resets
// 'moveFrom' to a blank CPU matrix. Self-move is rejected with LogicError.
// The #else branch is an older, compiled-out implementation kept for reference.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::operator=(Matrix<ElemType>&& moveFrom)
{
    if (this == &moveFrom)
        LogicError("Matrix: Move assignment into itself is forbidden.");
#if 1
    // shallow-copy all members
    ShallowCopyFrom(moveFrom);
    // virgin-init the source
    moveFrom.Init(CPUDEVICE);
#else
    m_preferredDeviceId = moveFrom.m_preferredDeviceId;

    DISPATCH_MATRIX_ON_FLAG(&moveFrom,
                            this,
                            if (m_CPUMatrix != nullptr) m_CPUMatrix->operator=(static_cast<CPUMatrix<ElemType>&&>(*(moveFrom.m_CPUMatrix)));
                            else m_CPUMatrix = new CPUMatrix<ElemType>(static_cast<CPUMatrix<ElemType>&&>(*(moveFrom.m_CPUMatrix))),
                            if (m_GPUMatrix != nullptr) m_GPUMatrix->operator=(static_cast<GPUMatrix<ElemType>&&>(*(moveFrom.m_GPUMatrix)));
                            else m_GPUMatrix = new GPUMatrix<ElemType>(static_cast<GPUMatrix<ElemType>&&>(*(moveFrom.m_GPUMatrix))),
                            if (m_CPUSparseMatrix != nullptr) m_CPUSparseMatrix->operator=(static_cast<CPUSparseMatrix<ElemType>&&>(*(moveFrom.m_CPUSparseMatrix)));
                            else m_CPUSparseMatrix = new CPUSparseMatrix<ElemType>(static_cast<CPUSparseMatrix<ElemType>&&>(*(moveFrom.m_CPUSparseMatrix))),
                            if (m_GPUSparseMatrix != nullptr) m_GPUSparseMatrix->operator=(static_cast<GPUSparseMatrix<ElemType>&&>(*(moveFrom.m_GPUSparseMatrix)));
                            else m_GPUSparseMatrix = new GPUSparseMatrix<ElemType>(static_cast<GPUSparseMatrix<ElemType>&&>(*(moveFrom.m_GPUSparseMatrix))));
#endif
    return *this;
}
// Drops all references to underlying storage and returns this object to the
// blank state (UNDETERMINED type, NONE location).
template <class ElemType>
void Matrix<ElemType>::ReleaseMemory()
{
    m_baseMatrix = nullptr;
    // Perf note: only assign to a shared_ptr that actually holds something;
    // some standard-library versions do ref-counting work on every assignment,
    // and this function is called often.
    if (m_CPUMatrix.get())
        m_CPUMatrix = nullptr;
    if (m_GPUMatrix.get())
        m_GPUMatrix = nullptr;
    if (m_CPUSparseMatrix.get())
        m_CPUSparseMatrix = nullptr;
    if (m_GPUSparseMatrix.get())
        m_GPUSparseMatrix = nullptr;
    m_matrixType = MatrixType::UNDETERMINED;
    m_currentDataLocation = CurrentDataLocation::NONE;
}
// Destructor: releases all underlying storage references.
template <class ElemType>
Matrix<ElemType>::~Matrix(void)
{
    ReleaseMemory();
}
// Factory: rows x cols matrix filled with ones on 'deviceId'.
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::Ones(const size_t rows, const size_t cols, DEVICEID_TYPE deviceId)
{
    Matrix<ElemType> result(rows, cols, deviceId); // ctor zero-initializes
    result.SetValue(1);
    return result;
}
// Factory: rows x cols matrix of zeros on 'deviceId'.
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::Zeros(const size_t rows, const size_t cols, DEVICEID_TYPE deviceId)
{
    Matrix<ElemType> result(rows, cols, deviceId); // ctor zero-initializes
    result.SetValue(0);                            // explicit clear kept from the original
    return result;
}
// Factory: rows x rows identity matrix on 'deviceId'.
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::Eye(const size_t rows, DEVICEID_TYPE deviceId)
{
    Matrix<ElemType> identity(rows, rows, deviceId); // ctor zero-initializes
    identity.SetDiagonalValue(1);
    return identity;
}
// Factory: rows x cols matrix of uniformly distributed random values between
// 'low' and 'high', seeded with 'seed'.
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::RandomUniform(const size_t rows, const size_t cols, DEVICEID_TYPE deviceId, const ElemType low, const ElemType high, unsigned long seed)
{
    Matrix<ElemType> result(rows, cols, deviceId); // ctor zero-initializes
    result.SetUniformRandomValue(low, high, seed);
    return result;
}
// Factory: rows x cols matrix of Gaussian random values with the given mean
// and standard deviation, seeded with 'seed'.
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::RandomGaussian(const size_t rows, const size_t cols, DEVICEID_TYPE deviceId, const ElemType mean, const ElemType sigma, unsigned long seed)
{
    Matrix<ElemType> result(rows, cols, deviceId); // ctor zero-initializes
    result.SetGaussianRandomValue(mean, sigma, seed);
    return result;
}
// Selects the given GPU device via GPUMatrix::SetDevice; no-op for CPU-side
// device ids (deviceId < 0).
template <class ElemType>
void Matrix<ElemType>::SetDevice(DEVICEID_TYPE deviceId)
{
    if (deviceId >= 0)
        GPUMatrix<ElemType>::SetDevice(deviceId);
}
// Deserializes a matrix from 'stream' (the format produced by Write()).
// The payload begins with a one-character type tag: 'd' (dense) or 's' (sparse).
// Dense payloads are read into the CPU or GPU matrix depending on this object's
// current device; sparse payloads are only supported on the GPU.
// Throws LogicError on an unrecognized type tag.
template <class ElemType>
void Matrix<ElemType>::Read(File& stream)
{
    Matrix<ElemType>& M = *this;
    char type;
    stream >> type;
    if (type == 'd')
    {
        if (M.GetDeviceId() < 0) // CPU-side matrix
        {
            if (!M.m_CPUMatrix)
                M.m_CPUMatrix = make_shared<CPUMatrix<ElemType>>();
            stream >> (*M.m_CPUMatrix);
            M.SetDataLocation(CPU, DENSE);
        }
        else
        {
            if (!M.m_GPUMatrix)
                M.m_GPUMatrix = make_shared<GPUMatrix<ElemType>>(M.GetDeviceId());
            stream >> (*M.m_GPUMatrix);
            M.SetDataLocation(GPU, DENSE);
        }
    }
    else if (type == 's')
    {
        if (M.GetDeviceId() < 0)
        {
            NOT_IMPLEMENTED; // You might want to transfer your matrix to GPU
        }
        else
        {
            if (!M.m_GPUSparseMatrix) // same null test as the dense path (was '== NULL')
                M.m_GPUSparseMatrix = make_shared<GPUSparseMatrix<ElemType>>(M.GetDeviceId());
            stream >> (*M.m_GPUSparseMatrix);
            M.SetDataLocation(GPU, SPARSE);
        }
    }
    else
        // BUGFIX: the message used to claim the valid tags were 'f' or 'd',
        // but the code accepts 'd' (dense) or 's' (sparse).
        LogicError("Read: Input file corrupt (invalid matrix type field 0x%02d, should be 'd' or 's').", type);
}
// Serializes this matrix to 'stream': a one-character type tag ('d' dense /
// 's' sparse) followed by the payload of the underlying matrix object.
// CPU-side sparse serialization is not implemented.
template <class ElemType>
void Matrix<ElemType>::Write(File& stream) const
{
    const Matrix<ElemType>& M = *this;
    if (M.GetMatrixType() == MatrixType::DENSE)
    {
        stream << 'd';
        if (M.GetDeviceId() < 0)
            stream << (*M.m_CPUMatrix);
        else
            stream << (*M.m_GPUMatrix);
    }
    else
    {
        stream << 's';
        if (M.GetDeviceId() < 0)
            NOT_IMPLEMENTED // stream<<(*M.m_CPUMatrix);
        else stream
            << (*M.m_GPUSparseMatrix);
    }
}
#pragma endregion Constructors, destructors and other static matrix builders
#pragma region Basic Operators
// Returns the allocated buffer size in bytes of the underlying matrix,
// dispatched on the current (CPU/GPU) x (dense/sparse) representation.
template <class ElemType>
size_t Matrix<ElemType>::BufferSize() const
{
    DISPATCH_MATRIX_ON_FLAG(this,
                            nullptr,
                            return m_baseMatrix->GetSizeAllocated() * sizeof(ElemType),
                            return m_baseMatrix->GetSizeAllocated() * sizeof(ElemType),
                            return m_CPUSparseMatrix->BufferSize(),
                            return m_GPUSparseMatrix->BufferSizeAllocated());
}
// BUGBUG: This is ugly code. The outside world should not have access to the raw data pointers.
// if this is to be used, then at least it should also return a number of bytes as well.
// Returns the raw data pointer of the underlying matrix representation.
// (See BUGBUG above: this leaks the internal buffer to the outside world.)
template <class ElemType>
ElemType* Matrix<ElemType>::Data() const
{
    DISPATCH_MATRIX_ON_FLAG(this,
                            nullptr,
                            return m_CPUMatrix->Data(),
                            return m_GPUMatrix->Data(),
                            return m_CPUSparseMatrix->Data(),
                            return m_GPUSparseMatrix->Data());
}
// Returns a newly allocated dense array with a copy of this matrix's contents;
// sparse matrices are first expanded to dense. The caller owns the array.
template <class ElemType>
ElemType* Matrix<ElemType>::CopyToArray() const
{
    DISPATCH_MATRIX_ON_FLAG(this,
                            nullptr,
                            return m_CPUMatrix->CopyToArray(),
                            return m_GPUMatrix->CopyToArray(),
                            { CPUMatrix<ElemType> tmpDense(m_CPUSparseMatrix->GetNumRows(), m_CPUSparseMatrix->GetNumCols()); tmpDense.SetValue((ElemType)0); CPUSparseMatrix<ElemType>::ScaleAndAdd((ElemType)1, *m_CPUSparseMatrix, tmpDense); return tmpDense.CopyToArray(); },
                            return m_GPUSparseMatrix->CopyToDenseMatrix().CopyToArray());
}
//memory will be allocated by the callee if not enough but need to be deleted by the caller after it's done
//return number of elements copied
// Copies this (dense) matrix into 'arrayCopyTo', reallocating it if
// 'currentArraySize' is too small (see note above: caller frees the array).
// Returns the number of elements copied. Not implemented for sparse matrices.
template <class ElemType>
size_t Matrix<ElemType>::CopyToArray(ElemType*& arrayCopyTo, size_t& currentArraySize) const
{
    DISPATCH_MATRIX_ON_FLAG(this,
                            nullptr,
                            return m_CPUMatrix->CopyToArray(arrayCopyTo, currentArraySize),
                            return m_GPUMatrix->CopyToArray(arrayCopyTo, currentArraySize),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Copies a numRows x numCols section of this (dense) matrix into 'dst' with
// the given column stride. Not implemented for sparse matrices.
template <class ElemType>
void Matrix<ElemType>::CopySection(size_t numRows, size_t numCols, ElemType* dst, size_t colStride) const
{
    DISPATCH_MATRIX_ON_FLAG(this,
                            nullptr,
                            m_CPUMatrix->CopySection(numRows, numCols, dst, colStride),
                            m_GPUMatrix->CopySection(numRows, numCols, dst, colStride),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// BUGBUG: Some code checks before calling here whether one of the dimensions is 0.
// This function must handle that case properly, that is, preserving the non-zero dimension.
// Returns a slice covering columns [startColumn, startColumn + numCols).
// The result is created with matrixFlagDontOwnBuffer, i.e. it is a view backed
// by this matrix's storage (via the underlying ColumnSlice implementations).
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::ColumnSlice(size_t startColumn, size_t numCols) const
{
    int devId = GetDeviceId();

    Matrix<ElemType> slice(matrixFlagDontOwnBuffer, (DEVICEID_TYPE) devId); // this already creates pointers

    slice.m_preferredDeviceId = m_preferredDeviceId;

    // create slices for the underlying object
    // Note: In case of data location == BOTH, this creates two objects just like in the source.
    if (GetMatrixType() == MatrixType::DENSE)
    {
        if (GetCurrentMatrixLocation() == CPU || GetCurrentMatrixLocation() == BOTH)
        {
            if (slice.m_CPUMatrix)
                slice.m_CPUMatrix->operator=(static_cast<CPUMatrix<ElemType>&&>(m_CPUMatrix->ColumnSlice(startColumn, numCols)));
            else
                slice.m_CPUMatrix = make_shared<CPUMatrix<ElemType>>(static_cast<CPUMatrix<ElemType>&&>(m_CPUMatrix->ColumnSlice(startColumn, numCols)));
        }
        if (GetCurrentMatrixLocation() == GPU || GetCurrentMatrixLocation() == BOTH)
        {
            if (slice.m_GPUMatrix)
                slice.m_GPUMatrix->operator=(static_cast<GPUMatrix<ElemType>&&>(m_GPUMatrix->ColumnSlice(startColumn, numCols)));
            else
                slice.m_GPUMatrix = make_shared<GPUMatrix<ElemType>>(static_cast<GPUMatrix<ElemType>&&>(m_GPUMatrix->ColumnSlice(startColumn, numCols)));
        }
    }
    else if (GetMatrixType() == MatrixType::SPARSE)
    {
        if (GetCurrentMatrixLocation() == CPU || GetCurrentMatrixLocation() == BOTH)
        {
            if (slice.m_CPUSparseMatrix)
                slice.m_CPUSparseMatrix->operator=(static_cast<CPUSparseMatrix<ElemType>&&>(m_CPUSparseMatrix->ColumnSlice(startColumn, numCols)));
            else
                slice.m_CPUSparseMatrix = make_shared<CPUSparseMatrix<ElemType>>(static_cast<CPUSparseMatrix<ElemType>&&>(m_CPUSparseMatrix->ColumnSlice(startColumn, numCols)));
        }
        if (GetCurrentMatrixLocation() == GPU || GetCurrentMatrixLocation() == BOTH)
        {
            if (slice.m_GPUSparseMatrix)
                slice.m_GPUSparseMatrix->operator=(static_cast<GPUSparseMatrix<ElemType>&&>(m_GPUSparseMatrix->ColumnSlice(startColumn, numCols)));
            else
                slice.m_GPUSparseMatrix = make_shared<GPUSparseMatrix<ElemType>>(static_cast<GPUSparseMatrix<ElemType>&&>(m_GPUSparseMatrix->ColumnSlice(startColumn, numCols)));
        }
    }
    else
        LogicError("Undetermined matrix type");

    // update the slice's m_currentDataLocation, m_matrixType, and m_baseMatrix
    // This will work for CPU, GPU, and BOTH.
    slice.SetDataLocation(GetCurrentMatrixLocation(), GetMatrixType());

    return slice;
}
// Re-targets this object to be a column slice [startColumn, startColumn+numCols)
// of 'fromMatrix', releasing any previous storage first. Dense only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignColumnSlice(const Matrix<ElemType>& fromMatrix, size_t startColumn, size_t numCols)
{
    ReleaseMemory();
    m_preferredDeviceId = fromMatrix.m_preferredDeviceId;

    DISPATCH_MATRIX_ON_FLAG(&fromMatrix,
                            this,
                            if (m_CPUMatrix) m_CPUMatrix->AssignColumnSlice(*fromMatrix.m_CPUMatrix, startColumn, numCols);
                            else m_CPUMatrix = make_shared<CPUMatrix<ElemType>>(fromMatrix.m_CPUMatrix->ColumnSlice(startColumn, numCols)),
                            if (m_GPUMatrix) m_GPUMatrix->AssignColumnSlice(*fromMatrix.m_GPUMatrix, startColumn, numCols);
                            else m_GPUMatrix = make_shared<GPUMatrix<ElemType>>(fromMatrix.m_GPUMatrix->ColumnSlice(startColumn, numCols)),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);

    return *this;
}
// Writes 'fromMatrix' into columns [startColumn, startColumn+numCols) of this
// matrix. This matrix must already be allocated (dense CPU or GPU).
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::SetColumnSlice(const Matrix<ElemType>& fromMatrix, size_t startColumn, size_t numCols)
{
    assert(m_CPUMatrix || m_GPUMatrix);
    // must already been allocated

    DISPATCH_MATRIX_ON_FLAG(&fromMatrix,
                            this,
                            m_CPUMatrix->SetColumnSlice(*fromMatrix.m_CPUMatrix, startColumn, numCols),
                            m_GPUMatrix->SetColumnSlice(*fromMatrix.m_GPUMatrix, startColumn, numCols),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);

    return *this;
}
// Copies 'numCols' columns from 'fromMatrix' into this matrix using the given
// source/destination column strides. Dense only; must already be allocated.
template <class ElemType>
void Matrix<ElemType>::CopyColumnsStrided(const Matrix<ElemType>& fromMatrix, size_t numCols, size_t srcNumColsStride, size_t destNumColsStride)
{
    assert(m_CPUMatrix || m_GPUMatrix);

    DISPATCH_MATRIX_ON_FLAG(&fromMatrix,
                            this,
                            m_CPUMatrix->CopyColumnsStrided(*fromMatrix.m_CPUMatrix, numCols, srcNumColsStride, destNumColsStride),
                            m_GPUMatrix->CopyColumnsStrided(*fromMatrix.m_GPUMatrix, numCols, srcNumColsStride, destNumColsStride),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Returns a new matrix holding this matrix's diagonal values (dense result),
// preserving the preferred device.
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::Diagonal() const
{
    Matrix<ElemType> diagResult(matrixFlagDontOwnBuffer, (DEVICEID_TYPE) GetDeviceId());
    diagResult.m_preferredDeviceId = m_preferredDeviceId;

    AssignDiagonalValuesTo(diagResult);

    return diagResult;
}
// Fills 'diag' with this matrix's diagonal values as a dense matrix, on the
// same device as this matrix. Sparse sources go through DiagonalToDense().
template <class ElemType>
void Matrix<ElemType>::AssignDiagonalValuesTo(Matrix<ElemType>& diag) const
{
    int devId = GetDeviceId();
    DecideAndMoveToRightDevice(*this, diag);

    if (GetMatrixType() == MatrixType::DENSE)
    {
        if (devId == CPUDEVICE)
        {
            if (diag.m_CPUMatrix)
                diag.m_CPUMatrix->operator=(static_cast<CPUMatrix<ElemType>&&>(m_CPUMatrix->Diagonal()));
            else
                diag.m_CPUMatrix = make_shared<CPUMatrix<ElemType>>(static_cast<CPUMatrix<ElemType>&&>(m_CPUMatrix->Diagonal()));
            diag.SetDataLocation(CPU, DENSE);
        }
        else
        {
            if (diag.m_GPUMatrix)
                diag.m_GPUMatrix->operator=(static_cast<GPUMatrix<ElemType>&&>(m_GPUMatrix->Diagonal()));
            else
                diag.m_GPUMatrix = make_shared<GPUMatrix<ElemType>>(static_cast<GPUMatrix<ElemType>&&>(m_GPUMatrix->Diagonal()));
            diag.SetDataLocation(GPU, DENSE);
        }
    }
    else if (GetMatrixType() == MatrixType::SPARSE)
    {
        // TODO: Implement optimized diagonal functions for sparse matrices. For now use the DiagonalToDense instead.
        if (devId == CPUDEVICE)
        {
            if (diag.m_CPUMatrix)
                diag.m_CPUMatrix->operator=(static_cast<CPUMatrix<ElemType>&&>(m_CPUSparseMatrix->DiagonalToDense()));
            else
                diag.m_CPUMatrix = make_shared<CPUMatrix<ElemType>>(static_cast<CPUMatrix<ElemType>&&>(m_CPUSparseMatrix->DiagonalToDense()));
            diag.SetDataLocation(CPU, DENSE);
        }
        else
        {
            if (diag.m_GPUMatrix)
                diag.m_GPUMatrix->operator=(static_cast<GPUMatrix<ElemType>&&>(m_GPUSparseMatrix->DiagonalToDense()));
            else
                diag.m_GPUMatrix = make_shared<GPUMatrix<ElemType>>(static_cast<GPUMatrix<ElemType>&&>(m_GPUSparseMatrix->DiagonalToDense()));
            diag.SetDataLocation(GPU, DENSE);
        }
    }
    else
        LogicError("Undetermined matrix type");
}
// This function will change the matrix type between DENSE and SPARSE.
// WARNING: The correct implementation is to copy the matrix between DENSE and SPARSE
// However, the conversion functions are not implemented yet and so it will always create
// a new blank matrix and destroy all info in the original matrix if different matrix type is asked.
// In case of !keepValues, the matrix content will be undefined.
template <class ElemType>
void Matrix<ElemType>::SwitchToMatrixType(MatrixType newMatrixType, MatrixFormat newMatrixFormat, bool keepValues)
{
    // This check should be uncommented but unfortunately there are still places
    // this function is being called with incorrect "default" format value
    /*if (m_matrixType == newMatrixType && GetFormat() != newMatrixFormat)
        NOT_IMPLEMENTED;*/
    if (m_matrixType == newMatrixType)
        return;
    // An empty matrix (no underlying base matrix yet) has no values worth preserving.
    if (!m_baseMatrix)
        keepValues = false;
#define NUM_MATRIXTYPE_CHANGED_WARN 20
    m_numTimesMatrixTypeChanged++;
    // NOTE(review): the message talks about device transfers, but this counter tracks
    // DENSE<->SPARSE type changes — the wording looks inherited from a similar warning elsewhere.
    if ((GetMathLibTraceLevel() > 0) && (m_numTimesMatrixTypeChanged == NUM_MATRIXTYPE_CHANGED_WARN))
        fprintf(stderr, "WARNING: The same matrix with dim [%lu, %lu] has been transferred between different devices for %d times.\n", (unsigned long) GetNumRows(), (unsigned long) GetNumCols(), NUM_MATRIXTYPE_CHANGED_WARN);
    if (GetDeviceId() < 0) // CPU
    {
        if (newMatrixType == MatrixType::SPARSE)
        {
            if (!m_baseMatrix)
                m_CPUSparseMatrix = make_shared<CPUSparseMatrix<ElemType>>(newMatrixFormat);
            else
                m_CPUSparseMatrix = make_shared<CPUSparseMatrix<ElemType>>(newMatrixFormat, GetNumRows(), GetNumCols(), 1);
            if (keepValues)
                CopyElementsFromDenseToSparse(*m_CPUMatrix, *m_CPUSparseMatrix);
            // Order matters: update the location/base-matrix bookkeeping before dropping the old object.
            SetDataLocation(CPU, SPARSE);
            m_CPUMatrix = nullptr;
        }
        else if (newMatrixType == MatrixType::DENSE)
        {
            if (!m_baseMatrix)
                m_CPUMatrix = make_shared<CPUMatrix<ElemType>>();
            else
                m_CPUMatrix = make_shared<CPUMatrix<ElemType>>(GetNumRows(), GetNumCols());
            if (keepValues)
                m_CPUMatrix->SetValue(m_CPUSparseMatrix->CopyColumnSliceToDense(0, GetNumCols()));
            SetDataLocation(CPU, DENSE);
            m_CPUSparseMatrix = nullptr;
        }
        else
            LogicError("SwitchToMatrixType: Unexpected/invalid new matrix type");
    }
    else // GPU
    {
        if (newMatrixType == MatrixType::SPARSE)
        {
            if (!m_baseMatrix)
                m_GPUSparseMatrix = make_shared<GPUSparseMatrix<ElemType>>(GetDeviceId(), newMatrixFormat);
            else
                m_GPUSparseMatrix = make_shared<GPUSparseMatrix<ElemType>>(GetNumRows(), GetNumCols(), 0, GetDeviceId(), newMatrixFormat);
            if (keepValues)
                m_GPUSparseMatrix->SetValue(*m_GPUMatrix);
            SetDataLocation(GPU, SPARSE);
            m_GPUMatrix = nullptr;
        }
        else if (newMatrixType == MatrixType::DENSE)
        {
            if (!m_baseMatrix)
                m_GPUMatrix = make_shared<GPUMatrix<ElemType>>(GetDeviceId());
            else
                m_GPUMatrix = make_shared<GPUMatrix<ElemType>>(GetNumRows(), GetNumCols(), GetDeviceId());
            if (keepValues)
                m_GPUSparseMatrix->CopyToDenseMatrix(*m_GPUMatrix);
            SetDataLocation(GPU, DENSE);
            m_GPUSparseMatrix = nullptr;
        }
        else
            LogicError("SwitchToMatrixType: Unexpected/invalid new matrix type");
    }
}
// Element-by-element copy from a CPU dense matrix into a CPU sparse matrix.
// Zero elements are passed through to dest.SetValue() as well; efficiency is
// not a concern here (used only for DENSE->SPARSE type switching).
template <class ElemType>
void Matrix<ElemType>::CopyElementsFromDenseToSparse(CPUMatrix<ElemType>& from, CPUSparseMatrix<ElemType>& dest)
{
    foreach_coord (row, col, from)
    {
        auto val = from(row, col);
        dest.SetValue(row, col, val);
    }
}
// Return the element at position (0,0). Dense matrices only; for a GPU matrix
// this implies a device-to-host copy of one value.
template <class ElemType>
ElemType Matrix<ElemType>::Get00Element() const
{
    DISPATCH_MATRIX_ON_FLAG(this, nullptr,
                            { return m_CPUMatrix->Get00Element(); },
                            { return m_GPUMatrix->Get00Element(); },
                            { NOT_IMPLEMENTED; },
                            { NOT_IMPLEMENTED; });
}
// const operator(,)
// Read-only element access. If the matrix currently lives on the GPU it is first
// transferred to the CPU (expensive!) so the element can be read from host memory.
template <class ElemType>
const ElemType Matrix<ElemType>::operator()(const size_t row, const size_t col) const
{
    DISPATCH_MATRIX_ON_FLAG_USECPU_4BOTH(this, nullptr,
                                         { return m_CPUMatrix->operator()(row, col); },
                                         { _transferFromDeviceToDevice(GetDeviceId(), CPUDEVICE, false); return m_CPUMatrix->operator()(row, col); },
                                         { NOT_IMPLEMENTED; },
                                         { NOT_IMPLEMENTED; });
}
// non-const operator(,)
//WARNING: This function is very slow for GPUs since it requires copying values between CPUs and GPUs.
//In addition, if ColumnSlice is used after this function but before the values are copied back to GPU
//the operation will fail since the memory is not managed by the slice.
// If you don't need to modify the values, to call the const version above, or GetValue(row,col) which does that for you unambiguously.
// TODO: Can we remove this, and have users use SetValue() instead? To avoid this potential error?
template <class ElemType>
ElemType& Matrix<ElemType>::operator()(const size_t row, const size_t col)
{
    DISPATCH_MATRIX_ON_FLAG_USECPU_4BOTH(this, nullptr,
                                         { return m_CPUMatrix->operator()(row, col); },
                                         {
                                             // Pull the data to the CPU and mark it CPU-resident, since the
                                             // returned reference allows the caller to mutate host memory.
                                             _transferFromDeviceToDevice(GetDeviceId(), CPUDEVICE, false);
                                             SetDataLocation(CPU, DENSE);
                                             return m_CPUMatrix->operator()(row, col);
                                         },
                                         { NOT_IMPLEMENTED; },
                                         { NOT_IMPLEMENTED; });
}
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::Transpose()
{
    // Produce a new matrix that is the transpose of this one,
    // keeping device, storage type, and format.
    if (IsEmpty())
        LogicError("Transpose: Matrix is empty.");
    Matrix<ElemType> transposed(GetNumCols(), GetNumRows(), (DEVICEID_TYPE) GetDeviceId(), GetMatrixType(), GetFormat());
    transposed.AssignTransposeOf(*this);
    return transposed;
}
// *this = a^T. The target adopts a's device, storage type, and format.
// Not implemented for CPU sparse matrices.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignTransposeOf(const Matrix<ElemType>& a)
{
    DecideAndMoveToRightDevice(a, *this);
    SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
    DISPATCH_MATRIX_ON_FLAG(&a, this,
                            { m_CPUMatrix->AssignTransposeOf(*a.m_CPUMatrix); },
                            { m_GPUMatrix->AssignTransposeOf(*a.m_GPUMatrix); },
                            { NOT_IMPLEMENTED; },
                            { m_GPUSparseMatrix->AssignTransposeOf(*a.m_GPUSparseMatrix); });
    return *this;
}
// *this[:,j] = a[:,idx[j]] * alpha + *this[:,j] * beta
// idx has width of 'this' and contains values w.r.t. 'a'
// Invalid entries (gap columns) are denoted by idx(0,j) == -1.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::DoGatherColumnsOf(ElemType beta, const Matrix<ElemType>& idx, const Matrix<ElemType>& a, ElemType alpha)
{
    DecideAndMoveToRightDevice(*this, idx, a); // TODO: only move target if beta != 0
    if (a.GetMatrixType() != this->GetMatrixType())
        RuntimeError("Matrix::DoGatherColumnsOf: The source and target matrices must have same storage type (SPARSE/DENSE).");
    DISPATCH_MATRIX_ON_FLAG(&a, this,
                            { m_CPUMatrix->DoGatherColumnsOf(beta, *idx.m_CPUMatrix, *a.m_CPUMatrix, alpha); },
                            { m_GPUMatrix->DoGatherColumnsOf(beta, *idx.m_GPUMatrix, *a.m_GPUMatrix, alpha); },
                            { m_CPUSparseMatrix->DoGatherColumnsOf(beta, *idx.m_CPUMatrix, *a.m_CPUSparseMatrix, alpha); },
                            {
                                // GPU sparse: no native kernel yet, so copy idx, a, and 'this' to CPU-side
                                // temporaries, gather there, and copy the result back to the GPU.
                                // TODO replace by more performant version directly on GPU that does not require the round-trip over CPU.
                                Matrix<ElemType> tempIdx(CPUDEVICE); tempIdx.AssignValuesOf(idx);
                                CPUSparseMatrix<ElemType> tempA(a.GetFormat(), a.GetNumRows(), a.GetNumCols(), a.m_GPUSparseMatrix->GetNumNZElements());
                                a.m_GPUSparseMatrix->CopyToCPUSparseMatrix(tempA);
                                CPUSparseMatrix<ElemType> tempThis(m_GPUSparseMatrix->GetFormat(), m_GPUSparseMatrix->GetNumRows(), m_GPUSparseMatrix->GetNumCols(), m_GPUSparseMatrix->GetNumNZElements());
                                m_GPUSparseMatrix->CopyToCPUSparseMatrix(tempThis);
                                tempThis.DoGatherColumnsOf(beta, *tempIdx.m_CPUMatrix, tempA, alpha);
                                m_GPUSparseMatrix->SetValue(tempThis);
                            });
    return *this;
}
// *this[:,idx[j]] = a[:,j] * alpha + *this[:,idx[j]] * beta
// idx has width of 'a' and contains values w.r.t. 'this'
// Unlike gather, for scatter, 'this' must have been sized already.
// Invalid entries (gap columns) are denoted by idx(0,j) == -1.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::DoScatterColumnsOf(ElemType beta, const Matrix<ElemType>& idx, const Matrix<ElemType>& a, ElemType alpha, bool idxHaveDups)
{
    DecideAndMoveToRightDevice(*this, idx, a); // TODO: only move target if beta != 0
    if (a.GetMatrixType() != this->GetMatrixType())
        RuntimeError("Matrix::DoScatterColumnsOf: The source and target matrices must have same storage type (SPARSE/DENSE).");
    // Note: only the GPU dense path consumes idxHaveDups; the other paths ignore it.
    DISPATCH_MATRIX_ON_FLAG(&a, this,
                            { m_CPUMatrix->DoScatterColumnsOf(beta, *idx.m_CPUMatrix, *a.m_CPUMatrix, alpha); },
                            { m_GPUMatrix->DoScatterColumnsOf(beta, *idx.m_GPUMatrix, *a.m_GPUMatrix, alpha, idxHaveDups); },
                            { m_CPUSparseMatrix->DoScatterColumnsOf(beta, *idx.m_CPUMatrix, *a.m_CPUSparseMatrix, alpha); },
                            {
                                // GPU sparse: scatter via CPU-side temporaries, then copy back (see gather above).
                                // TODO replace by more performant version directly on GPU that does not require the round-trip over CPU.
                                Matrix<ElemType> tempIdx(CPUDEVICE); tempIdx.AssignValuesOf(idx);
                                CPUSparseMatrix<ElemType> tempA(a.GetFormat(), a.GetNumRows(), a.GetNumCols(), a.m_GPUSparseMatrix->GetNumNZElements());
                                a.m_GPUSparseMatrix->CopyToCPUSparseMatrix(tempA);
                                CPUSparseMatrix<ElemType> tempThis(m_GPUSparseMatrix->GetFormat(), m_GPUSparseMatrix->GetNumRows(), m_GPUSparseMatrix->GetNumCols(), m_GPUSparseMatrix->GetNumNZElements());
                                m_GPUSparseMatrix->CopyToCPUSparseMatrix(tempThis);
                                tempThis.DoScatterColumnsOf(beta, *tempIdx.m_CPUMatrix, tempA, alpha);
                                m_GPUSparseMatrix->SetValue(tempThis);
                            });
    return *this;
}
// set all elements of a matrix to a scalar value
// For sparse matrices, the only allowed value is 0.
template <class ElemType>
void Matrix<ElemType>::SetValue(const ElemType v)
{
    if (IsEmpty()) // if empty then we are done
        return;
    if (v == 0 && GetMatrixType() == MatrixType::SPARSE) // if sparse, setting it to 0 is special
    {
        // For sparse storage "all zeros" just means dropping all stored elements.
        Reset();
        return;
    }
    DISPATCH_MATRIX_ON_FLAG(this, this,
                            { m_CPUMatrix->SetValue(v); },
                            { m_GPUMatrix->SetValue(v); },
                            { NOT_IMPLEMENTED; },
                            { NOT_IMPLEMENTED; });
}
// Set all elements to a value held in a DeviceBoundNumber (a scalar that lives on a
// specific device). Dense matrices only; for GPU, the scalar must reside on this
// matrix's device so the kernel can read it without a transfer.
template <class ElemType>
void Matrix<ElemType>::SetValue(const DeviceBoundNumber<ElemType>& db_number)
{
    if (IsEmpty()) // if empty then we are done
        return;
    DISPATCH_MATRIX_ON_FLAG(this, this,
                            { m_CPUMatrix->SetValue(*db_number.ExposePointer2Value()); },
                            {
                                if (GetDeviceId() != db_number.GetDeviceId())
                                    RuntimeError("Matrix and device bound number must be on the same device");
                                m_GPUMatrix->SetValue(db_number.ExposePointer2Value());
                            },
                            { NOT_IMPLEMENTED; },
                            { NOT_IMPLEMENTED; });
}
// MakeNan() -- produce a NaN of the element type, used e.g. to poison lazily-initialized
// memory so uninitialized reads are detectable. The 'payload' argument is currently ignored.
// Integer element types have no NaN, so they return 0 instead.
template <>
/*static*/ float Matrix<float>::MakeNan(size_t /*payload*/)
{
    return nanf("");
}
template <>
/*static*/ double Matrix<double>::MakeNan(size_t /*payload*/)
{
    return nan("");
}
template <>
/*static*/ half Matrix<half>::MakeNan(size_t /*payload*/)
{
    return half(nanf(""));
}
template <>
/*static*/ char Matrix<char>::MakeNan(size_t)
{
    return 0;
} // (needed for completeness and to pass unit tests)
template <>
/*static*/ short Matrix<short>::MakeNan(size_t)
{
    return 0;
} // (needed for completeness and to pass unit tests)
// Set groups of columns to 'val' where the corresponding mask entry is 0.
// Each mask column governs numColsPerMaskEntry consecutive columns of this matrix.
template <class ElemType>
void Matrix<ElemType>::MaskColumnsValue(const Matrix<char>& columnsMask, ElemType val, size_t numColsPerMaskEntry)
{
    if (GetNumCols() != (columnsMask.GetNumCols() * numColsPerMaskEntry))
        RuntimeError("MaskColumnsValue: Matrix number of columns must equal [column mask * numColsPerMaskEntry].");
    // Device check: a CPU matrix may use a CPU-resident (or BOTH) mask; otherwise the
    // mask must be on the same device as this matrix (or be resident on BOTH).
    if (GetCurrentMatrixLocation() == CPU && (columnsMask.GetCurrentMatrixLocation() == CPU || columnsMask.GetCurrentMatrixLocation() == BOTH))
        ; // OK
    else if (GetDeviceId() != columnsMask.GetDeviceId() && columnsMask.GetCurrentMatrixLocation() != BOTH)
        RuntimeError("MaskColumnsValue: Matrix and column mask must be on the same device.");
    DISPATCH_MATRIX_ON_FLAG(this, this,
                            { m_CPUMatrix->MaskColumnsValue(*columnsMask.m_CPUMatrix, val, numColsPerMaskEntry); },
                            { m_GPUMatrix->MaskColumnsValue(*columnsMask.m_GPUMatrix, val, numColsPerMaskEntry); },
                            { m_CPUSparseMatrix->MaskColumnsValue(*columnsMask.m_CPUMatrix, val, numColsPerMaskEntry); },
                            { m_GPUSparseMatrix->MaskColumnsValue(*columnsMask.m_GPUMatrix, val, numColsPerMaskEntry); });
}
// SetColumn overloads: write one column of a dense matrix from
// (a) a raw host array, (b) a scalar (CPU only), or (c) another matrix's column.
template <class ElemType>
void Matrix<ElemType>::SetColumn(const ElemType* colPointer, size_t colInd)
{
    if (colPointer == nullptr)
        InvalidArgument("SetColumn: colPointer is null.");
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->SetColumn(colPointer, colInd),
                            m_GPUMatrix->SetColumn(colPointer, colInd),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Fill column colInd with the scalar 'val'. CPU dense only.
template <class ElemType>
void Matrix<ElemType>::SetColumn(const ElemType val, size_t colInd)
{
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->SetColumn(val, colInd),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Copy column matrix 'colMat' into column colInd of this matrix (dense only).
template <class ElemType>
void Matrix<ElemType>::SetColumn(const Matrix<ElemType>& colMat, size_t colInd)
{
    DecideAndMoveToRightDevice(*this, colMat);
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->SetColumn(*colMat.m_CPUMatrix, colInd),
                            m_GPUMatrix->SetColumn(*colMat.m_GPUMatrix, colInd),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Deep-copy assignment: *this becomes a copy of deepCopyFrom, adopting its
// preferred device, storage type, and format.
template <class ElemType>
void Matrix<ElemType>::SetValue(const Matrix<ElemType>& deepCopyFrom)
{
    if (this == &deepCopyFrom)
        return;
    m_preferredDeviceId = deepCopyFrom.m_preferredDeviceId;
    DecideAndMoveToRightDevice(deepCopyFrom, *this);
    // Match the source's storage type/format before copying (contents need not be kept).
    SwitchToMatrixType(deepCopyFrom.GetMatrixType(), deepCopyFrom.GetFormat(), false);
    DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom, this,
                            { m_CPUMatrix->SetValue(*deepCopyFrom.m_CPUMatrix); },
                            { m_GPUMatrix->SetValue(*deepCopyFrom.m_GPUMatrix); },
                            { m_CPUSparseMatrix->SetValue(*deepCopyFrom.m_CPUSparseMatrix); },
                            { m_GPUSparseMatrix->SetValue(*deepCopyFrom.m_GPUSparseMatrix); });
}
// Copy values from deepCopyFrom into *this WITHOUT changing this matrix's storage
// type/format or device (unlike SetValue, which adopts the source's type).
// Implemented as a 4x4 dispatch: outer on this matrix's type, inner on the source's.
// Some cross-type combinations go through CPU-side temporaries or are not yet implemented.
template <class ElemType>
void Matrix<ElemType>::AssignValuesOf(const Matrix<ElemType>& deepCopyFrom)
{
    if (this == &deepCopyFrom)
        return;
    // TODO: do we need all these 'this->'?
    DISPATCH_MATRIX_ON_FLAG(this, this,
                            {
                                // Set CPUMatrix from:
                                DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom, nullptr,
                                                        { m_CPUMatrix->SetValue(*deepCopyFrom.m_CPUMatrix); },
                                                        { this->Resize(deepCopyFrom.GetNumRows(), deepCopyFrom.GetNumCols()); deepCopyFrom.CopySection(deepCopyFrom.GetNumRows(), deepCopyFrom.GetNumCols(), m_CPUMatrix->Data(), this->GetNumRows()); },
                                                        { deepCopyFrom.m_CPUSparseMatrix->AssignColumnSliceToDense(*m_CPUMatrix, 0, deepCopyFrom.GetNumCols()); },
                                                        { CPUSparseMatrix<ElemType> tempCPUSparseMatrix(deepCopyFrom.GetFormat(), deepCopyFrom.GetNumRows(), deepCopyFrom.GetNumCols(), deepCopyFrom.m_GPUSparseMatrix->GetNumNZElements()); deepCopyFrom.m_GPUSparseMatrix->CopyToCPUSparseMatrix(tempCPUSparseMatrix); tempCPUSparseMatrix.AssignColumnSliceToDense(*m_CPUMatrix, 0, deepCopyFrom.GetNumCols()); });
                            },
                            {
                                // Set GPUMatrix from:
                                DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom, nullptr,
                                                        { m_GPUMatrix->SetValue(deepCopyFrom.GetNumRows(), deepCopyFrom.GetNumCols(), this->GetDeviceId(), deepCopyFrom.m_CPUMatrix->Data()); },
                                                        { m_GPUMatrix->SetValue(*deepCopyFrom.m_GPUMatrix); },
                                                        {
                                                            // CPU sparse -> GPU dense: densify on the CPU first, then upload.
                                                            CPUMatrix<ElemType> tempCPUDenseMatrix(deepCopyFrom.GetNumRows(), deepCopyFrom.GetNumCols());
                                                            deepCopyFrom.m_CPUSparseMatrix->AssignColumnSliceToDense(tempCPUDenseMatrix, 0, deepCopyFrom.GetNumCols());
                                                            m_GPUMatrix->SetValue(deepCopyFrom.GetNumRows(), deepCopyFrom.GetNumCols(), this->GetDeviceId(), tempCPUDenseMatrix.Data());
                                                        },//{ m_GPUMatrix->SetValue(*deepCopyFrom.m_CPUSparseMatrix); },
                                                        { deepCopyFrom.m_GPUSparseMatrix->AssignColumnSliceToDense(*m_GPUMatrix, 0, deepCopyFrom.GetNumCols()); });
                            },
                            {
                                // Set CPUSparseMatrix from:
                                DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom, nullptr,
                                                        { auto matrixType = GetMatrixType(); auto matrixFormat = GetFormat(); *this = deepCopyFrom.DeepClone(); SwitchToMatrixType(matrixType, matrixFormat, true); },
                                                        { LogicError("AssignValuesOf: Assigning a GPUMatrix to a CPUSparseMatrix is not yet implemented."); },//{ m_CPUSparseMatrix->SetValue(*deepCopyFrom.m_GPUMatrix); },
                                                        { m_CPUSparseMatrix->SetValue(*deepCopyFrom.m_CPUSparseMatrix); },
                                                        { LogicError("AssignValuesOf: Assigning a GPUSparseMatrix to a CPUSparseMatrix is not yet implemented."); });//{ m_CPUSparseMatrix->SetValue(*deepCopyFrom.m_GPUSparseMatrix); });
                            },
                            {
                                // Set GPUSparseMatrix from:
                                DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom, nullptr,
                                                        { Matrix<ElemType> tempCPUSparseMatrix(deepCopyFrom.DeepClone()); tempCPUSparseMatrix.SwitchToMatrixType(GetMatrixType(), GetFormat(), true); m_GPUSparseMatrix->SetValue(*tempCPUSparseMatrix.m_CPUSparseMatrix); },
                                                        { m_GPUSparseMatrix->SetValue(*deepCopyFrom.m_GPUMatrix); },
                                                        { m_GPUSparseMatrix->SetValue(*deepCopyFrom.m_CPUSparseMatrix); },
                                                        { m_GPUSparseMatrix->SetValue(*deepCopyFrom.m_GPUSparseMatrix); });
                            });
}
// CastAssignValuesOf() -- assign a matrix with type conversion, needed for feeding 'float' data to 'double' inputs in V2
// This version is a stop-gap for debugging and testing. If any conversion is done, it will be slow.
// If this is ever used for something that needs performance, it should not be too hard (but labor) to implement this efficiently.
// Same-type overloads: no conversion needed, just forward to AssignValuesOf().
static void DoCastAssignValuesOf(Matrix<float>& target, const Matrix<float>& other) { target.AssignValuesOf(other); }
static void DoCastAssignValuesOf(Matrix<double>& target, const Matrix<double>& other) { target.AssignValuesOf(other); }
static void DoCastAssignValuesOf(Matrix<half>& target, const Matrix<half>& other) { target.AssignValuesOf(other); }
template<class ElemType>
static void CopyToVector(const Matrix<ElemType>& source, vector<ElemType>& sourceData)
{
sourceData.resize(source.GetNumElements());
ElemType* datap = sourceData.data();
size_t datasz = sourceData.size();
source.CopyToArray(datap, datasz);
assert(datap == sourceData.data() && datasz == sourceData.size()); // (make sure it used my buffer; a somewhat awkward API)
}
// int matrices do not support value assignment (only float/double/half are fully supported).
template<>
void Matrix<int>::AssignValuesOf(const Matrix<int>&) { NOT_IMPLEMENTED; }
// Cross-type assignment: target (ElemType) = cast(source (ElemTypeOther)).
// Deliberately simple and slow (see comment above the same-type overloads):
// - densify the source if sparse, copy it to a CPU-side STL vector
// - element-wise cast into a second vector
// - write the result into the target (via a temp matrix if the target is sparse)
template<class ElemType, class ElemTypeOther>
static void DoCastAssignValuesOf(Matrix<ElemType>& target, const Matrix<ElemTypeOther>& source)
{
    // (removed dead no-op statements "target; source;" -- both parameters are used below)
    vector<ElemTypeOther> sourceData;
    if (source.GetMatrixType() == MatrixType::SPARSE) // if sparse then convert it over (V2 cannot read sparse data into dense input_variables)
    {
        Matrix<ElemTypeOther> temp(source.GetNumRows(), source.GetNumCols(), CPUDEVICE, DENSE);
        temp.AssignValuesOf(source);
        CopyToVector(temp, sourceData);
    }
    else
    {
        CopyToVector(source, sourceData);
    }
    // cast all values
    vector<ElemType> targetData(sourceData.size());
    transform(sourceData.begin(), sourceData.end(), targetData.begin(), [](ElemTypeOther v){ return (ElemType)v; });
    // set the target
    if (target.GetMatrixType() == MatrixType::SPARSE) // if target is sparse then we cannot assign from a vector directly, but we can from a matrix object
    {
        Matrix<ElemType> temp(source.GetNumRows(), source.GetNumCols(), targetData.data(), CPUDEVICE);
        target.AssignValuesOf(temp);
    }
    else
    {
        target.SetValue(source.GetNumRows(), source.GetNumCols(), target.GetDeviceId(), targetData.data());
    }
}
// Polymorphic assignment with element-type conversion (float/double/half sources only).
// CPU paths go through the slow vector-based DoCastAssignValuesOf(); GPU dense/sparse
// paths use native casting kernels.
template <class ElemType>
void Matrix<ElemType>::CastAssignValuesOf(const MatrixBase& other) /*override*/ // allows for mixed assignment with conversion
{
    // Discover the source's element type; exactly one of these will be non-null for supported types.
    const Matrix<float> * otherf = dynamic_cast<const Matrix<float>*>(&other);
    const Matrix<double> * otherd = dynamic_cast<const Matrix<double>*>(&other);
    const Matrix<half> * otherh = dynamic_cast<const Matrix<half>*>(&other);
    if (!otherf && !otherd && !otherh)
        LogicError("CastAssignValuesOf: Only accepts float, double and half matrices.");
    DISPATCH_MATRIX_ON_FLAG(
        this,
        this,
        {
            if (otherf) DoCastAssignValuesOf(*this, *otherf);
            if (otherd) DoCastAssignValuesOf(*this, *otherd);
            if (otherh) DoCastAssignValuesOf(*this, *otherh);
        },
        {
            if (otherf) m_GPUMatrix->template CastAssignValuesOf<float>(otherf->m_GPUMatrix.get());
            if (otherd) m_GPUMatrix->template CastAssignValuesOf<double>(otherd->m_GPUMatrix.get());
            if (otherh) m_GPUMatrix->template CastAssignValuesOf<half>(otherh->m_GPUMatrix.get());
        },
        {
            if (otherf) DoCastAssignValuesOf(*this, *otherf);
            if (otherd) DoCastAssignValuesOf(*this, *otherd);
            if (otherh) DoCastAssignValuesOf(*this, *otherh);
        },
        {
            if (otherf) m_GPUSparseMatrix->template DeepCast<float>(*otherf->m_GPUSparseMatrix);
            if (otherd) m_GPUSparseMatrix->template DeepCast<double>(*otherd->m_GPUSparseMatrix);
            if (otherh) m_GPUSparseMatrix->template DeepCast<half>(*otherh->m_GPUSparseMatrix);
        });
}
// int matrices do not support bulk SetValue from an array.
template<>
void Matrix<int>::SetValue(const size_t, const size_t, int, int*, const size_t, DataTransferer*) { NOT_IMPLEMENTED; }
// Set the matrix (resizing as needed) from a raw host array of numRows*numCols elements.
// Dense only. 'matrixFlags' controls layout/ownership interpretation of pArray.
template <class ElemType>
void Matrix<ElemType>::SetValue(const size_t numRows, const size_t numCols, int deviceId, ElemType* pArray, const size_t matrixFlags, DataTransferer* transferer)
{
    if (((numRows * numCols) > 0) && (pArray == nullptr))
        InvalidArgument("Invalid pArray.");
    // Only gpu matrix supports async data transfers, so data transferer passed only to gpu matrix.
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->SetValue(numRows, numCols, pArray, matrixFlags),
                            m_GPUMatrix->SetValue(numRows, numCols, deviceId, pArray, matrixFlags, transferer),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Set a single element (rIdx, cIdx) to 'val'. CPU only; GPU-resident data is
// dispatched to the CPU copy when the matrix lives on BOTH.
template <class ElemType>
void Matrix<ElemType>::SetValue(const size_t rIdx, const size_t cIdx, ElemType val)
{
    DISPATCH_MATRIX_ON_FLAG_USECPU_4BOTH(this,
                                         this,
                                         (*m_CPUMatrix)(rIdx, cIdx) = val,
                                         NOT_IMPLEMENTED,
                                         m_CPUSparseMatrix->SetValue(rIdx, cIdx, val),
                                         NOT_IMPLEMENTED);
}
// read features
// Load this matrix from host-side CSC (compressed sparse column) arrays.
// Dense targets use the xPUSparseMatrix member as scratch space and then densify.
template <class ElemType>
void Matrix<ElemType>::SetMatrixFromCSCFormat(const CPUSPARSE_INDEX_TYPE* h_CSCCol, const CPUSPARSE_INDEX_TYPE* h_Row, const ElemType* h_Val,
                                              const size_t nz, const size_t numRows, const size_t numCols, DataTransferer* transferer)
{
    // Note: The current implementation uses the xPUSparseMatrix as temporary space. This allows for memory sharing between calls. If
    // xPUSparseMatrix is a view, this code will cause an error during runtime stating that the view is not writable nor resizable.
    // Only gpu matrix supports async data transfers, so data transferer passed only to gpu matrix in case we do not need to reassign to dense.
    // When we have to reassign sparse to dense we cannot use async operation, because at the time when AssignColumnSliceToDense is called the
    // data should already be copied to destination.
    DISPATCH_MATRIX_ON_FLAG(this, this,
                            {
                                if (!m_CPUSparseMatrix) m_CPUSparseMatrix = make_shared<CPUSparseMatrix<ElemType>>(matrixFormatSparseCSC, numRows, numCols, nz);
                                m_CPUSparseMatrix->SetMatrixFromCSCFormat(h_CSCCol, h_Row, h_Val, nz, numRows, numCols);
                                m_CPUSparseMatrix->AssignColumnSliceToDense(*m_CPUMatrix, 0, numCols);
                            },
                            {
                                if (!m_GPUSparseMatrix) m_GPUSparseMatrix = make_shared<GPUSparseMatrix<ElemType>>(numRows, numCols, nz, GetDeviceId(), matrixFormatSparseCSC);
                                m_GPUSparseMatrix->SetMatrixFromCSCFormat(h_CSCCol, h_Row, h_Val, nz, numRows, numCols);
                                m_GPUSparseMatrix->AssignColumnSliceToDense(*m_GPUMatrix, 0, numCols);
                            },
                            { m_CPUSparseMatrix->SetMatrixFromCSCFormat(h_CSCCol, h_Row, h_Val, nz, numRows, numCols); },
                            { m_GPUSparseMatrix->SetMatrixFromCSCFormat(h_CSCCol, h_Row, h_Val, nz, numRows, numCols, false, -1, transferer); });
}
///
/// adjusts the sparse block column matrix with the new Col2BlockId
/// For each column, if new Col2BlockId contains valid index, a corresponding block exists at the index
/// if old col2BlockId[i] contains value at that column, it would be copied over; otherwise the block would be filled with zeros
///
/// GPU sparse (block-column format) only; all other storage types are unsupported.
template <class ElemType>
void Matrix<ElemType>::AdjustSparseBlockColumn(const GPUSPARSE_INDEX_TYPE* cpuCol2BlockId, size_t numBlocks, bool useBlockId2Col)
{
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED,
                            m_GPUSparseMatrix->AdjustCol2BlockId(cpuCol2BlockId, numBlocks, useBlockId2Col));
}
// Set every element on the main diagonal to the scalar 'v' (all storage types).
template <class ElemType>
void Matrix<ElemType>::SetDiagonalValue(const ElemType v)
{
    if (IsEmpty())
        LogicError("SetDiagonalValue: Matrix is empty.");
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->SetDiagonalValue(v),
                            m_GPUMatrix->SetDiagonalValue(v),
                            m_CPUSparseMatrix->SetDiagonalValue(v),
                            m_GPUSparseMatrix->SetDiagonalValue(v));
}
// Set the main diagonal of this matrix from a vector (row or column) whose element
// count equals GetDiagSize(). A 1-element vector degenerates to the scalar overload.
template <class ElemType>
void Matrix<ElemType>::SetDiagonalValue(const Matrix<ElemType>& vector)
{
    if (vector.GetNumRows() != 1 && vector.GetNumCols() != 1)
        LogicError("SetDiagonalValue: Input vector must be a vector.");
    if (vector.GetNumRows() * vector.GetNumCols() != GetDiagSize())
        LogicError("SetDiagonalValue: Input vector must match matrix dimension.");
    if (IsEmpty())
        return;
    DecideAndMoveToRightDevice(*this, vector);
    if (vector.GetNumElements() == 1) // reduce to simple form
    {
        // NOTE(review): the sparse branches below read vector.m_GPUMatrix; presumably a
        // sparse scalar is not expected here -- confirm against callers.
        DISPATCH_MATRIX_ON_FLAG(&vector,
                                nullptr,
                                SetDiagonalValue(vector(0, 0)),
                                SetDiagonalValue(vector.m_GPUMatrix->Get00Element()), // BUGBUG: efficiency
                                SetDiagonalValue(vector(0, 0)),
                                SetDiagonalValue(vector.m_GPUMatrix->Get00Element()) // BUGBUG: efficiency
                                );
    }
    else if (vector.GetNumRows() != GetDiagSize() && vector.GetNumCols() != GetDiagSize())
        LogicError("SetDiagonalValue: input vector's dimension does not agree with [this].");
    else
    {
        // WARNING: we use this pointer to decide which function to call. However, vector may be stored in a different matrix type (DENSE, SPARSE)
        DISPATCH_MATRIX_ON_FLAG(this,
                                this,
                                assert(vector.m_CPUMatrix);
                                m_CPUMatrix->SetDiagonalValue(*vector.m_CPUMatrix),
                                assert(vector.m_GPUMatrix);
                                m_GPUMatrix->SetDiagonalValue(*vector.m_GPUMatrix),
                                assert(vector.m_CPUMatrix);
                                m_CPUSparseMatrix->SetDiagonalValue(*vector.m_CPUMatrix),
                                // Fixed: this branch dereferences vector.m_GPUMatrix, so assert that
                                // pointer (the old code asserted vector.m_GPUSparseMatrix instead).
                                assert(vector.m_GPUMatrix);
                                m_GPUSparseMatrix->SetDiagonalValue(*vector.m_GPUMatrix));
    }
}
// Fill the matrix with uniform random values in [low, high) using a fresh RNG seeded with 'seed'. Dense only.
template <class ElemType>
void Matrix<ElemType>::SetUniformRandomValue(const ElemType low, const ElemType high, unsigned long seed)
{
    if (IsEmpty())
        return;
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->SetUniformRandomValue(low, high, seed),
                            m_GPUMatrix->SetUniformRandomValue(low, high, seed),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Same, but draw from an existing RNG handle (keeps the random stream continuous across calls).
template <class ElemType>
void Matrix<ElemType>::SetUniformRandomValue(RNGHandle& rngHandle, const ElemType low, const ElemType high)
{
    if (IsEmpty())
        return;
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->SetUniformRandomValue(rngHandle, low, high),
                            m_GPUMatrix->SetUniformRandomValue(rngHandle, low, high),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Fill with Gaussian (normal) random values of given mean/stdev from an RNG handle. Dense only.
template <class ElemType>
void Matrix<ElemType>::SetGaussianRandomValue(RNGHandle& rngHandle, const ElemType mean, const ElemType stdev)
{
    if (IsEmpty())
        return;
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->SetGaussianRandomValue(rngHandle, mean, stdev),
                            m_GPUMatrix->SetGaussianRandomValue(rngHandle, mean, stdev),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Fill with Gumbel-distributed random values (location 'loc', scale 'scale'). Dense only.
template <class ElemType>
void Matrix<ElemType>::SetGumbelRandomValue(RNGHandle& rngHandle, const ElemType loc, const ElemType scale)
{
    if (IsEmpty())
        return;
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->SetGumbelRandomValue(rngHandle, loc, scale),
                            m_GPUMatrix->SetGumbelRandomValue(rngHandle, loc, scale),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Fill with Gaussian random values using a fresh RNG seeded with 'seed'. Requires sigma > 0. Dense only.
template <class ElemType>
void Matrix<ElemType>::SetGaussianRandomValue(const ElemType mean, const ElemType sigma, unsigned long seed)
{
    if (sigma <= 0)
        InvalidArgument("SetGaussianRandomValue: sigma must be a positive value.");
    if (IsEmpty())
        return;
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->SetGaussianRandomValue(mean, sigma, seed),
                            m_GPUMatrix->SetGaussianRandomValue(mean, sigma, seed),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Fill with truncated-normal random values (normal draws limited to a bounded range). Dense only.
template <class ElemType>
void Matrix<ElemType>::SetTruncatedNormalRandomValue(const ElemType mean, const ElemType sigma, unsigned long seed)
{
    if (sigma <= 0)
        InvalidArgument("SetTruncatedNormalRandomValue: sigma must be a positive value.");
    if (IsEmpty())
        return;
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->SetTruncatedNormalRandomValue(mean, sigma, seed),
                            m_GPUMatrix->SetTruncatedNormalRandomValue(mean, sigma, seed),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Add (not set) Gaussian noise to the existing values. CPU dense only.
template <class ElemType>
void Matrix<ElemType>::AddGaussianRandomValue(const ElemType mean, const ElemType sigma, unsigned long seed)
{
    if (sigma <= 0)
        InvalidArgument("AddGaussianRandomValue: sigma must be a positive value.");
    if (IsEmpty())
        return;
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->AddGaussianRandomValue(mean, sigma, seed),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
//maskRate: percentage of values masked out (similar to dropout rate)
//scaleValue: which scale value to set to the left ones (unmasked items).
// In-place dropout-style masking: zero ~maskRate of the elements and scale the rest. Dense only.
template <class ElemType>
void Matrix<ElemType>::SetUniformRandomMask(const ElemType maskRate, const ElemType scaleValue, RNGHandle& rngHandle)
{
    if (IsEmpty())
        return;
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->SetUniformRandomMask(maskRate, scaleValue, rngHandle),
                            m_GPUMatrix->SetUniformRandomMask(maskRate, scaleValue, rngHandle),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Vanilla SGD update.
// Modifies "this" parameter matrix, on which this method is invoked.
// Note: the GPU dense path also scales 'gradients' in place as a side effect.
template <class ElemType>
void Matrix<ElemType>::SGDUpdate(Matrix<ElemType>& gradients, ElemType learnRatePerSample)
{
    DecideAndMoveToRightDevice(gradients, *this);
    DISPATCH_MATRIX_ON_FLAG(&gradients, nullptr,
                            {
                                // w_t = w_{t-1} - learnRatePerSample * g_{t-1},
                                ScaleAndAdd(ElemType(-learnRatePerSample), gradients, *this);
                            },
                            {
                                // BUGBUG: cannot call ScaleAndAdd(ElemType(-learnRatePerSample), gradients, *this) here,
                                // it produces different results from the scale and add below.
                                // g'_{t-1} = learnRatePerSample * g_{t-1}
                                // w_t = w_{t-1} - g'_{t-1}
                                Scale(ElemType(learnRatePerSample), gradients);
                                *this -= gradients;
                            },
                            {
                                ScaleAndAdd(ElemType(-learnRatePerSample), gradients, *this);
                            },
                            {
                                ScaleAndAdd(ElemType(-learnRatePerSample), gradients, *this);
                            });
}
// SGD update with momentum.
// Modifies "this" parameter matrix, on which this method is invoked.
// Also updates 'smoothedGradients' (the momentum accumulator) and, on the sparse
// paths, rewrites 'gradients' in place via NormalGrad().
template <class ElemType>
void Matrix<ElemType>::MomentumSGDUpdate(Matrix<ElemType>& gradients,
                                         Matrix<ElemType>& smoothedGradients,
                                         ElemType learnRatePerSample,
                                         ElemType momentum,
                                         ElemType unitGainFactor)
{
    DecideAndMoveToRightDevice(smoothedGradients, gradients, *this);
    DISPATCH_MATRIX_ON_FLAG(&gradients, nullptr,
                            {
                                // Classic momentum (unitGainFactor == 1.0):
                                // 1) sg_t = momentum * sg_{t-1} + learnRatePerSample * g_{t-1}
                                // Unit-gain momentum (unitGainFactor == 1.0 - momentum):
                                // 1) sg_t = momentum * sg_{t-1} + learnRatePerSample * (1.0 - momentum) * g_{t-1}
                                // 2) w_t = w_{t-1} - sg_t
                                ScaleAndAdd(unitGainFactor * learnRatePerSample, gradients, momentum, smoothedGradients);
                                *this -= smoothedGradients;
                            },
                            {
                                // GPU dense: same formulas as the CPU dense path above.
                                ScaleAndAdd(unitGainFactor * learnRatePerSample, gradients, momentum, smoothedGradients);
                                *this -= smoothedGradients;
                            },
                            {
                                // The sparse update is slightly different from the dense implementation above:
                                // Classic momentum (unitGainFactor == 1.0):
                                // 1) sg_t = momentum * sg_{t-1} + g_{t-1}
                                // Unit-gain momentum (unitGainFactor == 1.0 - momentum):
                                // 1) sg_t = momentum * sg_{t-1} + (1.0 - momentum) * g_{t-1}
                                // 2) g'_{t-1} = sg_t
                                // 3) w_t = w_{t-1} - learnRatePerSample * g'_{t-1}
                                if (momentum != 0)
                                {
                                    gradients.m_CPUSparseMatrix->NormalGrad(*smoothedGradients.m_CPUMatrix, momentum, unitGainFactor);
                                }
                                ScaleAndAdd(-learnRatePerSample, gradients, *this);
                            },
                            {
                                if (momentum != 0)
                                {
                                    gradients.m_GPUSparseMatrix->NormalGrad(*smoothedGradients.m_GPUMatrix, momentum, unitGainFactor);
                                }
                                ScaleAndAdd(-learnRatePerSample, gradients, *this);
                            });
}
// Nesterov accelerated SGD update.
// Modifies "this" parameter matrix, on which this method is invoked.
// Also updates 'smoothedGradients'; sparse paths additionally rewrite 'gradients'
// in place via NormalGrad(), hence the gradientCache copies below.
template <class ElemType>
void Matrix<ElemType>::NesterovAcceleratedMomentumSGDUpdate(Matrix<ElemType>& gradients,
                                                            Matrix<ElemType>& smoothedGradients,
                                                            ElemType learnRatePerSample,
                                                            ElemType momentum,
                                                            ElemType unitGainFactor)
{
    DecideAndMoveToRightDevice(smoothedGradients, gradients, *this);
    DISPATCH_MATRIX_ON_FLAG(&gradients, nullptr,
                            { /* CPU dense */
                                // 1) sg_t = momentum * sg_{t-1} + learnRatePerSample * unitGainFactor * g_{t-1}
                                // 2) w'_t = w_{t-1} - momentum * sg_t
                                // 3) w_t = w'_t - learnRatePerSample * unitGainFactor * g_{t-1}
                                // The end result:
                                //  w_t = w_{t-1} - momentum^2 * sg_{t-1} - learnRatePerSample * unitGainFactor * (1 + momentum) * g_{t-1}
                                //  sg_t = momentum * sg_{t-1} + learnRatePerSample * unitGainFactor * g_{t-1}
                                ScaleAndAdd( unitGainFactor * learnRatePerSample, gradients, momentum, smoothedGradients);
                                ScaleAndAdd(-momentum, smoothedGradients, *this);
                                ScaleAndAdd(-unitGainFactor * learnRatePerSample, gradients, *this);
                            },
                            { /* GPU dense */
                                ScaleAndAdd(unitGainFactor * learnRatePerSample, gradients, momentum, smoothedGradients);
                                ScaleAndAdd(-momentum, smoothedGradients, *this);
                                ScaleAndAdd(-unitGainFactor * learnRatePerSample, gradients, *this);
                            },
                            { /* CPU sparse */
                                if (momentum != 0)
                                {
                                    // Identical to the above, except that as a side effect "NormalGrad" modifies
                                    // gradient values in place, so that gradientCache is needed to store the original values.
                                    Matrix<ElemType> gradientCache(gradients.GetDeviceId());
                                    gradientCache.AssignValuesOf(gradients);
                                    gradients.m_CPUSparseMatrix->NormalGrad(*smoothedGradients.m_CPUMatrix, momentum, unitGainFactor);
                                    ScaleAndAdd(-momentum, smoothedGradients, *this);
                                    ScaleAndAdd(-unitGainFactor * learnRatePerSample, gradientCache, *this);
                                }
                            },
                            { /* GPU sparse */
                                if (momentum != 0)
                                {
                                    Matrix<ElemType> gradientCache(gradients.GetDeviceId());
                                    gradientCache.AssignValuesOf(gradients);
                                    gradients.m_GPUSparseMatrix->NormalGrad(*smoothedGradients.m_GPUMatrix, momentum, unitGainFactor);
                                    ScaleAndAdd(-momentum, smoothedGradients, *this);
                                    ScaleAndAdd(-unitGainFactor * learnRatePerSample, gradientCache, *this);
                                }
                            });
}
// Adagrad update. Both 'this' (the squared-gradient accumulator) and 'gradients' will be changed.
// Returns the average multiplier computed by the underlying implementation (when needAveMultiplier).
template <class ElemType>
ElemType Matrix<ElemType>::Adagrad(Matrix<ElemType>& gradients, const bool needAveMultiplier)
{
    DecideAndMoveToRightDevice(*this, gradients);
    // Note: Since both 'this' and gradients are changed, we must call SetDataLocation() on 'this' as well.
    // The previous code placed SetDataLocation() after the 'return' statement, making it unreachable
    // dead code; store the result first and then set the location, as RmsProp() does.
    DISPATCH_MATRIX_ON_FLAG(&gradients, &gradients,
                            { auto ret = m_CPUMatrix->Adagrad(*gradients.m_CPUMatrix, needAveMultiplier); SetDataLocation(CPU); return ret; },
                            { auto ret = m_GPUMatrix->Adagrad(*gradients.m_GPUMatrix, needAveMultiplier); SetDataLocation(GPU); return ret; },
                            { auto ret = gradients.m_CPUSparseMatrix->Adagrad(*m_CPUMatrix, needAveMultiplier); SetDataLocation(CPU); return ret; },
                            { auto ret = gradients.m_GPUSparseMatrix->Adagrad(*m_GPUMatrix, needAveMultiplier); SetDataLocation(GPU); return ret; });
}
// FSAdaGrad update -- Frank's "fix" of AdaGrad, very similar to what became later known as Adam
// updates
//  - momentum accumulator
//  - var momentum accumulator
//  - denominator
// then
//  - the model itself
// All state lives in 'this' / 'gradients' / 'functionValues'; scalar hyperparameters are cast to ElemType.
template <class ElemType>
void Matrix<ElemType>::FSAdagradUpdate(Matrix<ElemType>& gradients, Matrix<ElemType>& functionValues, const double targetAdagradAvDenom_x_sqrtAdagradSqrFrames,
                                       const double learnRatePerSample, const double meanMomentum, const double varMomentum, ElemType unitGainFactor)
{
    // Dispatch on the storage format of 'gradients'; CPU sparse is not implemented.
    DISPATCH_MATRIX_ON_FLAG(&gradients, &gradients,
        {
            m_CPUMatrix->FSAdagrad(*gradients.m_CPUMatrix, *functionValues.m_CPUMatrix,
                                   (ElemType)learnRatePerSample, (ElemType)meanMomentum, (ElemType)varMomentum,
                                   (ElemType)targetAdagradAvDenom_x_sqrtAdagradSqrFrames, unitGainFactor);
            SetDataLocation(CPU);
        },
        {
            m_GPUMatrix->FSAdagrad(*gradients.m_GPUMatrix, *functionValues.m_GPUMatrix,
                                   (ElemType)learnRatePerSample, (ElemType)meanMomentum, (ElemType)varMomentum,
                                   (ElemType)targetAdagradAvDenom_x_sqrtAdagradSqrFrames, unitGainFactor);
            SetDataLocation(GPU);
        },
        { NOT_IMPLEMENTED; },
        {
            gradients.m_GPUSparseMatrix->FSAdagrad(*m_GPUMatrix, *functionValues.m_GPUMatrix,
                                                   (ElemType)learnRatePerSample, (ElemType)meanMomentum, (ElemType)varMomentum,
                                                   (ElemType)targetAdagradAvDenom_x_sqrtAdagradSqrFrames, unitGainFactor);
            SetDataLocation(GPU);
        });
    // Note: Since both 'this' and gradients are changed, we must call SetDataLocation() on 'this' as well.
}
///
// Implement the original adam algorithm according to the paper
// Ref: ADAM: A METHOD FOR STOCHASTIC OPTIMIZATION, https://arxiv.org/pdf/1412.6980.pdf
// smoothedCount is the (possibly fractional) step count used for bias correction;
// when 'adamax' is set, the AdaMax variant's bias correction (1st moment only) is used.
///
template <class ElemType>
void Matrix<ElemType>::AdamUpdate(Matrix<ElemType>& gradients, Matrix<ElemType>& functionValues, const double smoothedCount,
                                  const double learnRatePerSample, const double meanMomentum, const double varMomentum, const double epsilon, ElemType unitGainFactor, bool adamax)
{
    // Bias correction
    let biasCorrection = adamax? (ElemType)(1. / (1- pow(meanMomentum, smoothedCount))) : (ElemType)(sqrt(1- pow(varMomentum, smoothedCount))/(1- pow(meanMomentum, smoothedCount)));
    // Dispatch on the storage format of 'gradients'; CPU sparse is not implemented.
    DISPATCH_MATRIX_ON_FLAG(&gradients, &gradients,
        {
            m_CPUMatrix->Adam(*gradients.m_CPUMatrix, *functionValues.m_CPUMatrix,
                              (ElemType)learnRatePerSample, (ElemType)meanMomentum, (ElemType)varMomentum,
                              biasCorrection, (ElemType)epsilon, unitGainFactor, adamax);
            SetDataLocation(CPU);
        },
        {
            m_GPUMatrix->Adam(*gradients.m_GPUMatrix, *functionValues.m_GPUMatrix,
                              (ElemType)learnRatePerSample, (ElemType)meanMomentum, (ElemType)varMomentum,
                              biasCorrection, (ElemType)epsilon, unitGainFactor, adamax);
            SetDataLocation(GPU);
        },
        { NOT_IMPLEMENTED; },
        { gradients.m_GPUSparseMatrix->Adam(*m_GPUMatrix, *functionValues.m_GPUMatrix,
                                            (ElemType)learnRatePerSample, (ElemType)meanMomentum,
                                            (ElemType)varMomentum, biasCorrection, (ElemType)epsilon, unitGainFactor, adamax);
          SetDataLocation(GPU); });
    // Note: Since both 'this' and gradients are changed, we must call SetDataLocation() on 'this' as well.
}
// RmsProp update. 'this' holds the optimizer state, 'gradients' the current gradient;
// both are modified. Returns the average multiplier from the underlying implementation.
// The RMS_* parameters control the adaptive per-weight step-size adjustment.
template <class ElemType>
ElemType Matrix<ElemType>::RmsProp(Matrix<ElemType>& gradients,
                                   ElemType RMS_GAMMA,
                                   ElemType RMS_WGT_INC,
                                   ElemType RMS_WGT_MAX,
                                   ElemType RMS_WGT_DEC,
                                   ElemType RMS_WGT_MIN,
                                   const bool needAveMultiplier,
                                   const bool initialized)
{
    DecideAndMoveToRightDevice(*this, gradients);
    // CPU sparse is not implemented.
    DISPATCH_MATRIX_ON_FLAG(&gradients, &gradients,
                            { auto ret = m_CPUMatrix->RmsProp(*gradients.m_CPUMatrix, RMS_GAMMA, RMS_WGT_INC, RMS_WGT_MAX, RMS_WGT_DEC, RMS_WGT_MIN, needAveMultiplier, initialized); SetDataLocation(CPU); return ret; },
                            { auto ret = m_GPUMatrix->RmsProp(*gradients.m_GPUMatrix, RMS_GAMMA, RMS_WGT_INC, RMS_WGT_MAX, RMS_WGT_DEC, RMS_WGT_MIN, needAveMultiplier, initialized); SetDataLocation(GPU); return ret; },
                            { NOT_IMPLEMENTED; },
                            { auto ret = gradients.m_GPUSparseMatrix->RmsProp(*m_GPUMatrix, RMS_GAMMA, RMS_WGT_INC, RMS_WGT_MAX, RMS_WGT_DEC, RMS_WGT_MIN, needAveMultiplier, initialized); SetDataLocation(GPU); return ret; });
    // Note: Since both 'this' and gradients are changed, we must call SetDataLocation() on 'this' as well.
}
// AdaDelta update. 'this' holds the optimizer state, 'functionValues' the model parameters.
// GradType may differ from ElemType (e.g. half gradients with float state).
// timestamps/currentTimestamp are used only by the sparse paths to lazily apply decay.
template <class ElemType>
template <typename GradType>
void Matrix<ElemType>::AdaDeltaUpdate(Matrix<GradType>& gradients,
                                      Matrix<ElemType>& functionValues,
                                      ElemType learningRate, ElemType rho, ElemType epsilon,
                                      int* timestamps, int currentTimestamp)
{
    DecideAndMoveToRightDevice(*this, gradients);
    DISPATCH_MATRIX_ON_FLAG(&gradients, &gradients,
                            { m_CPUMatrix->AdaDelta(*gradients.m_CPUMatrix, *functionValues.m_CPUMatrix, learningRate, rho, epsilon); SetDataLocation(CPU); },
                            { m_GPUMatrix->AdaDelta(*gradients.m_GPUMatrix, *functionValues.m_GPUMatrix, learningRate, rho, epsilon); SetDataLocation(GPU); },
                            { gradients.m_CPUSparseMatrix->AdaDelta(*m_CPUMatrix, *functionValues.m_CPUMatrix, learningRate, rho, epsilon, timestamps, currentTimestamp); SetDataLocation(CPU); },
                            { gradients.m_GPUSparseMatrix->AdaDelta(*m_GPUMatrix, *functionValues.m_GPUMatrix, learningRate, rho, epsilon, timestamps, currentTimestamp); SetDataLocation(GPU); });
}
// Applies any pending (lazily deferred) AdaDelta decay recorded in 'timestamps' to all
// columns of the state matrix. Dense storage only; sparse flush is not implemented.
template <class ElemType>
void Matrix<ElemType>::AdaDeltaFlushState(size_t cols, ElemType rho, int* timestamps, int currentTimestamp)
{
    DecideAndMoveToRightDevice(*this, *this);
    DISPATCH_MATRIX_ON_FLAG(this, this,
                            { m_CPUMatrix->AdaDeltaFlushTimestamps(cols, rho, timestamps, currentTimestamp); SetDataLocation(CPU); },
                            { m_GPUMatrix->AdaDeltaFlushTimestamps(cols, rho, timestamps, currentTimestamp); SetDataLocation(GPU); },
                            { NOT_IMPLEMENTED; },
                            { NOT_IMPLEMENTED; });
}
// Reinterprets the matrix as (numRows x numCols) without changing the element count.
// No-op when the dimensions already match. CPU sparse reshape is not implemented.
template <class ElemType>
void Matrix<ElemType>::Reshape(const size_t numRows, const size_t numCols)
{
    if (numRows != GetNumRows() || numCols != GetNumCols())
    {
        DISPATCH_MATRIX_ON_FLAG(this, this,
                                { m_CPUMatrix->Reshape(numRows, numCols); },
                                { m_GPUMatrix->Reshape(numRows, numCols); },
                                { NOT_IMPLEMENTED; },
                                { m_GPUSparseMatrix->Reshape(numRows, numCols); });
    }
}
// Note: Resize() will leave the matrix content undefined.
// Note: Resize calls RequireSizeAndAllocate on the sparse versions in for performance reasons. If the external caller knows the nz, then we should set it.
// keepValue only affects the debug-build NaN fill below; it does not preserve content in release builds.
template <class ElemType>
void Matrix<ElemType>::Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve /*=0*/, bool growOnly /*=true*/, bool keepValue /*=false*/)
{
    // TODO: should this function test whether the size is changing, and skip if it isn't? We have at least one explicit test for this code calling this (recurrent node)
    DISPATCH_MATRIX_ON_FLAG_USEBOTH_4BOTH(this,
                                          { m_CPUMatrix->Resize(numRows, numCols, growOnly); },
                                          { m_GPUMatrix->Resize(numRows, numCols, growOnly); },
                                          { m_CPUSparseMatrix->RequireSizeAndAllocate(numRows, numCols, numNZElemToReserve, growOnly, false); },
                                          { m_GPUSparseMatrix->RequireSizeAndAllocate(numRows, numCols, numNZElemToReserve, growOnly, false); });
#ifdef _DEBUG
    if (GetMatrixType() != MatrixType::SPARSE && !keepValue)
        Invalidate(); // Fill the matrix with NaNs to detect using the content which is undefined. Unfortunately this won't work for sparse matrices.
#else
    UNUSED(keepValue);
#endif
}
// Tiles 'frmMat' horizontally 'colRatio' times, producing a (nRows x colRatio*nCols) matrix
// on the same device. Row replication (rowRatio > 1) is not supported yet.
// Note: the previous error message said "raw ratio"; the check is on rowRatio ("row ratio").
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::RepMat(const Matrix<ElemType>& frmMat, const size_t rowRatio, const size_t colRatio)
{
    size_t nCols = frmMat.GetNumCols();
    size_t nRows = frmMat.GetNumRows();
    if (rowRatio > 1)
        RuntimeError("RepMat not yet supporting row ratio larger than 1");
    size_t newCols = colRatio * nCols;
    Matrix<ElemType> c(nRows, newCols, frmMat.GetDeviceId());
    // Copy the source into each horizontal tile.
    for (size_t i = 0; i < colRatio; i++)
    {
        c.ColumnSlice(i * nCols, nCols).AssignValuesOf(frmMat);
    }
    return c;
}
// Returns the allocated capacity (in elements) of the underlying storage,
// which may exceed the logical size after growOnly resizes.
template <class ElemType>
size_t Matrix<ElemType>::GetAllocatedSize() const
{
    return m_baseMatrix->GetSizeAllocated();
}
// reset for sparse matrix. Semantically the same as setting all values to 0.
// Only valid for sparse storage; dense matrices should use SetValue(0) instead.
template <class ElemType>
void Matrix<ElemType>::Reset()
{
    DISPATCH_MATRIX_ON_FLAG_USEBOTH_4BOTH(this,
                                          { NOT_IMPLEMENTED; },
                                          { NOT_IMPLEMENTED; },
                                          { m_CPUSparseMatrix->Reset(); },
                                          { m_GPUSparseMatrix->Reset(); });
}
// Number of rows of the logical matrix, forwarded to the active storage object.
template <class ElemType>
size_t Matrix<ElemType>::GetNumRows() const
{
    return m_baseMatrix->GetNumRows();
}
// Number of columns of the logical matrix, forwarded to the active storage object.
template <class ElemType>
size_t Matrix<ElemType>::GetNumCols() const
{
    return m_baseMatrix->GetNumCols();
}
// Length of the main diagonal, forwarded to the active storage object.
template <class ElemType>
size_t Matrix<ElemType>::GetDiagSize() const
{
    return m_baseMatrix->GetDiagSize();
}
// Total number of logical elements (rows * cols), independent of the storage format.
template <class ElemType>
size_t Matrix<ElemType>::GetNumElements() const
{
    const size_t rows = GetNumRows();
    const size_t cols = GetNumCols();
    return rows * cols;
}
// True when the matrix holds no elements, forwarded to the active storage object.
template <class ElemType>
bool Matrix<ElemType>::IsEmpty() const
{
    return m_baseMatrix->IsEmpty();
}
// True when this matrix is a view into storage owned by another matrix (e.g. a column slice).
template <class ElemType>
bool Matrix<ElemType>::IsView() const
{
    return m_baseMatrix->IsView();
}
#pragma endregion Basic Operators
#pragma region Member BLAS Functions
// In-place scalar add: [this] = [this] + alpha, element-wise.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::operator+=(ElemType alpha)
{
    return AssignSumOf(alpha, *this);
}
// Returns a new matrix holding (*this) + alpha element-wise; [this] is unchanged.
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::operator+(ElemType alpha) const
{
    Matrix<ElemType> result(GetNumRows(), GetNumCols(), GetDeviceId());
    result.AssignSumOf(alpha, *this);
    return result;
}
// [this] = a + alpha, element-wise. Adopts a's device and matrix type; dense only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignSumOf(const ElemType alpha, const Matrix<ElemType>& a)
{
    if (a.IsEmpty())
        LogicError("AssignSumOf: Matrix a is empty.");
    DecideAndMoveToRightDevice(a, *this);
    SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
    DISPATCH_MATRIX_ON_FLAG(&a,
                            this,
                            m_CPUMatrix->AssignSumOf(alpha, *a.m_CPUMatrix),
                            m_GPUMatrix->AssignSumOf(alpha, *a.m_GPUMatrix),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
//if [this] and a have same dimension then [this]=[this]+a
//if a is a column vector, add to all columns of [this]
//if a is a row vector, add to all rows of [this]
//if a is a scalar, add it to all elements.
// Requires a and [this] to have the same matrix type; dense only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::operator+=(const Matrix<ElemType>& a)
{
    DecideAndMoveToRightDevice(*this, a);
    if (!(GetMatrixType() == a.GetMatrixType()))
        NOT_IMPLEMENTED;
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->operator+=(*a.m_CPUMatrix),
                            m_GPUMatrix->operator+=(*a.m_GPUMatrix),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
//if [this] and a have same dimension then OUTPUT=[this]+a
//if a is a column vector, add to all columns of [this]
//if a is a row vector, add to all rows of [this]
// The 1x1 branches broadcast a scalar operand over the other matrix.
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::operator+(const Matrix<ElemType>& a) const
{
    if (GetNumElements() == 1)
    {
        Matrix<ElemType> c(a.DeepClone());
        DISPATCH_MATRIX_ON_FLAG(this,
                                &c,
                                c += (*this)(0, 0),
                                c += (m_GPUMatrix->Get00Element()), // BUGBUG: efficiency
                                c += (*this)(0, 0),
                                NOT_IMPLEMENTED);
        return c;
    }
    else if (a.GetNumElements() == 1)
    {
        Matrix<ElemType> c(this->DeepClone());
        DISPATCH_MATRIX_ON_FLAG(&a,
                                &c,
                                c += a(0, 0),
                                c += (a.m_GPUMatrix->Get00Element()), // BUGBUG: efficiency
                                c += a(0, 0),
                                NOT_IMPLEMENTED);
        return c;
    }
    else
    {
        Matrix<ElemType> c(this->DeepClone()); // this implementation will introduce a copy overhead. but makes reuse of the code
        c += a;
        return c;
    }
}
// [this] = a + b. Handles aliasing ([this] being a or b) and scalar broadcast:
// when one operand is 1x1, the other is copied first so operator+= can broadcast it.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignSumOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b)
{
    // Aliased cases: [this] already holds one operand; just accumulate the other.
    if (this == &a)
        return *this += b;
    if (this == &b)
        return *this += a;
    // Copy the non-scalar operand, then add the (possibly scalar) one.
    const bool aIsScalar = (a.GetNumElements() == 1);
    SetValue(aIsScalar ? b : a);
    (*this) += (aIsScalar ? a : b);
    return *this;
}
// In-place scalar subtract: [this] = [this] - alpha, element-wise.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::operator-=(ElemType alpha)
{
    return AssignDifferenceOf(*this, alpha);
}
// Returns a new matrix holding (*this) - alpha element-wise; [this] is unchanged.
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::operator-(ElemType alpha) const
{
    Matrix<ElemType> result(GetNumRows(), GetNumCols(), GetDeviceId());
    result.AssignDifferenceOf(*this, alpha);
    return result;
}
//for each column of a, we assign numRows starting from startIndex to this
// i.e. [this] becomes the row slice a[startIndex : startIndex+numRows, :]. Dense only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignRowSliceValuesOf(const Matrix<ElemType>& a, const size_t startIndex, const size_t numRows)
{
    DecideAndMoveToRightDevice(a, *this);
    SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->AssignRowSliceValuesOf(*a.m_CPUMatrix, startIndex, numRows),
                            m_GPUMatrix->AssignRowSliceValuesOf(*a.m_GPUMatrix, startIndex, numRows),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
//for each column of a, we assign all rows of a to this starting from startIndex
// i.e. rows [startIndex, startIndex+numRows) of [this] are overwritten with a. Dense only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignToRowSliceValuesOf(const Matrix<ElemType>& a, const size_t startIndex, const size_t numRows)
{
    DecideAndMoveToRightDevice(*this, a);
    // WARNING: a and this must have same type
    if (!(GetMatrixType() == a.GetMatrixType()))
        NOT_IMPLEMENTED;
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->AssignToRowSliceValuesOf(*a.m_CPUMatrix, startIndex, numRows),
                            m_GPUMatrix->AssignToRowSliceValuesOf(*a.m_GPUMatrix, startIndex, numRows),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
//for the row slice of this starting from startIndex we add a to it.
// i.e. rows [startIndex, startIndex+numRows) of [this] += a. Dense only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AddToRowSliceValuesOf(const Matrix<ElemType>& a, const size_t startIndex, const size_t numRows)
{
    DecideAndMoveToRightDevice(*this, a);
    // WARNING: a and this must have same type
    if (!(GetMatrixType() == a.GetMatrixType()))
        NOT_IMPLEMENTED;
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->AddToRowSliceValuesOf(*a.m_CPUMatrix, startIndex, numRows),
                            m_GPUMatrix->AddToRowSliceValuesOf(*a.m_GPUMatrix, startIndex, numRows),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
//for each column of this, we add row slice of a starting from startIndex
// i.e. [this] += a[startIndex : startIndex+numRows, :]. Dense only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AddWithRowSliceValuesOf(const Matrix<ElemType>& a, const size_t startIndex, const size_t numRows)
{
    DecideAndMoveToRightDevice(*this, a);
    // WARNING: a and this must have same type
    if (!(GetMatrixType() == a.GetMatrixType()))
        NOT_IMPLEMENTED;
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->AddWithRowSliceValuesOf(*a.m_CPUMatrix, startIndex, numRows),
                            m_GPUMatrix->AddWithRowSliceValuesOf(*a.m_GPUMatrix, startIndex, numRows),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
// [this] = a tiled numRowRepeats times vertically and numColRepeats times horizontally.
// Dense only; a and [this] must have the same matrix type.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignRepeatOf(const Matrix<ElemType>& a, const size_t numRowRepeats, const size_t numColRepeats)
{
    DecideAndMoveToRightDevice(*this, a);
    // WARNING: a and this must have same type
    if (!(GetMatrixType() == a.GetMatrixType()))
        NOT_IMPLEMENTED;
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->AssignRepeatOf(*a.m_CPUMatrix, numRowRepeats, numColRepeats),
                            m_GPUMatrix->AssignRepeatOf(*a.m_GPUMatrix, numRowRepeats, numColRepeats),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
// Adds a (whose rows are a numRepeats-fold vertical repetition pattern relative to [this])
// into [this], folding the repeated row blocks together. Dense only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AddToRowRepeatValuesOf(const Matrix<ElemType>& a, const size_t numRepeats)
{
    DecideAndMoveToRightDevice(*this, a);
    // WARNING: a and this must have same type
    if (!(GetMatrixType() == a.GetMatrixType()))
        NOT_IMPLEMENTED;
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->AddToRowRepeatValuesOf(*a.m_CPUMatrix, numRepeats),
                            m_GPUMatrix->AddToRowRepeatValuesOf(*a.m_GPUMatrix, numRepeats),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
//used in the DSSM model. The resulted *this is a [a.GetRows()*(negNumber+1), a.GetCols()] matrix
//each column contains posNumber of positive samples (original) and negNumber negative samples generated by copying
//sample shifted by shiftNumber columns
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignPositiveAndShiftedNegSample(const Matrix<ElemType>& a, const size_t posNumber, const size_t negNumber, const size_t shiftNumber)
{
    DecideAndMoveToRightDevice(*this, a);
    // WARNING: a and this must have same type
    if (!(GetMatrixType() == a.GetMatrixType()))
        NOT_IMPLEMENTED;
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->AssignPositiveAndShiftedNegSample(*a.m_CPUMatrix, posNumber, negNumber, shiftNumber),
                            m_GPUMatrix->AssignPositiveAndShiftedNegSample(*a.m_GPUMatrix, posNumber, negNumber, shiftNumber),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
//used in the DSSM model. *this = *this + positive and negative samples folded back to the right place
//each column of a contains posNumber of positive samples (original) and negNumber negative samples generated by copying
//sample shifted by shiftNumber columns
// Inverse accumulation of AssignPositiveAndShiftedNegSample above.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AddFoldedPositiveAndShiftedNegSample(const Matrix<ElemType>& a, const size_t posNumber, const size_t negNumber, const size_t shiftNumber)
{
    DecideAndMoveToRightDevice(*this, a);
    // WARNING: a and this must have same type
    if (!(GetMatrixType() == a.GetMatrixType()))
        NOT_IMPLEMENTED;
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->AddFoldedPositiveAndShiftedNegSample(*a.m_CPUMatrix, posNumber, negNumber, shiftNumber),
                            m_GPUMatrix->AddFoldedPositiveAndShiftedNegSample(*a.m_GPUMatrix, posNumber, negNumber, shiftNumber),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
// [this] = alpha - a, element-wise. Adopts a's device and matrix type; dense only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignDifferenceOf(const ElemType alpha, const Matrix<ElemType>& a)
{
    if (a.IsEmpty())
        LogicError("AssignDifferenceOf: Matrix a is empty.");
    DecideAndMoveToRightDevice(a, *this);
    SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->AssignDifferenceOf(alpha, *a.m_CPUMatrix),
                            m_GPUMatrix->AssignDifferenceOf(alpha, *a.m_GPUMatrix),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
// [this] = a - alpha, element-wise. Adopts a's device and matrix type; dense only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignDifferenceOf(const Matrix<ElemType>& a, const ElemType alpha)
{
    if (a.IsEmpty())
        LogicError("AssignDifferenceOf: Matrix a is empty.");
    DecideAndMoveToRightDevice(a, *this);
    SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->AssignDifferenceOf(*a.m_CPUMatrix, alpha),
                            m_GPUMatrix->AssignDifferenceOf(*a.m_GPUMatrix, alpha),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
//if [this] and a have same dimension then [this]=[this]-a
//if a is a column vector, minus it from all columns of [this]
//if a is a row vector, minus it from all rows of [this]
// Dense only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::operator-=(const Matrix<ElemType>& a)
{
    if (a.IsEmpty())
        LogicError("Minus Operation: Matrix a is empty.");
    DecideAndMoveToRightDevice(*this, a);
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            *m_CPUMatrix -= *a.m_CPUMatrix,
                            *m_GPUMatrix -= *a.m_GPUMatrix,
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
//if [this] and a have same dimension then output=[this]-a
//if a is a column vector, minus it from all columns of [this]
//if a is a row vector, minus it from all rows of [this]
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::operator-(const Matrix<ElemType>& a) const
{
    // Copy [this] and then subtract a via ScaleAndAdd(-1, ...). The extra copy lets us
    // reuse the existing code path instead of writing a dedicated ternary subtract.
    Matrix<ElemType> diff(this->DeepClone());
    ScaleAndAdd(-1, a, diff);
    return diff;
}
// [this] = a - b, with broadcasting support in either direction.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignDifferenceOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b)
{
    // if first arg broadcasts, we swap first and the flip the sign
    // This is because there is no equivalent to operator-=() that works the other way round.
    // TODO: We need ternary ops where the output storage is separate.
    if (a.GetNumRows() < b.GetNumRows() || a.GetNumCols() < b.GetNumCols())
    {
        // Both dimensions may not disagree in opposite directions (no two-way broadcast).
        if (a.GetNumRows() > b.GetNumRows() || a.GetNumCols() > b.GetNumCols())
            LogicError("AssignDifferenceOf: Invalid dimensions.");
        AssignDifferenceOf(b, a); // compute b - a ...
        *this *= -1;              // ... then negate to get a - b
        return *this;
    }
    if (this != &a)
        SetValue(a);
    (*this) -= b;
    return *this;
}
// In-place scalar multiply: [this] = alpha * [this].
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::operator*=(ElemType alpha)
{
    Scale(alpha, *this);
    return *this;
}
// Returns a new matrix holding alpha * (*this); [this] is unchanged.
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::operator*(ElemType alpha) const
{
    Matrix<ElemType> scaled(GetNumRows(), GetNumCols(), (DEVICEID_TYPE) m_preferredDeviceId);
    Scale(alpha, *this, scaled);
    return scaled;
}
// [this] = alpha * a (scalar-matrix product).
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignProductOf(const ElemType alpha, const Matrix<ElemType>& a)
{
    Scale(alpha, a, *this);
    return *this;
}
// [this]=a*b
// Matrix product with optional transposition of either operand. When one operand is 1x1,
// the other is copied (or transposed) into [this] and then scaled by the scalar.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignProductOf(const Matrix<ElemType>& a, const bool transposeA, const Matrix<ElemType>& b, const bool transposeB)
{
    if (a.GetNumElements() == 1)
    {
        if (transposeB)
            AssignTransposeOf(b);
        else
            this->SetValue(b);
        DISPATCH_MATRIX_ON_FLAG(this,
                                nullptr,
                                (*this) *= a(0, 0),
                                (*this) *= a.m_GPUMatrix->Get00Element(),
                                (*this) *= a(0, 0),
                                NOT_IMPLEMENTED);
    }
    else if (b.GetNumElements() == 1)
    {
        if (transposeA)
            AssignTransposeOf(a);
        else
            this->SetValue(a);
        DISPATCH_MATRIX_ON_FLAG(this,
                                nullptr,
                                (*this) *= b(0, 0),
                                (*this) *= b.m_GPUMatrix->Get00Element(),
                                (*this) *= b(0, 0),
                                NOT_IMPLEMENTED);
    }
    else
        Multiply(a, transposeA, b, transposeB, *this);
    return *this;
}
// Matrix product: returns (*this) * a. A 1x1 operand on either side is treated as a
// scalar and broadcast over the other matrix.
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::operator*(const Matrix<ElemType>& a) const
{
    if (GetNumElements() == 1)
    {
        Matrix<ElemType> c((DEVICEID_TYPE) a.GetPreferredDeviceId());
        DISPATCH_MATRIX_ON_FLAG(this,
                                nullptr,
                                c.AssignProductOf((*this)(0, 0), a),
                                c.AssignProductOf(m_GPUMatrix->Get00Element(), a), // BUGBUG: efficiency
                                c.AssignProductOf((*this)(0, 0), a),
                                NOT_IMPLEMENTED);
        return c;
    }
    else if (a.GetNumElements() == 1)
    {
        Matrix<ElemType> c((DEVICEID_TYPE) GetPreferredDeviceId());
        DISPATCH_MATRIX_ON_FLAG(&a,
                                nullptr,
                                c.AssignProductOf(a(0, 0), (*this)),
                                c.AssignProductOf(a.m_GPUMatrix->Get00Element(), (*this)), // BUGBUG: efficiency
                                c.AssignProductOf(a(0, 0), (*this)),
                                NOT_IMPLEMENTED);
        return c;
    }
    else
    {
        // General case: true matrix-matrix multiply into a freshly sized result.
        Matrix<ElemType> c(GetNumRows(), a.GetNumCols(), (DEVICEID_TYPE) GetPreferredDeviceId());
        Multiply(*this, a, c);
        return c;
    }
}
// [this]=a*b where a is a 1x1 scalar
// Implemented as a weighted multiply-add with beta = 0 (overwrite).
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::Assign1x1ProductOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b)
{
    Multiply1x1AndWeightedAdd(+1, a, b, 0.0f, *this);
    return *this;
}
// In-place scalar divide, implemented as multiplication by the reciprocal.
// Note: computes 1/alpha once rather than dividing each element.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::operator/=(ElemType alpha)
{
    (*this) *= 1 / alpha;
    return (*this);
}
// Returns a new matrix holding (*this) / alpha (via multiplication by the reciprocal).
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::operator/(ElemType alpha) const
{
    return ((*this) * (1 / alpha));
}
//element-wise power
// In place: every element of [this] is raised to the power alpha.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::operator^=(ElemType alpha)
{
    ElementWisePower(alpha, *this, *this);
    return *this;
}
//element-wise power
// Returns a new matrix whose elements are those of [this] raised to alpha.
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::operator^(ElemType alpha) const
{
    Matrix<ElemType> result(GetNumRows(), GetNumCols(), (DEVICEID_TYPE) GetDeviceId());
    ElementWisePower(alpha, *this, result);
    return result;
}
// [this] = a .^ power, element-wise.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignElementPowerOf(const Matrix<ElemType>& a, const ElemType power)
{
    ElementWisePower(power, a, *this);
    return *this;
}
//[this]=[this] .* a (we cannot override operator .* in c++)
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::ElementMultiplyWith(const Matrix<ElemType>& a)
{
    return AssignElementProductOf(*this, a);
}
// [this] = [this] ./ a, element-wise.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::ElementDivideBy(const Matrix<ElemType>& a)
{
    return AssignElementDivisionOf(*this, a);
}
//[this]=a .* b
// a and b must have identical dimensions and matrix types; dense only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignElementProductOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b)
{
    if (a.IsEmpty() || b.IsEmpty())
        LogicError("AssignElementProductOf: Matrix is empty.");
    assert(a.GetNumRows() == b.GetNumRows() && a.GetNumCols() == b.GetNumCols());
    if (!(a.GetNumRows() == b.GetNumRows() && a.GetNumCols() == b.GetNumCols()))
        InvalidArgument("The input matrix dimensions do not match.");
    DecideAndMoveToRightDevice(a, b, *this);
    if (!(a.GetMatrixType() == b.GetMatrixType()))
        NOT_IMPLEMENTED;
    SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->AssignElementProductOf(*a.m_CPUMatrix, *b.m_CPUMatrix),
                            m_GPUMatrix->AssignElementProductOf(*a.m_GPUMatrix, *b.m_GPUMatrix),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
// [this] += a .* b. All three matrices must have identical dimensions and types; dense only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AddElementProductOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b)
{
    if (a.IsEmpty() || b.IsEmpty())
        LogicError("AddElementProductOf: Matrix is empty.");
    assert(a.GetNumRows() == b.GetNumRows() && a.GetNumCols() == b.GetNumCols());
    if (!(a.GetNumRows() == b.GetNumRows() && a.GetNumCols() == b.GetNumCols()))
        InvalidArgument("The input matrix dimensions do not match.");
    if (!(a.GetNumRows() == GetNumRows() && a.GetNumCols() == GetNumCols()))
        InvalidArgument("The input matrix dimensions do not match [this].");
    DecideAndMoveToRightDevice(*this, a, b);
    if (!(a.GetMatrixType() == b.GetMatrixType() && GetMatrixType() == b.GetMatrixType()))
        NOT_IMPLEMENTED;
    DISPATCH_MATRIX_ON_FLAG(this,
                            nullptr,
                            m_CPUMatrix->AddElementProductOf(*a.m_CPUMatrix, *b.m_CPUMatrix),
                            m_GPUMatrix->AddElementProductOf(*a.m_GPUMatrix, *b.m_GPUMatrix),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
//[this]=a ./ b
// a and b must have identical dimensions and matrix types; dense only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignElementDivisionOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b)
{
    if (a.IsEmpty() || b.IsEmpty())
        LogicError("AssignElementDivisionOf: Matrix is empty.");
    assert(a.GetNumRows() == b.GetNumRows() && a.GetNumCols() == b.GetNumCols());
    if (!(a.GetNumRows() == b.GetNumRows() && a.GetNumCols() == b.GetNumCols()))
        InvalidArgument("The input matrix dimensions do not match.");
    DecideAndMoveToRightDevice(a, b, *this);
    // WARNING: a and b must have same type
    if (!(a.GetMatrixType() == b.GetMatrixType()))
        NOT_IMPLEMENTED;
    SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->AssignElementDivisionOf(*a.m_CPUMatrix, *b.m_CPUMatrix),
                            m_GPUMatrix->AssignElementDivisionOf(*a.m_GPUMatrix, *b.m_GPUMatrix),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
// Multiplies every column of [this] element-wise by the column vector a
// (a must be GetNumRows() x 1). Dense only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::ColumnElementMultiplyWith(const Matrix<ElemType>& a)
{
    if (a.IsEmpty() || IsEmpty())
        LogicError("ColumnElementMultiplyWith: Matrix is empty.");
    if (!(a.GetNumRows() == GetNumRows() && a.GetNumCols() == 1))
        InvalidArgument("ColumnElementMultiplyWith: The input matrix should be a col vector and match [this]'s rows.");
    DecideAndMoveToRightDevice(*this, a);
    // WARNING: a and this must have same type
    if (!(GetMatrixType() == a.GetMatrixType()))
        NOT_IMPLEMENTED;
    SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
    DISPATCH_MATRIX_ON_FLAG(&a,
                            this,
                            m_CPUMatrix->ColumnElementMultiplyWith(*a.m_CPUMatrix),
                            m_GPUMatrix->ColumnElementMultiplyWith(*a.m_GPUMatrix),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
// Multiplies every row of [this] element-wise by the row vector a
// (a must be 1 x GetNumCols()). Dense only.
// NOTE(review): unlike ColumnElementMultiplyWith, this does not call
// DecideAndMoveToRightDevice(*this, a) first — confirm both operands are
// guaranteed to be on the same device at all call sites.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::RowElementMultiplyWith(const Matrix<ElemType>& a)
{
    if (a.IsEmpty() || IsEmpty())
        LogicError("RowElementMultiplyWith: Matrix is empty.");
    if (!(a.GetNumCols() == GetNumCols() && a.GetNumRows() == 1))
        InvalidArgument("RowElementMultiplyWith: The input matrix should be a row vector and match [this]'s columns.");
    // WARNING: a and this must have same type
    if (!(GetMatrixType() == a.GetMatrixType()))
        NOT_IMPLEMENTED;
    SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->RowElementMultiplyWith(*a.m_CPUMatrix),
                            m_GPUMatrix->RowElementMultiplyWith(*a.m_GPUMatrix),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
// Divides every row of [this] element-wise by the row vector a
// (a must be 1 x GetNumCols()). Dense only.
// NOTE(review): like RowElementMultiplyWith, this omits DecideAndMoveToRightDevice —
// confirm operands are on the same device at call sites.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::RowElementDivideBy(const Matrix<ElemType>& a)
{
    if (a.IsEmpty() || IsEmpty())
        LogicError("RowElementDivideBy: Matrix is empty.");
    if (!(a.GetNumCols() == GetNumCols() && a.GetNumRows() == 1))
        InvalidArgument("RowElementDivideBy: The input matrix should be a row vector and match [this]'s columns.");
    // WARNING: a and this must have same type
    if (!(GetMatrixType() == a.GetMatrixType()))
        NOT_IMPLEMENTED;
    SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->RowElementDivideBy(*a.m_CPUMatrix),
                            m_GPUMatrix->RowElementDivideBy(*a.m_GPUMatrix),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
// Divides every column of [this] element-wise by the column vector a
// (a must be GetNumRows() x 1). Dense only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::ColumnElementDivideBy(const Matrix<ElemType>& a)
{
    if (a.IsEmpty() || IsEmpty())
        LogicError("ColumnElementDivideBy: Matrix is empty.");
    if (!(a.GetNumRows() == GetNumRows() && a.GetNumCols() == 1))
        InvalidArgument("ColumnElementDivideBy: The input matrix should be a col vector and match [this]'s rows.");
    DecideAndMoveToRightDevice(*this, a);
    // WARNING: a and this must have same type
    if (!(GetMatrixType() == a.GetMatrixType()))
        NOT_IMPLEMENTED;
    SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
    DISPATCH_MATRIX_ON_FLAG(&a,
                            this,
                            m_CPUMatrix->ColumnElementDivideBy(*a.m_CPUMatrix),
                            m_GPUMatrix->ColumnElementDivideBy(*a.m_GPUMatrix),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
//[this]=1 ./ a
// In-place element-wise reciprocal. CPU sparse is not implemented.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::ElementInverse()
{
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->ElementInverse(),
                            m_GPUMatrix->ElementInverse(),
                            NOT_IMPLEMENTED,
                            m_GPUSparseMatrix->ElementInverse());
    return (*this);
}
// [this] = 1 ./ a, element-wise. Adopts a's device and matrix type; CPU sparse not implemented.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignElementInverseOf(const Matrix<ElemType>& a)
{
    if (a.IsEmpty())
        LogicError("AssignElementInverseOf: Matrix a is empty.");
    DecideAndMoveToRightDevice(a, *this);
    SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
    DISPATCH_MATRIX_ON_FLAG(&a,
                            this,
                            m_CPUMatrix->AssignElementInverseOf(*a.m_CPUMatrix),
                            m_GPUMatrix->AssignElementInverseOf(*a.m_GPUMatrix),
                            NOT_IMPLEMENTED,
                            m_GPUSparseMatrix->AssignElementInverseOf(*a.m_GPUSparseMatrix));
    return *this;
}
// [this] = sigmoid([this]), element-wise, in place. CPU sparse not implemented.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceSigmoid()
{
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->InplaceSigmoid(),
                            m_GPUMatrix->InplaceSigmoid(),
                            NOT_IMPLEMENTED,
                            m_GPUSparseMatrix->InplaceSigmoid());
    return (*this);
}
// [this] = sigmoid(a), element-wise. Adopts a's device and matrix type; CPU sparse not implemented.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignSigmoidOf(const Matrix<ElemType>& a)
{
    DecideAndMoveToRightDevice(a, *this);
    SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
    DISPATCH_MATRIX_ON_FLAG(&a,
                            this,
                            m_CPUMatrix->AssignSigmoidOf(*a.m_CPUMatrix),
                            m_GPUMatrix->AssignSigmoidOf(*a.m_GPUMatrix),
                            NOT_IMPLEMENTED,
                            m_GPUSparseMatrix->AssignSigmoidOf(*a.m_GPUSparseMatrix));
    return *this;
}
// [this] = ReLU'([this]) element wise, in place. CPU sparse not implemented.
// (The previous comment here said "sigmoid" — a copy-paste leftover; this is the
// linear-rectifier derivative, as the called implementations show.)
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceLinearRectifierDerivative()
{
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->InplaceLinearRectifierDerivative(),
                            m_GPUMatrix->InplaceLinearRectifierDerivative(),
                            NOT_IMPLEMENTED,
                            m_GPUSparseMatrix->InplaceLinearRectifierDerivative());
    return (*this);
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignLinearRectifierDerivativeOf(const Matrix<ElemType>& a)
{
// [this] = LinearRectifierDerivative(a), element wise; adopts a's matrix type and device.
// NOTE(review): no empty-input check, unlike most Assign*Of siblings — confirm intentional.
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignLinearRectifierDerivativeOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignLinearRectifierDerivativeOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->AssignLinearRectifierDerivativeOf(*a.m_GPUSparseMatrix));
return *this;
}
//[this]=SigmoidDerivative([this]) element wise
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceSigmoidDerivative()
{
// In-place element-wise sigmoid derivative; dense only (both sparse formats unsupported).
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceSigmoidDerivative(),
m_GPUMatrix->InplaceSigmoidDerivative(),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return (*this);
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignSigmoidDerivativeOf(const Matrix<ElemType>& a)
{
// [this] = SigmoidDerivative(a), element wise; dense only.
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignSigmoidDerivativeOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignSigmoidDerivativeOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignNumOfDiff(const Matrix<ElemType>& a, const Matrix<ElemType>& b, bool searchInCol)
{
// [this] = number of differing elements between a and b (per searchInCol mode); dense only.
DecideAndMoveToRightDevice(a, b, *this);
// WARNING: a and b must have same type
if (!(a.GetMatrixType() == b.GetMatrixType()))
NOT_IMPLEMENTED;
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->AssignNumOfDiff(*a.m_CPUMatrix, *b.m_CPUMatrix, searchInCol),
m_GPUMatrix->AssignNumOfDiff(*a.m_GPUMatrix, *b.m_GPUMatrix, searchInCol),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
//[this]=tanh([this]) element wise
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceTanh()
{
// In-place element-wise tanh: [this] = tanh([this]). CPU-sparse unsupported.
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceTanh(),
m_GPUMatrix->InplaceTanh(),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->InplaceTanh());
return (*this);
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignTanhOf(const Matrix<ElemType>& a)
{
// [this] = tanh(a), element wise; adopts a's matrix type and device.
// NOTE(review): no empty-input check, unlike most Assign*Of siblings — confirm intentional.
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignTanhOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignTanhOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->AssignTanhOf(*a.m_GPUSparseMatrix));
return *this;
}
//[this]=atanh([this]) element wise
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceAtanh()
{
// In-place element-wise atanh; dense only.
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceAtanh(),
m_GPUMatrix->InplaceAtanh(),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignAtanhOf(const Matrix<ElemType>& a)
{
// [this] = atanh(a), element wise; dense only.
if (a.IsEmpty())
LogicError("AssignAtanhOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignAtanhOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignAtanhOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
//[this]=logsoftmax([this]) element wise
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceLogSoftmax(const bool isColWise)
{
// In-place log-softmax, computed per column (isColWise) or per row; dense only.
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceLogSoftmax(isColWise),
m_GPUMatrix->InplaceLogSoftmax(isColWise),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignLogSoftmaxOf(const Matrix<ElemType>& a, const bool isColWise)
{
// [this] = logsoftmax(a), per column (isColWise) or per row; dense only.
if (a.IsEmpty())
LogicError("AssignLogSoftmaxOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignLogSoftmaxOf(*a.m_CPUMatrix, isColWise),
m_GPUMatrix->AssignLogSoftmaxOf(*a.m_GPUMatrix, isColWise),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
//[this]=hardmax([this])
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceHardmax(const bool isColWise)
{
// In-place hardmax, per column (isColWise) or per row; dense only.
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceHardmax(isColWise),
m_GPUMatrix->InplaceHardmax(isColWise),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignHardmaxOf(const Matrix<ElemType>& a, const bool isColWise)
{
// [this] = hardmax(a), per column (isColWise) or per row; dense only.
if (a.IsEmpty())
LogicError("AssignHardmaxOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignHardmaxOf(*a.m_CPUMatrix, isColWise),
m_GPUMatrix->AssignHardmaxOf(*a.m_GPUMatrix, isColWise),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceSqrt()
{
// In-place element-wise square root. CPU-sparse unsupported.
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceSqrt(),
m_GPUMatrix->InplaceSqrt(),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->InplaceSqrt());
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignSqrtOf(const Matrix<ElemType>& a)
{
// [this] = sqrt(a), element wise; adopts a's matrix type and device.
if (a.IsEmpty())
LogicError("AssignSqrtOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignSqrtOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignSqrtOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->AssignSqrtOf(*a.m_GPUSparseMatrix));
return *this;
}
//[this]=exp([this]) element wise
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceExp()
{
// In-place element-wise exponential: [this] = exp([this]). CPU-sparse unsupported.
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceExp(),
m_GPUMatrix->InplaceExp(),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->InplaceExp());
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignExpOf(const Matrix<ElemType>& a)
{
// [this] = exp(a), element wise; adopts a's matrix type and device.
if (a.IsEmpty())
LogicError("AssignExpOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignExpOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignExpOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->AssignExpOf(*a.m_GPUSparseMatrix));
return *this;
}
//[this]=abs([this]) element wise
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceAbs()
{
    // In-place element-wise absolute value: [this] = abs([this]). CPU-sparse unsupported.
    // Fix: pass 'this' (not nullptr) as the dispatch result matrix, matching every
    // other Inplace* operation, so the result's data location is recorded correctly.
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->InplaceAbs(),
                            m_GPUMatrix->InplaceAbs(),
                            NOT_IMPLEMENTED,
                            m_GPUSparseMatrix->InplaceAbs());
    return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignAbsOf(const Matrix<ElemType>& a)
{
// [this] = abs(a), element wise; adopts a's matrix type and device.
if (a.IsEmpty())
LogicError("AssignAbsOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignAbsOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignAbsOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->AssignAbsOf(*a.m_GPUSparseMatrix));
return *this;
}
//[this]=log([this]) element wise
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceLog()
{
// In-place element-wise natural log. CPU-sparse unsupported.
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceLog(),
m_GPUMatrix->InplaceLog(),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->InplaceLog());
return *this;
}
//[this]=log10([this]) element wise
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceLog10()
{
// In-place element-wise base-10 log; only implemented for CPU dense.
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceLog10(),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignLogOf(const Matrix<ElemType>& a)
{
// [this] = log(a) (natural log), element wise; adopts a's matrix type and device.
if (a.IsEmpty())
LogicError("AssignLogOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignLogOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignLogOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->AssignLogOf(*a.m_GPUSparseMatrix));
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignLog10Of(const Matrix<ElemType>& a)
{
    // [this] = log10(a), element wise; only the CPU dense path is implemented.
    if (a.IsEmpty())
        LogicError("AssignLog10Of: Matrix a is empty."); // fix: message previously said "AssignLogOf"
    DecideAndMoveToRightDevice(a, *this);
    SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
    // Fix: the GPU-sparse branch previously called AssignLogOf (natural log),
    // silently computing the wrong base; fail loudly instead until a real
    // GPU-sparse AssignLog10Of exists.
    DISPATCH_MATRIX_ON_FLAG(&a,
                            this,
                            m_CPUMatrix->AssignLog10Of(*a.m_CPUMatrix),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
//[this]=cos([this]) element wise
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceCosine()
{
// In-place element-wise cosine; dense only.
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceCosine(),
m_GPUMatrix->InplaceCosine(),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignCosineOf(const Matrix<ElemType>& a)
{
// [this] = cos(a), element wise; dense only.
if (a.IsEmpty())
LogicError("AssignCosineOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignCosineOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignCosineOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
//[this]= -sin([this]) element wise
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceNegativeSine()
{
// In-place element-wise negative sine: [this] = -sin([this]); dense only.
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceNegativeSine(),
m_GPUMatrix->InplaceNegativeSine(),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignNegativeSineOf(const Matrix<ElemType>& a)
{
// [this] = -sin(a), element wise; dense only.
if (a.IsEmpty())
LogicError("AssignNegativeSineOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignNegativeSineOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignNegativeSineOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
//[this]=tan([this]) element wise
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceTan()
{
// In-place element-wise tangent; dense only.
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceTan(),
m_GPUMatrix->InplaceTan(),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignTanOf(const Matrix<ElemType>& a)
{
// [this] = tan(a), element wise; dense only.
if (a.IsEmpty())
LogicError("AssignTanOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignTanOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignTanOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
//[this]=acos([this]) element wise
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceAcos()
{
// In-place element-wise arccosine; dense only.
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceAcos(),
m_GPUMatrix->InplaceAcos(),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignAcosOf(const Matrix<ElemType>& a)
{
// [this] = acos(a), element wise; dense only.
if (a.IsEmpty())
LogicError("AssignAcosOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignAcosOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignAcosOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
//[this]=asin([this]) element wise
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceAsin()
{
// In-place element-wise arcsine; dense only.
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceAsin(),
m_GPUMatrix->InplaceAsin(),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignAsinOf(const Matrix<ElemType>& a)
{
// [this] = asin(a), element wise; dense only.
if (a.IsEmpty())
LogicError("AssignAsinOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignAsinOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignAsinOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
//[this]=atan([this]) element wise
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceAtan()
{
// In-place element-wise arctangent; dense only.
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceAtan(),
m_GPUMatrix->InplaceAtan(),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignAtanOf(const Matrix<ElemType>& a)
{
// [this] = atan(a), element wise; dense only.
if (a.IsEmpty())
LogicError("AssignAtanOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignAtanOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignAtanOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
//[this]=cosh([this]) element wise
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceCosh()
{
// In-place element-wise hyperbolic cosine; dense only.
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceCosh(),
m_GPUMatrix->InplaceCosh(),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignCoshOf(const Matrix<ElemType>& a)
{
// [this] = cosh(a), element wise; dense only.
if (a.IsEmpty())
LogicError("AssignCoshOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignCoshOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignCoshOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
//[this]=sinh([this]) element wise
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceSinh()
{
// In-place element-wise hyperbolic sine; dense only.
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceSinh(),
m_GPUMatrix->InplaceSinh(),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignSinhOf(const Matrix<ElemType>& a)
{
// [this] = sinh(a), element wise; dense only.
if (a.IsEmpty())
LogicError("AssignSinhOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignSinhOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignSinhOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
//[this]=asinh([this]) element wise
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceAsinh()
{
// In-place element-wise inverse hyperbolic sine; dense only.
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceAsinh(),
m_GPUMatrix->InplaceAsinh(),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignAsinhOf(const Matrix<ElemType>& a)
{
// [this] = asinh(a), element wise; dense only.
if (a.IsEmpty())
LogicError("AssignAsinhOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignAsinhOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignAsinhOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceTruncate(const ElemType threshold)
{
// In-place symmetric truncation by threshold; supported by all four matrix formats.
// A non-finite threshold is a no-op.
if (IsEmpty())
LogicError("InplaceTruncate: Matrix is empty.");
if (sizeof(ElemType) == sizeof(float))
{
// Cast to float so the float overload of isfinite is used when ElemType is float-sized.
if (!isfinite((float) threshold))
return *this;
}
else
{
if (!isfinite(threshold))
return *this;
}
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceTruncate(threshold),
m_GPUMatrix->InplaceTruncate(threshold),
m_CPUSparseMatrix->InplaceTruncate(threshold),
m_GPUSparseMatrix->InplaceTruncate(threshold));
return *this;
}
template <class ElemType>
void Matrix<ElemType>::InplaceTranspose()
{
// Transposes the matrix in place; only implemented for GPU sparse.
// An empty matrix is a silent no-op.
if (IsEmpty())
return;
DISPATCH_MATRIX_ON_FLAG(this,
this,
NOT_IMPLEMENTED,
NOT_IMPLEMENTED,
NOT_IMPLEMENTED,
m_GPUSparseMatrix->InplaceTranspose());
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceSoftThreshold(const ElemType threshold)
{
// In-place soft-thresholding (shrinkage); supported by all four matrix formats.
// threshold must be non-negative; a zero threshold is a no-op.
assert(threshold >= 0);
if (IsEmpty())
LogicError("InplaceSoftThreshold: Matrix is empty.");
if (threshold == 0)
return *this;
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceSoftThreshold(threshold),
m_GPUMatrix->InplaceSoftThreshold(threshold),
m_CPUSparseMatrix->InplaceSoftThreshold(threshold),
m_GPUSparseMatrix->InplaceSoftThreshold(threshold));
return *this;
}
//Threshold truncating: this[i] = max( this[i], threshold )
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceTruncateBottom(const ElemType threshold)
{
// In-place lower clamp: this[i] = max(this[i], threshold); all four formats supported.
// A non-finite threshold is a no-op.
if (IsEmpty())
LogicError("InplaceTruncateBottom: Matrix is empty.");
if (!isfinite(threshold))
return *this;
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceTruncateBottom(threshold),
m_GPUMatrix->InplaceTruncateBottom(threshold),
m_CPUSparseMatrix->InplaceTruncateBottom(threshold),
m_GPUSparseMatrix->InplaceTruncateBottom(threshold));
return *this;
}
//Threshold truncating: this[i] = max( a[i], threshold )
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignTruncateBottomOf(const Matrix<ElemType>& a, const ElemType threshold)
{
// [this] = max(a, threshold), element wise. A non-finite threshold degenerates
// to a plain copy of a. CPU-sparse unsupported.
if (a.IsEmpty())
LogicError("AssignTruncateBottomOf: Matrix a is empty.");
if (!isfinite(threshold))
{
this->SetValue(a);
return *this;
}
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignTruncateBottomOf(*a.m_CPUMatrix, threshold),
m_GPUMatrix->AssignTruncateBottomOf(*a.m_GPUMatrix, threshold),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->AssignTruncateBottomOf(*a.m_GPUSparseMatrix, threshold));
return *this;
}
//Threshold truncating: this[i] = min( this[i], threshold )
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceTruncateTop(const ElemType threshold)
{
// In-place upper clamp: this[i] = min(this[i], threshold); all four formats supported.
// A non-finite threshold is a no-op.
if (IsEmpty())
LogicError("InplaceTruncateTop: Matrix is empty.");
if (!isfinite(threshold))
return *this;
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceTruncateTop(threshold),
m_GPUMatrix->InplaceTruncateTop(threshold),
m_CPUSparseMatrix->InplaceTruncateTop(threshold),
m_GPUSparseMatrix->InplaceTruncateTop(threshold));
return *this;
}
//Threshold truncating: this[i] = min( a[i], threshold )
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignTruncateTopOf(const Matrix<ElemType>& a, const ElemType threshold)
{
// [this] = min(a, threshold), element wise. A non-finite threshold degenerates
// to a plain copy of a. CPU-sparse unsupported.
if (a.IsEmpty())
LogicError("AssignTruncateTopOf: Matrix a is empty.");
if (!isfinite(threshold))
{
this->SetValue(a);
return *this;
}
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignTruncateTopOf(*a.m_CPUMatrix, threshold),
m_GPUMatrix->AssignTruncateTopOf(*a.m_GPUMatrix, threshold),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->AssignTruncateTopOf(*a.m_GPUSparseMatrix, threshold));
return *this;
}
//Threshold zeroing: this[i] = 0 if abs(this[i]) < threshold.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::SetToZeroIfAbsLessThan(const ElemType threshold)
{
// Zeroes every element whose absolute value is below threshold. CPU-sparse unsupported.
if (IsEmpty())
LogicError("SetToZeroIfAbsLessThan: Matrix is empty.");
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->SetToZeroIfAbsLessThan(threshold),
m_GPUMatrix->SetToZeroIfAbsLessThan(threshold),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->SetToZeroIfAbsLessThan(threshold));
return *this;
}
//sum of all elements
template <class ElemType>
ElemType Matrix<ElemType>::SumOfElements() const
{
// Returns the sum of all elements; supported by all four matrix formats.
if (IsEmpty())
LogicError("SumOfElements: Matrix is empty.");
DISPATCH_MATRIX_ON_FLAG(this,
nullptr,
return m_CPUMatrix->SumOfElements(),
return m_GPUMatrix->SumOfElements(),
return m_CPUSparseMatrix->SumOfElements(),
return m_GPUSparseMatrix->SumOfElements());
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignOneHot(const Matrix<ElemType>& a, vector<size_t>& shape, size_t axis, bool is_sparse)
{
// Builds a one-hot encoding of the index values in a along the given axis of shape.
// Sparse *input* is not supported; a sparse *output* is produced in CSC format when
// is_sparse is set. The _USECPU_4BOTH dispatch reads a's CPU dense copy for the
// sparse branches (note: *a.m_CPUMatrix, not a sparse member).
if (a.IsEmpty())
LogicError("AssignOneHot: Matrix a is empty.");
if (axis >= shape.size())
LogicError("AssignOneHot: axis is not correct");
if (a.GetMatrixType() == SPARSE)
NOT_IMPLEMENTED;
//todo: sparse matrix type
if (is_sparse && GetMatrixType() != SPARSE)
{
SwitchToMatrixType(SPARSE, matrixFormatSparseCSC, false);
}
DISPATCH_MATRIX_ON_FLAG_USECPU_4BOTH(this,
this,
m_CPUMatrix->AssignOneHot(*a.m_CPUMatrix, shape, axis),
m_GPUMatrix->AssignOneHot(*a.m_GPUMatrix, shape, axis),
m_CPUSparseMatrix->AssignOneHot(*a.m_CPUMatrix, shape, axis),
m_GPUSparseMatrix->AssignOneHot(*a.m_GPUMatrix, shape, axis)
);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::GatherFromTarget(const Matrix<ElemType>& indices, const Matrix<ElemType>& target, size_t row_elements)
{
// Gathers rows (row_elements values per index) from target into [this] according
// to indices. Dispatches on indices' location; dense only.
if (indices.IsEmpty() || target.IsEmpty())
LogicError("GatherFromTarget: Input matrix is empty.");
DISPATCH_MATRIX_ON_FLAG(&indices,
this,
m_CPUMatrix->GatherFromTarget(*indices.m_CPUMatrix, *target.m_CPUMatrix, row_elements),
m_GPUMatrix->GatherFromTarget(*indices.m_GPUMatrix, *target.m_GPUMatrix, row_elements),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::ScatterToIndices(const Matrix<ElemType>& values, const Matrix<ElemType>& indices, size_t row_elements, const Matrix<char>* mask/* = nullptr*/)
{
    // Scatters values (row_elements values per index) into [this] at the positions
    // given by indices, optionally skipping columns masked out by mask. Dispatches
    // on values' location; dense only.
    // Fix: error messages previously said "ScatterAccordingIndices", which does not
    // match this function's name and makes the failure hard to trace.
    if (indices.IsEmpty() || values.IsEmpty() || (mask && mask->IsEmpty()))
        LogicError("ScatterToIndices: input matrix is empty.");
    // mask columns must tile evenly into the indices columns
    if (mask && (indices.GetNumCols() % mask->GetNumCols() != 0))
        LogicError("ScatterToIndices: The number of columns(%zu) of the matrix slice to be masked is not a multiple of the number of columns(%zu) of the mask slice.",
                   indices.GetNumCols(), mask->GetNumCols());
    DISPATCH_MATRIX_ON_FLAG(&values,
                            this,
                            m_CPUMatrix->ScatterToIndices(*values.m_CPUMatrix, *indices.m_CPUMatrix, row_elements, mask ? mask->m_CPUMatrix.get() : nullptr),
                            m_GPUMatrix->ScatterToIndices(*values.m_GPUMatrix, *indices.m_GPUMatrix, row_elements, mask ? mask->m_GPUMatrix.get() : nullptr),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignSumOfElements(const Matrix<ElemType>& a)
{
// [this] = sum of all elements of a (as a matrix-valued result); dense only.
if (a.IsEmpty())
LogicError("AssignSumOfElements: Matrix a is empty.");
// WARNING: a and this must have same type
if (!(GetMatrixType() == a.GetMatrixType()))
NOT_IMPLEMENTED;
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignSumOfElements(*a.m_CPUMatrix),
m_GPUMatrix->AssignSumOfElements(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
DeviceBoundNumber<ElemType> Matrix<ElemType>::Sum_AsDeviceBoundNum() const
{
// Returns the sum of all elements wrapped in a DeviceBoundNumber; dense only.
DeviceBoundNumber<ElemType> result;
DISPATCH_MATRIX_ON_FLAG(this,
nullptr,
// CPU path: heap-allocates the scalar and hands it to ShallowCopyFrom with
// device id -1 (CPU); presumably ShallowCopyFrom takes ownership of 'val' —
// TODO(review): confirm, otherwise this leaks.
ElemType* val = new ElemType;
* val = m_CPUMatrix->SumOfElements(); result.ShallowCopyFrom(val, -1); return result,
return m_GPUMatrix->Sum_AsDeviceBoundNum(),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
//sum of the absolute values of all elements
template <class ElemType>
ElemType Matrix<ElemType>::SumOfAbsElements() const
{
// Returns the sum of absolute values of all elements. CPU-sparse unsupported.
if (IsEmpty())
LogicError("SumOfAbsElements: Matrix is empty.");
DISPATCH_MATRIX_ON_FLAG(this, nullptr,
{ return m_CPUMatrix->SumOfAbsElements(); },
{ return m_GPUMatrix->SumOfAbsElements(); },
{ NOT_IMPLEMENTED; },
{ return m_GPUSparseMatrix->SumOfAbsElements(); });
}
//log-sum of all elements (see LogSumOfElements)
template <class ElemType>
ElemType Matrix<ElemType>::LogSumOfElements() const
{
// Returns the log-sum over all elements; dense only.
if (IsEmpty())
LogicError("LogSumOfElements: Matrix is empty.");
DISPATCH_MATRIX_ON_FLAG(this, nullptr,
{ return m_CPUMatrix->LogSumOfElements(); },
{ return m_GPUMatrix->LogSumOfElements(); },
{ NOT_IMPLEMENTED},
{ NOT_IMPLEMENTED });
}
template <class ElemType>
bool Matrix<ElemType>::IsValid() const
{
// Validity check; only implemented for GPU sparse matrices. All other
// location/type combinations raise NOT_IMPLEMENTED.
if (m_currentDataLocation == CurrentDataLocation::GPU && GetMatrixType() == MatrixType::SPARSE)
{
return this->m_GPUSparseMatrix->IsValid();
}
else
{
NOT_IMPLEMENTED;
}
// unreachable; present to satisfy the compiler's return-path analysis
return false;
}
template <class ElemType>
bool Matrix<ElemType>::IsEqualTo(const Matrix<ElemType>& a, const ElemType threshold /*= 1e-8*/) const
{
// Element-wise equality within threshold; delegates to the static AreEqual.
return AreEqual(*this, a, threshold);
}
template <class ElemType>
void Matrix<ElemType>::VectorSum(const Matrix<ElemType>& a, Matrix<ElemType>& c, const bool isColWise)
{
// Static helper: c = per-column (or per-row, if !isColWise) sums of a; dense only.
// a and c must already share a matrix type.
DecideAndMoveToRightDevice(c, a);
if (!(a.GetMatrixType() == c.GetMatrixType()))
NOT_IMPLEMENTED;
DISPATCH_MATRIX_ON_FLAG(&c,
&c,
CPUMatrix<ElemType>::VectorSum(*a.m_CPUMatrix, *c.m_CPUMatrix, isColWise),
GPUMatrix<ElemType>::VectorSum(*a.m_GPUMatrix, *c.m_GPUMatrix, isColWise),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::VectorNorm1(Matrix<ElemType>& c, const bool isColWise) const
{
    // Computes the L1 norm of each column (or each row, if !isColWise) into c; dense only.
    if (IsEmpty())
        LogicError("VectorNorm1: Matrix is empty."); // fix: message previously said "VectorNormInf" (copy/paste)
    DecideAndMoveToRightDevice(*this, c);
    c.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);
    DISPATCH_MATRIX_ON_FLAG(this,
                            &c,
                            m_CPUMatrix->VectorNorm1(*c.m_CPUMatrix, isColWise),
                            m_GPUMatrix->VectorNorm1(*c.m_GPUMatrix, isColWise),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignVectorNorm1Of(Matrix<ElemType>& a, const bool isColWise)
{
// [this] = per-column (or per-row) L1 norms of a; thin wrapper over VectorNorm1.
a.VectorNorm1(*this, isColWise);
return *this;
}
template <class ElemType>
void Matrix<ElemType>::VectorNorm2(Matrix<ElemType>& c, const bool isColWise) const
{
// Computes the L2 norm of each column (or each row, if !isColWise) into c; dense only.
if (IsEmpty())
LogicError("VectorNorm2: Matrix is empty.");
DecideAndMoveToRightDevice(*this, c);
c.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this,
&c,
m_CPUMatrix->VectorNorm2(*c.m_CPUMatrix, isColWise),
m_GPUMatrix->VectorNorm2(*c.m_GPUMatrix, isColWise),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignVectorNorm2Of(Matrix<ElemType>& a, const bool isColWise)
{
// [this] = per-column (or per-row) L2 norms of a; thin wrapper over VectorNorm2.
a.VectorNorm2(*this, isColWise);
return *this;
}
template <class ElemType>
void Matrix<ElemType>::VectorNormInf(Matrix<ElemType>& c, const bool isColWise) const
{
// Computes the L-infinity norm of each column (or each row, if !isColWise) into c; dense only.
if (IsEmpty())
LogicError("VectorNormInf: Matrix is empty.");
DecideAndMoveToRightDevice(*this, c);
c.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this,
&c,
m_CPUMatrix->VectorNormInf(*c.m_CPUMatrix, isColWise),
m_GPUMatrix->VectorNormInf(*c.m_GPUMatrix, isColWise),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignVectorNormInfOf(Matrix<ElemType>& a, const bool isColWise)
{
// [this] = per-column (or per-row) L-infinity norms of a; thin wrapper over VectorNormInf.
a.VectorNormInf(*this, isColWise);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignInnerProductOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b, const bool isColWise)
{
// [this] = per-column (or per-row) inner products of a and b; thin wrapper over the static InnerProduct.
InnerProduct(a, b, *this, isColWise);
return *this;
}
//column-wise Khatri-Rao product (column-wise Kronecker product)
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignKhatriRaoProductOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b)
{
// [this] = Khatri-Rao product of a and b (column-wise Kronecker product).
// a and b must have the same number of columns and the same matrix type; dense only.
if (a.IsEmpty() || b.IsEmpty())
LogicError("AssignKhatriRaoProductOf: Matrix is empty.");
assert(a.GetNumCols() == b.GetNumCols());
if (!(a.GetNumCols() == b.GetNumCols()))
InvalidArgument("AssignKhatriRaoProductOf: The input matrix dimensions do not match.");
DecideAndMoveToRightDevice(a, b, *this);
// WARNING: a and b must have same type
if (!(a.GetMatrixType() == b.GetMatrixType()))
NOT_IMPLEMENTED;
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->AssignKhatriRaoProductOf(*a.m_CPUMatrix, *b.m_CPUMatrix),
m_GPUMatrix->AssignKhatriRaoProductOf(*a.m_GPUMatrix, *b.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
//column-wise reshaped product. Used to compute KhatriRaoProduct Gradient
// this = reshape each column of a from (K1xK2,1) to (K1, K2)
// if each column of a is not transposed, each (K1, K2) times each column of b (K2, frames).
// the output is a (K1, frames) matrix
// if each column of a is transposed, each (K1, K2)^T times each column of b (K1, frames) and the output is (K2, frames)
// (accumulating variant; see AssignKhatriRaoProductOf above for the forward product)
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AddColumnReshapeProductOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b, const bool transposeAColumn)
{
// [this] += column-wise reshaped product of a and b (used for the KhatriRaoProduct
// gradient; see the comment block above for the per-column shapes). a, b and *this
// must all share a matrix type; dense only.
if (a.IsEmpty() || b.IsEmpty())
LogicError("AddColumnReshapeProductOf: Matrix is empty.");
assert(a.GetNumCols() == b.GetNumCols());
if (!(a.GetNumCols() == b.GetNumCols()))
InvalidArgument("AddColumnReshapeProductOf: The input matrix dimensions do not match.");
DecideAndMoveToRightDevice(*this, a, b);
// WARNING: a and b must have same type
if (!(a.GetMatrixType() == b.GetMatrixType() && GetMatrixType() == b.GetMatrixType()))
NOT_IMPLEMENTED;
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->AddColumnReshapeProductOf(*a.m_CPUMatrix, *b.m_CPUMatrix, transposeAColumn),
m_GPUMatrix->AddColumnReshapeProductOf(*a.m_GPUMatrix, *b.m_GPUMatrix, transposeAColumn),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AddWithScaleOf(ElemType alpha, const Matrix<ElemType>& a)
{
// [this] += alpha * a; thin wrapper over the static ScaleAndAdd.
ScaleAndAdd(alpha, a, *this);
return *this;
}
template <class ElemType>
ElemType Matrix<ElemType>::FrobeniusNorm() const
{
// Returns the Frobenius norm; supported by all four matrix formats.
if (IsEmpty())
LogicError("FrobeniusNorm: Matrix is empty.");
DISPATCH_MATRIX_ON_FLAG(this,
nullptr,
return m_CPUMatrix->FrobeniusNorm(),
return m_GPUMatrix->FrobeniusNorm(),
return m_CPUSparseMatrix->FrobeniusNorm(),
return m_GPUSparseMatrix->FrobeniusNorm());
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignFrobeniusNormOf(const Matrix<ElemType>& a)
{
// [this] = Frobenius norm of a, stored as a 1x1 matrix; dense only.
if (a.IsEmpty())
LogicError("AssignFrobeniusNormOf: Matrix a is empty.");
Resize(1, 1);
// WARNING: a and this must have same type
if (!(GetMatrixType() == a.GetMatrixType()))
NOT_IMPLEMENTED;
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignFrobeniusNormOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignFrobeniusNormOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
ElemType Matrix<ElemType>::MatrixNormInf() const
{
// Returns the infinity norm of the whole matrix. CPU-sparse unsupported.
if (IsEmpty())
LogicError("MatrixNormInf: Matrix is empty.");
DISPATCH_MATRIX_ON_FLAG(this,
nullptr,
return m_CPUMatrix->MatrixNormInf(),
return m_GPUMatrix->MatrixNormInf(),
NOT_IMPLEMENTED,
return m_GPUSparseMatrix->MatrixNormInf());
}
template <class ElemType>
ElemType Matrix<ElemType>::MatrixNorm1() const
{
// Returns the 1-norm of the whole matrix. CPU-sparse unsupported.
if (IsEmpty())
LogicError("MatrixNorm1: Matrix is empty.");
DISPATCH_MATRIX_ON_FLAG(this,
nullptr,
return m_CPUMatrix->MatrixNorm1(),
return m_GPUMatrix->MatrixNorm1(),
NOT_IMPLEMENTED,
return m_GPUSparseMatrix->MatrixNorm1());
}
template <class ElemType>
ElemType Matrix<ElemType>::MatrixNorm0() const
{
// Returns the 0-"norm" (count of non-zero elements). CPU-sparse unsupported.
if (IsEmpty())
LogicError("MatrixNorm0: Matrix is empty.");
DISPATCH_MATRIX_ON_FLAG(this,
nullptr,
return m_CPUMatrix->MatrixNorm0(),
return m_GPUMatrix->MatrixNorm0(),
NOT_IMPLEMENTED,
return m_GPUSparseMatrix->MatrixNorm0());
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignSignOf(const Matrix<ElemType>& a)
{
// [this] = sign(a), element wise; dense only. a and *this must share a matrix type.
if (a.IsEmpty())
LogicError("AssignSignOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
// WARNING: a and this must have same type
if (!(GetMatrixType() == a.GetMatrixType()))
NOT_IMPLEMENTED;
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignSignOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignSignOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AddSignOf(const Matrix<ElemType>& a)
{
// [this] += sign(a), element wise; dense only. a and *this must share a matrix type.
if (a.IsEmpty())
LogicError("AddSignOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
if (!(GetMatrixType() == a.GetMatrixType()))
NOT_IMPLEMENTED;
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AddSignOf(*a.m_CPUMatrix),
m_GPUMatrix->AddSignOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
// I decided to use Matrix<ElemType>& maxIndices instead of integer vector because the result may be used to do additional calculation
template <class ElemType>
void Matrix<ElemType>::VectorMax(Matrix<ElemType>& maxIndices, Matrix<ElemType>& maxValues, const bool isColWise) const
{
// Per-column (or per-row, if !isColWise) maximum values and their indices; dense only.
// Both output matrices are moved to this matrix's device and type first.
if (IsEmpty())
LogicError("VectorMax: Matrix is empty.");
DecideAndMoveToRightDevice(*this, maxIndices, maxValues);
maxIndices.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);
maxValues.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this, &maxValues,
{ m_CPUMatrix->VectorMax(*maxIndices.m_CPUMatrix, *maxValues.m_CPUMatrix, isColWise); maxIndices.SetDataLocation(CPU, DENSE); },
{ m_GPUMatrix->VectorMax(*maxIndices.m_GPUMatrix, *maxValues.m_GPUMatrix, isColWise); maxIndices.SetDataLocation(GPU, DENSE); },
{ NOT_IMPLEMENTED; },
{ NOT_IMPLEMENTED; });
// Note: must SetDataLocation() also on maxIndices, since both maxValues and maxIndices are written.
}
// Top-K variant of VectorMax: returns the topK largest values (and their
// indices) per row or per column instead of only the single maximum.
template <class ElemType>
void Matrix<ElemType>::VectorMax(Matrix<ElemType>& maxIndices, Matrix<ElemType>& maxValues, const bool isColWise, int topK) const
{
    if (IsEmpty())
        LogicError("VectorMax: Matrix is empty.");

    DecideAndMoveToRightDevice(*this, maxIndices, maxValues);
    maxIndices.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);
    maxValues.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);

    DISPATCH_MATRIX_ON_FLAG(this, &maxValues,
                            { m_CPUMatrix->VectorMax(*maxIndices.m_CPUMatrix, *maxValues.m_CPUMatrix, isColWise, topK); maxIndices.SetDataLocation(CPU, DENSE); },
                            { m_GPUMatrix->VectorMax(*maxIndices.m_GPUMatrix, *maxValues.m_GPUMatrix, isColWise, topK); maxIndices.SetDataLocation(GPU, DENSE); },
                            { NOT_IMPLEMENTED; },
                            { NOT_IMPLEMENTED; });
}
// Per-row or per-column min reduction; mirror image of VectorMax.
// minIndices receives the argmin positions, minValues the minimum values.
template <class ElemType>
void Matrix<ElemType>::VectorMin(Matrix<ElemType>& minIndices, Matrix<ElemType>& minValues, const bool isColWise) const
{
    if (IsEmpty())
        LogicError("VectorMin: Matrix is empty.");

    DecideAndMoveToRightDevice(*this, minIndices, minValues);
    minIndices.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);
    minValues.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);

    DISPATCH_MATRIX_ON_FLAG(this, &minValues,
                            { m_CPUMatrix->VectorMin(*minIndices.m_CPUMatrix, *minValues.m_CPUMatrix, isColWise); minIndices.SetDataLocation(CPU, DENSE); },
                            { m_GPUMatrix->VectorMin(*minIndices.m_GPUMatrix, *minValues.m_GPUMatrix, isColWise); minIndices.SetDataLocation(GPU, DENSE); },
                            { NOT_IMPLEMENTED; },
                            { NOT_IMPLEMENTED; });
}
#pragma endregion Member BLAS Functions
#pragma region Other helper Functions
// Returns the device the data currently lives on (CPUDEVICE for CPU-resident
// data, otherwise the GPU compute device id). If no data has been allocated
// anywhere yet, returns the preferred device instead.
template <class ElemType>
int Matrix<ElemType>::GetDeviceId() const
{
    if (m_currentDataLocation == CurrentDataLocation::NONE)
        return m_preferredDeviceId;

    DISPATCH_MATRIX_ON_FLAG(this, nullptr,
                            { return CPUDEVICE; },
                            { return m_GPUMatrix->GetComputeDeviceId(); },
                            { return CPUDEVICE; },
                            { return m_GPUSparseMatrix->GetComputeDeviceId(); });
}
// Returns whether this matrix is DENSE or SPARSE (cached flag; no dispatch needed).
template <class ElemType>
MatrixType Matrix<ElemType>::GetMatrixType() const
{
    return m_matrixType;
}
// Returns the storage format (e.g. dense column-major, CSR/CSC for sparse),
// delegated to whichever concrete matrix currently backs this object.
template <class ElemType>
MatrixFormat Matrix<ElemType>::GetFormat() const
{
    return m_baseMatrix->GetFormat();
}
// TODO: Comment why we need a second ElemType.
// TODO: Move the shared core functions to the front of this source file.
// BUGBUG: This performs a copy operation even for the output matrix that gets overwritten right away.
// We should (1) define which is the output and (2) whether it will be completely overwritten (so we won't actually copy it).
// bring two matrices onto the same device
// If different and preferred devices are the same, move to preferred device.
// Otherwise GPU takes precedence over CPU, and if both are GPU move to a's device.
// The inputs are only distinguished in that a's GPU takes precedence over b's in case they differ.
// TODO: This is called somewhat inconsistently, sometimes with a=*this, sometimes with b=*this.
// Bring two matrices onto the same device (see comment block above for the policy).
// Note: parameters are const&, but _transferToDevice() is itself const (it mutates
// mutable data-location state), which is why this compiles.
template <class ElemType>
template <class ElemType2>
void Matrix<ElemType>::DecideAndMoveToRightDevice(const Matrix<ElemType>& a, const Matrix<ElemType2>& b)
{
    int deviceIdA = a.GetDeviceId(), deviceIdB = b.GetDeviceId();
    if (deviceIdA == deviceIdB) // already colocated: nothing to do
        return;

    // A matrix that does not own its buffer (e.g. a column slice/view) cannot be
    // moved; the owning one must come to it instead.
    if (!a.OwnBuffer() && b.OwnBuffer())
        b._transferToDevice(deviceIdA);
    else if (a.OwnBuffer() && !b.OwnBuffer())
        a._transferToDevice(deviceIdB);
    else
    {
        int preferredDeviceIdA = a.GetPreferredDeviceId(), preferredDeviceIdB = b.GetPreferredDeviceId();

        if (preferredDeviceIdA == preferredDeviceIdB) // both prefer the same device: move to preferred
        {
            a._transferToDevice(preferredDeviceIdA);
            b._transferToDevice(preferredDeviceIdA);
        }
        else if (deviceIdA != CPUDEVICE) // one of them lives on GPU: use that
        {
            b._transferToDevice(deviceIdA);
        }
        else
        {
            a._transferToDevice(deviceIdB);
        }
    }
}
// same but for 3 matrices
// If b and c are both on the same GPU then a will be forced to go there; otherwise a's GPU takes precedence, then b's.
template <class ElemType>
void Matrix<ElemType>::DecideAndMoveToRightDevice(const Matrix<ElemType>& a, const Matrix<ElemType>& b, const Matrix<ElemType>& c)
{
    int deviceIdA = a.GetDeviceId(), deviceIdB = b.GetDeviceId(), deviceIdC = c.GetDeviceId();
    if (deviceIdA == deviceIdB && deviceIdA == deviceIdC) // all three already colocated
        return;

    int preferredDeviceIdA = a.GetPreferredDeviceId(), preferredDeviceIdB = b.GetPreferredDeviceId(), preferredDeviceIdC = c.GetPreferredDeviceId();

    // All three prefer the same device: honor the preference.
    if (preferredDeviceIdA == preferredDeviceIdB && preferredDeviceIdA == preferredDeviceIdC)
    {
        a._transferToDevice(preferredDeviceIdA);
        b._transferToDevice(preferredDeviceIdA);
        c._transferToDevice(preferredDeviceIdA);
    }
    else if (deviceIdB == deviceIdC && deviceIdB != CPUDEVICE) // TODO: why not the other two combinations?
    {
        a._transferToDevice(deviceIdB); // 'a' is outvoted
    }
    else if (deviceIdA != CPUDEVICE) // one of them lives on GPU: use that
    {
        b._transferToDevice(deviceIdA);
        c._transferToDevice(deviceIdA);
    }
    else if (deviceIdB != CPUDEVICE)
    {
        a._transferToDevice(deviceIdB);
        c._transferToDevice(deviceIdB);
    }
    else // only c can be on a GPU at this point (a and b are on CPU)
    {
        a._transferToDevice(deviceIdC);
        b._transferToDevice(deviceIdC);
    }
}
// same but for 4 matrices: colocate a,b,c by the 3-way policy, then drag d along.
template <class ElemType>
void Matrix<ElemType>::DecideAndMoveToRightDevice(const Matrix<ElemType>& a, const Matrix<ElemType>& b, const Matrix<ElemType>& c, const Matrix<ElemType>& d)
{
    // this function is only called for one operator, so for now we keep it simple
    DecideAndMoveToRightDevice(a, b, c);
    d._transferToDevice(a.GetDeviceId()); // BUGBUG: Is this correct in case a,b,c share the same preferredDevice?
}
// Move this matrix to device 'to_id' (no-op if already there). Only matrices
// that own their buffer may be moved; views over external storage raise.
template <class ElemType>
void Matrix<ElemType>::_transferToDevice(int to_id, bool isBeingMoved /*= true*/, bool emptyTransfer /* = false*/) const
{
    int from_id = GetDeviceId();
    if (to_id == from_id) // nothing to do
        return;

    if (OwnBuffer())
        _transferFromDeviceToDevice(from_id, to_id, isBeingMoved, emptyTransfer);
    else
        RuntimeError("Cannot move externally owned matrices to the preferred device.");
}
// this function performs data transfer and updates data location, but not the device that is stored with it
//
// Parameters:
//  from_id / to_id  -- source/target devices; negative ids are normalized to CPUDEVICE.
//  isBeingMoved     -- true: source copy is released and location becomes CPU/GPU only;
//                      false: both copies are kept and location becomes BOTH.
//  emptyTransfer    -- true: only allocate/resize on the target, don't copy the values.
//
// Handles all four combinations of {dense, sparse} x {CPU->GPU, GPU->CPU}, plus
// GPU->GPU moves which are delegated to ChangeDeviceTo().
template <class ElemType>
void Matrix<ElemType>::_transferFromDeviceToDevice(int from_id, int to_id, bool isBeingMoved /*= true*/, bool emptyTransfer /* = false*/) const
{
    // Normalize negative device ids to CPUDEVICE.
    if (from_id < 0)
        from_id = CPUDEVICE;
    if (to_id < 0)
        to_id = CPUDEVICE;

    if (from_id == to_id)
    {
        if (from_id != GetDeviceId())
            RuntimeError("Trying to transfer matrix from device to the same device while the matrix does not live in the from device.");
        return;
    }

    // warn about device change
    // Frequent back-and-forth transfers of the same matrix are a performance bug;
    // track up to two distinct target devices and warn once the threshold is hit.
#define NUM_DEVICE_CHANGED_WARN 20
    if (m_numTimesDeviceChanged <= NUM_DEVICE_CHANGED_WARN &&
        (!emptyTransfer || (from_id >= 0 && to_id >= 0)))
    {
        m_numTimesDeviceChanged++;
        if (m_devicesTransferedTo[0] < CPUDEVICE)
            m_devicesTransferedTo[0] = to_id;
        else if (m_devicesTransferedTo[0] != to_id)
            m_devicesTransferedTo[1] = to_id;
    }
    if ((GetMathLibTraceLevel() > 0) && (m_numTimesDeviceChanged == NUM_DEVICE_CHANGED_WARN && m_devicesTransferedTo[1] >= CPUDEVICE))
        fprintf(stderr, "WARNING: The same matrix with dim [%lu, %lu] has been transferred between different devices for %d times.\n", (unsigned long) GetNumRows(), (unsigned long) GetNumCols(), NUM_DEVICE_CHANGED_WARN);

    // do the transfer
    if (m_matrixType == MatrixType::SPARSE)
    {
        if (from_id == CPUDEVICE) // from CPU to GPU
        {
            if (!m_CPUSparseMatrix)
                LogicError("Can't move from CPU because I'm not there!");

            if (emptyTransfer)
            {
                // Allocate/resize target without copying values; reuse existing GPU
                // object only if it already sits on the target device.
                if (m_GPUSparseMatrix && m_GPUSparseMatrix->GetComputeDeviceId() == to_id)
                    m_GPUSparseMatrix->Resize(m_CPUSparseMatrix->GetNumRows(), m_CPUSparseMatrix->GetNumCols(), m_CPUSparseMatrix->NzCount());
                else
                    m_GPUSparseMatrix = make_shared<GPUSparseMatrix<ElemType>>(m_CPUSparseMatrix->GetNumRows(), m_CPUSparseMatrix->GetNumCols(), m_CPUSparseMatrix->NzCount(), to_id, m_CPUSparseMatrix->GetFormat());
            }
            else
            {
                if (!m_GPUSparseMatrix || m_GPUSparseMatrix->GetComputeDeviceId() != to_id)
                    m_GPUSparseMatrix = make_shared<GPUSparseMatrix<ElemType>>(to_id);
                m_GPUSparseMatrix->SetValue(*m_CPUSparseMatrix);
            }

            if (isBeingMoved)
            {
                SetDataLocation(GPU, SPARSE);
                m_CPUSparseMatrix = nullptr; // release the CPU copy
            }
            else
            {
                SetDataLocation(BOTH, SPARSE); // keep both copies in sync
            }
        }
        else // from GPU
        {
            if (!m_GPUSparseMatrix || m_GPUSparseMatrix->GetComputeDeviceId() != from_id)
                LogicError("This matrix isn't on this (or any?) GPU");

            if (to_id < 0) // to CPU
            {
                if (!m_CPUSparseMatrix)
                    m_CPUSparseMatrix = make_shared<CPUSparseMatrix<ElemType>>(m_GPUSparseMatrix->GetFormat());

                if (emptyTransfer)
                    m_CPUSparseMatrix->Resize(m_GPUSparseMatrix->GetNumRows(), m_GPUSparseMatrix->GetNumCols(), m_GPUSparseMatrix->NzCount(), true);
                else
                    m_GPUSparseMatrix->CopyToCPUSparseMatrix(*m_CPUSparseMatrix);

                if (isBeingMoved)
                {
                    SetDataLocation(CPU, SPARSE);
                    m_GPUSparseMatrix = nullptr;
                }
                else
                {
                    SetDataLocation(BOTH, SPARSE);
                }
            }
            else // to another GPU
            {
                m_GPUSparseMatrix->ChangeDeviceTo(to_id);
            }
        }
    }
    else
    // #pragma omp critical // causes a build error on gcc; not clear why this is here
    {
        if (from_id == CPUDEVICE) // from CPU to GPU
        {
            if (!m_CPUMatrix)
                LogicError("Can't move from CPU because I'm not there!");

            if (emptyTransfer)
            {
                if (m_GPUMatrix && m_GPUMatrix->GetComputeDeviceId() == to_id)
                    m_GPUMatrix->Resize(m_CPUMatrix->GetNumRows(), m_CPUMatrix->GetNumCols());
                else
                    m_GPUMatrix = make_shared<GPUMatrix<ElemType>>(m_CPUMatrix->GetNumRows(), m_CPUMatrix->GetNumCols(), to_id);
            }
            else
            {
                if (m_GPUMatrix && m_GPUMatrix->GetComputeDeviceId() == to_id)
                    m_GPUMatrix->SetValue(m_CPUMatrix->GetNumRows(), m_CPUMatrix->GetNumCols(), to_id, m_CPUMatrix->Data());
                else
                    m_GPUMatrix = make_shared<GPUMatrix<ElemType>>(m_CPUMatrix->GetNumRows(), m_CPUMatrix->GetNumCols(), to_id, m_CPUMatrix->Data());
            }

            if (isBeingMoved)
            {
                SetDataLocation(GPU, DENSE);
                m_CPUMatrix = nullptr;
            }
            else
                SetDataLocation(BOTH, DENSE);
        }
        else // from GPU
        {
            if (!m_GPUMatrix || m_GPUMatrix->GetComputeDeviceId() != from_id)
                LogicError("This matrix isn't on this (or any?) GPU");

            if (to_id < 0) // to CPU
            {
                if (emptyTransfer)
                {
                    if (m_CPUMatrix)
                        m_CPUMatrix->Resize(m_GPUMatrix->GetNumRows(), m_GPUMatrix->GetNumCols());
                    else
                        m_CPUMatrix = make_shared<CPUMatrix<ElemType>>(m_GPUMatrix->GetNumRows(), m_GPUMatrix->GetNumCols());
                }
                else
                {
                    ElemType* arr = m_GPUMatrix->CopyToArray(); // TODO: unnecessary allocation/copy; why not make this a vector that we move over as an rvalue ref?
                    if (m_CPUMatrix)
                        m_CPUMatrix->SetValue(m_GPUMatrix->GetNumRows(), m_GPUMatrix->GetNumCols(), arr);
                    else
                        m_CPUMatrix = make_shared<CPUMatrix<ElemType>>(m_GPUMatrix->GetNumRows(), m_GPUMatrix->GetNumCols(), arr, matrixFlagNormal);
                    delete[] arr;
                }

                if (isBeingMoved)
                {
                    SetDataLocation(CPU, DENSE);
                    m_GPUMatrix = nullptr;
                }
                else
                {
                    SetDataLocation(BOTH, DENSE);
                }
            }
            else // to another GPU
            {
                m_GPUMatrix->ChangeDeviceTo(to_id);
            }
        }
    } // and of omp critical section
}
// Public transfer entry point: performs the device-to-device transfer and,
// unless told otherwise, also updates the stored preferred device to wherever
// the data ended up.
template <class ElemType>
void Matrix<ElemType>::TransferFromDeviceToDevice(int from_id, int to_id, bool isBeingMoved, bool emptyTransfer/* = false*/, bool updatePreferredDevice/* = true*/) const
{
    _transferFromDeviceToDevice(from_id, to_id, isBeingMoved, emptyTransfer);
    if (updatePreferredDevice)
        m_preferredDeviceId = GetDeviceId();
}
// Transfer to 'to_id' only if the data is not already accessible there.
// When the matrix is in BOTH state and the caller allows staying in BOTH
// (isBeingMoved == false), a CPU<->GPU request is already satisfied and we skip
// the transfer entirely.
template <class ElemType>
void Matrix<ElemType>::TransferToDeviceIfNotThere(int to_id, bool isBeingMoved/*false: may leave in BOTH state*/, bool emptyTransfer/* = false*/, bool updatePreferredDevice/* = true*/) const
{
    int from_id = GetDeviceId();

    if (from_id == to_id) // already at the right place
        return;

    if (GetCurrentMatrixLocation() == BOTH && // if currently in BOTH state
        !isBeingMoved &&                      // and leaving in BOTH state is OK
        (from_id < 0 || to_id < 0))           // and this is not about changing GPUs
    {
        return; // then we are good
    }

    TransferFromDeviceToDevice(from_id, to_id, isBeingMoved, emptyTransfer, updatePreferredDevice);
}
// Print a rectangular sub-range of the matrix to stderr/stdout (via the CPU
// printers). GPU-resident data is temporarily transferred to the CPU (keeping
// the GPU copy, isBeingMoved=false) and the location is restored afterwards.
template <class ElemType>
void Matrix<ElemType>::Print(const char* matrixName, ptrdiff_t rowStart, ptrdiff_t rowEnd, ptrdiff_t colStart, ptrdiff_t colEnd) const
{
    DEVICEID_TYPE orgdevice = GetDeviceId(); // remember where we came from so we can go back

    DISPATCH_MATRIX_ON_FLAG(this,
                            nullptr,
                            // CPU:
                            m_CPUMatrix->Print(matrixName, rowStart, rowEnd, colStart, colEnd),
                            // GPU;
                            {
                                _transferToDevice(CPUDEVICE, false, false);
                                m_CPUMatrix->Print(matrixName, rowStart, rowEnd, colStart, colEnd);
                                _transferToDevice(orgdevice, false, false);
                            },
                            // CPU, sparse:
                            m_CPUSparseMatrix->Print(matrixName),
                            // GPU, sparse:
                            {
                                _transferToDevice(CPUDEVICE, false, false);
                                m_CPUSparseMatrix->Print(matrixName);
                                _transferToDevice(orgdevice, false, false);
                            });
}
// Convenience overload: print the entire matrix.
template <class ElemType>
void Matrix<ElemType>::Print(const char* matrixName /*=nullptr*/) const
{
    Print(matrixName, 0, GetNumRows() - 1, 0, GetNumCols() - 1);
}
// Helper function used for convolutional neural networks.
// im2col-style packing: rearranges the input sub-batch into the packed layout
// expected by the matrix-product formulation of convolution. Overwrites *this.
// Dense CPU/GPU only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignPackedConvolutionInput(const Matrix<ElemType>& inputSubBatch,
                                                                 const size_t inputWidth, const size_t inputHeight, const size_t inputChannels,
                                                                 const size_t outputWidth, const size_t outputHeight, const size_t outputChannels,
                                                                 const size_t kernelWidth, const size_t kernelHeight, const size_t horizontalSubsample, const size_t verticalSubsample,
                                                                 const bool zeroPadding)
{
    DecideAndMoveToRightDevice(inputSubBatch, *this);
    SwitchToMatrixType(inputSubBatch.GetMatrixType(), inputSubBatch.GetFormat(), false); // result fully overwritten

    DISPATCH_MATRIX_ON_FLAG(&inputSubBatch,
                            this,
                            m_CPUMatrix->AssignPackedConvolutionInput(*(inputSubBatch.m_CPUMatrix),
                                                                      inputWidth, inputHeight, inputChannels,
                                                                      outputWidth, outputHeight, outputChannels,
                                                                      kernelWidth, kernelHeight, horizontalSubsample, verticalSubsample,
                                                                      zeroPadding),
                            m_GPUMatrix->AssignPackedConvolutionInput(*(inputSubBatch.m_GPUMatrix),
                                                                      inputWidth, inputHeight, inputChannels,
                                                                      outputWidth, outputHeight, outputChannels,
                                                                      kernelWidth, kernelHeight, horizontalSubsample, verticalSubsample,
                                                                      zeroPadding),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);

    return *this;
}
// Helper function used for convolutional neural networks.
// Inverse of AssignPackedConvolutionInput (col2im-style): scatters the packed
// representation in *this back into inputSubBatch, which is also returned.
// Dense CPU/GPU only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::UnpackConvolutionInput(Matrix<ElemType>& inputSubBatch,
                                                           const size_t inputWidth, const size_t inputHeight, const size_t inputChannels,
                                                           const size_t outputWidth, const size_t outputHeight, const size_t outputChannels,
                                                           const size_t kernelWidth, const size_t kernelHeight, const size_t horizontalSubsample, const size_t verticalSubsample,
                                                           const bool zeroPadding) const
{
    DecideAndMoveToRightDevice(*this, inputSubBatch);
    inputSubBatch.SwitchToMatrixType(GetMatrixType(), inputSubBatch.GetFormat(), false);

    DISPATCH_MATRIX_ON_FLAG(this,
                            &inputSubBatch,
                            m_CPUMatrix->UnpackConvolutionInput(*(inputSubBatch.m_CPUMatrix),
                                                                inputWidth, inputHeight, inputChannels,
                                                                outputWidth, outputHeight, outputChannels,
                                                                kernelWidth, kernelHeight, horizontalSubsample, verticalSubsample,
                                                                zeroPadding),
                            m_GPUMatrix->UnpackConvolutionInput(*(inputSubBatch.m_GPUMatrix),
                                                                inputWidth, inputHeight, inputChannels,
                                                                outputWidth, outputHeight, outputChannels,
                                                                kernelWidth, kernelHeight, horizontalSubsample, verticalSubsample,
                                                                zeroPadding),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);

    return inputSubBatch;
}
// Legacy (pre-ConvolutionEngine) max-pooling forward pass: *this = maxpool(inputBatch).
// Overwrites *this after adopting inputBatch's type/format. Dense CPU/GPU only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignMaxPoolingResult(const Matrix<ElemType>& inputBatch, const size_t channels,
                                                           const size_t inputWidth, const size_t inputHeight, const size_t inputSizePerSample,
                                                           const size_t outputWidth, const size_t outputHeight, const size_t outputSizePerSample,
                                                           const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample)
{
    DecideAndMoveToRightDevice(inputBatch, *this);
    SwitchToMatrixType(inputBatch.GetMatrixType(), inputBatch.GetFormat(), false);

    DISPATCH_MATRIX_ON_FLAG(&inputBatch,
                            this,
                            m_CPUMatrix->AssignMaxPoolingResult(*(inputBatch.m_CPUMatrix), channels,
                                                                inputWidth, inputHeight, inputSizePerSample,
                                                                outputWidth, outputHeight, outputSizePerSample,
                                                                windowWidth, windowHeight, horizontalSubsample, verticalSubsample),
                            m_GPUMatrix->AssignMaxPoolingResult(*(inputBatch.m_GPUMatrix), channels,
                                                                inputWidth, inputHeight, inputSizePerSample,
                                                                outputWidth, outputHeight, outputSizePerSample,
                                                                windowWidth, windowHeight, horizontalSubsample, verticalSubsample),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);

    return *this;
}
// Legacy max-pooling backward pass: accumulates the pooling gradient into *this,
// routing each outputGradientBatch element back to the input position that won
// the forward max (hence inputBatch and outputBatch are both needed).
// All four matrices must share one matrix type; dense CPU/GPU only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AddMaxPoolingGradient(const Matrix<ElemType>& outputGradientBatch, const Matrix<ElemType>& inputBatch, const Matrix<ElemType>& outputBatch,
                                                          const size_t channels,
                                                          const size_t inputWidth, const size_t inputHeight, const size_t inputSizePerSample,
                                                          const size_t outputWidth, const size_t outputHeight, const size_t outputSizePerSample,
                                                          const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample)
{
    DecideAndMoveToRightDevice(*this, outputGradientBatch, inputBatch);
    outputBatch._transferToDevice(GetDeviceId()); // fourth matrix follows the other three

    if (!(GetMatrixType() == outputGradientBatch.GetMatrixType() && GetMatrixType() == inputBatch.GetMatrixType() && GetMatrixType() == outputBatch.GetMatrixType()))
        NOT_IMPLEMENTED;

    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->AddMaxPoolingGradient(*(outputGradientBatch.m_CPUMatrix), *(inputBatch.m_CPUMatrix), *(outputBatch.m_CPUMatrix), channels,
                                                               inputWidth, inputHeight, inputSizePerSample,
                                                               outputWidth, outputHeight, outputSizePerSample,
                                                               windowWidth, windowHeight, horizontalSubsample, verticalSubsample),
                            m_GPUMatrix->AddMaxPoolingGradient(*(outputGradientBatch.m_GPUMatrix), *(inputBatch.m_GPUMatrix), *(outputBatch.m_GPUMatrix), channels,
                                                               inputWidth, inputHeight, inputSizePerSample,
                                                               outputWidth, outputHeight, outputSizePerSample,
                                                               windowWidth, windowHeight, horizontalSubsample, verticalSubsample);
                            , // note: stray ';' above is inside the macro argument and compiles as an empty statement
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);

    return *this;
}
// Legacy average-pooling forward pass: *this = avgpool(inputBatch).
// Overwrites *this after adopting inputBatch's type/format. Dense CPU/GPU only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignAveragePoolingResult(const Matrix<ElemType>& inputBatch, const size_t channels,
                                                               const size_t inputWidth, const size_t inputHeight, const size_t inputSizePerSample,
                                                               const size_t outputWidth, const size_t outputHeight, const size_t outputSizePerSample,
                                                               const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample)
{
    DecideAndMoveToRightDevice(inputBatch, *this);
    SwitchToMatrixType(inputBatch.GetMatrixType(), inputBatch.GetFormat(), false);

    DISPATCH_MATRIX_ON_FLAG(&inputBatch,
                            this,
                            m_CPUMatrix->AssignAveragePoolingResult(*(inputBatch.m_CPUMatrix), channels,
                                                                    inputWidth, inputHeight, inputSizePerSample,
                                                                    outputWidth, outputHeight, outputSizePerSample,
                                                                    windowWidth, windowHeight, horizontalSubsample, verticalSubsample),
                            m_GPUMatrix->AssignAveragePoolingResult(*(inputBatch.m_GPUMatrix), channels,
                                                                    inputWidth, inputHeight, inputSizePerSample,
                                                                    outputWidth, outputHeight, outputSizePerSample,
                                                                    windowWidth, windowHeight, horizontalSubsample, verticalSubsample),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);

    return *this;
}
// NCE helper: computes the softmax-sum term from 'a' and 'softmax' into this
// 1x1 matrix. Note: dispatches on raw device id only (no type check/dispatch
// macro), so callers must supply dense matrices already colocated with *this.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignSoftmaxSum(const Matrix<ElemType>& a, const Matrix<ElemType>& softmax)
{
    Resize(1, 1); // scalar result
    if (GetDeviceId() < 0)
        a.m_CPUMatrix->AssignSoftmaxSum(*softmax.m_CPUMatrix, *m_CPUMatrix);
    else
        a.m_GPUMatrix->AssignSoftmaxSum(*softmax.m_GPUMatrix, *m_GPUMatrix);
    return *this;
}
// Noise-contrastive estimation: unnormalized model evaluation into this 1x1
// matrix. Like AssignSoftmaxSum, dispatches on device id only.
// NOTE(review): the GPU path does not pass 'bias' while the CPU path does --
// presumably the GPU kernel folds the bias in elsewhere; confirm against
// GPUMatrix<ElemType>::AssignNCEUnnormalizedEval.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignNceUnnormalizedEval(const Matrix<ElemType>& a, const Matrix<ElemType>& b, const Matrix<ElemType>& c, const Matrix<ElemType>& bias)
{
    // if (a.GetMatrixType() != MatrixType::SPARSE)
    //    NOT_IMPLEMENTED;

    Resize(1, 1); // scalar result
    if (GetDeviceId() < 0)
        a.m_CPUMatrix->AssignNCEUnnormalizedEval(*b.m_CPUMatrix, *c.m_CPUMatrix, *bias.m_CPUMatrix, *m_CPUMatrix);
    else
        a.m_GPUMatrix->AssignNCEUnnormalizedEval(*b.m_GPUMatrix, *c.m_GPUMatrix, *m_GPUMatrix);
    return *this;
}
// Noise-contrastive estimation objective evaluation into this 1x1 matrix.
// 'tmp' is a scratch buffer resized here to (a.rows/2, sampleCount) and filled
// by the kernel for reuse in AssignNCEDerivative. All inputs must already be
// on the same device as *this (no automatic transfer).
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignNoiseContrastiveEstimation(const Matrix<ElemType>& a, const Matrix<ElemType>& b, const Matrix<ElemType>& c, const Matrix<ElemType>& bias, Matrix<ElemType>& tmp)
{
    if (a.IsEmpty() || b.IsEmpty() || c.IsEmpty())
        LogicError("AssignNoiseContrastiveEstimation: one of the input matrices is empty.");

    if (a.GetDeviceId() != b.GetDeviceId() || b.GetDeviceId() != c.GetDeviceId() || c.GetDeviceId() != GetDeviceId())
        NOT_IMPLEMENTED;

    Resize(1, 1); // scalar objective value
    if (GetDeviceId() < 0)
    {
        // number of columns of 'a' (elements / rows)
        size_t sampleCount = a.m_CPUMatrix->GetNumElements() / a.m_CPUMatrix->GetNumRows();
        tmp.Resize(a.GetNumRows() / 2, sampleCount);
        a.m_CPUMatrix->AssignNoiseContrastiveEstimation(*b.m_CPUMatrix, *c.m_CPUMatrix,
                                                        *bias.m_CPUMatrix, *tmp.m_CPUMatrix, *m_CPUMatrix);
    }
    else
    {
        size_t sampleCount = a.m_GPUMatrix->GetNumElements() / a.m_GPUMatrix->GetNumRows();
        tmp.Resize(a.GetNumRows() / 2, sampleCount);
        a.m_GPUMatrix->AssignNoiseContrastiveEstimation(*b.m_GPUMatrix, *c.m_GPUMatrix,
                                                        *bias.m_GPUMatrix, sampleCount, *tmp.m_GPUMatrix, *m_GPUMatrix);
    }
    return *this;
}
// Computes the NCE gradient w.r.t. the input selected by 'inputIndex', using the
// scratch values 'tmp' produced by AssignNoiseContrastiveEstimation (hence the
// asserted shape relation tmp.rows == a.rows/2). Result is written into *this.
// All inputs must already be on the same device as *this (no automatic transfer).
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignNCEDerivative(const Matrix<ElemType>& tmp, const Matrix<ElemType>& a, const Matrix<ElemType>& b, const Matrix<ElemType>& c, size_t inputIndex)
{
    if (a.IsEmpty() || b.IsEmpty() || c.IsEmpty())
        LogicError("AssignNCEDerivative: one of the input matrices is empty."); // was misreported as AssignNoiseContrastiveEstimation (copy-paste)

    if (a.GetDeviceId() != b.GetDeviceId() || b.GetDeviceId() != c.GetDeviceId() || c.GetDeviceId() != GetDeviceId())
        NOT_IMPLEMENTED;

    assert(tmp.GetNumRows() == a.GetNumRows() / 2);

    if (GetDeviceId() < 0)
    {
        // samples                         gradient           hidden          embedding                   embedding/hidden
        a.m_CPUMatrix->AssignNCEDerivative(*tmp.m_CPUMatrix, *b.m_CPUMatrix, *c.m_CPUMatrix, inputIndex, *m_CPUMatrix);
    }
    else
    {
        a.m_GPUMatrix->AssignNCEDerivative(*tmp.m_GPUMatrix, *b.m_GPUMatrix, *c.m_GPUMatrix, inputIndex, *m_GPUMatrix);
    }
    return *this;
}
// Legacy average-pooling backward pass: distributes each output-gradient value
// evenly over its pooling window and accumulates into *this. Matrix types of
// *this and the gradient must match; dense CPU/GPU only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AddAveragePoolingGradient(const Matrix<ElemType>& outputGradientBatch,
                                                              const size_t channels,
                                                              const size_t inputWidth, const size_t inputHeight, const size_t inputSizePerSample,
                                                              const size_t outputWidth, const size_t outputHeight, const size_t outputSizePerSample,
                                                              const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample)
{
    DecideAndMoveToRightDevice(*this, outputGradientBatch);

    if (!(GetMatrixType() == outputGradientBatch.GetMatrixType()))
        NOT_IMPLEMENTED;

    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->AddAveragePoolingGradient(*(outputGradientBatch.m_CPUMatrix), channels,
                                                                   inputWidth, inputHeight, inputSizePerSample,
                                                                   outputWidth, outputHeight, outputSizePerSample,
                                                                   windowWidth, windowHeight, horizontalSubsample, verticalSubsample),
                            m_GPUMatrix->AddAveragePoolingGradient(*(outputGradientBatch.m_GPUMatrix), channels,
                                                                   inputWidth, inputHeight, inputSizePerSample,
                                                                   outputWidth, outputHeight, outputSizePerSample,
                                                                   windowWidth, windowHeight, horizontalSubsample, verticalSubsample),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);

    return *this;
}
#pragma endregion Other Helper Functions
// Reference convolution forward pass (ConvolutionEngine geometry tables):
// output = conv(*this, kernel). mpRowCol/mpRowIwht/mpRowRun/runs are the
// precomputed per-output-row geometry descriptors (each a single column).
template <class ElemType>
void Matrix<ElemType>::ConvolutionForward(const Matrix<ElemType>& kernel, const Matrix<int>& mpRowCol, const Matrix<int>& mpRowIwht,
                                          const Matrix<int>& mpRowRun, const Matrix<int>& runs, Matrix<ElemType>& output) const
{
    assert(mpRowCol.GetNumCols() == 1);
    assert(mpRowIwht.GetNumCols() == 1);
    assert(mpRowRun.GetNumCols() == 1);
    assert(runs.GetNumCols() == 1);

    DecideAndMoveToRightDevice(*this, output);

    // REVIEW alexeyk: add sparse version.
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->ConvolutionForward(*(kernel.m_CPUMatrix), *(mpRowCol.m_CPUMatrix), *(mpRowIwht.m_CPUMatrix),
                                                            *(mpRowRun.m_CPUMatrix), *(runs.m_CPUMatrix), *(output.m_CPUMatrix)),
                            m_GPUMatrix->ConvolutionForward(*(kernel.m_GPUMatrix), *(mpRowCol.m_GPUMatrix), *(mpRowIwht.m_GPUMatrix),
                                                            *(mpRowRun.m_GPUMatrix), *(runs.m_GPUMatrix), *(output.m_GPUMatrix)),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Convolution backward pass w.r.t. the data: propagates *this (output gradient)
// through 'kernel' into 'grad', using the same geometry tables as the forward pass.
template <class ElemType>
void Matrix<ElemType>::ConvolutionBackwardData(const Matrix<ElemType>& kernel, const Matrix<int>& mpRowCol, const Matrix<int>& mpRowIwht,
                                               const Matrix<int>& mpRowRun, const Matrix<int>& runs, Matrix<ElemType>& grad) const
{
    assert(mpRowCol.GetNumCols() == 1);
    assert(mpRowIwht.GetNumCols() == 1);
    assert(mpRowRun.GetNumCols() == 1);
    assert(runs.GetNumCols() == 1);

    DecideAndMoveToRightDevice(*this, grad);

    // REVIEW alexeyk: add sparse version.
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->ConvolutionBackwardData(*(kernel.m_CPUMatrix), *(mpRowCol.m_CPUMatrix), *(mpRowIwht.m_CPUMatrix),
                                                                 *(mpRowRun.m_CPUMatrix), *(runs.m_CPUMatrix), *(grad.m_CPUMatrix)),
                            m_GPUMatrix->ConvolutionBackwardData(*(kernel.m_GPUMatrix), *(mpRowCol.m_GPUMatrix), *(mpRowIwht.m_GPUMatrix),
                                                                 *(mpRowRun.m_GPUMatrix), *(runs.m_GPUMatrix), *(grad.m_GPUMatrix)),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Convolution backward pass w.r.t. the kernel: accumulates into 'kernelGrad'
// from *this (output gradient) and 'in' (forward input), using the geometry tables.
template <class ElemType>
void Matrix<ElemType>::ConvolutionBackwardKernel(const Matrix<ElemType>& in, const Matrix<int>& mpRowCol, const Matrix<int>& mpRowIwht,
                                                 const Matrix<int>& mpRowRun, const Matrix<int>& runs, Matrix<ElemType>& kernelGrad) const
{
    assert(mpRowCol.GetNumCols() == 1);
    assert(mpRowIwht.GetNumCols() == 1);
    assert(mpRowRun.GetNumCols() == 1);
    assert(runs.GetNumCols() == 1);

    DecideAndMoveToRightDevice(*this, kernelGrad);

    // REVIEW alexeyk: add sparse version.
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->ConvolutionBackwardKernel(*(in.m_CPUMatrix), *(mpRowCol.m_CPUMatrix), *(mpRowIwht.m_CPUMatrix),
                                                                   *(mpRowRun.m_CPUMatrix), *(runs.m_CPUMatrix), *(kernelGrad.m_CPUMatrix)),
                            m_GPUMatrix->ConvolutionBackwardKernel(*(in.m_GPUMatrix), *(mpRowCol.m_GPUMatrix), *(mpRowIwht.m_GPUMatrix),
                                                                   *(mpRowRun.m_GPUMatrix), *(runs.m_GPUMatrix), *(kernelGrad.m_GPUMatrix)),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Unrolls the convolution input in *this into 'output' (GEMM-friendly layout),
// using the precomputed geometry tables. CPU dense only; no GPU implementation.
template <class ElemType>
void Matrix<ElemType>::UnrollConvolutionInput(size_t unrollCols, size_t mapOutSize, const Matrix<int>& mpRowCol,
                                              const Matrix<int>& mpRowRun, const Matrix<int>& runs, Matrix<ElemType>& output) const
{
    assert(mpRowCol.GetNumCols() == 1);
    assert(mpRowRun.GetNumCols() == 1);
    assert(runs.GetNumCols() == 1);

    DecideAndMoveToRightDevice(*this, output);

    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->UnrollConvolutionInput(unrollCols, mapOutSize, *(mpRowCol.m_CPUMatrix),
                                                                *(mpRowRun.m_CPUMatrix), *(runs.m_CPUMatrix), *(output.m_CPUMatrix)),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Unrolls the convolution output/gradient in *this into 'output' for the
// GEMM-based backward pass. CPU dense only; no GPU implementation.
template <class ElemType>
void Matrix<ElemType>::UnrollConvolutionOutput(size_t unrollCols, size_t mapInCount, size_t mapOutCount, const Matrix<int>& mpRowCol,
                                               const Matrix<int>& mpRowRun, const Matrix<int>& runs, Matrix<ElemType>& output) const
{
    assert(mpRowCol.GetNumCols() == 1);
    assert(mpRowRun.GetNumCols() == 1);
    assert(runs.GetNumCols() == 1);

    DecideAndMoveToRightDevice(*this, output);

    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->UnrollConvolutionOutput(unrollCols, mapInCount, mapOutCount, *(mpRowCol.m_CPUMatrix),
                                                                 *(mpRowRun.m_CPUMatrix), *(runs.m_CPUMatrix), *(output.m_CPUMatrix)),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Unrolls the convolution input in *this into 'output' in the layout needed for
// the GEMM-based kernel-gradient computation. CPU dense only; no GPU implementation.
template <class ElemType>
void Matrix<ElemType>::UnrollConvolutionInputForKernelBackprop(size_t mapOutSize, const Matrix<int>& mpRowCol,
                                                               const Matrix<int>& mpRowRun, const Matrix<int>& runs, Matrix<ElemType>& output) const
{
    assert(mpRowCol.GetNumCols() == 1);
    assert(mpRowRun.GetNumCols() == 1);
    assert(runs.GetNumCols() == 1);

    DecideAndMoveToRightDevice(*this, output);

    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->UnrollConvolutionInputForKernelBackprop(mapOutSize, *(mpRowCol.m_CPUMatrix),
                                                                                 *(mpRowRun.m_CPUMatrix), *(runs.m_CPUMatrix), *(output.m_CPUMatrix)),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Max-pooling forward pass (ConvolutionEngine geometry tables): output = maxpool(*this).
// mpRowCol/mpRowIndices/indices describe each output row's pooling window.
template <class ElemType>
void Matrix<ElemType>::MaxPoolingForward(const Matrix<int>& mpRowCol, const Matrix<int>& mpRowIndices, const Matrix<int>& indices, Matrix<ElemType>& output) const
{
    assert(mpRowCol.GetNumCols() == 1);
    assert(mpRowIndices.GetNumCols() == 1);
    assert(indices.GetNumCols() == 1);

    DecideAndMoveToRightDevice(*this, output);

    // REVIEW alexeyk: add sparse version.
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->MaxPoolingForward(*(mpRowCol.m_CPUMatrix), *(mpRowIndices.m_CPUMatrix), *(indices.m_CPUMatrix), *(output.m_CPUMatrix)),
                            m_GPUMatrix->MaxPoolingForward(*(mpRowCol.m_GPUMatrix), *(mpRowIndices.m_GPUMatrix), *(indices.m_GPUMatrix), *(output.m_GPUMatrix)),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Max-pooling backward pass: routes *this (output gradient) into 'grad' at the
// positions that won the forward max ('out' and 'in' are the forward output and
// input, needed to re-identify the winners). accumulateGradient selects add vs overwrite.
template <class ElemType>
void Matrix<ElemType>::MaxPoolingBackward(const Matrix<ElemType>& out, const Matrix<ElemType>& in,
                                          const Matrix<int>& mpRowCol, const Matrix<int>& mpRowIndices, const Matrix<int>& indices,
                                          Matrix<ElemType>& grad, bool accumulateGradient) const
{
    assert(mpRowCol.GetNumCols() == 1);
    assert(mpRowIndices.GetNumCols() == 1);
    assert(indices.GetNumCols() == 1);

    DecideAndMoveToRightDevice(*this, grad);

    // REVIEW alexeyk: add sparse version.
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->MaxPoolingBackward(*(out.m_CPUMatrix), *(in.m_CPUMatrix),
                                                            *(mpRowCol.m_CPUMatrix), *(mpRowIndices.m_CPUMatrix), *(indices.m_CPUMatrix),
                                                            *(grad.m_CPUMatrix), accumulateGradient),
                            m_GPUMatrix->MaxPoolingBackward(*(out.m_GPUMatrix), *(in.m_GPUMatrix),
                                                            *(mpRowCol.m_GPUMatrix), *(mpRowIndices.m_GPUMatrix), *(indices.m_GPUMatrix),
                                                            *(grad.m_GPUMatrix), accumulateGradient),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// ROI max-pooling forward (Fast R-CNN style): pools *this over the regions of
// interest in 'roiData' into 'output'; 'argmax' records the winning positions
// for the backward pass. spatialScale maps ROI coordinates to feature-map scale.
template <class ElemType>
void Matrix<ElemType>::MaxROIPoolingForward(const size_t numRois, const size_t numImg, const size_t channels, const size_t width, const size_t height,
                                            const size_t pooledWidth, const size_t pooledHeight, const Matrix<ElemType>& roiData, Matrix<ElemType>& output,
                                            Matrix<ElemType>& argmax, double spatialScale) const
{
    DecideAndMoveToRightDevice(*this, output);

    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->MaxROIPoolingForward(numRois, numImg, channels, width, height, pooledWidth, pooledHeight, *(roiData.m_CPUMatrix), *(output.m_CPUMatrix), *(argmax.m_CPUMatrix), spatialScale),
                            m_GPUMatrix->MaxROIPoolingForward(numRois, numImg, channels, width, height, pooledWidth, pooledHeight, *(roiData.m_GPUMatrix), *(output.m_GPUMatrix), *(argmax.m_GPUMatrix), spatialScale),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// ROI max-pooling backward: routes *this (pooled-output gradient) into 'grad'
// using the 'argmax' positions recorded by MaxROIPoolingForward.
template <class ElemType>
void Matrix<ElemType>::MaxROIPoolingBackward(const size_t numRois, const size_t numImg, const size_t channels, const size_t width, const size_t height,
                                             const size_t pooledWidth, const size_t pooledHeight, const Matrix<ElemType>& roiData, Matrix<ElemType>& grad,
                                             Matrix<ElemType>& argmax, double spatialScale) const
{
    DecideAndMoveToRightDevice(*this, grad);

    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->MaxROIPoolingBackward(numRois, numImg, channels, width, height, pooledWidth, pooledHeight, *(roiData.m_CPUMatrix), *(grad.m_CPUMatrix), *(argmax.m_CPUMatrix), spatialScale),
                            m_GPUMatrix->MaxROIPoolingBackward(numRois, numImg, channels, width, height, pooledWidth, pooledHeight, *(roiData.m_GPUMatrix), *(grad.m_GPUMatrix), *(argmax.m_GPUMatrix), spatialScale),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Max-unpooling: scatters the pooled values in *this back into 'input' at the
// positions determined from 'poolInput' (the original pooling input); all other
// positions of 'input' are zeroed first.
template <class ElemType>
void Matrix<ElemType>::MaxUnpooling(const Matrix<int>& mpRowCol, const Matrix<int>& mpRowIndices, const Matrix<int>& indices, const Matrix<ElemType>& poolInput, Matrix<ElemType>& input) const
{
    assert(mpRowCol.GetNumCols() == 1);
    assert(mpRowIndices.GetNumCols() == 1);
    assert(indices.GetNumCols() == 1);

    DecideAndMoveToRightDevice(*this, input);

    // REVIEW alexeyk: setting values to zero may cause inconsistency when negative values are unpooled.
    // To see why, let's assume we have just one input with negative value and output of, for example, 2x2.
    // As a result of unpooling, there will be 3 zero values and one negative. If we now apply max pooling
    // operation to the output then we get 0 as the output, not the original negative value.
    // In practice this will not happen as pooling layers usually go right after ReLU layer.
    input.SetValue(0);

    // REVIEW alexeyk: add sparse version.
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->MaxUnpooling(*(mpRowCol.m_CPUMatrix), *(mpRowIndices.m_CPUMatrix), *(indices.m_CPUMatrix), *(poolInput.m_CPUMatrix), *(input.m_CPUMatrix)),
                            m_GPUMatrix->MaxUnpooling(*(mpRowCol.m_GPUMatrix), *(mpRowIndices.m_GPUMatrix), *(indices.m_GPUMatrix), *(poolInput.m_GPUMatrix), *(input.m_GPUMatrix)),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Average-pooling forward pass: output = avgpool(*this) over the windows
// described by the geometry tables.
// NOTE(review): the CPU call forwards poolIncludePad but the GPU call does not --
// presumably the GPU kernel has a different signature; confirm against
// GPUMatrix<ElemType>::AveragePoolingForward, else padded cells are averaged
// inconsistently between devices.
template <class ElemType>
void Matrix<ElemType>::AveragePoolingForward(const Matrix<int>& mpRowCol, const Matrix<int>& mpRowIndices, const Matrix<int>& indices, Matrix<ElemType>& output, const bool poolIncludePad) const
{
    assert(mpRowCol.GetNumCols() == 1);
    assert(mpRowIndices.GetNumCols() == 1);
    assert(indices.GetNumCols() == 1);

    DecideAndMoveToRightDevice(*this, output);

    // REVIEW alexeyk: add sparse version.
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->AveragePoolingForward(*(mpRowCol.m_CPUMatrix), *(mpRowIndices.m_CPUMatrix), *(indices.m_CPUMatrix), *(output.m_CPUMatrix), poolIncludePad),
                            m_GPUMatrix->AveragePoolingForward(*(mpRowCol.m_GPUMatrix), *(mpRowIndices.m_GPUMatrix), *(indices.m_GPUMatrix), *(output.m_GPUMatrix)),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
template <class ElemType>
// Backward pass of average pooling: distributes this matrix's gradients evenly over the input
// cells of each pooling window, accumulating into 'grad' when 'accumulateGradient' is set.
// 'poolIncludePad' must match the forward pass so the averaging denominator agrees.
// BUGFIX: the GPU dispatch previously dropped 'poolIncludePad' (passing only
// 'accumulateGradient'), making the GPU gradient inconsistent with the CPU path; both
// flags are now forwarded on both paths.
void Matrix<ElemType>::AveragePoolingBackward(const Matrix<int>& mpRowCol, const Matrix<int>& mpRowIndices, const Matrix<int>& indices, Matrix<ElemType>& grad, const bool poolIncludePad, bool accumulateGradient) const
{
assert(mpRowCol.GetNumCols() == 1);
assert(mpRowIndices.GetNumCols() == 1);
assert(indices.GetNumCols() == 1);
DecideAndMoveToRightDevice(*this, grad);
// REVIEW alexeyk: add sparse version.
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->AveragePoolingBackward(*(mpRowCol.m_CPUMatrix), *(mpRowIndices.m_CPUMatrix), *(indices.m_CPUMatrix), *(grad.m_CPUMatrix), poolIncludePad, accumulateGradient),
m_GPUMatrix->AveragePoolingBackward(*(mpRowCol.m_GPUMatrix), *(mpRowIndices.m_GPUMatrix), *(indices.m_GPUMatrix), *(grad.m_GPUMatrix), poolIncludePad, accumulateGradient),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
template <class StatType>
// Batch-normalization forward pass over this matrix, writing the normalized result to 'out'.
// 'runMean'/'runVariance' are the running statistics, updated with 'expAvgFactor'; 'blendFactor'
// blends running statistics into the minibatch statistics. 'saveMean'/'saveInvStdDev' capture
// per-minibatch statistics for reuse in the backward pass. In 'inferenceOnly' mode only the
// running statistics are used. Statistics may use a different element type (StatType) than the data.
void Matrix<ElemType>::BatchNormalizationForward(const Matrix<StatType>& scale, const Matrix<StatType>& bias, bool inferenceOnly, double expAvgFactor, double blendFactor,
Matrix<StatType>& runMean, Matrix<StatType>& runVariance, Matrix<ElemType>& out, double epsilon,
Matrix<StatType>& saveMean, Matrix<StatType>& saveInvStdDev) const
{
DecideAndMoveToRightDevice(*this, out);
// REVIEW alexeyk: add sparse version.
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->BatchNormalizationForward(*(scale.m_CPUMatrix), *(bias.m_CPUMatrix), inferenceOnly, expAvgFactor, blendFactor,
*(runMean.m_CPUMatrix), *(runVariance.m_CPUMatrix),
*(out.m_CPUMatrix), epsilon, *(saveMean.m_CPUMatrix), *(saveInvStdDev.m_CPUMatrix)),
m_GPUMatrix->BatchNormalizationForward(*(scale.m_GPUMatrix), *(bias.m_GPUMatrix), inferenceOnly, expAvgFactor, blendFactor,
*(runMean.m_GPUMatrix), *(runVariance.m_GPUMatrix),
*(out.m_GPUMatrix), epsilon, *(saveMean.m_GPUMatrix), *(saveInvStdDev.m_GPUMatrix)),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
template <class StatType>
// Batch-normalization backward pass: given incoming gradients in this matrix and the forward
// input 'in', computes the data gradient into 'grad' and the scale/bias gradients into
// 'scaleGrad'/'biasGrad'. 'saveMean'/'saveInvStdDev' are the statistics captured by the forward
// pass; 'blendFactor' must match the forward call.
void Matrix<ElemType>::BatchNormalizationBackward(const Matrix<ElemType>& in, Matrix<ElemType>& grad, const Matrix<StatType>& scale, double blendFactor,
const Matrix<StatType>& saveMean, const Matrix<StatType>& saveInvStdDev,
Matrix<StatType>& scaleGrad, Matrix<StatType>& biasGrad) const
{
DecideAndMoveToRightDevice(*this, grad);
// REVIEW alexeyk: add sparse version.
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->BatchNormalizationBackward(*(in.m_CPUMatrix), *(grad.m_CPUMatrix), *(scale.m_CPUMatrix), blendFactor,
*(saveMean.m_CPUMatrix), *(saveInvStdDev.m_CPUMatrix),
*(scaleGrad.m_CPUMatrix), *(biasGrad.m_CPUMatrix)),
m_GPUMatrix->BatchNormalizationBackward(*(in.m_GPUMatrix), *(grad.m_GPUMatrix), *(scale.m_GPUMatrix), blendFactor,
*(saveMean.m_GPUMatrix), *(saveInvStdDev.m_GPUMatrix),
*(scaleGrad.m_GPUMatrix), *(biasGrad.m_GPUMatrix)),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
// RNN forward pass (GPU/dense only; presumably backed by cuDNN's RNN API — the CPU and sparse
// paths are NOT_IMPLEMENTED). 'reserve' and 'workspace' are scratch buffers owned by the caller;
// 'reserve' must be preserved between forward and backward calls.
void Matrix<ElemType>::RNNForward(const Matrix<ElemType> &inputX, const Matrix<ElemType> &paramW, size_t xDim, size_t yDim, const vector<size_t>& numSequencesForFrame, const RnnAttributes& rnnAttributes, Matrix<ElemType>& reserve, Matrix<ElemType>& workspace)
{
DecideAndMoveToRightDevice(*this, inputX, paramW);
// move reserve/workspace to the consensus device
reserve._transferToDevice(GetDeviceId());
workspace._transferToDevice(GetDeviceId());
DISPATCH_MATRIX_ON_FLAG(this,
this,
NOT_IMPLEMENTED,
m_GPUMatrix->RNNForward(*(inputX.m_GPUMatrix), *(paramW.m_GPUMatrix), xDim, yDim, numSequencesForFrame, rnnAttributes, *(reserve.m_GPUMatrix), *(workspace.m_GPUMatrix)),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
// RNN backward pass w.r.t. the data: propagates output gradients 'outputDY' back to input
// gradients 'outputDX' (GPU/dense only). 'reserve' must be the buffer filled by RNNForward.
void Matrix<ElemType>::RNNBackwardData(const Matrix<ElemType>& outputDY, const Matrix<ElemType>& paramW, Matrix<ElemType>& outputDX, const RnnAttributes& rnnAttributes, Matrix<ElemType>& reserve, Matrix<ElemType>& workspace)
{
DecideAndMoveToRightDevice(*this, outputDY, paramW, outputDX);
// move reserve/workspace to the consensus device
reserve._transferToDevice(GetDeviceId());
workspace._transferToDevice(GetDeviceId());
DISPATCH_MATRIX_ON_FLAG(this,
this,
NOT_IMPLEMENTED,
m_GPUMatrix->RNNBackwardData(*(outputDY.m_GPUMatrix), *(paramW.m_GPUMatrix), *(outputDX.m_GPUMatrix), rnnAttributes, *(reserve.m_GPUMatrix), *(workspace.m_GPUMatrix)),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
// RNN backward pass w.r.t. the weights: accumulates the weight gradient into 'dw' from the
// forward input 'inputX' and forward output 'outputY' (GPU/dense only). 'reserve' must be the
// buffer filled by RNNForward.
void Matrix<ElemType>::RNNBackwardWeights(const Matrix<ElemType>& inputX, const Matrix<ElemType>& outputY, Matrix<ElemType>& dw, const RnnAttributes& rnnAttributes, Matrix<ElemType>& reserve, Matrix<ElemType>& workspace)
{
DecideAndMoveToRightDevice(*this, inputX, outputY, dw);
// move reserve/workspace to the consensus device
reserve._transferToDevice(GetDeviceId());
workspace._transferToDevice(GetDeviceId());
DISPATCH_MATRIX_ON_FLAG(this,
this,
NOT_IMPLEMENTED,
m_GPUMatrix->RNNBackwardWeights(*(inputX.m_GPUMatrix), *(outputY.m_GPUMatrix), *(dw.m_GPUMatrix), rnnAttributes, *(reserve.m_GPUMatrix), *(workspace.m_GPUMatrix)),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
#pragma region Static BLAS Functions
template <class ElemType>
// Singular value decomposition of A into SIGMA (singular values), U, and VT (V transposed);
// W is an additional workspace/output matrix forwarded to the CPU implementation.
// CPU/dense only — all inputs are moved to the CPU path's device and A is deep-cloned because
// the underlying LAPACK-style routine overwrites its input.
void Matrix<ElemType>::SVD(const Matrix<ElemType>& A, Matrix<ElemType>& SIGMA, Matrix<ElemType>& U, Matrix<ElemType>& VT, Matrix<ElemType>& W)
{
if (A.IsEmpty())
LogicError("SVD: the input matrix is empty.");
DecideAndMoveToRightDevice(A, SIGMA, U);
VT._transferToDevice(A.GetDeviceId());
W._transferToDevice(A.GetDeviceId());
SIGMA.SwitchToMatrixType(A.GetMatrixType(), A.GetFormat(), false);
U.SwitchToMatrixType(A.GetMatrixType(), A.GetFormat(), false);
VT.SwitchToMatrixType(A.GetMatrixType(), A.GetFormat(), false);
W.SwitchToMatrixType(A.GetMatrixType(), A.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&A, nullptr,
{
Matrix<ElemType> tA = A.DeepClone();
CPUMatrix<ElemType>::SVD(*tA.m_CPUMatrix, *SIGMA.m_CPUMatrix, *U.m_CPUMatrix, *VT.m_CPUMatrix, *W.m_CPUMatrix);
SIGMA.SetDataLocation(CPU);
U.SetDataLocation(CPU);
VT.SetDataLocation(CPU);
W.SetDataLocation(CPU);
// need to SetDataLocation() on all matrices we write to
},
{ NOT_IMPLEMENTED; },
{ NOT_IMPLEMENTED; },
{ NOT_IMPLEMENTED; });
}
/// <summary>Matrix-matrix multiply with col-major matrices (a and b may be transposed): c = alpha * op(a) * op(b) + beta*c</summary>
/// <param name="alpha">Scalar</param>
/// <param name="a">Input matrix</param>
/// <param name="transposeA">Whether matrix a is transposed</param>
/// <param name="b">Input matrix</param>
/// <param name="transposeB">Whether matrix b is transposed</param>
/// <param name="beta">Scalar</param>
/// <param name="c">Resulting matrix, user is responsible for allocating this</param>
template <class ElemType>
void Matrix<ElemType>::MultiplyAndWeightedAdd(ElemType alpha, const Matrix<ElemType>& a, const bool transposeA, const Matrix<ElemType>& b, const bool transposeB,
ElemType beta, Matrix<ElemType>& c, shared_ptr<QuantizedMultiplier<ElemType>> pQuantizedMultiplier)
{
// This routine dispatches by hand (rather than via DISPATCH_MATRIX_ON_FLAG) because the
// supported operation depends on the (device, a-type, b-type, c-type) combination; each
// branch below is labeled with the combination it handles. Unsupported combinations are
// NOT_IMPLEMENTED. pQuantizedMultiplier is only honored on the CPU DENSE*DENSE path.
DecideAndMoveToRightDevice(a, b, c);
if (c.GetDeviceId() < 0) // CPU
{
if (a.GetMatrixType() == MatrixType::SPARSE) // CPU, SPARSE * ANY -> ANY
{
if (b.GetMatrixType() == MatrixType::DENSE && c.GetMatrixType() == MatrixType::DENSE) // CPU, SPARSE * DENSE -> DENSE
{
CPUSparseMatrix<ElemType>::MultiplyAndWeightedAdd(alpha, *a.m_CPUSparseMatrix, transposeA, *b.m_CPUMatrix, transposeB, beta, *c.m_CPUMatrix);
c.SetDataLocation(CPU, DENSE);
}
else if (b.GetMatrixType() == MatrixType::SPARSE && c.GetMatrixType() == MatrixType::DENSE) // CPU, SPARSE * SPARSE -> DENSE
{
NOT_IMPLEMENTED;
}
else if (b.GetMatrixType() == MatrixType::DENSE && c.GetMatrixType() == MatrixType::SPARSE)// CPU, SPARSE * DENSE -> SPARSE
{
NOT_IMPLEMENTED;
}
else if (b.GetMatrixType() == MatrixType::SPARSE && c.GetMatrixType() == MatrixType::SPARSE)// CPU, SPARSE * SPARSE -> SPARSE
{
NOT_IMPLEMENTED;
}
else
{
NOT_IMPLEMENTED;
}
}
else // CPU, DENSE * ANY -> ANY
{
if (b.GetMatrixType() == MatrixType::SPARSE) // CPU, DENSE * SPARSE -> ANY
{
if (c.GetMatrixType() == MatrixType::DENSE) // CPU, DENSE * SPARSE -> DENSE
{
CPUSparseMatrix<ElemType>::MultiplyAndWeightedAdd(alpha, *a.m_CPUMatrix, transposeA, *b.m_CPUSparseMatrix, transposeB, beta, *c.m_CPUMatrix);
c.SetDataLocation(CPU, DENSE);
}
else if (c.GetMatrixType() == MatrixType::SPARSE) // CPU, DENSE * SPARSE -> SPARSE
{
// sparse accumulation only supports beta in {0, 1}: beta==0 clears c first, beta==1 accumulates
if (beta != 0 && beta != 1)
{
NOT_IMPLEMENTED;
}
else
{
if (beta == 0)
{
c.Reset();
}
CPUSparseMatrix<ElemType>::MultiplyAndAdd(alpha, *a.m_CPUMatrix, transposeA, *b.m_CPUSparseMatrix, transposeB, *c.m_CPUSparseMatrix);
}
c.SetDataLocation(CPU, SPARSE);
}
else
NOT_IMPLEMENTED; // CPU, DENSE * SPARSE -> UNDETERMINED ?
}
else // CPU, DENSE * DENSE -> DENSE (matrix c enforced to be DENSE)
{
c.SwitchToMatrixType(MatrixType::DENSE, matrixFormatDense, false);
CPUMatrix<ElemType>::MultiplyAndWeightedAdd(alpha, *a.m_CPUMatrix, transposeA, *b.m_CPUMatrix, transposeB, beta, *c.m_CPUMatrix, pQuantizedMultiplier);
c.SetDataLocation(CPU, DENSE);
}
}
}
else // GPU operations
{
if (a.m_matrixType == MatrixType::DENSE && b.m_matrixType == MatrixType::DENSE && c.m_matrixType == MatrixType::DENSE) // GPU, DENSE * DENSE -> DENSE
{
GPUMatrix<ElemType>::MultiplyAndWeightedAdd(alpha, *a.m_GPUMatrix, transposeA, *b.m_GPUMatrix, transposeB, beta, *c.m_GPUMatrix);
c.SetDataLocation(GPU, DENSE);
}
else if (a.m_matrixType == MatrixType::SPARSE && b.m_matrixType == MatrixType::DENSE && c.m_matrixType == MatrixType::DENSE) // GPU, SPARSE * DENSE -> DENSE
{
// the underlying GPU sparse multiply does not support a transposed dense operand,
// so materialize b^T explicitly when requested
GPUMatrix<ElemType> second = transposeB ? b.m_GPUMatrix->Transpose() : *b.m_GPUMatrix;
GPUSparseMatrix<ElemType>::MultiplyAndWeightedAdd(alpha, *a.m_GPUSparseMatrix, transposeA, second, false, beta, *c.m_GPUMatrix);
c.SetDataLocation(GPU, DENSE);
}
else if (a.m_matrixType == MatrixType::DENSE && b.m_matrixType == MatrixType::SPARSE && c.m_matrixType == MatrixType::DENSE) // GPU, DENSE * SPARSE -> DENSE
{
GPUSparseMatrix<ElemType>::MultiplyAndWeightedAdd(alpha, *a.m_GPUMatrix, transposeA, *b.m_GPUSparseMatrix, transposeB, beta, *c.m_GPUMatrix);
c.SetDataLocation(GPU, DENSE);
}
else if (a.m_matrixType == MatrixType::DENSE && b.m_matrixType == MatrixType::SPARSE && c.m_matrixType == MatrixType::SPARSE) // GPU, DENSE * SPARSE -> SPARSE
{
// as on the CPU sparse-output path, only beta in {0, 1} is supported
if (beta != 0 && beta != 1)
{
NOT_IMPLEMENTED;
}
else
{
if (beta == 0)
{
c.Reset();
}
GPUSparseMatrix<ElemType>::MultiplyAndAdd(alpha, *a.m_GPUMatrix, transposeA, *b.m_GPUSparseMatrix, transposeB, *c.m_GPUSparseMatrix);
}
c.SetDataLocation(GPU, SPARSE);
}
else if (a.m_matrixType == MatrixType::SPARSE && b.m_matrixType == MatrixType::SPARSE && c.m_matrixType == MatrixType::SPARSE) // GPU, SPARSE * SPARSE -> SPARSE
{
// fold alpha into the first operand; general beta is handled by computing into a
// temporary and forming tmp + beta*c
GPUSparseMatrix<ElemType> firstDummy = alpha == 1 ? *a.m_GPUSparseMatrix : (*a.m_GPUSparseMatrix) * alpha;
GPUSparseMatrix<ElemType>& first = firstDummy; // By Malcolm.. gcc doesn't support auto
if (beta == 0)
{
GPUSparseMatrix<ElemType>::Multiply(first, transposeA, *b.m_GPUSparseMatrix, transposeB, *c.m_GPUSparseMatrix);
c.SetDataLocation(GPU, SPARSE);
}
else
{
GPUSparseMatrix<ElemType> tmp(b.m_GPUSparseMatrix->GetComputeDeviceId());
GPUSparseMatrix<ElemType>::Multiply(first, transposeA, *b.m_GPUSparseMatrix, transposeB, tmp);
*c.m_GPUSparseMatrix = tmp + (*c.m_GPUSparseMatrix) * beta;
c.SetDataLocation(GPU, SPARSE);
}
}
else if (a.m_matrixType == MatrixType::DENSE && b.m_matrixType == MatrixType::DENSE && c.m_matrixType == MatrixType::SPARSE) // GPU, DENSE * DENSE -> SPARSE
{
// compute densely, then convert the product to sparse (and blend with beta*c if needed)
GPUMatrix<ElemType> tmp(a.m_GPUMatrix->GetComputeDeviceId());
GPUMatrix<ElemType>::MultiplyAndWeightedAdd(alpha, *a.m_GPUMatrix, transposeA, *b.m_GPUMatrix, transposeB, (ElemType)0.0, tmp);
if (beta != 0)
{
GPUSparseMatrix<ElemType> tmpSparse(a.m_GPUMatrix->GetComputeDeviceId());
tmpSparse.SetValue(tmp);
*c.m_GPUSparseMatrix = tmpSparse + (*c.m_GPUSparseMatrix) * beta;
}
else
{
c.m_GPUSparseMatrix->SetValue(tmp);
}
c.SetDataLocation(GPU, SPARSE);
}
else if (a.m_matrixType == MatrixType::SPARSE && b.m_matrixType == MatrixType::SPARSE && c.m_matrixType == MatrixType::DENSE) // GPU, SPARSE * SPARSE -> DENSE
{
NOT_IMPLEMENTED;
}
else if (a.m_matrixType == MatrixType::SPARSE && b.m_matrixType == MatrixType::DENSE && c.m_matrixType == MatrixType::SPARSE) // GPU, SPARSE * DENSE -> SPARSE
{
NOT_IMPLEMENTED;
}
else // No combination left
{
NOT_IMPLEMENTED;
}
}
}
template <class ElemType>
// c = alpha * a(0,0) * b + beta * c, where 'a' must be a 1x1 matrix acting as a scalar.
// Dedicated entry point so callers avoid the cost of Get00Element() on a device matrix.
/*static*/ void Matrix<ElemType>::Multiply1x1AndWeightedAdd(ElemType alpha, const Matrix<ElemType>& a, const Matrix<ElemType>& b, ElemType beta, Matrix<ElemType>& c)
{
// special case: a is a 1x1 matrix
// The only alternative is to Get00Elements(), which makes things inefficient.
if (a.GetNumElements() != 1)
InvalidArgument("Multiply1x1AndWeightedAdd: first arg must be a scalar.");
DISPATCH_MATRIX_ON_FLAG(&c,
nullptr,
CPUMatrix<ElemType>::Multiply1x1AndWeightedAdd(alpha, *a.m_CPUMatrix, *b.m_CPUMatrix, beta, *c.m_CPUMatrix),
GPUMatrix<ElemType>::Multiply1x1AndWeightedAdd(alpha, *a.m_GPUMatrix, *b.m_GPUMatrix, beta, *c.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
/// <summary>Matrix-matrix multiply-accumulate with col-major matrices (a and b may be transposed): c = op(a) * op(b) + c</summary>
/// Convenience wrapper around MultiplyAndWeightedAdd with alpha = 1 and beta = 1.
/// <param name="a">Input matrix</param>
/// <param name="transposeA">Whether matrix a is transposed</param>
/// <param name="b">Input matrix</param>
/// <param name="transposeB">Whether matrix b is transposed</param>
/// <param name="c">Resulting matrix, user is responsible for allocating this</param>
template <class ElemType>
void Matrix<ElemType>::MultiplyAndAdd(const Matrix<ElemType>& a, const bool transposeA, const Matrix<ElemType>& b, const bool transposeB,
Matrix<ElemType>& c)
{
// alpha = 1 (no scaling of the product), beta = 1 (accumulate into c)
Matrix<ElemType>::MultiplyAndWeightedAdd(1.0, a, transposeA, b, transposeB, 1.0, c);
}
/// <summary>Matrix-matrix multiply with col-major matrices (a and b may be transposed): c = op(a) * op(b)</summary>
/// Convenience wrapper around MultiplyAndWeightedAdd with alpha = 1 and beta = 0 (c is overwritten).
/// <param name="a">Input matrix</param>
/// <param name="transposeA">Whether matrix a is transposed</param>
/// <param name="b">Input matrix</param>
/// <param name="transposeB">Whether matrix b is transposed</param>
/// <param name="c">Resulting matrix, user is responsible for allocating this</param>
template <class ElemType>
void Matrix<ElemType>::Multiply(const Matrix<ElemType>& a, const bool transposeA, const Matrix<ElemType>& b, const bool transposeB,
Matrix<ElemType>& c)
{
// beta = 0 discards any previous contents of c
Matrix<ElemType>::MultiplyAndWeightedAdd(1.0, a, transposeA, b, transposeB, 0.0, c);
}
/// <summary>Matrix-matrix multiply with col-major matrices (a and b are not transposed): c = a * b</summary>
/// Convenience wrapper: no transposition, alpha = 1, beta = 0 (c is overwritten).
/// <param name="a">Input matrix</param>
/// <param name="b">Input matrix</param>
/// <param name="c">Resulting matrix, user is responsible for allocating this</param>
template <class ElemType>
void Matrix<ElemType>::Multiply(const Matrix<ElemType>& a, const Matrix<ElemType>& b, Matrix<ElemType>& c)
{
// plain product: neither operand transposed, previous contents of c discarded
Matrix<ElemType>::MultiplyAndWeightedAdd(1.0, a, /*transposeA=*/false, b, /*transposeB=*/false, 0.0, c);
}
/// <summary>1-D Convolution with col-major matrices (a and b may be transposed): c = alpha * op(a) * op(b) + beta*c. MultiplyAndWeightedAdd is just a special case of this.</summary>
/// <param name="alpha">Scalar</param>
/// <param name="a">Input matrix</param>
/// <param name="transposeA">Whether matrix a is transposed</param>
/// <param name="b">Input matrix</param>
/// <param name="transposeB">Whether matrix b is transposed</param>
/// <param name="beta">Scalar</param>
/// <param name="c">Resulting matrix, user is responsible for allocating this</param>
template <class ElemType>
void Matrix<ElemType>::ConvolveAndWeightedAdd(ElemType alpha, const Matrix<ElemType>& a, const bool transposeA, const Matrix<ElemType>& b, const bool transposeB,
ElemType beta, Matrix<ElemType>& c, size_t numChannels, size_t horizontalSubsample, bool padding, bool channelwise)
{
// Only the GPU DENSE * SPARSE -> DENSE combination is implemented; every other
// (device, type) combination falls through to NOT_IMPLEMENTED.
DecideAndMoveToRightDevice(a, b, c);
if (c.GetDeviceId() >= 0 /*GPU*/ && a.GetMatrixType() == MatrixType::DENSE && b.GetMatrixType() == MatrixType::SPARSE && c.GetMatrixType() == MatrixType::DENSE)
{
GPUSparseMatrix<ElemType>::ConvolveAndWeightedAdd(alpha, *a.m_GPUMatrix, transposeA, *b.m_GPUSparseMatrix, transposeB, beta, *c.m_GPUMatrix, numChannels, horizontalSubsample, padding, channelwise);
}
else
{
NOT_IMPLEMENTED;
}
}
/// <summary>Columnwise scale with col-major matrix and accumulate: c = alpha * a .* v (per column) + beta * c</summary>
/// <param name="alpha">Scalar</param>
/// <param name="a">Input matrix</param>
/// <param name="v">Input scale vector for each column of a</param>
/// <param name="beta">Scalar</param>
/// <param name="c">Resulting matrix, the same shape as a</param>
template <class ElemType>
void Matrix<ElemType>::ColumnwiseScaleAndWeightedAdd(ElemType alpha, const Matrix<ElemType>& a, const Matrix<ElemType>& v, ElemType beta, Matrix<ElemType>& c)
{
// All four storage combinations are supported; dispatch is driven by the type of 'a',
// while 'v' and 'c' are always accessed as dense.
DecideAndMoveToRightDevice(a, v, c);
DISPATCH_MATRIX_ON_FLAG(&a,
nullptr,
CPUMatrix<ElemType>::ColumnwiseScaleAndWeightedAdd(alpha, *a.m_CPUMatrix, *v.m_CPUMatrix, beta, *c.m_CPUMatrix),
GPUMatrix<ElemType>::ColumnwiseScaleAndWeightedAdd(alpha, *a.m_GPUMatrix, *v.m_GPUMatrix, beta, *c.m_GPUMatrix),
CPUSparseMatrix<ElemType>::ColumnwiseScaleAndWeightedAdd(alpha, *a.m_CPUSparseMatrix, *v.m_CPUMatrix, beta, *c.m_CPUMatrix),
GPUSparseMatrix<ElemType>::ColumnwiseScaleAndWeightedAdd(alpha, *a.m_GPUSparseMatrix, *v.m_GPUMatrix, beta, *c.m_GPUMatrix));
}
/// <summary>Matrix-scalar multiply with col-major matrices: c = alpha * a + c</summary>
/// if a is a column vector, add to all columns of c
/// if a is a row vector, add to all rows of c
/// <param name="alpha">Scalar</param>
/// <param name="a">Input matrix</param>
/// <param name="c">Resulting matrix, user is responsible for allocating this</param>
template <class ElemType>
/*static*/ void Matrix<ElemType>::ScaleAndAdd(ElemType alpha, const Matrix<ElemType>& a, Matrix<ElemType>& c)
{
if (a.IsEmpty() || c.IsEmpty())
LogicError("ScaleAndAdd: one of the input matrices is empty.");
DecideAndMoveToRightDevice(c, a);
// Two regimes: when a and c share a storage type, dispatch directly; otherwise handle the
// mixed sparse/dense combinations explicitly below.
if (a.GetMatrixType() == c.GetMatrixType())
{
DISPATCH_MATRIX_ON_FLAG(&c, &c,
{ CPUMatrix<ElemType>::ScaleAndAdd(alpha, *a.m_CPUMatrix, *c.m_CPUMatrix); },
{ GPUMatrix<ElemType>::ScaleAndAdd(alpha, *a.m_GPUMatrix, *c.m_GPUMatrix); },
{ NOT_IMPLEMENTED; },
// sparse += sparse: the GPU API writes to a separate output, so move c aside and add into it
{ GPUSparseMatrix<ElemType> b = move(*c.m_GPUSparseMatrix); GPUSparseMatrix<ElemType>::ScaleAndAdd(alpha, *a.m_GPUSparseMatrix, 1, b, *c.m_GPUSparseMatrix); });
}
else
{
// Mixed types: dispatch on c's type; 'a' has the opposite storage type.
DISPATCH_MATRIX_ON_FLAG(&c, nullptr,
{
CPUSparseMatrix<ElemType>::ScaleAndAdd(alpha, *a.m_CPUSparseMatrix, *c.m_CPUMatrix);
c.SetDataLocation(CPU);
},
{
if (a.m_GPUSparseMatrix->GetFormat() == MatrixFormat::matrixFormatSparseCSC)
GPUSparseMatrix<ElemType>::ScaleAndAdd(alpha, *a.m_GPUSparseMatrix, 1, *c.m_GPUMatrix, *c.m_GPUMatrix);
else // new GPU sparse matrix code
GPUSparseMatrix<ElemType>::ScaleAndAdd(alpha, *a.m_GPUSparseMatrix, *c.m_GPUMatrix);
c.SetDataLocation(GPU);
},
{ NOT_IMPLEMENTED; },
{
// dense a += sparse c: convert c to dense in place, then add; c ends up DENSE
c.m_GPUMatrix = make_shared<GPUMatrix<ElemType>>(c.m_GPUSparseMatrix->CopyToDenseMatrix());
GPUSparseMatrix<ElemType>::ScaleAndAdd(alpha, *a.m_GPUMatrix, 1, *c.m_GPUSparseMatrix, *c.m_GPUMatrix);
c.SetDataLocation(GPU, DENSE);
c.m_GPUSparseMatrix = nullptr;
});
}
}
/// <summary>Matrix-scalar multiply with col-major matrices: c = alpha * a + beta * c</summary>
/// if a is a column vector, add to all columns of c
/// if a is a row vector, add to all rows of c
/// <param name="alpha">Scalar</param>
/// <param name="a">Input matrix</param>
/// <param name="beta">Scalar</param>
/// <param name="c">Resulting matrix, caller is responsible for allocating this</param>
template <class ElemType>
/*static*/ void Matrix<ElemType>::ScaleAndAdd(ElemType alpha, const Matrix<ElemType>& a, ElemType beta, Matrix<ElemType>& c)
{
// Fast paths for the two common beta values, which map directly onto sibling overloads.
if (beta == 1)
{
ScaleAndAdd(alpha, a, c); // c = alpha * a + c
return;
}
if (beta == 0)
{
Scale(alpha, a, c); // c = alpha * a
return;
}
// General beta: fold beta out by dividing alpha, then rescale the whole result.
//   step 1: c' = (alpha/beta) * a + c
//   step 2: c  = beta * c'     = alpha * a + beta * c
// TODO: the two steps should instead be Scale(beta, c) followed by ScaleAndAdd(alpha, a, c)
// to avoid the division by beta.
ScaleAndAdd(alpha / beta, a, c);
Scale(beta, c);
}
// tensor swapping and addition: c <- keepWeight * b + scaleFactor * swap_dimensions(a, S, K)
// where
// - a is interpreted as a tensor of dimension (D x S x M x K x T) // column-major, as usual
// - b and c as a tensor of dimension (D x K x M x S x T) // note: K and S swapped
// The main point of this function is to reshuffle a tensor w.r.t. two dimensions that get swapped in memory,
// but for gradients, we will need to add, hence the keepWeight.
// Notes:
// - c and b may be the same (in-place operation is expressly allowed).
// - D, M, and/or T may be 1. For example, D == M == T == 1 implements a 2D matrix transpose from (S x K) to (K x S).
// - If keepWeight == 0, then b will just get overwritten (straight assignment, b may be uninitialized or contain NaNs).
// - The original matrix dimensions are ignored except that sizes must match (rows x cols == D x S x M x K x T).
// For diagnostics purposes, this function also enforces the rows % D == 0 and cols % T == 0, but this is not a functional requirement and can be removed if that helps.
// - Dense matrices only.
// TODO: Handle these cases:
// - no swapping happening --just do a block copy
// - swapping can be implemented by cuDNN --do so
template <class ElemType>
/*static*/ void Matrix<ElemType>::TensorShuffleScaleAndAdd(ElemType keepWeight, const Matrix<ElemType>& a, size_t D, size_t S, size_t M, size_t K, size_t T, ElemType scaleFactor, const Matrix<ElemType>& b, Matrix<ElemType>& c)
{
if (a.GetNumElements() != c.GetNumElements() || b.GetNumElements() != c.GetNumElements()) // allocations must match (but not dimensions, since we reinterpret the dimensions anyway)
InvalidArgument("TensorShuffleScaleAndAdd: a, b, and c must have same number of elements.");
if (c.IsEmpty()) // operating on empty minibatch slices is perfectly cromulent
return;
// sanity checks for current use cases--these are not strictly necessary and can be deleted
if (a.GetNumRows() % D != 0 || b.GetNumRows() % D != 0 || c.GetNumRows() % D != 0)
InvalidArgument("TensorShuffleScaleAndAdd: a, b, and c are meant to have a row dimension that is a multiple of D.");
if (a.GetNumCols() % T != 0 || b.GetNumCols() % T != 0 || c.GetNumCols() % T != 0)
InvalidArgument("TensorShuffleScaleAndAdd: a, b, and c are meant to have a column dimension that is a multiple of T.");
DecideAndMoveToRightDevice(a, b, c);
// Note: despite the "Dense matrices only" note above, a GPU-sparse dispatch arm exists below.
DISPATCH_MATRIX_ON_FLAG(&c,
nullptr,
CPUMatrix<ElemType>::TensorShuffleScaleAndAdd(keepWeight, *a.m_CPUMatrix, D, S, M, K, T, scaleFactor, *b.m_CPUMatrix, *c.m_CPUMatrix),
GPUMatrix<ElemType>::TensorShuffleScaleAndAdd(keepWeight, *a.m_GPUMatrix, D, S, M, K, T, scaleFactor, *b.m_GPUMatrix, *c.m_GPUMatrix),
NOT_IMPLEMENTED,
GPUSparseMatrix<ElemType>::TensorShuffleScaleAndAdd(keepWeight, *a.m_GPUSparseMatrix, D, S, M, K, T, scaleFactor, *b.m_GPUSparseMatrix, *c.m_GPUSparseMatrix));
}
/// <summary>c += alpha * (a-b)</summary>
/// a, b and c must have the same dimensions
/// <param name="alpha">Scalar</param>
/// <param name="a">Input matrix</param>
/// <param name="b">Input matrix</param>
/// <param name="c">Resulting matrix, user is responsible for allocating this</param>
template <class ElemType>
void Matrix<ElemType>::AddScaledDifference(const ElemType alpha, const Matrix<ElemType>& a, const Matrix<ElemType>& b, Matrix<ElemType>& c)
{
DecideAndMoveToRightDevice(c, a, b);
// only homogeneous storage types are supported (all dense CPU or all dense GPU)
if (!(a.GetMatrixType() == b.GetMatrixType() && a.GetMatrixType() == c.GetMatrixType()))
NOT_IMPLEMENTED;
DISPATCH_MATRIX_ON_FLAG(&c,
&c,
CPUMatrix<ElemType>::AddScaledDifference(alpha, *a.m_CPUMatrix, *b.m_CPUMatrix, *c.m_CPUMatrix),
GPUMatrix<ElemType>::AddScaledDifference(alpha, *a.m_GPUMatrix, *b.m_GPUMatrix, *c.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
/// <summary> c = alpha * (a-b)</summary>
/// a, b and c must have the same dimensions
/// <param name="alpha">Scalar</param>
/// <param name="a">Input matrix</param>
/// <param name="b">Input matrix</param>
/// <param name="c">Resulting matrix, user is responsible for allocating this</param>
template <class ElemType>
void Matrix<ElemType>::AssignScaledDifference(const ElemType alpha, const Matrix<ElemType>& a, const Matrix<ElemType>& b, Matrix<ElemType>& c)
{
DecideAndMoveToRightDevice(a, b, c);
// a and b must share a storage type; c is converted to match since it is overwritten anyway
if (!(a.GetMatrixType() == b.GetMatrixType()))
NOT_IMPLEMENTED;
c.SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&c,
&c,
CPUMatrix<ElemType>::AssignScaledDifference(alpha, *a.m_CPUMatrix, *b.m_CPUMatrix, *c.m_CPUMatrix),
GPUMatrix<ElemType>::AssignScaledDifference(alpha, *a.m_GPUMatrix, *b.m_GPUMatrix, *c.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
/// <summary>c += alpha * (a-b), where alpha is a 1x1 matrix acting as a scalar</summary>
/// a, b and c must have the same dimensions
/// <param name="alpha">1x1 matrix holding the scalar factor</param>
/// <param name="a">Input matrix</param>
/// <param name="b">Input matrix</param>
/// <param name="c">Resulting matrix, user is responsible for allocating this</param>
template <class ElemType>
void Matrix<ElemType>::AddScaledDifference(const Matrix<ElemType>& alpha, const Matrix<ElemType>& a, const Matrix<ElemType>& b, Matrix<ElemType>& c)
{
DecideAndMoveToRightDevice(c, a, b);
// alpha lives on a device too and must be co-located with the operands
alpha._transferToDevice(c.GetDeviceId());
if (!(a.GetMatrixType() == b.GetMatrixType() && a.GetMatrixType() == c.GetMatrixType() && a.GetMatrixType() == alpha.GetMatrixType()))
NOT_IMPLEMENTED;
DISPATCH_MATRIX_ON_FLAG(&c,
&c,
CPUMatrix<ElemType>::AddScaledDifference(*alpha.m_CPUMatrix, *a.m_CPUMatrix, *b.m_CPUMatrix, *c.m_CPUMatrix),
GPUMatrix<ElemType>::AddScaledDifference(*alpha.m_GPUMatrix, *a.m_GPUMatrix, *b.m_GPUMatrix, *c.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
/// <summary> c = alpha * (a-b), where alpha is a 1x1 matrix acting as a scalar</summary>
/// a, b and c must have the same dimensions
/// <param name="alpha">1x1 matrix holding the scalar factor</param>
/// <param name="a">Input matrix</param>
/// <param name="b">Input matrix</param>
/// <param name="c">Resulting matrix, user is responsible for allocating this</param>
template <class ElemType>
void Matrix<ElemType>::AssignScaledDifference(const Matrix<ElemType>& alpha, const Matrix<ElemType>& a, const Matrix<ElemType>& b, Matrix<ElemType>& c)
{
DecideAndMoveToRightDevice(a, b, alpha);
// c is overwritten, so it follows the inputs' device and storage type
c._transferToDevice(a.GetDeviceId());
if (!(a.GetMatrixType() == b.GetMatrixType() && a.GetMatrixType() == alpha.GetMatrixType()))
NOT_IMPLEMENTED;
c.SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&c,
nullptr,
CPUMatrix<ElemType>::AssignScaledDifference(*alpha.m_CPUMatrix, *a.m_CPUMatrix, *b.m_CPUMatrix, *c.m_CPUMatrix),
GPUMatrix<ElemType>::AssignScaledDifference(*alpha.m_GPUMatrix, *a.m_GPUMatrix, *b.m_GPUMatrix, *c.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
//c[ci,cj] += a[ai,aj]
// Single-element accumulate; the leading '1' in the underlying call is the beta weight
// applied to c's existing value (1 = keep and add).
template <class ElemType>
void Matrix<ElemType>::AddElementToElement(const Matrix<ElemType>& a, const size_t ai, const size_t aj, Matrix<ElemType>& c, const size_t ci, const size_t cj)
{
DecideAndMoveToRightDevice(c, a);
if (c.GetMatrixType() != a.GetMatrixType())
NOT_IMPLEMENTED;
DISPATCH_MATRIX_ON_FLAG(&c,
&c,
CPUMatrix<ElemType>::AddElementToElement(1, *a.m_CPUMatrix, ai, aj, *c.m_CPUMatrix, ci, cj),
GPUMatrix<ElemType>::AddElementToElement(1, *a.m_GPUMatrix, ai, aj, *c.m_GPUMatrix, ci, cj),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
//c[ci,cj] = a[ai,aj]
// Single-element assignment, implemented as AddElementToElement with beta = 0 so c's
// previous value is discarded.
template <class ElemType>
void Matrix<ElemType>::AssignElementToElement(const Matrix<ElemType>& a, const size_t ai, const size_t aj, Matrix<ElemType>& c, const size_t ci, const size_t cj)
{
DecideAndMoveToRightDevice(c, a);
if (c.GetMatrixType() != a.GetMatrixType())
NOT_IMPLEMENTED;
DISPATCH_MATRIX_ON_FLAG(&c,
&c,
CPUMatrix<ElemType>::AddElementToElement(0, *a.m_CPUMatrix, ai, aj, *c.m_CPUMatrix, ci, cj),
GPUMatrix<ElemType>::AddElementToElement(0, *a.m_GPUMatrix, ai, aj, *c.m_GPUMatrix, ci, cj),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
// a[position] -= 1, i.e. subtracts one from the single element of 'a' at linear offset
// 'position' (dense CPU/GPU only).
// NOTE(review): the comment previously here ("for each column of this, we add row slice of a
// starting from startIndex") described a row-slice routine, not MinusOneAt, and was removed.
template <class ElemType>
void Matrix<ElemType>::MinusOneAt(Matrix<ElemType>& a, const size_t position)
{
DISPATCH_MATRIX_ON_FLAG(&a,
&a,
CPUMatrix<ElemType>::MinusOneAt(*a.m_CPUMatrix, position),
GPUMatrix<ElemType>::MinusOneAt(*a.m_GPUMatrix, position),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
/// <summary>Matrix-scalar multiply with col-major matrices: c = alpha * a</summary>
/// <param name="alpha">Scalar</param>
/// <param name="a">Input matrix</param>
/// <param name="c">Resulting matrix, user is responsible for allocating this</param>
template <class ElemType>
void Matrix<ElemType>::Scale(ElemType alpha, const Matrix<ElemType>& a, Matrix<ElemType>& c)
{
DecideAndMoveToRightDevice(c, a);
c.SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
if (alpha == 0)
{
c.Resize(a);
c.SetValue(0); // this is a little faster, and also does not propagate NaNs, which we'd expect from 'beta' parameters
return;
}
else
// GPU-sparse path scales via operator* into a fresh sparse matrix assigned to c
DISPATCH_MATRIX_ON_FLAG(&c,
&c,
CPUMatrix<ElemType>::Scale(alpha, *a.m_CPUMatrix, *c.m_CPUMatrix),
GPUMatrix<ElemType>::Scale(alpha, *a.m_GPUMatrix, *c.m_GPUMatrix),
NOT_IMPLEMENTED, * c.m_GPUSparseMatrix = (*a.m_GPUSparseMatrix) * alpha);
}
/// <summary>Matrix-scalar multiply with col-major matrices: a = alpha * a (in place)</summary>
/// <param name="alpha">Scalar</param>
/// <param name="a">Input matrix, scaled in place</param>
template <class ElemType>
void Matrix<ElemType>::Scale(ElemType alpha, Matrix<ElemType>& a)
{
// note: the alpha==0 shortcut runs even for an empty matrix (SetValue(0) on empty is a no-op)
if (alpha == 0)
a.SetValue(0); // this is a little faster, and also does not propagate NaNs, which we'd expect from 'beta' parameters
else if (a.IsEmpty())
return;
else
// supported for all four storage types
DISPATCH_MATRIX_ON_FLAG(&a,
&a,
CPUMatrix<ElemType>::Scale(alpha, *a.m_CPUMatrix),
GPUMatrix<ElemType>::Scale(alpha, *a.m_GPUMatrix),
CPUSparseMatrix<ElemType>::Scale(alpha, *a.m_CPUSparseMatrix),
GPUSparseMatrix<ElemType>::Scale(alpha, *a.m_GPUSparseMatrix));
}
/// <summary>Matrix scalar matrix multiply with col-major matrices: a = alpha[0,0] * a</summary>
/// <param name="alpha">1x1 matrix</param>
/// <param name="a">Input matrix, scaled in place</param>
template <class ElemType>
void Matrix<ElemType>::Scale(const Matrix<ElemType>& alpha, Matrix<ElemType>& a)
{
if (a.IsEmpty())
return;
DecideAndMoveToRightDevice(a, alpha);
// dense CPU/GPU only; alpha must match a's storage type
if (a.GetMatrixType() != alpha.GetMatrixType())
NOT_IMPLEMENTED;
DISPATCH_MATRIX_ON_FLAG(&a,
nullptr,
CPUMatrix<ElemType>::Scale(*alpha.m_CPUMatrix, *a.m_CPUMatrix),
GPUMatrix<ElemType>::Scale(*alpha.m_GPUMatrix, *a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
// Column-wise (isColWise=true) or row-wise inner products of a and b, written to c.
// 'a' may be dense or sparse (any device); 'b' and 'c' must be dense.
void Matrix<ElemType>::InnerProduct(const Matrix<ElemType>& a, const Matrix<ElemType>& b, Matrix<ElemType>& c, const bool isColWise)
{
if (a.IsEmpty() || b.IsEmpty())
LogicError("InnerProduct: one of the input matrix is empty.");
DecideAndMoveToRightDevice(a, b, c);
// TODO: consider swapping the arguments in this case
if (b.GetMatrixType() != DENSE) // only support a being sparse/dense. Both b and c should be dense
NOT_IMPLEMENTED;
c.SwitchToMatrixType(b.GetMatrixType(), b.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
&a,
CPUMatrix<ElemType>::InnerProduct(*a.m_CPUMatrix, *b.m_CPUMatrix, *c.m_CPUMatrix, isColWise),
GPUMatrix<ElemType>::InnerProduct(*a.m_GPUMatrix, *b.m_GPUMatrix, *c.m_GPUMatrix, isColWise),
CPUSparseMatrix<ElemType>::InnerProduct(*a.m_CPUSparseMatrix, *b.m_CPUMatrix, *c.m_CPUMatrix, isColWise),
GPUSparseMatrix<ElemType>::InnerProduct(*a.m_GPUSparseMatrix, *b.m_GPUMatrix, *c.m_GPUMatrix, isColWise));
}
template <class ElemType>
// Batched matrix multiply: treats the columns of a and b as a batch of matrices with row
// dimensions m and n respectively and computes c = op(a_i) * op(b_i) (+ beta * c_i) per batch
// item. Dense CPU/GPU only.
void Matrix<ElemType>::BatchMatMul(ElemType beta, const Matrix<ElemType>& a, const bool transposeA, const int m, const Matrix<ElemType>& b, const bool transposeB, const int n, Matrix<ElemType>& c, const bool isColWise)
{
if (a.IsEmpty() || b.IsEmpty())
LogicError("BatchMatMul: one of the input matrix is empty.");
DecideAndMoveToRightDevice(a, b, c);
if (a.GetMatrixType() != DENSE || b.GetMatrixType() != DENSE) // only support a & b being dense
NOT_IMPLEMENTED;
c.SwitchToMatrixType(b.GetMatrixType(), b.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
&a,
CPUMatrix<ElemType>::BatchMatMul(beta, *a.m_CPUMatrix, transposeA, m, *b.m_CPUMatrix, transposeB, n, *c.m_CPUMatrix, isColWise),
GPUMatrix<ElemType>::BatchMatMul(beta, *a.m_GPUMatrix, transposeA, m, *b.m_GPUMatrix, transposeB, n, *c.m_GPUMatrix, isColWise),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
// Returns the scalar inner product (sum of element-wise products) of two matrices.
// Dense/dense is supported on CPU and GPU; mixed dense/sparse only on GPU.
template <class ElemType>
ElemType Matrix<ElemType>::InnerProductOfMatrices(const Matrix<ElemType>& a, const Matrix<ElemType>& b)
{
    if (a.IsEmpty() || b.IsEmpty())
        LogicError("InnerProductOfMatrices: one of the input matrices is empty.");
    // Co-locate both operands on one device before dispatching.
    DecideAndMoveToRightDevice(a, b);
    if (a.GetMatrixType() == b.GetMatrixType())
    {
        // Same-type path: dense x dense only; sparse x sparse is unimplemented.
        DISPATCH_MATRIX_ON_FLAG(&a,
                                nullptr,
                                return CPUMatrix<ElemType>::InnerProductOfMatrices(*a.m_CPUMatrix, *b.m_CPUMatrix),
                                return GPUMatrix<ElemType>::InnerProductOfMatrices(*a.m_GPUMatrix, *b.m_GPUMatrix),
                                NOT_IMPLEMENTED,
                                NOT_IMPLEMENTED);
    }
    else
    {
        // Mixed-type path: GPU dense x GPU sparse (either order); CPU mixed is unimplemented.
        DISPATCH_MATRIX_ON_FLAG(&a,
                                nullptr,
                                NOT_IMPLEMENTED,
                                return GPUSparseMatrix<ElemType>::InnerProductOfMatrices(*a.m_GPUMatrix, *b.m_GPUSparseMatrix),
                                NOT_IMPLEMENTED,
                                return GPUSparseMatrix<ElemType>::InnerProductOfMatrices(*a.m_GPUSparseMatrix, *b.m_GPUMatrix));
    }
}
// Computes the inner product of a and b and stores it into *this as a 1x1 matrix.
// Unlike InnerProductOfMatrices, the result stays on the device (no host read-back on GPU).
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignInnerProductOfMatrices(const Matrix<ElemType>& a, const Matrix<ElemType>& b)
{
    if (a.IsEmpty() || b.IsEmpty())
        LogicError("InnerProductOfMatrices: one of the input matrices is empty.");
    // The result is a scalar held in a 1x1 matrix.
    Resize(1, 1);
    DecideAndMoveToRightDevice(a, b, *this);
    if (a.GetMatrixType() == b.GetMatrixType())
    {
        SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
        // CPU computes the scalar on the host and stores it; GPU computes fully on-device.
        DISPATCH_MATRIX_ON_FLAG(&a,
                                this,
                                m_CPUMatrix->SetValue(CPUMatrix<ElemType>::InnerProductOfMatrices(*a.m_CPUMatrix, *b.m_CPUMatrix)),
                                m_GPUMatrix->AssignInnerProductOfMatrices(*a.m_GPUMatrix, *b.m_GPUMatrix),
                                NOT_IMPLEMENTED,
                                NOT_IMPLEMENTED);
    }
    else
    {
        // Mixed dense/sparse operands are not supported here.
        NOT_IMPLEMENTED;
    }
    return *this;
}
// Element-wise power: c[i,j] = a[i,j] ^ alpha. c adopts a's type and format.
// Note the dispatch is on c (after the switch it matches a's type anyway);
// CPU-sparse is unimplemented.
template <class ElemType>
void Matrix<ElemType>::ElementWisePower(ElemType alpha, const Matrix<ElemType>& a, Matrix<ElemType>& c)
{
    // Empty input leaves c untouched.
    if (a.IsEmpty())
        return;
    DecideAndMoveToRightDevice(a, c);
    // Output mirrors the input's type/format; contents are overwritten, so no copy.
    c.SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
    DISPATCH_MATRIX_ON_FLAG(&c,
                            nullptr,
                            CPUMatrix<ElemType>::ElementWisePower(alpha, *a.m_CPUMatrix, *c.m_CPUMatrix),
                            GPUMatrix<ElemType>::ElementWisePower(alpha, *a.m_GPUMatrix, *c.m_GPUMatrix),
                            NOT_IMPLEMENTED,
                            GPUSparseMatrix<ElemType>::ElementWisePower(alpha, *a.m_GPUSparseMatrix, *c.m_GPUSparseMatrix));
}
// Returns true if a and b have identical dimensions and all elements match within
// the given threshold. Handles same-type comparisons on CPU/GPU and mixed
// dense/sparse comparisons on GPU only.
template <class ElemType>
bool Matrix<ElemType>::AreEqual(const Matrix<ElemType>& a, const Matrix<ElemType>& b, const ElemType threshold /*= 1e-8*/)
{
    // Different shapes can never be equal; check before any device transfer.
    if (a.GetNumRows() != b.GetNumRows() || a.GetNumCols() != b.GetNumCols())
        return false;
    DecideAndMoveToRightDevice(a, b);
    if (a.GetMatrixType() == b.GetMatrixType())
    {
        // Same storage type: compare directly in the matching backend.
        DISPATCH_MATRIX_ON_FLAG(&a,
                                nullptr,
                                return CPUMatrix<ElemType>::AreEqual(*a.m_CPUMatrix, *b.m_CPUMatrix, threshold),
                                return GPUMatrix<ElemType>::AreEqual(*a.m_GPUMatrix, *b.m_GPUMatrix, threshold),
                                return CPUSparseMatrix<ElemType>::AreEqual(*a.m_CPUSparseMatrix, *b.m_CPUSparseMatrix, threshold),
                                return GPUSparseMatrix<ElemType>::AreEqual(*a.m_GPUSparseMatrix, *b.m_GPUSparseMatrix, threshold));
    }
    else
    {
        // Mixed types: only GPU dense-vs-sparse comparisons are implemented.
        // The "return false" after NOT_IMPLEMENTED is unreachable; it only silences
        // missing-return warnings in the macro expansion.
        DISPATCH_MATRIX_ON_FLAG(&a,
                                nullptr,
                                NOT_IMPLEMENTED;
                                return false,
                                return GPUSparseMatrix<ElemType>::AreEqual(*a.m_GPUMatrix, *b.m_GPUSparseMatrix, threshold),
                                NOT_IMPLEMENTED;
                                return false,
                                return GPUSparseMatrix<ElemType>::AreEqual(*a.m_GPUSparseMatrix, *b.m_GPUMatrix, threshold));
    }
}
// Returns true if any element of a equals the given value. Dense-only;
// sparse paths are unimplemented (the trailing "return false" after
// NOT_IMPLEMENTED is unreachable and only silences missing-return warnings).
template <class ElemType>
bool Matrix<ElemType>::HasElement(const Matrix<ElemType>& a, const ElemType value)
{
    // An empty matrix contains nothing.
    if (a.IsEmpty())
        return false;
    DISPATCH_MATRIX_ON_FLAG(&a,
                            &a,
                            return CPUMatrix<ElemType>::HasElement(*a.m_CPUMatrix, value),
                            return GPUMatrix<ElemType>::HasElement(*a.m_GPUMatrix, value),
                            NOT_IMPLEMENTED;
                            return false,
                            NOT_IMPLEMENTED;
                            return false);
}
// diagnostics helper to check if matrix has a NaN
// This is very slow.
// Returns true and logs the first NaN coordinate to stderr; 'name' labels the
// matrix in the log message.
template <class ElemType>
bool Matrix<ElemType>::HasNan(const char* name) const
{
    // Not implemented for sparse matrices.
    // Return false as a workaround to at
    // least evaluate the dense matrices.
    if (m_matrixType == MatrixType::SPARSE)
        return false;
    if (IsEmpty())
        return false;
    // if GPU then first detect NaN there, will be faster
    // (a sum over all elements is NaN iff at least one element is NaN,
    //  so a non-NaN sum lets us skip the per-element scan below)
    if (GetDeviceId() != CPUDEVICE)
    {
        Matrix<ElemType> sum(GetDeviceId());
        sum.AssignSumOfElements(*this);
        auto x = sum.Get00Element();
        if (!std::isnan(x))
            return false;
    }
    // Element-wise scan to locate and report the first NaN.
    // Note: on GPU, operator() reads elements back to the host, hence "very slow".
    // const auto & us = *this;
    const Matrix<ElemType>& us = *this;
    foreach_coord (i, j, us)
        if (std::isnan(us(i, j)))
        {
            fprintf(stderr, "HasNan: NaN detected at %s (%ld,%ld) in (%d,%d) matrix\n", name, i, j, (int) GetNumRows(), (int) GetNumCols());
            return true;
        }
    return false;
}
// Convenience wrapper that passes the variable's own name as the label.
#define CheckNan(m) m.HasNan(#m)
// another diagnostics helper to check if matrix has a NaN
// This is used at load and save time. This test is slow.
// Returns the number of non-finite (NaN or +/-Inf) elements.
template <class ElemType>
size_t Matrix<ElemType>::CountNanInf() const
{
    const auto& us = *this;
    size_t n = 0; // number of NaNs/INF found
    // Note: on GPU, operator() reads elements back to the host, hence "slow".
    foreach_coord (i, j, us)
    {
        auto val = us(i, j);
        // !isfinite is true for both NaN and infinities, so a separate
        // isnan check is unnecessary.
        if (!std::isfinite(val))
            n++;
    }
    return n;
}
// TODO: these are scalar operations--why are they in Matrix?
// Returns 10^num, computed as exp(num * ln(10)).
template <class ElemType>
ElemType Matrix<ElemType>::Exp10(ElemType num)
{
    // ln(10) to full double precision (the previous literal 2.302585093 was
    // truncated, losing accuracy in the double-precision intermediate).
    return (ElemType) exp(num * 2.302585092994046);
}
// Floored modulo: returns x mod y with the sign of y (y must be strictly positive).
template <class ElemType>
ElemType Matrix<ElemType>::Mod(ElemType x, ElemType y)
{
    assert(y > 0);
    // The check rejects y == 0 as well as negative y, so the message must say
    // "must be positive" (the old text "y is smaller than zero" was wrong for y == 0).
    if (y <= 0)
        LogicError("Mod: y must be positive.");
    return x - y * floor(x / y);
}
// TODO: use static LogAdd() as defined in TensorOps.h
// Not doing this currently because that one uses ElemType for all ops, while this one uses double inside. Must compare before making this change.
// Computes log(exp(x) + exp(y)) in a numerically stable way:
// anchor on the larger argument and add log(1 + exp(smaller - larger)).
template <class ElemType>
ElemType Matrix<ElemType>::LogAdd(ElemType x, ElemType y)
{
    // Make x the larger of the two so the exponent below is non-positive.
    if (x < y)
    {
        ElemType larger = y;
        y = x;
        x = larger;
    }
    ElemType diff = y - x; // <= 0 by construction
    // If the smaller term is close enough to matter, fold it in; otherwise the
    // sum is dominated by x (or is effectively log-zero when x is tiny).
    if (diff >= MINLOGEXP)
        return (ElemType)(x + log(1.0 + exp(diff)));
    return (ElemType)((x < LSMALL) ? (ElemType)LZERO : x);
}
//Matrix<ElemType>& Matrix<ElemType>::Shift(const Matrix<ElemType>& a, size_t shift)
//[this]= (a right shift by n), padded with zeros
// shift left, shift needs to be negative value
// shift right, shift needs to be positive value
// BUGBUG: Leaves uninitialized values in the opened-up columns.
// NOTE: this function currently ALWAYS throws (either "empty" or the BUGBUG
// LogicError below), so the shifting code after the if/else is unreachable
// until the uninitialized-column bug is fixed.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::Shift(const Matrix<ElemType>& a, int shift)
{
    if (a.IsEmpty())
        LogicError("Shift: Matrix is empty.");
    else
        LogicError("Shift: BUGBUG This function currently leaves uninitialized values. Fix the code or contact fseide@microsoft.com.");
    auto& us = *this;
    if (this != &a)
    {
        Resize(a.GetNumRows(), a.GetNumCols());
    }
    long n = (long) GetNumCols();
    // Positive shift moves columns right; negative shift moves them left.
    if (shift >= 0 && shift < n)
        us.ColumnSlice(shift, n - shift).AssignValuesOf(a.ColumnSlice(0, n - shift));
    if (shift < 0 && shift > -n)
        us.ColumnSlice(0, n + shift).AssignValuesOf(a.ColumnSlice(-shift, n + shift));
    return *this;
}
// Assigns to *this the element-wise product of row vectors a and b, where b is
// read with a circular shift, plus 'negnumber' additional negative-sample rows.
// Both inputs must be 1 x N row vectors of equal length.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignElementProductOfWithShiftNeg(const Matrix<ElemType>& a, const Matrix<ElemType>& b, size_t shift, size_t negnumber)
{
    if (a.IsEmpty() || b.IsEmpty())
        LogicError("AssignElementProductOfWithShiftNeg: Matrix is empty.");
    assert(a.GetNumRows() == b.GetNumRows() && a.GetNumCols() == b.GetNumCols());
    if (!(a.GetNumRows() == b.GetNumRows() && a.GetNumCols() == b.GetNumCols()))
        InvalidArgument("The input matrix dimensions do not match.");
    if (a.GetNumRows() != 1)
        InvalidArgument("AssignElementProductOfWithShiftNeg: The input matrix must be a row vector.");
    // Co-locate operands and match the output's storage type to the inputs'.
    DecideAndMoveToRightDevice(a, b, *this);
    if (!(a.GetMatrixType() == b.GetMatrixType()))
        NOT_IMPLEMENTED;
    SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
    // Sparse variants are unimplemented.
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->AssignElementProductOfWithShiftNeg(*a.m_CPUMatrix, *b.m_CPUMatrix, shift, negnumber),
                            m_GPUMatrix->AssignElementProductOfWithShiftNeg(*a.m_GPUMatrix, *b.m_GPUMatrix, shift, negnumber),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
// Convenience wrapper: computes InnerProductWithShiftNeg(a, b) into *this
// and returns *this for chaining.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignInnerProductOfWithShiftNeg(const Matrix<ElemType>& a, const Matrix<ElemType>& b, const bool isColWise, size_t shift, size_t negnumber)
{
    InnerProductWithShiftNeg(a, b, *this, isColWise, shift, negnumber);
    return *this;
}
// Computes into c the inner product of a and b where b is read with a circular
// shift, including 'negnumber' negative-sample results. Dense-only.
template <class ElemType>
void Matrix<ElemType>::InnerProductWithShiftNeg(const Matrix<ElemType>& a, const Matrix<ElemType>& b, Matrix<ElemType>& c, const bool isColWise, size_t shift, size_t negnumber)
{
    if (a.IsEmpty() || b.IsEmpty())
        LogicError("InnerProductWithShiftNeg: one of the input matrices is empty."); // was a copy-pasted "InnerProduct:" message
    // Co-locate all operands; output adopts the inputs' storage type/format.
    DecideAndMoveToRightDevice(a, b, c);
    if (a.GetMatrixType() != b.GetMatrixType())
        NOT_IMPLEMENTED;
    c.SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
    // NOTE(review): the GPU overload is called without isColWise while the CPU
    // overload receives it — presumably the GPU implementation is column-wise
    // only; confirm against GPUMatrix::InnerProductWithShiftNeg's signature.
    DISPATCH_MATRIX_ON_FLAG(&c,
                            &c,
                            CPUMatrix<ElemType>::InnerProductWithShiftNeg(*a.m_CPUMatrix, *b.m_CPUMatrix, *c.m_CPUMatrix, isColWise, shift, negnumber),
                            GPUMatrix<ElemType>::InnerProductWithShiftNeg(*a.m_GPUMatrix, *b.m_GPUMatrix, *c.m_GPUMatrix, shift, negnumber),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Copies row 'index' of a into *this. Dense-only; *this and a must already
// share the same matrix type.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::GetARowByIndex(const Matrix<ElemType>& a, size_t index)
{
    if (a.IsEmpty())
        LogicError("GetARowByIndex: Matrix is empty.");
    // WARNING: a and this must have same type
    if (!(GetMatrixType() == a.GetMatrixType()))
        NOT_IMPLEMENTED;
    SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
    // Sparse variants are unimplemented.
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->GetARowByIndex(*a.m_CPUMatrix, index),
                            m_GPUMatrix->GetARowByIndex(*a.m_GPUMatrix, index),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
// Row-wise element multiply of a and b into c, where b's columns are read with
// a circular shift; bFirstmatrixfixed selects which operand stays unshifted.
// Dense-only.
template <class ElemType>
void Matrix<ElemType>::ConductRowElementMultiplyWithShift(const Matrix<ElemType>& a, const Matrix<ElemType>& b, Matrix<ElemType>& c, size_t shift, bool bFirstmatrixfixed)
{
    if (a.IsEmpty() || b.IsEmpty())
        LogicError("ConductRowElementMultiplyWithShift: one of the input matrices is empty."); // was a copy-pasted "InnerProduct:" message
    // Co-locate all operands; output adopts the inputs' storage type/format.
    DecideAndMoveToRightDevice(a, b, c);
    if (a.GetMatrixType() != b.GetMatrixType())
        NOT_IMPLEMENTED;
    c.SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
    // Sparse variants are unimplemented.
    DISPATCH_MATRIX_ON_FLAG(&c,
                            &c,
                            CPUMatrix<ElemType>::ConductRowElementMultiplyWithShift(*a.m_CPUMatrix, *b.m_CPUMatrix, *c.m_CPUMatrix, shift, bFirstmatrixfixed),
                            GPUMatrix<ElemType>::ConductRowElementMultiplyWithShift(*a.m_GPUMatrix, *b.m_GPUMatrix, *c.m_GPUMatrix, shift, bFirstmatrixfixed),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Assigns to *this the element-wise product of row vectors a and b, where b is
// read with a circular shift. Both inputs must be 1 x N row vectors of equal length.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignElementProductOfWithShift(const Matrix<ElemType>& a, const Matrix<ElemType>& b, size_t shift)
{
    if (a.IsEmpty() || b.IsEmpty())
        LogicError("AssignElementProductOfWithShift: Matrix is empty.");
    assert(a.GetNumRows() == b.GetNumRows() && a.GetNumCols() == b.GetNumCols());
    if (!(a.GetNumRows() == b.GetNumRows() && a.GetNumCols() == b.GetNumCols()))
        InvalidArgument("The input matrix dimensions do not match.");
    if (a.GetNumRows() != 1)
        InvalidArgument("AssignElementProductOfWithShift: The input matrix must be a row vector."); // was a copy-pasted "...WithShiftNeg" message
    // Co-locate operands and match the output's storage type to the inputs'.
    DecideAndMoveToRightDevice(a, b, *this);
    if (!(a.GetMatrixType() == b.GetMatrixType()))
        NOT_IMPLEMENTED;
    SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
    // Sparse variants are unimplemented.
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->AssignElementProductOfWithShift(*a.m_CPUMatrix, *b.m_CPUMatrix, shift),
                            m_GPUMatrix->AssignElementProductOfWithShift(*a.m_GPUMatrix, *b.m_GPUMatrix, shift),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
// Backward (beta) pass of the recurrent CRF forward-backward computation.
// alpha is the forward-pass result; beta is written in place. Dense CPU/GPU only.
template <class ElemType>
void Matrix<ElemType>::RCRFBackwardCompute(const Matrix<ElemType>& alpha, Matrix<ElemType>& beta,
                                           Matrix<ElemType>& functionValues, const Matrix<ElemType>& lbls,
                                           const Matrix<ElemType>& pos_scores, const Matrix<ElemType>& pair_scores, const int shift)
{
    // All work happens on alpha's device.
    DecideAndMoveToRightDevice(alpha, beta);
    functionValues._transferToDevice(alpha.GetDeviceId());
    beta._transferToDevice(alpha.GetDeviceId());
    // NOTE(review): the CPU overload is called without pos_scores and shift,
    // while the GPU overload receives both — presumably the CPU implementation
    // does not need them; confirm against CPUMatrix::RCRFBackwardCompute.
    DISPATCH_MATRIX_ON_FLAG(&alpha,
                            &beta,
                            CPUMatrix<ElemType>::RCRFBackwardCompute(
                                *alpha.m_CPUMatrix,
                                *beta.m_CPUMatrix,
                                *lbls.m_CPUMatrix,
                                *pair_scores.m_CPUMatrix),
                            GPUMatrix<ElemType>::RCRFBackwardCompute(
                                *alpha.m_GPUMatrix,
                                *beta.m_GPUMatrix,
                                *lbls.m_GPUMatrix,
                                *pos_scores.m_GPUMatrix,
                                *pair_scores.m_GPUMatrix, shift),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Computes the gradient of the recurrent CRF transition scores into grd,
// from the labels and the forward/backward (alpha/beta) results. Dense CPU/GPU only.
template <class ElemType>
void Matrix<ElemType>::RCRFTransGrdCompute(const Matrix<ElemType>& lbls,
                                           const Matrix<ElemType>& alpha,
                                           const Matrix<ElemType>& beta,
                                           const Matrix<ElemType>& pair_scores,
                                           Matrix<ElemType>& grd,
                                           const int startLbl,
                                           const int shift)
{
    // All work happens on alpha's device.
    DecideAndMoveToRightDevice(alpha, grd);
    grd._transferToDevice(alpha.GetDeviceId());
    // NOTE(review): the CPU overload is called without startLbl and shift,
    // while the GPU overload receives both — presumably the CPU implementation
    // does not need them; confirm against CPUMatrix::RCRFTransGrdCompute.
    DISPATCH_MATRIX_ON_FLAG(&alpha,
                            &grd,
                            CPUMatrix<ElemType>::RCRFTransGrdCompute(
                                *lbls.m_CPUMatrix,
                                *alpha.m_CPUMatrix,
                                *beta.m_CPUMatrix,
                                *pair_scores.m_CPUMatrix,
                                *grd.m_CPUMatrix),
                            GPUMatrix<ElemType>::RCRFTransGrdCompute(
                                *lbls.m_GPUMatrix,
                                *alpha.m_GPUMatrix,
                                *beta.m_GPUMatrix,
                                *pair_scores.m_GPUMatrix,
                                *grd.m_GPUMatrix,
                                startLbl,
                                shift),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Drops (masks) frames of *this whose label/gamma statistics fall under the
// given threshold. label and gamma must have identical dimensions. Dense-only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::DropFrame(const Matrix<ElemType>& label, const Matrix<ElemType>& gamma, const ElemType& threshhold)
{
    // Co-locate all operands on one device before dispatching.
    DecideAndMoveToRightDevice(*this, label, gamma);
    if (label.GetNumCols() != gamma.GetNumCols() || label.GetNumRows() != gamma.GetNumRows())
        LogicError("DropFrame: label matrix is not in the same size as gamma matrix."); // fixed typo "gamm"
    SwitchToMatrixType(label.GetMatrixType(), label.GetFormat(), false);
    // Sparse variants are unimplemented.
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->DropFrame(*label.m_CPUMatrix, *gamma.m_CPUMatrix, threshhold),
                            m_GPUMatrix->DropFrame(*label.m_GPUMatrix, *gamma.m_GPUMatrix, threshhold),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
/// <summary>Computes the sequence-training error signal from the label, network
/// output, and gamma (posterior) matrices, smoothed by hsmoothingWeight, and
/// assigns it to this matrix.</summary>
/// <param name="hsmoothingWeight">H-criterion smoothing weight</param>
/// <param name="label">Ground-truth label matrix</param>
/// <param name="dnnoutput">Network output matrix</param>
/// <param name="gamma">Posterior (gamma) matrix; must share label's matrix type</param>
/// <param name="alpha">Scaling factor</param>
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignSequenceError(const ElemType hsmoothingWeight, const Matrix<ElemType>& label,
                                                        const Matrix<ElemType>& dnnoutput, const Matrix<ElemType>& gamma, ElemType alpha)
{
    DecideAndMoveToRightDevice(label, dnnoutput, gamma);
    if (!(label.GetMatrixType() == gamma.GetMatrixType()))
        NOT_IMPLEMENTED;
    SwitchToMatrixType(label.GetMatrixType(), label.GetFormat(), false);
    // Sparse variants are unimplemented.
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->AssignSequenceError(hsmoothingWeight, *label.m_CPUMatrix, *dnnoutput.m_CPUMatrix, *gamma.m_CPUMatrix, alpha),
                            m_GPUMatrix->AssignSequenceError(hsmoothingWeight, *label.m_GPUMatrix, *dnnoutput.m_GPUMatrix, *gamma.m_GPUMatrix, alpha),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
// Calculate CTC score
// prob (input): the posterior output from the network
// alpha, beta (output): alpha and beta for forward-backward calculation.
// phoneSeq (input): phone ID sequence for each utterance in this minibatch, each col is one utterance
// phoneBound (input): phone boundary (frame index) of each phone for each utterance in this minibatch, each col is one utterance
// totalScore (output): total CTC score
// uttToChanInd (input): map from utterance ID to minibatch channel ID. We need this because each channel may contain more than one utterance.
// uttBeginFrame(input): the position of the first frame of each utterance in the minibatch channel. We need this because each channel may contain more than one utterance.
// uttFrameNum (input): the frame number of each utterance. The size of this vector = the number of all utterances in this minibatch
// uttPhoneNum (input): the phone number of each utterance. The size of this vector = the number of all utterances in this minibatch
// numParallelSequences (input): num of parallel sequences
// mbsize (input): the maximum channel frame number
// blankTokenId (input): id of the CTC blank token
// delayConstraint -- label output delay constraint introduced during training that allows to have shorter delay during inference. This using the original time information to enforce that CTC tokens only get aligned within a time margin.
//      Setting this parameter smaller will result in shorted delay between label output during decoding, yet may hurt accuracy.
//      delayConstraint=-1 means no constraint
template<class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignCTCScore(const Matrix<ElemType>& prob, Matrix<ElemType>& alpha, Matrix<ElemType>& beta,
    const Matrix<ElemType>& phoneSeq, const Matrix<ElemType>& phoneBound, Matrix<ElemType> &totalScore, const std::vector<size_t> & uttToChanInd,
    const std::vector<size_t> & uttBeginFrame, const std::vector<size_t> & uttFrameNum, const std::vector<size_t> & uttPhoneNum,
    const size_t numParallelSequences, const size_t mbsize, const size_t blankTokenId, const int delayConstraint, const bool isColWise)
{
    DecideAndMoveToRightDevice(prob, *this);
    // alpha/beta have one row per phone-sequence position and one column per frame.
    alpha.Resize(phoneSeq.GetNumRows(), prob.GetNumCols());
    beta.Resize(phoneSeq.GetNumRows(), prob.GetNumCols());
    // The result (error signal) matches the posterior matrix's shape.
    Resize(prob.GetNumRows(), prob.GetNumCols());
    // Initialize all buffers to log-zero before the forward-backward pass.
    alpha.SetValue(LZERO);
    beta.SetValue(LZERO);
    SetValue(LZERO);
    SwitchToMatrixType(prob.GetMatrixType(), prob.GetFormat(), false);
    // Sparse variants are unimplemented.
    DISPATCH_MATRIX_ON_FLAG(&prob,
        this,
        this->m_CPUMatrix->AssignCTCScore(*prob.m_CPUMatrix, *alpha.m_CPUMatrix, *beta.m_CPUMatrix, *phoneSeq.m_CPUMatrix, *phoneBound.m_CPUMatrix, *totalScore.m_CPUMatrix,
            uttToChanInd, uttBeginFrame, uttFrameNum, uttPhoneNum, numParallelSequences, mbsize, blankTokenId, delayConstraint, isColWise),
        this->m_GPUMatrix->AssignCTCScore(*prob.m_GPUMatrix, *alpha.m_GPUMatrix, *beta.m_GPUMatrix, *phoneSeq.m_GPUMatrix, *phoneBound.m_GPUMatrix, *totalScore.m_GPUMatrix,
            uttToChanInd, uttBeginFrame, uttFrameNum, uttPhoneNum, numParallelSequences, mbsize, blankTokenId, delayConstraint, isColWise),
        NOT_IMPLEMENTED,
        NOT_IMPLEMENTED
        );
    return *this;
}
#pragma endregion Static BLAS Functions
// TensorView currently does not interface with sparse matrices. For now, we just catch this and throw.
// Returns true when the matrix is dense; throws otherwise (so a true return is
// the only way out, which lets callers chain checks with &&).
template <class ElemType>
static bool VerifyIsDense(const Matrix<ElemType>& a)
{
    const bool dense = (a.GetMatrixType() == DENSE);
    if (!dense)
        RuntimeError("TensorOp: Tensor operations are currently not supported for sparse matrices.");
    return dense;
}
// Unary tensor operation: *this = beta * *this + alpha * reduce(op(a)), driven
// by offset/stride descriptors computed by TensorView. Dense CPU/GPU only.
template <class ElemType>
void Matrix<ElemType>::TensorOp(ElemType beta, const Matrix<ElemType>& a, ElemType alpha, ElementWiseOperator op, ElementWiseOperator reductionOp,
                                const array<size_t, 2>& offsets,
                                const SmallVector<size_t>& regularOpDims, const array<SmallVector<ptrdiff_t>, 2>& regularStrides,
                                const SmallVector<size_t>& reducingOpDims, const array<SmallVector<ptrdiff_t>, 2>& reducingStrides)
{
    // VerifyIsDense throws on sparse inputs; the && short-circuits after the first failure.
    VerifyIsDense(*this) && VerifyIsDense(a);
    DecideAndMoveToRightDevice(*this, a);
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->TensorOp(beta, *a.m_CPUMatrix, alpha, op, reductionOp, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides),
                            m_GPUMatrix->TensorOp(beta, *a.m_GPUMatrix, alpha, op, reductionOp, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Binary tensor operation: *this = beta * *this + alpha * reduce(op(a, b)),
// driven by offset/stride descriptors computed by TensorView. Dense CPU/GPU only.
template <class ElemType>
void Matrix<ElemType>::TensorOp(ElemType beta, const Matrix<ElemType>& a, const Matrix<ElemType>& b, ElemType alpha, ElementWiseOperator op, ElementWiseOperator reductionOp,
                                const array<size_t, 3>& offsets,
                                const SmallVector<size_t>& regularOpDims, const array<SmallVector<ptrdiff_t>, 3>& regularStrides,
                                const SmallVector<size_t>& reducingOpDims, const array<SmallVector<ptrdiff_t>, 3>& reducingStrides)
{
    // VerifyIsDense throws on sparse inputs; the && short-circuits after the first failure.
    VerifyIsDense(*this) && VerifyIsDense(a) && VerifyIsDense(b);
    DecideAndMoveToRightDevice(*this, a, b);
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->TensorOp(beta, *a.m_CPUMatrix, *b.m_CPUMatrix, alpha, op, reductionOp, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides),
                            m_GPUMatrix->TensorOp(beta, *a.m_GPUMatrix, *b.m_GPUMatrix, alpha, op, reductionOp, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Ternary tensor operation: *this = beta * *this + alpha * reduce(op(a, b, c)),
// driven by offset/stride descriptors computed by TensorView. Dense CPU/GPU only.
template <class ElemType>
void Matrix<ElemType>::TensorOp(ElemType beta, const Matrix<ElemType>& a, const Matrix<ElemType>& b, const Matrix<ElemType>& c, ElemType alpha, ElementWiseOperator op, ElementWiseOperator reductionOp,
                                const array<size_t, 4>& offsets,
                                const SmallVector<size_t>& regularOpDims, const array<SmallVector<ptrdiff_t>, 4>& regularStrides,
                                const SmallVector<size_t>& reducingOpDims, const array<SmallVector<ptrdiff_t>, 4>& reducingStrides)
{
    // VerifyIsDense throws on sparse inputs; the && short-circuits after the first failure.
    VerifyIsDense(*this) && VerifyIsDense(a) && VerifyIsDense(b) && VerifyIsDense(c);
    DecideAndMoveToRightDevice(*this, a, b, c);
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->TensorOp(beta, *a.m_CPUMatrix, *b.m_CPUMatrix, *c.m_CPUMatrix, alpha, op, reductionOp, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides),
                            m_GPUMatrix->TensorOp(beta, *a.m_GPUMatrix, *b.m_GPUMatrix, *c.m_GPUMatrix, alpha, op, reductionOp, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Argument-reduction tensor operation (e.g. argmax/argmin over the reducing
// dimensions of a), storing the resulting indices into *this. Dense CPU/GPU only.
template <class ElemType>
void Matrix<ElemType>::TensorArgOp(const Matrix<ElemType>& a, ElementWiseOperator reductionOp,
                                   const array<size_t, 2>& offsets,
                                   const SmallVector<size_t>& regularOpDims, const array<SmallVector<ptrdiff_t>, 2>& regularStrides,
                                   const SmallVector<size_t>& reducingOpDims, const array<SmallVector<ptrdiff_t>, 2>& reducingStrides)
{
    // VerifyIsDense throws on sparse inputs; the && short-circuits after the first failure.
    VerifyIsDense(*this) && VerifyIsDense(a);
    DecideAndMoveToRightDevice(*this, a);
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->TensorArgOp(*a.m_CPUMatrix, reductionOp, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides),
                            m_GPUMatrix->TensorArgOp(*a.m_GPUMatrix, reductionOp, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Explicit instantiations of the full Matrix template for the supported element types.
//template class Matrix<short>;
template class Matrix<float>;
template class Matrix<double>;
template class Matrix<half>;
//template class Matrix<char>;
// instantiate some templated methods
template MATH_API void Matrix<float>::AdaDeltaUpdate(Matrix<float>& gradients, Matrix<float>& functionvalues, float learningRatePerSample, float rho, float epsilon, int* timestamps, int currentTimestamp);
template MATH_API void Matrix<double>::AdaDeltaUpdate(Matrix<double>& gradients, Matrix<double>& functionvalues, double learningRatePerSample, double rho, double epsilon, int* timestamps, int currentTimestamp);
// mixed-precision variant: half-precision gradients with float accumulators
template MATH_API void Matrix<float>::AdaDeltaUpdate(Matrix<half>& gradients, Matrix<float>& functionvalues, float learningRatePerSample, float rho, float epsilon, int* timestamps, int currentTimestamp);
template MATH_API void Matrix<float>::BatchNormalizationForward(const Matrix<float>& scale, const Matrix<float>& bias, bool inferenceOnly, double expAvgFactor, double blendFactor, Matrix<float>& runMean, Matrix<float>& runVariance, Matrix<float>& out, double epsilon, Matrix<float>& saveMean, Matrix<float>& saveInvStdDev) const;
template MATH_API void Matrix<double>::BatchNormalizationForward(const Matrix<double>& scale, const Matrix<double>& bias, bool inferenceOnly, double expAvgFactor, double blendFactor, Matrix<double>& runMean, Matrix<double>& runVariance, Matrix<double>& out, double epsilon, Matrix<double>& saveMean, Matrix<double>& saveInvStdDev) const;
// mixed-precision variant: half-precision data with float statistics
template MATH_API void Matrix<half>::BatchNormalizationForward(const Matrix<float>& scale, const Matrix<float>& bias, bool inferenceOnly, double expAvgFactor, double blendFactor, Matrix<float>& runMean, Matrix<float>& runVariance, Matrix<half>& out, double epsilon, Matrix<float>& saveMean, Matrix<float>& saveInvStdDev) const;
template MATH_API void Matrix<float>::BatchNormalizationBackward(const Matrix<float>& in, Matrix<float>& grad, const Matrix<float>& scale, double blendFactor, const Matrix<float>& saveMean, const Matrix<float>& saveInvStdDev, Matrix<float>& scaleGrad, Matrix<float>& biasGrad) const;
template MATH_API void Matrix<double>::BatchNormalizationBackward(const Matrix<double>& in, Matrix<double>& grad, const Matrix<double>& scale, double blendFactor, const Matrix<double>& saveMean, const Matrix<double>& saveInvStdDev, Matrix<double>& scaleGrad, Matrix<double>& biasGrad) const;
// mixed-precision variant: half-precision data with float statistics
template MATH_API void Matrix<half>::BatchNormalizationBackward(const Matrix<half>& in, Matrix<half>& grad, const Matrix<float>& scale, double blendFactor, const Matrix<float>& saveMean, const Matrix<float>& saveInvStdDev, Matrix<float>& scaleGrad, Matrix<float>& biasGrad) const;
// We use Matrix<char> as the backing store for QuantizedMatrix, and also as a flag matrix.
// Let's explicitly instantiate the methods we need for that purpose
template Matrix<char>::Matrix(DEVICEID_TYPE);
template Matrix<char>::Matrix(Matrix<char>&&);
template Matrix<char>::Matrix(const size_t numRows, const size_t numCols, DEVICEID_TYPE deviceId, const MatrixType matrixType, const MatrixFormat matrixFormat, const size_t nnz);
template Matrix<char>::Matrix(const size_t numRows, const size_t numCols, char* pArray, DEVICEID_TYPE deviceId, const size_t matrixFlags, const size_t nnz);
template Matrix<char>::~Matrix();
template Matrix<char>& Matrix<char>::operator=(Matrix<char>&& moveFrom);
template char* Matrix<char>::Data() const;
template int Matrix<char>::GetDeviceId() const;
template size_t Matrix<char>::GetNumElements() const;
template Matrix<char> Matrix<char>::ColumnSlice(size_t startColumn, size_t numCols) const;
template void Matrix<char>::_transferToDevice(int id_to, bool isBeingMoved, bool emptyTransfer) const;
template void Matrix<char>::TransferToDeviceIfNotThere(int id_to, bool isBeingMoved, bool emptyTransfer, bool updatePreferredDevice) const;
template size_t Matrix<char>::GetNumRows() const;
template size_t Matrix<char>::GetNumCols() const;
template void Matrix<char>::SetValue(const char);
template void Matrix<char>::SetValue(size_t numRows, const size_t numCols, int deviceId, char* pArray, size_t matrixFlags, DataTransferer* transferer);
//template void Matrix<char>::SetValue(const Matrix<char>&, MatrixFormat);
template void Matrix<char>::SetValue(const Matrix<char>&);
template void Matrix<char>::AssignValuesOf(const Matrix<char>&);
template void Matrix<char>::CastAssignValuesOf(const MatrixBase& other);
template bool Matrix<char>::IsEmpty() const;
template void Matrix<char>::Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, bool growOnly, bool keepValue);
template void Matrix<char>::Reshape(const size_t, const size_t);
template char* Matrix<char>::CopyToArray(void) const;
template bool Matrix<char>::IsView() const;
template Matrix<char> Matrix<char>::RandomUniform(const size_t rows, const size_t cols, DEVICEID_TYPE deviceId, const char low, const char high, unsigned long seed);
template void Matrix<char>::SetUniformRandomValue(const char low, const char high, unsigned long seed);
template void Matrix<char>::SetUniformRandomValue(RNGHandle& rngHandle, const char low, const char high);
template Matrix<char> Matrix<char>::RandomGaussian(const size_t rows, const size_t cols, DEVICEID_TYPE deviceId, const char mean, const char sigma, unsigned long seed);
template void Matrix<char>::SetMatrixFromCSCFormat(const CPUSPARSE_INDEX_TYPE* h_CSCCol, const CPUSPARSE_INDEX_TYPE* h_Row, const char* h_Val,
                                                   const size_t nz, const size_t numRows, const size_t numCols, DataTransferer* transferer);
template void Matrix<char>::AdjustSparseBlockColumn(const GPUSPARSE_INDEX_TYPE* cpuCol2BlockId, size_t numBlocks, bool useBlockId2Col);
template void Matrix<char>::TransferFromDeviceToDevice(int from_id, int to_id, bool isBeingMoved, bool emptyTransfer/* = false*/, bool updatePreferredDevice/* = true*/) const;
// Matrix<short> methods
// (mirrors the Matrix<char> set above; used as an integer-valued backing store)
template Matrix<short>::Matrix(DEVICEID_TYPE);
template Matrix<short>::Matrix(Matrix<short>&&);
template Matrix<short>::Matrix(const size_t numRows, const size_t numCols, DEVICEID_TYPE deviceId, const MatrixType matrixType, const MatrixFormat matrixFormat, const size_t nnz);
template Matrix<short>::Matrix(const size_t numRows, const size_t numCols, short* pArray, DEVICEID_TYPE deviceId, const size_t matrixFlags, const size_t nnz);
template Matrix<short>::~Matrix();
template Matrix<short>& Matrix<short>::operator=(Matrix<short>&& moveFrom);
template short* Matrix<short>::Data() const;
template int Matrix<short>::GetDeviceId() const;
template size_t Matrix<short>::GetNumElements() const;
template Matrix<short> Matrix<short>::ColumnSlice(size_t startColumn, size_t numCols) const;
template void Matrix<short>::_transferToDevice(int id_to, bool isBeingMoved, bool emptyTransfer) const;
template void Matrix<short>::TransferToDeviceIfNotThere(int id_to, bool isBeingMoved, bool emptyTransfer, bool updatePreferredDevice) const;
template size_t Matrix<short>::GetNumRows() const;
template size_t Matrix<short>::GetNumCols() const;
template void Matrix<short>::SetValue(const short);
template void Matrix<short>::SetValue(size_t numRows, const size_t numCols, int deviceId, short* pArray, size_t matrixFlags, DataTransferer* transferer);
//template void Matrix<short>::SetValue(const Matrix<short>&, MatrixFormat);
template void Matrix<short>::SetValue(const Matrix<short>&);
template void Matrix<short>::AssignValuesOf(const Matrix<short>&);
template void Matrix<short>::CastAssignValuesOf(const MatrixBase& other);
template bool Matrix<short>::IsEmpty() const;
template void Matrix<short>::Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, bool growOnly, bool keepValue);
template void Matrix<short>::Reshape(const size_t, const size_t);
template short* Matrix<short>::CopyToArray(void) const;
template bool Matrix<short>::IsView() const;
template Matrix<short> Matrix<short>::RandomUniform(const size_t rows, const size_t cols, DEVICEID_TYPE deviceId, const short low, const short high, unsigned long seed);
template void Matrix<short>::SetUniformRandomValue(const short low, const short high, unsigned long seed);
template void Matrix<short>::SetUniformRandomValue(RNGHandle& rngHandle, const short low, const short high);
template Matrix<short> Matrix<short>::RandomGaussian(const size_t rows, const size_t cols, DEVICEID_TYPE deviceId, const short mean, const short sigma, unsigned long seed);
template void Matrix<short>::SetMatrixFromCSCFormat(const CPUSPARSE_INDEX_TYPE* h_CSCCol, const CPUSPARSE_INDEX_TYPE* h_Row, const short* h_Val,
                                                    const size_t nz, const size_t numRows, const size_t numCols, DataTransferer* transferer);
template void Matrix<short>::AdjustSparseBlockColumn(const GPUSPARSE_INDEX_TYPE* cpuCol2BlockId, size_t numBlocks, bool useBlockId2Col);
template void Matrix<short>::TransferFromDeviceToDevice(int from_id, int to_id, bool isBeingMoved, bool emptyTransfer/* = false*/, bool updatePreferredDevice/* = true*/) const;
template void Matrix<short>::CollapseDataLocation() const;
// Matrix<int>: only the array-wrapping constructor is needed.
template Matrix<int>::Matrix(const size_t, const size_t, int*, DEVICEID_TYPE, const size_t, const size_t);
}}}