https://github.com/Microsoft/CNTK
Tip revision: 99647833fddfd80aa10e1d261ff309088ed71b12 authored by Alexey Kamenev on 28 March 2016, 18:14:08 UTC
Changes to test perf.
Changes to test perf.
Tip revision: 9964783
Matrix.cpp
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
// Matrix.cpp -- main CPP file that contains all Matrix functions exported by the CNTKMath.dll
//
#include "stdafx.h"
#include "Basics.h"
#include "Matrix.h"
#include "CPUMatrix.h"
#include "CPUSparseMatrix.h"
#include "GPUMatrix.h"
#include "GPUSparseMatrix.h"
#include "File.h"
#include <assert.h>
#include <math.h>
#include "GPUWatcher.h" // bring in this class as well so that it gets exported from this DLL
#ifndef CPUONLY
#pragma comment(lib, "MathCUDA.lib") // built by CNTKMathCUDA project
#endif
#pragma warning(disable : 4127) // conditional expression is constant; "if (sizeof(ElemType)==sizeof(float))" triggers this
#pragma warning(disable : 4239) // nonstandard extension; triggered by this pattern: "auto& second = transposeB ? b.m_GPUMatrix->Transpose() : *b.m_GPUMatrix;"
#pragma warning(disable : 4702) // unreachable code; triggered for unknown reasons
#ifndef min
#define min(a, b) (((a) < (b)) ? (a) : (b))
#endif
// Helper to dispath matrix calls to the 4 underlying matrix libraries (CPU,GPU) x (DENSE,SPARSE)
// 'MatrixPointerToCheck' determines where the operation takes place.
// 'MatrixPointerToSetFlag' is the output. If not null and its location is BOTH, we collapse it to one.
// Dispatches to exactly one of the four statement arguments, chosen by the current
// data location and matrix type of 'MatrixPointerToCheck'.
// NOTE: a data location of BOTH is handled by the GPU branch here; if
// 'MatrixPointerToSetFlag' is non-null, SetDataLocation() then collapses BOTH onto GPU.
// Raises RuntimeError if the matrix has data on neither device.
#define DISPATCH_MATRIX_ON_FLAG(MatrixPointerToCheck, MatrixPointerToSetFlag, CPUDense, GPUDense, CPUSparse, GPUSparse) \
    { \
        CurrentDataLocation curLocation = (MatrixPointerToCheck)->GetCurrentMatrixLocation(); \
        if (curLocation == CurrentDataLocation::GPU || curLocation == CurrentDataLocation::BOTH) \
        { \
            if ((MatrixPointerToCheck)->GetMatrixType() != MatrixType::SPARSE) \
            { \
                GPUDense; \
                if (MatrixPointerToSetFlag != nullptr) \
                    ((Matrix*) MatrixPointerToSetFlag)->SetDataLocation(CurrentDataLocation::GPU, MatrixType::DENSE); \
            } \
            else \
            { \
                GPUSparse; \
                if (MatrixPointerToSetFlag != nullptr) \
                    ((Matrix*) MatrixPointerToSetFlag)->SetDataLocation(CurrentDataLocation::GPU, MatrixType::SPARSE); \
            } \
        } \
        else if (curLocation == CurrentDataLocation::CPU) \
        { \
            if ((MatrixPointerToCheck)->GetMatrixType() != MatrixType::SPARSE) \
            { \
                CPUDense; \
                if (MatrixPointerToSetFlag != nullptr) \
                    ((Matrix*) MatrixPointerToSetFlag)->SetDataLocation(CurrentDataLocation::CPU, MatrixType::DENSE); \
            } \
            else \
            { \
                CPUSparse; \
                if (MatrixPointerToSetFlag != nullptr) \
                    ((Matrix*) MatrixPointerToSetFlag)->SetDataLocation(CurrentDataLocation::CPU, MatrixType::SPARSE); \
            } \
        } \
        else \
        { \
            RuntimeError("Matrices do not exist in either CPU or GPU."); \
        } \
    }
// version of dispatch macro that prefers the CPU if the 'MatrixPointerToCheck' location is BOTH
// Same dispatch as DISPATCH_MATRIX_ON_FLAG, except that a data location of BOTH is
// handled by the CPU branch (and, via SetDataLocation(), collapsed onto CPU when
// 'MatrixPointerToSetFlag' is non-null).
#define DISPATCH_MATRIX_ON_FLAG_USECPU_4BOTH(MatrixPointerToCheck, MatrixPointerToSetFlag, CPUDense, GPUDense, CPUSparse, GPUSparse) \
    { \
        CurrentDataLocation curLocation = (MatrixPointerToCheck)->GetCurrentMatrixLocation(); \
        if (curLocation == CurrentDataLocation::GPU) \
        { \
            if ((MatrixPointerToCheck)->GetMatrixType() != MatrixType::SPARSE) \
            { \
                GPUDense; \
                if (MatrixPointerToSetFlag != nullptr) \
                    ((Matrix*) MatrixPointerToSetFlag)->SetDataLocation(CurrentDataLocation::GPU, MatrixType::DENSE); \
            } \
            else \
            { \
                GPUSparse; \
                if (MatrixPointerToSetFlag != nullptr) \
                    ((Matrix*) MatrixPointerToSetFlag)->SetDataLocation(CurrentDataLocation::GPU, MatrixType::SPARSE); \
            } \
        } \
        else if (curLocation == CurrentDataLocation::CPU || curLocation == CurrentDataLocation::BOTH) \
        { \
            if ((MatrixPointerToCheck)->GetMatrixType() != MatrixType::SPARSE) \
            { \
                CPUDense; \
                if (MatrixPointerToSetFlag != nullptr) \
                    ((Matrix*) MatrixPointerToSetFlag)->SetDataLocation(CurrentDataLocation::CPU, MatrixType::DENSE); \
            } \
            else \
            { \
                CPUSparse; \
                if (MatrixPointerToSetFlag != nullptr) \
                    ((Matrix*) MatrixPointerToSetFlag)->SetDataLocation(CurrentDataLocation::CPU, MatrixType::SPARSE); \
            } \
        } \
        else \
        { \
            RuntimeError("Matrices do not exist in either CPU or GPU."); \
        } \
    }
// version of helper macro that executes both CPU and GPU macros if 'MatrixPointerToCheck' location is BOTH
// Same dispatch as DISPATCH_MATRIX_ON_FLAG, except that when the data location is
// BOTH, the CPU statement AND the GPU statement are both executed (keeping the two
// copies in sync), and the location written back through 'MatrixPointerToSetFlag'
// stays BOTH.
#define DISPATCH_MATRIX_ON_FLAG_USEBOTH_4BOTH(MatrixPointerToCheck, MatrixPointerToSetFlag, CPUDense, GPUDense, CPUSparse, GPUSparse) \
    { \
        CurrentDataLocation curLocation = (MatrixPointerToCheck)->GetCurrentMatrixLocation(); \
        if (curLocation == CurrentDataLocation::BOTH) \
        { \
            if ((MatrixPointerToCheck)->GetMatrixType() != MatrixType::SPARSE) \
            { \
                CPUDense; \
                GPUDense; \
                if (MatrixPointerToSetFlag != nullptr) \
                    ((Matrix*) MatrixPointerToSetFlag)->SetDataLocation(CurrentDataLocation::BOTH, MatrixType::DENSE); \
            } \
            else \
            { \
                CPUSparse; \
                GPUSparse; \
                if (MatrixPointerToSetFlag != nullptr) \
                    ((Matrix*) MatrixPointerToSetFlag)->SetDataLocation(CurrentDataLocation::BOTH, MatrixType::SPARSE); \
            } \
        } \
        else if (curLocation == CurrentDataLocation::GPU) \
        { \
            if ((MatrixPointerToCheck)->GetMatrixType() != MatrixType::SPARSE) \
            { \
                GPUDense; \
                if (MatrixPointerToSetFlag != nullptr) \
                    ((Matrix*) MatrixPointerToSetFlag)->SetDataLocation(CurrentDataLocation::GPU, MatrixType::DENSE); \
            } \
            else \
            { \
                GPUSparse; \
                if (MatrixPointerToSetFlag != nullptr) \
                    ((Matrix*) MatrixPointerToSetFlag)->SetDataLocation(CurrentDataLocation::GPU, MatrixType::SPARSE); \
            } \
        } \
        else if (curLocation == CurrentDataLocation::CPU) \
        { \
            if ((MatrixPointerToCheck)->GetMatrixType() != MatrixType::SPARSE) \
            { \
                CPUDense; \
                if (MatrixPointerToSetFlag != nullptr) \
                    ((Matrix*) MatrixPointerToSetFlag)->SetDataLocation(CurrentDataLocation::CPU, MatrixType::DENSE); \
            } \
            else \
            { \
                CPUSparse; \
                if (MatrixPointerToSetFlag != nullptr) \
                    ((Matrix*) MatrixPointerToSetFlag)->SetDataLocation(CurrentDataLocation::CPU, MatrixType::SPARSE); \
            } \
        } \
        else \
        { \
            RuntimeError("Matrices do not exist in either CPU or GPU."); \
        } \
    }
namespace Microsoft { namespace MSR { namespace CNTK {
MatrixBase::~MatrixBase() { }
#pragma region Constructors, destructors and other static matrix builders
// Initialize all members over virgin memory.
//This function will only initialize a default blank matrix. The actual matrices need to be allocated
//after calling this function, and flags need to be set correctly by calling SetDataLocation.
// This clears out the entire object and brings it into destructable state.
// Note: Keep this in sync with member definition and ShallowCopyFrom().
template <class ElemType>
void Matrix<ElemType>::Init(DEVICEID_TYPE deviceId)
{
    // Reset every member to the 'blank matrix' default. Nothing is allocated here;
    // callers create the actual CPU/GPU objects afterwards and record them via
    // SetDataLocation(). Keep the member list in sync with ShallowCopyFrom().
    m_baseMatrix = nullptr;
    m_GPUMatrix = nullptr;
    m_CPUMatrix = nullptr;
    m_GPUSparseMatrix = nullptr;
    m_CPUSparseMatrix = nullptr;
    m_matrixType = MatrixType::UNDETERMINED;
    m_currentDataLocation = CurrentDataLocation::NONE;
    m_preferredDeviceId = deviceId;
    // statistics counters used for diagnostics/warnings
    m_numTimesDeviceChanged = 0;
    m_numTimesMatrixTypeChanged = 0;
    m_devicesTransferedTo[1] = m_devicesTransferedTo[0] = CPUDEVICE - 1; // (some value that is different from any valid value)
}
// shallow-copy all members
template <class ElemType>
void Matrix<ElemType>::ShallowCopyFrom(const Matrix<ElemType>& other)
{
    // Copy every member (pointers and flags) verbatim; no allocation and no
    // ownership transfer happens here. Keep the member list in sync with Init().
    m_baseMatrix = other.m_baseMatrix;
    m_GPUMatrix = other.m_GPUMatrix;
    m_CPUMatrix = other.m_CPUMatrix;
    m_GPUSparseMatrix = other.m_GPUSparseMatrix;
    m_CPUSparseMatrix = other.m_CPUSparseMatrix;
    m_matrixType = other.m_matrixType;
    m_currentDataLocation = other.m_currentDataLocation;
    m_preferredDeviceId = other.m_preferredDeviceId;
    m_numTimesDeviceChanged = other.m_numTimesDeviceChanged;
    m_numTimesMatrixTypeChanged = other.m_numTimesMatrixTypeChanged;
    m_devicesTransferedTo[0] = other.m_devicesTransferedTo[0]; // TODO: spelling
    m_devicesTransferedTo[1] = other.m_devicesTransferedTo[1];
}
// Call this function after an update operation has created/set/updated the respective pointers.
// What gets updated:
// - m_currentDataLocation: from function argument
// - m_matrixType: from function argument unless UNDETERMINED in which case m_matrixType remains unmodified
// - m_baseMatrix: to one of current values of m_[GC]PU{Sparse,}Matrix
template <class ElemType>
void Matrix<ElemType>::SetDataLocation(CurrentDataLocation location, MatrixType type) const
{
    // NOTE: declared const but updates bookkeeping members
    // (m_currentDataLocation, m_matrixType, m_baseMatrix) -- presumably declared
    // mutable in Matrix.h; confirm there.
    // if the object used to live on BOTH, this will collapse it to 'location' (unless we actually wrote into BOTH)
    // In that case, we do a sanity check here that the object is an owning Matrix,
    // since otherwise the collapsing would go unnoticed by the original owner.
    // The cases to cover:
    //  - original owner is BOTH, and this is called on the original owner
    //    -> The result was written to 'location' so we should collapse it to there.
    //  - original owning matrix is in BOTH state
    //    and a view inherits this
    //    -> FORBIDDEN to write into CPU or GPU since we cannot ensure we wrote into the one that will be read next
    //  - original owning matrix is CPU or GPU
    //    and a view onto it is put into BOTH state
    //    -> inefficient to read, since this is likely happening over again; so put the owner into BOTH state
    //    -> FORBIDDEN to write into CPU or GPU since we don't know the owner's true location and hence cannot ensure we wrote to the correct place
    if (m_currentDataLocation == CurrentDataLocation::BOTH && location != CurrentDataLocation::BOTH)
    {
        // we get here if we wrote into this object that was BOTH but is no longer
        if (!OwnBuffer()) // this means we should not have written into it in the first place, so fail now (better late than never)
            LogicError("SetDataLocation: A non-owning object cannot be written to in BOTH state.");
    }
    m_currentDataLocation = location;
    // set the matrix type if passed in
    if (type != MatrixType::UNDETERMINED)
        m_matrixType = type;
    // Note: m_currentDataLocation may also be CurrentDataLocation::BOTH, in which case the base matrix will be GPU.
    if (m_matrixType == MatrixType::DENSE)
        m_baseMatrix = ((m_currentDataLocation == CurrentDataLocation::CPU) ? (BaseMatrix<ElemType>*) m_CPUMatrix : (BaseMatrix<ElemType>*) m_GPUMatrix);
    else if (m_matrixType == MatrixType::SPARSE)
        m_baseMatrix = ((m_currentDataLocation == CurrentDataLocation::CPU) ? (BaseMatrix<ElemType>*) m_CPUSparseMatrix : (BaseMatrix<ElemType>*) m_GPUSparseMatrix);
    // sanity check
    if (!m_baseMatrix && m_matrixType != MatrixType::UNDETERMINED)
        LogicError("SetDataLocation: new m_baseMatrix must not be NULL.");
}
//this is a private constructor only used internally to initialize a blank matrix
// Private constructor: build a blank matrix of the requested type and format.
template <class ElemType>
Matrix<ElemType>::Matrix(const MatrixFlags matrixFlags, const MatrixType matrixType, const MatrixFormat matrixFormat, DEVICEID_TYPE deviceID)
{
    Init(deviceID);
    // A matrix that will not own its buffer stays blank; storage is hooked up later.
    if (matrixFlags & matrixFlagDontOwnBuffer)
        return;
    SwitchToMatrixType(matrixType, matrixFormat, false);
}
//this is a private constructor only used internally to initialize a blank matrix
// Private constructor: build a blank matrix of the requested type, picking the
// default format for that type (dense, or CSC for sparse).
template <class ElemType>
Matrix<ElemType>::Matrix(const MatrixFlags matrixFlags, const MatrixType matrixType, DEVICEID_TYPE deviceID)
{
    Init(deviceID);
    if (matrixFlags & matrixFlagDontOwnBuffer)
        return;
    const MatrixFormat defaultFormat = (matrixType == MatrixType::DENSE) ? MatrixFormat::matrixFormatDense : MatrixFormat::matrixFormatSparseCSC;
    SwitchToMatrixType(matrixType, defaultFormat, false);
}
//this is a private constructor only used internally to initialize a blank matrix
// Private constructor: build a blank dense matrix unless the caller asked for a
// non-owning shell.
template <class ElemType>
Matrix<ElemType>::Matrix(const MatrixFlags matrixFlags, DEVICEID_TYPE deviceID)
{
    Init(deviceID);
    if (matrixFlags & matrixFlagDontOwnBuffer)
        return;
    SwitchToMatrixType(MatrixType::DENSE, MatrixFormat::matrixFormatDense, false);
}
template <class ElemType>
Matrix<ElemType>::Matrix(DEVICEID_TYPE deviceID)
{
    // Default-style constructor: an empty, owning, dense matrix on 'deviceID'.
    Init(deviceID);
    SwitchToMatrixType(MatrixType::DENSE, MatrixFormat::matrixFormatDense, false);
}
// constructor for Matrix class to wrap an externally managed BaseMatrix
// baseMatrix - base matrix for this element
// pArray - pointer to current data array, will replace existing pointer in baseMatrix if != NULL
// deviceId - deviceId where the pArray exists
// Wrap an externally managed BaseMatrix in a Matrix object.
// baseMatrix - the underlying matrix; stored by pointer, not copied
// pArray     - data array to install into baseMatrix via SetArray()
// deviceId   - device on which pArray lives (becomes the preferred device)
template <class ElemType>
Matrix<ElemType>::Matrix(BaseMatrix<ElemType>* baseMatrix, ElemType* pArray, DEVICEID_TYPE deviceId)
{
    Init(deviceId);
    const bool isSparse = (baseMatrix->GetFormat() & matrixFormatSparse) != 0;
    // Record the wrapped object in the slot matching its device and format.
    if (m_preferredDeviceId == CPUDEVICE)
    {
        if (isSparse)
        {
            m_CPUSparseMatrix = (CPUSparseMatrix<ElemType>*) baseMatrix;
            SetDataLocation(CPU, SPARSE);
        }
        else
        {
            m_CPUMatrix = (CPUMatrix<ElemType>*) baseMatrix;
            SetDataLocation(CPU, DENSE);
        }
    }
    else
    {
        if (isSparse)
        {
            m_GPUSparseMatrix = (GPUSparseMatrix<ElemType>*) baseMatrix;
            SetDataLocation(GPU, SPARSE);
        }
        else
        {
            m_GPUMatrix = (GPUMatrix<ElemType>*) baseMatrix;
            SetDataLocation(GPU, DENSE);
        }
    }
    m_baseMatrix = baseMatrix;
    m_baseMatrix->SetArray(pArray);
}
// Allocate a numRows x numCols matrix of the given type/format on 'deviceId'.
// Dense matrices are zero-initialized; only the dense format is supported for
// MatrixType::DENSE.
template <class ElemType>
Matrix<ElemType>::Matrix(const size_t numRows, const size_t numCols, DEVICEID_TYPE deviceId, const MatrixType matrixType, const MatrixFormat matrixFormat)
{
    Init(deviceId);
    if (matrixType == MatrixType::SPARSE)
    {
        if (m_preferredDeviceId == CPUDEVICE)
        {
            m_CPUSparseMatrix = new CPUSparseMatrix<ElemType>(matrixFormat, numRows, numCols, 0);
            SetDataLocation(CPU, SPARSE);
        }
        else
        {
            m_GPUSparseMatrix = new GPUSparseMatrix<ElemType>(numRows, numCols, 0, m_preferredDeviceId, matrixFormat);
            SetDataLocation(GPU, SPARSE);
        }
        return;
    }
    // dense path
    if (matrixFormat != matrixFormatDense)
    {
        NOT_IMPLEMENTED;
    }
    if (m_preferredDeviceId == CPUDEVICE)
    {
        m_CPUMatrix = new CPUMatrix<ElemType>(numRows, numCols);
        SetDataLocation(CPU, DENSE);
    }
    else
    {
        m_GPUMatrix = new GPUMatrix<ElemType>(numRows, numCols, m_preferredDeviceId);
        SetDataLocation(GPU, DENSE);
    }
    SetValue(0); // dense matrices start out zeroed
}
// Build a matrix from an existing data array.
// pArray is handed to the dense constructors; for sparse, a fresh buffer with
// room for 'nnz' non-zeros is allocated instead (external arrays unsupported).
// If matrixFlagDontOwnBuffer is set, the underlying matrix will not free pArray.
template <class ElemType>
Matrix<ElemType>::Matrix(const size_t numRows, const size_t numCols, ElemType* pArray, DEVICEID_TYPE deviceId, const size_t matrixFlags, const size_t nnz)
{
    Init(deviceId);
    const bool wantSparse = (matrixFlags & matrixFormatSparse) != 0;
    if (wantSparse)
    {
        if (m_preferredDeviceId == CPUDEVICE)
        {
            // WARNING: matrixFlag is not passed in and externally managed array cannot be passed in
            m_CPUSparseMatrix = new CPUSparseMatrix<ElemType>(matrixFormatSparseCSC, numRows, numCols, nnz);
            SetDataLocation(CPU, SPARSE);
        }
        else
        {
            m_GPUSparseMatrix = new GPUSparseMatrix<ElemType>(m_preferredDeviceId, MatrixFormat(matrixFlags & MatrixFormat::matrixFormatMask));
            m_GPUSparseMatrix->Resize(numRows, numCols, nnz, true, false);
            SetDataLocation(GPU, SPARSE);
        }
    }
    else
    {
        if (m_preferredDeviceId == CPUDEVICE)
        {
            m_CPUMatrix = new CPUMatrix<ElemType>(numRows, numCols, pArray, matrixFlags);
            SetDataLocation(CPU, DENSE);
        }
        else
        {
            m_GPUMatrix = new GPUMatrix<ElemType>(numRows, numCols, m_preferredDeviceId, pArray, matrixFlags);
            SetDataLocation(GPU, DENSE);
        }
    }
    if (matrixFlags & matrixFlagDontOwnBuffer)
        m_baseMatrix->SetOwnBuffer(false);
}
//copy constructor, deep copy
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::DeepClone() const
{
    // Return a full (deep) copy of this matrix on its current device, delegating
    // to the deep-copying constructor below.
    return Matrix<ElemType>(*this, GetDeviceId());
}
template <class ElemType>
Matrix<ElemType>::Matrix(const Matrix<ElemType>& deepCopyFrom, DEVICEID_TYPE deviceId)
{
    // Deep-copying constructor: clone 'deepCopyFrom' onto device 'deviceId'.
    // The source is moved to the target device for the copy and moved back
    // afterwards (the 'true' argument presumably preserves the source's data --
    // confirm against _transferToDevice()).
    int origCopyFromDeviceId = deepCopyFrom.GetDeviceId();
    Init(deviceId); // will set m_preferredDeviceId
    deepCopyFrom._transferToDevice(m_preferredDeviceId, true);
    DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom,
                            this,
                            m_CPUMatrix = new CPUMatrix<ElemType>(*(deepCopyFrom.m_CPUMatrix)),
                            m_GPUMatrix = new GPUMatrix<ElemType>(*(deepCopyFrom.m_GPUMatrix)),
                            m_CPUSparseMatrix = new CPUSparseMatrix<ElemType>(*(deepCopyFrom.m_CPUSparseMatrix)),
                            m_GPUSparseMatrix = new GPUSparseMatrix<ElemType>(*(deepCopyFrom.m_GPUSparseMatrix)));
    // should we move back?
    deepCopyFrom._transferToDevice(origCopyFromDeviceId, true);
    // keep the source's preferred device, not necessarily the copy target
    m_preferredDeviceId = deepCopyFrom.m_preferredDeviceId;
}
//move constructor, shallow copy
template <class ElemType>
Matrix<ElemType>::Matrix(Matrix<ElemType>&& moveFrom)
{
    // Move constructor: steal all state from 'moveFrom' (shallow copy of all
    // members), leaving 'moveFrom' re-initialized as a blank matrix.
    Init((DEVICEID_TYPE) moveFrom.GetDeviceId());
#if 1
    // delegate to the move-assignment operator
    operator=(move(moveFrom));
#else
    // old implementation, kept for reference (dead code)
    DISPATCH_MATRIX_ON_FLAG(&moveFrom,
                            this,
                            m_CPUMatrix = new CPUMatrix<ElemType>(static_cast<CPUMatrix<ElemType>&&>(*(moveFrom.m_CPUMatrix))),
                            m_GPUMatrix = new GPUMatrix<ElemType>(static_cast<GPUMatrix<ElemType>&&>(*(moveFrom.m_GPUMatrix))),
                            m_CPUSparseMatrix = new CPUSparseMatrix<ElemType>(static_cast<CPUSparseMatrix<ElemType>&&>(*(moveFrom.m_CPUSparseMatrix))),
                            m_GPUSparseMatrix = new GPUSparseMatrix<ElemType>(static_cast<GPUSparseMatrix<ElemType>&&>(*(moveFrom.m_GPUSparseMatrix))));
    m_preferredDeviceId = moveFrom.m_preferredDeviceId;
#endif
}
//move assignment operator, shallow copy
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::operator=(Matrix<ElemType>&& moveFrom)
{
    // Move assignment: release our own storage, then shallow-copy every member
    // from 'moveFrom' and reset 'moveFrom' to a blank CPU matrix.
    if (this == &moveFrom)
        LogicError("Matrix: Move assignment into itself is forbidden.");
    ReleaseMemory(); // free held memory if any
#if 1
    // shallow-copy all members
    ShallowCopyFrom(moveFrom);
    // virgin-init the source
    moveFrom.Init(CPUDEVICE);
#else
    // old implementation, kept for reference (dead code)
    m_preferredDeviceId = moveFrom.m_preferredDeviceId;
    DISPATCH_MATRIX_ON_FLAG(&moveFrom,
                            this,
                            if (m_CPUMatrix != nullptr) m_CPUMatrix->operator=(static_cast<CPUMatrix<ElemType>&&>(*(moveFrom.m_CPUMatrix)));
                            else m_CPUMatrix = new CPUMatrix<ElemType>(static_cast<CPUMatrix<ElemType>&&>(*(moveFrom.m_CPUMatrix))),
                            if (m_GPUMatrix != nullptr) m_GPUMatrix->operator=(static_cast<GPUMatrix<ElemType>&&>(*(moveFrom.m_GPUMatrix)));
                            else m_GPUMatrix = new GPUMatrix<ElemType>(static_cast<GPUMatrix<ElemType>&&>(*(moveFrom.m_GPUMatrix))),
                            if (m_CPUSparseMatrix != nullptr) m_CPUSparseMatrix->operator=(static_cast<CPUSparseMatrix<ElemType>&&>(*(moveFrom.m_CPUSparseMatrix)));
                            else m_CPUSparseMatrix = new CPUSparseMatrix<ElemType>(static_cast<CPUSparseMatrix<ElemType>&&>(*(moveFrom.m_CPUSparseMatrix))),
                            if (m_GPUSparseMatrix != nullptr) m_GPUSparseMatrix->operator=(static_cast<GPUSparseMatrix<ElemType>&&>(*(moveFrom.m_GPUSparseMatrix)));
                            else m_GPUSparseMatrix = new GPUSparseMatrix<ElemType>(static_cast<GPUSparseMatrix<ElemType>&&>(*(moveFrom.m_GPUSparseMatrix))));
#endif
    return *this;
}
template <class ElemType>
void Matrix<ElemType>::ReleaseMemory()
{
    // Free all four underlying representations (deleting nullptr is a no-op) and
    // return the object to the blank UNDETERMINED/NONE state.
    delete m_CPUMatrix;
    delete m_GPUMatrix;
    delete m_GPUSparseMatrix;
    delete m_CPUSparseMatrix;
    m_CPUMatrix = nullptr;
    m_GPUMatrix = nullptr;
    m_GPUSparseMatrix = nullptr;
    m_CPUSparseMatrix = nullptr;
    // BUGFIX: m_baseMatrix aliases one of the objects deleted above; it must be
    // cleared too, or it is left dangling until the next SetDataLocation().
    m_baseMatrix = nullptr;
    m_matrixType = MatrixType::UNDETERMINED;
    m_currentDataLocation = CurrentDataLocation::NONE;
}
template <class ElemType>
Matrix<ElemType>::~Matrix(void)
{
    // Release all underlying matrix objects.
    ReleaseMemory();
}
// Factory: a rows x cols matrix on 'deviceId' with every element set to 1.
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::Ones(const size_t rows, const size_t cols, DEVICEID_TYPE deviceId)
{
    Matrix<ElemType> result(rows, cols, deviceId); // constructor zero-fills
    result.SetValue(1);
    return result;
}
// Factory: a rows x cols matrix on 'deviceId' with every element set to 0.
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::Zeros(const size_t rows, const size_t cols, DEVICEID_TYPE deviceId)
{
    Matrix<ElemType> result(rows, cols, deviceId); // constructor zero-fills already
    result.SetValue(0);
    return result;
}
// Factory: a rows x rows identity matrix on 'deviceId'.
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::Eye(const size_t rows, DEVICEID_TYPE deviceId)
{
    Matrix<ElemType> result(rows, rows, deviceId); // zero-filled square matrix
    result.SetDiagonalValue(1);
    return result;
}
// Factory: a rows x cols matrix filled with uniform random values in [low, high),
// using the given seed.
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::RandomUniform(const size_t rows, const size_t cols, DEVICEID_TYPE deviceId, const ElemType low, const ElemType high, unsigned long seed)
{
    Matrix<ElemType> result(rows, cols, deviceId);
    result.SetUniformRandomValue(low, high, seed);
    return result;
}
// Factory: a rows x cols matrix filled with Gaussian random values of the given
// mean and standard deviation, using the given seed.
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::RandomGaussian(const size_t rows, const size_t cols, DEVICEID_TYPE deviceId, const ElemType mean, const ElemType sigma, unsigned long seed)
{
    Matrix<ElemType> result(rows, cols, deviceId);
    result.SetGaussianRandomValue(mean, sigma, seed);
    return result;
}
// Select the active GPU. Negative device ids (CPU / automatic) are ignored.
template <class ElemType>
void Matrix<ElemType>::SetDevice(DEVICEID_TYPE deviceId)
{
    if (deviceId < 0)
        return;
    GPUMatrix<ElemType>::SetDevice(deviceId);
}
template <class ElemType>
void Matrix<ElemType>::Read(File& stream)
{
    // Deserialize a matrix written by Write(): a one-character type tag
    // ('d' = dense, 's' = sparse) followed by the payload of the underlying
    // matrix. Data is read into the representation matching the current device.
    Matrix<ElemType>& M = *this;
    char type;
    stream >> type;
    if (type == 'd') // dense
    {
        if (M.GetDeviceId() < 0) // CPU
        {
            if (M.m_CPUMatrix == nullptr)
                M.m_CPUMatrix = new CPUMatrix<ElemType>();
            stream >> (*M.m_CPUMatrix);
            M.SetDataLocation(CPU, DENSE);
        }
        else // GPU
        {
            if (M.m_GPUMatrix == nullptr)
                M.m_GPUMatrix = new GPUMatrix<ElemType>(M.GetDeviceId());
            stream >> (*M.m_GPUMatrix);
            M.SetDataLocation(GPU, DENSE);
        }
    }
    else if (type == 's') // sparse
    {
        if (M.GetDeviceId() < 0)
        {
            NOT_IMPLEMENTED; // You might want to transfer your matrix to GPU
        }
        else
        {
            if (M.m_GPUSparseMatrix == nullptr)
                M.m_GPUSparseMatrix = new GPUSparseMatrix<ElemType>(M.GetDeviceId());
            stream >> (*M.m_GPUSparseMatrix);
            M.SetDataLocation(GPU, SPARSE);
        }
    }
    else
        // BUGFIX: the message used to claim the valid tags were 'f' or 'd' (they are
        // 'd' and 's') and printed the tag with a decimal conversion behind an "0x"
        // prefix; print both the character and its hex code instead.
        LogicError("Read: Input file corrupt (invalid matrix type field '%c' (0x%02x), should be 'd' or 's').", type, (unsigned int) (unsigned char) type);
}
// Serialize this matrix: a one-character type tag ('d' = dense, 's' = sparse)
// followed by the payload of whichever underlying matrix holds the data.
// Sparse CPU matrices cannot be written yet.
template <class ElemType>
void Matrix<ElemType>::Write(File& stream) const
{
    const Matrix<ElemType>& us = *this;
    if (us.GetMatrixType() == MatrixType::DENSE)
    {
        stream << 'd';
        if (us.GetDeviceId() < 0)
            stream << (*us.m_CPUMatrix);
        else
            stream << (*us.m_GPUMatrix);
    }
    else
    {
        stream << 's';
        if (us.GetDeviceId() < 0)
            NOT_IMPLEMENTED
        else
            stream << (*us.m_GPUSparseMatrix);
    }
}
#pragma endregion Constructors, destructors and other static matrix builders
#pragma region Basic Operators
template <class ElemType>
size_t Matrix<ElemType>::BufferSize() const
{
    // Size in bytes of the underlying allocation: allocated element count times
    // sizeof(ElemType) for dense; sparse representations report their own sizes.
    DISPATCH_MATRIX_ON_FLAG(this,
                            nullptr,
                            return m_baseMatrix->GetSizeAllocated() * sizeof(ElemType),
                            return m_baseMatrix->GetSizeAllocated() * sizeof(ElemType),
                            return m_CPUSparseMatrix->BufferSize(),
                            return m_GPUSparseMatrix->BufferSizeAllocated());
}
template <class ElemType>
ElemType* Matrix<ElemType>::BufferPointer() const
{
    // Raw pointer to the active representation's underlying buffer. For a GPU
    // sparse matrix this points to device memory (note the cast to ElemType*).
    DISPATCH_MATRIX_ON_FLAG(this,
                            nullptr,
                            return m_baseMatrix->GetArray(),
                            return m_baseMatrix->GetArray(),
                            return m_CPUSparseMatrix->BufferPointer(),
                            return (ElemType*) m_GPUSparseMatrix->BufferPointer());
}
template <class ElemType>
size_t Matrix<ElemType>::NzCount() const
{
    // Number of stored (non-zero) elements, as reported by the active underlying matrix.
    return m_baseMatrix->NzCount();
}
template <class ElemType>
ElemType* Matrix<ElemType>::CopyToArray() const
{
    // Allocate and return a flat copy of all elements; the caller takes ownership
    // of the returned array. Dense matrices only.
    DISPATCH_MATRIX_ON_FLAG(this,
                            nullptr,
                            return m_CPUMatrix->CopyToArray(),
                            return m_GPUMatrix->CopyToArray(),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
//memory will be allocated by the callee if not enough but need to be deleted by the caller after it's done
//return number of elements copied
template <class ElemType>
size_t Matrix<ElemType>::CopyToArray(ElemType*& arrayCopyTo, size_t& currentArraySize) const
{
    // Copy all elements into 'arrayCopyTo'; the callee may reallocate (and update
    // currentArraySize) if the buffer is too small -- see the note above this
    // function. Returns the number of elements copied. Dense matrices only.
    DISPATCH_MATRIX_ON_FLAG(this,
                            nullptr,
                            return m_CPUMatrix->CopyToArray(arrayCopyTo, currentArraySize),
                            return m_GPUMatrix->CopyToArray(arrayCopyTo, currentArraySize),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::CopySection(size_t numRows, size_t numCols, ElemType* dst, size_t colStride) const
{
    // Copy a numRows x numCols block into caller-provided memory 'dst', writing
    // columns 'colStride' elements apart. Dense matrices only.
    DISPATCH_MATRIX_ON_FLAG(this,
                            nullptr,
                            m_CPUMatrix->CopySection(numRows, numCols, dst, colStride),
                            m_GPUMatrix->CopySection(numRows, numCols, dst, colStride),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// BUGBUG: Some code checks before calling here whether one of the dimensions is 0.
// This function must handle that case properly, that is, preserving the non-zero dimension.
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::ColumnSlice(size_t startColumn, size_t numCols) const
{
    // Return a view of columns [startColumn, startColumn + numCols) with the same
    // location and matrix type as this matrix. The returned object is constructed
    // with matrixFlagDontOwnBuffer, i.e. it does not own the underlying storage.
    int devId = GetDeviceId();
    Matrix<ElemType> slice(matrixFlagDontOwnBuffer, (DEVICEID_TYPE) devId); // this already creates pointers
    slice.m_preferredDeviceId = m_preferredDeviceId;
    // create slices for the underlying object
    // Note: In case of data location == BOTH, this creates two objects just like in the source.
    if (GetMatrixType() == MatrixType::DENSE)
    {
        if (GetCurrentMatrixLocation() == CPU || GetCurrentMatrixLocation() == BOTH)
        {
            // move-assign into an existing object, or move-construct a new one
            if (slice.m_CPUMatrix != nullptr)
                slice.m_CPUMatrix->operator=(static_cast<CPUMatrix<ElemType>&&>(m_CPUMatrix->ColumnSlice(startColumn, numCols)));
            else
                slice.m_CPUMatrix = new CPUMatrix<ElemType>(static_cast<CPUMatrix<ElemType>&&>(m_CPUMatrix->ColumnSlice(startColumn, numCols)));
        }
        if (GetCurrentMatrixLocation() == GPU || GetCurrentMatrixLocation() == BOTH)
        {
            if (slice.m_GPUMatrix != nullptr)
                slice.m_GPUMatrix->operator=(static_cast<GPUMatrix<ElemType>&&>(m_GPUMatrix->ColumnSlice(startColumn, numCols)));
            else
                slice.m_GPUMatrix = new GPUMatrix<ElemType>(static_cast<GPUMatrix<ElemType>&&>(m_GPUMatrix->ColumnSlice(startColumn, numCols)));
        }
    }
    else if (GetMatrixType() == MatrixType::SPARSE)
    {
        if (GetCurrentMatrixLocation() == CPU || GetCurrentMatrixLocation() == BOTH)
        {
            if (slice.m_CPUSparseMatrix != nullptr)
                slice.m_CPUSparseMatrix->operator=(static_cast<CPUSparseMatrix<ElemType>&&>(m_CPUSparseMatrix->ColumnSlice(startColumn, numCols)));
            else
                slice.m_CPUSparseMatrix = new CPUSparseMatrix<ElemType>(static_cast<CPUSparseMatrix<ElemType>&&>(m_CPUSparseMatrix->ColumnSlice(startColumn, numCols)));
        }
        if (GetCurrentMatrixLocation() == GPU || GetCurrentMatrixLocation() == BOTH)
        {
            if (slice.m_GPUSparseMatrix != nullptr)
                slice.m_GPUSparseMatrix->operator=(static_cast<GPUSparseMatrix<ElemType>&&>(m_GPUSparseMatrix->ColumnSlice(startColumn, numCols)));
            else
                slice.m_GPUSparseMatrix = new GPUSparseMatrix<ElemType>(static_cast<GPUSparseMatrix<ElemType>&&>(m_GPUSparseMatrix->ColumnSlice(startColumn, numCols)));
        }
    }
    else
        LogicError("Undetermined matrix type");
    // update the slice's m_currentDataLocation, m_matrixType, and m_baseMatrix
    // This will work for CPU, GPU, and BOTH.
    slice.SetDataLocation(GetCurrentMatrixLocation(), GetMatrixType());
    return slice;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignColumnSlice(const Matrix<ElemType>& fromMatrix, size_t startColumn, size_t numCols)
{
    // Turn this matrix into a slice of columns [startColumn, startColumn + numCols)
    // of 'fromMatrix'. Previously held storage is released first; dense CPU/GPU
    // matrices only. Returns *this.
    ReleaseMemory();
    m_preferredDeviceId = fromMatrix.m_preferredDeviceId;
    DISPATCH_MATRIX_ON_FLAG(&fromMatrix,
                            this,
                            if (m_CPUMatrix != nullptr) m_CPUMatrix->AssignColumnSlice(*fromMatrix.m_CPUMatrix, startColumn, numCols);
                            else m_CPUMatrix = new CPUMatrix<ElemType>(static_cast<CPUMatrix<ElemType>&&>(fromMatrix.m_CPUMatrix->ColumnSlice(startColumn, numCols))),
                            if (m_GPUMatrix != nullptr) m_GPUMatrix->AssignColumnSlice(*fromMatrix.m_GPUMatrix, startColumn, numCols);
                            else m_GPUMatrix = new GPUMatrix<ElemType>(static_cast<GPUMatrix<ElemType>&&>(fromMatrix.m_GPUMatrix->ColumnSlice(startColumn, numCols))),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::SetColumnSlice(const Matrix<ElemType>& fromMatrix, size_t startColumn, size_t numCols)
{
    // Overwrite columns [startColumn, startColumn + numCols) of this matrix with
    // the contents of 'fromMatrix'. Dense matrices only; this matrix must already
    // be allocated. Returns *this.
    assert(m_CPUMatrix != nullptr || m_GPUMatrix != nullptr);
    // must already been allocated
    DISPATCH_MATRIX_ON_FLAG(&fromMatrix,
                            this,
                            m_CPUMatrix->SetColumnSlice(*fromMatrix.m_CPUMatrix, startColumn, numCols),
                            m_GPUMatrix->SetColumnSlice(*fromMatrix.m_GPUMatrix, startColumn, numCols),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
template <class ElemType>
void Matrix<ElemType>::CopyColumnsStrided(const Matrix<ElemType>& fromMatrix, size_t numCols, size_t srcNumColsStride, size_t destNumColsStride)
{
    // Copy 'numCols' columns from 'fromMatrix' into this matrix at the given
    // source/destination column strides (exact stride semantics are defined by
    // CPUMatrix/GPUMatrix::CopyColumnsStrided). Dense matrices only.
    assert(m_CPUMatrix != nullptr || m_GPUMatrix != nullptr);
    DISPATCH_MATRIX_ON_FLAG(&fromMatrix,
                            this,
                            m_CPUMatrix->CopyColumnsStrided(*fromMatrix.m_CPUMatrix, numCols, srcNumColsStride, destNumColsStride),
                            m_GPUMatrix->CopyColumnsStrided(*fromMatrix.m_GPUMatrix, numCols, srcNumColsStride, destNumColsStride),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Return this matrix's main diagonal as a new matrix on the same device,
// by delegating to AssignDiagonalValuesTo().
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::Diagonal() const
{
    Matrix<ElemType> result(matrixFlagDontOwnBuffer, (DEVICEID_TYPE) GetDeviceId());
    result.m_preferredDeviceId = m_preferredDeviceId;
    AssignDiagonalValuesTo(result);
    return result;
}
template <class ElemType>
void Matrix<ElemType>::AssignDiagonalValuesTo(Matrix<ElemType>& diag) const
{
    // Write this matrix's main diagonal into 'diag' as a DENSE matrix (for both
    // dense and sparse sources), after co-locating both objects on one device.
    int devId = GetDeviceId();
    DecideAndMoveToRightDevice(*this, diag);
    if (GetMatrixType() == MatrixType::DENSE)
    {
        if (devId == CPUDEVICE)
        {
            // move-assign into an existing target object, or move-construct a new one
            if (diag.m_CPUMatrix != nullptr)
                diag.m_CPUMatrix->operator=(static_cast<CPUMatrix<ElemType>&&>(m_CPUMatrix->Diagonal()));
            else
                diag.m_CPUMatrix = new CPUMatrix<ElemType>(static_cast<CPUMatrix<ElemType>&&>(m_CPUMatrix->Diagonal()));
            diag.SetDataLocation(CPU, DENSE);
        }
        else
        {
            if (diag.m_GPUMatrix != nullptr)
                diag.m_GPUMatrix->operator=(static_cast<GPUMatrix<ElemType>&&>(m_GPUMatrix->Diagonal()));
            else
                diag.m_GPUMatrix = new GPUMatrix<ElemType>(static_cast<GPUMatrix<ElemType>&&>(m_GPUMatrix->Diagonal()));
            diag.SetDataLocation(GPU, DENSE);
        }
    }
    else if (GetMatrixType() == MatrixType::SPARSE)
    {
        // TODO: Implement optimized diagonal functions for sparse matrices. For now use the DiagonalToDense instead.
        if (devId == CPUDEVICE)
        {
            if (diag.m_CPUMatrix != nullptr)
                diag.m_CPUMatrix->operator=(static_cast<CPUMatrix<ElemType>&&>(m_CPUSparseMatrix->DiagonalToDense()));
            else
                diag.m_CPUMatrix = new CPUMatrix<ElemType>(static_cast<CPUMatrix<ElemType>&&>(m_CPUSparseMatrix->DiagonalToDense()));
            diag.SetDataLocation(CPU, DENSE);
        }
        else
        {
            if (diag.m_GPUMatrix != nullptr)
                diag.m_GPUMatrix->operator=(static_cast<GPUMatrix<ElemType>&&>(m_GPUSparseMatrix->DiagonalToDense()));
            else
                diag.m_GPUMatrix = new GPUMatrix<ElemType>(static_cast<GPUMatrix<ElemType>&&>(m_GPUSparseMatrix->DiagonalToDense()));
            diag.SetDataLocation(GPU, DENSE);
        }
    }
    else
        LogicError("Undetermined matrix type");
}
//this function will change the matrix type between DENSE and SPARSE.
//WARNING: The correct implementation is to copy the matrix between DENSE and SPARSE
// However, the conversion functions are not implemented yet and so it will always create
// a new blank matrix and destroy all info in the original matrix if different matrix type is asked.
// In case of !keepValues, the matrix content will be undefined.
template <class ElemType>
void Matrix<ElemType>::SwitchToMatrixType(MatrixType newMatrixType, MatrixFormat newMatrixFormat, bool keepValues)
{
    // Convert this matrix between DENSE and SPARSE on its current device.
    // If keepValues is false (or the matrix is still blank), the contents after
    // the switch are undefined; see the warning above this function.
    // This check should be uncommented but unfortunately there are still places
    // this function is being called with incorrect "default" format value
    /*if (m_matrixType == newMatrixType && GetFormat() != newMatrixFormat)
        NOT_IMPLEMENTED;*/
    if (m_matrixType == newMatrixType)
        return;
    if (m_baseMatrix == nullptr)
        keepValues = false;

#define NUM_MATRIXTYPE_CHANGED_WARN 20
    m_numTimesMatrixTypeChanged++;
    // BUGFIX: this warning used to talk about device transfers, but the counter
    // tracks dense<->sparse matrix-type changes.
    if (m_numTimesMatrixTypeChanged == NUM_MATRIXTYPE_CHANGED_WARN)
        fprintf(stderr, "WARNING: The same matrix with dim [%lu, %lu] has changed its matrix type %d times.\n", (unsigned long) GetNumRows(), (unsigned long) GetNumCols(), NUM_MATRIXTYPE_CHANGED_WARN);
    if (GetDeviceId() < 0) // CPU
    {
        if (newMatrixType == MatrixType::SPARSE)
        {
            if (m_baseMatrix == nullptr)
                m_CPUSparseMatrix = new CPUSparseMatrix<ElemType>(newMatrixFormat);
            else
                m_CPUSparseMatrix = new CPUSparseMatrix<ElemType>(newMatrixFormat, GetNumRows(), GetNumCols(), 1);
            if (keepValues)
                CopyElementsFromDenseToSparse(*m_CPUMatrix, *m_CPUSparseMatrix);
            delete m_CPUMatrix;
            m_CPUMatrix = nullptr;
            SetDataLocation(CPU, SPARSE);
        }
        else if (newMatrixType == MatrixType::DENSE)
        {
            if (m_baseMatrix == nullptr)
                m_CPUMatrix = new CPUMatrix<ElemType>();
            else
                m_CPUMatrix = new CPUMatrix<ElemType>(GetNumRows(), GetNumCols());
            if (keepValues)
                m_CPUMatrix->SetValue(m_CPUSparseMatrix->CopyColumnSliceToDense(0, GetNumCols()));
            delete m_CPUSparseMatrix;
            m_CPUSparseMatrix = nullptr;
            SetDataLocation(CPU, DENSE);
        }
        else
            LogicError("SwitchToMatrixType: Unexpected/invalid new matrix type");
    }
    else // GPU
    {
        if (newMatrixType == MatrixType::SPARSE)
        {
            if (m_baseMatrix == nullptr)
                m_GPUSparseMatrix = new GPUSparseMatrix<ElemType>(GetDeviceId(), newMatrixFormat);
            else
                m_GPUSparseMatrix = new GPUSparseMatrix<ElemType>(GetNumRows(), GetNumCols(), 0, GetDeviceId(), newMatrixFormat);
            if (keepValues)
                m_GPUSparseMatrix->SetValue(*m_GPUMatrix);
            delete m_GPUMatrix;
            m_GPUMatrix = nullptr;
            SetDataLocation(GPU, SPARSE);
        }
        else if (newMatrixType == MatrixType::DENSE)
        {
            if (m_baseMatrix == nullptr)
                m_GPUMatrix = new GPUMatrix<ElemType>(GetDeviceId());
            else
                m_GPUMatrix = new GPUMatrix<ElemType>(GetNumRows(), GetNumCols(), GetDeviceId());
            if (keepValues)
                m_GPUSparseMatrix->CopyToDenseMatrix(*m_GPUMatrix);
            delete m_GPUSparseMatrix;
            m_GPUSparseMatrix = nullptr;
            SetDataLocation(GPU, DENSE);
        }
        else
            LogicError("SwitchToMatrixType: Unexpected/invalid new matrix type");
    }
}
// Copies every element of a dense CPU matrix into a sparse CPU matrix.
// Elements are visited in column-major order (columns outer, rows inner),
// the same traversal order the original foreach_coord macro produced.
template <class ElemType>
void Matrix<ElemType>::CopyElementsFromDenseToSparse(CPUMatrix<ElemType>& from, CPUSparseMatrix<ElemType>& dest)
{
    const size_t numRows = from.GetNumRows();
    const size_t numCols = from.GetNumCols();
    for (size_t j = 0; j < numCols; j++)
        for (size_t i = 0; i < numRows; i++)
            dest.SetValue(i, j, from(i, j));
}
// Returns the element at position (0,0).
// Only implemented for dense (CPU or GPU) matrices.
template <class ElemType>
ElemType Matrix<ElemType>::Get00Element() const
{
    DISPATCH_MATRIX_ON_FLAG(this,
                            nullptr,
                            return m_CPUMatrix->Get00Element(),
                            return m_GPUMatrix->Get00Element(),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Read-only element access.
// WARNING: slow for GPU-resident matrices: the data is first transferred
// to the CPU before the element can be read. Dense only.
template <class ElemType>
const ElemType Matrix<ElemType>::operator()(const size_t row, const size_t col) const
{
    DISPATCH_MATRIX_ON_FLAG_USECPU_4BOTH(this,
                                         nullptr,
                                         return m_CPUMatrix->operator()(row, col),
                                         _transferFromDeviceToDevice(GetDeviceId(), CPUDEVICE, false); return m_CPUMatrix->operator()(row, col),
                                         NOT_IMPLEMENTED,
                                         NOT_IMPLEMENTED);
}
//WARNING: This function is very slow for GPUs since it requires copying values between CPUs and GPUs.
//In addition, if ColumnSlice is used after this function but before the values are copied back to GPU
//the operation will fail since the memory is not managed by the slice.
// If you don't need to modify the values, call the const version above, or GetValue(row,col) which does that for you unambiguously.
// TODO: Can we remove this, and have users use SetValue() instead? To avoid this potential error?
// Writable element access. After the device-to-CPU transfer the location is
// marked (CPU, DENSE) so the modified CPU copy becomes the authoritative one.
template <class ElemType>
ElemType& Matrix<ElemType>::operator()(const size_t row, const size_t col)
{
    DISPATCH_MATRIX_ON_FLAG_USECPU_4BOTH(this,
                                         nullptr,
                                         return m_CPUMatrix->operator()(row, col),
                                         _transferFromDeviceToDevice(GetDeviceId(), CPUDEVICE, false); SetDataLocation(CPU, DENSE); return m_CPUMatrix->operator()(row, col),
                                         NOT_IMPLEMENTED,
                                         NOT_IMPLEMENTED);
}
// Returns a new matrix that is the transpose of [this], created on the same
// device with the same matrix type and format. [this] is not modified.
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::Transpose()
{
    if (IsEmpty())
        LogicError("Transpose: Matrix is empty.");
    Matrix<ElemType> result(GetNumCols(), GetNumRows(), (DEVICEID_TYPE) GetDeviceId(), GetMatrixType(), GetFormat());
    result.AssignTransposeOf(*this);
    return result;
}
// [this] = transpose of a. Matrices are first moved to a common device and
// [this] is switched to a's type/format (without keeping old values).
// Not implemented for CPU sparse input.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignTransposeOf(const Matrix<ElemType>& a)
{
    DecideAndMoveToRightDevice(a, *this);
    SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
    DISPATCH_MATRIX_ON_FLAG(&a,
                            this,
                            m_CPUMatrix->AssignTransposeOf(*a.m_CPUMatrix),
                            m_GPUMatrix->AssignTransposeOf(*a.m_GPUMatrix),
                            NOT_IMPLEMENTED,
                            m_GPUSparseMatrix->AssignTransposeOf(*a.m_GPUSparseMatrix));
    return *this;
}
// *this[:,j] = a[:,m[j]] * alpha + *this[:,j] * beta
// m has width of 'this' and contains values w.r.t. 'a'
// Invalid entries (gap columns) are denoted by m(0,j) == -1.
// Dense only; all three matrices are moved to a common device first.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::DoGatherColumnsOf(ElemType beta, const Matrix<ElemType>& m, const Matrix<ElemType>& a, ElemType alpha)
{
    DecideAndMoveToRightDevice(*this, m, a); // TODO: only move target if beta != 0
    DISPATCH_MATRIX_ON_FLAG(&a,
                            this,
                            m_CPUMatrix->DoGatherColumnsOf(beta, *m.m_CPUMatrix, *a.m_CPUMatrix, alpha),
                            m_GPUMatrix->DoGatherColumnsOf(beta, *m.m_GPUMatrix, *a.m_GPUMatrix, alpha),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
// *this[:,m[j]] = a[:,j] * alpha + *this[:,m[j]] * beta
// m has width of 'a' and contains values w.r.t. 'this'
// Unlike gather, for scatter, 'this' must have been sized already.
// Invalid entries (gap columns) are denoted by m(0,j) == -1.
// Dense only; all three matrices are moved to a common device first.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::DoScatterColumnsOf(ElemType beta, const Matrix<ElemType>& m, const Matrix<ElemType>& a, ElemType alpha)
{
    DecideAndMoveToRightDevice(*this, m, a); // TODO: only move target if beta != 0
    DISPATCH_MATRIX_ON_FLAG(&a,
                            this,
                            m_CPUMatrix->DoScatterColumnsOf(beta, *m.m_CPUMatrix, *a.m_CPUMatrix, alpha),
                            m_GPUMatrix->DoScatterColumnsOf(beta, *m.m_GPUMatrix, *a.m_GPUMatrix, alpha),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
// set all elements of a matrix to a scalar value
// For sparse matrices, the only allowed value is 0 (implemented as Reset(),
// which clears the stored non-zero elements).
template <class ElemType>
void Matrix<ElemType>::SetValue(const ElemType v)
{
    if (IsEmpty()) // if empty then we are done
        return;
    if (v == 0 && GetMatrixType() == MatrixType::SPARSE) // if sparse, setting it to 0 is special
    {
        Reset();
        return;
    }
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->SetValue(v),
                            m_GPUMatrix->SetValue(v),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Sets all elements to a scalar that is bound to a specific device.
// For the GPU path the number must live on the same device as the matrix.
template <class ElemType>
void Matrix<ElemType>::SetValue(const DeviceBoundNumber<ElemType>& db_number)
{
    if (IsEmpty()) // if empty then we are done
        return;
    // LogicError("SetValue: Matrix is empty.");
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->SetValue(*db_number.ExposePointer2Value()),
                            {
                                if (GetDeviceId() != db_number.GetDeviceId())
                                    RuntimeError("Matrix and device bound number must be on the same device");
                                m_GPUMatrix->SetValue(db_number.ExposePointer2Value());
                            },
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Produces a quiet NaN of type float; the payload argument is ignored.
template <>
/*static*/ float Matrix<float>::MakeNan(size_t /*payload*/)
{
    const float quietNaN = nanf("");
    return quietNaN;
}
// Produces a quiet NaN of type double; the payload argument is ignored.
template <>
/*static*/ double Matrix<double>::MakeNan(size_t /*payload*/)
{
    const double quietNaN = nan("");
    return quietNaN;
}
// char has no NaN representation; return 0 so generic code still compiles.
// (needed for completeness)
template <>
/*static*/ char Matrix<char>::MakeNan(size_t)
{
    const char placeholder = 0;
    return placeholder;
}
// Masks columns of [this] according to columnsMask; masked columns are set
// to 'val' (presumably where the mask entry is 0 — confirm in
// CPUMatrix/GPUMatrix::MaskColumnsValue). Matrix and mask must agree in
// column count and, unless [this] is on the CPU or the mask location is
// BOTH, reside on the same device. Dense only.
template <class ElemType>
void Matrix<ElemType>::MaskColumnsValue(const Matrix<char>& columnsMask, ElemType val)
{
    if (GetNumCols() != columnsMask.GetNumCols())
        RuntimeError("MaskColumnsValue: Matrix and column mask must have equal number of columns.");
    if (GetCurrentMatrixLocation() == CPU && (columnsMask.GetCurrentMatrixLocation() == CPU || columnsMask.GetCurrentMatrixLocation() == BOTH))
        ; // OK
    else if (GetDeviceId() != columnsMask.GetDeviceId() && columnsMask.GetCurrentMatrixLocation() != BOTH)
        RuntimeError("MaskColumnsValue: Matrix and column mask must be on the same device.");
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->MaskColumnsValue(*columnsMask.m_CPUMatrix, val),
                            m_GPUMatrix->MaskColumnsValue(*columnsMask.m_GPUMatrix, val),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Copies a raw element array into column colInd. The array must hold at
// least GetNumRows() elements. Dense only.
template <class ElemType>
void Matrix<ElemType>::SetColumn(const ElemType* colPointer, size_t colInd)
{
    if (colPointer == nullptr)
        InvalidArgument("SetColumn: colPointer is null.");
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->SetColumn(colPointer, colInd),
                            m_GPUMatrix->SetColumn(colPointer, colInd),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Sets every element of column colInd to the scalar val. CPU dense only.
template <class ElemType>
void Matrix<ElemType>::SetColumn(const ElemType val, size_t colInd)
{
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->SetColumn(val, colInd),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Copies the column matrix colMat into column colInd of [this].
// Both matrices are moved to a common device first. Dense only.
template <class ElemType>
void Matrix<ElemType>::SetColumn(const Matrix<ElemType>& colMat, size_t colInd)
{
    DecideAndMoveToRightDevice(*this, colMat);
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->SetColumn(*colMat.m_CPUMatrix, colInd),
                            m_GPUMatrix->SetColumn(*colMat.m_GPUMatrix, colInd),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Deep-copies deepCopyFrom into [this]. Adopts the source's preferred
// device and matrix type; 'format' selects the sparse format when the
// source is sparse. Self-assignment is a no-op.
template <class ElemType>
void Matrix<ElemType>::SetValue(const Matrix<ElemType>& deepCopyFrom, const MatrixFormat format /*= matrixFormatSparseCSR*/)
{
    if (this == &deepCopyFrom)
        return;
    m_preferredDeviceId = deepCopyFrom.m_preferredDeviceId;
    DecideAndMoveToRightDevice(deepCopyFrom, *this);
    SwitchToMatrixType(deepCopyFrom.GetMatrixType(), format, false);
    DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom,
                            this,
                            m_CPUMatrix->SetValue(*deepCopyFrom.m_CPUMatrix),
                            m_GPUMatrix->SetValue(*deepCopyFrom.m_GPUMatrix),
                            m_CPUSparseMatrix->SetValue(*deepCopyFrom.m_CPUSparseMatrix),
                            m_GPUSparseMatrix->SetValue(*deepCopyFrom.m_GPUSparseMatrix));
}
// Loads the matrix from a raw element array of numRows x numCols values.
// 'matrixFlags' controls interpretation of pArray (e.g. layout/ownership —
// see CPUMatrix/GPUMatrix::SetValue). Dense only.
template <class ElemType>
void Matrix<ElemType>::SetValue(const size_t numRows, const size_t numCols, int deviceId, ElemType* pArray, const size_t matrixFlags)
{
    if (((numRows * numCols) > 0) && (pArray == nullptr))
        InvalidArgument("Invalid pArray.");
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->SetValue(numRows, numCols, pArray, matrixFlags),
                            m_GPUMatrix->SetValue(numRows, numCols, deviceId, pArray, matrixFlags),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Sets a single element (rIdx, cIdx) to val. Runs on the CPU copy when the
// data location is BOTH. Supported for CPU dense and CPU sparse only.
template <class ElemType>
void Matrix<ElemType>::SetValue(const size_t rIdx, const size_t cIdx, ElemType val)
{
    DISPATCH_MATRIX_ON_FLAG_USECPU_4BOTH(this,
                                         this,
                                         (*m_CPUMatrix)(rIdx, cIdx) = val,
                                         NOT_IMPLEMENTED,
                                         m_CPUSparseMatrix->SetValue(rIdx, cIdx, val),
                                         NOT_IMPLEMENTED);
}
// read features
// Loads a sparse matrix from host-side CSC arrays (column offsets, row
// indices, values, nz = number of non-zeros). Sparse only.
template <class ElemType>
void Matrix<ElemType>::SetMatrixFromCSCFormat(const CPUSPARSE_INDEX_TYPE* h_CSCCol, const CPUSPARSE_INDEX_TYPE* h_Row, const ElemType* h_Val,
                                              const size_t nz, const size_t numRows, const size_t numCols)
{
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED,
                            m_CPUSparseMatrix->SetMatrixFromCSCFormat(h_CSCCol, h_Row, h_Val, nz, numRows, numCols),
                            m_GPUSparseMatrix->SetMatrixFromCSCFormat(h_CSCCol, h_Row, h_Val, nz, numRows, numCols));
}
// Sets the main diagonal to the scalar v. The matrix must be square and
// non-empty. Dense only.
template <class ElemType>
void Matrix<ElemType>::SetDiagonalValue(const ElemType v)
{
    if (IsEmpty())
        LogicError("SetDiagonalValue: Matrix is empty.");
    if (GetNumRows() != GetNumCols())
        LogicError("SetDiagonalValue: NumRows and NumCols do not agree.");
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->SetDiagonalValue(v),
                            m_GPUMatrix->SetDiagonalValue(v),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Sets the main diagonal of the (square) matrix from a row or column
// vector whose element count equals GetNumRows(). A 1x1 vector reduces to
// the scalar overload.
template <class ElemType>
void Matrix<ElemType>::SetDiagonalValue(const Matrix<ElemType>& vector)
{
    if (GetNumRows() != GetNumCols())
        LogicError("SetDiagonalValue: NumRows and NumCols do not agree.");
    if (vector.GetNumRows() != 1 && vector.GetNumCols() != 1)
        LogicError("SetDiagonalValue: Input vector must be a vector.");
    if (vector.GetNumRows() * vector.GetNumCols() != GetNumRows())
        LogicError("SetDiagonalValue: Input vector must match matrix dimension.");
    if (IsEmpty())
        return;
    DecideAndMoveToRightDevice(*this, vector);
    if (vector.GetNumElements() == 1) // reduce to simple form
    {
        DISPATCH_MATRIX_ON_FLAG(&vector,
                                nullptr,
                                SetDiagonalValue(vector(0, 0)),
                                SetDiagonalValue(vector.m_GPUMatrix->Get00Element()), // BUGBUG: efficiency
                                SetDiagonalValue(vector(0, 0)),
                                SetDiagonalValue(vector.m_GPUMatrix->Get00Element()) // BUGBUG: efficiency
                                );
    }
    else if (vector.GetNumRows() != GetNumRows())
        LogicError("SetDiagonalValue: input vector's dimension does not agree with [this].");
    else
    {
        // WARNING: we use this pointer to decide which function to call. However, vector may be stored in a different matrix type (DENSE, SPARSE)
        DISPATCH_MATRIX_ON_FLAG(this,
                                this,
                                assert(vector.m_CPUMatrix != nullptr);
                                m_CPUMatrix->SetDiagonalValue(*vector.m_CPUMatrix),
                                assert(vector.m_GPUMatrix != nullptr);
                                m_GPUMatrix->SetDiagonalValue(*vector.m_GPUMatrix),
                                NOT_IMPLEMENTED,
                                NOT_IMPLEMENTED);
    }
}
// Fills the matrix with values drawn uniformly from [low, high] using the
// given RNG seed. No-op on an empty matrix. Dense only.
template <class ElemType>
void Matrix<ElemType>::SetUniformRandomValue(const ElemType low, const ElemType high, unsigned long seed)
{
    if (IsEmpty())
        return;
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->SetUniformRandomValue(low, high, seed),
                            m_GPUMatrix->SetUniformRandomValue(low, high, seed),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Fills the matrix with samples from a Gaussian distribution with the given
// mean and standard deviation 'sigma' (must be > 0), seeded by 'seed'.
// No-op on an empty matrix. Dense only.
template <class ElemType>
void Matrix<ElemType>::SetGaussianRandomValue(const ElemType mean, const ElemType sigma, unsigned long seed)
{
    if (sigma <= 0)
        // Fixed: the error message previously named SetUniformRandomValue.
        InvalidArgument("SetGaussianRandomValue: sigma must be a positive value.");
    if (IsEmpty())
        return;
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->SetGaussianRandomValue(mean, sigma, seed),
                            m_GPUMatrix->SetGaussianRandomValue(mean, sigma, seed),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Adds Gaussian noise (given mean and standard deviation 'sigma' > 0) to
// every element, seeded by 'seed'. No-op on an empty matrix. CPU dense only.
template <class ElemType>
void Matrix<ElemType>::AddGaussianRandomValue(const ElemType mean, const ElemType sigma, unsigned long seed)
{
    if (sigma <= 0)
        // Fixed: the error message previously named SetUniformRandomValue.
        InvalidArgument("AddGaussianRandomValue: sigma must be a positive value.");
    if (IsEmpty())
        return;
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->AddGaussianRandomValue(mean, sigma, seed),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
//maskRate: percentage of values masked out (similar to dropout rate)
//scaleValue: which scale value to set to the left ones (unmasked items).
// No-op on an empty matrix. Dense only.
template <class ElemType>
void Matrix<ElemType>::SetUniformRandomMask(const ElemType maskRate, const ElemType scaleValue, unsigned long seed)
{
    if (IsEmpty())
        return;
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->SetUniformRandomMask(maskRate, scaleValue, seed),
                            m_GPUMatrix->SetUniformRandomMask(maskRate, scaleValue, seed),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Momentum SGD update. [this] is the smoothed-gradient (velocity) buffer;
// 'gradients' is the raw gradient; 'functionValues' holds the model
// parameters being updated. Dispatches on the gradient's storage type.
// With useNesterovMomentum, applies the Nesterov-accelerated variant.
// NOTE(review): both [this] and 'gradients' may be modified (sparse paths
// call NormalGrad on the gradient itself).
template <class ElemType>
void Matrix<ElemType>::NormalGrad(Matrix<ElemType>& gradients,
                                  Matrix<ElemType>& functionValues,
                                  const ElemType learnRatePerSample,
                                  const ElemType momentum,
                                  const bool useNesterovMomentum)
{
    DecideAndMoveToRightDevice(*this, gradients, functionValues);
    if (!useNesterovMomentum)
    {
        DISPATCH_MATRIX_ON_FLAG(&gradients,
                                nullptr,
                                ScaleAndAdd((1 - momentum) * learnRatePerSample, gradients, momentum, *this);
                                functionValues -= *this,
                                ScaleAndAdd((1 - momentum) * learnRatePerSample, gradients, momentum, *this);
                                functionValues -= *this,
                                if (momentum != 0) gradients.m_CPUSparseMatrix->NormalGrad(*m_CPUMatrix, momentum);
                                ScaleAndAdd(-learnRatePerSample, gradients, functionValues),
                                if (momentum != 0) gradients.m_GPUSparseMatrix->NormalGrad(*m_GPUMatrix, momentum);
                                ScaleAndAdd(-learnRatePerSample, gradients, functionValues));
    }
    else
    {
        DISPATCH_MATRIX_ON_FLAG(&gradients,
                                nullptr,
                                { /* CPU dense */
                                  ScaleAndAdd((1 - momentum) * learnRatePerSample, gradients, momentum, *this);
                                  ScaleAndAdd(-momentum, *this, functionValues);
                                  ScaleAndAdd(-(1 - momentum) * learnRatePerSample, gradients, functionValues);
                                  // w_t = w_{t-1} - momentum * v_{t-1} - (1 - momentum) * learnRatePerSample * gradient
                                },
                                { /* GPU dense */
                                  ScaleAndAdd((1 - momentum) * learnRatePerSample, gradients, momentum, *this);
                                  ScaleAndAdd(-momentum, *this, functionValues);
                                  ScaleAndAdd(-(1 - momentum) * learnRatePerSample, gradients, functionValues);
                                },
                                { /* CPU sparse */
                                  if (momentum != 0)
                                  {
                                      // Cache the raw gradient because NormalGrad modifies it in place.
                                      Matrix<ElemType> gradientCache(gradients.GetDeviceId());
                                      gradientCache.SetValue(gradients);
                                      gradients.m_CPUSparseMatrix->NormalGrad(*m_CPUMatrix, momentum);
                                      ScaleAndAdd(-momentum, *this, functionValues);
                                      ScaleAndAdd(-(1 - momentum) * learnRatePerSample, gradientCache, functionValues);
                                  }
                                },
                                { /* GPU sparse */
                                  if (momentum != 0)
                                  {
                                      Matrix<ElemType> gradientCache(gradients.GetDeviceId());
                                      gradientCache.SetValue(gradients);
                                      gradients.m_GPUSparseMatrix->NormalGrad(*m_GPUMatrix, momentum);
                                      ScaleAndAdd(-momentum, *this, functionValues);
                                      ScaleAndAdd(-(1 - momentum) * learnRatePerSample, gradientCache, functionValues);
                                  }
                                });
    }
}
//both this and gradients will be changed
// [this] accumulates the squared-gradient statistics; returns the value
// computed by the underlying Adagrad implementation (average multiplier
// when needAveMultiplier — confirm in CPUMatrix/GPUMatrix::Adagrad).
// NOTE(review): the SetDataLocation calls after each 'return' are
// unreachable (cf. the disabled C4702 warning at the top of this file).
template <class ElemType>
ElemType Matrix<ElemType>::Adagrad(Matrix<ElemType>& gradients, const bool needAveMultiplier)
{
    DecideAndMoveToRightDevice(*this, gradients);
    DISPATCH_MATRIX_ON_FLAG(&gradients,
                            &gradients,
                            return m_CPUMatrix->Adagrad(*gradients.m_CPUMatrix, needAveMultiplier);
                            SetDataLocation(CPU),
                            return m_GPUMatrix->Adagrad(*gradients.m_GPUMatrix, needAveMultiplier);
                            SetDataLocation(GPU),
                            return gradients.m_CPUSparseMatrix->Adagrad(*m_CPUMatrix, needAveMultiplier);
                            SetDataLocation(CPU),
                            return gradients.m_GPUSparseMatrix->Adagrad(*m_GPUMatrix, needAveMultiplier);
                            SetDataLocation(GPU));
}
// FSAdagrad update (DBN-style Adagrad with momentum). [this] holds the
// smoothed statistics; 'functionValues' are the parameters being updated.
// Dense only.
// NOTE(review): 'aggadagradsqrframes' is a function-local static shared by
// all matrices of this element type and is not thread-safe — confirm that
// callers never run this concurrently.
template <class ElemType>
void Matrix<ElemType>::FSAdagrad(size_t mbSize, Matrix<ElemType>& gradients, Matrix<ElemType>& functionValues, const ElemType learnRatePerSample, const ElemType momentum)
{
    // TODO: The values of 'adagradT' and 'targetadagradavdenom' are currently hardcoded constants taken from DBN (empirically determined).
    // These should be made configurable if needed
    const size_t adagradT = 2 * 3600 * 100;
    const ElemType targetadagradavdenom = 0.0025; // 1/400 magic constant
    const ElemType adagradkeepweight = static_cast<ElemType>(exp(-1.0 * mbSize / adagradT));
    static ElemType aggadagradsqrframes = 0;
    aggadagradsqrframes = adagradkeepweight * aggadagradsqrframes + (1.0f - adagradkeepweight) * mbSize;
    const ElemType targetadagradavdenom_x_sqrtadagradsqrframes = static_cast<ElemType>(targetadagradavdenom * sqrt(aggadagradsqrframes));
    DISPATCH_MATRIX_ON_FLAG(&gradients,
                            &gradients,
                            m_CPUMatrix->FSAdagrad(*gradients.m_CPUMatrix, *functionValues.m_CPUMatrix, learnRatePerSample, momentum, adagradkeepweight, targetadagradavdenom_x_sqrtadagradsqrframes);
                            SetDataLocation(CPU),
                            m_GPUMatrix->FSAdagrad(*gradients.m_GPUMatrix, *functionValues.m_GPUMatrix, learnRatePerSample, momentum, adagradkeepweight, targetadagradavdenom_x_sqrtadagradsqrframes);
                            SetDataLocation(GPU),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// RmsProp update. [this] holds the smoothed statistics; the RMS_* constants
// parameterize the per-weight learning-rate adaptation. Dense only.
// NOTE(review): the SetDataLocation calls after each 'return' are
// unreachable (cf. the disabled C4702 warning at the top of this file).
template <class ElemType>
ElemType Matrix<ElemType>::RmsProp(Matrix<ElemType>& gradients,
                                   ElemType RMS_GAMMA,
                                   ElemType RMS_WGT_INC,
                                   ElemType RMS_WGT_MAX,
                                   ElemType RMS_WGT_DEC,
                                   ElemType RMS_WGT_MIN,
                                   const bool needAveMultiplier)
{
    DecideAndMoveToRightDevice(*this, gradients);
    DISPATCH_MATRIX_ON_FLAG(this,
                            &gradients,
                            return m_CPUMatrix->RmsProp(*gradients.m_CPUMatrix, RMS_GAMMA, RMS_WGT_INC, RMS_WGT_MAX, RMS_WGT_DEC, RMS_WGT_MIN, needAveMultiplier);
                            SetDataLocation(CPU),
                            return m_GPUMatrix->RmsProp(*gradients.m_GPUMatrix, RMS_GAMMA, RMS_WGT_INC, RMS_WGT_MAX, RMS_WGT_DEC, RMS_WGT_MIN, needAveMultiplier);
                            SetDataLocation(GPU),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Reinterprets the matrix with a new (numRows, numCols) shape; no-op when
// the shape already matches. Not implemented for CPU sparse.
template <class ElemType>
void Matrix<ElemType>::Reshape(const size_t numRows, const size_t numCols)
{
    if (numRows != GetNumRows() || numCols != GetNumCols())
    {
        DISPATCH_MATRIX_ON_FLAG(this,
                                this,
                                m_CPUMatrix->Reshape(numRows, numCols),
                                m_GPUMatrix->Reshape(numRows, numCols),
                                NOT_IMPLEMENTED,
                                m_GPUSparseMatrix->Reshape(numRows, numCols));
    }
}
// Note: Resize() will leave the matrix content undefined.
// numNZElemToReserve is used only by the sparse paths; growOnly avoids
// shrinking an already-large allocation.
template <class ElemType>
void Matrix<ElemType>::Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve /*=0*/, bool growOnly /*=true*/)
{
    // TODO: should this function test whether the size is changing, and skip if it isn't? We have at least one explicit test for this code calling this (recurrent node)
    DISPATCH_MATRIX_ON_FLAG_USEBOTH_4BOTH(this,
                                          this,
                                          m_CPUMatrix->Resize(numRows, numCols, growOnly),
                                          m_GPUMatrix->Resize(numRows, numCols, growOnly),
                                          m_CPUSparseMatrix->Resize(numRows, numCols, numNZElemToReserve, growOnly, false),
                                          m_GPUSparseMatrix->Resize(numRows, numCols, numNZElemToReserve, growOnly, false));
#ifdef _DEBUG
    if (GetMatrixType() != MatrixType::SPARSE)
        Invalidate(); // Fill the matrix with NaNs to detect using the content which is undefined. Unfortunately this won't work for sparse matrices.
#endif
}
// Tiles 'frmMat' colRatio times horizontally and returns the result on the
// same device. rowRatio must be 1 — tiling along rows is not implemented.
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::RepMat(const Matrix<ElemType>& frmMat, const size_t rowRatio, const size_t colRatio)
{
    size_t nCols = frmMat.GetNumCols();
    size_t nRows = frmMat.GetNumRows();
    if (rowRatio > 1)
        RuntimeError("RepMat not yet supporting row ratio larger than 1"); // fixed typo: was "raw ratio"
    size_t newCols = colRatio * nCols;
    Matrix<ElemType> c(nRows, newCols, frmMat.GetDeviceId());
    // Copy the source into each horizontal tile.
    for (size_t i = 0; i < colRatio; i++)
    {
        c.ColumnSlice(i * nCols, nCols).SetValue(frmMat);
    }
    return c;
}
// Returns the allocated element capacity of the underlying storage
// (which may exceed rows*cols after a grow-only Resize).
template <class ElemType>
size_t Matrix<ElemType>::GetAllocatedSize() const
{
    return m_baseMatrix->GetSizeAllocated();
}
// reset for sparse matrix. Semantically the same as setting all values to 0.
// Only valid for sparse matrices; dense matrices are NOT_IMPLEMENTED here
// (use SetValue(0) instead, which routes zeroing appropriately).
template <class ElemType>
void Matrix<ElemType>::Reset()
{
    DISPATCH_MATRIX_ON_FLAG_USEBOTH_4BOTH(this,
                                          this,
                                          NOT_IMPLEMENTED,
                                          NOT_IMPLEMENTED,
                                          m_CPUSparseMatrix->Reset(),
                                          m_GPUSparseMatrix->Reset());
}
// Returns the number of rows, delegating to the active underlying matrix.
template <class ElemType>
size_t Matrix<ElemType>::GetNumRows() const
{
    return m_baseMatrix->GetNumRows();
}
// Returns the number of columns, delegating to the active underlying matrix.
template <class ElemType>
size_t Matrix<ElemType>::GetNumCols() const
{
    return m_baseMatrix->GetNumCols();
}
// Returns the total element count (rows * cols).
template <class ElemType>
size_t Matrix<ElemType>::GetNumElements() const
{
    return GetNumRows() * GetNumCols();
}
// True when the matrix holds no elements.
template <class ElemType>
bool Matrix<ElemType>::IsEmpty() const
{
    return m_baseMatrix->IsEmpty();
}
#pragma endregion Basic Operators
#pragma region Member BLAS Functions
// In-place element-wise addition of a scalar: [this] += alpha.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::operator+=(ElemType alpha)
{
    return AssignSumOf(alpha, *this);
}
// Returns [this] + alpha element-wise; [this] is left unchanged.
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::operator+(ElemType alpha) const
{
    Matrix<ElemType> sum(GetNumRows(), GetNumCols(), GetDeviceId());
    sum.AssignSumOf(alpha, *this);
    return sum;
}
// [this] = alpha + a, element-wise. Adopts a's type/format. Dense only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignSumOf(const ElemType alpha, const Matrix<ElemType>& a)
{
    if (a.IsEmpty())
        LogicError("AssignSumOf: Matrix a is empty.");
    DecideAndMoveToRightDevice(a, *this);
    SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
    DISPATCH_MATRIX_ON_FLAG(&a,
                            this,
                            m_CPUMatrix->AssignSumOf(alpha, *a.m_CPUMatrix),
                            m_GPUMatrix->AssignSumOf(alpha, *a.m_GPUMatrix),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
//if [this] and a have same dimension then [this]=[this]+a
//if a is a column vector, add to all columns of [this]
//if a is a row vector, add to all rows of [this]
//if a is a scalar, add it to all elements.
// Both operands must already have the same matrix type. Dense only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::operator+=(const Matrix<ElemType>& a)
{
    DecideAndMoveToRightDevice(*this, a);
    if (!(GetMatrixType() == a.GetMatrixType()))
        NOT_IMPLEMENTED;
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->operator+=(*a.m_CPUMatrix),
                            m_GPUMatrix->operator+=(*a.m_GPUMatrix),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
//if [this] and a have same dimension then OUTPUT=[this]+a
//if a is a column vector, add to all columns of [this]
//if a is a row vector, add to all rows of [this]
// A 1x1 operand is broadcast-added as a scalar to a copy of the other side.
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::operator+(const Matrix<ElemType>& a) const
{
    if (GetNumElements() == 1)
    {
        Matrix<ElemType> c(a.DeepClone());
        DISPATCH_MATRIX_ON_FLAG(this,
                                &c,
                                c += (*this)(0, 0),
                                c += (m_GPUMatrix->Get00Element()), // BUGBUG: efficiency
                                c += (*this)(0, 0),
                                NOT_IMPLEMENTED);
        return c;
    }
    else if (a.GetNumElements() == 1)
    {
        Matrix<ElemType> c(this->DeepClone());
        DISPATCH_MATRIX_ON_FLAG(&a,
                                &c,
                                c += a(0, 0),
                                c += (a.m_GPUMatrix->Get00Element()), // BUGBUG: efficiency
                                c += a(0, 0),
                                NOT_IMPLEMENTED);
        return c;
    }
    else
    {
        Matrix<ElemType> c(this->DeepClone()); // this implementation will introduce a copy overhead. but make reuse of the code
        c += a;
        return c;
    }
}
// [this] = a + b. If 'a' is a 1x1 scalar, copy 'b' first and broadcast-add
// 'a'; otherwise copy 'a' and add 'b' (operator+= handles broadcasting).
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignSumOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b)
{
    const bool aIsScalar = (a.GetNumElements() == 1);
    const Matrix<ElemType>& base = aIsScalar ? b : a;
    const Matrix<ElemType>& addend = aIsScalar ? a : b;
    SetValue(base);
    (*this) += addend;
    return *this;
}
// In-place element-wise subtraction of a scalar: [this] -= alpha.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::operator-=(ElemType alpha)
{
    return AssignDifferenceOf(*this, alpha);
}
// Returns [this] - alpha element-wise; [this] is left unchanged.
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::operator-(ElemType alpha) const
{
    Matrix<ElemType> difference(GetNumRows(), GetNumCols(), GetDeviceId());
    difference.AssignDifferenceOf(*this, alpha);
    return difference;
}
//for each column of a, we assign numRows starting from startIndex to this
// Adopts a's type/format. Dense only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignRowSliceValuesOf(const Matrix<ElemType>& a, const size_t startIndex, const size_t numRows)
{
    DecideAndMoveToRightDevice(a, *this);
    SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->AssignRowSliceValuesOf(*a.m_CPUMatrix, startIndex, numRows),
                            m_GPUMatrix->AssignRowSliceValuesOf(*a.m_GPUMatrix, startIndex, numRows),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
//for each column of a, we assign all rows of a to this starting from startIndex
// Both matrices must already have the same matrix type. Dense only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignToRowSliceValuesOf(const Matrix<ElemType>& a, const size_t startIndex, const size_t numRows)
{
    DecideAndMoveToRightDevice(*this, a);
    // WARNING: a and this must have same type
    if (!(GetMatrixType() == a.GetMatrixType()))
        NOT_IMPLEMENTED;
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->AssignToRowSliceValuesOf(*a.m_CPUMatrix, startIndex, numRows),
                            m_GPUMatrix->AssignToRowSliceValuesOf(*a.m_GPUMatrix, startIndex, numRows),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
//for the row slice of this starting from startIndex we add a to it.
// Both matrices must already have the same matrix type. Dense only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AddToRowSliceValuesOf(const Matrix<ElemType>& a, const size_t startIndex, const size_t numRows)
{
    DecideAndMoveToRightDevice(*this, a);
    // WARNING: a and this must have same type
    if (!(GetMatrixType() == a.GetMatrixType()))
        NOT_IMPLEMENTED;
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->AddToRowSliceValuesOf(*a.m_CPUMatrix, startIndex, numRows),
                            m_GPUMatrix->AddToRowSliceValuesOf(*a.m_GPUMatrix, startIndex, numRows),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
//for each column of this, we add row slice of a starting from startIndex
// Both matrices must already have the same matrix type. Dense only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AddWithRowSliceValuesOf(const Matrix<ElemType>& a, const size_t startIndex, const size_t numRows)
{
    DecideAndMoveToRightDevice(*this, a);
    // WARNING: a and this must have same type
    if (!(GetMatrixType() == a.GetMatrixType()))
        NOT_IMPLEMENTED;
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->AddWithRowSliceValuesOf(*a.m_CPUMatrix, startIndex, numRows),
                            m_GPUMatrix->AddWithRowSliceValuesOf(*a.m_GPUMatrix, startIndex, numRows),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
// [this] = a tiled numRowRepeats times vertically and numColRepeats times
// horizontally. Both matrices must have the same matrix type. Dense only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignRepeatOf(const Matrix<ElemType>& a, const size_t numRowRepeats, const size_t numColRepeats)
{
    DecideAndMoveToRightDevice(*this, a);
    // WARNING: a and this must have same type
    if (!(GetMatrixType() == a.GetMatrixType()))
        NOT_IMPLEMENTED;
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->AssignRepeatOf(*a.m_CPUMatrix, numRowRepeats, numColRepeats),
                            m_GPUMatrix->AssignRepeatOf(*a.m_GPUMatrix, numRowRepeats, numColRepeats),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
// Adds a (whose rows are [this]'s rows repeated numRepeats times) back into
// [this] — the reduction counterpart of AssignRepeatOf along rows.
// Both matrices must have the same matrix type. Dense only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AddToRowRepeatValuesOf(const Matrix<ElemType>& a, const size_t numRepeats)
{
    DecideAndMoveToRightDevice(*this, a);
    // WARNING: a and this must have same type
    if (!(GetMatrixType() == a.GetMatrixType()))
        NOT_IMPLEMENTED;
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->AddToRowRepeatValuesOf(*a.m_CPUMatrix, numRepeats),
                            m_GPUMatrix->AddToRowRepeatValuesOf(*a.m_GPUMatrix, numRepeats),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
//used in the DSSM model. The resulted *this is a [a.GetRows()*(negNumber+1), a.GetCols()] matrix
//each column contains posNumber of positive samples (original) and negNumber negative samples generated by copying
//sample shifted by shiftNumber columns
// Both matrices must have the same matrix type. Dense only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignPositiveAndShiftedNegSample(const Matrix<ElemType>& a, const size_t posNumber, const size_t negNumber, const size_t shiftNumber)
{
    DecideAndMoveToRightDevice(*this, a);
    // WARNING: a and this must have same type
    if (!(GetMatrixType() == a.GetMatrixType()))
        NOT_IMPLEMENTED;
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->AssignPositiveAndShiftedNegSample(*a.m_CPUMatrix, posNumber, negNumber, shiftNumber),
                            m_GPUMatrix->AssignPositiveAndShiftedNegSample(*a.m_GPUMatrix, posNumber, negNumber, shiftNumber),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
//used in the DSSM model. *this = *this + positive and negative samples folded back to the right place
//each column of a contains posNumber of positive samples (original) and negNumber negative samples generated by copying
//sample shifted by shiftNumber columns
// Both matrices must have the same matrix type. Dense only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AddFoldedPositiveAndShiftedNegSample(const Matrix<ElemType>& a, const size_t posNumber, const size_t negNumber, const size_t shiftNumber)
{
    DecideAndMoveToRightDevice(*this, a);
    // WARNING: a and this must have same type
    if (!(GetMatrixType() == a.GetMatrixType()))
        NOT_IMPLEMENTED;
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->AddFoldedPositiveAndShiftedNegSample(*a.m_CPUMatrix, posNumber, negNumber, shiftNumber),
                            m_GPUMatrix->AddFoldedPositiveAndShiftedNegSample(*a.m_GPUMatrix, posNumber, negNumber, shiftNumber),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
// [this] = alpha - a, element-wise. Adopts a's type/format. Dense only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignDifferenceOf(const ElemType alpha, const Matrix<ElemType>& a)
{
    if (a.IsEmpty())
        LogicError("AssignDifferenceOf: Matrix a is empty.");
    DecideAndMoveToRightDevice(a, *this);
    SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->AssignDifferenceOf(alpha, *a.m_CPUMatrix),
                            m_GPUMatrix->AssignDifferenceOf(alpha, *a.m_GPUMatrix),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
// [this] = a - alpha, element-wise. Adopts a's type/format. Dense only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignDifferenceOf(const Matrix<ElemType>& a, const ElemType alpha)
{
    if (a.IsEmpty())
        LogicError("AssignDifferenceOf: Matrix a is empty.");
    DecideAndMoveToRightDevice(a, *this);
    SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->AssignDifferenceOf(*a.m_CPUMatrix, alpha),
                            m_GPUMatrix->AssignDifferenceOf(*a.m_GPUMatrix, alpha),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
//if [this] and a have same dimension then [this]=[this]-a
//if a is a column vector, minus it from all columns of [this]
//if a is a row vector, minus it from all rows of [this]
// Dense only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::operator-=(const Matrix<ElemType>& a)
{
    if (a.IsEmpty())
        LogicError("Minus Operation: Matrix a is empty.");
    DecideAndMoveToRightDevice(*this, a);
    DISPATCH_MATRIX_ON_FLAG(this,
                            this, * m_CPUMatrix -= *a.m_CPUMatrix, * m_GPUMatrix -= *a.m_GPUMatrix,
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
    return *this;
}
//if [this] and a have same dimension then output=[this]-a
//if a is a column vector, minus it from all columns of [this]
//if a is a row vector, minus it from all rows of [this]
// Implemented as a deep copy followed by c = c + (-1) * a; the copy adds
// overhead but reuses the ScaleAndAdd code path.
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::operator-(const Matrix<ElemType>& a) const
{
    Matrix<ElemType> difference(DeepClone());
    ScaleAndAdd(-1, a, difference);
    return difference;
}
// [this] = a - b.
// If 'a' broadcasts (is strictly smaller than 'b' in some dimension), we
// compute b - a and negate instead, because operator-=() only broadcasts
// its right-hand side.
// TODO: We need ternary ops where the output storage is separate.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignDifferenceOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b)
{
    const bool aBroadcasts = a.GetNumRows() < b.GetNumRows() || a.GetNumCols() < b.GetNumCols();
    if (aBroadcasts)
    {
        // Mixed larger/smaller dimensions cannot be broadcast.
        if (a.GetNumRows() > b.GetNumRows() || a.GetNumCols() > b.GetNumCols())
            LogicError("AssignDifferenceOf: Invalid dimensions.");
        AssignDifferenceOf(b, a); // [this] = b - a
        *this *= -1;              // flip sign to get a - b
        return *this;
    }
    if (this != &a) // skip the copy on self-assignment
        SetValue(a);
    (*this) -= b;
    return *this;
}
// In-place scaling: [this] *= alpha.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::operator*=(ElemType alpha)
{
    Scale(alpha, *this);
    return *this;
}
// Returns alpha * [this] without modifying [this]; the result is created on
// the preferred device.
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::operator*(ElemType alpha) const
{
    Matrix<ElemType> product(GetNumRows(), GetNumCols(), (DEVICEID_TYPE) m_preferredDeviceId);
    Scale(alpha, *this, product);
    return product;
}
// [this] = alpha * a (scalar scaling).
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignProductOf(const ElemType alpha, const Matrix<ElemType>& a)
{
    Scale(alpha, a, *this);
    return *this;
}
// [this]=a*b
// Matrix product with optional transposes. A 1x1 operand is treated as a
// scalar: the other operand is copied (transposed if requested) and scaled.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignProductOf(const Matrix<ElemType>& a, const bool transposeA, const Matrix<ElemType>& b, const bool transposeB)
{
    if (a.GetNumElements() == 1)
    {
        if (transposeB)
            AssignTransposeOf(b);
        else
            this->SetValue(b);
        DISPATCH_MATRIX_ON_FLAG(this,
                                nullptr,
                                (*this) *= a(0, 0),
                                (*this) *= a.m_GPUMatrix->Get00Element(),
                                (*this) *= a(0, 0),
                                NOT_IMPLEMENTED);
    }
    else if (b.GetNumElements() == 1)
    {
        if (transposeA)
            AssignTransposeOf(a);
        else
            this->SetValue(a);
        DISPATCH_MATRIX_ON_FLAG(this,
                                nullptr,
                                (*this) *= b(0, 0),
                                (*this) *= b.m_GPUMatrix->Get00Element(),
                                (*this) *= b(0, 0),
                                NOT_IMPLEMENTED);
    }
    else
        Multiply(a, transposeA, b, transposeB, *this);
    return *this;
}
// Matrix product [this] * a. A 1x1 operand on either side is treated as a
// scalar multiplication of the other operand.
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::operator*(const Matrix<ElemType>& a) const
{
    if (GetNumElements() == 1)
    {
        Matrix<ElemType> c((DEVICEID_TYPE) a.GetPreferredDeviceId());
        DISPATCH_MATRIX_ON_FLAG(this,
                                nullptr,
                                c.AssignProductOf((*this)(0, 0), a),
                                c.AssignProductOf(m_GPUMatrix->Get00Element(), a), // BUGBUG: efficiency
                                c.AssignProductOf((*this)(0, 0), a),
                                NOT_IMPLEMENTED);
        return c;
    }
    else if (a.GetNumElements() == 1)
    {
        Matrix<ElemType> c((DEVICEID_TYPE) GetPreferredDeviceId());
        DISPATCH_MATRIX_ON_FLAG(&a,
                                nullptr,
                                c.AssignProductOf(a(0, 0), (*this)),
                                c.AssignProductOf(a.m_GPUMatrix->Get00Element(), (*this)), // BUGBUG: efficiency
                                c.AssignProductOf(a(0, 0), (*this)),
                                NOT_IMPLEMENTED);
        return c;
    }
    else
    {
        Matrix<ElemType> c(GetNumRows(), a.GetNumCols(), (DEVICEID_TYPE) GetPreferredDeviceId());
        Multiply(*this, a, c);
        return c;
    }
}
// [this] = a * b where a is a 1x1 scalar; previous contents of [this] are overwritten (beta = 0).
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::Assign1x1ProductOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b)
{
auto& us = *this;
Multiply1x1AndWeightedAdd(+1, a, b, 0.0f, us);
return us;
}
// In-place division by a scalar, implemented as multiplication by the reciprocal.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::operator/=(ElemType alpha)
{
const ElemType invAlpha = 1 / alpha;
(*this) *= invAlpha;
return (*this);
}
// Returns [this] / alpha as a new matrix, implemented as multiplication by the reciprocal.
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::operator/(ElemType alpha) const
{
const ElemType invAlpha = 1 / alpha;
return (*this) * invAlpha;
}
// Element-wise power in place: [this] = [this] .^ alpha
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::operator^=(ElemType alpha)
{
ElementWisePower(alpha, *this, *this);
return *this;
}
// Element-wise power: returns [this] .^ alpha as a new matrix on this matrix's device.
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::operator^(ElemType alpha) const
{
Matrix<ElemType> result(GetNumRows(), GetNumCols(), (DEVICEID_TYPE) GetDeviceId());
ElementWisePower(alpha, *this, result);
return result;
}
// [this] = a .^ power (element-wise)
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignElementPowerOf(const Matrix<ElemType>& a, const ElemType power)
{
auto& us = *this;
ElementWisePower(power, a, us);
return us;
}
//[this]=[this] .* a (we cannot override operator .* in c++)
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::ElementMultiplyWith(const Matrix<ElemType>& a)
{
auto& us = *this;
return us.AssignElementProductOf(us, a);
}
// [this] = [this] ./ a (element-wise division)
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::ElementDivideBy(const Matrix<ElemType>& a)
{
auto& us = *this;
return us.AssignElementDivisionOf(us, a);
}
//[this]=a .* b
// Requires a and b to have identical dimensions and the same matrix type;
// [this] adopts a's type/format and the common device. Dense CPU/GPU only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignElementProductOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b)
{
if (a.IsEmpty() || b.IsEmpty())
LogicError("AssignElementProductOf: Matrix is empty.");
assert(a.GetNumRows() == b.GetNumRows() && a.GetNumCols() == b.GetNumCols());
if (!(a.GetNumRows() == b.GetNumRows() && a.GetNumCols() == b.GetNumCols()))
InvalidArgument("The input matrix dimensions do not match.");
DecideAndMoveToRightDevice(a, b, *this);
if (!(a.GetMatrixType() == b.GetMatrixType()))
NOT_IMPLEMENTED;
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->AssignElementProductOf(*a.m_CPUMatrix, *b.m_CPUMatrix),
m_GPUMatrix->AssignElementProductOf(*a.m_GPUMatrix, *b.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
// [this] += a .* b (element-wise product accumulated into [this]).
// All three matrices must have matching dimensions and the same matrix type.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AddElementProductOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b)
{
if (a.IsEmpty() || b.IsEmpty())
LogicError("AddElementProductOf: Matrix is empty.");
assert(a.GetNumRows() == b.GetNumRows() && a.GetNumCols() == b.GetNumCols());
if (!(a.GetNumRows() == b.GetNumRows() && a.GetNumCols() == b.GetNumCols()))
InvalidArgument("The input matrix dimensions do not match.");
if (!(a.GetNumRows() == GetNumRows() && a.GetNumCols() == GetNumCols()))
InvalidArgument("The input matrix dimensions do not match [this].");
DecideAndMoveToRightDevice(*this, a, b);
if (!(a.GetMatrixType() == b.GetMatrixType() && GetMatrixType() == b.GetMatrixType()))
NOT_IMPLEMENTED;
DISPATCH_MATRIX_ON_FLAG(this,
nullptr,
m_CPUMatrix->AddElementProductOf(*a.m_CPUMatrix, *b.m_CPUMatrix),
m_GPUMatrix->AddElementProductOf(*a.m_GPUMatrix, *b.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
//[this]=a ./ b
// Requires a and b to have identical dimensions and the same matrix type;
// [this] adopts a's type/format and the common device. Dense CPU/GPU only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignElementDivisionOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b)
{
if (a.IsEmpty() || b.IsEmpty())
LogicError("AssignElementDivisionOf: Matrix is empty.");
assert(a.GetNumRows() == b.GetNumRows() && a.GetNumCols() == b.GetNumCols());
if (!(a.GetNumRows() == b.GetNumRows() && a.GetNumCols() == b.GetNumCols()))
InvalidArgument("The input matrix dimensions do not match.");
DecideAndMoveToRightDevice(a, b, *this);
// WARNING: a and b must have same type
if (!(a.GetMatrixType() == b.GetMatrixType()))
NOT_IMPLEMENTED;
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->AssignElementDivisionOf(*a.m_CPUMatrix, *b.m_CPUMatrix),
m_GPUMatrix->AssignElementDivisionOf(*a.m_GPUMatrix, *b.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
// [this](i,j) *= a(i,0): scale each row of [this] by the corresponding entry of
// column vector a. a must be (rows(this) x 1). Dense CPU/GPU only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::ColumnElementMultiplyWith(const Matrix<ElemType>& a)
{
if (a.IsEmpty() || IsEmpty())
LogicError("ColumnElementMultiplyWith: Matrix is empty.");
if (!(a.GetNumRows() == GetNumRows() && a.GetNumCols() == 1))
InvalidArgument("ColumnElementMultiplyWith: The input matrix should be a col vector and match [this]'s rows.");
DecideAndMoveToRightDevice(*this, a);
// WARNING: a and this must have same type
if (!(GetMatrixType() == a.GetMatrixType()))
NOT_IMPLEMENTED;
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->ColumnElementMultiplyWith(*a.m_CPUMatrix),
m_GPUMatrix->ColumnElementMultiplyWith(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
// [this](i,j) *= a(0,j): scale each column of [this] by the corresponding entry of
// row vector a. a must be (1 x cols(this)). Dense CPU/GPU only.
// NOTE(review): unlike ColumnElementMultiplyWith there is no DecideAndMoveToRightDevice
// call here — confirm callers guarantee a and [this] are already on the same device.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::RowElementMultiplyWith(const Matrix<ElemType>& a)
{
if (a.IsEmpty() || IsEmpty())
LogicError("RowElementMultiplyWith: Matrix is empty.");
if (!(a.GetNumCols() == GetNumCols() && a.GetNumRows() == 1))
InvalidArgument("RowElementMultiplyWith: The input matrix should be a row vector and match [this]'s columns.");
// WARNING: a and this must have same type
if (!(GetMatrixType() == a.GetMatrixType()))
NOT_IMPLEMENTED;
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->RowElementMultiplyWith(*a.m_CPUMatrix),
m_GPUMatrix->RowElementMultiplyWith(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
// [this](i,j) /= a(0,j): divide each column of [this] by the corresponding entry of
// row vector a. a must be (1 x cols(this)). Dense CPU/GPU only.
// NOTE(review): no DecideAndMoveToRightDevice call here (cf. ColumnElementDivideBy) —
// confirm callers guarantee a and [this] are already on the same device.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::RowElementDivideBy(const Matrix<ElemType>& a)
{
if (a.IsEmpty() || IsEmpty())
LogicError("RowElementDivideBy: Matrix is empty.");
if (!(a.GetNumCols() == GetNumCols() && a.GetNumRows() == 1))
InvalidArgument("RowElementDivideBy: The input matrix should be a row vector and match [this]'s columns.");
// WARNING: a and this must have same type
if (!(GetMatrixType() == a.GetMatrixType()))
NOT_IMPLEMENTED;
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->RowElementDivideBy(*a.m_CPUMatrix),
m_GPUMatrix->RowElementDivideBy(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
// [this](i,j) /= a(i,0): divide each row of [this] by the corresponding entry of
// column vector a. a must be (rows(this) x 1). Dense CPU/GPU only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::ColumnElementDivideBy(const Matrix<ElemType>& a)
{
if (a.IsEmpty() || IsEmpty())
LogicError("ColumnElementDivideBy: Matrix is empty.");
if (!(a.GetNumRows() == GetNumRows() && a.GetNumCols() == 1))
InvalidArgument("ColumnElementDivideBy: The input matrix should be a col vector and match [this]'s rows.");
DecideAndMoveToRightDevice(*this, a);
// WARNING: a and this must have same type
if (!(GetMatrixType() == a.GetMatrixType()))
NOT_IMPLEMENTED;
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->ColumnElementDivideBy(*a.m_CPUMatrix),
m_GPUMatrix->ColumnElementDivideBy(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
//[this]=1 ./ a
// In-place element-wise reciprocal; supported for dense CPU/GPU and sparse GPU.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::ElementInverse()
{
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->ElementInverse(),
m_GPUMatrix->ElementInverse(),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->ElementInverse());
return (*this);
}
// [this] = 1 ./ a (element-wise reciprocal of a).
// [this] adopts a's type/format and device; dense CPU/GPU and sparse GPU supported.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignElementInverseOf(const Matrix<ElemType>& a)
{
if (a.IsEmpty())
LogicError("AssignElementInverseOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignElementInverseOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignElementInverseOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->AssignElementInverseOf(*a.m_GPUSparseMatrix));
return *this;
}
// [this] = sigmoid([this]) element-wise, in place.
// Supported for dense CPU/GPU and sparse GPU.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceSigmoid()
{
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceSigmoid(),
m_GPUMatrix->InplaceSigmoid(),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->InplaceSigmoid());
return (*this);
}
// [this] = sigmoid(a) element-wise.
// [this] adopts a's type/format and device; dense CPU/GPU and sparse GPU supported.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignSigmoidOf(const Matrix<ElemType>& a)
{
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignSigmoidOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignSigmoidOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->AssignSigmoidOf(*a.m_GPUSparseMatrix));
return *this;
}
//[this]=LinearRectifierDerivative([this]) element wise
// In-place derivative of the linear rectifier (ReLU) applied element-wise.
// Supported for dense CPU/GPU and sparse GPU.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceLinearRectifierDerivative()
{
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceLinearRectifierDerivative(),
m_GPUMatrix->InplaceLinearRectifierDerivative(),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->InplaceLinearRectifierDerivative());
return (*this);
}
// [this] = LinearRectifierDerivative(a) element-wise.
// [this] adopts a's type/format and device; dense CPU/GPU and sparse GPU supported.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignLinearRectifierDerivativeOf(const Matrix<ElemType>& a)
{
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignLinearRectifierDerivativeOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignLinearRectifierDerivativeOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->AssignLinearRectifierDerivativeOf(*a.m_GPUSparseMatrix));
return *this;
}
//[this]=SigmoidDerivative([this]) element wise
// In-place sigmoid derivative applied element-wise. Dense CPU/GPU only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceSigmoidDerivative()
{
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceSigmoidDerivative(),
m_GPUMatrix->InplaceSigmoidDerivative(),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return (*this);
}
// [this] = SigmoidDerivative(a) element-wise. Dense CPU/GPU only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignSigmoidDerivativeOf(const Matrix<ElemType>& a)
{
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignSigmoidDerivativeOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignSigmoidDerivativeOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
// [this] = number of differing entries between a and b
// (searchInCol controls the column-wise comparison mode — semantics are defined by
// the CPU/GPU implementations). a and b must have the same matrix type.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignNumOfDiff(const Matrix<ElemType>& a, const Matrix<ElemType>& b, bool searchInCol)
{
DecideAndMoveToRightDevice(a, b, *this);
// WARNING: a and b must have same type
if (!(a.GetMatrixType() == b.GetMatrixType()))
NOT_IMPLEMENTED;
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->AssignNumOfDiff(*a.m_CPUMatrix, *b.m_CPUMatrix, searchInCol),
m_GPUMatrix->AssignNumOfDiff(*a.m_GPUMatrix, *b.m_GPUMatrix, searchInCol),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
//[this]=tanh([this]) element wise
// Supported for dense CPU/GPU and sparse GPU.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceTanh()
{
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceTanh(),
m_GPUMatrix->InplaceTanh(),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->InplaceTanh());
return (*this);
}
// [this] = tanh(a) element-wise.
// [this] adopts a's type/format and device; dense CPU/GPU and sparse GPU supported.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignTanhOf(const Matrix<ElemType>& a)
{
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignTanhOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignTanhOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->AssignTanhOf(*a.m_GPUSparseMatrix));
return *this;
}
//[this]=log(softmax([this])) element wise
// In-place log-softmax, computed per column (isColWise) or per row. Dense CPU/GPU only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceLogSoftmax(const bool isColWise)
{
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceLogSoftmax(isColWise),
m_GPUMatrix->InplaceLogSoftmax(isColWise),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
// [this] = log(softmax(a)), per column (isColWise) or per row. Dense CPU/GPU only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignLogSoftmaxOf(const Matrix<ElemType>& a, const bool isColWise)
{
if (a.IsEmpty())
LogicError("AssignLogSoftmaxOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignLogSoftmaxOf(*a.m_CPUMatrix, isColWise),
m_GPUMatrix->AssignLogSoftmaxOf(*a.m_GPUMatrix, isColWise),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
//[this]=hardmax([this]) element wise
// In-place hardmax, computed per column (isColWise) or per row. Dense CPU/GPU only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceHardmax(const bool isColWise)
{
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceHardmax(isColWise),
m_GPUMatrix->InplaceHardmax(isColWise),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
// [this] = hardmax(a), per column (isColWise) or per row. Dense CPU/GPU only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignHardmaxOf(const Matrix<ElemType>& a, const bool isColWise)
{
if (a.IsEmpty())
LogicError("AssignHardmaxOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignHardmaxOf(*a.m_CPUMatrix, isColWise),
m_GPUMatrix->AssignHardmaxOf(*a.m_GPUMatrix, isColWise),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
// [this] = sqrt([this]) element-wise, in place.
// Supported for dense CPU/GPU and sparse GPU.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceSqrt()
{
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceSqrt(),
m_GPUMatrix->InplaceSqrt(),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->InplaceSqrt());
return *this;
}
// [this] = sqrt(a) element-wise.
// [this] adopts a's type/format and device; dense CPU/GPU and sparse GPU supported.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignSqrtOf(const Matrix<ElemType>& a)
{
if (a.IsEmpty())
LogicError("AssignSqrtOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignSqrtOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignSqrtOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->AssignSqrtOf(*a.m_GPUSparseMatrix));
return *this;
}
//[this]=exp([this]) element wise
// [this] = exp([this]) element-wise, in place.
// Supported for dense CPU/GPU and sparse GPU.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceExp()
{
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceExp(),
m_GPUMatrix->InplaceExp(),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->InplaceExp());
return *this;
}
// [this] = exp(a) element-wise.
// [this] adopts a's type/format and device; dense CPU/GPU and sparse GPU supported.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignExpOf(const Matrix<ElemType>& a)
{
if (a.IsEmpty())
LogicError("AssignExpOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignExpOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignExpOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->AssignExpOf(*a.m_GPUSparseMatrix));
return *this;
}
//[this]=abs([this]) element wise
// [this] = abs([this]) element-wise, in place.
// Supported for dense CPU/GPU and sparse GPU.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceAbs()
{
// Pass 'this' (was nullptr) as the location-collapse argument so that a matrix whose
// data lives in BOTH locations is collapsed to one after the in-place update, consistent
// with the other Inplace* methods (InplaceSqrt, InplaceExp, InplaceLog, ...).
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceAbs(),
m_GPUMatrix->InplaceAbs(),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->InplaceAbs());
return *this;
}
// [this] = abs(a) element-wise.
// [this] adopts a's type/format and device; dense CPU/GPU and sparse GPU supported.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignAbsOf(const Matrix<ElemType>& a)
{
if (a.IsEmpty())
LogicError("AssignAbsOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignAbsOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignAbsOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->AssignAbsOf(*a.m_GPUSparseMatrix));
return *this;
}
//[this]=log([this]) element wise
// [this] = log([this]) element-wise (natural logarithm), in place.
// Supported for dense CPU/GPU and sparse GPU.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceLog()
{
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceLog(),
m_GPUMatrix->InplaceLog(),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->InplaceLog());
return *this;
}
//[this]=log([this]) element wise
// [this] = log10([this]) element-wise, in place. CPU dense only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceLog10()
{
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceLog10(),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
// [this] = log(a) element-wise (natural logarithm).
// [this] adopts a's type/format and device; dense CPU/GPU and sparse GPU supported.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignLogOf(const Matrix<ElemType>& a)
{
if (a.IsEmpty())
LogicError("AssignLogOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignLogOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignLogOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->AssignLogOf(*a.m_GPUSparseMatrix));
return *this;
}
// [this] = log10(a) element-wise. CPU dense only (GPU dense not implemented).
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignLog10Of(const Matrix<ElemType>& a)
{
if (a.IsEmpty())
LogicError("AssignLog10Of: Matrix a is empty."); // fixed: message previously said "AssignLogOf"
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignLog10Of(*a.m_CPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED,
m_GPUSparseMatrix->AssignLogOf(*a.m_GPUSparseMatrix)); // NOTE(review): sparse path computes natural log, not log10 — presumably GPUSparseMatrix lacks AssignLog10Of; confirm before relying on this branch
return *this;
}
//[this]=cos([this]) element wise
// [this] = cos([this]) element-wise, in place. Dense CPU/GPU only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceCosine()
{
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceCosine(),
m_GPUMatrix->InplaceCosine(),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
// [this] = cos(a) element-wise. Dense CPU/GPU only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignCosineOf(const Matrix<ElemType>& a)
{
if (a.IsEmpty())
LogicError("AssignCosineOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignCosineOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignCosineOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
//[this]= -sin([this]) element wise
// [this] = -sin([this]) element-wise, in place (derivative of cos). Dense CPU/GPU only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceNegativeSine()
{
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceNegativeSine(),
m_GPUMatrix->InplaceNegativeSine(),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
// [this] = -sin(a) element-wise. Dense CPU/GPU only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignNegativeSineOf(const Matrix<ElemType>& a)
{
if (a.IsEmpty())
LogicError("AssignNegativeSineOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignNegativeSineOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignNegativeSineOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
// Symmetric truncation in place; semantics defined by the per-backend implementations.
// A non-finite threshold is a no-op by design (nothing can exceed +/-inf, and NaN
// comparisons would poison the result). Supported for all four backends.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceTruncate(const ElemType threshold)
{
if (IsEmpty())
LogicError("InplaceTruncate: Matrix is empty.");
// Cast so that the float overload of isfinite is used when ElemType is float
// (avoids an unnecessary float->double promotion); the compile-time branch is
// resolved per instantiation (see the disabled C4127 warning at the top of the file).
if (sizeof(ElemType) == sizeof(float))
{
if (!isfinite((float) threshold))
return *this;
}
else
{
if (!isfinite(threshold))
return *this;
}
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceTruncate(threshold),
m_GPUMatrix->InplaceTruncate(threshold),
m_CPUSparseMatrix->InplaceTruncate(threshold),
m_GPUSparseMatrix->InplaceTruncate(threshold));
return *this;
}
// In-place transpose. Only implemented for the GPU sparse representation;
// all other backends raise NOT_IMPLEMENTED. Empty matrices are a no-op.
template <class ElemType>
void Matrix<ElemType>::InplaceTranspose()
{
if (IsEmpty())
return;
DISPATCH_MATRIX_ON_FLAG(this,
this,
NOT_IMPLEMENTED,
NOT_IMPLEMENTED,
NOT_IMPLEMENTED,
m_GPUSparseMatrix->InplaceTranspose());
}
// In-place soft-thresholding (shrinkage) with a non-negative threshold;
// threshold == 0 is the identity, so it short-circuits. Supported for all four backends.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceSoftThreshold(const ElemType threshold)
{
assert(threshold >= 0);
if (IsEmpty())
LogicError("InplaceSoftThreshold: Matrix is empty.");
if (threshold == 0)
return *this;
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceSoftThreshold(threshold),
m_GPUMatrix->InplaceSoftThreshold(threshold),
m_CPUSparseMatrix->InplaceSoftThreshold(threshold),
m_GPUSparseMatrix->InplaceSoftThreshold(threshold));
return *this;
}
//Threshold truncating: this[i] = max( this[i], threshold )
// A non-finite threshold is deliberately a no-op (see InplaceTruncate).
// Supported for all four backends.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceTruncateBottom(const ElemType threshold)
{
if (IsEmpty())
LogicError("InplaceTruncateBottom: Matrix is empty.");
// Compile-time dispatch to the float overload of isfinite when ElemType is float.
if (sizeof(ElemType) == sizeof(float))
{
if (!isfinite((float) threshold))
return *this;
}
else
{
if (!isfinite(threshold))
return *this;
}
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceTruncateBottom(threshold),
m_GPUMatrix->InplaceTruncateBottom(threshold),
m_CPUSparseMatrix->InplaceTruncateBottom(threshold),
m_GPUSparseMatrix->InplaceTruncateBottom(threshold));
return *this;
}
//Threshold truncating: this[i] = max( a[i], threshold )
// With a non-finite threshold the result is simply a copy of a.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignTruncateBottomOf(const Matrix<ElemType>& a, const ElemType threshold)
{
if (a.IsEmpty())
LogicError("AssignTruncateBottomOf: Matrix a is empty.");
// Compile-time dispatch to the float overload of isfinite when ElemType is float.
if (sizeof(ElemType) == sizeof(float))
{
if (!isfinite((float) threshold))
{
this->SetValue(a);
return *this;
}
}
else
{
if (!isfinite(threshold))
{
this->SetValue(a);
return *this;
}
}
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignTruncateBottomOf(*a.m_CPUMatrix, threshold),
m_GPUMatrix->AssignTruncateBottomOf(*a.m_GPUMatrix, threshold),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->AssignTruncateBottomOf(*a.m_GPUSparseMatrix, threshold));
return *this;
}
//Threshold truncating: this[i] = min( this[i], threshold )
// A non-finite threshold is deliberately a no-op (see InplaceTruncate).
// Supported for all four backends.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceTruncateTop(const ElemType threshold)
{
if (IsEmpty())
LogicError("InplaceTruncateTop: Matrix is empty.");
// Compile-time dispatch to the float overload of isfinite when ElemType is float.
if (sizeof(ElemType) == sizeof(float))
{
if (!isfinite((float) threshold))
return *this;
}
else
{
if (!isfinite(threshold))
return *this;
}
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->InplaceTruncateTop(threshold),
m_GPUMatrix->InplaceTruncateTop(threshold),
m_CPUSparseMatrix->InplaceTruncateTop(threshold),
m_GPUSparseMatrix->InplaceTruncateTop(threshold));
return *this;
}
//Threshold truncating: this[i] = min( a[i], threshold )
// With a non-finite threshold the result is simply a copy of a.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignTruncateTopOf(const Matrix<ElemType>& a, const ElemType threshold)
{
if (a.IsEmpty())
LogicError("AssignTruncateTopOf: Matrix a is empty.");
// Compile-time dispatch to the float overload of isfinite when ElemType is float.
if (sizeof(ElemType) == sizeof(float))
{
if (!isfinite((float) threshold))
{
this->SetValue(a);
return *this;
}
}
else
{
if (!isfinite(threshold))
{
this->SetValue(a);
return *this;
}
}
DecideAndMoveToRightDevice(a, *this);
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignTruncateTopOf(*a.m_CPUMatrix, threshold),
m_GPUMatrix->AssignTruncateTopOf(*a.m_GPUMatrix, threshold),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->AssignTruncateTopOf(*a.m_GPUSparseMatrix, threshold));
return *this;
}
//Threshold truncating: this[i] = 0 if abs(this[i]) < threshold.
// Zero out entries whose magnitude is below threshold, in place.
// Supported for dense CPU/GPU and sparse GPU.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::SetToZeroIfAbsLessThan(const ElemType threshold)
{
if (IsEmpty())
LogicError("SetToZeroIfAbsLessThan: Matrix is empty.");
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->SetToZeroIfAbsLessThan(threshold),
m_GPUMatrix->SetToZeroIfAbsLessThan(threshold),
NOT_IMPLEMENTED,
m_GPUSparseMatrix->SetToZeroIfAbsLessThan(threshold));
return *this;
}
//sum of all elements
// Returns the scalar sum; supported for all four backends.
template <class ElemType>
ElemType Matrix<ElemType>::SumOfElements() const
{
if (IsEmpty())
LogicError("SumOfElements: Matrix is empty.");
DISPATCH_MATRIX_ON_FLAG(this,
nullptr,
return m_CPUMatrix->SumOfElements(),
return m_GPUMatrix->SumOfElements(),
return m_CPUSparseMatrix->SumOfElements(),
return m_GPUSparseMatrix->SumOfElements());
}
// [this] = sum of all elements of a (stored as a matrix). Dense CPU/GPU only.
// NOTE(review): unlike most Assign* methods there is no DecideAndMoveToRightDevice
// call here — confirm callers guarantee a and [this] are already on the same device.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignSumOfElements(const Matrix<ElemType>& a)
{
if (a.IsEmpty())
LogicError("AssignSumOfElements: Matrix a is empty.");
// WARNING: a and this must have same type
if (!(GetMatrixType() == a.GetMatrixType()))
NOT_IMPLEMENTED;
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignSumOfElements(*a.m_CPUMatrix),
m_GPUMatrix->AssignSumOfElements(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
// Sum of all elements, returned as a DeviceBoundNumber so a GPU result can stay on
// the device. Dense CPU/GPU only.
template <class ElemType>
DeviceBoundNumber<ElemType> Matrix<ElemType>::Sum_AsDeviceBoundNum() const
{
DeviceBoundNumber<ElemType> result;
DISPATCH_MATRIX_ON_FLAG(this,
nullptr,
// CPU: heap-allocate the scalar and hand it to the result with device id -1 (CPU).
// NOTE(review): presumably ShallowCopyFrom transfers ownership of 'val' to
// 'result' — verify against DeviceBoundNumber to rule out a leak.
ElemType* val = new ElemType;
* val = m_CPUMatrix->SumOfElements(); result.ShallowCopyFrom(val, -1); return result,
return m_GPUMatrix->Sum_AsDeviceBoundNum(),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
//sum of the absolute values of all elements
// Returns sum_i |this[i]|. Supported for dense CPU/GPU and sparse GPU.
template <class ElemType>
ElemType Matrix<ElemType>::SumOfAbsElements() const
{
if (IsEmpty())
LogicError("SumOfAbsElements: Matrix is empty.");
DISPATCH_MATRIX_ON_FLAG(this,
nullptr,
return m_CPUMatrix->SumOfAbsElements(),
return m_GPUMatrix->SumOfAbsElements(),
NOT_IMPLEMENTED,
return m_GPUSparseMatrix->SumOfAbsElements());
}
//log-add (log of the sum of exponentials) of all elements
// Returns the log-add (logsumexp-style) reduction over all elements. Dense CPU/GPU only.
template <class ElemType>
ElemType Matrix<ElemType>::LogAddSumOfElements() const
{
if (IsEmpty())
LogicError("LogAddSumOfElements: Matrix is empty.");
DISPATCH_MATRIX_ON_FLAG(this,
nullptr,
return m_CPUMatrix->LogAddSumOfElements(),
return m_GPUMatrix->LogAddSumOfElements(),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
// Internal-consistency check. Only implemented for the GPU sparse representation;
// any other backend raises NOT_IMPLEMENTED (so the trailing 'return false' is unreachable).
template <class ElemType>
bool Matrix<ElemType>::IsValid() const
{
if (m_currentDataLocation == CurrentDataLocation::GPU && GetMatrixType() == MatrixType::SPARSE)
{
return this->m_GPUSparseMatrix->IsValid();
}
else
{
NOT_IMPLEMENTED;
}
return false;
}
// Element-wise equality of [this] and a within the given tolerance.
template <class ElemType>
bool Matrix<ElemType>::IsEqualTo(const Matrix<ElemType>& a, const ElemType threshold /*= 1e-8*/) const
{
const bool equal = AreEqual(*this, a, threshold);
return equal;
}
// c = column-wise (or row-wise) sums of a. Static; a and c must have the same
// matrix type. Dense CPU/GPU only.
template <class ElemType>
void Matrix<ElemType>::VectorSum(const Matrix<ElemType>& a, Matrix<ElemType>& c, const bool isColWise)
{
DecideAndMoveToRightDevice(c, a);
if (!(a.GetMatrixType() == c.GetMatrixType()))
NOT_IMPLEMENTED;
DISPATCH_MATRIX_ON_FLAG(&c,
&c,
CPUMatrix<ElemType>::VectorSum(*a.m_CPUMatrix, *c.m_CPUMatrix, isColWise),
GPUMatrix<ElemType>::VectorSum(*a.m_GPUMatrix, *c.m_GPUMatrix, isColWise),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
// c = column-wise (or row-wise) L1 norms of [this]. Dense CPU/GPU only.
template <class ElemType>
void Matrix<ElemType>::VectorNorm1(Matrix<ElemType>& c, const bool isColWise) const
{
if (IsEmpty())
LogicError("VectorNorm1: Matrix is empty."); // fixed: message previously said "VectorNormInf"
DecideAndMoveToRightDevice(*this, c);
c.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this,
&c,
m_CPUMatrix->VectorNorm1(*c.m_CPUMatrix, isColWise),
m_GPUMatrix->VectorNorm1(*c.m_GPUMatrix, isColWise),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
// [this] = column-wise (or row-wise) L1 norms of a.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignVectorNorm1Of(Matrix<ElemType>& a, const bool isColWise)
{
auto& us = *this;
a.VectorNorm1(us, isColWise);
return us;
}
// c = column-wise (or row-wise) L2 norms of [this]. Dense CPU/GPU only.
template <class ElemType>
void Matrix<ElemType>::VectorNorm2(Matrix<ElemType>& c, const bool isColWise) const
{
if (IsEmpty())
LogicError("VectorNorm2: Matrix is empty.");
DecideAndMoveToRightDevice(*this, c);
c.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this,
&c,
m_CPUMatrix->VectorNorm2(*c.m_CPUMatrix, isColWise),
m_GPUMatrix->VectorNorm2(*c.m_GPUMatrix, isColWise),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
// [this] = column-wise (or row-wise) L2 norms of a.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignVectorNorm2Of(Matrix<ElemType>& a, const bool isColWise)
{
auto& us = *this;
a.VectorNorm2(us, isColWise);
return us;
}
// c = column-wise (or row-wise) infinity norms of [this]. Dense CPU/GPU only.
template <class ElemType>
void Matrix<ElemType>::VectorNormInf(Matrix<ElemType>& c, const bool isColWise) const
{
if (IsEmpty())
LogicError("VectorNormInf: Matrix is empty.");
DecideAndMoveToRightDevice(*this, c);
c.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this,
&c,
m_CPUMatrix->VectorNormInf(*c.m_CPUMatrix, isColWise),
m_GPUMatrix->VectorNormInf(*c.m_GPUMatrix, isColWise),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
// [this] = column-wise (or row-wise) infinity norms of a.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignVectorNormInfOf(Matrix<ElemType>& a, const bool isColWise)
{
auto& us = *this;
a.VectorNormInf(us, isColWise);
return us;
}
// [this] = column-wise (or row-wise) inner products of a and b.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignInnerProductOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b, const bool isColWise)
{
auto& us = *this;
InnerProduct(a, b, us, isColWise);
return us;
}
//column-wise crossproduct
// [this] = Khatri-Rao (column-wise Kronecker) product of a and b; a and b must have
// the same number of columns and the same matrix type. Dense CPU/GPU only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignKhatriRaoProductOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b)
{
if (a.IsEmpty() || b.IsEmpty())
LogicError("AssignKhatriRaoProductOf: Matrix is empty.");
assert(a.GetNumCols() == b.GetNumCols());
if (!(a.GetNumCols() == b.GetNumCols()))
InvalidArgument("AssignKhatriRaoProductOf: The input matrix dimensions do not match.");
DecideAndMoveToRightDevice(a, b, *this);
// WARNING: a and b must have same type
if (!(a.GetMatrixType() == b.GetMatrixType()))
NOT_IMPLEMENTED;
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->AssignKhatriRaoProductOf(*a.m_CPUMatrix, *b.m_CPUMatrix),
m_GPUMatrix->AssignKhatriRaoProductOf(*a.m_GPUMatrix, *b.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
//column-wise reshaped product. Used to compute KhatriRaoProduct Gradient
// this = reshape each column of a from (K1xK2,1) to (K1, K2)
// if each column of a is not transposed, each (K1, K2) times each column of b (K2, frames).
// the output is a (K1, frames) matrix
// if each column of a is tranposed, each (K1, K2)^T times each column of b(K1, frames) and output is (K2, frames)
//column-wise crossproduct
// Accumulates the column-reshaped product described in the comment block above into
// [this]. a and b must have the same column count and all three matrices the same
// matrix type. Dense CPU/GPU only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AddColumnReshapeProductOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b, const bool transposeAColumn)
{
if (a.IsEmpty() || b.IsEmpty())
LogicError("AddColumnReshapeProductOf: Matrix is empty.");
assert(a.GetNumCols() == b.GetNumCols());
if (!(a.GetNumCols() == b.GetNumCols()))
InvalidArgument("AddColumnReshapeProductOf: The input matrix dimensions do not match.");
DecideAndMoveToRightDevice(*this, a, b);
// WARNING: a and b must have same type
if (!(a.GetMatrixType() == b.GetMatrixType() && GetMatrixType() == b.GetMatrixType()))
NOT_IMPLEMENTED;
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->AddColumnReshapeProductOf(*a.m_CPUMatrix, *b.m_CPUMatrix, transposeAColumn),
m_GPUMatrix->AddColumnReshapeProductOf(*a.m_GPUMatrix, *b.m_GPUMatrix, transposeAColumn),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
// [this] += alpha * a
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AddWithScaleOf(ElemType alpha, const Matrix<ElemType>& a)
{
auto& us = *this;
ScaleAndAdd(alpha, a, us);
return us;
}
// Returns the Frobenius norm (sqrt of sum of squared elements).
// Supported for all four backends.
template <class ElemType>
ElemType Matrix<ElemType>::FrobeniusNorm() const
{
if (IsEmpty())
LogicError("FrobeniusNorm: Matrix is empty.");
DISPATCH_MATRIX_ON_FLAG(this,
nullptr,
return m_CPUMatrix->FrobeniusNorm(),
return m_GPUMatrix->FrobeniusNorm(),
return m_CPUSparseMatrix->FrobeniusNorm(),
return m_GPUSparseMatrix->FrobeniusNorm());
}
// [this] = Frobenius norm of a, stored as a 1x1 matrix. Dense CPU/GPU only.
// NOTE(review): no DecideAndMoveToRightDevice call here — confirm callers guarantee
// a and [this] are already on the same device.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignFrobeniusNormOf(const Matrix<ElemType>& a)
{
if (a.IsEmpty())
LogicError("AssignFrobeniusNormOf: Matrix a is empty.");
Resize(1, 1);
// WARNING: a and this must have same type
if (!(GetMatrixType() == a.GetMatrixType()))
NOT_IMPLEMENTED;
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignFrobeniusNormOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignFrobeniusNormOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
// Returns the infinity norm (maximum absolute element value).
// Supported for dense CPU/GPU and sparse GPU.
template <class ElemType>
ElemType Matrix<ElemType>::MatrixNormInf() const
{
if (IsEmpty())
LogicError("MatrixNormInf: Matrix is empty.");
DISPATCH_MATRIX_ON_FLAG(this,
nullptr,
return m_CPUMatrix->MatrixNormInf(),
return m_GPUMatrix->MatrixNormInf(),
NOT_IMPLEMENTED,
return m_GPUSparseMatrix->MatrixNormInf());
}
// Returns the L1 matrix norm as defined by the per-backend implementations.
// Supported for dense CPU/GPU and sparse GPU.
template <class ElemType>
ElemType Matrix<ElemType>::MatrixNorm1() const
{
if (IsEmpty())
LogicError("MatrixNorm1: Matrix is empty.");
DISPATCH_MATRIX_ON_FLAG(this,
nullptr,
return m_CPUMatrix->MatrixNorm1(),
return m_GPUMatrix->MatrixNorm1(),
NOT_IMPLEMENTED,
return m_GPUSparseMatrix->MatrixNorm1());
}
// Returns the L0 "norm" (count of non-zero elements).
// Supported for dense CPU/GPU and sparse GPU.
template <class ElemType>
ElemType Matrix<ElemType>::MatrixNorm0() const
{
if (IsEmpty())
LogicError("MatrixNorm0: Matrix is empty.");
DISPATCH_MATRIX_ON_FLAG(this,
nullptr,
return m_CPUMatrix->MatrixNorm0(),
return m_GPUMatrix->MatrixNorm0(),
NOT_IMPLEMENTED,
return m_GPUSparseMatrix->MatrixNorm0());
}
// [this] = sign(a) element-wise. Dense CPU/GPU only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignSignOf(const Matrix<ElemType>& a)
{
if (a.IsEmpty())
LogicError("AssignSignOf: Matrix a is empty.");
DecideAndMoveToRightDevice(a, *this);
// WARNING: a and this must have same type
if (!(GetMatrixType() == a.GetMatrixType()))
NOT_IMPLEMENTED;
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->AssignSignOf(*a.m_CPUMatrix),
m_GPUMatrix->AssignSignOf(*a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
// *this += elementwise sign of 'a' (as implemented by the underlying dense matrix classes).
// Dense matrices only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AddSignOf(const Matrix<ElemType>& a)
{
    if (a.IsEmpty())
        LogicError("AddSignOf: Matrix a is empty.");

    DecideAndMoveToRightDevice(a, *this);

    // both operands must already have the same matrix type
    if (!(GetMatrixType() == a.GetMatrixType()))
        NOT_IMPLEMENTED;

    DISPATCH_MATRIX_ON_FLAG(&a,
                            this,
                            m_CPUMatrix->AddSignOf(*a.m_CPUMatrix),
                            m_GPUMatrix->AddSignOf(*a.m_GPUMatrix),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);

    return *this;
}
// I decided to use Matrix<ElemType>& maxIndexes instead of an integer vector because the result may be used in additional calculations.
// Computes per-column (or per-row, if !isColWise) maxima and their indices. Dense matrices only.
template <class ElemType>
void Matrix<ElemType>::VectorMax(Matrix<ElemType>& maxIndexes, Matrix<ElemType>& maxValues, const bool isColWise) const
{
    if (IsEmpty())
        LogicError("VectorMax: Matrix is empty.");

    // colocate all three matrices and give the outputs our type/format
    DecideAndMoveToRightDevice(*this, maxIndexes, maxValues);
    maxIndexes.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);
    maxValues.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);

    // the macro updates maxValues' data location; maxIndexes is updated explicitly
    DISPATCH_MATRIX_ON_FLAG(this,
                            &maxValues,
                            m_CPUMatrix->VectorMax(*maxIndexes.m_CPUMatrix, *maxValues.m_CPUMatrix, isColWise);
                            maxIndexes.SetDataLocation(CPU, DENSE),
                            m_GPUMatrix->VectorMax(*maxIndexes.m_GPUMatrix, *maxValues.m_GPUMatrix, isColWise);
                            maxIndexes.SetDataLocation(GPU, DENSE),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Top-K variant of VectorMax: returns the topK maxima (and their indices) per column
// (or per row, if !isColWise). Dense matrices only.
template <class ElemType>
void Matrix<ElemType>::VectorMax(Matrix<ElemType>& maxIndexes, Matrix<ElemType>& maxValues, const bool isColWise, int topK) const
{
    if (IsEmpty())
        LogicError("VectorMax: Matrix is empty.");

    // colocate all three matrices and give the outputs our type/format
    DecideAndMoveToRightDevice(*this, maxIndexes, maxValues);
    maxIndexes.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);
    maxValues.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);

    // the macro updates maxValues' data location; maxIndexes is updated explicitly
    DISPATCH_MATRIX_ON_FLAG(this,
                            &maxValues,
                            m_CPUMatrix->VectorMax(*maxIndexes.m_CPUMatrix, *maxValues.m_CPUMatrix, isColWise, topK);
                            maxIndexes.SetDataLocation(CPU, DENSE),
                            m_GPUMatrix->VectorMax(*maxIndexes.m_GPUMatrix, *maxValues.m_GPUMatrix, isColWise, topK);
                            maxIndexes.SetDataLocation(GPU, DENSE),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
// Computes per-column (or per-row, if !isColWise) minima and their indices. Dense matrices only.
template <class ElemType>
void Matrix<ElemType>::VectorMin(Matrix<ElemType>& minIndexes, Matrix<ElemType>& minValues, const bool isColWise) const
{
    if (IsEmpty())
        LogicError("VectorMin: Matrix is empty.");

    // colocate all three matrices and give the outputs our type/format
    DecideAndMoveToRightDevice(*this, minIndexes, minValues);
    minIndexes.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);
    minValues.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);

    // the macro updates minValues' data location; minIndexes is updated explicitly
    DISPATCH_MATRIX_ON_FLAG(this,
                            &minValues,
                            m_CPUMatrix->VectorMin(*minIndexes.m_CPUMatrix, *minValues.m_CPUMatrix, isColWise);
                            minIndexes.SetDataLocation(CPU, DENSE),
                            m_GPUMatrix->VectorMin(*minIndexes.m_GPUMatrix, *minValues.m_GPUMatrix, isColWise);
                            minIndexes.SetDataLocation(GPU, DENSE),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
#pragma endregion Member BLAS Functions
#pragma region Other helper Functions
// Returns the device the data currently lives on: CPUDEVICE for CPU-resident data,
// the GPU's compute device id otherwise. If nothing has been materialized yet
// (location NONE), returns the preferred device instead.
template <class ElemType>
int Matrix<ElemType>::GetDeviceId() const
{
    if (m_currentDataLocation == CurrentDataLocation::NONE)
        return m_preferredDeviceId;

    DISPATCH_MATRIX_ON_FLAG(this,
                            nullptr,
                            return CPUDEVICE,
                            return m_GPUMatrix->GetComputeDeviceId(),
                            return CPUDEVICE,
                            return m_GPUSparseMatrix->GetComputeDeviceId());
}
// TODO: Move the shared core functions to the front of this source file.
// BUGBUG: This performs a copy operation even for the output matrix that gets overwritten right away.
// We should (1) define which is the output and (2) whether it will be completely overwritten (so we won't actually copy it).
// Bring two matrices onto the same device:
//  - if they already share a device, do nothing;
//  - if they share a preferred device, move both there;
//  - otherwise a GPU takes precedence over the CPU, with a's GPU winning over b's.
// TODO: This is called somewhat inconsistently, sometimes with a=*this, sometimes with b=*this.
template <class ElemType>
template <class ElemType2>
void Matrix<ElemType>::DecideAndMoveToRightDevice(const Matrix<ElemType>& a, const Matrix<ElemType2>& b)
{
    const int devA = a.GetDeviceId();
    const int devB = b.GetDeviceId();
    if (devA == devB) // already colocated: nothing to do
        return;

    const int prefA = a.GetPreferredDeviceId();
    const int prefB = b.GetPreferredDeviceId();
    if (prefA == prefB) // unanimous preference: move both there
    {
        a._transferToDevice(prefA);
        b._transferToDevice(prefA);
        return;
    }

    if (devA != CPUDEVICE) // a lives on a GPU: bring b along
        b._transferToDevice(devA);
    else // otherwise b's device wins
        a._transferToDevice(devB);
}
// Same as above, but for 3 matrices.
// If b and c are both on the same GPU then a will be forced to go there; otherwise a's GPU
// takes precedence, then b's, then c's.
template <class ElemType>
void Matrix<ElemType>::DecideAndMoveToRightDevice(const Matrix<ElemType>& a, const Matrix<ElemType>& b, const Matrix<ElemType>& c)
{
    const int devA = a.GetDeviceId();
    const int devB = b.GetDeviceId();
    const int devC = c.GetDeviceId();
    if (devA == devB && devA == devC) // already colocated
        return;

    const int prefA = a.GetPreferredDeviceId();
    const int prefB = b.GetPreferredDeviceId();
    const int prefC = c.GetPreferredDeviceId();
    if (prefA == prefB && prefA == prefC) // unanimous preference: move everybody there
    {
        a._transferToDevice(prefA);
        b._transferToDevice(prefA);
        c._transferToDevice(prefA);
    }
    else if (devB == devC && devB != CPUDEVICE) // TODO: why not the other two combinations?
    {
        a._transferToDevice(devB); // 'a' is outvoted
    }
    else if (devA != CPUDEVICE) // a's GPU takes precedence
    {
        b._transferToDevice(devA);
        c._transferToDevice(devA);
    }
    else if (devB != CPUDEVICE) // then b's
    {
        a._transferToDevice(devB);
        c._transferToDevice(devB);
    }
    else // only c can still be on a GPU here
    {
        a._transferToDevice(devC);
        b._transferToDevice(devC);
    }
}
// same but for 4 matrices
template <class ElemType>
void Matrix<ElemType>::DecideAndMoveToRightDevice(const Matrix<ElemType>& a, const Matrix<ElemType>& b, const Matrix<ElemType>& c, const Matrix<ElemType>& d)
{
    // this function is only called for one operator, so for now we keep it simple:
    // colocate a,b,c first, then drag d along to wherever a ended up
    DecideAndMoveToRightDevice(a, b, c);
    d._transferToDevice(a.GetDeviceId()); // BUGBUG: Is this correct in case a,b,c share the same preferredDevice?
}
// Moves this matrix to device 'to_id' if it is not already there.
// Only matrices that own their buffer may be moved; views raise a RuntimeError.
template <class ElemType>
void Matrix<ElemType>::_transferToDevice(int to_id, bool isBeingMoved /*= true*/, bool emptyTransfer /* = false*/) const
{
    const int from_id = GetDeviceId();
    if (from_id == to_id) // already in the right place
        return;

    if (!OwnBuffer())
        RuntimeError("Cannot move externally owned matrices to the preferred device.");

    _transferFromDeviceToDevice(from_id, to_id, isBeingMoved, emptyTransfer);
}
// this function performs data transfer and updates data location, but not the device that is stored with it
template <class ElemType>
void Matrix<ElemType>::_transferFromDeviceToDevice(int from_id, int to_id, bool isBeingMoved /*= true*/, bool emptyTransfer /* = false*/) const
{
    // normalize: any negative device id means CPU
    if (from_id < 0)
        from_id = CPUDEVICE;
    if (to_id < 0)
        to_id = CPUDEVICE;

    if (from_id == to_id)
    {
        if (from_id != GetDeviceId())
            RuntimeError("Trying to transfer matrix from device to the same device while the matrix does not live in the from device.");

        return;
    }

#define NUM_DEVICE_CHANGED_WARN 20
    // Bookkeeping for the ping-pong warning below: count transfers and remember up to two
    // distinct destination devices. Empty transfers only count for GPU->GPU moves.
    if (m_numTimesDeviceChanged <= NUM_DEVICE_CHANGED_WARN &&
        (!emptyTransfer || (from_id >= 0 && to_id >= 0)))
    {
        m_numTimesDeviceChanged++;
        if (m_devicesTransferedTo[0] < CPUDEVICE) // first destination not recorded yet
        {
            m_devicesTransferedTo[0] = to_id;
        }
        else if (m_devicesTransferedTo[0] != to_id)
        {
            m_devicesTransferedTo[1] = to_id;
        }
    }
    // warn once if this matrix keeps bouncing between two different devices
    if (m_numTimesDeviceChanged == NUM_DEVICE_CHANGED_WARN && m_devicesTransferedTo[1] >= CPUDEVICE)
    {
        fprintf(stderr, "WARNING: The same matrix with dim [%lu, %lu] has been transferred between different devices for %d times.\n", (unsigned long) GetNumRows(), (unsigned long) GetNumCols(), NUM_DEVICE_CHANGED_WARN);
    }

    if (m_matrixType == MatrixType::SPARSE)
    {
        if (from_id == CPUDEVICE) // from CPU to GPU
        {
            if (m_CPUSparseMatrix == NULL)
                LogicError("Can't move from CPU because I'm not there!");

            // create or relocate the GPU-side object, then copy the data over (unless empty)
            if (m_GPUSparseMatrix == NULL)
                m_GPUSparseMatrix = new GPUSparseMatrix<ElemType>(to_id, m_CPUSparseMatrix->GetFormat());
            else
                m_GPUSparseMatrix->ChangeDeviceTo(to_id);

            if (m_CPUSparseMatrix->GetNumElements() != 0 && !emptyTransfer)
            {
                m_GPUSparseMatrix->SetValue(*m_CPUSparseMatrix);
            }

            if (isBeingMoved) // move: drop the CPU copy
            {
                delete m_CPUSparseMatrix;
                m_CPUSparseMatrix = NULL;
                SetDataLocation(GPU, SPARSE);
            }
            else // copy: keep both sides alive
            {
                SetDataLocation(BOTH, SPARSE);
            }
        }
        else // from GPU
        {
            if (m_GPUSparseMatrix == NULL || m_GPUSparseMatrix->GetComputeDeviceId() != from_id)
                LogicError("This matrix isn't on this (or any?) GPU");

            if (to_id < 0) // to CPU
            {
                if (m_CPUSparseMatrix == NULL)
                    m_CPUSparseMatrix = new CPUSparseMatrix<ElemType>(m_GPUSparseMatrix->GetFormat());

                if (m_GPUSparseMatrix->GetNumElements() != 0 && !emptyTransfer)
                {
                    m_GPUSparseMatrix->CopyToCPUSparseMatrix(*m_CPUSparseMatrix);
                }

                if (isBeingMoved) // move: drop the GPU copy
                {
                    delete m_GPUSparseMatrix;
                    m_GPUSparseMatrix = NULL;
                    SetDataLocation(CPU, SPARSE);
                }
                else // copy: keep both sides alive
                {
                    SetDataLocation(BOTH, SPARSE);
                }
            }
            else // to another GPU
            {
                m_GPUSparseMatrix->ChangeDeviceTo(to_id);
            }
        }
    }
    // NOTE(review): only the dense path is serialized with an OpenMP critical section;
    // confirm why the sparse path above does not need the same protection.
    else
#pragma omp critical
    {
        if (from_id == CPUDEVICE) // from CPU to GPU
        {
            if (m_CPUMatrix == NULL)
                LogicError("Can't move from CPU because I'm not there!");
            // any stale GPU copy is discarded and rebuilt from the CPU data
            if (m_GPUMatrix != NULL)
                delete m_GPUMatrix;
            if (m_CPUMatrix->GetNumElements() != 0 && !emptyTransfer)
            {
                m_GPUMatrix = new GPUMatrix<ElemType>(m_CPUMatrix->GetNumRows(), m_CPUMatrix->GetNumCols(), to_id, m_CPUMatrix->GetArray(), matrixFlagNormal);
            }
            else
            {
                m_GPUMatrix = new GPUMatrix<ElemType>(to_id);
            }
            if (isBeingMoved) // move: drop the CPU copy
            {
                delete m_CPUMatrix;
                m_CPUMatrix = NULL;
                SetDataLocation(GPU, DENSE);
            }
            else // copy: keep both sides alive
            {
                SetDataLocation(BOTH, DENSE);
            }
        }
        else // from GPU
        {
            if (m_GPUMatrix == NULL || m_GPUMatrix->GetComputeDeviceId() != from_id)
                LogicError("This matrix isn't on this (or any?) GPU");

            if (to_id < 0) // to CPU
            {
                if (m_CPUMatrix != NULL)
                    delete m_CPUMatrix;

                if (m_GPUMatrix->GetNumElements() != 0 && !emptyTransfer)
                {
                    ElemType* arr = m_GPUMatrix->CopyToArray(); // TODO: unnecessary allocation/copy; why not make this a vector that we move over as an rvalue ref?
                    m_CPUMatrix = new CPUMatrix<ElemType>(m_GPUMatrix->GetNumRows(), m_GPUMatrix->GetNumCols(), arr, matrixFlagNormal);
                    delete[] arr;
                }
                else
                {
                    m_CPUMatrix = new CPUMatrix<ElemType>();
                }

                if (isBeingMoved) // move: drop the GPU copy
                {
                    delete m_GPUMatrix;
                    m_GPUMatrix = NULL;
                    SetDataLocation(CPU, DENSE);
                }
                else // copy: keep both sides alive
                {
                    SetDataLocation(BOTH, DENSE);
                }
            }
            else // to another GPU
            {
                m_GPUMatrix->ChangeDeviceTo(to_id);
            }
        }
    } // end of omp critical section
}
// Public transfer wrapper: performs the transfer and, unless suppressed, records the
// destination as the new preferred device.
template <class ElemType>
void Matrix<ElemType>::TransferFromDeviceToDevice(int from_id, int to_id, bool isBeingMoved, bool emptyTransfer/* = false*/, bool updatePreferredDevice/* = true*/) const
{
    _transferFromDeviceToDevice(from_id, to_id, isBeingMoved, emptyTransfer);
    if (updatePreferredDevice)
        m_preferredDeviceId = GetDeviceId();
}
// Transfers the matrix to 'to_id' only when necessary. If the data is mirrored on both
// CPU and GPU (BOTH state) and the caller does not insist on a real move, a CPU<->GPU
// request is already satisfied; only a GPU-to-GPU change still forces a transfer.
template <class ElemType>
void Matrix<ElemType>::TransferToDeviceIfNotThere(int to_id, bool isBeingMoved/*false: may leave in BOTH state*/, bool emptyTransfer/* = false*/, bool updatePreferredDevice/* = true*/) const
{
    const int from_id = GetDeviceId();
    if (to_id == from_id) // already at the right place
        return;

    const bool mirrored    = (GetCurrentMatrixLocation() == BOTH);
    const bool changingGpu = (from_id >= 0 && to_id >= 0); // GPU-to-GPU move
    if (mirrored && !isBeingMoved && !changingGpu)
        return; // BOTH state already covers the request

    TransferFromDeviceToDevice(from_id, to_id, isBeingMoved, emptyTransfer, updatePreferredDevice);
}
// Prints the sub-range [rowStart..rowEnd] x [colStart..colEnd] using the CPU implementation.
// GPU-resident matrices are copied to the CPU (non-destructively) and transferred back afterwards.
// Note: the sparse Print overloads ignore the row/column range.
template <class ElemType>
void Matrix<ElemType>::Print(const char* matrixName, ptrdiff_t rowStart, ptrdiff_t rowEnd, ptrdiff_t colStart, ptrdiff_t colEnd) const
{
    DEVICEID_TYPE orgdevice = GetDeviceId(); // remember where to transfer back to

    DISPATCH_MATRIX_ON_FLAG(this,
                            nullptr,
                            // CPU:
                            m_CPUMatrix->Print(matrixName, rowStart, rowEnd, colStart, colEnd),
                            // GPU;
                            {
                                _transferToDevice(CPUDEVICE, false, false);
                                m_CPUMatrix->Print(matrixName, rowStart, rowEnd, colStart, colEnd);
                                _transferToDevice(orgdevice, false, false);
                            },
                            // CPU, sparse:
                            m_CPUSparseMatrix->Print(matrixName),
                            // GPU, sparse:
                            {
                                _transferToDevice(CPUDEVICE, false, false);
                                m_CPUSparseMatrix->Print(matrixName);
                                _transferToDevice(orgdevice, false, false);
                            });
}
// Prints the entire matrix.
template <class ElemType>
void Matrix<ElemType>::Print(const char* matrixName /*=nullptr*/) const
{
    const ptrdiff_t lastRow = GetNumRows() - 1;
    const ptrdiff_t lastCol = GetNumCols() - 1;
    Print(matrixName, 0, lastRow, 0, lastCol);
}
// helper function used for convolutional neural networks
// Packs inputSubBatch into the layout expected by the convolution matrix product and assigns
// the result to *this. Dense matrices only; *this adopts inputSubBatch's device/type/format.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignPackedConvolutionInput(const Matrix<ElemType>& inputSubBatch,
                                                                 const size_t inputWidth, const size_t inputHeight, const size_t inputChannels,
                                                                 const size_t outputWidth, const size_t outputHeight, const size_t outputChannels,
                                                                 const size_t kernelWidth, const size_t kernelHeight, const size_t horizontalSubsample, const size_t verticalSubsample,
                                                                 const bool zeroPadding)
{
    DecideAndMoveToRightDevice(inputSubBatch, *this);
    SwitchToMatrixType(inputSubBatch.GetMatrixType(), inputSubBatch.GetFormat(), false);

    DISPATCH_MATRIX_ON_FLAG(&inputSubBatch,
                            this,
                            m_CPUMatrix->AssignPackedConvolutionInput(*(inputSubBatch.m_CPUMatrix),
                                                                      inputWidth, inputHeight, inputChannels,
                                                                      outputWidth, outputHeight, outputChannels,
                                                                      kernelWidth, kernelHeight, horizontalSubsample, verticalSubsample,
                                                                      zeroPadding),
                            m_GPUMatrix->AssignPackedConvolutionInput(*(inputSubBatch.m_GPUMatrix),
                                                                      inputWidth, inputHeight, inputChannels,
                                                                      outputWidth, outputHeight, outputChannels,
                                                                      kernelWidth, kernelHeight, horizontalSubsample, verticalSubsample,
                                                                      zeroPadding),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);

    return *this;
}
// helper function used for convolutional neural networks
// Inverse of AssignPackedConvolutionInput: unpacks *this back into inputSubBatch
// (which is also the return value). Dense matrices only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::UnpackConvolutionInput(Matrix<ElemType>& inputSubBatch,
                                                           const size_t inputWidth, const size_t inputHeight, const size_t inputChannels,
                                                           const size_t outputWidth, const size_t outputHeight, const size_t outputChannels,
                                                           const size_t kernelWidth, const size_t kernelHeight, const size_t horizontalSubsample, const size_t verticalSubsample,
                                                           const bool zeroPadding) const
{
    DecideAndMoveToRightDevice(*this, inputSubBatch);
    inputSubBatch.SwitchToMatrixType(GetMatrixType(), inputSubBatch.GetFormat(), false);

    DISPATCH_MATRIX_ON_FLAG(this,
                            &inputSubBatch,
                            m_CPUMatrix->UnpackConvolutionInput(*(inputSubBatch.m_CPUMatrix),
                                                                inputWidth, inputHeight, inputChannels,
                                                                outputWidth, outputHeight, outputChannels,
                                                                kernelWidth, kernelHeight, horizontalSubsample, verticalSubsample,
                                                                zeroPadding),
                            m_GPUMatrix->UnpackConvolutionInput(*(inputSubBatch.m_GPUMatrix),
                                                                inputWidth, inputHeight, inputChannels,
                                                                outputWidth, outputHeight, outputChannels,
                                                                kernelWidth, kernelHeight, horizontalSubsample, verticalSubsample,
                                                                zeroPadding),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);

    return inputSubBatch;
}
// Computes max pooling of inputBatch into *this. Dense matrices only;
// *this adopts inputBatch's device/type/format first.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignMaxPoolingResult(const Matrix<ElemType>& inputBatch, const size_t channels,
                                                           const size_t inputWidth, const size_t inputHeight, const size_t inputSizePerSample,
                                                           const size_t outputWidth, const size_t outputHeight, const size_t outputSizePerSample,
                                                           const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample)
{
    DecideAndMoveToRightDevice(inputBatch, *this);
    SwitchToMatrixType(inputBatch.GetMatrixType(), inputBatch.GetFormat(), false);

    DISPATCH_MATRIX_ON_FLAG(&inputBatch,
                            this,
                            m_CPUMatrix->AssignMaxPoolingResult(*(inputBatch.m_CPUMatrix), channels,
                                                                inputWidth, inputHeight, inputSizePerSample,
                                                                outputWidth, outputHeight, outputSizePerSample,
                                                                windowWidth, windowHeight, horizontalSubsample, verticalSubsample),
                            m_GPUMatrix->AssignMaxPoolingResult(*(inputBatch.m_GPUMatrix), channels,
                                                                inputWidth, inputHeight, inputSizePerSample,
                                                                outputWidth, outputHeight, outputSizePerSample,
                                                                windowWidth, windowHeight, horizontalSubsample, verticalSubsample),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);

    return *this;
}
// Accumulates the max-pooling gradient into *this. All four matrices are colocated and
// must share the same matrix type. Dense matrices only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AddMaxPoolingGradient(const Matrix<ElemType>& outputGradientBatch, const Matrix<ElemType>& inputBatch, const Matrix<ElemType>& outputBatch,
                                                          const size_t channels,
                                                          const size_t inputWidth, const size_t inputHeight, const size_t inputSizePerSample,
                                                          const size_t outputWidth, const size_t outputHeight, const size_t outputSizePerSample,
                                                          const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample)
{
    DecideAndMoveToRightDevice(*this, outputGradientBatch, inputBatch);
    outputBatch._transferToDevice(GetDeviceId());

    if (!(GetMatrixType() == outputGradientBatch.GetMatrixType() && GetMatrixType() == inputBatch.GetMatrixType() && GetMatrixType() == outputBatch.GetMatrixType()))
        NOT_IMPLEMENTED;

    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->AddMaxPoolingGradient(*(outputGradientBatch.m_CPUMatrix), *(inputBatch.m_CPUMatrix), *(outputBatch.m_CPUMatrix), channels,
                                                               inputWidth, inputHeight, inputSizePerSample,
                                                               outputWidth, outputHeight, outputSizePerSample,
                                                               windowWidth, windowHeight, horizontalSubsample, verticalSubsample),
                            m_GPUMatrix->AddMaxPoolingGradient(*(outputGradientBatch.m_GPUMatrix), *(inputBatch.m_GPUMatrix), *(outputBatch.m_GPUMatrix), channels,
                                                               inputWidth, inputHeight, inputSizePerSample,
                                                               outputWidth, outputHeight, outputSizePerSample,
                                                               windowWidth, windowHeight, horizontalSubsample, verticalSubsample);
                            ,
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);

    return *this;
}
// Computes average pooling of inputBatch into *this. Dense matrices only;
// *this adopts inputBatch's device/type/format first.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignAveragePoolingResult(const Matrix<ElemType>& inputBatch, const size_t channels,
                                                               const size_t inputWidth, const size_t inputHeight, const size_t inputSizePerSample,
                                                               const size_t outputWidth, const size_t outputHeight, const size_t outputSizePerSample,
                                                               const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample)
{
    DecideAndMoveToRightDevice(inputBatch, *this);
    SwitchToMatrixType(inputBatch.GetMatrixType(), inputBatch.GetFormat(), false);

    DISPATCH_MATRIX_ON_FLAG(&inputBatch,
                            this,
                            m_CPUMatrix->AssignAveragePoolingResult(*(inputBatch.m_CPUMatrix), channels,
                                                                    inputWidth, inputHeight, inputSizePerSample,
                                                                    outputWidth, outputHeight, outputSizePerSample,
                                                                    windowWidth, windowHeight, horizontalSubsample, verticalSubsample),
                            m_GPUMatrix->AssignAveragePoolingResult(*(inputBatch.m_GPUMatrix), channels,
                                                                    inputWidth, inputHeight, inputSizePerSample,
                                                                    outputWidth, outputHeight, outputSizePerSample,
                                                                    windowWidth, windowHeight, horizontalSubsample, verticalSubsample),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);

    return *this;
}
// Stores into *this (resized to a 1x1 scalar) the softmax sum of 'a' and 'softmax',
// as computed by the underlying CPU/GPU implementation on this matrix's current device.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignSoftmaxSum(const Matrix<ElemType>& a, const Matrix<ElemType>& softmax)
{
    Resize(1, 1); // scalar result

    const bool onCpu = GetDeviceId() < 0;
    if (onCpu)
        a.m_CPUMatrix->AssignSoftmaxSum(*softmax.m_CPUMatrix, *m_CPUMatrix);
    else
        a.m_GPUMatrix->AssignSoftmaxSum(*softmax.m_GPUMatrix, *m_GPUMatrix);

    return *this;
}
// Stores into *this (a 1x1 scalar) the unnormalized NCE evaluation of (a, b, c, bias),
// dispatched on this matrix's current device.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignNceUnnormalizedEval(const Matrix<ElemType>& a, const Matrix<ElemType>& b, const Matrix<ElemType>& c, const Matrix<ElemType>& bias)
{
    // if (a.GetMatrixType() != MatrixType::SPARSE)
    //     NOT_IMPLEMENTED;

    Resize(1, 1);
    if (GetDeviceId() < 0)
        a.m_CPUMatrix->AssignNCEUnnormalizedEval(*b.m_CPUMatrix, *c.m_CPUMatrix, *bias.m_CPUMatrix, *m_CPUMatrix);
    else
        // NOTE(review): unlike the CPU path, the GPU call does not pass 'bias' --
        // confirm against GPUMatrix<ElemType>::AssignNCEUnnormalizedEval whether this is intentional.
        a.m_GPUMatrix->AssignNCEUnnormalizedEval(*b.m_GPUMatrix, *c.m_GPUMatrix, *m_GPUMatrix);
    return *this;
}
// Computes the noise-contrastive estimation objective into *this (a 1x1 scalar).
// 'tmp' is a scratch matrix resized here to (a.rows/2) x sampleCount and filled with
// intermediate values that AssignNCEDerivative below consumes.
// All inputs must already live on the same device as *this.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignNoiseContrastiveEstimation(const Matrix<ElemType>& a, const Matrix<ElemType>& b, const Matrix<ElemType>& c, const Matrix<ElemType>& bias, Matrix<ElemType>& tmp)
{
    if (a.IsEmpty() || b.IsEmpty() || c.IsEmpty())
        LogicError("AssignNoiseContrastiveEstimation: one of the input matrices is empty.");

    if (a.GetDeviceId() != b.GetDeviceId() || b.GetDeviceId() != c.GetDeviceId() || c.GetDeviceId() != GetDeviceId())
        NOT_IMPLEMENTED;

    Resize(1, 1);

    if (GetDeviceId() < 0)
    {
        // sampleCount = number of columns (elements / rows)
        size_t sampleCount = a.m_CPUMatrix->GetNumElements() / a.m_CPUMatrix->GetNumRows();
        tmp.Resize(a.GetNumRows() / 2, sampleCount);
        a.m_CPUMatrix->AssignNoiseContrastiveEstimation(*b.m_CPUMatrix, *c.m_CPUMatrix,
                                                        *bias.m_CPUMatrix, *tmp.m_CPUMatrix, *m_CPUMatrix);
    }
    else
    {
        size_t sampleCount = a.m_GPUMatrix->GetNumElements() / a.m_GPUMatrix->GetNumRows();
        tmp.Resize(a.GetNumRows() / 2, sampleCount);
        a.m_GPUMatrix->AssignNoiseContrastiveEstimation(*b.m_GPUMatrix, *c.m_GPUMatrix,
                                                        *bias.m_GPUMatrix, sampleCount, *tmp.m_GPUMatrix, *m_GPUMatrix);
    }
    return *this;
}
// Computes the NCE derivative with respect to the input selected by 'inputIndex' and
// assigns it into *this. 'tmp' holds the intermediate values produced by
// AssignNoiseContrastiveEstimation (expected shape: a.rows/2 rows; see assert below).
// All inputs must already live on the same device as *this.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignNCEDerivative(const Matrix<ElemType>& tmp, const Matrix<ElemType>& a, const Matrix<ElemType>& b, const Matrix<ElemType>& c, size_t inputIndex)
{
    if (a.IsEmpty() || b.IsEmpty() || c.IsEmpty())
        LogicError("AssignNCEDerivative: one of the input matrices is empty."); // fixed: message previously named AssignNoiseContrastiveEstimation

    if (a.GetDeviceId() != b.GetDeviceId() || b.GetDeviceId() != c.GetDeviceId() || c.GetDeviceId() != GetDeviceId())
        NOT_IMPLEMENTED;

    assert(tmp.GetNumRows() == a.GetNumRows() / 2);

    if (GetDeviceId() < 0)
    {
        // samples                         gradient           hidden           embedding         embedding/hidden
        a.m_CPUMatrix->AssignNCEDerivative(*tmp.m_CPUMatrix, *b.m_CPUMatrix, *c.m_CPUMatrix, inputIndex, *m_CPUMatrix);
    }
    else
    {
        a.m_GPUMatrix->AssignNCEDerivative(*tmp.m_GPUMatrix, *b.m_GPUMatrix, *c.m_GPUMatrix, inputIndex, *m_GPUMatrix);
    }
    return *this;
}
// Accumulates the average-pooling gradient into *this. Both matrices are colocated and
// must share the same matrix type. Dense matrices only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AddAveragePoolingGradient(const Matrix<ElemType>& outputGradientBatch,
                                                              const size_t channels,
                                                              const size_t inputWidth, const size_t inputHeight, const size_t inputSizePerSample,
                                                              const size_t outputWidth, const size_t outputHeight, const size_t outputSizePerSample,
                                                              const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample)
{
    DecideAndMoveToRightDevice(*this, outputGradientBatch);

    if (!(GetMatrixType() == outputGradientBatch.GetMatrixType()))
        NOT_IMPLEMENTED;

    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->AddAveragePoolingGradient(*(outputGradientBatch.m_CPUMatrix), channels,
                                                                   inputWidth, inputHeight, inputSizePerSample,
                                                                   outputWidth, outputHeight, outputSizePerSample,
                                                                   windowWidth, windowHeight, horizontalSubsample, verticalSubsample),
                            m_GPUMatrix->AddAveragePoolingGradient(*(outputGradientBatch.m_GPUMatrix), channels,
                                                                   inputWidth, inputHeight, inputSizePerSample,
                                                                   outputWidth, outputHeight, outputSizePerSample,
                                                                   windowWidth, windowHeight, horizontalSubsample, verticalSubsample),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);

    return *this;
}
#pragma endregion Other Helper Functions
#pragma region Static BLAS Functions
// Singular value decomposition of A into SIGMA, U, VT and workspace W.
// Implemented for CPU dense matrices only; all outputs are first moved to A's device
// and switched to A's matrix type/format.
template <class ElemType>
void Matrix<ElemType>::SVD(const Matrix<ElemType>& A, Matrix<ElemType>& SIGMA, Matrix<ElemType>& U, Matrix<ElemType>& VT, Matrix<ElemType>& W)
{
    if (A.IsEmpty())
        LogicError("SVD: the input matrix is empty.");

    DecideAndMoveToRightDevice(A, SIGMA, U);
    VT._transferToDevice(A.GetDeviceId());
    W._transferToDevice(A.GetDeviceId());

    SIGMA.SwitchToMatrixType(A.GetMatrixType(), A.GetFormat(), false);
    U.SwitchToMatrixType(A.GetMatrixType(), A.GetFormat(), false);
    VT.SwitchToMatrixType(A.GetMatrixType(), A.GetFormat(), false);
    W.SwitchToMatrixType(A.GetMatrixType(), A.GetFormat(), false);

    DISPATCH_MATRIX_ON_FLAG(&A,
                            nullptr,
                            // work on a deep copy of A (presumably the CPU SVD overwrites its input -- hence the clone)
                            Matrix<ElemType> tA = A.DeepClone();
                            CPUMatrix<ElemType>::SVD(*tA.m_CPUMatrix, *SIGMA.m_CPUMatrix, *U.m_CPUMatrix, *VT.m_CPUMatrix, *W.m_CPUMatrix);
                            SIGMA.SetDataLocation(CPU);
                            U.SetDataLocation(CPU);
                            VT.SetDataLocation(CPU);
                            W.SetDataLocation(CPU),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
/// <summary>Matrix-matrix multiply with col-major matrices (a and b may be transposed): c = alpha * op(a) * op(b) + beta*c</summary>
/// <param name="alpha">Scalar</param>
/// <param name="a">Input matrix</param>
/// <param name="transposeA">Whether matrix a is transposed</param>
/// <param name="b">Input matrix</param>
/// <param name="transposeB">Whether matrix b is transposed</param>
/// <param name="beta">Scalar</param>
/// <param name="c">Resulting matrix, user is responsible for allocating this</param>
// Dispatches on the device and on the dense/sparse combination of (a, b, c).
template <class ElemType>
void Matrix<ElemType>::MultiplyAndWeightedAdd(ElemType alpha, const Matrix<ElemType>& a, const bool transposeA, const Matrix<ElemType>& b, const bool transposeB,
                                              ElemType beta, Matrix<ElemType>& c)
{
    DecideAndMoveToRightDevice(a, b, c);

    if (c.GetDeviceId() < 0) // CPU
    {
        // CPU: sparse 'a' is unsupported; dispatch on b's and c's types
        if (a.GetMatrixType() == MatrixType::SPARSE)
            NOT_IMPLEMENTED;
        if (b.GetMatrixType() == MatrixType::SPARSE)
        {
            if (c.GetMatrixType() == MatrixType::DENSE)
            {
                CPUSparseMatrix<ElemType>::MultiplyAndWeightedAdd(alpha, *a.m_CPUMatrix, transposeA, *b.m_CPUSparseMatrix, transposeB, beta, *c.m_CPUMatrix);
                c.SetDataLocation(CPU, DENSE);
            }
            else if (c.GetMatrixType() == MatrixType::SPARSE)
            {
                // NOTE(review): this variant takes no beta -- presumably accumulate-only; confirm against CPUSparseMatrix::MultiplyAndAdd.
                CPUSparseMatrix<ElemType>::MultiplyAndAdd(alpha, *a.m_CPUMatrix, transposeA, *b.m_CPUSparseMatrix, transposeB, *c.m_CPUSparseMatrix);
                c.SetDataLocation(CPU, SPARSE);
            }
            else
                NOT_IMPLEMENTED;
        }
        else
        {
            c.SwitchToMatrixType(MatrixType::DENSE, matrixFormatDense, false);
            CPUMatrix<ElemType>::MultiplyAndWeightedAdd(alpha, *a.m_CPUMatrix, transposeA, *b.m_CPUMatrix, transposeB, beta, *c.m_CPUMatrix);
            c.SetDataLocation(CPU, DENSE);
        }
    }
    else // GPU operations
    {
        if (a.m_matrixType == b.m_matrixType && b.m_matrixType == c.m_matrixType && a.m_matrixType == MatrixType::DENSE) // All dense
        {
            GPUMatrix<ElemType>::MultiplyAndWeightedAdd(alpha, *a.m_GPUMatrix, transposeA, *b.m_GPUMatrix, transposeB, beta, *c.m_GPUMatrix);
            c.SetDataLocation(GPU, DENSE);
        }
        else if (a.m_matrixType == MatrixType::SPARSE && b.m_matrixType == c.m_matrixType && b.m_matrixType == MatrixType::DENSE) // Sparse*Dense+Dense
        {
            // b's transpose is materialized so the sparse kernel can be called with transposeB == false
            GPUMatrix<ElemType> second = transposeB ? b.m_GPUMatrix->Transpose() : *b.m_GPUMatrix;
            GPUSparseMatrix<ElemType>::MultiplyAndWeightedAdd(alpha, *a.m_GPUSparseMatrix, transposeA, second, false, beta, *c.m_GPUMatrix);
            c.SetDataLocation(GPU, DENSE);
        }
        else if (a.m_matrixType == MatrixType::DENSE && b.m_matrixType == MatrixType::SPARSE && c.m_matrixType == MatrixType::DENSE) // Dense*Sparse + Dense
        {
            // if (b.m_GPUSparseMatrix->GetFormat() == MatrixFormat::matrixFormatSparseCSR)
            // {
            GPUSparseMatrix<ElemType>::MultiplyAndWeightedAdd(alpha, *a.m_GPUMatrix, transposeA, *b.m_GPUSparseMatrix, transposeB, beta, *c.m_GPUMatrix);
            // }
            // else
            // {
            //    GPUMatrix<ElemType> firstDummy = transposeA ? a.m_GPUMatrix->Transpose()*alpha : (*a.m_GPUMatrix)*alpha;
            //    GPUMatrix<ElemType> & first= firstDummy; // GCC does not support mixing refs and non-refs
            //    GPUSparseMatrix<ElemType> secondDummy = transposeB ? b.m_GPUSparseMatrix->Transpose() : *b.m_GPUSparseMatrix;
            //    GPUSparseMatrix<ElemType> & second = secondDummy;
            //    if (beta==0)
            //    {
            //        GPUSparseMatrix<ElemType>::Multiply(first,second,*c.m_GPUMatrix);
            //    }
            //    else
            //    {
            //        Matrix<ElemType> tmp(c.GetNumRows(),c.GetNumCols(),(DEVICEID_TYPE)c.GetDeviceId());
            //        GPUSparseMatrix<ElemType>::Multiply(first,second,*tmp.m_GPUMatrix);
            //        c=tmp+c*beta;
            //    }
            // }
            c.SetDataLocation(GPU, DENSE);
        }
        else if (a.m_matrixType == MatrixType::DENSE && b.m_matrixType == MatrixType::SPARSE && c.m_matrixType == MatrixType::SPARSE) // h -> u0
        {
            // new GPU sparse matrix code
            GPUSparseMatrix<ElemType>::MultiplyAndAdd(alpha, *a.m_GPUMatrix, transposeA, *b.m_GPUSparseMatrix, transposeB, *c.m_GPUSparseMatrix);
            c.SetDataLocation(GPU, SPARSE);
        }
        else if (a.m_matrixType == b.m_matrixType && b.m_matrixType == c.m_matrixType && a.m_matrixType == MatrixType::SPARSE) // All sparse
        {
            // fold alpha into the first operand
            GPUSparseMatrix<ElemType> firstDummy = alpha == 1 ? *a.m_GPUSparseMatrix : (*a.m_GPUSparseMatrix) * alpha;
            GPUSparseMatrix<ElemType>& first = firstDummy; // By Malcolm.. gcc doesn't support auto
            if (beta == 0)
            {
                GPUSparseMatrix<ElemType>::Multiply(first, transposeA, *b.m_GPUSparseMatrix, transposeB, *c.m_GPUSparseMatrix);
                c.SetDataLocation(GPU, SPARSE);
            }
            else
            {
                // compute the product into a temporary, then blend with beta * c
                GPUSparseMatrix<ElemType> tmp(b.m_GPUSparseMatrix->GetComputeDeviceId());
                GPUSparseMatrix<ElemType>::Multiply(first, transposeA, *b.m_GPUSparseMatrix, transposeB, tmp);
                *c.m_GPUSparseMatrix = tmp + (*c.m_GPUSparseMatrix) * beta;
                c.SetDataLocation(GPU, SPARSE);
            }
        }
        else if (a.m_matrixType == b.m_matrixType && a.m_matrixType == MatrixType::DENSE && c.m_matrixType == MatrixType::SPARSE) // Dense*Dense -> Sparse
        {
            // compute densely, then convert the result to sparse and blend with beta * c
            GPUMatrix<ElemType> tmp(a.m_GPUMatrix->GetComputeDeviceId());
            GPUSparseMatrix<ElemType> tmpSparse(a.m_GPUMatrix->GetComputeDeviceId());
            GPUMatrix<ElemType>::MultiplyAndWeightedAdd(alpha, *a.m_GPUMatrix, transposeA, *b.m_GPUMatrix, transposeB, beta, tmp);
            tmpSparse.SetValue(tmp);
            *c.m_GPUSparseMatrix = tmpSparse + (*c.m_GPUSparseMatrix) * beta;
            c.SetDataLocation(GPU, SPARSE);
        }
        else
            NOT_IMPLEMENTED;
    }
}
// c = alpha * a(0,0) * b + beta * c, where 'a' must be a 1x1 (scalar) matrix.
// Dispatched on c's location; dense matrices only.
template <class ElemType>
/*static*/ void Matrix<ElemType>::Multiply1x1AndWeightedAdd(ElemType alpha, const Matrix<ElemType>& a, const Matrix<ElemType>& b, ElemType beta, Matrix<ElemType>& c)
{
    // special case: a is a 1x1 matrix
    // The only alternative is to Get00Elements(), which makes things inefficient.
    if (a.GetNumElements() != 1)
        InvalidArgument("Multiply1x1AndWeightedAdd: first arg must be a scalar.");

    DISPATCH_MATRIX_ON_FLAG(&c,
                            nullptr,
                            CPUMatrix<ElemType>::Multiply1x1AndWeightedAdd(alpha, *a.m_CPUMatrix, *b.m_CPUMatrix, beta, *c.m_CPUMatrix),
                            GPUMatrix<ElemType>::Multiply1x1AndWeightedAdd(alpha, *a.m_GPUMatrix, *b.m_GPUMatrix, beta, *c.m_GPUMatrix),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);
}
/// <summary>Matrix-matrix multiply with col-major matrices (a and b may be transposed): c = op(a) * op(b) + c</summary>
/// <param name="a">Input matrix</param>
/// <param name="transposeA">Whether matrix a is transposed</param>
/// <param name="b">Input matrix</param>
/// <param name="transposeB">Whether matrix b is transposed</param>
/// <param name="c">Resulting matrix, user is responsible for allocating this</param>
template <class ElemType>
void Matrix<ElemType>::MultiplyAndAdd(const Matrix<ElemType>& a, const bool transposeA, const Matrix<ElemType>& b, const bool transposeB,
                                      Matrix<ElemType>& c)
{
    // accumulating product: alpha = 1, beta = 1
    MultiplyAndWeightedAdd(1.0, a, transposeA, b, transposeB, 1.0, c);
}
/// <summary>Matrix-matrix multiply with col-major matrices (a and b may be transposed): c = op(a) * op(b)</summary>
/// <param name="a">Input matrix</param>
/// <param name="transposeA">Whether matrix a is transposed</param>
/// <param name="b">Input matrix</param>
/// <param name="transposeB">Whether matrix b is transposed</param>
/// <param name="c">Resulting matrix, user is responsible for allocating this</param>
template <class ElemType>
void Matrix<ElemType>::Multiply(const Matrix<ElemType>& a, const bool transposeA, const Matrix<ElemType>& b, const bool transposeB,
                                Matrix<ElemType>& c)
{
    // overwrite c: alpha = 1, beta = 0
    MultiplyAndWeightedAdd(1.0, a, transposeA, b, transposeB, 0.0, c);
}
/// <summary>Matrix-matrix multiply with col-major matrices (a and b are not transposed): c = a * b</summary>
/// <param name="a">Input matrix</param>
/// <param name="b">Input matrix</param>
/// <param name="c">Resulting matrix, user is responsible for allocating this</param>
template <class ElemType>
void Matrix<ElemType>::Multiply(const Matrix<ElemType>& a, const Matrix<ElemType>& b, Matrix<ElemType>& c)
{
    // plain product without transposes: alpha = 1, beta = 0
    MultiplyAndWeightedAdd(1.0, a, false, b, false, 0.0, c);
}
/// <summary>1-D Convolution with col-major matrices (a and b may be transposed): c = alpha * op(a) * op(b) + beta*c. MultiplyAndWeightedAdd is just a special case of this.</summary>
/// <param name="alpha">Scalar</param>
/// <param name="a">Input matrix</param>
/// <param name="transposeA">Whether matrix a is transposed</param>
/// <param name="b">Input matrix</param>
/// <param name="transposeB">Whether matrix b is transposed</param>
/// <param name="beta">Scalar</param>
/// <param name="c">Resulting matrix, user is responsible for allocating this</param>
template <class ElemType>
void Matrix<ElemType>::ConvolveAndWeightedAdd(ElemType alpha, const Matrix<ElemType>& a, const bool transposeA, const Matrix<ElemType>& b, const bool transposeB,
ElemType beta, Matrix<ElemType>& c, size_t numChannels, size_t horizontalSubsample, bool padding, bool channelwise)
{
// Ensure all three operands end up on the same device before dispatching.
DecideAndMoveToRightDevice(a, b, c);
// Only one backend combination is implemented: GPU device with dense 'a',
// sparse 'b', and dense 'c'. Everything else throws NOT_IMPLEMENTED.
if (c.GetDeviceId() >= 0 /*GPU*/ && a.GetMatrixType() == MatrixType::DENSE && b.GetMatrixType() == MatrixType::SPARSE && c.GetMatrixType() == MatrixType::DENSE)
{
GPUSparseMatrix<ElemType>::ConvolveAndWeightedAdd(alpha, *a.m_GPUMatrix, transposeA, *b.m_GPUSparseMatrix, transposeB, beta, *c.m_GPUMatrix, numChannels, horizontalSubsample, padding, channelwise);
}
else
{
NOT_IMPLEMENTED;
}
}
/// <summary>Matrix-scalar multiply with col-major matrices: c = alpha * a + c</summary>
/// if a is a column vector, add to all columns of c
/// if a is a row vector, add to all rows of c
/// <param name="alpha">Scalar</param>
/// <param name="a">Input matrix</param>
/// <param name="c">Resulting matrix, user is responsible for allocating this</param>
// c += alpha * a. If a is a column (row) vector it is added to every column (row) of c.
// Handles both same-type operands and mixed sparse/dense combinations (GPU only for the latter).
template <class ElemType>
/*static*/ void Matrix<ElemType>::ScaleAndAdd(ElemType alpha, const Matrix<ElemType>& a, Matrix<ElemType>& c)
{
if (a.IsEmpty() || c.IsEmpty())
LogicError("ScaleAndAdd: one of the input matrices is empty.");
DecideAndMoveToRightDevice(c, a);
if (a.GetMatrixType() == c.GetMatrixType())
{
// Same storage type: dispatch straight to the matching backend.
// GPU-sparse path: move c's old contents into a temporary 'b' first, since the
// backend computes into its last argument (c) while reading the old value.
DISPATCH_MATRIX_ON_FLAG(&c,
&c,
CPUMatrix<ElemType>::ScaleAndAdd(alpha, *a.m_CPUMatrix, *c.m_CPUMatrix),
GPUMatrix<ElemType>::ScaleAndAdd(alpha, *a.m_GPUMatrix, *c.m_GPUMatrix),
NOT_IMPLEMENTED,
GPUSparseMatrix<ElemType> b = move(*c.m_GPUSparseMatrix);
GPUSparseMatrix<ElemType>::ScaleAndAdd(alpha, *a.m_GPUSparseMatrix, 1, b, *c.m_GPUSparseMatrix));
}
else
{
// Mixed types: 'a' and 'c' differ (sparse-into-dense or dense-into-sparse).
// The dense-into-sparse GPU case converts c to dense first, then frees the sparse side.
DISPATCH_MATRIX_ON_FLAG(&c,
nullptr,
CPUSparseMatrix<ElemType>::ScaleAndAdd(alpha, *a.m_CPUSparseMatrix, *c.m_CPUMatrix);
c.SetDataLocation(CPU),
if (a.m_GPUSparseMatrix->GetFormat() == MatrixFormat::matrixFormatSparseCSC)
{
GPUSparseMatrix<ElemType>::ScaleAndAdd(alpha, *a.m_GPUSparseMatrix, 1, *c.m_GPUMatrix, *c.m_GPUMatrix);
} else // new GPU sparse matrix code
{
GPUSparseMatrix<ElemType>::ScaleAndAdd(alpha, *a.m_GPUSparseMatrix, *c.m_GPUMatrix);
} c.SetDataLocation(GPU),
NOT_IMPLEMENTED,
{
c.m_GPUMatrix = new GPUMatrix<ElemType>(c.m_GPUSparseMatrix->CopyToDenseMatrix());
GPUSparseMatrix<ElemType>::ScaleAndAdd(alpha, *a.m_GPUMatrix, 1, *c.m_GPUSparseMatrix, *c.m_GPUMatrix);
delete c.m_GPUSparseMatrix;
c.m_GPUSparseMatrix = NULL;
c.SetDataLocation(GPU, DENSE);
});
}
}
/// <summary>Matrix-scalar multiply with col-major matrices: c = alpha * a + beta * c</summary>
/// if a is a column vector, add to all columns of c
/// if a is a row vector, add to all rows of c
/// <param name="alpha">Scalar</param>
/// <param name="a">Input matrix</param>
/// <param name="beta">Scalar</param>
/// <param name="c">Resulting matrix, caller is responsible for allocating this</param>
template <class ElemType>
/*static*/ void Matrix<ElemType>::ScaleAndAdd(ElemType alpha, const Matrix<ElemType>& a, ElemType beta, Matrix<ElemType>& c)
{
    // c <- alpha * a + beta * c, built from the two simpler primitives.
    if (beta == 0)
    {
        Scale(alpha, a, c); // c is overwritten; its old contents are irrelevant
    }
    else if (beta == 1)
    {
        ScaleAndAdd(alpha, a, c); // plain accumulate: c <- alpha * a + c
    }
    else
    {
        // General beta: first c' = (alpha / beta) * a + c, then scale the whole
        // thing by beta, giving c = alpha * a + beta * c.
        ScaleAndAdd(alpha / beta, a, c);
        Scale(beta, c);
    }
}
// tensor swapping and addition: c <- keepWeight * b + scaleFactor * swap_dimensions(a, S, K)
// where
// - a is interpreted as a tensor of dimension (D x S x M x K x T) // column-major, as usual
// - b and c as a tensor of dimension (D x K x M x S x T) // note: K and S swapped
// The main point of this function is to reshuffle a tensor w.r.t. two dimensions that get swapped in memory,
// but for gradients, we will need to add, hence the keepWeight.
// Notes:
// - c and b may be the same (in-place operation is expressly allowed).
// - D, M, and/or T may be 1. For example, D == M == T == 1 implements a 2D matrix transpose from (S x K) to (K x S).
// - If keepWeight == 0, then b will just get overwritten (straight assignment, b may be uninitialized or contain NaNs).
// - The original matrix dimensions are ignored except that sizes must match (rows x cols == D x S x M x K x T).
// For diagnostics purposes, this function also enforces the rows % D == 0 and cols % T == 0, but this is not a functional requirement and can be removed if that helps.
// - Dense matrices only.
// TODO: Handle these cases:
// - no swapping happening --just do a block copy
// - swapping can be implemented by cuDNN --do so
template <class ElemType>
/*static*/ void Matrix<ElemType>::TensorShuffleScaleAndAdd(ElemType keepWeight, const Matrix<ElemType>& a, size_t D, size_t S, size_t M, size_t K, size_t T, ElemType scaleFactor, const Matrix<ElemType>& b, Matrix<ElemType>& c)
{
// Element counts must agree; actual row/col dimensions are reinterpreted as (D x S x M x K x T).
if (a.GetNumElements() != c.GetNumElements() || b.GetNumElements() != c.GetNumElements()) // allocations must match (but not dimensions, since we reinterpret the dimensions anyway)
InvalidArgument("TensorShuffleScaleAndAdd: a, b, and c must have same number of elements.");
if (c.IsEmpty()) // operating on empty minibatch slices is perfectly cromulent
return;
// sanity checks for current use cases--these are not strictly necessary and can be deleted
if (a.GetNumRows() % D != 0 || b.GetNumRows() % D != 0 || c.GetNumRows() % D != 0)
InvalidArgument("TensorShuffleScaleAndAdd: a, b, and c are meant to have a row dimension that is a multiple of D.");
if (a.GetNumCols() % T != 0 || b.GetNumCols() % T != 0 || c.GetNumCols() % T != 0)
InvalidArgument("TensorShuffleScaleAndAdd: a, b, and c are meant to have a column dimension that is a multiple of T.");
DecideAndMoveToRightDevice(a, b, c);
// Dispatch on c's location/type; note a GPU-sparse variant exists, unlike most ops here.
DISPATCH_MATRIX_ON_FLAG(&c,
nullptr,
CPUMatrix<ElemType>::TensorShuffleScaleAndAdd(keepWeight, *a.m_CPUMatrix, D, S, M, K, T, scaleFactor, *b.m_CPUMatrix, *c.m_CPUMatrix),
GPUMatrix<ElemType>::TensorShuffleScaleAndAdd(keepWeight, *a.m_GPUMatrix, D, S, M, K, T, scaleFactor, *b.m_GPUMatrix, *c.m_GPUMatrix),
NOT_IMPLEMENTED,
GPUSparseMatrix<ElemType>::TensorShuffleScaleAndAdd(keepWeight, *a.m_GPUSparseMatrix, D, S, M, K, T, scaleFactor, *b.m_GPUSparseMatrix, *c.m_GPUSparseMatrix));
}
/// <summary>c += alpha * (a-b)</summary>
/// if a, b, c must have same dim
/// <param name="alpha">Scalar</param>
/// <param name="a">Input matrix</param>
/// <param name="b">Input matrix</param>
/// <param name="c">Resulting matrix, user is responsible for allocating this</param>
// c += alpha * (a - b). All three matrices must share the same matrix type; dense only.
template <class ElemType>
void Matrix<ElemType>::AddScaledDifference(const ElemType alpha, const Matrix<ElemType>& a, const Matrix<ElemType>& b, Matrix<ElemType>& c)
{
DecideAndMoveToRightDevice(c, a, b);
if (!(a.GetMatrixType() == b.GetMatrixType() && a.GetMatrixType() == c.GetMatrixType()))
NOT_IMPLEMENTED;
DISPATCH_MATRIX_ON_FLAG(&c,
&c,
CPUMatrix<ElemType>::AddScaledDifference(alpha, *a.m_CPUMatrix, *b.m_CPUMatrix, *c.m_CPUMatrix),
GPUMatrix<ElemType>::AddScaledDifference(alpha, *a.m_GPUMatrix, *b.m_GPUMatrix, *c.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
/// <summary> c = alpha * (a-b)</summary>
/// if a, b, c must have same dim
/// <param name="alpha">Scalar</param>
/// <param name="a">Input matrix</param>
/// <param name="b">Input matrix</param>
/// <param name="c">Resulting matrix, user is responsible for allocating this</param>
// c = alpha * (a - b). c is switched to a's matrix type/format first; dense only.
template <class ElemType>
void Matrix<ElemType>::AssignScaledDifference(const ElemType alpha, const Matrix<ElemType>& a, const Matrix<ElemType>& b, Matrix<ElemType>& c)
{
DecideAndMoveToRightDevice(a, b, c);
if (!(a.GetMatrixType() == b.GetMatrixType()))
NOT_IMPLEMENTED;
// c is pure output: adopt a's type/format without preserving c's old values.
c.SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&c,
&c,
CPUMatrix<ElemType>::AssignScaledDifference(alpha, *a.m_CPUMatrix, *b.m_CPUMatrix, *c.m_CPUMatrix),
GPUMatrix<ElemType>::AssignScaledDifference(alpha, *a.m_GPUMatrix, *b.m_GPUMatrix, *c.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
/// <summary>c += alpha * (a-b)</summary>
/// if a, b, c must have same dim
/// <param name="alpha">Scalar</param>
/// <param name="a">Input matrix</param>
/// <param name="b">Input matrix</param>
/// <param name="c">Resulting matrix, user is responsible for allocating this</param>
// c += alpha[0,0] * (a - b), where alpha is a 1x1 matrix living on the same device as c.
template <class ElemType>
void Matrix<ElemType>::AddScaledDifference(const Matrix<ElemType>& alpha, const Matrix<ElemType>& a, const Matrix<ElemType>& b, Matrix<ElemType>& c)
{
DecideAndMoveToRightDevice(c, a, b);
// alpha is not part of the device negotiation above; move it explicitly to c's device.
alpha._transferToDevice(c.GetDeviceId());
if (!(a.GetMatrixType() == b.GetMatrixType() && a.GetMatrixType() == c.GetMatrixType() && a.GetMatrixType() == alpha.GetMatrixType()))
NOT_IMPLEMENTED;
DISPATCH_MATRIX_ON_FLAG(&c,
&c,
CPUMatrix<ElemType>::AddScaledDifference(*alpha.m_CPUMatrix, *a.m_CPUMatrix, *b.m_CPUMatrix, *c.m_CPUMatrix),
GPUMatrix<ElemType>::AddScaledDifference(*alpha.m_GPUMatrix, *a.m_GPUMatrix, *b.m_GPUMatrix, *c.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
/// <summary> c = alpha * (a-b)</summary>
/// if a, b, c must have same dim
/// <param name="alpha">Scalar</param>
/// <param name="a">Input matrix</param>
/// <param name="b">Input matrix</param>
/// <param name="c">Resulting matrix, user is responsible for allocating this</param>
// c = alpha[0,0] * (a - b), where alpha is a 1x1 matrix; c adopts a's type/format.
template <class ElemType>
void Matrix<ElemType>::AssignScaledDifference(const Matrix<ElemType>& alpha, const Matrix<ElemType>& a, const Matrix<ElemType>& b, Matrix<ElemType>& c)
{
DecideAndMoveToRightDevice(a, b, alpha);
// c is pure output: just follow a's device rather than participating in the negotiation.
c._transferToDevice(a.GetDeviceId());
if (!(a.GetMatrixType() == b.GetMatrixType() && a.GetMatrixType() == alpha.GetMatrixType()))
NOT_IMPLEMENTED;
c.SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&c,
nullptr,
CPUMatrix<ElemType>::AssignScaledDifference(*alpha.m_CPUMatrix, *a.m_CPUMatrix, *b.m_CPUMatrix, *c.m_CPUMatrix),
GPUMatrix<ElemType>::AssignScaledDifference(*alpha.m_GPUMatrix, *a.m_GPUMatrix, *b.m_GPUMatrix, *c.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
//c[ci,cj] += a[ai,aj]
//c[ci,cj] += a[ai,aj]  -- single-element accumulate across two matrices (dense only)
template <class ElemType>
void Matrix<ElemType>::AddElementToElement(const Matrix<ElemType>& a, const size_t ai, const size_t aj, Matrix<ElemType>& c, const size_t ci, const size_t cj)
{
DecideAndMoveToRightDevice(c, a);
if (c.GetMatrixType() != a.GetMatrixType())
NOT_IMPLEMENTED;
DISPATCH_MATRIX_ON_FLAG(&c,
&c,
CPUMatrix<ElemType>::AddElementToElement(*a.m_CPUMatrix, ai, aj, *c.m_CPUMatrix, ci, cj),
GPUMatrix<ElemType>::AddElementToElement(*a.m_GPUMatrix, ai, aj, *c.m_GPUMatrix, ci, cj),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
//c[ci,cj] = a[ai,aj]
//c[ci,cj] = a[ai,aj]  -- single-element copy; note only the CPU-dense path is implemented
template <class ElemType>
void Matrix<ElemType>::AssignElementToElement(const Matrix<ElemType>& a, const size_t ai, const size_t aj, Matrix<ElemType>& c, const size_t ci, const size_t cj)
{
DecideAndMoveToRightDevice(c, a);
if (c.GetMatrixType() != a.GetMatrixType())
NOT_IMPLEMENTED;
DISPATCH_MATRIX_ON_FLAG(&c,
&c,
CPUMatrix<ElemType>::AssignElementToElement(*a.m_CPUMatrix, ai, aj, *c.m_CPUMatrix, ci, cj),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
// In-place update of 'a' at linear index 'position' (dispatches to the backend MinusOneAt;
// the name suggests a(position) -= 1 -- confirm against CPUMatrix/GPUMatrix implementations).
template <class ElemType>
void Matrix<ElemType>::MinusOneAt(Matrix<ElemType>& a, const size_t position)
{
// Dense CPU/GPU only; sparse variants are not implemented.
DISPATCH_MATRIX_ON_FLAG(&a,
&a,
CPUMatrix<ElemType>::MinusOneAt(*a.m_CPUMatrix, position),
GPUMatrix<ElemType>::MinusOneAt(*a.m_GPUMatrix, position),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
/// <summary>Matrix-scalar multiply with col-major matrices: c = alpha * a</summary>
/// <param name="alpha">Scalar</param>
/// <param name="a">Input matrix</param>
/// <param name="c">Resulting matrix, user is responsible for allocating this</param>
template <class ElemType>
void Matrix<ElemType>::Scale(ElemType alpha, const Matrix<ElemType>& a, Matrix<ElemType>& c)
{
DecideAndMoveToRightDevice(c, a);
// c is pure output: adopt a's type/format without preserving old contents.
c.SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
if (alpha == 0)
{
c.Resize(a);
c.SetValue(0); // this is a little faster, and also does not propagate NaNs, which we'd expect from 'beta' parameters
return;
}
else
// GPU-sparse path uses operator* and assigns the product back into c.
DISPATCH_MATRIX_ON_FLAG(&c,
&c,
CPUMatrix<ElemType>::Scale(alpha, *a.m_CPUMatrix, *c.m_CPUMatrix),
GPUMatrix<ElemType>::Scale(alpha, *a.m_GPUMatrix, *c.m_GPUMatrix),
NOT_IMPLEMENTED, * c.m_GPUSparseMatrix = (*a.m_GPUSparseMatrix) * alpha);
}
/// <summary>Matrix-scalar multiply with col-major matrices: a = alpha * a</summary>
/// <param name="alpha">Scalar</param>
/// <param name="a">Input matrix</param>
template <class ElemType>
void Matrix<ElemType>::Scale(ElemType alpha, Matrix<ElemType>& a)
{
// Note ordering: the alpha == 0 shortcut runs even for an empty matrix (SetValue on
// an empty matrix is the backend's concern); only nonzero alpha early-outs on empty.
if (alpha == 0)
a.SetValue(0); // this is a little faster, and also does not propagate NaNs, which we'd expect from 'beta' parameters
else if (a.IsEmpty())
return;
else
DISPATCH_MATRIX_ON_FLAG(&a,
&a,
CPUMatrix<ElemType>::Scale(alpha, *a.m_CPUMatrix),
GPUMatrix<ElemType>::Scale(alpha, *a.m_GPUMatrix),
NOT_IMPLEMENTED,
GPUSparseMatrix<ElemType>::Scale(alpha, *a.m_GPUSparseMatrix));
}
/// <summary>Matrix scalar matrix multiply with col-major matrices: a = alpha[0,0] * a</summary>
/// <param name="alpha">1x1 matrix</param>
/// <param name="a">Input matrix</param>
// a *= alpha[0,0], where alpha is a 1x1 matrix (kept on-device to avoid a sync readback).
template <class ElemType>
void Matrix<ElemType>::Scale(const Matrix<ElemType>& alpha, Matrix<ElemType>& a)
{
if (a.IsEmpty())
return;
DecideAndMoveToRightDevice(a, alpha);
if (a.GetMatrixType() != alpha.GetMatrixType())
NOT_IMPLEMENTED;
DISPATCH_MATRIX_ON_FLAG(&a,
nullptr,
CPUMatrix<ElemType>::Scale(*alpha.m_CPUMatrix, *a.m_CPUMatrix),
GPUMatrix<ElemType>::Scale(*alpha.m_GPUMatrix, *a.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
// Column-wise (or row-wise) inner products of a and b written into c; dense only.
template <class ElemType>
void Matrix<ElemType>::InnerProduct(const Matrix<ElemType>& a, const Matrix<ElemType>& b, Matrix<ElemType>& c, const bool isColWise)
{
if (a.IsEmpty() || b.IsEmpty())
LogicError("InnerProduct: one of the input matrix is empty.");
DecideAndMoveToRightDevice(a, b, c);
if (a.GetMatrixType() != b.GetMatrixType())
NOT_IMPLEMENTED;
c.SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&c,
&c,
CPUMatrix<ElemType>::InnerProduct(*a.m_CPUMatrix, *b.m_CPUMatrix, *c.m_CPUMatrix, isColWise),
GPUMatrix<ElemType>::InnerProduct(*a.m_GPUMatrix, *b.m_GPUMatrix, *c.m_GPUMatrix, isColWise),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
// Full (Frobenius-style) inner product of two whole matrices, returned as a scalar.
// Mixed dense/sparse pairs are only supported on GPU.
template <class ElemType>
ElemType Matrix<ElemType>::InnerProductOfMatrices(const Matrix<ElemType>& a, const Matrix<ElemType>& b)
{
if (a.IsEmpty() || b.IsEmpty())
LogicError("InnerProductOfMatrices: one of the input matrices is empty.");
DecideAndMoveToRightDevice(a, b);
if (a.GetMatrixType() == b.GetMatrixType())
{
DISPATCH_MATRIX_ON_FLAG(&a,
nullptr,
return CPUMatrix<ElemType>::InnerProductOfMatrices(*a.m_CPUMatrix, *b.m_CPUMatrix),
return GPUMatrix<ElemType>::InnerProductOfMatrices(*a.m_GPUMatrix, *b.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
else
{
// Mixed types: dense-a/sparse-b when a is dense, sparse-a/dense-b when a is sparse.
DISPATCH_MATRIX_ON_FLAG(&a,
nullptr,
NOT_IMPLEMENTED,
return GPUSparseMatrix<ElemType>::InnerProductOfMatrices(*a.m_GPUMatrix, *b.m_GPUSparseMatrix),
NOT_IMPLEMENTED,
return GPUSparseMatrix<ElemType>::InnerProductOfMatrices(*a.m_GPUSparseMatrix, *b.m_GPUMatrix));
}
}
// *this = [InnerProductOfMatrices(a, b)] as a 1x1 matrix.
// On GPU this stays on-device (no host readback); on CPU the scalar is computed then stored.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignInnerProductOfMatrices(const Matrix<ElemType>& a, const Matrix<ElemType>& b)
{
if (a.IsEmpty() || b.IsEmpty())
LogicError("InnerProductOfMatrices: one of the input matrices is empty.");
Resize(1, 1);
DecideAndMoveToRightDevice(a, b, *this);
if (a.GetMatrixType() == b.GetMatrixType())
{
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&a,
this,
m_CPUMatrix->SetValue(CPUMatrix<ElemType>::InnerProductOfMatrices(*a.m_CPUMatrix, *b.m_CPUMatrix)),
m_GPUMatrix->AssignInnerProductOfMatrices(*a.m_GPUMatrix, *b.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
else
{
NOT_IMPLEMENTED;
}
return *this;
}
// c = a .^ alpha (element-wise power). Supports dense CPU/GPU and GPU-sparse.
template <class ElemType>
void Matrix<ElemType>::ElementWisePower(ElemType alpha, const Matrix<ElemType>& a, Matrix<ElemType>& c)
{
if (a.IsEmpty())
return;
DecideAndMoveToRightDevice(a, c);
c.SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&c,
nullptr,
CPUMatrix<ElemType>::ElementWisePower(alpha, *a.m_CPUMatrix, *c.m_CPUMatrix),
GPUMatrix<ElemType>::ElementWisePower(alpha, *a.m_GPUMatrix, *c.m_GPUMatrix),
NOT_IMPLEMENTED,
GPUSparseMatrix<ElemType>::ElementWisePower(alpha, *a.m_GPUSparseMatrix, *c.m_GPUSparseMatrix));
}
// Element-wise equality check within 'threshold'. Returns false immediately on a
// dimension mismatch. Mixed dense/sparse comparisons are GPU-only.
template <class ElemType>
bool Matrix<ElemType>::AreEqual(const Matrix<ElemType>& a, const Matrix<ElemType>& b, const ElemType threshold /*= 1e-8*/)
{
if (a.GetNumRows() != b.GetNumRows() || a.GetNumCols() != b.GetNumCols())
return false;
DecideAndMoveToRightDevice(a, b);
if (a.GetMatrixType() == b.GetMatrixType())
{
DISPATCH_MATRIX_ON_FLAG(&a,
nullptr,
return CPUMatrix<ElemType>::AreEqual(*a.m_CPUMatrix, *b.m_CPUMatrix, threshold),
return GPUMatrix<ElemType>::AreEqual(*a.m_GPUMatrix, *b.m_GPUMatrix, threshold),
return CPUSparseMatrix<ElemType>::AreEqual(*a.m_CPUSparseMatrix, *b.m_CPUSparseMatrix, threshold),
return GPUSparseMatrix<ElemType>::AreEqual(*a.m_GPUSparseMatrix, *b.m_GPUSparseMatrix, threshold));
}
else
{
// Mixed types: the 'return false' after NOT_IMPLEMENTED is unreachable but keeps
// all dispatch branches returning a value.
DISPATCH_MATRIX_ON_FLAG(&a,
nullptr,
NOT_IMPLEMENTED;
return false,
return GPUSparseMatrix<ElemType>::AreEqual(*a.m_GPUMatrix, *b.m_GPUSparseMatrix, threshold),
NOT_IMPLEMENTED;
return false,
return GPUSparseMatrix<ElemType>::AreEqual(*a.m_GPUSparseMatrix, *b.m_GPUMatrix, threshold));
}
}
// Returns true if any element of 'a' equals 'value' exactly; dense only.
template <class ElemType>
bool Matrix<ElemType>::HasElement(const Matrix<ElemType>& a, const ElemType value)
{
if (a.IsEmpty())
return false;
DISPATCH_MATRIX_ON_FLAG(&a,
&a,
return CPUMatrix<ElemType>::HasElement(*a.m_CPUMatrix, value),
return GPUMatrix<ElemType>::HasElement(*a.m_GPUMatrix, value),
NOT_IMPLEMENTED;
return false,
NOT_IMPLEMENTED;
return false);
}
// diagnostics helper to check if matrix has a NaN
// This is very slow.
template <class ElemType>
bool Matrix<ElemType>::HasNan(const char* name) const
{
    // Not implemented for sparse matrices.
    // Return false as a workaround to at
    // least evaluate the dense matrices.
    if (m_matrixType == MatrixType::SPARSE)
        return false;

    if (IsEmpty())
        return false;

    // if GPU then first detect NaN there, will be faster
    if (GetDeviceId() != CPUDEVICE)
    {
        Matrix<ElemType> sum(GetDeviceId());
        sum.AssignSumOfElements(*this);
        auto x = sum.Get00Element();
        if (!std::isnan(x))
            return false;
        // fall through: scan element-wise on the CPU to report the first offending coordinate
    }

    const Matrix<ElemType>& us = *this;
    foreach_coord (i, j, us)
        if (std::isnan(us(i, j)))
        {
            // Fix: the coordinates are size_t; the previous "%ld" format paired with size_t is
            // undefined on LLP64 targets (64-bit Windows, where 'long' is 32-bit). Cast to
            // unsigned long long and print with %llu, which is portable across MSVC and gcc.
            fprintf(stderr, "HasNan: NaN detected at %s (%llu,%llu) in (%d,%d) matrix\n",
                    name, (unsigned long long) i, (unsigned long long) j, (int) GetNumRows(), (int) GetNumCols());
            return true;
        }
    return false;
}
#define CheckNan(m) m.HasNan(#m)
// another diagnostics helper to check if matrix has a NaN
// This is used at load and save time. This test is slow.
// Counts elements that are NaN or +/-INF. Used at load/save time as a diagnostic; slow
// (element-by-element host-side scan).
template <class ElemType>
size_t Matrix<ElemType>::CountNanInf() const
{
    const auto& us = *this;
    size_t n = 0; // number of NaNs/INF found
    foreach_coord (i, j, us)
    {
        auto val = us(i, j);
        // !isfinite() is already true for NaN as well as for +/-INF, so the previous
        // additional isnan() test was redundant and has been dropped.
        if (!std::isfinite(val))
            n++;
    }
    return n;
}
// TODO: these are scalar operations--why are they in Matrix?
// Scalar helper: 10^num, computed as e^(num * ln 10).
template <class ElemType>
ElemType Matrix<ElemType>::Exp10(ElemType num)
{
    // Keep the historical truncated ln(10) constant so results match bit-for-bit.
    const double naturalLogOfTen = 2.302585093;
    return static_cast<ElemType>(exp(num * naturalLogOfTen));
}
// Scalar helper: mathematical modulus x mod y for y > 0 (result in [0, y) even for negative x,
// since floor() rounds toward negative infinity).
template <class ElemType>
ElemType Matrix<ElemType>::Mod(ElemType x, ElemType y)
{
    assert(y > 0);
    if (y <= 0)
        // Fix: the old message "y is smaller than zero" was wrong for y == 0,
        // which this branch also rejects.
        LogicError("Mod: y must be positive.");
    return x - y * floor(x / y);
}
// TODO: use static LogAdd() as defined in TensorOps.h
// Not doing this currently because that one uses ElemType for all ops, while this one uses double inside. Must compare before making this change.
// Numerically stable log(e^x + e^y), classic HTK-style implementation.
// MINLOGEXP / LSMALL / LZERO are clamping constants defined elsewhere in the project.
template <class ElemType>
ElemType Matrix<ElemType>::LogAdd(ElemType x, ElemType y)
{
ElemType temp, diff, z;
// Ensure x >= y so that diff = y - x <= 0 and exp(diff) cannot overflow.
if (x < y)
{
temp = x;
x = y;
y = temp; // TODO: ::swap(x,y)?
}
diff = y - x;
if (diff < MINLOGEXP)
{
// y is negligibly small relative to x; also clamp a too-small x to LZERO ("log zero").
return (ElemType)((x < LSMALL) ? LZERO : x);
}
else
{
z = exp(diff);
return (ElemType)(x + log(1.0 + z));
}
}
//Matrix<ElemType>& Matrix<ElemType>::Shift(const Matrix<ElemType>& a, size_t shift)
//[this]= (a right shift by n), padded with zeros
// shift left, shift needs to be negative value
// shift right, shift needs to be positive value
// BUGBUG: Leaves uninitialized values in the opened-up columns.
// NOTE: this function is intentionally disabled -- both branches of the first 'if'
// throw, so the shifting code below is currently unreachable (see BUGBUG above:
// it would leave the opened-up columns uninitialized).
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::Shift(const Matrix<ElemType>& a, int shift)
{
if (a.IsEmpty())
LogicError("Shift: Matrix is empty.");
else
LogicError("Shift: BUGBUG This function currently leaves uninitialized values. Fix the code or contact fseide@microsoft.com.");
auto& us = *this;
if (this != &a)
{
Resize(a.GetNumRows(), a.GetNumCols());
}
long n = (long) GetNumCols();
// Positive shift moves columns right, negative moves them left; out-of-range shifts are no-ops.
if (shift >= 0 && shift < n)
us.ColumnSlice(shift, n - shift).SetValue(a.ColumnSlice(0, n - shift));
if (shift < 0 && shift > -n)
us.ColumnSlice(0, n + shift).SetValue(a.ColumnSlice(-shift, n + shift));
return *this;
}
// *this = element product of row vectors a and b with shift/negative-sample handling
// (used by similarity-style criteria); a and b must be 1 x N row vectors of equal size.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignElementProductOfWithShiftNeg(const Matrix<ElemType>& a, const Matrix<ElemType>& b, size_t shift, size_t negnumber)
{
if (a.IsEmpty() || b.IsEmpty())
LogicError("AssignElementProductOfWithShiftNeg: Matrix is empty.");
assert(a.GetNumRows() == b.GetNumRows() && a.GetNumCols() == b.GetNumCols());
if (!(a.GetNumRows() == b.GetNumRows() && a.GetNumCols() == b.GetNumCols()))
InvalidArgument("The input matrix dimensions do not match.");
if (a.GetNumRows() != 1)
InvalidArgument("AssignElementProductOfWithShiftNeg: The input matrix must be a row vector.");
DecideAndMoveToRightDevice(a, b, *this);
if (!(a.GetMatrixType() == b.GetMatrixType()))
NOT_IMPLEMENTED;
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->AssignElementProductOfWithShiftNeg(*a.m_CPUMatrix, *b.m_CPUMatrix, shift, negnumber),
m_GPUMatrix->AssignElementProductOfWithShiftNeg(*a.m_GPUMatrix, *b.m_GPUMatrix, shift, negnumber),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignInnerProductOfWithShiftNeg(const Matrix<ElemType>& a, const Matrix<ElemType>& b, const bool isColWise, size_t shift, size_t negnumber)
{
    // Thin member wrapper: compute into *this via the static helper, then return
    // self to allow call chaining.
    Matrix<ElemType>::InnerProductWithShiftNeg(a, b, *this, isColWise, shift, negnumber);
    return *this;
}
// Inner product with shift and negative-sample handling, written into c; dense only.
template <class ElemType>
void Matrix<ElemType>::InnerProductWithShiftNeg(const Matrix<ElemType>& a, const Matrix<ElemType>& b, Matrix<ElemType>& c, const bool isColWise, size_t shift, size_t negnumber)
{
if (a.IsEmpty() || b.IsEmpty())
LogicError("InnerProduct: one of the input matrix is empty.");
DecideAndMoveToRightDevice(a, b, c);
if (a.GetMatrixType() != b.GetMatrixType())
NOT_IMPLEMENTED;
c.SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
// NOTE(review): the GPU call does not receive 'isColWise' while the CPU call does --
// presumably the GPU kernel only supports one orientation; verify against GPUMatrix.
DISPATCH_MATRIX_ON_FLAG(&c,
&c,
CPUMatrix<ElemType>::InnerProductWithShiftNeg(*a.m_CPUMatrix, *b.m_CPUMatrix, *c.m_CPUMatrix, isColWise, shift, negnumber),
GPUMatrix<ElemType>::InnerProductWithShiftNeg(*a.m_GPUMatrix, *b.m_GPUMatrix, *c.m_GPUMatrix, shift, negnumber),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
// *this = row 'index' of matrix a (extracted via the backend GetARowByIndex); dense only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::GetARowByIndex(const Matrix<ElemType>& a, size_t index)
{
if (a.IsEmpty())
LogicError("GetARowByIndex: Matrix is empty.");
// WARNING: a and this must have same type
if (!(GetMatrixType() == a.GetMatrixType()))
NOT_IMPLEMENTED;
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->GetARowByIndex(*a.m_CPUMatrix, index),
m_GPUMatrix->GetARowByIndex(*a.m_GPUMatrix, index),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
// Row-wise element multiply of a and b with a column shift, written into c; dense only.
// 'bFirstmatrixfixed' selects which operand stays fixed while the other is shifted.
template <class ElemType>
void Matrix<ElemType>::ConductRowElementMultiplyWithShift(const Matrix<ElemType>& a, const Matrix<ElemType>& b, Matrix<ElemType>& c, size_t shift, bool bFirstmatrixfixed)
{
if (a.IsEmpty() || b.IsEmpty())
LogicError("InnerProduct: one of the input matrix is empty.");
DecideAndMoveToRightDevice(a, b, c);
if (a.GetMatrixType() != b.GetMatrixType())
NOT_IMPLEMENTED;
c.SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&c,
&c,
CPUMatrix<ElemType>::ConductRowElementMultiplyWithShift(*a.m_CPUMatrix, *b.m_CPUMatrix, *c.m_CPUMatrix, shift, bFirstmatrixfixed),
GPUMatrix<ElemType>::ConductRowElementMultiplyWithShift(*a.m_GPUMatrix, *b.m_GPUMatrix, *c.m_GPUMatrix, shift, bFirstmatrixfixed),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
// *this = element product of row vectors a and b with a column shift; a and b must be
// 1 x N row vectors with matching dimensions. Dense only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignElementProductOfWithShift(const Matrix<ElemType>& a, const Matrix<ElemType>& b, size_t shift)
{
if (a.IsEmpty() || b.IsEmpty())
LogicError("AssignElementProductOfWithShift: Matrix is empty.");
assert(a.GetNumRows() == b.GetNumRows() && a.GetNumCols() == b.GetNumCols());
if (!(a.GetNumRows() == b.GetNumRows() && a.GetNumCols() == b.GetNumCols()))
InvalidArgument("The input matrix dimensions do not match.");
if (a.GetNumRows() != 1)
InvalidArgument("AssignElementProductOfWithShiftNeg: The input matrix must be a row vector.");
DecideAndMoveToRightDevice(a, b, *this);
if (!(a.GetMatrixType() == b.GetMatrixType()))
NOT_IMPLEMENTED;
SwitchToMatrixType(a.GetMatrixType(), a.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->AssignElementProductOfWithShift(*a.m_CPUMatrix, *b.m_CPUMatrix, shift),
m_GPUMatrix->AssignElementProductOfWithShift(*a.m_GPUMatrix, *b.m_GPUMatrix, shift),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
// Backward pass of the recurrent CRF: computes beta from alpha and the score matrices.
template <class ElemType>
void Matrix<ElemType>::RCRFBackwardCompute(const Matrix<ElemType>& alpha, Matrix<ElemType>& beta,
Matrix<ElemType>& functionValues, const Matrix<ElemType>& lbls,
const Matrix<ElemType>& pos_scores, const Matrix<ElemType>& pair_scores, const int shift)
{
DecideAndMoveToRightDevice(alpha, beta);
functionValues._transferToDevice(alpha.GetDeviceId());
beta._transferToDevice(alpha.GetDeviceId());
// NOTE(review): the CPU path takes neither pos_scores nor shift while the GPU path
// takes both -- presumably the CPU implementation derives them differently; verify
// against CPUMatrix::RCRFBackwardCompute before relying on identical semantics.
DISPATCH_MATRIX_ON_FLAG(&alpha,
&beta,
CPUMatrix<ElemType>::RCRFBackwardCompute(
*alpha.m_CPUMatrix,
*beta.m_CPUMatrix,
*lbls.m_CPUMatrix,
*pair_scores.m_CPUMatrix),
GPUMatrix<ElemType>::RCRFBackwardCompute(
*alpha.m_GPUMatrix,
*beta.m_GPUMatrix,
*lbls.m_GPUMatrix,
*pos_scores.m_GPUMatrix,
*pair_scores.m_GPUMatrix, shift),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
// Gradient of the recurrent-CRF transition scores, accumulated into 'grd'.
// NOTE(review): the CPU path does not take startLbl/shift while the GPU path does --
// verify against the backend implementations.
template <class ElemType>
void Matrix<ElemType>::RCRFTransGrdCompute(const Matrix<ElemType>& lbls,
const Matrix<ElemType>& alpha,
const Matrix<ElemType>& beta,
const Matrix<ElemType>& pair_scores,
Matrix<ElemType>& grd,
const int startLbl,
const int shift)
{
DecideAndMoveToRightDevice(alpha, grd);
grd._transferToDevice(alpha.GetDeviceId());
DISPATCH_MATRIX_ON_FLAG(&alpha,
&grd,
CPUMatrix<ElemType>::RCRFTransGrdCompute(
*lbls.m_CPUMatrix,
*alpha.m_CPUMatrix,
*beta.m_CPUMatrix,
*pair_scores.m_CPUMatrix,
*grd.m_CPUMatrix),
GPUMatrix<ElemType>::RCRFTransGrdCompute(
*lbls.m_GPUMatrix,
*alpha.m_GPUMatrix,
*beta.m_GPUMatrix,
*pair_scores.m_GPUMatrix,
*grd.m_GPUMatrix,
startLbl,
shift),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
// Drops (masks) frames of *this based on the label/gamma matrices and the given
// threshold (dispatches to the backend DropFrame). 'label' and 'gamma' must have
// identical dimensions. Dense only.
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::DropFrame(const Matrix<ElemType>& label, const Matrix<ElemType>& gamma, const ElemType& threshhold)
{
    DecideAndMoveToRightDevice(*this, label, gamma);

    if (label.GetNumCols() != gamma.GetNumCols() || label.GetNumRows() != gamma.GetNumRows())
        // Fix: corrected the typo "gamm matrix" in the error message.
        LogicError("DropFrame: label matrix is not in the same size as gamma matrix.");
    SwitchToMatrixType(label.GetMatrixType(), label.GetFormat(), false);

    DISPATCH_MATRIX_ON_FLAG(this,
                            this,
                            m_CPUMatrix->DropFrame(*label.m_CPUMatrix, *gamma.m_CPUMatrix, threshhold),
                            m_GPUMatrix->DropFrame(*label.m_GPUMatrix, *gamma.m_GPUMatrix, threshhold),
                            NOT_IMPLEMENTED,
                            NOT_IMPLEMENTED);

    return *this;
}
/// <summary>Computes the sequence-training error signal of *this from the label, network-output
/// and gamma matrices (the previous comment, "c = alpha * (a-b)", was copied from
/// AssignScaledDifference and did not describe this function).</summary>
/// <param name="hsmoothingWeight">Interpolation weight applied by the backend AssignSequenceError</param>
/// <param name="label">Label matrix</param>
/// <param name="dnnoutput">Network output matrix</param>
/// <param name="gamma">Gamma matrix</param>
/// <param name="alpha">Scalar passed through to the backend</param>
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignSequenceError(const ElemType hsmoothingWeight, const Matrix<ElemType>& label,
const Matrix<ElemType>& dnnoutput, const Matrix<ElemType>& gamma, ElemType alpha)
{
DecideAndMoveToRightDevice(label, dnnoutput, gamma);
if (!(label.GetMatrixType() == gamma.GetMatrixType()))
NOT_IMPLEMENTED;
// *this adopts the label matrix's type/format; old contents are not preserved.
SwitchToMatrixType(label.GetMatrixType(), label.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->AssignSequenceError(hsmoothingWeight, *label.m_CPUMatrix, *dnnoutput.m_CPUMatrix, *gamma.m_CPUMatrix, alpha),
m_GPUMatrix->AssignSequenceError(hsmoothingWeight, *label.m_GPUMatrix, *dnnoutput.m_GPUMatrix, *gamma.m_GPUMatrix, alpha),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
#pragma endregion Static BLAS Functions
// TensorView currently does not interface with sparse matrices. For now, we just catch this and throw.
template <class ElemType>
static bool VerifyIsDense(const Matrix<ElemType>& a)
{
    // Guard for the TensorOp overloads below: tensor ops are dense-only, so fail
    // loudly on any sparse operand. Returns true so callers can chain with &&.
    const bool isDense = (a.GetMatrixType() == DENSE);
    if (!isDense)
        RuntimeError("TensorOp: Tensor operations are currently not supported for sparse matrices.");
    return true;
}
// Unary tensor operation: *this = beta * *this + alpha * op(a), with strided/reduced
// dimensions as described by the offset/dim/stride arguments. Dense only.
template <class ElemType>
void Matrix<ElemType>::TensorOp(ElemType beta, const Matrix<ElemType>& a, ElemType alpha, ElementWiseOperator op,
const array<size_t, 2>& offsets,
const SmallVector<size_t>& regularOpDims, const array<SmallVector<ptrdiff_t>, 2>& regularStrides,
const SmallVector<size_t>& reducingOpDims, const array<SmallVector<ptrdiff_t>, 2>& reducingStrides)
{
// VerifyIsDense() throws on sparse inputs; the && chain just sequences the checks.
VerifyIsDense(*this) && VerifyIsDense(a);
DecideAndMoveToRightDevice(*this, a);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->TensorOp(beta, *a.m_CPUMatrix, alpha, op, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides),
m_GPUMatrix->TensorOp(beta, *a.m_GPUMatrix, alpha, op, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
// Binary tensor operation: *this = beta * *this + alpha * op(a, b). Dense only.
template <class ElemType>
void Matrix<ElemType>::TensorOp(ElemType beta, const Matrix<ElemType>& a, const Matrix<ElemType>& b, ElemType alpha, ElementWiseOperator op,
const array<size_t, 3>& offsets,
const SmallVector<size_t>& regularOpDims, const array<SmallVector<ptrdiff_t>, 3>& regularStrides,
const SmallVector<size_t>& reducingOpDims, const array<SmallVector<ptrdiff_t>, 3>& reducingStrides)
{
// VerifyIsDense() throws on sparse inputs; the && chain just sequences the checks.
VerifyIsDense(*this) && VerifyIsDense(a) && VerifyIsDense(b);
DecideAndMoveToRightDevice(*this, a, b);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->TensorOp(beta, *a.m_CPUMatrix, *b.m_CPUMatrix, alpha, op, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides),
m_GPUMatrix->TensorOp(beta, *a.m_GPUMatrix, *b.m_GPUMatrix, alpha, op, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
// Ternary tensor operation: *this = beta * *this + alpha * op(a, b, c). Dense only.
template <class ElemType>
void Matrix<ElemType>::TensorOp(ElemType beta, const Matrix<ElemType>& a, const Matrix<ElemType>& b, const Matrix<ElemType>& c, ElemType alpha, ElementWiseOperator op,
const array<size_t, 4>& offsets,
const SmallVector<size_t>& regularOpDims, const array<SmallVector<ptrdiff_t>, 4>& regularStrides,
const SmallVector<size_t>& reducingOpDims, const array<SmallVector<ptrdiff_t>, 4>& reducingStrides)
{
// VerifyIsDense() throws on sparse inputs; the && chain just sequences the checks.
VerifyIsDense(*this) && VerifyIsDense(a) && VerifyIsDense(b) && VerifyIsDense(c);
DecideAndMoveToRightDevice(*this, a, b, c);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->TensorOp(beta, *a.m_CPUMatrix, *b.m_CPUMatrix, *c.m_CPUMatrix, alpha, op, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides),
m_GPUMatrix->TensorOp(beta, *a.m_GPUMatrix, *b.m_GPUMatrix, *c.m_GPUMatrix, alpha, op, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
// Explicit instantiation of the full Matrix template for the two numeric element types.
template class Matrix<float>;
template class Matrix<double>;
// We use Matrix<char> as the backing store for QuantizedMatrix, and also as a flag matrix.
// Let's explicitly instantiate the methods we need for that purpose
// -- construction / destruction / assignment --
template Matrix<char>::Matrix(DEVICEID_TYPE);
template Matrix<char>::Matrix(Matrix<char>&&);
template Matrix<char>::Matrix(const size_t numRows, const size_t numCols, DEVICEID_TYPE deviceId, const MatrixType matrixType, const MatrixFormat matrixFormat);
template Matrix<char>::Matrix(const size_t numRows, const size_t numCols, char* pArray, DEVICEID_TYPE deviceId, const size_t matrixFlags, const size_t nnz);
template Matrix<char>::~Matrix();
template Matrix<char>& Matrix<char>::operator=(Matrix<char>&& moveFrom);
// -- accessors and device transfer --
template char* Matrix<char>::BufferPointer() const;
template int Matrix<char>::GetDeviceId() const;
template size_t Matrix<char>::GetNumElements() const;
template Matrix<char> Matrix<char>::ColumnSlice(size_t startColumn, size_t numCols) const;
template void Matrix<char>::_transferToDevice(int id_to, bool isBeingMoved, bool emptyTransfer) const;
template void Matrix<char>::TransferToDeviceIfNotThere(int id_to, bool isBeingMoved, bool emptyTransfer, bool updatePreferredDevice) const;
template size_t Matrix<char>::GetNumRows() const;
template size_t Matrix<char>::GetNumCols() const;
// -- mutation --
template void Matrix<char>::SetValue(const char);
template void Matrix<char>::SetValue(size_t numRows, const size_t numCols, int deviceId, char* pArray, size_t matrixFlags);
template void Matrix<char>::SetValue(const Matrix<char>&, MatrixFormat);
template bool Matrix<char>::IsEmpty() const;
template void Matrix<char>::Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, bool growOnly);
}}}