// // Copyright (c) Microsoft. All rights reserved. // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. // #include "stdafx.h" #include "BestGpu.h" #ifdef CPUONLY #include "CommonMatrix.h" #include "GPUMatrix.h" #include "GPUSparseMatrix.h" #include "MatrixQuantizerGPU.h" #include "CuDnnFactories.h" #include "TensorShape.h" #include "GPUDataTransferer.h" #pragma warning(disable : 4100) // unreferenced formal parameter, which is OK since all functions in here are dummies; disabling this lets us copy-paste prototypes here when we add new functions #pragma warning(disable : 4702) // unreachable code, which we get from the NOT_IMPLEMENTED macro which is OK namespace Microsoft { namespace MSR { namespace CNTK { // the rest below are dummy implementations MATH_API std::size_t GetCUDNNVersion() { return 0; } /* stub: always reports version 0 */ void PrepareDevice(DEVICEID_TYPE deviceId); template GPUSPARSE_INDEX_TYPE GPUSparseMatrix::SecondaryIndexValueAt(size_t idx) const { return (GPUSPARSE_INDEX_TYPE) 0; } #pragma region Constructors and Destructor template GPUSparseMatrix::GPUSparseMatrix(DEVICEID_TYPE computeDevice, const MatrixFormat matrixFormat /*= MatrixFormat::matrixFormatSparseCSR*/) { } template void GPUSparseMatrix::ZeroInit(const MatrixFormat matrixFormat, const DEVICEID_TYPE computeDevice) { } template GPUSparseMatrix::GPUSparseMatrix(const GPUMatrix& deepCopy, const MatrixFormat matrixFormat /*= MatrixFormat::matrixFormatSparseCSR*/) { } template GPUSparseMatrix::GPUSparseMatrix(const GPUSparseMatrix& deepCopy) { } template GPUSparseMatrix::GPUSparseMatrix(const size_t numRows, const size_t numCols, const size_t numNZ, DEVICEID_TYPE computeDevice, const MatrixFormat matrixFormat /*= MatrixFormat::matrixFormatSparseCSR*/) { } // PrepareDevice - Set up the correct CUDA context for an operation // deviceId - the device on which the operation will take place // defaults to -1, which means use the matrix's current device template DEVICEID_TYPE 
GPUSparseMatrix::PrepareDevice(DEVICEID_TYPE deviceId /*=-1*/) const { return deviceId; } /* stub: hands the requested device id straight back */ template template void GPUSparseMatrix::DeepCast(const GPUSparseMatrix& deepCopy) { } template void GPUSparseMatrix::DeepCopy(const GPUSparseMatrix& deepCopy) { } template void GPUSparseMatrix::SetValue(const GPUSparseMatrix& deepCopy) { } #if 0 template void GPUSparseMatrix::SetValue(const CPUMatrix& denseMatrix) { } #endif template void GPUSparseMatrix::SetValue(const CPUSparseMatrix& denseMatrix) { } template void GPUSparseMatrix::SetValue(const GPUMatrix& denseMatrix) { } template void GPUSparseMatrix::SetValue(const GPUMatrix& denseMatrix, const MatrixFormat matrixFormat) { } template void GPUSparseMatrix::SetDiagonalValue(const ElemType v) { } template void GPUSparseMatrix::SetDiagonalValue(const GPUMatrix& vector) { } template GPUSPARSE_INDEX_TYPE* GPUSparseMatrix::GetCondensedVector() const { return NULL; } template void GPUSparseMatrix::MaskColumnsValue(const GPUMatrix& columnsMask, ElemType val, size_t numColsPerMaskEntry) { } template GPUSparseMatrix& GPUSparseMatrix::operator=(const GPUSparseMatrix& deepCopy) { return *this; } template GPUSparseMatrix::GPUSparseMatrix(GPUSparseMatrix&& moveFrom) { } template GPUSparseMatrix& GPUSparseMatrix::operator=(GPUSparseMatrix&& moveFrom) { return *this; } template GPUSparseMatrix::~GPUSparseMatrix() { } //ResizeAsAndCopyIndexFrom - Resize this sparse matrix to have the same element structure as the passed matrix // a - sparse matrix whose structure we want to clone // remark: this was done for element wise operations where the structure will be identical after an operation template void GPUSparseMatrix::ResizeAsAndCopyIndexFrom(const GPUSparseMatrix& a, const bool growOnly /*= true*/) { } //------------------------------------------------------------------------- // Start of new GPU Sparse Matrix code //------------------------------------------------------------------------- template void GPUSparseMatrix::ClearNzCount() { 
} template void GPUSparseMatrix::Allocate(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, const bool growOnly, bool keepExistingValues) { } template void GPUSparseMatrix::RequireSizeAndAllocate(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, const MatrixFormat matrixFormat, const bool growOnly, bool keepExistingValues) { } template void GPUSparseMatrix::RequireSizeAndAllocate(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, const bool growOnly, bool keepExistingValues) { } template void GPUSparseMatrix::RequireSize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, const MatrixFormat format, const bool growOnly) { } template void GPUSparseMatrix::RequireSize(const size_t numRows, const size_t numCols, const bool growOnly) { } template void GPUSparseMatrix::Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, const MatrixFormat matrixFormat, const bool growOnly) { } template void GPUSparseMatrix::Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, const bool growOnly) { } template void GPUSparseMatrix::AdjustCol2BlockId(const GPUSPARSE_INDEX_TYPE* cpuCol2BlockId, size_t numBlocks, bool useBlockId2Col) { } template GPUMatrix GPUSparseMatrix::CopyToDenseMatrix() const { GPUMatrix res(0); return res; } template void GPUSparseMatrix::CopyToDenseMatrix(GPUMatrix& denseMatrix) const { } template void GPUSparseMatrix::CopyToCPUSparseMatrix(CPUSparseMatrix& cpuSparseMatrix) const { } template void GPUSparseMatrix::ChangeDeviceTo(DEVICEID_TYPE toId) { } /* NOTE(review): the next stub is a GPUMatrix member sitting among the GPUSparseMatrix ones */ template template void GPUMatrix::CastAssignValuesOf(const GPUMatrix* other) { } //Reset matrix so it can be reused template void GPUSparseMatrix::Reset() { } #pragma endregion Constructors and Destructor #pragma region Static BLAS Functions // copy features to GPU matrix template void GPUSparseMatrix::SetMatrixFromCSCFormat(const CPUSPARSE_INDEX_TYPE* 
h_CSCCol, const CPUSPARSE_INDEX_TYPE* h_Row, const ElemType* h_Val, const size_t nz, const size_t numRows, const size_t numCols, const bool IsOnDevice /*= false*/, const DEVICEID_TYPE devId /*= -1*/, DataTransferer* transferer) { } template void GPUSparseMatrix::SetMatrixFromSBCFormat(const size_t*, const ElemType*, const size_t, const size_t, const size_t) { } // forward pass from feature to hidden layer template void GPUSparseMatrix::MultiplyAndWeightedAdd(ElemType alpha, const GPUMatrix& lhs, const bool transposeA, const GPUSparseMatrix& rhs, const bool transposeB, ElemType beta, GPUMatrix& c) { } // backward pass from hidden layer to feature weight template void GPUSparseMatrix::MultiplyAndAdd(ElemType alpha, const GPUMatrix& lhs, const bool transposeA, const GPUSparseMatrix& rhs, const bool transposeB, GPUSparseMatrix& c) { } template void GPUSparseMatrix::ColumnwiseScaleAndWeightedAdd(ElemType alpha, const GPUSparseMatrix& a, const GPUMatrix& v, ElemType beta, GPUMatrix& c) { } // used for gradient updates template void GPUSparseMatrix::ScaleAndAdd(const ElemType alpha, const GPUSparseMatrix& lhs, GPUMatrix& rhs) { } template GPUSparseMatrix& GPUSparseMatrix::InplaceTruncate(const ElemType threshold) { return *this; } // normal update for smoothed gradients c and current gradients (this) template void GPUSparseMatrix::NormalGrad(GPUMatrix& c, const ElemType momentum, ElemType unitGainFactor) { } template ElemType GPUSparseMatrix::Adagrad(GPUMatrix& c, const bool needAveMultiplier) { return 1; } /* NOTE(review): returns 1 here, whereas the dense GPUMatrix::Adagrad stub in this file returns 0 - confirm which is intended */ template void GPUSparseMatrix::FSAdagrad(GPUMatrix&, GPUMatrix&, ElemType, ElemType, ElemType, ElemType, ElemType) { } template void GPUSparseMatrix::Adam(GPUMatrix& c, GPUMatrix& functionValues, ElemType learnRatePerSample, ElemType momentum, ElemType adaWeight, ElemType adaMul, ElemType epsilon, ElemType unitGainFactor, bool adamax) { } template ElemType GPUSparseMatrix::RmsProp(GPUMatrix&, ElemType, ElemType, ElemType, ElemType, ElemType, const bool, const bool) { 
return 1; } template template void GPUSparseMatrix::AdaDelta(GPUMatrix&c, GPUMatrix&functionValues, AccumType learningRate, AccumType rho, AccumType epsilon, int* timestamps, int currentTimestamp) { } template void GPUSparseMatrix::MultiplyAndWeightedAdd(ElemType alpha, const GPUSparseMatrix& a, const bool transposeA, const GPUMatrix& b, const bool transposeD, ElemType beta, GPUMatrix& c) { } template void GPUSparseMatrix::Multiply(const GPUSparseMatrix& S, const GPUMatrix& D, GPUMatrix& C) { } template void GPUSparseMatrix::Multiply(const GPUMatrix& D, const GPUSparseMatrix& S, GPUMatrix& C) { } template size_t GPUSparseMatrix::ElemCountFromBufferSize(const size_t numRows, const size_t numCols, const MatrixFormat format, const size_t totalBufferSize) const { return 0; } template size_t GPUSparseMatrix::ElemCountFromBufferSize() const { return 0; } // PrepareBuffer - Get the dimensions start buffer, computes the starting row/column of each value // m - rows in the source // n - cols in the source // canReuseBuffer - target matrix can be reused for temporary space // func - function to call to count elements in the result (returns count, and fills csrRowPtr array) template void GPUSparseMatrix::PrepareBuffer(size_t m, size_t n, bool canReuseBuffer, std::function func) { } // Multiply - multiply one sparse matrix by another sparse matrix // S1 - first sparse matrix // transposeS1 - transpose first matrix? // S2 - second sparse matrix // transposeS2 - transpose second matrix? 
// c - result matrix // NOTE: if c has enough space allocated, it will be reused, otherwise it will be freed and a new memory block used template void GPUSparseMatrix::Multiply(const GPUSparseMatrix& S1, bool transposeS1, const GPUSparseMatrix& S2, bool transposeS2, GPUSparseMatrix& c) { } template GPUSparseMatrix& GPUSparseMatrix::AssignProductOf(const GPUSparseMatrix& a, const bool transposeA, const GPUSparseMatrix& /*b*/, const bool transposeB) { return *this; } template void GPUSparseMatrix::ScaleAndAdd(ElemType alpha, const GPUSparseMatrix& a, ElemType beta, const GPUSparseMatrix& /*b*/, GPUSparseMatrix& c) { } template void GPUSparseMatrix::ScaleAndAdd(ElemType alpha, const GPUSparseMatrix& a, ElemType beta, const GPUMatrix& /*b*/, GPUMatrix& c) { } template void GPUSparseMatrix::ScaleAndAdd(ElemType alpha, const GPUMatrix& /*a*/, ElemType beta, const GPUSparseMatrix& /*b*/, GPUMatrix& c) { } template void GPUSparseMatrix::Scale(ElemType alpha, GPUSparseMatrix& a) { } template void GPUSparseMatrix::ElementWisePower(ElemType alpha, const GPUSparseMatrix& a, GPUSparseMatrix& c) { } template ElemType GPUSparseMatrix::InnerProductOfMatrices(const GPUSparseMatrix& a, const GPUMatrix& /*b*/) { return ElemType(0); } template ElemType GPUSparseMatrix::InnerProductOfMatrices(const GPUMatrix& /*a*/, const GPUSparseMatrix& /*b*/) { return ElemType(0); } template void GPUSparseMatrix::InnerProduct(const GPUSparseMatrix&, const GPUMatrix&, GPUMatrix&, const bool) { } /* the AreEqual / IsEqualTo stubs that follow unconditionally return false */ template bool GPUSparseMatrix::AreEqual(const GPUSparseMatrix& a, const GPUSparseMatrix& /*b*/, const ElemType threshold) { return false; } template bool GPUSparseMatrix::AreEqual(const GPUMatrix& /*a*/, const GPUSparseMatrix& /*b*/, const ElemType threshold) { return false; } template bool GPUSparseMatrix::AreEqual(const GPUSparseMatrix& a, const GPUMatrix& /*b*/, const ElemType threshold) { return false; } template bool GPUSparseMatrix::IsEqualTo(const GPUSparseMatrix& a, const ElemType threshold) const { 
return false; } template bool GPUSparseMatrix::IsEqualTo(const GPUMatrix& /*a*/, const ElemType threshold) const { return false; } #pragma endregion Static BLAS Functions #pragma region Member BLAS Functions template GPUMatrix GPUSparseMatrix::ElementProductOf(const GPUSparseMatrix& a, const GPUMatrix& /*b*/) { GPUMatrix c(0); return c; } /* dense-sparse overload: forwards to the sparse-dense overload with the operands swapped */ template GPUMatrix GPUSparseMatrix::ElementProductOf(const GPUMatrix& a, const GPUSparseMatrix& b) { return GPUSparseMatrix::ElementProductOf(b, a); } /* the value-returning operators and Transpose() that follow hand back an unmodified copy of the object */ template GPUSparseMatrix GPUSparseMatrix::operator+(const GPUSparseMatrix& a) const { return *this; } template GPUSparseMatrix GPUSparseMatrix::operator-(const GPUSparseMatrix& a) const { return *this; } template GPUSparseMatrix& GPUSparseMatrix::operator^=(ElemType alpha) { return *this; } template GPUSparseMatrix GPUSparseMatrix::operator^(ElemType alpha) const { return *this; } template GPUSparseMatrix& GPUSparseMatrix::operator*=(ElemType alpha) { return *this; } template GPUSparseMatrix GPUSparseMatrix::operator*(ElemType alpha) const { return *this; } template GPUSparseMatrix& GPUSparseMatrix::AssignElementPowerOf(const GPUSparseMatrix& a, const ElemType power) { return *this; } template GPUSparseMatrix GPUSparseMatrix::Transpose() const { return *this; } template GPUSparseMatrix& GPUSparseMatrix::AssignTransposeOf(const GPUSparseMatrix& a) { return *this; } template void GPUSparseMatrix::InplaceTranspose() { } template GPUSparseMatrix GPUSparseMatrix::ColumnSlice(size_t startColumn, size_t numCols) const { GPUSparseMatrix a(0); return a; } template void GPUSparseMatrix::AssignColumnSliceToDense(GPUMatrix& slice, size_t startColumn, size_t numCols) const { } template GPUMatrix GPUSparseMatrix::CopyColumnSliceToDense(size_t startColumn, size_t numCols) const { GPUMatrix a(0); return a; } template GPUMatrix GPUSparseMatrix::DiagonalToDense() const { GPUMatrix a(0); return a; } template ElemType GPUSparseMatrix::SumOfAbsElements() const { return ElemType(0); } template ElemType 
GPUSparseMatrix::SumOfElements() const { return ElemType(0); } template ElemType GPUSparseMatrix::FrobeniusNorm() const { return ElemType(0); } template ElemType GPUSparseMatrix::MatrixNormInf() const { return ElemType(0); } template ElemType GPUSparseMatrix::MatrixNorm1() const { return ElemType(0); } #pragma endregion Member BLAS Functions #pragma region Other Functions template GPUSparseMatrix& GPUSparseMatrix::ElementInverse() { return *this; } template GPUSparseMatrix& GPUSparseMatrix::AssignElementInverseOf(const GPUSparseMatrix& a) { return *this; } template GPUSparseMatrix& GPUSparseMatrix::InplaceSigmoid() { return *this; } template GPUSparseMatrix& GPUSparseMatrix::AssignSigmoidOf(const GPUSparseMatrix& a) { return *this; } template GPUSparseMatrix& GPUSparseMatrix::InplaceLinearRectifierDerivative() { return *this; } template GPUSparseMatrix& GPUSparseMatrix::AssignLinearRectifierDerivativeOf(const GPUSparseMatrix& a) { return *this; } template GPUSparseMatrix& GPUSparseMatrix::InplaceTanh() { return *this; } template GPUSparseMatrix& GPUSparseMatrix::AssignTanhOf(const GPUSparseMatrix& a) { return *this; } /* NOTE(review): the two Atanh stubs that follow are GPUMatrix members, not GPUSparseMatrix ones */ template GPUMatrix& GPUMatrix::InplaceAtanh() { return *this; } template GPUMatrix& GPUMatrix::AssignAtanhOf(const GPUMatrix& /*a*/) { return *this; } template GPUSparseMatrix& GPUSparseMatrix::InplaceSqrt() { return *this; } template GPUSparseMatrix& GPUSparseMatrix::AssignSqrtOf(const GPUSparseMatrix& a) { return *this; } template GPUSparseMatrix& GPUSparseMatrix::InplaceExp() { return *this; } template GPUSparseMatrix& GPUSparseMatrix::AssignExpOf(const GPUSparseMatrix& a) { return *this; } template GPUSparseMatrix& GPUSparseMatrix::InplaceLog() { return *this; } template GPUSparseMatrix& GPUSparseMatrix::AssignLogOf(const GPUSparseMatrix& a) { return *this; } template GPUSparseMatrix& GPUSparseMatrix::InplaceAbs() { return *this; } template GPUSparseMatrix& GPUSparseMatrix::AssignAbsOf(const GPUSparseMatrix& a) { return *this; } template 
GPUSparseMatrix& GPUSparseMatrix::InplaceTruncateBottom(const ElemType threshold) { return *this; } template GPUSparseMatrix& GPUSparseMatrix::AssignTruncateBottomOf(const GPUSparseMatrix& a, const ElemType threshold) { return *this; } template GPUSparseMatrix& GPUSparseMatrix::InplaceTruncateTop(const ElemType threshold) { return *this; } template GPUSparseMatrix& GPUSparseMatrix::AssignTruncateTopOf(const GPUSparseMatrix& a, const ElemType threshold) { return *this; } template GPUSparseMatrix& GPUSparseMatrix::SetToZeroIfAbsLessThan(const ElemType threshold) { return *this; } template GPUSparseMatrix& GPUSparseMatrix::InplaceSoftThreshold(const ElemType threshold) { return (*this); } template size_t GPUSparseMatrix::IdentifyRowsWithValues() const { return 0; } #pragma endregion #pragma region Helper Functions /* stub: no host buffer is reserved; the result is always nullptr */ template void* GPUSparseMatrix::ReserveTempHostBuffer(const size_t sizeInByte) const { return nullptr; } template void GPUSparseMatrix::performElementWiseFunction(const ElementWiseOperator kind, const GPUSparseMatrix& src) { } template void GPUSparseMatrix::SetMatrixFromCSRFormat(const CPUSPARSE_INDEX_TYPE* h_CSRRow, const CPUSPARSE_INDEX_TYPE* h_Col, const ElemType* h_Val, const size_t nz, const size_t numRows, const size_t numCols, const bool IsOnDevice /*= false*/, const DEVICEID_TYPE devId /*= -1*/) { } template void GPUSparseMatrix::GetMatrixFromCSRFormat(CPUSPARSE_INDEX_TYPE*& h_CSRRow, CPUSPARSE_INDEX_TYPE*& h_Col, ElemType*& h_Val, size_t& numElemAllocated, size_t& nz, size_t& numRows, size_t& numCols) const { } template void GPUSparseMatrix::GetMatrixFromCSCFormat(CPUSPARSE_INDEX_TYPE*& h_CSCCol, CPUSPARSE_INDEX_TYPE*& h_Row, ElemType*& h_Val, size_t& numElemAllocated, size_t& nz, size_t& numRows, size_t& numCols) const { } template void GPUSparseMatrix::ConvertToSparseFormat(MatrixFormat newFormat) { } template void GPUSparseMatrix::ConvertToSparseFormat(MatrixFormat newFormat, GPUSparseMatrix& outMatrix) const { } template void 
GPUSparseMatrix::ConvolveAndWeightedAdd(ElemType alpha, const GPUMatrix& lhs, const bool transposeA, const GPUSparseMatrix& rhs, const bool transposeB, ElemType beta, GPUMatrix& c, size_t numChannels, size_t horizontalSubsample, bool padding, bool channelwise){}; template void GPUSparseMatrix::TensorShuffleScaleAndAdd(ElemType keepWeight, const GPUSparseMatrix& a, size_t D, size_t S, size_t M, size_t K, size_t T, ElemType scaleFactor, const GPUSparseMatrix& b, GPUSparseMatrix& c) { } template void GPUSparseMatrix::Reshape(const size_t numRows, const size_t numCols) { } /* stub: unconditionally reports the matrix as valid */ template bool GPUSparseMatrix::IsValid() const { return true; } template template void GPUSparseMatrix::ConvertBuffer(OutType* outBuffer, const InType* inBuffer, const size_t size) { } template GPUSparseMatrix& GPUSparseMatrix::AssignOneHot(const GPUMatrix& a, vector& shape, size_t axis) { return *this; } #pragma endregion Helper Functions template class MATH_API GPUSparseMatrix; template class MATH_API GPUSparseMatrix; template class MATH_API GPUSparseMatrix; template class MATH_API GPUSparseMatrix; template class MATH_API GPUSparseMatrix; template class MATH_API GPUSparseMatrix; template MATH_API File& operator>>(File& stream, GPUSparseMatrix& us) { return stream; } template MATH_API File& operator>>(File& stream, GPUSparseMatrix& us); template MATH_API File& operator>>(File& stream, GPUSparseMatrix& us); template MATH_API File& operator>>(File& stream, GPUSparseMatrix& us); template MATH_API File& operator<<(File& stream, const GPUSparseMatrix& us) { return stream; } template MATH_API File& operator<<(File& stream, const GPUSparseMatrix& us); template MATH_API File& operator<<(File& stream, const GPUSparseMatrix& us); template MATH_API File& operator<<(File& stream, const GPUSparseMatrix& us); #pragma region DeviceBoundNumber class /* the copy constructor body consists solely of the NOT_IMPLEMENTED macro */ template DeviceBoundNumber::DeviceBoundNumber(const DeviceBoundNumber& deepCopy) { NOT_IMPLEMENTED; } template DeviceBoundNumber::DeviceBoundNumber(DeviceBoundNumber&& 
shallowCopy) { this->ShallowCopyFrom(shallowCopy.m_data, shallowCopy.m_computeDevice); shallowCopy.m_data = NULL; } /* move ctor: calls ShallowCopyFrom (an empty stub in this file) and then clears the source's m_data */ template void DeviceBoundNumber::ShallowCopyFrom(ElemType* newVal, int newValsDevceId) { } template DeviceBoundNumber::~DeviceBoundNumber() { } #pragma endregion DeviceBoundNumber class #pragma region Helper functions template void GPUMatrix::SetDevice(DEVICEID_TYPE deviceId){}; // PrepareDevice - Set up the correct CUDA context for an operation // deviceId - the device on which the operation will take place // defaults to -1, which means use the matrix's current device template DEVICEID_TYPE GPUMatrix::PrepareDevice(DEVICEID_TYPE deviceId /*=-1*/) const { return deviceId; } template ElemType* GPUMatrix::CopyToArray() const { return NULL; } template void GPUMatrix::CopySection(size_t numRows, size_t numCols, ElemType* dst, size_t colStride) const { } //memory will be allocated by the callee if there is not enough, but it needs to be deleted by the caller once it is done //returns the number of elements copied template size_t GPUMatrix::CopyToArray(ElemType*& arrayCopyTo, size_t& currentArraySize) const { return 0; } template void GPUMatrix::ChangeDeviceTo(int to_id) { } template void GPUMatrix::performElementWiseFunction(const ElementWiseOperator kind, const ElemType* src) { } #pragma endregion Helper functions #pragma region Constructors and Destructor //should only be used by constructors. 
template void GPUMatrix::ZeroInit(int deviceId) { } template GPUMatrix::GPUMatrix(int deviceId){}; template GPUMatrix::GPUMatrix(const size_t numRows, const size_t numCols, int deviceId){}; template GPUMatrix::GPUMatrix(const size_t numRows, const size_t numCols, int deviceId, ElemType* pArray, const size_t matrixFlags){}; template GPUMatrix::GPUMatrix(const GPUMatrix& deepCopyFrom) { } template GPUMatrix::GPUMatrix(GPUMatrix&& moveFrom) { } //assignment operator, deep copy template GPUMatrix& GPUMatrix::operator=(const GPUMatrix& deepCopyFrom) { return *this; } //move assignment operator, shallow copy template GPUMatrix& GPUMatrix::operator=(GPUMatrix&& moveFrom) { return *this; } template GPUMatrix::~GPUMatrix(void) { } template void GPUMatrix::Clear() { } #pragma endregion Constructors and Destructor #pragma region Basic Operators /* stub: ColumnSlice and Diagonal each return a GPUMatrix constructed with argument 0 */ template GPUMatrix GPUMatrix::ColumnSlice(size_t startColumn, size_t numCols) const { GPUMatrix slice(0); return slice; } template GPUMatrix& GPUMatrix::AssignColumnSlice(const GPUMatrix& fromMatrix, size_t startColumn, size_t numCols) { return *this; } template GPUMatrix& GPUMatrix::SetColumnSlice(const GPUMatrix& fromMatrix, size_t startColumn, size_t numCols) { return *this; } template GPUMatrix GPUMatrix::Diagonal() const { GPUMatrix diag(0); return diag; } //for each column of a, we assign numRows starting from startIndex to this template GPUMatrix& GPUMatrix::AssignRowSliceValuesOf(const GPUMatrix& /*a*/, const size_t startIndex, const size_t numRows) { return *this; } //for each column of a, we assign all rows of a to this starting from startIndex template GPUMatrix& GPUMatrix::AssignToRowSliceValuesOf(const GPUMatrix& a, const size_t startIndex, const size_t numRows) { return *this; } //for each column of a, we add all rows of a to this starting from startIndex template GPUMatrix& GPUMatrix::AddToRowSliceValuesOf(const GPUMatrix& /*a*/, const size_t startIndex, const size_t numRows) { return *this; } template GPUMatrix& 
GPUMatrix::AddWithRowSliceValuesOf(const GPUMatrix& /*a*/, const size_t startIndex, const size_t numRows) { return *this; } //template GPUMatrix& GPUMatrix::AssignRowStackValuesOf(const std::vector*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols) { return *this; } template GPUMatrix& GPUMatrix::AssignRepeatOf(const GPUMatrix& /*a*/, const size_t numRowRepeats, const size_t numColRepeats) { return *this; } template GPUMatrix& GPUMatrix::AddToRowRepeatValuesOf(const GPUMatrix& /*a*/, const size_t numRowRepeats) { return *this; } template GPUMatrix& GPUMatrix::AssignPositiveAndShiftedNegSample(const GPUMatrix& a, const size_t posNumber, const size_t negNumber, const size_t shiftNumber) { return *this; } template GPUMatrix& GPUMatrix::AddFoldedPositiveAndShiftedNegSample(const GPUMatrix& a, const size_t posNumber, const size_t negNumber, const size_t shiftNumber) { return *this; } template GPUMatrix GPUMatrix::Transpose() const { return *this; } // GetCublasHandle - get a cublas handle for the given GPU, should only need one per GPU // computeDevice - The compute device for which the cublas handle is desired // returns: cublas handle // NOTE: we currently don't bother to ever free the CUBLAS handle, it will be freed automatically by CUDA when the process ends template cublasHandle_t GPUMatrix::GetCublasHandle(int computeDevice /*=-1*/) { cublasHandle_t cuHandle = 0; return cuHandle; } /* stub: no handle is created here; a zero-initialized handle is returned */ template GPUMatrix& GPUMatrix::AssignTransposeOf(const GPUMatrix& /*a*/) { return *this; } template GPUMatrix& GPUMatrix::DoGatherColumnsOf(ElemType beta, const GPUMatrix& m, const GPUMatrix& a, ElemType alpha) { return *this; } template GPUMatrix& GPUMatrix::DoScatterColumnsOf(ElemType beta, const GPUMatrix& m, const GPUMatrix& a, ElemType alpha, bool idxHaveDups) { return *this; } template GPUMatrix& GPUMatrix::GatherFromTarget(const GPUMatrix& indices, const GPUMatrix& target, size_t row_elements) { return *this; } template GPUMatrix& 
GPUMatrix::ScatterToIndices(const GPUMatrix& values, const GPUMatrix& indices, size_t row_elements, const GPUMatrix* mask/* = nullptr*/) { return *this; } template void GPUMatrix::SetValue(const ElemType v) { } template void GPUMatrix::SetValue(const ElemType* d_v) // d_v is a pointer to the value in GPU memory { } template void GPUMatrix::SetColumn(const ElemType* colPointer, size_t colInd) { } template void GPUMatrix::SetColumn(const GPUMatrix& valMat, size_t colInd) { } template void GPUMatrix::MaskColumnsValue(const GPUMatrix& columnsMask, ElemType val, size_t numColsPerMaskEntry) { } template void GPUMatrix::CopyColumnsStrided(const GPUMatrix& fromMatrix, size_t numCols, size_t srcNumColsStride, size_t destNumColsStride) { } #if 0 template void GPUMatrix::SetValue(CPUMatrix const&) { } #endif template void GPUMatrix::SetValue(GPUMatrix const&) { } #if 0 template void GPUMatrix::SetValue(CPUSparseMatrix const&) { } template void GPUMatrix::SetValue(GPUSparseMatrix const&) { } #endif template void GPUMatrix::SetValue(const size_t numRows, const size_t numCols, int deviceId, ElemType* pArray, size_t matrixFlags, DataTransferer* transferer) { } template void GPUMatrix::SetDiagonalValue(const ElemType v) { } template void GPUMatrix::SetDiagonalValue(const GPUMatrix& vector) { } template void GPUMatrix::SetUniformRandomValue(const ElemType low, const ElemType high, unsigned long seed) { } template void GPUMatrix::SetUniformRandomValue(RNGHandle& rngHandle, const ElemType low, const ElemType high) { } template void GPUMatrix::SetGaussianRandomValue(RNGHandle& rngHandle, const ElemType mean, const ElemType stdev) { } template void GPUMatrix::SetGumbelRandomValue(RNGHandle& rngHandle, const ElemType loc, const ElemType scale) { } template void GPUMatrix::SetGaussianRandomValue(const ElemType mean, const ElemType sigma, unsigned long seed) { } template void GPUMatrix::SetTruncatedNormalRandomValue(const ElemType mean, const ElemType sigma, unsigned long seed) { } 
//maskRate: percentage of values masked out (similar to dropout rate) //scaleValue: which scale value to set to the left ones (unmasked items). template void GPUMatrix::SetUniformRandomMask(const ElemType maskRate, const ElemType scaleValue, RNGHandle& seed) { } template ElemType GPUMatrix::Adagrad(GPUMatrix& gradients, const bool needAveMultiplier) { return 0; } template void GPUMatrix::FSAdagrad(GPUMatrix& gradients, GPUMatrix& functionValues, ElemType learnRatePerSample, ElemType momentum, ElemType adaWeight, ElemType adaMul, ElemType unitGainFactor) { } template void GPUMatrix::Adam(GPUMatrix& gradients, GPUMatrix& functionValues, ElemType learnRatePerSample, ElemType momentum, ElemType adaWeight, ElemType adaMul, ElemType epsilon, ElemType unitGainFactor, bool adamax) { } template ElemType GPUMatrix::RmsProp(GPUMatrix& gradients, ElemType RMS_GAMMA, ElemType RMS_WGT_INC, ElemType RMS_WGT_MAX, ElemType RMS_WGT_DEC, ElemType RMS_WGT_MIN, const bool needAveMultiplier, const bool initialized) { return 0; } template template void GPUMatrix::AdaDelta(GPUMatrix& gradients, GPUMatrix& functionValues, ElemType learningRate, ElemType rho, ElemType epsilon) { } template void GPUMatrix::AdaDeltaFlushTimestamps(size_t cols, ElemType rho, int* timestamps, int currentTimestamp) { } template void GPUMatrix::Reshape(const size_t numRows, const size_t numCols) { } template void GPUMatrix::RequireSize(const size_t numRows, const size_t numCols, bool growOnly) { } template void GPUMatrix::Resize(const size_t numRows, const size_t numCols, bool growOnly) { } template size_t GPUMatrix::LocateElement(const size_t row, const size_t col) const { return 0; } template std::unique_ptr> GPUMatrix::GetOrCreateWorkspace() const { return NULL; } template void GPUMatrix::ReleaseWorkspace(std::unique_ptr> src) const { } template size_t GPUMatrix::LocateColumn(const size_t col) const { return 0; } template ElemType GPUMatrix::Get00Element() const { ElemType res = 0; return res; } #pragma 
endregion Basic Operators #pragma region Member BLAS Functions template GPUMatrix& GPUMatrix::operator+=(ElemType alpha) { return *this; } template GPUMatrix GPUMatrix::operator+(ElemType alpha) const { return *this; } template GPUMatrix& GPUMatrix::AssignSumOf(const ElemType alpha, const GPUMatrix& /*a*/) { return (*this); } template GPUMatrix& GPUMatrix::operator+=(const GPUMatrix& /*a*/) { return *this; } template GPUMatrix GPUMatrix::operator+(const GPUMatrix& /*a*/) const { return *this; } template GPUMatrix& GPUMatrix::AssignSumOf(const GPUMatrix& /*a*/, const GPUMatrix& /*b*/) { return (*this); } template GPUMatrix& GPUMatrix::operator-=(ElemType alpha) { return *this; } template GPUMatrix GPUMatrix::operator-(ElemType alpha) const { return *this; } template GPUMatrix& GPUMatrix::AssignDifferenceOf(const ElemType alpha, const GPUMatrix& /*a*/) { return *this; } template GPUMatrix& GPUMatrix::AssignDifferenceOf(const GPUMatrix& /*a*/, const ElemType alpha) { return *this; } template GPUMatrix& GPUMatrix::operator-=(const GPUMatrix& /*a*/) { return *this; } template GPUMatrix GPUMatrix::operator-(const GPUMatrix& /*a*/) const { return *this; } template GPUMatrix& GPUMatrix::AssignDifferenceOf(const GPUMatrix& /*a*/, const GPUMatrix& /*b*/) { return *this; } template GPUMatrix& GPUMatrix::operator*=(ElemType alpha) { return *this; } template GPUMatrix GPUMatrix::operator*(ElemType alpha) const { return *this; } template GPUMatrix& GPUMatrix::AssignProductOf(const ElemType alpha, const GPUMatrix& /*a*/) { return *this; } template GPUMatrix& GPUMatrix::AssignProductOf(const GPUMatrix& /*a*/, const bool transposeA, const GPUMatrix& /*b*/, const bool transposeB) { return *this; } template GPUMatrix GPUMatrix::operator*(const GPUMatrix& /*a*/) const { return *this; } template GPUMatrix& GPUMatrix::operator/=(ElemType alpha) { return (*this); } template GPUMatrix GPUMatrix::operator/(ElemType alpha) const { return *this; } //element-wise power template GPUMatrix& 
GPUMatrix::operator^=(ElemType alpha) { return *this; } template GPUMatrix GPUMatrix::operator^(ElemType alpha) const { return *this; } template GPUMatrix& GPUMatrix::AssignElementPowerOf(const GPUMatrix& /*a*/, const ElemType power) { return *this; } template GPUMatrix& GPUMatrix::AddElementProductOf(const GPUMatrix& /*a*/, const GPUMatrix& /*b*/) { return *this; } template GPUMatrix& GPUMatrix::ColumnElementMultiplyWith(const GPUMatrix& /*a*/) { return *this; } template GPUMatrix& GPUMatrix::RowElementMultiplyWith(const GPUMatrix& /*a*/) { return *this; } template GPUMatrix& GPUMatrix::ColumnElementDivideBy(const GPUMatrix& /*a*/) { return *this; } template GPUMatrix& GPUMatrix::RowElementDivideBy(const GPUMatrix& /*a*/) { return *this; } template GPUMatrix& GPUMatrix::ElementInverse() { return *this; } template GPUMatrix& GPUMatrix::AssignElementInverseOf(const GPUMatrix& /*a*/) { return *this; } template GPUMatrix& GPUMatrix::InplaceSigmoid() { return *this; } template GPUMatrix& GPUMatrix::AssignSigmoidOf(const GPUMatrix& /*a*/) { return *this; } template GPUMatrix& GPUMatrix::InplaceSigmoidDerivative() { return *this; } template GPUMatrix& GPUMatrix::AssignSigmoidDerivativeOf(const GPUMatrix& /*a*/) { return *this; } template GPUMatrix& GPUMatrix::InplaceTanh() { return *this; } template GPUMatrix& GPUMatrix::AssignTanhOf(const GPUMatrix& /*a*/) { return *this; } template GPUMatrix& GPUMatrix::InplaceLogSoftmax(const bool isColWise) { return *this; } template GPUMatrix& GPUMatrix::AssignLogSoftmaxOf(const GPUMatrix& /*a*/, const bool isColWise) { return *this; } template GPUMatrix& GPUMatrix::InplaceHardmax(const bool isColWise) { return *this; } template GPUMatrix& GPUMatrix::AssignHardmaxOf(const GPUMatrix& /*a*/, const bool isColWise) { return *this; } template GPUMatrix& GPUMatrix::DropFrame(const GPUMatrix& label, const GPUMatrix& gamma, const ElemType& threshhold) { return *this; } template GPUMatrix& GPUMatrix::AssignSequenceError(const ElemType 
hsmoothingWeight, const GPUMatrix& label, const GPUMatrix& dnnoutput, const GPUMatrix& gamma, ElemType alpha) { return *this; } template GPUMatrix& GPUMatrix::AssignCTCScore(const GPUMatrix& prob, GPUMatrix& alpha, GPUMatrix& beta, const GPUMatrix phoneSeq, const GPUMatrix phoneBound, GPUMatrix & totalScore, const std::vector& uttMap, const std::vector & uttBeginFrame, const std::vector & uttFrameNum, const std::vector & uttPhoneNum, const size_t samplesInRecurrentStep, const size_t maxFrameNum, const size_t blankTokenId, const int delayConstraint, const bool isColWise) { return *this; } template GPUMatrix& GPUMatrix::AssignRNNTScore(const GPUMatrix& prob, GPUMatrix& alpha, GPUMatrix& beta, const GPUMatrix phoneSeq, const GPUMatrix phoneBound, const vector& uttFrameToChanInd, const vector & uttFrameBeginIdx, const vector & uttBeginForOutputditribution, const vector& uttPhoneToChanInd, const vector & uttPhoneBeginIdx, const vector & uttFrameNum, const vector & uttPhoneNum, const size_t numParallelSequences, const size_t numPhoneParallelSequences, const size_t maxPhoneNum, const size_t maxFrameNum, GPUMatrix& totalScore, const size_t blankTokenId, const int delayConstraint, const bool isColWise) { return *this; } template GPUMatrix& GPUMatrix::AssignUserOp1(GPUMatrix& in1, GPUMatrix& in2, const vector& uttFrameToChanInd, const vector& uttPhoneToChanInd, const vector& uttFrameBeginIdx, const vector& uttPhoneBeginIdx, const vector& uttBeginForOutputditribution, const vector& uttFrameNum, const vector& uttPhoneNum, const size_t totalcol, const size_t numParallelSequences, const size_t numPhoneParallelSequences) { return *this; } template GPUMatrix& GPUMatrix::AssignUserOp2(GPUMatrix& in1, const vector& uttFrameToChanInd, const vector& uttPhoneToChanInd, const vector& uttFrameBeginIdx, const vector& uttPhoneBeginIdx, const vector& uttBeginForOutputditribution, const vector& uttFrameNum, const vector& uttPhoneNum, const size_t numParallelSequences, const size_t 
numPhoneParallelSequences, const size_t maxFrameNum, const size_t maxPhoneNum, const size_t Idx) { return *this; } GPUMatrix& GPUMatrix::InplaceSqrt() { return *this; } template GPUMatrix& GPUMatrix::AssignSqrtOf(const GPUMatrix& /*a*/) { return *this; } template GPUMatrix& GPUMatrix::InplaceExp() { return *this; } template GPUMatrix& GPUMatrix::AssignExpOf(const GPUMatrix& /*a*/) { return *this; } template GPUMatrix& GPUMatrix::InplaceLog() { return *this; } template GPUMatrix& GPUMatrix::AssignLogOf(const GPUMatrix& /*a*/) { return *this; } template GPUMatrix& GPUMatrix::InplaceAbs() { return *this; } template GPUMatrix& GPUMatrix::AssignAbsOf(const GPUMatrix& /*a*/) { return *this; } template GPUMatrix& GPUMatrix::InplaceLinearRectifierDerivative() { return *this; } template GPUMatrix& GPUMatrix::AssignLinearRectifierDerivativeOf(const GPUMatrix& /*a*/) { return *this; } template GPUMatrix& GPUMatrix::InplaceCosine() { return *this; } template GPUMatrix& GPUMatrix::AssignCosineOf(const GPUMatrix& /*a*/) { return *this; } template GPUMatrix& GPUMatrix::InplaceNegativeSine() { return *this; } template GPUMatrix& GPUMatrix::AssignNegativeSineOf(const GPUMatrix& /*a*/) { return *this; } template GPUMatrix& GPUMatrix::InplaceTan() { return *this; } template GPUMatrix& GPUMatrix::AssignTanOf(const GPUMatrix& /*a*/) { return *this; } template GPUMatrix& GPUMatrix::InplaceAcos() { return *this; } template GPUMatrix& GPUMatrix::AssignAcosOf(const GPUMatrix& /*a*/) { return *this; } template GPUMatrix& GPUMatrix::InplaceAsin() { return *this; } template GPUMatrix& GPUMatrix::AssignAsinOf(const GPUMatrix& /*a*/) { return *this; } template GPUMatrix& GPUMatrix::InplaceAtan() { return *this; } template GPUMatrix& GPUMatrix::AssignAtanOf(const GPUMatrix& /*a*/) { return *this; } template GPUMatrix& GPUMatrix::InplaceCosh() { return *this; } template GPUMatrix& GPUMatrix::AssignCoshOf(const GPUMatrix& /*a*/) { return *this; } template GPUMatrix& GPUMatrix::InplaceSinh() { 
return *this; } template GPUMatrix& GPUMatrix::AssignSinhOf(const GPUMatrix& /*a*/) { return *this; } template GPUMatrix& GPUMatrix::InplaceAsinh() { return *this; } template GPUMatrix& GPUMatrix::AssignAsinhOf(const GPUMatrix& /*a*/) { return *this; } template GPUMatrix& GPUMatrix::InplaceTruncateBottom(const ElemType threshold) { return *this; } template GPUMatrix& GPUMatrix::AssignTruncateBottomOf(const GPUMatrix& /*a*/, const ElemType threshold) { return *this; } template GPUMatrix& GPUMatrix::InplaceTruncateTop(const ElemType threshold) { return *this; } template GPUMatrix& GPUMatrix::AssignTruncateTopOf(const GPUMatrix& /*a*/, const ElemType threshold) { return *this; } template GPUMatrix& GPUMatrix::SetToZeroIfAbsLessThan(const ElemType threshold) { return *this; } template ElemType GPUMatrix::SumOfAbsElements() const { return ElemType(0); } template ElemType GPUMatrix::SumOfElements() const { return ElemType(0); } template GPUMatrix& GPUMatrix::AssignSumOfElements(const GPUMatrix& /*a*/) { return (*this); } template void GPUMatrix::MinusOneAt(GPUMatrix& c, const size_t position) { } template void GPUMatrix::VectorSum(const GPUMatrix& a, GPUMatrix& c, const bool isColWise) { } template GPUMatrix& GPUMatrix::InplaceTruncate(const ElemType threshold) { return (*this); } template GPUMatrix& GPUMatrix::InplaceSoftThreshold(const ElemType threshold) { return (*this); } template GPUMatrix& GPUMatrix::GetARowByIndex(const GPUMatrix& a, const size_t m) { return (*this); } template GPUMatrix& GPUMatrix::AssignElementProductOfWithShiftNeg(const GPUMatrix& a, const GPUMatrix& b, const size_t shift, const size_t nt) { return (*this); } template GPUMatrix& GPUMatrix::AssignElementProductOfWithShift(const GPUMatrix& a, const GPUMatrix& b, const size_t shift) { return (*this); } template void GPUMatrix::InnerProductWithShiftNeg(const GPUMatrix& a, const GPUMatrix& b, GPUMatrix& c, const size_t shift, const size_t nt) { } template void 
GPUMatrix::ConductRowElementMultiplyWithShift(const GPUMatrix& a, const GPUMatrix& b, GPUMatrix& c, const size_t shift, const bool isafixed) { } template DeviceBoundNumber GPUMatrix::Sum_AsDeviceBoundNum() const { DeviceBoundNumber result; return result; } template ElemType GPUMatrix::AbsoluteMax() const { return ElemType(0); } template GPUMatrix& GPUMatrix::ElementMultiplyWith(const GPUMatrix& /*a*/) { return *this; } template GPUMatrix& GPUMatrix::AssignElementProductOf(const GPUMatrix& /*a*/, const GPUMatrix& /*b*/) { return *this; } template GPUMatrix& GPUMatrix::AssignElementDivisionOf(const GPUMatrix& /*a*/, const GPUMatrix& /*b*/) { return *this; } template GPUMatrix& GPUMatrix::ElementDivideBy(const GPUMatrix& /*a*/) { return *this; } template bool GPUMatrix::IsEqualTo(const GPUMatrix& a, const ElemType threshold /*= 1e-8*/) const { return AreEqual(*this, a, threshold); } template void GPUMatrix::VectorNorm1(GPUMatrix& c, const bool isColWise) const { } template GPUMatrix& GPUMatrix::AssignVectorNorm1Of(GPUMatrix& /*a*/, const bool isColWise) { return *this; } template void GPUMatrix::VectorNorm2(GPUMatrix& c, const bool isColWise) const { } template GPUMatrix& GPUMatrix::AssignVectorNorm2Of(GPUMatrix& /*a*/, const bool isColWise) { return *this; } template void GPUMatrix::VectorNormInf(GPUMatrix& c, const bool isColWise) const { } template GPUMatrix& GPUMatrix::AssignVectorNormInfOf(GPUMatrix& /*a*/, const bool isColWise) { return *this; } template GPUMatrix& GPUMatrix::AssignInnerProductOf(const GPUMatrix& /*a*/, const GPUMatrix& /*b*/, const bool isColWise) { return *this; } template GPUMatrix& GPUMatrix::AssignKhatriRaoProductOf(const GPUMatrix& /*a*/, const GPUMatrix& /*b*/) { return *this; } //column-wise reshaped product. Used to compute KhatriRaoProduct Gradient // this = reshape each column of a from (K1xK2,1) to (K1, K2) // if each column of a is not transposed, each (K1, K2) times each column of b (K2, frames). 
// the output is a (K1, frames) matrix // if each column of a is tranposed, each (K1, K2)^T times each column of b(K1, frames) and output is (K2, frames) template GPUMatrix& GPUMatrix::AddColumnReshapeProductOf(const GPUMatrix& /*a*/, const GPUMatrix& /*b*/, const bool transposeAColumn) { return *this; } template GPUMatrix& GPUMatrix::AddWithScaleOf(ElemType alpha, const GPUMatrix& /*a*/) { return *this; } template ElemType GPUMatrix::FrobeniusNorm() const { ElemType h_sum = 0; return (h_sum); } template GPUMatrix& GPUMatrix::AssignFrobeniusNormOf(const GPUMatrix& /*a*/) { return *this; } template ElemType GPUMatrix::MatrixNormInf() const { ElemType h_maxAbs = 0; return h_maxAbs; } template ElemType GPUMatrix::MatrixNorm1() const { return ElemType(0); } template ElemType GPUMatrix::MatrixNorm0() const { return ElemType(0); } template GPUMatrix& GPUMatrix::AssignSignOf(const GPUMatrix& /*a*/) { return *this; } template GPUMatrix& GPUMatrix::AddSignOf(const GPUMatrix& /*a*/) { return *this; } template void GPUMatrix::VectorMax(GPUMatrix& maxIndexes, GPUMatrix& maxValues, const bool isColWise) const { } template void GPUMatrix::VectorMax(GPUMatrix& maxIndexes, GPUMatrix& maxValues, const bool isColWise, int topK) const { } template void GPUMatrix::VectorMin(GPUMatrix& minIndexes, GPUMatrix& minValues, const bool isColWise) const { } template GPUMatrix& GPUMatrix::AssignNumOfDiff(const GPUMatrix& /*a*/, const GPUMatrix& /*b*/, bool /*searchInCol = false*/) { return *this; } #pragma endregion Member BLAS Functions #pragma region Other helper functions template void GPUMatrix::Print(const char* matrixName, size_t rowStart, size_t rowEnd, size_t colStart, size_t colEnd) const { } template void GPUMatrix::Print(const char* matrixName /*=nullptr*/) const { } //helpfer function used for convolution neural network template GPUMatrix& GPUMatrix::AssignPackedConvolutionInput(const GPUMatrix& inputSubBatch, const size_t inputWidth, const size_t inputHeight, const size_t 
inputChannels, const size_t outputWidth, const size_t outputHeight, const size_t outputChannels, const size_t kernelWidth, const size_t kernelHeight, const size_t horizontalSubsample, const size_t verticalSubsample, const bool zeroPadding) { return *this; } //helpfer function used for convolution neural network template GPUMatrix& GPUMatrix::UnpackConvolutionInput(GPUMatrix& inputSubBatch, const size_t inputWidth, const size_t inputHeight, const size_t inputChannels, const size_t outputWidth, const size_t outputHeight, const size_t outputChannels, const size_t kernelWidth, const size_t kernelHeight, const size_t horizontalSubsample, const size_t verticalSubsample, const bool zeroPadding) const { return inputSubBatch; } template GPUMatrix& GPUMatrix::AssignMaxPoolingResult(const GPUMatrix& inputBatch, const size_t channels, const size_t inputWidth, const size_t inputHeight, const size_t inputSizePerSample, const size_t outputWidth, const size_t outputHeight, const size_t outputSizePerSample, const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample) { return *this; } template GPUMatrix& GPUMatrix::AddMaxPoolingGradient(const GPUMatrix& outputGradientBatch, const GPUMatrix& inputBatch, const GPUMatrix& outputBatch, const size_t channels, const size_t inputWidth, const size_t inputHeight, const size_t inputSizePerSample, const size_t outputWidth, const size_t outputHeight, const size_t outputSizePerSample, const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample) { return *this; } template GPUMatrix& GPUMatrix::AssignAveragePoolingResult(const GPUMatrix& inputBatch, const size_t channels, const size_t inputWidth, const size_t inputHeight, const size_t inputSizePerSample, const size_t outputWidth, const size_t outputHeight, const size_t outputSizePerSample, const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const 
size_t verticalSubsample) { return *this; } template GPUMatrix& GPUMatrix::AddAveragePoolingGradient(const GPUMatrix& outputGradientBatch, const size_t channels, const size_t inputWidth, const size_t inputHeight, const size_t inputSizePerSample, const size_t outputWidth, const size_t outputHeight, const size_t outputSizePerSample, const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample) { return *this; } template void GPUMatrix::ConvolutionForward(const GPUMatrix& kernel, const GPUMatrix& mpRowCol, const GPUMatrix& mpRowIwht, const GPUMatrix& mpRowRun, const GPUMatrix& runs, GPUMatrix& output) const { } template void GPUMatrix::ConvolutionBackwardData(const GPUMatrix& kernel, const GPUMatrix& mpRowCol, const GPUMatrix& mpRowIwht, const GPUMatrix& mpRowRun, const GPUMatrix& runs, GPUMatrix& grad) const { } template void GPUMatrix::ConvolutionBackwardKernel(const GPUMatrix& in, const GPUMatrix& mpRowCol, const GPUMatrix& mpRowIwht, const GPUMatrix& mpRowRun, const GPUMatrix& runs, GPUMatrix& kernelGrad) const { } template void GPUMatrix::MaxROIPoolingForward(const size_t numRois, const size_t numImg, const size_t channels, const size_t width, const size_t height, const size_t pooledWidth, const size_t pooledHeight, const GPUMatrix& roiData, GPUMatrix& output, GPUMatrix& argmax, double spatialScale) const { } template void GPUMatrix::MaxROIPoolingBackward(const size_t numRois, const size_t numImg, const size_t channels, const size_t width, const size_t height, const size_t pooledWidth, const size_t pooledHeight, const GPUMatrix& roiData, GPUMatrix& grad, GPUMatrix& argmax, double spatialScale) const { } template void GPUMatrix::MaxPoolingForward(const GPUMatrix& mpRowCol, const GPUMatrix& mpRowIndices, const GPUMatrix& indices, GPUMatrix& output) const { } template void GPUMatrix::MaxPoolingBackward(const GPUMatrix& out, const GPUMatrix& in, const GPUMatrix& mpRowCol, const GPUMatrix& mpRowIndices, const 
GPUMatrix& indices, GPUMatrix& grad, bool accumulateGradient) const { } template void GPUMatrix::MaxUnpooling(const GPUMatrix& mpRowCol, const GPUMatrix& mpRowIndices, const GPUMatrix& indices, const GPUMatrix& poolInput, GPUMatrix& input) const { } template void GPUMatrix::AveragePoolingForward(const GPUMatrix& mpRowCol, const GPUMatrix& mpRowIndices, const GPUMatrix& indices, GPUMatrix& output) const { } template void GPUMatrix::AveragePoolingBackward(const GPUMatrix& mpRowCol, const GPUMatrix& mpRowIndices, const GPUMatrix& indices, GPUMatrix& grad, bool accumulateGradient) const { } template template void GPUMatrix::BatchNormalizationForward(const GPUMatrix& scale, const GPUMatrix& bias, bool inferenceOnly, double expAvgFactor, double blendFactor, GPUMatrix& runMean, GPUMatrix& runVariance, GPUMatrix& out, double epsilon, GPUMatrix& saveMean, GPUMatrix& saveInvStdDev) const { } template template void GPUMatrix::BatchNormalizationBackward(const GPUMatrix& in, GPUMatrix& grad, const GPUMatrix& scale, double blendFactor, const GPUMatrix& saveMean, const GPUMatrix& saveInvStdDev, GPUMatrix& scaleGrad, GPUMatrix& biasGrad) const { } template void GPUMatrix::RNNForward(const GPUMatrix &inputX, const GPUMatrix ¶mW, size_t xDim, size_t yDim, const vector& numSequencesForFrame, const RnnAttributes& rnnAttributes, GPUMatrix& reserve, GPUMatrix& workspace) { } template void GPUMatrix::RNNBackwardData(const GPUMatrix& outputDY, const GPUMatrix& paramW, GPUMatrix& outputDX, const RnnAttributes& rnnAttributes, GPUMatrix& reserve, GPUMatrix& workspace) { } template void GPUMatrix::RNNBackwardWeights(const GPUMatrix& inputX, const GPUMatrix& outputY, GPUMatrix& dw, const RnnAttributes& rnnAttributes, GPUMatrix& reserve, GPUMatrix& workspace) { } #pragma endregion Other helper functions #pragma region Static BLAS Functions template void GPUMatrix::MultiplyAndWeightedAdd(ElemType alpha, const GPUMatrix& /*a*/, const bool transposeA, const GPUMatrix& /*b*/, const bool transposeB, 
ElemType beta, GPUMatrix& c) { } template void GPUMatrix::Multiply1x1AndWeightedAdd(ElemType alpha, const GPUMatrix& lhs, const GPUMatrix& rhs, ElemType beta, GPUMatrix& c) { } template void GPUMatrix::MultiplyAndAdd(const GPUMatrix& /*a*/, const bool transposeA, const GPUMatrix& /*b*/, const bool transposeB, GPUMatrix& c) { } template void GPUMatrix::Multiply(const GPUMatrix& /*a*/, const bool transposeA, const GPUMatrix& /*b*/, const bool transposeB, GPUMatrix& c) { } template void GPUMatrix::Multiply(const GPUMatrix& /*a*/, const GPUMatrix& /*b*/, GPUMatrix& c) { } template void GPUMatrix::ColumnwiseScaleAndWeightedAdd(ElemType alpha, const GPUMatrix& a, const GPUMatrix& v, ElemType beta, GPUMatrix& c) { } /// Matrix-scalar multiply with col-major matrices: c = alpha * a + c /// if a is a column vector, add to all columns of c /// if a is a row vector, add to all rows of c /// if a is a scalar, add to all elements of c /// Scalar /// Input matrix /// Resulting matrix, user is responsible for allocating this template void GPUMatrix::ScaleAndAdd(ElemType alpha, const GPUMatrix& /*a*/, GPUMatrix& c) { } /// Matrix-scalar multiply with col-major matrices: c = alpha * a + b /// if a is a column vector, add to all columns of b /// if a is a row vector, add to all rows of b /// if a is a scalar, add to all elements of b /// Scalar /// Input matrix /// Input matrix /// Resulting matrix, user is responsible for allocating this template void GPUMatrix::ScaleAndAdd(ElemType alpha, const GPUMatrix& /*a*/, const GPUMatrix& /*b*/, GPUMatrix& c) { } /// c += alpha * (a-b) /// if a, b, c must have same dim /// Scalar /// Input matrix /// Input matrix /// Resulting matrix, user is responsible for allocating this template void GPUMatrix::AddScaledDifference(const ElemType alpha, const GPUMatrix& /*a*/, const GPUMatrix& /*b*/, GPUMatrix& c) { } /// c = alpha * (a-b) /// if a, b, c must have same dim /// Scalar /// Input matrix /// Input matrix /// Resulting matrix, user is 
responsible for allocating this template void GPUMatrix::AssignScaledDifference(const ElemType alpha, const GPUMatrix& /*a*/, const GPUMatrix& /*b*/, GPUMatrix& c) { } /// c += alpha * (a-b) /// if a, b, c must have same dim /// 1X1 matrix /// Input matrix /// Input matrix /// Resulting matrix, user is responsible for allocating this template void GPUMatrix::AddScaledDifference(const GPUMatrix& /*alpha*/, const GPUMatrix& /*a*/, const GPUMatrix& /*b*/, GPUMatrix& c) { } /// c = alpha * (a-b) /// if a, b, c must have same dim /// Scalar /// Input matrix /// Input matrix /// Resulting matrix, user is responsible for allocating this template void GPUMatrix::AssignScaledDifference(const GPUMatrix& /*alpha*/, const GPUMatrix& /*a*/, const GPUMatrix& /*b*/, GPUMatrix& c) { } //c[ci,cj] += a[ai,aj] template void GPUMatrix::AddElementToElement(ElemType beta, const GPUMatrix& /*a*/, const size_t ai, const size_t aj, GPUMatrix& c, const size_t ci, const size_t cj) { } template void GPUMatrix::Scale(ElemType alpha, GPUMatrix& /*a*/) { } template void GPUMatrix::Scale(GPUMatrix& /*alpha*/, GPUMatrix& /*a*/) { } template // c = alpha * a void GPUMatrix::Scale(ElemType alpha, const GPUMatrix& /*a*/, GPUMatrix& c) { } template bool GPUMatrix::HasElement(const GPUMatrix& a, const ElemType value) { return false; } template void GPUMatrix::InnerProduct(const GPUMatrix& /*a*/, const GPUMatrix& /*b*/, GPUMatrix& c, const bool isColWise) { } template ElemType GPUMatrix::InnerProductOfMatrices(const GPUMatrix& /*a*/, const GPUMatrix& /*b*/) { return ElemType(0); } template GPUMatrix& GPUMatrix::AssignInnerProductOfMatrices(const GPUMatrix& /*a*/, const GPUMatrix& /*b*/) { return *this; } template void GPUMatrix::ElementWisePower(ElemType alpha, const GPUMatrix& /*a*/, GPUMatrix& c) { } template void GPUMatrix::BatchMatMul(ElemType beta, const GPUMatrix& a, const bool transposeA, const int m, const GPUMatrix& b, const bool transposeB, const int n, GPUMatrix& c, const bool isColWise) { } 
template bool GPUMatrix::AreEqual(const GPUMatrix& /*a*/, const GPUMatrix& /*b*/, const ElemType threshold /*= 1e-8*/) { return false; } template void GPUMatrix::TensorShuffleScaleAndAdd(ElemType keepWeight, const GPUMatrix& a, size_t D, size_t S, size_t M, size_t K, size_t T, ElemType scaleFactor, const GPUMatrix& b, GPUMatrix& c) { } template void GPUMatrix::TensorOp(ElemType beta, const GPUMatrix& a, ElemType alpha, ElementWiseOperator op, ElementWiseOperator reductionOp, const array& offsets, const SmallVector& regularOpDims, const array, 2>& regularStrides, const SmallVector& reducingOpDims, const array, 2>& reducingStrides) { } template void GPUMatrix::TensorOp(ElemType beta, const GPUMatrix& a, const GPUMatrix& b, ElemType alpha, ElementWiseOperator op, ElementWiseOperator reductionOp, const array& offsets, const SmallVector& regularOpDims, const array, 3>& regularStrides, const SmallVector& reducingOpDims, const array, 3>& reducingStrides) { } template void GPUMatrix::TensorOp(ElemType beta, const GPUMatrix& a, const GPUMatrix& b, const GPUMatrix& c, ElemType alpha, ElementWiseOperator op, ElementWiseOperator reductionOp, const array& offsets, const SmallVector& regularOpDims, const array, 4>& regularStrides, const SmallVector& reducingOpDims, const array, 4>& reducingStrides) { } template void GPUMatrix::TensorArgOp(const GPUMatrix& a, ElementWiseOperator reductionOp, const array& offsets, const SmallVector& regularOpDims, const array, 2>& regularStrides, const SmallVector& reducingOpDims, const array, 2>& reducingStrides) { } template void GPUMatrix::CreateCurandObject(unsigned long seed, const char* caller) { } template void GPUMatrix::ResetCurandObject(unsigned long seed, const char* caller) { } template GPUMatrix GPUMatrix::Ones(const size_t rows, const size_t cols, int deviceId) { GPUMatrix mat(0); return mat; } template GPUMatrix GPUMatrix::Zeros(const size_t rows, const size_t cols, int deviceId) { GPUMatrix mat(0); return mat; } template GPUMatrix 
GPUMatrix::Eye(const size_t rows, int deviceId) { GPUMatrix mat(0); return mat; } template GPUMatrix GPUMatrix::RandomUniform(const size_t rows, const size_t cols, int deviceId, const ElemType low, const ElemType high, unsigned long seed) { GPUMatrix mat(0); return mat; } template GPUMatrix& GPUMatrix::AssignOneHot(const GPUMatrix& a, vector& shape, size_t axis) { return *this; } template GPUMatrix GPUMatrix::RandomGaussian(const size_t rows, const size_t cols, int deviceId, const ElemType mean, const ElemType sigma, unsigned long seed) { GPUMatrix mat(0); return mat; } template ElemType GPUMatrix::GetLearnRateForBlock_Helper(const GPUMatrix& Gradients, const GPUMatrix& SmoothedGradients) { return ElemType(0); } template ElemType GPUMatrix::LogSumOfElements() const { return ElemType(0); } template void GPUMatrix::RCRFBackwardCompute( const GPUMatrix& alpha, GPUMatrix& beta, const GPUMatrix& lbls, const GPUMatrix& pos_scores, const GPUMatrix& pair_scores, const int shift) { } template void GPUMatrix::RCRFTransGrdCompute(const GPUMatrix& lbls, const GPUMatrix& alpha, const GPUMatrix& beta, const GPUMatrix& pair_scores, GPUMatrix& grd, const int startLbl, const int shift) { } template void GPUMatrix::AssignNoiseContrastiveEstimation(const GPUMatrix& a, const GPUMatrix& b, const GPUMatrix& bias, size_t sampleCount, GPUMatrix& tmp, GPUMatrix& c) { } template void GPUMatrix::AssignNCEDerivative(GPUMatrix& tmp, const GPUMatrix& a, const GPUMatrix& b, size_t inputIndex, GPUMatrix& c) { } template void GPUMatrix::AssignSoftmaxSum(const GPUMatrix& a, GPUMatrix& c) { } template void GPUMatrix::AssignNCEUnnormalizedEval(const GPUMatrix& a, const GPUMatrix& b, GPUMatrix& c) { } #pragma endregion Static BLAS Functions #pragma region MatrixQuantizerGPU functions template MatrixQuantizerGPU::MatrixQuantizerGPU(int deviceId, bool useDedicatedComputeStream, bool forceSync) { } template MatrixQuantizerGPU::~MatrixQuantizerGPU() { } template void 
MatrixQuantizerGPU::QuantizeAsync(const Matrix& inMatrix, const Matrix& inResidual, QuantizedMatrix& outQMatrix, Matrix& outResidual, bool zeroThresholdFor1Bit) { }

// NOTE(review): throughout this section `template` is followed directly by the declaration;
// no `<...>` parameter or argument list is visible, and several statements are textually
// identical. Presumably the angle-bracket lists were lost when this file was flattened --
// confirm against the upstream source before relying on these lines.

// Remaining MatrixQuantizerGPU members: every body is empty, so each call is a no-op.
template void MatrixQuantizerGPU::WaitQuantizeAsyncDone() { }
template void MatrixQuantizerGPU::UnquantizeAsync(QuantizedMatrix& inQMatrix, Matrix& outMatrix, bool add /*= false*/) { }
template void MatrixQuantizerGPU::WaitUnquantizeAsyncDone() { }

#pragma endregion MatrixQuantizerGPU functions

#pragma region GPUMatrixComputeStreamEvent functions

// The constructor only forwards deviceId to the MatrixComputeStreamEvent base; the destructor
// and all Synchronize* members have empty bodies. The `;` after each `{}` is an empty
// declaration and has no effect.
GPUMatrixComputeStreamEvent::GPUMatrixComputeStreamEvent(int deviceId) : MatrixComputeStreamEvent(deviceId) { }
GPUMatrixComputeStreamEvent::~GPUMatrixComputeStreamEvent(){};
void GPUMatrixComputeStreamEvent::SynchronizeEvent(){};

// Each `template <>` definition below appears twice with identical text.
// NOTE(review): presumably two specializations for different types -- TODO confirm.
template <> void GPUMatrixComputeStreamEvent::SynchronizeQuantizationComputeStreamWithEvent(){};
template <> void GPUMatrixComputeStreamEvent::SynchronizeQuantizationComputeStreamWithEvent(){};
template <> void GPUMatrixComputeStreamEvent::SynchronizeDataTransferFetchStreamWithEvent(){};
template <> void GPUMatrixComputeStreamEvent::SynchronizeDataTransferFetchStreamWithEvent(){};

#pragma endregion GPUMatrixComputeStreamEvent functions

#pragma region GPUDataTransferer functions

// GranularGPUDataTransferer: all members have empty bodies. Parameter names are commented
// out in the copy functions because the arguments are never read.
GranularGPUDataTransferer::~GranularGPUDataTransferer() {}
void GranularGPUDataTransferer::CopyGPUToCPUAsync(const void* /*gpuBuffer*/, size_t /*numElements*/, size_t /*elementSize*/, void* /*cpuBuffer*/) {}
void GranularGPUDataTransferer::RecordGPUToCPUCopy() {}
void GranularGPUDataTransferer::WaitForCopyGPUToCPU() {}
void GranularGPUDataTransferer::CopyCPUToGPUAsync(const void* /*cpuBuffer*/, size_t /*numElements*/, size_t /*elementSize*/, void* /*gpuBuffer*/) {}
void GranularGPUDataTransferer::RecordCPUToGPUCopy() {}
void GranularGPUDataTransferer::WaitForCopyCPUToGPU() {}
void GranularGPUDataTransferer::RecordComputeStreamSyncPoint() {}
void GranularGPUDataTransferer::WaitForSyncPointOnFetchStreamAsync() {}
void GranularGPUDataTransferer::WaitForSyncPointOnAssignStreamAsync() {}

// PrefetchGPUDataTransferer: the constructor ignores its argument and default-constructs the
// GranularGPUDataTransferer base; the destructor is empty.
PrefetchGPUDataTransferer::PrefetchGPUDataTransferer(int /*deviceId*/) : GranularGPUDataTransferer() {}
PrefetchGPUDataTransferer::~PrefetchGPUDataTransferer() {}

// GPUDataTransferer: all members have empty bodies and unnamed (unused) parameters.
GPUDataTransferer::GPUDataTransferer(int, bool){}
GPUDataTransferer::~GPUDataTransferer(){}
void GPUDataTransferer::CopyGPUToCPUAsync(void*, size_t, void*){}
void GPUDataTransferer::WaitForCopyGPUToCPUAsync(){}
void GPUDataTransferer::CopyCPUToGPUAsync(void*, size_t, void*){}
void GPUDataTransferer::WaitForCopyCPUToGPUAsync(){}

#pragma endregion GPUDataTransferer functions

#pragma region GPURNGHandle functions

// The constructor forwards deviceId to the RNGHandle base; seed and offset are not used.
// The destructor is empty.
GPURNGHandle::GPURNGHandle(int deviceId, uint64_t seed, uint64_t offset) : RNGHandle(deviceId) { }
/*virtual*/ GPURNGHandle::~GPURNGHandle() { }

#pragma endregion GPURNGHandle functions

// Everything from here to the end of this section has the form of an explicit instantiation
// (`template class X;` / `template <declaration>;`): declarations only, no bodies.
// NOTE(review): the repeated identical lines presumably named different template arguments
// originally -- TODO confirm and restore from upstream.

// Class-level instantiations.
template class GPUMatrix;
template class GPUMatrix;
template class GPUMatrix;
template class GPUMatrix;
template class GPUMatrix;
template class GPUMatrix;
template class DeviceBoundNumber;
template class DeviceBoundNumber;
template class DeviceBoundNumber;

// MatrixQuantizerGPU destructor and QuantizeAsync.
template MatrixQuantizerGPU::~MatrixQuantizerGPU();
template MatrixQuantizerGPU::~MatrixQuantizerGPU();
template void MatrixQuantizerGPU::QuantizeAsync(const Matrix&, const Matrix&, QuantizedMatrix&, Matrix&, bool);
template void MatrixQuantizerGPU::QuantizeAsync(const Matrix&, const Matrix&, QuantizedMatrix&, Matrix&, bool);

// GPUMatrix::CastAssignValuesOf (18 entries).
template void GPUMatrix::CastAssignValuesOf(const GPUMatrix* other);
template void GPUMatrix::CastAssignValuesOf(const GPUMatrix* other);
template void GPUMatrix::CastAssignValuesOf(const GPUMatrix* other);
template void GPUMatrix::CastAssignValuesOf(const GPUMatrix* other);
template void GPUMatrix::CastAssignValuesOf(const GPUMatrix* other);
template void GPUMatrix::CastAssignValuesOf(const GPUMatrix* other);
template void GPUMatrix::CastAssignValuesOf(const GPUMatrix* other);
template void GPUMatrix::CastAssignValuesOf(const GPUMatrix* other);
template void GPUMatrix::CastAssignValuesOf(const GPUMatrix* other);
template void GPUMatrix::CastAssignValuesOf(const GPUMatrix* other);
template void GPUMatrix::CastAssignValuesOf(const GPUMatrix* other);
template void GPUMatrix::CastAssignValuesOf(const GPUMatrix* other);
template void GPUMatrix::CastAssignValuesOf(const GPUMatrix* other);
template void GPUMatrix::CastAssignValuesOf(const GPUMatrix* other);
template void GPUMatrix::CastAssignValuesOf(const GPUMatrix* other);
template void GPUMatrix::CastAssignValuesOf(const GPUMatrix* other);
template void GPUMatrix::CastAssignValuesOf(const GPUMatrix* other);
template void GPUMatrix::CastAssignValuesOf(const GPUMatrix* other);

// GPUMatrix::AdaDelta: the scalar parameters are float, double, float in that order.
template void GPUMatrix::AdaDelta(GPUMatrix& gradients, GPUMatrix& functionValues, float learningRate, float rho, float epsilon);
template void GPUMatrix::AdaDelta(GPUMatrix& gradients, GPUMatrix& functionValues, double learningRate, double rho, double epsilon);
template void GPUMatrix::AdaDelta(GPUMatrix& gradients, GPUMatrix& functionValues, float learningRate, float rho, float epsilon);

// GPUMatrix::BatchNormalizationForward / BatchNormalizationBackward (three entries each).
template void GPUMatrix::BatchNormalizationForward(const GPUMatrix& scale, const GPUMatrix& bias, bool inferenceOnly, double expAvgFactor, double blendFactor, GPUMatrix& runMean, GPUMatrix& runVariance, GPUMatrix& out, double epsilon, GPUMatrix& saveMean, GPUMatrix& saveInvStdDev) const;
template void GPUMatrix::BatchNormalizationForward(const GPUMatrix& scale, const GPUMatrix& bias, bool inferenceOnly, double expAvgFactor, double blendFactor, GPUMatrix& runMean, GPUMatrix& runVariance, GPUMatrix& out, double epsilon, GPUMatrix& saveMean, GPUMatrix& saveInvStdDev) const;
template void GPUMatrix::BatchNormalizationForward(const GPUMatrix& scale, const GPUMatrix& bias, bool inferenceOnly, double expAvgFactor, double blendFactor, GPUMatrix& runMean, GPUMatrix& runVariance, GPUMatrix& out, double epsilon, GPUMatrix& saveMean, GPUMatrix& saveInvStdDev) const;
template void GPUMatrix::BatchNormalizationBackward(const GPUMatrix& in, GPUMatrix& grad, const GPUMatrix& scale, double blendFactor, const GPUMatrix& saveMean, const GPUMatrix& saveInvStdDev, GPUMatrix& scaleGrad, GPUMatrix& biasGrad) const;
template void GPUMatrix::BatchNormalizationBackward(const GPUMatrix& in, GPUMatrix& grad, const GPUMatrix& scale, double blendFactor, const GPUMatrix& saveMean, const GPUMatrix& saveInvStdDev, GPUMatrix& scaleGrad, GPUMatrix& biasGrad) const;
template void GPUMatrix::BatchNormalizationBackward(const GPUMatrix& in, GPUMatrix& grad, const GPUMatrix& scale, double blendFactor, const GPUMatrix& saveMean, const GPUMatrix& saveInvStdDev, GPUMatrix& scaleGrad, GPUMatrix& biasGrad) const;

// GPUSparseMatrix::DeepCast (17 entries here; the list continues on the following line of the file).
template void GPUSparseMatrix::DeepCast(const GPUSparseMatrix& deepCopyFrom);
template void GPUSparseMatrix::DeepCast(const GPUSparseMatrix& deepCopyFrom);
template void GPUSparseMatrix::DeepCast(const GPUSparseMatrix& deepCopyFrom);
template void GPUSparseMatrix::DeepCast(const GPUSparseMatrix& deepCopyFrom);
template void GPUSparseMatrix::DeepCast(const GPUSparseMatrix& deepCopyFrom);
template void GPUSparseMatrix::DeepCast(const GPUSparseMatrix& deepCopyFrom);
template void GPUSparseMatrix::DeepCast(const GPUSparseMatrix& deepCopyFrom);
template void GPUSparseMatrix::DeepCast(const GPUSparseMatrix& deepCopyFrom);
template void GPUSparseMatrix::DeepCast(const GPUSparseMatrix& deepCopyFrom);
template void GPUSparseMatrix::DeepCast(const GPUSparseMatrix& deepCopyFrom);
template void GPUSparseMatrix::DeepCast(const GPUSparseMatrix& deepCopyFrom);
template void GPUSparseMatrix::DeepCast(const GPUSparseMatrix& deepCopyFrom);
template void GPUSparseMatrix::DeepCast(const GPUSparseMatrix& deepCopyFrom);
template void GPUSparseMatrix::DeepCast(const GPUSparseMatrix& deepCopyFrom);
template void GPUSparseMatrix::DeepCast(const GPUSparseMatrix& deepCopyFrom);
template void GPUSparseMatrix::DeepCast(const GPUSparseMatrix& deepCopyFrom);
template void GPUSparseMatrix::DeepCast(const GPUSparseMatrix& deepCopyFrom);
template void GPUSparseMatrix::DeepCast(const GPUSparseMatrix& deepCopyFrom); template void GPUSparseMatrix::AdaDelta(GPUMatrix&c, GPUMatrix&functionValues, float learningRate, float rho, float epsilon, int* timestamps, int currentTimestamp); template void GPUSparseMatrix::AdaDelta(GPUMatrix&c, GPUMatrix&functionValues, double learningRate, double rho, double epsilon, int* timestamps, int currentTimestamp); template void GPUSparseMatrix::AdaDelta(GPUMatrix&c, GPUMatrix&functionValues, float learningRate, float rho, float epsilon, int* timestamps, int currentTimestamp); template cublasHandle_t GPUMatrix::s_cuHandle[GPUMatrix::MaxGpus] = {0}; template void* GPUMatrix::s_curandGenerator = NULL; template std::unique_ptr> CuDnnConvolutionEngineFactory::Create(ConvolveGeometryPtr, DEVICEID_TYPE, ImageLayoutKind, size_t, PoolKind, bool, bool, bool) { RuntimeError("The code is compiled with CPUONLY macro."); } template bool CuDnnConvolutionEngineFactory::IsSupported(DEVICEID_TYPE, ConvolveGeometryPtr, PoolKind) { return false; } template class CuDnnConvolutionEngineFactory; template class CuDnnConvolutionEngineFactory; template class CuDnnConvolutionEngineFactory; template std::unique_ptr> CuDnnBatchNormEngineFactory::Create(DEVICEID_TYPE deviceId, const TensorShape& inOutT, bool spatial, ImageLayoutKind imageLayout) { RuntimeError("The code is compiled with CPUONLY macro."); } template class CuDnnBatchNormEngineFactory; template class CuDnnBatchNormEngineFactory; template class CuDnnBatchNormEngineFactory; CudaTimer::~CudaTimer() { } void CudaTimer::Start() { } void CudaTimer::Stop() { } float CudaTimer::Elapsed() { return 0; } /*static*/ void SyncGuard::EnableSync() { } /*static*/ bool SyncGuard::IsSyncEnabled() { return false; } } } } // define a dummy GPUWatcher class too #include "GPUWatcher.h" int GPUWatcher::GetGPUIdWithTheMostFreeMemory() { return 0; } size_t GPUWatcher::GetFreeMemoryOnCUDADevice(int /*devId*/) { return 0; } GPUWatcher::GPUWatcher(void) { } 
// Destructor of the dummy GPUWatcher: the body is empty, so destruction does nothing.
GPUWatcher::~GPUWatcher(void) { }

// Ends the `#ifdef CPUONLY` section that opens after the first #include lines of this file.
#endif // CPUONLY