// // Copyright (c) Microsoft. All rights reserved. // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. // #include "stdafx.h" #include "BestGpu.h" #ifdef CPUONLY #include "CommonMatrix.h" #include "GPUMatrix.h" #include "GPUSparseMatrix.h" #include "MatrixQuantizerGPU.h" #include "CuDnnFactories.h" #include "TensorShape.h" #include "GPUDataTransferer.h" #pragma warning(disable : 4100) // unreferenced formal parameter, which is OK since all functions in here are dummies; disabling this allows to copy-paste prototypes here when we add new functions #pragma warning(disable : 4702) // unreachable code, which we get from the NOT_IMPLEMENTED macro which is OK namespace Microsoft { namespace MSR { namespace CNTK { // the reset below are dummy implementations MATH_API std::size_t GetCUDNNVersion() { return 0; } void PrepareDevice(DEVICEID_TYPE deviceId); template GPUSPARSE_INDEX_TYPE GPUSparseMatrix::SecondaryIndexValueAt(size_t idx) const { return (GPUSPARSE_INDEX_TYPE) 0; } #pragma region Constructors and Destructor template GPUSparseMatrix::GPUSparseMatrix(DEVICEID_TYPE computeDevice, const MatrixFormat matrixFormat /*= MatrixFormat::matrixFormatSparseCSR*/) { } template void GPUSparseMatrix::ZeroInit(const MatrixFormat matrixFormat, const DEVICEID_TYPE computeDevice) { } template GPUSparseMatrix::GPUSparseMatrix(const GPUMatrix& deepCopy, const MatrixFormat matrixFormat /*= MatrixFormat::matrixFormatSparseCSR*/) { } template GPUSparseMatrix::GPUSparseMatrix(const GPUSparseMatrix& deepCopy) { } template GPUSparseMatrix::GPUSparseMatrix(const size_t numRows, const size_t numCols, const size_t numNZ, DEVICEID_TYPE computeDevice, const MatrixFormat matrixFormat /*= MatrixFormat::matrixFormatSparseCSR*/) { } // PrepareDevice - Setup the correct cuda context for an operation // deviceId - the device on which the operation will take place // defaults to -1, which means use matrices current device template DEVICEID_TYPE GPUSparseMatrix::PrepareDevice(DEVICEID_TYPE deviceId /*=-1*/) const { return deviceId; } template template void GPUSparseMatrix::DeepCast(const GPUSparseMatrix& deepCopy) { } template void GPUSparseMatrix::DeepCopy(const GPUSparseMatrix& deepCopy) { } template void GPUSparseMatrix::SetValue(const GPUSparseMatrix& deepCopy) { } #if 0 template void GPUSparseMatrix::SetValue(const CPUMatrix& denseMatrix) { } #endif template void GPUSparseMatrix::SetValue(const CPUSparseMatrix& denseMatrix) { } template void GPUSparseMatrix::SetValue(const GPUMatrix& denseMatrix) { } template void GPUSparseMatrix::SetValue(const GPUMatrix& denseMatrix, const MatrixFormat matrixFormat) { } template void GPUSparseMatrix::SetDiagonalValue(const ElemType v) { } template void GPUSparseMatrix::SetDiagonalValue(const GPUMatrix& vector) { } template GPUSPARSE_INDEX_TYPE* GPUSparseMatrix::GetCondensedVector() const { return NULL; } template void GPUSparseMatrix::MaskColumnsValue(const GPUMatrix& columnsMask, ElemType val, size_t numColsPerMaskEntry) { } template GPUSparseMatrix& GPUSparseMatrix::operator=(const GPUSparseMatrix& deepCopy) { return *this; } template GPUSparseMatrix::GPUSparseMatrix(GPUSparseMatrix&& moveFrom) { } template GPUSparseMatrix& GPUSparseMatrix::operator=(GPUSparseMatrix&& moveFrom) { return *this; } template GPUSparseMatrix::~GPUSparseMatrix() { } //ResizeAsAndCopyIndexFrom - Resize this sparse matrix to have the same element structure as the passed matrix // a - sparse matrix whose structure we want to clone // remark: this was done for element wise operations where the structure will be identical after an operation template void GPUSparseMatrix::ResizeAsAndCopyIndexFrom(const GPUSparseMatrix& a, const bool growOnly /*= true*/) { } //------------------------------------------------------------------------- // Start of new GPU Sparse Matrix code //------------------------------------------------------------------------- template void GPUSparseMatrix::ClearNzCount() { } template void GPUSparseMatrix::Allocate(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, const bool growOnly, bool keepExistingValues) { } template void GPUSparseMatrix::RequireSizeAndAllocate(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, const MatrixFormat matrixFormat, const bool growOnly, bool keepExistingValues) { } template void GPUSparseMatrix::RequireSizeAndAllocate(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, const bool growOnly, bool keepExistingValues) { } template void GPUSparseMatrix::RequireSize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, const MatrixFormat format, const bool growOnly) { } template void GPUSparseMatrix::RequireSize(const size_t numRows, const size_t numCols, const bool growOnly) { } template void GPUSparseMatrix::Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, const MatrixFormat matrixFormat, const bool growOnly) { } template void GPUSparseMatrix::Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, const bool growOnly) { } template void GPUSparseMatrix::AdjustCol2BlockId(const GPUSPARSE_INDEX_TYPE* cpuCol2BlockId, size_t numBlocks, bool useBlockId2Col) { } template GPUMatrix GPUSparseMatrix::CopyToDenseMatrix() const { GPUMatrix res(0); return res; } template void GPUSparseMatrix::CopyToDenseMatrix(GPUMatrix& denseMatrix) const { } template void GPUSparseMatrix::CopyToCPUSparseMatrix(CPUSparseMatrix& cpuSparseMatrix) const { } template void GPUSparseMatrix::ChangeDeviceTo(DEVICEID_TYPE toId) { } template template void GPUMatrix::CastAssignValuesOf(const GPUMatrix* other) { } //Reset matrix so it can be reused template void GPUSparseMatrix::Reset() { } #pragma endregion Constructors and Destructor #pragma region Static BLAS Functions // copy features to GPU matrix template void GPUSparseMatrix::SetMatrixFromCSCFormat(const CPUSPARSE_INDEX_TYPE* h_CSCCol, const CPUSPARSE_INDEX_TYPE* h_Row, const ElemType* h_Val, const size_t nz, const size_t numRows, const size_t numCols, const bool IsOnDevice /*= false*/, const DEVICEID_TYPE devId /*= -1*/, DataTransferer* transferer) { } template void GPUSparseMatrix::SetMatrixFromSBCFormat(const size_t*, const ElemType*, const size_t, const size_t, const size_t) { } // forward pass from feature to hidden layer template void GPUSparseMatrix::MultiplyAndWeightedAdd(ElemType alpha, const GPUMatrix& lhs, const bool transposeA, const GPUSparseMatrix& rhs, const bool transposeB, ElemType beta, GPUMatrix& c) { } // backward pass from hidden layer to feature weight template void GPUSparseMatrix::MultiplyAndAdd(ElemType alpha, const GPUMatrix& lhs, const bool transposeA, const GPUSparseMatrix& rhs, const bool transposeB, GPUSparseMatrix& c) { } template void GPUSparseMatrix::ColumnwiseScaleAndWeightedAdd(ElemType alpha, const GPUSparseMatrix& a, const GPUMatrix& v, ElemType beta, GPUMatrix& c) { } // used for gradients udpate template void GPUSparseMatrix::ScaleAndAdd(const ElemType alpha, const GPUSparseMatrix& lhs, GPUMatrix& rhs) { } template GPUSparseMatrix& GPUSparseMatrix::InplaceTruncate(const ElemType threshold) { return *this; } // normal update for smoothed gradients c and current gradients (this) template void GPUSparseMatrix::NormalGrad(GPUMatrix& c, const ElemType momentum, ElemType unitGainFactor) { } template ElemType GPUSparseMatrix::Adagrad(GPUMatrix& c, const bool needAveMultiplier) { return 1; } template void GPUSparseMatrix::FSAdagrad(GPUMatrix&, GPUMatrix&, ElemType, ElemType, ElemType, ElemType, ElemType) { } template void GPUSparseMatrix::Adam(GPUMatrix& c, GPUMatrix& functionValues, ElemType learnRatePerSample, ElemType momentum, ElemType adaWeight, ElemType adaMul, ElemType epsilon, ElemType unitGainFactor, bool adamax) { } template ElemType GPUSparseMatrix::RmsProp(GPUMatrix&, ElemType, ElemType, ElemType, ElemType, ElemType, const bool, const bool) { return 1; } template template void GPUSparseMatrix::AdaDelta(GPUMatrix&c, GPUMatrix&functionValues, AccumType learningRate, AccumType rho, AccumType epsilon, int* timestamps, int currentTimestamp) { } template void GPUSparseMatrix::MultiplyAndWeightedAdd(ElemType alpha, const GPUSparseMatrix& a, const bool transposeA, const GPUMatrix& b, const bool transposeD, ElemType beta, GPUMatrix& c) { } template void GPUSparseMatrix::Multiply(const GPUSparseMatrix& S, const GPUMatrix& D, GPUMatrix& C) { } template void GPUSparseMatrix::Multiply(const GPUMatrix& D, const GPUSparseMatrix& S, GPUMatrix& C) { } template size_t GPUSparseMatrix::ElemCountFromBufferSize(const size_t numRows, const size_t numCols, const MatrixFormat format, const size_t totalBufferSize) const { return 0; } template size_t GPUSparseMatrix::ElemCountFromBufferSize() const { return 0; } // PrepareBuffer - Get the dimensions start buffer, computes the starting row/column of each value // m - rows in the source // n - cols in the source // canReuseBuffer - target matrix can be reused for temporary space // func - function to call to count elements in the result (returns count, and fills csrRowPtr array) template void GPUSparseMatrix::PrepareBuffer(size_t m, size_t n, bool canReuseBuffer, std::function func) { } // Multiply - multiply one spares matrix by another sparse matrix // S1 - first sparse matrix // transposeS1 - transpose first matrix? // S2 - second sparse matrix // transposeS2 - tanspose second matrix? // c - result matrix // NOTE: if c has enough space allocated, it will be reused, otherwise it will be freed and a new memory block used template void GPUSparseMatrix::Multiply(const GPUSparseMatrix& S1, bool transposeS1, const GPUSparseMatrix& S2, bool transposeS2, GPUSparseMatrix& c) { } template GPUSparseMatrix& GPUSparseMatrix::AssignProductOf(const GPUSparseMatrix& a, const bool transposeA, const GPUSparseMatrix& /*b*/, const bool transposeB) { return *this; } template void GPUSparseMatrix::ScaleAndAdd(ElemType alpha, const GPUSparseMatrix& a, ElemType beta, const GPUSparseMatrix& /*b*/, GPUSparseMatrix& c) { } template void GPUSparseMatrix::ScaleAndAdd(ElemType alpha, const GPUSparseMatrix& a, ElemType beta, const GPUMatrix& /*b*/, GPUMatrix& c) { } template void GPUSparseMatrix::ScaleAndAdd(ElemType alpha, const GPUMatrix& /*a*/, ElemType beta, const GPUSparseMatrix& /*b*/, GPUMatrix& c) { } template void GPUSparseMatrix::Scale(ElemType alpha, GPUSparseMatrix& a) { } template void GPUSparseMatrix::ElementWisePower(ElemType alpha, const GPUSparseMatrix& a, GPUSparseMatrix& c) { } template ElemType GPUSparseMatrix::InnerProductOfMatrices(const GPUSparseMatrix& a, const GPUMatrix& /*b*/) { return ElemType(0); } template ElemType GPUSparseMatrix::InnerProductOfMatrices(const GPUMatrix& /*a*/, const GPUSparseMatrix& /*b*/) { return ElemType(0); } template void GPUSparseMatrix::InnerProduct(const GPUSparseMatrix&, const GPUMatrix&, GPUMatrix&, const bool) { } template bool GPUSparseMatrix::AreEqual(const GPUSparseMatrix& a, const GPUSparseMatrix& /*b*/, const ElemType threshold) { return false; } template bool GPUSparseMatrix::AreEqual(const GPUMatrix& /*a*/, const GPUSparseMatrix& /*b*/, const ElemType threshold) { return false; } template bool GPUSparseMatrix::AreEqual(const GPUSparseMatrix& a, const GPUMatrix& /*b*/, const ElemType threshold) { return false; } template bool GPUSparseMatrix::IsEqualTo(const GPUSparseMatrix& a, const ElemType threshold) const { return false; } template bool GPUSparseMatrix::IsEqualTo(const GPUMatrix& /*a*/, const ElemType threshold) const { return false; } #pragma endregion Static BLAS Functions #pragma region Member BLAS Functions template GPUMatrix GPUSparseMatrix::ElementProductOf(const GPUSparseMatrix& a, const GPUMatrix& /*b*/) { GPUMatrix c(0); return c; } template GPUMatrix GPUSparseMatrix::ElementProductOf(const GPUMatrix& a, const GPUSparseMatrix& b) { return GPUSparseMatrix::ElementProductOf(b, a); } template GPUSparseMatrix GPUSparseMatrix::operator+(const GPUSparseMatrix& a) const { return *this; } template GPUSparseMatrix GPUSparseMatrix::operator-(const GPUSparseMatrix& a) const { return *this; } template GPUSparseMatrix& GPUSparseMatrix::operator^=(ElemType alpha) { return *this; } template GPUSparseMatrix GPUSparseMatrix::operator^(ElemType alpha) const { return *this; } template GPUSparseMatrix& GPUSparseMatrix::operator*=(ElemType alpha) { return *this; } template GPUSparseMatrix GPUSparseMatrix::operator*(ElemType alpha) const { return *this; } template GPUSparseMatrix& GPUSparseMatrix::AssignElementPowerOf(const GPUSparseMatrix& a, const ElemType power) { return *this; } template GPUSparseMatrix GPUSparseMatrix::Transpose() const { return *this; } template GPUSparseMatrix& GPUSparseMatrix::AssignTransposeOf(const GPUSparseMatrix& a) { return *this; } template void GPUSparseMatrix::InplaceTranspose() { } template GPUSparseMatrix GPUSparseMatrix::ColumnSlice(size_t startColumn, size_t numCols) const { GPUSparseMatrix a(0); return a; } template void GPUSparseMatrix::AssignColumnSliceToDense(GPUMatrix& slice, size_t startColumn, size_t numCols) const { } template GPUMatrix GPUSparseMatrix::CopyColumnSliceToDense(size_t startColumn, size_t numCols) const { GPUMatrix a(0); return a; } template GPUMatrix GPUSparseMatrix::DiagonalToDense() const { GPUMatrix a(0); return a; } template ElemType GPUSparseMatrix::SumOfAbsElements() const { return ElemType(0); } template ElemType GPUSparseMatrix::SumOfElements() const { return ElemType(0); } template ElemType GPUSparseMatrix::FrobeniusNorm() const { return ElemType(0); } template ElemType GPUSparseMatrix::MatrixNormInf() const { return ElemType(0); } template ElemType GPUSparseMatrix::MatrixNorm1() const { return ElemType(0); } #pragma endregion Member BLAS Functions #pragma region Other Functions template GPUSparseMatrix& GPUSparseMatrix::ElementInverse() { return *this; } template GPUSparseMatrix& GPUSparseMatrix::AssignElementInverseOf(const GPUSparseMatrix& a) { return *this; } template GPUSparseMatrix& GPUSparseMatrix::InplaceSigmoid() { return *this; } template GPUSparseMatrix& GPUSparseMatrix::AssignSigmoidOf(const GPUSparseMatrix& a) { return *this; } template GPUSparseMatrix& GPUSparseMatrix::InplaceLinearRectifierDerivative() { return *this; } template GPUSparseMatrix& GPUSparseMatrix::AssignLinearRectifierDerivativeOf(const GPUSparseMatrix& a) { return *this; } template GPUSparseMatrix& GPUSparseMatrix::InplaceTanh() { return *this; } template GPUSparseMatrix& GPUSparseMatrix::AssignTanhOf(const GPUSparseMatrix& a) { return *this; } template GPUMatrix& GPUMatrix::InplaceAtanh() { return *this; } template GPUMatrix& GPUMatrix::AssignAtanhOf(const GPUMatrix& /*a*/) { return *this; } template GPUSparseMatrix& GPUSparseMatrix::InplaceSqrt() { return *this; } template GPUSparseMatrix& GPUSparseMatrix::AssignSqrtOf(const GPUSparseMatrix& a) { return *this; } template GPUSparseMatrix& GPUSparseMatrix::InplaceExp() { return *this; } template GPUSparseMatrix& GPUSparseMatrix::AssignExpOf(const GPUSparseMatrix& a) { return *this; } template GPUSparseMatrix& GPUSparseMatrix::InplaceLog() { return *this; } template GPUSparseMatrix& GPUSparseMatrix::AssignLogOf(const GPUSparseMatrix& a) { return *this; } template GPUSparseMatrix& GPUSparseMatrix::InplaceAbs() { return *this; } template GPUSparseMatrix& GPUSparseMatrix::AssignAbsOf(const GPUSparseMatrix& a) { return *this; } template GPUSparseMatrix& GPUSparseMatrix::InplaceTruncateBottom(const ElemType threshold) { return *this; } template GPUSparseMatrix& GPUSparseMatrix::AssignTruncateBottomOf(const GPUSparseMatrix& a, const ElemType threshold) { return *this; } template GPUSparseMatrix& GPUSparseMatrix::InplaceTruncateTop(const ElemType threshold) { return *this; } template GPUSparseMatrix& GPUSparseMatrix::AssignTruncateTopOf(const GPUSparseMatrix& a, const ElemType threshold) { return *this; } template GPUSparseMatrix& GPUSparseMatrix::SetToZeroIfAbsLessThan(const ElemType threshold) { return *this; } template GPUSparseMatrix& GPUSparseMatrix::InplaceSoftThreshold(const ElemType threshold) { return (*this); } template size_t GPUSparseMatrix::IdentifyRowsWithValues() const { return 0; } #pragma endregion #pragma region Helper Functions template void* GPUSparseMatrix::ReserveTempHostBuffer(const size_t sizeInByte) const { return nullptr; } template void GPUSparseMatrix::performElementWiseFunction(const ElementWiseOperator kind, const GPUSparseMatrix& src) { } template void GPUSparseMatrix::SetMatrixFromCSRFormat(const CPUSPARSE_INDEX_TYPE* h_CSRRow, const CPUSPARSE_INDEX_TYPE* h_Col, const ElemType* h_Val, const size_t nz, const size_t numRows, const size_t numCols, const bool IsOnDevice /*= false*/, const DEVICEID_TYPE devId /*= -1*/) { } template void GPUSparseMatrix::GetMatrixFromCSRFormat(CPUSPARSE_INDEX_TYPE*& h_CSRRow, CPUSPARSE_INDEX_TYPE*& h_Col, ElemType*& h_Val, size_t& numElemAllocated, size_t& nz, size_t& numRows, size_t& numCols) const { } template void GPUSparseMatrix::GetMatrixFromCSCFormat(CPUSPARSE_INDEX_TYPE*& h_CSCCol, CPUSPARSE_INDEX_TYPE*& h_Row, ElemType*& h_Val, size_t& numElemAllocated, size_t& nz, size_t& numRows, size_t& numCols) const { } template void GPUSparseMatrix