https://github.com/Microsoft/CNTK
Tip revision: 1aefd22f8abd788ac79f7958d75e4d46a533123f authored by Mark Hillebrand on 22 January 2016, 14:46:46 UTC
Change default targets for build-and-test
Change default targets for build-and-test
Tip revision: 1aefd22
CPUMatrix.h
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#pragma once
#include "Basics.h" // for RuntimeError()
#include "Matrix.h"
#include "File.h"
#include "Helpers.h"
#include "CommonMatrix.h"
#include <vector>
#include <stdio.h>
#include <ctime>
#include <limits.h>
// NOTE NOTE NOTE:
// use CPUSingleMatrix and CPUDoubleMatrix instead of using the template directly
///////////////////////////////////////////////
// This class is exported from the Math.dll
namespace Microsoft { namespace MSR { namespace CNTK {
double logadd(double x, double y);
//To compy with BLAS libraries matrices are stored in ColMajor. However, by default C/C++/C# use RowMajor
//convertion is need when passing data between CPUMatrix and C++ matrices
template <class ElemType>
class MATH_API CPUMatrix : public BaseMatrix<ElemType>
{
typedef BaseMatrix<ElemType> B;
using B::m_numRows;
using B::m_numCols;
using B::m_pArray;
using B::m_computeDevice;
using B::m_elemSizeAllocated;
using B::m_externalBuffer;
using B::m_format;
using B::m_matrixName; // without this, base members would require to use thi-> in GCC
public:
CPUMatrix();
CPUMatrix(FILE* f, const char* matrixName); //matrixName is used to verify that correct matrix is read.
CPUMatrix(const size_t numRows, const size_t numCols);
CPUMatrix(const size_t numRows, const size_t numCols, ElemType* pArray, const size_t matrixFlags = matrixFlagNormal);
CPUMatrix(const CPUMatrix<ElemType>& deepCopyFrom); //copy constructor, deep copy
CPUMatrix<ElemType>& operator=(const CPUMatrix<ElemType>& deepCopyFrom); //assignment operator, deep copy
CPUMatrix(CPUMatrix<ElemType>&& moveFrom); //move constructor, shallow copy
CPUMatrix<ElemType>& operator=(CPUMatrix<ElemType>&& moveFrom); //move assignment operator, shallow copy
~CPUMatrix();
public:
using B::OwnBuffer;
using B::GetNumElements;
using B::IsEmpty;
using B::GetNumRows;
using B::GetNumCols;
using B::SetOwnBuffer;
using B::SetMatrixName;
size_t BufferSize() const
{
return m_numRows * m_numCols * sizeof(ElemType);
}
ElemType* BufferPointer() const
{
return m_pArray;
}
CPUMatrix<ElemType> ColumnSlice(size_t startColumn, size_t numCols) const;
CPUMatrix<ElemType>& AssignColumnSlice(const CPUMatrix<ElemType>& fromMatrix, size_t startColumn, size_t numCols);
CPUMatrix<ElemType>& SetColumnSlice(const CPUMatrix<ElemType>& fromMatrix, size_t startColumn, size_t numCols);
void CopyColumnsStrided(const CPUMatrix<ElemType>& fromMatrix, size_t numCols, size_t srcNumColsStride, size_t destNumColsStride);
CPUMatrix<ElemType> Diagonal() const;
ElemType Adagrad(CPUMatrix<ElemType>& gradients, const bool needAveMultiplier);
void FSAdagrad(CPUMatrix<ElemType>& gradients, CPUMatrix<ElemType>& functionValues, ElemType learnRatePerSample, ElemType momentum, ElemType adaWeight, ElemType adaMul);
ElemType RmsProp(CPUMatrix<ElemType>& gradients,
ElemType RMS_GAMMA,
ElemType RMS_WGT_INC,
ElemType RMS_WGT_MAX,
ElemType RMS_WGT_DEC,
ElemType RMS_WGT_MIN,
const bool needAveMultiplier);
void Reshape(const size_t numRows, const size_t numCols);
void Resize(const size_t numRows, const size_t numCols, bool growOnly = true); //by default we only reallocate if need to grow
ElemType* CopyToArray() const; //allocated by the callee but need to be deleted by the caller
size_t CopyToArray(ElemType*& arrayCopyTo, size_t& currentArraySize) const; //allocated by the callee but need to be deleted by the caller
void CopySection(size_t numRows, size_t numCols, ElemType* dst, size_t colStride) const;
inline ElemType& operator()(const size_t row, const size_t col)
{
return m_pArray[LocateElement(row, col)];
}
inline const ElemType& operator()(const size_t row, const size_t col) const
{
return m_pArray[LocateElement(row, col)];
}
inline ElemType Get00Element() const
{
return m_pArray[0];
}
void SetValue(const ElemType v);
void SetValue(const CPUMatrix<ElemType>& deepCopyFrom);
void SetValue(const size_t numRows, const size_t numCols, ElemType* pArray, size_t matrixFlags = matrixFlagNormal);
void MaskColumnsValue(const CPUMatrix<char>& columnsMask, ElemType val);
void SetColumn(const ElemType* colPointer, size_t colInd);
void SetColumn(const CPUMatrix<ElemType>& valMat, size_t colInd);
void SetColumn(const ElemType val, size_t j);
void SetDiagonalValue(const ElemType v);
void SetDiagonalValue(const CPUMatrix<ElemType>& vector);
void SetUniformRandomValue(const ElemType low, const ElemType high, unsigned long seed = USE_TIME_BASED_SEED);
void SetGaussianRandomValue(const ElemType mean, const ElemType sigma, unsigned long seed = USE_TIME_BASED_SEED);
void SetUniformRandomMask(const ElemType maskRate, const ElemType scaleValue, unsigned long seed = USE_TIME_BASED_SEED);
void AddGaussianRandomValue(const ElemType mean, const ElemType sigma, unsigned long seed = USE_TIME_BASED_SEED);
CPUMatrix<ElemType> Transpose();
CPUMatrix<ElemType>& AssignTransposeOf(const CPUMatrix<ElemType>& a);
CPUMatrix<ElemType>& operator+=(const ElemType alpha);
CPUMatrix<ElemType> operator+(const ElemType alpha) const;
CPUMatrix<ElemType>& AssignSumOf(const ElemType alpha, const CPUMatrix<ElemType>& a);
CPUMatrix<ElemType>& operator+=(const CPUMatrix<ElemType>& a);
CPUMatrix<ElemType> operator+(const CPUMatrix<ElemType>& a) const;
CPUMatrix<ElemType>& AssignSumOf(const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b);
CPUMatrix<ElemType>& operator-=(const ElemType alpha);
CPUMatrix<ElemType> operator-(const ElemType alpha) const;
CPUMatrix<ElemType>& AssignDifferenceOf(const ElemType alpha, const CPUMatrix<ElemType>& a);
CPUMatrix<ElemType>& AssignDifferenceOf(const CPUMatrix<ElemType>& a, const ElemType alpha);
CPUMatrix<ElemType>& operator-=(const CPUMatrix<ElemType>& a);
CPUMatrix<ElemType> operator-(const CPUMatrix<ElemType>& a) const;
CPUMatrix<ElemType>& AssignDifferenceOf(const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b);
CPUMatrix<ElemType>& operator*=(const ElemType alpha);
CPUMatrix<ElemType> operator*(const ElemType alpha) const;
CPUMatrix<ElemType>& AssignProductOf(const ElemType alpha, const CPUMatrix<ElemType>& a);
CPUMatrix<ElemType> operator*(const CPUMatrix<ElemType>& a) const;
CPUMatrix<ElemType>& AssignProductOf(const CPUMatrix<ElemType>& a, const bool transposeA, const CPUMatrix<ElemType>& b, const bool transposeB);
CPUMatrix<ElemType>& operator/=(ElemType alpha);
CPUMatrix<ElemType> operator/(ElemType alpha) const;
CPUMatrix<ElemType>& operator^=(ElemType alpha); //element-wise power
CPUMatrix<ElemType> operator^(ElemType alpha) const; //element-wise power
CPUMatrix<ElemType>& AssignElementPowerOf(const CPUMatrix<ElemType>& a, const ElemType power);
CPUMatrix<ElemType>& ElementMultiplyWith(const CPUMatrix<ElemType>& a);
CPUMatrix<ElemType>& AssignElementProductOf(const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b);
CPUMatrix<ElemType>& AddElementProductOf(const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b);
CPUMatrix<ElemType>& AssignElementDivisionOf(const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b);
CPUMatrix<ElemType>& ElementDivideBy(const CPUMatrix<ElemType>& a);
CPUMatrix<ElemType>& ColumnElementMultiplyWith(const CPUMatrix<ElemType>& a);
CPUMatrix<ElemType>& RowElementMultiplyWith(const CPUMatrix<ElemType>& a);
CPUMatrix<ElemType>& ColumnElementDivideBy(const CPUMatrix<ElemType>& a);
CPUMatrix<ElemType>& RowElementDivideBy(const CPUMatrix<ElemType>& a);
CPUMatrix<ElemType>& ElementInverse();
CPUMatrix<ElemType>& AssignElementInverseOf(const CPUMatrix<ElemType>& a);
CPUMatrix<ElemType>& InplaceSigmoid();
CPUMatrix<ElemType>& AssignSigmoidOf(const CPUMatrix<ElemType>& a);
CPUMatrix<ElemType>& InplaceLinearRectifierDerivative();
CPUMatrix<ElemType>& AssignLinearRectifierDerivativeOf(const CPUMatrix<ElemType>& a);
CPUMatrix<ElemType>& InplaceSigmoidDerivative();
CPUMatrix<ElemType>& AssignSigmoidDerivativeOf(const CPUMatrix<ElemType>& a);
CPUMatrix<ElemType>& InplaceTanh();
CPUMatrix<ElemType>& AssignTanhOf(const CPUMatrix<ElemType>& a);
CPUMatrix<ElemType>& InplaceLogSoftmax(const bool isColWise);
CPUMatrix<ElemType>& AssignLogSoftmaxOf(const CPUMatrix<ElemType>& a, const bool isColWise);
CPUMatrix<ElemType>& InplaceHardmax(const bool isColWise);
CPUMatrix<ElemType>& AssignHardmaxOf(const CPUMatrix<ElemType>& a, const bool isColWise);
//sequence training
CPUMatrix<ElemType>& DropFrame(const CPUMatrix<ElemType>& label, const CPUMatrix<ElemType>& gamma, const ElemType& threshhold);
CPUMatrix<ElemType>& AssignSequenceError(const ElemType hsmoothingWeight, const CPUMatrix<ElemType>& label, const CPUMatrix<ElemType>& dnnoutput, const CPUMatrix<ElemType>& gamma, ElemType alpha);
CPUMatrix<ElemType>& InplaceSqrt();
CPUMatrix<ElemType>& AssignSqrtOf(const CPUMatrix<ElemType>& a);
CPUMatrix<ElemType>& InplaceExp();
CPUMatrix<ElemType>& AssignExpOf(const CPUMatrix<ElemType>& a);
CPUMatrix<ElemType>& InplaceLog();
CPUMatrix<ElemType>& AssignLogOf(const CPUMatrix<ElemType>& a);
CPUMatrix<ElemType>& InplaceLog10();
CPUMatrix<ElemType>& AssignLog10Of(const CPUMatrix<ElemType>& a);
CPUMatrix<ElemType>& InplaceCosine();
CPUMatrix<ElemType>& AssignCosineOf(const CPUMatrix<ElemType>& a);
CPUMatrix<ElemType>& InplaceNegativeSine();
CPUMatrix<ElemType>& AssignNegativeSineOf(const CPUMatrix<ElemType>& a);
CPUMatrix<ElemType>& InplaceAbs();
CPUMatrix<ElemType>& AssignAbsOf(const CPUMatrix<ElemType>& a);
CPUMatrix<ElemType>& InplaceTruncateBottom(const ElemType threshold);
CPUMatrix<ElemType>& AssignTruncateBottomOf(const CPUMatrix<ElemType>& a, const ElemType threshold);
CPUMatrix<ElemType>& InplaceTruncateTop(const ElemType threshold);
CPUMatrix<ElemType>& AssignTruncateTopOf(const CPUMatrix<ElemType>& a, const ElemType threshold);
CPUMatrix<ElemType>& InplaceTruncate(const ElemType threshold);
CPUMatrix<ElemType>& InplaceSoftThreshold(const ElemType threshold);
CPUMatrix<ElemType>& SetToZeroIfAbsLessThan(const ElemType threshold);
ElemType SumOfAbsElements() const; //sum of all abs(elements)
ElemType SumOfElements() const; //sum of all elements
CPUMatrix<ElemType>& AssignSumOfElements(const CPUMatrix<ElemType>& a);
bool IsEqualTo(const CPUMatrix<ElemType>& a, const ElemType threshold = 1e-8) const;
static void VectorSum(const CPUMatrix<ElemType>& a, CPUMatrix<ElemType>& c, const bool isColWise);
void VectorNorm1(CPUMatrix<ElemType>& c, const bool isColWise) const;
CPUMatrix<ElemType>& AssignVectorNorm1Of(CPUMatrix<ElemType>& a, const bool isColWise);
void VectorNorm2(CPUMatrix<ElemType>& c, const bool isColWise) const;
CPUMatrix<ElemType>& AssignVectorNorm2Of(CPUMatrix<ElemType>& a, const bool isColWise);
void AssignNoiseContrastiveEstimation(const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, const CPUMatrix<ElemType>& bias,
CPUMatrix<ElemType>& tmp, CPUMatrix<ElemType>& c);
void AssignSoftmaxSum(const CPUMatrix<ElemType>& a, CPUMatrix<ElemType>& softmax);
void AssignNCEUnnormalizedEval(const CPUMatrix<ElemType>& a,
const CPUMatrix<ElemType>& b, const CPUMatrix<ElemType>& bias, CPUMatrix<ElemType>& c);
CPUMatrix<ElemType>& AssignNCEDerivative(const CPUMatrix<ElemType>& tmp, const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, size_t inputIndex, CPUMatrix<ElemType>& c);
void VectorNormInf(CPUMatrix<ElemType>& c, const bool isColWise) const;
CPUMatrix<ElemType>& AssignVectorNormInfOf(CPUMatrix<ElemType>& a, const bool isColWise);
CPUMatrix<ElemType>& AssignInnerProductOf(const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, const bool isColWise);
CPUMatrix<ElemType>& AssignKhatriRaoProductOf(const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b);
CPUMatrix<ElemType>& AddColumnReshapeProductOf(const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, const bool transposeAColumn);
CPUMatrix<ElemType>& AddWithScaleOf(ElemType alpha, const CPUMatrix<ElemType>& a);
ElemType FrobeniusNorm() const;
CPUMatrix<ElemType>& AssignFrobeniusNormOf(const CPUMatrix<ElemType>& a);
ElemType MatrixNormInf() const;
ElemType MatrixNorm1() const;
ElemType MatrixNorm0() const; //number of non-zero elemets
CPUMatrix<ElemType>& AssignSignOf(const CPUMatrix<ElemType>& a);
CPUMatrix<ElemType>& AddSignOf(const CPUMatrix<ElemType>& a);
CPUMatrix<ElemType>& AssignRowSliceValuesOf(const CPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows);
CPUMatrix<ElemType>& AddToRowSliceValuesOf(const CPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows);
CPUMatrix<ElemType>& AddWithRowSliceValuesOf(const CPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows);
//CPUMatrix<ElemType>& AssignRowStackValuesOf(const std::vector<const CPUMatrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols);
CPUMatrix<ElemType>& AssignToRowSliceValuesOf(const CPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows);
CPUMatrix<ElemType>& AssignRepeatOf(const CPUMatrix<ElemType>& a, const size_t numRowRepeats, const size_t numColRepeats);
CPUMatrix<ElemType>& AddToRowRepeatValuesOf(const CPUMatrix<ElemType>& a, const size_t numRowRepeats);
CPUMatrix<ElemType>& AssignPositiveAndShiftedNegSample(const CPUMatrix<ElemType>& a, const size_t posNumber, const size_t negNumber, const size_t shiftNumber);
CPUMatrix<ElemType>& AddFoldedPositiveAndShiftedNegSample(const CPUMatrix<ElemType>& a, const size_t posNumber, const size_t negNumber, const size_t shiftNumber);
void VectorMax(CPUMatrix<ElemType>& maxIndexes, CPUMatrix<ElemType>& maxValues, const bool isColWise, int topK = 1) const;
void VectorMin(CPUMatrix<ElemType>& minIndexes, CPUMatrix<ElemType>& minValues, const bool isColWise) const;
CPUMatrix<ElemType>& AssignNumOfDiff(const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, bool searchInCol = false);
void Print(const char* matrixName, size_t rowStart, size_t rowEnd, size_t colStart, size_t colEnd) const;
void Print(const char* matrixName = nullptr) const; //print whole matrix. can be expensive
void ReadFromFile(FILE* f, const char* matrixName); //matrixName is used to verify that correct matrix is read.
void WriteToFile(FILE* f, const char* matrixName); //matrixName is used to verify that correct matrix is read.
CPUMatrix<ElemType>& AssignPackedConvolutionInput(const CPUMatrix<ElemType>& inputSubBatch,
const size_t inputWidth, const size_t inputHeight, const size_t inputChannels,
const size_t outputWidth, const size_t outputHeight, const size_t outputChannels,
const size_t kernelWidth, const size_t kernelHeight, const size_t horizontalSubsample, const size_t verticalSubsample,
const bool zeroPadding = false);
CPUMatrix<ElemType>& UnpackConvolutionInput(CPUMatrix<ElemType>& inputSubBatch,
const size_t inputWidth, const size_t inputHeight, const size_t inputChannels,
const size_t outputWidth, const size_t outputHeight, const size_t outputChannels,
const size_t kernelWidth, const size_t kernelHeight, const size_t horizontalSubsample, const size_t verticalSubsample,
const bool zeroPadding = false) const;
CPUMatrix<ElemType>& AssignMaxPoolingResult(const CPUMatrix<ElemType>& inputBatch, const size_t channels,
const size_t inputWidth, const size_t inputHeight, const size_t inputSizePerSample,
const size_t outputWidth, const size_t outputHeight, const size_t outputSizePerSample,
const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample);
CPUMatrix<ElemType>& AddMaxPoolingGradient(const CPUMatrix<ElemType>& outputGradientBatch, const CPUMatrix<ElemType>& inputBatch, const CPUMatrix<ElemType>& outputBatch,
const size_t channels,
const size_t inputWidth, const size_t inputHeight, const size_t inputSizePerSample,
const size_t outputWidth, const size_t outputHeight, const size_t outputSizePerSample,
const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample);
CPUMatrix<ElemType>& AssignAveragePoolingResult(const CPUMatrix<ElemType>& inputBatch, const size_t channels,
const size_t inputWidth, const size_t inputHeight, const size_t inputSizePerSample,
const size_t outputWidth, const size_t outputHeight, const size_t outputSizePerSample,
const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample);
CPUMatrix<ElemType>& AddAveragePoolingGradient(const CPUMatrix<ElemType>& outputGradientBatch,
const size_t channels,
const size_t inputWidth, const size_t inputHeight, const size_t inputSizePerSample,
const size_t outputWidth, const size_t outputHeight, const size_t outputSizePerSample,
const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample);
public:
static int SetNumThreads(int numThreads); // note: this does not depend on <ElemType>, i.e. you can call it on any <ElemType>
//static BLAS functions
static void SVD(const CPUMatrix<ElemType>& A, CPUMatrix<ElemType>& SIGMA, CPUMatrix<ElemType>& U, CPUMatrix<ElemType>& VT, CPUMatrix<ElemType>& W);
static void MultiplyAndWeightedAdd(ElemType alpha, const CPUMatrix<ElemType>& a, const bool transposeA, const CPUMatrix<ElemType>& b, const bool transposeB, ElemType beta, CPUMatrix<ElemType>& c);
static void MultiplyAndAdd(const CPUMatrix<ElemType>& a, const bool transposeA, const CPUMatrix<ElemType>& b, const bool transposeB, CPUMatrix<ElemType>& c);
static void Multiply(const CPUMatrix<ElemType>& a, const bool transposeA, const CPUMatrix<ElemType>& b, const bool transposeB, CPUMatrix<ElemType>& c);
static void Multiply(const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, CPUMatrix<ElemType>& c);
static void Multiply1x1AndWeightedAdd(ElemType alpha, const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, ElemType beta, CPUMatrix<ElemType>& c);
static void ScaleAndAdd(ElemType alpha, const CPUMatrix<ElemType>& a, CPUMatrix<ElemType>& c);
static void AddScaledDifference(const ElemType alpha, const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, CPUMatrix<ElemType>& c);
static void AssignScaledDifference(const ElemType alpha, const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, CPUMatrix<ElemType>& c);
static void AddScaledDifference(const CPUMatrix<ElemType>& alpha, const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, CPUMatrix<ElemType>& c); //alpha must be 1X1
static void AssignScaledDifference(const CPUMatrix<ElemType>& alpha, const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, CPUMatrix<ElemType>& c); //alpha must be 1X1
static void AddElementToElement(const CPUMatrix<ElemType>& a, const size_t ai, const size_t aj, CPUMatrix<ElemType>& c, const size_t ci, const size_t cj);
//static void AddLogElementToElement(const CPUMatrix<ElemType>& a, const size_t ai, const size_t aj, CPUMatrix<ElemType>& c, const size_t ci, const size_t cj);
static void AssignElementToElement(const CPUMatrix<ElemType>& a, const size_t ai, const size_t aj, CPUMatrix<ElemType>& c, const size_t ci, const size_t cj);
static void MinusOneAt(CPUMatrix<ElemType>& c, const size_t position);
static void Scale(ElemType alpha, CPUMatrix<ElemType>& a);
static void Scale(CPUMatrix<ElemType> alpha, CPUMatrix<ElemType>& a); //In this case Matrix alpha must be 1x1
static void Scale(ElemType alpha, const CPUMatrix<ElemType>& a, CPUMatrix<ElemType>& c);
static void InnerProduct(const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, CPUMatrix<ElemType>& c, const bool isColWise);
static ElemType InnerProductOfMatrices(const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b);
static void ElementWisePower(ElemType alpha, const CPUMatrix<ElemType>& a, CPUMatrix<ElemType>& c);
static bool AreEqual(const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, const ElemType threshold = 1e-8);
static void TensorShuffleScaleAndAdd(ElemType keepWeight, const CPUMatrix<ElemType>& a, size_t D, size_t S, size_t M, size_t K, size_t T, ElemType scaleFactor, const CPUMatrix<ElemType>& b, CPUMatrix<ElemType>& c);
void TensorOp(ElemType beta, const CPUMatrix<ElemType>& a, ElemType alpha, ElementWiseOperator op,
const std::array<size_t, 2>& offsets,
const SmallVector<size_t>& regularOpDims, const std::array<SmallVector<ptrdiff_t>, 2>& regularStrides,
const SmallVector<size_t>& reducingOpDims, const std::array<SmallVector<ptrdiff_t>, 2>& reducingStrides);
void TensorOp(ElemType beta, const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, ElemType alpha, ElementWiseOperator op,
const std::array<size_t, 3>& offsets,
const SmallVector<size_t>& regularOpDims, const std::array<SmallVector<ptrdiff_t>, 3>& regularStrides,
const SmallVector<size_t>& reducingOpDims, const std::array<SmallVector<ptrdiff_t>, 3>& reducingStrides);
void TensorOp(ElemType beta, const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, const CPUMatrix<ElemType>& c, ElemType alpha, ElementWiseOperator op,
const std::array<size_t, 4>& offsets,
const SmallVector<size_t>& regularOpDims, const std::array<SmallVector<ptrdiff_t>, 4>& regularStrides,
const SmallVector<size_t>& reducingOpDims, const std::array<SmallVector<ptrdiff_t>, 4>& reducingStrides);
static CPUMatrix<ElemType> Ones(const size_t rows, const size_t cols);
static CPUMatrix<ElemType> Zeros(const size_t rows, const size_t cols);
static CPUMatrix<ElemType> Eye(const size_t rows);
static CPUMatrix<ElemType> RandomUniform(const size_t rows, const size_t cols, const ElemType low, const ElemType high, unsigned long seed = USE_TIME_BASED_SEED);
static CPUMatrix<ElemType> RandomGaussian(const size_t rows, const size_t cols, const ElemType mean, const ElemType sigma, unsigned long seed = USE_TIME_BASED_SEED);
/// return true if v is an element in matrix c
static bool HasElement(const CPUMatrix<ElemType>& a, const ElemType v = 0.0);
public:
CPUMatrix<ElemType>& AssignElementProductOfWithShiftNeg(const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, size_t shift, size_t negnumber);
static void InnerProductWithShiftNeg(const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, CPUMatrix<ElemType>& c, const bool isColWise, size_t shift, size_t negnumber);
// extract out a row from a, assign it to [this].
CPUMatrix<ElemType>& GetARowByIndex(const CPUMatrix<ElemType>& a, const size_t index);
static void ConductRowElementMultiplyWithShift(const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, CPUMatrix<ElemType>& c, const size_t shift, bool bFirstmatrixfixed);
CPUMatrix<ElemType>& AssignElementProductOfWithShift(const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, const size_t shift);
public:
friend File& operator>>(File& stream, CPUMatrix<ElemType>& us)
{
stream.GetMarker(fileMarkerBeginSection, std::wstring(L"BMAT"));
size_t elsize;
stream >> elsize;
if (sizeof(ElemType) != elsize)
RuntimeError("Template argument size doesn't match those in file");
std::wstring matrixName;
size_t numRows, numCols;
int format;
stream >> matrixName >> format >> numRows >> numCols;
ElemType* d_array = new ElemType[numRows * numCols];
for (size_t i = 0; i < numRows * numCols; ++i)
stream >> d_array[i];
stream.GetMarker(fileMarkerEndSection, std::wstring(L"EMAT"));
us.SetValue(numRows, numCols, d_array, matrixFlagNormal);
if (us.m_matrixName)
delete[] us.m_matrixName;
us.m_matrixName = new wchar_t[matrixName.length() + 1];
wmemcpy(us.m_matrixName, matrixName.c_str(), matrixName.length() + 1);
delete[] d_array;
return stream;
}
friend File& operator<<(File& stream, const CPUMatrix<ElemType>& us)
{
stream.PutMarker(fileMarkerBeginSection, std::wstring(L"BMAT"));
stream << sizeof(ElemType);
std::wstring s = (us.m_matrixName == NULL) ? std::wstring(L"unnamed") : std::wstring(us.m_matrixName);
int format = us.m_format;
stream << s << format;
stream << us.m_numRows << us.m_numCols;
for (size_t i = 0; i < us.GetNumElements(); ++i)
stream << us.m_pArray[i];
stream.PutMarker(fileMarkerEndSection, std::wstring(L"EMAT"));
return stream;
}
public:
ElemType LogAddSumOfElements() const;
public:
/// for RCRF
static void RCRFBackwardCompute(const CPUMatrix<ElemType>& alpha, CPUMatrix<ElemType>& beta,
const CPUMatrix<ElemType>& lbls,
const CPUMatrix<ElemType>& pair_scores);
static void _rcrfBackwardCompute(size_t t, size_t k, const CPUMatrix<ElemType>& alpha,
CPUMatrix<ElemType>& beta,
const CPUMatrix<ElemType>& pair_scores);
static void RCRFTransGrdCompute(const CPUMatrix<ElemType>& lbls,
const CPUMatrix<ElemType>& alpha,
const CPUMatrix<ElemType>& beta,
const CPUMatrix<ElemType>& pair_scores,
CPUMatrix<ElemType>& grd);
static void _rcrfTransGrdCompute(size_t i,
const CPUMatrix<ElemType>& lbls,
const CPUMatrix<ElemType>& alpha,
const CPUMatrix<ElemType>& beta,
const CPUMatrix<ElemType>& pair_scores,
CPUMatrix<ElemType>& grd,
const size_t tPos /// position
);
protected:
size_t LocateElement(const size_t i, const size_t j) const;
size_t LocateColumn(const size_t j) const;
private:
void ZeroInit(); //should only be used by constructors.
void Clear();
};
typedef CPUMatrix<float> CPUSingleMatrix;
typedef CPUMatrix<double> CPUDoubleMatrix;
} } }