//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//

#include "stdafx.h"
#ifdef _WIN32
#define _SCL_SECURE_NO_WARNINGS
#endif
#include "CNTKLibrary.h"
#include "CompositeFunction.h"
#include "Utils.h"
#include "Value.h"
#include "Matrix.h"
#include "CommonMatrix.h"
#include "CPUSparseMatrix.h"
#include "RecurrentNodes.h"

namespace CNTK
{
    Value::Value(const NDArrayViewPtr& data)
        : Value(data, nullptr)
    {
    }

    Value::Value(const NDArrayViewPtr& data, const NDMaskPtr& mask)
        : m_data(data), m_mask(mask)
    {
        if (mask != nullptr)
        {
            auto dataShape = data->Shape();
            auto maskShape = mask->Shape();

            if (maskShape.Rank() > dataShape.Rank())
                InvalidArgument("The rank (%zu) of the mask of a Value object cannot exceed the rank (%zu) of the data NDArrayView object", maskShape.Rank(), dataShape.Rank());

            if (dataShape.SubShape(dataShape.Rank() - maskShape.Rank()) != maskShape)
                InvalidArgument("Invalid Value object: data and mask are incompatible. The %s dimensions of the data with shape '%S' "
                                "do not match the dimensions of the mask with shape '%S'",
                                Internal::IsReversingTensorShapesInErrorMessagesEnabled() ? "leading" : "trailing",
                                dataShape.AsString().c_str(), maskShape.AsString().c_str());
        }
    }

    //
    // Create an NDMask for the 'sequences' if the 'sequences' do not have the same length.
    // It returns null if all the 'sequences' have the same length.
    //
    template <typename T>
    static NDMaskPtr CreateMask(size_t numElementsPerSample, const std::vector<std::vector<T>>& sequences, const std::vector<bool>& sequenceStartFlags, const DeviceDescriptor& device)
    {
        size_t numSequences = sequences.size();
        std::vector<size_t> sequenceLengths(numSequences);
        for (size_t i = 0; i < numSequences; ++i)
            sequenceLengths[i] = sequences[i].size() / numElementsPerSample;

        return CreateMask(sequenceLengths, sequenceStartFlags, device);
    }

    template <typename ElementType>
    /*static*/ ValuePtr Value::Create(const NDShape& sampleShape, const std::vector<std::vector<size_t>>& oneHotSequences, const std::vector<bool>& sequenceStartFlags, const DeviceDescriptor& device, bool readOnly/* = false*/)
    {
        if (oneHotSequences.size() == 0)
            InvalidArgument("Value::Create: The number of sequences must be > 0");

        if (sampleShape.Rank() < 1)
            InvalidArgument("Value::Create: The sample rank must be > 0");

        auto dimension = sampleShape[0];
        auto numElementsPerSample = sampleShape.SubShape(1).TotalSize();
        NDMaskPtr deviceValueMask = CreateMask(numElementsPerSample, oneHotSequences, sequenceStartFlags, DeviceDescriptor::CPUDevice());
        // If deviceValueMask is null, all the sequences have the same length.
        size_t maxSequenceLength = (deviceValueMask == nullptr) ? (oneHotSequences[0].size() / numElementsPerSample) : deviceValueMask->Shape()[0];
        size_t maxSequenceNumCols = maxSequenceLength * numElementsPerSample;

        size_t numSequences = oneHotSequences.size();
        NDShape valueDataShape = sampleShape.AppendShape({ maxSequenceLength, numSequences });
        size_t numCSCCols = valueDataShape.SubShape(1).TotalSize() + 1;
        std::vector<SparseIndexType> colStarts(numCSCCols);
        std::vector<ElementType> nonZeroValues;
        std::vector<SparseIndexType> rowIndices;
        for (size_t i = 0; i < numSequences; ++i)
        {
            size_t currentSequenceNumCols = oneHotSequences[i].size();
            size_t j = 0;
            for (; j < currentSequenceNumCols; ++j)
            {
                colStarts[(i * maxSequenceNumCols) + j] = (SparseIndexType)nonZeroValues.size();
                size_t oneHotIdx = oneHotSequences[i][j];
                // Note that OneHotSkip used to be (size_t)-1 and was later changed to (uint32_t)-1; both are supported.
                if ((oneHotIdx & OneHotSkip) == OneHotSkip)
                {
                    nonZeroValues.push_back(0);
                    rowIndices.push_back(0);
                }
                else
                {
                    nonZeroValues.push_back(1);
                    if (oneHotIdx >= dimension)
                        InvalidArgument("Value::Create: one-hot index value (%zu) exceeds vocabulary size (%zu).", oneHotSequences[i][j], dimension);

                    rowIndices.push_back((SparseIndexType)(oneHotSequences[i][j]));
                }
            }

            for (; j < maxSequenceNumCols; ++j)
                colStarts[(i * maxSequenceNumCols) + j] = (SparseIndexType)(nonZeroValues.size());
        }

        colStarts[numCSCCols - 1] = (SparseIndexType)(nonZeroValues.size());
        NDArrayViewPtr deviceValueData = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), valueDataShape, colStarts.data(), rowIndices.data(), nonZeroValues.data(), nonZeroValues.size(), device, readOnly);
        return MakeSharedObject<Value>(deviceValueData, deviceValueMask);
    }

    template <typename ElementType>
    /*static*/ void Value::AppendSparseSequenceData(const NDArrayViewPtr& sequenceData, std::vector<SparseIndexType>& colStarts, std::vector<SparseIndexType>& rowIndices, std::vector<char>& nonZeroValues, size_t maxSequenceLengthInCols)
    {
        size_t existingNumNonZeroValues = nonZeroValues.size() / sizeof(ElementType);
        std::vector<SparseIndexType> currentSequencePaddedColStarts(maxSequenceLengthInCols);

        auto matrix = sequenceData->GetMatrix<ElementType>();
        matrix->TransferToDeviceIfNotThere(AsCNTKImplDeviceId(DeviceDescriptor::CPUDevice()), true);
        auto cpuSparseMatrix = matrix->m_CPUSparseMatrix;
        auto currentSequenceNumCols = matrix->GetNumCols();
        auto currentSequenceColStarts = cpuSparseMatrix->SecondaryIndexLocation();
        auto currentSequenceNumNonZeroValues = currentSequenceColStarts[currentSequenceNumCols] - currentSequenceColStarts[0];
        std::copy(cpuSparseMatrix->MajorIndexLocation(), cpuSparseMatrix->MajorIndexLocation() + currentSequenceNumNonZeroValues, std::back_inserter(rowIndices));
        std::copy((char*)(cpuSparseMatrix->Data()), (char*)(cpuSparseMatrix->Data() + currentSequenceNumNonZeroValues), std::back_inserter(nonZeroValues));

        for (size_t j = 0; j < currentSequenceNumCols; ++j)
            currentSequencePaddedColStarts[j] = existingNumNonZeroValues + (currentSequenceColStarts[j] - currentSequenceColStarts[0]);

        for (size_t j = currentSequenceNumCols; j < maxSequenceLengthInCols; ++j)
            currentSequencePaddedColStarts[j] = existingNumNonZeroValues + currentSequenceNumNonZeroValues;

        std::copy(currentSequencePaddedColStarts.begin(), currentSequencePaddedColStarts.end(), std::back_inserter(colStarts));
    }

    /*static*/ ValuePtr Value::Create(const NDShape& sampleShape, const std::vector<NDArrayViewPtr>& sequences, const std::vector<bool>& sequenceStartFlags, const DeviceDescriptor& device, bool readOnly, bool createNewCopy)
    {
        auto numSequences = sequences.size();
        if (numSequences == 0)
            InvalidArgument("Value::Create: The number of sequences must be > 0");

        std::vector<size_t> sequenceLengths(numSequences);
        size_t maxSequenceLength = 0;
        auto dataType = sequences[0]->GetDataType();
        auto storageFormat = sequences[0]->GetStorageFormat();
        NDShape fullyDefinedSampleShape = sampleShape;
        for (size_t i = 0; i < numSequences; ++i)
        {
            auto currentSequenceData = sequences[i];
            if (currentSequenceData->GetDataType() != dataType)
                InvalidArgument("Value::Create: The data for all sequences/samples must have the same data type");

            if (currentSequenceData->GetStorageFormat() != storageFormat)
                InvalidArgument("Value::Create: All NDArrayView objects must have the same storage format");

            if ((numSequences > 1) && (currentSequenceData->Device() != DeviceDescriptor::CPUDevice()))
                InvalidArgument("Value::Create: All NDArrayView objects must be located on the CPU");

            auto currentSequenceDataShape = currentSequenceData->Shape();
            // Since scalar samples can be rank=1 with dim=1, we automatically pad the sequence data shape with a
            // leading axis of dim=1 if the sequence data shape's leading axis's dimensionality is not 1.
            if ((fullyDefinedSampleShape.Rank() == 1) && !fullyDefinedSampleShape.HasUnboundDimension() && (fullyDefinedSampleShape.TotalSize() == 1) &&
                (currentSequenceDataShape.Rank() > 0) && (currentSequenceDataShape[0] != 1))
                currentSequenceDataShape = NDShape(1, 1).AppendShape(currentSequenceDataShape);

            if ((currentSequenceDataShape.Rank() < fullyDefinedSampleShape.Rank()) || (currentSequenceDataShape.Rank() > (fullyDefinedSampleShape.Rank() + 1)))
                InvalidArgument("Value::Create: The shape '%S' of sequence #%zu is not compatible with the sample shape '%S'.",
                                currentSequenceData->Shape().AsString().c_str(), i, sampleShape.AsString().c_str());

            auto sequenceValueVarSubshape = currentSequenceDataShape.SubShape(0, fullyDefinedSampleShape.Rank());
            if (sequenceValueVarSubshape != fullyDefinedSampleShape)
            {
                for (size_t k = 0; k < fullyDefinedSampleShape.Rank(); ++k)
                {
                    if (fullyDefinedSampleShape[k] == NDShape::FreeDimension)
                        fullyDefinedSampleShape[k] = sequenceValueVarSubshape[k];
                    else if (fullyDefinedSampleShape[k] != sequenceValueVarSubshape[k])
                        InvalidArgument("Value::Create: The shape '%S' of sequence #%zu is not compatible with the sample shape '%S'.",
                                        currentSequenceData->Shape().AsString().c_str(), i, sampleShape.AsString().c_str());
                }
            }

            sequenceLengths[i] = currentSequenceDataShape.SubShape(fullyDefinedSampleShape.Rank()).TotalSize();
            maxSequenceLength = std::max(maxSequenceLength, sequenceLengths[i]);
        }

        bool isDataSparse = sequences[0]->IsSparse();

        NDMaskPtr deviceValueMask = CreateMask(sequenceLengths, sequenceStartFlags, DeviceDescriptor::CPUDevice());

        NDArrayViewPtr valueData;
        NDShape valueDataShape = fullyDefinedSampleShape.AppendShape({ maxSequenceLength, numSequences });
        if (numSequences == 1)
        {
            if (createNewCopy)
                valueData = sequences[0]->DeepClone();
            else
                valueData = sequences[0];

            // We can use the original buffer directly but need to reshape to the valueDataShape.
            valueData = valueData->AsShape(valueDataShape);
        }
        else
        {
            if (isDataSparse)
            {
                if (storageFormat != StorageFormat::SparseCSC)
                    LogicError("Value::Create: currently only sparse data in the SparseCSC format is supported");

                auto numColsPerSample = fullyDefinedSampleShape.SubShape(ShapeRowColSplitPoint(fullyDefinedSampleShape, isDataSparse, /*noDynamicAxes =*/ false)).TotalSize();

                std::vector<SparseIndexType> colStarts;
                std::vector<SparseIndexType> rowIndices;
                std::vector<char> nonZeroValues;
                for (size_t i = 0; i < numSequences; ++i)
                {
                    switch (dataType)
                    {
                    case DataType::Float:
                        AppendSparseSequenceData<float>(sequences[i], colStarts, rowIndices, nonZeroValues, maxSequenceLength * numColsPerSample);
                        break;
                    case DataType::Double:
                        AppendSparseSequenceData<double>(sequences[i], colStarts, rowIndices, nonZeroValues, maxSequenceLength * numColsPerSample);
                        break;
                    default:
                        NOT_IMPLEMENTED;
                    }
                }

                auto totalNumNonZeroValues = nonZeroValues.size() / DataTypeSize(dataType);
                colStarts.push_back(totalNumNonZeroValues);

                valueData = MakeSharedObject<NDArrayView>(dataType, valueDataShape, colStarts.data(), rowIndices.data(), (void*)nonZeroValues.data(), totalNumNonZeroValues, device, readOnly);
            }
            else
            {
                valueData = MakeSharedObject<NDArrayView>(dataType, valueDataShape, DeviceDescriptor::CPUDevice());
                auto maxSequenceSizeInElements = fullyDefinedSampleShape.TotalSize() * maxSequenceLength;
                switch (dataType)
                {
                case DataType::Float:
                {
                    float* dataBuffer = valueData->WritableDataBuffer<float>();
                    for (size_t i = 0; i < numSequences; ++i)
                    {
                        const float* currentSequenceBuffer = sequences[i]->DataBuffer<float>();
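                        // Sequence i occupies a contiguous block of maxSequenceSizeInElements elements in the packed
                        // dense buffer; a shorter sequence only fills the leading part of its block, and the trailing
                        // steps are marked invalid via deviceValueMask.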
                        auto currentSequenceSizeInElements = sequences[i]->Shape().TotalSize();
                        std::copy(currentSequenceBuffer, currentSequenceBuffer + currentSequenceSizeInElements, dataBuffer + (maxSequenceSizeInElements * i));
                    }
                    break;
                }
                case DataType::Double:
                {
                    double* dataBuffer = valueData->WritableDataBuffer<double>();
                    for (size_t i = 0; i < numSequences; ++i)
                    {
                        const double* currentSequenceBuffer = sequences[i]->DataBuffer<double>();
                        auto currentSequenceSizeInElements = sequences[i]->Shape().TotalSize();
                        std::copy(currentSequenceBuffer, currentSequenceBuffer + currentSequenceSizeInElements, dataBuffer + (maxSequenceSizeInElements * i));
                    }
                    break;
                }
                default:
                    NOT_IMPLEMENTED;
                }
            }
        }

        NDArrayViewPtr deviceValueData;
        if (device == valueData->Device())
        {
            if (readOnly)
                deviceValueData = valueData->Alias(readOnly);
            else
                deviceValueData = valueData;
        }
        else
            deviceValueData = valueData->DeepClone(device, readOnly);

        return MakeSharedObject<Value>(deviceValueData, deviceValueMask);
    }

    template <typename ElementType>
    /*static*/ ValuePtr Value::Create(const NDShape& sampleShape, const std::vector<std::vector<ElementType>>& sequences, const std::vector<bool>& sequenceStartFlags, const DeviceDescriptor& device, bool readOnly)
    {
        // Create an NDArrayView object wrapping each of the vectors representing a sequence.
        size_t numElementsPerSample = sampleShape.TotalSize();
        size_t numSequences = sequences.size();
        std::vector<NDArrayViewPtr> sequencesData;
        for (size_t i = 0; i < numSequences; ++i)
        {
            auto& currentSequence = sequences[i];
            if ((currentSequence.size() % numElementsPerSample) != 0)
                InvalidArgument("Value::Create: The number of elements (%zu) in the vector containing sequence data must be a multiple of the size (%zu) of the specified sample shape '%S'",
                                currentSequence.size(), numElementsPerSample, sampleShape.AsString().c_str());

            auto sequenceLength = currentSequence.size() / numElementsPerSample;
            auto sequenceDataShape = sampleShape.AppendShape({ sequenceLength });
            sequencesData.push_back(MakeSharedObject<NDArrayView>(sequenceDataShape, currentSequence));
        }

        return Create(sampleShape, sequencesData, sequenceStartFlags, device, readOnly, /*createNewCopy =*/ true);
    }

    template <typename ElementType>
    /*static*/ ValuePtr Value::CreateBatch(const NDShape& sampleShape, const std::vector<ElementType>& batchData, const DeviceDescriptor& device, bool readOnly /*= false */)
    {
        auto shapeSize = sampleShape.TotalSize();

        if (batchData.size() % shapeSize != 0)
            InvalidArgument("The number of elements (%zu) in the vector containing batch data must be a multiple of the size (%zu) of the sample shape '%S'.",
                            batchData.size(), shapeSize, sampleShape.AsString().c_str());

        auto numOfSequences = batchData.size() / shapeSize;
        std::vector<NDArrayViewPtr> sequencesView(numOfSequences);
        for (size_t i = 0; i < numOfSequences; i++)
        {
            // Sequence length is 1.
            auto sequenceDataShape = sampleShape.AppendShape({ 1 });
            sequencesView[i] = MakeSharedObject<NDArrayView>(sequenceDataShape, batchData.data() + i * shapeSize, shapeSize, DeviceDescriptor::CPUDevice());
        }
        // Passing an empty seqStartFlags vector means that all sequences have their start flag set to true.
        return Create(sampleShape, sequencesView, {}, device, readOnly, /*createNewCopy =*/ true);
    }
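    // Illustrative usage of CreateBatch above (hypothetical values): a flat buffer of 3 samples with shape {2},
    // where each sample becomes a sequence of length 1.
    //
    //   std::vector<float> batchData = { 1, 2,  3, 4,  5, 6 }; // 3 samples x 2 elements each
    //   auto value = Value::CreateBatch(NDShape({ 2 }), batchData, DeviceDescriptor::CPUDevice());
    //   // value->Shape() is {2 x 1 x 3}: sample shape, max sequence length (1), number of sequences (3).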
    template <typename ElementType>
    /*static*/ ValuePtr Value::CreateSequence(const NDShape& sampleShape, const std::vector<ElementType>& sequenceData, bool sequenceStartFlag, const DeviceDescriptor& device, bool readOnly /*= false */)
    {
        auto shapeSize = sampleShape.TotalSize();

        if (sequenceData.size() % shapeSize != 0)
            InvalidArgument("The number of elements (%zu) in the sequence data must be a multiple of the size (%zu) of the sample shape '%S'",
                            sequenceData.size(), shapeSize, sampleShape.AsString().c_str());

        auto sequenceLength = sequenceData.size() / shapeSize;
        std::vector<NDArrayViewPtr> sequencesView(1);
        auto sequenceDataShape = sampleShape.AppendShape({ sequenceLength });
        sequencesView[0] = MakeSharedObject<NDArrayView>(sequenceDataShape, sequenceData);
        return Create(sampleShape, sequencesView, { sequenceStartFlag }, device, readOnly, /*createNewCopy =*/ true);
    }

    template <typename ElementType>
    /*static*/ ValuePtr Value::CreateBatch(size_t dimension, const std::vector<size_t>& batchData, const DeviceDescriptor& device, bool readOnly/* = false*/)
    {
        // TODO: avoid data copy.
        std::vector<std::vector<size_t>> input(batchData.size());
        for (size_t i = 0; i < batchData.size(); i++)
        {
            input[i] = { batchData[i] };
        }
        // Passing an empty seqStartFlags vector means that all sequences have their start flag set to true.
        return Create<ElementType>(dimension, input, {}, device, readOnly);
    }

    template <typename ElementType>
    /*static*/ ValuePtr Value::CreateSequence(size_t dimension, const std::vector<size_t>& sequenceData, bool sequenceStartFlag, const DeviceDescriptor& device, bool readOnly/* = false*/)
    {
        // TODO: avoid data copy.
        std::vector<std::vector<size_t>> input = { sequenceData };
        return Create<ElementType>(dimension, input, { sequenceStartFlag }, device, readOnly);
    }

    template <typename ElementType>
    /*static*/ ValuePtr Value::CreateSequence(const NDShape& sampleShape, size_t sequenceLength, const SparseIndexType* colStarts, const SparseIndexType* rowIndices, const ElementType* nonZeroValues, size_t numNonZeroValues, bool sequenceStartFlag, const DeviceDescriptor& device, bool readOnly/* = false*/)
    {
        auto sequenceShape = sampleShape.AppendShape({ sequenceLength });
        auto sequenceData = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), sequenceShape, colStarts, rowIndices, nonZeroValues, numNonZeroValues, device, readOnly);
        return Create(sampleShape, { sequenceData }, { sequenceStartFlag }, device, readOnly, false);
    }

    /*virtual*/ Value::~Value()
    {
    }

    /*virtual*/ void Value::Erase()
    {
        m_data = nullptr;
        m_mask = nullptr;
    }

    /*virtual*/ NDArrayViewPtr Value::Data() const
    {
        if (!m_data)
        {
            RuntimeError("This Value object is invalid and can no longer be accessed. This usually happens when a temporary Value object returned by the CNTK library"
                         " is not cloned and accessed later after it has been erased by the library. The Value objects created inside and returned by the library from APIs "
                         "like Forward, Backward etc. are temporary and are only guaranteed to be valid until the next Forward/Backward call. If you want to access the Values "
                         "later, you must explicitly clone them.");
        }

        // TODO: Check if this is a derived type and throw an exception in that case
        return m_data;
    }

    /*virtual*/ NDMaskPtr Value::Mask() const
    {
        // TODO: Check if this is a derived type and throw an exception in that case
        return m_mask;
    }

    /*virtual*/ ValuePtr Value::DeepClone(bool readOnly/* = false*/) const
    {
        // TODO: Check if this is a derived type and throw an exception in that case
        return MakeSharedObject<Value>(Data()->DeepClone(readOnly), (Mask() != nullptr) ? Mask()->DeepClone() : nullptr);
    }
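    // Note: DeepClone() above copies the underlying data (and mask), whereas Alias() below returns a Value that
    // shares the same underlying NDArrayView/NDMask storage, optionally as a read-only view.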
    /*virtual*/ ValuePtr Value::Alias(bool readOnly/* = false*/) const
    {
        // TODO: Check if this is a derived type and throw an exception in that case
        return MakeSharedObject<Value>(Data()->Alias(readOnly), (Mask() != nullptr) ? Mask()->Alias() : nullptr);
    }

    /*virtual*/ void Value::CopyFrom(const Value& source)
    {
        // TODO: Check if this is a derived type and throw an exception in that case
        Data()->CopyFrom(*source.Data());
        if ((Mask() == nullptr) && (source.Mask() != nullptr))
            InvalidArgument("Value::CopyFrom: Invalid source object; cannot copy a Value with a mask into 'this' Value which does not have a mask.");

        if (source.Mask() != nullptr)
            Mask()->CopyFrom(*source.Mask());
        else
        {
            if (Mask() != nullptr)
            {
                // Clear the mask
                Mask()->Clear();
            }
        }
    }

    void Value::GetSequenceStartsAndLengths(const NDMaskPtr& mask, std::vector<ptrdiff_t>& sequenceBeginIndices, std::vector<size_t>& sequenceLengths, size_t numDynamicAxes)
    {
        if (!mask)
            return;

        auto cpuMask = mask;
        if (mask->Device() != DeviceDescriptor::CPUDevice())
            cpuMask = mask->DeepClone(DeviceDescriptor::CPUDevice());

        const MaskKind* maskBuffer = cpuMask->DataBuffer();
        size_t maxNumTimeSteps, numSequences;
        std::tie(maxNumTimeSteps, numSequences) = GetNumTimeStepsAndSequences(mask->Shape(), numDynamicAxes);

        assert(sequenceLengths.size() == numSequences);
        assert(sequenceBeginIndices.size() == numSequences);

        for (size_t i = 0; i < numSequences; ++i)
        {
            MaskKind firstMaskEntry = maskBuffer[i * maxNumTimeSteps];
            if (firstMaskEntry == MaskKind::SequenceBegin)
                sequenceBeginIndices[i] = 0;
            else if (firstMaskEntry == MaskKind::Valid)
                sequenceBeginIndices[i] = Microsoft::MSR::CNTK::SentinelValueIndicatingUnspecifedSequenceBeginIdx;
            else
                LogicError("The first entry of a Value mask must be Valid or SequenceBegin");

            size_t currentSequenceLength = 1;
            bool currentSequenceEndAlreadyFound = false;
            for (size_t j = 1; j < maxNumTimeSteps; ++j)
            {
                if (maskBuffer[(i * maxNumTimeSteps) + j] == MaskKind::Invalid)
                    currentSequenceEndAlreadyFound = true;
                else
                {
                    if (currentSequenceEndAlreadyFound)
                        InvalidArgument("Invalid Value object; only trailing steps of a sequence can be masked.");

                    currentSequenceLength++;
                }
            }

            sequenceLengths[i] = currentSequenceLength;
        }
    }

    template <typename ElementType, typename DestType>
    void DirectCopy(const ElementType* source, size_t elementCount, std::vector<DestType>& dest);

    template <typename ElementType, typename DestType>
    void CopyDenseToOneHot(const ElementType* source, const size_t sampleCount, const size_t sampleSize, std::vector<DestType>& dest);

    template <typename ElementType>
    void Value::CopyVariableValueToVector(const Variable& outputVariable, std::vector<std::vector<ElementType>>& sequences)
    {
        // Check that the data type matches.
        if (AsDataType<ElementType>() != GetDataType())
            InvalidArgument("The specified ElementType %s does not match the DataType %s", typeid(ElementType).name(), DataTypeName(GetDataType()));

        CopyVariableValueToImpl<ElementType, ElementType>(outputVariable, sequences);
    }

    template <typename ElementType>
    void Value::CopyVariableValueToVector(const Variable& outputVariable, std::vector<std::vector<size_t>>& sequences)
    {
        if (outputVariable.Shape()[0] != outputVariable.Shape().TotalSize())
            InvalidArgument("For sparse data, the outputVariable's leading axis dimensionality (%zu) must equal the total size (%zu) of the Variable '%S'.",
                            outputVariable.Shape()[0], outputVariable.Shape().TotalSize(), outputVariable.AsString().c_str());

        CopyVariableValueToImpl<ElementType, size_t>(outputVariable, sequences);
    }
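    // CopyVariableValueToImpl below is shared by both CopyVariableValueToVector overloads: for dense output
    // (DestType == ElementType) the samples are copied element-by-element via DirectCopy, while for one-hot output
    // (DestType == size_t) each dense sample is converted to the index of its single non-zero entry via
    // CopyDenseToOneHot.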
    template <typename ValueType, typename DestType>
    void Value::CopyVariableValueToImpl(const Variable& outputVariable, std::vector<std::vector<DestType>>& sequences)
    {
        // PackedValue should be automatically unpacked when accessing Data() and Mask().
        NDShape inferredVarShape;
        size_t numOfSequences;
        size_t maxSequenceLen;
        // Verify the compatibility of 'this' Value and outputVariable, get the sequence and batch lengths,
        // and get the inferred shape if the variable has a free dimension.
        std::tie(maxSequenceLen, numOfSequences) = GetSequenceAndBatchLength(outputVariable, &inferredVarShape);

        if (sequences.size() < numOfSequences)
            RuntimeError("The size of the output buffer (%zu) is smaller than the number (%zu) of sequences.", sequences.size(), numOfSequences);

        // Copy data to the CPU device if required.
        const ValueType* valueData;
        NDArrayViewPtr cpuArrayView;
        if (Device().Type() == DeviceKind::GPU)
        {
            // TODO: leverage sparse if the original NDArrayView is sparse.
            cpuArrayView = MakeSharedObject<NDArrayView>(GetDataType(), Shape(), DeviceDescriptor::CPUDevice());
            cpuArrayView->CopyFrom(*Data());
        }
        else if (Device().Type() == DeviceKind::CPU)
        {
            // TODO: directly process sparse data without a copy.
            if (GetStorageFormat() != StorageFormat::Dense)
            {
                cpuArrayView = MakeSharedObject<NDArrayView>(GetDataType(), Shape(), DeviceDescriptor::CPUDevice());
                cpuArrayView->CopyFrom(*Data());
            }
            else
            {
                cpuArrayView = Data();
            }
        }
        else
        {
            LogicError("Invalid device type (%u).", (unsigned int)Device().Type());
        }
        valueData = cpuArrayView->DataBuffer<ValueType>();

        auto sampleSize = inferredVarShape.TotalSize();
        for (size_t seqIndex = 0; seqIndex < numOfSequences; seqIndex++)
        {
            size_t seqStart = seqIndex * maxSequenceLen;

            // The assumption here is that a sequence always starts at 0 (no invalid mark at the beginning)
            // and ends at the first invalid mask, so there is no need to check the NDMask again.
            // The sequences buffer has been resized to match the number of sequences and the length of each sequence in the Value object.

            // TODO: if function pointers or lambdas could support templates, switch to using them.
            if (std::is_same<DestType, size_t>::value)
            {
                // If the output is in the one-hot vector format, each value in sequences[seqIndex] is an index which represents a sample of sampleSize elements.
                CopyDenseToOneHot<ValueType, DestType>(valueData + seqStart * sampleSize, sequences[seqIndex].size(), sampleSize, sequences[seqIndex]);
            }
            else
            {
                // If the output is in the dense format, each value in sequences[seqIndex] represents an element of a sample.
                DirectCopy<ValueType, DestType>(valueData + seqStart * sampleSize, sequences[seqIndex].size(), sequences[seqIndex]);
            }
        }
    }

    std::pair<size_t, size_t> Value::GetSequenceAndBatchLength(const Variable& outputVariable, NDShape* inferredVarShape)
    {
        Utils::VerifyVariableValueCompatibility(outputVariable, shared_from_this(), inferredVarShape);

        size_t varRank = outputVariable.Shape().Rank();
        size_t maxSequenceLength = 1;
        size_t numSequences = 1;
        std::tie(maxSequenceLength, numSequences) = GetNumTimeStepsAndSequences(Shape().SubShape(varRank), outputVariable.DynamicAxes().size());

        return std::pair<size_t, size_t>(maxSequenceLength, numSequences);
    }

    template <typename ElementType>
    std::tuple<size_t, size_t, size_t> Value::ValidateSparseCSCAndGetIndexBufferSizes(const Variable& outputVariable)
    {
        auto varShape = outputVariable.Shape();
        if (varShape.IsUnknown() || varShape.HasInferredDimension())
            InvalidArgument("The outputVariable '%S' shape '%S' is of unknown shape or has an inferred dimension for at least one axis.",
                            outputVariable.AsString().c_str(), varShape.AsString().c_str());

        if (!outputVariable.IsSparse())
            InvalidArgument("The outputVariable '%S' must be in the sparse format.", outputVariable.AsString().c_str());

        size_t numOfSequences;
        size_t maxSequenceLen;
        std::tie(maxSequenceLen, numOfSequences) = GetSequenceAndBatchLength(outputVariable);

        // Only a single sequence without batching is supported.
        if (numOfSequences != 1)
            InvalidArgument("The Value cannot be copied to buffers in sparse format, since it contains multiple sequences. Only a single sequence is supported.");

        if (MaskedCount() != 0)
            RuntimeError("There should not be any masks for a Value containing only one single sequence.");

        auto numNonZeroValues = std::get<3>(Data()->SparseCSCDataBuffers<ElementType>());
        auto numOfColsInMatrix = GetMatrixDimensions(Shape()).second + 1;

        return std::tuple<size_t, size_t, size_t>(maxSequenceLen, numOfColsInMatrix, numNonZeroValues);
    }
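    // Reminder of the CSC layout used by the sparse copy below: for a matrix with C columns and NNZ stored entries,
    // colStarts holds C+1 offsets (colStarts[c]..colStarts[c+1] delimit the entries of column c), rowIndices holds
    // the NNZ row positions, and nonZeroValues holds the NNZ stored values. For example, the 3x2 matrix
    //   [ 0 5 ]
    //   [ 7 0 ]
    //   [ 0 9 ]
    // is stored as colStarts = { 0, 1, 3 }, rowIndices = { 1, 0, 2 }, nonZeroValues = { 7, 5, 9 }.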
    template <typename ElementType>
    void Value::CopyVariableValueToCSCSparse(size_t sequenceLength, std::vector<SparseIndexType>& colStarts, std::vector<SparseIndexType>& rowIndices, std::vector<ElementType>& nonZeroValues, size_t& numNonZeroValues)
    {
        // All sanity checks have been done in ValidateSparseCSCAndGetIndexBufferSizes().
        NDArrayViewPtr cpuView;
        if (Device().Type() == DeviceKind::GPU)
        {
            // TODO: GPUSparseMatrix to CPUSparseMatrix conversion is not implemented in the matrix library, so as a
            // workaround a dense matrix is used as the intermediate representation. Note that data values very close
            // to 0 could be treated as 0 after the transformation between dense and sparse.
            auto cpuDenseView = MakeSharedObject<NDArrayView>(GetDataType(), StorageFormat::Dense, Shape(), DeviceDescriptor::CPUDevice());
            cpuDenseView->CopyFrom(*Data());
            cpuView = MakeSharedObject<NDArrayView>(GetDataType(), GetStorageFormat(), Shape(), DeviceDescriptor::CPUDevice());
            cpuView->CopyFrom(*cpuDenseView);
        }
        else
            cpuView = Data();

        auto numOfColsInMatrix = GetMatrixDimensions(cpuView->Shape()).second + 1;
        const ElementType* rawNonZeroValues;
        const SparseIndexType* rawColStarts;
        const SparseIndexType* rawRowIndices;
        std::tie(rawNonZeroValues, rawColStarts, rawRowIndices, numNonZeroValues) = cpuView->SparseCSCDataBuffers<ElementType>();

        memcpy(colStarts.data(), rawColStarts, numOfColsInMatrix * sizeof(SparseIndexType));
        memcpy(nonZeroValues.data(), rawNonZeroValues, numNonZeroValues * sizeof(ElementType));
        memcpy(rowIndices.data(), rawRowIndices, numNonZeroValues * sizeof(SparseIndexType));
    }

    template <typename ElementType>
    ElementType Value::AsScalar() const
    {
        if (Mask())
            LogicError("Value::AsScalar: A scalar Value object must not have an associated mask");

        return Data()->AsScalar<ElementType>();
    }

    /*virtual*/ bool Value::IsValid() const
    {
        return !!m_data;
    }

    std::wstring Value::AsString() const
    {
        wstringstream wss;
        if (IsValid())
            wss << L"Value(" << Shape().AsString() << L", " << DeviceKindName(Device().Type()) << L")";
        else
            wss << L"Value(###)";
        return wss.str();
    }

    void PackedValue::Unpack() const
    {
        if (m_packedDataLayout && (m_packedDataLayout->GetNumTimeSteps() != 1) && (m_packedDataLayout->GetNumSequences() != 1) && Internal::IsAutomaticUnpackingOfPackedValuesDisabled())
            LogicError("PackedValue::Unpack: Automatic unpacking of PackedValue objects is disabled");

        if (m_isPacked)
        {
            ValuePtr valueObject;
            auto dataType = m_packedData->GetDataType();
            switch (dataType)
            {
            case DataType::Float:
                valueObject = Utils::GetValueObjectFromCNTKImplMatrixAndMBLayout(m_sampleShape, m_sampleDynamicAxes, *(m_packedData->GetMatrix<float>()), m_packedDataLayout, m_isReadOnly);
                break;
            case DataType::Double:
                valueObject = Utils::GetValueObjectFromCNTKImplMatrixAndMBLayout(m_sampleShape, m_sampleDynamicAxes, *(m_packedData->GetMatrix<double>()), m_packedDataLayout, m_isReadOnly);
                break;
            case DataType::Float16:
                valueObject = Utils::GetValueObjectFromCNTKImplMatrixAndMBLayout(m_sampleShape, m_sampleDynamicAxes, *(m_packedData->GetMatrix<half>()), m_packedDataLayout, m_isReadOnly);
                break;
            default:
                LogicError("Unsupported DataType %s", DataTypeName(dataType));
            }

            m_data = valueObject->Data();
            m_mask = valueObject->Mask();

            m_packedData = nullptr;
            m_packedDataLayout = nullptr;
            m_isPacked = false;

            if (m_unpackedShape != m_data->Shape())
                LogicError("The computed unpacked shape '%S' of the PackedValue object does not match the actual Data NDArrayView's shape '%S' after unpacking.",
                           m_unpackedShape.AsString().c_str(), m_data->Shape().AsString().c_str());
        }
    }

    template <typename ElementType, typename DestType>
    void DirectCopy(const ElementType* source, const size_t elementCount, std::vector<DestType>& dest)
    {
        if (!std::is_same<ElementType, DestType>::value)
            RuntimeError("Copy: The source and destination must be of the same data type.");

        DestType* destData = dest.data();
        if (elementCount > dest.size())
            RuntimeError("Copy: The output buffer size (%zu) is smaller than the number (%zu) of source elements to copy.", dest.size(), elementCount);

        std::copy(source, source + elementCount, reinterpret_cast<ElementType*>(destData));
    }

    template <typename ElementType, typename DestType>
    void CopyDenseToOneHot(const ElementType* source, const size_t sampleCount, const size_t sampleSize, std::vector<DestType>& dest)
    {
        if (!std::is_same<DestType, size_t>::value)
            RuntimeError("Copy: The destination data type must be size_t.");

        const ElementType* currentp = source;
        const ElementType* lastp = source + sampleCount * sampleSize;
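        // Scan one sample (sampleSize elements) per destination entry: exactly one element must be 1 and all others
        // must be 0; the position of that 1 becomes the one-hot index written to dest.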
        size_t destIndex = 0;
        while (currentp < lastp)
        {
            size_t index = sampleSize;
            bool found = false;
            for (size_t i = 0; i < sampleSize; i++)
            {
                if (*currentp == (ElementType)1)
                {
                    if (found)
                        RuntimeError("CopyDenseToOneHot: Cannot convert to a one-hot vector; the sample contains more than one non-zero value.");

                    index = i;
                    found = true;
                }
                else if (*currentp != (ElementType)0)
                    RuntimeError("CopyDenseToOneHot: Cannot convert to a one-hot vector; the sample contains a value other than 0/1.");

                currentp++;
            }
            if (!found)
                RuntimeError("CopyDenseToOneHot: Cannot convert to a one-hot vector; the sample does not have any non-zero value.");

            assert(index != sampleSize);
            dest[destIndex++] = static_cast<DestType>(index);
        }
        assert(currentp == lastp);
    }

    // Explicit template instantiations
    template /*static*/ CNTK_API ValuePtr Value::Create<float>(const NDShape& sampleShape, const std::vector<std::vector<float>>& sequences, const std::vector<bool>& sequenceStartFlags, const DeviceDescriptor& device, bool readOnly/* = false*/);
    template /*static*/ CNTK_API ValuePtr Value::Create<double>(const NDShape& sampleShape, const std::vector<std::vector<double>>& sequences, const std::vector<bool>& sequenceStartFlags, const DeviceDescriptor& device, bool readOnly/* = false*/);
    template /*static*/ CNTK_API ValuePtr Value::Create<float16>(const NDShape& sampleShape, const std::vector<std::vector<float16>>& sequences, const std::vector<bool>& sequenceStartFlags, const DeviceDescriptor& device, bool readOnly/* = false*/);
    template /*static*/ CNTK_API ValuePtr Value::Create<float>(const NDShape& sampleShape, const std::vector<std::vector<size_t>>& oneHotSequences, const std::vector<bool>& sequenceStartFlags, const DeviceDescriptor& device, bool readOnly/* = false*/);
    template /*static*/ CNTK_API ValuePtr Value::Create<double>(const NDShape& sampleShape, const std::vector<std::vector<size_t>>& oneHotSequences, const std::vector<bool>& sequenceStartFlags, const DeviceDescriptor& device, bool readOnly/* = false*/);
    template /*static*/ CNTK_API ValuePtr Value::Create<float16>(const NDShape& sampleShape, const std::vector<std::vector<size_t>>& oneHotSequences, const std::vector<bool>& sequenceStartFlags, const DeviceDescriptor& device, bool readOnly/* = false*/);
    template /*static*/ CNTK_API ValuePtr Value::CreateBatch<float>(const NDShape& sampleShape, const std::vector<float>& batchData, const DeviceDescriptor& device, bool readOnly /*= false */);
    template /*static*/ CNTK_API ValuePtr Value::CreateBatch<double>(const NDShape& sampleShape, const std::vector<double>& batchData, const DeviceDescriptor& device, bool readOnly /*= false */);
    template /*static*/ CNTK_API ValuePtr Value::CreateBatch<float16>(const NDShape& sampleShape, const std::vector<float16>& batchData, const DeviceDescriptor& device, bool readOnly /*= false */);
    template /*static*/ CNTK_API ValuePtr Value::CreateSequence<float>(const NDShape& sampleShape, const std::vector<float>& sequenceData, bool sequenceStartFlag, const DeviceDescriptor& device, bool readOnly /*= false */);
    template /*static*/ CNTK_API ValuePtr Value::CreateSequence<double>(const NDShape& sampleShape, const std::vector<double>& sequenceData, bool sequenceStartFlag, const DeviceDescriptor& device, bool readOnly /*= false */);
    template /*static*/ CNTK_API ValuePtr Value::CreateSequence<float16>(const NDShape& sampleShape, const std::vector<float16>& sequenceData, bool sequenceStartFlag, const DeviceDescriptor& device, bool readOnly /*= false */);
    template /*static*/ CNTK_API ValuePtr Value::CreateBatch<float>(size_t dimension, const std::vector<size_t>& batchData, const DeviceDescriptor& device, bool readOnly/* = false*/);
    template /*static*/ CNTK_API ValuePtr Value::CreateBatch<double>(size_t dimension, const std::vector<size_t>& batchData, const DeviceDescriptor& device, bool readOnly/* = false*/);
    template /*static*/ CNTK_API ValuePtr Value::CreateBatch<float16>(size_t dimension, const std::vector<size_t>& batchData, const DeviceDescriptor& device, bool readOnly/* = false*/);
    template /*static*/ CNTK_API ValuePtr Value::CreateSequence<float>(size_t dimension, const std::vector<size_t>& sequenceData, bool sequenceStartFlag, const DeviceDescriptor& device, bool readOnly/* = false*/);
    template /*static*/ CNTK_API ValuePtr Value::CreateSequence<double>(size_t dimension, const std::vector<size_t>& sequenceData, bool sequenceStartFlag, const DeviceDescriptor& device, bool readOnly/* = false*/);
    template /*static*/ CNTK_API ValuePtr Value::CreateSequence<float16>(size_t dimension, const std::vector<size_t>& sequenceData, bool sequenceStartFlag, const DeviceDescriptor& device, bool readOnly/* = false*/);
    template /*static*/ CNTK_API ValuePtr Value::CreateSequence<float>(const NDShape& sampleShape, size_t sequenceLength, const SparseIndexType* colStarts, const SparseIndexType* rowIndices, const float* nonZeroValues, size_t numNonZeroValues, bool sequenceStartFlag, const DeviceDescriptor& device, bool readOnly/* = false*/);
    template /*static*/ CNTK_API ValuePtr Value::CreateSequence<double>(const NDShape& sampleShape, size_t sequenceLength, const SparseIndexType* colStarts, const SparseIndexType* rowIndices, const double* nonZeroValues, size_t numNonZeroValues, bool sequenceStartFlag, const DeviceDescriptor& device, bool readOnly/* = false*/);
    template /*static*/ CNTK_API ValuePtr Value::CreateSequence<float16>(const NDShape& sampleShape, size_t sequenceLength, const SparseIndexType* colStarts, const SparseIndexType* rowIndices, const float16* nonZeroValues, size_t numNonZeroValues, bool sequenceStartFlag, const DeviceDescriptor& device, bool readOnly/* = false*/);
    template CNTK_API void Value::CopyVariableValueToVector<float>(const Variable& outputVariable, std::vector<std::vector<float>>& sequences);
    template CNTK_API void Value::CopyVariableValueToVector<double>(const Variable& outputVariable, std::vector<std::vector<double>>& sequences);
    template CNTK_API void Value::CopyVariableValueToVector<float16>(const Variable& outputVariable, std::vector<std::vector<float16>>& sequences);
    template CNTK_API void Value::CopyVariableValueToVector<float>(const Variable& outputVariable, std::vector<std::vector<size_t>>& sequences);
    template CNTK_API void Value::CopyVariableValueToVector<double>(const Variable& outputVariable, std::vector<std::vector<size_t>>& sequences);
    template CNTK_API void Value::CopyVariableValueToVector<float16>(const Variable& outputVariable, std::vector<std::vector<size_t>>& sequences);
    template CNTK_API std::tuple<size_t, size_t, size_t> Value::ValidateSparseCSCAndGetIndexBufferSizes<float>(const Variable& outputVariable);
    template CNTK_API std::tuple<size_t, size_t, size_t> Value::ValidateSparseCSCAndGetIndexBufferSizes<double>(const Variable& outputVariable);
    template CNTK_API std::tuple<size_t, size_t, size_t> Value::ValidateSparseCSCAndGetIndexBufferSizes<float16>(const Variable& outputVariable);
    template CNTK_API void Value::CopyVariableValueToCSCSparse<float>(size_t sequenceLength, std::vector<SparseIndexType>& colStarts, std::vector<SparseIndexType>& rowIndices, std::vector<float>& nonZeroValues, size_t& numNonZeroValues);
    template CNTK_API void Value::CopyVariableValueToCSCSparse<double>(size_t sequenceLength, std::vector<SparseIndexType>& colStarts, std::vector<SparseIndexType>& rowIndices, std::vector<double>& nonZeroValues, size_t& numNonZeroValues);
    template CNTK_API void Value::CopyVariableValueToCSCSparse<float16>(size_t sequenceLength, std::vector<SparseIndexType>& colStarts, std::vector<SparseIndexType>& rowIndices, std::vector<float16>& nonZeroValues, size_t& numNonZeroValues);
    template float Value::AsScalar<float>() const;
    template double Value::AsScalar<double>() const;
    template float16 Value::AsScalar<float16>() const;
}