//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//

#pragma once

#include "Basics.h"
#include "ComputationNode.h"
#include "Matrix.h"
#include "CNTKLibrary.h"
#include "Utils.h"

namespace Microsoft { namespace MSR { namespace CNTK {

// -----------------------------------------------------------------------
// ToSequenceNodeBase(sourceData)
// Abstract base class for ToSequenceNode and ToSequenceLikeNode.
// Converts the sourceData to a sequence using the most significant static axis [-1] as the sequence axis.
// -----------------------------------------------------------------------
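// Example (illustrative): a minibatch of 2 non-sequence samples, each of shape
// [3 x 5], is reinterpreted as 2 sequences of length 5 whose samples have shape
// [3]; the trailing static axis of extent 5 becomes the new dynamic (sequence) axis.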
template <class ElemType>
class ToSequenceNodeBase : public ComputationNodeNonLooping<ElemType>
{
    typedef ComputationNodeNonLooping<ElemType> Base;
    UsingComputationNodeMembers;
    using Base::OperationName;

public:
    ToSequenceNodeBase(DEVICEID_TYPE deviceId, const wstring& name)
        : Base(deviceId, name)
    {
    }

    virtual std::vector<size_t> GetSequenceLengths()
    {
        NOT_IMPLEMENTED;
    }

    virtual MBLayoutPtr DetermineMBLayout()
    {
        NOT_IMPLEMENTED;
    }

    virtual void /*ComputationNodeNonLooping::*/ ForwardPropNonLooping() override
    {
        let& inMBLayout = InputRef(0).GetMBLayout();
        if (inMBLayout->GetNumTimeSteps() != 1)
            InvalidArgument("%ls %ls operation can only operate on non-sequence input data.", NodeName().c_str(), OperationName().c_str());

        // View the input minibatch (a batch of non-sequence samples) as one tensor
        // with the batch axis appended as the trailing axis.
        auto numSequences = inMBLayout->GetNumSequences();
        auto inputSampleLayout = InputRef(0).GetSampleLayout();
        auto inputDataTensorShape = inputSampleLayout;
        inputDataTensorShape.AppendInPlace(inputDataTensorShape.GetRank(), numSequences);
        let& inputDataMatrix = InputRef(0).Value();
        auto inputDataNDArrayView = ::CNTK::MakeSharedObject<::CNTK::NDArrayView>(::CNTK::AsDataType<ElemType>(),
                                                                                  ::CNTK::AsDeviceDescriptor(inputDataMatrix.GetDeviceId()),
                                                                                  ::CNTK::AsStorageFormat(inputDataMatrix.GetFormat()),
                                                                                  ::CNTK::AsNDShape(inputDataTensorShape),
                                                                                  /*readOnly =*/ true,
                                                                                  new TensorView<ElemType>(InputRef(0).ValuePtr(), inputDataTensorShape));

        std::vector<size_t> sequenceLengths = GetSequenceLengths();
        auto inputDataValue = ::CNTK::MakeSharedObject<::CNTK::Value>(inputDataNDArrayView, ::CNTK::CreateMask(sequenceLengths));

        auto dummyVar = ::CNTK::InputVariable(::CNTK::AsNDShape(GetSampleLayout()), this->IsValueSparse(), ::CNTK::AsDataType<ElemType>());

#ifdef _MSC_VER
        auto& outputValuePtrRef = ValuePtrRef();
#else
        auto& outputValuePtrRef = this->template ValuePtrRef();
#endif
        // Repack as sequences: the trailing static axis becomes the sequence axis,
        // with gaps determined by the per-sequence lengths.
        auto packedMatrixAndLayout = ::CNTK::Utils::GetCNTKImplMatrixAndMBLayoutFromValueObject<ElemType>(dummyVar, inputDataValue, nullptr, outputValuePtrRef, m_tempGatherIndices);

        let& outMBLayout = GetMBLayout();
        outMBLayout->CopyFrom(packedMatrixAndLayout.second, /*keepName=*/true);
        if (packedMatrixAndLayout.first != outputValuePtrRef)
            Value().AssignValuesOf(*packedMatrixAndLayout.first);
    }

    virtual void /*ComputationNodeNonLooping::*/ BackpropToNonLooping(size_t inputIndex) override
    {
        if (inputIndex == 0)
        {
            ElemType gapPadValue = 0;
            auto gradient = ComputationNode<ElemType>::Unpack(GetSampleLayout(), Gradient(), m_pMBLayout, m_tempUnpackedData, m_tempScatterIndices, std::shared_ptr<Matrix<char>>(nullptr), /*batchMajor=*/ false, &gapPadValue);
            auto inputGradient = InputRef(inputIndex).GradientTensorFor(InputRef(inputIndex).GetSampleLayout().GetRank(), FrameRange(InputRef(inputIndex).GetMBLayout()));

            if (InputRef(inputIndex).IsGradientInitializedBy(this))
                inputGradient.AssignCopyOf(gradient);
            else
                inputGradient.AddCopyOf(gradient);
        }
    }

    virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }
    virtual bool InputUsedInComputingInputNodesGradients(size_t childIndex) const override { return false; }

    virtual ParentGradientOptimization ImplementsGradientOptimization(const ComputationNodeBase* input) const override
    {
        if (Input(0).get() == input)
            return ParentGradientOptimization::Overwrite;
        else
            return ParentGradientOptimization::None;
    }

    virtual void Validate(bool isFinalValidationPass) override
    {
        ComputationNodeBase::Validate(isFinalValidationPass);
        ComputationNodeBase::m_isValueSparse = Input(0)->IsValueSparse();

        if (!m_pMBLayout)
            m_pMBLayout = DetermineMBLayout();

        TensorShape inputShape = Input(0)->GetSampleLayout();
        if (isFinalValidationPass)
        {
            // this node generates its own MBLayout
            if (!Input(0)->HasMBLayout())
                InvalidArgument("%ls %ls operation can only operate on minibatch data (which have a layout).", NodeName().c_str(), OperationName().c_str());

            if (inputShape.GetRank() <= 1)
                InvalidArgument("%ls %ls operation can only operate on input data of rank >= 2.", NodeName().c_str(), OperationName().c_str());
        }

        SmallVector<size_t> outDims = inputShape.GetDims();
        outDims.resize(inputShape.GetRank() - 1);
        SetDims(TensorShape(outDims), true);
    }

    void RequestMatricesBeforeForwardProp(MatrixPool& matrixPool) override
    {
        Base::RequestMatricesBeforeForwardProp(matrixPool);
        RequestMatrixFromPool(m_tempGatherIndices, matrixPool, 1, HasMBLayout());
    }

    void ReleaseMatricesAfterForwardProp(MatrixPool& matrixPool) override
    {
        Base::ReleaseMatricesAfterForwardProp(matrixPool);
        ReleaseMatrixToPool(m_tempGatherIndices, matrixPool);
    }

    void RequestMatricesBeforeBackprop(MatrixPool& matrixPool) override
    {
        Base::RequestMatricesBeforeBackprop(matrixPool);
        RequestMatrixFromPool(m_tempScatterIndices, matrixPool, 1, HasMBLayout());
        RequestMatrixFromPool(m_tempUnpackedData, matrixPool, InputRef(0).GetSampleLayout().GetNumElements(), InputRef(0).HasMBLayout());
    }

    void ReleaseMatricesAfterBackprop(MatrixPool& matrixPool) override
    {
        Base::ReleaseMatricesAfterBackprop(matrixPool);
        ReleaseMatrixToPool(m_tempScatterIndices, matrixPool);
        ReleaseMatrixToPool(m_tempUnpackedData, matrixPool);
    }

private:
    shared_ptr<Matrix<ElemType>> m_tempGatherIndices;
    shared_ptr<Matrix<ElemType>> m_tempScatterIndices;
    shared_ptr<Matrix<ElemType>> m_tempUnpackedData;
};

// -----------------------------------------------------------------------
// ToSequenceNode(sourceData, sequenceLengths)
// Converts the sourceData to a sequence using the most significant static axis [-1] as the sequence axis.
// The sequenceLengths input is optional; if unspecified, all sequences are assumed to be of the same length,
// i.e. the dimensionality of the most significant static axis.
// -----------------------------------------------------------------------
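// Example (illustrative): with sourceData of sample shape [3 x 5] and the
// optional sequenceLengths input holding {5, 2}, the first output sequence has
// length 5 and the second length 2; the last 3 steps of the second sequence
// become gaps. Without a sequenceLengths input, both sequences have length 5.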
template <class ElemType>
class ToSequenceNode : public ToSequenceNodeBase<ElemType>
{
    typedef ToSequenceNodeBase<ElemType> Base;
    UsingComputationNodeMembersBoilerplate;
    static const std::wstring TypeName() { return L"ToSequence"; }

    static const std::wstring DefaultToSequenceNodeDynamicAxisName() { return L"ToSequenceNodeAxis"; }

public:
    DeclareConstructorFromConfig(ToSequenceNode);
    ToSequenceNode(DEVICEID_TYPE deviceId, const wstring& name, const wstring& dynamicAxisName = DefaultToSequenceNodeDynamicAxisName())
        : Base(deviceId, name), m_dynamicAxisName(dynamicAxisName)
    {
    }

    virtual std::vector<size_t> GetSequenceLengths() override
    {
        let& inMBLayout = InputRef(0).GetMBLayout();
        auto numSequences = inMBLayout->GetNumSequences();
        auto inputSampleLayout = InputRef(0).GetSampleLayout();
        // Default: every sequence spans the full extent of the trailing static axis.
        std::vector<size_t> sequenceLengths(numSequences, inputSampleLayout[inputSampleLayout.GetRank() - 1]);
        if (GetNumInputs() > 1)
        {
            if (*Input(0)->GetMBLayout() != *Input(1)->GetMBLayout())
                LogicError("%ls %ls operation requires both its inputs to have identical minibatch layouts.", NodeName().c_str(), OperationName().c_str());

            let& sequenceLengthsInputValue = InputRef(1).Value();
            for (size_t i = 0; i < numSequences; ++i)
                sequenceLengths[i] = (size_t)sequenceLengthsInputValue(0, i);
        }

        return sequenceLengths;
    }

    virtual MBLayoutPtr DetermineMBLayout() override
    {
        assert(!m_pMBLayout);

        // this generates a new layout
        auto newLayout = make_shared<MBLayout>();
        newLayout->SetUniqueAxisName(m_dynamicAxisName);
        return newLayout;
    }

    virtual void Validate(bool isFinalValidationPass) override
    {
        Base::Validate(isFinalValidationPass);

        if ((GetNumInputs() > 1) && (Input(1)->NeedsGradient()))
            InvalidArgument("%ls %ls operation does not support gradient propagation to its Input(1) denoting sequence lengths.", NodeName().c_str(), OperationName().c_str());

        if (isFinalValidationPass)
        {
            if ((GetNumInputs() > 1) && (Input(1)->GetSampleLayout().GetNumElements() != 1))
                InvalidArgument("%ls %ls operation's Input(1) denotes sequence lengths and must be a scalar.", NodeName().c_str(), OperationName().c_str());
        }
    }

    virtual void Load(File& fstream, size_t modelVersion) override
    {
        Base::Load(fstream, modelVersion);
        fstream >> m_dynamicAxisName;
    }

    virtual void Save(File& fstream) const override
    {
        Base::Save(fstream);
        fstream << m_dynamicAxisName;
    }

    std::wstring DynamicAxisName() const { return m_dynamicAxisName; }

private:
    std::wstring m_dynamicAxisName;
};

template class ToSequenceNode<float>;
template class ToSequenceNode<double>;

// -----------------------------------------------------------------------
// ToSequenceLikeNode(sourceData, dynamicAxesLike)
// Converts the sourceData to a sequence using the most significant static axis [-1] as the sequence axis.
// The 'dynamicAxesLike' operand is used to obtain the lengths of the generated sequences.
// The dynamic axes of the generated sequence are required to match the dynamic axes of the 'dynamicAxesLike' operand.
// -----------------------------------------------------------------------
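// Example (illustrative): if dynamicAxesLike carries 2 sequences of lengths
// {4, 6}, then sourceData's trailing static dimension must equal 6 (the number
// of time steps in dynamicAxesLike's layout, typically the longest sequence
// length), and the output is 2 sequences of lengths 4 and 6 that share
// dynamicAxesLike's dynamic axes.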
template <class ElemType>
class ToSequenceLikeNode : public ToSequenceNodeBase<ElemType>
{
    typedef ToSequenceNodeBase<ElemType> Base;
    UsingComputationNodeMembersBoilerplate;
    static const std::wstring TypeName() { return L"ToSequenceLike"; }

public:
    DeclareConstructorFromConfig(ToSequenceLikeNode);
    ToSequenceLikeNode(DEVICEID_TYPE deviceId, const wstring& name)
        : Base(deviceId, name)
    {
    }

    virtual std::vector<size_t> GetSequenceLengths() override
    {
        let& inMBLayout = InputRef(0).GetMBLayout();
        auto sequencesMBLayout = InputRef(1).GetMBLayout();
        auto inputSampleLayout = InputRef(0).GetSampleLayout();
        auto sequenceMBLayoutNumTimeSteps = sequencesMBLayout->GetNumTimeSteps();
        auto inputSampleLayoutTrailingDimension = inputSampleLayout[inputSampleLayout.GetRank() - 1];
        if (sequenceMBLayoutNumTimeSteps != inputSampleLayoutTrailingDimension)
            InvalidArgument("%ls %ls operation's Input(0) trailing dimension (%zu) must match the number of timesteps (%zu) in Input(1)'s MBLayout.",
                            NodeName().c_str(), OperationName().c_str(), inputSampleLayoutTrailingDimension, sequenceMBLayoutNumTimeSteps);

        if (inMBLayout->GetNumSequences() != sequencesMBLayout->GetNumSequences())
            InvalidArgument("%ls %ls operation's Input(0) MBLayout must have the same number of sequences as Input(1)'s MBLayout.", NodeName().c_str(), OperationName().c_str());

        auto numSequences = inMBLayout->GetNumSequences();
        let& sequences = sequencesMBLayout->GetAllSequences();
        std::vector<size_t> sequenceLengths(numSequences, inputSampleLayoutTrailingDimension);
        for (size_t i = 0; i < sequences.size(); i++)
        {
            let& seq = sequences[i];
            if (seq.seqId == GAP_SEQUENCE_ID)
                continue;

            sequenceLengths[seq.seqId] = seq.GetNumTimeSteps();
        }

        return sequenceLengths;
    }

    virtual MBLayoutPtr DetermineMBLayout() override
    {
        assert(!m_pMBLayout);
        return InputRef(1).GetMBLayout();
    }

    virtual void Validate(bool isFinalValidationPass) override
    {
        Base::Validate(isFinalValidationPass);

        if (isFinalValidationPass && !InputRef(1).HasMBLayout())
            RuntimeError("%ls %ls operation requires Input(1) to have a dynamic axis.", NodeName().c_str(), OperationName().c_str());
    }
};

template class ToSequenceLikeNode<float>;
template class ToSequenceLikeNode<double>;

// ------------------------------------------------------------------------------------------------
// UnpackSequenceNode(sequenceData)
// Converts the sequenceData to non-sequence data by unpacking the sequence along
// the most significant static axis [-1] and padding any gaps with the specified padding value.
// The node has 2 outputs; viz. the unpacked non-sequence data and a mask denoting the gaps in the
// unpacked output due to differences across lengths of the sequences in 'sequenceData'.
// ------------------------------------------------------------------------------------------------
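// Example (illustrative): unpacking 2 sequences of lengths {4, 6} with sample
// shape [3] yields one non-sequence output sample of shape [3 x 6] per input
// sequence, where steps 5 and 6 of the first sequence are filled with the
// padding value; the mask output is [6]-shaped per sequence, 1 for valid steps
// and 0 for padded gaps.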
template <class ElemType>
class UnpackSequenceNode : public ComputationNodeNonLooping<ElemType>, public MultiOutputNode<ElemType>, public NumInputs<1>
{
    typedef ComputationNodeNonLooping<ElemType> Base;
    UsingComputationNodeMembersBoilerplate;
    static const std::wstring TypeName() { return L"UnpackSequence"; }

public:
    DeclareConstructorFromConfig(UnpackSequenceNode);
    UnpackSequenceNode(DEVICEID_TYPE deviceId, const wstring& name, double paddingValue = 0, bool suppressMaskOutput = false)
        : Base(deviceId, name), MultiOutputNode<ElemType>(suppressMaskOutput ? 1 : 2), m_paddingValue((ElemType)paddingValue), m_suppressMaskOutput(suppressMaskOutput)
    {
    }

    virtual void /*ComputationNodeNonLooping::*/ ForwardPropNonLooping() override
    {
        auto inputMBLayout = InputRef(0).GetMBLayout();
        if (inputMBLayout->HasSequenceBeyondBegin() || inputMBLayout->HasSequenceBeyondEnd())
            LogicError("%ls: %s node cannot perform sequence axis reduction for truncated sequence.", Base::NodeDescription().c_str(), typeid(*this).name());

        GetMBLayout()->InitAsFrameMode(inputMBLayout->GetNumSequences());
        UpdateFunctionValuesSize();

#ifdef _MSC_VER
        auto& outputValuePtrRef = ValuePtrRef();
#else
        auto& outputValuePtrRef = this->template ValuePtrRef();
#endif

        // Directly unpack into the Value() matrix
        auto valueMatrixNumRows = outputValuePtrRef->GetNumRows();
        auto valueMatrixNumCols = outputValuePtrRef->GetNumCols();
        auto unpackedInput = ComputationNode<ElemType>::Unpack(InputRef(0).GetSampleLayout(), InputRef(0).Value(), InputRef(0).GetMBLayout(),
                                                               outputValuePtrRef, m_tempScatterIndices, m_tempMask, /*batchMajor=*/ false, &m_paddingValue);

        if (unpackedInput.GetSOBPtr() != outputValuePtrRef)
            Value().AssignValuesOf(*unpackedInput.GetSOBPtr());

        Value().Reshape(valueMatrixNumRows, valueMatrixNumCols);

        if (!m_suppressMaskOutput)
        {
            auto numSequences = GetMBLayout()->GetNumSequences();
            size_t maxSequenceLength = inputMBLayout->GetNumTimeSteps();

            // Build the mask: 1 for each valid step of a sequence, 0 for padded gap steps.
            std::vector<ElemType> maskValues(maxSequenceLength * numSequences, 0);
            let& inputSequences = inputMBLayout->GetAllSequences();
            size_t j = 0;
            for (size_t i = 0; i < inputSequences.size(); i++)
            {
                let& seq = inputSequences[i];
                if (seq.seqId == GAP_SEQUENCE_ID)
                    continue;

                auto currentSequenceLength = seq.GetNumTimeSteps();
                auto rangeStart = maskValues.begin() + (j * maxSequenceLength);
                std::fill(rangeStart, rangeStart + currentSequenceLength, (ElemType)1);

                j++;
            }
            assert(j == numSequences);

            this->m_outputsValue[1]->SetValue(maxSequenceLength, numSequences, outputValuePtrRef->GetDeviceId(), maskValues.data());
        }
    }
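    // Gradient note (illustrative): the incoming gradient arrives in the unpacked,
    // padded layout produced above; BackpropToNonLooping repacks it into Input(0)'s
    // sequence layout, so gradient values falling on padded gap positions are
    // discarded rather than propagated back.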
    virtual void /*ComputationNodeNonLooping::*/ BackpropToNonLooping(size_t inputIndex) override
    {
        auto numSequences = GetMBLayout()->GetNumSequences();
        auto gradientSampleLayout = GetSampleLayout();
        auto gradientDataTensorShape = gradientSampleLayout;
        gradientDataTensorShape.AppendInPlace(gradientDataTensorShape.GetRank(), numSequences);
        let& gradientDataMatrix = Gradient();
        auto gradientDataNDArrayView = ::CNTK::MakeSharedObject<::CNTK::NDArrayView>(::CNTK::AsDataType<ElemType>(),
                                                                                     ::CNTK::AsDeviceDescriptor(gradientDataMatrix.GetDeviceId()),
                                                                                     ::CNTK::AsStorageFormat(gradientDataMatrix.GetFormat()),
                                                                                     ::CNTK::AsNDShape(gradientDataTensorShape),
                                                                                     /*readOnly =*/ true,
                                                                                     new TensorView<ElemType>(GradientPtr(), gradientDataTensorShape));

        std::vector<size_t> sequenceLengths(numSequences);
        let& inMBLayout = InputRef(0).GetMBLayout();
        let& inputSequences = inMBLayout->GetAllSequences();
        size_t j = 0;
        for (size_t i = 0; i < inputSequences.size(); i++)
        {
            let& seq = inputSequences[i];
            if (seq.seqId == GAP_SEQUENCE_ID)
                continue;

            sequenceLengths[j] = seq.GetNumTimeSteps();
            j++;
        }
        assert(j == numSequences);

        auto gradientDataValue = ::CNTK::MakeSharedObject<::CNTK::Value>(gradientDataNDArrayView, ::CNTK::CreateMask(sequenceLengths));

        auto dummyVar = ::CNTK::InputVariable(::CNTK::AsNDShape(InputRef(0).GetSampleLayout()), gradientDataNDArrayView->IsSparse(), ::CNTK::AsDataType<ElemType>());
        auto packedGradientMatrixAndLayout = ::CNTK::Utils::GetCNTKImplMatrixAndMBLayoutFromValueObject<ElemType>(dummyVar, gradientDataValue, nullptr, m_tempPackedGradientData, m_tempGatherIndices);

        if (*packedGradientMatrixAndLayout.second != *inMBLayout)
            LogicError("%ls: %s node unpacked gradient MBLayout does not match input MBLayout.", Base::NodeDescription().c_str(), typeid(*this).name());

        InputRef(0).Gradient() += (*packedGradientMatrixAndLayout.first);
    }

    virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }
    virtual bool InputUsedInComputingInputNodesGradients(size_t childIndex) const override { return false; }

    bool ForceDynamicValidation() const override { return true; }

    virtual void Validate(bool isFinalValidationPass) override
    {
        ComputationNodeBase::Validate(isFinalValidationPass);
        ComputationNodeBase::m_isValueSparse = Input(0)->IsValueSparse();

        if (!m_pMBLayout)
        {
            m_pMBLayout = make_shared<MBLayout>(); // this generates a new layout
            m_pMBLayout->SetUniqueAxisName(ComputationNodeBase::DefaultNoSequenceAxisName);
            this->m_outputsMBLayout[0] = m_pMBLayout;
            if (!m_suppressMaskOutput)
                this->m_outputsMBLayout[1] = m_pMBLayout;
        }

        TensorShape inputShape = Input(0)->GetSampleLayout();
        SmallVector<size_t> outDims = inputShape.GetDims();
        outDims.resize(inputShape.GetRank() + 1, 1);
        SmallVector<size_t> maskDims = { 1 };
        if (isFinalValidationPass)
        {
            // this node generates its own MBLayout
            auto inputMBLayout = InputRef(0).GetMBLayout();
            if (!inputMBLayout)
                InvalidArgument("%ls %ls operation can only operate on minibatch data (which have a layout).", NodeName().c_str(), OperationName().c_str());

            if (inputMBLayout->GetNumTimeSteps() == 0)
                LogicError("%ls %ls operation's final validation pass must not be invoked before the input MBLayout has been initialized and populated.", NodeName().c_str(), OperationName().c_str());

            outDims[inputShape.GetRank()] = inputMBLayout->GetNumTimeSteps();
            maskDims[0] = inputMBLayout->GetNumTimeSteps();
        }

        this->m_outputsShape[0] = TensorShape(outDims);
        if (!m_suppressMaskOutput)
            this->m_outputsShape[1] = TensorShape(maskDims);

        SetDims(TensorShape(outDims), true);
    }

    void RequestMatricesBeforeForwardProp(MatrixPool& matrixPool) override
    {
        Base::RequestMatricesBeforeForwardProp(matrixPool);
        m_tempMask = std::make_shared<Matrix<char>>(Base::m_deviceId);
        RequestMatrixFromPool(m_tempScatterIndices, matrixPool, 1, HasMBLayout());
    }

    void ReleaseMatricesAfterForwardProp(MatrixPool& matrixPool) override
    {
        Base::ReleaseMatricesAfterForwardProp(matrixPool);
        ReleaseMatrixToPool(m_tempScatterIndices, matrixPool);
    }

    void RequestMatricesBeforeBackprop(MatrixPool& matrixPool) override
    {
        Base::RequestMatricesBeforeBackprop(matrixPool);
        RequestMatrixFromPool(m_tempGatherIndices, matrixPool, 1, HasMBLayout());
        RequestMatrixFromPool(m_tempPackedGradientData, matrixPool, InputRef(0).GetSampleLayout().GetNumElements(), InputRef(0).HasMBLayout());
    }

    void ReleaseMatricesAfterBackprop(MatrixPool& matrixPool) override
    {
        Base::ReleaseMatricesAfterBackprop(matrixPool);
        ReleaseMatrixToPool(m_tempGatherIndices, matrixPool);
        ReleaseMatrixToPool(m_tempPackedGradientData, matrixPool);
    }

    virtual void Load(File& fstream, size_t modelVersion) override
    {
        Base::Load(fstream, modelVersion);
        fstream >> m_paddingValue;
        fstream >> m_suppressMaskOutput;
    }

    virtual void Save(File& fstream) const override
    {
        Base::Save(fstream);
        fstream << m_paddingValue;
        fstream << m_suppressMaskOutput;
    }

    ElemType PaddingValue() const { return m_paddingValue; }
    bool SuppressOutputMask() const { return m_suppressMaskOutput; }

private:
    ElemType m_paddingValue;
    bool m_suppressMaskOutput;

    shared_ptr<Matrix<char>> m_tempMask;
    shared_ptr<Matrix<ElemType>> m_tempScatterIndices;
    shared_ptr<Matrix<ElemType>> m_tempGatherIndices;
    shared_ptr<Matrix<ElemType>> m_tempPackedGradientData;
};

template class UnpackSequenceNode<float>;
template class UnpackSequenceNode<double>;

}}}