// // Copyright (c) Microsoft. All rights reserved. // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. // #pragma once #include "Basics.h" #include "ComputationNode.h" #include "Matrix.h" #include "Sequences.h" #include "ScriptableObjects.h" #include namespace Microsoft { namespace MSR { namespace CNTK { static const std::ptrdiff_t SentinelValueIndicatingUnspecifedSequenceBeginIdx = std::numeric_limits::min(); template class DelayedValueNodeState; // ----------------------------------------------------------------------- // DelayedValueNodeBase (input [, initialState]) -- abstract base class for PastValueNode and FutureValueNode to hold all shared code // The two differ in the step direction, some loop directions, and sequence-boundary flags. // ----------------------------------------------------------------------- // TODO: 'direction' is really too general. signOfTimeOffset? template class DelayedValueNodeBase : public ComputationNode, public IRecurrentNode, public ILateAttachingNode, public IStatefulNode { typedef ComputationNode Base; UsingComputationNodeMembers; using Base::OperationName; typedef std::shared_ptr> DelayedNodeStatePtr; private: TensorView GetMaskTensor(size_t rank, const FrameRange& fr) const; protected: DelayedValueNodeBase(DEVICEID_TYPE deviceId, const wstring& name, ElemType fixedInitialStateScalarValue, const TensorShape& sampleLayout, size_t timeStep); DelayedValueNodeBase(DEVICEID_TYPE deviceId, const wstring& name) : DelayedValueNodeBase(deviceId, name, (ElemType)DEFAULT_HIDDEN_ACTIVATION, TensorShape(), 0) { } DelayedValueNodeBase(const ScriptableObjects::IConfigRecordPtr configp) : DelayedValueNodeBase(configp->Get(L"deviceId"), L"", configp->Get(L"defaultHiddenActivation"), configp->Get(L"shape"), configp->Get(L"timeStep")) { // We do NOT attach the inputs, as we cannot resolve them without causing a circular reference. // Instead, we capture them in a lambda, which will be called by ComputationNetwork during the build process through LateAttachInputs() below. // This is a contract between ComputationNetwork and this specific node type. m_attachInputsFn = [this, configp]() // This is the lambda to complete the process. Note that config captured as a shared_ptr. { AttachInputs(GetInputsFromConfig(configp)); // this is executed by network builder while iterating the nodes }; } virtual void /*ILateAttachingNode::*/ LateAttachInputs() override final { m_attachInputsFn(); m_attachInputsFn = []() { LogicError("LateAttachingNode::AttachInputs: must only be called once"); }; } public: virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override; virtual void Load(File& fstream, size_t modelVersion) override; virtual void Save(File& fstream) const override; virtual void UpdateFunctionMBSize() override; virtual void BeginForwardProp() override; virtual void ForwardProp(const FrameRange& fr) override; virtual void EndForwardProp() override; virtual void /*ComputationNode::*/ BackpropTo(const size_t inputIndex, const FrameRange& fr) override; virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; } virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override { return false; } virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override; virtual int /*IRecurrentNode::*/ GetRecurrenceSteppingDirection() const override { return -direction; } virtual NodeStatePtr /*IStatefulNode::*/ ExportState() override; virtual void /*IStatefulNode::*/ ImportState(const NodeStatePtr& pImportedState) override; int TimeStep() const { return m_timeStep; } ElemType InitialActivationValue() const { return m_initialStateValue; } protected: ElemType m_initialStateValue; // starting value for hidden activation vector at boundary int m_timeStep; // delay in frames (typ. 1) function m_attachInputsFn; // for late expansion of inputs (scripting) vector m_inputInvalidMatrixTemp; // [j] CPU-side buffer for constructing the mask matrix vector m_inputAnySeqValid, m_inputAllSeqValid; // [t] denotes whether there are any valid frames at a time step, and if all are valid shared_ptr> m_initialStateValueMatrix; // potentially GPU-side versions shared_ptr> m_inputInvalidMatrix; // [0,j] contains 1 if matrix column belongs to an frame with boundary condition or a gap frame shared_ptr> m_zeroMatrix; // constant [1]-dimensional 0 used for backprop --TODO: could use a static map[deviceId] shared_ptr> m_delayedValue; // saves the activation of the previous step that this node points to MBLayoutPtr m_delayedActivationMBLayout; // layout for m_delayedValue }; #define UsingDelayedValueNodeMembers \ UsingComputationNodeMembersBoilerplate; \ using Base::m_initialStateValue; \ using Base::m_delayedValue; \ using Base::m_timeStep; // ----------------------------------------------------------------------- // PastValueNode (input) -- delay node // TODO: Can this just be a typedef? // ----------------------------------------------------------------------- template class PastValueNode : public DelayedValueNodeBase { typedef DelayedValueNodeBase Base; UsingDelayedValueNodeMembers; static const std::wstring TypeName() { return L"PastValue"; } public: PastValueNode(DEVICEID_TYPE deviceId, const wstring& name) : Base(deviceId, name) { } PastValueNode(DEVICEID_TYPE deviceId, const wstring& name, ElemType fixedInitialStateScalarValue, const TensorShape& sampleLayout, size_t timeStep) : Base(deviceId, name, fixedInitialStateScalarValue, sampleLayout, timeStep) { } PastValueNode(DEVICEID_TYPE deviceId, const wstring& name, const TensorShape& sampleLayout, size_t timeStep) : Base(deviceId, name, (ElemType)0, sampleLayout, timeStep) { } PastValueNode(DEVICEID_TYPE deviceId, const wstring& name, ElemType fixedInitialStateScalarValue, size_t numRows, size_t timeStep) : PastValueNode(deviceId, name, fixedInitialStateScalarValue, TensorShape(numRows), timeStep) { } PastValueNode(const ScriptableObjects::IConfigRecordPtr configp) : Base(configp) { } }; // ----------------------------------------------------------------------- // FutureValueNode (input) -- delay node in future direction // ----------------------------------------------------------------------- // get value from future (used in the bi-directional models) template class FutureValueNode : public DelayedValueNodeBase { typedef DelayedValueNodeBase Base; UsingDelayedValueNodeMembers; static const std::wstring TypeName() { return L"FutureValue"; } public: FutureValueNode(DEVICEID_TYPE deviceId, const wstring& name) : Base(deviceId, name) { } FutureValueNode(DEVICEID_TYPE deviceId, const wstring& name, ElemType fixedInitialStateScalarValue, const TensorShape& sampleLayout, size_t timeStep) : Base(deviceId, name, fixedInitialStateScalarValue, sampleLayout, timeStep) { } FutureValueNode(DEVICEID_TYPE deviceId, const wstring& name, const TensorShape& sampleLayout, size_t timeStep) : Base(deviceId, name, (ElemType)0, sampleLayout, timeStep) { } FutureValueNode(DEVICEID_TYPE deviceId, const wstring& name, ElemType fixedInitialStateScalarValue, size_t numRows, size_t timeStep) : FutureValueNode(deviceId, name, fixedInitialStateScalarValue, TensorShape(numRows), timeStep) { } FutureValueNode(const ScriptableObjects::IConfigRecordPtr configp) : Base(configp) { } }; }}}