// // Copyright (c) Microsoft. All rights reserved. // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. // #include "stdafx.h" #include "CNTKLibrary.h" #include "Variable.h" #include "CompositeFunction.h" #include "Serialization.h" #include "InputAndParamNodes.h" namespace CNTK { Variable::Variable(const FunctionPtr& function) : Variable(function->Output()) { } const NDShape& Variable::Shape() const { return m_dataFields->m_shape; } const std::vector& Variable::DynamicAxes() const { return m_dataFields->m_dynamicAxes; } VariableKind Variable::Kind() const { return m_dataFields->m_varKind; } bool Variable::IsSparse() const { return m_dataFields->m_isSparse; } const std::wstring& Variable::Name() const { return m_dataFields->m_name; } const std::wstring& Variable::Uid() const { return m_dataFields->m_uid; } DataType Variable::GetDataType() const { return m_dataFields->m_dataType; } bool Variable::NeedsGradient() const { return m_dataFields->m_needsGradient; } Variable Variable::Clone() const { Variable clonedVariable; clonedVariable.m_dataFields = m_dataFields->Clone(); return clonedVariable; } const Variable& Variable::BlockFunctionVariableMapping() const { return m_dataFields->m_blockFunctionVariableMapping; } FunctionPtr Variable::Owner() const { if (m_dataFields->m_ownerFunction != nullptr) return m_dataFields->m_ownerFunction->shared_from_this(); else return nullptr; } Variable Variable::CompositePreservingCopy(const std::shared_ptr& composite) const { // We have to preserve the whole subgraph. Variable result; result.m_outputComposite = composite; result.m_dataFields = m_dataFields; return result; } void Variable::SetOwner(Function* ownerFunction) { if (Kind() != VariableKind::Output) LogicError("Variable::SetOwner: Owner can only be set for Output Variables!"); if (m_dataFields->m_ownerFunction != nullptr) LogicError("Variable::SetOwner: An Output Variable whose owner has previously been set, cannot be reset!"); m_dataFields->m_ownerFunction = ownerFunction; } Variable::operator FunctionPtr() const { auto varOwner = Owner(); if (varOwner) return AsComposite(varOwner, varOwner->Name()); else return Combine({ *this }); } NDArrayViewPtr Variable::Value() const { if (!IsConstant() && !IsParameter()) LogicError("Only Variables of kind Parameter and Constant have a Value!"); if (m_dataFields->m_initValueFlag) { std::call_once(*m_dataFields->m_initValueFlag, [=]{ assert(m_dataFields->m_value == nullptr); assert(m_dataFields->m_valueInitializer); assert(m_dataFields->m_valueInitializationDevice); switch (GetDataType()) { case DataType::Float: { m_dataFields->m_value = CreateValueFromParameterInitializer(Shape(), *m_dataFields->m_valueInitializer, *m_dataFields->m_valueInitializationDevice); break; } case DataType::Double: { m_dataFields->m_value = CreateValueFromParameterInitializer(Shape(), *m_dataFields->m_valueInitializer, *m_dataFields->m_valueInitializationDevice); break; } default: LogicError("Unsupported DataType %s", DataTypeName(GetDataType())); break; } m_dataFields->m_valueInitializer = nullptr; m_dataFields->m_valueInitializationDevice = nullptr; }); } assert(m_dataFields->m_value != nullptr); return m_dataFields->m_value; } void Variable::SetValue(const NDArrayViewPtr& value) { if (!IsParameter()) LogicError("Variable::SetValue can be only invoked on a Parameter variable!"); else if (GetDataType() != value->GetDataType()) LogicError("Variable::SetValue: 'source' and 'destination' have different data types!"); else if (Shape() != value->Shape() && (AsTensorShape(Shape()) != AsTensorShape(value->Shape()))) LogicError("Variable::SetValue: 'source' and 'destination' have different shapes!"); bool alreadySet = false; if (m_dataFields->m_initValueFlag) { // In the case of lazy initialization, try to avoid the redundant call to the initializer. std::call_once(*m_dataFields->m_initValueFlag, [=, &value, &alreadySet] { // If the variable hasn't been initialized yet, clone the content of the supplied value and delete the initializer. m_dataFields->m_value = value->DeepClone(*m_dataFields->m_valueInitializationDevice, false); m_dataFields->m_valueInitializer = nullptr; m_dataFields->m_valueInitializationDevice = nullptr; alreadySet = true; }); } assert(m_dataFields->m_value != nullptr); if (!alreadySet) { // alreadySet is false, the lambda above wasn't called and the variable has been initialized before, // get a pointer to its value and simply copy the content of the supplied value. m_dataFields->m_value->CopyFrom(*value); } } static const std::wstring InitializerTypeAttributeName = L"initializerType"; static const std::wstring OutputRankAttributeName = L"outputRank"; static const std::wstring FilterRankAttributeName = L"filterRank"; static const std::wstring ValueAttributeName = L"value"; static const std::wstring ScaleAttributeName = L"scale"; static const std::wstring RandomSeedAttributeName = L"randomSeed"; static const std::wstring KernelWidthAttributeName = L"kernelWidth"; static const std::wstring KernelHeightAttributeName = L"kernelHeight"; void VariableFields::SetValueInitialization(const ParameterInitializer& initializationConfig, const DeviceDescriptor& device) { if (m_value != nullptr) LogicError("Value initialization config cannot be set if a value already exists"); assert(!m_valueInitializer); assert(!m_valueInitializationDevice); m_initValueFlag.reset(new std::once_flag()); m_valueInitializer.reset(new ParameterInitializer(initializationConfig)); m_valueInitializationDevice.reset(new DeviceDescriptor(device)); } namespace Internal { static std::atomic s_fixedRandomSeed(0); void SetFixedRandomSeed(unsigned long fixedRandomSeed) { s_fixedRandomSeed.store(fixedRandomSeed); } } static std::atomic s_currentRandomSeed(1); static ParameterInitializer CreateInitializer(const std::wstring& initializerTypeName, double scale, int outputRank, int filterRank, unsigned long seed) { Dictionary initConfig; initConfig[InitializerTypeAttributeName] = initializerTypeName; initConfig[OutputRankAttributeName] = outputRank; initConfig[FilterRankAttributeName] = filterRank; initConfig[ScaleAttributeName] = scale; auto currentFixedRandomSeed = Internal::s_fixedRandomSeed.load(); if (currentFixedRandomSeed != 0) seed = currentFixedRandomSeed; initConfig[RandomSeedAttributeName] = (size_t)seed; return initConfig; } ParameterInitializer ConstantInitializer(double value) { Dictionary initConfig; initConfig[InitializerTypeAttributeName] = Microsoft::MSR::CNTK::ConstantInitializerTypeName; initConfig[ValueAttributeName] = value; return initConfig; } ParameterInitializer UniformInitializer(double scale, unsigned long seed) { Dictionary initConfig; initConfig[InitializerTypeAttributeName] = Microsoft::MSR::CNTK::UniformInitializerTypeName; initConfig[ScaleAttributeName] = scale; initConfig[RandomSeedAttributeName] = (size_t)seed; return initConfig; } ParameterInitializer NormalInitializer(double scale, int outputRank, int filterRank, unsigned long seed) { return CreateInitializer(Microsoft::MSR::CNTK::NormalInitializerTypeName, scale, outputRank, filterRank, seed); } ParameterInitializer XavierInitializer(double scale, int outputRank, int filterRank, unsigned long seed) { return CreateInitializer(Microsoft::MSR::CNTK::XavierInitializerTypeName, scale, outputRank, filterRank, seed); } ParameterInitializer GlorotUniformInitializer(double scale, int outputRank, int filterRank, unsigned long seed) { return CreateInitializer(Microsoft::MSR::CNTK::GlorotUniformInitializerTypeName, scale, outputRank, filterRank, seed); } ParameterInitializer GlorotNormalInitializer(double scale, int outputRank, int filterRank, unsigned long seed) { return CreateInitializer(Microsoft::MSR::CNTK::GlorotNormalInitializerTypeName, scale, outputRank, filterRank, seed); } ParameterInitializer HeUniformInitializer(double scale, int outputRank, int filterRank, unsigned long seed) { return CreateInitializer(Microsoft::MSR::CNTK::HeUniformInitializerTypeName, scale, outputRank, filterRank, seed); } ParameterInitializer HeNormalInitializer(double scale, int outputRank, int filterRank, unsigned long seed) { return CreateInitializer(Microsoft::MSR::CNTK::HeNormalInitializerTypeName, scale, outputRank, filterRank, seed); } ParameterInitializer BilinearInitializer(size_t kernelWidth, size_t kernelHeight) { Dictionary initConfig; initConfig[InitializerTypeAttributeName] = Microsoft::MSR::CNTK::BilinearInitializerTypeName; initConfig[KernelWidthAttributeName] = kernelWidth; initConfig[KernelHeightAttributeName] = kernelHeight; return initConfig; } ParameterInitializer RandomInitializerWithRank(const ParameterInitializer& initializer, int outputRank, int filterRank) { ParameterInitializer newInitializerWithRanks = initializer; // 'initializer' must be a random initializer auto initializerType = initializer[InitializerTypeAttributeName].Value(); if ((initializerType != Microsoft::MSR::CNTK::UniformInitializerTypeName) && (initializerType != Microsoft::MSR::CNTK::BilinearInitializerTypeName) && (initializerType != Microsoft::MSR::CNTK::ConstantInitializerTypeName)) { int oldOutputRank = initializer[OutputRankAttributeName].Value(); int oldFilterRank = initializer[FilterRankAttributeName].Value(); if ((oldOutputRank != SentinelValueForInferParamInitRank) && (oldOutputRank != outputRank)) InvalidArgument("Output rank of a non-uniform random initialier cannot be overridden if it has been already specified!"); if ((oldFilterRank != SentinelValueForInferParamInitRank) && (oldFilterRank != filterRank)) InvalidArgument("Filer rank of a non-uniform random initialier cannot be overridden if it has been already specified!"); newInitializerWithRanks[OutputRankAttributeName] = outputRank; newInitializerWithRanks[FilterRankAttributeName] = filterRank; } return newInitializerWithRanks; } Variable::Variable(const NDShape& shape, VariableKind varType, CNTK::DataType dataType, const NDArrayViewPtr& value, bool needsGradient, const std::vector& dynamicAxes, bool isSparse, const std::wstring& name, const std::wstring& uid) : m_dataFields(MakeSharedObject(shape, varType, dataType, nullptr, value, needsGradient, dynamicAxes, isSparse, name, uid)) {} template /*static*/ NDArrayViewPtr Variable::CreateValueFromParameterInitializer(const NDShape& shape, const ParameterInitializer& initConfig, const DeviceDescriptor& device) { auto dataType = AsDataType(); auto value = MakeSharedObject(dataType, shape, device); auto valueMatrix = value->template GetWritableMatrix(); auto initializerType = initConfig[InitializerTypeAttributeName].Value(); if (initializerType == Microsoft::MSR::CNTK::ConstantInitializerTypeName) { auto constantInitValue = initConfig[ValueAttributeName].Value(); valueMatrix->SetValue((ElementType)constantInitValue); } else if (initializerType == Microsoft::MSR::CNTK::BilinearInitializerTypeName) { auto kernelWidth = initConfig[KernelWidthAttributeName].Value(); auto kernelHeight = initConfig[KernelHeightAttributeName].Value(); Microsoft::MSR::CNTK::LearnableParameter::InitBilinear(*valueMatrix, AsTensorShape(shape), kernelWidth, kernelHeight, AsCNTKImplDeviceId(device)); } else { auto randomSeed = (unsigned long)initConfig[RandomSeedAttributeName].Value(); if (randomSeed == SentinelValueForAutoSelectRandomSeed) randomSeed = s_currentRandomSeed++; auto scale = initConfig[ScaleAttributeName].Value(); int outputRank = DefaultParamInitOutputRank, filterRank = DefaultParamInitFilterRank; if (initializerType != Microsoft::MSR::CNTK::UniformInitializerTypeName) { outputRank = initConfig[OutputRankAttributeName].Value(); filterRank = initConfig[FilterRankAttributeName].Value(); if (outputRank == SentinelValueForInferParamInitRank) outputRank = DefaultParamInitOutputRank; if (filterRank == SentinelValueForInferParamInitRank) filterRank = DefaultParamInitFilterRank; if ((filterRank + outputRank) > shape.Rank()) InvalidArgument("Sum of filter rank (%d) and output rank (%d) of the parameter initializer cannot exceed the Parameter's rank(%d)", filterRank, outputRank, (int)shape.Rank()); } Microsoft::MSR::CNTK::LearnableParameter::InitRandom(*valueMatrix, AsTensorShape(shape), initializerType, randomSeed, (ElementType)scale, filterRank, outputRank, /*initOnCPUOnly=*/true, AsCNTKImplDeviceId(device)); } return value; } static const std::wstring s_variableTypeValue = L"Variable"; /*virtual*/ Dictionary Variable::Serialize() const { if (IsOutput()) { LogicError("Output variables cannot be saved"); } Dictionary dict; dict[versionKey] = CurrentVersion(); dict[typeKey] = s_variableTypeValue; dict[uidKey] = Uid(); dict[kindKey] = static_cast(Kind()); dict[dataTypeKey] = static_cast(GetDataType()); const auto& dynamicAxes = DynamicAxes(); vector dictionaryValueVector; dictionaryValueVector.reserve(dynamicAxes.size()); for (const auto& axis : dynamicAxes) dictionaryValueVector.push_back(axis); dict[dynamicAxisKey] = dictionaryValueVector; dict[isSparseKey] = IsSparse(); if (!Name().empty()) dict[nameKey] = Name(); dict[needsGradientKey] = NeedsGradient(); dict[shapeKey] = Shape(); if (IsParameter() || IsConstant()) { NDArrayView* value = Value().get(); if (value == nullptr) { LogicError("Uninitialized Parameter variable cannot be saved"); } // TODO: add a dictionary value constructor with an rvalue parameter. dict[valueKey] = DictionaryValue(*value); } return dict; } /*static*/ Variable Variable::Deserialize(const Dictionary& dict, const CNTK::DeviceDescriptor& device) { static const vector s_requiredDictionaryKeys = { typeKey, uidKey, kindKey, dataTypeKey, dynamicAxisKey, isSparseKey, needsGradientKey, shapeKey }; size_t version = ValidateDictionary(dict, s_requiredDictionaryKeys, s_variableTypeValue, s_serializationVersion); const auto& uid = dict[uidKey].Value(); VariableKind kind = VariableKind(dict[kindKey].Value()); if (kind != VariableKind::Constant && kind != VariableKind::Input && kind != VariableKind::Parameter && kind != VariableKind::Placeholder) { LogicError("Unexpected variable '%ls':'%u' (%s).", kindKey.c_str(), static_cast::type>(kind), GetVersionsString(s_serializationVersion, version).c_str()); } DataType dataType = DataType(dict[dataTypeKey].Value()); if (dataType != DataType::Unknown && dataType != DataType::Float && dataType != DataType::Double) { LogicError("Unexpected variable '%ls':'%u' (%s).", dataTypeKey.c_str(), static_cast::type>(dataType), GetVersionsString(s_serializationVersion, version).c_str()); } const vector& dictionaryValueVector = dict[dynamicAxisKey].Value>(); vector dynamicAxis; dynamicAxis.reserve(dictionaryValueVector.size()); for (const auto& dictionaryValue : dictionaryValueVector) { dynamicAxis.push_back(dictionaryValue.Value()); } bool isSparse = dict[isSparseKey].Value(); std::wstring name = L""; if (dict.Contains(nameKey)) name = dict[nameKey].Value(); bool needsGradient = dict[needsGradientKey].Value(); const auto& shape = dict[shapeKey].Value(); if (kind == VariableKind::Constant || kind == VariableKind::Parameter) { auto& value = dict[valueKey].Value(); // TODO: this copying here is redundant, value should be moved from the dictionary to the variable. // Also, the correct device should be used upfront when deserializing NDArrayView. Variable var(shape, kind, dataType, value.DeepClone(device, kind == VariableKind::Constant), needsGradient, dynamicAxis, isSparse, name, uid); if (var.IsParameter()) return Parameter(var); else return Constant(var); } return Variable(shape, kind, dataType, nullptr, needsGradient, dynamicAxis, isSparse, name, uid); } Parameter::Parameter(const NDShape& shape, DataType dataType, const ParameterInitializer& initializer, const DeviceDescriptor& device, const std::wstring& name) : Variable(shape, VariableKind::Parameter, dataType, nullptr, true, {}, name, Internal::GenerateUid(VariableKind::Parameter)) { m_dataFields->SetValueInitialization(initializer, device); } size_t Parameter::CurrentValueTimeStamp() const { return m_dataFields->m_valueTimeStamp.load(); } void Parameter::RecordValueUpdate() { m_dataFields->m_valueTimeStamp++; } Constant::Constant(const NDShape& shape, DataType dataType, const ParameterInitializer& initializer, const DeviceDescriptor& device, const std::wstring& name) : Variable(shape, VariableKind::Constant, dataType, nullptr, false, {}, name, Internal::GenerateUid(VariableKind::Constant)) { m_dataFields->SetValueInitialization(initializer, device); } }