https://github.com/Microsoft/CNTK
Raw File
Tip revision: 89e095802631d2d145cbfbd8c42cabe57be2575a authored by Mark Hillebrand on 18 January 2016, 08:33:34 UTC
License change
Tip revision: 89e0958
NDLExamples.ndl
# Top-level selection of which sections of this file are used.
# load= names the section that holds the macro definitions (ndlMacroDefine below).
load=ndlMacroDefine
# run= names the section to execute. Only one run= line is active; the
# commented-out lines are alternatives naming other example sections.
#run=ndlFull
#run=ndlTestMDN
#run=ndlTestGMM
#run=ndlTestCosDist
#run=ndlMacroUseCNNSubSample2
run=ndlMacroUseCNN

ndlTestCosDist=[
    # Example: three RBFF layers followed by SoftMax, trained on the negated
    # sum of the cosine distance between the softmax output and the labels.
    # RBFF is defined in the ndlMacroDefine section.

    # Sample, Hidden, and Label dimensions
    SDim=784
    HDim=256
    LDim=10

    features=Input(SDim, tag=feature)
    labels=Input(LDim, tag=label)

    # compute mean/stddev for mean/stddev normalization
    fmean = Mean(features);
    finvstd=InvStdDev(features)
    finput=PerDimMeanVarNormalization(features, fmean, finvstd)

    # Layer operations; the last layer has LDim rows so it lines up with labels
    L1 = RBFF(finput, HDim, SDim)
    L2 = RBFF(L1, HDim, HDim)
    L3 = RBFF(L2, LDim, HDim)
    L4 = SoftMax(L3);

    # Criterion: negate the summed cosine distance.
    CD = CosDistance(L4, labels);
    CDAll=SumElements(CD)
    # The reference below is spelled exactly like the definition above
    # (it was previously written CDALL).
    NCD=Negate(CDAll, tag=Criteria)
    Err=ErrorPrediction(labels, L4, tag=Eval)

    # rootNodes defined here
    OutputNodes=(L4)
]

ndlTestGMM=[
    # Example: Gaussian-mixture log-likelihood (GMMLL) criterion whose prior,
    # means and log-stddev are free parameters, evaluated on the output of a
    # small feed-forward stack. RBFF and BFF come from ndlMacroDefine.

    # Sample, Hidden, and Label dimensions
    SDim=784
    HDim=10
    LDim=10

    features=Input(SDim, tag=feature)
    labels=Input(LDim, tag=label)

    # Layer operations
    L1=RBFF(features, HDim, SDim)
    L2=RBFF(L1, HDim, HDim)
    L3=BFF(L2, HDim, HDim)

    # Mixture configuration.
    # NOTE(review): SizeOfAllMeans looks like GMMComponent * HDim (4 * 10) - confirm.
    GMMComponent=4
    SizeOfAllMeans=40

    UnnormedPrior=Parameter(GMMComponent, 1)
    Means=Parameter(SizeOfAllMeans, 1)
    LogStddev=Parameter(GMMComponent, 1)

    # Criterion: negated sum of the log-likelihood.
    LL=GMMLL(UnnormedPrior, Means, LogStddev, L3)
    LLAll=SumElements(LL)
    # The reference below is spelled exactly like the definition above
    # (it was previously written LLALL).
    NLL=Negate(LLAll, tag=Criteria)

    # rootNodes defined here
    OutputNodes=(NLL)
    EvalNodes=(NLL)
]

ndlTestMDN=[
    # Example: mixture density network. The prior, means and log-stddev of the
    # mixture are each produced by a BFF layer on top of L3, and the GMMLL
    # log-likelihood is evaluated on the labels. RBFF and BFF come from
    # ndlMacroDefine.

    # Sample, Hidden, and Label dimensions
    SDim=784
    HDim=256
    LDim=10

    features=Input(SDim, tag=feature)
    labels=Input(LDim, tag=label)

    # Layer operations
    L1=RBFF(features, HDim, SDim)
    L2=RBFF(L1, HDim, HDim)
    L3=RBFF(L2, HDim, HDim)

    # Mixture configuration.
    # NOTE(review): SizeOfAllMeans looks like GMMComponent * LDim (4 * 10) - confirm.
    GMMComponent=4
    SizeOfAllMeans=40

    # Unnormalized prior: Tanh output scaled by the constant 2
    UnnormedPrior0 =  BFF(L3, GMMComponent, HDim)
    UnnormedPrior1=Tanh(UnnormedPrior0)
    scaleFPrior=Const(2)
    UnnormedPrior=Scale(scaleFPrior, UnnormedPrior1)

    Means=BFF(L3, SizeOfAllMeans, HDim)

    #it's important to control the range of logstddev
    # The column count is HDim because the input L3 has HDim rows, exactly as
    # in the two BFF calls above. It was previously MDim, which is not defined
    # anywhere in this file.
    LogStddev0=BFF(L3, GMMComponent, HDim)
    LogStddev1=Tanh(LogStddev0)
    scaleFStddev=Const(2)
    LogStddev=Scale(scaleFStddev, LogStddev1)

    # Criterion: negated sum of the log-likelihood.
    LL=GMMLL(UnnormedPrior, Means, LogStddev, labels)
    LLAll=SumElements(LL)
    # Spelled exactly like the definition above (was previously LLALL).
    NLL=Negate(LLAll, tag=Criteria)

    # rootNodes defined here
    OutputNodes=(NLL)
    EvalNodes=(NLL)
]

ndlFull=[
    # One-hidden-layer network written out node by node, without any macros.
    # Root nodes are assigned through the *Nodes=(...) lists at the end
    # rather than through tag= arguments.

    # Sample, Hidden, and Label dimensions
    SDim=784
    HDim=256
    LDim=10

    # Hidden-layer bias and weight
    B0 = Parameter(HDim)
    W0 = Parameter(HDim, SDim)

    # Inputs
    features = Input(SDim)
    labels = Input(LDim)

    # Mean / inverse-stddev normalization of the features
    mean0 = Mean(features)
    invstd0 = InvStdDev(features)
    normedFeat = PerDimMeanVarNormalization(features, mean0, invstd0)

    # Hidden layer: RectifiedLinear(W0 * normedFeat + B0)
    Times1 = Times(W0, normedFeat)
    Plus1 = Plus(Times1, B0)
    RL1 = RectifiedLinear(Plus1)

    # Output layer: W1 * RL1 + B1
    B1 = Parameter(LDim, 1)
    W1 = Parameter(LDim, HDim)
    Times2 = Times(W1, RL1)
    Plus2 = Plus(Times2, B1)

    # Alternative formulations, kept disabled:
    #LogSM=LogSoftmax(Plus2)
    #SM=Exp(LogSM)
    #SM=Softmax(Plus2)
    #CE=CrossEntropy(labels, SM)

    # Training criterion and error metric, both computed from Plus2
    CE = CrossEntropyWithSoftmax(labels, Plus2)
    ErrPredict = ErrorPrediction(labels, Plus2)

    # Root node lists
    FeatureNodes=(features)
    LabelNodes=(labels)
    CriteriaNodes=(CE)
    EvalNodes=(ErrPredict)
    OutputNodes=(Plus2)
]

ndlMacroDefine=[
    # Macro definitions used by the example sections in this file.
    # The definitions below use several syntaxes (single-line "=", "[ ]"
    # bodies, "{ }" bodies, "=[ ]" bodies); these are kept as written.
    # Callers reach nodes inside an expanded macro with dot syntax,
    # e.g. CE.BFF for the BFF node inside an SMBFF expansion.

    #inline Rectified Linear Feed Forward: RectifiedLinear(w1*x1 + b1)
    RFF_R(x1, w1, b1)=RectifiedLinear(Plus(Times(w1,x1),b1))

    #Feed Forward: W1*X1 + B1, with the weight and bias supplied by the caller
    FF(X1, W1, B1)
    [
        T=Times(W1,X1);
        P=Plus(T, B1);
    ]

    #Base Feed Forward network, defines the Bias and Weight parameters itself
    # rows/cols are the dimensions of the weight; the bias has `rows` entries
    # and is initialized to the fixed value 0.
    BFF(in, rows, cols)
    {
        B=Parameter(rows, init=fixedvalue, value=0)
        W=Parameter(rows, cols)
        # W and B are spelled exactly as defined above (were previously w, b)
        FF = FF(in, W, B)
    }

    #RectifiedLinear Base Feed Forward
    RBFF(in,rowCount,colCount)
    {
        BFF = BFF(in, rowCount, colCount);
        RL = RectifiedLinear(BFF);
    }

    #Rectified Linear Feed Forward (caller supplies weight and bias)
    RFF(X2,W2,B2)=[
        FF = FF(X2, W2, B2);
        RL = RectifiedLinear(FF);
    ]

    #RectifiedLinear Feed Forward with Dropout
    RFFD(X3,W3,B3)
    {
        RFF=RFF(X3, W3, B3)
        DO=Dropout(RFF)
    }

    #Sigmoid Base Feed Forward
    SBFF(in,rowCount,colCount)
    {
        BFF = BFF(in, rowCount, colCount);
        S = Sigmoid(BFF);
    }

    #Sigmoid Feed Forward (caller supplies weight and bias)
    SFF(X2,W2,B2)=[
        FF = FF(X2, W2, B2);
        S = Sigmoid(FF);
    ]

    #Sigmoid Feed Forward with Dropout
    SFFD(X3,W3,B3)
    {
        SFF=SFF(X3, W3, B3)
        DO=Dropout(SFF)
    }

    #SoftMax Feed Forward: cross entropy with softmax over FF(x,y,z)
    # x is the input, y the weight, z the bias (they are forwarded to FF).
    SMFF(x,y,z, labels)
    {
        FF = FF(x,y,z);
        SM = CrossEntropyWithSoftmax(labels, FF)
    }

    #SoftMax Base Feed Forward: cross entropy with softmax over BFF(x,r,c)
    # r/c are the weight dimensions forwarded to BFF.
    SMBFF(x,r,c, labels)
    {
        BFF = BFF(x,r,c);
        SM = CrossEntropyWithSoftmax(labels, BFF)
    }

    #inline Rectified Linear Feed Forward with Dropout
    RFFD_R(x1, w1, b1)={Dropout(RectifiedLinear(Plus(Times(w1,x1),b1)))}

    #Weighted objective function: w1*o1 + w2*o2, with w1/w2 turned into constants
    WtObjFcn(o1,w1,o2,w2)
    {
        A1=Constant(w1)
        A2=Constant(w2)
        T1=Times(A1,o1)
        T2=Times(A2,o2)
        O=Plus(T1,T2)
    }

]

ndlMacroUse2=[
    # Three RBFF layers on normalized features, topped by SMBFF for the
    # cross-entropy criterion. Macros come from ndlMacroDefine.

    # Sample, Hidden, and Label dimensions
    SDim=784
    HDim=256
    LDim=10

    features = Input(SDim, tag=feature)
    labels = Input(LDim, tag=label)

    # Mean / inverse-stddev normalization of the features
    fmean = Mean(features)
    finvstd = InvStdDev(features)
    finput = PerDimMeanVarNormalization(features, fmean, finvstd)

    # Hidden layers
    L1 = RBFF(finput, HDim, SDim)
    L2 = RBFF(L1, HDim, HDim)
    L3 = RBFF(L2, HDim, HDim)

    # Criterion, plus the error metric computed on the BFF node inside CE
    CE = SMBFF(L3, LDim, HDim, labels, tag=Criteria)
    Err = ErrorPrediction(labels, CE.BFF, tag=Eval)

    # Root nodes
    OutputNodes=(CE.BFF)
]
ndlMacroUseCNNSubSample2ZeroPadding=[
    # Convolution (subsampling 2, zero padding on) -> Sigmoid -> MaxPooling ->
    # SBFF -> SMBFF. All image geometry is passed explicitly to Convolution
    # and MaxPooling in this variant.

    # Sample, Hidden, and Label dimensions
    SDim=784
    HDim=128
    LDim=10

    features = Input(SDim, tag=feature)
    labels = Input(LDim, tag=label)

    # Convolution geometry
    inputWidth=28
    inputHeight=28
    inputChannels=1
    kernelWidth=5
    kernelHeight=5
    outputChannels=24
    horizontalSubsample=2
    verticalSubsample=2

    # Weight is [outputChannels, kernelWidth * kernelHeight * inputChannels] = [24, 25]
    cvweight = Parameter(outputChannels, 25)
    cv = Convolution(cvweight, features, inputWidth, inputHeight, inputChannels, kernelWidth, kernelHeight, outputChannels, horizontalSubsample, verticalSubsample, zeroPadding=true)

    # One bias per output channel
    cvbias = Parameter(outputChannels, 1)

    cvplusbias = Plus(cv, cvbias)
    nlcv = Sigmoid(cvplusbias)

    # Convolution output size:
    #   outputWidth  = (m_inputWidth-1)/m_horizontalSubsample + 1
    #   outputHeight = (m_inputHeight-1)/m_verticalSubsample + 1
    outputWidth=14
    outputHeight=14

    # Max pooling
    windowWidth=2
    windowHeight=2
    stepW=2
    stepH=2
    mp = MaxPooling(nlcv, outputWidth, outputHeight, outputChannels, windowWidth, windowHeight, stepW, stepH)

    # Pooling output size:
    #   m_outputWidth  = (m_inputWidth-m_windowWidth)/m_horizontalSubsample + 1
    #   m_outputHeight = (m_inputHeight-m_windowHeight)/m_verticalSubsample + 1
    #   m_outputSizePerSample = m_outputWidth * m_outputHeight * m_channels
    mpoutputWidth=7
    mpoutputHeight=7
    mpoutputSizePerSample=1176

    # Fully connected layers
    L1 = SBFF(mp, HDim, mpoutputSizePerSample)
    CE = SMBFF(L1, LDim, HDim, labels, tag=Criteria)
    Err = ErrorPrediction(labels, CE.BFF, tag=Eval)

    # Root nodes
    OutputNodes=(CE.BFF)
]

ndlMacroUseCNNSubSample2=[
    # Convolution (subsampling 2, no zero padding) -> Sigmoid -> MaxPooling ->
    # SBFF -> SMBFF. The input is declared with ImageInput, and Convolution /
    # MaxPooling are not given the input geometry explicitly.

    # Sample, Hidden, and Label dimensions
    SDim=784
    HDim=128
    LDim=10

    features = ImageInput(28,28, 1, tag=feature)
    labels = Input(LDim, tag=label)

    # Convolution geometry
    inputWidth=28
    inputHeight=28
    inputChannels=1
    kernelWidth=5
    kernelHeight=5
    outputChannels=24
    horizontalSubsample=2
    verticalSubsample=2

    # Weight is [outputChannels, kernelWidth * kernelHeight * inputChannels] = [24, 25]
    cvweight = Parameter(outputChannels, 25)
    cv = Convolution(cvweight, features, kernelWidth, kernelHeight, outputChannels, horizontalSubsample, verticalSubsample, zeroPadding=false)

    # One bias per output channel
    cvbias = Parameter(outputChannels, 1)

    cvplusbias = Plus(cv, cvbias)
    nlcv = Sigmoid(cvplusbias)
    #nlcv=Sigmoid(cv);

    # Convolution output size:
    #   outputWidth  = (m_inputWidth-m_kernelWidth)/m_horizontalSubsample + 1
    #   outputHeight = (m_inputHeight-m_kernelHeight)/m_verticalSubsample + 1
    outputWidth=12
    outputHeight=12

    # Max pooling
    windowWidth=2
    windowHeight=2
    stepW=2
    stepH=2
    mp = MaxPooling(nlcv, windowWidth, windowHeight, stepW, stepH)

    # Pooling output size:
    #   m_outputWidth  = (m_inputWidth-m_windowWidth)/m_horizontalSubsample + 1
    #   m_outputHeight = (m_inputHeight-m_windowHeight)/m_verticalSubsample + 1
    #   m_outputSizePerSample = m_outputWidth * m_outputHeight * m_channels
    mpoutputWidth=6
    mpoutputHeight=6
    mpoutputSizePerSample=864

    # Fully connected layers
    L1 = SBFF(mp, HDim, mpoutputSizePerSample)
    CE = SMBFF(L1, LDim, HDim, labels, tag=Criteria)
    Err = ErrorPrediction(labels, CE.BFF, tag=Eval)

    # Root nodes
    OutputNodes=(CE.BFF)
]

ndlMacroUseCNN=[
    # Convolution (no subsampling, no zero padding) -> Sigmoid -> MaxPooling ->
    # SBFF -> SMBFF. Dimensions given as 0 are left for CNTK to derive.

    # Image geometry
    inputWidth=28
    inputHeight=28
    inputChannels=1

    # Sample and Label dimensions
    SDim=784
    LDim=10

    features = ImageInput(inputWidth, inputHeight, inputChannels, 1, tag=feature)
    labels = Input(LDim, tag=label)

    # Convolution geometry
    kernelWidth=5
    kernelHeight=5
    outputChannels=24
    horizontalSubsample=1
    verticalSubsample=1

    # Weight is [outputChannels, kernelWidth * kernelHeight * inputChannels];
    # the explicit form is kept disabled for reference:
    # cvweight=Parameter(outputChannels, 25)
    cvweight = Parameter(0, 0)  #CNTK will derive the dimension automatically
    cv = Convolution(cvweight, features, kernelWidth, kernelHeight, outputChannels, horizontalSubsample, verticalSubsample, zeroPadding=false)

    # One bias per output channel
    cvbias = Parameter(outputChannels, 1)

    cvplusbias = Plus(cv, cvbias)
    nlcv = Sigmoid(cvplusbias)

    # Max pooling
    windowWidth=2
    windowHeight=2
    stepW=2
    stepH=2
    mp = MaxPooling(nlcv, windowWidth, windowHeight, stepW, stepH)

    mpoutputSizePerSample=0

    # Fully connected layers; the column count of the first layer is given as 0
    HDim=128
    L1 = SBFF(mp, HDim, 0)
    CE = SMBFF(L1, LDim, HDim, labels, tag=Criteria)
    Err = ErrorPrediction(labels, CE.BFF, tag=Eval)

    # Root nodes
    OutputNodes=(CE.BFF)
]

ndlMacroUseNoBase=[
    # Same topology as the RBFF-based examples, but built from the macros that
    # do NOT create their own parameters (RFFD, SMFF from ndlMacroDefine):
    # every weight and bias is declared here and passed in explicitly.

    # Sample, Hidden, and Label dimensions
    SDim=784
    HDim=256
    LDim=10

    # Weight, Bias, features and label inputs
    B0=Parameter(HDim, init=fixedvalue, value=0)
    W0=Parameter(HDim, SDim, init=uniform)
    B1=Parameter(HDim, init=fixedvalue, value=0)
    W1=Parameter(HDim, HDim, init=uniform)
    B2=Parameter(HDim, init=fixedvalue, value=0)
    W2=Parameter(HDim, HDim, init=uniform)
    BTop=Parameter(LDim, init=fixedvalue, value=0)
    WTop=Parameter(LDim, HDim, init=uniform)

    features=Input(SDim, tag=feature)
    labels=Input(LDim, tag=label)

    # Layer operations
    # RFFD(X3,W3,B3) and SMFF(x,y,z,labels) take (input, weight, bias), so the
    # parameters declared above are passed in. The calls previously passed the
    # dimension constants (HDim, SDim, LDim), which left every W*/B* unused.
    L1 = RFFD(features, W0, B0)
    L2 = RFFD(L1, W1, B1)
    L3 = RFFD(L2, W2, B2)
    CE = SMFF(L3, WTop, BTop, labels, tag=Criteria)
    # SMFF names its inner feed-forward node FF (there is no BFF inside SMFF),
    # so the pre-softmax output is CE.FF.
    Err=ErrorPrediction(labels, CE.FF, tag=Eval)
    # rootNodes defined here
    OutputNodes=(CE.FF)
]

ndlMacroUseMask=[
    # Example: two RBFF stacks over the same normalized input; the second
    # stack (ML1, ML2) is multiplied element-wise into the first as a mask
    # before the SMBFF criterion layer. Macros come from ndlMacroDefine.

    # Sample, Hidden, and Label dimensions
    SDim=784
    HDim=256
    LDim=10

    features=Input(SDim, tag=feature)
    labels=Input(LDim, tag=label)

    # compute mean/stddev for mean/stddev normalization
    fmean = Mean(features);
    finvstd=InvStdDev(features)
    finput=PerDimMeanVarNormalization(features, fmean, finvstd)

    # Layer operations
    L1 = RBFF(finput, HDim, SDim)
    L2 = RBFF(L1, HDim, HDim)
    L3 = RBFF(L2, HDim, HDim)

    # mask layers
    ML1=RBFF(finput, HDim, SDim)
    ML2=RBFF(ML1, HDim, HDim)

    # mask
    L4=ElementTimes(L3, ML2)

    CE = SMBFF(L4, LDim, HDim, labels, tag=Criteria)
    Err=ErrorPrediction(labels, CE.BFF, tag=Eval)

    # output nodes: BFF output of CE minus the log of the label mean
    Prior=Mean(labels)
    LP=Log(Prior)
    O=Minus(CE.BFF, LP)

    # rootNodes defined here
    OutputNodes=(O)
    CriteriaNodes=(CE)
    # Written as an assignment to EvalNodes, the spelling the other sections
    # of this file use. It was previously "EvaluationNodes(Err)", which has
    # no "=" and so is not a node-list assignment.
    EvalNodes=(Err)
]
back to top