# CNTK Configuration File for creating a slot tagger and an intent tagger.

command = TrainTagger:TestTagger
makeMode = false ; traceLevel = 0 ; deviceId = "auto"

rootDir = "." ; dataDir = "$rootDir$" ; modelDir = "$rootDir$/Models"
modelPath = "$modelDir$/slu.cmf"

vocabSize = 943 ; numLabels = 129 ; numIntents = 26    # number of words in vocab, slot labels, and intent labels

# The command to train the LSTM model
TrainTagger = {
    action = "train"
    BrainScriptNetworkBuilder = {
        inputDim  = $vocabSize$
        intentDim = $numIntents$
        embDim    = 150
        hiddenDim = 150

        # bidirectional LSTM layer: for sequence classification, splice the last
        # step of the forward pass with the first step of the backward pass
        BiRecurrentLSTMLayer {outDim} = {
            F = RecurrentLSTMLayer {outDim, goBackwards=false, init='uniform'}
            G = RecurrentLSTMLayer {outDim, goBackwards=true,  init='uniform'}
            apply (x) = Splice (BS.Sequences.Last(F(x)) : BS.Sequences.First(G(x)))
        }.apply

        model = Sequential (
            EmbeddingLayer {embDim, init='uniform'} :
            BatchNormalizationLayer {normalizationTimeConstant=2048} :
            BiRecurrentLSTMLayer {hiddenDim} :
            BatchNormalizationLayer {normalizationTimeConstant=2048} :
            DenseLayer {intentDim, init='uniform', initValueScale=7}
        )

        # features
        n = DynamicAxis()
        query        = Input {inputDim, dynamicAxis=n}
        intentLabels = Input {intentDim}

        # model application
        z = model (query)

        # loss and metric
        ce   = CrossEntropyWithSoftmax (intentLabels, z)
        errs = ClassificationError     (intentLabels, z)

        featureNodes    = (query)
        labelNodes      = (intentLabels)
        criterionNodes  = (ce)
        evaluationNodes = (errs)
        outputNodes     = (z)
    }

    SGD = {
        maxEpochs = 8 ; epochSize = 36000
        minibatchSize = 800

        learningRatesPerSample = 0.01*2:0.005*12:0.001
        gradUpdateType = "FSAdaGrad"
        gradientClippingWithTruncation = true ; clippingThresholdPerSample = 15.0

        firstMBsToShowResult = 10 ; numMBsToShowResult = 100

        # data-parallel training with 1-bit SGD, starting from the second epoch
        parallelTrain = {
            parallelizationMethod = "DataParallelSGD"
            parallelizationStartEpoch = 2
            distributedMBReading = true
            dataParallelSGD = { gradientBits = 1 }
        }
    }

    reader = {
        readerType = "CNTKTextFormatReader"
        file = "$DataDir$/atis.train.ctf"
        randomize = true
        input = {
            query        = { alias = "S0" ; dim = $vocabSize$  ; format = "sparse" }
            intentLabels = { alias = "S1" ; dim = $numIntents$ ; format = "sparse" }
            slotLabels   = { alias = "S2" ; dim = $numLabels$  ; format = "sparse" }
        }
    }
}

# Test the model's accuracy (as an error count)
TestTagger = {
    action = "eval"
    modelPath = $modelPath$
    reader = {
        readerType = "CNTKTextFormatReader"
        file = "$DataDir$/atis.test.ctf"
        randomize = false
        input = {
            query        = { alias = "S0" ; dim = $vocabSize$  ; format = "sparse" }
            intentLabels = { alias = "S1" ; dim = $numIntents$ ; format = "sparse" }
            slotLabels   = { alias = "S2" ; dim = $numLabels$  ; format = "sparse" }
        }
    }
}
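
# Example invocation (the file name "slu.cntk" below is an assumption; use
# whatever name this configuration is saved under):
#
#   cntk configFile=slu.cntk
#
# The CNTKTextFormatReader expects one CTF line per token, keyed by the aliases
# S0 (query word), S1 (intent label, present on the first token of each sequence),
# and S2 (slot label). The indices below are illustrative; the actual values come
# from the ATIS vocabulary files:
#
#   19 |S0 178:1 |# BOS      |S1 14:1 |# flight  |S2 128:1 |# O
#   19 |S0 770:1 |# show                         |S2 128:1 |# O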