# CNTK Configuration File for training a simple CIFAR-10 convnet.
# During the hands-on tutorial, this will be fleshed out into a ResNet-20 model.

command = TrainConvNet:Eval

makeMode = false ; traceLevel = 0 ; deviceId = "auto"

rootDir = "." ; dataDir = "$rootDir$" ; modelDir = "$rootDir$/Models"

modelPath = "$modelDir$/cifar10.cmf"

# Training action for a convolutional network
TrainConvNet = {
    action = "train"

    BrainScriptNetworkBuilder = {
        imageShape = 32:32:3
        labelDim = 10

        MySubSampleBN (x, depth, stride) =
        {
            s = Splice ((MaxPoolingLayer {(1:1), stride = (stride:stride)} (x) :
                         ConstantTensor (0, (1:1:depth/stride))), axis = 3)  # sub-sample and pad: [W x H x depth/2] --> [W/2 x H/2 x depth]
            b = BatchNormalizationLayer {spatialRank = 2} (s)
        }.b

        MyConvBN (x, depth, initValueScale, stride) =
        {
            c = ConvolutionalLayer {depth, (3:3), pad = true, stride = (stride:stride), bias = false,
                                    init = "gaussian", initValueScale = initValueScale} (x)
            b = BatchNormalizationLayer {spatialRank = 2} (c)
        }.b

        ResNetNode (x, depth) =
        {
            c1 = MyConvBN (x, depth, 7.07, 1)
            r1 = ReLU (c1)
            c2 = MyConvBN (r1, depth, 7.07, 1)
            r  = ReLU (x + c2)
        }.r

        ResNetIncNode (x, depth) =
        {
            c1 = MyConvBN (x, depth, 7.07, 2)  # note the 2
            r1 = ReLU (c1)
            c2 = MyConvBN (r1, depth, 7.07, 1)
            xs = MySubSampleBN (x, depth, 2)
            r  = ReLU (xs + c2)
        }.r

        ResNetNodeStack (x, depth, L) =
        {
            r = if L == 0
                then x
                else ResNetNode (ResNetNodeStack (x, depth, L-1), depth)
        }.r

        model (features) =
        {
            conv1 = ReLU (MyConvBN (features, 16, 0.26, 1))
            rn1   = ResNetNodeStack (conv1, 16, 3)    ##### replaced

            rn2_1 = ResNetIncNode (rn1, 32)
            rn2   = ResNetNodeStack (rn2_1, 32, 2)    ##### replaced

            rn3_1 = ResNetIncNode (rn2, 64)
            rn3   = ResNetNodeStack (rn3_1, 64, 2)    ##### replaced

            pool  = AveragePoolingLayer {(8:8)} (rn3)

            z     = LinearLayer {labelDim, init = "gaussian", initValueScale = 0.4} (pool)
        }.z

        # inputs
        features = Input {imageShape}
        labels   = Input {labelDim}

        # apply model to features
        z = model (features)

        # connect to system
        ce   = CrossEntropyWithSoftmax (labels, z)
        errs = ClassificationError     (labels, z)

        featureNodes    = (features)
        labelNodes      = (labels)
        criterionNodes  = (ce)
        evaluationNodes = (errs)
        outputNodes     = (z)
    }

    SGD = {
        epochSize = 50000

        maxEpochs = 160 ; minibatchSize = 128
        learningRatesPerSample = 0.0078125*80:0.00078125*40:0.000078125
        momentumAsTimeConstant = 1200
        L2RegWeight = 0.0001

        firstMBsToShowResult = 10 ; numMBsToShowResult = 500

        ParallelTrain = {
            parallelizationMethod = "DataParallelSGD"
            parallelizationStartEpoch = 1
            distributedMBReading = true
            dataParallelSGD = { gradientBits = 2 }
        }

        AutoAdjust = {
            autoAdjustMinibatch = true          # enable automatic growing of minibatch size
            minibatchSizeTuningFrequency = 10   # try to enlarge after this many epochs
            numMiniBatch4LRSearch = 200
            minibatchSizeTuningMax = 15000      # out of memory above this
        }
    }

    reader = {
        verbosity = 0 ; randomize = true
        deserializers = ({
            type = "ImageDeserializer" ; module = "ImageReader"
            file = "$dataDir$/cifar-10-batches-py/train_map.txt"
            input = {
                features = { transforms = (
                    { type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } :
                    { type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
                    { type = "Transpose" }
                )}
                labels = { labelDim = 10 }
            }
        })
    }
}

# Eval action
Eval = {
    action = "eval"
    minibatchSize = 16
    evalNodeNames = errs

    reader = {
        verbosity = 0 ; randomize = true
        deserializers = ({
            type = "ImageDeserializer" ; module = "ImageReader"
            file = "$dataDir$/cifar-10-batches-py/test_map.txt"
            input = {
                features = { transforms = (
                    { type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
                    { type = "Transpose" }
                )}
                labels = { labelDim = 10 }
            }
        })
    }
}