Content - 286c72d6ae07f0a3b38d639bf75227a57f2cdece

Permalink
# assembly components
## Convolution + Batch Normalization
ConvBNLayer {outChannels, kernel, stride, bnTimeConst} = Sequential(
    ConvolutionalLayer {outChannels, kernel, init = "heNormal", stride = stride, pad = true, bias = false} :
    BatchNormalizationLayer {spatialRank = 2, normalizationTimeConstant = bnTimeConst, useCntkEngine = false}
)

## Convolution + Batch Normalization + Rectifier Linear
ConvBNReLULayer {outChannels, kernelSize, stride, bnTimeConst} = Sequential(
    ConvBNLayer {outChannels, kernelSize, stride, bnTimeConst} :
    ReLU
)

# ResNet components

# The basic ResNet block contains two 3x3 convolutions, which is added to the orignal input 
# of the block.  
ResNetBasic {outChannels, bnTimeConst} = {
    apply (x) = {
        # Convolution
        b = Sequential (
            ConvBNReLULayer {outChannels, (3:3), (1:1), bnTimeConst} : 
            ConvBNLayer {outChannels, (3:3), (1:1), bnTimeConst}) (x) 

        p = Plus(b, x)
        r = ReLU(p)
    }.r
}.apply

# A block to reduce the feature map resolution. Two 3x3 convolutions with stride, which is
# added to the original input with 1x1 convolution and stride 
ResNetBasicInc {outChannels, stride, bnTimeConst} = {
    apply (x) = {
        # Convolution 
        b = Sequential (
            ConvBNReLULayer {outChannels, (3:3), stride, bnTimeConst} :
            ConvBNLayer {outChannels, (3:3), (1:1), bnTimeConst}) (x)

        # Shortcut
        s = ConvBNLayer {outChannels, (1:1), stride, bnTimeConst} (x)

        p = Plus(b, s)
        r = ReLU(p)
    }.r
}.apply

# A bottleneck ResNet block is attempting to reduce the amount of computation by replacing
# the two 3x3 convolutions by a 1x1 convolution, bottlenecked to `interOutChannels` feature 
# maps (usually interOutChannels < outChannels, thus the name bottleneck), followed by a 
# 3x3 convolution, and then a 1x1 convolution again, with `outChannels` feature maps. 
ResNetBottleneck {outChannels, interOutChannels, bnTimeConst} = {
    apply (x) = {
        # Convolution
        b = Sequential (
            ConvBNReLULayer {interOutChannels, (1:1), (1:1), bnTimeConst} :
            ConvBNReLULayer {interOutChannels, (3:3), (1:1), bnTimeConst} :
            ConvBNLayer {outChannels, (1:1), (1:1), bnTimeConst}) (x)

        p = Plus(b, x)
        r = ReLU(p)
    }.r
}.apply

# a block to reduce the feature map resolution using bottleneck. One can reduce the size 
# either at the first 1x1 convolution by specifying "stride1x1=(2:2)" (original paper), 
# or at the 3x3 convolution by specifying "stride3x3=(2:2)" (Facebook re-implementation). 
ResNetBottleneckInc {outChannels, interOutChannels, stride1x1, stride3x3, bnTimeConst} = {
    apply (x) = {
        # Convolution
        b = Sequential (
            ConvBNReLULayer {interOutChannels, (1:1), stride1x1, bnTimeConst} :
            ConvBNReLULayer {interOutChannels, (3:3), stride3x3, bnTimeConst} :
            ConvBNLayer {outChannels, (1:1), (1:1), bnTimeConst}) (x)

        # Shortcut
        stride[i:0..Length(stride1x1)-1] = stride1x1[i] * stride3x3[i]
        s = ConvBNLayer {outChannels, (1:1), stride, bnTimeConst} (x)

        p = Plus(b, s)
        r = ReLU(p)
    }.r
}.apply

NLayerStack {n, c} = Sequential (array[0..n-1] (c))
ResNetBasicStack {n, outChannels, bnTimeConst} = NLayerStack {n, i => ResNetBasic {outChannels, bnTimeConst}}
ResNetBottleneckStack {n, outChannels, interOutChannels, bnTimeConst} = NLayerStack {n, i => ResNetBottleneck {outChannels, interOutChannels, bnTimeConst}}