Revision e1467a79dc6580ae009d827b5e6f274faff3b339 authored by liqunfu on 27 March 2020, 21:42:04 UTC, committed by GitHub on 27 March 2020, 21:42:04 UTC
support Pooling ops with Sequence axis
fastrcnn.cntk
# Fast-RCNN configuration for CNTK
# For algorithm and details see http://arxiv.org/abs/1504.08083
# Overview:
# The Fast-RCNN algorithm uses a DNN that takes as inputs a set of images
# and for each image a set of ROIs (Regions of interest). It first computes
# a convolutional feature map for the entire image using a series of
# convolutional layers (usually from a pretrained network). Then it
# employs ROI pooling to crop out the part of the conv feature map
# that corresponds to an ROI and resizes it to the input size expected
# by the following layer (usually a set of pretrained fully connected layers).
# Classification error and evaluation criterion are computed for each ROI.
# ---------------------------------------------------------------------------
# Global settings. The values below are substituted into the command sections
# further down wherever a name appears between dollar signs.
# ---------------------------------------------------------------------------
#makeMode = false
# Command sections selected for this run. WriteTestModify is defined in this
# file as well but is not part of this list.
command = Train:WriteTest:WriteTrain
deviceId = "Auto"
precision = "float"
parallelTrain = "false"
traceLevel = 1
# Directory layout. The model file is placed under outputDir.
# NOTE(review): dataDir is defined here but no reader in this file references
# it (the readers use bare file names) - confirm whether that is intended.
rootDir = "."
dataDir = "$rootDir$/data/"
outputDir = "$rootDir$/Output"
modelPath = "$outputDir$/Fast-RCNN.model"
#stderr = "$outputDir$/Fast-RCNN.log"
# Image dimensions referenced by the network input shape and by the Scale
# transform of the image readers.
ImageH = 1000
ImageW = 1000
ImageC = 3
# Number of class scores per ROI.
# NOTE(review): must agree with the number of classes encoded per ROI in the
# label files - verify.
NumLabels = 22
# Number of ROIs per image used for the network input shapes.
NumTrainROIs = 64
NumTestROIs = 200
# Flattened per-image dimensions handed to the text readers.
# NOTE(review): the literal values do not equal the formulas in the trailing
# comments when evaluated with the ROI counts above (64 * 4 = 256,
# 64 * 22 = 1408, 200 * 4 = 800, 200 * 22 = 4400). The literals correspond to
# 200 train ROIs and 1000 test ROIs instead. Confirm against the data files and
# any command-line overrides which set of numbers is authoritative.
TrainROIDim = 800 # nominally NumTrainROIs * 4 (see note above)
TrainROILabelDim = 4400 # nominally NumTrainROIs * NumLabels (see note above)
TestROIDim = 4000 # nominally NumTestROIs * 4 (see note above)
TestROILabelDim = 22000 # nominally NumTestROIs * NumLabels (see note above)
# For training we load a pretrained AlexNet model (AlexNet.model) and clone two parts of it:
# the convolutional layers (from the features input up to conv5_y), whose weights are kept
# fixed, and the fully connected layers (from pool3 up to h2_d).
# In the model we apply the cloned convolutional layers, then an ROI pooling layer and
# finally the pretrained FC layers followed by a new FC layer that maps to the new
# label dimensionality of NumLabels classes.
# The inputs are images (1000 x 1000 x 3), ROIs (NumTrainROIs ROIs x 4 coordinates (x, y, w, h))
# and ground truth labels per ROI (NumTrainROIs ROIs x NumLabels classes).
Train = {
    action = "train"

    # Network: cloned AlexNet convolutional layers, then ROI pooling, then the
    # cloned AlexNet fully connected layers, then a new linear output layer.
    BrainScriptNetworkBuilder = {
        # Pretrained base model, addressed by a relative path.
        network = BS.Network.Load ("../../../../../../../../PretrainedModels/AlexNet.model")

        # Convolutional part (features up to conv5_y), cloned with constant parameters.
        convLayers = BS.Network.CloneFunction(network.features, network.conv5_y, parameters = "constant")
        # Fully connected part (pool3 up to h2_d). No parameters option is given,
        # so the CloneFunction default applies.
        fcLayers = BS.Network.CloneFunction(network.pool3, network.h2_d)

        # Maps an image and its ROIs to one score vector per ROI.
        model (features, rois) = {
            # Subtract 114 from every input value. The reader pads with the same
            # value, so padded areas become 0 after this step.
            featNorm = features - 114
            convOut = convLayers (featNorm)
            # Pool the feature map region of every ROI with output size argument 6:6.
            roiOut = ROIPooling (convOut, rois, (6:6))
            fcOut = fcLayers (roiOut)
            # New output layer: 4096 FC activations to NumLabels scores.
            W = ParameterTensor{($NumLabels$:4096), init="glorotUniform"}
            b = ParameterTensor{$NumLabels$, init = 'zero'}
            z = W * fcOut + b
        }.z

        # Input shapes, built from the global settings.
        imageShape = $ImageH$:$ImageW$:$ImageC$
        labelShape = $NumLabels$:$NumTrainROIs$ # NumLabels values for each ROI
        ROIShape = 4:$NumTrainROIs$ # 4 coordinates for each ROI

        features = Input {imageShape}
        roiLabels = Input {labelShape}
        rois = Input {ROIShape}

        z = model (features, rois)

        # Criterion and error are computed with axis = 1.
        ce = CrossEntropyWithSoftmax(roiLabels, z, axis = 1)
        errs = ClassificationError(roiLabels, z, axis = 1)

        featureNodes = (features:rois)
        labelNodes = (roiLabels)
        criterionNodes = (ce)
        evaluationNodes = (errs)
        outputNodes = (z)
    }

    SGD = {
        epochSize = 0
        minibatchSize = 2
        maxEpochs = 17
        # Per-sample learning rate schedule in value*count notation.
        learningRatesPerSample=0.00001*10:0.000001*5:0.0000001
        momentumAsTimeConstant = 20
        gradUpdateType=None
        L2RegWeight=0.0005
        dropoutRate=0.5
        numMBsToShowResult = 50
    }

    # Three deserializers deliver the ROIs, the ROI labels and the images.
    reader = {
        randomize = false
        verbosity = 2
        deserializers = ({
            # ROI coordinates, one dense vector per image.
            type = "CNTKTextFormatDeserializer" ; module = "CNTKTextFormatReader"
            file = train.rois.txt
            input = { rois = { dim = $TrainROIDim$ ; format = "dense" } }
        }:{
            # ROI labels, one dense vector per image.
            type = "CNTKTextFormatDeserializer" ; module = "CNTKTextFormatReader"
            file = train.roilabels.txt
            input = { roiLabels = { dim = $TrainROILabelDim$ ; format = "dense" } }
        }:{
            type = "ImageDeserializer" ; module = "ImageReader"
            file = train.txt
            input = {
                # Scale with padding to ImageW x ImageH. The height previously reused
                # ImageW, which was only correct for square images.
                features = { transforms = (
                    { type = "Scale" ; width = $ImageW$ ; height = $ImageH$ ; channels = $ImageC$ ; scaleMode = "pad" ; padValue = 114 }:
                    { type = "Transpose" }
                )}
                # Label stream of the image list; no network node in this file uses it.
                ignored = {labelDim = 1000}
            }
        })
    }
}
# Write network output for entire test data set
WriteTest = {
    action = "write"
    minibatchSize = 1

    # outputPath = "$OutputDir$/fastrcnnNetOutput"
    outputPath=test

    # Three deserializers deliver the test ROIs, ROI labels and images.
    reader = {
        randomize = false
        verbosity = 2
        deserializers = ({
            # ROI coordinates, one dense vector per image.
            type = "CNTKTextFormatDeserializer" ; module = "CNTKTextFormatReader"
            file = test.rois.txt
            input = { rois = { dim = $TestROIDim$ ; format = "dense" } }
        }:{
            # ROI labels, one dense vector per image.
            type = "CNTKTextFormatDeserializer" ; module = "CNTKTextFormatReader"
            file = test.roilabels.txt
            input = { roiLabels = { dim = $TestROILabelDim$ ; format = "dense" } }
        }:{
            type = "ImageDeserializer" ; module = "ImageReader"
            file = test.txt
            input = {
                # Scale with padding to ImageW x ImageH. The height previously reused
                # ImageW, which was only correct for square images.
                features = { transforms = (
                    { type = "Scale" ; width = $ImageW$ ; height = $ImageH$ ; channels = $ImageC$ ; scaleMode = "pad" ; padValue = 114 }:
                    { type = "Transpose" }
                )}
                # Label stream of the image list; no network node in this file uses it.
                ignored = {labelDim = 1000}
            }
        })
    }
}
# Write network output for entire train data set
WriteTrain = {
    action = "write"
    minibatchSize = 1

    # outputPath = "$OutputDir$/fastrcnnNetOutput"
    outputPath=train

    # Three deserializers deliver the training ROIs, ROI labels and images.
    # train.rois.txt and train.roilabels.txt are the same files the Train section
    # reads, so they are declared with the Train dimensions here as well. They
    # were previously declared with the Test dimensions.
    reader = {
        randomize = false
        verbosity = 2
        deserializers = ({
            # ROI coordinates, one dense vector per image.
            type = "CNTKTextFormatDeserializer" ; module = "CNTKTextFormatReader"
            file = train.rois.txt
            input = { rois = { dim = $TrainROIDim$ ; format = "dense" } }
        }:{
            # ROI labels, one dense vector per image.
            type = "CNTKTextFormatDeserializer" ; module = "CNTKTextFormatReader"
            file = train.roilabels.txt
            input = { roiLabels = { dim = $TrainROILabelDim$ ; format = "dense" } }
        }:{
            type = "ImageDeserializer" ; module = "ImageReader"
            file = train.txt
            input = {
                # Scale with padding to ImageW x ImageH. The height previously reused
                # ImageW, which was only correct for square images.
                features = { transforms = (
                    { type = "Scale" ; width = $ImageW$ ; height = $ImageH$ ; channels = $ImageC$ ; scaleMode = "pad" ; padValue = 114 }:
                    { type = "Transpose" }
                )}
                # Label stream of the image list; no network node in this file uses it.
                ignored = {labelDim = 1000}
            }
        })
    }
}
# For testing you can optionally load the trained Fast-RCNN model and modify the input size,
# such that the network accepts a different number of ROIs per image, e.g. 200. To this end we load and
# clone the network and define new inputs with the desired size corresponding to 200 ROIs.
WriteTestModify = {
    action = "write"
    minibatchSize = 1

    # outputPath = "$OutputDir$/fastrcnnNetOutput"
    outputPath=test

    # Rebuilds the trained network around new inputs sized for NumTestROIs ROIs.
    BrainScriptNetworkBuilder = {
        # Input shapes, built from the global settings.
        imageShape = $ImageH$:$ImageW$:$ImageC$
        labelShape = $NumLabels$:$NumTestROIs$ # NumLabels values for each ROI
        ROIShape = 4:$NumTestROIs$ # 4 coordinates for each ROI

        # load network
        network = BS.Network.Load ("$modelPath$")
        # Clone everything between the two original inputs and z, with constant parameters.
        clonedNet = BS.Network.CloneFunction ((network.features:network.rois), { z = network.z }, parameters = "constant")

        features = Input {imageShape}
        roiLabels = Input {labelShape}
        rois = Input {ROIShape}

        z = clonedNet(features, rois).z

        # Labels first, network output second, matching the Train section.
        # ClassificationError previously received these two arguments swapped.
        ce = CrossEntropyWithSoftmax (roiLabels, z, axis = 1)
        errs = ClassificationError(roiLabels, z, axis = 1)

        featureNodes = (features:rois)
        labelNodes = (roiLabels)
        criterionNodes = (ce)
        evaluationNodes = (errs)
        outputNodes = (z)
    }

    # Three deserializers deliver the test ROIs, ROI labels and images.
    reader = {
        randomize = false
        verbosity = 2
        deserializers = ({
            # ROI coordinates, one dense vector per image.
            type = "CNTKTextFormatDeserializer" ; module = "CNTKTextFormatReader"
            file = test.rois.txt
            input = { rois = { dim = $TestROIDim$ ; format = "dense" } }
        }:{
            # ROI labels, one dense vector per image.
            type = "CNTKTextFormatDeserializer" ; module = "CNTKTextFormatReader"
            file = test.roilabels.txt
            input = { roiLabels = { dim = $TestROILabelDim$ ; format = "dense" } }
        }:{
            type = "ImageDeserializer" ; module = "ImageReader"
            file = test.txt
            input = {
                # Scale with padding to ImageW x ImageH. The height previously reused
                # ImageW, which was only correct for square images.
                features = { transforms = (
                    { type = "Scale" ; width = $ImageW$ ; height = $ImageH$ ; channels = $ImageC$ ; scaleMode = "pad" ; padValue = 114 }:
                    { type = "Transpose" }
                )}
                # Label stream of the image list; no network node in this file uses it.
                ignored = {labelDim = 1000}
            }
        })
    }
}
Computing file changes ...