Revision e1467a79dc6580ae009d827b5e6f274faff3b339 authored by liqunfu on 27 March 2020, 21:42:04 UTC, committed by GitHub on 27 March 2020, 21:42:04 UTC
support Pooling ops with Sequence axis
FasterRCNN_train.py
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
from __future__ import print_function
import numpy as np
import os, sys
import argparse
import easydict # pip install easydict
import cntk
from cntk import Trainer, load_model, Axis, input_variable, parameter, times, combine, \
softmax, roipooling, plus, element_times, CloneMethod, alias, Communicator, reduce_sum
from cntk.core import Value
from cntk.io import MinibatchData
from cntk.initializer import normal
from cntk.layers import placeholder, Constant, Sequential
from cntk.learners import momentum_sgd, learning_parameter_schedule_per_sample, momentum_schedule
from cntk.logging import log_number_of_parameters, ProgressPrinter
from cntk.logging.graph import find_by_name, plot
from cntk.losses import cross_entropy_with_softmax
from cntk.metrics import classification_error
from _cntk_py import force_deterministic_algorithms
abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(abs_path, ".."))
from utils.rpn.rpn_helpers import create_rpn, create_proposal_target_layer, create_proposal_layer
from utils.annotations.annotations_helper import parse_class_map_file
from utils.od_mb_source import ObjectDetectionMinibatchSource
from utils.proposal_helpers import ProposalProvider
from FastRCNN.FastRCNN_train import clone_model, clone_conv_layers, create_fast_rcnn_predictor, \
create_detection_losses
def prepare(cfg, use_arg_parser=True):
    """Resolve paths and apply runtime settings on *cfg* (mutated in place).

    Fixes minibatch size and channel count, makes dataset/model paths
    absolute, creates the Output directories, optionally applies
    command-line overrides, and seeds numpy's RNG.

    Args:
        cfg: easydict-style configuration object; mutated in place.
        use_arg_parser (bool): when True, parse_arguments() may override
            selected config values from the command line.

    Raises:
        RuntimeError: if the resolved data directory does not exist.
    """
    cfg.MB_SIZE = 1
    cfg.NUM_CHANNELS = 3
    cfg.OUTPUT_PATH = os.path.join(abs_path, "Output")
    cfg["DATA"].MAP_FILE_PATH = os.path.join(abs_path, cfg["DATA"].MAP_FILE_PATH)
    # Presence of the data folder distinguishes a local run from a
    # GPU-cluster run (where debug/plot output is disabled below).
    running_locally = os.path.exists(cfg["DATA"].MAP_FILE_PATH)
    if running_locally:
        os.chdir(cfg["DATA"].MAP_FILE_PATH)
        if not os.path.exists(os.path.join(abs_path, "Output")):
            os.makedirs(os.path.join(abs_path, "Output"))
        if not os.path.exists(os.path.join(abs_path, "Output", cfg["DATA"].DATASET)):
            os.makedirs(os.path.join(abs_path, "Output", cfg["DATA"].DATASET))
    else:
        # disable debug and plot outputs when running on GPU cluster
        cfg["CNTK"].DEBUG_OUTPUT = False
        cfg.VISUALIZE_RESULTS = False
    if use_arg_parser:
        parse_arguments(cfg)
    data_path = cfg["DATA"].MAP_FILE_PATH
    if not os.path.isdir(data_path):
        raise RuntimeError("Directory %s does not exist" % data_path)
    # Make all dataset file references absolute.
    cfg["DATA"].CLASS_MAP_FILE = os.path.join(data_path, cfg["DATA"].CLASS_MAP_FILE)
    cfg["DATA"].TRAIN_MAP_FILE = os.path.join(data_path, cfg["DATA"].TRAIN_MAP_FILE)
    cfg["DATA"].TEST_MAP_FILE = os.path.join(data_path, cfg["DATA"].TEST_MAP_FILE)
    cfg["DATA"].TRAIN_ROI_FILE = os.path.join(data_path, cfg["DATA"].TRAIN_ROI_FILE)
    cfg["DATA"].TEST_ROI_FILE = os.path.join(data_path, cfg["DATA"].TEST_ROI_FILE)
    cfg['MODEL_PATH'] = os.path.join(cfg.OUTPUT_PATH, "faster_rcnn_eval_{}_{}.model"
                                     .format(cfg["MODEL"].BASE_MODEL, "e2e" if cfg["CNTK"].TRAIN_E2E else "4stage"))
    cfg['BASE_MODEL_PATH'] = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "..", "..", "PretrainedModels",
                                          cfg["MODEL"].BASE_MODEL_FILE)
    cfg["DATA"].CLASSES = parse_class_map_file(cfg["DATA"].CLASS_MAP_FILE)
    cfg["DATA"].NUM_CLASSES = len(cfg["DATA"].CLASSES)
    if cfg["CNTK"].FAST_MODE:
        # Fast mode: a single epoch per training stage for quick smoke tests.
        cfg["CNTK"].E2E_MAX_EPOCHS = 1
        cfg["CNTK"].RPN_EPOCHS = 1
        cfg["CNTK"].FRCN_EPOCHS = 1
    if cfg["CNTK"].FORCE_DETERMINISTIC:
        force_deterministic_algorithms()
    np.random.seed(seed=cfg.RND_SEED)
    # NOTE(review): the `False and` guard makes this block unreachable, so the
    # parameter report never prints. It also reads cfg["CNTK"].RPN_LR_FACTOR /
    # FRCN_LR_FACTOR whereas the rest of the file uses cfg["MODEL"].*_LR_FACTOR,
    # which may be why it was disabled — confirm before re-enabling.
    if False and cfg["CNTK"].DEBUG_OUTPUT:
        # report args
        print("Using the following parameters:")
        print("Flip image : {}".format(cfg["TRAIN"].USE_FLIPPED))
        print("Train conv layers: {}".format(cfg.TRAIN_CONV_LAYERS))
        print("Random seed : {}".format(cfg.RND_SEED))
        print("Momentum per MB : {}".format(cfg["CNTK"].MOMENTUM_PER_MB))
        if cfg["CNTK"].TRAIN_E2E:
            print("E2E epochs : {}".format(cfg["CNTK"].E2E_MAX_EPOCHS))
        else:
            print("RPN lr factor : {}".format(cfg["CNTK"].RPN_LR_FACTOR))
            print("RPN epochs : {}".format(cfg["CNTK"].RPN_EPOCHS))
            print("FRCN lr factor : {}".format(cfg["CNTK"].FRCN_LR_FACTOR))
            print("FRCN epochs : {}".format(cfg["CNTK"].FRCN_EPOCHS))
def parse_arguments(cfg):
    """Parse command-line arguments and write any overrides into *cfg*.

    Only options that were actually supplied (non-None) override the
    corresponding config value. May also force the compute device per
    worker rank. Mutates *cfg* in place; returns None.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-datadir', '--datadir', help='Data directory where the ImageNet dataset is located',
                        required=False, default=cfg["DATA"].MAP_FILE_PATH)
    parser.add_argument('-outputdir', '--outputdir', help='Output directory for checkpoints and models',
                        required=False, default=None)
    parser.add_argument('-logdir', '--logdir', help='Log file',
                        required=False, default=None)
    parser.add_argument('-n', '--num_epochs', help='Total number of epochs to train', type=int,
                        required=False, default=cfg["CNTK"].E2E_MAX_EPOCHS)
    parser.add_argument('-m', '--minibatch_size', help='Minibatch size', type=int,
                        required=False, default=cfg["CNTK"].MB_SIZE)
    parser.add_argument('-e', '--epoch_size', help='Epoch size', type=int,
                        required=False, default=cfg["CNTK"].NUM_TRAIN_IMAGES)
    # fix: default was the string '32'; use an int so the default does not rely
    # on argparse re-parsing string defaults through type=int
    parser.add_argument('-q', '--quantized_bits', help='Number of quantized bits used for gradient aggregation',
                        type=int,
                        required=False, default=32)
    parser.add_argument('-r', '--restart',
                        help='Indicating whether to restart from scratch (instead of restart from checkpoint file by default)',
                        action='store_true')
    parser.add_argument('-device', '--device', type=int, help="Force to run the script on a specified device",
                        required=False, default=None)
    parser.add_argument('-rpnLrFactor', '--rpnLrFactor', type=float, help="Scale factor for rpn lr schedule",
                        required=False)
    parser.add_argument('-frcnLrFactor', '--frcnLrFactor', type=float, help="Scale factor for frcn lr schedule",
                        required=False)
    parser.add_argument('-e2eLrFactor', '--e2eLrFactor', type=float, help="Scale factor for e2e lr schedule",
                        required=False)
    parser.add_argument('-momentumPerMb', '--momentumPerMb', type=float, help="momentum per minibatch", required=False)
    parser.add_argument('-e2eEpochs', '--e2eEpochs', type=int, help="number of epochs for e2e training", required=False)
    parser.add_argument('-rpnEpochs', '--rpnEpochs', type=int, help="number of epochs for rpn training", required=False)
    parser.add_argument('-frcnEpochs', '--frcnEpochs', type=int, help="number of epochs for frcn training",
                        required=False)
    parser.add_argument('-rndSeed', '--rndSeed', type=int, help="the random seed", required=False)
    parser.add_argument('-trainConv', '--trainConv', type=int, help="whether to train conv layers", required=False)
    parser.add_argument('-trainE2E', '--trainE2E', type=int, help="whether to train e2e (otherwise 4 stage)",
                        required=False)
    args = vars(parser.parse_args())

    if args['rpnLrFactor'] is not None:
        cfg["MODEL"].RPN_LR_FACTOR = args['rpnLrFactor']
    if args['frcnLrFactor'] is not None:
        cfg["MODEL"].FRCN_LR_FACTOR = args['frcnLrFactor']
    if args['e2eLrFactor'] is not None:
        cfg["MODEL"].E2E_LR_FACTOR = args['e2eLrFactor']
    if args['e2eEpochs'] is not None:
        cfg["CNTK"].E2E_MAX_EPOCHS = args['e2eEpochs']
    if args['rpnEpochs'] is not None:
        cfg["CNTK"].RPN_EPOCHS = args['rpnEpochs']
    if args['frcnEpochs'] is not None:
        cfg["CNTK"].FRCN_EPOCHS = args['frcnEpochs']
    if args['momentumPerMb'] is not None:
        cfg["CNTK"].MOMENTUM_PER_MB = args['momentumPerMb']
    if args['rndSeed'] is not None:
        cfg.RND_SEED = args['rndSeed']
    if args['trainConv'] is not None:
        cfg.TRAIN_CONV_LAYERS = args['trainConv'] == 1
    if args['trainE2E'] is not None:
        cfg.TRAIN_E2E = args['trainE2E'] == 1

    if args['datadir'] is not None:
        cfg["DATA"].MAP_FILE_PATH = args['datadir']
    if args['outputdir'] is not None:
        cfg.OUTPUT_PATH = args['outputdir']
    if args['logdir'] is not None:
        # NOTE(review): dead store — this local is never used; presumably it
        # was meant to configure a log writer. TODO confirm intended behavior.
        log_dir = args['logdir']

    if args['device'] is not None:
        # Setting one worker on GPU and one worker on CPU. Otherwise memory consumption is too high for a single GPU.
        if Communicator.rank() == 0:
            cntk.device.try_set_default_device(cntk.device.gpu(args['device']))
        else:
            cntk.device.try_set_default_device(cntk.device.cpu())
###############################################################
###############################################################
# Defines the Faster R-CNN network model for detecting objects in images
def create_faster_rcnn_model(features, scaled_gt_boxes, dims_input, cfg):
    """Build the end-to-end Faster R-CNN training network.

    Returns a (loss, pred_error) pair: combined RPN + detection loss and
    the classification error of the detector head.
    """
    # Clone conv and fully-connected parts of the pre-trained base model.
    base_model = load_model(cfg['BASE_MODEL_PATH'])
    conv_layers = clone_conv_layers(base_model, cfg)
    fc_layers = clone_model(base_model, [cfg["MODEL"].POOL_NODE_NAME],
                            [cfg["MODEL"].LAST_HIDDEN_NODE_NAME], clone_method=CloneMethod.clone)

    # Subtract the per-channel pad color, then compute the shared feature map.
    normalized = features - Constant([[[v]] for v in cfg["MODEL"].IMG_PAD_COLOR])
    feature_map = conv_layers(normalized)

    # Region proposal network plus its classification/regression targets.
    rpn_rois, rpn_losses = create_rpn(feature_map, scaled_gt_boxes, dims_input, cfg)
    proposal_targets = create_proposal_target_layer(rpn_rois, scaled_gt_boxes, cfg)
    rois, label_targets, bbox_targets, bbox_inside_weights = proposal_targets

    # Fast R-CNN detector head and its losses.
    cls_score, bbox_pred = create_fast_rcnn_predictor(feature_map, rois, fc_layers, cfg)
    detection_losses = create_detection_losses(cls_score, label_targets, bbox_pred, rois,
                                               bbox_targets, bbox_inside_weights, cfg)

    total_loss = rpn_losses + detection_losses
    pred_error = classification_error(cls_score, label_targets, axis=1)
    return total_loss, pred_error
def create_faster_rcnn_eval_model(model, image_input, dims_input, cfg, rpn_model=None):
    """Assemble the evaluation-only Faster R-CNN graph from trained networks.

    Clones the conv, RPN and ROI/FC sub-networks frozen, re-creates the
    proposal layer, optionally un-normalizes regression outputs, and
    returns combine([cls_pred, rpn_rois, bbox_regr]).
    """
    print("creating eval model")
    last_conv_node_name = cfg["MODEL"].LAST_CONV_NODE_NAME

    conv_net = clone_model(model, [cfg["MODEL"].FEATURE_NODE_NAME], [last_conv_node_name], CloneMethod.freeze)
    feature_map = conv_net(image_input)

    # In 4-stage training the RPN weights live in a separate network.
    rpn_source = rpn_model if rpn_model is not None else model
    rpn_net = clone_model(rpn_source, [last_conv_node_name],
                          ["rpn_cls_prob_reshape", "rpn_bbox_pred"], CloneMethod.freeze)
    rpn_out = rpn_net(feature_map)
    # the proposal layer is added anew to account for changing configs when buffering proposals in 4-stage training
    rpn_rois = create_proposal_layer(rpn_out.outputs[0], rpn_out.outputs[1], dims_input, cfg)

    roi_fc_net = clone_model(model, [last_conv_node_name, "rpn_target_rois"],
                             ["cls_score", "bbox_regr"], CloneMethod.freeze)
    pred_net = roi_fc_net(feature_map, rpn_rois)
    cls_score = pred_net.outputs[0]
    bbox_regr = pred_net.outputs[1]

    if cfg.BBOX_NORMALIZE_TARGETS:
        # Undo the bbox-target normalization that was applied during training.
        num_boxes = int(bbox_regr.shape[1] / 4)
        means = np.array(cfg.BBOX_NORMALIZE_MEANS * num_boxes)
        stds = np.array(cfg.BBOX_NORMALIZE_STDS * num_boxes)
        bbox_regr = plus(element_times(bbox_regr, stds), means, name='bbox_regr')

    cls_pred = softmax(cls_score, axis=1, name='cls_pred')
    return combine([cls_pred, rpn_rois, bbox_regr])
def store_eval_model_with_native_udf(eval_model, cfg):
    """Replace the Python ProposalLayer UDF with the native (C++) one and
    store the converted model next to the regular eval model.
    """
    import copy
    sys.path.append(os.path.join(abs_path, "..", "..", "Extensibility", "ProposalLayer"))
    cntk.ops.register_native_user_function('ProposalLayerOp',
                                           'Cntk.ProposalLayerLib-' + cntk.__version__.rstrip('+'),
                                           'CreateProposalLayer')

    # fix: was named `filter`, shadowing the builtin; also use isinstance
    # instead of an exact type() comparison
    def _is_proposal_layer(node):
        return isinstance(node, cntk.Function) and node.op_name == 'UserFunction' \
               and node.name == 'ProposalLayer'

    def _to_native(node):
        # Re-create the node as a native UDF with the same attributes/inputs.
        layer_config = copy.deepcopy(node.attributes)
        return cntk.ops.native_user_function('ProposalLayerOp', list(node.inputs),
                                             layer_config, 'native_proposal_layer')

    model_w_native_udf = cntk.misc.convert(eval_model, _is_proposal_layer, _to_native)
    model_path = cfg['MODEL_PATH']
    new_model_path = model_path[:-6] + '_native.model'  # strip the '.model' suffix
    model_w_native_udf.save(new_model_path)
    print("Stored eval model with native UDF to {}".format(new_model_path))
def compute_rpn_proposals(rpn_model, image_input, roi_input, dims_input, cfg):
    """Run the RPN over every training image and buffer its proposals.

    Returns a list (in reader order) with one int16 numpy array of rounded
    proposal coordinates per training image.
    """
    num_images = cfg["DATA"].NUM_TRAIN_IMAGES
    # Non-randomized, non-flipped reader so proposals line up with image order.
    minibatch_source = ObjectDetectionMinibatchSource(
        cfg["DATA"].TRAIN_MAP_FILE, cfg["DATA"].TRAIN_ROI_FILE,
        num_classes=cfg["DATA"].NUM_CLASSES,
        max_annotations_per_image=cfg.INPUT_ROIS_PER_IMAGE,
        pad_width=cfg.IMAGE_WIDTH,
        pad_height=cfg.IMAGE_HEIGHT,
        pad_value=cfg["MODEL"].IMG_PAD_COLOR,
        max_images=num_images,
        randomize=False, use_flipping=False,
        proposal_provider=None)

    # Map reader streams onto the network inputs.
    input_map = {
        minibatch_source.image_si: image_input,
        minibatch_source.roi_si: roi_input,
        minibatch_source.dims_si: dims_input
    }

    buffered_proposals = []
    for image_index in range(num_images):
        mb_data = minibatch_source.next_minibatch(1, input_map=input_map)
        outputs = rpn_model.eval(mb_data)
        by_name = {o.name: o for o in outputs}
        rois = outputs[by_name['rpn_rois']][0]
        buffered_proposals.append(np.round(rois).astype(np.int16))
        done = image_index + 1
        if done % 500 == 0:
            print("Buffered proposals for {} samples".format(done))
    return buffered_proposals
# If a trained model is already available it is loaded an no training will be performed (if MAKE_MODE=True).
def train_faster_rcnn(cfg):
    """Return a Faster R-CNN eval model, training one only if needed.

    With MAKE_MODE enabled an existing model file is loaded instead of
    retraining; otherwise the e2e or 4-stage scheme runs and the result
    is saved to cfg['MODEL_PATH'].
    """
    model_path = cfg['MODEL_PATH']
    if os.path.exists(model_path) and cfg["CNTK"].MAKE_MODE:
        print("Loading existing model from %s" % model_path)
        return load_model(model_path)

    # No reusable model on disk: train with the configured scheme.
    train_fn = train_faster_rcnn_e2e if cfg["CNTK"].TRAIN_E2E else train_faster_rcnn_alternating
    eval_model = train_fn(cfg)
    eval_model.save(model_path)
    if cfg["CNTK"].DEBUG_OUTPUT:
        plot(eval_model, os.path.join(cfg.OUTPUT_PATH, "graph_frcn_eval_{}_{}.{}"
             .format(cfg["MODEL"].BASE_MODEL, "e2e" if cfg["CNTK"].TRAIN_E2E else "4stage", cfg["CNTK"].GRAPH_TYPE)))
    print("Stored eval model at %s" % model_path)
    return eval_model
# Trains a Faster R-CNN model end-to-end
def train_faster_rcnn_e2e(cfg):
    """Train Faster R-CNN end-to-end and return the eval model."""
    # Input variables: image, ground-truth rois (5-tuples per roi), image dims.
    image_input = input_variable(shape=(cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH),
                                 dynamic_axes=[Axis.default_batch_axis()],
                                 name=cfg["MODEL"].FEATURE_NODE_NAME)
    roi_input = input_variable((cfg.INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()])
    dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
    dims_node = alias(dims_input, name='dims_input')

    # Build the training network (loss + error metric).
    loss, pred_error = create_faster_rcnn_model(image_input, roi_input, dims_node, cfg)
    if cfg["CNTK"].DEBUG_OUTPUT:
        print("Storing graphs and models to %s." % cfg.OUTPUT_PATH)
        plot(loss, os.path.join(cfg.OUTPUT_PATH, "graph_frcn_train_e2e." + cfg["CNTK"].GRAPH_TYPE))

    # Scale configured per-sample learning rates by the e2e factor.
    lr_factor = cfg["MODEL"].E2E_LR_FACTOR
    lr_per_sample = [rate * lr_factor for rate in cfg["CNTK"].E2E_LR_PER_SAMPLE]
    mm_schedule = momentum_schedule(cfg["CNTK"].MOMENTUM_PER_MB)
    print("Using base model: {}".format(cfg["MODEL"].BASE_MODEL))
    print("lr_per_sample: {}".format(lr_per_sample))

    train_model(image_input, roi_input, dims_input, loss, pred_error,
                lr_per_sample, mm_schedule, cfg["CNTK"].L2_REG_WEIGHT, cfg["CNTK"].E2E_MAX_EPOCHS, cfg)
    return create_faster_rcnn_eval_model(loss, image_input, dims_input, cfg)
# Trains a Faster R-CNN model using 4-stage alternating training
def train_faster_rcnn_alternating(cfg):
    '''
    4-Step Alternating Training scheme from the Faster R-CNN paper:
    # Create initial network, only rpn, without detection network
    # --> train only the rpn (and conv3_1 and up for VGG16)
    # buffer region proposals from rpn
    # Create full network, initialize conv layers with imagenet, use buffered proposals
    # --> train only detection network (and conv3_1 and up for VGG16)
    # Keep conv weights from detection network and fix them
    # --> train only rpn
    # buffer region proposals from rpn
    # Keep conv and rpn weights from step 3 and fix them
    # --> train only detection network

    Returns the combined eval model built from the stage-2 FRCN and RPN
    networks. Temporarily overrides the TEST-time pre/post-NMS top-N with
    the TRAIN values while proposals are buffered, and restores them
    before returning.
    '''
    # setting pre- and post-nms top N to training values since buffered proposals are used for further training
    test_pre = cfg["TEST"].RPN_PRE_NMS_TOP_N
    test_post = cfg["TEST"].RPN_POST_NMS_TOP_N
    cfg["TEST"].RPN_PRE_NMS_TOP_N = cfg["TRAIN"].RPN_PRE_NMS_TOP_N
    cfg["TEST"].RPN_POST_NMS_TOP_N = cfg["TRAIN"].RPN_POST_NMS_TOP_N

    # Learning parameters
    rpn_lr_factor = cfg["MODEL"].RPN_LR_FACTOR
    rpn_lr_per_sample_scaled = [x * rpn_lr_factor for x in cfg["CNTK"].RPN_LR_PER_SAMPLE]
    frcn_lr_factor = cfg["MODEL"].FRCN_LR_FACTOR
    frcn_lr_per_sample_scaled = [x * frcn_lr_factor for x in cfg["CNTK"].FRCN_LR_PER_SAMPLE]
    l2_reg_weight = cfg["CNTK"].L2_REG_WEIGHT
    mm_schedule = momentum_schedule(cfg["CNTK"].MOMENTUM_PER_MB)
    rpn_epochs = cfg["CNTK"].RPN_EPOCHS
    frcn_epochs = cfg["CNTK"].FRCN_EPOCHS

    feature_node_name = cfg["MODEL"].FEATURE_NODE_NAME
    last_conv_node_name = cfg["MODEL"].LAST_CONV_NODE_NAME
    print("Using base model: {}".format(cfg["MODEL"].BASE_MODEL))
    print("rpn_lr_per_sample: {}".format(rpn_lr_per_sample_scaled))
    print("frcn_lr_per_sample: {}".format(frcn_lr_per_sample_scaled))
    debug_output=cfg["CNTK"].DEBUG_OUTPUT
    if debug_output:
        print("Storing graphs and models to %s." % cfg.OUTPUT_PATH)

    # Input variables denoting features, labeled ground truth rois (as 5-tuples per roi) and image dimensions
    image_input = input_variable(shape=(cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH),
                                 dynamic_axes=[Axis.default_batch_axis()],
                                 name=feature_node_name)
    feat_norm = image_input - Constant([[[v]] for v in cfg["MODEL"].IMG_PAD_COLOR])
    roi_input = input_variable((cfg.INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()])
    scaled_gt_boxes = alias(roi_input, name='roi_input')
    dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
    dims_node = alias(dims_input, name='dims_input')
    # Extra input that feeds buffered proposals back into the graph in stages 1b/2b.
    rpn_rois_input = input_variable((cfg["TRAIN"].RPN_POST_NMS_TOP_N, 4), dynamic_axes=[Axis.default_batch_axis()])
    rpn_rois_buf = alias(rpn_rois_input, name='rpn_rois')

    # base image classification model (e.g. VGG16 or AlexNet)
    base_model = load_model(cfg['BASE_MODEL_PATH'])

    print("stage 1a - rpn")
    if True:  # `if True:` blocks are used purely to visually scope each stage
        # Create initial network, only rpn, without detection network
        #       initial weights     train?
        # conv: base_model          only conv3_1 and up
        # rpn:  init new            yes
        # frcn: -                   -

        # conv layers
        conv_layers = clone_conv_layers(base_model, cfg)
        conv_out = conv_layers(feat_norm)

        # RPN and losses
        rpn_rois, rpn_losses = create_rpn(conv_out, scaled_gt_boxes, dims_node, cfg)
        stage1_rpn_network = combine([rpn_rois, rpn_losses])

        # train
        if debug_output: plot(stage1_rpn_network, os.path.join(cfg.OUTPUT_PATH, "graph_frcn_train_stage1a_rpn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, rpn_losses, rpn_losses,
                    rpn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, rpn_epochs, cfg)

    print("stage 1a - buffering rpn proposals")
    buffered_proposals_s1 = compute_rpn_proposals(stage1_rpn_network, image_input, roi_input, dims_input, cfg)

    print("stage 1b - frcn")
    if True:
        # Create full network, initialize conv layers with imagenet, fix rpn weights
        #       initial weights     train?
        # conv: base_model          only conv3_1 and up
        # rpn:  stage1a rpn model   no --> use buffered proposals
        # frcn: base_model + new    yes

        # conv_layers
        conv_layers = clone_conv_layers(base_model, cfg)
        conv_out = conv_layers(feat_norm)

        # use buffered proposals in target layer
        rois, label_targets, bbox_targets, bbox_inside_weights = \
            create_proposal_target_layer(rpn_rois_buf, scaled_gt_boxes, cfg)

        # Fast RCNN and losses
        fc_layers = clone_model(base_model, [cfg["MODEL"].POOL_NODE_NAME], [cfg["MODEL"].LAST_HIDDEN_NODE_NAME], CloneMethod.clone)
        cls_score, bbox_pred = create_fast_rcnn_predictor(conv_out, rois, fc_layers, cfg)
        detection_losses = create_detection_losses(cls_score, label_targets, bbox_pred, rois, bbox_targets, bbox_inside_weights, cfg)
        pred_error = classification_error(cls_score, label_targets, axis=1, name="pred_error")
        stage1_frcn_network = combine([rois, cls_score, bbox_pred, detection_losses, pred_error])

        # train
        if debug_output: plot(stage1_frcn_network, os.path.join(cfg.OUTPUT_PATH, "graph_frcn_train_stage1b_frcn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, detection_losses, pred_error,
                    frcn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, frcn_epochs, cfg,
                    rpn_rois_input=rpn_rois_input, buffered_rpn_proposals=buffered_proposals_s1)
        # release the stage-1 proposal buffer (can be large)
        buffered_proposals_s1 = None

    print("stage 2a - rpn")
    if True:
        # Keep conv weights from detection network and fix them
        #       initial weights     train?
        # conv: stage1b frcn model  no
        # rpn:  stage1a rpn model   yes
        # frcn: -                   -

        # conv_layers
        conv_layers = clone_model(stage1_frcn_network, [feature_node_name], [last_conv_node_name], CloneMethod.freeze)
        conv_out = conv_layers(image_input)

        # RPN and losses
        rpn = clone_model(stage1_rpn_network, [last_conv_node_name, "roi_input", "dims_input"], ["rpn_rois", "rpn_losses"], CloneMethod.clone)
        rpn_net = rpn(conv_out, dims_node, scaled_gt_boxes)
        rpn_rois = rpn_net.outputs[0]
        rpn_losses = rpn_net.outputs[1]
        stage2_rpn_network = combine([rpn_rois, rpn_losses])

        # train
        if debug_output: plot(stage2_rpn_network, os.path.join(cfg.OUTPUT_PATH, "graph_frcn_train_stage2a_rpn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, rpn_losses, rpn_losses,
                    rpn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, rpn_epochs, cfg)

    print("stage 2a - buffering rpn proposals")
    buffered_proposals_s2 = compute_rpn_proposals(stage2_rpn_network, image_input, roi_input, dims_input, cfg)

    print("stage 2b - frcn")
    if True:
        # Keep conv and rpn weights from step 3 and fix them
        #       initial weights     train?
        # conv: stage2a rpn model   no
        # rpn:  stage2a rpn model   no --> use buffered proposals
        # frcn: stage1b frcn model  yes

        # conv_layers
        conv_layers = clone_model(stage2_rpn_network, [feature_node_name], [last_conv_node_name], CloneMethod.freeze)
        conv_out = conv_layers(image_input)

        # Fast RCNN and losses
        frcn = clone_model(stage1_frcn_network, [last_conv_node_name, "rpn_rois", "roi_input"],
                           ["cls_score", "bbox_regr", "rpn_target_rois", "detection_losses", "pred_error"], CloneMethod.clone)
        stage2_frcn_network = frcn(conv_out, rpn_rois_buf, scaled_gt_boxes)
        detection_losses = stage2_frcn_network.outputs[3]
        pred_error = stage2_frcn_network.outputs[4]

        # train
        if debug_output: plot(stage2_frcn_network, os.path.join(cfg.OUTPUT_PATH, "graph_frcn_train_stage2b_frcn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, detection_losses, pred_error,
                    frcn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, frcn_epochs, cfg,
                    rpn_rois_input=rpn_rois_input, buffered_rpn_proposals=buffered_proposals_s2)
        buffered_proposals_s2 = None

    # resetting config values to original test values
    cfg["TEST"].RPN_PRE_NMS_TOP_N = test_pre
    cfg["TEST"].RPN_POST_NMS_TOP_N = test_post

    return create_faster_rcnn_eval_model(stage2_frcn_network, image_input, dims_input, cfg, rpn_model=stage2_rpn_network)
def train_model(image_input, roi_input, dims_input, loss, pred_error,
                lr_per_sample, mm_schedule, l2_reg_weight, epochs_to_train, cfg,
                rpn_rois_input=None, buffered_rpn_proposals=None):
    """Train *loss* with momentum SGD over the object-detection reader.

    Biases and all other parameters get separate learners so biases can use
    a scaled learning rate (BIAS_LR_MULT). When *buffered_rpn_proposals* is
    given, proposals are fed through *rpn_rois_input* instead of computing
    them in-graph (4-stage training), and the dims stream is not mapped.
    """
    if isinstance(loss, cntk.Variable):
        loss = combine([loss])

    params = loss.parameters
    # Parameters whose name marks them as a bias ('.b' infix or plain 'b').
    biases = [p for p in params if '.b' in p.name or 'b' == p.name]
    others = [p for p in params if p not in biases]  # fix: was `not p in biases`
    bias_lr_mult = cfg["CNTK"].BIAS_LR_MULT

    if cfg["CNTK"].DEBUG_OUTPUT:
        print("biases")
        for p in biases: print(p)
        print("others")
        for p in others: print(p)
        print("bias_lr_mult: {}".format(bias_lr_mult))

    # Instantiate the learners and the trainer object
    lr_schedule = learning_parameter_schedule_per_sample(lr_per_sample)
    learner = momentum_sgd(others, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight,
                           unit_gain=False, use_mean_gradient=True)
    bias_lr_per_sample = [v * bias_lr_mult for v in lr_per_sample]
    bias_lr_schedule = learning_parameter_schedule_per_sample(bias_lr_per_sample)
    bias_learner = momentum_sgd(biases, bias_lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight,
                                unit_gain=False, use_mean_gradient=True)
    trainer = Trainer(None, (loss, pred_error), [learner, bias_learner])

    # Get minibatches of images and perform model training
    print("Training model for %s epochs." % epochs_to_train)
    log_number_of_parameters(loss)

    # Create the minibatch source; buffered proposals need no re-scaling.
    if buffered_rpn_proposals is not None:
        proposal_provider = ProposalProvider.fromlist(buffered_rpn_proposals, requires_scaling=False)
    else:
        proposal_provider = None
    od_minibatch_source = ObjectDetectionMinibatchSource(
        cfg["DATA"].TRAIN_MAP_FILE, cfg["DATA"].TRAIN_ROI_FILE,
        num_classes=cfg["DATA"].NUM_CLASSES,
        max_annotations_per_image=cfg.INPUT_ROIS_PER_IMAGE,
        pad_width=cfg.IMAGE_WIDTH,
        pad_height=cfg.IMAGE_HEIGHT,
        pad_value=cfg["MODEL"].IMG_PAD_COLOR,
        randomize=True,
        use_flipping=cfg["TRAIN"].USE_FLIPPED,
        max_images=cfg["DATA"].NUM_TRAIN_IMAGES,
        proposal_provider=proposal_provider)

    # define mapping from reader streams to network inputs
    input_map = {
        od_minibatch_source.image_si: image_input,
        od_minibatch_source.roi_si: roi_input,
    }
    if buffered_rpn_proposals is not None:
        input_map[od_minibatch_source.proposals_si] = rpn_rois_input
    else:
        input_map[od_minibatch_source.dims_si] = dims_input

    progress_printer = ProgressPrinter(tag='Training', num_epochs=epochs_to_train, gen_heartbeat=True)
    for epoch in range(epochs_to_train):  # loop over epochs
        sample_count = 0
        while sample_count < cfg["DATA"].NUM_TRAIN_IMAGES:  # loop over minibatches in the epoch
            data = od_minibatch_source.next_minibatch(min(cfg.MB_SIZE, cfg["DATA"].NUM_TRAIN_IMAGES - sample_count), input_map=input_map)
            trainer.train_minibatch(data)                                    # update model with it
            sample_count += trainer.previous_minibatch_sample_count          # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress
            if sample_count % 100 == 0:
                print("Processed {} samples".format(sample_count))
        progress_printer.epoch_summary(with_metric=True)
Computing file changes ...