# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
from __future__ import print_function
import numpy as np
import cntk as C
import os
from PIL import Image
from cntk.device import try_set_default_device, gpu
from cntk import load_model, placeholder, Constant
from cntk import Trainer
from cntk.logging.graph import find_by_name, get_node_outputs
from cntk.io import MinibatchSource, ImageDeserializer, StreamDefs, StreamDef
import cntk.io.transforms as xforms
from cntk.layers import Dense
from cntk.learners import momentum_sgd, learning_parameter_schedule, momentum_schedule
from cntk.ops import combine, softmax
from cntk.ops.functions import CloneMethod
from cntk.losses import cross_entropy_with_softmax
from cntk.metrics import classification_error
from cntk.logging import log_number_of_parameters, ProgressPrinter
################################################
################################################
# general settings
# When True and a model already exists at tl_model_file, the __main__ block
# loads that model instead of training a new one.
make_mode = False
# Forwarded as the `freeze` argument of train_model by the __main__ block.
freeze_weights = False
# Directory containing this script; all other paths are built relative to it.
base_folder = os.path.dirname(os.path.abspath(__file__))
# Where the trained transfer-learning model is saved to / loaded from.
tl_model_file = os.path.join(base_folder, "Output", "TransferLearning.model")
# Text file that receives one row of class probabilities per evaluated image.
output_file = os.path.join(base_folder, "Output", "predOutput.txt")
# Stream names used to index the minibatch source in train_model; they have to
# match the keyword names given to StreamDefs in create_mb_source.
features_stream_name = 'features'
label_stream_name = 'labels'
# Name given to the new Dense output layer added in create_model.
new_output_node_name = "prediction"
# Learning parameters
# Number of epochs passed to train_model by the __main__ block.
max_epochs = 20
# Upper bound on the number of samples requested per minibatch.
mb_size = 50
# Learning-rate values handed to learning_parameter_schedule
# (ten entries of 0.2 followed by a single 0.1).
lr_per_mb = [0.2]*10 + [0.1]
# Momentum value handed to momentum_schedule.
momentum_per_mb = 0.9
# Passed as l2_regularization_weight to momentum_sgd.
l2_reg_weight = 0.0005
# define base model location and characteristics
_base_model_file = os.path.join(base_folder, "..", "..", "..", "PretrainedModels", "ResNet18_ImageNet_CNTK.model")
# Name of the base model's input node that create_model swaps for a placeholder.
_feature_node_name = "features"
# Name of the base-model node whose output feeds the new Dense layer.
_last_hidden_node_name = "z.x"
# Input image dimensions; images are scaled to this size for training and evaluation.
_image_height = 224
_image_width = 224
_num_channels = 3
# define data location and characteristics
_data_folder = os.path.join(base_folder, "..", "DataSets", "Flowers")
# Map files for training and testing; the evaluation code reads them as
# tab-separated lines of image path and integer label.
_train_map_file = os.path.join(_data_folder, "6k_img_map.txt")
_test_map_file = os.path.join(_data_folder, "1k_img_map.txt")
# Number of target classes (size of the label vector and of the new output layer).
_num_classes = 102
################################################
################################################
def create_mb_source(map_file, image_width, image_height, num_channels, num_classes, randomize=True):
    """Create a minibatch source for training or testing.

    Args:
        map_file: path of the image map file handed to ImageDeserializer.
        image_width: width every image is scaled to.
        image_height: height every image is scaled to.
        num_channels: number of channels requested from the scale transform.
        num_classes: shape of the label stream.
        randomize: forwarded unchanged to MinibatchSource.

    Returns:
        A MinibatchSource exposing a ``features`` (image) stream and a
        ``labels`` stream.
    """
    # The only transform applied is a linear-interpolated rescale to the
    # requested input size.
    scale_to_input = xforms.scale(width=image_width,
                                  height=image_height,
                                  channels=num_channels,
                                  interpolations='linear')

    image_stream = StreamDef(field='image', transforms=[scale_to_input])
    label_stream = StreamDef(field='label', shape=num_classes)
    stream_defs = StreamDefs(features=image_stream, labels=label_stream)

    deserializer = ImageDeserializer(map_file, stream_defs)
    return MinibatchSource(deserializer, randomize=randomize)
def create_model(base_model_file, feature_node_name, last_hidden_node_name, num_classes, input_features, freeze=False):
    """Create the network model for transfer learning.

    Loads a pretrained model, clones everything from its feature node up to
    the node named ``last_hidden_node_name`` and puts a new, untrained Dense
    layer with ``num_classes`` outputs on top.

    Args:
        base_model_file: path of the pretrained model to load.
        feature_node_name: name of the input node in the pretrained model.
        last_hidden_node_name: name of the node whose output feeds the new
            Dense layer.
        num_classes: number of outputs of the new Dense layer.
        input_features: input variable the new model is built on.
        freeze: if True the cloned layers are cloned with CloneMethod.freeze,
            otherwise with CloneMethod.clone.

    Returns:
        The output function of the new Dense layer (no activation applied).

    Raises:
        ValueError: if either node name cannot be found in the base model.
    """
    # Load the pretrained classification net and find nodes
    base_model = load_model(base_model_file)

    # find_by_name yields None for an unknown name; fail with a clear message
    # instead of an opaque AttributeError further down.
    feature_node = find_by_name(base_model, feature_node_name)
    if feature_node is None:
        raise ValueError("Feature node '{0}' not found in base model '{1}'.".format(
            feature_node_name, base_model_file))
    last_node = find_by_name(base_model, last_hidden_node_name)
    if last_node is None:
        raise ValueError("Last hidden node '{0}' not found in base model '{1}'.".format(
            last_hidden_node_name, base_model_file))

    # Clone the desired layers; their weights are frozen only when `freeze` is set
    clone_method = CloneMethod.freeze if freeze else CloneMethod.clone
    cloned_layers = combine([last_node.owner]).clone(
        clone_method,
        {feature_node: placeholder(name='features')})

    # Subtract a constant 114 from every input value before feeding the cloned
    # layers (presumably an approximate mean-pixel normalization matching the
    # base model's training -- TODO confirm).
    feat_norm = input_features - Constant(114)
    cloned_out = cloned_layers(feat_norm)

    # Add new dense layer for class prediction
    z = Dense(num_classes, activation=None, name=new_output_node_name)(cloned_out)
    return z
def train_model(base_model_file, feature_node_name, last_hidden_node_name,
                image_width, image_height, num_channels, num_classes, train_map_file,
                num_epochs, max_images=-1, freeze=False):
    """Train a transfer learning model.

    Args:
        base_model_file: path of the pretrained model (see create_model).
        feature_node_name: name of the pretrained model's input node.
        last_hidden_node_name: name of the node feeding the new output layer.
        image_width: input image width.
        image_height: input image height.
        num_channels: number of input image channels.
        num_classes: number of target classes.
        train_map_file: map file listing the training images, one per line.
        num_epochs: number of epochs to train for.
        max_images: if > 0, caps the number of samples used per epoch.
        freeze: forwarded to create_model to freeze the cloned layers.

    Returns:
        The trained model function.
    """
    # One epoch covers one line of the map file per sample; close the file
    # deterministically instead of leaking the handle.
    with open(train_map_file) as map_file:
        epoch_size = sum(1 for _ in map_file)
    if max_images > 0:
        epoch_size = min(epoch_size, max_images)

    # Create the minibatch source and input variables
    minibatch_source = create_mb_source(train_map_file, image_width, image_height, num_channels, num_classes)
    image_input = C.input_variable((num_channels, image_height, image_width))
    label_input = C.input_variable(num_classes)

    # Define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source[features_stream_name],
        label_input: minibatch_source[label_stream_name]
    }

    # Instantiate the transfer learning model and loss function
    tl_model = create_model(base_model_file, feature_node_name, last_hidden_node_name, num_classes, image_input, freeze)
    ce = cross_entropy_with_softmax(tl_model, label_input)
    pe = classification_error(tl_model, label_input)

    # Instantiate the trainer object
    lr_schedule = learning_parameter_schedule(lr_per_mb)
    mm_schedule = momentum_schedule(momentum_per_mb)
    learner = momentum_sgd(tl_model.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=num_epochs)
    trainer = Trainer(tl_model, (ce, pe), learner, progress_printer)

    # Get minibatches of images and perform model training
    print("Training transfer learning model for {0} epochs (epoch_size = {1}).".format(num_epochs, epoch_size))
    log_number_of_parameters(tl_model)
    for epoch in range(num_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            # Never request more samples than are left in the current epoch.
            data = minibatch_source.next_minibatch(min(mb_size, epoch_size - sample_count), input_map=input_map)
            trainer.train_minibatch(data)                           # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far
            # Only reports when the running count is an exact multiple of 100 minibatches.
            if sample_count % (100 * mb_size) == 0:
                print("Processed {0} samples".format(sample_count))

        trainer.summarize_training_progress()

    return tl_model
def eval_single_image(loaded_model, image_path, image_width, image_height):
    """Evaluate a single image using the provided model.

    Args:
        loaded_model: model whose first argument receives the image.
        image_path: path of the image file to evaluate.
        image_width: width the image is resized to.
        image_height: height the image is resized to.

    Returns:
        The softmax of the model output for this image.
    """
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
    # under its current name. Fall back to ANTIALIAS on very old versions.
    resample = getattr(Image, "LANCZOS", None)
    if resample is None:
        resample = Image.ANTIALIAS

    # load and format image (resize, RGB -> BGR, HWC -> CHW)
    with Image.open(image_path) as img:
        if img.mode in ("RGBA", "LA") or "transparency" in img.info:
            # Flatten transparency onto a white background. Detecting alpha
            # from the image itself (rather than from a "png" suffix) avoids
            # a crash on PNGs without an alpha channel.
            rgba = img.convert("RGBA")
            rgb = Image.new("RGB", rgba.size, (255, 255, 255))
            rgb.paste(rgba, mask=rgba.split()[3])
        else:
            # Guarantees three channels for the BGR reordering below
            # (e.g. grayscale or palette images).
            rgb = img.convert("RGB")
        resized = rgb.resize((image_width, image_height), resample)

    bgr_image = np.asarray(resized, dtype=np.float32)[..., [2, 1, 0]]
    # Move the channel axis to the front: (H, W, C) -> (C, H, W).
    chw_image = np.ascontiguousarray(np.rollaxis(bgr_image, 2))

    ## Alternatively: if you want to use opencv-python
    # cv_img = cv2.imread(image_path)
    # resized = cv2.resize(cv_img, (image_width, image_height), interpolation=cv2.INTER_NEAREST)
    # bgr_image = np.asarray(resized, dtype=np.float32)
    # chw_image = np.ascontiguousarray(np.rollaxis(bgr_image, 2))

    # compute model output
    arguments = {loaded_model.arguments[0]: [chw_image]}
    output = loaded_model.eval(arguments)

    # return softmax probabilities
    sm = softmax(output[0])
    return sm.eval()
def eval_test_images(loaded_model, output_file, test_map_file, image_width, image_height, max_images=-1, column_offset=0):
    """Evaluate an image set using the provided model.

    Reads tab-separated lines from ``test_map_file`` (image path and integer
    label, starting at ``column_offset``), evaluates each image, writes one
    row of class probabilities per image to ``output_file`` and prints the
    running and final accuracy.

    Args:
        loaded_model: model passed on to eval_single_image.
        output_file: path of the file receiving the probabilities.
        test_map_file: map file listing the test images.
        image_width: width the images are resized to.
        image_height: height the images are resized to.
        max_images: if > 0, caps the number of images evaluated.
        column_offset: index of the image-path column; the label is expected
            in the following column.
    """
    # Count the entries up front; blank lines are ignored here and skipped in
    # the loop below so that they cannot cause an IndexError.
    with open(test_map_file, "r") as map_file:
        num_images = sum(1 for line in map_file if line.strip())
    if max_images > 0:
        num_images = min(num_images, max_images)
    print("Evaluating model output node '{0}' for {1} images.".format(new_output_node_name, num_images))

    pred_count = 0
    correct_count = 0

    # Raise on numpy overflow while evaluating, but restore the caller's
    # numpy error settings afterwards instead of changing them process-wide.
    with np.errstate(over='raise'):
        with open(output_file, 'wb') as results_file:
            with open(test_map_file, "r") as input_file:
                for line in input_file:
                    if pred_count >= num_images:
                        break
                    if not line.strip():
                        continue

                    tokens = line.rstrip().split('\t')
                    img_file = tokens[0 + column_offset]
                    probs = eval_single_image(loaded_model, img_file, image_width, image_height)

                    pred_count += 1
                    true_label = int(tokens[1 + column_offset])
                    predicted_label = np.argmax(probs)
                    if predicted_label == true_label:
                        correct_count += 1

                    # One row per image, three decimals per class probability.
                    np.savetxt(results_file, probs[np.newaxis], fmt="%.3f")
                    if pred_count % 100 == 0:
                        print("Processed {0} samples ({1} correct)".format(pred_count, (float(correct_count) / pred_count)))

    # Avoid a ZeroDivisionError when nothing was evaluated (empty map file).
    accuracy = float(correct_count) / pred_count if pred_count else 0.0
    print("{0} out of {1} predictions were correct {2}.".format(correct_count, pred_count, accuracy))
if __name__ == '__main__':
    # Script entry point: train (or load) the transfer learning model, then
    # evaluate it on the test set and write the predictions to output_file.
    try_set_default_device(gpu(0))

    # check for model and data existence
    required_files = (_base_model_file, _train_map_file, _test_map_file)
    if not all(os.path.exists(path) for path in required_files):
        print("Please run 'python install_data_and_model.py' first to get the required data and model.")
        # Same exit status as before, without relying on the site-injected
        # exit() helper, which is intended for interactive sessions.
        raise SystemExit(0)

    # You can use the following to inspect the base model and determine the desired node names
    # node_outputs = get_node_outputs(load_model(_base_model_file))
    # for out in node_outputs: print("{0} {1}".format(out.name, out.shape))

    # Make sure the output folder exists before anything is written to it.
    for out_path in (tl_model_file, output_file):
        out_dir = os.path.dirname(out_path)
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

    # Train only if no model exists yet or if make_mode is set to False
    if os.path.exists(tl_model_file) and make_mode:
        print("Loading existing model from %s" % tl_model_file)
        trained_model = load_model(tl_model_file)
    else:
        trained_model = train_model(_base_model_file, _feature_node_name, _last_hidden_node_name,
                                    _image_width, _image_height, _num_channels, _num_classes, _train_map_file,
                                    max_epochs, freeze=freeze_weights)
        trained_model.save(tl_model_file)
        print("Stored trained model at %s" % tl_model_file)

    # Evaluate the test set
    eval_test_images(trained_model, output_file, _test_map_file, _image_width, _image_height)

    print("Done. Wrote output to %s" % output_file)