# Copyright (c) Microsoft. All rights reserved. # Licensed under the MIT license. See LICENSE.md file in the project root # for full license information. # ============================================================================== from __future__ import print_function import numpy as np import cntk as C import os from PIL import Image from cntk.device import try_set_default_device, gpu from cntk import load_model, placeholder, Constant from cntk import Trainer from cntk.logging.graph import find_by_name, get_node_outputs from cntk.io import MinibatchSource, ImageDeserializer, StreamDefs, StreamDef import cntk.io.transforms as xforms from cntk.layers import Dense from cntk.learners import momentum_sgd, learning_parameter_schedule, momentum_schedule from cntk.ops import combine, softmax from cntk.ops.functions import CloneMethod from cntk.losses import cross_entropy_with_softmax from cntk.metrics import classification_error from cntk.logging import log_number_of_parameters, ProgressPrinter ################################################ ################################################ # general settings make_mode = False freeze_weights = False base_folder = os.path.dirname(os.path.abspath(__file__)) tl_model_file = os.path.join(base_folder, "Output", "TransferLearning.model") output_file = os.path.join(base_folder, "Output", "predOutput.txt") features_stream_name = 'features' label_stream_name = 'labels' new_output_node_name = "prediction" # Learning parameters max_epochs = 20 mb_size = 50 lr_per_mb = [0.2]*10 + [0.1] momentum_per_mb = 0.9 l2_reg_weight = 0.0005 # define base model location and characteristics _base_model_file = os.path.join(base_folder, "..", "..", "..", "PretrainedModels", "ResNet18_ImageNet_CNTK.model") _feature_node_name = "features" _last_hidden_node_name = "z.x" _image_height = 224 _image_width = 224 _num_channels = 3 # define data location and characteristics _data_folder = os.path.join(base_folder, "..", "DataSets", "Flowers") 
_train_map_file = os.path.join(_data_folder, "6k_img_map.txt")
_test_map_file = os.path.join(_data_folder, "1k_img_map.txt")
_num_classes = 102
################################################
################################################


def _count_lines(file_path):
    """Return the number of lines in a text file.

    Uses a ``with`` block so the file handle is closed deterministically
    (the previous inline ``sum(1 for line in open(...))`` leaked the handle).
    """
    with open(file_path) as f:
        return sum(1 for _ in f)


# Creates a minibatch source for training or testing
def create_mb_source(map_file, image_width, image_height, num_channels, num_classes, randomize=True):
    """Create an ImageDeserializer-based MinibatchSource for a map file.

    The only transform is a linear scale to the requested size/channels;
    labels are read as `num_classes`-dimensional one-hot vectors.
    """
    transforms = [xforms.scale(width=image_width, height=image_height,
                               channels=num_channels, interpolations='linear')]
    return MinibatchSource(ImageDeserializer(map_file, StreamDefs(
            features=StreamDef(field='image', transforms=transforms),
            labels=StreamDef(field='label', shape=num_classes))),
            randomize=randomize)


# Creates the network model for transfer learning
def create_model(base_model_file, feature_node_name, last_hidden_node_name, num_classes, input_features, freeze=False):
    """Build a transfer-learning network from a pretrained model.

    Clones the pretrained net between `feature_node_name` and
    `last_hidden_node_name` (with frozen weights when `freeze` is True) and
    appends a fresh Dense layer with `num_classes` outputs.
    Returns the new CNTK model function.
    """
    # Load the pretrained classification net and find nodes
    base_model = load_model(base_model_file)
    feature_node = find_by_name(base_model, feature_node_name)
    last_node = find_by_name(base_model, last_hidden_node_name)

    # Clone the desired layers; the feature input becomes a placeholder so the
    # clone can be applied to a new input variable below
    cloned_layers = combine([last_node.owner]).clone(
        CloneMethod.freeze if freeze else CloneMethod.clone,
        {feature_node: placeholder(name='features')})

    # Subtract the mean pixel value (114) the base model was trained with,
    # then add a new dense layer for class prediction
    feat_norm = input_features - Constant(114)
    cloned_out = cloned_layers(feat_norm)
    z = Dense(num_classes, activation=None, name=new_output_node_name)(cloned_out)
    return z


# Trains a transfer learning model
def train_model(base_model_file, feature_node_name, last_hidden_node_name,
                image_width, image_height, num_channels, num_classes, train_map_file,
                num_epochs, max_images=-1, freeze=False):
    """Train the transfer-learning model with momentum SGD and return it.

    `max_images` > 0 caps the effective epoch size; -1 uses the whole map file.
    """
    epoch_size = _count_lines(train_map_file)
    if max_images > 0:
        epoch_size = min(epoch_size, max_images)

    # Create the minibatch source and input variables
    minibatch_source = create_mb_source(train_map_file, image_width, image_height, num_channels, num_classes)
    image_input = C.input_variable((num_channels, image_height, image_width))
    label_input = C.input_variable(num_classes)

    # Define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source[features_stream_name],
        label_input: minibatch_source[label_stream_name]
    }

    # Instantiate the transfer learning model and loss function
    tl_model = create_model(base_model_file, feature_node_name, last_hidden_node_name,
                            num_classes, image_input, freeze)
    ce = cross_entropy_with_softmax(tl_model, label_input)
    pe = classification_error(tl_model, label_input)

    # Instantiate the trainer object
    lr_schedule = learning_parameter_schedule(lr_per_mb)
    mm_schedule = momentum_schedule(momentum_per_mb)
    learner = momentum_sgd(tl_model.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=num_epochs)
    trainer = Trainer(tl_model, (ce, pe), learner, progress_printer)

    # Get minibatches of images and perform model training
    print("Training transfer learning model for {0} epochs (epoch_size = {1}).".format(num_epochs, epoch_size))
    log_number_of_parameters(tl_model)
    for epoch in range(num_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = minibatch_source.next_minibatch(min(mb_size, epoch_size - sample_count),
                                                   input_map=input_map)
            trainer.train_minibatch(data)                                    # update model with it
            sample_count += trainer.previous_minibatch_sample_count          # count samples processed so far
            if sample_count % (100 * mb_size) == 0:
                print("Processed {0} samples".format(sample_count))

        trainer.summarize_training_progress()

    return tl_model


# Evaluates a single image using the provided model
def eval_single_image(loaded_model, image_path, image_width, image_height):
    """Run the model on one image file and return softmax class probabilities."""
    # load and format image (resize, RGB -> BGR, HWC -> CHW)
    img = Image.open(image_path)
    if image_path.endswith("png"):
        # flatten any alpha channel onto a white background; the image itself
        # serves as the paste mask (Pillow treats an Image second arg as mask)
        temp = Image.new("RGB", img.size, (255, 255, 255))
        temp.paste(img, img)
        img = temp
    # NOTE(review): Image.ANTIALIAS was removed in Pillow 10 (alias of
    # Image.LANCZOS) — confirm the pinned Pillow version if upgrading.
    resized = img.resize((image_width, image_height), Image.ANTIALIAS)
    bgr_image = np.asarray(resized, dtype=np.float32)[..., [2, 1, 0]]
    hwc_format = np.ascontiguousarray(np.rollaxis(bgr_image, 2))

    ## Alternatively: if you want to use opencv-python
    # cv_img = cv2.imread(image_path)
    # resized = cv2.resize(cv_img, (image_width, image_height), interpolation=cv2.INTER_NEAREST)
    # bgr_image = np.asarray(resized, dtype=np.float32)
    # hwc_format = np.ascontiguousarray(np.rollaxis(bgr_image, 2))

    # compute model output
    arguments = {loaded_model.arguments[0]: [hwc_format]}
    output = loaded_model.eval(arguments)

    # return softmax probabilities
    sm = softmax(output[0])
    return sm.eval()


# Evaluates an image set using the provided model
def eval_test_images(loaded_model, output_file, test_map_file, image_width, image_height,
                     max_images=-1, column_offset=0):
    """Evaluate every image in a tab-separated map file and log accuracy.

    Writes per-image probability rows to `output_file`; `column_offset`
    shifts the image-path/label columns within each map-file row.
    """
    num_images = _count_lines(test_map_file)
    if max_images > 0:
        num_images = min(num_images, max_images)
    print("Evaluating model output node '{0}' for {1} images.".format(new_output_node_name, num_images))

    pred_count = 0
    correct_count = 0
    np.seterr(over='raise')
    with open(output_file, 'wb') as results_file:
        with open(test_map_file, "r") as input_file:
            for line in input_file:
                tokens = line.rstrip().split('\t')
                img_file = tokens[0 + column_offset]
                probs = eval_single_image(loaded_model, img_file, image_width, image_height)

                pred_count += 1
                true_label = int(tokens[1 + column_offset])
                predicted_label = np.argmax(probs)
                if predicted_label == true_label:
                    correct_count += 1

                np.savetxt(results_file, probs[np.newaxis], fmt="%.3f")
                if pred_count % 100 == 0:
                    print("Processed {0} samples ({1} correct)".format(pred_count,
                                                                       (float(correct_count) / pred_count)))
                if pred_count >= num_images:
                    break

    # Guard against an empty map file (original crashed with ZeroDivisionError)
    accuracy = (float(correct_count) / pred_count) if pred_count else 0.0
    print("{0} out of {1} predictions were correct {2}.".format(correct_count, pred_count, accuracy))


if __name__ == '__main__':
    try_set_default_device(gpu(0))
    # check for model and data existence
    if not (os.path.exists(_base_model_file) and os.path.exists(_train_map_file) and os.path.exists(_test_map_file)):
        print("Please run 'python install_data_and_model.py' first to get the required data and model.")
        exit(0)

    # You can use the following to inspect the base model and determine the desired node names
    # node_outputs = get_node_outputs(load_model(_base_model_file))
    # for out in node_outputs: print("{0} {1}".format(out.name, out.shape))

    # Train only if no model exists yet or if make_mode is set to False
    if os.path.exists(tl_model_file) and make_mode:
        print("Loading existing model from %s" % tl_model_file)
        trained_model = load_model(tl_model_file)
    else:
        trained_model = train_model(_base_model_file, _feature_node_name, _last_hidden_node_name,
                                    _image_width, _image_height, _num_channels, _num_classes, _train_map_file,
                                    max_epochs, freeze=freeze_weights)
        trained_model.save(tl_model_file)
        print("Stored trained model at %s" % tl_model_file)

    # Evaluate the test set
    eval_test_images(trained_model, output_file, _test_map_file, _image_width, _image_height)

    print("Done. Wrote output to %s" % output_file)