#!/usr/bin/env python # ---------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT license. See LICENSE.md file in the project root for full license information. # --------------------------------------------------------- # This script extracts information (hardware used, final results) contained in the baselines files # and generates a markdown file (wiki page) import sys, os, re import TestDriver as td try: import six except ImportError: print("Python package 'six' not installed. Please run 'pip install six'.") sys.exit(1) thisDir = os.path.dirname(os.path.realpath(__file__)) windows = os.getenv("OS")=="Windows_NT" class Baseline: def __init__(self, fullPath, testResult = "", trainResult = ""): self.fullPath = fullPath self.cpuInfo = "" self.gpuInfo = "" self.testResult = testResult self.trainResult = trainResult # extracts results info. e.g. # Finished Epoch[ 5 of 5]: [Training] ce = 2.32253198 * 1000 err = 0.90000000 * 1000 totalSamplesSeen = 5000 learningRatePerSample = 2e-06 epochTime=0.175781 # Final Results: Minibatch[1-1]: err = 0.90000000 * 100 ce = 2.32170486 * 100 perplexity = 10.1930372 def extractResultsInfo(self, baselineContent): trainResults = re.findall('.*(Finished Epoch\[ *\d+ of \d+\]\: \[Training\]) (.*)', baselineContent) if trainResults: self.trainResult = Baseline.formatLastTrainResult(trainResults[-1])[0:-2] testResults = re.findall('.*(Final Results: Minibatch\[1-\d+\]:)(\s+\* \d+)?\s+(.*)', baselineContent) if testResults: self.testResult = Baseline.formatLastTestResult(testResults[-1])[0:-2] # extracts cpu and gpu info from baseline content. e.g.: #CPU info: # CPU Model Name: Intel(R) Xeon(R) CPU E5-2620 v3 @ 2.40GHz # Hardware threads: 12 #GPU info: # #Device[0]: cores = 2496; computeCapability = 5.2; type = "Quadro M4000"; memory = 8192 MB #Device[1]: cores = 96; computeCapability = 2.1; type = "Quadro 600"; memory = 1024 MB # Total Memory: 33474872 kB def extractHardwareInfo(self, baselineContent): startCpuInfoIndex = baselineContent.find("CPU info:") endCpuInfoIndex = baselineContent.find("----------", startCpuInfoIndex) cpuInfo = re.search("^CPU info:\s+" "CPU Model (Name:\s*.*)\s+" "(Hardware threads: \d+)\s+" "Total (Memory:\s*.*)\s+", baselineContent[startCpuInfoIndex:endCpuInfoIndex], re.MULTILINE) if cpuInfo is None: return self.cpuInfo = "\n".join(cpuInfo.groups()) startGpuInfoIndex = baselineContent.find("GPU info:") endGpuInfoIndex = baselineContent.find("----------", startGpuInfoIndex) gpuInfoSnippet = baselineContent[startGpuInfoIndex:endGpuInfoIndex] gpuDevices = re.findall("\t\t(Device\[\d+\]: cores = \d+; computeCapability = \d\.\d; type = .*; memory = \d+ MB)[\r\n]?", gpuInfoSnippet) if not gpuDevices: return gpuInfo = [ device for device in gpuDevices ] self.gpuInfo = "\n".join(gpuInfo) @staticmethod def formatLastTestResult(line): return line[0] + line[1] + "\n" + line[2].replace('; ', '\n').replace(' ','\n') @staticmethod def formatLastTrainResult(line): epochsInfo, parameters = line[0], line[1] return epochsInfo + '\n' + parameters.replace('; ', '\n') class Example: allExamplesIndexedByFullName = {} def __init__(self, suite, name, testDir): self.suite = suite self.name = name self.fullName = suite + "/" + name self.testDir = testDir self.baselineList = [] self.gitHash = "" @staticmethod def discoverAllExamples(): testsDir = thisDir for dirName, subdirList, fileList in os.walk(testsDir): if 'testcases.yml' in fileList: testDir = dirName exampleName = os.path.basename(dirName) suiteDir = os.path.dirname(dirName) # suite name will be derived from the path components suiteName = os.path.relpath(suiteDir, testsDir).replace('\\', '/') example = Example(suiteName, exampleName, testDir) Example.allExamplesIndexedByFullName[example.fullName.lower()] = example # it returns a list with all baseline files for current example def findBaselineFilesList(self): baselineFilesList = [] oses = [".windows", ".linux", ""] devices = [".cpu", ".gpu", ""] flavors = [".debug", ".release", ""] for o in oses: for device in devices: for flavor in flavors: candidateName = "baseline" + o + flavor + device + ".txt" fullPath = td.cygpath(os.path.join(self.testDir, candidateName), relative=True) if os.path.isfile(fullPath): baseline = Baseline(fullPath); baselineFilesList.append(baseline) return baselineFilesList # extracts information for every example and stores it in Example.allExamplesIndexedByFullName def getExamplesMetrics(): Example.allExamplesIndexedByFullName = list(sorted(Example.allExamplesIndexedByFullName.values(), key=lambda test: test.fullName)) allExamples = Example.allExamplesIndexedByFullName print ("CNTK - Metrics collector") for example in allExamples: baselineListForExample = example.findBaselineFilesList() six.print_("Example: " + example.fullName) for baseline in baselineListForExample: with open(baseline.fullPath, "r") as f: baselineContent = f.read() gitHash = re.search('.*Build SHA1:\s([a-z0-9]{40})[\r\n]+', baselineContent, re.MULTILINE) if gitHash is None: continue example.gitHash = gitHash.group(1) baseline.extractHardwareInfo(baselineContent) baseline.extractResultsInfo(baselineContent) example.baselineList.append(baseline) # creates a list with links to each example result def createAsciidocExampleList(file): for example in Example.allExamplesIndexedByFullName: if not example.baselineList: continue file.write("".join(["<<", example.fullName.replace("/","").lower(),",", example.fullName, ">> +\n"])) file.write("\n") def writeMetricsToAsciidoc(): metricsFile = open("metrics.adoc",'wb') createAsciidocExampleList(metricsFile) for example in Example.allExamplesIndexedByFullName: if not example.baselineList: continue metricsFile.write("".join(["===== ", example.fullName, "\n"])) metricsFile.write("".join(["**Git Hash: **", example.gitHash, "\n\n"])) metricsFile.write("[cols=3, options=\"header\"]\n") metricsFile.write("|====\n") metricsFile.write("|Log file / Configuration | Train Result | Test Result\n") for baseline in example.baselineList: pathInDir=baseline.fullPath.split(thisDir)[1][1:] metricsFile.write("".join(["|link:../blob/", example.gitHash[:7],"/Tests/EndToEndTests/", pathInDir, "[", baseline.fullPath.split("/")[-1], "] .2+|", baseline.trainResult.replace("\n", " "), " .2+|", baseline.testResult.replace("\n", " "), "|\n"])) cpuInfo = "".join(["CPU: ", re.sub("[\r]?\n", ' ', baseline.cpuInfo)]) gpuInfo = re.sub("[\r]?\n", ' ', baseline.gpuInfo) if gpuInfo: metricsFile.write("".join([cpuInfo, " GPU: ", gpuInfo])) else: metricsFile.write(cpuInfo) metricsFile.write("\n|====\n\n") # ======================= Entry point ======================= six.print_("==============================================================================") Example.discoverAllExamples() getExamplesMetrics() writeMetricsToAsciidoc()