https://github.com/Microsoft/CNTK
Tip revision: b374e149b4964e0c48a19d30a9b295c5b42c7613 authored by Peyman Manikashani on 07 September 2018, 22:41:43 UTC
fixes on Batchnorm and Pooling for v1 pretrained models after removal of sequence axis from input
fixes on Batchnorm and Pooling for v1 pretrained models after removal of sequence axis from input
Tip revision: b374e14
MetricsDriver.py
#!/usr/bin/env python
# ----------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
# ---------------------------------------------------------
# This script extracts information (hardware used, final results) contained in the baseline files
# and generates an AsciiDoc file (metrics.adoc) that can be used as a wiki page
import sys, os, re
import TestDriver as td
try:
import six
except ImportError:
print("Python package 'six' not installed. Please run 'pip install six'.")
sys.exit(1)
# Directory holding this script (symlinks resolved); used as the root of the example search.
thisDir = os.path.split(os.path.realpath(__file__))[0]
# True when the OS environment variable is exactly "Windows_NT".
windows = os.environ.get("OS") == "Windows_NT"
class Baseline:
    """Holds what this script extracts from one baseline log file.

    Attributes (all plain strings):
      fullPath    -- path of the baseline file, exactly as passed to the constructor
      cpuInfo     -- newline-joined CPU summary; "" until extractHardwareInfo finds one
      gpuInfo     -- newline-joined GPU device lines; "" until extractHardwareInfo finds any
      testResult  -- formatted last "Final Results" line, or the constructor default
      trainResult -- formatted last "Finished Epoch ... [Training]" line, or the constructor default
    """

    # All patterns are raw strings so that regex escapes such as \d, \s and \[ are not
    # interpreted (and warned about) as string-literal escapes. They are compiled once here.
    _TRAIN_RESULT_RE = re.compile(r'.*(Finished Epoch\[ *\d+ of \d+\]: \[Training\]) (.*)')
    _TEST_RESULT_RE = re.compile(r'.*(Final Results: Minibatch\[1-\d+\]:)(\s+\* \d+)?\s+(.*)')
    _CPU_INFO_RE = re.compile(r"^CPU info:\s+"
                              r"CPU Model (Name:\s*.*)\s+"
                              r"(Hardware threads: \d+)\s+"
                              r"Total (Memory:\s*.*)\s+", re.MULTILINE)
    _GPU_DEVICE_RE = re.compile(r"\t\t(Device\[\d+\]: cores = \d+; computeCapability = \d\.\d; "
                                r"type = .*; memory = \d+ MB)[\r\n]?")
    # A run of dashes terminates the "CPU info:" / "GPU info:" sections.
    _SECTION_END = "----------"

    def __init__(self, fullPath, testResult = "", trainResult = ""):
        self.fullPath = fullPath
        self.cpuInfo = ""
        self.gpuInfo = ""
        self.testResult = testResult
        self.trainResult = trainResult

    # extracts results info. e.g.
    # Finished Epoch[ 5 of 5]: [Training] ce = 2.32253198 * 1000 err = 0.90000000 * 1000 totalSamplesSeen = 5000 learningRatePerSample = 2e-06 epochTime=0.175781
    # Final Results: Minibatch[1-1]: err = 0.90000000 * 100 ce = 2.32170486 * 100 perplexity = 10.1930372
    def extractResultsInfo(self, baselineContent):
        """Store the last training and last test result found in baselineContent.

        Only the final match of each kind is used. An attribute is left untouched
        when its kind of line does not occur in the content.
        """
        trainResults = self._TRAIN_RESULT_RE.findall(baselineContent)
        if trainResults:
            # The last two characters of the formatted text are dropped.
            # NOTE(review): presumably trailing line residue (e.g. a unit suffix plus "\r") -- confirm.
            self.trainResult = Baseline.formatLastTrainResult(trainResults[-1])[0:-2]
        testResults = self._TEST_RESULT_RE.findall(baselineContent)
        if testResults:
            self.testResult = Baseline.formatLastTestResult(testResults[-1])[0:-2]

    # extracts cpu and gpu info from baseline content. The patterns expect e.g.:
    #CPU info:
    #    CPU Model Name: Intel(R) Xeon(R) CPU E5-2620 v3 @ 2.40GHz
    #    Hardware threads: 12
    #    Total Memory: 33474872 kB
    #GPU info:
    #
    #Device[0]: cores = 2496; computeCapability = 5.2; type = "Quadro M4000"; memory = 8192 MB
    #Device[1]: cores = 96; computeCapability = 2.1; type = "Quadro 600"; memory = 1024 MB
    # (each Device line must be preceded by two tab characters)
    def extractHardwareInfo(self, baselineContent):
        """Fill cpuInfo and gpuInfo from the hardware sections of baselineContent.

        If the CPU section is missing or does not match, nothing is changed and the
        GPU section is not examined. If no GPU device line matches, gpuInfo is left as is.
        """
        cpuInfo = self._CPU_INFO_RE.search(Baseline._extractSection(baselineContent, "CPU info:"))
        if cpuInfo is None:
            return
        self.cpuInfo = "\n".join(cpuInfo.groups())

        gpuDevices = self._GPU_DEVICE_RE.findall(Baseline._extractSection(baselineContent, "GPU info:"))
        if not gpuDevices:
            return
        self.gpuInfo = "\n".join(gpuDevices)

    @staticmethod
    def _extractSection(baselineContent, header):
        """Return the text from header up to the next dash separator.

        Returns "" when header is absent. When no separator follows, the section
        runs to the end of the content; slicing with find()'s -1 would instead
        silently drop the final character.
        """
        start = baselineContent.find(header)
        if start < 0:
            return ""
        end = baselineContent.find(Baseline._SECTION_END, start)
        if end < 0:
            end = len(baselineContent)
        return baselineContent[start:end]

    @staticmethod
    def formatLastTestResult(line):
        """Format a (header, optional sample count, metrics) match tuple.

        Header and count stay on the first line; in the metrics text every '; '
        and then every remaining ' ' is replaced by a newline.
        """
        return line[0] + line[1] + "\n" + line[2].replace('; ', '\n').replace(' ','\n')

    @staticmethod
    def formatLastTrainResult(line):
        """Format an (epoch header, parameters) match tuple.

        The header goes on the first line; every '; ' in the parameters becomes a newline.
        """
        epochsInfo, parameters = line[0], line[1]
        return epochsInfo + '\n' + parameters.replace('; ', '\n')
class Example:
    """One example test case, identified as "<suite>/<name>"."""

    # Registry of every discovered example, keyed by lower-cased full name.
    allExamplesIndexedByFullName = {}

    def __init__(self, suite, name, testDir):
        self.suite = suite
        self.name = name
        self.testDir = testDir
        self.fullName = suite + "/" + name
        # Filled in later with the baselines accepted for this example.
        self.baselineList = []
        self.gitHash = ""

    @staticmethod
    def discoverAllExamples():
        """Walk the tree below thisDir and register each directory holding a testcases.yml."""
        for dirName, _subdirs, fileList in os.walk(thisDir):
            if 'testcases.yml' not in fileList:
                continue
            # The directory's own name is the example name; the path of its parent,
            # relative to the search root and with forward slashes, is the suite name.
            suiteDir, exampleName = os.path.split(dirName)
            suiteName = os.path.relpath(suiteDir, thisDir).replace('\\', '/')
            found = Example(suiteName, exampleName, dirName)
            Example.allExamplesIndexedByFullName[found.fullName.lower()] = found

    def findBaselineFilesList(self):
        """Return a Baseline for every existing baseline file variant in this example's directory.

        Candidate names have the form baseline[.os][.flavor][.device].txt; each of the
        three parts may be absent.
        """
        oses = [".windows", ".linux", ""]
        devices = [".cpu", ".gpu", ""]
        flavors = [".debug", ".release", ""]
        candidateNames = ["baseline" + o + flavor + device + ".txt"
                          for o in oses
                          for device in devices
                          for flavor in flavors]
        existing = []
        for candidateName in candidateNames:
            fullPath = td.cygpath(os.path.join(self.testDir, candidateName), relative=True)
            if os.path.isfile(fullPath):
                existing.append(Baseline(fullPath))
        return existing
# extracts information for every example and stores it in Example.allExamplesIndexedByFullName
def getExamplesMetrics():
    """Parse the baseline files of every registered example.

    Replaces Example.allExamplesIndexedByFullName with a list of the examples
    sorted by fullName. For each example, every baseline file that contains a
    "Build SHA1:" line gets its hardware and result info extracted and is stored
    in example.baselineList. example.gitHash is set to the hash of the last such
    baseline. Baselines without a build hash are skipped.
    """
    registry = Example.allExamplesIndexedByFullName
    # The registry starts as a dict, but this function replaces it with a sorted list;
    # accept both so that a repeated call does not fail on list.values().
    candidates = registry.values() if isinstance(registry, dict) else registry
    allExamples = sorted(candidates, key=lambda test: test.fullName)
    Example.allExamplesIndexedByFullName = allExamples

    # Raw string: \s is a regex escape, not a string-literal escape. Compiled once for all files.
    buildHashPattern = re.compile(r'.*Build SHA1:\s([a-z0-9]{40})[\r\n]+', re.MULTILINE)

    six.print_("CNTK - Metrics collector")
    for example in allExamples:
        baselineListForExample = example.findBaselineFilesList()
        six.print_("Example: " + example.fullName)
        # Start from an empty list so a repeated call does not accumulate duplicates.
        example.baselineList = []
        for baseline in baselineListForExample:
            with open(baseline.fullPath, "r") as f:
                baselineContent = f.read()
            gitHash = buildHashPattern.search(baselineContent)
            if gitHash is None:
                continue
            example.gitHash = gitHash.group(1)
            baseline.extractHardwareInfo(baselineContent)
            baseline.extractResultsInfo(baselineContent)
            example.baselineList.append(baseline)
# creates a list with links to each example result
def createAsciidocExampleList(file):
    """Write one AsciiDoc cross-reference line per example that has baselines, then a blank line.

    file is any object with a write() method accepting str.
    """
    for example in Example.allExamplesIndexedByFullName:
        if example.baselineList:
            # Anchor id: the full name with slashes removed, lower-cased.
            anchor = example.fullName.replace("/","").lower()
            file.write("<<" + anchor + "," + example.fullName + ">> +\n")
    file.write("\n")
def writeMetricsToAsciidoc():
    """Write metrics.adoc in the current working directory.

    The file starts with the cross-reference list produced by
    createAsciidocExampleList, followed by one section per example that has
    baselines. Each section contains the git hash and a three-column table
    (log file link, train result, test result) with a hardware line per baseline.
    """
    # Everything written below is str. A binary-mode file rejects str on Python 3,
    # so binary mode is kept only for Python 2; on Python 3 text mode is used with
    # newline="\n" so the output keeps plain LF line endings on every platform.
    if sys.version_info[0] < 3:
        metricsFile = open("metrics.adoc", "wb")
    else:
        metricsFile = open("metrics.adoc", "w", newline="\n")

    # The with-block guarantees the file is flushed and closed, also on errors.
    with metricsFile:
        createAsciidocExampleList(metricsFile)
        for example in Example.allExamplesIndexedByFullName:
            if not example.baselineList:
                continue
            metricsFile.write("".join(["===== ", example.fullName, "\n"]))
            metricsFile.write("".join(["**Git Hash: **", example.gitHash, "\n\n"]))
            metricsFile.write("[cols=3, options=\"header\"]\n")
            metricsFile.write("|====\n")
            metricsFile.write("|Log file / Configuration | Train Result | Test Result\n")
            for baseline in example.baselineList:
                # Path of the baseline relative to this script's directory.
                # NOTE(review): assumes fullPath contains thisDir; otherwise this raises
                # IndexError -- confirm against what td.cygpath returns.
                pathInDir = baseline.fullPath.split(thisDir)[1][1:]
                metricsFile.write("".join(["|link:../blob/", example.gitHash[:7],"/Tests/EndToEndTests/", pathInDir, "[",
                                           baseline.fullPath.split("/")[-1], "] .2+|", baseline.trainResult.replace("\n", " "), " .2+|",
                                           baseline.testResult.replace("\n", " "), "|\n"]))
                # Hardware info is flattened onto a single line below the link row.
                cpuInfo = "".join(["CPU: ", re.sub(r"\r?\n", ' ', baseline.cpuInfo)])
                gpuInfo = re.sub(r"\r?\n", ' ', baseline.gpuInfo)
                if gpuInfo:
                    metricsFile.write("".join([cpuInfo, " GPU: ", gpuInfo]))
                else:
                    metricsFile.write(cpuInfo)
            metricsFile.write("\n|====\n\n")
# ======================= Entry point =======================
six.print_("==============================================================================")
# The stages run in this order: discovery fills the example registry, metrics
# collection sorts it and parses the baselines, and the writer emits the report.
for stage in (Example.discoverAllExamples, getExamplesMetrics, writeMetricsToAsciidoc):
    stage()