https://bitbucket.org/vgl_iisc/tachyon/src/master/replicability-stamp-instructions.md
Tip revision: 7141ef3afa19632d1ea1cec954fff0c99562241a authored by Abhijath Ande on 19 May 2023, 18:34:18 UTC
Merged in update-replicability-stamp-instructions (pull request #6)
Tip revision: 7141ef3
generate-graph.py
#!/usr/bin/python3
import matplotlib
import matplotlib.pyplot as plt
import os
from subprocess import Popen, PIPE
import hashlib
import requests
# Global matplotlib text styling for the benchmark plot.
# NOTE(review): 'normal' is not a matplotlib font *family* (families are
# 'serif', 'sans-serif', 'monospace', ...); 'normal' is a style/weight
# value, so this likely triggers a findfont fallback warning — confirm
# the intended family.
font = {'size': 16, 'family' : 'normal', 'weight' : 'bold'}
matplotlib.rc('font', **font)
# X-axis tick labels, one per benchmark dataset — presumably ordered to
# match the size-sorted metadata built in __main__ (TODO confirm).
labels = [ 'Silicium', 'Neghip', 'Fuel', 'Hydrogen', 'Shockwave', 'Lobster', 'Ventricles', 'Engine', 'Statue Leg', 'Teapot', 'Skull', 'Foot', 'Aneurism', 'Bonsai', 'Angio', 'Stent', 'Pancreas', 'Backpack', 'Reconnection', "Zeiss"]
# Plot styling constants.
MARKERSIZE=16
LINEWIDTH=4
# Substring searched for in tachyon's stdout to locate the timing line.
EXTREMUM_GRAPH_COMPUTATION_LINE="Ext Graph Computation :"
# Path to the tachyon executable, relative to the working directory.
TACHYON_BINARY_PATH="build/tachyon"
def sha512sum_for_file (filename: str) -> str:
    """Return the hex SHA-512 digest of the file at *filename*.

    Reads in 256 KiB chunks so arbitrarily large datasets never have to
    fit in memory.
    """
    chunk_size = 4096 * 64
    digest = hashlib.sha512()
    with open(filename, 'rb') as fh:
        # iter() with a b'' sentinel stops cleanly at end-of-file.
        for chunk in iter(lambda: fh.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()
def link_to_filename (link: str, dirname: str) -> str:
    """Map a download URL to its destination path inside *dirname*."""
    basename = link.rsplit('/', 1)[-1]
    return "{}/{}".format(dirname, basename)
# https://stackoverflow.com/a/16696317
def download_file (link: str, dirname: str) -> str:
    """Stream *link* to a file inside *dirname*; return the local path.

    stream=True keeps the response body off the heap; the body is copied
    to disk in 8 KiB chunks.
    """
    target = link_to_filename(link, dirname)
    with requests.get(link, stream=True) as response:
        response.raise_for_status()
        with open(target, 'wb') as out:
            # Chunks can be empty for chunk-encoded responses; writing
            # b'' is harmless, so no filtering is needed here.
            for block in response.iter_content(chunk_size=8192):
                out.write(block)
    return target
def parse_checksum_file (filename: str) -> dict:
    """Parse sha512sum-style output into a {path: checksum} dict.

    Each useful line is "<checksum> <path>"; lines that do not split
    into exactly two whitespace-separated fields are ignored.
    """
    mapping = {}
    with open(filename, 'r') as fh:
        for raw in fh:
            fields = raw.strip().split()
            if len(fields) == 2:
                checksum, path = fields
                mapping[path] = checksum
    return mapping
def download_missing_files (links_file: str, checksums_file: str, dirname: str) -> None:
    """Ensure every dataset listed in *links_file* exists in *dirname*.

    Files that are absent, or whose SHA-512 digest disagrees with
    *checksums_file*, are (re-)downloaded.

    :param links_file: text file with one download URL per line
    :param checksums_file: sha512sum-style "<checksum> <path>" file
    :param dirname: directory the datasets are stored in (created if missing)
    """
    file_to_checksum_map = parse_checksum_file(checksums_file)
    # The benchmark ships exactly 20 datasets; bail out early on a
    # truncated or corrupt checksum file.
    assert len(file_to_checksum_map) == 20
    if not os.path.isdir(dirname):
        os.mkdir(dirname)
    # BUG FIX: previously the links_file argument was ignored and the
    # hard-coded "dataset_links.txt" was always opened instead.
    with open(links_file, "r") as f:
        links = f.readlines()
    for link in links:
        link = link.strip()
        filename = link_to_filename(link, dirname)
        # BUG FIX: the three messages below printed the literal text
        # "(unknown)" because the {filename} placeholders were missing
        # from the f-strings.
        if not os.path.isfile(filename):
            print(f"File '{filename}' not found, downloading...")
            download_file(link, dirname)
        elif file_to_checksum_map[filename] != sha512sum_for_file(filename):
            print(f"File '{filename}' sha512 digest does not match, downloading again...")
            os.unlink(filename)
            download_file(link, dirname)
        else:
            print(f"File '{filename}' OK")
def runChecksum():
    """Verify the datasets with `sha512sum -c checksums.txt`.

    Prints the tool's output and terminates the script if any checksum
    fails (non-zero exit code).
    """
    process = Popen("sha512sum --quiet -c checksums.txt".split(" "), stdout=PIPE)
    (output, err) = process.communicate()
    exit_code = process.wait()
    if output is not None:
        # BUG FIX: the decoded text was previously passed through "% ()",
        # which raises ValueError/TypeError whenever the output contains
        # a literal '%' character.
        print(output.decode())
    if err is not None:
        # NOTE(review): stderr is not piped above, so err is always None
        # here; kept for parity with the original control flow.
        print(err.decode())
    if exit_code != 0:
        exit()
def getTachyonRuntime(binary, args):
    """Run the tachyon binary once and return its reported runtime.

    Parses stdout for the single line containing
    EXTREMUM_GRAPH_COMPUTATION_LINE and returns the trailing value as
    seconds (float). Terminates the script if the run fails or the
    timing line cannot be found exactly once.
    """
    process = Popen([binary] + args, stdout=PIPE)
    (output, err) = process.communicate()
    exit_code = process.wait()
    if exit_code != 0:
        print("Failed to run tachyon!")
        # BUG FIX: decoded output was previously passed through "% ()",
        # which raises whenever the text contains a literal '%'.
        if output is not None:
            print("Output:\n", output.decode())
        if err is not None:
            # NOTE(review): stderr is not piped, so err is always None here.
            print("Error output:\n", err.decode())
        exit()
    decoded = output.decode()
    matches = [line for line in decoded.split("\n")
               if EXTREMUM_GRAPH_COMPUTATION_LINE in line]
    if len(matches) != 1:
        print("Failed to parse process time taken")
        exit()
    # The line ends with "<seconds>s"; drop the trailing unit character
    # before converting to float.
    return float(matches[0].split(":")[1][:-1].strip())
def getTachyonRuntimeForDataset(binary, datasetPath, datasetDims, elementType, otherArgs=None):
    """Time one tachyon run on a single dataset.

    :param binary: path to the tachyon executable
    :param datasetPath: path to the .raw volume
    :param datasetDims: iterable of three dimensions (any str-able values)
    :param elementType: short element-type code, e.g. "u8"
    :param otherArgs: optional extra CLI flags (BUG FIX: was a mutable
        default list, shared across calls; None is the safe sentinel)
    :return: runtime in seconds (float)
    """
    extra = [] if otherArgs is None else list(otherArgs)
    args = (["-i", datasetPath, "-d"]
            + [str(e) for e in datasetDims]
            + ["-t", elementType, "-o", "/dev/null"]
            + extra)
    return getTachyonRuntime(binary, args)
def getAvgTachyonRuntimeForDataset(binary, datasetPath, datasetDims, elementType, otherArgs=None):
    """Return the truncated mean runtime over 7 runs of one dataset.

    The fastest and slowest runs are discarded and the remaining five
    are averaged.

    BUG FIX: the trimmed sum (5 samples) was previously divided by the
    full run count (7), biasing every reported average low by ~29%.
    BUG FIX: otherArgs no longer uses a mutable default list.
    """
    passthrough = [] if otherArgs is None else otherArgs
    times = [getTachyonRuntimeForDataset(binary, datasetPath, datasetDims,
                                         elementType, passthrough)
             for _ in range(7)]
    trimmed = sorted(times)[1:-1]
    return sum(trimmed) / len(trimmed)
DATATYPE_LONG_TO_SHORT_MAP = {"uint8": "u8", "int8": "i8", "uint16": "u16", "int16": "i16", "uint32": "u32", "int32": "i32", "uint64": "u64", "int64": "i64", "float32": "f32", "float64": "f64"}
def generateMetadataForFile(fname):
    """Derive (short element type, dims-as-strings) from a dataset filename.

    Filenames are expected to look like "name_WxHxD_uint8.raw": the
    last underscore-separated token (minus extension) is the element
    type, the second-to-last holds the 'x'-separated dimensions.
    """
    tokens = fname.split("_")
    long_type = tokens[-1].split(".")[0]
    short_type = DATATYPE_LONG_TO_SHORT_MAP[long_type]
    dims = [str(axis) for axis in tokens[-2].split("x")]
    return (short_type, dims)
def calcSize(d):
    """Sort key: voxel count (x*y*z) of one metadata entry.

    *d* is a (path, dims, element-type) record whose dims are strings.
    """
    width, height, depth = (int(axis) for axis in d[1][:3])
    return width * height * depth
def generateMetadata(dirname):
    """Collect [path, dims, element-type] for every .raw file in *dirname*.

    Entries are returned sorted by ascending voxel count so the
    benchmark runs small volumes first.
    """
    entries = []
    for name in os.listdir(dirname):
        if not name.endswith(".raw"):
            continue
        etype, dims = generateMetadataForFile(name)
        entries.append([dirname + "/" + name, dims, etype])
    entries.sort(key=calcSize)
    return entries
if __name__ == "__main__":
    #runChecksum()
    # Fetch any missing/corrupt datasets, then benchmark each one and
    # render the runtimes as a line plot.
    download_missing_files("dataset_links.txt", "checksums.txt", "datasets")
    metadata = generateMetadata("datasets")
    # Width of the widest dataset path, used to align the ASCII table.
    name_width = max((len(entry[0]) for entry in metadata), default=0)
    separator = "+" + "-" * (name_width + 2) + "+-----------+"
    header_fmt = "| %%%ds | %%9s |" % (name_width)
    row_fmt = "| %%%ds | %%9.6f |" % (name_width)
    print(separator)
    print(header_fmt % ("Dataset Name", "Time"))
    print(separator)
    tachyon = []
    for entry in metadata:
        runtime = getAvgTachyonRuntimeForDataset(TACHYON_BINARY_PATH, entry[0], entry[1], entry[2])
        print(row_fmt % (entry[0], runtime))
        tachyon.append(runtime)
    print(separator)
    plt.plot(tachyon, marker='o', label='TACHYON', linewidth=LINEWIDTH, markersize=MARKERSIZE)
    plt.xticks(list(range(len(labels))), labels, rotation='30')
    plt.ylabel('Total running time (in secs)')
    plt.xlabel('Datasets')
    plt.legend()
    plt.tight_layout()
    plt.show()