https://bitbucket.org/vgl_iisc/tachyon/src/master/replicability-stamp-instructions.md
Raw File
Tip revision: 7141ef3afa19632d1ea1cec954fff0c99562241a authored by Abhijath Ande on 19 May 2023, 18:34:18 UTC
Merged in update-replicability-stamp-instructions (pull request #6)
Tip revision: 7141ef3
generate-graph.py
#!/usr/bin/python3

import matplotlib
import matplotlib.pyplot as plt
import os
from subprocess import Popen, PIPE
import hashlib
import requests

# Global matplotlib font settings applied to every text element of the plot.
# 'sans-serif' is one of matplotlib's generic font families. The previous
# value, 'normal', is not a font family name: matplotlib logged
# "findfont: Font family ['normal'] not found" warnings and fell back to its
# default font anyway.
font = {'size': 16, 'family': 'sans-serif', 'weight': 'bold'}
matplotlib.rc('font', **font)

# Tick labels placed along the x-axis of the runtime plot.
labels = [
    'Silicium',
    'Neghip',
    'Fuel',
    'Hydrogen',
    'Shockwave',
    'Lobster',
    'Ventricles',
    'Engine',
    'Statue Leg',
    'Teapot',
    'Skull',
    'Foot',
    'Aneurism',
    'Bonsai',
    'Angio',
    'Stent',
    'Pancreas',
    'Backpack',
    'Reconnection',
    "Zeiss",
]

# Marker size and line width used when drawing the runtime curve.
MARKERSIZE = 16
LINEWIDTH = 4

# Substring used to locate the timing line in tachyon's standard output.
EXTREMUM_GRAPH_COMPUTATION_LINE = "Ext Graph Computation :"

# Path of the tachyon executable that gets benchmarked.
TACHYON_BINARY_PATH = "build/tachyon"

def sha512sum_for_file(filename: str) -> str:
    """Return the hexadecimal SHA-512 digest of the file at *filename*.

    The file is opened in binary mode and consumed in fixed-size chunks, so
    its whole content is never held in memory at once.
    """
    chunk_size = 64 * 4096
    digest = hashlib.sha512()
    with open(filename, 'rb') as stream:
        # iter() with a sentinel stops as soon as read() returns b'' (EOF).
        for block in iter(lambda: stream.read(chunk_size), b''):
            digest.update(block)
    return digest.hexdigest()

def link_to_filename(link: str, dirname: str) -> str:
    """Map a download link to its local path inside *dirname*.

    The local name is whatever follows the last '/' in *link*; it is joined
    to *dirname* with a single '/'.
    """
    basename = link.rsplit('/', 1)[-1]
    return f"{dirname}/{basename}"

# https://stackoverflow.com/a/16696317
def download_file(link: str, dirname: str, timeout=(10, 60)) -> str:
    """Stream the resource at *link* into *dirname* and return the local path.

    Parameters:
        link: URL to fetch; the text after its last '/' becomes the file name.
        dirname: existing directory the file is written into.
        timeout: value forwarded to ``requests.get`` as its ``timeout``
            argument (a ``(connect, read)`` pair in seconds). Without a
            timeout a stalled server would block the script indefinitely.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.RequestException: on connection failures or timeouts.
    """
    local_filename = link_to_filename(link, dirname)
    # stream=True makes requests hand the body over piecewise instead of
    # loading the whole (potentially large) dataset into memory first.
    with requests.get(link, stream=True, timeout=timeout) as r:
        r.raise_for_status()
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
    return local_filename

def parse_checksum_file(filename: str) -> dict:
    """Read a checksum listing and return a ``{file name: checksum}`` dict.

    Each line is split on whitespace. Only lines made of exactly two tokens
    are used, the first token being the checksum and the second the file
    name; every other line is skipped. If a file name occurs more than once,
    the last occurrence wins.
    """
    mapping = {}
    with open(filename, 'r') as handle:
        for raw_line in handle:
            fields = raw_line.strip().split()
            if len(fields) != 2:
                continue
            checksum, path = fields
            mapping[path] = checksum
    return mapping

def download_missing_files(links_file: str, checksums_file: str, dirname: str) -> None:
    """Make sure every dataset listed in *links_file* is present in *dirname*.

    For each link the expected local file is checked: a missing file is
    downloaded, a file whose SHA-512 digest differs from the one recorded in
    *checksums_file* is deleted and downloaded again, and a matching file is
    left alone. One status line is printed per link.

    Parameters:
        links_file: text file with one download URL per line; blank lines
            are ignored.
        checksums_file: checksum listing understood by parse_checksum_file;
            its file names must equal ``dirname + "/" + <name from link>``.
        dirname: directory holding the datasets; created if absent.

    Raises:
        AssertionError: if the checksum listing does not hold 20 entries.
        KeyError: if a file exists locally but has no checksum entry.
    """
    file_to_checksum_map = parse_checksum_file(checksums_file)
    assert len(file_to_checksum_map) == 20

    os.makedirs(dirname, exist_ok=True)

    # Read the list from the caller-supplied path (it used to be hard-coded,
    # silently ignoring the links_file argument).
    with open(links_file, "r") as f:
        links = [line.strip() for line in f]

    for link in links:
        if not link:
            # A blank line is not a URL; trying to download it would fail.
            continue
        filename = link_to_filename(link, dirname)
        if not os.path.isfile(filename):
            print(f"File '{filename}' not found, downloading...")
            download_file(link, dirname)
        elif file_to_checksum_map[filename] != sha512sum_for_file(filename):
            print(f"File '{filename}' sha512 digest does not match, downloading again...")
            os.unlink(filename)
            download_file(link, dirname)
        else:
            print(f"File '{filename}' OK")


def runChecksum():
    """Verify the files listed in checksums.txt with the ``sha512sum`` tool.

    Runs ``sha512sum --quiet -c checksums.txt`` in the current working
    directory and prints whatever the tool wrote to standard output.

    Raises:
        SystemExit: carrying the tool's exit status when verification fails,
            so the failure is visible to the calling shell (a bare exit()
            would report success).
    """
    process = Popen(["sha512sum", "--quiet", "-c", "checksums.txt"], stdout=PIPE)
    # Only stdout is piped, so the second element returned by communicate()
    # is always None; the tool's stderr goes straight to the terminal.
    output, _ = process.communicate()
    if output is not None:
        # Print the text verbatim; applying '%' formatting to it would raise
        # as soon as the output contained a percent sign.
        print(output.decode(errors="replace"))
    if process.returncode != 0:
        raise SystemExit(process.returncode)

def getTachyonRuntime(binary, args):
    """Run *binary* with *args* and return the time it reports, as a float.

    The program's standard output is searched for the single line containing
    EXTREMUM_GRAPH_COMPUTATION_LINE; the number after the first ':' on that
    line is returned.

    Parameters:
        binary: path of the executable to run.
        args: list of command-line arguments appended after *binary*.

    Raises:
        SystemExit: with status 1 if the program exits with a non-zero
            status, or if its output does not contain exactly one timing
            line (a bare exit() would report success to the shell).
        ValueError: if the text after the ':' is not a valid float.
    """
    process = Popen([binary] + args, stdout=PIPE)
    # Only stdout is piped; the program's stderr goes to the terminal.
    output, _ = process.communicate()
    # Keep the text verbatim: applying '%' formatting to it would raise as
    # soon as the output contained a percent sign.
    text = output.decode(errors="replace")

    if process.returncode != 0:
        print("Failed to run tachyon!")
        print("Output:\n", text)
        raise SystemExit(1)

    timing_lines = [
        line for line in text.split("\n")
        if EXTREMUM_GRAPH_COMPUTATION_LINE in line
    ]
    if len(timing_lines) != 1:
        print("Failed to parse process time taken")
        raise SystemExit(1)

    # Take the text after the first ':' and drop its final character
    # (presumably a one-letter unit suffix such as 's' -- TODO confirm
    # against tachyon's output format) before converting.
    return float(timing_lines[0].split(":")[1][:-1].strip())

def getTachyonRuntimeForDataset(binary, datasetPath, datasetDims, elementType, otherArgs=[]):
    """Time one run of *binary* on a single dataset.

    Builds the command line ``-i <path> -d <dims...> -t <type> -o /dev/null``
    followed by *otherArgs*, and returns what getTachyonRuntime reports for
    it. *otherArgs* must be a list; it is only read, never modified.
    """
    cmdArgs = ["-i", datasetPath, "-d"]
    cmdArgs.extend(str(dim) for dim in datasetDims)
    cmdArgs += ["-t", elementType, "-o", "/dev/null"]
    return getTachyonRuntime(binary, cmdArgs + otherArgs)

def getAvgTachyonRuntimeForDataset(binary, datasetPath, datasetDims, elementType, otherArgs=[]):
    """Return the trimmed mean runtime of *binary* on one dataset.

    The dataset is processed 7 times; the fastest and the slowest run are
    discarded and the remaining 5 timings are averaged.

    Parameters are forwarded unchanged to getTachyonRuntimeForDataset.
    """
    runs = 7
    times = sorted(
        getTachyonRuntimeForDataset(binary, datasetPath, datasetDims, elementType, otherArgs)
        for _ in range(runs)
    )
    trimmed = times[1:-1]
    # Divide by the number of samples actually summed. Dividing by the total
    # number of runs (7) understated the mean by a factor of 5/7.
    return sum(trimmed) / len(trimmed)

# Translates the element-type name found in a dataset file name into the
# short type code looked up by generateMetadataForFile.
DATATYPE_LONG_TO_SHORT_MAP = {
    "uint8": "u8",
    "int8": "i8",
    "uint16": "u16",
    "int16": "i16",
    "uint32": "u32",
    "int32": "i32",
    "uint64": "u64",
    "int64": "i64",
    "float32": "f32",
    "float64": "f64",
}

def generateMetadataForFile(fname):
    """Extract ``(element type code, dimensions)`` from a dataset file name.

    The name is split on '_': the last piece, up to its first '.', is the
    long type name (translated through DATATYPE_LONG_TO_SHORT_MAP), and the
    piece before it is the 'x'-separated list of dimensions, returned as a
    list of strings.

    Raises KeyError for an unknown type name and IndexError when the name
    contains no '_'.
    """
    # Only the last two '_'-separated pieces are needed.
    tokens = fname.rsplit("_", 2)
    typeName = tokens[-1].partition(".")[0]
    shortType = DATATYPE_LONG_TO_SHORT_MAP[typeName]
    dims = tokens[-2].split("x")
    return (shortType, dims)

def calcSize(d):
    """Return the product of the first three dimensions of a metadata entry.

    *d* is indexable with the dimensions at position 1; each dimension is
    converted with int() before multiplying.
    """
    extents = list(map(int, d[1]))
    return extents[0] * extents[1] * extents[2]

def generateMetadata(dirname):
    """List the ``.raw`` datasets in *dirname*, ordered by calcSize.

    Returns a list of ``[path, dimensions, element type code]`` entries, one
    per file whose name ends in ".raw", where path is
    ``dirname + "/" + file name``.
    """
    entries = []
    for name in os.listdir(dirname):
        if not name.endswith(".raw"):
            continue
        elementType, dims = generateMetadataForFile(name)
        entries.append([dirname + "/" + name, dims, elementType])
    entries.sort(key=calcSize)
    return entries

if __name__ == "__main__":
    # download_missing_files checks each dataset's SHA-512 digest itself, so
    # the external-tool variant runChecksum() is not invoked here.
    download_missing_files("dataset_links.txt", "checksums.txt", "datasets")
    # Entries are [path, dimensions, element type], ordered by calcSize.
    metadata = generateMetadata("datasets")

    # Width of the first table column: the longest dataset path.
    ml = max((len(e[0]) for e in metadata), default=0)

    tachyon = []
    startLine = "+" + ("-"*(ml+2)) + "+-----------+"
    print(startLine)
    print(f"| {'Dataset Name':>{ml}} | {'Time':>9} |")
    print(startLine)
    for e in metadata:
        t = getAvgTachyonRuntimeForDataset(TACHYON_BINARY_PATH, e[0], e[1], e[2])
        print(f"| {e[0]:>{ml}} | {t:9.6f} |")
        tachyon.append(t)

    print(startLine)

    plt.plot(tachyon, marker='o', label='TACHYON', linewidth=LINEWIDTH, markersize=MARKERSIZE)
    # rotation is given as a number: matplotlib documents it as a float or
    # 'vertical'/'horizontal', and the string '30' is neither.
    plt.xticks(list(range(len(labels))), labels, rotation=30)
    plt.ylabel('Total running time (in secs)')
    plt.xlabel('Datasets')
    plt.legend()
    plt.tight_layout()
    plt.show()
back to top