https://github.com/pierre-guillou/pdiags_bench
Tip revision: b422e92c24a1485aa93e8a70474973787bd0eee5 authored by Pierre Guillou on 05 March 2021, 13:57:05 UTC
WiP
WiP
Tip revision: b422e92
main.py
#!/usr/bin/env python3
import argparse
import glob
import json
import math
import multiprocessing
import os
import re
import subprocess
import time
import compare_diags
import convert_datasets
URL = "https://klacansky.com/open-scivis-datasets/data_sets.json"
SIZE_LIMIT_MB = 80
def create_dir(dirname):
try:
os.mkdir(dirname)
except FileExistsError:
pass
def prepare_datasets(_, size_limit=SIZE_LIMIT_MB, download=False):
create_dir("datasets")
for dataset in sorted(glob.glob("raws/*.raw")):
convert_datasets.main(dataset, "datasets")
def ttk_dipha_print_pairs(diag):
pairs = compare_diags.read_diag(diag)
print(f" #Min-saddle pairs: {len(pairs[0])}")
print(f" #Saddle-saddle pairs: {len(pairs[1])}")
print(f" #Saddle-max pairs: {len(pairs[2])}")
print(f" Total: {sum(map(len, pairs))}")
return {
"#Min-saddle": len(pairs[0]),
"#Saddle-saddle": len(pairs[1]),
"#Saddle-max": len(pairs[2]),
}
def escape_ansi_chars(txt):
ansi_escape = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")
return ansi_escape.sub("", txt)
def dataset_name(dsfile):
"""File name without extension and without directory"""
return dsfile.split(".")[0].split("/")[-1]
def compute_ttk(
fname, exe, times, dipha_offload=False, hybrid_pp=False, one_thread=False
):
dataset = dataset_name(fname)
if dipha_offload:
if hybrid_pp:
print("Processing " + dataset + " with TTK (hybrid++ mode)...")
else:
print("Processing " + dataset + " with TTK (hybrid mode)...")
else:
print("Processing " + dataset + " with TTK...")
outp = f"diagrams/{dataset}.vtu"
cmd = ["omplace", "-nt", "64", exe, "-i", fname, "-d", "4", "-t", "64"]
if one_thread:
cmd.extend(["-t", "1"])
key = "ttk"
if dipha_offload:
cmd.append("-wd")
key = "ttk-hybrid"
if hybrid_pp:
cmd.append("-dpp")
key = "ttk-hybrid++"
proc = subprocess.run(cmd, capture_output=True)
def ttk_compute_time(ttk_output):
ttk_output = escape_ansi_chars(ttk_output.decode())
time_re = r"\[PersistenceDiagram\] Complete.*\[(\d+\.\d+|\d+)s"
return float(re.search(time_re, ttk_output, re.MULTILINE).group(1))
def ttk_overhead_time(ttk_output):
ttk_output = escape_ansi_chars(ttk_output.decode())
time_re = (
r"\[DiscreteGradient\] Wrote Dipha explicit complex.*\[(\d+\.\d+|\d+)s"
)
return float(re.search(time_re, ttk_output, re.MULTILINE).group(1))
times[dataset][key] = ttk_compute_time(proc.stdout)
os.rename("output_port_0.vtu", outp)
def compute_dipha(fname, exe, times, one_thread=False):
dataset = dataset_name(fname)
print("Processing " + dataset + " with dipha...")
outp = f"diagrams/{dataset}.dipha"
if True:
cmd = [
exe,
"--benchmark",
fname,
outp,
]
else:
cmd = [
"mpirun",
"--oversubscribe",
"-np",
"64",
exe,
"--benchmark",
fname,
outp,
]
start_time = time.time()
proc = subprocess.run(cmd, capture_output=True)
dipha_exec_time = time.time() - start_time
def dipha_compute_time(dipha_output, dipha_exec_time):
dipha_output = dipha_output.decode()
pat = r"^Computation lasted (\d+.\d+|\d+)s$"
time = re.search(pat, dipha_output, re.MULTILINE).group(1)
return round(float(time), 3)
times[dataset]["dipha"] = dipha_compute_time(proc.stdout, dipha_exec_time)
def compute_cubrips(fname, exe, times):
dataset = dataset_name(fname)
if "float" in dataset:
# skip large datasets
return
print("Processing " + dataset + " with CubicalRipser...")
outp = f"diagrams/{dataset}.cr"
cmd = [exe, fname, "--output", outp]
try:
start_time = time.time()
subprocess.check_call(cmd)
times[dataset]["CubicalRipser"] = round(time.time() - start_time, 3)
except subprocess.CalledProcessError:
print(dataset + " is too large for CubicalRipser")
def compute_gudhi(fname, exe, times):
dataset = dataset_name(fname)
print("Processing " + dataset + " with Gudhi...")
outp = f"diagrams/{dataset}.gudhi"
cmd = [exe, fname]
start_time = time.time()
subprocess.check_call(cmd)
times[dataset]["gudhi"] = round(time.time() - start_time, 3)
os.rename(fname.split("/")[-1] + "_persistence", outp)
def compute_diagrams(_, all_softs=True):
exes = {
"ttk": "ttkPersistenceDiagramCmd",
"dipha": "dipha",
"gudhi": (
"build_gudhi/src/Bitmap_cubical_complex"
"/utilities/cubical_complex_persistence"
),
"CubicalRipser": "CubicalRipser/CR3",
}
for exe in list(exes.values())[1:]:
if not os.path.isfile(exe):
print(exe + " not found")
all_softs = False
# output diagrams directory
create_dir("diagrams")
# store computation times
times = dict()
one_thread = False
for fname in sorted(glob.glob("datasets/*")):
# initialize compute times table
times[dataset_name(fname)] = {
"#Threads": 1
if one_thread or "impl" in fname
else multiprocessing.cpu_count()
}
try:
# compute number of vertices from dataset name
pattern = re.compile(r"_\d+x\d+x\d+_")
extent = re.search(pattern, fname).group().strip("_")
nVerts = math.prod([int(dim) for dim in extent.split("x")])
times[dataset_name(fname)]["#Vertices"] = nVerts
except AttributeError:
pass
for fname in sorted(glob.glob("datasets/*")):
ext = fname.split(".")[-1]
if ext == "vtu" or ext == "vti":
# our algo
compute_ttk(
fname,
exes["ttk"],
times,
dipha_offload=False,
hybrid_pp=False,
one_thread=one_thread,
)
# ttk-hybrid: offload Morse-Smale complex to Dipha
compute_ttk(
fname,
exes["ttk"],
times,
dipha_offload=True,
hybrid_pp=False,
one_thread=one_thread,
)
# ttk-hybrid++: offload saddle connectors to Dipha
compute_ttk(
fname,
exes["ttk"],
times,
dipha_offload=True,
hybrid_pp=True,
one_thread=one_thread,
)
elif ext == "dipha" and "expl" in fname:
compute_dipha(fname, exes["dipha"], times, one_thread)
elif all_softs and ext == "dipha" and "impl" in fname:
compute_cubrips(fname, exes["CubicalRipser"], times)
elif all_softs and ext == "pers":
compute_gudhi(fname, exes["gudhi"], times)
with open("results", "w") as dst:
dst.write(json.dumps(times))
return times
def compute_distances(_, method="auction"):
# list of datasets that have at least one persistence diagram
datasets = sorted(set(f.split(".")[0] for f in glob.glob("diagrams/*")))
float_re = r"(\d+\.\d+|\d+)"
auct_patt = re.compile(
rf"(?:Min-saddle|Saddle-saddle|Saddle-max) cost\s+:\s+{float_re}"
)
btnk_patt = re.compile(rf"diag(?:Max|Min|Sad)\({float_re}\)")
dists = dict()
for ds in datasets:
ttk_diag = ds + ".vtu"
dipha_diag = ds + ".dipha"
empty_diag = "empty.vtu"
if os.path.isfile(ttk_diag) and os.path.isfile(dipha_diag):
cmd = ["python", "ttk_distance.py", method, dipha_diag, ttk_diag]
print(f"Computing distance between TTK and Dipha for {ds}")
proc0 = subprocess.run(cmd, capture_output=True)
cmd = ["python", "ttk_distance.py", method, dipha_diag, empty_diag]
print(f"Computing Dipha distance to empty diagram for {ds}")
proc1 = subprocess.run(cmd, capture_output=True)
# match distance figures
pattern = auct_patt if method == "auction" else btnk_patt
matches0 = re.findall(pattern, str(proc0.stdout))
matches1 = re.findall(pattern, str(proc1.stdout))
# parse string to float
matches0 = [float(m) for m in matches0]
matches1 = [float(m) for m in matches1]
pairTypes = ["min-sad", "sad-sad", "sad-max"]
dists[ds] = {
"ttk-dipha": dict(zip(pairTypes, matches0)),
"empty-dipha": dict(zip(pairTypes, matches1)),
}
# clean pipeline sink
os.remove("dist.vtu")
with open("distances", "w") as dst:
dst.write(json.dumps(dists))
return dists
def main():
parser = argparse.ArgumentParser(
description=(
"Compute Persistence Diagrams with TTK, Dipha, Gudhi and "
"CubicalRipser on a selection of OpenSciVis datasets"
)
)
cmd = [
"mpirun",
"--oversubscribe",
"-np",
"8",
"dipha",
"--benchmark",
"datasets/fuel_64x64x64_uint8_order_expl.dipha",
"output.dipha",
]
subprocess.run(cmd)
cmd[6] = "datasets/hydrogen_atom_128x128x128_uint8_order_expl.dipha",
subprocess.run(cmd)
cmd[6] = "datasets/marschner_lobb_41x41x41_uint8_order_expl.dipha",
subprocess.run(cmd)
return
subparsers = parser.add_subparsers()
prep_datasets = subparsers.add_parser("prepare_datasets")
prep_datasets.set_defaults(func=prepare_datasets)
get_diags = subparsers.add_parser("compute_diagrams")
get_diags.set_defaults(func=compute_diagrams)
get_dists = subparsers.add_parser("compute_distances")
get_dists.set_defaults(func=compute_distances)
cli_args = parser.parse_args()
# force use of subcommand, display help without one
if "func" in cli_args.__dict__:
cli_args.func(cli_args)
else:
parser.parse_args(["--help"])
if __name__ == "__main__":
main()