Skip to main content
  • Home
  • Development
  • Documentation
  • Donate
  • Operational login
  • Browse the archive

swh logo
SoftwareHeritage
Software
Heritage
Archive
Features
  • Search

  • Downloads

  • Save code now

  • Add forge now

  • Help

https://github.com/pierre-guillou/pdiags_bench
13 February 2023, 11:53:21 UTC
  • Code
  • Branches (5)
  • Releases (0)
  • Visits
    • Branches
    • Releases
    • HEAD
    • refs/heads/main
    • refs/heads/mesu
    • refs/heads/mesu_bench
    • refs/heads/saddle_pairs
    • refs/heads/zomo_variants
    No releases to show
  • 95e447f
  • /
  • compare_diags.py
Raw File Download
Take a new snapshot of a software origin

If the archived software origin currently browsed is not synchronized with its upstream version (for instance when new commits have been issued), you can explicitly request Software Heritage to take a new snapshot of it.

Use the form below to proceed. Once a request has been submitted and accepted, it will be processed as soon as possible. You can then check its processing state by visiting this dedicated page.
swh spinner

Processing "take a new snapshot" request ...

Permalinks

To reference or cite the objects present in the Software Heritage archive, permalinks based on SoftWare Hash IDentifiers (SWHIDs) must be used.
Select below a type of object currently browsed in order to display its associated SWHID and permalink.

  • content
  • directory
  • revision
  • snapshot
origin badgecontent badge Iframe embedding
swh:1:cnt:c7dafe25d49b359b3a192e163c7b8b765cc6a905
origin badgedirectory badge Iframe embedding
swh:1:dir:95e447fd05b7bb9564fba216a8957d42fbdb26ff
origin badgerevision badge
swh:1:rev:8a2de479abf7bcbf2002b4f8f620c635217cc088
origin badgesnapshot badge
swh:1:snp:49df835a23e1f2a8e7b91973ed24143919372067
Citations

This interface lets you generate software citations, provided that the root directory of the browsed objects contains a citation.cff or codemeta.json file.
Select below a type of object currently browsed in order to generate citations for them.

  • content
  • directory
  • revision
  • snapshot
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Tip revision: 8a2de479abf7bcbf2002b4f8f620c635217cc088 authored by Julien J Tierny on 30 January 2023, 16:30:54 UTC
added reference to the arxiv repo
Tip revision: 8a2de47
compare_diags.py
#!/usr/bin/env python3

import argparse
import difflib
import itertools
import math

import topologytoolkit as ttk
import vtk


def read_file(fname):
    """Load a diagram file with the reader matching its extension.

    The extension is the text after the last "." in ``fname``:
    ``vtu`` is read with ``vtk.vtkXMLUnstructuredGridReader``, ``dipha``
    with ``ttk.ttkDiphaReader`` and ``gudhi`` with
    ``ttk.ttkGudhiPersistenceDiagramReader``.

    Returns:
        The reader's output after ``Update()``, or ``None`` when the
        extension is not one of the three listed above.
    """
    suffix = fname.rsplit(".", 1)[-1]
    if suffix not in ("vtu", "dipha", "gudhi"):
        # Unknown format: no reader is created
        return None

    if suffix == "vtu":
        loader = vtk.vtkXMLUnstructuredGridReader()
    elif suffix == "dipha":
        loader = ttk.ttkDiphaReader()
    else:
        loader = ttk.ttkGudhiPersistenceDiagramReader()

    loader.SetFileName(fname)
    loader.Update()
    return loader.GetOutput()


def read_diag(diag, filter_inf=False):
    """Read a persistence diagram file and group its pairs by pair type.

    Args:
        diag: path of the diagram file, handed to ``read_file``.
        filter_inf: when true, cells whose "IsFinite" value is falsy are
            skipped.

    Returns:
        A list of three lists, indexed by the integer "PairType" value of
        each cell. Each entry holds the first two coordinates of point
        ``2 * i + 1`` for cell ``i``, and every list is sorted. When the
        dataset has no points, the three lists are empty (the shape is
        kept so that callers can always index the result).
    """
    diag = read_file(diag)
    ptype = diag.GetCellData().GetArray("PairType")
    ifin = diag.GetCellData().GetArray("IsFinite")
    pts = diag.GetPoints()
    pairs = [[] for _ in range(3)]
    if pts is None:
        # Empty diagram: same three-list shape as the regular return value
        return pairs
    # assert 2 * ptype.GetNumberOfTuples() - 2 == pts.GetNumberOfPoints()
    for i in range(ptype.GetNumberOfTuples()):
        j = int(ptype.GetTuple1(i))
        # Pair type -1 is never stored (presumably the diagonal cell -- confirm)
        if j == -1 or (filter_inf and not bool(ifin.GetTuple1(i))):
            continue
        # Keep only the first two coordinates of the cell's second point
        pairs[j].append(pts.GetPoint(2 * i + 1)[0:2])
    for pr in pairs:
        pr.sort()
    return pairs


def print_diff(pairs0, pairs1):
    """Print a colored unified diff between two lists of pairs.

    Each pair ``(a, b)`` is rendered as the line ``"a b"``. Lines of the
    diff starting with "+" are printed in green, lines starting with "-"
    in red, and all other lines unchanged.

    Args:
        pairs0: iterable of 2-element pairs (the "from" side of the diff).
        pairs1: iterable of 2-element pairs (the "to" side of the diff).
    """
    p0 = [f"{a} {b}" for (a, b) in pairs0]
    p1 = [f"{a} {b}" for (a, b) in pairs1]
    # The rendered lines carry no trailing newline, so ask difflib for
    # newline-free control lines too; otherwise the "---"/"+++"/"@@" lines
    # end with "\n" and print() emits a spurious blank line after them.
    diff = difflib.unified_diff(p0, p1, lineterm="")
    GREEN = "\033[92m"
    RED = "\033[91m"
    ENDC = "\033[0m"
    for d in diff:
        if d.startswith("+"):
            print(f"{GREEN}{d}{ENDC}")
        elif d.startswith("-"):
            print(f"{RED}{d}{ENDC}")
        else:
            print(d)


def compare_pairs(pairs0, pairs1, ptype, show_diff):
    """Compare two lists of persistence pairs and return their distance.

    The two lists are aligned with ``difflib.SequenceMatcher``; the pairs
    common to both are discarded and a distance is computed on what
    remains. If the optional ``diagram_distance`` module can be imported it
    is used for that distance, otherwise a coarse overapproximation is
    computed in pure Python.

    Args:
        pairs0: list of ``(birth, death)`` pairs of the first diagram.
        pairs1: list of ``(birth, death)`` pairs of the second diagram.
        ptype: label of the pair type, only used in the printed messages.
        show_diff: when true, print a unified diff of the two lists.

    Returns:
        ``0.0`` when the lists are identical, otherwise the computed
        distance between the differing pairs.
    """
    sm = difflib.SequenceMatcher(isjunk=None, a=pairs0, b=pairs1)
    if math.isclose(sm.ratio(), 1.0):
        print(f"> Identical {ptype} pairs")
        return 0.0

    if show_diff:
        print_diff(pairs0, pairs1)

    # discard common pairs between diagrams
    rem0 = []
    rem1 = []
    for tag, a_lo, a_hi, b_lo, b_hi in sm.get_opcodes():
        if tag in ("replace", "delete"):
            rem0.extend(pairs0[a_lo:a_hi])
        if tag in ("replace", "insert"):
            rem1.extend(pairs1[b_lo:b_hi])

    def dist_to_empty(pairs):
        # distance from the given pairs to the empty diagram
        # (square root of half the sum of squared persistences)
        sq_dist = sum((d - b) ** 2 for (b, d) in pairs) / 2.0
        return math.sqrt(sq_dist)

    print(f"Comparing {len(rem0)} and {len(rem1)} different {ptype} pair")

    if not rem0:
        # compute distance between rem1 and empty diagram
        wass_dist = dist_to_empty(rem1)
    elif not rem1:
        # compute distance between rem0 and empty diagram
        wass_dist = dist_to_empty(rem0)
    else:
        try:
            import diagram_distance as diagdist

            # store rem0 and rem1 in temporary files
            # NOTE(review): fixed names under /tmp are shared by concurrent
            # runs -- consider the tempfile module if that matters
            with open("/tmp/diag0.gudhi", "w") as dst:
                for b, d in rem0:
                    dst.write(f"0 {b} {d}\n")
            with open("/tmp/diag1.gudhi", "w") as dst:
                for b, d in rem1:
                    dst.write(f"0 {b} {d}\n")

            # compute the distance with the AUCTION method
            # (the meaning of the 1.0 and 3600 arguments is defined by
            # diagram_distance.get_diag_dist -- confirm there)
            dists = diagdist.get_diag_dist(
                "/tmp/diag0.gudhi",
                "/tmp/diag1.gudhi",
                1.0,
                diagdist.DistMethod.AUCTION,
                3600,
            )
            try:
                wass_dist = dists["min-sad"]
            except KeyError:
                wass_dist = dists["sad-max"]

        except ImportError:
            print("Fallback to Wassertein overapproximation")
            # compute an overapproximation of the Wasserstein distance:
            # pairs are matched by rank, unmatched ones against (0.0, 0.0)
            res = 0.0
            for (ba, da), (bb, db) in itertools.zip_longest(
                rem0, rem1, fillvalue=(0.0, 0.0)
            ):
                res += (bb - ba) ** 2 + (db - da) ** 2

            wass_dist = math.sqrt(res)

    # compute the distance from pairs0 to the empty diagram
    ref_dist = dist_to_empty(pairs0)

    # pairs0 may be empty or hold only zero-persistence pairs, in which
    # case the reference distance is 0 and the ratio is undefined
    if ref_dist > 0.0:
        rel_dist = wass_dist / ref_dist
    else:
        rel_dist = math.inf if wass_dist > 0.0 else 0.0

    print(
        f"> Differences in {ptype} pairs "
        f"(Wasserstein approx: {wass_dist:.8g}, {rel_dist:.3%} from empty diagram)"
    )
    return wass_dist


def main(diag0, diag1, show_diff=True, filter_inf=False):
    """Compare two persistence diagram files, pair type by pair type.

    Args:
        diag0: path of the first diagram.
        diag1: path of the second diagram.
        show_diff: forwarded to ``compare_pairs`` to print a unified diff.
        filter_inf: forwarded to ``read_diag`` to drop non-finite pairs.

    Returns:
        A dict mapping each pair-type label to the value returned by
        ``compare_pairs`` for that type.
    """
    print(f"Comparing {diag0} and {diag1}...")
    first = read_diag(diag0, filter_inf)
    second = read_diag(diag1, filter_inf)

    # The labels are chosen from which groups of the first diagram are empty
    if not first[1]:
        labels = ["min-max"]
    elif not first[2]:
        labels = ["min-saddle", "saddle-max"]
    else:
        labels = ["min-saddle", "saddle-saddle", "saddle-max"]

    # zip() stops at the shortest input, so only the labelled groups are compared
    return {
        label: compare_pairs(group0, group1, label, show_diff)
        for group0, group1, label in zip(first, second, labels)
    }


if __name__ == "__main__":
    # Command-line entry point: two positional diagram paths and two
    # optional boolean switches, all forwarded to main().
    cli = argparse.ArgumentParser(description="Compare two diagrams with Python difflib")
    cli.add_argument("diag0", help="First diagram")
    cli.add_argument("diag1", help="Second diagram")
    cli.add_argument("-s", "--show_diff", action="store_true", help="Show diff")
    cli.add_argument(
        "-f",
        "--filter_inf",
        action="store_true",
        help="Only consider finite pairs",
    )

    opts = cli.parse_args()
    main(opts.diag0, opts.diag1, opts.show_diff, opts.filter_inf)

Software Heritage — Copyright (C) 2015–2025, The Software Heritage developers. License: GNU AGPLv3+.
The source code of Software Heritage itself is available on our development forge.
The source code files archived by Software Heritage are available under their own copyright and licenses.
Terms of use: Archive access, API— Contact— JavaScript license information— Web API

back to top