Skip to main content
  • Home
  • Development
  • Documentation
  • Donate
  • Operational login
  • Browse the archive

swh logo
SoftwareHeritage
Software
Heritage
Archive
Features
  • Search

  • Downloads

  • Save code now

  • Add forge now

  • Help

https://github.com/fenderglass/Ragout
05 April 2024, 18:02:13 UTC
  • Code
  • Branches (21)
  • Releases (0)
  • Visits
    • Branches
    • Releases
    • HEAD
    • refs/heads/chr_map
    • refs/heads/devel
    • refs/heads/gh-pages
    • refs/heads/ismb_2014
    • refs/heads/master
    • refs/heads/path_cover
    • refs/heads/py3
    • refs/heads/rr_devel
    • refs/heads/tree_infer
    • refs/remotes/origin/devel
    • refs/tags/1.0
    • refs/tags/1.1
    • refs/tags/2.0
    • refs/tags/2.1
    • refs/tags/2.1.1
    • refs/tags/2.2
    • refs/tags/2.3
    • refs/tags/v0.1b
    • refs/tags/v0.2b
    • refs/tags/v0.3b
    • refs/tags/v1.2
    No releases to show
  • 4cbe8e4
  • /
  • ragout
  • /
  • breakpoint_graph
  • /
  • breakpoint_graph.py
Raw File Download Save again
Take a new snapshot of a software origin

If the archived software origin currently browsed is not synchronized with its upstream version (for instance when new commits have been issued), you can explicitly request Software Heritage to take a new snapshot of it.

Use the form below to proceed. Once a request has been submitted and accepted, it will be processed as soon as possible. You can then check its processing state by visiting this dedicated page.
swh spinner

Processing "take a new snapshot" request ...

To reference or cite the objects present in the Software Heritage archive, permalinks based on SoftWare Hash IDentifiers (SWHIDs) must be used.
Select below a type of object currently browsed in order to display its associated SWHID and permalink.

  • content
  • directory
  • revision
  • snapshot
origin badgecontent badge
swh:1:cnt:800fc2c77889efcd3a2ecfd5c1030ea4f051b557
origin badgedirectory badge
swh:1:dir:26947386b480ee415a03856d8895bfb76ec556e2
origin badgerevision badge
swh:1:rev:4b42ddec7d839ab6369faa31a49e2d3a8db7d124
origin badgesnapshot badge
swh:1:snp:12412e9d5850529b00b9f75cc3a4b47d1a47cc92

This interface lets you generate software citations, provided that the root directory of the browsed objects contains a citation.cff or codemeta.json file.
Select below a type of object currently browsed in order to generate citations for them.

  • content
  • directory
  • revision
  • snapshot
(requires biblatex-software package)
Generating citation ...
(requires biblatex-software package)
Generating citation ...
(requires biblatex-software package)
Generating citation ...
(requires biblatex-software package)
Generating citation ...
Tip revision: 4b42ddec7d839ab6369faa31a49e2d3a8db7d124 authored by fenderglass on 26 April 2014, 05:29:56 UTC
instal
Tip revision: 4b42dde
breakpoint_graph.py
#This module implements a breakpoint graph
#as well as the main algorithm that recovers missing 
#adjacencies
################################################

from collections import namedtuple
from itertools import chain
import os
import logging

import networkx as nx

from ragout.shared.debug import DebugConfig

#one inferred adjacency; find_adjacencies() stores these keyed by "start"
Connection = namedtuple("Connection", ["start", "end"])
#getLogger() is called without a name, so this is the root logger
logger = logging.getLogger()
#debug settings; this module reads its "debugging" flag and "debug_dir" path
debugger = DebugConfig.get_instance()

#PUBLIC:
################################################


class BreakpointGraph:
    """Breakpoint graph built from block permutations, plus the inference
    of the target's missing adjacencies.

    Every edge of ``bp_graph`` carries a ``genome_id`` attribute naming
    the permutation's genome that contains the adjacency. Nodes are block
    values as yielded by ``iter_blocks`` (and their negations), so blocks
    are assumed to support unary minus.
    """

    def __init__(self):
        self.bp_graph = nx.MultiGraph()
        #genome ids in order of first appearance, without duplicates
        self.targets = []
        self.references = []
        #initialised empty; not touched by any method of this class
        self.known_adjacencies = {}

    def build_from(self, perm_container, circular_refs):
        """Populate the graph from the container's filtered permutations.

        ``perm_container`` must provide ``ref_perms_filtered`` and
        ``target_perms_filtered``; every permutation must provide
        ``ref_id`` and ``iter_blocks(circular)``. ``circular_refs`` is
        forwarded to ``iter_blocks`` only for permutations whose id is a
        known reference; all others are iterated with ``False``.
        """
        logger.info("Building breakpoint graph")

        for perm in perm_container.ref_perms_filtered:
            if perm.ref_id not in self.references:
                self.references.append(perm.ref_id)

        for perm in perm_container.target_perms_filtered:
            if perm.ref_id not in self.targets:
                self.targets.append(perm.ref_id)

        for perm in chain(perm_container.ref_perms_filtered,
                          perm_container.target_perms_filtered):
            circular = circular_refs if perm.ref_id in self.references else False

            prev_block = None
            for block in perm.iter_blocks(circular):
                #identity test, so that a falsy block value is never
                #mistaken for "no previous block yet"
                if prev_block is None:
                    prev_block = block
                    continue

                #consecutive blocks are joined by an edge from the negated
                #previous block to the current one
                self.bp_graph.add_node(-prev_block)
                self.bp_graph.add_node(block)
                self.bp_graph.add_edge(-prev_block, block, genome_id=perm.ref_id)
                prev_block = block

    def find_adjacencies(self, phylogeny):
        """Infer missing adjacencies (the main Ragout part).

        Each connected component is trimmed with ``trim_known_edges``.
        Components left with fewer than two nodes are skipped, exactly two
        remaining nodes are joined directly, and larger ones are resolved
        by a matching on the graph returned by ``make_weighted``.

        Returns a dict holding, for every chosen edge ``(a, b)``, the
        entries ``-a -> Connection(-a, b)`` and ``-b -> Connection(-b, a)``.
        When ``debugger.debugging`` is set, the graph, the chosen edges and
        the phylogeny are also written into ``debugger.debug_dir``.
        """
        logger.info("Resolving breakpoint graph")
        chosen_edges = []

        for subgraph in nx.connected_component_subgraphs(self.bp_graph):
            trimmed_graph = self.trim_known_edges(subgraph)

            if len(trimmed_graph) < 2:
                continue

            if len(trimmed_graph) == 2:
                #only one possible pairing, no scoring required
                node_1, node_2 = trimmed_graph.nodes()
                chosen_edges.append((node_1, node_2))
                continue

            weighted_graph = self.make_weighted(trimmed_graph, phylogeny)
            chosen_edges.extend(_split_graph(weighted_graph))

        adjacencies = {}
        for edge in chosen_edges:
            adjacencies[-edge[0]] = Connection(-edge[0], edge[1])
            adjacencies[-edge[1]] = Connection(-edge[1], edge[0])

        if debugger.debugging:
            phylo_out = os.path.join(debugger.debug_dir, "phylogeny.txt")
            graph_out = os.path.join(debugger.debug_dir, "breakpoint_graph.dot")
            edges_out = os.path.join(debugger.debug_dir, "predicted_edges.dot")
            _output_graph(self.bp_graph, graph_out)
            _output_edges(chosen_edges, edges_out)
            _output_phylogeny(phylogeny.tree_string, self.targets[0], phylo_out)

        return adjacencies

    def trim_known_edges(self, graph):
        """Return a copy of ``graph`` without the target's known adjacencies.

        Both endpoints of every edge that carries the first target's
        ``genome_id`` are removed from the copy; ``graph`` itself is left
        untouched. Raises IndexError if an edge is examined while
        ``self.targets`` is empty.
        """
        trimmed_graph = graph.copy()
        for v1, v2 in graph.edges_iter():
            #an endpoint may already have been removed by an earlier edge
            if not trimmed_graph.has_node(v1) or not trimmed_graph.has_node(v2):
                continue

            #genome ids of all parallel edges between v1 and v2
            genome_ids = [edge["genome_id"] for edge in graph[v1][v2].values()]
            if self.targets[0] in genome_ids:
                trimmed_graph.remove_node(v1)
                trimmed_graph.remove_node(v2)

        return trimmed_graph

    def make_weighted(self, graph, phylogeny):
        """Convert a trimmed breakpoint graph into a simple weighted graph.

        For every node, each neighbour in turn is assigned as the first
        target's adjacency and the assignment is scored with
        ``phylogeny.estimate_tree``. The score is added to the weight of
        the edge between node and neighbour, so an edge visited from both
        of its endpoints accumulates both scores.

        ``graph`` must contain more than two nodes (checked by assert).
        """
        assert len(graph) > 2
        g = nx.Graph()
        g.add_nodes_from(graph.nodes())
        target_id = self.targets[0]

        for node in graph.nodes():
            #genome id -> neighbour adjacent to "node" in that genome
            adjacencies = {}
            for neighbor in graph.neighbors(node):
                for edge in graph[node][neighbor].values():
                    adjacencies[edge["genome_id"]] = neighbor

            for ref_id in self.references:
                if ref_id not in adjacencies:
                    adjacencies[ref_id] = None  #"void" state in paper

            for neighbor in graph.neighbors(node):
                #the target's entry is overwritten for every candidate
                adjacencies[target_id] = neighbor
                break_weight = phylogeny.estimate_tree(adjacencies)

                _update_edge(g, node, neighbor, break_weight)

        return g


#PRIVATE:
###########################################################################


def _split_graph(graph):
    """Return a minimum-weight, maximum-cardinality matching of ``graph``.

    NOTE: every edge weight of ``graph`` is negated in place, because
    networkx only offers a maximum-weight matching; the caller's graph
    keeps the negated weights afterwards.

    The result is a list of ``(v1, v2)`` tuples with one entry per matched
    pair, in the iteration order of the matching returned by networkx.
    """
    for v1, v2 in graph.edges_iter():
        graph[v1][v2]["weight"] = -graph[v1][v2]["weight"] #want minimum weight

    #the matching is consumed as a node -> mate mapping, so a matched pair
    #is expected to show up once per endpoint; keep the first orientation
    mates = nx.max_weight_matching(graph, maxcardinality=True)
    unique_edges = []
    seen = set()    #mirrors unique_edges for constant-time lookups
    for v1, v2 in mates.items():
        if (v2, v1) not in seen:
            seen.add((v1, v2))
            unique_edges.append((v1, v2))

    return unique_edges


def _update_edge(graph, v1, v2, weight):
    """Add ``weight`` to the edge between v1 and v2, creating it if absent.

    A missing edge is created with ``weight`` as its initial "weight"
    attribute; an existing edge has ``weight`` added to that attribute.
    """
    if graph.has_edge(v1, v2):
        graph[v1][v2]["weight"] += weight
        return

    graph.add_edge(v1, v2, weight=weight)

################################
#output generators

def _output_graph(graph, out_file):
    """Write ``graph`` to the path ``out_file`` in DOT format via networkx."""
    #the call is made for its side effect only, so nothing is kept from it
    nx.write_dot(graph, out_file)


def _output_edges(edges, out_file):
    """Write ``edges`` to the path ``out_file`` as an undirected DOT graph.

    ``edges`` is an iterable of ``(v1, v2)`` pairs; each pair becomes one
    ``v1 -- v2;`` line between ``graph {`` and the closing ``}``. An
    existing file is overwritten.
    """
    #"with" closes the file even if a write fails
    with open(out_file, "w") as fout:
        fout.write("graph {\n")
        for (v1, v2) in edges:
            fout.write("{0} -- {1};\n".format(v1, v2))
        fout.write("}")


def _output_phylogeny(tree_string, target_name, out_file):
    """Write the tree string and the target name to the path ``out_file``.

    The file holds ``tree_string`` on the first line, followed by
    ``target_name`` without a trailing newline. An existing file is
    overwritten.
    """
    #"with" closes the file even if a write fails
    with open(out_file, "w") as fout:
        fout.write(tree_string + "\n")
        fout.write(target_name)

back to top

Software Heritage — Copyright (C) 2015–2026, The Software Heritage developers. License: GNU AGPLv3+.
The source code of Software Heritage itself is available on our development forge.
The source code files archived by Software Heritage are available under their own copyright and licenses.
Terms of use: Archive access, API— Content policy— Contact— JavaScript license information— Web API