https://github.com/fenderglass/Ragout
Revision 54ec8318ee615f6aa28b3188cfd95ee5679cc317 authored by fenderglass on 15 January 2014, 22:20:24 UTC, committed by fenderglass on 15 January 2014, 22:20:24 UTC
1 parent b0a0b5a
Raw File
Tip revision: 54ec8318ee615f6aa28b3188cfd95ee5679cc317 authored by fenderglass on 15 January 2014, 22:20:24 UTC
sibelia path fix
Tip revision: 54ec831
breakpoint_graph.py
import networkx as nx
from collections import namedtuple
import os
import logging

from permutation import *
from debug import DebugConfig, write_dot
import phylogeny as phylo


Connection = namedtuple("Connection", ["start", "end"])
logger = logging.getLogger()

class BreakpointGraph:
    def __init__(self):
        self.bp_graph = nx.MultiGraph()
        self.targets = []
        self.references = []
        self.known_adjacencies = {}


    def build_from(self, perm_container, circular):
        logger.info("Building breakpoint graph")

        for perm in perm_container.ref_perms_filtered:
            if perm.ref_id not in self.references:
                self.references.append(perm.ref_id)

            prev = None
            for block in perm.iter_blocks(circular):
                if not prev:
                    prev = block
                    continue

                left_block = prev
                right_block = block
                self.bp_graph.add_node(-left_block)
                self.bp_graph.add_node(right_block)
                self.bp_graph.add_edge(-left_block, right_block, ref_id=perm.ref_id)

                prev = block

        for perm in perm_container.target_perms_filtered:
            if perm.ref_id not in self.targets:
                self.targets.append(perm.ref_id)

            prev = None
            for block in perm.iter_blocks(False):
                if not prev:
                    prev = block
                    continue

                self.known_adjacencies[-prev] = block
                self.known_adjacencies[block] = -prev
                prev = block


    def find_adjacencies(self, phylogeny):
        logger.info("Resolving breakpoint graph")
        chosen_edges = []
        subgraphs = nx.connected_component_subgraphs(self.bp_graph)

        for comp_id, subgraph in enumerate(subgraphs):
            trimmed_graph = self.trim_known_edges(subgraph)

            if len(trimmed_graph) < 2:
                continue

            if len(trimmed_graph) == 2:
                node_1, node_2 = trimmed_graph.nodes()
                chosen_edges.append((node_1, node_2))
                continue

            weighted_graph = self.make_weighted(trimmed_graph, phylogeny)
            matching_edges = split_graph(weighted_graph)
            chosen_edges.extend(matching_edges)

            if DebugConfig.get_writer().debugging:
                debug_dir = DebugConfig.get_writer().debug_dir
                debug_draw_component(comp_id, weighted_graph, subgraph, debug_dir)

        adjacencies = {}
        for edge in chosen_edges:
            adjacencies[-edge[0]] = Connection(-edge[0], edge[1])
            adjacencies[-edge[1]] = Connection(-edge[1], edge[0])

        return adjacencies


    def trim_known_edges(self, graph):
        trimmed_graph = graph.copy()
        for v1, v2, data in graph.edges_iter(data=True):
            if not trimmed_graph.has_node(v1) or not trimmed_graph.has_node(v2):
                continue

            if self.known_adjacencies.get(v1, None) == v2:
                trimmed_graph.remove_node(v1)
                trimmed_graph.remove_node(v2)

        return trimmed_graph


    def make_weighted(self, graph, phylogeny):
        assert len(graph) > 2
        g = nx.Graph()
        g.add_nodes_from(graph.nodes())
        target_id = self.targets[0]

        for node in graph.nodes():
            adjacencies = {}
            for neighbor in graph.neighbors(node):
                for edge in graph[node][neighbor].values():
                    adjacencies[edge["ref_id"]] = neighbor

            for ref_id in self.references:
                if ref_id not in adjacencies:
                    adjacencies[ref_id] = None  #"void" state in paper

            for neighbor in graph.neighbors(node):
                break_weight = 0.0
                if not (self.known_adjacencies.get(node, None) == neighbor):
                    adjacencies[target_id] = neighbor
                    break_weight = phylogeny.estimate_tree(adjacencies)

                update_edge(g, node, neighbor, break_weight)

        return g


def split_graph(graph):
    for v1, v2 in graph.edges_iter():
        graph[v1][v2]["weight"] = -graph[v1][v2]["weight"] #want minimum weight

    edges = nx.max_weight_matching(graph, maxcardinality=True)
    unique_edges = []
    for v1, v2 in edges.iteritems():
        if not (v2, v1) in unique_edges:
            unique_edges.append((v1, v2))

    return unique_edges


def update_edge(graph, v1, v2, weight):
    if not graph.has_edge(v1, v2):
        graph.add_edge(v1, v2, weight=weight)
    else:
        graph[v1][v2]["weight"] += weight


def debug_draw_component(comp_id, weighted_graph, breakpoint_graph, debug_dir):
    if len(breakpoint_graph) == 2:
        return

    for e in weighted_graph.edges_iter():
        weighted_graph[e[0]][e[1]]["label"] = ("{0:7.4f}"
                                    .format(weighted_graph[e[0]][e[1]]["weight"]))
    bg_out = os.path.join(debug_dir, "comp{0}-bg.dot".format(comp_id))
    weighted_out = os.path.join(debug_dir, "comp{0}-weighted.dot".format(comp_id))
    write_dot(breakpoint_graph, open(bg_out, "w"))
    write_dot(weighted_graph, open(weighted_out, "w"))
back to top