# Source: https://github.com/fenderglass/Ragout
# Raw file: synteny_backend.py
# Tip revision: 4b42ddec7d839ab6369faa31a49e2d3a8db7d124 authored by fenderglass
# on 26 April 2014, 05:29:56 UTC (commit message: "instal")
import logging
import os
from collections import namedtuple, defaultdict

logger = logging.getLogger()

class SyntenyBackend:
    """Base class for synteny backends plus a registry of available ones.

    A concrete backend overrides ``run_backend``; ``make_permutations``
    drives it and post-processes its output into per-genome block files.
    """

    # Maps backend name -> registered backend instance. Stored on the class,
    # so the registry is shared by every subclass and instance.
    backends = {}

    def __init__(self):
        pass

    def make_permutations(self, config, output_dir, overwrite):
        """Run the backend and prepare its output for further processing.

        For every block size returned by ``run_backend`` a sub-directory
        ``<output_dir>/<block_size>`` is created (if missing) and the
        permutations file for that size is split into per-genome files there.

        ``config`` must provide ``references`` and ``targets`` (dicts mapping
        a genome id to its sequence file) and ``blocks`` (the expected block
        sizes).

        Returns True on success, or False if ``run_backend`` raised; in that
        case the exception is only logged at debug level.
        """
        try:
            files = self.run_backend(config, output_dir, overwrite)
        except Exception as e:
            logger.debug(e)
            return False
        assert sorted(files.keys()) == sorted(config.blocks)

        if not files:
            # Nothing to split; avoid touching the genome files at all.
            return True

        # The chromosome -> genome mapping depends only on the input
        # sequences, not on the block size, so build it once instead of
        # re-reading every genome file on each loop iteration.
        all_genomes = dict(list(config.references.items()) +
                           list(config.targets.items()))
        chr_to_gen = _get_chr_names(all_genomes)

        for block_size, perm_file in files.items():
            block_dir = os.path.join(output_dir, str(block_size))
            if not os.path.isdir(block_dir):
                os.mkdir(block_dir)

            _split_permutations(chr_to_gen, config.references,
                                config.targets, perm_file, block_dir)
        return True

    def run_backend(self, config, output_dir, overwrite):
        """Run the backend; meant to be overridden by concrete backends.

        An implementation should return a dict of permutations files indexed
        by block size. This base version does nothing and returns None.
        """
        return None

    @staticmethod
    def get_available_backends():
        """Return the registry dict (name -> backend instance) itself."""
        return SyntenyBackend.backends

    @staticmethod
    def register_backend(name, instance):
        """Register ``instance`` under ``name``; names must be unique."""
        assert name not in SyntenyBackend.backends
        SyntenyBackend.backends[name] = instance


#Quickly get chromosome names
def _get_chr_names(genomes):
    chr_to_id = {}
    for seq_id, seq_file in genomes.items():
        for line in open(seq_file, "r"):
            if line.startswith(">"):
                contig_name = line.strip()[1:].split(" ")[0]
                chr_to_id[contig_name] = seq_id
    return chr_to_id


#Splits single premuation file generated by Sibelia into
#multiple files (one for each input genome)
def _split_permutations(chr_to_gen, references, targets, perm_file, out_dir):
    out_files = {}
    config = open(os.path.join(out_dir, "blocks.cfg"), "w")
    all_genomes = dict(list(references.items()) + list(targets.items()))

    for gen_id in set(chr_to_gen.values()):
        filename = all_genomes[gen_id]
        base = os.path.splitext(os.path.basename(filename))[0]
        block_file_base = base + ".blocks"
        block_file = os.path.join(out_dir, block_file_base)

        out_files[gen_id] = open(block_file, "w")
        if gen_id in references:
            config.write("REF {0}={1}\n".format(gen_id, block_file_base))
        else:
            assert gen_id in targets
            config.write("TARGET {0}={1}\n".format(gen_id, block_file_base))

    for line in open(perm_file, "r"):
        line = line.strip()
        if not line:
            continue

        if line.startswith(">"):
            name = line[1:]
        else:
            handle = out_files[chr_to_gen[name]]
            handle.write(">{0}\n{1}\n".format(name, line))
# (end of file; "back to top" page-footer residue removed from code)