https://github.com/fenderglass/Ragout
Revision 54ec8318ee615f6aa28b3188cfd95ee5679cc317 authored by fenderglass on 15 January 2014, 22:20:24 UTC, committed by fenderglass on 15 January 2014, 22:20:24 UTC
1 parent b0a0b5a
Raw File
Tip revision: 54ec8318ee615f6aa28b3188cfd95ee5679cc317 authored by fenderglass on 15 January 2014, 22:20:24 UTC
sibelia path fix
Tip revision: 54ec831
sibelia_parser.py
import os
import sys
import shutil
import subprocess
import copy
import logging
from collections import namedtuple, defaultdict
from Bio import SeqIO

logger = logging.getLogger()

def run_sibelia(fasta_files, block_size, out_dir):
    SIBELIA_EXEC = "Sibelia"

    logger.info("Running Sibelia...")
    if not which(SIBELIA_EXEC):
        raise Exception("Sibelia is not installed")

    devnull = open(os.devnull, "w")
    cmdline = [SIBELIA_EXEC, "-s", "loose", "-m", str(block_size), "-o", out_dir]
    cmdline.extend(fasta_files)
    subprocess.check_call(cmdline, stdout=devnull)

    os.remove(os.path.join(out_dir, "coverage_report.txt"))
    os.remove(os.path.join(out_dir, "d3_blocks_diagram.html"))
    os.remove(os.path.join(out_dir, "blocks_coords.txt"))
    shutil.rmtree(os.path.join(out_dir, "circos"))

    return os.path.join(out_dir, "genomes_permutations.txt")


def get_chr_names(genomes):
    chr_to_id = {}
    for seq_id, seq_file in genomes.iteritems():
        for seq in SeqIO.parse(seq_file, "fasta"):
            chr_to_id[seq.id] = seq_id
    return chr_to_id


def split_permutations(chr_to_gen, references, targets, perm_file, out_dir):
    out_files = {}
    config = open(os.path.join(out_dir, "blocks.cfg"), "w")
    genomes = dict(references.items() + targets.items())

    for gen_id in set(chr_to_gen.values()):
        filename = genomes[gen_id]
        base = os.path.splitext(os.path.basename(filename))[0]
        block_file_base = base + ".blocks"
        block_file = os.path.join(out_dir, block_file_base)

        out_files[gen_id] = open(block_file, "w")
        if gen_id in references:
            config.write("REF {0}={1}\n".format(gen_id, block_file_base))
        else:
            assert gen_id in targets
            config.write("TARGET {0}={1}\n".format(gen_id, block_file_base))

    for line in open(perm_file, "r").read().splitlines():
        if line.startswith(">"):
            name = line[1:]
        else:
            handle = out_files[chr_to_gen[name]]
            handle.write(">{0}\n{1}\n".format(name, line))


def make_permutations(references, targets, block_size, output_dir):
    genomes = dict(references.items() + targets.items())
    perm_file = run_sibelia(genomes.values(), block_size, output_dir)
    chr_to_gen = get_chr_names(genomes)
    split_permutations(chr_to_gen, references, targets, perm_file, output_dir)


def which(program):
    def is_exe(fpath):
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

    fpath, fname = os.path.split(program)
    if fpath:
        if is_exe(program):
            return program
    else:
        for path in os.environ["PATH"].split(os.pathsep):
            path = path.strip('"')
            exe_file = os.path.join(path, program)
            if is_exe(exe_file):
                return exe_file

    return None
back to top