Revision 480a94cbe722741cd3f1c2d35549a5af06c3e774 authored by fenderglass on 18 July 2014, 18:59:24 UTC, committed by fenderglass on 18 July 2014, 18:59:24 UTC
1 parent 5bc5199
Raw File
cactus.py
#(c) 2013-2014 by Authors
#This file is a part of Ragout program.
#Released under the BSD license (see LICENSE file)

"""
This module runs progressiveCactus
"""

import os
import sys
import shutil
import subprocess
import multiprocessing
import logging

from .synteny_backend import SyntenyBackend, BackendException
from ragout.shared import config
import ragout.maf2synteny.maf2synteny as m2s

CACTUS_EXEC = "bin/runProgressiveCactus.sh"
CACTUS_WORKDIR = "cactus-workdir"

try:
    CACTUS_INSTALL = os.environ["CACTUS_INSTALL"]
except:
    CACTUS_INSTALL = ""
logger = logging.getLogger()

class CactusBackend(SyntenyBackend):
    def __init__(self):
        SyntenyBackend.__init__(self)


    def run_backend(self, recipe, output_dir, overwrite):
        return _make_permutations(recipe, output_dir, overwrite)


if os.path.isfile(os.path.join(CACTUS_INSTALL, CACTUS_EXEC)):
    logger.debug("progressiveCactus is installed")
    SyntenyBackend.register_backend("cactus", CactusBackend())
else:
    logger.debug("progressiveCactus is not installed")


def _make_permutations(recipe, output_dir, overwrite):
    """
    Runs Cactus, then outputs preprocessesed results into output_dir
    """
    work_dir = os.path.join(output_dir, CACTUS_WORKDIR)
    files = {}

    if overwrite and os.path.isdir(work_dir):
        shutil.rmtree(work_dir)

    if os.path.isdir(work_dir):
        #using existing results
        logger.warning("Using existing Cactus results from previous run")
        logger.warning("Use --overwrite to force alignment")
        for block_size in recipe["blocks"]:
            block_dir = os.path.join(work_dir, str(block_size))
            coords_file = os.path.join(block_dir, "blocks_coords.txt")
            if not os.path.isfile(coords_file):
                raise BackendException("Exitsing results are incompatible "
                                       "with input recipe")
            files[block_size] = os.path.abspath(coords_file)

    else:
        #running cactus
        for genome, params in recipe["genomes"].items():
            if "fasta" not in params:
                raise BackendException("FASTA file for {0} is not "
                                       "specified".format(genome))

        os.mkdir(work_dir)
        config_path = _make_cactus_config(recipe, work_dir)
        ref_genome = recipe["target"]
        maf_file = _run_cactus(config_path, ref_genome, work_dir)

        logger.info("Converting maf to synteny")
        if not m2s.make_synteny(maf_file, work_dir, recipe["blocks"]):
            raise BackendException("Something went wrong with maf2synteny")

        for block_size in recipe["blocks"]:
            block_dir = os.path.join(work_dir, str(block_size))
            coords_file = os.path.join(block_dir, "blocks_coords.txt")
            files[block_size] = os.path.abspath(coords_file)
            if not os.path.exists(coords_file):
                raise BackendException("Something bad happened!")

    return files


def _make_cactus_config(recipe, directory):
    """
    Creates cactus' "seq" file
    """
    CONF_NAME = "cactus.cfg"
    file = open(os.path.join(directory, CONF_NAME), "w")
    file.write(recipe["tree"] + "\n")

    for genome, params in recipe["genomes"].items():
        if genome != recipe["target"]:
            file.write("*")
        file.write("{0} {1}\n".format(genome, os.path.abspath(params["fasta"])))

    return file.name


def _run_cactus(config_path, ref_genome, out_dir):
    CACTUS_OUT = "alignment.hal"
    HAL2MAF = "submodules/hal/bin/hal2maf"
    MAF_OUT = "cactus.maf"

    logger.info("Running progressiveCactus...")
    work_dir = os.path.abspath(out_dir)
    out_hal = os.path.join(work_dir, CACTUS_OUT)
    out_maf = os.path.join(work_dir, MAF_OUT)
    config_file = os.path.abspath(config_path)
    prev_dir = os.getcwd()

    num_proc = min(config.vals["cactus_max_threads"],
                   multiprocessing.cpu_count())
    threads_param = "--maxThreads=" + str(num_proc)

    os.chdir(CACTUS_INSTALL)
    devnull = open(os.devnull, "w")
    cmdline = [CACTUS_EXEC, config_file, work_dir, out_hal, threads_param]
    try:
        subprocess.check_call(cmdline, stdout=devnull)
    except subprocess.CalledProcessError:
        raise BackendException()

    #convert to maf
    logger.info("Converting HAL to MAF...")
    cmdline = [HAL2MAF, out_hal, out_maf, "--noAncestors",
               "--refGenome", ref_genome, "--ucscNames"]
    subprocess.check_call(cmdline, stdout=devnull)

    os.chdir(prev_dir)
    return out_maf
back to top