https://github.com/fenderglass/Ragout
Raw File
Tip revision: d3f213f733f5d80a62f00d4967b26edb7a06ba72 authored by Mikhail Kolmogorov on 22 March 2015, 19:11:36 UTC
1.1 release
Tip revision: d3f213f
main.py
#(c) 2013-2014 by Authors
#This file is a part of Ragout program.
#Released under the BSD license (see LICENSE file)

"""
The main Ragout module. It defines top-level logic of the program
"""

import os
import sys
import shutil
import logging
import argparse

import ragout.assembly_graph.assembly_refine as asref
import ragout.assembly_graph.assembly_graph as asgraph
import ragout.breakpoint_graph.breakpoint_graph as bg
import ragout.scaffolder.scaffolder as scfldr
import ragout.scaffolder.merge_iters as merge
import ragout.scaffolder.output_generator as out_gen
import ragout.maf2synteny.maf2synteny as m2s
import ragout.overlap.overlap as overlap
import ragout.shared.config as config
from ragout.overlap.overlap import OverlapException
from ragout.phylogeny.phylogeny import Phylogeny, PhyloException
from ragout.breakpoint_graph.permutation import (PermutationContainer,
                                                 PermException)
from ragout.synteny_backend.synteny_backend import (SyntenyBackend,
                                                    BackendException)
from ragout.parsers.recipe_parser import parse_ragout_recipe, RecipeException
from ragout.parsers.fasta_parser import read_fasta_dict, FastaError
from ragout.shared.debug import DebugConfig

#register backends
import synteny_backend.sibelia
import synteny_backend.cactus
import synteny_backend.maf
import synteny_backend.hal

logger = logging.getLogger()
debugger = DebugConfig.get_instance()


def enable_logging(log_file, debug):
    """
    Turns on logging, sets debug levels and assigns a log file
    """
    log_formatter = logging.Formatter("[%(asctime)s] %(name)s: %(levelname)s: "
                                      "%(message)s", "%H:%M:%S")
    console_formatter = logging.Formatter("[%(asctime)s] %(levelname)s: "
                                          "%(message)s", "%H:%M:%S")
    console_log = logging.StreamHandler()
    console_log.setFormatter(console_formatter)
    if not debug:
        console_log.setLevel(logging.INFO)

    file_handler = logging.FileHandler(log_file, mode="w")
    file_handler.setFormatter(log_formatter)

    logger.setLevel(logging.DEBUG)
    logger.addHandler(console_log)
    logger.addHandler(file_handler)


def check_extern_modules(backend):
    """
    Checks if all neccessary native modules are available
    """
    backends = SyntenyBackend.get_available_backends()
    if backend not in backends:
        raise BackendException("\"{0}\" is not installed.".format(backend))

    if not m2s.check_binary():
        raise BackendException("maf2synteny binary is missing, "
                               "did you run 'make'?")

    if not overlap.check_binary():
        raise BackendException("overlap binary is missing, "
                               "did you run 'make'?")


def run_ragout(args):
    """
    A wrapper to catch possible exceptions
    """
    try:
        run_unsafe(args)
    except (RecipeException, PhyloException, PermException,
            BackendException, OverlapException, FastaError) as e:
        logger.error("An error occured while running Ragout:")
        logger.error(e)
        return 1

    return 0


def run_unsafe(args):
    """
    Top-level logic of the program
    """
    out_log = os.path.join(args.out_dir, "ragout.log")
    out_links = os.path.join(args.out_dir, "scaffolds.links")
    out_scaffolds = os.path.join(args.out_dir, "scaffolds.fasta")
    out_overlap = os.path.join(args.out_dir, "contigs_overlap.dot")
    out_refined_links = os.path.join(args.out_dir, "scaffolds_refined.links")
    out_refined_scaffolds = os.path.join(args.out_dir,
                                         "scaffolds_refined.fasta")
    debug_root = os.path.join(args.out_dir, "debug")

    if not os.path.isdir(args.out_dir):
        os.mkdir(args.out_dir)
    if args.debug:
        debugger.set_debug_dir(debug_root)
        debugger.clear_debug_dir()

    enable_logging(out_log, args.debug)
    logger.info("Cooking Ragout...")

    check_extern_modules(args.synteny_backend)
    all_backends = SyntenyBackend.get_available_backends()
    backend = all_backends[args.synteny_backend]
    recipe = parse_ragout_recipe(args.recipe)

    #Setting synteny block sizes
    if "blocks" in recipe:
        scale = recipe["blocks"]
    else:
        scale = backend.infer_block_scale(recipe)
        logger.info("Synteny block scale set to '{0}'".format(scale))
    synteny_blocks = config.vals["blocks"][scale]

    #Running backend to get synteny blocks
    perm_files = backend.make_permutations(recipe, synteny_blocks, args.out_dir,
                                           args.overwrite, args.threads)

    #phylogeny-related
    if "tree" in recipe:
        logger.info("Phylogeny is taken from the recipe")
        phylogeny = Phylogeny.from_newick(recipe["tree"])
    else:
        logger.info("Inferring phylogeny from synteny blocks data")
        small_blocks = min(synteny_blocks)
        perm_container = PermutationContainer(perm_files[small_blocks],
                                              recipe, False, False, None)
        phylogeny = Phylogeny.from_permutations(perm_container)
        logger.info(phylogeny.tree_string)

    #main loop
    last_scaffolds = None
    for block_size in synteny_blocks:
        logger.info("Running Ragout with the block size {0}".format(block_size))

        if args.debug:
            debug_dir = os.path.join(debug_root, str(block_size))
            debugger.set_debug_dir(debug_dir)

        conservative = last_scaffolds is None
        perm_container = PermutationContainer(perm_files[block_size],
                                              recipe, args.resolve_repeats,
                                              conservative, phylogeny)

        graph = bg.BreakpointGraph()
        graph.build_from(perm_container, recipe)

        adjacencies = graph.find_adjacencies(phylogeny)
        scaffolds = scfldr.get_scaffolds(adjacencies, perm_container)

        if last_scaffolds is not None:
            last_scaffolds = merge.merge(last_scaffolds, scaffolds)
        else:
            last_scaffolds = scaffolds

    if args.debug:
        debugger.set_debug_dir(debug_root)

    logger.info("Reading contigs file")
    target_fasta_file = backend.get_target_fasta()
    target_fasta_dict = read_fasta_dict(target_fasta_file)

    out_gen.output_links(last_scaffolds, out_links)
    out_gen.output_fasta(target_fasta_dict, last_scaffolds, out_scaffolds)

    if not args.no_refine:
        overlap.make_overlap_graph(target_fasta_file, out_overlap)
        refined_scaffolds = asref.refine_scaffolds(out_overlap, last_scaffolds,
                                                   target_fasta_dict)
        out_gen.output_links(refined_scaffolds, out_refined_links)
        out_gen.output_fasta(target_fasta_dict, refined_scaffolds,
                             out_refined_scaffolds)
        if args.debug:
            shutil.copy(out_overlap, debugger.debug_dir)
            out_colored_overlap = os.path.join(debugger.debug_dir,
                                               "colored_overlap.dot")
            asgraph.save_colored_insert_overlap_graph(out_overlap, last_scaffolds,
                                                      refined_scaffolds,
                                                      out_colored_overlap)
        os.remove(out_overlap)

    logger.info("Your Ragout is ready!")


def main():
    parser = argparse.ArgumentParser(description="A tool for assisted assembly"
                                                 " using multiple references",
                                     formatter_class= \
                                        argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument("recipe", metavar="recipe_file",
                        help="path to recipe file")
    parser.add_argument("-o", "--outdir", dest="out_dir",
                        metavar="output_dir",
                        help="path to the working directory",
                        default="ragout-out")
    parser.add_argument("-s", "--synteny", dest="synteny_backend",
                        default="sibelia",
                        choices=["sibelia", "cactus", "maf", "hal"],
                        help="backend for synteny block decomposition")
    parser.add_argument("--no-refine", action="store_true",
                        dest="no_refine", default=False,
                        help="disable refinement with assembly graph")
    parser.add_argument("--overwrite", action="store_true", default=False,
                        dest="overwrite",
                        help="overwrite existing synteny blocks")
    parser.add_argument("--repeats", action="store_true", default=False,
                        dest="resolve_repeats",
                        help="try to resolve repeats before constructing "
                             "breakpoint graph")
    parser.add_argument("--debug", action="store_true",
                        dest="debug", default=False,
                        help="enable debug output")
    parser.add_argument("-t", "--threads", dest="threads", type=int,
                        default=1, help="number of threads for synteny backend")
    parser.add_argument("--version", action="version", version="Ragout v1.1")
    args = parser.parse_args()

    return run_ragout(args)
back to top