ragout.py
#!/usr/bin/env python
import os
import sys
import logging
import argparse
import source.overlap as ovlp
import source.scaffolder as scfldr
import source.sibelia_parser as sp
import source.merge_iters as merge
import source.breakpoint_graph as bg
import source.config_parser as cparser
import source.assembly_refine as asref
from source.phylogeny import Phylogeny
from source.debug import DebugConfig
from source.permutation import PermutationContainer
SIBELIA_BIN = "../Sibelia/distr/bin/"
running_dir = os.path.dirname(os.path.realpath(__file__))
os.environ["PATH"] += os.pathsep + os.path.join(running_dir, SIBELIA_BIN)
logger = logging.getLogger()
def enable_logging(log_file):
log_formatter = logging.Formatter("[%(asctime)s] %(name)s: %(levelname)s: %(message)s", "%H:%M:%S")
console_formatter = logging.Formatter("[%(asctime)s] %(levelname)s: %(message)s", "%H:%M:%S")
console_log = logging.StreamHandler()
console_log.setLevel(logging.INFO)
console_log.setFormatter(console_formatter)
file_handler = logging.FileHandler(log_file, mode="w")
file_handler.setFormatter(log_formatter)
logger.setLevel(logging.DEBUG)
logger.addHandler(console_log)
logger.addHandler(file_handler)
def do_job(config_file, out_dir, skip_sibelia, assembly_refine):
if not os.path.isdir(out_dir):
sys.stderr.write("Output directory doesn`t exists\n")
return
config = cparser.parse_ragout_config(config_file)
phylogeny = Phylogeny(config.tree)
out_log = os.path.join(out_dir, "log.txt")
out_order = os.path.join(out_dir, "scaffolds.ord")
out_scaffolds = os.path.join(out_dir, "scaffolds.fasta")
out_overlap = os.path.join(out_dir, "contigs_overlap.dot")
out_refined_order = os.path.join(out_dir, "scaffolds_refined.ord")
out_refined_scaffolds = os.path.join(out_dir, "scaffolds_refined.fasta")
enable_logging(out_log)
last_scaffolds = None
logger.info("Cooking Ragout...")
for block_size in config.blocks:
logger.info("Running with the block size {0}...".format(block_size))
block_dir = os.path.join(out_dir, str(block_size))
if not os.path.isdir(block_dir):
os.mkdir(block_dir)
block_config = os.path.join(block_dir, "blocks.cfg")
block_order = os.path.join(block_dir, "scaffolds.ord")
debug_dir = os.path.join(block_dir, "debug")
DebugConfig.get_writer().set_debug_dir(debug_dir)
if not skip_sibelia:
sp.make_permutations(config.references, config.targets,
block_size, block_dir)
perm_container = PermutationContainer(block_config)
graph = bg.BreakpointGraph()
graph.build_from(perm_container, True)
connections = graph.find_adjacencies(phylogeny)
scaffolds = scfldr.get_scaffolds(connections, perm_container)
scfldr.output_order(scaffolds, block_order)
if last_scaffolds:
last_scaffolds = merge.merge(last_scaffolds, scaffolds)
else:
last_scaffolds = scaffolds
scfldr.output_order(last_scaffolds, out_order)
scfldr.output_scaffolds(config.targets, last_scaffolds, out_scaffolds)
if assembly_refine:
MIN_OVERLAP = 33
MAX_PATH_LEN = 6
ovlp.make_overlap_graph(config.targets, out_overlap, MIN_OVERLAP)
refined_scaffolds = asref.refine_contigs(out_overlap, last_scaffolds, MAX_PATH_LEN)
scfldr.output_order(refined_scaffolds, out_refined_order)
scfldr.output_scaffolds(config.targets, refined_scaffolds, out_refined_scaffolds)
logger.info("Your Ragout is ready!")
def main():
parser = argparse.ArgumentParser(description="A tool for assisted assembly using multiple references")
parser.add_argument("-c", action="store", metavar="config", dest="config",
required=True, help="configuration file")
parser.add_argument("-o", action="store", metavar="output_dir",
dest="output_dir", required=True, help="output directory")
parser.add_argument("-s", action="store_const", metavar="skip_sibelia",
dest="skip_sibelia", default=False, const=True,
help="skip Sibelia running step")
parser.add_argument("-g", action="store_const", metavar="assembly_refine",
dest="assembly_refine", default=False, const=True,
help="refine with the assembly graph")
args = parser.parse_args()
do_job(args.config, args.output_dir, args.skip_sibelia, args.assembly_refine)
if __name__ == "__main__":
main()