#(c) 2013-2014 by Authors
#This file is a part of Ragout program.
#Released under the BSD license (see LICENSE file)

"""
This module parses the Ragout configuration (recipe) file
"""

from collections import namedtuple
import re
import os
import logging

from ragout.parsers.phylogeny_parser import get_leaves_names, PhyloException

#module-level logger; getLogger() called without a name returns the root logger
logger = logging.getLogger()

class RecipeException(Exception):
    """Raised when a Ragout recipe file is missing, malformed or incomplete."""
    pass

def parse_ragout_recipe(filename):
    """
    Parses a Ragout recipe file and returns its contents as a dictionary.

    Every non-empty, non-comment line must look like ``<obj>.<param> = <value>``:

    * an empty ``<obj>`` (``.tree = ...``) sets a global parameter, stored
      as ``recipe_dict[param]``;
    * ``*`` as ``<obj>`` sets a default that is applied to every genome
      which does not define that parameter itself;
    * any other ``<obj>`` is treated as a genome name and the parameter is
      stored in ``recipe_dict["genomes"][obj][param]``.

    "circular" and "draft" values are converted to bool, "blocks" to a list
    of ints, and "fasta" / "maf" paths are joined with the directory of the
    recipe file.

    Returns the resulting dictionary. It always contains the "genomes" key
    plus the required "tree", "target" and "blocks" keys.

    Raises RecipeException if the file does not exist, a line can not be
    parsed, a parameter is unknown or has a wrong value, a required
    parameter is missing, the tree can not be processed (PhyloException
    from get_leaves_names), or the genomes / target / blocks are
    inconsistent.
    """
    if not os.path.exists(filename):
        raise RecipeException("Can't open recipe file")

    prefix = os.path.dirname(filename)

    recipe_dict = {"genomes" : {}}
    known_params = ["tree", "target", "blocks", "maf", "fasta",
                    "circular", "draft"]
    required_params = ["tree", "target", "blocks"]

    cast_bool = ["circular", "draft"]
    cast_int_list = ["blocks"]
    fix_path = ["fasta", "maf"]

    defaults = {"circular" : False,
                "draft" : False}

    #raw string, so that "\s" reaches the regex engine untouched
    param_matcher = re.compile(r"([^\s]+)\s*=\s*([^\s].*)$")
    with open(filename, "r") as f:
        #counting from 1, so every error message reports a human-readable line
        for lineno, line in enumerate(f, 1):
            line = line.strip()
            if not line or line.startswith("#"):
                continue

            m = param_matcher.match(line)
            if not m or "." not in m.group(1):
                raise RecipeException("Error parsing recipe on line {0}"
                                      .format(lineno))

            #parameter names never contain dots, so everything before the
            #last dot is the object name (which may contain dots itself)
            obj, param_name = m.group(1).rsplit(".", 1)
            value = m.group(2)
            if param_name not in known_params:
                raise RecipeException("Unknown recipe parameter '{0}' on line {1}"
                                      .format(param_name, lineno))

            #casting if necessary
            if param_name in cast_bool:
                if value in ["True", "true", "1"]:
                    value = True
                elif value in ["False", "false", "0"]:
                    value = False
                else:
                    raise RecipeException("Error parsing recipe on line "
                                          "{0}: wrong value '{1}' for bool param"
                                          .format(lineno, value))
            if param_name in cast_int_list:
                try:
                    value = list(map(int, value.split(",")))
                except ValueError:
                    #report bad numbers in the same way as other recipe errors
                    raise RecipeException("Error parsing recipe on line "
                                          "{0}: wrong value '{1}' for integer "
                                          "list param".format(lineno, value))
            if param_name in fix_path:
                #paths are given relative to the recipe file location
                value = os.path.join(prefix, value)

            if obj == "":
                recipe_dict[param_name] = value
            elif obj == "*":
                defaults[param_name] = value
            else:
                recipe_dict["genomes"].setdefault(obj, {})[param_name] = value

    for param in required_params:
        if param not in recipe_dict:
            raise RecipeException("Required parameter '{0}' not found in recipe"
                                  .format(param))

    #"tree" is guaranteed to be present by the check above
    try:
        genomes = get_leaves_names(recipe_dict["tree"])
    except PhyloException as e:
        raise RecipeException(e)

    #every genome mentioned in the recipe should be among the names returned
    #for the tree; comparing set sizes alone would miss a foreign genome
    #whenever the tree has at least as many leaves
    if set(recipe_dict["genomes"]) - set(genomes):
        raise RecipeException("Some genomes are missing from the tree")

    #genomes from the tree without explicit parameters get an empty entry
    for g in genomes:
        recipe_dict["genomes"].setdefault(g, {})

    #filling in parameters that were not set explicitly
    for g_params in recipe_dict["genomes"].values():
        for def_key, def_val in defaults.items():
            g_params.setdefault(def_key, def_val)

    if len(recipe_dict["blocks"]) != len(set(recipe_dict["blocks"])):
        raise RecipeException("Found similar synteny block sizes in recipe")

    if not recipe_dict["genomes"]:
        raise RecipeException("No genomes detected in recipe")

    if recipe_dict["target"] not in recipe_dict["genomes"]:
        raise RecipeException("Error parsing recipe: target genome "
                              "is not in tree")
    if "fasta" not in recipe_dict["genomes"][recipe_dict["target"]]:
        raise RecipeException("Error parsing recipe: FASTA file for "
                              "target genome is not specified")

    return recipe_dict
back to top