https://github.com/fenderglass/Ragout
Tip revision: d3f213f733f5d80a62f00d4967b26edb7a06ba72 authored by Mikhail Kolmogorov on 22 March 2015, 19:11:36 UTC
1.1 release
1.1 release
Tip revision: d3f213f
recipe_parser.py
#(c) 2013-2014 by Authors
#This file is a part of Ragout program.
#Released under the BSD license (see LICENSE file)
"""
This module parses Ragout configuration file
"""
from collections import namedtuple
import re
import os
import logging
from ragout.parsers.phylogeny_parser import get_leaves_names, PhyloException
import ragout.shared.config as config
logger = logging.getLogger()
class RecipeException(Exception):
pass
def parse_ragout_recipe(filename):
if not os.path.exists(filename):
raise RecipeException("Can't open recipe file")
prefix = os.path.dirname(filename)
recipe_dict = {"genomes" : {}}
known_params = ["tree", "target", "blocks", "maf", "hal", "fasta",
"circular", "draft", "references"]
required_params = ["references", "target"]
cast_bool = ["circular", "draft"]
fix_path = ["fasta", "maf", "hal"]
defaults = {"circular" : False,
"draft" : False}
param_matcher = re.compile("([^\s]+)\s*=\s*([^\s].*)$")
with open(filename, "r") as f:
for lineno, line in enumerate(f):
line = line.strip()
if not line or line.startswith("#"):
continue
m = param_matcher.match(line)
if not m or not "." in m.group(1):
raise RecipeException("Error parsing recipe on line {1}"
.format(filename, lineno + 1))
(obj, param_name), value = m.group(1).split("."), m.group(2)
if param_name not in known_params:
raise RecipeException("Unknown recipe parameter '{0}' on line {1}"
.format(param_name, lineno, filename))
#checking values, casting
if param_name in cast_bool:
if value.lower() in ["true", "1"]:
value = True
elif value.lower() in ["false", "0"]:
value = False
else:
raise RecipeException("Error parsing recipe on line "
"{0}: wrong value '{1}' for bool param"
.format(lineno, value))
if param_name == "blocks":
if value not in config.vals["blocks"]:
raise RecipeException("Unknown synteny block size set: {0}"
.format(value))
if param_name == "references":
value = list(map(lambda s: s.strip(), value.split(",")))
if param_name in fix_path:
value = os.path.expanduser(value)
value = os.path.join(prefix, value)
###
if obj == "":
recipe_dict[param_name] = value
elif obj == "*":
defaults[param_name] = value
else:
recipe_dict["genomes"].setdefault(obj, {})[param_name] = value
for param in required_params:
if param not in recipe_dict:
raise RecipeException("Required parameter '{0}' not found in recipe"
.format(param))
genomes = recipe_dict["references"] + [recipe_dict["target"]]
if "tree" in recipe_dict:
try:
leaves = get_leaves_names(recipe_dict["tree"])
if set(leaves) != set(genomes):
raise RecipeException("The tree does not agree with "
"the specified set of genomes")
except PhyloException as e:
raise RecipeException(e)
for g in recipe_dict["genomes"]:
if g not in genomes:
raise RecipeException("Recipe error: genome '{0}' is not in "
"specified as reference or target".format(g))
for g in genomes:
recipe_dict["genomes"].setdefault(g, {})
for g, g_params in recipe_dict["genomes"].items():
for def_key, def_val in defaults.items():
g_params.setdefault(def_key, def_val)
return recipe_dict