https://github.com/fenderglass/Ragout
Tip revision: 4b42ddec7d839ab6369faa31a49e2d3a8db7d124 authored by fenderglass on 26 April 2014, 05:29:56 UTC
instal
instal
Tip revision: 4b42dde
synteny_backend.py
import logging
import os
from collections import namedtuple, defaultdict
# Shared logger for this module; getLogger() without a name returns the
# root logger, so messages follow the application's root logging config.
logger = logging.getLogger()
class SyntenyBackend:
    """Base class and name-indexed registry for synteny backends.

    Instances are stored in the class-level ``backends`` dict through
    register_backend() and retrieved with get_available_backends().
    run_backend() is presumably overridden by concrete backends; the
    implementation here only returns None.
    """

    # Registry shared by the whole class: backend name -> backend instance
    backends = {}

    def __init__(self):
        pass

    def make_permutations(self, config, output_dir, overwrite):
        """Run the backend and prepare its output for further processing.

        For every (block size, permutations file) pair returned by
        run_backend(), a sub-directory of ``output_dir`` named after the
        block size is created if missing and the permutations file is
        split into per-genome files inside it.

        ``config`` must provide ``blocks`` (the expected block sizes) and
        ``references`` / ``targets`` (dicts: genome id -> sequence file).

        Returns True on success and False if run_backend() raised; in the
        latter case the exception is only logged at debug level.
        """
        try:
            files = self.run_backend(config, output_dir, overwrite)
        except Exception as e:
            logger.debug(e)
            return False
        assert sorted(files.keys()) == sorted(config.blocks)

        # Nothing to split: do not touch the sequence files at all
        if not files:
            return True

        # The sequence-name -> genome mapping does not depend on the block
        # size, so the sequence files are scanned once instead of once per
        # block size.
        all_genomes = dict(list(config.references.items()) +
                           list(config.targets.items()))
        chr_to_gen = _get_chr_names(all_genomes)

        for block_size, perm_file in files.items():
            block_dir = os.path.join(output_dir, str(block_size))
            if not os.path.isdir(block_dir):
                os.mkdir(block_dir)
            _split_permutations(chr_to_gen, config.references,
                                config.targets, perm_file, block_dir)
        return True

    def run_backend(self, config, output_dir, overwrite):
        """Run the backend and return its permutations files.

        The result is expected to be a dict of permutations files indexed
        by block size. This base implementation returns None.
        """
        return None

    @staticmethod
    def get_available_backends():
        """Return the registry dict (backend name -> backend instance)."""
        return SyntenyBackend.backends

    @staticmethod
    def register_backend(name, instance):
        """Add ``instance`` to the registry under ``name``.

        Raises AssertionError if ``name`` is already registered.
        """
        assert name not in SyntenyBackend.backends
        SyntenyBackend.backends[name] = instance
#Quickly get chromosome names
def _get_chr_names(genomes):
chr_to_id = {}
for seq_id, seq_file in genomes.items():
for line in open(seq_file, "r"):
if line.startswith(">"):
contig_name = line.strip()[1:].split(" ")[0]
chr_to_id[contig_name] = seq_id
return chr_to_id
#Splits single premuation file generated by Sibelia into
#multiple files (one for each input genome)
def _split_permutations(chr_to_gen, references, targets, perm_file, out_dir):
out_files = {}
config = open(os.path.join(out_dir, "blocks.cfg"), "w")
all_genomes = dict(list(references.items()) + list(targets.items()))
for gen_id in set(chr_to_gen.values()):
filename = all_genomes[gen_id]
base = os.path.splitext(os.path.basename(filename))[0]
block_file_base = base + ".blocks"
block_file = os.path.join(out_dir, block_file_base)
out_files[gen_id] = open(block_file, "w")
if gen_id in references:
config.write("REF {0}={1}\n".format(gen_id, block_file_base))
else:
assert gen_id in targets
config.write("TARGET {0}={1}\n".format(gen_id, block_file_base))
for line in open(perm_file, "r"):
line = line.strip()
if not line:
continue
if line.startswith(">"):
name = line[1:]
else:
handle = out_files[chr_to_gen[name]]
handle.write(">{0}\n{1}\n".format(name, line))