https://github.com/fenderglass/Ragout
Revision 54ec8318ee615f6aa28b3188cfd95ee5679cc317 authored by fenderglass on 15 January 2014, 22:20:24 UTC, committed by fenderglass on 15 January 2014, 22:20:24 UTC
1 parent b0a0b5a
Tip revision: 54ec8318ee615f6aa28b3188cfd95ee5679cc317 authored by fenderglass on 15 January 2014, 22:20:24 UTC
sibelia path fix
sibelia path fix
Tip revision: 54ec831
permutation.py
from collections import defaultdict
import config_parser as parser
import logging
# Module-level handle to the root logger (getLogger() is called without a name).
logger = logging.getLogger()
class Permutation(object):
    """Ordered list of blocks belonging to one named sequence.

    The constructor stores its arguments unchanged:

    ref_id  -- identifier of the source the permutation was read for
    chr_id  -- name of the sequence
    chr_num -- number assigned to the sequence
    blocks  -- indexable sequence of block ids (the rest of this module
               applies abs() to them, so they are presumably signed
               integers -- confirm against callers)
    """

    def __init__(self, ref_id, chr_id, chr_num, blocks):
        self.ref_id = ref_id
        self.chr_id = chr_id
        self.chr_num = chr_num
        self.blocks = blocks
        # The four lists below are only initialised here; nothing in the
        # visible part of this module reads or fills them. They are kept
        # because external code may still rely on the attributes existing.
        self.target_perms = []
        self.ref_perms = []
        self.ref_perms_filtered = []
        self.target_perms_filtered = []

    def iter_blocks(self, circular=False):
        """Yield the blocks in order.

        When circular is true, the first block is yielded once more after
        the last one. Nothing is yielded for an empty permutation.
        """
        # Guard first: the circular case indexes blocks[0] below.
        if not self.blocks:
            return
        for block in self.blocks:
            yield block
        if circular:
            yield self.blocks[0]
def find_duplications(ref_perms, target_perms):
    """Return the set of block ids that occur more than once under one ref_id.

    Both arguments are lists of permutation objects exposing ``ref_id`` and
    ``blocks``. Block ids are compared by absolute value, so the sign of a
    block does not matter.
    """
    owners_by_block = defaultdict(set)
    repeated = set()
    for perm in ref_perms + target_perms:
        owner = perm.ref_id
        for signed_block in perm.blocks:
            block_id = abs(signed_block)
            owners = owners_by_block[block_id]
            if owner not in owners:
                # First sighting of this block for this ref_id.
                owners.add(owner)
                continue
            repeated.add(block_id)
    return repeated
def filter_perm(perm, to_hold):
    """Return a new Permutation with only the blocks whose abs() is in to_hold.

    ref_id, chr_id and chr_num are copied from ``perm``; the order and sign
    of the surviving blocks are preserved. ``perm`` itself is not modified.
    """
    kept_blocks = [block for block in perm.blocks if abs(block) in to_hold]
    return Permutation(perm.ref_id, perm.chr_id, perm.chr_num, kept_blocks)
def parse_blocks_file(ref_id, filename):
    """Read a permutations file and return a list of Permutation objects.

    Blank lines are skipped. A line starting with ">" sets the name used for
    the permutation lines that follow it. Every other line is split on
    whitespace, its final token is dropped, and the remaining tokens are
    converted with int().

    ref_id   -- identifier stored on every returned Permutation
    filename -- path of the file to read

    Raises ValueError if a block token is not an integer, and whatever
    open() raises if the file cannot be read.
    """
    # The context manager closes the file even if reading fails.
    with open(filename, "r") as handle:
        lines = handle.read().splitlines()

    name = ""
    permutations = []
    chr_count = 0
    for line in lines:
        line = line.strip()
        if not line:
            continue
        if line.startswith(">"):
            name = line[1:]
            continue
        # The last token is discarded unconditionally -- presumably an
        # end-of-line marker such as "$"; TODO confirm against the tool
        # that produces these files.
        tokens = line.split()[:-1]
        # Build a real list: the blocks are traversed more than once by the
        # rest of the module, and map() is a one-shot iterator on Python 3.
        blocks = [int(token) for token in tokens]
        permutations.append(Permutation(ref_id, name, chr_count, blocks))
        chr_count += 1
    return permutations
class PermutationContainer(object):
    """Load every permutation named in a config file and filter its blocks.

    After construction the instance exposes:

    ref_perms, target_perms
        permutations parsed from the files in config.references and
        config.targets
    duplications
        result of find_duplications() over both lists
    target_blocks
        set of absolute block ids found in any target permutation
    ref_perms_filtered, target_perms_filtered
        the same permutations passed through filter_perm(), keeping only
        target blocks that are not duplications
    """

    def __init__(self, config_file):
        self.ref_perms = []
        self.target_perms = []

        # Use the module-level logger rather than the logging.info()
        # shortcut, which may install a default handler as a side effect.
        logger.info("Reading permutation file")
        config = parser.parse_ragout_config(config_file)
        # Assumes config.references / config.targets are dicts mapping an
        # id to a file path -- TODO confirm against config_parser.
        # .items() works on both Python 2 and Python 3.
        for ref_id, ref_file in config.references.items():
            self.ref_perms.extend(parse_blocks_file(ref_id, ref_file))
        for t_id, t_file in config.targets.items():
            self.target_perms.extend(parse_blocks_file(t_id, t_file))

        self.duplications = find_duplications(self.ref_perms,
                                              self.target_perms)

        self.target_blocks = set()
        for perm in self.target_perms:
            self.target_blocks.update(abs(block) for block in perm.blocks)

        # Only blocks that are present in a target and not duplicated
        # survive the filtering.
        to_hold = self.target_blocks - self.duplications
        self.ref_perms_filtered = [filter_perm(p, to_hold)
                                   for p in self.ref_perms]
        self.target_perms_filtered = [filter_perm(p, to_hold)
                                      for p in self.target_perms]
Computing file changes ...