https://github.com/fenderglass/Ragout
Tip revision: 9b706fa0825b6a8f25a626ceffa9b4c71bdaf9e4 authored by Mikhail Kolmogorov on 30 July 2018, 20:00:30 UTC
version update
version update
Tip revision: 9b706fa
datatypes.py
#(c) 2013-2014 by Authors
#This file is a part of Ragout program.
#Released under the BSD license (see LICENSE file)
"""
This module provides some common data structures
"""
from collections import namedtuple
from copy import copy, deepcopy
class Block:
"""
Represents synteny block
"""
def __init__(self, block_id, sign, start=None, end=None):
self.block_id = block_id
self.sign = sign
self.start = start
self.end = end
def length(self):
if self.start is None or self.end is None:
return None
assert self.end >= self.start
return self.end - self.start
def signed_id(self):
return self.block_id * self.sign
class Permutation:
"""
Represents signed permutation
"""
def __init__(self, genome_name, chr_name, seq_len, blocks):
self.genome_name = genome_name
self.chr_name = chr_name
self.seq_start = 0
self.seq_end = seq_len
self.seq_len = seq_len
self.blocks = blocks
self.repeat_id = 0
self.draft = False
def length(self):
assert self.seq_end > self.seq_start
return self.seq_end - self.seq_start
def name(self):
if self.seq_start == 0 and self.seq_end == self.seq_len:
return self.chr_name
else:
return "{0}[{1}:{2}]".format(self.chr_name, self.seq_start,
self.seq_end)
def iter_pairs(self):
for pb, nb in zip(self.blocks[:-1], self.blocks[1:]):
yield pb, nb
def __str__(self):
return ("[{0}, {1}, {2}, b:{3}, e:{4}]"
.format(self.genome_name, self.chr_name,
list(map(lambda b: b.signed_id(), self.blocks)),
self.seq_start, self.seq_end))
def output_permutations(permutations, out_file):
with open(out_file, "w") as f:
for perm in permutations:
f.write(">" + perm.name() + "\n")
for block in perm.blocks:
f.write("{0:+} ".format(block.signed_id()))
f.write("$\n")
class Contig:
def __init__(self, permutation, sign, link, dummy_param):
self.perm = permutation
self.sign = sign
if link is None:
link = Link(0, [])
self.link = link
@staticmethod
def with_perm(permutation, sign=1, link=None):
return Contig(permutation, sign, link, None)
@staticmethod
def with_sequence(seq_name, seq_len, sign=1, link=None):
dummy_perm = Permutation(None, seq_name, seq_len, None)
return Contig.with_perm(dummy_perm, sign, link)
def left_end(self):
return (self.perm.blocks[0].signed_id() if self.sign > 0
else -self.perm.blocks[-1].signed_id())
def right_end(self):
return (-self.perm.blocks[-1].signed_id() if self.sign > 0
else self.perm.blocks[0].signed_id())
def left_gap(self):
return (self.perm.blocks[0].start if self.sign > 0
else self.perm.length() - self.perm.blocks[-1].end)
def right_gap(self):
return (self.perm.length() - self.perm.blocks[-1].end
if self.sign > 0 else self.perm.blocks[0].start)
def reverse_copy(self):
contig = copy(self)
contig.sign = -contig.sign
return contig
def signed_perm(self):
if self.sign > 0:
return list(map(lambda b: b.signed_id(), self.perm.blocks))
else:
return list(map(lambda b: -b.signed_id(), self.perm.blocks[::-1]))
def name(self):
return self.perm.name()
def signed_name(self):
sign = "+" if self.sign > 0 else "-"
return sign + self.name()
def name_with_coords(self):
return self.perm.chr_name, self.perm.seq_start, self.perm.seq_end
def trim_left(self, trim_len):
if self.sign > 0:
self.perm.seq_start += trim_len
else:
self.perm.seq_end -= trim_len
def trim_right(self, trim_len):
if self.sign > 0:
self.perm.seq_end -= trim_len
else:
self.perm.seq_start += trim_len
def length(self):
return self.perm.length()
class Link:
"""
Represens an adjancency between teo contigs
"""
def __init__(self, gap, supporting_genomes):
self.gap = gap
self.supporting_genomes = supporting_genomes
self.supporting_assembly = False
class Scaffold:
def __init__(self, name):
self.left = None
self.right = None
self.contigs = []
self.name = name
@staticmethod
def with_contigs(name, left, right, contigs):
scf = Scaffold(name)
scf.left = left
scf.right = right
scf.contigs = contigs
return scf
def output_scaffolds_premutations(scaffolds, out_file):
with open(out_file, "w") as f:
permutations = []
for scf in scaffolds:
blocks = []
for contig in scf.contigs:
blocks.extend(contig.signed_perm())
f.write(">" + scf.name + "\n")
for block in blocks:
f.write("{0:+} ".format(block))
f.write("$\n")