Raw File
Tip revision: a010582c8df9b28958e7447ae37bfca886c457de authored by Mikhail Kolmogorov on 18 March 2015, 01:59:36 UTC
non-negative tree lengths, dosumentationimproved
Tip revision: a010582
#!/usr/bin/env python2.7

#(c) 2013-2014 by Authors
#This file is a part of Ragout program.
#Released under the BSD license (see LICENSE file)

This script generates dot-plots of given set of synteny blocks
for visual comparison.

from __future__ import print_function
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
import os, sys
import subprocess
from collections import namedtuple

Block = namedtuple("Block", ["id", "start", "length", "chr_id"])
SeqInfo = namedtuple("SeqInfo", ["id", "length"])

GEPARD_DIR = "~/Bioinf/tools/gepard-1.30/"
EXEC = "./"
MATRIX = "matrices/edna.mat"

def main():
    if len(sys.argv) < 4:
        print("Usage: coords_file output_dir fasta_references\n\n"
              "A script for plotting synteny blocks for visual verification. "
              "Probably, you don't want to use it.")
    coords_file = sys.argv[1]
    fasta = sys.argv[3:]
    blocks = parse_coords_file(coords_file)
    draw_dot_plot(blocks, fasta, sys.argv[2])

def draw_dot_plot(blocks, seq_files, out_dir):
    seqs = {}
    for file in seq_files:
        for seq in SeqIO.parse(file, "fasta"):
            seq_id =" ")[0]
            seqs[seq_id] = seq.seq

    out_dir = os.path.abspath(out_dir)

    for block_id, blocklist in blocks.items():
        allBlocks = open(os.path.join(out_dir,
                                      "blocks{0}.fasta".format(block_id)), "w")
        for block in blocklist:
            seq = seqs[block.chr_id][block.start : block.start + block.length]
            if < 0:
                seq = seq.reverse_complement()
            SeqIO.write(SeqRecord(seq, id=block.chr_id, description=""),
                        allBlocks, "fasta")

        block1, block2 = blocklist[0:2]

        s1 = seqs[block1.chr_id]
        s2 = seqs[block2.chr_id]

        seq1 = s1[block1.start : block1.start + block1.length]
        seq2 = s2[block2.start : block2.start + block2.length]
        if < 0:
            seq1 = seq1.reverse_complement()
        if < 0:
            seq2 = seq2.reverse_complement()

        file1 = os.path.join(out_dir, "block{0}.{1}-1.fasta"
                                      .format(block_id, block1.chr_id))
        file2 = os.path.join(out_dir, "block{0}.{1}-2.fasta"
                                      .format(block_id, block2.chr_id))

        SeqIO.write(SeqRecord(seq1), file1, "fasta")
        SeqIO.write(SeqRecord(seq2), file2, "fasta")

        out_dot = os.path.join(out_dir, "block{0}-{1}-{2}.png"
                               .format(block_id, block1.chr_id, block2.chr_id))

        cmdline = [EXEC, "-seq1", file1, "-seq2", file2, "-outfile",
                    out_dot, "-matrix", MATRIX, "-word", "10"]


def parse_coords_file(blocks_file):
    group = [[]]
    seq_info = {}
    blocks_info = {}
    line = [l.strip() for l in open(blocks_file) if l.strip()]
    for l in line:
        if l.startswith("-"):
    for l in group[0][1:]:
        seq_num, seq_len, seq_id = l.split()
        seq_num = int(seq_num)
        seq_info[seq_num] = SeqInfo(seq_id, int(seq_len))
    for g in [g for g in group[1:] if g]:
        block_id = int(g[0].split()[1][1:])
        blocks_info[block_id] = []
        for l in g[2:]:
            chr_num, bl_strand, bl_start, bl_end, bl_length = l.split()
            chr_num = int(chr_num)
            chr_id = seq_info[chr_num].id
            bl_start, bl_end = int(bl_start), int(bl_end)
            if bl_strand == "-":
                bl_start, bl_end = bl_end, bl_start

            num_id = block_id if bl_strand == "+" else -block_id
            blocks_info[block_id].append(Block(num_id, bl_start,
                                        int(bl_length), chr_id))
    return blocks_info

if __name__ == "__main__":
back to top