https://github.com/fjruizruano/ngs-protocols
Raw File
Tip revision: 39a091d1fa569a7fc717ac73c4b3de07f0a1204d authored by fjruizruano on 03 August 2023, 11:48:27 UTC
adding gfa2fas.py and extract_gfa.py
Tip revision: 39a091d
trna_clustering.py
#!/usr/bin/python

import sys
from Bio import SeqIO
from subprocess import call

print "Usage: trna_clustering.py TrnaScanOutput FastaFile"

try:
    ts = sys.argv[1]
except:
    ts = raw_input("Introduce tRNAscan output: ")

try:
    seqs = sys.argv[2]
except:
    seqs = raw_input("Introduce FASTA file: ")

seqs = SeqIO.parse(open(seqs), "fasta")

# create lists

names = []
seque = []

# create a dictionary with name and sequence

print "\nCreating database..."

for a in seqs:
    names.append(a.id)
    seque.append(a.seq)

dictio = dict(zip(names, seque))


print "\nGetting tRNAs..."

ts_data = open(ts).readlines()

ts_name = ts.split(".")
ts_name = ts_name[:-1]
ts_name = ".".join(ts_name)

out = open(ts_name+"_seqs.fas", "w")

for line in ts_data[3:]:
    info = line.split()
    id = info[0]
    num = info[1]
    begin = int(info[2])
    end = int(info[3])
    if begin-1 < end:
        diff = end-begin
        if diff <= 116:
            ext_seq = dictio[id][begin-1:end]
    else:
        diff = begin - end
        if diff <= 116:
            ext_seq = dictio[id][end-1:begin]
            ext_seq.reverse_complement()
    out.write(">%s_%s\n%s\n" % (id, num, ext_seq))

out.close()

call("cd-hit-est -T 12 -r 1 -i %s_seqs.fas -o %s_cdhit.fas -M 0 -aS 0.8 -c 0.8 -G 0 -g 1" % (ts_name,ts_name), shell=True)

print "\nDONE!"
back to top