https://github.com/fjruizruano/ngs-protocols
Tip revision: 39a091d1fa569a7fc717ac73c4b3de07f0a1204d authored by fjruizruano on 03 August 2023, 11:48:27 UTC
adding gfa2fas.py and extract_gfa.py
adding gfa2fas.py and extract_gfa.py
Tip revision: 39a091d
trna_clustering.py
#!/usr/bin/python
import sys
from Bio import SeqIO
from subprocess import call
print "Usage: trna_clustering.py TrnaScanOutput FastaFile"
try:
ts = sys.argv[1]
except:
ts = raw_input("Introduce tRNAscan output: ")
try:
seqs = sys.argv[2]
except:
seqs = raw_input("Introduce FASTA file: ")
seqs = SeqIO.parse(open(seqs), "fasta")
# create lists
names = []
seque = []
# create a dictionary with name and sequence
print "\nCreating database..."
for a in seqs:
names.append(a.id)
seque.append(a.seq)
dictio = dict(zip(names, seque))
print "\nGetting tRNAs..."
ts_data = open(ts).readlines()
ts_name = ts.split(".")
ts_name = ts_name[:-1]
ts_name = ".".join(ts_name)
out = open(ts_name+"_seqs.fas", "w")
for line in ts_data[3:]:
info = line.split()
id = info[0]
num = info[1]
begin = int(info[2])
end = int(info[3])
if begin-1 < end:
diff = end-begin
if diff <= 116:
ext_seq = dictio[id][begin-1:end]
else:
diff = begin - end
if diff <= 116:
ext_seq = dictio[id][end-1:begin]
ext_seq.reverse_complement()
out.write(">%s_%s\n%s\n" % (id, num, ext_seq))
out.close()
call("cd-hit-est -T 12 -r 1 -i %s_seqs.fas -o %s_cdhit.fas -M 0 -aS 0.8 -c 0.8 -G 0 -g 1" % (ts_name,ts_name), shell=True)
print "\nDONE!"