https://github.com/fjruizruano/ngs-protocols
Tip revision: 39a091d1fa569a7fc717ac73c4b3de07f0a1204d authored by fjruizruano on 03 August 2023, 11:48:27 UTC
adding gfa2fas.py and extract_gfa.py
adding gfa2fas.py and extract_gfa.py
Tip revision: 39a091d
rm_clas_seq.py
#!/usr/bin/python
from Bio import SeqIO
files = open("lista_out.txt").readlines()
out_in1 = open("list_in_1.fas", "w")
out_in2 = open("list_in_2.fas", "w")
out_no1 = open("list_no_1.fas", "w")
out_no2 = open("list_no_2.fas", "w")
for file in files:
print "\nLoading files %s and %s" % (file[:-1], file[:-5])
input = open(file[:-1]).readlines()
lista_id = []
for line in input[3:]:
text = line.split()
id = text[4]
lista_id.append(id[:-1])
lista_id = list(set(lista_id))
lista_id.sort()
handle = open(file[:-5], "rU")
sequen = SeqIO.to_dict(SeqIO.parse(handle, "fasta"))
handle.close()
print "Getting sequences..."
for el in lista_id:
out_in1.write(">%s\n%s\n" % (el+"/1", str(sequen[el+"1"].seq)))
out_in2.write(">%s\n%s\n" % (el+"/2", str(sequen[el+"2"].seq)))
del sequen[el+"1"]
del sequen[el+"2"]
for el in sorted(sequen.iterkeys()):
sec = sequen[el]
if el[-1:] == "1":
out_no1.write(">%s\n%s\n" % (el[:-1]+"/"+el[-1:], str(sec.seq)))
elif el[-1:] == "2":
out_no2.write(">%s\n%s\n" % (el[:-1]+"/"+el[-1:], str(sec.seq)))
out_in1.close()
out_in2.close()
out_no1.close()
out_no2.close()