https://github.com/fjruizruano/ngs-protocols
Raw File
Tip revision: 39a091d1fa569a7fc717ac73c4b3de07f0a1204d authored by fjruizruano on 03 August 2023, 11:48:27 UTC
adding gfa2fas.py and extract_gfa.py
Tip revision: 39a091d
dnapipete_createdb.py
#!/usr/bin/python

import sys
from Bio import SeqIO

# Command-line handling.
#
# Positional arguments (each may be omitted on the command line):
#   1. trinity  - path to the Trinity FASTA file; prompted for if missing.
#   2. annot    - path to the one_RM_hit_per_Trinity_contigs file; prompted
#                 for if missing.
#   3. unknownq - optional flag; any non-empty value makes the script also
#                 write contigs that have no annotation (tagged "#Unknown").
#                 Defaults to the empty string (flag off).
#
# The usage line is printed unconditionally on every run.  The parenthesised
# form prints the same text on Python 2 and is valid syntax on Python 3.
print("Usage: dnapipete_createdb.py Trinity.fasta one_RM_hit_per_Trinity_contigs [unknown]")

# raw_input() exists only on Python 2; Python 3 renamed it to input().
try:
    ask = raw_input
except NameError:
    ask = input

# Test the argument count explicitly instead of wrapping the lookups in a
# bare "except:", which would also swallow unrelated exceptions.
args = sys.argv[1:]

if len(args) > 0:
    trinity = args[0]
else:
    trinity = ask("Introduce path the Trinity.fasta file: ")

if len(args) > 1:
    annot = args[1]
else:
    annot = ask("Introduce path to the one_RM_hit_per_Trinity_contigs file: ")

if len(args) > 2:
    unknownq = args[2]
else:
    unknownq = ""

# Build the lookup "contig id -> annotation" from the annotation table.
# Column 1 (first whitespace-separated field) is used as the key and must
# match the FASTA record ids; column 5 is used as the annotation label
# (presumably the repeat class reported by dnaPipeTE -- confirm against the
# tool's output format).  If an id occurs more than once, the last row wins.
annotations = {}
with open(annot) as annot_handle:
    # Stream the table line by line rather than loading it all with
    # readlines(); the "with" block guarantees the handle is closed.
    for line_number, line in enumerate(annot_handle, 1):
        fields = line.split()
        if not fields:
            # Blank lines (e.g. a trailing newline) carry no record.
            continue
        if len(fields) < 5:
            # Fail with a message that identifies the offending row
            # instead of an opaque IndexError.
            raise ValueError(
                "%s, line %d: expected at least 5 whitespace-separated "
                "fields, found %d" % (annot, line_number, len(fields))
            )
        annotations[fields[0]] = fields[4]

# Stream the contigs and write the annotated FASTA into the current working
# directory.  Both handles are closed on exit, including on error.
with open(trinity) as fasta_handle, open("Trinity.annot.fasta", "w") as out:
    for sec in SeqIO.parse(fasta_handle, "fasta"):
        if sec.id in annotations:
            label = annotations[sec.id]
        elif unknownq != "":
            # Unannotated contigs are kept only when the optional third
            # command-line argument was supplied.
            label = "Unknown"
        else:
            continue
        # Header is "<id>#<label>"; the sequence is written on one line.
        out.write(">%s#%s\n%s\n" % (sec.id, label, str(sec.seq)))
back to top