https://github.com/carden24/Bioinformatics_scripts
Raw File
Tip revision: 020af3000eeca9c2e270be798fe7264ece87f22b authored by Erick Cardenas on 30 September 2018, 20:38:24 UTC
Delete create_multiple_f.sh
Tip revision: 020af30
get_gi_number_from_cazy_fasta.py
#usage python get_gi_number_from_cazy_fasta.py <in.fasta> <out.file>
#        	0				1		2	


import sys

import sys
from Bio import SeqIO


def extract_gi_number(record_id):
    """Return the number embedded in a FASTA record id, or None.

    Ids that start with ``gi|`` are split on ``|``; every other id is split
    on ``_``.  In both cases the second field is returned as a string.

    Returns None when the id contains no delimiter and therefore has no
    second field.  (Indexing that missing field used to raise IndexError
    and abort the whole run.)
    """
    delimiter = '|' if record_id.startswith('gi|') else '_'
    fields = record_id.split(delimiter)
    if len(fields) < 2:
        return None
    return fields[1]


def main(fasta_path, table_path):
    """Write a tab-separated ``<record id>\\t<number>`` line per FASTA record.

    fasta_path -- FASTA file to read (parsed with Bio.SeqIO).
    table_path -- output file; it is created or truncated.

    Records whose id yields no number are reported on stderr and skipped
    instead of crashing the script part-way through the output.
    """
    # ``with`` guarantees both handles are closed even if parsing fails.
    with open(fasta_path, 'r') as filein, open(table_path, 'w') as fileout:
        for record in SeqIO.parse(filein, "fasta"):
            record_id = record.id
            # Per-record marker kept from the original script: '1' for
            # 'gi|' style ids, '2' for everything else.  The parenthesised
            # form prints the same text on Python 2 and Python 3.
            print('1' if record_id.startswith('gi|') else '2')
            gi_number = extract_gi_number(record_id)
            if gi_number is None:
                sys.stderr.write(
                    'warning: no number found in record id %r, skipped\n'
                    % record_id)
                continue
            fileout.write('%s\t%s\n' % (record_id, gi_number))


if __name__ == '__main__':
    if len(sys.argv) < 3:
        sys.exit('usage: python %s <in.fasta> <out.file>' % sys.argv[0])
    main(sys.argv[1], sys.argv[2])

 

back to top