https://github.com/harrisonlab/popgen
Raw File
Tip revision: 4f01d188df0fb1c73edd158bd81c7f404d82e273 authored by adarmitage on 10 October 2019, 16:56:08 UTC
Increased numbers of bootstrap reps.
Tip revision: 4f01d18
write_seq_length.py
#!/usr/bin/python

#Process a FASTA file and write two tab-delimited columns (sequence id, sequence length) to <input name>_lengths.txt

import os, sys, re
from sys import argv
from Bio import SeqIO

def lengths_path(fasta):
    """Return the output path for the lengths table of *fasta*.

    A trailing ``.fa``, ``.fas`` or ``.fasta`` extension is replaced by
    ``_lengths.txt`` (any directory prefix is kept).  For any other file
    name ``_lengths.txt`` is appended instead, so the returned path is
    never identical to the input path.
    """
    # The dot is escaped so that only a real extension separator matches.
    out_path, n_subs = re.subn(
        r"(\w+)\.(fa|fas|fasta)$", r"\1_lengths.txt", fasta
    )
    if n_subs == 0:
        # No recognised extension: without this fallback the output name
        # would equal the input name and opening it for writing would
        # truncate the FASTA file itself.
        return fasta + "_lengths.txt"
    return out_path


def main(args):
    """Write a tab-delimited table of sequence id and sequence length.

    *args* is the full argument vector (script name followed by exactly one
    FASTA file path).  Exits with a usage message on any other argument
    count.
    """
    if len(args) != 2:
        sys.exit("Usage: " + args[0] + " <sequences.fasta>")
    fasta = args[1]

    # The context manager guarantees the output is flushed and closed, even
    # if parsing fails part-way through.
    with open(lengths_path(fasta), 'w') as out:
        for seq_record in SeqIO.parse(fasta, "fasta"):
            gene_id = str(seq_record.id)
            gene_len = len(seq_record.seq)
            out.write(gene_id + "\t" + str(gene_len) + "\n")


if __name__ == "__main__":
    main(argv)
back to top