# Source: https://github.com/kpalin/EEL
# Tip revision: c0cd936bedca30564088c8aeb8f43ebe4e05421f, authored by Kimmo Palin on 07 November 2012, 13:23:26 UTC
# Commit message: Fixed a variable format clash which sometimes caused core dumps.
# File: qGramCounter.py
from Bio.SeqIO import FASTA
import sys
def count_qgrams(seq, q=4, progress=None):
    """Count every substring of length 1..q occurring in *seq*.

    For each end position ``end`` in ``1..len(seq)`` the slices
    ``seq[end - width:end]`` for ``width`` in ``1..min(end, q)`` are tallied.

    seq      -- any sliceable sequence with a length (e.g. a string).
    q        -- maximum gram length to count.
    progress -- optional stream with ``write``/``flush``; when given, the
                current end position is written to it every 1000 positions.

    Returns a dict mapping each gram (as produced by slicing *seq*) to the
    number of times it was seen.
    """
    # NOTE(review): the dict keys are whatever slicing `seq` yields, so the
    # tally relies on those slices hashing/comparing by content -- confirm
    # this for the sequence type handed out by FastaReader.
    grams = {}
    # The upper bound is len(seq) + 1 so that windows ending on the final
    # character are included; stopping at len(seq) silently dropped them.
    for end in range(1, len(seq) + 1):
        if progress is not None and end % 1000 == 0:
            # Trailing space keeps successive progress numbers separated.
            progress.write("%d " % end)
            progress.flush()
        for width in range(1, min(end, q) + 1):
            gram = seq[end - width:end]
            grams[gram] = grams.get(gram, 0) + 1
    return grams


def main(argv=None):
    """Run the q-gram counter over every record of a FASTA file.

    argv -- argument vector; defaults to ``sys.argv``. ``argv[1]`` is the
            path of the FASTA file to read.

    For each record the id and sequence length are printed, q-grams up to
    length 4 are counted (with progress output on stdout), and the id is
    printed again. The counts themselves are not reported, matching the
    original script.

    Returns 0 on success, 2 when the file argument is missing.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.stderr.write("usage: qGramCounter.py FASTA_FILE\n")
        return 2

    handle = open(argv[1])
    try:
        for rec in FASTA.FastaReader(handle):
            sys.stdout.write("%s %s\n" % (rec.id, len(rec.seq)))
            count_qgrams(rec.seq, 4, progress=sys.stdout)
            sys.stdout.write("%s\n" % (rec.id,))
    finally:
        # Always release the input file, even if parsing fails midway.
        handle.close()
    return 0


if __name__ == "__main__":
    sys.exit(main())