https://github.com/kpalin/EEL
Tip revision: c0cd936bedca30564088c8aeb8f43ebe4e05421f authored by Kimmo Palin on 07 November 2012, 13:23:26 UTC
Fixed a variable format clash which sometimes caused core dumps.
Fixed a variable format clash which sometimes caused core dumps.
Tip revision: c0cd936
randomizeGFF.py
import random,sys
fname=sys.argv[1]
lines=map(lambda x:x.split("\t"),open(fname).readlines())
names=map(lambda x:(x[2],int(x[4])-int(x[3])),lines)
random.shuffle(names)
m={}
goodLines=[]
for (name,dim) in names:
line=lines.pop()
c=0
while m.has_key(line[3]) and m[line[3]].has_key(name):
c=c+1
lines=[line]+lines
prevLine=line
line=lines.pop()
#print "looping with",line
if c>10:
random.shuffle(lines)
if not m.has_key(line[3]):
m[line[3]]={}
m[line[3]][name]=1
line[2]=name
line[4]=str(int(line[3])+dim)
#print line
goodLines.append(line)
#print goodLines
for x in goodLines: print "\t".join(x),
sys.exit()
for line,(name,dim) in zip(lines,names):
line[2]=name
line[4]=str(int(line[3])+dim)
#lines=map(lambda line:"%s\n"%("\t".join(line)),lines)
lines.sort(lambda x,y:cmp((x[2],int(x[3])),(y[2],int(y[3]))))
for i in range(len(lines)):
if i>0 and lines[i][2]==lines[i-1][2] and lines[i][3]==lines[i-1][3] and lines[i][6]==lines[i-1][6]:
#print "Skipping",i,"\t".join(lines[i])
continue
print "\t".join(lines[i]),