https://github.com/fjruizruano/ngs-protocols
Tip revision: 39a091d1fa569a7fc717ac73c4b3de07f0a1204d authored by fjruizruano on 03 August 2023, 11:48:27 UTC
adding gfa2fas.py and extract_gfa.py
adding gfa2fas.py and extract_gfa.py
Tip revision: 39a091d
blat_recursive_hard.py
#!/usr/bin/python
import sys
from subprocess import call, Popen
from os import listdir
from os.path import isfile, join
print "\nUsage: blat_recursive.py NumberOfThreads ListOfSequences Reference\n"
try:
threads = sys.argv[1]
except:
threads = raw_input("Introduce number of threads: ")
try:
li = sys.argv[2]
except:
li = raw_input("Introduce list name: ")
try:
db = sys.argv[3]
except:
db = raw_input("Introduce DB in FASTA format: ")
thr = int(threads)
lis = open(li).readlines()
for file in lis:
file = file[:-1]
call("FastA.split.pl %s tmp_queries %s" % (file, thr), shell=True)
# call("faSplit sequence %s %s tmp_queries_" % (file, thr), shell=True)
onlyfiles = [f for f in listdir(".") if isfile(join(".",f))]
splits = []
for f in onlyfiles:
if f.startswith("tmp_queries") and f.endswith(".fa"):
splits.append(f)
splits.sort()
commands = []
for n in range(0,len(splits)):
com = "blat -noHead -minIdentity=95 %s %s %s" % (db,splits[n],splits[n]+".blat")
commands.append(com)
processes = [Popen(cmd, shell=True) for cmd in commands]
for p in processes:
p.wait()
w = open(file+".blat", "w")
blat = open(splits[0]+".blat").readlines()
w.write("".join(blat))
for n in range(1,len(splits)):
blat = open(splits[n]+".blat").readlines()
w.write("".join(blat[5:]))
w.close()
call("rm tmp_queries*", shell=True)