https://github.com/fjruizruano/ngs-protocols
Raw File
Tip revision: 39a091d1fa569a7fc717ac73c4b3de07f0a1204d authored by fjruizruano on 03 August 2023, 11:48:27 UTC
adding gfa2fas.py and extract_gfa.py
Tip revision: 39a091d
blat_recursive_hard.py
#!/usr/bin/python

import sys
from subprocess import call, Popen
from os import listdir
from os.path import isfile, join

print "\nUsage: blat_recursive.py NumberOfThreads ListOfSequences Reference\n"

try:
    threads = sys.argv[1]
except:
    threads = raw_input("Introduce number of threads: ")

try:
    li = sys.argv[2]
except:
    li = raw_input("Introduce list name: ")

try:
    db = sys.argv[3]
except:
    db = raw_input("Introduce DB in FASTA format: ")

thr = int(threads)
lis = open(li).readlines()

for file in lis:
    file = file[:-1]
    call("FastA.split.pl %s tmp_queries %s" % (file, thr), shell=True)

#    call("faSplit sequence %s %s tmp_queries_" % (file, thr), shell=True)
    onlyfiles = [f for f in listdir(".") if isfile(join(".",f))]
    splits = []
    for f in onlyfiles:
        if f.startswith("tmp_queries") and f.endswith(".fa"):
            splits.append(f)
    splits.sort()
    commands = []
    for n in range(0,len(splits)):
        com = "blat -noHead -minIdentity=95 %s %s %s" % (db,splits[n],splits[n]+".blat")
        commands.append(com)
    processes = [Popen(cmd, shell=True) for cmd in commands]
    for p in processes:
        p.wait()
    w = open(file+".blat", "w")
    blat = open(splits[0]+".blat").readlines()
    w.write("".join(blat))
    for n in range(1,len(splits)):
        blat = open(splits[n]+".blat").readlines()
        w.write("".join(blat[5:]))
    w.close()

call("rm tmp_queries*", shell=True)
back to top