https://github.com/fjruizruano/ngs-protocols
Raw File
Tip revision: 39a091d1fa569a7fc717ac73c4b3de07f0a1204d authored by fjruizruano on 03 August 2023, 11:48:27 UTC
adding gfa2fas.py and extract_gfa.py
Tip revision: 39a091d
sat_subfam2fam_multi.py
#!/usr/bin/python

import sys
import commands
from subprocess import call, Popen
from os import listdir
from os.path import isfile, join

# Driver script: split an alignment file into chunks of equal line count, run
# sat_subfam2fam_multi_support.py on every chunk in parallel, concatenate the
# per-chunk ".fam" files into <AlignFile>.fam and finally run
# calcDivergenceFromAlign.pl on the merged file.
print("sat_subfam2fam_multi.py AlignFile PatternFile Threads")

# Every argument falls back to an interactive prompt when it is missing from
# the command line.
try:
    dat = sys.argv[1]
except IndexError:
    dat = raw_input("Introduce align file: ")

try:
    pat = sys.argv[2]
except IndexError:
    pat = raw_input("Introduce pattern file: ")

try:
    th = sys.argv[3]
except IndexError:
    th = raw_input("Introduce number of threads: ")

th = int(th)
if th < 1:
    sys.exit("Number of threads must be at least 1")

# Count lines the same way split does: a final line without a trailing
# newline still counts as a line. Binary mode avoids any newline translation.
with open(dat, "rb") as handle:
    total_lines = sum(1 for _ in handle)
if total_lines == 0:
    # Without this guard no chunk would exist and "cat" would wait on stdin.
    sys.exit("Align file %s is empty" % dat)

# Lines per chunk. The "+1" guarantees that at most `th` chunks are produced;
# "//" keeps this an integer division on every interpreter version.
lines = (total_lines // th) + 1
n_chunks = (total_lines + lines - 1) // lines

# "-a 2" pins the suffix length so chunk names are exactly <dat>aa, <dat>ab,
# ..., which lets us compute them instead of guessing from a directory scan.
# Argument lists (no shell) keep file names with spaces or metacharacters safe.
if call(["split", "-a", "2", "-l", str(lines), dat, dat]) != 0:
    sys.exit("split failed for %s" % dat)

# Build the chunk names in the order split generates them. Computing the names
# (rather than matching "<dat>a*" in the current directory) picks up chunks
# beyond the 26th ("ba", "bb", ...), works when the align file is given with a
# directory component, and ignores stale look-alike files from earlier runs.
letters = "abcdefghijklmnopqrstuvwxyz"
splits = []
for index in range(n_chunks):
    chunk = dat + letters[index // 26] + letters[index % 26]
    if not isfile(chunk):
        sys.exit("Expected chunk %s was not created by split" % chunk)
    splits.append(chunk)

# Launch one worker per chunk, all at once, then wait for every one of them.
processes = [
    Popen(["sat_subfam2fam_multi_support.py", chunk, pat]) for chunk in splits
]
for chunk, process in zip(splits, processes):
    if process.wait() != 0:
        # Keep going as before, but do not hide the failure from the user.
        sys.stderr.write(
            "Warning: sat_subfam2fam_multi_support.py failed on %s\n" % chunk
        )

# The worker is expected to leave its result in <chunk>.fam (its code is not
# part of this file); merge those results in chunk order.
splits_fam = [chunk + ".fam" for chunk in splits]
fam_file = dat + ".fam"
with open(fam_file, "wb") as merged:
    call(["cat"] + splits_fam, stdout=merged)

print(splits)

# Remove the temporary chunks and their per-chunk outputs.
call(["rm"] + splits + splits_fam)

call(["calcDivergenceFromAlign.pl", "-s", fam_file + ".divsum", fam_file])
back to top