https://github.com/fjruizruano/ngs-protocols
Raw File
Tip revision: 39a091d1fa569a7fc717ac73c4b3de07f0a1204d authored by fjruizruano on 03 August 2023, 11:48:27 UTC
adding gfa2fas.py and extract_gfa.py
Tip revision: 39a091d
bam_var_join.py
#!/usr/bin/python

import sys
from subprocess import call
from os import listdir
from os.path import isfile, join

print "bam_coverage_join.py FastaReference ListOfBams"

try:
    ref = sys.argv[1]
except:
    ref = raw_input("Introduce FASTA reference: ")

try:
    li = sys.argv[2]
except:
    li = raw_input("Introduce list of BAM files: ")

bams = open(li).readlines()

to_join = []

onlyfiles = [f for f in listdir(".") if isfile(join(".",f))]

for bam in bams:
    bam = bam[:-1]
    if bam+".var2" not in onlyfiles:
        if bam+".var" not in onlyfiles:
            print "Not found file %s.var, generating..." % bam
            call("pysamstats -f %s --type variation %s > %s.var" % (ref,bam,bam), shell=True)
        call("""tail -n +2 %s.var | awk {'print $1"&="$2"\t"$10"&="$12"&="$14"&="$16"&="$18"&="$20'}  > %s.var2""" % (bam,bam), shell=True)
        print "Not found file %s.var2, generating..." % bam
    else:
        print "Found file %s.var2, skipping." % bam
    to_join.append(bam+".var2")

print "Joining lists..."
call("join_multiple_lists_var.py %s " % " ".join(to_join), shell=True)

call("""sed 's/&=/\t/g' toico.txt > toico2.txt""", shell=True)

call("(head -n 2 toico2.txt && tail -n +3 toico2.txt | sort -k 1,1 -k 2n) > toico3.txt ", shell=True)

print "We're done!!!\n"
back to top