https://github.com/fjruizruano/ngs-protocols
Raw File
Tip revision: 39a091d1fa569a7fc717ac73c4b3de07f0a1204d authored by fjruizruano on 03 August 2023, 11:48:27 UTC
adding gfa2fas.py and extract_gfa.py
Tip revision: 39a091d
sat_cross_libraries.py
#!/usr/bin/python

from subprocess import call
import sys

# Always print the expected invocation; any argument that is not given on the
# command line is asked for interactively below.
print("Usage: sat_cross_libraries.py FastaLibrary RepeatMaskerOutFile SatelliteNamesList")

# raw_input() only exists on Python 2; on Python 3 the equivalent is input().
try:
    ask = raw_input
except NameError:
    ask = input

# Only a missing argument (IndexError) should trigger a prompt; a bare
# "except:" would also hide any unrelated error.
try:
    fasta = sys.argv[1]
except IndexError:
    fasta = ask("Introduce Fasta file (reads): ")

try:
    out = sys.argv[2]
except IndexError:
    out = ask("Introduce RepeatMasker's OUT file: ")

try:
    sats = sys.argv[3]
except IndexError:
    sats = ask("Introduce list of satellite names: " )

# Prefix shared by every output file: the OUT file name up to its first dot.
# Only the file-name component is cut, so a dot in the directory part
# ("./reads.out", "run.1/reads.out") no longer empties or truncates the prefix.
# For an OUT file given without a directory the result is unchanged.
out_dir, out_sep, out_name = out.rpartition("/")
out_prefix = out_dir + out_sep + out_name.split(".")[0]

# Read the satellite names (one per line); the "with" block closes the file.
with open(sats) as sats_file:
    list_sats = sats_file.readlines()

for sat in list_sats:
    # strip() rather than dropping the last character: a final line without a
    # trailing newline keeps its full name, and "\r" from CRLF files is removed.
    sat = sat.strip()
    if not sat:
        # An empty pattern would make grep match every line of the OUT file.
        continue
    print("\n" + sat)

    # Common stem of the four files written for this satellite.
    stem = "%s.%s" % (out_prefix, sat)

    # 1) Keep the lines of the OUT file that contain the satellite name.
    # NOTE(review): grep takes the name as a regular expression and matches
    # substrings, so "Sat1" also selects "Sat10" -- confirm this is intended.
    call("""grep "%s" %s > %s.out""" % (sat, out, stem), shell=True)

    # 2) Build the list of read names to extract: take field 5 of each kept line
    #    (presumably the query sequence name of RepeatMasker's OUT layout --
    #    verify), cut it at the first "/", de-duplicate, and write every name
    #    twice, once with a "/1" suffix and once with "/2".
    #    Raw string: the shell receives the backslashes of "\/", "\t" and "\n"
    #    literally, exactly as with the former "\134" octal escapes.
    call(r"""awk {'print $5'} %s.out | sed 's/\//\t/g' | awk {'print $1'} | sort -u | awk {'print $1"/1\n"$1"/2"'} > %s.names""" % (stem, stem), shell=True)

    # 3) Extract the listed reads from the FASTA library, then 4) sort them by name.
    call("seqtk subseq %s %s.names > %s.fasta" % (fasta, stem, stem), shell=True)
    call("seqkit sort --by-name %s.fasta > %s.sort.fasta" % (stem, stem), shell=True)
back to top