https://github.com/fjruizruano/ngs-protocols
Raw File
Tip revision: 39a091d1fa569a7fc717ac73c4b3de07f0a1204d authored by fjruizruano on 03 August 2023, 11:48:27 UTC
adding gfa2fas.py and extract_gfa.py
Tip revision: 39a091d
rexp_prepare_normaltag.py
#!/usr/bin/python

import sys, os
from subprocess import call

print "\nUsage: rexp_prepare.py NumberOfPairedReads File_1.fastq File_2.fastq MinQual MinLen  [PREFIX]\n"

##read parameters
#number of reads
try:
    sel_reads = sys.argv[1]
    sel_reads = int(sel_reads)
except:
    sel_reads = raw_input("Number of reads you want select: ")
    sel_reads = int(sel_reads)

#FASTQ files
try:
    r1 = sys.argv[2]
    r2 = sys.argv[3]
except:
    r1 = raw_input("FASTQ file 1: ")
    r2 = raw_input("FASTQ file 2: ")

#Trimming
try:
    mq = sys.argv[4]
    ml = sys.argv[5]
except:
    ml = 101
    mq = 30

#prefix
try:
    prefix = sys.argv[6]
except:
    prefix = ""

#list of files in the current directory
files = [f for f in os.listdir(".") if os.path.isfile(f)]

#files names
rr1 = r1.find(".")
suffix = r1[rr1:]
rr1 = r1[:rr1]
rrr1 = r1.find("_")
rrr1 = r1[:rrr1]
rr2 = r2.find(".")
rr2 = r2[:rr2]

print rrr1

#with open(r1) as myfile:
#    head = [next(myfile) for x in xrange(2)]
#len_reads = str(len(head[1])-1)

#Trimming with Trimmomatic
trimmomatic = "trimmomatic PE -phred33 %s %s %s %s %s %s ILLUMINACLIP:/usr/local/lib/Trimmomatic-0.32/adapters/TruSeq3-PE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:%s MINLEN:%s" % (r1, r2, rr1+"_paired.fastq", rr1+"_unpaired.fastq", rr2+"_paired.fastq", rr2+"_unpaired.fastq", mq, ml)

#random selection of reads

fastq_pe_random_1 = "seqtk sample -s 100 %s %s > %s" % (rr1+"_paired.fastq",sel_reads,rr1+"_paired.fastq.subset")
fastq_pe_random_2 = "seqtk sample -s 100 %s %s > %s" % (rr2+"_paired.fastq",sel_reads,rr2+"_paired.fastq.subset")

#fastq_pe_random = "fastq-pe-random.py %s %s %s" % (rr1+"_paired"+suffix, rr2+"_paired"+suffix, sel_reads)

#shuffle reads
shuffle = "shuffleSequences_fastq.pl %s %s %s" % (rr1+"_paired.fastq.subset", rr2+"_paired.fastq.subset", rrr1+"_all_"+prefix+str(sel_reads)+".fastq")

#convert fastq to fasta
fastq_to_fasta =  "seqtk seq -a %s > %s" % (rrr1+"_all_"+prefix+str(sel_reads)+".fastq",rrr1+"_all_"+prefix+str(sel_reads)+".fasta")

#Try to run Trimmomatic

if rr1+"_paired.fastq" not in files or rr2+"_paired.fastq" not in files:
    try:
        print "Running Trimmomatic\n"
        print trimmomatic
        call(trimmomatic, shell = True)
    except:
        print "Trimmomatic could not run. Try again.\n"
else:
    print "Trimmomatic was already run. Skipping.\n"

#Try to run fastq pe random
try:
    print "Running Fastq-pe-random"
    print fastq_pe_random_1
    call(fastq_pe_random_1, shell = True)
    print fastq_pe_random_2
    call(fastq_pe_random_2, shell = True)
except:
    print "Fastq-pe-random could not run. Try again.\n"

#Try to run shuffling
try:
    print "\nRunning Shuffling"
    print shuffle+"\n"
    call(shuffle, shell = True)
except:
    print "Shuffling could not run. Try again.\n"

#open output
fqtemp = open("%s_all_%s%s_temp.fastq" % (rrr1,prefix,str(sel_reads)) ,"w")

#read modified fasta file
print "Editing "+ rrr1+"_all_"+prefix+str(sel_reads)+".fastq\n"
fastq = open(rrr1+"_all_"+prefix+str(sel_reads)+".fastq").readlines()

#add prefix
for x in range(0,len(fastq)):
    if x%8 == 0:
        line = fastq[x]
        line = line.split(" ")
        line = ">%s%s" % (prefix,line[0][1:])
    elif x%8 == 4:
        line = fastq[x]
        line = line.split(" ")
        line = ">%s%s" % (prefix,line[0][1:])
    else:
        line = fastq[x]
    fqtemp.write(line)

fqtemp.close()

call("mv %s %s" % (rrr1+"_all_"+prefix+str(sel_reads)+"_temp.fastq",rrr1+"_all_"+prefix+str(sel_reads)+".fastq"), shell=True)

#Try to convert to fasta format
try:
    print "Running Fastq to fasta"
    print fastq_to_fasta
    call(fastq_to_fasta, shell = True)
except:
    print "Fastq to fasta could not run. Try again.\n"

print "\nWe\'re done!\n"
back to top