Snakefile
import os
import sys
import glob
import itertools
import yaml
from Mikado.utilities import path_join
import Mikado.configuration.configurator
import subprocess
import gzip
from snakemake import logger as snake_logger
swissprot = "uniprot_sprot_plants.fasta"
swissprot_noat = "uniprot_sprot_plants.not_at.fasta"
DBs=[swissprot]
zipDBs=["{0}.gz".format(db) for db in DBs]
configname = "configuration.yaml"
if not os.path.exists(configname):
command = "mikado configure --list list.txt --reference chr5.fas --mode permissive --daijin -t 3 \
--scoring plants.yaml --junctions junctions.bed -bt {swiss} -bc 1 {configname}".format(configname=configname, swiss=swissprot)
snake_logger.info("Creating the configuration file")
snake_logger.info(command)
subprocess.call(command, shell=True)
try:
config = Mikado.configuration.configurator.to_json(configname)
except:
os.remove(configname)
raise
configfile: "configuration.yaml"
rule complete:
input: "compare.stats", "compare_subloci.stats", "compare_input.stats" # , "compare_monoloci.stats"
output: touch("finished.ok")
rule test_json:
input: db=zipDBs, config=configname
output: touch("{}.ok".format(configname)), "chr5.fas"
message: "gunzip -c chr5.fas.gz > chr5.fas"
run:
try:
__= Mikado.configuration.configurator.to_json(configname)
except:
os.remove(configname)
raise
subprocess.call("gunzip -c chr5.fas.gz > chr5.fas", shell=True)
# shell("touch {output}")
rule uncompress_blast:
input: "{0}.gz".format(swissprot)
output: swissprot
message: "gzip -dc {input} > {output}"
shell:
"gzip -dc {input} > {output}"
rule daijin:
input: "class.gtf", "cufflinks.gtf", "stringtie.gtf", "trinity.gff3", "mikado.bed", rules.test_json.output[0], rules.test_json.output[1], rules.uncompress_blast.output
output:
loci=os.path.join("Daijin", "5-mikado", "pick", "permissive", "mikado-permissive.loci.gff3"),
sub=os.path.join("Daijin", "5-mikado", "pick", "permissive", "mikado.subloci.gff3"),
# mono=os.path.join("Daijin", "5-mikado", "pick", "permissive", "mikado.monoloci.gff3"),
prep=os.path.join("Daijin", "5-mikado", "mikado_prepared.gtf")
message: "daijin mikado --jobs 1 --cores 2 --threads 2 -nd --nolock configuration.yaml"
shell: "daijin mikado --jobs 1 --cores 2 --threads 2 -nd --nolock configuration.yaml"
rule index_reference:
input: reference="reference.gff3"
output: "reference.gff3.midx"
log: "index.log"
message: """mikado compare -r {input[reference]} --index --log {log}"""
shell: """mikado compare -r {input[reference]} --index --log {log}"""
rule compare:
input: reference="reference.gff3", prediction=rules.daijin.output.loci, midx=rules.index_reference.output
output: "compare.stats", "compare.tmap", "compare.refmap"
log: "compare.log"
message: """mikado compare -r {input[reference]} -p {input[prediction]} -o compare -l {log}"""
shell: """mikado compare -r {input[reference]} -p {input[prediction]} -o compare -l {log}"""
rule compare_input:
input: reference="reference.gff3", prediction=rules.daijin.output.prep, midx=rules.index_reference.output
output: "compare_input.stats", "compare_input.tmap", "compare_input.refmap"
log: "compare_input.log"
message: """mikado compare -r {input[reference]} -p {input[prediction]} -o compare_input -l {log}"""
shell: """mikado compare -r {input[reference]} -p {input[prediction]} -o compare_input -l {log}"""
rule compare_subloci:
input: reference="reference.gff3", prediction=rules.daijin.output.sub, midx=rules.index_reference.output
output: "compare_subloci.stats", "compare_subloci.tmap", "compare_subloci.refmap"
log: "compare_subloci.log"
message: """mikado compare -r {input[reference]} -p {input[prediction]} -o compare_subloci -l {log}"""
shell: """mikado compare -r {input[reference]} -p {input[prediction]} -o compare_subloci -l {log}"""
rule clean:
run:
for filename in itertools.chain(glob.glob("*.ok"), glob.glob("uniprot*.fasta.p*"),
glob.glob("*midx"), glob.glob("*fai"),
"Daijin",
glob.glob("uniprot*fasta"), glob.glob("*loci*"),
["mikado_prepared.gtf", "mikado_prepared.fasta", "chr5.fas", "mikado.prodigal.gff3"],
glob.glob("compare*"), glob.glob(config["db_settings"]["db"]),
glob.glob("*.log"), glob.glob("*xml"), ["chr5.fas"],
["configuration.yaml"]):
if os.path.exists(filename):
os.remove(filename)
rule clean_crumbs:
run:
for filename in itertools.chain(["finished.ok"], glob.glob("mikado*loci*"),
glob.glob("compare*")):
if os.path.exists(filename):
os.remove(filename)