"""Functional tests for the MATAM assembly script.

The module-scoped ``matam_results`` fixture runs ``matam_assembly.py`` once on
the 16-species simulated example dataset; the tests then check the return code
and the expected output files.  ``test_metaquast`` additionally scores the
assembly with ``metaquast.py`` when that program is available in PATH.
"""
import collections
import multiprocessing
import os
import shutil
import subprocess
import sys
import tempfile

import pytest

CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
SCRIPTS_DIR = os.path.join(CURRENT_DIR, '..', 'scripts')
EXAMPLES_DIR = os.path.join(CURRENT_DIR, '..', 'examples')
DB_DIR = os.path.join(CURRENT_DIR, '..', 'db')

# binary_utils lives in the scripts directory, which is not a package.
sys.path.append(SCRIPTS_DIR)
from binary_utils import Binary

# skip all module tests if needed
pytestmark = pytest.mark.skipif(
    not os.path.isdir(DB_DIR),
    reason='DB_DIR is missing:%s' % DB_DIR
)

# Return code of the MATAM run plus the paths of the files it should produce.
MatamResults = collections.namedtuple(
    "MatamResults",
    "return_code fasta krona_html krona_tab rdp_tab"
)


@pytest.fixture(scope='module')
def matam_results():
    """Run matam_assembly.py once and yield a ``MatamResults`` tuple.

    The output directory is a fresh temporary directory that is removed when
    the module's tests are done, or if the run itself raises.
    """
    out = tempfile.mkdtemp(dir='/tmp/', prefix='matam_functionnal_test_')
    try:
        reads = os.path.join(
            EXAMPLES_DIR,
            '16sp_simulated_dataset/16sp.art_HS25_pe_100bp_50x.fq'
        )
        db = os.path.join(DB_DIR, 'SILVA_128_SSURef_NR95')

        # Argument list (no shell) so paths containing spaces or shell
        # metacharacters are passed through unchanged.
        cmd = [
            os.path.join(SCRIPTS_DIR, 'matam_assembly.py'),
            '-i', reads,
            '-d', db,
            '-o', out,
            '--cpu', str(multiprocessing.cpu_count()),
            '--max_memory', '3000',
            '--debug',
            '--coverage_threshold', '2000',
            '--perform_taxonomic_assignment',
        ]
        completed_process = subprocess.run(cmd)

        yield MatamResults(
            return_code=completed_process.returncode,
            fasta=os.path.join(out, 'final_assembly.fa'),
            krona_html=os.path.join(out, 'krona.html'),
            krona_tab=os.path.join(out, 'krona.tab'),
            rdp_tab=os.path.join(out, 'rdp.tab')
        )
    finally:
        if os.path.isdir(out):
            shutil.rmtree(out)


def exists_and_not_empty(fpath):
    """Return True if ``fpath`` is a regular file with a non-zero size."""
    return os.path.isfile(fpath) and os.stat(fpath).st_size != 0


def test_return_code(matam_results):
    assert matam_results.return_code == 0


def test_final_fasta_file(matam_results):
    assert exists_and_not_empty(matam_results.fasta)


def test_krona_html(matam_results):
    assert exists_and_not_empty(matam_results.krona_html)


def test_krona_tab(matam_results):
    assert exists_and_not_empty(matam_results.krona_tab)


def test_rdp_tab(matam_results):
    assert exists_and_not_empty(matam_results.rdp_tab)


def extract_metaquast_val(tsv):
    """Return, as a float, the second tab-separated field of line 2 of ``tsv``.

    Raises IndexError if the file has fewer than two lines or line 2 has fewer
    than two fields, and ValueError if the field is not a number.
    """
    with open(tsv, 'r') as tsv_handler:
        lines = tsv_handler.readlines()
    return float(lines[1].split('\t')[1].strip())


def _metaquast_summary_tsv(data_directory, metric):
    """Return the path of the summary TSV for ``metric`` under ``data_directory``."""
    return os.path.join(
        data_directory,
        'quast_results/latest/summary/TSV/%s.tsv' % metric
    )


@pytest.mark.skipif(
    not Binary.which('metaquast.py'),
    reason="requires metaquast.py to be in PATH"
)
def test_metaquast(matam_results):
    """Score the assembly against the true references with metaquast.py.

    Checks that the genome fraction and the combined error rate
    (mismatches + indels + Ns) stay within the expected thresholds.
    """
    data_directory = tempfile.mkdtemp(dir='/tmp/', prefix='metaquast_')
    try:
        true_ref = os.path.join(
            EXAMPLES_DIR, '16sp_simulated_dataset/16sp.fasta'
        )
        cmd = [
            'metaquast.py',
            '-a', 'all',
            '--ambiguity-score', '1',
            '--min-identity', '97',
            '-x', '500',
            '--unaligned-part-size', '200',
            '-R', true_ref,
            matam_results.fasta,
        ]
        # metaquast writes quast_results/ relative to its working directory.
        subprocess.run(cmd, cwd=data_directory)

        genome_fraction = extract_metaquast_val(
            _metaquast_summary_tsv(data_directory, 'Genome_fraction')
        )
        mismatches = extract_metaquast_val(
            _metaquast_summary_tsv(data_directory, 'num_mismatches_per_100_kbp')
        )
        # Indels have their own summary file; reading the Ns file here would
        # count Ns twice and ignore indels entirely.
        indels = extract_metaquast_val(
            _metaquast_summary_tsv(data_directory, 'num_indels_per_100_kbp')
        )
        ns = extract_metaquast_val(
            _metaquast_summary_tsv(data_directory, 'num_Ns_per_100_kbp')
        )

        # Values are counts per 100000 bp; / 100000 * 100 gives a percentage.
        error_rate = (mismatches + indels + ns) / 1000

        assert genome_fraction > 86.4
        assert error_rate < 0.15
    finally:
        # Always remove the scratch directory, even when an assertion fails.
        shutil.rmtree(data_directory, ignore_errors=True)