#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import os
import sys
import argparse
import subprocess
import time
import logging

# Module-level logger (its handler and level are configured in the
# script entry point below)
logger = logging.getLogger(__name__)

# Resolve the on-disk location of the running program
program_filepath = os.path.realpath(sys.argv[0])
program_dirpath = os.path.dirname(program_filepath)
program_filename = os.path.basename(program_filepath)
program_name, program_extension = os.path.splitext(program_filename)

# Paths of the MATAM resources, all relative to the program directory
matam_root_dirpath = program_dirpath
matam_db_dirpath = os.path.join(matam_root_dirpath, 'db')
matam_scripts_dirpath = os.path.join(matam_root_dirpath, 'scripts')
index_ref_db_bin = os.path.join(matam_scripts_dirpath, 'index_ref_db.py')

# Default reference database: base name, archive file name, download URL
default_ref_db_basename = 'SILVA_128_SSURef_NR95'
default_ref_db_archive_filename = '{0}.tar.bz2'.format(default_ref_db_basename)
default_ref_db_archive_url = 'http://bioinfo.lifl.fr/matam/{0}'.format(default_ref_db_archive_filename)


def parse_arguments():
    """
    Build the command-line parser, parse the command line and normalise
    the result.

    Returns the argparse namespace with:
      - ref_dir: absolute path of the output directory; when -d/--ref_dir
        is missing or empty, the MATAM 'db' directory is used
      - max_memory: memory budget as an integer number of MBi
        (10000 when -m/--max_memory is not given)
    """
    cli = argparse.ArgumentParser(description='Index default SSU rRNA DB')

    # Output directory option
    cli.add_argument(
        '-d', '--ref_dir',
        action='store',
        metavar='DBDIRPATH',
        type=str,
        help='Output dir. Default is $MATAM_DIR/db/',
    )

    # Memory budget option
    cli.add_argument(
        '-m', '--max_memory',
        action='store',
        metavar='MAXMEM',
        type=int,
        default=10000,
        help='Maximum memory to use (in MBi). Default is %(default)s MBi',
    )

    parsed = cli.parse_args()

    # Fall back to the default db directory, then make the path absolute
    parsed.ref_dir = os.path.abspath(parsed.ref_dir or matam_db_dirpath)

    return parsed


if __name__ == '__main__':

    # Set logging
    # create console handler
    ch = logging.StreamHandler()
    # set logging level
    logger.setLevel(logging.DEBUG)
    # create formatter for debug level
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    # add the formatter to the console handler
    ch.setFormatter(formatter)
    # add the handler to logger
    logger.addHandler(ch)

    # Set global t0
    global_t0_wall = time.time()

    # Init global error code
    global_error_code = 0

    # Arguments parsing
    args = parse_arguments()

    #
    sys.stderr.write('\n')

    ############################
    # Set output directory path

    default_ref_db_basepath = os.path.join(args.ref_dir, default_ref_db_basename)
    default_ref_db_archive_filepath = os.path.join(args.ref_dir, default_ref_db_archive_filename)

    #########################
    # Get compressed archive

    logger.info('-- Get compressed archive --')

    os.chdir(matam_root_dirpath)
    logger.debug('PWD: {0}'.format(matam_root_dirpath))

    cmd_line = 'mkdir ' + args.ref_dir
    cmd_line += '; wget ' + default_ref_db_archive_url
    cmd_line += ' -O ' + default_ref_db_archive_filepath

    logger.debug('CMD: {0}'.format(cmd_line))
    error_code = subprocess.call(cmd_line, shell=True)

    if error_code > 0:
        logger.warning('A problem might have happened while getting the compressed archive. Check log above')

    global_error_code += error_code

    sys.stderr.write('\n')

    ############################
    # Extracting default ref db

    logger.info('-- Extracting default ref db --')

    os.chdir(args.ref_dir)
    logger.debug('PWD: {0}'.format(args.ref_dir))

    cmd_line = 'tar jxvf ' + default_ref_db_archive_filename

    logger.debug('CMD: {0}'.format(cmd_line))
    error_code = subprocess.call(cmd_line, shell=True)

    if error_code > 0:
        logger.warning('A problem might have happened while extracting default ref db. Check log above')

    global_error_code += error_code

    sys.stderr.write('\n')

    ##########################
    # Indexing default ref db

    logger.info('-- Indexing default ref db --')

    logger.debug('PWD: {0}'.format(args.ref_dir))

    cmd_line = index_ref_db_bin + ' -v -i ' + default_ref_db_basepath
    cmd_line += ' --max_memory ' + str(args.max_memory)

    logger.debug('CMD: {0}'.format(cmd_line))
    error_code = subprocess.call(cmd_line, shell=True)

    if error_code > 0:
        logger.warning('A problem might have happened while indexing default ref db. Check log above')

    global_error_code += error_code

    sys.stderr.write('\n')

    #######
    # Exit

    logger.info('-- Completed default SSU rRNA DB indexing --')

    if global_error_code > 0:
        logger.warning('Problems might have happened during indexing. Please check log above')
        sys.exit(global_error_code)
    else:
        logger.debug('Indexing completed in {0:.2f} seconds'.format(time.time() - global_t0_wall))
        logger.info('Indexing went well. '
                    'Default SSU rRNA DB and its indexes can be found in'
                    ': {0}*'.format(default_ref_db_basepath))

    sys.stderr.write('\n')































# End of script