https://github.com/RabadanLab/TOBI
Raw File
Tip revision: 47e9f4e80ebd11f03f5f202f55e9e5c13e510e9f authored by gitliver on 29 May 2019, 16:26:36 UTC
add MIT License
Tip revision: 47e9f4e
TOBIml.py
#!/usr/bin/env python

import argparse
import os
import sys
import subprocess

def get_arg():
    """Parse the command line and return the resulting namespace.

    One positional argument, ``step``, selects the stage to run
    (``preprocess`` or ``machinelearning``). Every option is declared as
    optional at the argparse level; the options labelled [REQUIRED] in the
    help text are not enforced here.

    If the program is invoked with no arguments at all, the help text is
    printed and the process exits with status 1.

    Returns:
        argparse.Namespace with the attributes ``step``, ``input``,
        ``output``, ``somatic``, ``log``, ``check_missed``, ``suffix``,
        ``vcftype``, ``train_size`` and ``verbose``.
    """
    description = (
        "TOBIv1.2: \n"
        "        Tumor Only Boosting Identification of Driver Mutations.\n"
        "        Machine learning step."
    )
    cli = argparse.ArgumentParser(description=description)

    # Positional selector for the pipeline stage.
    cli.add_argument(
        "step",
        choices=['preprocess', 'machinelearning'],
        help="preprocess: preprocessing step; machinelearning: machine learning step",
    )

    # Optional flags, registered in the order they appear in the help output.
    option_table = [
        ('--input', {
            'help': "[REQUIRED] input file",
        }),
        ('--output', {
            'help': "[REQUIRED] output file",
        }),
        ('--somatic', {
            'help': "[REQUIRED] formatted file containing somatic variants",
        }),
        ('--log', {
            'help': 'Optional argument to specify a log to pipe stdout and stderr to',
        }),
        ('--check_missed', {
            'help': '[PP ARG] checking which mutations in important genes are missed by filtering',
        }),
        ('--suffix', {
            'help': '[ML ARG] a label specific to this particular run (e.g. <date>_<disease>)',
        }),
        ('--vcftype', {
            'choices': ['default', 'TCGA'],
            'default': "default",
            'help': "Specifies vcf type specically for TCGA filtering",
        }),
        ('--train_size', {
            'help': '[ML ARG] number of patients you want in the training set.',
        }),
        ('--verbose', {
            'action': "store_true",
            'default': False,
            'help': "verbose flag",
        }),
    ]
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)

    # A bare invocation gets the usage text and a failing exit status.
    if len(sys.argv) == 1:
        cli.print_help()
        sys.exit(1)

    return cli.parse_args()
    
def check_main_args(args):
    """Abort unless the options needed by every step were supplied.

    Args:
        args: parsed command-line namespace; reads ``input``, ``output``
            and ``somatic``.

    Raises:
        SystemExit: with an error message naming the first of ``--input``,
            ``--output`` or ``--somatic`` (checked in that order) whose
            value is None.
    """
    # The message names the option exactly as the parser declares it, so a
    # missing input is reported as '--input' (there is no '--inputdir').
    for option in ("input", "output", "somatic"):
        if getattr(args, option) is None:
            sys.exit("[ERROR]: Missing required '--" + option + "' argument.")
        
def check_ml_args(args):
    """Abort unless the options specific to the machine learning step are set.

    Args:
        args: parsed command-line namespace; reads ``suffix`` and
            ``train_size``.

    Raises:
        SystemExit: with an error message naming the first of ``--suffix``
            or ``--train_size`` (checked in that order) whose value is None.
    """
    for option in ("suffix", "train_size"):
        if getattr(args, option) is None:
            sys.exit("[ERROR]: Missing required '--" + option + "' argument.")

def pp_cmdgen(args, source_dir):
    """Build the shell command line that launches the preprocessing R script.

    The command has the form::

        <source_dir>/machine_learning/pre_processing.R <input> <output>
            <somatic> <source_dir> <vcftype> [>> <log> 2>&1]

    Every interpolated value is shell-quoted, so paths containing spaces or
    shell metacharacters reach the script as single, literal arguments.
    Values made only of shell-safe characters are emitted unchanged.

    Args:
        args: parsed command-line namespace; reads ``input``, ``output``,
            ``somatic``, ``vcftype``, ``log`` and ``verbose``.
        source_dir: directory under which ``machine_learning/`` is looked up;
            also passed to the script as an argument.

    Returns:
        The command as a single string meant to be run by a shell. When
        ``args.log`` is None no redirection is appended and the string ends
        with a single trailing space.
    """
    try:
        from shlex import quote  # Python 3.3+
    except ImportError:
        from pipes import quote  # Python 2 fallback

    # Append stdout and stderr to the log file only when one was requested.
    if args.log is None:
        pipe = ""
    else:
        pipe = ">> " + quote(args.log) + " 2>&1"

    words = [
        source_dir + "/machine_learning/pre_processing.R",
        args.input,
        args.output,
        args.somatic,
        source_dir,
        args.vcftype,
    ]
    cmd = " ".join(quote(word) for word in words) + " " + pipe
    if args.verbose:
        print(cmd)
    return cmd
    
def ml_cmdgen(args, source_dir):
    """Build the shell command line that launches the machine learning R script.

    The command has the form::

        <source_dir>/machine_learning/machine_learning.R <input> <output>
            <somatic> <suffix> <train_size> <source_dir> <vcftype>
            [>> <log> 2>&1]

    Every interpolated value is shell-quoted, so paths or labels containing
    spaces or shell metacharacters reach the script as single, literal
    arguments. Values made only of shell-safe characters are emitted
    unchanged.

    Args:
        args: parsed command-line namespace; reads ``input``, ``output``,
            ``somatic``, ``suffix``, ``train_size``, ``vcftype``, ``log``
            and ``verbose``.
        source_dir: directory under which ``machine_learning/`` is looked up;
            also passed to the script as an argument.

    Returns:
        The command as a single string meant to be run by a shell. When
        ``args.log`` is None no redirection is appended and the string ends
        with a single trailing space.
    """
    try:
        from shlex import quote  # Python 3.3+
    except ImportError:
        from pipes import quote  # Python 2 fallback

    # Append stdout and stderr to the log file only when one was requested.
    if args.log is None:
        pipe = ""
    else:
        pipe = ">> " + quote(args.log) + " 2>&1"

    words = [
        source_dir + "/machine_learning/machine_learning.R",
        args.input,
        args.output,
        args.somatic,
        args.suffix,
        args.train_size,
        source_dir,
        args.vcftype,
    ]
    cmd = " ".join(quote(word) for word in words) + " " + pipe
    if args.verbose:
        print(cmd)
    return cmd

def runShellCmd(cmd):
    """Run ``cmd`` through the shell and abort the program if it fails.

    The child's stdout and stderr are captured rather than inherited. On
    success the captured output is discarded; on failure the captured
    stderr is forwarded to this process's stderr before exiting.

    Args:
        cmd: complete shell command line as a single string.

    Raises:
        SystemExit: if the command finishes with a non-zero exit status.
    """
    proc = subprocess.Popen(
        cmd,
        shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
    # communicate() drains both pipes while waiting for the child. A bare
    # wait() can deadlock once the child has written enough output to fill
    # an OS pipe buffer that nobody is reading.
    _, captured_err = proc.communicate()
    if proc.returncode != 0:
        if captured_err:
            # Forward the child's diagnostics so the failure can be diagnosed.
            sys.stderr.write(captured_err.decode("utf-8", "replace"))
        sys.exit("[ERROR] process '" + cmd + "' terminated with non-zero exit code")
        

def main():
    """Entry point: parse arguments, validate them and run the chosen step.

    The options common to both steps are validated first. ``preprocess``
    then runs the preprocessing command; ``machinelearning`` additionally
    validates its own options before running the machine learning command.
    The directory containing this file (symlinks resolved) is handed to the
    command builders as the source directory.
    """
    args = get_arg()
    check_main_args(args)
    script_home = os.path.dirname(os.path.realpath(__file__))

    if args.step == "preprocess":
        # preprocessing
        command = pp_cmdgen(args, script_home)
        runShellCmd(command)
    elif args.step == "machinelearning":
        # machine learning
        check_ml_args(args)
        command = ml_cmdgen(args, script_home)
        runShellCmd(command)

# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
    
back to top