https://github.com/galaxyproject/galaxy
Raw File
Tip revision: 48510a2cc12dd76e661f1bac3c14268926a0b3c1 authored by John Chilton on 12 December 2019, 16:37:38 UTC
Merge pull request #9089 from nsoranzo/release_16.04_extra-index-url
Tip revision: 48510a2
maf_filter.py
# Dan Blankenberg
# Filters a MAF file according to the provided code file, which is generated in maf_filter.xml <configfiles>
# Also allows filtering by number of columns in a block, and limiting output species
import os
import shutil
import sys

import bx.align.maf

from galaxy.tools.util import maf_utilities


def main():
    """Filter the blocks of a MAF file with a generated filter script.

    Positional command line arguments, consumed in order from ``sys.argv``:

    1. script_file -- Python code executed once per candidate block. It is
       given the block as ``maf_block`` and must set ``ret_val`` to a true
       value for the block to be kept.
    2. maf_file -- path of the input MAF file.
    3. out_file -- path of the output MAF file.
    4. additional_files_path -- directory that receives a copy of the filter
       script as ``debug.txt``.
    5. species -- handed to ``maf_utilities.parse_species_option``; when the
       result is truthy, kept blocks are limited to those species.
    6. min_size -- smallest ``text_size`` a block may have.
    7. max_size -- largest ``text_size`` a block may have; values below 1
       mean "no upper limit".
    8. min_species_per_block -- minimum number of components a block must
       still have after species limiting.
    9. exclude_incomplete_blocks -- integer flag; when non-zero, blocks with
       fewer components than the number of species are dropped.
    10. comma separated species list -- only read when argument 5 yields no
        species; used solely to count the species.

    A one line summary is printed to stdout. If the arguments cannot be
    parsed or the MAF files cannot be opened, a message is written to stderr
    and the process exits with status 1.
    """
    # Read command line arguments
    try:
        script_file = sys.argv.pop(1)
        maf_file = sys.argv.pop(1)
        out_file = sys.argv.pop(1)
        additional_files_path = sys.argv.pop(1)
        species = maf_utilities.parse_species_option(sys.argv.pop(1))
        min_size = int(sys.argv.pop(1))
        max_size = int(sys.argv.pop(1))
        if max_size < 1:
            # sys.maxsize is available on Python 2.6+ and 3, unlike sys.maxint
            max_size = sys.maxsize
        min_species_per_block = int(sys.argv.pop(1))
        exclude_incomplete_blocks = int(sys.argv.pop(1))
        if species:
            num_species = len(species)
        else:
            num_species = len(sys.argv.pop(1).split(','))
    except Exception:
        # 'Exception' rather than a bare except so that SystemExit and
        # KeyboardInterrupt are not swallowed.
        sys.stderr.write("One or more arguments is missing.\nUsage: maf_filter.py maf_filter_file input_maf output_maf path_to_save_debug species_to_keep\n")
        sys.exit(1)

    # Open input and output MAF files
    try:
        maf_reader = bx.align.maf.Reader(open(maf_file, 'r'))
        maf_writer = bx.align.maf.Writer(open(out_file, 'w'))
    except Exception:
        sys.stderr.write("Your MAF file appears to be malformed.\n")
        sys.exit(1)

    # Save script file for debugging/verification info later
    if not os.path.isdir(additional_files_path):
        os.mkdir(additional_files_path)
    shutil.copy(script_file, os.path.join(additional_files_path, 'debug.txt'))

    # Compile the filter once instead of re-reading and re-parsing the script
    # file for every block.
    # NOTE(review): this executes arbitrary Python from script_file; it must
    # only ever be a trusted, tool-generated file.
    with open(script_file) as script_handle:
        filter_code = compile(script_handle.read(), script_file, 'exec')

    # Loop through blocks, running filter on each
    # 'maf_block' and 'ret_val' are used/shared in the provided code file
    # 'ret_val' should be set to True if the block is to be kept
    blocks_seen = 0
    blocks_kept = 0
    try:
        for maf_block in maf_reader:
            blocks_seen += 1
            if not min_size <= maf_block.text_size <= max_size:
                continue
            local = {'maf_block': maf_block, 'ret_val': False}
            # Tuple form of exec is valid on both Python 2 and Python 3;
            # a fresh globals dict is used for every block.
            exec(filter_code, {}, local)
            if not local['ret_val']:
                continue
            # Species limiting must be done after filters as filters could be run on non-requested output species
            if species:
                maf_block = maf_block.limit_to_species(species)
            num_components = len(maf_block.components)
            if num_components < min_species_per_block:
                continue
            if exclude_incomplete_blocks and num_components < num_species:
                continue
            maf_writer.write(maf_block)
            blocks_kept += 1
    finally:
        # Close both files even if the filter script raises
        maf_writer.close()
        maf_reader.close()

    # Count blocks explicitly: the last enumerate() index is 0 both for an
    # empty file and for a file holding exactly one block.
    if blocks_seen == 0:
        print("Your file contains no valid maf_blocks.")
    else:
        print('Kept %s of %s blocks (%.2f%%).' % (blocks_kept, blocks_seen, float(blocks_kept) / float(blocks_seen) * 100.0))

# Run the filter only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
back to top