#!/usr/bin/env python

#
# =============================================================================
#
#                                   Preamble
#
# =============================================================================
#

from optparse import OptionParser
import sqlite3
import sys
import os

from pylal import ligolw_sqlutils as sqlutils

from glue import git_version
from glue.ligolw import dbtables
from glue.ligolw.utils import process
from glue.ligolw import lsctables

__prog__ = "ligolw_cbc_cfar"
__author__ = "Collin Capano <cdcapano@physics.syr.edu>"

description = \
"Calculates false alarm rates (FAR) in units of yr^(-1) and stores to " + \
"specified output_column. Writes results out to a new database. "

# =============================================================================
#
#                                   Function Definitions
#
# =============================================================================

def get_ifo_multiplicity( ifos ):
    """
    Returns the number of coincident ifos in an 'ifos' string.

    @ifos: a string of comma-separated ifos
    """
    return len( lsctables.instrument_set_from_ifos( ifos ) )

# =============================================================================
#
#                                   Set Options
#
# =============================================================================


def parse_command_line():
    """
    Parse the command line, return options and check for consistency among the
    options.
    """
    parser = OptionParser(
        version = git_version.verbose_msg,
        usage   = "%prog [options]",
        description = description
        )

    # following are related to file input and output naming
    parser.add_option( "-i", "--input", action = "store", type = "string", default = None,
        help = 
            "Required. Input database to read. Only one input database can be given at a time."
            )
    parser.add_option( "-o", "--output", action = "store", type = "string", default = None,
        help = 
            "Required. Name of output database to save to."
            )
    parser.add_option( "-t", "--tmp-space", action = "store", type = "string", default = None,
        metavar = "PATH",
        help = 
            "Required. Location of local disk on which to do work. " +
            "This is used to enhance performance in a networked " +
            "environment, and to protect against accidentally " +
            "overwriting the input database."
            )
    parser.add_option( "-v", "--verbose", action = "store_true", default = False,
        help = 
            "Print progress information"
           )
    parser.add_option( "-D", "--debug", action = "store_true", default = False,
        help =
            "Print SQLite queries used and the approximate time taken to run each one." )
    # following are generic inspiral_sql options
    parser.add_option( "-P", "--param-name", action = "store", default = None,
        metavar = "PARAMETER", 
        help = 
            "Can be any parameter in the param table (see --param-table). " + 
            "Specifying this and --param-ranges defines the categories " +
            "in which the uncombined FARs are calculated and over which " +
            "they are combined in the combined-FAR calculation. " +
            "If not specified, triggers will not be binned when calculating " +
            "the uncombined FAR, and so the uncombined FAR will be the same as " +
            "the combined FAR."
             )
    parser.add_option( "-R", "--param-ranges", action = "store", default = None,
        metavar = " [ LOW1, HIGH1 ); ( LOW2, HIGH2]; etc.",
        help = 
            "Requires --param-name. Specify the parameter ranges " +
            "to bin the triggers in. A '(' or ')' implies an open " +
            "boundary, a '[' or ']' a closed boundary. To specify " +
            "multiple ranges, separate each range by a ';'. Any " +
            "coincidences that fall outside of the union of all " +
            "specified ranges will be deleted." 
            )
    parser.add_option( "-X", "--exclude-coincs", action = "store", type = "string", default = None,
        metavar = " [COINC_INSTRUMENTS1 + COINC_INSTRUMENTS2 in INSTRUMENTS_ON1];" +
            "[ALL in INSTRUMENTS_ON2]; etc.",
        help = 
            "Exclude coincident types in specified detector times, " + 
            "e.g., '[H2,L1 in H1,H2,L1]'. Some rules: " +             
                "* Coinc-types and detector time must be separated by " + 
                "an ' in '. When specifying a coinc_type or detector " +
                "time, detectors and/or ifos must be separated by " +
                "commas, e.g. 'H1,L1' not 'H1L1'. " + 
                "* To specify multiple coinc-types in one type of time, " +
                "separate each coinc-type by a '+', e.g., " +
                "'[H1,H2 + H2,L1 in H1,H2,L1]'. " +
                "* To exclude all coincs in a specified detector time " +
                "or specific coinc-type in all times, use 'ALL'. E.g., " +
                "to exclude all H1,H2 triggers, use '[H1,H2 in ALL]' " +
                "or to exclude all H2,L1 time use '[ALL in H2,L1]'. " +
                "* To specify multiple exclusions, separate each " +
                "bracket by a ';'. " +
                "* Order of the instruments nor case of the letters " +
                "matter. So if your pinky is broken and you're " +
                "dyslexic you can type '[h2,h1 in all]' without a " +
                "problem. " 
            )
    parser.add_option( "-I", "--include-only-coincs", action = "store", type = "string", default = None,
        metavar = " [COINC_INSTRUMENTS1 + COINC_INSTRUMENTS2 in INSTRUMENTS_ON1];" +
            "[ALL in INSTRUMENTS_ON2]; etc.",
        help =
            "Opposite of --exclude-coincs: fars will be calculated " +
            "for the specified coinc types only (all other coincs will be " +
            "deleted from the output database). To avoid confusing overlaps, " +
            "cannot specify both --exclude-coincs and --include-only-coincs." 
            )
    parser.add_option( "-V", "--vacuum", action = "store_true", default = False,
        help = 
            "If turned on, will vacuum the database before saving. " +
            "This cleans any fragmentation and removes empty space " +
            "left behind by all the DELETEs, making the output " +
            "database smaller and more efficient. " +
            "WARNING: Since this requires rebuilding the entire " +
            "database, this can take awhile for larger files." 
            )
    # following are options specific to this program
    parser.add_option( "-s", "--ranking-stat", action = "store", type = "string", default = None,
        help =
            "Required. The stat to use to rank triggers for calculating " +
            "the false alarm rate. It can be any column in the specified ranking-table. "
            )
    parser.add_option( "-b", "--rank-by", action = "store", type = "string", default = None, 
        metavar = "MAX or MIN",
        help = 
            "Required. Options are MAX or MIN. " +
            "This specifies whether larger (MAX) or smaller (MIN) values " +
            "of the ranking stat indicate more significant triggers." 
            )
    parser.add_option( "-c", "--uncombined-far-column", action = "store", type = "string", default = None, 
        help = 
            "Required. Column in the output-table to store uncombined far to. If the column doesn't exist, " +
            "it will be created. Column name must be all lower-case. "
            )
    parser.add_option( "", "--combined-far-column", action = "store", type = "string", default = None,
        help =
            "Required. Column in the output-table to store combined far to. If the column doesn't exist, " +
            "it will be created. Column name must be all lower-case. "
            )
    parser.add_option("", "--ranking-table", action="store", type="string",
        default=None,
        help =
          "Required. What table to get the ranking statistic from. Can be any table with a coinc_event_id. Ex. coinc_inspiral."
        )
    parser.add_option("", "--ifos-table", action="store", type="string",
        default=None,
        help =
          "What table to look in for the coincident ifos. Can be any table with a coinc_event_id and an ifos column. " +
          "Default is to use whatever ranking-table is set to. If, however, the specified ranking-table does not have an " +
          "ifos column (e.g., coinc_event), then this must be specified."
        )
    parser.add_option("", "--param-table", action="store", type="string",
        default=None,
        help =
          "If specifying a param-name and ranges, what table to look in for the param values. Can be any table with a coinc_event_id. " +
          "Default is to use whatever ranking-table is set to."
        )
    parser.add_option("", "--output-table", action="store", type="string",
        default=None,
        help =
          "What table to write the output columns to. Can be any table with a coinc_event_id column. Default is to write to " +
          "whatever ranking-table is set to."
        )
    parser.add_option( "-G", "--group-by-ifos", action = "store_true", default = False, 
        help = 
            "Turning on will cause triggers to be grouped by coincident ifos when " +
            "calculating FARs."
            )
    parser.add_option( "-M", "--group-by-multiplicity", action = "store_true", default = False, 
        help = 
            "Turning on will cause triggers to be grouped by the number of coincident ifos when " +
            "calculating FARs. For example, doubles will be grouped with doubles, triples with triples, etc. " +
            "Note: if both --group-by-ifos and --group-by-multiplicity are on, --group-by-ifos takes precedence."
            )
    parser.add_option( "-T", "--time-units", action = "store", default = 'yr',
        metavar = "s, min, hr, days, OR yr",
        help = 
            "Time units to use when calculating FARs (the units of the FARs will be the inverse of this). " +
            "Options are s, min, hr, days, or yr. Default is yr."
            )
  
    (options, args) = parser.parse_args()
  
    # check for required options and for self-consistency
    if not options.input:
        raise ValueError, "No input specified."
    if not options.output:
        raise ValueError, "No output specified."
    if not options.tmp_space:
        raise ValueError, "--tmp-space is a requred argument."
    if not options.ranking_stat:
        raise ValueError, "No ranking stat specified."
    if not options.ranking_table:
        raise ValueError, "--ranking-table is a required argument."
    if options.rank_by is None or options.rank_by.strip().upper() not in ('MAX', 'MIN'):
        raise ValueError, "--rank-by must be specified and set to either MAX or MIN."
    if options.param_name and not options.param_ranges:
        raise ValueError, "--param-name requires --param-ranges."
    if options.param_ranges and not options.param_name:
        raise ValueError, "--param-ranges requires --param-name."
    if not options.uncombined_far_column:
        raise ValueError, "--uncombined-far-column is a required argument."
    if not options.combined_far_column:
        raise ValueError, "--combined-far-column is a required argument."
    if ' ' in options.uncombined_far_column.strip():
        raise ValueError, "--uncombined-far-column cannot have spaces in it"
    if ' ' in options.combined_far_column.strip():
        raise ValueError, "--combined-far-column cannot have spaces in it"
    if options.exclude_coincs and options.include_only_coincs:
        raise ValueError, "Cannot specify both --exclude-coincs and --include-only-coincs."

    return options, sys.argv[1:]


# =============================================================================
#
#                                     Main
#
# =============================================================================

#
#       Generic Initialization
#

opts, args = parse_command_line()

sqlite3.enable_callback_tracebacks(opts.debug)

# get input database filename
filename = opts.input
if not os.path.isfile( filename ):
    raise ValueError, "The input file, %s, cannot be found." % filename

# Setup working databases and connections
if opts.verbose: 
    print >> sys.stdout, "Setting up temp. database..."
working_filename = dbtables.get_connection_filename( 
    filename, tmp_path = opts.tmp_space, verbose = opts.verbose )
connection = sqlite3.connect( working_filename )
if opts.tmp_space:
    dbtables.set_temp_store_directory(connection, opts.tmp_space, verbose = opts.verbose)

ranking_table = sqlutils.validate_option( opts.ranking_table )
if opts.ifos_table is None:
    ifos_table = ranking_table
else:
    ifos_table = sqlutils.validate_option( opts.ifos_table )
if opts.param_table is None:
    param_table = ranking_table
else:
    param_table = sqlutils.validate_option( opts.param_table )
if opts.output_table is None:
    output_table = ranking_table
else:
    output_table = sqlutils.validate_option( opts.output_table )

# Add program to process and process params table

# FIXME: remove the following two lines once boolean type
# has been properly handled
from glue.ligolw import types as ligolwtypes
ligolwtypes.FromPyType[type(True)] = ligolwtypes.FromPyType[type(8)]

# create an xmldoc representation of the database for writing the
# process and process-params
xmldoc = dbtables.get_xml(connection)
# Add entries to process and process_params tables for this program
proc_id = process.register_to_xmldoc(xmldoc, __prog__, opts.__dict__, version = git_version.id)

# Get param and param-ranges if specified
if opts.param_name:
    param_parser = sqlutils.parse_param_ranges( param_table, opts.param_name, 
      opts.param_ranges, verbose = opts.verbose )
    param_name = param_parser.get_param_name()
    param_filters = param_parser.get_param_filters()
else:
    param_filters = None

# Get exclude_coincs list if specified
if opts.exclude_coincs:
    exclude_coinc_filters = sqlutils.parse_coinc_options( opts.exclude_coincs, 
        verbose = opts.verbose ).get_coinc_filters( ifos_table )
else:
    exclude_coinc_filters = None

# Get include_coincs list if specified
if opts.include_only_coincs:
    include_coinc_filters = sqlutils.parse_coinc_options( opts.include_only_coincs, 
        verbose = opts.verbose ).get_coinc_filters( ifos_table )
else:
    include_coinc_filters = None

# Clear ranking_table of triggers outside of interested ranges
if param_filters or exclude_coinc_filters or include_coinc_filters:
    sqlutils.apply_inclusion_rules_to_coinc_table( connection, ranking_table, 
        exclude_coincs = exclude_coinc_filters,
        include_coincs = include_coinc_filters, 
        param_filters = param_filters, verbose = opts.verbose )

#
#         Program-specific Initialization
# 

# validate ranking stats
ranking_stat = sqlutils.validate_option( opts.ranking_stat )
rank_by = sqlutils.validate_option( opts.rank_by, lower = False ).upper()

# create column to store output to if it doesn't already exist
output_table, uncombined_far_column = sqlutils.create_column( connection, output_table, opts.uncombined_far_column )
output_table, combined_far_column = sqlutils.create_column( connection, output_table, opts.combined_far_column )

# sqlitize desired conversion function

def convert_livetime( duration, unit = opts.time_units ):
    """
    Uses sqlutils.convert_duration to automatically convert the frg_durs
    to the desired unit.
    """
    unit = sqlutils.validate_option( unit, lower = True )
    return sqlutils.convert_duration( duration, unit )

connection.create_function( 'convert_livetime', 1, convert_livetime )
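# convert_livetime can now be called from within SQL, e.g.
# convert_livetime(experiment_summary.duration) in the queries below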

# sqlitize param_parser.group_by_param_range()
if opts.param_name:
    connection.create_function("group_by_param", 1, param_parser.group_by_param_range)
    param_grouping = ''.join([ 'group_by_param(', param_name, ')' ])
else:
    param_grouping = '0'
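# param_grouping is spliced directly into the SQL below: with a param-name it
# evaluates to the index of the --param-ranges bin a trigger falls in;
# otherwise every trigger shares the single bin '0'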

#
# Collect information about the background 
#
if opts.verbose:
    print >> sys.stderr, "Getting background statistics..."

# initialize two Summaries instances: background collects the ranking stats for
# the uncombined FAR; background2 collects the uncombined FARs for the combined FAR
background = sqlutils.Summaries()
background2 = sqlutils.Summaries()

# get information about all rows in the experiment summary table
sqlquery = """
    SELECT
        experiment_summary.experiment_id,
        experiment_summary.experiment_summ_id,
        convert_livetime(experiment_summary.duration),
        experiment_summary.datatype
    FROM
        experiment_summary
    """
for eid, esid, duration, datatype in connection.cursor().execute( sqlquery ):
    background.append_duration(eid, esid, duration)
    background2.append_duration(eid, esid, duration)
    if datatype != "slide":
        background.append_zero_lag_id( eid, esid )
        background2.append_zero_lag_id( eid, esid )


# calculate the background duration for each experiment_summary_id 
background.calc_bkg_durs()
background2.calc_bkg_durs()
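# (these background livetimes are what the counts of louder background
# triggers are measured against when the FARs are computed below)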

# get desired ifo grouping
if opts.group_by_ifos:
    ifo_grouping = '.'.join([ ifos_table, 'ifos' ])
elif opts.group_by_multiplicity:
    ifo_grouping = ''.join(['get_ifo_multiplicity(', ifos_table, '.ifos)'])
    connection.create_function('get_ifo_multiplicity', 1, get_ifo_multiplicity)
else:
    ifo_grouping = '"ALL_IFOS"'
# add any extra tables to the join statement
add_join = ''
add_tables = set([table_name for table_name in [ifos_table, param_table] if table_name != ranking_table])
for table_name in add_tables:
    add_join = ''.join([ add_join, '\n', """
    JOIN
        """, table_name, """ ON
        experiment_map.coinc_event_id == """, table_name, '.coinc_event_id' ])

#
#     Calculate uncombined FAR
#

if rank_by == "MIN":
    connection.create_function("calc_ufar", 5, background.calc_ufar_by_min)
else:
    connection.create_function("calc_ufar", 5, background.calc_ufar_by_max)

# get all the triggers
sqlquery = ''.join([ """
    SELECT 
        experiment_summary.experiment_id,
        experiment_summary.experiment_summ_id,
        """,
        ifo_grouping, """,
        """,
        param_grouping, """,
        """, ranking_table, '.', ranking_stat,"""
    FROM 
        """, ranking_table, """
    JOIN 
        experiment_summary, experiment_map ON (
            experiment_summary.experiment_summ_id == experiment_map.experiment_summ_id
            AND experiment_map.coinc_event_id == """, ranking_table, """.coinc_event_id)
    """, add_join ])

if opts.debug:
    import time
    print >> sys.stderr, sqlquery
    print >> sys.stderr, time.localtime()[3], time.localtime()[4], time.localtime()[5]

for eid, esid, ifos, param_group, stat in connection.cursor().execute(sqlquery):
    background.add_to_bkg_stats(eid, esid, ifos, param_group, stat)

if opts.debug:
    print >> sys.stderr, time.localtime()[3], time.localtime()[4], time.localtime()[5]

# sort the background lists
background.sort_bkg_stats()

if opts.verbose:
    print >> sys.stderr, "Calculating uncombined FAR..."
# calculate the far for each trigger and store in the output table
add_from = ''
add_join = ''
add_tables = [table for table in [ranking_table, param_table, ifos_table] if table is not None and table != output_table]

for needed_table in set(add_tables):
    add_from = ', '.join([ add_from, needed_table ])
    add_join = ''.join([ add_join, """
                AND experiment_map.coinc_event_id == """, needed_table, '.coinc_event_id' ])

sqlquery = ''.join(["""
    UPDATE
        """, output_table, """
    SET
        """, uncombined_far_column, """ = (
            SELECT
                calc_ufar(
                    experiment_summary.experiment_id,
                    experiment_summary.experiment_summ_id,
                    """, ifo_grouping, """,
                    """, param_grouping, """,
                    """, ranking_table, '.', ranking_stat, """
                    ) 
            FROM 
                experiment_summary, experiment_map""", add_from, """
            WHERE
                experiment_summary.experiment_summ_id == experiment_map.experiment_summ_id
                AND experiment_map.coinc_event_id == """, output_table, """.coinc_event_id""",
                add_join, ')'])
if opts.debug:
    print >> sys.stderr, sqlquery
    print >> sys.stderr, time.localtime()[3], time.localtime()[4], time.localtime()[5]

connection.cursor().execute(sqlquery)

connection.commit()

#
#     Calculate combined FAR
#

connection.create_function("calc_cfar", 5, background2.calc_ufar_by_min)

# get all the triggers
sqlquery = ''.join([ """
    SELECT 
        experiment_summary.experiment_id,
        experiment_summary.experiment_summ_id,
        """,
        '"ALL_IFOS"', """,
        """,
        '0', """,
        """, ranking_table, '.', uncombined_far_column,"""
    FROM 
        """, ranking_table, """
    JOIN 
        experiment_summary, experiment_map ON (
            experiment_summary.experiment_summ_id == experiment_map.experiment_summ_id
            AND experiment_map.coinc_event_id == """, ranking_table, """.coinc_event_id)
    """, add_join ])

if opts.debug:
    import time
    print >> sys.stderr, sqlquery
    print >> sys.stderr, time.localtime()[3], time.localtime()[4], time.localtime()[5]

for eid, esid, ifos, param_group, stat in connection.cursor().execute(sqlquery):
    background2.add_to_bkg_stats(eid, esid, ifos, param_group, stat)

if opts.debug:
    print >> sys.stderr, time.localtime()[3], time.localtime()[4], time.localtime()[5]

# sort the background lists
background2.sort_bkg_stats()

if opts.verbose:
    print >> sys.stderr, "Calculating combined FAR..."
# calculate the far for each trigger and store in the output table
add_from = ''
add_join = ''
add_tables = [table for table in [ranking_table, param_table, ifos_table] if table is not None and table != output_table]

for needed_table in set(add_tables):
    add_from = ', '.join([ add_from, needed_table ])
    add_join = ''.join([ add_join, """
                AND experiment_map.coinc_event_id == """, needed_table, '.coinc_event_id' ])

sqlquery = ''.join(["""
    UPDATE
        """, output_table, """
    SET
        """, combined_far_column, """ = (
            SELECT
                calc_cfar(
                    experiment_summary.experiment_id,
                    experiment_summary.experiment_summ_id,
                    """, '"ALL_IFOS"', """,
                    """, '0', """,
                    """, ranking_table, '.', uncombined_far_column, """
                    ) 
            FROM 
                experiment_summary, experiment_map""", add_from, """
            WHERE
                experiment_summary.experiment_summ_id == experiment_map.experiment_summ_id
                AND experiment_map.coinc_event_id == """, output_table, """.coinc_event_id""",
                add_join, ')'])
if opts.debug:
    print >> sys.stderr, sqlquery
    print >> sys.stderr, time.localtime()[3], time.localtime()[4], time.localtime()[5]


connection.cursor().execute(sqlquery)

if opts.debug:
    print >> sys.stderr, time.localtime()[3], time.localtime()[4], time.localtime()[5]


# Vacuum database if desired
if opts.vacuum:
    if opts.verbose:
        print >> sys.stderr, "Vacuuming database..."
    connection.cursor().execute( 'VACUUM' )
    if opts.verbose:
        print >> sys.stderr, "done."

#
#       Save and Exit
#

connection.commit()
connection.close()

# write output database
dbtables.put_connection_filename(opts.output, working_filename, verbose = opts.verbose)

if opts.verbose: 
    print >> sys.stdout, "Finished!"

# set process end time
process.set_process_end_time(proc_id)
sys.exit(0)
