#!/usr/bin/env python
#
# =============================================================================
#
# Preamble
#
# =============================================================================
#
from optparse import OptionParser
import sqlite3
import sys
import os
from pylal import ligolw_sqlutils as sqlutils
from glue import git_version
from glue.ligolw import dbtables
from glue.ligolw.utils import process
from glue.ligolw import lsctables
__prog__ = "ligolw_cbc_cfar"
__author__ = "Collin Capano <cdcapano@physics.syr.edu>"
description = \
"Calculates false alarm rates (FAR) in units of yr^(-1) and stores to " + \
"specified output_column. Writes results out to a new database. "
# =============================================================================
#
# Function Definitions
#
# =============================================================================
def get_ifo_multiplicity( ifos ):
"""
Returns the number of coincident ifos in an 'ifos' string.
@ifos: a string of comma-separated ifos
"""
return len( lsctables.instrument_set_from_ifos( ifos ) )
# =============================================================================
#
# Set Options
#
# =============================================================================
def parse_command_line():
"""
Parse the command line, return options and check for consistency among the
options.
"""
parser = OptionParser(
version = git_version.verbose_msg,
usage = "%prog [options]",
description = description
)
# following are related to file input and output naming
parser.add_option( "-i", "--input", action = "store", type = "string", default = None,
help =
"Input database to read. Can only input one at a time."
)
parser.add_option( "-o", "--output", action = "store", type = "string", default = None,
help =
"Name of output database to save to."
)
parser.add_option( "-t", "--tmp-space", action = "store", type = "string", default = None,
metavar = "PATH",
help =
"Requried. Location of local disk on which to do work. " +
"This is used to enhance performance in a networked " +
"environment, and to protect against accidently " +
"overwriting the input database."
)
parser.add_option( "-v", "--verbose", action = "store_true", default = False,
help =
"Print progress information"
)
parser.add_option( "-D", "--debug", action = "store_true", default = False,
help =
"Print SQLite queries used and the approximate time taken to run each one." )
# following are generic inspiral_sql options
parser.add_option( "-P", "--param-name", action = "store", default = None,
metavar = "PARAMETER",
help =
"Can be any parameter in the param table (see --param-table). " +
"Specifying this and param-ranges defines the categories " +
"which the uncombined_fars are calculated in and are " +
"combined over in the combined far calculation. " +
"If not specified, triggers will not be binned when calculating " +
"the uncombined FAR and so the uncombined FAR will be the same as " +
"the combined FAR."
)
parser.add_option( "-R", "--param-ranges", action = "store", default = None,
metavar = " [ LOW1, HIGH1 ); ( LOW2, HIGH2]; etc.",
help =
"Requires --param-name. Specify the parameter ranges " +
"to bin the triggers in. A '(' or ')' implies an open " +
"boundary, a '[' or ']' a closed boundary. To specify " +
"multiple ranges, separate each range by a ';'. Any " +
"coincidences that fall outside of the union of all " +
"specified ranges will be deleted."
)
parser.add_option( "-X", "--exclude-coincs", action = "store", type = "string", default = None,
metavar = " [COINC_INSTRUMENTS1 + COINC_INSTRUMENTS2 in INSTRUMENTS_ON1];" +
"[ALL in INSTRUMENTS_ON2]; etc.",
help =
"Exclude coincident types in specified detector times, " +
"e.g., '[H2,L1 in H1,H2,L1]'. Some rules: " +
"* Coinc-types and detector time must be separated by " +
"an ' in '. When specifying a coinc_type or detector " +
"time, detectors and/or ifos must be separated by " +
"commas, e.g. 'H1,L1' not 'H1L1'. " +
"* To specify multiple coinc-types in one type of time, " +
"separate each coinc-type by a '+', e.g., " +
"'[H1,H2 + H2,L1 in H1,H2,L1]'. " +
"* To exclude all coincs in a specified detector time " +
"or specific coinc-type in all times, use 'ALL'. E.g., " +
"to exclude all H1,H2 triggers, use '[H1,H2 in ALL]' " +
"or to exclude all H2,L1 time use '[ALL in H2,L1]'. " +
"* To specify multiple exclusions, separate each " +
"bracket by a ';'. " +
"* Order of the instruments nor case of the letters " +
"matter. So if your pinky is broken and you're " +
"dyslexic you can type '[h2,h1 in all]' without a " +
"problem. "
)
parser.add_option( "-I", "--include-only-coincs", action = "store", type = "string", default = None,
metavar = " [COINC_INSTRUMENTS1 + COINC_INSTRUMENTS2 in INSTRUMENTS_ON1];" +
"[ALL in INSTRUMENTS_ON2]; etc.",
help =
"Opposite of --exclude-coincs: fars will be calculated " +
"for the specified coinc types only (all other coincs will be " +
"deleted from the output database). To avoid confusing overlaps, " +
"cannot specify both --exclude-coincs and --include-only-coincs."
)
parser.add_option( "-V", "--vacuum", action = "store_true", default = False,
help =
"If turned on, will vacuum the database before saving. " +
"This cleans any fragmentation and removes empty space " +
"left behind by all the DELETEs, making the output " +
"database smaller and more efficient. " +
"WARNING: Since this requires rebuilding the entire " +
"database, this can take awhile for larger files."
)
# following are options specific to this program
parser.add_option( "-s", "--ranking-stat", action = "store", type = "string", default = None,
help =
"Required. The stat to use to rank triggers for calculating " +
"the false alarm rate. It can be any column in the specified ranking-table. "
)
parser.add_option( "-b", "--rank-by", action = "store", type = "string", default = None,
metavar = "MAX or MIN",
help =
"Requried. Options are MAX or MIN. " +
"This specifies whether to rank triggers by ascending (MAX) or " +
"descending (MIN) stat value."
)
parser.add_option( "-c", "--uncombined-far-column", action = "store", type = "string", default = None,
help =
"Required. Column in the output-table to store uncombined far to. If the column doesn't exist, " +
"it will be created. Column name must be all lower-case. "
)
parser.add_option( "", "--combined-far-column", action = "store", type = "string", default = None,
help =
"Required. Column in the output-table to store combined far to. If the column doesn't exist, " +
"it will be created. Column name must be all lower-case. "
)
parser.add_option("", "--ranking-table", action="store", type="string",
default=None,
help =
"Required. What table to get the ranking-statsitic from. Can be any table with a coinc_event_id. Ex. coinc_inspiral."
)
parser.add_option("", "--ifos-table", action="store", type="string",
default=None,
help =
"What table to look in for the coincident ifos. Can be any table with a coinc_event_id and an ifos column. " +
"Default is to use whatever ranking-table is set to. If, however, the specified ranking-table does not have an " +
"ifos column (e.g., coinc_event), then this must be specified."
)
parser.add_option("", "--param-table", action="store", type="string",
default=None,
help =
"If specifying a param-name and ranges, what table to look in for the param values. Can be any table with a coinc_event_id. " +
"Default is to use whatever ranking-table is set to."
)
parser.add_option("", "--output-table", action="store", type="string",
default=None,
help =
"What table to write the output-column to. Can be any table with a coinc_event_id column. Default is write to " +
"whatever ranking-table is set to."
)
parser.add_option( "-G", "--group-by-ifos", action = "store_true", default = False,
help =
"Turning on will cause triggers to be grouped by coincident ifos when " +
"calculating FARs."
)
parser.add_option( "-M", "--group-by-multiplicity", action = "store_true", default = False,
help =
"Turning on will cause triggers to be grouped by the number of coincident ifos when " +
"calculating FARs. For example, doubles will be grouped with doubles, triples with triples, etc. " +
"Note: if both --group-by-ifos and --group-by-multiplicity on, group-by-ifos takes precedence."
)
parser.add_option( "-T", "--time-units", action = "store", default = 'yr',
metavar = "s, min, hr, days, OR yr",
help =
"Time units to use when calculating FARs (the units of the FARs will be the inverse of this). " +
"Options are s, min, hr, days, or yr. Default is yr."
)
(options, args) = parser.parse_args()
# check for required options and for self-consistency
if not options.input:
raise ValueError, "No input specified."
if not options.output:
raise ValueError, "No output specified."
if not options.tmp_space:
raise ValueError, "--tmp-space is a requred argument."
if not options.ranking_stat:
raise ValueError, "No ranking stat specified."
if not options.ranking_table:
raise ValueError, "--ranking-table is a required argument."
if not (options.rank_by.strip().upper() == 'MAX' or options.rank_by.strip().upper() == 'MIN'):
raise ValueError, "--rank-by must be specified and set to either MAX or MIN."
if options.param_name and not options.param_ranges:
raise ValueError, "param-name requires param-ranges"
if options.param_ranges and not options.param_name:
raise ValueError, "param-ranges requires param-name"
if not options.uncombined_far_column:
raise ValueError, "--uncombined-far-column is a required argument"
if not options.combined_far_column:
raise ValueError, "--uncombined-far-column is a required argument"
if ' ' in options.uncombined_far_column.strip():
raise ValueError, "--uncombined-far-column cannot have spaces in it"
if ' ' in options.combined_far_column.strip():
raise ValueError, "--combined-far-column cannot have spaces in it"
if options.exclude_coincs and options.include_only_coincs:
raise ValueError, "Cannot specify both --exclude-coincs and --include-only-coincs."
return options, sys.argv[1:]
# =============================================================================
#
# Main
#
# =============================================================================
#
# Generic Initilization
#
opts, args = parse_command_line()
sqlite3.enable_callback_tracebacks(opts.debug)
# get input database filename
filename = opts.input
if not os.path.isfile( filename ):
raise ValueError, "The input file, %s, cannot be found." % filename
# Setup working databases and connections
if opts.verbose:
print >> sys.stdout, "Setting up temp. database..."
working_filename = dbtables.get_connection_filename(
filename, tmp_path = opts.tmp_space, verbose = opts.verbose )
connection = sqlite3.connect( working_filename )
if opts.tmp_space:
dbtables.set_temp_store_directory(connection, opts.tmp_space, verbose = opts.verbose)
ranking_table = sqlutils.validate_option( opts.ranking_table )
if opts.ifos_table is None:
ifos_table = ranking_table
else:
ifos_table = sqlutils.validate_option( opts.ifos_table )
if opts.param_table is None:
param_table = ranking_table
else:
param_table = sqlutils.validate_option( opts.param_table )
if opts.output_table is None:
output_table = ranking_table
else:
output_table = sqlutils.validate_option( opts.output_table )
# Add program to process and process params table
# FIXME: remove the following two lines once boolean type
# has been properly handled
from glue.ligolw import types as ligolwtypes
ligolwtypes.FromPyType[type(True)] = ligolwtypes.FromPyType[type(8)]
# create an xmldoc representation of the database for writing the
# process and process-params
xmldoc = dbtables.get_xml(connection)
# Add entries to process and process_params tables for this program
proc_id = process.register_to_xmldoc(xmldoc, __prog__, opts.__dict__, version = git_version.id)
# Get param and param-ranges if specified
if opts.param_name:
param_parser = sqlutils.parse_param_ranges( param_table, opts.param_name,
opts.param_ranges, verbose = opts.verbose )
param_name = param_parser.get_param_name()
param_filters = param_parser.get_param_filters()
else:
param_filters = None
# Get exclude_coincs list if specified
if opts.exclude_coincs:
exclude_coinc_filters = sqlutils.parse_coinc_options( opts.exclude_coincs,
verbose = opts.verbose ).get_coinc_filters( ifos_table )
else:
exclude_coinc_filters = None
# Get include_coincs list if specified
if opts.include_only_coincs:
include_coinc_filters = sqlutils.parse_coinc_options( opts.include_only_coincs,
verbose = opts.verbose ).get_coinc_filters( ifos_table )
else:
include_coinc_filters = None
# Clear ranking_table of triggers outside of interested ranges
if param_filters or exclude_coinc_filters or include_coinc_filters:
sqlutils.apply_inclusion_rules_to_coinc_table( connection, ranking_table,
exclude_coincs = exclude_coinc_filters,
include_coincs = include_coinc_filters,
param_filters = param_filters, verbose = opts.verbose )
#
# Program-specific Initialization
#
# validate ranking stats
ranking_stat = sqlutils.validate_option( opts.ranking_stat )
rank_by = sqlutils.validate_option( opts.rank_by, lower = False ).upper()
# create column to store output to if it doesn't already exist
output_table, uncombined_far_column = sqlutils.create_column( connection, output_table, opts.uncombined_far_column )
output_table, combined_far_column = sqlutils.create_column( connection, output_table, opts.combined_far_column )
# sqlitize desired conversion function
def convert_livetime( duration, unit = opts.time_units ):
"""
Uses sqlutils.convert_duration to automatically convert the frg_durs
to the desired unit.
"""
unit = sqlutils.validate_option( unit, lower = True )
return sqlutils.convert_duration( duration, unit )
connection.create_function( 'convert_livetime', 1, convert_livetime )
# sqlitize param_parser.group_by_param_range()
if opts.param_name:
connection.create_function("group_by_param", 1, param_parser.group_by_param_range)
param_grouping = ''.join([ 'group_by_param(', param_name, ')' ])
else:
param_grouping = '0'
#
# Collect information about the background
#
if opts.verbose:
print >> sys.stderr, "Getting background statistics..."
# initialize Summaries class, call background
background = sqlutils.Summaries()
background2 = sqlutils.Summaries()
# get information about all rows in the experiment summary table
sqlquery = """
SELECT
experiment_summary.experiment_id,
experiment_summary.experiment_summ_id,
convert_livetime(experiment_summary.duration),
experiment_summary.datatype
FROM
experiment_summary
"""
for eid, esid, duration, datatype in connection.cursor().execute( sqlquery ):
background.append_duration(eid, esid, duration)
if datatype != "slide":
background.append_zero_lag_id( eid, esid )
for eid, esid, duration, datatype in connection.cursor().execute( sqlquery ):
background2.append_duration(eid, esid, duration)
if datatype != "slide":
background2.append_zero_lag_id( eid, esid )
# calculate the background duration for each experiment_summary_id
background.calc_bkg_durs()
background2.calc_bkg_durs()
# get desired ifo grouping
if opts.group_by_ifos:
ifo_grouping = '.'.join([ ifos_table, 'ifos' ])
elif opts.group_by_multiplicity:
ifo_grouping = ''.join(['get_ifo_multiplicity(', ifos_table, '.ifos)'])
connection.create_function('get_ifo_multiplicity', 1, get_ifo_multiplicity)
else:
ifo_grouping = '"ALL_IFOS"'
# add any extra tables to the join statement
add_join = ''
add_tables = set([table_name for table_name in [ifos_table, param_table] if table_name != ranking_table])
for table_name in add_tables:
add_join = ''.join([ add_join, '\n', """
JOIN
""", table_name, """ ON
experiment_map.coinc_event_id == """, table_name, '.coinc_event_id' ])
#
# Calculate uncombined FAR
#
if rank_by == "MIN":
connection.create_function("calc_ufar", 5, background.calc_ufar_by_min)
else:
connection.create_function("calc_ufar", 5, background.calc_ufar_by_max)
# get all the triggers
sqlquery = ''.join([ """
SELECT
experiment_summary.experiment_id,
experiment_summary.experiment_summ_id,
""",
ifo_grouping, """,
""",
param_grouping, """,
""", ranking_table, '.', ranking_stat,"""
FROM
""", ranking_table, """
JOIN
experiment_summary, experiment_map ON (
experiment_summary.experiment_summ_id == experiment_map.experiment_summ_id
AND experiment_map.coinc_event_id == """, ranking_table, """.coinc_event_id)
""", add_join ])
if opts.debug:
import time
print >> sys.stderr, sqlquery
print >> sys.stderr, time.localtime()[3], time.localtime()[4], time.localtime()[5]
for eid, esid, ifos, param_group, stat in connection.cursor().execute(sqlquery):
background.add_to_bkg_stats(eid, esid, ifos, param_group, stat)
if opts.debug:
print >> sys.stderr, time.localtime()[3], time.localtime()[4], time.localtime()[5]
# sort the background lists
background.sort_bkg_stats()
if opts.verbose:
print >> sys.stderr, "Calculating uncombined FAR..."
# calculate the far for each trigger and store in the output table
add_from = ''
add_join = ''
add_tables = [table for table in [ranking_table, param_table, ifos_table] if table is not None and table != output_table]
for needed_table in set(add_tables):
add_from = ', '.join([ add_from, needed_table ])
add_join = ''.join([ add_join, """
AND experiment_map.coinc_event_id == """, needed_table, '.coinc_event_id' ])
sqlquery = ''.join(["""
UPDATE
""", output_table, """
SET
""", uncombined_far_column, """ = (
SELECT
calc_ufar(
experiment_summary.experiment_id,
experiment_summary.experiment_summ_id,
""", ifo_grouping, """,
""", param_grouping, """,
""", ranking_table, '.', ranking_stat, """
)
FROM
experiment_summary, experiment_map""", add_from, """
WHERE
experiment_summary.experiment_summ_id == experiment_map.experiment_summ_id
AND experiment_map.coinc_event_id == """, output_table, """.coinc_event_id""",
add_join, ')'])
if opts.debug:
print >> sys.stderr, sqlquery
print >> sys.stderr, time.localtime()[3], time.localtime()[4], time.localtime()[5]
connection.cursor().execute(sqlquery)
connection.commit()
#
# Calculate combined FAR
#
connection.create_function("calc_cfar", 5, background2.calc_ufar_by_min)
# get all the triggers
sqlquery = ''.join([ """
SELECT
experiment_summary.experiment_id,
experiment_summary.experiment_summ_id,
""",
'"ALL_IFOS"', """,
""",
'0', """,
""", ranking_table, '.', uncombined_far_column,"""
FROM
""", ranking_table, """
JOIN
experiment_summary, experiment_map ON (
experiment_summary.experiment_summ_id == experiment_map.experiment_summ_id
AND experiment_map.coinc_event_id == """, ranking_table, """.coinc_event_id)
""", add_join ])
if opts.debug:
import time
print >> sys.stderr, sqlquery
print >> sys.stderr, time.localtime()[3], time.localtime()[4], time.localtime()[5]
for eid, esid, ifos, param_group, stat in connection.cursor().execute(sqlquery):
background2.add_to_bkg_stats(eid, esid, ifos, param_group, stat)
if opts.debug:
print >> sys.stderr, time.localtime()[3], time.localtime()[4], time.localtime()[5]
# sort the background lists
background2.sort_bkg_stats()
if opts.verbose:
print >> sys.stderr, "Calculating combined FAR..."
# calculate the far for each trigger and store in the output table
add_from = ''
add_join = ''
add_tables = [table for table in [ranking_table, param_table, ifos_table] if table is not None and table != output_table]
for needed_table in set(add_tables):
add_from = ', '.join([ add_from, needed_table ])
add_join = ''.join([ add_join, """
AND experiment_map.coinc_event_id == """, needed_table, '.coinc_event_id' ])
sqlquery = ''.join(["""
UPDATE
""", output_table, """
SET
""", combined_far_column, """ = (
SELECT
calc_cfar(
experiment_summary.experiment_id,
experiment_summary.experiment_summ_id,
""", '"ALL_IFOS"', """,
""", '0', """,
""", ranking_table, '.', uncombined_far_column, """
)
FROM
experiment_summary, experiment_map""", add_from, """
WHERE
experiment_summary.experiment_summ_id == experiment_map.experiment_summ_id
AND experiment_map.coinc_event_id == """, output_table, """.coinc_event_id""",
add_join, ')'])
if opts.debug:
print >> sys.stderr, sqlquery
print >> sys.stderr, time.localtime()[3], time.localtime()[4], time.localtime()[5]
connection.cursor().execute(sqlquery)
if opts.debug:
print >> sys.stderr, time.localtime()[3], time.localtime()[4], time.localtime()[5]
# Vacuum database if desired
if opts.vacuum:
if opts.verbose:
print >> sys.stderr, "Vacuuming database..."
connection.cursor().execute( 'VACUUM' )
if opts.verbose:
print >> sys.stderr, "done."
#
# Save and Exit
#
connection.commit()
connection.close()
# write output database
dbtables.put_connection_filename(opts.output, working_filename, verbose = opts.verbose)
if opts.verbose:
print >> sys.stdout, "Finished!"
# set process end time
process.set_process_end_time(proc_id)
sys.exit(0)