#!/usr/bin/env python

#
# =============================================================================
#
#                                   Preamble
#
# =============================================================================
#

import argparse
import pycbc
import pycbc.version
from pycbc import workflow
import sqlite3
import sys
import os

from pylal import ligolw_sqlutils as sqlutils
from pylal import ligolw_cbc_compute_durations as compute_dur

from glue import segments
from glue import segmentsUtils
from glue.ligolw import table
from glue.ligolw import lsctables
from glue.ligolw import dbtables
from glue.ligolw import utils
from glue.ligolw import ligolw
from glue.ligolw.utils import process

__prog__ = "pycbc_compute_durations"
__author__ = "Collin Capano <cdcapano@physics.syr.edu>"
__version__ = pycbc.version.git_verbose_msg
__date__    = pycbc.version.date

description = \
"Computes durations for every row in the experiment_summary table in a " + \
"database and stores them."

# =============================================================================
#
#                                   Set Options
#
# =============================================================================


def parse_command_line():
    """
    Parse the command line, return options and check for consistency among the
    options.
    """
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('--version', action='version', version=__version__)

    parser.add_argument("-i", "--input", action="store", type=str,
        required=True, metavar="INPUT_FILE",
        help="Input database to read. Can only input one at a time.")
    parser.add_argument("-o", "--output", action="store", type=str,
        required=True, metavar="OUTPUT_FILE",
        help="Output database to write to.")
    parser.add_argument("-t", "--tmp-space", action="store", type=str,
        default=None, metavar="PATH",
        help="""Location of local disk on which to do work. This is optional;
                it is only used to enhance performance in a networked 
                environment.""")
    parser.add_argument("-s", "--segment-file", action="store", type=str,
        required=True, metavar="SEGMENT_FILE",
        help="""The segment file containing the segments that have been
                analysed by each interferometer. This should contain the
                segments for all ifos using the same name. For e.g. if I set
                --channel-name==WORKFLOW_SEGS I expect this file to contain 
                H1:WORKFLOW_SEGS:1, L1:WORKFLOW_SEGS:1, ... The version number can
                take any value but we should **not** have multiple version
                numbers in the same document. Ie. do not include
                H1:WORKFLOW_SEGS:1 **and** H1:WORKFLOW_SEGS:2. Workflow will produce
                this file from its analysislogging module.""")
    parser.add_argument("-c", "--channel-name", action="store", type=str,
        required=True, metavar="CHANNEL_NAME",
        help="""The channel name to search for in the --segment-file. See help
                for --segment-file for more information.""")
    parser.add_argument("-v", "--verbose", action="store_true", default = False,
        help="Be verbose.")

    args = parser.parse_args()

    return args
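
# Example invocation (illustrative file names):
#
#   pycbc_compute_durations \
#       --input H1L1-FULL_DATA.sqlite \
#       --output H1L1-FULL_DATA_DURATIONS.sqlite \
#       --segment-file workflow_segments.xml \
#       --channel-name WORKFLOW_SEGS \
#       --verbose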

# =============================================================================
#
#                              Function Definitions
#
# =============================================================================


def get_playground_sets_from_fulldata(full_data_dict):
    """
    Calculates playground segment sets using full_data single-ifo segments.
    Returns a dictionary of segments (which is of form {instrument:segmentlist})
    with just playground segments in them.
    
    @param full_data_dict: the full_data single-ifo analyzed segments dictionary
    """
    playground_listdict = segments.segmentlistdict()
    playground_seglist = segmentsUtils.S2playground(full_data_dict.extent_all())
    for instruments, seglist in full_data_dict.items():
        playground_listdict[instruments] = seglist.coalesce() & playground_seglist 

    return playground_listdict
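
# For reference, segmentsUtils.S2playground() implements the standard LIGO
# playground convention: 600 s segments placed every 6370 s, starting from GPS
# time 729273613 and clipped to the given extent. A minimal sketch with
# illustrative GPS times (exact repr may differ):
#
#   >>> extent = segments.segment(729273613, 729286352)
#   >>> segmentsUtils.S2playground(extent)
#   [segment(729273613, 729274213), segment(729279983, 729280583)]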


class Durations:
    """
    Class to store and retrieve durations.
    self.durations has the structure:
    self.durations[(on_instruments, veto_def_name, datatype, time_slide_id)]
    """
    def __init__( self ):
        self.durations = {}

    def map_slides_to_durations( self, veto_def_name, datatype, livetime_dict ):
        for key, duration in livetime_dict.items():
            # the dictionary key is of the form (time_slide_id, on_instruments)
            self.durations[( key[1], veto_def_name, datatype, str(key[0]) )] = duration

    def retrieve_duration( self, on_instruments, veto_def_name, datatype, time_slide_id ):
        # when both simulations & full_data are in the same database, use full_data livetime
        if datatype == "simulation":
            datatype = "all_data"
        # if the tuple from the experiment tables is a key of the durations dict, return its duration
        key = ( on_instruments, veto_def_name, datatype, str(time_slide_id) )
        if key in self.durations:
            return self.durations[key]
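
# A minimal usage sketch of Durations, with illustrative names and values;
# livetime_dict keys are (time_slide_id, on_instruments) tuples as produced by
# compute_dur.get_livetimes:
#
#   >>> bank = Durations()
#   >>> bank.map_slides_to_durations("VETO_CAT2", "all_data",
#   ...     {(0, "H1,L1"): 12345.0})
#   >>> bank.retrieve_duration("H1,L1", "VETO_CAT2", "all_data", 0)
#   12345.0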


# =============================================================================
#
#                                     Main
#
# =============================================================================

#
#       Generic Initialization
#

args = parse_command_line()

# Define a content handler and link it to the database: dbtables.use_in()
# attaches the LIGO_LW table machinery so that tables parsed from the XML
# document are backed by the SQLite connection.

class ContentHandler(ligolw.LIGOLWContentHandler):
    pass

dbtables.use_in(ContentHandler)

# get input database filename
input_filename = args.input
if not os.path.isfile( input_filename ):
    raise ValueError("The input database, %s, does not exist." % input_filename)

# Setup working databases and connections
if args.verbose: 
    print >> sys.stderr, "Opening database..."

working_filename = dbtables.get_connection_filename( 
    input_filename, tmp_path=args.tmp_space, verbose=args.verbose )
connection = sqlite3.connect( working_filename )
# Link the ContentHandler to the SQL database
ContentHandler.connection = connection
if args.tmp_space:
    dbtables.set_temp_store_directory(connection, args.tmp_space,
                                                          verbose=args.verbose)
xmldoc = dbtables.get_xml(connection)

# Add program to process and process params table

proc_id = process.register_to_xmldoc(xmldoc, __prog__, args.__dict__,
                                                version=pycbc.version.git_hash)

#
#       Compute Durations
#

# turn the time slide table into a dictionary
time_slide_dict = table.get_table(xmldoc, lsctables.TimeSlideTable.tableName).as_dict()
zero_lag_dict = dict(dict_entry for dict_entry in time_slide_dict.items()
                     if not any( dict_entry[1].values() ))
# remove all zero-lag entries so time_slide_dict contains only the time slides
for slide_id in zero_lag_dict.keys():
    del time_slide_dict[slide_id]
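
# For reference, TimeSlideTable.as_dict() maps each time_slide_id to an
# {instrument: offset} dictionary; a zero-lag entry has all offsets equal to
# zero, e.g. (illustrative ids and offsets):
#
#   {0: {'H1': 0.0, 'L1': 0.0},    # zero-lag
#    1: {'H1': 0.0, 'L1': 5.0}}    # L1 slid by 5 s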

# initialize duration bank
dur_bank = Durations()

# determine single-ifo segments for each usertag
ifo_segments = segments.segmentlistdict()
datatypes = {}

# In S5/S6 we ran separate analyses for FULL_DATA and PLAYGROUND. Here we can
# reconstruct the playground results from the full_data inputs.
usertags = ["FULL_DATA"]

for tag in (set(usertags) & set(["FULL_DATA","PLAYGROUND"])):
    with open(args.segment_file, 'r') as fP:
        WorkflowAnalysisSegDict = workflow.segment.fromsegmentxml(
                fP, return_dict=True)
    ifo_segments[tag] = segments.segmentlistdict()
    for key in WorkflowAnalysisSegDict.keys():
        # Key should be something like H1:DMT-SCIENCE:3
        channel_details = key.split(':')
        if channel_details[1] == args.channel_name:
            ifo_segments[tag][channel_details[0]] = WorkflowAnalysisSegDict[key]

    if tag == "FULL_DATA":
        datatypes[tag] = ["all_data","slide"]
    elif tag == "PLAYGROUND":
        datatypes[tag] = ["playground","slide"]

if "FULL_DATA" in usertags:
    # find playground segments despite a playground analysis not being done
    if "PLAYGROUND" not in usertags:
        tag = unicode("PLAYGROUND")
        ifo_segments[tag] = get_playground_sets_from_fulldata(ifo_segments["FULL_DATA"])
        datatypes[tag] = ["playground"]

    tag = unicode("EXCLUDE_PLAY")
    ifo_segments[tag] = ifo_segments["FULL_DATA"] - ifo_segments["PLAYGROUND"]
    datatypes[tag] = ["exclude_play"]

if args.verbose:
    print >> sys.stderr, "Getting all veto categories in the experiment_summary table..."

# get veto_segments
veto_segments = compute_dur.get_veto_segments(xmldoc, args.verbose)

for veto_def_name, veto_seg_dict in veto_segments.items():
    if args.verbose:
        print >> sys.stderr, "\n\tThe DQ vetoes applied to the single-ifo segments are %s" % veto_def_name
    for usertag, ifo_seg_dict in ifo_segments.items():
        # compute the durations (or livetimes) for every possible instrument combo for every
        # slide in the time-slide table; the resulting durations dictionary has the following form:
        # durations[(on_instruments, veto_def_name, datatype, time_slide_id)] = livetime (in seconds)

        if args.verbose:
            print >> sys.stderr, "\n\t\tCalculating live-time for %s zerolag time" % usertag
        # determine the durations for the zerolag time
        livetime_dict = compute_dur.get_livetimes(
            ifo_seg_dict - veto_seg_dict,
            zero_lag_dict,
            verbose=args.verbose)
        dur_bank.map_slides_to_durations(
            veto_def_name,
            datatypes[usertag][0],
            livetime_dict)

        # determine the durations for each separate time-slide
        if len(datatypes[usertag]) > 1:
            if args.verbose:
                print >> sys.stderr, "\n\t\tCalculating live-time for each %s time slide" % usertag
            livetime_dict = compute_dur.get_livetimes(
                ifo_seg_dict - veto_seg_dict,
                time_slide_dict,
                verbose=args.verbose)
            dur_bank.map_slides_to_durations(
                veto_def_name,
                datatypes[usertag][1],
                livetime_dict)

#
# finished getting all durations, now populate the experiment_summary table
# with them
#
if args.verbose:
    print >> sys.stderr, "\nPopulating the experiment_summary table with results..."

connection.create_function("retrieve_duration", 4, dur_bank.retrieve_duration)

# populate the experiment_summary table with the appropriate duration
sqlquery = """
    UPDATE experiment_summary
    SET duration = (
        SELECT retrieve_duration(
            experiment.instruments,
            experiment_summary.veto_def_name,
            experiment_summary.datatype,
            experiment_summary.time_slide_id
            )
        FROM
            experiment
        WHERE
            experiment.experiment_id == experiment_summary.experiment_id
        )"""
connection.cursor().execute( sqlquery )

#
#       Close database and exit
#

connection.commit()
connection.close()
dbtables.put_connection_filename(args.output, working_filename,
                                 verbose=args.verbose)

if args.verbose:
    print >> sys.stderr, "Finished!"

# set process end time
process.set_process_end_time(proc_id)
sys.exit(0)

