Revision 6c28dacde866ea096e45e6c3f1dd24dad5b3d395 authored by Ian Harry on 11 October 2019, 20:00:10 UTC, committed by GitHub on 11 October 2019, 20:00:10 UTC
1 parent 84e8c92
Raw File
pycbc_coinc_statmap
#!/usr/bin/env python
"""
The program combines coincident output files generated
by pycbc_coinc_findtrigs to generated a mapping between SNR and FAP, along
with producing the combined foreground and background triggers. It also has 
the capability of doing hierarchical removal of foreground triggers that are 
louder than all of the background triggers. We use this to properly assess 
the FANs of any other gravitational waves in the dataset.
"""
import argparse, h5py, itertools
import lal, logging, numpy 
from pycbc.events import veto, coinc
import pycbc.version, pycbc.pnutils, pycbc.io
import sys
import pycbc.conversions as conv

class fw(object):
    def __init__(self, name):
        self.f = h5py.File(name, 'w')
        self.attrs = self.f.attrs

    def __setitem__(self, name, data):
        # Make a new item if isn't in the hdf file
        if not name in self.f:
            self.f.create_dataset(name, data=data, compression="gzip",
                                  compression_opts=9, shuffle=True,
                                  maxshape=data.shape)
        # Else reassign values
        else:
            self.f[name][:] = data

    def __getitem__(self, *args):
        return self.f.__getitem__(*args)

parser = argparse.ArgumentParser()
# General required options
parser.add_argument('--version', action='version', 
                    version=pycbc.version.git_verbose_msg)
parser.add_argument('--coinc-files', nargs='+', 
                    help='List of coincidence files used to calculate the '
                         'FAP, FAR, etc.')
parser.add_argument('--verbose', action='count')
parser.add_argument('--cluster-window', type=float, default=10,
                    help='Length of time window in seconds to cluster coinc '
                         'events, [default=10s]')
parser.add_argument('--veto-window', type=float, default=.1,
                    help='Time around each zerolag trigger to window out, '
                         '[default=.1s]')
parser.add_argument('--hierarchical-removal-window', type=float, default=1.0,
                    help='Time around each trigger to window out for a very '
                         'louder trigger in the hierarchical removal '
                         'procedure. [default=1.0s]')
parser.add_argument('--max-hierarchical-removal', type=int, default=0,
                    help='Maximum amount of hierarchical removals to carry '
                         'out when hierarchical removal is desired. Choose '
                         '-1 for continuous hiearchical removal until no '
                         'foreground triggers are louder than the '
                         '(inclusive/exclusive) background. Choose 0 to do '
                         'no hierarchical removals.  Choose 1 to do at most '
                         '1 hierarchical removal, and so on. If given, user '
                         'must also give option '
                         '--hierarchical-removal-against to indicate which '
                         'background to remove triggers against. [default=0]')
parser.add_argument('--hierarchical-removal-against', type=str,
                                                      default=None,
                    help='Make a choice to hierarchically remove foreground '
                         'triggers based on whether it is louder than the '
                         'inclusive (little-dogs-in) background or the '
                         'exclusive (little-dogs-out) background. Choose by '
                         'entering <inclusive> or <exclusive>. '
                         '[default=None]')
parser.add_argument('--output-file')
args = parser.parse_args()

# Check that the user chose inclusive or exclusive background to perform
# hierarchical removals of foreground triggers against.

if args.max_hierarchical_removal == 0:
    if args.hierarchical_removal_against is not None:
        parser.error("User Error: 0 maximum hierarchical removals chosen " \
                     "but option for --hierarchical-removal-against " \
                     "was still given. These are conflicting options. " \
                     "Use with --help for more information.")

else :
    is_bkg_inc = (args.hierarchical_removal_against == 'inclusive')
    is_bkg_exc = (args.hierarchical_removal_against == 'exclusive')
    either_bkg = is_bkg_inc or is_bkg_exc

    if either_bkg == False:
        parser.error("--max-hierarchical-removal requires a choice of which " \
                     "background to remove foreground triggers against, " \
                     "inclusive or exclusive. Use with --help option for " \
                     "more information.")

pycbc.init_logging(args.verbose)

logging.info("Loading coinc triggers")    
all_trigs = pycbc.io.StatmapData(files=args.coinc_files)

logging.info("We have %s triggers" % len(all_trigs.stat))
fore_locs = all_trigs.timeslide_id == 0

# Foreground trigger times for ifo.
fore_time1 = all_trigs.time1[fore_locs]
fore_time2 = all_trigs.time2[fore_locs]

# Average times of triggers from ifo1 and ifo2
ave_fore_time = (fore_time1 + fore_time2) / 2.0

# Remove start and end time around every average foreground trigger time to
# window around.
remove_start_time = ave_fore_time - args.veto_window
remove_end_time = ave_fore_time + args.veto_window

# The time contained between segments around the times contained between each
# element of remove_start_time and remove_end_time.
veto_time = abs(veto.start_end_to_segments(remove_start_time,
                                           remove_end_time).coalesce())

# Veto indices from list of triggers for times in ifo 1&2 around the window
# times. This gives exclusive background triggers.
veto_indices1 = veto.indices_within_times(all_trigs.time1, remove_start_time,
                                          remove_end_time)

exc_zero_trigs = all_trigs.remove(veto_indices1)

veto_indices2 = veto.indices_within_times(exc_zero_trigs.time2,
                                          remove_start_time, remove_end_time)

exc_zero_trigs = exc_zero_trigs.remove(veto_indices2)

logging.info("Clustering coinc triggers (inclusive of zerolag)")
all_trigs = all_trigs.cluster(args.cluster_window)

# Return an array of true or false if the trigger has not been time-slid.
fore_locs = all_trigs.timeslide_id == 0
logging.info("%s clustered foreground triggers" % fore_locs.sum())

logging.info("Clustering coinc triggers (exclusive of zerolag)")
exc_zero_trigs = exc_zero_trigs.cluster(args.cluster_window)

logging.info("Dumping foreground triggers")
f = fw(args.output_file)
f.attrs['detector_1'] = all_trigs.attrs['detector_1']
f.attrs['detector_2'] = all_trigs.attrs['detector_2']
f.attrs['timeslide_interval'] = all_trigs.attrs['timeslide_interval']

# Copy over the segment for coincs and singles
for key in all_trigs.seg.keys():
    f['segments/%s/start' % key] = all_trigs.seg[key]['start'][:]
    f['segments/%s/end' % key] = all_trigs.seg[key]['end'][:]

if fore_locs.sum() > 0:
    f['segments/foreground_veto/start'] = remove_start_time
    f['segments/foreground_veto/end'] = remove_end_time
    for k in all_trigs.data:
        f['foreground/' + k] = all_trigs.data[k][fore_locs]
else:
    # Put SOMETHING in here to avoid failures later
    f['segments/foreground_veto/start'] = numpy.array([0])
    f['segments/foreground_veto/end'] = numpy.array([0])
    for k in all_trigs.data:
        f['foreground/' + k] = numpy.array([], dtype=all_trigs.data[k].dtype)

# If a particular index of all_trigs.timeslide_id isn't 0, evaluate true.
# List of locations that is background.
back_locs = all_trigs.timeslide_id != 0

if (back_locs.sum()) == 0:
    logging.warn("There were no background events, so we could not assign "
                 "any statistic values")
    sys.exit()

logging.info("Dumping background triggers (inclusive of zerolag)")
for k in all_trigs.data:
    f['background/' + k] = all_trigs.data[k][back_locs]
    
logging.info("Dumping background triggers (exclusive of zerolag)")   
for k in exc_zero_trigs.data:
    f['background_exc/' + k] = exc_zero_trigs.data[k]

maxtime = max(all_trigs.attrs['foreground_time1'], all_trigs.attrs['foreground_time2'])
mintime = min(all_trigs.attrs['foreground_time1'], all_trigs.attrs['foreground_time2'])

maxtime_exc = maxtime - veto_time
mintime_exc = mintime - veto_time

background_time = int(maxtime / all_trigs.attrs['timeslide_interval']) * mintime
coinc_time = float(all_trigs.attrs['coinc_time'])

background_time_exc = int(maxtime_exc / all_trigs.attrs['timeslide_interval']) * mintime_exc
coinc_time_exc = coinc_time - veto_time

logging.info("Making mapping from FAN to the combined statistic")

# Ranking statistic of foreground and background
back_stat = all_trigs.stat[back_locs]
fore_stat = all_trigs.stat[fore_locs]

# Cumulative array of inclusive background triggers and the number of
# inclusive background triggers louder than each foreground trigger.
back_cnum, fnlouder = coinc.calculate_n_louder(back_stat, fore_stat, 
                                               all_trigs.decimation_factor[back_locs])

# Cumulative array of exclusive background triggers and the number
# of exclusive background triggers louder than each foreground trigger.
back_cnum_exc, fnlouder_exc = coinc.calculate_n_louder(exc_zero_trigs.stat,
                                                       fore_stat,
                                                       exc_zero_trigs.decimation_factor)

f['background/ifar'] = conv.sec_to_year(background_time / (back_cnum + 1))  
f['background_exc/ifar'] = conv.sec_to_year(background_time_exc / (back_cnum_exc + 1))

f.attrs['background_time'] = background_time

f.attrs['foreground_time'] = coinc_time
f.attrs['background_time_exc'] = background_time_exc
f.attrs['foreground_time_exc'] = coinc_time_exc

logging.info("calculating ifar/fap values")

if fore_locs.sum() > 0:
    ifar = background_time / (fnlouder + 1)
    fap = 1 - numpy.exp(- coinc_time / ifar)
    f['foreground/ifar'] = conv.sec_to_year(ifar)
    f['foreground/fap'] = fap

    ifar_exc = background_time_exc / (fnlouder_exc + 1)
    fap_exc = 1 - numpy.exp(- coinc_time_exc / ifar_exc)
    f['foreground/ifar_exc'] = conv.sec_to_year(ifar_exc)
    f['foreground/fap_exc'] = fap_exc
else:
    f['foreground/ifar'] = numpy.array([])
    f['foreground/fap'] = numpy.array([])
    f['foreground/ifar_exc'] = numpy.array([])
    f['foreground/fap_exc'] = numpy.array([])

if 'name' in all_trigs.attrs:
    f.attrs['name'] = all_trigs.attrs['name']

# Incorporate hierarchical removal for any other loud triggers
logging.info("Beginning hierarchical removal of foreground triggers.")

# Step 1: Create a copy of foreground trigger ranking statistic for reference
#         in the hierarchical removal while loop when updating ifar and fap of
#         hierarchically removed foreground triggers.

# Set an index to keep track of how many hierarchical removals we want to do.
h_iterations = 0

orig_fore_stat = fore_stat

# Assign a new variable to keep track of whether we care about fnlouder or
# fnlouder_exc. Whether we want to remove hierarchically against inclusive
# or exclusive background.
if args.max_hierarchical_removal != 0:
    # If user wants to remove against inclusive background.
    if is_bkg_inc == True:
        louder_foreground = fnlouder

    # Otherwise user wants to remove against exclusive background
    else :
        louder_foreground = fnlouder_exc

else :
    # It doesn't matter if you choose louder_foreground = fnlouder
    # or vice versa, the while loop below will break if
    # louder_foreground doesn't contain 0, or at the comparison
    # h_iterations == args.max_hierarchical_removal. But this avoids
    # a NameError
    louder_foreground = fnlouder

# Step 2 : Loop until we don't have to hierarchically remove anymore. This
#          will happen when fnlouder has no elements that equal 0.

while numpy.any(louder_foreground == 0):
    # If the user wants to stop doing hierarchical removals after a set
    # number of iterations then break when that happens.
    if (h_iterations == args.max_hierarchical_removal): 
        break

    # Write foreground trigger info before hierarchical removals for
    # downstream codes.
    if h_iterations == 0:
        f['background_h%s/stat' % h_iterations] = back_stat 
        f['background_h%s/ifar' % h_iterations] = conv.sec_to_year(background_time / (back_cnum + 1))
        f['background_h%s/timeslide_id' % h_iterations] = all_trigs.data['timeslide_id'][back_locs]
        f['foreground_h%s/stat' % h_iterations] = fore_stat
        f['foreground_h%s/ifar' % h_iterations] = conv.sec_to_year(ifar)
        f['foreground_h%s/fap' % h_iterations] = fap
        trig_id1 = all_trigs.trigger_id1[fore_locs]
        trig_id2 = all_trigs.trigger_id2[fore_locs]
        f['foreground_h%s/trigger_id1' % h_iterations] = trig_id1
        f['foreground_h%s/trigger_id2' % h_iterations] = trig_id2
        f['foreground_h%s/template_id' % h_iterations] = all_trigs.data['template_id'][fore_locs]
        f['foreground_h%s/time1' % h_iterations] = all_trigs.time1[fore_locs]
        f['foreground_h%s/time2' % h_iterations] = all_trigs.time2[fore_locs]

    # Add the iteration number of hierarchical removals done.
    h_iterations += 1

    # Among foreground triggers, find the one with the largest ranking
    # statistic and mark it for removal.
    max_stat_idx = fore_stat.argmax()

    # Step 3: Remove that trigger from the list of zerolag triggers

    # Find the index of the loud foreground trigger to remove next. And find
    # the index in the list of original foreground triggers.

    rm_trig_idx = numpy.where(all_trigs.stat[:] == fore_stat[max_stat_idx])[0][0]
    orig_fore_idx = numpy.where(orig_fore_stat == fore_stat[max_stat_idx])[0][0]

    # Store any foreground trigger's information that we want to
    # hierarchically remove.

    f['foreground/ifar'][orig_fore_idx] = conv.sec_to_year(ifar[max_stat_idx])
    f['foreground/fap'][orig_fore_idx] = fap[max_stat_idx]

    logging.info("Removing foreground trigger that is louder than the inclusive background.")

    # Remove the foreground trigger and all of the background triggers that
    # are associated with it.

    ave_rm_time = (all_trigs.time1[rm_trig_idx] + all_trigs.time2[rm_trig_idx]) / 2.0

    ind_to_rm_ifo1 = veto.indices_within_times(all_trigs.time1,
                              [ave_rm_time - args.hierarchical_removal_window],
                              [ave_rm_time + args.hierarchical_removal_window])
    ind_to_rm_ifo2 = veto.indices_within_times(all_trigs.time2,
                              [ave_rm_time - args.hierarchical_removal_window],
                              [ave_rm_time + args.hierarchical_removal_window])

    indices_to_rm = numpy.concatenate([ind_to_rm_ifo1, ind_to_rm_ifo2])

    all_trigs = all_trigs.remove(indices_to_rm)

    fore_locs = all_trigs.timeslide_id == 0
    # The foreground trigger has been removed, continue with typical statmap operations. 

    # Calculate the change to foreground trigger time and vetoed out time
    fore_time1 = all_trigs.time1[fore_locs]
    fore_time2 = all_trigs.time2[fore_locs]

    ave_fore_time = (fore_time1 + fore_time2) / 2.0

    remove_start_time = ave_fore_time - args.veto_window
    remove_end_time = ave_fore_time + args.veto_window

    logging.info("We have %s triggers after hierarchical removal." % len(all_trigs.stat))

    # Step 4: Re cluster the triggers and calculate the inclusive ifar/fap
    logging.info("Clustering coinc triggers (inclusive of zerolag)")
    all_trigs = all_trigs.cluster(args.cluster_window)
    
    fore_locs = all_trigs.timeslide_id == 0

    logging.info("%s clustered foreground triggers" % fore_locs.sum())

    logging.info("%s hierarchically removed foreground trigger(s)" % h_iterations)

    back_locs = all_trigs.timeslide_id != 0 

    logging.info("Dumping foreground triggers")

    logging.info("Dumping background triggers (inclusive of zerolag)")
    for k in all_trigs.data:
         f['background_h%s/' %h_iterations + k] = all_trigs.data[k][back_locs]

    maxtime = max(all_trigs.attrs['foreground_time1'], all_trigs.attrs['foreground_time2'])
    mintime = min(all_trigs.attrs['foreground_time1'], all_trigs.attrs['foreground_time2'])

    background_time = int(maxtime / all_trigs.attrs['timeslide_interval']) * mintime
    coinc_time = float(all_trigs.attrs['coinc_time'])

    logging.info("Making mapping from FAN to the combined statistic")

    back_stat = all_trigs.stat[back_locs]
    fore_stat = all_trigs.stat[fore_locs]

    back_cnum, fnlouder = coinc.calculate_n_louder(back_stat, fore_stat, 
                                                   all_trigs.decimation_factor[back_locs])

    # Update the louder_foreground criteria depending on whether foreground
    # triggers are being removed via inclusive or exclusive background.
    if is_bkg_inc == True:
        louder_foreground = fnlouder

    # Exclusive background doesn't change when removing foreground triggers.
    # So we don't have to take back_cnum_exc, jut repopulate fnlouder_exc
    else :
        _, fnlouder_exc = coinc.calculate_n_louder(exc_zero_trigs.stat,
                                                   fore_stat,
                                                   exc_zero_trigs.decimation_factor)
        louder_foreground = fnlouder_exc
    # louder_foreground has been updated and the code can continue.


    logging.info("Calculating ifar/fap values")

    f['background_h%s/ifar' % h_iterations] = conv.sec_to_year(background_time / (back_cnum + 1))
    f.attrs['background_time_h%s' % h_iterations] = background_time
    f.attrs['foreground_time_h%s' % h_iterations] = coinc_time

    if fore_locs.sum() > 0:
        # Write ranking statistic to file just for downstream plotting code
        f['foreground_h%s/stat' % h_iterations] = fore_stat

        ifar = background_time / (fnlouder + 1)
        fap = 1 - numpy.exp(- coinc_time / ifar)

        f['foreground_h%s/ifar' % h_iterations] = conv.sec_to_year(ifar)
        f['foreground_h%s/fap' % h_iterations] = fap

        # Update ifar and fap for other foreground triggers
        for i in range(len(ifar)):
            orig_fore_idx = numpy.where(orig_fore_stat == fore_stat[i])[0][0]
            f['foreground/ifar'][orig_fore_idx] = conv.sec_to_year(ifar[i])
            f['foreground/fap'][orig_fore_idx] = fap[i]

        # Save trigger ids for foreground triggers for downstream plotting code.
        # These don't change with the iterations but should be written at every
        # level.
        trig_id1 = all_trigs.trigger_id1[fore_locs]
        trig_id2 = all_trigs.trigger_id2[fore_locs]
        f['foreground_h%s/trigger_id1' % h_iterations] = trig_id1
        f['foreground_h%s/trigger_id2' % h_iterations] = trig_id2
        f['foreground_h%s/template_id' % h_iterations] = all_trigs.data['template_id'][fore_locs]
        f['foreground_h%s/time1' % h_iterations] = all_trigs.time1[fore_locs]
        f['foreground_h%s/time2' % h_iterations] = all_trigs.time2[fore_locs]

    else :
        f['foreground_h%s/stat' % h_iterations] = numpy.array([])
        f['foreground_h%s/ifar' % h_iterations] = numpy.array([])
        f['foreground_h%s/fap' % h_iterations] = numpy.array([])
        f['foreground_h%s/trigger_id1' % h_iterations] = numpy.array([])
        f['foreground_h%s/trigger_id2' % h_iterations] = numpy.array([])
        f['foreground_h%s/template_id' % h_iterations] = numpy.array([])
        f['foreground_h%s/time1' % h_iterations] = numpy.array([])
        f['foreground_h%s/time2' % h_iterations] = numpy.array([])

# Write to file how many hierarchical removals were implemented.
f.attrs['hierarchical_removal_iterations'] = h_iterations

# Write whether hierarchical removals were removed against the
# inclusive background or the exclusive background. Have to use
# numpy.string_ datatype.
if h_iterations != 0:
    hrm_method = args.hierarchical_removal_against
    f.attrs['hierarchical_removal_method'] = numpy.string_(hrm_method) 

logging.info("Done") 
back to top