#!/usr/bin/env python

"""Reads in and vetoes single ifo triggers, cuts, reranks and clusters them"""

import argparse
import logging
import numpy
import h5py

import pycbc
from pycbc.io import SingleDetTriggers
from pycbc.events import stat, coinc, veto


parser = argparse.ArgumentParser(description=__doc__)
pycbc.add_common_pycbc_options(parser)
parser.add_argument('--single-trig-file', required=True,
                    help='Path to file containing single-detector triggers in '
                         'HDF5 format. Required')
parser.add_argument('--detector', required=True,
                    help='Name of the detector (e.g. H1). Required')
parser.add_argument('--bank-file', required=True,
                    help='Path to file containing template bank in HDF5 '
                         'format. Required')
parser.add_argument('--veto-file',
                    help='Optional path to file containing veto segments')
parser.add_argument('--segment-name', default=None,
                    help='Optional, name of segment list to use for vetoes')
parser.add_argument('--filter-string', default=None,
                    help='Optional, boolean expression for filtering triggers '
                         'e.g. "self.mchirp>5."')
parser.add_argument('--min-snr', default=0., type=float,
                    help='Only keep triggers above the given SNR')
parser.add_argument('--cluster-window', type=float,
                    help='If supplied, cluster singles using a symmetrical '
                         'time window; specify the window extent from the '
                         'maximum in seconds')
parser.add_argument('--store-bank-values', default=False, action='store_true',
                    help='If given, also add the template bank parameters to '
                         'the output file.')
parser.add_argument('--output-file', required=True,
                    help='Name of the output file. Required')
stat.insert_statistic_option_group(parser)

args = parser.parse_args()

pycbc.init_logging(args.verbose)

# Munge together SNR cut and any other filter specified
snr_filter = 'self.snr>%f' % args.min_snr if args.min_snr > 0. else None
filts = [f for f in [snr_filter, args.filter_string] if f is not None]
if len(filts) == 2:  # both an explicit filter and a min-snr
    # io.hdf uses numpy imported as np
    filter_func = 'np.logical_and(%s, %s)' % (filts[0], filts[1])
elif len(filts) == 1:
    filter_func = filts[0]
else:
    filter_func = None
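# Note: the filter string is evaluated by SingleDetTriggers with 'self' bound
# to the trigger object, which is why the expressions use the 'self.' prefix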

if filter_func is not None:
    logging.info('Will filter trigs using %s', filter_func)
# Filter will be stored as self.mask attribute of sngls instance
sngls = SingleDetTriggers(
    args.single_trig_file,
    args.detector,
    bank_file=args.bank_file,
    veto_file=args.veto_file,
    segment_name=args.segment_name,
    filter_func=filter_func,
)

logging.info('Calculating stat')
rank_method = stat.get_statistic_from_opts(args, [args.detector])
#  NOTE: inefficient, as we are calculating the stat on all
#  triggers. Might need to do something complicated to fix this.
#  Or just use files with fewer triggers :P
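#  rank_stat_single returns one value per trigger in the file; applying
#  sngls.mask keeps the resulting values aligned with the filtered columns.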
sngl_info = ([args.detector], sngls.trigs)
# use a distinct name so the pycbc.events.stat module is not shadowed
stat_vals = rank_method.rank_stat_single(sngl_info)[sngls.mask]

logging.info('%i stat values found', len(stat_vals))

outfile = h5py.File(args.output_file, 'w')
outgroup = outfile.create_group(args.detector)

if args.cluster_window is not None:
    logging.info('Clustering events over %s s window', args.cluster_window)
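    # cluster_over_time keeps, within each window, the index of the
    # trigger with the maximum stat value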
    out_idx = coinc.cluster_over_time(stat_vals, sngls.end_time,
                                      window=args.cluster_window)
    logging.info('%d triggers remaining', len(out_idx))
    outgroup['cluster_window'] = args.cluster_window
else:
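    # no clustering requested: keep every filtered trigger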
    out_idx = numpy.arange(len(sngls.end_time))

logging.info('Writing %i triggers', len(out_idx))

# get the columns to copy over
with h5py.File(args.single_trig_file, 'r') as trigfile:
    cnames = []
    # only keep datasets parallel to the original trigger list
    for n, col in trigfile[args.detector].items():
        if n.endswith('_template') or isinstance(col, h5py.Group) \
                or n == 'template_boundaries':
            continue
        cnames.append(n)
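# copy each retained column: get_column applies the filter mask, then
# out_idx applies the cluster selection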
for n in cnames:
    outgroup[n] = sngls.get_column(n)[out_idx]

if args.store_bank_values:
    for n in sngls.bank:
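        # template_hash is internal bookkeeping, not a physical template parameter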
        if n == 'template_hash':
            continue
        if not hasattr(sngls, n):
            logging.warning(
                "Bank's %s dataset or group is not supported "
                "by SingleDetTriggers, ignoring it",
                n
            )
            continue
        # don't repeat things that already came from the trigger file
        # (e.g. template_duration)
        if n in cnames:
            continue
        outgroup[n] = getattr(sngls, n)[out_idx]

# copy the live time segments to enable the calculation of trigger rates.
# If a veto file has been used, subtract the vetoed time from the live time

live_segs = veto.start_end_to_segments(sngls.trigs['search/start_time'][:],
                                       sngls.trigs['search/end_time'][:])
live_segs.coalesce()

if args.veto_file is not None:
    veto_segs = veto.select_segments_by_definer(args.veto_file,
                                                args.segment_name,
                                                args.detector)
    veto_segs.coalesce()
    live_segs -= veto_segs

outgroup['search/start_time'], outgroup['search/end_time'] = \
        veto.segments_to_start_end(live_segs)
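# abs() of a segmentlist is the summed duration of its segments, in seconds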
outgroup['search'].attrs['live_time'] = abs(live_segs)

# cannot store None in a h5py attr
outgroup.attrs['filter'] = filter_func or 'None'
outgroup.attrs['cluster_window'] = args.cluster_window or 'None'

outgroup['stat'] = stat_vals[out_idx]
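# record the statistic configuration for provenance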
outgroup.attrs['ranking_statistic'] = args.ranking_statistic
outgroup.attrs['sngl_ranking'] = args.sngl_ranking
outgroup.attrs['statistic_files'] = args.statistic_files

outfile.close()
logging.info('Done!')