Revision 6c28dacde866ea096e45e6c3f1dd24dad5b3d395 authored by Ian Harry on 11 October 2019, 20:00:10 UTC, committed by GitHub on 11 October 2019, 20:00:10 UTC
1 parent 84e8c92
pycbc_coinc_findtrigs
#!/usr/bin/env python
import h5py, argparse, logging, numpy, numpy.random
from pycbc import events, detector
from pycbc.events import veto, coinc, stat
import pycbc.version
from numpy.random import seed, shuffle
parser = argparse.ArgumentParser()
parser.add_argument("--verbose", action="count")
parser.add_argument("--version", action="version", version=pycbc.version.git_verbose_msg)
parser.add_argument("--veto-files", nargs='*', action='append', default=[],
help="Optional veto file. Triggers within veto segments "
"contained in the file are ignored")
parser.add_argument("--segment-name", nargs='*', action='append', default=[],
help="Optional, name of veto segment in veto file")
parser.add_argument("--strict-coinc-time", action='store_true',
help="Optional, only allow coincidences between triggers "
"that lie in coincident time after applying vetoes")
parser.add_argument("--trigger-files", nargs=2,
help="Files containing single-detector triggers")
parser.add_argument("--template-bank", required=True,
help="Template bank file in HDF format")
# produces a list of lists to allow multiple invocations and multiple args
parser.add_argument("--statistic-files", nargs='*', action='append', default=[],
help="Files containing ranking statistic info")
parser.add_argument("--ranking-statistic", choices=stat.statistic_dict.keys(),
default='newsnr',
help="The coinc ranking statistic to calculate")
parser.add_argument("--use-maxalpha", action="store_true")
parser.add_argument("--coinc-threshold", type=float, default=0.0,
help="Seconds to add to time-of-flight coincidence window")
parser.add_argument("--timeslide-interval", type=float,
help="Interval between timeslides in seconds. Timeslides "
"are disabled if the option is omitted.")
parser.add_argument("--loudest-keep-values", type=str, nargs='*',
default=['6:1'],
help="Apply successive multiplicative levels of decimation"
" to coincs with stat value below the given thresholds"
" Ex. 9:10 8.5:30 8:30 7.5:30. Default: no decimation")
parser.add_argument("--template-fraction-range", default="0/1",
help="Optional, analyze only part of template bank. Format"
" PART/NUM_PARTS")
parser.add_argument("--cluster-window", type=float,
help="Optional, window size in seconds to cluster "
"coincidences over the bank")
parser.add_argument("--output-file",
help="File to store the coincident triggers")
parser.add_argument("--randomize-template-order", action="store_true",
help="Random shuffle templates with fixed seed "
"before selecting range to analyze")
parser.add_argument("--batch-singles", default=5000, type=int,
help="Number of single triggers to process at once")
args = parser.parse_args()
# flatten the list of lists of filenames to a single list (may be empty)
args.statistic_files = sum(args.statistic_files, [])
args.segment_name = sum(args.segment_name, [])
args.veto_files = sum(args.veto_files, [])
if args.verbose:
logging.basicConfig(format='%(asctime)s : %(message)s', level=logging.DEBUG)
def parse_template_range(num_templates, rangestr):
part = int(rangestr.split('/')[0])
pieces = int(rangestr.split('/')[1])
tmin = int(num_templates / float(pieces) * part)
tmax = int(num_templates / float(pieces) * (part+1))
return tmin, tmax
class ReadByTemplate(object):
def __init__(self, filename, bank=None, segment_name=[], veto_files=[]):
self.filename = filename
self.file = h5py.File(filename, 'r')
self.ifo = tuple(self.file.keys())[0]
self.valid = None
self.bank = h5py.File(bank, 'r') if bank else None
# Determine the segments which define the boundaries of valid times
# to use triggers
from ligo.segments import segmentlist, segment
key = '%s/search/' % self.ifo
s, e = self.file[key + 'start_time'][:], self.file[key + 'end_time'][:]
self.segs = veto.start_end_to_segments(s, e).coalesce()
for vfile, name in zip(veto_files, segment_name):
veto_segs = veto.select_segments_by_definer(vfile, ifo=self.ifo,
segment_name=name)
self.segs = (self.segs - veto_segs).coalesce()
self.valid = veto.segments_to_start_end(self.segs)
def get_data(self, col, num):
""" Get a column of data for template with id 'num'
Parameters
----------
col: str
Name of column to read
num: int
The template id to read triggers for
Returns
-------
data: numpy.ndarray
The requested column of data
"""
ref = self.file['%s/%s_template' % (self.ifo, col)][num]
return self.file['%s/%s' % (self.ifo, col)][ref]
def set_template(self, num):
""" Set the active template to read from
Parameters
----------
num: int
The template id to read triggers for
Returns
-------
trigger_id: numpy.ndarray
The indices of this templates triggers
"""
self.template_num = num
times = self.get_data('end_time', num)
# Determine which of these template's triggers are kept after
# applying vetoes
if self.valid:
self.keep = veto.indices_within_times(times, self.valid[0], self.valid[1])
logging.info('applying vetoes')
else:
self.keep = numpy.arange(0, len(times))
if self.bank is not None:
self.param = {}
if 'parameters' in self.bank.attrs :
for col in self.bank.attrs['parameters']:
self.param[col] = self.bank[col][self.template_num]
else :
for col in self.bank:
self.param[col] = self.bank[col][self.template_num]
# Calculate the trigger id by adding the relative offset in self.keep
# to the absolute beginning index of this templates triggers stored
# in 'template_boundaries'
trigger_id = self.keep + self.file['%s/template_boundaries' % self.ifo][num]
return trigger_id
def __getitem__(self, col):
""" Return the column of data for current active template after
applying vetoes
Parameters
----------
col: str
Name of column to read
Returns
-------
data: numpy.ndarray
The requested column of data
"""
if self.template_num == None:
raise ValueError('You must call set_template to first pick the '
'template to read data from')
data = self.get_data(col, self.template_num)
data = data[self.keep] if self.valid else data
return data
logging.info('Starting...')
num_templates = len(h5py.File(args.template_bank, "r")['template_hash'])
tmin, tmax = parse_template_range(num_templates, args.template_fraction_range)
logging.info('Analyzing template %s - %s' % (tmin, tmax-1))
logging.info('Opening first trigger file: %s' % args.trigger_files[0])
trigs0= ReadByTemplate(args.trigger_files[0],
args.template_bank, args.segment_name, args.veto_files)
logging.info('Opening second trigger file: %s' % args.trigger_files[1])
trigs1 = ReadByTemplate(args.trigger_files[1],
args.template_bank, args.segment_name, args.veto_files)
coinc_segs = (trigs0.segs & trigs1.segs).coalesce()
if args.strict_coinc_time:
trigs0.segs = coinc_segs
trigs1.segs = coinc_segs
trigs0.valid = veto.segments_to_start_end(trigs0.segs)
trigs1.valid = veto.segments_to_start_end(trigs1.segs)
# initialize a Stat class instance to calculate the coinc ranking statistic
ifos = [trigs0.ifo, trigs1.ifo]
rank_method = stat.get_statistic(args.ranking_statistic)(args.statistic_files,
ifos=ifos)
if args.use_maxalpha:
rank_method.use_alphamax()
det0, det1 = detector.Detector(trigs0.ifo), detector.Detector(trigs1.ifo)
time_window = det0.light_travel_time_to_detector(det1) + args.coinc_threshold
if args.timeslide_interval is not None and \
time_window >= args.timeslide_interval:
raise parser.error("The maximum time delay between detectors should be "
"smaller than the timeslide interval.")
# slide = 0 means don't do timeslides
if args.timeslide_interval is None:
args.timeslide_interval = 0
logging.info('The coincidence window is %3.1f ms' % (time_window * 1000))
data = {'stat': [],
'decimation_factor': [],
'time1': [],
'time2': [],
'trigger_id1': [],
'trigger_id2': [],
'timeslide_id': [],
'template_id':[]
}
if args.randomize_template_order:
seed(0)
template_ids = numpy.arange(0, num_templates)
shuffle(template_ids)
template_ids = template_ids[tmin:tmax]
else:
template_ids = range(tmin, tmax)
for tnum in template_ids:
tid0g = trigs0.set_template(tnum)
tid1g = trigs1.set_template(tnum)
if (len(tid0g) == 0) or (len(tid1g) == 0):
continue
t0g = trigs0['end_time']
t1g = trigs1['end_time']
logging.info('Trigs for template %s, %s:%s %s:%s' % \
(tnum, trigs0.ifo, len(t0g), trigs1.ifo, len(t1g)))
logging.info('Calculating Single Detector Statistic')
s0g, s1g = rank_method.single(trigs0), rank_method.single(trigs1)
# Loop over the single triggers and calculate the coincs they can
# form
start0 = 0
while start0 < len(s0g):
start1 = 0
while start1 < len(s1g):
end0 = start0 + args.batch_singles
end1 = start1 + args.batch_singles
if end0 > len(s0g):
end0 = len(s0g)
if end1 > len(s1g):
end1 = len(s1g)
# Set the local parts of the single information we'll use
tid0 = tid0g[start0:end0]
tid1 = tid1g[start1:end1]
s0 = s0g[start0:end0]
s1 = s1g[start1:end1]
t0 = t0g[start0:end0]
t1 = t1g[start1:end1]
i0, i1, slide = coinc.time_coincidence(t0, t1, time_window,
args.timeslide_interval)
logging.info('Coincident Trigs: %s' % (len(i1)))
logging.info('Calculating Multi-Detector Combined Statistic: %s, %s', end0, end1)
c = rank_method.coinc(s0[i0], s1[i1], slide,
args.timeslide_interval)
#index values of the zerolag triggers
fi = numpy.where(slide == 0)[0]
#index values of the background triggers
bi = numpy.where(slide != 0)[0]
logging.info('%s foreground triggers' % len(fi))
logging.info('%s background triggers' % len(bi))
# coincs will be decimated by successive (multiplicative) levels
# tracked by 'total_factor'
bi_dec = bi.copy()
dec = numpy.ones(len(bi))
total_factor = 1
for decstr in args.loudest_keep_values:
thresh, factor = decstr.split(':')
thresh = float(thresh)
# throws an error if 'factor' is not the string representation
# of an integer
total_factor *= int(factor)
# triggers not being further decimated
upper = c[bi_dec] >= thresh
idxk = bi_dec[upper]
# decimate the remaining triggers
idx = bi_dec[c[bi_dec] < thresh]
idx = idx[slide[idx] % total_factor == 0]
bi_dec = numpy.concatenate([idxk, idx])
dec = numpy.concatenate([dec[upper],
numpy.repeat(total_factor, len(idx))]
)
ti = numpy.concatenate([bi_dec, fi]).astype(numpy.uint32)
dec_fac = numpy.concatenate([dec, numpy.ones(len(fi))])
logging.info('%s after decimation' % len(ti))
# temporary storage for decimated trigger ids
g0 = i0[ti]
g1 = i1[ti]
del i0
del i1
data['stat'] += [c[ti]]
data['decimation_factor'] += [dec_fac]
data['time1'] += [t0[g0]]
data['time2'] += [t1[g1]]
data['trigger_id1'] += [tid0[g0]]
data['trigger_id2'] += [tid1[g1]]
data['timeslide_id'] += [slide[ti]]
data['template_id'] += [numpy.repeat(tnum, len(ti))]
start1 += args.batch_singles
start0 += args.batch_singles
if len(data['stat']) > 0:
for key in data:
data[key] = numpy.concatenate(data[key])
if args.cluster_window and len(data['stat']) > 0:
cid = coinc.cluster_coincs(data['stat'], data['time1'], data['time2'],
data['timeslide_id'], args.timeslide_interval,
args.cluster_window)
logging.info('saving coincident triggers')
f = h5py.File(args.output_file, 'w')
if len(data['stat']) > 0:
for key in data:
var = data[key][cid] if args.cluster_window else data[key]
f.create_dataset(key, data=var,
compression='gzip',
compression_opts=9,
shuffle=True)
f['segments/coinc/start'], f['segments/coinc/end'] = veto.segments_to_start_end(coinc_segs)
for t in [trigs0, trigs1]:
f['segments/%s/start' % t.ifo], f['segments/%s/end' % t.ifo] = t.valid
f.attrs['timeslide_interval'] = args.timeslide_interval
f.attrs['detector_1'] = det0.name
f.attrs['detector_2'] = det1.name
f.attrs['foreground_time1'] = abs(trigs0.segs)
f.attrs['foreground_time2'] = abs(trigs1.segs)
f.attrs['coinc_time'] = abs(coinc_segs)
if args.timeslide_interval:
nslides = int(max(abs(trigs0.segs), abs(trigs1.segs)) / args.timeslide_interval)
else:
nslides = 0
f.attrs['num_slides'] = nslides
logging.info('Done')
![swh spinner](/static/img/swh-spinner.gif)
Computing file changes ...