Revision 561c8b0416deb4d51266e7a71002f8c6e93d36e3 authored by Soumi De on 18 May 2017, 13:33:34 UTC, committed by GitHub on 18 May 2017, 13:33:34 UTC


* Allow inj_filter_rejecor template waveforms to be generated starting from f_lower of templates stored in the bank
1 parent 703ba68
Raw File
pycbc_combine_statmap
#!/bin/env python
""" Apply a naive mass binning, assuming that each bin is fully independent, which 
is a conservative estimate. This clusters to find the most significant foreground, but
leaves the background triggers alone. """

import h5py, numpy, argparse, logging, pycbc, pycbc.events, lal
import pycbc.version

def com(f, files, group):
    """ Combine the same column from multiple files and saave to a third"""
    f[group] = numpy.concatenate([fi[group][:] if group in fi else numpy.array([], dtype=numpy.uint32) for fi in files])

parser = argparse.ArgumentParser()
parser.add_argument("--version", action="version", version=pycbc.version.git_verbose_msg)
parser.add_argument('--verbose', action='store_true')
parser.add_argument('--statmap-files', nargs='+',
                    help="List of coinc files to be redistributed")
parser.add_argument('--cluster-window', type=float)
parser.add_argument('--output-file', help="name of output file")
args = parser.parse_args()

pycbc.init_logging(args.verbose)

# We apply a dumb factor of the number of bins only, nothing smart for now, 
# no clustering of the background
files = [h5py.File(n) for n in args.statmap_files]

fac = len(args.statmap_files)

# copy over the standard data that is constant
f = h5py.File(args.output_file, "w")
f.attrs['detector_1'] = files[0].attrs['detector_1']
f.attrs['detector_2'] = files[0].attrs['detector_2']
f.attrs['timeslide_interval'] = files[0].attrs['timeslide_interval']

f.attrs['background_time'] = files[0].attrs['background_time']
f.attrs['foreground_time'] = files[0].attrs['foreground_time']
f.attrs['background_time_exc'] = files[0].attrs['background_time_exc']
f.attrs['foreground_time_exc'] = files[0].attrs['foreground_time_exc']

for attr in  ['detector_1', 'detector_2', 'timeslide_interval']:
    for i in range(len(files)-1):
        assert files[i].attrs[attr] == files[i+1].attrs[attr]

# combine times that are foreground vetoed as they may differ between bins
for key in files[0]['segments'].keys():
    if key == 'foreground_veto':
        continue
    f['segments/%s/start' % key] = files[0]['segments/%s/start' % key][:]
    f['segments/%s/end' % key] = files[0]['segments/%s/end' % key][:]

# copy over all the columns in the foreground group, we recalculate ifar/fap later
for key in files[0]['foreground'].keys():
    com(f, files, 'foreground/%s' % key)
    
# recalculate ifar/fap for the foreground triggers by 
#  applying a trials factor = num_files(num_bins)
ifar = f['foreground/ifar'][:] / fac
coinc_time = f.attrs['foreground_time'] / lal.YRJUL_SI
f['foreground/ifar'][:] = ifar
f['foreground/fap'][:] = 1 - numpy.exp( - coinc_time / ifar)

ifar = f['foreground/ifar_exc'][:] / fac
coinc_time = f.attrs['foreground_time_exc'] / lal.YRJUL_SI
f['foreground/ifar_exc'][:] = ifar
f['foreground/fap_exc'][:] = 1 - numpy.exp( - coinc_time / ifar)

# cluster for the loudest ifar value 

def argmax(v):
    return numpy.argsort(v)[-1]

stat = numpy.core.records.fromarrays([f['foreground/ifar'][:], 
                                      f['foreground/stat'][:]],
                                      names='ifar,stat')
cidx = pycbc.events.cluster_coincs(stat,
                                   f['foreground/time1'][:], 
                                   f['foreground/time2'][:], 
                                   numpy.zeros(len(ifar)),
                                   0, args.cluster_window, argmax=argmax)

# downsample the foreground columns to only the loudest ifar between the multiple files
for key in f['foreground'].keys():
    dset = f['foreground/%s' % key][:][cidx]
    del f['foreground/%s' % key]
    f['foreground/%s' % key] = dset

# If there is a background set (full_data as opposed to injection run), then recalculate 
# the values for its triggers as well
if 'background' in files[0]:  
    for key in files[0]['background'].keys():
        com(f, files, 'background/%s' % key)
     
    f['background/ifar'][:] =  f['background/ifar'][:] / fac
    
    for key in files[0]['background_exc'].keys():
        com(f, files, 'background_exc/%s' % key)
     
    f['background_exc/ifar'][:] = f['background_exc/ifar'][:] / fac

    com(f, files, 'segments/foreground_veto/start')
    com(f, files, 'segments/foreground_veto/end')


f.close()
back to top