Content - 813b4661439a5eede8cc0fabb886c695dc8a0abd - 47a5694/bin/pycbc_optimize_snr

swh:1:snp:3a699297f000109a1bc833f294a54171df990207

Tip revision: 3135d11ad886c6dd66a2b0150f01b0d677ca0e56 authored by Alex Nitz on 02 March 2022, 22:05:59 UTC
Update setup.py (#3953)
Tip revision: 3135d11
pycbc_optimize_snr
#!/usr/bin/env python

"""Followup utility to optimize the SNR of a PyCBC Live trigger."""

import os
import shutil
import argparse, numpy, h5py
import time
import types
import logging
import tempfile
from scipy.optimize import differential_evolution, shgo
import pycbc
from pycbc import (
    DYN_RANGE_FAC, fft, scheme, version, waveform
)
from pycbc.types import MultiDetOptionAction, zeros, load_frequencyseries
from pycbc.filter import matched_filter_core
import pycbc.waveform.bank
from pycbc.conversions import (
    mchirp_from_mass1_mass2, mtotal_from_mchirp_eta,
    mass1_from_mtotal_eta, mass2_from_mtotal_eta
)
from pycbc.io import live
from pycbc.io.hdf import load_hdf5_to_dict
from pycbc.detector import Detector
from pycbc.psd import interpolate

try:
    import pyswarms as ps
except:
    ps = None


Nfeval = 0
start_time = time.time()

def callback_func(Xi, convergence=0):
    global Nfeval
    logging.info("Currently at %d %s", Nfeval, convergence)
    if (time.time() - start_time) > 360:
        return True
    Nfeval += 1


def compute_network_snr_core(v, *argv, raise_err=False):
    """
    Compute network SNR as a function over mchirp, eta and two aligned
    spin components, stored in that order in the sequence v
    """    
    data = argv[0]
    coinc_times = argv[1]
    ifos = argv[2]
    flen = argv[3]
    approximant = argv[4]
    flow = argv[5]
    f_end = argv[6]
    delta_f = argv[7]
    sample_rate = argv[8]
    distance = 1.0 / DYN_RANGE_FAC
    mtotal = mtotal_from_mchirp_eta(v[0], v[1])
    mass1 = mass1_from_mtotal_eta(mtotal, v[1])
    mass2 = mass2_from_mtotal_eta(mtotal, v[1])

    # enforce broadly accepted search space boundaries
    if mass1 < 1 or mass2 < 1 or mtotal > 500:
        return -numpy.inf, {}

    try:
        htilde = waveform.get_waveform_filter(
                zeros(flen, dtype=numpy.complex64),
                approximant=approximant,
                mass1=mass1, mass2=mass2, spin1z=v[2], spin2z=v[3],
                f_lower=flow, f_final=f_end, delta_f=delta_f,
                delta_t=1.0/sample_rate, distance=distance)
    except RuntimeError:
        if raise_err:
            raise
        # assume a failure in the waveform approximant
        # due to the choice of parameters
        else:
            return -numpy.inf, {}

    if not hasattr(htilde, 'params'):
        htilde.params = dict(mass1=mass1, mass2=mass2,
                             spin1z=v[2], spin2z=v[3])
    if not hasattr(htilde, 'end_idx'):
        htilde.end_idx = int(f_end / htilde.delta_f)
    htilde.approximant = approximant
    htilde.sigmasq = types.MethodType(pycbc.waveform.bank.sigma_cached,
                                      htilde)
    htilde.min_f_lower = flow
    htilde.end_frequency = f_end
    htilde.f_lower = flow
    network_snrsq = 0
    snr_series_dict = {}
    for ifo in ifos:
        sigmasq = htilde.sigmasq(data[ifo].psd)
        snr, _, norm = matched_filter_core(htilde, data[ifo],
                                           h_norm=sigmasq)
        duration = 0.095
        half_dur_samples = int(snr.sample_rate * duration / 2)
        onsource_idx = float(coinc_times[ifo] - snr.start_time) * snr.sample_rate
        onsource_idx = int(round(onsource_idx))
        onsource_slice = slice(onsource_idx - half_dur_samples,
                               onsource_idx + half_dur_samples + 1)
        snr_series = snr[onsource_slice] * norm
        snr_series_dict[ifo] = snr * norm
        snr_series_dict['sigmasq_' + ifo] = sigmasq
        network_snrsq += max(abs(snr_series._data)) ** 2.

    return network_snrsq ** 0.5, snr_series_dict


def compute_network_snr(v, *argv):
    if len(argv) == 1:
        argv = argv[0]
    nsnr, _ = compute_network_snr_core(v, *argv)
    return -nsnr


def compute_network_snr_pso(v, *argv, **kwargs):
    argv = kwargs['args']
    nsnr_array = numpy.array([-compute_network_snr_core(v_i, *argv)[0] for v_i in v])
    return nsnr_array


def optimize_di(bounds, cli_args, extra_args):
    bounds = [
        bounds['mchirp'],
        bounds['eta'],
        bounds['spin1z'],
        bounds['spin2z']
    ]
    results = differential_evolution(
        compute_network_snr,
        bounds,
        maxiter=cli_args.di_maxiter,
        workers=(cli_args.cores or -1),
        popsize=cli_args.di_popsize,
        mutation=(0.5, 1),
        recombination=0.7,
        callback=callback_func,
        args=extra_args
    )
    return results.x


def optimize_shgo(bounds, cli_args, extra_args):
    bounds = [
        bounds['mchirp'],
        bounds['eta'],
        bounds['spin1z'],
        bounds['spin2z']
    ]
    results = shgo(
        compute_network_snr,
        bounds=bounds,
        args=extra_args,
        iters=args.shgo_iters,
        n=args.shgo_samples,
        sampling_method="sobol"
    )
    return results.x


def optimize_pso(bounds, cli_args, extra_args):
    options = {
        'c1': cli_args.pso_c1,
        'c2': cli_args.pso_c2,
        'w': cli_args.pso_w
    }
    min_bounds = [
        bounds['mchirp'][0],
        bounds['eta'][0],
        bounds['spin1z'][0],
        bounds['spin2z'][0]
    ]
    max_bounds = [
        bounds['mchirp'][1],
        bounds['eta'][1],
        bounds['spin1z'][1],
        bounds['spin2z'][1]
    ]
    optimizer = ps.single.GlobalBestPSO(
        n_particles=cli_args.pso_particles,
        dimensions=4,
        options=options,
        bounds=(min_bounds, max_bounds)
    )
    _, results = optimizer.optimize(
        compute_network_snr_pso,
        iters=cli_args.pso_iters,
        n_processes=cli_args.cores,
        args=extra_args
    )
    return results


optimize_funcs = {
    'differential_evolution': optimize_di,
    'shgo': optimize_shgo,
    'pso': optimize_pso
}


parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument('--version', action='version',
                    version=version.git_verbose_msg)
parser.add_argument('--verbose', action='store_true')
parser.add_argument('--params-file', required=True,
                    help='Location of the attributes file created by PyCBC '
                         'Live')
parser.add_argument('--data-files', type=str, nargs='+',
                    action=MultiDetOptionAction,
                    metavar='IFO:DATA_FILE',
                    help='Locations of the overwhitened data files produced '
                         'by PyCBC Live.')
parser.add_argument('--psd-files', type=str, nargs='+',
                    action=MultiDetOptionAction,
                    metavar='IFO:PSD_FILE',
                    help='Locations of the PSD files produced '
                         'by PyCBC Live.')
parser.add_argument('--approximant', required=True,
                    help='Waveform approximant string.')
parser.add_argument('--snr-threshold', default=4.0,
                    help='If the SNR in ifo X is below this threshold do not '
                         'consider it part of the coincidence. Not implemented')
parser.add_argument('--gracedb-server', metavar='URL',
                    help='URL of GraceDB server API for uploading events. '
                         'If not provided, the default URL is used.')
parser.add_argument('--gracedb-search', type=str, default='AllSky',
                    help='String going into the "search" field of the GraceDB '
                         'events')
parser.add_argument('--production', action='store_true',
                    help='Upload a production event rather than a test event')
parser.add_argument('--enable-gracedb-upload', action='store_true', default=False,
                    help='Upload triggers to GraceDB')
parser.add_argument('--output-path', required=True,
                    help='Path to a directory to store results in')
parser.add_argument('--cores', type=int,
                    help='Restrict calculation to given number of CPU cores')
parser.add_argument('--optimizer', type=str, default='differential_evolution',
                    choices=sorted(optimize_funcs),
                    help='The optimizer to use, differential_evolution, shgo or pso')
parser.add_argument('--di-maxiter', type=int, default=50,
                    help='Only relevant for --optimizer differential_evolution: '
                    'The maximum number of generations over which the entire '
                    'population is evolved.')
parser.add_argument('--di-popsize', type=int, default=100,
                    help='Only relevant for --optimizer differential_evolution: '
                         'A multiplier for setting the total population size.')
parser.add_argument('--shgo-samples', type=int, default=76,
                    help='Only relevant for --optimizer shgo: '
                    'Number of sampling points used in the construction of the '
                    'simplicial complex.')
parser.add_argument('--shgo-iters', type=int, default=3,
                    help='Only relevant for --optimizer shgo: '
                    'Number of iterations used in the construction of the simplicial complex.')
parser.add_argument('--pso-iters', type=int, default=5,
                    help='Only relevant for --optimizer pso: '
                    'Number of iterations used in the particle swarm optimization.')
parser.add_argument('--pso-particles', type=int, default=250,
                    help='Only relevant for --optimizer pso: '
                    'Number of particles used in the swarm.')
parser.add_argument('--pso-c1', type=float, default=0.5,
                    help='Only relevant for --optimizer pso: '
                    'The hyperparameter c1: the cognitive parameter.')
parser.add_argument('--pso-c2', type=float, default=2.0,
                    help='Only relevant for --optimizer pso: '
                    'The hyperparameter c2: the social parameter.')
parser.add_argument('--pso-w', type=float, default=0.01,
                    help='Only relevant for --optimizer pso: '
                    'The hyperparameter w: the intertia parameter.')

scheme.insert_processing_option_group(parser)
fft.insert_fft_option_group(parser)

# Input checking
args = parser.parse_args()
if args.snr_threshold != 4:
    parser.error("Sorry, the SNR threshold option doesn't work yet")
if args.optimizer == 'pso' and ps == None:
    parser.error('You need to install pyswarms to use the pso optimizer.')
pycbc.init_logging(args.verbose)

scheme.verify_processing_options(args, parser)
fft.verify_fft_options(args, parser)

scheme_context = scheme.from_cli(args)
fft.from_cli(args)

logging.info('Starting optimize SNR')

data = {}
ifos = list(args.data_files.keys())
for ifo in ifos:
    data[ifo] = load_frequencyseries(args.data_files[ifo])
    data[ifo].psd = load_frequencyseries(args.psd_files[ifo])

fp = h5py.File(args.params_file, 'r')
original_gid = None
if 'gid' in fp:
    original_gid = fp['gid'].asstr()[()]
if args.enable_gracedb_upload and not original_gid:
    raise RuntimeError('Params must include original gracedb ID in order '
                       'to upload followup!')
if original_gid:
    logging.info('Following up GID %s', original_gid)

coinc_times = {}
for ifo in ifos:
    try:
        coinc_times[ifo] = fp['coinc_times'][ifo][()]
    except KeyError:
        pass
coinc_ifos = list(coinc_times.keys())

flen = fp['flen'][()]
approximant = args.approximant
flow = float(fp['flow'][()])
f_end = float(fp['f_end'][()])
delta_f = fp['delta_f'][()]
sample_rate = fp['sample_rate'][()]

extra_args = [data, coinc_times, coinc_ifos, flen,
              approximant, flow, f_end, delta_f, sample_rate]

mchirp = mchirp_from_mass1_mass2(
    fp['mass1'][()],
    fp['mass2'][()]
)
minchirp = mchirp * (1 - mchirp / 50.0)
maxchirp = mchirp * (1 + mchirp / 50.0)
minchirp = 1 if minchirp < 1 else minchirp
maxchirp = 80 if maxchirp > 80 else maxchirp

# boundary of the optimization space (dict of (min, max) tuples)
bounds = {
    'mchirp': (minchirp, maxchirp),
    'eta': (0.01, 0.2499),
    'spin1z': (-0.9, 0.9),
    'spin2z': (-0.9, 0.9)
}

with scheme_context:
    logging.info('Starting optimization')

    optimize_func = optimize_funcs[args.optimizer]
    opt_params = optimize_func(bounds, args, extra_args)

    logging.info('Optimization complete')

    fup_ifos = set(ifos) - set(coinc_ifos)
    for ifo in fup_ifos:
        coinc_times[ifo] = coinc_times[coinc_ifos[0]]

    extra_args[2] = ifos

    _, snr_series_dict = compute_network_snr_core(opt_params, *extra_args)

mtotal = mtotal_from_mchirp_eta(opt_params[0], opt_params[1])
mass1 = mass1_from_mtotal_eta(mtotal, opt_params[1])
mass2 = mass2_from_mtotal_eta(mtotal, opt_params[1])
spin1z = opt_params[2]
spin2z = opt_params[3]

# Prepare for GraceDB upload
coinc_results = {}
followup_data = {}

loudest_ifo = None
loudest_snr_allifos = 0
loudest_snr_time_allifos = None
loudest_snrs = {}
loudest_snr_times = {}
loudest_snr_idxs = {}

# Determine which ifo has the loudest SNR peak ... We'll use this to determine
# the time window for other ifos (e.g. if one ifo has a loud SNR peak, and the
# other has SNR < 4, we would need to determine the loudest SNR for the quieter
# instrument within light-travel time of the loud SNR peak)
for ifo in ifos:
    duration = 0.095
    half_dur_samples = int(sample_rate * duration / 2)
    onsource_idx = float(coinc_times[ifo] - snr_series_dict[ifo].start_time) \
        * snr_series_dict[ifo].sample_rate
    onsource_idx = int(round(onsource_idx))
    onsource_slice = slice(onsource_idx - half_dur_samples,
                           onsource_idx + half_dur_samples + 1)
    max_snr_idx = numpy.argmax(abs(snr_series_dict[ifo][onsource_slice]))
    max_snr_idx = max_snr_idx + onsource_idx - half_dur_samples
    max_snr = snr_series_dict[ifo][max_snr_idx]
    max_snr_time = snr_series_dict[ifo].start_time \
        + max_snr_idx*(1./sample_rate)
    loudest_snr_idxs[ifo] = max_snr_idx
    loudest_snr_times[ifo] = float(max_snr_time)
    loudest_snrs[ifo] = max_snr
    if abs(max_snr) > loudest_snr_allifos:
        loudest_snr_allifos = abs(max_snr)
        loudest_ifo = ifo
        loudest_snr_time_allifos = loudest_snr_times[ifo]

# Now create the snr_series_dict
for ifo in ifos:
    # Find loudest SNR within coincidence window of the largest peak
    snr_peak = abs(loudest_snrs[ifo])
    # And check light travel time
    lttbd = Detector(ifo).light_travel_time_to_detector(Detector(loudest_ifo))
    # Add small buffer
    lttbd += 0.005

    time_window = [loudest_snr_time_allifos-lttbd,
                   loudest_snr_time_allifos+lttbd]

    snr_slice = snr_series_dict[ifo].time_slice(time_window[0], time_window[1])
    max_snr_idx = numpy.argmax(abs(snr_slice))
    idx_offset = snr_slice.start_time - snr_series_dict[ifo].start_time
    idx_offset = int(float(idx_offset) * sample_rate + 0.5)
    max_snr_idx = max_snr_idx + idx_offset
    new_snr_slice = slice(max_snr_idx - int(sample_rate/10),
                          max_snr_idx + int(sample_rate/10)+1)
    snr_series_dict[ifo] = snr_series_dict[ifo][new_snr_slice]

netsnr = 0
for idx, ifo in enumerate(ifos):
    coinc_results['foreground/'+ifo+'/mass1'] = mass1
    coinc_results['foreground/'+ifo+'/mass2'] = mass2
    coinc_results['foreground/'+ifo+'/spin1z'] = spin1z
    coinc_results['foreground/'+ifo+'/spin2z'] = spin2z
    coinc_results['foreground/'+ifo+'/f_lower'] = flow
    coinc_results['foreground/'+ifo+'/window'] = 0.1
    coinc_results['foreground/'+ifo+'/sample_rate'] = int(sample_rate)
    coinc_results['foreground/'+ifo+'/template_id'] = 0
    # Apparently GraceDB gets upset if chi-squared is not set
    coinc_results['foreground/'+ifo+'/chisq'] = 1.
    coinc_results['foreground/'+ifo+'/chisq_dof'] = 1
    coinc_results['foreground/'+ifo+'/template_duration'] = \
        fp['template_duration'][()]
    followup_data[ifo] = {}
    followup_data[ifo]['psd'] = interpolate(data[ifo].psd, 0.25)
    followup_data[ifo]['snr_series'] = snr_series_dict[ifo]

    # Find loudest SNR within coincidence window of the largest peak
    snr_peak = abs(loudest_snrs[ifo])
    # And check light travel time
    lttbd = Detector(ifo).light_travel_time_to_detector(Detector(loudest_ifo))
    # Add small buffer
    lttbd += 0.005

    time_window = [loudest_snr_time_allifos-lttbd,
                   loudest_snr_time_allifos+lttbd]

    snr_slice = snr_series_dict[ifo].time_slice(time_window[0], time_window[1])
    max_snr_idx = numpy.argmax(abs(snr_slice))
    loudest_snr_time = snr_slice.sample_times[max_snr_idx]
    loudest_snr = snr_slice[max_snr_idx]

    coinc_results['foreground/'+ifo+'/end_time'] = loudest_snr_time
    coinc_results['foreground/'+ifo+'/snr_series'] = snr_series_dict[ifo]
    coinc_results['foreground/'+ifo+'/psd_series'] = data[ifo].psd
    coinc_results['foreground/'+ifo+'/delta_f'] = delta_f
    coinc_results['foreground/'+ifo+'/snr'] = abs(loudest_snr)
    netsnr += abs(loudest_snr)**2
    coinc_results['foreground/'+ifo+'/sigmasq'] \
        = snr_series_dict['sigmasq_' + ifo]
    coinc_results['foreground/'+ifo+'/coa_phase'] \
        = numpy.angle(loudest_snr)

coinc_results['foreground/stat'] = numpy.sqrt(netsnr)
coinc_results['foreground/ifar'] = fp['ifar'][()]

channel_names = {}
for ifo in fp['channel_names'].keys():
    channel_names[ifo] = fp['channel_names'][ifo].asstr()[()]

mc_area_args = load_hdf5_to_dict(fp, 'mc_area_args/')

kwargs = {'psds': {ifo: followup_data[ifo]['psd'] for ifo in ifos},
          'low_frequency_cutoff': flow,
          'followup_data': followup_data,
          'channel_names': channel_names,
          'mc_area_args': mc_area_args}

doc = live.SingleCoincForGraceDB(ifos, coinc_results,
                                 upload_snr_series=True, **kwargs)

if args.enable_gracedb_upload:
    comment = ('Automatic PyCBC followup of trigger '
               '<a href="/events/{0}/view">{0}</a> to find the template '
               'parameters that maximize the SNR. The FAR of this trigger is '
               'copied from {0} and does not reflect this triggers\' template '
               'parameters.')
    comment = comment.format(original_gid)

    tmpdir = tempfile.mkdtemp()
    xml_path = os.path.join(tmpdir,
                            '{:.3f}.xml.gz'.format(loudest_snr_time_allifos))
    gid = doc.upload(xml_path, gracedb_server=args.gracedb_server,
                     testing=(not args.production), extra_strings=[comment],
                     search=args.gracedb_search)
    if gid is not None:
        logging.info('Event uploaded as %s', gid)
        shutil.rmtree(tmpdir)

        # add a note to the original G event pointing to the optimized one
        from ligo.gracedb.rest import GraceDb

        gracedb = GraceDb(args.gracedb_server) \
                if args.gracedb_server is not None else GraceDb()
        comment = ('Result of SNR maximization uploaded as '
                   '<a href="/events/{0}/view">{0}</a>').format(gid)
        gracedb.writeLog(original_gid, comment, tag_name=['analyst_comments'])
else:
    fname = 'coinc-{:.3f}-{}.xml.gz'.format(loudest_snr_time_allifos,
                                            pycbc.random_string(6))
    fname = os.path.join(args.output_path, fname)
    doc.save(fname)