Revision d0b5eb5ab4223c69379b07d612d7b8fc289e0e6c authored by Brian Bockelman on 12 November 2015, 20:42:48 UTC, committed by Brian Bockelman on 12 November 2015, 20:42:48 UTC
1 parent e80deb9
Raw File
pycbc_timeslides
#!/usr/bin/env python
#
#############################################################################
# #
# # This program is free software; you can redistribute it and/or modify it
# # under the terms of the GNU General Public License as published by the
# # Free Software Foundation; either version 2 of the License, or (at your
# # option) any later version.
# #
# # This program is distributed in the hope that it will be useful, but
# # WITHOUT ANY WARRANTY; without even the implied warranty of
# # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# # Public License for more details.
# #
# # You should have received a copy of the GNU General Public License along
# # with this program; if not, write to the Free Software Foundation, Inc.,
# # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
# #
#############################################################################

"""
pycbc_timeslides constructs time_slide tables, writing the result to one or
more files. Inspiral-style slides are specified by multiples of a fixed offset
vector, with equal numbers of +ve and -ve multiples: the total number of time
slide entries including zerolag is 2*nslides+1. Tisi-style slides are specified
via one or more ranges of offsets for each instrument. If more than one
instrument and set of offsets is given, the time slide table will contain
entries corresponding to all combinations of offsets, one each from the
different instruments. If no file names are given on the command line, output
is written to stdout. If more than one file name is given on the command line,
the time slides are distributed uniformly between files with each file being
given a disjoint subset.  Output files whose names end in '.gz' will be gzip
compressed.
Example:\n\npycbc_timeslides --verbose --tisi-slides H1=-100:+100:10 L1=-100:+100:+10 time_slides.xml.gz
"""

import argparse
import sys

from glue.ligolw import ligolw
from glue.ligolw import lsctables
from glue.ligolw import utils as ligolw_utils
from glue.ligolw.utils import process as ligolw_process
from glue.ligolw.utils import time_slide as ligolw_time_slide
from glue import offsetvector

from pylal import ligolw_tisi
from pylal import cbc_timeslides

import pycbc.version

__author__ = "Denty <thomas.dent@ligo.org"
__version__ = pycbc.version.git_verbose_msg
__githash__ = pycbc.version.git_hash
__date__ = pycbc.version.date
__program__ = "pycbc_timeslides"


#### Command line ####
def parse_normalize(normalize):
  returnDict = {}
  for value in normalize:
    name, offset = value.split("=")
    returnDict[name] = offset
  return returnDict

def parse_command_line():
  """
  Parse the command line.
  """
  _desc = __doc__[1:]
  parser = argparse.ArgumentParser(description=_desc)
  parser.add_argument('--version', action='version', version=__version__)
  
  parser.add_argument( "-a", "--add-to", metavar = "filename", default = None,
      help = """Add the time slides from this file to the newly-generated time
                slides If the name ends in '.gz' it will be gzip-decompressed
                on input.""" )

  parser.add_argument( "--comment", metavar = "text",
                 help="Set comment string in process table (default = None)." )

  parser.add_argument( "-v", "--verbose", action = "store_true",
                       help = "Be verbose." )

  parser.add_argument( "-i", "--tisi-slides", metavar = "name=first:last:step",
                nargs="*", action = "store", help="""
                Generate tisi-style (i.e. 'new-style') slides by giving a
                space-separated list of instrument-specific sets of offsets.
                The set of offsets is (first + "n * step) where n is an integer
                such that first <= offset <= last. More than one set of offsets
                can be given for the same instrument, in which case the union
                is used.""" )

  parser.add_argument( "--inspiral-num-slides", 
                metavar="count:instrument=offset[,instrument=offset...]",
                action="store", nargs="*",
                help = """
                Generate a set of lalapps-thinca-style time slides.  The
                collection of instrument/offset pairs defines an offset vector,
                the time slides produced are integer multiples n * {offsets}
                where n is a non-zero integer in [-count, +count]. To generate
                multiple set of slides, give the option multiple times
                separated by spaces, e.g.
                --inspiral-num-slides '50:H1=0,L1=5 25:H1=10,V1=0'""" )

  parser.add_argument( "-n", "--normalize", metavar = "name=offset",
      action = "store", nargs = "*", help = """
      Normalize the time slides so that a given instrument has the specified
      offset in all slides.  The other offsets in each time slide are adjusted
      so that the relative offsets are preserved.  Time slides that do not
      involve this instrument are unaffected.  If multiple instrument/offset
      pairs are given, then for each time slide they are considered in
      alphabetical order by instrument until the first is found that affects
      the offsets of that time slide. YMMV.""" )

  parser.add_argument( "--remove-zero-lag", action="store_true", \
      help = "Remove the time slide with offsets of 0 for all instruments." )

  parser.add_argument( "--output-files", action="store", metavar="FILE",
                       nargs="+", required=True, help = """The path to the
                       file(s) in which to write the resulting time slides. If
                       more than one file is present, split the time slides
                       equally over the files.""")

  options =  parser.parse_args()

  if options.normalize:
    try:
      parse_normalize(options.normalize)
    except Exception as e:
      raise ValueError("unable to parse --normalize arguments: %s" % str(e))

  return options

#### LIGOLW boilerplate ####

def new_doc(comment=None, **kwargs):

  doc = ligolw.Document()
  doc.appendChild(ligolw.LIGO_LW())

  process = ligolw_process.register_to_xmldoc(
    doc,
    program = __program__,
    paramdict = kwargs,
    version = __githash__,
    cvs_repository = u"pycbc",
    cvs_entry_time = __date__,
    comment = comment
  )

  timeslidetable = lsctables.New(lsctables.TimeSlideTable)
  doc.childNodes[0].appendChild(timeslidetable)

  return doc, process

###################
#####  MAIN  ######
###################

options = parse_command_line()
filenames = options.output_files

## Load initial time slides

if options.add_to is not None:
  time_slide_table = lsctables.TimeSlideTable.get_table(ligolw_utils.load_filename(options.add_to, verbose = options.verbose, gz = gz, contenthandler = contenthandler))
  time_slide_table.sync_next_id()
  time_slides = time_slide_table.as_dict()
  if options.verbose: print >>sys.stderr, "Loaded %d time slides." % len(time_slides)
else:
    time_slides = {}

## Make new-style time slides

## dictionary mapping time_slide_id --> (dictionary mapping instrument --> offset)

if options.tisi_slides:
  if options.verbose: print >>sys.stderr, "Computing new-style time slides ..."

  for offsetvect in ligolw_tisi.SlidesIter( ligolw_tisi.parse_slides(options.tisi_slides) ):
    time_slides[lsctables.TimeSlideTable.get_next_id()] = offsetvect

## Make lalapps-thinca-style time slides

if options.inspiral_num_slides:
  if options.verbose: print >>sys.stderr, "Computing lalapps_thinca style slides ..."

  for inspiral_slidespec in options.inspiral_num_slides:
    for offsetvect in cbc_timeslides.Inspiral_Num_Slides_Iter( \
        *cbc_timeslides.parse_lalapps_thinca_slidespec(inspiral_slidespec) ):
      time_slides[lsctables.TimeSlideTable.get_next_id()] = offsetvect

if options.verbose: print >>sys.stderr, "Total of %d time slides." % len(time_slides)

## Remove duplicates

if options.verbose: print >>sys.stderr, "Identifying and removing duplicates ..."

map( time_slides.pop, ligolw_time_slide.time_slides_vacuum(time_slides, verbose=options.verbose).keys() )

if options.verbose: print >>sys.stderr, "%d time slides remain." % len(time_slides)

## Remove zerolag if required

if options.remove_zero_lag:
  if options.verbose: print >>sys.stderr, "Identifying and removing zero-lag ..."

  null_ids = [ id for id, offsetvect in time_slides.items() if not any(offsetvect.deltas.values()) ]
  for id in null_ids:
    del time_slides[id]

  if options.verbose: print >>sys.stderr, "%d time slides remain." % len(time_slides)

## Convert to list of offset dictionaries.  We no longer require the IDs,
## new ones will be assigned as they are put into the output XML document.

time_slides = time_slides.values()

# Normalize the time slides

if options.normalize:
  if options.verbose: print >>sys.stderr, "Normalizing the time slides ..."
  time_slides = [ offsetvect.normalize(**parse_normalize(options.normalize)) \
      for offsetvect in time_slides ]

## Make documents

lsctables.table.reset_next_ids([lsctables.TimeSlideTable])

while filenames:
  # Create an empty document and add process info
  xmldoc, process = new_doc(**options.__dict__)
  timeslidetable = lsctables.TimeSlideTable.get_table(xmldoc)

  # Decide how many slides to write in this file
  # If only one file is left, all remaining slides go in it
  N = int( round( float(len(time_slides))/len(filenames) ) )

  # Add the slides
  for offsetvect in time_slides[:N]:
    timeslidetable.append_offsetvector(offsetvect, process)

  del time_slides[:N]

  # Finish off and write the file
  ligolw_process.set_process_end_time(process)
  filename = filenames.pop(0)
  ligolw_utils.write_filename( xmldoc, filename, options.verbose, \
      gz=(filename or "stdout").endswith(".gz") )

# check there are no slides left over
assert not time_slides

back to top