Content - f18c89a92f1ad67a50303081e09e58e06339aad7 - b054075/bin/pycbc_splitbank

swh:1:snp:3a699297f000109a1bc833f294a54171df990207

Tip revision: 5179ccee3df706f32621b7602a68fb7e42f881ac authored by Duncan Brown on 15 September 2015, 20:15:07 UTC
Merge pull request #278 from titodalcanton/psd_estim_docs
Tip revision: 5179cce
pycbc_splitbank
#!/usr/bin/python
#
# Copyright (C) 2014 LIGO Scientific Collaboration
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.


#
# =============================================================================
#
#                                   Preamble
#
# =============================================================================
#

"""
Splits a table in an xml file into multiple pieces
"""

import pycbc
import pycbc.version
__author__  = "Alex Nitz <alex.nitz@ligo.org>"
__version__ = pycbc.version.git_verbose_msg
__date__    = pycbc.version.date
__program__ = "pycbc_splitbank"

import time
import argparse
from glue import gpstime
from glue.ligolw import ligolw
from glue.ligolw import table
from glue.ligolw import lsctables
from glue.ligolw import utils as ligolw_utils
from glue.ligolw.utils import process as ligolw_process
from glue import git_version
from numpy import random, ceil

class ContentHandler(ligolw.LIGOLWContentHandler):
    pass
lsctables.use_in(ContentHandler)

 
# Command line parsing
_desc = __doc__[1:]
parser = argparse.ArgumentParser(description=_desc)
parser.add_argument('--version', action='version', version=__version__)

group = parser.add_mutually_exclusive_group(required=True)
group.add_argument('--templates-per-bank', metavar='SAMPLES', help='number of templates in the output banks', type=int)
group.add_argument('-n', '--number-of-banks', metavar='N', help='Split template bank into N files', type=int)
group.add_argument("-O", "--output-filenames", nargs='*', default=None,
                    action="store",
                    metavar="OUTPUT_FILENAME", help="""Directly specify the
                    names of the output files. The number of files specified
                    here will dictate how to split the bank. It will be split
                    equally between all specified files.""")

parser.add_argument("-o", "--output-prefix", default=None, help="Prefix to add to the template bank name (name becomes output#.xml[.gz])" )
parser.add_argument("-z", "--write-compress", action="store_true", help="Write compressed xml.gz files.")

parser.add_argument("-V", "--verbose", action="store_true", help="Print extra debugging information", default=False )
parser.add_argument("-t", "--bank-file", metavar='INPUT_FILE',
                    help='Template bank to split', required=True)

parser.add_argument("--sort-mchirp", action="store_true", default=False,
                    help='Sort templates by chirp mass before splitting')
parser.add_argument("--random-sort", action="store_true", default=False,
                    help='Sort templates randomly before splitting')
parser.add_argument("--random-seed", type=int,
                    help='Random seed to use when sorting randomly')

args = parser.parse_args()

if args.output_filenames and args.output_prefix:
    errMsg="Cannot supply --output-filenames with --output-prefix."
    parser.error(errMsg)

if args.sort_mchirp and args.random_sort:
    errMsg="You can't sort by Mchirp *and* randomly, dumbass!"
    parser.error(errMsg)

if args.output_filenames:
    args.number_of_banks = len(args.output_filenames)

indoc = ligolw_utils.load_filename(args.bank_file, verbose=args.verbose,
                                   contenthandler=ContentHandler)

try:
  template_bank_table = table.get_table(indoc, lsctables.SnglInspiralTable.tableName)
  tabletype = lsctables.SnglInspiralTable
except:
  template_bank_table = table.get_table(indoc, lsctables.SimInspiralTable.tableName)
  tabletype = lsctables.SimInspiralTable

length = len(template_bank_table)

def mchirp_sort(x,y):
    import pycbc.pnutils
    mc1,e1 = pycbc.pnutils.mass1_mass2_to_mchirp_eta(x.mass1,x.mass2)
    mc2,e2 = pycbc.pnutils.mass1_mass2_to_mchirp_eta(y.mass1,y.mass2)
    return cmp(mc1,mc2)

tt = template_bank_table

if args.sort_mchirp:
    tt = sorted(template_bank_table,cmp=mchirp_sort)

if args.random_sort:
    if args.random_seed is not None:
        random.seed(args.random_seed)
    random.shuffle(template_bank_table)

if args.number_of_banks:
    # Decide how to split up the templates
    # Put approximately the requested number of templates in each file
    # But try to make each file very nearly the same size
    num_files = args.number_of_banks
    num_per_file = length / float(num_files)
elif args.templates_per_bank:
    num_per_file = args.templates_per_bank
    num_files = int(ceil(float(length) / num_per_file))

index_list = [int(round(num_per_file*idx)) for idx in range(num_files)]
index_list.append(length)
assert(index_list[0] == 0)

for num, (idx1, idx2) in enumerate(zip(index_list[:-1], index_list[1:])):
    assert(idx2 > idx1)
    # create a blank xml document and add the process id
    outdoc = ligolw.Document()
    outdoc.appendChild(ligolw.LIGO_LW())

    proc_id = ligolw_process.register_to_xmldoc(outdoc, 
                    __program__, args.__dict__, ifos=["G1"],
                    version=git_version.id, cvs_repository=git_version.branch,
                    cvs_entry_time=git_version.date).process_id

    sngl_inspiral_table = lsctables.New(tabletype,columns=template_bank_table.columnnames)
    outdoc.childNodes[0].appendChild(sngl_inspiral_table)

    for i in range(idx2-idx1):
       row = tt.pop()
       sngl_inspiral_table.append(row) 
    
    # write the xml doc to disk
    proctable = table.get_table(outdoc, lsctables.ProcessTable.tableName)
    proctable[0].end_time = gpstime.GpsSecondsFromPyUTC(time.time())

    if args.output_filenames:
        outname = args.output_filenames[num]
    elif args.output_prefix:
        outname = args.output_prefix + str(num) + '.xml'
        if args.write_compress:
            outname +='.gz'
    else:
        errMsg = "Cannot figure out how to set output file names."
        raise ValueError(errMsg)
    ligolw_utils.write_filename(outdoc, outname, gz=outname.endswith('.gz'))