Revision - 50c4e9f - Add Pegasus workflow for faith simulations (#4163)

Revision 50c4e9f16a0ed18f86a97f5989c2419f9348899a authored by MPillas on 07 November 2022, 10:58:35 UTC, committed by GitHub on 07 November 2022, 10:58:35 UTC

Add Pegasus workflow for faith simulations (#4163)

* Changes in faithsim to be used with python3

* change the first line to get the env

* create a directory for the template bank project with the workflow using Pegasus

* changes because I realized the pycbc_collect_results output is a single file

* currently trying to run the workflow and fix the issues I encounter, here I started with create inj issues

* now fixing pycbc_faithsim job

* almost final version just having problem with the plot names

* clean the code

* add bash files to run and submit the workflow

* final workflow and associated scripts problem because the kickstart job fails after submission

* fix a mistake with a parameter from the config file

* changes in submit file

* fix the run_workflow.sh script, the kickstart job seems to work now

* fix an error in create_inj script

* all scripts for the workflow, final version

* rebase and move the scripts to right directories

* remove the old files

* remove the hardcoded config path

* lot of changes, add a script to add some parameters in the dat file before the plotting script, put all the arguments in the configuration file, remove them from the workflow script ...

* add the header in the dat file

* changes in the descriptions of the scripts given to argparse and compute the derived quantities in the plotting script

* remove the path to the collect full results in the ini file

* use black on the plot script

* fix a bug in the plotting script

* changes suggested by Tito and Ian

* fix bug: change q into mchirp

* fix bug : add d in the derived_map for the s2 magnitude

* last bug fixed

* Ian's comments

Co-authored-by: Marion Pillas <marion.pillas@ldas-pcdev3.ligo.caltech.edu>
Co-authored-by: Marion Pillas <marion.pillas@ldas-pcdev1.ligo.caltech.edu>
Co-authored-by: Marion Pillas <marion.pillas@ldas-pcdev6.ligo.caltech.edu>
Co-authored-by: Marion Pillas <marion.pillas@ldas-grid.ligo.caltech.edu>

1 parent ce7ad08

Files
Changes

Permalinks

pycbc_splitbank

#!/usr/bin/env python
#
# Copyright (C) 2014 LIGO Scientific Collaboration
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.


#
# =============================================================================
#
#                                   Preamble
#
# =============================================================================
#

"""Splits a table in an xml file into multiple pieces."""

import argparse
from numpy import random, ceil
from ligo.lw import ligolw
from ligo.lw import lsctables
from ligo.lw import utils as ligolw_utils
from ligo.lw.utils import process as ligolw_process
from pycbc import version
from pycbc.io.ligolw import LIGOLWContentHandler
from pycbc.conversions import mchirp_from_mass1_mass2
from pycbc.pnutils import frequency_cutoff_from_name


# Script metadata. __version__ feeds the --version option and __program__ is
# the program name recorded in the process table of every output file; the
# version string and date are taken from pycbc.version.
__author__  = "Alex Nitz <alex.nitz@ligo.org>"
__version__ = version.git_verbose_msg
__date__    = version.date
__program__ = "pycbc_splitbank"


# Command line parsing
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument('--version', action='version', version=__version__)

# Exactly one of these three options must be given; it determines how many
# output banks are produced.
group = parser.add_mutually_exclusive_group(required=True)
group.add_argument('--templates-per-bank', metavar='SAMPLES',
                    help='number of templates in the output banks', type=int)
group.add_argument('-n', '--number-of-banks', metavar='N',
                    help='Split template bank into N files', type=int)
group.add_argument("-O", "--output-filenames", nargs='*', default=None,
                    action="store",
                    metavar="OUTPUT_FILENAME", help="""Directly specify the
                    names of the output files. The number of files specified
                    here will dictate how to split the bank. It will be split
                    equally between all specified files.""")

parser.add_argument("-o", "--output-prefix", default=None,
                    help="Prefix to add to the template bank name (name becomes output#.xml[.gz])" )

parser.add_argument("-V", "--verbose", action="store_true",
                    help="Print extra debugging information", default=False )
parser.add_argument("-t", "--bank-file", metavar='INPUT_FILE',
                    help='Template bank to split', required=True)
parser.add_argument("--sort-frequency-cutoff",
                    help="Frequency cutoff to use for sorting the sub banks")
parser.add_argument("--sort-mchirp", action="store_true", default=False,
                    help='Sort templates by chirp mass before splitting')
parser.add_argument("--random-sort", action="store_true", default=False,
                    help='Sort templates randomly before splitting')
parser.add_argument("--random-seed", type=int,
                    help='Random seed to use when sorting randomly')

args = parser.parse_args()

# Validate option combinations up front so that the script fails before the
# input bank is read rather than part-way through writing the output.
if args.output_filenames and args.output_prefix:
    parser.error("Cannot supply --output-filenames with --output-prefix.")

if args.output_filenames is not None and not args.output_filenames:
    # -O uses nargs='*', so it can be given without any file name, which
    # would leave the number of output banks undefined.
    parser.error("--output-filenames requires at least one file name.")

if not args.output_filenames and not args.output_prefix:
    # Output names are built from one of these two options; without either
    # there is no way to name the output files.
    parser.error("Must supply either --output-filenames or --output-prefix.")

if args.sort_mchirp and args.random_sort:
    parser.error("Cannot supply --sort-mchirp with --random-sort.")

# An explicit list of output files fixes the number of banks.
if args.output_filenames:
    args.number_of_banks = len(args.output_filenames)

# Read the input document using the PyCBC LIGOLW content handler.
indoc = ligolw_utils.load_filename(args.bank_file, verbose=args.verbose,
                                   contenthandler=LIGOLWContentHandler)

# Prefer a sngl_inspiral table and fall back to a sim_inspiral table.
# Only the "table not found" failure is caught (ligo.lw's get_table() is
# understood to raise ValueError when the document does not hold exactly one
# table of the requested type), so that unrelated errors and Ctrl-C are not
# silently swallowed as they were with a bare ``except``.
try:
    template_bank_table = lsctables.SnglInspiralTable.get_table(indoc)
except ValueError:
    template_bank_table = lsctables.SimInspiralTable.get_table(indoc)
    tabletype = lsctables.SimInspiralTable
else:
    tabletype = lsctables.SnglInspiralTable

# make a list of columns that are present in the input table.
# The : split is needed for columns like `process:process_id`,
# which must be listed as `process:process_id` in `lsctables.New()`,
# but are listed as just `process_id` in the `columnnames` attribute
used_columns = [
    col for col in template_bank_table.validcolumns
    if col.split(':')[-1] in template_bank_table.columnnames
]

# Total number of templates in the input bank; used to size the output banks.
length = len(template_bank_table)

# `tt` is the working sequence the output banks are drawn from. It starts as
# the input table itself and is rebound to a sorted list when a sort option
# is requested.
tt = template_bank_table

if args.sort_frequency_cutoff:
    # Order by the named frequency cutoff evaluated for each template.
    tt = sorted(
        template_bank_table,
        key=lambda row: frequency_cutoff_from_name(
            args.sort_frequency_cutoff,
            row.mass1, row.mass2, row.spin1z, row.spin2z))

if args.sort_mchirp:
    # Order by chirp mass; this replaces any frequency-cutoff ordering.
    tt = sorted(
        template_bank_table,
        key=lambda row: mchirp_from_mass1_mass2(row.mass1, row.mass2))

if args.random_sort:
    if args.random_seed is not None:
        random.seed(args.random_seed)
    # Shuffle the working sequence rather than the input table: when `tt` has
    # been rebound to a sorted list above, shuffling the original table would
    # have no effect on the output and --random-sort would be silently
    # ignored. When no sort was applied `tt` is the input table, so the
    # behaviour (and the result for a given seed) is unchanged.
    random.shuffle(tt)

# Work out how many output files there are and how many templates go in each.
# The options are compared against None (not tested for truthiness) so that a
# value of 0 is reported as an error instead of falling through and leaving
# num_per_file / num_files undefined.
if args.number_of_banks is not None:
    if args.number_of_banks < 1:
        raise ValueError("The number of output banks must be at least 1.")
    # Decide how to split up the templates
    # Put approximately the requested number of templates in each file
    # But try to make each file very nearly the same size
    num_files = args.number_of_banks
    num_per_file = length / float(num_files)

elif args.templates_per_bank is not None:
    if args.templates_per_bank < 1:
        raise ValueError("The number of templates per bank must be at "
                         "least 1.")
    num_per_file = args.templates_per_bank
    num_files = int(ceil(float(length) / num_per_file))

else:
    raise ValueError("Cannot determine how many output banks to create.")

# index_list[k] is the (rounded) cumulative number of templates assigned
# before bank k; the final entry is the total, so consecutive pairs give the
# size of each bank.
index_list = [int(round(num_per_file*idx)) for idx in range(num_files)]
index_list.append(length)
assert(index_list[0] == 0)

# Pair up consecutive boundaries: bank k receives idx2 - idx1 templates.
bank_bounds = list(zip(index_list[:-1], index_list[1:]))

# Check every bank is non-empty before writing anything. This used to be an
# ``assert`` inside the loop, which is stripped under ``python -O`` and which
# could fire only after some output files had already been written.
for num, (idx1, idx2) in enumerate(bank_bounds):
    if idx2 <= idx1:
        raise ValueError("Output bank %d would contain no templates; the "
                         "input bank has only %d templates for %d output "
                         "banks." % (num, index_list[-1], len(bank_bounds)))

for num, (idx1, idx2) in enumerate(bank_bounds):
    # Resolve the output name first so a naming problem is reported before
    # any document is built.
    if args.output_filenames:
        outname = args.output_filenames[num]
    elif args.output_prefix:
        outname = args.output_prefix + str(num) + '.xml.gz'
    else:
        errMsg = "Cannot figure out how to set output file names."
        raise ValueError(errMsg)

    # create a blank xml document and add the process id
    outdoc = ligolw.Document()
    outdoc.appendChild(ligolw.LIGO_LW())

    process = ligolw_process.register_to_xmldoc(outdoc,
                    __program__, args.__dict__, instruments=["G1"],
                    version=version.version, cvs_repository=version.git_branch,
                    cvs_entry_time=version.date)

    # New, empty table of the same type as the input, restricted to the
    # columns that the input table actually uses.
    sngl_inspiral_table = lsctables.New(tabletype, columns=used_columns)
    outdoc.childNodes[0].appendChild(sngl_inspiral_table)

    # Move this bank's share of templates out of the working sequence. pop()
    # removes rows from the end of `tt`, so each row is used exactly once.
    for _ in range(idx2 - idx1):
        row = tt.pop()
        row.process_id = process.process_id
        sngl_inspiral_table.append(row)

    # write the xml doc to disk
    proctable = lsctables.ProcessTable.get_table(outdoc)
    proctable[0].set_end_time_now()

    ligolw_utils.write_filename(outdoc, outname)

Showing with 0 additions and 0 deletions (0 / 0 diffs computed)

Computing file changes ...