swh:1:snp:3a699297f000109a1bc833f294a54171df990207
Raw File
Tip revision: 3135d11ad886c6dd66a2b0150f01b0d677ca0e56 authored by Alex Nitz on 02 March 2022, 22:05:59 UTC
Update setup.py (#3953)
Tip revision: 3135d11
pycbc_make_banksim
#! /usr/bin/env python
import logging
import os
import shutil
from six.moves import configparser as ConfigParser
import subprocess
import glob
import tempfile
from optparse import OptionParser
from glue.pipeline import CondorDAGJob, CondorDAGNode, CondorDAG, CondorJob


class BaseJob(CondorDAGJob, CondorJob):
    def __init__(self, log_dir, executable, cp, section, gpu=False,
                 accounting_group=None):
        CondorDAGJob.__init__(self, "vanilla", executable)

        if gpu:
            CondorJob.__init__(self, "vanilla", executable, 2)
        # These are all python jobs so need to pull in the env
        self.add_condor_cmd('getenv', 'True')
        log_base = os.path.join(
            log_dir, os.path.basename(executable) + '-$(cluster)-$(process)')
        self.set_stderr_file(log_base + '.err')
        self.set_stdout_file(log_base + '.out')
        self.set_sub_file(os.path.basename(executable) + '.sub')

        if cp is not None:
            self.add_ini_opts(cp, section)

        if accounting_group:
            self.add_condor_cmd('accounting_group', accounting_group)

class BanksimNode(CondorDAGNode):
    def __init__(self, job, inj_file, tmplt_file, match_file, gpu=True,
                 gpu_postscript=False, inj_per_job=None):
        CondorDAGNode.__init__(self, job)

        self.add_file_opt("signal-file", inj_file)
        self.add_file_opt("template-file", tmplt_file)

        if gpu:
            self.add_var_opt("processing-scheme", 'cuda')

        if gpu and gpu_postscript:
            self.set_retry(5)
            mf = match_file+".$(Process)"
            mf1 = match_file+".0"
            mf2 = match_file+".1"
            self.add_file_opt("match-file", match_file+".$(Process)",
                              file_is_output_file=True)
            self.job().__queue = 2

            # Needed to satisfy the requirements for both running on atlas and spice
            job.add_condor_cmd('+WantsGPU', 'true')
            job.add_condor_cmd('+WantGPU', 'true')
            job.add_condor_cmd(
                'Requirements',
                '(GPU_PRESENT =?= true) || (HasGPU =?= "gtx580")')

            self.set_post_script(gpu_postscript)
            self.add_post_script_arg(mf1)
            self.add_post_script_arg(mf2)
            self.add_post_script_arg(".0001")
            self.add_post_script_arg(match_file)
            self.add_post_script_arg(str(inj_per_job))
        else:
            self.add_file_opt("match-file", match_file, file_is_output_file=True)
        
class CombineNode(CondorDAGNode):
    def __init__(self, job, inj_num):
        CondorDAGNode.__init__(self, job)
        
        self.add_var_opt("inj-num", inj_num)
        
        outf = "match/match" + str(inj_num) + ".dat"
        
        self.add_file_opt("output-file", outf)    

def get_ini_opts(confs, section):
    op_str = ""
    for opt in confs.options(section):
        val = confs.get(section, opt)
        op_str += "--" + opt + " " + val + " \\" + "\n"
    return op_str
    
def mkdir(dir_name):
    try :
        os.mkdir(dir_name)
    except OSError:
        pass
        
def mc_min_max_from_sorted_file(fname):
    from ligo.lw.utils import load_filename
    from ligo.lw.table import Table
    from pycbc.pnutils import mass1_mass2_to_mchirp_eta
    from pycbc.io.ligolw import LIGOLWContentHandler

    doc = load_filename(fname, False, contenthandler=LIGOLWContentHandler)
    try:
        t = Table.get_table(doc, "sngl_inspiral")
    except:
        t = Table.get_table(doc, "sim_inspiral")
    mc_max, et = mass1_mass2_to_mchirp_eta(t[0].mass1, t[0].mass2)
    mc_min, et = mass1_mass2_to_mchirp_eta(t[-1].mass1, t[-1].mass2)
    return mc_min, mc_max
    
        
bf_mchirps = {}
sf_mchirps = {}
def check_outside_mchirp(bf, sf, w):
    if bf not in bf_mchirps:
        bf_mchirps[bf] = mc_min_max_from_sorted_file(bf)    
    if sf not in sf_mchirps:
        sf_mchirps[sf] = mc_min_max_from_sorted_file(sf) 
         
    mc_min, mc_max = bf_mchirps[bf]
    mc2_min, mc2_max =  sf_mchirps[sf]
    
    if (mc_min  <= mc2_max * (1+w) ) and (mc_max * (1+w) >= mc2_min):
        return False
    else:
        return True

parser = OptionParser()
parser.add_option('--config', type=str)          
(options, args) = parser.parse_args() 

if options.config is None:
    raise ValueError("Config file is required")  

logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)

confs = ConfigParser.ConfigParser()
confs.read(options.config)

banksim_prog = confs.get("executables", "banksim")
bank_file = confs.get("workflow", "bank-file")
injections_per_job = confs.get("workflow", "injections-per-job")
templates_per_job = confs.get("workflow", "templates-per-job")

log_path = confs.get("workflow", 'log-path')

tempfile.tempdir = log_path
tempfile.template='banksim.dag.log.'
logfile = tempfile.mktemp()

mchirp_window = None
if confs.has_option("banksim", "mchirp-window"):
    if ',' in confs.get("banksim", "mchirp-window"):
         mchirp_window = max([float(x) for x in confs.get("banksim", "mchirp-window").split(",")])
    else:
         mchirp_window = float(confs.get("banksim", "mchirp-window"))

gpu = False
try:
    gpu = confs.get("workflow", "use-gpus")
    if gpu is not None:
        gpu = True
except:
    pass

try:
    accounting_group = confs.get('workflow', 'accounting-group')
except:
    accounting_group = None
    logging.warn('Warning: accounting-group not specified, LDG clusters may'
                 ' reject this workflow!')

logging.info("Making workspace directories")
mkdir('scripts')
mkdir('bank')
mkdir('match')
mkdir('injection')
mkdir('match-part')
mkdir('log')
mkdir('plots')

logging.info("Copying scripts")
shutil.copy(banksim_prog, 'scripts/pycbc_banksim')
os.chmod('scripts/pycbc_banksim', 0o0777)

logging.info("Creating injection file")
inj_str = "lalapps_inspinj " + get_ini_opts(confs, "inspinj") + "--output inj.xml"
os.system(inj_str)

logging.info("Splitting template bank")
subprocess.call(['pycbc_splitbank',
                 '--templates-per-bank', str(templates_per_job),
                 '-t', bank_file,
                 '-o', 'bank/bank',
                 '--sort-mchirp'])

logging.info("Splitting injection file")
subprocess.call(['pycbc_splitbank',
                 '--templates-per-bank', str(injections_per_job),
                 '-t', "inj.xml",
                 '-o', 'injection/injection',
                 '--sort-mchirp'])

num_banks = len(glob.glob("bank/bank*"))
num_injs = len(glob.glob("injection/injection*"))

logging.info("Creating DAG")
f = open("banksim.dag", "w")

do_count = 0
skip_count = 0

dag = CondorDAG(logfile)
dag.set_dag_file("banksim")

bsjob = BaseJob("log", "scripts/pycbc_banksim", confs, "banksim", gpu=gpu,
                accounting_group=accounting_group)
cjob = BaseJob("log", "scripts/pycbc_banksim_match_combine", None, None,
               accounting_group=accounting_group)
rjob = BaseJob("log", "scripts/pycbc_banksim_collect_results", None, None,
               accounting_group=accounting_group)
pjob = BaseJob("log", "scripts/pycbc_banksim_plots", None, None,
               accounting_group=accounting_group)
rnode = CondorDAGNode(rjob)
pnode = CondorDAGNode(pjob)

for inj_num in range(num_injs):
    num = str(inj_num)
    combine_has_jobs = False
    cnode = CombineNode(cjob, inj_num)
    for bank_num in range(num_banks):
        if mchirp_window is not None:
            bank_part = "bank/bank" + str(bank_num) + ".xml"
            sim_part =  "injection/injection" + str(inj_num) + ".xml"
            if check_outside_mchirp(bank_part, sim_part, mchirp_window):
                skip_count += 1
                continue
            else:
                do_count += 1
        part_num = str(bank_num)
        mfn = 'match-part/match' + num +'part' + part_num + '.dat'
        sn = 'injection/injection' + num + '.xml'
        bn = 'bank/bank' + part_num + '.xml'
        bsnode = BanksimNode(bsjob, sn, bn, mfn, gpu=gpu,
                             gpu_postscript="scripts/diff_match.sh",
                             inj_per_job=injections_per_job)
        cnode.add_parent(bsnode)
        dag.add_node(bsnode)
        combine_has_jobs = True
    if combine_has_jobs:
        rnode.add_parent(cnode)
        dag.add_node(cnode)      
dag.add_node(rnode)
pnode.add_parent(rnode)
dag.add_node(pnode)

logging.info("DO : %d SKIP %d" %(do_count, skip_count))
f.close()

f = open("scripts/pycbc_banksim_match_combine", "w")
f.write("""#!/usr/bin/env python
from os.path import isfile
from optparse import OptionParser
from numpy import *
import glob
parser = OptionParser()

parser.add_option('--inj-num',help="index of the injection set for the match files",type=int)
parser.add_option('-o','--output-file',help="output file with the maximized values")
options, argv_frame_files = parser.parse_args()

fils = glob.glob("match-part/match"+str(options.inj_num)+"part*.dat")

dtypef={'names': ('match', 'bank', 'bank_i', 'sim', 'sim_i', 'sigmasq'), 'formats': ('f8', 'S256', 'i4', 'S256', 'i4', 'f8')}

matches=[]
maxmatch = []
for fil in fils:
    matches.append(loadtxt(fil, dtype=dtypef))
   
indices = array(matches, dtype=dtypef)['match'].argmax(0)
for i, j in enumerate(indices):
    maxmatch.append(matches[j][i])
    
maxmatch=array(maxmatch, dtype =dtypef)
savetxt(options.output_file, maxmatch,fmt=('%5.5f', '%s', '%i', '%s', '%i', '%5.5f'), delimiter=' ')
""")
os.chmod('scripts/pycbc_banksim_match_combine', 0o0777)

f = open("scripts/pycbc_banksim_collect_results", "w")
f.write("""#!/usr/bin/env python
from os.path import isfile
from numpy import *
from ligo.lw import utils, table
import glob

from pycbc.io.ligolw import LIGOLWContentHandler

fils = glob.glob("match/match*.dat")

dtypem={'names': ('match', 'bank', 'bank_i', 'sim', 'sim_i', 'sigmasq'), 'formats': ('f8', 'S256', 'i4', 'S256', 'i4', 'f8')}

# Collect the results
res = None
for fil in fils:
    if res is not None:
        res = append(res, loadtxt(fil, dtype=dtypem))
    else:
        res = loadtxt(fil, dtype=dtypem)
 
btables = {}
itables = {}     

f = open("results.dat", "w")
for row in res: 
    outstr = ""
    if row['bank'] not in btables:
        indoc = utils.load_filename(row['bank'], False,
                                    contenthandler=LIGOLWContentHandler)
        btables[row['bank']] = table.get_table(indoc, "sngl_inspiral") 

    if row['sim'] not in itables:
        indoc = utils.load_filename(row['sim'], False,
                                    contenthandler=LIGOLWContentHandler)
        itables[row['sim']] = table.get_table(indoc, "sim_inspiral") 
    
    bt = btables[row['bank']][row['bank_i']]     
    it = itables[row['sim']][row['sim_i']]
 
    outstr += str(row['match']) + " "
    outstr += str(bt.mass1) + " "
    outstr += str(bt.mass2) + " "
    outstr += str(bt.spin1x) + " "
    outstr += str(bt.spin1y) + " "
    outstr += str(bt.spin1z) + " "
    outstr += str(bt.spin2x) + " "
    outstr += str(bt.spin2y) + " " 
    outstr += str(bt.spin2z) + " "
    
    outstr += str(it.mass1) + " "
    outstr += str(it.mass2) + " "
    outstr += str(it.spin1x) + " "
    outstr += str(it.spin1y) + " "
    outstr += str(it.spin1z) + " "
    outstr += str(it.spin2x) + " "
    outstr += str(it.spin2y) + " " 
    outstr += str(it.spin2z) + " "
    
    outstr += str(it.coa_phase) + " "
    outstr += str(it.inclination) + " "
    outstr += str(it.latitude) + " " 
    outstr += str(it.longitude) + " "
    outstr += str(it.polarization) + " "
    
    outstr += str(row['sigmasq']) + " "
                
    outstr += "\\n"
    
    f.write(outstr)
""")
os.chmod('scripts/pycbc_banksim_collect_results', 0o0777)

if gpu:
    f = open("cconfig", "w")
    f.write("""
    DAGMAN_PROHIBIT_MULTI_JOBS = False
    """)

    f = open("scripts/diff_match.sh", "w")
    f.write("""#!/bin/bash

    len=`cat $1 | wc -l`
    len2=`cat $2 | wc -l`

    if [ $len -eq $len2 ] && [ $len -ne 0 ] ; then
       echo "correct length"
    else
       echo "wrong length file"
        exit 1
    fi  

    function fuzzy_diff {
       echo  " ($3>($1-$2)) && ($3>($2-$1)) " | bc  
    }

    exec 3<$1
    exec 4<$2

    while IFS= read -r line1 <&3
    IFS= read -r line2 <&4
    do
        line1=`echo "$line1" | cut --delimiter=' ' -f 1`
        line2=`echo "$line2" | cut --delimiter=' ' -f 1` 

        if ! [[ "$line1" =~ ^[0-9]+([.][0-9]+)?$ ]] ; then
           exec >&2; echo "error: Not a number"; exit 1
        fi
        
        if ! [[ "$line2" =~ ^[0-9]+([.][0-9]+)?$ ]] ; then
           exec >&2; echo "error: Not a number"; exit 1
        fi

        ok=`fuzzy_diff $line1 $line2 $3`

        if  [ $ok -eq 0 ] ; then
           echo "Files do not match"
           exit 1
        fi 
       
    done


    cp $1 $4
    cp $1.found $4.found
    echo "The files are close enough"

    exit 0
    """)
    os.chmod('scripts/diff_match.sh', 0o0777)
    
logging.info("Creating submit script")
f = open("submit.sh","w")
if gpu:
    f.write("""#!/bin/bash
    condor_submit_dag -config cconfig banksim.dag
    """)
else:
    f.write("""#!/bin/bash
    condor_submit_dag banksim.dag
    """)
os.chmod('submit.sh', 0o0777)

f = open("partial_results.sh", "w")
f.write("""#!/bin/bash
scripts/pycbc_banksim_collect_results
""")
os.chmod('partial_results.sh', 0o0777)

dag.write_sub_files()
dag.write_script()
dag.write_dag()

f = open("scripts/pycbc_banksim_plots", "w")
f.write("""#!/usr/bin/env python
import matplotlib
matplotlib.use('agg')
from matplotlib import pyplot
from pycbc import pnutils
import numpy
import pylab

goldenratio = 2 / (1 + 5**.5)
#matplotlib.rcParams.update({
#        "font.size": 8.0,
#        "axes.titlesize": 8.0,
#        "axes.labelsize": 8.0,
#        "xtick.labelsize": 8.0,
#        "ytick.labelsize": 8.0,
#        "legend.fontsize": 8.0,
#	"figure.figsize": (3.3,3.3*goldenratio),
#        "figure.dpi": 200,
#	"subplots.left": 0.2, 
#	"subplots.right": 0.75, 
#	"subplots.bottom": 0.15,
#	"subplots.top": 0.75,
#        "savefig.dpi": 600,
#        "text.verticalalignment": "center",
#})

res = numpy.loadtxt("results.dat")
match = res[:,0]

tmass1 = res[:,1]
tmass2 = res[:,2]
tspin1x = res[:,3] 
tspin1y = res[:,4]
tspin1z = res[:,5]
tspin2x = res[:,6]
tspin2y = res[:,7]
tspin2z = res[:,8]
tmchirp, teta = pnutils.mass1_mass2_to_mchirp_eta(tmass1, tmass2)

imass1 = res[:,9]
imass2 = res[:,10]
ispin1x = res[:,11]
ispin1y = res[:,12]
ispin1z = res[:,13]
ispin2x = res[:,14]
ispin2y = res[:,15]
ispin2z = res[:,16]
imchirp, ieta = pnutils.mass1_mass2_to_mchirp_eta(imass1, imass2)

coa_phase = res[:,17]
inclination = res[:,18]
latitude  = res[:,19]
longitude = res[:,20]
polarization = res[:,21]

sigmasq = res[:,22]

q = numpy.maximum(imass1/imass2, imass2/imass1)
s1m = (ispin1x**2+ispin1y**2+ispin1z**2)**0.5
s2m = (ispin2x**2+ispin2y**2+ispin2z**2)**0.5

def mhist(c1, name, cum=False, normed=True, log=False, bins=100, xl="", yl=""):
    pylab.figure()
    pylab.xlabel(xl)
    pylab.ylabel(yl)
    if log:
        pylab.yscale('log')
    pylab.hist(c1, bins=bins, normed=normed, histtype='step', cumulative=cum)
    pylab.savefig(name)

def mplot(c1, c2, c, name, xl="", yl="", vmin=None, vmax=None):
    pylab.figure()
    pylab.axes((0.15, 0.15, 0.8, 0.8))
    pylab.scatter(c1, c2, c=c, linewidth=0, s=1, vmin=vmin, vmax=vmax)
    pylab.colorbar()
    pylab.xlim(min(c1), max(c1))
    pylab.ylim(min(c2), max(c2))
    pylab.xlabel(xl)
    pylab.ylabel(yl)
    pylab.savefig(name)

mhist(imchirp-tmchirp, "plots/hist-mchirp-diff.png")
mhist((imchirp-tmchirp)/imchirp, "plots/hist-mchirp-reldiff.png")
mhist(match, "plots/hist-match.png")
mhist(match, "plots/hist-match-cum.png", cum=1, log=True, bins=10000, xl = "Match", yl="Fraction of injections < Match")
    
pylab.figure(102)
pylab.ylabel('Fraction of Injections')
pylab.xlabel('Fitting factor')
pylab.yscale('log') 
pylab.xlim(0.95, 1.0)
pylab.ylim(1e-4, 1)
hBins = pylab.arange(0.,1.,0.0005,dtype=float)
n, bins,patches=pylab.hist(match,cumulative=1,bins=hBins,normed=True)
pylab.grid()
pylab.savefig("plots/cum_hist.png")
    
mplot(imass1, imass2, match, "plots/m1-m2-match.png")
mplot(tmass1, tmass2, match, "plots/tm1-tm2-match.png")
mplot(q, s1m, match, "plots/q-s1m-match.png")
mplot(q, s2m, match, "plots/q-s2m-match.png")
mplot(q, ispin1z, match, "plots/q-s1z-match.png")  
mplot(q, ispin2z, match, "plots/q-s2z-match.png", "Mass Ratio", "Spin2z") 
mplot(q, ispin2z, match, "plots/q-s2z-match97.png", "Mass Ratio", "Spin2z", vmin=0.97)
mplot(q, ispin2z, match, "plots/q-s2z-match90.png", "Mass Ratio", "Spin2z", vmin=0.90)
mplot(inclination, match, match, "plots/inc-match.png")   

mplot(imass1, imass2, imchirp-tmchirp, "plots/m1-m2-mchirpdiff.png")
mplot(q, ispin1z, imchirp-tmchirp, "plots/q-s1z-mchirpdiff.png", "Mass Ratio", "Spin1z")
mplot(q, ispin2z, imchirp-tmchirp, "plots/q-s2z-mchirpdiff.png", "Mass Ratio", "Spin2z")

mplot(imass1, imass2, (imchirp-tmchirp)/imchirp, "plots/m1-m2-mchirpreldiff.png")
mplot(q, ispin1z, (imchirp-tmchirp)/imchirp, "plots/q-s1z-mchirpreldiff.png")
mplot(q, ispin2z, (imchirp-tmchirp)/imchirp, "plots/q-s2z-mchirpreldiff.png")

""")
os.chmod("scripts/pycbc_banksim_plots", 0o0777)

logging.info("Done")
back to top