Revision 9607f8f7299e22787f3afa979c4bc33aac22248e authored by Thomas Dent on 04 March 2021, 21:54:37 UTC, committed by GitHub on 04 March 2021, 21:54:37 UTC
* fixes to help msgs for injection group

* fix typo

* Fix wrong help message for inj scale factor

* codeclimatecomplaint
1 parent d2a9743
Raw File
pycbc_make_banksim
#! /usr/bin/env python
import logging
import os
import shutil
from six.moves import configparser as ConfigParser
import subprocess
import glob
import tempfile
from optparse import OptionParser
from glue.pipeline import CondorDAGJob, CondorDAGNode, CondorDAG, CondorJob


class BaseJob(CondorDAGJob, CondorJob):
    def __init__(self, log_dir, executable, cp, section, gpu=False,
                 accounting_group=None):
        CondorDAGJob.__init__(self, "vanilla", executable)

        if gpu:
            CondorJob.__init__(self, "vanilla", executable, 2)
        # These are all python jobs so need to pull in the env
        self.add_condor_cmd('getenv', 'True')
        log_base = os.path.join(
            log_dir, os.path.basename(executable) + '-$(cluster)-$(process)')
        self.set_stderr_file(log_base + '.err')
        self.set_stdout_file(log_base + '.out')
        self.set_sub_file(os.path.basename(executable) + '.sub')

        if cp is not None:
            self.add_ini_opts(cp, section)

        if accounting_group:
            self.add_condor_cmd('accounting_group', accounting_group)

class BanksimNode(CondorDAGNode):
    def __init__(self, job, inj_file, tmplt_file, match_file, gpu=True,
                 gpu_postscript=False, inj_per_job=None):
        CondorDAGNode.__init__(self, job)

        self.add_file_opt("signal-file", inj_file)
        self.add_file_opt("template-file", tmplt_file)

        if gpu:
            self.add_var_opt("processing-scheme", 'cuda')

        if gpu and gpu_postscript:
            self.set_retry(5)
            mf = match_file+".$(Process)"
            mf1 = match_file+".0"
            mf2 = match_file+".1"
            self.add_file_opt("match-file", match_file+".$(Process)",
                              file_is_output_file=True)
            self.job().__queue = 2

            # Needed to satisfy the requirements for both running on atlas and spice
            job.add_condor_cmd('+WantsGPU', 'true')
            job.add_condor_cmd('+WantGPU', 'true')
            job.add_condor_cmd(
                'Requirements',
                '(GPU_PRESENT =?= true) || (HasGPU =?= "gtx580")')

            self.set_post_script(gpu_postscript)
            self.add_post_script_arg(mf1)
            self.add_post_script_arg(mf2)
            self.add_post_script_arg(".0001")
            self.add_post_script_arg(match_file)
            self.add_post_script_arg(str(inj_per_job))
        else:
            self.add_file_opt("match-file", match_file, file_is_output_file=True)
        
class CombineNode(CondorDAGNode):
    def __init__(self, job, inj_num):
        CondorDAGNode.__init__(self, job)
        
        self.add_var_opt("inj-num", inj_num)
        
        outf = "match/match" + str(inj_num) + ".dat"
        
        self.add_file_opt("output-file", outf)    

def get_ini_opts(confs, section):
    op_str = ""
    for opt in confs.options(section):
        val = confs.get(section, opt)
        op_str += "--" + opt + " " + val + " \\" + "\n"
    return op_str
    
def mkdir(dir_name):
    try :
        os.mkdir(dir_name)
    except OSError:
        pass
        
def mc_min_max_from_sorted_file(fname):
    from glue.ligolw import lsctables
    from glue.ligolw.ligolw import LIGOLWContentHandler
    class mycontenthandler(LIGOLWContentHandler):
        pass
    lsctables.use_in(mycontenthandler)
    from glue.ligolw.utils import load_filename
    from glue.ligolw.table import get_table
    from pycbc.pnutils import mass1_mass2_to_mchirp_eta
    try:
        t = get_table(load_filename(fname, False, contenthandler=mycontenthandler), "sngl_inspiral")
    except:
        t = get_table(load_filename(fname, False, contenthandler=mycontenthandler), "sim_inspiral")
    mc_max, et = mass1_mass2_to_mchirp_eta(t[0].mass1, t[0].mass2)
    mc_min, et = mass1_mass2_to_mchirp_eta(t[-1].mass1, t[-1].mass2)
    return mc_min, mc_max
    
        
bf_mchirps = {}
sf_mchirps = {}
def check_outside_mchirp(bf, sf, w):
    if bf not in bf_mchirps:
        bf_mchirps[bf] = mc_min_max_from_sorted_file(bf)    
    if sf not in sf_mchirps:
        sf_mchirps[sf] = mc_min_max_from_sorted_file(sf) 
         
    mc_min, mc_max = bf_mchirps[bf]
    mc2_min, mc2_max =  sf_mchirps[sf]
    
    if (mc_min  <= mc2_max * (1+w) ) and (mc_max * (1+w) >= mc2_min):
        return False
    else:
        return True

parser = OptionParser()
parser.add_option('--config', type=str)          
(options, args) = parser.parse_args() 

if options.config is None:
    raise ValueError("Config file is required")  

logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)

confs = ConfigParser.ConfigParser()
confs.read(options.config)

banksim_prog = confs.get("executables", "banksim")
bank_file = confs.get("workflow", "bank-file")
injections_per_job = confs.get("workflow", "injections-per-job")
templates_per_job = confs.get("workflow", "templates-per-job")

log_path = confs.get("workflow", 'log-path')

tempfile.tempdir = log_path
tempfile.template='banksim.dag.log.'
logfile = tempfile.mktemp()

mchirp_window = None
if confs.has_option("banksim", "mchirp-window"):
    if ',' in confs.get("banksim", "mchirp-window"):
         mchirp_window = max([float(x) for x in confs.get("banksim", "mchirp-window").split(",")])
    else:
         mchirp_window = float(confs.get("banksim", "mchirp-window"))

gpu = False
try:
    gpu = confs.get("workflow", "use-gpus")
    if gpu is not None:
        gpu = True
except:
    pass

try:
    accounting_group = confs.get('workflow', 'accounting-group')
except:
    accounting_group = None
    logging.warn('Warning: accounting-group not specified, LDG clusters may'
                 ' reject this workflow!')

logging.info("Making workspace directories")
mkdir('scripts')
mkdir('bank')
mkdir('match')
mkdir('injection')
mkdir('match-part')
mkdir('log')
mkdir('plots')

logging.info("Copying scripts")
shutil.copy(banksim_prog, 'scripts/pycbc_banksim')
os.chmod('scripts/pycbc_banksim', 0o0777)

logging.info("Creating injection file")
inj_str = "lalapps_inspinj " + get_ini_opts(confs, "inspinj") + "--output inj.xml"
os.system(inj_str)

logging.info("Splitting template bank")
subprocess.call(['pycbc_splitbank',
                 '--templates-per-bank', str(templates_per_job),
                 '-t', bank_file,
                 '-o', 'bank/bank',
                 '--sort-mchirp'])

logging.info("Splitting injection file")
subprocess.call(['pycbc_splitbank',
                 '--templates-per-bank', str(injections_per_job),
                 '-t', "inj.xml",
                 '-o', 'injection/injection',
                 '--sort-mchirp'])

num_banks = len(glob.glob("bank/bank*"))
num_injs = len(glob.glob("injection/injection*"))

logging.info("Creating DAG")
f = open("banksim.dag", "w")

do_count = 0
skip_count = 0

dag = CondorDAG(logfile)
dag.set_dag_file("banksim")
dag.set_dax_file("banksim")

bsjob = BaseJob("log", "scripts/pycbc_banksim", confs, "banksim", gpu=gpu,
                accounting_group=accounting_group)
cjob = BaseJob("log", "scripts/pycbc_banksim_match_combine", None, None,
               accounting_group=accounting_group)
rjob = BaseJob("log", "scripts/pycbc_banksim_collect_results", None, None,
               accounting_group=accounting_group)
pjob = BaseJob("log", "scripts/pycbc_banksim_plots", None, None,
               accounting_group=accounting_group)
rnode = CondorDAGNode(rjob)
pnode = CondorDAGNode(pjob)

for inj_num in range(num_injs):
    num = str(inj_num)
    combine_has_jobs = False
    cnode = CombineNode(cjob, inj_num)
    for bank_num in range(num_banks):
        if mchirp_window is not None:
            bank_part = "bank/bank" + str(bank_num) + ".xml"
            sim_part =  "injection/injection" + str(inj_num) + ".xml"
            if check_outside_mchirp(bank_part, sim_part, mchirp_window):
                skip_count += 1
                continue
            else:
                do_count += 1
        part_num = str(bank_num)
        mfn = 'match-part/match' + num +'part' + part_num + '.dat'
        sn = 'injection/injection' + num + '.xml'
        bn = 'bank/bank' + part_num + '.xml'
        bsnode = BanksimNode(bsjob, sn, bn, mfn, gpu=gpu,
                             gpu_postscript="scripts/diff_match.sh",
                             inj_per_job=injections_per_job)
        cnode.add_parent(bsnode)
        dag.add_node(bsnode)
        combine_has_jobs = True
    if combine_has_jobs:
        rnode.add_parent(cnode)
        dag.add_node(cnode)      
dag.add_node(rnode)
pnode.add_parent(rnode)
dag.add_node(pnode)

logging.info("DO : %d SKIP %d" %(do_count, skip_count))
f.close()

f = open("scripts/pycbc_banksim_match_combine", "w")
f.write("""#!/usr/bin/env python
from os.path import isfile
from optparse import OptionParser
from numpy import *
import glob
parser = OptionParser()

parser.add_option('--inj-num',help="index of the injection set for the match files",type=int)
parser.add_option('-o','--output-file',help="output file with the maximized values")
options, argv_frame_files = parser.parse_args()

fils = glob.glob("match-part/match"+str(options.inj_num)+"part*.dat")

dtypef={'names': ('match', 'bank', 'bank_i', 'sim', 'sim_i', 'sigmasq'), 'formats': ('f8', 'S256', 'i4', 'S256', 'i4', 'f8')}

matches=[]
maxmatch = []
for fil in fils:
    matches.append(loadtxt(fil, dtype=dtypef))
   
indices = array(matches, dtype=dtypef)['match'].argmax(0)
for i, j in enumerate(indices):
    maxmatch.append(matches[j][i])
    
maxmatch=array(maxmatch, dtype =dtypef)
savetxt(options.output_file, maxmatch,fmt=('%5.5f', '%s', '%i', '%s', '%i', '%5.5f'), delimiter=' ')
""")
os.chmod('scripts/pycbc_banksim_match_combine', 0o0777)

f = open("scripts/pycbc_banksim_collect_results", "w")
f.write("""#!/usr/bin/env python
from os.path import isfile
from numpy import *
from glue.ligolw import utils, table
import glob

from glue.ligolw.ligolw import LIGOLWContentHandler
from glue.ligolw import lsctables
class mycontenthandler(LIGOLWContentHandler):
    pass
lsctables.use_in(mycontenthandler)

fils = glob.glob("match/match*.dat")

dtypem={'names': ('match', 'bank', 'bank_i', 'sim', 'sim_i', 'sigmasq'), 'formats': ('f8', 'S256', 'i4', 'S256', 'i4', 'f8')}

# Collect the results
res = None
for fil in fils:
    if res is not None:
        res = append(res, loadtxt(fil, dtype=dtypem))
    else:
        res = loadtxt(fil, dtype=dtypem)
 
btables = {}
itables = {}     

f = open("results.dat", "w")
for row in res: 
    outstr = ""
    if row['bank'] not in btables:
        indoc = utils.load_filename(row['bank'], False, contenthandler=mycontenthandler)
        btables[row['bank']] = table.get_table(indoc, "sngl_inspiral") 

    if row['sim'] not in itables:
        indoc = utils.load_filename(row['sim'], False, contenthandler=mycontenthandler)
        itables[row['sim']] = table.get_table(indoc, "sim_inspiral") 
    
    bt = btables[row['bank']][row['bank_i']]     
    it = itables[row['sim']][row['sim_i']]
 
    outstr += str(row['match']) + " "
    outstr += str(bt.mass1) + " "
    outstr += str(bt.mass2) + " "
    outstr += str(bt.spin1x) + " "
    outstr += str(bt.spin1y) + " "
    outstr += str(bt.spin1z) + " "
    outstr += str(bt.spin2x) + " "
    outstr += str(bt.spin2y) + " " 
    outstr += str(bt.spin2z) + " "
    
    outstr += str(it.mass1) + " "
    outstr += str(it.mass2) + " "
    outstr += str(it.spin1x) + " "
    outstr += str(it.spin1y) + " "
    outstr += str(it.spin1z) + " "
    outstr += str(it.spin2x) + " "
    outstr += str(it.spin2y) + " " 
    outstr += str(it.spin2z) + " "
    
    outstr += str(it.coa_phase) + " "
    outstr += str(it.inclination) + " "
    outstr += str(it.latitude) + " " 
    outstr += str(it.longitude) + " "
    outstr += str(it.polarization) + " "
    
    outstr += str(row['sigmasq']) + " "
                
    outstr += "\\n"
    
    f.write(outstr)
""")
os.chmod('scripts/pycbc_banksim_collect_results', 0o0777)

if gpu:
    f = open("cconfig", "w")
    f.write("""
    DAGMAN_PROHIBIT_MULTI_JOBS = False
    """)

    f = open("scripts/diff_match.sh", "w")
    f.write("""#!/bin/bash

    len=`cat $1 | wc -l`
    len2=`cat $2 | wc -l`

    if [ $len -eq $len2 ] && [ $len -ne 0 ] ; then
       echo "correct length"
    else
       echo "wrong length file"
        exit 1
    fi  

    function fuzzy_diff {
       echo  " ($3>($1-$2)) && ($3>($2-$1)) " | bc  
    }

    exec 3<$1
    exec 4<$2

    while IFS= read -r line1 <&3
    IFS= read -r line2 <&4
    do
        line1=`echo "$line1" | cut --delimiter=' ' -f 1`
        line2=`echo "$line2" | cut --delimiter=' ' -f 1` 

        if ! [[ "$line1" =~ ^[0-9]+([.][0-9]+)?$ ]] ; then
           exec >&2; echo "error: Not a number"; exit 1
        fi
        
        if ! [[ "$line2" =~ ^[0-9]+([.][0-9]+)?$ ]] ; then
           exec >&2; echo "error: Not a number"; exit 1
        fi

        ok=`fuzzy_diff $line1 $line2 $3`

        if  [ $ok -eq 0 ] ; then
           echo "Files do not match"
           exit 1
        fi 
       
    done


    cp $1 $4
    cp $1.found $4.found
    echo "The files are close enough"

    exit 0
    """)
    os.chmod('scripts/diff_match.sh', 0o0777)
    
logging.info("Creating submit script")
f = open("submit.sh","w")
if gpu:
    f.write("""#!/bin/bash
    condor_submit_dag -config cconfig banksim.dag
    """)
else:
    f.write("""#!/bin/bash
    condor_submit_dag banksim.dag
    """)
os.chmod('submit.sh', 0o0777)

f = open("partial_results.sh", "w")
f.write("""#!/bin/bash
scripts/pycbc_banksim_collect_results
""")
os.chmod('partial_results.sh', 0o0777)

dag.write_sub_files()
dag.write_script()
dag.write_dag()

f = open("scripts/pycbc_banksim_plots", "w")
f.write("""#!/usr/bin/env python
import matplotlib
matplotlib.use('agg')
from matplotlib import pyplot
from pycbc import pnutils
import numpy
import pylab

goldenratio = 2 / (1 + 5**.5)
#matplotlib.rcParams.update({
#        "font.size": 8.0,
#        "axes.titlesize": 8.0,
#        "axes.labelsize": 8.0,
#        "xtick.labelsize": 8.0,
#        "ytick.labelsize": 8.0,
#        "legend.fontsize": 8.0,
#	"figure.figsize": (3.3,3.3*goldenratio),
#        "figure.dpi": 200,
#	"subplots.left": 0.2, 
#	"subplots.right": 0.75, 
#	"subplots.bottom": 0.15,
#	"subplots.top": 0.75,
#        "savefig.dpi": 600,
#        "text.verticalalignment": "center",
#})

res = numpy.loadtxt("results.dat")
match = res[:,0]

tmass1 = res[:,1]
tmass2 = res[:,2]
tspin1x = res[:,3] 
tspin1y = res[:,4]
tspin1z = res[:,5]
tspin2x = res[:,6]
tspin2y = res[:,7]
tspin2z = res[:,8]
tmchirp, teta = pnutils.mass1_mass2_to_mchirp_eta(tmass1, tmass2)

imass1 = res[:,9]
imass2 = res[:,10]
ispin1x = res[:,11]
ispin1y = res[:,12]
ispin1z = res[:,13]
ispin2x = res[:,14]
ispin2y = res[:,15]
ispin2z = res[:,16]
imchirp, ieta = pnutils.mass1_mass2_to_mchirp_eta(imass1, imass2)

coa_phase = res[:,17]
inclination = res[:,18]
latitude  = res[:,19]
longitude = res[:,20]
polarization = res[:,21]

sigmasq = res[:,22]

q = numpy.maximum(imass1/imass2, imass2/imass1)
s1m = (ispin1x**2+ispin1y**2+ispin1z**2)**0.5
s2m = (ispin2x**2+ispin2y**2+ispin2z**2)**0.5

def mhist(c1, name, cum=False, normed=True, log=False, bins=100, xl="", yl=""):
    pylab.figure()
    pylab.xlabel(xl)
    pylab.ylabel(yl)
    if log:
        pylab.yscale('log')
    pylab.hist(c1, bins=bins, normed=normed, histtype='step', cumulative=cum)
    pylab.savefig(name)

def mplot(c1, c2, c, name, xl="", yl="", vmin=None, vmax=None):
    pylab.figure()
    pylab.axes((0.15, 0.15, 0.8, 0.8))
    pylab.scatter(c1, c2, c=c, linewidth=0, s=1, vmin=vmin, vmax=vmax)
    pylab.colorbar()
    pylab.xlim(min(c1), max(c1))
    pylab.ylim(min(c2), max(c2))
    pylab.xlabel(xl)
    pylab.ylabel(yl)
    pylab.savefig(name)

mhist(imchirp-tmchirp, "plots/hist-mchirp-diff.png")
mhist((imchirp-tmchirp)/imchirp, "plots/hist-mchirp-reldiff.png")
mhist(match, "plots/hist-match.png")
mhist(match, "plots/hist-match-cum.png", cum=1, log=True, bins=10000, xl = "Match", yl="Fraction of injections < Match")
    
pylab.figure(102)
pylab.ylabel('Fraction of Injections')
pylab.xlabel('Fitting factor')
pylab.yscale('log') 
pylab.xlim(0.95, 1.0)
pylab.ylim(1e-4, 1)
hBins = pylab.arange(0.,1.,0.0005,dtype=float)
n, bins,patches=pylab.hist(match,cumulative=1,bins=hBins,normed=True)
pylab.grid()
pylab.savefig("plots/cum_hist.png")
    
mplot(imass1, imass2, match, "plots/m1-m2-match.png")
mplot(tmass1, tmass2, match, "plots/tm1-tm2-match.png")
mplot(q, s1m, match, "plots/q-s1m-match.png")
mplot(q, s2m, match, "plots/q-s2m-match.png")
mplot(q, ispin1z, match, "plots/q-s1z-match.png")  
mplot(q, ispin2z, match, "plots/q-s2z-match.png", "Mass Ratio", "Spin2z") 
mplot(q, ispin2z, match, "plots/q-s2z-match97.png", "Mass Ratio", "Spin2z", vmin=0.97)
mplot(q, ispin2z, match, "plots/q-s2z-match90.png", "Mass Ratio", "Spin2z", vmin=0.90)
mplot(inclination, match, match, "plots/inc-match.png")   

mplot(imass1, imass2, imchirp-tmchirp, "plots/m1-m2-mchirpdiff.png")
mplot(q, ispin1z, imchirp-tmchirp, "plots/q-s1z-mchirpdiff.png", "Mass Ratio", "Spin1z")
mplot(q, ispin2z, imchirp-tmchirp, "plots/q-s2z-mchirpdiff.png", "Mass Ratio", "Spin2z")

mplot(imass1, imass2, (imchirp-tmchirp)/imchirp, "plots/m1-m2-mchirpreldiff.png")
mplot(q, ispin1z, (imchirp-tmchirp)/imchirp, "plots/q-s1z-mchirpreldiff.png")
mplot(q, ispin2z, (imchirp-tmchirp)/imchirp, "plots/q-s2z-mchirpreldiff.png")

""")
os.chmod("scripts/pycbc_banksim_plots", 0o0777)

logging.info("Done")
back to top