Revision 58c62ef193350e27094a6d41a587616fba9b6aa4 authored by Josh Willis on 01 August 2020, 18:47:17 UTC, committed by GitHub on 01 August 2020, 18:47:17 UTC
* Add try/except block around skcuda.fft import to ensure ImportError * Address CC line length issue
1 parent 74f3245
pycbc_make_banksim
#! /usr/bin/env python
import logging
import os
import shutil
from six.moves import configparser as ConfigParser
import subprocess
import glob
import tempfile
from optparse import OptionParser
from glue.pipeline import CondorDAGJob, CondorDAGNode, CondorDAG, CondorJob
class BaseJob(CondorDAGJob, CondorJob):
def __init__(self, log_dir, executable, cp, section, gpu=False,
accounting_group=None):
CondorDAGJob.__init__(self, "vanilla", executable)
if gpu:
CondorJob.__init__(self, "vanilla", executable, 2)
# These are all python jobs so need to pull in the env
self.add_condor_cmd('getenv', 'True')
log_base = os.path.join(
log_dir, os.path.basename(executable) + '-$(cluster)-$(process)')
self.set_stderr_file(log_base + '.err')
self.set_stdout_file(log_base + '.out')
self.set_sub_file(os.path.basename(executable) + '.sub')
if cp is not None:
self.add_ini_opts(cp, section)
if accounting_group:
self.add_condor_cmd('accounting_group', accounting_group)
class BanksimNode(CondorDAGNode):
def __init__(self, job, inj_file, tmplt_file, match_file, gpu=True,
gpu_postscript=False, inj_per_job=None):
CondorDAGNode.__init__(self, job)
self.add_file_opt("signal-file", inj_file)
self.add_file_opt("template-file", tmplt_file)
if gpu:
self.add_var_opt("processing-scheme", 'cuda')
if gpu and gpu_postscript:
self.set_retry(5)
mf = match_file+".$(Process)"
mf1 = match_file+".0"
mf2 = match_file+".1"
self.add_file_opt("match-file", match_file+".$(Process)",
file_is_output_file=True)
self.job().__queue = 2
# Needed to satisfy the requirements for both running on atlas and spice
job.add_condor_cmd('+WantsGPU', 'true')
job.add_condor_cmd('+WantGPU', 'true')
job.add_condor_cmd(
'Requirements',
'(GPU_PRESENT =?= true) || (HasGPU =?= "gtx580")')
self.set_post_script(gpu_postscript)
self.add_post_script_arg(mf1)
self.add_post_script_arg(mf2)
self.add_post_script_arg(".0001")
self.add_post_script_arg(match_file)
self.add_post_script_arg(str(inj_per_job))
else:
self.add_file_opt("match-file", match_file, file_is_output_file=True)
class CombineNode(CondorDAGNode):
def __init__(self, job, inj_num):
CondorDAGNode.__init__(self, job)
self.add_var_opt("inj-num", inj_num)
outf = "match/match" + str(inj_num) + ".dat"
self.add_file_opt("output-file", outf)
def get_ini_opts(confs, section):
op_str = ""
for opt in confs.options(section):
val = confs.get(section, opt)
op_str += "--" + opt + " " + val + " \\" + "\n"
return op_str
def mkdir(dir_name):
try :
os.mkdir(dir_name)
except OSError:
pass
def mc_min_max_from_sorted_file(fname):
from glue.ligolw import lsctables
from glue.ligolw.ligolw import LIGOLWContentHandler
class mycontenthandler(LIGOLWContentHandler):
pass
lsctables.use_in(mycontenthandler)
from glue.ligolw.utils import load_filename
from glue.ligolw.table import get_table
from pycbc.pnutils import mass1_mass2_to_mchirp_eta
try:
t = get_table(load_filename(fname, False, contenthandler=mycontenthandler), "sngl_inspiral")
except:
t = get_table(load_filename(fname, False, contenthandler=mycontenthandler), "sim_inspiral")
mc_max, et = mass1_mass2_to_mchirp_eta(t[0].mass1, t[0].mass2)
mc_min, et = mass1_mass2_to_mchirp_eta(t[-1].mass1, t[-1].mass2)
return mc_min, mc_max
bf_mchirps = {}
sf_mchirps = {}
def check_outside_mchirp(bf, sf, w):
if bf not in bf_mchirps:
bf_mchirps[bf] = mc_min_max_from_sorted_file(bf)
if sf not in sf_mchirps:
sf_mchirps[sf] = mc_min_max_from_sorted_file(sf)
mc_min, mc_max = bf_mchirps[bf]
mc2_min, mc2_max = sf_mchirps[sf]
if (mc_min <= mc2_max * (1+w) ) and (mc_max * (1+w) >= mc2_min):
return False
else:
return True
parser = OptionParser()
parser.add_option('--config', type=str)
(options, args) = parser.parse_args()
if options.config is None:
raise ValueError("Config file is required")
logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
confs = ConfigParser.ConfigParser()
confs.read(options.config)
banksim_prog = confs.get("executables", "banksim")
bank_file = confs.get("workflow", "bank-file")
injections_per_job = confs.get("workflow", "injections-per-job")
templates_per_job = confs.get("workflow", "templates-per-job")
log_path = confs.get("workflow", 'log-path')
tempfile.tempdir = log_path
tempfile.template='banksim.dag.log.'
logfile = tempfile.mktemp()
mchirp_window = None
if confs.has_option("banksim", "mchirp-window"):
if ',' in confs.get("banksim", "mchirp-window"):
mchirp_window = max([float(x) for x in confs.get("banksim", "mchirp-window").split(",")])
else:
mchirp_window = float(confs.get("banksim", "mchirp-window"))
gpu = False
try:
gpu = confs.get("workflow", "use-gpus")
if gpu is not None:
gpu = True
except:
pass
try:
accounting_group = confs.get('workflow', 'accounting-group')
except:
accounting_group = None
logging.warn('Warning: accounting-group not specified, LDG clusters may'
' reject this workflow!')
logging.info("Making workspace directories")
mkdir('scripts')
mkdir('bank')
mkdir('match')
mkdir('injection')
mkdir('match-part')
mkdir('log')
mkdir('plots')
logging.info("Copying scripts")
shutil.copy(banksim_prog, 'scripts/pycbc_banksim')
os.chmod('scripts/pycbc_banksim', 0o0777)
logging.info("Creating injection file")
inj_str = "lalapps_inspinj " + get_ini_opts(confs, "inspinj") + "--output inj.xml"
os.system(inj_str)
logging.info("Splitting template bank")
subprocess.call(['pycbc_splitbank',
'--templates-per-bank', str(templates_per_job),
'-t', bank_file,
'-o', 'bank/bank',
'--sort-mchirp'])
logging.info("Splitting injection file")
subprocess.call(['pycbc_splitbank',
'--templates-per-bank', str(injections_per_job),
'-t', "inj.xml",
'-o', 'injection/injection',
'--sort-mchirp'])
num_banks = len(glob.glob("bank/bank*"))
num_injs = len(glob.glob("injection/injection*"))
logging.info("Creating DAG")
f = open("banksim.dag", "w")
do_count = 0
skip_count = 0
dag = CondorDAG(logfile)
dag.set_dag_file("banksim")
dag.set_dax_file("banksim")
bsjob = BaseJob("log", "scripts/pycbc_banksim", confs, "banksim", gpu=gpu,
accounting_group=accounting_group)
cjob = BaseJob("log", "scripts/pycbc_banksim_match_combine", None, None,
accounting_group=accounting_group)
rjob = BaseJob("log", "scripts/pycbc_banksim_collect_results", None, None,
accounting_group=accounting_group)
pjob = BaseJob("log", "scripts/pycbc_banksim_plots", None, None,
accounting_group=accounting_group)
rnode = CondorDAGNode(rjob)
pnode = CondorDAGNode(pjob)
for inj_num in range(num_injs):
num = str(inj_num)
combine_has_jobs = False
cnode = CombineNode(cjob, inj_num)
for bank_num in range(num_banks):
if mchirp_window is not None:
bank_part = "bank/bank" + str(bank_num) + ".xml"
sim_part = "injection/injection" + str(inj_num) + ".xml"
if check_outside_mchirp(bank_part, sim_part, mchirp_window):
skip_count += 1
continue
else:
do_count += 1
part_num = str(bank_num)
mfn = 'match-part/match' + num +'part' + part_num + '.dat'
sn = 'injection/injection' + num + '.xml'
bn = 'bank/bank' + part_num + '.xml'
bsnode = BanksimNode(bsjob, sn, bn, mfn, gpu=gpu,
gpu_postscript="scripts/diff_match.sh",
inj_per_job=injections_per_job)
cnode.add_parent(bsnode)
dag.add_node(bsnode)
combine_has_jobs = True
if combine_has_jobs:
rnode.add_parent(cnode)
dag.add_node(cnode)
dag.add_node(rnode)
pnode.add_parent(rnode)
dag.add_node(pnode)
logging.info("DO : %d SKIP %d" %(do_count, skip_count))
f.close()
f = open("scripts/pycbc_banksim_match_combine", "w")
f.write("""#!/usr/bin/env python
from os.path import isfile
from optparse import OptionParser
from numpy import *
import glob
parser = OptionParser()
parser.add_option('--inj-num',help="index of the injection set for the match files",type=int)
parser.add_option('-o','--output-file',help="output file with the maximized values")
options, argv_frame_files = parser.parse_args()
fils = glob.glob("match-part/match"+str(options.inj_num)+"part*.dat")
dtypef={'names': ('match', 'bank', 'bank_i', 'sim', 'sim_i', 'sigmasq'), 'formats': ('f8', 'S256', 'i4', 'S256', 'i4', 'f8')}
matches=[]
maxmatch = []
for fil in fils:
matches.append(loadtxt(fil, dtype=dtypef))
indices = array(matches, dtype=dtypef)['match'].argmax(0)
for i, j in enumerate(indices):
maxmatch.append(matches[j][i])
maxmatch=array(maxmatch, dtype =dtypef)
savetxt(options.output_file, maxmatch,fmt=('%5.5f', '%s', '%i', '%s', '%i', '%5.5f'), delimiter=' ')
""")
os.chmod('scripts/pycbc_banksim_match_combine', 0o0777)
f = open("scripts/pycbc_banksim_collect_results", "w")
f.write("""#!/usr/bin/env python
from os.path import isfile
from numpy import *
from glue.ligolw import utils, table
import glob
from glue.ligolw.ligolw import LIGOLWContentHandler
from glue.ligolw import lsctables
class mycontenthandler(LIGOLWContentHandler):
pass
lsctables.use_in(mycontenthandler)
fils = glob.glob("match/match*.dat")
dtypem={'names': ('match', 'bank', 'bank_i', 'sim', 'sim_i', 'sigmasq'), 'formats': ('f8', 'S256', 'i4', 'S256', 'i4', 'f8')}
# Collect the results
res = None
for fil in fils:
if res is not None:
res = append(res, loadtxt(fil, dtype=dtypem))
else:
res = loadtxt(fil, dtype=dtypem)
btables = {}
itables = {}
f = open("results.dat", "w")
for row in res:
outstr = ""
if row['bank'] not in btables:
indoc = utils.load_filename(row['bank'], False, contenthandler=mycontenthandler)
btables[row['bank']] = table.get_table(indoc, "sngl_inspiral")
if row['sim'] not in itables:
indoc = utils.load_filename(row['sim'], False, contenthandler=mycontenthandler)
itables[row['sim']] = table.get_table(indoc, "sim_inspiral")
bt = btables[row['bank']][row['bank_i']]
it = itables[row['sim']][row['sim_i']]
outstr += str(row['match']) + " "
outstr += str(bt.mass1) + " "
outstr += str(bt.mass2) + " "
outstr += str(bt.spin1x) + " "
outstr += str(bt.spin1y) + " "
outstr += str(bt.spin1z) + " "
outstr += str(bt.spin2x) + " "
outstr += str(bt.spin2y) + " "
outstr += str(bt.spin2z) + " "
outstr += str(it.mass1) + " "
outstr += str(it.mass2) + " "
outstr += str(it.spin1x) + " "
outstr += str(it.spin1y) + " "
outstr += str(it.spin1z) + " "
outstr += str(it.spin2x) + " "
outstr += str(it.spin2y) + " "
outstr += str(it.spin2z) + " "
outstr += str(it.coa_phase) + " "
outstr += str(it.inclination) + " "
outstr += str(it.latitude) + " "
outstr += str(it.longitude) + " "
outstr += str(it.polarization) + " "
outstr += str(row['sigmasq']) + " "
outstr += "\\n"
f.write(outstr)
""")
os.chmod('scripts/pycbc_banksim_collect_results', 0o0777)
if gpu:
f = open("cconfig", "w")
f.write("""
DAGMAN_PROHIBIT_MULTI_JOBS = False
""")
f = open("scripts/diff_match.sh", "w")
f.write("""#!/bin/bash
len=`cat $1 | wc -l`
len2=`cat $2 | wc -l`
if [ $len -eq $len2 ] && [ $len -ne 0 ] ; then
echo "correct length"
else
echo "wrong length file"
exit 1
fi
function fuzzy_diff {
echo " ($3>($1-$2)) && ($3>($2-$1)) " | bc
}
exec 3<$1
exec 4<$2
while IFS= read -r line1 <&3
IFS= read -r line2 <&4
do
line1=`echo "$line1" | cut --delimiter=' ' -f 1`
line2=`echo "$line2" | cut --delimiter=' ' -f 1`
if ! [[ "$line1" =~ ^[0-9]+([.][0-9]+)?$ ]] ; then
exec >&2; echo "error: Not a number"; exit 1
fi
if ! [[ "$line2" =~ ^[0-9]+([.][0-9]+)?$ ]] ; then
exec >&2; echo "error: Not a number"; exit 1
fi
ok=`fuzzy_diff $line1 $line2 $3`
if [ $ok -eq 0 ] ; then
echo "Files do not match"
exit 1
fi
done
cp $1 $4
cp $1.found $4.found
echo "The files are close enough"
exit 0
""")
os.chmod('scripts/diff_match.sh', 0o0777)
logging.info("Creating submit script")
f = open("submit.sh","w")
if gpu:
f.write("""#!/bin/bash
condor_submit_dag -config cconfig banksim.dag
""")
else:
f.write("""#!/bin/bash
condor_submit_dag banksim.dag
""")
os.chmod('submit.sh', 0o0777)
f = open("partial_results.sh", "w")
f.write("""#!/bin/bash
scripts/pycbc_banksim_collect_results
""")
os.chmod('partial_results.sh', 0o0777)
dag.write_sub_files()
dag.write_script()
dag.write_dag()
f = open("scripts/pycbc_banksim_plots", "w")
f.write("""#!/usr/bin/env python
import matplotlib
matplotlib.use('agg')
from matplotlib import pyplot
from pycbc import pnutils
import numpy
import pylab
goldenratio = 2 / (1 + 5**.5)
#matplotlib.rcParams.update({
# "font.size": 8.0,
# "axes.titlesize": 8.0,
# "axes.labelsize": 8.0,
# "xtick.labelsize": 8.0,
# "ytick.labelsize": 8.0,
# "legend.fontsize": 8.0,
# "figure.figsize": (3.3,3.3*goldenratio),
# "figure.dpi": 200,
# "subplots.left": 0.2,
# "subplots.right": 0.75,
# "subplots.bottom": 0.15,
# "subplots.top": 0.75,
# "savefig.dpi": 600,
# "text.verticalalignment": "center",
#})
res = numpy.loadtxt("results.dat")
match = res[:,0]
tmass1 = res[:,1]
tmass2 = res[:,2]
tspin1x = res[:,3]
tspin1y = res[:,4]
tspin1z = res[:,5]
tspin2x = res[:,6]
tspin2y = res[:,7]
tspin2z = res[:,8]
tmchirp, teta = pnutils.mass1_mass2_to_mchirp_eta(tmass1, tmass2)
imass1 = res[:,9]
imass2 = res[:,10]
ispin1x = res[:,11]
ispin1y = res[:,12]
ispin1z = res[:,13]
ispin2x = res[:,14]
ispin2y = res[:,15]
ispin2z = res[:,16]
imchirp, ieta = pnutils.mass1_mass2_to_mchirp_eta(imass1, imass2)
coa_phase = res[:,17]
inclination = res[:,18]
latitude = res[:,19]
longitude = res[:,20]
polarization = res[:,21]
sigmasq = res[:,22]
q = numpy.maximum(imass1/imass2, imass2/imass1)
s1m = (ispin1x**2+ispin1y**2+ispin1z**2)**0.5
s2m = (ispin2x**2+ispin2y**2+ispin2z**2)**0.5
def mhist(c1, name, cum=False, normed=True, log=False, bins=100, xl="", yl=""):
pylab.figure()
pylab.xlabel(xl)
pylab.ylabel(yl)
if log:
pylab.yscale('log')
pylab.hist(c1, bins=bins, normed=normed, histtype='step', cumulative=cum)
pylab.savefig(name)
def mplot(c1, c2, c, name, xl="", yl="", vmin=None, vmax=None):
pylab.figure()
pylab.axes((0.15, 0.15, 0.8, 0.8))
pylab.scatter(c1, c2, c=c, linewidth=0, s=1, vmin=vmin, vmax=vmax)
pylab.colorbar()
pylab.xlim(min(c1), max(c1))
pylab.ylim(min(c2), max(c2))
pylab.xlabel(xl)
pylab.ylabel(yl)
pylab.savefig(name)
mhist(imchirp-tmchirp, "plots/hist-mchirp-diff.png")
mhist((imchirp-tmchirp)/imchirp, "plots/hist-mchirp-reldiff.png")
mhist(match, "plots/hist-match.png")
mhist(match, "plots/hist-match-cum.png", cum=1, log=True, bins=10000, xl = "Match", yl="Fraction of injections < Match")
pylab.figure(102)
pylab.ylabel('Fraction of Injections')
pylab.xlabel('Fitting factor')
pylab.yscale('log')
pylab.xlim(0.95, 1.0)
pylab.ylim(1e-4, 1)
hBins = pylab.arange(0.,1.,0.0005,dtype=float)
n, bins,patches=pylab.hist(match,cumulative=1,bins=hBins,normed=True)
pylab.grid()
pylab.savefig("plots/cum_hist.png")
mplot(imass1, imass2, match, "plots/m1-m2-match.png")
mplot(tmass1, tmass2, match, "plots/tm1-tm2-match.png")
mplot(q, s1m, match, "plots/q-s1m-match.png")
mplot(q, s2m, match, "plots/q-s2m-match.png")
mplot(q, ispin1z, match, "plots/q-s1z-match.png")
mplot(q, ispin2z, match, "plots/q-s2z-match.png", "Mass Ratio", "Spin2z")
mplot(q, ispin2z, match, "plots/q-s2z-match97.png", "Mass Ratio", "Spin2z", vmin=0.97)
mplot(q, ispin2z, match, "plots/q-s2z-match90.png", "Mass Ratio", "Spin2z", vmin=0.90)
mplot(inclination, match, match, "plots/inc-match.png")
mplot(imass1, imass2, imchirp-tmchirp, "plots/m1-m2-mchirpdiff.png")
mplot(q, ispin1z, imchirp-tmchirp, "plots/q-s1z-mchirpdiff.png", "Mass Ratio", "Spin1z")
mplot(q, ispin2z, imchirp-tmchirp, "plots/q-s2z-mchirpdiff.png", "Mass Ratio", "Spin2z")
mplot(imass1, imass2, (imchirp-tmchirp)/imchirp, "plots/m1-m2-mchirpreldiff.png")
mplot(q, ispin1z, (imchirp-tmchirp)/imchirp, "plots/q-s1z-mchirpreldiff.png")
mplot(q, ispin2z, (imchirp-tmchirp)/imchirp, "plots/q-s2z-mchirpreldiff.png")
""")
os.chmod("scripts/pycbc_banksim_plots", 0o0777)
logging.info("Done")
![swh spinner](/static/img/swh-spinner.gif)
Computing file changes ...