Revision 703ba68b9a6106b03f2e116fcc4dad8af81fc665 authored by Ian Harry on 18 May 2017, 12:32:21 UTC, committed by GitHub on 18 May 2017, 12:32:21 UTC
1 parent cedc4de
pycbc_make_banksim
#! /usr/bin/env python
import logging
import os
import shutil
import ConfigParser
import subprocess
import glob
import tempfile
from optparse import OptionParser
from pycbc_glue.pipeline import CondorDAGJob, CondorDAGNode, CondorDAG, CondorJob
class BaseJob(CondorDAGJob, CondorJob):
def __init__(self, log_dir, executable, cp, section, gpu=False,
accounting_group=None):
CondorDAGJob.__init__(self, "vanilla", executable)
if gpu:
CondorJob.__init__(self, "vanilla", executable, 2)
# These are all python jobs so need to pull in the env
self.add_condor_cmd('getenv', 'True')
log_base = os.path.join(
log_dir, os.path.basename(executable) + '-$(cluster)-$(process)')
self.set_stderr_file(log_base + '.err')
self.set_stdout_file(log_base + '.out')
self.set_sub_file(os.path.basename(executable) + '.sub')
if cp is not None:
self.add_ini_opts(cp, section)
if accounting_group:
self.add_condor_cmd('accounting_group', accounting_group)
class BanksimNode(CondorDAGNode):
def __init__(self, job, inj_file, tmplt_file, match_file, gpu=True,
gpu_postscript=False, inj_per_job=None):
CondorDAGNode.__init__(self, job)
self.add_file_opt("signal-file", inj_file)
self.add_file_opt("template-file", tmplt_file)
if gpu:
self.add_var_opt("processing-scheme", 'cuda')
if gpu and gpu_postscript:
self.set_retry(5)
mf = match_file+".$(Process)"
mf1 = match_file+".0"
mf2 = match_file+".1"
self.add_file_opt("match-file", match_file+".$(Process)",
file_is_output_file=True)
self.job().__queue = 2
# Needed to satisfy the requirements for both running on atlas and spice
job.add_condor_cmd('+WantsGPU', 'true')
job.add_condor_cmd('+WantGPU', 'true')
job.add_condor_cmd(
'Requirements',
'(GPU_PRESENT =?= true) || (HasGPU =?= "gtx580")')
self.set_post_script(gpu_postscript)
self.add_post_script_arg(mf1)
self.add_post_script_arg(mf2)
self.add_post_script_arg(".0001")
self.add_post_script_arg(match_file)
self.add_post_script_arg(str(inj_per_job))
else:
self.add_file_opt("match-file", match_file, file_is_output_file=True)
class CombineNode(CondorDAGNode):
def __init__(self, job, inj_num):
CondorDAGNode.__init__(self, job)
self.add_var_opt("inj-num", inj_num)
outf = "match/match" + str(inj_num) + ".dat"
self.add_file_opt("output-file", outf)
def get_ini_opts(confs, section):
op_str = ""
for opt in confs.options(section):
val = confs.get(section, opt)
op_str += "--" + opt + " " + val + " \\" + "\n"
return op_str
def mkdir(dir_name):
try :
os.mkdir(dir_name)
except OSError:
pass
def mc_min_max_from_sorted_file(fname):
from pycbc_glue.ligolw import lsctables
from pycbc_glue.ligolw.ligolw import LIGOLWContentHandler
class mycontenthandler(LIGOLWContentHandler):
pass
lsctables.use_in(mycontenthandler)
from pycbc_glue.ligolw.utils import load_filename
from pycbc_glue.ligolw.table import get_table
from pycbc.pnutils import mass1_mass2_to_mchirp_eta
try:
t = get_table(load_filename(fname, False, contenthandler=mycontenthandler), "sngl_inspiral")
except:
t = get_table(load_filename(fname, False, contenthandler=mycontenthandler), "sim_inspiral")
mc_max, et = mass1_mass2_to_mchirp_eta(t[0].mass1, t[0].mass2)
mc_min, et = mass1_mass2_to_mchirp_eta(t[-1].mass1, t[-1].mass2)
return mc_min, mc_max
bf_mchirps = {}
sf_mchirps = {}
def check_outside_mchirp(bf, sf, w):
if bf not in bf_mchirps:
bf_mchirps[bf] = mc_min_max_from_sorted_file(bf)
if sf not in sf_mchirps:
sf_mchirps[sf] = mc_min_max_from_sorted_file(sf)
mc_min, mc_max = bf_mchirps[bf]
mc2_min, mc2_max = sf_mchirps[sf]
if (mc_min <= mc2_max * (1+w) ) and (mc_max * (1+w) >= mc2_min):
return False
else:
return True
parser = OptionParser()
parser.add_option('--config', type=str)
(options, args) = parser.parse_args()
if options.config is None:
raise ValueError("Config file is required")
logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
confs = ConfigParser.ConfigParser()
confs.read(options.config)
banksim_prog = confs.get("executables", "banksim")
bank_file = confs.get("workflow", "bank-file")
injections_per_job = confs.get("workflow", "injections-per-job")
templates_per_job = confs.get("workflow", "templates-per-job")
log_path = confs.get("workflow", 'log-path')
tempfile.tempdir = log_path
tempfile.template='banksim.dag.log.'
logfile = tempfile.mktemp()
mchirp_window = None
if confs.has_option("banksim", "mchirp-window"):
if ',' in confs.get("banksim", "mchirp-window"):
mchirp_window = max([float(x) for x in confs.get("banksim", "mchirp-window").split(",")])
else:
mchirp_window = float(confs.get("banksim", "mchirp-window"))
gpu = False
try:
gpu = confs.get("workflow", "use-gpus")
if gpu is not None:
gpu = True
except:
pass
try:
accounting_group = confs.get('workflow', 'accounting-group')
except:
accounting_group = None
logging.warn('Warning: accounting-group not specified, LDG clusters may'
' reject this workflow!')
logging.info("Making workspace directories")
mkdir('scripts')
mkdir('bank')
mkdir('match')
mkdir('injection')
mkdir('match-part')
mkdir('log')
mkdir('plots')
logging.info("Copying scripts")
shutil.copy(banksim_prog, 'scripts/pycbc_banksim')
os.chmod('scripts/pycbc_banksim', 0777)
logging.info("Creating injection file")
inj_str = "lalapps_inspinj " + get_ini_opts(confs, "inspinj") + "--output inj.xml"
os.system(inj_str)
logging.info("Splitting template bank")
subprocess.call(['pycbc_splitbank',
'--templates-per-bank', str(templates_per_job),
'-t', bank_file,
'-o', 'bank/bank',
'--sort-mchirp'])
logging.info("Splitting injection file")
subprocess.call(['pycbc_splitbank',
'--templates-per-bank', str(injections_per_job),
'-t', "inj.xml",
'-o', 'injection/injection',
'--sort-mchirp'])
num_banks = len(glob.glob("bank/bank*"))
num_injs = len(glob.glob("injection/injection*"))
logging.info("Creating DAG")
f = open("banksim.dag", "w")
do_count = 0
skip_count = 0
dag = CondorDAG(logfile)
dag.set_dag_file("banksim")
dag.set_dax_file("banksim")
bsjob = BaseJob("log", "scripts/pycbc_banksim", confs, "banksim", gpu=gpu,
accounting_group=accounting_group)
cjob = BaseJob("log", "scripts/pycbc_banksim_match_combine", None, None,
accounting_group=accounting_group)
rjob = BaseJob("log", "scripts/pycbc_banksim_collect_results", None, None,
accounting_group=accounting_group)
pjob = BaseJob("log", "scripts/pycbc_banksim_plots", None, None,
accounting_group=accounting_group)
rnode = CondorDAGNode(rjob)
pnode = CondorDAGNode(pjob)
for inj_num in range(num_injs):
num = str(inj_num)
combine_has_jobs = False
cnode = CombineNode(cjob, inj_num)
for bank_num in range(num_banks):
if mchirp_window is not None:
bank_part = "bank/bank" + str(bank_num) + ".xml"
sim_part = "injection/injection" + str(inj_num) + ".xml"
if check_outside_mchirp(bank_part, sim_part, mchirp_window):
skip_count += 1
continue
else:
do_count += 1
part_num = str(bank_num)
mfn = 'match-part/match' + num +'part' + part_num + '.dat'
sn = 'injection/injection' + num + '.xml'
bn = 'bank/bank' + part_num + '.xml'
bsnode = BanksimNode(bsjob, sn, bn, mfn, gpu=gpu,
gpu_postscript="scripts/diff_match.sh",
inj_per_job=injections_per_job)
cnode.add_parent(bsnode)
dag.add_node(bsnode)
combine_has_jobs = True
if combine_has_jobs:
rnode.add_parent(cnode)
dag.add_node(cnode)
dag.add_node(rnode)
pnode.add_parent(rnode)
dag.add_node(pnode)
logging.info("DO : %d SKIP %d" %(do_count, skip_count))
f.close()
f = open("scripts/pycbc_banksim_match_combine", "w")
f.write("""#!/usr/bin/env python
from os.path import isfile
from optparse import OptionParser
from numpy import *
import glob
parser = OptionParser()
parser.add_option('--inj-num',help="index of the injection set for the match files",type=int)
parser.add_option('-o','--output-file',help="output file with the maximized values")
options, argv_frame_files = parser.parse_args()
fils = glob.glob("match-part/match"+str(options.inj_num)+"part*.dat")
dtypef={'names': ('match', 'bank', 'bank_i', 'sim', 'sim_i', 'sigmasq'), 'formats': ('f8', 'S256', 'i4', 'S256', 'i4', 'f8')}
matches=[]
maxmatch = []
for fil in fils:
matches.append(loadtxt(fil, dtype=dtypef))
indices = array(matches, dtype=dtypef)['match'].argmax(0)
for i, j in enumerate(indices):
maxmatch.append(matches[j][i])
maxmatch=array(maxmatch, dtype =dtypef)
savetxt(options.output_file, maxmatch,fmt=('%5.5f', '%s', '%i', '%s', '%i', '%5.5f'), delimiter=' ')
""")
os.chmod('scripts/pycbc_banksim_match_combine', 0777)
f = open("scripts/pycbc_banksim_collect_results", "w")
f.write("""#!/usr/bin/env python
from os.path import isfile
from numpy import *
from pycbc_glue.ligolw import utils, table
import glob
from pycbc_glue.ligolw.ligolw import LIGOLWContentHandler
from pycbc_glue.ligolw import lsctables
class mycontenthandler(LIGOLWContentHandler):
pass
lsctables.use_in(mycontenthandler)
fils = glob.glob("match/match*.dat")
dtypem={'names': ('match', 'bank', 'bank_i', 'sim', 'sim_i', 'sigmasq'), 'formats': ('f8', 'S256', 'i4', 'S256', 'i4', 'f8')}
# Collect the results
res = None
for fil in fils:
if res is not None:
res = append(res, loadtxt(fil, dtype=dtypem))
else:
res = loadtxt(fil, dtype=dtypem)
btables = {}
itables = {}
f = open("results.dat", "w")
for row in res:
outstr = ""
if row['bank'] not in btables:
indoc = utils.load_filename(row['bank'], False, contenthandler=mycontenthandler)
btables[row['bank']] = table.get_table(indoc, "sngl_inspiral")
if row['sim'] not in itables:
indoc = utils.load_filename(row['sim'], False, contenthandler=mycontenthandler)
itables[row['sim']] = table.get_table(indoc, "sim_inspiral")
bt = btables[row['bank']][row['bank_i']]
it = itables[row['sim']][row['sim_i']]
outstr += str(row['match']) + " "
outstr += str(bt.mass1) + " "
outstr += str(bt.mass2) + " "
outstr += str(bt.spin1x) + " "
outstr += str(bt.spin1y) + " "
outstr += str(bt.spin1z) + " "
outstr += str(bt.spin2x) + " "
outstr += str(bt.spin2y) + " "
outstr += str(bt.spin2z) + " "
outstr += str(it.mass1) + " "
outstr += str(it.mass2) + " "
outstr += str(it.spin1x) + " "
outstr += str(it.spin1y) + " "
outstr += str(it.spin1z) + " "
outstr += str(it.spin2x) + " "
outstr += str(it.spin2y) + " "
outstr += str(it.spin2z) + " "
outstr += str(it.coa_phase) + " "
outstr += str(it.inclination) + " "
outstr += str(it.latitude) + " "
outstr += str(it.longitude) + " "
outstr += str(it.polarization) + " "
outstr += str(row['sigmasq']) + " "
outstr += "\\n"
f.write(outstr)
""")
os.chmod('scripts/pycbc_banksim_collect_results', 0777)
if gpu:
f = open("cconfig", "w")
f.write("""
DAGMAN_PROHIBIT_MULTI_JOBS = False
""")
f = open("scripts/diff_match.sh", "w")
f.write("""#!/bin/bash
len=`cat $1 | wc -l`
len2=`cat $2 | wc -l`
if [ $len -eq $len2 ] && [ $len -ne 0 ] ; then
echo "correct length"
else
echo "wrong length file"
exit 1
fi
function fuzzy_diff {
echo " ($3>($1-$2)) && ($3>($2-$1)) " | bc
}
exec 3<$1
exec 4<$2
while IFS= read -r line1 <&3
IFS= read -r line2 <&4
do
line1=`echo "$line1" | cut --delimiter=' ' -f 1`
line2=`echo "$line2" | cut --delimiter=' ' -f 1`
if ! [[ "$line1" =~ ^[0-9]+([.][0-9]+)?$ ]] ; then
exec >&2; echo "error: Not a number"; exit 1
fi
if ! [[ "$line2" =~ ^[0-9]+([.][0-9]+)?$ ]] ; then
exec >&2; echo "error: Not a number"; exit 1
fi
ok=`fuzzy_diff $line1 $line2 $3`
if [ $ok -eq 0 ] ; then
echo "Files do not match"
exit 1
fi
done
cp $1 $4
cp $1.found $4.found
echo "The files are close enough"
exit 0
""")
os.chmod('scripts/diff_match.sh', 0777)
logging.info("Creating submit script")
f = open("submit.sh","w")
if gpu:
f.write("""#!/bin/bash
condor_submit_dag -config cconfig banksim.dag
""")
else:
f.write("""#!/bin/bash
condor_submit_dag banksim.dag
""")
os.chmod('submit.sh', 0777)
f = open("partial_results.sh", "w")
f.write("""#!/bin/bash
scripts/pycbc_banksim_collect_results
""")
os.chmod('partial_results.sh', 0777)
dag.write_sub_files()
dag.write_script()
dag.write_dag()
f = open("scripts/pycbc_banksim_plots", "w")
f.write("""#!/usr/bin/env python
import matplotlib
matplotlib.use('agg')
from matplotlib import pyplot
from pycbc import pnutils
import numpy
import pylab
goldenratio = 2 / (1 + 5**.5)
#matplotlib.rcParams.update({
# "font.size": 8.0,
# "axes.titlesize": 8.0,
# "axes.labelsize": 8.0,
# "xtick.labelsize": 8.0,
# "ytick.labelsize": 8.0,
# "legend.fontsize": 8.0,
# "figure.figsize": (3.3,3.3*goldenratio),
# "figure.dpi": 200,
# "subplots.left": 0.2,
# "subplots.right": 0.75,
# "subplots.bottom": 0.15,
# "subplots.top": 0.75,
# "savefig.dpi": 600,
# "text.verticalalignment": "center",
# "text.usetex": True # render all text with TeX
#})
res = numpy.loadtxt("results.dat")
match = res[:,0]
tmass1 = res[:,1]
tmass2 = res[:,2]
tspin1x = res[:,3]
tspin1y = res[:,4]
tspin1z = res[:,5]
tspin2x = res[:,6]
tspin2y = res[:,7]
tspin2z = res[:,8]
tmchirp, teta = pnutils.mass1_mass2_to_mchirp_eta(tmass1, tmass2)
imass1 = res[:,9]
imass2 = res[:,10]
ispin1x = res[:,11]
ispin1y = res[:,12]
ispin1z = res[:,13]
ispin2x = res[:,14]
ispin2y = res[:,15]
ispin2z = res[:,16]
imchirp, ieta = pnutils.mass1_mass2_to_mchirp_eta(imass1, imass2)
coa_phase = res[:,17]
inclination = res[:,18]
latitude = res[:,19]
longitude = res[:,20]
polarization = res[:,21]
sigmasq = res[:,22]
q = numpy.maximum(imass1/imass2, imass2/imass1)
s1m = (ispin1x**2+ispin1y**2+ispin1z**2)**0.5
s2m = (ispin2x**2+ispin2y**2+ispin2z**2)**0.5
def mhist(c1, name, cum=False, normed=True, log=False, bins=100, xl="", yl=""):
pylab.figure()
pylab.xlabel(xl)
pylab.ylabel(yl)
if log:
pylab.yscale('log')
pylab.hist(c1, bins=bins, normed=normed, histtype='step', cumulative=cum)
pylab.savefig(name)
def mplot(c1, c2, c, name, xl="", yl="", vmin=None, vmax=None):
pylab.figure()
pylab.axes((0.15, 0.15, 0.8, 0.8))
pylab.scatter(c1, c2, c=c, linewidth=0, s=1, vmin=vmin, vmax=vmax)
pylab.colorbar()
pylab.xlim(min(c1), max(c1))
pylab.ylim(min(c2), max(c2))
pylab.xlabel(xl)
pylab.ylabel(yl)
pylab.savefig(name)
mhist(imchirp-tmchirp, "plots/hist-mchirp-diff.png")
mhist((imchirp-tmchirp)/imchirp, "plots/hist-mchirp-reldiff.png")
mhist(match, "plots/hist-match.png")
mhist(match, "plots/hist-match-cum.png", cum=1, log=True, bins=10000, xl = "Match", yl="Fraction of injections < Match")
pylab.figure(102)
pylab.ylabel('Fraction of Injections')
pylab.xlabel('Fitting factor')
pylab.yscale('log')
pylab.xlim(0.95, 1.0)
pylab.ylim(1e-4, 1)
hBins = pylab.arange(0.,1.,0.0005,dtype=float)
n, bins,patches=pylab.hist(match,cumulative=1,bins=hBins,normed=True)
pylab.grid()
pylab.savefig("plots/cum_hist.png")
mplot(imass1, imass2, match, "plots/m1-m2-match.png")
mplot(tmass1, tmass2, match, "plots/tm1-tm2-match.png")
mplot(q, s1m, match, "plots/q-s1m-match.png")
mplot(q, s2m, match, "plots/q-s2m-match.png")
mplot(q, ispin1z, match, "plots/q-s1z-match.png")
mplot(q, ispin2z, match, "plots/q-s2z-match.png", "Mass Ratio", "Spin2z")
mplot(q, ispin2z, match, "plots/q-s2z-match97.png", "Mass Ratio", "Spin2z", vmin=0.97)
mplot(q, ispin2z, match, "plots/q-s2z-match90.png", "Mass Ratio", "Spin2z", vmin=0.90)
mplot(inclination, match, match, "plots/inc-match.png")
mplot(imass1, imass2, imchirp-tmchirp, "plots/m1-m2-mchirpdiff.png")
mplot(q, ispin1z, imchirp-tmchirp, "plots/q-s1z-mchirpdiff.png", "Mass Ratio", "Spin1z")
mplot(q, ispin2z, imchirp-tmchirp, "plots/q-s2z-mchirpdiff.png", "Mass Ratio", "Spin2z")
mplot(imass1, imass2, (imchirp-tmchirp)/imchirp, "plots/m1-m2-mchirpreldiff.png")
mplot(q, ispin1z, (imchirp-tmchirp)/imchirp, "plots/q-s1z-mchirpreldiff.png")
mplot(q, ispin2z, (imchirp-tmchirp)/imchirp, "plots/q-s2z-mchirpreldiff.png")
""")
os.chmod("scripts/pycbc_banksim_plots", 0777)
logging.info("Done")
Computing file changes ...