Revision 28fa8ecbda12b520d1570fd3f6988ea156fc962b authored by Duncan Brown on 27 September 2015, 12:25:14 UTC, committed by Duncan Brown on 27 September 2015, 12:25:14 UTC
Fix for case where workflow doesn't have a psd section
2 parent s c790a8a + 2768ea8
Raw File
pycbc_make_banksim
#! /usr/bin/env python
import logging
import os
import shutil
import ConfigParser
import subprocess
import glob
import tempfile
from optparse import OptionParser
from glue.pipeline import CondorDAGJob, CondorDAGNode, CondorDAG, CondorJob


class BaseJob(CondorDAGJob, CondorJob):
    def __init__(self, log_dir, executable, cp, section, gpu=False,
                 accounting_group=None):
        CondorDAGJob.__init__(self, "vanilla", executable)

        if gpu:
            CondorJob.__init__(self, "vanilla", executable, 2)
        # These are all python jobs so need to pull in the env
        self.add_condor_cmd('getenv', 'True')
        log_base = os.path.join(
            log_dir, os.path.basename(executable) + '-$(cluster)-$(process)')
        self.set_stderr_file(log_base + '.err')
        self.set_stdout_file(log_base + '.out')
        self.set_sub_file(os.path.basename(executable) + '.sub')

        if cp is not None:
            self.add_ini_opts(cp, section)

        if accounting_group:
            self.add_condor_cmd('accounting_group', accounting_group)

class BanksimNode(CondorDAGNode):
    def __init__(self, job, inj_file, tmplt_file, match_file, gpu=True,
                 gpu_postscript=False, inj_per_job=None):
        CondorDAGNode.__init__(self, job)

        self.add_file_opt("signal-file", inj_file)
        self.add_file_opt("template-file", tmplt_file)

        if gpu:
            self.add_var_opt("processing-scheme", 'cuda')

        if gpu and gpu_postscript:
            self.set_retry(5)
            mf = match_file+".$(Process)"
            mf1 = match_file+".0"
            mf2 = match_file+".1"
            self.add_file_opt("match-file", match_file+".$(Process)",
                              file_is_output_file=True)
            self.job().__queue = 2

            # Needed to satisfy the requirements for both running on atlas and spice
            job.add_condor_cmd('+WantsGPU', 'true')
            job.add_condor_cmd('+WantGPU', 'true')
            job.add_condor_cmd(
                'Requirements',
                '(GPU_PRESENT =?= true) || (HasGPU =?= "gtx580")')

            self.set_post_script(gpu_postscript)
            self.add_post_script_arg(mf1)
            self.add_post_script_arg(mf2)
            self.add_post_script_arg(".0001")
            self.add_post_script_arg(match_file)
            self.add_post_script_arg(str(inj_per_job))
        else:
            self.add_file_opt("match-file", match_file, file_is_output_file=True)
        
class CombineNode(CondorDAGNode):
    def __init__(self, job, inj_num):
        CondorDAGNode.__init__(self, job)
        
        self.add_var_opt("inj-num", inj_num)
        
        outf = "match/match" + str(inj_num) + ".dat"
        
        self.add_file_opt("output-file", outf)    

def get_ini_opts(confs, section):
    op_str = ""
    for opt in confs.options(section):
        val = confs.get(section, opt)
        op_str += "--" + opt + " " + val + " \\" + "\n"
    return op_str
    
def mkdir(dir_name):
    try :
        os.mkdir(dir_name)
    except OSError:
        pass
        
def mc_min_max_from_sorted_file(fname):
    from glue.ligolw import lsctables
    from glue.ligolw.ligolw import LIGOLWContentHandler 
    class mycontenthandler(LIGOLWContentHandler):
        pass
    lsctables.use_in(mycontenthandler)
    from glue.ligolw.utils import load_filename
    from glue.ligolw.table import get_table
    from pycbc.pnutils import mass1_mass2_to_mchirp_eta
    try:
        t = get_table(load_filename(fname, False, contenthandler=mycontenthandler), "sngl_inspiral")
    except:
        t = get_table(load_filename(fname, False, contenthandler=mycontenthandler), "sim_inspiral")
    mc_max, et = mass1_mass2_to_mchirp_eta(t[0].mass1, t[0].mass2)
    mc_min, et = mass1_mass2_to_mchirp_eta(t[-1].mass1, t[-1].mass2)
    return mc_min, mc_max
    
        
bf_mchirps = {}
sf_mchirps = {}
def check_outside_mchirp(bf, sf, w):
    if bf not in bf_mchirps:
        bf_mchirps[bf] = mc_min_max_from_sorted_file(bf)    
    if sf not in sf_mchirps:
        sf_mchirps[sf] = mc_min_max_from_sorted_file(sf) 
         
    mc_min, mc_max = bf_mchirps[bf]
    mc2_min, mc2_max =  sf_mchirps[sf]
    
    if (mc_min  <= mc2_max * (1+w) ) and (mc_max * (1+w) >= mc2_min):
        return False
    else:
        return True

parser = OptionParser()
parser.add_option('--config', type=str)          
(options, args) = parser.parse_args() 

if options.config is None:
    raise ValueError("Config file is required")  

logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)

confs = ConfigParser.ConfigParser()
confs.read(options.config)

banksim_prog = confs.get("executables", "banksim")
bank_file = confs.get("workflow", "bank-file")
injections_per_job = confs.get("workflow", "injections-per-job")
templates_per_job = confs.get("workflow", "templates-per-job")

log_path = confs.get("workflow", 'log-path')

tempfile.tempdir = log_path
tempfile.template='banksim.dag.log.'
logfile = tempfile.mktemp()

mchirp_window = None
if confs.has_option("banksim", "mchirp-window"):
    if ',' in confs.get("banksim", "mchirp-window"):
         mchirp_window = max([float(x) for x in confs.get("banksim", "mchirp-window").split(",")])
    else:
         mchirp_window = float(confs.get("banksim", "mchirp-window"))

gpu = False
try:
    gpu = confs.get("workflow", "use-gpus")
    if gpu is not None:
        gpu = True
except:
    pass

try:
    accounting_group = confs.get('workflow', 'accounting-group')
except:
    accounting_group = None
    logging.warn('Warning: accounting-group not specified, LDG clusters may'
                 ' reject this workflow!')

logging.info("Making workspace directories")
mkdir('scripts')
mkdir('bank')
mkdir('match')
mkdir('injection')
mkdir('match-part')
mkdir('log')
mkdir('plots')

logging.info("Copying scripts")
shutil.copy(banksim_prog, 'scripts/pycbc_banksim')
os.chmod('scripts/pycbc_banksim', 0777)

logging.info("Creating injection file")
inj_str = "lalapps_inspinj " + get_ini_opts(confs, "inspinj") + "--output inj.xml"
os.system(inj_str)

logging.info("Splitting template bank")
subprocess.call(['pycbc_splitbank',
                 '--templates-per-bank', str(templates_per_job),
                 '-t', bank_file,
                 '-o', 'bank/bank',
                 '--sort-mchirp'])

logging.info("Splitting injection file")
subprocess.call(['pycbc_splitbank',
                 '--templates-per-bank', str(injections_per_job),
                 '-t', "inj.xml",
                 '-o', 'injection/injection',
                 '--sort-mchirp'])

num_banks = len(glob.glob("bank/bank*"))
num_injs = len(glob.glob("injection/injection*"))

logging.info("Creating DAG")
f = open("banksim.dag", "w")

do_count = 0
skip_count = 0

dag = CondorDAG(logfile)
dag.set_dag_file("banksim")
dag.set_dax_file("banksim")

bsjob = BaseJob("log", "scripts/pycbc_banksim", confs, "banksim", gpu=gpu,
                accounting_group=accounting_group)
cjob = BaseJob("log", "scripts/pycbc_banksim_match_combine", None, None,
               accounting_group=accounting_group)
rjob = BaseJob("log", "scripts/pycbc_banksim_collect_results", None, None,
               accounting_group=accounting_group)
pjob = BaseJob("log", "scripts/pycbc_banksim_plots", None, None,
               accounting_group=accounting_group)
rnode = CondorDAGNode(rjob)
pnode = CondorDAGNode(pjob)

for inj_num in range(num_injs):
    num = str(inj_num)
    combine_has_jobs = False
    cnode = CombineNode(cjob, inj_num)
    for bank_num in range(num_banks):
        if mchirp_window is not None:
            bank_part = "bank/bank" + str(bank_num) + ".xml"
            sim_part =  "injection/injection" + str(inj_num) + ".xml"
            if check_outside_mchirp(bank_part, sim_part, mchirp_window):
                skip_count += 1
                continue
            else:
                do_count += 1
        part_num = str(bank_num)
        mfn = 'match-part/match' + num +'part' + part_num + '.dat'
        sn = 'injection/injection' + num + '.xml'
        bn = 'bank/bank' + part_num + '.xml'
        bsnode = BanksimNode(bsjob, sn, bn, mfn, gpu=gpu,
                             gpu_postscript="scripts/diff_match.sh",
                             inj_per_job=injections_per_job)
        cnode.add_parent(bsnode)
        dag.add_node(bsnode)
        combine_has_jobs = True
    if combine_has_jobs:
        rnode.add_parent(cnode)
        dag.add_node(cnode)      
dag.add_node(rnode)
pnode.add_parent(rnode)
dag.add_node(pnode)

logging.info("DO : %d SKIP %d" %(do_count, skip_count))
f.close()

f = open("scripts/pycbc_banksim_match_combine", "w")
f.write("""#!/usr/bin/env python
from os.path import isfile
from optparse import OptionParser
from numpy import *
import glob
parser = OptionParser()

parser.add_option('--inj-num',help="index of the injection set for the match files",type=int)
parser.add_option('-o','--output-file',help="output file with the maximized values")
options, argv_frame_files = parser.parse_args()

fils = glob.glob("match-part/match"+str(options.inj_num)+"part*.dat")

dtypef={'names': ('match', 'bank', 'bank_i', 'sim', 'sim_i', 'sigmasq'), 'formats': ('f8', 'S256', 'i4', 'S256', 'i4', 'f8')}

matches=[]
maxmatch = []
for fil in fils:
    matches.append(loadtxt(fil, dtype=dtypef))
   
indices = array(matches, dtype=dtypef)['match'].argmax(0)
for i, j in enumerate(indices):
    maxmatch.append(matches[j][i])
    
maxmatch=array(maxmatch, dtype =dtypef)
savetxt(options.output_file, maxmatch,fmt=('%5.5f', '%s', '%i', '%s', '%i', '%5.5f'), delimiter=' ')
""")
os.chmod('scripts/pycbc_banksim_match_combine', 0777)

f = open("scripts/pycbc_banksim_collect_results", "w")
f.write("""#!/usr/bin/env python
from os.path import isfile
from numpy import *
from glue.ligolw import utils, table
import glob

from glue.ligolw.ligolw import LIGOLWContentHandler
from glue.ligolw import lsctables
class mycontenthandler(LIGOLWContentHandler):
    pass
lsctables.use_in(mycontenthandler)

fils = glob.glob("match/match*.dat")

dtypem={'names': ('match', 'bank', 'bank_i', 'sim', 'sim_i', 'sigmasq'), 'formats': ('f8', 'S256', 'i4', 'S256', 'i4', 'f8')}

# Collect the results
res = None
for fil in fils:
    if res is not None:
        res = append(res, loadtxt(fil, dtype=dtypem))
    else:
        res = loadtxt(fil, dtype=dtypem)
 
btables = {}
itables = {}     

f = open("results.dat", "w")
for row in res: 
    outstr = ""
    if row['bank'] not in btables:
        indoc = utils.load_filename(row['bank'], False, contenthandler=mycontenthandler)
        btables[row['bank']] = table.get_table(indoc, "sngl_inspiral") 

    if row['sim'] not in itables:
        indoc = utils.load_filename(row['sim'], False, contenthandler=mycontenthandler)
        itables[row['sim']] = table.get_table(indoc, "sim_inspiral") 
    
    bt = btables[row['bank']][row['bank_i']]     
    it = itables[row['sim']][row['sim_i']]
 
    outstr += str(row['match']) + " "
    outstr += str(bt.mass1) + " "
    outstr += str(bt.mass2) + " "
    outstr += str(bt.spin1x) + " "
    outstr += str(bt.spin1y) + " "
    outstr += str(bt.spin1z) + " "
    outstr += str(bt.spin2x) + " "
    outstr += str(bt.spin2y) + " " 
    outstr += str(bt.spin2z) + " "
    
    outstr += str(it.mass1) + " "
    outstr += str(it.mass2) + " "
    outstr += str(it.spin1x) + " "
    outstr += str(it.spin1y) + " "
    outstr += str(it.spin1z) + " "
    outstr += str(it.spin2x) + " "
    outstr += str(it.spin2y) + " " 
    outstr += str(it.spin2z) + " "
    
    outstr += str(it.coa_phase) + " "
    outstr += str(it.inclination) + " "
    outstr += str(it.latitude) + " " 
    outstr += str(it.longitude) + " "
    outstr += str(it.polarization) + " "
    
    outstr += str(row['sigmasq']) + " "
                
    outstr += "\\n"
    
    f.write(outstr)
""")
os.chmod('scripts/pycbc_banksim_collect_results', 0777)

if gpu:
    f = open("cconfig", "w")
    f.write("""
    DAGMAN_PROHIBIT_MULTI_JOBS = False
    """)

    f = open("scripts/diff_match.sh", "w")
    f.write("""#!/bin/bash

    len=`cat $1 | wc -l`
    len2=`cat $2 | wc -l`

    if [ $len -eq $len2 ] && [ $len -ne 0 ] ; then
       echo "correct length"
    else
       echo "wrong length file"
        exit 1
    fi  

    function fuzzy_diff {
       echo  " ($3>($1-$2)) && ($3>($2-$1)) " | bc  
    }

    exec 3<$1
    exec 4<$2

    while IFS= read -r line1 <&3
    IFS= read -r line2 <&4
    do
        line1=`echo "$line1" | cut --delimiter=' ' -f 1`
        line2=`echo "$line2" | cut --delimiter=' ' -f 1` 

        if ! [[ "$line1" =~ ^[0-9]+([.][0-9]+)?$ ]] ; then
           exec >&2; echo "error: Not a number"; exit 1
        fi
        
        if ! [[ "$line2" =~ ^[0-9]+([.][0-9]+)?$ ]] ; then
           exec >&2; echo "error: Not a number"; exit 1
        fi

        ok=`fuzzy_diff $line1 $line2 $3`

        if  [ $ok -eq 0 ] ; then
           echo "Files do not match"
           exit 1
        fi 
       
    done


    cp $1 $4
    cp $1.found $4.found
    echo "The files are close enough"

    exit 0
    """)
    os.chmod('scripts/diff_match.sh', 0777)
    
logging.info("Creating submit script")
f = open("submit.sh","w")
if gpu:
    f.write("""#!/bin/bash
    condor_submit_dag -config cconfig banksim.dag
    """)
else:
    f.write("""#!/bin/bash
    condor_submit_dag banksim.dag
    """)
os.chmod('submit.sh', 0777)

f = open("partial_results.sh", "w")
f.write("""#!/bin/bash
scripts/pycbc_banksim_collect_results
""")
os.chmod('partial_results.sh', 0777)

dag.write_sub_files()
dag.write_script()
dag.write_dag()

f = open("scripts/pycbc_banksim_plots", "w")
f.write("""#!/usr/bin/env python
import matplotlib
matplotlib.use('agg')
from matplotlib import pyplot
from pycbc import pnutils
import numpy
import pylab

goldenratio = 2 / (1 + 5**.5)
#matplotlib.rcParams.update({
#        "font.size": 8.0,
#        "axes.titlesize": 8.0,
#        "axes.labelsize": 8.0,
#        "xtick.labelsize": 8.0,
#        "ytick.labelsize": 8.0,
#        "legend.fontsize": 8.0,
#	"figure.figsize": (3.3,3.3*goldenratio),
#        "figure.dpi": 200,
#	"subplots.left": 0.2, 
#	"subplots.right": 0.75, 
#	"subplots.bottom": 0.15,
#	"subplots.top": 0.75,
#        "savefig.dpi": 600,
#        "text.verticalalignment": "center",
#        "text.usetex": True     # render all text with TeX
#})

res = numpy.loadtxt("results.dat")
match = res[:,0]

tmass1 = res[:,1]
tmass2 = res[:,2]
tspin1x = res[:,3] 
tspin1y = res[:,4]
tspin1z = res[:,5]
tspin2x = res[:,6]
tspin2y = res[:,7]
tspin2z = res[:,8]
tmchirp, teta = pnutils.mass1_mass2_to_mchirp_eta(tmass1, tmass2)

imass1 = res[:,9]
imass2 = res[:,10]
ispin1x = res[:,11]
ispin1y = res[:,12]
ispin1z = res[:,13]
ispin2x = res[:,14]
ispin2y = res[:,15]
ispin2z = res[:,16]
imchirp, ieta = pnutils.mass1_mass2_to_mchirp_eta(imass1, imass2)

coa_phase = res[:,17]
inclination = res[:,18]
latitude  = res[:,19]
longitude = res[:,20]
polarization = res[:,21]

sigmasq = res[:,22]

q = numpy.maximum(imass1/imass2, imass2/imass1)
s1m = (ispin1x**2+ispin1y**2+ispin1z**2)**0.5
s2m = (ispin2x**2+ispin2y**2+ispin2z**2)**0.5

def mhist(c1, name, cum=False, normed=True, log=False, bins=100, xl="", yl=""):
    pylab.figure()
    pylab.xlabel(xl)
    pylab.ylabel(yl)
    if log:
        pylab.yscale('log')
    pylab.hist(c1, bins=bins, normed=normed, histtype='step', cumulative=cum)
    pylab.savefig(name)

def mplot(c1, c2, c, name, xl="", yl="", vmin=None, vmax=None):
    pylab.figure()
    pylab.axes((0.15, 0.15, 0.8, 0.8))
    pylab.scatter(c1, c2, c=c, linewidth=0, s=1, vmin=vmin, vmax=vmax)
    pylab.colorbar()
    pylab.xlim(min(c1), max(c1))
    pylab.ylim(min(c2), max(c2))
    pylab.xlabel(xl)
    pylab.ylabel(yl)
    pylab.savefig(name)

mhist(imchirp-tmchirp, "plots/hist-mchirp-diff.png")
mhist((imchirp-tmchirp)/imchirp, "plots/hist-mchirp-reldiff.png")
mhist(match, "plots/hist-match.png")
mhist(match, "plots/hist-match-cum.png", cum=1, log=True, bins=10000, xl = "Match", yl="Fraction of injections < Match")
    
pylab.figure(102)
pylab.ylabel('Fraction of Injections')
pylab.xlabel('Fitting factor')
pylab.yscale('log') 
pylab.xlim(0.95, 1.0)
pylab.ylim(1e-4, 1)
hBins = pylab.arange(0.,1.,0.0005,dtype=float)
n, bins,patches=pylab.hist(match,cumulative=1,bins=hBins,normed=True)
pylab.grid()
pylab.savefig("plots/cum_hist.png")
    
mplot(imass1, imass2, match, "plots/m1-m2-match.png")
mplot(tmass1, tmass2, match, "plots/tm1-tm2-match.png")
mplot(q, s1m, match, "plots/q-s1m-match.png")
mplot(q, s2m, match, "plots/q-s2m-match.png")
mplot(q, ispin1z, match, "plots/q-s1z-match.png")  
mplot(q, ispin2z, match, "plots/q-s2z-match.png", "Mass Ratio", "Spin2z") 
mplot(q, ispin2z, match, "plots/q-s2z-match97.png", "Mass Ratio", "Spin2z", vmin=0.97)
mplot(q, ispin2z, match, "plots/q-s2z-match90.png", "Mass Ratio", "Spin2z", vmin=0.90)
mplot(inclination, match, match, "plots/inc-match.png")   

mplot(imass1, imass2, imchirp-tmchirp, "plots/m1-m2-mchirpdiff.png")
mplot(q, ispin1z, imchirp-tmchirp, "plots/q-s1z-mchirpdiff.png", "Mass Ratio", "Spin1z")
mplot(q, ispin2z, imchirp-tmchirp, "plots/q-s2z-mchirpdiff.png", "Mass Ratio", "Spin2z")

mplot(imass1, imass2, (imchirp-tmchirp)/imchirp, "plots/m1-m2-mchirpreldiff.png")
mplot(q, ispin1z, (imchirp-tmchirp)/imchirp, "plots/q-s1z-mchirpreldiff.png")
mplot(q, ispin2z, (imchirp-tmchirp)/imchirp, "plots/q-s2z-mchirpreldiff.png")

""")
os.chmod("scripts/pycbc_banksim_plots", 0777)

logging.info("Done")
back to top