Revision 2ac88b212928958f4eb9184e0284f01ada77ef1b authored by Collin Capano on 31 October 2017, 08:03:33 UTC, committed by GitHub on 31 October 2017, 08:03:33 UTC
* give meaningful error when priors section missing * comment out unused variables in example ini file * fix update for travis
1 parent e3b8d4a
pycbc_submit_dax
#!/bin/bash
# Script prologue: enable fail-fast mode, record how the script was invoked,
# and mirror all further output into a log file under workflow/planning.

# Abort as soon as any unhandled command fails.
set -e

mkdir -p workflow/planning

# Keep the exact command line; it is reused later when the dashboard page is
# annotated.
COMMAND_LINE="${0} ${*}"
echo "${COMMAND_LINE}" > workflow/planning/pycbc-submit-dax-command.txt

# Start a fresh log with a header describing where and when we ran.
PYCBC_SUBMIT_DAX_LOG=workflow/planning/pycbc-submit-dax-log.txt
echo "Executing on $(hostname) in directory $(pwd) at $(date)" > "${PYCBC_SUBMIT_DAX_LOG}"

# Redirect stdout ( > ) into a process substitution ( >() ) running "tee", so
# output is shown to the user and appended to the log that already holds the
# header.  The header and the tee target used to be two different files
# (workflow/planning/... and workflow/...), which left the header on its own
# and let the other file grow across runs.
exec > >(tee -a "${PYCBC_SUBMIT_DAX_LOG}")

# Without this, only stdout would be captured - i.e. the log file would not
# contain any error messages.
exec 2>&1
# Defaults for every command-line option; the option parser overrides them.

# Flags (0 = off, 1 = on)
CACHE_FILE=0
NO_ACCOUNTING_GROUP=0
SHARED_FILESYSTEM=0
NO_CREATE_PROXY=0

# Workflow description and planner configuration
DAX_FILE=
PEGASUS_PROPERTIES=
TRANSFORMATION_CATALOG=
LOCAL_PEGASUS_DIR=

# Accounting information
ACCOUNTING_GROUP=
ACCOUNTING_GROUP_USER=

# Execution/staging sites and the servers used to reach them
SITE_LIST=
STAGING_SITES=
LOCAL_GSIFTP_SERVER=
REMOTE_STAGING_SERVER=

# Passed straight to pegasus-plan; emptied by --no-submit
SUBMIT_DAX='--submit'
# These will be changed by the bundle builder
# DATA_INLINE is compared against the string "True" further down in this
# script to decide whether expand_pegasus_files is called.
# NOTE(review): the builder presumably rewrites this assignment and the
# PEGASUS_FILE_DATA line textually - keep both lines exactly as they are.
DATA_INLINE=False
# Decode the base64 text embedded in the here-document below and unpack the
# result as a gzip-compressed tar stream into the current directory.
# Takes no arguments; the exit status is that of tar (last pipeline stage).
function expand_pegasus_files() {
(
base64 -d <<DATA_END
PEGASUS_FILE_DATA
DATA_END
) | tar -zx
}
# Reset the scratch files that the option parser appends to.

# Properties given with --append-pegasus-property are collected here.
printf '%s\n' "# Properties set on command line" > extra-properties.conf

# Replica cache assembled from --cache-file arguments; start out empty.
rm -f _reuse.cache
touch _reuse.cache

# Drop per-site profile fragments left over from an earlier run.
rm -f *-extra-site-properties.xml
#######################################
# Parse the pycbc_submit_dax command line.
#
# The arguments are normalised with enhanced getopt(1) and each option is
# recorded in the matching global variable.  Some options also write files in
# the current directory:
#   --cache-file               appends every listed cache to _reuse.cache
#   --append-pegasus-property  appends the property to extra-properties.conf
#   --append-site-profile      appends a <profile> element to
#                              SITE-extra-site-properties.xml
#
# Fixes relative to the previous inline parser:
#   * --no-create-proxy has its own short option -K.  It used to share -k
#     with --append-site-profile, which always won, so -k could never mean
#     "no create proxy".
#   * A cache URL that cannot be read now exits with status 1 (a bare "exit"
#     returned the status of the preceding echo, i.e. 0).
#   * The cache list is split without touching the global IFS.
#   * Option values are quoted so whitespace in them is preserved.
#
# Globals written:
#   DAX_FILE, CACHE_FILE, LOCAL_GSIFTP_SERVER, REMOTE_STAGING_SERVER,
#   ACCOUNTING_GROUP (exported), ACCOUNTING_GROUP_USER (exported),
#   PEGASUS_PROPERTIES, SHARED_FILESYSTEM, SITE_LIST, STAGING_SITES,
#   TRANSFORMATION_CATALOG, NO_ACCOUNTING_GROUP, NO_CREATE_PROXY, SUBMIT_DAX,
#   LOCAL_PEGASUS_DIR, GETOPT_CMD
# Arguments:
#   The script's command-line arguments ("$@").
# Exits:
#   0 after printing --help; 1 on invalid options, on an internal parser
#   error, or when a cache file cannot be fetched.
#######################################
parse_command_line() {
  local short_opts long_opts
  local url file_url
  local site_profile encoded_profile append_site profile_file joined_value
  local -a cache_urls profile_fields ns_key profile_value

  short_opts='d:c:g:r:a:u:p:P:es:S:k:t:FKnl:h'
  long_opts='dax:,cache-file:,local-gsiftp-server:,remote-staging-server:'
  long_opts+=',accounting-group:,accounting-group-user:,pegasus-properties:'
  long_opts+=',append-pegasus-property:,enable-shared-filesystem'
  long_opts+=',execution-sites:,staging-sites:,append-site-profile:'
  long_opts+=',transformation-catalog:,force-no-accounting-group'
  long_opts+=',no-create-proxy,no-submit,local-dir:,help'

  # getopt reports bad options itself; stop here if it does.
  GETOPT_CMD=$(getopt -o "${short_opts}" --long "${long_opts}" -n 'pycbc_submit_dax' -- "$@") || exit 1
  eval set -- "$GETOPT_CMD"

  while true ; do
    case "$1" in
      -d|--dax)
        case "$2" in
          "") shift 2 ;;
          *) DAX_FILE=$2 ; shift 2 ;;
        esac ;;
      -c|--cache-file)
        case "$2" in
          "") shift 2 ;;
          *)
            CACHE_FILE=1
            # Comma separated list of local paths, file:// URLs or remote URLs.
            IFS=',' read -ra cache_urls <<< "$2"
            for url in "${cache_urls[@]}" ; do
              # strip off any leading file:// URI and see if the file exists
              file_url=${url##file://}
              if [ -f "${file_url}" ] ; then
                cat "${file_url}" >> _reuse.cache
              elif ! curl --fail "${url}" >> _reuse.cache ; then
                # otherwise we tried to get the file using curl and failed
                echo "${url} not found!"
                exit 1
              fi
            done
            shift 2 ;;
        esac ;;
      -g|--local-gsiftp-server)
        case "$2" in
          "") shift 2 ;;
          *) LOCAL_GSIFTP_SERVER=$2 ; shift 2 ;;
        esac ;;
      -r|--remote-staging-server)
        case "$2" in
          "") shift 2 ;;
          *) REMOTE_STAGING_SERVER=$2 ; shift 2 ;;
        esac ;;
      -a|--accounting-group)
        case "$2" in
          "") shift 2 ;;
          *) export ACCOUNTING_GROUP=$2 ; shift 2 ;;
        esac ;;
      -u|--accounting-group-user)
        case "$2" in
          "") shift 2 ;;
          *) export ACCOUNTING_GROUP_USER=$2 ; shift 2 ;;
        esac ;;
      -p|--pegasus-properties)
        case "$2" in
          "") shift 2 ;;
          *) PEGASUS_PROPERTIES=$2 ; shift 2 ;;
        esac ;;
      -P|--append-pegasus-property)
        case "$2" in
          "") shift 2 ;;
          *) printf '%s\n' "$2" >> extra-properties.conf ; shift 2 ;;
        esac ;;
      -e|--enable-shared-filesystem) SHARED_FILESYSTEM=1 ; shift ;;
      -s|--execution-sites)
        case "$2" in
          "") shift 2 ;;
          *) SITE_LIST=$2 ; shift 2 ;;
        esac ;;
      -S|--staging-sites)
        case "$2" in
          "") shift 2 ;;
          *) STAGING_SITES="--staging-site $2" ; shift 2 ;;
        esac ;;
      -k|--append-site-profile)
        case "$2" in
          "") shift 2 ;;
          *)
            site_profile=${2}
            # Entity-encode characters that are unsafe inside the XML fragment.
            encoded_profile=$(perl -e 'use HTML::Entities; print encode_entities(decode_entities(<STDIN>), "\'\"'\'\''<>()&+" );' <<< "${site_profile}")
            # Argument layout is SITE:NAMESPACE|KEY:VALUE; VALUE may itself
            # contain colons, so everything after the second field is rejoined.
            IFS=':' read -ra profile_fields <<< "${encoded_profile}"
            append_site=${profile_fields[0]}
            IFS='|' read -ra ns_key <<< "${profile_fields[1]}"
            profile_value=("${profile_fields[@]:2}")
            joined_value=$(IFS=':' ; printf '%s' "${profile_value[*]}")
            profile_file="${append_site}-extra-site-properties.xml"
            {
              printf ' <profile namespace="%s" key="%s">' "${ns_key[0]}" "${ns_key[1]}"
              printf '%s' "${joined_value}"
              echo "</profile>"
            } >> "${profile_file}"
            shift 2 ;;
        esac ;;
      -t|--transformation-catalog)
        case "$2" in
          "") shift 2 ;;
          *) TRANSFORMATION_CATALOG=$2 ; shift 2 ;;
        esac ;;
      -F|--force-no-accounting-group) NO_ACCOUNTING_GROUP=1 ; shift ;;
      -K|--no-create-proxy) NO_CREATE_PROXY=1 ; shift ;;
      -n|--no-submit) SUBMIT_DAX="" ; shift ;;
      -l|--local-dir)
        case "$2" in
          "") shift 2 ;;
          *) LOCAL_PEGASUS_DIR=$2 ; shift 2 ;;
        esac ;;
      -h|--help)
        echo "usage: pycbc_submit_dax [-h] --dax DAX [optional arguments]"
        echo
        echo "required arguments:"
        echo " -d, --dax DAX name of the dax file to plan"
        echo
        echo "workflow submission required one of:"
        echo " -a, --accounting-group GROUP tagged string used for site "
        echo " resource accounting."
        echo " -F, --force-no-accounting-group submit without an accounting"
        echo " group. Will cause condor submission"
        echo " to fail on LIGO Data Grid clusters"
        echo
        echo "optional arguments:"
        echo " -h, --help show this help message and exit"
        echo " -c, --cache-file FILE replica cache file for data reuse"
        echo " -g, --local-gsiftp-server HOST provide a gsiftp url on HOST"
        echo " for the local site storage"
        echo " and scratch directories"
        echo " -r, --remote-staging-server HOST provide a gsiftp url on HOST"
        echo " for the remote site storage"
        echo " and scratch directories"
        echo " -p, --pegasus-properties FILE use the specified file as"
        echo " the pegasus properties file"
        echo " -P, --append-pegasus-property STRING add the extra property"
        echo " specified by the argument"
        echo " -s, --execution-sites A,B,C specify a comma separated list"
        echo " of execution sites that will be"
        echo " used in addition to the local site"
        echo " -S, --staging-sites A=X,B=Y,C=Z comma separated list of key=value"
        echo " pairs, where the key is the"
        echo " execution site and value is the"
        echo " staging site for that execution site"
        echo " -k, --append-site-profile SITE:NAMESPACE|KEY:VALUE"
        echo " append the profile determined by"
        echo " NAMESPACE, KEY, and VALUE to the"
        echo " site catalog entry for SITE"
        echo " -t, --transformation-catalog FILE pass the specified"
        echo " transformation catalog to Pegasus"
        echo " -K, --no-create-proxy Do not run ligo-proxy-init and assume"
        echo " that the user has a valid grid proxy"
        echo " -n, --no-submit Plan the DAX but do not submit it"
        echo " -l, --local-dir Directory to put condor files under"
        echo
        echo "If the environment variable TMPDIR is set then this is prepended to the "
        echo "path to the temporary workflow execute directory passed to pegasus-plan."
        echo "If the --local-dir option is not given."
        echo
        echo "If the environment variable PEGASUS_FILE_DIRECTORY is set then the"
        echo "script will look there for pegasus site catalog and configuration"
        echo "otherwise, the script will look for this directory by querying the"
        echo "pycbc.workflow module."
        echo
        echo "If the environment variable PEGASUS_SITE_CATALOG_PATH is set then the"
        echo "script will look for site catalog templates in this directory."
        echo "Site catalog templates may contain other environment variables that"
        echo "must be set in order for them to be rendered correctly."
        echo
        exit 0 ;;
      --) shift ; break ;;
      *) echo "Internal error!" ; exit 1 ;;
    esac
  done
}

parse_command_line "$@"
# Refuse to continue without the mandatory options.

# A DAX file is always required.
if [[ -z ${DAX_FILE} ]] ; then
  echo "Error: --dax must be specified. Use --help for options."
  exit 1
fi

# An accounting group is required unless the user explicitly opted out.
if [[ -z ${ACCOUNTING_GROUP} && ${NO_ACCOUNTING_GROUP} == 0 ]] ; then
  cat <<'ACCOUNTING_GROUP_HELP'
Error: You must specify an accounting group with --accounting-group or
override this check with --force-no-accounting-group. If you do not specify
an accounting group, job submission will fail on the LIGO Data Grid.

For a list of available LIGO Data Grid accounting group tags, see
 https://ldas-gridmon.ligo.caltech.edu/ldg_accounting/user
ACCOUNTING_GROUP_HELP
  exit 1
fi
# Obtain (or adopt) a grid proxy and make sure it is usable.
#
# The proxy is kept at /tmp/x509up_u<uid>; X509_USER_PROXY is unset so that
# the tools run below fall back to that file.
DEFAULT_PROXY_FILE="/tmp/x509up_u$(id -u)"

if [ "${NO_CREATE_PROXY}" == 0 ] ; then
  # Force the user to create a new grid proxy
  LIGO_USER_NAME=""
  while [ -z "${LIGO_USER_NAME}" ] ; do
    read -r -p "Enter your LIGO.ORG username in (e.g. albert.einstein): " LIGO_USER_NAME
    echo
  done
  unset X509_USER_PROXY
  ligo-proxy-init -p "${LIGO_USER_NAME}" || exit 1
elif [ -n "${X509_USER_PROXY}" ] ; then
  # Adopt the proxy the user already has.  Skip the copy when the variable
  # already points at the default file: cp refuses to copy a file onto
  # itself, which used to abort the script under "set -e".
  if [ -f "${X509_USER_PROXY}" ] && [ ! "${X509_USER_PROXY}" -ef "${DEFAULT_PROXY_FILE}" ] ; then
    cp -a "${X509_USER_PROXY}" "${DEFAULT_PROXY_FILE}"
  fi
  unset X509_USER_PROXY
fi

# Check that the proxy is valid
if ! grid-proxy-info -exists ; then
  echo "Error: Could not find a valid grid proxy to submit workflow."
  exit 1
fi

# Anything other than an RFC 3820 impersonation proxy is re-issued by
# grid-proxy-init, using the existing proxy as both certificate and key.
PROXY_TYPE=$(grid-proxy-info -type | tr -d ' ')
if [ "${PROXY_TYPE}" != 'RFC3820compliantimpersonationproxy' ] ; then
  cp "${DEFAULT_PROXY_FILE}" "${DEFAULT_PROXY_FILE}.orig"
  grid-proxy-init -cert "${DEFAULT_PROXY_FILE}.orig" -key "${DEFAULT_PROXY_FILE}.orig"
  rm -f "${DEFAULT_PROXY_FILE}.orig"
fi
grid-proxy-info
# Make a directory for the submit files.  An empty --tmpdir= value lets
# mktemp fall back to $TMPDIR (or /tmp), which is what happens when
# --local-dir was not given.
SUBMIT_DIR=$(mktemp --tmpdir="${LOCAL_PEGASUS_DIR}" -d pycbc-tmp.XXXXXXXXXX)

# Make sure the directory is world readable
chmod 755 "${SUBMIT_DIR}"

# Find the site-local template directory: unpack the embedded copy when this
# is a bundled script, otherwise honour PEGASUS_FILE_DIRECTORY from the
# environment, otherwise ask the pycbc.workflow module.
if [ "${DATA_INLINE}" == "True" ] ; then
  expand_pegasus_files
  PEGASUS_FILE_DIRECTORY=${PWD}/pegasus_files
elif [ -z "${PEGASUS_FILE_DIRECTORY}" ] ; then
  # print(...) with a single argument behaves the same under Python 2 and 3;
  # the former print statement was a syntax error on Python 3.
  PEGASUS_FILE_DIRECTORY=$(python -c 'from pycbc.workflow import PEGASUS_FILE_DIRECTORY; print(PEGASUS_FILE_DIRECTORY)')
fi
# Work out the URLs that the site catalog templates refer to and export them
# for the rendering step.

# The local site lives in the current directory, reached either directly or
# through the gsiftp server given with --local-gsiftp-server.
export LOCAL_SITE_PATH=${PWD}
if [ -z "${LOCAL_GSIFTP_SERVER}" ] ; then
  LOCAL_SITE_URL="file://${LOCAL_SITE_PATH}"
else
  LOCAL_SITE_URL="gsiftp://${LOCAL_GSIFTP_SERVER}${LOCAL_SITE_PATH}"
fi
export LOCAL_SITE_URL

# The local site is always part of the site list.  The tests are quoted: an
# unquoted comparison errored out on a site list containing whitespace and
# silently left REMOTE_STAGING_URL unset.
if [ -z "${SITE_LIST}" ] ; then
  # Local-only run: remote staging is simply the local site.
  SITE_LIST="local"
  REMOTE_STAGING_URL=${LOCAL_SITE_URL}
else
  SITE_LIST="local,${SITE_LIST}"
  if [ -z "${REMOTE_STAGING_SERVER}" ] ; then
    # this will be used as an error flag once the catalog has been rendered
    REMOTE_STAGING_URL="REMOTE_STAGING_SERVER_UNSET"
  else
    REMOTE_STAGING_URL="gsiftp://${REMOTE_STAGING_SERVER}${LOCAL_SITE_PATH}"
  fi
fi
export REMOTE_STAGING_URL

# Site templates are looked up next to the other Pegasus files unless the
# user pointed PEGASUS_SITE_CATALOG_PATH somewhere else.
if [ -z "${PEGASUS_SITE_CATALOG_PATH}" ] ; then
  PEGASUS_SITE_CATALOG_PATH=${PEGASUS_FILE_DIRECTORY}
fi
# create the site catalog template
# site-catalog.sh is built as a tiny shell script of the form
#   cat <<END_OF_TEXT ... END_OF_TEXT
# so that running it with bash expands the variable references contained in
# the site templates (e.g. the URLs exported earlier in this script).
# NOTE(review): because the delimiter is unquoted, command substitutions in a
# template would be executed as well - templates must be trusted.
echo 'cat <<END_OF_TEXT' > site-catalog.sh
# XML prologue and opening <sitecatalog> element.
cat << EOF >> site-catalog.sh
<?xml version="1.0" encoding="UTF-8"?>
<sitecatalog xmlns="http://pegasus.isi.edu/schema/sitecatalog" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://pegasus.isi.edu/schema/sitecatalog http://pegasus.isi.edu/schema/sc-4.0.xsd" version="4.0">
EOF
# Split the comma separated site list into the SITE array.
OLD_IFS=${IFS}
IFS=','
read -ra SITE <<< "$SITE_LIST"
IFS=${OLD_IFS}
# Append one entry per site: the site's template, then any profiles collected
# from --append-site-profile, then the closing </site> tag.
# NOTE(review): this implies each template leaves its <site> element open -
# confirm against the template files.
for s in "${SITE[@]}"; do
SITE_TEMPLATE=$PEGASUS_SITE_CATALOG_PATH/${s}-site-template.xml
if [ ! -e ${SITE_TEMPLATE} ] ; then
echo "Error: Could not find site catalog template for site ${s}"
exit 1
else
cat $SITE_TEMPLATE >> site-catalog.sh
if [ -e ${s}-extra-site-properties.xml ] ; then
cat ${s}-extra-site-properties.xml >> site-catalog.sh
fi
echo " </site>" >> site-catalog.sh
fi
done
# Close the catalog and terminate the generated here-document.
cat << EOF >> site-catalog.sh
</sitecatalog>
EOF
echo 'END_OF_TEXT' >> site-catalog.sh
# write out the site catalog
bash site-catalog.sh > site-catalog.xml
# If REMOTE_STAGING_SERVER was not set, but was needed by
# sites the user asked for, then raise an error and exit
# (the sentinel only shows up in the rendered catalog when a template actually
# referenced the remote staging URL)
if grep -q REMOTE_STAGING_SERVER_UNSET site-catalog.xml ; then
echo "Attempt to use a remote site but --remote-staging-server not specified"
exit 1
fi
# Plan the workflow: assemble ./pegasus-properties.conf and run pegasus-plan.
echo "Generating concrete workflow"

# cache the pegasus config (the user's file if given, else the packaged one)
if [ -z "${PEGASUS_PROPERTIES}" ] ; then
  cp "${PEGASUS_FILE_DIRECTORY}/pegasus-properties.conf" ./pegasus-properties.conf
else
  cp "${PEGASUS_PROPERTIES}" ./pegasus-properties.conf
fi

# add a transformation catalog, if specified
if [ -n "${TRANSFORMATION_CATALOG}" ] ; then
  cp "${TRANSFORMATION_CATALOG}" ./transformation-catalog.txt
  {
    echo
    echo "pegasus.catalog.transformation Text"
    echo "pegasus.catalog.transformation.file ${PWD}/transformation-catalog.txt"
  } >> pegasus-properties.conf
fi

# append the properties collected from the command line
{
  echo
  cat extra-properties.conf
} >> pegasus-properties.conf

# If user specified a value for pegasus.data.configuration then don't add
# another one; otherwise append nonshared unless the user said not to.
# -F matches the key literally (the dots used to match any character).
if [ "${SHARED_FILESYSTEM}" == 0 ] && ! grep -qF 'pegasus.data.configuration=' pegasus-properties.conf ; then
  echo 'pegasus.data.configuration=nonsharedfs' >> pegasus-properties.conf
fi

# Build the pegasus-plan command line once instead of keeping two nearly
# identical invocations in sync.
plan_args=(--conf ./pegasus-properties.conf -d "${DAX_FILE}" --sites "${SITE_LIST}")

# STAGING_SITES is either empty or the single string "--staging-site VALUE";
# split it into separate words.
read -ra staging_args <<< "${STAGING_SITES}"
plan_args+=("${staging_args[@]}")

plan_args+=(-o local --dir "${SUBMIT_DIR}" --cleanup inplace)
if [ "${CACHE_FILE}" != 0 ] ; then
  plan_args+=(--cache _reuse.cache)
fi
plan_args+=(--relative-dir work --cluster label)

# SUBMIT_DAX is "--submit" unless --no-submit was given.
if [ -n "${SUBMIT_DAX}" ] ; then
  plan_args+=("${SUBMIT_DAX}")
fi

pegasus-plan "${plan_args[@]}"
# Blank line after the planner output.
echo
# Point a stable "submitdir" link at the freshly created submit directory.
rm -f submitdir
ln -sf $SUBMIT_DIR submitdir
# Write one-line convenience scripts that run the matching Pegasus command on
# this workflow's work directory.  $SUBMIT_DIR is expanded now, so each file
# contains the absolute path; no interpreter line is written to these three.
echo pegasus-status $SUBMIT_DIR/work > status
chmod 755 status
echo pegasus-analyzer $SUBMIT_DIR/work > debug
chmod 755 debug
echo pegasus-remove $SUBMIT_DIR/work > stop
chmod 755 stop
# "start" repeats the grid proxy check done by this script before running
# pegasus-run.  Inside the here-document, backslash-escaped \$ and \` are
# written out literally and therefore evaluated when "start" is executed, not
# while this script runs.
cat << EOF > start
#!/bin/bash
if [ -f /tmp/x509up_u\`id -u\` ] ; then
unset X509_USER_PROXY
else
if [ ! -z \${X509_USER_PROXY} ] ; then
if [ -f \${X509_USER_PROXY} ] ; then
cp -a \${X509_USER_PROXY} /tmp/x509up_u\`id -u\`
fi
fi
unset X509_USER_PROXY
fi
# Check that the proxy is valid
grid-proxy-info -exists
RESULT=\${?}
if [ \${RESULT} -eq 0 ] ; then
PROXY_TYPE=\`grid-proxy-info -type | tr -d ' '\`
if [ x\${PROXY_TYPE} == 'xRFC3820compliantimpersonationproxy' ] ; then
grid-proxy-info
else
cp /tmp/x509up_u\`id -u\` /tmp/x509up_u\`id -u\`.orig
grid-proxy-init -cert /tmp/x509up_u\`id -u\`.orig -key /tmp/x509up_u\`id -u\`.orig
rm -f /tmp/x509up_u\`id -u\`.orig
grid-proxy-info
fi
else
echo "Error: Could not find a valid grid proxy to submit workflow."
exit 1
fi
EOF
# The final line of "start" launches the planned workflow; here $SUBMIT_DIR
# is expanded immediately.
echo pegasus-run $SUBMIT_DIR/work >> start
chmod 755 start
# Gather everything the results page displays under workflow/:
#   planning/    planner configuration, site catalog and braindump
#   input_map/   the replica cache used for data reuse
#   dax/         the main DAX plus every sub-DAX it references
#   output_map/  the matching output map files
mkdir -p workflow/input_map workflow/dax workflow/output_map

cp pegasus-properties.conf workflow/planning
cp site-catalog.xml workflow/planning
cp ${SUBMIT_DIR}/work/braindump.txt workflow/planning

cp _reuse.cache workflow/input_map/reuse.cache

# copy the main dax and output.map to the workflow directory
cp ${DAX_FILE} workflow/dax
if [ -e output.map ] ; then
  cp output.map workflow/output_map
fi

# ugly shell to extract the sub-dax names from the uberdax xml: take the
# lines holding a <dax element, put every attribute on its own line, keep the
# file= attributes and strip the quoting from their values
for sub_dax in $(grep -E '<[[:blank:]]*dax' ${DAX_FILE} | tr ' ' '\n' | grep file | awk -F= '{print $2}' | tr -d '">') ; do
  if [ -e ${sub_dax} ] ; then
    cp ${sub_dax} workflow/dax
  fi
  sub_map=$(basename ${sub_dax} .dax).map
  if [ -e ${sub_map} ] ; then
    cp ${sub_map} workflow/output_map
  fi
done
# With --no-submit there is nothing more to do: warn and stop successfully.
if [[ -z ${SUBMIT_DAX} ]] ; then
  cat <<'NO_SUBMIT_WARNING'

WARNING: DAX planned but not submitted. No dashboard entry has been created and
 the workflow section of results page will not show a dashboard URL.
 You must run this script without the --no-submit option if this is a
 production run.

NO_SUBMIT_WARNING
  exit 0
fi

# The dashboard link is looked up in the user's Pegasus database; without the
# database the workflow still runs, but the script reports a failure.
PEGASUS_DASHBOARD_DB=${HOME}/.pegasus/workflow.db
if [ ! -e ${PEGASUS_DASHBOARD_DB} ] ; then
  cat <<NO_DATABASE_WARNING
WARNING: Could not find Pegasus dashboard database in ${HOME}/.pegasus
 Workflow has been submitted but the results page will not contain
 a link to the dashboard page. If this is a production workflow,
 please remove the workflow, check for the origin of this error,
 and re-submit the workflow by re-running this script.

NO_DATABASE_WARNING
  exit 1
fi
# Look up the submitted workflow in the Pegasus dashboard database.
#
# On success WORKFLOW_ID_STRING holds "submit_hostname,wf_id,wf_uuid" as
# printed by sqlite3 in CSV mode.  The query is retried because the database
# entry may not exist yet right after submission.
WORKFLOW_ID_STRING=""
WORKFLOW_DB_FILE="${HOME}/.pegasus/workflow.db"
# Double any single quote so the directory name is a valid SQL string literal.
WORKFLOW_DB_QUERY="select submit_hostname,wf_id,wf_uuid from master_workflow where submit_dir = '${SUBMIT_DIR//\'/\'\'}/work';"
# Human readable form of the command, only used in the failure report.  The
# command itself is run directly rather than through eval, so paths with
# spaces or shell metacharacters cannot break it.
WORKFLOW_DB_CMD="sqlite3 -csv ${WORKFLOW_DB_FILE} \"${WORKFLOW_DB_QUERY}\""
DB_TRY=0
DB_QUERY_SUCCESS=1

# force a condor reschedule to get the workflow running (best effort)
condor_reschedule &> /dev/null || true

/bin/echo "Querying Pegasus database for workflow stored in ${SUBMIT_DIR}/work"
/bin/echo -n "This may take up to 120 seconds. Please wait..."

rm -f pegasus_db.log
touch pegasus_db.log

# querying the database sometimes fails, so allow retries; the wait grows by
# one second per attempt (1 + 2 + ... + 15 = 120 seconds in total)
set +e
until [ "${DB_TRY}" -ge 15 ] ; do
  /bin/echo -n "."
  if WORKFLOW_ID_STRING=$(sqlite3 -csv "${WORKFLOW_DB_FILE}" "${WORKFLOW_DB_QUERY}" 2>> pegasus_db.log) \
      && [ -n "${WORKFLOW_ID_STRING}" ] ; then
    /bin/echo " Done."
    DB_QUERY_SUCCESS=0
    break
  fi
  DB_TRY=$(( DB_TRY + 1 ))
  for (( waited = 0; waited < DB_TRY; waited++ )) ; do
    /bin/echo -n "."
    sleep 1
  done
done
set -e

if [ "${DB_QUERY_SUCCESS}" -eq 1 ] ; then
  echo; echo
  /bin/echo "Query failed: ${WORKFLOW_DB_CMD}"
  cat pegasus_db.log
else
  rm -f pegasus_db.log
fi

if [ -z "${WORKFLOW_ID_STRING}" ] ; then
  echo "WARNING: Could not find the workflow in the Pegasus dashboard database."
  echo " Workflow has been submitted but the results page will not contain"
  echo " a link to the dashboard page. If this is a production workflow,"
  echo " please remove the workflow, check for the origin of this error,"
  echo " and re-submit the workflow by re-running this script."
  echo
  exit 1
fi
# Build the dashboard URL from the database record and patch it, together
# with the command line, into the results dashboard page if there is one.

# WORKFLOW_ID_STRING is "submit_hostname,wf_id,wf_uuid".
IFS=',' read -ra WORKFLOW_ID_ARRAY <<< "${WORKFLOW_ID_STRING}"
DASHBOARD_URL="https://${WORKFLOW_ID_ARRAY[0]}/pegasus/u/${USER}/r/${WORKFLOW_ID_ARRAY[1]}/w?wf_uuid=${WORKFLOW_ID_ARRAY[2]}"
# Quoted: the URL contains "?", which is a glob character when left bare.
echo "${DASHBOARD_URL}" > workflow/pegasus-dashboard-url.txt

# nullglob makes the array empty when no dashboard page exists.
shopt -s nullglob
DASHBOARD_GLOB=(results/*_workflow/*DASHBOARD*.html)
shopt -u nullglob
DASHBOARD_PATH=${DASHBOARD_GLOB[0]}

# Only touch the page when the glob matched.  An empty, unquoted path used to
# make "[ -e ]" succeed, after which basename failed and "set -e" aborted the
# script even though the workflow had been submitted.
if [ -n "${DASHBOARD_PATH}" ] && [ -e "${DASHBOARD_PATH}" ] ; then
  # Keep a pristine copy of the page so that re-running this script
  # substitutes into the original placeholders again.
  DASHBOARD_COPY="workflow/$(basename "${DASHBOARD_PATH}").orig"
  if [ -e "${DASHBOARD_COPY}" ] ; then
    cp "${DASHBOARD_COPY}" "${DASHBOARD_PATH}"
  else
    cp "${DASHBOARD_PATH}" "${DASHBOARD_COPY}"
  fi
  # Entity-encode the command line; this also encodes "+", the delimiter of
  # the substitutions below.
  COMMAND_LINE=$(perl -e 'use HTML::Entities; print encode_entities(decode_entities(<STDIN>), "\'\"'\'\''<>()&+" );' <<< "${COMMAND_LINE}")
  perl -pi.bak -e "s+PYCBC_SUBMIT_DAX_ARGV+${COMMAND_LINE}+g" "${DASHBOARD_PATH}"
  perl -pi.bak -e "s+PEGASUS_DASHBOARD_URL+${DASHBOARD_URL}+g" "${DASHBOARD_PATH}"
fi
# Tell the user where to find the dashboard, then mirror the collected
# workflow files into the results tree.
echo "Workflow submission completed successfully."
echo
echo "The Pegasus dashboard URL for this workflow is:"
echo " ${DASHBOARD_URL}"
echo
echo "Note that it make take a while for the dashboard entry to appear while the workflow"
echo "is parsed by the dashboard. The delay can be on the order of one hour for very large"
echo "workflows."
echo

# If the workflow results directory exists, copy the files there.
# Both globs are expanded into arrays under nullglob.  An empty source
# directory used to leave the literal pattern "workflow/<dir>/*" behind; cp
# then failed and "set -e" turned a successful submission into an error exit.
shopt -s nullglob
for dir in planning dax input_map output_map ; do
  workflow_files=(workflow/"${dir}"/*)
  result_dirs=(results/*_workflow/*_"${dir}")
  if [ "${#workflow_files[@]}" -eq 0 ] ; then
    continue
  fi
  for RESULT_DIR in "${result_dirs[@]}" ; do
    if [ -d "${RESULT_DIR}" ] ; then
      cp "${workflow_files[@]}" "${RESULT_DIR}"
    fi
  done
done
shopt -u nullglob

exit 0
Computing file changes ...