https://github.com/dmwm/CMSSpark
Raw File
Tip revision: b4075987d75cfc7467e3a466765255d5aa7bcc1d authored by Ceyhun on 24 March 2023, 14:51:15 UTC
Merge pull request #146 from mrceyhun/decommission-static-cpueff
Tip revision: b407598
run_hdfs_an.sh
#!/bin/bash
##H Script to anonymise data on HDFS
##H Usage: run_hdfs_an.sh <input-dir> <output-dir> <attributes> [log-file]
##H        to run this script you need to fetch CMSSpark and reside in its area
##H        Example:
##H        git clone git@github.com:dmwm/CMSSpark.git
##H        cd CMSSpark; bin/run_hdfs_an.sh ...
##H
##H Options:
##H   input-dir      input area on HDFS
##H   output-dir     output area on HDFS
##H   attributes     comma separated attributes, e.g. user_dn,Country
##H   log-file       log file (default /tmp/hdfs_an.log)
##H

# Require at least the three mandatory arguments; otherwise print the ##H
# usage header of this script and exit with status 1.
if [ "$#" -lt 3 ]; then
    grep "^##H" "$0" | sed -e "s,##H,,g"
    exit 1
fi

hdir=$1   # input area on HDFS
odir=$2   # output area on HDFS
attrs=$3  # comma separated attributes, e.g. user_dn,Country
# Optional 4th argument: log file. The fallback is the default documented in
# the usage header: a plain file directly under /tmp, so no intermediate
# directory has to exist for the output redirection to succeed.
log=${4:-/tmp/hdfs_an.log}

# The script is expected to be started from the CMSSpark checkout, so the
# current directory doubles as the working directory.
wdir=$PWD

# DO NOT EDIT BELOW THIS LINE
# Recipient of the fallback e-mail alert.
addr=cms-comp-monit-alerts@cern.ch

# Hadoop configuration (for Spark 2.X).
export HADOOP_CONF_DIR=/etc/hadoop/conf

# Local environment: CMSSpark python sources and bin/ take precedence, the
# Hadoop binaries are appended at the end of PATH.
export PYTHONPATH="$wdir/src/python:$PYTHONPATH"
export PATH="$wdir/bin:$PATH:/usr/hdp/hadoop/bin"

# Locate amtool: try the first known location, fall back to the second one,
# and leave the variable empty when neither file exists.
amtool=/data/cms/bin/amtool
[[ -f "$amtool" ]] || amtool=/cvmfs/cms.cern.ch/cmsmon/amtool
[[ -f "$amtool" ]] || amtool=""

# Assemble the command line that will be executed and announce it.
printf -v cmd '%s/bin/run_spark hdfs_an.py --yarn --fout=%s --attrs=%s --hdir=%s' \
    "$PWD" "$odir" "$attrs" "$hdir"
echo "Will execute ..."
# Left unquoted on purpose so the printed text is unchanged.
echo $cmd
msg="Error while executing $cmd on $USER@$(hostname) log at $log"

echo "amtool=$amtool"

# From here on, stop at the first command that fails.
set -o errexit

# Call func whenever the shell exits. The handler name is only resolved when
# the trap fires, so it may be defined after this line.
trap func EXIT
# Exit handler: raises an alert when the script exits with a non-zero status.
#
# Globals read:
#   amtool  path of the amtool binary; empty means "fall back to e-mail"
#   cmd     command line that was executed (sent as annotation)
#   log     log file location (sent as annotation)
#   addr    recipient of the fallback e-mail
#
# When $amtool is set, an alert is posted to every alertmanager URL. A failure
# to reach one URL is reported on stderr and does not prevent the remaining
# URLs from being tried (a failing command would otherwise abort the handler
# when errexit is active). When $amtool is empty, a mail is sent to $addr.
function func() {
    # Must be the first statement: $? still holds the exit status here.
    local status=$?
    if [ "$status" -eq 0 ]; then
        return 0
    fi

    # NOTE(review): this local msg shadows the more detailed script-level
    # $msg (command, user@host, log); confirm which text is intended.
    local msg="run_hdfs_an completed with non zero status"

    if [ "$amtool" == "" ]; then
        echo "$msg" | mail -s "alert run_hdfs_an" "$addr"
        return
    fi

    # Alert end time: one hour from now, RFC 3339 with 'T' as date/time
    # separator. Declared separately so the assignment status is not masked.
    local expire
    expire=$(date -d '+1 hour' --rfc-3339=ns | tr ' ' 'T') || expire=""
    local -a end_opt=()
    if [ -n "$expire" ]; then
        end_opt=(--end="$expire")
    fi

    local -a urls=(
        "http://cms-monitoring.cern.ch:30093"
        "http://cms-monitoring-ha1.cern.ch:30093"
        "http://cms-monitoring-ha2.cern.ch:30093"
    )
    local url
    for url in "${urls[@]}"; do
        "$amtool" alert add run_hdfs_an \
            alertname=hdfs_an severity=medium tag=cronjob alert=amtool \
            "${end_opt[@]}" \
            --annotation=summary="$msg" \
            --annotation=date="$(date)" \
            --annotation=hostname="$(hostname)" \
            --annotation=status="$status" \
            --annotation=command="$cmd" \
            --annotation=log="$log" \
            --alertmanager.url="$url" ||
            echo "run_hdfs_an: unable to send alert to $url" >&2
    done
}

# Make sure the directory that holds the log file exists; otherwise the
# redirection below fails before the job is even started.
mkdir -p -- "$(dirname -- "$log")"

# Run the job, appending stdout and stderr to the log file.
# $cmd is intentionally unquoted: it stores the program and its arguments in a
# single string and relies on word splitting.
time $cmd >> "$log" 2>&1
back to top