#!/bin/bash
#############################################################################
#  Copyright (C) 2013-2015 Lawrence Livermore National Security, LLC.
#  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
#  Written by Albert Chu <chu11@llnl.gov>
#  LLNL-CODE-644248
#
#  This file is part of Magpie, scripts for running Hadoop on
#  traditional HPC systems.  For details, see https://github.com/llnl/magpie.
#
#  Magpie is free software; you can redistribute it and/or modify it
#  under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  Magpie is distributed in the hope that it will be useful, but
#  WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#  General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with Magpie.  If not, see <http://www.gnu.org/licenses/>.
#############################################################################

# This script is the core sparkwordcount running script.  For the most
# part, it shouldn't be edited.  See job submission files for
# configuration details.
# Runs the Spark JavaWordCount example against SPARK_SPARKWORDCOUNT_FILE.
#
# Environment read by this script:
#   MAGPIE_SCRIPTS_HOME               - directory holding the sourced helpers
#   SPARK_HOME, SPARK_VERSION         - Spark install and its version string
#   SPARK_CONF_DIR                    - where spark-env.sh is appended to
#   SPARK_MASTER_NODE/SPARK_MASTER_PORT - master URL for pre-1.x Spark
#   SPARK_JOB_CLASSPATH               - optional user classpath (pre-1.x)
#   SPARK_SPARKWORDCOUNT_FILE         - input file URI (hdfs:// or file://)
#   SPARK_SPARKWORDCOUNT_COPY_IN_FILE - optional source URI to copy into
#                                       SPARK_SPARKWORDCOUNT_FILE first
#   HADOOP_HOME                       - Hadoop install (for hdfs copies)
#
# hadoopcmdprefix and sparkcmdprefix are not set here; presumably they
# come from the sourced files below -- TODO confirm.
#
# Exit status: 1 on setup failures detected below, otherwise 0 (the
# status of the Spark job itself is not propagated, as before).

source "${MAGPIE_SCRIPTS_HOME}/magpie-common-exports"
source "${MAGPIE_SCRIPTS_HOME}/magpie-common-functions"
source "${MAGPIE_SCRIPTS_HOME}/magpie-variable-conversion"

# For this run, we will use cluster specific paths
Magpie_make_all_local_dirs_node_specific

sparkwordcountclass="org.apache.spark.examples.JavaWordCount"

# The spark commands below are invoked via ${sparkcmdprefix}, which
# looks like it is relative to SPARK_HOME, so a failed cd is fatal.
if ! cd "${SPARK_HOME}"
then
    echo "cd failed to ${SPARK_HOME}" >&2
    exit 1
fi

if [ -n "${SPARK_SPARKWORDCOUNT_COPY_IN_FILE}" ]
then
    hadoopbin="${HADOOP_HOME}/${hadoopcmdprefix}/hadoop"

    # Stays empty unless both source and destination schemes are
    # recognized, so we never execute a stale or empty command.
    copycmd=()

    if [[ "${SPARK_SPARKWORDCOUNT_FILE}" == *"hdfs://"* ]]
    then
        # Make directory of destination file
        filedir=$(dirname "${SPARK_SPARKWORDCOUNT_FILE}")
        mkdircmd=("${hadoopbin}" fs -mkdir -p "${filedir}")
        echo "Running ${mkdircmd[*]}" >&2
        "${mkdircmd[@]}"

        if [[ "${SPARK_SPARKWORDCOUNT_COPY_IN_FILE}" == "file://"* ]]
        then
            copycmd=("${hadoopbin}" fs -copyFromLocal "${SPARK_SPARKWORDCOUNT_COPY_IN_FILE}" "${SPARK_SPARKWORDCOUNT_FILE}")
        elif [[ "${SPARK_SPARKWORDCOUNT_COPY_IN_FILE}" == "hdfs://"* ]]
        then
            copycmd=("${hadoopbin}" fs -cp "${SPARK_SPARKWORDCOUNT_COPY_IN_FILE}" "${SPARK_SPARKWORDCOUNT_FILE}")
        fi
    elif [[ "${SPARK_SPARKWORDCOUNT_FILE}" == *"file://"* ]]
    then
        # Make directory of destination file (local path, so strip
        # the file:// scheme first)
        filedir=$(dirname "${SPARK_SPARKWORDCOUNT_FILE}")
        filedir=${filedir#file://}
        if ! mkdir -p "${filedir}"
        then
            echo "mkdir failed making ${filedir}" >&2
            exit 1
        fi

        if [[ "${SPARK_SPARKWORDCOUNT_COPY_IN_FILE}" == "file://"* ]]
        then
            sparkinputfile=${SPARK_SPARKWORDCOUNT_COPY_IN_FILE#file://}
            sparkdestfile=${SPARK_SPARKWORDCOUNT_FILE#file://}
            copycmd=(cp "${sparkinputfile}" "${sparkdestfile}")
        elif [[ "${SPARK_SPARKWORDCOUNT_COPY_IN_FILE}" == "hdfs://"* ]]
        then
            copycmd=("${hadoopbin}" fs -copyToLocal "${SPARK_SPARKWORDCOUNT_COPY_IN_FILE}" "${SPARK_SPARKWORDCOUNT_FILE}")
        fi
    fi

    if [ "${#copycmd[@]}" -gt 0 ]
    then
        echo "Running copy in ${copycmd[*]}" >&2
        "${copycmd[@]}"
    else
        echo "Skipping copy in, expected file:// or hdfs:// in '${SPARK_SPARKWORDCOUNT_COPY_IN_FILE}' and '${SPARK_SPARKWORDCOUNT_FILE}'" >&2
    fi
fi

# For some reason, the examples jar is named with seemingly random
# versioning, so we'll have to "find" it for each case.  Only the
# first match is used so that extra matches cannot turn into extra
# command line arguments.

if [[ "${SPARK_VERSION}" =~ 1\.[0-9]\.[0-9] ]]
then
    sparkexamplesjar=$(find "${SPARK_HOME}/lib" | grep jar | grep examples | head -n 1)
else
    sparkexamplesjar=$(find "${SPARK_HOME}/examples" | grep jar | grep examples | grep -v assembly | grep -v sources | head -n 1)
fi

if [ -z "${sparkexamplesjar}" ]
then
    echo "Could not find spark examples jar under ${SPARK_HOME}" >&2
    exit 1
fi

if [[ "${SPARK_VERSION}" =~ 1\.[0-9]\.[0-9] ]]
then
    runcmd=("${sparkcmdprefix}/spark-submit" --class "${sparkwordcountclass}" "${sparkexamplesjar}" "${SPARK_SPARKWORDCOUNT_FILE}")
else
    # Ugh, Spark 0.9.1 doesn't have a way to pass in specific jars
    # except via the SPARK_CLASSPATH environment variable.
    #
    # Because user may have set SPARK_JOB_CLASSPATH, we can't just set
    # it here.  So we'll reput it into spark-env.sh
    #
    # Classpath entries are joined with ':' (the JVM path separator
    # on Unix); ';' would make the JVM see a single bogus entry.
    if [ -n "${SPARK_JOB_CLASSPATH}" ]
    then
        sparkclasspath="${SPARK_JOB_CLASSPATH}:${sparkexamplesjar}"
    else
        sparkclasspath="${sparkexamplesjar}"
    fi
    echo "export SPARK_CLASSPATH=\"${sparkclasspath}\"" >> "${SPARK_CONF_DIR}/spark-env.sh"

    runcmd=("${sparkcmdprefix}/spark-class" "${sparkwordcountclass}" "spark://${SPARK_MASTER_NODE}:${SPARK_MASTER_PORT}" "${SPARK_SPARKWORDCOUNT_FILE}")
fi

echo "Running ${runcmd[*]}" >&2
"${runcmd[@]}"

exit 0