Content - fa54f31aa49f0875aacbc8a8ba52fbe8971f8210 - 60224c2/magpie-run-spark-sparkwordcount

visit type:
Tip revision: 6f32a3dece2dd797c9395abc418e3e246a3077cb authored by Al Chu on 05 July 2016, 06:30:59 UTC
Merge pull request #157 from LLNL/storm100
Tip revision: 6f32a3d
magpie-run-spark-sparkwordcount
#!/bin/bash
#############################################################################
#  Copyright (C) 2013-2015 Lawrence Livermore National Security, LLC.
#  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
#  Written by Albert Chu <chu11@llnl.gov>
#  LLNL-CODE-644248
#  
#  This file is part of Magpie, scripts for running Hadoop on
#  traditional HPC systems.  For details, see https://github.com/llnl/magpie.
#  
#  Magpie is free software; you can redistribute it and/or modify it
#  under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#  
#  Magpie is distributed in the hope that it will be useful, but
#  WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#  General Public License for more details.
#  
#  You should have received a copy of the GNU General Public License
#  along with Magpie.  If not, see <http://www.gnu.org/licenses/>.
#############################################################################

# This script is the core sparkwordcount running script.  For the most
# part, it shouldn't be edited.  See job submission files for
# configuration details.

# Pull in the shared Magpie exports, helper functions and variable
# conversions.  Names used later in this script that are not defined
# here (e.g. Magpie_make_all_local_dirs_node_specific, hadoopcmdprefix,
# sparkcmdprefix) are presumably provided by these files.
source "${MAGPIE_SCRIPTS_HOME}/magpie-common-exports"
source "${MAGPIE_SCRIPTS_HOME}/magpie-common-functions"
source "${MAGPIE_SCRIPTS_HOME}/magpie-variable-conversion"

# For this run, we will use cluster specific paths
Magpie_make_all_local_dirs_node_specific

# Main class of the word count example that gets submitted to Spark.
sparkwordcountclass="org.apache.spark.examples.JavaWordCount"

# The spark commands below are launched after changing into SPARK_HOME,
# so refuse to continue from some other directory if SPARK_HOME is
# unset/empty or cannot be entered.
cd "${SPARK_HOME:?SPARK_HOME must be set}" || {
    echo "cd failed entering ${SPARK_HOME}" >&2
    exit 1
}
	    
# For some reason, the examples jar is named with seemingly random
# versioning, so we'll have to "find" it for each case

# Print the command that copies the wordcount input into place.
#
# Arguments: $1 - copy-in source URI (SPARK_SPARKWORDCOUNT_COPY_IN_FILE)
#            $2 - destination URI (SPARK_SPARKWORDCOUNT_FILE)
# Globals:   HADOOP_HOME, hadoopcmdprefix (read)
# Outputs:   the command line on stdout
# Returns:   0 if a command was printed, 1 if the source/destination
#            scheme combination is not handled (nothing is printed)
Sparkwordcount_copy_in_command () {
    local copyinsrc="$1"
    local copyindest="$2"
    local hadoopfscmd="${HADOOP_HOME}/${hadoopcmdprefix}/hadoop fs"

    case "${copyindest}" in
        *hdfs://*)
            case "${copyinsrc}" in
                file://*)
                    echo "${hadoopfscmd} -copyFromLocal ${copyinsrc} ${copyindest}"
                    ;;
                hdfs://*)
                    echo "${hadoopfscmd} -cp ${copyinsrc} ${copyindest}"
                    ;;
                *)
                    return 1
                    ;;
            esac
            ;;
        *file://*)
            case "${copyinsrc}" in
                file://*)
                    # Plain cp works on paths, not URIs, so drop the
                    # leading file:// from both ends.
                    echo "cp ${copyinsrc#file://} ${copyindest#file://}"
                    ;;
                hdfs://*)
                    echo "${hadoopfscmd} -copyToLocal ${copyinsrc} ${copyindest}"
                    ;;
                *)
                    return 1
                    ;;
            esac
            ;;
        *)
            return 1
            ;;
    esac

    return 0
}

# Optionally stage the input file before running the word count.  Only
# done when SPARK_SPARKWORDCOUNT_COPY_IN_FILE is set.
if [ "${SPARK_SPARKWORDCOUNT_COPY_IN_FILE}X" != "X" ]
then
    # Make directory of destination file
    case "${SPARK_SPARKWORDCOUNT_FILE}" in
        *hdfs://*)
            filedir=$(dirname "${SPARK_SPARKWORDCOUNT_FILE}")
            command="${HADOOP_HOME}/${hadoopcmdprefix}/hadoop fs -mkdir -p ${filedir}"
            echo "Running $command" >&2
            $command
            ;;
        *file://*)
            filedir=$(dirname "${SPARK_SPARKWORDCOUNT_FILE}")
            filedir="${filedir#file://}"

            if ! mkdir -p "${filedir}"
            then
                echo "mkdir failed making ${filedir}" >&2
                exit 1
            fi
            ;;
    esac

    # Build the copy command from scratch.  Previously an unsupported
    # scheme fell through and re-ran whatever $command last held (for
    # an hdfs destination, the mkdir above) while reporting it as the
    # copy; now that case is reported and the script stops.
    if ! command=$(Sparkwordcount_copy_in_command "${SPARK_SPARKWORDCOUNT_COPY_IN_FILE}" "${SPARK_SPARKWORDCOUNT_FILE}")
    then
        echo "Unsupported copy in from ${SPARK_SPARKWORDCOUNT_COPY_IN_FILE} to ${SPARK_SPARKWORDCOUNT_FILE}" >&2
        exit 1
    fi

    # $command is intentionally unquoted so it word-splits into the
    # program and its arguments.
    echo "Running copy in $command" >&2
    $command
fi

# Build and run the word count job.  The examples jar is located with
# find rather than hard-coded, since its file name varies between
# Spark builds.
if echo ${SPARK_VERSION} | grep -q -E "1\.[0-9]\.[0-9]"
then
    # Versions matching 1.x.y: the jar is looked up under lib/ and the
    # job is launched through spark-submit.
    sparkexamplesjar=$(find "${SPARK_HOME}/lib" | grep jar | grep examples)

    command="${sparkcmdprefix}/spark-submit --class ${sparkwordcountclass} ${sparkexamplesjar} ${SPARK_SPARKWORDCOUNT_FILE}"
else
    # Other versions: the jar is looked up under examples/, skipping
    # assembly and sources jars.
    sparkexamplesjar=$(find "${SPARK_HOME}/examples" | grep jar | grep examples | grep -v assembly | grep -v sources)

    # Ugh, Spark 0.9.1 doesn't have a way to pass in specific jars
    # except via the SPARK_CLASSPATH environment variable.
    #
    # Because user may have set SPARK_JOB_CLASSPATH, we can't just set
    # it here.  So we append a new export to spark-env.sh that carries
    # both the user's classpath and the examples jar.
    #
    # Entries are joined with ':' (the class path separator on Unix);
    # a ';' would leave both paths fused into one unusable entry.

    if [ "${SPARK_JOB_CLASSPATH}X" != "X" ]
    then
        echo "export SPARK_CLASSPATH=\"${SPARK_JOB_CLASSPATH}:${sparkexamplesjar}\"" >> "${SPARK_CONF_DIR}/spark-env.sh"
    else
        echo "export SPARK_CLASSPATH=\"${sparkexamplesjar}\"" >> "${SPARK_CONF_DIR}/spark-env.sh"
    fi

    command="${sparkcmdprefix}/spark-class ${sparkwordcountclass} spark://$SPARK_MASTER_NODE:$SPARK_MASTER_PORT ${SPARK_SPARKWORDCOUNT_FILE}"
fi

# $command is intentionally unquoted so it word-splits into the program
# and its arguments.
echo "Running $command" >&2
$command

# The script reports success regardless of the job's own exit status.
exit 0
Browse the archive

https://github.com/chu11/magpie