https://github.com/valiantljk/h5spark
Tip revision: 74e7da1d402d21ef3fbb7c5f3c4a06b6243291fc authored by jialin on 17 February 2021, 22:05:42 UTC
Update README.md
Update README.md
Tip revision: 74e7da1
spark-scala-yarn.sh
#!/bin/bash
# Copyright (C) 2016 The HDF Group
# All rights reserved
#
# This example shows how to submit spark job using Yarn in cluster mode.
# This is modifeid shell script based on spark-scala.sh.
# This script is tested on Mac OS X Mavericks machine with the following:
#
# Hadoop 2.7.3
# Spark 2.0.0 with Hadoop 2.7.
#
export JAVA_LIBRARY_PATH=$JAVA_LIBRARY_PATH:/Users/hyoklee/Library/Java/Extensions
# This is critical for Yarn to load HDF5 JNI library properly.
export SPARK_YARN_USER_ENV="JAVA_LIBRARY_PATH=$JAVA_LIBRARY_PATH,LD_LIBRARY_PATH=$LD_LIBRARY_PATH"
export HADOOP_MAPRED_HOME=/Users/hyoklee/src/hadoop-2.7.3/
# This is useful to check java.library.path.
java -XshowSettings:properties -version
repartition="1"
# A NASA HDF-EOS5 file
inputfile="/tmp/GSSTF_NCEP.3.1987.07.01.he5"
app_name="H5Sspark-udf"
dataset="/HDFEOS/GRIDS/NCEP/Data Fields/SST"
# For standalone single node
# SPARKURL="local[1]"
# For standalone cluster
# SPARKURL="spark://localhost:7077"
# For Yarn
SPARKURL="yarn"
SCRATCH="/tmp"
spark-submit --verbose\
--master $SPARKURL\
--deploy-mode cluster \
--name $app_name \
--executor-cores 1 \
--driver-cores 1 \
--num-executors=1 \
--class org.nersc.io.readtest\
--conf spark.eventLog.enabled=true\
--conf spark.eventLog.dir=$SCRATCH/spark\
--driver-library-path=.\
target/scala-2.11/h5spark-assembly-1.0.jar \
$repartition "$inputfile" "$dataset"
rm /tmp/spark/*
# stop-all.sh
# stop-collectl.sh