# 하둡 클러스터링이 완료됐다는 전제하에 진행한다.
# 참고: https://velog.io/@kidae92/Hadoop-Cluster
# Spark environment — add these two lines to ~/.bashrc, then reload it.
# `export` is required: the Spark launcher scripts run as child processes
# and must see SPARK_HOME in their environment.
export SPARK_HOME=/usr/local/spark
export PATH="$PATH:$SPARK_HOME/bin:$SPARK_HOME/sbin"
source ~/.bashrc
# Download Spark 3.0.3 (Hadoop 3.2 build) and unpack it into $SPARK_HOME.
# NOTE: archived releases are removed from downloads.apache.org and live on
# archive.apache.org instead.
wget https://archive.apache.org/dist/spark/spark-3.0.3/spark-3.0.3-bin-hadoop3.2.tgz
# /usr/local is root-owned, so both the mkdir and the extraction need sudo;
# --strip-components 1 drops the top-level spark-3.0.3-bin-hadoop3.2/ directory.
sudo mkdir -p "$SPARK_HOME"
sudo tar -zxvf spark-3.0.3-bin-hadoop3.2.tgz -C "$SPARK_HOME" --strip-components 1
# Hand the tree back to the current user so Spark can be managed without sudo.
sudo chown -R "$USER:$USER" "$SPARK_HOME"
# In $SPARK_HOME/conf: create the worker list from the shipped template.
cp slaves.template slaves
# Contents of conf/slaves — one worker hostname per line (these names must
# resolve, e.g. via /etc/hosts, and match the Hadoop cluster's workers):
worker1
worker2
worker3
worker4
# In $SPARK_HOME/conf: create the environment file from the shipped template.
cp spark-env.sh.template spark-env.sh
# Lines to add to conf/spark-env.sh:
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
# Hostname the standalone master binds to.
export SPARK_MASTER_HOST=master
export HADOOP_HOME=/usr/local/hadoop
# Point Spark at the Hadoop/YARN client configuration so spark-submit
# can run applications on the YARN cluster.
export YARN_CONF_DIR=$HADOOP_HOME/etc/hadoop
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
# Lines to add to conf/spark-defaults.conf — default to YARN and enable
# event logging so the history server can replay finished applications:
spark.master yarn
spark.eventLog.enabled true
spark.eventLog.dir file:///usr/local/spark/eventLog
spark.history.fs.logDirectory file:///usr/local/spark/eventLog
# Create the local event-log directory the two settings above point at.
mkdir -p $SPARK_HOME/eventLog
# Start the full stack: format HDFS, bring up HDFS + YARN + the MapReduce
# history server, then the Spark standalone daemons and Spark history server.
# (The original lines carried a leading "> " shell-prompt artifact; in a real
# shell that is an output redirection that would truncate each target file.)
"$HADOOP_HOME/bin/hdfs" namenode -format -force   # WARNING: erases all HDFS metadata
"$HADOOP_HOME/sbin/start-dfs.sh"
"$HADOOP_HOME/sbin/start-yarn.sh"
"$HADOOP_HOME/bin/mapred" --daemon start historyserver
"$SPARK_HOME/sbin/start-all.sh"
"$SPARK_HOME/sbin/start-history-server.sh"
# 기동 후 Spark Master 웹 UI (http://master:8080) 화면에서 워커 등록을 확인한다.
# Stop the full stack and clear logs/state before the next run.
# ${VAR:?} aborts if the variable is unset/empty, so an unset SPARK_HOME or
# HADOOP_HOME can never turn these into `rm -rf /eventLog/*` etc.
"$SPARK_HOME/sbin/stop-all.sh"
"$SPARK_HOME/sbin/stop-history-server.sh"
rm -rf -- "${SPARK_HOME:?}/eventLog"/*
"$HADOOP_HOME/sbin/stop-dfs.sh"
"$HADOOP_HOME/sbin/stop-yarn.sh"
"$HADOOP_HOME/bin/mapred" --daemon stop historyserver
# Wiping the NameNode/DataNode dirs forces a clean re-format on next start.
rm -rf -- "${HADOOP_HOME:?}/data/namenode"/*
rm -rf -- "${HADOOP_HOME:?}/data/datanode"/*