| Server | master | slave01 | slave02 | slave03 |
|---|---|---|---|---|
| OS | CentOS 7 | CentOS 7 | CentOS 7 | CentOS 7 |
| Disk Size | 1000 GB | 1000 GB | 1000 GB | 1000 GB |
| Memory | 32 GB | 16 GB | 16 GB | 16 GB |
| Processors | 12 | 12 | 12 | 12 |
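All commands below assume the hostnames master, slave01, slave02, and slave03 resolve on every node. A minimal /etc/hosts sketch (the IP addresses are placeholders; substitute your own network):

# /etc/hosts on every node (example IPs)
192.168.0.10  master
192.168.0.11  slave01
192.168.0.12  slave02
192.168.0.13  slave03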
$ cd ~
$ wget https://archive.apache.org/dist/spark/spark-2.4.0/spark-2.4.0-bin-hadoop2.7.tgz
$ tar xvf spark-2.4.0-bin-hadoop2.7.tgz
$ sudo mv ~/spark-2.4.0-bin-hadoop2.7 ~/spark
# Append to ~/.bashrc so the settings persist across sessions
export SPARK_HOME=~/spark
export PATH=$PATH:$SPARK_HOME/bin:$SPARK_HOME/sbin
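Before going further, it is worth confirming that the unpacked distribution runs. A quick sanity check (not part of the original steps): reload the profile and print the version.

$ source ~/.bashrc
$ spark-submit --version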
$ cd $SPARK_HOME/conf
$ cp spark-env.sh.template spark-env.sh
$ vim spark-env.sh
# Standalone master location
export SPARK_MASTER_HOST=master
export SPARK_MASTER_PORT=7077
# Resources each worker offers (2 cores / 4 GB per worker instance)
export SPARK_WORKER_CORES=2
export SPARK_WORKER_MEMORY=4g
export SPARK_WORKER_INSTANCES=1
# Reuse the Java/Hadoop/YARN settings already in the environment
export JAVA_HOME=${JAVA_HOME}
export HADOOP_HOME=${HADOOP_HOME}
export YARN_CONF_DIR=${YARN_CONF_DIR}
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"}
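The history server started later with start-history-server.sh needs event logging configured, or it will have nothing to display. A minimal spark-defaults.conf sketch, assuming the ~/spark/eventLog directory that this guide clears later (the <user> path is a placeholder; create the directory first):

$ cp spark-defaults.conf.template spark-defaults.conf
$ mkdir -p ~/spark/eventLog
$ vim spark-defaults.conf
spark.eventLog.enabled           true
spark.eventLog.dir               file:///home/<user>/spark/eventLog
spark.history.fs.logDirectory    file:///home/<user>/spark/eventLog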
$ vim ~/spark/conf/slaves
slave01
slave02
slave03
$ ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa; \
  cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys; \
  chmod 0600 ~/.ssh/authorized_keys
# Send the SSH keys to each slave's home directory
$ scp -r ~/.ssh <user>@slave01:~/
$ scp -r ~/.ssh <user>@slave02:~/
$ scp -r ~/.ssh <user>@slave03:~/
# Send the Spark directory to each slave's home directory
$ scp -r ~/spark <user>@slave01:~/
$ scp -r ~/spark <user>@slave02:~/
$ scp -r ~/spark <user>@slave03:~/
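A quick way to confirm the key distribution worked is to loop over the workers and run a command non-interactively; if any host still prompts for a password, revisit the previous step (a sketch assuming the same <user> account):

$ for host in slave01 slave02 slave03; do ssh <user>@$host hostname; done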
$ ~/spark/sbin/start-all.sh
$ ~/spark/sbin/start-history-server.sh
$ ~/spark/sbin/stop-all.sh
$ ~/spark/sbin/stop-history-server.sh
$ rm -rf ~/spark/eventLog/*
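After start-all.sh, the standalone master's web UI listens on port 8080 and, once started, the history server on 18080; jps should also show a Master process on this node and a Worker on each slave. A quick check:

$ jps
$ curl -s http://master:8080 | head -n 5
$ curl -s http://master:18080 | head -n 5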
$ wget https://repo.anaconda.com/archive/Anaconda3-2021.05-Linux-x86_64.sh
$ bash Anaconda3-2021.05-Linux-x86_64.sh
$ vim ~/.bashrc
export CONDA_HOME=~/anaconda3
export PATH=$PATH:$CONDA_HOME/bin:$CONDA_HOME/condabin
export PYSPARK_PYTHON=python3
export PYSPARK_DRIVER_PYTHON=jupyter
export PYSPARK_DRIVER_PYTHON_OPTS='notebook --allow-root'
$ source ~/.bashrc
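With PYSPARK_DRIVER_PYTHON set to jupyter, every pyspark launch opens a notebook. If you occasionally want a plain interactive shell instead, the variables can be overridden per invocation (an optional convenience, not part of the original steps):

$ PYSPARK_DRIVER_PYTHON=python PYSPARK_DRIVER_PYTHON_OPTS= ~/spark/bin/pyspark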
--allow-root
Required when running Jupyter as the Linux root account.
$ conda config --set auto_activate_base False
$ pip install jupyter
$ jupyter notebook --generate-config
$ vim /root/.jupyter/jupyter_notebook_config.py
c.NotebookApp.notebook_dir = '/data'
c.NotebookApp.open_browser = False
c.NotebookApp.password = ''  # paste the hash generated below
c.NotebookApp.port = 8888
c.NotebookApp.ip = '172.17.0.2'
c.NotebookApp.allow_origin = '*'
$ python (opens the Python prompt)
from notebook.auth import passwd
passwd()
Copy the output string and paste it in as the value of c.NotebookApp.password.
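The finished line in jupyter_notebook_config.py then looks like this (the hash itself is elided; use your own passwd() output):

c.NotebookApp.password = '<paste the passwd() output here>'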
$ ~/spark/bin/pyspark
Running this launches Jupyter Notebook.
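To verify the cluster end to end outside the notebook, you can submit the bundled SparkPi example against the standalone master (the jar name assumes the Spark 2.4.0 / Scala 2.11 prebuilt distribution used above):

$ ~/spark/bin/spark-submit \
    --master spark://master:7077 \
    --class org.apache.spark.examples.SparkPi \
    ~/spark/examples/jars/spark-examples_2.11-2.4.0.jar 100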