Airflow 설치
ssh -i <pem 파일명> ubuntu@<ec2 호스트네임>
sudo apt-get update
sudo apt-get install -y python3-pip
sudo apt-get install -y postgresql-server-dev-all
sudo apt-get install -y libmysqlclient-dev
sudo pip3 install numpy
sudo pip3 install pandas
sudo pip3 install apache-airflow==2.2.5
sudo pip3 install apache-airflow-providers-postgres==2.2.0
sudo pip3 install apache-airflow-providers-mysql==2.2.0
sudo pip3 install apache-airflow-providers-amazon==2.3.0
sudo pip3 install apache-airflow-providers-slack
sudo pip3 install apache-airflow-providers-google
sudo pip3 install SQLAlchemy==1.3.23
sudo pip3 install oauth2client
sudo pip3 install gspread
sudo pip3 install typing_extensions
sudo groupadd airflow
sudo useradd -s /bin/bash airflow -g airflow -d /var/lib/airflow -m
sudo apt-get install -y postgresql postgresql-contrib
sudo su postgres
psql
postgres=# CREATE USER airflow PASSWORD 'airflow';
postgres=# CREATE DATABASE airflow;
postgres=# \q
exit
sudo service postgresql restart
Airflow 기본설정
sudo su airflow
cd ~/
mkdir dags
AIRFLOW_HOME=/var/lib/airflow airflow db init
airflow.cfg 편집 (공식문서 참고)
vi /var/lib/airflow/airflow.cfg
...
executor = LocalExecutor
...
sql_alchemy_conn = postgresql+psycopg2://airflow:airflow@localhost:5432/airflow
...
load_examples = False
...
default_ui_timezone = Asia/Seoul
...
default_timezone = Asia/Seoul
:wq
AIRFLOW_HOME=/var/lib/airflow airflow db init
exit
sudo vi /etc/systemd/system/airflow-webserver.service
[Unit]
Description=Airflow webserver
After=network.target
[Service]
Environment=AIRFLOW_HOME=/var/lib/airflow
User=airflow
Group=airflow
Type=simple
ExecStart=/usr/local/bin/airflow webserver -p 8080
Restart=on-failure
RestartSec=10s
[Install]
WantedBy=multi-user.target
:wq
sudo vi /etc/systemd/system/airflow-scheduler.service
[Unit]
Description=Airflow scheduler
After=network.target
[Service]
Environment=AIRFLOW_HOME=/var/lib/airflow
User=airflow
Group=airflow
Type=simple
ExecStart=/usr/local/bin/airflow scheduler
Restart=on-failure
RestartSec=10s
[Install]
WantedBy=multi-user.target
:wq
sudo systemctl daemon-reload
sudo systemctl enable airflow-webserver
sudo systemctl enable airflow-scheduler
sudo systemctl start airflow-webserver
sudo systemctl start airflow-scheduler
sudo systemctl status airflow-webserver
sudo systemctl status airflow-scheduler
http://<ec2_hostname>:8080
DAG 실행
/var/lib/airflow/dags/ 에 DAG 파일 작성
WEB에서 DAG 실행
CLI로 DAG 실행
airflow dags list # DAG 리스트 조회
airflow tasks list <dag_id> # DAG 안의 Task 조회
airflow tasks test <dag_id> <task_id> <execution_date> # DAG중 특정 Task 실행
airflow dags test <dag_id> <execution_date> # DAG 전체 실행
airflow dags backfill <dag_id> -s <start_date> -e <end_date> # DAG Backfill 실행