
docker-compose.yml (alertmanager service):
services:
  alertmanager:
    image: prom/alertmanager:v0.27.0
    container_name: do-alertmanager
    restart: unless-stopped
    ports:
      - 9300:9093
    volumes:
      - ./conf/alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml
      - ./data/alertmanager:/data
    command:
      - '--config.file=/etc/alertmanager/alertmanager.yml'
      - '--storage.path=/data'
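With the alertmanager service defined, it can be brought up and checked through its health endpoint on the published port (a minimal check, assuming the other monitoring services such as Prometheus, node_exporter, and cAdvisor from the earlier compose file live in the same docker-compose.yml):
docker compose up -d alertmanager
curl -s http://localhost:9300/-/healthy
The health endpoint should answer OK once the container is running.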
prometheus.yml:
global:
  scrape_interval: 15s
  evaluation_interval: 10s
rule_files:
  - rules.yml
  - cpu_rules.yml
  - mem_rules.yml
  - cpu_recording_rules.yml
  - mem_recording_rules.yml
alerting:
  alertmanagers:
    - static_configs:
        - targets: ['localhost:9300']
      scheme: http
      timeout: 10s
      api_version: v2
scrape_configs:
  - job_name: 'prometheus'
    scrape_interval: 5s
    static_configs:
      - targets: ['localhost:9090']
  - job_name: 'node_exporter'
    static_configs:
      - targets: ['localhost:9100']
  - job_name: 'cadvisor'
    static_configs:
      - targets: ['localhost:9200']
rules.yml:
groups:
  - name: AllInstances
    rules:
      - alert: InstanceDown
        expr: up == 0
        for: 1m
        annotations:
          title: 'Instance {{ $labels.instance }} down'
          description: '{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute.'
        labels:
          severity: 'critical'
cpu_rules.yml:
groups:
  - name: CPUThreshold
    rules:
      - alert: HighCPUUsage
        expr: job:cpu_usage:avg1m > 50
        for: 3m
        labels:
          severity: warning
        annotations:
          summary: "High CPU usage on {{ $labels.instance }}"
          description: "CPU usage on {{ $labels.instance }} is greater than 50%. Current usage: {{ $value }}%"
      - alert: CriticalCPUUsage
        expr: job:cpu_usage:avg1m > 90
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Critical CPU usage on {{ $labels.instance }}"
          description: "CPU usage on {{ $labels.instance }} is greater than 90%. Current usage: {{ $value }}%"
mem_rules.yml:
groups:
  - name: MemoryThreshold
    rules:
      - alert: HighMemoryUsage
        expr: job:memory_usage > 60
        for: 3m
        labels:
          severity: warning
        annotations:
          summary: "High Memory usage on {{ $labels.instance }}"
          description: "Memory usage on {{ $labels.instance }} is greater than 60%. Current usage: {{ $value }}%"
      - alert: CriticalMemoryUsage
        expr: job:memory_usage > 90
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Critical Memory usage on {{ $labels.instance }}"
          description: "Memory usage on {{ $labels.instance }} is greater than 90%. Current usage: {{ $value }}%"
cpu_recording_rules.yml:
groups:
  - name: recording-rules
    rules:
      - record: job:cpu_usage:avg1m
        expr: 100 * (1 - avg by (instance) (irate(node_cpu_seconds_total{mode="idle"}[1m])))
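The expression takes the per-core idle rate over the last minute, averages it per instance, and inverts it into a busy percentage. On a mostly idle host the inner average should sit close to 1, which is an easy sanity check to run in the Prometheus expression browser before the recorded series exists:
avg by (instance) (irate(node_cpu_seconds_total{mode="idle"}[1m]))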
mem_recording_rules.yml:
groups:
  - name: recording-rules
    rules:
      - record: job:memory_usage
        expr: 100 * (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes))
Save the files written above (prometheus.yml and the five rule files) under ./conf/prometheus/.
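Before reloading, the main config and all five rule files can be validated in one step with promtool (it ships with the Prometheus release; this assumes it is available on the host and is run from the config directory so the relative rule_files paths resolve):
cd ./conf/prometheus
promtool check config prometheus.yml
Restart the Prometheus container afterwards so the new rule files are loaded.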
alertmanager.yml:
route:
  group_by: ['alertname']
  #group_wait: 30s
  #group_interval: 5m
  #repeat_interval: 5m
  receiver: 'default'
  routes:
    - match:
        alertname: 'HighCPUUsage'
      receiver: 'cpu-warning'
      repeat_interval: 10m
    - match:
        alertname: 'CriticalCPUUsage'
      receiver: 'cpu-critical'
      repeat_interval: 1m
    - match:
        alertname: 'HighMemoryUsage'
      receiver: 'memory-warning'
      repeat_interval: 10m
    - match:
        alertname: 'CriticalMemoryUsage'
      receiver: 'memory-critical'
      repeat_interval: 1m
    - match:
        alertname: 'InstanceDown'
      receiver: 'instance-down'
      repeat_interval: 1m
receivers:
  - name: 'default'
  - name: 'cpu-warning'
    webhook_configs:
      - url: 'http://172.17.0.1:9800/prom-alert'
  - name: 'cpu-critical'
    webhook_configs:
      - url: 'http://172.17.0.1:9800/prom-alert'
  - name: 'memory-warning'
    webhook_configs:
      - url: 'http://172.17.0.1:9800/prom-alert'
  - name: 'memory-critical'
    webhook_configs:
      - url: 'http://172.17.0.1:9800/prom-alert'
  - name: 'instance-down'
    webhook_configs:
      - url: 'http://172.17.0.1:9800/prom-alert'
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']
Save this file under ./conf/alertmanager/.
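The syntax can be verified with amtool, which is bundled in the prom/alertmanager image, and the running container can then be told to reload its configuration (assuming the do-alertmanager container from the compose file above is up and the file is mounted at /etc/alertmanager/alertmanager.yml):
docker exec do-alertmanager amtool check-config /etc/alertmanager/alertmanager.yml
curl -X POST http://localhost:9300/-/reload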



Install the stress tool:
sudo apt install stress
Command help:
stress --help
stress -c <number of cores>
Loads the specified number of cores to 100% utilization.
stress -c 4
Check how many cores the host has:
lscpu
stress --vm <number of processes> --vm-bytes <size per process>
Sets the number of worker processes for the memory load and how much memory each one allocates.
stress --vm 2 --vm-bytes 4096m
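With --vm 2 --vm-bytes 4096m each worker allocates about 4 GiB, roughly 8 GiB in total, so size the parameters to the host. While stress is running, the recorded series and the alert state can be watched through the Prometheus HTTP API on the published port:
curl -s 'http://localhost:9090/api/v1/query?query=job:cpu_usage:avg1m'
curl -s 'http://localhost:9090/api/v1/alerts'
After the for: duration has elapsed, HighCPUUsage / HighMemoryUsage move from pending to firing and Alertmanager routes them to the matching receiver. Next comes the webhook agent that will receive these alerts from Alertmanager.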

FROM --platform=linux/amd64 node:20-alpine
WORKDIR /app
COPY package*.json ./
RUN npm install
COPY . .
RUN npm run build
ENV PORT=3000
EXPOSE $PORT
CMD [ "npm", "run", "start:prod" ]
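The image referenced in the compose service below can be built locally from this Dockerfile; the tag here simply matches the image name used in that snippet:
docker build -t sylvius/webhook-agent:1.0 .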
.env:
# WEBHOOK
WEBHOOK_TYPE=TELEGRAM # Telegram or Jandi
TELEGRAM_API_TOKEN=token
TELEGRAM_CHANNEL_NAME=@channel
JANDI_TOPIC_PATH=path
The .env file has to be added manually on the host server, in the same directory as docker-compose.yml.
docker-compose.yml (webhook-agent service):
services:
  webhook-agent:
    image: sylvius/webhook-agent:1.0
    container_name: do-webhook-agent
    restart: unless-stopped
    environment:
      - WEBHOOK_TYPE=${WEBHOOK_TYPE}
      - TELEGRAM_API_TOKEN=${TELEGRAM_API_TOKEN}
      - TELEGRAM_CHANNEL_NAME=${TELEGRAM_CHANNEL_NAME}
      - JANDI_TOPIC_PATH=${JANDI_TOPIC_PATH}
    ports:
      - 9800:9800
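Once the agent is up, the pipeline can be exercised end to end with stress, or the agent alone can be tested by posting an Alertmanager-style webhook payload to it by hand (a minimal sketch, assuming the agent accepts the standard Alertmanager webhook JSON; the field values are illustrative):
docker compose up -d webhook-agent
curl -X POST http://localhost:9800/prom-alert \
  -H 'Content-Type: application/json' \
  -d '{
        "version": "4",
        "status": "firing",
        "receiver": "cpu-warning",
        "groupLabels": {"alertname": "HighCPUUsage"},
        "commonLabels": {"alertname": "HighCPUUsage", "severity": "warning"},
        "commonAnnotations": {"summary": "High CPU usage on localhost:9100"},
        "externalURL": "http://localhost:9300",
        "alerts": [{
          "status": "firing",
          "labels": {"alertname": "HighCPUUsage", "severity": "warning", "instance": "localhost:9100"},
          "annotations": {"summary": "High CPU usage on localhost:9100", "description": "CPU usage on localhost:9100 is greater than 50%. Current usage: 73%"},
          "startsAt": "2024-01-01T00:00:00Z",
          "endsAt": "0001-01-01T00:00:00Z"
        }]
      }'
If the .env values are valid, a message should arrive in the configured Telegram channel (or Jandi topic).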

