"""
config.py (master definitions of the regex name formats for each cluster role)
"""
import re
from pathlib import Path
# Data directories. The base path is relative, so it resolves against the
# working directory of whatever process imports this module.
BASE_DATA_DIR = Path("./data")
RAW_DIR = BASE_DATA_DIR.joinpath("raw")
MERGED_DIR = BASE_DATA_DIR.joinpath("merged")
OUT_DIR = BASE_DATA_DIR.joinpath("output")

# Regex source strings keyed by cluster type. Each pattern is anchored at the
# start of the node name (`^`) and accepts any one of the listed name prefixes
# followed by one or more digits; nothing is anchored at the end.
CLUSTER_NODE_PATTERNS = {
    "COMPUTE": r"^(name1wk\d+|icdlh-prod-wk\d+|stg-lh-wk\d+)",
    "STORAGE": r"^(minio-storage-node\d+|aistor-prod-wk\d+|lake-pool-\d+)",
}

# Default query endpoint URL and object-storage bucket names.
DEFAULT_THANOS_URL = "http://thanos-query.internal.zone:9090"
MINIO_RAW_BUCKET = "enterprise-finops-raw-lake"
MINIO_REPORT_BUCKET = "devops-test"
def classify_cluster_infrastructure(node_name) -> tuple:
    """
    Map a node name to its ``(cluster_name, cluster_type)`` pair.

    Classification is a case-insensitive *substring* search on the node name.
    It does not evaluate the regular expressions in ``CLUSTER_NODE_PATTERNS``.
    Storage keywords are tested before compute keywords, so a name carrying
    both (e.g. ``"aistor-prod-wk1"``) is reported as storage.

    Args:
        node_name: Node/host name. Any object is accepted; it is converted
            with ``str()`` before matching (so ``None`` becomes ``"none"``).

    Returns:
        A 2-tuple of strings:
        ``("prod-storage-cluster", "STORAGE")`` when the name contains
        ``"aistor"``, ``"storage"`` or ``"lake-pool"``;
        ``("prod-compute-cluster", "COMPUTE")`` when it contains ``"icdlh"``,
        ``"prod"`` or ``"name1wk"``;
        otherwise ``("unclassified-cluster", "COMPUTE")``.
    """
    normalized = str(node_name).lower()

    # Storage is checked first on purpose: storage node names may also
    # contain the generic "prod" keyword used by the compute rule below.
    if any(keyword in normalized for keyword in ("aistor", "storage", "lake-pool")):
        return "prod-storage-cluster", "STORAGE"

    if any(keyword in normalized for keyword in ("icdlh", "prod", "name1wk")):
        return "prod-compute-cluster", "COMPUTE"

    # Unknown names fall back to the COMPUTE type under a placeholder cluster.
    return "unclassified-cluster", "COMPUTE"
def get_workload_type(pod_name) -> str:
    """
    Classify a pod name into a workload-type label.

    Matching is a case-insensitive substring search, evaluated in a fixed
    order: Spark first, then Airflow, then object storage. The first family
    that matches decides the result, so e.g. ``"airflow-spark-worker"`` is
    reported as ``"SPARK_SYSTEM"``.

    Args:
        pod_name: Pod name. Any object is accepted; it is converted with
            ``str()`` before matching.

    Returns:
        One of ``"SPARK_EXECUTOR"``, ``"SPARK_DRIVER"``, ``"SPARK_SYSTEM"``,
        ``"AIRFLOW_WORKER"``, ``"AIRFLOW_SCHEDULER"``, ``"AIRFLOW_SYSTEM"``,
        ``"MINIO_STORAGE"`` or ``"GENERAL_APPS"`` (the fallback).
    """
    name = str(pod_name).lower()

    # Spark family. "-exec-" and "-driver" admit pods whose names do not
    # contain "spark" at all.
    # NOTE(review): any name containing "-driver" (e.g. a CSI driver pod)
    # is therefore reported as SPARK_DRIVER -- confirm this is intended.
    if "spark" in name or "-exec-" in name or "-driver" in name:
        if "executor" in name or "-exec-" in name:
            return "SPARK_EXECUTOR"
        # "driver" also covers the "-driver" form, so one test suffices.
        if "driver" in name:
            return "SPARK_DRIVER"
        return "SPARK_SYSTEM"

    # Airflow family; statsd pods are counted as Airflow system components.
    if "airflow" in name or "statsd" in name:
        if "worker" in name:
            return "AIRFLOW_WORKER"
        if "scheduler" in name:
            return "AIRFLOW_SCHEDULER"
        return "AIRFLOW_SYSTEM"

    # Object-storage pods.
    if any(keyword in name for keyword in ("minio", "aistor", "lake-pool")):
        return "MINIO_STORAGE"

    return "GENERAL_APPS"