[Week4] AWS EKS Observability (1/3)

오태경·2024년 3월 30일

본 게시물은 CloudNet@팀 Gasida(서종호) 님이 진행하시는
AWS EKS Workshop Study 내용을 기반으로 작성되었습니다.

Logging in EKS

Control Plane Logging

  • Kubernetes API server component logs (api)
    • kube-apiserver-<nnn...>
  • Audit (audit)
    • kube-apiserver-audit-<nnn...>
  • Authenticator (authenticator)
    • authenticator-<nnn...>
  • Controller manager (controllerManager)
    • kube-controller-manager-<nnn...>
  • Scheduler (scheduler)
    • kube-scheduler-<nnn...>

로그 활성화

# 모든 로깅 활성화
aws eks update-cluster-config --region $AWS_DEFAULT_REGION --name $CLUSTER_NAME \
    --logging '{"clusterLogging":[{"types":["api","audit","authenticator","controllerManager","scheduler"],"enabled":true}]}'

# 로그 그룹 확인
aws logs describe-log-groups | jq
{
  "logGroups": [
    {
      "logGroupName": "/aws/eks/myeks/cluster",
      "creationTime": 1711796302079,
      "metricFilterCount": 0,
      "arn": "arn:aws:logs:ap-northeast-2:891377200830:log-group:/aws/eks/myeks/cluster:*",
      "storedBytes": 0,
      "logGroupClass": "STANDARD",
      "logGroupArn": "arn:aws:logs:ap-northeast-2:891377200830:log-group:/aws/eks/myeks/cluster"
    }
  ]
}

# 로그 tail 확인 : aws logs tail help
aws logs tail /aws/eks/$CLUSTER_NAME/cluster | more

# 신규 로그를 바로 출력
aws logs tail /aws/eks/$CLUSTER_NAME/cluster --follow

# 필터 패턴
aws logs tail /aws/eks/$CLUSTER_NAME/cluster --filter-pattern <필터 패턴>

# 로그 스트림이름
aws logs tail /aws/eks/$CLUSTER_NAME/cluster --log-stream-name-prefix <로그 스트림 prefix> --follow
aws logs tail /aws/eks/$CLUSTER_NAME/cluster --log-stream-name-prefix kube-controller-manager --follow
kubectl scale deployment -n kube-system coredns --replicas=1
kubectl scale deployment -n kube-system coredns --replicas=2

# 시간 지정: 1초(s) 1분(m) 1시간(h) 하루(d) 한주(w)
aws logs tail /aws/eks/$CLUSTER_NAME/cluster --since 1h30m

# 짧게 출력
aws logs tail /aws/eks/$CLUSTER_NAME/cluster --since 1h30m --format short




CloudWatch Logs Insights

# EC2 Instance가 NodeNotReady 상태인 로그 검색
fields @timestamp, @message
| filter @message like /NodeNotReady/
| sort @timestamp desc

# kube-apiserver-audit 로그에서 userAgent 정렬해서 아래 4개 필드 정보 검색
fields userAgent, requestURI, @timestamp, @message
| filter @logStream ~= "kube-apiserver-audit"
| stats count(userAgent) as count by userAgent
| sort count desc

# kube-scheduler 로그 검색 (최신순)
fields @timestamp, @message
| filter @logStream ~= "kube-scheduler"
| sort @timestamp desc

# authenticator 로그 검색 (최신순)
fields @timestamp, @message
| filter @logStream ~= "authenticator"
| sort @timestamp desc

# kube-controller-manager 로그 검색 (최신순)
fields @timestamp, @message
| filter @logStream ~= "kube-controller-manager"
| sort @timestamp desc

With AWS CLI

# CloudWatch Log Insight Query
aws logs get-query-results --query-id $(aws logs start-query \
--log-group-name '/aws/eks/myeks/cluster' \
--start-time `date -d "-1 hours" +%s` \
--end-time `date +%s` \
--query-string 'fields @timestamp, @message | filter @logStream ~= "kube-controller" | sort @timestamp desc' \
| jq --raw-output '.queryId')
{
    "results": [
        [
            {
                "field": "@timestamp",
                "value": "2024-03-30 10:59:27.000"
            },
            {
                "field": "@message",
                "value": "I0330 10:59:27.133720      10 replica_set.go:676] \"Finished syncing\" kind=\"ReplicaSet\" key=\"kube-system/coredns-55474bf7b9\" duration=\"302.944µs\""
            },
            {
                "field": "@ptr",
                "value": "CmgKJwojODkxMzc3MjAwODMwOi9hd3MvZWtzL215ZWtzL2NsdXN0ZXIQBRI5GhgCBl8P9rcAAAAAOElKkgAGYH8CkAAAAXIgASj84Yr46DEwoMSP+OgxOOwIQLDASEjLvgdQq5gHGAAgARDDCBgB"
            }
        ]
    ],
    "statistics": {
        "recordsMatched": 12.0,
        "recordsScanned": 31408.0,
        "bytesScanned": 37513649.0
    },
    "status": "Running"
}




로깅 끄기

# EKS Control Plane 로깅(CloudWatch Logs) 비활성화
eksctl utils update-cluster-logging --cluster $CLUSTER_NAME --region $AWS_DEFAULT_REGION --disable-types all --approve

# 로그 그룹 삭제
aws logs delete-log-group --log-group-name /aws/eks/$CLUSTER_NAME/cluster



Container(Pod) Logging

Nginx 웹서버 배포

# NGINX 웹서버 배포를 위한 Bitnami Helm 차트 저장소 추가
helm repo add bitnami https://charts.bitnami.com/bitnami

# 사용 리전의 인증서 ARN 확인
CERT_ARN=$(aws acm list-certificates --query 'CertificateSummaryList[].CertificateArn[]' --output text)
echo $CERT_ARN

# 도메인 확인
echo $MyDomain

# 파라미터 파일 생성 : 인증서 ARN 지정하지 않아도 가능! 혹시 https 리스너 설정 안 될 경우 인증서 설정 추가(주석 제거)해서 배포 할 것
cat <<EOT > nginx-values.yaml
service:
  type: NodePort
  
networkPolicy:
  enabled: false

ingress:
  enabled: true
  ingressClassName: alb
  hostname: nginx.$MyDomain
  pathType: Prefix
  path: /
  annotations: 
    alb.ingress.kubernetes.io/scheme: internet-facing
    alb.ingress.kubernetes.io/target-type: ip
    alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}, {"HTTP":80}]'
    #alb.ingress.kubernetes.io/certificate-arn: $CERT_ARN
    alb.ingress.kubernetes.io/success-codes: 200-399
    alb.ingress.kubernetes.io/load-balancer-name: $CLUSTER_NAME-ingress-alb
    alb.ingress.kubernetes.io/group.name: study
    alb.ingress.kubernetes.io/ssl-redirect: '443'
EOT
cat nginx-values.yaml | yh

# 배포
helm install nginx bitnami/nginx --version 15.14.0 -f nginx-values.yaml

# 확인
kubectl get ingress,deploy,svc,ep nginx
kubectl get targetgroupbindings # ALB TG 확인

# 접속 주소 확인 및 접속
echo -e "Nginx WebServer URL = https://nginx.$MyDomain"
Nginx WebServer URL = https://nginx.tkops.click

curl -s https://nginx.$MyDomain
kubectl logs deploy/nginx -f

## 외부에서는 접속이 잘되나, myeks EC2에서 url 접속이 잘 되지 않을 경우 : 이전 aws DNS cache 영향(추정)
dig +short nginx.$MyDomain
dig +short nginx.$MyDomain @192.168.0.2
dig +short nginx.$MyDomain @1.1.1.1
dig +short nginx.$MyDomain @8.8.8.8
cat /etc/resolv.conf
sed -i "s/^nameserver 192.168.0.2/nameserver 1.1.1.1/g" /etc/resolv.conf
cat /etc/resolv.conf
dig +short nginx.$MyDomain
dig +short nginx.$MyDomain @8.8.8.8
dig +short nginx.$MyDomain @192.168.0.2
curl -s https://nginx.$MyDomain
----

# 반복 접속
while true; do curl -s https://nginx.$MyDomain -I | head -n 1; date; sleep 1; done

# (참고) 삭제 시
helm uninstall nginx

Container 로그는 표준 출력(stdout)과 표준 에러(stderr)로 보내는 것을 권고하며, 사용자는 Pod 안으로 접속하지 않아도 kubectl logs 명령어를 사용하여 로그 파일의 위치와 상관없이 로그를 조회 가능

# 로그 모니터링
kubectl logs deploy/nginx -f
nginx 11:30:47.62 INFO  ==>
nginx 11:30:47.62 INFO  ==> Welcome to the Bitnami nginx container
nginx 11:30:47.62 INFO  ==> Subscribe to project updates by watching https://github.com/bitnami/containers
nginx 11:30:47.62 INFO  ==> Submit issues and feature requests at https://github.com/bitnami/containers/issues
nginx 11:30:47.63 INFO  ==>
nginx 11:30:47.63 INFO  ==> ** Starting NGINX setup **
nginx 11:30:47.65 INFO  ==> Validating settings in NGINX_* env vars
Certificate request self-signature ok
subject=CN = example.com
nginx 11:30:51.11 INFO  ==> No custom scripts in /docker-entrypoint-initdb.d
nginx 11:30:51.11 INFO  ==> Initializing NGINX
realpath: /bitnami/nginx/conf/vhosts: No such file or directory

nginx 11:30:51.15 INFO  ==> ** NGINX setup finished! **
nginx 11:30:51.16 INFO  ==> ** Starting NGINX **
192.168.1.100 - - [30/Mar/2024:11:32:08 +0000] "GET / HTTP/1.1" 200 615 "-" "curl/8.3.0" "-"
192.168.1.100 - - [30/Mar/2024:11:32:41 +0000] "GET / HTTP/1.1" 200 615 "-" "curl/8.3.0" "-"
192.168.1.100 - - [30/Mar/2024:11:32:53 +0000] "GET / HTTP/1.1" 200 615 "-" "curl/8.3.0" "-"

# nginx 웹 접속 시도

# 컨테이너 로그 파일 위치 확인
kubectl exec -it deploy/nginx -- ls -l
total 0
lrwxrwxrwx 1 1001 1001 11 Mar 30 11:30 access.log -> /dev/stdout
lrwxrwxrwx 1 1001 1001 11 Mar 30 11:30 error.log -> /dev/stderr

종료된 파드의 로그는 kubectl logs로 조회할 수 없으며,
kubelet 기본 설정은 로그 파일의 최대 크기가 10Mi이므로, 10Mi를 초과하는 로그는 전체 로그 조회가 불가능함



Container Insights metrics in Amazon CloudWatch & Fluent Bit (Logs)

  • [수집] 플루언트비트 Fluent Bit 컨테이너를 데몬셋으로 동작시키고, 아래 3가지 종류의 로그를 CloudWatch Logs 에 전송
    1. /aws/containerinsights/Cluster_Name/application : 로그 소스(All log files in /var/log/containers), 각 컨테이너/파드 로그
    2. /aws/containerinsights/Cluster_Name/host : 로그 소스(Logs from /var/log/dmesg, /var/log/secure, and /var/log/messages), 노드(호스트) 로그
    3. /aws/containerinsights/Cluster_Name/dataplane : 로그 소스(/var/log/journal for kubelet.service, kubeproxy.service, and docker.service), 쿠버네티스 데이터플레인 로그
  • [저장] : CloudWatch Logs 에 로그를 저장, 로그 그룹 별 로그 보존 기간 설정 가능
  • [시각화] : CloudWatch 의 Logs Insights 를 사용하여 대상 로그를 분석하고, CloudWatch 의 대시보드로 시각화한다



CloudWatch Container observability 설치

# 설치
aws eks create-addon --cluster-name $CLUSTER_NAME --addon-name amazon-cloudwatch-observability
aws eks list-addons --cluster-name myeks --output table

# 설치 확인
kubectl get-all -n amazon-cloudwatch
kubectl get ds,pod,cm,sa,amazoncloudwatchagent -n amazon-cloudwatch
kubectl describe clusterrole cloudwatch-agent-role amazon-cloudwatch-observability-manager-role    # 클러스터롤 확인
kubectl describe clusterrolebindings cloudwatch-agent-role-binding amazon-cloudwatch-observability-manager-rolebinding  # 클러스터롤 바인딩 확인
kubectl -n amazon-cloudwatch logs -l app.kubernetes.io/component=amazon-cloudwatch-agent -f # 파드 로그 확인
kubectl -n amazon-cloudwatch logs -l k8s-app=fluent-bit -f    # 파드 로그 확인

# cloudwatch-agent 설정 확인
kubectl describe cm cloudwatch-agent-agent -n amazon-cloudwatch

# cloudwatch-agent 파드가 수집하는 방법 : Volumes에 HostPath를 살펴보자! >> / 호스트 패스 공유??? 보안상 안전한가? 좀 더 범위를 좁힐 수는 없을까요?
kubectl describe -n amazon-cloudwatch ds cloudwatch-agent
...
  Volumes:
   ...
   rootfs:
    Type:          HostPath (bare host directory volume)
    Path:          /
    HostPathType:  

...
ssh ec2-user@$N1 sudo tree /dev/disk
...


# Fluent Bit 로그 INPUT/FILTER/OUTPUT 설정 확인 - 링크
## 설정 부분 구성 : application-log.conf , dataplane-log.conf , fluent-bit.conf , host-log.conf , parsers.conf
kubectl describe cm fluent-bit-config -n amazon-cloudwatch
...
application-log.conf:
----
[INPUT]
    Name                tail
    Tag                 application.*
    Exclude_Path        /var/log/containers/cloudwatch-agent*, /var/log/containers/fluent-bit*, /var/log/containers/aws-node*, /var/log/containers/kube-proxy*
    Path                /var/log/containers/*.log
    multiline.parser    docker, cri
    DB                  /var/fluent-bit/state/flb_container.db
    Mem_Buf_Limit       50MB
    Skip_Long_Lines     On
    Refresh_Interval    10
    Rotate_Wait         30
    storage.type        filesystem
    Read_from_Head      ${READ_FROM_HEAD}

[FILTER]
    Name                kubernetes
    Match               application.*
    Kube_URL            https://kubernetes.default.svc:443
    Kube_Tag_Prefix     application.var.log.containers.
    Merge_Log           On
    Merge_Log_Key       log_processed
    K8S-Logging.Parser  On
    K8S-Logging.Exclude Off
    Labels              Off
    Annotations         Off
    Use_Kubelet         On
    Kubelet_Port        10250
    Buffer_Size         0

[OUTPUT]
    Name                cloudwatch_logs
    Match               application.*
    region              ${AWS_REGION}
    log_group_name      /aws/containerinsights/${CLUSTER_NAME}/application
    log_stream_prefix   ${HOST_NAME}-
    auto_create_group   true
    extra_user_agent    container-insights
...

# Fluent Bit 파드가 수집하는 방법 : Volumes에 HostPath를 살펴보자!
kubectl describe -n amazon-cloudwatch ds fluent-bit
...
ssh ec2-user@$N1 sudo tree /var/log
...

# (참고) 삭제
aws eks delete-addon --cluster-name $CLUSTER_NAME --addon-name amazon-cloudwatch-observability

로깅 확인 (CloudWatch -> 로그 그룹)

메트릭 확인 (CloudWatch -> Insights -> Container Insights)

0개의 댓글