# Download the YAML file
curl -O https://s3.ap-northeast-2.amazonaws.com/cloudformation.cloudneta.net/K8S/eks-oneclick4.yaml
# Deploy the CloudFormation stack
Example) aws cloudformation deploy --template-file eks-oneclick4.yaml --stack-name myeks --parameter-overrides KeyName=kp-gasida SgIngressSshCidr=$(curl -s ipinfo.io/ip)/32 MyIamUserAccessKeyID=AKIA5... MyIamUserSecretAccessKey='CVNa2...' ClusterBaseName=myeks --region ap-northeast-2
# After the stack finishes deploying, print the working EC2 instance's public IP
aws cloudformation describe-stacks --stack-name myeks --query 'Stacks[*].Outputs[0].OutputValue' --output text
# SSH into the working EC2 instance
ssh -i ~/.ssh/kp-gasida.pem ec2-user@$(aws cloudformation describe-stacks --stack-name myeks --query 'Stacks[*].Outputs[0].OutputValue' --output text)
or
ssh -i ~/.ssh/kp-gasida.pem root@$(aws cloudformation describe-stacks --stack-name myeks --query 'Stacks[*].Outputs[0].OutputValue' --output text)
~ password: qwe123
# Switch to the default namespace
kubectl ns default
# Check node information
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# kubectl get node --label-columns=node.kubernetes.io/instance-type,eks.amazonaws.com/capacityType,topology.kubernetes.io/zone
NAME STATUS ROLES AGE VERSION INSTANCE-TYPE CAPACITYTYPE ZONE
ip-192-168-1-146.ap-northeast-2.compute.internal Ready <none> 3m4s v1.28.5-eks-5e0fdde t3.medium ON_DEMAND ap-northeast-2a
ip-192-168-2-60.ap-northeast-2.compute.internal Ready <none> 3m9s v1.28.5-eks-5e0fdde t3.medium ON_DEMAND ap-northeast-2b
ip-192-168-3-134.ap-northeast-2.compute.internal Ready <none> 3m5s v1.28.5-eks-5e0fdde t3.medium ON_DEMAND ap-northeast-2c
# Install ExternalDNS
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# MyDomain=22joo.shop
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# echo "export MyDomain=22joo.shop" >> /etc/profile
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# MyDnzHostedZoneId=$(aws route53 list-hosted-zones-by-name --dns-name "${MyDomain}." --query "HostedZones[0].Id" --output text)
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# echo $MyDomain, $MyDnzHostedZoneId
22joo.shop, /hostedzone/Z07798463AFECYTX1ODP4
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# curl -s -O https://raw.githubusercontent.com/gasida/PKOS/main/aews/externaldns.yaml
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# MyDomain=$MyDomain MyDnzHostedZoneId=$MyDnzHostedZoneId envsubst < externaldns.yaml | kubectl apply -f -
serviceaccount/external-dns created
clusterrole.rbac.authorization.k8s.io/external-dns created
clusterrolebinding.rbac.authorization.k8s.io/external-dns-viewer created
deployment.apps/external-dns created
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# k get pods -n kube-system | grep external
external-dns-7fd77dcbc-7tx9b 1/1 Running 0 31s
# Install kube-ops-view
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# helm repo add geek-cookbook https://geek-cookbook.github.io/charts/
"geek-cookbook" has been added to your repositories
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# helm install kube-ops-view geek-cookbook/kube-ops-view --version 1.2.2 --set env.TZ="Asia/Seoul" --namespace kube-system
NAME: kube-ops-view
LAST DEPLOYED: Thu Apr 4 18:30:57 2024
NAMESPACE: kube-system
STATUS: deployed
REVISION: 1
TEST SUITE: None
NOTES:
1. Get the application URL by running these commands:
export POD_NAME=$(kubectl get pods --namespace kube-system -l "app.kubernetes.io/name=kube-ops-view,app.kubernetes.io/instance=kube-ops-view" -o jsonpath="{.items[0].metadata.name}")
echo "Visit http://127.0.0.1:8080 to use your application"
kubectl port-forward $POD_NAME 8080:8080
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# kubectl patch svc -n kube-system kube-ops-view -p '{"spec":{"type":"LoadBalancer"}}'
service/kube-ops-view patched
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# kubectl annotate service kube-ops-view -n kube-system "external-dns.alpha.kubernetes.io/hostname=kubeopsview.$MyDomain"
service/kube-ops-view annotated
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# echo -e "Kube Ops View URL = http://kubeopsview.$MyDomain:8080/#scale=1.5"
Kube Ops View URL = http://kubeopsview.22joo.shop:8080/#scale=1.5
# Install the AWS Load Balancer Controller
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# helm repo add eks https://aws.github.io/eks-charts
"eks" has been added to your repositories
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# helm repo update
Hang tight while we grab the latest from your chart repositories...
...Successfully got an update from the "eks" chart repository
...Successfully got an update from the "geek-cookbook" chart repository
Update Complete. ⎈Happy Helming!⎈
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# helm install aws-load-balancer-controller eks/aws-load-balancer-controller -n kube-system --set clusterName=$CLUSTER_NAME \
> --set serviceAccount.create=false --set serviceAccount.name=aws-load-balancer-controller
NAME: aws-load-balancer-controller
LAST DEPLOYED: Thu Apr 4 18:31:47 2024
NAMESPACE: kube-system
STATUS: deployed
REVISION: 1
TEST SUITE: None
NOTES:
AWS Load Balancer controller installed!
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# k get pods -n kube-system | grep load
aws-load-balancer-controller-5f7b66cdd5-p9tld 1/1 Running 0 37s
aws-load-balancer-controller-5f7b66cdd5-qdmbx 1/1 Running 0 37s
# Create the gp3 StorageClass
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# kubectl apply -f https://raw.githubusercontent.com/gasida/PKOS/main/aews/gp3-sc.yaml
storageclass.storage.k8s.io/gp3 created
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# k get sc
NAME PROVISIONER RECLAIMPOLICY VOLUMEBINDINGMODE ALLOWVOLUMEEXPANSION AGE
gp2 (default) kubernetes.io/aws-ebs Delete WaitForFirstConsumer false 18m
gp3 ebs.csi.aws.com Delete WaitForFirstConsumer true 5s
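For reference, a minimal StorageClass sketch consistent with the `k get sc` output above; the actual gp3-sc.yaml may set additional parameters (IOPS, throughput, filesystem type):

kind: StorageClass
apiVersion: storage.k8s.io/v1
metadata:
  name: gp3
provisioner: ebs.csi.aws.com
volumeBindingMode: WaitForFirstConsumer
allowVolumeExpansion: true
parameters:
  type: gp3   # EBS volume type; assumed to be pinned in the downloaded manifest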
# Look up the node security group ID and allow all traffic from 192.168.1.100/32
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# NGSGID=$(aws ec2 describe-security-groups --filters Name=group-name,Values=*ng1* --query "SecurityGroups[*].[GroupId]" --output text)
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# aws ec2 authorize-security-group-ingress --group-id $NGSGID --protocol '-1' --cidr 192.168.1.100/32
{
"Return": true,
"SecurityGroupRules": [
{
"SecurityGroupRuleId": "sgr-0a7cf7d5ab4ea1b7f",
"GroupId": "sg-0b63afee06b7cd84e",
"GroupOwnerId": "236747833953",
"IsEgress": false,
"IpProtocol": "-1",
"FromPort": -1,
"ToPort": -1,
"CidrIpv4": "192.168.1.100/32"
}
]
}
# Check the certificate ARN in the current region
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# CERT_ARN=`aws acm list-certificates --query 'CertificateSummaryList[].CertificateArn[]' --output text`
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# echo $CERT_ARN
arn:aws:acm:ap-northeast-2:236747833953:certificate/1244562e-aaa2-48df-a178-07ad03ef921d
# Add the Helm repo
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
"prometheus-community" has been added to your repositories
# Create the Helm values file: PV/PVC (AWS EBS) are not used, to avoid the hassle of deleting them later
cat <<EOT > monitor-values.yaml
prometheus:
prometheusSpec:
podMonitorSelectorNilUsesHelmValues: false
serviceMonitorSelectorNilUsesHelmValues: false
retention: 5d
retentionSize: "10GiB"
verticalPodAutoscaler:
enabled: true
ingress:
enabled: true
ingressClassName: alb
hosts:
- prometheus.$MyDomain
paths:
- /*
annotations:
alb.ingress.kubernetes.io/scheme: internet-facing
alb.ingress.kubernetes.io/target-type: ip
alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}, {"HTTP":80}]'
alb.ingress.kubernetes.io/certificate-arn: $CERT_ARN
alb.ingress.kubernetes.io/success-codes: 200-399
alb.ingress.kubernetes.io/load-balancer-name: myeks-ingress-alb
alb.ingress.kubernetes.io/group.name: study
alb.ingress.kubernetes.io/ssl-redirect: '443'
grafana:
defaultDashboardsTimezone: Asia/Seoul
adminPassword: prom-operator
defaultDashboardsEnabled: false
ingress:
enabled: true
ingressClassName: alb
hosts:
- grafana.$MyDomain
paths:
- /*
annotations:
alb.ingress.kubernetes.io/scheme: internet-facing
alb.ingress.kubernetes.io/target-type: ip
alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}, {"HTTP":80}]'
alb.ingress.kubernetes.io/certificate-arn: $CERT_ARN
alb.ingress.kubernetes.io/success-codes: 200-399
alb.ingress.kubernetes.io/load-balancer-name: myeks-ingress-alb
alb.ingress.kubernetes.io/group.name: study
alb.ingress.kubernetes.io/ssl-redirect: '443'
kube-state-metrics:
rbac:
extraRules:
- apiGroups: ["autoscaling.k8s.io"]
resources: ["verticalpodautoscalers"]
verbs: ["list", "watch"]
prometheus:
monitor:
enabled: true
customResourceState:
enabled: true
config:
kind: CustomResourceStateMetrics
spec:
resources:
- groupVersionKind:
group: autoscaling.k8s.io
kind: "VerticalPodAutoscaler"
version: "v1"
labelsFromPath:
verticalpodautoscaler: [metadata, name]
namespace: [metadata, namespace]
target_api_version: [apiVersion]
target_kind: [spec, targetRef, kind]
target_name: [spec, targetRef, name]
metrics:
- name: "vpa_containerrecommendations_target"
help: "VPA container recommendations for memory."
each:
type: Gauge
gauge:
path: [status, recommendation, containerRecommendations]
valueFrom: [target, memory]
labelsFromPath:
container: [containerName]
commonLabels:
resource: "memory"
unit: "byte"
- name: "vpa_containerrecommendations_target"
help: "VPA container recommendations for cpu."
each:
type: Gauge
gauge:
path: [status, recommendation, containerRecommendations]
valueFrom: [target, cpu]
labelsFromPath:
container: [containerName]
commonLabels:
resource: "cpu"
unit: "core"
selfMonitor:
enabled: true
alertmanager:
enabled: false
EOT
# Deploy
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# kubectl create ns monitoring
namespace/monitoring created
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# helm install kube-prometheus-stack prometheus-community/kube-prometheus-stack --version 57.2.0 \
> --set prometheus.prometheusSpec.scrapeInterval='15s' --set prometheus.prometheusSpec.evaluationInterval='15s' \
> -f monitor-values.yaml --namespace monitoring
NAME: kube-prometheus-stack
LAST DEPLOYED: Thu Apr 4 18:37:27 2024
NAMESPACE: monitoring
STATUS: deployed
REVISION: 1
NOTES:
kube-prometheus-stack has been installed. Check its status by running:
kubectl --namespace monitoring get pods -l "release=kube-prometheus-stack"
Visit https://github.com/prometheus-operator/kube-prometheus for instructions on how to create & configure Alertmanager and Prometheus instances using the Operator.
# Deploy Metrics Server
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml
serviceaccount/metrics-server created
clusterrole.rbac.authorization.k8s.io/system:aggregated-metrics-reader created
clusterrole.rbac.authorization.k8s.io/system:metrics-server created
rolebinding.rbac.authorization.k8s.io/metrics-server-auth-reader created
clusterrolebinding.rbac.authorization.k8s.io/metrics-server:system:auth-delegator created
clusterrolebinding.rbac.authorization.k8s.io/system:metrics-server created
service/metrics-server created
deployment.apps/metrics-server created
apiservice.apiregistration.k8s.io/v1beta1.metrics.k8s.io created
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# k get ingress -n monitoring
NAME CLASS HOSTS ADDRESS PORTS AGE
kube-prometheus-stack-grafana alb grafana.22joo.shop myeks-ingress-alb-1771502456.ap-northeast-2.elb.amazonaws.com 80 44s
kube-prometheus-stack-prometheus alb prometheus.22joo.shop myeks-ingress-alb-1771502456.ap-northeast-2.elb.amazonaws.com 80 44s
# Access the Prometheus web UI via the ingress domain
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# echo -e "Prometheus Web URL = https://prometheus.$MyDomain"
Prometheus Web URL = https://prometheus.22joo.shop
# Access the Grafana web UI: default account - admin / prom-operator
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# echo -e "Grafana Web URL = https://grafana.$MyDomain"
Grafana Web URL = https://grafana.22joo.shop
# Install Go
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# wget https://go.dev/dl/go1.22.1.linux-amd64.tar.gz
--2024-04-04 18:41:47-- https://go.dev/dl/go1.22.1.linux-amd64.tar.gz
Resolving go.dev (go.dev)... 216.239.34.21, 216.239.32.21, 216.239.36.21, ...
Connecting to go.dev (go.dev)|216.239.34.21|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://dl.google.com/go/go1.22.1.linux-amd64.tar.gz [following]
--2024-04-04 18:41:48-- https://dl.google.com/go/go1.22.1.linux-amd64.tar.gz
Resolving dl.google.com (dl.google.com)... 142.251.42.206, 2404:6800:4004:827::200e
Connecting to dl.google.com (dl.google.com)|142.251.42.206|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 68965341 (66M) [application/x-gzip]
Saving to: ‘go1.22.1.linux-amd64.tar.gz’
100%[==========================================================================>] 68,965,341 66.2MB/s in 1.0s
2024-04-04 18:41:49 (66.2 MB/s) - ‘go1.22.1.linux-amd64.tar.gz’ saved [68965341/68965341]
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# tar -C /usr/local -xzf go1.22.1.linux-amd64.tar.gz
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# export PATH=$PATH:/usr/local/go/bin
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# go version
go version go1.22.1 linux/amd64
# Install EKS Node Viewer: this takes a little while.
go install github.com/awslabs/eks-node-viewer/cmd/eks-node-viewer@latest
# Verify
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# cd ~/go/bin/
(leeeuijoo@myeks:default) [root@myeks-bastion bin]# ls
eks-node-viewer
# [New terminal] Run EKS Node Viewer
## Sample commands
# Standard usage
./eks-node-viewer
# Display both CPU and Memory Usage
./eks-node-viewer --resources cpu,memory
# Karpenter nodes only
./eks-node-viewer --node-selector "karpenter.sh/provisioner-name"
# Display extra labels, i.e. AZ
./eks-node-viewer --extra-labels topology.kubernetes.io/zone
# Specify a particular AWS profile and region
AWS_PROFILE=myprofile AWS_REGION=us-west-2 ./eks-node-viewer
Default options (these can also be persisted in a ~/.eks-node-viewer config file)
# select only Karpenter managed nodes
node-selector=karpenter.sh/provisioner-name
# display both CPU and memory
resources=cpu,memory
JSON URL: https://file.notion.so/f/f/a6af158e-5b0f-4e31-9d12-0d0b2805956a/93cfa58d-5c66-4d80-9c4d-50da0563b6ad/17125_rev1.json?id=3db24c0f-8843-4ac7-9edc-91b5a6895dd3&table=block&spaceId=a6af158e-5b0f-4e31-9d12-0d0b2805956a&expirationTimestamp=1712361600000&signature=_zotBai9md1wOQ1GzSsyeWFPmFzPxbujFFccCP7DL3Q&downloadName=17125_rev1.json
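# HPA hands-on: download the php-apache example application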
curl -s -O https://raw.githubusercontent.com/kubernetes/website/main/content/en/examples/application/php-apache.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: php-apache
spec:
selector:
matchLabels:
run: php-apache
template:
metadata:
labels:
run: php-apache
spec:
containers:
- name: php-apache
image: registry.k8s.io/hpa-example
ports:
- containerPort: 80
resources:
limits:
cpu: 500m
requests:
cpu: 200m
---
apiVersion: v1
kind: Service
metadata:
name: php-apache
labels:
run: php-apache
spec:
ports:
- port: 80
selector:
run: php-apache
# Deploy
(leeeuijoo@myeks:default) [root@myeks-bastion bin]# kubectl apply -f php-apache.yaml
deployment.apps/php-apache created
service/php-apache created
# Verify
(leeeuijoo@myeks:default) [root@myeks-bastion bin]# kubectl exec -it deploy/php-apache -- cat /var/www/html/index.php
<?php
$x = 0.0001;
for ($i = 0; $i <= 1000000; $i++) {
$x += sqrt($x);
}
echo "OK!";
?>
# Monitoring: use two terminals
watch -d 'kubectl get hpa,pod;echo;kubectl top pod;echo;kubectl top node'
kubectl exec -it deploy/php-apache -- top
# Access the pod
(leeeuijoo@myeks:default) [root@myeks-bastion bin]# PODIP=$(kubectl get pod -l run=php-apache -o jsonpath={.items[0].status.podIP})
(leeeuijoo@myeks:default) [root@myeks-bastion bin]# curl -s $PODIP; echo
OK!
kubectl describe hpa
(leeeuijoo@myeks:default) [root@myeks-bastion bin]# kubectl autoscale deployment php-apache --cpu-percent=50 --min=1 --max=10
horizontalpodautoscaler.autoscaling/php-apache autoscaled
# Repeated requests 1 (hit the pod IP) >> stop after confirming the scale-out
(leeeuijoo@myeks:default) [root@myeks-bastion bin]# while true;do curl -s $PODIP; sleep 0.5; done
# Check the HPA spec
kubectl get hpa php-apache -o yaml | kubectl neat | yh
spec:
  minReplicas: 1        # [4] and can scale back down to a minimum of 1 pod
  maxReplicas: 10       # [3] scale out to at most 10 pods
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: php-apache    # [1] based on php-apache's resource usage,
  metrics:
  - type: Resource
    resource:
      name: cpu
      target:
        type: Utilization
        averageUtilization: 50   # [2] when average CPU utilization exceeds 50%
(leeeuijoo@myeks:default) [root@myeks-bastion bin]# kubectl run -i --tty load-generator --rm --image=busybox:1.28 --restart=Never -- /bin/sh -c "while sleep 0.01; do wget -q -O- http://php-apache; done"
kubectl delete deploy,svc,hpa,pod --all
A drawback of HPA: it autoscales based only on CPU and memory.
The classic HPA (Horizontal Pod Autoscaler) decides whether to scale from resource (CPU, memory) metrics.
KEDA, by contrast, can decide whether to scale based on specific events.
For example, Airflow can tell from its metadata DB how many tasks are currently running or queued.
Using such events to drive worker scaling lets you scale out faster at the moment many tasks are added to the queue.
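As an illustration only (not part of this lab), a ScaledObject with a Prometheus trigger could scale such a worker on queue depth; the Deployment name, Prometheus address, metric, and threshold below are all hypothetical:

apiVersion: keda.sh/v1alpha1
kind: ScaledObject
metadata:
  name: worker-queue-scaled          # hypothetical
spec:
  scaleTargetRef:
    name: worker                     # hypothetical worker Deployment
  minReplicaCount: 1
  maxReplicaCount: 10
  triggers:
  - type: prometheus
    metadata:
      serverAddress: http://kube-prometheus-stack-prometheus.monitoring:9090   # assumed in-cluster Prometheus address
      query: sum(queued_tasks)       # hypothetical queue-depth metric
      threshold: "10"                # target value per replica

The Grafana dashboard JSON below (the official KEDA dashboard) visualizes the metrics KEDA exposes to Prometheus: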
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"description": "Visualize metrics provided by KEDA",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 1653,
"links": [],
"liveNow": false,
"panels": [
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 8,
"panels": [],
"title": "Metric Server",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"description": "The total number of errors encountered for all scalers.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 25,
"gradientMode": "opacity",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "Errors/sec"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "http-demo"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "red",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "scaledObject"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "red",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "keda-system/keda-operator-metrics-apiserver"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "red",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 9,
"w": 8,
"x": 0,
"y": 1
},
"id": 4,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"editorMode": "code",
"expr": "sum by(job) (rate(keda_scaler_errors{}[5m]))",
"legendFormat": "{{ job }}",
"range": true,
"refId": "A"
}
],
"title": "Scaler Total Errors",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"description": "The number of errors that have occurred for each scaler.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 25,
"gradientMode": "opacity",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "Errors/sec"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "http-demo"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "red",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "scaler"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "red",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "prometheusScaler"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "red",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 9,
"w": 8,
"x": 8,
"y": 1
},
"id": 3,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"editorMode": "code",
"expr": "sum by(scaler) (rate(keda_scaler_errors{exported_namespace=~\"$namespace\", scaledObject=~\"$scaledObject\", scaler=~\"$scaler\"}[5m]))",
"legendFormat": "{{ scaler }}",
"range": true,
"refId": "A"
}
],
"title": "Scaler Errors",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"description": "The number of errors that have occurred for each scaled object.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 25,
"gradientMode": "opacity",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "Errors/sec"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "http-demo"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "red",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 9,
"w": 8,
"x": 16,
"y": 1
},
"id": 2,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"editorMode": "code",
"expr": "sum by(scaledObject) (rate(keda_scaled_object_errors{exported_namespace=~\"$namespace\", scaledObject=~\"$scaledObject\"}[5m]))",
"legendFormat": "{{ scaledObject }}",
"range": true,
"refId": "A"
}
],
"title": "Scaled Object Errors",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 10
},
"id": 10,
"panels": [],
"title": "Scale Target",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"description": "The current value for each scaler’s metric that would be used by the HPA in computing the target average.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 25,
"gradientMode": "opacity",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "none"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "http-demo"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "blue",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 9,
"w": 24,
"x": 0,
"y": 11
},
"id": 5,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"editorMode": "code",
"expr": "sum by(metric) (keda_scaler_metrics_value{exported_namespace=~\"$namespace\", metric=~\"$metric\", scaledObject=\"$scaledObject\"})",
"legendFormat": "{{ metric }}",
"range": true,
"refId": "A"
}
],
"title": "Scaler Metric Value",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"description": "shows current replicas against max ones based on time difference",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 21,
"gradientMode": "opacity",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineStyle": {
"fill": "solid"
},
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 20
},
"id": 13,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"editorMode": "code",
"exemplar": false,
"expr": "kube_horizontalpodautoscaler_status_current_replicas{namespace=\"$namespace\",horizontalpodautoscaler=\"keda-hpa-$scaledObject\"}",
"format": "time_series",
"instant": false,
"interval": "",
"legendFormat": "current_replicas",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"editorMode": "code",
"exemplar": false,
"expr": "kube_horizontalpodautoscaler_spec_max_replicas{namespace=\"$namespace\",horizontalpodautoscaler=\"keda-hpa-$scaledObject\"}",
"format": "time_series",
"hide": false,
"instant": false,
"legendFormat": "max_replicas",
"range": true,
"refId": "B"
}
],
"title": "Current/max replicas (time based)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"description": "shows current replicas against max ones based on time difference",
"fieldConfig": {
"defaults": {
"color": {
"mode": "continuous-GrYlRd"
},
"custom": {
"fillOpacity": 70,
"lineWidth": 0,
"spanNulls": false
},
"mappings": [
{
"options": {
"0": {
"color": "green",
"index": 0,
"text": "No scaling"
}
},
"type": "value"
},
{
"options": {
"from": -200,
"result": {
"color": "light-red",
"index": 1,
"text": "Scaling down"
},
"to": 0
},
"type": "range"
},
{
"options": {
"from": 0,
"result": {
"color": "semi-dark-red",
"index": 2,
"text": "Scaling up"
},
"to": 200
},
"type": "range"
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "none"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 28
},
"id": 16,
"options": {
"alignValue": "left",
"legend": {
"displayMode": "list",
"placement": "bottom",
"showLegend": false,
"width": 0
},
"mergeValues": true,
"rowHeight": 1,
"showValue": "never",
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"editorMode": "code",
"exemplar": false,
"expr": "delta(kube_horizontalpodautoscaler_status_current_replicas{namespace=\"$namespace\",horizontalpodautoscaler=\"keda-hpa-$scaledObject\"}[1m])",
"format": "time_series",
"instant": false,
"interval": "",
"legendFormat": ".",
"range": true,
"refId": "A"
}
],
"title": "Changes in replicas",
"type": "state-timeline"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"description": "shows current replicas against max ones",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"min": 0,
"thresholds": {
"mode": "percentage",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 36
},
"id": 15,
"options": {
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "/^current_replicas$/",
"values": false
},
"showThresholdLabels": false,
"showThresholdMarkers": true
},
"pluginVersion": "9.5.2",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"editorMode": "code",
"exemplar": false,
"expr": "kube_horizontalpodautoscaler_status_current_replicas{namespace=\"$namespace\",horizontalpodautoscaler=\"keda-hpa-$scaledObject\"}",
"instant": true,
"legendFormat": "current_replicas",
"range": false,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"editorMode": "code",
"exemplar": false,
"expr": "kube_horizontalpodautoscaler_spec_max_replicas{namespace=\"$namespace\",horizontalpodautoscaler=\"keda-hpa-$scaledObject\"}",
"hide": false,
"instant": true,
"legendFormat": "max_replicas",
"range": false,
"refId": "B"
}
],
"title": "Current/max replicas",
"type": "gauge"
}
],
"refresh": "1m",
"schemaVersion": 38,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"current": {
"selected": false,
"text": "Prometheus",
"value": "Prometheus"
},
"hide": 0,
"includeAll": false,
"multi": false,
"name": "datasource",
"options": [],
"query": "prometheus",
"queryValue": "",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"type": "datasource"
},
{
"current": {
"selected": false,
"text": "bhe-test",
"value": "bhe-test"
},
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"definition": "label_values(keda_scaler_active,exported_namespace)",
"hide": 0,
"includeAll": false,
"multi": false,
"name": "namespace",
"options": [],
"query": {
"query": "label_values(keda_scaler_active,exported_namespace)",
"refId": "PrometheusVariableQueryEditor-VariableQuery"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"type": "query"
},
{
"current": {
"selected": false,
"text": "All",
"value": "$__all"
},
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"definition": "label_values(keda_scaler_active{exported_namespace=\"$namespace\"},scaledObject)",
"hide": 0,
"includeAll": true,
"multi": true,
"name": "scaledObject",
"options": [],
"query": {
"query": "label_values(keda_scaler_active{exported_namespace=\"$namespace\"},scaledObject)",
"refId": "PrometheusVariableQueryEditor-VariableQuery"
},
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
},
{
"current": {
"selected": false,
"text": "cronScaler",
"value": "cronScaler"
},
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"definition": "label_values(keda_scaler_active{exported_namespace=\"$namespace\"},scaler)",
"hide": 0,
"includeAll": false,
"multi": false,
"name": "scaler",
"options": [],
"query": {
"query": "label_values(keda_scaler_active{exported_namespace=\"$namespace\"},scaler)",
"refId": "PrometheusVariableQueryEditor-VariableQuery"
},
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
},
{
"current": {
"selected": false,
"text": "s0-cron-Etc-UTC-40xxxx-55xxxx",
"value": "s0-cron-Etc-UTC-40xxxx-55xxxx"
},
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"definition": "label_values(keda_scaler_active{exported_namespace=\"$namespace\"},metric)",
"hide": 0,
"includeAll": false,
"multi": false,
"name": "metric",
"options": [],
"query": {
"query": "label_values(keda_scaler_active{exported_namespace=\"$namespace\"},metric)",
"refId": "PrometheusVariableQueryEditor-VariableQuery"
},
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
}
]
},
"time": {
"from": "now-24h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "KEDA",
"uid": "asdasd8rvmMxdVk",
"version": 8,
"weekStart": ""
}
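The dashboard JSON above can be imported via Grafana's Dashboards → Import menu. Next, write the Helm values that enable Prometheus metrics (ServiceMonitor/PodMonitor) for the KEDA operator, metrics server, and admission webhooks: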
cat <<EOT > keda-values.yaml
metricsServer:
useHostNetwork: true
prometheus:
metricServer:
enabled: true
port: 9022
portName: metrics
path: /metrics
serviceMonitor:
# Enables ServiceMonitor creation for the Prometheus Operator
enabled: true
podMonitor:
# Enables PodMonitor creation for the Prometheus Operator
enabled: true
operator:
enabled: true
port: 8080
serviceMonitor:
# Enables ServiceMonitor creation for the Prometheus Operator
enabled: true
podMonitor:
# Enables PodMonitor creation for the Prometheus Operator
enabled: true
webhooks:
enabled: true
port: 8080
serviceMonitor:
# Enables ServiceMonitor creation for the Prometheus webhooks
enabled: true
EOT
(leeeuijoo@myeks:default) [root@myeks-bastion bin]# kubectl create namespace keda
namespace/keda created
(leeeuijoo@myeks:default) [root@myeks-bastion bin]# helm repo add kedacore https://kedacore.github.io/charts
"kedacore" has been added to your repositories
(leeeuijoo@myeks:default) [root@myeks-bastion bin]# helm install keda kedacore/keda --version 2.13.0 --namespace keda -f keda-values.yaml
NAME: keda
LAST DEPLOYED: Thu Apr 4 19:40:38 2024
NAMESPACE: keda
STATUS: deployed
REVISION: 1
TEST SUITE: None
NOTES:
...
Get started by deploying Scaled Objects to your cluster:
- Information about Scaled Objects : https://keda.sh/docs/latest/concepts/
- Samples: https://github.com/kedacore/samples
Get information about the deployed ScaledObjects:
kubectl get scaledobject [--namespace <namespace>]
Get details about a deployed ScaledObject:
kubectl describe scaledobject <scaled-object-name> [--namespace <namespace>]
Get information about the deployed ScaledObjects:
kubectl get triggerauthentication [--namespace <namespace>]
Get details about a deployed ScaledObject:
kubectl describe triggerauthentication <trigger-authentication-name> [--namespace <namespace>]
Get an overview of the Horizontal Pod Autoscalers (HPA) that KEDA is using behind the scenes:
kubectl get hpa [--all-namespaces] [--namespace <namespace>]
Learn more about KEDA:
- Documentation: https://keda.sh/
- Support: https://keda.sh/support/
- File an issue: https://github.com/kedacore/keda/issues/new/choose
# Verify the KEDA installation
(leeeuijoo@myeks:default) [root@myeks-bastion bin]# kubectl get all -n keda
NAME READY STATUS RESTARTS AGE
pod/keda-admission-webhooks-5bffd88dcf-s6kng 1/1 Running 0 2m7s
pod/keda-operator-856b546d-k7whb 1/1 Running 1 (115s ago) 2m7s
pod/keda-operator-metrics-apiserver-5666945c65-kp85f 1/1 Running 0 2m7s
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
service/keda-admission-webhooks ClusterIP 10.100.142.96 <none> 443/TCP,8080/TCP 2m7s
service/keda-operator ClusterIP 10.100.132.136 <none> 9666/TCP,8080/TCP 2m7s
service/keda-operator-metrics-apiserver ClusterIP 10.100.116.210 <none> 443/TCP,9022/TCP 2m7s
NAME READY UP-TO-DATE AVAILABLE AGE
deployment.apps/keda-admission-webhooks 1/1 1 1 2m7s
deployment.apps/keda-operator 1/1 1 1 2m7s
deployment.apps/keda-operator-metrics-apiserver 1/1 1 1 2m7s
NAME DESIRED CURRENT READY AGE
replicaset.apps/keda-admission-webhooks-5bffd88dcf 1 1 1 2m7s
replicaset.apps/keda-operator-856b546d 1 1 1 2m7s
replicaset.apps/keda-operator-metrics-apiserver-5666945c65 1 1 1 2m7s
kubectl get validatingwebhookconfigurations keda-admission
kubectl get validatingwebhookconfigurations keda-admission -o yaml | kubectl neat | yh
kubectl get crd | grep keda
# Create a deployment in the keda namespace
## The php-apache app from earlier
(leeeuijoo@myeks:default) [root@myeks-bastion bin]# kubectl apply -f php-apache.yaml -n keda
deployment.apps/php-apache created
service/php-apache created
(leeeuijoo@myeks:default) [root@myeks-bastion bin]#
(leeeuijoo@myeks:default) [root@myeks-bastion bin]# kubectl get pod -n keda
NAME READY STATUS RESTARTS AGE
keda-admission-webhooks-5bffd88dcf-s6kng 1/1 Running 0 3m7s
keda-operator-856b546d-k7whb 1/1 Running 1 (2m55s ago) 3m7s
keda-operator-metrics-apiserver-5666945c65-kp85f 1/1 Running 0 3m7s
php-apache-598b474864-4g8nr 1/1 Running 0 3s
cat <<EOT > keda-cron.yaml
apiVersion: keda.sh/v1alpha1
kind: ScaledObject
metadata:
name: php-apache-cron-scaled
spec:
minReplicaCount: 0
maxReplicaCount: 2
pollingInterval: 30
cooldownPeriod: 300
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: php-apache
triggers:
- type: cron
metadata:
timezone: Asia/Seoul
start: 00,15,30,45 * * * *
end: 05,20,35,50 * * * *
desiredReplicas: "1"
EOT
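With this trigger, KEDA scales php-apache up to desiredReplicas=1 during minutes 00-05, 15-20, 30-35, and 45-50 of every hour (Asia/Seoul) and back down to minReplicaCount=0 outside those windows; the trigger is polled every 30 seconds, with a 300-second cooldown before scaling in.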
kubectl apply -f keda-cron.yaml -n keda
# Monitoring
watch -d 'kubectl get ScaledObject,hpa,pod -n keda'
kubectl get ScaledObject -w
# Verify
(leeeuijoo@myeks:default) [root@myeks-bastion bin]# kubectl get ScaledObject,hpa,pod -n keda
NAME SCALETARGETKIND SCALETARGETNAME MIN MAX TRIGGERS AUTHENTICATION READY ACTIVE FALLBACK PAUSED AGE
scaledobject.keda.sh/php-apache-cron-scaled apps/v1.Deployment php-apache 0 2 cron True True Unknown Unknown 2m18s
NAME REFERENCE TARGETS MINPODS MAXPODS REPLICAS AGE
horizontalpodautoscaler.autoscaling/keda-hpa-php-apache-cron-scaled Deployment/php-apache <unknown>/1 (avg) 1 2 1 2m17s
NAME READY STATUS RESTARTS AGE
pod/keda-admission-webhooks-5bffd88dcf-s6kng 1/1 Running 0 6m12s
pod/keda-operator-856b546d-k7whb 1/1 Running 1 (6m ago) 6m12s
pod/keda-operator-metrics-apiserver-5666945c65-kp85f 1/1 Running 0 6m12s
pod/php-apache-598b474864-bm88x 1/1 Running 0 107s
kubectl get hpa -o jsonpath={.items[0].spec} -n keda | jq
...
"metrics": [
{
"external": {
"metric": {
"name": "s0-cron-Asia-Seoul-00,15,30,45xxxx-05,20,35,50xxxx",
"selector": {
"matchLabels": {
"scaledobject.keda.sh/name": "php-apache-cron-scaled"
}
}
},
"target": {
"averageValue": "1",
"type": "AverageValue"
}
},
"type": "External"
}
kubectl delete -f keda-cron.yaml -n keda && kubectl delete deploy php-apache -n keda && helm uninstall keda -n keda
kubectl delete namespace keda
VPA adjusts pod resources.requests toward optimal values as much as possible.
Note that it cannot be used together with HPA, and pods are restarted when their requests are modified.
Create the dashboard - use the official dashboard 14588.
# Download the code
## Git Clone
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# git clone https://github.com/kubernetes/autoscaler.git
Cloning into 'autoscaler'...
remote: Enumerating objects: 195289, done.
remote: Counting objects: 100% (2544/2544), done.
remote: Compressing objects: 100% (1788/1788), done.
remote: Total 195289 (delta 1383), reused 1033 (delta 741), pack-reused 192745
Receiving objects: 100% (195289/195289), 228.81 MiB | 14.95 MiB/s, done.
Resolving deltas: 100% (124157/124157), done.
Updating files: 100% (30209/30209), done.
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# cd ~/autoscaler/vertical-pod-autoscaler/
(leeeuijoo@myeks:default) [root@myeks-bastion vertical-pod-autoscaler]# tree hack
hack
├── boilerplate.go.txt
├── convert-alpha-objects.sh
├── deploy-for-e2e-locally.sh
├── deploy-for-e2e.sh
├── e2e
│ ├── Dockerfile.externalmetrics-writer
│ ├── k8s-metrics-server.yaml
│ ├── kind-with-registry.sh
│ ├── metrics-pump.yaml
│ ├── prometheus-adapter.yaml
│ ├── prometheus.yaml
│ ├── recommender-externalmetrics-deployment.yaml
│ └── vpa-rbac.diff
├── emit-metrics.py
├── generate-crd-yaml.sh
├── local-cluster.md
├── run-e2e-locally.sh
├── run-e2e.sh
├── run-e2e-tests.sh
├── update-codegen.sh
├── update-kubernetes-deps-in-e2e.sh
├── update-kubernetes-deps.sh
├── verify-codegen.sh
├── vpa-apply-upgrade.sh
├── vpa-down.sh
├── vpa-process-yaml.sh
├── vpa-process-yamls.sh
├── vpa-up.sh
└── warn-obsolete-vpa-objects.sh
1 directory, 28 files
# Check the OpenSSL version
(leeeuijoo@myeks:default) [root@myeks-bastion vertical-pod-autoscaler]# openssl version
OpenSSL 1.0.2k-fips 26 Jan 2017
# Install and check OpenSSL 1.1.1 or later
yum install openssl11 -y
(leeeuijoo@myeks:default) [root@myeks-bastion vertical-pod-autoscaler]# openssl11 version
OpenSSL 1.1.1g FIPS 21 Apr 2020
# Replace openssl with openssl11 inside the cert-generation script
(leeeuijoo@myeks:default) [root@myeks-bastion bin]# sed -i 's/openssl/openssl11/g' ./autoscaler/vertical-pod-autoscaler/pkg/admission-controller/gencerts.sh
# Monitoring
watch -d kubectl get pod -n kube-system
# Run the install script
(leeeuijoo@myeks:default) [root@myeks-bastion vertical-pod-autoscaler]# ./hack/vpa-up.sh
# Verify
(leeeuijoo@myeks:default) [root@myeks-bastion vertical-pod-autoscaler]# kubectl get crd | grep autoscaling
verticalpodautoscalercheckpoints.autoscaling.k8s.io 2024-04-04T11:02:30Z
verticalpodautoscalers.autoscaling.k8s.io 2024-04-04T11:02:30Z
(leeeuijoo@myeks:default) [root@myeks-bastion vertical-pod-autoscaler]# kubectl get mutatingwebhookconfigurations vpa-webhook-config
NAME WEBHOOKS AGE
vpa-webhook-config 1 93s
(leeeuijoo@myeks:default) [root@myeks-bastion vertical-pod-autoscaler]# kubectl get mutatingwebhookconfigurations vpa-webhook-config -o json | jq
# Monitoring
watch -d 'kubectl top pod; echo "----------------------"; kubectl describe pod | grep Requests: -A2'
# Deploy the official example
(leeeuijoo@myeks:default) [root@myeks-bastion vertical-pod-autoscaler]# cd ~/autoscaler/vertical-pod-autoscaler/
(leeeuijoo@myeks:default) [root@myeks-bastion vertical-pod-autoscaler]# cat examples/hamster.yaml | yh
---
apiVersion: "autoscaling.k8s.io/v1"
kind: VerticalPodAutoscaler
metadata:
name: hamster-vpa
spec:
# recommenders
# - name 'alternative'
targetRef:
apiVersion: "apps/v1"
kind: Deployment
name: hamster
resourcePolicy:
containerPolicies:
- containerName: '*'
minAllowed:
cpu: 100m
memory: 50Mi
maxAllowed:
cpu: 1
memory: 500Mi
controlledResources: ["cpu", "memory"]
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: hamster
spec:
selector:
matchLabels:
app: hamster
replicas: 2
template:
metadata:
labels:
app: hamster
spec:
securityContext:
runAsNonRoot: true
runAsUser: 65534 # nobody
containers:
- name: hamster
image: registry.k8s.io/ubuntu-slim:0.1
resources:
requests:
cpu: 100m
memory: 50Mi
command: ["/bin/sh"]
args:
- "-c"
- "while true; do timeout 0.5s yes >/dev/null; sleep 0.5s; done"
(leeeuijoo@myeks:default) [root@myeks-bastion vertical-pod-autoscaler]# kubectl apply -f examples/hamster.yaml && kubectl get vpa -w
verticalpodautoscaler.autoscaling.k8s.io/hamster-vpa created
deployment.apps/hamster created
NAME MODE CPU MEM PROVIDED AGE
hamster-vpa Auto 1s
hamster-vpa Auto False 4s
NAME MODE CPU MEM PROVIDED AGE
hamster-vpa Auto 1s
hamster-vpa Auto False 4s
hamster-vpa Auto 476m 262144k True 64s
hamster-vpa Auto 511m 262144k True 3m4s
----------------
Every 2.0s: kubectl top pod;echo ----------------------;kubectl describe pod | grep Requests: -A2 Thu Apr 4 20:11:38 2024
NAME CPU(cores) MEMORY(bytes)
hamster-c6967774f-hv69t 440m 0Mi
hamster-c6967774f-qqsbg 405m 0Mi
----------------------
Requests:
cpu: 476m
memory: 262144k
--
Requests:
cpu: 476m
memory: 262144k
(leeeuijoo@myeks:default) [root@myeks-bastion vertical-pod-autoscaler]# kubectl get events --sort-by=".metadata.creationTimestamp" | grep VPA
4m9s Normal EvictedByVPA pod/hamster-c6967774f-zldgn Pod was evicted by VPA Updater to apply resource recommendation.
3m9s Normal EvictedByVPA pod/hamster-c6967774f-m4bkl Pod was evicted by VPA Updater to apply resource recommendation.
kubectl delete -f examples/hamster.yaml && cd ~/autoscaler/vertical-pod-autoscaler/ && ./hack/vpa-down.sh
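The VPA recommendations can also be queried in Prometheus/Grafana via the kube-state-metrics customResourceState metrics defined in monitor-values.yaml earlier (kube-state-metrics prefixes them with kube_customresource_):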
kube_customresource_vpa_containerrecommendations_target{resource="cpu"}
kube_customresource_vpa_containerrecommendations_target{resource="memory"}
# The EKS nodes already carry the tags below.
# k8s.io/cluster-autoscaler/enabled : true
# k8s.io/cluster-autoscaler/myeks : owned
---------------------
- Key: k8s.io/cluster-autoscaler/myeks
Value: owned
- Key: k8s.io/cluster-autoscaler/enabled
Value: 'true'
---------------------
(leeeuijoo@myeks:default) [root@myeks-bastion vertical-pod-autoscaler]# aws autoscaling describe-auto-scaling-groups \
> --query "AutoScalingGroups[? Tags[? (Key=='eks:cluster-name') && Value=='myeks']].[AutoScalingGroupName, MinSize, MaxSize,DesiredCapacity]" \
> --output table
-----------------------------------------------------------------
| DescribeAutoScalingGroups |
+------------------------------------------------+----+----+----+
| eks-ng1-f0c75435-4623-c223-5024-ce7ffb6535e8 | 3 | 3 | 3 |
+------------------------------------------------+----+----+----+
# Change MaxSize to 6 - only the numbers are changed.
(leeeuijoo@myeks:default) [root@myeks-bastion vertical-pod-autoscaler]# export ASG_NAME=$(aws autoscaling describe-auto-scaling-groups --query "AutoScalingGroups[? Tags[? (Key=='eks:cluster-name') && Value=='myeks']].AutoScalingGroupName" --output text)
(leeeuijoo@myeks:default) [root@myeks-bastion vertical-pod-autoscaler]# aws autoscaling update-auto-scaling-group --auto-scaling-group-name ${ASG_NAME} --min-size 3 --desired-capacity 3 --max-size 6
# Verify
(leeeuijoo@myeks:default) [root@myeks-bastion vertical-pod-autoscaler]# aws autoscaling describe-auto-scaling-groups --query "AutoScalingGroups[? Tags[? (Key=='eks:cluster-name') && Value=='myeks']].[AutoScalingGroupName, MinSize, MaxSize,DesiredCapacity]" --output table
-----------------------------------------------------------------
| DescribeAutoScalingGroups |
+------------------------------------------------+----+----+----+
| eks-ng1-f0c75435-4623-c223-5024-ce7ffb6535e8 | 3 | 6 | 3 |
+------------------------------------------------+----+----+----+
(leeeuijoo@myeks:default) [root@myeks-bastion vertical-pod-autoscaler]# curl -s -O https://raw.githubusercontent.com/kubernetes/autoscaler/master/cluster-autoscaler/cloudprovider/aws/examples/cluster-autoscaler-autodiscover.yaml
(leeeuijoo@myeks:default) [root@myeks-bastion vertical-pod-autoscaler]# sed -i "s/<YOUR CLUSTER NAME>/$CLUSTER_NAME/g" cluster-autoscaler-autodiscover.yaml
(leeeuijoo@myeks:default) [root@myeks-bastion vertical-pod-autoscaler]# kubectl apply -f cluster-autoscaler-autodiscover.yaml
# Verify
(leeeuijoo@myeks:default) [root@myeks-bastion vertical-pod-autoscaler]# kubectl get pod -n kube-system | grep cluster-autoscaler
cluster-autoscaler-857b945c88-vjwtq 1/1 Running 0 14s
kubectl describe deployments.apps -n kube-system cluster-autoscaler
kubectl describe deployments.apps -n kube-system cluster-autoscaler | grep node-group-auto-discovery
--node-group-auto-discovery=asg:tag=k8s.io/cluster-autoscaler/enabled,k8s.io/cluster-autoscaler/myeks
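Cluster Autoscaler discovers which ASGs it is allowed to scale through exactly these two tags (k8s.io/cluster-autoscaler/enabled and k8s.io/cluster-autoscaler/myeks), which are already present on the EKS node group's ASG as noted above.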
# (Optional) keep the worker node running the cluster-autoscaler pod from being evicted
(leeeuijoo@myeks:default) [root@myeks-bastion vertical-pod-autoscaler]# kubectl -n kube-system annotate deployment.apps/cluster-autoscaler cluster-autoscaler.kubernetes.io/safe-to-evict="false"
deployment.apps/cluster-autoscaler annotated
# Monitoring
kubectl get nodes -w
while true; do kubectl get node; echo "------------------------------" ; date ; sleep 1; done
# Deploy a Deployment
# 1 pod
cat <<EoF> nginx.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: nginx-to-scaleout
spec:
replicas: 1
selector:
matchLabels:
app: nginx
template:
metadata:
labels:
service: nginx
app: nginx
spec:
containers:
- image: nginx
name: nginx-to-scaleout
resources:
limits:
cpu: 500m
memory: 512Mi
requests:
cpu: 500m
memory: 512Mi
EoF
(leeeuijoo@myeks:default) [root@myeks-bastion vertical-pod-autoscaler]# kubectl apply -f nginx.yaml
deployment.apps/nginx-to-scaleout created
(leeeuijoo@myeks:default) [root@myeks-bastion vertical-pod-autoscaler]# kubectl get deployment/nginx-to-scaleout
NAME READY UP-TO-DATE AVAILABLE AGE
nginx-to-scaleout 1/1 1 1 12s
# Trigger scaling
(leeeuijoo@myeks:default) [root@myeks-bastion vertical-pod-autoscaler]# kubectl scale --replicas=15 deployment/nginx-to-scaleout && date
deployment.apps/nginx-to-scaleout scaled
Thu Apr 4 20:44:05 KST 2024
# Verify
Thu Apr 4 20:44:58 KST 2024
NAME STATUS ROLES AGE VERSION
ip-192-168-1-127.ap-northeast-2.compute.internal NotReady <none> 9s v1.28.5-eks-5e0fdde
ip-192-168-1-146.ap-northeast-2.compute.internal Ready <none> 140m v1.28.5-eks-5e0fdde
ip-192-168-2-60.ap-northeast-2.compute.internal Ready <none> 140m v1.28.5-eks-5e0fdde
ip-192-168-2-84.ap-northeast-2.compute.internal Ready <none> 16s v1.28.5-eks-5e0fdde
ip-192-168-3-134.ap-northeast-2.compute.internal Ready <none> 140m v1.28.5-eks-5e0fdde
ip-192-168-3-222.ap-northeast-2.compute.internal NotReady <none> 10s v1.28.5-eks-5e0fdde
# Several pods are Pending
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
nginx-to-scaleout-5f9f9c65ff-2vj5c 1/1 Running 0 46s 192.168.2.198 ip-192-168-2-60.ap-northeast-2.compute.internal <none> <none>
nginx-to-scaleout-5f9f9c65ff-68nl9 0/1 Pending 0 46s <none> <none> <none> <none>
nginx-to-scaleout-5f9f9c65ff-97v8j 0/1 Pending 0 46s <none> <none> <none> <none>
nginx-to-scaleout-5f9f9c65ff-g2ddk 1/1 Running 0 46s 192.168.1.235 ip-192-168-1-146.ap-northeast-2.compute.internal <none> <none>
nginx-to-scaleout-5f9f9c65ff-jxzzb 0/1 Pending 0 46s <none> <none> <none> <none>
nginx-to-scaleout-5f9f9c65ff-kl8b6 0/1 Pending 0 46s <none> <none> <none> <none>
nginx-to-scaleout-5f9f9c65ff-mdqqm 1/1 Running 0 2m 192.168.3.36 ip-192-168-3-134.ap-northeast-2.compute.internal <none> <none>
nginx-to-scaleout-5f9f9c65ff-p5c8g 0/1 Pending 0 46s <none> <none> <none> <none>
nginx-to-scaleout-5f9f9c65ff-q72xf 1/1 Running 0 46s 192.168.2.47 ip-192-168-2-60.ap-northeast-2.compute.internal <none> <none>
nginx-to-scaleout-5f9f9c65ff-rl9p8 1/1 Running 0 46s 192.168.2.135 ip-192-168-2-60.ap-northeast-2.compute.internal <none> <none>
nginx-to-scaleout-5f9f9c65ff-vcm9q 1/1 Running 0 46s 192.168.3.243 ip-192-168-3-134.ap-northeast-2.compute.internal <none> <none>
nginx-to-scaleout-5f9f9c65ff-wgljw 1/1 Running 0 46s 192.168.1.20 ip-192-168-1-146.ap-northeast-2.compute.internal <none> <none>
# Confirm that nodes have been added
Thu Apr 4 20:46:37 KST 2024
NAME STATUS ROLES AGE VERSION
ip-192-168-1-127.ap-northeast-2.compute.internal Ready <none> 108s v1.28.5-eks-5e0fdde
ip-192-168-1-146.ap-northeast-2.compute.internal Ready <none> 141m v1.28.5-eks-5e0fdde
ip-192-168-2-60.ap-northeast-2.compute.internal Ready <none> 141m v1.28.5-eks-5e0fdde
ip-192-168-2-84.ap-northeast-2.compute.internal Ready <none> 115s v1.28.5-eks-5e0fdde
ip-192-168-3-134.ap-northeast-2.compute.internal Ready <none> 141m v1.28.5-eks-5e0fdde
ip-192-168-3-222.ap-northeast-2.compute.internal Ready <none> 109s v1.28.5-eks-5e0fdde
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# kubectl get pods -l app=nginx -o wide --watch
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
nginx-to-scaleout-5f9f9c65ff-2vj5c 1/1 Running 0 3m58s 192.168.2.198 ip-192-168-2-60.ap-northeast-2.compute.internal <none> <none>
nginx-to-scaleout-5f9f9c65ff-68nl9 1/1 Running 0 3m58s 192.168.1.126 ip-192-168-1-127.ap-northeast-2.compute.internal <none> <none>
nginx-to-scaleout-5f9f9c65ff-97v8j 1/1 Running 0 3m58s 192.168.3.113 ip-192-168-3-222.ap-northeast-2.compute.internal <none> <none>
nginx-to-scaleout-5f9f9c65ff-g2ddk 1/1 Running 0 3m58s 192.168.1.235 ip-192-168-1-146.ap-northeast-2.compute.internal <none> <none>
nginx-to-scaleout-5f9f9c65ff-jxzzb 1/1 Running 0 3m58s 192.168.1.217 ip-192-168-1-127.ap-northeast-2.compute.internal <none> <none>
nginx-to-scaleout-5f9f9c65ff-kl8b6 1/1 Running 0 3m58s 192.168.1.240 ip-192-168-1-127.ap-northeast-2.compute.internal <none> <none>
nginx-to-scaleout-5f9f9c65ff-mdqqm 1/1 Running 0 5m12s 192.168.3.36 ip-192-168-3-134.ap-northeast-2.compute.internal <none> <none>
nginx-to-scaleout-5f9f9c65ff-p5c8g 1/1 Running 0 3m58s 192.168.2.175 ip-192-168-2-84.ap-northeast-2.compute.internal <none> <none>
nginx-to-scaleout-5f9f9c65ff-q72xf 1/1 Running 0 3m58s 192.168.2.47 ip-192-168-2-60.ap-northeast-2.compute.internal <none> <none>
nginx-to-scaleout-5f9f9c65ff-rl9p8 1/1 Running 0 3m58s 192.168.2.135 ip-192-168-2-60.ap-northeast-2.compute.internal <none> <none>
nginx-to-scaleout-5f9f9c65ff-vcm9q 1/1 Running 0 3m58s 192.168.3.243 ip-192-168-3-134.ap-northeast-2.compute.internal <none> <none>
nginx-to-scaleout-5f9f9c65ff-wgljw 1/1 Running 0 3m58s 192.168.1.20 ip-192-168-1-146.ap-northeast-2.compute.internal <none> <none>
nginx-to-scaleout-5f9f9c65ff-wns67 1/1 Running 0 3m58s 192.168.2.28 ip-192-168-2-84.ap-northeast-2.compute.internal <none> <none>
nginx-to-scaleout-5f9f9c65ff-xnn82 1/1 Running 0 3m58s 192.168.2.90 ip-192-168-2-84.ap-northeast-2.compute.internal <none> <none>
nginx-to-scaleout-5f9f9c65ff-zp7dg 1/1 Running 0 3m58s 192.168.1.210 ip-192-168-1-146.ap-northeast-2.compute.internal <none> <none>
# Delete the Deployment
kubectl delete -f nginx.yaml && date
watch -d kubectl get node
# Restore the ASG size
aws autoscaling update-auto-scaling-group --auto-scaling-group-name ${ASG_NAME} --min-size 3 --desired-capacity 3 --max-size 3
aws autoscaling describe-auto-scaling-groups --query "AutoScalingGroups[? Tags[? (Key=='eks:cluster-name') && Value=='myeks']].[AutoScalingGroupName, MinSize, MaxSize,DesiredCapacity]" --output table
# Delete the Cluster Autoscaler
kubectl delete -f cluster-autoscaler-autodiscover.yaml
Because a single resource is managed in two places (AWS ASG vs. AWS EKS), each in its own way, the two views fall out of sync and various problems occur.
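CPA (Cluster Proportional Autoscaler) takes a different approach: it scales a target workload's replica count in proportion to the cluster's node (or core) count, the way add-ons such as CoreDNS are commonly sized.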
(leeeuijoo@myeks:default) [root@myeks-bastion ~]# helm repo add cluster-proportional-autoscaler https://kubernetes-sigs.github.io/cluster-proportional-autoscaler
"cluster-proportional-autoscaler" has been added to your repositories
# We will configure the CPA rules and release the Helm chart.
# Deploy a Deployment
(leeeuijoo@myeks:default) [root@myeks-bastion vertical-pod-autoscaler]# cat <<EOT > cpa-nginx.yaml
> apiVersion: apps/v1
> kind: Deployment
> metadata:
> name: nginx-deployment
> spec:
> replicas: 1
> selector:
> matchLabels:
> app: nginx
> template:
> metadata:
> labels:
> app: nginx
> spec:
> containers:
> - name: nginx
> image: nginx:latest
> resources:
> limits:
> cpu: "100m"
> memory: "64Mi"
> requests:
> cpu: "100m"
> memory: "64Mi"
> ports:
> - containerPort: 80
> EOT
(leeeuijoo@myeks:default) [root@myeks-bastion vertical-pod-autoscaler]# kubectl apply -f cpa-nginx.yaml
deployment.apps/nginx-deployment created
# Configure the CPA rules
cat <<EOF > cpa-values.yaml
config:
ladder:
nodesToReplicas:
- [1, 1]
- [2, 2]
- [3, 3]
- [4, 3]
- [5, 5]
options:
namespace: default
target: "deployment/nginx-deployment"
EOF
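With the ladder above, CPA watches the node count and sets nginx-deployment's replicas accordingly: 1 node → 1 replica, 2 → 2, 3 → 3, 4 → 3, 5 → 5. The ASG resizing below (to 5 nodes, then 4) demonstrates the replica count following the node count.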
# Monitoring
watch -d kubectl get pod
# Helm upgrade (install)
(leeeuijoo@myeks:default) [root@myeks-bastion vertical-pod-autoscaler]# helm upgrade --install cluster-proportional-autoscaler -f cpa-values.yaml cluster-proportional-autoscaler/cluster-proportional-autoscaler
Release "cluster-proportional-autoscaler" does not exist. Installing it now.
NAME: cluster-proportional-autoscaler
LAST DEPLOYED: Thu Apr 4 23:16:28 2024
NAMESPACE: default
STATUS: deployed
REVISION: 1
TEST SUITE: None
(leeeuijoo@myeks:default) [root@myeks-bastion vertical-pod-autoscaler]# export ASG_NAME=$(aws autoscaling describe-auto-scaling-groups --query "AutoScalingGroups[? Tags[? (Key=='eks:cluster-name') && Value=='myeks']].AutoScalingGroupName" --output text)
(leeeuijoo@myeks:default) [root@myeks-bastion vertical-pod-autoscaler]# aws autoscaling update-auto-scaling-group --auto-scaling-group-name ${ASG_NAME} --min-size 5 --desired-capacity 5 --max-size 5
(leeeuijoo@myeks:default) [root@myeks-bastion vertical-pod-autoscaler]# aws autoscaling describe-auto-scaling-groups --query "AutoScalingGroups[? Tags[? (Key=='eks:cluster-name') && Value=='myeks']].[AutoScalingGroupName, MinSize, MaxSize,DesiredCapacity]" --output table
-----------------------------------------------------------------
| DescribeAutoScalingGroups |
+------------------------------------------------+----+----+----+
| eks-ng1-f0c75435-4623-c223-5024-ce7ffb6535e8 | 5 | 5 | 5 |
+------------------------------------------------+----+----+----+
(leeeuijoo@myeks:default) [root@myeks-bastion vertical-pod-autoscaler]# aws autoscaling update-auto-scaling-group --auto-scaling-group-name ${ASG_NAME} --min-size 4 --desired-capacity 4 --max-size 4
(leeeuijoo@myeks:default) [root@myeks-bastion vertical-pod-autoscaler]# aws autoscaling describe-auto-scaling-groups --query "AutoScalingGroups[? Tags[? (Key=='eks:cluster-name') && Value=='myeks']].[AutoScalingGroupName, MinSize, MaxSize,DesiredCapacity]" --output table
-----------------------------------------------------------------
| DescribeAutoScalingGroups |
+------------------------------------------------+----+----+----+
| eks-ng1-f0c75435-4623-c223-5024-ce7ffb6535e8 | 4 | 4 | 4 |
+------------------------------------------------+----+----+----+
(leeeuijoo@myeks:default) [root@myeks-bastion vertical-pod-autoscaler]# helm uninstall cluster-proportional-autoscaler && kubectl delete -f cpa-nginx.yaml
release "cluster-proportional-autoscaler" uninstalled
deployment.apps "nginx-deployment" deleted
eksctl delete cluster --name $CLUSTER_NAME && aws cloudformation delete-stack --stack-name $CLUSTER_NAME
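# Karpenter hands-on: prepare a separate practice environment (myeks2)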
# Download the YAML file
curl -O https://s3.ap-northeast-2.amazonaws.com/cloudformation.cloudneta.net/K8S/karpenter-preconfig.yaml
# Deploy the CloudFormation stack
Example) aws cloudformation deploy --template-file karpenter-preconfig.yaml --stack-name myeks2 --parameter-overrides KeyName=kp-gasida SgIngressSshCidr=$(curl -s ipinfo.io/ip)/32 MyIamUserAccessKeyID=AKIA5... MyIamUserSecretAccessKey='CVNa2...' ClusterBaseName=myeks2 --region ap-northeast-2
# After the stack finishes deploying, print the working EC2 instance's public IP
aws cloudformation describe-stacks --stack-name myeks2 --query 'Stacks[*].Outputs[0].OutputValue' --output text
# SSH into the working EC2 instance
ssh -i ~/.ssh/kp-gasida.pem ec2-user@$(aws cloudformation describe-stacks --stack-name myeks2 --query 'Stacks[*].Outputs[0].OutputValue' --output text)
# Check IP addresses: the bastion uses the 172.30.1.0/24 subnet within the 172.30.0.0/16 VPC CIDR
[root@myeks2-bastion ~]# ip -br -c addr
lo UNKNOWN 127.0.0.1/8 ::1/128
eth0 UP 172.30.1.100/24 fe80::8:beff:fec1:560b/64
docker0 DOWN 172.17.0.1/16
[root@myeks2-bastion ~]#
# Install EKS Node Viewer: building it on the current EC2 spec takes a while (2+ minutes)
[root@myeks2-bastion ~]# wget https://go.dev/dl/go1.22.1.linux-amd64.tar.gz
--2024-04-04 23:48:13-- https://go.dev/dl/go1.22.1.linux-amd64.tar.gz
Resolving go.dev (go.dev)... 216.239.38.21, 216.239.32.21, 216.239.34.21, ...
Connecting to go.dev (go.dev)|216.239.38.21|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://dl.google.com/go/go1.22.1.linux-amd64.tar.gz [following]
--2024-04-04 23:48:14-- https://dl.google.com/go/go1.22.1.linux-amd64.tar.gz
Resolving dl.google.com (dl.google.com)... 172.217.161.78, 2404:6800:4004:81f::200e
Connecting to dl.google.com (dl.google.com)|172.217.161.78|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 68965341 (66M) [application/x-gzip]
Saving to: ‘go1.22.1.linux-amd64.tar.gz’
100%[=========================================================================================================================================>] 68,965,341 66.9MB/s in 1.0s
2024-04-04 23:48:15 (66.9 MB/s) - ‘go1.22.1.linux-amd64.tar.gz’ saved [68965341/68965341]
[root@myeks2-bastion ~]# tar -C /usr/local -xzf go1.22.1.linux-amd64.tar.gz
[root@myeks2-bastion ~]# export PATH=$PATH:/usr/local/go/bin
[root@myeks2-bastion ~]# go install github.com/awslabs/eks-node-viewer/cmd/eks-node-viewer@latest
# Run after the EKS cluster deployment completes
cd ~/go/bin && ./eks-node-viewer --resources cpu,memory
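Note that the export PATH above only applies to the current shell. A small sketch to persist both the Go toolchain and the compiled binary across sessions, assuming the default GOPATH of ~/go (the document already uses /etc/profile for this purpose):
echo 'export PATH=$PATH:/usr/local/go/bin:$HOME/go/bin' >> /etc/profile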
# Check the preset variables
[root@myeks2-bastion ~]# export | egrep 'ACCOUNT|AWS_' | egrep -v 'SECRET|KEY'
declare -x ACCOUNT_ID="236747833953"
declare -x AWS_ACCOUNT_ID="236747833953"
declare -x AWS_DEFAULT_REGION="ap-northeast-2"
declare -x AWS_PAGER=""
declare -x AWS_REGION="ap-northeast-2"
# Set variables
export KARPENTER_NAMESPACE="kube-system"
export K8S_VERSION="1.29"
export KARPENTER_VERSION="0.35.2"
export TEMPOUT=$(mktemp)
export ARM_AMI_ID="$(aws ssm get-parameter --name /aws/service/eks/optimized-ami/${K8S_VERSION}/amazon-linux-2-arm64/recommended/image_id --query Parameter.Value --output text)"
export AMD_AMI_ID="$(aws ssm get-parameter --name /aws/service/eks/optimized-ami/${K8S_VERSION}/amazon-linux-2/recommended/image_id --query Parameter.Value --output text)"
export GPU_AMI_ID="$(aws ssm get-parameter --name /aws/service/eks/optimized-ami/${K8S_VERSION}/amazon-linux-2-gpu/recommended/image_id --query Parameter.Value --output text)"
export AWS_PARTITION="aws"
export CLUSTER_NAME="${USER}-karpenter-demo"
echo "export CLUSTER_NAME=$CLUSTER_NAME" >> /etc/profile
echo $KARPENTER_VERSION $CLUSTER_NAME $AWS_DEFAULT_REGION $AWS_ACCOUNT_ID $TEMPOUT $ARM_AMI_ID $AMD_AMI_ID $GPU_AMI_ID
# Create the IAM Policy and Role (KarpenterNodeRole-myeks2) via a CloudFormation stack: takes about 3 minutes
curl -fsSL https://raw.githubusercontent.com/aws/karpenter-provider-aws/v"${KARPENTER_VERSION}"/website/content/en/preview/getting-started/getting-started-with-karpenter/cloudformation.yaml > "${TEMPOUT}" \
&& aws cloudformation deploy \
--stack-name "Karpenter-${CLUSTER_NAME}" \
--template-file "${TEMPOUT}" \
--capabilities CAPABILITY_NAMED_IAM \
--parameter-overrides "ClusterName=${CLUSTER_NAME}"
# Create the EKS cluster with eksctl
eksctl create cluster -f - <<EOF
---
apiVersion: eksctl.io/v1alpha5
kind: ClusterConfig
metadata:
  name: ${CLUSTER_NAME}
  region: ${AWS_DEFAULT_REGION}
  version: "${K8S_VERSION}"
  tags:
    karpenter.sh/discovery: ${CLUSTER_NAME}
iam:
  withOIDC: true
  serviceAccounts:
  - metadata:
      name: karpenter
      namespace: "${KARPENTER_NAMESPACE}"
    roleName: ${CLUSTER_NAME}-karpenter
    attachPolicyARNs:
    - arn:${AWS_PARTITION}:iam::${AWS_ACCOUNT_ID}:policy/KarpenterControllerPolicy-${CLUSTER_NAME}
    roleOnly: true
iamIdentityMappings:
- arn: "arn:${AWS_PARTITION}:iam::${AWS_ACCOUNT_ID}:role/KarpenterNodeRole-${CLUSTER_NAME}"
  username: system:node:{{EC2PrivateDNSName}}
  groups:
  - system:bootstrappers
  - system:nodes
managedNodeGroups:
- instanceType: m5.large
  amiFamily: AmazonLinux2
  name: ${CLUSTER_NAME}-ng
  desiredCapacity: 2
  minSize: 1
  maxSize: 10
  iam:
    withAddonPolicies:
      externalDNS: true
EOF
# Verify the EKS deployment
(leeeuijoo@root-karpenter-demo:N/A) [root@myeks2-bastion ~]# eksctl get cluster
NAME REGION EKSCTL CREATED
root-karpenter-demo ap-northeast-2 True
(leeeuijoo@root-karpenter-demo:N/A) [root@myeks2-bastion ~]# eksctl get nodegroup --cluster $CLUSTER_NAME
CLUSTER NODEGROUP STATUS CREATED MIN SIZE MAX SIZE DESIRED CAPACITY INSTANCE TYPE IMAGE ID ASG NAME TYPE
root-karpenter-demo root-karpenter-demo-ng ACTIVE 2024-04-04T15:08:40Z 1 10 2 m5.large AL2_x86_64 eks-root-karpenter-demo-ng-a4c754d3-cbdc-8af1-d357-da7646affafe managed
(leeeuijoo@root-karpenter-demo:N/A) [root@myeks2-bastion ~]# eksctl get iamidentitymapping --cluster $CLUSTER_NAME
ARN USERNAME GROUPS ACCOUNT
arn:aws:iam::236747833953:role/KarpenterNodeRole-root-karpenter-demo system:node:{{EC2PrivateDNSName}} system:bootstrappers,system:nodes
arn:aws:iam::236747833953:role/eksctl-root-karpenter-demo-nodegro-NodeInstanceRole-qLwn3RoEQp39 system:node:{{EC2PrivateDNSName}} system:bootstrappers,system:nodes
(leeeuijoo@root-karpenter-demo:N/A) [root@myeks2-bastion ~]# eksctl get iamserviceaccount --cluster $CLUSTER_NAME
NAMESPACE NAME ROLE ARN
kube-system aws-node arn:aws:iam::236747833953:role/eksctl-root-karpenter-demo-addon-iamserviceac-Role1-HXF4r6mccbxw
kube-system karpenter arn:aws:iam::236747833953:role/root-karpenter-demo-karpenter
(leeeuijoo@root-karpenter-demo:N/A) [root@myeks2-bastion ~]# eksctl get addon --cluster $CLUSTER_NAME
2024-04-05 00:12:36 [ℹ] Kubernetes version "1.29" in use by cluster "root-karpenter-demo"
2024-04-05 00:12:36 [ℹ] getting all addons
No addons found
# Switch to the default namespace
kubectl ns default
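kubectl ns comes from the kubens (krew) plugin; if the plugin is unavailable, the plain-kubectl equivalent is:
kubectl config set-context --current --namespace=default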
# Check node information
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# kubectl get node --label-columns=node.kubernetes.io/instance-type,eks.amazonaws.com/capacityType,topology.kubernetes.io/zone
NAME STATUS ROLES AGE VERSION INSTANCE-TYPE CAPACITYTYPE ZONE
ip-192-168-30-86.ap-northeast-2.compute.internal Ready <none> 3m35s v1.29.0-eks-5e0fdde m5.large ON_DEMAND ap-northeast-2c
ip-192-168-75-127.ap-northeast-2.compute.internal Ready <none> 3m35s v1.29.0-eks-5e0fdde m5.large ON_DEMAND ap-northeast-2a
# ExternalDNS
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# MyDomain=22joo.shop
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# echo "export MyDomain=22joo.shop" >> /etc/profile
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# MyDnzHostedZoneId=$(aws route53 list-hosted-zones-by-name --dns-name "${MyDomain}." --query "HostedZones[0].Id" --output text)
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# echo $MyDomain, $MyDnzHostedZoneId
22joo.shop, /hostedzone/Z07798463AFECYTX1ODP4
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# curl -s -O https://raw.githubusercontent.com/gasida/PKOS/main/aews/externaldns.yaml
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# MyDomain=$MyDomain MyDnzHostedZoneId=$MyDnzHostedZoneId envsubst < externaldns.yaml | kubectl apply -f -
# kube-ops-view
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# helm repo add geek-cookbook https://geek-cookbook.github.io/charts/
"geek-cookbook" has been added to your repositories
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# helm install kube-ops-view geek-cookbook/kube-ops-view --version 1.2.2 --set env.TZ="Asia/Seoul" --namespace kube-system
NAME: kube-ops-view
LAST DEPLOYED: Fri Apr 5 00:14:30 2024
NAMESPACE: kube-system
STATUS: deployed
REVISION: 1
TEST SUITE: None
NOTES:
1. Get the application URL by running these commands:
export POD_NAME=$(kubectl get pods --namespace kube-system -l "app.kubernetes.io/name=kube-ops-view,app.kubernetes.io/instance=kube-ops-view" -o jsonpath="{.items[0].metadata.name}")
echo "Visit http://127.0.0.1:8080 to use your application"
kubectl port-forward $POD_NAME 8080:8080
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# kubectl patch svc -n kube-system kube-ops-view -p '{"spec":{"type":"LoadBalancer"}}'
service/kube-ops-view patched
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# kubectl annotate service kube-ops-view -n kube-system "external-dns.alpha.kubernetes.io/hostname=kubeopsview.$MyDomain"
service/kube-ops-view annotated
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# echo -e "Kube Ops View URL = http://kubeopsview.$MyDomain:8080/#scale=1.5"
Kube Ops View URL = http://kubeopsview.22joo.shop:8080/#scale=1.5
# [Terminal 1] eks-node-viewer
cd ~/go/bin && ./eks-node-viewer --resources cpu,memory
# Check the Kubernetes cluster
kubectl cluster-info
kubectl get node --label-columns=node.kubernetes.io/instance-type,eks.amazonaws.com/capacityType,topology.kubernetes.io/zone
kubectl get pod -n kube-system -owide
kubectl describe cm -n kube-system aws-auth
# Set and verify the variables needed for the Karpenter install
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# export CLUSTER_ENDPOINT="$(aws eks describe-cluster --name "${CLUSTER_NAME}" --query "cluster.endpoint" --output text)"
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# export KARPENTER_IAM_ROLE_ARN="arn:${AWS_PARTITION}:iam::${AWS_ACCOUNT_ID}:role/${CLUSTER_NAME}-karpenter"
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# echo "${CLUSTER_ENDPOINT} ${KARPENTER_IAM_ROLE_ARN}"
https://4A9104969E6FC7E7A58E24242D4780BF.gr7.ap-northeast-2.eks.amazonaws.com arn:aws:iam::236747833953:role/root-karpenter-demo-karpenter
# docker logout : Logout of docker to perform an unauthenticated pull against the public ECR
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# docker logout public.ecr.aws
Removing login credentials for public.ecr.aws
# helm registry logout
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# helm registry logout public.ecr.aws
# Install Karpenter
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# helm install karpenter oci://public.ecr.aws/karpenter/karpenter --version "${KARPENTER_VERSION}" --namespace "${KARPENTER_NAMESPACE}" --create-namespace \
> --set "serviceAccount.annotations.eks\.amazonaws\.com/role-arn=${KARPENTER_IAM_ROLE_ARN}" \
> --set "settings.clusterName=${CLUSTER_NAME}" \
> --set "settings.interruptionQueue=${CLUSTER_NAME}" \
> --set controller.resources.requests.cpu=1 \
> --set controller.resources.requests.memory=1Gi \
> --set controller.resources.limits.cpu=1 \
> --set controller.resources.limits.memory=1Gi \
> --wait
Pulled: public.ecr.aws/karpenter/karpenter:0.35.2
Digest: sha256:9a7691b9735fd7e6e3e7ff8c80d256b7c2197349e72fb36937c4ad963fb6d352
NAME: karpenter
LAST DEPLOYED: Fri Apr 5 00:20:29 2024
NAMESPACE: kube-system
STATUS: deployed
REVISION: 1
TEST SUITE: None
# Verify
kubectl get-all -n $KARPENTER_NAMESPACE
kubectl get all -n $KARPENTER_NAMESPACE
kubectl get crd | grep karpenter
# API changes from v1alpha5 to v1beta1
v1alpha5/Provisioner → v1beta1/NodePool
v1alpha1/AWSNodeTemplate → v1beta1/EC2NodeClass
v1alpha5/Machine → v1beta1/NodeClaim
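To confirm which API versions the installed CRDs actually serve, reading the CRDs directly is enough; an illustrative check:
kubectl get crd nodepools.karpenter.sh -o jsonpath='{.spec.versions[*].name}{"\n"}'
kubectl get crd ec2nodeclasses.karpenter.k8s.aws -o jsonpath='{.spec.versions[*].name}{"\n"}'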
# Add the Grafana and Prometheus Helm chart repos
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# helm repo add grafana-charts https://grafana.github.io/helm-charts
"grafana-charts" has been added to your repositories
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
"prometheus-community" has been added to your repositories
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# helm repo update
Hang tight while we grab the latest from your chart repositories...
...Successfully got an update from the "grafana-charts" chart repository
...Successfully got an update from the "prometheus-community" chart repository
...Successfully got an update from the "geek-cookbook" chart repository
Update Complete. ⎈Happy Helming!⎈
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# kubectl create namespace monitoring
namespace/monitoring created
# Install Prometheus
curl -fsSL https://raw.githubusercontent.com/aws/karpenter-provider-aws/v"${KARPENTER_VERSION}"/website/content/en/preview/getting-started/getting-started-with-karpenter/prometheus-values.yaml | envsubst | tee prometheus-values.yaml
helm install --namespace monitoring prometheus prometheus-community/prometheus --values prometheus-values.yaml
# Install Grafana
curl -fsSL https://raw.githubusercontent.com/aws/karpenter-provider-aws/v"${KARPENTER_VERSION}"/website/content/en/preview/getting-started/getting-started-with-karpenter/grafana-values.yaml | tee grafana-values.yaml
helm install --namespace monitoring grafana grafana-charts/grafana --values grafana-values.yaml
kubectl patch svc -n monitoring grafana -p '{"spec":{"type":"LoadBalancer"}}'
# Grafana admin password
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# kubectl get secret --namespace monitoring grafana -o jsonpath="{.data.admin-password}" | base64 --decode ; echo
2AMkLHBWIxiyH9ZZY2QYlZewXNd9E2cjjFQyQ5CW
# Access Grafana
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# kubectl annotate service grafana -n monitoring "external-dns.alpha.kubernetes.io/hostname=grafana.$MyDomain"
service/grafana annotated
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# echo -e "grafana URL = http://grafana.$MyDomain"
grafana URL = http://grafana.22joo.shop
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# curl http://grafana.22joo.shop
<a href="/login">Found</a>.
# Create the default NodePool and EC2NodeClass
cat <<EOF | envsubst | kubectl apply -f -
apiVersion: karpenter.sh/v1beta1
kind: NodePool
metadata:
  name: default
spec:
  template:
    spec:
      requirements:
        - key: kubernetes.io/arch
          operator: In
          values: ["amd64"]
        - key: kubernetes.io/os
          operator: In
          values: ["linux"]
        - key: karpenter.sh/capacity-type
          operator: In
          values: ["spot"]
        - key: karpenter.k8s.aws/instance-category
          operator: In
          values: ["c", "m", "r"]
        - key: karpenter.k8s.aws/instance-generation
          operator: Gt
          values: ["2"]
      nodeClassRef:
        apiVersion: karpenter.k8s.aws/v1beta1
        kind: EC2NodeClass
        name: default
  limits:
    cpu: 1000
  disruption:
    consolidationPolicy: WhenUnderutilized
    expireAfter: 720h # 30 * 24h = 720h
---
apiVersion: karpenter.k8s.aws/v1beta1
kind: EC2NodeClass
metadata:
  name: default
spec:
  amiFamily: AL2 # Amazon Linux 2
  role: "KarpenterNodeRole-${CLUSTER_NAME}" # replace with your cluster name
  subnetSelectorTerms:
    - tags:
        karpenter.sh/discovery: "${CLUSTER_NAME}" # replace with your cluster name
  securityGroupSelectorTerms:
    - tags:
        karpenter.sh/discovery: "${CLUSTER_NAME}" # replace with your cluster name
  amiSelectorTerms:
    - id: "${ARM_AMI_ID}"
    - id: "${AMD_AMI_ID}"
  # - id: "${GPU_AMI_ID}" # <- GPU Optimized AMD AMI
  # - name: "amazon-eks-node-${K8S_VERSION}-*" # <- automatically upgrade when a new AL2 EKS Optimized AMI is released. This is unsafe for production workloads. Validate AMIs in lower environments before deploying them to production.
EOF
# Verify
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# kubectl get nodepool,ec2nodeclass
NAME NODECLASS
nodepool.karpenter.sh/default default
NAME AGE
ec2nodeclass.karpenter.k8s.aws/default 14s
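To see exactly what was applied (requirements, limits, disruption policy), reading the objects back is the simplest check; for example:
kubectl get nodepool default -o yaml
kubectl get ec2nodeclass default -o yaml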
# Each pause pod requests a guaranteed minimum of 1 CPU
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# cat <<EOF | kubectl apply -f -
> apiVersion: apps/v1
> kind: Deployment
> metadata:
>   name: inflate
> spec:
>   replicas: 0
>   selector:
>     matchLabels:
>       app: inflate
>   template:
>     metadata:
>       labels:
>         app: inflate
>     spec:
>       terminationGracePeriodSeconds: 0
>       containers:
>       - name: inflate
>         image: public.ecr.aws/eks-distro/kubernetes/pause:3.7
>         resources:
>           requests:
>             cpu: 1
> EOF
deployment.apps/inflate created
# Scale up
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# kubectl get pod
No resources found in default namespace.
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# kubectl scale deployment inflate --replicas 5
deployment.apps/inflate scaled
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion bin]# k get nodes
NAME STATUS ROLES AGE VERSION
ip-192-168-30-86.ap-northeast-2.compute.internal Ready <none> 27m v1.29.0-eks-5e0fdde
ip-192-168-54-137.ap-northeast-2.compute.internal Ready <none> 98s v1.29.0-eks-5e0fdde
ip-192-168-75-127.ap-northeast-2.compute.internal Ready <none> 27m v1.29.0-eks-5e0fdde
{"level":"INFO","time":"2024-04-04T15:38:33.792Z","logger":"controller.disruption","message":"disrupting via consolidation delete, terminating 1 nodes (0 pods) ip-192-168-54-137.ap-northeast-2.compute.internal/c5a.2xlarge/spot","commit":"8b2d1d7","command-id":"8a287f10-da20-4895-bff3-91975089e7f3"}
{"level":"INFO","time":"2024-04-04T15:38:34.308Z","logger":"controller.disruption.queue","message":"command succeeded","commit":"8b2d1d7","command-id":"8a287f10-da20-4895-bff3-91975089e7f3"}
{"level":"INFO","time":"2024-04-04T15:38:34.355Z","logger":"controller.node.termination","message":"tainted node","commit":"8b2d1d7","node":"ip-192-168-54-137.ap-northeast-2.compute.internal"}
{"level":"INFO","time":"2024-04-04T15:38:34.883Z","logger":"controller.node.termination","message":"deleted node","commit":"8b2d1d7","node":"ip-192-168-54-137.ap-northeast-2.compute.internal"}
{"level":"INFO","time":"2024-04-04T15:38:35.226Z","logger":"controller.nodeclaim.termination","message":"deleted nodeclaim","commit":"8b2d1d7","nodeclaim":"default-npl4f","node":"ip-192-168-54-137.ap-northeast-2.compute.internal","provider-id":"aws:///ap-northeast-2b/i-05b020aca8ee1872f"}
Expiration: automatically expires instances after a default of 720 hours (30 days), forcing node replacement so nodes stay up to date.
Drift: detects configuration changes (NodePool, EC2NodeClass) and applies the necessary changes.
Consolidation: optimizes compute for cost efficiency.
When launching Spot instances, Karpenter calls the AWS EC2 Fleet Instance API, passing the instance types selected based on the NodePool configuration.
The EC2 Fleet Instance API immediately returns the list of instances that launched and the list that could not launch; when capacity cannot be launched, Karpenter can request alternative capacity or remove soft scheduling constraints from the workload.
Spot-to-Spot Consolidation required a different approach than on-demand consolidation, where rightsizing and lowest price are the main metrics.
For spot-to-spot consolidation to occur, Karpenter needs a diversified instance configuration with at least 15 instance types (see the NodePool example defined in this exercise); without this constraint, Karpenter risks selecting instances with lower availability and higher interruption rates.
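A rough way to gauge how much Spot diversity the region offers for this kind of NodePool; a sketch that only applies the Nitro and Spot filters (the category/size requirements of the NodePool below are not expressed here):
aws ec2 describe-instance-types \
  --filters "Name=supported-usage-class,Values=spot" "Name=hypervisor,Values=nitro" \
  --query 'length(InstanceTypes)' --output text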
# Delete the existing NodePool and EC2NodeClass
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# kubectl delete nodepool,ec2nodeclass default
nodepool.karpenter.sh "default" deleted
ec2nodeclass.karpenter.k8s.aws "default" deleted
# Available since v0.34.0 by enabling the spotToSpotConsolidation feature gate
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# helm upgrade karpenter -n kube-system oci://public.ecr.aws/karpenter/karpenter --reuse-values --set settings.featureGates.spotToSpotConsolidation=true
Pulled: public.ecr.aws/karpenter/karpenter:0.35.4
Digest: sha256:ddbdf04bc5fe81b191a5bdc256cff970a3d83f9849028fab7d74713626786386
Release "karpenter" has been upgraded. Happy Helming!
NAME: karpenter
LAST DEPLOYED: Fri Apr 5 00:41:56 2024
NAMESPACE: kube-system
STATUS: deployed
REVISION: 2
TEST SUITE: None
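It's worth confirming the flag actually landed in the release values; an illustrative check:
helm get values karpenter -n kube-system --all | grep -i spotToSpot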
# Create the NodePool and EC2NodeClass
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# cat <<EOF > nodepool.yaml
> apiVersion: karpenter.sh/v1beta1
> kind: NodePool
> metadata:
>   name: default
> spec:
>   template:
>     metadata:
>       labels:
>         intent: apps
>     spec:
>       nodeClassRef:
>         name: default
>       requirements:
>         - key: karpenter.sh/capacity-type
>           operator: In
>           values: ["spot"]
>         - key: karpenter.k8s.aws/instance-category
>           operator: In
>           values: ["c","m","r"]
>         - key: karpenter.k8s.aws/instance-size
>           operator: NotIn
>           values: ["nano","micro","small","medium"]
>         - key: karpenter.k8s.aws/instance-hypervisor
>           operator: In
>           values: ["nitro"]
>   limits:
>     cpu: 100
>     memory: 100Gi
>   disruption:
>     consolidationPolicy: WhenUnderutilized
> ---
> apiVersion: karpenter.k8s.aws/v1beta1
> kind: EC2NodeClass
> metadata:
>   name: default
> spec:
>   amiFamily: Bottlerocket
>   subnetSelectorTerms:
>     - tags:
>         karpenter.sh/discovery: "${CLUSTER_NAME}" # replace with your cluster name
>   securityGroupSelectorTerms:
>     - tags:
>         karpenter.sh/discovery: "${CLUSTER_NAME}" # replace with your cluster name
>   role: "KarpenterNodeRole-${CLUSTER_NAME}" # replace with your cluster name
>   tags:
>     Name: karpenter.sh/nodepool/default
>     IntentLabel: "apps"
> EOF
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# kubectl apply -f nodepool.yaml
nodepool.karpenter.sh/default created
ec2nodeclass.karpenter.k8s.aws/default created
# Deploy a sample workload
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# cat <<EOF > inflate.yaml
> apiVersion: apps/v1
> kind: Deployment
> metadata:
>   name: inflate
> spec:
>   replicas: 5
>   selector:
>     matchLabels:
>       app: inflate
>   template:
>     metadata:
>       labels:
>         app: inflate
>     spec:
>       nodeSelector:
>         intent: apps
>       containers:
>         - name: inflate
>           image: public.ecr.aws/eks-distro/kubernetes/pause:3.2
>           resources:
>             requests:
>               cpu: 1
>               memory: 1.5Gi
> EOF
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# kubectl apply -f inflate.yaml
deployment.apps/inflate created
# Verify
(leeeuijoo@root-karpenter-demo:N/A) [root@myeks2-bastion ~]# kubectl get nodes -L karpenter.sh/nodepool -L node.kubernetes.io/instance-type -L topology.kubernetes.io/zone -L karpenter.sh/capacity-type
NAME STATUS ROLES AGE VERSION NODEPOOL INSTANCE-TYPE ZONE CAPACITY-TYPE
ip-192-168-155-81.ap-northeast-2.compute.internal NotReady <none> 10s v1.29.1-eks-61c0bbb default c6gd.2xlarge ap-northeast-2b spot
ip-192-168-30-86.ap-northeast-2.compute.internal Ready <none> 33m v1.29.0-eks-5e0fdde m5.large ap-northeast-2c
ip-192-168-75-127.ap-northeast-2.compute.internal Ready <none> 33m v1.29.0-eks-5e0fdde m5.large ap-northeast-2a
watch -d "kubectl get nodes -L karpenter.sh/nodepool -L node.kubernetes.io/instance-type -L topology.kubernetes.io/zone -L karpenter.sh/capacity-type"
Every 2.0s: kubectl get nodes -L karpenter.sh/nodepool -L node.kubernetes.io/instance-type -L topology.kubernetes.io/zone -L karpenter.sh/capacity-type Fri Apr 5 00:44:09 2024
NAME STATUS ROLES AGE VERSION NODEPOOL INSTANCE-TYPE ZONE CAPACITY-TYPE
ip-192-168-155-81.ap-northeast-2.compute.internal Ready <none> 36s v1.29.1-eks-61c0bbb default c6gd.2xlarge ap-northeast-2b spot
ip-192-168-30-86.ap-northeast-2.compute.internal Ready <none> 34m v1.29.0-eks-5e0fdde m5.large ap-northeast-2c
ip-192-168-75-127.ap-northeast-2.compute.internal Ready <none> 34m v1.29.0-eks-5e0fdde m5.large ap-northeast-2a
# Scale down to 1 replica to trigger consolidation
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# kubectl scale --replicas=1 deployment/inflate
deployment.apps/inflate scaled
kubectl -n kube-system logs -l app.kubernetes.io/name=karpenter --all-containers=true -f --tail=20
{"level":"INFO","time":"2024-04-04T15:45:31.021Z","logger":"controller.nodeclaim.lifecycle","message":"launched nodeclaim","commit":"8b2d1d7","nodeclaim":"default-fttgq","provider-id":"aws:///ap-northeast-2b/i-0c0c2478a48404d8d","instance-type":"c6gn.large","zone":"ap-northeast-2b","capacity-type":"spot","allocatable":{"cpu":"1930m","ephemeral-storage":"17Gi","memory":"3055Mi","pods":"29","vpc.amazonaws.com/pod-eni":"9"}}
# Monitor
Every 2.0s: kubectl get nodes -L karpenter.sh/nodepool -L node.kubernetes.io/instance-type -L topology.kubernetes.io/zone -L karpenter.sh/capacity-type Fri Apr 5 00:45:57 2024
NAME STATUS ROLES AGE VERSION NODEPOOL INSTANCE-TYPE ZONE CAPACITY-TYPE
ip-192-168-155-81.ap-northeast-2.compute.internal Ready <none> 2m24s v1.29.1-eks-61c0bbb default c6gd.2xlarge ap-northeast-2b spot
ip-192-168-30-86.ap-northeast-2.compute.internal Ready <none> 36m v1.29.0-eks-5e0fdde m5.large ap-northeast-2c
ip-192-168-56-255.ap-northeast-2.compute.internal Ready <none> 11s v1.29.1-eks-61c0bbb default c6gn.large ap-northeast-2b spot
ip-192-168-75-127.ap-northeast-2.compute.internal Ready <none> 36m v1.29.0-eks-5e0fdde m5.large ap-northeast-2a
kubectl -n kube-system logs -l app.kubernetes.io/name=karpenter --all-containers=true -f --tail=20
kubectl get nodes -L karpenter.sh/nodepool -L node.kubernetes.io/instance-type -L topology.kubernetes.io/zone -L karpenter.sh/capacity-type
kubectl get node --label-columns=eks.amazonaws.com/capacityType,karpenter.sh/capacity-type
kubectl get node --label-columns=node.kubernetes.io/instance-type,topology.kubernetes.io/zone
# Check NodeClaims
(leeeuijoo@root-karpenter-demo:default) [root@myeks2-bastion ~]# kubectl get nodeclaims
NAME TYPE ZONE NODE READY AGE
default-fttgq c6gn.large ap-northeast-2b ip-192-168-56-255.ap-northeast-2.compute.internal True 3m12s
NAME STATUS ROLES AGE VERSION NODEPOOL INSTANCE-TYPE ZONE CAPACITY-TYPE
ip-192-168-155-81.ap-northeast-2.compute.internal Ready <none> 2m24s v1.29.1-eks-61c0bbb default c6gd.2xlarge ap-northeast-2b spot
ip-192-168-30-86.ap-northeast-2.compute.internal Ready <none> 36m v1.29.0-eks-5e0fdde m5.large ap-northeast-2c
ip-192-168-56-255.ap-northeast-2.compute.internal Ready <none> 11s v1.29.1-eks-61c0bbb default c6gn.large ap-northeast-2b spot
ip-192-168-75-127.ap-northeast-2.compute.internal Ready <none> 36m v1.29.0-eks-5e0fdde m5.large ap-northeast-2a
# ------- After consolidation completes, the larger c6gd.2xlarge spot node has been removed:
NAME STATUS ROLES AGE VERSION NODEPOOL INSTANCE-TYPE ZONE CAPACITY-TYPE
ip-192-168-30-86.ap-northeast-2.compute.internal Ready <none> 45m v1.29.0-eks-5e0fdde m5.large ap-northeast-2c
ip-192-168-56-255.ap-northeast-2.compute.internal Ready <none> 9m9s v1.29.1-eks-61c0bbb default c6gn.large ap-northeast-2b spot
ip-192-168-75-127.ap-northeast-2.compute.internal Ready <none> 45m v1.29.0-eks-5e0fdde m5.large ap-northeast-2a
# Clean up
kubectl delete deployment inflate
kubectl delete nodepool,ec2nodeclass default
# Delete the CloudFormation stack that created the Karpenter IAM Role
aws cloudformation delete-stack --stack-name "Karpenter-${CLUSTER_NAME}"
# Delete the EC2 launch templates created for the cluster
aws ec2 describe-launch-templates --filters "Name=tag:karpenter.k8s.aws/cluster,Values=${CLUSTER_NAME}" |
jq -r ".LaunchTemplates[].LaunchTemplateName" |
xargs -I{} aws ec2 delete-launch-template --launch-template-name {}
# Delete the cluster
eksctl delete cluster --name "${CLUSTER_NAME}"
# After the deletions above complete, delete the remaining stack
aws cloudformation delete-stack --stack-name myeks2
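After everything is deleted, a final sweep for stray Karpenter-launched instances can be reassuring; a sketch relying on the karpenter.sh/nodepool tag that Karpenter puts on the EC2 instances it provisions:
aws ec2 describe-instances \
  --filters "Name=tag-key,Values=karpenter.sh/nodepool" "Name=instance-state-name,Values=running" \
  --query 'Reservations[].Instances[].InstanceId' --output text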