# OpenStack : CloudBand Infrastructure Health check script

Chanpu-·2021년 12월 20일
0

# Work

목록 보기
2/5

作成理由

: Healthcheckに時間が掛かりすぎる

構築初期にはVM数も少ないし、Healthcheckを行う回数も少なかったが、
どんどんVMが増えて、担当Siteが増えて、エラーが出る場合も多くなった為

Healthcheckの用途に分けてOption3つ追加

  • Option無し
  • --all
  • --check-status
  • --check-status --all
#!/bin/bash
# CBIS health check script.
# Author: Chi Changyeop
#
# Usage:
#   chmod +x <File name>.sh                  make the script executable
#   <File name>.sh --all                     full health check      [2021/11/13]
#   <File name>.sh --check-status            status check only      [2021/12/16]
#   <File name>.sh --check-status --all      status check only      [2021/12/18]

# Start-up banner; the quoted heredoc delimiter keeps the text literal.
cat <<'BANNER'


+++++++++++++++++++++++++++++++++++++++++++++++
+                                             +
+     CBIS Healthcheck Update 2021/12/18      +
+                                             +
+++++++++++++++++++++++++++++++++++++++++++++++


BANNER

# Timestamp of this run, preceded by one blank line and followed by two.
printf '\n%s\n' ">>> HealthCheck date"
date

printf '\n\n'

# [2021/12/16] Add only check status

# Function list
# salt : run salt-ssh from its virtualenv using the Salt configuration and log
#        file under /home/stack/salt.
# Arguments: "$@" - handed to salt-ssh unchanged (target, function, arguments)
# Returns:   the exit status of salt-ssh. The virtualenv is deactivated
#            afterwards, but the status of `deactivate` is not what callers
#            get back, so a failing salt-ssh run is no longer reported as
#            success.
function salt {
  local rc activated=0

  if source ~/venv/salt-ssh/bin/activate; then
    activated=1
  else
    # Best effort: salt-ssh may still be reachable through PATH.
    echo "salt: could not activate ~/venv/salt-ssh, trying salt-ssh from PATH" >&2
  fi

  salt-ssh -c /home/stack/salt/etc/salt/ --log-file /home/stack/salt/var/log/salt/ssh --no-host-keys "$@"
  rc=$?

  # `deactivate` only exists once the activate script has been sourced.
  if (( activated )); then
    deactivate
  fi

  return "$rc"
}

# Cont[1-3]GaleraCheck: run `clustercheck` inside the Galera container of
# controller 0, 1 and 2 respectively. All three share one implementation.

# _GaleraCheckOnController: look up the galera-bundle-docker-N container on one
# controller and run clustercheck in it.
# Globals:   o (read) - command prefix used to reach a controller; expanded
#            unquoted on purpose because it holds a command plus an argument
# Arguments: $1 - controller index passed after the $o prefix
# Outputs:   "Controller#<index> : <container>" followed by whatever
#            clustercheck prints; a diagnostic on stderr if no container
#            was found
# Returns:   1 if no container name was found, otherwise the status of the
#            clustercheck call
function _GaleraCheckOnController() {
  local index=$1
  local container

  # The pattern is quoted so the local shell cannot glob-expand it against
  # files in the current directory before grep sees it. Only the first match
  # is kept so that exactly one container name is passed to `docker exec`.
  container=$($o "$index" -q sudo docker container ls -a \
    | grep -o 'galera-bundle-docker-[0-9]' \
    | head -n 1)

  echo "Controller#$index : $container"

  # Without a container name `docker exec` would be handed "clustercheck" as
  # the container, so stop here with an explicit message instead.
  if [ -z "$container" ]; then
    echo "Controller#$index : no galera-bundle-docker container found" >&2
    return 1
  fi

  $o "$index" -q sudo docker exec "$container" clustercheck
}

function Cont1GaleraCheck() {
  _GaleraCheckOnController 0
}

function Cont2GaleraCheck() {
  _GaleraCheckOnController 1
}

function Cont3GaleraCheck() {
  _GaleraCheckOnController 2
}

# Var list
# o: command prefix for reaching an overcloud controller: the ssh-overcloud.sh
#    tool followed by its "Controller" argument. The rest of the script expands
#    it unquoted as `$o <index> -q <command...>`, relying on word splitting to
#    separate the tool path from "Controller", so it has to stay a plain
#    space-separated string.
o="/usr/share/cbis/undercloud/tools/ssh-overcloud.sh Controller"



# --check-status mode [2021/12/16]
# Prints a condensed report (only selected columns of each listing) and then
# exits, so the full health check further down is not run.
# With "--check-status --all" [2021/12/18] the salt-based host checks are
# included as well; their output is filtered with grep/sed on the remote side.
if [ "$1" = "--check-status" ]; then
  # Use the stackrc environment for the listings below.
  source /home/stack/stackrc

  echo "Command : openstack stack list -c 'Stack Status'"
  openstack stack list -c 'Stack Status'

  echo ""&& echo ""

  echo "Command : openstack cbis version"
  openstack cbis version;

  echo ""&&echo ""

  echo "Command : openstack server list -c 'Name' -c 'Status'"
  openstack server list -c 'Name' -c 'Status'

  echo ""&&echo ""

  echo "Command : openstack baremetal node list -c 'Name' -c 'Power State' -c 'Provisioning State' -c 'Maintenance'"
  openstack baremetal node list -c 'Name' -c 'Power State' -c 'Provisioning State' -c 'Maintenance'

  echo ""&&echo ""

  # Switch to the overcloudrc environment for the remaining openstack calls.
  source /home/stack/overcloudrc

  echo "Command : openstack hypervisor list --long -c 'State' -c 'vCPUs' -c 'Memory MB'"
  openstack hypervisor list --long -c 'State' -c 'vCPUs' -c 'Memory MB'

  echo ""&&echo ""

  echo "Command : openstack availability zone list"
  openstack availability zone list

  echo ""&&echo ""

  echo "Command : openstack host list --sort Zone"
  openstack host list --sort Zone

  echo ""&&echo ""

  echo "Command : openstack compute service list -c 'Host' -c 'Status' -c 'State'"
  openstack compute service list -c 'Host' -c 'Status' -c 'State'

  echo ""&&echo ""

  echo "Command : openstack volume service list -c 'Host' -c 'Status' -c 'State'"
  openstack volume service list -c 'Host' -c 'Status' -c 'State'

  echo ""&&echo ""

  echo "Command : openstack volume type list -c 'Name' -c 'Is Public'"
  openstack volume type list -c 'Name' -c 'Is Public'

  echo ""&&echo ""

  echo "Command : openstack volume type list --default -c 'Name' -c 'Is Public'"
  openstack volume type list --default -c 'Name' -c 'Is Public'

  echo ""&&echo ""

  echo "Command : openstack network agent list --sort Host -c 'Host' -c 'Alive' -c 'State'"
  openstack network agent list --sort Host -c 'Host' -c 'Alive' -c 'State'

  echo ""&&echo ""

  # $o is expanded unquoted on purpose: it holds the ssh tool plus "Controller".
  echo "Command : sudo pcs status"
  $o 0 -q sudo pcs status

  echo ""&&echo ""

  Cont1GaleraCheck # Function

  echo ""&&echo ""

  Cont2GaleraCheck # Function

  echo ""&&echo ""

  Cont3GaleraCheck # Function

  echo ""&&echo

  if [ "$2" = "--all" ]; then
    # [2021/12/18] Add only check status for salt command
    # Several labels below show the short form of the command; the command
    # actually sent adds grep/sed filters to shorten the output.
    echo "Command : salt \"*\" cmd.run \"lscpu | grep -e '^CPU(s)' -e 'Socket' -e 'NUMA node(s)'\""
    salt "*" cmd.run "lscpu | grep -e '^CPU(s)' -e 'Socket' -e 'NUMA node(s)'"

    echo ""&&echo ""

    echo "Command : salt \"*\" cmd.run \"numactl -H\""
    salt "*" cmd.run "numactl -H | grep 'available'"

    echo ""&&echo ""

    echo "Command : salt \"*cont*\" cmd.run \"numactl -s\""
    salt "*cont*" cmd.run "numactl -s | grep 'bind' | sed 's/0 1 [0-9].*79/0 1 .. 79/'"

    echo ""&&echo ""

    echo "Command : salt \"*comp*\" cmd.run \"numactl -s\""
    salt "*comp*" cmd.run "numactl -s | grep 'bind'"

    echo ""&&echo ""

    echo "Command : salt \"*\" cmd.run \"free -h\""
    salt "*" cmd.run "free -h"

    echo ""&&echo ""

    echo "Command : salt \"*comp*\" cmd.run \"grep -e HugePages_Total -e Hugepagesize /proc/meminfo\""
    salt "*comp*" cmd.run "grep -e HugePages_Total -e Hugepagesize /proc/meminfo"

    echo ""&&echo ""

    echo "Command : salt \"*\" cmd.run \"lspci | grep 'Ethernet'\""
    salt "*" cmd.run "lspci | grep Ethernet | sed 's/.*controller://'"

    echo ""&&echo ""

    echo "Command : salt \"*\" cmd.run \"ip l | grep -e em[12] -e p3p[12]\""
    salt "*" cmd.run "ip l | grep -e em[12] -e p3p[12] | sed 's/ state.*1000//' | sed 's/[0-9]: //'"

    echo ""&&echo ""

    # Target is "*" to match the label and the same check in the --all
    # section; it used to be "cont*", unlike every other target in this
    # script ("*", "*cont*", "*comp*").
    echo "Command : salt \"*\" cmd.run \"head /sys/class/net/{em[12],p3p[12]}/device/numa_node\""
    salt "*" cmd.run "head /sys/class/net/{em[12],p3p[12]}/device/numa_node | sed 's/^0/numa node: 0/' | sed 's/^1/numa node: 1/'"

    echo ""&&echo ""

    echo "Command : salt \"*cont*\" cmd.run \"ip -4 a | grep -e mtu -e inet\""
    salt "*cont*" cmd.run "ip -4 a | grep -v 'scope' | grep -e 'mtu 9000' -e 'inet' | sed 's/ qdisc.*//'"

    echo ""&&echo ""

    echo "Command : salt \"*comp*\" cmd.run \"ip -4 a | grep -e mtu -e inet\""
    salt "*comp*" cmd.run "ip -4 a | grep -v 'scope' | grep -e 'mtu 9000' -e 'inet' | sed 's/ qdisc.*//'"

    echo ""&&echo ""

    echo "Command : salt \"*cont*\" cmd.run \"lsblk\""
    salt "*cont*" cmd.run "lsblk | grep -o '893.8G'"

    echo ""&&echo ""

    echo "Command : salt \"*comp*\" cmd.run \"lsblk\""
    salt "*comp*" cmd.run "lsblk | grep -o '893.8G'"

    echo ""&&echo ""
  else
    echo ">>> Skip"
  fi

  # NTP peers; lines containing "+" or "*" are filtered out of the output.
  echo "Command : salt \"*cont*\" cmd.run \"ntpq -pn\""
  salt "*cont*" cmd.run "ntpq -pn | egrep -v '\+|\*'"

  echo "Command : salt \"*comp*\" cmd.run \"ntpq -pn\""
  salt "*comp*" cmd.run "ntpq -pn | egrep -v '\+|\*'"

  echo ">>> CBIS STATUS CHECK FINISH"
  date

  echo ""&&echo

  # Stop here so the full health check below is skipped. The status check
  # completed normally, so exit with 0; this used to be `exit 1`, which made
  # every successful run look like a failure to the caller.
  exit 0
else
  echo ">>> Skip"
fi

# Default health check: the same listings as the status mode, but untrimmed.

# _hc_show LABEL CMD [ARG...]
#   Prints "Command : LABEL", runs CMD with its arguments, then prints two
#   blank lines as a separator.
_hc_show() {
  printf 'Command : %s\n' "$1"
  shift
  "$@"
  printf '\n\n'
}

printf '\n\n'

# The label is printed before stackrc is loaded, so this first entry is
# written out by hand instead of going through _hc_show.
printf '%s\n' "Command : openstack stack list"
# source change to stackrc
source /home/stack/stackrc
openstack stack list
printf '\n\n'

_hc_show "openstack cbis version" \
  openstack cbis version
_hc_show "openstack server list --sort Name" \
  openstack server list --sort Name
_hc_show "openstack baremetal node list" \
  openstack baremetal node list
_hc_show "openstack image list" \
  openstack image list

# source change to overcloudrc
source /home/stack/overcloudrc
printf '\n\n'

_hc_show "openstack hypervisor list --long --sort 'Hypervisor Hostname'" \
  openstack hypervisor list --long --sort "Hypervisor Hostname"
_hc_show "openstack availability zone list" \
  openstack availability zone list
_hc_show "openstack host list --sort Zone" \
  openstack host list --sort Zone
_hc_show "openstack compute service list" \
  openstack compute service list
_hc_show "openstack volume service list" \
  openstack volume service list
_hc_show "openstack volume type list" \
  openstack volume type list
_hc_show "openstack volume type list --default" \
  openstack volume type list --default
_hc_show "openstack network agent list --sort Host" \
  openstack network agent list --sort Host
_hc_show "sudo pcs status" \
  /usr/share/cbis/undercloud/tools/ssh-overcloud.sh Controller 0 -q sudo pcs status

# Label only: this salt command is printed but not executed; the three
# Cont*GaleraCheck functions below do the actual work.
printf '%s\n' "Command : salt \"*cont*\" cmd.run \"hostname -s | grep -o -E '[0-9]+' | tail -1 | xargs -i sudo docker exec galera-bundle-docker-{} clustercheck\""
printf '\n\n'

# Function
Cont1GaleraCheck
printf '\n\n'

# Function
Cont2GaleraCheck
printf '\n\n'

# Function
Cont3GaleraCheck
printf '\n\n'

# Host-level checks through salt; they only run when the first argument is
# "--all", otherwise ">>> Skip" is printed.

# _hc_salt_report TARGET REMOTE_CMD [QUOTE]
#   Prints the label `Command : salt "TARGET" cmd.run <Q>REMOTE_CMD<Q>`, runs
#   REMOTE_CMD on TARGET through salt, then prints two blank lines.
#   QUOTE is the character put around REMOTE_CMD in the label (default: ").
_hc_salt_report() {
  local target=$1
  local remote_cmd=$2
  local quote='"'
  if [ "$#" -ge 3 ]; then
    quote=$3
  fi

  printf 'Command : salt "%s" cmd.run %s%s%s\n' "$target" "$quote" "$remote_cmd" "$quote"
  salt "$target" cmd.run "$remote_cmd"
  printf '\n\n'
}

case "$1" in
  --all)
    # CPU / NUMA layout
    _hc_salt_report "*" "lscpu | grep -e 'CPU(s)' -e Core -e Socket -e NUMA -e Virtualization"
    _hc_salt_report "*" "numactl -H"
    _hc_salt_report "*cont*" "numactl -s"
    _hc_salt_report "*comp*" "numactl -s"

    # Memory and hugepages
    _hc_salt_report "*" "free -h"
    _hc_salt_report "*comp*" "grep -e HugePages_Total -e Hugepagesize /proc/meminfo"

    # Network devices; the label of the first entry uses single quotes.
    _hc_salt_report "*" 'lspci | grep Ethernet' "'"
    _hc_salt_report "*" "ip l | grep -e em[12] -e p3p[12]"
    _hc_salt_report "*" "head /sys/class/net/{em[12],p3p[12]}/device/numa_node"
    _hc_salt_report "*cont*" "ip -4 a | grep -e mtu -e inet"
    _hc_salt_report "*comp*" "ip -4 a | grep -e mtu -e inet"

    # Block devices
    _hc_salt_report "*cont*" "lsblk"
    _hc_salt_report "*comp*" "lsblk"
    ;;
  *)
    echo ">>> Skip"
    ;;
esac

# NTP peer listing, first for the "*cont*" targets and then for "*comp*".
for ntp_target in "*cont*" "*comp*"; do
  printf 'Command : salt "%s" cmd.run "ntpq -pn"\n' "$ntp_target"
  salt "$ntp_target" cmd.run "ntpq -pn"
  printf '\n\n'
done

# Footer with the completion timestamp.
printf '%s\n' ">>> CBIS HEALTHCHECK FINISH"
date

printf '\n\n'

# watch '/usr/share/cbis/undercloud/tools/ssh-overcloud.sh Controller 0 -q sudo pcs status'

# 思った点

▸ 初めて自分のShell Scriptを作成し、完成できて達成感がすごい
▸ 試行錯誤を経て、Command、Alias、PATH、Shell等々全体的な勉強が出来た
▸ 自分が作ったScriptが手順書に含まれた時に気持ち良かった
▸ テスト環境が自由に使えることに感謝した(覚えていないくらい実行して失敗した)
▸ 目的通り、作業が楽になったので満足

profile
何とかしちゃおう (*´ω`)

0개의 댓글