構築初期はVM数が少なく、Healthcheckを行う回数も少なかったが、
VMや担当Siteがどんどん増え、エラーが出る場合も多くなった為、Healthcheck用のShell Scriptを作成した。
- Option無し
- --all
- --check-status
- --check-status --all
#!/bin/bash
# CBIS health-check script.
# Editor: Chi Changyeop
# Execute permission : chmod +x <File name>.sh
#
# Usage:
#   <File name>.sh                        basic health check
#   <File name>.sh --all                  health check plus salt inventory    [2021/11/13]
#   <File name>.sh --check-status         condensed status report only        [2021/12/16]
#   <File name>.sh --check-status --all   status report plus salt summaries   [2021/12/18]

# Start-up banner: one blank line, the framed title, one blank line.
printf '%s\n' \
  '' \
  '+++++++++++++++++++++++++++++++++++++++++++++++' \
  '+ +' \
  '+ CBIS Healthcheck Update 2021/12/18 +' \
  '+ +' \
  '+++++++++++++++++++++++++++++++++++++++++++++++' \
  ''

# Timestamp of this run, followed by two blank lines.
printf '\n%s\n' '>>> HealthCheck date'
date
printf '\n\n'
# [2021/12/16] Add only check status
# Function list

# salt: run salt-ssh from its virtualenv with this deployment's config/log paths.
# Arguments: "$@" - salt target and function arguments, forwarded unchanged.
# Returns:   the exit status of salt-ssh (previously the status of `deactivate`
#            was returned, hiding salt-ssh failures).
function salt {
  local rc

  # Best effort: if the virtualenv cannot be activated, warn and still try
  # whatever salt-ssh is reachable, as the original did.
  # shellcheck disable=SC1090
  if ! source ~/venv/salt-ssh/bin/activate; then
    echo "salt: could not activate ~/venv/salt-ssh/bin/activate" >&2
  fi

  salt-ssh -c /home/stack/salt/etc/salt/ \
    --log-file /home/stack/salt/var/log/salt/ssh \
    --no-host-keys "$@"
  rc=$?

  # deactivate is expected to be defined by the activate script; skip it when
  # activation failed so no "command not found" noise is added.
  if declare -F deactivate >/dev/null; then
    deactivate
  fi

  return "$rc"
}
# Cont[1-3]GaleraCheck: check Galera cluster sync on Controller 0, 1 and 2
# respectively (function names are 1-based, controller indexes are 0-based).

# _GaleraCheck INDEX
#   Find the galera-bundle-docker-N container on controller INDEX and run
#   `clustercheck` inside it.
# Globals:   o (read) - command prefix used to reach a controller.
# Outputs:   "Controller#INDEX : <container>" followed by clustercheck output.
# Returns:   1 when no galera container is found, otherwise the status of the
#            remote clustercheck call.
function _GaleraCheck() {
  local idx=$1
  local container

  # $o is expanded unquoted on purpose: it holds a command plus an argument.
  # The pattern is quoted so the local shell cannot glob-expand [0-9].
  # Only the first match is kept so a single container name is used below.
  # shellcheck disable=SC2086
  container=$($o "$idx" -q sudo docker container ls -a \
    | grep -o 'galera-bundle-docker-[0-9]' \
    | head -n 1)

  echo "Controller#$idx : $container"

  # Without a container name `docker exec` would be called with a missing
  # argument; report the problem instead.
  if [[ -z "$container" ]]; then
    echo "Controller#$idx : galera-bundle-docker container not found" >&2
    return 1
  fi

  # shellcheck disable=SC2086
  $o "$idx" -q sudo docker exec "$container" clustercheck
}

function Cont1GaleraCheck() {
  _GaleraCheck 0
}

function Cont2GaleraCheck() {
  _GaleraCheck 1
}

function Cont3GaleraCheck() {
  _GaleraCheck 2
}
# Var list
# o: command prefix for reaching an overcloud controller. It is expanded
#    unquoted as `$o <index> -q <command...>` so that it splits into the
#    script path and its "Controller" argument.
o='/usr/share/cbis/undercloud/tools/ssh-overcloud.sh Controller'
# --check-status mode [2021/12/16]: print a condensed status report and exit,
# so the full health check further down is not executed.
# "--check-status --all" [2021/12/18] additionally prints salt-based summaries.
# Any other first argument prints ">>> Skip" and falls through.
if [[ "$1" == "--check-status" ]]; then
  # Load the stackrc environment for the following openstack commands.
  source /home/stack/stackrc
  echo "Command : openstack stack list -c 'Stack Status'"
  openstack stack list -c 'Stack Status'
  printf '\n\n'
  echo "Command : openstack cbis version"
  openstack cbis version
  printf '\n\n'
  echo "Command : openstack server list -c 'Name' -c 'Status'"
  openstack server list -c 'Name' -c 'Status'
  printf '\n\n'
  echo "Command : openstack baremetal node list -c 'Name' -c 'Power State' -c 'Provisioning State' -c 'Maintenance'"
  openstack baremetal node list -c 'Name' -c 'Power State' -c 'Provisioning State' -c 'Maintenance'
  printf '\n\n'

  # Switch to the overcloudrc environment.
  source /home/stack/overcloudrc
  echo "Command : openstack hypervisor list --long -c 'State' -c 'vCPUs' -c 'Memory MB'"
  openstack hypervisor list --long -c 'State' -c 'vCPUs' -c 'Memory MB'
  printf '\n\n'
  echo "Command : openstack availability zone list"
  openstack availability zone list
  printf '\n\n'
  echo "Command : openstack host list --sort Zone"
  openstack host list --sort Zone
  printf '\n\n'
  echo "Command : openstack compute service list -c 'Host' -c 'Status' -c 'State'"
  openstack compute service list -c 'Host' -c 'Status' -c 'State'
  printf '\n\n'
  echo "Command : openstack volume service list -c 'Host' -c 'Status' -c 'State'"
  openstack volume service list -c 'Host' -c 'Status' -c 'State'
  printf '\n\n'
  echo "Command : openstack volume type list -c 'Name' -c 'Is Public'"
  openstack volume type list -c 'Name' -c 'Is Public'
  printf '\n\n'
  echo "Command : openstack volume type list --default -c 'Name' -c 'Is Public'"
  openstack volume type list --default -c 'Name' -c 'Is Public'
  printf '\n\n'
  echo "Command : openstack network agent list --sort Host -c 'Host' -c 'Alive' -c 'State'"
  openstack network agent list --sort Host -c 'Host' -c 'Alive' -c 'State'
  printf '\n\n'

  # Pacemaker status from Controller 0, then Galera on every controller.
  # $o is expanded unquoted on purpose (command plus argument).
  echo "Command : sudo pcs status"
  # shellcheck disable=SC2086
  $o 0 -q sudo pcs status
  printf '\n\n'
  Cont1GaleraCheck # Function
  printf '\n\n'
  Cont2GaleraCheck # Function
  printf '\n\n'
  Cont3GaleraCheck # Function
  printf '\n\n'

  if [[ "$2" == "--all" ]]; then
    # [2021/12/18] Add only check status for salt command
    # The labels show the base command; the executed command adds grep/sed
    # filters that shorten the output.
    echo "Command : salt \"*\" cmd.run \"lscpu | grep -e '^CPU(s)' -e 'Socket' -e 'NUMA node(s)'"
    salt "*" cmd.run "lscpu | grep -e '^CPU(s)' -e 'Socket' -e 'NUMA node(s)'"
    printf '\n\n'
    echo "Command : salt \"*\" cmd.run \"numactl -H\""
    salt "*" cmd.run "numactl -H | grep 'available'"
    printf '\n\n'
    echo "Command : salt \"*cont*\" cmd.run \"numactl -s\""
    salt "*cont*" cmd.run "numactl -s | grep 'bind' | sed 's/0 1 [0-9].*79/0 1 .. 79/'"
    printf '\n\n'
    echo "Command : salt \"*comp*\" cmd.run \"numactl -s\""
    salt "*comp*" cmd.run "numactl -s | grep 'bind'"
    printf '\n\n'
    echo "Command : salt \"*\" cmd.run \"free -h\""
    salt "*" cmd.run "free -h"
    printf '\n\n'
    echo "Command : salt \"*comp*\" cmd.run \"grep -e HugePages_Total -e Hugepagesize /proc/meminfo\""
    salt "*comp*" cmd.run "grep -e HugePages_Total -e Hugepagesize /proc/meminfo"
    printf '\n\n'
    echo "Command : salt \"*\" cmd.run \"lspci | grep 'Ethernet'\""
    salt "*" cmd.run "lspci | grep Ethernet | sed 's/.*controller://'"
    printf '\n\n'
    echo "Command : salt \"*\" cmd.run \"ip l | grep -e em[12] -e p3p[12]\""
    salt "*" cmd.run "ip l | grep -e em[12] -e p3p[12] | sed 's/ state.*1000//' | sed 's/[0-9]: //'"
    printf '\n\n'
    echo "Command : salt \"*\" cmd.run \"head /sys/class/net/{em[12],p3p[12]}/device/numa_node\""
    # NOTE(review): this targets "cont*" while the label says "*" and the
    # other controller checks use "*cont*" - confirm the intended target.
    salt "cont*" cmd.run "head /sys/class/net/{em[12],p3p[12]}/device/numa_node | sed 's/^0/numa node: 0/' | sed 's/^1/numa node: 1/'"
    printf '\n\n'
    echo "Command : salt \"*cont*\" cmd.run \"ip -4 a | grep -e mtu -e inet\""
    salt "*cont*" cmd.run "ip -4 a | grep -v 'scope' | grep -e 'mtu 9000' -e 'inet' | sed 's/ qdisc.*//'"
    printf '\n\n'
    echo "Command : salt \"*comp*\" cmd.run \"ip -4 a | grep -e mtu -e inet\""
    salt "*comp*" cmd.run "ip -4 a | grep -v 'scope' | grep -e 'mtu 9000' -e 'inet' | sed 's/ qdisc.*//'"
    printf '\n\n'
    echo "Command : salt \"*cont*\" cmd.run \"lsblk\""
    salt "*cont*" cmd.run "lsblk | grep -o '893.8G'"
    printf '\n\n'
    echo "Command : salt \"*comp*\" cmd.run \"lsblk\""
    salt "*comp*" cmd.run "lsblk | grep -o '893.8G'"
    printf '\n\n'
  else
    echo ">>> Skip"
  fi

  # NTP peers, with lines containing "+" or "*" filtered out.
  echo "Command : salt \"*cont*\" cmd.run \"ntpq -pn\""
  salt "*cont*" cmd.run "ntpq -pn | egrep -v '\+|\*'"
  echo "Command : salt \"*comp*\" cmd.run \"ntpq -pn\""
  salt "*comp*" cmd.run "ntpq -pn | egrep -v '\+|\*'"
  echo ">>> CBIS STATUS CHECK FINISH"
  date
  printf '\n\n'

  # Leave here so the full health check below does not run. The report
  # completed normally, so exit with success (this used to be `exit 1`,
  # which made every run look like a failure to callers).
  exit 0
else
  echo ">>> Skip"
fi
# Default health check: OpenStack views with stackrc and then overcloudrc
# loaded, followed by Pacemaker and Galera state on the controllers.

# _os_report ARGS...
#   Print "Command : openstack ARGS", run `openstack ARGS`, then two blank
#   lines. Only used where the printed label equals the executed command.
_os_report() {
  echo "Command : openstack $*"
  openstack "$@"
  printf '\n\n'
}

printf '\n\n'
echo "Command : openstack stack list"
# source change to stackrc
source /home/stack/stackrc
openstack stack list
printf '\n\n'
_os_report cbis version
_os_report server list --sort Name
_os_report baremetal node list
_os_report image list

# source change to overcloudrc
source /home/stack/overcloudrc
printf '\n\n'
# The label shows single quotes around the sort key, so it is printed verbatim.
echo "Command : openstack hypervisor list --long --sort 'Hypervisor Hostname'"
openstack hypervisor list --long --sort "Hypervisor Hostname"
printf '\n\n'
_os_report availability zone list
_os_report host list --sort Zone
_os_report compute service list
_os_report volume service list
_os_report volume type list
_os_report volume type list --default
_os_report network agent list --sort Host

# Pacemaker status from Controller 0. $o holds the ssh-overcloud.sh command
# prefix and is expanded unquoted on purpose.
echo "Command : sudo pcs status"
# shellcheck disable=SC2086
$o 0 -q sudo pcs status
printf '\n\n'

# The label shows a salt one-liner; the check itself is carried out by the
# three Cont*GaleraCheck functions.
echo "Command : salt \"*cont*\" cmd.run \"hostname -s | grep -o -E '[0-9]+' | tail -1 | xargs -i sudo docker exec galera-bundle-docker-{} clustercheck\""
printf '\n\n'
Cont1GaleraCheck
printf '\n\n'
Cont2GaleraCheck
printf '\n\n'
Cont3GaleraCheck
printf '\n\n'
# Extended inventory through salt. Runs only when the first argument is
# "--all"; otherwise a skip marker is printed.

# _salt_report TARGET COMMAND
#   Print the 'Command : salt "TARGET" cmd.run "COMMAND"' label, run COMMAND
#   on TARGET through salt cmd.run, then print two blank lines.
_salt_report() {
  echo "Command : salt \"$1\" cmd.run \"$2\""
  salt "$1" cmd.run "$2"
  printf '\n\n'
}

case "$1" in
  --all)
    _salt_report '*' "lscpu | grep -e 'CPU(s)' -e Core -e Socket -e NUMA -e Virtualization"
    _salt_report '*' 'numactl -H'
    _salt_report '*cont*' 'numactl -s'
    _salt_report '*comp*' 'numactl -s'
    _salt_report '*' 'free -h'
    _salt_report '*comp*' 'grep -e HugePages_Total -e Hugepagesize /proc/meminfo'
    # This label uses single quotes around the command, so print it verbatim.
    echo "Command : salt \"*\" cmd.run 'lspci | grep Ethernet'"
    salt "*" cmd.run 'lspci | grep Ethernet'
    printf '\n\n'
    _salt_report '*' 'ip l | grep -e em[12] -e p3p[12]'
    _salt_report '*' 'head /sys/class/net/{em[12],p3p[12]}/device/numa_node'
    _salt_report '*cont*' 'ip -4 a | grep -e mtu -e inet'
    _salt_report '*comp*' 'ip -4 a | grep -e mtu -e inet'
    _salt_report '*cont*' 'lsblk'
    _salt_report '*comp*' 'lsblk'
    ;;
  *)
    echo ">>> Skip"
    ;;
esac
# NTP peer listing for controllers and computes (runs with or without
# "--all"), then the closing marker and end timestamp.
printf '%s\n' 'Command : salt "*cont*" cmd.run "ntpq -pn"'
salt '*cont*' cmd.run 'ntpq -pn'
printf '\n\n'
printf '%s\n' 'Command : salt "*comp*" cmd.run "ntpq -pn"'
salt '*comp*' cmd.run 'ntpq -pn'
printf '\n\n'
printf '%s\n' '>>> CBIS HEALTHCHECK FINISH'
date
printf '\n\n'
# Handy follow-up to keep watching Pacemaker (not run by this script):
# watch '/usr/share/cbis/undercloud/tools/ssh-overcloud.sh Controller 0 -q sudo pcs status'
▸ 初めて自分のShell Scriptを作成し、完成できて達成感がすごい
▸ 試行錯誤を経て、Command、Alias、PATH、Shell等々、全体的な勉強が出来た
▸ 自分が作ったScriptが手順書に含まれた時に気持ち良かった
▸ テスト環境が自由に使えることに感謝した(覚えていないくらい何度も実行して失敗した)
▸ 目的通り、作業が楽になったので満足