mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-01 07:47:56 +00:00
Merge pull request #126586 from SergeyKanzhelev/remove-health-monitor
remove health-monitor service as it does more harm than good
This commit is contained in:
commit
5b96672ce4
@ -410,7 +410,6 @@ function kube::release::package_kube_manifests_tarball() {
|
|||||||
if [[ -e "${KUBE_ROOT}/cluster/gce/gci/gke-internal-configure-helper.sh" ]]; then
|
if [[ -e "${KUBE_ROOT}/cluster/gce/gci/gke-internal-configure-helper.sh" ]]; then
|
||||||
cp "${KUBE_ROOT}/cluster/gce/gci/gke-internal-configure-helper.sh" "${dst_dir}/"
|
cp "${KUBE_ROOT}/cluster/gce/gci/gke-internal-configure-helper.sh" "${dst_dir}/"
|
||||||
fi
|
fi
|
||||||
cp "${KUBE_ROOT}/cluster/gce/gci/health-monitor.sh" "${dst_dir}/health-monitor.sh"
|
|
||||||
# Merge GCE-specific addons with general purpose addons.
|
# Merge GCE-specific addons with general purpose addons.
|
||||||
for d in cluster/addons cluster/gce/addons; do
|
for d in cluster/addons cluster/gce/addons; do
|
||||||
find "${KUBE_ROOT}/${d}" \( \( -name \*.yaml -o -name \*.yaml.in -o -name \*.json \) -a ! \( -name \*demo\* \) \) -print0 | "${TAR}" c --transform "s|${KUBE_ROOT#/*}/${d}||" --null -T - | "${TAR}" x -C "${dst_dir}"
|
find "${KUBE_ROOT}/${d}" \( \( -name \*.yaml -o -name \*.yaml.in -o -name \*.json \) -a ! \( -name \*demo\* \) \) -print0 | "${TAR}" c --transform "s|${KUBE_ROOT#/*}/${d}||" --null -T - | "${TAR}" x -C "${dst_dir}"
|
||||||
|
@ -418,8 +418,6 @@ function install-kube-manifests {
|
|||||||
cp "${dst_dir}/kubernetes/gci-trusty/gke-internal-configure-helper.sh" "${KUBE_BIN}/"
|
cp "${dst_dir}/kubernetes/gci-trusty/gke-internal-configure-helper.sh" "${KUBE_BIN}/"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
cp "${dst_dir}/kubernetes/gci-trusty/health-monitor.sh" "${KUBE_BIN}/health-monitor.sh"
|
|
||||||
|
|
||||||
rm -f "${KUBE_HOME}/${manifests_tar}"
|
rm -f "${KUBE_HOME}/${manifests_tar}"
|
||||||
rm -f "${KUBE_HOME}/${manifests_tar}.sha512"
|
rm -f "${KUBE_HOME}/${manifests_tar}.sha512"
|
||||||
}
|
}
|
||||||
|
@ -1,99 +0,0 @@
|
|||||||
#!/usr/bin/env bash
|
|
||||||
|
|
||||||
# Copyright 2016 The Kubernetes Authors.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
# This script is for master and node instance health monitoring, which is
|
|
||||||
# packed in kube-manifest tarball. It is executed through a systemd service
|
|
||||||
# in cluster/gce/gci/<master/node>.yaml. The env variables come from an env
|
|
||||||
# file provided by the systemd service.
|
|
||||||
|
|
||||||
# Treat expansion of an unset variable as an error, and make a pipeline
# report failure when any of its stages fails.
set -o nounset -o pipefail
|
|
||||||
|
|
||||||
# We simply kill the process when there is a failure. Another systemd service will
|
|
||||||
# automatically restart the process.
|
|
||||||
# Monitors the container runtime through "crictl pods" and kills the runtime's
# main process whenever the check fails or takes longer than 60 seconds.
# Restarting the killed process is left to another systemd service.
#
# Globals:
#   KUBE_HOME              (read) crictl is looked up at ${KUBE_HOME}/bin/crictl
#   CONTAINER_RUNTIME_NAME (read, optional) unit passed to "systemctl kill";
#                          defaults to "containerd"
#   SLEEP_SECONDS          (read) pause after a successful health check
# Outputs:
#   Progress and failure messages on stdout.
# Returns:
#   Does not return; the monitoring loop has no exit condition.
function container_runtime_monitoring {
  local -r max_attempts=5
  local attempt=1
  local backoff_seconds
  local -r crictl="${KUBE_HOME}/bin/crictl"
  local -r container_runtime_name="${CONTAINER_RUNTIME_NAME:-containerd}"
  local -r healthcheck_command=("${crictl}" pods)

  # Container runtime startup takes time. Make initial attempts before
  # starting to kill the container runtime.
  until timeout 60 "${healthcheck_command[@]}" > /dev/null; do
    if (( attempt == max_attempts )); then
      echo "Max attempt ${max_attempts} reached! Proceeding to monitor container runtime healthiness."
      break
    fi
    # Exponential backoff (2, 4, 8, 16 seconds). The delay is computed once so
    # that the value written to the log is the value actually slept.
    backoff_seconds=$(( 2 ** attempt ))
    echo "$attempt initial attempt \"${healthcheck_command[*]}\"! Trying again in ${backoff_seconds} seconds..."
    sleep "${backoff_seconds}"
    attempt=$(( attempt + 1 ))
  done

  while true; do
    if ! timeout 60 "${healthcheck_command[@]}" > /dev/null; then
      echo "Container runtime ${container_runtime_name} failed!"
      systemctl kill --kill-who=main "${container_runtime_name}"
      # Wait for a while, as we don't want to kill it again before it is really up.
      sleep 120
    else
      sleep "${SLEEP_SECONDS}"
    fi
  done
}
|
|
||||||
|
|
||||||
# Polls the kubelet healthz endpoint at http://127.0.0.1:10248/healthz and
# runs "systemctl kill kubelet" whenever the probe fails.
#
# Globals:
#   SLEEP_SECONDS (read) pause after a successful probe
# Outputs:
#   Status messages, plus the probe's response/errors on failure, on stdout.
# Returns:
#   Does not return; the polling loop has no exit condition.
function kubelet_monitoring {
  local -r probe_timeout=10
  local -r probe=(curl -m "${probe_timeout}" -f -s -S http://127.0.0.1:10248/healthz)
  local response=""

  echo "Wait for 2 minutes for kubelet to be functional"
  # TODO(andyzheng0831): replace it with a more reliable method if possible.
  sleep 120

  while true; do
    # Healthy path first: pause, then probe again.
    if response=$("${probe[@]}" 2>&1); then
      sleep "${SLEEP_SECONDS}"
      continue
    fi

    # Print the response and/or errors.
    echo "${response}"
    echo "Kubelet is unhealthy!"
    systemctl kill kubelet
    # Wait for a while, as we don't want to kill it again before it is really up.
    sleep 60
  done
}
|
|
||||||
|
|
||||||
|
|
||||||
############## Main Function ################
# Usage: health-monitor.sh <container-runtime/kubelet>
#
# Checks that exactly one argument was given, requires the
# /home/kubernetes/kube-env file to exist, sources it, and then starts the
# monitoring loop that matches the requested component.
# Exits 1 on a wrong argument count, a missing kube-env file, or an
# unsupported component.
if [[ "$#" -ne 1 ]]; then
  echo "Usage: health-monitor.sh <container-runtime/kubelet>"
  exit 1
fi

KUBE_HOME="/home/kubernetes"
KUBE_ENV="${KUBE_HOME}/kube-env"
if [[ ! -e "${KUBE_ENV}" ]]; then
  echo "The ${KUBE_ENV} file does not exist!! Terminate health monitoring"
  exit 1
fi

SLEEP_SECONDS=10
component=$1
echo "Start kubernetes health monitoring for ${component}"
source "${KUBE_ENV}"
case "${component}" in
  container-runtime)
    container_runtime_monitoring
    ;;
  kubelet)
    kubelet_monitoring
    ;;
  *)
    echo "Health monitoring for component \"${component}\" is not supported!"
    # Signal the error to the caller, like the other failure paths above,
    # instead of ending with a success status.
    exit 1
    ;;
esac
|
|
@ -90,42 +90,6 @@ write_files:
|
|||||||
[Install]
|
[Install]
|
||||||
WantedBy=kubernetes.target
|
WantedBy=kubernetes.target
|
||||||
|
|
||||||
- path: /etc/systemd/system/kube-container-runtime-monitor.service
|
|
||||||
permissions: 0644
|
|
||||||
owner: root
|
|
||||||
content: |
|
|
||||||
[Unit]
|
|
||||||
Description=Kubernetes health monitoring for container runtime
|
|
||||||
After=kube-master-configuration.service
|
|
||||||
|
|
||||||
[Service]
|
|
||||||
Restart=always
|
|
||||||
RestartSec=10
|
|
||||||
RemainAfterExit=yes
|
|
||||||
ExecStartPre=/bin/chmod 544 /home/kubernetes/bin/health-monitor.sh
|
|
||||||
ExecStart=/home/kubernetes/bin/health-monitor.sh container-runtime
|
|
||||||
|
|
||||||
[Install]
|
|
||||||
WantedBy=kubernetes.target
|
|
||||||
|
|
||||||
- path: /etc/systemd/system/kubelet-monitor.service
|
|
||||||
permissions: 0644
|
|
||||||
owner: root
|
|
||||||
content: |
|
|
||||||
[Unit]
|
|
||||||
Description=Kubernetes health monitoring for kubelet
|
|
||||||
After=kube-master-configuration.service
|
|
||||||
|
|
||||||
[Service]
|
|
||||||
Restart=always
|
|
||||||
RestartSec=10
|
|
||||||
RemainAfterExit=yes
|
|
||||||
ExecStartPre=/bin/chmod 544 /home/kubernetes/bin/health-monitor.sh
|
|
||||||
ExecStart=/home/kubernetes/bin/health-monitor.sh kubelet
|
|
||||||
|
|
||||||
[Install]
|
|
||||||
WantedBy=kubernetes.target
|
|
||||||
|
|
||||||
- path: /etc/systemd/system/kube-logrotate.timer
|
- path: /etc/systemd/system/kube-logrotate.timer
|
||||||
permissions: 0644
|
permissions: 0644
|
||||||
owner: root
|
owner: root
|
||||||
@ -170,8 +134,6 @@ runcmd:
|
|||||||
- systemctl enable kube-master-installation.service
|
- systemctl enable kube-master-installation.service
|
||||||
- systemctl enable kube-master-internal-route.service
|
- systemctl enable kube-master-internal-route.service
|
||||||
- systemctl enable kube-master-configuration.service
|
- systemctl enable kube-master-configuration.service
|
||||||
- systemctl enable kube-container-runtime-monitor.service
|
|
||||||
- systemctl enable kubelet-monitor.service
|
|
||||||
- systemctl enable kube-logrotate.timer
|
- systemctl enable kube-logrotate.timer
|
||||||
- systemctl enable kube-logrotate.service
|
- systemctl enable kube-logrotate.service
|
||||||
- systemctl enable kubernetes.target
|
- systemctl enable kubernetes.target
|
||||||
|
@ -41,44 +41,6 @@ write_files:
|
|||||||
[Install]
|
[Install]
|
||||||
WantedBy=kubernetes.target
|
WantedBy=kubernetes.target
|
||||||
|
|
||||||
- path: /etc/systemd/system/kube-container-runtime-monitor.service
|
|
||||||
permissions: 0644
|
|
||||||
owner: root
|
|
||||||
content: |
|
|
||||||
[Unit]
|
|
||||||
Description=Kubernetes health monitoring for container runtime
|
|
||||||
After=kube-node-configuration.service
|
|
||||||
|
|
||||||
[Service]
|
|
||||||
Restart=always
|
|
||||||
RestartSec=10
|
|
||||||
RemainAfterExit=yes
|
|
||||||
RemainAfterExit=yes
|
|
||||||
ExecStartPre=/bin/chmod 544 /home/kubernetes/bin/health-monitor.sh
|
|
||||||
ExecStart=/home/kubernetes/bin/health-monitor.sh container-runtime
|
|
||||||
|
|
||||||
[Install]
|
|
||||||
WantedBy=kubernetes.target
|
|
||||||
|
|
||||||
- path: /etc/systemd/system/kubelet-monitor.service
|
|
||||||
permissions: 0644
|
|
||||||
owner: root
|
|
||||||
content: |
|
|
||||||
[Unit]
|
|
||||||
Description=Kubernetes health monitoring for kubelet
|
|
||||||
After=kube-node-configuration.service
|
|
||||||
|
|
||||||
[Service]
|
|
||||||
Restart=always
|
|
||||||
RestartSec=10
|
|
||||||
RemainAfterExit=yes
|
|
||||||
RemainAfterExit=yes
|
|
||||||
ExecStartPre=/bin/chmod 544 /home/kubernetes/bin/health-monitor.sh
|
|
||||||
ExecStart=/home/kubernetes/bin/health-monitor.sh kubelet
|
|
||||||
|
|
||||||
[Install]
|
|
||||||
WantedBy=kubernetes.target
|
|
||||||
|
|
||||||
- path: /etc/systemd/system/kube-logrotate.timer
|
- path: /etc/systemd/system/kube-logrotate.timer
|
||||||
permissions: 0644
|
permissions: 0644
|
||||||
owner: root
|
owner: root
|
||||||
@ -128,8 +90,6 @@ runcmd:
|
|||||||
- systemctl daemon-reload
|
- systemctl daemon-reload
|
||||||
- systemctl enable kube-node-installation.service
|
- systemctl enable kube-node-installation.service
|
||||||
- systemctl enable kube-node-configuration.service
|
- systemctl enable kube-node-configuration.service
|
||||||
- systemctl enable kube-container-runtime-monitor.service
|
|
||||||
- systemctl enable kubelet-monitor.service
|
|
||||||
- systemctl enable kube-logrotate.timer
|
- systemctl enable kube-logrotate.timer
|
||||||
- systemctl enable kube-logrotate.service
|
- systemctl enable kube-logrotate.service
|
||||||
- systemctl enable kubernetes.target
|
- systemctl enable kubernetes.target
|
||||||
|
Loading…
Reference in New Issue
Block a user