mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-23 03:41:45 +00:00
Merge pull request #63357 from Random-Liu/install-and-use-crictl
Automatic merge from submit-queue (batch tested with PRs 63167, 63357). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>. Install and use crictl in gce kube-up.sh Download and use crictl in gce kube-up.sh. This PR: 1. Downloads crictl `v1.0.0-beta.0` onto the node, which supports CRI v1alpha2. We'll upgrade it to `v1.0.0-beta.1` soon after the release is cut. 2. Changes `kube-docker-monitor` to `kube-container-runtime-monitor`, and lets it use `crictl` to do health monitoring. 3. Changes `e2e-image-puller` to use `crictl`. Because of https://github.com/kubernetes/kubernetes/issues/63355, it doesn't work now. But in `crictl v1.0.0-beta.1`, we are going to statically link it, and the `e2e-image-puller` should work again. 4. Uses `systemctl kill --kill-who=main` instead of `pkill`, the reason is that: a. `pkill docker` will send `SIGTERM` to all processes including `dockerd`, `docker-containerd`, `docker-containerd-shim`. This is not a problem for Docker 17.03 CE, because `containerd-shim` in containerd 0.2.x doesn't exit with SIGTERM (see [code](https://github.com/containerd/containerd/blob/v0.2.x/containerd-shim/main.go#L123)). However, `containerd-shim` in containerd 1.0+ does exit with SIGTERM (see [code](https://github.com/containerd/containerd/blob/master/cmd/containerd-shim/main_unix.go#L200)). This means that `pkill docker` and `pkill containerd` will kill all shim processes for Docker 17.11+ and containerd 1.0+. b. We can use `pkill -x` instead. However, docker systemd service name is `docker`, but daemon process name is `dockerd`. We have to introduce another environment variable to specify "daemon process name". Given that, it seems easier to just use `systemctl kill` which only requires systemd service name. `systemctl kill --kill-who=main` will make sure only main process receives SIGTERM.
Signed-off-by: Lantao Liu <lantaol@google.com> /cc @filbranden @yujuhong @feiskyer @mrunalp @kubernetes/sig-node-pr-reviews @kubernetes/sig-cluster-lifecycle-pr-reviews **Release note**: ```release-note Kubernetes clusters on GCE now have crictl installed. Users can use it to help debug their nodes. The documentation of crictl can be found at https://github.com/kubernetes-incubator/cri-tools/blob/master/docs/crictl.md. ```
This commit is contained in:
commit
7b8bb6e7d3
@ -2114,10 +2114,16 @@ function start-fluentd-resource-update {
|
|||||||
wait-for-apiserver-and-update-fluentd &
|
wait-for-apiserver-and-update-fluentd &
|
||||||
}
|
}
|
||||||
|
|
||||||
# Update {{ container-runtime }} with actual container runtime name.
|
# Update {{ container-runtime }} with actual container runtime name,
|
||||||
|
# and {{ container-runtime-endpoint }} with actual container runtime
|
||||||
|
# endpoint.
|
||||||
function update-container-runtime {
|
function update-container-runtime {
|
||||||
local -r configmap_yaml="$1"
|
local -r file="$1"
|
||||||
sed -i -e "s@{{ *container_runtime *}}@${CONTAINER_RUNTIME_NAME:-docker}@g" "${configmap_yaml}"
|
local -r container_runtime_endpoint="${CONTAINER_RUNTIME_ENDPOINT:-unix:///var/run/dockershim.sock}"
|
||||||
|
sed -i \
|
||||||
|
-e "s@{{ *container_runtime *}}@${CONTAINER_RUNTIME_NAME:-docker}@g" \
|
||||||
|
-e "s@{{ *container_runtime_endpoint *}}@${container_runtime_endpoint#unix://}@g" \
|
||||||
|
"${file}"
|
||||||
}
|
}
|
||||||
|
|
||||||
# Remove configuration in yaml file if node journal is not enabled.
|
# Remove configuration in yaml file if node journal is not enabled.
|
||||||
@ -2399,8 +2405,9 @@ EOF
|
|||||||
# Starts an image-puller - used in test clusters.
|
# Starts an image-puller - used in test clusters.
|
||||||
function start-image-puller {
|
function start-image-puller {
|
||||||
echo "Start image-puller"
|
echo "Start image-puller"
|
||||||
cp "${KUBE_HOME}/kube-manifests/kubernetes/gci-trusty/e2e-image-puller.manifest" \
|
local -r e2e_image_puller_manifest="${KUBE_HOME}/kube-manifests/kubernetes/gci-trusty/e2e-image-puller.manifest"
|
||||||
/etc/kubernetes/manifests/
|
update-container-runtime "${e2e_image_puller_manifest}"
|
||||||
|
cp "${e2e_image_puller_manifest}" /etc/kubernetes/manifests/
|
||||||
}
|
}
|
||||||
|
|
||||||
# Setups manifests for ingress controller and gce-specific policies for service controller.
|
# Setups manifests for ingress controller and gce-specific policies for service controller.
|
||||||
|
@ -28,6 +28,8 @@ DEFAULT_CNI_VERSION="v0.6.0"
|
|||||||
DEFAULT_CNI_SHA1="d595d3ded6499a64e8dac02466e2f5f2ce257c9f"
|
DEFAULT_CNI_SHA1="d595d3ded6499a64e8dac02466e2f5f2ce257c9f"
|
||||||
DEFAULT_NPD_VERSION="v0.4.1"
|
DEFAULT_NPD_VERSION="v0.4.1"
|
||||||
DEFAULT_NPD_SHA1="a57a3fe64cab8a18ec654f5cef0aec59dae62568"
|
DEFAULT_NPD_SHA1="a57a3fe64cab8a18ec654f5cef0aec59dae62568"
|
||||||
|
DEFAULT_CRICTL_VERSION="v1.0.0-beta.1"
|
||||||
|
DEFAULT_CRICTL_SHA1="6816982ea1b83506945ce02949199171fee17b0b"
|
||||||
DEFAULT_MOUNTER_TAR_SHA="8003b798cf33c7f91320cd6ee5cec4fa22244571"
|
DEFAULT_MOUNTER_TAR_SHA="8003b798cf33c7f91320cd6ee5cec4fa22244571"
|
||||||
###
|
###
|
||||||
|
|
||||||
@ -234,6 +236,34 @@ function install-cni-binaries {
|
|||||||
rm -f "${KUBE_HOME}/${cni_tar}"
|
rm -f "${KUBE_HOME}/${cni_tar}"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Install crictl binary.
|
||||||
|
function install-crictl {
|
||||||
|
if [[ -n "${CRICTL_VERSION:-}" ]]; then
|
||||||
|
local -r crictl_version="${CRICTL_VERSION}"
|
||||||
|
local -r crictl_sha1="${CRICTL_TAR_HASH}"
|
||||||
|
else
|
||||||
|
local -r crictl_version="${DEFAULT_CRICTL_VERSION}"
|
||||||
|
local -r crictl_sha1="${DEFAULT_CRICTL_SHA1}"
|
||||||
|
fi
|
||||||
|
local -r crictl="crictl-${crictl_version}-linux-amd64"
|
||||||
|
|
||||||
|
if is-preloaded "${crictl}" "${crictl_sha1}"; then
|
||||||
|
echo "crictl is preloaded"
|
||||||
|
return
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "Downloading crictl"
|
||||||
|
local -r crictl_path="https://storage.googleapis.com/kubernetes-release/crictl"
|
||||||
|
download-or-bust "${crictl_sha1}" "${crictl_path}/${crictl}"
|
||||||
|
mv "${KUBE_HOME}/${crictl}" "${KUBE_BIN}/crictl"
|
||||||
|
chmod a+x "${KUBE_BIN}/crictl"
|
||||||
|
|
||||||
|
# Create crictl config file.
|
||||||
|
cat > /etc/crictl.yaml <<EOF
|
||||||
|
runtime-endpoint: ${CONTAINER_RUNTIME_ENDPOINT:-unix:///var/run/dockershim.sock}
|
||||||
|
EOF
|
||||||
|
}
|
||||||
|
|
||||||
function install-kube-manifests {
|
function install-kube-manifests {
|
||||||
# Put kube-system pods manifests in ${KUBE_HOME}/kube-manifests/.
|
# Put kube-system pods manifests in ${KUBE_HOME}/kube-manifests/.
|
||||||
local dst_dir="${KUBE_HOME}/kube-manifests"
|
local dst_dir="${KUBE_HOME}/kube-manifests"
|
||||||
@ -370,6 +400,9 @@ function install-kube-binary-config {
|
|||||||
remount-flexvolume-directory "${VOLUME_PLUGIN_DIR}"
|
remount-flexvolume-directory "${VOLUME_PLUGIN_DIR}"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Install crictl on each node.
|
||||||
|
install-crictl
|
||||||
|
|
||||||
# Clean up.
|
# Clean up.
|
||||||
rm -rf "${KUBE_HOME}/kubernetes"
|
rm -rf "${KUBE_HOME}/kubernetes"
|
||||||
rm -f "${KUBE_HOME}/${server_binary_tar}"
|
rm -f "${KUBE_HOME}/${server_binary_tar}"
|
||||||
|
@ -24,11 +24,33 @@ set -o pipefail
|
|||||||
|
|
||||||
# We simply kill the process when there is a failure. Another systemd service will
|
# We simply kill the process when there is a failure. Another systemd service will
|
||||||
# automatically restart the process.
|
# automatically restart the process.
|
||||||
function docker_monitoring {
|
function container_runtime_monitoring {
|
||||||
while [ 1 ]; do
|
local -r max_attempts=5
|
||||||
if ! timeout 60 docker ps > /dev/null; then
|
local attempt=1
|
||||||
echo "Docker daemon failed!"
|
local -r crictl="${KUBE_HOME}/bin/crictl"
|
||||||
pkill docker
|
local -r container_runtime_name="${CONTAINER_RUNTIME_NAME:-docker}"
|
||||||
|
# We still need to use `docker ps` when container runtime is "docker". This is because
|
||||||
|
# dockershim is still part of kubelet today. When kubelet is down, crictl pods
|
||||||
|
# will also fail, and docker will be killed. This is undesirable especially when
|
||||||
|
# docker live restore is disabled.
|
||||||
|
local healthcheck_command="docker ps"
|
||||||
|
if [[ "${CONTAINER_RUNTIME:-docker}" != "docker" ]]; then
|
||||||
|
healthcheck_command="${crictl} pods"
|
||||||
|
fi
|
||||||
|
# Container runtime startup takes time. Make initial attempts before starting
|
||||||
|
# killing the container runtime.
|
||||||
|
until timeout 60 ${healthcheck_command} > /dev/null; do
|
||||||
|
if (( attempt == max_attempts )); then
|
||||||
|
echo "Max attempt ${max_attempts} reached! Proceeding to monitor container runtime healthiness."
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
echo "$attempt initial attempt \"${healthcheck_command}\"! Trying again in $attempt seconds..."
|
||||||
|
sleep "$(( 2 ** attempt++ ))"
|
||||||
|
done
|
||||||
|
while true; do
|
||||||
|
if ! timeout 60 ${healthcheck_command} > /dev/null; then
|
||||||
|
echo "Container runtime ${container_runtime_name} failed!"
|
||||||
|
systemctl kill --kill-who=main "${container_runtime_name}"
|
||||||
# Wait for a while, as we don't want to kill it again before it is really up.
|
# Wait for a while, as we don't want to kill it again before it is really up.
|
||||||
sleep 120
|
sleep 120
|
||||||
else
|
else
|
||||||
@ -48,7 +70,7 @@ function kubelet_monitoring {
|
|||||||
# Print the response and/or errors.
|
# Print the response and/or errors.
|
||||||
echo $output
|
echo $output
|
||||||
echo "Kubelet is unhealthy!"
|
echo "Kubelet is unhealthy!"
|
||||||
pkill kubelet
|
systemctl kill kubelet
|
||||||
# Wait for a while, as we don't want to kill it again before it is really up.
|
# Wait for a while, as we don't want to kill it again before it is really up.
|
||||||
sleep 60
|
sleep 60
|
||||||
else
|
else
|
||||||
@ -60,11 +82,12 @@ function kubelet_monitoring {
|
|||||||
|
|
||||||
############## Main Function ################
|
############## Main Function ################
|
||||||
if [[ "$#" -ne 1 ]]; then
|
if [[ "$#" -ne 1 ]]; then
|
||||||
echo "Usage: health-monitor.sh <docker/kubelet>"
|
echo "Usage: health-monitor.sh <container-runtime/kubelet>"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
KUBE_ENV="/home/kubernetes/kube-env"
|
KUBE_HOME="/home/kubernetes"
|
||||||
|
KUBE_ENV="${KUBE_HOME}/kube-env"
|
||||||
if [[ ! -e "${KUBE_ENV}" ]]; then
|
if [[ ! -e "${KUBE_ENV}" ]]; then
|
||||||
echo "The ${KUBE_ENV} file does not exist!! Terminate health monitoring"
|
echo "The ${KUBE_ENV} file does not exist!! Terminate health monitoring"
|
||||||
exit 1
|
exit 1
|
||||||
@ -74,8 +97,8 @@ SLEEP_SECONDS=10
|
|||||||
component=$1
|
component=$1
|
||||||
echo "Start kubernetes health monitoring for ${component}"
|
echo "Start kubernetes health monitoring for ${component}"
|
||||||
source "${KUBE_ENV}"
|
source "${KUBE_ENV}"
|
||||||
if [[ "${component}" == "docker" ]]; then
|
if [[ "${component}" == "container-runtime" ]]; then
|
||||||
docker_monitoring
|
container_runtime_monitoring
|
||||||
elif [[ "${component}" == "kubelet" ]]; then
|
elif [[ "${component}" == "kubelet" ]]; then
|
||||||
kubelet_monitoring
|
kubelet_monitoring
|
||||||
else
|
else
|
||||||
|
@ -40,12 +40,12 @@ write_files:
|
|||||||
[Install]
|
[Install]
|
||||||
WantedBy=kubernetes.target
|
WantedBy=kubernetes.target
|
||||||
|
|
||||||
- path: /etc/systemd/system/kube-docker-monitor.service
|
- path: /etc/systemd/system/kube-container-runtime-monitor.service
|
||||||
permissions: 0644
|
permissions: 0644
|
||||||
owner: root
|
owner: root
|
||||||
content: |
|
content: |
|
||||||
[Unit]
|
[Unit]
|
||||||
Description=Kubernetes health monitoring for docker
|
Description=Kubernetes health monitoring for container runtime
|
||||||
After=kube-master-configuration.service
|
After=kube-master-configuration.service
|
||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
@ -54,7 +54,7 @@ write_files:
|
|||||||
RemainAfterExit=yes
|
RemainAfterExit=yes
|
||||||
RemainAfterExit=yes
|
RemainAfterExit=yes
|
||||||
ExecStartPre=/bin/chmod 544 /home/kubernetes/bin/health-monitor.sh
|
ExecStartPre=/bin/chmod 544 /home/kubernetes/bin/health-monitor.sh
|
||||||
ExecStart=/home/kubernetes/bin/health-monitor.sh docker
|
ExecStart=/home/kubernetes/bin/health-monitor.sh container-runtime
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
WantedBy=kubernetes.target
|
WantedBy=kubernetes.target
|
||||||
@ -120,7 +120,7 @@ runcmd:
|
|||||||
- systemctl daemon-reload
|
- systemctl daemon-reload
|
||||||
- systemctl enable kube-master-installation.service
|
- systemctl enable kube-master-installation.service
|
||||||
- systemctl enable kube-master-configuration.service
|
- systemctl enable kube-master-configuration.service
|
||||||
- systemctl enable kube-docker-monitor.service
|
- systemctl enable kube-container-runtime-monitor.service
|
||||||
- systemctl enable kubelet-monitor.service
|
- systemctl enable kubelet-monitor.service
|
||||||
- systemctl enable kube-logrotate.timer
|
- systemctl enable kube-logrotate.timer
|
||||||
- systemctl enable kube-logrotate.service
|
- systemctl enable kube-logrotate.service
|
||||||
|
@ -40,12 +40,12 @@ write_files:
|
|||||||
[Install]
|
[Install]
|
||||||
WantedBy=kubernetes.target
|
WantedBy=kubernetes.target
|
||||||
|
|
||||||
- path: /etc/systemd/system/kube-docker-monitor.service
|
- path: /etc/systemd/system/kube-container-runtime-monitor.service
|
||||||
permissions: 0644
|
permissions: 0644
|
||||||
owner: root
|
owner: root
|
||||||
content: |
|
content: |
|
||||||
[Unit]
|
[Unit]
|
||||||
Description=Kubernetes health monitoring for docker
|
Description=Kubernetes health monitoring for container runtime
|
||||||
After=kube-node-configuration.service
|
After=kube-node-configuration.service
|
||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
@ -54,7 +54,7 @@ write_files:
|
|||||||
RemainAfterExit=yes
|
RemainAfterExit=yes
|
||||||
RemainAfterExit=yes
|
RemainAfterExit=yes
|
||||||
ExecStartPre=/bin/chmod 544 /home/kubernetes/bin/health-monitor.sh
|
ExecStartPre=/bin/chmod 544 /home/kubernetes/bin/health-monitor.sh
|
||||||
ExecStart=/home/kubernetes/bin/health-monitor.sh docker
|
ExecStart=/home/kubernetes/bin/health-monitor.sh container-runtime
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
WantedBy=kubernetes.target
|
WantedBy=kubernetes.target
|
||||||
@ -120,7 +120,7 @@ runcmd:
|
|||||||
- systemctl daemon-reload
|
- systemctl daemon-reload
|
||||||
- systemctl enable kube-node-installation.service
|
- systemctl enable kube-node-installation.service
|
||||||
- systemctl enable kube-node-configuration.service
|
- systemctl enable kube-node-configuration.service
|
||||||
- systemctl enable kube-docker-monitor.service
|
- systemctl enable kube-container-runtime-monitor.service
|
||||||
- systemctl enable kubelet-monitor.service
|
- systemctl enable kubelet-monitor.service
|
||||||
- systemctl enable kube-logrotate.timer
|
- systemctl enable kube-logrotate.timer
|
||||||
- systemctl enable kube-logrotate.service
|
- systemctl enable kube-logrotate.service
|
||||||
|
@ -76,14 +76,16 @@ spec:
|
|||||||
gcr.io/kubernetes-e2e-test-images/volume-rbd:0.1
|
gcr.io/kubernetes-e2e-test-images/volume-rbd:0.1
|
||||||
k8s.gcr.io/zookeeper-install-3.5.0-alpha:e2e
|
k8s.gcr.io/zookeeper-install-3.5.0-alpha:e2e
|
||||||
gcr.io/google_samples/gb-redisslave:nonexistent
|
gcr.io/google_samples/gb-redisslave:nonexistent
|
||||||
; do echo $(date '+%X') pulling $i; docker pull $i 1>/dev/null; done; exit 0;
|
; do echo $(date '+%X') pulling $i; crictl pull $i 1>/dev/null; done; exit 0;
|
||||||
securityContext:
|
securityContext:
|
||||||
privileged: true
|
privileged: true
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
- mountPath: /var/run/docker.sock
|
- mountPath: {{ container_runtime_endpoint }}
|
||||||
name: socket
|
name: socket
|
||||||
- mountPath: /usr/bin/docker
|
- mountPath: /usr/bin/crictl
|
||||||
name: docker
|
name: crictl
|
||||||
|
- mountPath: /etc/crictl.yaml
|
||||||
|
name: config
|
||||||
# Add a container that runs a health-check
|
# Add a container that runs a health-check
|
||||||
- name: nethealth-check
|
- name: nethealth-check
|
||||||
resources:
|
resources:
|
||||||
@ -98,13 +100,17 @@ spec:
|
|||||||
- "/usr/bin/nethealth || true"
|
- "/usr/bin/nethealth || true"
|
||||||
volumes:
|
volumes:
|
||||||
- hostPath:
|
- hostPath:
|
||||||
path: /var/run/docker.sock
|
path: {{ container_runtime_endpoint }}
|
||||||
type: Socket
|
type: Socket
|
||||||
name: socket
|
name: socket
|
||||||
- hostPath:
|
- hostPath:
|
||||||
path: /usr/bin/docker
|
path: /home/kubernetes/bin/crictl
|
||||||
type: File
|
type: File
|
||||||
name: docker
|
name: crictl
|
||||||
|
- hostPath:
|
||||||
|
path: /etc/crictl.yaml
|
||||||
|
type: File
|
||||||
|
name: config
|
||||||
# This pod is really fire-and-forget.
|
# This pod is really fire-and-forget.
|
||||||
restartPolicy: OnFailure
|
restartPolicy: OnFailure
|
||||||
# This pod needs hostNetworking for true VM perf measurement as well as avoiding cbr0 issues
|
# This pod needs hostNetworking for true VM perf measurement as well as avoiding cbr0 issues
|
||||||
|
@ -673,7 +673,6 @@ function construct-kubelet-flags {
|
|||||||
if [[ -n "${CONTAINER_RUNTIME:-}" ]]; then
|
if [[ -n "${CONTAINER_RUNTIME:-}" ]]; then
|
||||||
flags+=" --container-runtime=${CONTAINER_RUNTIME}"
|
flags+=" --container-runtime=${CONTAINER_RUNTIME}"
|
||||||
fi
|
fi
|
||||||
# TODO(mtaufen): CONTAINER_RUNTIME_ENDPOINT seems unused; delete it?
|
|
||||||
if [[ -n "${CONTAINER_RUNTIME_ENDPOINT:-}" ]]; then
|
if [[ -n "${CONTAINER_RUNTIME_ENDPOINT:-}" ]]; then
|
||||||
flags+=" --container-runtime-endpoint=${CONTAINER_RUNTIME_ENDPOINT}"
|
flags+=" --container-runtime-endpoint=${CONTAINER_RUNTIME_ENDPOINT}"
|
||||||
fi
|
fi
|
||||||
|
@ -50,7 +50,7 @@ readonly gce_logfiles="startupscript"
|
|||||||
readonly kern_logfile="kern"
|
readonly kern_logfile="kern"
|
||||||
readonly initd_logfiles="docker"
|
readonly initd_logfiles="docker"
|
||||||
readonly supervisord_logfiles="kubelet supervisor/supervisord supervisor/kubelet-stdout supervisor/kubelet-stderr supervisor/docker-stdout supervisor/docker-stderr"
|
readonly supervisord_logfiles="kubelet supervisor/supervisord supervisor/kubelet-stdout supervisor/kubelet-stderr supervisor/docker-stdout supervisor/docker-stderr"
|
||||||
readonly systemd_services="kubelet ${LOG_DUMP_SYSTEMD_SERVICES:-docker}"
|
readonly systemd_services="kubelet kubelet-monitor kube-container-runtime-monitor ${LOG_DUMP_SYSTEMD_SERVICES:-docker}"
|
||||||
|
|
||||||
# Limit the number of concurrent node connections so that we don't run out of
|
# Limit the number of concurrent node connections so that we don't run out of
|
||||||
# file descriptors for large clusters.
|
# file descriptors for large clusters.
|
||||||
|
Loading…
Reference in New Issue
Block a user