mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-30 06:54:01 +00:00
Merge pull request #22434 from justinsb/aws_docker_babysitter
Auto commit by PR queue bot
This commit is contained in:
commit
57c944caa2
44
cluster/saltbase/salt/docker/docker-healthcheck
Executable file
44
cluster/saltbase/salt/docker/docker-healthcheck
Executable file
@ -0,0 +1,44 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Copyright 2015 The Kubernetes Authors All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# This script is intended to be run periodically, to check the health
|
||||
# of docker. If it detects a failure, it will restart docker using systemctl.
|
||||
|
||||
# Probe the docker daemon once. Succeeds only when `docker version`
# completes within the 10 second timeout; its stdout is discarded.
docker_responds() {
  timeout 10 docker version > /dev/null
}

# Re-probe after a wait; when docker answers again, log it and end the
# whole script successfully (exit here terminates the script, not just
# the function).
exit_if_recovered() {
  if docker_responds; then
    echo "docker recovered"
    exit 0
  fi
}

# Healthy on the first probe: nothing to log, nothing to do.
docker_responds && exit 0

echo "docker failed"
echo "Giving docker 30 seconds grace before restarting"
sleep 30
exit_if_recovered

echo "docker still down; triggering docker restart"
systemctl restart docker

echo "Waiting 60 seconds to give docker time to start"
sleep 60
exit_if_recovered

# Falling off the end: the script's status is that of this final echo.
echo "docker still failing"
|
9
cluster/saltbase/salt/docker/docker-healthcheck.service
Normal file
9
cluster/saltbase/salt/docker/docker-healthcheck.service
Normal file
@ -0,0 +1,9 @@
|
||||
# Unit that performs a single run of the docker-healthcheck helper script.
[Unit]
|
||||
Description=Run docker-healthcheck once
|
||||
|
||||
[Service]
|
||||
# The helper is a run-to-completion script, not a long-lived daemon.
Type=oneshot
|
||||
ExecStart=/opt/kubernetes/helpers/docker-healthcheck
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
9
cluster/saltbase/salt/docker/docker-healthcheck.timer
Normal file
9
cluster/saltbase/salt/docker/docker-healthcheck.timer
Normal file
@ -0,0 +1,9 @@
|
||||
# Timer that keeps re-running docker-healthcheck.service.
[Unit]
|
||||
Description=Trigger docker-healthcheck periodically
|
||||
|
||||
[Timer]
|
||||
# Fires 10s after the previous run of the service, so the service needs
# to have run once before this timer starts triggering it.
OnUnitInactiveSec=10s
|
||||
Unit=docker-healthcheck.service
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
22
cluster/saltbase/salt/docker/docker-prestart
Executable file
22
cluster/saltbase/salt/docker/docker-prestart
Executable file
@ -0,0 +1,22 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Copyright 2015 The Kubernetes Authors All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# This script is intended to be run before we start Docker.
|
||||
|
||||
# Remove docker's network checkpoint directory before the daemon starts,
# to sidestep the known docker issue tracked at
# https://github.com/docker/docker/issues/18283
network_checkpoint_dir=/var/lib/docker/network
rm -rf -- "${network_checkpoint_dir}"
|
||||
|
@ -1,10 +1,11 @@
|
||||
[Unit]
|
||||
Description=Docker Application Container Engine
|
||||
Documentation=http://docs.docker.com
|
||||
Documentation=https://docs.docker.com
|
||||
After=network.target docker.socket
|
||||
Requires=docker.socket
|
||||
|
||||
[Service]
|
||||
Type=notify
|
||||
EnvironmentFile={{ environment_file }}
|
||||
ExecStart=/usr/bin/docker daemon -H fd:// "$DOCKER_OPTS"
|
||||
MountFlags=slave
|
||||
@ -14,7 +15,7 @@ LimitCORE=infinity
|
||||
Restart=always
|
||||
RestartSec=2s
|
||||
StartLimitInterval=0
|
||||
ExecStartPre=/opt/kubernetes/helpers/docker-prestart
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
||||
|
@ -51,6 +51,13 @@ docker:
|
||||
|
||||
{% if pillar.get('is_systemd') %}
|
||||
|
||||
/opt/kubernetes/helpers/docker-prestart:
|
||||
file.managed:
|
||||
- source: salt://docker/docker-prestart
|
||||
- user: root
|
||||
- group: root
|
||||
- mode: 755
|
||||
|
||||
{{ pillar.get('systemd_system_path') }}/docker.service:
|
||||
file.managed:
|
||||
- source: salt://docker/docker.service
|
||||
@ -60,6 +67,8 @@ docker:
|
||||
- mode: 644
|
||||
- defaults:
|
||||
environment_file: {{ environment_file }}
|
||||
- require:
|
||||
- file: /opt/kubernetes/helpers/docker-prestart
|
||||
|
||||
# The docker service.running block below doesn't work reliably
|
||||
# Instead we run our script which e.g. does a systemd daemon-reload
|
||||
@ -297,9 +306,16 @@ docker-upgrade:
|
||||
- file: /var/cache/docker-install/{{ override_deb }}
|
||||
{% endif %} # end override_docker_ver != ''
|
||||
|
||||
# Default docker systemd unit file doesn't use an EnvironmentFile; replace it with one that does.
|
||||
{% if pillar.get('is_systemd') %}
|
||||
|
||||
/opt/kubernetes/helpers/docker-prestart:
|
||||
file.managed:
|
||||
- source: salt://docker/docker-prestart
|
||||
- user: root
|
||||
- group: root
|
||||
- mode: 755
|
||||
|
||||
# Default docker systemd unit file doesn't use an EnvironmentFile; replace it with one that does.
|
||||
{{ pillar.get('systemd_system_path') }}/docker.service:
|
||||
file.managed:
|
||||
- source: salt://docker/docker.service
|
||||
@ -309,6 +325,8 @@ docker-upgrade:
|
||||
- mode: 644
|
||||
- defaults:
|
||||
environment_file: {{ environment_file }}
|
||||
- require:
|
||||
- file: /opt/kubernetes/helpers/docker-prestart
|
||||
|
||||
# The docker service.running block below doesn't work reliably
|
||||
# Instead we run our script which e.g. does a systemd daemon-reload
|
||||
@ -316,7 +334,7 @@ docker-upgrade:
|
||||
# TODO: Fix this
|
||||
fix-service-docker:
|
||||
cmd.wait:
|
||||
- name: /opt/kubernetes/helpers/services bounce docker
|
||||
- name: /opt/kubernetes/helpers/services enable docker
|
||||
- watch:
|
||||
- file: {{ pillar.get('systemd_system_path') }}/docker.service
|
||||
- file: {{ environment_file }}
|
||||
@ -325,30 +343,77 @@ fix-service-docker:
|
||||
- cmd: docker-upgrade
|
||||
{% endif %}
|
||||
|
||||
/opt/kubernetes/helpers/docker-healthcheck:
|
||||
file.managed:
|
||||
- source: salt://docker/docker-healthcheck
|
||||
- user: root
|
||||
- group: root
|
||||
- mode: 755
|
||||
|
||||
{{ pillar.get('systemd_system_path') }}/docker-healthcheck.service:
|
||||
file.managed:
|
||||
- source: salt://docker/docker-healthcheck.service
|
||||
- template: jinja
|
||||
- user: root
|
||||
- group: root
|
||||
- mode: 644
|
||||
|
||||
{{ pillar.get('systemd_system_path') }}/docker-healthcheck.timer:
|
||||
file.managed:
|
||||
- source: salt://docker/docker-healthcheck.timer
|
||||
- template: jinja
|
||||
- user: root
|
||||
- group: root
|
||||
- mode: 644
|
||||
|
||||
# Tell systemd to load the timer
|
||||
fix-systemd-docker-healthcheck-timer:
|
||||
cmd.wait:
|
||||
- name: /opt/kubernetes/helpers/services bounce docker-healthcheck.timer
|
||||
- watch:
|
||||
- file: {{ pillar.get('systemd_system_path') }}/docker-healthcheck.timer
|
||||
|
||||
# Trigger a first run of docker-healthcheck; needed because the timer fires 10s after the previous run.
|
||||
fix-systemd-docker-healthcheck-service:
|
||||
cmd.wait:
|
||||
- name: /opt/kubernetes/helpers/services bounce docker-healthcheck.service
|
||||
- watch:
|
||||
- file: {{ pillar.get('systemd_system_path') }}/docker-healthcheck.service
|
||||
- require:
|
||||
- cmd: fix-service-docker
|
||||
|
||||
{% endif %}
|
||||
|
||||
docker:
|
||||
service.running:
|
||||
# Starting Docker is racy on aws for some reason. To be honest, since Monit
|
||||
# is managing Docker restart we should probably just delete this whole thing
|
||||
# but the kubernetes components use salt 'require' to set up a dag, and that is
|
||||
# complicated and scary to unwind.
|
||||
# On AWS, we use a trick now... we don't start the docker service through Salt.
|
||||
# Kubelet or our health checker will start it. But we use service.enabled,
|
||||
# so we still have a `service: docker` node for our DAG.
|
||||
{% if grains.cloud is defined and grains.cloud == 'aws' %}
|
||||
- enable: False
|
||||
service.enabled:
|
||||
{% else %}
|
||||
service.running:
|
||||
- enable: True
|
||||
{% endif %}
|
||||
# If we put a watch on this, salt will try to start the service.
|
||||
# We put the watch on the fixer instead
|
||||
{% if not pillar.get('is_systemd') %}
|
||||
- watch:
|
||||
- file: {{ environment_file }}
|
||||
{% if override_docker_ver != '' %}
|
||||
- cmd: docker-upgrade
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
- require:
|
||||
- file: {{ environment_file }}
|
||||
{% if override_docker_ver != '' %}
|
||||
- cmd: docker-upgrade
|
||||
{% endif %}
|
||||
{% if pillar.get('is_systemd') %}
|
||||
- file: {{ pillar.get('systemd_system_path') }}/docker.service
|
||||
{% endif %}
|
||||
{% if override_docker_ver != '' %}
|
||||
- require:
|
||||
- cmd: docker-upgrade
|
||||
- cmd: fix-service-docker
|
||||
{% endif %}
|
||||
{% endif %} # end grains.os_family != 'RedHat'
|
||||
|
||||
|
@ -63,6 +63,9 @@ elif [[ "${ACTION}" == "down" ]]; then
|
||||
reload_state
|
||||
disable_service
|
||||
stop_service
|
||||
elif [[ "${ACTION}" == "enable" ]]; then
|
||||
reload_state
|
||||
enable_service
|
||||
else
|
||||
echo "Unknown action: ${ACTION}"
|
||||
exit 1
|
||||
|
Loading…
Reference in New Issue
Block a user