mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-04 09:49:50 +00:00
Merge pull request #22434 from justinsb/aws_docker_babysitter
Auto commit by PR queue bot
This commit is contained in:
commit
57c944caa2
44
cluster/saltbase/salt/docker/docker-healthcheck
Executable file
44
cluster/saltbase/salt/docker/docker-healthcheck
Executable file
@ -0,0 +1,44 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# Copyright 2015 The Kubernetes Authors All rights reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
# This script is intended to be run periodically, to check the health
|
||||||
|
# of docker. If it detects a failure, it will restart docker using systemctl.
|
||||||
|
|
||||||
|
# Probe the docker daemon. Succeeds only when `docker version` exits zero
# within 10 seconds; its stdout is discarded, stderr is left untouched.
docker_responds() {
  timeout 10 docker version > /dev/null
}

# If the daemon answers the probe, log the recovery and end the script
# successfully; otherwise return so the caller can escalate.
exit_if_recovered() {
  if docker_responds; then
    echo "docker recovered"
    exit 0
  fi
}

# Healthy on the first probe: nothing to report, leave quietly.
docker_responds && exit 0

# First failure: allow a grace period before touching the service.
echo "docker failed"
echo "Giving docker 30 seconds grace before restarting"
sleep 30
exit_if_recovered

# Still unresponsive after the grace period: restart via systemd.
echo "docker still down; triggering docker restart"
systemctl restart docker

echo "Waiting 60 seconds to give docker time to start"
sleep 60
exit_if_recovered

# The restart did not help. Only log it; the script's exit status here is
# that of the final echo.
echo "docker still failing"
|
9
cluster/saltbase/salt/docker/docker-healthcheck.service
Normal file
9
cluster/saltbase/salt/docker/docker-healthcheck.service
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
[Unit]
|
||||||
|
Description=Run docker-healthcheck once
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=oneshot
|
||||||
|
ExecStart=/opt/kubernetes/helpers/docker-healthcheck
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
9
cluster/saltbase/salt/docker/docker-healthcheck.timer
Normal file
9
cluster/saltbase/salt/docker/docker-healthcheck.timer
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
[Unit]
|
||||||
|
Description=Trigger docker-healthcheck periodically
|
||||||
|
|
||||||
|
[Timer]
|
||||||
|
OnUnitInactiveSec=10s
|
||||||
|
Unit=docker-healthcheck.service
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
22
cluster/saltbase/salt/docker/docker-prestart
Executable file
22
cluster/saltbase/salt/docker/docker-prestart
Executable file
@ -0,0 +1,22 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# Copyright 2015 The Kubernetes Authors All rights reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
# This script is intended to be run before we start Docker.
|
||||||
|
|
||||||
|
# Clean up the docker network checkpoint to avoid running into a known issue
|
||||||
|
# of docker (https://github.com/docker/docker/issues/18283)
|
||||||
|
# Remove the directory unconditionally: -r recurses into it and -f suppresses
# the error when it is already absent.
rm -rf /var/lib/docker/network
|
||||||
|
|
@ -1,10 +1,11 @@
|
|||||||
[Unit]
|
[Unit]
|
||||||
Description=Docker Application Container Engine
|
Description=Docker Application Container Engine
|
||||||
Documentation=http://docs.docker.com
|
Documentation=https://docs.docker.com
|
||||||
After=network.target docker.socket
|
After=network.target docker.socket
|
||||||
Requires=docker.socket
|
Requires=docker.socket
|
||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
|
Type=notify
|
||||||
EnvironmentFile={{ environment_file }}
|
EnvironmentFile={{ environment_file }}
|
||||||
ExecStart=/usr/bin/docker daemon -H fd:// "$DOCKER_OPTS"
|
ExecStart=/usr/bin/docker daemon -H fd:// "$DOCKER_OPTS"
|
||||||
MountFlags=slave
|
MountFlags=slave
|
||||||
@ -14,7 +15,7 @@ LimitCORE=infinity
|
|||||||
Restart=always
|
Restart=always
|
||||||
RestartSec=2s
|
RestartSec=2s
|
||||||
StartLimitInterval=0
|
StartLimitInterval=0
|
||||||
|
ExecStartPre=/opt/kubernetes/helpers/docker-prestart
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
WantedBy=multi-user.target
|
WantedBy=multi-user.target
|
||||||
|
|
||||||
|
@ -51,6 +51,13 @@ docker:
|
|||||||
|
|
||||||
{% if pillar.get('is_systemd') %}
|
{% if pillar.get('is_systemd') %}
|
||||||
|
|
||||||
|
/opt/kubernetes/helpers/docker-prestart:
|
||||||
|
file.managed:
|
||||||
|
- source: salt://docker/docker-prestart
|
||||||
|
- user: root
|
||||||
|
- group: root
|
||||||
|
- mode: 755
|
||||||
|
|
||||||
{{ pillar.get('systemd_system_path') }}/docker.service:
|
{{ pillar.get('systemd_system_path') }}/docker.service:
|
||||||
file.managed:
|
file.managed:
|
||||||
- source: salt://docker/docker.service
|
- source: salt://docker/docker.service
|
||||||
@ -60,6 +67,8 @@ docker:
|
|||||||
- mode: 644
|
- mode: 644
|
||||||
- defaults:
|
- defaults:
|
||||||
environment_file: {{ environment_file }}
|
environment_file: {{ environment_file }}
|
||||||
|
- require:
|
||||||
|
- file: /opt/kubernetes/helpers/docker-prestart
|
||||||
|
|
||||||
# The docker service.running block below doesn't work reliably
|
# The docker service.running block below doesn't work reliably
|
||||||
# Instead we run our script which e.g. does a systemd daemon-reload
|
# Instead we run our script which e.g. does a systemd daemon-reload
|
||||||
@ -297,9 +306,16 @@ docker-upgrade:
|
|||||||
- file: /var/cache/docker-install/{{ override_deb }}
|
- file: /var/cache/docker-install/{{ override_deb }}
|
||||||
{% endif %} # end override_docker_ver != ''
|
{% endif %} # end override_docker_ver != ''
|
||||||
|
|
||||||
# Default docker systemd unit file doesn't use an EnvironmentFile; replace it with one that does.
|
|
||||||
{% if pillar.get('is_systemd') %}
|
{% if pillar.get('is_systemd') %}
|
||||||
|
|
||||||
|
/opt/kubernetes/helpers/docker-prestart:
|
||||||
|
file.managed:
|
||||||
|
- source: salt://docker/docker-prestart
|
||||||
|
- user: root
|
||||||
|
- group: root
|
||||||
|
- mode: 755
|
||||||
|
|
||||||
|
# Default docker systemd unit file doesn't use an EnvironmentFile; replace it with one that does.
|
||||||
{{ pillar.get('systemd_system_path') }}/docker.service:
|
{{ pillar.get('systemd_system_path') }}/docker.service:
|
||||||
file.managed:
|
file.managed:
|
||||||
- source: salt://docker/docker.service
|
- source: salt://docker/docker.service
|
||||||
@ -309,6 +325,8 @@ docker-upgrade:
|
|||||||
- mode: 644
|
- mode: 644
|
||||||
- defaults:
|
- defaults:
|
||||||
environment_file: {{ environment_file }}
|
environment_file: {{ environment_file }}
|
||||||
|
- require:
|
||||||
|
- file: /opt/kubernetes/helpers/docker-prestart
|
||||||
|
|
||||||
# The docker service.running block below doesn't work reliably
|
# The docker service.running block below doesn't work reliably
|
||||||
# Instead we run our script which e.g. does a systemd daemon-reload
|
# Instead we run our script which e.g. does a systemd daemon-reload
|
||||||
@ -316,7 +334,7 @@ docker-upgrade:
|
|||||||
# TODO: Fix this
|
# TODO: Fix this
|
||||||
fix-service-docker:
|
fix-service-docker:
|
||||||
cmd.wait:
|
cmd.wait:
|
||||||
- name: /opt/kubernetes/helpers/services bounce docker
|
- name: /opt/kubernetes/helpers/services enable docker
|
||||||
- watch:
|
- watch:
|
||||||
- file: {{ pillar.get('systemd_system_path') }}/docker.service
|
- file: {{ pillar.get('systemd_system_path') }}/docker.service
|
||||||
- file: {{ environment_file }}
|
- file: {{ environment_file }}
|
||||||
@ -325,30 +343,77 @@ fix-service-docker:
|
|||||||
- cmd: docker-upgrade
|
- cmd: docker-upgrade
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
|
/opt/kubernetes/helpers/docker-healthcheck:
|
||||||
|
file.managed:
|
||||||
|
- source: salt://docker/docker-healthcheck
|
||||||
|
- user: root
|
||||||
|
- group: root
|
||||||
|
- mode: 755
|
||||||
|
|
||||||
|
{{ pillar.get('systemd_system_path') }}/docker-healthcheck.service:
|
||||||
|
file.managed:
|
||||||
|
- source: salt://docker/docker-healthcheck.service
|
||||||
|
- template: jinja
|
||||||
|
- user: root
|
||||||
|
- group: root
|
||||||
|
- mode: 644
|
||||||
|
|
||||||
|
{{ pillar.get('systemd_system_path') }}/docker-healthcheck.timer:
|
||||||
|
file.managed:
|
||||||
|
- source: salt://docker/docker-healthcheck.timer
|
||||||
|
- template: jinja
|
||||||
|
- user: root
|
||||||
|
- group: root
|
||||||
|
- mode: 644
|
||||||
|
|
||||||
|
# Tell systemd to load the timer
|
||||||
|
fix-systemd-docker-healthcheck-timer:
|
||||||
|
cmd.wait:
|
||||||
|
- name: /opt/kubernetes/helpers/services bounce docker-healthcheck.timer
|
||||||
|
- watch:
|
||||||
|
- file: {{ pillar.get('systemd_system_path') }}/docker-healthcheck.timer
|
||||||
|
|
||||||
|
# Trigger a first run of docker-healthcheck; needed because the timer fires 10s after the previous run.
|
||||||
|
fix-systemd-docker-healthcheck-service:
|
||||||
|
cmd.wait:
|
||||||
|
- name: /opt/kubernetes/helpers/services bounce docker-healthcheck.service
|
||||||
|
- watch:
|
||||||
|
- file: {{ pillar.get('systemd_system_path') }}/docker-healthcheck.service
|
||||||
|
- require:
|
||||||
|
- cmd: fix-service-docker
|
||||||
|
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
docker:
|
docker:
|
||||||
service.running:
|
|
||||||
# Starting Docker is racy on aws for some reason. To be honest, since Monit
|
# Starting Docker is racy on aws for some reason. To be honest, since Monit
|
||||||
# is managing Docker restart we should probably just delete this whole thing
|
# is managing Docker restart we should probably just delete this whole thing
|
||||||
# but the kubernetes components use salt 'require' to set up a dag, and that
|
# but the kubernetes components use salt 'require' to set up a dag, and that
|
||||||
# complicated and scary to unwind.
|
# complicated and scary to unwind.
|
||||||
|
# On AWS, we use a trick now... we don't start the docker service through Salt.
|
||||||
|
# Kubelet or our health checker will start it. But we use service.enabled,
|
||||||
|
# so we still have a `service: docker` node for our DAG.
|
||||||
{% if grains.cloud is defined and grains.cloud == 'aws' %}
|
{% if grains.cloud is defined and grains.cloud == 'aws' %}
|
||||||
- enable: False
|
service.enabled:
|
||||||
{% else %}
|
{% else %}
|
||||||
|
service.running:
|
||||||
- enable: True
|
- enable: True
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
# If we put a watch on this, salt will try to start the service.
|
||||||
|
# We put the watch on the fixer instead
|
||||||
|
{% if not pillar.get('is_systemd') %}
|
||||||
- watch:
|
- watch:
|
||||||
- file: {{ environment_file }}
|
- file: {{ environment_file }}
|
||||||
{% if override_docker_ver != '' %}
|
{% if override_docker_ver != '' %}
|
||||||
- cmd: docker-upgrade
|
- cmd: docker-upgrade
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
{% endif %}
|
||||||
|
- require:
|
||||||
|
- file: {{ environment_file }}
|
||||||
|
{% if override_docker_ver != '' %}
|
||||||
|
- cmd: docker-upgrade
|
||||||
|
{% endif %}
|
||||||
{% if pillar.get('is_systemd') %}
|
{% if pillar.get('is_systemd') %}
|
||||||
- file: {{ pillar.get('systemd_system_path') }}/docker.service
|
- cmd: fix-service-docker
|
||||||
{% endif %}
|
|
||||||
{% if override_docker_ver != '' %}
|
|
||||||
- require:
|
|
||||||
- cmd: docker-upgrade
|
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% endif %} # end grains.os_family != 'RedHat'
|
{% endif %} # end grains.os_family != 'RedHat'
|
||||||
|
|
||||||
|
@ -63,6 +63,9 @@ elif [[ "${ACTION}" == "down" ]]; then
|
|||||||
reload_state
|
reload_state
|
||||||
disable_service
|
disable_service
|
||||||
stop_service
|
stop_service
|
||||||
|
elif [[ "${ACTION}" == "enable" ]]; then
|
||||||
|
reload_state
|
||||||
|
enable_service
|
||||||
else
|
else
|
||||||
echo "Unknown action: ${ACTION}"
|
echo "Unknown action: ${ACTION}"
|
||||||
exit 1
|
exit 1
|
||||||
|
Loading…
Reference in New Issue
Block a user