Systemd/non-Redhat: Add docker healthcheck script

We do the equivalent of #21727 for systemd systems.

Issue #21731
This commit is contained in:
Justin Santa Barbara 2016-03-03 09:24:21 -05:00
parent 64f1cbaddd
commit dbff0ef67b
4 changed files with 101 additions and 0 deletions

View File

@ -0,0 +1,44 @@
#!/bin/bash
# Copyright 2015 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Periodic docker health probe; intended to be invoked repeatedly.
#
# Flow:
#   1. Probe docker. If it answers, exit successfully right away.
#   2. Otherwise allow a 30 second grace period and probe again.
#   3. If docker is still unresponsive, restart it through systemctl,
#      wait 60 seconds, and probe one last time.

# Succeeds when `docker version` completes within 10 seconds.
docker_responds() {
  timeout 10 docker version > /dev/null
}

# Reports recovery and ends the script with status 0 if docker answers;
# otherwise returns so the caller can escalate.
exit_if_recovered() {
  if docker_responds; then
    echo "docker recovered"
    exit 0
  fi
}

# Healthy on the first probe: nothing to report.
if docker_responds; then
  exit 0
fi

echo "docker failed"
echo "Giving docker 30 seconds grace before restarting"
sleep 30
exit_if_recovered

echo "docker still down; triggering docker restart"
systemctl restart docker

echo "Waiting 60 seconds to give docker time to start"
sleep 60
exit_if_recovered

# NOTE(review): the script ends here, so its exit status is that of this
# echo even though docker is still unhealthy - confirm that is intended.
echo "docker still failing"

View File

@ -0,0 +1,9 @@
# systemd service unit that performs a single docker health check by
# running the docker-healthcheck helper script.
[Unit]
Description=Run docker-healthcheck once
[Service]
# oneshot: the script runs to completion and exits; it is not a daemon.
Type=oneshot
# Helper script that probes docker and restarts it if it stays unresponsive.
ExecStart=/opt/kubernetes/helpers/docker-healthcheck
[Install]
WantedBy=multi-user.target

View File

@ -0,0 +1,9 @@
# systemd timer unit that re-runs docker-healthcheck.service periodically.
[Unit]
Description=Trigger docker-healthcheck periodically
[Timer]
# Fire 10 seconds after the service last became inactive, i.e. after the
# previous run finished. Because this is relative to a previous run, the
# service has to be started once by other means before the timer can fire.
OnUnitInactiveSec=10s
# Unit activated each time the timer elapses.
Unit=docker-healthcheck.service
[Install]
WantedBy=multi-user.target

View File

@ -338,6 +338,45 @@ fix-service-docker:
- cmd: docker-upgrade
{% endif %}
# Install the docker health-check helper script, owned by root and executable.
/opt/kubernetes/helpers/docker-healthcheck:
  file.managed:
    - user: root
    - group: root
    - mode: '755'
    - source: salt://docker/docker-healthcheck
# Install the systemd service unit that runs the health check once.
# The source file is rendered through Jinja before being written.
{{ pillar.get('systemd_system_path') }}/docker-healthcheck.service:
  file.managed:
    - user: root
    - group: root
    - mode: '644'
    - template: jinja
    - source: salt://docker/docker-healthcheck.service
# Install the systemd timer unit that schedules the health check.
# The source file is rendered through Jinja before being written.
{{ pillar.get('systemd_system_path') }}/docker-healthcheck.timer:
  file.managed:
    - user: root
    - group: root
    - mode: '644'
    - template: jinja
    - source: salt://docker/docker-healthcheck.timer
# Make systemd load the timer: bounce the timer unit through the services
# helper whenever the timer's unit file changes.
fix-systemd-docker-healthcheck-timer:
  cmd.wait:
    - watch:
      - file: {{ pillar.get('systemd_system_path') }}/docker-healthcheck.timer
    - name: /opt/kubernetes/helpers/services bounce docker-healthcheck.timer
# Kick off an initial docker-healthcheck run. This is required because the
# timer only fires 10s after the previous run, so without a first run it
# would never start. Runs when the service unit file changes, and only
# after fix-service-docker.
fix-systemd-docker-healthcheck-service:
  cmd.wait:
    - require:
      - cmd: fix-service-docker
    - watch:
      - file: {{ pillar.get('systemd_system_path') }}/docker-healthcheck.service
    - name: /opt/kubernetes/helpers/services bounce docker-healthcheck.service
{% endif %}
docker: