Add support for supervisord as a monit alternative.

This commit is contained in:
Brendan Burns 2015-08-05 19:37:58 -07:00
parent 9f495e7f69
commit 15b9d98733
11 changed files with 331 additions and 0 deletions

View File

@ -0,0 +1,34 @@
#!/bin/bash
# Copyright 2015 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Supervisord runs this script as the "docker" program. It bounces docker
# once, gives it a minute to come up, and then polls it every 10 seconds.
# The first failed poll makes the script exit with status 2 so that
# supervisord starts the script again, which in turn restarts docker.
/etc/init.d/docker stop
/etc/init.d/docker start

echo "waiting a minute for startup"
sleep 60

# Keep polling for as long as "docker version" succeeds within 10 seconds.
while sudo timeout 10 docker version > /dev/null; do
  sleep 10
done

# The loop only ends when a poll has failed.
echo "Docker failed!"
exit 2

View File

@ -0,0 +1,6 @@
; Runs /usr/sbin/docker-checker.sh under supervisord as the "docker" program.
[program:docker]
command=/usr/sbin/docker-checker.sh
; stderr and stdout of the checker are captured in separate log files.
stderr_logfile=/var/log/supervisor/docker-stderr.log
stdout_logfile=/var/log/supervisor/docker-stdout.log
; Restart the checker whenever it exits. The very large startretries value
; presumably keeps supervisord from ever giving up on the program; confirm
; against the supervisor documentation for the deployed version.
autorestart=true
startretries=1000000

View File

@ -0,0 +1,125 @@
# Everything up to the matching endif is rendered only when the 'is_systemd'
# pillar value is unset or falsy.
{% if not pillar.get('is_systemd') %}
# Install the supervisor package.
supervisor:
pkg:
- installed
# Purge the monit package, which supervisor replaces on these hosts.
monit:
pkg:
- purged
# supervisord program definition for docker, copied from the salt fileserver.
/etc/supervisor/conf.d/docker.conf:
file:
- managed
- source: salt://supervisor/docker.conf
- user: root
- group: root
- mode: 644
- makedirs: True
# require_in makes the supervisor package state depend on this file, so the
# config is written before the package state runs.
- require_in:
- pkg: supervisor
# The config runs the checker script, so that script has to be in place first.
- require:
- file: /usr/sbin/docker-checker.sh
# Checker script referenced by docker.conf; installed with mode 755.
/usr/sbin/docker-checker.sh:
file:
- managed
- source: salt://supervisor/docker-checker.sh
- user: root
- group: root
- mode: 755
- makedirs: True
# supervisord program definition for the kubelet, copied from the salt
# fileserver.
/etc/supervisor/conf.d/kubelet.conf:
file:
- managed
- source: salt://supervisor/kubelet.conf
- user: root
- group: root
- mode: 644
- makedirs: True
# require_in makes the supervisor package state depend on this file, so the
# config is written before the package state runs.
- require_in:
- pkg: supervisor
# The config runs the checker script, so that script has to be in place first.
- require:
- file: /usr/sbin/kubelet-checker.sh
# Checker script referenced by kubelet.conf; installed with mode 755.
/usr/sbin/kubelet-checker.sh:
file:
- managed
- source: salt://supervisor/kubelet-checker.sh
- user: root
- group: root
- mode: 755
- makedirs: True
# kube-proxy supervision is only set up on minions whose 'roles' grain
# contains kubernetes-pool; a missing grain is treated as an empty list.
{% if "kubernetes-pool" in grains.get('roles', []) %}
# supervisord program definition for kube-proxy.
/etc/supervisor/conf.d/kube-proxy.conf:
file:
- managed
- source: salt://supervisor/kube-proxy.conf
- user: root
- group: root
- mode: 644
- makedirs: True
# Written before the supervisor package state runs (require_in), and only
# after the checker script it runs is in place (require).
- require_in:
- pkg: supervisor
- require:
- file: /usr/sbin/kube-proxy-checker.sh
# Checker script referenced by kube-proxy.conf; installed with mode 755.
/usr/sbin/kube-proxy-checker.sh:
file:
- managed
- source: salt://supervisor/kube-proxy-checker.sh
- user: root
- group: root
- mode: 755
- makedirs: True
{% endif %}
# kube-addons supervision is only set up when the first entry of the 'roles'
# grain is kubernetes-master.
# NOTE(review): this guard indexes the grain directly, whereas the
# kubernetes-pool guard in this file uses a defaulted membership test. A
# minion without a 'roles' grain would presumably fail to render here, and a
# master role listed in a later position would be ignored; confirm whether
# that is intended.
{% if grains['roles'][0] == 'kubernetes-master' -%}
# supervisord program definition for kube-addons.
/etc/supervisor/conf.d/kube-addons.conf:
file:
- managed
- source: salt://supervisor/kube-addons.conf
- user: root
- group: root
- mode: 644
- makedirs: True
# Written before the supervisor package state runs (require_in), and only
# after the checker script it runs is in place (require).
- require_in:
- pkg: supervisor
- require:
- file: /usr/sbin/kube-addons-checker.sh
# Checker script referenced by kube-addons.conf; installed with mode 755.
/usr/sbin/kube-addons-checker.sh:
file:
- managed
- source: salt://supervisor/kube-addons-checker.sh
- user: root
- group: root
- mode: 755
- makedirs: True
{% endif %}
# Install the supervisor_watcher.sh script (mode 755) that the crontab entry
# added further down in this file runs every minute.
/etc/supervisor/supervisor_watcher.sh:
file.managed:
- source: salt://supervisor/supervisor_watcher.sh
- user: root
- group: root
- mode: 755
- makedirs: True
# Append an entry to the crontab of the user this command runs as, so that
# supervisor_watcher.sh runs every minute with its output piped to logger.
# The state ID doubles as the command: it re-emits the current crontab,
# adds the new line, and loads the result back with "crontab -".
crontab -l | { cat; echo "* * * * * /etc/supervisor/supervisor_watcher.sh 2>&1 | logger"; } | crontab -:
  cmd.run:
    # Skip the command when the entry is already present. grep -F compares
    # the entry as a fixed string, because the line is full of regex
    # metacharacters (asterisks, dots); -q suppresses the matched output,
    # only the exit status matters here.
    - unless: crontab -l | grep -qF "* * * * * /etc/supervisor/supervisor_watcher.sh 2>&1 | logger"
# Keep the service named "supervisor" running once its package is installed.
supervisor-service:
service:
- running
- name: supervisor
# watch triggers a service restart when a watched state reports changes.
# NOTE(review): the conf.d glob is presumably meant to cover every managed
# file under that directory via Salt's requisite glob matching; confirm that
# the Salt version in use supports it.
- watch:
- pkg: supervisor
- file: /etc/supervisor/conf.d/*
- require:
- pkg: supervisor
# Closes the is_systemd guard opened at the top of this file.
{% endif %}

View File

@ -0,0 +1,34 @@
#!/bin/bash
# Copyright 2015 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Supervisord runs this script as the "kube-addons" program. It bounces
# kube-addons once, gives it a minute to come up, and then polls its init
# script status every 10 seconds. The first failed poll makes the script
# exit with status 2 so that supervisord starts the script again, which in
# turn restarts kube-addons.
/etc/init.d/kube-addons stop
/etc/init.d/kube-addons start

echo "waiting a minute for startup"
sleep 60

# Keep polling for as long as the init script reports a healthy status.
while /etc/init.d/kube-addons status > /dev/null; do
  sleep 10
done

# The loop only ends when a poll has failed.
echo "kube-addons failed!"
exit 2

View File

@ -0,0 +1,6 @@
; Runs /usr/sbin/kube-addons-checker.sh under supervisord as the
; "kube-addons" program.
[program:kube-addons]
command=/usr/sbin/kube-addons-checker.sh
; stderr and stdout of the checker are captured in separate log files.
stderr_logfile=/var/log/supervisor/kube-addons-stderr.log
stdout_logfile=/var/log/supervisor/kube-addons-stdout.log
; Restart the checker whenever it exits. The very large startretries value
; presumably keeps supervisord from ever giving up on the program; confirm
; against the supervisor documentation for the deployed version.
autorestart=true
startretries=1000000

View File

@ -0,0 +1,36 @@
#!/bin/bash
# Copyright 2015 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Supervisord runs this script as the "kube-proxy" program. It bounces
# kube-proxy once, gives it a minute to come up, and then polls its healthz
# endpoint every 10 seconds. The first failed poll makes the script exit
# with status 2 so that supervisord starts the script again, which in turn
# restarts kube-proxy.

# Upper bound, in seconds, for a single health probe.
probe_timeout=10

# Succeeds when the local kube-proxy healthz endpoint answers without an
# HTTP error inside the probe timeout; the response body is discarded.
proxy_is_healthy() {
  curl --max-time "${probe_timeout}" --fail --silent \
    http://127.0.0.1:10249/healthz > /dev/null
}

/etc/init.d/kube-proxy stop
/etc/init.d/kube-proxy start

echo "waiting a minute for startup"
sleep 60

while proxy_is_healthy; do
  sleep 10
done

# The loop only ends when a probe has failed.
echo "kube-proxy failed!"
exit 2

View File

@ -0,0 +1,6 @@
; Runs /usr/sbin/kube-proxy-checker.sh under supervisord as the "kube-proxy"
; program.
[program:kube-proxy]
command=/usr/sbin/kube-proxy-checker.sh
; stderr and stdout of the checker are captured in separate log files.
stderr_logfile=/var/log/supervisor/kube-proxy-stderr.log
stdout_logfile=/var/log/supervisor/kube-proxy-stdout.log
; Restart the checker whenever it exits. The very large startretries value
; presumably keeps supervisord from ever giving up on the program; confirm
; against the supervisor documentation for the deployed version.
autorestart=true
startretries=1000000

View File

@ -0,0 +1,36 @@
#!/bin/bash
# Copyright 2015 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Supervisord runs this script as the "kubelet" program. It bounces the
# kubelet once, gives it a minute to come up, and then polls its healthz
# endpoint every 10 seconds. The first failed poll makes the script exit
# with status 2 so that supervisord starts the script again, which in turn
# restarts the kubelet.

# Upper bound, in seconds, for a single health probe.
probe_timeout=10

# Succeeds when the local kubelet healthz endpoint answers without an HTTP
# error inside the probe timeout; the response body is discarded.
kubelet_is_healthy() {
  curl --max-time "${probe_timeout}" --fail --silent \
    http://127.0.0.1:10255/healthz > /dev/null
}

/etc/init.d/kubelet stop
/etc/init.d/kubelet start

echo "waiting a minute for startup"
sleep 60

while kubelet_is_healthy; do
  sleep 10
done

# The loop only ends when a probe has failed.
echo "kubelet failed!"
exit 2

View File

@ -0,0 +1,6 @@
; Runs /usr/sbin/kubelet-checker.sh under supervisord as the "kubelet"
; program.
[program:kubelet]
command=/usr/sbin/kubelet-checker.sh
; stderr and stdout of the checker are captured in separate log files.
stderr_logfile=/var/log/supervisor/kubelet-stderr.log
stdout_logfile=/var/log/supervisor/kubelet-stdout.log
; Restart the checker whenever it exits. The very large startretries value
; presumably keeps supervisord from ever giving up on the program; confirm
; against the supervisor documentation for the deployed version.
autorestart=true
startretries=1000000

View File

@ -0,0 +1,34 @@
#!/bin/bash
# Copyright 2015 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This script is invoked by crond every minute to check that supervisord is
# up and protected from the OOM killer. If supervisord is down it is started
# first; in either case oom_score_adj is then applied to every pid recorded
# in the supervisord pid file.
#
# Exit status: 0 when every recorded pid was adjusted, 1 when the pid file
# is missing/unreadable or when writing oom_score_adj failed for any pid.
PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin

readonly pidfile=/var/run/supervisord.pid
readonly oom_score_adj=-901

if ! /etc/init.d/supervisor status > /dev/null; then
  service supervisor start
  # Give supervisord time to come up and write its pid file.
  sleep 10
fi

# Without a pid file there is nothing to adjust; report it instead of
# attempting a write to a malformed /proc path.
if [[ ! -r "${pidfile}" ]]; then
  echo "cannot read ${pidfile}; supervisord is not running?" >&2
  exit 1
fi

# Apply oom_score_adj to each pid in the pid file (one per line). The extra
# test after read handles a final line that lacks a trailing newline.
rc=0
while read -r pid _ || [[ -n "${pid}" ]]; do
  # Ignore blank or non-numeric content so it never ends up in a /proc path.
  [[ "${pid}" =~ ^[0-9]+$ ]] || continue
  # A stale pid makes the redirection fail; remember the failure but keep
  # processing any remaining pids.
  printf '%s\n' "${oom_score_adj}" > "/proc/${pid}/oom_score_adj" || rc=1
done < "${pidfile}"

exit "${rc}"

View File

@ -26,7 +26,11 @@ base:
{% endif %}
{% endif %}
- logrotate
{% if grains['cloud'] is defined and grains.cloud == 'gce' %}
- supervisor
{% else %}
- monit
{% endif %}
'roles:kubernetes-master':
- match: grain
@ -35,7 +39,11 @@ base:
- kube-apiserver
- kube-controller-manager
- kube-scheduler
{% if grains['cloud'] is defined and grains.cloud == 'gce' %}
- supervisor
{% else %}
- monit
{% endif %}
{% if grains['cloud'] is defined and not grains.cloud in [ 'aws', 'gce', 'vagrant' ] %}
- nginx
{% endif %}