mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-23 11:50:44 +00:00
Update status to show failing services.
This commit is contained in:
parent
acc19cafa4
commit
685dff99ab
@ -30,6 +30,7 @@ from shlex import split
|
||||
from subprocess import check_call
|
||||
from subprocess import check_output
|
||||
from subprocess import CalledProcessError
|
||||
from time import sleep
|
||||
|
||||
from charms import layer
|
||||
from charms.layer import snap
|
||||
@ -323,7 +324,36 @@ def idle_status(kube_api, kube_control):
|
||||
msg = 'WARN: cannot change service-cidr, still using ' + service_cidr()
|
||||
hookenv.status_set('active', msg)
|
||||
else:
|
||||
hookenv.status_set('active', 'Kubernetes master running.')
|
||||
# All services should be up and running at this point. Double-check...
|
||||
failing_services = master_services_down()
|
||||
if len(failing_services) == 0:
|
||||
hookenv.status_set('active', 'Kubernetes master running.')
|
||||
else:
|
||||
msg = 'Stopped services: {}'.format(','.join(failing_services))
|
||||
hookenv.status_set('waiting', msg)
|
||||
|
||||
|
||||
def master_services_down():
    """Ensure master services are up and running.

    Each master snap daemon found stopped is started once and given ten
    seconds to come up before the final health check.

    Return: list of failing services"""
    services = ['kube-apiserver',
                'kube-controller-manager',
                'kube-scheduler']
    for service in services:
        daemon = 'snap.{}.daemon'.format(service)
        if not host.service_running(daemon):
            hookenv.log("Service {} was down. Starting it.".format(daemon))
            host.service_start(daemon)
            # Give the daemon a moment to come up before the re-check below.
            sleep(10)

    # Re-check after the restart attempts; anything still down is failing.
    return [service for service in services
            if not host.service_running('snap.{}.daemon'.format(service))]
|
||||
|
||||
|
||||
@when('etcd.available', 'tls_client.server.certificate.saved',
|
||||
|
@ -22,6 +22,7 @@ from shlex import split
|
||||
from subprocess import check_call, check_output
|
||||
from subprocess import CalledProcessError
|
||||
from socket import gethostname
|
||||
from time import sleep
|
||||
|
||||
from charms import layer
|
||||
from charms.layer import snap
|
||||
@ -257,11 +258,27 @@ def update_kubelet_status():
|
||||
''' There are different states that the kubelet can be in, where we are
|
||||
waiting for dns, waiting for cluster turnup, or ready to serve
|
||||
applications.'''
|
||||
if (_systemctl_is_active('snap.kubelet.daemon')):
|
||||
services = [
|
||||
'kubelet',
|
||||
'kube-proxy'
|
||||
]
|
||||
for service in services:
|
||||
daemon = 'snap.{}.daemon'.format(service)
|
||||
if not _systemctl_is_active(daemon):
|
||||
hookenv.log("Service {} id down. Starting it.".format(daemon))
|
||||
sleep(10)
|
||||
|
||||
failing_services = []
|
||||
for service in services:
|
||||
daemon = 'snap.{}.daemon'.format(service)
|
||||
if not _systemctl_is_active(daemon):
|
||||
failing_services.append(service)
|
||||
|
||||
if len(failing_services) == 0:
|
||||
hookenv.status_set('active', 'Kubernetes worker running.')
|
||||
# if kubelet is not running, we're waiting on something else to converge
|
||||
elif (not _systemctl_is_active('snap.kubelet.daemon')):
|
||||
hookenv.status_set('waiting', 'Waiting for kubelet to start.')
|
||||
else:
|
||||
msg = 'Waiting for {} to start.'.format(','.join(failing_services))
|
||||
hookenv.status_set('waiting', msg)
|
||||
|
||||
|
||||
@when('certificates.available')
|
||||
|
Loading…
Reference in New Issue
Block a user