Merge pull request #49296 from juju-solutions/feature/report-failing-services

Automatic merge from submit-queue (batch tested with PRs 49420, 49296, 49299, 49371, 46514)

Update status to show failing services.

**What this PR does / why we need it**: Report, in the charm status, any services that are not running.

**Which issue this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close that issue when PR gets merged)*: fixes https://github.com/juju-solutions/bundle-canonical-kubernetes/issues/341

**Special notes for your reviewer**:

**Release note**:

```
Report failing services in Juju deployed clusters.
```
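For context, both charm layers surface health the same way: each Kubernetes component runs as a snap daemon, its state is checked (via charmhelpers' `host.service_running` on the master, or a `systemctl` poll on the worker), and `hookenv.status_set` reports the result as the unit's workload status. A minimal sketch of that pattern, condensed from the diffs below; the function name and parameters here are illustrative and not part of this PR:

```
from charmhelpers.core import hookenv, host


def report_failing_services(services, role):
    # Each Kubernetes component in these charms runs as a snap daemon
    # named 'snap.<service>.daemon'.
    failing = [s for s in services
               if not host.service_running('snap.{}.daemon'.format(s))]
    if failing:
        # 'waiting' signals the operator that the unit is not fully up yet.
        hookenv.status_set('waiting',
                           'Stopped services: {}'.format(','.join(failing)))
    else:
        hookenv.status_set('active', 'Kubernetes {} running.'.format(role))
```

For example, the master-side call would amount to `report_failing_services(['kube-apiserver', 'kube-controller-manager', 'kube-scheduler'], 'master')`.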
Commit 0af4306695 by Kubernetes Submit Queue, 2017-07-21 22:23:17 -07:00, committed via GitHub.
2 changed files with 36 additions and 5 deletions.


@@ -323,7 +323,28 @@ def idle_status(kube_api, kube_control):
         msg = 'WARN: cannot change service-cidr, still using ' + service_cidr()
         hookenv.status_set('active', msg)
     else:
-        hookenv.status_set('active', 'Kubernetes master running.')
+        # All services should be up and running at this point. Double-check...
+        failing_services = master_services_down()
+        if len(failing_services) == 0:
+            hookenv.status_set('active', 'Kubernetes master running.')
+        else:
+            msg = 'Stopped services: {}'.format(','.join(failing_services))
+            hookenv.status_set('waiting', msg)
+
+
+def master_services_down():
+    """Ensure master services are up and running.
+
+    Return: list of failing services"""
+    services = ['kube-apiserver',
+                'kube-controller-manager',
+                'kube-scheduler']
+    failing_services = []
+    for service in services:
+        daemon = 'snap.{}.daemon'.format(service)
+        if not host.service_running(daemon):
+            failing_services.append(service)
+    return failing_services
 
 
 @when('etcd.available', 'tls_client.server.certificate.saved',
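A hedged sketch of how the new `master_services_down` helper could be unit-tested by stubbing charmhelpers' `host.service_running`; the `reactive.kubernetes_master` import path is an assumption for illustration, since the diff view above does not name the file:

```
import unittest
from unittest import mock

# Assumed import path for illustration only; the diff does not show the
# module's real location inside the kubernetes-master charm layer.
from reactive import kubernetes_master


class MasterServicesDownTest(unittest.TestCase):
    @mock.patch.object(kubernetes_master.host, 'service_running')
    def test_reports_only_stopped_daemons(self, service_running):
        # Pretend only the scheduler's snap daemon is down.
        service_running.side_effect = (
            lambda daemon: daemon != 'snap.kube-scheduler.daemon')
        self.assertEqual(kubernetes_master.master_services_down(),
                         ['kube-scheduler'])


if __name__ == '__main__':
    unittest.main()
```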


@@ -257,11 +257,21 @@ def update_kubelet_status():
     ''' There are different states that the kubelet can be in, where we are
     waiting for dns, waiting for cluster turnup, or ready to serve
     applications.'''
-    if (_systemctl_is_active('snap.kubelet.daemon')):
+    services = [
+        'kubelet',
+        'kube-proxy'
+    ]
+    failing_services = []
+    for service in services:
+        daemon = 'snap.{}.daemon'.format(service)
+        if not _systemctl_is_active(daemon):
+            failing_services.append(service)
+
+    if len(failing_services) == 0:
         hookenv.status_set('active', 'Kubernetes worker running.')
-    # if kubelet is not running, we're waiting on something else to converge
-    elif (not _systemctl_is_active('snap.kubelet.daemon')):
-        hookenv.status_set('waiting', 'Waiting for kubelet to start.')
+    else:
+        msg = 'Waiting for {} to start.'.format(','.join(failing_services))
+        hookenv.status_set('waiting', msg)
 
 
 @when('certificates.available')
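The worker hunk keeps using an existing `_systemctl_is_active` helper that is not shown in this diff; presumably it polls `systemctl is-active` for the given snap daemon. A minimal sketch of such a check, as an assumption rather than the charm's actual implementation:

```
from subprocess import CalledProcessError, check_output


def _systemctl_is_active(daemon):
    # Ask systemd whether a unit such as 'snap.kubelet.daemon' or
    # 'snap.kube-proxy.daemon' is currently active.
    try:
        return check_output(['systemctl', 'is-active', daemon]).strip() == b'active'
    except CalledProcessError:
        # systemctl exits non-zero for inactive or failed units.
        return False
```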