Merge pull request #49296 from juju-solutions/feature/report-failing-services

Automatic merge from submit-queue (batch tested with PRs 49420, 49296, 49299, 49371, 46514)

Update status to show failing services.

**What this PR does / why we need it**: Reports, in the charm status, any services that are not running.

**Which issue this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close that issue when PR gets merged)*: fixes https://github.com/juju-solutions/bundle-canonical-kubernetes/issues/341

**Special notes for your reviewer**:

**Release note**:

```
Report failing services in Juju-deployed clusters.
```
This commit is contained in:
Kubernetes Submit Queue 2017-07-21 22:23:17 -07:00 committed by GitHub
commit 0af4306695
2 changed files with 36 additions and 5 deletions

View File

@ -323,7 +323,28 @@ def idle_status(kube_api, kube_control):
msg = 'WARN: cannot change service-cidr, still using ' + service_cidr()
hookenv.status_set('active', msg)
else:
hookenv.status_set('active', 'Kubernetes master running.')
# All services should be up and running at this point. Double-check...
failing_services = master_services_down()
if len(failing_services) == 0:
hookenv.status_set('active', 'Kubernetes master running.')
else:
msg = 'Stopped services: {}'.format(','.join(failing_services))
hookenv.status_set('waiting', msg)
def master_services_down():
    """Check which Kubernetes master services are not running.

    Each service is queried under its snap daemon name
    (snap.<service>.daemon) through host.service_running.

    Return: list of the service names that are not running; the list is
    empty when every service is up."""
    master_services = ('kube-apiserver',
                       'kube-controller-manager',
                       'kube-scheduler')
    # Keep the plain service name (not the daemon name) in the result so the
    # caller can show it directly in a status message.
    return [name for name in master_services
            if not host.service_running('snap.{}.daemon'.format(name))]
@when('etcd.available', 'tls_client.server.certificate.saved',

View File

@ -257,11 +257,21 @@ def update_kubelet_status():
''' There are different states that the kubelet can be in, where we are
waiting for dns, waiting for cluster turnup, or ready to serve
applications.'''
if (_systemctl_is_active('snap.kubelet.daemon')):
services = [
'kubelet',
'kube-proxy'
]
failing_services = []
for service in services:
daemon = 'snap.{}.daemon'.format(service)
if not _systemctl_is_active(daemon):
failing_services.append(service)
if len(failing_services) == 0:
hookenv.status_set('active', 'Kubernetes worker running.')
# if kubelet is not running, we're waiting on something else to converge
elif (not _systemctl_is_active('snap.kubelet.daemon')):
hookenv.status_set('waiting', 'Waiting for kubelet to start.')
else:
msg = 'Waiting for {} to start.'.format(','.join(failing_services))
hookenv.status_set('waiting', msg)
@when('certificates.available')