diff --git a/cluster/juju/layers/kubeapi-load-balancer/layer.yaml b/cluster/juju/layers/kubeapi-load-balancer/layer.yaml index 44e31b78220..1774fbacb32 100644 --- a/cluster/juju/layers/kubeapi-load-balancer/layer.yaml +++ b/cluster/juju/layers/kubeapi-load-balancer/layer.yaml @@ -1,5 +1,6 @@ repo: https://github.com/kubernetes/kubernetes.git includes: + - 'layer:nagios' - 'layer:nginx' - 'layer:tls-client' - 'interface:public-address' diff --git a/cluster/juju/layers/kubeapi-load-balancer/reactive/load_balancer.py b/cluster/juju/layers/kubeapi-load-balancer/reactive/load_balancer.py index e0db6526061..af46cfbadf8 100644 --- a/cluster/juju/layers/kubeapi-load-balancer/reactive/load_balancer.py +++ b/cluster/juju/layers/kubeapi-load-balancer/reactive/load_balancer.py @@ -19,8 +19,10 @@ import socket import subprocess from charms import layer -from charms.reactive import when +from charms.reactive import when, when_any, when_not +from charms.reactive import set_state, remove_state from charmhelpers.core import hookenv +from charmhelpers.contrib.charmsupport import nrpe from charms.layer import nginx @@ -111,3 +113,41 @@ def provide_loadbalancing(loadbalancer): the subordinates can get the public address of this loadbalancer.''' loadbalancer.set_address_port(hookenv.unit_get('public-address'), hookenv.config('port')) + + +@when('nrpe-external-master.available') +@when_not('nrpe-external-master.initial-config') +def initial_nrpe_config(nagios=None): + set_state('nrpe-external-master.initial-config') + update_nrpe_config(nagios) + + +@when('nginx.available') +@when('nrpe-external-master.available') +@when_any('config.changed.nagios_context', + 'config.changed.nagios_servicegroups') +def update_nrpe_config(unused=None): + services = ('nginx',) + + hostname = nrpe.get_nagios_hostname() + current_unit = nrpe.get_nagios_unit_name() + nrpe_setup = nrpe.NRPE(hostname=hostname) + nrpe.add_init_service_checks(nrpe_setup, services, current_unit) + nrpe_setup.write() + + +@when_not('nrpe-external-master.available') +@when('nrpe-external-master.initial-config') +def remove_nrpe_config(nagios=None): + remove_state('nrpe-external-master.initial-config') + + # List of systemd services for which the checks will be removed + services = ('nginx',) + + # The current nrpe-external-master interface doesn't handle a lot of logic, + # use the charm-helpers code for now. + hostname = nrpe.get_nagios_hostname() + nrpe_setup = nrpe.NRPE(hostname=hostname) + + for service in services: + nrpe_setup.remove_check(shortname=service) diff --git a/cluster/juju/layers/kubernetes-master/layer.yaml b/cluster/juju/layers/kubernetes-master/layer.yaml index 7625119a011..8cd45211fd9 100644 --- a/cluster/juju/layers/kubernetes-master/layer.yaml +++ b/cluster/juju/layers/kubernetes-master/layer.yaml @@ -4,11 +4,12 @@ includes: - 'layer:tls-client' - 'layer:leadership' - 'layer:debug' + - 'layer:nagios' + - 'interface:ceph-admin' - 'interface:etcd' - 'interface:http' - 'interface:kubernetes-cni' - 'interface:kube-dns' - - 'interface:ceph-admin' - 'interface:public-address' options: basic: diff --git a/cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py b/cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py index 86c69e2ec59..3cd695a55f6 100644 --- a/cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py +++ b/cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py @@ -33,8 +33,7 @@ from charms import layer from charms.reactive import hook from charms.reactive import remove_state from charms.reactive import set_state -from charms.reactive import when -from charms.reactive import when_not +from charms.reactive import when, when_any, when_not from charms.reactive.helpers import data_changed from charms.kubernetes.flagmanager import FlagManager @@ -43,6 +42,7 @@ from charmhelpers.core import host from charmhelpers.core import unitdata from charmhelpers.core.templating import render from charmhelpers.fetch import apt_install +from charmhelpers.contrib.charmsupport import nrpe dashboard_templates = [ @@ -489,6 +489,44 @@ def ceph_storage(ceph_admin): set_state('ceph-storage.configured') +@when('nrpe-external-master.available') +@when_not('nrpe-external-master.initial-config') +def initial_nrpe_config(nagios=None): + set_state('nrpe-external-master.initial-config') + update_nrpe_config(nagios) + + +@when('kubernetes-master.components.started') +@when('nrpe-external-master.available') +@when_any('config.changed.nagios_context', + 'config.changed.nagios_servicegroups') +def update_nrpe_config(unused=None): + services = ('kube-apiserver', 'kube-controller-manager', 'kube-scheduler') + + hostname = nrpe.get_nagios_hostname() + current_unit = nrpe.get_nagios_unit_name() + nrpe_setup = nrpe.NRPE(hostname=hostname) + nrpe.add_init_service_checks(nrpe_setup, services, current_unit) + nrpe_setup.write() + + +@when_not('nrpe-external-master.available') +@when('nrpe-external-master.initial-config') +def remove_nrpe_config(nagios=None): + remove_state('nrpe-external-master.initial-config') + + # List of systemd services for which the checks will be removed + services = ('kube-apiserver', 'kube-controller-manager', 'kube-scheduler') + + # The current nrpe-external-master interface doesn't handle a lot of logic, + # use the charm-helpers code for now. + hostname = nrpe.get_nagios_hostname() + nrpe_setup = nrpe.NRPE(hostname=hostname) + + for service in services: + nrpe_setup.remove_check(shortname=service) + + def create_addon(template, context): '''Create an addon from a template''' source = 'addons/' + template diff --git a/cluster/juju/layers/kubernetes-worker/layer.yaml b/cluster/juju/layers/kubernetes-worker/layer.yaml index d037d324b14..c3023d57b6f 100644 --- a/cluster/juju/layers/kubernetes-worker/layer.yaml +++ b/cluster/juju/layers/kubernetes-worker/layer.yaml @@ -1,9 +1,10 @@ repo: https://github.com/kubernetes/kubernetes.git includes: - 'layer:basic' - - 'layer:docker' - - 'layer:tls-client' - 'layer:debug' + - 'layer:docker' + - 'layer:nagios' + - 'layer:tls-client' - 'interface:http' - 'interface:kubernetes-cni' - 'interface:kube-dns' diff --git a/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py b/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py index e8115602409..8b8c00d72c2 100644 --- a/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py +++ b/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py @@ -24,13 +24,14 @@ from socket import gethostname from charms import layer from charms.reactive import hook from charms.reactive import set_state, remove_state -from charms.reactive import when, when_not +from charms.reactive import when, when_any, when_not from charms.reactive.helpers import data_changed from charms.kubernetes.flagmanager import FlagManager from charms.templating.jinja2 import render from charmhelpers.core import hookenv from charmhelpers.core.host import service_stop +from charmhelpers.contrib.charmsupport import nrpe kubeconfig_path = '/srv/kubernetes/config' @@ -447,6 +448,44 @@ def kubectl_manifest(operation, manifest): return kubectl_success(operation, '-f', manifest) +@when('nrpe-external-master.available') +@when_not('nrpe-external-master.initial-config') +def initial_nrpe_config(nagios=None): + set_state('nrpe-external-master.initial-config') + update_nrpe_config(nagios) + + +@when('kubernetes-worker.config.created') +@when('nrpe-external-master.available') +@when_any('config.changed.nagios_context', + 'config.changed.nagios_servicegroups') +def update_nrpe_config(unused=None): + services = ('kubelet', 'kube-proxy') + + hostname = nrpe.get_nagios_hostname() + current_unit = nrpe.get_nagios_unit_name() + nrpe_setup = nrpe.NRPE(hostname=hostname) + nrpe.add_init_service_checks(nrpe_setup, services, current_unit) + nrpe_setup.write() + + +@when_not('nrpe-external-master.available') +@when('nrpe-external-master.initial-config') +def remove_nrpe_config(nagios=None): + remove_state('nrpe-external-master.initial-config') + + # List of systemd services for which the checks will be removed + services = ('kubelet', 'kube-proxy') + + # The current nrpe-external-master interface doesn't handle a lot of logic, + # use the charm-helpers code for now. + hostname = nrpe.get_nagios_hostname() + nrpe_setup = nrpe.NRPE(hostname=hostname) + + for service in services: + nrpe_setup.remove_check(shortname=service) + + def _systemctl_is_active(application): ''' Poll systemctl to determine if the application is running ''' cmd = ['systemctl', 'is-active', application]