From d3428ef3a4bc8f3593b05a201424f97ce108881b Mon Sep 17 00:00:00 2001
From: Marco Ceppi
Date: Thu, 23 Feb 2017 15:30:07 -0500
Subject: [PATCH 01/10] Add metric collection to charms for autoscaling

---
 cluster/juju/.gitignore                         |  2 ++
 .../layers/kubeapi-load-balancer/layer.yaml     |  1 +
 .../layers/kubeapi-load-balancer/metrics.yaml   |  2 ++
 .../juju/layers/kubernetes-master/layer.yaml    |  5 +--
 .../layers/kubernetes-master/metrics.yaml       | 34 +++++++++++++++++++
 .../juju/layers/kubernetes-worker/layer.yaml    |  1 +
 .../layers/kubernetes-worker/metrics.yaml       |  2 ++
 7 files changed, 45 insertions(+), 2 deletions(-)
 create mode 100644 cluster/juju/.gitignore
 create mode 100644 cluster/juju/layers/kubeapi-load-balancer/metrics.yaml
 create mode 100644 cluster/juju/layers/kubernetes-master/metrics.yaml
 create mode 100644 cluster/juju/layers/kubernetes-worker/metrics.yaml

diff --git a/cluster/juju/.gitignore b/cluster/juju/.gitignore
new file mode 100644
index 00000000000..71d6f3c6d9f
--- /dev/null
+++ b/cluster/juju/.gitignore
@@ -0,0 +1,2 @@
+builds
+deps
diff --git a/cluster/juju/layers/kubeapi-load-balancer/layer.yaml b/cluster/juju/layers/kubeapi-load-balancer/layer.yaml
index 1774fbacb32..9cb22a3d4a3 100644
--- a/cluster/juju/layers/kubeapi-load-balancer/layer.yaml
+++ b/cluster/juju/layers/kubeapi-load-balancer/layer.yaml
@@ -1,5 +1,6 @@
 repo: https://github.com/kubernetes/kubernetes.git
 includes:
+  - 'layer:metrics'
   - 'layer:nagios'
   - 'layer:nginx'
   - 'layer:tls-client'
diff --git a/cluster/juju/layers/kubeapi-load-balancer/metrics.yaml b/cluster/juju/layers/kubeapi-load-balancer/metrics.yaml
new file mode 100644
index 00000000000..0fcb3c1c489
--- /dev/null
+++ b/cluster/juju/layers/kubeapi-load-balancer/metrics.yaml
@@ -0,0 +1,2 @@
+metrics:
+  juju-units: {}
diff --git a/cluster/juju/layers/kubernetes-master/layer.yaml b/cluster/juju/layers/kubernetes-master/layer.yaml
index 75bd5a27b61..4e5273c2253 100644
--- a/cluster/juju/layers/kubernetes-master/layer.yaml
+++ b/cluster/juju/layers/kubernetes-master/layer.yaml
@@ -1,10 +1,11 @@
 repo: https://github.com/kubernetes/kubernetes.git
 includes:
   - 'layer:basic'
-  - 'layer:tls-client'
-  - 'layer:leadership'
   - 'layer:debug'
+  - 'layer:leadership'
+  - 'layer:metrics'
   - 'layer:nagios'
+  - 'layer:tls-client'
   - 'interface:ceph-admin'
   - 'interface:etcd'
   - 'interface:http'
diff --git a/cluster/juju/layers/kubernetes-master/metrics.yaml b/cluster/juju/layers/kubernetes-master/metrics.yaml
new file mode 100644
index 00000000000..ba5550d70a1
--- /dev/null
+++ b/cluster/juju/layers/kubernetes-master/metrics.yaml
@@ -0,0 +1,34 @@
+metrics:
+  juju-units: {}
+  pods:
+    type: gauge
+    description: number of pods
+    command: kubectl get po --all-namespaces | tail -n+2 | wc -l
+  services:
+    type: gauge
+    description: number of services
+    command: kubectl get svc --all-namespaces | tail -n+2 | wc -l
+  replicasets:
+    type: gauge
+    description: number of replicasets
+    command: kubectl get rs --all-namespaces | tail -n+2 | wc -l
+  replicationcontrollers:
+    type: gauge
+    description: number of replicationcontrollers
+    command: kubectl get rc --all-namespaces | tail -n+2 | wc -l
+  nodes:
+    type: gauge
+    description: number of kubernetes nodes
+    command: kubectl get nodes | tail -n+2 | wc -l
+  persistentvolume:
+    type: gauge
+    description: number of pv
+    command: kubectl get pv --all-namespaces | tail -n+2 | wc -l
+  persistentvolumeclaims:
+    type: gauge
+    description: number of claims
+    command: kubectl get pvc --all-namespaces | tail -n+2 | wc -l
+  serviceaccounts:
+    type: gauge
+    description: number of sa
+    command: kubectl get sa --all-namespaces | tail -n+2 | wc -l
diff --git a/cluster/juju/layers/kubernetes-worker/layer.yaml b/cluster/juju/layers/kubernetes-worker/layer.yaml
index ce0979de6f2..ffe1fa154d2 100644
--- a/cluster/juju/layers/kubernetes-worker/layer.yaml
+++ b/cluster/juju/layers/kubernetes-worker/layer.yaml
@@ -3,6 +3,7 @@ includes:
   - 'layer:basic'
   - 'layer:debug'
   - 'layer:docker'
+  - 'layer:metrics'
   - 'layer:nagios'
   - 'layer:tls-client'
   - 'layer:nvidia-cuda'
diff --git a/cluster/juju/layers/kubernetes-worker/metrics.yaml b/cluster/juju/layers/kubernetes-worker/metrics.yaml
new file mode 100644
index 00000000000..0fcb3c1c489
--- /dev/null
+++ b/cluster/juju/layers/kubernetes-worker/metrics.yaml
@@ -0,0 +1,2 @@
+metrics:
+  juju-units: {}
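
With these `metrics.yaml` files in place, `layer:metrics` registers the definitions and Juju collects the gauges on its own schedule. A minimal sketch of how an operator could inspect the new metrics, assuming a Juju 2.x client and a deployed unit named `kubernetes-master/0`:

```shell
# Trigger an out-of-band collection run (Juju normally collects on a timer),
# then display the stored metric values for the unit.
juju collect-metrics kubernetes-master/0
juju metrics kubernetes-master/0
```
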
From 5d9905f4e5dca39d133afc3c0f4ae9aaee02c87e Mon Sep 17 00:00:00 2001
From: Konstantinos Tsakalozos
Date: Fri, 3 Mar 2017 23:14:39 +0200
Subject: [PATCH 02/10] Fail test action when test suite fails. Minor README
 update.

---
 cluster/juju/layers/kubernetes-e2e/README.md    | 2 +-
 cluster/juju/layers/kubernetes-e2e/actions/test | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/cluster/juju/layers/kubernetes-e2e/README.md b/cluster/juju/layers/kubernetes-e2e/README.md
index 6697f8a171c..5e56c64f6cd 100644
--- a/cluster/juju/layers/kubernetes-e2e/README.md
+++ b/cluster/juju/layers/kubernetes-e2e/README.md
@@ -73,7 +73,7 @@ a deployed cluster.
 The following example will skip the `Flaky`, `Slow`, and `Feature` labeled tests:
 
 ```shell
-juju run-action kubernetes-e2e/0 skip='\[(Flaky|Slow|Feature:.*)\]'
+juju run-action kubernetes-e2e/0 test skip='\[(Flaky|Slow|Feature:.*)\]'
 ```
 
 > Note: the escaping of the regex due to how bash handles brackets.
diff --git a/cluster/juju/layers/kubernetes-e2e/actions/test b/cluster/juju/layers/kubernetes-e2e/actions/test
index 02981ac86fc..99ea799345e 100755
--- a/cluster/juju/layers/kubernetes-e2e/actions/test
+++ b/cluster/juju/layers/kubernetes-e2e/actions/test
@@ -45,3 +45,7 @@ tar -czf $ACTION_LOG_TGZ ${JUJU_ACTION_UUID}.log
 
 action-set log="$ACTION_LOG_TGZ"
 action-set junit="$ACTION_JUNIT_TGZ"
+
+if tail ${JUJU_ACTION_UUID}.log | grep -q "Test Suite Failed"; then
+  action-fail "Failure detected in the logs"
+fi
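
The `action-fail` call above marks the Juju action itself as failed whenever the tail of the e2e log contains "Test Suite Failed", so failures now surface in the action status rather than only inside the archived log. A usage sketch (the action id placeholder is whatever `run-action` returns):

```shell
# Queue a test run, then inspect its status and outputs; a failed suite now
# reports "status: failed" instead of silently completing.
juju run-action kubernetes-e2e/0 test skip='\[(Flaky|Slow|Feature:.*)\]'
juju show-action-output <action-id>
```
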
From ca4afd87738ece2bc0cc6056964b177ea8a927a1 Mon Sep 17 00:00:00 2001
From: Rye Terrell
Date: Fri, 27 Jan 2017 10:43:15 -0600
Subject: [PATCH 03/10] Update CDK charms to use snaps

---
 .../kubernetes-master/actions/create-rbd-pv     |   3 +
 .../juju/layers/kubernetes-master/config.yaml   |   5 +
 .../kubernetes-master/debug-scripts/kubectl     |   2 +
 .../debug-scripts/kubernetes-master-services    |  10 +-
 .../juju/layers/kubernetes-master/layer.yaml    |  17 +-
 .../lib/charms/kubernetes/common.py             |  29 -
 .../lib/charms/kubernetes/flagmanager.py        |   7 +
 .../layers/kubernetes-master/metadata.yaml      |  22 +-
 .../layers/kubernetes-master/metrics.yaml       |  16 +-
 .../reactive/kubernetes_master.py               | 534 +++++++-----------
 .../kubernetes-master/tactics/__init__.py       |  16 -
 .../tactics/update_addons.py                    | 185 ------
 .../templates/kube-apiserver.defaults           |  17 -
 .../templates/kube-apiserver.service            |  22 -
 .../kube-controller-manager.defaults            |   8 -
 .../templates/kube-controller-manager.service   |  18 -
 .../templates/kube-defaults.defaults            |  22 -
 .../templates/kube-scheduler.defaults           |   7 -
 .../templates/kube-scheduler.service            |  17 -
 .../layers/kubernetes-worker/actions.yaml       |   2 +
 .../layers/kubernetes-worker/actions/microbot   |   8 +-
 .../layers/kubernetes-worker/actions/pause      |   6 +-
 .../layers/kubernetes-worker/actions/resume     |   4 +-
 .../layers/kubernetes-worker/actions/upgrade    |   5 +
 .../juju/layers/kubernetes-worker/config.yaml   |  11 +
 .../kubernetes-worker/debug-scripts/kubectl     |   4 +-
 .../debug-scripts/kubernetes-worker-services    |  10 +-
 .../juju/layers/kubernetes-worker/layer.yaml    |  11 +-
 .../lib/charms/kubernetes/common.py             |  29 -
 .../lib/charms/kubernetes/flagmanager.py        |   7 +
 .../layers/kubernetes-worker/metadata.yaml      |  18 +-
 .../reactive/kubernetes_worker.py               | 373 ++++++------
 .../kubernetes-worker/templates/kube-default    |  22 -
 .../templates/kube-proxy.defaults               |   1 -
 .../templates/kube-proxy.service                |  19 -
 .../templates/kubelet.defaults                  |  14 -
 .../templates/kubelet.service                   |  22 -
 hack/verify-flags/exceptions.txt                |   2 +-
 38 files changed, 540 insertions(+), 985 deletions(-)
 delete mode 100644 cluster/juju/layers/kubernetes-master/tactics/__init__.py
 delete mode 100755 cluster/juju/layers/kubernetes-master/tactics/update_addons.py
 delete mode 100644 cluster/juju/layers/kubernetes-master/templates/kube-apiserver.defaults
 delete mode 100644 cluster/juju/layers/kubernetes-master/templates/kube-apiserver.service
 delete mode 100644 cluster/juju/layers/kubernetes-master/templates/kube-controller-manager.defaults
 delete mode 100644 cluster/juju/layers/kubernetes-master/templates/kube-controller-manager.service
 delete mode 100644 cluster/juju/layers/kubernetes-master/templates/kube-defaults.defaults
 delete mode 100644 cluster/juju/layers/kubernetes-master/templates/kube-scheduler.defaults
 delete mode 100644 cluster/juju/layers/kubernetes-master/templates/kube-scheduler.service
 create mode 100755 cluster/juju/layers/kubernetes-worker/actions/upgrade
 delete mode 100644 cluster/juju/layers/kubernetes-worker/templates/kube-default
 delete mode 100644 cluster/juju/layers/kubernetes-worker/templates/kube-proxy.defaults
 delete mode 100644 cluster/juju/layers/kubernetes-worker/templates/kube-proxy.service
 delete mode 100644 cluster/juju/layers/kubernetes-worker/templates/kubelet.defaults
 delete mode 100644 cluster/juju/layers/kubernetes-worker/templates/kubelet.service

diff --git a/cluster/juju/layers/kubernetes-master/actions/create-rbd-pv b/cluster/juju/layers/kubernetes-master/actions/create-rbd-pv
index 37a670cde92..0a3dbf6e784 100755
--- a/cluster/juju/layers/kubernetes-master/actions/create-rbd-pv
+++ b/cluster/juju/layers/kubernetes-master/actions/create-rbd-pv
@@ -28,6 +28,9 @@
 import os
 import sys
 
+os.environ['PATH'] += os.pathsep + os.path.join(os.sep, 'snap', 'bin')
+
+
 def main():
     ''' Control logic to enlist Ceph RBD volumes as PersistentVolumes in
     Kubernetes. This will invoke the validation steps, and only execute if
diff --git a/cluster/juju/layers/kubernetes-master/config.yaml b/cluster/juju/layers/kubernetes-master/config.yaml
index 9a0140f2f5d..0fde833377a 100644
--- a/cluster/juju/layers/kubernetes-master/config.yaml
+++ b/cluster/juju/layers/kubernetes-master/config.yaml
@@ -21,3 +21,8 @@ options:
       privileged mode. If "auto", kube-apiserver will not run in privileged
       mode by default, but will switch to privileged mode if gpu hardware is
       detected on a worker node.
+  channel:
+    type: string
+    default: "stable"
+    description: |
+      Snap channel to install Kubernetes master services from
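
The new `channel` option feeds straight into the snap installs (see the `install_snaps` handler later in this patch), so the snap source can be changed per deployment. A sketch, assuming a Juju 2.x client; the channel name shown is illustrative:

```shell
# Track a specific snap channel instead of the default "stable"; the
# config.changed.channel handler reinstalls the snaps from the new channel.
juju config kubernetes-master channel=1.6/stable
```
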
diff --git a/cluster/juju/layers/kubernetes-master/debug-scripts/kubectl b/cluster/juju/layers/kubernetes-master/debug-scripts/kubectl
index 018f8483968..0d62a0bbcee 100755
--- a/cluster/juju/layers/kubernetes-master/debug-scripts/kubectl
+++ b/cluster/juju/layers/kubernetes-master/debug-scripts/kubectl
@@ -1,6 +1,8 @@
 #!/bin/sh
 set -ux
 
+export PATH=$PATH:/snap/bin
+
 alias kubectl="kubectl --kubeconfig=/home/ubuntu/config"
 
 kubectl cluster-info > $DEBUG_SCRIPT_DIR/cluster-info
diff --git a/cluster/juju/layers/kubernetes-master/debug-scripts/kubernetes-master-services b/cluster/juju/layers/kubernetes-master/debug-scripts/kubernetes-master-services
index 9e5e54f69f0..a1eef81d846 100755
--- a/cluster/juju/layers/kubernetes-master/debug-scripts/kubernetes-master-services
+++ b/cluster/juju/layers/kubernetes-master/debug-scripts/kubernetes-master-services
@@ -2,12 +2,8 @@ set -ux
 
 for service in kube-apiserver kube-controller-manager kube-scheduler; do
-    systemctl status $service > $DEBUG_SCRIPT_DIR/$service-systemctl-status
-    journalctl -u $service > $DEBUG_SCRIPT_DIR/$service-journal
+    systemctl status snap.$service.daemon > $DEBUG_SCRIPT_DIR/$service-systemctl-status
+    journalctl -u snap.$service.daemon > $DEBUG_SCRIPT_DIR/$service-journal
 done
 
-mkdir -p $DEBUG_SCRIPT_DIR/etc-default
-cp -v /etc/default/kube* $DEBUG_SCRIPT_DIR/etc-default
-
-mkdir -p $DEBUG_SCRIPT_DIR/lib-systemd-system
-cp -v /lib/systemd/system/kube* $DEBUG_SCRIPT_DIR/lib-systemd-system
+# FIXME: grab snap config or something
diff --git a/cluster/juju/layers/kubernetes-master/layer.yaml b/cluster/juju/layers/kubernetes-master/layer.yaml
index 4e5273c2253..3b3b9e73222 100644
--- a/cluster/juju/layers/kubernetes-master/layer.yaml
+++ b/cluster/juju/layers/kubernetes-master/layer.yaml
@@ -1,11 +1,12 @@
 repo: https://github.com/kubernetes/kubernetes.git
 includes:
   - 'layer:basic'
-  - 'layer:debug'
+  - 'layer:snap'
+  - 'layer:tls-client'
   - 'layer:leadership'
+  - 'layer:debug'
   - 'layer:metrics'
   - 'layer:nagios'
-  - 'layer:tls-client'
   - 'interface:ceph-admin'
   - 'interface:etcd'
   - 'interface:http'
@@ -18,10 +19,8 @@ options:
     packages:
       - socat
   tls-client:
-    ca_certificate_path: '/srv/kubernetes/ca.crt'
-    server_certificate_path: '/srv/kubernetes/server.crt'
-    server_key_path: '/srv/kubernetes/server.key'
-    client_certificate_path: '/srv/kubernetes/client.crt'
-    client_key_path: '/srv/kubernetes/client.key'
-tactics:
-  - 'tactics.update_addons.UpdateAddonsTactic'
+    ca_certificate_path: '/root/cdk/ca.crt'
+    server_certificate_path: '/root/cdk/server.crt'
+    server_key_path: '/root/cdk/server.key'
+    client_certificate_path: '/root/cdk/client.crt'
+    client_key_path: '/root/cdk/client.key'
diff --git a/cluster/juju/layers/kubernetes-master/lib/charms/kubernetes/common.py b/cluster/juju/layers/kubernetes-master/lib/charms/kubernetes/common.py
index d7fbf01aaa0..054399aeef2 100644
--- a/cluster/juju/layers/kubernetes-master/lib/charms/kubernetes/common.py
+++ b/cluster/juju/layers/kubernetes-master/lib/charms/kubernetes/common.py
@@ -17,10 +17,6 @@
 import re
 import subprocess
 
-from charmhelpers.core import unitdata
-
-BIN_VERSIONS = 'bin_versions'
-
 
 def get_version(bin_name):
     """Get the version of an installed Kubernetes binary.
@@ -33,31 +29,6 @@ def get_version(bin_name):
     >>> `get_version('kubelet')
     (1, 6, 0)
 
-    """
-    db = unitdata.kv()
-    bin_versions = db.get(BIN_VERSIONS, {})
-
-    cached_version = bin_versions.get(bin_name)
-    if cached_version:
-        return tuple(cached_version)
-
-    version = _get_bin_version(bin_name)
-    bin_versions[bin_name] = list(version)
-    db.set(BIN_VERSIONS, bin_versions)
-    return version
-
-
-def reset_versions():
-    """Reset the cache of bin versions.
-
-    """
-    db = unitdata.kv()
-    db.unset(BIN_VERSIONS)
-
-
-def _get_bin_version(bin_name):
-    """Get a binary version by calling it with --version and parsing output.
-
     """
     cmd = '{} --version'.format(bin_name).split()
     version_string = subprocess.check_output(cmd).decode('utf-8')
diff --git a/cluster/juju/layers/kubernetes-master/lib/charms/kubernetes/flagmanager.py b/cluster/juju/layers/kubernetes-master/lib/charms/kubernetes/flagmanager.py
index 0ff013b4c39..7fe5737a6ef 100644
--- a/cluster/juju/layers/kubernetes-master/lib/charms/kubernetes/flagmanager.py
+++ b/cluster/juju/layers/kubernetes-master/lib/charms/kubernetes/flagmanager.py
@@ -118,6 +118,13 @@ class FlagManager:
         """
         return self.data.get(key, default)
 
+    def destroy_all(self):
+        '''
+        Destructively removes all data from the FlagManager.
+        '''
+        self.data.clear()
+        self.__save()
+
     def to_s(self):
         '''
         Render the flags to a single string, prepared for the Docker
diff --git a/cluster/juju/layers/kubernetes-master/metadata.yaml b/cluster/juju/layers/kubernetes-master/metadata.yaml
index b86dd035781..ffce0c3dc93 100644
--- a/cluster/juju/layers/kubernetes-master/metadata.yaml
+++ b/cluster/juju/layers/kubernetes-master/metadata.yaml
@@ -37,7 +37,23 @@ requires:
   ceph-storage:
     interface: ceph-admin
 resources:
-  kubernetes:
+  kubectl:
     type: file
-    filename: kubernetes.tar.gz
-    description: "A tarball packaged release of the kubernetes bins."
+    filename: kubectl.snap
+    description: kubectl snap
+  kube-apiserver:
+    type: file
+    filename: kube-apiserver.snap
+    description: kube-apiserver snap
+  kube-controller-manager:
+    type: file
+    filename: kube-controller-manager.snap
+    description: kube-controller-manager snap
+  kube-scheduler:
+    type: file
+    filename: kube-scheduler.snap
+    description: kube-scheduler snap
+  cdk-addons:
+    type: file
+    filename: cdk-addons.snap
+    description: CDK addons snap
diff --git a/cluster/juju/layers/kubernetes-master/metrics.yaml b/cluster/juju/layers/kubernetes-master/metrics.yaml
index ba5550d70a1..c5996c040b8 100644
--- a/cluster/juju/layers/kubernetes-master/metrics.yaml
+++ b/cluster/juju/layers/kubernetes-master/metrics.yaml
@@ -3,32 +3,32 @@ metrics:
   pods:
     type: gauge
     description: number of pods
-    command: kubectl get po --all-namespaces | tail -n+2 | wc -l
+    command: /snap/bin/kubectl get po --all-namespaces | tail -n+2 | wc -l
   services:
     type: gauge
     description: number of services
-    command: kubectl get svc --all-namespaces | tail -n+2 | wc -l
+    command: /snap/bin/kubectl get svc --all-namespaces | tail -n+2 | wc -l
   replicasets:
     type: gauge
     description: number of replicasets
-    command: kubectl get rs --all-namespaces | tail -n+2 | wc -l
+    command: /snap/bin/kubectl get rs --all-namespaces | tail -n+2 | wc -l
   replicationcontrollers:
     type: gauge
     description: number of replicationcontrollers
-    command: kubectl get rc --all-namespaces | tail -n+2 | wc -l
+    command: /snap/bin/kubectl get rc --all-namespaces | tail -n+2 | wc -l
   nodes:
     type: gauge
     description: number of kubernetes nodes
-    command: kubectl get nodes | tail -n+2 | wc -l
+    command: /snap/bin/kubectl get nodes | tail -n+2 | wc -l
   persistentvolume:
     type: gauge
     description: number of pv
-    command: kubectl get pv --all-namespaces | tail -n+2 | wc -l
+    command: /snap/bin/kubectl get pv --all-namespaces | tail -n+2 | wc -l
   persistentvolumeclaims:
     type: gauge
     description: number of claims
-    command: kubectl get pvc --all-namespaces | tail -n+2 | wc -l
+    command: /snap/bin/kubectl get pvc --all-namespaces | tail -n+2 | wc -l
   serviceaccounts:
     type: gauge
     description: number of sa
-    command: kubectl get sa --all-namespaces | tail -n+2 | wc -l
+    command: /snap/bin/kubectl get sa --all-namespaces | tail -n+2 | wc -l
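
Each snap listed under `resources:` above can be side-loaded for offline or air-gapped deployments; otherwise the charm installs from the snap store using the configured channel. A sketch, assuming a Juju 2.x client (`juju attach`) and locally built snap files at hypothetical paths:

```shell
# The resource names and filenames must match metadata.yaml.
juju attach kubernetes-master kube-apiserver=./kube-apiserver.snap
juju attach kubernetes-master cdk-addons=./cdk-addons.snap
```
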
diff --git a/cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py b/cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py
index 2fe803db96d..0c3c3871422 100644
--- a/cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py
+++ b/cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py
@@ -16,7 +16,9 @@
 
 import base64
 import os
+import re
 import random
+import shutil
 import socket
 import string
 import json
@@ -24,18 +26,19 @@ import json
 import charms.leadership
 
 from shlex import split
-from subprocess import call
 from subprocess import check_call
 from subprocess import check_output
 from subprocess import CalledProcessError
 
 from charms import layer
+from charms.layer import snap
 from charms.reactive import hook
 from charms.reactive import remove_state
 from charms.reactive import set_state
+from charms.reactive import is_state
 from charms.reactive import when, when_any, when_not
 from charms.reactive.helpers import data_changed
-from charms.kubernetes.common import get_version, reset_versions
+from charms.kubernetes.common import get_version
 from charms.kubernetes.flagmanager import FlagManager
 
 from charmhelpers.core import hookenv
@@ -46,15 +49,7 @@
 from charmhelpers.fetch import apt_install
 from charmhelpers.contrib.charmsupport import nrpe
 
-dashboard_templates = [
-    'dashboard-controller.yaml',
-    'dashboard-service.yaml',
-    'influxdb-grafana-controller.yaml',
-    'influxdb-service.yaml',
-    'grafana-service.yaml',
-    'heapster-controller.yaml',
-    'heapster-service.yaml'
-]
+os.environ['PATH'] += os.pathsep + os.path.join(os.sep, 'snap', 'bin')
 
 
 def service_cidr():
@@ -74,66 +69,91 @@ def freeze_service_cidr():
 
 @hook('upgrade-charm')
 def reset_states_for_delivery():
     '''An upgrade charm event was triggered by Juju, react to that here.'''
+    migrate_from_pre_snaps()
+    install_snaps()
+    remove_state('authentication.setup')
+    remove_state('kubernetes-master.components.started')
+
+
+def rename_file_idempotent(source, destination):
+    if os.path.isfile(source):
+        os.rename(source, destination)
+
+
+def migrate_from_pre_snaps():
+    # remove old states
+    remove_state('kubernetes.components.installed')
+    remove_state('kubernetes.dashboard.available')
+    remove_state('kube-dns.available')
+    remove_state('kubernetes-master.app_version.set')
+
+    # disable old services
     services = ['kube-apiserver',
                 'kube-controller-manager',
                 'kube-scheduler']
     for service in services:
         hookenv.log('Stopping {0} service.'.format(service))
         host.service_stop(service)
-    remove_state('kubernetes-master.components.started')
-    remove_state('kubernetes-master.components.installed')
-    remove_state('kube-dns.available')
-    remove_state('kubernetes.dashboard.available')
 
+    # rename auth files
+    os.makedirs('/root/cdk', exist_ok=True)
+    rename_file_idempotent('/etc/kubernetes/serviceaccount.key',
+                           '/root/cdk/serviceaccount.key')
+    rename_file_idempotent('/srv/kubernetes/basic_auth.csv',
+                           '/root/cdk/basic_auth.csv')
+    rename_file_idempotent('/srv/kubernetes/known_tokens.csv',
+                           '/root/cdk/known_tokens.csv')
 
-@when_not('kubernetes-master.components.installed')
-def install():
-    '''Unpack and put the Kubernetes master files on the path.'''
-    # Get the resource via resource_get
-    try:
-        archive = hookenv.resource_get('kubernetes')
-    except Exception:
-        message = 'Error fetching the kubernetes resource.'
-        hookenv.log(message)
-        hookenv.status_set('blocked', message)
-        return
-
-    if not archive:
-        hookenv.log('Missing kubernetes resource.')
-        hookenv.status_set('blocked', 'Missing kubernetes resource.')
-        return
-
-    # Handle null resource publication, we check if filesize < 1mb
-    filesize = os.stat(archive).st_size
-    if filesize < 1000000:
-        hookenv.status_set('blocked', 'Incomplete kubernetes resource.')
-        return
-
-    hookenv.status_set('maintenance', 'Unpacking kubernetes resource.')
-
-    files_dir = os.path.join(hookenv.charm_dir(), 'files')
-
-    os.makedirs(files_dir, exist_ok=True)
-
-    command = 'tar -xvzf {0} -C {1}'.format(archive, files_dir)
-    hookenv.log(command)
-    check_call(split(command))
-
-    apps = [
-        {'name': 'kube-apiserver', 'path': '/usr/local/bin'},
-        {'name': 'kube-controller-manager', 'path': '/usr/local/bin'},
-        {'name': 'kube-scheduler', 'path': '/usr/local/bin'},
-        {'name': 'kubectl', 'path': '/usr/local/bin'},
+    # cleanup old files
+    files = [
+        "/lib/systemd/system/kube-apiserver.service",
+        "/lib/systemd/system/kube-controller-manager.service",
+        "/lib/systemd/system/kube-scheduler.service",
+        "/etc/default/kube-defaults",
+        "/etc/default/kube-apiserver.defaults",
+        "/etc/default/kube-controller-manager.defaults",
+        "/etc/default/kube-scheduler.defaults",
+        "/srv/kubernetes",
+        "/home/ubuntu/kubectl",
+        "/usr/local/bin/kubectl",
+        "/usr/local/bin/kube-apiserver",
+        "/usr/local/bin/kube-controller-manager",
+        "/usr/local/bin/kube-scheduler",
+        "/etc/kubernetes"
     ]
+    for file in files:
+        if os.path.isdir(file):
+            hookenv.log("Removing directory: " + file)
+            shutil.rmtree(file)
+        elif os.path.isfile(file):
+            hookenv.log("Removing file: " + file)
+            os.remove(file)
 
-    for app in apps:
-        unpacked = '{}/{}'.format(files_dir, app['name'])
-        app_path = os.path.join(app['path'], app['name'])
-        install = ['install', '-v', '-D', unpacked, app_path]
-        hookenv.log(install)
-        check_call(install)
+    # clear the flag managers
+    FlagManager('kube-apiserver').destroy_all()
+    FlagManager('kube-controller-manager').destroy_all()
+    FlagManager('kube-scheduler').destroy_all()
 
-    reset_versions()
-    set_state('kubernetes-master.components.installed')
+
+def install_snaps():
+    channel = hookenv.config('channel')
+    hookenv.status_set('maintenance', 'Installing kubectl snap')
+    snap.install('kubectl', channel=channel, classic=True)
+    hookenv.status_set('maintenance', 'Installing kube-apiserver snap')
+    snap.install('kube-apiserver', channel=channel)
+    hookenv.status_set('maintenance',
+                       'Installing kube-controller-manager snap')
+    snap.install('kube-controller-manager', channel=channel)
+    hookenv.status_set('maintenance', 'Installing kube-scheduler snap')
+    snap.install('kube-scheduler', channel=channel)
+    hookenv.status_set('maintenance', 'Installing cdk-addons snap')
+    snap.install('cdk-addons', channel=channel)
+    set_state('kubernetes-master.snaps.installed')
+
+
+@when('config.changed.channel')
+def channel_changed():
+    install_snaps()
 
 
 @when('cni.connected')
@@ -145,20 +165,18 @@ def configure_cni(cni):
 
 
 @when('leadership.is_leader')
-@when('kubernetes-master.components.installed')
@when_not('authentication.setup')
 def setup_leader_authentication():
     '''Setup basic authentication and token access for the cluster.'''
     api_opts = FlagManager('kube-apiserver')
     controller_opts = FlagManager('kube-controller-manager')
 
-    service_key = '/etc/kubernetes/serviceaccount.key'
-    basic_auth = '/srv/kubernetes/basic_auth.csv'
-    known_tokens = '/srv/kubernetes/known_tokens.csv'
+    service_key = '/root/cdk/serviceaccount.key'
+    basic_auth = '/root/cdk/basic_auth.csv'
+    known_tokens = '/root/cdk/known_tokens.csv'
 
-    api_opts.add('--basic-auth-file', basic_auth)
-    api_opts.add('--token-auth-file', known_tokens)
-    api_opts.add('--service-cluster-ip-range', service_cidr())
+    api_opts.add('basic-auth-file', basic_auth)
+    api_opts.add('token-auth-file', known_tokens)
 
     hookenv.status_set('maintenance', 'Rendering authentication templates.')
     if not os.path.isfile(basic_auth):
         setup_basic_auth('admin', 'admin', 'admin')
@@ -167,13 +185,13 @@ def setup_leader_authentication():
         setup_tokens(None, 'kubelet', 'kubelet')
         setup_tokens(None, 'kube_proxy', 'kube_proxy')
     # Generate the default service account token key
-    os.makedirs('/etc/kubernetes', exist_ok=True)
-
-    cmd = ['openssl', 'genrsa', '-out', service_key,
-           '2048']
-    check_call(cmd)
-    api_opts.add('--service-account-key-file', service_key)
-    controller_opts.add('--service-account-private-key-file', service_key)
+    os.makedirs('/root/cdk', exist_ok=True)
+    if not os.path.isfile(service_key):
+        cmd = ['openssl', 'genrsa', '-out', service_key,
+               '2048']
+        check_call(cmd)
+    api_opts.add('service-account-key-file', service_key)
+    controller_opts.add('service-account-private-key-file', service_key)
 
     # read service account key for syndication
     leader_data = {}
@@ -184,27 +202,25 @@
     # this is slightly opaque, but we are sending file contents under its file
     # path as a key.
     # eg:
-    # {'/etc/kubernetes/serviceaccount.key': 'RSA:2471731...'}
+    # {'/root/cdk/serviceaccount.key': 'RSA:2471731...'}
     charms.leadership.leader_set(leader_data)
 
     set_state('authentication.setup')
 
 
 @when_not('leadership.is_leader')
-@when('kubernetes-master.components.installed')
 @when_not('authentication.setup')
 def setup_non_leader_authentication():
     api_opts = FlagManager('kube-apiserver')
     controller_opts = FlagManager('kube-controller-manager')
 
-    service_key = '/etc/kubernetes/serviceaccount.key'
-    basic_auth = '/srv/kubernetes/basic_auth.csv'
-    known_tokens = '/srv/kubernetes/known_tokens.csv'
+    service_key = '/root/cdk/serviceaccount.key'
+    basic_auth = '/root/cdk/basic_auth.csv'
+    known_tokens = '/root/cdk/known_tokens.csv'
 
     # This races with other codepaths, and seems to require being created first
     # This block may be extracted later, but for now seems to work as intended
-    os.makedirs('/etc/kubernetes', exist_ok=True)
-    os.makedirs('/srv/kubernetes', exist_ok=True)
+    os.makedirs('/root/cdk', exist_ok=True)
 
     hookenv.status_set('maintenance', 'Rendering authentication templates.')
@@ -225,23 +241,22 @@ def setup_non_leader_authentication():
         with open(k, 'w+') as fp:
             fp.write(contents)
 
-    api_opts.add('--basic-auth-file', basic_auth)
-    api_opts.add('--token-auth-file', known_tokens)
-    api_opts.add('--service-cluster-ip-range', service_cidr())
-    api_opts.add('--service-account-key-file', service_key)
-    controller_opts.add('--service-account-private-key-file', service_key)
+    api_opts.add('basic-auth-file', basic_auth)
+    api_opts.add('token-auth-file', known_tokens)
+    api_opts.add('service-account-key-file', service_key)
+    controller_opts.add('service-account-private-key-file', service_key)
 
     set_state('authentication.setup')
 
 
-@when('kubernetes-master.components.installed')
+@when('kubernetes-master.snaps.installed')
 def set_app_version():
     ''' Declare the application version to juju '''
     version = check_output(['kube-apiserver', '--version'])
     hookenv.application_version_set(version.split(b' v')[-1].rstrip())
 
 
-@when('kube-dns.available', 'kubernetes-master.components.installed')
+@when('cdk-addons.configured')
 def idle_status():
     ''' Signal at the end of the run that we are running. '''
     if not all_kube_system_pods_running():
@@ -253,25 +268,25 @@ def idle_status():
         hookenv.status_set('active', 'Kubernetes master running.')
 
 
-@when('etcd.available', 'kubernetes-master.components.installed',
-      'certificates.server.cert.available', 'authentication.setup')
+@when('etcd.available', 'certificates.server.cert.available',
+      'authentication.setup')
 @when_not('kubernetes-master.components.started')
 def start_master(etcd, tls):
     '''Run the Kubernetes master components.'''
     hookenv.status_set('maintenance',
-                       'Rendering the Kubernetes master systemd files.')
+                       'Configuring the Kubernetes master services.')
     freeze_service_cidr()
     handle_etcd_relation(etcd)
-    # Use the etcd relation object to render files with etcd information.
-    render_files()
+    configure_master_services()
 
     hookenv.status_set('maintenance',
                        'Starting the Kubernetes master services.')
+
     services = ['kube-apiserver',
                 'kube-controller-manager',
                 'kube-scheduler']
     for service in services:
-        hookenv.log('Starting {0} service.'.format(service))
-        host.service_start(service)
+        host.service_restart('snap.%s.daemon' % service)
+
     hookenv.open_port(6443)
     set_state('kubernetes-master.components.started')
@@ -345,63 +360,28 @@ def push_api_data(kube_api):
     kube_api.set_api_port('6443')
 
 
-@when('kubernetes-master.components.started', 'kube-dns.available')
-@when_not('kubernetes.dashboard.available')
-def install_dashboard_addons():
-    ''' Launch dashboard addons if they are enabled in config '''
-    if hookenv.config('enable-dashboard-addons'):
-        hookenv.log('Launching kubernetes dashboard.')
-        context = {}
-        context['arch'] = arch()
-        try:
-            context['pillar'] = {'num_nodes': get_node_count()}
-            for template in dashboard_templates:
-                create_addon(template, context)
-            set_state('kubernetes.dashboard.available')
-        except CalledProcessError:
-            hookenv.log('Kubernetes dashboard waiting on kubeapi')
-
-
-@when('kubernetes-master.components.started', 'kubernetes.dashboard.available')
-def remove_dashboard_addons():
-    ''' Removes dashboard addons if they are disabled in config '''
-    if not hookenv.config('enable-dashboard-addons'):
-        hookenv.log('Removing kubernetes dashboard.')
-        for template in dashboard_templates:
-            delete_addon(template)
-        remove_state('kubernetes.dashboard.available')
-
-
 @when('kubernetes-master.components.started')
-@when_not('kube-dns.available')
-def start_kube_dns():
-    ''' State guard to starting DNS '''
-    hookenv.status_set('maintenance', 'Deploying KubeDNS')
-
-    context = {
-        'arch': arch(),
-        # The dictionary named 'pillar' is a construct of the k8s template file
-        'pillar': {
-            'dns_server': get_dns_ip(),
-            'dns_replicas': 1,
-            'dns_domain': hookenv.config('dns_domain')
-        }
-    }
-
+def configure_cdk_addons():
+    ''' Configure CDK addons '''
+    dbEnabled = str(hookenv.config('enable-dashboard-addons')).lower()
+    args = [
+        'arch=' + arch(),
+        'dns-ip=' + get_dns_ip(),
+        'dns-domain=' + hookenv.config('dns_domain'),
+        'enable-dashboard=' + dbEnabled
+    ]
+    check_call(['snap', 'set', 'cdk-addons'] + args)
     try:
-        create_addon('kubedns-sa.yaml', context)
-        create_addon('kubedns-cm.yaml', context)
-        create_addon('kubedns-controller.yaml', context)
-        create_addon('kubedns-svc.yaml', context)
+        check_call(['cdk-addons.apply'])
     except CalledProcessError:
-        hookenv.status_set('waiting', 'Waiting to retry KubeDNS deployment')
+        hookenv.status_set('waiting', 'Waiting to retry addon deployment')
+        remove_state('cdk-addons.configured')
         return
-
-    set_state('kube-dns.available')
+    set_state('cdk-addons.configured')
 
 
-@when('kubernetes-master.components.installed', 'loadbalancer.available',
-      'certificates.ca.available', 'certificates.client.cert.available')
+@when('loadbalancer.available', 'certificates.ca.available',
+      'certificates.client.cert.available')
 def loadbalancer_kubeconfig(loadbalancer, ca, client):
     # Get the potential list of loadbalancers from the relation object.
     hosts = loadbalancer.get_addresses_ports()
@@ -413,8 +393,7 @@ def loadbalancer_kubeconfig(loadbalancer, ca, client):
     build_kubeconfig(server)
 
 
-@when('kubernetes-master.components.installed',
-      'certificates.ca.available', 'certificates.client.cert.available')
+@when('certificates.ca.available', 'certificates.client.cert.available')
 @when_not('loadbalancer.available')
 def create_self_config(ca, client):
     '''Create a kubernetes configuration for the master unit.'''
@@ -520,8 +499,11 @@ def initial_nrpe_config(nagios=None):
 @when_any('config.changed.nagios_context',
           'config.changed.nagios_servicegroups')
 def update_nrpe_config(unused=None):
-    services = ('kube-apiserver', 'kube-controller-manager', 'kube-scheduler')
-
+    services = (
+        'snap.kube-apiserver.daemon',
+        'snap.kube-controller-manager.daemon',
+        'snap.kube-scheduler.daemon'
+    )
     hostname = nrpe.get_nagios_hostname()
     current_unit = nrpe.get_nagios_unit_name()
     nrpe_setup = nrpe.NRPE(hostname=hostname)
@@ -535,7 +517,11 @@ def remove_nrpe_config(nagios=None):
     remove_state('nrpe-external-master.initial-config')
 
     # List of systemd services for which the checks will be removed
-    services = ('kube-apiserver', 'kube-controller-manager', 'kube-scheduler')
+    services = (
+        'snap.kube-apiserver.daemon',
+        'snap.kube-controller-manager.daemon',
+        'snap.kube-scheduler.daemon'
+    )
 
     # The current nrpe-external-master interface doesn't handle a lot of logic,
     # use the charm-helpers code for now.
@@ -546,45 +532,15 @@ def remove_nrpe_config(nagios=None):
         nrpe_setup.remove_check(shortname=service)
 
 
-def set_privileged(privileged, render_config=True):
-    """Update the KUBE_ALLOW_PRIV flag for kube-apiserver and re-render config.
-
-    If the flag already matches the requested value, this is a no-op.
-
-    :param str privileged: "true" or "false"
-    :param bool render_config: whether to render new config file
-    :return: True if the flag was changed, else false
+def is_privileged():
+    """Return boolean indicating whether or not to set allow-privileged=true.
 
     """
-    if privileged == "true":
-        set_state('kubernetes-master.privileged')
+    privileged = hookenv.config('allow-privileged')
+    if privileged == 'auto':
+        return is_state('kubernetes-master.gpu.enabled')
     else:
-        remove_state('kubernetes-master.privileged')
-
-    flag = '--allow-privileged'
-    kube_allow_priv_opts = FlagManager('KUBE_ALLOW_PRIV')
-    if kube_allow_priv_opts.get(flag) == privileged:
-        # Flag isn't changing, nothing to do
-        return False
-
-    hookenv.log('Setting {}={}'.format(flag, privileged))
-
-    # Update --allow-privileged flag value
-    kube_allow_priv_opts.add(flag, privileged, strict=True)
-
-    # re-render config with new options
-    if render_config:
-        context = {
-            'kube_allow_priv': kube_allow_priv_opts.to_s(),
-        }
-
-        # render the kube-defaults file
-        render('kube-defaults.defaults', '/etc/default/kube-defaults', context)
-
-        # signal that we need a kube-apiserver restart
-        set_state('kubernetes-master.kube-apiserver.restart')
-
-    return True
+        return privileged == 'true'
 
 
 @when('config.changed.allow-privileged')
@@ -593,24 +549,10 @@ def on_config_allow_privileged_change():
     """React to changed 'allow-privileged' config value.
 
     """
-    config = hookenv.config()
-    privileged = config['allow-privileged']
-    if privileged == "auto":
-        return
-
-    set_privileged(privileged)
+    remove_state('kubernetes-master.components.started')
    remove_state('config.changed.allow-privileged')
 
 
-@when('kubernetes-master.kube-apiserver.restart')
-def restart_kube_apiserver():
-    """Restart kube-apiserver.
-
-    """
-    host.service_restart('kube-apiserver')
-    remove_state('kubernetes-master.kube-apiserver.restart')
-
-
 @when('kube-control.gpu.available')
 @when('kubernetes-master.components.started')
 @when_not('kubernetes-master.gpu.enabled')
@@ -628,7 +570,7 @@ def on_gpu_available(kube_control):
         )
         return
 
-    set_privileged("true")
+    remove_state('kubernetes-master.components.started')
     set_state('kubernetes-master.gpu.enabled')
 
 
@@ -642,32 +584,6 @@ def disable_gpu_mode():
     remove_state('kubernetes-master.gpu.enabled')
 
 
-def create_addon(template, context):
-    '''Create an addon from a template'''
-    source = 'addons/' + template
-    target = '/etc/kubernetes/addons/' + template
-    render(source, target, context)
-    # Need --force when upgrading between k8s versions where the templates have
-    # changed.
-    cmd = ['kubectl', 'apply', '--force', '-f', target]
-    check_call(cmd)
-
-
-def delete_addon(template):
-    '''Delete an addon from a template'''
-    target = '/etc/kubernetes/addons/' + template
-    cmd = ['kubectl', 'delete', '-f', target]
-    call(cmd)
-
-
-def get_node_count():
-    '''Return the number of Kubernetes nodes in the cluster'''
-    cmd = ['kubectl', 'get', 'nodes', '-o', 'name']
-    output = check_output(cmd)
-    node_count = len(output.splitlines())
-    return node_count
-
-
 def arch():
     '''Return the package architecture as a string. Raise an exception if the
     architecture is not supported by kubernetes.'''
@@ -695,16 +611,10 @@ def build_kubeconfig(server):
     # Cache last server string to know if we need to regenerate the config.
     if not data_changed('kubeconfig.server', server):
         return
-    # The final destination of the kubeconfig and kubectl.
-    destination_directory = '/home/ubuntu'
     # Create an absolute path for the kubeconfig file.
-    kubeconfig_path = os.path.join(destination_directory, 'config')
+    kubeconfig_path = os.path.join(os.sep, 'home', 'ubuntu', 'config')
     # Create the kubeconfig on this system so users can access the cluster.
     create_kubeconfig(kubeconfig_path, server, ca, key, cert)
-    # Copy the kubectl binary to the destination directory.
-    cmd = ['install', '-v', '-o', 'ubuntu', '-g', 'ubuntu',
-           '/usr/local/bin/kubectl', destination_directory]
-    check_call(cmd)
     # Make the config file readable by the ubuntu users so juju scp works.
     cmd = ['chown', 'ubuntu:ubuntu', kubeconfig_path]
     check_call(cmd)
@@ -753,7 +663,7 @@ def handle_etcd_relation(reldata):
     etcd declares itself as available'''
     connection_string = reldata.get_connection_string()
     # Define where the etcd tls files will be kept.
-    etcd_dir = '/etc/ssl/etcd'
+    etcd_dir = '/root/cdk/etcd'
     # Create paths to the etcd client ca, key, and cert file locations.
     ca = os.path.join(etcd_dir, 'client-ca.pem')
     key = os.path.join(etcd_dir, 'client-key.pem')
@@ -767,38 +677,28 @@ def handle_etcd_relation(reldata):
     # Never use stale data, always prefer whats coming in during context
     # building. if its stale, its because whats in unitdata is stale
     data = api_opts.data
-    if data.get('--etcd-servers-strict') or data.get('--etcd-servers'):
-        api_opts.destroy('--etcd-cafile')
-        api_opts.destroy('--etcd-keyfile')
-        api_opts.destroy('--etcd-certfile')
-        api_opts.destroy('--etcd-servers', strict=True)
-        api_opts.destroy('--etcd-servers')
+    if data.get('etcd-servers-strict') or data.get('etcd-servers'):
+        api_opts.destroy('etcd-cafile')
+        api_opts.destroy('etcd-keyfile')
+        api_opts.destroy('etcd-certfile')
+        api_opts.destroy('etcd-servers', strict=True)
+        api_opts.destroy('etcd-servers')
 
     # Set the apiserver flags in the options manager
-    api_opts.add('--etcd-cafile', ca)
-    api_opts.add('--etcd-keyfile', key)
-    api_opts.add('--etcd-certfile', cert)
-    api_opts.add('--etcd-servers', connection_string, strict=True)
+    api_opts.add('etcd-cafile', ca)
+    api_opts.add('etcd-keyfile', key)
+    api_opts.add('etcd-certfile', cert)
+    api_opts.add('etcd-servers', connection_string, strict=True)
 
 
-def render_files():
-    '''Use jinja templating to render the docker-compose.yml and master.json
-    file to contain the dynamic data for the configuration files.'''
-    context = {}
-    config = hookenv.config()
-    # Add the charm configuration data to the context.
-    context.update(config)
-
-    # Update the context with extra values: arch, and networking information
-    context.update({'arch': arch(),
-                    'master_address': hookenv.unit_get('private-address'),
-                    'public_address': hookenv.unit_get('public-address'),
-                    'private_address': hookenv.unit_get('private-address')})
+def configure_master_services():
+    ''' Add remaining flags for the master services and configure snaps to use
+    them '''
 
     api_opts = FlagManager('kube-apiserver')
     controller_opts = FlagManager('kube-controller-manager')
     scheduler_opts = FlagManager('kube-scheduler')
-    scheduler_opts.add('--v', '2')
+    scheduler_opts.add('v', '2')
 
     # Get the tls paths from the layer data.
     layer_options = layer.options('tls-client')
@@ -808,23 +708,27 @@
     server_cert_path = layer_options.get('server_certificate_path')
     server_key_path = layer_options.get('server_key_path')
 
-    # set --allow-privileged flag for kube-apiserver
-    set_privileged(
-        "true" if config['allow-privileged'] == "true" else "false",
-        render_config=False)
+    if is_privileged():
+        api_opts.add('allow-privileged', 'true', strict=True)
+        set_state('kubernetes-master.privileged')
+    else:
+        api_opts.add('allow-privileged', 'false', strict=True)
+        remove_state('kubernetes-master.privileged')
 
     # Handle static options for now
-    api_opts.add('--min-request-timeout', '300')
-    api_opts.add('--v', '4')
-    api_opts.add('--client-ca-file', ca_cert_path)
-    api_opts.add('--tls-cert-file', server_cert_path)
-    api_opts.add('--tls-private-key-file', server_key_path)
-    api_opts.add('--kubelet-certificate-authority', ca_cert_path)
-    api_opts.add('--kubelet-client-certificate', client_cert_path)
-    api_opts.add('--kubelet-client-key', client_key_path)
-    # Needed for upgrade from 1.5.x to 1.6.0
-    # XXX: support etcd3
-    api_opts.add('--storage-backend', 'etcd2')
+    api_opts.add('service-cluster-ip-range', service_cidr())
+    api_opts.add('min-request-timeout', '300')
+    api_opts.add('v', '4')
+    api_opts.add('client-ca-file', ca_cert_path)
+    api_opts.add('tls-cert-file', server_cert_path)
+    api_opts.add('tls-private-key-file', server_key_path)
+    api_opts.add('kubelet-certificate-authority', ca_cert_path)
+    api_opts.add('kubelet-client-certificate', client_cert_path)
+    api_opts.add('kubelet-client-key', client_key_path)
+    api_opts.add('logtostderr', 'true')
+    api_opts.add('insecure-bind-address', '127.0.0.1')
+    api_opts.add('insecure-port', '8080')
+    api_opts.add('storage-backend', 'etcd2')  # FIXME: add etcd3 support
 
     admission_control = [
         'NamespaceLifecycle',
         'LimitRanger',
@@ -832,68 +736,50 @@
         'ServiceAccount',
         'ResourceQuota',
         'DefaultTolerationSeconds'
     ]
+
     if get_version('kube-apiserver') < (1, 6):
         hookenv.log('Removing DefaultTolerationSeconds from admission-control')
         admission_control.remove('DefaultTolerationSeconds')
-    api_opts.add(
-        '--admission-control', ','.join(admission_control), strict=True)
+    api_opts.add('admission-control', ','.join(admission_control), strict=True)
 
     # Default to 3 minute resync. TODO: Make this configureable?
-    controller_opts.add('--min-resync-period', '3m')
-    controller_opts.add('--v', '2')
-    controller_opts.add('--root-ca-file', ca_cert_path)
+    controller_opts.add('min-resync-period', '3m')
+    controller_opts.add('v', '2')
+    controller_opts.add('root-ca-file', ca_cert_path)
+    controller_opts.add('logtostderr', 'true')
+    controller_opts.add('master', 'http://127.0.0.1:8080')
 
-    context.update({
-        'kube_allow_priv': FlagManager('KUBE_ALLOW_PRIV').to_s(),
-        'kube_apiserver_flags': api_opts.to_s(),
-        'kube_scheduler_flags': scheduler_opts.to_s(),
-        'kube_controller_manager_flags': controller_opts.to_s(),
-    })
+    scheduler_opts.add('v', '2')
+    scheduler_opts.add('logtostderr', 'true')
+    scheduler_opts.add('master', 'http://127.0.0.1:8080')
 
-    # Render the configuration files that contains parameters for
-    # the apiserver, scheduler, and controller-manager
-    render_service('kube-apiserver', context)
-    render_service('kube-controller-manager', context)
-    render_service('kube-scheduler', context)
-
-    # explicitly render the generic defaults file
-    render('kube-defaults.defaults', '/etc/default/kube-defaults', context)
-
-    # when files change on disk, we need to inform systemd of the changes
-    call(['systemctl', 'daemon-reload'])
-    call(['systemctl', 'enable', 'kube-apiserver'])
-    call(['systemctl', 'enable', 'kube-controller-manager'])
-    call(['systemctl', 'enable', 'kube-scheduler'])
-
-
-def render_service(service_name, context):
-    '''Render the systemd service by name.'''
-    unit_directory = '/lib/systemd/system'
-    source = '{0}.service'.format(service_name)
-    target = os.path.join(unit_directory, '{0}.service'.format(service_name))
-    render(source, target, context)
-    conf_directory = '/etc/default'
-    source = '{0}.defaults'.format(service_name)
-    target = os.path.join(conf_directory, service_name)
-    render(source, target, context)
+    cmd = ['snap', 'set', 'kube-apiserver'] + api_opts.to_s().split(' ')
+    check_call(cmd)
+    cmd = (
+        ['snap', 'set', 'kube-controller-manager'] +
+        controller_opts.to_s().split(' ')
+    )
+    check_call(cmd)
+    cmd = ['snap', 'set', 'kube-scheduler'] + scheduler_opts.to_s().split(' ')
+    check_call(cmd)
 
 
 def setup_basic_auth(username='admin', password='admin', user='admin'):
     '''Create the htacces file and the tokens.'''
-    srv_kubernetes = '/srv/kubernetes'
-    if not os.path.isdir(srv_kubernetes):
-        os.makedirs(srv_kubernetes)
-    htaccess = os.path.join(srv_kubernetes, 'basic_auth.csv')
+    root_cdk = '/root/cdk'
+    if not os.path.isdir(root_cdk):
+        os.makedirs(root_cdk)
+    htaccess = os.path.join(root_cdk, 'basic_auth.csv')
     with open(htaccess, 'w') as stream:
         stream.write('{0},{1},{2}'.format(username, password, user))
 
 
 def setup_tokens(token, username, user):
     '''Create a token file for kubernetes authentication.'''
-    srv_kubernetes = '/srv/kubernetes'
-    if not os.path.isdir(srv_kubernetes):
-        os.makedirs(srv_kubernetes)
-    known_tokens = os.path.join(srv_kubernetes, 'known_tokens.csv')
+    root_cdk = '/root/cdk'
+    if not os.path.isdir(root_cdk):
+        os.makedirs(root_cdk)
+    known_tokens = os.path.join(root_cdk, 'known_tokens.csv')
     if not token:
         alpha = string.ascii_letters + string.digits
         token = ''.join(random.SystemRandom().choice(alpha) for _ in range(32))
@@ -920,3 +806,9 @@ def all_kube_system_pods_running():
             return False
 
     return True
+
+
+def apiserverVersion():
+    cmd = 'kube-apiserver --version'.split()
+    version_string = check_output(cmd).decode('utf-8')
+    return tuple(int(q) for q in re.findall("[0-9]+", version_string)[:3])
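
After this rewrite the master daemons are snap services: their flags live in snap configuration (written via `snap set` in `configure_master_services`) and their logs in the `snap.*.daemon` systemd units. A sketch for poking at a running unit, assuming shell access via `juju ssh kubernetes-master/0`:

```shell
# Read back one of the flags written by configure_master_services.
sudo snap get kube-apiserver service-cluster-ip-range
# The systemd units are now named snap.<name>.daemon.
sudo systemctl status snap.kube-apiserver.daemon
sudo journalctl -u snap.kube-apiserver.daemon
```
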
diff --git a/cluster/juju/layers/kubernetes-master/tactics/__init__.py b/cluster/juju/layers/kubernetes-master/tactics/__init__.py
deleted file mode 100644
index 1b0c13728f7..00000000000
--- a/cluster/juju/layers/kubernetes-master/tactics/__init__.py
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright 2015 The Kubernetes Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
diff --git a/cluster/juju/layers/kubernetes-master/tactics/update_addons.py b/cluster/juju/layers/kubernetes-master/tactics/update_addons.py
deleted file mode 100755
index 3382ef08707..00000000000
--- a/cluster/juju/layers/kubernetes-master/tactics/update_addons.py
+++ /dev/null
@@ -1,185 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright 2015 The Kubernetes Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import argparse
-import os
-import shutil
-import subprocess
-import tempfile
-import logging
-from contextlib import contextmanager
-
-import charmtools.utils
-from charmtools.build.tactics import Tactic
-
-
-description = """
-Update addon manifests for the charm.
-
-This will clone the kubernetes repo and place the addons in
-/templates/addons.
-
-Can be run with no arguments and from any folder.
-"""
-
-log = logging.getLogger(__name__)
-
-
-def clean_addon_dir(addon_dir):
-    """ Remove and recreate the addons folder """
-    log.debug("Cleaning " + addon_dir)
-    shutil.rmtree(addon_dir, ignore_errors=True)
-    os.makedirs(addon_dir)
-
-
-def run_with_logging(command):
-    """ Run a command with controlled logging """
-    log.debug("Running: %s" % command)
-    process = subprocess.Popen(command, stderr=subprocess.PIPE)
-    stderr = process.communicate()[1].rstrip()
-    process.wait()
-    if process.returncode != 0:
-        log.error(stderr)
-        raise Exception("%s: exit code %d" % (command, process.returncode))
-    log.debug(stderr)
-
-
-@contextmanager
-def kubernetes_repo():
-    """ Yield a kubernetes repo to copy addons from.
-
-    If KUBE_VERSION is set, this will clone the local repo and checkout the
-    corresponding branch. Otherwise, the local branch will be used.
-    """
-    repo = os.path.abspath("../../../..")
-    if "KUBE_VERSION" in os.environ:
-        branch = os.environ["KUBE_VERSION"]
-        log.info("Cloning %s with branch %s" % (repo, branch))
-        path = tempfile.mkdtemp(prefix="kubernetes")
-        try:
-            cmd = ["git", "clone", repo, path, "-b", branch]
-            run_with_logging(cmd)
-            yield path
-        finally:
-            shutil.rmtree(path)
-    else:
-        log.info("Using local repo " + repo)
-        yield repo
-
-
-def add_addon(repo, source, dest):
-    """ Add an addon manifest from the given repo and source.
-
-    Any occurrences of 'amd64' are replaced with '{{ arch }}' so the charm can
-    fill it in during deployment. """
-    source = os.path.join(repo, "cluster/addons", source)
-    if os.path.isdir(dest):
-        dest = os.path.join(dest, os.path.basename(source))
-    log.debug("Copying: %s -> %s" % (source, dest))
-    with open(source, "r") as f:
-        content = f.read()
-    content = content.replace("amd64", "{{ arch }}")
-    with open(dest, "w") as f:
-        f.write(content)
-
-
-def update_addons(dest):
-    """ Update addons. This will clean the addons folder and add new manifests
-    from upstream. """
-    with kubernetes_repo() as repo:
-        log.info("Copying addons to charm")
-        clean_addon_dir(dest)
-        add_addon(repo, "dashboard/dashboard-controller.yaml", dest)
-        add_addon(repo, "dashboard/dashboard-service.yaml", dest)
-        try:
-            add_addon(repo, "dns/kubedns-sa.yaml",
-                      dest + "/kubedns-sa.yaml")
-            add_addon(repo, "dns/kubedns-cm.yaml",
-                      dest + "/kubedns-cm.yaml")
-            add_addon(repo, "dns/kubedns-controller.yaml.in",
-                      dest + "/kubedns-controller.yaml")
-            add_addon(repo, "dns/kubedns-svc.yaml.in",
-                      dest + "/kubedns-svc.yaml")
-        except IOError as e:
-            # fall back to the older filenames
-            log.debug(e)
-            add_addon(repo, "dns/skydns-rc.yaml.in",
-                      dest + "/kubedns-controller.yaml")
-            add_addon(repo, "dns/skydns-svc.yaml.in",
-                      dest + "/kubedns-svc.yaml")
-        influxdb = "cluster-monitoring/influxdb"
-        add_addon(repo, influxdb + "/grafana-service.yaml", dest)
-        add_addon(repo, influxdb + "/heapster-controller.yaml", dest)
-        add_addon(repo, influxdb + "/heapster-service.yaml", dest)
-        add_addon(repo, influxdb + "/influxdb-grafana-controller.yaml", dest)
-        add_addon(repo, influxdb + "/influxdb-service.yaml", dest)
-
-
-# Entry points
-
-
-class UpdateAddonsTactic(Tactic):
-    """ This tactic is used by charm-tools to dynamically populate the
-    template/addons folder at `charm build` time. """
-
-    @classmethod
-    def trigger(cls, entity, target=None, layer=None, next_config=None):
-        """ Determines which files the tactic should apply to. We only want
-        this tactic to trigger once, so let's use the templates/ folder
-        """
-        relpath = entity.relpath(layer.directory) if layer else entity
-        return relpath == "templates"
-
-    @property
-    def dest(self):
-        """ The destination we are writing to. This isn't a Tactic thing,
-        it's just a helper for UpdateAddonsTactic """
-        return self.target / "templates" / "addons"
-
-    def __call__(self):
-        """ When the tactic is called, update addons and put them directly in
-        our build destination """
-        update_addons(self.dest)
-
-    def sign(self):
-        """ Return signatures for the charm build manifest. We need to do this
-        because the addon template files were added dynamically """
-        sigs = {}
-        for file in os.listdir(self.dest):
-            path = self.dest / file
-            relpath = path.relpath(self.target.directory)
-            sigs[relpath] = (
-                self.current.url,
-                "dynamic",
-                charmtools.utils.sign(path)
-            )
-        return sigs
-
-
-def parse_args():
-    """ Parse args. This is solely done for the usage output with -h """
-    parser = argparse.ArgumentParser(description=description)
-    parser.parse_args()
-
-
-def main():
-    """ Update addons into the layer's templates/addons folder """
-    parse_args()
-    os.chdir(os.path.join(os.path.dirname(__file__), ".."))
-    dest = "templates/addons"
-    update_addons(dest)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/cluster/juju/layers/kubernetes-master/templates/kube-apiserver.defaults b/cluster/juju/layers/kubernetes-master/templates/kube-apiserver.defaults
deleted file mode 100644
index 20e00ae2d2c..00000000000
--- a/cluster/juju/layers/kubernetes-master/templates/kube-apiserver.defaults
+++ /dev/null
@@ -1,17 +0,0 @@
-###
-# kubernetes system config
-#
-# The following values are used to configure the kube-apiserver
-#
-
-# The address on the local server to listen to.
-KUBE_API_ADDRESS="--insecure-bind-address=127.0.0.1"
-
-# The port on the local server to listen on.
-KUBE_API_PORT="--insecure-port=8080"
-
-# default admission control policies
-KUBE_ADMISSION_CONTROL=""
-
-# Add your own!
-KUBE_API_ARGS="{{ kube_apiserver_flags }}"
diff --git a/cluster/juju/layers/kubernetes-master/templates/kube-apiserver.service b/cluster/juju/layers/kubernetes-master/templates/kube-apiserver.service
deleted file mode 100644
index 6e551382c05..00000000000
--- a/cluster/juju/layers/kubernetes-master/templates/kube-apiserver.service
+++ /dev/null
@@ -1,22 +0,0 @@
-[Unit]
-Description=Kubernetes API Server
-Documentation=http://kubernetes.io/docs/admin/kube-apiserver/
-After=network.target
-
-[Service]
-EnvironmentFile=-/etc/default/kube-defaults
-EnvironmentFile=-/etc/default/kube-apiserver
-ExecStart=/usr/local/bin/kube-apiserver \
-            $KUBE_LOGTOSTDERR \
-            $KUBE_LOG_LEVEL \
-            $KUBE_API_ADDRESS \
-            $KUBE_API_PORT \
-            $KUBE_ALLOW_PRIV \
-            $KUBE_ADMISSION_CONTROL \
-            $KUBE_API_ARGS
-Restart=on-failure
-Type=notify
-LimitNOFILE=65536
-
-[Install]
-WantedBy=multi-user.target
diff --git a/cluster/juju/layers/kubernetes-master/templates/kube-controller-manager.defaults b/cluster/juju/layers/kubernetes-master/templates/kube-controller-manager.defaults
deleted file mode 100644
index 5993a639227..00000000000
--- a/cluster/juju/layers/kubernetes-master/templates/kube-controller-manager.defaults
+++ /dev/null
@@ -1,8 +0,0 @@
-
-###
-# The following values are used to configure the kubernetes controller-manager
-
-# defaults from config and apiserver should be adequate
-
-# Add your own!
-KUBE_CONTROLLER_MANAGER_ARGS="{{ kube_controller_manager_flags }}"
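
These template deletions (continuing below) remove the old hand-rolled systemd units and `/etc/default` files; equivalent service definitions now ship inside the snaps themselves. For reference, a sketch of how the debugging workflow changes:

```shell
journalctl -u kube-apiserver              # old: charm-rendered systemd unit
journalctl -u snap.kube-apiserver.daemon  # new: unit provided by the snap
```
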
diff --git a/cluster/juju/layers/kubernetes-master/templates/kube-controller-manager.service b/cluster/juju/layers/kubernetes-master/templates/kube-controller-manager.service
deleted file mode 100644
index 8c951e7c073..00000000000
--- a/cluster/juju/layers/kubernetes-master/templates/kube-controller-manager.service
+++ /dev/null
@@ -1,18 +0,0 @@
-
-[Unit]
-Description=Kubernetes Controller Manager
-Documentation=https://github.com/GoogleCloudPlatform/kubernetes
-
-[Service]
-EnvironmentFile=-/etc/default/kube-defaults
-EnvironmentFile=-/etc/default/kube-controller-manager
-ExecStart=/usr/local/bin/kube-controller-manager \
-            $KUBE_LOGTOSTDERR \
-            $KUBE_LOG_LEVEL \
-            $KUBE_MASTER \
-            $KUBE_CONTROLLER_MANAGER_ARGS
-Restart=on-failure
-LimitNOFILE=65536
-
-[Install]
-WantedBy=multi-user.target
diff --git a/cluster/juju/layers/kubernetes-master/templates/kube-defaults.defaults b/cluster/juju/layers/kubernetes-master/templates/kube-defaults.defaults
deleted file mode 100644
index 3fe065bf570..00000000000
--- a/cluster/juju/layers/kubernetes-master/templates/kube-defaults.defaults
+++ /dev/null
@@ -1,22 +0,0 @@
-###
-# kubernetes system config
-#
-# The following values are used to configure various aspects of all
-# kubernetes services, including
-#
-#   kube-apiserver.service
-#   kube-controller-manager.service
-#   kube-scheduler.service
-#   kubelet.service
-#   kube-proxy.service
-# logging to stderr means we get it in the systemd journal
-KUBE_LOGTOSTDERR="--logtostderr=true"
-
-# journal message level, 0 is debug
-KUBE_LOG_LEVEL="--v=0"
-
-# Should this cluster be allowed to run privileged docker containers
-KUBE_ALLOW_PRIV="{{ kube_allow_priv }}"
-
-# How the controller-manager, scheduler, and proxy find the apiserver
-KUBE_MASTER="--master=http://127.0.0.1:8080"
diff --git a/cluster/juju/layers/kubernetes-master/templates/kube-scheduler.defaults b/cluster/juju/layers/kubernetes-master/templates/kube-scheduler.defaults
deleted file mode 100644
index a45753a0f47..00000000000
--- a/cluster/juju/layers/kubernetes-master/templates/kube-scheduler.defaults
+++ /dev/null
@@ -1,7 +0,0 @@
-###
-# kubernetes scheduler config
-
-# default config should be adequate
-
-# Add your own!
-KUBE_SCHEDULER_ARGS="{{ kube_scheduler_flags }}"
diff --git a/cluster/juju/layers/kubernetes-master/templates/kube-scheduler.service b/cluster/juju/layers/kubernetes-master/templates/kube-scheduler.service
deleted file mode 100644
index 7f3ee583a71..00000000000
--- a/cluster/juju/layers/kubernetes-master/templates/kube-scheduler.service
+++ /dev/null
@@ -1,17 +0,0 @@
-[Unit]
-Description=Kubernetes Scheduler Plugin
-Documentation=http://kubernetes.io/docs/admin/multiple-schedulers/
-
-[Service]
-EnvironmentFile=-/etc/default/kube-defaults
-EnvironmentFile=-/etc/default/kube-scheduler
-ExecStart=/usr/local/bin/kube-scheduler \
-            $KUBE_LOGTOSTDERR \
-            $KUBE_LOG_LEVEL \
-            $KUBE_MASTER \
-            $KUBE_SCHEDULER_ARGS
-Restart=on-failure
-LimitNOFILE=65536
-
-[Install]
-WantedBy=multi-user.target
diff --git a/cluster/juju/layers/kubernetes-worker/actions.yaml b/cluster/juju/layers/kubernetes-worker/actions.yaml
index c24a589a5ad..22c4d17368c 100644
--- a/cluster/juju/layers/kubernetes-worker/actions.yaml
+++ b/cluster/juju/layers/kubernetes-worker/actions.yaml
@@ -15,3 +15,5 @@ microbot:
       type: boolean
       default: False
       description: Removes a microbots deployment, service, and ingress if True.
+upgrade:
+  description: Upgrade the kubernetes snaps
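
The new `upgrade` action pairs with the `require-manual-upgrade` worker option added later in this patch: with the default of `true`, worker snap upgrades are deferred until an operator runs the action. A sketch:

```shell
# Trigger the deferred worker snap upgrade on one unit.
juju run-action kubernetes-worker/0 upgrade
```
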
+upgrade: + description: Upgrade the kubernetes snaps diff --git a/cluster/juju/layers/kubernetes-worker/actions/microbot b/cluster/juju/layers/kubernetes-worker/actions/microbot index e99d2ee3596..0306747061f 100755 --- a/cluster/juju/layers/kubernetes-worker/actions/microbot +++ b/cluster/juju/layers/kubernetes-worker/actions/microbot @@ -14,6 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import sys from charmhelpers.core.hookenv import action_get @@ -22,6 +23,7 @@ from charmhelpers.core.hookenv import unit_public_ip from charms.templating.jinja2 import render from subprocess import call +os.environ['PATH'] += os.pathsep + os.path.join(os.sep, 'snap', 'bin') context = {} context['replicas'] = action_get('replicas') @@ -32,7 +34,7 @@ if not context['replicas']: context['replicas'] = 3 # Declare a kubectl template when invoking kubectl -kubectl = ['kubectl', '--kubeconfig=/srv/kubernetes/config'] +kubectl = ['kubectl', '--kubeconfig=/root/cdk/kubeconfig'] # Remove deployment if requested if context['delete']: @@ -56,11 +58,11 @@ if context['delete']: # Creation request -render('microbot-example.yaml', '/etc/kubernetes/addons/microbot.yaml', +render('microbot-example.yaml', '/root/cdk/addons/microbot.yaml', context) create_command = kubectl + ['create', '-f', - '/etc/kubernetes/addons/microbot.yaml'] + '/root/cdk/addons/microbot.yaml'] create_response = call(create_command) diff --git a/cluster/juju/layers/kubernetes-worker/actions/pause b/cluster/juju/layers/kubernetes-worker/actions/pause index b8e6117c223..b3aba36baf8 100755 --- a/cluster/juju/layers/kubernetes-worker/actions/pause +++ b/cluster/juju/layers/kubernetes-worker/actions/pause @@ -2,6 +2,8 @@ set -ex -kubectl --kubeconfig=/srv/kubernetes/config cordon $(hostname) -kubectl --kubeconfig=/srv/kubernetes/config drain $(hostname) --force +export PATH=$PATH:/snap/bin + +kubectl --kubeconfig=/root/cdk/kubeconfig cordon $(hostname) +kubectl --kubeconfig=/root/cdk/kubeconfig drain $(hostname) --force status-set 'waiting' 'Kubernetes unit paused' diff --git a/cluster/juju/layers/kubernetes-worker/actions/resume b/cluster/juju/layers/kubernetes-worker/actions/resume index 2f28b93c923..6131e8e037b 100755 --- a/cluster/juju/layers/kubernetes-worker/actions/resume +++ b/cluster/juju/layers/kubernetes-worker/actions/resume @@ -2,5 +2,7 @@ set -ex -kubectl --kubeconfig=/srv/kubernetes/config uncordon $(hostname) +export PATH=$PATH:/snap/bin + +kubectl --kubeconfig=/root/cdk/kubeconfig uncordon $(hostname) status-set 'active' 'Kubernetes unit resumed' diff --git a/cluster/juju/layers/kubernetes-worker/actions/upgrade b/cluster/juju/layers/kubernetes-worker/actions/upgrade new file mode 100755 index 00000000000..a97c19b3db0 --- /dev/null +++ b/cluster/juju/layers/kubernetes-worker/actions/upgrade @@ -0,0 +1,5 @@ +#!/bin/sh +set -eux + +charms.reactive set_state kubernetes-worker.snaps.upgrade-specified +exec hooks/config-changed diff --git a/cluster/juju/layers/kubernetes-worker/config.yaml b/cluster/juju/layers/kubernetes-worker/config.yaml index b3d345d3127..730b70907e2 100644 --- a/cluster/juju/layers/kubernetes-worker/config.yaml +++ b/cluster/juju/layers/kubernetes-worker/config.yaml @@ -20,3 +20,14 @@ options: mode by default. If "false", kubelet will never run in privileged mode. If "auto", kubelet will not run in privileged mode by default, but will switch to privileged mode if gpu hardware is detected. 
+ channel: + type: string + default: "stable" + description: | + Snap channel to install Kubernetes worker services from + require-manual-upgrade: + type: boolean + default: true + description: | + When true, worker services will not be upgraded until the user triggers + it manually by running the upgrade action. diff --git a/cluster/juju/layers/kubernetes-worker/debug-scripts/kubectl b/cluster/juju/layers/kubernetes-worker/debug-scripts/kubectl index 290d73cdf79..1192c3c9a12 100755 --- a/cluster/juju/layers/kubernetes-worker/debug-scripts/kubectl +++ b/cluster/juju/layers/kubernetes-worker/debug-scripts/kubectl @@ -1,7 +1,9 @@ #!/bin/sh set -ux -alias kubectl="kubectl --kubeconfig=/srv/kubernetes/config" +export PATH=$PATH:/snap/bin + +alias kubectl="kubectl --kubeconfig=/root/cdk/kubeconfig" kubectl cluster-info > $DEBUG_SCRIPT_DIR/cluster-info kubectl cluster-info dump > $DEBUG_SCRIPT_DIR/cluster-info-dump diff --git a/cluster/juju/layers/kubernetes-worker/debug-scripts/kubernetes-worker-services b/cluster/juju/layers/kubernetes-worker/debug-scripts/kubernetes-worker-services index ff8390f4fe3..4f9dfa23b33 100755 --- a/cluster/juju/layers/kubernetes-worker/debug-scripts/kubernetes-worker-services +++ b/cluster/juju/layers/kubernetes-worker/debug-scripts/kubernetes-worker-services @@ -2,12 +2,8 @@ set -ux for service in kubelet kube-proxy; do - systemctl status $service > $DEBUG_SCRIPT_DIR/$service-systemctl-status - journalctl -u $service > $DEBUG_SCRIPT_DIR/$service-journal + systemctl status snap.$service.daemon > $DEBUG_SCRIPT_DIR/$service-systemctl-status + journalctl -u snap.$service.daemon > $DEBUG_SCRIPT_DIR/$service-journal done -mkdir -p $DEBUG_SCRIPT_DIR/etc-default -cp -v /etc/default/kube* $DEBUG_SCRIPT_DIR/etc-default - -mkdir -p $DEBUG_SCRIPT_DIR/lib-systemd-system -cp -v /lib/systemd/system/kube* $DEBUG_SCRIPT_DIR/lib-systemd-system +# FIXME: get the snap config or something diff --git a/cluster/juju/layers/kubernetes-worker/layer.yaml b/cluster/juju/layers/kubernetes-worker/layer.yaml index ffe1fa154d2..014fec02bac 100644 --- a/cluster/juju/layers/kubernetes-worker/layer.yaml +++ b/cluster/juju/layers/kubernetes-worker/layer.yaml @@ -2,6 +2,7 @@ repo: https://github.com/kubernetes/kubernetes.git includes: - 'layer:basic' - 'layer:debug' + - 'layer:snap' - 'layer:docker' - 'layer:metrics' - 'layer:nagios' @@ -18,8 +19,8 @@ options: - 'ceph-common' - 'socat' tls-client: - ca_certificate_path: '/srv/kubernetes/ca.crt' - server_certificate_path: '/srv/kubernetes/server.crt' - server_key_path: '/srv/kubernetes/server.key' - client_certificate_path: '/srv/kubernetes/client.crt' - client_key_path: '/srv/kubernetes/client.key' + ca_certificate_path: '/root/cdk/ca.crt' + server_certificate_path: '/root/cdk/server.crt' + server_key_path: '/root/cdk/server.key' + client_certificate_path: '/root/cdk/client.crt' + client_key_path: '/root/cdk/client.key' diff --git a/cluster/juju/layers/kubernetes-worker/lib/charms/kubernetes/common.py b/cluster/juju/layers/kubernetes-worker/lib/charms/kubernetes/common.py index d7fbf01aaa0..054399aeef2 100644 --- a/cluster/juju/layers/kubernetes-worker/lib/charms/kubernetes/common.py +++ b/cluster/juju/layers/kubernetes-worker/lib/charms/kubernetes/common.py @@ -17,10 +17,6 @@ import re import subprocess -from charmhelpers.core import unitdata - -BIN_VERSIONS = 'bin_versions' - def get_version(bin_name): """Get the version of an installed Kubernetes binary. 
@@ -33,31 +29,6 @@ def get_version(bin_name): >>> `get_version('kubelet') (1, 6, 0) - """ - db = unitdata.kv() - bin_versions = db.get(BIN_VERSIONS, {}) - - cached_version = bin_versions.get(bin_name) - if cached_version: - return tuple(cached_version) - - version = _get_bin_version(bin_name) - bin_versions[bin_name] = list(version) - db.set(BIN_VERSIONS, bin_versions) - return version - - -def reset_versions(): - """Reset the cache of bin versions. - - """ - db = unitdata.kv() - db.unset(BIN_VERSIONS) - - -def _get_bin_version(bin_name): - """Get a binary version by calling it with --version and parsing output. - """ cmd = '{} --version'.format(bin_name).split() version_string = subprocess.check_output(cmd).decode('utf-8') diff --git a/cluster/juju/layers/kubernetes-worker/lib/charms/kubernetes/flagmanager.py b/cluster/juju/layers/kubernetes-worker/lib/charms/kubernetes/flagmanager.py index 0ff013b4c39..7fe5737a6ef 100644 --- a/cluster/juju/layers/kubernetes-worker/lib/charms/kubernetes/flagmanager.py +++ b/cluster/juju/layers/kubernetes-worker/lib/charms/kubernetes/flagmanager.py @@ -118,6 +118,13 @@ class FlagManager: """ return self.data.get(key, default) + def destroy_all(self): + ''' + Destructively removes all data from the FlagManager. + ''' + self.data.clear() + self.__save() + def to_s(self): ''' Render the flags to a single string, prepared for the Docker diff --git a/cluster/juju/layers/kubernetes-worker/metadata.yaml b/cluster/juju/layers/kubernetes-worker/metadata.yaml index 23bc3b6bd46..10696fe40ea 100644 --- a/cluster/juju/layers/kubernetes-worker/metadata.yaml +++ b/cluster/juju/layers/kubernetes-worker/metadata.yaml @@ -29,7 +29,19 @@ provides: interface: kubernetes-cni scope: container resources: - kubernetes: + cni: type: file - filename: kubernetes.tar.gz - description: "An archive of kubernetes binaries for the worker." + filename: cni.tgz + description: CNI plugins + kubectl: + type: file + filename: kubectl.snap + description: kubectl snap + kubelet: + type: file + filename: kubelet.snap + description: kubelet snap + kube-proxy: + type: file + filename: kube-proxy.snap + description: kube-proxy snap diff --git a/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py b/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py index 47c0cf778c0..86511542969 100644 --- a/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py +++ b/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py @@ -15,40 +15,133 @@ # limitations under the License. 
import os +import shutil from shlex import split -from subprocess import call, check_call, check_output +from subprocess import check_call, check_output from subprocess import CalledProcessError from socket import gethostname from charms import layer +from charms.layer import snap from charms.reactive import hook -from charms.reactive import set_state, remove_state +from charms.reactive import set_state, remove_state, is_state from charms.reactive import when, when_any, when_not -from charms.reactive.helpers import data_changed -from charms.kubernetes.common import get_version, reset_versions + +from charms.kubernetes.common import get_version from charms.kubernetes.flagmanager import FlagManager + +from charms.reactive.helpers import data_changed, any_file_changed from charms.templating.jinja2 import render -from charmhelpers.core import hookenv -from charmhelpers.core.host import service_stop -from charmhelpers.core.host import service_restart +from charmhelpers.core import hookenv, unitdata +from charmhelpers.core.host import service_stop, service_restart from charmhelpers.contrib.charmsupport import nrpe -kubeconfig_path = '/srv/kubernetes/config' +kubeconfig_path = '/root/cdk/kubeconfig' + +os.environ['PATH'] += os.pathsep + os.path.join(os.sep, 'snap', 'bin') + +db = unitdata.kv() @hook('upgrade-charm') -def remove_installed_state(): - remove_state('kubernetes-worker.components.installed') +def upgrade_charm(): + cleanup_pre_snap_services() + check_resources_for_upgrade_needed() # Remove gpu.enabled state so we can reconfigure gpu-related kubelet flags, # since they can differ between k8s versions remove_state('kubernetes-worker.gpu.enabled') kubelet_opts = FlagManager('kubelet') - kubelet_opts.destroy('--feature-gates') - kubelet_opts.destroy('--experimental-nvidia-gpus') + kubelet_opts.destroy('feature-gates') + kubelet_opts.destroy('experimental-nvidia-gpus') + + remove_state('kubernetes-worker.cni-plugins.installed') + remove_state('kubernetes-worker.config.created') + remove_state('kubernetes-worker.ingress.available') + set_state('kubernetes-worker.restart-needed') + + +def check_resources_for_upgrade_needed(): + hookenv.status_set('maintenance', 'Checking resources') + resources = ['kubectl', 'kubelet', 'kube-proxy'] + paths = [hookenv.resource_get(resource) for resource in resources] + if any_file_changed(paths): + set_upgrade_needed() + + +def set_upgrade_needed(): + set_state('kubernetes-worker.snaps.upgrade-needed') + config = hookenv.config() + previous_channel = config.previous('channel') + require_manual = config.get('require-manual-upgrade') + if previous_channel is None or not require_manual: + set_state('kubernetes-worker.snaps.upgrade-specified') + + +def cleanup_pre_snap_services(): + # remove old states + remove_state('kubernetes-worker.components.installed') + + # disable old services + services = ['kubelet', 'kube-proxy'] + for service in services: + hookenv.log('Stopping {0} service.'.format(service)) + service_stop(service) + + # cleanup old files + files = [ + "/lib/systemd/system/kubelet.service", + "/lib/systemd/system/kube-proxy.service", + "/etc/default/kube-default", + "/etc/default/kubelet", + "/etc/default/kube-proxy", + "/srv/kubernetes", + "/usr/local/bin/kubectl", + "/usr/local/bin/kubelet", + "/usr/local/bin/kube-proxy", + "/etc/kubernetes" + ] + for file in files: + if os.path.isdir(file): + hookenv.log("Removing directory: " + file) + shutil.rmtree(file) + elif os.path.isfile(file): + hookenv.log("Removing file: " + file) + os.remove(file) + + # 
cleanup old flagmanagers + FlagManager('kubelet').destroy_all() + FlagManager('kube-proxy').destroy_all() + + +@when('config.changed.channel') +def channel_changed(): + set_upgrade_needed() + + +@when('kubernetes-worker.snaps.upgrade-needed') +@when_not('kubernetes-worker.snaps.upgrade-specified') +def upgrade_needed_status(): + msg = 'Needs manual upgrade, run the upgrade action' + hookenv.status_set('blocked', msg) + + +@when('kubernetes-worker.snaps.upgrade-specified') +def install_snaps(): + check_resources_for_upgrade_needed() + channel = hookenv.config('channel') + hookenv.status_set('maintenance', 'Installing kubectl snap') + snap.install('kubectl', channel=channel, classic=True) + hookenv.status_set('maintenance', 'Installing kubelet snap') + snap.install('kubelet', channel=channel, classic=True) + hookenv.status_set('maintenance', 'Installing kube-proxy snap') + snap.install('kube-proxy', channel=channel, classic=True) + set_state('kubernetes-worker.snaps.installed') + remove_state('kubernetes-worker.snaps.upgrade-needed') + remove_state('kubernetes-worker.snaps.upgrade-specified') @hook('stop') @@ -57,52 +150,49 @@ def shutdown(): - delete the current node - stop the kubelet service - stop the kube-proxy service - - remove the 'kubernetes-worker.components.installed' state + - remove the 'kubernetes-worker.cni-plugins.installed' state ''' kubectl('delete', 'node', gethostname()) service_stop('kubelet') service_stop('kube-proxy') - remove_state('kubernetes-worker.components.installed') + remove_state('kubernetes-worker.cni-plugins.installed') @when('docker.available') -@when_not('kubernetes-worker.components.installed') -def install_kubernetes_components(): - ''' Unpack the kubernetes worker binaries ''' +@when_not('kubernetes-worker.cni-plugins.installed') +def install_cni_plugins(): + ''' Unpack the cni-plugins resource ''' charm_dir = os.getenv('CHARM_DIR') # Get the resource via resource_get try: - archive = hookenv.resource_get('kubernetes') + archive = hookenv.resource_get('cni') except Exception: - message = 'Error fetching the kubernetes resource.' + message = 'Error fetching the cni resource.' 
hookenv.log(message) hookenv.status_set('blocked', message) return if not archive: - hookenv.log('Missing kubernetes resource.') - hookenv.status_set('blocked', 'Missing kubernetes resource.') + hookenv.log('Missing cni resource.') + hookenv.status_set('blocked', 'Missing cni resource.') return # Handle null resource publication, we check if filesize < 1mb filesize = os.stat(archive).st_size if filesize < 1000000: - hookenv.status_set('blocked', 'Incomplete kubernetes resource.') + hookenv.status_set('blocked', 'Incomplete cni resource.') return - hookenv.status_set('maintenance', 'Unpacking kubernetes resource.') + hookenv.status_set('maintenance', 'Unpacking cni resource.') - unpack_path = '{}/files/kubernetes'.format(charm_dir) + unpack_path = '{}/files/cni'.format(charm_dir) os.makedirs(unpack_path, exist_ok=True) cmd = ['tar', 'xfvz', archive, '-C', unpack_path] hookenv.log(cmd) check_call(cmd) apps = [ - {'name': 'kubelet', 'path': '/usr/local/bin'}, - {'name': 'kube-proxy', 'path': '/usr/local/bin'}, - {'name': 'kubectl', 'path': '/usr/local/bin'}, {'name': 'loopback', 'path': '/opt/cni/bin'} ] @@ -113,11 +203,10 @@ def install_kubernetes_components(): hookenv.log(install) check_call(install) - reset_versions() - set_state('kubernetes-worker.components.installed') + set_state('kubernetes-worker.cni-plugins.installed') -@when('kubernetes-worker.components.installed') +@when('kubernetes-worker.snaps.installed') def set_app_version(): ''' Declare the application version to juju ''' cmd = ['kubelet', '--version'] @@ -125,7 +214,7 @@ def set_app_version(): hookenv.application_version_set(version.split(b' v')[-1].rstrip()) -@when('kubernetes-worker.components.installed') +@when('kubernetes-worker.snaps.installed') @when_not('kube-control.dns.available') def notify_user_transient_status(): ''' Notify to the user we are in a transient state and the application @@ -140,7 +229,9 @@ def notify_user_transient_status(): hookenv.status_set('waiting', 'Waiting for cluster DNS.') -@when('kubernetes-worker.components.installed', 'kube-control.dns.available') +@when('kubernetes-worker.snaps.installed', + 'kube-control.dns.available') +@when_not('kubernetes-worker.snaps.upgrade-needed') def charm_status(kube_control): '''Update the status message with the current status of kubelet.''' update_kubelet_status() @@ -150,10 +241,10 @@ def update_kubelet_status(): ''' There are different states that the kubelet can be in, where we are waiting for dns, waiting for cluster turnup, or ready to serve applications.''' - if (_systemctl_is_active('kubelet')): + if (_systemctl_is_active('snap.kubelet.daemon')): hookenv.status_set('active', 'Kubernetes worker running.') # if kubelet is not running, we're waiting on something else to converge - elif (not _systemctl_is_active('kubelet')): + elif (not _systemctl_is_active('snap.kubelet.daemon')): hookenv.status_set('waiting', 'Waiting for kubelet to start.') @@ -178,14 +269,13 @@ def send_data(tls): tls.request_server_cert(common_name, sans, certificate_name) -@when('kubernetes-worker.components.installed', 'kube-api-endpoint.available', +@when('kubernetes-worker.snaps.installed', 'kube-api-endpoint.available', 'tls_client.ca.saved', 'tls_client.client.certificate.saved', 'tls_client.client.key.saved', 'tls_client.server.certificate.saved', 'tls_client.server.key.saved', 'kube-control.dns.available', 'cni.available') def start_worker(kube_api, kube_control, cni): ''' Start kubelet using the provided API and DNS info.''' - config = hookenv.config() servers = 
get_kube_api_servers(kube_api) # Note that the DNS server doesn't necessarily exist at this point. We know # what its IP will eventually be, though, so we can go ahead and configure @@ -194,28 +284,19 @@ def start_worker(kube_api, kube_control, cni): dns = kube_control.get_dns() - if (data_changed('kube-api-servers', servers) or + if (is_state('kubernetes-worker.restart-needed') or + data_changed('kube-api-servers', servers) or data_changed('kube-dns', dns)): - # Create FlagManager for kubelet and add dns flags - opts = FlagManager('kubelet') - opts.add('--cluster-dns', dns['sdn-ip']) # FIXME sdn-ip needs a rename - opts.add('--cluster-domain', dns['domain']) - - # Create FlagManager for KUBE_MASTER and add api server addresses - kube_master_opts = FlagManager('KUBE_MASTER') - kube_master_opts.add('--master', ','.join(servers)) - # set --allow-privileged flag for kubelet - set_privileged( - "true" if config['allow-privileged'] == "true" else "false", - render_config=False) + set_privileged() create_config(servers[0]) - render_init_scripts() + configure_worker_services(servers, dns) set_state('kubernetes-worker.config.created') restart_unit_services() update_kubelet_status() + remove_state('kubernetes-worker.restart-needed') @when('cni.connected') @@ -254,9 +335,9 @@ def render_and_launch_ingress(): else: hookenv.log('Deleting the http backend and ingress.') kubectl_manifest('delete', - '/etc/kubernetes/addons/default-http-backend.yaml') + '/root/cdk/addons/default-http-backend.yaml') kubectl_manifest('delete', - '/etc/kubernetes/addons/ingress-replication-controller.yaml') # noqa + '/root/cdk/addons/ingress-replication-controller.yaml') # noqa hookenv.close_port(80) hookenv.close_port(443) @@ -338,46 +419,39 @@ def create_config(server): user='kubelet') -def render_init_scripts(): - ''' We have related to either an api server or a load balancer connected - to the apiserver. 
Render the config files and prepare for launch ''' - context = {} - context.update(hookenv.config()) - +def configure_worker_services(api_servers, dns): + ''' Add remaining flags for the worker services and configure snaps to use + them ''' layer_options = layer.options('tls-client') ca_cert_path = layer_options.get('ca_certificate_path') server_cert_path = layer_options.get('server_certificate_path') server_key_path = layer_options.get('server_key_path') - unit_name = os.getenv('JUJU_UNIT_NAME').replace('/', '-') - context.update({ - 'kube_allow_priv': FlagManager('KUBE_ALLOW_PRIV').to_s(), - 'kube_api_endpoint': FlagManager('KUBE_MASTER').to_s(), - 'JUJU_UNIT_NAME': unit_name, - }) - kubelet_opts = FlagManager('kubelet') - kubelet_opts.add('--require-kubeconfig', None) - kubelet_opts.add('--kubeconfig', kubeconfig_path) - kubelet_opts.add('--network-plugin', 'cni') - kubelet_opts.add('--anonymous-auth', 'false') - kubelet_opts.add('--client-ca-file', ca_cert_path) - kubelet_opts.add('--tls-cert-file', server_cert_path) - kubelet_opts.add('--tls-private-key-file', server_key_path) - context['kubelet_opts'] = kubelet_opts.to_s() + kubelet_opts.add('require-kubeconfig', 'true') + kubelet_opts.add('kubeconfig', kubeconfig_path) + kubelet_opts.add('network-plugin', 'cni') + kubelet_opts.add('logtostderr', 'true') + kubelet_opts.add('v', '0') + kubelet_opts.add('address', '0.0.0.0') + kubelet_opts.add('port', '10250') + kubelet_opts.add('cluster-dns', dns['sdn-ip']) + kubelet_opts.add('cluster-domain', dns['domain']) + kubelet_opts.add('anonymous-auth', 'false') + kubelet_opts.add('client-ca-file', ca_cert_path) + kubelet_opts.add('tls-cert-file', server_cert_path) + kubelet_opts.add('tls-private-key-file', server_key_path) kube_proxy_opts = FlagManager('kube-proxy') - kube_proxy_opts.add('--kubeconfig', kubeconfig_path) - context['kube_proxy_opts'] = kube_proxy_opts.to_s() + kube_proxy_opts.add('kubeconfig', kubeconfig_path) + kube_proxy_opts.add('logtostderr', 'true') + kube_proxy_opts.add('v', '0') + kube_proxy_opts.add('master', ','.join(api_servers), strict=True) - os.makedirs('/var/lib/kubelet', exist_ok=True) - - render('kube-default', '/etc/default/kube-default', context) - render('kubelet.defaults', '/etc/default/kubelet', context) - render('kubelet.service', '/lib/systemd/system/kubelet.service', context) - render('kube-proxy.defaults', '/etc/default/kube-proxy', context) - render('kube-proxy.service', '/lib/systemd/system/kube-proxy.service', - context) + cmd = ['snap', 'set', 'kubelet'] + kubelet_opts.to_s().split(' ') + check_call(cmd) + cmd = ['snap', 'set', 'kube-proxy'] + kube_proxy_opts.to_s().split(' ') + check_call(cmd) def create_kubeconfig(kubeconfig, server, ca, key, certificate, user='ubuntu', @@ -406,38 +480,45 @@ def launch_default_ingress_controller(): ''' Launch the Kubernetes ingress controller & default backend (404) ''' context = {} context['arch'] = arch() - addon_path = '/etc/kubernetes/addons/{}' - manifest = addon_path.format('default-http-backend.yaml') + addon_path = '/root/cdk/addons/{}' + # Render the default http backend (404) replicationcontroller manifest + manifest = addon_path.format('default-http-backend.yaml') render('default-http-backend.yaml', manifest, context) hookenv.log('Creating the default http backend.') - kubectl_manifest('create', manifest) + try: + kubectl('apply', '-f', manifest) + except CalledProcessError as e: + hookenv.log(e) + hookenv.log('Failed to create default-http-backend. 
Will attempt again next update.') # noqa + hookenv.close_port(80) + hookenv.close_port(443) + return + # Render the ingress replication controller manifest manifest = addon_path.format('ingress-replication-controller.yaml') render('ingress-replication-controller.yaml', manifest, context) - if kubectl_manifest('create', manifest): - hookenv.log('Creating the ingress replication controller.') - set_state('kubernetes-worker.ingress.available') - hookenv.open_port(80) - hookenv.open_port(443) - else: + hookenv.log('Creating the ingress replication controller.') + try: + kubectl('apply', '-f', manifest) + except CalledProcessError as e: + hookenv.log(e) hookenv.log('Failed to create ingress controller. Will attempt again next update.') # noqa hookenv.close_port(80) hookenv.close_port(443) + return + + set_state('kubernetes-worker.ingress.available') + hookenv.open_port(80) + hookenv.open_port(443) def restart_unit_services(): - '''Reload the systemd configuration and restart the services.''' - # Tell systemd to reload configuration from disk for all daemons. - call(['systemctl', 'daemon-reload']) - # Ensure the services available after rebooting. - call(['systemctl', 'enable', 'kubelet.service']) - call(['systemctl', 'enable', 'kube-proxy.service']) - # Restart the services. - hookenv.log('Restarting kubelet, and kube-proxy.') - call(['systemctl', 'restart', 'kubelet']) - remove_state('kubernetes-worker.kubelet.restart') - call(['systemctl', 'restart', 'kube-proxy']) + '''Restart worker services.''' + hookenv.log('Restarting kubelet and kube-proxy.') + services = ['kube-proxy', 'kubelet'] + for service in services: + service_restart('snap.%s.daemon' % service) def get_kube_api_servers(kube_api): @@ -504,8 +585,7 @@ def initial_nrpe_config(nagios=None): @when_any('config.changed.nagios_context', 'config.changed.nagios_servicegroups') def update_nrpe_config(unused=None): - services = ('kubelet', 'kube-proxy') - + services = ('snap.kubelet.daemon', 'snap.kube-proxy.daemon') hostname = nrpe.get_nagios_hostname() current_unit = nrpe.get_nagios_unit_name() nrpe_setup = nrpe.NRPE(hostname=hostname) @@ -519,7 +599,7 @@ def remove_nrpe_config(nagios=None): remove_state('nrpe-external-master.initial-config') # List of systemd services for which the checks will be removed - services = ('kubelet', 'kube-proxy') + services = ('snap.kubelet.daemon', 'snap.kube-proxy.daemon') # The current nrpe-external-master interface doesn't handle a lot of logic, # use the charm-helpers code for now. @@ -530,41 +610,26 @@ def remove_nrpe_config(nagios=None): nrpe_setup.remove_check(shortname=service) -def set_privileged(privileged, render_config=True): - """Update the KUBE_ALLOW_PRIV flag for kubelet and re-render config files. - - If the flag already matches the requested value, this is a no-op. - - :param str privileged: "true" or "false" - :param bool render_config: whether to render new config files - :return: True if the flag was changed, else false +def set_privileged(): + """Update the allow-privileged flag for kube-apiserver. 
""" - if privileged == "true": + privileged = hookenv.config('allow-privileged') + if privileged == 'auto': + gpu_enabled = is_state('kubernetes-worker.gpu.enabled') + privileged = 'true' if gpu_enabled else 'false' + + flag = 'allow-privileged' + hookenv.log('Setting {}={}'.format(flag, privileged)) + + kubelet_opts = FlagManager('kubelet') + kubelet_opts.add(flag, privileged) + + if privileged == 'true': set_state('kubernetes-worker.privileged') else: remove_state('kubernetes-worker.privileged') - flag = '--allow-privileged' - kube_allow_priv_opts = FlagManager('KUBE_ALLOW_PRIV') - if kube_allow_priv_opts.get(flag) == privileged: - # Flag isn't changing, nothing to do - return False - - hookenv.log('Setting {}={}'.format(flag, privileged)) - - # Update --allow-privileged flag value - kube_allow_priv_opts.add(flag, privileged, strict=True) - - # re-render config with new options - if render_config: - render_init_scripts() - - # signal that we need a kubelet restart - set_state('kubernetes-worker.kubelet.restart') - - return True - @when('config.changed.allow-privileged') @when('kubernetes-worker.config.created') @@ -572,29 +637,11 @@ def on_config_allow_privileged_change(): """React to changed 'allow-privileged' config value. """ - config = hookenv.config() - privileged = config['allow-privileged'] - if privileged == "auto": - return - - set_privileged(privileged) + set_state('kubernetes-worker.restart-needed') remove_state('config.changed.allow-privileged') -@when('kubernetes-worker.kubelet.restart') -def restart_kubelet(): - """Restart kubelet. - - """ - # Make sure systemd loads latest service config - call(['systemctl', 'daemon-reload']) - # Restart kubelet - service_restart('kubelet') - remove_state('kubernetes-worker.kubelet.restart') - - @when('cuda.installed') -@when('kubernetes-worker.components.installed') @when('kubernetes-worker.config.created') @when_not('kubernetes-worker.gpu.enabled') def enable_gpu(): @@ -614,14 +661,10 @@ def enable_gpu(): kubelet_opts = FlagManager('kubelet') if get_version('kubelet') < (1, 6): hookenv.log('Adding --experimental-nvidia-gpus=1 to kubelet') - kubelet_opts.add('--experimental-nvidia-gpus', '1') + kubelet_opts.add('experimental-nvidia-gpus', '1') else: hookenv.log('Adding --feature-gates=Accelerators=true to kubelet') - kubelet_opts.add('--feature-gates', 'Accelerators=true') - - # enable privileged mode and re-render config files - set_privileged("true", render_config=False) - render_init_scripts() + kubelet_opts.add('feature-gates', 'Accelerators=true') # Apply node labels _apply_node_label('gpu=true', overwrite=True) @@ -633,7 +676,7 @@ def enable_gpu(): check_call(['nvidia-smi']) set_state('kubernetes-worker.gpu.enabled') - set_state('kubernetes-worker.kubelet.restart') + set_state('kubernetes-worker.restart-needed') @when('kubernetes-worker.gpu.enabled') @@ -650,18 +693,16 @@ def disable_gpu(): kubelet_opts = FlagManager('kubelet') if get_version('kubelet') < (1, 6): - kubelet_opts.destroy('--experimental-nvidia-gpus') + kubelet_opts.destroy('experimental-nvidia-gpus') else: - kubelet_opts.remove('--feature-gates', 'Accelerators=true') - - render_init_scripts() + kubelet_opts.remove('feature-gates', 'Accelerators=true') # Remove node labels _apply_node_label('gpu', delete=True) _apply_node_label('cuda', delete=True) remove_state('kubernetes-worker.gpu.enabled') - set_state('kubernetes-worker.kubelet.restart') + set_state('kubernetes-worker.restart-needed') @when('kubernetes-worker.gpu.enabled') diff --git 
a/cluster/juju/layers/kubernetes-worker/templates/kube-default b/cluster/juju/layers/kubernetes-worker/templates/kube-default deleted file mode 100644 index 9b6e28b1966..00000000000 --- a/cluster/juju/layers/kubernetes-worker/templates/kube-default +++ /dev/null @@ -1,22 +0,0 @@ -### -# kubernetes system config -# -# The following values are used to configure various aspects of all -# kubernetes services, including -# -# kube-apiserver.service -# kube-controller-manager.service -# kube-scheduler.service -# kubelet.service -# kube-proxy.service -# logging to stderr means we get it in the systemd journal -KUBE_LOGTOSTDERR="--logtostderr=true" - -# journal message level, 0 is debug -KUBE_LOG_LEVEL="--v=0" - -# Should this cluster be allowed to run privileged docker containers -KUBE_ALLOW_PRIV="{{ kube_allow_priv }}" - -# How the controller-manager, scheduler, and proxy find the apiserver -KUBE_MASTER="{{ kube_api_endpoint }}" diff --git a/cluster/juju/layers/kubernetes-worker/templates/kube-proxy.defaults b/cluster/juju/layers/kubernetes-worker/templates/kube-proxy.defaults deleted file mode 100644 index fa47aefc893..00000000000 --- a/cluster/juju/layers/kubernetes-worker/templates/kube-proxy.defaults +++ /dev/null @@ -1 +0,0 @@ -KUBE_PROXY_ARGS="{{ kube_proxy_opts }}" diff --git a/cluster/juju/layers/kubernetes-worker/templates/kube-proxy.service b/cluster/juju/layers/kubernetes-worker/templates/kube-proxy.service deleted file mode 100644 index 9e66bedf06c..00000000000 --- a/cluster/juju/layers/kubernetes-worker/templates/kube-proxy.service +++ /dev/null @@ -1,19 +0,0 @@ - -[Unit] -Description=Kubernetes Kube-Proxy Server -Documentation=http://kubernetes.io/docs/admin/kube-proxy/ -After=network.target - -[Service] -EnvironmentFile=-/etc/default/kube-default -EnvironmentFile=-/etc/default/kube-proxy -ExecStart=/usr/local/bin/kube-proxy \ - $KUBE_LOGTOSTDERR \ - $KUBE_LOG_LEVEL \ - $KUBE_MASTER \ - $KUBE_PROXY_ARGS -Restart=on-failure -LimitNOFILE=65536 - -[Install] -WantedBy=multi-user.target diff --git a/cluster/juju/layers/kubernetes-worker/templates/kubelet.defaults b/cluster/juju/layers/kubernetes-worker/templates/kubelet.defaults deleted file mode 100644 index 26b5c5491c3..00000000000 --- a/cluster/juju/layers/kubernetes-worker/templates/kubelet.defaults +++ /dev/null @@ -1,14 +0,0 @@ -# kubernetes kubelet (node) config - -# The address for the info server to serve on (set to 0.0.0.0 or "" for all interfaces) -KUBELET_ADDRESS="--address=0.0.0.0" - -# The port for the info server to serve on -KUBELET_PORT="--port=10250" - -# You may leave this blank to use the actual hostname. If you override this -# reachability problems become your own issue. -# KUBELET_HOSTNAME="--hostname-override={{ JUJU_UNIT_NAME }}" - -# Add your own! 
-KUBELET_ARGS="{{ kubelet_opts }}" diff --git a/cluster/juju/layers/kubernetes-worker/templates/kubelet.service b/cluster/juju/layers/kubernetes-worker/templates/kubelet.service deleted file mode 100644 index b3c20d8022a..00000000000 --- a/cluster/juju/layers/kubernetes-worker/templates/kubelet.service +++ /dev/null @@ -1,22 +0,0 @@ -[Unit] -Description=Kubernetes Kubelet Server -Documentation=http://kubernetes.io/docs/admin/kubelet/ -After=docker.service -Requires=docker.service - -[Service] -WorkingDirectory=/var/lib/kubelet -EnvironmentFile=-/etc/default/kube-default -EnvironmentFile=-/etc/default/kubelet -ExecStart=/usr/local/bin/kubelet \ - $KUBE_LOGTOSTDERR \ - $KUBE_LOG_LEVEL \ - $KUBELET_ADDRESS \ - $KUBELET_PORT \ - $KUBELET_HOSTNAME \ - $KUBE_ALLOW_PRIV \ - $KUBELET_ARGS -Restart=on-failure - -[Install] -WantedBy=multi-user.target diff --git a/hack/verify-flags/exceptions.txt b/hack/verify-flags/exceptions.txt index 0a096da0c2f..951fbffbcde 100644 --- a/hack/verify-flags/exceptions.txt +++ b/hack/verify-flags/exceptions.txt @@ -47,7 +47,7 @@ cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py:def send_clu cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py:def service_cidr(): cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py: context.update({'kube_api_endpoint': ','.join(api_servers), cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py: ca_cert_path = layer_options.get('ca_certificate_path') -cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py:def render_init_scripts(api_servers): +cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py:def configure_worker_services(api_servers, dns): cluster/lib/logging.sh: local source_file=${BASH_SOURCE[$frame_no]} cluster/lib/logging.sh: local source_file=${BASH_SOURCE[$stack_skip]} cluster/log-dump.sh: local -r node_name="${1}" From ebd2f88f6b7537639ce4ab70a7009f154b9f80df Mon Sep 17 00:00:00 2001 From: Jacek N Date: Tue, 14 Mar 2017 15:40:09 +0000 Subject: [PATCH 04/10] Add registry action to the kubernetes-worker layer --- .../juju/layers/kubernetes-worker/README.md | 24 +++- .../layers/kubernetes-worker/actions.yaml | 28 +++- .../layers/kubernetes-worker/actions/registry | 136 ++++++++++++++++++ .../reactive/kubernetes_worker.py | 5 + .../kubernetes-worker/registry-configmap.yaml | 6 + .../ingress-replication-controller.yaml | 6 + .../kubernetes-worker/templates/registry.yaml | 118 +++++++++++++++ 7 files changed, 320 insertions(+), 3 deletions(-) create mode 100755 cluster/juju/layers/kubernetes-worker/actions/registry create mode 100644 cluster/juju/layers/kubernetes-worker/registry-configmap.yaml create mode 100644 cluster/juju/layers/kubernetes-worker/templates/registry.yaml diff --git a/cluster/juju/layers/kubernetes-worker/README.md b/cluster/juju/layers/kubernetes-worker/README.md index b086bcba6f4..ed160f10f80 100644 --- a/cluster/juju/layers/kubernetes-worker/README.md +++ b/cluster/juju/layers/kubernetes-worker/README.md @@ -41,6 +41,27 @@ a unit for maintenance. Resuming the workload will [uncordon](http://kubernetes.io/docs/user-guide/kubectl/kubectl_uncordon/) a paused unit. Workloads will automatically migrate unless otherwise directed via their application declaration. +## Private registry + +With the "registry" action that is part of the kubernetes-worker charm, you can very easily create a private docker registry, with authentication, and available over TLS. 
Please note that the registry deployed with the action is not HA, and uses storage tied to the kubernetes node where the pod is running. So if the registry pod is migrated from one node to another for whatever reason, you will need to re-publish the images. + +### Example usage + +Create the relevant authentication files. Let's say you want user `userA` to authenticate with the password `passwordA`. Then you'll do: + + echo "userA:passwordA" > htpasswd-plain + htpasswd -c -b -B htpasswd userA passwordA + +(the `htpasswd` program comes with the `apache2-utils` package) + +Supposing your registry will be reachable at `myregistry.company.com`, and that you already have your TLS key in the `registry.key` file, and your TLS certificate (with `myregistry.company.com` as Common Name) in the `registry.crt` file, you would then run: + + juju run-action kubernetes-worker/0 registry domain=myregistry.company.com htpasswd="$(base64 -w0 htpasswd)" htpasswd-plain="$(base64 -w0 htpasswd-plain)" tlscert="$(base64 -w0 registry.crt)" tlskey="$(base64 -w0 registry.key)" ingress=true + +If you then decide that you want to delete the registry, just run: + + juju run-action kubernetes-worker/0 registry delete=true ingress=true + ## Known Limitations Kubernetes workers currently only support 'phaux' HA scenarios. Even when configured with an HA cluster string, they will only ever contact the first unit in the cluster map. To enable a proper HA story, kubernetes-worker units are encouraged to proxy through a [kubeapi-load-balancer](https://jujucharms.com/kubeapi-load-balancer) @@ -48,5 +69,4 @@ application. This enables a HA deployment without the need to re-render configuration and disrupt the worker services. External access to pods must be performed through a [Kubernetes -Ingress Resource](http://kubernetes.io/docs/user-guide/ingress/). More -information +Ingress Resource](http://kubernetes.io/docs/user-guide/ingress/). diff --git a/cluster/juju/layers/kubernetes-worker/actions.yaml b/cluster/juju/layers/kubernetes-worker/actions.yaml index 22c4d17368c..4520354f2ca 100644 --- a/cluster/juju/layers/kubernetes-worker/actions.yaml +++ b/cluster/juju/layers/kubernetes-worker/actions.yaml @@ -14,6 +14,32 @@ microbot: delete: type: boolean default: False - description: Removes a microbots deployment, service, and ingress if True. + description: Remove a microbots deployment, service, and ingress if True. upgrade: description: Upgrade the kubernetes snaps +registry: + description: Create a private Docker registry + params: + htpasswd: + type: string + description: base64 encoded htpasswd file used for authentication. + htpasswd-plain: + type: string + description: base64 encoded plaintext version of the htpasswd file, needed by docker daemons to authenticate to the registry. + tlscert: + type: string + description: base64 encoded TLS certificate for the registry. Common Name must match the domain name of the registry. + tlskey: + type: string + description: base64 encoded TLS key for the registry. + domain: + type: string + description: The domain name for the registry. Must match the Common Name of the certificate. + ingress: + type: boolean + default: false + description: Create an Ingress resource for the registry (or delete resource object if "delete" is True) + delete: + type: boolean + default: false + description: Remove a registry replication controller, service, and ingress if True. 
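Since every file parameter to the registry action is passed base64 encoded, it can be convenient to prepare the invocation programmatically rather than with the `base64 -w0` one-liners shown in the README. Here is a minimal Python sketch of the same flow; the file names and the `kubernetes-worker/0` unit mirror the README example and are placeholders, not requirements of the charm:

```python
import base64
import subprocess

def b64_file(path):
    # Same output as `base64 -w0 <path>`: a single unwrapped base64 line.
    with open(path, 'rb') as f:
        return base64.b64encode(f.read()).decode('ascii')

# Placeholder file names, mirroring the README example above.
params = {
    'domain': 'myregistry.company.com',
    'htpasswd': b64_file('htpasswd'),
    'htpasswd-plain': b64_file('htpasswd-plain'),
    'tlscert': b64_file('registry.crt'),
    'tlskey': b64_file('registry.key'),
    'ingress': 'true',
}
cmd = ['juju', 'run-action', 'kubernetes-worker/0', 'registry']
cmd += ['{}={}'.format(k, v) for k, v in params.items()]
subprocess.check_call(cmd)
```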
diff --git a/cluster/juju/layers/kubernetes-worker/actions/registry b/cluster/juju/layers/kubernetes-worker/actions/registry new file mode 100755 index 00000000000..1968c9702b7 --- /dev/null +++ b/cluster/juju/layers/kubernetes-worker/actions/registry @@ -0,0 +1,136 @@ +#!/usr/bin/python3 +# +# For usage examples, see README.md +# +# TODO +# +# - make the action idempotent (i.e. if you run it multiple times, the first +# run will create/delete the registry, and the rest will be a no-op and won't +# error out) +# +# - take only a plain authentication file, and create the encrypted version in +# the action +# +# - validate the parameters (make sure tlscert is a certificate, that tlskey is a +# proper key, etc) +# +# - when https://bugs.launchpad.net/juju/+bug/1661015 is fixed, handle the +# base64 encoding of the parameters in the action itself + +import os +import sys + +from base64 import b64encode + +from charmhelpers.core.hookenv import action_get +from charmhelpers.core.hookenv import action_set +from charms.templating.jinja2 import render +from subprocess import call + +os.environ['PATH'] += os.pathsep + os.path.join(os.sep, 'snap', 'bin') + +deletion = action_get('delete') + +context = {} + +# These config options must be defined in the case of a creation +param_error = False +for param in ('tlscert', 'tlskey', 'domain', 'htpasswd', 'htpasswd-plain'): + value = action_get(param) + if not value and not deletion: + key = "registry-create-parameter-{}".format(param) + error = "failure, parameter {} is required".format(param) + action_set({key: error}) + param_error = True + + context[param] = value + +# Create the dockercfg template variable +dockercfg = '{"%s:443": {"auth": "%s", "email": "root@localhost"}}' % \ + (context['domain'], context['htpasswd-plain']) +context['dockercfg'] = b64encode(dockercfg.encode()).decode('ASCII') + +if param_error: + sys.exit(0) + +# This one is either true or false, no need to check if it has a "good" value. +context['ingress'] = action_get('ingress') + +# Declare a kubectl template when invoking kubectl +kubectl = ['kubectl', '--kubeconfig=/root/cdk/kubeconfig'] + +# Remove deployment if requested +if deletion: + resources = ['svc/kube-registry', 'rc/kube-registry-v0', 'secrets/registry-tls-data', + 'secrets/registry-auth-data', 'secrets/registry-access'] + + if action_get('ingress'): + resources.append('ing/registry-ing') + + delete_command = kubectl + ['delete', '--ignore-not-found=true'] + resources + delete_response = call(delete_command) + if delete_response == 0: + action_set({'registry-delete': 'success'}) + else: + action_set({'registry-delete': 'failure'}) + + sys.exit(0) + +# Creation request +render('registry.yaml', '/root/cdk/addons/registry.yaml', + context) + +create_command = kubectl + ['create', '-f', + '/root/cdk/addons/registry.yaml'] + +create_response = call(create_command) + +if create_response == 0: + action_set({'registry-create': 'success'}) + + # Create a ConfigMap if it doesn't exist yet, else patch it. + # A ConfigMap is needed to change the default value for nginx' client_max_body_size. + # The default is 1MB, and this is the maximum size of images that can be + # pushed to the registry. 1MB images aren't useful, so we bump this value to 1024MB. 
+ cm_name = 'nginx-load-balancer-conf' + check_cm_command = kubectl + ['get', 'cm', cm_name] + check_cm_response = call(check_cm_command) + + if check_cm_response == 0: + # There is an existing ConfigMap, patch it + patch = '{"data":{"body-size":"1024m"}}' + patch_cm_command = kubectl + ['patch', 'cm', cm_name, '-p', patch] + patch_cm_response = call(patch_cm_command) + + if patch_cm_response == 0: + action_set({'configmap-patch': 'success'}) + else: + action_set({'configmap-patch': 'failure'}) + + else: + # No existing ConfigMap, create it + render('registry-configmap.yaml', '/root/cdk/addons/registry-configmap.yaml', + context) + create_cm_command = kubectl + ['create', '-f', '/root/cdk/addons/registry-configmap.yaml'] + create_cm_response = call(create_cm_command) + + if create_cm_response == 0: + action_set({'configmap-create': 'success'}) + else: + action_set({'configmap-create': 'failure'}) + + # Patch the "default" serviceaccount with an imagePullSecret. + # This will allow the docker daemons to authenticate to our private + # registry automatically + patch = '{"imagePullSecrets":[{"name":"registry-access"}]}' + patch_sa_command = kubectl + ['patch', 'sa', 'default', '-p', patch] + patch_sa_response = call(patch_sa_command) + + if patch_sa_response == 0: + action_set({'serviceaccount-patch': 'success'}) + else: + action_set({'serviceaccount-patch': 'failure'}) + + +else: + action_set({'registry-create': 'failure'}) diff --git a/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py b/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py index 86511542969..40a0b7f42e2 100644 --- a/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py +++ b/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py @@ -203,6 +203,11 @@ def install_cni_plugins(): hookenv.log(install) check_call(install) + # Used by the "registry" action. The action is run on a single worker, but + # the registry pod can end up on any worker, so we need this directory on + # all the workers. 
+ os.makedirs('/srv/registry', exist_ok=True) + set_state('kubernetes-worker.cni-plugins.installed') diff --git a/cluster/juju/layers/kubernetes-worker/registry-configmap.yaml b/cluster/juju/layers/kubernetes-worker/registry-configmap.yaml new file mode 100644 index 00000000000..4800ff3c04f --- /dev/null +++ b/cluster/juju/layers/kubernetes-worker/registry-configmap.yaml @@ -0,0 +1,6 @@ +apiVersion: v1 +data: + body-size: 1024m +kind: ConfigMap +metadata: + name: nginx-load-balancer-conf diff --git a/cluster/juju/layers/kubernetes-worker/templates/ingress-replication-controller.yaml b/cluster/juju/layers/kubernetes-worker/templates/ingress-replication-controller.yaml index 1ebf5fa8f66..fa979b0f309 100644 --- a/cluster/juju/layers/kubernetes-worker/templates/ingress-replication-controller.yaml +++ b/cluster/juju/layers/kubernetes-worker/templates/ingress-replication-controller.yaml @@ -1,4 +1,9 @@ apiVersion: v1 +kind: ConfigMap +metadata: + name: nginx-load-balancer-conf +--- +apiVersion: v1 kind: ReplicationController metadata: name: nginx-ingress-controller @@ -45,3 +50,4 @@ spec: args: - /nginx-ingress-controller - --default-backend-service=$(POD_NAMESPACE)/default-http-backend + - --nginx-configmap=$(POD_NAMESPACE)/nginx-load-balancer-conf diff --git a/cluster/juju/layers/kubernetes-worker/templates/registry.yaml b/cluster/juju/layers/kubernetes-worker/templates/registry.yaml new file mode 100644 index 00000000000..d24b713ce7f --- /dev/null +++ b/cluster/juju/layers/kubernetes-worker/templates/registry.yaml @@ -0,0 +1,118 @@ +apiVersion: v1 +kind: Secret +metadata: + name: registry-tls-data +type: Opaque +data: + tls.crt: {{ tlscert }} + tls.key: {{ tlskey }} +--- +apiVersion: v1 +kind: Secret +metadata: + name: registry-auth-data +type: Opaque +data: + htpasswd: {{ htpasswd }} +--- +apiVersion: v1 +kind: ReplicationController +metadata: + name: kube-registry-v0 + labels: + k8s-app: kube-registry + version: v0 + kubernetes.io/cluster-service: "true" +spec: + replicas: 1 + selector: + k8s-app: kube-registry + version: v0 + template: + metadata: + labels: + k8s-app: kube-registry + version: v0 + kubernetes.io/cluster-service: "true" + spec: + containers: + - name: registry + image: registry:2 + resources: + # keep request = limit to keep this container in guaranteed class + limits: + cpu: 100m + memory: 100Mi + requests: + cpu: 100m + memory: 100Mi + env: + - name: REGISTRY_HTTP_ADDR + value: :5000 + - name: REGISTRY_STORAGE_FILESYSTEM_ROOTDIRECTORY + value: /var/lib/registry + - name: REGISTRY_AUTH_HTPASSWD_REALM + value: basic_realm + - name: REGISTRY_AUTH_HTPASSWD_PATH + value: /auth/htpasswd + volumeMounts: + - name: image-store + mountPath: /var/lib/registry + - name: auth-dir + mountPath: /auth + ports: + - containerPort: 5000 + name: registry + protocol: TCP + volumes: + - name: image-store + hostPath: + path: /srv/registry + - name: auth-dir + secret: + secretName: registry-auth-data +--- +apiVersion: v1 +kind: Service +metadata: + name: kube-registry + labels: + k8s-app: kube-registry + kubernetes.io/cluster-service: "true" + kubernetes.io/name: "KubeRegistry" +spec: + selector: + k8s-app: kube-registry + type: LoadBalancer + ports: + - name: registry + port: 5000 + protocol: TCP +--- +apiVersion: v1 +kind: Secret +metadata: + name: registry-access +data: + .dockercfg: {{ dockercfg }} +type: kubernetes.io/dockercfg +{%- if ingress %} +--- +apiVersion: extensions/v1beta1 +kind: Ingress +metadata: + name: registry-ing +spec: + tls: + - hosts: + - {{ domain }} + secretName: 
registry-tls-data + rules: + - host: {{ domain }} + http: + paths: + - backend: + serviceName: kube-registry + servicePort: 5000 + path: / +{% endif %} From 33fee22032a899e7c6bafd857381e2a586443c66 Mon Sep 17 00:00:00 2001 From: Rye Terrell Date: Tue, 28 Mar 2017 16:20:39 -0500 Subject: [PATCH 05/10] add support for kube-proxy cluster-cidr option --- .../kubernetes-worker/reactive/kubernetes_worker.py | 13 ++++++++++--- hack/verify-flags/exceptions.txt | 2 +- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py b/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py index 40a0b7f42e2..f69fe59e32f 100644 --- a/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py +++ b/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py @@ -288,16 +288,22 @@ def start_worker(kube_api, kube_control, cni): # the correct DNS even though the server isn't ready yet. dns = kube_control.get_dns() + cluster_cidr = cni.get_config()['cidr'] + + if cluster_cidr is None: + hookenv.log('Waiting for cluster cidr.') + return if (is_state('kubernetes-worker.restart-needed') or data_changed('kube-api-servers', servers) or - data_changed('kube-dns', dns)): + data_changed('kube-dns', dns) or + data_changed('cluster-cidr', cluster_cidr)): # set --allow-privileged flag for kubelet set_privileged() create_config(servers[0]) - configure_worker_services(servers, dns) + configure_worker_services(servers, dns, cluster_cidr) set_state('kubernetes-worker.config.created') restart_unit_services() update_kubelet_status() @@ -424,7 +430,7 @@ def create_config(server): user='kubelet') -def configure_worker_services(api_servers, dns): +def configure_worker_services(api_servers, dns, cluster_cidr): ''' Add remaining flags for the worker services and configure snaps to use them ''' layer_options = layer.options('tls-client') @@ -448,6 +454,7 @@ def configure_worker_services(api_servers, dns): kubelet_opts.add('tls-private-key-file', server_key_path) kube_proxy_opts = FlagManager('kube-proxy') + kube_proxy_opts.add('cluster-cidr', cluster_cidr) kube_proxy_opts.add('kubeconfig', kubeconfig_path) kube_proxy_opts.add('logtostderr', 'true') kube_proxy_opts.add('v', '0') diff --git a/hack/verify-flags/exceptions.txt b/hack/verify-flags/exceptions.txt index 951fbffbcde..92c16206f2a 100644 --- a/hack/verify-flags/exceptions.txt +++ b/hack/verify-flags/exceptions.txt @@ -47,7 +47,7 @@ cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py:def send_clu cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py:def service_cidr(): cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py: context.update({'kube_api_endpoint': ','.join(api_servers), cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py: ca_cert_path = layer_options.get('ca_certificate_path') -cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py:def configure_worker_services(api_servers, dns): +cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py:def configure_worker_services(api_servers, dns, cluster_cidr): cluster/lib/logging.sh: local source_file=${BASH_SOURCE[$frame_no]} cluster/lib/logging.sh: local source_file=${BASH_SOURCE[$stack_skip]} cluster/log-dump.sh: local -r node_name="${1}" From d50cf1e4999eacd57858e4c64f7247e53c092943 Mon Sep 17 00:00:00 2001 From: George Kraft Date: Wed, 29 Mar 2017 10:48:53 -0500 Subject: [PATCH 06/10] Fix juju kubernetes-master starting services before TLS 
certs are saved master: Fix start_master args --- .../layers/kubernetes-master/reactive/kubernetes_master.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py b/cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py index 0c3c3871422..6e6bde349e8 100644 --- a/cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py +++ b/cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py @@ -268,10 +268,10 @@ def idle_status(): hookenv.status_set('active', 'Kubernetes master running.') -@when('etcd.available', 'certificates.server.cert.available', +@when('etcd.available', 'tls_client.server.certificate.saved', 'authentication.setup') @when_not('kubernetes-master.components.started') -def start_master(etcd, tls): +def start_master(etcd): '''Run the Kubernetes master components.''' hookenv.status_set('maintenance', 'Configuring the Kubernetes master services.') From 258ee22858a77e067d4af84ee248ab48c8aa1d25 Mon Sep 17 00:00:00 2001 From: Matt Bruzek Date: Wed, 5 Apr 2017 16:09:02 -0500 Subject: [PATCH 07/10] Putting the nvidia-smi command in a try catch to avoid errors. --- .../reactive/kubernetes_worker.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py b/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py index f69fe59e32f..04ad6f4c206 100644 --- a/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py +++ b/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py @@ -669,6 +669,15 @@ def enable_gpu(): return hookenv.log('Enabling gpu mode') + try: + # Not sure why this is necessary, but if you don't run this, k8s will + # think that the node has 0 gpus (as shown by the output of + # `kubectl get nodes -o yaml` + check_call(['nvidia-smi']) + except CalledProcessError as cpe: + hookenv.log('Unable to communicate with the NVIDIA driver.') + hookenv.log(cpe) + return kubelet_opts = FlagManager('kubelet') if get_version('kubelet') < (1, 6): @@ -682,11 +691,6 @@ def enable_gpu(): _apply_node_label('gpu=true', overwrite=True) _apply_node_label('cuda=true', overwrite=True) - # Not sure why this is necessary, but if you don't run this, k8s will - # think that the node has 0 gpus (as shown by the output of - # `kubectl get nodes -o yaml` - check_call(['nvidia-smi']) - set_state('kubernetes-worker.gpu.enabled') set_state('kubernetes-worker.restart-needed') From 6e831d6fe9866531066c8b732102d90f0ea2c47f Mon Sep 17 00:00:00 2001 From: Rye Terrell Date: Thu, 6 Apr 2017 14:57:42 -0500 Subject: [PATCH 08/10] don't try to remove the node if kubeconfig doesn't exist --- .../layers/kubernetes-worker/reactive/kubernetes_worker.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py b/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py index 04ad6f4c206..cf4dfbe3994 100644 --- a/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py +++ b/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py @@ -152,7 +152,8 @@ def shutdown(): - stop the kube-proxy service - remove the 'kubernetes-worker.cni-plugins.installed' state ''' - kubectl('delete', 'node', gethostname()) + if os.path.isfile(kubeconfig_path): + kubectl('delete', 'node', gethostname()) service_stop('kubelet') service_stop('kube-proxy') 
remove_state('kubernetes-worker.cni-plugins.installed') From 86415961d59ee4cb1efe7c10241bb315d3fd2075 Mon Sep 17 00:00:00 2001 From: George Kraft Date: Fri, 7 Apr 2017 11:20:28 -0500 Subject: [PATCH 09/10] Fix handling of kubernetes-worker.restart-needed state Credit to @tvansteenburgh, thanks! --- .../reactive/kubernetes_worker.py | 42 ++++++++++++------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py b/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py index cf4dfbe3994..88c907f387a 100644 --- a/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py +++ b/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py @@ -275,11 +275,27 @@ def send_data(tls): tls.request_server_cert(common_name, sans, certificate_name) +@when('kube-api-endpoint.available', 'kube-control.dns.available', + 'cni.available') +def watch_for_changes(kube_api, kube_control, cni): + ''' Watch for configuration changes and signal if we need to restart the + worker services ''' + servers = get_kube_api_servers(kube_api) + dns = kube_control.get_dns() + cluster_cidr = cni.get_config()['cidr'] + + if (data_changed('kube-api-servers', servers) or + data_changed('kube-dns', dns) or + data_changed('cluster-cidr', cluster_cidr)): + + set_state('kubernetes-worker.restart-needed') + + @when('kubernetes-worker.snaps.installed', 'kube-api-endpoint.available', 'tls_client.ca.saved', 'tls_client.client.certificate.saved', 'tls_client.client.key.saved', 'tls_client.server.certificate.saved', 'tls_client.server.key.saved', 'kube-control.dns.available', - 'cni.available') + 'cni.available', 'kubernetes-worker.restart-needed') def start_worker(kube_api, kube_control, cni): ''' Start kubelet using the provided API and DNS info.''' servers = get_kube_api_servers(kube_api) @@ -295,20 +311,15 @@ def start_worker(kube_api, kube_control, cni): hookenv.log('Waiting for cluster cidr.') return - if (is_state('kubernetes-worker.restart-needed') or - data_changed('kube-api-servers', servers) or - data_changed('kube-dns', dns) or - data_changed('cluster-cidr', cluster_cidr)): + # set --allow-privileged flag for kubelet + set_privileged() - # set --allow-privileged flag for kubelet - set_privileged() - - create_config(servers[0]) - configure_worker_services(servers, dns, cluster_cidr) - set_state('kubernetes-worker.config.created') - restart_unit_services() - update_kubelet_status() - remove_state('kubernetes-worker.restart-needed') + create_config(servers[0]) + configure_worker_services(servers, dns, cluster_cidr) + set_state('kubernetes-worker.config.created') + restart_unit_services() + update_kubelet_status() + remove_state('kubernetes-worker.restart-needed') @when('cni.connected') @@ -624,7 +635,7 @@ def remove_nrpe_config(nagios=None): def set_privileged(): - """Update the allow-privileged flag for kube-apiserver. + """Update the allow-privileged flag for kubelet. """ privileged = hookenv.config('allow-privileged') @@ -698,6 +709,7 @@ def enable_gpu(): @when('kubernetes-worker.gpu.enabled') @when_not('kubernetes-worker.privileged') +@when_not('kubernetes-worker.restart-needed') def disable_gpu(): """Disable GPU usage on this node. From 7b841fe77eb9ab362b7506c05b4b64cfa6c9aa47 Mon Sep 17 00:00:00 2001 From: Tim Van Steenburgh Date: Thu, 13 Apr 2017 15:11:33 -0400 Subject: [PATCH 10/10] Fix nagios checks. 
--- .../layers/kubernetes-master/reactive/kubernetes_master.py | 5 +++++ .../layers/kubernetes-worker/reactive/kubernetes_worker.py | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py b/cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py index 6e6bde349e8..a392a9dbff5 100644 --- a/cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py +++ b/cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py @@ -49,6 +49,11 @@ from charmhelpers.fetch import apt_install from charmhelpers.contrib.charmsupport import nrpe +# Override the default nagios shortname regex to allow periods, which we +# need because our bin names contain them (e.g. 'snap.foo.daemon'). The +# default regex in charmhelpers doesn't allow periods, but nagios itself does. +nrpe.Check.shortname_re = '[\.A-Za-z0-9-_]+$' + os.environ['PATH'] += os.pathsep + os.path.join(os.sep, 'snap', 'bin') diff --git a/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py b/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py index 88c907f387a..c887dc23421 100644 --- a/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py +++ b/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py @@ -39,6 +39,11 @@ from charmhelpers.core.host import service_stop, service_restart from charmhelpers.contrib.charmsupport import nrpe +# Override the default nagios shortname regex to allow periods, which we +# need because our bin names contain them (e.g. 'snap.foo.daemon'). The +# default regex in charmhelpers doesn't allow periods, but nagios itself does. +nrpe.Check.shortname_re = '[\.A-Za-z0-9-_]+$' + kubeconfig_path = '/root/cdk/kubeconfig' os.environ['PATH'] += os.pathsep + os.path.join(os.sep, 'snap', 'bin')
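A quick way to see why this override matters: the stock charmhelpers shortname pattern stops at the first period, so systemd-style check names like `snap.kubelet.daemon` would be rejected when the nrpe checks are registered. Here is a standalone sketch; the first pattern is an assumption about charmhelpers' default, inferred from the comment above rather than copied from that library:

```python
import re

# Assumed stock charmhelpers pattern (no '.') vs. the override applied above.
DEFAULT_RE = r'[A-Za-z0-9-_]+$'
OVERRIDE_RE = r'[\.A-Za-z0-9-_]+$'

for name in ('kubelet', 'snap.kubelet.daemon'):
    print(name,
          bool(re.match(DEFAULT_RE, name)),    # dotted names fail to match
          bool(re.match(OVERRIDE_RE, name)))   # periods are now permitted
```

Running it shows `kubelet` matching both patterns while `snap.kubelet.daemon` matches only the override, which is exactly the set of shortnames the updated nrpe handlers in these charms now pass in.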