diff --git a/cluster/juju/layers/kubernetes-master/config.yaml b/cluster/juju/layers/kubernetes-master/config.yaml index 2f9f848648d..9a0140f2f5d 100644 --- a/cluster/juju/layers/kubernetes-master/config.yaml +++ b/cluster/juju/layers/kubernetes-master/config.yaml @@ -11,3 +11,13 @@ options: type: string default: 10.152.183.0/24 description: CIDR to user for Kubernetes services. Cannot be changed after deployment. + allow-privileged: + type: string + default: "auto" + description: | + Allow kube-apiserver to run in privileged mode. Supported values are + "true", "false", and "auto". If "true", kube-apiserver will run in + privileged mode by default. If "false", kube-apiserver will never run in + privileged mode. If "auto", kube-apiserver will not run in privileged + mode by default, but will switch to privileged mode if gpu hardware is + detected on a worker node. diff --git a/cluster/juju/layers/kubernetes-master/layer.yaml b/cluster/juju/layers/kubernetes-master/layer.yaml index 8cd45211fd9..75bd5a27b61 100644 --- a/cluster/juju/layers/kubernetes-master/layer.yaml +++ b/cluster/juju/layers/kubernetes-master/layer.yaml @@ -10,6 +10,7 @@ includes: - 'interface:http' - 'interface:kubernetes-cni' - 'interface:kube-dns' + - 'interface:kube-control' - 'interface:public-address' options: basic: diff --git a/cluster/juju/layers/kubernetes-master/lib/charms/kubernetes/common.py b/cluster/juju/layers/kubernetes-master/lib/charms/kubernetes/common.py new file mode 100644 index 00000000000..d7fbf01aaa0 --- /dev/null +++ b/cluster/juju/layers/kubernetes-master/lib/charms/kubernetes/common.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python + +# Copyright 2015 The Kubernetes Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + + import re + import subprocess + + from charmhelpers.core import unitdata + + BIN_VERSIONS = 'bin_versions' + + + def get_version(bin_name): + """Get the version of an installed Kubernetes binary. + + :param str bin_name: Name of binary + :return: 3-tuple version (maj, min, patch) + + Example:: + + >>> get_version('kubelet') + (1, 6, 0) + + """ + db = unitdata.kv() + bin_versions = db.get(BIN_VERSIONS, {}) + + cached_version = bin_versions.get(bin_name) + if cached_version: + return tuple(cached_version) + + version = _get_bin_version(bin_name) + bin_versions[bin_name] = list(version) + db.set(BIN_VERSIONS, bin_versions) + return version + + +def reset_versions(): + """Reset the cache of bin versions. + + """ + db = unitdata.kv() + db.unset(BIN_VERSIONS) + + +def _get_bin_version(bin_name): + """Get a binary version by calling it with --version and parsing output. 
+ + """ + cmd = '{} --version'.format(bin_name).split() + version_string = subprocess.check_output(cmd).decode('utf-8') + return tuple(int(q) for q in re.findall("[0-9]+", version_string)[:3]) diff --git a/cluster/juju/layers/kubernetes-master/lib/charms/kubernetes/flagmanager.py b/cluster/juju/layers/kubernetes-master/lib/charms/kubernetes/flagmanager.py index 2f5e685cb90..0ff013b4c39 100644 --- a/cluster/juju/layers/kubernetes-master/lib/charms/kubernetes/flagmanager.py +++ b/cluster/juju/layers/kubernetes-master/lib/charms/kubernetes/flagmanager.py @@ -107,10 +107,17 @@ class FlagManager: if strict: self.data.pop('{}-strict'.format(key)) else: - self.data.pop('key') + self.data.pop(key) + self.__save() except KeyError: pass + def get(self, key, default=None): + """Return the value for ``key``, or the default if ``key`` doesn't exist. + + """ + return self.data.get(key, default) + def to_s(self): ''' Render the flags to a single string, prepared for the Docker diff --git a/cluster/juju/layers/kubernetes-master/metadata.yaml b/cluster/juju/layers/kubernetes-master/metadata.yaml index 51ecf351b20..b86dd035781 100644 --- a/cluster/juju/layers/kubernetes-master/metadata.yaml +++ b/cluster/juju/layers/kubernetes-master/metadata.yaml @@ -20,7 +20,12 @@ provides: kube-api-endpoint: interface: http cluster-dns: + # kube-dns is deprecated. Its functionality has been rolled into the + # kube-control interface. The cluster-dns relation will be removed in + # a future release. 
interface: kube-dns + kube-control: + interface: kube-control cni: interface: kubernetes-cni scope: container diff --git a/cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py b/cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py index cfa718780c7..2fe803db96d 100644 --- a/cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py +++ b/cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py @@ -35,6 +35,7 @@ from charms.reactive import remove_state from charms.reactive import set_state from charms.reactive import when, when_any, when_not from charms.reactive.helpers import data_changed +from charms.kubernetes.common import get_version, reset_versions from charms.kubernetes.flagmanager import FlagManager from charmhelpers.core import hookenv @@ -131,6 +132,7 @@ def install(): hookenv.log(install) check_call(install) + reset_versions() set_state('kubernetes-master.components.installed') @@ -274,13 +276,28 @@ def start_master(etcd, tls): set_state('kubernetes-master.components.started') -@when('cluster-dns.connected') -def send_cluster_dns_detail(cluster_dns): +@when('kube-control.connected') +def send_cluster_dns_detail(kube_control): ''' Send cluster DNS info ''' # Note that the DNS server doesn't necessarily exist at this point. We know # where we're going to put it, though, so let's send the info anyway. dns_ip = get_dns_ip() - cluster_dns.set_dns_info(53, hookenv.config('dns_domain'), dns_ip) + kube_control.set_dns(53, hookenv.config('dns_domain'), dns_ip) + + +@when_not('kube-control.connected') +def missing_kube_control(): + """Inform the operator they need to add the kube-control relation. + + If deploying via bundle this won't happen, but if operator is upgrading + a charm in a deployment that pre-dates the kube-control relation, it'll be + missing. 
+ + """ + hookenv.status_set( + 'blocked', + 'Relate {}:kube-control kubernetes-worker:kube-control'.format( + hookenv.service_name())) @when('kube-api-endpoint.available') @@ -529,12 +546,110 @@ def remove_nrpe_config(nagios=None): nrpe_setup.remove_check(shortname=service) +def set_privileged(privileged, render_config=True): + """Update the KUBE_ALLOW_PRIV flag for kube-apiserver and re-render config. + + If the flag already matches the requested value, this is a no-op. + + :param str privileged: "true" or "false" + :param bool render_config: whether to render new config file + :return: True if the flag was changed, else false + + """ + if privileged == "true": + set_state('kubernetes-master.privileged') + else: + remove_state('kubernetes-master.privileged') + + flag = '--allow-privileged' + kube_allow_priv_opts = FlagManager('KUBE_ALLOW_PRIV') + if kube_allow_priv_opts.get(flag) == privileged: + # Flag isn't changing, nothing to do + return False + + hookenv.log('Setting {}={}'.format(flag, privileged)) + + # Update --allow-privileged flag value + kube_allow_priv_opts.add(flag, privileged, strict=True) + + # re-render config with new options + if render_config: + context = { + 'kube_allow_priv': kube_allow_priv_opts.to_s(), + } + + # render the kube-defaults file + render('kube-defaults.defaults', '/etc/default/kube-defaults', context) + + # signal that we need a kube-apiserver restart + set_state('kubernetes-master.kube-apiserver.restart') + + return True + + +@when('config.changed.allow-privileged') +@when('kubernetes-master.components.started') +def on_config_allow_privileged_change(): + """React to changed 'allow-privileged' config value. + + """ + config = hookenv.config() + privileged = config['allow-privileged'] + if privileged == "auto": + return + + set_privileged(privileged) + remove_state('config.changed.allow-privileged') + + +@when('kubernetes-master.kube-apiserver.restart') +def restart_kube_apiserver(): + """Restart kube-apiserver. 
+ + """ + host.service_restart('kube-apiserver') + remove_state('kubernetes-master.kube-apiserver.restart') + + +@when('kube-control.gpu.available') +@when('kubernetes-master.components.started') +@when_not('kubernetes-master.gpu.enabled') +def on_gpu_available(kube_control): + """The remote side (kubernetes-worker) is gpu-enabled. + + We need to run in privileged mode. + + """ + config = hookenv.config() + if config['allow-privileged'] == "false": + hookenv.status_set( + 'active', + 'GPUs available. Set allow-privileged="auto" to enable.' + ) + return + + set_privileged("true") + set_state('kubernetes-master.gpu.enabled') + + +@when('kubernetes-master.gpu.enabled') +@when_not('kubernetes-master.privileged') +def disable_gpu_mode(): + """We were in gpu mode, but the operator has set allow-privileged="false", + so we can't run in gpu mode anymore. + + """ + remove_state('kubernetes-master.gpu.enabled') + + def create_addon(template, context): '''Create an addon from a template''' source = 'addons/' + template target = '/etc/kubernetes/addons/' + template render(source, target, context) - cmd = ['kubectl', 'apply', '-f', target] + # Need --force when upgrading between k8s versions where the templates have + # changed. + cmd = ['kubectl', 'apply', '--force', '-f', target] check_call(cmd) @@ -683,6 +798,7 @@ def render_files(): api_opts = FlagManager('kube-apiserver') controller_opts = FlagManager('kube-controller-manager') scheduler_opts = FlagManager('kube-scheduler') + scheduler_opts.add('--v', '2') # Get the tls paths from the layer data. 
layer_options = layer.options('tls-client') @@ -692,6 +808,11 @@ def render_files(): server_cert_path = layer_options.get('server_certificate_path') server_key_path = layer_options.get('server_key_path') + # set --allow-privileged flag for kube-apiserver + set_privileged( + "true" if config['allow-privileged'] == "true" else "false", + render_config=False) + # Handle static options for now api_opts.add('--min-request-timeout', '300') api_opts.add('--v', '4') @@ -701,17 +822,33 @@ def render_files(): api_opts.add('--kubelet-certificate-authority', ca_cert_path) api_opts.add('--kubelet-client-certificate', client_cert_path) api_opts.add('--kubelet-client-key', client_key_path) - - scheduler_opts.add('--v', '2') + # Needed for upgrade from 1.5.x to 1.6.0 + # XXX: support etcd3 + api_opts.add('--storage-backend', 'etcd2') + admission_control = [ + 'NamespaceLifecycle', + 'LimitRanger', + 'ServiceAccount', + 'ResourceQuota', + 'DefaultTolerationSeconds' + ] + if get_version('kube-apiserver') < (1, 6): + hookenv.log('Removing DefaultTolerationSeconds from admission-control') + admission_control.remove('DefaultTolerationSeconds') + api_opts.add( + '--admission-control', ','.join(admission_control), strict=True) # Default to 3 minute resync. TODO: Make this configureable? 
controller_opts.add('--min-resync-period', '3m') controller_opts.add('--v', '2') controller_opts.add('--root-ca-file', ca_cert_path) - context.update({'kube_apiserver_flags': api_opts.to_s(), - 'kube_scheduler_flags': scheduler_opts.to_s(), - 'kube_controller_manager_flags': controller_opts.to_s()}) + context.update({ + 'kube_allow_priv': FlagManager('KUBE_ALLOW_PRIV').to_s(), + 'kube_apiserver_flags': api_opts.to_s(), + 'kube_scheduler_flags': scheduler_opts.to_s(), + 'kube_controller_manager_flags': controller_opts.to_s(), + }) # Render the configuration files that contains parameters for # the apiserver, scheduler, and controller-manager diff --git a/cluster/juju/layers/kubernetes-master/templates/kube-apiserver.defaults b/cluster/juju/layers/kubernetes-master/templates/kube-apiserver.defaults index 29065a51df3..20e00ae2d2c 100644 --- a/cluster/juju/layers/kubernetes-master/templates/kube-apiserver.defaults +++ b/cluster/juju/layers/kubernetes-master/templates/kube-apiserver.defaults @@ -11,7 +11,7 @@ KUBE_API_ADDRESS="--insecure-bind-address=127.0.0.1" KUBE_API_PORT="--insecure-port=8080" # default admission control policies -KUBE_ADMISSION_CONTROL="--admission-control=NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultTolerationSeconds,ResourceQuota" +KUBE_ADMISSION_CONTROL="" # Add your own! 
KUBE_API_ARGS="{{ kube_apiserver_flags }}" diff --git a/cluster/juju/layers/kubernetes-master/templates/kube-defaults.defaults b/cluster/juju/layers/kubernetes-master/templates/kube-defaults.defaults index 8c0a28493fd..3fe065bf570 100644 --- a/cluster/juju/layers/kubernetes-master/templates/kube-defaults.defaults +++ b/cluster/juju/layers/kubernetes-master/templates/kube-defaults.defaults @@ -16,7 +16,7 @@ KUBE_LOGTOSTDERR="--logtostderr=true" KUBE_LOG_LEVEL="--v=0" # Should this cluster be allowed to run privileged docker containers -KUBE_ALLOW_PRIV="--allow-privileged=false" +KUBE_ALLOW_PRIV="{{ kube_allow_priv }}" # How the controller-manager, scheduler, and proxy find the apiserver KUBE_MASTER="--master=http://127.0.0.1:8080" diff --git a/cluster/juju/layers/kubernetes-worker/config.yaml b/cluster/juju/layers/kubernetes-worker/config.yaml index fef17ff7542..b3d345d3127 100644 --- a/cluster/juju/layers/kubernetes-worker/config.yaml +++ b/cluster/juju/layers/kubernetes-worker/config.yaml @@ -11,3 +11,12 @@ options: description: | Labels can be used to organize and to select subsets of nodes in the cluster. Declare node labels in key=value format, separated by spaces. + allow-privileged: + type: string + default: "auto" + description: | + Allow privileged containers to run on worker nodes. Supported values are + "true", "false", and "auto". If "true", kubelet will run in privileged + mode by default. If "false", kubelet will never run in privileged mode. + If "auto", kubelet will not run in privileged mode by default, but will + switch to privileged mode if gpu hardware is detected. 
diff --git a/cluster/juju/layers/kubernetes-worker/layer.yaml b/cluster/juju/layers/kubernetes-worker/layer.yaml index c3023d57b6f..ce0979de6f2 100644 --- a/cluster/juju/layers/kubernetes-worker/layer.yaml +++ b/cluster/juju/layers/kubernetes-worker/layer.yaml @@ -5,9 +5,11 @@ includes: - 'layer:docker' - 'layer:nagios' - 'layer:tls-client' + - 'layer:nvidia-cuda' - 'interface:http' - 'interface:kubernetes-cni' - 'interface:kube-dns' + - 'interface:kube-control' options: basic: packages: diff --git a/cluster/juju/layers/kubernetes-worker/lib/charms/kubernetes/common.py b/cluster/juju/layers/kubernetes-worker/lib/charms/kubernetes/common.py new file mode 100644 index 00000000000..d7fbf01aaa0 --- /dev/null +++ b/cluster/juju/layers/kubernetes-worker/lib/charms/kubernetes/common.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python + +# Copyright 2015 The Kubernetes Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re +import subprocess + +from charmhelpers.core import unitdata + +BIN_VERSIONS = 'bin_versions' + + +def get_version(bin_name): + """Get the version of an installed Kubernetes binary. 
+ + :param str bin_name: Name of binary + :return: 3-tuple version (maj, min, patch) + + Example:: + + >>> get_version('kubelet') + (1, 6, 0) + + """ + db = unitdata.kv() + bin_versions = db.get(BIN_VERSIONS, {}) + + cached_version = bin_versions.get(bin_name) + if cached_version: + return tuple(cached_version) + + version = _get_bin_version(bin_name) + bin_versions[bin_name] = list(version) + db.set(BIN_VERSIONS, bin_versions) + return version + + +def reset_versions(): + """Reset the cache of bin versions. + + """ + db = unitdata.kv() + db.unset(BIN_VERSIONS) + + +def _get_bin_version(bin_name): + """Get a binary version by calling it with --version and parsing output. + + """ + cmd = '{} --version'.format(bin_name).split() + version_string = subprocess.check_output(cmd).decode('utf-8') + return tuple(int(q) for q in re.findall("[0-9]+", version_string)[:3]) diff --git a/cluster/juju/layers/kubernetes-worker/lib/charms/kubernetes/flagmanager.py b/cluster/juju/layers/kubernetes-worker/lib/charms/kubernetes/flagmanager.py index 2f5e685cb90..0ff013b4c39 100644 --- a/cluster/juju/layers/kubernetes-worker/lib/charms/kubernetes/flagmanager.py +++ b/cluster/juju/layers/kubernetes-worker/lib/charms/kubernetes/flagmanager.py @@ -107,10 +107,17 @@ class FlagManager: if strict: self.data.pop('{}-strict'.format(key)) else: - self.data.pop('key') + self.data.pop(key) + self.__save() except KeyError: pass + def get(self, key, default=None): + """Return the value for ``key``, or the default if ``key`` doesn't exist. 
+ + """ + return self.data.get(key, default) + def to_s(self): ''' Render the flags to a single string, prepared for the Docker diff --git a/cluster/juju/layers/kubernetes-worker/metadata.yaml b/cluster/juju/layers/kubernetes-worker/metadata.yaml index 7670d7f5339..23bc3b6bd46 100644 --- a/cluster/juju/layers/kubernetes-worker/metadata.yaml +++ b/cluster/juju/layers/kubernetes-worker/metadata.yaml @@ -18,7 +18,12 @@ requires: kube-api-endpoint: interface: http kube-dns: + # kube-dns is deprecated. Its functionality has been rolled into the + # kube-control interface. The kube-dns relation will be removed in + # a future release. interface: kube-dns + kube-control: + interface: kube-control provides: cni: interface: kubernetes-cni diff --git a/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py b/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py index b364211cea9..47c0cf778c0 100644 --- a/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py +++ b/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py @@ -26,11 +26,13 @@ from charms.reactive import hook from charms.reactive import set_state, remove_state from charms.reactive import when, when_any, when_not from charms.reactive.helpers import data_changed +from charms.kubernetes.common import get_version, reset_versions from charms.kubernetes.flagmanager import FlagManager from charms.templating.jinja2 import render from charmhelpers.core import hookenv from charmhelpers.core.host import service_stop +from charmhelpers.core.host import service_restart from charmhelpers.contrib.charmsupport import nrpe @@ -41,6 +43,13 @@ kubeconfig_path = '/srv/kubernetes/config' def remove_installed_state(): remove_state('kubernetes-worker.components.installed') + # Remove gpu.enabled state so we can reconfigure gpu-related kubelet flags, + # since they can differ between k8s versions + remove_state('kubernetes-worker.gpu.enabled') + kubelet_opts = FlagManager('kubelet') + 
kubelet_opts.destroy('--feature-gates') + kubelet_opts.destroy('--experimental-nvidia-gpus') + @hook('stop') def shutdown(): @@ -104,6 +113,7 @@ def install_kubernetes_components(): hookenv.log(install) check_call(install) + reset_versions() set_state('kubernetes-worker.components.installed') @@ -116,7 +126,7 @@ def set_app_version(): @when('kubernetes-worker.components.installed') -@when_not('kube-dns.available') +@when_not('kube-control.dns.available') def notify_user_transient_status(): ''' Notify to the user we are in a transient state and the application is still converging. Potentially remotely, or we may be in a detached loop @@ -130,8 +140,8 @@ def notify_user_transient_status(): hookenv.status_set('waiting', 'Waiting for cluster DNS.') -@when('kubernetes-worker.components.installed', 'kube-dns.available') -def charm_status(kube_dns): +@when('kubernetes-worker.components.installed', 'kube-control.dns.available') +def charm_status(kube_control): '''Update the status message with the current status of kubelet.''' update_kubelet_status() @@ -171,28 +181,38 @@ def send_data(tls): @when('kubernetes-worker.components.installed', 'kube-api-endpoint.available', 'tls_client.ca.saved', 'tls_client.client.certificate.saved', 'tls_client.client.key.saved', 'tls_client.server.certificate.saved', - 'tls_client.server.key.saved', 'kube-dns.available', 'cni.available') -def start_worker(kube_api, kube_dns, cni): + 'tls_client.server.key.saved', 'kube-control.dns.available', + 'cni.available') +def start_worker(kube_api, kube_control, cni): ''' Start kubelet using the provided API and DNS info.''' + config = hookenv.config() servers = get_kube_api_servers(kube_api) # Note that the DNS server doesn't necessarily exist at this point. We know # what its IP will eventually be, though, so we can go ahead and configure # kubelet with that info. This ensures that early pods are configured with # the correct DNS even though the server isn't ready yet. 
- dns = kube_dns.details() + dns = kube_control.get_dns() if (data_changed('kube-api-servers', servers) or data_changed('kube-dns', dns)): - # Initialize a FlagManager object to add flags to unit data. - opts = FlagManager('kubelet') - # Append the DNS flags + data to the FlagManager object. + # Create FlagManager for kubelet and add dns flags + opts = FlagManager('kubelet') opts.add('--cluster-dns', dns['sdn-ip']) # FIXME sdn-ip needs a rename opts.add('--cluster-domain', dns['domain']) + # Create FlagManager for KUBE_MASTER and add api server addresses + kube_master_opts = FlagManager('KUBE_MASTER') + kube_master_opts.add('--master', ','.join(servers)) + + # set --allow-privileged flag for kubelet + set_privileged( + "true" if config['allow-privileged'] == "true" else "false", + render_config=False) + create_config(servers[0]) - render_init_scripts(servers) + render_init_scripts() set_state('kubernetes-worker.config.created') restart_unit_services() update_kubelet_status() @@ -318,7 +338,7 @@ def create_config(server): user='kubelet') -def render_init_scripts(api_servers): +def render_init_scripts(): ''' We have related to either an api server or a load balancer connected to the apiserver. Render the config files and prepare for launch ''' context = {} @@ -330,8 +350,11 @@ def render_init_scripts(api_servers): server_key_path = layer_options.get('server_key_path') unit_name = os.getenv('JUJU_UNIT_NAME').replace('/', '-') - context.update({'kube_api_endpoint': ','.join(api_servers), - 'JUJU_UNIT_NAME': unit_name}) + context.update({ + 'kube_allow_priv': FlagManager('KUBE_ALLOW_PRIV').to_s(), + 'kube_api_endpoint': FlagManager('KUBE_MASTER').to_s(), + 'JUJU_UNIT_NAME': unit_name, + }) kubelet_opts = FlagManager('kubelet') kubelet_opts.add('--require-kubeconfig', None) @@ -413,6 +436,7 @@ def restart_unit_services(): # Restart the services. 
hookenv.log('Restarting kubelet, and kube-proxy.') call(['systemctl', 'restart', 'kubelet']) + remove_state('kubernetes-worker.kubelet.restart') call(['systemctl', 'restart', 'kube-proxy']) @@ -506,6 +530,173 @@ def remove_nrpe_config(nagios=None): nrpe_setup.remove_check(shortname=service) +def set_privileged(privileged, render_config=True): + """Update the KUBE_ALLOW_PRIV flag for kubelet and re-render config files. + + If the flag already matches the requested value, this is a no-op. + + :param str privileged: "true" or "false" + :param bool render_config: whether to render new config files + :return: True if the flag was changed, else false + + """ + if privileged == "true": + set_state('kubernetes-worker.privileged') + else: + remove_state('kubernetes-worker.privileged') + + flag = '--allow-privileged' + kube_allow_priv_opts = FlagManager('KUBE_ALLOW_PRIV') + if kube_allow_priv_opts.get(flag) == privileged: + # Flag isn't changing, nothing to do + return False + + hookenv.log('Setting {}={}'.format(flag, privileged)) + + # Update --allow-privileged flag value + kube_allow_priv_opts.add(flag, privileged, strict=True) + + # re-render config with new options + if render_config: + render_init_scripts() + + # signal that we need a kubelet restart + set_state('kubernetes-worker.kubelet.restart') + + return True + + +@when('config.changed.allow-privileged') +@when('kubernetes-worker.config.created') +def on_config_allow_privileged_change(): + """React to changed 'allow-privileged' config value. + + """ + config = hookenv.config() + privileged = config['allow-privileged'] + if privileged == "auto": + return + + set_privileged(privileged) + remove_state('config.changed.allow-privileged') + + +@when('kubernetes-worker.kubelet.restart') +def restart_kubelet(): + """Restart kubelet. 
+ + """ + # Make sure systemd loads latest service config + call(['systemctl', 'daemon-reload']) + # Restart kubelet + service_restart('kubelet') + remove_state('kubernetes-worker.kubelet.restart') + + +@when('cuda.installed') +@when('kubernetes-worker.components.installed') +@when('kubernetes-worker.config.created') +@when_not('kubernetes-worker.gpu.enabled') +def enable_gpu(): + """Enable GPU usage on this node. + + """ + config = hookenv.config() + if config['allow-privileged'] == "false": + hookenv.status_set( + 'active', + 'GPUs available. Set allow-privileged="auto" to enable.' + ) + return + + hookenv.log('Enabling gpu mode') + + kubelet_opts = FlagManager('kubelet') + if get_version('kubelet') < (1, 6): + hookenv.log('Adding --experimental-nvidia-gpus=1 to kubelet') + kubelet_opts.add('--experimental-nvidia-gpus', '1') + else: + hookenv.log('Adding --feature-gates=Accelerators=true to kubelet') + kubelet_opts.add('--feature-gates', 'Accelerators=true') + + # enable privileged mode and re-render config files + set_privileged("true", render_config=False) + render_init_scripts() + + # Apply node labels + _apply_node_label('gpu=true', overwrite=True) + _apply_node_label('cuda=true', overwrite=True) + + # Not sure why this is necessary, but if you don't run this, k8s will + # think that the node has 0 gpus (as shown by the output of + # `kubectl get nodes -o yaml` + check_call(['nvidia-smi']) + + set_state('kubernetes-worker.gpu.enabled') + set_state('kubernetes-worker.kubelet.restart') + + +@when('kubernetes-worker.gpu.enabled') +@when_not('kubernetes-worker.privileged') +def disable_gpu(): + """Disable GPU usage on this node. + + This handler fires when we're running in gpu mode, and then the operator + sets allow-privileged="false". Since we can no longer run privileged + containers, we need to disable gpu mode. 
+ + """ + hookenv.log('Disabling gpu mode') + + kubelet_opts = FlagManager('kubelet') + if get_version('kubelet') < (1, 6): + kubelet_opts.destroy('--experimental-nvidia-gpus') + else: + kubelet_opts.remove('--feature-gates', 'Accelerators=true') + + render_init_scripts() + + # Remove node labels + _apply_node_label('gpu', delete=True) + _apply_node_label('cuda', delete=True) + + remove_state('kubernetes-worker.gpu.enabled') + set_state('kubernetes-worker.kubelet.restart') + + +@when('kubernetes-worker.gpu.enabled') +@when('kube-control.connected') +def notify_master_gpu_enabled(kube_control): + """Notify kubernetes-master that we're gpu-enabled. + + """ + kube_control.set_gpu(True) + + +@when_not('kubernetes-worker.gpu.enabled') +@when('kube-control.connected') +def notify_master_gpu_not_enabled(kube_control): + """Notify kubernetes-master that we're not gpu-enabled. + + """ + kube_control.set_gpu(False) + + +@when_not('kube-control.connected') +def missing_kube_control(): + """Inform the operator they need to add the kube-control relation. + + If deploying via bundle this won't happen, but if operator is upgrading + a charm in a deployment that pre-dates the kube-control relation, it'll be + missing. 
+ + """ + hookenv.status_set( + 'blocked', + 'Relate {}:kube-control kubernetes-master:kube-control'.format( + hookenv.service_name())) + + def _systemctl_is_active(application): ''' Poll systemctl to determine if the application is running ''' cmd = ['systemctl', 'is-active', application] @@ -516,7 +707,7 @@ def _systemctl_is_active(application): return False -def _apply_node_label(label, delete=False): +def _apply_node_label(label, delete=False, overwrite=False): ''' Invoke kubectl to apply node label changes ''' hostname = gethostname() @@ -529,6 +720,8 @@ def _apply_node_label(label, delete=False): cmd = cmd + '-' else: cmd = cmd_base.format(kubeconfig_path, hostname, label) + if overwrite: + cmd = '{} --overwrite'.format(cmd) check_call(split(cmd)) diff --git a/cluster/juju/layers/kubernetes-worker/templates/kube-default b/cluster/juju/layers/kubernetes-worker/templates/kube-default index 9b8e2d35d12..9b6e28b1966 100644 --- a/cluster/juju/layers/kubernetes-worker/templates/kube-default +++ b/cluster/juju/layers/kubernetes-worker/templates/kube-default @@ -16,7 +16,7 @@ KUBE_LOGTOSTDERR="--logtostderr=true" KUBE_LOG_LEVEL="--v=0" # Should this cluster be allowed to run privileged docker containers -KUBE_ALLOW_PRIV="--allow-privileged=false" +KUBE_ALLOW_PRIV="{{ kube_allow_priv }}" # How the controller-manager, scheduler, and proxy find the apiserver -KUBE_MASTER="--master={{ kube_api_endpoint }}" +KUBE_MASTER="{{ kube_api_endpoint }}"