diff --git a/cluster/juju/layers/kubernetes-master/config.yaml b/cluster/juju/layers/kubernetes-master/config.yaml index be192b412ed..3917e62e31e 100644 --- a/cluster/juju/layers/kubernetes-master/config.yaml +++ b/cluster/juju/layers/kubernetes-master/config.yaml @@ -31,6 +31,14 @@ options: privileged mode. If "auto", kube-apiserver will not run in privileged mode by default, but will switch to privileged mode if gpu hardware is detected on a worker node. + enable-nvidia-plugin: + type: string + default: "auto" + description: | + Load the nvidia device plugin daemonset. Supported values are + "auto" and "false". When "auto", the daemonset will be loaded + only if GPUs are detected. When "false" the nvidia device plugin + will not be loaded. channel: type: string default: "1.9/stable" diff --git a/cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py b/cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py index ab378cb98eb..3a75259b67a 100644 --- a/cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py +++ b/cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py @@ -638,6 +638,10 @@ def kick_api_server(tls): def configure_cdk_addons(): ''' Configure CDK addons ''' remove_state('cdk-addons.configured') + load_gpu_plugin = hookenv.config('enable-nvidia-plugin').lower() + gpuEnable = (get_version('kube-apiserver') >= (1, 9) and + load_gpu_plugin == "auto" and + is_state('kubernetes-master.gpu.enabled')) dbEnabled = str(hookenv.config('enable-dashboard-addons')).lower() dnsEnabled = str(hookenv.config('enable-kube-dns')).lower() metricsEnabled = str(hookenv.config('enable-metrics')).lower() @@ -647,7 +651,8 @@ def configure_cdk_addons(): 'dns-domain=' + hookenv.config('dns_domain'), 'enable-dashboard=' + dbEnabled, 'enable-kube-dns=' + dnsEnabled, - 'enable-metrics=' + metricsEnabled + 'enable-metrics=' + metricsEnabled, + 'enable-gpu=' + str(gpuEnable).lower() ] check_call(['snap', 'set', 'cdk-addons'] + args) if 
not addons_ready(): @@ -887,8 +892,10 @@ def on_gpu_available(kube_control): We need to run in privileged mode. """ + kube_version = get_version('kube-apiserver') config = hookenv.config() - if config['allow-privileged'].lower() == "false": + if (config['allow-privileged'].lower() == "false" and + kube_version < (1, 9)): hookenv.status_set( 'active', 'GPUs available. Set allow-privileged="auto" to enable.' @@ -900,11 +907,25 @@ def on_gpu_available(kube_control): @when('kubernetes-master.gpu.enabled') +@when('kubernetes-master.components.started') @when_not('kubernetes-master.privileged') -def disable_gpu_mode(): +def gpu_with_no_privileged(): """We were in gpu mode, but the operator has set allow-privileged="false", so we can't run in gpu mode anymore. + """ + if get_version('kube-apiserver') < (1, 9): + remove_state('kubernetes-master.gpu.enabled') + + +@when('kube-control.connected') +@when_not('kube-control.gpu.available') +@when('kubernetes-master.gpu.enabled') +@when('kubernetes-master.components.started') +def gpu_departed(kube_control): + """We were in gpu mode, but the workers informed us there is + no gpu support anymore. 
+ """ remove_state('kubernetes-master.gpu.enabled') @@ -1185,7 +1206,7 @@ def configure_apiserver(etcd_connection_string, leader_etcd_version): else: api_opts['admission-control'] = ','.join(admission_control) - if get_version('kube-apiserver') > (1, 6) and \ + if kube_version > (1, 6) and \ hookenv.config('enable-metrics'): api_opts['requestheader-client-ca-file'] = ca_cert_path api_opts['requestheader-allowed-names'] = 'client' diff --git a/cluster/juju/layers/kubernetes-worker/layer.yaml b/cluster/juju/layers/kubernetes-worker/layer.yaml index a008d6f8ea4..b9f3768a4e5 100644 --- a/cluster/juju/layers/kubernetes-worker/layer.yaml +++ b/cluster/juju/layers/kubernetes-worker/layer.yaml @@ -7,7 +7,6 @@ includes: - 'layer:metrics' - 'layer:nagios' - 'layer:tls-client' - - 'layer:nvidia-cuda' - 'layer:cdk-service-kicker' - 'interface:http' - 'interface:kubernetes-cni' diff --git a/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py b/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py index baff2204ea3..750aabb15d7 100644 --- a/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py +++ b/cluster/juju/layers/kubernetes-worker/reactive/kubernetes_worker.py @@ -70,6 +70,7 @@ def upgrade_charm(): # Remove gpu.enabled state so we can reconfigure gpu-related kubelet flags, # since they can differ between k8s versions remove_state('kubernetes-worker.gpu.enabled') + disable_gpu() remove_state('kubernetes-worker.cni-plugins.installed') remove_state('kubernetes-worker.config.created') @@ -629,12 +630,10 @@ def configure_kubelet(dns, ingress_ip): kubelet_opts['allow-privileged'] = 'true' if privileged else 'false' if is_state('kubernetes-worker.gpu.enabled'): - if get_version('kubelet') < (1, 6): - hookenv.log('Adding --experimental-nvidia-gpus=1 to kubelet') - kubelet_opts['experimental-nvidia-gpus'] = '1' - else: - hookenv.log('Adding --feature-gates=Accelerators=true to kubelet') - kubelet_opts['feature-gates'] = 
'Accelerators=true' + hookenv.log('Adding ' + '--feature-gates=Accelerators=true,DevicePlugins=true ' + 'to kubelet') + kubelet_opts['feature-gates'] = 'Accelerators=true,DevicePlugins=true' configure_kubernetes_service('kubelet', kubelet_opts, 'kubelet-extra-args') @@ -870,14 +869,17 @@ def set_privileged(): """ privileged = hookenv.config('allow-privileged').lower() - if privileged == 'auto': - gpu_enabled = is_state('kubernetes-worker.gpu.enabled') - privileged = 'true' if gpu_enabled else 'false' + gpu_needs_privileged = (is_state('kubernetes-worker.gpu.enabled') and + get_version('kubelet') < (1, 9)) - if privileged == 'true': - set_state('kubernetes-worker.privileged') - else: - remove_state('kubernetes-worker.privileged') + if privileged == 'auto': + privileged = 'true' if gpu_needs_privileged else 'false' + + if privileged == 'false' and gpu_needs_privileged: + disable_gpu() + remove_state('kubernetes-worker.gpu.enabled') + # No need to restart kubernetes (set the restart-needed state) + # because set-privileged is already in the restart path @when('config.changed.allow-privileged') @@ -890,18 +892,17 @@ def on_config_allow_privileged_change(): remove_state('config.changed.allow-privileged') -@when('cuda.installed') +@when('nvidia-docker.installed') @when('kubernetes-worker.config.created') @when_not('kubernetes-worker.gpu.enabled') def enable_gpu(): """Enable GPU usage on this node. """ - config = hookenv.config() - if config['allow-privileged'] == "false": + if get_version('kubelet') < (1, 9): hookenv.status_set( 'active', - 'GPUs available. Set allow-privileged="auto" to enable.' + 'Upgrade to snap channel >= 1.9/stable to enable GPU support.' 
) return @@ -916,7 +917,6 @@ def enable_gpu(): hookenv.log(cpe) return - # Apply node labels set_label('gpu', 'true') set_label('cuda', 'true') @@ -925,15 +925,19 @@ def enable_gpu(): @when('kubernetes-worker.gpu.enabled') -@when_not('kubernetes-worker.privileged') +@when_not('nvidia-docker.installed') @when_not('kubernetes-worker.restart-needed') +def nvidia_departed(): + """Cuda departed, probably due to the docker layer switching to a + non nvidia-docker.""" + disable_gpu() + remove_state('kubernetes-worker.gpu.enabled') + set_state('kubernetes-worker.restart-needed') + + def disable_gpu(): """Disable GPU usage on this node. - This handler fires when we're running in gpu mode, and then the operator - sets allow-privileged="false". Since we can no longer run privileged - containers, we need to disable gpu mode. - """ hookenv.log('Disabling gpu mode') @@ -941,9 +945,6 @@ def disable_gpu(): remove_label('gpu') remove_label('cuda') - remove_state('kubernetes-worker.gpu.enabled') - set_state('kubernetes-worker.restart-needed') - @when('kubernetes-worker.gpu.enabled') @when('kube-control.connected')