From d7341749ff26a071b0971dba3f779277b93d76df Mon Sep 17 00:00:00 2001 From: Rohit Agarwal Date: Wed, 29 Nov 2017 11:31:39 -0800 Subject: [PATCH 1/2] nvidia-gpu-device-plugin daemonset should tolerate nvidia.com/gpu taint. It is expected that nodes with extended resources attached will be tainted with the resource name, so that we can create dedicated nodes. If ExtendedResourceToleration admission controller is enabled, pods requesting such resources will automatically tolerate such taints. nvidia-gpu-device-plugin daemonset doesn't request such resources but still needs to run on such nodes, so it needs this toleration. --- cluster/addons/device-plugins/nvidia-gpu/daemonset.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cluster/addons/device-plugins/nvidia-gpu/daemonset.yaml b/cluster/addons/device-plugins/nvidia-gpu/daemonset.yaml index 6b5edbf733d..de66faecb30 100644 --- a/cluster/addons/device-plugins/nvidia-gpu/daemonset.yaml +++ b/cluster/addons/device-plugins/nvidia-gpu/daemonset.yaml @@ -22,6 +22,10 @@ spec: - matchExpressions: - key: cloud.google.com/gke-accelerator operator: Exists + tolerations: + - key: "nvidia.com/gpu" + effect: "NoSchedule" + operator: "Exists" hostNetwork: true hostPID: true volumes: From ad05928c6e5b2bab5d7222da45f0ab0d54f13416 Mon Sep 17 00:00:00 2001 From: Rohit Agarwal Date: Wed, 29 Nov 2017 12:36:55 -0800 Subject: [PATCH 2/2] Add wildcard tolerations to kube-proxy. fluentd-gcp already has these tolerations. kube-proxy when it runs as a static pod gets wildcard `NoExecute` toleration (all static pods get that). So, added the same toleration to kube-proxy when it runs as a daemonset. Also added wildcard `NoSchedule` toleration to kube-proxy. 
--- cluster/addons/fluentd-gcp/fluentd-gcp-ds.yaml | 1 - cluster/addons/kube-proxy/kube-proxy-ds.yaml | 5 +++++ cluster/saltbase/salt/kube-proxy/kube-proxy.manifest | 5 +++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/cluster/addons/fluentd-gcp/fluentd-gcp-ds.yaml b/cluster/addons/fluentd-gcp/fluentd-gcp-ds.yaml index 69021f4263c..7f6a47deb4b 100644 --- a/cluster/addons/fluentd-gcp/fluentd-gcp-ds.yaml +++ b/cluster/addons/fluentd-gcp/fluentd-gcp-ds.yaml @@ -107,7 +107,6 @@ spec: effect: "NoSchedule" - operator: "Exists" effect: "NoExecute" - #TODO: remove this toleration once #44445 is properly fixed. - operator: "Exists" effect: "NoSchedule" terminationGracePeriodSeconds: 30 diff --git a/cluster/addons/kube-proxy/kube-proxy-ds.yaml b/cluster/addons/kube-proxy/kube-proxy-ds.yaml index 479c6eeb023..2134e875fba 100644 --- a/cluster/addons/kube-proxy/kube-proxy-ds.yaml +++ b/cluster/addons/kube-proxy/kube-proxy-ds.yaml @@ -28,6 +28,11 @@ spec: hostNetwork: true nodeSelector: beta.kubernetes.io/kube-proxy-ds-ready: "true" + tolerations: + - operator: "Exists" + effect: "NoExecute" + - operator: "Exists" + effect: "NoSchedule" containers: - name: kube-proxy image: {{pillar['kube_docker_registry']}}/kube-proxy:{{pillar['kube-proxy_docker_tag']}} diff --git a/cluster/saltbase/salt/kube-proxy/kube-proxy.manifest b/cluster/saltbase/salt/kube-proxy/kube-proxy.manifest index 4c9882a6ffa..69075cb9d04 100644 --- a/cluster/saltbase/salt/kube-proxy/kube-proxy.manifest +++ b/cluster/saltbase/salt/kube-proxy/kube-proxy.manifest @@ -65,6 +65,11 @@ metadata: spec: {{pod_priority}} hostNetwork: true + tolerations: + - operator: "Exists" + effect: "NoExecute" + - operator: "Exists" + effect: "NoSchedule" containers: - name: kube-proxy image: {{pillar['kube_docker_registry']}}/kube-proxy:{{pillar['kube-proxy_docker_tag']}}