diff --git a/build/dependencies.yaml b/build/dependencies.yaml index 311758d8d1b..c1e906cf122 100644 --- a/build/dependencies.yaml +++ b/build/dependencies.yaml @@ -97,6 +97,8 @@ dependencies: match: registry.k8s.io/node-problem-detector/node-problem-detector - path: cluster/addons/node-problem-detector/npd.yaml match: registry.k8s.io/node-problem-detector/node-problem-detector + - path: cluster/addons/node-problem-detector/npd.yaml + match: app.kubernetes.io/version # TODO(dims): Ensure newer versions get uploaded to # - https://console.cloud.google.com/storage/browser/gke-release/winnode/node-problem-detector # - https://gcsweb.k8s.io/gcs/kubernetes-release/node-problem-detector/ diff --git a/cluster/addons/node-problem-detector/npd.yaml b/cluster/addons/node-problem-detector/npd.yaml index 459f28981f6..7ba02511b18 100644 --- a/cluster/addons/node-problem-detector/npd.yaml +++ b/cluster/addons/node-problem-detector/npd.yaml @@ -26,24 +26,22 @@ subjects: apiVersion: apps/v1 kind: DaemonSet metadata: - name: npd-v0.8.9 + name: node-problem-detector namespace: kube-system labels: - k8s-app: node-problem-detector - version: v0.8.9 - kubernetes.io/cluster-service: "true" + app.kubernetes.io/name: node-problem-detector + app.kubernetes.io/version: v0.8.13 addonmanager.kubernetes.io/mode: Reconcile spec: selector: matchLabels: - k8s-app: node-problem-detector - version: v0.8.9 + app.kubernetes.io/name: node-problem-detector + app.kubernetes.io/version: v0.8.13 template: metadata: labels: - k8s-app: node-problem-detector - version: v0.8.9 - kubernetes.io/cluster-service: "true" + app.kubernetes.io/name: node-problem-detector + app.kubernetes.io/version: v0.8.13 spec: containers: - name: node-problem-detector @@ -69,6 +67,9 @@ spec: volumeMounts: - name: log mountPath: /var/log + - name: kmsg + mountPath: /dev/kmsg + readOnly: true - name: localtime mountPath: /etc/localtime readOnly: true @@ -76,6 +77,9 @@ spec: - name: log hostPath: path: /var/log/ + - name: kmsg + hostPath: + path: /dev/kmsg - name: localtime hostPath: path: /etc/localtime @@ -84,5 +88,7 @@ spec: tolerations: - operator: "Exists" effect: "NoExecute" + - operator: "Exists" + effect: "NoSchedule" - key: "CriticalAddonsOnly" operator: "Exists" diff --git a/cluster/gce/config-default.sh b/cluster/gce/config-default.sh index e402c0c2bd6..b0e62f9488f 100755 --- a/cluster/gce/config-default.sh +++ b/cluster/gce/config-default.sh @@ -287,12 +287,7 @@ export ENABLE_DNS_HORIZONTAL_AUTOSCALER="${KUBE_ENABLE_DNS_HORIZONTAL_AUTOSCALER # none - Not run node problem detector. # daemonset - Run node problem detector as daemonset. # standalone - Run node problem detector as standalone system daemon. -if [[ "${NODE_OS_DISTRIBUTION}" == "gci" ]]; then - # Enable standalone mode by default for gci. - ENABLE_NODE_PROBLEM_DETECTOR="${KUBE_ENABLE_NODE_PROBLEM_DETECTOR:-standalone}" -else - export ENABLE_NODE_PROBLEM_DETECTOR="${KUBE_ENABLE_NODE_PROBLEM_DETECTOR:-daemonset}" -fi +export ENABLE_NODE_PROBLEM_DETECTOR="${KUBE_ENABLE_NODE_PROBLEM_DETECTOR:-daemonset}" NODE_PROBLEM_DETECTOR_VERSION="${NODE_PROBLEM_DETECTOR_VERSION:-}" NODE_PROBLEM_DETECTOR_TAR_HASH="${NODE_PROBLEM_DETECTOR_TAR_HASH:-}" NODE_PROBLEM_DETECTOR_RELEASE_PATH="${NODE_PROBLEM_DETECTOR_RELEASE_PATH:-}" @@ -564,4 +559,4 @@ fi # --image-credential-provider-bin-dir=${path-to-auth-provider-binary} # Also, it is required that DisableKubeletCloudCredentialProviders # feature gates are set to true for kubelet to use external credential provider. -export ENABLE_AUTH_PROVIDER_GCP="${ENABLE_AUTH_PROVIDER_GCP:-true}" \ No newline at end of file +export ENABLE_AUTH_PROVIDER_GCP="${ENABLE_AUTH_PROVIDER_GCP:-true}" diff --git a/cluster/gce/config-test.sh b/cluster/gce/config-test.sh index ea88f364b03..dfb52dbc63d 100755 --- a/cluster/gce/config-test.sh +++ b/cluster/gce/config-test.sh @@ -329,13 +329,7 @@ export ENABLE_DNS_HORIZONTAL_AUTOSCALER=${KUBE_ENABLE_DNS_HORIZONTAL_AUTOSCALER: # none - Not run node problem detector. # daemonset - Run node problem detector as daemonset. # standalone - Run node problem detector as standalone system daemon. -if [[ "${NODE_OS_DISTRIBUTION}" = 'gci' ]]; then - # Enable standalone mode by default for gci. - ENABLE_NODE_PROBLEM_DETECTOR=${KUBE_ENABLE_NODE_PROBLEM_DETECTOR:-standalone} -else - ENABLE_NODE_PROBLEM_DETECTOR=${KUBE_ENABLE_NODE_PROBLEM_DETECTOR:-daemonset} -fi -export ENABLE_NODE_PROBLEM_DETECTOR +export ENABLE_NODE_PROBLEM_DETECTOR=${KUBE_ENABLE_NODE_PROBLEM_DETECTOR:-daemonset} NODE_PROBLEM_DETECTOR_VERSION=${NODE_PROBLEM_DETECTOR_VERSION:-} NODE_PROBLEM_DETECTOR_TAR_HASH=${NODE_PROBLEM_DETECTOR_TAR_HASH:-} NODE_PROBLEM_DETECTOR_RELEASE_PATH=${NODE_PROBLEM_DETECTOR_RELEASE_PATH:-} @@ -613,4 +607,4 @@ fi # --image-credential-provider-bin-dir=${path-to-auth-provider-binary} # Also, it is required that DisableKubeletCloudCredentialProviders and KubeletCredentialProviders # feature gates are set to true for kubelet to use external credential provider. -export ENABLE_AUTH_PROVIDER_GCP="${ENABLE_AUTH_PROVIDER_GCP:-true}" \ No newline at end of file +export ENABLE_AUTH_PROVIDER_GCP="${ENABLE_AUTH_PROVIDER_GCP:-true}" diff --git a/test/e2e/node/node_problem_detector.go b/test/e2e/node/node_problem_detector.go index 2ed867ce0a5..f93af320f47 100644 --- a/test/e2e/node/node_problem_detector.go +++ b/test/e2e/node/node_problem_detector.go @@ -380,7 +380,7 @@ func getNpdPodStat(ctx context.Context, f *framework.Framework, nodeName string) hasNpdPod := false for _, pod := range summary.Pods { - if !strings.HasPrefix(pod.PodRef.Name, "npd") { + if !strings.HasPrefix(pod.PodRef.Name, "node-problem-detector") { continue } cpuUsage = float64(*pod.CPU.UsageNanoCores) * 1e-9