Configure node-problem-detector (NPD) to run as a DaemonSet on COS

This commit is contained in:
upodroid 2023-10-16 12:34:47 +01:00
parent 12dc19d46f
commit 011c65e17e
5 changed files with 22 additions and 25 deletions

View File

@ -97,6 +97,8 @@ dependencies:
match: registry.k8s.io/node-problem-detector/node-problem-detector match: registry.k8s.io/node-problem-detector/node-problem-detector
- path: cluster/addons/node-problem-detector/npd.yaml - path: cluster/addons/node-problem-detector/npd.yaml
match: registry.k8s.io/node-problem-detector/node-problem-detector match: registry.k8s.io/node-problem-detector/node-problem-detector
- path: cluster/addons/node-problem-detector/npd.yaml
match: app.kubernetes.io/version
# TODO(dims): Ensure newer versions get uploaded to # TODO(dims): Ensure newer versions get uploaded to
# - https://console.cloud.google.com/storage/browser/gke-release/winnode/node-problem-detector # - https://console.cloud.google.com/storage/browser/gke-release/winnode/node-problem-detector
# - https://gcsweb.k8s.io/gcs/kubernetes-release/node-problem-detector/ # - https://gcsweb.k8s.io/gcs/kubernetes-release/node-problem-detector/

View File

@ -26,24 +26,22 @@ subjects:
apiVersion: apps/v1 apiVersion: apps/v1
kind: DaemonSet kind: DaemonSet
metadata: metadata:
name: npd-v0.8.9 name: node-problem-detector
namespace: kube-system namespace: kube-system
labels: labels:
k8s-app: node-problem-detector app.kubernetes.io/name: node-problem-detector
version: v0.8.9 app.kubernetes.io/version: v0.8.13
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: Reconcile addonmanager.kubernetes.io/mode: Reconcile
spec: spec:
selector: selector:
matchLabels: matchLabels:
k8s-app: node-problem-detector app.kubernetes.io/name: node-problem-detector
version: v0.8.9 app.kubernetes.io/version: v0.8.13
template: template:
metadata: metadata:
labels: labels:
k8s-app: node-problem-detector app.kubernetes.io/name: node-problem-detector
version: v0.8.9 app.kubernetes.io/version: v0.8.13
kubernetes.io/cluster-service: "true"
spec: spec:
containers: containers:
- name: node-problem-detector - name: node-problem-detector
@ -69,6 +67,9 @@ spec:
volumeMounts: volumeMounts:
- name: log - name: log
mountPath: /var/log mountPath: /var/log
- name: kmsg
mountPath: /dev/kmsg
readOnly: true
- name: localtime - name: localtime
mountPath: /etc/localtime mountPath: /etc/localtime
readOnly: true readOnly: true
@ -76,6 +77,9 @@ spec:
- name: log - name: log
hostPath: hostPath:
path: /var/log/ path: /var/log/
- name: kmsg
hostPath:
path: /dev/kmsg
- name: localtime - name: localtime
hostPath: hostPath:
path: /etc/localtime path: /etc/localtime
@ -84,5 +88,7 @@ spec:
tolerations: tolerations:
- operator: "Exists" - operator: "Exists"
effect: "NoExecute" effect: "NoExecute"
- operator: "Exists"
effect: "NoSchedule"
- key: "CriticalAddonsOnly" - key: "CriticalAddonsOnly"
operator: "Exists" operator: "Exists"

View File

@ -287,12 +287,7 @@ export ENABLE_DNS_HORIZONTAL_AUTOSCALER="${KUBE_ENABLE_DNS_HORIZONTAL_AUTOSCALER
# none - Do not run node problem detector. # none - Do not run node problem detector.
# daemonset - Run node problem detector as daemonset. # daemonset - Run node problem detector as daemonset.
# standalone - Run node problem detector as standalone system daemon. # standalone - Run node problem detector as standalone system daemon.
if [[ "${NODE_OS_DISTRIBUTION}" == "gci" ]]; then
# Enable standalone mode by default for gci.
ENABLE_NODE_PROBLEM_DETECTOR="${KUBE_ENABLE_NODE_PROBLEM_DETECTOR:-standalone}"
else
export ENABLE_NODE_PROBLEM_DETECTOR="${KUBE_ENABLE_NODE_PROBLEM_DETECTOR:-daemonset}" export ENABLE_NODE_PROBLEM_DETECTOR="${KUBE_ENABLE_NODE_PROBLEM_DETECTOR:-daemonset}"
fi
NODE_PROBLEM_DETECTOR_VERSION="${NODE_PROBLEM_DETECTOR_VERSION:-}" NODE_PROBLEM_DETECTOR_VERSION="${NODE_PROBLEM_DETECTOR_VERSION:-}"
NODE_PROBLEM_DETECTOR_TAR_HASH="${NODE_PROBLEM_DETECTOR_TAR_HASH:-}" NODE_PROBLEM_DETECTOR_TAR_HASH="${NODE_PROBLEM_DETECTOR_TAR_HASH:-}"
NODE_PROBLEM_DETECTOR_RELEASE_PATH="${NODE_PROBLEM_DETECTOR_RELEASE_PATH:-}" NODE_PROBLEM_DETECTOR_RELEASE_PATH="${NODE_PROBLEM_DETECTOR_RELEASE_PATH:-}"

View File

@ -329,13 +329,7 @@ export ENABLE_DNS_HORIZONTAL_AUTOSCALER=${KUBE_ENABLE_DNS_HORIZONTAL_AUTOSCALER:
# none - Do not run node problem detector. # none - Do not run node problem detector.
# daemonset - Run node problem detector as daemonset. # daemonset - Run node problem detector as daemonset.
# standalone - Run node problem detector as standalone system daemon. # standalone - Run node problem detector as standalone system daemon.
if [[ "${NODE_OS_DISTRIBUTION}" = 'gci' ]]; then export ENABLE_NODE_PROBLEM_DETECTOR=${KUBE_ENABLE_NODE_PROBLEM_DETECTOR:-daemonset}
# Enable standalone mode by default for gci.
ENABLE_NODE_PROBLEM_DETECTOR=${KUBE_ENABLE_NODE_PROBLEM_DETECTOR:-standalone}
else
ENABLE_NODE_PROBLEM_DETECTOR=${KUBE_ENABLE_NODE_PROBLEM_DETECTOR:-daemonset}
fi
export ENABLE_NODE_PROBLEM_DETECTOR
NODE_PROBLEM_DETECTOR_VERSION=${NODE_PROBLEM_DETECTOR_VERSION:-} NODE_PROBLEM_DETECTOR_VERSION=${NODE_PROBLEM_DETECTOR_VERSION:-}
NODE_PROBLEM_DETECTOR_TAR_HASH=${NODE_PROBLEM_DETECTOR_TAR_HASH:-} NODE_PROBLEM_DETECTOR_TAR_HASH=${NODE_PROBLEM_DETECTOR_TAR_HASH:-}
NODE_PROBLEM_DETECTOR_RELEASE_PATH=${NODE_PROBLEM_DETECTOR_RELEASE_PATH:-} NODE_PROBLEM_DETECTOR_RELEASE_PATH=${NODE_PROBLEM_DETECTOR_RELEASE_PATH:-}

View File

@ -380,7 +380,7 @@ func getNpdPodStat(ctx context.Context, f *framework.Framework, nodeName string)
hasNpdPod := false hasNpdPod := false
for _, pod := range summary.Pods { for _, pod := range summary.Pods {
if !strings.HasPrefix(pod.PodRef.Name, "npd") { if !strings.HasPrefix(pod.PodRef.Name, "node-problem-detector") {
continue continue
} }
cpuUsage = float64(*pod.CPU.UsageNanoCores) * 1e-9 cpuUsage = float64(*pod.CPU.UsageNanoCores) * 1e-9