mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-24 12:15:52 +00:00
Merge pull request #25986 from Random-Liu/enable-node-problem-detector
Automatic merge from submit-queue

Add node problem detector as an addon pod.

```release-note
Introduce a new add-on pod NodeProblemDetector. NodeProblemDetector is a DaemonSet running on each node, monitoring node health and reporting node problems as NodeCondition and Event. Currently it already supports kernel log monitoring, and will support more problem detection in the future. It is enabled by default on GCE now.
```

This PR enables NodeProblemDetector as an add-on pod.

/cc @mikedanese @kubernetes/sig-node
This commit is contained in:
commit
fe15db60f4
6
cluster/addons/node-problem-detector/MAINTAINERS.md
Normal file
6
cluster/addons/node-problem-detector/MAINTAINERS.md
Normal file
@ -0,0 +1,6 @@
|
||||
# Maintainers
|
||||
|
||||
Lantao Liu <lantaol@google.com>
|
||||
|
||||
|
||||
[]()
|
10
cluster/addons/node-problem-detector/README.md
Normal file
10
cluster/addons/node-problem-detector/README.md
Normal file
@ -0,0 +1,10 @@
|
||||
# Node Problem Detector
|
||||
|
||||
Node Problem Detector is a DaemonSet running on each node, detecting node
|
||||
problems.
|
||||
|
||||
Learn more at: https://github.com/kubernetes/node-problem-detector
|
||||
|
||||
|
||||
[]()
|
@ -0,0 +1,44 @@
|
||||
# Node Problem Detector addon: a DaemonSet that schedules one
# node-problem-detector pod on every node, in the kube-system namespace.
# This file is a Jinja template; the apiserver host below is filled in from
# the Salt pillar when the manifest is rendered.
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
  name: node-problem-detector-v0.1
  namespace: kube-system
  labels:
    k8s-app: node-problem-detector
    version: v0.1
    kubernetes.io/cluster-service: "true"
spec:
  template:
    metadata:
      # Pod labels mirror the DaemonSet labels above; keep the two in sync
      # when bumping the version.
      labels:
        k8s-app: node-problem-detector
        version: v0.1
        kubernetes.io/cluster-service: "true"
    spec:
      # The pod shares the node's network namespace.
      hostNetwork: true
      containers:
      - name: node-problem-detector
        image: gcr.io/google_containers/node-problem-detector:v0.1
        env:
        # Point the detector at the apiserver explicitly: the host is the
        # master_node pillar value, the port is fixed at 443.
        # Env values must be strings, hence the quoted port.
        - name: "KUBERNETES_SERVICE_HOST"
          value: "{{ pillar['master_node'] }}"
        - name: "KUBERNETES_SERVICE_PORT"
          value: "443"
        securityContext:
          # NOTE(review): presumably required to read the kernel log on the
          # host; confirm whether a narrower capability set would suffice.
          privileged: true
        resources:
          limits:
            cpu: "200m"
            memory: "100Mi"
          requests:
            cpu: "50m"
            memory: "20Mi"
        volumeMounts:
        # Host log directory, exposed read-only inside the container at /log.
        - name: log
          mountPath: /log
          readOnly: true
      volumes:
      - name: log
        hostPath:
          path: /var/log/
|
@ -485,6 +485,7 @@ ENABLE_CLUSTER_MONITORING: $(yaml-quote ${ENABLE_CLUSTER_MONITORING:-none})
|
||||
ENABLE_L7_LOADBALANCING: $(yaml-quote ${ENABLE_L7_LOADBALANCING:-none})
|
||||
ENABLE_CLUSTER_LOGGING: $(yaml-quote ${ENABLE_CLUSTER_LOGGING:-false})
|
||||
ENABLE_CLUSTER_UI: $(yaml-quote ${ENABLE_CLUSTER_UI:-false})
|
||||
ENABLE_NODE_PROBLEM_DETECTOR: $(yaml-quote ${ENABLE_NODE_PROBLEM_DETECTOR:-false})
|
||||
ENABLE_NODE_LOGGING: $(yaml-quote ${ENABLE_NODE_LOGGING:-false})
|
||||
LOGGING_DESTINATION: $(yaml-quote ${LOGGING_DESTINATION:-})
|
||||
ELASTICSEARCH_LOGGING_REPLICAS: $(yaml-quote ${ELASTICSEARCH_LOGGING_REPLICAS:-})
|
||||
@ -578,6 +579,7 @@ ENABLE_MANIFEST_URL: $(yaml-quote ${ENABLE_MANIFEST_URL:-false})
|
||||
MANIFEST_URL: $(yaml-quote ${MANIFEST_URL:-})
|
||||
MANIFEST_URL_HEADER: $(yaml-quote ${MANIFEST_URL_HEADER:-})
|
||||
NUM_NODES: $(yaml-quote ${NUM_NODES})
|
||||
MASTER_NAME: $(yaml-quote ${MASTER_NAME})
|
||||
EOF
|
||||
if [ -n "${APISERVER_TEST_ARGS:-}" ]; then
|
||||
cat >>$file <<EOF
|
||||
|
@ -104,6 +104,9 @@ CLUSTER_REGISTRY_DISK_TYPE_GCE="${CLUSTER_REGISTRY_DISK_TYPE_GCE:-pd-standard}"
|
||||
# Optional: Install Kubernetes UI
|
||||
ENABLE_CLUSTER_UI="${KUBE_ENABLE_CLUSTER_UI:-true}"
|
||||
|
||||
# Optional: Install the node problem detector addon.
# Enabled by default; export KUBE_ENABLE_NODE_PROBLEM_DETECTOR=false to skip it.
|
||||
ENABLE_NODE_PROBLEM_DETECTOR="${KUBE_ENABLE_NODE_PROBLEM_DETECTOR:-true}"
|
||||
|
||||
# Optional: Create autoscaler for cluster's nodes.
|
||||
ENABLE_NODE_AUTOSCALER="${KUBE_ENABLE_NODE_AUTOSCALER:-false}"
|
||||
if [[ "${ENABLE_NODE_AUTOSCALER}" == "true" ]]; then
|
||||
@ -126,6 +129,5 @@ OPENCONTRAIL_PUBLIC_SUBNET="${OPENCONTRAIL_PUBLIC_SUBNET:-10.1.0.0/16}"
|
||||
|
||||
# How should the kubelet configure hairpin mode?
|
||||
HAIRPIN_MODE="${HAIRPIN_MODE:-promiscuous-bridge}" # promiscuous-bridge, hairpin-veth, none
|
||||
|
||||
# Optional: if set to true, kube-up will configure the cluster to run e2e tests.
|
||||
E2E_STORAGE_TEST_ENVIRONMENT=${KUBE_E2E_STORAGE_TEST_ENVIRONMENT:-false}
|
||||
|
@ -125,6 +125,9 @@ CLUSTER_REGISTRY_DISK_TYPE_GCE="${CLUSTER_REGISTRY_DISK_TYPE_GCE:-pd-standard}"
|
||||
# Optional: Install Kubernetes UI
|
||||
ENABLE_CLUSTER_UI="${KUBE_ENABLE_CLUSTER_UI:-true}"
|
||||
|
||||
# Optional: Install the node problem detector addon.
# Enabled by default; export KUBE_ENABLE_NODE_PROBLEM_DETECTOR=false to skip it.
|
||||
ENABLE_NODE_PROBLEM_DETECTOR="${KUBE_ENABLE_NODE_PROBLEM_DETECTOR:-true}"
|
||||
|
||||
# Optional: Create autoscaler for cluster's nodes.
|
||||
ENABLE_NODE_AUTOSCALER="${KUBE_ENABLE_NODE_AUTOSCALER:-false}"
|
||||
if [[ "${ENABLE_NODE_AUTOSCALER}" == "true" ]]; then
|
||||
|
@ -429,6 +429,7 @@ service_cluster_ip_range: '$(echo "$SERVICE_CLUSTER_IP_RANGE" | sed -e "s/'/''/g
|
||||
enable_cluster_monitoring: '$(echo "$ENABLE_CLUSTER_MONITORING" | sed -e "s/'/''/g")'
|
||||
enable_cluster_logging: '$(echo "$ENABLE_CLUSTER_LOGGING" | sed -e "s/'/''/g")'
|
||||
enable_cluster_ui: '$(echo "$ENABLE_CLUSTER_UI" | sed -e "s/'/''/g")'
|
||||
enable_node_problem_detector: '$(echo "$ENABLE_NODE_PROBLEM_DETECTOR" | sed -e "s/'/''/g")'
|
||||
enable_l7_loadbalancing: '$(echo "$ENABLE_L7_LOADBALANCING" | sed -e "s/'/''/g")'
|
||||
enable_node_logging: '$(echo "$ENABLE_NODE_LOGGING" | sed -e "s/'/''/g")'
|
||||
logging_destination: '$(echo "$LOGGING_DESTINATION" | sed -e "s/'/''/g")'
|
||||
@ -448,6 +449,7 @@ enable_manifest_url: '$(echo "${ENABLE_MANIFEST_URL:-}" | sed -e "s/'/''/g")'
|
||||
manifest_url: '$(echo "${MANIFEST_URL:-}" | sed -e "s/'/''/g")'
|
||||
manifest_url_header: '$(echo "${MANIFEST_URL_HEADER:-}" | sed -e "s/'/''/g")'
|
||||
num_nodes: $(echo "${NUM_NODES:-}" | sed -e "s/'/''/g")
|
||||
master_node: $(echo "${MASTER_NAME:-}" | sed -e "s/'/''/g")
|
||||
e2e_storage_test_environment: '$(echo "$E2E_STORAGE_TEST_ENVIRONMENT" | sed -e "s/'/''/g")'
|
||||
kube_uid: '$(echo "${KUBE_UID}" | sed -e "s/'/''/g")'
|
||||
EOF
|
||||
|
@ -150,6 +150,17 @@ addon-dir-create:
|
||||
- file_mode: 644
|
||||
{% endif %}
|
||||
|
||||
{% if pillar.get('enable_node_problem_detector', '').lower() == 'true' %}
# Render the node-problem-detector addon manifest from its Jinja template into
# the addons directory. Only emitted when the enable_node_problem_detector
# pillar is the string 'true' (compared case-insensitively); a missing pillar
# falls back to '' and disables the addon.
/etc/kubernetes/addons/node-problem-detector/node-problem-detector.yaml:
  file.managed:
    - source: salt://kube-addons/node-problem-detector/node-problem-detector.yaml.in
    - template: jinja
    - user: root
    - group: root
    # file.managed takes `mode`; `file_mode` is a file.recurse/file.directory
    # argument and is ignored here. Quoted so YAML cannot re-type the value.
    - mode: '0644'
    # Create the node-problem-detector/ parent directory if it does not exist.
    - makedirs: True
{% endif %}
|
||||
|
||||
/etc/kubernetes/manifests/kube-addon-manager.yaml:
|
||||
file.managed:
|
||||
- source: salt://kube-addons/kube-addon-manager.yaml
|
||||
|
Loading…
Reference in New Issue
Block a user