Merge pull request #25986 from Random-Liu/enable-node-problem-detector

Automatic merge from submit-queue

Add node problem detector as an addon pod.

```release-note
Introduce a new add-on pod NodeProblemDetector.

NodeProblemDetector is a DaemonSet running on each node, monitoring node health and reporting
node problems as NodeCondition and Event. Currently it already supports kernel log monitoring, and
will support more problem detection in the future. It is enabled by default on gce now.
```

This PR enables NodeProblemDetector as an add-on pod.

/cc @mikedanese @kubernetes/sig-node 

[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/.github/PULL_REQUEST_TEMPLATE.md?pixel)]()
This commit is contained in:
k8s-merge-robot 2016-05-22 06:17:28 -07:00
commit fe15db60f4
8 changed files with 81 additions and 1 deletions

View File

@ -0,0 +1,6 @@
# Maintainers
Lantao Liu <lantaol@google.com>
[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/cluster/addons/node-problem-detector/MAINTAINERS.md?pixel)]()

View File

@ -0,0 +1,10 @@
# Node Problem Detector
==============
Node Problem Detector is a DaemonSet running on each node, detecting node
problems.
Learn more at: https://github.com/kubernetes/node-problem-detector
[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/cluster/addons/node-problem-detector/README.md?pixel)]()

View File

@ -0,0 +1,44 @@
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
name: node-problem-detector-v0.1
namespace: kube-system
labels:
k8s-app: node-problem-detector
version: v0.1
kubernetes.io/cluster-service: "true"
spec:
template:
metadata:
labels:
k8s-app: node-problem-detector
version: v0.1
kubernetes.io/cluster-service: "true"
spec:
hostNetwork: true
containers:
- name: node-problem-detector
image: gcr.io/google_containers/node-problem-detector:v0.1
env:
# Config the host ip and port of apiserver.
- name: "KUBERNETES_SERVICE_HOST"
value: "{{ pillar['master_node'] }}"
- name: "KUBERNETES_SERVICE_PORT"
value: "443"
securityContext:
privileged: true
resources:
limits:
cpu: "200m"
memory: "100Mi"
requests:
cpu: "50m"
memory: "20Mi"
volumeMounts:
- name: log
mountPath: /log
readOnly: true
volumes:
- name: log
hostPath:
path: /var/log/

View File

@ -485,6 +485,7 @@ ENABLE_CLUSTER_MONITORING: $(yaml-quote ${ENABLE_CLUSTER_MONITORING:-none})
ENABLE_L7_LOADBALANCING: $(yaml-quote ${ENABLE_L7_LOADBALANCING:-none}) ENABLE_L7_LOADBALANCING: $(yaml-quote ${ENABLE_L7_LOADBALANCING:-none})
ENABLE_CLUSTER_LOGGING: $(yaml-quote ${ENABLE_CLUSTER_LOGGING:-false}) ENABLE_CLUSTER_LOGGING: $(yaml-quote ${ENABLE_CLUSTER_LOGGING:-false})
ENABLE_CLUSTER_UI: $(yaml-quote ${ENABLE_CLUSTER_UI:-false}) ENABLE_CLUSTER_UI: $(yaml-quote ${ENABLE_CLUSTER_UI:-false})
ENABLE_NODE_PROBLEM_DETECTOR: $(yaml-quote ${ENABLE_NODE_PROBLEM_DETECTOR:-false})
ENABLE_NODE_LOGGING: $(yaml-quote ${ENABLE_NODE_LOGGING:-false}) ENABLE_NODE_LOGGING: $(yaml-quote ${ENABLE_NODE_LOGGING:-false})
LOGGING_DESTINATION: $(yaml-quote ${LOGGING_DESTINATION:-}) LOGGING_DESTINATION: $(yaml-quote ${LOGGING_DESTINATION:-})
ELASTICSEARCH_LOGGING_REPLICAS: $(yaml-quote ${ELASTICSEARCH_LOGGING_REPLICAS:-}) ELASTICSEARCH_LOGGING_REPLICAS: $(yaml-quote ${ELASTICSEARCH_LOGGING_REPLICAS:-})
@ -578,6 +579,7 @@ ENABLE_MANIFEST_URL: $(yaml-quote ${ENABLE_MANIFEST_URL:-false})
MANIFEST_URL: $(yaml-quote ${MANIFEST_URL:-}) MANIFEST_URL: $(yaml-quote ${MANIFEST_URL:-})
MANIFEST_URL_HEADER: $(yaml-quote ${MANIFEST_URL_HEADER:-}) MANIFEST_URL_HEADER: $(yaml-quote ${MANIFEST_URL_HEADER:-})
NUM_NODES: $(yaml-quote ${NUM_NODES}) NUM_NODES: $(yaml-quote ${NUM_NODES})
MASTER_NAME: $(yaml-quote ${MASTER_NAME})
EOF EOF
if [ -n "${APISERVER_TEST_ARGS:-}" ]; then if [ -n "${APISERVER_TEST_ARGS:-}" ]; then
cat >>$file <<EOF cat >>$file <<EOF

View File

@ -104,6 +104,9 @@ CLUSTER_REGISTRY_DISK_TYPE_GCE="${CLUSTER_REGISTRY_DISK_TYPE_GCE:-pd-standard}"
# Optional: Install Kubernetes UI # Optional: Install Kubernetes UI
ENABLE_CLUSTER_UI="${KUBE_ENABLE_CLUSTER_UI:-true}" ENABLE_CLUSTER_UI="${KUBE_ENABLE_CLUSTER_UI:-true}"
# Optional: Install node problem detector.
ENABLE_NODE_PROBLEM_DETECTOR="${KUBE_ENABLE_NODE_PROBLEM_DETECTOR:-true}"
# Optional: Create autoscaler for cluster's nodes. # Optional: Create autoscaler for cluster's nodes.
ENABLE_NODE_AUTOSCALER="${KUBE_ENABLE_NODE_AUTOSCALER:-false}" ENABLE_NODE_AUTOSCALER="${KUBE_ENABLE_NODE_AUTOSCALER:-false}"
if [[ "${ENABLE_NODE_AUTOSCALER}" == "true" ]]; then if [[ "${ENABLE_NODE_AUTOSCALER}" == "true" ]]; then
@ -126,6 +129,5 @@ OPENCONTRAIL_PUBLIC_SUBNET="${OPENCONTRAIL_PUBLIC_SUBNET:-10.1.0.0/16}"
# How should the kubelet configure hairpin mode? # How should the kubelet configure hairpin mode?
HAIRPIN_MODE="${HAIRPIN_MODE:-promiscuous-bridge}" # promiscuous-bridge, hairpin-veth, none HAIRPIN_MODE="${HAIRPIN_MODE:-promiscuous-bridge}" # promiscuous-bridge, hairpin-veth, none
# Optional: if set to true, kube-up will configure the cluster to run e2e tests. # Optional: if set to true, kube-up will configure the cluster to run e2e tests.
E2E_STORAGE_TEST_ENVIRONMENT=${KUBE_E2E_STORAGE_TEST_ENVIRONMENT:-false} E2E_STORAGE_TEST_ENVIRONMENT=${KUBE_E2E_STORAGE_TEST_ENVIRONMENT:-false}

View File

@ -125,6 +125,9 @@ CLUSTER_REGISTRY_DISK_TYPE_GCE="${CLUSTER_REGISTRY_DISK_TYPE_GCE:-pd-standard}"
# Optional: Install Kubernetes UI # Optional: Install Kubernetes UI
ENABLE_CLUSTER_UI="${KUBE_ENABLE_CLUSTER_UI:-true}" ENABLE_CLUSTER_UI="${KUBE_ENABLE_CLUSTER_UI:-true}"
# Optional: Install node problem detector.
ENABLE_NODE_PROBLEM_DETECTOR="${KUBE_ENABLE_NODE_PROBLEM_DETECTOR:-true}"
# Optional: Create autoscaler for cluster's nodes. # Optional: Create autoscaler for cluster's nodes.
ENABLE_NODE_AUTOSCALER="${KUBE_ENABLE_NODE_AUTOSCALER:-false}" ENABLE_NODE_AUTOSCALER="${KUBE_ENABLE_NODE_AUTOSCALER:-false}"
if [[ "${ENABLE_NODE_AUTOSCALER}" == "true" ]]; then if [[ "${ENABLE_NODE_AUTOSCALER}" == "true" ]]; then

View File

@ -429,6 +429,7 @@ service_cluster_ip_range: '$(echo "$SERVICE_CLUSTER_IP_RANGE" | sed -e "s/'/''/g
enable_cluster_monitoring: '$(echo "$ENABLE_CLUSTER_MONITORING" | sed -e "s/'/''/g")' enable_cluster_monitoring: '$(echo "$ENABLE_CLUSTER_MONITORING" | sed -e "s/'/''/g")'
enable_cluster_logging: '$(echo "$ENABLE_CLUSTER_LOGGING" | sed -e "s/'/''/g")' enable_cluster_logging: '$(echo "$ENABLE_CLUSTER_LOGGING" | sed -e "s/'/''/g")'
enable_cluster_ui: '$(echo "$ENABLE_CLUSTER_UI" | sed -e "s/'/''/g")' enable_cluster_ui: '$(echo "$ENABLE_CLUSTER_UI" | sed -e "s/'/''/g")'
enable_node_problem_detector: '$(echo "$ENABLE_NODE_PROBLEM_DETECTOR" | sed -e "s/'/''/g")'
enable_l7_loadbalancing: '$(echo "$ENABLE_L7_LOADBALANCING" | sed -e "s/'/''/g")' enable_l7_loadbalancing: '$(echo "$ENABLE_L7_LOADBALANCING" | sed -e "s/'/''/g")'
enable_node_logging: '$(echo "$ENABLE_NODE_LOGGING" | sed -e "s/'/''/g")' enable_node_logging: '$(echo "$ENABLE_NODE_LOGGING" | sed -e "s/'/''/g")'
logging_destination: '$(echo "$LOGGING_DESTINATION" | sed -e "s/'/''/g")' logging_destination: '$(echo "$LOGGING_DESTINATION" | sed -e "s/'/''/g")'
@ -448,6 +449,7 @@ enable_manifest_url: '$(echo "${ENABLE_MANIFEST_URL:-}" | sed -e "s/'/''/g")'
manifest_url: '$(echo "${MANIFEST_URL:-}" | sed -e "s/'/''/g")' manifest_url: '$(echo "${MANIFEST_URL:-}" | sed -e "s/'/''/g")'
manifest_url_header: '$(echo "${MANIFEST_URL_HEADER:-}" | sed -e "s/'/''/g")' manifest_url_header: '$(echo "${MANIFEST_URL_HEADER:-}" | sed -e "s/'/''/g")'
num_nodes: $(echo "${NUM_NODES:-}" | sed -e "s/'/''/g") num_nodes: $(echo "${NUM_NODES:-}" | sed -e "s/'/''/g")
master_node: $(echo "${MASTER_NAME:-}" | sed -e "s/'/''/g")
e2e_storage_test_environment: '$(echo "$E2E_STORAGE_TEST_ENVIRONMENT" | sed -e "s/'/''/g")' e2e_storage_test_environment: '$(echo "$E2E_STORAGE_TEST_ENVIRONMENT" | sed -e "s/'/''/g")'
kube_uid: '$(echo "${KUBE_UID}" | sed -e "s/'/''/g")' kube_uid: '$(echo "${KUBE_UID}" | sed -e "s/'/''/g")'
EOF EOF

View File

@ -150,6 +150,17 @@ addon-dir-create:
- file_mode: 644 - file_mode: 644
{% endif %} {% endif %}
{% if pillar.get('enable_node_problem_detector', '').lower() == 'true' %}
/etc/kubernetes/addons/node-problem-detector/node-problem-detector.yaml:
file.managed:
- source: salt://kube-addons/node-problem-detector/node-problem-detector.yaml.in
- template: jinja
- user: root
- group: root
- file_mode: 644
- makedirs: True
{% endif %}
/etc/kubernetes/manifests/kube-addon-manager.yaml: /etc/kubernetes/manifests/kube-addon-manager.yaml:
file.managed: file.managed:
- source: salt://kube-addons/kube-addon-manager.yaml - source: salt://kube-addons/kube-addon-manager.yaml