Adding GCE node termination handler as an optional addon.

This step is a prerequisite for auto-deploying that addon in GKE.

Signed-off-by: Vishnu kannan <vishnuk@google.com>
This commit is contained in:
Vishnu kannan 2018-08-30 17:12:20 -07:00
parent 113872798d
commit ee65e6ac04
6 changed files with 136 additions and 14 deletions

View File

@ -0,0 +1,4 @@
# GCE Node Termination Handler
This addon deploys the [GCE Node Termination Handler](https://github.com/GoogleCloudPlatform/k8s-node-termination-handler) onto Kubernetes clusters running on GCP.
It is meant to help translate GCE VM termination notifications into graceful Kubernetes terminations.

View File

@ -0,0 +1,76 @@
---
# DaemonSet that runs the GCE node termination handler in kube-system.
# Scheduling is restricted to nodes carrying either the
# cloud.google.com/gke-accelerator or the cloud.google.com/gke-preemptible
# label, and the pod tolerates every NoSchedule / NoExecute taint.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  labels:
    addonmanager.kubernetes.io/mode: Reconcile
    k8s-app: node-termination-handler
  namespace: kube-system
  name: node-termination-handler
spec:
  selector:
    matchLabels:
      k8s-app: node-termination-handler
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        k8s-app: node-termination-handler
      annotations:
        scheduler.alpha.kubernetes.io/critical-pod: ''
    spec:
      priorityClassName: system-node-critical
      # Necessary to reboot node
      hostPID: true
      affinity:
        nodeAffinity:
          # Restrict to GPU nodes or preemptible nodes.
          # The two nodeSelectorTerms are alternatives: matching either one
          # is enough for the pod to be scheduled on a node.
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: cloud.google.com/gke-accelerator
                    operator: Exists
              - matchExpressions:
                  - key: cloud.google.com/gke-preemptible
                    operator: Exists
      volumes:
        # Host directory that contains the kubeconfig passed via --kubeconfig.
        - name: klet-service-account
          hostPath:
            path: /var/lib/kubelet
        - name: klet-ca-crt
          hostPath:
            path: /etc/srv/kubernetes
      tolerations:
        # Run regardless of any existing taints.
        - effect: NoSchedule
          operator: Exists
        - effect: NoExecute
          operator: Exists
      containers:
        - image: k8s.gcr.io/gke-node-termination-handler@sha256:e08ca863a547754fa7b75064bdad04f04cbef86c7b0a181ecc7304e747623181
          name: node-termination-handler
          command: ["./node-termination-handler"]
          args:
            - "--logtostderr"
            # POD_NAME and POD_NAMESPACE are filled in from the env entries
            # below, so the handler's own pod is passed to --exclude-pods.
            - "--exclude-pods=$(POD_NAME):$(POD_NAMESPACE)"
            - "-v=10"
            - "--kubeconfig=/var/lib/kubelet/kubeconfig"
            - "--annotation=cloud.google.com/impending-node-termination"
          securityContext:
            capabilities:
              # Necessary to reboot node
              add: ["SYS_BOOT"]
          env:
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
          resources:
            limits:
              cpu: 50m
              memory: 30Mi
          volumeMounts:
            - name: klet-service-account
              mountPath: /var/lib/kubelet
            - name: klet-ca-crt
              mountPath: /etc/srv/kubernetes

View File

@ -37,6 +37,14 @@ MASTER_ROOT_DISK_SIZE=${MASTER_ROOT_DISK_SIZE:-$(get-master-root-disk-size)}
NODE_DISK_TYPE=${NODE_DISK_TYPE:-pd-standard}
NODE_DISK_SIZE=${NODE_DISK_SIZE:-100GB}
NODE_LOCAL_SSDS=${NODE_LOCAL_SSDS:-0}
# Historically fluentd was a manifest pod and was later migrated to a DaemonSet.
# To avoid having two instances of fluentd running on a node during a cluster
# upgrade, the kubelet needs to mark nodes on which fluentd is not running as a
# manifest pod with the appropriate label.
# TODO(piosz): remove this in 1.8
NODE_LABELS="${KUBE_NODE_LABELS:-beta.kubernetes.io/fluentd-ds-ready=true}"
# An extension to local SSDs allowing users to specify block/fs and SCSI/NVMe devices
# Format of this variable will be "#,scsi/nvme,block/fs" you can specify multiple
# configurations by separating them by a semi-colon ex. "2,scsi,fs;1,nvme,block"
@ -47,6 +55,9 @@ NODE_LOCAL_SSDS_EXT=${NODE_LOCAL_SSDS_EXT:-}
NODE_ACCELERATORS=${NODE_ACCELERATORS:-""}
REGISTER_MASTER_KUBELET=${REGISTER_MASTER:-true}
PREEMPTIBLE_NODE=${PREEMPTIBLE_NODE:-false}
# Preemptible nodes get an extra label appended to the comma-separated
# NODE_LABELS list.
if [[ "${PREEMPTIBLE_NODE}" == "true" ]]; then
  NODE_LABELS+=",cloud.google.com/gke-preemptible=true"
fi
PREEMPTIBLE_MASTER=${PREEMPTIBLE_MASTER:-false}
KUBE_DELETE_NODES=${KUBE_DELETE_NODES:-true}
KUBE_DELETE_NETWORK=${KUBE_DELETE_NETWORK:-} # default value calculated below
@ -163,13 +174,6 @@ ENABLE_METADATA_AGENT="${KUBE_ENABLE_METADATA_AGENT:-none}"
# Useful for scheduling heapster in large clusters with nodes of small size.
HEAPSTER_MACHINE_TYPE="${HEAPSTER_MACHINE_TYPE:-}"
# Historically fluentd was a manifest pod and then was migrated to DaemonSet.
# To avoid situation during cluster upgrade when there are two instances
# of fluentd running on a node, kubelet need to mark node on which
# fluentd is not running as a manifest pod with appropriate label.
# TODO(piosz): remove this in 1.8
NODE_LABELS="${KUBE_NODE_LABELS:-beta.kubernetes.io/fluentd-ds-ready=true}"
# NON_MASTER_NODE_LABELS are labels that will only be applied to non-master nodes.
NON_MASTER_NODE_LABELS="${KUBE_NON_MASTER_NODE_LABELS:-}"
@ -457,3 +461,11 @@ if [[ "${ENABLE_TOKENREQUEST:-}" == "true" ]]; then
SERVICEACCOUNT_ISSUER="https://kubernetes.io/${CLUSTER_NAME}"
SERVICEACCOUNT_API_AUDIENCES="https://kubernetes.default.svc"
fi
# Optional: enable the Node Termination Handler for preemptible and GPU VMs
# (disabled unless the variable is already set to a non-empty value).
# https://github.com/GoogleCloudPlatform/k8s-node-termination-handler
: "${ENABLE_NODE_TERMINATION_HANDLER:=false}"
# When a custom handler image is supplied, add the variable's name to the
# space-separated PROVIDER_VARS list.
if [[ -n "${NODE_TERMINATION_HANDLER_IMAGE:-}" ]]; then
  PROVIDER_VARS="${PROVIDER_VARS:-} NODE_TERMINATION_HANDLER_IMAGE"
fi

View File

@ -37,6 +37,14 @@ MASTER_ROOT_DISK_SIZE=${MASTER_ROOT_DISK_SIZE:-$(get-master-root-disk-size)}
NODE_DISK_TYPE=${NODE_DISK_TYPE:-pd-standard}
NODE_DISK_SIZE=${NODE_DISK_SIZE:-100GB}
NODE_LOCAL_SSDS=${NODE_LOCAL_SSDS:-0}
# Historically fluentd was a manifest pod and was later migrated to a DaemonSet.
# To avoid having two instances of fluentd running on a node during a cluster
# upgrade, the kubelet needs to mark nodes on which fluentd is not running as a
# manifest pod with the appropriate label.
# TODO(piosz): remove this in 1.8
NODE_LABELS="${KUBE_NODE_LABELS:-beta.kubernetes.io/fluentd-ds-ready=true}"
# An extension to local SSDs allowing users to specify block/fs and SCSI/NVMe devices
# Format of this variable will be "#,scsi/nvme,block/fs" you can specify multiple
# configurations by separating them by a semi-colon ex. "2,scsi,fs;1,nvme,block"
@ -47,6 +55,9 @@ REGISTER_MASTER_KUBELET=${REGISTER_MASTER:-true}
KUBE_APISERVER_REQUEST_TIMEOUT=300
PREEMPTIBLE_NODE=${PREEMPTIBLE_NODE:-false}
PREEMPTIBLE_MASTER=${PREEMPTIBLE_MASTER:-false}
# Preemptible nodes get an extra label appended to the comma-separated
# NODE_LABELS list.
if [[ "${PREEMPTIBLE_NODE}" == "true" ]]; then
  NODE_LABELS+=",cloud.google.com/gke-preemptible=true"
fi
KUBE_DELETE_NODES=${KUBE_DELETE_NODES:-true}
KUBE_DELETE_NETWORK=${KUBE_DELETE_NETWORK:-true}
CREATE_CUSTOM_NETWORK=${CREATE_CUSTOM_NETWORK:-false}
@ -201,13 +212,6 @@ CONTROLLER_MANAGER_TEST_ARGS="${CONTROLLER_MANAGER_TEST_ARGS:-} ${TEST_CLUSTER_R
SCHEDULER_TEST_ARGS="${SCHEDULER_TEST_ARGS:-} ${TEST_CLUSTER_API_CONTENT_TYPE}"
KUBEPROXY_TEST_ARGS="${KUBEPROXY_TEST_ARGS:-} ${TEST_CLUSTER_API_CONTENT_TYPE}"
# Historically fluentd was a manifest pod and then was migrated to DaemonSet.
# To avoid situation during cluster upgrade when there are two instances
# of fluentd running on a node, kubelet need to mark node on which
# fluentd is not running as a manifest pod with appropriate label.
# TODO(piosz): remove this in 1.8
NODE_LABELS="${KUBE_NODE_LABELS:-beta.kubernetes.io/fluentd-ds-ready=true}"
# NON_MASTER_NODE_LABELS are labels that will only be applied to non-master nodes.
NON_MASTER_NODE_LABELS="${KUBE_NON_MASTER_NODE_LABELS:-}"
@ -476,3 +480,11 @@ if [[ "${ENABLE_TOKENREQUEST:-}" == "true" ]]; then
SERVICEACCOUNT_ISSUER="https://kubernetes.io/${CLUSTER_NAME}"
SERVICEACCOUNT_API_AUDIENCES="https://kubernetes.default.svc"
fi
# Optional: enable the Node Termination Handler for preemptible and GPU VMs
# (disabled unless the variable is already set to a non-empty value).
# https://github.com/GoogleCloudPlatform/k8s-node-termination-handler
: "${ENABLE_NODE_TERMINATION_HANDLER:=false}"
# When a custom handler image is supplied, add the variable's name to the
# space-separated PROVIDER_VARS list.
if [[ -n "${NODE_TERMINATION_HANDLER_IMAGE:-}" ]]; then
  PROVIDER_VARS="${PROVIDER_VARS:-} NODE_TERMINATION_HANDLER_IMAGE"
fi

View File

@ -2038,6 +2038,12 @@ function setup-addon-manifests {
copy-manifests "${psp_dir}" "${dst_dir}"
fi
fi
# Copy the node-termination-handler manifests found under the addon's source
# directory when the handler is enabled. The ":-" default keeps the check
# safe when the variable is unset, matching the other ENABLE_* checks in
# this script.
if [[ "${ENABLE_NODE_TERMINATION_HANDLER:-}" == "true" ]]; then
  local -r nth_dir="${src_dir}/${3:-$2}/node-termination-handler"
  # The sub-directory is optional; only copy when it exists.
  if [[ -d "${nth_dir}" ]]; then
    copy-manifests "${nth_dir}" "${dst_dir}"
  fi
fi
}
# A function that downloads extra addons from a URL and puts them in the GCI
@ -2432,6 +2438,10 @@ EOF
if [[ "${ENABLE_NVIDIA_GPU_DEVICE_PLUGIN:-}" == "true" ]]; then
setup-addon-manifests "addons" "device-plugins/nvidia-gpu"
fi
# Install the node termination handler addon and then apply the optional
# image override. The ":-" default keeps the check safe when the variable is
# unset, matching the neighbouring ENABLE_* checks.
if [[ "${ENABLE_NODE_TERMINATION_HANDLER:-}" == "true" ]]; then
  setup-addon-manifests "addons" "node-termination-handler"
  # The helper builds the manifest path from its two positional arguments,
  # so hand it the same directory components used for the addon above.
  setup-node-termination-handler-manifest "addons" "node-termination-handler"
fi
if [[ "${ENABLE_CLUSTER_DNS:-}" == "true" ]]; then
if [[ "${CLUSTER_DNS_CORE_DNS:-}" == "true" ]]; then
setup-addon-manifests "addons" "dns/coredns"
@ -2511,6 +2521,13 @@ EOF
cp "${src_dir}/kube-addon-manager.yaml" /etc/kubernetes/manifests
}
# Rewrites every "image:" entry of the node termination handler DaemonSet
# manifest when NODE_TERMINATION_HANDLER_IMAGE is set to a non-empty value;
# otherwise the manifest is left untouched.
#
# Arguments:
#   $1 - directory under /etc/kubernetes (default: "addons")
#   $2 - addon sub-directory (default: "node-termination-handler")
#
# The defaults mirror the arguments passed to setup-addon-manifests for this
# addon. Without them, a call with no (or empty) arguments would build the
# path "/etc/kubernetes///daemonset.yaml".
# NOTE(review): assumes setup-addon-manifests places the manifest under
# /etc/kubernetes/$1/$2 - confirm against that function's dst_dir.
function setup-node-termination-handler-manifest {
  local -r nth_manifest="/etc/kubernetes/${1:-addons}/${2:-node-termination-handler}/daemonset.yaml"
  # ":-" keeps the check safe when no override image was provided.
  if [[ -n "${NODE_TERMINATION_HANDLER_IMAGE:-}" ]]; then
    sed -i "s|image:.*|image: ${NODE_TERMINATION_HANDLER_IMAGE}|" "${nth_manifest}"
  fi
}
# Starts an image-puller - used in test clusters.
function start-image-puller {
echo "Start image-puller"

View File

@ -911,6 +911,7 @@ VOLUME_PLUGIN_DIR: $(yaml-quote ${VOLUME_PLUGIN_DIR})
KUBELET_ARGS: $(yaml-quote ${KUBELET_ARGS})
REQUIRE_METADATA_KUBELET_CONFIG_FILE: $(yaml-quote true)
ENABLE_NETD: $(yaml-quote ${ENABLE_NETD:-false})
ENABLE_NODE_TERMINATION_HANDLER: $(yaml-quote ${ENABLE_NODE_TERMINATION_HANDLER:-false})
CUSTOM_NETD_YAML: |
$(echo "${CUSTOM_NETD_YAML:-}" | sed -e "s/'/''/g")
CUSTOM_CALICO_NODE_DAEMONSET_YAML: |