Merge pull request #54826 from mindprince/addon-manager
Automatic merge from submit-queue (batch tested with PRs 54826, 53576, 55591, 54946, 54825). If you want to cherry-pick this change to another branch, please follow the instructions [here](https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md).

Run nvidia-gpu device-plugin daemonset as an addon on GCE nodes that have nvidia GPUs attached.

- Instead of the old `Accelerators` feature that added the `alpha.kubernetes.io/nvidia-gpu` resource, use the new `DevicePlugins` feature that adds vendor-specific resources. (In the case of nvidia GPUs it will add the `nvidia.com/gpu` resource.)
- Add a node label to GCE nodes with accelerators attached. This node label is the same as what GKE attaches to node pools with accelerators attached. (For example, for the nvidia-tesla-p100 GPU, the label would be `cloud.google.com/gke-accelerator=nvidia-tesla-p100`.) This will help us target accelerator-specific daemonsets etc. to these nodes.
- Run the nvidia-gpu device-plugin daemonset as an addon on GCE nodes that have nvidia GPUs attached.
- Some minor documentation improvements in addon manager.

**Release note**:
```release-note
GCE nodes with NVIDIA GPUs attached now expose `nvidia.com/gpu` as a resource instead of `alpha.kubernetes.io/nvidia-gpu`.
```

/sig cluster-lifecycle
/sig scheduling
/area hw-accelerators

https://github.com/kubernetes/features/issues/368
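With the device-plugin addon in place, workloads request GPUs through the vendor resource instead of `alpha.kubernetes.io/nvidia-gpu`, and can target accelerator node types via the new label. A minimal sketch of consuming both, assuming the resource name and label described above (pod name, image, and GPU type are illustrative placeholders):

```sh
# Illustrative pod that requests one GPU via the vendor resource and pins
# itself to nodes carrying the GKE-style accelerator label added by this PR.
# (Pod name, image, and command are placeholders, not part of this change.)
cat <<'MANIFEST' | kubectl create -f -
apiVersion: v1
kind: Pod
metadata:
  name: gpu-example
spec:
  restartPolicy: Never
  nodeSelector:
    cloud.google.com/gke-accelerator: nvidia-tesla-p100
  containers:
  - name: gpu-example
    image: busybox
    command: ["sh", "-c", "sleep 3600"]
    resources:
      limits:
        nvidia.com/gpu: 1
MANIFEST
```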
Commit: 4f91113075
@@ -1,26 +1,27 @@
### Addon-manager

addon-manager manages two classes of addons with given template files.
addon-manager manages two classes of addons with given template files in
`$ADDON_PATH` (default `/etc/kubernetes/addons/`).
- Addons with label `addonmanager.kubernetes.io/mode=Reconcile` will be periodically
reconciled. Direct manipulation to these addons through apiserver is discouraged because
addon-manager will bring them back to the original state. In particular:
  - Addon will be re-created if it is deleted.
  - Addon will be reconfigured to the state given by the supplied fields in the template
  file periodically.
  - Addon will be deleted when its manifest file is deleted.
  - Addon will be deleted when its manifest file is deleted from the `$ADDON_PATH`.
- Addons with label `addonmanager.kubernetes.io/mode=EnsureExists` will be checked for
existence only. Users can edit these addons as they want. In particular:
  - Addon will only be created/re-created with the given template file when there is no
  instance of the resource with that name.
  - Addon will not be deleted when the manifest file is deleted.
  - Addon will not be deleted when the manifest file is deleted from the `$ADDON_PATH`.

Notes:
- Label `kubernetes.io/cluster-service=true` is deprecated (only for Addon Manager).
In future release (after one year), Addon Manager may not respect it anymore. Addons
have this label but without `addonmanager.kubernetes.io/mode=EnsureExists` will be
treated as "reconcile class addons" for now.
- Resources under $ADDON_PATH (default `/etc/kubernetes/addons/`) needs to have either one
of these two labels. Meanwhile namespaced resources need to be in `kube-system` namespace.
- Resources under `$ADDON_PATH` need to have either one of these two labels.
Meanwhile namespaced resources need to be in `kube-system` namespace.
Otherwise it will be omitted.
- The above label and namespace rule does not stand for `/opt/namespace.yaml` and
resources under `/etc/kubernetes/admission-controls/`. addon-manager will attempt to
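For concreteness, a minimal sketch of what a managed addon looks like on disk, assuming the default `$ADDON_PATH`; the ConfigMap name and contents are illustrative:

```sh
# Drop a labeled manifest into $ADDON_PATH; addon-manager picks it up on its
# next pass. Namespaced addon resources must live in kube-system.
ADDON_PATH=${ADDON_PATH:-/etc/kubernetes/addons}
cat >"${ADDON_PATH}/example-addon.yaml" <<'EOF'
apiVersion: v1
kind: ConfigMap
metadata:
  name: example-addon-config
  namespace: kube-system
  labels:
    # Use EnsureExists instead if users should be allowed to edit the object.
    addonmanager.kubernetes.io/mode: Reconcile
data:
  example.key: "example value"
EOF
```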
@@ -26,9 +26,6 @@
# 3. Kubectl prints the output to stderr (the output should be captured and then
# logged)

# The business logic for whether a given object should be created
# was already enforced by salt, and /etc/kubernetes/addons is the
# managed result is of that. Start everything below that directory.
KUBECTL=${KUBECTL_BIN:-/usr/local/bin/kubectl}
KUBECTL_OPTS=${KUBECTL_OPTS:-}
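The two assignments above only supply defaults; both can be overridden through the environment before the script runs. A small sketch of the expansion behaviour (the binary path and kubeconfig location are illustrative):

```sh
# With overrides set, the ':-' fallbacks in the script are not used.
export KUBECTL_BIN=/opt/bin/kubectl
export KUBECTL_OPTS="--kubeconfig=/var/lib/kube-addons/kubeconfig"

KUBECTL=${KUBECTL_BIN:-/usr/local/bin/kubectl}   # -> /opt/bin/kubectl
KUBECTL_OPTS=${KUBECTL_OPTS:-}                   # -> --kubeconfig=...
echo "${KUBECTL} ${KUBECTL_OPTS}"
```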
cluster/addons/device-plugins/nvidia-gpu/daemonset.yaml (new file, 45 lines)
@@ -0,0 +1,45 @@
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
  name: nvidia-gpu-device-plugin
  namespace: kube-system
  labels:
    k8s-app: nvidia-gpu-device-plugin
    addonmanager.kubernetes.io/mode: Reconcile
spec:
  template:
    metadata:
      labels:
        k8s-app: nvidia-gpu-device-plugin
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: cloud.google.com/gke-accelerator
                operator: Exists
      hostNetwork: true
      hostPID: true
      volumes:
      - name: device-plugin
        hostPath:
          path: /var/lib/kubelet/device-plugins
      - name: dev
        hostPath:
          path: /dev
      containers:
      - image: "gcr.io/google-containers/nvidia-gpu-device-plugin@sha256:943a62949cd80c26e7371d4e123dac61b4cc7281390721aaa95f265171094842"
        command: ["/usr/bin/nvidia-gpu-device-plugin", "-logtostderr"]
        name: nvidia-gpu-device-plugin
        resources:
          requests:
            cpu: 10m
            memory: 10Mi
        securityContext:
          privileged: true
        volumeMounts:
        - name: device-plugin
          mountPath: /device-plugin
        - name: dev
          mountPath: /dev
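Once this daemonset's pod is running on a GPU node and has registered through the device-plugin directory mounted above, the node should advertise `nvidia.com/gpu` in its capacity. A hedged way to check (assumes `kubectl` access to the cluster; output formatting varies):

```sh
# Confirm the plugin pods are running and GPU nodes expose the vendor resource.
kubectl -n kube-system get pods -l k8s-app=nvidia-gpu-device-plugin -o wide
kubectl describe nodes | grep -i 'nvidia.com/gpu'
```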
@@ -877,6 +877,11 @@ EOF
if [ -n "${CLUSTER_SIGNING_DURATION:-}" ]; then
  cat >>$file <<EOF
CLUSTER_SIGNING_DURATION: $(yaml-quote ${CLUSTER_SIGNING_DURATION})
EOF
fi
if [[ "${NODE_ACCELERATORS:-}" == *"type=nvidia"* ]]; then
  cat >>$file <<EOF
ENABLE_NVIDIA_GPU_DEVICE_PLUGIN: $(yaml-quote "true")
EOF
fi
@@ -197,7 +197,10 @@ RUNTIME_CONFIG="${KUBE_RUNTIME_CONFIG:-}"
FEATURE_GATES="${KUBE_FEATURE_GATES:-ExperimentalCriticalPodAnnotation=true}"

if [[ ! -z "${NODE_ACCELERATORS}" ]]; then
  FEATURE_GATES="${FEATURE_GATES},Accelerators=true"
  FEATURE_GATES="${FEATURE_GATES},DevicePlugins=true"
  if [[ "${NODE_ACCELERATORS}" =~ .*type=([a-zA-Z0-9-]+).* ]]; then
    NODE_LABELS="${NODE_LABELS},cloud.google.com/gke-accelerator=${BASH_REMATCH[1]}"
  fi
fi

# Optional: Install cluster DNS.
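The `=~` match above pulls the accelerator type out of `NODE_ACCELERATORS`, and `BASH_REMATCH[1]` feeds the node label. A standalone sketch, assuming the usual `type=<gpu-type>,count=<n>` value passed by GCE's kube-up (the sample value is illustrative):

```sh
# Extract the GPU type from an assumed NODE_ACCELERATORS value and build the label.
NODE_ACCELERATORS="type=nvidia-tesla-p100,count=2"
NODE_LABELS=""
if [[ "${NODE_ACCELERATORS}" =~ .*type=([a-zA-Z0-9-]+).* ]]; then
  NODE_LABELS="${NODE_LABELS},cloud.google.com/gke-accelerator=${BASH_REMATCH[1]}"
fi
echo "${NODE_LABELS#,}"   # -> cloud.google.com/gke-accelerator=nvidia-tesla-p100
```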
@@ -114,10 +114,6 @@ RUNTIME_CONFIG="${KUBE_RUNTIME_CONFIG:-}"
# Optional: set feature gates
FEATURE_GATES="${KUBE_FEATURE_GATES:-ExperimentalCriticalPodAnnotation=true}"

if [[ ! -z "${NODE_ACCELERATORS}" ]]; then
  FEATURE_GATES="${FEATURE_GATES},Accelerators=true"
fi

TERMINATED_POD_GC_THRESHOLD=${TERMINATED_POD_GC_THRESHOLD:-100}

# Extra docker options for nodes.
@@ -237,6 +233,13 @@ if [[ ${KUBE_ENABLE_INSECURE_REGISTRY:-false} == "true" ]]; then
  EXTRA_DOCKER_OPTS="${EXTRA_DOCKER_OPTS} --insecure-registry 10.0.0.0/8"
fi

if [[ ! -z "${NODE_ACCELERATORS}" ]]; then
  FEATURE_GATES="${FEATURE_GATES},DevicePlugins=true"
  if [[ "${NODE_ACCELERATORS}" =~ .*type=([a-zA-Z0-9-]+).* ]]; then
    NODE_LABELS="${NODE_LABELS},cloud.google.com/gke-accelerator=${BASH_REMATCH[1]}"
  fi
fi

# Optional: Install cluster DNS.
ENABLE_CLUSTER_DNS="${KUBE_ENABLE_CLUSTER_DNS:-true}"
DNS_SERVER_IP="10.0.0.10"
@@ -1836,6 +1836,9 @@ EOF
if [[ "${ENABLE_METRICS_SERVER:-}" == "true" ]]; then
  setup-addon-manifests "addons" "metrics-server"
fi
if [[ "${ENABLE_NVIDIA_GPU_DEVICE_PLUGIN:-}" == "true" ]]; then
  setup-addon-manifests "addons" "device-plugins/nvidia-gpu"
fi
if [[ "${ENABLE_CLUSTER_DNS:-}" == "true" ]]; then
  setup-addon-manifests "addons" "dns"
  local -r kubedns_file="${dst_dir}/dns/kube-dns.yaml"
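When the master starts with `ENABLE_NVIDIA_GPU_DEVICE_PLUGIN=true`, `setup-addon-manifests` copies the manifest into the addons directory and addon-manager then creates the daemonset. A quick sanity check (assumes `kubectl` access to the cluster):

```sh
# The daemonset from the copied manifest should exist once addon-manager has reconciled.
kubectl -n kube-system get daemonset nvidia-gpu-device-plugin
kubectl -n kube-system get pods -l k8s-app=nvidia-gpu-device-plugin
```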