mirror of
https://github.com/kata-containers/kata-containers.git
synced 2026-02-22 06:43:41 +00:00
kata-deploy: add node selector to nvidia runtime classes
The CC runtime classes kata-qemu-nvidia-gpu-snp and kata-qemu-nvidia-gpu-tdx are mutually exclusive with kata-qemu-nvidia-gpu, as dictated by the GPU CC mode setting. In order to properly support a cluster that has both CC and non-CC nodes, we use a node selector so the scheduling is consistent with the GPU mode. The GPU operator sets a label nvidia.com/cc.ready.state=[true, false] to indicate the GPU mode setting. Fixes #12431. Signed-off-by: Joji Mekkattuparamban <jojim@nvidia.com>
This commit is contained in:
committed by
Fabiano Fidêncio
parent
f4dcb66a3c
commit
f3bba08851
@@ -22,7 +22,7 @@
|
||||
{{- end -}}
|
||||
{{- end -}}
|
||||
|
||||
{{- /* Define runtime class configurations with their overhead settings */ -}}
|
||||
{{- /* Define runtime class configurations with their overhead settings and node selectors */ -}}
|
||||
{{- $runtimeClassConfigs := dict
|
||||
"clh" (dict "memory" "130Mi" "cpu" "250m")
|
||||
"cloud-hypervisor" (dict "memory" "130Mi" "cpu" "250m")
|
||||
@@ -49,6 +49,7 @@
|
||||
{{- /* Create RuntimeClass for each enabled shim */ -}}
|
||||
{{- range $shim := $enabledShims }}
|
||||
{{- $config := index $runtimeClassConfigs $shim }}
|
||||
{{- $shimConfig := index $.Values.shims $shim }}
|
||||
{{- if $config }}
|
||||
---
|
||||
kind: RuntimeClass
|
||||
@@ -78,6 +79,11 @@ overhead:
|
||||
scheduling:
|
||||
nodeSelector:
|
||||
katacontainers.io/kata-runtime: "true"
|
||||
{{- if and $shimConfig.runtimeClass $shimConfig.runtimeClass.nodeSelector }}
|
||||
{{- range $key, $value := $shimConfig.runtimeClass.nodeSelector }}
|
||||
{{ $key }}: {{ $value | quote }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
|
||||
@@ -114,6 +114,11 @@ shims:
|
||||
allowedHypervisorAnnotations: []
|
||||
containerd:
|
||||
snapshotter: ""
|
||||
runtimeClass:
|
||||
# This label is automatically added by gpu-operator. Override it
|
||||
# if you want to use a different label.
|
||||
nodeSelector:
|
||||
nvidia.com/cc.ready.state: "false"
|
||||
|
||||
qemu-nvidia-gpu-snp:
|
||||
enabled: ~
|
||||
@@ -128,6 +133,14 @@ shims:
|
||||
agent:
|
||||
httpsProxy: ""
|
||||
noProxy: ""
|
||||
runtimeClass:
|
||||
# These labels are automatically added by gpu-operator and NFD
|
||||
# respectively. Override if you want to use a different label.
|
||||
# If you don't have NFD, you need to add the snp label by other
|
||||
# means to your SNP nodes.
|
||||
nodeSelector:
|
||||
nvidia.com/cc.ready.state: "true"
|
||||
amd.feature.node.kubernetes.io/snp: "true"
|
||||
|
||||
qemu-nvidia-gpu-tdx:
|
||||
enabled: ~
|
||||
@@ -142,6 +155,14 @@ shims:
|
||||
agent:
|
||||
httpsProxy: ""
|
||||
noProxy: ""
|
||||
runtimeClass:
|
||||
# These labels are automatically added by gpu-operator and NFD
|
||||
# respectively. Override if you want to use a different label.
|
||||
# If you don't have NFD, you need to add the tdx label by other
|
||||
# means to your TDX nodes.
|
||||
nodeSelector:
|
||||
nvidia.com/cc.ready.state: "true"
|
||||
intel.feature.node.kubernetes.io/tdx: "true"
|
||||
|
||||
qemu-snp:
|
||||
enabled: ~
|
||||
|
||||
Reference in New Issue
Block a user