Merge pull request #127714 from dims/re-add-nvidia-gpu-device-plugin.yaml-in-test-suite-itself

Re-add nvidia-gpu-device-plugin.yaml to the test suite itself
This commit is contained in:
Kubernetes Prow Robot
2024-09-27 20:52:02 +01:00
committed by GitHub
4 changed files with 78 additions and 8 deletions

View File

@@ -341,7 +341,7 @@ func SetupNVIDIAGPUNode(ctx context.Context, f *framework.Framework) {
} else {
// Using default local DaemonSet
framework.Logf("Using default local nvidia-driver-installer daemonset manifest.")
data, err := e2etestfiles.Read("test/e2e/testing-manifests/scheduling/nvidia-driver-installer.yaml")
data, err := e2etestfiles.Read("test/e2e/testing-manifests/gpu/gce/nvidia-driver-installer.yaml")
framework.ExpectNoError(err, "failed to read local manifest for nvidia-driver-installer daemonset")
ds, err = e2emanifest.DaemonSetFromData(data)
framework.ExpectNoError(err, "failed to parse local manifest for nvidia-driver-installer daemonset")
@@ -349,14 +349,27 @@ func SetupNVIDIAGPUNode(ctx context.Context, f *framework.Framework) {
prev, err := f.ClientSet.AppsV1().DaemonSets(f.Namespace.Name).Get(ctx, ds.Name, metav1.GetOptions{})
if err == nil && prev != nil {
framework.Logf("Daemonset already installed, skipping...")
return
framework.Logf("nvidia-driver-installer Daemonset already installed, skipping...")
} else {
ds.Namespace = f.Namespace.Name
_, err = f.ClientSet.AppsV1().DaemonSets(f.Namespace.Name).Create(ctx, ds, metav1.CreateOptions{})
framework.ExpectNoError(err, "failed to create nvidia-driver-installer daemonset")
framework.Logf("Successfully created daemonset to install Nvidia drivers.")
}
ds.Namespace = f.Namespace.Name
_, err = f.ClientSet.AppsV1().DaemonSets(f.Namespace.Name).Create(ctx, ds, metav1.CreateOptions{})
framework.ExpectNoError(err, "failed to create nvidia-driver-installer daemonset")
framework.Logf("Successfully created daemonset to install Nvidia drivers.")
data, err := e2etestfiles.Read("test/e2e/testing-manifests/gpu/gce/nvidia-gpu-device-plugin.yaml")
framework.ExpectNoError(err, "failed to read local manifest for nvidia-gpu-device-plugin daemonset")
ds, err = e2emanifest.DaemonSetFromData(data)
framework.ExpectNoError(err, "failed to parse local manifest for nvidia-gpu-device-plugin daemonset")
prev, err = f.ClientSet.AppsV1().DaemonSets(ds.Namespace).Get(ctx, ds.Name, metav1.GetOptions{})
if err == nil && prev != nil {
framework.Logf("nvidia-gpu-device-plugin Daemonset already installed, skipping...")
} else {
_, err = f.ClientSet.AppsV1().DaemonSets(ds.Namespace).Create(ctx, ds, metav1.CreateOptions{})
framework.ExpectNoError(err, "failed to create nvidia-gpu-device-plugin daemonset")
framework.Logf("Successfully created daemonset to install Nvidia device plugin.")
}
waitForGPUs(ctx, f, ds.Namespace, ds.Name)
}

View File

@@ -22,7 +22,7 @@ import (
e2etestfiles "k8s.io/kubernetes/test/e2e/framework/testfiles"
)
//go:embed dra flexvolume guestbook kubectl sample-device-plugin scheduling/nvidia-driver-installer.yaml statefulset storage-csi
//go:embed dra flexvolume guestbook kubectl sample-device-plugin gpu statefulset storage-csi
var e2eTestingManifestsFS embed.FS
func GetE2ETestingManifestsFS() e2etestfiles.EmbeddedFileSource {

View File

@@ -0,0 +1,57 @@
# DaemonSet that runs the nvidia-gpu-device-plugin container on every node
# that carries a GKE accelerator label. The manifest pins its own namespace
# (kube-system) rather than relying on the caller to set one.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: nvidia-gpu-device-plugin
  namespace: kube-system
  labels:
    k8s-app: nvidia-gpu-device-plugin
    # NOTE(review): addon-manager mode label; presumably only meaningful when
    # an addon manager is reconciling this object - confirm it is still wanted.
    addonmanager.kubernetes.io/mode: EnsureExists
spec:
  # The selector must match the pod template labels below.
  selector:
    matchLabels:
      k8s-app: nvidia-gpu-device-plugin
  template:
    metadata:
      labels:
        k8s-app: nvidia-gpu-device-plugin
    spec:
      priorityClassName: system-node-critical
      # Schedule only onto nodes that have the gke-accelerator label,
      # regardless of the label's value.
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: cloud.google.com/gke-accelerator
                operator: Exists
      # No key is given, so these tolerate every NoExecute and NoSchedule taint.
      tolerations:
      - operator: "Exists"
        effect: "NoExecute"
      - operator: "Exists"
        effect: "NoSchedule"
      # Host directories shared with the container: the kubelet device-plugin
      # directory and the host's /dev tree.
      volumes:
      - name: device-plugin
        hostPath:
          path: /var/lib/kubelet/device-plugins
      - name: dev
        hostPath:
          path: /dev
      containers:
      # The image is pinned by digest rather than by tag.
      - image: "registry.k8s.io/nvidia-gpu-device-plugin@sha256:4b036e8844920336fa48f36edeb7d4398f426d6a934ba022848deed2edbf09aa"
        command: ["/usr/bin/nvidia-gpu-device-plugin", "-logtostderr"]
        name: nvidia-gpu-device-plugin
        # Requests and limits are identical for both CPU and memory.
        resources:
          requests:
            cpu: 50m
            memory: 10Mi
          limits:
            cpu: 50m
            memory: 10Mi
        # NOTE(review): presumably privileged so the plugin can reach the GPU
        # device nodes under the mounted /dev - confirm.
        securityContext:
          privileged: true
        volumeMounts:
        - name: device-plugin
          mountPath: /device-plugin
        - name: dev
          mountPath: /dev
  updateStrategy:
    type: RollingUpdate