diff --git a/test/e2e/node/gpu.go b/test/e2e/node/gpu.go index acac83ea21f..b1f057103be 100644 --- a/test/e2e/node/gpu.go +++ b/test/e2e/node/gpu.go @@ -341,7 +341,7 @@ func SetupNVIDIAGPUNode(ctx context.Context, f *framework.Framework) { } else { // Using default local DaemonSet framework.Logf("Using default local nvidia-driver-installer daemonset manifest.") - data, err := e2etestfiles.Read("test/e2e/testing-manifests/scheduling/nvidia-driver-installer.yaml") + data, err := e2etestfiles.Read("test/e2e/testing-manifests/gpu/gce/nvidia-driver-installer.yaml") framework.ExpectNoError(err, "failed to read local manifest for nvidia-driver-installer daemonset") ds, err = e2emanifest.DaemonSetFromData(data) framework.ExpectNoError(err, "failed to parse local manifest for nvidia-driver-installer daemonset") @@ -349,14 +349,27 @@ func SetupNVIDIAGPUNode(ctx context.Context, f *framework.Framework) { prev, err := f.ClientSet.AppsV1().DaemonSets(f.Namespace.Name).Get(ctx, ds.Name, metav1.GetOptions{}) if err == nil && prev != nil { - framework.Logf("Daemonset already installed, skipping...") - return + framework.Logf("nvidia-driver-installer Daemonset already installed, skipping...") + } else { + ds.Namespace = f.Namespace.Name + _, err = f.ClientSet.AppsV1().DaemonSets(f.Namespace.Name).Create(ctx, ds, metav1.CreateOptions{}) + framework.ExpectNoError(err, "failed to create nvidia-driver-installer daemonset") + framework.Logf("Successfully created daemonset to install Nvidia drivers.") } - ds.Namespace = f.Namespace.Name - _, err = f.ClientSet.AppsV1().DaemonSets(f.Namespace.Name).Create(ctx, ds, metav1.CreateOptions{}) - framework.ExpectNoError(err, "failed to create nvidia-driver-installer daemonset") - framework.Logf("Successfully created daemonset to install Nvidia drivers.") + data, err := e2etestfiles.Read("test/e2e/testing-manifests/gpu/gce/nvidia-gpu-device-plugin.yaml") + framework.ExpectNoError(err, "failed to read local manifest for nvidia-gpu-device-plugin daemonset") + ds, err = e2emanifest.DaemonSetFromData(data) + framework.ExpectNoError(err, "failed to parse local manifest for nvidia-gpu-device-plugin daemonset") + + prev, err = f.ClientSet.AppsV1().DaemonSets(ds.Namespace).Get(ctx, ds.Name, metav1.GetOptions{}) + if err == nil && prev != nil { + framework.Logf("nvidia-gpu-device-plugin Daemonset already installed, skipping...") + } else { + _, err = f.ClientSet.AppsV1().DaemonSets(ds.Namespace).Create(ctx, ds, metav1.CreateOptions{}) + framework.ExpectNoError(err, "failed to create nvidia-gpu-device-plugin daemonset") + framework.Logf("Successfully created daemonset to install Nvidia device plugin.") + } waitForGPUs(ctx, f, ds.Namespace, ds.Name) } diff --git a/test/e2e/testing-manifests/embed.go b/test/e2e/testing-manifests/embed.go index 822017c5cd5..a4ec3f4877b 100644 --- a/test/e2e/testing-manifests/embed.go +++ b/test/e2e/testing-manifests/embed.go @@ -22,7 +22,7 @@ import ( e2etestfiles "k8s.io/kubernetes/test/e2e/framework/testfiles" ) -//go:embed dra flexvolume guestbook kubectl sample-device-plugin scheduling/nvidia-driver-installer.yaml statefulset storage-csi +//go:embed dra flexvolume guestbook kubectl sample-device-plugin gpu statefulset storage-csi var e2eTestingManifestsFS embed.FS func GetE2ETestingManifestsFS() e2etestfiles.EmbeddedFileSource { diff --git a/test/e2e/testing-manifests/scheduling/nvidia-driver-installer.yaml b/test/e2e/testing-manifests/gpu/gce/nvidia-driver-installer.yaml similarity index 100% rename from test/e2e/testing-manifests/scheduling/nvidia-driver-installer.yaml rename to test/e2e/testing-manifests/gpu/gce/nvidia-driver-installer.yaml diff --git a/test/e2e/testing-manifests/gpu/gce/nvidia-gpu-device-plugin.yaml b/test/e2e/testing-manifests/gpu/gce/nvidia-gpu-device-plugin.yaml new file mode 100644 index 00000000000..02a62e39874 --- /dev/null +++ b/test/e2e/testing-manifests/gpu/gce/nvidia-gpu-device-plugin.yaml @@ -0,0 +1,57 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: nvidia-gpu-device-plugin + namespace: kube-system + labels: + k8s-app: nvidia-gpu-device-plugin + addonmanager.kubernetes.io/mode: EnsureExists +spec: + selector: + matchLabels: + k8s-app: nvidia-gpu-device-plugin + template: + metadata: + labels: + k8s-app: nvidia-gpu-device-plugin + spec: + priorityClassName: system-node-critical + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: cloud.google.com/gke-accelerator + operator: Exists + tolerations: + - operator: "Exists" + effect: "NoExecute" + - operator: "Exists" + effect: "NoSchedule" + volumes: + - name: device-plugin + hostPath: + path: /var/lib/kubelet/device-plugins + - name: dev + hostPath: + path: /dev + containers: + - image: "registry.k8s.io/nvidia-gpu-device-plugin@sha256:4b036e8844920336fa48f36edeb7d4398f426d6a934ba022848deed2edbf09aa" + command: ["/usr/bin/nvidia-gpu-device-plugin", "-logtostderr"] + name: nvidia-gpu-device-plugin + resources: + requests: + cpu: 50m + memory: 10Mi + limits: + cpu: 50m + memory: 10Mi + securityContext: + privileged: true + volumeMounts: + - name: device-plugin + mountPath: /device-plugin + - name: dev + mountPath: /dev + updateStrategy: + type: RollingUpdate