diff --git a/test/e2e/scheduling/nvidia-gpus.go b/test/e2e/scheduling/nvidia-gpus.go index be5566a3149..f14245c42f0 100644 --- a/test/e2e/scheduling/nvidia-gpus.go +++ b/test/e2e/scheduling/nvidia-gpus.go @@ -42,8 +42,14 @@ const ( cosOSImage = "Container-Optimized OS from Google" // Nvidia driver installation can take upwards of 5 minutes. driverInstallTimeout = 10 * time.Minute - // Nvidia COS driver installer daemonset. - cosNvidiaDriverInstallerUrl = "https://raw.githubusercontent.com/ContainerEngine/accelerators/stable/cos-nvidia-gpu-installer/daemonset.yaml" +) + +type podCreationFuncType func() *v1.Pod + +var ( + gpuResourceName v1.ResourceName + dsYamlUrl string + podCreationFunc podCreationFuncType ) func makeCudaAdditionTestPod() *v1.Pod { @@ -60,7 +66,7 @@ func makeCudaAdditionTestPod() *v1.Pod { Image: imageutils.GetE2EImage(imageutils.CudaVectorAdd), Resources: v1.ResourceRequirements{ Limits: v1.ResourceList{ - v1.ResourceNvidiaGPU: *resource.NewQuantity(1, resource.DecimalSI), + gpuResourceName: *resource.NewQuantity(1, resource.DecimalSI), }, }, VolumeMounts: []v1.VolumeMount{ @@ -86,6 +92,30 @@ func makeCudaAdditionTestPod() *v1.Pod { return testPod } +func makeCudaAdditionDevicePluginTestPod() *v1.Pod { + podName := testPodNamePrefix + string(uuid.NewUUID()) + testPod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: podName, + }, + Spec: v1.PodSpec{ + RestartPolicy: v1.RestartPolicyNever, + Containers: []v1.Container{ + { + Name: "vector-addition", + Image: imageutils.GetE2EImage(imageutils.CudaVectorAdd), + Resources: v1.ResourceRequirements{ + Limits: v1.ResourceList{ + gpuResourceName: *resource.NewQuantity(1, resource.DecimalSI), + }, + }, + }, + }, + }, + } + return testPod +} + func isClusterRunningCOS(f *framework.Framework) bool { nodeList, err := f.ClientSet.CoreV1().Nodes().List(metav1.ListOptions{}) framework.ExpectNoError(err, "getting node list") @@ -105,7 +135,8 @@ func areGPUsAvailableOnAllSchedulableNodes(f *framework.Framework) bool { if node.Spec.Unschedulable { continue } - if node.Status.Capacity.NvidiaGPU().Value() == 0 { + framework.Logf("gpuResourceName %s", gpuResourceName) + if val, ok := node.Status.Capacity[gpuResourceName]; !ok || val.Value() == 0 { framework.Logf("Nvidia GPUs not available on Node: %q", node.Name) return false } @@ -119,7 +150,9 @@ func getGPUsAvailable(f *framework.Framework) int64 { framework.ExpectNoError(err, "getting node list") var gpusAvailable int64 for _, node := range nodeList.Items { - gpusAvailable += node.Status.Capacity.NvidiaGPU().Value() + if val, ok := node.Status.Capacity[gpuResourceName]; ok { + gpusAvailable += (&val).Value() + } } return gpusAvailable } @@ -133,10 +166,21 @@ func testNvidiaGPUsOnCOS(f *framework.Framework) { Skip("Nvidia GPU tests are supproted only on Container Optimized OS image currently") } framework.Logf("Cluster is running on COS. Proceeding with test") + + if f.BaseName == "device-plugin-gpus" { + dsYamlUrl = "https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/device-plugin-daemonset.yaml" + gpuResourceName = "nvidia.com/gpu" + podCreationFunc = makeCudaAdditionDevicePluginTestPod + } else { + dsYamlUrl = "https://raw.githubusercontent.com/ContainerEngine/accelerators/master/cos-nvidia-gpu-installer/daemonset.yaml" + gpuResourceName = v1.ResourceNvidiaGPU + podCreationFunc = makeCudaAdditionTestPod + } + // GPU drivers might have already been installed. if !areGPUsAvailableOnAllSchedulableNodes(f) { // Install Nvidia Drivers. - ds := dsFromManifest(cosNvidiaDriverInstallerUrl) + ds := dsFromManifest(dsYamlUrl) ds.Namespace = f.Namespace.Name _, err := f.ClientSet.Extensions().DaemonSets(f.Namespace.Name).Create(ds) framework.ExpectNoError(err, "failed to create daemonset") @@ -149,7 +193,7 @@ func testNvidiaGPUsOnCOS(f *framework.Framework) { framework.Logf("Creating as many pods as there are Nvidia GPUs and have the pods run a CUDA app") podList := []*v1.Pod{} for i := int64(0); i < getGPUsAvailable(f); i++ { - podList = append(podList, f.PodClient().Create(makeCudaAdditionTestPod())) + podList = append(podList, f.PodClient().Create(podCreationFunc())) } framework.Logf("Wait for all test pods to succeed") // Wait for all pods to succeed @@ -192,3 +236,10 @@ var _ = SIGDescribe("[Feature:GPU]", func() { testNvidiaGPUsOnCOS(f) }) }) + +var _ = SIGDescribe("[Feature:GPUDevicePlugin]", func() { + f := framework.NewDefaultFramework("device-plugin-gpus") + It("run Nvidia GPU Device Plugin tests on Container Optimized OS only", func() { + testNvidiaGPUsOnCOS(f) + }) +})