Merge pull request #86062 from haosdent/clean-e2e-framework-gpu

e2e: move funcs of framework/gpu to e2e_node
2025-09-06 11:42:14 +00:00 · 2019-12-28 21:23:39 -08:00
parent 33bba19372 973fddd155
commit a097243cba
3 changed files with 34 additions and 46 deletions
--- a/test/e2e/framework/gpu/BUILD
+++ b/test/e2e/framework/gpu/BUILD
@@ -5,12 +5,6 @@ go_library(
    srcs = ["gpu_util.go"],
    importpath = "k8s.io/kubernetes/test/e2e/framework/gpu",
    visibility = ["//visibility:public"],
-    deps = [
-        "//staging/src/k8s.io/api/core/v1:go_default_library",
-        "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
-        "//staging/src/k8s.io/apimachinery/pkg/util/uuid:go_default_library",
-        "//test/e2e/framework:go_default_library",
-    ],
 )

 filegroup(
--- a/test/e2e/framework/gpu/gpu_util.go
+++ b/test/e2e/framework/gpu/gpu_util.go
@@ -16,13 +16,6 @@ limitations under the License.

 package gpu

-import (
-	v1 "k8s.io/api/core/v1"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	"k8s.io/apimachinery/pkg/util/uuid"
-	"k8s.io/kubernetes/test/e2e/framework"
-)
-
 const (
 	// NVIDIAGPUResourceName is the extended name of the GPU resource since v1.8
 	// this uses the device plugin mechanism
@@ -33,31 +26,3 @@ const (
 	// so we can override the daemonset in other setups (non COS).
 	GPUDevicePluginDSYAML = "https://raw.githubusercontent.com/kubernetes/kubernetes/master/cluster/addons/device-plugins/nvidia-gpu/daemonset.yaml"
 )
-
-// NumberOfNVIDIAGPUs returns the number of GPUs advertised by a node
-// This is based on the Device Plugin system and expected to run on a COS based node
-// After the NVIDIA drivers were installed
-// TODO make this generic and not linked to COS only
-func NumberOfNVIDIAGPUs(node *v1.Node) int64 {
-	val, ok := node.Status.Capacity[NVIDIAGPUResourceName]
-	if !ok {
-		return 0
-	}
-	return val.Value()
-}
-
-// NVIDIADevicePlugin returns the official Google Device Plugin pod for NVIDIA GPU in GKE
-func NVIDIADevicePlugin() *v1.Pod {
-	ds, err := framework.DsFromManifest(GPUDevicePluginDSYAML)
-	framework.ExpectNoError(err)
-	p := &v1.Pod{
-		ObjectMeta: metav1.ObjectMeta{
-			Name:      "device-plugin-nvidia-gpu-" + string(uuid.NewUUID()),
-			Namespace: metav1.NamespaceSystem,
-		},
-		Spec: ds.Spec.Template.Spec,
-	}
-	// Remove node affinity
-	p.Spec.Affinity = nil
-	return p
-}
--- a/test/e2e_node/gpu_device_plugin_test.go
+++ b/test/e2e_node/gpu_device_plugin_test.go
@@ -23,6 +23,7 @@ import (

 	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/uuid"
 	kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
 	"k8s.io/kubernetes/test/e2e/framework"
 	"k8s.io/kubernetes/test/e2e/framework/gpu"
@@ -33,6 +34,34 @@ import (
 	"github.com/prometheus/common/model"
 )

+// numberOfNVIDIAGPUs returns the quantity of gpu.NVIDIAGPUResourceName listed in the
+// node's Status.Capacity, or 0 when the node does not advertise that resource.
+// It is expected to run on a COS-based node after the NVIDIA drivers were installed.
+// TODO: make this generic and not linked to COS only.
+func numberOfNVIDIAGPUs(node *v1.Node) int64 {
+	val, ok := node.Status.Capacity[gpu.NVIDIAGPUResourceName]
+	if !ok {
+		return 0
+	}
+	return val.Value()
+}
+
+// NVIDIADevicePlugin returns a uniquely named kube-system pod whose spec is the pod template spec of the DaemonSet loaded from gpu.GPUDevicePluginDSYAML, with affinity cleared.
+func NVIDIADevicePlugin() *v1.Pod {
+	ds, err := framework.DsFromManifest(gpu.GPUDevicePluginDSYAML)
+	framework.ExpectNoError(err)
+	p := &v1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "device-plugin-nvidia-gpu-" + string(uuid.NewUUID()),
+			Namespace: metav1.NamespaceSystem,
+		},
+		Spec: ds.Spec.Template.Spec,
+	}
+	// Drop the affinity settings copied over from the DaemonSet's pod template.
+	p.Spec.Affinity = nil
+	return p
+}
+
 // Serial because the test restarts Kubelet
 var _ = framework.KubeDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugin][NodeFeature:GPUDevicePlugin][Serial] [Disruptive]", func() {
 	f := framework.NewDefaultFramework("device-plugin-gpus-errors")
@@ -47,15 +76,15 @@ var _ = framework.KubeDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugi
 			}

 			ginkgo.By("Creating the Google Device Plugin pod for NVIDIA GPU in GKE")
-			devicePluginPod, err = f.ClientSet.CoreV1().Pods(metav1.NamespaceSystem).Create(gpu.NVIDIADevicePlugin())
+			devicePluginPod, err = f.ClientSet.CoreV1().Pods(metav1.NamespaceSystem).Create(NVIDIADevicePlugin())
 			framework.ExpectNoError(err)

 			ginkgo.By("Waiting for GPUs to become available on the local node")
 			gomega.Eventually(func() bool {
-				return gpu.NumberOfNVIDIAGPUs(getLocalNode(f)) > 0
+				return numberOfNVIDIAGPUs(getLocalNode(f)) > 0
 			}, 5*time.Minute, framework.Poll).Should(gomega.BeTrue())

-			if gpu.NumberOfNVIDIAGPUs(getLocalNode(f)) < 2 {
+			if numberOfNVIDIAGPUs(getLocalNode(f)) < 2 {
 				ginkgo.Skip("Not enough GPUs to execute this test (at least two needed)")
 			}
 		})
@@ -95,7 +124,7 @@ var _ = framework.KubeDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugi
 			restartKubelet()
 			framework.WaitForAllNodesSchedulable(f.ClientSet, framework.TestContext.NodeSchedulableTimeout)
 			gomega.Eventually(func() bool {
-				return gpu.NumberOfNVIDIAGPUs(getLocalNode(f)) > 0
+				return numberOfNVIDIAGPUs(getLocalNode(f)) > 0
 			}, 5*time.Minute, framework.Poll).Should(gomega.BeTrue())
 			p2 := f.PodClient().CreateSync(makeBusyboxPod(gpu.NVIDIAGPUResourceName, podRECMD))

@@ -110,7 +139,7 @@ var _ = framework.KubeDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugi
 			gomega.Eventually(func() bool {
 				node, err := f.ClientSet.CoreV1().Nodes().Get(framework.TestContext.NodeName, metav1.GetOptions{})
 				framework.ExpectNoError(err)
-				return gpu.NumberOfNVIDIAGPUs(node) <= 0
+				return numberOfNVIDIAGPUs(node) <= 0
 			}, 10*time.Minute, framework.Poll).Should(gomega.BeTrue())
 			ginkgo.By("Checking that scheduled pods can continue to run even after we delete device plugin.")
 			ensurePodContainerRestart(f, p1.Name, p1.Name)