diff --git a/test/e2e/framework/gpu/BUILD b/test/e2e/framework/gpu/BUILD index 42f0f5246bc..0afa0cd498f 100644 --- a/test/e2e/framework/gpu/BUILD +++ b/test/e2e/framework/gpu/BUILD @@ -5,12 +5,6 @@ go_library( srcs = ["gpu_util.go"], importpath = "k8s.io/kubernetes/test/e2e/framework/gpu", visibility = ["//visibility:public"], - deps = [ - "//staging/src/k8s.io/api/core/v1:go_default_library", - "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", - "//staging/src/k8s.io/apimachinery/pkg/util/uuid:go_default_library", - "//test/e2e/framework:go_default_library", - ], ) filegroup( diff --git a/test/e2e/framework/gpu/gpu_util.go b/test/e2e/framework/gpu/gpu_util.go index 6fc4c1e2b46..fd9de240f4a 100644 --- a/test/e2e/framework/gpu/gpu_util.go +++ b/test/e2e/framework/gpu/gpu_util.go @@ -16,13 +16,6 @@ limitations under the License. package gpu -import ( - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/util/uuid" - "k8s.io/kubernetes/test/e2e/framework" -) - const ( // NVIDIAGPUResourceName is the extended name of the GPU resource since v1.8 // this uses the device plugin mechanism @@ -33,31 +26,3 @@ const ( // so we can override the daemonset in other setups (non COS). GPUDevicePluginDSYAML = "https://raw.githubusercontent.com/kubernetes/kubernetes/master/cluster/addons/device-plugins/nvidia-gpu/daemonset.yaml" ) - -// NumberOfNVIDIAGPUs returns the number of GPUs advertised by a node -// This is based on the Device Plugin system and expected to run on a COS based node -// After the NVIDIA drivers were installed -// TODO make this generic and not linked to COS only -func NumberOfNVIDIAGPUs(node *v1.Node) int64 { - val, ok := node.Status.Capacity[NVIDIAGPUResourceName] - if !ok { - return 0 - } - return val.Value() -} - -// NVIDIADevicePlugin returns the official Google Device Plugin pod for NVIDIA GPU in GKE -func NVIDIADevicePlugin() *v1.Pod { - ds, err := framework.DsFromManifest(GPUDevicePluginDSYAML) - framework.ExpectNoError(err) - p := &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "device-plugin-nvidia-gpu-" + string(uuid.NewUUID()), - Namespace: metav1.NamespaceSystem, - }, - Spec: ds.Spec.Template.Spec, - } - // Remove node affinity - p.Spec.Affinity = nil - return p -} diff --git a/test/e2e_node/gpu_device_plugin_test.go b/test/e2e_node/gpu_device_plugin_test.go index 90cdbb0c4c3..2edb366fa82 100644 --- a/test/e2e_node/gpu_device_plugin_test.go +++ b/test/e2e_node/gpu_device_plugin_test.go @@ -23,6 +23,7 @@ import ( v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/uuid" kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics" "k8s.io/kubernetes/test/e2e/framework" "k8s.io/kubernetes/test/e2e/framework/gpu" @@ -33,6 +34,34 @@ import ( "github.com/prometheus/common/model" ) +// numberOfNVIDIAGPUs returns the number of GPUs advertised by a node +// This is based on the Device Plugin system and expected to run on a COS based node +// After the NVIDIA drivers were installed +// TODO make this generic and not linked to COS only +func numberOfNVIDIAGPUs(node *v1.Node) int64 { + val, ok := node.Status.Capacity[gpu.NVIDIAGPUResourceName] + if !ok { + return 0 + } + return val.Value() +} + +// NVIDIADevicePlugin returns the official Google Device Plugin pod for NVIDIA GPU in GKE +func NVIDIADevicePlugin() *v1.Pod { + ds, err := framework.DsFromManifest(gpu.GPUDevicePluginDSYAML) + framework.ExpectNoError(err) + p := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "device-plugin-nvidia-gpu-" + string(uuid.NewUUID()), + Namespace: metav1.NamespaceSystem, + }, + Spec: ds.Spec.Template.Spec, + } + // Remove node affinity + p.Spec.Affinity = nil + return p +} + // Serial because the test restarts Kubelet var _ = framework.KubeDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugin][NodeFeature:GPUDevicePlugin][Serial] [Disruptive]", func() { f := framework.NewDefaultFramework("device-plugin-gpus-errors") @@ -47,15 +76,15 @@ var _ = framework.KubeDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugi } ginkgo.By("Creating the Google Device Plugin pod for NVIDIA GPU in GKE") - devicePluginPod, err = f.ClientSet.CoreV1().Pods(metav1.NamespaceSystem).Create(gpu.NVIDIADevicePlugin()) + devicePluginPod, err = f.ClientSet.CoreV1().Pods(metav1.NamespaceSystem).Create(NVIDIADevicePlugin()) framework.ExpectNoError(err) ginkgo.By("Waiting for GPUs to become available on the local node") gomega.Eventually(func() bool { - return gpu.NumberOfNVIDIAGPUs(getLocalNode(f)) > 0 + return numberOfNVIDIAGPUs(getLocalNode(f)) > 0 }, 5*time.Minute, framework.Poll).Should(gomega.BeTrue()) - if gpu.NumberOfNVIDIAGPUs(getLocalNode(f)) < 2 { + if numberOfNVIDIAGPUs(getLocalNode(f)) < 2 { ginkgo.Skip("Not enough GPUs to execute this test (at least two needed)") } }) @@ -95,7 +124,7 @@ var _ = framework.KubeDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugi restartKubelet() framework.WaitForAllNodesSchedulable(f.ClientSet, framework.TestContext.NodeSchedulableTimeout) gomega.Eventually(func() bool { - return gpu.NumberOfNVIDIAGPUs(getLocalNode(f)) > 0 + return numberOfNVIDIAGPUs(getLocalNode(f)) > 0 }, 5*time.Minute, framework.Poll).Should(gomega.BeTrue()) p2 := f.PodClient().CreateSync(makeBusyboxPod(gpu.NVIDIAGPUResourceName, podRECMD)) @@ -110,7 +139,7 @@ var _ = framework.KubeDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugi gomega.Eventually(func() bool { node, err := f.ClientSet.CoreV1().Nodes().Get(framework.TestContext.NodeName, metav1.GetOptions{}) framework.ExpectNoError(err) - return gpu.NumberOfNVIDIAGPUs(node) <= 0 + return numberOfNVIDIAGPUs(node) <= 0 }, 10*time.Minute, framework.Poll).Should(gomega.BeTrue()) ginkgo.By("Checking that scheduled pods can continue to run even after we delete device plugin.") ensurePodContainerRestart(f, p1.Name, p1.Name)