From b970bb5fe0e768a04dadc317cc333fe21ca87afb Mon Sep 17 00:00:00 2001
From: Danielle Lancashire
Date: Mon, 6 Sep 2021 16:45:21 +0200
Subject: [PATCH] e2e_node: Update GPU tests to reflect reality

In older versions of Kubernetes (at least pre-0.19, the earliest version
this test will run on unmodified), Pods that depended on devices could be
restarted after the device plugin had been removed.

Currently, however, this isn't possible: during
ContainerManager.GetResources() we attempt to call
DeviceManager.GetDeviceRunContainerOptions(), which fails because there is
no cached endpoint information for the plugin type.

This commit therefore splits the existing test in two:

- One active test that validates that assignments are maintained across
  restarts.
- One skipped test that validates the behaviour after GPUs have been
  removed, in case we decide that this is a bug that should be fixed in
  the future.
---
 test/e2e_node/gpu_device_plugin_test.go | 144 ++++++++++++------------
 1 file changed, 73 insertions(+), 71 deletions(-)

diff --git a/test/e2e_node/gpu_device_plugin_test.go b/test/e2e_node/gpu_device_plugin_test.go
index 2a81f8dbbb1..20b410b3326 100644
--- a/test/e2e_node/gpu_device_plugin_test.go
+++ b/test/e2e_node/gpu_device_plugin_test.go
@@ -19,18 +19,14 @@ package e2enode
 import (
 	"context"
 	"os/exec"
-	"strconv"
 	"time"
 
 	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/util/uuid"
-	"k8s.io/component-base/metrics/testutil"
-	kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
 	"k8s.io/kubernetes/test/e2e/framework"
 	e2egpu "k8s.io/kubernetes/test/e2e/framework/gpu"
 	e2emanifest "k8s.io/kubernetes/test/e2e/framework/manifest"
-	e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics"
 
 	"github.com/onsi/ginkgo"
 	"github.com/onsi/gomega"
@@ -96,18 +92,33 @@ var _ = SIGDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugin][NodeFeat
 			l, err := f.PodClient().List(context.TODO(), metav1.ListOptions{})
 			framework.ExpectNoError(err)
 
+			f.PodClient().DeleteSync(devicePluginPod.Name, metav1.DeleteOptions{}, 2*time.Minute)
+
 			for _, p := range l.Items {
 				if p.Namespace != f.Namespace.Name {
 					continue
 				}
 
-				f.PodClient().Delete(context.TODO(), p.Name, metav1.DeleteOptions{})
+				framework.Logf("Deleting pod: %s", p.Name)
+				f.PodClient().DeleteSync(p.Name, metav1.DeleteOptions{}, 2*time.Minute)
 			}
+
+			restartKubelet()
+
+			ginkgo.By("Waiting for GPUs to become unavailable on the local node")
+			gomega.Eventually(func() bool {
+				node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), framework.TestContext.NodeName, metav1.GetOptions{})
+				framework.ExpectNoError(err)
+				return numberOfNVIDIAGPUs(node) <= 0
+			}, 5*time.Minute, framework.Poll).Should(gomega.BeTrue())
 		})
 
-		ginkgo.It("checks that when Kubelet restarts exclusive GPU assignation to pods is kept.", func() {
-			ginkgo.By("Creating one GPU pod on a node with at least two GPUs")
-			podRECMD := "devs=$(ls /dev/ | egrep '^nvidia[0-9]+$') && echo gpu devices: $devs && sleep 120"
+		// This test is disabled as this behaviour has not existed since at least
+		// kubernetes 0.19. If this is a bug, then this test should pass when the
+		// issue is resolved. If the behaviour is intentional then it can be removed.
+		ginkgo.XIt("keeps GPU assignation to pods after the device plugin has been removed.", func() {
+			ginkgo.By("Creating one GPU pod")
+			podRECMD := "devs=$(ls /dev/ | egrep '^nvidia[0-9]+$') && echo gpu devices: $devs && sleep 180"
 			p1 := f.PodClient().CreateSync(makeBusyboxPod(e2egpu.NVIDIAGPUResourceName, podRECMD))
 
 			deviceIDRE := "gpu devices: (nvidia[0-9]+)"
@@ -115,7 +126,52 @@ var _ = SIGDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugin][NodeFeat
 			p1, err := f.PodClient().Get(context.TODO(), p1.Name, metav1.GetOptions{})
 			framework.ExpectNoError(err)
 
-			ginkgo.By("Restarting Kubelet and waiting for the current running pod to restart")
+			ginkgo.By("Deleting the device plugin")
+			f.PodClient().DeleteSync(devicePluginPod.Name, metav1.DeleteOptions{}, 2*time.Minute)
+
+			ginkgo.By("Waiting for GPUs to become unavailable on the local node")
+			gomega.Eventually(func() int64 {
+				node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), framework.TestContext.NodeName, metav1.GetOptions{})
+				framework.ExpectNoError(err)
+				return numberOfNVIDIAGPUs(node)
+			}, 10*time.Minute, framework.Poll).Should(gomega.BeZero(), "Expected GPUs to eventually be unavailable")
+
+			ginkgo.By("Checking that scheduled pods can continue to run even after we delete the device plugin")
+			ensurePodContainerRestart(f, p1.Name, p1.Name)
+			devIDRestart1 := parseLog(f, p1.Name, p1.Name, deviceIDRE)
+			framework.ExpectEqual(devIDRestart1, devID1)
+
+			ginkgo.By("Restarting Kubelet")
+			restartKubelet()
+			framework.WaitForAllNodesSchedulable(f.ClientSet, 30*time.Minute)
+
+			ginkgo.By("Checking that scheduled pods can continue to run even after we delete device plugin and restart Kubelet.")
+			ensurePodContainerRestart(f, p1.Name, p1.Name)
+			devIDRestart1 = parseLog(f, p1.Name, p1.Name, deviceIDRE)
+			framework.ExpectEqual(devIDRestart1, devID1)
+
+			// Cleanup
+			f.PodClient().DeleteSync(p1.Name, metav1.DeleteOptions{}, framework.DefaultPodDeletionTimeout)
+		})
+
+		ginkgo.It("keeps GPU assignment to pods across pod and kubelet restarts.", func() {
+			ginkgo.By("Creating one GPU pod on a node with at least two GPUs")
+			podRECMD := "devs=$(ls /dev/ | egrep '^nvidia[0-9]+$') && echo gpu devices: $devs && sleep 40"
+			p1 := f.PodClient().CreateSync(makeBusyboxPod(e2egpu.NVIDIAGPUResourceName, podRECMD))
+
+			deviceIDRE := "gpu devices: (nvidia[0-9]+)"
+			devID1 := parseLog(f, p1.Name, p1.Name, deviceIDRE)
+			p1, err := f.PodClient().Get(context.TODO(), p1.Name, metav1.GetOptions{})
+			framework.ExpectNoError(err)
+
+			ginkgo.By("Confirming that after many pod restarts, GPU assignment is kept")
+			for i := 0; i < 3; i++ {
+				ensurePodContainerRestart(f, p1.Name, p1.Name)
+				devIDRestart1 := parseLog(f, p1.Name, p1.Name, deviceIDRE)
+				framework.ExpectEqual(devIDRestart1, devID1)
+			}
+
+			ginkgo.By("Restarting Kubelet")
 			restartKubelet()
 
 			ginkgo.By("Confirming that after a kubelet and pod restart, GPU assignment is kept")
@@ -124,48 +180,22 @@ var _ = SIGDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugin][NodeFeat
 			framework.ExpectEqual(devIDRestart1, devID1)
 
 			ginkgo.By("Restarting Kubelet and creating another pod")
+			restartKubelet()
-			framework.WaitForAllNodesSchedulable(f.ClientSet, framework.TestContext.NodeSchedulableTimeout)
+			framework.WaitForAllNodesSchedulable(f.ClientSet, 30*time.Minute)
+
+			ensurePodContainerRestart(f, p1.Name, p1.Name)
+
 			gomega.Eventually(func() bool {
-				return numberOfNVIDIAGPUs(getLocalNode(f)) > 0
+				return numberOfNVIDIAGPUs(getLocalNode(f)) >= 2
 			}, 5*time.Minute, framework.Poll).Should(gomega.BeTrue())
+
 			p2 := f.PodClient().CreateSync(makeBusyboxPod(e2egpu.NVIDIAGPUResourceName, podRECMD))
 
 			ginkgo.By("Checking that pods got a different GPU")
 			devID2 := parseLog(f, p2.Name, p2.Name, deviceIDRE)
-			framework.ExpectEqual(devID1, devID2)
-
-			ginkgo.By("Deleting device plugin.")
-			f.ClientSet.CoreV1().Pods(metav1.NamespaceSystem).Delete(context.TODO(), devicePluginPod.Name, metav1.DeleteOptions{})
-			ginkgo.By("Waiting for GPUs to become unavailable on the local node")
-			gomega.Eventually(func() bool {
-				node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), framework.TestContext.NodeName, metav1.GetOptions{})
-				framework.ExpectNoError(err)
-				return numberOfNVIDIAGPUs(node) <= 0
-			}, 10*time.Minute, framework.Poll).Should(gomega.BeTrue())
-			ginkgo.By("Checking that scheduled pods can continue to run even after we delete device plugin.")
-			ensurePodContainerRestart(f, p1.Name, p1.Name)
-			devIDRestart1 = parseLog(f, p1.Name, p1.Name, deviceIDRE)
-			framework.ExpectEqual(devIDRestart1, devID1)
-
-			ensurePodContainerRestart(f, p2.Name, p2.Name)
-			devIDRestart2 := parseLog(f, p2.Name, p2.Name, deviceIDRE)
-			framework.ExpectEqual(devIDRestart2, devID2)
-			ginkgo.By("Restarting Kubelet.")
-			restartKubelet()
-			ginkgo.By("Checking that scheduled pods can continue to run even after we delete device plugin and restart Kubelet.")
-			ensurePodContainerRestart(f, p1.Name, p1.Name)
-			devIDRestart1 = parseLog(f, p1.Name, p1.Name, deviceIDRE)
-			framework.ExpectEqual(devIDRestart1, devID1)
-			ensurePodContainerRestart(f, p2.Name, p2.Name)
-			devIDRestart2 = parseLog(f, p2.Name, p2.Name, deviceIDRE)
-			framework.ExpectEqual(devIDRestart2, devID2)
-			logDevicePluginMetrics()
-
-			// Cleanup
-			f.PodClient().DeleteSync(p1.Name, metav1.DeleteOptions{}, framework.DefaultPodDeletionTimeout)
-			f.PodClient().DeleteSync(p2.Name, metav1.DeleteOptions{}, framework.DefaultPodDeletionTimeout)
+			framework.ExpectNotEqual(devID1, devID2)
 		})
 	})
 })
@@ -179,31 +209,3 @@ func checkIfNvidiaGPUsExistOnNode() bool {
 	}
 	return true
 }
-
-func logDevicePluginMetrics() {
-	ms, err := e2emetrics.GrabKubeletMetricsWithoutProxy(framework.TestContext.NodeName+":10255", "/metrics")
-	framework.ExpectNoError(err)
-	for msKey, samples := range ms {
-		switch msKey {
-		case kubeletmetrics.KubeletSubsystem + "_" + kubeletmetrics.DevicePluginAllocationDurationKey:
-			for _, sample := range samples {
-				latency := sample.Value
-				resource := string(sample.Metric["resource_name"])
-				var quantile float64
-				if val, ok := sample.Metric[testutil.QuantileLabel]; ok {
-					var err error
-					if quantile, err = strconv.ParseFloat(string(val), 64); err != nil {
-						continue
-					}
-					framework.Logf("Metric: %v ResourceName: %v Quantile: %v Latency: %v", msKey, resource, quantile, latency)
-				}
-			}
-		case kubeletmetrics.KubeletSubsystem + "_" + kubeletmetrics.DevicePluginRegistrationCountKey:
-			for _, sample := range samples {
-				resource := string(sample.Metric["resource_name"])
-				count := sample.Value
-				framework.Logf("Metric: %v ResourceName: %v Count: %v", msKey, resource, count)
-			}
-		}
-	}
-}
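
Reviewer note: the commit message references the kubelet path
ContainerManager.GetResources() -> DeviceManager.GetDeviceRunContainerOptions().
The Go sketch below is a deliberately simplified, hypothetical model of that
failure mode, not the real device manager code; the deviceManager type, its
endpoints map, and the socket path are illustrative assumptions only. It shows
why a container restart can no longer resolve its device options once the
plugin's cached endpoint is gone, which is why the second test above is
registered with ginkgo.XIt.

// Hypothetical, simplified model of the behaviour described in the commit
// message; it is not the real pkg/kubelet/cm/devicemanager implementation.
package main

import (
	"errors"
	"fmt"
)

// deviceManager caches one endpoint per resource name while the corresponding
// device plugin is registered. Deleting the plugin removes the cache entry.
type deviceManager struct {
	endpoints map[string]string // resource name -> plugin socket (stand-in for endpoint state)
}

// getDeviceRunContainerOptions mirrors the failure described above: once the
// plugin's endpoint is gone, the lookup fails and the container cannot be
// (re)started with its device assignment.
func (m *deviceManager) getDeviceRunContainerOptions(resource string) (string, error) {
	ep, ok := m.endpoints[resource]
	if !ok {
		return "", errors.New("no cached endpoint for resource " + resource)
	}
	return ep, nil
}

func main() {
	m := &deviceManager{endpoints: map[string]string{
		"nvidia.com/gpu": "/var/lib/kubelet/device-plugins/nvidia.sock",
	}}

	// While the plugin is registered, a container restart can resolve options.
	if opts, err := m.getDeviceRunContainerOptions("nvidia.com/gpu"); err == nil {
		fmt.Println("restart OK, endpoint:", opts)
	}

	// Simulate deleting the device plugin pod: the cached endpoint disappears.
	delete(m.endpoints, "nvidia.com/gpu")

	// A subsequent container restart now fails, matching the skipped test's
	// expectation that GPU pods keep running after plugin removal.
	if _, err := m.getDeviceRunContainerOptions("nvidia.com/gpu"); err != nil {
		fmt.Println("restart blocked:", err)
	}
}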