From 65b76f361e650b18e5b8cc0f9a27d0f9f7f31a45 Mon Sep 17 00:00:00 2001
From: Jiaying Zhang
Date: Wed, 27 Sep 2017 14:59:34 -0700
Subject: [PATCH] Fixes flakiness in the GPUDevicePlugin e2e test.

Waits until nvidia GPUs disappear from all nodes after deleting the
device plugin DaemonSet, to make sure its pods have been deleted from
all nodes.
---
 test/e2e/scheduling/nvidia-gpus.go | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/test/e2e/scheduling/nvidia-gpus.go b/test/e2e/scheduling/nvidia-gpus.go
index 0c57ef1481a..2e5a229077f 100644
--- a/test/e2e/scheduling/nvidia-gpus.go
+++ b/test/e2e/scheduling/nvidia-gpus.go
@@ -139,6 +139,24 @@ func areGPUsAvailableOnAllSchedulableNodes(f *framework.Framework) bool {
 	return true
 }
 
+func areGPUsAvailableOnAnySchedulableNodes(f *framework.Framework) bool {
+	framework.Logf("Getting list of Nodes from API server")
+	nodeList, err := f.ClientSet.CoreV1().Nodes().List(metav1.ListOptions{})
+	framework.ExpectNoError(err, "getting node list")
+	for _, node := range nodeList.Items {
+		if node.Spec.Unschedulable {
+			continue
+		}
+		framework.Logf("gpuResourceName %s", gpuResourceName)
+		if val, ok := node.Status.Capacity[gpuResourceName]; ok && val.Value() > 0 {
+			framework.Logf("Nvidia GPUs available on Node: %q", node.Name)
+			return true
+		}
+	}
+	framework.Logf("Nvidia GPUs don't exist on any schedulable node")
+	return false
+}
+
 func getGPUsAvailable(f *framework.Framework) int64 {
 	nodeList, err := f.ClientSet.CoreV1().Nodes().List(metav1.ListOptions{})
 	framework.ExpectNoError(err, "getting node list")
@@ -220,10 +238,10 @@ var _ = SIGDescribe("[Feature:GPUDevicePlugin]", func() {
 		err = f.ClientSet.Extensions().DaemonSets(f.Namespace.Name).Delete(ds.Name, &metav1.DeleteOptions{OrphanDependents: &falseVar})
 		framework.ExpectNoError(err, "failed to delete daemonset")
 		framework.Logf("Successfully deleted device plugin daemonset. Wait for resource to be removed.")
-		// Wait for Nvidia GPUs to be not available on nodes
+		// Wait for Nvidia GPUs to be unavailable on all nodes.
 		Eventually(func() bool {
-			return !areGPUsAvailableOnAllSchedulableNodes(f)
-		}, 5*time.Minute, time.Second).Should(BeTrue())
+			return !areGPUsAvailableOnAnySchedulableNodes(f)
+		}, 10*time.Minute, time.Second).Should(BeTrue())
 
 		// 3. Restarts the device plugin DaemonSet. Verifies GPU resource is successfully advertised
 		// on the nodes and we can run pods using GPUs.
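
The core of the fix is a poll-until-gone pattern: Gomega's Eventually repeatedly evaluates a boolean condition (here, "no schedulable node still advertises GPU capacity") until it holds or the timeout expires. The sketch below restates that pattern as a standalone helper. It is illustrative only and not part of the patch: waitForGPUsToDisappear is a hypothetical name, and the code assumes the same framework.Framework client, Gomega dot-import, and package-level gpuResourceName that test/e2e/scheduling/nvidia-gpus.go already defines.

// Sketch only, not part of the patch. Assumes the e2e framework helpers,
// Gomega matchers, and package-level gpuResourceName used by
// test/e2e/scheduling/nvidia-gpus.go.
package scheduling

import (
	"time"

	. "github.com/onsi/gomega"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/kubernetes/test/e2e/framework"
)

// waitForGPUsToDisappear (hypothetical helper) polls once per second until
// no schedulable node advertises any GPU capacity, failing the test if that
// does not happen before the timeout.
func waitForGPUsToDisappear(f *framework.Framework, timeout time.Duration) {
	Eventually(func() bool {
		nodeList, err := f.ClientSet.CoreV1().Nodes().List(metav1.ListOptions{})
		framework.ExpectNoError(err, "getting node list")
		for _, node := range nodeList.Items {
			if node.Spec.Unschedulable {
				continue
			}
			if val, ok := node.Status.Capacity[gpuResourceName]; ok && val.Value() > 0 {
				// Some node still advertises GPUs; keep polling.
				return false
			}
		}
		// No schedulable node reports GPU capacity any more.
		return true
	}, timeout, time.Second).Should(BeTrue())
}

A call such as waitForGPUsToDisappear(f, 10*time.Minute) placed after the DaemonSet deletion would be equivalent to the Eventually block the patch installs, since negating areGPUsAvailableOnAnySchedulableNodes(f) is the same condition as "no schedulable node has GPU capacity".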