From aba61276ed4d86d6e24ee6316f14d6e5808ce0f1 Mon Sep 17 00:00:00 2001
From: Patrick Ohly
Date: Thu, 9 Jan 2025 11:41:44 +0100
Subject: [PATCH] DRA e2e: adapt to increased ReservedFor limit

We want to be sure that the maximum number of pods per claim actually
get scheduled concurrently. Previously the test just made sure that
they all ran eventually.

Running 256 pods concurrently only works on more than 2 nodes, so
network-attached resources have to be used. This is what the increased
limit is meant for anyway. Because of the tightened validation of node
selectors in 1.32, the E2E test has to use MatchExpressions, which
allow listing multiple node names.
---
 test/e2e/dra/deploy.go |   6 +-
 test/e2e/dra/dra.go    | 150 ++++++++++++++++++++++++++++++-----------
 test/e2e/dra/kind.yaml |   7 ++
 3 files changed, 123 insertions(+), 40 deletions(-)

diff --git a/test/e2e/dra/deploy.go b/test/e2e/dra/deploy.go
index fe6a76dc0ea..ea2ef1f2675 100644
--- a/test/e2e/dra/deploy.go
+++ b/test/e2e/dra/deploy.go
@@ -277,8 +277,10 @@ func (d *Driver) SetUp(nodes *Nodes, resources Resources, devicesPerNode ...map[
             },
             NodeSelector: &v1.NodeSelector{
                 NodeSelectorTerms: []v1.NodeSelectorTerm{{
-                    MatchFields: []v1.NodeSelectorRequirement{{
-                        Key:      "metadata.name",
+                    // MatchExpressions allow multiple values,
+                    // MatchFields don't.
+                    MatchExpressions: []v1.NodeSelectorRequirement{{
+                        Key:      "kubernetes.io/hostname",
                         Operator: v1.NodeSelectorOpIn,
                         Values:   nodes.NodeNames,
                     }},
diff --git a/test/e2e/dra/dra.go b/test/e2e/dra/dra.go
index d8296d85113..374a23bdbc7 100644
--- a/test/e2e/dra/dra.go
+++ b/test/e2e/dra/dra.go
@@ -553,43 +553,6 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
         wg.Wait()
     })
 
-    f.It("supports sharing a claim sequentially", f.WithSlow(), func(ctx context.Context) {
-        var objects []klog.KMetadata
-        objects = append(objects, b.externalClaim())
-
-        // This test used to test usage of the claim by one pod
-        // at a time. After removing the "not sharable"
-        // feature, we have to create more pods than supported
-        // at the same time to get the same effect.
-        numPods := resourceapi.ResourceClaimReservedForMaxSize + 10
-        pods := make([]*v1.Pod, numPods)
-        for i := 0; i < numPods; i++ {
-            pod := b.podExternal()
-            pods[i] = pod
-            objects = append(objects, pod)
-        }
-
-        b.create(ctx, objects...)
-
-        // We don't know the order. All that matters is that all of them get scheduled eventually.
-        f.Timeouts.PodStartSlow *= time.Duration(numPods)
-        var wg sync.WaitGroup
-        wg.Add(numPods)
-        for i := 0; i < numPods; i++ {
-            pod := pods[i]
-            go func() {
-                defer ginkgo.GinkgoRecover()
-                defer wg.Done()
-                b.testPod(ctx, f.ClientSet, pod, expectedEnv...)
-                // We need to delete each running pod, otherwise the others cannot use the claim.
-                err := f.ClientSet.CoreV1().Pods(pod.Namespace).Delete(ctx, pod.Name, metav1.DeleteOptions{})
-                framework.ExpectNoError(err, "delete pod")
-                framework.ExpectNoError(e2epod.WaitForPodNotFoundInNamespace(ctx, f.ClientSet, pod.Name, pod.Namespace, f.Timeouts.PodStartSlow))
-            }()
-        }
-        wg.Wait()
-    })
-
     ginkgo.It("retries pod scheduling after creating device class", func(ctx context.Context) {
         var objects []klog.KMetadata
         pod, template := b.podInline()
@@ -666,7 +629,7 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
     // The following tests only make sense when there is more than one node.
     // They get skipped when there's only one node.
     multiNodeTests := func() {
-        nodes := NewNodes(f, 2, 8)
+        nodes := NewNodes(f, 3, 8)
 
         ginkgo.Context("with different ResourceSlices", func() {
             firstDevice := "pre-defined-device-01"
@@ -789,6 +752,117 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
                 }
             })
         })
+
+        ginkgo.Context("with network-attached resources", func() {
+            driver := NewDriver(f, nodes, networkResources)
+            b := newBuilder(f, driver)
+
+            f.It("supports sharing a claim sequentially", f.WithSlow(), func(ctx context.Context) {
+                var objects []klog.KMetadata
+                objects = append(objects, b.externalClaim())
+
+                // This test used to test usage of the claim by one pod
+                // at a time. After removing the "not sharable"
+                // feature and bumping up the maximum number of
+                // consumers this is now a stress test which runs
+                // the maximum number of pods per claim in parallel.
+                // This only works on clusters with >= 3 nodes.
+                numMaxPods := resourceapi.ResourceClaimReservedForMaxSize
+                ginkgo.By(fmt.Sprintf("Creating %d pods sharing the same claim", numMaxPods))
+                pods := make([]*v1.Pod, numMaxPods)
+                for i := 0; i < numMaxPods; i++ {
+                    pod := b.podExternal()
+                    pods[i] = pod
+                    objects = append(objects, pod)
+                }
+                b.create(ctx, objects...)
+
+                timeout := f.Timeouts.PodStartSlow * time.Duration(numMaxPods)
+                ensureDuration := f.Timeouts.PodStart // Don't check for too long, even if it is less precise.
+                podIsPending := gomega.HaveField("Spec.NodeName", gomega.BeEmpty())
+                waitForPodScheduled := func(pod *v1.Pod) {
+                    ginkgo.GinkgoHelper()
+                    gomega.Eventually(ctx, framework.GetObject(f.ClientSet.CoreV1().Pods(pod.Namespace).Get, pod.Name, metav1.GetOptions{})).
+                        WithTimeout(timeout).
+                        WithPolling(10*time.Second).
+                        ShouldNot(podIsPending, "Pod should get scheduled.")
+                }
+                ensurePodNotScheduled := func(pod *v1.Pod) {
+                    ginkgo.GinkgoHelper()
+                    gomega.Consistently(ctx, framework.GetObject(f.ClientSet.CoreV1().Pods(pod.Namespace).Get, pod.Name, metav1.GetOptions{})).
+                        WithTimeout(ensureDuration).
+                        WithPolling(10*time.Second).
+                        Should(podIsPending, "Pod should remain pending.")
+                }
+
+                // We don't know the order. All that matters is that all of them get scheduled eventually.
+                ginkgo.By(fmt.Sprintf("Waiting for %d pods to be scheduled", numMaxPods))
+                f.Timeouts.PodStartSlow *= time.Duration(numMaxPods)
+                var wg sync.WaitGroup
+                wg.Add(numMaxPods)
+                for i := 0; i < numMaxPods; i++ {
+                    pod := pods[i]
+                    go func() {
+                        defer ginkgo.GinkgoRecover()
+                        defer wg.Done()
+                        waitForPodScheduled(pod)
+                    }()
+                }
+                wg.Wait()
+
+                numMorePods := 10
+                ginkgo.By(fmt.Sprintf("Creating %d additional pods for the same claim", numMorePods))
+                morePods := make([]*v1.Pod, numMorePods)
+                objects = nil
+                for i := 0; i < numMorePods; i++ {
+                    pod := b.podExternal()
+                    morePods[i] = pod
+                    objects = append(objects, pod)
+                }
+                b.create(ctx, objects...)
+
+                // None of the additional pods can run because of the ReservedFor limit.
+                ginkgo.By(fmt.Sprintf("Check for %s that the additional pods don't get scheduled", ensureDuration))
+                wg.Add(numMorePods)
+                for i := 0; i < numMorePods; i++ {
+                    pod := morePods[i]
+                    go func() {
+                        defer ginkgo.GinkgoRecover()
+                        defer wg.Done()
+                        ensurePodNotScheduled(pod)
+                    }()
+                }
+                wg.Wait()
+
+                // We need to delete each running pod, otherwise the new ones cannot use the claim.
+                ginkgo.By(fmt.Sprintf("Deleting the initial %d pods", numMaxPods))
+                wg.Add(numMaxPods)
+                for i := 0; i < numMaxPods; i++ {
+                    pod := pods[i]
+                    go func() {
+                        defer ginkgo.GinkgoRecover()
+                        defer wg.Done()
+                        err := f.ClientSet.CoreV1().Pods(pod.Namespace).Delete(ctx, pod.Name, metav1.DeleteOptions{})
+                        framework.ExpectNoError(err, "delete pod")
+                        framework.ExpectNoError(e2epod.WaitForPodNotFoundInNamespace(ctx, f.ClientSet, pod.Name, pod.Namespace, f.Timeouts.PodStartSlow))
+                    }()
+                }
+                wg.Wait()
+
+                // Now those should also run - eventually...
+                ginkgo.By(fmt.Sprintf("Waiting for the additional %d pods to be scheduled", numMorePods))
+                wg.Add(numMorePods)
+                for i := 0; i < numMorePods; i++ {
+                    pod := morePods[i]
+                    go func() {
+                        defer ginkgo.GinkgoRecover()
+                        defer wg.Done()
+                        waitForPodScheduled(pod)
+                    }()
+                }
+                wg.Wait()
+            })
+        })
     }
 
     ginkgo.Context("on single node", func() {
diff --git a/test/e2e/dra/kind.yaml b/test/e2e/dra/kind.yaml
index d682f54d50e..d96893d144f 100644
--- a/test/e2e/dra/kind.yaml
+++ b/test/e2e/dra/kind.yaml
@@ -40,6 +40,13 @@ nodes:
     nodeRegistration:
       kubeletExtraArgs:
         v: "5"
+- role: worker
+  kubeadmConfigPatches:
+  - |
+    kind: JoinConfiguration
+    nodeRegistration:
+      kubeletExtraArgs:
+        v: "5"
 # Feature gates must be the last entry in this YAML.
 # Some Prow jobs add more feature gates with
 #
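
Illustrative sketch (not part of the patch itself): the commit message's point about node selectors, shown as a minimal, self-contained Go program. Under the tightened validation referenced above, a MatchFields requirement on "metadata.name" can only hold a single value, whereas a MatchExpressions requirement on the "kubernetes.io/hostname" label can list all test node names, which is what Driver.SetUp in test/e2e/dra/deploy.go now does with nodes.NodeNames. The helper name and the example hostnames are made up for this sketch.

// nodeselector_sketch.go - illustration only; depends on k8s.io/api.
package main

import (
    "fmt"

    v1 "k8s.io/api/core/v1"
)

// nodeSelectorForNodes builds a selector that matches exactly the given nodes.
// MatchExpressions may carry several values per requirement; a MatchFields
// requirement on "metadata.name" is limited to one value, so it cannot pin a
// network-attached resource to a whole set of test nodes.
func nodeSelectorForNodes(nodeNames []string) *v1.NodeSelector {
    return &v1.NodeSelector{
        NodeSelectorTerms: []v1.NodeSelectorTerm{{
            MatchExpressions: []v1.NodeSelectorRequirement{{
                Key:      "kubernetes.io/hostname",
                Operator: v1.NodeSelectorOpIn,
                Values:   nodeNames,
            }},
        }},
    }
}

func main() {
    // Hypothetical node names; with the extra worker added in kind.yaml above,
    // a local kind cluster would have three workers roughly like these.
    sel := nodeSelectorForNodes([]string{"kind-worker", "kind-worker2", "kind-worker3"})
    fmt.Printf("%+v\n", sel)
}

The printed selector only keeps the sketch runnable; in the patch the equivalent literal is assigned directly in Driver.SetUp.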