From 08d40f53a7acece4e617ca40b986847da8e00767 Mon Sep 17 00:00:00 2001
From: Patrick Ohly
Date: Mon, 22 May 2023 18:20:33 +0200
Subject: [PATCH] dra: test with and without immediate ReservedFor

The recommendation and default in the controller helper code is to set
ReservedFor to the pod which triggered delayed allocation. However, this
is neither required nor enforced. Therefore we should also test the
fallback path where kube-scheduler itself adds the pod to ReservedFor.
---
 .../controller/controller.go               | 16 ++++-
 test/e2e/dra/dra.go                        | 66 +++++++++++--------
 test/e2e/dra/test-driver/app/controller.go | 10 +--
 3 files changed, 60 insertions(+), 32 deletions(-)

diff --git a/staging/src/k8s.io/dynamic-resource-allocation/controller/controller.go b/staging/src/k8s.io/dynamic-resource-allocation/controller/controller.go
index 22f7e75a043..38507391303 100644
--- a/staging/src/k8s.io/dynamic-resource-allocation/controller/controller.go
+++ b/staging/src/k8s.io/dynamic-resource-allocation/controller/controller.go
@@ -50,6 +50,14 @@ import (
 type Controller interface {
 	// Run starts the controller.
 	Run(workers int)
+
+	// SetReservedFor can be used to disable adding the Pod which
+	// triggered allocation to the status.reservedFor. Normally,
+	// DRA drivers should always do that, so it's the default.
+	// But nothing in the protocol between the scheduler and
+	// a driver requires it, so at least for testing the control
+	// plane components it is useful to disable it.
+	SetReservedFor(enabled bool)
 }
 
 // Driver provides the actual allocation and deallocation operations.
@@ -146,6 +154,7 @@ type controller struct {
 	name                string
 	finalizer           string
 	driver              Driver
+	setReservedFor      bool
 	kubeClient          kubernetes.Interface
 	queue               workqueue.RateLimitingInterface
 	eventRecorder       record.EventRecorder
@@ -207,6 +216,7 @@ func New(
 		name:                name,
 		finalizer:           name + "/deletion-protection",
 		driver:              driver,
+		setReservedFor:      true,
 		kubeClient:          kubeClient,
 		rcLister:            rcInformer.Lister(),
 		rcSynced:            rcInformer.Informer().HasSynced,
@@ -232,6 +242,10 @@ func New(
 	return ctrl
 }
 
+func (ctrl *controller) SetReservedFor(enabled bool) {
+	ctrl.setReservedFor = enabled
+}
+
 func resourceEventHandlerFuncs(logger *klog.Logger, ctrl *controller) cache.ResourceEventHandlerFuncs {
 	return cache.ResourceEventHandlerFuncs{
 		AddFunc: func(obj interface{}) {
@@ -609,7 +623,7 @@ func (ctrl *controller) allocateClaims(ctx context.Context, claims []*ClaimAlloc
 		claim := claimAllocation.Claim.DeepCopy()
 		claim.Status.Allocation = claimAllocation.Allocation
 		claim.Status.DriverName = ctrl.name
-		if selectedUser != nil {
+		if selectedUser != nil && ctrl.setReservedFor {
 			claim.Status.ReservedFor = append(claim.Status.ReservedFor, *selectedUser)
 		}
 		logger.V(6).Info("Updating claim after allocation", "claim", claim)
diff --git a/test/e2e/dra/dra.go b/test/e2e/dra/dra.go
index 2ab9ef65f94..a1e4fa42642 100644
--- a/test/e2e/dra/dra.go
+++ b/test/e2e/dra/dra.go
@@ -179,8 +179,6 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation]", fu
 
 	ginkgo.Context("cluster", func() {
 		nodes := NewNodes(f, 1, 4)
-		driver := NewDriver(f, nodes, networkResources)
-		b := newBuilder(f, driver)
 
 		ginkgo.It("truncates the name of a generated resource claim", func(ctx context.Context) {
 			parameters := b.parameters()
@@ -213,7 +211,7 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation]", fu
 
 		// claimTests tries out several different combinations of pods with
 		// claims, both inline and external.
-		claimTests := func(allocationMode resourcev1alpha2.AllocationMode) {
+		claimTests := func(b *builder, driver *Driver, allocationMode resourcev1alpha2.AllocationMode) {
 			ginkgo.It("supports simple pod referencing inline resource claim", func(ctx context.Context) {
 				parameters := b.parameters()
 				pod, template := b.podInline(allocationMode)
@@ -322,35 +320,49 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation]", fu
 				}).WithTimeout(f.Timeouts.PodStartSlow).Should(gomega.HaveField("Status.ContainerStatuses", gomega.ContainElements(gomega.HaveField("RestartCount", gomega.BeNumerically(">=", 2)))))
 				gomega.Expect(driver.Controller.GetNumAllocations()).To(gomega.Equal(int64(1)), "number of allocations")
 			})
+
+			ginkgo.It("must deallocate after use when using delayed allocation", func(ctx context.Context) {
+				parameters := b.parameters()
+				pod := b.podExternal()
+				claim := b.externalClaim(resourcev1alpha2.AllocationModeWaitForFirstConsumer)
+				b.create(ctx, parameters, claim, pod)
+
+				gomega.Eventually(ctx, func(ctx context.Context) (*resourcev1alpha2.ResourceClaim, error) {
+					return b.f.ClientSet.ResourceV1alpha2().ResourceClaims(b.f.Namespace.Name).Get(ctx, claim.Name, metav1.GetOptions{})
+				}).WithTimeout(f.Timeouts.PodDelete).ShouldNot(gomega.HaveField("Status.Allocation", (*resourcev1alpha2.AllocationResult)(nil)))
+
+				b.testPod(ctx, f.ClientSet, pod)
+
+				ginkgo.By(fmt.Sprintf("deleting pod %s", klog.KObj(pod)))
+				framework.ExpectNoError(b.f.ClientSet.CoreV1().Pods(b.f.Namespace.Name).Delete(ctx, pod.Name, metav1.DeleteOptions{}))
+
+				ginkgo.By("waiting for claim to get deallocated")
+				gomega.Eventually(ctx, func(ctx context.Context) (*resourcev1alpha2.ResourceClaim, error) {
+					return b.f.ClientSet.ResourceV1alpha2().ResourceClaims(b.f.Namespace.Name).Get(ctx, claim.Name, metav1.GetOptions{})
+				}).WithTimeout(f.Timeouts.PodDelete).Should(gomega.HaveField("Status.Allocation", (*resourcev1alpha2.AllocationResult)(nil)))
+			})
 		}
 
-		ginkgo.Context("with delayed allocation", func() {
-			claimTests(resourcev1alpha2.AllocationModeWaitForFirstConsumer)
+		ginkgo.Context("with delayed allocation and setting ReservedFor", func() {
+			driver := NewDriver(f, nodes, networkResources)
+			b := newBuilder(f, driver)
+			claimTests(b, driver, resourcev1alpha2.AllocationModeWaitForFirstConsumer)
+		})
+
+		ginkgo.Context("with delayed allocation and not setting ReservedFor", func() {
+			driver := NewDriver(f, nodes, func() app.Resources {
+				resources := networkResources()
+				resources.DontSetReservedFor = true
+				return resources
+			})
+			b := newBuilder(f, driver)
+			claimTests(b, driver, resourcev1alpha2.AllocationModeWaitForFirstConsumer)
 		})
 
 		ginkgo.Context("with immediate allocation", func() {
-			claimTests(resourcev1alpha2.AllocationModeImmediate)
-		})
-
-		ginkgo.It("must deallocate after use when using delayed allocation", func(ctx context.Context) {
-			parameters := b.parameters()
-			pod := b.podExternal()
-			claim := b.externalClaim(resourcev1alpha2.AllocationModeWaitForFirstConsumer)
-			b.create(ctx, parameters, claim, pod)
-
-			gomega.Eventually(ctx, func(ctx context.Context) (*resourcev1alpha2.ResourceClaim, error) {
-				return b.f.ClientSet.ResourceV1alpha2().ResourceClaims(b.f.Namespace.Name).Get(ctx, claim.Name, metav1.GetOptions{})
-			}).WithTimeout(f.Timeouts.PodDelete).ShouldNot(gomega.HaveField("Status.Allocation", (*resourcev1alpha2.AllocationResult)(nil)))
-
-			b.testPod(ctx, f.ClientSet, pod)
-
-			ginkgo.By(fmt.Sprintf("deleting pod %s", klog.KObj(pod)))
-			framework.ExpectNoError(b.f.ClientSet.CoreV1().Pods(b.f.Namespace.Name).Delete(ctx, pod.Name, metav1.DeleteOptions{}))
-
-			ginkgo.By("waiting for claim to get deallocated")
-			gomega.Eventually(ctx, func(ctx context.Context) (*resourcev1alpha2.ResourceClaim, error) {
-				return b.f.ClientSet.ResourceV1alpha2().ResourceClaims(b.f.Namespace.Name).Get(ctx, claim.Name, metav1.GetOptions{})
-			}).WithTimeout(f.Timeouts.PodDelete).Should(gomega.HaveField("Status.Allocation", (*resourcev1alpha2.AllocationResult)(nil)))
+			driver := NewDriver(f, nodes, networkResources)
+			b := newBuilder(f, driver)
+			claimTests(b, driver, resourcev1alpha2.AllocationModeImmediate)
 		})
 	})
 
diff --git a/test/e2e/dra/test-driver/app/controller.go b/test/e2e/dra/test-driver/app/controller.go
index ef099687929..673c7bced26 100644
--- a/test/e2e/dra/test-driver/app/controller.go
+++ b/test/e2e/dra/test-driver/app/controller.go
@@ -38,10 +38,11 @@ import (
 )
 
 type Resources struct {
-	NodeLocal      bool
-	Nodes          []string
-	MaxAllocations int
-	Shareable      bool
+	DontSetReservedFor bool
+	NodeLocal          bool
+	Nodes              []string
+	MaxAllocations     int
+	Shareable          bool
 
 	// AllocateWrapper, if set, gets called for each Allocate call.
 	AllocateWrapper AllocateWrapperType
@@ -80,6 +81,7 @@ func NewController(clientset kubernetes.Interface, driverName string, resources
 func (c *ExampleController) Run(ctx context.Context, workers int) {
 	informerFactory := informers.NewSharedInformerFactory(c.clientset, 0 /* resync period */)
 	ctrl := controller.New(ctx, c.driverName, c, c.clientset, informerFactory)
+	ctrl.SetReservedFor(!c.resources.DontSetReservedFor)
 	informerFactory.Start(ctx.Done())
 	ctrl.Run(workers)
 	// If we get here, the context was canceled and we can wait for informer factory goroutines.
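Illustrative usage (not part of the patch): a minimal sketch of how a DRA driver built on the k8s.io/dynamic-resource-allocation/controller helper could wire up the new SetReservedFor knob, mirroring the test driver's Run method above. The function name runDriverController and its parameters are hypothetical; only controller.New, SetReservedFor, Run, and the informer factory calls come from the code in this patch. Passing setReservedFor=false exercises the fallback path where kube-scheduler itself adds the pod to status.reservedFor.

package example

import (
	"context"

	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes"
	"k8s.io/dynamic-resource-allocation/controller"
)

// runDriverController is a hypothetical helper that starts the DRA controller
// for a driver. With setReservedFor=false the helper no longer adds the
// triggering pod to the claim's status.reservedFor, so kube-scheduler has to
// do it, which is the fallback path this patch adds test coverage for.
func runDriverController(ctx context.Context, driverName string, driver controller.Driver, clientset kubernetes.Interface, workers int, setReservedFor bool) {
	informerFactory := informers.NewSharedInformerFactory(clientset, 0 /* resync period */)
	ctrl := controller.New(ctx, driverName, driver, clientset, informerFactory)
	// The default is true; disabling it is mainly useful for testing
	// control plane components, as the SetReservedFor comment above notes.
	ctrl.SetReservedFor(setReservedFor)
	informerFactory.Start(ctx.Done())
	ctrl.Run(workers)
}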