From 08d40f53a7acece4e617ca40b986847da8e00767 Mon Sep 17 00:00:00 2001
From: Patrick Ohly
Date: Mon, 22 May 2023 18:20:33 +0200
Subject: [PATCH] dra: test with and without immediate ReservedFor

The recommendation and default in the controller helper code is to set
ReservedFor to the pod which triggered delayed allocation. However, this
is neither required nor enforced. Therefore we should also test the
fallback path where kube-scheduler itself adds the pod to ReservedFor.
---
 .../controller/controller.go               | 16 ++++-
 test/e2e/dra/dra.go                        | 66 +++++++++++--------
 test/e2e/dra/test-driver/app/controller.go | 10 +--
 3 files changed, 60 insertions(+), 32 deletions(-)

diff --git a/staging/src/k8s.io/dynamic-resource-allocation/controller/controller.go b/staging/src/k8s.io/dynamic-resource-allocation/controller/controller.go
index 22f7e75a043..38507391303 100644
--- a/staging/src/k8s.io/dynamic-resource-allocation/controller/controller.go
+++ b/staging/src/k8s.io/dynamic-resource-allocation/controller/controller.go
@@ -50,6 +50,14 @@ import (
 type Controller interface {
 	// Run starts the controller.
 	Run(workers int)
+
+	// SetReservedFor can be used to disable adding the Pod which
+	// triggered allocation to the status.reservedFor. Normally,
+	// DRA drivers should always do that, so it's the default.
+	// But nothing in the protocol between the scheduler and
+	// a driver requires it, so at least for testing the control
+	// plane components it is useful to disable it.
+	SetReservedFor(enabled bool)
 }
 
 // Driver provides the actual allocation and deallocation operations.
@@ -146,6 +154,7 @@ type controller struct {
 	name                string
 	finalizer           string
 	driver              Driver
+	setReservedFor      bool
 	kubeClient          kubernetes.Interface
 	queue               workqueue.RateLimitingInterface
 	eventRecorder       record.EventRecorder
@@ -207,6 +216,7 @@ func New(
 		name:                name,
 		finalizer:           name + "/deletion-protection",
 		driver:              driver,
+		setReservedFor:      true,
 		kubeClient:          kubeClient,
 		rcLister:            rcInformer.Lister(),
 		rcSynced:            rcInformer.Informer().HasSynced,
@@ -232,6 +242,10 @@ func New(
 	return ctrl
 }
 
+func (ctrl *controller) SetReservedFor(enabled bool) {
+	ctrl.setReservedFor = enabled
+}
+
 func resourceEventHandlerFuncs(logger *klog.Logger, ctrl *controller) cache.ResourceEventHandlerFuncs {
 	return cache.ResourceEventHandlerFuncs{
 		AddFunc: func(obj interface{}) {
@@ -609,7 +623,7 @@ func (ctrl *controller) allocateClaims(ctx context.Context, claims []*ClaimAlloc
 		claim := claimAllocation.Claim.DeepCopy()
 		claim.Status.Allocation = claimAllocation.Allocation
 		claim.Status.DriverName = ctrl.name
-		if selectedUser != nil {
+		if selectedUser != nil && ctrl.setReservedFor {
 			claim.Status.ReservedFor = append(claim.Status.ReservedFor, *selectedUser)
 		}
 		logger.V(6).Info("Updating claim after allocation", "claim", claim)
diff --git a/test/e2e/dra/dra.go b/test/e2e/dra/dra.go
index 2ab9ef65f94..a1e4fa42642 100644
--- a/test/e2e/dra/dra.go
+++ b/test/e2e/dra/dra.go
@@ -179,8 +179,6 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation]", fu
 
 	ginkgo.Context("cluster", func() {
 		nodes := NewNodes(f, 1, 4)
-		driver := NewDriver(f, nodes, networkResources)
-		b := newBuilder(f, driver)
 
 		ginkgo.It("truncates the name of a generated resource claim", func(ctx context.Context) {
 			parameters := b.parameters()
@@ -213,7 +211,7 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation]", fu
 
 		// claimTests tries out several different combinations of pods with
 		// claims, both inline and external.
-		claimTests := func(allocationMode resourcev1alpha2.AllocationMode) {
+		claimTests := func(b *builder, driver *Driver, allocationMode resourcev1alpha2.AllocationMode) {
 			ginkgo.It("supports simple pod referencing inline resource claim", func(ctx context.Context) {
 				parameters := b.parameters()
 				pod, template := b.podInline(allocationMode)
@@ -322,35 +320,49 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation]", fu
 				}).WithTimeout(f.Timeouts.PodStartSlow).Should(gomega.HaveField("Status.ContainerStatuses", gomega.ContainElements(gomega.HaveField("RestartCount", gomega.BeNumerically(">=", 2)))))
 				gomega.Expect(driver.Controller.GetNumAllocations()).To(gomega.Equal(int64(1)), "number of allocations")
 			})
+
+			ginkgo.It("must deallocate after use when using delayed allocation", func(ctx context.Context) {
+				parameters := b.parameters()
+				pod := b.podExternal()
+				claim := b.externalClaim(resourcev1alpha2.AllocationModeWaitForFirstConsumer)
+				b.create(ctx, parameters, claim, pod)
+
+				gomega.Eventually(ctx, func(ctx context.Context) (*resourcev1alpha2.ResourceClaim, error) {
+					return b.f.ClientSet.ResourceV1alpha2().ResourceClaims(b.f.Namespace.Name).Get(ctx, claim.Name, metav1.GetOptions{})
+				}).WithTimeout(f.Timeouts.PodDelete).ShouldNot(gomega.HaveField("Status.Allocation", (*resourcev1alpha2.AllocationResult)(nil)))
+
+				b.testPod(ctx, f.ClientSet, pod)
+
+				ginkgo.By(fmt.Sprintf("deleting pod %s", klog.KObj(pod)))
+				framework.ExpectNoError(b.f.ClientSet.CoreV1().Pods(b.f.Namespace.Name).Delete(ctx, pod.Name, metav1.DeleteOptions{}))
+
+				ginkgo.By("waiting for claim to get deallocated")
+				gomega.Eventually(ctx, func(ctx context.Context) (*resourcev1alpha2.ResourceClaim, error) {
+					return b.f.ClientSet.ResourceV1alpha2().ResourceClaims(b.f.Namespace.Name).Get(ctx, claim.Name, metav1.GetOptions{})
+				}).WithTimeout(f.Timeouts.PodDelete).Should(gomega.HaveField("Status.Allocation", (*resourcev1alpha2.AllocationResult)(nil)))
+			})
 		}
 
-		ginkgo.Context("with delayed allocation", func() {
-			claimTests(resourcev1alpha2.AllocationModeWaitForFirstConsumer)
+		ginkgo.Context("with delayed allocation and setting ReservedFor", func() {
+			driver := NewDriver(f, nodes, networkResources)
+			b := newBuilder(f, driver)
+			claimTests(b, driver, resourcev1alpha2.AllocationModeWaitForFirstConsumer)
+		})
+
+		ginkgo.Context("with delayed allocation and not setting ReservedFor", func() {
+			driver := NewDriver(f, nodes, func() app.Resources {
+				resources := networkResources()
+				resources.DontSetReservedFor = true
+				return resources
+			})
+			b := newBuilder(f, driver)
+			claimTests(b, driver, resourcev1alpha2.AllocationModeWaitForFirstConsumer)
 		})
 
 		ginkgo.Context("with immediate allocation", func() {
-			claimTests(resourcev1alpha2.AllocationModeImmediate)
-		})
-
-		ginkgo.It("must deallocate after use when using delayed allocation", func(ctx context.Context) {
-			parameters := b.parameters()
-			pod := b.podExternal()
-			claim := b.externalClaim(resourcev1alpha2.AllocationModeWaitForFirstConsumer)
-			b.create(ctx, parameters, claim, pod)
-
-			gomega.Eventually(ctx, func(ctx context.Context) (*resourcev1alpha2.ResourceClaim, error) {
-				return b.f.ClientSet.ResourceV1alpha2().ResourceClaims(b.f.Namespace.Name).Get(ctx, claim.Name, metav1.GetOptions{})
-			}).WithTimeout(f.Timeouts.PodDelete).ShouldNot(gomega.HaveField("Status.Allocation", (*resourcev1alpha2.AllocationResult)(nil)))
-
-			b.testPod(ctx, f.ClientSet, pod)
-
-			ginkgo.By(fmt.Sprintf("deleting pod %s", klog.KObj(pod)))
-			framework.ExpectNoError(b.f.ClientSet.CoreV1().Pods(b.f.Namespace.Name).Delete(ctx, pod.Name, metav1.DeleteOptions{}))
-
-			ginkgo.By("waiting for claim to get deallocated")
-			gomega.Eventually(ctx, func(ctx context.Context) (*resourcev1alpha2.ResourceClaim, error) {
-				return b.f.ClientSet.ResourceV1alpha2().ResourceClaims(b.f.Namespace.Name).Get(ctx, claim.Name, metav1.GetOptions{})
-			}).WithTimeout(f.Timeouts.PodDelete).Should(gomega.HaveField("Status.Allocation", (*resourcev1alpha2.AllocationResult)(nil)))
+			driver := NewDriver(f, nodes, networkResources)
+			b := newBuilder(f, driver)
+			claimTests(b, driver, resourcev1alpha2.AllocationModeImmediate)
 		})
 	})
 
diff --git a/test/e2e/dra/test-driver/app/controller.go b/test/e2e/dra/test-driver/app/controller.go
index ef099687929..673c7bced26 100644
--- a/test/e2e/dra/test-driver/app/controller.go
+++ b/test/e2e/dra/test-driver/app/controller.go
@@ -38,10 +38,11 @@ import (
 )
 
 type Resources struct {
-	NodeLocal      bool
-	Nodes          []string
-	MaxAllocations int
-	Shareable      bool
+	DontSetReservedFor bool
+	NodeLocal          bool
+	Nodes              []string
+	MaxAllocations     int
+	Shareable          bool
 
 	// AllocateWrapper, if set, gets called for each Allocate call.
 	AllocateWrapper AllocateWrapperType
@@ -80,6 +81,7 @@ func NewController(clientset kubernetes.Interface, driverName string, resources
 func (c *ExampleController) Run(ctx context.Context, workers int) {
 	informerFactory := informers.NewSharedInformerFactory(c.clientset, 0 /* resync period */)
 	ctrl := controller.New(ctx, c.driverName, c, c.clientset, informerFactory)
+	ctrl.SetReservedFor(!c.resources.DontSetReservedFor)
 	informerFactory.Start(ctx.Done())
 	ctrl.Run(workers)
 	// If we get here, the context was canceled and we can wait for informer factory goroutines.
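Illustrative usage (not part of the patch): a minimal sketch of how a DRA driver built on the k8s.io/dynamic-resource-allocation/controller helper could wire up the new SetReservedFor knob, mirroring the test driver's Run method above. The function name runDriverController and its parameters are hypothetical; only controller.New, SetReservedFor, Run, and the informer factory calls come from the code in this patch. Passing setReservedFor=false exercises the fallback path where kube-scheduler itself adds the pod to status.reservedFor.

package example

import (
	"context"

	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes"
	"k8s.io/dynamic-resource-allocation/controller"
)

// runDriverController is a hypothetical helper that starts the DRA controller
// for a driver. With setReservedFor=false the helper no longer adds the
// triggering pod to the claim's status.reservedFor, so kube-scheduler has to
// do it, which is the fallback path this patch adds test coverage for.
func runDriverController(ctx context.Context, driverName string, driver controller.Driver, clientset kubernetes.Interface, workers int, setReservedFor bool) {
	informerFactory := informers.NewSharedInformerFactory(clientset, 0 /* resync period */)
	ctrl := controller.New(ctx, driverName, driver, clientset, informerFactory)
	// The default is true; disabling it is mainly useful for testing
	// control plane components, as the SetReservedFor comment above notes.
	ctrl.SetReservedFor(setReservedFor)
	informerFactory.Start(ctx.Done())
	ctrl.Run(workers)
}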