dra: test with and without immediate ReservedFor

The recommendation and default in the controller helper code is to set
ReservedFor to the pod which triggered delayed allocation. However, this
is neither required nor enforced. Therefore we should also test the fallback
path where kube-scheduler itself adds the pod to ReservedFor.
Patrick Ohly 2023-05-22 18:20:33 +02:00
parent 98ba89d31d
commit 08d40f53a7
3 changed files with 60 additions and 32 deletions
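
For a driver built on the controller helper, opting out of the default is a single call after constructing the controller. A minimal sketch in Go, assuming the usual driver wiring (ctx, driverName, driver, kubeClient, informerFactory and workers come from the surrounding driver code):

    // Construct the helper controller as usual.
    ctrl := controller.New(ctx, driverName, driver, kubeClient, informerFactory)
    // Leave status.reservedFor alone and rely on kube-scheduler to add the
    // pod which triggered delayed allocation (the fallback path under test).
    ctrl.SetReservedFor(false)
    informerFactory.Start(ctx.Done())
    ctrl.Run(workers)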


@@ -50,6 +50,14 @@ import (
 type Controller interface {
     // Run starts the controller.
     Run(workers int)
+
+    // SetReservedFor can be used to disable adding the Pod which
+    // triggered allocation to the status.reservedFor. Normally,
+    // DRA drivers should always do that, so it's the default.
+    // But nothing in the protocol between the scheduler and
+    // a driver requires it, so at least for testing the control
+    // plane components it is useful to disable it.
+    SetReservedFor(enabled bool)
 }

 // Driver provides the actual allocation and deallocation operations.
@@ -146,6 +154,7 @@ type controller struct {
     name           string
     finalizer      string
     driver         Driver
+    setReservedFor bool
     kubeClient     kubernetes.Interface
     queue          workqueue.RateLimitingInterface
     eventRecorder  record.EventRecorder
@@ -207,6 +216,7 @@ func New(
     name:           name,
     finalizer:      name + "/deletion-protection",
     driver:         driver,
+    setReservedFor: true,
     kubeClient:     kubeClient,
     rcLister:       rcInformer.Lister(),
     rcSynced:       rcInformer.Informer().HasSynced,
@@ -232,6 +242,10 @@ func New(
     return ctrl
 }

+func (ctrl *controller) SetReservedFor(enabled bool) {
+    ctrl.setReservedFor = enabled
+}
+
 func resourceEventHandlerFuncs(logger *klog.Logger, ctrl *controller) cache.ResourceEventHandlerFuncs {
     return cache.ResourceEventHandlerFuncs{
         AddFunc: func(obj interface{}) {
@@ -609,7 +623,7 @@ func (ctrl *controller) allocateClaims(ctx context.Context, claims []*ClaimAlloc
     claim := claimAllocation.Claim.DeepCopy()
     claim.Status.Allocation = claimAllocation.Allocation
     claim.Status.DriverName = ctrl.name
-    if selectedUser != nil {
+    if selectedUser != nil && ctrl.setReservedFor {
         claim.Status.ReservedFor = append(claim.Status.ReservedFor, *selectedUser)
     }
     logger.V(6).Info("Updating claim after allocation", "claim", claim)


@@ -179,8 +179,6 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation]", fu
 ginkgo.Context("cluster", func() {
     nodes := NewNodes(f, 1, 4)
-    driver := NewDriver(f, nodes, networkResources)
-    b := newBuilder(f, driver)

     ginkgo.It("truncates the name of a generated resource claim", func(ctx context.Context) {
         parameters := b.parameters()
@@ -213,7 +211,7 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation]", fu
     // claimTests tries out several different combinations of pods with
     // claims, both inline and external.
-    claimTests := func(allocationMode resourcev1alpha2.AllocationMode) {
+    claimTests := func(b *builder, driver *Driver, allocationMode resourcev1alpha2.AllocationMode) {
         ginkgo.It("supports simple pod referencing inline resource claim", func(ctx context.Context) {
             parameters := b.parameters()
             pod, template := b.podInline(allocationMode)
@@ -322,35 +320,49 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation]", fu
             }).WithTimeout(f.Timeouts.PodStartSlow).Should(gomega.HaveField("Status.ContainerStatuses", gomega.ContainElements(gomega.HaveField("RestartCount", gomega.BeNumerically(">=", 2)))))
             gomega.Expect(driver.Controller.GetNumAllocations()).To(gomega.Equal(int64(1)), "number of allocations")
         })
+
+        ginkgo.It("must deallocate after use when using delayed allocation", func(ctx context.Context) {
+            parameters := b.parameters()
+            pod := b.podExternal()
+            claim := b.externalClaim(resourcev1alpha2.AllocationModeWaitForFirstConsumer)
+            b.create(ctx, parameters, claim, pod)
+
+            gomega.Eventually(ctx, func(ctx context.Context) (*resourcev1alpha2.ResourceClaim, error) {
+                return b.f.ClientSet.ResourceV1alpha2().ResourceClaims(b.f.Namespace.Name).Get(ctx, claim.Name, metav1.GetOptions{})
+            }).WithTimeout(f.Timeouts.PodDelete).ShouldNot(gomega.HaveField("Status.Allocation", (*resourcev1alpha2.AllocationResult)(nil)))
+
+            b.testPod(ctx, f.ClientSet, pod)
+
+            ginkgo.By(fmt.Sprintf("deleting pod %s", klog.KObj(pod)))
+            framework.ExpectNoError(b.f.ClientSet.CoreV1().Pods(b.f.Namespace.Name).Delete(ctx, pod.Name, metav1.DeleteOptions{}))
+
+            ginkgo.By("waiting for claim to get deallocated")
+            gomega.Eventually(ctx, func(ctx context.Context) (*resourcev1alpha2.ResourceClaim, error) {
+                return b.f.ClientSet.ResourceV1alpha2().ResourceClaims(b.f.Namespace.Name).Get(ctx, claim.Name, metav1.GetOptions{})
+            }).WithTimeout(f.Timeouts.PodDelete).Should(gomega.HaveField("Status.Allocation", (*resourcev1alpha2.AllocationResult)(nil)))
+        })
     }

-    ginkgo.Context("with delayed allocation", func() {
-        claimTests(resourcev1alpha2.AllocationModeWaitForFirstConsumer)
+    ginkgo.Context("with delayed allocation and setting ReservedFor", func() {
+        driver := NewDriver(f, nodes, networkResources)
+        b := newBuilder(f, driver)
+        claimTests(b, driver, resourcev1alpha2.AllocationModeWaitForFirstConsumer)
+    })
+
+    ginkgo.Context("with delayed allocation and not setting ReservedFor", func() {
+        driver := NewDriver(f, nodes, func() app.Resources {
+            resources := networkResources()
+            resources.DontSetReservedFor = true
+            return resources
+        })
+        b := newBuilder(f, driver)
+        claimTests(b, driver, resourcev1alpha2.AllocationModeWaitForFirstConsumer)
     })

     ginkgo.Context("with immediate allocation", func() {
-        claimTests(resourcev1alpha2.AllocationModeImmediate)
-    })
-
-    ginkgo.It("must deallocate after use when using delayed allocation", func(ctx context.Context) {
-        parameters := b.parameters()
-        pod := b.podExternal()
-        claim := b.externalClaim(resourcev1alpha2.AllocationModeWaitForFirstConsumer)
-        b.create(ctx, parameters, claim, pod)
-
-        gomega.Eventually(ctx, func(ctx context.Context) (*resourcev1alpha2.ResourceClaim, error) {
-            return b.f.ClientSet.ResourceV1alpha2().ResourceClaims(b.f.Namespace.Name).Get(ctx, claim.Name, metav1.GetOptions{})
-        }).WithTimeout(f.Timeouts.PodDelete).ShouldNot(gomega.HaveField("Status.Allocation", (*resourcev1alpha2.AllocationResult)(nil)))
-
-        b.testPod(ctx, f.ClientSet, pod)
-
-        ginkgo.By(fmt.Sprintf("deleting pod %s", klog.KObj(pod)))
-        framework.ExpectNoError(b.f.ClientSet.CoreV1().Pods(b.f.Namespace.Name).Delete(ctx, pod.Name, metav1.DeleteOptions{}))
-
-        ginkgo.By("waiting for claim to get deallocated")
-        gomega.Eventually(ctx, func(ctx context.Context) (*resourcev1alpha2.ResourceClaim, error) {
-            return b.f.ClientSet.ResourceV1alpha2().ResourceClaims(b.f.Namespace.Name).Get(ctx, claim.Name, metav1.GetOptions{})
-        }).WithTimeout(f.Timeouts.PodDelete).Should(gomega.HaveField("Status.Allocation", (*resourcev1alpha2.AllocationResult)(nil)))
+        driver := NewDriver(f, nodes, networkResources)
+        b := newBuilder(f, driver)
+        claimTests(b, driver, resourcev1alpha2.AllocationModeImmediate)
     })
 })


@@ -38,10 +38,11 @@ import (
 )

 type Resources struct {
-    NodeLocal      bool
-    Nodes          []string
-    MaxAllocations int
-    Shareable      bool
+    DontSetReservedFor bool
+    NodeLocal          bool
+    Nodes              []string
+    MaxAllocations     int
+    Shareable          bool

     // AllocateWrapper, if set, gets called for each Allocate call.
     AllocateWrapper AllocateWrapperType
@@ -80,6 +81,7 @@ func NewController(clientset kubernetes.Interface, driverName string, resources
 func (c *ExampleController) Run(ctx context.Context, workers int) {
     informerFactory := informers.NewSharedInformerFactory(c.clientset, 0 /* resync period */)
     ctrl := controller.New(ctx, c.driverName, c, c.clientset, informerFactory)
+    ctrl.SetReservedFor(!c.resources.DontSetReservedFor)
     informerFactory.Start(ctx.Done())
     ctrl.Run(workers)
     // If we get here, the context was canceled and we can wait for informer factory goroutines.
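
In the example test driver the same knob is exposed through app.Resources, which is how the new e2e contexts exercise the fallback path. A hypothetical standalone wiring (clientset, driverName, and the resource values are placeholders, not taken from the commit):

    resources := app.Resources{
        Shareable:          true,
        MaxAllocations:     4,
        DontSetReservedFor: true, // leave status.reservedFor to kube-scheduler
    }
    // NewController builds the example driver controller; Run blocks until ctx is canceled.
    c := app.NewController(clientset, driverName, resources)
    c.Run(ctx, 1 /* workers */)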