diff --git a/pkg/controller/resourceclaim/controller.go b/pkg/controller/resourceclaim/controller.go
index 3e06237ce48..4dae8aef38f 100644
--- a/pkg/controller/resourceclaim/controller.go
+++ b/pkg/controller/resourceclaim/controller.go
@@ -456,6 +456,28 @@ func (ec *Controller) syncClaim(ctx context.Context, namespace, name string) err
 		// TODO (#113700): patch
 		claim := claim.DeepCopy()
 		claim.Status.ReservedFor = valid
+
+		// When a ResourceClaim uses delayed allocation, then it makes sense to
+		// deallocate the claim as soon as the last consumer stops using
+		// it. This ensures that the claim can be allocated again as needed by
+		// some future consumer instead of trying to schedule that consumer
+		// onto the node that was chosen for the previous consumer. It also
+		// releases the underlying resources for use by other claims.
+		//
+		// This has to be triggered by the transition from "was being used" to
+		// "is not used anymore" because a DRA driver is not required to set
+		// `status.reservedFor` together with `status.allocation`, i.e. a claim
+		// that is "currently unused" should not get deallocated.
+		//
+		// This does not matter for claims that were created for a pod. For
+		// those, the resource claim controller will trigger deletion when the
+		// pod is done. However, it doesn't hurt to also trigger deallocation
+		// for such claims and not checking for them keeps this code simpler.
+		if len(valid) == 0 &&
+			claim.Spec.AllocationMode == resourcev1alpha2.AllocationModeWaitForFirstConsumer {
+			claim.Status.DeallocationRequested = true
+		}
+
 		_, err := ec.kubeClient.ResourceV1alpha2().ResourceClaims(claim.Namespace).UpdateStatus(ctx, claim, metav1.UpdateOptions{})
 		if err != nil {
 			return err
diff --git a/test/e2e/dra/deploy.go b/test/e2e/dra/deploy.go
index a7583462d67..6d762b13c28 100644
--- a/test/e2e/dra/deploy.go
+++ b/test/e2e/dra/deploy.go
@@ -214,11 +214,11 @@ func (d *Driver) SetUp(nodes *Nodes, resources app.Resources) {
 		plugin, err := app.StartPlugin(logger, "/cdi", d.Name, nodename,
 			app.FileOperations{
 				Create: func(name string, content []byte) error {
-					ginkgo.By(fmt.Sprintf("creating CDI file %s on node %s:\n%s", name, nodename, string(content)))
+					klog.Background().Info("creating CDI file", "node", nodename, "filename", name, "content", string(content))
 					return d.createFile(&pod, name, content)
 				},
 				Remove: func(name string) error {
-					ginkgo.By(fmt.Sprintf("deleting CDI file %s on node %s", name, nodename))
+					klog.Background().Info("deleting CDI file", "node", nodename, "filename", name)
 					return d.removeFile(&pod, name)
 				},
 			},
diff --git a/test/e2e/dra/dra.go b/test/e2e/dra/dra.go
index 7dca393067e..2147caaf68e 100644
--- a/test/e2e/dra/dra.go
+++ b/test/e2e/dra/dra.go
@@ -254,6 +254,27 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation]", fu
 		ginkgo.Context("with immediate allocation", func() {
 			claimTests(resourcev1alpha2.AllocationModeImmediate)
 		})
+
+		ginkgo.It("must deallocate after use when using delayed allocation", func(ctx context.Context) {
+			parameters := b.parameters()
+			pod := b.podExternal()
+			claim := b.externalClaim(resourcev1alpha2.AllocationModeWaitForFirstConsumer)
+			b.create(ctx, parameters, claim, pod)
+
+			gomega.Eventually(ctx, func(ctx context.Context) (*resourcev1alpha2.ResourceClaim, error) {
+				return b.f.ClientSet.ResourceV1alpha2().ResourceClaims(b.f.Namespace.Name).Get(ctx, claim.Name, metav1.GetOptions{})
+			}).WithTimeout(f.Timeouts.PodDelete).ShouldNot(gomega.HaveField("Status.Allocation", (*resourcev1alpha2.AllocationResult)(nil)))
+
+			b.testPod(ctx, f.ClientSet, pod)
+
+			ginkgo.By(fmt.Sprintf("deleting pod %s", klog.KObj(pod)))
+			framework.ExpectNoError(b.f.ClientSet.CoreV1().Pods(b.f.Namespace.Name).Delete(ctx, pod.Name, metav1.DeleteOptions{}))
+
+			ginkgo.By("waiting for claim to get deallocated")
+			gomega.Eventually(ctx, func(ctx context.Context) (*resourcev1alpha2.ResourceClaim, error) {
+				return b.f.ClientSet.ResourceV1alpha2().ResourceClaims(b.f.Namespace.Name).Get(ctx, claim.Name, metav1.GetOptions{})
+			}).WithTimeout(f.Timeouts.PodDelete).Should(gomega.HaveField("Status.Allocation", (*resourcev1alpha2.AllocationResult)(nil)))
+		})
 	})
 
 	ginkgo.Context("multiple nodes", func() {