diff --git a/pkg/controller/resourceclaim/controller.go b/pkg/controller/resourceclaim/controller.go
index d301183e98a..087e1fc67de 100644
--- a/pkg/controller/resourceclaim/controller.go
+++ b/pkg/controller/resourceclaim/controller.go
@@ -435,10 +435,10 @@ func (ec *Controller) syncClaim(ctx context.Context, namespace, name string) err
 					keepEntry = false
 				} else {
 					pod, err := ec.podLister.Pods(claim.Namespace).Get(reservedFor.Name)
-					if err != nil && !errors.IsNotFound(err) {
+					switch {
+					case err != nil && !errors.IsNotFound(err):
 						return err
-					}
-					if pod == nil {
+					case err != nil:
 						// We might not have it in our informer cache
 						// yet. Removing the pod while the scheduler is
 						// scheduling it would be bad. We have to be
@@ -449,10 +449,14 @@ func (ec *Controller) syncClaim(ctx context.Context, namespace, name string) err
 							return err
 						}
 						if pod == nil || pod.UID != reservedFor.UID {
+							logger.V(6).Info("remove reservation because pod is gone or got replaced", "pod", klog.KObj(pod), "claim", klog.KRef(namespace, name))
 							keepEntry = false
 						}
-					} else if pod.UID != reservedFor.UID {
-						// Pod exists, but is a different incarnation under the same name.
+					case pod.UID != reservedFor.UID:
+						logger.V(6).Info("remove reservation because pod got replaced with new instance", "pod", klog.KObj(pod), "claim", klog.KRef(namespace, name))
+						keepEntry = false
+					case isPodDone(pod):
+						logger.V(6).Info("remove reservation because pod will not run anymore", "pod", klog.KObj(pod), "claim", klog.KRef(namespace, name))
 						keepEntry = false
 					}
 				}
diff --git a/pkg/controller/resourceclaim/controller_test.go b/pkg/controller/resourceclaim/controller_test.go
index d6fb1880c2c..2cadb5d1505 100644
--- a/pkg/controller/resourceclaim/controller_test.go
+++ b/pkg/controller/resourceclaim/controller_test.go
@@ -187,6 +187,27 @@ func TestSyncHandler(t *testing.T) {
 			expectedClaims:  []resourcev1alpha2.ResourceClaim{*testClaim},
 			expectedMetrics: expectedMetrics{0, 0},
 		},
+		{
+			name: "clear-reserved-when-done",
+			pods: func() []*v1.Pod {
+				pods := []*v1.Pod{testPodWithResource.DeepCopy()}
+				pods[0].Status.Phase = v1.PodSucceeded
+				return pods
+			}(),
+			key: claimKey(testClaimReserved),
+			claims: func() []*resourcev1alpha2.ResourceClaim {
+				claims := []*resourcev1alpha2.ResourceClaim{testClaimReserved.DeepCopy()}
+				claims[0].OwnerReferences = nil
+				return claims
+			}(),
+			expectedClaims: func() []resourcev1alpha2.ResourceClaim {
+				claims := []resourcev1alpha2.ResourceClaim{*testClaimReserved.DeepCopy()}
+				claims[0].OwnerReferences = nil
+				claims[0].Status.ReservedFor = nil
+				return claims
+			}(),
+			expectedMetrics: expectedMetrics{0, 0},
+		},
 		{
 			name:            "remove-reserved",
 			pods:            []*v1.Pod{testPod},
diff --git a/test/e2e/dra/dra.go b/test/e2e/dra/dra.go
index 59dc3376da3..ef916ce0ba7 100644
--- a/test/e2e/dra/dra.go
+++ b/test/e2e/dra/dra.go
@@ -230,6 +230,21 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation]", fu
 				b.testPod(ctx, f.ClientSet, pod)
 			})
+
+			ginkgo.It("removes reservation from claim when pod is done", func(ctx context.Context) {
+				parameters := b.parameters()
+				pod := b.podExternal()
+				claim := b.externalClaim(allocationMode)
+				pod.Spec.Containers[0].Command = []string{"true"}
+				b.create(ctx, parameters, claim, pod)
+
+				ginkgo.By("waiting for pod to finish")
+				framework.ExpectNoError(e2epod.WaitForPodNoLongerRunningInNamespace(ctx, f.ClientSet, pod.Name, pod.Namespace), "wait for pod to finish")
+				ginkgo.By("waiting for claim to be unreserved")
+				gomega.Eventually(ctx, func(ctx context.Context) (*resourcev1alpha2.ResourceClaim, error) {
+					return f.ClientSet.ResourceV1alpha2().ResourceClaims(pod.Namespace).Get(ctx, claim.Name, metav1.GetOptions{})
+				}).WithTimeout(f.Timeouts.PodDelete).Should(gomega.HaveField("Status.ReservedFor", gomega.BeEmpty()), "reservation should have been removed")
+			})
 		}

 		ginkgo.Context("with delayed allocation", func() {
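
Note: the new `case isPodDone(pod)` branch calls a helper whose definition lies outside the hunks shown here. A minimal sketch of what such a helper needs to decide, assuming it only has to recognize pods in a terminal phase — treat this as an illustration, not the definition that ships with this change:

// Sketch only: the actual isPodDone helper in this package may cover
// additional cases beyond terminal phases.
package resourceclaim

import v1 "k8s.io/api/core/v1"

// isPodDone reports whether the pod has reached a terminal phase and
// therefore will never run again, so its claim reservation can be
// dropped safely.
func isPodDone(pod *v1.Pod) bool {
	return pod.Status.Phase == v1.PodSucceeded ||
		pod.Status.Phase == v1.PodFailed
}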