Merge pull request #118817 from pohly/dra-delete-claims

DRA: improve handling of completed pods
Kubernetes Prow Robot 2023-07-06 10:15:15 -07:00 committed by GitHub
commit 6f9d1d38d8
6 changed files with 182 additions and 27 deletions
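In short: the resource claim controller now treats a pod as "done" when its phase is terminal or when it was deleted before ever being scheduled. For such pods it removes the pod's entry from ResourceClaim.Status.ReservedFor, and it deletes claims that were generated from an inline pod template once nothing reserves them anymore, instead of waiting for the garbage collector. A minimal standalone sketch of that decision rule (simplified stand-in types, for illustration only; the actual implementation is in the diff below):

package main

import "fmt"

// Reduced stand-ins for the Kubernetes types involved; the real controller
// works with v1.Pod and resourcev1alpha2.ResourceClaim (see the diff below).
type pod struct {
	phaseTerminal bool   // e.g. Succeeded or Failed
	deleted       bool   // DeletionTimestamp != nil
	nodeName      string // empty while unscheduled
}

// podDone mirrors the new isPodDone helper: no container is running and none ever will.
func podDone(p pod) bool {
	return p.phaseTerminal ||
		// Deleted and not scheduled:
		p.deleted && p.nodeName == ""
}

// claimActions summarizes what the controller now does for a claim whose pod is done:
// drop the reservation, and delete the claim if it was generated from an inline
// template (the real controller only deletes once no reservation remains).
func claimActions(p pod, generatedFromTemplate bool) []string {
	if !podDone(p) {
		return nil
	}
	actions := []string{"remove pod from claim.Status.ReservedFor"}
	if generatedFromTemplate {
		actions = append(actions, "delete generated ResourceClaim")
	}
	return actions
}

func main() {
	finished := pod{phaseTerminal: true, nodeName: "node-1"}
	fmt.Println(claimActions(finished, true))  // [remove pod from claim.Status.ReservedFor delete generated ResourceClaim]
	fmt.Println(claimActions(finished, false)) // [remove pod from claim.Status.ReservedFor]
}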


@@ -353,6 +353,7 @@ const defaultResourceClaimControllerWorkers = 10
 
 func startResourceClaimController(ctx context.Context, controllerContext ControllerContext) (controller.Interface, bool, error) {
 	ephemeralController, err := resourceclaim.NewController(
+		klog.FromContext(ctx),
 		controllerContext.ClientBuilder.ClientOrDie("resource-claim-controller"),
 		controllerContext.InformerFactory.Core().V1().Pods(),
 		controllerContext.InformerFactory.Resource().V1alpha2().ResourceClaims(),


@@ -26,6 +26,7 @@ import (
 	resourcev1alpha2 "k8s.io/api/resource/v1alpha2"
 	"k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/runtime"
 	"k8s.io/apimachinery/pkg/util/wait"
 	v1informers "k8s.io/client-go/informers/core/v1"
@@ -42,6 +43,7 @@ import (
 	"k8s.io/klog/v2"
 	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
 	"k8s.io/kubernetes/pkg/controller/resourceclaim/metrics"
+	"k8s.io/utils/pointer"
 )
 
 const (
@@ -98,6 +100,7 @@ const (
 
 // NewController creates a ResourceClaim controller.
 func NewController(
+	logger klog.Logger,
 	kubeClient clientset.Interface,
 	podInformer v1informers.PodInformer,
 	claimInformer resourcev1alpha2informers.ResourceClaimInformer,
@@ -120,23 +123,27 @@ func NewController(
 	if _, err := podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
 		AddFunc: func(obj interface{}) {
-			ec.enqueuePod(obj, false)
+			ec.enqueuePod(logger, obj, false)
 		},
 		UpdateFunc: func(old, updated interface{}) {
-			ec.enqueuePod(updated, false)
+			ec.enqueuePod(logger, updated, false)
 		},
 		DeleteFunc: func(obj interface{}) {
-			ec.enqueuePod(obj, true)
+			ec.enqueuePod(logger, obj, true)
 		},
 	}); err != nil {
 		return nil, err
 	}
 
 	if _, err := claimInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
-		AddFunc: ec.onResourceClaimAddOrUpdate,
+		AddFunc: func(obj interface{}) {
+			ec.onResourceClaimAddOrUpdate(logger, obj)
+		},
 		UpdateFunc: func(old, updated interface{}) {
-			ec.onResourceClaimAddOrUpdate(updated)
+			ec.onResourceClaimAddOrUpdate(logger, updated)
 		},
-		DeleteFunc: ec.onResourceClaimDelete,
+		DeleteFunc: func(obj interface{}) {
+			ec.onResourceClaimDelete(logger, obj)
+		},
 	}); err != nil {
 		return nil, err
 	}
@@ -147,7 +154,7 @@ func NewController(
 	return ec, nil
 }
 
-func (ec *Controller) enqueuePod(obj interface{}, deleted bool) {
+func (ec *Controller) enqueuePod(logger klog.Logger, obj interface{}, deleted bool) {
 	if d, ok := obj.(cache.DeletedFinalStateUnknown); ok {
 		obj = d.Obj
 	}
@@ -166,14 +173,15 @@ func (ec *Controller) enqueuePod(obj interface{}, deleted bool) {
 		return
 	}
 
+	logger.V(6).Info("pod with resource claims changed", "pod", klog.KObj(pod), "deleted", deleted)
+
 	// Release reservations of a deleted or completed pod?
-	if deleted ||
-		podutil.IsPodTerminal(pod) ||
-		// Deleted and not scheduled:
-		pod.DeletionTimestamp != nil && pod.Spec.NodeName == "" {
+	if deleted || isPodDone(pod) {
 		for _, podClaim := range pod.Spec.ResourceClaims {
 			claimName := resourceclaim.Name(pod, &podClaim)
-			ec.queue.Add(claimKeyPrefix + pod.Namespace + "/" + claimName)
+			key := claimKeyPrefix + pod.Namespace + "/" + claimName
+			logger.V(6).Info("pod is deleted or done, process claim", "pod", klog.KObj(pod), "key", key)
+			ec.queue.Add(key)
 		}
 	}
 
@@ -182,14 +190,16 @@ func (ec *Controller) enqueuePod(obj interface{}, deleted bool) {
 		for _, podClaim := range pod.Spec.ResourceClaims {
 			if podClaim.Source.ResourceClaimTemplateName != nil {
 				// It has at least one inline template, work on it.
-				ec.queue.Add(podKeyPrefix + pod.Namespace + "/" + pod.Name)
+				key := podKeyPrefix + pod.Namespace + "/" + pod.Name
+				logger.V(6).Info("pod is not deleted, process it", "pod", klog.KObj(pod), "key", key)
+				ec.queue.Add(key)
 				break
 			}
 		}
 	}
 }
 
-func (ec *Controller) onResourceClaimAddOrUpdate(obj interface{}) {
+func (ec *Controller) onResourceClaimAddOrUpdate(logger klog.Logger, obj interface{}) {
 	claim, ok := obj.(*resourcev1alpha2.ResourceClaim)
 	if !ok {
 		return
@@ -198,10 +208,12 @@ func (ec *Controller) onResourceClaimAddOrUpdate(obj interface{}) {
 	// When starting up, we have to check all claims to find those with
 	// stale pods in ReservedFor. During an update, a pod might get added
 	// that already no longer exists.
-	ec.queue.Add(claimKeyPrefix + claim.Namespace + "/" + claim.Name)
+	key := claimKeyPrefix + claim.Namespace + "/" + claim.Name
+	logger.V(6).Info("claim is new or updated, process it", "key", key)
+	ec.queue.Add(key)
 }
 
-func (ec *Controller) onResourceClaimDelete(obj interface{}) {
+func (ec *Controller) onResourceClaimDelete(logger klog.Logger, obj interface{}) {
 	claim, ok := obj.(*resourcev1alpha2.ResourceClaim)
 	if !ok {
 		return
@@ -218,8 +230,13 @@ func (ec *Controller) onResourceClaimDelete(obj interface{}) {
 		runtime.HandleError(fmt.Errorf("listing pods from cache: %v", err))
 		return
 	}
+	if len(objs) == 0 {
+		logger.V(6).Info("claim got deleted while not needed by any pod, nothing to do", "claim", klog.KObj(claim))
+		return
+	}
+	logger = klog.LoggerWithValues(logger, "claim", klog.KObj(claim))
 	for _, obj := range objs {
-		ec.enqueuePod(obj, false)
+		ec.enqueuePod(logger, obj, false)
 	}
 }
 
@@ -385,7 +402,7 @@ func (ec *Controller) handleClaim(ctx context.Context, pod *v1.Pod, podClaim v1.
 }
 
 func (ec *Controller) syncClaim(ctx context.Context, namespace, name string) error {
-	logger := klog.LoggerWithValues(klog.FromContext(ctx), "PVC", klog.KRef(namespace, name))
+	logger := klog.LoggerWithValues(klog.FromContext(ctx), "claim", klog.KRef(namespace, name))
 	ctx = klog.NewContext(ctx, logger)
 	claim, err := ec.claimLister.ResourceClaims(namespace).Get(name)
 	if err != nil {
@@ -420,10 +437,10 @@ func (ec *Controller) syncClaim(ctx context.Context, namespace, name string) err
 				keepEntry = false
 			} else {
 				pod, err := ec.podLister.Pods(claim.Namespace).Get(reservedFor.Name)
-				if err != nil && !errors.IsNotFound(err) {
+				switch {
+				case err != nil && !errors.IsNotFound(err):
 					return err
-				}
-				if pod == nil {
+				case err != nil:
 					// We might not have it in our informer cache
 					// yet. Removing the pod while the scheduler is
 					// scheduling it would be bad. We have to be
@@ -434,10 +451,14 @@ func (ec *Controller) syncClaim(ctx context.Context, namespace, name string) err
 						return err
 					}
 					if pod == nil || pod.UID != reservedFor.UID {
+						logger.V(6).Info("remove reservation because pod is gone or got replaced", "pod", klog.KObj(pod), "claim", klog.KRef(namespace, name))
 						keepEntry = false
 					}
-				} else if pod.UID != reservedFor.UID {
-					// Pod exists, but is a different incarnation under the same name.
+				case pod.UID != reservedFor.UID:
+					logger.V(6).Info("remove reservation because pod got replaced with new instance", "pod", klog.KObj(pod), "claim", klog.KRef(namespace, name))
+					keepEntry = false
+				case isPodDone(pod):
+					logger.V(6).Info("remove reservation because pod will not run anymore", "pod", klog.KObj(pod), "claim", klog.KRef(namespace, name))
 					keepEntry = false
 				}
 			}
@@ -452,6 +473,7 @@ func (ec *Controller) syncClaim(ctx context.Context, namespace, name string) err
 		return fmt.Errorf("unsupported ReservedFor entry: %v", reservedFor)
 	}
 
+	logger.V(5).Info("claim reserved for counts", "currentCount", len(claim.Status.ReservedFor), "claim", klog.KRef(namespace, name), "updatedCount", len(valid))
 	if len(valid) < len(claim.Status.ReservedFor) {
 		// TODO (#113700): patch
 		claim := claim.DeepCopy()
@@ -484,9 +506,54 @@ func (ec *Controller) syncClaim(ctx context.Context, namespace, name string) err
 		}
 	}
 
+	if len(valid) == 0 {
+		// Claim is not reserved. If it was generated for a pod and
+		// that pod is not going to run, the claim can be
+		// deleted. Normally the garbage collector does that, but the
+		// pod itself might not get deleted for a while.
+		podName, podUID := owningPod(claim)
+		if podName != "" {
+			pod, err := ec.podLister.Pods(claim.Namespace).Get(podName)
+			switch {
+			case err == nil:
+				// Pod already replaced or not going to run?
+				if pod.UID != podUID || isPodDone(pod) {
+					// We are certain that the owning pod is not going to need
+					// the claim and therefore remove the claim.
+					logger.V(5).Info("deleting unused generated claim", "claim", klog.KObj(claim), "pod", klog.KObj(pod))
+					err := ec.kubeClient.ResourceV1alpha2().ResourceClaims(claim.Namespace).Delete(ctx, claim.Name, metav1.DeleteOptions{})
+					if err != nil {
+						return fmt.Errorf("delete claim: %v", err)
+					}
+				} else {
+					logger.V(6).Info("wrong pod content, not deleting claim", "claim", klog.KObj(claim), "podUID", podUID, "podContent", pod)
+				}
+			case errors.IsNotFound(err):
+				// We might not know the pod *yet*. Instead of doing an expensive API call,
+				// let the garbage collector handle the case that the pod is truly gone.
+				logger.V(5).Info("pod for claim not found", "claim", klog.KObj(claim), "pod", klog.KRef(claim.Namespace, podName))
+			default:
+				return fmt.Errorf("lookup pod: %v", err)
+			}
+		} else {
+			logger.V(5).Info("claim not generated for a pod", "claim", klog.KObj(claim))
+		}
+	}
+
 	return nil
 }
 
+func owningPod(claim *resourcev1alpha2.ResourceClaim) (string, types.UID) {
+	for _, owner := range claim.OwnerReferences {
+		if pointer.BoolDeref(owner.Controller, false) &&
+			owner.APIVersion == "v1" &&
+			owner.Kind == "Pod" {
+			return owner.Name, owner.UID
+		}
+	}
+	return "", ""
+}
+
 // podResourceClaimIndexFunc is an index function that returns ResourceClaim keys (=
 // namespace/name) for ResourceClaimTemplates in a given pod.
 func podResourceClaimIndexFunc(obj interface{}) ([]string, error) {
@@ -503,3 +570,10 @@ func podResourceClaimIndexFunc(obj interface{}) ([]string, error) {
 		}
 	}
 	return keys, nil
 }
+
+// isPodDone returns true if it is certain that none of the containers are running and never will run.
+func isPodDone(pod *v1.Pod) bool {
+	return podutil.IsPodPhaseTerminal(pod.Status.Phase) ||
+		// Deleted and not scheduled:
+		pod.DeletionTimestamp != nil && pod.Spec.NodeName == ""
+}
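The two new helpers above, isPodDone and owningPod, carry the core conditions of this change. Because both are unexported, the standalone sketch below restates them against the real API types to show which pods count as done and which owner references mark a claim as generated for a pod; it is an illustration of the diff, not part of it:

package main

import (
	"fmt"
	"time"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/utils/pointer"
)

// done restates the isPodDone condition: the pod phase is terminal
// (Succeeded or Failed), or the pod was deleted before being scheduled.
func done(pod *v1.Pod) bool {
	terminal := pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed
	return terminal ||
		// Deleted and not scheduled:
		pod.DeletionTimestamp != nil && pod.Spec.NodeName == ""
}

// generatedForPod restates the owningPod check: the claim has a controller
// owner reference pointing at a core/v1 Pod.
func generatedForPod(owners []metav1.OwnerReference) (string, bool) {
	for _, owner := range owners {
		if pointer.BoolDeref(owner.Controller, false) && owner.APIVersion == "v1" && owner.Kind == "Pod" {
			return owner.Name, true
		}
	}
	return "", false
}

func main() {
	now := metav1.NewTime(time.Now())

	succeeded := &v1.Pod{Status: v1.PodStatus{Phase: v1.PodSucceeded}}
	deletedUnscheduled := &v1.Pod{ObjectMeta: metav1.ObjectMeta{DeletionTimestamp: &now}}
	deletedButScheduled := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{DeletionTimestamp: &now},
		Spec:       v1.PodSpec{NodeName: "node-1"},
	}

	fmt.Println(done(succeeded))           // true: terminal phase
	fmt.Println(done(deletedUnscheduled))  // true: deleted and never scheduled
	fmt.Println(done(deletedButScheduled)) // false: the kubelet may still be stopping it

	owners := []metav1.OwnerReference{{
		APIVersion: "v1",
		Kind:       "Pod",
		Name:       "my-pod", // illustrative name
		Controller: pointer.Bool(true),
	}}
	fmt.Println(generatedForPod(owners)) // my-pod true
}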


@@ -187,6 +187,27 @@ func TestSyncHandler(t *testing.T) {
 			expectedClaims: []resourcev1alpha2.ResourceClaim{*testClaim},
 			expectedMetrics: expectedMetrics{0, 0},
 		},
+		{
+			name: "clear-reserved-when-done",
+			pods: func() []*v1.Pod {
+				pods := []*v1.Pod{testPodWithResource.DeepCopy()}
+				pods[0].Status.Phase = v1.PodSucceeded
+				return pods
+			}(),
+			key: claimKey(testClaimReserved),
+			claims: func() []*resourcev1alpha2.ResourceClaim {
+				claims := []*resourcev1alpha2.ResourceClaim{testClaimReserved.DeepCopy()}
+				claims[0].OwnerReferences = nil
+				return claims
+			}(),
+			expectedClaims: func() []resourcev1alpha2.ResourceClaim {
+				claims := []resourcev1alpha2.ResourceClaim{*testClaimReserved.DeepCopy()}
+				claims[0].OwnerReferences = nil
+				claims[0].Status.ReservedFor = nil
+				return claims
+			}(),
+			expectedMetrics: expectedMetrics{0, 0},
+		},
 		{
 			name: "remove-reserved",
 			pods: []*v1.Pod{testPod},
@@ -195,6 +216,18 @@ func TestSyncHandler(t *testing.T) {
 			expectedClaims: []resourcev1alpha2.ResourceClaim{*testClaimReserved},
 			expectedMetrics: expectedMetrics{0, 0},
 		},
+		{
+			name: "delete-claim-when-done",
+			pods: func() []*v1.Pod {
+				pods := []*v1.Pod{testPodWithResource.DeepCopy()}
+				pods[0].Status.Phase = v1.PodSucceeded
+				return pods
+			}(),
+			key: claimKey(testClaimReserved),
+			claims: []*resourcev1alpha2.ResourceClaim{testClaimReserved},
+			expectedClaims: nil,
+			expectedMetrics: expectedMetrics{0, 0},
+		},
 	}
 
 	for _, tc := range tests {
@@ -226,7 +259,7 @@ func TestSyncHandler(t *testing.T) {
 			claimInformer := informerFactory.Resource().V1alpha2().ResourceClaims()
 			templateInformer := informerFactory.Resource().V1alpha2().ResourceClaimTemplates()
 
-			ec, err := NewController(fakeKubeClient, podInformer, claimInformer, templateInformer)
+			ec, err := NewController(klog.TODO(), fakeKubeClient, podInformer, claimInformer, templateInformer)
 			if err != nil {
 				t.Fatalf("error creating ephemeral controller : %v", err)
 			}


@@ -212,7 +212,7 @@ func buildControllerRoles() ([]rbacv1.ClusterRole, []rbacv1.ClusterRoleBinding)
 		Rules: []rbacv1.PolicyRule{
 			rbacv1helpers.NewRule("get", "list", "watch").Groups(legacyGroup).Resources("pods").RuleOrDie(),
 			rbacv1helpers.NewRule("update").Groups(legacyGroup).Resources("pods/finalizers").RuleOrDie(),
-			rbacv1helpers.NewRule("get", "list", "watch", "create").Groups(resourceGroup).Resources("resourceclaims").RuleOrDie(),
+			rbacv1helpers.NewRule("get", "list", "watch", "create", "delete").Groups(resourceGroup).Resources("resourceclaims").RuleOrDie(),
 			rbacv1helpers.NewRule("update", "patch").Groups(resourceGroup).Resources("resourceclaims/status").RuleOrDie(),
 			eventsRule(),
 		},
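The RBAC change above gives the controller's bootstrap role the delete verb on resourceclaims, since the controller now deletes generated claims itself. As a rough sketch, the updated rule written out as a plain rbacv1.PolicyRule literal (assuming resourceGroup resolves to "resource.k8s.io", the API group of the DRA types used in this PR):

package main

import (
	"fmt"

	rbacv1 "k8s.io/api/rbac/v1"
)

func main() {
	// Roughly what rbacv1helpers.NewRule("get", "list", "watch", "create", "delete").
	// Groups(resourceGroup).Resources("resourceclaims").RuleOrDie() builds.
	rule := rbacv1.PolicyRule{
		Verbs:     []string{"get", "list", "watch", "create", "delete"},
		APIGroups: []string{"resource.k8s.io"},
		Resources: []string{"resourceclaims"},
	}
	fmt.Printf("%+v\n", rule)
}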


@@ -245,6 +245,53 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation]", fu
 			b.testPod(ctx, f.ClientSet, pod)
 		})
 
+		ginkgo.It("removes reservation from claim when pod is done", func(ctx context.Context) {
+			parameters := b.parameters()
+			pod := b.podExternal()
+			claim := b.externalClaim(allocationMode)
+			pod.Spec.Containers[0].Command = []string{"true"}
+			b.create(ctx, parameters, claim, pod)
+
+			ginkgo.By("waiting for pod to finish")
+			framework.ExpectNoError(e2epod.WaitForPodNoLongerRunningInNamespace(ctx, f.ClientSet, pod.Name, pod.Namespace), "wait for pod to finish")
+			ginkgo.By("waiting for claim to be unreserved")
+			gomega.Eventually(ctx, func(ctx context.Context) (*resourcev1alpha2.ResourceClaim, error) {
+				return f.ClientSet.ResourceV1alpha2().ResourceClaims(pod.Namespace).Get(ctx, claim.Name, metav1.GetOptions{})
+			}).WithTimeout(f.Timeouts.PodDelete).Should(gomega.HaveField("Status.ReservedFor", gomega.BeEmpty()), "reservation should have been removed")
+		})
+
+		ginkgo.It("deletes generated claims when pod is done", func(ctx context.Context) {
+			parameters := b.parameters()
+			pod, template := b.podInline(allocationMode)
+			pod.Spec.Containers[0].Command = []string{"true"}
+			b.create(ctx, parameters, template, pod)
+
+			ginkgo.By("waiting for pod to finish")
+			framework.ExpectNoError(e2epod.WaitForPodNoLongerRunningInNamespace(ctx, f.ClientSet, pod.Name, pod.Namespace), "wait for pod to finish")
+			ginkgo.By("waiting for claim to be deleted")
+			gomega.Eventually(ctx, func(ctx context.Context) ([]resourcev1alpha2.ResourceClaim, error) {
+				claims, err := f.ClientSet.ResourceV1alpha2().ResourceClaims(pod.Namespace).List(ctx, metav1.ListOptions{})
+				if err != nil {
+					return nil, err
+				}
+				return claims.Items, nil
+			}).WithTimeout(f.Timeouts.PodDelete).Should(gomega.BeEmpty(), "claim should have been deleted")
+		})
+
+		ginkgo.It("does not delete generated claims when pod is restarting", func(ctx context.Context) {
+			parameters := b.parameters()
+			pod, template := b.podInline(allocationMode)
+			pod.Spec.Containers[0].Command = []string{"sh", "-c", "sleep 1; exit 1"}
+			pod.Spec.RestartPolicy = v1.RestartPolicyAlways
+			b.create(ctx, parameters, template, pod)
+
+			ginkgo.By("waiting for pod to restart twice")
+			gomega.Eventually(ctx, func(ctx context.Context) (*v1.Pod, error) {
+				return f.ClientSet.CoreV1().Pods(pod.Namespace).Get(ctx, pod.Name, metav1.GetOptions{})
+			}).WithTimeout(f.Timeouts.PodStartSlow).Should(gomega.HaveField("Status.ContainerStatuses", gomega.ContainElements(gomega.HaveField("RestartCount", gomega.BeNumerically(">=", 2)))))
+			gomega.Expect(driver.Controller.GetNumAllocations()).To(gomega.Equal(int64(1)), "number of allocations")
+		})
 	}
 
 	ginkgo.Context("with delayed allocation", func() {


@@ -123,7 +123,7 @@ func CreateResourceClaimController(ctx context.Context, tb testing.TB, clientSet
 	podInformer := informerFactory.Core().V1().Pods()
 	claimInformer := informerFactory.Resource().V1alpha2().ResourceClaims()
 	claimTemplateInformer := informerFactory.Resource().V1alpha2().ResourceClaimTemplates()
-	claimController, err := resourceclaim.NewController(clientSet, podInformer, claimInformer, claimTemplateInformer)
+	claimController, err := resourceclaim.NewController(klog.FromContext(ctx), clientSet, podInformer, claimInformer, claimTemplateInformer)
 	if err != nil {
 		tb.Fatalf("Error creating claim controller: %v", err)
 	}