From 603a0b016ec9417aa47dd894fe5f9ccf0cd9be58 Mon Sep 17 00:00:00 2001 From: Jordan Liggitt Date: Thu, 12 Nov 2020 12:24:41 -0500 Subject: [PATCH] Log cluster-scoped owners referencing namespaced owners, avoid retrying lookups forever If a cluster-scoped dependent references a namespace-scoped owner, this is an invalid relationship, and the lookup will never succeed in attemptToDelete. Short-circuit requeueing in attemptToDelete and log. --- pkg/controller/garbagecollector/garbagecollector.go | 12 ++++++++++++ pkg/controller/garbagecollector/operations.go | 8 +++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/pkg/controller/garbagecollector/garbagecollector.go b/pkg/controller/garbagecollector/garbagecollector.go index 537913ed7fd..574391d4ae1 100644 --- a/pkg/controller/garbagecollector/garbagecollector.go +++ b/pkg/controller/garbagecollector/garbagecollector.go @@ -297,6 +297,8 @@ func (gc *GarbageCollector) runAttemptToDeleteWorker() { var enqueuedVirtualDeleteEventErr = goerrors.New("enqueued virtual delete event") +var namespacedOwnerOfClusterScopedObjectErr = goerrors.New("cluster-scoped objects cannot refer to namespaced owners") + func (gc *GarbageCollector) attemptToDeleteWorker() bool { item, quit := gc.attemptToDelete.Get() gc.workerLock.RLock() @@ -331,6 +333,9 @@ func (gc *GarbageCollector) attemptToDeleteWorker() bool { if err == enqueuedVirtualDeleteEventErr { // a virtual event was produced and will be handled by processGraphChanges, no need to requeue this node return true + } else if err == namespacedOwnerOfClusterScopedObjectErr { + // a cluster-scoped object referring to a namespaced owner is an error that will not resolve on retry, no need to requeue this node + return true } else if err != nil { if _, ok := err.(*restMappingError); ok { // There are at least two ways this can happen: @@ -389,6 +394,13 @@ func (gc *GarbageCollector) isDangling(reference metav1.OwnerReference, item *no absentOwnerCacheKey.Namespace = "" } + if len(item.identity.Namespace) == 0 && namespaced { + // item is a cluster-scoped object referring to a namespace-scoped owner, which is not valid. + // return a marker error, rather than retrying on the lookup failure forever. + klog.V(2).Infof("object %s is cluster-scoped, but refers to a namespaced owner of type %s/%s", item.identity, reference.APIVersion, reference.Kind) + return false, nil, namespacedOwnerOfClusterScopedObjectErr + } + // TODO: It's only necessary to talk to the API server if the owner node // is a "virtual" node. The local graph could lag behind the real // status, but in practice, the difference is small. diff --git a/pkg/controller/garbagecollector/operations.go b/pkg/controller/garbagecollector/operations.go index ab64a3baceb..de4ae5e722c 100644 --- a/pkg/controller/garbagecollector/operations.go +++ b/pkg/controller/garbagecollector/operations.go @@ -65,7 +65,13 @@ func (gc *GarbageCollector) getObject(item objectReference) (*metav1.PartialObje if err != nil { return nil, err } - return gc.metadataClient.Resource(resource).Namespace(resourceDefaultNamespace(namespaced, item.Namespace)).Get(context.TODO(), item.Name, metav1.GetOptions{}) + namespace := resourceDefaultNamespace(namespaced, item.Namespace) + if namespaced && len(namespace) == 0 { + // the type is namespaced, but we have no namespace coordinate. + // the only way this can happen is if a cluster-scoped object referenced this type as an owner. + return nil, namespacedOwnerOfClusterScopedObjectErr + } + return gc.metadataClient.Resource(resource).Namespace(namespace).Get(context.TODO(), item.Name, metav1.GetOptions{}) } func (gc *GarbageCollector) patchObject(item objectReference, patch []byte, pt types.PatchType) (*metav1.PartialObjectMetadata, error) {