diff --git a/pkg/controller/garbagecollector/garbagecollector.go b/pkg/controller/garbagecollector/garbagecollector.go index 341bb921237..44840c399f8 100644 --- a/pkg/controller/garbagecollector/garbagecollector.go +++ b/pkg/controller/garbagecollector/garbagecollector.go @@ -18,6 +18,7 @@ package garbagecollector import ( "context" + goerrors "errors" "fmt" "reflect" "sync" @@ -294,6 +295,8 @@ func (gc *GarbageCollector) runAttemptToDeleteWorker() { } } +var enqueuedVirtualDeleteEventErr = goerrors.New("enqueued virtual delete event") + func (gc *GarbageCollector) attemptToDeleteWorker() bool { item, quit := gc.attemptToDelete.Get() gc.workerLock.RLock() @@ -308,7 +311,10 @@ func (gc *GarbageCollector) attemptToDeleteWorker() bool { return true } err := gc.attemptToDeleteItem(n) - if err != nil { + if err == enqueuedVirtualDeleteEventErr { + // a virtual event was produced and will be handled by processGraphChanges, no need to requeue this node + return true + } else if err != nil { if _, ok := err.(*restMappingError); ok { // There are at least two ways this can happen: // 1. The reference is to an object of a custom type that has not yet been @@ -426,9 +432,15 @@ func ownerRefsToUIDs(refs []metav1.OwnerReference) []types.UID { return ret } +// attemptToDeleteItem looks up the live API object associated with the node, +// and issues a delete IFF the uid matches, the item is not blocked on deleting dependents, +// and all owner references are dangling. +// +// if the API get request returns a NotFound error, or the retrieved item's uid does not match, +// a virtual delete event for the node is enqueued and enqueuedVirtualDeleteEventErr is returned. func (gc *GarbageCollector) attemptToDeleteItem(item *node) error { klog.V(2).InfoS("Processing object", "object", klog.KRef(item.identity.Namespace, item.identity.Name), - "objectUID", item.identity.UID, "kind", item.identity.Kind) + "objectUID", item.identity.UID, "kind", item.identity.Kind, "virtual", !item.isObserved()) // "being deleted" is an one-way trip to the final deletion. We'll just wait for the final deletion, and then process the object's dependents. if item.isBeingDeleted() && !item.isDeletingDependents() { @@ -446,10 +458,7 @@ func (gc *GarbageCollector) attemptToDeleteItem(item *node) error { // the virtual node from GraphBuilder.uidToNode. klog.V(5).Infof("item %v not found, generating a virtual delete event", item.identity) gc.dependencyGraphBuilder.enqueueVirtualDeleteEvent(item.identity) - // since we're manually inserting a delete event to remove this node, - // we don't need to keep tracking it as a virtual node and requeueing in attemptToDelete - item.markObserved() - return nil + return enqueuedVirtualDeleteEventErr case err != nil: return err } @@ -457,10 +466,7 @@ func (gc *GarbageCollector) attemptToDeleteItem(item *node) error { if latest.GetUID() != item.identity.UID { klog.V(5).Infof("UID doesn't match, item %v not found, generating a virtual delete event", item.identity) gc.dependencyGraphBuilder.enqueueVirtualDeleteEvent(item.identity) - // since we're manually inserting a delete event to remove this node, - // we don't need to keep tracking it as a virtual node and requeueing in attemptToDelete - item.markObserved() - return nil + return enqueuedVirtualDeleteEventErr } // TODO: attemptToOrphanWorker() routine is similar. Consider merging diff --git a/pkg/controller/garbagecollector/garbagecollector_test.go b/pkg/controller/garbagecollector/garbagecollector_test.go index caeb0910bd4..c5972088a63 100644 --- a/pkg/controller/garbagecollector/garbagecollector_test.go +++ b/pkg/controller/garbagecollector/garbagecollector_test.go @@ -274,12 +274,16 @@ func TestAttemptToDeleteItem(t *testing.T) { Namespace: pod.Namespace, }, // owners are intentionally left empty. The attemptToDeleteItem routine should get the latest item from the server. - owners: nil, + owners: nil, + virtual: true, } err := gc.attemptToDeleteItem(item) if err != nil { t.Errorf("Unexpected Error: %v", err) } + if !item.virtual { + t.Errorf("attemptToDeleteItem changed virtual to false unexpectedly") + } expectedActionSet := sets.NewString() expectedActionSet.Insert("GET=/api/v1/namespaces/ns1/replicationcontrollers/owner1") expectedActionSet.Insert("DELETE=/api/v1/namespaces/ns1/pods/ToBeDeletedPod") diff --git a/pkg/controller/garbagecollector/graph_builder.go b/pkg/controller/garbagecollector/graph_builder.go index 734eaea8bd7..bc1c4b1dc8a 100644 --- a/pkg/controller/garbagecollector/graph_builder.go +++ b/pkg/controller/garbagecollector/graph_builder.go @@ -61,6 +61,8 @@ const ( ) type event struct { + // virtual indicates this event did not come from an informer, but was constructed artificially + virtual bool eventType eventType obj interface{} // the update event comes with an old object, but it's not used by the garbage collector. @@ -329,6 +331,7 @@ func DefaultIgnoredResources() map[schema.GroupResource]struct{} { func (gb *GraphBuilder) enqueueVirtualDeleteEvent(ref objectReference) { gv, _ := schema.ParseGroupVersion(ref.APIVersion) gb.graphChanges.Add(&event{ + virtual: true, eventType: deleteEvent, gvk: gv.WithKind(ref.Kind), obj: &metaonly.MetadataOnlyObject{ @@ -545,10 +548,10 @@ func (gb *GraphBuilder) processGraphChanges() bool { utilruntime.HandleError(fmt.Errorf("cannot access obj: %v", err)) return true } - klog.V(5).Infof("GraphBuilder process object: %s/%s, namespace %s, name %s, uid %s, event type %v", event.gvk.GroupVersion().String(), event.gvk.Kind, accessor.GetNamespace(), accessor.GetName(), string(accessor.GetUID()), event.eventType) + klog.V(5).Infof("GraphBuilder process object: %s/%s, namespace %s, name %s, uid %s, event type %v, virtual=%v", event.gvk.GroupVersion().String(), event.gvk.Kind, accessor.GetNamespace(), accessor.GetName(), string(accessor.GetUID()), event.eventType, event.virtual) // Check if the node already exists existingNode, found := gb.uidToNode.Read(accessor.GetUID()) - if found { + if found && !event.virtual && !existingNode.isObserved() { // this marks the node as having been observed via an informer event // 1. this depends on graphChanges only containing add/update events from the actual informer // 2. this allows things tracking virtual nodes' existence to stop polling and rely on informer events