Switch GC absentOwnerCache to full reference

Before deleting an object based on absent owners, GC verifies absence of those owners with a live lookup.

The coordinates used to perform that live lookup are the ones specified in the ownerReference of the child.

To delete multiple children of the same parent efficiently (e.g. 1000 pods of a ReplicaSet),
a 404 response to a lookup is cached in absentOwnerCache.

Previously, the cache was a simple uid set. However, since children can disagree on the coordinates
that should be used to look up a given uid, the cache now records the exact coordinates verified
absent: a [apiVersion, kind, namespace, name, uid] tuple.
Jordan Liggitt 2020-06-26 20:48:21 -04:00
parent 14f7f3201f
commit 445f20dbdb
6 changed files with 61 additions and 24 deletions
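For illustration, here is a minimal standalone sketch of why a uid-only key is not enough. The ownerCoordinates type and the plain maps below are simplified stand-ins assumed for this example only; the actual change uses the objectReference key and the LRU-backed ReferenceCache shown in the diffs that follow.

package main

import "fmt"

// ownerCoordinates is a simplified stand-in for the
// [apiVersion, kind, namespace, name, uid] tuple the reworked cache records.
type ownerCoordinates struct {
	APIVersion, Kind, Namespace, Name, UID string
}

func main() {
	// Two children cite the same owner UID but disagree on the owner's name.
	refA := ownerCoordinates{"v1", "ReplicationController", "ns1", "rc1", "42"}
	refB := ownerCoordinates{"v1", "ReplicationController", "ns1", "rc1-renamed", "42"}

	// uid-keyed cache (old behavior): a 404 recorded while verifying refA's
	// coordinates also short-circuits refB, whose coordinates were never
	// actually verified absent.
	uidCache := map[string]bool{refA.UID: true}
	fmt.Println("uid-keyed hit for refB:", uidCache[refB.UID]) // true

	// coordinate-keyed cache (new behavior): refB misses, so its owner still
	// gets its own live lookup before any delete decision is made.
	refCache := map[ownerCoordinates]bool{refA: true}
	fmt.Println("coordinate-keyed hit for refB:", refCache[refB]) // false
}

In the actual change, that key is the objectReference type (an OwnerReference plus the child's namespace), and isDangling checks both a cluster-scoped and a namespaced variant of it, as the diffs below show.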


@@ -83,6 +83,7 @@ go_test(
"//vendor/github.com/stretchr/testify/assert:go_default_library",
"//vendor/gonum.org/v1/gonum/graph:go_default_library",
"//vendor/gonum.org/v1/gonum/graph/simple:go_default_library",
"//vendor/k8s.io/utils/pointer:go_default_library",
],
)


@@ -72,7 +72,7 @@ type GarbageCollector struct {
attemptToOrphan workqueue.RateLimitingInterface
dependencyGraphBuilder *GraphBuilder
// GC caches the owners that do not exist according to the API server.
-absentOwnerCache *UIDCache
+absentOwnerCache *ReferenceCache
workerLock sync.RWMutex
}
@@ -94,7 +94,7 @@ func NewGarbageCollector(
attemptToDelete := workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "garbage_collector_attempt_to_delete")
attemptToOrphan := workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "garbage_collector_attempt_to_orphan")
-absentOwnerCache := NewUIDCache(500)
+absentOwnerCache := NewReferenceCache(500)
gc := &GarbageCollector{
metadataClient: metadataClient,
restMapper: mapper,
@@ -338,10 +338,20 @@ func (gc *GarbageCollector) attemptToDeleteWorker() bool {
// returns its latest state.
func (gc *GarbageCollector) isDangling(reference metav1.OwnerReference, item *node) (
dangling bool, owner *metav1.PartialObjectMetadata, err error) {
-if gc.absentOwnerCache.Has(reference.UID) {
+// check for recorded absent cluster-scoped parent
+absentOwnerCacheKey := objectReference{OwnerReference: ownerReferenceCoordinates(reference)}
+if gc.absentOwnerCache.Has(absentOwnerCacheKey) {
klog.V(5).Infof("according to the absentOwnerCache, object %s's owner %s/%s, %s does not exist", item.identity.UID, reference.APIVersion, reference.Kind, reference.Name)
return true, nil, nil
}
+// check for recorded absent namespaced parent
+absentOwnerCacheKey.Namespace = item.identity.Namespace
+if gc.absentOwnerCache.Has(absentOwnerCacheKey) {
+klog.V(5).Infof("according to the absentOwnerCache, object %s's owner %s/%s, %s does not exist in namespace %s", item.identity.UID, reference.APIVersion, reference.Kind, reference.Name, item.identity.Namespace)
+return true, nil, nil
+}
// TODO: we need to verify the reference resource is supported by the
// system. If it's not a valid resource, the garbage collector should i)
// ignore the reference when decide if the object should be deleted, and
@@ -352,6 +362,9 @@ func (gc *GarbageCollector) isDangling(reference metav1.OwnerReference, item *no
if err != nil {
return false, nil, err
}
+if !namespaced {
+absentOwnerCacheKey.Namespace = ""
+}
// TODO: It's only necessary to talk to the API server if the owner node
// is a "virtual" node. The local graph could lag behind the real
@@ -359,7 +372,7 @@ func (gc *GarbageCollector) isDangling(reference metav1.OwnerReference, item *no
owner, err = gc.metadataClient.Resource(resource).Namespace(resourceDefaultNamespace(namespaced, item.identity.Namespace)).Get(context.TODO(), reference.Name, metav1.GetOptions{})
switch {
case errors.IsNotFound(err):
-gc.absentOwnerCache.Add(reference.UID)
+gc.absentOwnerCache.Add(absentOwnerCacheKey)
klog.V(5).Infof("object %s's owner %s/%s, %s is not found", item.identity.UID, reference.APIVersion, reference.Kind, reference.Name)
return true, nil, nil
case err != nil:
@@ -368,7 +381,7 @@ func (gc *GarbageCollector) isDangling(reference metav1.OwnerReference, item *no
if owner.GetUID() != reference.UID {
klog.V(5).Infof("object %s's owner %s/%s, %s is not found, UID mismatch", item.identity.UID, reference.APIVersion, reference.Kind, reference.Name)
-gc.absentOwnerCache.Add(reference.UID)
+gc.absentOwnerCache.Add(absentOwnerCacheKey)
return true, nil, nil
}
return false, owner, nil


@@ -29,6 +29,7 @@ import (
"github.com/stretchr/testify/assert"
_ "k8s.io/kubernetes/pkg/apis/core/install"
"k8s.io/utils/pointer"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/meta"
@@ -398,7 +399,7 @@ func TestProcessEvent(t *testing.T) {
uidToNode: make(map[types.UID]*node),
},
attemptToDelete: workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()),
-absentOwnerCache: NewUIDCache(2),
+absentOwnerCache: NewReferenceCache(2),
}
for i := 0; i < len(scenario.events); i++ {
dependencyGraphBuilder.graphChanges.Add(&scenario.events[i])
@@ -459,13 +460,14 @@ func podToGCNode(pod *v1.Pod) *node {
}
}
-func TestAbsentUIDCache(t *testing.T) {
+func TestAbsentOwnerCache(t *testing.T) {
rc1Pod1 := getPod("rc1Pod1", []metav1.OwnerReference{
{
Kind: "ReplicationController",
Name: "rc1",
UID: "1",
APIVersion: "v1",
+Controller: pointer.BoolPtr(true),
},
})
rc1Pod2 := getPod("rc1Pod2", []metav1.OwnerReference{
@@ -474,6 +476,7 @@ func TestAbsentUIDCache(t *testing.T) {
Name: "rc1",
UID: "1",
APIVersion: "v1",
+Controller: pointer.BoolPtr(false),
},
})
rc2Pod1 := getPod("rc2Pod1", []metav1.OwnerReference{
@@ -528,7 +531,7 @@ func TestAbsentUIDCache(t *testing.T) {
defer srv.Close()
gc := setupGC(t, clientConfig)
defer close(gc.stop)
-gc.absentOwnerCache = NewUIDCache(2)
+gc.absentOwnerCache = NewReferenceCache(2)
gc.attemptToDeleteItem(podToGCNode(rc1Pod1))
gc.attemptToDeleteItem(podToGCNode(rc2Pod1))
// rc1 should already be in the cache, no request should be sent. rc1 should be promoted in the UIDCache
@@ -536,13 +539,13 @@ func TestAbsentUIDCache(t *testing.T) {
// after this call, rc2 should be evicted from the UIDCache
gc.attemptToDeleteItem(podToGCNode(rc3Pod1))
// check cache
if !gc.absentOwnerCache.Has(types.UID("1")) {
if !gc.absentOwnerCache.Has(objectReference{Namespace: "ns1", OwnerReference: metav1.OwnerReference{Kind: "ReplicationController", Name: "rc1", UID: "1", APIVersion: "v1"}}) {
t.Errorf("expected rc1 to be in the cache")
}
if gc.absentOwnerCache.Has(types.UID("2")) {
if gc.absentOwnerCache.Has(objectReference{Namespace: "ns1", OwnerReference: metav1.OwnerReference{Kind: "ReplicationController", Name: "rc2", UID: "2", APIVersion: "v1"}}) {
t.Errorf("expected rc2 to not exist in the cache")
}
if !gc.absentOwnerCache.Has(types.UID("3")) {
if !gc.absentOwnerCache.Has(objectReference{Namespace: "ns1", OwnerReference: metav1.OwnerReference{Kind: "ReplicationController", Name: "rc3", UID: "3", APIVersion: "v1"}}) {
t.Errorf("expected rc3 to be in the cache")
}
// check the request sent to the server


@@ -148,6 +148,17 @@ func (n *node) blockingDependents() []*node {
return ret
}
+// ownerReferenceCoordinates returns an owner reference containing only the coordinate fields
+// from the input reference (uid, name, kind, apiVersion)
+func ownerReferenceCoordinates(ref metav1.OwnerReference) metav1.OwnerReference {
+return metav1.OwnerReference{
+UID: ref.UID,
+Name: ref.Name,
+Kind: ref.Kind,
+APIVersion: ref.APIVersion,
+}
+}
// String renders node as a string using fmt. Acquires a read lock to ensure the
// reflective dump of dependents doesn't race with any concurrent writes.
func (n *node) String() string {


@@ -104,7 +104,7 @@ type GraphBuilder struct {
attemptToOrphan workqueue.RateLimitingInterface
// GraphBuilder and GC share the absentOwnerCache. Objects that are known to
// be non-existent are added to the cached.
-absentOwnerCache *UIDCache
+absentOwnerCache *ReferenceCache
sharedInformers informerfactory.InformerFactory
ignoredResources map[schema.GroupResource]struct{}
}
@@ -327,8 +327,10 @@ func DefaultIgnoredResources() map[schema.GroupResource]struct{} {
// enqueueVirtualDeleteEvent is used to add a virtual delete event to be processed for virtual nodes
// once it is determined they do not have backing objects in storage
func (gb *GraphBuilder) enqueueVirtualDeleteEvent(ref objectReference) {
+gv, _ := schema.ParseGroupVersion(ref.APIVersion)
gb.graphChanges.Add(&event{
eventType: deleteEvent,
+gvk: gv.WithKind(ref.Kind),
obj: &metaonly.MetadataOnlyObject{
TypeMeta: metav1.TypeMeta{APIVersion: ref.APIVersion, Kind: ref.Kind},
ObjectMeta: metav1.ObjectMeta{Namespace: ref.Namespace, UID: ref.UID, Name: ref.Name},
@@ -348,7 +350,7 @@ func (gb *GraphBuilder) addDependentToOwners(n *node, owners []metav1.OwnerRefer
// exist in the graph yet.
ownerNode = &node{
identity: objectReference{
-OwnerReference: owner,
+OwnerReference: ownerReferenceCoordinates(owner),
Namespace: n.identity.Namespace,
},
dependents: make(map[*node]struct{}),
@@ -603,7 +605,15 @@ func (gb *GraphBuilder) processGraphChanges() bool {
existingNode.dependentsLock.RLock()
defer existingNode.dependentsLock.RUnlock()
if len(existingNode.dependents) > 0 {
-gb.absentOwnerCache.Add(accessor.GetUID())
+gb.absentOwnerCache.Add(objectReference{
+OwnerReference: metav1.OwnerReference{
+APIVersion: event.gvk.GroupVersion().String(),
+Kind: event.gvk.Kind,
+Name: accessor.GetName(),
+UID: accessor.GetUID(),
+},
+Namespace: accessor.GetNamespace(),
+})
}
for dep := range existingNode.dependents {
gb.attemptToDelete.Add(dep)


@@ -20,33 +20,32 @@ import (
"sync"
"github.com/golang/groupcache/lru"
"k8s.io/apimachinery/pkg/types"
)
-// UIDCache is an LRU cache for uid.
-type UIDCache struct {
+// ReferenceCache is an LRU cache for uid.
+type ReferenceCache struct {
mutex sync.Mutex
cache *lru.Cache
}
-// NewUIDCache returns a UIDCache.
-func NewUIDCache(maxCacheEntries int) *UIDCache {
-return &UIDCache{
+// NewReferenceCache returns a ReferenceCache.
+func NewReferenceCache(maxCacheEntries int) *ReferenceCache {
+return &ReferenceCache{
cache: lru.New(maxCacheEntries),
}
}
// Add adds a uid to the cache.
-func (c *UIDCache) Add(uid types.UID) {
+func (c *ReferenceCache) Add(reference objectReference) {
c.mutex.Lock()
defer c.mutex.Unlock()
-c.cache.Add(uid, nil)
+c.cache.Add(reference, nil)
}
// Has returns if a uid is in the cache.
-func (c *UIDCache) Has(uid types.UID) bool {
+func (c *ReferenceCache) Has(reference objectReference) bool {
c.mutex.Lock()
defer c.mutex.Unlock()
-_, found := c.cache.Get(uid)
+_, found := c.cache.Get(reference)
return found
}