From d89f58fcc8a5e84e32417d413962ee70e7db488a Mon Sep 17 00:00:00 2001 From: Dan Mace Date: Mon, 27 Nov 2017 16:11:47 -0500 Subject: [PATCH 1/4] Fix GC sync race condition Remove faulty diff detection logic from GC sync which leads to a race condition: If the GC's discovery client is returning a fully up to date view of server resources during the very first GC sync, the sync function will never sync monitors or reset the REST mapper unless discovery changes again. This causes REST mapping to fail for any custom types already present in discovery. --- pkg/controller/garbagecollector/garbagecollector.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pkg/controller/garbagecollector/garbagecollector.go b/pkg/controller/garbagecollector/garbagecollector.go index 462bed79fc0..236b3bc1b3c 100644 --- a/pkg/controller/garbagecollector/garbagecollector.go +++ b/pkg/controller/garbagecollector/garbagecollector.go @@ -173,12 +173,6 @@ func (gc *GarbageCollector) Sync(discoveryClient discovery.DiscoveryInterface, p // Get the current resource list from discovery. newResources := GetDeletableResources(discoveryClient) - // Detect first or abnormal sync and try again later. - if oldResources == nil || len(oldResources) == 0 { - oldResources = newResources - return - } - // Decide whether discovery has reported a change. if reflect.DeepEqual(oldResources, newResources) { glog.V(5).Infof("no resource updates from discovery, skipping garbage collector sync") From 9b2886df29a0c14aa535d518b12936f0ad56acba Mon Sep 17 00:00:00 2001 From: Dan Mace Date: Mon, 27 Nov 2017 16:29:18 -0500 Subject: [PATCH 2/4] Ensure sync failures are correctly retried Only track the last synced resources when all preceding steps have completed to ensure that failures will be correctly retried. --- pkg/controller/garbagecollector/garbagecollector.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pkg/controller/garbagecollector/garbagecollector.go b/pkg/controller/garbagecollector/garbagecollector.go index 236b3bc1b3c..e11fe4cbf86 100644 --- a/pkg/controller/garbagecollector/garbagecollector.go +++ b/pkg/controller/garbagecollector/garbagecollector.go @@ -179,9 +179,8 @@ func (gc *GarbageCollector) Sync(discoveryClient discovery.DiscoveryInterface, p return } - // Something has changed, so track the new state and perform a sync. + // Something has changed, time to sync. glog.V(2).Infof("syncing garbage collector with updated resources from discovery: %v", newResources) - oldResources = newResources // Ensure workers are paused to avoid processing events before informers // have resynced. @@ -208,7 +207,13 @@ func (gc *GarbageCollector) Sync(discoveryClient discovery.DiscoveryInterface, p } if !controller.WaitForCacheSync("garbage collector", stopCh, gc.dependencyGraphBuilder.IsSynced) { utilruntime.HandleError(fmt.Errorf("timed out waiting for dependency graph builder sync during GC sync")) + return } + + // Finally, keep track of our new state. Do this after all preceding steps + // have succeeded to ensure we'll retry on subsequent syncs if an error + // occured. + oldResources = newResources }, period, stopCh) } From eeeabce831cb95fb46d5b8f1ab71644b5ec9a356 Mon Sep 17 00:00:00 2001 From: Dan Mace Date: Mon, 27 Nov 2017 16:47:37 -0500 Subject: [PATCH 3/4] Add more GC sync logging --- pkg/controller/garbagecollector/garbagecollector.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/controller/garbagecollector/garbagecollector.go b/pkg/controller/garbagecollector/garbagecollector.go index e11fe4cbf86..79006a4fabc 100644 --- a/pkg/controller/garbagecollector/garbagecollector.go +++ b/pkg/controller/garbagecollector/garbagecollector.go @@ -214,6 +214,7 @@ func (gc *GarbageCollector) Sync(discoveryClient discovery.DiscoveryInterface, p // have succeeded to ensure we'll retry on subsequent syncs if an error // occured. oldResources = newResources + glog.V(2).Infof("synced garbage collector") }, period, stopCh) } From a62d07ce2aaf669b3cf48d8c4a23134fe0515d9c Mon Sep 17 00:00:00 2001 From: Dan Mace Date: Mon, 27 Nov 2017 16:50:29 -0500 Subject: [PATCH 4/4] Add a GC deadlock note --- pkg/controller/garbagecollector/garbagecollector.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pkg/controller/garbagecollector/garbagecollector.go b/pkg/controller/garbagecollector/garbagecollector.go index 79006a4fabc..dad94e6e97e 100644 --- a/pkg/controller/garbagecollector/garbagecollector.go +++ b/pkg/controller/garbagecollector/garbagecollector.go @@ -205,6 +205,9 @@ func (gc *GarbageCollector) Sync(discoveryClient discovery.DiscoveryInterface, p utilruntime.HandleError(fmt.Errorf("failed to sync resource monitors: %v", err)) return } + // TODO: WaitForCacheSync can block forever during normal operation. Could + // pass a timeout channel, but we have to consider the implications of + // un-pausing the GC with a partially synced graph builder. if !controller.WaitForCacheSync("garbage collector", stopCh, gc.dependencyGraphBuilder.IsSynced) { utilruntime.HandleError(fmt.Errorf("timed out waiting for dependency graph builder sync during GC sync")) return