From 29ad2bc1d4e5ce444c2051dc619ac2db3940b4c4 Mon Sep 17 00:00:00 2001
From: Patrick Ohly
Date: Thu, 9 Mar 2023 21:19:17 +0100
Subject: [PATCH] client-go: shut down watch reflector as soon as stop channel closes

Without this change, leaked goroutines were sometimes reported for
test/integration/scheduler_perf. The one that delayed the cleanup was
this one:

    goleak.go:50: found unexpected goroutines:
        [Goroutine 2704 in state chan receive, 2 minutes, with k8s.io/client-go/tools/cache.(*Reflector).watch on top of the stack:
        goroutine 2704 [chan receive, 2 minutes]:
        k8s.io/client-go/tools/cache.(*Reflector).watch(0xc00453f590, {0x0, 0x0}, 0x1f?, 0xc00a128080?)
            /nvme/gopath/src/k8s.io/kubernetes/vendor/k8s.io/client-go/tools/cache/reflector.go:388 +0x5b3
        k8s.io/client-go/tools/cache.(*Reflector).ListAndWatch(0xc00453f590, 0xc006e94900)
            /nvme/gopath/src/k8s.io/kubernetes/vendor/k8s.io/client-go/tools/cache/reflector.go:324 +0x3bd
        k8s.io/client-go/tools/cache.(*Reflector).Run.func1()
            /nvme/gopath/src/k8s.io/kubernetes/vendor/k8s.io/client-go/tools/cache/reflector.go:279 +0x45
        k8s.io/apimachinery/pkg/util/wait.BackoffUntil.func1(0xc007aafee0)
            /nvme/gopath/src/k8s.io/kubernetes/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:157 +0x49
        k8s.io/apimachinery/pkg/util/wait.BackoffUntil(0xc003e18150?, {0x75e37c0, 0xc00389c280}, 0x1, 0xc006e94900)
            /nvme/gopath/src/k8s.io/kubernetes/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:158 +0xcf
        k8s.io/client-go/tools/cache.(*Reflector).Run(0xc00453f590, 0xc006e94900)
            /nvme/gopath/src/k8s.io/kubernetes/vendor/k8s.io/client-go/tools/cache/reflector.go:278 +0x257
        k8s.io/apimachinery/pkg/util/wait.(*Group).StartWithChannel.func1()
            /nvme/gopath/src/k8s.io/kubernetes/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:58 +0x3f
        k8s.io/apimachinery/pkg/util/wait.(*Group).Start.func1()
            /nvme/gopath/src/k8s.io/kubernetes/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:75 +0x74
        created by k8s.io/apimachinery/pkg/util/wait.(*Group).Start
            /nvme/gopath/src/k8s.io/kubernetes/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:73 +0xe5

watch() was stuck in an exponential backoff timeout.
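That wait cannot be interrupted. As a rough standalone sketch (hypothetical
names retryWatch and backoffDelay, not the actual reflector code), the
pre-fix pattern looks like this: a plain channel receive on the backoff
timer keeps the goroutine alive until the timer fires, regardless of when
the stop channel is closed.

    package main

    import (
        "fmt"
        "time"
    )

    // retryWatch mimics the pre-fix loop: after a failed watch attempt it
    // blocks on the backoff timer unconditionally, so closing stopCh does
    // not unblock it until the timer fires.
    func retryWatch(stopCh <-chan struct{}, backoffDelay time.Duration) {
        for {
            // Pretend the watch request just failed with "connection refused".
            <-time.After(backoffDelay) // stuck here even after stopCh is closed

            select {
            case <-stopCh:
                return
            default:
                // retry the watch
            }
        }
    }

    func main() {
        stopCh := make(chan struct{})
        go retryWatch(stopCh, 2*time.Minute)

        close(stopCh) // "test cleanup"
        time.Sleep(100 * time.Millisecond)
        fmt.Println("main exits; the retryWatch goroutine is still waiting on its timer")
    }

A leak checker such as goleak flags retryWatch here the same way the
reflector goroutine is flagged above.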
Logging confirmed that:

    I0309 21:14:21.756149 1572727 reflector.go:387] k8s.io/client-go/informers/factory.go:150: watch of *v1.PersistentVolumeClaim returned Get "https://127.0.0.1:38269/api/v1/persistentvolumeclaims?allowWatchBookmarks=true&resourceVersion=1&timeout=7m47s&timeoutSeconds=467&watch=true": dial tcp 127.0.0.1:38269: connect: connection refused - backing off

Kubernetes-commit: b4751a52d53d4acc6a5ce3e796938c9a12f81fcb
---
 tools/cache/reflector.go | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/tools/cache/reflector.go b/tools/cache/reflector.go
index 7363ce35..2b335c10 100644
--- a/tools/cache/reflector.go
+++ b/tools/cache/reflector.go
@@ -417,8 +417,12 @@ func (r *Reflector) watch(w watch.Interface, stopCh <-chan struct{}, resyncerrc
 			if err != nil {
 				if canRetry := isWatchErrorRetriable(err); canRetry {
 					klog.V(4).Infof("%s: watch of %v returned %v - backing off", r.name, r.typeDescription, err)
-					<-r.initConnBackoffManager.Backoff().C()
-					continue
+					select {
+					case <-stopCh:
+						return nil
+					case <-r.initConnBackoffManager.Backoff().C():
+						continue
+					}
 				}
 				return err
 			}
@@ -439,8 +443,12 @@ func (r *Reflector) watch(w watch.Interface, stopCh <-chan struct{}, resyncerrc
 					klog.V(4).Infof("%s: watch of %v closed with: %v", r.name, r.typeDescription, err)
 				case apierrors.IsTooManyRequests(err):
 					klog.V(2).Infof("%s: watch of %v returned 429 - backing off", r.name, r.typeDescription)
-					<-r.initConnBackoffManager.Backoff().C()
-					continue
+					select {
+					case <-stopCh:
+						return nil
+					case <-r.initConnBackoffManager.Backoff().C():
+						continue
+					}
 				case apierrors.IsInternalError(err) && retry.ShouldRetry():
 					klog.V(2).Infof("%s: retrying watch of %v internal error: %v", r.name, r.typeDescription, err)
 					continue
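For comparison, here is the earlier sketch rewritten with the pattern the
patch introduces in Reflector.watch: the backoff wait becomes one case of a
select and the stop channel the other, so the goroutine returns as soon as
the channel closes. Names and the fixed 2 minute delay are again
illustrative; the real code waits on r.initConnBackoffManager.Backoff().C()
rather than time.After.

    package main

    import (
        "fmt"
        "time"
    )

    // retryWatch mimics the post-fix loop: the backoff wait is one case of a
    // select and the stop channel is the other, so a closed stop channel ends
    // the wait (and the goroutine) immediately.
    func retryWatch(stopCh <-chan struct{}, backoffDelay time.Duration) {
        for {
            // Pretend the watch request just failed with "connection refused".
            select {
            case <-stopCh:
                return // shut down right away instead of sitting out the backoff
            case <-time.After(backoffDelay):
                // backoff expired, retry the watch
            }
        }
    }

    func main() {
        stopCh := make(chan struct{})
        done := make(chan struct{})
        go func() {
            retryWatch(stopCh, 2*time.Minute)
            close(done)
        }()

        close(stopCh) // "test cleanup"
        <-done        // returns right away, long before the 2 minute backoff
        fmt.Println("retryWatch exited as soon as stopCh was closed")
    }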