client-go: shut down watch reflector as soon as stop channel closes

Without this change, leaked goroutines were sometimes reported for
test/integration/scheduler_perf. The one that delayed test cleanup was this one:

    goleak.go:50: found unexpected goroutines:
        [Goroutine 2704 in state chan receive, 2 minutes, with k8s.io/client-go/tools/cache.(*Reflector).watch on top of the stack:
        goroutine 2704 [chan receive, 2 minutes]:
        k8s.io/client-go/tools/cache.(*Reflector).watch(0xc00453f590, {0x0, 0x0}, 0x1f?, 0xc00a128080?)
                /nvme/gopath/src/k8s.io/kubernetes/vendor/k8s.io/client-go/tools/cache/reflector.go:388 +0x5b3
        k8s.io/client-go/tools/cache.(*Reflector).ListAndWatch(0xc00453f590, 0xc006e94900)
                /nvme/gopath/src/k8s.io/kubernetes/vendor/k8s.io/client-go/tools/cache/reflector.go:324 +0x3bd
        k8s.io/client-go/tools/cache.(*Reflector).Run.func1()
                /nvme/gopath/src/k8s.io/kubernetes/vendor/k8s.io/client-go/tools/cache/reflector.go:279 +0x45
        k8s.io/apimachinery/pkg/util/wait.BackoffUntil.func1(0xc007aafee0)
                /nvme/gopath/src/k8s.io/kubernetes/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:157 +0x49
        k8s.io/apimachinery/pkg/util/wait.BackoffUntil(0xc003e18150?, {0x75e37c0, 0xc00389c280}, 0x1, 0xc006e94900)
                /nvme/gopath/src/k8s.io/kubernetes/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:158 +0xcf
        k8s.io/client-go/tools/cache.(*Reflector).Run(0xc00453f590, 0xc006e94900)
                /nvme/gopath/src/k8s.io/kubernetes/vendor/k8s.io/client-go/tools/cache/reflector.go:278 +0x257
        k8s.io/apimachinery/pkg/util/wait.(*Group).StartWithChannel.func1()
                /nvme/gopath/src/k8s.io/kubernetes/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:58 +0x3f
        k8s.io/apimachinery/pkg/util/wait.(*Group).Start.func1()
                /nvme/gopath/src/k8s.io/kubernetes/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:75 +0x74
        created by k8s.io/apimachinery/pkg/util/wait.(*Group).Start
                /nvme/gopath/src/k8s.io/kubernetes/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:73 +0xe5
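
The report above comes from goleak (go.uber.org/goleak), which fails a test when
goroutines beyond its allowlist are still running once the test returns. A minimal,
hypothetical test of the same shape (not the actual scheduler_perf code):

    package example

    import (
        "testing"
        "time"

        "go.uber.org/goleak"
    )

    func TestShutdown(t *testing.T) {
        // Fails with "found unexpected goroutines" if anything is still running.
        defer goleak.VerifyNone(t)

        stopCh := make(chan struct{})
        done := make(chan struct{})
        go func() {
            defer close(done)
            select {
            case <-stopCh: // returns promptly once the test closes stopCh
            case <-time.After(10 * time.Minute): // stand-in for the backoff wait
            }
        }()

        close(stopCh)
        <-done // ensure the goroutine has exited before goleak inspects the stacks
    }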

watch() was stuck waiting for an exponential backoff timer instead of reacting to the
closed stop channel. Logging confirmed that:

        I0309 21:14:21.756149 1572727 reflector.go:387] k8s.io/client-go/informers/factory.go:150: watch of *v1.PersistentVolumeClaim returned Get "https://127.0.0.1:38269/api/v1/persistentvolumeclaims?allowWatchBookmarks=true&resourceVersion=1&timeout=7m47s&timeoutSeconds=467&watch=true": dial tcp 127.0.0.1:38269: connect: connection refused - backing off
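
The fix, shown in the diff below, replaces the bare receive from the backoff timer
with a select that also watches the stop channel, so the wait ends as soon as the
stop channel closes. A minimal sketch of the two shapes, using hypothetical names
(backoffC stands in for r.initConnBackoffManager.Backoff().C()):

    package example

    import "time"

    // Before: a bare receive that ignores the stop channel and blocks for
    // the full backoff interval.
    func waitForRetryBlocking(backoffC <-chan time.Time) {
        <-backoffC
    }

    // After: the wait is interruptible; the return value tells the caller
    // whether to stop instead of retrying.
    func waitForRetry(stopCh <-chan struct{}, backoffC <-chan time.Time) (stop bool) {
        select {
        case <-stopCh:
            return true
        case <-backoffC:
            return false
        }
    }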

Kubernetes-commit: b4751a52d53d4acc6a5ce3e796938c9a12f81fcb
Author: Patrick Ohly, 2023-03-09 21:19:17 +01:00 (committed by Kubernetes Publisher)
Commit: 29ad2bc1d4 (parent 1f020beddb)


@@ -417,8 +417,12 @@ func (r *Reflector) watch(w watch.Interface, stopCh <-chan struct{}, resyncerrc
             if err != nil {
                 if canRetry := isWatchErrorRetriable(err); canRetry {
                     klog.V(4).Infof("%s: watch of %v returned %v - backing off", r.name, r.typeDescription, err)
-                    <-r.initConnBackoffManager.Backoff().C()
-                    continue
+                    select {
+                    case <-stopCh:
+                        return nil
+                    case <-r.initConnBackoffManager.Backoff().C():
+                        continue
+                    }
                 }
                 return err
             }
@@ -439,8 +443,12 @@ func (r *Reflector) watch(w watch.Interface, stopCh <-chan struct{}, resyncerrc
                     klog.V(4).Infof("%s: watch of %v closed with: %v", r.name, r.typeDescription, err)
                 case apierrors.IsTooManyRequests(err):
                     klog.V(2).Infof("%s: watch of %v returned 429 - backing off", r.name, r.typeDescription)
-                    <-r.initConnBackoffManager.Backoff().C()
-                    continue
+                    select {
+                    case <-stopCh:
+                        return nil
+                    case <-r.initConnBackoffManager.Backoff().C():
+                        continue
+                    }
                 case apierrors.IsInternalError(err) && retry.ShouldRetry():
                     klog.V(2).Infof("%s: retrying watch of %v internal error: %v", r.name, r.typeDescription, err)
                     continue
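
For context, Reflector.Run is started via wait.Group.StartWithChannel (visible in the
stack trace above), so shutting down means closing the stop channel and waiting for
the group. A hedged caller-side sketch, with a trivial worker standing in for
Reflector.Run (which has the same func(stopCh <-chan struct{}) signature):

    package main

    import (
        "fmt"
        "time"

        "k8s.io/apimachinery/pkg/util/wait"
    )

    // worker stands in for Reflector.Run; the timer stands in for the
    // reflector's backoff wait.
    func worker(stopCh <-chan struct{}) {
        backoff := time.NewTimer(10 * time.Minute)
        defer backoff.Stop()
        select {
        case <-stopCh:
            fmt.Println("stopped promptly")
        case <-backoff.C:
            fmt.Println("backoff expired")
        }
    }

    func main() {
        var wg wait.Group
        stopCh := make(chan struct{})
        wg.StartWithChannel(stopCh, worker)

        close(stopCh) // signal shutdown
        wg.Wait()     // with the select in worker, this returns almost immediately
    }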