From b4e1cfcfab6c4b571c02674acb90a306791c1786 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Wed, 29 May 2024 14:10:49 +0200 Subject: [PATCH] client-go record: avoid panic when watch creation failed The previous attempt to fix this in https://github.com/kubernetes/kubernetes/commit/6aa779f4ed3d3acdad2f2bf17fb27e11e23aabe4#diff-efa2cd1347df22ace5a516ea794152d00ef2a079db135c81787ed920ecb73658 didn't address the root cause (or perhaps created it, not sure): the goroutine must not be started if watch creation failed. Instead, the error gets logged (as before) and an empty watch gets returned to the caller (new). This is necessary because the function doesn't have an error return value and changing that now would be disruptive. The empty watch is valid and usable, so callers won't crash when they calls Stop. This showed up recently in failed unit tests, probably because test cancellation makes this error more likely: "Unable start event watcher (will not retry!)" err="broadcaster already stopped" logger="TestGarbageCollectorConstruction leaked goroutine" The logger value and a preceding warning show that this occurs after test completion. Kubernetes-commit: 080432c46a7a49c3abf86d7fc5f2a5d7abc92239 --- tools/record/event.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/record/event.go b/tools/record/event.go index 0b3b14f8..55947d20 100644 --- a/tools/record/event.go +++ b/tools/record/event.go @@ -395,7 +395,11 @@ func (e *eventBroadcasterImpl) StartStructuredLogging(verbosity klog.Level) watc func (e *eventBroadcasterImpl) StartEventWatcher(eventHandler func(*v1.Event)) watch.Interface { watcher, err := e.Watch() if err != nil { + // This function traditionally returns no error even though it can fail. + // Instead, it logs the error and returns an empty watch. The empty + // watch ensures that callers don't crash when calling Stop. klog.FromContext(e.cancelationCtx).Error(err, "Unable start event watcher (will not retry!)") + return watch.NewEmptyWatch() } go func() { defer utilruntime.HandleCrash()