From 498e3efe74e631f5a57dd347adde4bb8bc56aed2 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Sun, 29 Dec 2024 12:18:50 +0100 Subject: [PATCH] client-go cache: fix TestHammerController The test relied on a 100ms sleep to ensure that controller was done. If that race was lost, one goroutine was intentionally prevented from completing by locking a mutex permanently. A TODO was left about detecting that. Adding goroutine leak checking in https://github.com/kubernetes/kubernetes/pull/126387 revealed that this race indeed sometimes is lost because the goroutine leaked (https://github.com/kubernetes/kubernetes/issues/129400). Waiting for controller shutdown instead of relying on timing should fix this. Kubernetes-commit: 8e1403563a60f3b7a258e3bbb64b5c3a7f6548fb --- tools/cache/controller_test.go | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tools/cache/controller_test.go b/tools/cache/controller_test.go index 6b263bc8..dba2dfe6 100644 --- a/tools/cache/controller_test.go +++ b/tools/cache/controller_test.go @@ -232,7 +232,12 @@ func TestHammerController(t *testing.T) { // Run the controller and run it until we cancel. _, ctx := ktesting.NewTestContext(t) ctx, cancel := context.WithCancel(ctx) - go controller.RunWithContext(ctx) + var controllerWG sync.WaitGroup + controllerWG.Add(1) + go func() { + defer controllerWG.Done() + controller.RunWithContext(ctx) + }() // Let's wait for the controller to do its initial sync wait.Poll(100*time.Millisecond, wait.ForeverTestTimeout, func() (bool, error) { @@ -293,8 +298,11 @@ func TestHammerController(t *testing.T) { time.Sleep(100 * time.Millisecond) cancel() - // TODO: Verify that no goroutines were leaked here and that everything shut - // down cleanly. + // Before we permanently lock this mutex, we have to be sure + // that the controller has stopped running. At this point, + // all goroutines should have stopped. Leak checking is + // done by TestMain. + controllerWG.Wait() outputSetLock.Lock() t.Logf("got: %#v", outputSet)