From 0301b6b504e562466da07539a1a9904a65a8f333 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Thu, 7 Nov 2024 09:36:36 +0100 Subject: [PATCH] scheduler_perf: fix steady-state pod creation/deletion This fixes an issue in TestSchedulerPerf/SteadyStateClusterResourceClaimTemplate: scheduler_perf.go:1542: FATAL ERROR: op 7: delete scheduled pods: client rate limiter Wait returned an error: rate: Wait(n=1) would exceed context deadline That occurs when the test is almost done, but hasn't observed all scheduled pods yet. The previous attempt to address this error wasn't actually 100% correct. It covered the case when the context has already been canceled, but not this particular "will reach deadline soon" case. --- test/integration/scheduler_perf/scheduler_perf.go | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/test/integration/scheduler_perf/scheduler_perf.go b/test/integration/scheduler_perf/scheduler_perf.go index ba1ba4ff718..f8aed3a8fe4 100644 --- a/test/integration/scheduler_perf/scheduler_perf.go +++ b/test/integration/scheduler_perf/scheduler_perf.go @@ -1953,12 +1953,19 @@ func createPodsSteadily(tCtx ktesting.TContext, namespace string, podInformer co }, metav1.ListOptions{}) // Ignore errors when the time is up. errors.Is(context.Canceled) would // be more precise, but doesn't work because client-go doesn't reliably - // propagate it. Instead, this was seen: - // client rate limiter Wait returned an error: rate: Wait(n=1) would exceed context deadline + // propagate it. if tCtx.Err() != nil { continue } if err != nil { + // Worse, sometimes rate limiting gives up *before* the context deadline is reached. + // Then we get here with this error: + // client rate limiter Wait returned an error: rate: Wait(n=1) would exceed context deadline + // + // This also can be ignored. We'll retry if the test is not done yet. 
+ if strings.Contains(err.Error(), "would exceed context deadline") { + continue + } return fmt.Errorf("delete scheduled pods: %w", err) } err = strategy(tCtx, tCtx.Client(), namespace, cpo.Count)