scheduler_perf: fix steady-state pod creation/deletion

This fixes an issue in TestSchedulerPerf/SteadyStateClusterResourceClaimTemplate: "scheduler_perf.go:1542: FATAL ERROR: op 7: delete scheduled pods: client rate limiter Wait returned an error: rate: Wait(n=1) would exceed context deadline". That occurs when the test is almost done but hasn't observed all scheduled pods yet. The previous attempt to address this error wasn't actually 100% correct: it covered the case where the context has already been canceled, but not this particular "will reach the deadline soon" case.
2025-09-14 13:45:06 +00:00 · 2024-11-07 09:36:36 +01:00
parent e273349f3a
commit 0301b6b504
1 changed files with 9 additions and 2 deletions
--- a/test/integration/scheduler_perf/scheduler_perf.go
+++ b/test/integration/scheduler_perf/scheduler_perf.go
@@ -1953,12 +1953,19 @@ func createPodsSteadily(tCtx ktesting.TContext, namespace string, podInformer co
 				}, metav1.ListOptions{})
 				// Ignore errors when the time is up. errors.Is(context.Canceled) would
 				// be more precise, but doesn't work because client-go doesn't reliably
-				// propagate it. Instead, this was seen:
-				//   client rate limiter Wait returned an error: rate: Wait(n=1) would exceed context deadline
+				// propagate it.
 				if tCtx.Err() != nil {
 					continue
 				}
 				if err != nil {
+					// Worse, sometimes rate limiting gives up *before* the context deadline is reached.
+					// Then we get here with this error:
+					//   client rate limiter Wait returned an error: rate: Wait(n=1) would exceed context deadline
+					//
+					// This also can be ignored. We'll retry if the test is not done yet.
+					if strings.Contains(err.Error(), "would exceed context deadline") {
+						continue
+					}
 					return fmt.Errorf("delete scheduled pods: %w", err)
 				}
 				err = strategy(tCtx, tCtx.Client(), namespace, cpo.Count)