e2e: fix flake on loadbalancer tests

Validating that an endpoint is reachable from one part of the cluster is not a sufficient condition to conclude that it will be reachable from every node, because the Service proxies on different nodes have different propagation delays for the EndpointSlice and Service information.
2025-07-30 15:05:27 +00:00 · 2024-07-18 12:50:41 +00:00 · 2024-07-18 12:50:41 +00:00 · fdbe6912d2
commit fdbe6912d2
parent 7693a7e71a
1 changed files with 4 additions and 4 deletions
--- a/test/e2e/network/loadbalancer.go
+++ b/test/e2e/network/loadbalancer.go
@@ -480,10 +480,10 @@ var _ = common.SIGDescribe("LoadBalancers", feature.LoadBalancer, func() {
 		e2eservice.TestReachableHTTP(ctx, ingress, svcPort, loadBalancerLagTimeout)
 		ginkgo.By("checking reachability from pods when LoadBalancerSourceRanges is unset")
-		// We can use timeout 0 here since we know from above that the service is
+		// The propagation delay of the API objects differs from node to node, so it tries
-		// already running (and we aren't waiting for changes to it to propagate).
+		// a few times, even though the Service was previously confirmed to be reachable.
-		checkReachabilityFromPod(ctx, true, 0, namespace, acceptPod.Name, ingress)
+		checkReachabilityFromPod(ctx, true, e2eservice.KubeProxyEndpointLagTimeout, namespace, acceptPod.Name, ingress)
-		checkReachabilityFromPod(ctx, true, 0, namespace, dropPod.Name, ingress)
+		checkReachabilityFromPod(ctx, true, e2eservice.KubeProxyEndpointLagTimeout, namespace, dropPod.Name, ingress)
 		// Create source ranges that allow acceptPod but not dropPod or
 		// cluster-external sources. We assume that the LBSR rules will either see