From fdbe6912d210eece43a0056168656f2f12a29e2b Mon Sep 17 00:00:00 2001
From: Antonio Ojea <aojea@google.com>
Date: Thu, 18 Jul 2024 12:50:41 +0000
Subject: [PATCH] e2e: fix flake on loadbalancer tests

Validating that an endpoint is reachable from one part of the cluster
is not a sufficient condition to assume it will be reachable from every
node, as the Service proxies on different nodes will have different
propagation delays for the EndpointSlices and Services information.
---
 test/e2e/network/loadbalancer.go | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/test/e2e/network/loadbalancer.go b/test/e2e/network/loadbalancer.go
index c2fa76bad02..cd869868b2a 100644
--- a/test/e2e/network/loadbalancer.go
+++ b/test/e2e/network/loadbalancer.go
@@ -480,10 +480,10 @@ var _ = common.SIGDescribe("LoadBalancers", feature.LoadBalancer, func() {
 		e2eservice.TestReachableHTTP(ctx, ingress, svcPort, loadBalancerLagTimeout)
 
 		ginkgo.By("checking reachability from pods when LoadBalancerSourceRanges is unset")
-		// We can use timeout 0 here since we know from above that the service is
-		// already running (and we aren't waiting for changes to it to propagate).
-		checkReachabilityFromPod(ctx, true, 0, namespace, acceptPod.Name, ingress)
-		checkReachabilityFromPod(ctx, true, 0, namespace, dropPod.Name, ingress)
+		// Propagation delays for the API objects differ between nodes, so retry for a
+		// while, even though the Service was already confirmed to be reachable above.
+		checkReachabilityFromPod(ctx, true, e2eservice.KubeProxyEndpointLagTimeout, namespace, acceptPod.Name, ingress)
+		checkReachabilityFromPod(ctx, true, e2eservice.KubeProxyEndpointLagTimeout, namespace, dropPod.Name, ingress)
 
 		// Create source ranges that allow acceptPod but not dropPod or
 		// cluster-external sources. We assume that the LBSR rules will either see