From 0ad8e65d90ec38c6520838c1c8a66c720cad46b1 Mon Sep 17 00:00:00 2001 From: Antonio Ojea Date: Wed, 14 May 2025 08:55:42 +0000 Subject: [PATCH] e2e pod readiness gate network flake Change-Id: I9625682c8dc0ca3c5423ed2ba2fe57eb51611003 --- test/e2e/network/pod_lifecycle.go | 46 +++++++++++++++++-------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/test/e2e/network/pod_lifecycle.go b/test/e2e/network/pod_lifecycle.go index 588c65bd629..4656654a462 100644 --- a/test/e2e/network/pod_lifecycle.go +++ b/test/e2e/network/pod_lifecycle.go @@ -276,32 +276,36 @@ var _ = common.SIGDescribe("Connectivity Pod Lifecycle", func() { // is a distributed system eventually consistent, so there is a propagation // delay until this information is present on the nodes and a programming delay // until the corresponding node components program the information on the dataplane. - err = wait.PollUntilContextTimeout(ctx, 3*time.Second, 30*time.Second, true, func(ctx context.Context) (done bool, err error) { - cmd := fmt.Sprintf(`curl -q -s --connect-timeout 5 %s/hostname`, scvAddress) - stdout, err := e2eoutput.RunHostCmd(clientPod.Namespace, clientPod.Name, cmd) + // Require two successful polls that hit the green pod (each poll retries until it succeeds) + // to avoid flakiness during the transition of EndpointSlice states and dataplane programming. 
+ for i := 0; i < 2; i++ { + err = wait.PollUntilContextTimeout(ctx, 3*time.Second, 30*time.Second, true, func(ctx context.Context) (done bool, err error) { + cmd := fmt.Sprintf(`curl -q -s --connect-timeout 5 %s/hostname`, scvAddress) + stdout, err := e2eoutput.RunHostCmd(clientPod.Namespace, clientPod.Name, cmd) + if err != nil { + framework.Logf("expected error when trying to connect to cluster IP : %v", err) + return false, nil + } + if strings.TrimSpace(stdout) == "" { + framework.Logf("got empty stdout, retry until timeout") + return false, nil + } + // Ensure we're comparing hostnames and not FQDNs + targetHostname := strings.Split(greenPod.Name, ".")[0] + hostname := strings.TrimSpace(strings.Split(stdout, ".")[0]) + if hostname != targetHostname { + framework.Logf("expecting hostname %s got %s", targetHostname, hostname) + return false, nil + } + return true, nil + }) if err != nil { - framework.Logf("expected error when trying to connect to cluster IP : %v", err) - return false, nil + framework.Failf("can not connect to pod %s on address %s : %v", greenPod.Name, scvAddress, err) } - if strings.TrimSpace(stdout) == "" { - framework.Logf("got empty stdout, retry until timeout") - return false, nil - } - // Ensure we're comparing hostnames and not FQDNs - targetHostname := strings.Split(greenPod.Name, ".")[0] - hostname := strings.TrimSpace(strings.Split(stdout, ".")[0]) - if hostname != targetHostname { - framework.Logf("expecting hostname %s got %s", targetHostname, hostname) - return false, nil - } - return true, nil - }) - if err != nil { - framework.Failf("can not connect to pod %s on address %s : %v", greenPod.Name, scvAddress, err) } ginkgo.By("Try to connect to the green pod through the service") - // assert 5 times that we can connect only to the green pod + // assert 5 times that we can connect ONLY to the green pod for i := 0; i < 5; i++ { err := wait.PollUntilContextTimeout(ctx, 3*time.Second, 30*time.Second, true, func(ctx context.Context) (done 
bool, err error) { cmd := fmt.Sprintf(`curl -q -s --connect-timeout 5 %s/hostname`, scvAddress)