diff --git a/test/e2e/framework/service_util.go b/test/e2e/framework/service_util.go
index 238b7ae1e12..67feae8025c 100644
--- a/test/e2e/framework/service_util.go
+++ b/test/e2e/framework/service_util.go
@@ -50,6 +50,10 @@ const (
 	// liberal. Fix tracked in #20567.
 	KubeProxyLagTimeout = 5 * time.Minute
 
+	// KubeProxyEndpointLagTimeout is the maximum time a kube-proxy daemon on a node is allowed
+	// to not notice an Endpoint update.
+	KubeProxyEndpointLagTimeout = 30 * time.Second
+
 	// LoadBalancerLagTimeoutDefault is the maximum time a load balancer is allowed to
 	// not respond after creation.
 	LoadBalancerLagTimeoutDefault = 2 * time.Minute
@@ -752,18 +756,29 @@ func testHTTPHealthCheckNodePort(ip string, port int, request string) (bool, err
 	return false, fmt.Errorf("Unexpected HTTP response code %s from health check responder at %s", resp.Status, url)
 }
 
-func (j *ServiceTestJig) TestHTTPHealthCheckNodePort(host string, port int, request string, tries int) (pass, fail int, statusMsg string) {
-	for i := 0; i < tries; i++ {
-		success, err := testHTTPHealthCheckNodePort(host, port, request)
-		if success {
-			pass++
-		} else {
-			fail++
+// TestHTTPHealthCheckNodePort polls the health check node port at host:port once
+// per second until the number of probes whose outcome matches expectSucceed reaches
+// threshold, or until timeout elapses. Matching probes are counted cumulatively,
+// not consecutively. It returns an error if threshold is not reached in time.
+func (j *ServiceTestJig) TestHTTPHealthCheckNodePort(host string, port int, request string, timeout time.Duration, expectSucceed bool, threshold int) error {
+	count := 0
+	condition := func() (bool, error) {
+		// The probe error is deliberately dropped: only the success flag is counted.
+		success, _ := testHTTPHealthCheckNodePort(host, port, request)
+		if success && expectSucceed ||
+			!success && !expectSucceed {
+			count++
 		}
-		statusMsg += fmt.Sprintf("\nAttempt %d Error %v", i, err)
-		time.Sleep(1 * time.Second)
+		if count >= threshold {
+			return true, nil
+		}
+		return false, nil
 	}
-	return pass, fail, statusMsg
+
+	if err := wait.PollImmediate(time.Second, timeout, condition); err != nil {
+		return fmt.Errorf("error waiting for healthCheckNodePort: expected at least %d succeed=%v on %v:%v%v, got %d", threshold, expectSucceed, host, port, request, count)
+	}
+	return nil
 }
 
 // Simple helper class to avoid too much boilerplate in tests
diff --git a/test/e2e/service.go b/test/e2e/service.go
index 2724ddad6a5..03282748bcb 100644
--- a/test/e2e/service.go
+++ b/test/e2e/service.go
@@ -1287,12 +1287,9 @@ var _ = framework.KubeDescribe("ESIPP [Slow]", func() {
 			jig.ChangeServiceType(svc.Namespace, svc.Name, v1.ServiceTypeClusterIP, loadBalancerCreateTimeout)
 
 			// Make sure we didn't leak the health check node port.
-			for name, ips := range jig.GetEndpointNodes(svc) {
-				_, fail, status := jig.TestHTTPHealthCheckNodePort(ips[0], healthCheckNodePort, "/healthz", 5)
-				if fail < 2 {
-					framework.Failf("Health check node port %v not released on node %v: %v", healthCheckNodePort, name, status)
-				}
-				break
+			threshold := 2
+			for _, ips := range jig.GetEndpointNodes(svc) {
+				Expect(jig.TestHTTPHealthCheckNodePort(ips[0], healthCheckNodePort, "/healthz", framework.KubeProxyEndpointLagTimeout, false, threshold)).NotTo(HaveOccurred())
 			}
 			Expect(cs.Core().Services(svc.Namespace).Delete(svc.Name, nil)).NotTo(HaveOccurred())
 		}()
@@ -1379,16 +1376,12 @@ var _ = framework.KubeDescribe("ESIPP [Slow]", func() {
 			// HealthCheck should pass only on the node where num(endpoints) > 0
 			// All other nodes should fail the healthcheck on the service healthCheckNodePort
 			for n, publicIP := range ips {
+				// Make sure the loadbalancer picked up the health check change.
+				// Confirm traffic can reach backend through LB before checking healthcheck nodeport.
+				jig.TestReachableHTTP(ingressIP, svcTCPPort, framework.KubeProxyLagTimeout)
 				expectedSuccess := nodes.Items[n].Name == endpointNodeName
 				framework.Logf("Health checking %s, http://%s:%d%s, expectedSuccess %v", nodes.Items[n].Name, publicIP, healthCheckNodePort, path, expectedSuccess)
-				pass, fail, err := jig.TestHTTPHealthCheckNodePort(publicIP, healthCheckNodePort, path, 5)
-				if expectedSuccess && pass < threshold {
-					framework.Failf("Expected %d successes on %v%v, got %d, err %v", threshold, endpointNodeName, path, pass, err)
-				} else if !expectedSuccess && fail < threshold {
-					framework.Failf("Expected %d failures on %v%v, got %d, err %v", threshold, endpointNodeName, path, fail, err)
-				}
-				// Make sure the loadbalancer picked up the helth check change
-				jig.TestReachableHTTP(ingressIP, svcTCPPort, framework.KubeProxyLagTimeout)
+				Expect(jig.TestHTTPHealthCheckNodePort(publicIP, healthCheckNodePort, path, framework.KubeProxyEndpointLagTimeout, expectedSuccess, threshold)).NotTo(HaveOccurred())
 			}
 			framework.ExpectNoError(framework.DeleteRCAndPods(f.ClientSet, f.InternalClientset, namespace, serviceName))
 		}