Clean up and fix networking test timeouts for large clusters

2025-10-29 12:49:55 +00:00 · 2020-04-02 16:57:38 +02:00
parent 4c5a963463
commit 268b51d023
7 changed files with 54 additions and 34 deletions
--- a/test/e2e/framework/ingress/ingress_utils.go
+++ b/test/e2e/framework/ingress/ingress_utils.go
@@ -765,14 +765,14 @@ func (j *TestJig) pollIngressWithCert(ing *networkingv1beta1.Ingress, address st
 // WaitForIngress waits for the Ingress to get an address.
 // WaitForIngress returns when it gets the first 200 response
 func (j *TestJig) WaitForIngress(waitForNodePort bool) {
-	if err := j.WaitForGivenIngressWithTimeout(j.Ingress, waitForNodePort, e2eservice.LoadBalancerPollTimeout); err != nil {
+	if err := j.WaitForGivenIngressWithTimeout(j.Ingress, waitForNodePort, e2eservice.GetServiceLoadBalancerPropagationTimeout(j.Client)); err != nil {
 		framework.Failf("error in waiting for ingress to get an address: %s", err)
 	}
 }

 // WaitForIngressToStable waits for the LB return 100 consecutive 200 responses.
 func (j *TestJig) WaitForIngressToStable() {
-	if err := wait.Poll(10*time.Second, e2eservice.LoadBalancerPropagationTimeoutDefault, func() (bool, error) {
+	if err := wait.Poll(10*time.Second, e2eservice.GetServiceLoadBalancerPropagationTimeout(j.Client), func() (bool, error) {
 		_, err := j.GetDistinctResponseFromIngress()
 		if err != nil {
 			return false, nil
@@ -811,12 +811,13 @@ func (j *TestJig) WaitForGivenIngressWithTimeout(ing *networkingv1beta1.Ingress,
 // Ingress. Hostnames and certificate need to be explicitly passed in.
 func (j *TestJig) WaitForIngressWithCert(waitForNodePort bool, knownHosts []string, cert []byte) error {
 	// Wait for the loadbalancer IP.
-	address, err := j.WaitForIngressAddress(j.Client, j.Ingress.Namespace, j.Ingress.Name, e2eservice.LoadBalancerPollTimeout)
+	propagationTimeout := e2eservice.GetServiceLoadBalancerPropagationTimeout(j.Client)
+	address, err := j.WaitForIngressAddress(j.Client, j.Ingress.Namespace, j.Ingress.Name, propagationTimeout)
 	if err != nil {
-		return fmt.Errorf("Ingress failed to acquire an IP address within %v", e2eservice.LoadBalancerPollTimeout)
+		return fmt.Errorf("Ingress failed to acquire an IP address within %v", propagationTimeout)
 	}

-	return j.pollIngressWithCert(j.Ingress, address, knownHosts, cert, waitForNodePort, e2eservice.LoadBalancerPollTimeout)
+	return j.pollIngressWithCert(j.Ingress, address, knownHosts, cert, waitForNodePort, propagationTimeout)
 }

 // VerifyURL polls for the given iterations, in intervals, and fails if the
@@ -960,9 +961,10 @@ func (j *TestJig) ConstructFirewallForIngress(firewallRuleName string, nodeTags
 // GetDistinctResponseFromIngress tries GET call to the ingress VIP and return all distinct responses.
 func (j *TestJig) GetDistinctResponseFromIngress() (sets.String, error) {
 	// Wait for the loadbalancer IP.
-	address, err := j.WaitForIngressAddress(j.Client, j.Ingress.Namespace, j.Ingress.Name, e2eservice.LoadBalancerPollTimeout)
+	propagationTimeout := e2eservice.GetServiceLoadBalancerPropagationTimeout(j.Client)
+	address, err := j.WaitForIngressAddress(j.Client, j.Ingress.Namespace, j.Ingress.Name, propagationTimeout)
 	if err != nil {
-		framework.Failf("Ingress failed to acquire an IP address within %v", e2eservice.LoadBalancerPollTimeout)
+		framework.Failf("Ingress failed to acquire an IP address within %v", propagationTimeout)
 	}
 	responses := sets.NewString()
 	timeoutClient := &http.Client{Timeout: IngressReqTimeout}
--- a/test/e2e/framework/service/const.go
+++ b/test/e2e/framework/service/const.go
@@ -57,14 +57,14 @@ const (
 	// LoadBalancerPropagationTimeoutDefault is the default time to wait for pods to
 	// be targeted by load balancers.
 	LoadBalancerPropagationTimeoutDefault = 10 * time.Minute
+	// LoadBalancerPropagationTimeoutLarge is the maximum time to wait for pods to
+	// be targeted by load balancers.
+	LoadBalancerPropagationTimeoutLarge = time.Hour

 	// LoadBalancerCleanupTimeout is the time required by the loadbalancer to cleanup, proportional to numApps/Ing.
 	// Bring the cleanup timeout back down to 5m once b/33588344 is resolved.
 	LoadBalancerCleanupTimeout = 15 * time.Minute

-	// LoadBalancerPollTimeout is the time required by the loadbalancer to poll.
-	// On average it takes ~6 minutes for a single backend to come online in GCE.
-	LoadBalancerPollTimeout = 22 * time.Minute
 	// LoadBalancerPollInterval is the interval value in which the loadbalancer polls.
 	LoadBalancerPollInterval = 30 * time.Second

--- a/test/e2e/framework/service/jig.go
+++ b/test/e2e/framework/service/jig.go
@@ -300,7 +300,7 @@ func (j *TestJig) GetEndpointNodeNames() (sets.String, error) {

 // WaitForEndpointOnNode waits for a service endpoint on the given node.
 func (j *TestJig) WaitForEndpointOnNode(nodeName string) error {
-	return wait.PollImmediate(framework.Poll, LoadBalancerPropagationTimeoutDefault, func() (bool, error) {
+	return wait.PollImmediate(framework.Poll, KubeProxyLagTimeout, func() (bool, error) {
 		endpoints, err := j.Client.CoreV1().Endpoints(j.Namespace).Get(context.TODO(), j.Name, metav1.GetOptions{})
 		if err != nil {
 			framework.Logf("Get endpoints for service %s/%s failed (%s)", j.Namespace, j.Name, err)
--- a/test/e2e/framework/service/resource.go
+++ b/test/e2e/framework/service/resource.go
@@ -109,6 +109,16 @@ func GetServiceLoadBalancerCreationTimeout(cs clientset.Interface) time.Duration
 	return LoadBalancerCreateTimeoutDefault
 }

+// GetServiceLoadBalancerPropagationTimeout returns LoadBalancerPropagationTimeoutLarge when the cluster has more than LargeClusterMinNodesNumber ready schedulable nodes, and LoadBalancerPropagationTimeoutDefault otherwise.
+func GetServiceLoadBalancerPropagationTimeout(cs clientset.Interface) time.Duration {
+	nodes, err := e2enode.GetReadySchedulableNodes(cs)
+	framework.ExpectNoError(err)
+	if len(nodes.Items) > LargeClusterMinNodesNumber {
+		return LoadBalancerPropagationTimeoutLarge
+	}
+	return LoadBalancerPropagationTimeoutDefault
+}
+
 // CreateServiceForSimpleAppWithPods is a convenience wrapper to create a service and its matching pods all at once.
 func CreateServiceForSimpleAppWithPods(c clientset.Interface, contPort int, svcPort int, namespace, appName string, podSpec func(n v1.Node) v1.PodSpec, count int, block bool) (*v1.Service, error) {
 	var err error