Clean up and fix networking test timeouts for large clusters

This commit is contained in:
wojtekt
2020-04-02 16:57:38 +02:00
parent 4c5a963463
commit 268b51d023
7 changed files with 54 additions and 34 deletions

View File

@@ -765,14 +765,14 @@ func (j *TestJig) pollIngressWithCert(ing *networkingv1beta1.Ingress, address st
// WaitForIngress waits for the Ingress to get an address.
// WaitForIngress returns when it gets the first 200 response
func (j *TestJig) WaitForIngress(waitForNodePort bool) {
if err := j.WaitForGivenIngressWithTimeout(j.Ingress, waitForNodePort, e2eservice.LoadBalancerPollTimeout); err != nil {
if err := j.WaitForGivenIngressWithTimeout(j.Ingress, waitForNodePort, e2eservice.GetServiceLoadBalancerPropagationTimeout(j.Client)); err != nil {
framework.Failf("error in waiting for ingress to get an address: %s", err)
}
}
// WaitForIngressToStable waits for the LB to return 100 consecutive 200 responses.
func (j *TestJig) WaitForIngressToStable() {
if err := wait.Poll(10*time.Second, e2eservice.LoadBalancerPropagationTimeoutDefault, func() (bool, error) {
if err := wait.Poll(10*time.Second, e2eservice.GetServiceLoadBalancerPropagationTimeout(j.Client), func() (bool, error) {
_, err := j.GetDistinctResponseFromIngress()
if err != nil {
return false, nil
@@ -811,12 +811,13 @@ func (j *TestJig) WaitForGivenIngressWithTimeout(ing *networkingv1beta1.Ingress,
// Ingress. Hostnames and certificate need to be explicitly passed in.
func (j *TestJig) WaitForIngressWithCert(waitForNodePort bool, knownHosts []string, cert []byte) error {
// Wait for the loadbalancer IP.
address, err := j.WaitForIngressAddress(j.Client, j.Ingress.Namespace, j.Ingress.Name, e2eservice.LoadBalancerPollTimeout)
propagationTimeout := e2eservice.GetServiceLoadBalancerPropagationTimeout(j.Client)
address, err := j.WaitForIngressAddress(j.Client, j.Ingress.Namespace, j.Ingress.Name, propagationTimeout)
if err != nil {
return fmt.Errorf("Ingress failed to acquire an IP address within %v", e2eservice.LoadBalancerPollTimeout)
return fmt.Errorf("Ingress failed to acquire an IP address within %v", propagationTimeout)
}
return j.pollIngressWithCert(j.Ingress, address, knownHosts, cert, waitForNodePort, e2eservice.LoadBalancerPollTimeout)
return j.pollIngressWithCert(j.Ingress, address, knownHosts, cert, waitForNodePort, propagationTimeout)
}
// VerifyURL polls for the given iterations, in intervals, and fails if the
@@ -960,9 +961,10 @@ func (j *TestJig) ConstructFirewallForIngress(firewallRuleName string, nodeTags
// GetDistinctResponseFromIngress tries GET call to the ingress VIP and return all distinct responses.
func (j *TestJig) GetDistinctResponseFromIngress() (sets.String, error) {
// Wait for the loadbalancer IP.
address, err := j.WaitForIngressAddress(j.Client, j.Ingress.Namespace, j.Ingress.Name, e2eservice.LoadBalancerPollTimeout)
propagationTimeout := e2eservice.GetServiceLoadBalancerPropagationTimeout(j.Client)
address, err := j.WaitForIngressAddress(j.Client, j.Ingress.Namespace, j.Ingress.Name, propagationTimeout)
if err != nil {
framework.Failf("Ingress failed to acquire an IP address within %v", e2eservice.LoadBalancerPollTimeout)
framework.Failf("Ingress failed to acquire an IP address within %v", propagationTimeout)
}
responses := sets.NewString()
timeoutClient := &http.Client{Timeout: IngressReqTimeout}

View File

@@ -57,14 +57,14 @@ const (
// LoadBalancerPropagationTimeoutDefault is the default time to wait for pods to
// be targeted by load balancers.
LoadBalancerPropagationTimeoutDefault = 10 * time.Minute
// LoadBalancerPropagationTimeoutLarge is the maximum time to wait for pods to
// be targeted by load balancers.
LoadBalancerPropagationTimeoutLarge = time.Hour
// LoadBalancerCleanupTimeout is the time required by the loadbalancer to cleanup, proportional to numApps/Ing.
// Bring the cleanup timeout back down to 5m once b/33588344 is resolved.
LoadBalancerCleanupTimeout = 15 * time.Minute
// LoadBalancerPollTimeout is the time required by the loadbalancer to poll.
// On average it takes ~6 minutes for a single backend to come online in GCE.
LoadBalancerPollTimeout = 22 * time.Minute
// LoadBalancerPollInterval is the interval value in which the loadbalancer polls.
LoadBalancerPollInterval = 30 * time.Second

View File

@@ -300,7 +300,7 @@ func (j *TestJig) GetEndpointNodeNames() (sets.String, error) {
// WaitForEndpointOnNode waits for a service endpoint on the given node.
func (j *TestJig) WaitForEndpointOnNode(nodeName string) error {
return wait.PollImmediate(framework.Poll, LoadBalancerPropagationTimeoutDefault, func() (bool, error) {
return wait.PollImmediate(framework.Poll, KubeProxyLagTimeout, func() (bool, error) {
endpoints, err := j.Client.CoreV1().Endpoints(j.Namespace).Get(context.TODO(), j.Name, metav1.GetOptions{})
if err != nil {
framework.Logf("Get endpoints for service %s/%s failed (%s)", j.Namespace, j.Name, err)

View File

@@ -109,6 +109,16 @@ func GetServiceLoadBalancerCreationTimeout(cs clientset.Interface) time.Duration
return LoadBalancerCreateTimeoutDefault
}
// GetServiceLoadBalancerPropagationTimeout chooses the load balancer
// propagation timeout for a service based on cluster size.
//
// It looks up the ready, schedulable nodes through cs and hands any lookup
// error to framework.ExpectNoError. When the node count is strictly greater
// than LargeClusterMinNodesNumber it returns
// LoadBalancerPropagationTimeoutLarge; otherwise it returns
// LoadBalancerPropagationTimeoutDefault.
func GetServiceLoadBalancerPropagationTimeout(cs clientset.Interface) time.Duration {
	readyNodes, err := e2enode.GetReadySchedulableNodes(cs)
	framework.ExpectNoError(err)

	// Start from the default and only widen it for large clusters.
	timeout := LoadBalancerPropagationTimeoutDefault
	if nodeCount := len(readyNodes.Items); nodeCount > LargeClusterMinNodesNumber {
		timeout = LoadBalancerPropagationTimeoutLarge
	}
	return timeout
}
// CreateServiceForSimpleAppWithPods is a convenience wrapper to create a service and its matching pods all at once.
func CreateServiceForSimpleAppWithPods(c clientset.Interface, contPort int, svcPort int, namespace, appName string, podSpec func(n v1.Node) v1.PodSpec, count int, block bool) (*v1.Service, error) {
var err error