Fix timeouts for networking tests in large clusters

This commit is contained in:
wojtekt 2019-10-23 12:50:00 +02:00
parent 5f03d33fc9
commit bf34ba0cdf
7 changed files with 25 additions and 40 deletions

View File

@ -901,7 +901,7 @@ func testRollingUpdateDeploymentWithLocalTrafficLoadBalancer(f *framework.Framew
framework.Logf("Creating a service %s with type=LoadBalancer and externalTrafficPolicy=Local in namespace %s", name, ns)
jig := e2eservice.NewTestJig(c, ns, name)
jig.Labels = podLabels
service, err := jig.CreateLoadBalancerService(e2eservice.LoadBalancerCreateTimeoutDefault, func(svc *v1.Service) {
service, err := jig.CreateLoadBalancerService(e2eservice.GetServiceLoadBalancerCreationTimeout(c), func(svc *v1.Service) {
svc.Spec.ExternalTrafficPolicy = v1.ServiceExternalTrafficPolicyTypeLocal
})
framework.ExpectNoError(err)

View File

@ -676,7 +676,7 @@ func (j *TestJig) WaitForIngress(waitForNodePort bool) {
// WaitForIngressToStable waits for the LB return 100 consecutive 200 responses.
func (j *TestJig) WaitForIngressToStable() {
if err := wait.Poll(10*time.Second, e2eservice.LoadBalancerCreateTimeoutDefault, func() (bool, error) {
if err := wait.Poll(10*time.Second, e2eservice.LoadBalancerPropagationTimeoutDefault, func() (bool, error) {
_, err := j.GetDistinctResponseFromIngress()
if err != nil {
return false, nil

View File

@ -56,6 +56,10 @@ const (
// LoadBalancerCreateTimeoutLarge is the maximum time to wait for a load balancer to be created/modified.
LoadBalancerCreateTimeoutLarge = 2 * time.Hour
// LoadBalancerPropagationTimeoutDefault is the default time to wait for pods to
// be targeted by load balancers.
LoadBalancerPropagationTimeoutDefault = 10 * time.Minute
// LoadBalancerCleanupTimeout is the time required by the loadbalancer to cleanup, proportional to numApps/Ing.
// Bring the cleanup timeout back down to 5m once b/33588344 is resolved.
LoadBalancerCleanupTimeout = 15 * time.Minute

View File

@ -302,7 +302,7 @@ func (j *TestJig) GetEndpointNodeNames() (sets.String, error) {
// WaitForEndpointOnNode waits for a service endpoint on the given node.
func (j *TestJig) WaitForEndpointOnNode(nodeName string) error {
return wait.PollImmediate(framework.Poll, LoadBalancerCreateTimeoutDefault, func() (bool, error) {
return wait.PollImmediate(framework.Poll, LoadBalancerPropagationTimeoutDefault, func() (bool, error) {
endpoints, err := j.Client.CoreV1().Endpoints(j.Namespace).Get(j.Name, metav1.GetOptions{})
if err != nil {
framework.Logf("Get endpoints for service %s/%s failed (%s)", j.Namespace, j.Name, err)

View File

@ -82,7 +82,7 @@ var _ = SIGDescribe("Firewall rule", func() {
nodesSet := sets.NewString(nodesNames...)
ginkgo.By("Creating a LoadBalancer type service with ExternalTrafficPolicy=Global")
svc, err := jig.CreateLoadBalancerService(e2eservice.LoadBalancerCreateTimeoutDefault, func(svc *v1.Service) {
svc, err := jig.CreateLoadBalancerService(e2eservice.GetServiceLoadBalancerCreationTimeout(cs), func(svc *v1.Service) {
svc.Spec.Ports = []v1.ServicePort{{Protocol: v1.ProtocolTCP, Port: firewallTestHTTPPort}}
svc.Spec.LoadBalancerSourceRanges = firewallTestSourceRanges
})
@ -129,7 +129,7 @@ var _ = SIGDescribe("Firewall rule", func() {
ginkgo.By("Waiting for the correct local traffic health check firewall rule to be created")
localHCFw := gce.ConstructHealthCheckFirewallForLBService(clusterID, svc, cloudConfig.NodeTag, false)
fw, err = gce.WaitForFirewallRule(gceCloud, localHCFw.Name, true, e2eservice.LoadBalancerCreateTimeoutDefault)
fw, err = gce.WaitForFirewallRule(gceCloud, localHCFw.Name, true, e2eservice.GetServiceLoadBalancerCreationTimeout(cs))
framework.ExpectNoError(err)
err = gce.VerifyFirewallRule(fw, localHCFw, cloudConfig.Network, false)
framework.ExpectNoError(err)
@ -160,7 +160,7 @@ var _ = SIGDescribe("Firewall rule", func() {
// Send requests from outside of the cluster because internal traffic is whitelisted
ginkgo.By("Accessing the external service ip from outside, all non-master nodes should be reached")
err = framework.TestHitNodesFromOutside(svcExternalIP, firewallTestHTTPPort, e2eservice.LoadBalancerCreateTimeoutDefault, nodesSet)
err = framework.TestHitNodesFromOutside(svcExternalIP, firewallTestHTTPPort, e2eservice.LoadBalancerPropagationTimeoutDefault, nodesSet)
framework.ExpectNoError(err)
// Check if there are overlapping tags on the firewall that extend beyond just the vms in our cluster
@ -181,12 +181,12 @@ var _ = SIGDescribe("Firewall rule", func() {
nodesSet.Insert(nodesNames[0])
gce.SetInstanceTags(cloudConfig, nodesNames[0], zone, removedTags)
// Make sure traffic is recovered before exit
err = framework.TestHitNodesFromOutside(svcExternalIP, firewallTestHTTPPort, e2eservice.LoadBalancerCreateTimeoutDefault, nodesSet)
err = framework.TestHitNodesFromOutside(svcExternalIP, firewallTestHTTPPort, e2eservice.LoadBalancerPropagationTimeoutDefault, nodesSet)
framework.ExpectNoError(err)
}()
ginkgo.By("Accessing service through the external ip and examine got no response from the node without tags")
err = framework.TestHitNodesFromOutsideWithCount(svcExternalIP, firewallTestHTTPPort, e2eservice.LoadBalancerCreateTimeoutDefault, nodesSet, 15)
err = framework.TestHitNodesFromOutsideWithCount(svcExternalIP, firewallTestHTTPPort, e2eservice.LoadBalancerPropagationTimeoutDefault, nodesSet, 15)
framework.ExpectNoError(err)
})

View File

@ -562,12 +562,7 @@ var _ = SIGDescribe("Services", func() {
if framework.ProviderIs("aws") {
loadBalancerLagTimeout = e2eservice.LoadBalancerLagTimeoutAWS
}
loadBalancerCreateTimeout := e2eservice.LoadBalancerCreateTimeoutDefault
nodes, err := e2enode.GetReadySchedulableNodes(cs)
framework.ExpectNoError(err)
if len(nodes.Items) > e2eservice.LargeClusterMinNodesNumber {
loadBalancerCreateTimeout = e2eservice.LoadBalancerCreateTimeoutLarge
}
loadBalancerCreateTimeout := e2eservice.GetServiceLoadBalancerCreationTimeout(cs)
// This test is more monolithic than we'd like because LB turnup can be
// very slow, so we lumped all the tests into one LB lifecycle.
@ -1495,12 +1490,7 @@ var _ = SIGDescribe("Services", func() {
if framework.ProviderIs("aws") {
loadBalancerLagTimeout = e2eservice.LoadBalancerLagTimeoutAWS
}
loadBalancerCreateTimeout := e2eservice.LoadBalancerCreateTimeoutDefault
nodes, err := e2enode.GetReadySchedulableNodes(cs)
framework.ExpectNoError(err)
if len(nodes.Items) > e2eservice.LargeClusterMinNodesNumber {
loadBalancerCreateTimeout = e2eservice.LoadBalancerCreateTimeoutLarge
}
loadBalancerCreateTimeout := e2eservice.GetServiceLoadBalancerCreationTimeout(cs)
namespace := f.Namespace.Name
serviceName := "lb-sourcerange"
@ -1515,7 +1505,7 @@ var _ = SIGDescribe("Services", func() {
ginkgo.By("creating a pod to be part of the service " + serviceName)
// This container is an nginx container listening on port 80
// See kubernetes/contrib/ingress/echoheaders/nginx.conf for content of response
_, err = jig.Run(nil)
_, err := jig.Run(nil)
framework.ExpectNoError(err)
// Make sure acceptPod is running. There are certain chances that pod might be terminated due to unexpected reasons.
acceptPod, err = cs.CoreV1().Pods(namespace).Get(acceptPod.Name, metav1.GetOptions{})
@ -1576,13 +1566,7 @@ var _ = SIGDescribe("Services", func() {
ginkgo.It("should be able to create an internal type load balancer [Slow] [DisabledForLargeClusters]", func() {
framework.SkipUnlessProviderIs("azure", "gke", "gce")
createTimeout := e2eservice.LoadBalancerCreateTimeoutDefault
nodes, err := e2enode.GetReadySchedulableNodes(cs)
framework.ExpectNoError(err)
if len(nodes.Items) > e2eservice.LargeClusterMinNodesNumber {
createTimeout = e2eservice.LoadBalancerCreateTimeoutLarge
}
createTimeout := e2eservice.GetServiceLoadBalancerCreationTimeout(cs)
pollInterval := framework.Poll * 10
namespace := f.Namespace.Name
@ -1590,7 +1574,7 @@ var _ = SIGDescribe("Services", func() {
jig := e2eservice.NewTestJig(cs, namespace, serviceName)
ginkgo.By("creating pod to be part of service " + serviceName)
_, err = jig.Run(nil)
_, err := jig.Run(nil)
framework.ExpectNoError(err)
enableILB, disableILB := e2eservice.EnableAndDisableInternalLB()
@ -1732,7 +1716,7 @@ var _ = SIGDescribe("Services", func() {
e2eservice.WaitForServiceDeletedWithFinalizer(cs, svc.Namespace, svc.Name)
}()
svc, err = jig.WaitForLoadBalancer(e2eservice.LoadBalancerCreateTimeoutDefault)
svc, err = jig.WaitForLoadBalancer(e2eservice.GetServiceLoadBalancerCreationTimeout(cs))
framework.ExpectNoError(err)
hcName := gcecloud.MakeNodesHealthCheckName(clusterID)
@ -1758,7 +1742,7 @@ var _ = SIGDescribe("Services", func() {
ginkgo.By("health check should be reconciled")
pollInterval := framework.Poll * 10
if pollErr := wait.PollImmediate(pollInterval, e2eservice.LoadBalancerCreateTimeoutDefault, func() (bool, error) {
if pollErr := wait.PollImmediate(pollInterval, e2eservice.LoadBalancerPropagationTimeoutDefault, func() (bool, error) {
hc, err := gceCloud.GetHTTPHealthCheck(hcName)
if err != nil {
framework.Logf("ginkgo.Failed to get HttpHealthCheck(%q): %v", hcName, err)
@ -2059,7 +2043,7 @@ var _ = SIGDescribe("Services", func() {
// TODO: Get rid of [DisabledForLargeClusters] tag when issue #56138 is fixed.
var _ = SIGDescribe("ESIPP [Slow] [DisabledForLargeClusters]", func() {
f := framework.NewDefaultFramework("esipp")
loadBalancerCreateTimeout := e2eservice.LoadBalancerCreateTimeoutDefault
var loadBalancerCreateTimeout time.Duration
var cs clientset.Interface
serviceLBNames := []string{}
@ -2069,11 +2053,7 @@ var _ = SIGDescribe("ESIPP [Slow] [DisabledForLargeClusters]", func() {
framework.SkipUnlessProviderIs("gce", "gke")
cs = f.ClientSet
nodes, err := e2enode.GetReadySchedulableNodes(cs)
framework.ExpectNoError(err)
if len(nodes.Items) > e2eservice.LargeClusterMinNodesNumber {
loadBalancerCreateTimeout = e2eservice.LoadBalancerCreateTimeoutLarge
}
loadBalancerCreateTimeout = e2eservice.GetServiceLoadBalancerCreationTimeout(cs)
})
ginkgo.AfterEach(func() {
@ -2275,7 +2255,7 @@ var _ = SIGDescribe("ESIPP [Slow] [DisabledForLargeClusters]", func() {
var srcIP string
ginkgo.By(fmt.Sprintf("Hitting external lb %v from pod %v on node %v", ingressIP, pausePod.Name, pausePod.Spec.NodeName))
if pollErr := wait.PollImmediate(framework.Poll, e2eservice.LoadBalancerCreateTimeoutDefault, func() (bool, error) {
if pollErr := wait.PollImmediate(framework.Poll, e2eservice.LoadBalancerPropagationTimeoutDefault, func() (bool, error) {
stdout, err := framework.RunHostCmd(pausePod.Namespace, pausePod.Name, cmd)
if err != nil {
framework.Logf("got err: %v, retry until timeout", err)
@ -2520,7 +2500,7 @@ func execAffinityTestForLBServiceWithOptionalTransition(f *framework.Framework,
framework.ExpectNoError(err, "failed to create replication controller with service in the namespace: %s", ns)
jig := e2eservice.NewTestJig(cs, ns, serviceName)
ginkgo.By("waiting for loadbalancer for service " + ns + "/" + serviceName)
svc, err = jig.WaitForLoadBalancer(e2eservice.LoadBalancerCreateTimeoutDefault)
svc, err = jig.WaitForLoadBalancer(e2eservice.GetServiceLoadBalancerCreationTimeout(cs))
framework.ExpectNoError(err)
defer func() {
podNodePairs, err := e2enode.PodNodePairs(cs, ns)

View File

@ -46,13 +46,14 @@ func (t *ServiceUpgradeTest) Setup(f *framework.Framework) {
jig := e2eservice.NewTestJig(f.ClientSet, f.Namespace.Name, serviceName)
ns := f.Namespace
cs := f.ClientSet
ginkgo.By("creating a TCP service " + serviceName + " with type=LoadBalancer in namespace " + ns.Name)
tcpService, err := jig.CreateTCPService(func(s *v1.Service) {
s.Spec.Type = v1.ServiceTypeLoadBalancer
})
framework.ExpectNoError(err)
tcpService, err = jig.WaitForLoadBalancer(e2eservice.LoadBalancerCreateTimeoutDefault)
tcpService, err = jig.WaitForLoadBalancer(e2eservice.GetServiceLoadBalancerCreationTimeout(cs))
framework.ExpectNoError(err)
// Get info to hit it with