Fix timeouts for networking tests in large clusters

This commit is contained in:
wojtekt 2019-10-23 12:50:00 +02:00
parent 5f03d33fc9
commit bf34ba0cdf
7 changed files with 25 additions and 40 deletions

View File

@ -901,7 +901,7 @@ func testRollingUpdateDeploymentWithLocalTrafficLoadBalancer(f *framework.Framew
framework.Logf("Creating a service %s with type=LoadBalancer and externalTrafficPolicy=Local in namespace %s", name, ns) framework.Logf("Creating a service %s with type=LoadBalancer and externalTrafficPolicy=Local in namespace %s", name, ns)
jig := e2eservice.NewTestJig(c, ns, name) jig := e2eservice.NewTestJig(c, ns, name)
jig.Labels = podLabels jig.Labels = podLabels
service, err := jig.CreateLoadBalancerService(e2eservice.LoadBalancerCreateTimeoutDefault, func(svc *v1.Service) { service, err := jig.CreateLoadBalancerService(e2eservice.GetServiceLoadBalancerCreationTimeout(c), func(svc *v1.Service) {
svc.Spec.ExternalTrafficPolicy = v1.ServiceExternalTrafficPolicyTypeLocal svc.Spec.ExternalTrafficPolicy = v1.ServiceExternalTrafficPolicyTypeLocal
}) })
framework.ExpectNoError(err) framework.ExpectNoError(err)

View File

@ -676,7 +676,7 @@ func (j *TestJig) WaitForIngress(waitForNodePort bool) {
// WaitForIngressToStable waits for the LB to return 100 consecutive 200 responses. // WaitForIngressToStable waits for the LB to return 100 consecutive 200 responses.
func (j *TestJig) WaitForIngressToStable() { func (j *TestJig) WaitForIngressToStable() {
if err := wait.Poll(10*time.Second, e2eservice.LoadBalancerCreateTimeoutDefault, func() (bool, error) { if err := wait.Poll(10*time.Second, e2eservice.LoadBalancerPropagationTimeoutDefault, func() (bool, error) {
_, err := j.GetDistinctResponseFromIngress() _, err := j.GetDistinctResponseFromIngress()
if err != nil { if err != nil {
return false, nil return false, nil

View File

@ -56,6 +56,10 @@ const (
// LoadBalancerCreateTimeoutLarge is the maximum time to wait for a load balancer to be created/modified. // LoadBalancerCreateTimeoutLarge is the maximum time to wait for a load balancer to be created/modified.
LoadBalancerCreateTimeoutLarge = 2 * time.Hour LoadBalancerCreateTimeoutLarge = 2 * time.Hour
// LoadBalancerPropagationTimeoutDefault is the default time to wait for pods to
// be targeted by load balancers.
LoadBalancerPropagationTimeoutDefault = 10 * time.Minute
// LoadBalancerCleanupTimeout is the time required by the loadbalancer to cleanup, proportional to numApps/Ing. // LoadBalancerCleanupTimeout is the time required by the loadbalancer to cleanup, proportional to numApps/Ing.
// Bring the cleanup timeout back down to 5m once b/33588344 is resolved. // Bring the cleanup timeout back down to 5m once b/33588344 is resolved.
LoadBalancerCleanupTimeout = 15 * time.Minute LoadBalancerCleanupTimeout = 15 * time.Minute

View File

@ -302,7 +302,7 @@ func (j *TestJig) GetEndpointNodeNames() (sets.String, error) {
// WaitForEndpointOnNode waits for a service endpoint on the given node. // WaitForEndpointOnNode waits for a service endpoint on the given node.
func (j *TestJig) WaitForEndpointOnNode(nodeName string) error { func (j *TestJig) WaitForEndpointOnNode(nodeName string) error {
return wait.PollImmediate(framework.Poll, LoadBalancerCreateTimeoutDefault, func() (bool, error) { return wait.PollImmediate(framework.Poll, LoadBalancerPropagationTimeoutDefault, func() (bool, error) {
endpoints, err := j.Client.CoreV1().Endpoints(j.Namespace).Get(j.Name, metav1.GetOptions{}) endpoints, err := j.Client.CoreV1().Endpoints(j.Namespace).Get(j.Name, metav1.GetOptions{})
if err != nil { if err != nil {
framework.Logf("Get endpoints for service %s/%s failed (%s)", j.Namespace, j.Name, err) framework.Logf("Get endpoints for service %s/%s failed (%s)", j.Namespace, j.Name, err)

View File

@ -82,7 +82,7 @@ var _ = SIGDescribe("Firewall rule", func() {
nodesSet := sets.NewString(nodesNames...) nodesSet := sets.NewString(nodesNames...)
ginkgo.By("Creating a LoadBalancer type service with ExternalTrafficPolicy=Global") ginkgo.By("Creating a LoadBalancer type service with ExternalTrafficPolicy=Global")
svc, err := jig.CreateLoadBalancerService(e2eservice.LoadBalancerCreateTimeoutDefault, func(svc *v1.Service) { svc, err := jig.CreateLoadBalancerService(e2eservice.GetServiceLoadBalancerCreationTimeout(cs), func(svc *v1.Service) {
svc.Spec.Ports = []v1.ServicePort{{Protocol: v1.ProtocolTCP, Port: firewallTestHTTPPort}} svc.Spec.Ports = []v1.ServicePort{{Protocol: v1.ProtocolTCP, Port: firewallTestHTTPPort}}
svc.Spec.LoadBalancerSourceRanges = firewallTestSourceRanges svc.Spec.LoadBalancerSourceRanges = firewallTestSourceRanges
}) })
@ -129,7 +129,7 @@ var _ = SIGDescribe("Firewall rule", func() {
ginkgo.By("Waiting for the correct local traffic health check firewall rule to be created") ginkgo.By("Waiting for the correct local traffic health check firewall rule to be created")
localHCFw := gce.ConstructHealthCheckFirewallForLBService(clusterID, svc, cloudConfig.NodeTag, false) localHCFw := gce.ConstructHealthCheckFirewallForLBService(clusterID, svc, cloudConfig.NodeTag, false)
fw, err = gce.WaitForFirewallRule(gceCloud, localHCFw.Name, true, e2eservice.LoadBalancerCreateTimeoutDefault) fw, err = gce.WaitForFirewallRule(gceCloud, localHCFw.Name, true, e2eservice.GetServiceLoadBalancerCreationTimeout(cs))
framework.ExpectNoError(err) framework.ExpectNoError(err)
err = gce.VerifyFirewallRule(fw, localHCFw, cloudConfig.Network, false) err = gce.VerifyFirewallRule(fw, localHCFw, cloudConfig.Network, false)
framework.ExpectNoError(err) framework.ExpectNoError(err)
@ -160,7 +160,7 @@ var _ = SIGDescribe("Firewall rule", func() {
// Send requests from outside of the cluster because internal traffic is whitelisted // Send requests from outside of the cluster because internal traffic is whitelisted
ginkgo.By("Accessing the external service ip from outside, all non-master nodes should be reached") ginkgo.By("Accessing the external service ip from outside, all non-master nodes should be reached")
err = framework.TestHitNodesFromOutside(svcExternalIP, firewallTestHTTPPort, e2eservice.LoadBalancerCreateTimeoutDefault, nodesSet) err = framework.TestHitNodesFromOutside(svcExternalIP, firewallTestHTTPPort, e2eservice.LoadBalancerPropagationTimeoutDefault, nodesSet)
framework.ExpectNoError(err) framework.ExpectNoError(err)
// Check if there are overlapping tags on the firewall that extend beyond just the vms in our cluster // Check if there are overlapping tags on the firewall that extend beyond just the vms in our cluster
@ -181,12 +181,12 @@ var _ = SIGDescribe("Firewall rule", func() {
nodesSet.Insert(nodesNames[0]) nodesSet.Insert(nodesNames[0])
gce.SetInstanceTags(cloudConfig, nodesNames[0], zone, removedTags) gce.SetInstanceTags(cloudConfig, nodesNames[0], zone, removedTags)
// Make sure traffic is recovered before exit // Make sure traffic is recovered before exit
err = framework.TestHitNodesFromOutside(svcExternalIP, firewallTestHTTPPort, e2eservice.LoadBalancerCreateTimeoutDefault, nodesSet) err = framework.TestHitNodesFromOutside(svcExternalIP, firewallTestHTTPPort, e2eservice.LoadBalancerPropagationTimeoutDefault, nodesSet)
framework.ExpectNoError(err) framework.ExpectNoError(err)
}() }()
ginkgo.By("Accessing serivce through the external ip and examine got no response from the node without tags") ginkgo.By("Accessing serivce through the external ip and examine got no response from the node without tags")
err = framework.TestHitNodesFromOutsideWithCount(svcExternalIP, firewallTestHTTPPort, e2eservice.LoadBalancerCreateTimeoutDefault, nodesSet, 15) err = framework.TestHitNodesFromOutsideWithCount(svcExternalIP, firewallTestHTTPPort, e2eservice.LoadBalancerPropagationTimeoutDefault, nodesSet, 15)
framework.ExpectNoError(err) framework.ExpectNoError(err)
}) })

View File

@ -562,12 +562,7 @@ var _ = SIGDescribe("Services", func() {
if framework.ProviderIs("aws") { if framework.ProviderIs("aws") {
loadBalancerLagTimeout = e2eservice.LoadBalancerLagTimeoutAWS loadBalancerLagTimeout = e2eservice.LoadBalancerLagTimeoutAWS
} }
loadBalancerCreateTimeout := e2eservice.LoadBalancerCreateTimeoutDefault loadBalancerCreateTimeout := e2eservice.GetServiceLoadBalancerCreationTimeout(cs)
nodes, err := e2enode.GetReadySchedulableNodes(cs)
framework.ExpectNoError(err)
if len(nodes.Items) > e2eservice.LargeClusterMinNodesNumber {
loadBalancerCreateTimeout = e2eservice.LoadBalancerCreateTimeoutLarge
}
// This test is more monolithic than we'd like because LB turnup can be // This test is more monolithic than we'd like because LB turnup can be
// very slow, so we lumped all the tests into one LB lifecycle. // very slow, so we lumped all the tests into one LB lifecycle.
@ -1495,12 +1490,7 @@ var _ = SIGDescribe("Services", func() {
if framework.ProviderIs("aws") { if framework.ProviderIs("aws") {
loadBalancerLagTimeout = e2eservice.LoadBalancerLagTimeoutAWS loadBalancerLagTimeout = e2eservice.LoadBalancerLagTimeoutAWS
} }
loadBalancerCreateTimeout := e2eservice.LoadBalancerCreateTimeoutDefault loadBalancerCreateTimeout := e2eservice.GetServiceLoadBalancerCreationTimeout(cs)
nodes, err := e2enode.GetReadySchedulableNodes(cs)
framework.ExpectNoError(err)
if len(nodes.Items) > e2eservice.LargeClusterMinNodesNumber {
loadBalancerCreateTimeout = e2eservice.LoadBalancerCreateTimeoutLarge
}
namespace := f.Namespace.Name namespace := f.Namespace.Name
serviceName := "lb-sourcerange" serviceName := "lb-sourcerange"
@ -1515,7 +1505,7 @@ var _ = SIGDescribe("Services", func() {
ginkgo.By("creating a pod to be part of the service " + serviceName) ginkgo.By("creating a pod to be part of the service " + serviceName)
// This container is an nginx container listening on port 80 // This container is an nginx container listening on port 80
// See kubernetes/contrib/ingress/echoheaders/nginx.conf for content of response // See kubernetes/contrib/ingress/echoheaders/nginx.conf for content of response
_, err = jig.Run(nil) _, err := jig.Run(nil)
framework.ExpectNoError(err) framework.ExpectNoError(err)
// Make sure acceptPod is running. There are certain chances that pod might be terminated due to unexpected reasons. // Make sure acceptPod is running. There are certain chances that pod might be terminated due to unexpected reasons.
acceptPod, err = cs.CoreV1().Pods(namespace).Get(acceptPod.Name, metav1.GetOptions{}) acceptPod, err = cs.CoreV1().Pods(namespace).Get(acceptPod.Name, metav1.GetOptions{})
@ -1576,13 +1566,7 @@ var _ = SIGDescribe("Services", func() {
ginkgo.It("should be able to create an internal type load balancer [Slow] [DisabledForLargeClusters]", func() { ginkgo.It("should be able to create an internal type load balancer [Slow] [DisabledForLargeClusters]", func() {
framework.SkipUnlessProviderIs("azure", "gke", "gce") framework.SkipUnlessProviderIs("azure", "gke", "gce")
createTimeout := e2eservice.LoadBalancerCreateTimeoutDefault createTimeout := e2eservice.GetServiceLoadBalancerCreationTimeout(cs)
nodes, err := e2enode.GetReadySchedulableNodes(cs)
framework.ExpectNoError(err)
if len(nodes.Items) > e2eservice.LargeClusterMinNodesNumber {
createTimeout = e2eservice.LoadBalancerCreateTimeoutLarge
}
pollInterval := framework.Poll * 10 pollInterval := framework.Poll * 10
namespace := f.Namespace.Name namespace := f.Namespace.Name
@ -1590,7 +1574,7 @@ var _ = SIGDescribe("Services", func() {
jig := e2eservice.NewTestJig(cs, namespace, serviceName) jig := e2eservice.NewTestJig(cs, namespace, serviceName)
ginkgo.By("creating pod to be part of service " + serviceName) ginkgo.By("creating pod to be part of service " + serviceName)
_, err = jig.Run(nil) _, err := jig.Run(nil)
framework.ExpectNoError(err) framework.ExpectNoError(err)
enableILB, disableILB := e2eservice.EnableAndDisableInternalLB() enableILB, disableILB := e2eservice.EnableAndDisableInternalLB()
@ -1732,7 +1716,7 @@ var _ = SIGDescribe("Services", func() {
e2eservice.WaitForServiceDeletedWithFinalizer(cs, svc.Namespace, svc.Name) e2eservice.WaitForServiceDeletedWithFinalizer(cs, svc.Namespace, svc.Name)
}() }()
svc, err = jig.WaitForLoadBalancer(e2eservice.LoadBalancerCreateTimeoutDefault) svc, err = jig.WaitForLoadBalancer(e2eservice.GetServiceLoadBalancerCreationTimeout(cs))
framework.ExpectNoError(err) framework.ExpectNoError(err)
hcName := gcecloud.MakeNodesHealthCheckName(clusterID) hcName := gcecloud.MakeNodesHealthCheckName(clusterID)
@ -1758,7 +1742,7 @@ var _ = SIGDescribe("Services", func() {
ginkgo.By("health check should be reconciled") ginkgo.By("health check should be reconciled")
pollInterval := framework.Poll * 10 pollInterval := framework.Poll * 10
if pollErr := wait.PollImmediate(pollInterval, e2eservice.LoadBalancerCreateTimeoutDefault, func() (bool, error) { if pollErr := wait.PollImmediate(pollInterval, e2eservice.LoadBalancerPropagationTimeoutDefault, func() (bool, error) {
hc, err := gceCloud.GetHTTPHealthCheck(hcName) hc, err := gceCloud.GetHTTPHealthCheck(hcName)
if err != nil { if err != nil {
framework.Logf("ginkgo.Failed to get HttpHealthCheck(%q): %v", hcName, err) framework.Logf("ginkgo.Failed to get HttpHealthCheck(%q): %v", hcName, err)
@ -2059,7 +2043,7 @@ var _ = SIGDescribe("Services", func() {
// TODO: Get rid of [DisabledForLargeClusters] tag when issue #56138 is fixed. // TODO: Get rid of [DisabledForLargeClusters] tag when issue #56138 is fixed.
var _ = SIGDescribe("ESIPP [Slow] [DisabledForLargeClusters]", func() { var _ = SIGDescribe("ESIPP [Slow] [DisabledForLargeClusters]", func() {
f := framework.NewDefaultFramework("esipp") f := framework.NewDefaultFramework("esipp")
loadBalancerCreateTimeout := e2eservice.LoadBalancerCreateTimeoutDefault var loadBalancerCreateTimeout time.Duration
var cs clientset.Interface var cs clientset.Interface
serviceLBNames := []string{} serviceLBNames := []string{}
@ -2069,11 +2053,7 @@ var _ = SIGDescribe("ESIPP [Slow] [DisabledForLargeClusters]", func() {
framework.SkipUnlessProviderIs("gce", "gke") framework.SkipUnlessProviderIs("gce", "gke")
cs = f.ClientSet cs = f.ClientSet
nodes, err := e2enode.GetReadySchedulableNodes(cs) loadBalancerCreateTimeout = e2eservice.GetServiceLoadBalancerCreationTimeout(cs)
framework.ExpectNoError(err)
if len(nodes.Items) > e2eservice.LargeClusterMinNodesNumber {
loadBalancerCreateTimeout = e2eservice.LoadBalancerCreateTimeoutLarge
}
}) })
ginkgo.AfterEach(func() { ginkgo.AfterEach(func() {
@ -2275,7 +2255,7 @@ var _ = SIGDescribe("ESIPP [Slow] [DisabledForLargeClusters]", func() {
var srcIP string var srcIP string
ginkgo.By(fmt.Sprintf("Hitting external lb %v from pod %v on node %v", ingressIP, pausePod.Name, pausePod.Spec.NodeName)) ginkgo.By(fmt.Sprintf("Hitting external lb %v from pod %v on node %v", ingressIP, pausePod.Name, pausePod.Spec.NodeName))
if pollErr := wait.PollImmediate(framework.Poll, e2eservice.LoadBalancerCreateTimeoutDefault, func() (bool, error) { if pollErr := wait.PollImmediate(framework.Poll, e2eservice.LoadBalancerPropagationTimeoutDefault, func() (bool, error) {
stdout, err := framework.RunHostCmd(pausePod.Namespace, pausePod.Name, cmd) stdout, err := framework.RunHostCmd(pausePod.Namespace, pausePod.Name, cmd)
if err != nil { if err != nil {
framework.Logf("got err: %v, retry until timeout", err) framework.Logf("got err: %v, retry until timeout", err)
@ -2520,7 +2500,7 @@ func execAffinityTestForLBServiceWithOptionalTransition(f *framework.Framework,
framework.ExpectNoError(err, "failed to create replication controller with service in the namespace: %s", ns) framework.ExpectNoError(err, "failed to create replication controller with service in the namespace: %s", ns)
jig := e2eservice.NewTestJig(cs, ns, serviceName) jig := e2eservice.NewTestJig(cs, ns, serviceName)
ginkgo.By("waiting for loadbalancer for service " + ns + "/" + serviceName) ginkgo.By("waiting for loadbalancer for service " + ns + "/" + serviceName)
svc, err = jig.WaitForLoadBalancer(e2eservice.LoadBalancerCreateTimeoutDefault) svc, err = jig.WaitForLoadBalancer(e2eservice.GetServiceLoadBalancerCreationTimeout(cs))
framework.ExpectNoError(err) framework.ExpectNoError(err)
defer func() { defer func() {
podNodePairs, err := e2enode.PodNodePairs(cs, ns) podNodePairs, err := e2enode.PodNodePairs(cs, ns)

View File

@ -46,13 +46,14 @@ func (t *ServiceUpgradeTest) Setup(f *framework.Framework) {
jig := e2eservice.NewTestJig(f.ClientSet, f.Namespace.Name, serviceName) jig := e2eservice.NewTestJig(f.ClientSet, f.Namespace.Name, serviceName)
ns := f.Namespace ns := f.Namespace
cs := f.ClientSet
ginkgo.By("creating a TCP service " + serviceName + " with type=LoadBalancer in namespace " + ns.Name) ginkgo.By("creating a TCP service " + serviceName + " with type=LoadBalancer in namespace " + ns.Name)
tcpService, err := jig.CreateTCPService(func(s *v1.Service) { tcpService, err := jig.CreateTCPService(func(s *v1.Service) {
s.Spec.Type = v1.ServiceTypeLoadBalancer s.Spec.Type = v1.ServiceTypeLoadBalancer
}) })
framework.ExpectNoError(err) framework.ExpectNoError(err)
tcpService, err = jig.WaitForLoadBalancer(e2eservice.LoadBalancerCreateTimeoutDefault) tcpService, err = jig.WaitForLoadBalancer(e2eservice.GetServiceLoadBalancerCreationTimeout(cs))
framework.ExpectNoError(err) framework.ExpectNoError(err)
// Get info to hit it with // Get info to hit it with