From 4daac74de7f1f548bda08828a9ea05ea8f7fbdaa Mon Sep 17 00:00:00 2001
From: wojtekt
Date: Thu, 5 Apr 2018 13:34:59 +0200
Subject: [PATCH] Fix disruptive tests for GKE regional clusters

---
 test/e2e/apps/network_partition.go | 15 +++++++++-----
 test/e2e/framework/util.go         | 33 ++++++++++++++++++++++--------
 test/e2e/lifecycle/resize_nodes.go | 12 +++++++----
 test/e2e/lifecycle/restart.go      |  5 +++--
 4 files changed, 45 insertions(+), 20 deletions(-)

diff --git a/test/e2e/apps/network_partition.go b/test/e2e/apps/network_partition.go
index 6cf4c588e42..d94f14c23db 100644
--- a/test/e2e/apps/network_partition.go
+++ b/test/e2e/apps/network_partition.go
@@ -233,9 +233,11 @@ var _ = SIGDescribe("Network Partition [Disruptive] [Slow]", func() {
 			// The source for the Docker container kubernetes/serve_hostname is in contrib/for-demos/serve_hostname
 			name := "my-hostname-net"
 			common.NewSVCByName(c, ns, name)
-			replicas := int32(framework.TestContext.CloudConfig.NumNodes)
+			numNodes, err := framework.NumberOfRegisteredNodes(f.ClientSet)
+			framework.ExpectNoError(err)
+			replicas := int32(numNodes)
 			common.NewRCByName(c, ns, name, replicas, nil)
-			err := framework.VerifyPods(c, ns, name, true, replicas)
+			err = framework.VerifyPods(c, ns, name, true, replicas)
 			Expect(err).NotTo(HaveOccurred(), "Each pod should start running and responding")
 
 			By("choose a node with at least one pod - we will block some network traffic on this node")
@@ -298,9 +300,11 @@ var _ = SIGDescribe("Network Partition [Disruptive] [Slow]", func() {
 			gracePeriod := int64(30)
 
 			common.NewSVCByName(c, ns, name)
-			replicas := int32(framework.TestContext.CloudConfig.NumNodes)
+			numNodes, err := framework.NumberOfRegisteredNodes(f.ClientSet)
+			framework.ExpectNoError(err)
+			replicas := int32(numNodes)
 			common.NewRCByName(c, ns, name, replicas, &gracePeriod)
-			err := framework.VerifyPods(c, ns, name, true, replicas)
+			err = framework.VerifyPods(c, ns, name, true, replicas)
 			Expect(err).NotTo(HaveOccurred(), "Each pod should start running and responding")
 
 			By("choose a node with at least one pod - we will block some network traffic on this node")
@@ -371,7 +375,8 @@ var _ = SIGDescribe("Network Partition [Disruptive] [Slow]", func() {
 
 			pst := framework.NewStatefulSetTester(c)
 
-			nn := framework.TestContext.CloudConfig.NumNodes
+			nn, err := framework.NumberOfRegisteredNodes(f.ClientSet)
+			framework.ExpectNoError(err)
 			nodeNames, err := framework.CheckNodesReady(f.ClientSet, framework.NodeReadyInitialTimeout, nn)
 			framework.ExpectNoError(err)
 			common.RestartNodes(f.ClientSet, nodeNames)
diff --git a/test/e2e/framework/util.go b/test/e2e/framework/util.go
index 1b886ed2201..19e090e4be9 100644
--- a/test/e2e/framework/util.go
+++ b/test/e2e/framework/util.go
@@ -2506,8 +2506,8 @@ func getNodeEvents(c clientset.Interface, nodeName string) []v1.Event {
 	return events.Items
 }
 
-// waitListSchedulableNodesOrDie is a wrapper around listing nodes supporting retries.
-func waitListSchedulableNodesOrDie(c clientset.Interface) *v1.NodeList {
+// waitListSchedulableNodes is a wrapper around listing nodes supporting retries.
+func waitListSchedulableNodes(c clientset.Interface) (*v1.NodeList, error) {
 	var nodes *v1.NodeList
 	var err error
 	if wait.PollImmediate(Poll, SingleCallTimeout, func() (bool, error) {
@@ -2522,6 +2522,15 @@
 		}
 		return true, nil
 	}) != nil {
+		return nodes, err
+	}
+	return nodes, nil
+}
+
+// waitListSchedulableNodesOrDie is a wrapper around listing nodes supporting retries.
+func waitListSchedulableNodesOrDie(c clientset.Interface) *v1.NodeList {
+	nodes, err := waitListSchedulableNodes(c)
+	if err != nil {
 		ExpectNoError(err, "Non-retryable failure or timed out while listing nodes for e2e cluster.")
 	}
 	return nodes
@@ -4095,11 +4104,19 @@ func CheckForControllerManagerHealthy(duration time.Duration) error {
 	return nil
 }
 
-// Returns number of ready Nodes excluding Master Node.
+// NumberOfRegisteredNodes returns number of registered Nodes excluding Master Node.
+func NumberOfRegisteredNodes(c clientset.Interface) (int, error) {
+	nodes, err := waitListSchedulableNodes(c)
+	if err != nil {
+		Logf("Failed to list nodes: %v", err)
+		return 0, err
+	}
+	return len(nodes.Items), nil
+}
+
+// NumberOfReadyNodes returns number of ready Nodes excluding Master Node.
 func NumberOfReadyNodes(c clientset.Interface) (int, error) {
-	nodes, err := c.CoreV1().Nodes().List(metav1.ListOptions{FieldSelector: fields.Set{
-		"spec.unschedulable": "false",
-	}.AsSelector().String()})
+	nodes, err := waitListSchedulableNodes(c)
 	if err != nil {
 		Logf("Failed to list nodes: %v", err)
 		return 0, err
@@ -4116,9 +4133,7 @@ func NumberOfReadyNodes(c clientset.Interface) (int, error) {
 // By cluster size we mean number of Nodes excluding Master Node.
 func WaitForReadyNodes(c clientset.Interface, size int, timeout time.Duration) error {
 	for start := time.Now(); time.Since(start) < timeout; time.Sleep(20 * time.Second) {
-		nodes, err := c.CoreV1().Nodes().List(metav1.ListOptions{FieldSelector: fields.Set{
-			"spec.unschedulable": "false",
-		}.AsSelector().String()})
+		nodes, err := waitListSchedulableNodes(c)
 		if err != nil {
 			Logf("Failed to list nodes: %v", err)
 			continue
diff --git a/test/e2e/lifecycle/resize_nodes.go b/test/e2e/lifecycle/resize_nodes.go
index bbbf6c6ed89..8d9f41d5b63 100644
--- a/test/e2e/lifecycle/resize_nodes.go
+++ b/test/e2e/lifecycle/resize_nodes.go
@@ -112,9 +112,11 @@ var _ = SIGDescribe("Nodes [Disruptive]", func() {
 			// Create a replication controller for a service that serves its hostname.
 			// The source for the Docker container kubernetes/serve_hostname is in contrib/for-demos/serve_hostname
 			name := "my-hostname-delete-node"
-			replicas := int32(framework.TestContext.CloudConfig.NumNodes)
+			numNodes, err := framework.NumberOfRegisteredNodes(c)
+			Expect(err).NotTo(HaveOccurred())
+			replicas := int32(numNodes)
 			common.NewRCByName(c, ns, name, replicas, nil)
-			err := framework.VerifyPods(c, ns, name, true, replicas)
+			err = framework.VerifyPods(c, ns, name, true, replicas)
 			Expect(err).NotTo(HaveOccurred())
 
 			By(fmt.Sprintf("decreasing cluster size to %d", replicas-1))
@@ -140,9 +142,11 @@ var _ = SIGDescribe("Nodes [Disruptive]", func() {
 			// The source for the Docker container kubernetes/serve_hostname is in contrib/for-demos/serve_hostname
 			name := "my-hostname-add-node"
 			common.NewSVCByName(c, ns, name)
-			replicas := int32(framework.TestContext.CloudConfig.NumNodes)
+			numNodes, err := framework.NumberOfRegisteredNodes(c)
+			Expect(err).NotTo(HaveOccurred())
+			replicas := int32(numNodes)
 			common.NewRCByName(c, ns, name, replicas, nil)
-			err := framework.VerifyPods(c, ns, name, true, replicas)
+			err = framework.VerifyPods(c, ns, name, true, replicas)
 			Expect(err).NotTo(HaveOccurred())
 
 			By(fmt.Sprintf("increasing cluster size to %d", replicas+1))
diff --git a/test/e2e/lifecycle/restart.go b/test/e2e/lifecycle/restart.go
index d26c1f94b77..3f2ab125f01 100644
--- a/test/e2e/lifecycle/restart.go
+++ b/test/e2e/lifecycle/restart.go
@@ -68,11 +68,12 @@ var _ = SIGDescribe("Restart [Disruptive]", func() {
 		// check must be identical to that call.
 		framework.SkipUnlessProviderIs("gce", "gke")
 		ps = testutils.NewPodStore(f.ClientSet, metav1.NamespaceSystem, labels.Everything(), fields.Everything())
-		numNodes = framework.TestContext.CloudConfig.NumNodes
+		var err error
+		numNodes, err = framework.NumberOfRegisteredNodes(f.ClientSet)
+		Expect(err).NotTo(HaveOccurred())
 		systemNamespace = metav1.NamespaceSystem
 
 		By("ensuring all nodes are ready")
-		var err error
 		originalNodeNames, err = framework.CheckNodesReady(f.ClientSet, framework.NodeReadyInitialTimeout, numNodes)
 		Expect(err).NotTo(HaveOccurred())
 		framework.Logf("Got the following nodes before restart: %v", originalNodeNames)
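---

For reviewers: below is a self-contained sketch of the list-with-retries counting
that this patch centralizes in waitListSchedulableNodes/NumberOfRegisteredNodes,
runnable outside the e2e framework. The poll interval, timeout, and kubeconfig
loading are illustrative stand-ins (the framework uses its own Poll and
SingleCallTimeout constants); the non-context List signature matches the
client-go of this era. It is not part of the change itself.

package main

import (
	"fmt"
	"time"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/fields"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
)

// numberOfRegisteredNodes counts schedulable registered nodes the way the new
// framework helper does: list nodes with spec.unschedulable=false and retry
// transient API errors instead of failing on the first call.
func numberOfRegisteredNodes(c kubernetes.Interface) (int, error) {
	var nodes *v1.NodeList
	err := wait.PollImmediate(2*time.Second, 1*time.Minute, func() (bool, error) {
		var listErr error
		nodes, listErr = c.CoreV1().Nodes().List(metav1.ListOptions{
			FieldSelector: fields.Set{"spec.unschedulable": "false"}.AsSelector().String(),
		})
		if listErr != nil {
			// Treat the error as transient and retry until the timeout.
			return false, nil
		}
		return true, nil
	})
	if err != nil {
		return 0, err
	}
	return len(nodes.Items), nil
}

func main() {
	config, err := clientcmd.BuildConfigFromFlags("", clientcmd.RecommendedHomeFile)
	if err != nil {
		panic(err)
	}
	client, err := kubernetes.NewForConfig(config)
	if err != nil {
		panic(err)
	}
	n, err := numberOfRegisteredNodes(client)
	if err != nil {
		panic(err)
	}
	// This reports what the API server actually has registered, which is why
	// the tests above prefer it over the statically configured
	// CloudConfig.NumNodes (the two can disagree on GKE regional clusters).
	fmt.Printf("registered schedulable nodes: %d\n", n)
}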