Merge pull request #62160 from wojtek-t/fix_disruptive_regional_tests

Automatic merge from submit-queue (batch tested with PRs 62160, 62148). If you want to cherry-pick this change to another branch, please follow the instructions here: https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md

Fix disruptive tests for GKE regional clusters
Kubernetes Submit Queue 2018-04-05 11:37:00 -07:00 committed by GitHub
commit 5b0906b989
4 changed files with 45 additions and 20 deletions
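The pattern repeated across all four files below is the same: instead of sizing test workloads from the statically configured framework.TestContext.CloudConfig.NumNodes, the tests now ask the API server how many schedulable nodes are actually registered, via the new framework.NumberOfRegisteredNodes helper, so the expected replica and node counts also hold on GKE regional clusters, where the configured value does not reflect the real node count. As a rough illustration of that idea, here is a minimal, self-contained sketch of counting registered schedulable nodes with client-go; countRegisteredNodes is a hypothetical name, not the framework helper, and the context-free List signature assumes the client-go vintage vendored by Kubernetes at the time of this PR.

// A minimal, hypothetical sketch of the idea behind this PR: derive the expected
// node count from what is actually registered with the API server rather than from
// static test configuration.
package main

import (
    "fmt"

    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/fields"
    clientset "k8s.io/client-go/kubernetes"
    "k8s.io/client-go/tools/clientcmd"
)

// countRegisteredNodes lists nodes that are not marked unschedulable and counts them.
// Illustrative only; it is not the framework's NumberOfRegisteredNodes.
func countRegisteredNodes(c clientset.Interface) (int, error) {
    nodes, err := c.CoreV1().Nodes().List(metav1.ListOptions{FieldSelector: fields.Set{
        "spec.unschedulable": "false",
    }.AsSelector().String()})
    if err != nil {
        return 0, err
    }
    return len(nodes.Items), nil
}

func main() {
    // Build a client from the local kubeconfig (assumption: one is available).
    config, err := clientcmd.BuildConfigFromFlags("", clientcmd.RecommendedHomeFile)
    if err != nil {
        panic(err)
    }
    c, err := clientset.NewForConfig(config)
    if err != nil {
        panic(err)
    }
    n, err := countRegisteredNodes(c)
    if err != nil {
        panic(err)
    }
    fmt.Printf("registered schedulable nodes: %d\n", n)
}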


@@ -233,9 +233,11 @@ var _ = SIGDescribe("Network Partition [Disruptive] [Slow]", func() {
     // The source for the Docker container kubernetes/serve_hostname is in contrib/for-demos/serve_hostname
     name := "my-hostname-net"
     common.NewSVCByName(c, ns, name)
-    replicas := int32(framework.TestContext.CloudConfig.NumNodes)
+    numNodes, err := framework.NumberOfRegisteredNodes(f.ClientSet)
+    framework.ExpectNoError(err)
+    replicas := int32(numNodes)
     common.NewRCByName(c, ns, name, replicas, nil)
-    err := framework.VerifyPods(c, ns, name, true, replicas)
+    err = framework.VerifyPods(c, ns, name, true, replicas)
     Expect(err).NotTo(HaveOccurred(), "Each pod should start running and responding")

     By("choose a node with at least one pod - we will block some network traffic on this node")
@@ -298,9 +300,11 @@ var _ = SIGDescribe("Network Partition [Disruptive] [Slow]", func() {
     gracePeriod := int64(30)
     common.NewSVCByName(c, ns, name)
-    replicas := int32(framework.TestContext.CloudConfig.NumNodes)
+    numNodes, err := framework.NumberOfRegisteredNodes(f.ClientSet)
+    framework.ExpectNoError(err)
+    replicas := int32(numNodes)
     common.NewRCByName(c, ns, name, replicas, &gracePeriod)
-    err := framework.VerifyPods(c, ns, name, true, replicas)
+    err = framework.VerifyPods(c, ns, name, true, replicas)
     Expect(err).NotTo(HaveOccurred(), "Each pod should start running and responding")

     By("choose a node with at least one pod - we will block some network traffic on this node")
@@ -371,7 +375,8 @@ var _ = SIGDescribe("Network Partition [Disruptive] [Slow]", func() {
     pst := framework.NewStatefulSetTester(c)
-    nn := framework.TestContext.CloudConfig.NumNodes
+    nn, err := framework.NumberOfRegisteredNodes(f.ClientSet)
+    framework.ExpectNoError(err)
     nodeNames, err := framework.CheckNodesReady(f.ClientSet, framework.NodeReadyInitialTimeout, nn)
     framework.ExpectNoError(err)
     common.RestartNodes(f.ClientSet, nodeNames)


@@ -2506,8 +2506,8 @@ func getNodeEvents(c clientset.Interface, nodeName string) []v1.Event {
     return events.Items
 }

-// waitListSchedulableNodesOrDie is a wrapper around listing nodes supporting retries.
-func waitListSchedulableNodesOrDie(c clientset.Interface) *v1.NodeList {
+// waitListSchedulableNodes is a wrapper around listing nodes supporting retries.
+func waitListSchedulableNodes(c clientset.Interface) (*v1.NodeList, error) {
     var nodes *v1.NodeList
     var err error
     if wait.PollImmediate(Poll, SingleCallTimeout, func() (bool, error) {
@@ -2522,6 +2522,15 @@ func waitListSchedulableNodesOrDie(c clientset.Interface) *v1.NodeList {
         }
         return true, nil
     }) != nil {
+        return nodes, err
+    }
+    return nodes, nil
+}
+
+// waitListSchedulableNodesOrDie is a wrapper around listing nodes supporting retries.
+func waitListSchedulableNodesOrDie(c clientset.Interface) *v1.NodeList {
+    nodes, err := waitListSchedulableNodes(c)
+    if err != nil {
         ExpectNoError(err, "Non-retryable failure or timed out while listing nodes for e2e cluster.")
     }
     return nodes
@@ -4095,11 +4104,19 @@ func CheckForControllerManagerHealthy(duration time.Duration) error {
     return nil
 }

-// Returns number of ready Nodes excluding Master Node.
+// NumberOfRegisteredNodes returns number of registered Nodes excluding Master Node.
+func NumberOfRegisteredNodes(c clientset.Interface) (int, error) {
+    nodes, err := waitListSchedulableNodes(c)
+    if err != nil {
+        Logf("Failed to list nodes: %v", err)
+        return 0, err
+    }
+    return len(nodes.Items), nil
+}
+
+// NumberOfReadyNodes returns number of ready Nodes excluding Master Node.
 func NumberOfReadyNodes(c clientset.Interface) (int, error) {
-    nodes, err := c.CoreV1().Nodes().List(metav1.ListOptions{FieldSelector: fields.Set{
-        "spec.unschedulable": "false",
-    }.AsSelector().String()})
+    nodes, err := waitListSchedulableNodes(c)
     if err != nil {
         Logf("Failed to list nodes: %v", err)
         return 0, err
@@ -4116,9 +4133,7 @@ func NumberOfReadyNodes(c clientset.Interface) (int, error) {
 // By cluster size we mean number of Nodes excluding Master Node.
 func WaitForReadyNodes(c clientset.Interface, size int, timeout time.Duration) error {
     for start := time.Now(); time.Since(start) < timeout; time.Sleep(20 * time.Second) {
-        nodes, err := c.CoreV1().Nodes().List(metav1.ListOptions{FieldSelector: fields.Set{
-            "spec.unschedulable": "false",
-        }.AsSelector().String()})
+        nodes, err := waitListSchedulableNodes(c)
         if err != nil {
             Logf("Failed to list nodes: %v", err)
             continue
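The hunks above show only the head and tail of the renamed helper: waitListSchedulableNodes keeps polling the node list until a call succeeds or the poll times out, and waitListSchedulableNodesOrDie now just wraps it and fails the test via ExpectNoError. Below is a minimal, self-contained sketch of that polling pattern, not the framework code itself: the 2-second/5-minute values stand in for the framework's Poll and SingleCallTimeout constants (not shown in this diff), and the blanket retry-on-any-error rule is an assumption in place of the framework's real error handling.

// Hypothetical stand-in for the framework's waitListSchedulableNodes: poll the node
// list until one call succeeds or the timeout expires.
package nodeutil

import (
    "time"

    v1 "k8s.io/api/core/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/fields"
    "k8s.io/apimachinery/pkg/util/wait"
    clientset "k8s.io/client-go/kubernetes"
)

func listSchedulableNodesWithRetry(c clientset.Interface) (*v1.NodeList, error) {
    var nodes *v1.NodeList
    var err error
    if pollErr := wait.PollImmediate(2*time.Second, 5*time.Minute, func() (bool, error) {
        // Same field selector the removed in-line listing code used above.
        nodes, err = c.CoreV1().Nodes().List(metav1.ListOptions{FieldSelector: fields.Set{
            "spec.unschedulable": "false",
        }.AsSelector().String()})
        if err != nil {
            // Treat every failure as retryable; the real helper is more selective.
            return false, nil
        }
        return true, nil
    }); pollErr != nil {
        // On timeout, surface the last listing error (mirrors the hunk above).
        return nodes, err
    }
    return nodes, nil
}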


@@ -112,9 +112,11 @@ var _ = SIGDescribe("Nodes [Disruptive]", func() {
     // Create a replication controller for a service that serves its hostname.
     // The source for the Docker container kubernetes/serve_hostname is in contrib/for-demos/serve_hostname
     name := "my-hostname-delete-node"
-    replicas := int32(framework.TestContext.CloudConfig.NumNodes)
+    numNodes, err := framework.NumberOfRegisteredNodes(c)
+    Expect(err).NotTo(HaveOccurred())
+    replicas := int32(numNodes)
     common.NewRCByName(c, ns, name, replicas, nil)
-    err := framework.VerifyPods(c, ns, name, true, replicas)
+    err = framework.VerifyPods(c, ns, name, true, replicas)
     Expect(err).NotTo(HaveOccurred())

     By(fmt.Sprintf("decreasing cluster size to %d", replicas-1))
@@ -140,9 +142,11 @@ var _ = SIGDescribe("Nodes [Disruptive]", func() {
     // The source for the Docker container kubernetes/serve_hostname is in contrib/for-demos/serve_hostname
     name := "my-hostname-add-node"
     common.NewSVCByName(c, ns, name)
-    replicas := int32(framework.TestContext.CloudConfig.NumNodes)
+    numNodes, err := framework.NumberOfRegisteredNodes(c)
+    Expect(err).NotTo(HaveOccurred())
+    replicas := int32(numNodes)
     common.NewRCByName(c, ns, name, replicas, nil)
-    err := framework.VerifyPods(c, ns, name, true, replicas)
+    err = framework.VerifyPods(c, ns, name, true, replicas)
     Expect(err).NotTo(HaveOccurred())

     By(fmt.Sprintf("increasing cluster size to %d", replicas+1))


@@ -68,11 +68,12 @@ var _ = SIGDescribe("Restart [Disruptive]", func() {
     // check must be identical to that call.
     framework.SkipUnlessProviderIs("gce", "gke")
     ps = testutils.NewPodStore(f.ClientSet, metav1.NamespaceSystem, labels.Everything(), fields.Everything())
-    numNodes = framework.TestContext.CloudConfig.NumNodes
+    var err error
+    numNodes, err = framework.NumberOfRegisteredNodes(f.ClientSet)
+    Expect(err).NotTo(HaveOccurred())
     systemNamespace = metav1.NamespaceSystem

     By("ensuring all nodes are ready")
-    var err error
     originalNodeNames, err = framework.CheckNodesReady(f.ClientSet, framework.NodeReadyInitialTimeout, numNodes)
     Expect(err).NotTo(HaveOccurred())
     framework.Logf("Got the following nodes before restart: %v", originalNodeNames)