Merge pull request #9392 from jszczepkowski/e2e-net
Added e2e test case for network partition.
Commit: 2bb0fc00e5
@@ -207,7 +207,23 @@ func verifyPodsResponding(c *client.Client, ns, name string, pods *api.PodList)
     return wait.Poll(retryInterval, retryTimeout, podResponseChecker{c, ns, label, name, pods}.checkAllResponses)
 }

-var _ = Describe("ResizeNodes", func() {
+func waitForPodsCreatedRunningResponding(c *client.Client, ns, name string, replicas int) error {
+    pods, err := waitForPodsCreated(c, ns, name, replicas)
+    if err != nil {
+        return err
+    }
+    e := waitForPodsRunning(c, pods)
+    if len(e) > 0 {
+        return fmt.Errorf("Failed to wait for pods running: %v", e)
+    }
+    err = verifyPodsResponding(c, ns, name, pods)
+    if err != nil {
+        return err
+    }
+    return nil
+}
+
+var _ = Describe("Nodes", func() {
     supportedProviders := []string{"gce"}
     var testName string
     var c *client.Client
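The helper added above, waitForPodsCreatedRunningResponding, folds the create/running/responding checks that each test previously spelled out into a single call, so the test bodies further down can assert pod health in one line. The waiting helpers it wraps are built on the poll-until-done pattern of pkg/util/wait. The sketch below is not the project's code: it is a minimal, standard-library-only illustration of that pattern, with pollUntil standing in for wait.Poll and a made-up condition function.

    package main

    import (
        "errors"
        "fmt"
        "time"
    )

    // pollUntil is an illustrative stand-in for wait.Poll: it invokes condition
    // every interval until it returns (true, nil), returns an error, or the
    // timeout elapses. It is a sketch, not the project's implementation.
    func pollUntil(interval, timeout time.Duration, condition func() (bool, error)) error {
        deadline := time.Now().Add(timeout)
        for {
            done, err := condition()
            if err != nil {
                return err
            }
            if done {
                return nil
            }
            if time.Now().After(deadline) {
                return errors.New("timed out waiting for the condition")
            }
            time.Sleep(interval)
        }
    }

    func main() {
        // Hypothetical usage: wait until three "pods" have been observed.
        observed := 0
        err := pollUntil(10*time.Millisecond, time.Second, func() (bool, error) {
            observed++
            fmt.Printf("observed %d of 3 pods\n", observed)
            return observed >= 3, nil
        })
        fmt.Println("err:", err)
    }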
@@ -223,116 +239,173 @@ var _ = Describe("ResizeNodes", func() {
     })

     AfterEach(func() {
-        if !providerIs(supportedProviders...) {
-            return
-        }
         By(fmt.Sprintf("destroying namespace for this suite %s", ns))
         if err := c.Namespaces().Delete(ns); err != nil {
             Failf("Couldn't delete namespace '%s', %v", ns, err)
         }
-        By("restoring the original node instance group size")
-        if err := resizeNodeInstanceGroup(testContext.CloudConfig.NumNodes); err != nil {
-            Failf("Couldn't restore the original node instance group size: %v", err)
-        }
-        if err := waitForNodeInstanceGroupSize(testContext.CloudConfig.NumNodes); err != nil {
-            Failf("Couldn't restore the original node instance group size: %v", err)
-        }
-        if err := waitForClusterSize(c, testContext.CloudConfig.NumNodes); err != nil {
-            Failf("Couldn't restore the original cluster size: %v", err)
-        }
     })

-    testName = "should be able to delete nodes."
-    It(testName, func() {
-        Logf("starting test %s", testName)
-
-        if !providerIs(supportedProviders...) {
-            By(fmt.Sprintf("Skipping %s test, which is only supported for providers %v (not %s)",
-                testName, supportedProviders, testContext.Provider))
-            return
-        }
-
-        if testContext.CloudConfig.NumNodes < 2 {
-            By(fmt.Sprintf("skipping %s test, which requires at lease 2 nodes (not %d)",
-                testName, testContext.CloudConfig.NumNodes))
-            return
-        }
-
-        // Create a replication controller for a service that serves its hostname.
-        // The source for the Docker containter kubernetes/serve_hostname is in contrib/for-demos/serve_hostname
-        name := "my-hostname-delete-node-" + string(util.NewUUID())
-        replicas := testContext.CloudConfig.NumNodes
-        createServeHostnameReplicationController(c, ns, name, replicas)
-        pods, err := waitForPodsCreated(c, ns, name, replicas)
-        Expect(err).NotTo(HaveOccurred())
-        e := waitForPodsRunning(c, pods)
-        if len(e) > 0 {
-            Failf("Failed to wait for pods running: %v", e)
-        }
-        err = verifyPodsResponding(c, ns, name, pods)
-        Expect(err).NotTo(HaveOccurred())
-
-        By(fmt.Sprintf("decreasing cluster size to %d", replicas-1))
-        err = resizeNodeInstanceGroup(replicas - 1)
-        Expect(err).NotTo(HaveOccurred())
-        err = waitForNodeInstanceGroupSize(replicas - 1)
-        Expect(err).NotTo(HaveOccurred())
-        err = waitForClusterSize(c, replicas-1)
-        Expect(err).NotTo(HaveOccurred())
-
-        By("verifying whether the pods from the removed node are recreated")
-        pods, err = waitForPodsCreated(c, ns, name, replicas)
-        Expect(err).NotTo(HaveOccurred())
-        e = waitForPodsRunning(c, pods)
-        if len(e) > 0 {
-            Failf("Failed to wait for pods running: %v", e)
-        }
-        err = verifyPodsResponding(c, ns, name, pods)
-        Expect(err).NotTo(HaveOccurred())
+    Describe("Resize", func() {
+        BeforeEach(func() {
+            if !providerIs(supportedProviders...) {
+                Failf("Nodes.Resize test is only supported for providers %v (not %s). You can avoid this failure by using ginkgo.skip=Nodes.Resize in your environment.",
+                    supportedProviders, testContext.Provider)
+            }
+        })
+
+        AfterEach(func() {
+            if !providerIs(supportedProviders...) {
+                return
+            }
+            By("restoring the original node instance group size")
+            if err := resizeNodeInstanceGroup(testContext.CloudConfig.NumNodes); err != nil {
+                Failf("Couldn't restore the original node instance group size: %v", err)
+            }
+            if err := waitForNodeInstanceGroupSize(testContext.CloudConfig.NumNodes); err != nil {
+                Failf("Couldn't restore the original node instance group size: %v", err)
+            }
+            if err := waitForClusterSize(c, testContext.CloudConfig.NumNodes); err != nil {
+                Failf("Couldn't restore the original cluster size: %v", err)
+            }
+        })
+
+        testName = "should be able to delete nodes."
+        It(testName, func() {
+            Logf("starting test %s", testName)
+
+            if testContext.CloudConfig.NumNodes < 2 {
+                Failf("Failing test %s as it requires at lease 2 nodes (not %d)", testName, testContext.CloudConfig.NumNodes)
+                return
+            }
+
+            // Create a replication controller for a service that serves its hostname.
+            // The source for the Docker containter kubernetes/serve_hostname is in contrib/for-demos/serve_hostname
+            name := "my-hostname-delete-node"
+            replicas := testContext.CloudConfig.NumNodes
+            createServeHostnameReplicationController(c, ns, name, replicas)
+            err := waitForPodsCreatedRunningResponding(c, ns, name, replicas)
+            Expect(err).NotTo(HaveOccurred())
+
+            By(fmt.Sprintf("decreasing cluster size to %d", replicas-1))
+            err = resizeNodeInstanceGroup(replicas - 1)
+            Expect(err).NotTo(HaveOccurred())
+            err = waitForNodeInstanceGroupSize(replicas - 1)
+            Expect(err).NotTo(HaveOccurred())
+            err = waitForClusterSize(c, replicas-1)
+            Expect(err).NotTo(HaveOccurred())
+
+            By("verifying whether the pods from the removed node are recreated")
+            err = waitForPodsCreatedRunningResponding(c, ns, name, replicas)
+            Expect(err).NotTo(HaveOccurred())
+        })
+
+        testName = "should be able to add nodes."
+        It(testName, func() {
+            Logf("starting test %s", testName)
+
+            if testContext.CloudConfig.NumNodes < 2 {
+                Failf("Failing test %s as it requires at lease 2 nodes (not %d)", testName, testContext.CloudConfig.NumNodes)
+                return
+            }
+
+            // Create a replication controller for a service that serves its hostname.
+            // The source for the Docker containter kubernetes/serve_hostname is in contrib/for-demos/serve_hostname
+            name := "my-hostname-add-node"
+            createServiceWithNameSelector(c, ns, name)
+            replicas := testContext.CloudConfig.NumNodes
+            createServeHostnameReplicationController(c, ns, name, replicas)
+            err := waitForPodsCreatedRunningResponding(c, ns, name, replicas)
+            Expect(err).NotTo(HaveOccurred())
+
+            By(fmt.Sprintf("increasing cluster size to %d", replicas+1))
+            err = resizeNodeInstanceGroup(replicas + 1)
+            Expect(err).NotTo(HaveOccurred())
+            err = waitForNodeInstanceGroupSize(replicas + 1)
+            Expect(err).NotTo(HaveOccurred())
+            err = waitForClusterSize(c, replicas+1)
+            Expect(err).NotTo(HaveOccurred())
+
+            By(fmt.Sprintf("increasing size of the replication controller to %d and verifying all pods are running", replicas+1))
+            err = resizeReplicationController(c, ns, name, replicas+1)
+            Expect(err).NotTo(HaveOccurred())
+            err = waitForPodsCreatedRunningResponding(c, ns, name, replicas+1)
+            Expect(err).NotTo(HaveOccurred())
+        })
     })

-    testName = "should be able to add nodes."
-    It(testName, func() {
-        Logf("starting test %s", testName)
-
-        if !providerIs(supportedProviders...) {
-            By(fmt.Sprintf("Skipping %s test, which is only supported for providers %v (not %s)",
-                testName, supportedProviders, testContext.Provider))
-            return
-        }
-
-        // Create a replication controller for a service that serves its hostname.
-        // The source for the Docker containter kubernetes/serve_hostname is in contrib/for-demos/serve_hostname
-        name := "my-hostname-add-node-" + string(util.NewUUID())
-        createServiceWithNameSelector(c, ns, name)
-        replicas := testContext.CloudConfig.NumNodes
-        createServeHostnameReplicationController(c, ns, name, replicas)
-        pods, err := waitForPodsCreated(c, ns, name, replicas)
-        Expect(err).NotTo(HaveOccurred())
-        e := waitForPodsRunning(c, pods)
-        if len(e) > 0 {
-            Failf("Failed to wait for pods running: %v", e)
-        }
-        err = verifyPodsResponding(c, ns, name, pods)
-        Expect(err).NotTo(HaveOccurred())
-
-        By(fmt.Sprintf("increasing cluster size to %d", replicas+1))
-        err = resizeNodeInstanceGroup(replicas + 1)
-        Expect(err).NotTo(HaveOccurred())
-        err = waitForNodeInstanceGroupSize(replicas + 1)
-        Expect(err).NotTo(HaveOccurred())
-        err = waitForClusterSize(c, replicas+1)
-        Expect(err).NotTo(HaveOccurred())
-
-        By(fmt.Sprintf("increasing size of the replication controller to %d and verifying all pods are running", replicas+1))
-        resizeReplicationController(c, ns, name, replicas+1)
-        pods, err = waitForPodsCreated(c, ns, name, replicas+1)
-        Expect(err).NotTo(HaveOccurred())
-        e = waitForPodsRunning(c, pods)
-        if len(e) > 0 {
-            Failf("Failed to wait for pods running: %v", e)
-        }
-        err = verifyPodsResponding(c, ns, name, pods)
-        Expect(err).NotTo(HaveOccurred())
+    Describe("Network", func() {
+        BeforeEach(func() {
+            if !providerIs(supportedProviders...) {
+                Failf("Nodes.Network test is only supported for providers %v (not %s). You can avoid this failure by using ginkgo.skip=Nodes.Network in your environment.",
+                    supportedProviders, testContext.Provider)
+            }
+        })
+
+        testName = "should survive network partition."
+        It(testName, func() {
+            if testContext.CloudConfig.NumNodes < 2 {
+                By(fmt.Sprintf("skipping %s test, which requires at lease 2 nodes (not %d)",
+                    testName, testContext.CloudConfig.NumNodes))
+                return
+            }
+
+            // Create a replication controller for a service that serves its hostname.
+            // The source for the Docker containter kubernetes/serve_hostname is in contrib/for-demos/serve_hostname
+            name := "my-hostname-net"
+            createServiceWithNameSelector(c, ns, name)
+            replicas := testContext.CloudConfig.NumNodes
+            createServeHostnameReplicationController(c, ns, name, replicas)
+            err := waitForPodsCreatedRunningResponding(c, ns, name, replicas)
+            Expect(err).NotTo(HaveOccurred())
+
+            By("cause network partition on one node")
+            nodelist, err := c.Nodes().List(labels.Everything(), fields.Everything())
+            Expect(err).NotTo(HaveOccurred())
+            node := nodelist.Items[0]
+            pod, err := waitForRCPodOnNode(c, ns, name, node.Name)
+            Expect(err).NotTo(HaveOccurred())
+            Logf("Getting external IP address for %s", name)
+            host := ""
+            for _, a := range node.Status.Addresses {
+                if a.Type == api.NodeExternalIP {
+                    host = a.Address + ":22"
+                    break
+                }
+            }
+            Logf("Setting network partition on %s", node.Name)
+            dropCmd := fmt.Sprintf("sudo iptables -I OUTPUT 1 -d %s -j DROP", testContext.CloudConfig.MasterName)
+            if _, _, code, err := SSH(dropCmd, host, testContext.Provider); code != 0 || err != nil {
+                Failf("Expected 0 exit code and nil error when running %s on %s, got %d and %v",
+                    dropCmd, node, code, err)
+            }
+
+            Logf("Waiting for node %s to be not ready", node.Name)
+            waitForNodeToBe(c, node.Name, false, 2*time.Minute)
+
+            Logf("Waiting for pod %s to be removed", pod.Name)
+            waitForRCPodToDisappear(c, ns, name, pod.Name)
+
+            By("verifying whether the pod from the partitioned node is recreated")
+            err = waitForPodsCreatedRunningResponding(c, ns, name, replicas)
+            Expect(err).NotTo(HaveOccurred())
+
+            By("remove network partition")
+            undropCmd := "sudo iptables --delete OUTPUT 1"
+            if _, _, code, err := SSH(undropCmd, host, testContext.Provider); code != 0 || err != nil {
+                Failf("Expected 0 exit code and nil error when running %s on %s, got %d and %v",
+                    undropCmd, node, code, err)
+            }
+
+            Logf("Waiting for node %s to be ready", node.Name)
+            waitForNodeToBe(c, node.Name, true, 2*time.Minute)
+
+            By("verify wheter new pods can be created on the re-attached node")
+            err = resizeReplicationController(c, ns, name, replicas+1)
+            Expect(err).NotTo(HaveOccurred())
+            err = waitForPodsCreatedRunningResponding(c, ns, name, replicas+1)
+            Expect(err).NotTo(HaveOccurred())
+            _, err = waitForRCPodOnNode(c, ns, name, node.Name)
+            Expect(err).NotTo(HaveOccurred())
+        })
     })
 })
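The new Network test partitions a node by SSHing to it and inserting an iptables rule that drops all traffic to the master; once the node controller marks the node NotReady, the replication controller recreates the pod elsewhere, and deleting the rule heals the partition. The sketch below only shows how the two commands are assembled: the iptables strings mirror dropCmd and undropCmd in the hunk above, while the master name and the printing are illustrative stand-ins (the test reads the name from testContext.CloudConfig.MasterName and runs the commands through the SSH helper). The remaining hunks add the supporting utilities the test calls.

    package main

    import "fmt"

    func main() {
        // Hypothetical master name; the test uses testContext.CloudConfig.MasterName.
        masterName := "kubernetes-master"

        // Insert a rule at position 1 of the OUTPUT chain that drops every packet
        // addressed to the master, cutting the node off from the apiserver.
        dropCmd := fmt.Sprintf("sudo iptables -I OUTPUT 1 -d %s -j DROP", masterName)

        // Delete rule 1 of the OUTPUT chain again (the rule inserted above),
        // which heals the partition.
        undropCmd := "sudo iptables --delete OUTPUT 1"

        fmt.Println(dropCmd)   // run on the node over SSH to start the partition
        fmt.Println(undropCmd) // run on the same node to remove it
    }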
@@ -42,6 +42,7 @@ import (
     "github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
     "github.com/GoogleCloudPlatform/kubernetes/pkg/runtime"
     "github.com/GoogleCloudPlatform/kubernetes/pkg/util"
+    "github.com/GoogleCloudPlatform/kubernetes/pkg/util/wait"
     "github.com/GoogleCloudPlatform/kubernetes/pkg/watch"

     "code.google.com/p/go-uuid/uuid"
@@ -397,6 +398,54 @@ func waitForPodSuccess(c *client.Client, podName string, contName string) error
     return waitForPodSuccessInNamespace(c, podName, contName, api.NamespaceDefault)
 }

+// waitForRCPodOnNode returns the pod from the given replication controller (decribed by rcName) which is scheduled on the given node.
+// In case of failure or too long waiting time, an error is returned.
+func waitForRCPodOnNode(c *client.Client, ns, rcName, node string) (*api.Pod, error) {
+    label := labels.SelectorFromSet(labels.Set(map[string]string{"name": rcName}))
+    var p *api.Pod = nil
+    err := wait.Poll(10*time.Second, 5*time.Minute, func() (bool, error) {
+        Logf("Waiting for pod %s to appear on node %s", rcName, node)
+        pods, err := c.Pods(ns).List(label, fields.Everything())
+        if err != nil {
+            return false, err
+        }
+        for _, pod := range pods.Items {
+            if pod.Spec.NodeName == node {
+                Logf("Pod %s found on node %s", pod.Name, node)
+                p = &pod
+                return true, nil
+            }
+        }
+        return false, nil
+    })
+    return p, err
+}
+
+// waitForRCPodOnNode returns nil if the pod from the given replication controller (decribed by rcName) no longer exists.
+// In case of failure or too long waiting time, an error is returned.
+func waitForRCPodToDisappear(c *client.Client, ns, rcName, podName string) error {
+    label := labels.SelectorFromSet(labels.Set(map[string]string{"name": rcName}))
+    return wait.Poll(20*time.Second, 5*time.Minute, func() (bool, error) {
+        Logf("Waiting for pod %s to disappear", podName)
+        pods, err := c.Pods(ns).List(label, fields.Everything())
+        if err != nil {
+            return false, err
+        }
+        found := false
+        for _, pod := range pods.Items {
+            if pod.Name == podName {
+                Logf("Pod %s still exists", podName)
+                found = true
+            }
+        }
+        if !found {
+            Logf("Pod %s no longer exists", podName)
+            return true, nil
+        }
+        return false, nil
+    })
+}
+
 // Context for checking pods responses by issuing GETs to them and verifying if the answer with pod name.
 type podResponseChecker struct {
     c *client.Client
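Both new helpers follow the same list-and-poll shape: select the controller's pods by the name label, scan the result, and retry until the predicate holds (a pod scheduled on the target node, or the named pod gone). The sketch below is a dependency-free illustration of that loop, not the project's implementation; the pod type, waitForPodGone, and the in-memory listPods function are hypothetical stand-ins for api.Pod and the real API list call.

    package main

    import (
        "fmt"
        "time"
    )

    // pod is a minimal stand-in for api.Pod, for illustration only.
    type pod struct {
        Name     string
        NodeName string
    }

    // waitForPodGone polls listPods until no pod with the given name is returned,
    // mirroring the structure of waitForRCPodToDisappear (list, scan, retry).
    func waitForPodGone(listPods func() []pod, podName string, interval, timeout time.Duration) error {
        deadline := time.Now().Add(timeout)
        for time.Now().Before(deadline) {
            found := false
            for _, p := range listPods() {
                if p.Name == podName {
                    found = true
                }
            }
            if !found {
                return nil
            }
            time.Sleep(interval)
        }
        return fmt.Errorf("pod %s still exists after %v", podName, timeout)
    }

    func main() {
        // Hypothetical list function that "loses" the pod after two polls.
        calls := 0
        listPods := func() []pod {
            calls++
            if calls < 3 {
                return []pod{{Name: "my-hostname-net-abc12", NodeName: "node-1"}}
            }
            return nil
        }
        fmt.Println(waitForPodGone(listPods, "my-hostname-net-abc12", 10*time.Millisecond, time.Second))
    }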