diff --git a/test/e2e/storage/pd.go b/test/e2e/storage/pd.go index 5885f7c4d2c..bc88a4a2c3b 100644 --- a/test/e2e/storage/pd.go +++ b/test/e2e/storage/pd.go @@ -34,6 +34,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/uuid" + clientset "k8s.io/client-go/kubernetes" v1core "k8s.io/client-go/kubernetes/typed/core/v1" "k8s.io/kubernetes/pkg/api/testapi" "k8s.io/kubernetes/test/e2e/framework" @@ -46,6 +47,7 @@ const ( nodeStatusTimeout = 10 * time.Minute nodeStatusPollTime = 1 * time.Second maxReadRetry = 3 + minNodes = 2 ) var _ = SIGDescribe("Pod Disks", func() { @@ -59,467 +61,334 @@ var _ = SIGDescribe("Pod Disks", func() { f := framework.NewDefaultFramework("pod-disks") BeforeEach(func() { - framework.SkipUnlessNodeCountIsAtLeast(2) + framework.SkipUnlessNodeCountIsAtLeast(minNodes) podClient = f.ClientSet.Core().Pods(f.Namespace.Name) nodeClient = f.ClientSet.Core().Nodes() nodes = framework.GetReadySchedulableNodesOrDie(f.ClientSet) - - Expect(len(nodes.Items)).To(BeNumerically(">=", 2), "Requires at least 2 nodes") - + Expect(len(nodes.Items)).To(BeNumerically(">=", minNodes), fmt.Sprintf("Requires at least %d nodes", minNodes)) host0Name = types.NodeName(nodes.Items[0].ObjectMeta.Name) host1Name = types.NodeName(nodes.Items[1].ObjectMeta.Name) mathrand.Seed(time.Now().UTC().UnixNano()) }) - It("should schedule a pod w/ a RW PD, ungracefully remove it, then schedule it on another host [Slow]", func() { - framework.SkipUnlessProviderIs("gce", "gke", "aws") - - By("creating PD") - diskName, err := framework.CreatePDWithRetry() - framework.ExpectNoError(err, "Error creating PD") - - host0Pod := testPDPod([]string{diskName}, host0Name, false /* readOnly */, 1 /* numContainers */) - host1Pod := testPDPod([]string{diskName}, host1Name, false /* readOnly */, 1 /* numContainers */) - containerName := "mycontainer" - - defer func() { - // Teardown pods, PD. Ignore errors. - // Teardown should do nothing unless test failed. - By("cleaning up PD-RW test environment") - podClient.Delete(host0Pod.Name, metav1.NewDeleteOptions(0)) - podClient.Delete(host1Pod.Name, metav1.NewDeleteOptions(0)) - detachAndDeletePDs(diskName, []types.NodeName{host0Name, host1Name}) - }() - - By("submitting host0Pod to kubernetes") - _, err = podClient.Create(host0Pod) - framework.ExpectNoError(err, fmt.Sprintf("Failed to create host0Pod: %v", err)) - - framework.ExpectNoError(f.WaitForPodRunningSlow(host0Pod.Name)) - - testFile := "/testpd1/tracker" - testFileContents := fmt.Sprintf("%v", mathrand.Int()) - - framework.ExpectNoError(f.WriteFileViaContainer(host0Pod.Name, containerName, testFile, testFileContents)) - framework.Logf("Wrote value: %v", testFileContents) - - // Verify that disk shows up for in node 1's VolumeInUse list - framework.ExpectNoError(waitForPDInVolumesInUse(nodeClient, diskName, host0Name, nodeStatusTimeout, true /* shouldExist */)) - - By("deleting host0Pod") - // Delete pod with 0 grace period - framework.ExpectNoError(podClient.Delete(host0Pod.Name, metav1.NewDeleteOptions(0)), "Failed to delete host0Pod") - - By("submitting host1Pod to kubernetes") - _, err = podClient.Create(host1Pod) - framework.ExpectNoError(err, "Failed to create host1Pod") - - framework.ExpectNoError(f.WaitForPodRunningSlow(host1Pod.Name)) - - verifyPDContentsViaContainer(f, host1Pod.Name, containerName, map[string]string{testFile: testFileContents}) - - // Verify that disk is removed from node 1's VolumeInUse list - framework.ExpectNoError(waitForPDInVolumesInUse(nodeClient, diskName, host0Name, nodeStatusTimeout, false /* shouldExist */)) - - By("deleting host1Pod") - framework.ExpectNoError(podClient.Delete(host1Pod.Name, metav1.NewDeleteOptions(0)), "Failed to delete host1Pod") - - By("Test completed successfully, waiting for PD to safely detach") - waitForPDDetach(diskName, host0Name) - waitForPDDetach(diskName, host1Name) - - return - }) - - It("Should schedule a pod w/ a RW PD, gracefully remove it, then schedule it on another host [Slow]", func() { - framework.SkipUnlessProviderIs("gce", "gke", "aws") - - By("creating PD") - diskName, err := framework.CreatePDWithRetry() - framework.ExpectNoError(err, "Error creating PD") - - host0Pod := testPDPod([]string{diskName}, host0Name, false /* readOnly */, 1 /* numContainers */) - host1Pod := testPDPod([]string{diskName}, host1Name, false /* readOnly */, 1 /* numContainers */) - containerName := "mycontainer" - - defer func() { - // Teardown pods, PD. Ignore errors. - // Teardown should do nothing unless test failed. - By("cleaning up PD-RW test environment") - podClient.Delete(host0Pod.Name, &metav1.DeleteOptions{}) - podClient.Delete(host1Pod.Name, &metav1.DeleteOptions{}) - detachAndDeletePDs(diskName, []types.NodeName{host0Name, host1Name}) - }() - - By("submitting host0Pod to kubernetes") - _, err = podClient.Create(host0Pod) - framework.ExpectNoError(err, fmt.Sprintf("Failed to create host0Pod: %v", err)) - - framework.ExpectNoError(f.WaitForPodRunningSlow(host0Pod.Name)) - - testFile := "/testpd1/tracker" - testFileContents := fmt.Sprintf("%v", mathrand.Int()) - - framework.ExpectNoError(f.WriteFileViaContainer(host0Pod.Name, containerName, testFile, testFileContents)) - framework.Logf("Wrote value: %v", testFileContents) - - // Verify that disk shows up for in node 1's VolumeInUse list - framework.ExpectNoError(waitForPDInVolumesInUse(nodeClient, diskName, host0Name, nodeStatusTimeout, true /* shouldExist */)) - - By("deleting host0Pod") - // Delete pod with default grace period 30s - framework.ExpectNoError(podClient.Delete(host0Pod.Name, &metav1.DeleteOptions{}), "Failed to delete host0Pod") - - By("submitting host1Pod to kubernetes") - _, err = podClient.Create(host1Pod) - framework.ExpectNoError(err, "Failed to create host1Pod") - - framework.ExpectNoError(f.WaitForPodRunningSlow(host1Pod.Name)) - - verifyPDContentsViaContainer(f, host1Pod.Name, containerName, map[string]string{testFile: testFileContents}) - - // Verify that disk is removed from node 1's VolumeInUse list - framework.ExpectNoError(waitForPDInVolumesInUse(nodeClient, diskName, host0Name, nodeStatusTimeout, false /* shouldExist */)) - - By("deleting host1Pod") - framework.ExpectNoError(podClient.Delete(host1Pod.Name, &metav1.DeleteOptions{}), "Failed to delete host1Pod") - - By("Test completed successfully, waiting for PD to safely detach") - waitForPDDetach(diskName, host0Name) - waitForPDDetach(diskName, host1Name) - - return - }) - - It("should schedule a pod w/ a readonly PD on two hosts, then remove both ungracefully. [Slow]", func() { - framework.SkipUnlessProviderIs("gce", "gke") - - By("creating PD") - diskName, err := framework.CreatePDWithRetry() - framework.ExpectNoError(err, "Error creating PD") - - rwPod := testPDPod([]string{diskName}, host0Name, false /* readOnly */, 1 /* numContainers */) - host0ROPod := testPDPod([]string{diskName}, host0Name, true /* readOnly */, 1 /* numContainers */) - host1ROPod := testPDPod([]string{diskName}, host1Name, true /* readOnly */, 1 /* numContainers */) - - defer func() { - By("cleaning up PD-RO test environment") - // Teardown pods, PD. Ignore errors. - // Teardown should do nothing unless test failed. - podClient.Delete(rwPod.Name, metav1.NewDeleteOptions(0)) - podClient.Delete(host0ROPod.Name, metav1.NewDeleteOptions(0)) - podClient.Delete(host1ROPod.Name, metav1.NewDeleteOptions(0)) - detachAndDeletePDs(diskName, []types.NodeName{host0Name, host1Name}) - }() - - By("submitting rwPod to ensure PD is formatted") - _, err = podClient.Create(rwPod) - framework.ExpectNoError(err, "Failed to create rwPod") - framework.ExpectNoError(f.WaitForPodRunningSlow(rwPod.Name)) - // Delete pod with 0 grace period - framework.ExpectNoError(podClient.Delete(rwPod.Name, metav1.NewDeleteOptions(0)), "Failed to delete host0Pod") - framework.ExpectNoError(waitForPDDetach(diskName, host0Name)) - - By("submitting host0ROPod to kubernetes") - _, err = podClient.Create(host0ROPod) - framework.ExpectNoError(err, "Failed to create host0ROPod") - - By("submitting host1ROPod to kubernetes") - _, err = podClient.Create(host1ROPod) - framework.ExpectNoError(err, "Failed to create host1ROPod") - - framework.ExpectNoError(f.WaitForPodRunningSlow(host0ROPod.Name)) - - framework.ExpectNoError(f.WaitForPodRunningSlow(host1ROPod.Name)) - - By("deleting host0ROPod") - framework.ExpectNoError(podClient.Delete(host0ROPod.Name, metav1.NewDeleteOptions(0)), "Failed to delete host0ROPod") - - By("deleting host1ROPod") - framework.ExpectNoError(podClient.Delete(host1ROPod.Name, metav1.NewDeleteOptions(0)), "Failed to delete host1ROPod") - - By("Test completed successfully, waiting for PD to safely detach") - waitForPDDetach(diskName, host0Name) - waitForPDDetach(diskName, host1Name) - }) - - It("Should schedule a pod w/ a readonly PD on two hosts, then remove both gracefully. [Slow]", func() { - framework.SkipUnlessProviderIs("gce", "gke") - - By("creating PD") - diskName, err := framework.CreatePDWithRetry() - framework.ExpectNoError(err, "Error creating PD") - - rwPod := testPDPod([]string{diskName}, host0Name, false /* readOnly */, 1 /* numContainers */) - host0ROPod := testPDPod([]string{diskName}, host0Name, true /* readOnly */, 1 /* numContainers */) - host1ROPod := testPDPod([]string{diskName}, host1Name, true /* readOnly */, 1 /* numContainers */) - - defer func() { - By("cleaning up PD-RO test environment") - // Teardown pods, PD. Ignore errors. - // Teardown should do nothing unless test failed. - podClient.Delete(rwPod.Name, &metav1.DeleteOptions{}) - podClient.Delete(host0ROPod.Name, &metav1.DeleteOptions{}) - podClient.Delete(host1ROPod.Name, &metav1.DeleteOptions{}) - detachAndDeletePDs(diskName, []types.NodeName{host0Name, host1Name}) - }() - - By("submitting rwPod to ensure PD is formatted") - _, err = podClient.Create(rwPod) - framework.ExpectNoError(err, "Failed to create rwPod") - framework.ExpectNoError(f.WaitForPodRunningSlow(rwPod.Name)) - // Delete pod with default grace period 30s - framework.ExpectNoError(podClient.Delete(rwPod.Name, &metav1.DeleteOptions{}), "Failed to delete host0Pod") - framework.ExpectNoError(waitForPDDetach(diskName, host0Name)) - - By("submitting host0ROPod to kubernetes") - _, err = podClient.Create(host0ROPod) - framework.ExpectNoError(err, "Failed to create host0ROPod") - - By("submitting host1ROPod to kubernetes") - _, err = podClient.Create(host1ROPod) - framework.ExpectNoError(err, "Failed to create host1ROPod") - - framework.ExpectNoError(f.WaitForPodRunningSlow(host0ROPod.Name)) - - framework.ExpectNoError(f.WaitForPodRunningSlow(host1ROPod.Name)) - - By("deleting host0ROPod") - framework.ExpectNoError(podClient.Delete(host0ROPod.Name, &metav1.DeleteOptions{}), "Failed to delete host0ROPod") - - By("deleting host1ROPod") - framework.ExpectNoError(podClient.Delete(host1ROPod.Name, &metav1.DeleteOptions{}), "Failed to delete host1ROPod") - - By("Test completed successfully, waiting for PD to safely detach") - waitForPDDetach(diskName, host0Name) - waitForPDDetach(diskName, host1Name) - }) - - It("should schedule a pod w/ a RW PD shared between multiple containers, write to PD, delete pod, verify contents, and repeat in rapid succession [Slow]", func() { - framework.SkipUnlessProviderIs("gce", "gke", "aws") - - By("creating PD") - diskName, err := framework.CreatePDWithRetry() - framework.ExpectNoError(err, "Error creating PD") - numContainers := 4 - var host0Pod *v1.Pod - - defer func() { - By("cleaning up PD-RW test environment") - // Teardown pods, PD. Ignore errors. - // Teardown should do nothing unless test failed. - if host0Pod != nil { - podClient.Delete(host0Pod.Name, metav1.NewDeleteOptions(0)) - } - detachAndDeletePDs(diskName, []types.NodeName{host0Name}) - }() - - fileAndContentToVerify := make(map[string]string) - for i := 0; i < 3; i++ { - framework.Logf("PD Read/Writer Iteration #%v", i) - By("submitting host0Pod to kubernetes") - host0Pod = testPDPod([]string{diskName}, host0Name, false /* readOnly */, numContainers) - _, err = podClient.Create(host0Pod) - framework.ExpectNoError(err, fmt.Sprintf("Failed to create host0Pod: %v", err)) - - framework.ExpectNoError(f.WaitForPodRunningSlow(host0Pod.Name)) - - // randomly select a container and read/verify pd contents from it - containerName := fmt.Sprintf("mycontainer%v", mathrand.Intn(numContainers)+1) - verifyPDContentsViaContainer(f, host0Pod.Name, containerName, fileAndContentToVerify) - - // Randomly select a container to write a file to PD from - containerName = fmt.Sprintf("mycontainer%v", mathrand.Intn(numContainers)+1) - testFile := fmt.Sprintf("/testpd1/tracker%v", i) - testFileContents := fmt.Sprintf("%v", mathrand.Int()) - fileAndContentToVerify[testFile] = testFileContents - framework.ExpectNoError(f.WriteFileViaContainer(host0Pod.Name, containerName, testFile, testFileContents)) - framework.Logf("Wrote value: \"%v\" to PD %q from pod %q container %q", testFileContents, diskName, host0Pod.Name, containerName) - - // Randomly select a container and read/verify pd contents from it - containerName = fmt.Sprintf("mycontainer%v", mathrand.Intn(numContainers)+1) - verifyPDContentsViaContainer(f, host0Pod.Name, containerName, fileAndContentToVerify) - - By("deleting host0Pod") - framework.ExpectNoError(podClient.Delete(host0Pod.Name, metav1.NewDeleteOptions(0)), "Failed to delete host0Pod") + Context("schedule pods each with a PD, delete pod and verify detach [Slow]", func() { + const ( + podDefaultGrace = "default (30s)" + podImmediateGrace = "immediate (0s)" + ) + var readOnlyMap = map[bool]string{ + true: "read-only", + false: "RW", + } + type testT struct { + descr string // It description + readOnly bool // true means pd is read-only + deleteOpt *metav1.DeleteOptions // pod delete option + } + tests := []testT{ + { + descr: podImmediateGrace, + readOnly: false, + deleteOpt: metav1.NewDeleteOptions(0), + }, + { + descr: podDefaultGrace, + readOnly: false, + deleteOpt: &metav1.DeleteOptions{}, + }, + { + descr: podImmediateGrace, + readOnly: true, + deleteOpt: metav1.NewDeleteOptions(0), + }, + { + descr: podDefaultGrace, + readOnly: true, + deleteOpt: &metav1.DeleteOptions{}, + }, } - By("Test completed successfully, waiting for PD to safely detach") - waitForPDDetach(diskName, host0Name) + for _, t := range tests { + podDelOpt := t.deleteOpt + readOnly := t.readOnly + readOnlyTxt := readOnlyMap[readOnly] + + It(fmt.Sprintf("for %s PD with pod delete grace period of %q", readOnlyTxt, t.descr), func() { + framework.SkipUnlessProviderIs("gce", "gke", "aws") + if readOnly { + framework.SkipIfProviderIs("aws") + } + + By("creating PD") + diskName, err := framework.CreatePDWithRetry() + framework.ExpectNoError(err, "Error creating PD") + + var fmtPod *v1.Pod + if readOnly { + // if all test pods are RO then need a RW pod to format pd + By("creating RW fmt Pod to ensure PD is formatted") + fmtPod = testPDPod([]string{diskName}, host0Name, false, 1) + _, err = podClient.Create(fmtPod) + framework.ExpectNoError(err, "Failed to create fmtPod") + framework.ExpectNoError(f.WaitForPodRunningSlow(fmtPod.Name)) + + By("deleting the fmtPod") + framework.ExpectNoError(podClient.Delete(fmtPod.Name, metav1.NewDeleteOptions(0)), "Failed to delete fmtPod") + framework.Logf("deleted fmtPod %q", fmtPod.Name) + By("waiting for PD to detach") + framework.ExpectNoError(waitForPDDetach(diskName, host0Name)) + } + + // prepare to create two test pods on separate nodes + host0Pod := testPDPod([]string{diskName}, host0Name, readOnly, 1) + host1Pod := testPDPod([]string{diskName}, host1Name, readOnly, 1) + + defer func() { + // Teardown should do nothing unless test failed + By("defer: cleaning up PD-RW test environment") + framework.Logf("defer cleanup errors can usually be ignored") + if fmtPod != nil { + podClient.Delete(fmtPod.Name, podDelOpt) + } + podClient.Delete(host0Pod.Name, podDelOpt) + podClient.Delete(host1Pod.Name, podDelOpt) + detachAndDeletePDs(diskName, []types.NodeName{host0Name, host1Name}) + }() + + By("creating host0Pod on node0") + _, err = podClient.Create(host0Pod) + framework.ExpectNoError(err, fmt.Sprintf("Failed to create host0Pod: %v", err)) + framework.ExpectNoError(f.WaitForPodRunningSlow(host0Pod.Name)) + framework.Logf("host0Pod: %q, node0: %q", host0Pod.Name, host0Name) + + var containerName, testFile, testFileContents string + if !readOnly { + By("writing content to host0Pod on node0") + containerName = "mycontainer" + testFile = "/testpd1/tracker" + testFileContents = fmt.Sprintf("%v", mathrand.Int()) + framework.ExpectNoError(f.WriteFileViaContainer(host0Pod.Name, containerName, testFile, testFileContents)) + framework.Logf("wrote %q to file %q in pod %q on node %q", testFileContents, testFile, host0Pod.Name, host0Name) + By("verifying PD is present in node0's VolumeInUse list") + framework.ExpectNoError(waitForPDInVolumesInUse(nodeClient, diskName, host0Name, nodeStatusTimeout, true /* shouldExist */)) + By("deleting host0Pod") // delete this pod before creating next pod + framework.ExpectNoError(podClient.Delete(host0Pod.Name, podDelOpt), "Failed to delete host0Pod") + framework.Logf("deleted host0Pod %q", host0Pod.Name) + } + + By("creating host1Pod on node1") + _, err = podClient.Create(host1Pod) + framework.ExpectNoError(err, "Failed to create host1Pod") + framework.ExpectNoError(f.WaitForPodRunningSlow(host1Pod.Name)) + framework.Logf("host1Pod: %q, node1: %q", host1Pod.Name, host1Name) + + if readOnly { + By("deleting host0Pod") + framework.ExpectNoError(podClient.Delete(host0Pod.Name, podDelOpt), "Failed to delete host0Pod") + framework.Logf("deleted host0Pod %q", host0Pod.Name) + } else { + By("verifying PD contents in host1Pod") + verifyPDContentsViaContainer(f, host1Pod.Name, containerName, map[string]string{testFile: testFileContents}) + framework.Logf("verified PD contents in pod %q", host1Pod.Name) + By("verifying PD is removed from node0") + framework.ExpectNoError(waitForPDInVolumesInUse(nodeClient, diskName, host0Name, nodeStatusTimeout, false /* shouldExist */)) + framework.Logf("PD %q removed from node %q's VolumeInUse list", diskName, host1Pod.Name) + } + + By("deleting host1Pod") + framework.ExpectNoError(podClient.Delete(host1Pod.Name, podDelOpt), "Failed to delete host1Pod") + framework.Logf("deleted host1Pod %q", host1Pod.Name) + + By("Test completed successfully, waiting for PD to detach from both nodes") + waitForPDDetach(diskName, host0Name) + waitForPDDetach(diskName, host1Name) + }) + } }) - It("should schedule a pod w/two RW PDs both mounted to one container, write to PD, verify contents, delete pod, recreate pod, verify contents, and repeat in rapid succession [Slow]", func() { - framework.SkipUnlessProviderIs("gce", "gke", "aws") - - By("creating PD1") - disk1Name, err := framework.CreatePDWithRetry() - framework.ExpectNoError(err, "Error creating PD1") - By("creating PD2") - disk2Name, err := framework.CreatePDWithRetry() - framework.ExpectNoError(err, "Error creating PD2") - var host0Pod *v1.Pod - - defer func() { - By("cleaning up PD-RW test environment") - // Teardown pods, PD. Ignore errors. - // Teardown should do nothing unless test failed. - if host0Pod != nil { - podClient.Delete(host0Pod.Name, metav1.NewDeleteOptions(0)) - } - detachAndDeletePDs(disk1Name, []types.NodeName{host0Name}) - detachAndDeletePDs(disk2Name, []types.NodeName{host0Name}) - }() - - containerName := "mycontainer" - fileAndContentToVerify := make(map[string]string) - for i := 0; i < 3; i++ { - framework.Logf("PD Read/Writer Iteration #%v", i) - By("submitting host0Pod to kubernetes") - host0Pod = testPDPod([]string{disk1Name, disk2Name}, host0Name, false /* readOnly */, 1 /* numContainers */) - _, err = podClient.Create(host0Pod) - framework.ExpectNoError(err, fmt.Sprintf("Failed to create host0Pod: %v", err)) - - framework.ExpectNoError(f.WaitForPodRunningSlow(host0Pod.Name)) - - // Read/verify pd contents for both disks from container - verifyPDContentsViaContainer(f, host0Pod.Name, containerName, fileAndContentToVerify) - - // Write a file to both PDs from container - testFilePD1 := fmt.Sprintf("/testpd1/tracker%v", i) - testFilePD2 := fmt.Sprintf("/testpd2/tracker%v", i) - testFilePD1Contents := fmt.Sprintf("%v", mathrand.Int()) - testFilePD2Contents := fmt.Sprintf("%v", mathrand.Int()) - fileAndContentToVerify[testFilePD1] = testFilePD1Contents - fileAndContentToVerify[testFilePD2] = testFilePD2Contents - framework.ExpectNoError(f.WriteFileViaContainer(host0Pod.Name, containerName, testFilePD1, testFilePD1Contents)) - framework.Logf("Wrote value: \"%v\" to PD1 (%q) from pod %q container %q", testFilePD1Contents, disk1Name, host0Pod.Name, containerName) - framework.ExpectNoError(f.WriteFileViaContainer(host0Pod.Name, containerName, testFilePD2, testFilePD2Contents)) - framework.Logf("Wrote value: \"%v\" to PD2 (%q) from pod %q container %q", testFilePD2Contents, disk2Name, host0Pod.Name, containerName) - - // Read/verify pd contents for both disks from container - verifyPDContentsViaContainer(f, host0Pod.Name, containerName, fileAndContentToVerify) - - By("deleting host0Pod") - framework.ExpectNoError(podClient.Delete(host0Pod.Name, metav1.NewDeleteOptions(0)), "Failed to delete host0Pod") + Context("schedule a pod w/ RW PD(s) mounted to 1 or more containers, write to PD, verify content, delete pod, and repeat in rapid succession [Slow]", func() { + var diskNames []string + type testT struct { + numContainers int + numPDs int + repeatCnt int + } + tests := []testT{ + { + numContainers: 4, + numPDs: 1, + repeatCnt: 3, + }, + { + numContainers: 1, + numPDs: 2, + repeatCnt: 3, + }, } - By("Test completed successfully, waiting for PD to safely detach") - waitForPDDetach(disk1Name, host0Name) - waitForPDDetach(disk2Name, host0Name) + for _, t := range tests { + numPDs := t.numPDs + numContainers := t.numContainers + + It(fmt.Sprintf("using %d containers and %d PDs", numContainers, numPDs), func() { + framework.SkipUnlessProviderIs("gce", "gke", "aws") + var host0Pod *v1.Pod + var err error + fileAndContentToVerify := make(map[string]string) + + By(fmt.Sprintf("creating %d PD(s)", numPDs)) + for i := 0; i < numPDs; i++ { + name, err := framework.CreatePDWithRetry() + framework.ExpectNoError(err, fmt.Sprintf("Error creating PD %d", i)) + diskNames = append(diskNames, name) + } + + defer func() { + // Teardown should do nothing unless test failed. + By("defer: cleaning up PD-RW test environment") + framework.Logf("defer cleanup errors can usually be ignored") + if host0Pod != nil { + podClient.Delete(host0Pod.Name, metav1.NewDeleteOptions(0)) + } + for _, diskName := range diskNames { + detachAndDeletePDs(diskName, []types.NodeName{host0Name}) + } + }() + + for i := 0; i < t.repeatCnt; i++ { // "rapid" repeat loop + framework.Logf("PD Read/Writer Iteration #%v", i) + By(fmt.Sprintf("creating host0Pod with %d containers on node0", numContainers)) + host0Pod = testPDPod(diskNames, host0Name, false /* readOnly */, numContainers) + _, err = podClient.Create(host0Pod) + framework.ExpectNoError(err, fmt.Sprintf("Failed to create host0Pod: %v", err)) + framework.ExpectNoError(f.WaitForPodRunningSlow(host0Pod.Name)) + + By(fmt.Sprintf("writing %d file(s) via a container", numPDs)) + containerName := "mycontainer" + if numContainers > 1 { + containerName = fmt.Sprintf("mycontainer%v", mathrand.Intn(numContainers)+1) + } + for x := 1; x <= numPDs; x++ { + testFile := fmt.Sprintf("/testpd%d/tracker%d", x, i) + testFileContents := fmt.Sprintf("%v", mathrand.Int()) + fileAndContentToVerify[testFile] = testFileContents + framework.ExpectNoError(f.WriteFileViaContainer(host0Pod.Name, containerName, testFile, testFileContents)) + framework.Logf("wrote %q to file %q in pod %q (container %q) on node %q", testFileContents, testFile, host0Pod.Name, containerName, host0Name) + } + + By("verifying PD contents via a container") + if numContainers > 1 { + containerName = fmt.Sprintf("mycontainer%v", mathrand.Intn(numContainers)+1) + } + verifyPDContentsViaContainer(f, host0Pod.Name, containerName, fileAndContentToVerify) + + By("deleting host0Pod") + framework.ExpectNoError(podClient.Delete(host0Pod.Name, metav1.NewDeleteOptions(0)), "Failed to delete host0Pod") + } + By(fmt.Sprintf("Test completed successfully, waiting for %d PD(s) to detach from node0", numPDs)) + for _, diskName := range diskNames { + waitForPDDetach(diskName, host0Name) + } + }) + } }) - It("should be able to detach from a node which was deleted [Slow] [Disruptive]", func() { - framework.SkipUnlessProviderIs("gce") + Context("detach from a disrupted node [Slow] [Disruptive]", func() { + const ( + deleteNode = 1 // delete physical node + deleteNodeObj = 2 // delete node's api object only + ) + type testT struct { + descr string // It description + nodeOp int // disruptive operation performed on target node + } + tests := []testT{ + { + descr: "node is deleted", + nodeOp: deleteNode, + }, + { + descr: "node's API object is deleted", + nodeOp: deleteNodeObj, + }, + } - initialGroupSize, err := framework.GroupSize(framework.TestContext.CloudConfig.NodeInstanceGroup) - framework.ExpectNoError(err, "Error getting group size") + for _, t := range tests { + nodeOp := t.nodeOp + It(fmt.Sprintf("when %s", t.descr), func() { + framework.SkipUnlessProviderIs("gce") + origNodeCnt := len(nodes.Items) // healhy nodes running kublet - By("Creating a pd") - diskName, err := framework.CreatePDWithRetry() - framework.ExpectNoError(err, "Error creating a pd") + By("creating a pd") + diskName, err := framework.CreatePDWithRetry() + framework.ExpectNoError(err, "Error creating a pd") - host0Pod := testPDPod([]string{diskName}, host0Name, false, 1) + targetNode := &nodes.Items[0] + host0Pod := testPDPod([]string{diskName}, host0Name, false, 1) + containerName := "mycontainer" - containerName := "mycontainer" + defer func() { + By("defer: cleaning up PD-RW test env") + framework.Logf("defer cleanup errors can usually be ignored") + if nodeOp == deleteNode { + podClient.Delete(host0Pod.Name, metav1.NewDeleteOptions(0)) + } + detachAndDeletePDs(diskName, []types.NodeName{host0Name}) + if nodeOp == deleteNodeObj { + targetNode.ObjectMeta.SetResourceVersion("0") + // need to set the resource version or else the Create() fails + _, err := nodeClient.Create(targetNode) + framework.ExpectNoError(err, "defer: Unable to re-create the deleted node") + } + numNodes := countReadyNodes(f.ClientSet, host0Name) + Expect(numNodes).To(Equal(origNodeCnt), fmt.Sprintf("defer: Requires current node count (%d) to return to original node count (%d)", numNodes, origNodeCnt)) + }() - defer func() { - By("Cleaning up PD-RW test env") - podClient.Delete(host0Pod.Name, metav1.NewDeleteOptions(0)) - detachAndDeletePDs(diskName, []types.NodeName{host0Name}) - framework.WaitForNodeToBeReady(f.ClientSet, string(host0Name), nodeStatusTimeout) - framework.WaitForAllNodesSchedulable(f.ClientSet, nodeStatusTimeout) - nodes = framework.GetReadySchedulableNodesOrDie(f.ClientSet) - Expect(len(nodes.Items)).To(Equal(initialGroupSize), "Requires node count to return to initial group size.") - }() + By("creating host0Pod on node0") + _, err = podClient.Create(host0Pod) + framework.ExpectNoError(err, fmt.Sprintf("Failed to create host0Pod: %v", err)) + framework.ExpectNoError(f.WaitForPodRunningSlow(host0Pod.Name)) - By("submitting host0Pod to kubernetes") - _, err = podClient.Create(host0Pod) - framework.ExpectNoError(err, fmt.Sprintf("Failed to create host0pod: %v", err)) + By("writing content to host0Pod") + testFile := "/testpd1/tracker" + testFileContents := fmt.Sprintf("%v", mathrand.Int()) + framework.ExpectNoError(f.WriteFileViaContainer(host0Pod.Name, containerName, testFile, testFileContents)) + framework.Logf("wrote %q to file %q in pod %q on node %q", testFileContents, testFile, host0Pod.Name, host0Name) - framework.ExpectNoError(f.WaitForPodRunningSlow(host0Pod.Name)) + By("verifying PD is present in node0's VolumeInUse list") + framework.ExpectNoError(waitForPDInVolumesInUse(nodeClient, diskName, host0Name, nodeStatusTimeout, true /* should exist*/)) - testFile := "/testpd1/tracker" - testFileContents := fmt.Sprintf("%v", mathrand.Int()) + if nodeOp == deleteNode { + By("getting gce instances") + gceCloud, err := framework.GetGCECloud() + framework.ExpectNoError(err, fmt.Sprintf("Unable to create gcloud client err=%v", err)) + output, err := gceCloud.ListInstanceNames(framework.TestContext.CloudConfig.ProjectID, framework.TestContext.CloudConfig.Zone) + framework.ExpectNoError(err, fmt.Sprintf("Unable to get list of node instances err=%v output=%s", err, output)) + Expect(true, strings.Contains(string(output), string(host0Name))) - framework.ExpectNoError(f.WriteFileViaContainer(host0Pod.Name, containerName, testFile, testFileContents)) - framework.Logf("Wrote value: %v", testFileContents) + By("deleting host0") + resp, err := gceCloud.DeleteInstance(framework.TestContext.CloudConfig.ProjectID, framework.TestContext.CloudConfig.Zone, string(host0Name)) + framework.ExpectNoError(err, fmt.Sprintf("Failed to delete host0Pod: err=%v response=%#v", err, resp)) + By("expecting host0 node to be recreated") + numNodes := countReadyNodes(f.ClientSet, host0Name) + Expect(numNodes).To(Equal(origNodeCnt), fmt.Sprintf("Requires current node count (%d) to return to original node count (%d)", numNodes, origNodeCnt)) + output, err = gceCloud.ListInstanceNames(framework.TestContext.CloudConfig.ProjectID, framework.TestContext.CloudConfig.Zone) + framework.ExpectNoError(err, fmt.Sprintf("Unable to get list of node instances err=%v output=%s", err, output)) + Expect(false, strings.Contains(string(output), string(host0Name))) - // Verify that disk shows up in node 0's volumeInUse list - framework.ExpectNoError(waitForPDInVolumesInUse(nodeClient, diskName, host0Name, nodeStatusTimeout, true /* should exist*/)) + } else if nodeOp == deleteNodeObj { + By("deleting host0's node api object") + framework.ExpectNoError(nodeClient.Delete(string(host0Name), metav1.NewDeleteOptions(0)), "Unable to delete host0's node object") + By("deleting host0Pod") + framework.ExpectNoError(podClient.Delete(host0Pod.Name, metav1.NewDeleteOptions(0)), "Unable to delete host0Pod") + } - gceCloud, err := framework.GetGCECloud() - framework.ExpectNoError(err, fmt.Sprintf("Unable to create gcloud client err=%v", err)) - - output, err := gceCloud.ListInstanceNames(framework.TestContext.CloudConfig.ProjectID, framework.TestContext.CloudConfig.Zone) - framework.ExpectNoError(err, fmt.Sprintf("Unable to get list of node instances err=%v output=%s", err, output)) - Expect(true, strings.Contains(string(output), string(host0Name))) - - By("deleting host0") - resp, err := gceCloud.DeleteInstance(framework.TestContext.CloudConfig.ProjectID, framework.TestContext.CloudConfig.Zone, string(host0Name)) - framework.ExpectNoError(err, fmt.Sprintf("Failed to delete host0pod: err=%v response=%#v", err, resp)) - - output, err = gceCloud.ListInstanceNames(framework.TestContext.CloudConfig.ProjectID, framework.TestContext.CloudConfig.Zone) - framework.ExpectNoError(err, fmt.Sprintf("Unable to get list of node instances err=%v output=%s", err, output)) - Expect(false, strings.Contains(string(output), string(host0Name))) - - // The disk should be detached from host0 on it's deletion - By("Waiting for pd to detach from host0") - waitForPDDetach(diskName, host0Name) - framework.ExpectNoError(framework.WaitForGroupSize(framework.TestContext.CloudConfig.NodeInstanceGroup, int32(initialGroupSize)), "Unable to get back the cluster to inital size") - return - }) - - It("should be able to detach from a node whose api object was deleted [Slow] [Disruptive]", func() { - framework.SkipUnlessProviderIs("gce") - initialGroupSize, err := framework.GroupSize(framework.TestContext.CloudConfig.NodeInstanceGroup) - framework.ExpectNoError(err, "Error getting group size") - By("Creating a pd") - diskName, err := framework.CreatePDWithRetry() - framework.ExpectNoError(err, "Error creating a pd") - - host0Pod := testPDPod([]string{diskName}, host0Name, false, 1) - originalCount := len(nodes.Items) - containerName := "mycontainer" - nodeToDelete := &nodes.Items[0] - defer func() { - By("Cleaning up PD-RW test env") - detachAndDeletePDs(diskName, []types.NodeName{host0Name}) - nodeToDelete.ObjectMeta.SetResourceVersion("0") - // need to set the resource version or else the Create() fails - _, err := nodeClient.Create(nodeToDelete) - framework.ExpectNoError(err, "Unable to re-create the deleted node") - framework.ExpectNoError(framework.WaitForGroupSize(framework.TestContext.CloudConfig.NodeInstanceGroup, int32(initialGroupSize)), "Unable to get the node group back to the original size") - framework.WaitForNodeToBeReady(f.ClientSet, nodeToDelete.Name, nodeStatusTimeout) - framework.WaitForAllNodesSchedulable(f.ClientSet, nodeStatusTimeout) - nodes = framework.GetReadySchedulableNodesOrDie(f.ClientSet) - Expect(len(nodes.Items)).To(Equal(originalCount), "Requires node count to return to original node count.") - }() - - By("submitting host0Pod to kubernetes") - _, err = podClient.Create(host0Pod) - framework.ExpectNoError(err, fmt.Sprintf("Failed to create host0pod: %v", err)) - - framework.ExpectNoError(f.WaitForPodRunningSlow(host0Pod.Name)) - - testFile := "/testpd1/tracker" - testFileContents := fmt.Sprintf("%v", mathrand.Int()) - - framework.ExpectNoError(f.WriteFileViaContainer(host0Pod.Name, containerName, testFile, testFileContents)) - framework.Logf("Wrote value: %v", testFileContents) - - // Verify that disk shows up in node 0's volumeInUse list - framework.ExpectNoError(waitForPDInVolumesInUse(nodeClient, diskName, host0Name, nodeStatusTimeout, true /* should exist*/)) - - By("deleting api object of host0") - framework.ExpectNoError(nodeClient.Delete(string(host0Name), metav1.NewDeleteOptions(0)), "Unable to delete host0") - - By("deleting host0pod") - framework.ExpectNoError(podClient.Delete(host0Pod.Name, metav1.NewDeleteOptions(0)), "Unable to delete host0Pod") - // The disk should be detached from host0 on its deletion - By("Waiting for pd to detach from host0") - framework.ExpectNoError(waitForPDDetach(diskName, host0Name), "Timed out waiting for detach pd") + By("waiting for pd to detach from host0") + waitForPDDetach(diskName, host0Name) + }) + } }) It("should be able to delete a non-existent PD without error", func() { @@ -530,6 +399,13 @@ var _ = SIGDescribe("Pod Disks", func() { }) }) +func countReadyNodes(c clientset.Interface, hostName types.NodeName) int { + framework.WaitForNodeToBeReady(c, string(hostName), nodeStatusTimeout) + framework.WaitForAllNodesSchedulable(c, nodeStatusTimeout) + nodes := framework.GetReadySchedulableNodesOrDie(c) + return len(nodes.Items) +} + func verifyPDContentsViaContainer(f *framework.Framework, podName, containerName string, fileAndContentToVerify map[string]string) { for filePath, expectedContents := range fileAndContentToVerify { var value string @@ -563,59 +439,62 @@ func detachPD(nodeName types.NodeName, pdName string) error { if err != nil { return err } - err = gceCloud.DetachDisk(pdName, nodeName) if err != nil { if gerr, ok := err.(*googleapi.Error); ok && strings.Contains(gerr.Message, "Invalid value for field 'disk'") { // PD already detached, ignore error. return nil } - framework.Logf("Error detaching PD %q: %v", pdName, err) } - return err + } else if framework.TestContext.Provider == "aws" { client := ec2.New(session.New()) - tokens := strings.Split(pdName, "/") awsVolumeID := tokens[len(tokens)-1] - request := ec2.DetachVolumeInput{ VolumeId: aws.String(awsVolumeID), } - _, err := client.DetachVolume(&request) if err != nil { return fmt.Errorf("error detaching EBS volume: %v", err) } return nil + } else { return fmt.Errorf("Provider does not support volume detaching") } } +// Returns pod spec suitable for api Create call. Handles gce, gke and aws providers only and +// escapes if a different provider is supplied. +// The first container name is hard-coded to "mycontainer". Subsequent containers are named: +// "mycontainer where is 1..numContainers. Note if there is only one container it's +// name has no number. +// Container's volumeMounts are hard-coded to "/testpd" where is 1..len(diskNames). func testPDPod(diskNames []string, targetNode types.NodeName, readOnly bool, numContainers int) *v1.Pod { + // escape if not a supported provider + if !(framework.TestContext.Provider == "gce" || framework.TestContext.Provider == "gke" || + framework.TestContext.Provider == "aws") { + framework.Failf(fmt.Sprintf("func `testPDPod` only supports gce, gke, and aws providers, not %v", framework.TestContext.Provider)) + } + containers := make([]v1.Container, numContainers) for i := range containers { containers[i].Name = "mycontainer" if numContainers > 1 { containers[i].Name = fmt.Sprintf("mycontainer%v", i+1) } - containers[i].Image = imageutils.GetBusyBoxImage() - containers[i].Command = []string{"sleep", "6000"} - containers[i].VolumeMounts = make([]v1.VolumeMount, len(diskNames)) for k := range diskNames { containers[i].VolumeMounts[k].Name = fmt.Sprintf("testpd%v", k+1) containers[i].VolumeMounts[k].MountPath = fmt.Sprintf("/testpd%v", k+1) } - containers[i].Resources.Limits = v1.ResourceList{} containers[i].Resources.Limits[v1.ResourceCPU] = *resource.NewQuantity(int64(0), resource.DecimalSI) - } pod := &v1.Pod{ @@ -632,10 +511,18 @@ func testPDPod(diskNames []string, targetNode types.NodeName, readOnly bool, num }, } - if framework.TestContext.Provider == "gce" || framework.TestContext.Provider == "gke" { - pod.Spec.Volumes = make([]v1.Volume, len(diskNames)) - for k, diskName := range diskNames { - pod.Spec.Volumes[k].Name = fmt.Sprintf("testpd%v", k+1) + pod.Spec.Volumes = make([]v1.Volume, len(diskNames)) + for k, diskName := range diskNames { + pod.Spec.Volumes[k].Name = fmt.Sprintf("testpd%v", k+1) + if framework.TestContext.Provider == "aws" { + pod.Spec.Volumes[k].VolumeSource = v1.VolumeSource{ + AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{ + VolumeID: diskName, + FSType: "ext4", + ReadOnly: readOnly, + }, + } + } else { // "gce" or "gke" pod.Spec.Volumes[k].VolumeSource = v1.VolumeSource{ GCEPersistentDisk: &v1.GCEPersistentDiskVolumeSource{ PDName: diskName, @@ -644,22 +531,7 @@ func testPDPod(diskNames []string, targetNode types.NodeName, readOnly bool, num }, } } - } else if framework.TestContext.Provider == "aws" { - pod.Spec.Volumes = make([]v1.Volume, len(diskNames)) - for k, diskName := range diskNames { - pod.Spec.Volumes[k].Name = fmt.Sprintf("testpd%v", k+1) - pod.Spec.Volumes[k].VolumeSource = v1.VolumeSource{ - AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{ - VolumeID: diskName, - FSType: "ext4", - ReadOnly: readOnly, - }, - } - } - } else { - panic("Unknown provider: " + framework.TestContext.Provider) } - return pod } @@ -671,26 +543,21 @@ func waitForPDDetach(diskName string, nodeName types.NodeName) error { if err != nil { return err } - for start := time.Now(); time.Since(start) < gcePDDetachTimeout; time.Sleep(gcePDDetachPollTime) { diskAttached, err := gceCloud.DiskIsAttached(diskName, nodeName) if err != nil { framework.Logf("Error waiting for PD %q to detach from node %q. 'DiskIsAttached(...)' failed with %v", diskName, nodeName, err) return err } - if !diskAttached { // Specified disk does not appear to be attached to specified node framework.Logf("GCE PD %q appears to have successfully detached from %q.", diskName, nodeName) return nil } - framework.Logf("Waiting for GCE PD %q to detach from %q.", diskName, nodeName) } - return fmt.Errorf("Gave up waiting for GCE PD %q to detach from %q after %v", diskName, nodeName, gcePDDetachTimeout) } - return nil } @@ -715,42 +582,28 @@ func waitForPDInVolumesInUse( if !shouldExist { logStr = "to NOT contain" } - framework.Logf( - "Waiting for node %s's VolumesInUse Status %s PD %q", - nodeName, logStr, diskName) + framework.Logf("Waiting for node %s's VolumesInUse Status %s PD %q", nodeName, logStr, diskName) for start := time.Now(); time.Since(start) < timeout; time.Sleep(nodeStatusPollTime) { nodeObj, err := nodeClient.Get(string(nodeName), metav1.GetOptions{}) if err != nil || nodeObj == nil { - framework.Logf( - "Failed to fetch node object %q from API server. err=%v", - nodeName, err) + framework.Logf("Failed to fetch node object %q from API server. err=%v", nodeName, err) continue } - exists := false for _, volumeInUse := range nodeObj.Status.VolumesInUse { volumeInUseStr := string(volumeInUse) if strings.Contains(volumeInUseStr, diskName) { if shouldExist { - framework.Logf( - "Found PD %q in node %q's VolumesInUse Status: %q", - diskName, nodeName, volumeInUseStr) + framework.Logf("Found PD %q in node %q's VolumesInUse Status: %q", diskName, nodeName, volumeInUseStr) return nil } - exists = true } } - if !shouldExist && !exists { - framework.Logf( - "Verified PD %q does not exist in node %q's VolumesInUse Status.", - diskName, nodeName) + framework.Logf("Verified PD %q does not exist in node %q's VolumesInUse Status.", diskName, nodeName) return nil } } - - return fmt.Errorf( - "Timed out waiting for node %s VolumesInUse Status %s diskName %q", - nodeName, logStr, diskName) + return fmt.Errorf("Timed out waiting for node %s VolumesInUse Status %s diskName %q", nodeName, logStr, diskName) }