From 7e10741f944455f0c7dd349cee4a00868807248d Mon Sep 17 00:00:00 2001
From: David Zhu
Date: Thu, 7 Sep 2017 11:52:52 -0700
Subject: [PATCH] E2E test to make sure controller does not crash because of
 nil volume spec.

---
 test/e2e/framework/util.go                    | 27 ++++++
 test/e2e/framework/volume_util.go             | 12 +++
 .../storage/persistent_volumes-disruptive.go  | 91 +++++++++++++++++++
 .../upgrades/storage/persistent_volumes.go    | 13 +--
 4 files changed, 131 insertions(+), 12 deletions(-)

diff --git a/test/e2e/framework/util.go b/test/e2e/framework/util.go
index 7fdb068545b..a81d17d2f51 100644
--- a/test/e2e/framework/util.go
+++ b/test/e2e/framework/util.go
@@ -3920,6 +3920,33 @@ func WaitForControllerManagerUp() error {
 	return fmt.Errorf("waiting for controller-manager timed out")
 }
 
+// CheckForControllerManagerHealthy checks that the controller manager does not crash within "duration"
+func CheckForControllerManagerHealthy(duration time.Duration) error {
+	var PID string
+	cmd := "sudo docker ps | grep k8s_kube-controller-manager | cut -d ' ' -f 1"
+	for start := time.Now(); time.Since(start) < duration; time.Sleep(5 * time.Second) {
+		result, err := SSH(cmd, GetMasterHost()+":22", TestContext.Provider)
+		if err != nil {
+			// We don't necessarily know that it crashed; the SSH pipe could just be broken.
+			LogSSHResult(result)
+			return fmt.Errorf("master unreachable after %v", time.Since(start))
+		} else if result.Code != 0 {
+			LogSSHResult(result)
+			return fmt.Errorf("SSH result code not 0, got %v after %v", result.Code, time.Since(start))
+		} else if result.Stdout != PID {
+			if PID == "" {
+				PID = result.Stdout
+			} else {
+				// The PID changed, so the controller manager crashed and was restarted.
+				return fmt.Errorf("controller manager crashed, old PID: %s, new PID: %s", PID, result.Stdout)
+			}
+		} else {
+			Logf("kube-controller-manager still healthy after %v", time.Since(start))
+		}
+	}
+	return nil
+}
+
 // Returns number of ready Nodes excluding Master Node.
 func NumberOfReadyNodes(c clientset.Interface) (int, error) {
 	nodes, err := c.Core().Nodes().List(metav1.ListOptions{FieldSelector: fields.Set{
diff --git a/test/e2e/framework/volume_util.go b/test/e2e/framework/volume_util.go
index 8a86fe70d73..84214a15ab4 100644
--- a/test/e2e/framework/volume_util.go
+++ b/test/e2e/framework/volume_util.go
@@ -492,3 +492,15 @@ func InjectHtml(client clientset.Interface, config VolumeTestConfig, volume v1.V
 	err = WaitForPodSuccessInNamespace(client, injectPod.Name, injectPod.Namespace)
 	Expect(err).NotTo(HaveOccurred())
 }
+
+func CreateGCEVolume() (*v1.PersistentVolumeSource, string) {
+	diskName, err := CreatePDWithRetry()
+	ExpectNoError(err)
+	return &v1.PersistentVolumeSource{
+		GCEPersistentDisk: &v1.GCEPersistentDiskVolumeSource{
+			PDName:   diskName,
+			FSType:   "ext3",
+			ReadOnly: false,
+		},
+	}, diskName
+}
diff --git a/test/e2e/storage/persistent_volumes-disruptive.go b/test/e2e/storage/persistent_volumes-disruptive.go
index a91c3489bfa..2eef71c476a 100644
--- a/test/e2e/storage/persistent_volumes-disruptive.go
+++ b/test/e2e/storage/persistent_volumes-disruptive.go
@@ -27,6 +27,7 @@ import (
 	apierrs "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
+	utilerrors "k8s.io/apimachinery/pkg/util/errors"
 	clientset "k8s.io/client-go/kubernetes"
 	"k8s.io/kubernetes/test/e2e/framework"
 )
@@ -108,6 +109,96 @@ var _ = SIGDescribe("PersistentVolumes[Disruptive][Flaky]", func() {
 		framework.DeletePodWithWait(f, c, nfsServerPod)
 	})
 
+	Context("when kube-controller-manager restarts", func() {
+		var (
+			diskName1, diskName2 string
+			err                  error
+			pvConfig1, pvConfig2 framework.PersistentVolumeConfig
+			pv1, pv2             *v1.PersistentVolume
+			pvSource1, pvSource2 *v1.PersistentVolumeSource
+			pvc1, pvc2           *v1.PersistentVolumeClaim
+			clientPod            *v1.Pod
+		)
+
+		BeforeEach(func() {
+			framework.SkipUnlessProviderIs("gce")
+			framework.SkipUnlessSSHKeyPresent()
+
+			By("Initializing first PD with PVPVC binding")
+			pvSource1, diskName1 = framework.CreateGCEVolume()
+			Expect(err).NotTo(HaveOccurred())
+			pvConfig1 = framework.PersistentVolumeConfig{
+				NamePrefix: "gce-",
+				Labels:     volLabel,
+				PVSource:   *pvSource1,
+				Prebind:    nil,
+			}
+			pv1, pvc1, err = framework.CreatePVPVC(c, pvConfig1, pvcConfig, ns, false)
+			Expect(err).NotTo(HaveOccurred())
+			framework.ExpectNoError(framework.WaitOnPVandPVC(c, ns, pv1, pvc1))
+
+			By("Initializing second PD with PVPVC binding")
+			pvSource2, diskName2 = framework.CreateGCEVolume()
+			Expect(err).NotTo(HaveOccurred())
+			pvConfig2 = framework.PersistentVolumeConfig{
+				NamePrefix: "gce-",
+				Labels:     volLabel,
+				PVSource:   *pvSource2,
+				Prebind:    nil,
+			}
+			pv2, pvc2, err = framework.CreatePVPVC(c, pvConfig2, pvcConfig, ns, false)
+			Expect(err).NotTo(HaveOccurred())
+			framework.ExpectNoError(framework.WaitOnPVandPVC(c, ns, pv2, pvc2))
+
+			By("Attaching both PVCs to a single pod")
+			clientPod, err = framework.CreatePod(c, ns, []*v1.PersistentVolumeClaim{pvc1, pvc2}, true, "")
+			Expect(err).NotTo(HaveOccurred())
+		})
+
+		AfterEach(func() {
+			// Delete the client/user pod first
+			framework.ExpectNoError(framework.DeletePodWithWait(f, c, clientPod))
+
+			// Delete the PVs and PVCs
+			if errs := framework.PVPVCCleanup(c, ns, pv1, pvc1); len(errs) > 0 {
+				framework.Failf("AfterEach: Failed to delete PVC and/or PV. Errors: %v", utilerrors.NewAggregate(errs))
+			}
+			pv1, pvc1 = nil, nil
+			if errs := framework.PVPVCCleanup(c, ns, pv2, pvc2); len(errs) > 0 {
+				framework.Failf("AfterEach: Failed to delete PVC and/or PV. Errors: %v", utilerrors.NewAggregate(errs))
+			}
+			pv2, pvc2 = nil, nil
+
+			// Delete the actual disks
+			if diskName1 != "" {
+				framework.ExpectNoError(framework.DeletePDWithRetry(diskName1))
+			}
+			if diskName2 != "" {
+				framework.ExpectNoError(framework.DeletePDWithRetry(diskName2))
+			}
+		})
+
+		It("should delete a bound PVC from a clientPod, restart the kube-controller-manager, and ensure the kube-controller-manager does not crash", func() {
+			By("Deleting PVC for volume 2")
+			err = framework.DeletePersistentVolumeClaim(c, pvc2.Name, ns)
+			Expect(err).NotTo(HaveOccurred())
+			pvc2 = nil
+
+			By("Restarting the kube-controller-manager")
+			err = framework.RestartControllerManager()
+			Expect(err).NotTo(HaveOccurred())
+			err = framework.WaitForControllerManagerUp()
+			Expect(err).NotTo(HaveOccurred())
+			framework.Logf("kube-controller-manager restarted")
+
+			By("Observing the kube-controller-manager healthy for at least 2 minutes")
+			// Continue checking for 2 minutes to make sure the kube-controller-manager stays healthy
+			err = framework.CheckForControllerManagerHealthy(2 * time.Minute)
+			Expect(err).NotTo(HaveOccurred())
+		})
+
+	})
+
 	Context("when kubelet restarts", func() {
 		var (
 			clientPod *v1.Pod
diff --git a/test/e2e/upgrades/storage/persistent_volumes.go b/test/e2e/upgrades/storage/persistent_volumes.go
index 2901fea35f9..fe41a79d9e6 100644
--- a/test/e2e/upgrades/storage/persistent_volumes.go
+++ b/test/e2e/upgrades/storage/persistent_volumes.go
@@ -42,17 +42,6 @@ const (
 	pvReadCmd string = "cat " + pvTestFile
 )
 
-func (t *PersistentVolumeUpgradeTest) createGCEVolume() *v1.PersistentVolumeSource {
-	diskName, err := framework.CreatePDWithRetry()
-	framework.ExpectNoError(err)
-	return &v1.PersistentVolumeSource{
-		GCEPersistentDisk: &v1.GCEPersistentDiskVolumeSource{
-			PDName:   diskName,
-			FSType:   "ext3",
-			ReadOnly: false,
-		},
-	}
-}
 func (t *PersistentVolumeUpgradeTest) deleteGCEVolume(pvSource *v1.PersistentVolumeSource) error {
 	return framework.DeletePDWithRetry(pvSource.GCEPersistentDisk.PDName)
 }
@@ -67,7 +56,7 @@ func (t *PersistentVolumeUpgradeTest) Setup(f *framework.Framework) {
 	ns := f.Namespace.Name
 
 	By("Initializing PV source")
-	t.pvSource = t.createGCEVolume()
+	t.pvSource, _ = framework.CreateGCEVolume()
 	pvConfig := framework.PersistentVolumeConfig{
 		NamePrefix: "pv-upgrade",
 		PVSource:   *t.pvSource,