From c1cc18ccd5f7652d5a8225eb747ee1f8aa13f6bd Mon Sep 17 00:00:00 2001 From: Rahul Rangith Date: Tue, 18 Oct 2022 17:05:16 -0400 Subject: [PATCH] Automatically recreate pvc when sts pod is stuck in pending --- .../statefulset/stateful_pod_control.go | 8 +- .../statefulset/stateful_set_control.go | 12 +++ .../statefulset/stateful_set_control_test.go | 47 ++++++++++ .../statefulset/stateful_set_utils.go | 5 ++ test/e2e/apps/statefulset.go | 87 +++++++++++++++++++ 5 files changed, 155 insertions(+), 4 deletions(-) diff --git a/pkg/controller/statefulset/stateful_pod_control.go b/pkg/controller/statefulset/stateful_pod_control.go index 2b0546b4632..f762f87a743 100644 --- a/pkg/controller/statefulset/stateful_pod_control.go +++ b/pkg/controller/statefulset/stateful_pod_control.go @@ -114,7 +114,7 @@ func (om *realStatefulPodControlObjectManager) UpdateClaim(claim *v1.PersistentV func (spc *StatefulPodControl) CreateStatefulPod(ctx context.Context, set *apps.StatefulSet, pod *v1.Pod) error { // Create the Pod's PVCs prior to creating the Pod - if err := spc.createPersistentVolumeClaims(set, pod); err != nil { + if err := spc.CreatePersistentVolumeClaims(set, pod); err != nil { spc.recordPodEvent("create", set, pod, err) return err } @@ -150,7 +150,7 @@ func (spc *StatefulPodControl) UpdateStatefulPod(set *apps.StatefulSet, pod *v1. if !storageMatches(set, pod) { updateStorage(set, pod) consistent = false - if err := spc.createPersistentVolumeClaims(set, pod); err != nil { + if err := spc.CreatePersistentVolumeClaims(set, pod); err != nil { spc.recordPodEvent("update", set, pod, err) return err } @@ -315,11 +315,11 @@ func (spc *StatefulPodControl) recordClaimEvent(verb string, set *apps.StatefulS } } -// createPersistentVolumeClaims creates all of the required PersistentVolumeClaims for pod, which must be a member of +// CreatePersistentVolumeClaims creates all of the required PersistentVolumeClaims for pod, which must be a member of // set. 
If all of the claims for Pod are successfully created, the returned error is nil. If creation fails, this method // may be called again until no error is returned, indicating the PersistentVolumeClaims for pod are consistent with // set's Spec. -func (spc *StatefulPodControl) createPersistentVolumeClaims(set *apps.StatefulSet, pod *v1.Pod) error { +func (spc *StatefulPodControl) CreatePersistentVolumeClaims(set *apps.StatefulSet, pod *v1.Pod) error { var errs []error for _, claim := range getPersistentVolumeClaims(set, pod) { pvc, err := spc.objectMgr.GetClaim(claim.Namespace, claim.Name) diff --git a/pkg/controller/statefulset/stateful_set_control.go b/pkg/controller/statefulset/stateful_set_control.go index 47722c3524f..64f30e2f7a5 100644 --- a/pkg/controller/statefulset/stateful_set_control.go +++ b/pkg/controller/statefulset/stateful_set_control.go @@ -445,6 +445,18 @@ func (ssc *defaultStatefulSetControl) updateStatefulSet( // pod created, no more work possible for this round continue } + + // If the Pod is in pending state then trigger PVC creation to create missing PVCs + if isPending(replicas[i]) { + klog.V(4).Infof( + "StatefulSet %s/%s is triggering PVC creation for pending Pod %s", + set.Namespace, + set.Name, + replicas[i].Name) + if err := ssc.podControl.CreatePersistentVolumeClaims(set, replicas[i]); err != nil { + return &status, err + } + } // If we find a Pod that is currently terminating, we must wait until graceful deletion // completes before we continue to make progress. 
if isTerminating(replicas[i]) && monotonic { diff --git a/pkg/controller/statefulset/stateful_set_control_test.go b/pkg/controller/statefulset/stateful_set_control_test.go index e3a9c61666d..a97b5210f8c 100644 --- a/pkg/controller/statefulset/stateful_set_control_test.go +++ b/pkg/controller/statefulset/stateful_set_control_test.go @@ -174,6 +174,7 @@ func TestStatefulSetControl(t *testing.T) { {UpdateSetStatusFailure, simpleSetFn}, {PodRecreateDeleteFailure, simpleSetFn}, {NewRevisionDeletePodFailure, simpleSetFn}, + {RecreatesPVCForPendingPod, simpleSetFn}, } for _, testCase := range testCases { @@ -697,6 +698,45 @@ func CreatesPodsWithStartOrdinal(t *testing.T, set *apps.StatefulSet, invariants } } +func RecreatesPVCForPendingPod(t *testing.T, set *apps.StatefulSet, invariants invariantFunc) { + client := fake.NewSimpleClientset() + om, _, ssc := setupController(client) + selector, err := metav1.LabelSelectorAsSelector(set.Spec.Selector) + if err != nil { + t.Error(err) + } + pods, err := om.podsLister.Pods(set.Namespace).List(selector) + if err != nil { + t.Error(err) + } + if _, err := ssc.UpdateStatefulSet(context.TODO(), set, pods); err != nil { + t.Errorf("Error updating StatefulSet %s", err) + } + if err := invariants(set, om); err != nil { + t.Error(err) + } + pods, err = om.podsLister.Pods(set.Namespace).List(selector) + if err != nil { + t.Error(err) + } + for _, claim := range getPersistentVolumeClaims(set, pods[0]) { + om.claimsIndexer.Delete(&claim) + } + pods[0].Status.Phase = v1.PodPending + om.podsIndexer.Update(pods[0]) + if _, err := ssc.UpdateStatefulSet(context.TODO(), set, pods); err != nil { + t.Errorf("Error updating StatefulSet %s", err) + } + // invariants check if there are any missing PVCs for the Pods + if err := invariants(set, om); err != nil { + t.Error(err) + } + _, err = om.podsLister.Pods(set.Namespace).List(selector) + if err != nil { + t.Error(err) + } +} + func TestStatefulSetControlScaleDownDeleteError(t *testing.T) { 
runTestOverPVCRetentionPolicies( t, "", func(t *testing.T, policy *apps.StatefulSetPersistentVolumeClaimRetentionPolicy) { @@ -2401,6 +2441,13 @@ func (om *fakeObjectManager) DeletePod(pod *v1.Pod) error { return nil // Not found, no error in deleting. } +func (om *fakeObjectManager) CreatePersistentVolumeClaims(set *apps.StatefulSet, pod *v1.Pod) error { + for _, claim := range getPersistentVolumeClaims(set, pod) { + om.claimsIndexer.Update(&claim) + } + return nil +} + func (om *fakeObjectManager) CreateClaim(claim *v1.PersistentVolumeClaim) error { om.claimsIndexer.Update(claim) return nil diff --git a/pkg/controller/statefulset/stateful_set_utils.go b/pkg/controller/statefulset/stateful_set_utils.go index f3aa894a585..ac6b78d4a6f 100644 --- a/pkg/controller/statefulset/stateful_set_utils.go +++ b/pkg/controller/statefulset/stateful_set_utils.go @@ -413,6 +413,11 @@ func isCreated(pod *v1.Pod) bool { return pod.Status.Phase != "" } +// isPending returns true if pod has a Phase of PodPending +func isPending(pod *v1.Pod) bool { + return pod.Status.Phase == v1.PodPending +} + // isFailed returns true if pod has a Phase of PodFailed func isFailed(pod *v1.Pod) bool { return pod.Status.Phase == v1.PodFailed diff --git a/test/e2e/apps/statefulset.go b/test/e2e/apps/statefulset.go index 55ca49af3ce..e27929635a7 100644 --- a/test/e2e/apps/statefulset.go +++ b/test/e2e/apps/statefulset.go @@ -38,6 +38,7 @@ import ( "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/apimachinery/pkg/util/strategicpatch" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/apimachinery/pkg/watch" clientset "k8s.io/client-go/kubernetes" @@ -1347,6 +1348,92 @@ var _ = SIGDescribe("StatefulSet", func() { framework.ExpectNoError(err) }) }) + + ginkgo.It("PVC should be recreated when pod is pending due to missing PVC [Feature:AutomaticPVCRecreation]", func() { + // This test must be run in an environment that will allow the test pod to 
be scheduled on a node with local volumes + ssName := "test-ss" + headlessSvcName := "test" + // Define StatefulSet Labels + ssPodLabels := map[string]string{ + "name": "sample-pod", + "pod": WebserverImageName, + } + + statefulPodMounts := []v1.VolumeMount{{Name: "datadir", MountPath: "/data/"}} + ss := e2estatefulset.NewStatefulSet(ssName, ns, headlessSvcName, 1, statefulPodMounts, nil, ssPodLabels) + e2epv.SkipIfNoDefaultStorageClass(c) + ginkgo.By("Creating statefulset " + ssName + " in namespace " + ns) + _, err := c.AppsV1().StatefulSets(ns).Create(context.TODO(), ss, metav1.CreateOptions{}) + framework.ExpectNoError(err) + + ginkgo.By("Confirming PVC exists") + err = verifyStatefulSetPVCsExist(c, ss, []int{0}) + framework.ExpectNoError(err) + + ginkgo.By("Confirming Pod is ready") + e2estatefulset.WaitForStatusReadyReplicas(c, ss, 1) + podName := getStatefulSetPodNameAtIndex(0, ss) + pod, err := c.CoreV1().Pods(ns).Get(context.TODO(), podName, metav1.GetOptions{}) + framework.ExpectNoError(err) + + nodeName := pod.Spec.NodeName + node, err := c.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + + oldData, err := json.Marshal(node) + framework.ExpectNoError(err) + + node.Spec.Unschedulable = true + + newData, err := json.Marshal(node) + framework.ExpectNoError(err) + + // cordon node, to make sure pod does not get scheduled to the node until the pvc is deleted + patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldData, newData, v1.Node{}) + framework.ExpectNoError(err) + ginkgo.By("Cordoning Node") + _, err = c.CoreV1().Nodes().Patch(context.TODO(), nodeName, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{}) + framework.ExpectNoError(err) + + // wait for the node to be patched + time.Sleep(5 * time.Second) + node, err = c.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + framework.ExpectEqual(node.Spec.Unschedulable, true) + + 
ginkgo.By("Deleting Pod") + err = c.CoreV1().Pods(ns).Delete(context.TODO(), podName, metav1.DeleteOptions{}) + framework.ExpectNoError(err) + + // wait for the pod to be recreated + time.Sleep(10 * time.Second) + _, err = c.CoreV1().Pods(ns).Get(context.TODO(), podName, metav1.GetOptions{}) + framework.ExpectNoError(err) + + pvcList, err := c.CoreV1().PersistentVolumeClaims(ns).List(context.TODO(), metav1.ListOptions{LabelSelector: klabels.Everything().String()}) + framework.ExpectNoError(err) + pvcName := pvcList.Items[0].Name + + ginkgo.By("Deleting PVC") + err = c.CoreV1().PersistentVolumeClaims(ns).Delete(context.TODO(), pvcName, metav1.DeleteOptions{}) + framework.ExpectNoError(err) + + ginkgo.By("Uncordoning Node") + // uncordon node, by reverting patch + revertPatchBytes, err := strategicpatch.CreateTwoWayMergePatch(newData, oldData, v1.Node{}) + framework.ExpectNoError(err) + _, err = c.CoreV1().Nodes().Patch(context.TODO(), nodeName, types.StrategicMergePatchType, revertPatchBytes, metav1.PatchOptions{}) + framework.ExpectNoError(err) + + ginkgo.By("Confirming PVC recreated") + err = verifyStatefulSetPVCsExist(c, ss, []int{0}) + framework.ExpectNoError(err) + + ginkgo.By("Confirming Pod is ready after being recreated") + e2estatefulset.WaitForStatusReadyReplicas(c, ss, 1) + _, err = c.CoreV1().Pods(ns).Get(context.TODO(), podName, metav1.GetOptions{}) + framework.ExpectNoError(err) + }) }) func kubectlExecWithRetries(ns string, args ...string) (out string) {