Merge pull request #130685 from carlory/automated-cherry-pick-of-#130335-upstream-release-1.31

Automated cherry pick of #130335: Fix kubelet restart unmounts volumes of running pods if the referenced PVC is being deleted by the user
commit 3a67945ee1
Kubernetes Prow Robot, 2025-04-17 10:31:07 -07:00 (committed by GitHub)
2 changed files with 113 additions and 6 deletions
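
The substance of the fix is a single condition in the kubelet's desired-state-of-world populator: a PVC that has a deletion timestamp only blocks pod startup once the kubernetes.io/pvc-protection finalizer is gone, i.e. once the PVC protection controller has concluded that no running pod uses the claim any more. Below is a minimal, standalone sketch of that guard; the constant and function names are illustrative, not the populator's actual symbols, and the PVC is constructed in memory purely to show the two cases.

package main

import (
	"errors"
	"fmt"
	"slices"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// pvcProtectionFinalizer mirrors util.PVCProtectionFinalizer referenced in the diff.
const pvcProtectionFinalizer = "kubernetes.io/pvc-protection"

// checkPVCUsable is an illustrative reimplementation of the guard added by this
// cherry pick: a terminating PVC only rejects pod startup once the
// pvc-protection finalizer has been removed, i.e. once it is no longer in use.
func checkPVCUsable(pvc *v1.PersistentVolumeClaim) error {
	if pvc.ObjectMeta.DeletionTimestamp != nil && !slices.Contains(pvc.Finalizers, pvcProtectionFinalizer) {
		return errors.New("PVC is being deleted")
	}
	return nil
}

func main() {
	now := metav1.Now()
	pvc := &v1.PersistentVolumeClaim{
		ObjectMeta: metav1.ObjectMeta{
			Name:              "data",
			DeletionTimestamp: &now,
			Finalizers:        []string{pvcProtectionFinalizer},
		},
	}
	// Terminating but still protected: kubelet keeps the volume mounted.
	fmt.Println(checkPVCUsable(pvc)) // <nil>

	// Once the pvc-protection finalizer is removed (no pod uses the claim),
	// the same PVC is rejected.
	pvc.Finalizers = nil
	fmt.Println(checkPVCUsable(pvc)) // PVC is being deleted
}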


@@ -24,6 +24,7 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"slices"
 	"sync"
 	"time"
@@ -558,15 +559,21 @@ func (dswp *desiredStateOfWorldPopulator) getPVCExtractPV(
 		return nil, fmt.Errorf("failed to fetch PVC from API server: %v", err)
 	}
-	// Pods that uses a PVC that is being deleted must not be started.
+	// Pods that uses a PVC that is being deleted and not protected by
+	// kubernetes.io/pvc-protection must not be started.
 	//
-	// In case an old kubelet is running without this check or some kubelets
-	// have this feature disabled, the worst that can happen is that such
-	// pod is scheduled. This was the default behavior in 1.8 and earlier
-	// and users should not be that surprised.
+	// 1) In case an old kubelet is running without this check, the worst
+	// that can happen is that such pod is scheduled. This was the default
+	// behavior in 1.8 and earlier and users should not be that surprised.
 	// It should happen only in very rare case when scheduler schedules
 	// a pod and user deletes a PVC that's used by it at the same time.
-	if pvc.ObjectMeta.DeletionTimestamp != nil {
+	//
+	// 2) Adding a check for kubernetes.io/pvc-protection here to prevent
+	// the existing running pods from being affected during the rebuild of
+	// the desired state of the world cache when the kubelet is restarted.
+	// It is safe for kubelet to add this check here because the PVC will
+	// be stuck in Terminating state until the pod is deleted.
+	if pvc.ObjectMeta.DeletionTimestamp != nil && !slices.Contains(pvc.Finalizers, util.PVCProtectionFinalizer) {
 		return nil, errors.New("PVC is being deleted")
 	}


@@ -0,0 +1,100 @@
/*
Copyright 2025 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package csimock

import (
	"context"
	"fmt"

	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/kubernetes/test/e2e/framework"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
	"k8s.io/kubernetes/test/e2e/storage/drivers"
	"k8s.io/kubernetes/test/e2e/storage/utils"
	admissionapi "k8s.io/pod-security-admission/api"
)

var _ = utils.SIGDescribe("CSI Mock when kubelet restart", framework.WithSerial(), framework.WithDisruptive(), func() {
	f := framework.NewDefaultFramework("csi-mock-when-kubelet-restart")
	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
	m := newMockDriverSetup(f)

	ginkgo.BeforeEach(func() {
		// These tests requires SSH to nodes, so the provider check should be identical to there
		// (the limiting factor is the implementation of util.go's e2essh.GetSigner(...)).
		// Cluster must support node reboot
		e2eskipper.SkipUnlessProviderIs(framework.ProvidersWithSSH...)
		e2eskipper.SkipUnlessSSHKeyPresent()
	})

	ginkgo.It("should not umount volume when the pvc is terminating but still used by a running pod", func(ctx context.Context) {
		m.init(ctx, testParameters{
			registerDriver: true,
		})
		ginkgo.DeferCleanup(m.cleanup)

		ginkgo.By("Creating a Pod with a PVC backed by a CSI volume")
		_, pvc, pod := m.createPod(ctx, pvcReference)

		ginkgo.By("Waiting for the Pod to be running")
		err := e2epod.WaitForPodRunningInNamespace(ctx, f.ClientSet, pod)
		framework.ExpectNoError(err, "failed to wait for pod %s to be running", pod.Name)
		pod, err = f.ClientSet.CoreV1().Pods(pod.Namespace).Get(ctx, pod.Name, metav1.GetOptions{})
		framework.ExpectNoError(err, "failed to get pod %s", pod.Name)

		ginkgo.By("Deleting the PVC")
		err = f.ClientSet.CoreV1().PersistentVolumeClaims(pvc.Namespace).Delete(ctx, pvc.Name, metav1.DeleteOptions{})
		framework.ExpectNoError(err, "failed to delete PVC %s", pvc.Name)

		ginkgo.By("Restarting kubelet")
		utils.KubeletCommand(ctx, utils.KRestart, f.ClientSet, pod)
		ginkgo.DeferCleanup(utils.KubeletCommand, utils.KStart, f.ClientSet, pod)

		ginkgo.By("Verifying the PVC is terminating during kubelet restart")
		pvc, err = f.ClientSet.CoreV1().PersistentVolumeClaims(pvc.Namespace).Get(ctx, pvc.Name, metav1.GetOptions{})
		framework.ExpectNoError(err, "failed to get PVC %s", pvc.Name)
		gomega.Expect(pvc.DeletionTimestamp).NotTo(gomega.BeNil(), "PVC %s should have deletion timestamp", pvc.Name)

		ginkgo.By(fmt.Sprintf("Verifying that the driver didn't receive NodeUnpublishVolume call for PVC %s", pvc.Name))
		gomega.Consistently(ctx,
			func(ctx context.Context) []drivers.MockCSICall {
				calls, err := m.driver.GetCalls(ctx)
				if err != nil {
					if apierrors.IsUnexpectedServerError(err) {
						// kubelet might not be ready yet when getting the calls
						gomega.TryAgainAfter(framework.Poll).Wrap(err).Now()
						return nil
					}
					return nil
				}
				return calls
			}).
			WithPolling(framework.Poll).
			WithTimeout(framework.ClaimProvisionShortTimeout).
			ShouldNot(gomega.ContainElement(gomega.HaveField("Method", gomega.Equal("NodeUnpublishVolume"))))

		ginkgo.By("Verifying the Pod is still running")
		err = e2epod.WaitForPodRunningInNamespace(ctx, f.ClientSet, pod)
		framework.ExpectNoError(err, "failed to wait for pod %s to be running", pod.Name)
	})
})
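
One detail worth noting in the test above is the polling pattern: gomega.Consistently normally treats any error from the polled function as a failure, so the test converts the transient "unexpected server error" seen while the kubelet comes back up into a retry via gomega.TryAgainAfter. Below is a stripped-down sketch of the same pattern, assuming a hypothetical fetchCalls helper that stands in for m.driver.GetCalls and a made-up transient error; it is an illustration of the technique, not the e2e framework's code.

package csimock_sketch

import (
	"context"
	"errors"
	"testing"
	"time"

	"github.com/onsi/gomega"
)

// errTransient stands in for the transient server error the e2e test sees
// while the kubelet (and with it the mock driver's proxy) is restarting.
var errTransient = errors.New("transient proxy error while kubelet restarts")

// TestNoUnpublishWhileRestarting illustrates the Consistently + TryAgainAfter
// pattern: transient errors re-arm the poll instead of failing the assertion,
// while every successful poll must keep satisfying the matcher for the window.
func TestNoUnpublishWhileRestarting(t *testing.T) {
	g := gomega.NewWithT(t)

	polls := 0
	// fetchCalls is a hypothetical stand-in for m.driver.GetCalls: the first
	// poll fails transiently, later polls return the recorded CSI call names.
	fetchCalls := func(ctx context.Context) ([]string, error) {
		polls++
		if polls == 1 {
			return nil, errTransient
		}
		return []string{"NodeGetInfo", "NodePublishVolume"}, nil
	}

	g.Consistently(context.Background(),
		func(ctx context.Context) []string {
			calls, err := fetchCalls(ctx)
			if err != nil {
				if errors.Is(err, errTransient) {
					// Ask gomega to poll again shortly instead of recording a failure.
					gomega.TryAgainAfter(50 * time.Millisecond).Wrap(err).Now()
				}
				return nil
			}
			return calls
		}).
		WithPolling(50 * time.Millisecond).
		WithTimeout(500 * time.Millisecond).
		ShouldNot(gomega.ContainElement("NodeUnpublishVolume"))
}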