Mirror of https://github.com/k3s-io/kubernetes.git
Mark volume mount as uncertain in case volume expansion fails

Mark the volume mount in the actual state of world even if volume expansion fails, so that the reconciler can tear down the volume when needed. To keep pods from starting to use the volume, mark it as uncertain instead of mounted. Unit tests will be added after the logic is reviewed.

Change-Id: I5aebfa11ec93235a87af8f17bea7f7b1570b603d
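For illustration, here is a minimal, self-contained Go sketch of the ordering this change establishes: SetUp succeeds, expansion fails, and the mount is recorded as uncertain rather than mounted. This is not the kubelet's actual code; actualState, mountAndExpand, and the callback arguments are simplified stand-ins, while the real bookkeeping goes through the actual state of world's MarkVolumeMountAsUncertain, as shown in the diff below.

// Sketch only: stand-in types illustrating why a failed expansion after a
// successful SetUp ends up as "uncertain" instead of "mounted".
package main

import (
    "errors"
    "fmt"
)

type mountState string

const (
    volumeNotMounted     mountState = "NotMounted"
    volumeMountUncertain mountState = "MountUncertain"
    volumeMounted        mountState = "Mounted"
)

// actualState is a simplified stand-in for the kubelet's actual state of world cache.
type actualState struct {
    state mountState
}

func (a *actualState) MarkVolumeAsMounted()          { a.state = volumeMounted }
func (a *actualState) MarkVolumeMountAsUncertain()   { a.state = volumeMountUncertain }

// mountAndExpand mimics the order of operations on the mount path:
// SetUp first, then an optional filesystem expansion, then bookkeeping.
func mountAndExpand(asw *actualState, setUp, expand func() error) error {
    if err := setUp(); err != nil {
        // Nothing was mounted; the volume stays out of the actual state.
        return fmt.Errorf("SetUp failed: %w", err)
    }
    if err := expand(); err != nil {
        // SetUp already succeeded, so the volume must appear in the actual
        // state (otherwise it would never be torn down), but it must not be
        // reported as fully mounted, or the pod could start using it.
        asw.MarkVolumeMountAsUncertain()
        return fmt.Errorf("expansion failed: %w", err)
    }
    asw.MarkVolumeAsMounted()
    return nil
}

func main() {
    asw := &actualState{state: volumeNotMounted}
    err := mountAndExpand(asw,
        func() error { return nil },                         // SetUp succeeds
        func() error { return errors.New("resize failed") }, // expansion fails
    )
    fmt.Println(err)       // expansion failed: resize failed
    fmt.Println(asw.state) // MountUncertain
}

The same reasoning appears twice in the diff: once on the filesystem mount path (GenerateMountVolumeFunc) and once on the block device map path (GenerateMapVolumeFunc).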
Parent: 671251a8b9
Commit: 0fa01c371c
@@ -386,7 +386,7 @@ func (rc *reconciler) syncStates() {
    // Get volumes information by reading the pod's directory
    podVolumes, err := getVolumesFromPodDir(rc.kubeletPodsDir)
    if err != nil {
-       klog.ErrorS(err, "Cannot get volumes from disk")
+       klog.ErrorS(err, "Cannot get volumes from disk, skip sync states for volume reconstruction")
        return
    }
    volumesNeedUpdate := make(map[v1.UniqueVolumeName]*reconstructedVolume)
@@ -43,6 +43,7 @@ import (
    "k8s.io/kubernetes/pkg/volume/util"
    "k8s.io/kubernetes/pkg/volume/util/hostutil"
    "k8s.io/kubernetes/pkg/volume/util/operationexecutor"
+   "k8s.io/kubernetes/pkg/volume/util/types"
)

const (
@@ -1004,6 +1005,7 @@ func Test_Run_Positive_VolumeFSResizeControllerAttachEnabled(t *testing.T) {
        name            string
        volumeMode      *v1.PersistentVolumeMode
        expansionFailed bool
+       uncertainTest   bool
        pvName          string
        pvcSize         resource.Quantity
        pvcStatusSize   resource.Quantity
@@ -1164,9 +1166,6 @@ func Test_Run_Positive_VolumeFSResizeControllerAttachEnabled(t *testing.T) {
            if !cache.IsFSResizeRequiredError(podExistErr) {
                t.Fatalf("Volume should be marked as fsResizeRequired, but receive unexpected error: %v", podExistErr)
            }

            // Start the reconciler again, we hope reconciler will perform the
            // resize operation and clear the fsResizeRequired flag for volume.
            go reconciler.Run(wait.NeverStop)

            waitErr := retryWithExponentialBackOff(testOperationBackOffDuration, func() (done bool, err error) {
@@ -1174,7 +1173,7 @@ func Test_Run_Positive_VolumeFSResizeControllerAttachEnabled(t *testing.T) {
                return mounted && err == nil, nil
            })
            if waitErr != nil {
-               t.Fatal("Volume resize should succeeded")
+               t.Fatalf("Volume resize should succeeded %v", waitErr)
            }
        }
@@ -1371,6 +1370,8 @@ func Test_UncertainVolumeMountState(t *testing.T) {
        unmountVolumeCount     int
        volumeName             string
        supportRemount         bool
+       pvcStatusSize          resource.Quantity
+       pvSize                 resource.Quantity
    }{
        {
            name: "timed out operations should result in volume marked as uncertain",
@@ -1409,6 +1410,16 @@ func Test_UncertainVolumeMountState(t *testing.T) {
            volumeName:             volumetesting.SuccessAndFailOnSetupVolumeName,
            supportRemount:         true,
        },
+       {
+           name:                   "mount success but fail to expand filesystem",
+           volumeState:            operationexecutor.VolumeMountUncertain,
+           unmountDeviceCallCount: 1,
+           unmountVolumeCount:     1,
+           volumeName:             volumetesting.FailVolumeExpansion,
+           supportRemount:         true,
+           pvSize:                 resource.MustParse("10G"),
+           pvcStatusSize:          resource.MustParse("2G"),
+       },
    }
    modes := []v1.PersistentVolumeMode{v1.PersistentVolumeBlock, v1.PersistentVolumeFilesystem}
@@ -1431,6 +1442,11 @@ func Test_UncertainVolumeMountState(t *testing.T) {
                    VolumeMode: &mode,
                },
            }
+           if tc.pvSize.CmpInt64(0) > 0 {
+               pv.Spec.Capacity = v1.ResourceList{
+                   v1.ResourceStorage: tc.pvSize,
+               }
+           }
            pvc := &v1.PersistentVolumeClaim{
                ObjectMeta: metav1.ObjectMeta{
                    Name: "pvc",
@@ -1441,6 +1457,13 @@ func Test_UncertainVolumeMountState(t *testing.T) {
                    VolumeMode: &mode,
                },
            }
+           if tc.pvcStatusSize.CmpInt64(0) > 0 {
+               pvc.Status = v1.PersistentVolumeClaimStatus{
+                   Capacity: v1.ResourceList{
+                       v1.ResourceStorage: tc.pvcStatusSize,
+                   },
+               }
+           }
            pod := &v1.Pod{
                ObjectMeta: metav1.ObjectMeta{
                    Name: "pod1",
@@ -1525,7 +1548,7 @@ func Test_UncertainVolumeMountState(t *testing.T) {
            }

            if tc.volumeState == operationexecutor.VolumeMountUncertain {
-               waitForUncertainPodMount(t, volumeName, asw)
+               waitForUncertainPodMount(t, volumeName, podName, asw)
            }

            if tc.volumeState == operationexecutor.VolumeMounted {
@@ -1607,11 +1630,15 @@ func waitForGlobalMount(t *testing.T, volumeName v1.UniqueVolumeName, asw cache.ActualStateOfWorld) {
    }
}

-func waitForUncertainPodMount(t *testing.T, volumeName v1.UniqueVolumeName, asw cache.ActualStateOfWorld) {
+func waitForUncertainPodMount(t *testing.T, volumeName v1.UniqueVolumeName, podName types.UniquePodName, asw cache.ActualStateOfWorld) {
    // check if volume is locally pod mounted in uncertain state
    err := retryWithExponentialBackOff(
        testOperationBackOffDuration,
        func() (bool, error) {
+           mounted, _, err := asw.PodExistsInVolume(podName, volumeName)
+           if mounted || err != nil {
+               return false, nil
+           }
            allMountedVolumes := asw.GetAllMountedVolumes()
            for _, v := range allMountedVolumes {
                if v.VolumeName == volumeName {
@@ -81,6 +81,8 @@ const (
    // FailWithInUseVolumeName will cause NodeExpandVolume to result in FailedPrecondition error
    FailWithInUseVolumeName = "fail-expansion-in-use"

+   FailVolumeExpansion = "fail-expansion-test"
+
    deviceNotMounted     = "deviceNotMounted"
    deviceMountUncertain = "deviceMountUncertain"
    deviceMounted        = "deviceMounted"
@@ -467,6 +469,14 @@ func (plugin *FakeVolumePlugin) NodeExpand(resizeOptions NodeResizeOptions) (bool, error) {
    if resizeOptions.VolumeSpec.Name() == FailWithInUseVolumeName {
        return false, volumetypes.NewFailedPreconditionError("volume-in-use")
    }
+   // Set up fakeVolumePlugin to not support STAGE_UNSTAGE for testing the behavior,
+   // so the volume can be node published before we resize.
+   if resizeOptions.CSIVolumePhase == volume.CSIVolumeStaged {
+       return false, nil
+   }
+   if resizeOptions.CSIVolumePhase == volume.CSIVolumePublished && resizeOptions.VolumeSpec.Name() == FailVolumeExpansion {
+       return false, fmt.Errorf("fail volume expansion for volume: %s", FailVolumeExpansion)
+   }
    return true, nil
}
@@ -708,6 +708,14 @@ func (og *operationGenerator) GenerateMountVolumeFunc(
        if resizeError != nil {
            klog.Errorf("MountVolume.NodeExpandVolume failed with %v", resizeError)
            eventErr, detailedErr := volumeToMount.GenerateError("MountVolume.Setup failed while expanding volume", resizeError)
+           // At this point, MountVolume.Setup already succeeded, so we should add the volume to the actual state
+           // so that the reconciler can clean it up when needed. However, because the resize failed,
+           // we should not mark the volume as mounted, to keep the pod from starting to use it.
+           // Given both constraints, mark the volume as uncertain here so that the reconciler will trigger
+           // volume tear down when the pod is deleted, while also making sure the pod will not start using it.
+           if err := actualStateOfWorld.MarkVolumeMountAsUncertain(markOpts); err != nil {
+               klog.Errorf(volumeToMount.GenerateErrorDetailed("MountVolume.MarkVolumeMountAsUncertain failed", err).Error())
+           }
            return volumetypes.NewOperationContext(eventErr, detailedErr, migrated)
        }
    }
@@ -718,7 +726,6 @@ func (og *operationGenerator) GenerateMountVolumeFunc(
            eventErr, detailedErr := volumeToMount.GenerateError("MountVolume.MarkVolumeAsMounted failed", markVolMountedErr)
            return volumetypes.NewOperationContext(eventErr, detailedErr, migrated)
        }

        return volumetypes.NewOperationContext(nil, nil, migrated)
    }
@@ -1172,6 +1179,14 @@ func (og *operationGenerator) GenerateMapVolumeFunc(
        if resizeError != nil {
            klog.Errorf("MapVolume.NodeExpandVolume failed with %v", resizeError)
            eventErr, detailedErr := volumeToMount.GenerateError("MapVolume.MarkVolumeAsMounted failed while expanding volume", resizeError)
+           // At this point, MountVolume.Setup already succeeded, so we should add the volume to the actual state
+           // so that the reconciler can clean it up when needed. However, if NodeExpandVolume failed,
+           // we should not mark the volume as mounted, to keep the pod from starting to use it.
+           // Given both constraints, mark the volume as uncertain here so that the reconciler will trigger
+           // volume tear down when the pod is deleted, while also making sure the pod will not start using it.
+           if err := actualStateOfWorld.MarkVolumeMountAsUncertain(markVolumeOpts); err != nil {
+               klog.Errorf(volumeToMount.GenerateErrorDetailed("MountVolume.MarkVolumeMountAsUncertain failed", err).Error())
+           }
            return volumetypes.NewOperationContext(eventErr, detailedErr, migrated)
        }