Mark volume mount as uncertain in case of volume expansion fails

should mark volume mount in actual state even if volume expansion fails so that
reconciler can tear down the volume when needed. To avoid pods start
using it, mark volume as uncertain instead of mounted.

Will add unit test after the logic is reviewed.

Change-Id: I5aebfa11ec93235a87af8f17bea7f7b1570b603d
This commit is contained in:
Jing Xu 2021-03-25 12:32:17 -07:00
parent 671251a8b9
commit 0fa01c371c
4 changed files with 60 additions and 8 deletions

View File

@ -386,7 +386,7 @@ func (rc *reconciler) syncStates() {
// Get volumes information by reading the pod's directory // Get volumes information by reading the pod's directory
podVolumes, err := getVolumesFromPodDir(rc.kubeletPodsDir) podVolumes, err := getVolumesFromPodDir(rc.kubeletPodsDir)
if err != nil { if err != nil {
klog.ErrorS(err, "Cannot get volumes from disk") klog.ErrorS(err, "Cannot get volumes from disk, skip sync states for volume reconstruction")
return return
} }
volumesNeedUpdate := make(map[v1.UniqueVolumeName]*reconstructedVolume) volumesNeedUpdate := make(map[v1.UniqueVolumeName]*reconstructedVolume)

View File

@ -43,6 +43,7 @@ import (
"k8s.io/kubernetes/pkg/volume/util" "k8s.io/kubernetes/pkg/volume/util"
"k8s.io/kubernetes/pkg/volume/util/hostutil" "k8s.io/kubernetes/pkg/volume/util/hostutil"
"k8s.io/kubernetes/pkg/volume/util/operationexecutor" "k8s.io/kubernetes/pkg/volume/util/operationexecutor"
"k8s.io/kubernetes/pkg/volume/util/types"
) )
const ( const (
@ -1004,6 +1005,7 @@ func Test_Run_Positive_VolumeFSResizeControllerAttachEnabled(t *testing.T) {
name string name string
volumeMode *v1.PersistentVolumeMode volumeMode *v1.PersistentVolumeMode
expansionFailed bool expansionFailed bool
uncertainTest bool
pvName string pvName string
pvcSize resource.Quantity pvcSize resource.Quantity
pvcStatusSize resource.Quantity pvcStatusSize resource.Quantity
@ -1164,9 +1166,6 @@ func Test_Run_Positive_VolumeFSResizeControllerAttachEnabled(t *testing.T) {
if !cache.IsFSResizeRequiredError(podExistErr) { if !cache.IsFSResizeRequiredError(podExistErr) {
t.Fatalf("Volume should be marked as fsResizeRequired, but receive unexpected error: %v", podExistErr) t.Fatalf("Volume should be marked as fsResizeRequired, but receive unexpected error: %v", podExistErr)
} }
// Start the reconciler again, we hope reconciler will perform the
// resize operation and clear the fsResizeRequired flag for volume.
go reconciler.Run(wait.NeverStop) go reconciler.Run(wait.NeverStop)
waitErr := retryWithExponentialBackOff(testOperationBackOffDuration, func() (done bool, err error) { waitErr := retryWithExponentialBackOff(testOperationBackOffDuration, func() (done bool, err error) {
@ -1174,7 +1173,7 @@ func Test_Run_Positive_VolumeFSResizeControllerAttachEnabled(t *testing.T) {
return mounted && err == nil, nil return mounted && err == nil, nil
}) })
if waitErr != nil { if waitErr != nil {
t.Fatal("Volume resize should succeeded") t.Fatalf("Volume resize should succeeded %v", waitErr)
} }
} }
@ -1371,6 +1370,8 @@ func Test_UncertainVolumeMountState(t *testing.T) {
unmountVolumeCount int unmountVolumeCount int
volumeName string volumeName string
supportRemount bool supportRemount bool
pvcStatusSize resource.Quantity
pvSize resource.Quantity
}{ }{
{ {
name: "timed out operations should result in volume marked as uncertain", name: "timed out operations should result in volume marked as uncertain",
@ -1409,6 +1410,16 @@ func Test_UncertainVolumeMountState(t *testing.T) {
volumeName: volumetesting.SuccessAndFailOnSetupVolumeName, volumeName: volumetesting.SuccessAndFailOnSetupVolumeName,
supportRemount: true, supportRemount: true,
}, },
{
name: "mount success but fail to expand filesystem",
volumeState: operationexecutor.VolumeMountUncertain,
unmountDeviceCallCount: 1,
unmountVolumeCount: 1,
volumeName: volumetesting.FailVolumeExpansion,
supportRemount: true,
pvSize: resource.MustParse("10G"),
pvcStatusSize: resource.MustParse("2G"),
},
} }
modes := []v1.PersistentVolumeMode{v1.PersistentVolumeBlock, v1.PersistentVolumeFilesystem} modes := []v1.PersistentVolumeMode{v1.PersistentVolumeBlock, v1.PersistentVolumeFilesystem}
@ -1431,6 +1442,11 @@ func Test_UncertainVolumeMountState(t *testing.T) {
VolumeMode: &mode, VolumeMode: &mode,
}, },
} }
if tc.pvSize.CmpInt64(0) > 0 {
pv.Spec.Capacity = v1.ResourceList{
v1.ResourceStorage: tc.pvSize,
}
}
pvc := &v1.PersistentVolumeClaim{ pvc := &v1.PersistentVolumeClaim{
ObjectMeta: metav1.ObjectMeta{ ObjectMeta: metav1.ObjectMeta{
Name: "pvc", Name: "pvc",
@ -1441,6 +1457,13 @@ func Test_UncertainVolumeMountState(t *testing.T) {
VolumeMode: &mode, VolumeMode: &mode,
}, },
} }
if tc.pvcStatusSize.CmpInt64(0) > 0 {
pvc.Status = v1.PersistentVolumeClaimStatus{
Capacity: v1.ResourceList{
v1.ResourceStorage: tc.pvcStatusSize,
},
}
}
pod := &v1.Pod{ pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{ ObjectMeta: metav1.ObjectMeta{
Name: "pod1", Name: "pod1",
@ -1525,7 +1548,7 @@ func Test_UncertainVolumeMountState(t *testing.T) {
} }
if tc.volumeState == operationexecutor.VolumeMountUncertain { if tc.volumeState == operationexecutor.VolumeMountUncertain {
waitForUncertainPodMount(t, volumeName, asw) waitForUncertainPodMount(t, volumeName, podName, asw)
} }
if tc.volumeState == operationexecutor.VolumeMounted { if tc.volumeState == operationexecutor.VolumeMounted {
@ -1607,11 +1630,15 @@ func waitForGlobalMount(t *testing.T, volumeName v1.UniqueVolumeName, asw cache.
} }
} }
func waitForUncertainPodMount(t *testing.T, volumeName v1.UniqueVolumeName, asw cache.ActualStateOfWorld) { func waitForUncertainPodMount(t *testing.T, volumeName v1.UniqueVolumeName, podName types.UniquePodName, asw cache.ActualStateOfWorld) {
// check if volume is locally pod mounted in uncertain state // check if volume is locally pod mounted in uncertain state
err := retryWithExponentialBackOff( err := retryWithExponentialBackOff(
testOperationBackOffDuration, testOperationBackOffDuration,
func() (bool, error) { func() (bool, error) {
mounted, _, err := asw.PodExistsInVolume(podName, volumeName)
if mounted || err != nil {
return false, nil
}
allMountedVolumes := asw.GetAllMountedVolumes() allMountedVolumes := asw.GetAllMountedVolumes()
for _, v := range allMountedVolumes { for _, v := range allMountedVolumes {
if v.VolumeName == volumeName { if v.VolumeName == volumeName {

View File

@ -81,6 +81,8 @@ const (
// FailWithInUseVolumeName will cause NodeExpandVolume to result in FailedPrecondition error // FailWithInUseVolumeName will cause NodeExpandVolume to result in FailedPrecondition error
FailWithInUseVolumeName = "fail-expansion-in-use" FailWithInUseVolumeName = "fail-expansion-in-use"
FailVolumeExpansion = "fail-expansion-test"
deviceNotMounted = "deviceNotMounted" deviceNotMounted = "deviceNotMounted"
deviceMountUncertain = "deviceMountUncertain" deviceMountUncertain = "deviceMountUncertain"
deviceMounted = "deviceMounted" deviceMounted = "deviceMounted"
@ -467,6 +469,14 @@ func (plugin *FakeVolumePlugin) NodeExpand(resizeOptions NodeResizeOptions) (boo
if resizeOptions.VolumeSpec.Name() == FailWithInUseVolumeName { if resizeOptions.VolumeSpec.Name() == FailWithInUseVolumeName {
return false, volumetypes.NewFailedPreconditionError("volume-in-use") return false, volumetypes.NewFailedPreconditionError("volume-in-use")
} }
// Set up fakeVolumePlugin not support STAGE_UNSTAGE for testing the behavior
// so as volume can be node published before we can resize
if resizeOptions.CSIVolumePhase == volume.CSIVolumeStaged {
return false, nil
}
if resizeOptions.CSIVolumePhase == volume.CSIVolumePublished && resizeOptions.VolumeSpec.Name() == FailVolumeExpansion {
return false, fmt.Errorf("fail volume expansion for volume: %s", FailVolumeExpansion)
}
return true, nil return true, nil
} }

View File

@ -708,6 +708,14 @@ func (og *operationGenerator) GenerateMountVolumeFunc(
if resizeError != nil { if resizeError != nil {
klog.Errorf("MountVolume.NodeExpandVolume failed with %v", resizeError) klog.Errorf("MountVolume.NodeExpandVolume failed with %v", resizeError)
eventErr, detailedErr := volumeToMount.GenerateError("MountVolume.Setup failed while expanding volume", resizeError) eventErr, detailedErr := volumeToMount.GenerateError("MountVolume.Setup failed while expanding volume", resizeError)
// At this point, MountVolume.Setup already succeeded, we should add volume into actual state
// so that reconciler can clean up volume when needed. However, volume resize failed,
// we should not mark the volume as mounted to avoid pod starts using it.
// Considering the above situations, we mark volume as uncertain here so that reconciler will tigger
// volume tear down when pod is deleted, and also makes sure pod will not start using it.
if err := actualStateOfWorld.MarkVolumeMountAsUncertain(markOpts); err != nil {
klog.Errorf(volumeToMount.GenerateErrorDetailed("MountVolume.MarkVolumeMountAsUncertain failed", err).Error())
}
return volumetypes.NewOperationContext(eventErr, detailedErr, migrated) return volumetypes.NewOperationContext(eventErr, detailedErr, migrated)
} }
} }
@ -718,7 +726,6 @@ func (og *operationGenerator) GenerateMountVolumeFunc(
eventErr, detailedErr := volumeToMount.GenerateError("MountVolume.MarkVolumeAsMounted failed", markVolMountedErr) eventErr, detailedErr := volumeToMount.GenerateError("MountVolume.MarkVolumeAsMounted failed", markVolMountedErr)
return volumetypes.NewOperationContext(eventErr, detailedErr, migrated) return volumetypes.NewOperationContext(eventErr, detailedErr, migrated)
} }
return volumetypes.NewOperationContext(nil, nil, migrated) return volumetypes.NewOperationContext(nil, nil, migrated)
} }
@ -1172,6 +1179,14 @@ func (og *operationGenerator) GenerateMapVolumeFunc(
if resizeError != nil { if resizeError != nil {
klog.Errorf("MapVolume.NodeExpandVolume failed with %v", resizeError) klog.Errorf("MapVolume.NodeExpandVolume failed with %v", resizeError)
eventErr, detailedErr := volumeToMount.GenerateError("MapVolume.MarkVolumeAsMounted failed while expanding volume", resizeError) eventErr, detailedErr := volumeToMount.GenerateError("MapVolume.MarkVolumeAsMounted failed while expanding volume", resizeError)
// At this point, MountVolume.Setup already succeeded, we should add volume into actual state
// so that reconciler can clean up volume when needed. However, if nodeExpandVolume failed,
// we should not mark the volume as mounted to avoid pod starts using it.
// Considering the above situations, we mark volume as uncertain here so that reconciler will tigger
// volume tear down when pod is deleted, and also makes sure pod will not start using it.
if err := actualStateOfWorld.MarkVolumeMountAsUncertain(markVolumeOpts); err != nil {
klog.Errorf(volumeToMount.GenerateErrorDetailed("MountVolume.MarkVolumeMountAsUncertain failed", err).Error())
}
return volumetypes.NewOperationContext(eventErr, detailedErr, migrated) return volumetypes.NewOperationContext(eventErr, detailedErr, migrated)
} }