Merge pull request #100567 from jingxu97/mar/mark

Mark volume mount as uncertain in case of volume expansion fails
Commit 2da4d48e6d, authored by Kubernetes Prow Robot on 2021-07-13 22:20:26 -07:00, committed by GitHub.
4 changed files with 60 additions and 8 deletions

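The change in a nutshell: when the filesystem expansion that runs right after MountVolume.SetUp fails, the kubelet now records the mount as uncertain instead of treating it as not mounted. A minimal, self-contained Go sketch of that state transition (all names here are illustrative, not the actual kubelet API):

package main

import "fmt"

// mountState mirrors, in spirit, the states tracked by kubelet's actual
// state of world; the names are illustrative only.
type mountState int

const (
	notMounted mountState = iota
	mountUncertain
	mounted
)

func (s mountState) String() string {
	return [...]string{"NotMounted", "Uncertain", "Mounted"}[s]
}

// mountThenExpand sketches the new flow: SetUp first, then NodeExpandVolume.
// If expansion fails after a successful mount, the volume is recorded as
// Uncertain: not Mounted (the pod must not start using it), but not
// NotMounted either (the reconciler must still be able to tear it down).
func mountThenExpand(setUp, expand func() error) (mountState, error) {
	if err := setUp(); err != nil {
		return notMounted, err
	}
	if err := expand(); err != nil {
		return mountUncertain, err
	}
	return mounted, nil
}

func main() {
	state, err := mountThenExpand(
		func() error { return nil },                         // mount succeeds
		func() error { return fmt.Errorf("resize failed") }, // expansion fails
	)
	fmt.Println(state, err) // Uncertain resize failed
}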
pkg/kubelet/volumemanager/reconciler/reconciler.go

@@ -386,7 +386,7 @@ func (rc *reconciler) syncStates() {
 	// Get volumes information by reading the pod's directory
 	podVolumes, err := getVolumesFromPodDir(rc.kubeletPodsDir)
 	if err != nil {
-		klog.ErrorS(err, "Cannot get volumes from disk")
+		klog.ErrorS(err, "Cannot get volumes from disk, skipping sync states for volume reconstruction")
 		return
 	}
 	volumesNeedUpdate := make(map[v1.UniqueVolumeName]*reconstructedVolume)

pkg/kubelet/volumemanager/reconciler/reconciler_test.go

@@ -43,6 +43,7 @@ import (
 	"k8s.io/kubernetes/pkg/volume/util"
 	"k8s.io/kubernetes/pkg/volume/util/hostutil"
 	"k8s.io/kubernetes/pkg/volume/util/operationexecutor"
+	"k8s.io/kubernetes/pkg/volume/util/types"
 )
 const (
@@ -1004,6 +1005,7 @@ func Test_Run_Positive_VolumeFSResizeControllerAttachEnabled(t *testing.T) {
 		name            string
 		volumeMode      *v1.PersistentVolumeMode
 		expansionFailed bool
+		uncertainTest   bool
 		pvName          string
 		pvcSize         resource.Quantity
 		pvcStatusSize   resource.Quantity
@@ -1164,9 +1166,6 @@ func Test_Run_Positive_VolumeFSResizeControllerAttachEnabled(t *testing.T) {
 	if !cache.IsFSResizeRequiredError(podExistErr) {
 		t.Fatalf("Volume should be marked as fsResizeRequired, but receive unexpected error: %v", podExistErr)
 	}
-	// Start the reconciler again, we hope reconciler will perform the
-	// resize operation and clear the fsResizeRequired flag for volume.
 	go reconciler.Run(wait.NeverStop)
 	waitErr := retryWithExponentialBackOff(testOperationBackOffDuration, func() (done bool, err error) {
@@ -1174,7 +1173,7 @@ func Test_Run_Positive_VolumeFSResizeControllerAttachEnabled(t *testing.T) {
 		return mounted && err == nil, nil
 	})
 	if waitErr != nil {
-		t.Fatal("Volume resize should succeeded")
+		t.Fatalf("Volume resize should have succeeded: %v", waitErr)
 	}
 }
@@ -1371,6 +1370,8 @@ func Test_UncertainVolumeMountState(t *testing.T) {
 		unmountVolumeCount     int
 		volumeName             string
 		supportRemount         bool
+		pvcStatusSize          resource.Quantity
+		pvSize                 resource.Quantity
 	}{
 		{
 			name: "timed out operations should result in volume marked as uncertain",
@@ -1409,6 +1410,16 @@ func Test_UncertainVolumeMountState(t *testing.T) {
 			volumeName:     volumetesting.SuccessAndFailOnSetupVolumeName,
 			supportRemount: true,
 		},
+		{
+			name:                   "mount success but fail to expand filesystem",
+			volumeState:            operationexecutor.VolumeMountUncertain,
+			unmountDeviceCallCount: 1,
+			unmountVolumeCount:     1,
+			volumeName:             volumetesting.FailVolumeExpansion,
+			supportRemount:         true,
+			pvSize:                 resource.MustParse("10G"),
+			pvcStatusSize:          resource.MustParse("2G"),
+		},
 	}
 	modes := []v1.PersistentVolumeMode{v1.PersistentVolumeBlock, v1.PersistentVolumeFilesystem}
@@ -1431,6 +1442,11 @@ func Test_UncertainVolumeMountState(t *testing.T) {
 					VolumeMode: &mode,
 				},
 			}
+			if tc.pvSize.CmpInt64(0) > 0 {
+				pv.Spec.Capacity = v1.ResourceList{
+					v1.ResourceStorage: tc.pvSize,
+				}
+			}
 			pvc := &v1.PersistentVolumeClaim{
 				ObjectMeta: metav1.ObjectMeta{
 					Name: "pvc",
@@ -1441,6 +1457,13 @@ func Test_UncertainVolumeMountState(t *testing.T) {
 					VolumeMode: &mode,
 				},
 			}
+			if tc.pvcStatusSize.CmpInt64(0) > 0 {
+				pvc.Status = v1.PersistentVolumeClaimStatus{
+					Capacity: v1.ResourceList{
+						v1.ResourceStorage: tc.pvcStatusSize,
+					},
+				}
+			}
 			pod := &v1.Pod{
 				ObjectMeta: metav1.ObjectMeta{
 					Name: "pod1",
@@ -1525,7 +1548,7 @@ func Test_UncertainVolumeMountState(t *testing.T) {
 			}
 			if tc.volumeState == operationexecutor.VolumeMountUncertain {
-				waitForUncertainPodMount(t, volumeName, asw)
+				waitForUncertainPodMount(t, volumeName, podName, asw)
 			}
 			if tc.volumeState == operationexecutor.VolumeMounted {
@@ -1607,11 +1630,15 @@ func waitForGlobalMount(t *testing.T, volumeName v1.UniqueVolumeName, asw cache.
 	}
 }
-func waitForUncertainPodMount(t *testing.T, volumeName v1.UniqueVolumeName, asw cache.ActualStateOfWorld) {
+func waitForUncertainPodMount(t *testing.T, volumeName v1.UniqueVolumeName, podName types.UniquePodName, asw cache.ActualStateOfWorld) {
 	// check if volume is locally pod mounted in uncertain state
 	err := retryWithExponentialBackOff(
 		testOperationBackOffDuration,
 		func() (bool, error) {
+			mounted, _, err := asw.PodExistsInVolume(podName, volumeName)
+			if mounted || err != nil {
+				return false, nil
+			}
 			allMountedVolumes := asw.GetAllMountedVolumes()
 			for _, v := range allMountedVolumes {
 				if v.VolumeName == volumeName {

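These tests poll through a retryWithExponentialBackOff helper defined elsewhere in this file (not shown in this diff). A plausible shape for it, assuming k8s.io/apimachinery's wait package; the backoff parameters are guesses:

package volumetest // illustrative package name

import (
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
)

// retryWithExponentialBackOff polls fn at exponentially growing intervals
// until fn reports done, returns an error, or the step budget runs out.
func retryWithExponentialBackOff(initialDuration time.Duration, fn wait.ConditionFunc) error {
	backoff := wait.Backoff{
		Duration: initialDuration, // first interval
		Factor:   3,               // grow the interval 3x per attempt (assumed)
		Jitter:   0,
		Steps:    6, // give up after six attempts (assumed)
	}
	return wait.ExponentialBackoff(backoff, fn)
}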
pkg/volume/testing/testing.go

@@ -81,6 +81,8 @@ const (
 	// FailWithInUseVolumeName will cause NodeExpandVolume to result in FailedPrecondition error
 	FailWithInUseVolumeName = "fail-expansion-in-use"
+	FailVolumeExpansion = "fail-expansion-test"
 	deviceNotMounted     = "deviceNotMounted"
 	deviceMountUncertain = "deviceMountUncertain"
 	deviceMounted        = "deviceMounted"
@@ -467,6 +469,14 @@ func (plugin *FakeVolumePlugin) NodeExpand(resizeOptions NodeResizeOptions) (boo
 	if resizeOptions.VolumeSpec.Name() == FailWithInUseVolumeName {
 		return false, volumetypes.NewFailedPreconditionError("volume-in-use")
 	}
+	// Set up fakeVolumePlugin to not support STAGE_UNSTAGE, so that the
+	// volume can be node published before we try to resize it.
+	if resizeOptions.CSIVolumePhase == volume.CSIVolumeStaged {
+		return false, nil
+	}
+	if resizeOptions.CSIVolumePhase == volume.CSIVolumePublished && resizeOptions.VolumeSpec.Name() == FailVolumeExpansion {
+		return false, fmt.Errorf("fail volume expansion for volume: %s", FailVolumeExpansion)
+	}
 	return true, nil
 }

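Why the new test case pairs a 10G PV with a 2G PVC status: the kubelet treats an expansion as pending while the claim's observed capacity trails the PV spec, which is what routes the mount through NodeExpandVolume and into the fake plugin's failure path above. A rough sketch of that comparison (simplified; the real check lives in the volume manager's caches):

package volumetest // illustrative

import "k8s.io/apimachinery/pkg/api/resource"

// needsNodeExpansion sketches the comparison that leaves an expansion
// pending after mount: the capacity recorded in the PVC status is still
// smaller than the PV spec size.
func needsNodeExpansion(pvSpecSize, pvcStatusSize resource.Quantity) bool {
	return pvcStatusSize.Cmp(pvSpecSize) < 0 // e.g. 2G < 10G in the new test case
}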
pkg/volume/util/operationexecutor/operation_generator.go

@@ -722,6 +722,14 @@ func (og *operationGenerator) GenerateMountVolumeFunc(
 		if resizeError != nil {
 			klog.Errorf("MountVolume.NodeExpandVolume failed with %v", resizeError)
 			eventErr, detailedErr := volumeToMount.GenerateError("MountVolume.Setup failed while expanding volume", resizeError)
+			// At this point MountVolume.Setup already succeeded, so the volume must be added
+			// to the actual state of world so the reconciler can clean it up when needed. But
+			// because the resize failed, it must not be marked as mounted, or the pod could
+			// start using it. Marking the mount as uncertain satisfies both: the reconciler
+			// will trigger tear-down when the pod is deleted, and the pod will not use the volume.
+			if err := actualStateOfWorld.MarkVolumeMountAsUncertain(markOpts); err != nil {
+				klog.Errorf(volumeToMount.GenerateErrorDetailed("MountVolume.MarkVolumeMountAsUncertain failed", err).Error())
+			}
 			return volumetypes.NewOperationContext(eventErr, detailedErr, migrated)
 		}
 	}
@@ -732,7 +740,6 @@ func (og *operationGenerator) GenerateMountVolumeFunc(
 			eventErr, detailedErr := volumeToMount.GenerateError("MountVolume.MarkVolumeAsMounted failed", markVolMountedErr)
 			return volumetypes.NewOperationContext(eventErr, detailedErr, migrated)
 		}
-
 		return volumetypes.NewOperationContext(nil, nil, migrated)
 	}
@@ -1198,6 +1205,14 @@ func (og *operationGenerator) GenerateMapVolumeFunc(
 		if resizeError != nil {
 			klog.Errorf("MapVolume.NodeExpandVolume failed with %v", resizeError)
 			eventErr, detailedErr := volumeToMount.GenerateError("MapVolume.MarkVolumeAsMounted failed while expanding volume", resizeError)
+			// At this point the map operation already succeeded, so the volume must be added
+			// to the actual state of world so the reconciler can clean it up when needed. But
+			// because NodeExpandVolume failed, it must not be marked as mounted, or the pod
+			// could start using it. Marking the mount as uncertain satisfies both: the reconciler
+			// will trigger tear-down when the pod is deleted, and the pod will not use the volume.
+			if err := actualStateOfWorld.MarkVolumeMountAsUncertain(markVolumeOpts); err != nil {
+				klog.Errorf(volumeToMount.GenerateErrorDetailed("MapVolume.MarkVolumeMountAsUncertain failed", err).Error())
+			}
 			return volumetypes.NewOperationContext(eventErr, detailedErr, migrated)
 		}