diff --git a/pkg/api/persistentvolumeclaim/util_test.go b/pkg/api/persistentvolumeclaim/util_test.go index d75aadde912..f24d3f61b22 100644 --- a/pkg/api/persistentvolumeclaim/util_test.go +++ b/pkg/api/persistentvolumeclaim/util_test.go @@ -498,7 +498,7 @@ func TestDropDisabledFieldsFromStatus(t *testing.T) { name: "for:newPVC=hasResizeStatus,oldPVC=nil, featuregate=false should drop field", enableRecoverVolumeExpansionFailure: false, enableVolumeAttributesClass: false, - pvc: withResizeStatus(core.PersistentVolumeClaimNodeResizeFailed), + pvc: withResizeStatus(core.PersistentVolumeClaimNodeResizeInfeasible), oldPVC: nil, expected: getPVC(), }, @@ -506,25 +506,25 @@ func TestDropDisabledFieldsFromStatus(t *testing.T) { name: "for:newPVC=hasResizeStatus,oldPVC=doesnot,featuregate=RecoverVolumeExpansionFailure=true; should keep field", enableRecoverVolumeExpansionFailure: true, enableVolumeAttributesClass: false, - pvc: withResizeStatus(core.PersistentVolumeClaimNodeResizeFailed), + pvc: withResizeStatus(core.PersistentVolumeClaimNodeResizeInfeasible), oldPVC: getPVC(), - expected: withResizeStatus(core.PersistentVolumeClaimNodeResizeFailed), + expected: withResizeStatus(core.PersistentVolumeClaimNodeResizeInfeasible), }, { name: "for:newPVC=hasResizeStatus,oldPVC=hasResizeStatus,featuregate=RecoverVolumeExpansionFailure=true; should keep field", enableRecoverVolumeExpansionFailure: true, enableVolumeAttributesClass: false, - pvc: withResizeStatus(core.PersistentVolumeClaimNodeResizeFailed), - oldPVC: withResizeStatus(core.PersistentVolumeClaimNodeResizeFailed), - expected: withResizeStatus(core.PersistentVolumeClaimNodeResizeFailed), + pvc: withResizeStatus(core.PersistentVolumeClaimNodeResizeInfeasible), + oldPVC: withResizeStatus(core.PersistentVolumeClaimNodeResizeInfeasible), + expected: withResizeStatus(core.PersistentVolumeClaimNodeResizeInfeasible), }, { name: "for:newPVC=hasResizeStatus,oldPVC=hasResizeStatus,featuregate=false; should keep field", enableRecoverVolumeExpansionFailure: false, enableVolumeAttributesClass: false, - pvc: withResizeStatus(core.PersistentVolumeClaimNodeResizeFailed), - oldPVC: withResizeStatus(core.PersistentVolumeClaimNodeResizeFailed), - expected: withResizeStatus(core.PersistentVolumeClaimNodeResizeFailed), + pvc: withResizeStatus(core.PersistentVolumeClaimNodeResizeInfeasible), + oldPVC: withResizeStatus(core.PersistentVolumeClaimNodeResizeInfeasible), + expected: withResizeStatus(core.PersistentVolumeClaimNodeResizeInfeasible), }, { name: "for:newPVC=hasVolumeAttributeClass,oldPVC=nil, featuregate=false should drop field", diff --git a/pkg/apis/core/types.go b/pkg/apis/core/types.go index beff17be7ed..dc815d15425 100644 --- a/pkg/apis/core/types.go +++ b/pkg/apis/core/types.go @@ -548,16 +548,28 @@ type TypedObjectReference struct { } // PersistentVolumeClaimConditionType defines the condition of PV claim. -// Valid values are either "Resizing" or "FileSystemResizePending". +// Valid values are: +// - "Resizing", "FileSystemResizePending" +// +// If RecoverVolumeExpansionFailure feature gate is enabled, then following additional values can be expected: +// - "ControllerResizeError", "NodeResizeError" +// +// If VolumeAttributesClass feature gate is enabled, then following additional values can be expected: +// - "ModifyVolumeError", "ModifyingVolume" type PersistentVolumeClaimConditionType string -// These are valid conditions of Pvc +// These are valid conditions of PVC const ( // An user trigger resize of pvc has been started PersistentVolumeClaimResizing PersistentVolumeClaimConditionType = "Resizing" // PersistentVolumeClaimFileSystemResizePending - controller resize is finished and a file system resize is pending on node PersistentVolumeClaimFileSystemResizePending PersistentVolumeClaimConditionType = "FileSystemResizePending" + // PersistentVolumeClaimControllerResizeError indicates an error while resizing volume for size in the controller + PersistentVolumeClaimControllerResizeError PersistentVolumeClaimConditionType = "ControllerResizeError" + // PersistentVolumeClaimNodeResizeError indicates an error while resizing volume for size in the node. + PersistentVolumeClaimNodeResizeError PersistentVolumeClaimConditionType = "NodeResizeError" + // Applying the target VolumeAttributesClass encountered an error PersistentVolumeClaimVolumeModifyVolumeError PersistentVolumeClaimConditionType = "ModifyVolumeError" // Volume is being modified @@ -574,18 +586,19 @@ const ( // State set when resize controller starts resizing the volume in control-plane PersistentVolumeClaimControllerResizeInProgress ClaimResourceStatus = "ControllerResizeInProgress" - // State set when resize has failed in resize controller with a terminal error. + // State set when resize has failed in resize controller with a terminal unrecoverable error. // Transient errors such as timeout should not set this status and should leave allocatedResourceStatus // unmodified, so as resize controller can resume the volume expansion. - PersistentVolumeClaimControllerResizeFailed ClaimResourceStatus = "ControllerResizeFailed" + PersistentVolumeClaimControllerResizeInfeasible ClaimResourceStatus = "ControllerResizeInfeasible" // State set when resize controller has finished resizing the volume but further resizing of volume // is needed on the node. PersistentVolumeClaimNodeResizePending ClaimResourceStatus = "NodeResizePending" // State set when kubelet starts resizing the volume. PersistentVolumeClaimNodeResizeInProgress ClaimResourceStatus = "NodeResizeInProgress" - // State set when resizing has failed in kubelet with a terminal error. Transient errors don't set NodeResizeFailed - PersistentVolumeClaimNodeResizeFailed ClaimResourceStatus = "NodeResizeFailed" + // State set when resizing has failed in kubelet with a terminal unrecoverable error. Transient errors + // shouldn't set this status + PersistentVolumeClaimNodeResizeInfeasible ClaimResourceStatus = "NodeResizeInfeasible" ) // +enum diff --git a/pkg/apis/core/validation/validation.go b/pkg/apis/core/validation/validation.go index f21cbde7d3e..6be9b84a17e 100644 --- a/pkg/apis/core/validation/validation.go +++ b/pkg/apis/core/validation/validation.go @@ -2491,10 +2491,10 @@ func validatePersistentVolumeClaimResourceKey(value string, fldPath *field.Path) } var resizeStatusSet = sets.New(core.PersistentVolumeClaimControllerResizeInProgress, - core.PersistentVolumeClaimControllerResizeFailed, + core.PersistentVolumeClaimControllerResizeInfeasible, core.PersistentVolumeClaimNodeResizePending, core.PersistentVolumeClaimNodeResizeInProgress, - core.PersistentVolumeClaimNodeResizeFailed) + core.PersistentVolumeClaimNodeResizeInfeasible) // ValidatePersistentVolumeClaimStatusUpdate validates an update to status of a PersistentVolumeClaim func ValidatePersistentVolumeClaimStatusUpdate(newPvc, oldPvc *core.PersistentVolumeClaim, validationOpts PersistentVolumeClaimSpecValidationOptions) field.ErrorList { diff --git a/pkg/apis/core/validation/validation_test.go b/pkg/apis/core/validation/validation_test.go index 0dd04e92914..f7a724a1683 100644 --- a/pkg/apis/core/validation/validation_test.go +++ b/pkg/apis/core/validation/validation_test.go @@ -19081,7 +19081,7 @@ func TestValidatePersistentVolumeClaimStatusUpdate(t *testing.T) { }, }, core.PersistentVolumeClaimStatus{ AllocatedResourceStatuses: map[core.ResourceName]core.ClaimResourceStatus{ - core.ResourceStorage: core.PersistentVolumeClaimControllerResizeFailed, + core.ResourceStorage: core.PersistentVolumeClaimControllerResizeInfeasible, }, }) @@ -19111,7 +19111,7 @@ func TestValidatePersistentVolumeClaimStatusUpdate(t *testing.T) { }, }, core.PersistentVolumeClaimStatus{ AllocatedResourceStatuses: map[core.ResourceName]core.ClaimResourceStatus{ - core.ResourceStorage: core.PersistentVolumeClaimNodeResizeFailed, + core.ResourceStorage: core.PersistentVolumeClaimNodeResizeInfeasible, }, }) @@ -19155,7 +19155,7 @@ func TestValidatePersistentVolumeClaimStatusUpdate(t *testing.T) { validResizeKeyCustom: resource.MustParse("10Gi"), }, AllocatedResourceStatuses: map[core.ResourceName]core.ClaimResourceStatus{ - core.ResourceStorage: core.PersistentVolumeClaimControllerResizeFailed, + core.ResourceStorage: core.PersistentVolumeClaimControllerResizeInfeasible, validResizeKeyCustom: core.PersistentVolumeClaimControllerResizeInProgress, }, }) diff --git a/pkg/controller/volume/expand/expand_controller.go b/pkg/controller/volume/expand/expand_controller.go index 735bc388bdc..27ee243906c 100644 --- a/pkg/controller/volume/expand/expand_controller.go +++ b/pkg/controller/volume/expand/expand_controller.go @@ -68,6 +68,8 @@ type CSINameTranslator interface { GetCSINameFromInTreeName(pluginName string) (string, error) } +// Deprecated: This controller is deprecated and for now exists for the sole purpose of adding +// necessary annotations if necessary, so as volume can be expanded externally in the control-plane type expandController struct { // kubeClient is the kube API client used by volumehost to communicate with // the API server. diff --git a/pkg/generated/openapi/zz_generated.openapi.go b/pkg/generated/openapi/zz_generated.openapi.go index 9526e2c28dc..f0269988a50 100644 --- a/pkg/generated/openapi/zz_generated.openapi.go +++ b/pkg/generated/openapi/zz_generated.openapi.go @@ -25750,7 +25750,7 @@ func schema_k8sio_api_core_v1_PersistentVolumeClaimStatus(ref common.ReferenceCa Default: "", Type: []string{"string"}, Format: "", - Enum: []interface{}{"ControllerResizeFailed", "ControllerResizeInProgress", "NodeResizeFailed", "NodeResizeInProgress", "NodeResizePending"}, + Enum: []interface{}{"ControllerResizeInProgress", "ControllerResizeInfeasible", "NodeResizeInProgress", "NodeResizeInfeasible", "NodeResizePending"}, }, }, }, diff --git a/pkg/kubelet/volumemanager/cache/actual_state_of_world.go b/pkg/kubelet/volumemanager/cache/actual_state_of_world.go index 91e2d3436ac..38017a588f7 100644 --- a/pkg/kubelet/volumemanager/cache/actual_state_of_world.go +++ b/pkg/kubelet/volumemanager/cache/actual_state_of_world.go @@ -27,6 +27,7 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/sets" utilfeature "k8s.io/apiserver/pkg/util/feature" "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/features" @@ -211,10 +212,11 @@ func NewActualStateOfWorld( nodeName types.NodeName, volumePluginMgr *volume.VolumePluginMgr) ActualStateOfWorld { return &actualStateOfWorld{ - nodeName: nodeName, - attachedVolumes: make(map[v1.UniqueVolumeName]attachedVolume), - foundDuringReconstruction: make(map[v1.UniqueVolumeName]map[volumetypes.UniquePodName]types.UID), - volumePluginMgr: volumePluginMgr, + nodeName: nodeName, + attachedVolumes: make(map[v1.UniqueVolumeName]attachedVolume), + foundDuringReconstruction: make(map[v1.UniqueVolumeName]map[volumetypes.UniquePodName]types.UID), + volumePluginMgr: volumePluginMgr, + volumesWithFinalExpansionErrors: sets.New[v1.UniqueVolumeName](), } } @@ -247,6 +249,8 @@ type actualStateOfWorld struct { // from kubelet root directory when kubelet was restarted. foundDuringReconstruction map[v1.UniqueVolumeName]map[volumetypes.UniquePodName]types.UID + volumesWithFinalExpansionErrors sets.Set[v1.UniqueVolumeName] + // volumePluginMgr is the volume plugin manager used to create volume // plugin objects. volumePluginMgr *volume.VolumePluginMgr @@ -396,6 +400,27 @@ func (asw *actualStateOfWorld) MarkVolumeAsDetached( asw.DeleteVolume(volumeName) } +func (asw *actualStateOfWorld) MarkVolumeExpansionFailedWithFinalError(volumeName v1.UniqueVolumeName) { + asw.Lock() + defer asw.Unlock() + + asw.volumesWithFinalExpansionErrors.Insert(volumeName) +} + +func (asw *actualStateOfWorld) RemoveVolumeFromFailedWithFinalErrors(volumeName v1.UniqueVolumeName) { + asw.Lock() + defer asw.Unlock() + + asw.volumesWithFinalExpansionErrors.Delete(volumeName) +} + +func (asw *actualStateOfWorld) CheckVolumeInFailedExpansionWithFinalErrors(volumeName v1.UniqueVolumeName) bool { + asw.RLock() + defer asw.RUnlock() + + return asw.volumesWithFinalExpansionErrors.Has(volumeName) +} + func (asw *actualStateOfWorld) IsVolumeReconstructed(volumeName v1.UniqueVolumeName, podName volumetypes.UniquePodName) bool { volumeState := asw.GetVolumeMountState(volumeName, podName) diff --git a/pkg/volume/csi/expander.go b/pkg/volume/csi/expander.go index d6aae060101..e56627203af 100644 --- a/pkg/volume/csi/expander.go +++ b/pkg/volume/csi/expander.go @@ -119,6 +119,11 @@ func (c *csiPlugin) nodeExpandWithClient( failedConditionErr := fmt.Errorf("Expander.NodeExpand failed to expand the volume : %w", volumetypes.NewFailedPreconditionError(err.Error())) return false, failedConditionErr } + + if isInfeasibleError(err) { + infeasibleError := volumetypes.NewInfeasibleError(fmt.Sprintf("Expander.NodeExpand failed to expand the volume %s", err.Error())) + return false, infeasibleError + } return false, fmt.Errorf("Expander.NodeExpand failed to expand the volume : %w", err) } return true, nil @@ -135,3 +140,25 @@ func inUseError(err error) bool { // More info - https://github.com/container-storage-interface/spec/blob/master/spec.md#controllerexpandvolume-errors return st.Code() == codes.FailedPrecondition } + +// IsInfeasibleError returns true for grpc errors that are considered terminal in a way +// that they indicate CSI operation as infeasible. +// This function returns a subset of final errors. All infeasible errors are also final errors. +func isInfeasibleError(err error) bool { + st, ok := status.FromError(err) + if !ok { + // This is not gRPC error. The operation must have failed before gRPC + // method was called, otherwise we would get gRPC error. + // We don't know if any previous volume operation is in progress, be on the safe side. + return false + } + switch st.Code() { + case codes.InvalidArgument, + codes.OutOfRange, + codes.NotFound: + return true + } + // All other errors mean that operation either did not + // even start or failed. It is for sure are not infeasible errors + return false +} diff --git a/pkg/volume/testing/testing.go b/pkg/volume/testing/testing.go index 1710dc63f32..f0ebe444811 100644 --- a/pkg/volume/testing/testing.go +++ b/pkg/volume/testing/testing.go @@ -88,7 +88,8 @@ const ( FailVolumeExpansion = "fail-expansion-test" - AlwaysFailNodeExpansion = "always-fail-node-expansion" + InfeasibleNodeExpansion = "infeasible-fail-node-expansion" + OtherFinalNodeExpansionError = "other-final-node-expansion-error" deviceNotMounted = "deviceNotMounted" deviceMountUncertain = "deviceMountUncertain" @@ -179,6 +180,7 @@ type FakeVolumePlugin struct { Host volume.VolumeHost Config volume.VolumeConfig LastProvisionerOptions volume.VolumeOptions + LastResizeOptions volume.NodeResizeOptions NewAttacherCallCount int NewDetacherCallCount int NodeExpandCallCount int @@ -493,6 +495,7 @@ func (plugin *FakeVolumePlugin) RequiresFSResize() bool { func (plugin *FakeVolumePlugin) NodeExpand(resizeOptions volume.NodeResizeOptions) (bool, error) { plugin.NodeExpandCallCount++ + plugin.LastResizeOptions = resizeOptions if resizeOptions.VolumeSpec.Name() == FailWithInUseVolumeName { return false, volumetypes.NewFailedPreconditionError("volume-in-use") } @@ -500,8 +503,12 @@ func (plugin *FakeVolumePlugin) NodeExpand(resizeOptions volume.NodeResizeOption return false, volumetypes.NewOperationNotSupportedError("volume-unsupported") } - if resizeOptions.VolumeSpec.Name() == AlwaysFailNodeExpansion { - return false, fmt.Errorf("test failure: NodeExpand") + if resizeOptions.VolumeSpec.Name() == InfeasibleNodeExpansion { + return false, volumetypes.NewInfeasibleError("infeasible-expansion") + } + + if resizeOptions.VolumeSpec.Name() == OtherFinalNodeExpansionError { + return false, fmt.Errorf("other-final-node-expansion-error") } if resizeOptions.VolumeSpec.Name() == FailVolumeExpansion { diff --git a/pkg/volume/util/operationexecutor/node_expander.go b/pkg/volume/util/operationexecutor/node_expander.go index cf9c57504e8..19547c94e09 100644 --- a/pkg/volume/util/operationexecutor/node_expander.go +++ b/pkg/volume/util/operationexecutor/node_expander.go @@ -39,6 +39,10 @@ type NodeExpander struct { pvCap resource.Quantity resizeStatus v1.ClaimResourceStatus + // indicates that if volume expansion failed on the node, then current expansion should be marked + // as infeasible so as controller can reconcile the resizing operation by using new user requested size. + markExpansionInfeasibleOnFailure bool + // pvcAlreadyUpdated if true indicates that although we are calling NodeExpandVolume on the kubelet // PVC has already been updated - possibly because expansion already succeeded on different node. // This can happen when a RWX PVC is expanded. @@ -79,6 +83,17 @@ func (ne *NodeExpander) runPreCheck() bool { ne.resizeStatus = currentStatus } + pvcSpecCap := ne.pvc.Spec.Resources.Requests[v1.ResourceStorage] + + // usually when are performing node expansion, we expect pv size and pvc spec size + // to be the same, but if user has edited pvc since then and volume expansion failed + // with final error, then we should let controller reconcile this state, by marking entire + // node expansion as infeasible. + if pvcSpecCap.Cmp(ne.pluginResizeOpts.NewSize) != 0 && + ne.actualStateOfWorld.CheckVolumeInFailedExpansionWithFinalErrors(ne.vmt.VolumeName) { + ne.markExpansionInfeasibleOnFailure = true + } + // PVC is already expanded but we are still trying to expand the volume because // last recorded size in ASOW is older. This can happen for RWX volume types. if ne.pvcStatusCap.Cmp(ne.pluginResizeOpts.NewSize) >= 0 && ne.resizeStatus == "" { @@ -124,9 +139,17 @@ func (ne *NodeExpander) expandOnPlugin() (bool, error, testResponseData) { if resizeErr != nil { if volumetypes.IsOperationFinishedError(resizeErr) { var markFailedError error - ne.pvc, markFailedError = util.MarkNodeExpansionFailed(ne.pvc, ne.kubeClient) - if markFailedError != nil { - klog.Errorf(ne.vmt.GenerateErrorDetailed("MountMount.NodeExpandVolume failed to mark node expansion as failed: %v", err).Error()) + ne.actualStateOfWorld.MarkVolumeExpansionFailedWithFinalError(ne.vmt.VolumeName) + if volumetypes.IsInfeasibleError(resizeErr) || ne.markExpansionInfeasibleOnFailure { + ne.pvc, markFailedError = util.MarkNodeExpansionInfeasible(ne.pvc, ne.kubeClient, resizeErr) + if markFailedError != nil { + klog.Errorf(ne.vmt.GenerateErrorDetailed("MountMount.NodeExpandVolume failed to mark node expansion as failed: %v", err).Error()) + } + } else { + ne.pvc, markFailedError = util.MarkNodeExpansionFailedCondition(ne.pvc, ne.kubeClient, resizeErr) + if markFailedError != nil { + klog.Errorf(ne.vmt.GenerateErrorDetailed("MountMount.NodeExpandVolume failed to mark node expansion as failed: %v", err).Error()) + } } } diff --git a/pkg/volume/util/operationexecutor/node_expander_test.go b/pkg/volume/util/operationexecutor/node_expander_test.go index 189663359a3..40d09e15c26 100644 --- a/pkg/volume/util/operationexecutor/node_expander_test.go +++ b/pkg/volume/util/operationexecutor/node_expander_test.go @@ -17,19 +17,35 @@ limitations under the License. package operationexecutor import ( + "sync" "testing" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/apimachinery/pkg/util/sets" utilfeature "k8s.io/apiserver/pkg/util/feature" featuregatetesting "k8s.io/component-base/featuregate/testing" "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/volume" volumetesting "k8s.io/kubernetes/pkg/volume/testing" + volumetypes "k8s.io/kubernetes/pkg/volume/util/types" ) +type fakeActualStateOfWorld struct { + volumesWithFinalExpansionErrors sets.Set[v1.UniqueVolumeName] + sync.RWMutex +} + +var _ ActualStateOfWorldMounterUpdater = &fakeActualStateOfWorld{} + +func newFakeActualStateOfWorld() *fakeActualStateOfWorld { + return &fakeActualStateOfWorld{ + volumesWithFinalExpansionErrors: sets.New[v1.UniqueVolumeName](), + } +} + func TestNodeExpander(t *testing.T) { - nodeResizeFailed := v1.PersistentVolumeClaimNodeResizeFailed + nodeResizeFailed := v1.PersistentVolumeClaimNodeResizeInfeasible nodeResizePending := v1.PersistentVolumeClaimNodeResizePending var tests = []struct { @@ -46,6 +62,7 @@ func TestNodeExpander(t *testing.T) { expectedResizeStatus v1.ClaimResourceStatus expectedStatusSize resource.Quantity expectResizeCall bool + expectFinalErrors bool assumeResizeOpAsFinished bool expectError bool }{ @@ -57,6 +74,7 @@ func TestNodeExpander(t *testing.T) { expectedResizeStatus: nodeResizeFailed, expectResizeCall: false, assumeResizeOpAsFinished: true, + expectFinalErrors: false, expectedStatusSize: resource.MustParse("1G"), }, { @@ -66,16 +84,29 @@ func TestNodeExpander(t *testing.T) { expectedResizeStatus: "", expectResizeCall: true, assumeResizeOpAsFinished: true, + expectFinalErrors: false, expectedStatusSize: resource.MustParse("2G"), }, { - name: "pv.spec.cap > pvc.status.cap, resizeStatus=node_expansion_pending, reize_op=failing", - pvc: getTestPVC(volumetesting.AlwaysFailNodeExpansion, "2G", "1G", "2G", &nodeResizePending), - pv: getTestPV(volumetesting.AlwaysFailNodeExpansion, "2G"), + name: "pv.spec.cap > pvc.status.cap, resizeStatus=node_expansion_pending, reize_op=infeasible", + pvc: getTestPVC(volumetesting.InfeasibleNodeExpansion, "2G", "1G", "2G", &nodeResizePending), + pv: getTestPV(volumetesting.InfeasibleNodeExpansion, "2G"), expectError: true, expectedResizeStatus: nodeResizeFailed, expectResizeCall: true, assumeResizeOpAsFinished: true, + expectFinalErrors: true, + expectedStatusSize: resource.MustParse("1G"), + }, + { + name: "pv.spec.cap > pvc.status.cap, resizeStatus=node_expansion_pending, reize_op=failing", + pvc: getTestPVC(volumetesting.OtherFinalNodeExpansionError, "2G", "1G", "2G", &nodeResizePending), + pv: getTestPV(volumetesting.OtherFinalNodeExpansionError, "2G"), + expectError: true, + expectedResizeStatus: v1.PersistentVolumeClaimNodeResizeInProgress, + expectResizeCall: true, + assumeResizeOpAsFinished: true, + expectFinalErrors: true, expectedStatusSize: resource.MustParse("1G"), }, { @@ -86,6 +117,7 @@ func TestNodeExpander(t *testing.T) { expectedResizeStatus: "", expectResizeCall: true, assumeResizeOpAsFinished: true, + expectFinalErrors: false, expectedStatusSize: resource.MustParse("2G"), }, } @@ -114,12 +146,13 @@ func TestNodeExpander(t *testing.T) { if actualSize == nil { actualSize = pvc.Status.Capacity.Storage() } + asow := newFakeActualStateOfWorld() resizeOp := nodeResizeOperationOpts{ pvc: pvc, pv: pv, volumePlugin: fakePlugin, vmt: vmt, - actualStateOfWorld: nil, + actualStateOfWorld: asow, pluginResizeOpts: volume.NodeResizeOptions{ VolumeSpec: vmt.VolumeSpec, NewSize: *desiredSize, @@ -153,9 +186,110 @@ func TestNodeExpander(t *testing.T) { if test.expectedResizeStatus != resizeStatus { t.Errorf("For test %s, expected resizeStatus %v, got %v", test.name, test.expectedResizeStatus, resizeStatus) } + + if test.expectFinalErrors != asow.CheckVolumeInFailedExpansionWithFinalErrors(vmt.VolumeName) { + t.Errorf("For test %s, expected final errors %t, got %t", test.name, test.expectFinalErrors, !test.expectFinalErrors) + } if pvcStatusCap.Cmp(test.expectedStatusSize) != 0 { t.Errorf("For test %s, expected status size %s, got %s", test.name, test.expectedStatusSize.String(), pvcStatusCap.String()) } }) } } + +// CheckAndMarkDeviceUncertainViaReconstruction implements ActualStateOfWorldMounterUpdater. +func (f *fakeActualStateOfWorld) CheckAndMarkDeviceUncertainViaReconstruction(volumeName v1.UniqueVolumeName, deviceMountPath string) bool { + panic("unimplemented") +} + +// CheckAndMarkVolumeAsUncertainViaReconstruction implements ActualStateOfWorldMounterUpdater. +func (f *fakeActualStateOfWorld) CheckAndMarkVolumeAsUncertainViaReconstruction(opts MarkVolumeOpts) (bool, error) { + panic("unimplemented") +} + +// CheckVolumeInFailedExpansionWithFinalErrors implements ActualStateOfWorldMounterUpdater. +func (f *fakeActualStateOfWorld) CheckVolumeInFailedExpansionWithFinalErrors(volumeName v1.UniqueVolumeName) bool { + f.RLock() + defer f.RUnlock() + return f.volumesWithFinalExpansionErrors.Has(volumeName) +} + +// GetDeviceMountState implements ActualStateOfWorldMounterUpdater. +func (f *fakeActualStateOfWorld) GetDeviceMountState(volumeName v1.UniqueVolumeName) DeviceMountState { + panic("unimplemented") +} + +// GetVolumeMountState implements ActualStateOfWorldMounterUpdater. +func (f *fakeActualStateOfWorld) GetVolumeMountState(volumName v1.UniqueVolumeName, podName volumetypes.UniquePodName) VolumeMountState { + panic("unimplemented") +} + +// IsVolumeDeviceReconstructed implements ActualStateOfWorldMounterUpdater. +func (f *fakeActualStateOfWorld) IsVolumeDeviceReconstructed(volumeName v1.UniqueVolumeName) bool { + panic("unimplemented") +} + +// IsVolumeMountedElsewhere implements ActualStateOfWorldMounterUpdater. +func (f *fakeActualStateOfWorld) IsVolumeMountedElsewhere(volumeName v1.UniqueVolumeName, podName volumetypes.UniquePodName) bool { + panic("unimplemented") +} + +// IsVolumeReconstructed implements ActualStateOfWorldMounterUpdater. +func (f *fakeActualStateOfWorld) IsVolumeReconstructed(volumeName v1.UniqueVolumeName, podName volumetypes.UniquePodName) bool { + panic("unimplemented") +} + +// MarkDeviceAsMounted implements ActualStateOfWorldMounterUpdater. +func (f *fakeActualStateOfWorld) MarkDeviceAsMounted(volumeName v1.UniqueVolumeName, devicePath string, deviceMountPath string, seLinuxMountContext string) error { + panic("unimplemented") +} + +// MarkDeviceAsUncertain implements ActualStateOfWorldMounterUpdater. +func (f *fakeActualStateOfWorld) MarkDeviceAsUncertain(volumeName v1.UniqueVolumeName, devicePath string, deviceMountPath string, seLinuxMountContext string) error { + panic("unimplemented") +} + +// MarkDeviceAsUnmounted implements ActualStateOfWorldMounterUpdater. +func (f *fakeActualStateOfWorld) MarkDeviceAsUnmounted(volumeName v1.UniqueVolumeName) error { + panic("unimplemented") +} + +// MarkForInUseExpansionError implements ActualStateOfWorldMounterUpdater. +func (f *fakeActualStateOfWorld) MarkForInUseExpansionError(volumeName v1.UniqueVolumeName) { + panic("unimplemented") +} + +// MarkVolumeAsMounted implements ActualStateOfWorldMounterUpdater. +func (f *fakeActualStateOfWorld) MarkVolumeAsMounted(markVolumeOpts MarkVolumeOpts) error { + panic("unimplemented") +} + +// MarkVolumeAsResized implements ActualStateOfWorldMounterUpdater. +func (f *fakeActualStateOfWorld) MarkVolumeAsResized(volumeName v1.UniqueVolumeName, claimSize *resource.Quantity) bool { + panic("unimplemented") +} + +// MarkVolumeAsUnmounted implements ActualStateOfWorldMounterUpdater. +func (f *fakeActualStateOfWorld) MarkVolumeAsUnmounted(podName volumetypes.UniquePodName, volumeName v1.UniqueVolumeName) error { + panic("unimplemented") +} + +// MarkVolumeExpansionFailedWithFinalError implements ActualStateOfWorldMounterUpdater. +func (f *fakeActualStateOfWorld) MarkVolumeExpansionFailedWithFinalError(volumeName v1.UniqueVolumeName) { + f.Lock() + defer f.Unlock() + + f.volumesWithFinalExpansionErrors.Insert(volumeName) +} + +// MarkVolumeMountAsUncertain implements ActualStateOfWorldMounterUpdater. +func (f *fakeActualStateOfWorld) MarkVolumeMountAsUncertain(markVolumeOpts MarkVolumeOpts) error { + panic("unimplemented") +} + +// RemoveVolumeFromFailedWithFinalErrors implements ActualStateOfWorldMounterUpdater. +func (f *fakeActualStateOfWorld) RemoveVolumeFromFailedWithFinalErrors(volumeName v1.UniqueVolumeName) { + f.Lock() + defer f.Unlock() + f.volumesWithFinalExpansionErrors.Delete(volumeName) +} diff --git a/pkg/volume/util/operationexecutor/operation_executor.go b/pkg/volume/util/operationexecutor/operation_executor.go index 85646de50b2..eaca8ca2282 100644 --- a/pkg/volume/util/operationexecutor/operation_executor.go +++ b/pkg/volume/util/operationexecutor/operation_executor.go @@ -233,6 +233,18 @@ type ActualStateOfWorldMounterUpdater interface { // IsVolumeDeviceReconstructed returns true if volume device identified by volumeName has been // found during reconstruction. IsVolumeDeviceReconstructed(volumeName v1.UniqueVolumeName) bool + + // MarkVolumeExpansionFailedWithFinalError marks volume as failed with a final error, so as + // this state doesn't have to be recorded in the API server + MarkVolumeExpansionFailedWithFinalError(volumeName v1.UniqueVolumeName) + + // RemoveVolumeFromFailedWithFinalErrors removes volume from list that indicates that volume + // has failed expansion with a final error + RemoveVolumeFromFailedWithFinalErrors(volumeName v1.UniqueVolumeName) + + // CheckVolumeInFailedExpansionWithFinalErrors verifies if volume expansion has failed with a final + // error + CheckVolumeInFailedExpansionWithFinalErrors(volumeName v1.UniqueVolumeName) bool } // ActualStateOfWorldAttacherUpdater defines a set of operations updating the diff --git a/pkg/volume/util/operationexecutor/operation_generator.go b/pkg/volume/util/operationexecutor/operation_generator.go index 06f96024c33..30788ee2844 100644 --- a/pkg/volume/util/operationexecutor/operation_generator.go +++ b/pkg/volume/util/operationexecutor/operation_generator.go @@ -1679,6 +1679,8 @@ func (og *operationGenerator) GenerateExpandAndRecoverVolumeFunc( }, nil } +// Deprecated: This function should not called by any controller code in future and should be removed +// from kubernetes code func (og *operationGenerator) expandAndRecoverFunction(resizeOpts inTreeResizeOpts) inTreeResizeResponse { pvc := resizeOpts.pvc pv := resizeOpts.pv @@ -1718,7 +1720,7 @@ func (og *operationGenerator) expandAndRecoverFunction(resizeOpts inTreeResizeOp case v1.PersistentVolumeClaimControllerResizeInProgress, v1.PersistentVolumeClaimNodeResizePending, v1.PersistentVolumeClaimNodeResizeInProgress, - v1.PersistentVolumeClaimNodeResizeFailed: + v1.PersistentVolumeClaimNodeResizeInfeasible: if allocatedSize != nil { newSize = *allocatedSize } @@ -1742,14 +1744,14 @@ func (og *operationGenerator) expandAndRecoverFunction(resizeOpts inTreeResizeOp // we don't need to do any work. We could be here because of a spurious update event. // This is case #1 return resizeResponse - case v1.PersistentVolumeClaimNodeResizeFailed: + case v1.PersistentVolumeClaimNodeResizeInfeasible: // This is case#3 pvc, err = og.markForPendingNodeExpansion(pvc, pv) resizeResponse.pvc = pvc resizeResponse.err = err return resizeResponse case v1.PersistentVolumeClaimControllerResizeInProgress, - v1.PersistentVolumeClaimControllerResizeFailed: + v1.PersistentVolumeClaimControllerResizeInfeasible: // This is case#2 or it could also be case#4 when user manually shrunk the PVC // after expanding it. if allocatedSize != nil { @@ -2001,6 +2003,7 @@ func (og *operationGenerator) expandVolumeDuringMount(volumeToMount VolumeToMoun rsOpts.NewSize = pvSpecCap rsOpts.OldSize = pvcStatusCap + rsOpts.VolumeSpec = volumeToMount.VolumeSpec resizeOp := nodeResizeOperationOpts{ vmt: volumeToMount, pvc: pvc, diff --git a/pkg/volume/util/operationexecutor/operation_generator_test.go b/pkg/volume/util/operationexecutor/operation_generator_test.go index f0527c39c15..0c267222e89 100644 --- a/pkg/volume/util/operationexecutor/operation_generator_test.go +++ b/pkg/volume/util/operationexecutor/operation_generator_test.go @@ -94,7 +94,7 @@ func TestOperationGenerator_GenerateUnmapVolumeFunc_PluginName(t *testing.T) { func TestOperationGenerator_GenerateExpandAndRecoverVolumeFunc(t *testing.T) { nodeResizePending := v1.PersistentVolumeClaimNodeResizePending - nodeResizeFailed := v1.PersistentVolumeClaimNodeResizeFailed + nodeResizeFailed := v1.PersistentVolumeClaimNodeResizeInfeasible var tests = []struct { name string pvc *v1.PersistentVolumeClaim @@ -194,7 +194,7 @@ func TestOperationGenerator_nodeExpandVolume(t *testing.T) { return &x } - nodeResizeFailed := v1.PersistentVolumeClaimNodeResizeFailed + nodeResizeFailed := v1.PersistentVolumeClaimNodeResizeInfeasible nodeResizePending := v1.PersistentVolumeClaimNodeResizePending var tests = []struct { name string @@ -217,7 +217,7 @@ func TestOperationGenerator_nodeExpandVolume(t *testing.T) { pvc: getTestPVC("test-vol0", "2G", "1G", "", &nodeResizeFailed), pv: getTestPV("test-vol0", "2G"), - expectedResizeStatus: v1.PersistentVolumeClaimNodeResizeFailed, + expectedResizeStatus: v1.PersistentVolumeClaimNodeResizeInfeasible, resizeCallCount: 0, expectedStatusSize: resource.MustParse("1G"), }, @@ -231,10 +231,10 @@ func TestOperationGenerator_nodeExpandVolume(t *testing.T) { }, { name: "pv.spec.cap > pvc.status.cap, resizeStatus=node_expansion_pending, reize_op=failing", - pvc: getTestPVC(volumetesting.AlwaysFailNodeExpansion, "2G", "1G", "2G", &nodeResizePending), - pv: getTestPV(volumetesting.AlwaysFailNodeExpansion, "2G"), + pvc: getTestPVC(volumetesting.InfeasibleNodeExpansion, "2G", "1G", "2G", &nodeResizePending), + pv: getTestPV(volumetesting.InfeasibleNodeExpansion, "2G"), expectError: true, - expectedResizeStatus: v1.PersistentVolumeClaimNodeResizeFailed, + expectedResizeStatus: v1.PersistentVolumeClaimNodeResizeInfeasible, resizeCallCount: 1, expectedStatusSize: resource.MustParse("1G"), }, @@ -265,7 +265,7 @@ func TestOperationGenerator_nodeExpandVolume(t *testing.T) { desiredSize: getSizeFunc("2G"), actualSize: getSizeFunc("1G"), - expectedResizeStatus: v1.PersistentVolumeClaimNodeResizeFailed, + expectedResizeStatus: v1.PersistentVolumeClaimNodeResizeInfeasible, resizeCallCount: 0, expectedStatusSize: resource.MustParse("2G"), }, @@ -302,9 +302,10 @@ func TestOperationGenerator_nodeExpandVolume(t *testing.T) { NewSize: *desiredSize, OldSize: *actualSize, } + asow := newFakeActualStateOfWorld() ogInstance, _ := og.(*operationGenerator) - _, err := ogInstance.nodeExpandVolume(vmt, nil, pluginResizeOpts) + _, err := ogInstance.nodeExpandVolume(vmt, asow, pluginResizeOpts) if !test.expectError && err != nil { t.Errorf("For test %s, expected no error got: %v", test.name, err) @@ -319,6 +320,89 @@ func TestOperationGenerator_nodeExpandVolume(t *testing.T) { } } +func TestExpandDuringMount(t *testing.T) { + nodeResizePending := v1.PersistentVolumeClaimNodeResizePending + var tests = []struct { + name string + pvc *v1.PersistentVolumeClaim + pv *v1.PersistentVolume + + // desired size, defaults to pv.Spec.Capacity + desiredSize *resource.Quantity + // actualSize, defaults to pvc.Status.Capacity + actualSize *resource.Quantity + + // expectations of test + expectedResizeStatus v1.ClaimResourceStatus + expectedStatusSize resource.Quantity + resizeCallCount int + expectError bool + }{ + { + name: "pv.spec.cap > pvc.status.cap, resizeStatus=node_expansion_pending", + pvc: getTestPVC("test-vol0", "2G", "1G", "2G", &nodeResizePending), + pv: getTestPV("test-vol0", "2G"), + expectedResizeStatus: "", + resizeCallCount: 1, + expectedStatusSize: resource.MustParse("2G"), + }, + } + for i := range tests { + test := tests[i] + t.Run(test.name, func(t *testing.T) { + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.RecoverVolumeExpansionFailure, true) + volumePluginMgr, fakePlugin := volumetesting.GetTestKubeletVolumePluginMgr(t) + test.pv.Spec.ClaimRef = &v1.ObjectReference{ + Namespace: test.pvc.Namespace, + Name: test.pvc.Name, + } + + pvc := test.pvc + pv := test.pv + pod := getTestPod("test-pod", pvc.Name) + og := getTestOperatorGeneratorWithPVPVC(volumePluginMgr, pvc, pv) + vmt := VolumeToMount{ + Pod: pod, + VolumeName: v1.UniqueVolumeName(pv.Name), + VolumeSpec: volume.NewSpecFromPersistentVolume(pv, false), + } + desiredSize := test.desiredSize + if desiredSize == nil { + desiredSize = pv.Spec.Capacity.Storage() + } + actualSize := test.actualSize + if actualSize == nil { + actualSize = pvc.Status.Capacity.Storage() + } + pluginResizeOpts := volume.NodeResizeOptions{ + NewSize: *desiredSize, + OldSize: *actualSize, + } + asow := newFakeActualStateOfWorld() + + ogInstance, _ := og.(*operationGenerator) + _, err := ogInstance.expandVolumeDuringMount(vmt, asow, pluginResizeOpts) + + if !test.expectError && err != nil { + t.Errorf("For test %s, expected no error got: %v", test.name, err) + } + if test.expectError && err == nil { + t.Errorf("For test %s, expected error but got none", test.name) + } + if test.resizeCallCount != fakePlugin.NodeExpandCallCount { + t.Errorf("for test %s, expected node-expand call count to be %d, got %d", test.name, test.resizeCallCount, fakePlugin.NodeExpandCallCount) + } + + if test.resizeCallCount > 0 { + resizeOptions := fakePlugin.LastResizeOptions + if resizeOptions.VolumeSpec == nil { + t.Errorf("for test %s, expected volume spec to be set", test.name) + } + } + }) + } +} + func getTestPod(podName, pvcName string) *v1.Pod { return &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ diff --git a/pkg/volume/util/resize_util.go b/pkg/volume/util/resize_util.go index 0f1495f7b34..2bf54b4b85c 100644 --- a/pkg/volume/util/resize_util.go +++ b/pkg/volume/util/resize_util.go @@ -40,6 +40,8 @@ var ( knownResizeConditions = map[v1.PersistentVolumeClaimConditionType]bool{ v1.PersistentVolumeClaimFileSystemResizePending: true, v1.PersistentVolumeClaimResizing: true, + v1.PersistentVolumeClaimControllerResizeError: true, + v1.PersistentVolumeClaimNodeResizeError: true, } // AnnPreResizeCapacity annotation is added to a PV when expanding volume. @@ -140,7 +142,7 @@ func MarkResizeInProgressWithResizer( } conditions := []v1.PersistentVolumeClaimCondition{progressCondition} newPVC := pvc.DeepCopy() - newPVC = MergeResizeConditionOnPVC(newPVC, conditions) + newPVC = MergeResizeConditionOnPVC(newPVC, conditions, false /* keepOldResizeConditions */) newPVC = setResizer(newPVC, resizerName) return PatchPVCStatus(pvc /*oldPVC*/, newPVC, kubeClient) } @@ -154,7 +156,7 @@ func MarkControllerReisizeInProgress(pvc *v1.PersistentVolumeClaim, resizerName } conditions := []v1.PersistentVolumeClaimCondition{progressCondition} newPVC := pvc.DeepCopy() - newPVC = MergeResizeConditionOnPVC(newPVC, conditions) + newPVC = MergeResizeConditionOnPVC(newPVC, conditions, false /* keepOldResizeConditions */) newPVC = mergeStorageResourceStatus(newPVC, v1.PersistentVolumeClaimControllerResizeInProgress) newPVC = mergeStorageAllocatedResources(newPVC, newSize) newPVC = setResizer(newPVC, resizerName) @@ -196,7 +198,7 @@ func MarkForFSResize( newPVC = mergeStorageResourceStatus(newPVC, v1.PersistentVolumeClaimNodeResizePending) } - newPVC = MergeResizeConditionOnPVC(newPVC, conditions) + newPVC = MergeResizeConditionOnPVC(newPVC, conditions, true /* keepOldResizeConditions */) updatedPVC, err := PatchPVCStatus(pvc /*oldPVC*/, newPVC, kubeClient) return updatedPVC, err } @@ -229,16 +231,25 @@ func MarkFSResizeFinished( } } - newPVC = MergeResizeConditionOnPVC(newPVC, []v1.PersistentVolumeClaimCondition{}) + newPVC = MergeResizeConditionOnPVC(newPVC, []v1.PersistentVolumeClaimCondition{}, false /* keepOldResizeConditions */) updatedPVC, err := PatchPVCStatus(pvc /*oldPVC*/, newPVC, kubeClient) return updatedPVC, err } -// MarkNodeExpansionFailed marks a PVC for node expansion as failed. Kubelet should not retry expansion +// MarkNodeExpansionInfeasible marks a PVC for node expansion as failed. Kubelet should not retry expansion // of volumes which are in failed state. -func MarkNodeExpansionFailed(pvc *v1.PersistentVolumeClaim, kubeClient clientset.Interface) (*v1.PersistentVolumeClaim, error) { +func MarkNodeExpansionInfeasible(pvc *v1.PersistentVolumeClaim, kubeClient clientset.Interface, err error) (*v1.PersistentVolumeClaim, error) { newPVC := pvc.DeepCopy() - newPVC = mergeStorageResourceStatus(newPVC, v1.PersistentVolumeClaimNodeResizeFailed) + newPVC = mergeStorageResourceStatus(newPVC, v1.PersistentVolumeClaimNodeResizeInfeasible) + errorCondition := v1.PersistentVolumeClaimCondition{ + Type: v1.PersistentVolumeClaimNodeResizeError, + Status: v1.ConditionTrue, + LastTransitionTime: metav1.Now(), + Message: fmt.Sprintf("failed to expand pvc with %v", err), + } + newPVC = MergeResizeConditionOnPVC(newPVC, + []v1.PersistentVolumeClaimCondition{errorCondition}, + true /* keepOldResizeConditions */) patchBytes, err := createPVCPatch(pvc, newPVC, false /* addResourceVersionCheck */) if err != nil { @@ -253,6 +264,30 @@ func MarkNodeExpansionFailed(pvc *v1.PersistentVolumeClaim, kubeClient clientset return updatedClaim, nil } +func MarkNodeExpansionFailedCondition(pvc *v1.PersistentVolumeClaim, kubeClient clientset.Interface, err error) (*v1.PersistentVolumeClaim, error) { + newPVC := pvc.DeepCopy() + errorCondition := v1.PersistentVolumeClaimCondition{ + Type: v1.PersistentVolumeClaimNodeResizeError, + Status: v1.ConditionTrue, + LastTransitionTime: metav1.Now(), + Message: fmt.Sprintf("failed to expand pvc with %v", err), + } + newPVC = MergeResizeConditionOnPVC(newPVC, + []v1.PersistentVolumeClaimCondition{errorCondition}, + true /* keepOldResizeConditions */) + patchBytes, err := createPVCPatch(pvc, newPVC, false /* addResourceVersionCheck */) + if err != nil { + return pvc, fmt.Errorf("patchPVCStatus failed to patch PVC %q: %w", pvc.Name, err) + } + + updatedClaim, updateErr := kubeClient.CoreV1().PersistentVolumeClaims(pvc.Namespace). + Patch(context.TODO(), pvc.Name, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{}, "status") + if updateErr != nil { + return pvc, fmt.Errorf("patchPVCStatus failed to patch PVC %q: %w", pvc.Name, updateErr) + } + return updatedClaim, nil +} + // MarkNodeExpansionInProgress marks pvc expansion in progress on node func MarkNodeExpansionInProgress(pvc *v1.PersistentVolumeClaim, kubeClient clientset.Interface) (*v1.PersistentVolumeClaim, error) { newPVC := pvc.DeepCopy() @@ -333,7 +368,7 @@ func addResourceVersion(patchBytes []byte, resourceVersion string) ([]byte, erro // leaving other conditions untouched. func MergeResizeConditionOnPVC( pvc *v1.PersistentVolumeClaim, - resizeConditions []v1.PersistentVolumeClaimCondition) *v1.PersistentVolumeClaim { + resizeConditions []v1.PersistentVolumeClaimCondition, keepOldResizeConditions bool) *v1.PersistentVolumeClaim { resizeConditionMap := map[v1.PersistentVolumeClaimConditionType]*resizeProcessStatus{} for _, condition := range resizeConditions { @@ -356,6 +391,10 @@ func MergeResizeConditionOnPVC( newConditions = append(newConditions, condition) } newCondition.processed = true + } else if keepOldResizeConditions { + // if keepOldResizeConditions is true, we keep the old resize conditions that were present in the + // existing pvc.Status.Conditions field. + newConditions = append(newConditions, condition) } } diff --git a/pkg/volume/util/resize_util_test.go b/pkg/volume/util/resize_util_test.go index 2292ee91513..b10b04ceb02 100644 --- a/pkg/volume/util/resize_util_test.go +++ b/pkg/volume/util/resize_util_test.go @@ -33,10 +33,11 @@ import ( ) type conditionMergeTestCase struct { - description string - pvc *v1.PersistentVolumeClaim - newConditions []v1.PersistentVolumeClaimCondition - finalConditions []v1.PersistentVolumeClaimCondition + description string + pvc *v1.PersistentVolumeClaim + keepOldResizeConditions bool + newConditions []v1.PersistentVolumeClaimCondition + finalConditions []v1.PersistentVolumeClaimCondition } func TestMergeResizeCondition(t *testing.T) { @@ -132,10 +133,34 @@ func TestMergeResizeCondition(t *testing.T) { }, }, }, + { + description: "when adding new condition with existing resize conditions", + pvc: pvc.DeepCopy(), + keepOldResizeConditions: true, + newConditions: []v1.PersistentVolumeClaimCondition{ + { + Type: v1.PersistentVolumeClaimNodeResizeError, + Status: v1.ConditionTrue, + LastTransitionTime: currentTime, + }, + }, + finalConditions: []v1.PersistentVolumeClaimCondition{ + { + Type: v1.PersistentVolumeClaimResizing, + Status: v1.ConditionTrue, + LastTransitionTime: currentTime, + }, + { + Type: v1.PersistentVolumeClaimNodeResizeError, + Status: v1.ConditionTrue, + LastTransitionTime: currentTime, + }, + }, + }, } for _, testcase := range testCases { - updatePVC := MergeResizeConditionOnPVC(testcase.pvc, testcase.newConditions) + updatePVC := MergeResizeConditionOnPVC(testcase.pvc, testcase.newConditions, testcase.keepOldResizeConditions) updateConditions := updatePVC.Status.Conditions if !reflect.DeepEqual(updateConditions, testcase.finalConditions) { @@ -166,8 +191,8 @@ func TestResizeFunctions(t *testing.T) { }, { name: "mark fs resize, when other resource statuses are present", - pvc: basePVC.withResourceStatus(v1.ResourceCPU, v1.PersistentVolumeClaimControllerResizeFailed).get(), - expectedPVC: basePVC.withResourceStatus(v1.ResourceCPU, v1.PersistentVolumeClaimControllerResizeFailed). + pvc: basePVC.withResourceStatus(v1.ResourceCPU, v1.PersistentVolumeClaimControllerResizeInfeasible).get(), + expectedPVC: basePVC.withResourceStatus(v1.ResourceCPU, v1.PersistentVolumeClaimControllerResizeInfeasible). withStorageResourceStatus(v1.PersistentVolumeClaimNodeResizePending).get(), testFunc: func(pvc *v1.PersistentVolumeClaim, c clientset.Interface, _ resource.Quantity) (*v1.PersistentVolumeClaim, error) { return MarkForFSResize(pvc, c) @@ -183,9 +208,9 @@ func TestResizeFunctions(t *testing.T) { }, { name: "mark resize finished", - pvc: basePVC.withResourceStatus(v1.ResourceCPU, v1.PersistentVolumeClaimControllerResizeFailed). + pvc: basePVC.withResourceStatus(v1.ResourceCPU, v1.PersistentVolumeClaimControllerResizeInfeasible). withStorageResourceStatus(v1.PersistentVolumeClaimNodeResizePending).get(), - expectedPVC: basePVC.withResourceStatus(v1.ResourceCPU, v1.PersistentVolumeClaimControllerResizeFailed). + expectedPVC: basePVC.withResourceStatus(v1.ResourceCPU, v1.PersistentVolumeClaimControllerResizeInfeasible). withStorageResourceStatus("").get(), testFunc: func(pvc *v1.PersistentVolumeClaim, i clientset.Interface, q resource.Quantity) (*v1.PersistentVolumeClaim, error) { return MarkFSResizeFinished(pvc, q, i) diff --git a/pkg/volume/util/types/types.go b/pkg/volume/util/types/types.go index 238e919b331..316962f8f5d 100644 --- a/pkg/volume/util/types/types.go +++ b/pkg/volume/util/types/types.go @@ -102,6 +102,27 @@ func IsFailedPreconditionError(err error) bool { return errors.As(err, &failedPreconditionError) } +// InfeasibleError errors are a subset of OperationFinished or final error +// codes. In terms of CSI - this usually means that, the operation is not possible +// in current state with given arguments. +type InfeasibleError struct { + msg string +} + +func (err *InfeasibleError) Error() string { + return err.msg +} + +// NewInfeasibleError returns a new instance of InfeasibleError +func NewInfeasibleError(msg string) *InfeasibleError { + return &InfeasibleError{msg: msg} +} + +func IsInfeasibleError(err error) bool { + var infeasibleError *InfeasibleError + return errors.As(err, &infeasibleError) +} + type OperationNotSupported struct { msg string } diff --git a/plugin/pkg/admission/noderestriction/admission_test.go b/plugin/pkg/admission/noderestriction/admission_test.go index e90b1fbcc1c..0ddf9f10579 100644 --- a/plugin/pkg/admission/noderestriction/admission_test.go +++ b/plugin/pkg/admission/noderestriction/admission_test.go @@ -1496,7 +1496,7 @@ func TestAdmitPVCStatus(t *testing.T) { noExistingPods := corev1lister.NewPodLister(noExistingPodsIndex) mynode := &user.DefaultInfo{Name: "system:node:mynode", Groups: []string{"system:nodes"}} - nodeExpansionFailed := api.PersistentVolumeClaimNodeResizeFailed + nodeExpansionFailed := api.PersistentVolumeClaimNodeResizeInfeasible tests := []struct { name string diff --git a/staging/src/k8s.io/api/core/v1/types.go b/staging/src/k8s.io/api/core/v1/types.go index 95088dcd733..007114b22ab 100644 --- a/staging/src/k8s.io/api/core/v1/types.go +++ b/staging/src/k8s.io/api/core/v1/types.go @@ -599,15 +599,29 @@ type TypedObjectReference struct { Namespace *string `json:"namespace,omitempty" protobuf:"bytes,4,opt,name=namespace"` } -// PersistentVolumeClaimConditionType is a valid value of PersistentVolumeClaimCondition.Type +// PersistentVolumeClaimConditionType defines the condition of PV claim. +// Valid values are: +// - "Resizing", "FileSystemResizePending" +// +// If RecoverVolumeExpansionFailure feature gate is enabled, then following additional values can be expected: +// - "ControllerResizeError", "NodeResizeError" +// +// If VolumeAttributesClass feature gate is enabled, then following additional values can be expected: +// - "ModifyVolumeError", "ModifyingVolume" type PersistentVolumeClaimConditionType string +// These are valid conditions of PVC const ( // PersistentVolumeClaimResizing - a user trigger resize of pvc has been started PersistentVolumeClaimResizing PersistentVolumeClaimConditionType = "Resizing" // PersistentVolumeClaimFileSystemResizePending - controller resize is finished and a file system resize is pending on node PersistentVolumeClaimFileSystemResizePending PersistentVolumeClaimConditionType = "FileSystemResizePending" + // PersistentVolumeClaimControllerResizeError indicates an error while resizing volume for size in the controller + PersistentVolumeClaimControllerResizeError PersistentVolumeClaimConditionType = "ControllerResizeError" + // PersistentVolumeClaimNodeResizeError indicates an error while resizing volume for size in the node. + PersistentVolumeClaimNodeResizeError PersistentVolumeClaimConditionType = "NodeResizeError" + // Applying the target VolumeAttributesClass encountered an error PersistentVolumeClaimVolumeModifyVolumeError PersistentVolumeClaimConditionType = "ModifyVolumeError" // Volume is being modified @@ -624,18 +638,19 @@ const ( // State set when resize controller starts resizing the volume in control-plane. PersistentVolumeClaimControllerResizeInProgress ClaimResourceStatus = "ControllerResizeInProgress" - // State set when resize has failed in resize controller with a terminal error. + // State set when resize has failed in resize controller with a terminal unrecoverable error. // Transient errors such as timeout should not set this status and should leave allocatedResourceStatus // unmodified, so as resize controller can resume the volume expansion. - PersistentVolumeClaimControllerResizeFailed ClaimResourceStatus = "ControllerResizeFailed" + PersistentVolumeClaimControllerResizeInfeasible ClaimResourceStatus = "ControllerResizeInfeasible" // State set when resize controller has finished resizing the volume but further resizing of volume // is needed on the node. PersistentVolumeClaimNodeResizePending ClaimResourceStatus = "NodeResizePending" // State set when kubelet starts resizing the volume. PersistentVolumeClaimNodeResizeInProgress ClaimResourceStatus = "NodeResizeInProgress" - // State set when resizing has failed in kubelet with a terminal error. Transient errors don't set NodeResizeFailed - PersistentVolumeClaimNodeResizeFailed ClaimResourceStatus = "NodeResizeFailed" + // State set when resizing has failed in kubelet with a terminal unrecoverable error. Transient errors + // shouldn't set this status + PersistentVolumeClaimNodeResizeInfeasible ClaimResourceStatus = "NodeResizeInfeasible" ) // +enum diff --git a/test/e2e/storage/csimock/csi_volume_expansion.go b/test/e2e/storage/csimock/csi_volume_expansion.go index bf55eec8404..47ae8be9884 100644 --- a/test/e2e/storage/csimock/csi_volume_expansion.go +++ b/test/e2e/storage/csimock/csi_volume_expansion.go @@ -411,7 +411,7 @@ var _ = utils.SIGDescribe("CSI Mock volume expansion", func() { pvcRequestSize: "11Gi", // expansion to 11Gi will cause expansion to fail on controller allocatedResource: "11Gi", simulatedCSIDriverError: expansionFailedOnController, - expectedResizeStatus: v1.PersistentVolumeClaimControllerResizeFailed, + expectedResizeStatus: v1.PersistentVolumeClaimControllerResizeInfeasible, recoverySize: resource.MustParse("4Gi"), }, { @@ -419,7 +419,7 @@ var _ = utils.SIGDescribe("CSI Mock volume expansion", func() { pvcRequestSize: "9Gi", // expansion to 9Gi will cause expansion to fail on node allocatedResource: "9Gi", simulatedCSIDriverError: expansionFailedOnNode, - expectedResizeStatus: v1.PersistentVolumeClaimNodeResizeFailed, + expectedResizeStatus: v1.PersistentVolumeClaimNodeResizeInfeasible, recoverySize: resource.MustParse("5Gi"), }, } @@ -473,11 +473,11 @@ var _ = utils.SIGDescribe("CSI Mock volume expansion", func() { func validateRecoveryBehaviour(ctx context.Context, pvc *v1.PersistentVolumeClaim, m *mockDriverSetup, test recoveryTest) { var err error ginkgo.By("Waiting for resizer to set allocated resource") - err = waitForAllocatedResource(pvc, m, test.allocatedResource) + err = waitForAllocatedResource(ctx, pvc, m, test.allocatedResource) framework.ExpectNoError(err, "While waiting for allocated resource to be updated") ginkgo.By("Waiting for resizer to set resize status") - err = waitForResizeStatus(pvc, m.cs, test.expectedResizeStatus) + err = waitForResizeStatus(ctx, pvc, m.cs, test.expectedResizeStatus) framework.ExpectNoError(err, "While waiting for resize status to be set") ginkgo.By("Recover pvc size") @@ -500,7 +500,7 @@ func validateRecoveryBehaviour(ctx context.Context, pvc *v1.PersistentVolumeClai // if expansion succeeded on controller but failed on the node if test.simulatedCSIDriverError == expansionFailedOnNode { ginkgo.By("Wait for expansion to fail on node again") - err = waitForResizeStatus(pvc, m.cs, v1.PersistentVolumeClaimNodeResizeFailed) + err = waitForResizeStatus(ctx, pvc, m.cs, v1.PersistentVolumeClaimNodeResizeInfeasible) framework.ExpectNoError(err, "While waiting for resize status to be set to expansion-failed-on-node") ginkgo.By("verify allocated resources after recovery") @@ -541,31 +541,31 @@ func validateExpansionSuccess(ctx context.Context, pvc *v1.PersistentVolumeClaim gomega.Expect(resizeStatus).To(gomega.BeZero(), "resize status should be empty") } -func waitForResizeStatus(pvc *v1.PersistentVolumeClaim, c clientset.Interface, expectedState v1.ClaimResourceStatus) error { - var actualResizeStatus *v1.ClaimResourceStatus +func waitForResizeStatus(ctx context.Context, pvc *v1.PersistentVolumeClaim, c clientset.Interface, expectedState v1.ClaimResourceStatus) error { + var actualResizeStatus v1.ClaimResourceStatus - waitErr := wait.PollImmediate(resizePollInterval, csiResizeWaitPeriod, func() (bool, error) { + waitErr := wait.PollUntilContextTimeout(ctx, resizePollInterval, csiResizeWaitPeriod, true, func(pollContext context.Context) (bool, error) { var err error - updatedPVC, err := c.CoreV1().PersistentVolumeClaims(pvc.Namespace).Get(context.TODO(), pvc.Name, metav1.GetOptions{}) + updatedPVC, err := c.CoreV1().PersistentVolumeClaims(pvc.Namespace).Get(pollContext, pvc.Name, metav1.GetOptions{}) if err != nil { return false, fmt.Errorf("error fetching pvc %q for checking for resize status: %w", pvc.Name, err) } - actualResizeStatus := updatedPVC.Status.AllocatedResourceStatuses[v1.ResourceStorage] + actualResizeStatus = updatedPVC.Status.AllocatedResourceStatuses[v1.ResourceStorage] return (actualResizeStatus == expectedState), nil }) if waitErr != nil { - return fmt.Errorf("error while waiting for resize status to sync to %v, actualStatus %s: %v", expectedState, *actualResizeStatus, waitErr) + return fmt.Errorf("error while waiting for resize status to sync to %v, actualStatus %s: %w", expectedState, actualResizeStatus, waitErr) } return nil } -func waitForAllocatedResource(pvc *v1.PersistentVolumeClaim, m *mockDriverSetup, expectedSize string) error { +func waitForAllocatedResource(ctx context.Context, pvc *v1.PersistentVolumeClaim, m *mockDriverSetup, expectedSize string) error { expectedQuantity := resource.MustParse(expectedSize) - waitErr := wait.PollImmediate(resizePollInterval, csiResizeWaitPeriod, func() (bool, error) { + waitErr := wait.PollUntilContextTimeout(ctx, resizePollInterval, csiResizeWaitPeriod, true, func(pollContext context.Context) (bool, error) { var err error - updatedPVC, err := m.cs.CoreV1().PersistentVolumeClaims(pvc.Namespace).Get(context.TODO(), pvc.Name, metav1.GetOptions{}) + updatedPVC, err := m.cs.CoreV1().PersistentVolumeClaims(pvc.Namespace).Get(pollContext, pvc.Name, metav1.GetOptions{}) if err != nil { return false, fmt.Errorf("error fetching pvc %q for checking for resize status: %w", pvc.Name, err)