diff --git a/pkg/kubelet/volumemanager/cache/actual_state_of_world.go b/pkg/kubelet/volumemanager/cache/actual_state_of_world.go
index 38017a588f7..d4627326726 100644
--- a/pkg/kubelet/volumemanager/cache/actual_state_of_world.go
+++ b/pkg/kubelet/volumemanager/cache/actual_state_of_world.go
@@ -960,6 +960,8 @@ func (asw *actualStateOfWorld) volumeNeedsExpansion(volumeObj attachedVolume, de
 		return currentSize, false
 	}
 
+	klog.V(5).InfoS("NodeExpandVolume checking size", "actualSize", volumeObj.persistentVolumeSize.String(), "desiredSize", desiredVolumeSize.String(), "volume", volumeObj.volumeName)
+
 	if desiredVolumeSize.Cmp(*volumeObj.persistentVolumeSize) > 0 {
 		volumePlugin, err := asw.volumePluginMgr.FindNodeExpandablePluginBySpec(volumeObj.spec)
 		if err != nil || volumePlugin == nil {
diff --git a/pkg/kubelet/volumemanager/populator/desired_state_of_world_populator.go b/pkg/kubelet/volumemanager/populator/desired_state_of_world_populator.go
index 2d9b07a6021..5d0504dddbb 100644
--- a/pkg/kubelet/volumemanager/populator/desired_state_of_world_populator.go
+++ b/pkg/kubelet/volumemanager/populator/desired_state_of_world_populator.go
@@ -380,10 +380,11 @@ func (dswp *desiredStateOfWorldPopulator) checkVolumeFSResize(
 		klog.V(5).InfoS("Skip file system resize check for the volume, as the volume is mounted as readonly", "pod", klog.KObj(pod), "volumeName", podVolume.Name)
 		return
 	}
+
 	pvCap := volumeSpec.PersistentVolume.Spec.Capacity.Storage()
 	pvcStatusCap := pvc.Status.Capacity.Storage()
 	dswp.desiredStateOfWorld.UpdatePersistentVolumeSize(uniqueVolumeName, pvCap)
-
+	klog.V(5).InfoS("NodeExpandVolume updating size", "actualSize", pvcStatusCap.String(), "desiredSize", pvCap.String(), "volumeName", uniqueVolumeName)
 	// in case the actualStateOfWorld was rebuild after kubelet restart ensure that claimSize is set to accurate value
 	dswp.actualStateOfWorld.InitializeClaimSize(klog.TODO(), uniqueVolumeName, pvcStatusCap)
 }
diff --git a/pkg/volume/util/operationexecutor/node_expander.go b/pkg/volume/util/operationexecutor/node_expander.go
index 19547c94e09..c909949a475 100644
--- a/pkg/volume/util/operationexecutor/node_expander.go
+++ b/pkg/volume/util/operationexecutor/node_expander.go
@@ -36,7 +36,6 @@ type NodeExpander struct {
 
 	// computed via precheck
 	pvcStatusCap resource.Quantity
-	pvCap        resource.Quantity
 	resizeStatus v1.ClaimResourceStatus
 
 	// indicates that if volume expansion failed on the node, then current expansion should be marked
@@ -47,6 +46,9 @@ type NodeExpander struct {
 	// PVC has already been updated - possibly because expansion already succeeded on different node.
 	// This can happen when a RWX PVC is expanded.
 	pvcAlreadyUpdated bool
+
+	// testStatus is used for testing purposes only.
+	testStatus testResponseData
 }
 
 func newNodeExpander(resizeOp nodeResizeOperationOpts, client clientset.Interface, recorder record.EventRecorder) *NodeExpander {
@@ -76,7 +78,6 @@ type testResponseData struct {
 // it returns false.
 func (ne *NodeExpander) runPreCheck() bool {
 	ne.pvcStatusCap = ne.pvc.Status.Capacity[v1.ResourceStorage]
-	ne.pvCap = ne.pv.Spec.Capacity[v1.ResourceStorage]
 
 	allocatedResourceStatus := ne.pvc.Status.AllocatedResourceStatuses
 	if currentStatus, ok := allocatedResourceStatus[v1.ResourceStorage]; ok {
@@ -117,10 +118,12 @@ func (ne *NodeExpander) runPreCheck() bool {
 	return false
 }
 
-func (ne *NodeExpander) expandOnPlugin() (bool, error, testResponseData) {
+func (ne *NodeExpander) expandOnPlugin() (bool, resource.Quantity, error) {
 	allowExpansion := ne.runPreCheck()
 	if !allowExpansion {
-		return false, nil, testResponseData{false, true}
+		klog.V(3).Infof("NodeExpandVolume is not allowed to proceed for volume %s with resizeStatus %s", ne.vmt.VolumeName, ne.resizeStatus)
+		ne.testStatus = testResponseData{false /* resizeCalledOnPlugin */, true /* assumeResizeFinished */}
+		return false, ne.pluginResizeOpts.OldSize, nil
 	}
 
 	var err error
@@ -132,7 +135,8 @@ func (ne *NodeExpander) expandOnPlugin() (bool, error, testResponseData) {
 		if err != nil {
 			msg := ne.vmt.GenerateErrorDetailed("MountVolume.NodeExpandVolume failed to mark node expansion in progress: %v", err)
 			klog.Errorf(msg.Error())
-			return false, err, testResponseData{}
+			ne.testStatus = testResponseData{}
+			return false, ne.pluginResizeOpts.OldSize, err
 		}
 	}
 	_, resizeErr := ne.volumePlugin.NodeExpand(ne.pluginResizeOpts)
@@ -159,24 +163,27 @@ func (ne *NodeExpander) expandOnPlugin() (bool, error, testResponseData) {
 		if volumetypes.IsFailedPreconditionError(resizeErr) {
 			ne.actualStateOfWorld.MarkForInUseExpansionError(ne.vmt.VolumeName)
 			klog.Errorf(ne.vmt.GenerateErrorDetailed("MountVolume.NodeExapndVolume failed with %v", resizeErr).Error())
-			return false, nil, testResponseData{assumeResizeFinished: true, resizeCalledOnPlugin: true}
+			ne.testStatus = testResponseData{assumeResizeFinished: true, resizeCalledOnPlugin: true}
+			return false, ne.pluginResizeOpts.OldSize, nil
 		}
-		return false, resizeErr, testResponseData{assumeResizeFinished: true, resizeCalledOnPlugin: true}
+		ne.testStatus = testResponseData{assumeResizeFinished: true, resizeCalledOnPlugin: true}
+		return false, ne.pluginResizeOpts.OldSize, resizeErr
 	}
 
 	simpleMsg, detailedMsg := ne.vmt.GenerateMsg("MountVolume.NodeExpandVolume succeeded", nodeName)
 	ne.recorder.Eventf(ne.vmt.Pod, v1.EventTypeNormal, kevents.FileSystemResizeSuccess, simpleMsg)
 	ne.recorder.Eventf(ne.pvc, v1.EventTypeNormal, kevents.FileSystemResizeSuccess, simpleMsg)
 	klog.InfoS(detailedMsg, "pod", klog.KObj(ne.vmt.Pod))
+	ne.testStatus = testResponseData{true /*resizeCalledOnPlugin */, true /* assumeResizeFinished */}
 
 	// no need to update PVC object if we already updated it
 	if ne.pvcAlreadyUpdated {
-		return true, nil, testResponseData{true, true}
+		return true, ne.pluginResizeOpts.NewSize, nil
 	}
 
 	// File system resize succeeded, now update the PVC's Capacity to match the PV's
 	ne.pvc, err = util.MarkFSResizeFinished(ne.pvc, ne.pluginResizeOpts.NewSize, ne.kubeClient)
 	if err != nil {
-		return true, fmt.Errorf("mountVolume.NodeExpandVolume update pvc status failed: %v", err), testResponseData{true, true}
+		return true, ne.pluginResizeOpts.NewSize, fmt.Errorf("mountVolume.NodeExpandVolume update pvc status failed: %w", err)
 	}
-	return true, nil, testResponseData{true, true}
+	return true, ne.pluginResizeOpts.NewSize, nil
 }
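For orientation, here is a minimal sketch of how the reworked expandOnPlugin contract is consumed. The wrapper function name is hypothetical; NodeExpander, testResponseData and resource.Quantity are the types used above, and the real wiring lives in operation_generator.go and node_expander_test.go further down in this diff.

// Sketch only: using the new (bool, resource.Quantity, error) return values
// and the testStatus field. runNodeExpansion is a hypothetical wrapper, not
// part of the change above.
func runNodeExpansion(ne *NodeExpander) (bool, resource.Quantity, error) {
	// The returned quantity is the size the expander actually worked with:
	// pluginResizeOpts.OldSize when expansion is skipped or fails,
	// pluginResizeOpts.NewSize when it succeeds.
	resizeFinished, appliedSize, err := ne.expandOnPlugin()

	// Tests no longer receive testResponseData as a return value; they read
	// it from the expander afterwards (see node_expander_test.go below).
	_ = ne.testStatus

	return resizeFinished, appliedSize, err
}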
diff --git a/pkg/volume/util/operationexecutor/node_expander_test.go b/pkg/volume/util/operationexecutor/node_expander_test.go
index 40d09e15c26..81938d509c6 100644
--- a/pkg/volume/util/operationexecutor/node_expander_test.go
+++ b/pkg/volume/util/operationexecutor/node_expander_test.go
@@ -162,7 +162,8 @@ func TestNodeExpander(t *testing.T) {
 			ogInstance, _ := og.(*operationGenerator)
 			nodeExpander := newNodeExpander(resizeOp, ogInstance.kubeClient, ogInstance.recorder)
 
-			_, err, expansionResponse := nodeExpander.expandOnPlugin()
+			_, _, err := nodeExpander.expandOnPlugin()
+			expansionResponse := nodeExpander.testStatus
 			pvc = nodeExpander.pvc
 			pvcStatusCap := pvc.Status.Capacity[v1.ResourceStorage]
diff --git a/pkg/volume/util/operationexecutor/operation_generator.go b/pkg/volume/util/operationexecutor/operation_generator.go
index e246719c23e..5611e53e77a 100644
--- a/pkg/volume/util/operationexecutor/operation_generator.go
+++ b/pkg/volume/util/operationexecutor/operation_generator.go
@@ -1957,14 +1957,14 @@ func (og *operationGenerator) doOnlineExpansion(volumeToMount VolumeToMount,
 	actualStateOfWorld ActualStateOfWorldMounterUpdater,
 	resizeOptions volume.NodeResizeOptions) (bool, error, error) {
 
-	resizeDone, err := og.nodeExpandVolume(volumeToMount, actualStateOfWorld, resizeOptions)
+	resizeDone, newSize, err := og.nodeExpandVolume(volumeToMount, actualStateOfWorld, resizeOptions)
 	if err != nil {
 		e1, e2 := volumeToMount.GenerateError("NodeExpandVolume.NodeExpandVolume failed", err)
 		klog.Errorf(e2.Error())
 		return false, e1, e2
 	}
 	if resizeDone {
-		markingDone := actualStateOfWorld.MarkVolumeAsResized(volumeToMount.VolumeName, &resizeOptions.NewSize)
+		markingDone := actualStateOfWorld.MarkVolumeAsResized(volumeToMount.VolumeName, &newSize)
 		if !markingDone {
 			// On failure, return error. Caller will log and retry.
 			genericFailureError := fmt.Errorf("unable to mark volume as resized")
@@ -2009,8 +2009,11 @@ func (og *operationGenerator) expandVolumeDuringMount(volumeToMount VolumeToMoun
 		actualStateOfWorld: actualStateOfWorld,
 	}
 	if og.checkForRecoveryFromExpansion(pvc, volumeToMount) {
+		// if recovery feature is enabled, we can use allocated size from PVC status as new size
+		rsOpts.NewSize = pvc.Status.AllocatedResources[v1.ResourceStorage]
+		resizeOp.pluginResizeOpts = rsOpts
 		nodeExpander := newNodeExpander(resizeOp, og.kubeClient, og.recorder)
-		resizeFinished, err, _ := nodeExpander.expandOnPlugin()
+		resizeFinished, _, err := nodeExpander.expandOnPlugin()
 		return resizeFinished, err
 	} else {
 		return og.legacyCallNodeExpandOnPlugin(resizeOp)
@@ -2041,7 +2044,7 @@ func (og *operationGenerator) checkIfSupportsNodeExpansion(volumeToMount VolumeT
 func (og *operationGenerator) nodeExpandVolume(
 	volumeToMount VolumeToMount,
 	actualStateOfWorld ActualStateOfWorldMounterUpdater,
-	rsOpts volume.NodeResizeOptions) (bool, error) {
+	rsOpts volume.NodeResizeOptions) (bool, resource.Quantity, error) {
 
 	supportsExpansion, expandableVolumePlugin := og.checkIfSupportsNodeExpansion(volumeToMount)
 
@@ -2050,9 +2053,10 @@ func (og *operationGenerator) nodeExpandVolume(
 		if rsOpts.NewSize.Cmp(rsOpts.OldSize) > 0 {
 			pv := volumeToMount.VolumeSpec.PersistentVolume
 			pvc, err := og.kubeClient.CoreV1().PersistentVolumeClaims(pv.Spec.ClaimRef.Namespace).Get(context.TODO(), pv.Spec.ClaimRef.Name, metav1.GetOptions{})
+			currentSize := pvc.Status.Capacity.Storage()
 			if err != nil {
 				// Return error rather than leave the file system un-resized, caller will log and retry
-				return false, fmt.Errorf("mountVolume.NodeExpandVolume get PVC failed : %v", err)
+				return false, *currentSize, fmt.Errorf("mountVolume.NodeExpandVolume get PVC failed : %w", err)
 			}
 
 			if volumeToMount.VolumeSpec.ReadOnly {
@@ -2060,7 +2064,7 @@ func (og *operationGenerator) nodeExpandVolume(
 				klog.Warningf(detailedMsg)
 				og.recorder.Eventf(volumeToMount.Pod, v1.EventTypeWarning, kevents.FileSystemResizeFailed, simpleMsg)
 				og.recorder.Eventf(pvc, v1.EventTypeWarning, kevents.FileSystemResizeFailed, simpleMsg)
-				return true, nil
+				return true, *currentSize, nil
 			}
 			resizeOp := nodeResizeOperationOpts{
 				vmt: volumeToMount,
@@ -2072,15 +2076,20 @@ func (og *operationGenerator) nodeExpandVolume(
 			}
 			if og.checkForRecoveryFromExpansion(pvc, volumeToMount) {
+				// if recovery feature is enabled, we can use allocated size from PVC status as new size
+				newSize := pvc.Status.AllocatedResources[v1.ResourceStorage]
+				rsOpts.NewSize = newSize
+				resizeOp.pluginResizeOpts.NewSize = newSize
 				nodeExpander := newNodeExpander(resizeOp, og.kubeClient, og.recorder)
-				resizeFinished, err, _ := nodeExpander.expandOnPlugin()
-				return resizeFinished, err
+				resizeFinished, newSize, err := nodeExpander.expandOnPlugin()
+				return resizeFinished, newSize, err
 			} else {
-				return og.legacyCallNodeExpandOnPlugin(resizeOp)
+				resizeFinished, err := og.legacyCallNodeExpandOnPlugin(resizeOp)
+				return resizeFinished, rsOpts.NewSize, err
 			}
 		}
 	}
-	return true, nil
+	return true, rsOpts.OldSize, nil
 }
 
 func (og *operationGenerator) checkForRecoveryFromExpansion(pvc *v1.PersistentVolumeClaim, volumeToMount VolumeToMount) bool {
diff --git a/pkg/volume/util/operationexecutor/operation_generator_test.go b/pkg/volume/util/operationexecutor/operation_generator_test.go
index 0c267222e89..9689590e3e8 100644
--- a/pkg/volume/util/operationexecutor/operation_generator_test.go
+++ b/pkg/volume/util/operationexecutor/operation_generator_test.go
@@ -305,7 +305,7 @@ func TestOperationGenerator_nodeExpandVolume(t *testing.T) {
 			asow := newFakeActualStateOfWorld()
 			ogInstance, _ := og.(*operationGenerator)
 
-			_, err := ogInstance.nodeExpandVolume(vmt, asow, pluginResizeOpts)
+			_, _, err := ogInstance.nodeExpandVolume(vmt, asow, pluginResizeOpts)
 
 			if !test.expectError && err != nil {
 				t.Errorf("For test %s, expected no error got: %v", test.name, err)
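Both recovery branches above derive the node-expansion target from pvc.Status.AllocatedResources rather than from the spec-derived size. A small, self-contained sketch of that size selection (the helper name is hypothetical and only illustrates the hunks above; v1 and resource are the usual core/v1 and apimachinery resource packages):

// Sketch: pick the size the kubelet should expand to. With
// RecoverVolumeExpansionFailure enabled, the allocated size recorded on the
// PVC status wins over the size passed in rsOpts.NewSize.
func desiredNodeExpandSize(pvc *v1.PersistentVolumeClaim, specSize resource.Quantity, recoveryEnabled bool) resource.Quantity {
	if recoveryEnabled {
		if allocated, ok := pvc.Status.AllocatedResources[v1.ResourceStorage]; ok {
			return allocated
		}
	}
	return specSize
}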
diff --git a/test/e2e/storage/csimock/base.go b/test/e2e/storage/csimock/base.go
index 71cfeb8bcbd..dcad86bd06a 100644
--- a/test/e2e/storage/csimock/base.go
+++ b/test/e2e/storage/csimock/base.go
@@ -96,6 +96,7 @@ type testParameters struct {
 	enableNodeExpansion bool // enable node expansion for CSI mock driver
 	// just disable resizing on driver it overrides enableResizing flag for CSI mock driver
 	disableResizingOnDriver bool
+	disableControllerExpansion bool
 	enableSnapshot bool
 	enableVolumeMountGroup bool // enable the VOLUME_MOUNT_GROUP node capability in the CSI mock driver.
 	enableNodeVolumeCondition bool
@@ -172,6 +173,7 @@ func (m *mockDriverSetup) init(ctx context.Context, tp testParameters) {
 		EnableResizing: tp.enableResizing,
 		EnableNodeExpansion: tp.enableNodeExpansion,
 		EnableNodeVolumeCondition: tp.enableNodeVolumeCondition,
+		DisableControllerExpansion: tp.disableControllerExpansion,
 		EnableSnapshot: tp.enableSnapshot,
 		EnableVolumeMountGroup: tp.enableVolumeMountGroup,
 		TokenRequests: tp.tokenRequests,
diff --git a/test/e2e/storage/csimock/csi_volume_expansion.go b/test/e2e/storage/csimock/csi_volume_expansion.go
index 47ae8be9884..67eb123dc30 100644
--- a/test/e2e/storage/csimock/csi_volume_expansion.go
+++ b/test/e2e/storage/csimock/csi_volume_expansion.go
@@ -45,8 +45,10 @@ type expansionStatus int
 const (
 	expansionSuccess = iota
-	expansionFailedOnController
-	expansionFailedOnNode
+	expansionFailedOnControllerWithInfeasibleError
+	expansionFailedOnControllerWithFinalError
+	expansionFailedOnNodeWithInfeasibleError
+	expansionFailedOnNodeWithFinalError
 	expansionFailedMissingStagingPath
 )
@@ -61,12 +63,13 @@ var (
 )
 
 type recoveryTest struct {
-	name                    string
-	pvcRequestSize          string
-	allocatedResource       string
-	simulatedCSIDriverError expansionStatus
-	expectedResizeStatus    v1.ClaimResourceStatus
-	recoverySize            resource.Quantity
+	name                       string
+	pvcRequestSize             string
+	allocatedResource          string
+	simulatedCSIDriverError    expansionStatus
+	disableControllerExpansion bool
+	expectedResizeStatus       v1.ClaimResourceStatus
+	recoverySize               resource.Quantity
 }
 
 var _ = utils.SIGDescribe("CSI Mock volume expansion", func() {
@@ -400,27 +403,57 @@ var _ = utils.SIGDescribe("CSI Mock volume expansion", func() {
 	f.Context("Expansion with recovery", feature.RecoverVolumeExpansionFailure, func() {
 		tests := []recoveryTest{
 			{
-				name:                    "should record target size in allocated resources",
-				pvcRequestSize:          "4Gi",
-				allocatedResource:       "4Gi",
-				simulatedCSIDriverError: expansionSuccess,
-				expectedResizeStatus:    "",
+				name:                       "should record target size in allocated resources",
+				pvcRequestSize:             "4Gi",
+				allocatedResource:          "4Gi",
+				disableControllerExpansion: false,
+				simulatedCSIDriverError:    expansionSuccess,
+				expectedResizeStatus:       "",
 			},
 			{
-				name:                    "should allow recovery if controller expansion fails with final error",
-				pvcRequestSize:          "11Gi", // expansion to 11Gi will cause expansion to fail on controller
-				allocatedResource:       "11Gi",
-				simulatedCSIDriverError: expansionFailedOnController,
-				expectedResizeStatus:    v1.PersistentVolumeClaimControllerResizeInfeasible,
-				recoverySize:            resource.MustParse("4Gi"),
+				name:                       "should allow recovery if controller expansion fails with infeasible error",
+				pvcRequestSize:             "11Gi", // expansion to 11Gi will cause expansion to fail on controller
+				allocatedResource:          "11Gi",
+				disableControllerExpansion: false,
+				simulatedCSIDriverError:    expansionFailedOnControllerWithInfeasibleError,
+				expectedResizeStatus:       v1.PersistentVolumeClaimControllerResizeInfeasible,
+				recoverySize:               resource.MustParse("4Gi"),
 			},
 			{
-				name:                    "recovery should not be possible in partially expanded volumes",
-				pvcRequestSize:          "9Gi", // expansion to 9Gi will cause expansion to fail on node
-				allocatedResource:       "9Gi",
-				simulatedCSIDriverError: expansionFailedOnNode,
-				expectedResizeStatus:    v1.PersistentVolumeClaimNodeResizeInfeasible,
-				recoverySize:            resource.MustParse("5Gi"),
+				name:                       "should allow recovery if controller expansion fails with final error",
+				pvcRequestSize:             "11Gi", // expansion to 11Gi will cause expansion to fail on controller
+				allocatedResource:          "11Gi",
+				disableControllerExpansion: false,
+				simulatedCSIDriverError:    expansionFailedOnControllerWithFinalError,
+				expectedResizeStatus:       v1.PersistentVolumeClaimControllerResizeInProgress,
+				recoverySize:               resource.MustParse("4Gi"),
+			},
+			{
+				name:                       "recovery should not be possible in partially expanded volumes",
+				pvcRequestSize:             "9Gi", // expansion to 9Gi will cause expansion to fail on node
+				allocatedResource:          "9Gi",
+				disableControllerExpansion: false,
+				simulatedCSIDriverError:    expansionFailedOnNodeWithInfeasibleError,
+				expectedResizeStatus:       v1.PersistentVolumeClaimNodeResizeInfeasible,
+				recoverySize:               resource.MustParse("5Gi"),
+			},
+			{
+				name:                       "recovery should be possible for node-only expanded volumes with infeasible error",
+				pvcRequestSize:             "9Gi", // expansion to 9Gi will cause expansion to fail on node
+				allocatedResource:          "9Gi",
+				disableControllerExpansion: true,
+				simulatedCSIDriverError:    expansionFailedOnNodeWithInfeasibleError,
+				expectedResizeStatus:       v1.PersistentVolumeClaimNodeResizeInfeasible,
+				recoverySize:               resource.MustParse("5Gi"),
+			},
+			{
+				name:                       "recovery should be possible for node-only expanded volumes with final error",
+				pvcRequestSize:             "9Gi", // expansion to 9Gi will cause expansion to fail on node
+				allocatedResource:          "9Gi",
+				disableControllerExpansion: true,
+				simulatedCSIDriverError:    expansionFailedOnNodeWithFinalError,
+				expectedResizeStatus:       v1.PersistentVolumeClaimNodeResizeInProgress,
+				recoverySize:               resource.MustParse("5Gi"),
 			},
 		}
 
@@ -428,7 +461,7 @@ var _ = utils.SIGDescribe("CSI Mock volume expansion", func() {
 			test := t
 			ginkgo.It(test.name, func(ctx context.Context) {
 				var err error
-				params := testParameters{enableResizing: true, enableNodeExpansion: true, enableRecoverExpansionFailure: true}
+				params := testParameters{enableResizing: true, enableNodeExpansion: true, enableRecoverExpansionFailure: true, disableControllerExpansion: test.disableControllerExpansion}
 
 				if test.simulatedCSIDriverError != expansionSuccess {
 					params.hooks = createExpansionHook(test.simulatedCSIDriverError)
@@ -476,9 +509,15 @@ func validateRecoveryBehaviour(ctx context.Context, pvc *v1.PersistentVolumeClai
 	err = waitForAllocatedResource(ctx, pvc, m, test.allocatedResource)
 	framework.ExpectNoError(err, "While waiting for allocated resource to be updated")
 
-	ginkgo.By("Waiting for resizer to set resize status")
-	err = waitForResizeStatus(ctx, pvc, m.cs, test.expectedResizeStatus)
-	framework.ExpectNoError(err, "While waiting for resize status to be set")
+	if test.expectedResizeStatus == v1.PersistentVolumeClaimNodeResizeInfeasible {
+		ginkgo.By("Waiting for kubelet to fail expansion on the node")
+		err = waitForResizeToFailOnNode(ctx, pvc, m.cs)
+		framework.ExpectNoError(err, "While waiting for resize status to be set")
+	} else {
+		ginkgo.By("Waiting for resizer to set resize status")
+		err = waitForResizeStatus(ctx, pvc, m.cs, test.expectedResizeStatus)
+		framework.ExpectNoError(err, "While waiting for resize status to be set")
+	}
 
 	ginkgo.By("Recover pvc size")
 	newPVC, err := testsuites.ExpandPVCSize(ctx, pvc, test.recoverySize, m.cs)
@@ -492,16 +531,25 @@ func validateRecoveryBehaviour(ctx context.Context, pvc *v1.PersistentVolumeClai
 	}
 
 	// if expansion failed on controller with final error, then recovery should be possible
-	if test.simulatedCSIDriverError == expansionFailedOnController {
+	if test.simulatedCSIDriverError == expansionFailedOnControllerWithInfeasibleError {
+		validateExpansionSuccess(ctx, pvc, m, test, test.recoverySize.String())
+		return
+	}
+
+	// if expansion failed on node with final error but volume was only expanded on the node
+	// then recovery should be possible
+	if test.disableControllerExpansion &&
+		(test.simulatedCSIDriverError == expansionFailedOnNodeWithInfeasibleError ||
+			test.simulatedCSIDriverError == expansionFailedOnNodeWithFinalError) {
 		validateExpansionSuccess(ctx, pvc, m, test, test.recoverySize.String())
 		return
 	}
 
 	// if expansion succeeded on controller but failed on the node
-	if test.simulatedCSIDriverError == expansionFailedOnNode {
+	if test.simulatedCSIDriverError == expansionFailedOnNodeWithInfeasibleError {
 		ginkgo.By("Wait for expansion to fail on node again")
-		err = waitForResizeStatus(ctx, pvc, m.cs, v1.PersistentVolumeClaimNodeResizeInfeasible)
-		framework.ExpectNoError(err, "While waiting for resize status to be set to expansion-failed-on-node")
+		err = waitForResizeToFailOnNode(ctx, pvc, m.cs)
+		framework.ExpectNoError(err, "While waiting for resize status to be set")
 
 		ginkgo.By("verify allocated resources after recovery")
 		pvc, err = m.cs.CoreV1().PersistentVolumeClaims(pvc.Namespace).Get(context.TODO(), pvc.Name, metav1.GetOptions{})
@@ -520,11 +568,11 @@ func validateRecoveryBehaviour(ctx context.Context, pvc *v1.PersistentVolumeClai
 func validateExpansionSuccess(ctx context.Context, pvc *v1.PersistentVolumeClaim, m *mockDriverSetup, test recoveryTest, expectedAllocatedSize string) {
 	var err error
-	ginkgo.By("Waiting for persistent volume resize to finish")
+	ginkgo.By(fmt.Sprintf("Waiting for PV %s to be expanded to %s", pvc.Spec.VolumeName, test.recoverySize.String()))
 	err = testsuites.WaitForControllerVolumeResize(ctx, pvc, m.cs, csiResizeWaitPeriod)
 	framework.ExpectNoError(err, "While waiting for PV resize to finish")
 
-	ginkgo.By("Waiting for PVC resize to finish")
+	ginkgo.By(fmt.Sprintf("Waiting for PVC %s to be expanded to %s", pvc.Name, test.recoverySize.String()))
 	pvc, err = testsuites.WaitForFSResize(ctx, pvc, m.cs)
 	framework.ExpectNoError(err, "while waiting for PVC to finish")
 
@@ -561,6 +609,31 @@ func waitForResizeStatus(ctx context.Context, pvc *v1.PersistentVolumeClaim, c c
 	return nil
 }
 
+func waitForResizeToFailOnNode(ctx context.Context, pvc *v1.PersistentVolumeClaim, c clientset.Interface) error {
+	var finalConditions []v1.PersistentVolumeClaimCondition
+	waitErr := wait.PollUntilContextTimeout(ctx, resizePollInterval, csiResizeWaitPeriod, true, func(pollContext context.Context) (bool, error) {
+		var err error
+		updatedPVC, err := c.CoreV1().PersistentVolumeClaims(pvc.Namespace).Get(pollContext, pvc.Name, metav1.GetOptions{})
+
+		if err != nil {
+			return false, fmt.Errorf("error fetching pvc %q for checking for resize status: %w", pvc.Name, err)
+		}
+		pvcConditions := updatedPVC.Status.Conditions
+		for _, cond := range pvcConditions {
+			if cond.Type == v1.PersistentVolumeClaimNodeResizeError {
+				return true, nil
+			}
+		}
+		finalConditions = pvcConditions
+		return false, nil
+	})
+
+	if waitErr != nil {
+		return fmt.Errorf("error while waiting for resize condition sync to NodeResizeError, actualStatus %+v: %w", finalConditions, waitErr)
+	}
+	return nil
+}
+
 func waitForAllocatedResource(ctx context.Context, pvc *v1.PersistentVolumeClaim, m *mockDriverSetup, expectedSize string) error {
 	expectedQuantity := resource.MustParse(expectedSize)
 	waitErr := wait.PollUntilContextTimeout(ctx, resizePollInterval, csiResizeWaitPeriod, true, func(pollContext context.Context) (bool, error) {
@@ -596,7 +669,7 @@ func createExpansionHook(expectedExpansionStatus expansionStatus) *drivers.Hooks
 			}
 		}
-	case expansionFailedOnController:
+	case expansionFailedOnControllerWithInfeasibleError:
 		expansionRequest, ok := request.(*csipbv1.ControllerExpandVolumeRequest)
 		if ok {
 			requestedSize := resource.NewQuantity(expansionRequest.CapacityRange.RequiredBytes, resource.BinarySI)
@@ -604,7 +677,16 @@ func createExpansionHook(expectedExpansionStatus expansionStatus) *drivers.Hooks
 				return nil, status.Error(codes.InvalidArgument, "invalid expansion request")
 			}
 		}
-	case expansionFailedOnNode:
+	case expansionFailedOnControllerWithFinalError:
+		// This simulates a condition that a final, but not infeasible error is returned when expansion fails in the controller.
+		expansionRequest, ok := request.(*csipbv1.ControllerExpandVolumeRequest)
+		if ok {
+			requestedSize := resource.NewQuantity(expansionRequest.CapacityRange.RequiredBytes, resource.BinarySI)
+			if requestedSize.Cmp(maxControllerSizeLimit) > 0 {
+				return nil, status.Error(codes.PermissionDenied, "permission denied for expansion")
+			}
+		}
+	case expansionFailedOnNodeWithInfeasibleError:
 		expansionRequest, ok := request.(*csipbv1.NodeExpandVolumeRequest)
 		if ok {
 			requestedSize := resource.NewQuantity(expansionRequest.CapacityRange.RequiredBytes, resource.BinarySI)
@@ -613,6 +695,14 @@ func createExpansionHook(expectedExpansionStatus expansionStatus) *drivers.Hooks
 			}
 		}
+	case expansionFailedOnNodeWithFinalError:
+		expansionRequest, ok := request.(*csipbv1.NodeExpandVolumeRequest)
+		if ok {
+			requestedSize := resource.NewQuantity(expansionRequest.CapacityRange.RequiredBytes, resource.BinarySI)
+			if requestedSize.Cmp(maxNodeExpansionLimit) > 0 {
+				return nil, status.Error(codes.PermissionDenied, "permission denied for expansion")
+			}
+		}
 	}
 
 	return nil, nil
diff --git a/test/e2e/storage/drivers/csi.go b/test/e2e/storage/drivers/csi.go
index f11964a0bb3..c7ce8a7663e 100644
--- a/test/e2e/storage/drivers/csi.go
+++ b/test/e2e/storage/drivers/csi.go
@@ -350,6 +350,7 @@ type mockCSIDriver struct {
 	embeddedCSIDriver *mockdriver.CSIDriver
 	enableSELinuxMount *bool
 	enableRecoverExpansionFailure bool
+	disableControllerExpansion bool
 	enableHonorPVReclaimPolicy bool
 
 	// Additional values set during PrepareTest
@@ -392,6 +393,7 @@ type CSIMockDriverOpts struct {
 	EnableTopology bool
 	EnableResizing bool
 	EnableNodeExpansion bool
+	DisableControllerExpansion bool
 	EnableSnapshot bool
 	EnableVolumeMountGroup bool
 	EnableNodeVolumeCondition bool
@@ -550,6 +552,7 @@ func InitMockCSIDriver(driverOpts CSIMockDriverOpts) MockCSITestDriver {
 		attachLimit: driverOpts.AttachLimit,
 		enableNodeExpansion: driverOpts.EnableNodeExpansion,
 		enableNodeVolumeCondition: driverOpts.EnableNodeVolumeCondition,
+		disableControllerExpansion: driverOpts.DisableControllerExpansion,
 		tokenRequests: driverOpts.TokenRequests,
 		requiresRepublish: driverOpts.RequiresRepublish,
 		fsGroupPolicy: driverOpts.FSGroupPolicy,
@@ -628,6 +631,7 @@ func (m *mockCSIDriver) PrepareTest(ctx context.Context, f *framework.Framework)
 		DriverName: "csi-mock-" + f.UniqueName,
 		AttachLimit: int64(m.attachLimit),
 		NodeExpansionRequired: m.enableNodeExpansion,
+		DisableControllerExpansion: m.disableControllerExpansion,
 		NodeVolumeConditionRequired: m.enableNodeVolumeCondition,
 		VolumeMountGroupRequired: m.enableVolumeMountGroup,
 		EnableTopology: m.enableTopology,
diff --git a/test/e2e/storage/testsuites/volume_expand.go b/test/e2e/storage/testsuites/volume_expand.go
index 8f64ec4bd49..4146eb2e561 100644
--- a/test/e2e/storage/testsuites/volume_expand.go
+++ b/test/e2e/storage/testsuites/volume_expand.go
@@ -184,7 +184,7 @@ func (v *volumeExpandTestSuite) DefineTests(driver storageframework.TestDriver,
 			newSize := currentPvcSize.DeepCopy()
 			newSize.Add(resource.MustParse("1Gi"))
 			framework.Logf("currentPvcSize %v, newSize %v", currentPvcSize, newSize)
-			_, err = ExpandPVCSize(ctx, l.resource.Pvc, newSize, f.ClientSet)
+			_, err = ExpandPVCSizeToError(ctx, l.resource.Pvc, newSize, f.ClientSet)
 			gomega.Expect(err).To(gomega.MatchError(apierrors.IsForbidden, "While updating non-expandable PVC"))
 		})
 	} else {
@@ -318,15 +318,15 @@ func ExpandPVCSize(ctx context.Context, origPVC *v1.PersistentVolumeClaim, size
 	// Retry the update on error, until we hit a timeout.
 	// TODO: Determine whether "retry with timeout" is appropriate here. Maybe we should only retry on version conflict.
 	var lastUpdateError error
-	waitErr := wait.PollImmediate(resizePollInterval, 30*time.Second, func() (bool, error) {
+	waitErr := wait.PollUntilContextTimeout(ctx, resizePollInterval, 30*time.Second, true, func(pollContext context.Context) (bool, error) {
 		var err error
-		updatedPVC, err = c.CoreV1().PersistentVolumeClaims(origPVC.Namespace).Get(ctx, pvcName, metav1.GetOptions{})
+		updatedPVC, err = c.CoreV1().PersistentVolumeClaims(origPVC.Namespace).Get(pollContext, pvcName, metav1.GetOptions{})
 		if err != nil {
 			return false, fmt.Errorf("error fetching pvc %q for resizing: %w", pvcName, err)
 		}
 
 		updatedPVC.Spec.Resources.Requests[v1.ResourceStorage] = size
-		updatedPVC, err = c.CoreV1().PersistentVolumeClaims(origPVC.Namespace).Update(ctx, updatedPVC, metav1.UpdateOptions{})
+		updatedPVC, err = c.CoreV1().PersistentVolumeClaims(origPVC.Namespace).Update(pollContext, updatedPVC, metav1.UpdateOptions{})
 		if err != nil {
 			framework.Logf("Error updating pvc %s: %v", pvcName, err)
 			lastUpdateError = err
@@ -343,6 +343,42 @@ func ExpandPVCSize(ctx context.Context, origPVC *v1.PersistentVolumeClaim, size
 	return updatedPVC, nil
 }
 
+func ExpandPVCSizeToError(ctx context.Context, origPVC *v1.PersistentVolumeClaim, size resource.Quantity, c clientset.Interface) (*v1.PersistentVolumeClaim, error) {
+	pvcName := origPVC.Name
+	updatedPVC := origPVC.DeepCopy()
+
+	var lastUpdateError error
+
+	waitErr := wait.PollUntilContextTimeout(ctx, resizePollInterval, 30*time.Second, true, func(pollContext context.Context) (bool, error) {
+		var err error
+		updatedPVC, err = c.CoreV1().PersistentVolumeClaims(origPVC.Namespace).Get(pollContext, pvcName, metav1.GetOptions{})
+		if err != nil {
+			return false, fmt.Errorf("error fetching pvc %q for resizing: %w", pvcName, err)
+		}
+
+		updatedPVC.Spec.Resources.Requests[v1.ResourceStorage] = size
+		updatedPVC, err = c.CoreV1().PersistentVolumeClaims(origPVC.Namespace).Update(pollContext, updatedPVC, metav1.UpdateOptions{})
+		if err == nil {
+			return false, fmt.Errorf("pvc %s should not be allowed to be updated", pvcName)
+		} else {
+			lastUpdateError = err
+			if apierrors.IsForbidden(err) {
+				return true, nil
+			}
+			framework.Logf("Error updating pvc %s: %v", pvcName, err)
+			return false, nil
+		}
+	})
+
+	if wait.Interrupted(waitErr) {
+		return nil, fmt.Errorf("timed out attempting to update PVC size. last update error: %w", lastUpdateError)
+	}
+	if waitErr != nil {
+		return nil, fmt.Errorf("failed to expand PVC size (check logs for error): %w", waitErr)
+	}
+	return updatedPVC, lastUpdateError
+}
+
 // WaitForResizingCondition waits for the pvc condition to be PersistentVolumeClaimResizing
 func WaitForResizingCondition(ctx context.Context, pvc *v1.PersistentVolumeClaim, c clientset.Interface, duration time.Duration) error {
 	waitErr := wait.PollUntilContextTimeout(ctx, resizePollInterval, duration, true, func(ctx context.Context) (bool, error) {
@@ -423,9 +459,9 @@ func WaitForPendingFSResizeCondition(ctx context.Context, pvc *v1.PersistentVolu
 // WaitForFSResize waits for the filesystem in the pv to be resized
 func WaitForFSResize(ctx context.Context, pvc *v1.PersistentVolumeClaim, c clientset.Interface) (*v1.PersistentVolumeClaim, error) {
 	var updatedPVC *v1.PersistentVolumeClaim
-	waitErr := wait.PollUntilContextTimeout(ctx, resizePollInterval, totalResizeWaitPeriod, true, func(ctx context.Context) (bool, error) {
+	waitErr := wait.PollUntilContextTimeout(ctx, resizePollInterval, totalResizeWaitPeriod, true, func(pollContext context.Context) (bool, error) {
 		var err error
-		updatedPVC, err = c.CoreV1().PersistentVolumeClaims(pvc.Namespace).Get(ctx, pvc.Name, metav1.GetOptions{})
+		updatedPVC, err = c.CoreV1().PersistentVolumeClaims(pvc.Namespace).Get(pollContext, pvc.Name, metav1.GetOptions{})
 		if err != nil {
 			return false, fmt.Errorf("error fetching pvc %q for checking for resize status : %w", pvc.Name, err)
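For completeness, this is how the new ExpandPVCSizeToError helper is exercised by the non-expandable-PVC test in the DefineTests hunk earlier in this diff; ctx, l, newSize, f and err come from that closure and are assumed to be in scope.

// Usage sketch (mirrors the DefineTests change above): the spec update is
// expected to be rejected, and the helper only returns once a Forbidden
// error has been observed.
_, err = ExpandPVCSizeToError(ctx, l.resource.Pvc, newSize, f.ClientSet)
gomega.Expect(err).To(gomega.MatchError(apierrors.IsForbidden, "While updating non-expandable PVC"))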