mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-30 15:05:27 +00:00
Fix race condition between external-resizer and kubelet
This fixes a race condition that can occur when the resize controller has just finished volume expansion and has updated the PV but has not yet updated the PVC. The workaround proposed here should no longer be necessary once RecoverVolumeExpansionFailure goes GA/beta.
This commit is contained in:
parent
a9e4f5b786
commit
d190fa3e7d
@ -1279,6 +1279,16 @@ func Test_Run_Positive_VolumeFSResizeControllerAttachEnabled(t *testing.T) {
|
||||
newPVSize: resource.MustParse("15G"),
|
||||
oldPVSize: resource.MustParse("13G"),
|
||||
},
|
||||
{
|
||||
name: "expand-fs-volume with unsupported error",
|
||||
volumeMode: &fsMode,
|
||||
expansionFailed: false,
|
||||
pvName: volumetesting.FailWithUnSupportedVolumeName,
|
||||
pvcSize: resource.MustParse("10G"),
|
||||
pvcStatusSize: resource.MustParse("10G"),
|
||||
newPVSize: resource.MustParse("15G"),
|
||||
oldPVSize: resource.MustParse("13G"),
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
|
@ -72,7 +72,7 @@ func (c *csiPlugin) nodeExpandWithClient(
|
||||
}
|
||||
|
||||
if !nodeExpandSet {
|
||||
return false, fmt.Errorf("Expander.NodeExpand found CSI plugin %s/%s to not support node expansion", c.GetPluginName(), driverName)
|
||||
return false, volumetypes.NewOperationNotSupportedError(fmt.Sprintf("NodeExpand is not supported by the CSI driver %s", driverName))
|
||||
}
|
||||
|
||||
pv := resizeOptions.VolumeSpec.PersistentVolume
|
||||
|
@ -84,6 +84,7 @@ const (
|
||||
|
||||
// FailWithInUseVolumeName will cause NodeExpandVolume to result in FailedPrecondition error
|
||||
FailWithInUseVolumeName = "fail-expansion-in-use"
|
||||
FailWithUnSupportedVolumeName = "fail-expansion-unsupported"
|
||||
|
||||
FailVolumeExpansion = "fail-expansion-test"
|
||||
|
||||
@ -500,8 +501,12 @@ func (plugin *FakeVolumePlugin) NodeExpand(resizeOptions volume.NodeResizeOption
|
||||
if resizeOptions.VolumeSpec.Name() == FailWithInUseVolumeName {
|
||||
return false, volumetypes.NewFailedPreconditionError("volume-in-use")
|
||||
}
|
||||
if resizeOptions.VolumeSpec.Name() == FailWithUnSupportedVolumeName {
|
||||
return false, volumetypes.NewOperationNotSupportedError("volume-unsupported")
|
||||
}
|
||||
|
||||
if resizeOptions.VolumeSpec.Name() == AlwaysFailNodeExpansion {
|
||||
return false, fmt.Errorf("Test failure: NodeExpand")
|
||||
return false, fmt.Errorf("test failure: NodeExpand")
|
||||
}
|
||||
|
||||
if resizeOptions.VolumeSpec.Name() == FailVolumeExpansion {
|
||||
|
@ -2205,6 +2205,14 @@ func (og *operationGenerator) legacyCallNodeExpandOnPlugin(resizeOp nodeResizeOp
|
||||
|
||||
_, resizeErr := expandableVolumePlugin.NodeExpand(rsOpts)
|
||||
if resizeErr != nil {
|
||||
// This is a workaround for now, until the RecoverVolumeExpansionFailure feature goes GA.
|
||||
// If the RecoverVolumeExpansionFailure feature is enabled, we will never hit this state, because
|
||||
// we will wait for VolumeExpansionPendingOnNode before trying to expand volume in kubelet.
|
||||
if volumetypes.IsOperationNotSupportedError(resizeErr) {
|
||||
klog.V(4).InfoS(volumeToMount.GenerateMsgDetailed("MountVolume.NodeExpandVolume failed", "NodeExpandVolume not supported"), "pod", klog.KObj(volumeToMount.Pod))
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// if driver returned FailedPrecondition error that means
|
||||
// volume expansion should not be retried on this node but
|
||||
// expansion operation should not block mounting
|
||||
|
@ -102,6 +102,23 @@ func IsFailedPreconditionError(err error) bool {
|
||||
return errors.As(err, &failedPreconditionError)
|
||||
}
|
||||
|
||||
// OperationNotSupported is an error type that carries a plain message and is
// used to report that a requested operation is not supported (for example,
// NodeExpand on a driver that does not implement it).
type OperationNotSupported struct {
|
||||
// msg is the text returned by Error.
msg string
|
||||
}
|
||||
|
||||
// Error returns the message the error was created with, which makes
// *OperationNotSupported satisfy the built-in error interface.
func (err *OperationNotSupported) Error() string {
|
||||
return err.msg
|
||||
}
|
||||
|
||||
// NewOperationNotSupportedError returns a new *OperationNotSupported whose
// Error method reports msg.
func NewOperationNotSupportedError(msg string) *OperationNotSupported {
|
||||
return &OperationNotSupported{msg: msg}
|
||||
}
|
||||
|
||||
// IsOperationNotSupportedError reports whether err, or any error it wraps,
// is an *OperationNotSupported.
func IsOperationNotSupportedError(err error) bool {
|
||||
// errors.As needs a pointer to a variable of the target type to match against.
var operationNotSupportedError *OperationNotSupported
|
||||
return errors.As(err, &operationNotSupportedError)
|
||||
}
|
||||
|
||||
// TransientOperationFailure indicates operation failed with a transient error
|
||||
// and may fix itself when retried.
|
||||
type TransientOperationFailure struct {
|
||||
|
Loading…
Reference in New Issue
Block a user