kubelet: Fix the volume manager not checking the device mount state in the actual state of the world before marking a volume as detached. Because of this missing check, a pod could get stuck in the Terminating state after it was deleted.

This commit is contained in:
carlory 2024-10-21 15:01:45 +08:00
parent b73931a601
commit 04f5b20388
2 changed files with 22 additions and 0 deletions

View File

@ -169,6 +169,11 @@ type ActualStateOfWorld interface {
// or have a mount/unmount operation pending.
GetAttachedVolumes() []AttachedVolume
// GetAttachedVolume returns the volume that is known to be attached to the node
// with the given volume name. If the volume is not found, the second return value
// is false.
GetAttachedVolume(volumeName v1.UniqueVolumeName) (AttachedVolume, bool)
// Add the specified volume to ASW as uncertainly attached.
AddAttachUncertainReconstructedVolume(volumeName v1.UniqueVolumeName, volumeSpec *volume.Spec, nodeName types.NodeName, devicePath string) error
@ -1125,6 +1130,18 @@ func (asw *actualStateOfWorld) GetAttachedVolumes() []AttachedVolume {
return allAttachedVolumes
}
// GetAttachedVolume looks up volumeName in asw.attachedVolumes while holding
// the read lock. When an entry exists, it is converted with newAttachedVolume
// and returned together with true; otherwise a zero-value AttachedVolume and
// false are returned.
func (asw *actualStateOfWorld) GetAttachedVolume(volumeName v1.UniqueVolumeName) (AttachedVolume, bool) {
	asw.RLock()
	defer asw.RUnlock()

	// The conversion takes the address of the local copy of the map value,
	// exactly as the lookup result is a copy and not the stored entry itself.
	if entry, found := asw.attachedVolumes[volumeName]; found {
		return asw.newAttachedVolume(&entry), true
	}
	return AttachedVolume{}, false
}
func (asw *actualStateOfWorld) GetUnmountedVolumes() []AttachedVolume {
asw.RLock()
defer asw.RUnlock()

View File

@ -269,6 +269,11 @@ func (rc *reconciler) unmountDetachDevices() {
// Check IsOperationPending to avoid marking a volume as detached if it's in the process of mounting.
if !rc.desiredStateOfWorld.VolumeExists(attachedVolume.VolumeName, attachedVolume.SELinuxMountContext) &&
!rc.operationExecutor.IsOperationPending(attachedVolume.VolumeName, nestedpendingoperations.EmptyUniquePodName, nestedpendingoperations.EmptyNodeName) {
// Re-read the actual state of the world, maybe the volume got mounted in the meantime.
// This is safe, because there is no pending operation (checked above) and no new operation
// could start in the meantime. The only goroutine that adds new operations is this reconciler.
attachedVolume, _ = rc.actualStateOfWorld.GetAttachedVolume(attachedVolume.VolumeName)
if attachedVolume.DeviceMayBeMounted() {
// Volume is globally mounted to device, unmount it
klog.V(5).InfoS(attachedVolume.GenerateMsgDetailed("Starting operationExecutor.UnmountDevice", ""))