Fix UnmountDevice error cases

When UnmountDevice fails, kubelet treats the volume mount as uncertain,
because it does not know at which stage UnmountDevice failed. It may be
already partially unmounted / destroyed.

As a result, MountDevice will be performed when a new Pod is started on the
node after an UnmountDevice failure.
This commit is contained in:
Jan Safranek 2021-03-11 12:59:01 +01:00
parent 54ad7e40f1
commit f4b41c0a17

View File

@ -821,6 +821,22 @@ func (og *operationGenerator) GenerateUnmountVolumeFunc(
// Execute unmount
unmountErr := volumeUnmounter.TearDown()
if unmountErr != nil {
// Mark the volume as uncertain, so SetUp is called for new pods. Teardown may be already in progress.
opts := MarkVolumeOpts{
PodName: volumeToUnmount.PodName,
PodUID: volumeToUnmount.PodUID,
VolumeName: volumeToUnmount.VolumeName,
OuterVolumeSpecName: volumeToUnmount.OuterVolumeSpecName,
VolumeGidVolume: volumeToUnmount.VolumeGidValue,
VolumeSpec: volumeToUnmount.VolumeSpec,
VolumeMountState: VolumeMountUncertain,
}
markMountUncertainErr := actualStateOfWorld.MarkVolumeMountAsUncertain(opts)
if markMountUncertainErr != nil {
// There is nothing else we can do. Hope that UnmountVolume will be re-tried shortly.
klog.Errorf(volumeToUnmount.GenerateErrorDetailed("UnmountVolume.MarkVolumeMountAsUncertain failed", markMountUncertainErr).Error())
}
// On failure, return error. Caller will log and retry.
eventErr, detailedErr := volumeToUnmount.GenerateError("UnmountVolume.TearDown failed", unmountErr)
return volumetypes.NewOperationContext(eventErr, detailedErr, migrated)
@ -907,6 +923,13 @@ func (og *operationGenerator) GenerateUnmountDeviceFunc(
// Execute unmount
unmountDeviceErr := volumeDeviceUnmounter.UnmountDevice(deviceMountPath)
if unmountDeviceErr != nil {
// Mark the device as uncertain, so MountDevice is called for new pods. UnmountDevice may be already in progress.
markDeviceUncertainErr := actualStateOfWorld.MarkDeviceAsUncertain(deviceToDetach.VolumeName, deviceToDetach.DevicePath, deviceMountPath)
if markDeviceUncertainErr != nil {
// There is nothing else we can do. Hope that UnmountDevice will be re-tried shortly.
klog.Errorf(deviceToDetach.GenerateErrorDetailed("UnmountDevice.MarkDeviceAsUncertain failed", markDeviceUncertainErr).Error())
}
// On failure, return error. Caller will log and retry.
eventErr, detailedErr := deviceToDetach.GenerateError("UnmountDevice failed", unmountDeviceErr)
return volumetypes.NewOperationContext(eventErr, detailedErr, migrated)
@ -1208,6 +1231,25 @@ func (og *operationGenerator) GenerateUnmapVolumeFunc(
// plugins/kubernetes.io/{PluginName}/volumeDevices/{volumePluginDependentPath}/{podUID}
globalUnmapPath := volumeToUnmount.DeviceMountPath
// Mark the volume mount as uncertain to make sure kubelet calls UnmapVolume again in all the "return err"
// cases below. The volume is marked as fully un-mapped at the end of this function, when everything
// succeeds.
markVolumeOpts := MarkVolumeOpts{
PodName: volumeToUnmount.PodName,
PodUID: volumeToUnmount.PodUID,
VolumeName: volumeToUnmount.VolumeName,
OuterVolumeSpecName: volumeToUnmount.OuterVolumeSpecName,
VolumeGidVolume: volumeToUnmount.VolumeGidValue,
VolumeSpec: volumeToUnmount.VolumeSpec,
VolumeMountState: VolumeMountUncertain,
}
markVolumeUncertainErr := actualStateOfWorld.MarkVolumeMountAsUncertain(markVolumeOpts)
if markVolumeUncertainErr != nil {
// On failure, return error. Caller will log and retry.
eventErr, detailedErr := volumeToUnmount.GenerateError("UnmapVolume.MarkDeviceAsUncertain failed", markVolumeUncertainErr)
return volumetypes.NewOperationContext(eventErr, detailedErr, migrated)
}
// Execute common unmap
unmapErr := util.UnmapBlockVolume(og.blkUtil, globalUnmapPath, podDeviceUnmapPath, volName, volumeToUnmount.PodUID)
if unmapErr != nil {
@ -1309,6 +1351,17 @@ func (og *operationGenerator) GenerateUnmapDeviceFunc(
return volumetypes.NewOperationContext(eventErr, detailedErr, migrated)
}
// Mark the device as uncertain to make sure kubelet calls UnmapDevice again in all the "return err"
// cases below. The volume is marked as fully un-mapped at the end of this function, when everything
// succeeds.
markDeviceUncertainErr := actualStateOfWorld.MarkDeviceAsUncertain(
deviceToDetach.VolumeName, deviceToDetach.DevicePath, globalMapPath)
if markDeviceUncertainErr != nil {
// On failure, return error. Caller will log and retry.
eventErr, detailedErr := deviceToDetach.GenerateError("UnmapDevice.MarkDeviceAsUncertain failed", markDeviceUncertainErr)
return volumetypes.NewOperationContext(eventErr, detailedErr, migrated)
}
// Call TearDownDevice if blockVolumeUnmapper implements CustomBlockVolumeUnmapper
if customBlockVolumeUnmapper, ok := blockVolumeUnmapper.(volume.CustomBlockVolumeUnmapper); ok {
// Execute tear down device