Fix code to process volumes which were skipped during reconstruction

This commit is contained in:
Hemant Kumar
2022-06-14 15:37:21 -04:00
parent c8b85fb470
commit eb071c2755
4 changed files with 149 additions and 34 deletions

View File

@@ -119,6 +119,7 @@ func NewReconciler(
operationExecutor: operationExecutor,
mounter: mounter,
hostutil: hostutil,
skippedDuringReconstruction: map[v1.UniqueVolumeName]*globalVolumeInfo{},
volumePluginMgr: volumePluginMgr,
kubeletPodsDir: kubeletPodsDir,
timeOfLastSync: time.Time{},
@@ -138,6 +139,7 @@ type reconciler struct {
mounter mount.Interface
hostutil hostutil.HostUtils
volumePluginMgr *volumepkg.VolumePluginMgr
skippedDuringReconstruction map[v1.UniqueVolumeName]*globalVolumeInfo
kubeletPodsDir string
timeOfLastSync time.Time
}
@@ -175,6 +177,10 @@ func (rc *reconciler) reconcile() {
// Ensure devices that should be detached/unmounted are detached/unmounted.
rc.unmountDetachDevices()
if len(rc.skippedDuringReconstruction) > 0 {
rc.processReconstructedVolumes()
}
}
func (rc *reconciler) unmountVolumes() {
@@ -249,6 +255,63 @@ func (rc *reconciler) mountAttachedVolumes(volumeToMount cache.VolumeToMount, po
}
}
// processReconstructedVolumes checks volumes which were skipped during the reconstruction
// process because it was assumed that since these volumes were present in DSOW they would get
// mounted correctly and make it into ASOW.
// But if mount operation fails for some reason then we still need to mark the volume as uncertain
// and wait for the next reconciliation loop to deal with it.
func (rc *reconciler) processReconstructedVolumes() {
if rc.kubeClient != nil {
rc.updateDevicePath(rc.skippedDuringReconstruction)
}
for volumeName, glblVolumeInfo := range rc.skippedDuringReconstruction {
// check if volume is marked as attached to the node
// for now lets only process volumes which are at least known as attached to the node
// this should help with most volume types (including secret, configmap etc)
if !rc.actualStateOfWorld.VolumeExists(volumeName) {
klog.V(4).InfoS("Volume is not marked as attached to the node. Skipping processing of the volume", "volumeName", volumeName)
delete(rc.skippedDuringReconstruction, volumeName)
continue
}
uncertainVolumeCount := 0
for podName, volume := range glblVolumeInfo.podVolumes {
volumeNotMounted := rc.actualStateOfWorld.PodRemovedFromVolume(podName, volume.volumeName)
// if volume is not mounted then lets mark volume mounted in uncertain state in ASOW
if volumeNotMounted {
err := rc.markVolumeState(volume, operationexecutor.VolumeMountUncertain)
uncertainVolumeCount += 1
if err != nil {
klog.ErrorS(err, "Could not add pod to volume information to actual state of world", "pod", klog.KObj(volume.pod))
continue
}
klog.V(4).InfoS("Volume is marked as mounted and added into the actual state", "pod", klog.KObj(volume.pod), "podName", volume.podName, "volumeName", volume.volumeName)
}
}
if uncertainVolumeCount > 0 {
// If the volume has device to mount, we mark its device as mounted.
if glblVolumeInfo.deviceMounter != nil || glblVolumeInfo.blockVolumeMapper != nil {
deviceMountPath, err := getDeviceMountPath(glblVolumeInfo)
if err != nil {
klog.ErrorS(err, "Could not find device mount path for volume", "volumeName", glblVolumeInfo.volumeName)
continue
}
currentMountState := rc.actualStateOfWorld.GetDeviceMountState(glblVolumeInfo.volumeName)
if currentMountState == operationexecutor.DeviceNotMounted {
err = rc.actualStateOfWorld.MarkDeviceAsUncertain(glblVolumeInfo.volumeName, glblVolumeInfo.devicePath, deviceMountPath)
if err != nil {
klog.ErrorS(err, "Could not mark device is mounted to actual state of world", "volume", glblVolumeInfo.volumeName)
continue
}
klog.V(4).InfoS("Volume is marked device as mounted and added into the actual state", "volumeName", glblVolumeInfo.volumeName)
}
}
}
}
rc.skippedDuringReconstruction = make(map[v1.UniqueVolumeName]*globalVolumeInfo)
}
func (rc *reconciler) waitForVolumeAttach(volumeToMount cache.VolumeToMount) {
if rc.controllerAttachDetachEnabled || !volumeToMount.PluginIsAttachable {
//// lets not spin a goroutine and unnecessarily trigger exponential backoff if this happens
@@ -418,21 +481,6 @@ func (rc *reconciler) syncStates(kubeletPodDir string) {
rc.cleanupMounts(volume)
continue
}
if volumeInDSW {
// Some pod needs the volume. And it exists on disk. Some previous
// kubelet must have created the directory, therefore it must have
// reported the volume as in use. Mark the volume as in use also in
// this new kubelet so reconcile() calls SetUp and re-mounts the
// volume if it's necessary.
volumeNeedReport = append(volumeNeedReport, reconstructedVolume.volumeName)
klog.V(4).InfoS("Volume exists in desired state, marking as InUse", "podName", volume.podName, "volumeSpecName", volume.volumeSpecName)
continue
}
// There is no pod that uses the volume.
if rc.operationExecutor.IsOperationPending(reconstructedVolume.volumeName, nestedpendingoperations.EmptyUniquePodName, nestedpendingoperations.EmptyNodeName) {
klog.InfoS("Volume is in pending operation, skip cleaning up mounts")
}
klog.V(2).InfoS("Reconciler sync states: could not find pod information in desired state, update it in actual state", "reconstructedVolume", reconstructedVolume)
gvl := &globalVolumeInfo{
volumeName: reconstructedVolume.volumeName,
volumeSpec: reconstructedVolume.volumeSpec,
@@ -445,6 +493,22 @@ func (rc *reconciler) syncStates(kubeletPodDir string) {
gvl = cachedInfo
}
gvl.addPodVolume(reconstructedVolume)
if volumeInDSW {
// Some pod needs the volume. And it exists on disk. Some previous
// kubelet must have created the directory, therefore it must have
// reported the volume as in use. Mark the volume as in use also in
// this new kubelet so reconcile() calls SetUp and re-mounts the
// volume if it's necessary.
volumeNeedReport = append(volumeNeedReport, reconstructedVolume.volumeName)
rc.skippedDuringReconstruction[reconstructedVolume.volumeName] = gvl
klog.V(4).InfoS("Volume exists in desired state, marking as InUse", "podName", volume.podName, "volumeSpecName", volume.volumeSpecName)
continue
}
// There is no pod that uses the volume.
if rc.operationExecutor.IsOperationPending(reconstructedVolume.volumeName, nestedpendingoperations.EmptyUniquePodName, nestedpendingoperations.EmptyNodeName) {
klog.InfoS("Volume is in pending operation, skip cleaning up mounts")
}
klog.V(2).InfoS("Reconciler sync states: could not find pod information in desired state, update it in actual state", "reconstructedVolume", reconstructedVolume)
volumesNeedUpdate[reconstructedVolume.volumeName] = gvl
}
@@ -666,18 +730,7 @@ func (rc *reconciler) updateStates(volumesNeedUpdate map[v1.UniqueVolumeName]*gl
continue
}
for _, volume := range gvl.podVolumes {
markVolumeOpts := operationexecutor.MarkVolumeOpts{
PodName: volume.podName,
PodUID: types.UID(volume.podName),
VolumeName: volume.volumeName,
Mounter: volume.mounter,
BlockVolumeMapper: volume.blockVolumeMapper,
OuterVolumeSpecName: volume.outerVolumeSpecName,
VolumeGidVolume: volume.volumeGidValue,
VolumeSpec: volume.volumeSpec,
VolumeMountState: operationexecutor.VolumeMounted,
}
err = rc.actualStateOfWorld.MarkVolumeAsMounted(markVolumeOpts)
err = rc.markVolumeState(volume, operationexecutor.VolumeMounted)
if err != nil {
klog.ErrorS(err, "Could not add pod to volume information to actual state of world", "pod", klog.KObj(volume.pod))
continue
@@ -702,6 +755,22 @@ func (rc *reconciler) updateStates(volumesNeedUpdate map[v1.UniqueVolumeName]*gl
return nil
}
func (rc *reconciler) markVolumeState(volume *reconstructedVolume, volumeState operationexecutor.VolumeMountState) error {
markVolumeOpts := operationexecutor.MarkVolumeOpts{
PodName: volume.podName,
PodUID: types.UID(volume.podName),
VolumeName: volume.volumeName,
Mounter: volume.mounter,
BlockVolumeMapper: volume.blockVolumeMapper,
OuterVolumeSpecName: volume.outerVolumeSpecName,
VolumeGidVolume: volume.volumeGidValue,
VolumeSpec: volume.volumeSpec,
VolumeMountState: operationexecutor.VolumeMounted,
}
err := rc.actualStateOfWorld.MarkVolumeAsMounted(markVolumeOpts)
return err
}
// getVolumesFromPodDir scans through the volumes directories under the given pod directory.
// It returns a list of pod volume information including pod's uid, volume's plugin name, mount path,
// and volume spec name.