From a95842095ec262e05ea474a813daf6a3b1ff5e5c Mon Sep 17 00:00:00 2001 From: Jan Safranek Date: Wed, 19 May 2021 18:11:52 +0200 Subject: [PATCH] Retry reading /proc/mounts indefinitely in FC and iSCSI volume reconstruction iSCSI and FC volume plugins do not implement real 3rd party attach/detach. If reconstruction fails with an error on an FC or iSCSI volume, it will not be unmounted from the volume global dir and at the same time it will be marked as unused, to be available to be mounted on another node. The volume can then be mounted on several nodes, resulting in volume corruption. The other block-based volume plugins implement attach/detach that either makes the volume stuck (can't be detached) or will be force-detached from a node before attaching it somewhere else. --- pkg/volume/fc/fc.go | 7 ++++++- pkg/volume/iscsi/iscsi.go | 8 +++++++- pkg/volume/util/util.go | 42 ++++++++++++++++++++++++++++++++------- 3 files changed, 48 insertions(+), 9 deletions(-) diff --git a/pkg/volume/fc/fc.go b/pkg/volume/fc/fc.go index d7ef09fa24c..4422cf7102e 100644 --- a/pkg/volume/fc/fc.go +++ b/pkg/volume/fc/fc.go @@ -239,8 +239,13 @@ func (plugin *fcPlugin) ConstructVolumeSpec(volumeName, mountPath string) (*volu // mountPath: pods/{podUid}/volumes/kubernetes.io~fc/{volumeName} // globalPDPath : plugins/kubernetes.io/fc/50060e801049cfd1-lun-0 var globalPDPath string + mounter := plugin.host.GetMounter(plugin.GetPluginName()) - paths, err := mounter.GetMountRefs(mountPath) + // Try really hard to get the global mount of the volume, an error returned from here would + // leave the global mount still mounted, while marking the volume as unused. + // The volume can then be mounted on several nodes, resulting in volume + // corruption. 
+ paths, err := util.GetReliableMountRefs(mounter, mountPath) if err != nil { return nil, err } diff --git a/pkg/volume/iscsi/iscsi.go b/pkg/volume/iscsi/iscsi.go index 5cbf3069369..6f7ec6ef42a 100644 --- a/pkg/volume/iscsi/iscsi.go +++ b/pkg/volume/iscsi/iscsi.go @@ -34,6 +34,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/kubernetes/pkg/volume" + "k8s.io/kubernetes/pkg/volume/util" ioutil "k8s.io/kubernetes/pkg/volume/util" "k8s.io/kubernetes/pkg/volume/util/volumepathhandler" ) @@ -218,10 +219,15 @@ func (plugin *iscsiPlugin) ConstructVolumeSpec(volumeName, mountPath string) (*v // Find globalPDPath from pod volume directory(mountPath) var globalPDPath string mounter := plugin.host.GetMounter(plugin.GetPluginName()) - paths, err := mounter.GetMountRefs(mountPath) + // Try really hard to get the global mount of the volume, an error returned from here would + // leave the global mount still mounted, while marking the volume as unused. + // The volume can then be mounted on several nodes, resulting in volume + // corruption. 
+ paths, err := util.GetReliableMountRefs(mounter, mountPath) if err != nil { return nil, err } + for _, path := range paths { if strings.Contains(path, plugin.host.GetPluginDir(iscsiPluginName)) { globalPDPath = path diff --git a/pkg/volume/util/util.go b/pkg/volume/util/util.go index 3bffa61139e..83cb170cb6b 100644 --- a/pkg/volume/util/util.go +++ b/pkg/volume/util/util.go @@ -20,18 +20,12 @@ import ( "context" "fmt" "io/ioutil" - storagehelpers "k8s.io/component-helpers/storage/volume" "os" "path/filepath" "reflect" "runtime" "strings" - - "k8s.io/component-helpers/scheduling/corev1" - "k8s.io/klog/v2" - "k8s.io/mount-utils" - utilexec "k8s.io/utils/exec" - utilstrings "k8s.io/utils/strings" + "time" v1 "k8s.io/api/core/v1" storage "k8s.io/api/storage/v1" @@ -40,13 +34,21 @@ import ( apiruntime "k8s.io/apimachinery/pkg/runtime" utypes "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/apimachinery/pkg/util/wait" clientset "k8s.io/client-go/kubernetes" + "k8s.io/component-helpers/scheduling/corev1" + storagehelpers "k8s.io/component-helpers/storage/volume" + "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/api/legacyscheme" podutil "k8s.io/kubernetes/pkg/api/v1/pod" "k8s.io/kubernetes/pkg/securitycontext" "k8s.io/kubernetes/pkg/volume" "k8s.io/kubernetes/pkg/volume/util/types" "k8s.io/kubernetes/pkg/volume/util/volumepathhandler" + "k8s.io/mount-utils" + utilexec "k8s.io/utils/exec" + "k8s.io/utils/io" + utilstrings "k8s.io/utils/strings" ) const ( @@ -732,3 +734,29 @@ func IsDeviceMountableVolume(volumeSpec *volume.Spec, volumePluginMgr *volume.Vo return false } + +// GetReliableMountRefs calls mounter.GetMountRefs and retries on IsInconsistentReadError. +// To be used in volume reconstruction of volume plugins that don't have any protection +// against mounting a single volume on multiple nodes (such as attach/detach). 
+func GetReliableMountRefs(mounter mount.Interface, mountPath string) ([]string, error) { + var paths []string + var lastErr error + err := wait.PollImmediate(10*time.Millisecond, time.Minute, func() (bool, error) { + var err error + paths, err = mounter.GetMountRefs(mountPath) + if io.IsInconsistentReadError(err) { + lastErr = err + return false, nil + } + if err != nil { + return false, err + } + return true, nil + }) + if err == wait.ErrWaitTimeout { + klog.Errorf("Failed to read mount refs from /proc/mounts for %s: %s", mountPath, err) + klog.Errorf("Kubelet cannot unmount volume at %s, please unmount it manually", mountPath) + return nil, lastErr + } + return paths, err +}