mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-22 19:31:44 +00:00
Merge pull request #102576 from dobsonj/101911
kubelet: do not call RemoveAll on volumes directory for orphaned pods
This commit is contained in:
commit
756203fda0
@ -18,6 +18,8 @@ package kubelet
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
@ -110,6 +112,57 @@ func (kl *Kubelet) newVolumeMounterFromPlugins(spec *volume.Spec, pod *v1.Pod, o
|
||||
return physicalMounter, nil
|
||||
}
|
||||
|
||||
// removeOrphanedPodVolumeDirs attempts to remove the pod volumes directory and
|
||||
// its subdirectories. There should be no files left under normal conditions
|
||||
// when this is called, so it effectively does a recursive rmdir instead of
|
||||
// RemoveAll to ensure it only removes directories and not regular files.
|
||||
func (kl *Kubelet) removeOrphanedPodVolumeDirs(uid types.UID) []error {
|
||||
orphanVolumeErrors := []error{}
|
||||
|
||||
// If there are still volume directories, attempt to rmdir them
|
||||
volumePaths, err := kl.getPodVolumePathListFromDisk(uid)
|
||||
if err != nil {
|
||||
orphanVolumeErrors = append(orphanVolumeErrors, fmt.Errorf("orphaned pod %q found, but error %v occurred during reading volume dir from disk", uid, err))
|
||||
return orphanVolumeErrors
|
||||
}
|
||||
if len(volumePaths) > 0 {
|
||||
for _, volumePath := range volumePaths {
|
||||
if err := syscall.Rmdir(volumePath); err != nil {
|
||||
orphanVolumeErrors = append(orphanVolumeErrors, fmt.Errorf("orphaned pod %q found, but failed to rmdir() volume at path %v: %v", uid, volumePath, err))
|
||||
} else {
|
||||
klog.InfoS("Cleaned up orphaned volume from pod", "podUID", uid, "path", volumePath)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If there are any volume-subpaths, attempt to rmdir them
|
||||
subpathVolumePaths, err := kl.getPodVolumeSubpathListFromDisk(uid)
|
||||
if err != nil {
|
||||
orphanVolumeErrors = append(orphanVolumeErrors, fmt.Errorf("orphaned pod %q found, but error %v occurred during reading of volume-subpaths dir from disk", uid, err))
|
||||
return orphanVolumeErrors
|
||||
}
|
||||
if len(subpathVolumePaths) > 0 {
|
||||
for _, subpathVolumePath := range subpathVolumePaths {
|
||||
if err := syscall.Rmdir(subpathVolumePath); err != nil {
|
||||
orphanVolumeErrors = append(orphanVolumeErrors, fmt.Errorf("orphaned pod %q found, but failed to rmdir() subpath at path %v: %v", uid, subpathVolumePath, err))
|
||||
} else {
|
||||
klog.InfoS("Cleaned up orphaned volume subpath from pod", "podUID", uid, "path", subpathVolumePath)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Remove any remaining subdirectories along with the volumes directory itself.
|
||||
// Fail if any regular files are encountered.
|
||||
podVolDir := kl.getPodVolumesDir(uid)
|
||||
if err := removeall.RemoveDirsOneFilesystem(kl.mounter, podVolDir); err != nil {
|
||||
orphanVolumeErrors = append(orphanVolumeErrors, fmt.Errorf("orphaned pod %q found, but error %v occurred when trying to remove the volumes dir", uid, err))
|
||||
} else {
|
||||
klog.InfoS("Cleaned up orphaned pod volumes dir", "podUID", uid, "path", podVolDir)
|
||||
}
|
||||
|
||||
return orphanVolumeErrors
|
||||
}
|
||||
|
||||
// cleanupOrphanedPodDirs removes the volumes of pods that should not be
|
||||
// running and that have no containers running. Note that we roll up logs here since it runs in the main loop.
|
||||
func (kl *Kubelet) cleanupOrphanedPodDirs(pods []*v1.Pod, runningPods []*kubecontainer.Pod) error {
|
||||
@ -147,55 +200,48 @@ func (kl *Kubelet) cleanupOrphanedPodDirs(pods []*v1.Pod, runningPods []*kubecon
|
||||
continue
|
||||
}
|
||||
|
||||
allVolumesCleanedUp := true
|
||||
|
||||
// If there are still volume directories, attempt to rmdir them
|
||||
volumePaths, err := kl.getPodVolumePathListFromDisk(uid)
|
||||
if err != nil {
|
||||
orphanVolumeErrors = append(orphanVolumeErrors, fmt.Errorf("orphaned pod %q found, but error %v occurred during reading volume dir from disk", uid, err))
|
||||
continue
|
||||
}
|
||||
if len(volumePaths) > 0 {
|
||||
for _, volumePath := range volumePaths {
|
||||
if err := syscall.Rmdir(volumePath); err != nil {
|
||||
orphanVolumeErrors = append(orphanVolumeErrors, fmt.Errorf("orphaned pod %q found, but failed to rmdir() volume at path %v: %v", uid, volumePath, err))
|
||||
allVolumesCleanedUp = false
|
||||
} else {
|
||||
klog.InfoS("Cleaned up orphaned volume from pod", "podUID", uid, "path", volumePath)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If there are any volume-subpaths, attempt to rmdir them
|
||||
subpathVolumePaths, err := kl.getPodVolumeSubpathListFromDisk(uid)
|
||||
if err != nil {
|
||||
orphanVolumeErrors = append(orphanVolumeErrors, fmt.Errorf("orphaned pod %q found, but error %v occurred during reading of volume-subpaths dir from disk", uid, err))
|
||||
continue
|
||||
}
|
||||
if len(subpathVolumePaths) > 0 {
|
||||
for _, subpathVolumePath := range subpathVolumePaths {
|
||||
if err := syscall.Rmdir(subpathVolumePath); err != nil {
|
||||
orphanVolumeErrors = append(orphanVolumeErrors, fmt.Errorf("orphaned pod %q found, but failed to rmdir() subpath at path %v: %v", uid, subpathVolumePath, err))
|
||||
allVolumesCleanedUp = false
|
||||
} else {
|
||||
klog.InfoS("Cleaned up orphaned volume subpath from pod", "podUID", uid, "path", subpathVolumePath)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !allVolumesCleanedUp {
|
||||
// Attempt to remove the pod volumes directory and its subdirs
|
||||
podVolumeErrors := kl.removeOrphanedPodVolumeDirs(uid)
|
||||
if len(podVolumeErrors) > 0 {
|
||||
orphanVolumeErrors = append(orphanVolumeErrors, podVolumeErrors...)
|
||||
// Not all volumes were removed, so don't clean up the pod directory yet. It is likely
|
||||
// that there are still mountpoints left which could stall RemoveAllOneFilesystem which
|
||||
// would otherwise be called below.
|
||||
// that there are still mountpoints or files left which could cause removal of the pod
|
||||
// directory to fail below.
|
||||
// Errors for all removal operations have already been recorded, so don't add another
|
||||
// one here.
|
||||
continue
|
||||
}
|
||||
|
||||
// Call RemoveAllOneFilesystem for remaining subdirs under the pod directory
|
||||
podDir := kl.getPodDir(uid)
|
||||
podSubdirs, err := ioutil.ReadDir(podDir)
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Could not read directory", "path", podDir)
|
||||
orphanRemovalErrors = append(orphanRemovalErrors, fmt.Errorf("orphaned pod %q found, but error %v occurred during reading the pod dir from disk", uid, err))
|
||||
continue
|
||||
}
|
||||
for _, podSubdir := range podSubdirs {
|
||||
podSubdirName := podSubdir.Name()
|
||||
podSubdirPath := filepath.Join(podDir, podSubdirName)
|
||||
// Never attempt RemoveAllOneFilesystem on the volumes directory,
|
||||
// as this could lead to data loss in some situations. The volumes
|
||||
// directory should have been removed by removeOrphanedPodVolumeDirs.
|
||||
if podSubdirName == "volumes" {
|
||||
err := fmt.Errorf("volumes subdir was found after it was removed")
|
||||
klog.ErrorS(err, "Orphaned pod found, but failed to remove volumes subdir", "podUID", uid, "path", podSubdirPath)
|
||||
continue
|
||||
}
|
||||
if err := removeall.RemoveAllOneFilesystem(kl.mounter, podSubdirPath); err != nil {
|
||||
klog.ErrorS(err, "Failed to remove orphaned pod subdir", "podUID", uid, "path", podSubdirPath)
|
||||
orphanRemovalErrors = append(orphanRemovalErrors, fmt.Errorf("orphaned pod %q found, but error %v occurred when trying to remove subdir %q", uid, err, podSubdirPath))
|
||||
}
|
||||
}
|
||||
|
||||
// Rmdir the pod dir, which should be empty if everything above was successful
|
||||
klog.V(3).InfoS("Orphaned pod found, removing", "podUID", uid)
|
||||
if err := removeall.RemoveAllOneFilesystem(kl.mounter, kl.getPodDir(uid)); err != nil {
|
||||
if err := syscall.Rmdir(podDir); err != nil {
|
||||
klog.ErrorS(err, "Failed to remove orphaned pod dir", "podUID", uid)
|
||||
orphanRemovalErrors = append(orphanRemovalErrors, err)
|
||||
orphanRemovalErrors = append(orphanRemovalErrors, fmt.Errorf("orphaned pod %q found, but error %v occurred when trying to remove the pod directory", uid, err))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -104,6 +104,16 @@ func TestCleanupOrphanedPodDirs(t *testing.T) {
|
||||
return validateDirNotExists(podDir)
|
||||
},
|
||||
},
|
||||
"pod-doesnot-exist-with-volume-subdir": {
|
||||
prepareFunc: func(kubelet *Kubelet) error {
|
||||
podDir := kubelet.getPodDir("pod1uid")
|
||||
return os.MkdirAll(filepath.Join(podDir, "volumes/plugin/name/subdir"), 0750)
|
||||
},
|
||||
validateFunc: func(kubelet *Kubelet) error {
|
||||
podDir := kubelet.getPodDir("pod1uid")
|
||||
return validateDirNotExists(filepath.Join(podDir, "volumes"))
|
||||
},
|
||||
},
|
||||
"pod-doesnot-exist-with-subpath": {
|
||||
prepareFunc: func(kubelet *Kubelet) error {
|
||||
podDir := kubelet.getPodDir("pod1uid")
|
||||
|
8
pkg/util/removeall/OWNERS
Normal file
8
pkg/util/removeall/OWNERS
Normal file
@ -0,0 +1,8 @@
|
||||
# See the OWNERS docs at https://go.k8s.io/owners
|
||||
|
||||
approvers:
|
||||
- sig-storage-approvers
|
||||
reviewers:
|
||||
- sig-storage-reviewers
|
||||
labels:
|
||||
- sig/storage
|
@ -25,16 +25,16 @@ import (
|
||||
"k8s.io/mount-utils"
|
||||
)
|
||||
|
||||
// RemoveAllOneFilesystem removes path and any children it contains.
|
||||
// It removes everything it can but returns the first error
|
||||
// it encounters. If the path does not exist, RemoveAll
|
||||
// RemoveAllOneFilesystemCommon removes the path and any children it contains,
|
||||
// using the provided remove function. It removes everything it can but returns
|
||||
// the first error it encounters. If the path does not exist, RemoveAll
|
||||
// returns nil (no error).
|
||||
// It makes sure it does not cross mount boundary, i.e. it does *not* remove
|
||||
// files from another filesystems. Like 'rm -rf --one-file-system'.
|
||||
// It is copied from RemoveAll() sources, with IsLikelyNotMountPoint
|
||||
func RemoveAllOneFilesystem(mounter mount.Interface, path string) error {
|
||||
func RemoveAllOneFilesystemCommon(mounter mount.Interface, path string, remove func(string) error) error {
|
||||
// Simple case: if Remove works, we're done.
|
||||
err := os.Remove(path)
|
||||
err := remove(path)
|
||||
if err == nil || os.IsNotExist(err) {
|
||||
return nil
|
||||
}
|
||||
@ -48,7 +48,7 @@ func RemoveAllOneFilesystem(mounter mount.Interface, path string) error {
|
||||
return serr
|
||||
}
|
||||
if !dir.IsDir() {
|
||||
// Not a directory; return the error from Remove.
|
||||
// Not a directory; return the error from remove.
|
||||
return err
|
||||
}
|
||||
|
||||
@ -76,7 +76,7 @@ func RemoveAllOneFilesystem(mounter mount.Interface, path string) error {
|
||||
for {
|
||||
names, err1 := fd.Readdirnames(100)
|
||||
for _, name := range names {
|
||||
err1 := RemoveAllOneFilesystem(mounter, path+string(os.PathSeparator)+name)
|
||||
err1 := RemoveAllOneFilesystemCommon(mounter, path+string(os.PathSeparator)+name, remove)
|
||||
if err == nil {
|
||||
err = err1
|
||||
}
|
||||
@ -97,7 +97,7 @@ func RemoveAllOneFilesystem(mounter mount.Interface, path string) error {
|
||||
fd.Close()
|
||||
|
||||
// Remove directory.
|
||||
err1 := os.Remove(path)
|
||||
err1 := remove(path)
|
||||
if err1 == nil || os.IsNotExist(err1) {
|
||||
return nil
|
||||
}
|
||||
@ -106,3 +106,23 @@ func RemoveAllOneFilesystem(mounter mount.Interface, path string) error {
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// RemoveAllOneFilesystem removes the path and any children it contains, using
|
||||
// the os.Remove function. It makes sure it does not cross mount boundaries,
|
||||
// i.e. it returns an error rather than remove files from another filesystem.
|
||||
// It removes everything it can but returns the first error it encounters.
|
||||
// If the path does not exist, it returns nil (no error).
|
||||
func RemoveAllOneFilesystem(mounter mount.Interface, path string) error {
|
||||
return RemoveAllOneFilesystemCommon(mounter, path, os.Remove)
|
||||
}
|
||||
|
||||
// RemoveDirsOneFilesystem removes the path and any empty subdirectories it
|
||||
// contains, using the syscall.Rmdir function. Unlike RemoveAllOneFilesystem,
|
||||
// RemoveDirsOneFilesystem will remove only directories and returns an error if
|
||||
// it encounters any files in the directory tree. It makes sure it does not
|
||||
// cross mount boundaries, i.e. it returns an error rather than remove dirs
|
||||
// from another filesystem. It removes everything it can but returns the first
|
||||
// error it encounters. If the path does not exist, it returns nil (no error).
|
||||
func RemoveDirsOneFilesystem(mounter mount.Interface, path string) error {
|
||||
return RemoveAllOneFilesystemCommon(mounter, path, syscall.Rmdir)
|
||||
}
|
||||
|
@ -138,3 +138,123 @@ func TestRemoveAllOneFilesystem(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRemoveDirsOneFilesystem(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
// Items of the test directory. Directories end with "/".
|
||||
// Directories starting with "mount" are considered to be mount points.
|
||||
// Directories starting with "err" will cause an error in
|
||||
// IsLikelyNotMountPoint.
|
||||
items []string
|
||||
expectError bool
|
||||
}{
|
||||
{
|
||||
"empty dir",
|
||||
[]string{},
|
||||
false,
|
||||
},
|
||||
{
|
||||
"non-mount-no-files",
|
||||
[]string{
|
||||
"dir/",
|
||||
"dir/subdir1/",
|
||||
"dir2/",
|
||||
"dir2/subdir2/",
|
||||
"dir2/subdir2/subdir3/",
|
||||
"dir3/",
|
||||
},
|
||||
false,
|
||||
},
|
||||
{
|
||||
"non-mount-with-files",
|
||||
[]string{
|
||||
"dir/",
|
||||
"dir/file",
|
||||
"dir2/",
|
||||
"file2",
|
||||
},
|
||||
true,
|
||||
},
|
||||
{
|
||||
"mount-no-files",
|
||||
[]string{
|
||||
"dir/",
|
||||
"dir/subdir1/",
|
||||
"dir2/",
|
||||
"dir2/subdir2/",
|
||||
"dir2/subdir2/subdir3/",
|
||||
"mount/",
|
||||
"mount/dir3/",
|
||||
},
|
||||
true,
|
||||
},
|
||||
{
|
||||
"mount-with-files",
|
||||
[]string{
|
||||
"dir/",
|
||||
"dir/file",
|
||||
"dir2/",
|
||||
"file2",
|
||||
"mount/",
|
||||
"mount/file3",
|
||||
},
|
||||
true,
|
||||
},
|
||||
{
|
||||
"innermount",
|
||||
[]string{
|
||||
"dir/",
|
||||
"dir/subdir1/",
|
||||
"dir/dir2/",
|
||||
"dir/dir2/subdir2/",
|
||||
"dir/dir2/mount/",
|
||||
"dir/dir2/mount/subdir3/",
|
||||
},
|
||||
true,
|
||||
},
|
||||
{
|
||||
"error",
|
||||
[]string{
|
||||
"dir/",
|
||||
"dir/subdir1/",
|
||||
"dir2/",
|
||||
"err/",
|
||||
"err/subdir3/",
|
||||
},
|
||||
true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
tmpDir, err := utiltesting.MkTmpdir("removeall-" + test.name + "-")
|
||||
if err != nil {
|
||||
t.Fatalf("Can't make a tmp dir: %v", err)
|
||||
}
|
||||
defer os.RemoveAll(tmpDir)
|
||||
// Create the directory structure
|
||||
for _, item := range test.items {
|
||||
if strings.HasSuffix(item, "/") {
|
||||
item = strings.TrimRight(item, "/")
|
||||
if err = os.Mkdir(path.Join(tmpDir, item), 0777); err != nil {
|
||||
t.Fatalf("error creating %s: %v", item, err)
|
||||
}
|
||||
} else {
|
||||
f, err := os.Create(path.Join(tmpDir, item))
|
||||
if err != nil {
|
||||
t.Fatalf("error creating %s: %v", item, err)
|
||||
}
|
||||
f.Close()
|
||||
}
|
||||
}
|
||||
|
||||
mounter := &fakeMounter{}
|
||||
err = RemoveDirsOneFilesystem(mounter, tmpDir)
|
||||
if err == nil && test.expectError {
|
||||
t.Errorf("test %q failed: expected error and got none", test.name)
|
||||
}
|
||||
if err != nil && !test.expectError {
|
||||
t.Errorf("test %q failed: unexpected error: %v", test.name, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -235,6 +235,9 @@ func (h *hostpathCSIDriver) PrepareTest(f *framework.Framework) (*storageframewo
|
||||
// testsuites/volumelimits.go `should support volume limits`
|
||||
// test.
|
||||
"--maxvolumespernode=10",
|
||||
// Enable volume lifecycle checks, to report failure if
|
||||
// the volume is not unpublished / unstaged correctly.
|
||||
"--check-volume-lifecycle=true",
|
||||
},
|
||||
ProvisionerContainerName: "csi-provisioner",
|
||||
SnapshotterContainerName: "csi-snapshotter",
|
||||
|
Loading…
Reference in New Issue
Block a user