From 448e0c44c6919b8e874f042cb934609c0bf5a790 Mon Sep 17 00:00:00 2001 From: Robert Krawitz Date: Mon, 19 Nov 2018 17:43:41 -0500 Subject: [PATCH] Apply quotas via syscalls using cgo. --- pkg/features/kube_features.go | 8 + pkg/kubelet/eviction/helpers.go | 20 +- pkg/kubelet/volumemanager/cache/BUILD | 2 + .../cache/desired_state_of_world.go | 21 +- pkg/volume/emptydir/BUILD | 1 + pkg/volume/emptydir/empty_dir.go | 18 +- pkg/volume/util/BUILD | 1 + pkg/volume/util/fs/BUILD | 2 + pkg/volume/util/fs/fs.go | 13 + .../operationexecutor/operation_executor.go | 4 + .../operationexecutor/operation_generator.go | 5 +- pkg/volume/util/quota/BUILD | 62 ++ pkg/volume/util/quota/common/BUILD | 33 + .../util/quota/common/quota_linux_common.go | 77 ++ .../quota/common/quota_linux_common_impl.go | 152 ++++ pkg/volume/util/quota/extfs/BUILD | 31 + pkg/volume/util/quota/extfs/quota_extfs.go | 153 ++++ pkg/volume/util/quota/project.go | 353 ++++++++ pkg/volume/util/quota/quota.go | 47 ++ pkg/volume/util/quota/quota_linux.go | 432 ++++++++++ pkg/volume/util/quota/quota_linux_test.go | 762 ++++++++++++++++++ pkg/volume/util/quota/quota_unsupported.go | 54 ++ pkg/volume/util/quota/xfs/BUILD | 31 + pkg/volume/util/quota/xfs/quota_xfs.go | 153 ++++ pkg/volume/volume.go | 5 +- 25 files changed, 2431 insertions(+), 9 deletions(-) create mode 100644 pkg/volume/util/quota/BUILD create mode 100644 pkg/volume/util/quota/common/BUILD create mode 100644 pkg/volume/util/quota/common/quota_linux_common.go create mode 100644 pkg/volume/util/quota/common/quota_linux_common_impl.go create mode 100644 pkg/volume/util/quota/extfs/BUILD create mode 100644 pkg/volume/util/quota/extfs/quota_extfs.go create mode 100644 pkg/volume/util/quota/project.go create mode 100644 pkg/volume/util/quota/quota.go create mode 100644 pkg/volume/util/quota/quota_linux.go create mode 100644 pkg/volume/util/quota/quota_linux_test.go create mode 100644 pkg/volume/util/quota/quota_unsupported.go create mode 100644 pkg/volume/util/quota/xfs/BUILD create mode 100644 pkg/volume/util/quota/xfs/quota_xfs.go diff --git a/pkg/features/kube_features.go b/pkg/features/kube_features.go index 911f8c5a4c1..86d164b895a 100644 --- a/pkg/features/kube_features.go +++ b/pkg/features/kube_features.go @@ -440,6 +440,13 @@ const ( // // Enables the regional PD feature on GCE. deprecatedGCERegionalPersistentDisk featuregate.Feature = "GCERegionalPersistentDisk" + + // owner: @RobertKrawitz + // alpha: v1.15 + // + // Allow use of filesystems for ephemeral storage monitoring. + // Only applies if LocalStorageCapacityIsolation is set. 
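+	// Illustrative only (not part of this change): the gate is expected to
+	// be enabled together with LocalStorageCapacityIsolation on the kubelet,
+	// e.g. --feature-gates=LocalStorageCapacityIsolation=true,FSQuotaForLSCIMonitoring=true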
+ FSQuotaForLSCIMonitoring = "FSQuotaForLSCIMonitoring" ) func init() { @@ -514,6 +521,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS TTLAfterFinished: {Default: false, PreRelease: featuregate.Alpha}, KubeletPodResources: {Default: false, PreRelease: featuregate.Alpha}, WindowsGMSA: {Default: false, PreRelease: featuregate.Alpha}, + FSQuotaForLSCIMonitoring: {Default: false, PreRelease: utilfeature.Alpha}, // inherited features from generic apiserver, relisted here to get a conflict if it is changed // unintentionally on either side: diff --git a/pkg/kubelet/eviction/helpers.go b/pkg/kubelet/eviction/helpers.go index 74229b75905..0e8f843539a 100644 --- a/pkg/kubelet/eviction/helpers.go +++ b/pkg/kubelet/eviction/helpers.go @@ -395,13 +395,27 @@ func podDiskUsage(podStats statsapi.PodStats, pod *v1.Pod, statsToMeasure []fsSt }, nil } +func internalIsLocalEphemeralVolume(pod *v1.Pod, volume v1.Volume) bool { + return volume.GitRepo != nil || + (volume.EmptyDir != nil && volume.EmptyDir.Medium != v1.StorageMediumMemory) || + volume.ConfigMap != nil || volume.DownwardAPI != nil +} + +// IsLocalEphemeralVolume determines whether a given volume name is ephemeral +func IsLocalEphemeralVolume(pod *v1.Pod, volumeName string) (bool, error) { + for _, volume := range pod.Spec.Volumes { + if volume.Name == volumeName { + return internalIsLocalEphemeralVolume(pod, volume), nil + } + } + return false, fmt.Errorf("Volume %s not found in pod %v", volumeName, pod) +} + // localEphemeralVolumeNames returns the set of ephemeral volumes for the pod that are local func localEphemeralVolumeNames(pod *v1.Pod) []string { result := []string{} for _, volume := range pod.Spec.Volumes { - if volume.GitRepo != nil || - (volume.EmptyDir != nil && volume.EmptyDir.Medium != v1.StorageMediumMemory) || - volume.ConfigMap != nil || volume.DownwardAPI != nil { + if internalIsLocalEphemeralVolume(pod, volume) { result = append(result, volume.Name) } } diff --git a/pkg/kubelet/volumemanager/cache/BUILD b/pkg/kubelet/volumemanager/cache/BUILD index 3bc1a3886bb..35028f5846e 100644 --- a/pkg/kubelet/volumemanager/cache/BUILD +++ b/pkg/kubelet/volumemanager/cache/BUILD @@ -14,7 +14,9 @@ go_library( ], importpath = "k8s.io/kubernetes/pkg/kubelet/volumemanager/cache", deps = [ + "//pkg/api/v1/resource:go_default_library", "//pkg/features:go_default_library", + "//pkg/kubelet/eviction:go_default_library", "//pkg/volume:go_default_library", "//pkg/volume/util:go_default_library", "//pkg/volume/util/operationexecutor:go_default_library", diff --git a/pkg/kubelet/volumemanager/cache/desired_state_of_world.go b/pkg/kubelet/volumemanager/cache/desired_state_of_world.go index 1c748505a26..c1d9fea084b 100644 --- a/pkg/kubelet/volumemanager/cache/desired_state_of_world.go +++ b/pkg/kubelet/volumemanager/cache/desired_state_of_world.go @@ -25,6 +25,8 @@ import ( "sync" "k8s.io/api/core/v1" + apiv1resource "k8s.io/kubernetes/pkg/api/v1/resource" + limits "k8s.io/kubernetes/pkg/kubelet/eviction" "k8s.io/kubernetes/pkg/volume" "k8s.io/kubernetes/pkg/volume/util" "k8s.io/kubernetes/pkg/volume/util/operationexecutor" @@ -160,6 +162,10 @@ type volumeToMount struct { // reportedInUse indicates that the volume was successfully added to the // VolumesInUse field in the node's status. 
reportedInUse bool + + // desiredSizeLimit indicates the desired upper bound on the size of the volume + // (if so implemented) + desiredSizeLimit int64 } // The pod object represents a pod that references the underlying volume and @@ -226,6 +232,17 @@ func (dsw *desiredStateOfWorld) AddPodToVolume( } if _, volumeExists := dsw.volumesToMount[volumeName]; !volumeExists { + var sizeLimit int64 + sizeLimit = 0 + isLocal, _ := limits.IsLocalEphemeralVolume(pod, volumeSpec.Name()) + if isLocal { + _, podLimits := apiv1resource.PodRequestsAndLimits(pod) + ephemeralStorageLimit := podLimits[v1.ResourceEphemeralStorage] + sizeLimit = ephemeralStorageLimit.Value() + if sizeLimit == 0 { + sizeLimit = -1 + } + } dsw.volumesToMount[volumeName] = volumeToMount{ volumeName: volumeName, podsToMount: make(map[types.UniquePodName]podToMount), @@ -233,6 +250,7 @@ func (dsw *desiredStateOfWorld) AddPodToVolume( pluginIsDeviceMountable: deviceMountable, volumeGidValue: volumeGidValue, reportedInUse: false, + desiredSizeLimit: sizeLimit, } } @@ -360,7 +378,8 @@ func (dsw *desiredStateOfWorld) GetVolumesToMount() []VolumeToMount { PluginIsDeviceMountable: volumeObj.pluginIsDeviceMountable, OuterVolumeSpecName: podObj.outerVolumeSpecName, VolumeGidValue: volumeObj.volumeGidValue, - ReportedInUse: volumeObj.reportedInUse}}) + ReportedInUse: volumeObj.reportedInUse, + DesiredSizeLimit: volumeObj.desiredSizeLimit}}) } } return volumesToMount diff --git a/pkg/volume/emptydir/BUILD b/pkg/volume/emptydir/BUILD index 08c2bc334fc..4833bc31f51 100644 --- a/pkg/volume/emptydir/BUILD +++ b/pkg/volume/emptydir/BUILD @@ -20,6 +20,7 @@ go_library( "//pkg/util/mount:go_default_library", "//pkg/volume:go_default_library", "//pkg/volume/util:go_default_library", + "//pkg/volume/util/quota:go_default_library", "//staging/src/k8s.io/api/core/v1:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", diff --git a/pkg/volume/emptydir/empty_dir.go b/pkg/volume/emptydir/empty_dir.go index 1c1f3792fc3..a2604b7c465 100644 --- a/pkg/volume/emptydir/empty_dir.go +++ b/pkg/volume/emptydir/empty_dir.go @@ -30,6 +30,7 @@ import ( "k8s.io/kubernetes/pkg/util/mount" "k8s.io/kubernetes/pkg/volume" volumeutil "k8s.io/kubernetes/pkg/volume/util" + "k8s.io/kubernetes/pkg/volume/util/quota" utilstrings "k8s.io/utils/strings" ) @@ -174,6 +175,7 @@ type emptyDir struct { mounter mount.Interface mountDetector mountDetector plugin *emptyDirPlugin + desiredSize int64 volume.MetricsProvider } @@ -234,6 +236,15 @@ func (ed *emptyDir) SetUpAt(dir string, mounterArgs volume.MounterArgs) error { if err == nil { volumeutil.SetReady(ed.getMetaDir()) } + if mounterArgs.DesiredSize != 0 { + if hasQuotas, _ := quota.SupportsQuotas(ed.mounter, dir); hasQuotas { + klog.V(3).Infof("emptydir trying to assign quota") + err := quota.AssignQuota(ed.mounter, dir, mounterArgs.PodUID, mounterArgs.DesiredSize) + if err != nil { + klog.V(3).Infof("Set quota failed %v", err) + } + } + } return err } @@ -397,9 +408,14 @@ func (ed *emptyDir) TearDownAt(dir string) error { } func (ed *emptyDir) teardownDefault(dir string) error { + // Remove any quota + err := quota.ClearQuota(ed.mounter, dir) + if err != nil { + klog.V(3).Infof("Failed to clear quota on %s: %v", dir, err) + } // Renaming the directory is not required anymore because the operation executor // now handles duplicate operations on the same volume - err := os.RemoveAll(dir) + err = os.RemoveAll(dir) if err 
!= nil { return err } diff --git a/pkg/volume/util/BUILD b/pkg/volume/util/BUILD index 950fb621661..a94e304a482 100644 --- a/pkg/volume/util/BUILD +++ b/pkg/volume/util/BUILD @@ -91,6 +91,7 @@ filegroup( "//pkg/volume/util/nestedpendingoperations:all-srcs", "//pkg/volume/util/nsenter:all-srcs", "//pkg/volume/util/operationexecutor:all-srcs", + "//pkg/volume/util/quota:all-srcs", "//pkg/volume/util/recyclerclient:all-srcs", "//pkg/volume/util/subpath:all-srcs", "//pkg/volume/util/types:all-srcs", diff --git a/pkg/volume/util/fs/BUILD b/pkg/volume/util/fs/BUILD index 38602bac3e1..504175781f2 100644 --- a/pkg/volume/util/fs/BUILD +++ b/pkg/volume/util/fs/BUILD @@ -14,6 +14,7 @@ go_library( "//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library", ], "@io_bazel_rules_go//go/platform:darwin": [ + "//pkg/volume/util/quota:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library", "//vendor/golang.org/x/sys/unix:go_default_library", ], @@ -24,6 +25,7 @@ go_library( "//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library", ], "@io_bazel_rules_go//go/platform:linux": [ + "//pkg/volume/util/quota:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library", "//vendor/golang.org/x/sys/unix:go_default_library", ], diff --git a/pkg/volume/util/fs/fs.go b/pkg/volume/util/fs/fs.go index a80a167eea7..dd88367f020 100644 --- a/pkg/volume/util/fs/fs.go +++ b/pkg/volume/util/fs/fs.go @@ -27,6 +27,7 @@ import ( "golang.org/x/sys/unix" "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/kubernetes/pkg/volume/util/quota" ) // FSInfo linux returns (available bytes, byte capacity, byte usage, total inodes, inodes free, inode usage, error) @@ -56,6 +57,13 @@ func FsInfo(path string) (int64, int64, int64, int64, int64, int64, error) { // DiskUsage gets disk usage of specified path. func DiskUsage(path string) (*resource.Quantity, error) { + // First check whether the quota system knows about this directory + data, err := quota.GetConsumption(path) + if err == nil { + var q resource.Quantity + q.Set(data) + return &q, nil + } // Uses the same niceness level as cadvisor.fs does when running du // Uses -B 1 to always scale to a blocksize of 1 byte out, err := exec.Command("nice", "-n", "19", "du", "-s", "-B", "1", path).CombinedOutput() @@ -76,6 +84,11 @@ func Find(path string) (int64, error) { if path == "" { return 0, fmt.Errorf("invalid directory") } + // First check whether the quota system knows about this directory + inodes, err := quota.GetInodes(path) + if err == nil { + return inodes, nil + } var counter byteCounter var stderr bytes.Buffer findCmd := exec.Command("find", path, "-xdev", "-printf", ".") diff --git a/pkg/volume/util/operationexecutor/operation_executor.go b/pkg/volume/util/operationexecutor/operation_executor.go index 18668f8d293..379829ad50c 100644 --- a/pkg/volume/util/operationexecutor/operation_executor.go +++ b/pkg/volume/util/operationexecutor/operation_executor.go @@ -346,6 +346,10 @@ type VolumeToMount struct { // ReportedInUse indicates that the volume was successfully added to the // VolumesInUse field in the node's status. 
ReportedInUse bool + + // DesiredSizeLimit indicates the desired upper bound on the size of the volume + // (if so implemented) + DesiredSizeLimit int64 } // GenerateMsgDetailed returns detailed msgs for volumes to mount diff --git a/pkg/volume/util/operationexecutor/operation_generator.go b/pkg/volume/util/operationexecutor/operation_generator.go index 6acb6443d70..d3195413742 100644 --- a/pkg/volume/util/operationexecutor/operation_generator.go +++ b/pkg/volume/util/operationexecutor/operation_generator.go @@ -702,8 +702,9 @@ func (og *operationGenerator) GenerateMountVolumeFunc( // Execute mount mountErr := volumeMounter.SetUp(volume.MounterArgs{ - FsGroup: fsGroup, - PodUID: string(volumeToMount.Pod.UID), + FsGroup: fsGroup, + DesiredSize: volumeToMount.DesiredSizeLimit, + PodUID: string(volumeToMount.Pod.UID), }) if mountErr != nil { // On failure, return error. Caller will log and retry. diff --git a/pkg/volume/util/quota/BUILD b/pkg/volume/util/quota/BUILD new file mode 100644 index 00000000000..2d1df604472 --- /dev/null +++ b/pkg/volume/util/quota/BUILD @@ -0,0 +1,62 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "go_default_library", + srcs = [ + "project.go", + "quota.go", + "quota_linux.go", + "quota_unsupported.go", + ], + importpath = "k8s.io/kubernetes/pkg/volume/util/quota", + visibility = ["//visibility:public"], + deps = [ + "//pkg/features:go_default_library", + "//pkg/util/mount:go_default_library", + "//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library", + ] + select({ + "@io_bazel_rules_go//go/platform:linux": [ + "//pkg/volume/util/quota/common:go_default_library", + "//pkg/volume/util/quota/extfs:go_default_library", + "//pkg/volume/util/quota/xfs:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/util/uuid:go_default_library", + "//vendor/golang.org/x/sys/unix:go_default_library", + "//vendor/k8s.io/klog:go_default_library", + ], + "//conditions:default": [], + }), +) + +go_test( + name = "go_default_test", + srcs = ["quota_linux_test.go"], + embed = [":go_default_library"], + deps = select({ + "@io_bazel_rules_go//go/platform:linux": [ + "//pkg/features:go_default_library", + "//pkg/util/mount:go_default_library", + "//pkg/volume/util/quota/common:go_default_library", + "//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library", + ], + "//conditions:default": [], + }), +) + +filegroup( + name = "package-srcs", + srcs = glob(["**"]), + tags = ["automanaged"], + visibility = ["//visibility:private"], +) + +filegroup( + name = "all-srcs", + srcs = [ + ":package-srcs", + "//pkg/volume/util/quota/common:all-srcs", + "//pkg/volume/util/quota/extfs:all-srcs", + "//pkg/volume/util/quota/xfs:all-srcs", + ], + tags = ["automanaged"], + visibility = ["//visibility:public"], +) diff --git a/pkg/volume/util/quota/common/BUILD b/pkg/volume/util/quota/common/BUILD new file mode 100644 index 00000000000..536a67450b4 --- /dev/null +++ b/pkg/volume/util/quota/common/BUILD @@ -0,0 +1,33 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library") + +go_library( + name = "go_default_library", + srcs = [ + "quota_linux_common.go", + "quota_linux_common_impl.go", + ], + cgo = True, + importpath = "k8s.io/kubernetes/pkg/volume/util/quota/common", + visibility = ["//visibility:public"], + deps = select({ + "@io_bazel_rules_go//go/platform:linux": [ + "//vendor/golang.org/x/sys/unix:go_default_library", + "//vendor/k8s.io/klog:go_default_library", + ], + "//conditions:default": [], + }), +) + +filegroup( + name = 
"package-srcs", + srcs = glob(["**"]), + tags = ["automanaged"], + visibility = ["//visibility:private"], +) + +filegroup( + name = "all-srcs", + srcs = [":package-srcs"], + tags = ["automanaged"], + visibility = ["//visibility:public"], +) diff --git a/pkg/volume/util/quota/common/quota_linux_common.go b/pkg/volume/util/quota/common/quota_linux_common.go new file mode 100644 index 00000000000..a5774686acf --- /dev/null +++ b/pkg/volume/util/quota/common/quota_linux_common.go @@ -0,0 +1,77 @@ +// +build linux + +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package common + +// QuotaID -- generic quota identifier +type QuotaID int32 + +const ( + // BadQuotaID -- Invalid quota + BadQuotaID QuotaID = 0 +) + +// FirstQuota is the quota ID we start with. +// XXXXXXX Need a better way of doing this... +var FirstQuota QuotaID = 1048577 + +// LinuxVolumeQuotaProvider returns an appropriate quota applier +// object if we can support quotas on this device +type LinuxVolumeQuotaProvider interface { + // GetQuotaApplier retrieves an object that can apply + // quotas (or nil if this provider cannot support quotas + // on the device) + GetQuotaApplier(mountpoint string, backingDev string) LinuxVolumeQuotaApplier +} + +// LinuxVolumeQuotaApplier is a generic interface to any quota +// mechanism supported by Linux +type LinuxVolumeQuotaApplier interface { + // GetQuotaOnDir gets the quota ID (if any) that applies to + // this directory + GetQuotaOnDir(path string) (QuotaID, error) + + // SetQuotaOnDir applies the specified quota ID to a directory. + // Negative value for bytes means that a non-enforcing quota + // should be applied (perhaps by setting a quota too large to + // be hit) + SetQuotaOnDir(path string, id QuotaID, bytes int64) error + + // QuotaIDIsInUse determines whether the quota ID is in use. + // Implementations should not check /etc/project or /etc/projid, + // only whether their underlying mechanism already has the ID in + // use. + // Return value of false with no error means that the ID is not + // in use; true means that it is already in use. An error + // return means that any quota ID will fail. + QuotaIDIsInUse(path string, id QuotaID) (bool, error) + + // GetConsumption returns the consumption (in bytes) of the + // directory, determined by the implementation's quota-based + // mechanism. If it is unable to do so using that mechanism, + // it should return an error and allow higher layers to + // enumerate the directory. + GetConsumption(path string, id QuotaID) (int64, error) + + // GetInodes returns the number of inodes used by the + // directory, determined by the implementation's quota-based + // mechanism. If it is unable to do so using that mechanism, + // it should return an error and allow higher layers to + // enumerate the directory. 
+ GetInodes(path string, id QuotaID) (int64, error) +} diff --git a/pkg/volume/util/quota/common/quota_linux_common_impl.go b/pkg/volume/util/quota/common/quota_linux_common_impl.go new file mode 100644 index 00000000000..5df76fd532f --- /dev/null +++ b/pkg/volume/util/quota/common/quota_linux_common_impl.go @@ -0,0 +1,152 @@ +// +build linux + +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package common + +/* +#include +#include +#include +#include +#include +#include + +#ifndef FS_XFLAG_PROJINHERIT +struct fsxattr { + __u32 fsx_xflags; + __u32 fsx_extsize; + __u32 fsx_nextents; + __u32 fsx_projid; + unsigned char fsx_pad[12]; +}; +#define FS_XFLAG_PROJINHERIT 0x00000200 +#endif +#ifndef FS_IOC_FSGETXATTR +#define FS_IOC_FSGETXATTR _IOR ('X', 31, struct fsxattr) +#endif +#ifndef FS_IOC_FSSETXATTR +#define FS_IOC_FSSETXATTR _IOW ('X', 32, struct fsxattr) +#endif + +#ifndef PRJQUOTA +#define PRJQUOTA 2 +#endif +#ifndef Q_XGETQSTAT_PRJQUOTA +#define Q_XGETQSTAT_PRJQUOTA QCMD(Q_XGETQSTAT, PRJQUOTA) +#endif +*/ +import "C" + +import ( + "fmt" + "syscall" + "unsafe" + + "golang.org/x/sys/unix" + "k8s.io/klog" +) + +// IsFilesystemOfType determines whether the filesystem specified is of the type +// specified by the magic number +func IsFilesystemOfType(mountpoint string, backingDev string, magic int64) bool { + var buf syscall.Statfs_t + err := syscall.Statfs(mountpoint, &buf) + if err != nil { + klog.V(3).Infof("Extfs Unable to statfs %s: %v", mountpoint, err) + return false + } + if buf.Type != magic { + return false + } + + var qstat C.fs_quota_stat_t + CPath := C.CString(backingDev) + defer free(CPath) + + _, _, errno := unix.Syscall6(unix.SYS_QUOTACTL, uintptr(C.Q_XGETQSTAT_PRJQUOTA), uintptr(unsafe.Pointer(CPath)), 0, uintptr(unsafe.Pointer(&qstat)), 0, 0) + return errno == 0 && qstat.qs_flags&C.FS_QUOTA_PDQ_ENFD > 0 && qstat.qs_flags&C.FS_QUOTA_PDQ_ACCT > 0 +} + +func free(p *C.char) { + C.free(unsafe.Pointer(p)) +} + +func openDir(path string) (*C.DIR, error) { + Cpath := C.CString(path) + defer free(Cpath) + + dir := C.opendir(Cpath) + if dir == nil { + return nil, fmt.Errorf("Can't open dir") + } + return dir, nil +} + +func closeDir(dir *C.DIR) { + if dir != nil { + C.closedir(dir) + } +} + +func getDirFd(dir *C.DIR) uintptr { + return uintptr(C.dirfd(dir)) +} + +// GetQuotaOnDir retrieves the quota ID (if any) associated with the specified directory +func GetQuotaOnDir(path string) (QuotaID, error) { + dir, err := openDir(path) + if err != nil { + klog.V(3).Infof("Can't open directory %s: %#+v", path, err) + return BadQuotaID, err + } + defer closeDir(dir) + var fsx C.struct_fsxattr + _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.FS_IOC_FSGETXATTR, + uintptr(unsafe.Pointer(&fsx))) + if errno != 0 { + return BadQuotaID, fmt.Errorf("Failed to get quota ID for %s: %v", path, errno.Error()) + } + if fsx.fsx_projid == 0 { + return BadQuotaID, fmt.Errorf("Failed to get quota ID for %s: %s", path, "no applicable quota") + } + return 
QuotaID(fsx.fsx_projid), nil +} + +// ApplyProjectToDir applies the specified quota ID to the specified directory +func ApplyProjectToDir(path string, id QuotaID) error { + dir, err := openDir(path) + if err != nil { + return err + } + defer closeDir(dir) + + var fsx C.struct_fsxattr + _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.FS_IOC_FSGETXATTR, + uintptr(unsafe.Pointer(&fsx))) + if errno != 0 { + return fmt.Errorf("Failed to get quota ID for %s: %v", path, errno.Error()) + } + fsx.fsx_projid = C.__u32(id) + fsx.fsx_xflags |= C.FS_XFLAG_PROJINHERIT + _, _, errno = unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.FS_IOC_FSSETXATTR, + uintptr(unsafe.Pointer(&fsx))) + if errno != 0 { + return fmt.Errorf("Failed to set quota ID for %s: %v", path, errno.Error()) + } + return nil +} diff --git a/pkg/volume/util/quota/extfs/BUILD b/pkg/volume/util/quota/extfs/BUILD new file mode 100644 index 00000000000..431386beed0 --- /dev/null +++ b/pkg/volume/util/quota/extfs/BUILD @@ -0,0 +1,31 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library") + +go_library( + name = "go_default_library", + srcs = ["quota_extfs.go"], + cgo = True, + importpath = "k8s.io/kubernetes/pkg/volume/util/quota/extfs", + visibility = ["//visibility:public"], + deps = select({ + "@io_bazel_rules_go//go/platform:linux": [ + "//pkg/volume/util/quota/common:go_default_library", + "//vendor/golang.org/x/sys/unix:go_default_library", + "//vendor/k8s.io/klog:go_default_library", + ], + "//conditions:default": [], + }), +) + +filegroup( + name = "package-srcs", + srcs = glob(["**"]), + tags = ["automanaged"], + visibility = ["//visibility:private"], +) + +filegroup( + name = "all-srcs", + srcs = [":package-srcs"], + tags = ["automanaged"], + visibility = ["//visibility:public"], +) diff --git a/pkg/volume/util/quota/extfs/quota_extfs.go b/pkg/volume/util/quota/extfs/quota_extfs.go new file mode 100644 index 00000000000..397e3ba79e9 --- /dev/null +++ b/pkg/volume/util/quota/extfs/quota_extfs.go @@ -0,0 +1,153 @@ +// +build linux + +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package extfs + +/* +#include +#include +#include +#include +#include + +#ifndef PRJQUOTA +#define PRJQUOTA 2 +#endif +#ifndef Q_SETPQUOTA +#define Q_SETPQUOTA (unsigned) QCMD(Q_SETQUOTA, PRJQUOTA) +#endif +#ifndef Q_GETPQUOTA +#define Q_GETPQUOTA (unsigned) QCMD(Q_GETQUOTA, PRJQUOTA) +#endif +*/ +import "C" + +import ( + "fmt" + "syscall" + "unsafe" + + "golang.org/x/sys/unix" + "k8s.io/klog" + "k8s.io/kubernetes/pkg/volume/util/quota/common" +) + +// ext4fs empirically has a maximum quota size of 2^48 - 1 1KiB blocks (256 petabytes) +const ( + linuxExtfsMagic = 0xef53 + quotaBsize = 1024 // extfs specific + bitsPerWord = 32 << (^uint(0) >> 63) // either 32 or 64 + maxQuota int64 = (1<<(bitsPerWord-1) - 1) & (1<<58 - 1) // either 1<<31 - 1 or 1<<58 - 1 +) + +// VolumeProvider supplies a quota applier to the generic code. 
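+// The generic layer in quota_linux.go registers it alongside the xfs
+// provider:
+//   var providers = []common.LinuxVolumeQuotaProvider{&extfs.VolumeProvider{}, &xfs.VolumeProvider{}}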
+type VolumeProvider struct { +} + +// GetQuotaApplier -- does this backing device support quotas that +// can be applied to directories? +func (*VolumeProvider) GetQuotaApplier(mountpoint string, backingDev string) common.LinuxVolumeQuotaApplier { + if common.IsFilesystemOfType(mountpoint, backingDev, linuxExtfsMagic) { + return extfsVolumeQuota{backingDev} + } + return nil +} + +type extfsVolumeQuota struct { + backingDev string +} + +// GetQuotaOnDir -- get the quota ID that applies to this directory. + +func (v extfsVolumeQuota) GetQuotaOnDir(path string) (common.QuotaID, error) { + return common.GetQuotaOnDir(path) +} + +// SetQuotaOnDir -- apply the specified quota to the directory. If +// bytes is not greater than zero, the quota should be applied in a +// way that is non-enforcing (either explicitly so or by setting a +// quota larger than anything the user may possibly create) +func (v extfsVolumeQuota) SetQuotaOnDir(path string, id common.QuotaID, bytes int64) error { + klog.V(3).Infof("extfsSetQuotaOn %s ID %v bytes %v", path, id, bytes) + if bytes < 0 || bytes > maxQuota { + bytes = maxQuota + } + + var d C.struct_if_dqblk + + d.dqb_bhardlimit = C.__u64(bytes / quotaBsize) + d.dqb_bsoftlimit = d.dqb_bhardlimit + d.dqb_ihardlimit = 0 + d.dqb_isoftlimit = 0 + d.dqb_valid = C.QIF_LIMITS + + var cs = C.CString(v.backingDev) + defer C.free(unsafe.Pointer(cs)) + + _, _, errno := unix.Syscall6(unix.SYS_QUOTACTL, C.Q_SETPQUOTA, + uintptr(unsafe.Pointer(cs)), uintptr(id), + uintptr(unsafe.Pointer(&d)), 0, 0) + if errno != 0 { + return fmt.Errorf("Failed to set quota limit for ID %d on %s: %v", + id, path, errno.Error()) + } + return common.ApplyProjectToDir(path, id) +} + +func (v extfsVolumeQuota) getQuotaInfo(path string, id common.QuotaID) (C.struct_if_dqblk, syscall.Errno) { + var d C.struct_if_dqblk + + var cs = C.CString(v.backingDev) + defer C.free(unsafe.Pointer(cs)) + + _, _, errno := unix.Syscall6(unix.SYS_QUOTACTL, C.Q_GETPQUOTA, + uintptr(unsafe.Pointer(cs)), uintptr(C.__u32(id)), + uintptr(unsafe.Pointer(&d)), 0, 0) + return d, errno +} + +// QuotaIDIsInUse -- determine whether the quota ID is already in use. 
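+// The ID is reported as in use only when quotactl succeeds and at least one
+// of the limit, usage, or grace-time fields in the returned dqblk is nonzero;
+// a quotactl failure is reported as not-in-use rather than as an error.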
+func (v extfsVolumeQuota) QuotaIDIsInUse(path string, id common.QuotaID) (bool, error) { + d, errno := v.getQuotaInfo(path, id) + isInUse := !(d.dqb_bhardlimit == 0 && d.dqb_bsoftlimit == 0 && d.dqb_curspace == 0 && + d.dqb_ihardlimit == 0 && d.dqb_isoftlimit == 0 && d.dqb_curinodes == 0 && + d.dqb_btime == 0 && d.dqb_itime == 0) + return errno == 0 && isInUse, nil +} + +// GetConsumption -- retrieve the consumption (in bytes) of the directory +// Note that with ext[[:digit:]]fs the quota consumption is in bytes +// per man quotactl +func (v extfsVolumeQuota) GetConsumption(path string, id common.QuotaID) (int64, error) { + d, errno := v.getQuotaInfo(path, id) + if errno != 0 { + return 0, fmt.Errorf("Failed to get quota for %s: %s", path, errno.Error()) + } + klog.V(3).Infof("Consumption for %s is %v", path, d.dqb_curspace) + return int64(d.dqb_curspace), nil +} + +// GetInodes -- retrieve the number of inodes in use under the directory +func (v extfsVolumeQuota) GetInodes(path string, id common.QuotaID) (int64, error) { + d, errno := v.getQuotaInfo(path, id) + if errno != 0 { + return 0, fmt.Errorf("Failed to get quota for %s: %s", path, errno.Error()) + } + klog.V(3).Infof("Inode consumption for %s is %v", path, d.dqb_curinodes) + return int64(d.dqb_curinodes), nil +} diff --git a/pkg/volume/util/quota/project.go b/pkg/volume/util/quota/project.go new file mode 100644 index 00000000000..4caa7ecd671 --- /dev/null +++ b/pkg/volume/util/quota/project.go @@ -0,0 +1,353 @@ +// +build linux + +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package quota + +import ( + "bufio" + "fmt" + "io/ioutil" + "os" + "path/filepath" + "regexp" + "strconv" + "sync" + + "golang.org/x/sys/unix" + "k8s.io/klog" + "k8s.io/kubernetes/pkg/volume/util/quota/common" +) + +var projectsFile = "/etc/projects" +var projidFile = "/etc/projid" + +var projectsParseRegexp *regexp.Regexp = regexp.MustCompilePOSIX("^([[:digit:]]+):(.*)$") +var projidParseRegexp *regexp.Regexp = regexp.MustCompilePOSIX("^([^#][^:]*):([[:digit:]]+)$") + +var quotaIDLock sync.RWMutex + +const maxUnusedQuotasToSearch = 128 // Don't go into an infinite loop searching for an unused quota + +type projectType struct { + isValid bool // False if we need to remove this line + id common.QuotaID + data string // Project name (projid) or directory (projects) + line string +} + +type projectsList struct { + projects []projectType + projid []projectType +} + +func projFilesAreOK() error { + if sf, err := os.Lstat(projectsFile); err != nil || sf.Mode().IsRegular() { + if sf, err := os.Lstat(projidFile); err != nil || sf.Mode().IsRegular() { + return nil + } + return fmt.Errorf("%s exists but is not a plain file, cannot continue", projidFile) + } + return fmt.Errorf("%s exists but is not a plain file, cannot continue", projectsFile) +} + +func lockFile(file *os.File) error { + return unix.Flock(int(file.Fd()), unix.LOCK_EX) +} + +func unlockFile(file *os.File) error { + return unix.Flock(int(file.Fd()), unix.LOCK_UN) +} + +// openAndLockProjectFiles opens /etc/projects and /etc/projid locked. +// Creates them if they don't exist +func openAndLockProjectFiles() (*os.File, *os.File, error) { + // Make sure neither project-related file is a symlink! + if err := projFilesAreOK(); err != nil { + return nil, nil, err + } + // We don't actually modify the original files; we create temporaries and + // move them over the originals + fProjects, err := os.OpenFile(projectsFile, os.O_RDONLY|os.O_CREATE, 0644) + if err != nil { + return nil, nil, err + } + fProjid, err := os.OpenFile(projidFile, os.O_RDONLY|os.O_CREATE, 0644) + if err == nil { + // Check once more, to ensure nothing got changed out from under us + if err := projFilesAreOK(); err == nil { + err = lockFile(fProjects) + if err == nil { + err = lockFile(fProjid) + if err == nil { + return fProjects, fProjid, nil + } + // Nothing useful we can do if we get an error here + unlockFile(fProjects) + } + } + fProjid.Close() + } + fProjects.Close() + return nil, nil, err +} + +func closeProjectFiles(fProjects *os.File, fProjid *os.File) error { + // Nothing useful we can do if either of these fail, + // but we have to close (and thereby unlock) the files anyway. 
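+	// createProjectID and removeProjectID defer this call, so the flock()
+	// taken in openAndLockProjectFiles is released even if rewriting the
+	// project files fails.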
+ var err error + var err1 error + if fProjid != nil { + err = fProjid.Close() + } + if fProjects != nil { + err1 = fProjects.Close() + } + if err == nil { + return err1 + } + return err +} + +func parseProject(l string) projectType { + if match := projectsParseRegexp.FindStringSubmatch(l); match != nil { + i, err := strconv.Atoi(match[1]) + if err == nil { + return projectType{true, common.QuotaID(i), match[2], l} + } + } + return projectType{true, common.BadQuotaID, "", l} +} + +func parseProjid(l string) projectType { + if match := projidParseRegexp.FindStringSubmatch(l); match != nil { + i, err := strconv.Atoi(match[2]) + if err == nil { + return projectType{true, common.QuotaID(i), match[1], l} + } + } + return projectType{true, common.BadQuotaID, "", l} +} + +func parseProjFile(f *os.File, parser func(l string) projectType) []projectType { + var answer []projectType + scanner := bufio.NewScanner(f) + for scanner.Scan() { + answer = append(answer, parser(scanner.Text())) + } + return answer +} + +func readProjectFiles(projects *os.File, projid *os.File) projectsList { + return projectsList{parseProjFile(projects, parseProject), parseProjFile(projid, parseProjid)} +} + +func findAvailableQuota(path string, idMap map[common.QuotaID]bool) (common.QuotaID, error) { + unusedQuotasSearched := 0 + for id := common.FirstQuota; id == id; id++ { + if _, ok := idMap[id]; !ok { + isInUse, err := getApplier(path).QuotaIDIsInUse(path, id) + if err != nil { + return common.BadQuotaID, err + } else if !isInUse { + return id, nil + } + unusedQuotasSearched++ + if unusedQuotasSearched > maxUnusedQuotasToSearch { + break + } + } + } + return common.BadQuotaID, fmt.Errorf("Cannot find available quota ID") +} + +func addDirToProject(path string, id common.QuotaID, list *projectsList) (common.QuotaID, bool, error) { + idMap := make(map[common.QuotaID]bool) + for _, project := range list.projects { + if project.data == path { + if id != project.id { + return common.BadQuotaID, false, fmt.Errorf("Attempt to reassign project ID for %s", path) + } + // Trying to reassign a directory to the project it's + // already in. 
Maybe this should be an error, but for + // now treat it as an idempotent operation + return id, false, nil + } + idMap[project.id] = true + } + var needToAddProjid = true + for _, projid := range list.projid { + idMap[projid.id] = true + if projid.id == id && id != common.BadQuotaID { + needToAddProjid = false + } + } + var err error + if id == common.BadQuotaID { + id, err = findAvailableQuota(path, idMap) + if err != nil { + return common.BadQuotaID, false, err + } + needToAddProjid = true + } + if needToAddProjid { + name := fmt.Sprintf("volume%v", id) + line := fmt.Sprintf("%s:%v", name, id) + list.projid = append(list.projid, projectType{true, id, name, line}) + } + line := fmt.Sprintf("%v:%s", id, path) + list.projects = append(list.projects, projectType{true, id, path, line}) + return id, needToAddProjid, nil +} + +func removeDirFromProject(path string, id common.QuotaID, list *projectsList) (bool, error) { + if id == common.BadQuotaID { + return false, fmt.Errorf("Attempt to remove invalid quota ID from %s", path) + } + foundAt := -1 + countByID := make(map[common.QuotaID]int) + for i, project := range list.projects { + if project.data == path { + if id != project.id { + return false, fmt.Errorf("Attempting to remove quota ID %v from path %s, but expecting ID %v", id, path, project.id) + } else if foundAt != -1 { + return false, fmt.Errorf("Found multiple quota IDs for path %s", path) + } + // Faster and easier than deleting an element + list.projects[i].isValid = false + foundAt = i + } + countByID[project.id]++ + } + if foundAt == -1 { + return false, fmt.Errorf("Cannot find quota associated with path %s", path) + } + if countByID[id] <= 1 { + // Removing the last entry means that we're no longer using + // the quota ID, so remove that as well + for i, projid := range list.projid { + if projid.id == id { + list.projid[i].isValid = false + } + } + return true, nil + } + return false, nil +} + +func writeProjectFile(base *os.File, projects []projectType) (string, error) { + oname := base.Name() + stat, err := base.Stat() + if err != nil { + return "", err + } + mode := stat.Mode() & os.ModePerm + f, err := ioutil.TempFile(filepath.Dir(oname), filepath.Base(oname)) + if err != nil { + return "", err + } + filename := f.Name() + if err := os.Chmod(filename, mode); err != nil { + return "", err + } + for _, proj := range projects { + if proj.isValid { + if _, err := f.WriteString(fmt.Sprintf("%s\n", proj.line)); err != nil { + f.Close() + os.Remove(filename) + return "", err + } + } + } + if err := f.Close(); err != nil { + os.Remove(filename) + return "", err + } + return filename, nil +} + +func writeProjectFiles(fProjects *os.File, fProjid *os.File, writeProjid bool, list projectsList) error { + tmpProjects, err := writeProjectFile(fProjects, list.projects) + if err == nil { + // Ensure that both files are written before we try to rename either. + if writeProjid { + tmpProjid, err := writeProjectFile(fProjid, list.projid) + if err == nil { + err = os.Rename(tmpProjid, fProjid.Name()) + if err != nil { + os.Remove(tmpProjid) + } + } + } + if err == nil { + err = os.Rename(tmpProjects, fProjects.Name()) + if err == nil { + return nil + } + // We're in a bit of trouble here; at this + // point we've successfully renamed tmpProjid + // to the real thing, but renaming tmpProject + // to the real file failed. There's not much we + // can do in this position. Anything we could do + // to try to undo it would itself be likely to fail. 
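+			// Fall through: the temporary projects file is removed and the
+			// error is logged below before being returned to the caller.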
+ } + os.Remove(tmpProjects) + } + klog.V(3).Infof("Unable to write project files: %v", err) + return err +} + +func createProjectID(path string, ID common.QuotaID) (common.QuotaID, error) { + quotaIDLock.Lock() + defer quotaIDLock.Unlock() + fProjects, fProjid, err := openAndLockProjectFiles() + if err == nil { + defer closeProjectFiles(fProjects, fProjid) + list := readProjectFiles(fProjects, fProjid) + writeProjid := true + ID, writeProjid, err = addDirToProject(path, ID, &list) + if err == nil && ID != common.BadQuotaID { + if err = writeProjectFiles(fProjects, fProjid, writeProjid, list); err == nil { + return ID, nil + } + } + } + klog.V(3).Infof("addQuotaID %s %v failed %v", path, ID, err) + return common.BadQuotaID, err +} + +func removeProjectID(path string, ID common.QuotaID) error { + if ID == common.BadQuotaID { + return fmt.Errorf("attempting to remove invalid quota ID %v", ID) + } + quotaIDLock.Lock() + defer quotaIDLock.Unlock() + fProjects, fProjid, err := openAndLockProjectFiles() + if err == nil { + defer closeProjectFiles(fProjects, fProjid) + list := readProjectFiles(fProjects, fProjid) + writeProjid := true + writeProjid, err = removeDirFromProject(path, ID, &list) + if err == nil { + if err = writeProjectFiles(fProjects, fProjid, writeProjid, list); err == nil { + return nil + } + } + } + klog.V(3).Infof("removeQuotaID %s %v failed %v", path, ID, err) + return err +} diff --git a/pkg/volume/util/quota/quota.go b/pkg/volume/util/quota/quota.go new file mode 100644 index 00000000000..1bfa7d7c69f --- /dev/null +++ b/pkg/volume/util/quota/quota.go @@ -0,0 +1,47 @@ +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package quota + +import ( + utilfeature "k8s.io/apiserver/pkg/util/feature" + "k8s.io/kubernetes/pkg/features" + "k8s.io/kubernetes/pkg/util/mount" +) + +// Interface -- quota interface +type Interface interface { + // Does the path provided support quotas, and if so, what types + SupportsQuotas(m mount.Interface, path string) (bool, error) + // Assign a quota (picked by the quota mechanism) to a path, + // and return it. + AssignQuota(m mount.Interface, path string, poduid string, bytes int64) error + + // Get the quota-based storage consumption for the path + GetConsumption(path string) (int64, error) + + // Get the quota-based inode consumption for the path + GetInodes(path string) (int64, error) + + // Remove the quota from a path + // Implementations may assume that any data covered by the + // quota has already been removed. + ClearQuota(m mount.Interface, path string, poduid string) error +} + +func enabledQuotasForMonitoring() bool { + return utilfeature.DefaultFeatureGate.Enabled(features.FSQuotaForLSCIMonitoring) +} diff --git a/pkg/volume/util/quota/quota_linux.go b/pkg/volume/util/quota/quota_linux.go new file mode 100644 index 00000000000..664d2bc38d3 --- /dev/null +++ b/pkg/volume/util/quota/quota_linux.go @@ -0,0 +1,432 @@ +// +build linux + +/* +Copyright 2018 The Kubernetes Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package quota + +import ( + "bufio" + "fmt" + "os" + "path/filepath" + "regexp" + "sync" + + "k8s.io/apimachinery/pkg/util/uuid" + "k8s.io/klog" + "k8s.io/kubernetes/pkg/util/mount" + "k8s.io/kubernetes/pkg/volume/util/quota/common" + "k8s.io/kubernetes/pkg/volume/util/quota/extfs" + "k8s.io/kubernetes/pkg/volume/util/quota/xfs" +) + +// Pod -> ID +var podQuotaMap = make(map[string]common.QuotaID) + +// Dir -> ID (for convenience) +var dirQuotaMap = make(map[string]common.QuotaID) + +// ID -> pod +var quotaPodMap = make(map[common.QuotaID]string) + +// Directory -> pod +var dirPodMap = make(map[string]string) + +// Backing device -> applier +// This is *not* cleaned up; its size will be bounded. +var devApplierMap = make(map[string]common.LinuxVolumeQuotaApplier) + +// Directory -> applier +var dirApplierMap = make(map[string]common.LinuxVolumeQuotaApplier) +var dirApplierLock sync.RWMutex + +// Pod -> refcount +var podDirCountMap = make(map[string]int) + +// ID -> size +var quotaSizeMap = make(map[common.QuotaID]int64) +var quotaLock sync.RWMutex + +var supportsQuotasMap = make(map[string]bool) +var supportsQuotasLock sync.RWMutex + +var mountParseRegexp *regexp.Regexp = regexp.MustCompilePOSIX("^([^ ]*)[ \t]*([^ ]*)[ \t]*([^ ]*)") // Ignore options etc. + +// Directory -> backingDev +var backingDevMap = make(map[string]string) +var backingDevLock sync.RWMutex + +var mountpointMap = make(map[string]string) +var mountpointLock sync.RWMutex + +var mountsFile = "/proc/self/mounts" + +var providers = []common.LinuxVolumeQuotaProvider{ + &extfs.VolumeProvider{}, + &xfs.VolumeProvider{}, +} + +// Separate the innards for ease of testing +func detectBackingDevInternal(mountpoint string, mounts string) (string, error) { + file, err := os.Open(mounts) + if err != nil { + return "", err + } + defer file.Close() + scanner := bufio.NewScanner(file) + for scanner.Scan() { + match := mountParseRegexp.FindStringSubmatch(scanner.Text()) + if match != nil { + device := match[1] + mount := match[2] + if mount == mountpoint { + return device, nil + } + } + } + return "", fmt.Errorf("couldn't find backing device for %s", mountpoint) +} + +// detectBackingDev assumes that the mount point provided is valid +func detectBackingDev(_ mount.Interface, mountpoint string) (string, error) { + return detectBackingDevInternal(mountpoint, mountsFile) +} + +func clearBackingDev(path string) { + backingDevLock.Lock() + defer backingDevLock.Unlock() + delete(backingDevMap, path) +} + +// Assumes that the path has been fully canonicalized +// Breaking this up helps with testing +func detectMountpointInternal(m mount.Interface, path string) (string, error) { + for path != "" && path != "/" { + // per pkg/util/mount/mount_linux this detects all but + // a bind mount from one part of a mount to another. 
+ // For our purposes that's fine; we simply want the "true" + // mount point + // + // IsNotMountPoint proved much more troublesome; it actually + // scans the mounts, and when a lot of mount/unmount + // activity takes place, it is not able to get a consistent + // view of /proc/self/mounts, causing it to time out and + // report incorrectly. + isNotMount, err := m.IsLikelyNotMountPoint(path) + if err != nil { + return "/", err + } + if !isNotMount { + return path, nil + } + path = filepath.Dir(path) + } + return "/", nil +} + +func detectMountpoint(m mount.Interface, path string) (string, error) { + xpath, err := filepath.Abs(path) + if err == nil { + if xpath, err = filepath.EvalSymlinks(xpath); err == nil { + if xpath, err = detectMountpointInternal(m, xpath); err == nil { + return xpath, nil + } + } + } + return "/", err +} + +func clearMountpoint(path string) { + mountpointLock.Lock() + defer mountpointLock.Unlock() + delete(mountpointMap, path) +} + +// getFSInfo Returns mountpoint and backing device +// getFSInfo should cache the mountpoint and backing device for the +// path. +func getFSInfo(m mount.Interface, path string) (string, string, error) { + mountpointLock.Lock() + defer mountpointLock.Unlock() + + backingDevLock.Lock() + defer backingDevLock.Unlock() + + var err error + + mountpoint, okMountpoint := mountpointMap[path] + if !okMountpoint { + mountpoint, err = detectMountpoint(m, path) + klog.V(3).Infof("Mountpoint %s -> %s (%v)", path, mountpoint, err) + if err != nil { + return "", "", err + } + } + + backingDev, okBackingDev := backingDevMap[path] + if !okBackingDev { + backingDev, err = detectBackingDev(m, mountpoint) + klog.V(3).Infof("Backing dev %s -> %s (%v)", path, backingDev, err) + if err != nil { + return "", "", err + } + } + mountpointMap[path] = mountpoint + backingDevMap[path] = backingDev + return mountpoint, backingDev, nil +} + +func clearFSInfo(path string) { + clearMountpoint(path) + clearBackingDev(path) +} + +func getApplier(path string) common.LinuxVolumeQuotaApplier { + dirApplierLock.Lock() + defer dirApplierLock.Unlock() + return dirApplierMap[path] +} + +func setApplier(path string, applier common.LinuxVolumeQuotaApplier) { + dirApplierLock.Lock() + defer dirApplierLock.Unlock() + dirApplierMap[path] = applier +} + +func clearApplier(path string) { + dirApplierLock.Lock() + defer dirApplierLock.Unlock() + delete(dirApplierMap, path) +} + +func setQuotaOnDir(path string, id common.QuotaID, bytes int64) error { + return getApplier(path).SetQuotaOnDir(path, id, bytes) +} + +func getQuotaOnDir(m mount.Interface, path string) (common.QuotaID, error) { + _, _, err := getFSInfo(m, path) + if err != nil { + return common.BadQuotaID, err + } + return getApplier(path).GetQuotaOnDir(path) +} + +func clearQuotaOnDir(m mount.Interface, path string) error { + // Since we may be called without path being in the map, + // we explicitly have to check in this case. + klog.V(3).Infof("clearQuotaOnDir %s", path) + supportsQuotas, err := SupportsQuotas(m, path) + if !supportsQuotas { + return nil + } + projid, err := getQuotaOnDir(m, path) + if err == nil && projid != common.BadQuotaID { + klog.V(3).Infof("clearQuotaOnDir clearing quota") + // This means that we have a quota on the directory but + // we can't clear it. That's not good. 
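+		// Zero the limits first, then retire the project ID from
+		// /etc/projects and /etc/projid, and finally drop the cached
+		// mountpoint/backing-device information for the path.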
+ err = setQuotaOnDir(path, projid, 0) + if err != nil { + klog.V(3).Infof("Attempt to clear quota failed: %v", err) + } + err1 := removeProjectID(path, projid) + if err1 != nil { + klog.V(3).Infof("Attempt to remove quota ID from system files failed: %v", err1) + } + clearFSInfo(path) + if err != nil { + return err + } + return err1 + } + klog.V(3).Infof("clearQuotaOnDir fails %v", err) + // If we couldn't get a quota, that's fine -- there may + // never have been one, and we have no way to know otherwise + return nil +} + +// SupportsQuotas -- Does the path support quotas +// Cache the applier for paths that support quotas. For paths that don't, +// don't cache the result because nothing will clean it up. +// However, do cache the device->applier map; the number of devices +// is bounded. +func SupportsQuotas(m mount.Interface, path string) (bool, error) { + if !enabledQuotasForMonitoring() { + klog.V(3).Info("SupportsQuotas called, but quotas disabled") + return false, nil + } + supportsQuotasLock.Lock() + defer supportsQuotasLock.Unlock() + if supportsQuotas, ok := supportsQuotasMap[path]; ok { + return supportsQuotas, nil + } + mount, dev, err := getFSInfo(m, path) + klog.V(3).Infof("SupportsQuotas %s -> mount %s dev %s %v", path, mount, dev, err) + if err != nil { + return false, err + } + // Do we know about this device? + applier, ok := devApplierMap[mount] + if !ok { + for _, provider := range providers { + if applier = provider.GetQuotaApplier(mount, dev); applier != nil { + devApplierMap[mount] = applier + break + } + } + } + if applier != nil { + klog.V(3).Infof("SupportsQuotas got applier %v", applier) + supportsQuotasMap[path] = true + setApplier(path, applier) + return true, nil + } + klog.V(3).Infof("SupportsQuotas got no applier") + delete(backingDevMap, path) + delete(mountpointMap, path) + return false, nil +} + +// AssignQuota -- assign a quota to the specified directory. +// AssignQuota chooses the quota ID based on the pod UID and path. +// If the pod UID is identical to another one known, it may (but presently +// doesn't) choose the same quota ID as other volumes in the pod. +func AssignQuota(m mount.Interface, path string, poduid string, bytes int64) error { + if ok, err := SupportsQuotas(m, path); !ok { + return fmt.Errorf("Quotas not supported on %s: %v", path, err) + } + quotaLock.Lock() + defer quotaLock.Unlock() + // Current policy is to set individual quotas on each volumes. + // If we decide later that we want to assign one quota for all + // volumes in a pod, we can simply remove this line of code. + // If and when we decide permanently that we're going to adop + // one quota per volume, we can rip all of the pod code out. + poduid = string(uuid.NewUUID()) + klog.V(3).Infof("Synthesizing pod ID %s for directory %s in AssignQuota", poduid, path) + if pod, ok := dirPodMap[path]; ok && pod != poduid { + return fmt.Errorf("Requesting quota on existing directory %s but different pod %s %s", path, pod, poduid) + } + oid, ok := podQuotaMap[poduid] + if ok { + if quotaSizeMap[oid] != bytes { + return fmt.Errorf("Requesting quota of different size: old %v new %v", quotaSizeMap[oid], bytes) + } + } else { + oid = common.BadQuotaID + } + id, err := createProjectID(path, oid) + if err == nil { + if oid != common.BadQuotaID && oid != id { + klog.V(3).Infof("Attempt to reassign quota %v to %v", oid, id) + return fmt.Errorf("Attempt to reassign quota %v to %v", oid, id) + } + // When enforcing quotas are enabled, we'll condition this + // on their being disabled also. 
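+		// A negative size is passed to setQuotaOnDir, which the appliers
+		// treat as "apply the largest representable quota", so the project
+		// is used for accounting only and never enforces a limit.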
+ if bytes > 0 { + bytes = -1 + } + if err = setQuotaOnDir(path, id, bytes); err == nil { + quotaPodMap[id] = poduid + quotaSizeMap[id] = bytes + podQuotaMap[poduid] = id + dirQuotaMap[path] = id + dirPodMap[path] = poduid + podDirCountMap[poduid]++ + return nil + } + removeProjectID(path, id) + } + klog.V(3).Infof("Assign quota FAILED %v", err) + return err +} + +// GetConsumption -- retrieve the consumption (in bytes) of the directory +func GetConsumption(path string) (int64, error) { + // Note that we actually need to hold the lock at least through + // running the quota command, so it can't get recycled behind our back + quotaLock.Lock() + defer quotaLock.Unlock() + applier := getApplier(path) + if applier == nil { + return 0, fmt.Errorf("No quota available for %s", path) + } + return applier.GetConsumption(path, dirQuotaMap[path]) +} + +// GetInodes -- retrieve the number of inodes in use under the directory +func GetInodes(path string) (int64, error) { + // Note that we actually need to hold the lock at least through + // running the quota command, so it can't get recycled behind our back + quotaLock.Lock() + defer quotaLock.Unlock() + applier := getApplier(path) + if applier == nil { + return 0, fmt.Errorf("No quota available for %s", path) + } + return applier.GetInodes(path, dirQuotaMap[path]) +} + +// ClearQuota -- remove the quota assigned to a directory +func ClearQuota(m mount.Interface, path string) error { + klog.V(3).Infof("ClearQuota %s", path) + if !enabledQuotasForMonitoring() { + klog.V(3).Info("ClearQuota called, but quotas disabled") + return fmt.Errorf("ClearQuota called, but quotas disabled") + } + quotaLock.Lock() + defer quotaLock.Unlock() + poduid, ok := dirPodMap[path] + if !ok { + // Nothing in the map either means that there was no + // quota to begin with or that we're clearing a + // stale directory, so if we find a quota, just remove it. + // The process of clearing the quota requires that an applier + // be found, which needs to be cleaned up. + defer delete(supportsQuotasMap, path) + defer clearApplier(path) + return clearQuotaOnDir(m, path) + } + _, ok = podQuotaMap[poduid] + if !ok { + return fmt.Errorf("ClearQuota: No quota available for %s", path) + } + var err error + projid, err := getQuotaOnDir(m, path) + if projid != dirQuotaMap[path] { + return fmt.Errorf("Expected quota ID %v on dir %s does not match actual %v", dirQuotaMap[path], path, projid) + } + count, ok := podDirCountMap[poduid] + if count <= 1 || !ok { + err = clearQuotaOnDir(m, path) + if err != nil { + klog.V(3).Infof("Unable to clear quota %v %s: %v", dirQuotaMap[path], path, err) + } + delete(quotaSizeMap, podQuotaMap[poduid]) + delete(quotaPodMap, podQuotaMap[poduid]) + delete(podDirCountMap, poduid) + delete(podQuotaMap, poduid) + } else { + err = removeProjectID(path, projid) + podDirCountMap[poduid]-- + klog.V(3).Infof("Not clearing quota for pod %s; still %v dirs outstanding", poduid, podDirCountMap[poduid]) + } + delete(dirPodMap, path) + delete(dirQuotaMap, path) + delete(supportsQuotasMap, path) + clearApplier(path) + return err +} diff --git a/pkg/volume/util/quota/quota_linux_test.go b/pkg/volume/util/quota/quota_linux_test.go new file mode 100644 index 00000000000..9366821c75c --- /dev/null +++ b/pkg/volume/util/quota/quota_linux_test.go @@ -0,0 +1,762 @@ +// +build linux + +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package quota + +import ( + "fmt" + "io/ioutil" + utilfeature "k8s.io/apiserver/pkg/util/feature" + "k8s.io/kubernetes/pkg/features" + "k8s.io/kubernetes/pkg/util/mount" + "k8s.io/kubernetes/pkg/volume/util/quota/common" + "os" + "strings" + "testing" +) + +const dummyMountData = `sysfs /sys sysfs rw,nosuid,nodev,noexec,relatime 0 0 +proc /proc proc rw,nosuid,nodev,noexec,relatime 0 0 +devtmpfs /dev devtmpfs rw,nosuid,size=6133536k,nr_inodes=1533384,mode=755 0 0 +tmpfs /tmp tmpfs rw,nosuid,nodev 0 0 +/dev/sda1 /boot ext4 rw,relatime 0 0 +/dev/mapper/fedora-root / ext4 rw,noatime 0 0 +/dev/mapper/fedora-home /home ext4 rw,noatime 0 0 +/dev/sdb1 /virt xfs rw,noatime,attr2,inode64,usrquota,prjquota 0 0 +` + +const dummyMountDataPquota = `tmpfs /tmp tmpfs rw,nosuid,nodev 0 0 +/dev/sda1 /boot ext4 rw,relatime 0 0 +/dev/mapper/fedora-root / ext4 rw,noatime 0 0 +/dev/mapper/fedora-home /home ext4 rw,noatime 0 0 +/dev/sdb1 /mnt/virt xfs rw,noatime,attr2,inode64,usrquota,prjquota 0 0 +` +const dummyMountDataNoPquota = `tmpfs /tmp tmpfs rw,nosuid,nodev 0 0 +/dev/sda1 /boot ext4 rw,relatime 0 0 +/dev/mapper/fedora-root / ext4 rw,noatime 0 0 +/dev/mapper/fedora-home /home ext4 rw,noatime 0 0 +/dev/sdb1 /mnt/virt xfs rw,noatime,attr2,inode64,usrquota 0 0 +` + +const dummyMountTest = `/dev/sda1 / ext4 rw,noatime 0 0 +/dev/sda2 /quota ext4 rw,prjquota 0 0 +/dev/sda3 /noquota ext4 rw 0 0 +` + +func dummyFakeMount1() mount.Interface { + return &mount.FakeMounter{ + MountPoints: []mount.MountPoint{ + { + Device: "tmpfs", + Path: "/tmp", + Type: "tmpfs", + Opts: []string{"rw", "nosuid", "nodev"}, + }, + { + Device: "/dev/sda1", + Path: "/boot", + Type: "ext4", + Opts: []string{"rw", "relatime"}, + }, + { + Device: "dev/mapper/fedora-root", + Path: "/", + Type: "ext4", + Opts: []string{"rw", "relatime"}, + }, + { + Device: "dev/mapper/fedora-home", + Path: "/home", + Type: "ext4", + Opts: []string{"rw", "relatime"}, + }, + { + Device: "/dev/sdb1", + Path: "/mnt/virt", + Type: "xfs", + Opts: []string{"rw", "relatime", "attr2", "inode64", "usrquota", "prjquota"}, + }, + }, + } +} + +type backingDevTest struct { + path string + mountdata string + expectedResult string + expectFailure bool +} + +type mountpointTest struct { + path string + mounter mount.Interface + expectedResult string + expectFailure bool +} + +func testBackingDev1(testcase backingDevTest) error { + tmpfile, err := ioutil.TempFile("", "backingdev") + if err != nil { + return err + } + defer os.Remove(tmpfile.Name()) + if _, err = tmpfile.WriteString(testcase.mountdata); err != nil { + return err + } + + backingDev, err := detectBackingDevInternal(testcase.path, tmpfile.Name()) + if err != nil { + if testcase.expectFailure { + return nil + } + return err + } + if testcase.expectFailure { + return fmt.Errorf("Path %s expected to fail; succeeded and got %s", testcase.path, backingDev) + } + if backingDev == testcase.expectedResult { + return nil + } + return fmt.Errorf("Mismatch: path %s expects mountpoint %s got %s", testcase.path, testcase.expectedResult, backingDev) +} + +func TestBackingDev(t *testing.T) { + testcasesBackingDev := 
map[string]backingDevTest{ + "Root": { + "/", + dummyMountData, + "/dev/mapper/fedora-root", + false, + }, + "tmpfs": { + "/tmp", + dummyMountData, + "tmpfs", + false, + }, + "user filesystem": { + "/virt", + dummyMountData, + "/dev/sdb1", + false, + }, + "empty mountpoint": { + "", + dummyMountData, + "", + true, + }, + "bad mountpoint": { + "/kiusf", + dummyMountData, + "", + true, + }, + } + for name, testcase := range testcasesBackingDev { + err := testBackingDev1(testcase) + if err != nil { + t.Errorf("%s failed: %s", name, err.Error()) + } + } +} + +func TestDetectMountPoint(t *testing.T) { + testcasesMount := map[string]mountpointTest{ + "Root": { + "/", + dummyFakeMount1(), + "/", + false, + }, + "(empty)": { + "", + dummyFakeMount1(), + "/", + false, + }, + "(invalid)": { + "", + dummyFakeMount1(), + "/", + false, + }, + "/usr": { + "/usr", + dummyFakeMount1(), + "/", + false, + }, + "/var/tmp": { + "/var/tmp", + dummyFakeMount1(), + "/", + false, + }, + } + for name, testcase := range testcasesMount { + mountpoint, err := detectMountpointInternal(testcase.mounter, testcase.path) + if err == nil && testcase.expectFailure { + t.Errorf("Case %s expected failure, but succeeded, returning mountpoint %s", name, mountpoint) + } else if err != nil { + t.Errorf("Case %s failed: %s", name, err.Error()) + } else if mountpoint != testcase.expectedResult { + t.Errorf("Case %s got mountpoint %s, expected %s", name, mountpoint, testcase.expectedResult) + } + } +} + +var dummyMountPoints = []mount.MountPoint{ + { + Device: "/dev/sda2", + Path: "/quota1", + Type: "ext4", + Opts: []string{"rw", "relatime", "prjquota"}, + }, + { + Device: "/dev/sda3", + Path: "/quota2", + Type: "ext4", + Opts: []string{"rw", "relatime", "prjquota"}, + }, + { + Device: "/dev/sda3", + Path: "/noquota", + Type: "ext4", + Opts: []string{"rw", "relatime"}, + }, + { + Device: "/dev/sda1", + Path: "/", + Type: "ext4", + Opts: []string{"rw", "relatime"}, + }, +} + +func dummyQuotaTest() mount.Interface { + return &mount.FakeMounter{ + MountPoints: dummyMountPoints, + } +} + +func dummySetFSInfo(path string) { + if enabledQuotasForMonitoring() { + for _, mount := range dummyMountPoints { + if strings.HasPrefix(path, mount.Path) { + mountpointMap[path] = mount.Path + backingDevMap[path] = mount.Device + return + } + } + } +} + +type VolumeProvider1 struct { +} + +type VolumeProvider2 struct { +} + +type testVolumeQuota struct { +} + +func logAllMaps(where string) { + fmt.Printf("Maps at %s\n", where) + fmt.Printf(" Map podQuotaMap contents:\n") + for key, val := range podQuotaMap { + fmt.Printf(" %v -> %v\n", key, val) + } + fmt.Printf(" Map dirQuotaMap contents:\n") + for key, val := range dirQuotaMap { + fmt.Printf(" %v -> %v\n", key, val) + } + fmt.Printf(" Map quotaPodMap contents:\n") + for key, val := range quotaPodMap { + fmt.Printf(" %v -> %v\n", key, val) + } + fmt.Printf(" Map dirPodMap contents:\n") + for key, val := range dirPodMap { + fmt.Printf(" %v -> %v\n", key, val) + } + fmt.Printf(" Map devApplierMap contents:\n") + for key, val := range devApplierMap { + fmt.Printf(" %v -> %v\n", key, val) + } + fmt.Printf(" Map dirApplierMap contents:\n") + for key, val := range dirApplierMap { + fmt.Printf(" %v -> %v\n", key, val) + } + fmt.Printf(" Map podDirCountMap contents:\n") + for key, val := range podDirCountMap { + fmt.Printf(" %v -> %v\n", key, val) + } + fmt.Printf(" Map quotaSizeMap contents:\n") + for key, val := range quotaSizeMap { + fmt.Printf(" %v -> %v\n", key, val) + } + fmt.Printf(" Map 
supportsQuotasMap contents:\n") + for key, val := range supportsQuotasMap { + fmt.Printf(" %v -> %v\n", key, val) + } + fmt.Printf(" Map backingDevMap contents:\n") + for key, val := range backingDevMap { + fmt.Printf(" %v -> %v\n", key, val) + } + fmt.Printf(" Map mountpointMap contents:\n") + for key, val := range mountpointMap { + fmt.Printf(" %v -> %v\n", key, val) + } + fmt.Printf("End maps %s\n", where) +} + +var testIDQuotaMap = make(map[common.QuotaID]string) +var testQuotaIDMap = make(map[string]common.QuotaID) + +func (*VolumeProvider1) GetQuotaApplier(mountpoint string, backingDev string) common.LinuxVolumeQuotaApplier { + if strings.HasPrefix(mountpoint, "/quota1") { + return testVolumeQuota{} + } + return nil +} + +func (*VolumeProvider2) GetQuotaApplier(mountpoint string, backingDev string) common.LinuxVolumeQuotaApplier { + if strings.HasPrefix(mountpoint, "/quota2") { + return testVolumeQuota{} + } + return nil +} + +func (v testVolumeQuota) SetQuotaOnDir(dir string, id common.QuotaID, _ int64) error { + odir, ok := testIDQuotaMap[id] + if ok && dir != odir { + return fmt.Errorf("ID %v is already in use", id) + } + oid, ok := testQuotaIDMap[dir] + if ok && id != oid { + return fmt.Errorf("Directory %s already has a quota applied", dir) + } + testQuotaIDMap[dir] = id + testIDQuotaMap[id] = dir + return nil +} + +func (v testVolumeQuota) GetQuotaOnDir(path string) (common.QuotaID, error) { + id, ok := testQuotaIDMap[path] + if ok { + return id, nil + } + return common.BadQuotaID, fmt.Errorf("No quota available for %s", path) +} + +func (v testVolumeQuota) QuotaIDIsInUse(_ string, id common.QuotaID) (bool, error) { + if _, ok := testIDQuotaMap[id]; ok { + return true, nil + } + // So that we reject some + if id%3 == 0 { + return false, nil + } + return false, nil +} + +func (v testVolumeQuota) GetConsumption(_ string, _ common.QuotaID) (int64, error) { + return 4096, nil +} + +func (v testVolumeQuota) GetInodes(_ string, _ common.QuotaID) (int64, error) { + return 1, nil +} + +func fakeSupportsQuotas(path string) (bool, error) { + dummySetFSInfo(path) + return SupportsQuotas(dummyQuotaTest(), path) +} + +func fakeAssignQuota(path string, poduid string, bytes int64) error { + dummySetFSInfo(path) + return AssignQuota(dummyQuotaTest(), path, poduid, bytes) +} + +func fakeClearQuota(path string) error { + dummySetFSInfo(path) + return ClearQuota(dummyQuotaTest(), path) +} + +type quotaTestCase struct { + path string + poduid string + bytes int64 + op string + expectedProjects string + expectedProjid string + supportsQuota bool + expectsSetQuota bool + deltaExpectedPodQuotaCount int + deltaExpectedDirQuotaCount int + deltaExpectedQuotaPodCount int + deltaExpectedDirPodCount int + deltaExpectedDevApplierCount int + deltaExpectedDirApplierCount int + deltaExpectedPodDirCountCount int + deltaExpectedQuotaSizeCount int + deltaExpectedSupportsQuotasCount int + deltaExpectedBackingDevCount int + deltaExpectedMountpointCount int +} + +const ( + projectsHeader = `# This is a /etc/projects header +1048578:/quota/d +` + projects1 = `1048577:/quota1/a +` + projects2 = `1048577:/quota1/a +1048580:/quota1/b +` + projects3 = `1048577:/quota1/a +1048580:/quota1/b +1048581:/quota2/b +` + projects4 = `1048577:/quota1/a +1048581:/quota2/b +` + projects5 = `1048581:/quota2/b +` + + projidHeader = `# This is a /etc/projid header +xxxxxx:1048579 +` + projid1 = `volume1048577:1048577 +` + projid2 = `volume1048577:1048577 +volume1048580:1048580 +` + projid3 = `volume1048577:1048577 
+volume1048580:1048580 +volume1048581:1048581 +` + projid4 = `volume1048577:1048577 +volume1048581:1048581 +` + projid5 = `volume1048581:1048581 +` +) + +var quotaTestCases = []quotaTestCase{ + { + "/quota1/a", "", 1024, "Supports", "", "", + true, true, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, + }, + { + "/quota1/a", "", 1024, "Set", projects1, projid1, + true, true, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, + }, + { + "/quota1/b", "x", 1024, "Set", projects2, projid2, + true, true, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, + }, + { + "/quota2/b", "x", 1024, "Set", projects3, projid3, + true, true, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + }, + { + "/quota1/b", "x", 1024, "Set", projects3, projid3, + true, false, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }, + { + "/quota1/b", "", 1024, "Clear", projects4, projid4, + true, true, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, + }, + { + "/noquota/a", "", 1024, "Supports", projects4, projid4, + false, false, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }, + { + "/quota1/a", "", 1024, "Clear", projects5, projid5, + true, true, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, + }, + { + "/quota1/a", "", 1024, "Clear", projects5, projid5, + true, false, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }, + { + "/quota2/b", "", 1024, "Clear", "", "", + true, true, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, + }, +} + +func compareProjectsFiles(t *testing.T, testcase quotaTestCase, projectsFile string, projidFile string, enabled bool) { + bytes, err := ioutil.ReadFile(projectsFile) + if err != nil { + t.Error(err.Error()) + } else { + s := string(bytes) + p := projectsHeader + if enabled { + p += testcase.expectedProjects + } + if s != p { + t.Errorf("Case %v /etc/projects miscompare: expected\n`%s`\ngot\n`%s`\n", testcase.path, p, s) + } + } + bytes, err = ioutil.ReadFile(projidFile) + if err != nil { + t.Error(err.Error()) + } else { + s := string(bytes) + p := projidHeader + if enabled { + p += testcase.expectedProjid + } + if s != p { + t.Errorf("Case %v /etc/projid miscompare: expected\n`%s`\ngot\n`%s`\n", testcase.path, p, s) + } + } +} + +func setFeature(feature utilfeature.Feature, value bool) error { + v := "true" + if !value { + v = "false" + } + return utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=%s", string(feature), v)) +} + +func runCaseEnabled(t *testing.T, testcase quotaTestCase, seq int) bool { + fail := false + var err error + switch testcase.op { + case "Supports": + supports, err := fakeSupportsQuotas(testcase.path) + if err != nil { + fail = true + t.Errorf("Case %v (%s, %v) Got error in fakeSupportsQuotas: %v", seq, testcase.path, true, err) + } + if supports != testcase.supportsQuota { + fail = true + t.Errorf("Case %v (%s, %v) fakeSupportsQuotas got %v, expect %v", seq, testcase.path, true, supports, testcase.supportsQuota) + } + return fail + case "Set": + err = fakeAssignQuota(testcase.path, testcase.poduid, testcase.bytes) + case "Clear": + err = fakeClearQuota(testcase.path) + case "GetConsumption": + _, err = GetConsumption(testcase.path) + case "GetInodes": + _, err = GetInodes(testcase.path) + default: + t.Errorf("Case %v (%s, %v) unknown operation %s", seq, testcase.path, true, testcase.op) + return true + } + if err != nil && testcase.expectsSetQuota { + fail = true + t.Errorf("Case %v (%s, %v) %s expected to clear quota but failed %v", seq, testcase.path, true, testcase.op, err) + } else if err == nil && !testcase.expectsSetQuota { + fail = true + t.Errorf("Case %v (%s, %v) %s expected not to clear quota but succeeded", seq, testcase.path, true, testcase.op) + } + return fail +} + 
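+// runCaseDisabled mirrors runCaseEnabled with the feature gate off: "Supports"
+// must report false, and every other operation is expected to return an error
+// because quotas are disabled.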
+func runCaseDisabled(t *testing.T, testcase quotaTestCase, seq int) bool { + var err error + var supports bool + switch testcase.op { + case "Supports": + if supports, err = fakeSupportsQuotas(testcase.path); supports { + t.Errorf("Case %v (%s, %v) supports quotas but shouldn't", seq, testcase.path, false) + return true + } + return false + case "Set": + err = fakeAssignQuota(testcase.path, testcase.poduid, testcase.bytes) + case "Clear": + err = fakeClearQuota(testcase.path) + case "GetConsumption": + _, err = GetConsumption(testcase.path) + case "GetInodes": + _, err = GetInodes(testcase.path) + default: + t.Errorf("Case %v (%s, %v) unknown operation %s", seq, testcase.path, false, testcase.op) + return true + } + if err == nil { + t.Errorf("Case %v (%s, %v) %s: supports quotas but shouldn't", seq, testcase.path, false, testcase.op) + return true + } + return false +} + +func testAddRemoveQuotas(t *testing.T, enabled bool) { + if err := setFeature(features.FSQuotaForLSCIMonitoring, enabled); err != nil { + t.Errorf("Unable to enable LSCI monitoring: %v", err) + } + tmpProjectsFile, err := ioutil.TempFile("", "projects") + if err == nil { + _, err = tmpProjectsFile.WriteString(projectsHeader) + } + if err != nil { + t.Errorf("Unable to create fake projects file") + } + projectsFile = tmpProjectsFile.Name() + tmpProjectsFile.Close() + tmpProjidFile, err := ioutil.TempFile("", "projid") + if err == nil { + _, err = tmpProjidFile.WriteString(projidHeader) + } + if err != nil { + t.Errorf("Unable to create fake projid file") + } + projidFile = tmpProjidFile.Name() + tmpProjidFile.Close() + providers = []common.LinuxVolumeQuotaProvider{ + &VolumeProvider1{}, + &VolumeProvider2{}, + } + for k := range podQuotaMap { + delete(podQuotaMap, k) + } + for k := range dirQuotaMap { + delete(dirQuotaMap, k) + } + for k := range quotaPodMap { + delete(quotaPodMap, k) + } + for k := range dirPodMap { + delete(dirPodMap, k) + } + for k := range devApplierMap { + delete(devApplierMap, k) + } + for k := range dirApplierMap { + delete(dirApplierMap, k) + } + for k := range podDirCountMap { + delete(podDirCountMap, k) + } + for k := range quotaSizeMap { + delete(quotaSizeMap, k) + } + for k := range supportsQuotasMap { + delete(supportsQuotasMap, k) + } + for k := range backingDevMap { + delete(backingDevMap, k) + } + for k := range mountpointMap { + delete(mountpointMap, k) + } + for k := range testIDQuotaMap { + delete(testIDQuotaMap, k) + } + for k := range testQuotaIDMap { + delete(testQuotaIDMap, k) + } + expectedPodQuotaCount := 0 + expectedDirQuotaCount := 0 + expectedQuotaPodCount := 0 + expectedDirPodCount := 0 + expectedDevApplierCount := 0 + expectedDirApplierCount := 0 + expectedPodDirCountCount := 0 + expectedQuotaSizeCount := 0 + expectedSupportsQuotasCount := 0 + expectedBackingDevCount := 0 + expectedMountpointCount := 0 + for seq, testcase := range quotaTestCases { + if enabled { + expectedPodQuotaCount += testcase.deltaExpectedPodQuotaCount + expectedDirQuotaCount += testcase.deltaExpectedDirQuotaCount + expectedQuotaPodCount += testcase.deltaExpectedQuotaPodCount + expectedDirPodCount += testcase.deltaExpectedDirPodCount + expectedDevApplierCount += testcase.deltaExpectedDevApplierCount + expectedDirApplierCount += testcase.deltaExpectedDirApplierCount + expectedPodDirCountCount += testcase.deltaExpectedPodDirCountCount + expectedQuotaSizeCount += testcase.deltaExpectedQuotaSizeCount + expectedSupportsQuotasCount += testcase.deltaExpectedSupportsQuotasCount + expectedBackingDevCount += 
testcase.deltaExpectedBackingDevCount + expectedMountpointCount += testcase.deltaExpectedMountpointCount + } + fail := false + if enabled { + fail = runCaseEnabled(t, testcase, seq) + } else { + fail = runCaseDisabled(t, testcase, seq) + } + + compareProjectsFiles(t, testcase, projectsFile, projidFile, enabled) + if len(podQuotaMap) != expectedPodQuotaCount { + fail = true + t.Errorf("Case %v (%s, %v) podQuotaCount mismatch: got %v, expect %v", seq, testcase.path, enabled, len(podQuotaMap), expectedPodQuotaCount) + } + if len(dirQuotaMap) != expectedDirQuotaCount { + fail = true + t.Errorf("Case %v (%s, %v) dirQuotaCount mismatch: got %v, expect %v", seq, testcase.path, enabled, len(dirQuotaMap), expectedDirQuotaCount) + } + if len(quotaPodMap) != expectedQuotaPodCount { + fail = true + t.Errorf("Case %v (%s, %v) quotaPodCount mismatch: got %v, expect %v", seq, testcase.path, enabled, len(quotaPodMap), expectedQuotaPodCount) + } + if len(dirPodMap) != expectedDirPodCount { + fail = true + t.Errorf("Case %v (%s, %v) dirPodCount mismatch: got %v, expect %v", seq, testcase.path, enabled, len(dirPodMap), expectedDirPodCount) + } + if len(devApplierMap) != expectedDevApplierCount { + fail = true + t.Errorf("Case %v (%s, %v) devApplierCount mismatch: got %v, expect %v", seq, testcase.path, enabled, len(devApplierMap), expectedDevApplierCount) + } + if len(dirApplierMap) != expectedDirApplierCount { + fail = true + t.Errorf("Case %v (%s, %v) dirApplierCount mismatch: got %v, expect %v", seq, testcase.path, enabled, len(dirApplierMap), expectedDirApplierCount) + } + if len(podDirCountMap) != expectedPodDirCountCount { + fail = true + t.Errorf("Case %v (%s, %v) podDirCountCount mismatch: got %v, expect %v", seq, testcase.path, enabled, len(podDirCountMap), expectedPodDirCountCount) + } + if len(quotaSizeMap) != expectedQuotaSizeCount { + fail = true + t.Errorf("Case %v (%s, %v) quotaSizeCount mismatch: got %v, expect %v", seq, testcase.path, enabled, len(quotaSizeMap), expectedQuotaSizeCount) + } + if len(supportsQuotasMap) != expectedSupportsQuotasCount { + fail = true + t.Errorf("Case %v (%s, %v) supportsQuotasCount mismatch: got %v, expect %v", seq, testcase.path, enabled, len(supportsQuotasMap), expectedSupportsQuotasCount) + } + if len(backingDevMap) != expectedBackingDevCount { + fail = true + t.Errorf("Case %v (%s, %v) BackingDevCount mismatch: got %v, expect %v", seq, testcase.path, enabled, len(backingDevMap), expectedBackingDevCount) + } + if len(mountpointMap) != expectedMountpointCount { + fail = true + t.Errorf("Case %v (%s, %v) MountpointCount mismatch: got %v, expect %v", seq, testcase.path, enabled, len(mountpointMap), expectedMountpointCount) + } + if fail { + logAllMaps(fmt.Sprintf("%v %s", seq, testcase.path)) + } + } + os.Remove(projectsFile) + os.Remove(projidFile) +} + +func TestAddRemoveQuotasEnabled(t *testing.T) { + testAddRemoveQuotas(t, true) +} + +func TestAddRemoveQuotasDisabled(t *testing.T) { + testAddRemoveQuotas(t, false) +} diff --git a/pkg/volume/util/quota/quota_unsupported.go b/pkg/volume/util/quota/quota_unsupported.go new file mode 100644 index 00000000000..a7d1b5ecf34 --- /dev/null +++ b/pkg/volume/util/quota/quota_unsupported.go @@ -0,0 +1,54 @@ +// +build !linux + +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package quota + +import ( + "errors" + "k8s.io/kubernetes/pkg/util/mount" +) + +// Dummy quota implementation for systems that do not implement support +// for volume quotas + +var errNotImplemented = errors.New("not implemented") + +// SupportsQuotas -- dummy implementation +func SupportsQuotas(_ mount.Interface, _ string) (bool, error) { + return false, errNotImplemented +} + +// AssignQuota -- dummy implementation +func AssignQuota(_ mount.Interface, _ string, _ string, _ int64) error { + return errNotImplemented +} + +// GetConsumption -- dummy implementation +func GetConsumption(_ string) (int64, error) { + return 0, errNotImplemented +} + +// GetInodes -- dummy implementation +func GetInodes(_ string) (int64, error) { + return 0, errNotImplemented +} + +// ClearQuota -- dummy implementation +func ClearQuota(_ mount.Interface, _ string) error { + return errNotImplemented +} diff --git a/pkg/volume/util/quota/xfs/BUILD b/pkg/volume/util/quota/xfs/BUILD new file mode 100644 index 00000000000..431587cf09e --- /dev/null +++ b/pkg/volume/util/quota/xfs/BUILD @@ -0,0 +1,31 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library") + +go_library( + name = "go_default_library", + srcs = ["quota_xfs.go"], + cgo = True, + importpath = "k8s.io/kubernetes/pkg/volume/util/quota/xfs", + visibility = ["//visibility:public"], + deps = select({ + "@io_bazel_rules_go//go/platform:linux": [ + "//pkg/volume/util/quota/common:go_default_library", + "//vendor/golang.org/x/sys/unix:go_default_library", + "//vendor/k8s.io/klog:go_default_library", + ], + "//conditions:default": [], + }), +) + +filegroup( + name = "package-srcs", + srcs = glob(["**"]), + tags = ["automanaged"], + visibility = ["//visibility:private"], +) + +filegroup( + name = "all-srcs", + srcs = [":package-srcs"], + tags = ["automanaged"], + visibility = ["//visibility:public"], +) diff --git a/pkg/volume/util/quota/xfs/quota_xfs.go b/pkg/volume/util/quota/xfs/quota_xfs.go new file mode 100644 index 00000000000..c6669323c3a --- /dev/null +++ b/pkg/volume/util/quota/xfs/quota_xfs.go @@ -0,0 +1,153 @@ +// +build linux + +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package xfs
+
+/*
+#include <stdlib.h>
+#include <dirent.h>
+#include <linux/fs.h>
+#include <linux/quota.h>
+#include <linux/dqblk_xfs.h>
+#include <errno.h>
+
+#ifndef PRJQUOTA
+#define PRJQUOTA 2
+#endif
+#ifndef XFS_PROJ_QUOTA
+#define XFS_PROJ_QUOTA 2
+#endif
+#ifndef Q_XSETPQLIM
+#define Q_XSETPQLIM QCMD(Q_XSETQLIM, PRJQUOTA)
+#endif
+#ifndef Q_XGETPQUOTA
+#define Q_XGETPQUOTA QCMD(Q_XGETQUOTA, PRJQUOTA)
+#endif
+*/
+import "C"
+
+import (
+	"fmt"
+	"syscall"
+	"unsafe"
+
+	"golang.org/x/sys/unix"
+	"k8s.io/klog"
+	"k8s.io/kubernetes/pkg/volume/util/quota/common"
+)
+
+const (
+	linuxXfsMagic = 0x58465342
+	// Documented in man xfs_quota(8); not necessarily the same
+	// as the filesystem blocksize
+	quotaBsize          = 512
+	bitsPerWord         = 32 << (^uint(0) >> 63)       // either 32 or 64
+	maxQuota      int64 = 1<<(bitsPerWord-1) - 1       // either 1<<31 - 1 or 1<<63 - 1
+)
+
+// VolumeProvider supplies a quota applier to the generic code.
+type VolumeProvider struct {
+}
+
+// GetQuotaApplier -- does this backing device support quotas that
+// can be applied to directories?
+func (*VolumeProvider) GetQuotaApplier(mountpoint string, backingDev string) common.LinuxVolumeQuotaApplier {
+	if common.IsFilesystemOfType(mountpoint, backingDev, linuxXfsMagic) {
+		return xfsVolumeQuota{backingDev}
+	}
+	return nil
+}
+
+type xfsVolumeQuota struct {
+	backingDev string
+}
+
+// GetQuotaOnDir -- get the quota ID that applies to this directory.
+func (v xfsVolumeQuota) GetQuotaOnDir(path string) (common.QuotaID, error) {
+	return common.GetQuotaOnDir(path)
+}
+
+// SetQuotaOnDir -- apply the specified quota to the directory. If
+// bytes is not greater than zero, the quota should be applied in a
+// way that is non-enforcing (either explicitly so or by setting a
+// quota larger than anything the user may possibly create)
+func (v xfsVolumeQuota) SetQuotaOnDir(path string, id common.QuotaID, bytes int64) error {
+	klog.V(3).Infof("xfsSetQuotaOn %s ID %v bytes %v", path, id, bytes)
+	if bytes < 0 || bytes > maxQuota {
+		bytes = maxQuota
+	}
+
+	var d C.fs_disk_quota_t
+	d.d_version = C.FS_DQUOT_VERSION
+	d.d_id = C.__u32(id)
+	d.d_flags = C.XFS_PROJ_QUOTA
+
+	d.d_fieldmask = C.FS_DQ_BHARD
+	d.d_blk_hardlimit = C.__u64(bytes / quotaBsize)
+	d.d_blk_softlimit = d.d_blk_hardlimit
+
+	var cs = C.CString(v.backingDev)
+	defer C.free(unsafe.Pointer(cs))
+
+	_, _, errno := unix.Syscall6(unix.SYS_QUOTACTL, C.Q_XSETPQLIM,
+		uintptr(unsafe.Pointer(cs)), uintptr(d.d_id),
+		uintptr(unsafe.Pointer(&d)), 0, 0)
+	if errno != 0 {
+		return fmt.Errorf("Failed to set quota limit for ID %d on %s: %v",
+			id, path, errno.Error())
+	}
+	return common.ApplyProjectToDir(path, id)
+}
+
+func (v xfsVolumeQuota) getQuotaInfo(path string, id common.QuotaID) (C.fs_disk_quota_t, syscall.Errno) {
+	var d C.fs_disk_quota_t
+
+	var cs = C.CString(v.backingDev)
+	defer C.free(unsafe.Pointer(cs))
+
+	_, _, errno := unix.Syscall6(unix.SYS_QUOTACTL, C.Q_XGETPQUOTA,
+		uintptr(unsafe.Pointer(cs)), uintptr(C.__u32(id)),
+		uintptr(unsafe.Pointer(&d)), 0, 0)
+	return d, errno
+}
+
+// QuotaIDIsInUse -- determine whether the quota ID is already in use.
+func (v xfsVolumeQuota) QuotaIDIsInUse(path string, id common.QuotaID) (bool, error) { + _, errno := v.getQuotaInfo(path, id) + return errno == 0, nil +} + +// GetConsumption -- retrieve the consumption (in bytes) of the directory +func (v xfsVolumeQuota) GetConsumption(path string, id common.QuotaID) (int64, error) { + d, errno := v.getQuotaInfo(path, id) + if errno != 0 { + return 0, fmt.Errorf("Failed to get quota for %s: %s", path, errno.Error()) + } + klog.V(3).Infof("Consumption for %s is %v", path, d.d_bcount*quotaBsize) + return int64(d.d_bcount) * quotaBsize, nil +} + +// GetInodes -- retrieve the number of inodes in use under the directory +func (v xfsVolumeQuota) GetInodes(path string, id common.QuotaID) (int64, error) { + d, errno := v.getQuotaInfo(path, id) + if errno != 0 { + return 0, fmt.Errorf("Failed to get quota for %s: %s", path, errno.Error()) + } + klog.V(3).Infof("Inode consumption for %s is %v", path, d.d_icount) + return int64(d.d_icount), nil +} diff --git a/pkg/volume/volume.go b/pkg/volume/volume.go index a5abb923b24..fdcc30647da 100644 --- a/pkg/volume/volume.go +++ b/pkg/volume/volume.go @@ -103,8 +103,9 @@ type Attributes struct { // MounterArgs provides more easily extensible arguments to Mounter type MounterArgs struct { - FsGroup *int64 - PodUID string + FsGroup *int64 + DesiredSize int64 + PodUID string } // Mounter interface provides methods to set up/mount the volume.
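The new DesiredSize field gives a volume plugin a way to hand the pod's ephemeral-storage limit down to SetUp, where the quota package introduced above can consume it. The following is a minimal sketch of such a caller, assuming the quota package's import path under pkg/volume/util/quota and a hypothetical helper name; it is illustrative only and is not the patch's actual emptyDir change.

    package example

    import (
        "fmt"

        "k8s.io/kubernetes/pkg/util/mount"
        "k8s.io/kubernetes/pkg/volume/util/quota"
    )

    // applyQuotaIfSupported (hypothetical) applies a project quota to dir for
    // the given pod when the underlying filesystem supports it, and does
    // nothing otherwise, leaving the kubelet to fall back to walking the
    // directory to measure usage.
    func applyQuotaIfSupported(m mount.Interface, dir string, podUID string, desiredSize int64) error {
        supported, err := quota.SupportsQuotas(m, dir)
        if err != nil || !supported {
            // Quotas are an optimization for monitoring; lack of support is
            // not a mount failure.
            return nil
        }
        // desiredSize would typically come from MounterArgs.DesiredSize.
        if err := quota.AssignQuota(m, dir, podUID, desiredSize); err != nil {
            return fmt.Errorf("assigning quota to %s: %v", dir, err)
        }
        return nil
    }

At teardown, the corresponding call would be quota.ClearQuota(m, dir), which removes the quota from the directory and releases the project ID once no other directory for the pod still uses it.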