diff --git a/pkg/features/kube_features.go b/pkg/features/kube_features.go index d228da03205..a06e729e34d 100644 --- a/pkg/features/kube_features.go +++ b/pkg/features/kube_features.go @@ -657,6 +657,12 @@ const ( // // Enable Terminating condition in Endpoint Slices. EndpointSliceTerminatingCondition featuregate.Feature = "EndpointSliceTerminatingCondition" + + // owner: @derekwaynecarr + // alpha: v1.20 + // + // Enables kubelet support to size memory backed volumes + SizeMemoryBackedVolumes featuregate.Feature = "SizeMemoryBackedVolumes" ) func init() { @@ -756,6 +762,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS DisableAcceleratorUsageMetrics: {Default: true, PreRelease: featuregate.Beta}, HPAContainerMetrics: {Default: false, PreRelease: featuregate.Alpha}, RootCAConfigMap: {Default: true, PreRelease: featuregate.Beta}, + SizeMemoryBackedVolumes: {Default: false, PreRelease: featuregate.Alpha}, // inherited features from generic apiserver, relisted here to get a conflict if it is changed // unintentionally on either side: diff --git a/pkg/volume/emptydir/BUILD b/pkg/volume/emptydir/BUILD index 4a7621aa511..052a4ebaa2b 100644 --- a/pkg/volume/emptydir/BUILD +++ b/pkg/volume/emptydir/BUILD @@ -17,6 +17,8 @@ go_library( importpath = "k8s.io/kubernetes/pkg/volume/emptydir", deps = [ "//pkg/apis/core/v1/helper:go_default_library", + "//pkg/features:go_default_library", + "//pkg/kubelet/cm:go_default_library", "//pkg/volume:go_default_library", "//pkg/volume/util:go_default_library", "//pkg/volume/util/fsquota:go_default_library", @@ -24,6 +26,7 @@ go_library( "//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/types:go_default_library", + "//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library", "//staging/src/k8s.io/mount-utils:go_default_library", 
"//vendor/k8s.io/klog/v2:go_default_library", "//vendor/k8s.io/utils/strings:go_default_library", diff --git a/pkg/volume/emptydir/empty_dir.go b/pkg/volume/emptydir/empty_dir.go index 59c6fef85a6..68f5b7b367e 100644 --- a/pkg/volume/emptydir/empty_dir.go +++ b/pkg/volume/emptydir/empty_dir.go @@ -29,7 +29,10 @@ import ( "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + utilfeature "k8s.io/apiserver/pkg/util/feature" v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" + "k8s.io/kubernetes/pkg/features" + "k8s.io/kubernetes/pkg/kubelet/cm" "k8s.io/kubernetes/pkg/volume" volumeutil "k8s.io/kubernetes/pkg/volume/util" "k8s.io/kubernetes/pkg/volume/util/fsquota" @@ -107,17 +110,58 @@ func (plugin *emptyDirPlugin) NewMounter(spec *volume.Spec, pod *v1.Pod, opts vo return plugin.newMounterInternal(spec, pod, plugin.host.GetMounter(plugin.GetPluginName()), &realMountDetector{plugin.host.GetMounter(plugin.GetPluginName())}, opts) } -func (plugin *emptyDirPlugin) newMounterInternal(spec *volume.Spec, pod *v1.Pod, mounter mount.Interface, mountDetector mountDetector, opts volume.VolumeOptions) (volume.Mounter, error) { - medium := v1.StorageMediumDefault - - if spec.Volume.EmptyDir != nil { // Support a non-specified source as EmptyDir. 
-		medium = spec.Volume.EmptyDir.Medium
+// calculateEmptyDirMemorySize returns the size for a memory-backed emptyDir. It starts
+// from node allocatable memory and lowers that to the pod memory limit, then to the
+// volume's sizeLimit, whenever the candidate is positive and not above the current value.
+// With the SizeMemoryBackedVolumes gate off it returns an empty quantity instead.
+func calculateEmptyDirMemorySize(nodeAllocatableMemory *resource.Quantity, spec *volume.Spec, pod *v1.Pod) *resource.Quantity {
+	if !utilfeature.DefaultFeatureGate.Enabled(features.SizeMemoryBackedVolumes) {
+		return &resource.Quantity{}
 	}
+	// Node allocatable is the starting bound: pods can't consume more memory than all pods.
+	limit := nodeAllocatableMemory
+
+	// tighten adopts candidate as the new limit only when 0 < candidate <= limit.
+	tighten := func(candidate *resource.Quantity) {
+		if candidate.Sign() > 0 && candidate.Cmp(*limit) <= 0 {
+			limit = candidate
+		}
+	}
+
+	// Derive the pod memory limit with the same function used for pod cgroup assignment
+	// to keep behavior consistent. The result may be nil on systems that do not support
+	// pod memory containment (i.e. windows).
+	podConfig := cm.ResourceConfigForPod(pod, false, uint64(100000))
+	if podConfig != nil && podConfig.Memory != nil {
+		tighten(resource.NewQuantity(*podConfig.Memory, resource.BinarySI))
+	}
+
+	// The volume's own sizeLimit is used only when it does not exceed the bound so far.
+	if volumeLimit := spec.Volume.EmptyDir.SizeLimit; volumeLimit != nil {
+		tighten(volumeLimit)
+	}
+	return limit
+}
+
+func (plugin *emptyDirPlugin) newMounterInternal(spec *volume.Spec, pod *v1.Pod, mounter mount.Interface, mountDetector mountDetector, opts volume.VolumeOptions) (volume.Mounter, error) {
+	medium := v1.StorageMediumDefault
+	sizeLimit := &resource.Quantity{}
+	if spec.Volume.EmptyDir != nil { // Support a non-specified source as EmptyDir.
+ medium = spec.Volume.EmptyDir.Medium + if medium == v1.StorageMediumMemory { + nodeAllocatable, err := plugin.host.GetNodeAllocatable() + if err != nil { + return nil, err + } + sizeLimit = calculateEmptyDirMemorySize(nodeAllocatable.Memory(), spec, pod) + } + } return &emptyDir{ pod: pod, volName: spec.Name(), medium: medium, + sizeLimit: sizeLimit, mounter: mounter, mountDetector: mountDetector, plugin: plugin, @@ -168,6 +212,7 @@ type mountDetector interface { type emptyDir struct { pod *v1.Pod volName string + sizeLimit *resource.Quantity medium v1.StorageMedium mounter mount.Interface mountDetector mountDetector @@ -271,8 +316,14 @@ func (ed *emptyDir) setupTmpfs(dir string) error { return nil } + var options []string + // Linux system default is 50% of capacity. + if ed.sizeLimit != nil && ed.sizeLimit.Value() > 0 { + options = []string{fmt.Sprintf("size=%d", ed.sizeLimit.Value())} + } + klog.V(3).Infof("pod %v: mounting tmpfs for volume %v", ed.pod.UID, ed.volName) - return ed.mounter.MountSensitiveWithoutSystemd("tmpfs", dir, "tmpfs", nil /* options */, nil) + return ed.mounter.MountSensitiveWithoutSystemd("tmpfs", dir, "tmpfs", options, nil) } // setupHugepages creates a hugepage mount at the specified directory. 
diff --git a/pkg/volume/emptydir/empty_dir_test.go b/pkg/volume/emptydir/empty_dir_test.go
index a3dfd858931..7ea189f6e5a 100644
--- a/pkg/volume/emptydir/empty_dir_test.go
+++ b/pkg/volume/emptydir/empty_dir_test.go
@@ -25,7 +25,7 @@ import (
 	"path/filepath"
 	"testing"
 
-	"k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
@@ -918,3 +918,81 @@ func TestGetPageSize(t *testing.T) {
 		})
 	}
 }
+
+// TestCalculateEmptyDirMemorySize checks the size chosen by
+// calculateEmptyDirMemorySize with the feature gate disabled and enabled.
+func TestCalculateEmptyDirMemorySize(t *testing.T) {
+	// memory10Gi returns a fresh resource list holding only 10Gi of memory.
+	memory10Gi := func() v1.ResourceList {
+		return v1.ResourceList{v1.ResourceMemory: resource.MustParse("10Gi")}
+	}
+	// podWith wraps the given requirements in a single-container pod.
+	podWith := func(requirements v1.ResourceRequirements) *v1.Pod {
+		return &v1.Pod{
+			Spec: v1.PodSpec{
+				Containers: []v1.Container{{Resources: requirements}},
+			},
+		}
+	}
+
+	testCases := map[string]struct {
+		pod                   *v1.Pod
+		nodeAllocatableMemory resource.Quantity
+		emptyDirSizeLimit     resource.Quantity
+		expectedResult        resource.Quantity
+		featureGateEnabled    bool
+	}{
+		// Gate disabled: a zero quantity is expected whatever the other inputs are.
+		"SizeMemoryBackedVolumesDisabled": {
+			pod:                   podWith(v1.ResourceRequirements{Requests: memory10Gi()}),
+			nodeAllocatableMemory: resource.MustParse("16Gi"),
+			emptyDirSizeLimit:     resource.MustParse("1Gi"),
+			expectedResult:        resource.MustParse("0"),
+			featureGateEnabled:    false,
+		},
+		// Volume sizeLimit (1Gi) is below the 10Gi pod limit: expect the volume value.
+		"EmptyDirLocalLimit": {
+			pod:                   podWith(v1.ResourceRequirements{Limits: memory10Gi()}),
+			nodeAllocatableMemory: resource.MustParse("16Gi"),
+			emptyDirSizeLimit:     resource.MustParse("1Gi"),
+			expectedResult:        resource.MustParse("1Gi"),
+			featureGateEnabled:    true,
+		},
+		// Zero volume sizeLimit with a 10Gi pod limit: expect the pod limit.
+		"PodLocalLimit": {
+			pod:                   podWith(v1.ResourceRequirements{Limits: memory10Gi()}),
+			nodeAllocatableMemory: resource.MustParse("16Gi"),
+			emptyDirSizeLimit:     resource.MustParse("0"),
+			expectedResult:        resource.MustParse("10Gi"),
+			featureGateEnabled:    true,
+		},
+		// Requests only and a zero volume sizeLimit: expect node allocatable.
+		"NodeAllocatableLimit": {
+			pod:                   podWith(v1.ResourceRequirements{Requests: memory10Gi()}),
+			nodeAllocatableMemory: resource.MustParse("16Gi"),
+			emptyDirSizeLimit:     resource.MustParse("0"),
+			expectedResult:        resource.MustParse("16Gi"),
+			featureGateEnabled:    true,
+		},
+	}
+
+	for name, tc := range testCases {
+		t.Run(name, func(t *testing.T) {
+			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SizeMemoryBackedVolumes, tc.featureGateEnabled)()
+
+			emptyDir := &v1.EmptyDirVolumeSource{
+				Medium:    v1.StorageMediumMemory,
+				SizeLimit: &tc.emptyDirSizeLimit,
+			}
+			spec := &volume.Spec{
+				Volume: &v1.Volume{VolumeSource: v1.VolumeSource{EmptyDir: emptyDir}},
+			}
+
+			got := calculateEmptyDirMemorySize(&tc.nodeAllocatableMemory, spec, tc.pod)
+			if got.Cmp(tc.expectedResult) == 0 {
+				return
+			}
+			t.Errorf("%s: Unexpected result. Expected %v, got %v", name, tc.expectedResult.String(), got.String())
+		})
+	}
+}
diff --git a/test/e2e/common/BUILD b/test/e2e/common/BUILD
index 3c642b6e247..834f457d903 100644
--- a/test/e2e/common/BUILD
+++ b/test/e2e/common/BUILD
@@ -45,6 +45,7 @@ go_library(
     deps = [
         "//pkg/api/v1/pod:go_default_library",
         "//pkg/client/conditions:go_default_library",
+        "//pkg/features:go_default_library",
         "//pkg/kubelet:go_default_library",
         "//pkg/kubelet/events:go_default_library",
         "//pkg/kubelet/images:go_default_library",
@@ -66,6 +67,7 @@ go_library(
         "//staging/src/k8s.io/apimachinery/pkg/util/uuid:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/watch:go_default_library",
+        "//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
         "//staging/src/k8s.io/client-go/kubernetes:go_default_library",
         "//staging/src/k8s.io/client-go/tools/cache:go_default_library",
         "//staging/src/k8s.io/client-go/tools/watch:go_default_library",
diff --git a/test/e2e/common/empty_dir.go b/test/e2e/common/empty_dir.go
index 2abf7722296..29b5199370c 100644
--- a/test/e2e/common/empty_dir.go
+++ b/test/e2e/common/empty_dir.go
@@ -17,14 +17,19 @@ limitations under the License.
package common import ( + "context" "fmt" "path" "github.com/onsi/ginkgo" v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/uuid" + utilfeature "k8s.io/apiserver/pkg/util/feature" + "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/test/e2e/framework" + e2epod "k8s.io/kubernetes/test/e2e/framework/pod" e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" imageutils "k8s.io/kubernetes/test/utils/image" ) @@ -283,6 +288,78 @@ var _ = ginkgo.Describe("[sig-storage] EmptyDir volumes", func() { result := f.ExecShellInContainer(pod.Name, busyBoxMainContainerName, fmt.Sprintf("cat %s", busyBoxMainVolumeFilePath)) framework.ExpectEqual(result, message, "failed to match expected string %s with %s", message, resultString) }) + + /* + Release: v1.20 + Testname: EmptyDir, Memory backed volume is sized to specified limit + Description: A Pod created with an 'emptyDir' Volume backed by memory should be sized to user provided value. + */ + ginkgo.It("pod should support memory backed volumes of specified size", func() { + // skip if feature gate is not enabled, this could be elevated to conformance in future if on Linux. 
+ if !utilfeature.DefaultFeatureGate.Enabled(features.SizeMemoryBackedVolumes) { + return + } + + var ( + volumeName = "shared-data" + busyBoxMainVolumeMountPath = "/usr/share/volumeshare" + busyBoxMainContainerName = "busybox-main-container" + expectedResult = "10240" // equal to 10Mi + deletionGracePeriod = int64(0) + sizeLimit = resource.MustParse("10Mi") + ) + + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod-size-memory-volume-" + string(uuid.NewUUID()), + }, + Spec: v1.PodSpec{ + Volumes: []v1.Volume{ + { + Name: volumeName, + VolumeSource: v1.VolumeSource{ + EmptyDir: &v1.EmptyDirVolumeSource{ + Medium: v1.StorageMediumMemory, + SizeLimit: &sizeLimit, + }, + }, + }, + }, + Containers: []v1.Container{ + { + Name: busyBoxMainContainerName, + Image: imageutils.GetE2EImage(imageutils.BusyBox), + Command: []string{"/bin/sh"}, + Args: []string{"-c", "sleep 100000"}, + VolumeMounts: []v1.VolumeMount{ + { + Name: volumeName, + MountPath: busyBoxMainVolumeMountPath, + }, + }, + }, + }, + TerminationGracePeriodSeconds: &deletionGracePeriod, + RestartPolicy: v1.RestartPolicyNever, + }, + } + + var err error + ginkgo.By("Creating Pod") + pod = f.PodClient().CreateSync(pod) + + ginkgo.By("Waiting for the pod running") + err = e2epod.WaitForPodNameRunningInNamespace(f.ClientSet, pod.Name, f.Namespace.Name) + framework.ExpectNoError(err, "failed to deploy pod %s", pod.Name) + + ginkgo.By("Getting the pod") + pod, err = f.PodClient().Get(context.TODO(), pod.Name, metav1.GetOptions{}) + framework.ExpectNoError(err, "failed to get pod %s", pod.Name) + + ginkgo.By("Reading empty dir size") + result := f.ExecShellInContainer(pod.Name, busyBoxMainContainerName, fmt.Sprintf("df | grep %s | awk '{print $2}'", busyBoxMainVolumeMountPath)) + framework.ExpectEqual(result, expectedResult, "failed to match expected string %s with %s", expectedResult, result) + }) }) const (