diff --git a/pkg/api/types.go b/pkg/api/types.go
index c6e57efe471..2c54fcf71fb 100644
--- a/pkg/api/types.go
+++ b/pkg/api/types.go
@@ -681,8 +681,9 @@ type EmptyDirVolumeSource struct {
 type StorageMedium string
 
 const (
-	StorageMediumDefault StorageMedium = ""       // use whatever the default is for the node
-	StorageMediumMemory  StorageMedium = "Memory" // use memory (tmpfs)
+	StorageMediumDefault   StorageMedium = ""          // use whatever the default is for the node
+	StorageMediumMemory    StorageMedium = "Memory"    // use memory (tmpfs)
+	StorageMediumHugePages StorageMedium = "HugePages" // use hugepages
 )
 
 // Protocol defines network protocols supported for things like container ports.
diff --git a/pkg/api/validation/validation.go b/pkg/api/validation/validation.go
index aac45af683b..81a9e01904f 100644
--- a/pkg/api/validation/validation.go
+++ b/pkg/api/validation/validation.go
@@ -394,6 +394,9 @@ func validateVolumeSource(source *api.VolumeSource, fldPath *field.Path, volName
 				allErrs = append(allErrs, field.Forbidden(fldPath.Child("emptyDir").Child("sizeLimit"), "SizeLimit field must be a valid resource quantity"))
 			}
 		}
+		if !utilfeature.DefaultFeatureGate.Enabled(features.HugePages) && source.EmptyDir.Medium == api.StorageMediumHugePages {
+			allErrs = append(allErrs, field.Forbidden(fldPath.Child("emptyDir").Child("medium"), "HugePages medium is disabled by feature-gate for EmptyDir volumes"))
+		}
 	}
 	if source.HostPath != nil {
 		if numVolumes > 0 {
diff --git a/pkg/api/validation/validation_test.go b/pkg/api/validation/validation_test.go
index 05491bcbaed..d0d51354ae0 100644
--- a/pkg/api/validation/validation_test.go
+++ b/pkg/api/validation/validation_test.go
@@ -2757,6 +2757,28 @@ func TestValidateVolumes(t *testing.T) {
 	} else if errs[0].Type != field.ErrorTypeDuplicate {
 		t.Errorf("expected error type %v, got %v", field.ErrorTypeDuplicate, errs[0].Type)
 	}
+
+	// Validate HugePages medium type for EmptyDir when HugePages feature is enabled/disabled
+	hugePagesCase := api.VolumeSource{EmptyDir: &api.EmptyDirVolumeSource{Medium: api.StorageMediumHugePages}}
+
+	// Enable alpha feature HugePages
+	err := utilfeature.DefaultFeatureGate.Set("HugePages=true")
+	if err != nil {
+		t.Errorf("Failed to enable feature gate for HugePages: %v", err)
+	}
+	if errs := validateVolumeSource(&hugePagesCase, field.NewPath("field").Index(0), "working"); len(errs) != 0 {
+		t.Errorf("Unexpected error when HugePages feature is enabled.")
+	}
+
+	// Disable alpha feature HugePages
+	err = utilfeature.DefaultFeatureGate.Set("HugePages=false")
+	if err != nil {
+		t.Errorf("Failed to disable feature gate for HugePages: %v", err)
+	}
+	if errs := validateVolumeSource(&hugePagesCase, field.NewPath("field").Index(0), "failing"); len(errs) == 0 {
+		t.Errorf("Expected error when HugePages feature is disabled got nothing.")
+	}
+
 }
 
 func TestAlphaHugePagesIsolation(t *testing.T) {
diff --git a/pkg/volume/empty_dir/BUILD b/pkg/volume/empty_dir/BUILD
index f16bc3941ba..3dfe1313647 100644
--- a/pkg/volume/empty_dir/BUILD
+++ b/pkg/volume/empty_dir/BUILD
@@ -19,12 +19,14 @@ go_library(
         "//conditions:default": [],
     }),
     deps = [
+        "//pkg/api/v1/helper:go_default_library",
         "//pkg/util/mount:go_default_library",
         "//pkg/util/strings:go_default_library",
         "//pkg/volume:go_default_library",
         "//pkg/volume/util:go_default_library",
         "//vendor/github.com/golang/glog:go_default_library",
         "//vendor/k8s.io/api/core/v1:go_default_library",
+        "//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
         "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
         "//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
     ] + select({
@@ -51,6 +53,7 @@ go_test(
         "//pkg/volume/testing:go_default_library",
         "//pkg/volume/util:go_default_library",
         "//vendor/k8s.io/api/core/v1:go_default_library",
+        "//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
         "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
         "//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
         "//vendor/k8s.io/client-go/util/testing:go_default_library",
diff --git a/pkg/volume/empty_dir/empty_dir.go b/pkg/volume/empty_dir/empty_dir.go
index 76da7c3cffe..c64ea2a180e 100644
--- a/pkg/volume/empty_dir/empty_dir.go
+++ b/pkg/volume/empty_dir/empty_dir.go
@@ -23,10 +23,12 @@ import (
 
 	"github.com/golang/glog"
 	"k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
+	v1helper "k8s.io/kubernetes/pkg/api/v1/helper"
 	"k8s.io/kubernetes/pkg/util/mount"
-	"k8s.io/kubernetes/pkg/util/strings"
+	stringsutil "k8s.io/kubernetes/pkg/util/strings"
 	"k8s.io/kubernetes/pkg/volume"
 	volumeutil "k8s.io/kubernetes/pkg/volume/util"
 )
@@ -56,7 +58,7 @@ const (
 )
 
 func getPath(uid types.UID, volName string, host volume.VolumeHost) string {
-	return host.GetPodVolumeDir(uid, strings.EscapeQualifiedNameForDisk(emptyDirPluginName), volName)
+	return host.GetPodVolumeDir(uid, stringsutil.EscapeQualifiedNameForDisk(emptyDirPluginName), volName)
 }
 
 func (plugin *emptyDirPlugin) Init(host volume.VolumeHost) error {
@@ -104,9 +106,11 @@ func (plugin *emptyDirPlugin) NewMounter(spec *volume.Spec, pod *v1.Pod, opts vo
 
 func (plugin *emptyDirPlugin) newMounterInternal(spec *volume.Spec, pod *v1.Pod, mounter mount.Interface, mountDetector mountDetector, opts volume.VolumeOptions) (volume.Mounter, error) {
 	medium := v1.StorageMediumDefault
+
 	if spec.Volume.EmptyDir != nil { // Support a non-specified source as EmptyDir.
 		medium = spec.Volume.EmptyDir.Medium
 	}
+
 	return &emptyDir{
 		pod:             pod,
 		volName:         spec.Name(),
@@ -159,8 +163,9 @@ type mountDetector interface {
 type storageMedium int
 
 const (
-	mediumUnknown storageMedium = 0 // assume anything we don't explicitly handle is this
-	mediumMemory  storageMedium = 1 // memory (e.g. tmpfs on linux)
+	mediumUnknown   storageMedium = 0 // assume anything we don't explicitly handle is this
+	mediumMemory    storageMedium = 1 // memory (e.g. tmpfs on linux)
+	mediumHugepages storageMedium = 2 // hugepages
 )
 
 // EmptyDir volumes are temporary directories exposed to the pod.
@@ -221,6 +226,8 @@ func (ed *emptyDir) SetUpAt(dir string, fsGroup *int64) error {
 		err = ed.setupDir(dir)
 	case v1.StorageMediumMemory:
 		err = ed.setupTmpfs(dir)
+	case v1.StorageMediumHugePages:
+		err = ed.setupHugepages(dir)
 	default:
 		err = fmt.Errorf("unknown storage medium %q", ed.medium)
 	}
@@ -257,6 +264,67 @@ func (ed *emptyDir) setupTmpfs(dir string) error {
 	return ed.mounter.Mount("tmpfs", dir, "tmpfs", nil /* options */)
 }
 
+// setupHugepages creates a hugepage mount at the specified directory.
+func (ed *emptyDir) setupHugepages(dir string) error {
+	if ed.mounter == nil {
+		return fmt.Errorf("hugepages storage requested, but mounter is nil")
+	}
+	if err := ed.setupDir(dir); err != nil {
+		return err
+	}
+	// Make SetUp idempotent.
+	medium, isMnt, err := ed.mountDetector.GetMountMedium(dir)
+	if err != nil {
+		return err
+	}
+	// If the directory is a mountpoint with medium hugepages, there is no
+	// work to do since we are already in the desired state.
+	if isMnt && medium == mediumHugepages {
+		return nil
+	}
+
+	pageSizeMountOption, err := getPageSizeMountOptionFromPod(ed.pod)
+	if err != nil {
+		return err
+	}
+
+	glog.V(3).Infof("pod %v: mounting hugepages for volume %v", ed.pod.UID, ed.volName)
+	return ed.mounter.Mount("nodev", dir, "hugetlbfs", []string{pageSizeMountOption})
+}
+
+// getPageSizeMountOptionFromPod builds the hugetlbfs "pagesize=<size>" mount option from the
+// hugepages-<size> requests of the pod's containers; it fails if none or several sizes are requested.
+func getPageSizeMountOptionFromPod(pod *v1.Pod) (string, error) {
+	pageSizeFound := false
+	pageSize := resource.Quantity{}
+	// Init containers can also consume huge pages. Copy first so append never writes into the pod's own slice.
+	containers := append(append([]v1.Container{}, pod.Spec.Containers...), pod.Spec.InitContainers...)
+	for _, container := range containers {
+		// We can take request because limit and requests must match.
+		for requestName := range container.Resources.Requests {
+			if v1helper.IsHugePageResourceName(requestName) {
+				currentPageSize, err := v1helper.HugePageSizeFromResourceName(requestName)
+				if err != nil {
+					return "", err
+				}
+				// Every huge page request in the pod must use the page size of the first one discovered.
+				if pageSizeFound && pageSize.Cmp(currentPageSize) != 0 {
+					return "", fmt.Errorf("multiple pageSizes for huge pages in a single PodSpec")
+				}
+				pageSize = currentPageSize
+				pageSizeFound = true
+			}
+		}
+	}
+
+	if !pageSizeFound {
+		return "", fmt.Errorf("hugePages storage requested, but there is no resource request for huge pages")
+	}
+
+	return fmt.Sprintf("pagesize=%s", pageSize.String()), nil
+
+}
+
 // setupDir creates the directory with the default permissions specified by the perm constant.
 func (ed *emptyDir) setupDir(dir string) error {
 	// Create the directory if it doesn't already exist.
@@ -318,9 +386,14 @@ func (ed *emptyDir) TearDownAt(dir string) error {
 	if err != nil {
 		return err
 	}
-	if isMnt && medium == mediumMemory {
-		ed.medium = v1.StorageMediumMemory
-		return ed.teardownTmpfs(dir)
+	if isMnt {
+		if medium == mediumMemory {
+			ed.medium = v1.StorageMediumMemory
+			return ed.teardownTmpfsOrHugetlbfs(dir)
+		} else if medium == mediumHugepages {
+			ed.medium = v1.StorageMediumHugePages
+			return ed.teardownTmpfsOrHugetlbfs(dir)
+		}
 	}
 	// assume StorageMediumDefault
 	return ed.teardownDefault(dir)
@@ -336,7 +409,7 @@ func (ed *emptyDir) teardownDefault(dir string) error {
 	return nil
 }
 
-func (ed *emptyDir) teardownTmpfs(dir string) error {
+func (ed *emptyDir) teardownTmpfsOrHugetlbfs(dir string) error {
 	if ed.mounter == nil {
 		return fmt.Errorf("memory storage requested, but mounter is nil")
 	}
@@ -350,7 +423,7 @@ func (ed *emptyDir) teardownTmpfs(dir string) error {
 }
 
 func (ed *emptyDir) getMetaDir() string {
-	return path.Join(ed.plugin.host.GetPodPluginDir(ed.pod.UID, strings.EscapeQualifiedNameForDisk(emptyDirPluginName)), ed.volName)
+	return path.Join(ed.plugin.host.GetPodPluginDir(ed.pod.UID, stringsutil.EscapeQualifiedNameForDisk(emptyDirPluginName)), ed.volName)
 }
 
 func getVolumeSource(spec *volume.Spec) (*v1.EmptyDirVolumeSource, bool) {
diff --git a/pkg/volume/empty_dir/empty_dir_linux.go b/pkg/volume/empty_dir/empty_dir_linux.go
index b086429f9cb..630a49fc2e9 100644
--- a/pkg/volume/empty_dir/empty_dir_linux.go
+++ b/pkg/volume/empty_dir/empty_dir_linux.go
@@ -27,7 +27,10 @@ import (
 )
 
-// Defined by Linux - the type number for tmpfs mounts.
-const linuxTmpfsMagic = 0x01021994
+// Defined by Linux - the type numbers for tmpfs and hugetlbfs mounts.
+const (
+	linuxTmpfsMagic     = 0x01021994
+	linuxHugetlbfsMagic = 0x958458f6
+)
 
 // realMountDetector implements mountDetector in terms of syscalls.
 type realMountDetector struct {
@@ -48,6 +51,8 @@ func (m *realMountDetector) GetMountMedium(path string) (storageMedium, bool, er
 	glog.V(5).Infof("Statfs_t of %v: %+v", path, buf)
 	if buf.Type == linuxTmpfsMagic {
 		return mediumMemory, !notMnt, nil
+	} else if uint32(buf.Type) == linuxHugetlbfsMagic { // magic overflows int32, the Statfs_t.Type width on 32-bit arches
+		return mediumHugepages, !notMnt, nil
 	}
 	return mediumUnknown, !notMnt, nil
 }
diff --git a/pkg/volume/empty_dir/empty_dir_test.go b/pkg/volume/empty_dir/empty_dir_test.go
index 31cc7f5a312..ffe1ce08612 100644
--- a/pkg/volume/empty_dir/empty_dir_test.go
+++ b/pkg/volume/empty_dir/empty_dir_test.go
@@ -24,6 +24,7 @@ import (
 	"testing"
 
 	"k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
 	utiltesting "k8s.io/client-go/util/testing"
@@ -80,6 +81,15 @@ func TestPluginEmptyRootContext(t *testing.T) {
 		expectedTeardownMounts: 0})
 }
 
+func TestPluginHugetlbfs(t *testing.T) {
+	doTestPlugin(t, pluginTestConfig{
+		medium:                        v1.StorageMediumHugePages,
+		expectedSetupMounts:           1,
+		expectedTeardownMounts:        0,
+		shouldBeMountedBeforeTeardown: true,
+	})
+}
+
 type pluginTestConfig struct {
 	medium                        v1.StorageMedium
 	idempotent                    bool
@@ -109,7 +119,22 @@ func doTestPlugin(t *testing.T, config pluginTestConfig) {
 
 		physicalMounter = mount.FakeMounter{}
 		mountDetector   = fakeMountDetector{}
-		pod             = &v1.Pod{ObjectMeta: metav1.ObjectMeta{UID: types.UID("poduid")}}
+		pod             = &v1.Pod{
+			ObjectMeta: metav1.ObjectMeta{
+				UID: types.UID("poduid"),
+			},
+			Spec: v1.PodSpec{
+				Containers: []v1.Container{
+					{
+						Resources: v1.ResourceRequirements{
+							Requests: v1.ResourceList{
+								v1.ResourceName("hugepages-2Mi"): resource.MustParse("100Mi"),
+							},
+						},
+					},
+				},
+			},
+		}
 	)
 
 	if config.idempotent {
@@ -165,7 +190,7 @@ func doTestPlugin(t *testing.T, config pluginTestConfig) {
 	if e, a := config.expectedSetupMounts, len(physicalMounter.Log); e != a {
 		t.Errorf("Expected %v physicalMounter calls during setup, got %v", e, a)
 	} else if config.expectedSetupMounts == 1 &&
-		(physicalMounter.Log[0].Action != mount.FakeActionMount || physicalMounter.Log[0].FSType != "tmpfs") {
+		(physicalMounter.Log[0].Action != mount.FakeActionMount || (physicalMounter.Log[0].FSType != "tmpfs" && physicalMounter.Log[0].FSType != "hugetlbfs")) {
 		t.Errorf("Unexpected physicalMounter action during setup: %#v", physicalMounter.Log[0])
 	}
 	physicalMounter.ResetLog()
@@ -276,3 +301,141 @@ func TestMetrics(t *testing.T) {
 		t.Errorf("Expected Available to be greater than 0")
 	}
 }
+
+func TestGetHugePagesMountOptions(t *testing.T) {
+	testCases := map[string]struct {
+		pod            *v1.Pod
+		shouldFail     bool
+		expectedResult string
+	}{
+		"testWithProperValues": {
+			pod: &v1.Pod{
+				Spec: v1.PodSpec{
+					Containers: []v1.Container{
+						{
+							Resources: v1.ResourceRequirements{
+								Requests: v1.ResourceList{
+									v1.ResourceName("hugepages-2Mi"): resource.MustParse("100Mi"),
+								},
+							},
+						},
+					},
+				},
+			},
+			shouldFail:     false,
+			expectedResult: "pagesize=2Mi",
+		},
+		"testWithProperValuesAndDifferentPageSize": {
+			pod: &v1.Pod{
+				Spec: v1.PodSpec{
+					Containers: []v1.Container{
+						{
+							Resources: v1.ResourceRequirements{
+								Requests: v1.ResourceList{
+									v1.ResourceName("hugepages-1Gi"): resource.MustParse("2Gi"),
+								},
+							},
+						},
+						{
+							Resources: v1.ResourceRequirements{
+								Requests: v1.ResourceList{
+									v1.ResourceName("hugepages-1Gi"): resource.MustParse("4Gi"),
+								},
+							},
+						},
+					},
+				},
+			},
+			shouldFail:     false,
+			expectedResult: "pagesize=1Gi",
+		},
+		"InitContainerAndContainerHasProperValues": {
+			pod: &v1.Pod{
+				Spec: v1.PodSpec{
+					InitContainers: []v1.Container{
+						{
+							Resources: v1.ResourceRequirements{
+								Requests: v1.ResourceList{
+									v1.ResourceName("hugepages-1Gi"): resource.MustParse("2Gi"),
+								},
+							},
+						},
+						{
+							Resources: v1.ResourceRequirements{
+								Requests: v1.ResourceList{
+									v1.ResourceName("hugepages-1Gi"): resource.MustParse("4Gi"),
+								},
+							},
+						},
+					},
+				},
+			},
+			shouldFail:     false,
+			expectedResult: "pagesize=1Gi",
+		},
+		"InitContainerAndContainerHasDifferentPageSizes": {
+			pod: &v1.Pod{
+				Spec: v1.PodSpec{
+					InitContainers: []v1.Container{
+						{
+							Resources: v1.ResourceRequirements{
+								Requests: v1.ResourceList{
+									v1.ResourceName("hugepages-2Mi"): resource.MustParse("2Gi"),
+								},
+							},
+						},
+						{
+							Resources: v1.ResourceRequirements{
+								Requests: v1.ResourceList{
+									v1.ResourceName("hugepages-1Gi"): resource.MustParse("4Gi"),
+								},
+							},
+						},
+					},
+				},
+			},
+			shouldFail:     true,
+			expectedResult: "",
+		},
+		"ContainersWithMultiplePageSizes": {
+			pod: &v1.Pod{
+				Spec: v1.PodSpec{
+					Containers: []v1.Container{
+						{
+							Resources: v1.ResourceRequirements{
+								Requests: v1.ResourceList{
+									v1.ResourceName("hugepages-1Gi"): resource.MustParse("2Gi"),
+								},
+							},
+						},
+						{
+							Resources: v1.ResourceRequirements{
+								Requests: v1.ResourceList{
+									v1.ResourceName("hugepages-2Mi"): resource.MustParse("100Mi"),
+								},
+							},
+						},
+					},
+				},
+			},
+			shouldFail:     true,
+			expectedResult: "",
+		},
+		"PodWithNoHugePagesRequest": {
+			pod:            &v1.Pod{},
+			shouldFail:     true,
+			expectedResult: "",
+		},
+	}
+
+	for testCaseName, testCase := range testCases {
+		value, err := getPageSizeMountOptionFromPod(testCase.pod)
+		if testCase.shouldFail && err == nil {
+			t.Errorf("Expected an error in %v", testCaseName)
+		} else if !testCase.shouldFail && err != nil {
+			t.Errorf("Unexpected error in %v, got %v", testCaseName, err)
+		} else if testCase.expectedResult != value {
+			t.Errorf("Unexpected mountOptions for Pod in %v. Expected %v, got %v", testCaseName, testCase.expectedResult, value)
+		}
+	}
+}
diff --git a/staging/src/k8s.io/api/core/v1/types.go b/staging/src/k8s.io/api/core/v1/types.go
index 391736885d3..eef13a25a9f 100644
--- a/staging/src/k8s.io/api/core/v1/types.go
+++ b/staging/src/k8s.io/api/core/v1/types.go
@@ -943,8 +943,9 @@ type FlockerVolumeSource struct {
 type StorageMedium string
 
 const (
-	StorageMediumDefault StorageMedium = ""       // use whatever the default is for the node
-	StorageMediumMemory  StorageMedium = "Memory" // use memory (tmpfs)
+	StorageMediumDefault   StorageMedium = ""          // use whatever the default is for the node
+	StorageMediumMemory    StorageMedium = "Memory"    // use memory (tmpfs)
+	StorageMediumHugePages StorageMedium = "HugePages" // use hugepages
 )
 
 // Protocol defines network protocols supported for things like container ports.