Implement support for multiple sizes huge pages

This implementation allows Pod to request multiple hugepage resources
of different size and mount hugepage volumes using storage medium
HugePage-<size>, e.g.

spec:
  containers:
    resources:
      requests:
        hugepages-2Mi: 2Mi
        hugepages-1Gi: 2Gi
    volumeMounts:
      - mountPath: /hugepages-2Mi
        name: hugepage-2mi
      - mountPath: /hugepages-1Gi
        name: hugepage-1gi
    ...
  volumes:
    - name: hugepage-2mi
      emptyDir:
        medium: HugePages-2Mi
    - name: hugepage-1gi
      emptyDir:
        medium: HugePages-1Gi

NOTE: This is an alpha feature.
      Feature gate HugePageStorageMediumSize must be enabled for it to work.
This commit is contained in:
Ed Bartosh 2019-10-17 15:25:06 +03:00
parent ddd6d668f6
commit 0eb65bd7da
13 changed files with 148 additions and 51 deletions

View File

@ -583,9 +583,10 @@ type StorageMedium string
// These are the valid value for StorageMedium
const (
StorageMediumDefault StorageMedium = "" // use whatever the default is for the node
StorageMediumMemory StorageMedium = "Memory" // use memory (tmpfs)
StorageMediumHugePages StorageMedium = "HugePages" // use hugepages
StorageMediumDefault StorageMedium = "" // use whatever the default is for the node
StorageMediumMemory StorageMedium = "Memory" // use memory (tmpfs)
StorageMediumHugePages StorageMedium = "HugePages" // use hugepages
StorageMediumHugePagesPrefix StorageMedium = "HugePages-" // prefix for full medium notation HugePages-<size>
)
// Protocol defines network protocols supported for things like container ports.

View File

@ -103,6 +103,27 @@ func HugePageUnitSizeFromByteSize(size int64) (string, error) {
return fmt.Sprintf("%d%s", size, hugePageSizeUnitList[idx]), nil
}
// IsHugePageMedium returns true if the volume medium is in 'HugePages[-size]' format
func IsHugePageMedium(medium v1.StorageMedium) bool {
if medium == v1.StorageMediumHugePages {
return true
}
return strings.HasPrefix(string(medium), string(v1.StorageMediumHugePagesPrefix))
}
// HugePageSizeFromMedium returns the page size for the specified huge page medium.
// If the specified input is not a valid huge page medium an error is returned.
func HugePageSizeFromMedium(medium v1.StorageMedium) (resource.Quantity, error) {
if !IsHugePageMedium(medium) {
return resource.Quantity{}, fmt.Errorf("medium: %s is not a hugepage medium", medium)
}
if medium == v1.StorageMediumHugePages {
return resource.Quantity{}, fmt.Errorf("medium: %s doesn't have size information", medium)
}
pageSize := strings.TrimPrefix(string(medium), string(v1.StorageMediumHugePagesPrefix))
return resource.ParseQuantity(pageSize)
}
// IsOvercommitAllowed returns true if the resource is in the default
// namespace and is not hugepages.
func IsOvercommitAllowed(name v1.ResourceName) bool {

View File

@ -3083,8 +3083,33 @@ func validateContainersOnlyForPod(containers []core.Container, fldPath *field.Pa
return allErrs
}
// PodValidationOptions contains the different settings for pod validation
type PodValidationOptions struct {
// Allow pod spec to have more than one huge page resource (with different sizes)
AllowMultipleHugePageResources bool
}
// ValidatePodSingleHugePageResources checks if there are multiple huge
// pages resources in the pod object.
func ValidatePodSingleHugePageResources(pod *core.Pod, specPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
hugePageResources := sets.NewString()
for i := range pod.Spec.Containers {
resourceSet := toContainerResourcesSet(&pod.Spec.Containers[i])
for resourceStr := range resourceSet {
if v1helper.IsHugePageResourceName(v1.ResourceName(resourceStr)) {
hugePageResources.Insert(resourceStr)
}
}
}
if len(hugePageResources) > 1 {
allErrs = append(allErrs, field.Invalid(specPath, hugePageResources.List(), "must use a single hugepage size in a pod spec"))
}
return allErrs
}
// ValidatePod tests if required fields in the pod are set.
func ValidatePod(pod *core.Pod) field.ErrorList {
func ValidatePod(pod *core.Pod, opts PodValidationOptions) field.ErrorList {
fldPath := field.NewPath("metadata")
allErrs := ValidateObjectMeta(&pod.ObjectMeta, true, ValidatePodName, fldPath)
allErrs = append(allErrs, ValidatePodSpecificAnnotations(pod.ObjectMeta.Annotations, &pod.Spec, fldPath.Child("annotations"))...)
@ -3111,17 +3136,8 @@ func ValidatePod(pod *core.Pod) field.ErrorList {
allErrs = append(allErrs, validateContainersOnlyForPod(pod.Spec.Containers, specPath.Child("containers"))...)
allErrs = append(allErrs, validateContainersOnlyForPod(pod.Spec.InitContainers, specPath.Child("initContainers"))...)
hugePageResources := sets.NewString()
for i := range pod.Spec.Containers {
resourceSet := toContainerResourcesSet(&pod.Spec.Containers[i])
for resourceStr := range resourceSet {
if v1helper.IsHugePageResourceName(v1.ResourceName(resourceStr)) {
hugePageResources.Insert(resourceStr)
}
}
}
if len(hugePageResources) > 1 {
allErrs = append(allErrs, field.Invalid(specPath, hugePageResources, "must use a single hugepage size in a pod spec"))
if !opts.AllowMultipleHugePageResources {
allErrs = append(allErrs, ValidatePodSingleHugePageResources(pod, specPath)...)
}
podIPsField := field.NewPath("status", "podIPs")
@ -3679,8 +3695,8 @@ func ValidateContainerUpdates(newContainers, oldContainers []core.Container, fld
}
// ValidatePodCreate validates a pod in the context of its initial create
func ValidatePodCreate(pod *core.Pod) field.ErrorList {
allErrs := ValidatePod(pod)
func ValidatePodCreate(pod *core.Pod, opts PodValidationOptions) field.ErrorList {
allErrs := ValidatePod(pod, opts)
fldPath := field.NewPath("spec")
// EphemeralContainers can only be set on update using the ephemeralcontainers subresource
@ -3693,12 +3709,16 @@ func ValidatePodCreate(pod *core.Pod) field.ErrorList {
// ValidatePodUpdate tests to see if the update is legal for an end user to make. newPod is updated with fields
// that cannot be changed.
func ValidatePodUpdate(newPod, oldPod *core.Pod) field.ErrorList {
func ValidatePodUpdate(newPod, oldPod *core.Pod, opts PodValidationOptions) field.ErrorList {
fldPath := field.NewPath("metadata")
allErrs := ValidateObjectMetaUpdate(&newPod.ObjectMeta, &oldPod.ObjectMeta, fldPath)
allErrs = append(allErrs, ValidatePodSpecificAnnotationUpdates(newPod, oldPod, fldPath.Child("annotations"))...)
specPath := field.NewPath("spec")
if !opts.AllowMultipleHugePageResources {
allErrs = append(allErrs, ValidatePodSingleHugePageResources(newPod, specPath)...)
}
// validate updateable fields:
// 1. spec.containers[*].image
// 2. spec.initContainers[*].image

View File

@ -539,6 +539,14 @@ const (
//
// Enables a feature to make secrets and configmaps data immutable.
ImmutableEphemeralVolumes featuregate.Feature = "ImmutableEphemeralVolumes"
// owner: @bart0sh
// alpha: v1.18
//
// Enables usage of HugePages-<size> in a volume medium,
// e.g. emptyDir:
// medium: HugePages-1Gi
HugePageStorageMediumSize featuregate.Feature = "HugePageStorageMediumSize"
)
func init() {
@ -624,6 +632,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
PodDisruptionBudget: {Default: true, PreRelease: featuregate.Beta},
ServiceTopology: {Default: false, PreRelease: featuregate.Alpha},
ImmutableEphemeralVolumes: {Default: false, PreRelease: featuregate.Alpha},
HugePageStorageMediumSize: {Default: false, PreRelease: featuregate.Alpha},
// inherited features from generic apiserver, relisted here to get a conflict if it is changed
// unintentionally on either side:

View File

@ -24,6 +24,7 @@ go_library(
"//pkg/apis/core/install:go_default_library",
"//pkg/apis/core/v1:go_default_library",
"//pkg/apis/core/validation:go_default_library",
"//pkg/features:go_default_library",
"//pkg/kubelet/checkpoint:go_default_library",
"//pkg/kubelet/checkpointmanager:go_default_library",
"//pkg/kubelet/container:go_default_library",
@ -40,6 +41,7 @@ go_library(
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/yaml:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//staging/src/k8s.io/client-go/kubernetes:go_default_library",
"//staging/src/k8s.io/client-go/tools/cache:go_default_library",
"//staging/src/k8s.io/client-go/tools/record:go_default_library",

View File

@ -27,8 +27,10 @@ import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
utilyaml "k8s.io/apimachinery/pkg/util/yaml"
utilfeature "k8s.io/apiserver/pkg/util/feature"
api "k8s.io/kubernetes/pkg/apis/core"
"k8s.io/kubernetes/pkg/apis/core/helper"
"k8s.io/kubernetes/pkg/features"
// TODO: remove this import if
// api.Registry.GroupOrDie(v1.GroupName).GroupVersion.String() is changed
@ -133,7 +135,10 @@ func tryDecodeSinglePod(data []byte, defaultFn defaultFunc) (parsed bool, pod *v
if err = defaultFn(newPod); err != nil {
return true, pod, err
}
if errs := validation.ValidatePod(newPod); len(errs) > 0 {
opts := validation.PodValidationOptions{
AllowMultipleHugePageResources: utilfeature.DefaultFeatureGate.Enabled(features.HugePageStorageMediumSize),
}
if errs := validation.ValidatePod(newPod, opts); len(errs) > 0 {
return true, pod, fmt.Errorf("invalid pod: %v", errs)
}
v1Pod := &v1.Pod{}
@ -157,13 +162,17 @@ func tryDecodePodList(data []byte, defaultFn defaultFunc) (parsed bool, pods v1.
return false, pods, err
}
opts := validation.PodValidationOptions{
AllowMultipleHugePageResources: utilfeature.DefaultFeatureGate.Enabled(features.HugePageStorageMediumSize),
}
// Apply default values and validate pods.
for i := range newPods.Items {
newPod := &newPods.Items[i]
if err = defaultFn(newPod); err != nil {
return true, pods, err
}
if errs := validation.ValidatePod(newPod); len(errs) > 0 {
if errs := validation.ValidatePod(newPod, opts); len(errs) > 0 {
err = fmt.Errorf("invalid pod: %v", errs)
return true, pods, err
}

View File

@ -251,7 +251,7 @@ func TestStaticPodNameGenerate(t *testing.T) {
if c.overwrite != "" {
pod.Name = c.overwrite
}
errs := validation.ValidatePod(pod)
errs := validation.ValidatePod(pod, validation.PodValidationOptions{})
if c.shouldErr {
specNameErrored := false
for _, err := range errs {

View File

@ -90,7 +90,7 @@ func TestReadPodsFromFileExistAlready(t *testing.T) {
if err := k8s_api_v1.Convert_v1_Pod_To_core_Pod(pod, internalPod, nil); err != nil {
t.Fatalf("%s: Cannot convert pod %#v, %#v", testCase.desc, pod, err)
}
if errs := validation.ValidatePod(internalPod); len(errs) > 0 {
if errs := validation.ValidatePod(internalPod, validation.PodValidationOptions{}); len(errs) > 0 {
t.Fatalf("%s: Invalid pod %#v, %#v", testCase.desc, internalPod, errs)
}
}
@ -369,7 +369,7 @@ func expectUpdate(t *testing.T, ch chan interface{}, testCase *testCase) {
if err := k8s_api_v1.Convert_v1_Pod_To_core_Pod(pod, internalPod, nil); err != nil {
t.Fatalf("%s: Cannot convert pod %#v, %#v", testCase.desc, pod, err)
}
if errs := validation.ValidatePod(internalPod); len(errs) > 0 {
if errs := validation.ValidatePod(internalPod, validation.PodValidationOptions{}); len(errs) > 0 {
t.Fatalf("%s: Invalid pod %#v, %#v", testCase.desc, internalPod, errs)
}
}

View File

@ -319,7 +319,7 @@ func TestExtractPodsFromHTTP(t *testing.T) {
if err := k8s_api_v1.Convert_v1_Pod_To_core_Pod(pod, internalPod, nil); err != nil {
t.Fatalf("%s: Cannot convert pod %#v, %#v", testCase.desc, pod, err)
}
if errs := validation.ValidatePod(internalPod); len(errs) != 0 {
if errs := validation.ValidatePod(internalPod, validation.PodValidationOptions{}); len(errs) != 0 {
t.Errorf("%s: Expected no validation errors on %#v, Got %v", testCase.desc, pod, errs.ToAggregate())
}
}

View File

@ -88,7 +88,11 @@ func (podStrategy) PrepareForUpdate(ctx context.Context, obj, old runtime.Object
// Validate validates a new pod.
func (podStrategy) Validate(ctx context.Context, obj runtime.Object) field.ErrorList {
pod := obj.(*api.Pod)
allErrs := validation.ValidatePodCreate(pod)
opts := validation.PodValidationOptions{
// Allow multiple huge pages on pod create if feature is enabled
AllowMultipleHugePageResources: utilfeature.DefaultFeatureGate.Enabled(features.HugePageStorageMediumSize),
}
allErrs := validation.ValidatePodCreate(pod, opts)
allErrs = append(allErrs, validation.ValidateConditionalPod(pod, nil, field.NewPath(""))...)
return allErrs
}
@ -104,8 +108,13 @@ func (podStrategy) AllowCreateOnUpdate() bool {
// ValidateUpdate is the default update validation for an end user.
func (podStrategy) ValidateUpdate(ctx context.Context, obj, old runtime.Object) field.ErrorList {
errorList := validation.ValidatePod(obj.(*api.Pod))
errorList = append(errorList, validation.ValidatePodUpdate(obj.(*api.Pod), old.(*api.Pod))...)
oldFailsSingleHugepagesValidation := len(validation.ValidatePodSingleHugePageResources(old.(*api.Pod), field.NewPath("spec"))) > 0
opts := validation.PodValidationOptions{
// Allow multiple huge pages on pod create if feature is enabled or if the old pod already has multiple hugepages specified
AllowMultipleHugePageResources: oldFailsSingleHugepagesValidation || utilfeature.DefaultFeatureGate.Enabled(features.HugePageStorageMediumSize),
}
errorList := validation.ValidatePod(obj.(*api.Pod), opts)
errorList = append(errorList, validation.ValidatePodUpdate(obj.(*api.Pod), old.(*api.Pod), opts)...)
errorList = append(errorList, validation.ValidateConditionalPod(obj.(*api.Pod), old.(*api.Pod), field.NewPath(""))...)
return errorList
}

View File

@ -44,6 +44,7 @@ go_test(
embed = [":go_default_library"],
deps = select({
"@io_bazel_rules_go//go/platform:android": [
"//pkg/features:go_default_library",
"//pkg/volume:go_default_library",
"//pkg/volume/testing:go_default_library",
"//pkg/volume/util:go_default_library",
@ -51,10 +52,13 @@ go_test(
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//staging/src/k8s.io/client-go/util/testing:go_default_library",
"//staging/src/k8s.io/component-base/featuregate/testing:go_default_library",
"//vendor/k8s.io/utils/mount:go_default_library",
],
"@io_bazel_rules_go//go/platform:linux": [
"//pkg/features:go_default_library",
"//pkg/volume:go_default_library",
"//pkg/volume/testing:go_default_library",
"//pkg/volume/util:go_default_library",
@ -62,7 +66,9 @@ go_test(
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//staging/src/k8s.io/client-go/util/testing:go_default_library",
"//staging/src/k8s.io/component-base/featuregate/testing:go_default_library",
"//vendor/k8s.io/utils/mount:go_default_library",
],
"//conditions:default": [],

View File

@ -216,12 +216,12 @@ func (ed *emptyDir) SetUpAt(dir string, mounterArgs volume.MounterArgs) error {
}
}
switch ed.medium {
case v1.StorageMediumDefault:
switch {
case ed.medium == v1.StorageMediumDefault:
err = ed.setupDir(dir)
case v1.StorageMediumMemory:
case ed.medium == v1.StorageMediumMemory:
err = ed.setupTmpfs(dir)
case v1.StorageMediumHugePages:
case v1helper.IsHugePageMedium(ed.medium):
err = ed.setupHugepages(dir)
default:
err = fmt.Errorf("unknown storage medium %q", ed.medium)
@ -290,11 +290,11 @@ func (ed *emptyDir) setupHugepages(dir string) error {
}
// If the directory is a mountpoint with medium hugepages, there is no
// work to do since we are already in the desired state.
if isMnt && medium == v1.StorageMediumHugePages {
if isMnt && v1helper.IsHugePageMedium(medium) {
return nil
}
pageSizeMountOption, err := getPageSizeMountOptionFromPod(ed.pod)
pageSizeMountOption, err := getPageSizeMountOption(ed.medium, ed.pod)
if err != nil {
return err
}
@ -303,33 +303,52 @@ func (ed *emptyDir) setupHugepages(dir string) error {
return ed.mounter.Mount("nodev", dir, "hugetlbfs", []string{pageSizeMountOption})
}
// getPageSizeMountOptionFromPod retrieves pageSize mount option from Pod's resources
// and validates pageSize options in all containers of given Pod.
func getPageSizeMountOptionFromPod(pod *v1.Pod) (string, error) {
// getPageSizeMountOption retrieves pageSize mount option from Pod's resources
// and medium and validates pageSize options in all containers of given Pod.
func getPageSizeMountOption(medium v1.StorageMedium, pod *v1.Pod) (string, error) {
pageSizeFound := false
pageSize := resource.Quantity{}
// In some rare cases init containers can also consume Huge pages.
containers := append(pod.Spec.Containers, pod.Spec.InitContainers...)
for _, container := range containers {
var mediumPageSize resource.Quantity
if medium != v1.StorageMediumHugePages {
// medium is: Hugepages-<size>
var err error
mediumPageSize, err = v1helper.HugePageSizeFromMedium(medium)
if err != nil {
return "", err
}
}
// In some rare cases init containers can also consume Huge pages
for _, container := range append(pod.Spec.Containers, pod.Spec.InitContainers...) {
// We can take request because limit and requests must match.
for requestName := range container.Resources.Requests {
if v1helper.IsHugePageResourceName(requestName) {
currentPageSize, err := v1helper.HugePageSizeFromResourceName(requestName)
if err != nil {
return "", err
}
// PageSize for all volumes in a POD are equal, except for the first one discovered.
if !v1helper.IsHugePageResourceName(requestName) {
continue
}
currentPageSize, err := v1helper.HugePageSizeFromResourceName(requestName)
if err != nil {
return "", err
}
if medium == v1.StorageMediumHugePages { // medium is: Hugepages, size is not specified
// PageSize for all volumes in a POD must be equal if medium is "Hugepages"
if pageSizeFound && pageSize.Cmp(currentPageSize) != 0 {
return "", fmt.Errorf("multiple pageSizes for huge pages in a single PodSpec")
return "", fmt.Errorf("medium: %s can't be used if container requests multiple huge page sizes", medium)
}
pageSize = currentPageSize
pageSizeFound = true
pageSize = currentPageSize
} else { // medium is: Hugepages-<size>
if currentPageSize.Cmp(mediumPageSize) == 0 {
pageSizeFound = true
pageSize = currentPageSize
}
}
}
}
if !pageSizeFound {
return "", fmt.Errorf("hugePages storage requested, but there is no resource request for huge pages")
return "", fmt.Errorf("medium %s: hugePages storage requested, but there is no resource request for huge pages", medium)
}
return fmt.Sprintf("%s=%s", hugePagesPageSizeMountOption, pageSize.String()), nil

View File

@ -887,9 +887,10 @@ type FlockerVolumeSource struct {
type StorageMedium string
const (
StorageMediumDefault StorageMedium = "" // use whatever the default is for the node, assume anything we don't explicitly handle is this
StorageMediumMemory StorageMedium = "Memory" // use memory (e.g. tmpfs on linux)
StorageMediumHugePages StorageMedium = "HugePages" // use hugepages
StorageMediumDefault StorageMedium = "" // use whatever the default is for the node, assume anything we don't explicitly handle is this
StorageMediumMemory StorageMedium = "Memory" // use memory (e.g. tmpfs on linux)
StorageMediumHugePages StorageMedium = "HugePages" // use hugepages
StorageMediumHugePagesPrefix StorageMedium = "HugePages-" // prefix for full medium notation HugePages-<size>
)
// Protocol defines network protocols supported for things like container ports.