Merge pull request #50859 from derekwaynecarr/hugepages-feature

Automatic merge from submit-queue

HugePages feature

**What this PR does / why we need it**:
Implements HugePages support per https://github.com/kubernetes/community/pull/837

Feature track issue: https://github.com/kubernetes/features/issues/275

**Special notes for your reviewer**:
A follow-on PR has been opened to add EmptyDir support.

**Release note**:
```release-note
Alpha support for pre-allocated hugepages
```
Merged by Kubernetes Submit Queue on 2017-09-05 11:16:17 -07:00, committed by GitHub, as commit 2f543f321d.
26 changed files with 682 additions and 46 deletions

View File

@ -31,6 +31,30 @@ import (
"k8s.io/kubernetes/pkg/api"
)
// IsHugePageResourceName returns true if the resource name has the huge page
// resource prefix.
func IsHugePageResourceName(name api.ResourceName) bool {
return strings.HasPrefix(string(name), api.ResourceHugePagesPrefix)
}
// HugePageResourceName returns a ResourceName with the canonical hugepage
// prefix prepended for the specified page size. The page size is converted
// to its canonical representation.
func HugePageResourceName(pageSize resource.Quantity) api.ResourceName {
return api.ResourceName(fmt.Sprintf("%s%s", api.ResourceHugePagesPrefix, pageSize.String()))
}
// HugePageSizeFromResourceName returns the page size for the specified huge page
// resource name. If the specified input is not a valid huge page resource name
// an error is returned.
func HugePageSizeFromResourceName(name api.ResourceName) (resource.Quantity, error) {
if !IsHugePageResourceName(name) {
return resource.Quantity{}, fmt.Errorf("resource name: %s is not a valid hugepage name", name)
}
pageSize := strings.TrimPrefix(string(name), api.ResourceHugePagesPrefix)
return resource.ParseQuantity(pageSize)
}
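
For orientation, a minimal sketch of how these three helpers round-trip a page size (a hypothetical standalone program; pkg/api and pkg/api/helper are in-tree packages, so this only compiles inside the Kubernetes repository):

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/api/helper"
)

func main() {
	// Page size -> canonical resource name.
	name := helper.HugePageResourceName(resource.MustParse("2Mi"))
	fmt.Println(name) // hugepages-2Mi

	// Resource name -> page size; errors on non-hugepage names.
	size, err := helper.HugePageSizeFromResourceName(name)
	fmt.Println(size.Value(), err) // 2097152 <nil>

	// Prefix check used throughout this PR.
	fmt.Println(helper.IsHugePageResourceName(api.ResourceName("memory"))) // false
}
```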
// NonConvertibleFields iterates over the provided map and returns only
// the keys with the "non-convertible.kubernetes.io" prefix.
func NonConvertibleFields(annotations map[string]string) map[string]string {
@ -113,7 +137,7 @@ var standardContainerResources = sets.NewString(
// IsStandardContainerResourceName returns true if the container can make a resource request
// for the specified resource
func IsStandardContainerResourceName(str string) bool {
return standardContainerResources.Has(str)
return standardContainerResources.Has(str) || IsHugePageResourceName(api.ResourceName(str))
}
// IsExtendedResourceName returns true if the resource name is not in the
@ -153,6 +177,7 @@ var overcommitBlacklist = sets.NewString(string(api.ResourceNvidiaGPU))
// namespace and not blacklisted.
func IsOvercommitAllowed(name api.ResourceName) bool {
return IsDefaultNamespaceResource(name) &&
!IsHugePageResourceName(name) &&
!overcommitBlacklist.Has(string(name))
}
@ -220,7 +245,7 @@ var standardResources = sets.NewString(
// IsStandardResourceName returns true if the resource is known to the system
func IsStandardResourceName(str string) bool {
return standardResources.Has(str)
return standardResources.Has(str) || IsHugePageResourceName(api.ResourceName(str))
}
var integerResources = sets.NewString(

View File

@ -58,10 +58,28 @@ func TestIsStandardResource(t *testing.T) {
{"disk", false},
{"blah", false},
{"x.y.z", false},
{"hugepages-2Mi", true},
}
for i, tc := range testCases {
if IsStandardResourceName(tc.input) != tc.output {
t.Errorf("case[%d], expected: %t, got: %t", i, tc.output, !tc.output)
t.Errorf("case[%d], input: %s, expected: %t, got: %t", i, tc.input, tc.output, !tc.output)
}
}
}
func TestIsStandardContainerResource(t *testing.T) {
testCases := []struct {
input string
output bool
}{
{"cpu", true},
{"memory", true},
{"disk", false},
{"hugepages-2Mi", true},
}
for i, tc := range testCases {
if IsStandardContainerResourceName(tc.input) != tc.output {
t.Errorf("case[%d], input: %s, expected: %t, got: %t", i, tc.input, tc.output, !tc.output)
}
}
}
@ -353,3 +371,120 @@ func TestGetNodeAffinityFromAnnotations(t *testing.T) {
}
}
}
func TestIsHugePageResourceName(t *testing.T) {
testCases := []struct {
name api.ResourceName
result bool
}{
{
name: api.ResourceName("hugepages-2Mi"),
result: true,
},
{
name: api.ResourceName("hugepages-1Gi"),
result: true,
},
{
name: api.ResourceName("cpu"),
result: false,
},
{
name: api.ResourceName("memory"),
result: false,
},
}
for _, testCase := range testCases {
if testCase.result != IsHugePageResourceName(testCase.name) {
t.Errorf("resource: %v expected result: %v", testCase.name, testCase.result)
}
}
}
func TestHugePageResourceName(t *testing.T) {
testCases := []struct {
pageSize resource.Quantity
name api.ResourceName
}{
{
pageSize: resource.MustParse("2Mi"),
name: api.ResourceName("hugepages-2Mi"),
},
{
pageSize: resource.MustParse("1Gi"),
name: api.ResourceName("hugepages-1Gi"),
},
{
// verify we do not regress our canonical representation
pageSize: *resource.NewQuantity(int64(2097152), resource.BinarySI),
name: api.ResourceName("hugepages-2Mi"),
},
}
for _, testCase := range testCases {
if result := HugePageResourceName(testCase.pageSize); result != testCase.name {
t.Errorf("pageSize: %v, expected: %v, but got: %v", testCase.pageSize.String(), testCase.name, result.String())
}
}
}
func TestHugePageSizeFromResourceName(t *testing.T) {
testCases := []struct {
name api.ResourceName
expectErr bool
pageSize resource.Quantity
}{
{
name: api.ResourceName("hugepages-2Mi"),
pageSize: resource.MustParse("2Mi"),
expectErr: false,
},
{
name: api.ResourceName("hugepages-1Gi"),
pageSize: resource.MustParse("1Gi"),
expectErr: false,
},
{
name: api.ResourceName("hugepages-bad"),
expectErr: true,
},
}
for _, testCase := range testCases {
value, err := HugePageSizeFromResourceName(testCase.name)
if testCase.expectErr && err == nil {
t.Errorf("Expected an error for %v", testCase.name)
} else if !testCase.expectErr && err != nil {
t.Errorf("Unexpected error for %v, got %v", testCase.name, err)
} else if testCase.pageSize.Value() != value.Value() {
t.Errorf("Unexpected pageSize for resource %v got %v", testCase.name, value.String())
}
}
}
func TestIsOvercommitAllowed(t *testing.T) {
testCases := []struct {
name api.ResourceName
allowed bool
}{
{
name: api.ResourceCPU,
allowed: true,
},
{
name: api.ResourceMemory,
allowed: true,
},
{
name: api.ResourceNvidiaGPU,
allowed: false,
},
{
name: HugePageResourceName(resource.MustParse("2Mi")),
allowed: false,
},
}
for _, testCase := range testCases {
if testCase.allowed != IsOvercommitAllowed(testCase.name) {
t.Errorf("Unexpected result for %v", testCase.name)
}
}
}

View File

@ -10,6 +10,7 @@ go_library(
srcs = ["qos.go"],
deps = [
"//pkg/api:go_default_library",
"//pkg/api/helper:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
],

View File

@ -22,10 +22,13 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/helper"
)
// supportedQoSComputeResources is the list of compute resources for which QoS is supported.
var supportedQoSComputeResources = sets.NewString(string(api.ResourceCPU), string(api.ResourceMemory))
func isSupportedQoSComputeResource(name api.ResourceName) bool {
supportedQoSComputeResources := sets.NewString(string(api.ResourceCPU), string(api.ResourceMemory))
return supportedQoSComputeResources.Has(string(name)) || helper.IsHugePageResourceName(name)
}
// GetPodQOS returns the QoS class of a pod.
// A pod is besteffort if none of its containers have specified any requests or limits.
@ -39,7 +42,7 @@ func GetPodQOS(pod *api.Pod) api.PodQOSClass {
for _, container := range pod.Spec.Containers {
// process requests
for name, quantity := range container.Resources.Requests {
if !supportedQoSComputeResources.Has(string(name)) {
if !isSupportedQoSComputeResource(name) {
continue
}
if quantity.Cmp(zeroQuantity) == 1 {
@ -55,7 +58,7 @@ func GetPodQOS(pod *api.Pod) api.PodQOSClass {
// process limits
qosLimitsFound := sets.NewString()
for name, quantity := range container.Resources.Limits {
if !supportedQoSComputeResources.Has(string(name)) {
if !isSupportedQoSComputeResource(name) {
continue
}
if quantity.Cmp(zeroQuantity) == 1 {
@ -70,7 +73,7 @@ func GetPodQOS(pod *api.Pod) api.PodQOSClass {
}
}
if len(qosLimitsFound) != len(supportedQoSComputeResources) {
if !qosLimitsFound.HasAll(string(api.ResourceMemory), string(api.ResourceCPU)) {
isGuaranteed = false
}
}

View File

@ -3339,6 +3339,8 @@ const (
ResourceOpaqueIntPrefix = "pod.alpha.kubernetes.io/opaque-int-resource-"
// Default namespace prefix.
ResourceDefaultNamespacePrefix = "kubernetes.io/"
// Name prefix for huge page resources (alpha).
ResourceHugePagesPrefix = "hugepages-"
)
// ResourceList is a set of (resource name, quantity) pairs.

View File

@ -24,6 +24,7 @@ go_library(
deps = [
"//pkg/api/helper:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/selection:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",

View File

@ -22,6 +22,7 @@ import (
"strings"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/selection"
"k8s.io/apimachinery/pkg/util/sets"
@ -41,6 +42,31 @@ func IsExtendedResourceName(name v1.ResourceName) bool {
func IsDefaultNamespaceResource(name v1.ResourceName) bool {
return !strings.Contains(string(name), "/") ||
strings.Contains(string(name), v1.ResourceDefaultNamespacePrefix)
}
// IsHugePageResourceName returns true if the resource name has the huge page
// resource prefix.
func IsHugePageResourceName(name v1.ResourceName) bool {
return strings.HasPrefix(string(name), v1.ResourceHugePagesPrefix)
}
// HugePageResourceName returns a ResourceName with the canonical hugepage
// prefix prepended for the specified page size. The page size is converted
// to its canonical representation.
func HugePageResourceName(pageSize resource.Quantity) v1.ResourceName {
return v1.ResourceName(fmt.Sprintf("%s%s", v1.ResourceHugePagesPrefix, pageSize.String()))
}
// HugePageSizeFromResourceName returns the page size for the specified huge page
// resource name. If the specified input is not a valid huge page resource name
// an error is returned.
func HugePageSizeFromResourceName(name v1.ResourceName) (resource.Quantity, error) {
if !IsHugePageResourceName(name) {
return resource.Quantity{}, fmt.Errorf("resource name: %s is not a valid hugepage name", name)
}
pageSize := strings.TrimPrefix(string(name), v1.ResourceHugePagesPrefix)
return resource.ParseQuantity(pageSize)
}
// IsOpaqueIntResourceName returns true if the resource name has the opaque

View File

@ -24,6 +24,7 @@ go_library(
name = "go_default_library",
srcs = ["qos.go"],
deps = [
"//pkg/api/v1/helper:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",

View File

@ -20,12 +20,16 @@ import (
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/util/sets"
v1helper "k8s.io/kubernetes/pkg/api/v1/helper"
)
// QOSList is a set of (resource name, QoS class) pairs.
type QOSList map[v1.ResourceName]v1.PodQOSClass
var supportedQoSComputeResources = sets.NewString(string(v1.ResourceCPU), string(v1.ResourceMemory))
func isSupportedQoSComputeResource(name v1.ResourceName) bool {
supportedQoSComputeResources := sets.NewString(string(v1.ResourceCPU), string(v1.ResourceMemory))
return supportedQoSComputeResources.Has(string(name)) || v1helper.IsHugePageResourceName(name)
}
// GetPodQOS returns the QoS class of a pod.
// A pod is besteffort if none of its containers have specified any requests or limits.
@ -39,7 +43,7 @@ func GetPodQOS(pod *v1.Pod) v1.PodQOSClass {
for _, container := range pod.Spec.Containers {
// process requests
for name, quantity := range container.Resources.Requests {
if !supportedQoSComputeResources.Has(string(name)) {
if !isSupportedQoSComputeResource(name) {
continue
}
if quantity.Cmp(zeroQuantity) == 1 {
@ -55,7 +59,7 @@ func GetPodQOS(pod *v1.Pod) v1.PodQOSClass {
// process limits
qosLimitsFound := sets.NewString()
for name, quantity := range container.Resources.Limits {
if !supportedQoSComputeResources.Has(string(name)) {
if !isSupportedQoSComputeResource(name) {
continue
}
if quantity.Cmp(zeroQuantity) == 1 {
@ -70,7 +74,7 @@ func GetPodQOS(pod *v1.Pod) v1.PodQOSClass {
}
}
if len(qosLimitsFound) != len(supportedQoSComputeResources) {
if !qosLimitsFound.HasAll(string(v1.ResourceMemory), string(v1.ResourceCPU)) {
isGuaranteed = false
}
}
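
To illustrate the effect of this change, a minimal sketch (a hypothetical program using the in-tree qos package) of a pod that consumes only huge pages: its requests and limits match, but because Guaranteed now explicitly requires both cpu and memory limits, the pod is classified Burstable, mirroring the burstable-hugepages test case below.

```go
package main

import (
	"fmt"

	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	"k8s.io/kubernetes/pkg/api/v1/helper/qos"
)

func main() {
	hugepages := v1.ResourceList{
		v1.ResourceName("hugepages-2Mi"): resource.MustParse("1Gi"),
	}
	pod := &v1.Pod{
		Spec: v1.PodSpec{
			Containers: []v1.Container{{
				Name: "ctr",
				Resources: v1.ResourceRequirements{
					Requests: hugepages,
					Limits:   hugepages,
				},
			}},
		},
	}
	// Huge pages now count toward QoS, but Guaranteed still requires
	// matching cpu and memory limits, so this pod is Burstable.
	fmt.Println(qos.GetPodQOS(pod)) // Burstable
}
```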

View File

@ -130,6 +130,12 @@ func TestGetPodQOS(t *testing.T) {
}),
expected: v1.PodQOSBurstable,
},
{
pod: newPod("burstable-hugepages", []v1.Container{
newContainer("burstable", addResource("hugepages-2Mi", "1Gi", getResourceList("0", "0")), addResource("hugepages-2Mi", "1Gi", getResourceList("0", "0"))),
}),
expected: v1.PodQOSBurstable,
},
}
for id, testCase := range testCases {
if actual := GetPodQOS(testCase.pod); testCase.expected != actual {
@ -141,7 +147,7 @@ func TestGetPodQOS(t *testing.T) {
k8sv1.Convert_v1_Pod_To_api_Pod(testCase.pod, &pod, nil)
if actual := qos.GetPodQOS(&pod); api.PodQOSClass(testCase.expected) != actual {
t.Errorf("[%d]: invalid qos pod %s, expected: %s, actual: %s", id, testCase.pod.Name, testCase.expected, actual)
t.Errorf("[%d]: conversion invalid qos pod %s, expected: %s, actual: %s", id, testCase.pod.Name, testCase.expected, actual)
}
}
}

View File

@ -2402,6 +2402,28 @@ func ValidateTolerations(tolerations []api.Toleration, fldPath *field.Path) fiel
return allErrors
}
func toResourceNames(resources api.ResourceList) []api.ResourceName {
result := []api.ResourceName{}
for resourceName := range resources {
result = append(result, resourceName)
}
return result
}
func toSet(resourceNames []api.ResourceName) sets.String {
result := sets.NewString()
for _, resourceName := range resourceNames {
result.Insert(string(resourceName))
}
return result
}
func toContainerResourcesSet(ctr *api.Container) sets.String {
resourceNames := toResourceNames(ctr.Resources.Requests)
resourceNames = append(resourceNames, toResourceNames(ctr.Resources.Limits)...)
return toSet(resourceNames)
}
// validateContainersOnlyForPod does additional validation for containers on a pod versus a pod template
// it only does additive validation of fields not covered in validateContainers
func validateContainersOnlyForPod(containers []api.Container, fldPath *field.Path) field.ErrorList {
@ -2429,6 +2451,21 @@ func ValidatePod(pod *api.Pod) field.ErrorList {
allErrs = append(allErrs, validateContainersOnlyForPod(pod.Spec.Containers, specPath.Child("containers"))...)
allErrs = append(allErrs, validateContainersOnlyForPod(pod.Spec.InitContainers, specPath.Child("initContainers"))...)
if utilfeature.DefaultFeatureGate.Enabled(features.HugePages) {
hugePageResources := sets.NewString()
for i := range pod.Spec.Containers {
resourceSet := toContainerResourcesSet(&pod.Spec.Containers[i])
for resourceStr := range resourceSet {
if v1helper.IsHugePageResourceName(v1.ResourceName(resourceStr)) {
hugePageResources.Insert(resourceStr)
}
}
}
if len(hugePageResources) > 1 {
allErrs = append(allErrs, field.Invalid(specPath, hugePageResources, "must use a single hugepage size in a pod spec"))
}
}
return allErrs
}
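
Concretely, a pod whose containers request both hugepages-2Mi and hugepages-1Gi fails validation with "must use a single hugepage size in a pod spec"; the hugepages-multiple case in TestAlphaHugePagesIsolation below exercises exactly this.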
@ -3445,7 +3482,10 @@ func ValidateNode(node *api.Node) field.ErrorList {
allErrs = append(allErrs, validateNodeTaints(node.Spec.Taints, fldPath.Child("taints"))...)
}
// Only validate spec. All status fields are optional and can be updated later.
// Only validate spec.
// All status fields are optional and can be updated later.
// That said, if specified, we need to ensure they are valid.
allErrs = append(allErrs, ValidateNodeResources(node)...)
// external ID is required.
if len(node.Spec.ExternalID) == 0 {
@ -3461,6 +3501,38 @@ func ValidateNode(node *api.Node) field.ErrorList {
return allErrs
}
// ValidateNodeResources is used to make sure a node has valid capacity and allocatable values.
func ValidateNodeResources(node *api.Node) field.ErrorList {
allErrs := field.ErrorList{}
// Validate resource quantities in capacity.
hugePageSizes := sets.NewString()
for k, v := range node.Status.Capacity {
resPath := field.NewPath("status", "capacity", string(k))
allErrs = append(allErrs, ValidateResourceQuantityValue(string(k), v, resPath)...)
// track any huge page size that has a positive value
if helper.IsHugePageResourceName(k) && v.Value() > int64(0) {
hugePageSizes.Insert(string(k))
}
if len(hugePageSizes) > 1 {
allErrs = append(allErrs, field.Invalid(resPath, v, "may not have pre-allocated hugepages for multiple page sizes"))
}
}
// Validate resource quantities in allocatable.
hugePageSizes = sets.NewString()
for k, v := range node.Status.Allocatable {
resPath := field.NewPath("status", "allocatable", string(k))
allErrs = append(allErrs, ValidateResourceQuantityValue(string(k), v, resPath)...)
// track any huge page size that has a positive value
if helper.IsHugePageResourceName(k) && v.Value() > int64(0) {
hugePageSizes.Insert(string(k))
}
if len(hugePageSizes) > 1 {
allErrs = append(allErrs, field.Invalid(resPath, v, "may not have pre-allocated hugepages for multiple page sizes"))
}
}
return allErrs
}
// ValidateNodeUpdate tests to make sure a node update can be applied. Modifies oldNode.
func ValidateNodeUpdate(node, oldNode *api.Node) field.ErrorList {
fldPath := field.NewPath("metadata")
@ -3473,16 +3545,7 @@ func ValidateNodeUpdate(node, oldNode *api.Node) field.ErrorList {
// allErrs = append(allErrs, field.Invalid("status", node.Status, "must be empty"))
// }
// Validate resource quantities in capacity.
for k, v := range node.Status.Capacity {
resPath := field.NewPath("status", "capacity", string(k))
allErrs = append(allErrs, ValidateResourceQuantityValue(string(k), v, resPath)...)
}
// Validate resource quantities in allocatable.
for k, v := range node.Status.Allocatable {
resPath := field.NewPath("status", "allocatable", string(k))
allErrs = append(allErrs, ValidateResourceQuantityValue(string(k), v, resPath)...)
}
allErrs = append(allErrs, ValidateNodeResources(node)...)
// Validate no duplicate addresses in node status.
addresses := make(map[api.NodeAddress]bool)
@ -3925,6 +3988,10 @@ func ValidateResourceRequirements(requirements *api.ResourceRequirements, fldPat
if resourceName == api.ResourceEphemeralStorage && !utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
allErrs = append(allErrs, field.Forbidden(limPath, "ResourceEphemeralStorage field disabled by feature-gate for ResourceRequirements"))
}
if helper.IsHugePageResourceName(resourceName) && !utilfeature.DefaultFeatureGate.Enabled(features.HugePages) {
allErrs = append(allErrs, field.Forbidden(limPath, fmt.Sprintf("%s field disabled by feature-gate for ResourceRequirements", resourceName)))
}
}
for resourceName, quantity := range requirements.Requests {
fldPath := reqPath.Key(string(resourceName))

View File

@ -2759,6 +2759,106 @@ func TestValidateVolumes(t *testing.T) {
}
}
func TestAlphaHugePagesIsolation(t *testing.T) {
successCases := []api.Pod{
{ // Basic fields.
ObjectMeta: metav1.ObjectMeta{Name: "123", Namespace: "ns"},
Spec: api.PodSpec{
Containers: []api.Container{
{
Name: "ctr", Image: "image", ImagePullPolicy: "IfNotPresent", TerminationMessagePolicy: "File",
Resources: api.ResourceRequirements{
Requests: api.ResourceList{
api.ResourceName("hugepages-2Mi"): resource.MustParse("1Gi"),
},
Limits: api.ResourceList{
api.ResourceName("hugepages-2Mi"): resource.MustParse("1Gi"),
},
},
},
},
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
},
},
}
failureCases := []api.Pod{
{ // Basic fields.
ObjectMeta: metav1.ObjectMeta{Name: "hugepages-shared", Namespace: "ns"},
Spec: api.PodSpec{
Containers: []api.Container{
{
Name: "ctr", Image: "image", ImagePullPolicy: "IfNotPresent", TerminationMessagePolicy: "File",
Resources: api.ResourceRequirements{
Requests: api.ResourceList{
api.ResourceName("hugepages-2Mi"): resource.MustParse("1Gi"),
},
Limits: api.ResourceList{
api.ResourceName("hugepages-2Mi"): resource.MustParse("2Gi"),
},
},
},
},
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
},
},
{ // Basic fields.
ObjectMeta: metav1.ObjectMeta{Name: "hugepages-multiple", Namespace: "ns"},
Spec: api.PodSpec{
Containers: []api.Container{
{
Name: "ctr", Image: "image", ImagePullPolicy: "IfNotPresent", TerminationMessagePolicy: "File",
Resources: api.ResourceRequirements{
Requests: api.ResourceList{
api.ResourceName("hugepages-2Mi"): resource.MustParse("1Gi"),
api.ResourceName("hugepages-1Gi"): resource.MustParse("2Gi"),
},
Limits: api.ResourceList{
api.ResourceName("hugepages-2Mi"): resource.MustParse("1Gi"),
api.ResourceName("hugepages-1Gi"): resource.MustParse("2Gi"),
},
},
},
},
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
},
},
}
// Enable alpha feature HugePages
err := utilfeature.DefaultFeatureGate.Set("HugePages=true")
if err != nil {
t.Errorf("Failed to enable feature gate for HugePages: %v", err)
return
}
for i := range successCases {
pod := &successCases[i]
if errs := ValidatePod(pod); len(errs) != 0 {
t.Errorf("Unexpected error for case[%d], err: %v", i, errs)
}
}
for i := range failureCases {
pod := &failureCases[i]
if errs := ValidatePod(pod); len(errs) == 0 {
t.Errorf("Expected error for case[%d], pod: %v", i, pod.Name)
}
}
// Disable alpha feature HugePages
err = utilfeature.DefaultFeatureGate.Set("HugePages=false")
if err != nil {
t.Errorf("Failed to disable feature gate for HugePages: %v", err)
return
}
// Disable alpha feature HugePages and ensure all success cases fail
for i := range successCases {
pod := &successCases[i]
if errs := ValidatePod(pod); len(errs) == 0 {
t.Errorf("Expected error for case[%d], pod: %v", i, pod.Name)
}
}
}
func TestAlphaLocalStorageCapacityIsolation(t *testing.T) {
testCases := []api.VolumeSource{
@ -7838,6 +7938,8 @@ func TestValidateNode(t *testing.T) {
api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
api.ResourceName("my.org/gpu"): resource.MustParse("10"),
api.ResourceName("hugepages-2Mi"): resource.MustParse("10Gi"),
api.ResourceName("hugepages-1Gi"): resource.MustParse("0"),
},
},
Spec: api.NodeSpec{
@ -8119,6 +8221,27 @@ func TestValidateNode(t *testing.T) {
ExternalID: "external",
},
},
"multiple-pre-allocated-hugepages": {
ObjectMeta: metav1.ObjectMeta{
Name: "abc",
Labels: validSelector,
},
Status: api.NodeStatus{
Addresses: []api.NodeAddress{
{Type: api.NodeExternalIP, Address: "something"},
},
Capacity: api.ResourceList{
api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
api.ResourceName("my.org/gpu"): resource.MustParse("10"),
api.ResourceName("hugepages-2Mi"): resource.MustParse("10Gi"),
api.ResourceName("hugepages-1Gi"): resource.MustParse("10Gi"),
},
},
Spec: api.NodeSpec{
ExternalID: "external",
},
},
}
for k, v := range errorCases {
errs := ValidateNode(&v)

View File

@ -157,6 +157,12 @@ const (
//
// Alternative container-level CPU affinity policies.
CPUManager utilfeature.Feature = "CPUManager"
// owner: @derekwaynecarr
// alpha: v1.8
//
// Enable pods to consume pre-allocated huge pages of varying page sizes
HugePages utilfeature.Feature = "HugePages"
)
func init() {
@ -180,6 +186,7 @@ var defaultKubernetesFeatureGates = map[utilfeature.Feature]utilfeature.FeatureS
RotateKubeletClientCertificate: {Default: true, PreRelease: utilfeature.Beta},
PersistentLocalVolumes: {Default: false, PreRelease: utilfeature.Alpha},
LocalStorageCapacityIsolation: {Default: false, PreRelease: utilfeature.Alpha},
HugePages: {Default: false, PreRelease: utilfeature.Alpha},
DebugContainers: {Default: false, PreRelease: utilfeature.Alpha},
PodPriority: {Default: false, PreRelease: utilfeature.Alpha},
EnableEquivalenceClassCache: {Default: false, PreRelease: utilfeature.Alpha},
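
Because the gate defaults to false, operators must opt in explicitly, e.g. by starting the relevant components with --feature-gates=HugePages=true (the standard feature-gate flag syntax).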

View File

@ -23,11 +23,14 @@ go_library(
"//conditions:default": [],
}),
deps = [
"//pkg/api/v1/helper:go_default_library",
"//pkg/features:go_default_library",
"//vendor/github.com/google/cadvisor/events:go_default_library",
"//vendor/github.com/google/cadvisor/info/v1:go_default_library",
"//vendor/github.com/google/cadvisor/info/v2:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
] + select({
"@io_bazel_rules_go//go/platform:linux_amd64": [
"//pkg/kubelet/types:go_default_library",

View File

@ -21,6 +21,9 @@ import (
cadvisorapi2 "github.com/google/cadvisor/info/v2"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
utilfeature "k8s.io/apiserver/pkg/util/feature"
v1helper "k8s.io/kubernetes/pkg/api/v1/helper"
"k8s.io/kubernetes/pkg/features"
)
func CapacityFromMachineInfo(info *cadvisorapi.MachineInfo) v1.ResourceList {
@ -32,6 +35,17 @@ func CapacityFromMachineInfo(info *cadvisorapi.MachineInfo) v1.ResourceList {
int64(info.MemoryCapacity),
resource.BinarySI),
}
// if huge pages are enabled, we report them as a schedulable resource on the node
if utilfeature.DefaultFeatureGate.Enabled(features.HugePages) {
for _, hugepagesInfo := range info.HugePages {
pageSizeBytes := int64(hugepagesInfo.PageSize * 1024)
hugePagesBytes := pageSizeBytes * int64(hugepagesInfo.NumPages)
pageSizeQuantity := resource.NewQuantity(pageSizeBytes, resource.BinarySI)
c[v1helper.HugePageResourceName(*pageSizeQuantity)] = *resource.NewQuantity(hugePagesBytes, resource.BinarySI)
}
}
return c
}
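
A worked example of this computation with hypothetical cadvisor numbers (cadvisor reports page sizes in kB): 512 pre-allocated 2048 kB pages yield a capacity entry of hugepages-2Mi: 1Gi. A self-contained sketch:

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	// Hypothetical cadvisor data: 512 pre-allocated pages of 2048 kB each.
	pageSizeKB, numPages := int64(2048), int64(512)
	pageSizeBytes := pageSizeKB * 1024     // 2,097,152 bytes = 2Mi
	totalBytes := pageSizeBytes * numPages // 1,073,741,824 bytes = 1Gi

	pageSize := resource.NewQuantity(pageSizeBytes, resource.BinarySI)
	total := resource.NewQuantity(totalBytes, resource.BinarySI)
	// The node then advertises this as a schedulable resource.
	fmt.Printf("hugepages-%s: %s\n", pageSize.String(), total.String())
}
```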

View File

@ -52,6 +52,7 @@ go_library(
] + select({
"@io_bazel_rules_go//go/platform:linux_amd64": [
"//pkg/api:go_default_library",
"//pkg/api/v1/helper:go_default_library",
"//pkg/api/v1/helper/qos:go_default_library",
"//pkg/api/v1/resource:go_default_library",
"//pkg/kubelet/cm/util:go_default_library",
@ -63,6 +64,7 @@ go_library(
"//pkg/util/procfs:go_default_library",
"//pkg/util/sysctl:go_default_library",
"//pkg/util/version:go_default_library",
"//vendor/github.com/docker/go-units:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd:go_default_library",

View File

@ -24,12 +24,16 @@ import (
"strings"
"time"
units "github.com/docker/go-units"
"github.com/golang/glog"
libcontainercgroups "github.com/opencontainers/runc/libcontainer/cgroups"
cgroupfs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
cgroupsystemd "github.com/opencontainers/runc/libcontainer/cgroups/systemd"
libcontainerconfigs "github.com/opencontainers/runc/libcontainer/configs"
"k8s.io/apimachinery/pkg/util/sets"
utilfeature "k8s.io/apiserver/pkg/util/feature"
kubefeatures "k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/metrics"
)
@ -43,6 +47,10 @@ const (
libcontainerSystemd libcontainerCgroupManagerType = "systemd"
)
// hugePageSizeList is used to convert a page size to the canonical
// hugetlb unit that libcontainer expects
var hugePageSizeList = []string{"B", "kB", "MB", "GB", "TB", "PB"}
// ConvertCgroupNameToSystemd converts the internal cgroup name to a systemd name.
// For example, the name /Burstable/pod_123-456 becomes Burstable-pod_123_456.slice
// If outputToCgroupFs is true, it expands the systemd name into the cgroupfs form.
@ -299,10 +307,16 @@ type subsystem interface {
GetStats(path string, stats *libcontainercgroups.Stats) error
}
// Cgroup subsystems we currently support
var supportedSubsystems = []subsystem{
&cgroupfs.MemoryGroup{},
&cgroupfs.CpuGroup{},
// getSupportedSubsystems returns the list of cgroup subsystems currently supported
func getSupportedSubsystems() []subsystem {
supportedSubsystems := []subsystem{
&cgroupfs.MemoryGroup{},
&cgroupfs.CpuGroup{},
}
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.HugePages) {
supportedSubsystems = append(supportedSubsystems, &cgroupfs.HugetlbGroup{})
}
return supportedSubsystems
}
// setSupportedSubsystems sets cgroup resource limits only on the supported
@ -315,7 +329,7 @@ var supportedSubsystems = []subsystem{
// but this is not possible with libcontainers Set() method
// See https://github.com/opencontainers/runc/issues/932
func setSupportedSubsystems(cgroupConfig *libcontainerconfigs.Cgroup) error {
for _, sys := range supportedSubsystems {
for _, sys := range getSupportedSubsystems() {
if _, ok := cgroupConfig.Paths[sys.Name()]; !ok {
return fmt.Errorf("Failed to find subsystem mount for subsystem: %v", sys.Name())
}
@ -343,6 +357,30 @@ func (m *cgroupManagerImpl) toResources(resourceConfig *ResourceConfig) *libcont
if resourceConfig.CpuPeriod != nil {
resources.CpuPeriod = *resourceConfig.CpuPeriod
}
// if huge pages are enabled, we set them in libcontainer
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.HugePages) {
// for each page size enumerated, set that value
pageSizes := sets.NewString()
for pageSize, limit := range resourceConfig.HugePageLimit {
sizeString := units.CustomSize("%g%s", float64(pageSize), 1024.0, hugePageSizeList)
resources.HugetlbLimit = append(resources.HugetlbLimit, &libcontainerconfigs.HugepageLimit{
Pagesize: sizeString,
Limit: uint64(limit),
})
pageSizes.Insert(sizeString)
}
// for each page size omitted, limit to 0
for _, pageSize := range cgroupfs.HugePageSizes {
if pageSizes.Has(pageSize) {
continue
}
resources.HugetlbLimit = append(resources.HugetlbLimit, &libcontainerconfigs.HugepageLimit{
Pagesize: pageSize,
Limit: uint64(0),
})
}
}
return resources
}
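
A sketch of the size-string conversion used above, runnable on its own with go-units: a 2Mi page size (2,097,152 bytes, a hypothetical input) becomes "2MB", which matches hugetlb cgroup file names such as hugetlb.2MB.limit_in_bytes.

```go
package main

import (
	"fmt"

	units "github.com/docker/go-units"
)

func main() {
	hugePageSizeList := []string{"B", "kB", "MB", "GB", "TB", "PB"}
	// 2,097,152 / 1024 = 2048 kB; / 1024 again = 2 MB -> "2MB".
	fmt.Println(units.CustomSize("%g%s", float64(2097152), 1024.0, hugePageSizeList))
}
```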
@ -502,7 +540,7 @@ func (m *cgroupManagerImpl) ReduceCPULimits(cgroupName CgroupName) error {
func getStatsSupportedSubsystems(cgroupPaths map[string]string) (*libcontainercgroups.Stats, error) {
stats := libcontainercgroups.NewStats()
for _, sys := range supportedSubsystems {
for _, sys := range getSupportedSubsystems() {
if _, ok := cgroupPaths[sys.Name()]; !ok {
return nil, fmt.Errorf("Failed to find subsystem mount for subsystem: %v", sys.Name())
}

View File

@ -26,6 +26,7 @@ import (
libcontainercgroups "github.com/opencontainers/runc/libcontainer/cgroups"
"k8s.io/api/core/v1"
v1helper "k8s.io/kubernetes/pkg/api/v1/helper"
v1qos "k8s.io/kubernetes/pkg/api/v1/helper/qos"
"k8s.io/kubernetes/pkg/api/v1/resource"
)
@ -83,6 +84,23 @@ func MilliCPUToShares(milliCPU int64) int64 {
return shares
}
// HugePageLimits converts the API representation to a map
// from huge page size (in bytes) to huge page limit (in bytes).
func HugePageLimits(resourceList v1.ResourceList) map[int64]int64 {
hugePageLimits := map[int64]int64{}
for k, v := range resourceList {
if v1helper.IsHugePageResourceName(k) {
pageSize, _ := v1helper.HugePageSizeFromResourceName(k)
if value, exists := hugePageLimits[pageSize.Value()]; exists {
hugePageLimits[pageSize.Value()] = value + v.Value()
} else {
hugePageLimits[pageSize.Value()] = v.Value()
}
}
}
return hugePageLimits
}
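
For example (hypothetical input), a ResourceList of {hugepages-2Mi: 1Gi} yields map[int64]int64{2097152: 1073741824}; multiple entries that resolve to the same page size (such as hugepages-2Mi and hugepages-2048Ki) are summed.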
// ResourceConfigForPod takes the input pod and outputs the cgroup resource config.
func ResourceConfigForPod(pod *v1.Pod) *ResourceConfig {
// sum requests and limits.
@ -108,6 +126,8 @@ func ResourceConfigForPod(pod *v1.Pod) *ResourceConfig {
// track if limits were applied for each resource.
memoryLimitsDeclared := true
cpuLimitsDeclared := true
// map hugepage pagesize (bytes) to limits (bytes)
hugePageLimits := map[int64]int64{}
for _, container := range pod.Spec.Containers {
if container.Resources.Limits.Cpu().IsZero() {
cpuLimitsDeclared = false
@ -115,6 +135,14 @@ func ResourceConfigForPod(pod *v1.Pod) *ResourceConfig {
if container.Resources.Limits.Memory().IsZero() {
memoryLimitsDeclared = false
}
containerHugePageLimits := HugePageLimits(container.Resources.Requests)
for k, v := range containerHugePageLimits {
if value, exists := hugePageLimits[k]; exists {
hugePageLimits[k] = value + v
} else {
hugePageLimits[k] = v
}
}
}
// determine the qos class
@ -140,6 +168,7 @@ func ResourceConfigForPod(pod *v1.Pod) *ResourceConfig {
shares := int64(MinShares)
result.CpuShares = &shares
}
result.HugePageLimit = hugePageLimits
return result
}

View File

@ -28,7 +28,9 @@ import (
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/types"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/kubernetes/pkg/api"
kubefeatures "k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/events"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
)
@ -154,6 +156,10 @@ func getCgroupConfig(rl v1.ResourceList) *ResourceConfig {
val := MilliCPUToShares(q.MilliValue())
rc.CpuShares = &val
}
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.HugePages) {
rc.HugePageLimit = HugePageLimits(rl)
}
return &rc
}

View File

@ -27,9 +27,13 @@ import (
"k8s.io/apimachinery/pkg/util/wait"
units "github.com/docker/go-units"
cgroupfs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
"k8s.io/api/core/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
v1qos "k8s.io/kubernetes/pkg/api/v1/helper/qos"
"k8s.io/kubernetes/pkg/api/v1/resource"
kubefeatures "k8s.io/kubernetes/pkg/features"
)
const (
@ -100,11 +104,18 @@ func (m *qosContainerManagerImpl) Start(getNodeAllocatable func() v1.ResourceLis
minShares := int64(MinShares)
resourceParameters.CpuShares = &minShares
}
// containerConfig object stores the cgroup specifications
containerConfig := &CgroupConfig{
Name: absoluteContainerName,
ResourceParameters: resourceParameters,
}
// for each enumerated huge page size, the qos tiers are unbounded
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.HugePages) {
m.setHugePagesUnbounded(containerConfig)
}
// check if it exists
if !cm.Exists(absoluteContainerName) {
if err := cm.Create(containerConfig); err != nil {
@ -138,6 +149,29 @@ func (m *qosContainerManagerImpl) Start(getNodeAllocatable func() v1.ResourceLis
return nil
}
// setHugePagesUnbounded ensures hugetlb is effectively unbounded
func (m *qosContainerManagerImpl) setHugePagesUnbounded(cgroupConfig *CgroupConfig) error {
hugePageLimit := map[int64]int64{}
for _, pageSize := range cgroupfs.HugePageSizes {
pageSizeBytes, err := units.RAMInBytes(pageSize)
if err != nil {
return err
}
hugePageLimit[pageSizeBytes] = int64(1 << 62)
}
cgroupConfig.ResourceParameters.HugePageLimit = hugePageLimit
return nil
}
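
The 1 << 62 limit is simply a sentinel large enough never to constrain hugetlb usage while staying comfortably within int64 range.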
func (m *qosContainerManagerImpl) setHugePagesConfig(configs map[v1.PodQOSClass]*CgroupConfig) error {
for _, v := range configs {
if err := m.setHugePagesUnbounded(v); err != nil {
return err
}
}
return nil
}
func (m *qosContainerManagerImpl) setCPUCgroupConfig(configs map[v1.PodQOSClass]*CgroupConfig) error {
pods := m.activePods()
burstablePodCPURequest := int64(0)
@ -262,6 +296,13 @@ func (m *qosContainerManagerImpl) UpdateCgroups() error {
return err
}
// update the qos level cgroup settings for huge pages (ensure they remain unbounded)
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.HugePages) {
if err := m.setHugePagesConfig(qosConfigs); err != nil {
return err
}
}
for resource, percentReserve := range m.qosReserved {
switch resource {
case v1.ResourceMemory:

View File

@ -31,6 +31,8 @@ type ResourceConfig struct {
CpuQuota *int64
// CPU quota period.
CpuPeriod *int64
// HugePageLimit maps a page size (in bytes) to a limit (in bytes)
HugePageLimit map[int64]int64
}
// CgroupName is the abstract name of a cgroup prior to any driver specific conversion.

View File

@ -630,6 +630,19 @@ func (kl *Kubelet) setNodeStatusMachineInfo(node *v1.Node) {
}
node.Status.Allocatable[k] = value
}
// for every huge page reservation, we need to remove it from allocatable memory
for k, v := range node.Status.Capacity {
if v1helper.IsHugePageResourceName(k) {
allocatableMemory := node.Status.Allocatable[v1.ResourceMemory]
value := *(v.Copy())
allocatableMemory.Sub(value)
if allocatableMemory.Sign() < 0 {
// Negative Allocatable resources don't make sense.
allocatableMemory.Set(0)
}
node.Status.Allocatable[v1.ResourceMemory] = allocatableMemory
}
}
}
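
For example (hypothetical numbers), a node with 8Gi of memory capacity and a hugepages-2Mi: 1Gi reservation reports 7Gi of allocatable memory, since pre-allocated huge pages cannot be used as ordinary memory.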
// Set versioninfo for the node.

View File

@ -509,6 +509,12 @@ func GetResourceRequest(pod *v1.Pod) *schedulercache.Resource {
result.SetExtended(rName, value)
}
}
if v1helper.IsHugePageResourceName(rName) {
value := rQuantity.Value()
if value > result.HugePages[rName] {
result.SetHugePages(rName, value)
}
}
}
}
}
@ -542,7 +548,12 @@ func PodFitsResources(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.No
// We couldn't parse metadata - fallback to computing it.
podRequest = GetResourceRequest(pod)
}
if podRequest.MilliCPU == 0 && podRequest.Memory == 0 && podRequest.NvidiaGPU == 0 && podRequest.EphemeralStorage == 0 && len(podRequest.ExtendedResources) == 0 {
if podRequest.MilliCPU == 0 &&
podRequest.Memory == 0 &&
podRequest.NvidiaGPU == 0 &&
podRequest.EphemeralStorage == 0 &&
len(podRequest.ExtendedResources) == 0 &&
len(podRequest.HugePages) == 0 {
return len(predicateFails) == 0, predicateFails, nil
}
@ -567,6 +578,12 @@ func PodFitsResources(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.No
}
}
for rName, rQuant := range podRequest.HugePages {
if allocatable.HugePages[rName] < rQuant+nodeInfo.RequestedResource().HugePages[rName] {
predicateFails = append(predicateFails, NewInsufficientResourceError(rName, podRequest.HugePages[rName], nodeInfo.RequestedResource().HugePages[rName], allocatable.HugePages[rName]))
}
}
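
For instance, in the multiple-containers test case below, a pod requesting 6 pages lands on a node with 2 already requested and only 5 allocatable, so the predicate fails with NewInsufficientResourceError(hugePageResourceA, 6, 2, 5).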
if glog.V(10) {
if len(predicateFails) == 0 {
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is

View File

@ -73,11 +73,12 @@ func (pvs FakePersistentVolumeInfo) GetPersistentVolumeInfo(pvID string) (*v1.Pe
}
var (
opaqueResourceA = v1helper.OpaqueIntResourceName("AAA")
opaqueResourceB = v1helper.OpaqueIntResourceName("BBB")
opaqueResourceA = v1helper.OpaqueIntResourceName("AAA")
opaqueResourceB = v1helper.OpaqueIntResourceName("BBB")
hugePageResourceA = v1helper.HugePageResourceName(resource.MustParse("2Mi"))
)
func makeResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA, storage int64) v1.NodeResources {
func makeResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA, storage, hugePageA int64) v1.NodeResources {
return v1.NodeResources{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
@ -86,11 +87,12 @@ func makeResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA, storage int64) v
v1.ResourceNvidiaGPU: *resource.NewQuantity(nvidiaGPUs, resource.DecimalSI),
opaqueResourceA: *resource.NewQuantity(opaqueA, resource.DecimalSI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(storage, resource.BinarySI),
hugePageResourceA: *resource.NewQuantity(hugePageA, resource.BinarySI),
},
}
}
func makeAllocatableResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA, storage int64) v1.ResourceList {
func makeAllocatableResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA, storage, hugePageA int64) v1.ResourceList {
return v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(memory, resource.BinarySI),
@ -98,6 +100,7 @@ func makeAllocatableResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA, stora
v1.ResourceNvidiaGPU: *resource.NewQuantity(nvidiaGPUs, resource.DecimalSI),
opaqueResourceA: *resource.NewQuantity(opaqueA, resource.DecimalSI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(storage, resource.BinarySI),
hugePageResourceA: *resource.NewQuantity(hugePageA, resource.BinarySI),
}
}
@ -348,10 +351,38 @@ func TestPodFitsResources(t *testing.T) {
test: "opaque resource allocatable enforced for unknown resource for init container",
reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(opaqueResourceB, 1, 0, 0)},
},
{
pod: newResourcePod(
schedulercache.Resource{MilliCPU: 1, Memory: 1, HugePages: map[v1.ResourceName]int64{hugePageResourceA: 10}}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 0, Memory: 0, HugePages: map[v1.ResourceName]int64{hugePageResourceA: 0}})),
fits: false,
test: "hugepages resource capacity enforced",
reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(hugePageResourceA, 10, 0, 5)},
},
{
pod: newResourceInitPod(newResourcePod(schedulercache.Resource{}),
schedulercache.Resource{MilliCPU: 1, Memory: 1, HugePages: map[v1.ResourceName]int64{hugePageResourceA: 10}}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 0, Memory: 0, HugePages: map[v1.ResourceName]int64{hugePageResourceA: 0}})),
fits: false,
test: "hugepages resource capacity enforced for init container",
reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(hugePageResourceA, 10, 0, 5)},
},
{
pod: newResourcePod(
schedulercache.Resource{MilliCPU: 1, Memory: 1, HugePages: map[v1.ResourceName]int64{hugePageResourceA: 3}},
schedulercache.Resource{MilliCPU: 1, Memory: 1, HugePages: map[v1.ResourceName]int64{hugePageResourceA: 3}}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 0, Memory: 0, HugePages: map[v1.ResourceName]int64{hugePageResourceA: 2}})),
fits: false,
test: "hugepages resource allocatable enforced for multiple containers",
reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(hugePageResourceA, 6, 2, 5)},
},
}
for _, test := range enoughPodsTests {
node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 5, 20).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 5, 20)}}
node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 5, 20, 5).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 5, 20, 5)}}
test.nodeInfo.SetNode(&node)
fits, reasons, err := PodFitsResources(test.pod, PredicateMetadata(test.pod, nil), test.nodeInfo)
if err != nil {
@ -406,7 +437,7 @@ func TestPodFitsResources(t *testing.T) {
},
}
for _, test := range notEnoughPodsTests {
node := v1.Node{Status: v1.NodeStatus{Capacity: v1.ResourceList{}, Allocatable: makeAllocatableResources(10, 20, 0, 1, 0, 0)}}
node := v1.Node{Status: v1.NodeStatus{Capacity: v1.ResourceList{}, Allocatable: makeAllocatableResources(10, 20, 0, 1, 0, 0, 0)}}
test.nodeInfo.SetNode(&node)
fits, reasons, err := PodFitsResources(test.pod, PredicateMetadata(test.pod, nil), test.nodeInfo)
if err != nil {
@ -464,7 +495,7 @@ func TestPodFitsResources(t *testing.T) {
}
for _, test := range storagePodsTests {
node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 5, 20).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 5, 20)}}
node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 5, 20, 5).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 5, 20, 5)}}
test.nodeInfo.SetNode(&node)
fits, reasons, err := PodFitsResources(test.pod, PredicateMetadata(test.pod, nil), test.nodeInfo)
if err != nil {
@ -1889,7 +1920,7 @@ func TestRunGeneralPredicates(t *testing.T) {
newResourcePod(schedulercache.Resource{MilliCPU: 9, Memory: 19})),
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0)},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0, 0)},
},
fits: true,
wErr: nil,
@ -1901,7 +1932,7 @@ func TestRunGeneralPredicates(t *testing.T) {
newResourcePod(schedulercache.Resource{MilliCPU: 5, Memory: 19})),
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0)},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0, 0)},
},
fits: false,
wErr: nil,
@ -1915,7 +1946,7 @@ func TestRunGeneralPredicates(t *testing.T) {
pod: &v1.Pod{},
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 9, Memory: 19})),
node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0)}},
node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0, 0)}},
fits: true,
wErr: nil,
test: "no resources/port/host requested always fits on GPU machine",
@ -1924,7 +1955,7 @@ func TestRunGeneralPredicates(t *testing.T) {
pod: newResourcePod(schedulercache.Resource{MilliCPU: 3, Memory: 1, NvidiaGPU: 1}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 5, Memory: 10, NvidiaGPU: 1})),
node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0)}},
node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0, 0)}},
fits: false,
wErr: nil,
reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(v1.ResourceNvidiaGPU, 1, 1, 1)},
@ -1934,7 +1965,7 @@ func TestRunGeneralPredicates(t *testing.T) {
pod: newResourcePod(schedulercache.Resource{MilliCPU: 3, Memory: 1, NvidiaGPU: 1}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 5, Memory: 10, NvidiaGPU: 0})),
node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0)}},
node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0, 0)}},
fits: true,
wErr: nil,
test: "enough GPU resource",
@ -1948,7 +1979,7 @@ func TestRunGeneralPredicates(t *testing.T) {
nodeInfo: schedulercache.NewNodeInfo(),
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0)},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0, 0)},
},
fits: false,
wErr: nil,
@ -1960,7 +1991,7 @@ func TestRunGeneralPredicates(t *testing.T) {
nodeInfo: schedulercache.NewNodeInfo(newPodWithPort(123)),
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0)},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0, 0)},
},
fits: false,
wErr: nil,
@ -3252,7 +3283,7 @@ func TestPodSchedulesOnNodeWithMemoryPressureCondition(t *testing.T) {
ImagePullPolicy: "Always",
// at least one requirement -> burstable pod
Resources: v1.ResourceRequirements{
Requests: makeAllocatableResources(100, 100, 100, 100, 0, 0),
Requests: makeAllocatableResources(100, 100, 100, 100, 0, 0, 0),
},
},
},

View File

@ -72,6 +72,7 @@ type Resource struct {
// explicitly as int, to avoid conversions and improve performance.
AllowedPodNumber int
ExtendedResources map[v1.ResourceName]int64
HugePages map[v1.ResourceName]int64
}
// New creates a Resource from ResourceList
@ -103,6 +104,9 @@ func (r *Resource) Add(rl v1.ResourceList) {
if v1helper.IsExtendedResourceName(rName) {
r.AddExtended(rName, rQuant.Value())
}
if v1helper.IsHugePageResourceName(rName) {
r.AddHugePages(rName, rQuant.Value())
}
}
}
}
@ -118,6 +122,9 @@ func (r *Resource) ResourceList() v1.ResourceList {
for rName, rQuant := range r.ExtendedResources {
result[rName] = *resource.NewQuantity(rQuant, resource.DecimalSI)
}
for rName, rQuant := range r.HugePages {
result[rName] = *resource.NewQuantity(rQuant, resource.BinarySI)
}
return result
}
@ -135,6 +142,12 @@ func (r *Resource) Clone() *Resource {
res.ExtendedResources[k] = v
}
}
if r.HugePages != nil {
res.HugePages = make(map[v1.ResourceName]int64)
for k, v := range r.HugePages {
res.HugePages[k] = v
}
}
return res
}
@ -150,6 +163,18 @@ func (r *Resource) SetExtended(name v1.ResourceName, quantity int64) {
r.ExtendedResources[name] = quantity
}
func (r *Resource) AddHugePages(name v1.ResourceName, quantity int64) {
r.SetHugePages(name, r.HugePages[name]+quantity)
}
func (r *Resource) SetHugePages(name v1.ResourceName, quantity int64) {
// Lazily allocate hugepages resource map.
if r.HugePages == nil {
r.HugePages = map[v1.ResourceName]int64{}
}
r.HugePages[name] = quantity
}
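
A minimal usage sketch of the lazily allocated map (a hypothetical program against the in-tree scheduler cache package, assuming the 1.8-era import path):

```go
package main

import (
	"fmt"

	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

func main() {
	r := &schedulercache.Resource{}
	// Add routes hugepage names through AddHugePages/SetHugePages,
	// which allocate the HugePages map on first use.
	r.Add(v1.ResourceList{
		v1.ResourceName("hugepages-2Mi"): resource.MustParse("1Gi"),
	})
	fmt.Println(r.HugePages["hugepages-2Mi"]) // 1073741824
}
```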
// NewNodeInfo returns a ready to use empty NodeInfo object.
// If any pods are given in arguments, their information will be aggregated in
// the returned object.
@ -307,6 +332,12 @@ func (n *NodeInfo) addPod(pod *v1.Pod) {
for rName, rQuant := range res.ExtendedResources {
n.requestedResource.ExtendedResources[rName] += rQuant
}
if n.requestedResource.HugePages == nil && len(res.HugePages) > 0 {
n.requestedResource.HugePages = map[v1.ResourceName]int64{}
}
for rName, rQuant := range res.HugePages {
n.requestedResource.HugePages[rName] += rQuant
}
n.nonzeroRequest.MilliCPU += non0_cpu
n.nonzeroRequest.Memory += non0_mem
n.pods = append(n.pods, pod)
@ -362,6 +393,12 @@ func (n *NodeInfo) removePod(pod *v1.Pod) error {
for rName, rQuant := range res.ExtendedResources {
n.requestedResource.ExtendedResources[rName] -= rQuant
}
if len(res.HugePages) > 0 && n.requestedResource.HugePages == nil {
n.requestedResource.HugePages = map[v1.ResourceName]int64{}
}
for rName, rQuant := range res.HugePages {
n.requestedResource.HugePages[rName] -= rQuant
}
n.nonzeroRequest.MilliCPU -= non0_cpu
n.nonzeroRequest.Memory -= non0_mem

View File

@ -3759,6 +3759,8 @@ const (
ResourceOpaqueIntPrefix = "pod.alpha.kubernetes.io/opaque-int-resource-"
// Default namespace prefix.
ResourceDefaultNamespacePrefix = "kubernetes.io/"
// Name prefix for huge page resources (alpha).
ResourceHugePagesPrefix = "hugepages-"
)
// ResourceList is a set of (resource name, quantity) pairs.