Merge pull request #50859 from derekwaynecarr/hugepages-feature

Automatic merge from submit-queue

HugePages feature

**What this PR does / why we need it**:
Implements HugePages support per https://github.com/kubernetes/community/pull/837

Feature track issue: https://github.com/kubernetes/features/issues/275

**Special notes for your reviewer**:
A follow-on PR is open to add EmptyDir support.

**Release note**:
```release-note
Alpha support for pre-allocated hugepages
```
Merged by Kubernetes Submit Queue on 2017-09-05 11:16:17 -07:00, committed by GitHub (commit 2f543f321d).
26 changed files with 682 additions and 46 deletions
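Before the per-file diffs, a hedged sketch of what the feature enables (container name, image, and quantities here are illustrative; the `hugepages-<pagesize>` resource convention and the no-overcommit rule, which means requests should match limits, come from this PR):

```go
package main

import (
	"fmt"

	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	// With the alpha HugePages feature gate enabled, a container asks for
	// pre-allocated huge pages via the "hugepages-<pagesize>" resource.
	// Hugepages are excluded from overcommit, so requests must equal limits.
	hugepages := v1.ResourceList{
		v1.ResourceName("hugepages-2Mi"): resource.MustParse("1Gi"),
	}
	ctr := v1.Container{
		Name:  "demo", // illustrative values
		Image: "busybox",
		Resources: v1.ResourceRequirements{
			Requests: hugepages,
			Limits:   hugepages,
		},
	}
	fmt.Printf("%+v\n", ctr.Resources)
}
```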

View File

@ -31,6 +31,30 @@ import (
"k8s.io/kubernetes/pkg/api" "k8s.io/kubernetes/pkg/api"
) )
// IsHugePageResourceName returns true if the resource name has the huge page
// resource prefix.
func IsHugePageResourceName(name api.ResourceName) bool {
return strings.HasPrefix(string(name), api.ResourceHugePagesPrefix)
}
// HugePageResourceName returns a ResourceName with the canonical hugepage
// prefix prepended for the specified page size. The page size is converted
// to its canonical representation.
func HugePageResourceName(pageSize resource.Quantity) api.ResourceName {
return api.ResourceName(fmt.Sprintf("%s%s", api.ResourceHugePagesPrefix, pageSize.String()))
}
// HugePageSizeFromResourceName returns the page size for the specified huge page
// resource name. If the specified input is not a valid huge page resource name
// an error is returned.
func HugePageSizeFromResourceName(name api.ResourceName) (resource.Quantity, error) {
if !IsHugePageResourceName(name) {
return resource.Quantity{}, fmt.Errorf("resource name: %s is not valid hugepage name", name)
}
pageSize := strings.TrimPrefix(string(name), api.ResourceHugePagesPrefix)
return resource.ParseQuantity(pageSize)
}
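A quick usage sketch for the three helpers above (a hypothetical standalone main; the import paths follow the pkg/api/helper package this hunk belongs to):

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/api/helper"
)

func main() {
	// Build the canonical resource name for 2Mi pages: "hugepages-2Mi".
	name := helper.HugePageResourceName(resource.MustParse("2Mi"))
	fmt.Println(name, helper.IsHugePageResourceName(name)) // hugepages-2Mi true

	// Recover the page size (in bytes) from the resource name.
	size, err := helper.HugePageSizeFromResourceName(name)
	if err != nil {
		panic(err)
	}
	fmt.Println(size.Value()) // 2097152

	// Non-hugepage names are rejected.
	fmt.Println(helper.IsHugePageResourceName(api.ResourceName("cpu"))) // false
}
```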
// NonConvertibleFields iterates over the provided map and filters out all but
// any keys with the "non-convertible.kubernetes.io" prefix.
func NonConvertibleFields(annotations map[string]string) map[string]string {
@ -113,7 +137,7 @@ var standardContainerResources = sets.NewString(
// IsStandardContainerResourceName returns true if the container can make a resource request
// for the specified resource
func IsStandardContainerResourceName(str string) bool {
return standardContainerResources.Has(str) || IsHugePageResourceName(api.ResourceName(str))
}
// IsExtendedResourceName returns true if the resource name is not in the // IsExtendedResourceName returns true if the resource name is not in the
@ -153,6 +177,7 @@ var overcommitBlacklist = sets.NewString(string(api.ResourceNvidiaGPU))
// namespace and not blacklisted.
func IsOvercommitAllowed(name api.ResourceName) bool {
return IsDefaultNamespaceResource(name) &&
!IsHugePageResourceName(name) &&
!overcommitBlacklist.Has(string(name))
}
@ -220,7 +245,7 @@ var standardResources = sets.NewString(
// IsStandardResourceName returns true if the resource is known to the system
func IsStandardResourceName(str string) bool {
return standardResources.Has(str) || IsHugePageResourceName(api.ResourceName(str))
}
var integerResources = sets.NewString(

View File

@ -58,10 +58,28 @@ func TestIsStandardResource(t *testing.T) {
{"disk", false}, {"disk", false},
{"blah", false}, {"blah", false},
{"x.y.z", false}, {"x.y.z", false},
{"hugepages-2Mi", true},
}
for i, tc := range testCases {
if IsStandardResourceName(tc.input) != tc.output {
t.Errorf("case[%d], input: %s, expected: %t, got: %t", i, tc.input, tc.output, !tc.output)
}
}
}
func TestIsStandardContainerResource(t *testing.T) {
testCases := []struct {
input string
output bool
}{
{"cpu", true},
{"memory", true},
{"disk", false},
{"hugepages-2Mi", true},
}
for i, tc := range testCases {
if IsStandardContainerResourceName(tc.input) != tc.output {
t.Errorf("case[%d], input: %s, expected: %t, got: %t", i, tc.input, tc.output, !tc.output)
}
}
}
@ -353,3 +371,120 @@ func TestGetNodeAffinityFromAnnotations(t *testing.T) {
}
}
}
func TestIsHugePageResourceName(t *testing.T) {
testCases := []struct {
name api.ResourceName
result bool
}{
{
name: api.ResourceName("hugepages-2Mi"),
result: true,
},
{
name: api.ResourceName("hugepages-1Gi"),
result: true,
},
{
name: api.ResourceName("cpu"),
result: false,
},
{
name: api.ResourceName("memory"),
result: false,
},
}
for _, testCase := range testCases {
if testCase.result != IsHugePageResourceName(testCase.name) {
t.Errorf("resource: %v expected result: %v", testCase.name, testCase.result)
}
}
}
func TestHugePageResourceName(t *testing.T) {
testCases := []struct {
pageSize resource.Quantity
name api.ResourceName
}{
{
pageSize: resource.MustParse("2Mi"),
name: api.ResourceName("hugepages-2Mi"),
},
{
pageSize: resource.MustParse("1Gi"),
name: api.ResourceName("hugepages-1Gi"),
},
{
// verify we do not regress our canonical representation
pageSize: *resource.NewQuantity(int64(2097152), resource.BinarySI),
name: api.ResourceName("hugepages-2Mi"),
},
}
for _, testCase := range testCases {
if result := HugePageResourceName(testCase.pageSize); result != testCase.name {
t.Errorf("pageSize: %v, expected: %v, but got: %v", testCase.pageSize.String(), testCase.name, result.String())
}
}
}
func TestHugePageSizeFromResourceName(t *testing.T) {
testCases := []struct {
name api.ResourceName
expectErr bool
pageSize resource.Quantity
}{
{
name: api.ResourceName("hugepages-2Mi"),
pageSize: resource.MustParse("2Mi"),
expectErr: false,
},
{
name: api.ResourceName("hugepages-1Gi"),
pageSize: resource.MustParse("1Gi"),
expectErr: false,
},
{
name: api.ResourceName("hugepages-bad"),
expectErr: true,
},
}
for _, testCase := range testCases {
value, err := HugePageSizeFromResourceName(testCase.name)
if testCase.expectErr && err == nil {
t.Errorf("Expected an error for %v", testCase.name)
} else if !testCase.expectErr && err != nil {
t.Errorf("Unexpected error for %v, got %v", testCase.name, err)
} else if testCase.pageSize.Value() != value.Value() {
t.Errorf("Unexpected pageSize for resource %v got %v", testCase.name, value.String())
}
}
}
func TestIsOvercommitAllowed(t *testing.T) {
testCases := []struct {
name api.ResourceName
allowed bool
}{
{
name: api.ResourceCPU,
allowed: true,
},
{
name: api.ResourceMemory,
allowed: true,
},
{
name: api.ResourceNvidiaGPU,
allowed: false,
},
{
name: HugePageResourceName(resource.MustParse("2Mi")),
allowed: false,
},
}
for _, testCase := range testCases {
if testCase.allowed != IsOvercommitAllowed(testCase.name) {
t.Errorf("Unexpected result for %v", testCase.name)
}
}
}

View File

@ -10,6 +10,7 @@ go_library(
srcs = ["qos.go"], srcs = ["qos.go"],
deps = [ deps = [
"//pkg/api:go_default_library", "//pkg/api:go_default_library",
"//pkg/api/helper:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library", "//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library", "//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
], ],

View File

@ -22,10 +22,13 @@ import (
"k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/sets"
"k8s.io/kubernetes/pkg/api" "k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/helper"
) )
// supportedComputeResources is the list of compute resources for with QoS is supported. func isSupportedQoSComputeResource(name api.ResourceName) bool {
var supportedQoSComputeResources = sets.NewString(string(api.ResourceCPU), string(api.ResourceMemory)) supportedQoSComputeResources := sets.NewString(string(api.ResourceCPU), string(api.ResourceMemory))
return supportedQoSComputeResources.Has(string(name)) || helper.IsHugePageResourceName(name)
}
// GetPodQOS returns the QoS class of a pod. // GetPodQOS returns the QoS class of a pod.
// A pod is besteffort if none of its containers have specified any requests or limits. // A pod is besteffort if none of its containers have specified any requests or limits.
@ -39,7 +42,7 @@ func GetPodQOS(pod *api.Pod) api.PodQOSClass {
for _, container := range pod.Spec.Containers {
// process requests
for name, quantity := range container.Resources.Requests {
if !isSupportedQoSComputeResource(name) {
continue
}
if quantity.Cmp(zeroQuantity) == 1 {
@ -55,7 +58,7 @@ func GetPodQOS(pod *api.Pod) api.PodQOSClass {
// process limits
qosLimitsFound := sets.NewString()
for name, quantity := range container.Resources.Limits {
if !isSupportedQoSComputeResource(name) {
continue
}
if quantity.Cmp(zeroQuantity) == 1 {
@ -70,7 +73,7 @@ func GetPodQOS(pod *api.Pod) api.PodQOSClass {
}
}
if !qosLimitsFound.HasAll(string(api.ResourceMemory), string(api.ResourceCPU)) {
isGuaranteed = false
}
}
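A minimal sketch of the resulting behavior, using the exported v1 twin of this helper added later in the PR (the pod literal is an assumption): a container that sets only a hugepages limit is Burstable, since Guaranteed still requires both cpu and memory limits.

```go
package main

import (
	"fmt"

	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	qos "k8s.io/kubernetes/pkg/api/v1/helper/qos"
)

func main() {
	pod := &v1.Pod{
		Spec: v1.PodSpec{
			Containers: []v1.Container{{
				Name: "ctr",
				Resources: v1.ResourceRequirements{
					Limits: v1.ResourceList{
						v1.ResourceName("hugepages-2Mi"): resource.MustParse("1Gi"),
					},
				},
			}},
		},
	}
	// Hugepages now count as a QoS compute resource, but without cpu and
	// memory limits the pod cannot be Guaranteed.
	fmt.Println(qos.GetPodQOS(pod)) // Burstable
}
```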

View File

@ -3339,6 +3339,8 @@ const (
ResourceOpaqueIntPrefix = "pod.alpha.kubernetes.io/opaque-int-resource-"
// Default namespace prefix.
ResourceDefaultNamespacePrefix = "kubernetes.io/"
// Name prefix for huge page resources (alpha).
ResourceHugePagesPrefix = "hugepages-"
)
// ResourceList is a set of (resource name, quantity) pairs.

View File

@ -24,6 +24,7 @@ go_library(
deps = [
"//pkg/api/helper:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/selection:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",

View File

@ -22,6 +22,7 @@ import (
"strings" "strings"
"k8s.io/api/core/v1" "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/selection" "k8s.io/apimachinery/pkg/selection"
"k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/sets"
@ -41,6 +42,31 @@ func IsExtendedResourceName(name v1.ResourceName) bool {
func IsDefaultNamespaceResource(name v1.ResourceName) bool {
return !strings.Contains(string(name), "/") ||
strings.Contains(string(name), v1.ResourceDefaultNamespacePrefix)
}
// IsHugePageResourceName returns true if the resource name has the huge page
// resource prefix.
func IsHugePageResourceName(name v1.ResourceName) bool {
return strings.HasPrefix(string(name), v1.ResourceHugePagesPrefix)
}
// HugePageResourceName returns a ResourceName with the canonical hugepage
// prefix prepended for the specified page size. The page size is converted
// to its canonical representation.
func HugePageResourceName(pageSize resource.Quantity) v1.ResourceName {
return v1.ResourceName(fmt.Sprintf("%s%s", v1.ResourceHugePagesPrefix, pageSize.String()))
}
// HugePageSizeFromResourceName returns the page size for the specified huge page
// resource name. If the specified input is not a valid huge page resource name
// an error is returned.
func HugePageSizeFromResourceName(name v1.ResourceName) (resource.Quantity, error) {
if !IsHugePageResourceName(name) {
return resource.Quantity{}, fmt.Errorf("resource name: %s is not valid hugepage name", name)
}
pageSize := strings.TrimPrefix(string(name), v1.ResourceHugePagesPrefix)
return resource.ParseQuantity(pageSize)
}
// IsOpaqueIntResourceName returns true if the resource name has the opaque

View File

@ -24,6 +24,7 @@ go_library(
name = "go_default_library", name = "go_default_library",
srcs = ["qos.go"], srcs = ["qos.go"],
deps = [ deps = [
"//pkg/api/v1/helper:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library", "//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library", "//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library", "//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",

View File

@ -20,12 +20,16 @@ import (
"k8s.io/api/core/v1" "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/sets"
v1helper "k8s.io/kubernetes/pkg/api/v1/helper"
) )
// QOSList is a set of (resource name, QoS class) pairs. // QOSList is a set of (resource name, QoS class) pairs.
type QOSList map[v1.ResourceName]v1.PodQOSClass type QOSList map[v1.ResourceName]v1.PodQOSClass
var supportedQoSComputeResources = sets.NewString(string(v1.ResourceCPU), string(v1.ResourceMemory)) func isSupportedQoSComputeResource(name v1.ResourceName) bool {
supportedQoSComputeResources := sets.NewString(string(v1.ResourceCPU), string(v1.ResourceMemory))
return supportedQoSComputeResources.Has(string(name)) || v1helper.IsHugePageResourceName(name)
}
// GetPodQOS returns the QoS class of a pod. // GetPodQOS returns the QoS class of a pod.
// A pod is besteffort if none of its containers have specified any requests or limits. // A pod is besteffort if none of its containers have specified any requests or limits.
@ -39,7 +43,7 @@ func GetPodQOS(pod *v1.Pod) v1.PodQOSClass {
for _, container := range pod.Spec.Containers {
// process requests
for name, quantity := range container.Resources.Requests {
if !isSupportedQoSComputeResource(name) {
continue
}
if quantity.Cmp(zeroQuantity) == 1 {
@ -55,7 +59,7 @@ func GetPodQOS(pod *v1.Pod) v1.PodQOSClass {
// process limits
qosLimitsFound := sets.NewString()
for name, quantity := range container.Resources.Limits {
if !isSupportedQoSComputeResource(name) {
continue
}
if quantity.Cmp(zeroQuantity) == 1 {
@ -70,7 +74,7 @@ func GetPodQOS(pod *v1.Pod) v1.PodQOSClass {
}
}
if !qosLimitsFound.HasAll(string(v1.ResourceMemory), string(v1.ResourceCPU)) {
isGuaranteed = false
}
}

View File

@ -130,6 +130,12 @@ func TestGetPodQOS(t *testing.T) {
}),
expected: v1.PodQOSBurstable,
},
{
pod: newPod("burstable-hugepages", []v1.Container{
newContainer("burstable", addResource("hugepages-2Mi", "1Gi", getResourceList("0", "0")), addResource("hugepages-2Mi", "1Gi", getResourceList("0", "0"))),
}),
expected: v1.PodQOSBurstable,
},
}
for id, testCase := range testCases {
if actual := GetPodQOS(testCase.pod); testCase.expected != actual {
@ -141,7 +147,7 @@ func TestGetPodQOS(t *testing.T) {
k8sv1.Convert_v1_Pod_To_api_Pod(testCase.pod, &pod, nil)
if actual := qos.GetPodQOS(&pod); api.PodQOSClass(testCase.expected) != actual {
t.Errorf("[%d]: conversion invalid qos pod %s, expected: %s, actual: %s", id, testCase.pod.Name, testCase.expected, actual)
}
}
}

View File

@ -2402,6 +2402,28 @@ func ValidateTolerations(tolerations []api.Toleration, fldPath *field.Path) fiel
return allErrors
}
func toResourceNames(resources api.ResourceList) []api.ResourceName {
result := []api.ResourceName{}
for resourceName := range resources {
result = append(result, resourceName)
}
return result
}
func toSet(resourceNames []api.ResourceName) sets.String {
result := sets.NewString()
for _, resourceName := range resourceNames {
result.Insert(string(resourceName))
}
return result
}
func toContainerResourcesSet(ctr *api.Container) sets.String {
resourceNames := toResourceNames(ctr.Resources.Requests)
resourceNames = append(resourceNames, toResourceNames(ctr.Resources.Limits)...)
return toSet(resourceNames)
}
// validateContainersOnlyForPod does additional validation for containers on a pod versus a pod template
// it only does additive validation of fields not covered in validateContainers
func validateContainersOnlyForPod(containers []api.Container, fldPath *field.Path) field.ErrorList {
@ -2429,6 +2451,21 @@ func ValidatePod(pod *api.Pod) field.ErrorList {
allErrs = append(allErrs, validateContainersOnlyForPod(pod.Spec.Containers, specPath.Child("containers"))...)
allErrs = append(allErrs, validateContainersOnlyForPod(pod.Spec.InitContainers, specPath.Child("initContainers"))...)
if utilfeature.DefaultFeatureGate.Enabled(features.HugePages) {
hugePageResources := sets.NewString()
for i := range pod.Spec.Containers {
resourceSet := toContainerResourcesSet(&pod.Spec.Containers[i])
for resourceStr := range resourceSet {
if v1helper.IsHugePageResourceName(v1.ResourceName(resourceStr)) {
hugePageResources.Insert(resourceStr)
}
}
}
if len(hugePageResources) > 1 {
allErrs = append(allErrs, field.Invalid(specPath, hugePageResources, "must use a single hugepage size in a pod spec"))
}
}
return allErrs
}
@ -3445,7 +3482,10 @@ func ValidateNode(node *api.Node) field.ErrorList {
allErrs = append(allErrs, validateNodeTaints(node.Spec.Taints, fldPath.Child("taints"))...)
}
// Only validate spec.
// All status fields are optional and can be updated later.
// That said, if specified, we need to ensure they are valid.
allErrs = append(allErrs, ValidateNodeResources(node)...)
// external ID is required.
if len(node.Spec.ExternalID) == 0 {
@ -3461,6 +3501,38 @@ func ValidateNode(node *api.Node) field.ErrorList {
return allErrs
}
// ValidateNodeResources is used to make sure a node has valid capacity and allocatable values.
func ValidateNodeResources(node *api.Node) field.ErrorList {
allErrs := field.ErrorList{}
// Validate resource quantities in capacity.
hugePageSizes := sets.NewString()
for k, v := range node.Status.Capacity {
resPath := field.NewPath("status", "capacity", string(k))
allErrs = append(allErrs, ValidateResourceQuantityValue(string(k), v, resPath)...)
// track any huge page size that has a positive value
if helper.IsHugePageResourceName(k) && v.Value() > int64(0) {
hugePageSizes.Insert(string(k))
}
if len(hugePageSizes) > 1 {
allErrs = append(allErrs, field.Invalid(resPath, v, "may not have pre-allocated hugepages for multiple page sizes"))
}
}
// Validate resource quantities in allocatable.
hugePageSizes = sets.NewString()
for k, v := range node.Status.Allocatable {
resPath := field.NewPath("status", "allocatable", string(k))
allErrs = append(allErrs, ValidateResourceQuantityValue(string(k), v, resPath)...)
// track any huge page size that has a positive value
if helper.IsHugePageResourceName(k) && v.Value() > int64(0) {
hugePageSizes.Insert(string(k))
}
if len(hugePageSizes) > 1 {
allErrs = append(allErrs, field.Invalid(resPath, v, "may not have pre-allocated hugepages for multiple page sizes"))
}
}
return allErrs
}
// ValidateNodeUpdate tests to make sure a node update can be applied. Modifies oldNode.
func ValidateNodeUpdate(node, oldNode *api.Node) field.ErrorList {
fldPath := field.NewPath("metadata")
@ -3473,16 +3545,7 @@ func ValidateNodeUpdate(node, oldNode *api.Node) field.ErrorList {
// allErrs = append(allErrs, field.Invalid("status", node.Status, "must be empty"))
// }
allErrs = append(allErrs, ValidateNodeResources(node)...)
// Validate no duplicate addresses in node status.
addresses := make(map[api.NodeAddress]bool)
@ -3925,6 +3988,10 @@ func ValidateResourceRequirements(requirements *api.ResourceRequirements, fldPat
if resourceName == api.ResourceEphemeralStorage && !utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
allErrs = append(allErrs, field.Forbidden(limPath, "ResourceEphemeralStorage field disabled by feature-gate for ResourceRequirements"))
}
if helper.IsHugePageResourceName(resourceName) && !utilfeature.DefaultFeatureGate.Enabled(features.HugePages) {
allErrs = append(allErrs, field.Forbidden(limPath, fmt.Sprintf("%s field disabled by feature-gate for ResourceRequirements", resourceName)))
}
}
for resourceName, quantity := range requirements.Requests {
fldPath := reqPath.Key(string(resourceName))

View File

@ -2759,6 +2759,106 @@ func TestValidateVolumes(t *testing.T) {
}
}
func TestAlphaHugePagesIsolation(t *testing.T) {
successCases := []api.Pod{
{ // Basic fields.
ObjectMeta: metav1.ObjectMeta{Name: "123", Namespace: "ns"},
Spec: api.PodSpec{
Containers: []api.Container{
{
Name: "ctr", Image: "image", ImagePullPolicy: "IfNotPresent", TerminationMessagePolicy: "File",
Resources: api.ResourceRequirements{
Requests: api.ResourceList{
api.ResourceName("hugepages-2Mi"): resource.MustParse("1Gi"),
},
Limits: api.ResourceList{
api.ResourceName("hugepages-2Mi"): resource.MustParse("1Gi"),
},
},
},
},
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
},
},
}
failureCases := []api.Pod{
{ // Basic fields.
ObjectMeta: metav1.ObjectMeta{Name: "hugepages-shared", Namespace: "ns"},
Spec: api.PodSpec{
Containers: []api.Container{
{
Name: "ctr", Image: "image", ImagePullPolicy: "IfNotPresent", TerminationMessagePolicy: "File",
Resources: api.ResourceRequirements{
Requests: api.ResourceList{
api.ResourceName("hugepages-2Mi"): resource.MustParse("1Gi"),
},
Limits: api.ResourceList{
api.ResourceName("hugepages-2Mi"): resource.MustParse("2Gi"),
},
},
},
},
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
},
},
{ // Basic fields.
ObjectMeta: metav1.ObjectMeta{Name: "hugepages-multiple", Namespace: "ns"},
Spec: api.PodSpec{
Containers: []api.Container{
{
Name: "ctr", Image: "image", ImagePullPolicy: "IfNotPresent", TerminationMessagePolicy: "File",
Resources: api.ResourceRequirements{
Requests: api.ResourceList{
api.ResourceName("hugepages-2Mi"): resource.MustParse("1Gi"),
api.ResourceName("hugepages-1Gi"): resource.MustParse("2Gi"),
},
Limits: api.ResourceList{
api.ResourceName("hugepages-2Mi"): resource.MustParse("1Gi"),
api.ResourceName("hugepages-1Gi"): resource.MustParse("2Gi"),
},
},
},
},
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
},
},
}
// Enable alpha feature HugePages
err := utilfeature.DefaultFeatureGate.Set("HugePages=true")
if err != nil {
t.Errorf("Failed to enable feature gate for HugePages: %v", err)
return
}
for i := range successCases {
pod := &successCases[i]
if errs := ValidatePod(pod); len(errs) != 0 {
t.Errorf("Unexpected error for case[%d], err: %v", i, errs)
}
}
for i := range failureCases {
pod := &failureCases[i]
if errs := ValidatePod(pod); len(errs) == 0 {
t.Errorf("Expected error for case[%d], pod: %v", i, pod.Name)
}
}
// Disable alpha feature HugePages
err = utilfeature.DefaultFeatureGate.Set("HugePages=false")
if err != nil {
t.Errorf("Failed to disable feature gate for HugePages: %v", err)
return
}
// Disable alpha feature HugePages and ensure all success cases fail
for i := range successCases {
pod := &successCases[i]
if errs := ValidatePod(pod); len(errs) == 0 {
t.Errorf("Expected error for case[%d], pod: %v", i, pod.Name)
}
}
}
func TestAlphaLocalStorageCapacityIsolation(t *testing.T) {
testCases := []api.VolumeSource{
@ -7838,6 +7938,8 @@ func TestValidateNode(t *testing.T) {
api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
api.ResourceName("my.org/gpu"): resource.MustParse("10"),
api.ResourceName("hugepages-2Mi"): resource.MustParse("10Gi"),
api.ResourceName("hugepages-1Gi"): resource.MustParse("0"),
},
},
Spec: api.NodeSpec{
@ -8119,6 +8221,27 @@ func TestValidateNode(t *testing.T) {
ExternalID: "external", ExternalID: "external",
}, },
}, },
"multiple-pre-allocated-hugepages": {
ObjectMeta: metav1.ObjectMeta{
Name: "abc",
Labels: validSelector,
},
Status: api.NodeStatus{
Addresses: []api.NodeAddress{
{Type: api.NodeExternalIP, Address: "something"},
},
Capacity: api.ResourceList{
api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
api.ResourceName("my.org/gpu"): resource.MustParse("10"),
api.ResourceName("hugepages-2Mi"): resource.MustParse("10Gi"),
api.ResourceName("hugepages-1Gi"): resource.MustParse("10Gi"),
},
},
Spec: api.NodeSpec{
ExternalID: "external",
},
},
}
for k, v := range errorCases {
errs := ValidateNode(&v)

View File

@ -157,6 +157,12 @@ const (
//
// Alternative container-level CPU affinity policies.
CPUManager utilfeature.Feature = "CPUManager"
// owner: @derekwaynecarr
// alpha: v1.8
//
// Enable pods to consume pre-allocated huge pages of varying page sizes
HugePages utilfeature.Feature = "HugePages"
)
func init() {
@ -180,6 +186,7 @@ var defaultKubernetesFeatureGates = map[utilfeature.Feature]utilfeature.FeatureS
RotateKubeletClientCertificate: {Default: true, PreRelease: utilfeature.Beta},
PersistentLocalVolumes: {Default: false, PreRelease: utilfeature.Alpha},
LocalStorageCapacityIsolation: {Default: false, PreRelease: utilfeature.Alpha},
HugePages: {Default: false, PreRelease: utilfeature.Alpha},
DebugContainers: {Default: false, PreRelease: utilfeature.Alpha},
PodPriority: {Default: false, PreRelease: utilfeature.Alpha},
EnableEquivalenceClassCache: {Default: false, PreRelease: utilfeature.Alpha},

View File

@ -23,11 +23,14 @@ go_library(
"//conditions:default": [], "//conditions:default": [],
}), }),
deps = [ deps = [
"//pkg/api/v1/helper:go_default_library",
"//pkg/features:go_default_library",
"//vendor/github.com/google/cadvisor/events:go_default_library", "//vendor/github.com/google/cadvisor/events:go_default_library",
"//vendor/github.com/google/cadvisor/info/v1:go_default_library", "//vendor/github.com/google/cadvisor/info/v1:go_default_library",
"//vendor/github.com/google/cadvisor/info/v2:go_default_library", "//vendor/github.com/google/cadvisor/info/v2:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library", "//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library", "//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
] + select({ ] + select({
"@io_bazel_rules_go//go/platform:linux_amd64": [ "@io_bazel_rules_go//go/platform:linux_amd64": [
"//pkg/kubelet/types:go_default_library", "//pkg/kubelet/types:go_default_library",

View File

@ -21,6 +21,9 @@ import (
cadvisorapi2 "github.com/google/cadvisor/info/v2"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
utilfeature "k8s.io/apiserver/pkg/util/feature"
v1helper "k8s.io/kubernetes/pkg/api/v1/helper"
"k8s.io/kubernetes/pkg/features"
)
func CapacityFromMachineInfo(info *cadvisorapi.MachineInfo) v1.ResourceList {
@ -32,6 +35,17 @@ func CapacityFromMachineInfo(info *cadvisorapi.MachineInfo) v1.ResourceList {
int64(info.MemoryCapacity), int64(info.MemoryCapacity),
resource.BinarySI), resource.BinarySI),
}
// if huge pages are enabled, we report them as a schedulable resource on the node
if utilfeature.DefaultFeatureGate.Enabled(features.HugePages) {
for _, hugepagesInfo := range info.HugePages {
pageSizeBytes := int64(hugepagesInfo.PageSize * 1024)
hugePagesBytes := pageSizeBytes * int64(hugepagesInfo.NumPages)
pageSizeQuantity := resource.NewQuantity(pageSizeBytes, resource.BinarySI)
c[v1helper.HugePageResourceName(*pageSizeQuantity)] = *resource.NewQuantity(hugePagesBytes, resource.BinarySI)
}
}
return c
}
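A worked sketch of the conversion above (the 512-page, 2048KiB node is an assumption): cadvisor reports PageSize in KiB, so this machine advertises a capacity entry of hugepages-2Mi with a 1Gi quantity.

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
	v1helper "k8s.io/kubernetes/pkg/api/v1/helper"
)

func main() {
	// cadvisor reports hugepage sizes in KiB: 2048KiB pages, 512 of them.
	pageSizeBytes := int64(2048) * 1024 // 2Mi
	hugePagesBytes := pageSizeBytes * 512
	pageSizeQuantity := resource.NewQuantity(pageSizeBytes, resource.BinarySI)
	fmt.Println(v1helper.HugePageResourceName(*pageSizeQuantity))         // hugepages-2Mi
	fmt.Println(resource.NewQuantity(hugePagesBytes, resource.BinarySI)) // 1Gi
}
```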

View File

@ -52,6 +52,7 @@ go_library(
] + select({
"@io_bazel_rules_go//go/platform:linux_amd64": [
"//pkg/api:go_default_library",
"//pkg/api/v1/helper:go_default_library",
"//pkg/api/v1/helper/qos:go_default_library",
"//pkg/api/v1/resource:go_default_library",
"//pkg/kubelet/cm/util:go_default_library",
@ -63,6 +64,7 @@ go_library(
"//pkg/util/procfs:go_default_library", "//pkg/util/procfs:go_default_library",
"//pkg/util/sysctl:go_default_library", "//pkg/util/sysctl:go_default_library",
"//pkg/util/version:go_default_library", "//pkg/util/version:go_default_library",
"//vendor/github.com/docker/go-units:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library", "//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs:go_default_library", "//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd:go_default_library", "//vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd:go_default_library",

View File

@ -24,12 +24,16 @@ import (
"strings" "strings"
"time" "time"
units "github.com/docker/go-units"
"github.com/golang/glog" "github.com/golang/glog"
libcontainercgroups "github.com/opencontainers/runc/libcontainer/cgroups" libcontainercgroups "github.com/opencontainers/runc/libcontainer/cgroups"
cgroupfs "github.com/opencontainers/runc/libcontainer/cgroups/fs" cgroupfs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
cgroupsystemd "github.com/opencontainers/runc/libcontainer/cgroups/systemd" cgroupsystemd "github.com/opencontainers/runc/libcontainer/cgroups/systemd"
libcontainerconfigs "github.com/opencontainers/runc/libcontainer/configs" libcontainerconfigs "github.com/opencontainers/runc/libcontainer/configs"
"k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/sets"
utilfeature "k8s.io/apiserver/pkg/util/feature"
kubefeatures "k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/metrics" "k8s.io/kubernetes/pkg/kubelet/metrics"
)
@ -43,6 +47,10 @@ const (
libcontainerSystemd libcontainerCgroupManagerType = "systemd"
)
// hugePageSizeList is useful for converting to the hugetlb canonical unit
// which is what is expected when interacting with libcontainer
var hugePageSizeList = []string{"B", "kB", "MB", "GB", "TB", "PB"}
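A small sketch (hypothetical main) of the conversion this list supports: units.CustomSize renders a byte count into the size strings the hugetlb cgroup expects.

```go
package main

import (
	"fmt"

	units "github.com/docker/go-units"
)

// Same canonical-unit list as above.
var hugePageSizeList = []string{"B", "kB", "MB", "GB", "TB", "PB"}

func main() {
	// 2Mi pages (2097152 bytes) render as "2MB" in hugetlb's naming.
	fmt.Println(units.CustomSize("%g%s", float64(2097152), 1024.0, hugePageSizeList))
	// 1Gi pages render as "1GB".
	fmt.Println(units.CustomSize("%g%s", float64(1073741824), 1024.0, hugePageSizeList))
}
```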
// ConvertCgroupNameToSystemd converts the internal cgroup name to a systemd name.
// For example, the name /Burstable/pod_123-456 becomes Burstable-pod_123_456.slice
// If outputToCgroupFs is true, it expands the systemd name into the cgroupfs form.
@ -299,10 +307,16 @@ type subsystem interface {
GetStats(path string, stats *libcontainercgroups.Stats) error
}
// getSupportedSubsystems returns the list of supported cgroup subsystems.
func getSupportedSubsystems() []subsystem {
supportedSubsystems := []subsystem{
&cgroupfs.MemoryGroup{},
&cgroupfs.CpuGroup{},
}
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.HugePages) {
supportedSubsystems = append(supportedSubsystems, &cgroupfs.HugetlbGroup{})
}
return supportedSubsystems
} }
// setSupportedSubsystems sets cgroup resource limits only on the supported
@ -315,7 +329,7 @@ var supportedSubsystems = []subsystem{
// but this is not possible with libcontainers Set() method
// See https://github.com/opencontainers/runc/issues/932
func setSupportedSubsystems(cgroupConfig *libcontainerconfigs.Cgroup) error {
for _, sys := range getSupportedSubsystems() {
if _, ok := cgroupConfig.Paths[sys.Name()]; !ok {
return fmt.Errorf("Failed to find subsystem mount for subsystem: %v", sys.Name())
}
@ -343,6 +357,30 @@ func (m *cgroupManagerImpl) toResources(resourceConfig *ResourceConfig) *libcont
if resourceConfig.CpuPeriod != nil {
resources.CpuPeriod = *resourceConfig.CpuPeriod
}
// if huge pages are enabled, we set them in libcontainer
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.HugePages) {
// for each page size enumerated, set that value
pageSizes := sets.NewString()
for pageSize, limit := range resourceConfig.HugePageLimit {
sizeString := units.CustomSize("%g%s", float64(pageSize), 1024.0, hugePageSizeList)
resources.HugetlbLimit = append(resources.HugetlbLimit, &libcontainerconfigs.HugepageLimit{
Pagesize: sizeString,
Limit: uint64(limit),
})
pageSizes.Insert(sizeString)
}
// for each page size omitted, limit to 0
for _, pageSize := range cgroupfs.HugePageSizes {
if pageSizes.Has(pageSize) {
continue
}
resources.HugetlbLimit = append(resources.HugetlbLimit, &libcontainerconfigs.HugepageLimit{
Pagesize: pageSize,
Limit: uint64(0),
})
}
}
return resources
}
@ -502,7 +540,7 @@ func (m *cgroupManagerImpl) ReduceCPULimits(cgroupName CgroupName) error {
func getStatsSupportedSubsystems(cgroupPaths map[string]string) (*libcontainercgroups.Stats, error) {
stats := libcontainercgroups.NewStats()
for _, sys := range getSupportedSubsystems() {
if _, ok := cgroupPaths[sys.Name()]; !ok {
return nil, fmt.Errorf("Failed to find subsystem mount for subsystem: %v", sys.Name())
}

View File

@ -26,6 +26,7 @@ import (
libcontainercgroups "github.com/opencontainers/runc/libcontainer/cgroups"
"k8s.io/api/core/v1"
v1helper "k8s.io/kubernetes/pkg/api/v1/helper"
v1qos "k8s.io/kubernetes/pkg/api/v1/helper/qos"
"k8s.io/kubernetes/pkg/api/v1/resource"
)
@ -83,6 +84,23 @@ func MilliCPUToShares(milliCPU int64) int64 {
return shares
}
// HugePageLimits converts the API representation to a map
// from huge page size (in bytes) to huge page limit (in bytes).
func HugePageLimits(resourceList v1.ResourceList) map[int64]int64 {
hugePageLimits := map[int64]int64{}
for k, v := range resourceList {
if v1helper.IsHugePageResourceName(k) {
pageSize, _ := v1helper.HugePageSizeFromResourceName(k)
if value, exists := hugePageLimits[pageSize.Value()]; exists {
hugePageLimits[pageSize.Value()] = value + v.Value()
} else {
hugePageLimits[pageSize.Value()] = v.Value()
}
}
}
return hugePageLimits
}
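To make the aggregation concrete, a standalone sketch that mirrors HugePageLimits (the helper itself lives in the kubelet's cm package; the inputs here are assumptions):

```go
package main

import (
	"fmt"

	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	v1helper "k8s.io/kubernetes/pkg/api/v1/helper"
)

// hugePageLimits mirrors the HugePageLimits helper above: map each hugepage
// size (bytes) to the summed limit (bytes), skipping non-hugepage resources.
func hugePageLimits(rl v1.ResourceList) map[int64]int64 {
	out := map[int64]int64{}
	for k, v := range rl {
		if v1helper.IsHugePageResourceName(k) {
			if size, err := v1helper.HugePageSizeFromResourceName(k); err == nil {
				out[size.Value()] += v.Value()
			}
		}
	}
	return out
}

func main() {
	rl := v1.ResourceList{
		v1.ResourceName("hugepages-2Mi"): resource.MustParse("1Gi"),
		v1.ResourceCPU:                   resource.MustParse("1"),
	}
	fmt.Println(hugePageLimits(rl)) // map[2097152:1073741824]; cpu is ignored
}
```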
// ResourceConfigForPod takes the input pod and outputs the cgroup resource config.
func ResourceConfigForPod(pod *v1.Pod) *ResourceConfig {
// sum requests and limits.
@ -108,6 +126,8 @@ func ResourceConfigForPod(pod *v1.Pod) *ResourceConfig {
// track if limits were applied for each resource.
memoryLimitsDeclared := true
cpuLimitsDeclared := true
// map hugepage pagesize (bytes) to limits (bytes)
hugePageLimits := map[int64]int64{}
for _, container := range pod.Spec.Containers {
if container.Resources.Limits.Cpu().IsZero() {
cpuLimitsDeclared = false
@ -115,6 +135,14 @@ func ResourceConfigForPod(pod *v1.Pod) *ResourceConfig {
if container.Resources.Limits.Memory().IsZero() {
memoryLimitsDeclared = false
}
containerHugePageLimits := HugePageLimits(container.Resources.Requests)
for k, v := range containerHugePageLimits {
if value, exists := hugePageLimits[k]; exists {
hugePageLimits[k] = value + v
} else {
hugePageLimits[k] = v
}
}
}
// determine the qos class
@ -140,6 +168,7 @@ func ResourceConfigForPod(pod *v1.Pod) *ResourceConfig {
shares := int64(MinShares)
result.CpuShares = &shares
}
result.HugePageLimit = hugePageLimits
return result
}

View File

@ -28,7 +28,9 @@ import (
"k8s.io/api/core/v1" "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/types"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/kubernetes/pkg/api" "k8s.io/kubernetes/pkg/api"
kubefeatures "k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/events" "k8s.io/kubernetes/pkg/kubelet/events"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api" evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
) )
@ -154,6 +156,10 @@ func getCgroupConfig(rl v1.ResourceList) *ResourceConfig {
val := MilliCPUToShares(q.MilliValue())
rc.CpuShares = &val
}
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.HugePages) {
rc.HugePageLimit = HugePageLimits(rl)
}
return &rc
}

View File

@ -27,9 +27,13 @@ import (
"k8s.io/apimachinery/pkg/util/wait" "k8s.io/apimachinery/pkg/util/wait"
units "github.com/docker/go-units"
cgroupfs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
"k8s.io/api/core/v1" "k8s.io/api/core/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
v1qos "k8s.io/kubernetes/pkg/api/v1/helper/qos" v1qos "k8s.io/kubernetes/pkg/api/v1/helper/qos"
"k8s.io/kubernetes/pkg/api/v1/resource" "k8s.io/kubernetes/pkg/api/v1/resource"
kubefeatures "k8s.io/kubernetes/pkg/features"
) )
const ( const (
@ -100,11 +104,18 @@ func (m *qosContainerManagerImpl) Start(getNodeAllocatable func() v1.ResourceLis
minShares := int64(MinShares)
resourceParameters.CpuShares = &minShares
}
// containerConfig object stores the cgroup specifications
containerConfig := &CgroupConfig{
Name: absoluteContainerName,
ResourceParameters: resourceParameters,
}
// for each enumerated huge page size, the qos tiers are unbounded
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.HugePages) {
m.setHugePagesUnbounded(containerConfig)
}
// check if it exists
if !cm.Exists(absoluteContainerName) {
if err := cm.Create(containerConfig); err != nil {
@ -138,6 +149,29 @@ func (m *qosContainerManagerImpl) Start(getNodeAllocatable func() v1.ResourceLis
return nil
}
// setHugePagesUnbounded ensures hugetlb is effectively unbounded
func (m *qosContainerManagerImpl) setHugePagesUnbounded(cgroupConfig *CgroupConfig) error {
hugePageLimit := map[int64]int64{}
for _, pageSize := range cgroupfs.HugePageSizes {
pageSizeBytes, err := units.RAMInBytes(pageSize)
if err != nil {
return err
}
hugePageLimit[pageSizeBytes] = int64(1 << 62)
}
cgroupConfig.ResourceParameters.HugePageLimit = hugePageLimit
return nil
}
func (m *qosContainerManagerImpl) setHugePagesConfig(configs map[v1.PodQOSClass]*CgroupConfig) error {
for _, v := range configs {
if err := m.setHugePagesUnbounded(v); err != nil {
return err
}
}
return nil
}
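For reference, a small sketch (hypothetical main) of the parsing these helpers rely on: units.RAMInBytes turns the canonical hugetlb size strings back into byte counts, and 1<<62 is the "effectively unbounded" limit set above.

```go
package main

import (
	"fmt"

	units "github.com/docker/go-units"
)

func main() {
	// units.RAMInBytes parses hugetlb-style size strings using base 1024,
	// inverting the units.CustomSize conversion used elsewhere in this PR.
	for _, s := range []string{"2MB", "1GB"} {
		n, err := units.RAMInBytes(s)
		if err != nil {
			panic(err)
		}
		fmt.Println(s, n) // 2MB 2097152, then 1GB 1073741824
	}
	fmt.Println(int64(1 << 62)) // far larger than any real hugetlb reservation
}
```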
func (m *qosContainerManagerImpl) setCPUCgroupConfig(configs map[v1.PodQOSClass]*CgroupConfig) error {
pods := m.activePods()
burstablePodCPURequest := int64(0)
@ -262,6 +296,13 @@ func (m *qosContainerManagerImpl) UpdateCgroups() error {
return err
}
// update the qos level cgroup settings for huge pages (ensure they remain unbounded)
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.HugePages) {
if err := m.setHugePagesConfig(qosConfigs); err != nil {
return err
}
}
for resource, percentReserve := range m.qosReserved {
switch resource {
case v1.ResourceMemory:

View File

@ -31,6 +31,8 @@ type ResourceConfig struct {
CpuQuota *int64
// CPU quota period.
CpuPeriod *int64
// HugePageLimit map from page size (in bytes) to limit (in bytes)
HugePageLimit map[int64]int64
}
// CgroupName is the abstract name of a cgroup prior to any driver specific conversion.

View File

@ -630,6 +630,19 @@ func (kl *Kubelet) setNodeStatusMachineInfo(node *v1.Node) {
}
node.Status.Allocatable[k] = value
}
// for every huge page reservation, we need to remove it from allocatable memory
for k, v := range node.Status.Capacity {
if v1helper.IsHugePageResourceName(k) {
allocatableMemory := node.Status.Allocatable[v1.ResourceMemory]
value := *(v.Copy())
allocatableMemory.Sub(value)
if allocatableMemory.Sign() < 0 {
// Negative Allocatable resources don't make sense.
allocatableMemory.Set(0)
}
node.Status.Allocatable[v1.ResourceMemory] = allocatableMemory
}
}
}
// Set versioninfo for the node.
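A worked sketch of the adjustment above (node quantities assumed): memory reserved for huge pages is carved out of allocatable memory, clamped at zero.

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	// Assume 10Gi of allocatable memory and a 2Gi hugepages-2Mi reservation.
	allocatableMemory := resource.MustParse("10Gi")
	hugePagesReserved := resource.MustParse("2Gi")
	allocatableMemory.Sub(hugePagesReserved)
	if allocatableMemory.Sign() < 0 {
		// Negative allocatable resources don't make sense; clamp to zero.
		allocatableMemory.Set(0)
	}
	fmt.Println(allocatableMemory.String()) // 8Gi
}
```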

View File

@ -509,6 +509,12 @@ func GetResourceRequest(pod *v1.Pod) *schedulercache.Resource {
result.SetExtended(rName, value)
}
}
if v1helper.IsHugePageResourceName(rName) {
value := rQuantity.Value()
if value > result.HugePages[rName] {
result.SetHugePages(rName, value)
}
}
}
}
}
@ -542,7 +548,12 @@ func PodFitsResources(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.No
// We couldn't parse metadata - fallback to computing it.
podRequest = GetResourceRequest(pod)
}
if podRequest.MilliCPU == 0 &&
podRequest.Memory == 0 &&
podRequest.NvidiaGPU == 0 &&
podRequest.EphemeralStorage == 0 &&
len(podRequest.ExtendedResources) == 0 &&
len(podRequest.HugePages) == 0 {
return len(predicateFails) == 0, predicateFails, nil
}
@ -567,6 +578,12 @@ func PodFitsResources(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.No
}
}
for rName, rQuant := range podRequest.HugePages {
if allocatable.HugePages[rName] < rQuant+nodeInfo.RequestedResource().HugePages[rName] {
predicateFails = append(predicateFails, NewInsufficientResourceError(rName, podRequest.HugePages[rName], nodeInfo.RequestedResource().HugePages[rName], allocatable.HugePages[rName]))
}
}
if glog.V(10) {
if len(predicateFails) == 0 {
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is

View File

@ -75,9 +75,10 @@ func (pvs FakePersistentVolumeInfo) GetPersistentVolumeInfo(pvID string) (*v1.Pe
var (
opaqueResourceA = v1helper.OpaqueIntResourceName("AAA")
opaqueResourceB = v1helper.OpaqueIntResourceName("BBB")
hugePageResourceA = v1helper.HugePageResourceName(resource.MustParse("2Mi"))
)
func makeResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA, storage, hugePageA int64) v1.NodeResources {
return v1.NodeResources{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
@ -86,11 +87,12 @@ func makeResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA, storage int64) v
v1.ResourceNvidiaGPU: *resource.NewQuantity(nvidiaGPUs, resource.DecimalSI),
opaqueResourceA: *resource.NewQuantity(opaqueA, resource.DecimalSI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(storage, resource.BinarySI),
hugePageResourceA: *resource.NewQuantity(hugePageA, resource.BinarySI),
},
}
}
func makeAllocatableResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA, storage, hugePageA int64) v1.ResourceList {
return v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(memory, resource.BinarySI),
@ -98,6 +100,7 @@ func makeAllocatableResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA, stora
v1.ResourceNvidiaGPU: *resource.NewQuantity(nvidiaGPUs, resource.DecimalSI),
opaqueResourceA: *resource.NewQuantity(opaqueA, resource.DecimalSI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(storage, resource.BinarySI),
hugePageResourceA: *resource.NewQuantity(hugePageA, resource.BinarySI),
}
}
@ -348,10 +351,38 @@ func TestPodFitsResources(t *testing.T) {
test: "opaque resource allocatable enforced for unknown resource for init container", test: "opaque resource allocatable enforced for unknown resource for init container",
reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(opaqueResourceB, 1, 0, 0)}, reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(opaqueResourceB, 1, 0, 0)},
}, },
{
pod: newResourcePod(
schedulercache.Resource{MilliCPU: 1, Memory: 1, HugePages: map[v1.ResourceName]int64{hugePageResourceA: 10}}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 0, Memory: 0, HugePages: map[v1.ResourceName]int64{hugePageResourceA: 0}})),
fits: false,
test: "hugepages resource capacity enforced",
reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(hugePageResourceA, 10, 0, 5)},
},
{
pod: newResourceInitPod(newResourcePod(schedulercache.Resource{}),
schedulercache.Resource{MilliCPU: 1, Memory: 1, HugePages: map[v1.ResourceName]int64{hugePageResourceA: 10}}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 0, Memory: 0, HugePages: map[v1.ResourceName]int64{hugePageResourceA: 0}})),
fits: false,
test: "hugepages resource capacity enforced for init container",
reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(hugePageResourceA, 10, 0, 5)},
},
{
pod: newResourcePod(
schedulercache.Resource{MilliCPU: 1, Memory: 1, HugePages: map[v1.ResourceName]int64{hugePageResourceA: 3}},
schedulercache.Resource{MilliCPU: 1, Memory: 1, HugePages: map[v1.ResourceName]int64{hugePageResourceA: 3}}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 0, Memory: 0, HugePages: map[v1.ResourceName]int64{hugePageResourceA: 2}})),
fits: false,
test: "hugepages resource allocatable enforced for multiple containers",
reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(hugePageResourceA, 6, 2, 5)},
},
}
for _, test := range enoughPodsTests {
node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 5, 20, 5).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 5, 20, 5)}}
test.nodeInfo.SetNode(&node)
fits, reasons, err := PodFitsResources(test.pod, PredicateMetadata(test.pod, nil), test.nodeInfo)
if err != nil {
@ -406,7 +437,7 @@ func TestPodFitsResources(t *testing.T) {
},
}
for _, test := range notEnoughPodsTests {
node := v1.Node{Status: v1.NodeStatus{Capacity: v1.ResourceList{}, Allocatable: makeAllocatableResources(10, 20, 0, 1, 0, 0, 0)}}
test.nodeInfo.SetNode(&node)
fits, reasons, err := PodFitsResources(test.pod, PredicateMetadata(test.pod, nil), test.nodeInfo)
if err != nil {
@ -464,7 +495,7 @@ func TestPodFitsResources(t *testing.T) {
}
for _, test := range storagePodsTests {
node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 5, 20, 5).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 5, 20, 5)}}
test.nodeInfo.SetNode(&node)
fits, reasons, err := PodFitsResources(test.pod, PredicateMetadata(test.pod, nil), test.nodeInfo)
if err != nil {
@ -1889,7 +1920,7 @@ func TestRunGeneralPredicates(t *testing.T) {
newResourcePod(schedulercache.Resource{MilliCPU: 9, Memory: 19})),
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0, 0)},
},
fits: true,
wErr: nil,
@@ -1901,7 +1932,7 @@ func TestRunGeneralPredicates(t *testing.T) {
newResourcePod(schedulercache.Resource{MilliCPU: 5, Memory: 19})),
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0, 0)},
},
fits: false,
wErr: nil,
@@ -1915,7 +1946,7 @@ func TestRunGeneralPredicates(t *testing.T) {
pod: &v1.Pod{},
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 9, Memory: 19})),
node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0, 0)}},
fits: true,
wErr: nil,
test: "no resources/port/host requested always fits on GPU machine",
@@ -1924,7 +1955,7 @@ func TestRunGeneralPredicates(t *testing.T) {
pod: newResourcePod(schedulercache.Resource{MilliCPU: 3, Memory: 1, NvidiaGPU: 1}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 5, Memory: 10, NvidiaGPU: 1})),
node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0, 0)}},
fits: false,
wErr: nil,
reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(v1.ResourceNvidiaGPU, 1, 1, 1)},
@@ -1934,7 +1965,7 @@ func TestRunGeneralPredicates(t *testing.T) {
pod: newResourcePod(schedulercache.Resource{MilliCPU: 3, Memory: 1, NvidiaGPU: 1}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 5, Memory: 10, NvidiaGPU: 0})),
node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0, 0)}},
fits: true,
wErr: nil,
test: "enough GPU resource",
@@ -1948,7 +1979,7 @@ func TestRunGeneralPredicates(t *testing.T) {
nodeInfo: schedulercache.NewNodeInfo(),
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0, 0)},
},
fits: false,
wErr: nil,
@@ -1960,7 +1991,7 @@ func TestRunGeneralPredicates(t *testing.T) {
nodeInfo: schedulercache.NewNodeInfo(newPodWithPort(123)),
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0, 0)},
},
fits: false,
wErr: nil,
@@ -3252,7 +3283,7 @@ func TestPodSchedulesOnNodeWithMemoryPressureCondition(t *testing.T) {
ImagePullPolicy: "Always",
// at least one requirement -> burstable pod
Resources: v1.ResourceRequirements{
Requests: makeAllocatableResources(100, 100, 100, 100, 0, 0, 0),
},
},
},
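The new seventh argument threading through the `makeResources`/`makeAllocatableResources` calls above is the allocatable quantity of the test's hugepage resource. A sketch of the fixture shape those calls assume; the parameter names and exact resource constants are illustrative rather than verbatim from the test file:

```go
// Hypothetical fixture: the trailing hugePageA argument populates a
// pre-declared hugepage resource name.
var hugePageResourceA = v1.ResourceName("hugepages-2Mi")

func makeAllocatableResources(milliCPU, memory, nvidiaGPUs, pods, extendedA, storage, hugePageA int64) v1.ResourceList {
	return v1.ResourceList{
		v1.ResourceCPU:                     *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
		v1.ResourceMemory:                  *resource.NewQuantity(memory, resource.BinarySI),
		v1.ResourceNvidiaGPU:               *resource.NewQuantity(nvidiaGPUs, resource.DecimalSI),
		v1.ResourcePods:                    *resource.NewQuantity(pods, resource.DecimalSI),
		v1.ResourceName("example.com/aaa"): *resource.NewQuantity(extendedA, resource.DecimalSI), // stand-in extended resource
		v1.ResourceEphemeralStorage:        *resource.NewQuantity(storage, resource.BinarySI),
		hugePageResourceA:                  *resource.NewQuantity(hugePageA, resource.BinarySI),
	}
}
```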

@@ -72,6 +72,7 @@ type Resource struct {
// explicitly as int, to avoid conversions and improve performance.
AllowedPodNumber int
ExtendedResources map[v1.ResourceName]int64
HugePages map[v1.ResourceName]int64
}

// New creates a Resource from ResourceList
@@ -103,6 +104,9 @@ func (r *Resource) Add(rl v1.ResourceList) {
if v1helper.IsExtendedResourceName(rName) {
r.AddExtended(rName, rQuant.Value())
}
if v1helper.IsHugePageResourceName(rName) {
r.AddHugePages(rName, rQuant.Value())
}
}
}
}

@@ -118,6 +122,9 @@ func (r *Resource) ResourceList() v1.ResourceList {
for rName, rQuant := range r.ExtendedResources {
result[rName] = *resource.NewQuantity(rQuant, resource.DecimalSI)
}
for rName, rQuant := range r.HugePages {
result[rName] = *resource.NewQuantity(rQuant, resource.BinarySI)
}
return result
}
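Note the asymmetry in `ResourceList()` above: extended resources render with `resource.DecimalSI`, while hugepage totals use `resource.BinarySI`, so byte counts round-trip in the power-of-two notation pods use to request them. A standalone illustration (not part of the commit):

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	// Same byte count, two canonical renderings.
	binary := resource.NewQuantity(2*1024*1024, resource.BinarySI)
	decimal := resource.NewQuantity(2*1024*1024, resource.DecimalSI)
	fmt.Println(binary.String())  // "2Mi"
	fmt.Println(decimal.String()) // "2097152"
}
```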
@@ -135,6 +142,12 @@ func (r *Resource) Clone() *Resource {
res.ExtendedResources[k] = v
}
}
if r.HugePages != nil {
res.HugePages = make(map[v1.ResourceName]int64)
for k, v := range r.HugePages {
res.HugePages[k] = v
}
}
return res
}

@@ -150,6 +163,18 @@ func (r *Resource) SetExtended(name v1.ResourceName, quantity int64) {
r.ExtendedResources[name] = quantity
}
// AddHugePages adds the given quantity to the running total tracked for the
// named hugepage resource.
func (r *Resource) AddHugePages(name v1.ResourceName, quantity int64) {
r.SetHugePages(name, r.HugePages[name]+quantity)
}
// SetHugePages records the total tracked for the named hugepage resource.
func (r *Resource) SetHugePages(name v1.ResourceName, quantity int64) {
// Lazily allocate hugepages resource map.
if r.HugePages == nil {
r.HugePages = map[v1.ResourceName]int64{}
}
r.HugePages[name] = quantity
}
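A minimal usage sketch for the new accessors, assuming only what the hunks above show (the hugepage branch in `Add` plus the lazily allocated map); the resource list literal is illustrative:

```go
// Hypothetical: folding a pod's requests into a scheduler cache Resource.
r := &schedulercache.Resource{}
r.Add(v1.ResourceList{
	v1.ResourceCPU:                   resource.MustParse("500m"),
	v1.ResourceName("hugepages-2Mi"): resource.MustParse("4Mi"),
})
// SetHugePages allocated the map on first use; r.HugePages now holds
// {"hugepages-2Mi": 4194304}, the quantity's integer value in bytes.
```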
// NewNodeInfo returns a ready to use empty NodeInfo object.
// If any pods are given in arguments, their information will be aggregated in
// the returned object.
@@ -307,6 +332,12 @@ func (n *NodeInfo) addPod(pod *v1.Pod) {
for rName, rQuant := range res.ExtendedResources {
n.requestedResource.ExtendedResources[rName] += rQuant
}
if n.requestedResource.HugePages == nil && len(res.HugePages) > 0 {
n.requestedResource.HugePages = map[v1.ResourceName]int64{}
}
for rName, rQuant := range res.HugePages {
n.requestedResource.HugePages[rName] += rQuant
}
n.nonzeroRequest.MilliCPU += non0_cpu
n.nonzeroRequest.Memory += non0_mem
n.pods = append(n.pods, pod)
@@ -362,6 +393,12 @@ func (n *NodeInfo) removePod(pod *v1.Pod) error {
for rName, rQuant := range res.ExtendedResources {
n.requestedResource.ExtendedResources[rName] -= rQuant
}
if len(res.HugePages) > 0 && n.requestedResource.HugePages == nil {
n.requestedResource.HugePages = map[v1.ResourceName]int64{}
}
for rName, rQuant := range res.HugePages {
n.requestedResource.HugePages[rName] -= rQuant
}
n.nonzeroRequest.MilliCPU -= non0_cpu
n.nonzeroRequest.Memory -= non0_mem
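Both `addPod` and `removePod` guard their loops with a lazy map allocation, since assigning into a nil Go map panics while reading a missing key merely yields zero. A self-contained mini-model of that bookkeeping (names are illustrative, not from the commit):

```go
package main

import "fmt"

// tracker mirrors the requestedResource.HugePages bookkeeping pattern.
type tracker struct {
	hugePages map[string]int64 // nil until first needed, as in NodeInfo
}

func (t *tracker) apply(podPages map[string]int64, sign int64) {
	// Lazily allocate before indexing: assigning into a nil map would panic.
	if t.hugePages == nil && len(podPages) > 0 {
		t.hugePages = map[string]int64{}
	}
	for name, q := range podPages {
		t.hugePages[name] += sign * q
	}
}

func main() {
	var t tracker
	req := map[string]int64{"hugepages-2Mi": 2 << 20}
	t.apply(req, +1) // addPod
	t.apply(req, -1) // removePod
	fmt.Println(t.hugePages["hugepages-2Mi"]) // 0: the accounting round-trips
}
```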

@@ -3759,6 +3759,8 @@ const (
ResourceOpaqueIntPrefix = "pod.alpha.kubernetes.io/opaque-int-resource-"
// Default namespace prefix.
ResourceDefaultNamespacePrefix = "kubernetes.io/"
// Name prefix for huge page resources (alpha).
ResourceHugePagesPrefix = "hugepages-"
)
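For context on how the new prefix surfaces to users: resource names concatenate `hugepages-` with the canonical page-size string, so a pod requests pre-allocated 2Mi pages under `hugepages-2Mi`. A consumer-side sketch (illustrative, assuming the alpha feature gate is enabled):

```go
// Hypothetical container limits: 100Mi of 2Mi pages, i.e. 50 pages.
limits := v1.ResourceList{
	v1.ResourceName("hugepages-2Mi"): resource.MustParse("100Mi"),
	v1.ResourceMemory:                resource.MustParse("100Mi"),
}
```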
// ResourceList is a set of (resource name, quantity) pairs.