Merge pull request #50859 from derekwaynecarr/hugepages-feature

Automatic merge from submit-queue

HugePages feature

**What this PR does / why we need it**:
Implements HugePages support per https://github.com/kubernetes/community/pull/837

Feature track issue: https://github.com/kubernetes/features/issues/275

**Special notes for your reviewer**:
A follow-on PR has been opened to add EmptyDir support.

**Release note**:
```release-note
Alpha support for pre-allocated hugepages
```
Merged by Kubernetes Submit Queue on 2017-09-05 11:16:17 -07:00, committed by GitHub, as commit 2f543f321d.
26 changed files with 682 additions and 46 deletions

View File

@ -31,6 +31,30 @@ import (
"k8s.io/kubernetes/pkg/api"
)
// IsHugePageResourceName returns true if the resource name has the huge page
// resource prefix.
func IsHugePageResourceName(name api.ResourceName) bool {
return strings.HasPrefix(string(name), api.ResourceHugePagesPrefix)
}
// HugePageResourceName returns a ResourceName with the canonical hugepage
// prefix prepended for the specified page size. The page size is converted
// to its canonical representation.
func HugePageResourceName(pageSize resource.Quantity) api.ResourceName {
return api.ResourceName(fmt.Sprintf("%s%s", api.ResourceHugePagesPrefix, pageSize.String()))
}
// HugePageSizeFromResourceName returns the page size for the specified huge page
// resource name. If the specified input is not a valid huge page resource name
// an error is returned.
func HugePageSizeFromResourceName(name api.ResourceName) (resource.Quantity, error) {
if !IsHugePageResourceName(name) {
return resource.Quantity{}, fmt.Errorf("resource name: %s is not a valid hugepage name", name)
}
pageSize := strings.TrimPrefix(string(name), api.ResourceHugePagesPrefix)
return resource.ParseQuantity(pageSize)
}
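
For orientation, a minimal sketch of how these three helpers round-trip a page size (a hypothetical standalone program; pkg/api and pkg/api/helper are in-tree packages, so this only compiles inside the Kubernetes repository):

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/api/helper"
)

func main() {
	// Page size -> canonical resource name.
	name := helper.HugePageResourceName(resource.MustParse("2Mi"))
	fmt.Println(name) // hugepages-2Mi

	// Resource name -> page size; errors on non-hugepage names.
	size, err := helper.HugePageSizeFromResourceName(name)
	fmt.Println(size.Value(), err) // 2097152 <nil>

	// Prefix check used throughout this PR.
	fmt.Println(helper.IsHugePageResourceName(api.ResourceName("memory"))) // false
}
```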
// NonConvertibleFields iterates over the provided map and returns only
// the keys with the "non-convertible.kubernetes.io" prefix.
func NonConvertibleFields(annotations map[string]string) map[string]string {
@ -113,7 +137,7 @@ var standardContainerResources = sets.NewString(
// IsStandardContainerResourceName returns true if the container can make a resource request
// for the specified resource
func IsStandardContainerResourceName(str string) bool {
return standardContainerResources.Has(str)
return standardContainerResources.Has(str) || IsHugePageResourceName(api.ResourceName(str))
}
// IsExtendedResourceName returns true if the resource name is not in the
@ -153,6 +177,7 @@ var overcommitBlacklist = sets.NewString(string(api.ResourceNvidiaGPU))
// namespace and not blacklisted.
func IsOvercommitAllowed(name api.ResourceName) bool {
return IsDefaultNamespaceResource(name) &&
!IsHugePageResourceName(name) &&
!overcommitBlacklist.Has(string(name))
}
@ -220,7 +245,7 @@ var standardResources = sets.NewString(
// IsStandardResourceName returns true if the resource is known to the system
func IsStandardResourceName(str string) bool {
return standardResources.Has(str)
return standardResources.Has(str) || IsHugePageResourceName(api.ResourceName(str))
}
var integerResources = sets.NewString(

View File

@ -58,10 +58,28 @@ func TestIsStandardResource(t *testing.T) {
{"disk", false},
{"blah", false},
{"x.y.z", false},
{"hugepages-2Mi", true},
}
for i, tc := range testCases {
if IsStandardResourceName(tc.input) != tc.output {
t.Errorf("case[%d], expected: %t, got: %t", i, tc.output, !tc.output)
t.Errorf("case[%d], input: %s, expected: %t, got: %t", i, tc.input, tc.output, !tc.output)
}
}
}
func TestIsStandardContainerResource(t *testing.T) {
testCases := []struct {
input string
output bool
}{
{"cpu", true},
{"memory", true},
{"disk", false},
{"hugepages-2Mi", true},
}
for i, tc := range testCases {
if IsStandardContainerResourceName(tc.input) != tc.output {
t.Errorf("case[%d], input: %s, expected: %t, got: %t", i, tc.input, tc.output, !tc.output)
}
}
}
@ -353,3 +371,120 @@ func TestGetNodeAffinityFromAnnotations(t *testing.T) {
}
}
}
func TestIsHugePageResourceName(t *testing.T) {
testCases := []struct {
name api.ResourceName
result bool
}{
{
name: api.ResourceName("hugepages-2Mi"),
result: true,
},
{
name: api.ResourceName("hugepages-1Gi"),
result: true,
},
{
name: api.ResourceName("cpu"),
result: false,
},
{
name: api.ResourceName("memory"),
result: false,
},
}
for _, testCase := range testCases {
if testCase.result != IsHugePageResourceName(testCase.name) {
t.Errorf("resource: %v expected result: %v", testCase.name, testCase.result)
}
}
}
func TestHugePageResourceName(t *testing.T) {
testCases := []struct {
pageSize resource.Quantity
name api.ResourceName
}{
{
pageSize: resource.MustParse("2Mi"),
name: api.ResourceName("hugepages-2Mi"),
},
{
pageSize: resource.MustParse("1Gi"),
name: api.ResourceName("hugepages-1Gi"),
},
{
// verify we do not regress our canonical representation
pageSize: *resource.NewQuantity(int64(2097152), resource.BinarySI),
name: api.ResourceName("hugepages-2Mi"),
},
}
for _, testCase := range testCases {
if result := HugePageResourceName(testCase.pageSize); result != testCase.name {
t.Errorf("pageSize: %v, expected: %v, but got: %v", testCase.pageSize.String(), testCase.name, result.String())
}
}
}
func TestHugePageSizeFromResourceName(t *testing.T) {
testCases := []struct {
name api.ResourceName
expectErr bool
pageSize resource.Quantity
}{
{
name: api.ResourceName("hugepages-2Mi"),
pageSize: resource.MustParse("2Mi"),
expectErr: false,
},
{
name: api.ResourceName("hugepages-1Gi"),
pageSize: resource.MustParse("1Gi"),
expectErr: false,
},
{
name: api.ResourceName("hugepages-bad"),
expectErr: true,
},
}
for _, testCase := range testCases {
value, err := HugePageSizeFromResourceName(testCase.name)
if testCase.expectErr && err == nil {
t.Errorf("Expected an error for %v", testCase.name)
} else if !testCase.expectErr && err != nil {
t.Errorf("Unexpected error for %v, got %v", testCase.name, err)
} else if testCase.pageSize.Value() != value.Value() {
t.Errorf("Unexpected pageSize for resource %v got %v", testCase.name, value.String())
}
}
}
func TestIsOvercommitAllowed(t *testing.T) {
testCases := []struct {
name api.ResourceName
allowed bool
}{
{
name: api.ResourceCPU,
allowed: true,
},
{
name: api.ResourceMemory,
allowed: true,
},
{
name: api.ResourceNvidiaGPU,
allowed: false,
},
{
name: HugePageResourceName(resource.MustParse("2Mi")),
allowed: false,
},
}
for _, testCase := range testCases {
if testCase.allowed != IsOvercommitAllowed(testCase.name) {
t.Errorf("Unexpected result for %v", testCase.name)
}
}
}

View File

@ -10,6 +10,7 @@ go_library(
srcs = ["qos.go"],
deps = [
"//pkg/api:go_default_library",
"//pkg/api/helper:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
],

View File

@ -22,10 +22,13 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/helper"
)
// supportedQoSComputeResources is the list of compute resources for which QoS is supported.
var supportedQoSComputeResources = sets.NewString(string(api.ResourceCPU), string(api.ResourceMemory))
func isSupportedQoSComputeResource(name api.ResourceName) bool {
supportedQoSComputeResources := sets.NewString(string(api.ResourceCPU), string(api.ResourceMemory))
return supportedQoSComputeResources.Has(string(name)) || helper.IsHugePageResourceName(name)
}
// GetPodQOS returns the QoS class of a pod.
// A pod is besteffort if none of its containers have specified any requests or limits.
@ -39,7 +42,7 @@ func GetPodQOS(pod *api.Pod) api.PodQOSClass {
for _, container := range pod.Spec.Containers {
// process requests
for name, quantity := range container.Resources.Requests {
if !supportedQoSComputeResources.Has(string(name)) {
if !isSupportedQoSComputeResource(name) {
continue
}
if quantity.Cmp(zeroQuantity) == 1 {
@ -55,7 +58,7 @@ func GetPodQOS(pod *api.Pod) api.PodQOSClass {
// process limits
qosLimitsFound := sets.NewString()
for name, quantity := range container.Resources.Limits {
if !supportedQoSComputeResources.Has(string(name)) {
if !isSupportedQoSComputeResource(name) {
continue
}
if quantity.Cmp(zeroQuantity) == 1 {
@ -70,7 +73,7 @@ func GetPodQOS(pod *api.Pod) api.PodQOSClass {
}
}
if len(qosLimitsFound) != len(supportedQoSComputeResources) {
if !qosLimitsFound.HasAll(string(api.ResourceMemory), string(api.ResourceCPU)) {
isGuaranteed = false
}
}

View File

@ -3339,6 +3339,8 @@ const (
ResourceOpaqueIntPrefix = "pod.alpha.kubernetes.io/opaque-int-resource-"
// Default namespace prefix.
ResourceDefaultNamespacePrefix = "kubernetes.io/"
// Name prefix for huge page resources (alpha).
ResourceHugePagesPrefix = "hugepages-"
)
// ResourceList is a set of (resource name, quantity) pairs.

View File

@ -24,6 +24,7 @@ go_library(
deps = [
"//pkg/api/helper:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/selection:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",

View File

@ -22,6 +22,7 @@ import (
"strings"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/selection"
"k8s.io/apimachinery/pkg/util/sets"
@ -41,6 +42,31 @@ func IsExtendedResourceName(name v1.ResourceName) bool {
func IsDefaultNamespaceResource(name v1.ResourceName) bool {
return !strings.Contains(string(name), "/") ||
strings.Contains(string(name), v1.ResourceDefaultNamespacePrefix)
}
// IsHugePageResourceName returns true if the resource name has the huge page
// resource prefix.
func IsHugePageResourceName(name v1.ResourceName) bool {
return strings.HasPrefix(string(name), v1.ResourceHugePagesPrefix)
}
// HugePageResourceName returns a ResourceName with the canonical hugepage
// prefix prepended for the specified page size. The page size is converted
// to its canonical representation.
func HugePageResourceName(pageSize resource.Quantity) v1.ResourceName {
return v1.ResourceName(fmt.Sprintf("%s%s", v1.ResourceHugePagesPrefix, pageSize.String()))
}
// HugePageSizeFromResourceName returns the page size for the specified huge page
// resource name. If the specified input is not a valid huge page resource name
// an error is returned.
func HugePageSizeFromResourceName(name v1.ResourceName) (resource.Quantity, error) {
if !IsHugePageResourceName(name) {
return resource.Quantity{}, fmt.Errorf("resource name: %s is not a valid hugepage name", name)
}
pageSize := strings.TrimPrefix(string(name), v1.ResourceHugePagesPrefix)
return resource.ParseQuantity(pageSize)
}
// IsOpaqueIntResourceName returns true if the resource name has the opaque

View File

@ -24,6 +24,7 @@ go_library(
name = "go_default_library",
srcs = ["qos.go"],
deps = [
"//pkg/api/v1/helper:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",

View File

@ -20,12 +20,16 @@ import (
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/util/sets"
v1helper "k8s.io/kubernetes/pkg/api/v1/helper"
)
// QOSList is a set of (resource name, QoS class) pairs.
type QOSList map[v1.ResourceName]v1.PodQOSClass
var supportedQoSComputeResources = sets.NewString(string(v1.ResourceCPU), string(v1.ResourceMemory))
func isSupportedQoSComputeResource(name v1.ResourceName) bool {
supportedQoSComputeResources := sets.NewString(string(v1.ResourceCPU), string(v1.ResourceMemory))
return supportedQoSComputeResources.Has(string(name)) || v1helper.IsHugePageResourceName(name)
}
// GetPodQOS returns the QoS class of a pod.
// A pod is besteffort if none of its containers have specified any requests or limits.
@ -39,7 +43,7 @@ func GetPodQOS(pod *v1.Pod) v1.PodQOSClass {
for _, container := range pod.Spec.Containers {
// process requests
for name, quantity := range container.Resources.Requests {
if !supportedQoSComputeResources.Has(string(name)) {
if !isSupportedQoSComputeResource(name) {
continue
}
if quantity.Cmp(zeroQuantity) == 1 {
@ -55,7 +59,7 @@ func GetPodQOS(pod *v1.Pod) v1.PodQOSClass {
// process limits
qosLimitsFound := sets.NewString()
for name, quantity := range container.Resources.Limits {
if !supportedQoSComputeResources.Has(string(name)) {
if !isSupportedQoSComputeResource(name) {
continue
}
if quantity.Cmp(zeroQuantity) == 1 {
@ -70,7 +74,7 @@ func GetPodQOS(pod *v1.Pod) v1.PodQOSClass {
}
}
if len(qosLimitsFound) != len(supportedQoSComputeResources) {
if !qosLimitsFound.HasAll(string(v1.ResourceMemory), string(v1.ResourceCPU)) {
isGuaranteed = false
}
}
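
To illustrate the effect of this change, a minimal sketch (a hypothetical program using the in-tree qos package) of a pod that consumes only huge pages: its requests and limits match, but because Guaranteed now explicitly requires both cpu and memory limits, the pod is classified Burstable, mirroring the burstable-hugepages test case below.

```go
package main

import (
	"fmt"

	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	"k8s.io/kubernetes/pkg/api/v1/helper/qos"
)

func main() {
	hugepages := v1.ResourceList{
		v1.ResourceName("hugepages-2Mi"): resource.MustParse("1Gi"),
	}
	pod := &v1.Pod{
		Spec: v1.PodSpec{
			Containers: []v1.Container{{
				Name: "ctr",
				Resources: v1.ResourceRequirements{
					Requests: hugepages,
					Limits:   hugepages,
				},
			}},
		},
	}
	// Huge pages now count toward QoS, but Guaranteed still requires
	// matching cpu and memory limits, so this pod is Burstable.
	fmt.Println(qos.GetPodQOS(pod)) // Burstable
}
```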

View File

@ -130,6 +130,12 @@ func TestGetPodQOS(t *testing.T) {
}),
expected: v1.PodQOSBurstable,
},
{
pod: newPod("burstable-hugepages", []v1.Container{
newContainer("burstable", addResource("hugepages-2Mi", "1Gi", getResourceList("0", "0")), addResource("hugepages-2Mi", "1Gi", getResourceList("0", "0"))),
}),
expected: v1.PodQOSBurstable,
},
}
for id, testCase := range testCases {
if actual := GetPodQOS(testCase.pod); testCase.expected != actual {
@ -141,7 +147,7 @@ func TestGetPodQOS(t *testing.T) {
k8sv1.Convert_v1_Pod_To_api_Pod(testCase.pod, &pod, nil)
if actual := qos.GetPodQOS(&pod); api.PodQOSClass(testCase.expected) != actual {
t.Errorf("[%d]: invalid qos pod %s, expected: %s, actual: %s", id, testCase.pod.Name, testCase.expected, actual)
t.Errorf("[%d]: conversion invalid qos pod %s, expected: %s, actual: %s", id, testCase.pod.Name, testCase.expected, actual)
}
}
}

View File

@ -2402,6 +2402,28 @@ func ValidateTolerations(tolerations []api.Toleration, fldPath *field.Path) fiel
return allErrors
}
func toResourceNames(resources api.ResourceList) []api.ResourceName {
result := []api.ResourceName{}
for resourceName := range resources {
result = append(result, resourceName)
}
return result
}
func toSet(resourceNames []api.ResourceName) sets.String {
result := sets.NewString()
for _, resourceName := range resourceNames {
result.Insert(string(resourceName))
}
return result
}
func toContainerResourcesSet(ctr *api.Container) sets.String {
resourceNames := toResourceNames(ctr.Resources.Requests)
resourceNames = append(resourceNames, toResourceNames(ctr.Resources.Limits)...)
return toSet(resourceNames)
}
// validateContainersOnlyForPod does additional validation for containers on a pod versus a pod template
// it only does additive validation of fields not covered in validateContainers
func validateContainersOnlyForPod(containers []api.Container, fldPath *field.Path) field.ErrorList {
@ -2429,6 +2451,21 @@ func ValidatePod(pod *api.Pod) field.ErrorList {
allErrs = append(allErrs, validateContainersOnlyForPod(pod.Spec.Containers, specPath.Child("containers"))...)
allErrs = append(allErrs, validateContainersOnlyForPod(pod.Spec.InitContainers, specPath.Child("initContainers"))...)
if utilfeature.DefaultFeatureGate.Enabled(features.HugePages) {
hugePageResources := sets.NewString()
for i := range pod.Spec.Containers {
resourceSet := toContainerResourcesSet(&pod.Spec.Containers[i])
for resourceStr := range resourceSet {
if v1helper.IsHugePageResourceName(v1.ResourceName(resourceStr)) {
hugePageResources.Insert(resourceStr)
}
}
}
if len(hugePageResources) > 1 {
allErrs = append(allErrs, field.Invalid(specPath, hugePageResources, "must use a single hugepage size in a pod spec"))
}
}
return allErrs
}
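
Concretely, a pod whose containers request both hugepages-2Mi and hugepages-1Gi fails validation with "must use a single hugepage size in a pod spec"; the hugepages-multiple case in TestAlphaHugePagesIsolation below exercises exactly this.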
@ -3445,7 +3482,10 @@ func ValidateNode(node *api.Node) field.ErrorList {
allErrs = append(allErrs, validateNodeTaints(node.Spec.Taints, fldPath.Child("taints"))...)
}
// Only validate spec. All status fields are optional and can be updated later.
// Only validate spec.
// All status fields are optional and can be updated later.
// That said, if specified, we need to ensure they are valid.
allErrs = append(allErrs, ValidateNodeResources(node)...)
// external ID is required.
if len(node.Spec.ExternalID) == 0 {
@ -3461,6 +3501,38 @@ func ValidateNode(node *api.Node) field.ErrorList {
return allErrs
}
// ValidateNodeResources is used to make sure a node has valid capacity and allocatable values.
func ValidateNodeResources(node *api.Node) field.ErrorList {
allErrs := field.ErrorList{}
// Validate resource quantities in capacity.
hugePageSizes := sets.NewString()
for k, v := range node.Status.Capacity {
resPath := field.NewPath("status", "capacity", string(k))
allErrs = append(allErrs, ValidateResourceQuantityValue(string(k), v, resPath)...)
// track any huge page size that has a positive value
if helper.IsHugePageResourceName(k) && v.Value() > int64(0) {
hugePageSizes.Insert(string(k))
}
if len(hugePageSizes) > 1 {
allErrs = append(allErrs, field.Invalid(resPath, v, "may not have pre-allocated hugepages for multiple page sizes"))
}
}
// Validate resource quantities in allocatable.
hugePageSizes = sets.NewString()
for k, v := range node.Status.Allocatable {
resPath := field.NewPath("status", "allocatable", string(k))
allErrs = append(allErrs, ValidateResourceQuantityValue(string(k), v, resPath)...)
// track any huge page size that has a positive value
if helper.IsHugePageResourceName(k) && v.Value() > int64(0) {
hugePageSizes.Insert(string(k))
}
if len(hugePageSizes) > 1 {
allErrs = append(allErrs, field.Invalid(resPath, v, "may not have pre-allocated hugepages for multiple page sizes"))
}
}
return allErrs
}
// ValidateNodeUpdate tests to make sure a node update can be applied. Modifies oldNode.
func ValidateNodeUpdate(node, oldNode *api.Node) field.ErrorList {
fldPath := field.NewPath("metadata")
@ -3473,16 +3545,7 @@ func ValidateNodeUpdate(node, oldNode *api.Node) field.ErrorList {
// allErrs = append(allErrs, field.Invalid("status", node.Status, "must be empty"))
// }
// Validate resource quantities in capacity.
for k, v := range node.Status.Capacity {
resPath := field.NewPath("status", "capacity", string(k))
allErrs = append(allErrs, ValidateResourceQuantityValue(string(k), v, resPath)...)
}
// Validate resource quantities in allocatable.
for k, v := range node.Status.Allocatable {
resPath := field.NewPath("status", "allocatable", string(k))
allErrs = append(allErrs, ValidateResourceQuantityValue(string(k), v, resPath)...)
}
allErrs = append(allErrs, ValidateNodeResources(node)...)
// Validate no duplicate addresses in node status.
addresses := make(map[api.NodeAddress]bool)
@ -3925,6 +3988,10 @@ func ValidateResourceRequirements(requirements *api.ResourceRequirements, fldPat
if resourceName == api.ResourceEphemeralStorage && !utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
allErrs = append(allErrs, field.Forbidden(limPath, "ResourceEphemeralStorage field disabled by feature-gate for ResourceRequirements"))
}
if helper.IsHugePageResourceName(resourceName) && !utilfeature.DefaultFeatureGate.Enabled(features.HugePages) {
allErrs = append(allErrs, field.Forbidden(limPath, fmt.Sprintf("%s field disabled by feature-gate for ResourceRequirements", resourceName)))
}
}
for resourceName, quantity := range requirements.Requests {
fldPath := reqPath.Key(string(resourceName))

View File

@ -2759,6 +2759,106 @@ func TestValidateVolumes(t *testing.T) {
}
}
func TestAlphaHugePagesIsolation(t *testing.T) {
successCases := []api.Pod{
{ // Basic fields.
ObjectMeta: metav1.ObjectMeta{Name: "123", Namespace: "ns"},
Spec: api.PodSpec{
Containers: []api.Container{
{
Name: "ctr", Image: "image", ImagePullPolicy: "IfNotPresent", TerminationMessagePolicy: "File",
Resources: api.ResourceRequirements{
Requests: api.ResourceList{
api.ResourceName("hugepages-2Mi"): resource.MustParse("1Gi"),
},
Limits: api.ResourceList{
api.ResourceName("hugepages-2Mi"): resource.MustParse("1Gi"),
},
},
},
},
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
},
},
}
failureCases := []api.Pod{
{ // Basic fields.
ObjectMeta: metav1.ObjectMeta{Name: "hugepages-shared", Namespace: "ns"},
Spec: api.PodSpec{
Containers: []api.Container{
{
Name: "ctr", Image: "image", ImagePullPolicy: "IfNotPresent", TerminationMessagePolicy: "File",
Resources: api.ResourceRequirements{
Requests: api.ResourceList{
api.ResourceName("hugepages-2Mi"): resource.MustParse("1Gi"),
},
Limits: api.ResourceList{
api.ResourceName("hugepages-2Mi"): resource.MustParse("2Gi"),
},
},
},
},
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
},
},
{ // Basic fields.
ObjectMeta: metav1.ObjectMeta{Name: "hugepages-multiple", Namespace: "ns"},
Spec: api.PodSpec{
Containers: []api.Container{
{
Name: "ctr", Image: "image", ImagePullPolicy: "IfNotPresent", TerminationMessagePolicy: "File",
Resources: api.ResourceRequirements{
Requests: api.ResourceList{
api.ResourceName("hugepages-2Mi"): resource.MustParse("1Gi"),
api.ResourceName("hugepages-1Gi"): resource.MustParse("2Gi"),
},
Limits: api.ResourceList{
api.ResourceName("hugepages-2Mi"): resource.MustParse("1Gi"),
api.ResourceName("hugepages-1Gi"): resource.MustParse("2Gi"),
},
},
},
},
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
},
},
}
// Enable alpha feature HugePages
err := utilfeature.DefaultFeatureGate.Set("HugePages=true")
if err != nil {
t.Errorf("Failed to enable feature gate for HugePages: %v", err)
return
}
for i := range successCases {
pod := &successCases[i]
if errs := ValidatePod(pod); len(errs) != 0 {
t.Errorf("Unexpected error for case[%d], err: %v", i, errs)
}
}
for i := range failureCases {
pod := &failureCases[i]
if errs := ValidatePod(pod); len(errs) == 0 {
t.Errorf("Expected error for case[%d], pod: %v", i, pod.Name)
}
}
// Disable alpha feature HugePages
err = utilfeature.DefaultFeatureGate.Set("HugePages=false")
if err != nil {
t.Errorf("Failed to disable feature gate for HugePages: %v", err)
return
}
// Disable alpha feature HugePages and ensure all success cases fail
for i := range successCases {
pod := &successCases[i]
if errs := ValidatePod(pod); len(errs) == 0 {
t.Errorf("Expected error for case[%d], pod: %v", i, pod.Name)
}
}
}
func TestAlphaLocalStorageCapacityIsolation(t *testing.T) {
testCases := []api.VolumeSource{
@ -7838,6 +7938,8 @@ func TestValidateNode(t *testing.T) {
api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
api.ResourceName("my.org/gpu"): resource.MustParse("10"),
api.ResourceName("hugepages-2Mi"): resource.MustParse("10Gi"),
api.ResourceName("hugepages-1Gi"): resource.MustParse("0"),
},
},
Spec: api.NodeSpec{
@ -8119,6 +8221,27 @@ func TestValidateNode(t *testing.T) {
ExternalID: "external",
},
},
"multiple-pre-allocated-hugepages": {
ObjectMeta: metav1.ObjectMeta{
Name: "abc",
Labels: validSelector,
},
Status: api.NodeStatus{
Addresses: []api.NodeAddress{
{Type: api.NodeExternalIP, Address: "something"},
},
Capacity: api.ResourceList{
api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
api.ResourceName("my.org/gpu"): resource.MustParse("10"),
api.ResourceName("hugepages-2Mi"): resource.MustParse("10Gi"),
api.ResourceName("hugepages-1Gi"): resource.MustParse("10Gi"),
},
},
Spec: api.NodeSpec{
ExternalID: "external",
},
},
}
for k, v := range errorCases {
errs := ValidateNode(&v)

View File

@ -157,6 +157,12 @@ const (
//
// Alternative container-level CPU affinity policies.
CPUManager utilfeature.Feature = "CPUManager"
// owner: @derekwaynecarr
// alpha: v1.8
//
// Enable pods to consume pre-allocated huge pages of varying page sizes
HugePages utilfeature.Feature = "HugePages"
)
func init() {
@ -180,6 +186,7 @@ var defaultKubernetesFeatureGates = map[utilfeature.Feature]utilfeature.FeatureS
RotateKubeletClientCertificate: {Default: true, PreRelease: utilfeature.Beta},
PersistentLocalVolumes: {Default: false, PreRelease: utilfeature.Alpha},
LocalStorageCapacityIsolation: {Default: false, PreRelease: utilfeature.Alpha},
HugePages: {Default: false, PreRelease: utilfeature.Alpha},
DebugContainers: {Default: false, PreRelease: utilfeature.Alpha},
PodPriority: {Default: false, PreRelease: utilfeature.Alpha},
EnableEquivalenceClassCache: {Default: false, PreRelease: utilfeature.Alpha},
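
Because the gate defaults to false, operators must opt in explicitly, e.g. by starting the relevant components with --feature-gates=HugePages=true (the standard feature-gate flag syntax).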

View File

@ -23,11 +23,14 @@ go_library(
"//conditions:default": [],
}),
deps = [
"//pkg/api/v1/helper:go_default_library",
"//pkg/features:go_default_library",
"//vendor/github.com/google/cadvisor/events:go_default_library",
"//vendor/github.com/google/cadvisor/info/v1:go_default_library",
"//vendor/github.com/google/cadvisor/info/v2:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
] + select({
"@io_bazel_rules_go//go/platform:linux_amd64": [
"//pkg/kubelet/types:go_default_library",

View File

@ -21,6 +21,9 @@ import (
cadvisorapi2 "github.com/google/cadvisor/info/v2"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
utilfeature "k8s.io/apiserver/pkg/util/feature"
v1helper "k8s.io/kubernetes/pkg/api/v1/helper"
"k8s.io/kubernetes/pkg/features"
)
func CapacityFromMachineInfo(info *cadvisorapi.MachineInfo) v1.ResourceList {
@ -32,6 +35,17 @@ func CapacityFromMachineInfo(info *cadvisorapi.MachineInfo) v1.ResourceList {
int64(info.MemoryCapacity),
resource.BinarySI),
}
// if huge pages are enabled, we report them as a schedulable resource on the node
if utilfeature.DefaultFeatureGate.Enabled(features.HugePages) {
for _, hugepagesInfo := range info.HugePages {
pageSizeBytes := int64(hugepagesInfo.PageSize * 1024)
hugePagesBytes := pageSizeBytes * int64(hugepagesInfo.NumPages)
pageSizeQuantity := resource.NewQuantity(pageSizeBytes, resource.BinarySI)
c[v1helper.HugePageResourceName(*pageSizeQuantity)] = *resource.NewQuantity(hugePagesBytes, resource.BinarySI)
}
}
return c
}
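
A worked example of this computation with hypothetical cadvisor numbers (cadvisor reports page sizes in kB): 512 pre-allocated 2048 kB pages yield a capacity entry of hugepages-2Mi: 1Gi. A self-contained sketch:

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	// Hypothetical cadvisor data: 512 pre-allocated pages of 2048 kB each.
	pageSizeKB, numPages := int64(2048), int64(512)
	pageSizeBytes := pageSizeKB * 1024     // 2,097,152 bytes = 2Mi
	totalBytes := pageSizeBytes * numPages // 1,073,741,824 bytes = 1Gi

	pageSize := resource.NewQuantity(pageSizeBytes, resource.BinarySI)
	total := resource.NewQuantity(totalBytes, resource.BinarySI)
	// The node then advertises this as a schedulable resource.
	fmt.Printf("hugepages-%s: %s\n", pageSize.String(), total.String())
}
```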

View File

@ -52,6 +52,7 @@ go_library(
] + select({
"@io_bazel_rules_go//go/platform:linux_amd64": [
"//pkg/api:go_default_library",
"//pkg/api/v1/helper:go_default_library",
"//pkg/api/v1/helper/qos:go_default_library",
"//pkg/api/v1/resource:go_default_library",
"//pkg/kubelet/cm/util:go_default_library",
@ -63,6 +64,7 @@ go_library(
"//pkg/util/procfs:go_default_library",
"//pkg/util/sysctl:go_default_library",
"//pkg/util/version:go_default_library",
"//vendor/github.com/docker/go-units:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd:go_default_library",

View File

@ -24,12 +24,16 @@ import (
"strings"
"time"
units "github.com/docker/go-units"
"github.com/golang/glog"
libcontainercgroups "github.com/opencontainers/runc/libcontainer/cgroups"
cgroupfs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
cgroupsystemd "github.com/opencontainers/runc/libcontainer/cgroups/systemd"
libcontainerconfigs "github.com/opencontainers/runc/libcontainer/configs"
"k8s.io/apimachinery/pkg/util/sets"
utilfeature "k8s.io/apiserver/pkg/util/feature"
kubefeatures "k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/metrics"
)
@ -43,6 +47,10 @@ const (
libcontainerSystemd libcontainerCgroupManagerType = "systemd"
)
// hugePageSizeList is used to convert a page size to the canonical
// hugetlb unit that libcontainer expects
var hugePageSizeList = []string{"B", "kB", "MB", "GB", "TB", "PB"}
// ConvertCgroupNameToSystemd converts the internal cgroup name to a systemd name.
// For example, the name /Burstable/pod_123-456 becomes Burstable-pod_123_456.slice
// If outputToCgroupFs is true, it expands the systemd name into the cgroupfs form.
@ -299,10 +307,16 @@ type subsystem interface {
GetStats(path string, stats *libcontainercgroups.Stats) error
}
// Cgroup subsystems we currently support
var supportedSubsystems = []subsystem{
&cgroupfs.MemoryGroup{},
&cgroupfs.CpuGroup{},
// getSupportedSubsystems returns the list of cgroup subsystems currently supported
func getSupportedSubsystems() []subsystem {
supportedSubsystems := []subsystem{
&cgroupfs.MemoryGroup{},
&cgroupfs.CpuGroup{},
}
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.HugePages) {
supportedSubsystems = append(supportedSubsystems, &cgroupfs.HugetlbGroup{})
}
return supportedSubsystems
}
// setSupportedSubsystems sets cgroup resource limits only on the supported
@ -315,7 +329,7 @@ var supportedSubsystems = []subsystem{
// but this is not possible with libcontainers Set() method
// See https://github.com/opencontainers/runc/issues/932
func setSupportedSubsystems(cgroupConfig *libcontainerconfigs.Cgroup) error {
for _, sys := range supportedSubsystems {
for _, sys := range getSupportedSubsystems() {
if _, ok := cgroupConfig.Paths[sys.Name()]; !ok {
return fmt.Errorf("Failed to find subsystem mount for subsystem: %v", sys.Name())
}
@ -343,6 +357,30 @@ func (m *cgroupManagerImpl) toResources(resourceConfig *ResourceConfig) *libcont
if resourceConfig.CpuPeriod != nil {
resources.CpuPeriod = *resourceConfig.CpuPeriod
}
// if huge pages are enabled, we set them in libcontainer
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.HugePages) {
// for each page size enumerated, set that value
pageSizes := sets.NewString()
for pageSize, limit := range resourceConfig.HugePageLimit {
sizeString := units.CustomSize("%g%s", float64(pageSize), 1024.0, hugePageSizeList)
resources.HugetlbLimit = append(resources.HugetlbLimit, &libcontainerconfigs.HugepageLimit{
Pagesize: sizeString,
Limit: uint64(limit),
})
pageSizes.Insert(sizeString)
}
// for each page size omitted, limit to 0
for _, pageSize := range cgroupfs.HugePageSizes {
if pageSizes.Has(pageSize) {
continue
}
resources.HugetlbLimit = append(resources.HugetlbLimit, &libcontainerconfigs.HugepageLimit{
Pagesize: pageSize,
Limit: uint64(0),
})
}
}
return resources
}
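
A sketch of the size-string conversion used above, runnable on its own with go-units: a 2Mi page size (2,097,152 bytes, a hypothetical input) becomes "2MB", which matches hugetlb cgroup file names such as hugetlb.2MB.limit_in_bytes.

```go
package main

import (
	"fmt"

	units "github.com/docker/go-units"
)

func main() {
	hugePageSizeList := []string{"B", "kB", "MB", "GB", "TB", "PB"}
	// 2,097,152 / 1024 = 2048 kB; / 1024 again = 2 MB -> "2MB".
	fmt.Println(units.CustomSize("%g%s", float64(2097152), 1024.0, hugePageSizeList))
}
```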
@ -502,7 +540,7 @@ func (m *cgroupManagerImpl) ReduceCPULimits(cgroupName CgroupName) error {
func getStatsSupportedSubsystems(cgroupPaths map[string]string) (*libcontainercgroups.Stats, error) {
stats := libcontainercgroups.NewStats()
for _, sys := range supportedSubsystems {
for _, sys := range getSupportedSubsystems() {
if _, ok := cgroupPaths[sys.Name()]; !ok {
return nil, fmt.Errorf("Failed to find subsystem mount for subsystem: %v", sys.Name())
}

View File

@ -26,6 +26,7 @@ import (
libcontainercgroups "github.com/opencontainers/runc/libcontainer/cgroups"
"k8s.io/api/core/v1"
v1helper "k8s.io/kubernetes/pkg/api/v1/helper"
v1qos "k8s.io/kubernetes/pkg/api/v1/helper/qos"
"k8s.io/kubernetes/pkg/api/v1/resource"
)
@ -83,6 +84,23 @@ func MilliCPUToShares(milliCPU int64) int64 {
return shares
}
// HugePageLimits converts the API representation to a map
// from huge page size (in bytes) to huge page limit (in bytes).
func HugePageLimits(resourceList v1.ResourceList) map[int64]int64 {
hugePageLimits := map[int64]int64{}
for k, v := range resourceList {
if v1helper.IsHugePageResourceName(k) {
pageSize, _ := v1helper.HugePageSizeFromResourceName(k)
if value, exists := hugePageLimits[pageSize.Value()]; exists {
hugePageLimits[pageSize.Value()] = value + v.Value()
} else {
hugePageLimits[pageSize.Value()] = v.Value()
}
}
}
return hugePageLimits
}
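
For example (hypothetical input), a ResourceList of {hugepages-2Mi: 1Gi} yields map[int64]int64{2097152: 1073741824}; multiple entries that resolve to the same page size (such as hugepages-2Mi and hugepages-2048Ki) are summed.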
// ResourceConfigForPod takes the input pod and outputs the cgroup resource config.
func ResourceConfigForPod(pod *v1.Pod) *ResourceConfig {
// sum requests and limits.
@ -108,6 +126,8 @@ func ResourceConfigForPod(pod *v1.Pod) *ResourceConfig {
// track if limits were applied for each resource.
memoryLimitsDeclared := true
cpuLimitsDeclared := true
// map hugepage pagesize (bytes) to limits (bytes)
hugePageLimits := map[int64]int64{}
for _, container := range pod.Spec.Containers {
if container.Resources.Limits.Cpu().IsZero() {
cpuLimitsDeclared = false
@ -115,6 +135,14 @@ func ResourceConfigForPod(pod *v1.Pod) *ResourceConfig {
if container.Resources.Limits.Memory().IsZero() {
memoryLimitsDeclared = false
}
containerHugePageLimits := HugePageLimits(container.Resources.Requests)
for k, v := range containerHugePageLimits {
if value, exists := hugePageLimits[k]; exists {
hugePageLimits[k] = value + v
} else {
hugePageLimits[k] = v
}
}
}
// determine the qos class
@ -140,6 +168,7 @@ func ResourceConfigForPod(pod *v1.Pod) *ResourceConfig {
shares := int64(MinShares)
result.CpuShares = &shares
}
result.HugePageLimit = hugePageLimits
return result
}

View File

@ -28,7 +28,9 @@ import (
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/types"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/kubernetes/pkg/api"
kubefeatures "k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/events"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
)
@ -154,6 +156,10 @@ func getCgroupConfig(rl v1.ResourceList) *ResourceConfig {
val := MilliCPUToShares(q.MilliValue())
rc.CpuShares = &val
}
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.HugePages) {
rc.HugePageLimit = HugePageLimits(rl)
}
return &rc
}

View File

@ -27,9 +27,13 @@ import (
"k8s.io/apimachinery/pkg/util/wait"
units "github.com/docker/go-units"
cgroupfs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
"k8s.io/api/core/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
v1qos "k8s.io/kubernetes/pkg/api/v1/helper/qos"
"k8s.io/kubernetes/pkg/api/v1/resource"
kubefeatures "k8s.io/kubernetes/pkg/features"
)
const (
@ -100,11 +104,18 @@ func (m *qosContainerManagerImpl) Start(getNodeAllocatable func() v1.ResourceLis
minShares := int64(MinShares)
resourceParameters.CpuShares = &minShares
}
// containerConfig object stores the cgroup specifications
containerConfig := &CgroupConfig{
Name: absoluteContainerName,
ResourceParameters: resourceParameters,
}
// for each enumerated huge page size, the qos tiers are unbounded
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.HugePages) {
m.setHugePagesUnbounded(containerConfig)
}
// check if it exists
if !cm.Exists(absoluteContainerName) {
if err := cm.Create(containerConfig); err != nil {
@ -138,6 +149,29 @@ func (m *qosContainerManagerImpl) Start(getNodeAllocatable func() v1.ResourceLis
return nil
}
// setHugePagesUnbounded ensures hugetlb is effectively unbounded
func (m *qosContainerManagerImpl) setHugePagesUnbounded(cgroupConfig *CgroupConfig) error {
hugePageLimit := map[int64]int64{}
for _, pageSize := range cgroupfs.HugePageSizes {
pageSizeBytes, err := units.RAMInBytes(pageSize)
if err != nil {
return err
}
hugePageLimit[pageSizeBytes] = int64(1 << 62)
}
cgroupConfig.ResourceParameters.HugePageLimit = hugePageLimit
return nil
}
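
The 1 << 62 limit is simply a sentinel large enough never to constrain hugetlb usage while staying comfortably within int64 range.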
func (m *qosContainerManagerImpl) setHugePagesConfig(configs map[v1.PodQOSClass]*CgroupConfig) error {
for _, v := range configs {
if err := m.setHugePagesUnbounded(v); err != nil {
return err
}
}
return nil
}
func (m *qosContainerManagerImpl) setCPUCgroupConfig(configs map[v1.PodQOSClass]*CgroupConfig) error {
pods := m.activePods()
burstablePodCPURequest := int64(0)
@ -262,6 +296,13 @@ func (m *qosContainerManagerImpl) UpdateCgroups() error {
return err
}
// update the qos level cgroup settings for huge pages (ensure they remain unbounded)
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.HugePages) {
if err := m.setHugePagesConfig(qosConfigs); err != nil {
return err
}
}
for resource, percentReserve := range m.qosReserved {
switch resource {
case v1.ResourceMemory:

View File

@ -31,6 +31,8 @@ type ResourceConfig struct {
CpuQuota *int64
// CPU quota period.
CpuPeriod *int64
// HugePageLimit maps a page size (in bytes) to a limit (in bytes)
HugePageLimit map[int64]int64
}
// CgroupName is the abstract name of a cgroup prior to any driver specific conversion.

View File

@ -630,6 +630,19 @@ func (kl *Kubelet) setNodeStatusMachineInfo(node *v1.Node) {
}
node.Status.Allocatable[k] = value
}
// for every huge page reservation, we need to remove it from allocatable memory
for k, v := range node.Status.Capacity {
if v1helper.IsHugePageResourceName(k) {
allocatableMemory := node.Status.Allocatable[v1.ResourceMemory]
value := *(v.Copy())
allocatableMemory.Sub(value)
if allocatableMemory.Sign() < 0 {
// Negative Allocatable resources don't make sense.
allocatableMemory.Set(0)
}
node.Status.Allocatable[v1.ResourceMemory] = allocatableMemory
}
}
}
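
For example (hypothetical numbers), a node with 8Gi of memory capacity and a hugepages-2Mi: 1Gi reservation reports 7Gi of allocatable memory, since pre-allocated huge pages cannot be used as ordinary memory.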
// Set versioninfo for the node.

View File

@ -509,6 +509,12 @@ func GetResourceRequest(pod *v1.Pod) *schedulercache.Resource {
result.SetExtended(rName, value)
}
}
if v1helper.IsHugePageResourceName(rName) {
value := rQuantity.Value()
if value > result.HugePages[rName] {
result.SetHugePages(rName, value)
}
}
}
}
}
@ -542,7 +548,12 @@ func PodFitsResources(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.No
// We couldn't parse metadata - fallback to computing it.
podRequest = GetResourceRequest(pod)
}
if podRequest.MilliCPU == 0 && podRequest.Memory == 0 && podRequest.NvidiaGPU == 0 && podRequest.EphemeralStorage == 0 && len(podRequest.ExtendedResources) == 0 {
if podRequest.MilliCPU == 0 &&
podRequest.Memory == 0 &&
podRequest.NvidiaGPU == 0 &&
podRequest.EphemeralStorage == 0 &&
len(podRequest.ExtendedResources) == 0 &&
len(podRequest.HugePages) == 0 {
return len(predicateFails) == 0, predicateFails, nil
}
@ -567,6 +578,12 @@ func PodFitsResources(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.No
}
}
for rName, rQuant := range podRequest.HugePages {
if allocatable.HugePages[rName] < rQuant+nodeInfo.RequestedResource().HugePages[rName] {
predicateFails = append(predicateFails, NewInsufficientResourceError(rName, podRequest.HugePages[rName], nodeInfo.RequestedResource().HugePages[rName], allocatable.HugePages[rName]))
}
}
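
For instance, in the multiple-containers test case below, a pod requesting 6 pages lands on a node with 2 already requested and only 5 allocatable, so the predicate fails with NewInsufficientResourceError(hugePageResourceA, 6, 2, 5).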
if glog.V(10) {
if len(predicateFails) == 0 {
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is

View File

@ -73,11 +73,12 @@ func (pvs FakePersistentVolumeInfo) GetPersistentVolumeInfo(pvID string) (*v1.Pe
}
var (
opaqueResourceA = v1helper.OpaqueIntResourceName("AAA")
opaqueResourceB = v1helper.OpaqueIntResourceName("BBB")
opaqueResourceA = v1helper.OpaqueIntResourceName("AAA")
opaqueResourceB = v1helper.OpaqueIntResourceName("BBB")
hugePageResourceA = v1helper.HugePageResourceName(resource.MustParse("2Mi"))
)
func makeResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA, storage int64) v1.NodeResources {
func makeResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA, storage, hugePageA int64) v1.NodeResources {
return v1.NodeResources{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
@ -86,11 +87,12 @@ func makeResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA, storage int64) v
v1.ResourceNvidiaGPU: *resource.NewQuantity(nvidiaGPUs, resource.DecimalSI),
opaqueResourceA: *resource.NewQuantity(opaqueA, resource.DecimalSI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(storage, resource.BinarySI),
hugePageResourceA: *resource.NewQuantity(hugePageA, resource.BinarySI),
},
}
}
func makeAllocatableResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA, storage int64) v1.ResourceList {
func makeAllocatableResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA, storage, hugePageA int64) v1.ResourceList {
return v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(memory, resource.BinarySI),
@ -98,6 +100,7 @@ func makeAllocatableResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA, stora
v1.ResourceNvidiaGPU: *resource.NewQuantity(nvidiaGPUs, resource.DecimalSI),
opaqueResourceA: *resource.NewQuantity(opaqueA, resource.DecimalSI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(storage, resource.BinarySI),
hugePageResourceA: *resource.NewQuantity(hugePageA, resource.BinarySI),
}
}
@ -348,10 +351,38 @@ func TestPodFitsResources(t *testing.T) {
test: "opaque resource allocatable enforced for unknown resource for init container",
reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(opaqueResourceB, 1, 0, 0)},
},
{
pod: newResourcePod(
schedulercache.Resource{MilliCPU: 1, Memory: 1, HugePages: map[v1.ResourceName]int64{hugePageResourceA: 10}}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 0, Memory: 0, HugePages: map[v1.ResourceName]int64{hugePageResourceA: 0}})),
fits: false,
test: "hugepages resource capacity enforced",
reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(hugePageResourceA, 10, 0, 5)},
},
{
pod: newResourceInitPod(newResourcePod(schedulercache.Resource{}),
schedulercache.Resource{MilliCPU: 1, Memory: 1, HugePages: map[v1.ResourceName]int64{hugePageResourceA: 10}}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 0, Memory: 0, HugePages: map[v1.ResourceName]int64{hugePageResourceA: 0}})),
fits: false,
test: "hugepages resource capacity enforced for init container",
reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(hugePageResourceA, 10, 0, 5)},
},
{
pod: newResourcePod(
schedulercache.Resource{MilliCPU: 1, Memory: 1, HugePages: map[v1.ResourceName]int64{hugePageResourceA: 3}},
schedulercache.Resource{MilliCPU: 1, Memory: 1, HugePages: map[v1.ResourceName]int64{hugePageResourceA: 3}}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 0, Memory: 0, HugePages: map[v1.ResourceName]int64{hugePageResourceA: 2}})),
fits: false,
test: "hugepages resource allocatable enforced for multiple containers",
reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(hugePageResourceA, 6, 2, 5)},
},
}
for _, test := range enoughPodsTests {
node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 5, 20).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 5, 20)}}
node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 5, 20, 5).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 5, 20, 5)}}
test.nodeInfo.SetNode(&node)
fits, reasons, err := PodFitsResources(test.pod, PredicateMetadata(test.pod, nil), test.nodeInfo)
if err != nil {
@ -406,7 +437,7 @@ func TestPodFitsResources(t *testing.T) {
},
}
for _, test := range notEnoughPodsTests {
node := v1.Node{Status: v1.NodeStatus{Capacity: v1.ResourceList{}, Allocatable: makeAllocatableResources(10, 20, 0, 1, 0, 0)}}
node := v1.Node{Status: v1.NodeStatus{Capacity: v1.ResourceList{}, Allocatable: makeAllocatableResources(10, 20, 0, 1, 0, 0, 0)}}
test.nodeInfo.SetNode(&node)
fits, reasons, err := PodFitsResources(test.pod, PredicateMetadata(test.pod, nil), test.nodeInfo)
if err != nil {
@ -464,7 +495,7 @@ func TestPodFitsResources(t *testing.T) {
}
for _, test := range storagePodsTests {
node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 5, 20).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 5, 20)}}
node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 5, 20, 5).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 5, 20, 5)}}
test.nodeInfo.SetNode(&node)
fits, reasons, err := PodFitsResources(test.pod, PredicateMetadata(test.pod, nil), test.nodeInfo)
if err != nil {
@ -1889,7 +1920,7 @@ func TestRunGeneralPredicates(t *testing.T) {
newResourcePod(schedulercache.Resource{MilliCPU: 9, Memory: 19})),
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0)},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0, 0)},
},
fits: true,
wErr: nil,
@ -1901,7 +1932,7 @@ func TestRunGeneralPredicates(t *testing.T) {
newResourcePod(schedulercache.Resource{MilliCPU: 5, Memory: 19})),
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0)},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0, 0)},
},
fits: false,
wErr: nil,
@ -1915,7 +1946,7 @@ func TestRunGeneralPredicates(t *testing.T) {
pod: &v1.Pod{},
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 9, Memory: 19})),
node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0)}},
node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0, 0)}},
fits: true,
wErr: nil,
test: "no resources/port/host requested always fits on GPU machine",
@ -1924,7 +1955,7 @@ func TestRunGeneralPredicates(t *testing.T) {
pod: newResourcePod(schedulercache.Resource{MilliCPU: 3, Memory: 1, NvidiaGPU: 1}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 5, Memory: 10, NvidiaGPU: 1})),
node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0)}},
node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0, 0)}},
fits: false,
wErr: nil,
reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(v1.ResourceNvidiaGPU, 1, 1, 1)},
@ -1934,7 +1965,7 @@ func TestRunGeneralPredicates(t *testing.T) {
pod: newResourcePod(schedulercache.Resource{MilliCPU: 3, Memory: 1, NvidiaGPU: 1}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 5, Memory: 10, NvidiaGPU: 0})),
node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0)}},
node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0, 0)}},
fits: true,
wErr: nil,
test: "enough GPU resource",
@ -1948,7 +1979,7 @@ func TestRunGeneralPredicates(t *testing.T) {
nodeInfo: schedulercache.NewNodeInfo(),
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0)},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0, 0)},
},
fits: false,
wErr: nil,
@ -1960,7 +1991,7 @@ func TestRunGeneralPredicates(t *testing.T) {
nodeInfo: schedulercache.NewNodeInfo(newPodWithPort(123)),
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0)},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0, 0)},
},
fits: false,
wErr: nil,
@ -3252,7 +3283,7 @@ func TestPodSchedulesOnNodeWithMemoryPressureCondition(t *testing.T) {
ImagePullPolicy: "Always",
// at least one requirement -> burstable pod
Resources: v1.ResourceRequirements{
Requests: makeAllocatableResources(100, 100, 100, 100, 0, 0),
Requests: makeAllocatableResources(100, 100, 100, 100, 0, 0, 0),
},
},
},

View File

@ -72,6 +72,7 @@ type Resource struct {
// explicitly as int, to avoid conversions and improve performance.
AllowedPodNumber int
ExtendedResources map[v1.ResourceName]int64
HugePages map[v1.ResourceName]int64
}
// New creates a Resource from ResourceList
@ -103,6 +104,9 @@ func (r *Resource) Add(rl v1.ResourceList) {
if v1helper.IsExtendedResourceName(rName) {
r.AddExtended(rName, rQuant.Value())
}
if v1helper.IsHugePageResourceName(rName) {
r.AddHugePages(rName, rQuant.Value())
}
}
}
}
@ -118,6 +122,9 @@ func (r *Resource) ResourceList() v1.ResourceList {
for rName, rQuant := range r.ExtendedResources {
result[rName] = *resource.NewQuantity(rQuant, resource.DecimalSI)
}
for rName, rQuant := range r.HugePages {
result[rName] = *resource.NewQuantity(rQuant, resource.BinarySI)
}
return result
}
@ -135,6 +142,12 @@ func (r *Resource) Clone() *Resource {
res.ExtendedResources[k] = v
}
}
if r.HugePages != nil {
res.HugePages = make(map[v1.ResourceName]int64)
for k, v := range r.HugePages {
res.HugePages[k] = v
}
}
return res
}
@ -150,6 +163,18 @@ func (r *Resource) SetExtended(name v1.ResourceName, quantity int64) {
r.ExtendedResources[name] = quantity
}
func (r *Resource) AddHugePages(name v1.ResourceName, quantity int64) {
r.SetHugePages(name, r.HugePages[name]+quantity)
}
func (r *Resource) SetHugePages(name v1.ResourceName, quantity int64) {
// Lazily allocate hugepages resource map.
if r.HugePages == nil {
r.HugePages = map[v1.ResourceName]int64{}
}
r.HugePages[name] = quantity
}
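
A minimal usage sketch of the lazily allocated map (a hypothetical program against the in-tree scheduler cache package, assuming the 1.8-era import path):

```go
package main

import (
	"fmt"

	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

func main() {
	r := &schedulercache.Resource{}
	// Add routes hugepage names through AddHugePages/SetHugePages,
	// which allocate the HugePages map on first use.
	r.Add(v1.ResourceList{
		v1.ResourceName("hugepages-2Mi"): resource.MustParse("1Gi"),
	})
	fmt.Println(r.HugePages["hugepages-2Mi"]) // 1073741824
}
```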
// NewNodeInfo returns a ready to use empty NodeInfo object.
// If any pods are given in arguments, their information will be aggregated in
// the returned object.
@ -307,6 +332,12 @@ func (n *NodeInfo) addPod(pod *v1.Pod) {
for rName, rQuant := range res.ExtendedResources {
n.requestedResource.ExtendedResources[rName] += rQuant
}
if n.requestedResource.HugePages == nil && len(res.HugePages) > 0 {
n.requestedResource.HugePages = map[v1.ResourceName]int64{}
}
for rName, rQuant := range res.HugePages {
n.requestedResource.HugePages[rName] += rQuant
}
n.nonzeroRequest.MilliCPU += non0_cpu
n.nonzeroRequest.Memory += non0_mem
n.pods = append(n.pods, pod)
@ -362,6 +393,12 @@ func (n *NodeInfo) removePod(pod *v1.Pod) error {
for rName, rQuant := range res.ExtendedResources {
n.requestedResource.ExtendedResources[rName] -= rQuant
}
if len(res.HugePages) > 0 && n.requestedResource.HugePages == nil {
n.requestedResource.HugePages = map[v1.ResourceName]int64{}
}
for rName, rQuant := range res.HugePages {
n.requestedResource.HugePages[rName] -= rQuant
}
n.nonzeroRequest.MilliCPU -= non0_cpu
n.nonzeroRequest.Memory -= non0_mem

View File

@ -3759,6 +3759,8 @@ const (
ResourceOpaqueIntPrefix = "pod.alpha.kubernetes.io/opaque-int-resource-"
// Default namespace prefix.
ResourceDefaultNamespacePrefix = "kubernetes.io/"
// Name prefix for huge page resources (alpha).
ResourceHugePagesPrefix = "hugepages-"
)
// ResourceList is a set of (resource name, quantity) pairs.