From 38d5dee67776733045463a8a35fb2058db49eb3b Mon Sep 17 00:00:00 2001 From: Derek Carr Date: Wed, 30 Aug 2017 14:23:26 -0400 Subject: [PATCH] Node validation restricts pre-allocated hugepages to single page size --- pkg/api/helper/qos/qos.go | 6 ++-- pkg/api/v1/helper/qos/qos.go | 5 ++- pkg/api/validation/validation.go | 48 ++++++++++++++++++++------ pkg/api/validation/validation_test.go | 23 ++++++++++++ pkg/kubelet/cm/cgroup_manager_linux.go | 2 +- 5 files changed, 65 insertions(+), 19 deletions(-) diff --git a/pkg/api/helper/qos/qos.go b/pkg/api/helper/qos/qos.go index 0ea6286c841..cc58bde0c31 100644 --- a/pkg/api/helper/qos/qos.go +++ b/pkg/api/helper/qos/qos.go @@ -25,10 +25,8 @@ import ( "k8s.io/kubernetes/pkg/api/helper" ) -// supportedComputeResources is the list of compute resources for with QoS is supported. -var supportedQoSComputeResources = sets.NewString(string(api.ResourceCPU), string(api.ResourceMemory)) - func isSupportedQoSComputeResource(name api.ResourceName) bool { + supportedQoSComputeResources := sets.NewString(string(api.ResourceCPU), string(api.ResourceMemory)) return supportedQoSComputeResources.Has(string(name)) || helper.IsHugePageResourceName(name) } @@ -75,7 +73,7 @@ func GetPodQOS(pod *api.Pod) api.PodQOSClass { } } - if len(qosLimitsFound) != len(supportedQoSComputeResources) { + if !qosLimitsFound.HasAll(string(api.ResourceMemory), string(api.ResourceCPU)) { isGuaranteed = false } } diff --git a/pkg/api/v1/helper/qos/qos.go b/pkg/api/v1/helper/qos/qos.go index aceee068808..2fc7f22856a 100644 --- a/pkg/api/v1/helper/qos/qos.go +++ b/pkg/api/v1/helper/qos/qos.go @@ -26,9 +26,8 @@ import ( // QOSList is a set of (resource name, QoS class) pairs. type QOSList map[v1.ResourceName]v1.PodQOSClass -var supportedQoSComputeResources = sets.NewString(string(v1.ResourceCPU), string(v1.ResourceMemory)) - func isSupportedQoSComputeResource(name v1.ResourceName) bool { + supportedQoSComputeResources := sets.NewString(string(v1.ResourceCPU), string(v1.ResourceMemory)) return supportedQoSComputeResources.Has(string(name)) || v1helper.IsHugePageResourceName(name) } @@ -75,7 +74,7 @@ func GetPodQOS(pod *v1.Pod) v1.PodQOSClass { } } - if len(qosLimitsFound) != len(supportedQoSComputeResources) { + if !qosLimitsFound.HasAll(string(v1.ResourceMemory), string(v1.ResourceCPU)) { isGuaranteed = false } } diff --git a/pkg/api/validation/validation.go b/pkg/api/validation/validation.go index a14fc6841fd..aac45af683b 100644 --- a/pkg/api/validation/validation.go +++ b/pkg/api/validation/validation.go @@ -3482,7 +3482,10 @@ func ValidateNode(node *api.Node) field.ErrorList { allErrs = append(allErrs, validateNodeTaints(node.Spec.Taints, fldPath.Child("taints"))...) } - // Only validate spec. All status fields are optional and can be updated later. + // Only validate spec. + // All status fields are optional and can be updated later. + // That said, if specified, we need to ensure they are valid. + allErrs = append(allErrs, ValidateNodeResources(node)...) // external ID is required. if len(node.Spec.ExternalID) == 0 { @@ -3498,6 +3501,38 @@ func ValidateNode(node *api.Node) field.ErrorList { return allErrs } +// ValidateNodeResources is used to make sure a node has valid capacity and allocatable values. +func ValidateNodeResources(node *api.Node) field.ErrorList { + allErrs := field.ErrorList{} + // Validate resource quantities in capacity. + hugePageSizes := sets.NewString() + for k, v := range node.Status.Capacity { + resPath := field.NewPath("status", "capacity", string(k)) + allErrs = append(allErrs, ValidateResourceQuantityValue(string(k), v, resPath)...) + // track any huge page size that has a positive value + if helper.IsHugePageResourceName(k) && v.Value() > int64(0) { + hugePageSizes.Insert(string(k)) + } + if len(hugePageSizes) > 1 { + allErrs = append(allErrs, field.Invalid(resPath, v, "may not have pre-allocated hugepages for multiple page sizes")) + } + } + // Validate resource quantities in allocatable. + hugePageSizes = sets.NewString() + for k, v := range node.Status.Allocatable { + resPath := field.NewPath("status", "allocatable", string(k)) + allErrs = append(allErrs, ValidateResourceQuantityValue(string(k), v, resPath)...) + // track any huge page size that has a positive value + if helper.IsHugePageResourceName(k) && v.Value() > int64(0) { + hugePageSizes.Insert(string(k)) + } + if len(hugePageSizes) > 1 { + allErrs = append(allErrs, field.Invalid(resPath, v, "may not have pre-allocated hugepages for multiple page sizes")) + } + } + return allErrs +} + // ValidateNodeUpdate tests to make sure a node update can be applied. Modifies oldNode. func ValidateNodeUpdate(node, oldNode *api.Node) field.ErrorList { fldPath := field.NewPath("metadata") @@ -3510,16 +3545,7 @@ func ValidateNodeUpdate(node, oldNode *api.Node) field.ErrorList { // allErrs = append(allErrs, field.Invalid("status", node.Status, "must be empty")) // } - // Validate resource quantities in capacity. - for k, v := range node.Status.Capacity { - resPath := field.NewPath("status", "capacity", string(k)) - allErrs = append(allErrs, ValidateResourceQuantityValue(string(k), v, resPath)...) - } - // Validate resource quantities in allocatable. - for k, v := range node.Status.Allocatable { - resPath := field.NewPath("status", "allocatable", string(k)) - allErrs = append(allErrs, ValidateResourceQuantityValue(string(k), v, resPath)...) - } + allErrs = append(allErrs, ValidateNodeResources(node)...) // Validate no duplicate addresses in node status. addresses := make(map[api.NodeAddress]bool) diff --git a/pkg/api/validation/validation_test.go b/pkg/api/validation/validation_test.go index d631286ec0f..05491bcbaed 100644 --- a/pkg/api/validation/validation_test.go +++ b/pkg/api/validation/validation_test.go @@ -7938,6 +7938,8 @@ func TestValidateNode(t *testing.T) { api.ResourceName(api.ResourceCPU): resource.MustParse("10"), api.ResourceName(api.ResourceMemory): resource.MustParse("10G"), api.ResourceName("my.org/gpu"): resource.MustParse("10"), + api.ResourceName("hugepages-2Mi"): resource.MustParse("10Gi"), + api.ResourceName("hugepages-1Gi"): resource.MustParse("0"), }, }, Spec: api.NodeSpec{ @@ -8219,6 +8221,27 @@ func TestValidateNode(t *testing.T) { ExternalID: "external", }, }, + "multiple-pre-allocated-hugepages": { + ObjectMeta: metav1.ObjectMeta{ + Name: "abc", + Labels: validSelector, + }, + Status: api.NodeStatus{ + Addresses: []api.NodeAddress{ + {Type: api.NodeExternalIP, Address: "something"}, + }, + Capacity: api.ResourceList{ + api.ResourceName(api.ResourceCPU): resource.MustParse("10"), + api.ResourceName(api.ResourceMemory): resource.MustParse("10G"), + api.ResourceName("my.org/gpu"): resource.MustParse("10"), + api.ResourceName("hugepages-2Mi"): resource.MustParse("10Gi"), + api.ResourceName("hugepages-1Gi"): resource.MustParse("10Gi"), + }, + }, + Spec: api.NodeSpec{ + ExternalID: "external", + }, + }, } for k, v := range errorCases { errs := ValidateNode(&v) diff --git a/pkg/kubelet/cm/cgroup_manager_linux.go b/pkg/kubelet/cm/cgroup_manager_linux.go index 137a2a14620..e24a955e106 100644 --- a/pkg/kubelet/cm/cgroup_manager_linux.go +++ b/pkg/kubelet/cm/cgroup_manager_linux.go @@ -49,7 +49,7 @@ const ( // hugePageSizeList is useful for converting to the hugetlb canonical unit // which is what is expected when interacting with libcontainer -var hugePageSizeList = []string{"", "kB", "MB", "GB", "TB", "PB"} +var hugePageSizeList = []string{"B", "kB", "MB", "GB", "TB", "PB"} // ConvertCgroupNameToSystemd converts the internal cgroup name to a systemd name. // For example, the name /Burstable/pod_123-456 becomes Burstable-pod_123_456.slice