diff --git a/pkg/kubelet/kubelet_node_status.go b/pkg/kubelet/kubelet_node_status.go
index 5f10945b041..f1f6455ccf1 100644
--- a/pkg/kubelet/kubelet_node_status.go
+++ b/pkg/kubelet/kubelet_node_status.go
@@ -31,6 +31,7 @@ import (
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/apimachinery/pkg/util/sets"
 	cloudprovider "k8s.io/cloud-provider"
 	cloudproviderapi "k8s.io/cloud-provider/api"
 	"k8s.io/klog"
@@ -117,6 +118,7 @@ func (kl *Kubelet) tryRegisterWithAPIServer(node *v1.Node) bool {
 		requiresUpdate := kl.reconcileCMADAnnotationWithExistingNode(node, existingNode)
 		requiresUpdate = kl.updateDefaultLabels(node, existingNode) || requiresUpdate
 		requiresUpdate = kl.reconcileExtendedResource(node, existingNode) || requiresUpdate
+		requiresUpdate = kl.reconcileHugePageResource(node, existingNode) || requiresUpdate
 		if requiresUpdate {
 			if _, _, err := nodeutil.PatchNodeStatus(kl.kubeClient.CoreV1(), types.NodeName(kl.nodeName), originalNode, existingNode); err != nil {
 				klog.Errorf("Unable to reconcile node %q with API server: error updating node: %v", kl.nodeName, err)
@@ -127,6 +129,53 @@ func (kl *Kubelet) tryRegisterWithAPIServer(node *v1.Node) bool {
 	return true
 }
 
+// reconcileHugePageResource updates the huge page capacity for each page size and removes huge page sizes that are no longer supported
+func (kl *Kubelet) reconcileHugePageResource(initialNode, existingNode *v1.Node) bool {
+	requiresUpdate := false
+	supportedHugePageResources := sets.String{}
+
+	for resourceName := range initialNode.Status.Capacity {
+		if !v1helper.IsHugePageResourceName(resourceName) {
+			continue
+		}
+		supportedHugePageResources.Insert(string(resourceName))
+
+		initialCapacity := initialNode.Status.Capacity[resourceName]
+		initialAllocatable := initialNode.Status.Allocatable[resourceName]
+
+		capacity, resourceIsSupported := existingNode.Status.Capacity[resourceName]
+		allocatable := existingNode.Status.Allocatable[resourceName]
+
+		// Add or update capacity if the size was previously unsupported or has changed
+		if !resourceIsSupported || capacity.Cmp(initialCapacity) != 0 {
+			existingNode.Status.Capacity[resourceName] = initialCapacity.DeepCopy()
+			requiresUpdate = true
+		}
+
+		// Add or update allocatable if the size was previously unsupported or has changed
+		if !resourceIsSupported || allocatable.Cmp(initialAllocatable) != 0 {
+			existingNode.Status.Allocatable[resourceName] = initialAllocatable.DeepCopy()
+			requiresUpdate = true
+		}
+
+	}
+
+	for resourceName := range existingNode.Status.Capacity {
+		if !v1helper.IsHugePageResourceName(resourceName) {
+			continue
+		}
+
+		// If a huge page size is no longer supported, remove it from the node
+		if !supportedHugePageResources.Has(string(resourceName)) {
+			delete(existingNode.Status.Capacity, resourceName)
+			delete(existingNode.Status.Allocatable, resourceName)
+			klog.Infof("Removing now unsupported huge page resource named: %s", resourceName)
+			requiresUpdate = true
+		}
+	}
+	return requiresUpdate
+}
+
 // Zeros out extended resource capacity during reconciliation.
 func (kl *Kubelet) reconcileExtendedResource(initialNode, node *v1.Node) bool {
 	requiresUpdate := false
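The new reconcile function is the core of this change, so a compact model of its behavior may help reviewers. Below is a minimal, self-contained sketch of the same add/update/remove logic over plain maps: the prefix check mirrors the `hugepages-<size>` naming convention that `v1helper.IsHugePageResourceName` matches on, and quantities are simplified to `int64` byte counts rather than `resource.Quantity` values.

```go
package main

import (
	"fmt"
	"strings"
)

// isHugePageResource mimics v1helper.IsHugePageResourceName: huge page
// resources are named "hugepages-<page size>", e.g. "hugepages-2Mi".
func isHugePageResource(name string) bool {
	return strings.HasPrefix(name, "hugepages-")
}

// reconcile copies huge page quantities from initial into existing and
// deletes huge page sizes that initial no longer reports. It returns true
// if existing was modified, analogous to requiresUpdate in the kubelet.
func reconcile(initial, existing map[string]int64) bool {
	changed := false
	supported := map[string]bool{}
	for name, qty := range initial {
		if !isHugePageResource(name) {
			continue
		}
		supported[name] = true
		if cur, ok := existing[name]; !ok || cur != qty {
			existing[name] = qty // add a new size or update a changed quantity
			changed = true
		}
	}
	for name := range existing {
		if isHugePageResource(name) && !supported[name] {
			delete(existing, name) // size no longer supported
			changed = true
		}
	}
	return changed
}

func main() {
	initial := map[string]int64{"hugepages-2Mi": 100 << 20}          // node now reports 100Mi of 2Mi pages
	existing := map[string]int64{"hugepages-1Gi": 2 << 30, "cpu": 2} // stale 1Gi entry from a previous boot
	fmt.Println(reconcile(initial, existing), existing)
	// Output: true map[cpu:2 hugepages-2Mi:104857600]
}
```

The two-pass shape is deliberate: the first loop can only add or update sizes present in the initial node, so the second loop over the existing node is needed to catch sizes that disappeared, e.g. after a reboot with different boot-time huge page settings.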
diff --git a/pkg/kubelet/kubelet_node_status_test.go b/pkg/kubelet/kubelet_node_status_test.go
index 491d207371d..9cd38921911 100644
--- a/pkg/kubelet/kubelet_node_status_test.go
+++ b/pkg/kubelet/kubelet_node_status_test.go
@@ -1705,6 +1705,255 @@ func TestUpdateDefaultLabels(t *testing.T) {
 	}
 }
 
+func TestReconcileHugePageResource(t *testing.T) {
+	testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
+	hugePageResourceName64Ki := v1.ResourceName("hugepages-64Ki")
+	hugePageResourceName2Mi := v1.ResourceName("hugepages-2Mi")
+	hugePageResourceName1Gi := v1.ResourceName("hugepages-1Gi")
+
+	cases := []struct {
+		name         string
+		testKubelet  *TestKubelet
+		initialNode  *v1.Node
+		existingNode *v1.Node
+		expectedNode *v1.Node
+		needsUpdate  bool
+	}{
+		{
+			name:        "no update needed when all huge page resources are similar",
+			testKubelet: testKubelet,
+			needsUpdate: false,
+			initialNode: &v1.Node{
+				Status: v1.NodeStatus{
+					Capacity: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName2Mi:     resource.MustParse("100Mi"),
+						hugePageResourceName64Ki:    *resource.NewQuantity(0, resource.BinarySI),
+					},
+					Allocatable: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName2Mi:     resource.MustParse("100Mi"),
+						hugePageResourceName64Ki:    *resource.NewQuantity(0, resource.BinarySI),
+					},
+				},
+			},
+			existingNode: &v1.Node{
+				Status: v1.NodeStatus{
+					Capacity: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName2Mi:     resource.MustParse("100Mi"),
+						hugePageResourceName64Ki:    *resource.NewQuantity(0, resource.BinarySI),
+					},
+					Allocatable: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName2Mi:     resource.MustParse("100Mi"),
+						hugePageResourceName64Ki:    *resource.NewQuantity(0, resource.BinarySI),
+					},
+				},
+			},
+			expectedNode: &v1.Node{
+				Status: v1.NodeStatus{
+					Capacity: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName2Mi:     resource.MustParse("100Mi"),
+						hugePageResourceName64Ki:    *resource.NewQuantity(0, resource.BinarySI),
+					},
+					Allocatable: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName2Mi:     resource.MustParse("100Mi"),
+						hugePageResourceName64Ki:    *resource.NewQuantity(0, resource.BinarySI),
+					},
+				},
+			},
+		}, {
+			name:        "update needed when a new huge page resource is supported",
+			testKubelet: testKubelet,
+			needsUpdate: true,
+			initialNode: &v1.Node{
+				Status: v1.NodeStatus{
+					Capacity: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName2Mi:     *resource.NewQuantity(0, resource.BinarySI),
+						hugePageResourceName1Gi:     resource.MustParse("2Gi"),
+					},
+					Allocatable: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName2Mi:     *resource.NewQuantity(0, resource.BinarySI),
+						hugePageResourceName1Gi:     resource.MustParse("2Gi"),
+					},
+				},
+			},
+			existingNode: &v1.Node{
+				Status: v1.NodeStatus{
+					Capacity: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName2Mi:     resource.MustParse("100Mi"),
+					},
+					Allocatable: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName2Mi:     resource.MustParse("100Mi"),
+					},
+				},
+			},
+			expectedNode: &v1.Node{
+				Status: v1.NodeStatus{
+					Capacity: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName2Mi:     *resource.NewQuantity(0, resource.BinarySI),
+						hugePageResourceName1Gi:     resource.MustParse("2Gi"),
+					},
+					Allocatable: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName2Mi:     *resource.NewQuantity(0, resource.BinarySI),
+						hugePageResourceName1Gi:     resource.MustParse("2Gi"),
+					},
+				},
+			},
+		}, {
+			name:        "update needed when huge page resource quantity has changed",
+			testKubelet: testKubelet,
+			needsUpdate: true,
+			initialNode: &v1.Node{
+				Status: v1.NodeStatus{
+					Capacity: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName1Gi:     resource.MustParse("4Gi"),
+					},
+					Allocatable: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName1Gi:     resource.MustParse("4Gi"),
+					},
+				},
+			},
+			existingNode: &v1.Node{
+				Status: v1.NodeStatus{
+					Capacity: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName1Gi:     resource.MustParse("2Gi"),
+					},
+					Allocatable: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName1Gi:     resource.MustParse("2Gi"),
+					},
+				},
+			},
+			expectedNode: &v1.Node{
+				Status: v1.NodeStatus{
+					Capacity: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName1Gi:     resource.MustParse("4Gi"),
+					},
+					Allocatable: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName1Gi:     resource.MustParse("4Gi"),
+					},
+				},
+			},
+		}, {
+			name:        "update needed when a huge page resource is no longer supported",
+			testKubelet: testKubelet,
+			needsUpdate: true,
+			initialNode: &v1.Node{
+				Status: v1.NodeStatus{
+					Capacity: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName1Gi:     resource.MustParse("2Gi"),
+					},
+					Allocatable: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName1Gi:     resource.MustParse("2Gi"),
+					},
+				},
+			},
+			existingNode: &v1.Node{
+				Status: v1.NodeStatus{
+					Capacity: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName2Mi:     *resource.NewQuantity(0, resource.BinarySI),
+						hugePageResourceName1Gi:     resource.MustParse("2Gi"),
+					},
+					Allocatable: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName2Mi:     *resource.NewQuantity(0, resource.BinarySI),
+						hugePageResourceName1Gi:     resource.MustParse("2Gi"),
+					},
+				},
+			},
+			expectedNode: &v1.Node{
+				Status: v1.NodeStatus{
+					Capacity: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName1Gi:     resource.MustParse("2Gi"),
+					},
+					Allocatable: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName1Gi:     resource.MustParse("2Gi"),
+					},
+				},
+			},
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			defer testKubelet.Cleanup()
+			kubelet := testKubelet.kubelet
+
+			needsUpdate := kubelet.reconcileHugePageResource(tc.initialNode, tc.existingNode)
+			assert.Equal(t, tc.needsUpdate, needsUpdate, tc.name)
+			assert.Equal(t, tc.expectedNode, tc.existingNode, tc.name)
+		})
+	}
+
+}
 func TestReconcileExtendedResource(t *testing.T) {
 	testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
 	testKubelet.kubelet.kubeClient = nil // ensure only the heartbeat client is used
diff --git a/test/e2e_node/hugepages_test.go b/test/e2e_node/hugepages_test.go
index c15d2a2a541..011c2c0273a 100644
--- a/test/e2e_node/hugepages_test.go
+++ b/test/e2e_node/hugepages_test.go
@@ -28,6 +28,7 @@ import (
 	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/uuid"
 	"k8s.io/kubernetes/pkg/kubelet/cm"
 	"k8s.io/kubernetes/test/e2e/framework"
@@ -178,12 +179,57 @@ func runHugePagesTests(f *framework.Framework) {
 		err := e2epod.WaitForPodSuccessInNamespace(f.ClientSet, verifyPod.Name, f.Namespace.Name)
 		framework.ExpectNoError(err)
 	})
+
+	ginkgo.It("should add resources for new huge page sizes on kubelet restart", func() {
+		ginkgo.By("Stopping kubelet")
+		startKubelet := stopKubelet()
+		ginkgo.By(`Patching away support for hugepage resource "hugepages-2Mi"`)
+		patch := []byte(`[{"op": "remove", "path": "/status/capacity/hugepages-2Mi"}, {"op": "remove", "path": "/status/allocatable/hugepages-2Mi"}]`)
+		result := f.ClientSet.CoreV1().RESTClient().Patch(types.JSONPatchType).Resource("nodes").Name(framework.TestContext.NodeName).SubResource("status").Body(patch).Do(context.TODO())
+		framework.ExpectNoError(result.Error(), "while patching")
+
+		ginkgo.By("Starting kubelet again")
+		startKubelet()
+
+		ginkgo.By("verifying that the hugepages-2Mi resource is present again")
+		gomega.Eventually(func() bool {
+			node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), framework.TestContext.NodeName, metav1.GetOptions{})
+			framework.ExpectNoError(err, "while getting node status")
+			_, isPresent := node.Status.Capacity["hugepages-2Mi"]
+			return isPresent
+		}, 30*time.Second, framework.Poll).Should(gomega.Equal(true))
+	})
 }
 
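Both e2e tests drive this reconciliation by editing the node's `status` subresource directly and then restarting the kubelet. For ad-hoc reproduction outside the e2e framework, roughly the same JSON patch can be issued with client-go's typed client; this is only an illustrative sketch, and the kubeconfig path and node name (`my-node`) are placeholders:

```go
package main

import (
	"context"
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
)

func main() {
	// Placeholder kubeconfig path and node name; substitute your own.
	config, err := clientcmd.BuildConfigFromFlags("", "/path/to/kubeconfig")
	if err != nil {
		panic(err)
	}
	client, err := kubernetes.NewForConfig(config)
	if err != nil {
		panic(err)
	}

	// Same patch shape as the test: fabricate a hugepages-3Mi capacity in the
	// node status so the kubelet has a stale size to reconcile away on restart.
	patch := []byte(`[{"op": "add", "path": "/status/capacity/hugepages-3Mi", "value": "9Mi"}]`)
	node, err := client.CoreV1().Nodes().Patch(context.TODO(), "my-node",
		types.JSONPatchType, patch, metav1.PatchOptions{}, "status")
	if err != nil {
		panic(err)
	}
	qty := node.Status.Capacity["hugepages-3Mi"]
	fmt.Println(qty.String()) // prints "9Mi" until the kubelet reconciles it away
}
```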
 // Serial because the test updates kubelet configuration.
 var _ = SIGDescribe("HugePages [Serial] [Feature:HugePages][NodeFeature:HugePages]", func() {
 	f := framework.NewDefaultFramework("hugepages-test")
 
+	ginkgo.It("should remove resources for huge page sizes no longer supported", func() {
+		ginkgo.By("mimicking support for 9Mi of 3Mi huge page memory by patching the node status")
+		patch := []byte(`[{"op": "add", "path": "/status/capacity/hugepages-3Mi", "value": "9Mi"}, {"op": "add", "path": "/status/allocatable/hugepages-3Mi", "value": "9Mi"}]`)
+		result := f.ClientSet.CoreV1().RESTClient().Patch(types.JSONPatchType).Resource("nodes").Name(framework.TestContext.NodeName).SubResource("status").Body(patch).Do(context.TODO())
+		framework.ExpectNoError(result.Error(), "while patching")
+
+		node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), framework.TestContext.NodeName, metav1.GetOptions{})
+		framework.ExpectNoError(err, "while getting node status")
+
+		ginkgo.By("Verifying that the node now supports huge pages with size 3Mi")
+		value, ok := node.Status.Capacity["hugepages-3Mi"]
+		framework.ExpectEqual(ok, true, "capacity should contain resource hugepages-3Mi")
+		framework.ExpectEqual(value.String(), "9Mi", "huge pages with size 3Mi should be supported")
+
+		ginkgo.By("restarting the kubelet and verifying that huge pages with size 3Mi are not supported")
+		restartKubelet()
+
+		ginkgo.By("verifying that the hugepages-3Mi resource is no longer present")
+		gomega.Eventually(func() bool {
+			node, err = f.ClientSet.CoreV1().Nodes().Get(context.TODO(), framework.TestContext.NodeName, metav1.GetOptions{})
+			framework.ExpectNoError(err, "while getting node status")
+			_, isPresent := node.Status.Capacity["hugepages-3Mi"]
+			return isPresent
+		}, 30*time.Second, framework.Poll).Should(gomega.Equal(false))
+	})
+
 	ginkgo.Context("With config updated with hugepages feature enabled", func() {
 		ginkgo.BeforeEach(func() {
 			ginkgo.By("verifying hugepages are supported")