Add support for removing unsupported huge page sizes

When the kubelet is restarted, it now removes the node resources for huge
page sizes that are no longer supported. This is required when:
- the node disables huge pages
- the default huge page size is changed on older versions of Linux
(which then only support the newly set default)
- a software update changes which sizes are supported (e.g. by changing
boot parameters)
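A minimal standalone sketch of the reconciliation idea (the names isHugePageResource, initial and existing are illustrative, not the kubelet's own helpers; only v1.ResourceHugePagesPrefix and the apimachinery resource package are real APIs): any "hugepages-<size>" resource still recorded on the stored Node object but absent from the resources the kubelet computes at startup is dropped from both capacity and allocatable.

package main

import (
	"fmt"
	"strings"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

// isHugePageResource reports whether a resource name follows the
// "hugepages-<size>" naming convention (illustrative stand-in for the
// kubelet's own helper).
func isHugePageResource(name v1.ResourceName) bool {
	return strings.HasPrefix(string(name), v1.ResourceHugePagesPrefix)
}

func main() {
	// Resources the kubelet would report at startup: only 1Gi pages remain supported.
	initial := v1.ResourceList{
		v1.ResourceCPU:  resource.MustParse("2"),
		"hugepages-1Gi": resource.MustParse("2Gi"),
	}
	// Resources still recorded on the Node object in the API server.
	existing := v1.ResourceList{
		v1.ResourceCPU:  resource.MustParse("2"),
		"hugepages-2Mi": resource.MustParse("100Mi"),
		"hugepages-1Gi": resource.MustParse("2Gi"),
	}
	// Remove huge page sizes that are no longer reported; other resources stay.
	for name := range existing {
		if isHugePageResource(name) {
			if _, supported := initial[name]; !supported {
				delete(existing, name)
			}
		}
	}
	fmt.Println(existing) // hugepages-2Mi is gone; cpu and hugepages-1Gi remain
}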
Odin Ugedal 2019-07-31 20:13:49 +02:00
parent 21d4d13d98
commit 2830827442
3 changed files with 344 additions and 0 deletions


@@ -31,6 +31,7 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
cloudprovider "k8s.io/cloud-provider"
cloudproviderapi "k8s.io/cloud-provider/api"
"k8s.io/klog"
@@ -117,6 +118,7 @@ func (kl *Kubelet) tryRegisterWithAPIServer(node *v1.Node) bool {
requiresUpdate := kl.reconcileCMADAnnotationWithExistingNode(node, existingNode)
requiresUpdate = kl.updateDefaultLabels(node, existingNode) || requiresUpdate
requiresUpdate = kl.reconcileExtendedResource(node, existingNode) || requiresUpdate
requiresUpdate = kl.reconcileHugePageResource(node, existingNode) || requiresUpdate
if requiresUpdate {
if _, _, err := nodeutil.PatchNodeStatus(kl.kubeClient.CoreV1(), types.NodeName(kl.nodeName), originalNode, existingNode); err != nil {
klog.Errorf("Unable to reconcile node %q with API server: error updating node: %v", kl.nodeName, err)
@@ -127,6 +129,53 @@ func (kl *Kubelet) tryRegisterWithAPIServer(node *v1.Node) bool {
return true
}
// reconcileHugePageResource will update huge page capacity for each page size and remove huge page sizes no longer supported
func (kl *Kubelet) reconcileHugePageResource(initialNode, existingNode *v1.Node) bool {
requiresUpdate := false
supportedHugePageResources := sets.String{}
for resourceName := range initialNode.Status.Capacity {
if !v1helper.IsHugePageResourceName(resourceName) {
continue
}
supportedHugePageResources.Insert(string(resourceName))
initialCapacity := initialNode.Status.Capacity[resourceName]
initialAllocatable := initialNode.Status.Allocatable[resourceName]
capacity, resourceIsSupported := existingNode.Status.Capacity[resourceName]
allocatable := existingNode.Status.Allocatable[resourceName]
// Add or update capacity if the size was previously unsupported or has changed
if !resourceIsSupported || capacity.Cmp(initialCapacity) != 0 {
existingNode.Status.Capacity[resourceName] = initialCapacity.DeepCopy()
requiresUpdate = true
}
// Add or update allocatable if the size was previously unsupported or has changed
if !resourceIsSupported || allocatable.Cmp(initialAllocatable) != 0 {
existingNode.Status.Allocatable[resourceName] = initialAllocatable.DeepCopy()
requiresUpdate = true
}
}
for resourceName := range existingNode.Status.Capacity {
if !v1helper.IsHugePageResourceName(resourceName) {
continue
}
// If the huge page size is no longer supported, remove it from the node
if !supportedHugePageResources.Has(string(resourceName)) {
delete(existingNode.Status.Capacity, resourceName)
delete(existingNode.Status.Allocatable, resourceName)
klog.Infof("Removing now unsupported huge page resource named: %s", resourceName)
requiresUpdate = true
}
}
return requiresUpdate
}
// Zeros out extended resource capacity during reconciliation.
func (kl *Kubelet) reconcileExtendedResource(initialNode, node *v1.Node) bool {
requiresUpdate := false


@@ -1705,6 +1705,255 @@ func TestUpdateDefaultLabels(t *testing.T) {
}
}
func TestReconcileHugePageResource(t *testing.T) {
testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
hugePageResourceName64Ki := v1.ResourceName("hugepages-64Ki")
hugePageResourceName2Mi := v1.ResourceName("hugepages-2Mi")
hugePageResourceName1Gi := v1.ResourceName("hugepages-1Gi")
cases := []struct {
name string
testKubelet *TestKubelet
initialNode *v1.Node
existingNode *v1.Node
expectedNode *v1.Node
needsUpdate bool
}{
{
name: "no update needed when all huge page resources are similar",
testKubelet: testKubelet,
needsUpdate: false,
initialNode: &v1.Node{
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName2Mi: resource.MustParse("100Mi"),
hugePageResourceName64Ki: *resource.NewQuantity(0, resource.BinarySI),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName2Mi: resource.MustParse("100Mi"),
hugePageResourceName64Ki: *resource.NewQuantity(0, resource.BinarySI),
},
},
},
existingNode: &v1.Node{
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName2Mi: resource.MustParse("100Mi"),
hugePageResourceName64Ki: *resource.NewQuantity(0, resource.BinarySI),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName2Mi: resource.MustParse("100Mi"),
hugePageResourceName64Ki: *resource.NewQuantity(0, resource.BinarySI),
},
},
},
expectedNode: &v1.Node{
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName2Mi: resource.MustParse("100Mi"),
hugePageResourceName64Ki: *resource.NewQuantity(0, resource.BinarySI),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName2Mi: resource.MustParse("100Mi"),
hugePageResourceName64Ki: *resource.NewQuantity(0, resource.BinarySI),
},
},
},
}, {
name: "update needed when a new huge page resource is supported",
testKubelet: testKubelet,
needsUpdate: true,
initialNode: &v1.Node{
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName2Mi: *resource.NewQuantity(0, resource.BinarySI),
hugePageResourceName1Gi: resource.MustParse("2Gi"),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName2Mi: *resource.NewQuantity(0, resource.BinarySI),
hugePageResourceName1Gi: resource.MustParse("2Gi"),
},
},
},
existingNode: &v1.Node{
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName2Mi: resource.MustParse("100Mi"),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName2Mi: resource.MustParse("100Mi"),
},
},
},
expectedNode: &v1.Node{
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName2Mi: *resource.NewQuantity(0, resource.BinarySI),
hugePageResourceName1Gi: resource.MustParse("2Gi"),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName2Mi: *resource.NewQuantity(0, resource.BinarySI),
hugePageResourceName1Gi: resource.MustParse("2Gi"),
},
},
},
}, {
name: "update needed when huge page resource quantity has changed",
testKubelet: testKubelet,
needsUpdate: true,
initialNode: &v1.Node{
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName1Gi: resource.MustParse("4Gi"),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName1Gi: resource.MustParse("4Gi"),
},
},
},
existingNode: &v1.Node{
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName1Gi: resource.MustParse("2Gi"),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName1Gi: resource.MustParse("2Gi"),
},
},
},
expectedNode: &v1.Node{
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName1Gi: resource.MustParse("4Gi"),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName1Gi: resource.MustParse("4Gi"),
},
},
},
}, {
name: "update needed when a huge page resource is no longer supported",
testKubelet: testKubelet,
needsUpdate: true,
initialNode: &v1.Node{
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName1Gi: resource.MustParse("2Gi"),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName1Gi: resource.MustParse("2Gi"),
},
},
},
existingNode: &v1.Node{
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName2Mi: *resource.NewQuantity(0, resource.BinarySI),
hugePageResourceName1Gi: resource.MustParse("2Gi"),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName2Mi: *resource.NewQuantity(0, resource.BinarySI),
hugePageResourceName1Gi: resource.MustParse("2Gi"),
},
},
},
expectedNode: &v1.Node{
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName1Gi: resource.MustParse("2Gi"),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName1Gi: resource.MustParse("2Gi"),
},
},
},
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
defer testKubelet.Cleanup()
kubelet := testKubelet.kubelet
needsUpdate := kubelet.reconcileHugePageResource(tc.initialNode, tc.existingNode)
assert.Equal(t, tc.needsUpdate, needsUpdate, tc.name)
assert.Equal(t, tc.expectedNode, tc.existingNode, tc.name)
})
}
}
func TestReconcileExtendedResource(t *testing.T) {
testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
testKubelet.kubelet.kubeClient = nil // ensure only the heartbeat client is used


@@ -28,6 +28,7 @@ import (
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/uuid"
"k8s.io/kubernetes/pkg/kubelet/cm"
"k8s.io/kubernetes/test/e2e/framework"
@@ -178,12 +179,57 @@ func runHugePagesTests(f *framework.Framework) {
err := e2epod.WaitForPodSuccessInNamespace(f.ClientSet, verifyPod.Name, f.Namespace.Name)
framework.ExpectNoError(err)
})
ginkgo.It("should add resources for new huge page sizes on kubelet restart", func() {
ginkgo.By("Stopping kubelet")
startKubelet := stopKubelet()
ginkgo.By(`Patching away support for hugepage resource "hugepages-2Mi"`)
patch := []byte(`[{"op": "remove", "path": "/status/capacity/hugepages-2Mi"}, {"op": "remove", "path": "/status/allocatable/hugepages-2Mi"}]`)
result := f.ClientSet.CoreV1().RESTClient().Patch(types.JSONPatchType).Resource("nodes").Name(framework.TestContext.NodeName).SubResource("status").Body(patch).Do(context.TODO())
framework.ExpectNoError(result.Error(), "while patching")
ginkgo.By("Starting kubelet again")
startKubelet()
ginkgo.By("verifying that the hugepages-2Mi resource is present")
gomega.Eventually(func() bool {
node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), framework.TestContext.NodeName, metav1.GetOptions{})
framework.ExpectNoError(err, "while getting node status")
_, isPresent := node.Status.Capacity["hugepages-2Mi"]
return isPresent
}, 30*time.Second, framework.Poll).Should(gomega.Equal(true))
})
}
// Serial because the test updates kubelet configuration.
var _ = SIGDescribe("HugePages [Serial] [Feature:HugePages][NodeFeature:HugePages]", func() {
f := framework.NewDefaultFramework("hugepages-test")
ginkgo.It("should remove resources for huge page sizes no longer supported", func() {
ginkgo.By("mimicking support for 9Mi of 3Mi huge page memory by patching the node status")
patch := []byte(`[{"op": "add", "path": "/status/capacity/hugepages-3Mi", "value": "9Mi"}, {"op": "add", "path": "/status/allocatable/hugepages-3Mi", "value": "9Mi"}]`)
result := f.ClientSet.CoreV1().RESTClient().Patch(types.JSONPatchType).Resource("nodes").Name(framework.TestContext.NodeName).SubResource("status").Body(patch).Do(context.TODO())
framework.ExpectNoError(result.Error(), "while patching")
node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), framework.TestContext.NodeName, metav1.GetOptions{})
framework.ExpectNoError(err, "while getting node status")
ginkgo.By("Verifying that the node now supports huge pages with size 3Mi")
value, ok := node.Status.Capacity["hugepages-3Mi"]
framework.ExpectEqual(ok, true, "capacity should contain resource hugepages-3Mi")
framework.ExpectEqual(value.String(), "9Mi", "huge pages with size 3Mi should be supported")
ginkgo.By("restarting the node and verifying that huge pages with size 3Mi are not supported")
restartKubelet()
ginkgo.By("verifying that the hugepages-3Mi resource no longer is present")
gomega.Eventually(func() bool {
node, err = f.ClientSet.CoreV1().Nodes().Get(context.TODO(), framework.TestContext.NodeName, metav1.GetOptions{})
framework.ExpectNoError(err, "while getting node status")
_, isPresent := node.Status.Capacity["hugepages-3Mi"]
return isPresent
}, 30*time.Second, framework.Poll).Should(gomega.Equal(false))
})
ginkgo.Context("With config updated with hugepages feature enabled", func() {
ginkgo.BeforeEach(func() {
ginkgo.By("verifying hugepages are supported")