Merge pull request #80831 from odinuge/hugetlb-pagesizes-cleanup

Add support for removing unsupported huge page sizes
Kubernetes Prow Robot 2020-06-04 23:41:43 -07:00 committed by GitHub
commit 8ce1b535ee
4 changed files with 365 additions and 5 deletions
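For orientation before the diff: the new reconcileHugePageResource in the first hunk decides which capacity and allocatable entries are huge page resources via v1helper.IsHugePageResourceName, which is used but not shown in this diff. As far as I can tell that helper is essentially a check for the "hugepages-" name prefix (hugepages-2Mi, hugepages-1Gi, and so on). The standalone sketch below illustrates that classification under that assumption; isHugePageResourceName here is a local simplified stand-in rather than the kubelet's helper, the sample names are made up, and it assumes a Go module with k8s.io/api available.

package main

import (
	"fmt"
	"strings"

	v1 "k8s.io/api/core/v1"
)

// isHugePageResourceName is a simplified stand-in for v1helper.IsHugePageResourceName:
// a resource counts as a huge page resource when its name carries the
// "hugepages-" prefix (v1.ResourceHugePagesPrefix).
func isHugePageResourceName(name v1.ResourceName) bool {
	return strings.HasPrefix(string(name), v1.ResourceHugePagesPrefix)
}

func main() {
	for _, name := range []v1.ResourceName{"hugepages-2Mi", "hugepages-1Gi", "cpu", "memory"} {
		// Only the first two names classify as huge page resources and are
		// therefore subject to the add/update/remove reconciliation below.
		fmt.Printf("%-13s hugepage=%v\n", name, isHugePageResourceName(name))
	}
}

With that classification, the reconciliation copies the huge page sizes reported by the freshly built node object into the node stored in the API server and deletes any hugepages-* entry the node no longer reports, which is what the first hunk implements.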

@@ -31,6 +31,7 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
cloudprovider "k8s.io/cloud-provider"
cloudproviderapi "k8s.io/cloud-provider/api"
"k8s.io/klog/v2"
@@ -116,6 +117,7 @@ func (kl *Kubelet) tryRegisterWithAPIServer(node *v1.Node) bool {
requiresUpdate := kl.reconcileCMADAnnotationWithExistingNode(node, existingNode)
requiresUpdate = kl.updateDefaultLabels(node, existingNode) || requiresUpdate
requiresUpdate = kl.reconcileExtendedResource(node, existingNode) || requiresUpdate
requiresUpdate = kl.reconcileHugePageResource(node, existingNode) || requiresUpdate
if requiresUpdate {
if _, _, err := nodeutil.PatchNodeStatus(kl.kubeClient.CoreV1(), types.NodeName(kl.nodeName), originalNode, existingNode); err != nil {
klog.Errorf("Unable to reconcile node %q with API server: error updating node: %v", kl.nodeName, err)
@@ -126,6 +128,53 @@ func (kl *Kubelet) tryRegisterWithAPIServer(node *v1.Node) bool {
return true
}
// reconcileHugePageResource will update huge page capacity for each page size and remove huge page sizes no longer supported
func (kl *Kubelet) reconcileHugePageResource(initialNode, existingNode *v1.Node) bool {
requiresUpdate := false
supportedHugePageResources := sets.String{}
for resourceName := range initialNode.Status.Capacity {
if !v1helper.IsHugePageResourceName(resourceName) {
continue
}
supportedHugePageResources.Insert(string(resourceName))
initialCapacity := initialNode.Status.Capacity[resourceName]
initialAllocatable := initialNode.Status.Allocatable[resourceName]
capacity, resourceIsSupported := existingNode.Status.Capacity[resourceName]
allocatable := existingNode.Status.Allocatable[resourceName]
// Add or update capacity if the size was previously unsupported or has changed
if !resourceIsSupported || capacity.Cmp(initialCapacity) != 0 {
existingNode.Status.Capacity[resourceName] = initialCapacity.DeepCopy()
requiresUpdate = true
}
// Add or update allocatable if the size was previously unsupported or has changed
if !resourceIsSupported || allocatable.Cmp(initialAllocatable) != 0 {
existingNode.Status.Allocatable[resourceName] = initialAllocatable.DeepCopy()
requiresUpdate = true
}
}
for resourceName := range existingNode.Status.Capacity {
if !v1helper.IsHugePageResourceName(resourceName) {
continue
}
// If a huge page size is no longer supported, remove it from the node
if !supportedHugePageResources.Has(string(resourceName)) {
delete(existingNode.Status.Capacity, resourceName)
delete(existingNode.Status.Allocatable, resourceName)
klog.Infof("Removing now unsupported huge page resource named: %s", resourceName)
requiresUpdate = true
}
}
return requiresUpdate
}
// Zeros out extended resource capacity during reconciliation.
func (kl *Kubelet) reconcileExtendedResource(initialNode, node *v1.Node) bool {
requiresUpdate := false

@@ -1694,6 +1694,255 @@ func TestUpdateDefaultLabels(t *testing.T) {
}
}
func TestReconcileHugePageResource(t *testing.T) {
testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
hugePageResourceName64Ki := v1.ResourceName("hugepages-64Ki")
hugePageResourceName2Mi := v1.ResourceName("hugepages-2Mi")
hugePageResourceName1Gi := v1.ResourceName("hugepages-1Gi")
cases := []struct {
name string
testKubelet *TestKubelet
initialNode *v1.Node
existingNode *v1.Node
expectedNode *v1.Node
needsUpdate bool
}{
{
name: "no update needed when all huge page resources are similar",
testKubelet: testKubelet,
needsUpdate: false,
initialNode: &v1.Node{
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName2Mi: resource.MustParse("100Mi"),
hugePageResourceName64Ki: *resource.NewQuantity(0, resource.BinarySI),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName2Mi: resource.MustParse("100Mi"),
hugePageResourceName64Ki: *resource.NewQuantity(0, resource.BinarySI),
},
},
},
existingNode: &v1.Node{
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName2Mi: resource.MustParse("100Mi"),
hugePageResourceName64Ki: *resource.NewQuantity(0, resource.BinarySI),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName2Mi: resource.MustParse("100Mi"),
hugePageResourceName64Ki: *resource.NewQuantity(0, resource.BinarySI),
},
},
},
expectedNode: &v1.Node{
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName2Mi: resource.MustParse("100Mi"),
hugePageResourceName64Ki: *resource.NewQuantity(0, resource.BinarySI),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName2Mi: resource.MustParse("100Mi"),
hugePageResourceName64Ki: *resource.NewQuantity(0, resource.BinarySI),
},
},
},
}, {
name: "update needed when new huge page resources is supported",
testKubelet: testKubelet,
needsUpdate: true,
initialNode: &v1.Node{
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName2Mi: *resource.NewQuantity(0, resource.BinarySI),
hugePageResourceName1Gi: resource.MustParse("2Gi"),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName2Mi: *resource.NewQuantity(0, resource.BinarySI),
hugePageResourceName1Gi: resource.MustParse("2Gi"),
},
},
},
existingNode: &v1.Node{
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName2Mi: resource.MustParse("100Mi"),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName2Mi: resource.MustParse("100Mi"),
},
},
},
expectedNode: &v1.Node{
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName2Mi: *resource.NewQuantity(0, resource.BinarySI),
hugePageResourceName1Gi: resource.MustParse("2Gi"),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName2Mi: *resource.NewQuantity(0, resource.BinarySI),
hugePageResourceName1Gi: resource.MustParse("2Gi"),
},
},
},
}, {
name: "update needed when huge page resource quantity has changed",
testKubelet: testKubelet,
needsUpdate: true,
initialNode: &v1.Node{
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName1Gi: resource.MustParse("4Gi"),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName1Gi: resource.MustParse("4Gi"),
},
},
},
existingNode: &v1.Node{
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName1Gi: resource.MustParse("2Gi"),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName1Gi: resource.MustParse("2Gi"),
},
},
},
expectedNode: &v1.Node{
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName1Gi: resource.MustParse("4Gi"),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName1Gi: resource.MustParse("4Gi"),
},
},
},
}, {
name: "update needed when a huge page resources is no longer supported",
testKubelet: testKubelet,
needsUpdate: true,
initialNode: &v1.Node{
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName1Gi: resource.MustParse("2Gi"),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName1Gi: resource.MustParse("2Gi"),
},
},
},
existingNode: &v1.Node{
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName2Mi: *resource.NewQuantity(0, resource.BinarySI),
hugePageResourceName1Gi: resource.MustParse("2Gi"),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName2Mi: *resource.NewQuantity(0, resource.BinarySI),
hugePageResourceName1Gi: resource.MustParse("2Gi"),
},
},
},
expectedNode: &v1.Node{
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName1Gi: resource.MustParse("2Gi"),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
hugePageResourceName1Gi: resource.MustParse("2Gi"),
},
},
},
},
}
for _, tc := range cases {
t.Run(tc.name, func(T *testing.T) {
defer testKubelet.Cleanup()
kubelet := testKubelet.kubelet
needsUpdate := kubelet.reconcileHugePageResource(tc.initialNode, tc.existingNode)
assert.Equal(t, tc.needsUpdate, needsUpdate, tc.name)
assert.Equal(t, tc.expectedNode, tc.existingNode, tc.name)
})
}
}
func TestReconcileExtendedResource(t *testing.T) {
testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
testKubelet.kubelet.kubeClient = nil // ensure only the heartbeat client is used

@@ -28,6 +28,7 @@ import (
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/uuid"
"k8s.io/kubernetes/pkg/kubelet/cm"
"k8s.io/kubernetes/test/e2e/framework"
@@ -183,12 +184,57 @@ func runHugePagesTests(f *framework.Framework) {
err := e2epod.WaitForPodSuccessInNamespace(f.ClientSet, verifyPod.Name, f.Namespace.Name)
framework.ExpectNoError(err)
})
ginkgo.It("should add resources for new huge page sizes on kubelet restart", func() {
ginkgo.By("Stopping kubelet")
startKubelet := stopKubelet()
ginkgo.By(`Patching away support for hugepage resource "hugepages-2Mi"`)
patch := []byte(`[{"op": "remove", "path": "/status/capacity/hugepages-2Mi"}, {"op": "remove", "path": "/status/allocatable/hugepages-2Mi"}]`)
result := f.ClientSet.CoreV1().RESTClient().Patch(types.JSONPatchType).Resource("nodes").Name(framework.TestContext.NodeName).SubResource("status").Body(patch).Do(context.TODO())
framework.ExpectNoError(result.Error(), "while patching")
ginkgo.By("Starting kubelet again")
startKubelet()
ginkgo.By("verifying that the hugepages-2Mi resource is present")
gomega.Eventually(func() bool {
node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), framework.TestContext.NodeName, metav1.GetOptions{})
framework.ExpectNoError(err, "while getting node status")
_, isPresent := node.Status.Capacity["hugepages-2Mi"]
return isPresent
}, 30*time.Second, framework.Poll).Should(gomega.Equal(true))
})
}
// Serial because the test updates kubelet configuration.
var _ = SIGDescribe("HugePages [Serial] [Feature:HugePages][NodeFeature:HugePages]", func() {
f := framework.NewDefaultFramework("hugepages-test")
ginkgo.It("should remove resources for huge page sizes no longer supported", func() {
ginkgo.By("mimicking support for 9Mi of 3Mi huge page memory by patching the node status")
patch := []byte(`[{"op": "add", "path": "/status/capacity/hugepages-3Mi", "value": "9Mi"}, {"op": "add", "path": "/status/allocatable/hugepages-3Mi", "value": "9Mi"}]`)
result := f.ClientSet.CoreV1().RESTClient().Patch(types.JSONPatchType).Resource("nodes").Name(framework.TestContext.NodeName).SubResource("status").Body(patch).Do(context.TODO())
framework.ExpectNoError(result.Error(), "while patching")
node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), framework.TestContext.NodeName, metav1.GetOptions{})
framework.ExpectNoError(err, "while getting node status")
ginkgo.By("Verifying that the node now supports huge pages with size 3Mi")
value, ok := node.Status.Capacity["hugepages-3Mi"]
framework.ExpectEqual(ok, true, "capacity should contain resource hugepages-3Mi")
framework.ExpectEqual(value.String(), "9Mi", "huge pages with size 3Mi should be supported")
ginkgo.By("restarting the node and verifying that huge pages with size 3Mi are not supported")
restartKubelet()
ginkgo.By("verifying that the hugepages-3Mi resource no longer is present")
gomega.Eventually(func() bool {
node, err = f.ClientSet.CoreV1().Nodes().Get(context.TODO(), framework.TestContext.NodeName, metav1.GetOptions{})
framework.ExpectNoError(err, "while getting node status")
_, isPresent := node.Status.Capacity["hugepages-3Mi"]
return isPresent
}, 30*time.Second, framework.Poll).Should(gomega.Equal(false))
})
ginkgo.Context("With config updated with hugepages feature enabled", func() {
ginkgo.BeforeEach(func() {
ginkgo.By("verifying hugepages are supported")

@@ -373,18 +373,34 @@ func getCRIClient() (internalapi.RuntimeService, internalapi.ImageManagerService
}
// TODO: Find a uniform way to deal with systemctl/initctl/service operations. #34494
func findRunningKubletServiceName() string {
stdout, err := exec.Command("sudo", "systemctl", "list-units", "kubelet*", "--state=running").CombinedOutput()
framework.ExpectNoError(err)
regex := regexp.MustCompile("(kubelet-\\w+)")
matches := regex.FindStringSubmatch(string(stdout))
framework.ExpectNotEqual(len(matches), 0, "Found more than one kubelet service running: %q", stdout)
kubeletServiceName := matches[0]
framework.Logf("Get running kubelet with systemctl: %v, %v", string(stdout), kubeletServiceName)
return kubeletServiceName
}
func restartKubelet() {
kubeletServiceName := findRunningKubletServiceName()
stdout, err := exec.Command("sudo", "systemctl", "restart", kubeletServiceName).CombinedOutput()
framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %v", err, stdout)
}
// stopKubelet will kill the running kubelet, and returns a func that will restart the process again
func stopKubelet() func() {
kubeletServiceName := findRunningKubletServiceName()
stdout, err := exec.Command("sudo", "systemctl", "kill", kubeletServiceName).CombinedOutput()
framework.ExpectNoError(err, "Failed to stop kubelet with systemctl: %v, %v", err, stdout)
return func() {
stdout, err := exec.Command("sudo", "systemctl", "start", kubeletServiceName).CombinedOutput()
framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %v", err, stdout)
}
}
func toCgroupFsName(cgroupName cm.CgroupName) string {
if framework.TestContext.KubeletConfig.CgroupDriver == "systemd" {
return cgroupName.ToSystemd()