From aa63e5aed210c6ea3c9a13aa66649cb842bf8dc0 Mon Sep 17 00:00:00 2001 From: Artyom Lukianov Date: Wed, 4 Nov 2020 13:53:32 +0200 Subject: [PATCH] memory manager: provide an additional validation for reserved memory Calculate the total amount of reserved memory only for NUMA nodes that are existing under the machine. Signed-off-by: Artyom Lukianov --- .../cm/memorymanager/memory_manager.go | 21 ++++++--- .../cm/memorymanager/memory_manager_test.go | 45 ++++++++++++++----- 2 files changed, 51 insertions(+), 15 deletions(-) diff --git a/pkg/kubelet/cm/memorymanager/memory_manager.go b/pkg/kubelet/cm/memorymanager/memory_manager.go index 3f906ac68f9..058f3ad922e 100644 --- a/pkg/kubelet/cm/memorymanager/memory_manager.go +++ b/pkg/kubelet/cm/memorymanager/memory_manager.go @@ -267,6 +267,7 @@ func (m *manager) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[str return m.policy.GetTopologyHints(m.state, pod, container) } +// TODO: move the method to the upper level, to re-use it under the CPU and memory managers func (m *manager) removeStaleState() { // Only once all sources are ready do we attempt to remove any stale state. // This ensures that the call to `m.activePods()` below will succeed with @@ -319,10 +320,20 @@ func (m *manager) policyRemoveContainerByRef(podUID string, containerName string return err } -func getTotalMemoryTypeReserved(preReservedMemory map[int]map[v1.ResourceName]resource.Quantity) map[v1.ResourceName]resource.Quantity { +func getTotalMemoryTypeReserved(machineInfo *cadvisorapi.MachineInfo, reservedMemory map[int]map[v1.ResourceName]resource.Quantity) map[v1.ResourceName]resource.Quantity { totalMemoryType := map[v1.ResourceName]resource.Quantity{} - for _, node := range preReservedMemory { + numaNodes := map[int]bool{} + for _, numaNode := range machineInfo.Topology { + numaNodes[numaNode.Id] = true + } + + for nodeID, node := range reservedMemory { + if !numaNodes[nodeID] { + klog.Warningf("The NUMA node %d specified under --reserved- memory does not exist on the machine", nodeID) + continue + } + for memType, memVal := range node { if totalMem, exists := totalMemoryType[memType]; exists { memVal.Add(totalMem) @@ -334,8 +345,8 @@ func getTotalMemoryTypeReserved(preReservedMemory map[int]map[v1.ResourceName]re return totalMemoryType } -func validateReservedMemory(nodeAllocatableReservation v1.ResourceList, reservedMemory map[int]map[v1.ResourceName]resource.Quantity) error { - totalMemoryType := getTotalMemoryTypeReserved(reservedMemory) +func validateReservedMemory(machineInfo *cadvisorapi.MachineInfo, nodeAllocatableReservation v1.ResourceList, reservedMemory map[int]map[v1.ResourceName]resource.Quantity) error { + totalMemoryType := getTotalMemoryTypeReserved(machineInfo, reservedMemory) commonMemoryTypeSet := make(map[v1.ResourceName]bool) for resourceType := range totalMemoryType { @@ -391,7 +402,7 @@ func convertReserved(machineInfo *cadvisorapi.MachineInfo, reservedMemory map[in } func getSystemReservedMemory(machineInfo *cadvisorapi.MachineInfo, nodeAllocatableReservation v1.ResourceList, preReservedMemory map[int]map[v1.ResourceName]resource.Quantity) (systemReservedMemory, error) { - if err := validateReservedMemory(nodeAllocatableReservation, preReservedMemory); err != nil { + if err := validateReservedMemory(machineInfo, nodeAllocatableReservation, preReservedMemory); err != nil { return nil, err } diff --git a/pkg/kubelet/cm/memorymanager/memory_manager_test.go b/pkg/kubelet/cm/memorymanager/memory_manager_test.go index 40abd34447e..edbaa1fed8f 100644 --- a/pkg/kubelet/cm/memorymanager/memory_manager_test.go +++ b/pkg/kubelet/cm/memorymanager/memory_manager_test.go @@ -146,54 +146,77 @@ func getPod(podUID string, containerName string, requirements *v1.ResourceRequir } func TestValidateReservedMemory(t *testing.T) { + machineInfo := &cadvisorapi.MachineInfo{ + Topology: []cadvisorapi.Node{ + {Id: 0}, + {Id: 1}, + }, + } const msgNotEqual = "the total amount of memory of type \"%s\" is not equal to the value determined by Node Allocatable feature" testCases := []struct { description string nodeAllocatableReservation v1.ResourceList + machineInfo *cadvisorapi.MachineInfo systemReservedMemory map[int]map[v1.ResourceName]resource.Quantity expectedError string }{ { - "Node Allocatable not set, pre-reserved not set", + "Node Allocatable not set, reserved not set", v1.ResourceList{}, + machineInfo, map[int]map[v1.ResourceName]resource.Quantity{}, "", }, { - "Node Allocatable set to zero, pre-reserved set to zero", + "Node Allocatable set to zero, reserved set to zero", v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(0, resource.DecimalSI)}, + machineInfo, map[int]map[v1.ResourceName]resource.Quantity{ 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(0, resource.DecimalSI)}, }, "", }, { - "Node Allocatable not set (equal zero), pre-reserved set", + "Node Allocatable not set (equal zero), reserved set", v1.ResourceList{}, + machineInfo, map[int]map[v1.ResourceName]resource.Quantity{ 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI)}, }, fmt.Sprintf(msgNotEqual, v1.ResourceMemory), }, { - "Node Allocatable set, pre-reserved not set", + "Node Allocatable set, reserved not set", v1.ResourceList{hugepages2M: *resource.NewQuantity(5, resource.DecimalSI)}, + machineInfo, map[int]map[v1.ResourceName]resource.Quantity{}, fmt.Sprintf(msgNotEqual, hugepages2M), }, { - "Pre-reserved not equal to Node Allocatable", + "Reserved not equal to Node Allocatable", v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(5, resource.DecimalSI)}, + machineInfo, map[int]map[v1.ResourceName]resource.Quantity{ 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI)}, }, fmt.Sprintf(msgNotEqual, v1.ResourceMemory), }, { - "Pre-reserved total equal to Node Allocatable", + "Reserved contains the NUMA node that does not exist under the machine", + v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(17, resource.DecimalSI)}, + machineInfo, + map[int]map[v1.ResourceName]resource.Quantity{ + 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI)}, + 2: nodeResources{v1.ResourceMemory: *resource.NewQuantity(5, resource.DecimalSI)}, + }, + fmt.Sprintf(msgNotEqual, v1.ResourceMemory), + }, + { + "Reserved total equal to Node Allocatable", v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(17, resource.DecimalSI), hugepages2M: *resource.NewQuantity(77, resource.DecimalSI), hugepages1G: *resource.NewQuantity(13, resource.DecimalSI)}, + machineInfo, map[int]map[v1.ResourceName]resource.Quantity{ 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI), hugepages2M: *resource.NewQuantity(70, resource.DecimalSI), @@ -204,10 +227,11 @@ func TestValidateReservedMemory(t *testing.T) { "", }, { - "Pre-reserved total hugapages-2M not equal to Node Allocatable", + "Reserved total hugapages-2M not equal to Node Allocatable", v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(17, resource.DecimalSI), hugepages2M: *resource.NewQuantity(14, resource.DecimalSI), hugepages1G: *resource.NewQuantity(13, resource.DecimalSI)}, + machineInfo, map[int]map[v1.ResourceName]resource.Quantity{ 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI), hugepages2M: *resource.NewQuantity(70, resource.DecimalSI), @@ -215,13 +239,14 @@ func TestValidateReservedMemory(t *testing.T) { 1: nodeResources{v1.ResourceMemory: *resource.NewQuantity(5, resource.DecimalSI), hugepages2M: *resource.NewQuantity(7, resource.DecimalSI)}, }, + fmt.Sprintf(msgNotEqual, hugepages2M), }, } for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - err := validateReservedMemory(tc.nodeAllocatableReservation, tc.systemReservedMemory) + err := validateReservedMemory(tc.machineInfo, tc.nodeAllocatableReservation, tc.systemReservedMemory) if strings.TrimSpace(tc.expectedError) != "" { assert.Error(t, err) assert.Equal(t, err.Error(), tc.expectedError) @@ -254,7 +279,7 @@ func TestConvertPreReserved(t *testing.T) { "", }, { - "Single NUMA node is pre-reserved", + "Single NUMA node is reserved", map[int]map[v1.ResourceName]resource.Quantity{ 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI), hugepages2M: *resource.NewQuantity(70, resource.DecimalSI), @@ -271,7 +296,7 @@ func TestConvertPreReserved(t *testing.T) { "", }, { - "Both NUMA nodes are pre-reserved", + "Both NUMA nodes are reserved", map[int]map[v1.ResourceName]resource.Quantity{ 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI), hugepages2M: *resource.NewQuantity(70, resource.DecimalSI),