Merge pull request #123443 from Tal-or/mm_consistent_memory_numa_alloc

memorymanager: avoid violating NUMA node memory allocation rule
This commit is contained in:
Kubernetes Prow Robot 2024-09-17 20:10:43 +01:00 committed by GitHub
commit f153edf356
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 226 additions and 0 deletions

View File

@ -157,6 +157,13 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
bestHint = extendedHint bestHint = extendedHint
} }
// the best hint might violate the NUMA allocation rule on which
// NUMA node cannot have both single and cross NUMA node allocations
// https://kubernetes.io/blog/2021/08/11/kubernetes-1-22-feature-memory-manager-moves-to-beta/#single-vs-cross-numa-node-allocation
if isAffinityViolatingNUMAAllocations(machineState, bestHint.NUMANodeAffinity) {
return fmt.Errorf("[memorymanager] preferred hint violates NUMA node allocation")
}
var containerBlocks []state.Block var containerBlocks []state.Block
maskBits := bestHint.NUMANodeAffinity.GetBits() maskBits := bestHint.NUMANodeAffinity.GetBits()
for resourceName, requestedSize := range requestedResources { for resourceName, requestedSize := range requestedResources {
@ -992,3 +999,26 @@ func isNUMAAffinitiesEqual(numaAffinity1, numaAffinity2 []int) bool {
return bitMask1.IsEqual(bitMask2) return bitMask1.IsEqual(bitMask2)
} }
func isAffinityViolatingNUMAAllocations(machineState state.NUMANodeMap, mask bitmask.BitMask) bool {
maskBits := mask.GetBits()
singleNUMAHint := len(maskBits) == 1
for _, nodeID := range mask.GetBits() {
// the node was never used for the memory allocation
if machineState[nodeID].NumberOfAssignments == 0 {
continue
}
if singleNUMAHint {
continue
}
// the node used for the single NUMA memory allocation, it cannot be used for the multi NUMA node allocation
if len(machineState[nodeID].Cells) == 1 {
return true
}
// the node already used with a different group of nodes, it cannot be used within the current hint
if !areGroupsEqual(machineState[nodeID].Cells, maskBits) {
return true
}
}
return false
}

View File

@ -1765,6 +1765,126 @@ func TestStaticPolicyAllocate(t *testing.T) {
pod: getPod("pod1", "container1", requirementsGuaranteed), pod: getPod("pod1", "container1", requirementsGuaranteed),
topologyHint: &topologymanager.TopologyHint{Preferred: true}, topologyHint: &topologymanager.TopologyHint{Preferred: true},
}, },
{
description: "should validate NUMA node can not have both single and cross NUMA node memory allocations",
assignments: state.ContainerMemoryAssignments{
"pod1": map[string][]state.Block{
"container1": {
{
NUMAAffinity: []int{0},
Type: v1.ResourceMemory,
Size: 1024 * mb,
},
},
},
},
expectedAssignments: state.ContainerMemoryAssignments{
"pod1": map[string][]state.Block{
"container1": {
{
NUMAAffinity: []int{0},
Type: v1.ResourceMemory,
Size: 1024 * mb,
},
},
},
},
machineState: state.NUMANodeMap{
0: &state.NUMANodeState{
MemoryMap: map[v1.ResourceName]*state.MemoryTable{
v1.ResourceMemory: {
Allocatable: 1536 * mb,
Free: 512 * mb,
Reserved: 1024 * mb,
SystemReserved: 512 * mb,
TotalMemSize: 2176 * mb,
},
hugepages1Gi: {
Allocatable: gb,
Free: gb,
Reserved: 0,
SystemReserved: 0,
TotalMemSize: gb,
},
},
Cells: []int{0},
NumberOfAssignments: 1,
},
1: &state.NUMANodeState{
MemoryMap: map[v1.ResourceName]*state.MemoryTable{
v1.ResourceMemory: {
Allocatable: 512 * mb,
Free: 512 * mb,
Reserved: 0,
SystemReserved: 512 * mb,
TotalMemSize: 2176 * mb,
},
hugepages1Gi: {
Allocatable: gb,
Free: gb,
Reserved: 0,
SystemReserved: 0,
TotalMemSize: gb,
},
},
Cells: []int{1},
NumberOfAssignments: 0,
},
},
expectedMachineState: state.NUMANodeMap{
0: &state.NUMANodeState{
MemoryMap: map[v1.ResourceName]*state.MemoryTable{
v1.ResourceMemory: {
Allocatable: 1536 * mb,
Free: 512 * mb,
Reserved: 1024 * mb,
SystemReserved: 512 * mb,
TotalMemSize: 2176 * mb,
},
hugepages1Gi: {
Allocatable: gb,
Free: gb,
Reserved: 0,
SystemReserved: 0,
TotalMemSize: gb,
},
},
Cells: []int{0},
NumberOfAssignments: 1,
},
1: &state.NUMANodeState{
MemoryMap: map[v1.ResourceName]*state.MemoryTable{
v1.ResourceMemory: {
Allocatable: 512 * mb,
Free: 512 * mb,
Reserved: 0,
SystemReserved: 512 * mb,
TotalMemSize: 2176 * mb,
},
hugepages1Gi: {
Allocatable: gb,
Free: gb,
Reserved: 0,
SystemReserved: 0,
TotalMemSize: gb,
},
},
Cells: []int{1},
NumberOfAssignments: 0,
},
},
systemReserved: systemReservedMemory{
0: map[v1.ResourceName]uint64{
v1.ResourceMemory: 512 * mb,
},
1: map[v1.ResourceName]uint64{
v1.ResourceMemory: 512 * mb,
},
},
pod: getPod("pod2", "container1", requirementsGuaranteed),
topologyHint: &topologymanager.TopologyHint{NUMANodeAffinity: newNUMAAffinity(0, 1), Preferred: true},
expectedError: fmt.Errorf("[memorymanager] preferred hint violates NUMA node allocation"),
},
} }
for _, testCase := range testCases { for _, testCase := range testCases {
@ -3776,3 +3896,79 @@ func Test_getPodRequestedResources(t *testing.T) {
}) })
} }
} }
func Test_isAffinityViolatingNUMAAllocations(t *testing.T) {
testsCases := []struct {
description string
machineState map[int]*state.NUMANodeState
topologyHint *topologymanager.TopologyHint
isViolationExpected bool
}{
{
description: "violating NUMA allocations because given affinity asks for NUMA ID 1 which is on different cells group",
machineState: map[int]*state.NUMANodeState{
0: {
NumberOfAssignments: 1,
Cells: []int{0, 1},
},
1: {
NumberOfAssignments: 1,
Cells: []int{0, 1},
},
2: {
NumberOfAssignments: 1,
Cells: []int{2},
},
3: {
NumberOfAssignments: 0,
Cells: []int{3},
},
},
topologyHint: &topologymanager.TopologyHint{
NUMANodeAffinity: newNUMAAffinity(1, 2),
},
isViolationExpected: true,
},
{
description: "violating NUMA allocations because given affinity with multiple nodes asks for NUMA ID 1 which is used for a single NUMA node memory allocation",
machineState: map[int]*state.NUMANodeState{
0: {
NumberOfAssignments: 0,
Cells: []int{0, 1},
},
1: {
NumberOfAssignments: 1,
Cells: []int{1},
},
},
topologyHint: &topologymanager.TopologyHint{
NUMANodeAffinity: newNUMAAffinity(0, 1),
},
isViolationExpected: true,
},
{
description: "valid affinity, no prior assignments",
machineState: map[int]*state.NUMANodeState{
0: {
NumberOfAssignments: 0,
Cells: []int{0},
},
1: {
NumberOfAssignments: 0,
Cells: []int{1},
},
},
topologyHint: &topologymanager.TopologyHint{
NUMANodeAffinity: newNUMAAffinity(0, 1),
},
isViolationExpected: false,
},
}
for _, tc := range testsCases {
t.Run(tc.description, func(t *testing.T) {
if isAffinityViolatingNUMAAllocations(tc.machineState, tc.topologyHint.NUMANodeAffinity) != tc.isViolationExpected {
t.Errorf("isAffinityViolatingNUMAAllocations with affinity %v expected to return %t, got %t", tc.topologyHint.NUMANodeAffinity.GetBits(), tc.isViolationExpected, !tc.isViolationExpected)
}
})
}
}