Mirror of https://github.com/k3s-io/kubernetes.git
Merge pull request #123443 from Tal-or/mm_consistent_memory_numa_alloc
memorymanager: avoid violating NUMA node memory allocation rule
Commit f153edf356
@@ -157,6 +157,13 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Container)
		bestHint = extendedHint
	}

	// the best hint might violate the NUMA allocation rule, under which
	// a NUMA node cannot have both single and cross NUMA node allocations
	// https://kubernetes.io/blog/2021/08/11/kubernetes-1-22-feature-memory-manager-moves-to-beta/#single-vs-cross-numa-node-allocation
	if isAffinityViolatingNUMAAllocations(machineState, bestHint.NUMANodeAffinity) {
		return fmt.Errorf("[memorymanager] preferred hint violates NUMA node allocation")
	}

	var containerBlocks []state.Block
	maskBits := bestHint.NUMANodeAffinity.GetBits()
	for resourceName, requestedSize := range requestedResources {
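Concretely, the rule from the linked blog post means two things: a NUMA node that already serves a single NUMA node allocation must not be pulled into a multi node group, and a node that already belongs to one multi node group must not be reused by a hint spanning a different group. The isAffinityViolatingNUMAAllocations helper added in the next hunk checks exactly these two conditions before the hint is committed.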
@@ -992,3 +999,26 @@ func isNUMAAffinitiesEqual(numaAffinity1, numaAffinity2 []int) bool {

	return bitMask1.IsEqual(bitMask2)
}

func isAffinityViolatingNUMAAllocations(machineState state.NUMANodeMap, mask bitmask.BitMask) bool {
	maskBits := mask.GetBits()
	singleNUMAHint := len(maskBits) == 1
	for _, nodeID := range mask.GetBits() {
		// the node was never used for memory allocation
		if machineState[nodeID].NumberOfAssignments == 0 {
			continue
		}
		if singleNUMAHint {
			continue
		}
		// the node is used for a single NUMA node memory allocation,
		// so it cannot be used for a multi NUMA node allocation
		if len(machineState[nodeID].Cells) == 1 {
			return true
		}
		// the node is already used with a different group of nodes,
		// so it cannot be used within the current hint
		if !areGroupsEqual(machineState[nodeID].Cells, maskBits) {
			return true
		}
	}
	return false
}
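For illustration only, here is a minimal, self-contained sketch of the same rule using plain Go types rather than the kubelet's state and bitmask packages. The names toyNUMANode, groupsEqual, and violatesNUMAAllocations are hypothetical, introduced just for this example; it restates the logic of the helper above, it is not the code being merged.

	package main

	import (
		"fmt"
		"sort"
	)

	// toyNUMANode mirrors only the two fields the rule cares about: how many
	// memory assignments already landed on the node, and which group of NUMA
	// cells those assignments span.
	type toyNUMANode struct {
		numberOfAssignments int
		cells               []int
	}

	// groupsEqual reports whether two cell groups contain the same node IDs,
	// ignoring order.
	func groupsEqual(a, b []int) bool {
		if len(a) != len(b) {
			return false
		}
		a, b = append([]int(nil), a...), append([]int(nil), b...)
		sort.Ints(a)
		sort.Ints(b)
		for i := range a {
			if a[i] != b[i] {
				return false
			}
		}
		return true
	}

	// violatesNUMAAllocations is a simplified restatement of the check added in
	// this commit: a hint spanning several NUMA nodes may not reuse a node that
	// already serves a single NUMA allocation or a different node group.
	func violatesNUMAAllocations(machine map[int]*toyNUMANode, hint []int) bool {
		singleNUMAHint := len(hint) == 1
		for _, nodeID := range hint {
			node := machine[nodeID]
			if node.numberOfAssignments == 0 {
				continue // never used, cannot conflict
			}
			if singleNUMAHint {
				continue
			}
			if len(node.cells) == 1 {
				return true // node is pinned to single NUMA allocations
			}
			if !groupsEqual(node.cells, hint) {
				return true // node already belongs to a different NUMA group
			}
		}
		return false
	}

	func main() {
		machine := map[int]*toyNUMANode{
			0: {numberOfAssignments: 1, cells: []int{0}}, // single NUMA allocation already present
			1: {numberOfAssignments: 0, cells: []int{1}},
		}
		fmt.Println(violatesNUMAAllocations(machine, []int{0, 1})) // true: mixes single and cross NUMA
		fmt.Println(violatesNUMAAllocations(machine, []int{0}))    // false: stays single NUMA
		fmt.Println(violatesNUMAAllocations(machine, []int{1}))    // false: node 1 is untouched
	}

Running main prints true for the cross NUMA hint over nodes 0 and 1 and false for the two non-conflicting hints, which corresponds to the error path added to Allocate above.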
@@ -1765,6 +1765,126 @@ func TestStaticPolicyAllocate(t *testing.T) {
			pod:          getPod("pod1", "container1", requirementsGuaranteed),
			topologyHint: &topologymanager.TopologyHint{Preferred: true},
		},
		{
			description: "should validate NUMA node can not have both single and cross NUMA node memory allocations",
			assignments: state.ContainerMemoryAssignments{
				"pod1": map[string][]state.Block{
					"container1": {
						{
							NUMAAffinity: []int{0},
							Type:         v1.ResourceMemory,
							Size:         1024 * mb,
						},
					},
				},
			},
			expectedAssignments: state.ContainerMemoryAssignments{
				"pod1": map[string][]state.Block{
					"container1": {
						{
							NUMAAffinity: []int{0},
							Type:         v1.ResourceMemory,
							Size:         1024 * mb,
						},
					},
				},
			},
			machineState: state.NUMANodeMap{
				0: &state.NUMANodeState{
					MemoryMap: map[v1.ResourceName]*state.MemoryTable{
						v1.ResourceMemory: {
							Allocatable:    1536 * mb,
							Free:           512 * mb,
							Reserved:       1024 * mb,
							SystemReserved: 512 * mb,
							TotalMemSize:   2176 * mb,
						},
						hugepages1Gi: {
							Allocatable:    gb,
							Free:           gb,
							Reserved:       0,
							SystemReserved: 0,
							TotalMemSize:   gb,
						},
					},
					Cells:               []int{0},
					NumberOfAssignments: 1,
				},
				1: &state.NUMANodeState{
					MemoryMap: map[v1.ResourceName]*state.MemoryTable{
						v1.ResourceMemory: {
							Allocatable:    512 * mb,
							Free:           512 * mb,
							Reserved:       0,
							SystemReserved: 512 * mb,
							TotalMemSize:   2176 * mb,
						},
						hugepages1Gi: {
							Allocatable:    gb,
							Free:           gb,
							Reserved:       0,
							SystemReserved: 0,
							TotalMemSize:   gb,
						},
					},
					Cells:               []int{1},
					NumberOfAssignments: 0,
				},
			},
			expectedMachineState: state.NUMANodeMap{
				0: &state.NUMANodeState{
					MemoryMap: map[v1.ResourceName]*state.MemoryTable{
						v1.ResourceMemory: {
							Allocatable:    1536 * mb,
							Free:           512 * mb,
							Reserved:       1024 * mb,
							SystemReserved: 512 * mb,
							TotalMemSize:   2176 * mb,
						},
						hugepages1Gi: {
							Allocatable:    gb,
							Free:           gb,
							Reserved:       0,
							SystemReserved: 0,
							TotalMemSize:   gb,
						},
					},
					Cells:               []int{0},
					NumberOfAssignments: 1,
				},
				1: &state.NUMANodeState{
					MemoryMap: map[v1.ResourceName]*state.MemoryTable{
						v1.ResourceMemory: {
							Allocatable:    512 * mb,
							Free:           512 * mb,
							Reserved:       0,
							SystemReserved: 512 * mb,
							TotalMemSize:   2176 * mb,
						},
						hugepages1Gi: {
							Allocatable:    gb,
							Free:           gb,
							Reserved:       0,
							SystemReserved: 0,
							TotalMemSize:   gb,
						},
					},
					Cells:               []int{1},
					NumberOfAssignments: 0,
				},
			},
			systemReserved: systemReservedMemory{
				0: map[v1.ResourceName]uint64{
					v1.ResourceMemory: 512 * mb,
				},
				1: map[v1.ResourceName]uint64{
					v1.ResourceMemory: 512 * mb,
				},
			},
			pod:           getPod("pod2", "container1", requirementsGuaranteed),
			topologyHint:  &topologymanager.TopologyHint{NUMANodeAffinity: newNUMAAffinity(0, 1), Preferred: true},
			expectedError: fmt.Errorf("[memorymanager] preferred hint violates NUMA node allocation"),
		},
	}

	for _, testCase := range testCases {
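The new TestStaticPolicyAllocate case mirrors the rule directly: node 0 already carries a single NUMA assignment from pod1 (Cells: []int{0}, NumberOfAssignments: 1), so the cross NUMA hint newNUMAAffinity(0, 1) for pod2 must be rejected with the new error, and both the container assignments and the machine state are expected to remain unchanged.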
@@ -3776,3 +3896,79 @@ func Test_getPodRequestedResources(t *testing.T) {
		})
	}
}

func Test_isAffinityViolatingNUMAAllocations(t *testing.T) {
	testsCases := []struct {
		description         string
		machineState        map[int]*state.NUMANodeState
		topologyHint        *topologymanager.TopologyHint
		isViolationExpected bool
	}{
		{
			description: "violating NUMA allocations because given affinity asks for NUMA ID 1 which is on different cells group",
			machineState: map[int]*state.NUMANodeState{
				0: {
					NumberOfAssignments: 1,
					Cells:               []int{0, 1},
				},
				1: {
					NumberOfAssignments: 1,
					Cells:               []int{0, 1},
				},
				2: {
					NumberOfAssignments: 1,
					Cells:               []int{2},
				},
				3: {
					NumberOfAssignments: 0,
					Cells:               []int{3},
				},
			},
			topologyHint: &topologymanager.TopologyHint{
				NUMANodeAffinity: newNUMAAffinity(1, 2),
			},
			isViolationExpected: true,
		},
		{
			description: "violating NUMA allocations because given affinity with multiple nodes asks for NUMA ID 1 which is used for a single NUMA node memory allocation",
			machineState: map[int]*state.NUMANodeState{
				0: {
					NumberOfAssignments: 0,
					Cells:               []int{0, 1},
				},
				1: {
					NumberOfAssignments: 1,
					Cells:               []int{1},
				},
			},
			topologyHint: &topologymanager.TopologyHint{
				NUMANodeAffinity: newNUMAAffinity(0, 1),
			},
			isViolationExpected: true,
		},
		{
			description: "valid affinity, no prior assignments",
			machineState: map[int]*state.NUMANodeState{
				0: {
					NumberOfAssignments: 0,
					Cells:               []int{0},
				},
				1: {
					NumberOfAssignments: 0,
					Cells:               []int{1},
				},
			},
			topologyHint: &topologymanager.TopologyHint{
				NUMANodeAffinity: newNUMAAffinity(0, 1),
			},
			isViolationExpected: false,
		},
	}
	for _, tc := range testsCases {
		t.Run(tc.description, func(t *testing.T) {
			if isAffinityViolatingNUMAAllocations(tc.machineState, tc.topologyHint.NUMANodeAffinity) != tc.isViolationExpected {
				t.Errorf("isAffinityViolatingNUMAAllocations with affinity %v expected to return %t, got %t", tc.topologyHint.NUMANodeAffinity.GetBits(), tc.isViolationExpected, !tc.isViolationExpected)
			}
		})
	}
}
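The table-driven unit test covers the three interesting situations for the helper: a hint that reuses a node already committed to a different cells group, a multi-node hint that touches a node already serving a single NUMA allocation, and a hint over nodes with no prior assignments, which is the only one expected not to be flagged as a violation.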