mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-11 21:12:07 +00:00
memorymanager: avoid violating NUMA node memory allocation rule
According to https://kubernetes.io/blog/2021/08/11/kubernetes-1-22-feature-memory-manager-moves-to-beta/#single-vs-cross-numa-node-allocation and to the design introduce in the memory manager KEP: https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/1769-memory-manager the NUMA node can not have both single and cross NUMA node allocations. There're cases when the chosen affinity hint does not align with the rule above but the pod get admitted anyway. The implications are incosistent admission/rejection behaviors from memory manager side. In order to fix the issue, we should validate that the affinity hint that has been chosen is not violating the above rule. Signed-off-by: Talor Itzhak <titzhak@redhat.com>
This commit is contained in:
parent
5a99930ab5
commit
37399c8d4f
@ -157,6 +157,13 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
|
||||
bestHint = extendedHint
|
||||
}
|
||||
|
||||
// the best hint might violate the NUMA allocation rule on which
|
||||
// NUMA node cannot have both single and cross NUMA node allocations
|
||||
// https://kubernetes.io/blog/2021/08/11/kubernetes-1-22-feature-memory-manager-moves-to-beta/#single-vs-cross-numa-node-allocation
|
||||
if isAffinityViolatingNUMAAllocations(machineState, bestHint.NUMANodeAffinity) {
|
||||
return fmt.Errorf("[memorymanager] preferred hint violates NUMA node allocation")
|
||||
}
|
||||
|
||||
var containerBlocks []state.Block
|
||||
maskBits := bestHint.NUMANodeAffinity.GetBits()
|
||||
for resourceName, requestedSize := range requestedResources {
|
||||
@ -992,3 +999,26 @@ func isNUMAAffinitiesEqual(numaAffinity1, numaAffinity2 []int) bool {
|
||||
|
||||
return bitMask1.IsEqual(bitMask2)
|
||||
}
|
||||
|
||||
func isAffinityViolatingNUMAAllocations(machineState state.NUMANodeMap, mask bitmask.BitMask) bool {
|
||||
maskBits := mask.GetBits()
|
||||
singleNUMAHint := len(maskBits) == 1
|
||||
for _, nodeID := range mask.GetBits() {
|
||||
// the node was never used for the memory allocation
|
||||
if machineState[nodeID].NumberOfAssignments == 0 {
|
||||
continue
|
||||
}
|
||||
if singleNUMAHint {
|
||||
continue
|
||||
}
|
||||
// the node used for the single NUMA memory allocation, it cannot be used for the multi NUMA node allocation
|
||||
if len(machineState[nodeID].Cells) == 1 {
|
||||
return true
|
||||
}
|
||||
// the node already used with a different group of nodes, it cannot be used within the current hint
|
||||
if !areGroupsEqual(machineState[nodeID].Cells, maskBits) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
@ -1765,6 +1765,126 @@ func TestStaticPolicyAllocate(t *testing.T) {
|
||||
pod: getPod("pod1", "container1", requirementsGuaranteed),
|
||||
topologyHint: &topologymanager.TopologyHint{Preferred: true},
|
||||
},
|
||||
{
|
||||
description: "should validate NUMA node can not have both single and cross NUMA node memory allocations",
|
||||
assignments: state.ContainerMemoryAssignments{
|
||||
"pod1": map[string][]state.Block{
|
||||
"container1": {
|
||||
{
|
||||
NUMAAffinity: []int{0},
|
||||
Type: v1.ResourceMemory,
|
||||
Size: 1024 * mb,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedAssignments: state.ContainerMemoryAssignments{
|
||||
"pod1": map[string][]state.Block{
|
||||
"container1": {
|
||||
{
|
||||
NUMAAffinity: []int{0},
|
||||
Type: v1.ResourceMemory,
|
||||
Size: 1024 * mb,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
machineState: state.NUMANodeMap{
|
||||
0: &state.NUMANodeState{
|
||||
MemoryMap: map[v1.ResourceName]*state.MemoryTable{
|
||||
v1.ResourceMemory: {
|
||||
Allocatable: 1536 * mb,
|
||||
Free: 512 * mb,
|
||||
Reserved: 1024 * mb,
|
||||
SystemReserved: 512 * mb,
|
||||
TotalMemSize: 2176 * mb,
|
||||
},
|
||||
hugepages1Gi: {
|
||||
Allocatable: gb,
|
||||
Free: gb,
|
||||
Reserved: 0,
|
||||
SystemReserved: 0,
|
||||
TotalMemSize: gb,
|
||||
},
|
||||
},
|
||||
Cells: []int{0},
|
||||
NumberOfAssignments: 1,
|
||||
},
|
||||
1: &state.NUMANodeState{
|
||||
MemoryMap: map[v1.ResourceName]*state.MemoryTable{
|
||||
v1.ResourceMemory: {
|
||||
Allocatable: 512 * mb,
|
||||
Free: 512 * mb,
|
||||
Reserved: 0,
|
||||
SystemReserved: 512 * mb,
|
||||
TotalMemSize: 2176 * mb,
|
||||
},
|
||||
hugepages1Gi: {
|
||||
Allocatable: gb,
|
||||
Free: gb,
|
||||
Reserved: 0,
|
||||
SystemReserved: 0,
|
||||
TotalMemSize: gb,
|
||||
},
|
||||
},
|
||||
Cells: []int{1},
|
||||
NumberOfAssignments: 0,
|
||||
},
|
||||
},
|
||||
expectedMachineState: state.NUMANodeMap{
|
||||
0: &state.NUMANodeState{
|
||||
MemoryMap: map[v1.ResourceName]*state.MemoryTable{
|
||||
v1.ResourceMemory: {
|
||||
Allocatable: 1536 * mb,
|
||||
Free: 512 * mb,
|
||||
Reserved: 1024 * mb,
|
||||
SystemReserved: 512 * mb,
|
||||
TotalMemSize: 2176 * mb,
|
||||
},
|
||||
hugepages1Gi: {
|
||||
Allocatable: gb,
|
||||
Free: gb,
|
||||
Reserved: 0,
|
||||
SystemReserved: 0,
|
||||
TotalMemSize: gb,
|
||||
},
|
||||
},
|
||||
Cells: []int{0},
|
||||
NumberOfAssignments: 1,
|
||||
},
|
||||
1: &state.NUMANodeState{
|
||||
MemoryMap: map[v1.ResourceName]*state.MemoryTable{
|
||||
v1.ResourceMemory: {
|
||||
Allocatable: 512 * mb,
|
||||
Free: 512 * mb,
|
||||
Reserved: 0,
|
||||
SystemReserved: 512 * mb,
|
||||
TotalMemSize: 2176 * mb,
|
||||
},
|
||||
hugepages1Gi: {
|
||||
Allocatable: gb,
|
||||
Free: gb,
|
||||
Reserved: 0,
|
||||
SystemReserved: 0,
|
||||
TotalMemSize: gb,
|
||||
},
|
||||
},
|
||||
Cells: []int{1},
|
||||
NumberOfAssignments: 0,
|
||||
},
|
||||
},
|
||||
systemReserved: systemReservedMemory{
|
||||
0: map[v1.ResourceName]uint64{
|
||||
v1.ResourceMemory: 512 * mb,
|
||||
},
|
||||
1: map[v1.ResourceName]uint64{
|
||||
v1.ResourceMemory: 512 * mb,
|
||||
},
|
||||
},
|
||||
pod: getPod("pod2", "container1", requirementsGuaranteed),
|
||||
topologyHint: &topologymanager.TopologyHint{NUMANodeAffinity: newNUMAAffinity(0, 1), Preferred: true},
|
||||
expectedError: fmt.Errorf("[memorymanager] preferred hint violates NUMA node allocation"),
|
||||
},
|
||||
}
|
||||
|
||||
for _, testCase := range testCases {
|
||||
|
Loading…
Reference in New Issue
Block a user