From 4bae656835769b98fbbad3e1915453a9699503b7 Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Fri, 15 Oct 2021 08:37:42 +0200 Subject: [PATCH] cpumanager: test NUMA node support for CPU assign (2) This batch of tests adds a fake topology on which each numa node has multiple sockets. We have not yet found a real HW topology like this in the wild, but we need one to fully exercise the code. So, until we find such a HW topology, we add a fake one flipping the NUMA/socket config of the existing dual xeon gold 6230. Signed-off-by: Francesco Romani --- .../cm/cpumanager/cpu_assignment_test.go | 50 ++++++ pkg/kubelet/cm/cpumanager/policy_test.go | 95 +++++++++++ .../cm/cpumanager/topology/topology_test.go | 156 ++++++++++++++++++ 3 files changed, 301 insertions(+) diff --git a/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go b/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go index bdfffedb676..5dfc80b4340 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go +++ b/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go @@ -18,6 +18,7 @@ package cpumanager import ( "reflect" + "sort" "testing" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology" @@ -79,12 +80,43 @@ func TestCPUAccumulatorFreeSockets(t *testing.T) { mustParseCPUSet(t, "1-78"), []int{}, }, + { + "dual numa, multi socket per per socket, HT, 4 sockets free", + fakeTopoMultiSocketDualSocketPerNumaHT, + mustParseCPUSet(t, "0-79"), + []int{0, 1, 2, 3}, + }, + { + "dual numa, multi socket per per socket, HT, 3 sockets free", + fakeTopoMultiSocketDualSocketPerNumaHT, + mustParseCPUSet(t, "0-19,21-79"), + []int{0, 1, 3}, + }, + { + "dual numa, multi socket per per socket, HT, 2 sockets free", + fakeTopoMultiSocketDualSocketPerNumaHT, + mustParseCPUSet(t, "0-59,61-78"), + []int{0, 1}, + }, + { + "dual numa, multi socket per per socket, HT, 1 sockets free", + fakeTopoMultiSocketDualSocketPerNumaHT, + mustParseCPUSet(t, "1-19,21-38,41-60,61-78"), + []int{1}, + }, + { + "dual numa, multi socket per per socket, HT, 0 
sockets free", + fakeTopoMultiSocketDualSocketPerNumaHT, + mustParseCPUSet(t, "0-40,42-49,51-68,71-79"), + []int{}, + }, } for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { acc := newCPUAccumulator(tc.topo, tc.availableCPUs, 0) result := acc.freeSockets() + sort.Ints(result) if !reflect.DeepEqual(result, tc.expect) { t.Errorf("expected %v to equal %v", result, tc.expect) @@ -160,6 +192,24 @@ func TestCPUAccumulatorFreeNUMANodes(t *testing.T) { mustParseCPUSet(t, "1-9,11-59,61-78"), []int{}, }, + { + "dual numa, multi socket per per socket, HT, 2 NUMA node free", + fakeTopoMultiSocketDualSocketPerNumaHT, + mustParseCPUSet(t, "0-79"), + []int{0, 1}, + }, + { + "dual numa, multi socket per per socket, HT, 1 NUMA node free", + fakeTopoMultiSocketDualSocketPerNumaHT, + mustParseCPUSet(t, "0-9,11-79"), + []int{1}, + }, + { + "dual numa, multi socket per per socket, HT, 0 sockets free", + fakeTopoMultiSocketDualSocketPerNumaHT, + mustParseCPUSet(t, "0-9,11-59,61-79"), + []int{}, + }, } for _, tc := range testCases { diff --git a/pkg/kubelet/cm/cpumanager/policy_test.go b/pkg/kubelet/cm/cpumanager/policy_test.go index a106ec6b782..2cd681c395f 100644 --- a/pkg/kubelet/cm/cpumanager/policy_test.go +++ b/pkg/kubelet/cm/cpumanager/policy_test.go @@ -515,4 +515,99 @@ var ( 79: {CoreID: 39, SocketID: 1, NUMANodeID: 3}, }, } + /* + FAKE Topology from dual xeon gold 6230 + (see: topoDualSocketMultiNumaPerSocketHT). + We flip NUMA cells and Sockets to exercise the code. + TODO(fromanirh): replace with a real-world topology + once we find a suitable one. 
+ */ + fakeTopoMultiSocketDualSocketPerNumaHT = &topology.CPUTopology{ + NumCPUs: 80, + NumSockets: 4, + NumCores: 40, + NumNUMANodes: 2, + CPUDetails: map[int]topology.CPUInfo{ + 0: {CoreID: 0, SocketID: 0, NUMANodeID: 0}, + 1: {CoreID: 1, SocketID: 0, NUMANodeID: 0}, + 2: {CoreID: 2, SocketID: 0, NUMANodeID: 0}, + 3: {CoreID: 3, SocketID: 0, NUMANodeID: 0}, + 4: {CoreID: 4, SocketID: 0, NUMANodeID: 0}, + 5: {CoreID: 5, SocketID: 0, NUMANodeID: 0}, + 6: {CoreID: 6, SocketID: 0, NUMANodeID: 0}, + 7: {CoreID: 7, SocketID: 0, NUMANodeID: 0}, + 8: {CoreID: 8, SocketID: 0, NUMANodeID: 0}, + 9: {CoreID: 9, SocketID: 0, NUMANodeID: 0}, + 10: {CoreID: 10, SocketID: 1, NUMANodeID: 0}, + 11: {CoreID: 11, SocketID: 1, NUMANodeID: 0}, + 12: {CoreID: 12, SocketID: 1, NUMANodeID: 0}, + 13: {CoreID: 13, SocketID: 1, NUMANodeID: 0}, + 14: {CoreID: 14, SocketID: 1, NUMANodeID: 0}, + 15: {CoreID: 15, SocketID: 1, NUMANodeID: 0}, + 16: {CoreID: 16, SocketID: 1, NUMANodeID: 0}, + 17: {CoreID: 17, SocketID: 1, NUMANodeID: 0}, + 18: {CoreID: 18, SocketID: 1, NUMANodeID: 0}, + 19: {CoreID: 19, SocketID: 1, NUMANodeID: 0}, + 20: {CoreID: 20, SocketID: 2, NUMANodeID: 1}, + 21: {CoreID: 21, SocketID: 2, NUMANodeID: 1}, + 22: {CoreID: 22, SocketID: 2, NUMANodeID: 1}, + 23: {CoreID: 23, SocketID: 2, NUMANodeID: 1}, + 24: {CoreID: 24, SocketID: 2, NUMANodeID: 1}, + 25: {CoreID: 25, SocketID: 2, NUMANodeID: 1}, + 26: {CoreID: 26, SocketID: 2, NUMANodeID: 1}, + 27: {CoreID: 27, SocketID: 2, NUMANodeID: 1}, + 28: {CoreID: 28, SocketID: 2, NUMANodeID: 1}, + 29: {CoreID: 29, SocketID: 2, NUMANodeID: 1}, + 30: {CoreID: 30, SocketID: 3, NUMANodeID: 1}, + 31: {CoreID: 31, SocketID: 3, NUMANodeID: 1}, + 32: {CoreID: 32, SocketID: 3, NUMANodeID: 1}, + 33: {CoreID: 33, SocketID: 3, NUMANodeID: 1}, + 34: {CoreID: 34, SocketID: 3, NUMANodeID: 1}, + 35: {CoreID: 35, SocketID: 3, NUMANodeID: 1}, + 36: {CoreID: 36, SocketID: 3, NUMANodeID: 1}, + 37: {CoreID: 37, SocketID: 3, NUMANodeID: 1}, + 38: {CoreID: 
38, SocketID: 3, NUMANodeID: 1}, + 39: {CoreID: 39, SocketID: 3, NUMANodeID: 1}, + 40: {CoreID: 0, SocketID: 0, NUMANodeID: 0}, + 41: {CoreID: 1, SocketID: 0, NUMANodeID: 0}, + 42: {CoreID: 2, SocketID: 0, NUMANodeID: 0}, + 43: {CoreID: 3, SocketID: 0, NUMANodeID: 0}, + 44: {CoreID: 4, SocketID: 0, NUMANodeID: 0}, + 45: {CoreID: 5, SocketID: 0, NUMANodeID: 0}, + 46: {CoreID: 6, SocketID: 0, NUMANodeID: 0}, + 47: {CoreID: 7, SocketID: 0, NUMANodeID: 0}, + 48: {CoreID: 8, SocketID: 0, NUMANodeID: 0}, + 49: {CoreID: 9, SocketID: 0, NUMANodeID: 0}, + 50: {CoreID: 10, SocketID: 1, NUMANodeID: 0}, + 51: {CoreID: 11, SocketID: 1, NUMANodeID: 0}, + 52: {CoreID: 12, SocketID: 1, NUMANodeID: 0}, + 53: {CoreID: 13, SocketID: 1, NUMANodeID: 0}, + 54: {CoreID: 14, SocketID: 1, NUMANodeID: 0}, + 55: {CoreID: 15, SocketID: 1, NUMANodeID: 0}, + 56: {CoreID: 16, SocketID: 1, NUMANodeID: 0}, + 57: {CoreID: 17, SocketID: 1, NUMANodeID: 0}, + 58: {CoreID: 18, SocketID: 1, NUMANodeID: 0}, + 59: {CoreID: 19, SocketID: 1, NUMANodeID: 0}, + 60: {CoreID: 20, SocketID: 2, NUMANodeID: 1}, + 61: {CoreID: 21, SocketID: 2, NUMANodeID: 1}, + 62: {CoreID: 22, SocketID: 2, NUMANodeID: 1}, + 63: {CoreID: 23, SocketID: 2, NUMANodeID: 1}, + 64: {CoreID: 24, SocketID: 2, NUMANodeID: 1}, + 65: {CoreID: 25, SocketID: 2, NUMANodeID: 1}, + 66: {CoreID: 26, SocketID: 2, NUMANodeID: 1}, + 67: {CoreID: 27, SocketID: 2, NUMANodeID: 1}, + 68: {CoreID: 28, SocketID: 2, NUMANodeID: 1}, + 69: {CoreID: 29, SocketID: 2, NUMANodeID: 1}, + 70: {CoreID: 30, SocketID: 3, NUMANodeID: 1}, + 71: {CoreID: 31, SocketID: 3, NUMANodeID: 1}, + 72: {CoreID: 32, SocketID: 3, NUMANodeID: 1}, + 73: {CoreID: 33, SocketID: 3, NUMANodeID: 1}, + 74: {CoreID: 34, SocketID: 3, NUMANodeID: 1}, + 75: {CoreID: 35, SocketID: 3, NUMANodeID: 1}, + 76: {CoreID: 36, SocketID: 3, NUMANodeID: 1}, + 77: {CoreID: 37, SocketID: 3, NUMANodeID: 1}, + 78: {CoreID: 38, SocketID: 3, NUMANodeID: 1}, + 79: {CoreID: 39, SocketID: 3, NUMANodeID: 1}, + }, + } 
) diff --git a/pkg/kubelet/cm/cpumanager/topology/topology_test.go b/pkg/kubelet/cm/cpumanager/topology/topology_test.go index 9cc938226e1..0c53839ff0a 100644 --- a/pkg/kubelet/cm/cpumanager/topology/topology_test.go +++ b/pkg/kubelet/cm/cpumanager/topology/topology_test.go @@ -230,7 +230,163 @@ func Test_Discover(t *testing.T) { }, wantErr: false, }, + { + // FAKE Topology from dual xeon gold 6230 + // (see: dual xeon gold 6230). + // We flip NUMA cells and Sockets to exercise the code. + // TODO(fromanirh): replace with a real-world topology + // once we find a suitable one. + // Note: this is a fake topology. Thus, there is no "correct" + // representation. This one was created following these concepts: + // 1. be internally consistent (most important rule) + // 2. be as close as possible to existing HW topologies + // 3. if possible, minimize changes wrt existing HW topologies. + name: "DualNumaMultiSocketPerNumaHT", + machineInfo: cadvisorapi.MachineInfo{ + NumCores: 80, + NumSockets: 4, + Topology: []cadvisorapi.Node{ + {Id: 0, + Cores: []cadvisorapi.Core{ + {SocketID: 0, Id: 0, Threads: []int{0, 40}}, + {SocketID: 0, Id: 1, Threads: []int{1, 41}}, + {SocketID: 0, Id: 2, Threads: []int{2, 42}}, + {SocketID: 0, Id: 8, Threads: []int{3, 43}}, + {SocketID: 0, Id: 9, Threads: []int{4, 44}}, + {SocketID: 0, Id: 16, Threads: []int{5, 45}}, + {SocketID: 0, Id: 17, Threads: []int{6, 46}}, + {SocketID: 0, Id: 18, Threads: []int{7, 47}}, + {SocketID: 0, Id: 24, Threads: []int{8, 48}}, + {SocketID: 0, Id: 25, Threads: []int{9, 49}}, + {SocketID: 1, Id: 3, Threads: []int{10, 50}}, + {SocketID: 1, Id: 4, Threads: []int{11, 51}}, + {SocketID: 1, Id: 10, Threads: []int{12, 52}}, + {SocketID: 1, Id: 11, Threads: []int{13, 53}}, + {SocketID: 1, Id: 12, Threads: []int{14, 54}}, + {SocketID: 1, Id: 19, Threads: []int{15, 55}}, + {SocketID: 1, Id: 20, Threads: []int{16, 56}}, + {SocketID: 1, Id: 26, Threads: []int{17, 57}}, + {SocketID: 1, Id: 27, Threads: []int{18, 58}}, 
+ {SocketID: 1, Id: 28, Threads: []int{19, 59}}, + }, + }, + {Id: 1, + Cores: []cadvisorapi.Core{ + {SocketID: 2, Id: 0, Threads: []int{20, 60}}, + {SocketID: 2, Id: 1, Threads: []int{21, 61}}, + {SocketID: 2, Id: 2, Threads: []int{22, 62}}, + {SocketID: 2, Id: 8, Threads: []int{23, 63}}, + {SocketID: 2, Id: 9, Threads: []int{24, 64}}, + {SocketID: 2, Id: 16, Threads: []int{25, 65}}, + {SocketID: 2, Id: 17, Threads: []int{26, 66}}, + {SocketID: 2, Id: 18, Threads: []int{27, 67}}, + {SocketID: 2, Id: 24, Threads: []int{28, 68}}, + {SocketID: 2, Id: 25, Threads: []int{29, 69}}, + {SocketID: 3, Id: 3, Threads: []int{30, 70}}, + {SocketID: 3, Id: 4, Threads: []int{31, 71}}, + {SocketID: 3, Id: 10, Threads: []int{32, 72}}, + {SocketID: 3, Id: 11, Threads: []int{33, 73}}, + {SocketID: 3, Id: 12, Threads: []int{34, 74}}, + {SocketID: 3, Id: 19, Threads: []int{35, 75}}, + {SocketID: 3, Id: 20, Threads: []int{36, 76}}, + {SocketID: 3, Id: 26, Threads: []int{37, 77}}, + {SocketID: 3, Id: 27, Threads: []int{38, 78}}, + {SocketID: 3, Id: 28, Threads: []int{39, 79}}, + }, + }, + }, + }, + want: &CPUTopology{ + NumCPUs: 80, + NumSockets: 4, + NumCores: 40, + NumNUMANodes: 2, + CPUDetails: map[int]CPUInfo{ + 0: {CoreID: 0, SocketID: 0, NUMANodeID: 0}, + 1: {CoreID: 1, SocketID: 0, NUMANodeID: 0}, + 2: {CoreID: 2, SocketID: 0, NUMANodeID: 0}, + 3: {CoreID: 3, SocketID: 0, NUMANodeID: 0}, + 4: {CoreID: 4, SocketID: 0, NUMANodeID: 0}, + 5: {CoreID: 5, SocketID: 0, NUMANodeID: 0}, + 6: {CoreID: 6, SocketID: 0, NUMANodeID: 0}, + 7: {CoreID: 7, SocketID: 0, NUMANodeID: 0}, + 8: {CoreID: 8, SocketID: 0, NUMANodeID: 0}, + 9: {CoreID: 9, SocketID: 0, NUMANodeID: 0}, + 10: {CoreID: 10, SocketID: 1, NUMANodeID: 0}, + 11: {CoreID: 11, SocketID: 1, NUMANodeID: 0}, + 12: {CoreID: 12, SocketID: 1, NUMANodeID: 0}, + 13: {CoreID: 13, SocketID: 1, NUMANodeID: 0}, + 14: {CoreID: 14, SocketID: 1, NUMANodeID: 0}, + 15: {CoreID: 15, SocketID: 1, NUMANodeID: 0}, + 16: {CoreID: 16, SocketID: 1, 
NUMANodeID: 0}, + 17: {CoreID: 17, SocketID: 1, NUMANodeID: 0}, + 18: {CoreID: 18, SocketID: 1, NUMANodeID: 0}, + 19: {CoreID: 19, SocketID: 1, NUMANodeID: 0}, + 20: {CoreID: 20, SocketID: 2, NUMANodeID: 1}, + 21: {CoreID: 21, SocketID: 2, NUMANodeID: 1}, + 22: {CoreID: 22, SocketID: 2, NUMANodeID: 1}, + 23: {CoreID: 23, SocketID: 2, NUMANodeID: 1}, + 24: {CoreID: 24, SocketID: 2, NUMANodeID: 1}, + 25: {CoreID: 25, SocketID: 2, NUMANodeID: 1}, + 26: {CoreID: 26, SocketID: 2, NUMANodeID: 1}, + 27: {CoreID: 27, SocketID: 2, NUMANodeID: 1}, + 28: {CoreID: 28, SocketID: 2, NUMANodeID: 1}, + 29: {CoreID: 29, SocketID: 2, NUMANodeID: 1}, + 30: {CoreID: 30, SocketID: 3, NUMANodeID: 1}, + 31: {CoreID: 31, SocketID: 3, NUMANodeID: 1}, + 32: {CoreID: 32, SocketID: 3, NUMANodeID: 1}, + 33: {CoreID: 33, SocketID: 3, NUMANodeID: 1}, + 34: {CoreID: 34, SocketID: 3, NUMANodeID: 1}, + 35: {CoreID: 35, SocketID: 3, NUMANodeID: 1}, + 36: {CoreID: 36, SocketID: 3, NUMANodeID: 1}, + 37: {CoreID: 37, SocketID: 3, NUMANodeID: 1}, + 38: {CoreID: 38, SocketID: 3, NUMANodeID: 1}, + 39: {CoreID: 39, SocketID: 3, NUMANodeID: 1}, + 40: {CoreID: 0, SocketID: 0, NUMANodeID: 0}, + 41: {CoreID: 1, SocketID: 0, NUMANodeID: 0}, + 42: {CoreID: 2, SocketID: 0, NUMANodeID: 0}, + 43: {CoreID: 3, SocketID: 0, NUMANodeID: 0}, + 44: {CoreID: 4, SocketID: 0, NUMANodeID: 0}, + 45: {CoreID: 5, SocketID: 0, NUMANodeID: 0}, + 46: {CoreID: 6, SocketID: 0, NUMANodeID: 0}, + 47: {CoreID: 7, SocketID: 0, NUMANodeID: 0}, + 48: {CoreID: 8, SocketID: 0, NUMANodeID: 0}, + 49: {CoreID: 9, SocketID: 0, NUMANodeID: 0}, + 50: {CoreID: 10, SocketID: 1, NUMANodeID: 0}, + 51: {CoreID: 11, SocketID: 1, NUMANodeID: 0}, + 52: {CoreID: 12, SocketID: 1, NUMANodeID: 0}, + 53: {CoreID: 13, SocketID: 1, NUMANodeID: 0}, + 54: {CoreID: 14, SocketID: 1, NUMANodeID: 0}, + 55: {CoreID: 15, SocketID: 1, NUMANodeID: 0}, + 56: {CoreID: 16, SocketID: 1, NUMANodeID: 0}, + 57: {CoreID: 17, SocketID: 1, NUMANodeID: 0}, + 58: {CoreID: 18, 
SocketID: 1, NUMANodeID: 0}, + 59: {CoreID: 19, SocketID: 1, NUMANodeID: 0}, + 60: {CoreID: 20, SocketID: 2, NUMANodeID: 1}, + 61: {CoreID: 21, SocketID: 2, NUMANodeID: 1}, + 62: {CoreID: 22, SocketID: 2, NUMANodeID: 1}, + 63: {CoreID: 23, SocketID: 2, NUMANodeID: 1}, + 64: {CoreID: 24, SocketID: 2, NUMANodeID: 1}, + 65: {CoreID: 25, SocketID: 2, NUMANodeID: 1}, + 66: {CoreID: 26, SocketID: 2, NUMANodeID: 1}, + 67: {CoreID: 27, SocketID: 2, NUMANodeID: 1}, + 68: {CoreID: 28, SocketID: 2, NUMANodeID: 1}, + 69: {CoreID: 29, SocketID: 2, NUMANodeID: 1}, + 70: {CoreID: 30, SocketID: 3, NUMANodeID: 1}, + 71: {CoreID: 31, SocketID: 3, NUMANodeID: 1}, + 72: {CoreID: 32, SocketID: 3, NUMANodeID: 1}, + 73: {CoreID: 33, SocketID: 3, NUMANodeID: 1}, + 74: {CoreID: 34, SocketID: 3, NUMANodeID: 1}, + 75: {CoreID: 35, SocketID: 3, NUMANodeID: 1}, + 76: {CoreID: 36, SocketID: 3, NUMANodeID: 1}, + 77: {CoreID: 37, SocketID: 3, NUMANodeID: 1}, + 78: {CoreID: 38, SocketID: 3, NUMANodeID: 1}, + 79: {CoreID: 39, SocketID: 3, NUMANodeID: 1}, + }, + }, + wantErr: false, + }, { name: "DualSocketNoHT", machineInfo: cadvisorapi.MachineInfo{