diff --git a/pkg/kubelet/cm/cpumanager/topology_hints.go b/pkg/kubelet/cm/cpumanager/topology_hints.go index 30a983ca1d8..ce70c5465f5 100644 --- a/pkg/kubelet/cm/cpumanager/topology_hints.go +++ b/pkg/kubelet/cm/cpumanager/topology_hints.go @@ -71,14 +71,26 @@ func (m *manager) GetTopologyHints(pod v1.Pod, container v1.Container) map[strin // bits set as the narrowest matching NUMANodeAffinity with 'Preferred: true', and // marking all others with 'Preferred: false'. func (m *manager) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, request int) []topologymanager.TopologyHint { - // Initialize minAffinity to a full affinity mask. - minAffinity, _ := socketmask.NewSocketMask() - minAffinity.Fill() + // Initialize minAffinitySize to include all NUMA Nodes. + minAffinitySize := m.topology.CPUDetails.NUMANodes().Size() + // Initialize minSocketsOnMinAffinity to include all Sockets. + minSocketsOnMinAffinity := m.topology.CPUDetails.Sockets().Size() // Iterate through all combinations of socketMasks and build hints from them. hints := []topologymanager.TopologyHint{} socketmask.IterateSocketMasks(m.topology.CPUDetails.NUMANodes().ToSlice(), func(mask socketmask.SocketMask) { - // Check to see if we have enough CPUs available on the current + // First, update minAffinitySize and minSocketsOnMinAffinity for the + // current request size. + cpusInMask := m.topology.CPUDetails.CPUsInNUMANodes(mask.GetSockets()...).Size() + socketsInMask := m.topology.CPUDetails.SocketsInNUMANodes(mask.GetSockets()...).Size() + if cpusInMask >= request && mask.Count() < minAffinitySize { + minAffinitySize = mask.Count() + if socketsInMask < minSocketsOnMinAffinity { + minSocketsOnMinAffinity = socketsInMask + } + } + + // Then check to see if we have enough CPUs available on the current // SocketMask to satisfy the CPU request. numMatching := 0 for _, c := range availableCPUs.ToSlice() { @@ -99,20 +111,19 @@ func (m *manager) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, request NUMANodeAffinity: mask, Preferred: false, }) - - // Update minAffinity if relevant - if mask.IsNarrowerThan(minAffinity) { - minAffinity = mask - } }) // Loop back through all hints and update the 'Preferred' field based on // counting the number of bits sets in the affinity mask and comparing it - // to the minAffinity. Only those with an equal number of bits set will be - // considered preferred. + // to the minAffinitySize. Only those with an equal number of bits set (and + // with a minimal set of sockets) will be considered preferred. for i := range hints { - if hints[i].NUMANodeAffinity.Count() == minAffinity.Count() { - hints[i].Preferred = true + if hints[i].NUMANodeAffinity.Count() == minAffinitySize { + nodes := hints[i].NUMANodeAffinity.GetSockets() + numSockets := m.topology.CPUDetails.SocketsInNUMANodes(nodes...).Size() + if numSockets == minSocketsOnMinAffinity { + hints[i].Preferred = true + } } } diff --git a/pkg/kubelet/cm/cpumanager/topology_hints_test.go b/pkg/kubelet/cm/cpumanager/topology_hints_test.go index cbcf40f0b06..e9102892a23 100644 --- a/pkg/kubelet/cm/cpumanager/topology_hints_test.go +++ b/pkg/kubelet/cm/cpumanager/topology_hints_test.go @@ -75,28 +75,18 @@ func TestGetTopologyHints(t *testing.T) { 1: cpuset.NewCPUSet(3, 9, 4, 10, 5, 11), } - topology, _ := topology.Discover(&machineInfo, numaNodeInfo) - - m := manager{ - policy: &staticPolicy{ - topology: topology, - }, - state: &mockState{ - defaultCPUSet: cpuset.NewCPUSet(2, 3, 4, 5, 6, 7, 8, 9, 10, 11), - }, - topology: topology, - } - tcases := []struct { name string pod v1.Pod container v1.Container + defaultCPUSet cpuset.CPUSet expectedHints []topologymanager.TopologyHint }{ { - name: "Request 2 CPUs; 4 available on Socket 0, 6 available on Socket 1", - pod: *testPod1, - container: *testContainer1, + name: "Request 2 CPUs, 4 available on NUMA 0, 6 available on NUMA 1", + pod: *testPod1, + container: *testContainer1, + defaultCPUSet: cpuset.NewCPUSet(2, 3, 4, 5, 6, 7, 8, 9, 10, 11), expectedHints: []topologymanager.TopologyHint{ { NUMANodeAffinity: firstSocketMask, @@ -113,9 +103,10 @@ func TestGetTopologyHints(t *testing.T) { }, }, { - name: "Request 5 CPUs; 4 available on Socket 0, 6 available on Socket 1", - pod: *testPod2, - container: *testContainer2, + name: "Request 5 CPUs, 4 available on NUMA 0, 6 available on NUMA 1", + pod: *testPod2, + container: *testContainer2, + defaultCPUSet: cpuset.NewCPUSet(2, 3, 4, 5, 6, 7, 8, 9, 10, 11), expectedHints: []topologymanager.TopologyHint{ { NUMANodeAffinity: secondSocketMask, @@ -128,9 +119,10 @@ func TestGetTopologyHints(t *testing.T) { }, }, { - name: "Request 7 CPUs, 4 available on Socket 0, 6 available on Socket 1", - pod: *testPod3, - container: *testContainer3, + name: "Request 7 CPUs, 4 available on NUMA 0, 6 available on NUMA 1", + pod: *testPod3, + container: *testContainer3, + defaultCPUSet: cpuset.NewCPUSet(2, 3, 4, 5, 6, 7, 8, 9, 10, 11), expectedHints: []topologymanager.TopologyHint{ { NUMANodeAffinity: crossSocketMask, @@ -139,13 +131,38 @@ func TestGetTopologyHints(t *testing.T) { }, }, { - name: "Request 11 CPUs, 4 available on Socket 0, 6 available on Socket 1", + name: "Request 11 CPUs, 4 available on NUMA 0, 6 available on NUMA 1", pod: *testPod4, container: *testContainer4, + defaultCPUSet: cpuset.NewCPUSet(2, 3, 4, 5, 6, 7, 8, 9, 10, 11), expectedHints: nil, }, + { + name: "Request 2 CPUs, 1 available on NUMA 0, 1 available on NUMA 1", + pod: *testPod1, + container: *testContainer1, + defaultCPUSet: cpuset.NewCPUSet(0, 3), + expectedHints: []topologymanager.TopologyHint{ + { + NUMANodeAffinity: crossSocketMask, + Preferred: false, + }, + }, + }, } for _, tc := range tcases { + topology, _ := topology.Discover(&machineInfo, numaNodeInfo) + + m := manager{ + policy: &staticPolicy{ + topology: topology, + }, + state: &mockState{ + defaultCPUSet: tc.defaultCPUSet, + }, + topology: topology, + } + hints := m.GetTopologyHints(tc.pod, tc.container)[string(v1.ResourceCPU)] if len(tc.expectedHints) == 0 && len(hints) == 0 { continue diff --git a/pkg/kubelet/cm/devicemanager/topology_hints.go b/pkg/kubelet/cm/devicemanager/topology_hints.go index a7535eb01ea..38ae0e166f7 100644 --- a/pkg/kubelet/cm/devicemanager/topology_hints.go +++ b/pkg/kubelet/cm/devicemanager/topology_hints.go @@ -73,13 +73,30 @@ func (m *ManagerImpl) getAvailableDevices(resource string) sets.String { } func (m *ManagerImpl) generateDeviceTopologyHints(resource string, devices sets.String, request int) []topologymanager.TopologyHint { - // Initialize minAffinity to a full affinity mask. - minAffinity, _ := socketmask.NewSocketMask(m.numaNodes...) + // Initialize minAffinitySize to include all NUMA Nodes + minAffinitySize := len(m.numaNodes) // Iterate through all combinations of NUMA Nodes and build hints from them. hints := []topologymanager.TopologyHint{} socketmask.IterateSocketMasks(m.numaNodes, func(mask socketmask.SocketMask) { - // Check to see if we have enough devices available on the current + // First, update minAffinitySize for the current request size. + devicesInMask := 0 + for _, device := range m.allDevices[resource] { + if device.Topology == nil { + continue + } + for _, node := range device.Topology.Nodes { + if mask.IsSet(int(node.ID)) { + devicesInMask++ + break + } + } + } + if devicesInMask >= request && mask.Count() < minAffinitySize { + minAffinitySize = mask.Count() + } + + // Then check to see if we have enough devices available on the current // NUMA Node combination to satisfy the device request. numMatching := 0 for d := range devices { @@ -106,11 +123,6 @@ func (m *ManagerImpl) generateDeviceTopologyHints(resource string, devices sets. NUMANodeAffinity: mask, Preferred: false, }) - - // Update minAffinity if relevant - if mask.IsNarrowerThan(minAffinity) { - minAffinity = mask - } }) // Loop back through all hints and update the 'Preferred' field based on @@ -118,7 +130,7 @@ func (m *ManagerImpl) generateDeviceTopologyHints(resource string, devices sets. // to the minAffinity. Only those with an equal number of bits set will be // considered preferred. for i := range hints { - if hints[i].NUMANodeAffinity.Count() == minAffinity.Count() { + if hints[i].NUMANodeAffinity.Count() == minAffinitySize { hints[i].Preferred = true } } diff --git a/pkg/kubelet/cm/devicemanager/topology_hints_test.go b/pkg/kubelet/cm/devicemanager/topology_hints_test.go index d11c555cfd4..d6c0638679e 100644 --- a/pkg/kubelet/cm/devicemanager/topology_hints_test.go +++ b/pkg/kubelet/cm/devicemanager/topology_hints_test.go @@ -58,10 +58,11 @@ func makeSocketMask(sockets ...int) socketmask.SocketMask { func TestGetTopologyHints(t *testing.T) { tcases := []struct { - description string - request map[string]string - devices map[string][]pluginapi.Device - expectedHints map[string][]topologymanager.TopologyHint + description string + request map[string]string + devices map[string][]pluginapi.Device + allocatedDevices map[string][]string + expectedHints map[string][]topologymanager.TopologyHint }{ { description: "Single Request, no alignment", @@ -180,6 +181,31 @@ func TestGetTopologyHints(t *testing.T) { }, }, }, + { + description: "Request for 2, optimal on 1 NUMA node, forced cross-NUMA", + request: map[string]string{ + "testdevice": "2", + }, + devices: map[string][]pluginapi.Device{ + "testdevice": { + makeNUMADevice("Dev1", 0), + makeNUMADevice("Dev2", 1), + makeNUMADevice("Dev3", 0), + makeNUMADevice("Dev4", 1), + }, + }, + allocatedDevices: map[string][]string{ + "testdevice": {"Dev1", "Dev2"}, + }, + expectedHints: map[string][]topologymanager.TopologyHint{ + "testdevice": { + { + NUMANodeAffinity: makeSocketMask(0, 1), + Preferred: false, + }, + }, + }, + }, { description: "2 device types, mixed configuration", request: map[string]string{ @@ -254,6 +280,14 @@ func TestGetTopologyHints(t *testing.T) { } } + for r := range tc.allocatedDevices { + m.allocatedDevices[r] = sets.NewString() + + for _, d := range tc.allocatedDevices[r] { + m.allocatedDevices[r].Insert(d) + } + } + hints := m.GetTopologyHints(*pod, pod.Spec.Containers[0]) for r := range tc.expectedHints { @@ -276,6 +310,7 @@ func TestTopologyAlignedAllocation(t *testing.T) { resource string request int devices []pluginapi.Device + allocatedDevices []string hint topologymanager.TopologyHint expectedAllocation int expectedAlignment map[int]int