From 9f5f401d608b42d7fc170fb8395b862c0b69e0ef Mon Sep 17 00:00:00 2001 From: Kevin Klues Date: Sat, 18 Jul 2020 18:31:39 +0000 Subject: [PATCH 1/3] Add AnySet() to topologymanager bitmask API --- .../cm/topologymanager/bitmask/bitmask.go | 11 +++++ .../topologymanager/bitmask/bitmask_test.go | 47 +++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/pkg/kubelet/cm/topologymanager/bitmask/bitmask.go b/pkg/kubelet/cm/topologymanager/bitmask/bitmask.go index 468f94f951a..fb2043e0182 100644 --- a/pkg/kubelet/cm/topologymanager/bitmask/bitmask.go +++ b/pkg/kubelet/cm/topologymanager/bitmask/bitmask.go @@ -33,6 +33,7 @@ type BitMask interface { IsEqual(mask BitMask) bool IsEmpty() bool IsSet(bit int) bool + AnySet(bits []int) bool IsNarrowerThan(mask BitMask) bool String() string Count() int @@ -120,6 +121,16 @@ func (s *bitMask) IsSet(bit int) bool { return (*s & (1 << uint64(bit))) > 0 } +// AnySet checks bit in mask to see if any provided bit is set to one +func (s *bitMask) AnySet(bits []int) bool { + for _, b := range bits { + if s.IsSet(b) { + return true + } + } + return false +} + // IsEqual checks if masks are equal func (s *bitMask) IsEqual(mask BitMask) bool { return *s == *mask.(*bitMask) diff --git a/pkg/kubelet/cm/topologymanager/bitmask/bitmask_test.go b/pkg/kubelet/cm/topologymanager/bitmask/bitmask_test.go index 1a02902ad34..d4a5f569509 100644 --- a/pkg/kubelet/cm/topologymanager/bitmask/bitmask_test.go +++ b/pkg/kubelet/cm/topologymanager/bitmask/bitmask_test.go @@ -381,6 +381,53 @@ func TestIsSet(t *testing.T) { } } +func TestAnySet(t *testing.T) { + tcases := []struct { + name string + mask []int + checkBits []int + expectedSet bool + }{ + { + name: "Check if any bits from 11 in mask 00 is set", + mask: nil, + checkBits: []int{0, 1}, + expectedSet: false, + }, + { + name: "Check if any bits from 11 in mask 01 is set", + mask: []int{0}, + checkBits: []int{0, 1}, + expectedSet: true, + }, + { + name: "Check if any bits from 11 in mask 11 is set", + mask: []int{0, 1}, + checkBits: []int{0, 1}, + expectedSet: true, + }, + { + name: "Check if any bit outside range 0-63 is set", + mask: []int{0, 1}, + checkBits: []int{64, 65}, + expectedSet: false, + }, + { + name: "Check if any bits from 1001 in mask 0110 is set", + mask: []int{1, 2}, + checkBits: []int{0, 3}, + expectedSet: false, + }, + } + for _, tc := range tcases { + mask, _ := NewBitMask(tc.mask...) + set := mask.AnySet(tc.checkBits) + if set != tc.expectedSet { + t.Errorf("Expected value to be %v, got %v", tc.expectedSet, set) + } + } +} + func TestIsEqual(t *testing.T) { tcases := []struct { name string From 74fe9364c3c0242033aef88566e0b3014d43cce8 Mon Sep 17 00:00:00 2001 From: Kevin Klues Date: Sat, 18 Jul 2020 19:06:20 +0000 Subject: [PATCH 2/3] Simplify logic in devicemanager TopologyHint generation --- .../cm/devicemanager/topology_hints.go | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/pkg/kubelet/cm/devicemanager/topology_hints.go b/pkg/kubelet/cm/devicemanager/topology_hints.go index a423c2d3592..16610e7ebb6 100644 --- a/pkg/kubelet/cm/devicemanager/topology_hints.go +++ b/pkg/kubelet/cm/devicemanager/topology_hints.go @@ -20,6 +20,7 @@ import ( "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/klog/v2" + pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" ) @@ -103,14 +104,8 @@ func (m *ManagerImpl) generateDeviceTopologyHints(resource string, devices sets. // First, update minAffinitySize for the current request size. devicesInMask := 0 for _, device := range m.allDevices[resource] { - if device.Topology == nil { - continue - } - for _, node := range device.Topology.Nodes { - if mask.IsSet(int(node.ID)) { - devicesInMask++ - break - } + if mask.AnySet(m.getNUMANodeIds(device.Topology)) { + devicesInMask++ } } if devicesInMask >= request && mask.Count() < minAffinitySize { @@ -121,14 +116,8 @@ func (m *ManagerImpl) generateDeviceTopologyHints(resource string, devices sets. // NUMA Node combination to satisfy the device request. numMatching := 0 for d := range devices { - if m.allDevices[resource][d].Topology == nil { - continue - } - for _, node := range m.allDevices[resource][d].Topology.Nodes { - if mask.IsSet(int(node.ID)) { - numMatching++ - break - } + if mask.AnySet(m.getNUMANodeIds(m.allDevices[resource][d].Topology)) { + numMatching++ } } @@ -158,3 +147,14 @@ func (m *ManagerImpl) generateDeviceTopologyHints(resource string, devices sets. return hints } + +func (m *ManagerImpl) getNUMANodeIds(topology *pluginapi.TopologyInfo) []int { + if topology == nil { + return nil + } + var ids []int + for _, n := range topology.Nodes { + ids = append(ids, int(n.ID)) + } + return ids +} From 00df26a9858ed1ed1ce2ff77760ecea7abcc7122 Mon Sep 17 00:00:00 2001 From: Kevin Klues Date: Fri, 17 Jul 2020 14:50:07 +0000 Subject: [PATCH 3/3] Fix a bug whereby reusable CPUs and devices were not being honored Previously, it was possible for reusable CPUs and reusable devices (i.e. those previously consumed by init containers) to not be reused by subsequent init containers or app containers if the TopologyManager was enabled. This would happen because hint generation for the TopologyManager was not considering the reusable devices when it made its hint calculation. As such, it would sometimes: 1) Generate a hint for a differnent NUMA node, causing the CPUs and devices to be allocated from that node instead of the one where the reusable devices live; or 2) End up thinking there were not enough CPUs or devices to allocate and throw a TopologyAffinity admission error This patch fixes this by ensuring that reusable CPUs and devices are considered as part of TopologyHint generation. This frunctionality is difficult to unit test since it spans multiple components, but an e2e test will be added in a subsequent patch to test this functionality. --- pkg/kubelet/cm/cpumanager/policy_static.go | 22 +++++++++---- .../cm/devicemanager/topology_hints.go | 32 +++++++++++++------ 2 files changed, 39 insertions(+), 15 deletions(-) diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go index adbac8d6a1a..ad3f876ab89 100644 --- a/pkg/kubelet/cm/cpumanager/policy_static.go +++ b/pkg/kubelet/cm/cpumanager/policy_static.go @@ -338,15 +338,16 @@ func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v } klog.Infof("[cpumanager] Regenerating TopologyHints for CPUs already allocated to (pod %v, container %v)", string(pod.UID), container.Name) return map[string][]topologymanager.TopologyHint{ - string(v1.ResourceCPU): p.generateCPUTopologyHints(allocated, requested), + string(v1.ResourceCPU): p.generateCPUTopologyHints(allocated, cpuset.CPUSet{}, requested), } } // Get a list of available CPUs. available := p.assignableCPUs(s) + reusable := p.cpusToReuse[string(pod.UID)] // Generate hints. - cpuHints := p.generateCPUTopologyHints(available, requested) + cpuHints := p.generateCPUTopologyHints(available, reusable, requested) klog.Infof("[cpumanager] TopologyHints generated for pod '%v', container '%v': %v", pod.Name, container.Name, cpuHints) return map[string][]topologymanager.TopologyHint{ @@ -360,7 +361,7 @@ func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v // It follows the convention of marking all hints that have the same number of // bits set as the narrowest matching NUMANodeAffinity with 'Preferred: true', and // marking all others with 'Preferred: false'. -func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, request int) []topologymanager.TopologyHint { +func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, reusableCPUs cpuset.CPUSet, request int) []topologymanager.TopologyHint { // Initialize minAffinitySize to include all NUMA Nodes. minAffinitySize := p.topology.CPUDetails.NUMANodes().Size() // Initialize minSocketsOnMinAffinity to include all Sockets. @@ -380,16 +381,25 @@ func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, req } } - // Then check to see if we have enough CPUs available on the current - // socket bitmask to satisfy the CPU request. + // Then check to see if all of the reusable CPUs are part of the bitmask. numMatching := 0 + for _, c := range reusableCPUs.ToSlice() { + // Disregard this mask if its NUMANode isn't part of it. + if !mask.IsSet(p.topology.CPUDetails[c].NUMANodeID) { + return + } + numMatching++ + } + + // Finally, check to see if enough available CPUs remain on the current + // NUMA node combination to satisfy the CPU request. for _, c := range availableCPUs.ToSlice() { if mask.IsSet(p.topology.CPUDetails[c].NUMANodeID) { numMatching++ } } - // If we don't, then move onto the next combination. + // If they don't, then move onto the next combination. if numMatching < request { return } diff --git a/pkg/kubelet/cm/devicemanager/topology_hints.go b/pkg/kubelet/cm/devicemanager/topology_hints.go index 16610e7ebb6..ab5b0859b3d 100644 --- a/pkg/kubelet/cm/devicemanager/topology_hints.go +++ b/pkg/kubelet/cm/devicemanager/topology_hints.go @@ -58,21 +58,22 @@ func (m *ManagerImpl) GetTopologyHints(pod *v1.Pod, container *v1.Container) map continue } klog.Infof("[devicemanager] Regenerating TopologyHints for resource '%v' already allocated to (pod %v, container %v)", resource, string(pod.UID), container.Name) - deviceHints[resource] = m.generateDeviceTopologyHints(resource, allocated, requested) + deviceHints[resource] = m.generateDeviceTopologyHints(resource, allocated, sets.String{}, requested) continue } // Get the list of available devices, for which TopologyHints should be generated. available := m.getAvailableDevices(resource) - if available.Len() < requested { - klog.Errorf("[devicemanager] Unable to generate topology hints: requested number of devices unavailable for '%s': requested: %d, available: %d", resource, requested, available.Len()) + reusable := m.devicesToReuse[string(pod.UID)][resource] + if available.Union(reusable).Len() < requested { + klog.Errorf("[devicemanager] Unable to generate topology hints: requested number of devices unavailable for '%s': requested: %d, available: %d", resource, requested, available.Union(reusable).Len()) deviceHints[resource] = []topologymanager.TopologyHint{} continue } // Generate TopologyHints for this resource given the current // request size and the list of available devices. - deviceHints[resource] = m.generateDeviceTopologyHints(resource, available, requested) + deviceHints[resource] = m.generateDeviceTopologyHints(resource, available, reusable, requested) } } @@ -94,7 +95,7 @@ func (m *ManagerImpl) getAvailableDevices(resource string) sets.String { return m.healthyDevices[resource].Difference(m.allocatedDevices[resource]) } -func (m *ManagerImpl) generateDeviceTopologyHints(resource string, devices sets.String, request int) []topologymanager.TopologyHint { +func (m *ManagerImpl) generateDeviceTopologyHints(resource string, available sets.String, reusable sets.String, request int) []topologymanager.TopologyHint { // Initialize minAffinitySize to include all NUMA Nodes minAffinitySize := len(m.numaNodes) @@ -112,16 +113,29 @@ func (m *ManagerImpl) generateDeviceTopologyHints(resource string, devices sets. minAffinitySize = mask.Count() } - // Then check to see if we have enough devices available on the current - // NUMA Node combination to satisfy the device request. + // Then check to see if all of the reusable devices are part of the bitmask. numMatching := 0 - for d := range devices { + for d := range reusable { + // Skip the device if it doesn't specify any topology info. + if m.allDevices[resource][d].Topology == nil { + continue + } + // Otherwise disregard this mask if its NUMANode isn't part of it. + if !mask.AnySet(m.getNUMANodeIds(m.allDevices[resource][d].Topology)) { + return + } + numMatching++ + } + + // Finally, check to see if enough available devices remain on the + // current NUMA node combination to satisfy the device request. + for d := range available { if mask.AnySet(m.getNUMANodeIds(m.allDevices[resource][d].Topology)) { numMatching++ } } - // If we don't, then move onto the next combination. + // If they don't, then move onto the next combination. if numMatching < request { return }