Fix a bug whereby reusable CPUs and devices were not being honored

Previously, it was possible for reusable CPUs and reusable devices (i.e.
those previously consumed by init containers) to not be reused by
subsequent init containers or app containers if the TopologyManager was
enabled. This would happen because hint generation for the
TopologyManager was not considering the reusable devices when it made
its hint calculation.

As such, it would sometimes:
1) Generate a hint for a differnent NUMA node, causing the CPUs and
devices to be allocated from that node instead of the one where the
reusable devices live; or
2) End up thinking there were not enough CPUs or devices to allocate and
throw a TopologyAffinity admission error

This patch fixes this by ensuring that reusable CPUs and devices are
considered as part of TopologyHint generation. This frunctionality is
difficult to unit test since it spans multiple components, but an e2e
test will be added in a subsequent patch to test this functionality.
This commit is contained in:
Kevin Klues 2020-07-17 14:50:07 +00:00
parent 74fe9364c3
commit 00df26a985
2 changed files with 39 additions and 15 deletions

View File

@ -338,15 +338,16 @@ func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v
}
klog.Infof("[cpumanager] Regenerating TopologyHints for CPUs already allocated to (pod %v, container %v)", string(pod.UID), container.Name)
return map[string][]topologymanager.TopologyHint{
string(v1.ResourceCPU): p.generateCPUTopologyHints(allocated, requested),
string(v1.ResourceCPU): p.generateCPUTopologyHints(allocated, cpuset.CPUSet{}, requested),
}
}
// Get a list of available CPUs.
available := p.assignableCPUs(s)
reusable := p.cpusToReuse[string(pod.UID)]
// Generate hints.
cpuHints := p.generateCPUTopologyHints(available, requested)
cpuHints := p.generateCPUTopologyHints(available, reusable, requested)
klog.Infof("[cpumanager] TopologyHints generated for pod '%v', container '%v': %v", pod.Name, container.Name, cpuHints)
return map[string][]topologymanager.TopologyHint{
@ -360,7 +361,7 @@ func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v
// It follows the convention of marking all hints that have the same number of
// bits set as the narrowest matching NUMANodeAffinity with 'Preferred: true', and
// marking all others with 'Preferred: false'.
func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, request int) []topologymanager.TopologyHint {
func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, reusableCPUs cpuset.CPUSet, request int) []topologymanager.TopologyHint {
// Initialize minAffinitySize to include all NUMA Nodes.
minAffinitySize := p.topology.CPUDetails.NUMANodes().Size()
// Initialize minSocketsOnMinAffinity to include all Sockets.
@ -380,16 +381,25 @@ func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, req
}
}
// Then check to see if we have enough CPUs available on the current
// socket bitmask to satisfy the CPU request.
// Then check to see if all of the reusable CPUs are part of the bitmask.
numMatching := 0
for _, c := range reusableCPUs.ToSlice() {
// Disregard this mask if its NUMANode isn't part of it.
if !mask.IsSet(p.topology.CPUDetails[c].NUMANodeID) {
return
}
numMatching++
}
// Finally, check to see if enough available CPUs remain on the current
// NUMA node combination to satisfy the CPU request.
for _, c := range availableCPUs.ToSlice() {
if mask.IsSet(p.topology.CPUDetails[c].NUMANodeID) {
numMatching++
}
}
// If we don't, then move onto the next combination.
// If they don't, then move onto the next combination.
if numMatching < request {
return
}

View File

@ -58,21 +58,22 @@ func (m *ManagerImpl) GetTopologyHints(pod *v1.Pod, container *v1.Container) map
continue
}
klog.Infof("[devicemanager] Regenerating TopologyHints for resource '%v' already allocated to (pod %v, container %v)", resource, string(pod.UID), container.Name)
deviceHints[resource] = m.generateDeviceTopologyHints(resource, allocated, requested)
deviceHints[resource] = m.generateDeviceTopologyHints(resource, allocated, sets.String{}, requested)
continue
}
// Get the list of available devices, for which TopologyHints should be generated.
available := m.getAvailableDevices(resource)
if available.Len() < requested {
klog.Errorf("[devicemanager] Unable to generate topology hints: requested number of devices unavailable for '%s': requested: %d, available: %d", resource, requested, available.Len())
reusable := m.devicesToReuse[string(pod.UID)][resource]
if available.Union(reusable).Len() < requested {
klog.Errorf("[devicemanager] Unable to generate topology hints: requested number of devices unavailable for '%s': requested: %d, available: %d", resource, requested, available.Union(reusable).Len())
deviceHints[resource] = []topologymanager.TopologyHint{}
continue
}
// Generate TopologyHints for this resource given the current
// request size and the list of available devices.
deviceHints[resource] = m.generateDeviceTopologyHints(resource, available, requested)
deviceHints[resource] = m.generateDeviceTopologyHints(resource, available, reusable, requested)
}
}
@ -94,7 +95,7 @@ func (m *ManagerImpl) getAvailableDevices(resource string) sets.String {
return m.healthyDevices[resource].Difference(m.allocatedDevices[resource])
}
func (m *ManagerImpl) generateDeviceTopologyHints(resource string, devices sets.String, request int) []topologymanager.TopologyHint {
func (m *ManagerImpl) generateDeviceTopologyHints(resource string, available sets.String, reusable sets.String, request int) []topologymanager.TopologyHint {
// Initialize minAffinitySize to include all NUMA Nodes
minAffinitySize := len(m.numaNodes)
@ -112,16 +113,29 @@ func (m *ManagerImpl) generateDeviceTopologyHints(resource string, devices sets.
minAffinitySize = mask.Count()
}
// Then check to see if we have enough devices available on the current
// NUMA Node combination to satisfy the device request.
// Then check to see if all of the reusable devices are part of the bitmask.
numMatching := 0
for d := range devices {
for d := range reusable {
// Skip the device if it doesn't specify any topology info.
if m.allDevices[resource][d].Topology == nil {
continue
}
// Otherwise disregard this mask if its NUMANode isn't part of it.
if !mask.AnySet(m.getNUMANodeIds(m.allDevices[resource][d].Topology)) {
return
}
numMatching++
}
// Finally, check to see if enough available devices remain on the
// current NUMA node combination to satisfy the device request.
for d := range available {
if mask.AnySet(m.getNUMANodeIds(m.allDevices[resource][d].Topology)) {
numMatching++
}
}
// If we don't, then move onto the next combination.
// If they don't, then move onto the next combination.
if numMatching < request {
return
}