Merge pull request #93189 from klueska/upstream-fix-bug-topology-manager

Fix a bug whereby reusable CPUs and devices were not being honored
This commit is contained in:
Kubernetes Prow Robot 2020-07-21 04:35:17 -07:00 committed by GitHub
commit b6174e605f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 110 additions and 28 deletions

View File

@ -338,15 +338,16 @@ func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v
}
klog.Infof("[cpumanager] Regenerating TopologyHints for CPUs already allocated to (pod %v, container %v)", string(pod.UID), container.Name)
return map[string][]topologymanager.TopologyHint{
string(v1.ResourceCPU): p.generateCPUTopologyHints(allocated, requested),
string(v1.ResourceCPU): p.generateCPUTopologyHints(allocated, cpuset.CPUSet{}, requested),
}
}
// Get a list of available CPUs.
available := p.assignableCPUs(s)
reusable := p.cpusToReuse[string(pod.UID)]
// Generate hints.
cpuHints := p.generateCPUTopologyHints(available, requested)
cpuHints := p.generateCPUTopologyHints(available, reusable, requested)
klog.Infof("[cpumanager] TopologyHints generated for pod '%v', container '%v': %v", pod.Name, container.Name, cpuHints)
return map[string][]topologymanager.TopologyHint{
@ -360,7 +361,7 @@ func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v
// It follows the convention of marking all hints that have the same number of
// bits set as the narrowest matching NUMANodeAffinity with 'Preferred: true', and
// marking all others with 'Preferred: false'.
func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, request int) []topologymanager.TopologyHint {
func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, reusableCPUs cpuset.CPUSet, request int) []topologymanager.TopologyHint {
// Initialize minAffinitySize to include all NUMA Nodes.
minAffinitySize := p.topology.CPUDetails.NUMANodes().Size()
// Initialize minSocketsOnMinAffinity to include all Sockets.
@ -380,16 +381,25 @@ func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, req
}
}
// Then check to see if we have enough CPUs available on the current
// socket bitmask to satisfy the CPU request.
// Then check to see if all of the reusable CPUs are part of the bitmask.
numMatching := 0
for _, c := range reusableCPUs.ToSlice() {
// Disregard this mask if its NUMANode isn't part of it.
if !mask.IsSet(p.topology.CPUDetails[c].NUMANodeID) {
return
}
numMatching++
}
// Finally, check to see if enough available CPUs remain on the current
// NUMA node combination to satisfy the CPU request.
for _, c := range availableCPUs.ToSlice() {
if mask.IsSet(p.topology.CPUDetails[c].NUMANodeID) {
numMatching++
}
}
// If we don't, then move onto the next combination.
// If they don't, then move onto the next combination.
if numMatching < request {
return
}

View File

@ -20,6 +20,7 @@ import (
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/klog/v2"
pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
)
@ -57,21 +58,22 @@ func (m *ManagerImpl) GetTopologyHints(pod *v1.Pod, container *v1.Container) map
continue
}
klog.Infof("[devicemanager] Regenerating TopologyHints for resource '%v' already allocated to (pod %v, container %v)", resource, string(pod.UID), container.Name)
deviceHints[resource] = m.generateDeviceTopologyHints(resource, allocated, requested)
deviceHints[resource] = m.generateDeviceTopologyHints(resource, allocated, sets.String{}, requested)
continue
}
// Get the list of available devices, for which TopologyHints should be generated.
available := m.getAvailableDevices(resource)
if available.Len() < requested {
klog.Errorf("[devicemanager] Unable to generate topology hints: requested number of devices unavailable for '%s': requested: %d, available: %d", resource, requested, available.Len())
reusable := m.devicesToReuse[string(pod.UID)][resource]
if available.Union(reusable).Len() < requested {
klog.Errorf("[devicemanager] Unable to generate topology hints: requested number of devices unavailable for '%s': requested: %d, available: %d", resource, requested, available.Union(reusable).Len())
deviceHints[resource] = []topologymanager.TopologyHint{}
continue
}
// Generate TopologyHints for this resource given the current
// request size and the list of available devices.
deviceHints[resource] = m.generateDeviceTopologyHints(resource, available, requested)
deviceHints[resource] = m.generateDeviceTopologyHints(resource, available, reusable, requested)
}
}
@ -93,7 +95,7 @@ func (m *ManagerImpl) getAvailableDevices(resource string) sets.String {
return m.healthyDevices[resource].Difference(m.allocatedDevices[resource])
}
func (m *ManagerImpl) generateDeviceTopologyHints(resource string, devices sets.String, request int) []topologymanager.TopologyHint {
func (m *ManagerImpl) generateDeviceTopologyHints(resource string, available sets.String, reusable sets.String, request int) []topologymanager.TopologyHint {
// Initialize minAffinitySize to include all NUMA Nodes
minAffinitySize := len(m.numaNodes)
@ -103,36 +105,37 @@ func (m *ManagerImpl) generateDeviceTopologyHints(resource string, devices sets.
// First, update minAffinitySize for the current request size.
devicesInMask := 0
for _, device := range m.allDevices[resource] {
if device.Topology == nil {
continue
}
for _, node := range device.Topology.Nodes {
if mask.IsSet(int(node.ID)) {
devicesInMask++
break
}
if mask.AnySet(m.getNUMANodeIds(device.Topology)) {
devicesInMask++
}
}
if devicesInMask >= request && mask.Count() < minAffinitySize {
minAffinitySize = mask.Count()
}
// Then check to see if we have enough devices available on the current
// NUMA Node combination to satisfy the device request.
// Then check to see if all of the reusable devices are part of the bitmask.
numMatching := 0
for d := range devices {
for d := range reusable {
// Skip the device if it doesn't specify any topology info.
if m.allDevices[resource][d].Topology == nil {
continue
}
for _, node := range m.allDevices[resource][d].Topology.Nodes {
if mask.IsSet(int(node.ID)) {
numMatching++
break
}
// Otherwise disregard this mask if its NUMANode isn't part of it.
if !mask.AnySet(m.getNUMANodeIds(m.allDevices[resource][d].Topology)) {
return
}
numMatching++
}
// Finally, check to see if enough available devices remain on the
// current NUMA node combination to satisfy the device request.
for d := range available {
if mask.AnySet(m.getNUMANodeIds(m.allDevices[resource][d].Topology)) {
numMatching++
}
}
// If we don't, then move onto the next combination.
// If they don't, then move onto the next combination.
if numMatching < request {
return
}
@ -158,3 +161,14 @@ func (m *ManagerImpl) generateDeviceTopologyHints(resource string, devices sets.
return hints
}
func (m *ManagerImpl) getNUMANodeIds(topology *pluginapi.TopologyInfo) []int {
if topology == nil {
return nil
}
var ids []int
for _, n := range topology.Nodes {
ids = append(ids, int(n.ID))
}
return ids
}

View File

@ -33,6 +33,7 @@ type BitMask interface {
IsEqual(mask BitMask) bool
IsEmpty() bool
IsSet(bit int) bool
AnySet(bits []int) bool
IsNarrowerThan(mask BitMask) bool
String() string
Count() int
@ -120,6 +121,16 @@ func (s *bitMask) IsSet(bit int) bool {
return (*s & (1 << uint64(bit))) > 0
}
// AnySet checks bit in mask to see if any provided bit is set to one
func (s *bitMask) AnySet(bits []int) bool {
for _, b := range bits {
if s.IsSet(b) {
return true
}
}
return false
}
// IsEqual checks if masks are equal
func (s *bitMask) IsEqual(mask BitMask) bool {
return *s == *mask.(*bitMask)

View File

@ -381,6 +381,53 @@ func TestIsSet(t *testing.T) {
}
}
func TestAnySet(t *testing.T) {
tcases := []struct {
name string
mask []int
checkBits []int
expectedSet bool
}{
{
name: "Check if any bits from 11 in mask 00 is set",
mask: nil,
checkBits: []int{0, 1},
expectedSet: false,
},
{
name: "Check if any bits from 11 in mask 01 is set",
mask: []int{0},
checkBits: []int{0, 1},
expectedSet: true,
},
{
name: "Check if any bits from 11 in mask 11 is set",
mask: []int{0, 1},
checkBits: []int{0, 1},
expectedSet: true,
},
{
name: "Check if any bit outside range 0-63 is set",
mask: []int{0, 1},
checkBits: []int{64, 65},
expectedSet: false,
},
{
name: "Check if any bits from 1001 in mask 0110 is set",
mask: []int{1, 2},
checkBits: []int{0, 3},
expectedSet: false,
},
}
for _, tc := range tcases {
mask, _ := NewBitMask(tc.mask...)
set := mask.AnySet(tc.checkBits)
if set != tc.expectedSet {
t.Errorf("Expected value to be %v, got %v", tc.expectedSet, set)
}
}
}
func TestIsEqual(t *testing.T) {
tcases := []struct {
name string