mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-22 19:31:44 +00:00
Merge pull request #93189 from klueska/upstream-fix-bug-topology-manager
Fix a bug whereby reusable CPUs and devices were not being honored
This commit is contained in:
commit
b6174e605f
@ -338,15 +338,16 @@ func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v
|
||||
}
|
||||
klog.Infof("[cpumanager] Regenerating TopologyHints for CPUs already allocated to (pod %v, container %v)", string(pod.UID), container.Name)
|
||||
return map[string][]topologymanager.TopologyHint{
|
||||
string(v1.ResourceCPU): p.generateCPUTopologyHints(allocated, requested),
|
||||
string(v1.ResourceCPU): p.generateCPUTopologyHints(allocated, cpuset.CPUSet{}, requested),
|
||||
}
|
||||
}
|
||||
|
||||
// Get a list of available CPUs.
|
||||
available := p.assignableCPUs(s)
|
||||
reusable := p.cpusToReuse[string(pod.UID)]
|
||||
|
||||
// Generate hints.
|
||||
cpuHints := p.generateCPUTopologyHints(available, requested)
|
||||
cpuHints := p.generateCPUTopologyHints(available, reusable, requested)
|
||||
klog.Infof("[cpumanager] TopologyHints generated for pod '%v', container '%v': %v", pod.Name, container.Name, cpuHints)
|
||||
|
||||
return map[string][]topologymanager.TopologyHint{
|
||||
@ -360,7 +361,7 @@ func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v
|
||||
// It follows the convention of marking all hints that have the same number of
|
||||
// bits set as the narrowest matching NUMANodeAffinity with 'Preferred: true', and
|
||||
// marking all others with 'Preferred: false'.
|
||||
func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, request int) []topologymanager.TopologyHint {
|
||||
func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, reusableCPUs cpuset.CPUSet, request int) []topologymanager.TopologyHint {
|
||||
// Initialize minAffinitySize to include all NUMA Nodes.
|
||||
minAffinitySize := p.topology.CPUDetails.NUMANodes().Size()
|
||||
// Initialize minSocketsOnMinAffinity to include all Sockets.
|
||||
@ -380,16 +381,25 @@ func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, req
|
||||
}
|
||||
}
|
||||
|
||||
// Then check to see if we have enough CPUs available on the current
|
||||
// socket bitmask to satisfy the CPU request.
|
||||
// Then check to see if all of the reusable CPUs are part of the bitmask.
|
||||
numMatching := 0
|
||||
for _, c := range reusableCPUs.ToSlice() {
|
||||
// Disregard this mask if its NUMANode isn't part of it.
|
||||
if !mask.IsSet(p.topology.CPUDetails[c].NUMANodeID) {
|
||||
return
|
||||
}
|
||||
numMatching++
|
||||
}
|
||||
|
||||
// Finally, check to see if enough available CPUs remain on the current
|
||||
// NUMA node combination to satisfy the CPU request.
|
||||
for _, c := range availableCPUs.ToSlice() {
|
||||
if mask.IsSet(p.topology.CPUDetails[c].NUMANodeID) {
|
||||
numMatching++
|
||||
}
|
||||
}
|
||||
|
||||
// If we don't, then move onto the next combination.
|
||||
// If they don't, then move onto the next combination.
|
||||
if numMatching < request {
|
||||
return
|
||||
}
|
||||
|
@ -20,6 +20,7 @@ import (
|
||||
"k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
"k8s.io/klog/v2"
|
||||
pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
|
||||
)
|
||||
@ -57,21 +58,22 @@ func (m *ManagerImpl) GetTopologyHints(pod *v1.Pod, container *v1.Container) map
|
||||
continue
|
||||
}
|
||||
klog.Infof("[devicemanager] Regenerating TopologyHints for resource '%v' already allocated to (pod %v, container %v)", resource, string(pod.UID), container.Name)
|
||||
deviceHints[resource] = m.generateDeviceTopologyHints(resource, allocated, requested)
|
||||
deviceHints[resource] = m.generateDeviceTopologyHints(resource, allocated, sets.String{}, requested)
|
||||
continue
|
||||
}
|
||||
|
||||
// Get the list of available devices, for which TopologyHints should be generated.
|
||||
available := m.getAvailableDevices(resource)
|
||||
if available.Len() < requested {
|
||||
klog.Errorf("[devicemanager] Unable to generate topology hints: requested number of devices unavailable for '%s': requested: %d, available: %d", resource, requested, available.Len())
|
||||
reusable := m.devicesToReuse[string(pod.UID)][resource]
|
||||
if available.Union(reusable).Len() < requested {
|
||||
klog.Errorf("[devicemanager] Unable to generate topology hints: requested number of devices unavailable for '%s': requested: %d, available: %d", resource, requested, available.Union(reusable).Len())
|
||||
deviceHints[resource] = []topologymanager.TopologyHint{}
|
||||
continue
|
||||
}
|
||||
|
||||
// Generate TopologyHints for this resource given the current
|
||||
// request size and the list of available devices.
|
||||
deviceHints[resource] = m.generateDeviceTopologyHints(resource, available, requested)
|
||||
deviceHints[resource] = m.generateDeviceTopologyHints(resource, available, reusable, requested)
|
||||
}
|
||||
}
|
||||
|
||||
@ -93,7 +95,7 @@ func (m *ManagerImpl) getAvailableDevices(resource string) sets.String {
|
||||
return m.healthyDevices[resource].Difference(m.allocatedDevices[resource])
|
||||
}
|
||||
|
||||
func (m *ManagerImpl) generateDeviceTopologyHints(resource string, devices sets.String, request int) []topologymanager.TopologyHint {
|
||||
func (m *ManagerImpl) generateDeviceTopologyHints(resource string, available sets.String, reusable sets.String, request int) []topologymanager.TopologyHint {
|
||||
// Initialize minAffinitySize to include all NUMA Nodes
|
||||
minAffinitySize := len(m.numaNodes)
|
||||
|
||||
@ -103,36 +105,37 @@ func (m *ManagerImpl) generateDeviceTopologyHints(resource string, devices sets.
|
||||
// First, update minAffinitySize for the current request size.
|
||||
devicesInMask := 0
|
||||
for _, device := range m.allDevices[resource] {
|
||||
if device.Topology == nil {
|
||||
continue
|
||||
}
|
||||
for _, node := range device.Topology.Nodes {
|
||||
if mask.IsSet(int(node.ID)) {
|
||||
devicesInMask++
|
||||
break
|
||||
}
|
||||
if mask.AnySet(m.getNUMANodeIds(device.Topology)) {
|
||||
devicesInMask++
|
||||
}
|
||||
}
|
||||
if devicesInMask >= request && mask.Count() < minAffinitySize {
|
||||
minAffinitySize = mask.Count()
|
||||
}
|
||||
|
||||
// Then check to see if we have enough devices available on the current
|
||||
// NUMA Node combination to satisfy the device request.
|
||||
// Then check to see if all of the reusable devices are part of the bitmask.
|
||||
numMatching := 0
|
||||
for d := range devices {
|
||||
for d := range reusable {
|
||||
// Skip the device if it doesn't specify any topology info.
|
||||
if m.allDevices[resource][d].Topology == nil {
|
||||
continue
|
||||
}
|
||||
for _, node := range m.allDevices[resource][d].Topology.Nodes {
|
||||
if mask.IsSet(int(node.ID)) {
|
||||
numMatching++
|
||||
break
|
||||
}
|
||||
// Otherwise disregard this mask if its NUMANode isn't part of it.
|
||||
if !mask.AnySet(m.getNUMANodeIds(m.allDevices[resource][d].Topology)) {
|
||||
return
|
||||
}
|
||||
numMatching++
|
||||
}
|
||||
|
||||
// Finally, check to see if enough available devices remain on the
|
||||
// current NUMA node combination to satisfy the device request.
|
||||
for d := range available {
|
||||
if mask.AnySet(m.getNUMANodeIds(m.allDevices[resource][d].Topology)) {
|
||||
numMatching++
|
||||
}
|
||||
}
|
||||
|
||||
// If we don't, then move onto the next combination.
|
||||
// If they don't, then move onto the next combination.
|
||||
if numMatching < request {
|
||||
return
|
||||
}
|
||||
@ -158,3 +161,14 @@ func (m *ManagerImpl) generateDeviceTopologyHints(resource string, devices sets.
|
||||
|
||||
return hints
|
||||
}
|
||||
|
||||
func (m *ManagerImpl) getNUMANodeIds(topology *pluginapi.TopologyInfo) []int {
|
||||
if topology == nil {
|
||||
return nil
|
||||
}
|
||||
var ids []int
|
||||
for _, n := range topology.Nodes {
|
||||
ids = append(ids, int(n.ID))
|
||||
}
|
||||
return ids
|
||||
}
|
||||
|
@ -33,6 +33,7 @@ type BitMask interface {
|
||||
IsEqual(mask BitMask) bool
|
||||
IsEmpty() bool
|
||||
IsSet(bit int) bool
|
||||
AnySet(bits []int) bool
|
||||
IsNarrowerThan(mask BitMask) bool
|
||||
String() string
|
||||
Count() int
|
||||
@ -120,6 +121,16 @@ func (s *bitMask) IsSet(bit int) bool {
|
||||
return (*s & (1 << uint64(bit))) > 0
|
||||
}
|
||||
|
||||
// AnySet checks bit in mask to see if any provided bit is set to one
|
||||
func (s *bitMask) AnySet(bits []int) bool {
|
||||
for _, b := range bits {
|
||||
if s.IsSet(b) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// IsEqual checks if masks are equal
|
||||
func (s *bitMask) IsEqual(mask BitMask) bool {
|
||||
return *s == *mask.(*bitMask)
|
||||
|
@ -381,6 +381,53 @@ func TestIsSet(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestAnySet(t *testing.T) {
|
||||
tcases := []struct {
|
||||
name string
|
||||
mask []int
|
||||
checkBits []int
|
||||
expectedSet bool
|
||||
}{
|
||||
{
|
||||
name: "Check if any bits from 11 in mask 00 is set",
|
||||
mask: nil,
|
||||
checkBits: []int{0, 1},
|
||||
expectedSet: false,
|
||||
},
|
||||
{
|
||||
name: "Check if any bits from 11 in mask 01 is set",
|
||||
mask: []int{0},
|
||||
checkBits: []int{0, 1},
|
||||
expectedSet: true,
|
||||
},
|
||||
{
|
||||
name: "Check if any bits from 11 in mask 11 is set",
|
||||
mask: []int{0, 1},
|
||||
checkBits: []int{0, 1},
|
||||
expectedSet: true,
|
||||
},
|
||||
{
|
||||
name: "Check if any bit outside range 0-63 is set",
|
||||
mask: []int{0, 1},
|
||||
checkBits: []int{64, 65},
|
||||
expectedSet: false,
|
||||
},
|
||||
{
|
||||
name: "Check if any bits from 1001 in mask 0110 is set",
|
||||
mask: []int{1, 2},
|
||||
checkBits: []int{0, 3},
|
||||
expectedSet: false,
|
||||
},
|
||||
}
|
||||
for _, tc := range tcases {
|
||||
mask, _ := NewBitMask(tc.mask...)
|
||||
set := mask.AnySet(tc.checkBits)
|
||||
if set != tc.expectedSet {
|
||||
t.Errorf("Expected value to be %v, got %v", tc.expectedSet, set)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsEqual(t *testing.T) {
|
||||
tcases := []struct {
|
||||
name string
|
||||
|
Loading…
Reference in New Issue
Block a user