diff --git a/pkg/kubelet/cm/devicemanager/manager.go b/pkg/kubelet/cm/devicemanager/manager.go index 1708176fc9b..884a224895b 100644 --- a/pkg/kubelet/cm/devicemanager/manager.go +++ b/pkg/kubelet/cm/devicemanager/manager.go @@ -791,9 +791,33 @@ func (m *ManagerImpl) filterByAffinity(podUID, contName, resource string, availa nodes = append(nodes, node) } - // Sort the list of nodes by how many devices they contain. + // Sort the list of nodes by: + // 1) Nodes contained in the 'hint's affinity set + // 2) Nodes not contained in the 'hint's affinity set + // 3) The fake NUMANode of -1 (assuming it is included in the list) + // Within each of the groups above, sort the nodes by how many devices they contain sort.Slice(nodes, func(i, j int) bool { - return perNodeDevices[i].Len() < perNodeDevices[j].Len() + // If one or the other of nodes[i] or nodes[j] is in the 'hint's affinity set + if hint.NUMANodeAffinity.IsSet(nodes[i]) && hint.NUMANodeAffinity.IsSet(nodes[j]) { + return perNodeDevices[nodes[i]].Len() < perNodeDevices[nodes[j]].Len() + } + if hint.NUMANodeAffinity.IsSet(nodes[i]) { + return true + } + if hint.NUMANodeAffinity.IsSet(nodes[j]) { + return false + } + + // If one or the other of nodes[i] or nodes[j] is the fake NUMA node -1 (they can't both be) + if nodes[i] == nodeWithoutTopology { + return false + } + if nodes[j] == nodeWithoutTopology { + return true + } + + // Otherwise both nodes[i] and nodes[j] are real NUMA nodes that are not in the 'hint's' affinity list. + return perNodeDevices[nodes[i]].Len() < perNodeDevices[nodes[j]].Len() }) // Generate three sorted lists of devices. 
Devices in the first list come diff --git a/pkg/kubelet/cm/devicemanager/manager_test.go b/pkg/kubelet/cm/devicemanager/manager_test.go index 86f39eaa016..61031c728e1 100644 --- a/pkg/kubelet/cm/devicemanager/manager_test.go +++ b/pkg/kubelet/cm/devicemanager/manager_test.go @@ -40,9 +40,11 @@ import ( "k8s.io/kubernetes/pkg/kubelet/checkpointmanager" "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/checkpoint" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" + "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" "k8s.io/kubernetes/pkg/kubelet/config" "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/pkg/kubelet/pluginmanager" + "k8s.io/kubernetes/pkg/kubelet/util/format" schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" ) @@ -669,6 +671,132 @@ type TestResource struct { topology bool } +func TestFilterByAffinity(t *testing.T) { + as := require.New(t) + allDevices := ResourceDeviceInstances{ + "res1": map[string]pluginapi.Device{ + "dev1": { + ID: "dev1", + Topology: &pluginapi.TopologyInfo{ + Nodes: []*pluginapi.NUMANode{ + { + ID: 1, + }, + }, + }, + }, + "dev2": { + ID: "dev2", + Topology: &pluginapi.TopologyInfo{ + Nodes: []*pluginapi.NUMANode{ + { + ID: 1, + }, + { + ID: 2, + }, + }, + }, + }, + "dev3": { + ID: "dev3", + Topology: &pluginapi.TopologyInfo{ + Nodes: []*pluginapi.NUMANode{ + { + ID: 2, + }, + }, + }, + }, + "dev4": { + ID: "dev4", + Topology: &pluginapi.TopologyInfo{ + Nodes: []*pluginapi.NUMANode{ + { + ID: 2, + }, + }, + }, + }, + }, + } + + fakeAffinity, _ := bitmask.NewBitMask(2) + fakeHint := topologymanager.TopologyHint{ + NUMANodeAffinity: fakeAffinity, + Preferred: true, + } + testManager := ManagerImpl{ + topologyAffinityStore: NewFakeTopologyManagerWithHint(t, &fakeHint), + allDevices: allDevices, + } + + testCases := []struct { + available sets.String + fromAffinityExpected sets.String + notFromAffinityExpected sets.String + withoutTopologyExpected sets.String + }{ + { + available: sets.NewString("dev1", 
as.Truef(withoutTopology.Equal(testCase.withoutTopologyExpected), "expect devices without topology to be %v but got %v", testCase.withoutTopologyExpected, withoutTopology)
topologymanager] RemoveContainer container id: %v", containerID) + return nil +} + +func (m *fakeTopologyManagerWithHint) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitResult { + m.t.Logf("[fake topologymanager] Topology Admit Handler") + return lifecycle.PodAdmitResult{ + Admit: true, + } +} + +func (m *fakeTopologyManagerWithHint) GetAffinity(podUID string, containerName string) topologymanager.TopologyHint { + m.t.Logf("[fake topologymanager] GetAffinity podUID: %v container name: %v", podUID, containerName) + return *m.hint +} + func TestPodContainerDeviceAllocation(t *testing.T) { res1 := TestResource{ resourceName: "domain1.com/resource1",