From 4d4d4bdd6152d34203bc2f273160ac5a172a0917 Mon Sep 17 00:00:00 2001 From: Kevin Klues Date: Tue, 5 Nov 2019 13:16:48 +0000 Subject: [PATCH] Ensure devicemanager TopologyHints are regenerated after kubelet restart This patch also includes test to make sure the newly added logic works as expected. --- pkg/kubelet/cm/devicemanager/BUILD | 1 + .../cm/devicemanager/topology_hints.go | 15 ++ .../cm/devicemanager/topology_hints_test.go | 162 ++++++++++++++++-- 3 files changed, 163 insertions(+), 15 deletions(-) diff --git a/pkg/kubelet/cm/devicemanager/BUILD b/pkg/kubelet/cm/devicemanager/BUILD index 909309cd520..4c0ff545194 100644 --- a/pkg/kubelet/cm/devicemanager/BUILD +++ b/pkg/kubelet/cm/devicemanager/BUILD @@ -60,6 +60,7 @@ go_test( "//staging/src/k8s.io/api/core/v1:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/types:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/util/uuid:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library", diff --git a/pkg/kubelet/cm/devicemanager/topology_hints.go b/pkg/kubelet/cm/devicemanager/topology_hints.go index 81fb3577ea2..7a8d1925896 100644 --- a/pkg/kubelet/cm/devicemanager/topology_hints.go +++ b/pkg/kubelet/cm/devicemanager/topology_hints.go @@ -46,6 +46,21 @@ func (m *ManagerImpl) GetTopologyHints(pod v1.Pod, container v1.Container) map[s continue } + // Short circuit to regenerate the same hints if there are already + // devices allocated to the Container. This might happen after a + // kubelet restart, for example. + allocated := m.podDevices.containerDevices(string(pod.UID), container.Name, resource) + if allocated.Len() > 0 { + if allocated.Len() != requested { + klog.Errorf("[devicemanager] Resource '%v' already allocated to (pod %v, container %v) with different number than request: requested: %d, allocated: %d", resource, string(pod.UID), container.Name, requested, allocated.Len()) + deviceHints[resource] = []topologymanager.TopologyHint{} + continue + } + klog.Infof("[devicemanager] Regenerating TopologyHints for resource '%v' already allocated to (pod %v, container %v)", resource, string(pod.UID), container.Name) + deviceHints[resource] = m.generateDeviceTopologyHints(resource, allocated, requested) + continue + } + // Get the list of available devices, for which TopologyHints should be generated. available := m.getAvailableDevices(resource) if available.Len() < requested { diff --git a/pkg/kubelet/cm/devicemanager/topology_hints_test.go b/pkg/kubelet/cm/devicemanager/topology_hints_test.go index e77cf3b4957..b69a7e3c62d 100644 --- a/pkg/kubelet/cm/devicemanager/topology_hints_test.go +++ b/pkg/kubelet/cm/devicemanager/topology_hints_test.go @@ -23,6 +23,7 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/sets" pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" @@ -52,13 +53,17 @@ func makeSocketMask(sockets ...int) bitmask.BitMask { func TestGetTopologyHints(t *testing.T) { tcases := []struct { description string + podUID string + containerName string request map[string]string devices map[string][]pluginapi.Device - allocatedDevices map[string][]string + allocatedDevices map[string]map[string]map[string][]string expectedHints map[string][]topologymanager.TopologyHint }{ { - description: "Single Request, no alignment", + description: "Single Request, no alignment", + podUID: "fakePod", + containerName: "fakeContainer", request: map[string]string{ "testdevice": "1", }, @@ -73,7 +78,9 @@ func TestGetTopologyHints(t *testing.T) { }, }, { - description: "Single Request, only one with alignment", + description: "Single Request, only one with alignment", + podUID: "fakePod", + containerName: "fakeContainer", request: map[string]string{ "testdevice": "1", }, @@ -97,7 +104,9 @@ func TestGetTopologyHints(t *testing.T) { }, }, { - description: "Single Request, one device per socket", + description: "Single Request, one device per socket", + podUID: "fakePod", + containerName: "fakeContainer", request: map[string]string{ "testdevice": "1", }, @@ -125,7 +134,9 @@ func TestGetTopologyHints(t *testing.T) { }, }, { - description: "Request for 2, one device per socket", + description: "Request for 2, one device per socket", + podUID: "fakePod", + containerName: "fakeContainer", request: map[string]string{ "testdevice": "2", }, @@ -145,7 +156,9 @@ func TestGetTopologyHints(t *testing.T) { }, }, { - description: "Request for 2, 2 devices per socket", + description: "Request for 2, 2 devices per socket", + podUID: "fakePod", + containerName: "fakeContainer", request: map[string]string{ "testdevice": "2", }, @@ -175,7 +188,9 @@ func TestGetTopologyHints(t *testing.T) { }, }, { - description: "Request for 2, optimal on 1 NUMA node, forced cross-NUMA", + description: "Request for 2, optimal on 1 NUMA node, forced cross-NUMA", + podUID: "fakePod", + containerName: "fakeContainer", request: map[string]string{ "testdevice": "2", }, @@ -187,8 +202,12 @@ func TestGetTopologyHints(t *testing.T) { makeNUMADevice("Dev4", 1), }, }, - allocatedDevices: map[string][]string{ - "testdevice": {"Dev1", "Dev2"}, + allocatedDevices: map[string]map[string]map[string][]string{ + "fakePod": { + "fakeOtherContainer": { + "testdevice": {"Dev1", "Dev2"}, + }, + }, }, expectedHints: map[string][]topologymanager.TopologyHint{ "testdevice": { @@ -200,7 +219,9 @@ func TestGetTopologyHints(t *testing.T) { }, }, { - description: "2 device types, mixed configuration", + description: "2 device types, mixed configuration", + podUID: "fakePod", + containerName: "fakeContainer", request: map[string]string{ "testdevice1": "2", "testdevice2": "1", @@ -243,6 +264,110 @@ func TestGetTopologyHints(t *testing.T) { }, }, }, + { + description: "Single device type, more requested than available", + podUID: "fakePod", + containerName: "fakeContainer", + request: map[string]string{ + "testdevice": "6", + }, + devices: map[string][]pluginapi.Device{ + "testdevice": { + makeNUMADevice("Dev1", 0), + makeNUMADevice("Dev2", 0), + makeNUMADevice("Dev3", 1), + makeNUMADevice("Dev4", 1), + }, + }, + expectedHints: map[string][]topologymanager.TopologyHint{ + "testdevice": {}, + }, + }, + { + description: "Single device type, all already allocated to container", + podUID: "fakePod", + containerName: "fakeContainer", + request: map[string]string{ + "testdevice": "2", + }, + devices: map[string][]pluginapi.Device{ + "testdevice": { + makeNUMADevice("Dev1", 0), + makeNUMADevice("Dev2", 0), + }, + }, + allocatedDevices: map[string]map[string]map[string][]string{ + "fakePod": { + "fakeContainer": { + "testdevice": {"Dev1", "Dev2"}, + }, + }, + }, + expectedHints: map[string][]topologymanager.TopologyHint{ + "testdevice": { + { + NUMANodeAffinity: makeSocketMask(0), + Preferred: true, + }, + { + NUMANodeAffinity: makeSocketMask(0, 1), + Preferred: false, + }, + }, + }, + }, + { + description: "Single device type, less already allocated to container than requested", + podUID: "fakePod", + containerName: "fakeContainer", + request: map[string]string{ + "testdevice": "4", + }, + devices: map[string][]pluginapi.Device{ + "testdevice": { + makeNUMADevice("Dev1", 0), + makeNUMADevice("Dev2", 0), + makeNUMADevice("Dev3", 1), + makeNUMADevice("Dev4", 1), + }, + }, + allocatedDevices: map[string]map[string]map[string][]string{ + "fakePod": { + "fakeContainer": { + "testdevice": {"Dev1", "Dev2"}, + }, + }, + }, + expectedHints: map[string][]topologymanager.TopologyHint{ + "testdevice": {}, + }, + }, + { + description: "Single device type, more already allocated to container than requested", + podUID: "fakePod", + containerName: "fakeContainer", + request: map[string]string{ + "testdevice": "2", + }, + devices: map[string][]pluginapi.Device{ + "testdevice": { + makeNUMADevice("Dev1", 0), + makeNUMADevice("Dev2", 0), + makeNUMADevice("Dev3", 1), + makeNUMADevice("Dev4", 1), + }, + }, + allocatedDevices: map[string]map[string]map[string][]string{ + "fakePod": { + "fakeContainer": { + "testdevice": {"Dev1", "Dev2", "Dev3", "Dev4"}, + }, + }, + }, + expectedHints: map[string][]topologymanager.TopologyHint{ + "testdevice": {}, + }, + }, } for _, tc := range tcases { @@ -252,6 +377,8 @@ func TestGetTopologyHints(t *testing.T) { } pod := makePod(resourceList) + pod.UID = types.UID(tc.podUID) + pod.Spec.Containers[0].Name = tc.containerName m := ManagerImpl{ allDevices: make(map[string]map[string]pluginapi.Device), @@ -259,7 +386,7 @@ func TestGetTopologyHints(t *testing.T) { allocatedDevices: make(map[string]sets.String), podDevices: make(podDevices), sourcesReady: &sourcesReadyStub{}, - activePods: func() []*v1.Pod { return []*v1.Pod{} }, + activePods: func() []*v1.Pod { return []*v1.Pod{pod} }, numaNodes: []int{0, 1}, } @@ -273,11 +400,16 @@ func TestGetTopologyHints(t *testing.T) { } } - for r := range tc.allocatedDevices { - m.allocatedDevices[r] = sets.NewString() + for p := range tc.allocatedDevices { + for c := range tc.allocatedDevices[p] { + for r, devices := range tc.allocatedDevices[p][c] { + m.podDevices.insert(p, c, r, sets.NewString(devices...), nil) - for _, d := range tc.allocatedDevices[r] { - m.allocatedDevices[r].Insert(d) + m.allocatedDevices[r] = sets.NewString() + for _, d := range devices { + m.allocatedDevices[r].Insert(d) + } + } } }