From d551ab1e784bde601fb1f9914f85c708e6e2a326 Mon Sep 17 00:00:00 2001 From: Kevin Klues Date: Sat, 4 Jul 2020 12:29:25 +0000 Subject: [PATCH 1/4] Add tests to check paramaters passed to GetPreferredAllocation() These tests uncovered some small bugs that will be fixed in a subsequent set of commits. --- .../cm/devicemanager/topology_hints_test.go | 189 ++++++++++++++++++ 1 file changed, 189 insertions(+) diff --git a/pkg/kubelet/cm/devicemanager/topology_hints_test.go b/pkg/kubelet/cm/devicemanager/topology_hints_test.go index 3cbd271802b..3cd072183e8 100644 --- a/pkg/kubelet/cm/devicemanager/topology_hints_test.go +++ b/pkg/kubelet/cm/devicemanager/topology_hints_test.go @@ -791,3 +791,192 @@ func TestTopologyAlignedAllocation(t *testing.T) { } } } + +func TestGetPreferredAllocationParameters(t *testing.T) { + tcases := []struct { + description string + resource string + request int + allDevices []pluginapi.Device + allocatedDevices []string + reusableDevices []string + hint topologymanager.TopologyHint + expectedAvailable []string + expectedMustInclude []string + expectedSize int + }{ + { + description: "Request for 1, socket 0, 0 already allocated, 0 reusable", + resource: "resource", + request: 1, + allDevices: []pluginapi.Device{ + makeNUMADevice("Dev0", 0), + makeNUMADevice("Dev1", 0), + makeNUMADevice("Dev2", 0), + makeNUMADevice("Dev3", 0), + }, + allocatedDevices: []string{}, + reusableDevices: []string{}, + hint: topologymanager.TopologyHint{ + NUMANodeAffinity: makeSocketMask(0), + Preferred: true, + }, + expectedAvailable: []string{"Dev0", "Dev1", "Dev2", "Dev3"}, + expectedMustInclude: []string{}, + expectedSize: 1, + }, + { + description: "Request for 4, socket 0, 2 already allocated, 2 reusable", + resource: "resource", + request: 4, + allDevices: []pluginapi.Device{ + makeNUMADevice("Dev0", 0), + makeNUMADevice("Dev1", 0), + makeNUMADevice("Dev2", 0), + makeNUMADevice("Dev3", 0), + makeNUMADevice("Dev4", 0), + makeNUMADevice("Dev5", 0), + makeNUMADevice("Dev6", 0), + makeNUMADevice("Dev7", 0), + }, + allocatedDevices: []string{"Dev0", "Dev5"}, + reusableDevices: []string{"Dev0", "Dev5"}, + hint: topologymanager.TopologyHint{ + NUMANodeAffinity: makeSocketMask(0), + Preferred: true, + }, + expectedAvailable: []string{"Dev0", "Dev1", "Dev2", "Dev3", "Dev4", "Dev5", "Dev6", "Dev7"}, + expectedMustInclude: []string{"Dev0", "Dev5"}, + expectedSize: 4, + }, + { + description: "Request for 4, socket 0, 4 already allocated, 2 reusable", + resource: "resource", + request: 4, + allDevices: []pluginapi.Device{ + makeNUMADevice("Dev0", 0), + makeNUMADevice("Dev1", 0), + makeNUMADevice("Dev2", 0), + makeNUMADevice("Dev3", 0), + makeNUMADevice("Dev4", 0), + makeNUMADevice("Dev5", 0), + makeNUMADevice("Dev6", 0), + makeNUMADevice("Dev7", 0), + }, + allocatedDevices: []string{"Dev0", "Dev5", "Dev4", "Dev1"}, + reusableDevices: []string{"Dev0", "Dev5"}, + hint: topologymanager.TopologyHint{ + NUMANodeAffinity: makeSocketMask(0), + Preferred: true, + }, + expectedAvailable: []string{"Dev0", "Dev2", "Dev3", "Dev5", "Dev6", "Dev7"}, + expectedMustInclude: []string{"Dev0", "Dev5"}, + expectedSize: 4, + }, + { + description: "Request for 6, multisocket, 2 already allocated, 2 reusable", + resource: "resource", + request: 6, + allDevices: []pluginapi.Device{ + makeNUMADevice("Dev0", 0), + makeNUMADevice("Dev1", 0), + makeNUMADevice("Dev2", 0), + makeNUMADevice("Dev3", 0), + makeNUMADevice("Dev4", 1), + makeNUMADevice("Dev5", 1), + makeNUMADevice("Dev6", 1), + makeNUMADevice("Dev7", 1), + }, + allocatedDevices: []string{"Dev1", "Dev6"}, + reusableDevices: []string{"Dev1", "Dev6"}, + hint: topologymanager.TopologyHint{ + NUMANodeAffinity: makeSocketMask(0), + Preferred: true, + }, + expectedAvailable: []string{"Dev0", "Dev1", "Dev2", "Dev3", "Dev4", "Dev5", "Dev6", "Dev7"}, + expectedMustInclude: []string{"Dev0", "Dev1", "Dev2", "Dev3", "Dev6"}, + expectedSize: 6, + }, + { + description: "Request for 6, multisocket, 4 already allocated, 2 reusable", + resource: "resource", + request: 6, + allDevices: []pluginapi.Device{ + makeNUMADevice("Dev0", 0), + makeNUMADevice("Dev1", 0), + makeNUMADevice("Dev2", 0), + makeNUMADevice("Dev3", 0), + makeNUMADevice("Dev4", 1), + makeNUMADevice("Dev5", 1), + makeNUMADevice("Dev6", 1), + makeNUMADevice("Dev7", 1), + }, + allocatedDevices: []string{"Dev0", "Dev1", "Dev6", "Dev7"}, + reusableDevices: []string{"Dev1", "Dev6"}, + hint: topologymanager.TopologyHint{ + NUMANodeAffinity: makeSocketMask(0), + Preferred: true, + }, + expectedAvailable: []string{"Dev1", "Dev2", "Dev3", "Dev4", "Dev5", "Dev6"}, + expectedMustInclude: []string{"Dev1", "Dev2", "Dev3", "Dev6"}, + expectedSize: 6, + }, + } + for _, tc := range tcases { + m := ManagerImpl{ + allDevices: make(map[string]map[string]pluginapi.Device), + healthyDevices: make(map[string]sets.String), + allocatedDevices: make(map[string]sets.String), + endpoints: make(map[string]endpointInfo), + podDevices: make(podDevices), + sourcesReady: &sourcesReadyStub{}, + activePods: func() []*v1.Pod { return []*v1.Pod{} }, + topologyAffinityStore: &mockAffinityStore{tc.hint}, + } + + m.allDevices[tc.resource] = make(map[string]pluginapi.Device) + m.healthyDevices[tc.resource] = sets.NewString() + for _, d := range tc.allDevices { + m.allDevices[tc.resource][d.ID] = d + m.healthyDevices[tc.resource].Insert(d.ID) + } + + m.allocatedDevices[tc.resource] = sets.NewString() + for _, d := range tc.allocatedDevices { + m.allocatedDevices[tc.resource].Insert(d) + } + + actualAvailable := []string{} + actualMustInclude := []string{} + actualSize := 0 + m.endpoints[tc.resource] = endpointInfo{ + e: &MockEndpoint{ + getPreferredAllocationFunc: func(available, mustInclude []string, size int) (*pluginapi.PreferredAllocationResponse, error) { + actualAvailable = append(actualAvailable, available...) + actualMustInclude = append(actualMustInclude, mustInclude...) + actualSize = size + return nil, nil + }, + }, + opts: &pluginapi.DevicePluginOptions{GetPreferredAllocationAvailable: true}, + } + + _, err := m.devicesToAllocate("podUID", "containerName", tc.resource, tc.request, sets.NewString(tc.reusableDevices...)) + if err != nil { + t.Errorf("Unexpected error: %v", err) + continue + } + + if !sets.NewString(actualAvailable...).Equal(sets.NewString(tc.expectedAvailable...)) { + t.Errorf("%v. expected available: %v but got: %v", tc.description, tc.expectedAvailable, actualAvailable) + } + + if !sets.NewString(actualAvailable...).Equal(sets.NewString(tc.expectedAvailable...)) { + t.Errorf("%v. expected mustInclude: %v but got: %v", tc.description, tc.expectedMustInclude, actualMustInclude) + } + + if actualSize != tc.expectedSize { + t.Errorf("%v. expected size: %v but got: %v", tc.description, tc.expectedSize, actualSize) + } + } +} From d87365494a87927ef32b4b43ada19ff67d3cddc3 Mon Sep 17 00:00:00 2001 From: Kevin Klues Date: Sat, 4 Jul 2020 12:30:38 +0000 Subject: [PATCH 2/4] Fix bug in call to callGetPreferredAllocationIfAvailable() Previously, we were passing the variable 'devices' to this function, when we should have been passing 'allocated'. This bug crept in due to a variable name change that didn't propogate its way through the entire function. The tests added in the previous commit would have caught this. --- pkg/kubelet/cm/devicemanager/manager.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/kubelet/cm/devicemanager/manager.go b/pkg/kubelet/cm/devicemanager/manager.go index 8f35d87a761..ad56ca57ffc 100644 --- a/pkg/kubelet/cm/devicemanager/manager.go +++ b/pkg/kubelet/cm/devicemanager/manager.go @@ -730,7 +730,7 @@ func (m *ManagerImpl) devicesToAllocate(podUID, contName, resource string, requi // Then give the plugin the chance to influence the decision on any // remaining devices to allocate. - preferred, err := m.callGetPreferredAllocationIfAvailable(podUID, contName, resource, available.Union(devices), devices, required) + preferred, err := m.callGetPreferredAllocationIfAvailable(podUID, contName, resource, available.Union(allocated), allocated, required) if err != nil { return nil, err } From 67ecc11c44138152fc3b6cbfb989c1b6ea1ee2c7 Mon Sep 17 00:00:00 2001 From: Kevin Klues Date: Sat, 4 Jul 2020 12:33:01 +0000 Subject: [PATCH 3/4] Harden callGetPreferredAllocationIfAvailable() return value Previously, we didn't check the contents of the result after calling out to the plugin endpoint. This could have resulted in errors if the plugin returned either 'nil' or an empty result. This patch fixes this. --- pkg/kubelet/cm/devicemanager/manager.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pkg/kubelet/cm/devicemanager/manager.go b/pkg/kubelet/cm/devicemanager/manager.go index ad56ca57ffc..b2f16b0c0e9 100644 --- a/pkg/kubelet/cm/devicemanager/manager.go +++ b/pkg/kubelet/cm/devicemanager/manager.go @@ -997,8 +997,10 @@ func (m *ManagerImpl) callGetPreferredAllocationIfAvailable(podUID, contName, re if err != nil { return nil, fmt.Errorf("device plugin GetPreferredAllocation rpc failed with err: %v", err) } - // TODO: Add metrics support for init RPC - return sets.NewString(resp.ContainerResponses[0].DeviceIDs...), nil + if resp != nil && len(resp.ContainerResponses) > 0 { + return sets.NewString(resp.ContainerResponses[0].DeviceIDs...), nil + } + return sets.NewString(), nil } // sanitizeNodeAllocatable scans through allocatedDevices in the device manager From 26cb6506555e5d10218affda0f22efd9b18a42ae Mon Sep 17 00:00:00 2001 From: Kevin Klues Date: Sat, 4 Jul 2020 12:35:04 +0000 Subject: [PATCH 4/4] Remove unnecessary union after call to GetPreferredAllocation() There is no need to try and allocate already-allocated devices again. --- pkg/kubelet/cm/devicemanager/manager.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/kubelet/cm/devicemanager/manager.go b/pkg/kubelet/cm/devicemanager/manager.go index b2f16b0c0e9..5d1925f9458 100644 --- a/pkg/kubelet/cm/devicemanager/manager.go +++ b/pkg/kubelet/cm/devicemanager/manager.go @@ -709,7 +709,7 @@ func (m *ManagerImpl) devicesToAllocate(podUID, contName, resource string, requi if err != nil { return nil, err } - if allocateRemainingFrom(preferred.Intersection(aligned.Union(allocated))) { + if allocateRemainingFrom(preferred.Intersection(aligned)) { return allocated, nil } // Then fallback to allocate from the aligned set if no preferred list @@ -734,7 +734,7 @@ func (m *ManagerImpl) devicesToAllocate(podUID, contName, resource string, requi if err != nil { return nil, err } - if allocateRemainingFrom(preferred.Intersection(available.Union(allocated))) { + if allocateRemainingFrom(preferred.Intersection(available)) { return allocated, nil }