From dc1a5926321410f141f48b9edd3397fed2d0f93e Mon Sep 17 00:00:00 2001 From: Swati Sehgal Date: Wed, 21 Dec 2022 12:55:47 +0000 Subject: [PATCH] node: device-mgr: Handle recovery by checking if healthy devices exist In case of node reboot/kubelet restart, the flow of events involves obtaining the state from the checkpoint file followed by setting the `healthyDevices`/`unhealthyDevices` to their zero values. This is done to allow the device plugin to re-register itself so that capacity can be updated appropriately. During the allocation phase, we need to check if the resources requested by the pod have been registered AND healthy devices are present on the node to be allocated. Also we need to move this check above the `needed==0` check, where `needed` is `required` minus the number of devices already allocated to the container (which is obtained from the checkpoint file), because even in cases where no additional devices have to be allocated (as they were pre-allocated), we still need to make sure the devices that were previously allocated are healthy. 
Signed-off-by: Swati Sehgal --- pkg/kubelet/cm/devicemanager/manager.go | 24 ++++- pkg/kubelet/cm/devicemanager/manager_test.go | 96 ++++++++++++++++++++ 2 files changed, 115 insertions(+), 5 deletions(-) diff --git a/pkg/kubelet/cm/devicemanager/manager.go b/pkg/kubelet/cm/devicemanager/manager.go index 8cb57aa8190..7499de4460f 100644 --- a/pkg/kubelet/cm/devicemanager/manager.go +++ b/pkg/kubelet/cm/devicemanager/manager.go @@ -544,15 +544,29 @@ func (m *ManagerImpl) devicesToAllocate(podUID, contName, resource string, requi return nil, fmt.Errorf("pod %q container %q changed request for resource %q from %d to %d", string(podUID), contName, resource, devices.Len(), required) } } + + klog.V(3).InfoS("Need devices to allocate for pod", "deviceNumber", needed, "resourceName", resource, "podUID", string(podUID), "containerName", contName) + healthyDevices, hasRegistered := m.healthyDevices[resource] + + // Check if resource registered with devicemanager + if !hasRegistered { + return nil, fmt.Errorf("cannot allocate unregistered device %s", resource) + } + + // Check if registered resource has healthy devices + if healthyDevices.Len() == 0 { + return nil, fmt.Errorf("no healthy devices present; cannot allocate unhealthy devices %s", resource) + } + + // Check if all the previously allocated devices are healthy + if !healthyDevices.IsSuperset(devices) { + return nil, fmt.Errorf("previously allocated devices are no longer healthy; cannot allocate unhealthy devices %s", resource) + } + if needed == 0 { // No change, no work. return nil, nil } - klog.V(3).InfoS("Need devices to allocate for pod", "deviceNumber", needed, "resourceName", resource, "podUID", string(podUID), "containerName", contName) - // Check if resource registered with devicemanager - if _, ok := m.healthyDevices[resource]; !ok { - return nil, fmt.Errorf("can't allocate unregistered device %s", resource) - } // Declare the list of allocated devices. // This will be populated and returned below. 
diff --git a/pkg/kubelet/cm/devicemanager/manager_test.go b/pkg/kubelet/cm/devicemanager/manager_test.go index 1a91ae2ee0c..56881f9e25a 100644 --- a/pkg/kubelet/cm/devicemanager/manager_test.go +++ b/pkg/kubelet/cm/devicemanager/manager_test.go @@ -993,6 +993,102 @@ func TestPodContainerDeviceAllocation(t *testing.T) { } +func TestPodContainerDeviceToAllocate(t *testing.T) { + resourceName1 := "domain1.com/resource1" + resourceName2 := "domain2.com/resource2" + resourceName3 := "domain2.com/resource3" + as := require.New(t) + tmpDir, err := os.MkdirTemp("", "checkpoint") + as.Nil(err) + defer os.RemoveAll(tmpDir) + + testManager := &ManagerImpl{ + endpoints: make(map[string]endpointInfo), + healthyDevices: make(map[string]sets.String), + unhealthyDevices: make(map[string]sets.String), + allocatedDevices: make(map[string]sets.String), + podDevices: newPodDevices(), + } + + testManager.podDevices.insert("pod1", "con1", resourceName1, + constructDevices([]string{"dev1", "dev2"}), + constructAllocResp(map[string]string{"/dev/r2dev1": "/dev/r2dev1", "/dev/r2dev2": "/dev/r2dev2"}, + map[string]string{"/home/r2lib1": "/usr/r2lib1"}, + map[string]string{"r2devices": "dev1 dev2"})) + testManager.podDevices.insert("pod2", "con2", resourceName2, + checkpoint.DevicesPerNUMA{nodeWithoutTopology: []string{"dev5"}}, + constructAllocResp(map[string]string{"/dev/r1dev5": "/dev/r1dev5"}, + map[string]string{"/home/r1lib1": "/usr/r1lib1"}, map[string]string{})) + testManager.podDevices.insert("pod3", "con3", resourceName3, + checkpoint.DevicesPerNUMA{nodeWithoutTopology: []string{"dev5"}}, + constructAllocResp(map[string]string{"/dev/r1dev5": "/dev/r1dev5"}, + map[string]string{"/home/r1lib1": "/usr/r1lib1"}, map[string]string{})) + + // no healthy devices for resourceName1 and devices corresponding to + // resource2 are intentionally omitted to simulate that the resource + // hasn't been registered. 
+ testManager.healthyDevices[resourceName1] = sets.NewString() + testManager.healthyDevices[resourceName3] = sets.NewString() + // dev5 is no longer in the list of healthy devices + testManager.healthyDevices[resourceName3].Insert("dev7") + testManager.healthyDevices[resourceName3].Insert("dev8") + + testCases := []struct { + description string + podUID string + contName string + resource string + required int + reusableDevices sets.String + expectedAllocatedDevices sets.String + expErr error + }{ + { + description: "Admission error in case no healthy devices to allocate present", + podUID: "pod1", + contName: "con1", + resource: resourceName1, + required: 2, + reusableDevices: sets.NewString(), + expectedAllocatedDevices: nil, + expErr: fmt.Errorf("no healthy devices present; cannot allocate unhealthy devices %s", resourceName1), + }, + { + description: "Admission error in case resource is not registered", + podUID: "pod2", + contName: "con2", + resource: resourceName2, + required: 1, + reusableDevices: sets.NewString(), + expectedAllocatedDevices: nil, + expErr: fmt.Errorf("cannot allocate unregistered device %s", resourceName2), + }, + { + description: "Admission error in case devices previously allocated are no longer healthy", + podUID: "pod3", + contName: "con3", + resource: resourceName3, + required: 1, + reusableDevices: sets.NewString(), + expectedAllocatedDevices: nil, + expErr: fmt.Errorf("previously allocated devices are no longer healthy; cannot allocate unhealthy devices %s", resourceName3), + }, + } + + for _, testCase := range testCases { + allocDevices, err := testManager.devicesToAllocate(testCase.podUID, testCase.contName, testCase.resource, testCase.required, testCase.reusableDevices) + if !reflect.DeepEqual(err, testCase.expErr) { + t.Errorf("devicePluginManager error (%v). expected error: %v but got: %v", + testCase.description, testCase.expErr, err) + } + if !reflect.DeepEqual(allocDevices, testCase.expectedAllocatedDevices) { + t.Errorf("devicePluginManager error (%v). expected error: %v but got: %v", + testCase.description, testCase.expectedAllocatedDevices, allocDevices) + } + } + +} + func TestGetDeviceRunContainerOptions(t *testing.T) { res1 := TestResource{ resourceName: "domain1.com/resource1",