From 98eb6db7c0202d1951ae5c14e218bd30e76ab36a Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Thu, 12 May 2022 19:51:37 +0200 Subject: [PATCH] e2e: node: fix plugins directory Previously, the e2e test was overriding the plugins socket directory to "/var/lib/kubelet/plugins_registry". This seems wrong, and with that setting the e2e test was already failing, because the registration process was timing out, in turn because the kubelet was trying to call back the device plugin in the wrong place (see below for details). I can't explain why it worked before - or if it worked at all - but it really seems that `pluginapi.DevicePluginPath` is the right setting here. +++ In a nutshell, the device plugin registration process works like this: 1. The kubelet runs and creates the device plugin socket registration endpoint: KubeletSocket = DevicePluginPath + "kubelet.sock" DevicePluginPath = "/var/lib/kubelet/device-plugins/" 2. Each device plugin will listen to an ENDPOINT the kubelet will connect back to. IOW the kubelet will act like a client to each device plugin, to perform allocation requests (and more) Each device plugin will serve from an endpoint. The endpoint name is plugin-specific, but they all must be inside a well-known directory: pluginapi.DevicePluginPath 3. The kubelet creates the device plugin pod, like any other pod 4. During the startup, each device plugin wants to register itself with the kubelet. So it sends a request through the registration endpoint. Key details: grpc.Dial(kubelet registration socket) registration request reqt := &pluginapi.RegisterRequest{ Version: pluginapi.Version, Endpoint: endpointSocket, <- socket relative to pluginapi.DevicePluginPath ResourceName: resourceName, <- resource name to be exposed } 5. While handling the registration request, kubelet dials back the device plugin on socketDir + req.Endpoint.
But socketDir is hardcoded in the device manager code to the directory containing pluginapi.KubeletSocket, i.e. pluginapi.DevicePluginPath. Signed-off-by: Francesco Romani --- test/e2e_node/device_plugin_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/e2e_node/device_plugin_test.go b/test/e2e_node/device_plugin_test.go index 755589c1578..629b5000b02 100644 --- a/test/e2e_node/device_plugin_test.go +++ b/test/e2e_node/device_plugin_test.go @@ -25,6 +25,7 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/serializer" + kubeletdevicepluginv1beta1 "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" e2etestfiles "k8s.io/kubernetes/test/e2e/framework/testfiles" admissionapi "k8s.io/pod-security-admission/api" @@ -64,7 +65,7 @@ var ( var _ = SIGDescribe("Device Plugin [Feature:DevicePluginProbe][NodeFeature:DevicePluginProbe][Serial]", func() { f := framework.NewDefaultFramework("device-plugin-errors") f.NamespacePodSecurityEnforceLevel = admissionapi.LevelPrivileged - testDevicePlugin(f, "/var/lib/kubelet/plugins_registry") + testDevicePlugin(f, kubeletdevicepluginv1beta1.DevicePluginPath) }) // numberOfSampleResources returns the number of resources advertised by a node.