From aa1a0385e27df4e0a9dc1d8ae2aa9d5145504ef6 Mon Sep 17 00:00:00 2001
From: Francesco Romani
Date: Wed, 22 Feb 2023 13:45:33 +0100
Subject: [PATCH 1/4] e2e: node: podresources: internal cleanup

Rename getPodResources to getPodResourcesValues for clarity. Allow it to
return an error (instead of using ginkgo expectations), so it can actually
be used as intended inside `Eventually` blocks without blowing up at the
first failure.

Signed-off-by: Francesco Romani
---
 test/e2e_node/podresources_test.go | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/test/e2e_node/podresources_test.go b/test/e2e_node/podresources_test.go
index 0625995dcad..70bbeac2c17 100644
--- a/test/e2e_node/podresources_test.go
+++ b/test/e2e_node/podresources_test.go
@@ -136,9 +136,11 @@ func logPodResources(podIdx int, pr *kubeletpodresourcesv1.PodResources) {
 
 type podResMap map[string]map[string]kubeletpodresourcesv1.ContainerResources
 
-func getPodResources(ctx context.Context, cli kubeletpodresourcesv1.PodResourcesListerClient) podResMap {
+func getPodResourcesValues(ctx context.Context, cli kubeletpodresourcesv1.PodResourcesListerClient) (podResMap, error) {
 	resp, err := cli.List(ctx, &kubeletpodresourcesv1.ListPodResourcesRequest{})
-	framework.ExpectNoError(err)
+	if err != nil {
+		return nil, err
+	}
 
 	res := make(map[string]map[string]kubeletpodresourcesv1.ContainerResources)
 	for idx, podResource := range resp.GetPodResources() {
@@ -151,7 +153,7 @@
 		}
 		res[podResource.GetName()] = cnts
 	}
-	return res
+	return res, nil
 }
 
 type testPodData struct {
@@ -252,7 +254,10 @@ func matchPodDescWithResources(expected []podDesc, found podResMap) error {
 
 func expectPodResources(ctx context.Context, offset int, cli kubeletpodresourcesv1.PodResourcesListerClient, expected []podDesc) {
 	gomega.EventuallyWithOffset(1+offset, ctx, func(ctx context.Context) error {
-		found := getPodResources(ctx, cli)
+		found, err := getPodResourcesValues(ctx, cli)
+		if err != nil {
+			return err
+		}
 		return matchPodDescWithResources(expected, found)
 	}, time.Minute, 10*time.Second).Should(gomega.Succeed())
 }
@@ -280,8 +285,10 @@ func podresourcesListTests(ctx context.Context, f *framework.Framework, cli kube
 		expectedBasePods = 1 // sriovdp
 	}
 
+	var err error
 	ginkgo.By("checking the output when no pods are present")
-	found = getPodResources(ctx, cli)
+	found, err = getPodResourcesValues(ctx, cli)
+	framework.ExpectNoError(err)
 	gomega.ExpectWithOffset(1, found).To(gomega.HaveLen(expectedBasePods), "base pod expectation mismatch")
 
 	tpd = newTestPodData()

From 871201ba648d5291fd48a1ae96453895e74804a5 Mon Sep 17 00:00:00 2001
From: Francesco Romani
Date: Tue, 21 Feb 2023 12:55:29 +0100
Subject: [PATCH 2/4] e2e: node: remove kubevirt device plugin

The podresources e2e tests want to exercise the case in which a device
plugin doesn't report topology affinity. The only known device plugin
which had this behavior and didn't depend on specialized hardware was the
kubevirt device plugin, which was however deprecated after we started
using it. So the e2e tests are now broken, and in any case they can't
depend on unmaintained code that is liable to become obsolete.

To unblock the situation and preserve some e2e signal, we switch to the
sample device plugin, which is a stub implementation managed in-tree, so
we can maintain it and ensure it fits the e2e test use case. This is
however a regression, because e2e tests should try their hardest to use
real devices and avoid any mocking or faking.
The upside is that using an OS-neutral device plugin for the tests enables
us to run on all the supported platforms (Windows!), so this could allow
us to transition these tests to conformance.

Signed-off-by: Francesco Romani
---
 test/e2e_node/image_list.go                 | 25 ------
 test/e2e_node/podresources_test.go          | 82 +++++++++----------
 .../testing-manifests/kubevirt-kvm-ds.yaml  | 28 -------
 test/e2e_node/util_kubevirt.go              | 27 ------
 4 files changed, 40 insertions(+), 122 deletions(-)
 delete mode 100644 test/e2e_node/testing-manifests/kubevirt-kvm-ds.yaml
 delete mode 100644 test/e2e_node/util_kubevirt.go

diff --git a/test/e2e_node/image_list.go b/test/e2e_node/image_list.go
index 2a4018c73b4..641117d6095 100644
--- a/test/e2e_node/image_list.go
+++ b/test/e2e_node/image_list.go
@@ -87,11 +87,6 @@ func updateImageAllowList(ctx context.Context) {
 	} else {
 		e2epod.ImagePrePullList.Insert(gpuDevicePluginImage)
 	}
-	if kubeVirtPluginImage, err := getKubeVirtDevicePluginImage(); err != nil {
-		klog.Errorln(err)
-	} else {
-		e2epod.ImagePrePullList.Insert(kubeVirtPluginImage)
-	}
 	if samplePluginImage, err := getSampleDevicePluginImage(); err != nil {
 		klog.Errorln(err)
 	} else {
@@ -262,23 +257,3 @@ func getSRIOVDevicePluginImage() (string, error) {
 	}
 	return ds.Spec.Template.Spec.Containers[0].Image, nil
 }
-
-// TODO generilize this function with above one
-// getKubeVirtDevicePluginImage returns the image of SRIOV device plugin.
-func getKubeVirtDevicePluginImage() (string, error) {
-	data, err := e2etestfiles.Read(KubeVirtDevicePluginDSYAML)
-	if err != nil {
-		return "", fmt.Errorf("failed to read the device plugin manifest: %w", err)
-	}
-	ds, err := e2emanifest.DaemonSetFromData(data)
-	if err != nil {
-		return "", fmt.Errorf("failed to parse the device plugin image: %w", err)
-	}
-	if ds == nil {
-		return "", fmt.Errorf("failed to parse the device plugin image: the extracted DaemonSet is nil")
-	}
-	if len(ds.Spec.Template.Spec.Containers) < 1 {
-		return "", fmt.Errorf("failed to parse the device plugin image: cannot extract the container from YAML")
-	}
-	return ds.Spec.Template.Spec.Containers[0].Image, nil
-}
diff --git a/test/e2e_node/podresources_test.go b/test/e2e_node/podresources_test.go
index 70bbeac2c17..cf37e6c19bd 100644
--- a/test/e2e_node/podresources_test.go
+++ b/test/e2e_node/podresources_test.go
@@ -18,7 +18,6 @@ package e2enode
 
 import (
 	"context"
-	"errors"
 	"fmt"
 	"os"
 	"strings"
@@ -49,6 +48,10 @@ import (
 	e2etestfiles "k8s.io/kubernetes/test/e2e/framework/testfiles"
 )
 
+const (
+	defaultTopologyUnawareResourceName = "example.com/resource"
+)
+
 type podDesc struct {
 	podName string
 	cntName string
@@ -234,10 +237,10 @@ func matchPodDescWithResources(expected []podDesc, found podResMap) error {
 			return fmt.Errorf("pod %q container %q expected no resources, got %v", podReq.podName, podReq.cntName, devs)
 		}
 	}
-	if cnts, ok := found[KubeVirtResourceName]; ok {
+	if cnts, ok := found[defaultTopologyUnawareResourceName]; ok {
 		for _, cnt := range cnts {
 			for _, cd := range cnt.GetDevices() {
-				if cd.ResourceName != KubeVirtResourceName {
+				if cd.ResourceName != defaultTopologyUnawareResourceName {
 					continue
 				}
 				if cd.Topology != nil {
@@ -739,14 +742,7 @@ var _ = SIGDescribe("POD Resources [Serial] [Feature:PodResources][NodeFeature:P
 		})
 	})
 
-	ginkgo.Context("with KubeVirt device plugin, which reports resources w/o hardware topology", func() {
-		ginkgo.BeforeEach(func() {
-			_, err := os.Stat("/dev/kvm")
-			if errors.Is(err, os.ErrNotExist) {
-				e2eskipper.Skipf("KubeVirt device plugin could work only in kvm
based environment") - } - }) - + ginkgo.Context("with a topology-unaware device plugin, which reports resources w/o hardware topology", func() { ginkgo.Context("with CPU manager Static policy", func() { ginkgo.BeforeEach(func(ctx context.Context) { // this is a very rough check. We just want to rule out system that does NOT have enough resources @@ -772,10 +768,10 @@ var _ = SIGDescribe("POD Resources [Serial] [Feature:PodResources][NodeFeature:P }) ginkgo.It("should return proper podresources the same as before the restart of kubelet", func(ctx context.Context) { - dpPod := setupKubeVirtDevicePluginOrFail(ctx, f) - ginkgo.DeferCleanup(teardownKubeVirtDevicePluginOrFail, f, dpPod) + dpPod := setupSampleDevicePluginOrFail(ctx, f) + ginkgo.DeferCleanup(teardownSampleDevicePluginOrFail, f, dpPod) - waitForKubeVirtResources(ctx, f, dpPod) + waitForTopologyUnawareResources(ctx, f) endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket) framework.ExpectNoError(err) @@ -784,22 +780,21 @@ var _ = SIGDescribe("POD Resources [Serial] [Feature:PodResources][NodeFeature:P framework.ExpectNoError(err) defer conn.Close() - ginkgo.By("checking List and resources kubevirt resource should be without topology") + ginkgo.By("checking List and resources topology unaware resource should be without topology") allocatableResponse, _ := cli.GetAllocatableResources(ctx, &kubeletpodresourcesv1.AllocatableResourcesRequest{}) for _, dev := range allocatableResponse.GetDevices() { - if dev.ResourceName != KubeVirtResourceName { + if dev.ResourceName != defaultTopologyUnawareResourceName { continue } - framework.ExpectEqual(dev.Topology == nil, true, "Topology is expected to be empty for kubevirt resources") + framework.ExpectEqual(dev.Topology == nil, true, "Topology is expected to be empty for topology unaware resources") } - // Run pod which requires KubeVirtResourceName desc := podDesc{ podName: "pod-01", cntName: "cnt-01", - resourceName: KubeVirtResourceName, + resourceName: defaultTopologyUnawareResourceName, resourceAmount: 1, cpuRequest: 1000, } @@ -894,41 +889,41 @@ func getOnlineCPUs() (cpuset.CPUSet, error) { return cpuset.Parse(strings.TrimSpace(string(onlineCPUList))) } -func setupKubeVirtDevicePluginOrFail(ctx context.Context, f *framework.Framework) *v1.Pod { +func setupSampleDevicePluginOrFail(ctx context.Context, f *framework.Framework) *v1.Pod { e2enode.WaitForNodeToBeReady(ctx, f.ClientSet, framework.TestContext.NodeName, 5*time.Minute) - dp := getKubeVirtDevicePluginPod() + dp := getSampleDevicePluginPod() dp.Spec.NodeName = framework.TestContext.NodeName - ginkgo.By("Create KubeVirt device plugin pod") + ginkgo.By("Create the sample device plugin pod") - dpPod, err := f.ClientSet.CoreV1().Pods(metav1.NamespaceSystem).Create(ctx, dp, metav1.CreateOptions{}) - framework.ExpectNoError(err) + dpPod = e2epod.NewPodClient(f).CreateSync(ctx, dp) - if err = e2epod.WaitForPodCondition(ctx, f.ClientSet, metav1.NamespaceSystem, dp.Name, "Ready", 120*time.Second, testutils.PodRunningReady); err != nil { - framework.Logf("KubeVirt Pod %v took too long to enter running/ready: %v", dp.Name, err) + err := e2epod.WaitForPodCondition(ctx, f.ClientSet, dpPod.Namespace, dpPod.Name, "Ready", 120*time.Second, testutils.PodRunningReady) + if err != nil { + framework.Logf("Sample Device Pod %v took too long to enter running/ready: %v", dp.Name, err) } framework.ExpectNoError(err) return dpPod } -func teardownKubeVirtDevicePluginOrFail(ctx context.Context, f *framework.Framework, pod *v1.Pod) { 
+func teardownSampleDevicePluginOrFail(ctx context.Context, f *framework.Framework, pod *v1.Pod) { gp := int64(0) deleteOptions := metav1.DeleteOptions{ GracePeriodSeconds: &gp, } - ginkgo.By(fmt.Sprintf("Delete KubeVirt device plugin pod %s/%s", pod.Namespace, pod.Name)) + ginkgo.By(fmt.Sprintf("Delete sample device plugin pod %s/%s", pod.Namespace, pod.Name)) err := f.ClientSet.CoreV1().Pods(pod.Namespace).Delete(ctx, pod.Name, deleteOptions) framework.ExpectNoError(err) waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace) } -func findKubeVirtResource(node *v1.Node) int64 { +func findTopologyUnawareResource(node *v1.Node) int64 { framework.Logf("Node status allocatable: %v", node.Status.Allocatable) for key, val := range node.Status.Allocatable { - if string(key) == KubeVirtResourceName { + if string(key) == defaultTopologyUnawareResourceName { v := val.Value() if v > 0 { return v @@ -938,34 +933,37 @@ func findKubeVirtResource(node *v1.Node) int64 { return 0 } -func waitForKubeVirtResources(ctx context.Context, f *framework.Framework, pod *v1.Pod) { - ginkgo.By("Waiting for kubevirt resources to become available on the local node") +func waitForTopologyUnawareResources(ctx context.Context, f *framework.Framework, pod *v1.Pod) { + ginkgo.By(fmt.Sprintf("Waiting for %q resources to become available on the local node", defaultTopologyUnawareResourceName)) gomega.Eventually(ctx, func(ctx context.Context) bool { node := getLocalNode(ctx, f) - kubeVirtResourceAmount := findKubeVirtResource(node) - return kubeVirtResourceAmount != 0 + resourceAmount := findTopologyUnawareResource(node) + return resourceAmount != 0 }, 2*time.Minute, framework.Poll).Should(gomega.BeTrue()) } -// getKubeVirtDevicePluginPod returns the Device Plugin pod for kube resources in e2e tests. -func getKubeVirtDevicePluginPod() *v1.Pod { - data, err := e2etestfiles.Read(KubeVirtDevicePluginDSYAML) +// getSampleDevicePluginPod returns the Sample Device Plugin pod to be used e2e tests. 
+func getSampleDevicePluginPod() *v1.Pod {
+	data, err := e2etestfiles.Read(SampleDevicePluginDSYAML)
 	if err != nil {
 		framework.Fail(err.Error())
 	}
 
 	ds := readDaemonSetV1OrDie(data)
-	p := &v1.Pod{
+	dp := &v1.Pod{
 		ObjectMeta: metav1.ObjectMeta{
-			Name:      KubeVirtDevicePluginName,
-			Namespace: metav1.NamespaceSystem,
+			Name: sampleDevicePluginName,
 		},
 		Spec: ds.Spec.Template.Spec,
 	}
+	for i := range dp.Spec.Containers[0].Env {
+		if dp.Spec.Containers[0].Env[i].Name == envVarNamePluginSockDir {
+			dp.Spec.Containers[0].Env[i].Value = pluginSockDir
+		}
+	}
 
-	return p
+	return dp
 }
 
 func getPodResourcesMetrics(ctx context.Context) (e2emetrics.KubeletMetrics, error) {
diff --git a/test/e2e_node/testing-manifests/kubevirt-kvm-ds.yaml b/test/e2e_node/testing-manifests/kubevirt-kvm-ds.yaml
deleted file mode 100644
index d8e8b0fdc4f..00000000000
--- a/test/e2e_node/testing-manifests/kubevirt-kvm-ds.yaml
+++ /dev/null
@@ -1,28 +0,0 @@
-apiVersion: apps/v1
-kind: DaemonSet
-metadata:
-  labels:
-    name: kubevirt-kvm-device-plugin
-  name: kubevirt-kvm-device-plugin
-spec:
-  selector:
-    matchLabels:
-      name: kubevirt-kvm-device-plugin
-  template:
-    metadata:
-      labels:
-        name: kubevirt-kvm-device-plugin
-    spec:
-      containers:
-      - name: kubevirt-kvm-device-plugin
-        image: quay.io/kubevirt/device-plugin-kvm
-        args: ["-v", "3", "-logtostderr"]
-        securityContext:
-          privileged: true
-        volumeMounts:
-        - name: device-plugin
-          mountPath: /var/lib/kubelet/device-plugins
-      volumes:
-      - name: device-plugin
-        hostPath:
-          path: /var/lib/kubelet/device-plugins
diff --git a/test/e2e_node/util_kubevirt.go b/test/e2e_node/util_kubevirt.go
deleted file mode 100644
index 4b444138fde..00000000000
--- a/test/e2e_node/util_kubevirt.go
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
-Copyright 2021 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-package e2enode
-
-const (
-	KubeVirtDevicePluginDSYAML = "test/e2e_node/testing-manifests/kubevirt-kvm-ds.yaml"
-
-	// KubeVirtDevicePluginName is the name of the device plugin pod
-	KubeVirtDevicePluginName = "kubevirt-device-plugin"
-
-	// KubeVirtResourceName is the name of the resource provided by kubevirt device plugin
-	KubeVirtResourceName = "devices.kubevirt.io/kvm"
-)

From 92e00203e027bf88ddc695fc17e80c913a6b178a Mon Sep 17 00:00:00 2001
From: Francesco Romani
Date: Tue, 21 Feb 2023 13:50:33 +0100
Subject: [PATCH 3/4] e2e: node: unify sample device plugin utilities

Start consolidating the sample device plugin utilities and constants in a
central place, because we need to use them in different e2e tests. Having
a central dependency is better than a maze of entangled e2e tests
depending on each other's helpers.
Signed-off-by: Francesco Romani --- test/e2e_node/device_plugin_test.go | 92 +++++------------------------ test/e2e_node/image_list.go | 29 ++++++++- test/e2e_node/podresources_test.go | 48 ++------------- test/e2e_node/util_sampledevice.go | 52 ++++++++++++++++ 4 files changed, 100 insertions(+), 121 deletions(-) create mode 100644 test/e2e_node/util_sampledevice.go diff --git a/test/e2e_node/device_plugin_test.go b/test/e2e_node/device_plugin_test.go index 5cdf50e2ba8..f8c4420b581 100644 --- a/test/e2e_node/device_plugin_test.go +++ b/test/e2e_node/device_plugin_test.go @@ -31,7 +31,6 @@ import ( "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/serializer" kubeletdevicepluginv1beta1 "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" - e2etestfiles "k8s.io/kubernetes/test/e2e/framework/testfiles" admissionapi "k8s.io/pod-security-admission/api" "k8s.io/apimachinery/pkg/api/resource" @@ -44,17 +43,6 @@ import ( e2epod "k8s.io/kubernetes/test/e2e/framework/pod" ) -const ( - // sampleResourceName is the name of the example resource which is used in the e2e test - sampleResourceName = "example.com/resource" - // sampleDevicePluginName is the name of the device plugin pod - sampleDevicePluginName = "sample-device-plugin" - - // fake resource name - resourceName = "example.com/resource" - envVarNamePluginSockDir = "PLUGIN_SOCK_DIR" -) - var ( appsScheme = runtime.NewScheme() appsCodecs = serializer.NewCodecFactory(appsScheme) @@ -67,17 +55,6 @@ var _ = SIGDescribe("Device Plugin [Feature:DevicePluginProbe][NodeFeature:Devic testDevicePlugin(f, kubeletdevicepluginv1beta1.DevicePluginPath) }) -// numberOfSampleResources returns the number of resources advertised by a node. -func numberOfSampleResources(node *v1.Node) int64 { - val, ok := node.Status.Capacity[sampleResourceName] - - if !ok { - return 0 - } - - return val.Value() -} - // readDaemonSetV1OrDie reads daemonset object from bytes. Panics on error. 
func readDaemonSetV1OrDie(objBytes []byte) *appsv1.DaemonSet { appsv1.AddToScheme(appsScheme) @@ -133,31 +110,14 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) { }, f.Timeouts.PodDelete, f.Timeouts.Poll).Should(gomega.Succeed()) ginkgo.By("Scheduling a sample device plugin pod") - data, err := e2etestfiles.Read(SampleDevicePluginDSYAML) - if err != nil { - framework.Fail(err.Error()) - } - ds := readDaemonSetV1OrDie(data) - - dp := &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: sampleDevicePluginName, - }, - Spec: ds.Spec.Template.Spec, - } - - for i := range dp.Spec.Containers[0].Env { - if dp.Spec.Containers[0].Env[i].Name == envVarNamePluginSockDir { - dp.Spec.Containers[0].Env[i].Value = pluginSockDir - } - } + dp := getSampleDevicePluginPod(pluginSockDir) dptemplate = dp.DeepCopy() devicePluginPod = e2epod.NewPodClient(f).CreateSync(ctx, dp) ginkgo.By("Waiting for devices to become available on the local node") gomega.Eventually(ctx, func(ctx context.Context) bool { node, ready := getLocalTestNode(ctx, f) - return ready && numberOfSampleResources(node) > 0 + return ready && CountSampleDeviceCapacity(node) > 0 }, 5*time.Minute, framework.Poll).Should(gomega.BeTrue()) framework.Logf("Successfully created device plugin pod") @@ -165,8 +125,8 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) { gomega.Eventually(ctx, func(ctx context.Context) bool { node, ready := getLocalTestNode(ctx, f) return ready && - numberOfDevicesCapacity(node, resourceName) == devsLen && - numberOfDevicesAllocatable(node, resourceName) == devsLen + CountSampleDeviceCapacity(node) == devsLen && + CountSampleDeviceAllocatable(node) == devsLen }, 30*time.Second, framework.Poll).Should(gomega.BeTrue()) }) @@ -191,7 +151,7 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) { ginkgo.By("Waiting for devices to become unavailable on the local node") gomega.Eventually(ctx, func(ctx context.Context) bool { node, ready := getLocalTestNode(ctx, f) - return ready && numberOfSampleResources(node) <= 0 + return ready && CountSampleDeviceCapacity(node) <= 0 }, 5*time.Minute, framework.Poll).Should(gomega.BeTrue()) ginkgo.By("devices now unavailable on the local node") @@ -199,7 +159,7 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) { ginkgo.It("Can schedule a pod that requires a device", func(ctx context.Context) { podRECMD := "devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep 60" - pod1 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(resourceName, podRECMD)) + pod1 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD)) deviceIDRE := "stub devices: (Dev-[0-9]+)" devID1 := parseLog(ctx, f, pod1.Name, pod1.Name, deviceIDRE) gomega.Expect(devID1).To(gomega.Not(gomega.Equal(""))) @@ -250,8 +210,8 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) { framework.ExpectEqual(len(v1alphaResourcesForOurPod.Containers[0].Devices), 1) framework.ExpectEqual(len(v1ResourcesForOurPod.Containers[0].Devices), 1) - framework.ExpectEqual(v1alphaResourcesForOurPod.Containers[0].Devices[0].ResourceName, resourceName) - framework.ExpectEqual(v1ResourcesForOurPod.Containers[0].Devices[0].ResourceName, resourceName) + framework.ExpectEqual(v1alphaResourcesForOurPod.Containers[0].Devices[0].ResourceName, SampleDeviceResourceName) + framework.ExpectEqual(v1ResourcesForOurPod.Containers[0].Devices[0].ResourceName, SampleDeviceResourceName) 
framework.ExpectEqual(len(v1alphaResourcesForOurPod.Containers[0].Devices[0].DeviceIds), 1) framework.ExpectEqual(len(v1ResourcesForOurPod.Containers[0].Devices[0].DeviceIds), 1) @@ -259,7 +219,7 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) { ginkgo.It("Keeps device plugin assignments across pod and kubelet restarts", func(ctx context.Context) { podRECMD := "devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep 60" - pod1 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(resourceName, podRECMD)) + pod1 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD)) deviceIDRE := "stub devices: (Dev-[0-9]+)" devID1 := parseLog(ctx, f, pod1.Name, pod1.Name, deviceIDRE) gomega.Expect(devID1).To(gomega.Not(gomega.Equal(""))) @@ -288,7 +248,7 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) { ginkgo.It("Keeps device plugin assignments after the device plugin has been re-registered", func(ctx context.Context) { podRECMD := "devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep 60" - pod1 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(resourceName, podRECMD)) + pod1 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD)) deviceIDRE := "stub devices: (Dev-[0-9]+)" devID1 := parseLog(ctx, f, pod1.Name, pod1.Name, deviceIDRE) gomega.Expect(devID1).To(gomega.Not(gomega.Equal(""))) @@ -322,12 +282,12 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) { gomega.Eventually(ctx, func() bool { node, ready := getLocalTestNode(ctx, f) return ready && - numberOfDevicesCapacity(node, resourceName) == devsLen && - numberOfDevicesAllocatable(node, resourceName) == devsLen + CountSampleDeviceCapacity(node) == devsLen && + CountSampleDeviceAllocatable(node) == devsLen }, 30*time.Second, framework.Poll).Should(gomega.BeTrue()) ginkgo.By("Creating another pod") - pod2 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(resourceName, podRECMD)) + pod2 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD)) ginkgo.By("Checking that pod got a different fake device") devID2 := parseLog(ctx, f, pod2.Name, pod2.Name, deviceIDRE) @@ -338,10 +298,10 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) { } // makeBusyboxPod returns a simple Pod spec with a busybox container -// that requests resourceName and runs the specified command. -func makeBusyboxPod(resourceName, cmd string) *v1.Pod { +// that requests SampleDeviceResourceName and runs the specified command. 
+func makeBusyboxPod(SampleDeviceResourceName, cmd string) *v1.Pod {
 	podName := "device-plugin-test-" + string(uuid.NewUUID())
-	rl := v1.ResourceList{v1.ResourceName(resourceName): *resource.NewQuantity(1, resource.DecimalSI)}
+	rl := v1.ResourceList{v1.ResourceName(SampleDeviceResourceName): *resource.NewQuantity(1, resource.DecimalSI)}
 
 	return &v1.Pod{
 		ObjectMeta: metav1.ObjectMeta{Name: podName},
@@ -397,23 +357,3 @@ func parseLog(ctx context.Context, f *framework.Framework, podName string, contN
 
 	return matches[1]
 }
-
-// numberOfDevicesCapacity returns the number of devices of resourceName advertised by a node capacity
-func numberOfDevicesCapacity(node *v1.Node, resourceName string) int64 {
-	val, ok := node.Status.Capacity[v1.ResourceName(resourceName)]
-	if !ok {
-		return 0
-	}
-
-	return val.Value()
-}
-
-// numberOfDevicesAllocatable returns the number of devices of resourceName advertised by a node allocatable
-func numberOfDevicesAllocatable(node *v1.Node, resourceName string) int64 {
-	val, ok := node.Status.Allocatable[v1.ResourceName(resourceName)]
-	if !ok {
-		return 0
-	}
-
-	return val.Value()
-}
diff --git a/test/e2e_node/image_list.go b/test/e2e_node/image_list.go
index 641117d6095..611590642bc 100644
--- a/test/e2e_node/image_list.go
+++ b/test/e2e_node/image_list.go
@@ -24,13 +24,16 @@ import (
 	"sync"
 	"time"
 
+	v1 "k8s.io/api/core/v1"
 	"k8s.io/klog/v2"
 
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	utilerrors "k8s.io/apimachinery/pkg/util/errors"
 	"k8s.io/apimachinery/pkg/util/sets"
 	internalapi "k8s.io/cri-api/pkg/apis"
 	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
 	commontest "k8s.io/kubernetes/test/e2e/common"
+	"k8s.io/kubernetes/test/e2e/framework"
 	e2egpu "k8s.io/kubernetes/test/e2e/framework/gpu"
 	e2emanifest "k8s.io/kubernetes/test/e2e/framework/manifest"
 	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
@@ -45,9 +48,6 @@ const (
 	imagePullRetryDelay = time.Second
 	// Number of parallel count to pull images.
 	maxParallelImagePullCount = 5
-
-	// SampleDevicePluginDSYAML is the path of the daemonset template of the sample device plugin. // TODO: Parametrize it by making it a feature in TestFramework.
-	SampleDevicePluginDSYAML = "test/e2e/testing-manifests/sample-device-plugin.yaml"
 )
 
 // NodePrePullImageList is a list of images used in node e2e test. These images will be prepulled
@@ -239,6 +239,29 @@ func getSampleDevicePluginImage() (string, error) {
 	return ds.Spec.Template.Spec.Containers[0].Image, nil
 }
 
+// getSampleDevicePluginPod returns the Sample Device Plugin pod to be used in e2e tests.
+func getSampleDevicePluginPod(pluginSockDir string) *v1.Pod {
+	data, err := e2etestfiles.Read(SampleDevicePluginDSYAML)
+	if err != nil {
+		framework.Fail(err.Error())
+	}
+
+	ds := readDaemonSetV1OrDie(data)
+	dp := &v1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: SampleDevicePluginName,
+		},
+		Spec: ds.Spec.Template.Spec,
+	}
+	for i := range dp.Spec.Containers[0].Env {
+		if dp.Spec.Containers[0].Env[i].Name == SampleDeviceEnvVarNamePluginSockDir {
+			dp.Spec.Containers[0].Env[i].Value = pluginSockDir
+		}
+	}
+
+	return dp
+}
+
 // getSRIOVDevicePluginImage returns the image of SRIOV device plugin.
func getSRIOVDevicePluginImage() (string, error) { data, err := e2etestfiles.Read(SRIOVDevicePluginDSYAML) diff --git a/test/e2e_node/podresources_test.go b/test/e2e_node/podresources_test.go index cf37e6c19bd..11fd14dc88e 100644 --- a/test/e2e_node/podresources_test.go +++ b/test/e2e_node/podresources_test.go @@ -26,6 +26,7 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + kubeletdevicepluginv1beta1 "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" kubeletpodresourcesv1 "k8s.io/kubelet/pkg/apis/podresources/v1" kubefeatures "k8s.io/kubernetes/pkg/features" kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config" @@ -45,7 +46,6 @@ import ( e2enode "k8s.io/kubernetes/test/e2e/framework/node" e2epod "k8s.io/kubernetes/test/e2e/framework/pod" e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" - e2etestfiles "k8s.io/kubernetes/test/e2e/framework/testfiles" ) const ( @@ -892,12 +892,12 @@ func getOnlineCPUs() (cpuset.CPUSet, error) { func setupSampleDevicePluginOrFail(ctx context.Context, f *framework.Framework) *v1.Pod { e2enode.WaitForNodeToBeReady(ctx, f.ClientSet, framework.TestContext.NodeName, 5*time.Minute) - dp := getSampleDevicePluginPod() + dp := getSampleDevicePluginPod(kubeletdevicepluginv1beta1.DevicePluginPath) dp.Spec.NodeName = framework.TestContext.NodeName ginkgo.By("Create the sample device plugin pod") - dpPod = e2epod.NewPodClient(f).CreateSync(ctx, dp) + dpPod := e2epod.NewPodClient(f).CreateSync(ctx, dp) err := e2epod.WaitForPodCondition(ctx, f.ClientSet, dpPod.Namespace, dpPod.Name, "Ready", 120*time.Second, testutils.PodRunningReady) if err != nil { @@ -920,52 +920,16 @@ func teardownSampleDevicePluginOrFail(ctx context.Context, f *framework.Framewor waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace) } -func findTopologyUnawareResource(node *v1.Node) int64 { - framework.Logf("Node status allocatable: %v", node.Status.Allocatable) - for key, val := range node.Status.Allocatable { - if string(key) == defaultTopologyUnawareResourceName { - v := val.Value() - if v > 0 { - return v - } - } - } - return 0 -} - -func waitForTopologyUnawareResources(ctx context.Context, f *framework.Framework, pod *v1.Pod) { +func waitForTopologyUnawareResources(ctx context.Context, f *framework.Framework) { ginkgo.By(fmt.Sprintf("Waiting for %q resources to become available on the local node", defaultTopologyUnawareResourceName)) gomega.Eventually(ctx, func(ctx context.Context) bool { node := getLocalNode(ctx, f) - resourceAmount := findTopologyUnawareResource(node) - return resourceAmount != 0 + resourceAmount := CountSampleDeviceAllocatable(node) + return resourceAmount > 0 }, 2*time.Minute, framework.Poll).Should(gomega.BeTrue()) } -// getSampleDevicePluginPod returns the Sample Device Plugin pod to be used e2e tests. 
-func getSampleDevicePluginPod() *v1.Pod {
-	data, err := e2etestfiles.Read(SampleDevicePluginDSYAML)
-	if err != nil {
-		framework.Fail(err.Error())
-	}
-
-	ds := readDaemonSetV1OrDie(data)
-	dp := &v1.Pod{
-		ObjectMeta: metav1.ObjectMeta{
-			Name: sampleDevicePluginName,
-		},
-		Spec: ds.Spec.Template.Spec,
-	}
-	for i := range dp.Spec.Containers[0].Env {
-		if dp.Spec.Containers[0].Env[i].Name == envVarNamePluginSockDir {
-			dp.Spec.Containers[0].Env[i].Value = pluginSockDir
-		}
-	}
-
-	return dp
-}
-
 func getPodResourcesMetrics(ctx context.Context) (e2emetrics.KubeletMetrics, error) {
 	// we are running out of good names, so we need to be unnecessarily specific to avoid clashes
 	ginkgo.By("getting Pod Resources metrics from the metrics API")
diff --git a/test/e2e_node/util_sampledevice.go b/test/e2e_node/util_sampledevice.go
new file mode 100644
index 00000000000..a1e22ee761e
--- /dev/null
+++ b/test/e2e_node/util_sampledevice.go
@@ -0,0 +1,52 @@
+/*
+Copyright 2023 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package e2enode
+
+import (
+	v1 "k8s.io/api/core/v1"
+)
+
+const (
+	// SampleDevicePluginDSYAML is the path of the daemonset template of the sample device plugin. // TODO: Parametrize it by making it a feature in TestFramework.
+	SampleDevicePluginDSYAML = "test/e2e/testing-manifests/sample-device-plugin.yaml"
+
+	// SampleDevicePluginName is the name of the device plugin pod
+	SampleDevicePluginName = "sample-device-plugin"
+
+	// SampleDeviceResourceName is the name of the resource provided by the sample device plugin
+	SampleDeviceResourceName = "example.com/resource"
+
+	SampleDeviceEnvVarNamePluginSockDir = "PLUGIN_SOCK_DIR"
+)
+
+// CountSampleDeviceCapacity returns the number of devices of SampleDeviceResourceName advertised by a node capacity
+func CountSampleDeviceCapacity(node *v1.Node) int64 {
+	val, ok := node.Status.Capacity[v1.ResourceName(SampleDeviceResourceName)]
+	if !ok {
+		return 0
+	}
+	return val.Value()
+}
+
+// CountSampleDeviceAllocatable returns the number of devices of SampleDeviceResourceName advertised by a node allocatable
+func CountSampleDeviceAllocatable(node *v1.Node) int64 {
+	val, ok := node.Status.Allocatable[v1.ResourceName(SampleDeviceResourceName)]
+	if !ok {
+		return 0
+	}
+	return val.Value()
+}

From 00b41334bf3f0c716c3a5080ffec0352d282db8a Mon Sep 17 00:00:00 2001
From: Francesco Romani
Date: Wed, 22 Feb 2023 13:50:31 +0100
Subject: [PATCH 4/4] e2e: node: podresources: fix restart wait

Fix the waiting logic in the e2e test loop to wait for resources to be
reported again, instead of relying on timestamp comparisons. The idea is
that waiting for resource availability is the canonical way clients
should observe the desired state, and it should also be more robust than
comparing timestamps, especially in CI environments.
Signed-off-by: Francesco Romani --- test/e2e_node/podresources_test.go | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/test/e2e_node/podresources_test.go b/test/e2e_node/podresources_test.go index 11fd14dc88e..cba77b8234a 100644 --- a/test/e2e_node/podresources_test.go +++ b/test/e2e_node/podresources_test.go @@ -806,23 +806,13 @@ var _ = SIGDescribe("POD Resources [Serial] [Feature:PodResources][NodeFeature:P expectPodResources(ctx, 1, cli, []podDesc{desc}) - restartTime := time.Now() ginkgo.By("Restarting Kubelet") restartKubelet(true) // we need to wait for the node to be reported ready before we can safely query // the podresources endpoint again. Otherwise we will have false negatives. ginkgo.By("Wait for node to be ready") - gomega.Eventually(ctx, func() bool { - node, err := f.ClientSet.CoreV1().Nodes().Get(ctx, framework.TestContext.NodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - for _, cond := range node.Status.Conditions { - if cond.Type == v1.NodeReady && cond.Status == v1.ConditionTrue && cond.LastHeartbeatTime.After(restartTime) { - return true - } - } - return false - }, 5*time.Minute, framework.Poll).Should(gomega.BeTrue()) + waitForTopologyUnawareResources(ctx, f) expectPodResources(ctx, 1, cli, []podDesc{desc}) tpd.deletePodsForTest(ctx, f)
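
Note on the idiom the series converges on: patch 1 makes helpers return errors so they compose with `gomega.Eventually`, and patch 4 expresses "ready after a kubelet restart" as "the device plugin resource is advertised again" rather than as a heartbeat-timestamp comparison. A minimal, self-contained sketch of that idiom follows. It is illustrative only and not code from the series: the countAllocatable and waitForResource names and the clientset plumbing are assumptions, and "example.com/resource" is the sample device plugin resource taken from the diffs above.

package e2enode

import (
	"context"
	"time"

	"github.com/onsi/gomega"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
)

// countAllocatable reports how many units of resName the node currently
// advertises as allocatable. It returns the error instead of asserting on
// it, so an Eventually block can keep retrying through transient failures;
// this mirrors why getPodResourcesValues returns (podResMap, error).
func countAllocatable(ctx context.Context, cs kubernetes.Interface, nodeName, resName string) (int64, error) {
	node, err := cs.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
	if err != nil {
		return 0, err
	}
	val, ok := node.Status.Allocatable[v1.ResourceName(resName)]
	if !ok {
		return 0, nil
	}
	return val.Value(), nil
}

// waitForResource polls until the resource is advertised again, e.g. after
// a kubelet restart. Each poll that returns a non-nil error or a zero count
// simply retries; the assertion fails only once the timeout expires.
func waitForResource(ctx context.Context, cs kubernetes.Interface, nodeName, resName string) {
	gomega.Eventually(ctx, func(ctx context.Context) (int64, error) {
		return countAllocatable(ctx, cs, nodeName, resName)
	}, 2*time.Minute, 10*time.Second).Should(gomega.BeNumerically(">", 0))
}

Observing availability directly also makes the wait self-describing: the test resumes exactly when the state it depends on is back, with no assumptions about clock skew between the test runner and the node.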