From 993f3c361f2ec07de5294450948b4c4b9f6255f1 Mon Sep 17 00:00:00 2001 From: Lionel Jouin Date: Wed, 19 Feb 2025 20:34:38 +0100 Subject: [PATCH] Fix DRA flaky test for ResourceClaim device status The previous Eventually loop did not properly check whether the pod was scheduled and running: it only reported a failure when the Get call returned an error and the node name was already set, so it could succeed before the pod had been assigned to a node. As a result, the node name could not be retrieved from the pod spec, no plugin was found for that (empty) node name, and UpdateStatus was called on a nil object. The testPod function is now used instead, so the test waits for the pod to be scheduled. The Eventually loops that fetched the pod and the ResourceClaim are therefore no longer needed. Signed-off-by: Lionel Jouin --- test/e2e/dra/dra.go | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/test/e2e/dra/dra.go b/test/e2e/dra/dra.go index 05f0bfdc949..ae4835fe04f 100644 --- a/test/e2e/dra/dra.go +++ b/test/e2e/dra/dra.go @@ -411,27 +411,18 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation, b.create(ctx, claim, pod) // Waits for the ResourceClaim to be allocated and the pod to be scheduled. 
- var allocatedResourceClaim *resourceapi.ResourceClaim - var scheduledPod *v1.Pod - - gomega.Eventually(ctx, func(ctx context.Context) (*resourceapi.ResourceClaim, error) { - var err error - allocatedResourceClaim, err = b.f.ClientSet.ResourceV1beta1().ResourceClaims(b.f.Namespace.Name).Get(ctx, claim.Name, metav1.GetOptions{}) - return allocatedResourceClaim, err - }).WithTimeout(f.Timeouts.PodDelete).ShouldNot(gomega.HaveField("Status.Allocation", (*resourceapi.AllocationResult)(nil))) - - gomega.Eventually(ctx, func(ctx context.Context) error { - var err error - scheduledPod, err = b.f.ClientSet.CoreV1().Pods(pod.Namespace).Get(ctx, pod.Name, metav1.GetOptions{}) - if err != nil && scheduledPod.Spec.NodeName != "" { - return fmt.Errorf("expected the test pod %s to exist and to be scheduled on a node: %w", pod.Name, err) - } - return nil - }).WithTimeout(f.Timeouts.PodDelete).Should(gomega.BeNil()) + b.testPod(ctx, f, pod) + allocatedResourceClaim, err := b.f.ClientSet.ResourceV1beta1().ResourceClaims(b.f.Namespace.Name).Get(ctx, claim.Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + gomega.Expect(allocatedResourceClaim).ToNot(gomega.BeNil()) gomega.Expect(allocatedResourceClaim.Status.Allocation).ToNot(gomega.BeNil()) gomega.Expect(allocatedResourceClaim.Status.Allocation.Devices.Results).To(gomega.HaveLen(1)) + scheduledPod, err := b.f.ClientSet.CoreV1().Pods(b.f.Namespace.Name).Get(ctx, pod.Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + gomega.Expect(scheduledPod).ToNot(gomega.BeNil()) + ginkgo.By("Setting the device status a first time") allocatedResourceClaim.Status.Devices = append(allocatedResourceClaim.Status.Devices, resourceapi.AllocatedDeviceStatus{ @@ -446,8 +437,13 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation, HardwareAddress: "bc:1c:b6:3e:b8:25", }, }) + // Updates the ResourceClaim from the driver on the same node as the pod. 
- updatedResourceClaim, err := driver.Nodes[scheduledPod.Spec.NodeName].ExamplePlugin.UpdateStatus(ctx, allocatedResourceClaim) + plugin, ok := driver.Nodes[scheduledPod.Spec.NodeName] + if !ok { + framework.Failf("pod got scheduled to node %s without a plugin", scheduledPod.Spec.NodeName) + } + updatedResourceClaim, err := plugin.UpdateStatus(ctx, allocatedResourceClaim) framework.ExpectNoError(err) gomega.Expect(updatedResourceClaim).ToNot(gomega.BeNil()) gomega.Expect(updatedResourceClaim.Status.Devices).To(gomega.Equal(allocatedResourceClaim.Status.Devices)) @@ -465,7 +461,8 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation, HardwareAddress: "bc:1c:b6:3e:b8:26", }, } - updatedResourceClaim2, err := driver.Nodes[scheduledPod.Spec.NodeName].ExamplePlugin.UpdateStatus(ctx, updatedResourceClaim) + + updatedResourceClaim2, err := plugin.UpdateStatus(ctx, updatedResourceClaim) framework.ExpectNoError(err) gomega.Expect(updatedResourceClaim2).ToNot(gomega.BeNil()) gomega.Expect(updatedResourceClaim2.Status.Devices).To(gomega.Equal(updatedResourceClaim.Status.Devices))