Mirror of https://github.com/k3s-io/kubernetes.git, synced 2025-07-20 10:20:51 +00:00
test: Fix node e2e device plugin flake
The device plugin test expects that no other pods are running before the test starts. However, it has been observed that in some cases resources from previous tests may still be present. This happens because other tests clean up by deleting their framework's namespace, and that deletion is asynchronous: the suite does not wait for the namespace to actually be removed. As a result, when the node e2e device plugin test starts, pods from earlier tests may still be terminating. To work around this, add a retry to the device plugin test to allow for the time it takes to delete the resources left over from the prior test.

Signed-off-by: David Porter <david@porter.me>
parent f58f70bd57
commit 225658884b
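To illustrate the retry described in the commit message before the diff below: a minimal, self-contained sketch of the same gomega.Eventually polling pattern. The listPodsOnNode helper and the 2 minute / 2 second intervals are illustrative assumptions, not the framework's f.Timeouts values used in the real test.

package example

import (
	"context"
	"fmt"
	"testing"
	"time"

	"github.com/onsi/gomega"
)

// listPodsOnNode is a hypothetical stand-in for querying the kubelet's
// podresources endpoint; it returns the names of pods still reported on the node.
func listPodsOnNode(ctx context.Context) ([]string, error) {
	return nil, nil // pretend the node is already clean
}

func TestNodeEventuallyClean(t *testing.T) {
	g := gomega.NewWithT(t)
	ctx := context.Background()

	// A single up-front "no pods" assertion would flake while a previous
	// test's namespace deletion is still in flight, so poll instead:
	// retry until the check returns nil, or fail after the timeout.
	g.Eventually(ctx, func(ctx context.Context) error {
		pods, err := listPodsOnNode(ctx)
		if err != nil {
			return fmt.Errorf("listing pods on node: %w", err)
		}
		if len(pods) > 0 {
			return fmt.Errorf("node not clean yet, still reporting pods: %v", pods)
		}
		return nil
	}, 2*time.Minute, 2*time.Second).Should(gomega.Succeed())
}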
@@ -18,6 +18,7 @@ package e2enode
 
 import (
 	"context"
+	"fmt"
 	"path/filepath"
 	"regexp"
 	"time"
@@ -105,17 +106,31 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
 			return nodes == 1
 		}, time.Minute, time.Second).Should(gomega.BeTrue())
 
-		v1alphaPodResources, err = getV1alpha1NodeDevices(ctx)
-		framework.ExpectNoError(err, "should get node local podresources by accessing the (v1alpha) podresources API endpoint")
-
-		v1PodResources, err = getV1NodeDevices(ctx)
-		framework.ExpectNoError(err, "should get node local podresources by accessing the (v1) podresources API endpoint")
-
 		// Before we run the device plugin test, we need to ensure
 		// that the cluster is in a clean state and there are no
 		// pods running on this node.
-		gomega.Expect(v1alphaPodResources.PodResources).To(gomega.BeEmpty(), "should have no pod resources")
-		gomega.Expect(v1PodResources.PodResources).To(gomega.BeEmpty(), "should have no pod resources")
+		// This is done in a gomega.Eventually with retries since a prior test in a different test suite could've run and the deletion of it's resources may still be in progress.
+		// xref: https://issue.k8s.io/115381
+		gomega.Eventually(ctx, func(ctx context.Context) error {
+			v1alphaPodResources, err = getV1alpha1NodeDevices(ctx)
+			if err != nil {
+				return fmt.Errorf("failed to get node local podresources by accessing the (v1alpha) podresources API endpoint: %v", err)
+			}
+
+			v1PodResources, err = getV1NodeDevices(ctx)
+			if err != nil {
+				return fmt.Errorf("failed to get node local podresources by accessing the (v1) podresources API endpoint: %v", err)
+			}
+
+			if len(v1alphaPodResources.PodResources) > 0 {
+				return fmt.Errorf("expected v1alpha pod resources to be empty, but got non-empty resources: %+v", v1alphaPodResources.PodResources)
+			}
+
+			if len(v1PodResources.PodResources) > 0 {
+				return fmt.Errorf("expected v1 pod resources to be empty, but got non-empty resources: %+v", v1PodResources.PodResources)
+			}
+			return nil
+		}, f.Timeouts.PodDelete, f.Timeouts.Poll).Should(gomega.Succeed())
 
 		ginkgo.By("Scheduling a sample device plugin pod")
 		data, err := e2etestfiles.Read(SampleDevicePluginDSYAML)
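A note on the two positional arguments in the new assertion above: with gomega, the first duration after the polled function is the overall timeout and the second is the polling interval, so the check keeps retrying until the function returns nil or f.Timeouts.PodDelete elapses, re-running every f.Timeouts.Poll. An equivalent spelling uses gomega's WithTimeout/WithPolling builder methods (a sketch for comparison, not part of the commit):

gomega.Eventually(ctx, func(ctx context.Context) error {
	// same body as in the hunk above: return an error while pod resources remain
	return nil
}).WithTimeout(f.Timeouts.PodDelete).WithPolling(f.Timeouts.Poll).Should(gomega.Succeed())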