node: device-mgr: e2e: address e2e test review comments

Signed-off-by: Swati Sehgal <swsehgal@redhat.com>
This commit is contained in:
Swati Sehgal 2023-02-07 18:09:32 +00:00
parent 674879a959
commit bae8a164e0

View File

@ -49,6 +49,8 @@ import (
"github.com/onsi/ginkgo/v2" "github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega" "github.com/onsi/gomega"
"github.com/onsi/gomega/gcustom"
"github.com/onsi/gomega/types"
) )
const ( const (
@ -325,11 +327,10 @@ var _ = SIGDescribe("Device Manager [Serial] [Feature:DeviceManager][NodeFeatur
// this test wants to reproduce what happened in https://github.com/kubernetes/kubernetes/issues/109595 // this test wants to reproduce what happened in https://github.com/kubernetes/kubernetes/issues/109595
ginkgo.BeforeEach(func(ctx context.Context) { ginkgo.BeforeEach(func(ctx context.Context) {
ginkgo.By("Wait for node to be ready") ginkgo.By("Wait for node to be ready")
gomega.Eventually(func() bool { gomega.Eventually(ctx, e2enode.TotalReady).
nodes, err := e2enode.TotalReady(ctx, f.ClientSet) WithArguments(f.ClientSet).
framework.ExpectNoError(err) WithTimeout(time.Minute).
return nodes == 1 Should(gomega.BeEquivalentTo(1))
}, time.Minute, time.Second).Should(gomega.BeTrue())
ginkgo.By("Setting up the directory and file for controlling registration") ginkgo.By("Setting up the directory and file for controlling registration")
triggerPathDir = filepath.Join(devicePluginDir, "sample") triggerPathDir = filepath.Join(devicePluginDir, "sample")
@ -374,26 +375,28 @@ var _ = SIGDescribe("Device Manager [Serial] [Feature:DeviceManager][NodeFeatur
// This is done by deleting the control file at the following path: // This is done by deleting the control file at the following path:
// `/var/lib/kubelet/device-plugins/sample/registration`. // `/var/lib/kubelet/device-plugins/sample/registration`.
defer ginkgo.GinkgoRecover()
framework.Logf("Deleting the control file: %q to trigger registration", triggerPathFile) framework.Logf("Deleting the control file: %q to trigger registration", triggerPathFile)
err := os.Remove(triggerPathFile) err := os.Remove(triggerPathFile)
framework.ExpectNoError(err) framework.ExpectNoError(err)
}() }()
ginkgo.By("Waiting for devices to become available on the local node") ginkgo.By("Waiting for devices to become available on the local node")
gomega.Eventually(func() bool {
node, ready := getLocalTestNode(ctx, f) gomega.Eventually(ctx, isNodeReadyWithSampleResources).
return ready && numberOfSampleResources(node) > 0 WithArguments(f).
}, 5*time.Minute, framework.Poll).Should(gomega.BeTrue()) WithTimeout(5 * time.Minute).
Should(BeReady())
framework.Logf("Successfully created device plugin pod") framework.Logf("Successfully created device plugin pod")
devsLen := int64(deviceCount) // shortcut devsLen := int64(deviceCount) // shortcut
ginkgo.By("Waiting for the resource exported by the sample device plugin to become available on the local node") ginkgo.By("Waiting for the resource exported by the sample device plugin to become available on the local node")
gomega.Eventually(func() bool {
node, ready := getLocalTestNode(ctx, f) gomega.Eventually(ctx, isNodeReadyWithAllocatableSampleResources).
return ready && WithArguments(f, devsLen).
numberOfDevicesCapacity(node, resourceName) == devsLen && WithTimeout(5 * time.Minute).
numberOfDevicesAllocatable(node, resourceName) == devsLen Should(HaveAllocatableDevices())
}, 30*time.Second, framework.Poll).Should(gomega.BeTrue())
}) })
ginkgo.It("should deploy pod consuming devices first but fail with admission error after kubelet restart in case device plugin hasn't re-registered", func(ctx context.Context) { ginkgo.It("should deploy pod consuming devices first but fail with admission error after kubelet restart in case device plugin hasn't re-registered", func(ctx context.Context) {
@ -432,15 +435,15 @@ var _ = SIGDescribe("Device Manager [Serial] [Feature:DeviceManager][NodeFeatur
ginkgo.By("waiting for the kubelet to be ready again") ginkgo.By("waiting for the kubelet to be ready again")
// Wait for the Kubelet to be ready. // Wait for the Kubelet to be ready.
gomega.Eventually(func() bool {
nodes, err := e2enode.TotalReady(ctx, f.ClientSet) gomega.Eventually(ctx, e2enode.TotalReady).
framework.ExpectNoError(err) WithArguments(f.ClientSet).
return nodes == 1 WithTimeout(2 * time.Minute).
}, 2*time.Minute, time.Second).Should(gomega.BeTrue()) Should(gomega.BeEquivalentTo(1))
ginkgo.By("making sure all the pods are ready after the recovery") ginkgo.By("making sure all the pods are ready after the recovery")
var devicePluginPodAfterRestart, tmpPod *v1.Pod var devicePluginPodAfterRestart *v1.Pod
devicePluginPodAfterRestart, err = e2epod.NewPodClient(f).Get(ctx, devicePluginPod.Name, metav1.GetOptions{}) devicePluginPodAfterRestart, err = e2epod.NewPodClient(f).Get(ctx, devicePluginPod.Name, metav1.GetOptions{})
framework.ExpectNoError(err) framework.ExpectNoError(err)
@ -455,42 +458,25 @@ var _ = SIGDescribe("Device Manager [Serial] [Feature:DeviceManager][NodeFeatur
// and registration wasn't triggered manually (by writing to the unix socket exposed at // and registration wasn't triggered manually (by writing to the unix socket exposed at
// `/var/lib/kubelet/device-plugins/registered`). Because of this, the capacity and allocatable corresponding // `/var/lib/kubelet/device-plugins/registered`). Because of this, the capacity and allocatable corresponding
// to the resource exposed by the device plugin should be zero. // to the resource exposed by the device plugin should be zero.
gomega.Eventually(func() bool {
node, ready := getLocalTestNode(ctx, f) gomega.Eventually(ctx, isNodeReadyWithAllocatableSampleResources).
return ready && WithArguments(f, int64(0)).
numberOfDevicesCapacity(node, resourceName) == 0 && WithTimeout(5 * time.Minute).
numberOfDevicesAllocatable(node, resourceName) == 0 Should(HaveAllocatableDevices())
}, 30*time.Second, framework.Poll).Should(gomega.BeTrue())
ginkgo.By("Checking that pod requesting devices failed to start because of admission error") ginkgo.By("Checking that pod requesting devices failed to start because of admission error")
// NOTE: The device plugin won't re-register again and this is intentional. // NOTE: The device plugin won't re-register again and this is intentional.
// Because of this, the testpod (requesting a device) should fail with an admission error. // Because of this, the testpod (requesting a device) should fail with an admission error.
gomega.Eventually(ctx, func() bool { gomega.Eventually(ctx, getPod).
tmpPod, err = e2epod.NewPodClient(f).Get(ctx, testPod.Name, metav1.GetOptions{}) WithArguments(f, testPod.Name).
framework.ExpectNoError(err) WithTimeout(time.Minute).
Should(HaveFailedWithAdmissionError(),
if tmpPod.Status.Phase != v1.PodFailed { "the pod succeeded to start, when it should fail with the admission error")
return false
}
if tmpPod.Status.Reason != "UnexpectedAdmissionError" {
return false
}
if !strings.Contains(tmpPod.Status.Message, "Allocate failed due to can't allocate unhealthy devices") {
return false
}
return true
}, time.Minute, 5*time.Second).Should(
gomega.Equal(true),
"the pod succeeded to start, when it should fail with the admission error",
)
ginkgo.By("removing application pods") ginkgo.By("removing application pods")
e2epod.NewPodClient(f).DeleteSync(ctx, tmpPod.Name, metav1.DeleteOptions{}, 2*time.Minute) e2epod.NewPodClient(f).DeleteSync(ctx, testPod.Name, metav1.DeleteOptions{}, 2*time.Minute)
}) })
ginkgo.AfterEach(func(ctx context.Context) { ginkgo.AfterEach(func(ctx context.Context) {
@ -514,10 +500,10 @@ var _ = SIGDescribe("Device Manager [Serial] [Feature:DeviceManager][NodeFeatur
} }
ginkgo.By("Waiting for devices to become unavailable on the local node") ginkgo.By("Waiting for devices to become unavailable on the local node")
gomega.Eventually(func() bool { gomega.Eventually(ctx, isNodeReadyWithoutSampleResources).
node, ready := getLocalTestNode(ctx, f) WithArguments(f).
return ready && numberOfSampleResources(node) <= 0 WithTimeout(5 * time.Minute).
}, 5*time.Minute, framework.Poll).Should(gomega.BeTrue()) Should(BeReady())
}) })
}) })
@ -632,3 +618,128 @@ func makeBusyboxDeviceRequiringPod(resourceName, cmd string) *v1.Pod {
}, },
} }
} }
// BeReady returns a matcher for a polled boolean that succeeds only when the
// value is true. In this file it is paired with the isNodeReadyWith*SampleResources
// poll functions, whose boolean result reports whether the node reached the
// desired state.
//
// Two matchers are combined with gomega.And and evaluated in order: the first
// turns a false value into an explicit error, the second (BeInReadyPhase)
// supplies the templated failure message.
func BeReady() types.GomegaMatcher {
	// Reports the final error condition when the polled value is false.
	readyOrError := gcustom.MakeMatcher(func(isReady bool) (bool, error) {
		if isReady {
			return true, nil
		}
		return false, fmt.Errorf("Expected node to be ready=%t", isReady)
	})

	return gomega.And(readyOrError, BeInReadyPhase(true))
}
// BeInReadyPhase returns a matcher that succeeds when the actual boolean
// equals isReady. The expected value is also passed to the failure template
// as its data, so it shows up in the rendered failure message.
func BeInReadyPhase(isReady bool) types.GomegaMatcher {
	const failureTemplate = "Expected Node Ready {{.To}} be in {{format .Data}}\nGot instead:\n{{.FormattedActual}}"

	// The predicate never errors; a mismatch is reported through the template.
	matchesExpected := func(actual bool) (bool, error) {
		return actual == isReady, nil
	}

	return gcustom.MakeMatcher(matchesExpected).
		WithTemplate(failureTemplate).
		WithTemplateData(isReady)
}
// isNodeReadyWithSampleResources reports whether the local test node is ready
// and numberOfSampleResources returns a positive count for it.
//
// It returns (true, nil) when both conditions hold; otherwise it returns false
// together with an error naming the first condition that was not met.
func isNodeReadyWithSampleResources(ctx context.Context, f *framework.Framework) (bool, error) {
	node, ready := getLocalTestNode(ctx, f)

	switch {
	case !ready:
		return false, fmt.Errorf("Expected node to be ready=%t", ready)
	case numberOfSampleResources(node) <= 0:
		return false, fmt.Errorf("Expected devices to be advertised")
	}

	return true, nil
}
// HaveAllocatableDevices returns a matcher for a polled boolean that succeeds
// only when the value is true.
//
// The matcher itself does not inspect the node: in this file it is paired with
// isNodeReadyWithAllocatableSampleResources, whose boolean result reports
// whether capacity and allocatable both equal the expected device count
// (which callers may set to zero).
//
// Two matchers are combined with gomega.And and evaluated in order: the first
// turns a false value into an explicit error, the second (hasAllocatable)
// supplies the templated failure message.
func HaveAllocatableDevices() types.GomegaMatcher {
	// Reports the final error condition when the polled value is false.
	allocatableOrError := gcustom.MakeMatcher(func(allocatable bool) (bool, error) {
		if !allocatable {
			return false, fmt.Errorf("Expected node to have allocatable devices=%t", allocatable)
		}
		return true, nil
	})

	return gomega.And(allocatableOrError, hasAllocatable(true))
}
// hasAllocatable returns a matcher that succeeds when the actual boolean
// equals the hasAllocatable argument. The expected value is also passed to the
// failure template as its data, so it shows up in the rendered failure message.
func hasAllocatable(hasAllocatable bool) types.GomegaMatcher {
	const failureTemplate = "Expected Node with allocatable {{.To}} be in {{format .Data}}\nGot instead:\n{{.FormattedActual}}"

	// The predicate never errors; a mismatch is reported through the template.
	matchesExpected := func(actual bool) (bool, error) {
		return actual == hasAllocatable, nil
	}

	return gcustom.MakeMatcher(matchesExpected).
		WithTemplate(failureTemplate).
		WithTemplateData(hasAllocatable)
}
// isNodeReadyWithAllocatableSampleResources reports whether the local test
// node is ready and both numberOfDevicesCapacity and numberOfDevicesAllocatable
// for resourceName equal devCount.
//
// It returns (true, nil) when all conditions hold; otherwise it returns false
// together with an error naming the first condition that was not met. The
// checks run in order (readiness, capacity, allocatable) and stop at the first
// failure.
func isNodeReadyWithAllocatableSampleResources(ctx context.Context, f *framework.Framework, devCount int64) (bool, error) {
	node, ready := getLocalTestNode(ctx, f)
	if !ready {
		return false, fmt.Errorf("Expected node to be ready=%t", ready)
	}

	if capacity := numberOfDevicesCapacity(node, resourceName); capacity != devCount {
		return false, fmt.Errorf("Expected devices capacity to be: %d", devCount)
	}

	if allocatable := numberOfDevicesAllocatable(node, resourceName); allocatable != devCount {
		return false, fmt.Errorf("Expected devices allocatable to be: %d", devCount)
	}

	return true, nil
}
// isNodeReadyWithoutSampleResources reports whether the local test node is
// ready and numberOfSampleResources no longer returns a positive count for it.
//
// It returns (true, nil) when both conditions hold; otherwise it returns false
// together with an error naming the first condition that was not met.
func isNodeReadyWithoutSampleResources(ctx context.Context, f *framework.Framework) (bool, error) {
	node, ready := getLocalTestNode(ctx, f)

	switch {
	case !ready:
		return false, fmt.Errorf("Expected node to be ready=%t", ready)
	case numberOfSampleResources(node) > 0:
		return false, fmt.Errorf("Expected devices to be not present")
	}

	return true, nil
}
// HaveFailedWithAdmissionError returns a matcher for a polled boolean that
// succeeds only when the value is true. In this file it is paired with getPod,
// whose boolean result reports whether the pod failed with the expected
// admission error.
//
// Two matchers are combined with gomega.And and evaluated in order: the first
// turns a false value into an explicit error, the second (hasFailed) supplies
// the templated failure message.
func HaveFailedWithAdmissionError() types.GomegaMatcher {
	failedOrError := gcustom.MakeMatcher(func(failed bool) (bool, error) {
		if failed {
			return true, nil
		}
		return false, fmt.Errorf("Expected pod to have failed=%t", failed)
	})

	return gomega.And(failedOrError, hasFailed(true))
}
// hasFailed returns a matcher that succeeds when the actual boolean equals the
// hasFailed argument. The expected value is also passed to the failure
// template as its data, so it shows up in the rendered failure message.
func hasFailed(hasFailed bool) types.GomegaMatcher {
	const failureTemplate = "Expected Pod failed {{.To}} be in {{format .Data}}\nGot instead:\n{{.FormattedActual}}"

	// The predicate never errors; a mismatch is reported through the template.
	matchesExpected := func(actual bool) (bool, error) {
		return actual == hasFailed, nil
	}

	return gcustom.MakeMatcher(matchesExpected).
		WithTemplate(failureTemplate).
		WithTemplateData(hasFailed)
}
// getPod fetches the pod named podName and reports whether it has failed with
// the expected device-allocation admission error.
//
// Despite its name, it does not return the pod. It returns (true, nil) only
// when the pod's phase is v1.PodFailed, its status reason is
// "UnexpectedAdmissionError", and its status message contains the expected
// allocation-failure text. In every other case, including a failed lookup, it
// returns false with an error describing the first check that did not pass.
func getPod(ctx context.Context, f *framework.Framework, podName string) (bool, error) {
	const (
		expectedStatusReason  = "UnexpectedAdmissionError"
		expectedStatusMessage = "Allocate failed due to can't allocate unhealthy devices"
	)

	pod, err := e2epod.NewPodClient(f).Get(ctx, podName, metav1.GetOptions{})
	if err != nil {
		// Report the requested name: the object returned alongside an error
		// carries no usable name and must not be relied upon.
		return false, fmt.Errorf("Expected to get pod=%q, got err: %w", podName, err)
	}

	// The checks below describe the final error condition the pod must reach.
	if pod.Status.Phase != v1.PodFailed {
		return false, fmt.Errorf("Expected pod to reach phase %q, got final phase %q instead.", v1.PodFailed, pod.Status.Phase)
	}
	if pod.Status.Reason != expectedStatusReason {
		return false, fmt.Errorf("Expected pod status reason to be %q, got %q instead.", expectedStatusReason, pod.Status.Reason)
	}
	if !strings.Contains(pod.Status.Message, expectedStatusMessage) {
		return false, fmt.Errorf("Expected pod status message to contain %q, got %q instead.", expectedStatusMessage, pod.Status.Message)
	}

	return true, nil
}