node: device-plugin: e2e: Update test description to make it explicit

Explicitly state that the test involves kubelet restart and device plugin
re-registration (no pod restart)

We remove the part of the code where we wait for the pod to restart, as this
test case should no longer involve a pod restart.

In addition to that, we use `waitForNodeReady` instead of `WaitForAllNodesSchedulable`
to ensure that the node is ready for pods to be scheduled on it.
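
For reference, the substitution described above typically looks like the sketch
below. This is illustrative only and not part of the hunks shown further down:
`waitForNodeReady` is assumed to be the package-local helper referenced by this
commit message, and the exact signatures may differ between releases.

package e2enode // sketch: assumed to live alongside the node e2e tests

import (
	"context"

	"k8s.io/kubernetes/test/e2e/framework"
)

// readyAfterKubeletRestart illustrates the node-readiness change described in
// the commit message; it is not the actual test code.
func readyAfterKubeletRestart(ctx context.Context, f *framework.Framework) {
	// Before: wait until every node in the cluster is schedulable.
	//   e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, 5*time.Minute)

	// After: wait only for the local test node to report Ready again, which is
	// all a node-local, serial test needs after restarting its own kubelet.
	waitForNodeReady(ctx) // package-local helper (assumed signature)
}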

Signed-off-by: Swati Sehgal <swsehgal@redhat.com>
Co-authored-by: Francesco Romani <fromani@redhat.com>
Author: Swati Sehgal, 2023-03-13 12:00:35 +05:30 (committed by Francesco Romani)
Commit: 4a0f7c791f (parent: fd459beeff)


@@ -279,8 +279,11 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
 			framework.ExpectNoError(err, "inconsistent device assignment after extra container restart - pod2")
 		})
 
-		ginkgo.It("Keeps device plugin assignments after the device plugin has been re-registered", func(ctx context.Context) {
-			podRECMD := "devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep 60"
+		// simulate kubelet restart *and* device plugin re-registration, while the pod and the container stay running.
+		// The device assignment should be kept and be stable across the kubelet/device plugin restart, as both the aforementioned components
+		// orchestrate the device allocation: the actual consumer (container) is stable.
+		ginkgo.It("Keeps device plugin assignments after kubelet restart and device plugin has been re-registered (no pod restart)", func(ctx context.Context) {
+			podRECMD := "devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep 24h" // the pod has to run "forever" in the timescale of this test
 			pod1 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))
 			deviceIDRE := "stub devices: (Dev-[0-9]+)"
 			devID1, err := parseLog(ctx, f, pod1.Name, pod1.Name, deviceIDRE)
@@ -297,7 +300,7 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
 			ginkgo.By("Wait for node to be ready again")
 			e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, 5*time.Minute)
 
-			ginkgo.By("Re-Register resources and delete the plugin pod")
+			ginkgo.By("Re-Register resources by deleting the plugin pod")
 			gp := int64(0)
 			deleteOptions := metav1.DeleteOptions{
 				GracePeriodSeconds: &gp,
@@ -308,12 +311,6 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
 			ginkgo.By("Recreating the plugin pod")
 			devicePluginPod = e2epod.NewPodClient(f).CreateSync(ctx, dptemplate)
 
-			ginkgo.By("Confirming that after a kubelet and pod restart, fake-device assignment is kept")
-			ensurePodContainerRestart(ctx, f, pod1.Name, pod1.Name)
-			devIDRestart1, err := parseLog(ctx, f, pod1.Name, pod1.Name, deviceIDRE)
-			framework.ExpectNoError(err, "getting logs for pod %q", pod1.Name)
-			framework.ExpectEqual(devIDRestart1, devID1)
-
 			ginkgo.By("Waiting for resource to become available on the local node after re-registration")
 			gomega.Eventually(ctx, func() bool {
 				node, ready := getLocalTestNode(ctx, f)
@@ -322,6 +319,19 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
 					CountSampleDeviceAllocatable(node) == expectedSampleDevsAmount
 			}, 30*time.Second, framework.Poll).Should(gomega.BeTrue())
 
+			// crosscheck that the device assignment is preserved and stable from the perspective of the kubelet.
+			// note we don't check again the logs of the container: the check is done at startup, the container
+			// never restarted (runs "forever" from this test timescale perspective) hence re-doing this check
+			// is useless.
+			ginkgo.By("Verifying the device assignment after kubelet and device plugin restart using podresources API")
+			gomega.Eventually(ctx, func() error {
+				v1PodResources, err = getV1NodeDevices(ctx)
+				return err
+			}, 30*time.Second, framework.Poll).ShouldNot(gomega.HaveOccurred(), "cannot fetch the compute resource assignment after kubelet and device plugin restart")
+
+			err = checkPodResourcesAssignment(v1PodResources, pod1.Namespace, pod1.Name, pod1.Spec.Containers[0].Name, SampleDeviceResourceName, []string{devID1})
+			framework.ExpectNoError(err, "inconsistent device assignment after pod restart")
+
 			ginkgo.By("Creating another pod")
 			pod2 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))
 
@@ -330,6 +340,17 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
 			framework.ExpectNoError(err, "getting logs for pod %q", pod2.Name)
 			gomega.Expect(devID1).To(gomega.Not(gomega.Equal(devID2)), "pod2 requested a device but started successfully without")
 
+			ginkgo.By("Verifying the device assignment after kubelet restart and device plugin re-registration using podresources API")
+			// note we don't use eventually: the kubelet is supposed to be running and stable by now, so the call should just succeed
+			v1PodResources, err = getV1NodeDevices(ctx)
+			if err != nil {
+				framework.ExpectNoError(err, "getting pod resources assignment after pod restart")
+			}
+
+			err = checkPodResourcesAssignment(v1PodResources, pod1.Namespace, pod1.Name, pod1.Spec.Containers[0].Name, SampleDeviceResourceName, []string{devID1})
+			framework.ExpectNoError(err, "inconsistent device assignment after extra container restart - pod1")
+			err = checkPodResourcesAssignment(v1PodResources, pod2.Namespace, pod2.Name, pod2.Spec.Containers[0].Name, SampleDeviceResourceName, []string{devID2})
+			framework.ExpectNoError(err, "inconsistent device assignment after extra container restart - pod2")
 		})
 	})
 }
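
The test-local helpers used in the new assertions (`getV1NodeDevices`,
`checkPodResourcesAssignment`) wrap the kubelet podresources v1 API. As a rough,
hedged sketch of what that boils down to, the standalone program below lists the
per-container device assignment directly over the podresources gRPC socket; the
socket path and the direct dial are assumptions for illustration and are not
part of this commit.

package main

import (
	"context"
	"fmt"
	"time"

	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
	podresourcesv1 "k8s.io/kubelet/pkg/apis/podresources/v1"
)

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	// Default kubelet podresources endpoint; adjust for the node under test.
	conn, err := grpc.DialContext(ctx, "unix:///var/lib/kubelet/pod-resources/kubelet.sock",
		grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		panic(err)
	}
	defer conn.Close()

	// List returns, per pod and container, the devices the kubelet has assigned.
	client := podresourcesv1.NewPodResourcesListerClient(conn)
	resp, err := client.List(ctx, &podresourcesv1.ListPodResourcesRequest{})
	if err != nil {
		panic(err)
	}

	for _, pod := range resp.GetPodResources() {
		for _, cnt := range pod.GetContainers() {
			for _, dev := range cnt.GetDevices() {
				fmt.Printf("%s/%s/%s: %s -> %v\n",
					pod.GetNamespace(), pod.GetName(), cnt.GetName(),
					dev.GetResourceName(), dev.GetDeviceIds())
			}
		}
	}
}

This is the state the hunks above poll for: after the kubelet and the device
plugin come back, pod1's container must still be listed with the device it was
originally assigned (devID1), and pod2 must get a different device.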