node: device-plugin: e2e: Isolate test to pod restart scenario
Rather than testing both pod restart and kubelet restart, we change the test to handle only the pod restart scenario. We clarify the test purpose and add an extra check to tighten the test. Additional tests covering kubelet restart scenarios will be added in subsequent commits.

Signed-off-by: Swati Sehgal <swsehgal@redhat.com>
Signed-off-by: Francesco Romani <fromani@redhat.com>
commit fd459beeff
parent 5ab4ba6205
@@ -21,6 +21,7 @@ import (
 	"fmt"
 	"path/filepath"
 	"regexp"
+	"strings"
 	"time"

 	"github.com/onsi/ginkgo/v2"
@@ -30,6 +31,7 @@ import (
 	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/runtime/serializer"
+	"k8s.io/apimachinery/pkg/util/sets"
 	kubeletdevicepluginv1beta1 "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
 	admissionapi "k8s.io/pod-security-admission/api"

@@ -78,6 +80,7 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
 		var v1alphaPodResources *kubeletpodresourcesv1alpha1.ListPodResourcesResponse
 		var v1PodResources *kubeletpodresourcesv1.ListPodResourcesResponse
 		var err error
+
 		ginkgo.BeforeEach(func(ctx context.Context) {
 			ginkgo.By("Wait for node to be ready")
 			gomega.Eventually(ctx, func(ctx context.Context) bool {
@@ -221,7 +224,10 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
 			framework.ExpectEqual(len(v1ResourcesForOurPod.Containers[0].Devices[0].DeviceIds), 1)
 		})

-		ginkgo.It("Keeps device plugin assignments across pod and kubelet restarts", func(ctx context.Context) {
+		// simulate container restart, while all other involved components (kubelet, device plugin) stay stable. To do so, in the container
+		// entry point we sleep for a limited and short period of time. The device assignment should be kept and be stable across the container
+		// restarts. For the sake of brevity we however check just the first restart.
+		ginkgo.It("Keeps device plugin assignments across pod restarts (no kubelet restart, device plugin re-registration)", func(ctx context.Context) {
 			podRECMD := "devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep 60"
 			pod1 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))
 			deviceIDRE := "stub devices: (Dev-[0-9]+)"
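The comment added above describes the mechanism: the busybox container lists the fake device entries it was given under /tmp, echoes them, then sleeps briefly so the kubelet restarts it; the test recovers the assigned device ID from the container log with the deviceIDRE pattern. A minimal standalone sketch of that log parsing, not part of the commit, with a fabricated log line for illustration:

// go: sketch of the deviceIDRE log parsing used by the test (sample data is made up)
package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Same pattern as deviceIDRE in the diff above.
	deviceIDRE := regexp.MustCompile("stub devices: (Dev-[0-9]+)")
	logLine := "stub devices: Dev-3" // fabricated example of what the entrypoint echoes
	if m := deviceIDRE.FindStringSubmatch(logLine); m != nil {
		fmt.Println("assigned device:", m[1]) // prints: assigned device: Dev-3
	}
}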
@@ -232,25 +238,45 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
 			pod1, err = e2epod.NewPodClient(f).Get(ctx, pod1.Name, metav1.GetOptions{})
 			framework.ExpectNoError(err)

+			ginkgo.By("Waiting for container to restart")
 			ensurePodContainerRestart(ctx, f, pod1.Name, pod1.Name)

-			ginkgo.By("Confirming that device assignment persists even after container restart")
-			devIDAfterRestart, err := parseLog(ctx, f, pod1.Name, pod1.Name, deviceIDRE)
-			framework.ExpectNoError(err, "getting logs for pod %q", pod1.Name)
-			framework.ExpectEqual(devIDAfterRestart, devID1)
-
-			ginkgo.By("Restarting Kubelet")
-			restartKubelet(true)
-
-			ginkgo.By("Wait for node to be ready again")
-			e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, 5*time.Minute)
-
-			ginkgo.By("Validating that assignment is kept")
-			ensurePodContainerRestart(ctx, f, pod1.Name, pod1.Name)
-			ginkgo.By("Confirming that after a kubelet restart, fake-device assignment is kept")
+			// check from the device assignment is preserved and stable from perspective of the container
+			ginkgo.By("Confirming that after a container restart, fake-device assignment is kept")
 			devIDRestart1, err := parseLog(ctx, f, pod1.Name, pod1.Name, deviceIDRE)
 			framework.ExpectNoError(err, "getting logs for pod %q", pod1.Name)
 			framework.ExpectEqual(devIDRestart1, devID1)

+			// crosscheck from the device assignment is preserved and stable from perspective of the kubelet.
+			// needs to match the container perspective.
+			ginkgo.By("Verifying the device assignment after container restart using podresources API")
+			v1PodResources, err = getV1NodeDevices(ctx)
+			if err != nil {
+				framework.ExpectNoError(err, "getting pod resources assignment after pod restart")
+			}
+			err = checkPodResourcesAssignment(v1PodResources, pod1.Namespace, pod1.Name, pod1.Spec.Containers[0].Name, SampleDeviceResourceName, []string{devID1})
+			framework.ExpectNoError(err, "inconsistent device assignment after pod restart")
+
+			ginkgo.By("Creating another pod")
+			pod2 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))
+			err = e2epod.WaitTimeoutForPodRunningInNamespace(ctx, f.ClientSet, pod2.Name, f.Namespace.Name, 1*time.Minute)
+			framework.ExpectNoError(err)
+
+			ginkgo.By("Checking that pod got a fake device")
+			devID2, err := parseLog(ctx, f, pod2.Name, pod2.Name, deviceIDRE)
+			framework.ExpectNoError(err, "getting logs for pod %q", pod2.Name)
+
+			gomega.Expect(devID2).To(gomega.Not(gomega.Equal("")), "pod2 requested a device but started successfully without")
+
+			ginkgo.By("Verifying the device assignment after extra container start using podresources API")
+			v1PodResources, err = getV1NodeDevices(ctx)
+			if err != nil {
+				framework.ExpectNoError(err, "getting pod resources assignment after pod restart")
+			}
+			err = checkPodResourcesAssignment(v1PodResources, pod1.Namespace, pod1.Name, pod1.Spec.Containers[0].Name, SampleDeviceResourceName, []string{devID1})
+			framework.ExpectNoError(err, "inconsistent device assignment after extra container restart - pod1")
+			err = checkPodResourcesAssignment(v1PodResources, pod2.Namespace, pod2.Name, pod2.Spec.Containers[0].Name, SampleDeviceResourceName, []string{devID2})
+			framework.ExpectNoError(err, "inconsistent device assignment after extra container restart - pod2")
 		})

 		ginkgo.It("Keeps device plugin assignments after the device plugin has been re-registered", func(ctx context.Context) {
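The added crosscheck compares the container's own view (parsed from its log) with the kubelet's view exposed through the podresources API, which the test obtains via getV1NodeDevices. A rough standalone sketch of such a query, not from the commit: it assumes direct access to the kubelet pod-resources socket at its default path, and the socket path and program structure are assumptions, not the e2e framework helper itself.

// go: sketch of listing kubelet podresources over gRPC (default socket path assumed)
package main

import (
	"context"
	"fmt"
	"log"
	"time"

	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
	podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1"
)

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	// Dial the kubelet pod-resources socket (default path; adjust if relocated).
	conn, err := grpc.DialContext(ctx, "unix:///var/lib/kubelet/pod-resources/kubelet.sock",
		grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock())
	if err != nil {
		log.Fatalf("dialing podresources socket: %v", err)
	}
	defer conn.Close()

	client := podresourcesapi.NewPodResourcesListerClient(conn)
	resp, err := client.List(ctx, &podresourcesapi.ListPodResourcesRequest{})
	if err != nil {
		log.Fatalf("listing pod resources: %v", err)
	}

	// Print the kubelet-side device assignments, i.e. the data the test feeds
	// into checkPodResourcesAssignment.
	for _, pod := range resp.GetPodResources() {
		for _, cnt := range pod.GetContainers() {
			for _, dev := range cnt.GetDevices() {
				fmt.Printf("%s/%s/%s: %s -> %v\n",
					pod.GetNamespace(), pod.GetName(), cnt.GetName(),
					dev.GetResourceName(), dev.GetDeviceIds())
			}
		}
	}
}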
@@ -368,3 +394,36 @@ func parseLog(ctx context.Context, f *framework.Framework, podName string, contN

 	return matches[1], nil
 }
+
+func checkPodResourcesAssignment(v1PodRes *kubeletpodresourcesv1.ListPodResourcesResponse, podNamespace, podName, containerName, resourceName string, devs []string) error {
+	for _, podRes := range v1PodRes.PodResources {
+		if podRes.Namespace != podNamespace || podRes.Name != podName {
+			continue
+		}
+		for _, contRes := range podRes.Containers {
+			if contRes.Name != containerName {
+				continue
+			}
+			return matchContainerDevices(podNamespace+"/"+podName+"/"+containerName, contRes.Devices, resourceName, devs)
+		}
+	}
+	return fmt.Errorf("no resources found for %s/%s/%s", podNamespace, podName, containerName)
+}
+
+func matchContainerDevices(ident string, contDevs []*kubeletpodresourcesv1.ContainerDevices, resourceName string, devs []string) error {
+	expected := sets.New[string](devs...)
+	assigned := sets.New[string]()
+	for _, contDev := range contDevs {
+		if contDev.ResourceName != resourceName {
+			continue
+		}
+		assigned = assigned.Insert(contDev.DeviceIds...)
+	}
+	expectedStr := strings.Join(expected.UnsortedList(), ",")
+	assignedStr := strings.Join(assigned.UnsortedList(), ",")
+	framework.Logf("%s: devices expected %q assigned %q", ident, expectedStr, assignedStr)
+	if !assigned.Equal(expected) {
+		return fmt.Errorf("device allocation mismatch for %s expected %s assigned %s", ident, expectedStr, assignedStr)
+	}
+	return nil
+}
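The new helpers compare the expected device IDs (taken from the container log) against the kubelet-reported IDs as sets, so ordering is irrelevant while any missing or extra ID is a mismatch. A small standalone sketch of that semantics, not from the commit; the resource name and device IDs below are fabricated for illustration.

// go: sketch of the set-based comparison matchContainerDevices relies on (fabricated data)
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/util/sets"
	kubeletpodresourcesv1 "k8s.io/kubelet/pkg/apis/podresources/v1"
)

func main() {
	// Fabricated kubelet-side report for one container.
	contDevs := []*kubeletpodresourcesv1.ContainerDevices{
		{ResourceName: "example.com/resource", DeviceIds: []string{"Dev-2", "Dev-1"}},
		{ResourceName: "other.com/resource", DeviceIds: []string{"X-9"}}, // skipped: different resource
	}

	// What the container reported in its log.
	expected := sets.New[string]("Dev-1", "Dev-2")

	// Collect the IDs assigned for the resource we care about.
	assigned := sets.New[string]()
	for _, cd := range contDevs {
		if cd.ResourceName != "example.com/resource" {
			continue
		}
		assigned.Insert(cd.DeviceIds...)
	}

	fmt.Println("match:", assigned.Equal(expected)) // true: same IDs, order ignored
}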