mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-09 12:07:47 +00:00
e2e: node: devicemanager: update tests
Fix e2e device manager tests. Most notably, the workload pods needs to survive a kubelet restart. Update tests to reflect that. Signed-off-by: Francesco Romani <fromani@redhat.com>
This commit is contained in:
parent
3bcf4220ec
commit
b926aba268
@ -721,11 +721,14 @@ func hasFailed(hasFailed bool) types.GomegaMatcher {
|
|||||||
}).WithTemplate("expected Pod failed {{.To}} be in {{format .Data}}\nGot instead:\n{{.FormattedActual}}").WithTemplateData(hasFailed)
|
}).WithTemplate("expected Pod failed {{.To}} be in {{format .Data}}\nGot instead:\n{{.FormattedActual}}").WithTemplateData(hasFailed)
|
||||||
}
|
}
|
||||||
|
|
||||||
func getPod(ctx context.Context, f *framework.Framework, podName string) (bool, error) {
|
func getPodByName(ctx context.Context, f *framework.Framework, podName string) (*v1.Pod, error) {
|
||||||
|
return e2epod.NewPodClient(f).Get(ctx, podName, metav1.GetOptions{})
|
||||||
|
}
|
||||||
|
|
||||||
pod, err := e2epod.NewPodClient(f).Get(ctx, podName, metav1.GetOptions{})
|
func getPod(ctx context.Context, f *framework.Framework, podName string) (bool, error) {
|
||||||
|
pod, err := getPodByName(ctx, f, podName)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false, fmt.Errorf("expected node to get pod=%q got err=%q", pod.Name, err)
|
return false, err
|
||||||
}
|
}
|
||||||
|
|
||||||
expectedStatusReason := "UnexpectedAdmissionError"
|
expectedStatusReason := "UnexpectedAdmissionError"
|
||||||
|
@ -28,11 +28,14 @@ import (
|
|||||||
|
|
||||||
"github.com/onsi/ginkgo/v2"
|
"github.com/onsi/ginkgo/v2"
|
||||||
"github.com/onsi/gomega"
|
"github.com/onsi/gomega"
|
||||||
|
"github.com/onsi/gomega/gcustom"
|
||||||
|
"github.com/onsi/gomega/types"
|
||||||
|
|
||||||
appsv1 "k8s.io/api/apps/v1"
|
appsv1 "k8s.io/api/apps/v1"
|
||||||
v1 "k8s.io/api/core/v1"
|
v1 "k8s.io/api/core/v1"
|
||||||
"k8s.io/apimachinery/pkg/runtime"
|
"k8s.io/apimachinery/pkg/runtime"
|
||||||
"k8s.io/apimachinery/pkg/runtime/serializer"
|
"k8s.io/apimachinery/pkg/runtime/serializer"
|
||||||
|
k8stypes "k8s.io/apimachinery/pkg/types"
|
||||||
"k8s.io/apimachinery/pkg/util/sets"
|
"k8s.io/apimachinery/pkg/util/sets"
|
||||||
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
|
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||||
"k8s.io/klog/v2"
|
"k8s.io/klog/v2"
|
||||||
@ -42,6 +45,7 @@ import (
|
|||||||
"k8s.io/apimachinery/pkg/api/resource"
|
"k8s.io/apimachinery/pkg/api/resource"
|
||||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
"k8s.io/apimachinery/pkg/util/uuid"
|
"k8s.io/apimachinery/pkg/util/uuid"
|
||||||
|
"k8s.io/kubectl/pkg/util/podutils"
|
||||||
kubeletpodresourcesv1 "k8s.io/kubelet/pkg/apis/podresources/v1"
|
kubeletpodresourcesv1 "k8s.io/kubelet/pkg/apis/podresources/v1"
|
||||||
kubeletpodresourcesv1alpha1 "k8s.io/kubelet/pkg/apis/podresources/v1alpha1"
|
kubeletpodresourcesv1alpha1 "k8s.io/kubelet/pkg/apis/podresources/v1alpha1"
|
||||||
"k8s.io/kubernetes/test/e2e/framework"
|
"k8s.io/kubernetes/test/e2e/framework"
|
||||||
@ -239,7 +243,7 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
|
|||||||
// simulate container restart, while all other involved components (kubelet, device plugin) stay stable. To do so, in the container
|
// simulate container restart, while all other involved components (kubelet, device plugin) stay stable. To do so, in the container
|
||||||
// entry point we sleep for a limited and short period of time. The device assignment should be kept and be stable across the container
|
// entry point we sleep for a limited and short period of time. The device assignment should be kept and be stable across the container
|
||||||
// restarts. For the sake of brevity we however check just the fist restart.
|
// restarts. For the sake of brevity we however check just the fist restart.
|
||||||
ginkgo.It("Keeps device plugin assignments across pod restarts (no kubelet restart, device plugin re-registration)", func(ctx context.Context) {
|
ginkgo.It("Keeps device plugin assignments across pod restarts (no kubelet restart, no device plugin restart)", func(ctx context.Context) {
|
||||||
podRECMD := fmt.Sprintf("devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s", sleepIntervalWithRestart)
|
podRECMD := fmt.Sprintf("devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s", sleepIntervalWithRestart)
|
||||||
pod1 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))
|
pod1 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))
|
||||||
deviceIDRE := "stub devices: (Dev-[0-9]+)"
|
deviceIDRE := "stub devices: (Dev-[0-9]+)"
|
||||||
@ -291,10 +295,11 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
|
|||||||
framework.ExpectNoError(err, "inconsistent device assignment after extra container restart - pod2")
|
framework.ExpectNoError(err, "inconsistent device assignment after extra container restart - pod2")
|
||||||
})
|
})
|
||||||
|
|
||||||
// simulate kubelet restart, *but not* device plugin re-registration, while the pod and the container stays running.
|
// simulate kubelet restart. A compliant device plugin is expected to re-register, while the pod and the container stays running.
|
||||||
|
// The flow with buggy or slow device plugin is deferred to another test.
|
||||||
// The device assignment should be kept and be stable across the kubelet restart, because it's the kubelet which performs the device allocation,
|
// The device assignment should be kept and be stable across the kubelet restart, because it's the kubelet which performs the device allocation,
|
||||||
// and both the device plugin and the actual consumer (container) are stable.
|
// and both the device plugin and the actual consumer (container) are stable.
|
||||||
ginkgo.It("Keeps device plugin assignments across kubelet restarts (no pod restart, no device plugin re-registration)", func(ctx context.Context) {
|
ginkgo.It("Keeps device plugin assignments across kubelet restarts (no pod restart, no device plugin restart)", func(ctx context.Context) {
|
||||||
podRECMD := fmt.Sprintf("devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s", sleepIntervalForever)
|
podRECMD := fmt.Sprintf("devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s", sleepIntervalForever)
|
||||||
pod1 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))
|
pod1 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))
|
||||||
deviceIDRE := "stub devices: (Dev-[0-9]+)"
|
deviceIDRE := "stub devices: (Dev-[0-9]+)"
|
||||||
@ -304,6 +309,7 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
|
|||||||
|
|
||||||
pod1, err = e2epod.NewPodClient(f).Get(ctx, pod1.Name, metav1.GetOptions{})
|
pod1, err = e2epod.NewPodClient(f).Get(ctx, pod1.Name, metav1.GetOptions{})
|
||||||
framework.ExpectNoError(err)
|
framework.ExpectNoError(err)
|
||||||
|
framework.Logf("testing pod: UID=%s namespace=%s name=%s ready=%v", pod1.UID, pod1.Namespace, pod1.Name, podutils.IsPodReady(pod1))
|
||||||
|
|
||||||
ginkgo.By("Restarting Kubelet")
|
ginkgo.By("Restarting Kubelet")
|
||||||
restartKubelet(true)
|
restartKubelet(true)
|
||||||
@ -319,12 +325,16 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
|
|||||||
CountSampleDeviceAllocatable(node) == expectedSampleDevsAmount
|
CountSampleDeviceAllocatable(node) == expectedSampleDevsAmount
|
||||||
}, 30*time.Second, framework.Poll).Should(gomega.BeTrue())
|
}, 30*time.Second, framework.Poll).Should(gomega.BeTrue())
|
||||||
|
|
||||||
ginkgo.By("Waiting for the pod to fail with admission error as device plugin hasn't re-registered yet")
|
ginkgo.By("Checking the same instance of the pod is still running")
|
||||||
gomega.Eventually(ctx, getPod).
|
gomega.Eventually(ctx, getPodByName).
|
||||||
WithArguments(f, pod1.Name).
|
WithArguments(f, pod1.Name).
|
||||||
WithTimeout(time.Minute).
|
WithTimeout(time.Minute).
|
||||||
Should(HaveFailedWithAdmissionError(),
|
Should(BeTheSamePodStillRunning(pod1),
|
||||||
"the pod succeeded to start, when it should fail with the admission error")
|
"the same pod instance not running across kubelet restarts, workload should not be perturbed by kubelet restarts")
|
||||||
|
|
||||||
|
pod1, err = e2epod.NewPodClient(f).Get(ctx, pod1.Name, metav1.GetOptions{})
|
||||||
|
framework.ExpectNoError(err)
|
||||||
|
framework.Logf("testing pod: UID=%s namespace=%s name=%s ready=%v", pod1.UID, pod1.Namespace, pod1.Name, podutils.IsPodReady(pod1))
|
||||||
|
|
||||||
// crosscheck from the device assignment is preserved and stable from perspective of the kubelet.
|
// crosscheck from the device assignment is preserved and stable from perspective of the kubelet.
|
||||||
// note we don't check again the logs of the container: the check is done at startup, the container
|
// note we don't check again the logs of the container: the check is done at startup, the container
|
||||||
@ -340,10 +350,10 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
|
|||||||
framework.ExpectNoError(err, "inconsistent device assignment after pod restart")
|
framework.ExpectNoError(err, "inconsistent device assignment after pod restart")
|
||||||
})
|
})
|
||||||
|
|
||||||
// simulate kubelet and container restart, *but not* device plugin re-registration.
|
// simulate kubelet and container restart, *but not* device plugin restart.
|
||||||
// The device assignment should be kept and be stable across the kubelet and container restart, because it's the kubelet which
|
// The device assignment should be kept and be stable across the kubelet and container restart, because it's the kubelet which
|
||||||
// performs the device allocation, and both the device plugin is stable.
|
// performs the device allocation, and both the device plugin is stable.
|
||||||
ginkgo.It("Keeps device plugin assignments across pod and kubelet restarts (no device plugin re-registration)", func(ctx context.Context) {
|
ginkgo.It("Keeps device plugin assignments across pod and kubelet restarts (no device plugin restart)", func(ctx context.Context) {
|
||||||
podRECMD := fmt.Sprintf("devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s", sleepIntervalWithRestart)
|
podRECMD := fmt.Sprintf("devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s", sleepIntervalWithRestart)
|
||||||
pod1 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))
|
pod1 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))
|
||||||
deviceIDRE := "stub devices: (Dev-[0-9]+)"
|
deviceIDRE := "stub devices: (Dev-[0-9]+)"
|
||||||
@ -372,18 +382,28 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
|
|||||||
ginkgo.By("Wait for node to be ready again")
|
ginkgo.By("Wait for node to be ready again")
|
||||||
e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, 5*time.Minute)
|
e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, 5*time.Minute)
|
||||||
|
|
||||||
ginkgo.By("Waiting for the pod to fail with admission error as device plugin hasn't re-registered yet")
|
ginkgo.By("Checking an instance of the pod is running")
|
||||||
gomega.Eventually(ctx, getPod).
|
gomega.Eventually(ctx, getPodByName).
|
||||||
WithArguments(f, pod1.Name).
|
WithArguments(f, pod1.Name).
|
||||||
WithTimeout(time.Minute).
|
WithTimeout(time.Minute).
|
||||||
Should(HaveFailedWithAdmissionError(),
|
Should(gomega.And(
|
||||||
"the pod succeeded to start, when it should fail with the admission error")
|
BeAPodInPhase(v1.PodRunning),
|
||||||
|
BeAPodReady(),
|
||||||
|
),
|
||||||
|
"the pod should still be running, the workload should not be perturbed by kubelet restarts")
|
||||||
|
|
||||||
// crosscheck from the device assignment is preserved and stable from perspective of the kubelet.
|
ginkgo.By("Verifying the device assignment after pod and kubelet restart using container logs")
|
||||||
// note we don't check again the logs of the container: the check is done at startup, the container
|
var devID1Restarted string
|
||||||
// never restarted (runs "forever" from this test timescale perspective) hence re-doing this check
|
gomega.Eventually(ctx, func() string {
|
||||||
// is useless.
|
devID1Restarted, err = parseLog(ctx, f, pod1.Name, pod1.Name, deviceIDRE)
|
||||||
ginkgo.By("Verifying the device assignment after kubelet restart using podresources API")
|
if err != nil {
|
||||||
|
framework.Logf("error getting logds for pod %q: %v", pod1.Name, err)
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
return devID1Restarted
|
||||||
|
}, 30*time.Second, framework.Poll).Should(gomega.Equal(devID1), "pod %s reports a different device after restarts: %s (expected %s)", pod1.Name, devID1Restarted, devID1)
|
||||||
|
|
||||||
|
ginkgo.By("Verifying the device assignment after pod and kubelet restart using podresources API")
|
||||||
gomega.Eventually(ctx, func() error {
|
gomega.Eventually(ctx, func() error {
|
||||||
v1PodResources, err = getV1NodeDevices(ctx)
|
v1PodResources, err = getV1NodeDevices(ctx)
|
||||||
return err
|
return err
|
||||||
@ -397,8 +417,7 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
|
|||||||
// After the device plugin has re-registered, the list healthy devices is repopulated based on the devices discovered.
|
// After the device plugin has re-registered, the list healthy devices is repopulated based on the devices discovered.
|
||||||
// Once Pod2 is running we determine the device that was allocated it. As long as the device allocation succeeds the
|
// Once Pod2 is running we determine the device that was allocated it. As long as the device allocation succeeds the
|
||||||
// test should pass.
|
// test should pass.
|
||||||
|
ginkgo.It("Keeps device plugin assignments after the device plugin has restarted (no kubelet restart, pod restart)", func(ctx context.Context) {
|
||||||
ginkgo.It("Keeps device plugin assignments after the device plugin has been re-registered (no kubelet, pod restart)", func(ctx context.Context) {
|
|
||||||
podRECMD := fmt.Sprintf("devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s", sleepIntervalForever)
|
podRECMD := fmt.Sprintf("devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s", sleepIntervalForever)
|
||||||
pod1 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))
|
pod1 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))
|
||||||
deviceIDRE := "stub devices: (Dev-[0-9]+)"
|
deviceIDRE := "stub devices: (Dev-[0-9]+)"
|
||||||
@ -433,12 +452,12 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
|
|||||||
CountSampleDeviceAllocatable(node) == expectedSampleDevsAmount
|
CountSampleDeviceAllocatable(node) == expectedSampleDevsAmount
|
||||||
}, 30*time.Second, framework.Poll).Should(gomega.BeTrue())
|
}, 30*time.Second, framework.Poll).Should(gomega.BeTrue())
|
||||||
|
|
||||||
// crosscheck that after device plugin re-registration the device assignment is preserved and
|
// crosscheck that after device plugin restart the device assignment is preserved and
|
||||||
// stable from the kubelet's perspective.
|
// stable from the kubelet's perspective.
|
||||||
// note we don't check again the logs of the container: the check is done at startup, the container
|
// note we don't check again the logs of the container: the check is done at startup, the container
|
||||||
// never restarted (runs "forever" from this test timescale perspective) hence re-doing this check
|
// never restarted (runs "forever" from this test timescale perspective) hence re-doing this check
|
||||||
// is useless.
|
// is useless.
|
||||||
ginkgo.By("Verifying the device assignment after device plugin re-registration using podresources API")
|
ginkgo.By("Verifying the device assignment after device plugin restart using podresources API")
|
||||||
gomega.Eventually(ctx, func() error {
|
gomega.Eventually(ctx, func() error {
|
||||||
v1PodResources, err = getV1NodeDevices(ctx)
|
v1PodResources, err = getV1NodeDevices(ctx)
|
||||||
return err
|
return err
|
||||||
@ -446,30 +465,19 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
|
|||||||
|
|
||||||
err = checkPodResourcesAssignment(v1PodResources, pod1.Namespace, pod1.Name, pod1.Spec.Containers[0].Name, SampleDeviceResourceName, []string{devID1})
|
err = checkPodResourcesAssignment(v1PodResources, pod1.Namespace, pod1.Name, pod1.Spec.Containers[0].Name, SampleDeviceResourceName, []string{devID1})
|
||||||
framework.ExpectNoError(err, "inconsistent device assignment after pod restart")
|
framework.ExpectNoError(err, "inconsistent device assignment after pod restart")
|
||||||
|
|
||||||
ginkgo.By("Creating another pod")
|
|
||||||
pod2 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))
|
|
||||||
err = e2epod.WaitTimeoutForPodRunningInNamespace(ctx, f.ClientSet, pod2.Name, f.Namespace.Name, 1*time.Minute)
|
|
||||||
framework.ExpectNoError(err)
|
|
||||||
|
|
||||||
ginkgo.By("Checking that pod got a fake device")
|
|
||||||
devID2, err := parseLog(ctx, f, pod2.Name, pod2.Name, deviceIDRE)
|
|
||||||
framework.ExpectNoError(err, "getting logs for pod %q", pod2.Name)
|
|
||||||
|
|
||||||
gomega.Expect(devID2).To(gomega.Not(gomega.Equal("")), "pod2 requested a device but started successfully without")
|
|
||||||
})
|
})
|
||||||
|
|
||||||
// simulate kubelet restart *and* device plugin re-registration, while the pod and the container stays running.
|
// simulate kubelet restart *and* device plugin restart, while the pod and the container stays running.
|
||||||
// The device assignment should be kept and be stable across the kubelet/device plugin restart, as both the aforementioned components
|
// The device assignment should be kept and be stable across the kubelet/device plugin restart, as both the aforementioned components
|
||||||
// orchestrate the device allocation: the actual consumer (container) is stable.
|
// orchestrate the device allocation: the actual consumer (container) is stable.
|
||||||
ginkgo.It("Keeps device plugin assignments after kubelet restart and device plugin has been re-registered (no pod restart)", func(ctx context.Context) {
|
ginkgo.It("Keeps device plugin assignments after kubelet restart and device plugin restart (no pod restart)", func(ctx context.Context) {
|
||||||
podRECMD := fmt.Sprintf("devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s", sleepIntervalForever) // the pod has to run "forever" in the timescale of this test
|
podRECMD := fmt.Sprintf("devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s", sleepIntervalForever) // the pod has to run "forever" in the timescale of this test
|
||||||
pod1 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))
|
pod1 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))
|
||||||
deviceIDRE := "stub devices: (Dev-[0-9]+)"
|
deviceIDRE := "stub devices: (Dev-[0-9]+)"
|
||||||
devID1, err := parseLog(ctx, f, pod1.Name, pod1.Name, deviceIDRE)
|
devID1, err := parseLog(ctx, f, pod1.Name, pod1.Name, deviceIDRE)
|
||||||
framework.ExpectNoError(err, "getting logs for pod %q", pod1.Name)
|
framework.ExpectNoError(err, "getting logs for pod %q", pod1.Name)
|
||||||
|
|
||||||
gomega.Expect(devID1).To(gomega.Not(gomega.Equal("")), "pod1 requested a device but started successfully without")
|
gomega.Expect(devID1).To(gomega.Not(gomega.BeEmpty()), "pod1 requested a device but started successfully without")
|
||||||
|
|
||||||
pod1, err = e2epod.NewPodClient(f).Get(ctx, pod1.Name, metav1.GetOptions{})
|
pod1, err = e2epod.NewPodClient(f).Get(ctx, pod1.Name, metav1.GetOptions{})
|
||||||
framework.ExpectNoError(err)
|
framework.ExpectNoError(err)
|
||||||
@ -480,12 +488,12 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
|
|||||||
ginkgo.By("Wait for node to be ready again")
|
ginkgo.By("Wait for node to be ready again")
|
||||||
e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, 5*time.Minute)
|
e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, 5*time.Minute)
|
||||||
|
|
||||||
ginkgo.By("Waiting for the pod to fail with admission error as device plugin hasn't re-registered yet")
|
ginkgo.By("Checking the same instance of the pod is still running after kubelet restart")
|
||||||
gomega.Eventually(ctx, getPod).
|
gomega.Eventually(ctx, getPodByName).
|
||||||
WithArguments(f, pod1.Name).
|
WithArguments(f, pod1.Name).
|
||||||
WithTimeout(time.Minute).
|
WithTimeout(time.Minute).
|
||||||
Should(HaveFailedWithAdmissionError(),
|
Should(BeTheSamePodStillRunning(pod1),
|
||||||
"the pod succeeded to start, when it should fail with the admission error")
|
"the same pod instance not running across kubelet restarts, workload should not be perturbed by kubelet restarts")
|
||||||
|
|
||||||
// crosscheck from the device assignment is preserved and stable from perspective of the kubelet.
|
// crosscheck from the device assignment is preserved and stable from perspective of the kubelet.
|
||||||
// note we don't check again the logs of the container: the check is done at startup, the container
|
// note we don't check again the logs of the container: the check is done at startup, the container
|
||||||
@ -511,7 +519,7 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
|
|||||||
ginkgo.By("Recreating the plugin pod")
|
ginkgo.By("Recreating the plugin pod")
|
||||||
devicePluginPod = e2epod.NewPodClient(f).CreateSync(ctx, dptemplate)
|
devicePluginPod = e2epod.NewPodClient(f).CreateSync(ctx, dptemplate)
|
||||||
|
|
||||||
ginkgo.By("Waiting for resource to become available on the local node after re-registration")
|
ginkgo.By("Waiting for resource to become available on the local node after restart")
|
||||||
gomega.Eventually(ctx, func() bool {
|
gomega.Eventually(ctx, func() bool {
|
||||||
node, ready := getLocalTestNode(ctx, f)
|
node, ready := getLocalTestNode(ctx, f)
|
||||||
return ready &&
|
return ready &&
|
||||||
@ -519,22 +527,12 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
|
|||||||
CountSampleDeviceAllocatable(node) == expectedSampleDevsAmount
|
CountSampleDeviceAllocatable(node) == expectedSampleDevsAmount
|
||||||
}, 30*time.Second, framework.Poll).Should(gomega.BeTrue())
|
}, 30*time.Second, framework.Poll).Should(gomega.BeTrue())
|
||||||
|
|
||||||
ginkgo.By("Creating another pod")
|
ginkgo.By("Checking the same instance of the pod is still running after the device plugin restart")
|
||||||
pod2 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))
|
gomega.Eventually(ctx, getPodByName).
|
||||||
|
WithArguments(f, pod1.Name).
|
||||||
ginkgo.By("Checking that pod got a fake device")
|
WithTimeout(time.Minute).
|
||||||
devID2, err := parseLog(ctx, f, pod2.Name, pod2.Name, deviceIDRE)
|
Should(BeTheSamePodStillRunning(pod1),
|
||||||
framework.ExpectNoError(err, "getting logs for pod %q", pod2.Name)
|
"the same pod instance not running across kubelet restarts, workload should not be perturbed by device plugins restarts")
|
||||||
|
|
||||||
ginkgo.By("Verifying the device assignment after kubelet restart and device plugin re-registration using podresources API")
|
|
||||||
// note we don't use eventually: the kubelet is supposed to be running and stable by now, so the call should just succeed
|
|
||||||
v1PodResources, err = getV1NodeDevices(ctx)
|
|
||||||
if err != nil {
|
|
||||||
framework.ExpectNoError(err, "getting pod resources assignment after pod restart")
|
|
||||||
}
|
|
||||||
|
|
||||||
err = checkPodResourcesAssignment(v1PodResources, pod2.Namespace, pod2.Name, pod2.Spec.Containers[0].Name, SampleDeviceResourceName, []string{devID2})
|
|
||||||
framework.ExpectNoError(err, "inconsistent device assignment after extra container restart - pod2")
|
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@ -542,10 +540,10 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
|
|||||||
func testDevicePluginNodeReboot(f *framework.Framework, pluginSockDir string) {
|
func testDevicePluginNodeReboot(f *framework.Framework, pluginSockDir string) {
|
||||||
ginkgo.Context("DevicePlugin [Serial] [Disruptive]", func() {
|
ginkgo.Context("DevicePlugin [Serial] [Disruptive]", func() {
|
||||||
var devicePluginPod *v1.Pod
|
var devicePluginPod *v1.Pod
|
||||||
var v1alphaPodResources *kubeletpodresourcesv1alpha1.ListPodResourcesResponse
|
|
||||||
var v1PodResources *kubeletpodresourcesv1.ListPodResourcesResponse
|
var v1PodResources *kubeletpodresourcesv1.ListPodResourcesResponse
|
||||||
var triggerPathFile, triggerPathDir string
|
var triggerPathFile, triggerPathDir string
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
ginkgo.BeforeEach(func(ctx context.Context) {
|
ginkgo.BeforeEach(func(ctx context.Context) {
|
||||||
ginkgo.By("Wait for node to be ready")
|
ginkgo.By("Wait for node to be ready")
|
||||||
gomega.Eventually(ctx, func(ctx context.Context) bool {
|
gomega.Eventually(ctx, func(ctx context.Context) bool {
|
||||||
@ -560,25 +558,16 @@ func testDevicePluginNodeReboot(f *framework.Framework, pluginSockDir string) {
|
|||||||
// This is done in a gomega.Eventually with retries since a prior test in a different test suite could've run and the deletion of it's resources may still be in progress.
|
// This is done in a gomega.Eventually with retries since a prior test in a different test suite could've run and the deletion of it's resources may still be in progress.
|
||||||
// xref: https://issue.k8s.io/115381
|
// xref: https://issue.k8s.io/115381
|
||||||
gomega.Eventually(ctx, func(ctx context.Context) error {
|
gomega.Eventually(ctx, func(ctx context.Context) error {
|
||||||
v1alphaPodResources, err = getV1alpha1NodeDevices(ctx)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to get node local podresources by accessing the (v1alpha) podresources API endpoint: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
v1PodResources, err = getV1NodeDevices(ctx)
|
v1PodResources, err = getV1NodeDevices(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to get node local podresources by accessing the (v1) podresources API endpoint: %v", err)
|
return fmt.Errorf("failed to get node local podresources by accessing the (v1) podresources API endpoint: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(v1alphaPodResources.PodResources) > 0 {
|
|
||||||
return fmt.Errorf("expected v1alpha pod resources to be empty, but got non-empty resources: %+v", v1alphaPodResources.PodResources)
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(v1PodResources.PodResources) > 0 {
|
if len(v1PodResources.PodResources) > 0 {
|
||||||
return fmt.Errorf("expected v1 pod resources to be empty, but got non-empty resources: %+v", v1PodResources.PodResources)
|
return fmt.Errorf("expected v1 pod resources to be empty, but got non-empty resources: %+v", v1PodResources.PodResources)
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}, f.Timeouts.PodDelete, f.Timeouts.Poll).Should(gomega.Succeed())
|
}, f.Timeouts.SystemDaemonsetStartup, f.Timeouts.Poll).Should(gomega.Succeed())
|
||||||
|
|
||||||
ginkgo.By("Setting up the directory and file for controlling registration")
|
ginkgo.By("Setting up the directory and file for controlling registration")
|
||||||
triggerPathDir = filepath.Join(devicePluginDir, "sample")
|
triggerPathDir = filepath.Join(devicePluginDir, "sample")
|
||||||
@ -675,8 +664,7 @@ func testDevicePluginNodeReboot(f *framework.Framework, pluginSockDir string) {
|
|||||||
|
|
||||||
// simulate node reboot scenario by removing pods using CRI before kubelet is started. In addition to that,
|
// simulate node reboot scenario by removing pods using CRI before kubelet is started. In addition to that,
|
||||||
// intentionally a scenario is created where after node reboot, application pods requesting devices appear before the device plugin pod
|
// intentionally a scenario is created where after node reboot, application pods requesting devices appear before the device plugin pod
|
||||||
// exposing those devices as resource has re-registers itself to Kubelet. The expected behavior is that the application pod fails at
|
// exposing those devices as resource has restarted. The expected behavior is that the application pod fails at admission time.
|
||||||
// admission time.
|
|
||||||
ginkgo.It("Keeps device plugin assignments across node reboots (no pod restart, no device plugin re-registration)", func(ctx context.Context) {
|
ginkgo.It("Keeps device plugin assignments across node reboots (no pod restart, no device plugin re-registration)", func(ctx context.Context) {
|
||||||
podRECMD := fmt.Sprintf("devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s", sleepIntervalForever)
|
podRECMD := fmt.Sprintf("devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s", sleepIntervalForever)
|
||||||
pod1 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))
|
pod1 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))
|
||||||
@ -851,3 +839,32 @@ func getSampleDevicePluginPod(pluginSockDir string) *v1.Pod {
|
|||||||
|
|
||||||
return dp
|
return dp
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func BeTheSamePodStillRunning(expected *v1.Pod) types.GomegaMatcher {
|
||||||
|
return gomega.And(
|
||||||
|
BeTheSamePodAs(expected.UID),
|
||||||
|
BeAPodInPhase(v1.PodRunning),
|
||||||
|
BeAPodReady(),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
// BeReady matches if the pod is reported ready
|
||||||
|
func BeAPodReady() types.GomegaMatcher {
|
||||||
|
return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) {
|
||||||
|
return podutils.IsPodReady(actual), nil
|
||||||
|
}).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} not ready yet")
|
||||||
|
}
|
||||||
|
|
||||||
|
// BeAPodInPhase matches if the pod is running
|
||||||
|
func BeAPodInPhase(phase v1.PodPhase) types.GomegaMatcher {
|
||||||
|
return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) {
|
||||||
|
return actual.Status.Phase == phase, nil
|
||||||
|
}).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} failed {{.To}} be in phase {{.Data}} instead is in phase {{.Actual.Status.Phase}}").WithTemplateData(phase)
|
||||||
|
}
|
||||||
|
|
||||||
|
// BeTheSamePodAs matches if the pod has the given UID
|
||||||
|
func BeTheSamePodAs(podUID k8stypes.UID) types.GomegaMatcher {
|
||||||
|
return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) {
|
||||||
|
return actual.UID == podUID, nil
|
||||||
|
}).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} expected UID {{.Data}} has UID instead {{.Actual.UID}}").WithTemplateData(podUID)
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user