diff --git a/test/e2e_node/container_lifecycle_test.go b/test/e2e_node/container_lifecycle_test.go index 0e744c22c47..d52c2b304e1 100644 --- a/test/e2e_node/container_lifecycle_test.go +++ b/test/e2e_node/container_lifecycle_test.go @@ -1234,17 +1234,14 @@ var _ = SIGDescribe(framework.WithSerial(), "Containers Lifecycle", func() { podSandboxID := sandboxes[0].Id ginkgo.By("Stopping the kubelet") - restartKubelet := stopKubelet() - gomega.Eventually(ctx, func() bool { - return kubeletHealthCheck(kubeletHealthCheckURL) - }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("kubelet was expected to be stopped but it is still running")) + restartKubelet := mustStopKubelet(ctx, f) ginkgo.By("Stopping the pod sandbox to simulate the node reboot") err = rs.StopPodSandbox(ctx, podSandboxID) framework.ExpectNoError(err) ginkgo.By("Restarting the kubelet") - restartKubelet() + restartKubelet(ctx) gomega.Eventually(ctx, func() bool { return kubeletHealthCheck(kubeletHealthCheckURL) }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("kubelet was expected to be healthy")) @@ -1361,14 +1358,10 @@ var _ = SIGDescribe(framework.WithSerial(), "Containers Lifecycle", func() { ginkgo.It("should not restart any completed init container after the kubelet restart", func(ctx context.Context) { ginkgo.By("stopping the kubelet") - startKubelet := stopKubelet() - // wait until the kubelet health check will fail - gomega.Eventually(ctx, func() bool { - return kubeletHealthCheck(kubeletHealthCheckURL) - }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("kubelet should be stopped")) + restartKubelet := mustStopKubelet(ctx, f) ginkgo.By("restarting the kubelet") - startKubelet() + restartKubelet(ctx) // wait until the kubelet health check will succeed gomega.Eventually(ctx, func() bool { return kubeletHealthCheck(kubeletHealthCheckURL) @@ -1409,11 +1402,7 @@ var _ = SIGDescribe(framework.WithSerial(), "Containers Lifecycle", func() { 
ginkgo.It("should not restart any completed init container, even after the completed init container statuses have been removed and the kubelet restarted", func(ctx context.Context) { ginkgo.By("stopping the kubelet") - startKubelet := stopKubelet() - // wait until the kubelet health check will fail - gomega.Eventually(ctx, func() bool { - return kubeletHealthCheck(kubeletHealthCheckURL) - }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("kubelet should be stopped")) + restartKubelet := mustStopKubelet(ctx, f) ginkgo.By("removing the completed init container statuses from the container runtime") rs, _, err := getCRIClient() @@ -1437,7 +1426,7 @@ var _ = SIGDescribe(framework.WithSerial(), "Containers Lifecycle", func() { } ginkgo.By("restarting the kubelet") - startKubelet() + restartKubelet(ctx) // wait until the kubelet health check will succeed gomega.Eventually(ctx, func() bool { return kubeletHealthCheck(kubeletHealthCheckURL) @@ -1544,18 +1533,10 @@ var _ = SIGDescribe(framework.WithSerial(), "Containers Lifecycle", func() { ginkgo.It("should not restart any completed init container after the kubelet restart", func(ctx context.Context) { ginkgo.By("stopping the kubelet") - startKubelet := stopKubelet() - // wait until the kubelet health check will fail - gomega.Eventually(ctx, func() bool { - return kubeletHealthCheck(kubeletHealthCheckURL) - }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("kubelet should be stopped")) + restartKubelet := mustStopKubelet(ctx, f) ginkgo.By("restarting the kubelet") - startKubelet() - // wait until the kubelet health check will succeed - gomega.Eventually(ctx, func() bool { - return kubeletHealthCheck(kubeletHealthCheckURL) - }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("kubelet should be restarted")) + restartKubelet(ctx) ginkgo.By("ensuring that no completed init container is restarted") gomega.Consistently(ctx, func() bool { @@ -1588,11 +1569,7 @@ var _ = 
SIGDescribe(framework.WithSerial(), "Containers Lifecycle", func() { ginkgo.It("should not restart any completed init container, even after the completed init container statuses have been removed and the kubelet restarted", func(ctx context.Context) { ginkgo.By("stopping the kubelet") - startKubelet := stopKubelet() - // wait until the kubelet health check will fail - gomega.Eventually(ctx, func() bool { - return kubeletHealthCheck(kubeletHealthCheckURL) - }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("kubelet should be stopped")) + restartKubelet := mustStopKubelet(ctx, f) ginkgo.By("removing the completed init container statuses from the container runtime") rs, _, err := getCRIClient() @@ -1616,11 +1593,7 @@ var _ = SIGDescribe(framework.WithSerial(), "Containers Lifecycle", func() { } ginkgo.By("restarting the kubelet") - startKubelet() - // wait until the kubelet health check will succeed - gomega.Eventually(ctx, func() bool { - return kubeletHealthCheck(kubeletHealthCheckURL) - }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("kubelet should be restarted")) + restartKubelet(ctx) ginkgo.By("ensuring that no completed init container is restarted") gomega.Consistently(ctx, func() bool { @@ -5517,17 +5490,14 @@ var _ = SIGDescribe(nodefeature.SidecarContainers, framework.WithSerial(), "Cont podSandboxID := sandboxes[0].Id ginkgo.By("Stopping the kubelet") - restartKubelet := stopKubelet() - gomega.Eventually(ctx, func() bool { - return kubeletHealthCheck(kubeletHealthCheckURL) - }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("expected kubelet would have been stopped but it is still running")) + restartKubelet := mustStopKubelet(ctx, f) ginkgo.By("Stopping the pod sandbox to simulate the node reboot") err = rs.StopPodSandbox(ctx, podSandboxID) framework.ExpectNoError(err) ginkgo.By("Restarting the kubelet") - restartKubelet() + restartKubelet(ctx) gomega.Eventually(ctx, func() bool { return 
kubeletHealthCheck(kubeletHealthCheckURL) }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("kubelet was expected to be healthy")) @@ -5662,10 +5632,7 @@ var _ = SIGDescribe(nodefeature.SidecarContainers, framework.WithSerial(), "Cont podSandboxID := sandboxes[0].Id ginkgo.By("Stopping the kubelet") - restartKubelet := stopKubelet() - gomega.Eventually(ctx, func() bool { - return kubeletHealthCheck(kubeletHealthCheckURL) - }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("expected kubelet would have been stopped but it is still running")) + restartKubelet := mustStopKubelet(ctx, f) if nodeReboot { ginkgo.By("Stopping the pod sandbox to simulate the node reboot") @@ -5674,7 +5641,7 @@ var _ = SIGDescribe(nodefeature.SidecarContainers, framework.WithSerial(), "Cont } ginkgo.By("Restarting the kubelet") - restartKubelet() + restartKubelet(ctx) gomega.Eventually(ctx, func() bool { return kubeletHealthCheck(kubeletHealthCheckURL) }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("kubelet was expected to be healthy")) diff --git a/test/e2e_node/device_manager_test.go b/test/e2e_node/device_manager_test.go index af608dc15d9..dc0f6ae7625 100644 --- a/test/e2e_node/device_manager_test.go +++ b/test/e2e_node/device_manager_test.go @@ -194,7 +194,7 @@ var _ = SIGDescribe("Device Manager", framework.WithSerial(), nodefeature.Device framework.Logf("pod %s/%s running", testPod.Namespace, testPod.Name) ginkgo.By("stopping the kubelet") - startKubelet := stopKubelet() + restartKubelet := mustStopKubelet(ctx, f) ginkgo.By("stopping all the local containers - using CRI") rs, _, err := getCRIClient() @@ -210,7 +210,7 @@ var _ = SIGDescribe("Device Manager", framework.WithSerial(), nodefeature.Device } ginkgo.By("restarting the kubelet") - startKubelet() + restartKubelet(ctx) ginkgo.By("waiting for the kubelet to be ready again") // Wait for the Kubelet to be ready. 
diff --git a/test/e2e_node/device_plugin_test.go b/test/e2e_node/device_plugin_test.go index 5520c78dfed..2db84ce2b80 100644 --- a/test/e2e_node/device_plugin_test.go +++ b/test/e2e_node/device_plugin_test.go @@ -211,7 +211,7 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) { e2epod.NewPodClient(f).DeleteSync(ctx, p.Name, metav1.DeleteOptions{}, 2*time.Minute) } - restartKubelet(true) + restartKubelet(ctx, true) ginkgo.By("Waiting for devices to become unavailable on the local node") gomega.Eventually(ctx, func(ctx context.Context) bool { @@ -365,7 +365,7 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) { framework.Logf("testing pod: pre-restart UID=%s namespace=%s name=%s ready=%v", pod1.UID, pod1.Namespace, pod1.Name, podutils.IsPodReady(pod1)) ginkgo.By("Restarting Kubelet") - restartKubelet(true) + restartKubelet(ctx, true) ginkgo.By("Wait for node to be ready again") e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, 5*time.Minute) @@ -430,7 +430,7 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) { gomega.Expect(devIDRestart1).To(gomega.Equal(devID1)) ginkgo.By("Restarting Kubelet") - restartKubelet(true) + restartKubelet(ctx, true) ginkgo.By("Wait for node to be ready again") e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, 5*time.Minute) @@ -499,7 +499,7 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) { gomega.Eventually(getNodeResourceValues, devicePluginGracefulTimeout, f.Timeouts.Poll).WithContext(ctx).WithArguments(SampleDeviceResourceName).Should(gomega.Equal(ResourceValue{Allocatable: 0, Capacity: int(expectedSampleDevsAmount)})) ginkgo.By("Restarting Kubelet") - restartKubelet(true) + restartKubelet(ctx, true) ginkgo.By("Wait for node to be ready again") gomega.Expect(e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, 5*time.Minute)).To(gomega.Succeed()) @@ -587,7 +587,7 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) { 
framework.ExpectNoError(err) ginkgo.By("Restarting Kubelet") - restartKubelet(true) + restartKubelet(ctx, true) ginkgo.By("Wait for node to be ready again") e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, 5*time.Minute) @@ -652,7 +652,7 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) { framework.ExpectNoError(err) ginkgo.By("stopping the kubelet") - startKubelet := stopKubelet() + restartKubelet := mustStopKubelet(ctx, f) // wait until the kubelet health check will fail gomega.Eventually(ctx, func() bool { @@ -666,7 +666,7 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) { deletePodSyncByName(ctx, f, pod.Name) framework.Logf("Starting the kubelet") - startKubelet() + restartKubelet(ctx) // wait until the kubelet health check will succeed gomega.Eventually(ctx, func() bool { @@ -947,7 +947,7 @@ func testDevicePluginNodeReboot(f *framework.Framework, pluginSockDir string) { framework.ExpectNoError(err) ginkgo.By("stopping the kubelet") - startKubelet := stopKubelet() + restartKubelet := mustStopKubelet(ctx, f) ginkgo.By("stopping all the local containers - using CRI") rs, _, err := getCRIClient() @@ -963,7 +963,7 @@ func testDevicePluginNodeReboot(f *framework.Framework, pluginSockDir string) { } ginkgo.By("restarting the kubelet") - startKubelet() + restartKubelet(ctx) ginkgo.By("Wait for node to be ready again") e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, 5*time.Minute) diff --git a/test/e2e_node/dra_test.go b/test/e2e_node/dra_test.go index 72ce7013762..45644fd8b5c 100644 --- a/test/e2e_node/dra_test.go +++ b/test/e2e_node/dra_test.go @@ -107,7 +107,7 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation, } ginkgo.By("restarting Kubelet") - restartKubelet(true) + restartKubelet(ctx, true) ginkgo.By("wait for Kubelet plugin re-registration") gomega.Eventually(getNewCalls).WithTimeout(pluginRegistrationTimeout).Should(testdriver.BeRegistered) @@ -129,16 +129,15 @@ var _ = 
framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation, // Stop Kubelet ginkgo.By("stop kubelet") - startKubelet := stopKubelet() + restartKubelet := mustStopKubelet(ctx, f) pod := createTestObjects(ctx, f.ClientSet, getNodeName(ctx, f), f.Namespace.Name, "draclass", "external-claim", "drapod", true, []string{driverName}) // Pod must be in pending state err := e2epod.WaitForPodCondition(ctx, f.ClientSet, f.Namespace.Name, pod.Name, "Pending", framework.PodStartShortTimeout, func(pod *v1.Pod) (bool, error) { return pod.Status.Phase == v1.PodPending, nil }) framework.ExpectNoError(err) - // Start Kubelet ginkgo.By("restart kubelet") - startKubelet() + restartKubelet(ctx) // Pod should succeed err = e2epod.WaitForPodSuccessInNamespaceTimeout(ctx, f.ClientSet, pod.Name, f.Namespace.Name, framework.PodStartShortTimeout) framework.ExpectNoError(err) @@ -227,12 +226,12 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation, gomega.Eventually(kubeletPlugin.GetGRPCCalls).WithTimeout(retryTestTimeout).Should(testdriver.NodePrepareResourcesFailed) ginkgo.By("stop Kubelet") - startKubelet := stopKubelet() + restartKubelet := mustStopKubelet(ctx, f) unsetNodePrepareResourcesFailureMode() - ginkgo.By("start Kubelet") - startKubelet() + ginkgo.By("restart Kubelet") + restartKubelet(ctx) ginkgo.By("wait for NodePrepareResources call to succeed") gomega.Eventually(kubeletPlugin.GetGRPCCalls).WithTimeout(retryTestTimeout).Should(testdriver.NodePrepareResourcesSucceeded) @@ -254,12 +253,12 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation, gomega.Eventually(kubeletPlugin.GetGRPCCalls).WithTimeout(retryTestTimeout).Should(testdriver.NodeUnprepareResourcesFailed) ginkgo.By("stop Kubelet") - startKubelet := stopKubelet() + restartKubelet := mustStopKubelet(ctx, f) unsetNodeUnprepareResourcesFailureMode() - ginkgo.By("start Kubelet") - startKubelet() + ginkgo.By("restart Kubelet") + restartKubelet(ctx) 
ginkgo.By("wait for NodeUnprepareResources call to succeed") gomega.Eventually(kubeletPlugin.GetGRPCCalls).WithTimeout(retryTestTimeout).Should(testdriver.NodeUnprepareResourcesSucceeded) @@ -313,7 +312,7 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation, gomega.Eventually(kubeletPlugin.GetGRPCCalls).WithTimeout(retryTestTimeout).Should(testdriver.NodeUnprepareResourcesFailed) ginkgo.By("restart Kubelet") - stopKubelet()() + restartKubelet(ctx, true) ginkgo.By("wait for NodeUnprepareResources call to fail") gomega.Eventually(kubeletPlugin.GetGRPCCalls).WithTimeout(retryTestTimeout).Should(testdriver.NodeUnprepareResourcesFailed) @@ -337,15 +336,15 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation, framework.ExpectNoError(err) ginkgo.By("stop Kubelet") - startKubelet := stopKubelet() + restartKubelet := mustStopKubelet(ctx, f) ginkgo.By("delete pod") e2epod.DeletePodOrFail(ctx, f.ClientSet, f.Namespace.Name, pod.Name) unblockNodePrepareResources() - ginkgo.By("start Kubelet") - startKubelet() + ginkgo.By("restart Kubelet") + restartKubelet(ctx) calls := kubeletPlugin.CountCalls("/NodePrepareResources") ginkgo.By("make sure NodePrepareResources is not called again") @@ -447,7 +446,7 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation, framework.ExpectNoError(err) ginkgo.By("restart Kubelet") - restartKubelet(true) + restartKubelet(ctx, true) unblockNodePrepareResources() @@ -472,7 +471,7 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation, gomega.Eventually(kubeletPlugin2.GetGRPCCalls).WithTimeout(retryTestTimeout).Should(testdriver.NodePrepareResourcesSucceeded) ginkgo.By("restart Kubelet") - restartKubelet(true) + restartKubelet(ctx, true) unblockNodeUnprepareResources() @@ -500,10 +499,10 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation, f.It("must be removed on kubelet startup", f.WithDisruptive(), func(ctx 
context.Context) { ginkgo.By("stop kubelet") - startKubelet := stopKubelet() + restartKubelet := mustStopKubelet(ctx, f) ginkgo.DeferCleanup(func() { - if startKubelet != nil { - startKubelet() + if restartKubelet != nil { + restartKubelet(ctx) } }) @@ -518,9 +517,9 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation, gomega.Consistently(ctx, listResources).WithTimeout(5*time.Second).Should(matchAll, "ResourceSlices without kubelet") - ginkgo.By("start kubelet") - startKubelet() - startKubelet = nil + ginkgo.By("restart kubelet") + restartKubelet(ctx) + restartKubelet = nil ginkgo.By("wait for exactly the node's ResourceSlice to get deleted") gomega.Eventually(ctx, listResources).Should(matchOtherNode, "ResourceSlices with kubelet") diff --git a/test/e2e_node/hugepages_test.go b/test/e2e_node/hugepages_test.go index c7090d4c93f..b20ca0683ad 100644 --- a/test/e2e_node/hugepages_test.go +++ b/test/e2e_node/hugepages_test.go @@ -222,7 +222,7 @@ var _ = SIGDescribe("HugePages", framework.WithSerial(), feature.HugePages, "[No gomega.Expect(value.String()).To(gomega.Equal("9Mi"), "huge pages with size 3Mi should be supported") ginkgo.By("restarting the node and verifying that huge pages with size 3Mi are not supported") - restartKubelet(true) + restartKubelet(ctx, true) ginkgo.By("verifying that the hugepages-3Mi resource no longer is present") gomega.Eventually(ctx, func() bool { @@ -235,14 +235,14 @@ var _ = SIGDescribe("HugePages", framework.WithSerial(), feature.HugePages, "[No ginkgo.It("should add resources for new huge page sizes on kubelet restart", func(ctx context.Context) { ginkgo.By("Stopping kubelet") - startKubelet := stopKubelet() + restartKubelet := mustStopKubelet(ctx, f) ginkgo.By(`Patching away support for hugepage resource "hugepages-2Mi"`) patch := []byte(`[{"op": "remove", "path": "/status/capacity/hugepages-2Mi"}, {"op": "remove", "path": "/status/allocatable/hugepages-2Mi"}]`) result := 
f.ClientSet.CoreV1().RESTClient().Patch(types.JSONPatchType).Resource("nodes").Name(framework.TestContext.NodeName).SubResource("status").Body(patch).Do(ctx) framework.ExpectNoError(result.Error(), "while patching") - ginkgo.By("Starting kubelet again") - startKubelet() + ginkgo.By("Restarting kubelet again") + restartKubelet(ctx) ginkgo.By("verifying that the hugepages-2Mi resource is present") gomega.Eventually(ctx, func() bool { @@ -352,7 +352,7 @@ var _ = SIGDescribe("HugePages", framework.WithSerial(), feature.HugePages, "[No setHugepages(ctx) ginkgo.By("restarting kubelet to pick up pre-allocated hugepages") - restartKubelet(true) + restartKubelet(ctx, true) waitForHugepages(ctx) @@ -370,7 +370,7 @@ var _ = SIGDescribe("HugePages", framework.WithSerial(), feature.HugePages, "[No releaseHugepages(ctx) ginkgo.By("restarting kubelet to pick up pre-allocated hugepages") - restartKubelet(true) + restartKubelet(ctx, true) waitForHugepages(ctx) }) diff --git a/test/e2e_node/image_gc_test.go b/test/e2e_node/image_gc_test.go index 5a725e66875..6c27dc92b20 100644 --- a/test/e2e_node/image_gc_test.go +++ b/test/e2e_node/image_gc_test.go @@ -94,8 +94,7 @@ var _ = SIGDescribe("ImageGarbageCollect", framework.WithSerial(), framework.Wit e2epod.NewPodClient(f).DeleteSync(ctx, pod.ObjectMeta.Name, metav1.DeleteOptions{}, e2epod.DefaultPodDeletionTimeout) - restartKubelet(true) - waitForKubeletToStart(ctx, f) + restartKubelet(ctx, true) // Wait until the maxAge of the image after the kubelet is restarted to ensure it doesn't // GC too early. 
diff --git a/test/e2e_node/kubelet_config_dir_test.go b/test/e2e_node/kubelet_config_dir_test.go index 1d541afb17a..9e0e59ef117 100644 --- a/test/e2e_node/kubelet_config_dir_test.go +++ b/test/e2e_node/kubelet_config_dir_test.go @@ -54,12 +54,7 @@ var _ = SIGDescribe("Kubelet Config", framework.WithSlow(), framework.WithSerial framework.ExpectNoError(err) ginkgo.By("Stopping the kubelet") - restartKubelet := stopKubelet() - - // wait until the kubelet health check will fail - gomega.Eventually(ctx, func() bool { - return kubeletHealthCheck(kubeletHealthCheckURL) - }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("expected kubelet health check to be failed")) + restartKubelet := mustStopKubelet(ctx, f) configDir := framework.TestContext.KubeletConfigDropinDir @@ -128,7 +123,7 @@ featureGates: DynamicResourceAllocation: true`) framework.ExpectNoError(os.WriteFile(filepath.Join(configDir, "20-kubelet.conf"), contents, 0755)) ginkgo.By("Restarting the kubelet") - restartKubelet() + restartKubelet(ctx) // wait until the kubelet health check will succeed gomega.Eventually(ctx, func() bool { return kubeletHealthCheck(kubeletHealthCheckURL) diff --git a/test/e2e_node/node_container_manager_test.go b/test/e2e_node/node_container_manager_test.go index 6761e195a3b..5f61e4a8676 100644 --- a/test/e2e_node/node_container_manager_test.go +++ b/test/e2e_node/node_container_manager_test.go @@ -101,12 +101,9 @@ var _ = SIGDescribe("Node Container Manager", framework.WithSerial(), func() { framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(oldCfg)) ginkgo.By("Restarting the kubelet") - restartKubelet(true) + restartKubelet(ctx, true) - // wait until the kubelet health check will succeed - gomega.Eventually(ctx, func(ctx context.Context) bool { - return kubeletHealthCheck(kubeletHealthCheckURL) - }).WithTimeout(2 * time.Minute).WithPolling(5 * time.Second).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state")) + 
waitForKubeletToStart(ctx, f) ginkgo.By("Started the kubelet") } }) @@ -121,12 +118,9 @@ var _ = SIGDescribe("Node Container Manager", framework.WithSerial(), func() { framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(newCfg)) ginkgo.By("Restarting the kubelet") - restartKubelet(true) + restartKubelet(ctx, true) - // wait until the kubelet health check will succeed - gomega.Eventually(ctx, func(ctx context.Context) bool { - return kubeletHealthCheck(kubeletHealthCheckURL) - }).WithTimeout(2 * time.Minute).WithPolling(5 * time.Second).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state")) + waitForKubeletToStart(ctx, f) ginkgo.By("Started the kubelet") gomega.Consistently(ctx, func(ctx context.Context) bool { @@ -243,7 +237,7 @@ func runTest(ctx context.Context, f *framework.Framework) error { if oldCfg != nil { // Update the Kubelet configuration. ginkgo.By("Stopping the kubelet") - startKubelet := stopKubelet() + restartKubelet := mustStopKubelet(ctx, f) // wait until the kubelet health check will fail gomega.Eventually(ctx, func() bool { @@ -252,8 +246,8 @@ func runTest(ctx context.Context, f *framework.Framework) error { framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(oldCfg)) - ginkgo.By("Starting the kubelet") - startKubelet() + ginkgo.By("Restarting the kubelet") + restartKubelet(ctx) // wait until the kubelet health check will succeed gomega.Eventually(ctx, func(ctx context.Context) bool { @@ -271,12 +265,7 @@ func runTest(ctx context.Context, f *framework.Framework) error { // Set the new kubelet configuration. // Update the Kubelet configuration. 
ginkgo.By("Stopping the kubelet") - startKubelet := stopKubelet() - - // wait until the kubelet health check will fail - gomega.Eventually(ctx, func() bool { - return kubeletHealthCheck(kubeletHealthCheckURL) - }, time.Minute, time.Second).Should(gomega.BeFalseBecause("expected kubelet health check to be failed")) + restartKubelet := mustStopKubelet(ctx, f) expectedNAPodCgroup := cm.NewCgroupName(cm.RootCgroupName, nodeAllocatableCgroup) @@ -293,7 +282,7 @@ func runTest(ctx context.Context, f *framework.Framework) error { framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(newCfg)) ginkgo.By("Starting the kubelet") - startKubelet() + restartKubelet(ctx) // wait until the kubelet health check will succeed gomega.Eventually(ctx, func() bool { diff --git a/test/e2e_node/node_perf_test.go b/test/e2e_node/node_perf_test.go index 8f74c7ebc57..08a02b4ff3d 100644 --- a/test/e2e_node/node_perf_test.go +++ b/test/e2e_node/node_perf_test.go @@ -52,22 +52,12 @@ func setKubeletConfig(ctx context.Context, f *framework.Framework, cfg *kubeletc if cfg != nil { // Update the Kubelet configuration. ginkgo.By("Stopping the kubelet") - startKubelet := stopKubelet() - - // wait until the kubelet health check will fail - gomega.Eventually(ctx, func() bool { - return kubeletHealthCheck(kubeletHealthCheckURL) - }, time.Minute, time.Second).Should(gomega.BeFalseBecause("expected kubelet health check to be failed")) + restartKubelet := mustStopKubelet(ctx, f) framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(cfg)) - ginkgo.By("Starting the kubelet") - startKubelet() - - // wait until the kubelet health check will succeed - gomega.Eventually(ctx, func() bool { - return kubeletHealthCheck(kubeletHealthCheckURL) - }, 2*time.Minute, 5*time.Second).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state")) + ginkgo.By("Restarting the kubelet") + restartKubelet(ctx) } // Wait for the Kubelet to be ready. 
diff --git a/test/e2e_node/os_label_rename_test.go b/test/e2e_node/os_label_rename_test.go index dbc456895f9..cb868d45d8e 100644 --- a/test/e2e_node/os_label_rename_test.go +++ b/test/e2e_node/os_label_rename_test.go @@ -49,7 +49,7 @@ var _ = SIGDescribe("OSArchLabelReconciliation", framework.WithSerial(), framewo ginkgo.By("killing and restarting kubelet") // Let's kill the kubelet - startKubelet := stopKubelet() + restartKubelet := mustStopKubelet(ctx, f) // Update labels newNode := node.DeepCopy() newNode.Labels[v1.LabelOSStable] = "dummyOS" @@ -57,7 +57,7 @@ var _ = SIGDescribe("OSArchLabelReconciliation", framework.WithSerial(), framewo _, _, err := nodeutil.PatchNodeStatus(f.ClientSet.CoreV1(), types.NodeName(node.Name), node, newNode) framework.ExpectNoError(err) // Restart kubelet - startKubelet() + restartKubelet(ctx) framework.ExpectNoError(e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, framework.RestartNodeReadyAgainTimeout)) // If this happens right, node should have all the labels reset properly err = waitForNodeLabels(ctx, f.ClientSet.CoreV1(), node.Name, 5*time.Minute) diff --git a/test/e2e_node/podresources_test.go b/test/e2e_node/podresources_test.go index b29eb5fba65..86e1573c2b5 100644 --- a/test/e2e_node/podresources_test.go +++ b/test/e2e_node/podresources_test.go @@ -1146,7 +1146,7 @@ var _ = SIGDescribe("POD Resources", framework.WithSerial(), feature.PodResource expectPodResources(ctx, 1, cli, []podDesc{desc}) ginkgo.By("Restarting Kubelet") - restartKubelet(true) + restartKubelet(ctx, true) // we need to wait for the node to be reported ready before we can safely query // the podresources endpoint again. Otherwise we will have false negatives. 
diff --git a/test/e2e_node/restart_test.go b/test/e2e_node/restart_test.go index 2ea929e3747..ca3224f42d4 100644 --- a/test/e2e_node/restart_test.go +++ b/test/e2e_node/restart_test.go @@ -200,8 +200,8 @@ var _ = SIGDescribe("Restart", framework.WithSerial(), framework.WithSlow(), fra ginkgo.By("killing and restarting kubelet") // We want to kill the kubelet rather than a graceful restart - startKubelet := stopKubelet() - startKubelet() + restartKubelet := mustStopKubelet(ctx, f) + restartKubelet(ctx) // If this test works correctly, each of these pods will exit // with no issue. But if accounting breaks, pods scheduled after @@ -309,19 +309,10 @@ var _ = SIGDescribe("Restart", framework.WithSerial(), framework.WithSlow(), fra // As soon as the pod enters succeeded phase (detected by the watch above); kill the kubelet. // This is a bit racy, but the goal is to stop the kubelet before the kubelet is able to delete the pod from the API-sever in order to repro https://issues.k8s.io/116925 ginkgo.By("Stopping the kubelet") - startKubelet := stopKubelet() - // wait until the kubelet health check will fail - gomega.Eventually(ctx, func() bool { - return kubeletHealthCheck(kubeletHealthCheckURL) - }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("expected kubelet health check to be failed")) + restartKubelet := mustStopKubelet(ctx, f) - ginkgo.By("Starting the kubelet") - startKubelet() - - // wait until the kubelet health check will succeed - gomega.Eventually(ctx, func() bool { - return kubeletHealthCheck(kubeletHealthCheckURL) - }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state")) + ginkgo.By("Restarting the kubelet") + restartKubelet(ctx) // Wait for the Kubelet to be ready. 
gomega.Eventually(ctx, func(ctx context.Context) bool { @@ -361,12 +352,7 @@ var _ = SIGDescribe("Restart", framework.WithSerial(), framework.WithSlow(), fra }, }) ginkgo.By("Stopping the kubelet") - startKubelet := stopKubelet() - - // wait until the kubelet health check will fail - gomega.Eventually(ctx, func() bool { - return kubeletHealthCheck(kubeletHealthCheckURL) - }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("expected kubelet health check to be failed")) + restartKubelet := mustStopKubelet(ctx, f) // Create the pod bound to the node. It will remain in the Pending // phase as Kubelet is down. @@ -379,19 +365,7 @@ var _ = SIGDescribe("Restart", framework.WithSerial(), framework.WithSlow(), fra // Restart Kubelet so that it proceeds with deletion ginkgo.By("Starting the kubelet") - startKubelet() - - // wait until the kubelet health check will succeed - gomega.Eventually(ctx, func() bool { - return kubeletHealthCheck(kubeletHealthCheckURL) - }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state")) - - // Wait for the Kubelet to be ready. 
- gomega.Eventually(ctx, func(ctx context.Context) bool { - nodes, err := e2enode.TotalReady(ctx, f.ClientSet) - framework.ExpectNoError(err) - return nodes == 1 - }, time.Minute, f.Timeouts.Poll).Should(gomega.BeTrueBecause("expected kubelet to be in ready state")) + restartKubelet(ctx) ginkgo.By(fmt.Sprintf("After the kubelet is restarted, verify the pod (%v/%v) is deleted by kubelet", pod.Namespace, pod.Name)) gomega.Eventually(ctx, func(ctx context.Context) error { @@ -439,12 +413,7 @@ var _ = SIGDescribe("Restart", framework.WithSerial(), framework.WithSlow(), fra framework.ExpectNoError(err, "Failed to await for the pod to be running: (%v/%v)", f.Namespace.Name, pod.Name) ginkgo.By("Stopping the kubelet") - startKubelet := stopKubelet() - - // wait until the kubelet health check will fail - gomega.Eventually(ctx, func() bool { - return kubeletHealthCheck(kubeletHealthCheckURL) - }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("expected kubelet health check to be failed")) + restartKubelet := mustStopKubelet(ctx, f) ginkgo.By(fmt.Sprintf("Deleting the pod (%v/%v) to set a deletion timestamp", pod.Namespace, pod.Name)) err = e2epod.NewPodClient(f).Delete(ctx, pod.Name, metav1.DeleteOptions{GracePeriodSeconds: &gracePeriod}) @@ -454,13 +423,8 @@ var _ = SIGDescribe("Restart", framework.WithSerial(), framework.WithSlow(), fra e2enode.RemoveLabelOffNode(f.ClientSet, nodeName, nodeLabelKey) // Restart Kubelet so that it proceeds with deletion - ginkgo.By("Starting the kubelet") - startKubelet() - - // wait until the kubelet health check will succeed - gomega.Eventually(ctx, func() bool { - return kubeletHealthCheck(kubeletHealthCheckURL) - }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state")) + ginkgo.By("Restarting the kubelet") + restartKubelet(ctx) // Wait for the Kubelet to be ready. 
gomega.Eventually(ctx, func(ctx context.Context) bool { diff --git a/test/e2e_node/unknown_pods_test.go b/test/e2e_node/unknown_pods_test.go index 378b1e8ba76..436b696cf6e 100644 --- a/test/e2e_node/unknown_pods_test.go +++ b/test/e2e_node/unknown_pods_test.go @@ -69,29 +69,19 @@ var _ = SIGDescribe("Unknown Pods", framework.WithSerial(), framework.WithDisrup ginkgo.It("the static pod should be terminated and cleaned up due to becoming a unknown pod due to being force deleted while kubelet is not running", func(ctx context.Context) { framework.Logf("Stopping the kubelet") - startKubelet := stopKubelet() + restartKubelet := mustStopKubelet(ctx, f) pod, err := f.ClientSet.CoreV1().Pods(ns).Get(ctx, mirrorPodName, metav1.GetOptions{}) framework.ExpectNoError(err) - // wait until the kubelet health check will fail - gomega.Eventually(ctx, func() bool { - return kubeletHealthCheck(kubeletHealthCheckURL) - }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("expected kubelet health check to be failed")) - framework.Logf("Delete the static pod manifest while the kubelet is not running") file := staticPodPath(podPath, staticPodName, ns) framework.Logf("deleting static pod manifest %q", file) err = os.Remove(file) framework.ExpectNoError(err) - framework.Logf("Starting the kubelet") - startKubelet() - - // wait until the kubelet health check will succeed - gomega.Eventually(ctx, func() bool { - return kubeletHealthCheck(kubeletHealthCheckURL) - }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state")) + framework.Logf("Restarting the kubelet") + restartKubelet(ctx) framework.Logf("wait for the mirror pod %v to disappear", mirrorPodName) gomega.Eventually(ctx, func(ctx context.Context) error { @@ -140,27 +130,17 @@ var _ = SIGDescribe("Unknown Pods", framework.WithSerial(), framework.WithDisrup ginkgo.It("the api pod should be terminated and cleaned up due to becoming a unknown pod due to being force 
deleted while kubelet is not running", func(ctx context.Context) { framework.Logf("Stopping the kubelet") - startKubelet := stopKubelet() + restartKubelet := mustStopKubelet(ctx, f) pod, err := f.ClientSet.CoreV1().Pods(ns).Get(ctx, podName, metav1.GetOptions{}) framework.ExpectNoError(err) - // wait until the kubelet health check will fail - gomega.Eventually(ctx, func() bool { - return kubeletHealthCheck(kubeletHealthCheckURL) - }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("expected kubelet health check to be failed")) - framework.Logf("Delete the pod while the kubelet is not running") // Delete pod sync by name will force delete the pod, removing it from kubelet's config deletePodSyncByName(ctx, f, podName) - framework.Logf("Starting the kubelet") - startKubelet() - - // wait until the kubelet health check will succeed - gomega.Eventually(ctx, func() bool { - return kubeletHealthCheck(kubeletHealthCheckURL) - }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state")) + framework.Logf("Restarting the kubelet") + restartKubelet(ctx) framework.Logf("wait for the pod %v to disappear", podName) gomega.Eventually(ctx, func(ctx context.Context) error { diff --git a/test/e2e_node/util.go b/test/e2e_node/util.go index 6fc0e0a2e35..7919f13220b 100644 --- a/test/e2e_node/util.go +++ b/test/e2e_node/util.go @@ -216,12 +216,7 @@ func tempSetCurrentKubeletConfig(f *framework.Framework, updateFunction func(ctx func updateKubeletConfig(ctx context.Context, f *framework.Framework, kubeletConfig *kubeletconfig.KubeletConfiguration, deleteStateFiles bool) { // Update the Kubelet configuration. 
ginkgo.By("Stopping the kubelet") - startKubelet := stopKubelet() - - // wait until the kubelet health check will fail - gomega.Eventually(ctx, func() bool { - return kubeletHealthCheck(kubeletHealthCheckURL) - }, time.Minute, time.Second).Should(gomega.BeFalseBecause("expected kubelet health check to be failed")) + restartKubelet := mustStopKubelet(ctx, f) // Delete CPU and memory manager state files to be sure it will not prevent the kubelet restart if deleteStateFiles { @@ -231,9 +226,8 @@ func updateKubeletConfig(ctx context.Context, f *framework.Framework, kubeletCon framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(kubeletConfig)) - ginkgo.By("Starting the kubelet") - startKubelet() - waitForKubeletToStart(ctx, f) + ginkgo.By("Restarting the kubelet") + restartKubelet(ctx) } func waitForKubeletToStart(ctx context.Context, f *framework.Framework) { @@ -433,31 +427,37 @@ func startContainerRuntime() error { // Warning: the "current" kubelet is poorly defined. The "current" kubelet is assumed to be the most // recent kubelet service unit, IOW there is not a unique ID we use to bind explicitly a kubelet // instance to a test run. 
-func restartKubelet(running bool) { +func restartKubelet(ctx context.Context, running bool) { kubeletServiceName := findKubeletServiceName(running) // reset the kubelet service start-limit-hit - stdout, err := exec.Command("sudo", "systemctl", "reset-failed", kubeletServiceName).CombinedOutput() + stdout, err := exec.CommandContext(ctx, "sudo", "systemctl", "reset-failed", kubeletServiceName).CombinedOutput() framework.ExpectNoError(err, "Failed to reset kubelet start-limit-hit with systemctl: %v, %s", err, string(stdout)) - stdout, err = exec.Command("sudo", "systemctl", "restart", kubeletServiceName).CombinedOutput() + stdout, err = exec.CommandContext(ctx, "sudo", "systemctl", "restart", kubeletServiceName).CombinedOutput() framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %s", err, string(stdout)) } -// stopKubelet will kill the running kubelet, and returns a func that will restart the process again -func stopKubelet() func() { +// mustStopKubelet will kill the running kubelet, and returns a func that will restart the process again +func mustStopKubelet(ctx context.Context, f *framework.Framework) func(ctx context.Context) { kubeletServiceName := findKubeletServiceName(true) // reset the kubelet service start-limit-hit - stdout, err := exec.Command("sudo", "systemctl", "reset-failed", kubeletServiceName).CombinedOutput() + stdout, err := exec.CommandContext(ctx, "sudo", "systemctl", "reset-failed", kubeletServiceName).CombinedOutput() framework.ExpectNoError(err, "Failed to reset kubelet start-limit-hit with systemctl: %v, %s", err, string(stdout)) - stdout, err = exec.Command("sudo", "systemctl", "kill", kubeletServiceName).CombinedOutput() + stdout, err = exec.CommandContext(ctx, "sudo", "systemctl", "kill", kubeletServiceName).CombinedOutput() framework.ExpectNoError(err, "Failed to stop kubelet with systemctl: %v, %s", err, string(stdout)) - return func() { + // wait until the kubelet health check fails + gomega.Eventually(ctx,
func() bool { + return kubeletHealthCheck(kubeletHealthCheckURL) + }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("kubelet was expected to be stopped but it is still running")) + + return func(ctx context.Context) { // we should restart service, otherwise the transient service start will fail - stdout, err := exec.Command("sudo", "systemctl", "restart", kubeletServiceName).CombinedOutput() + stdout, err := exec.CommandContext(ctx, "sudo", "systemctl", "restart", kubeletServiceName).CombinedOutput() framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %v", err, stdout) + waitForKubeletToStart(ctx, f) } }