Merge pull request #128576 from bart0sh/PR166-refactor-kubelet-stop-and-restart

e2e_node: refactor Kubelet stopping and restarting
Authored by Kubernetes Prow Robot on 2024-11-06 20:10:40 +00:00, committed by GitHub
commit 48c65d1870
14 changed files with 104 additions and 221 deletions
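
The same mechanical change repeats across every file below: the old stopKubelet() returned a bare restart func, and each call site had to wrap both the stop and the restart in its own gomega.Eventually health-check poll. The new mustStopKubelet(ctx, f) (implementation in the final hunk) blocks until the health check fails before returning, and the restart func it returns takes a context and waits for the kubelet to become healthy again. A minimal self-contained sketch of the call-site change, with stub bodies standing in for the real systemctl and health-check logic and the framework parameter dropped:

package main

import "context"

// Old shape: the caller had to poll the kubelet health endpoint itself,
// once after stopping and once again after restarting.
func stopKubelet() func() {
	// sudo systemctl kill <kubelet unit> (stub)
	return func() { /* sudo systemctl restart <kubelet unit> (stub) */ }
}

// New shape: stopping blocks until the health check fails, and the returned
// restart func takes a context and blocks until the kubelet is healthy again.
// The real helper also takes f *framework.Framework for its poll timeouts.
func mustStopKubelet(ctx context.Context) func(context.Context) {
	// sudo systemctl kill <kubelet unit>, then wait for the health check to fail (stub)
	return func(ctx context.Context) {
		// sudo systemctl restart <kubelet unit>, then waitForKubeletToStart (stub)
	}
}

func main() {
	ctx := context.Background()
	// Before: startKubelet := stopKubelet(), plus a gomega.Eventually poll
	// until unhealthy; later startKubelet() plus another poll until healthy.
	restartKubelet := mustStopKubelet(ctx)
	// ... mutate node state while the kubelet is down ...
	restartKubelet(ctx)
}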


@@ -1234,17 +1234,14 @@ var _ = SIGDescribe(framework.WithSerial(), "Containers Lifecycle", func() {
podSandboxID := sandboxes[0].Id
ginkgo.By("Stopping the kubelet")
restartKubelet := stopKubelet()
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("kubelet was expected to be stopped but it is still running"))
restartKubelet := mustStopKubelet(ctx, f)
ginkgo.By("Stopping the pod sandbox to simulate the node reboot")
err = rs.StopPodSandbox(ctx, podSandboxID)
framework.ExpectNoError(err)
ginkgo.By("Restarting the kubelet")
restartKubelet()
restartKubelet(ctx)
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("kubelet was expected to be healthy"))
@@ -1361,14 +1358,10 @@ var _ = SIGDescribe(framework.WithSerial(), "Containers Lifecycle", func() {
ginkgo.It("should not restart any completed init container after the kubelet restart", func(ctx context.Context) {
ginkgo.By("stopping the kubelet")
startKubelet := stopKubelet()
// wait until the kubelet health check fails
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("kubelet should be stopped"))
restartKubelet := mustStopKubelet(ctx, f)
ginkgo.By("restarting the kubelet")
startKubelet()
restartKubelet(ctx)
// wait until the kubelet health check succeeds
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
@@ -1409,11 +1402,7 @@ var _ = SIGDescribe(framework.WithSerial(), "Containers Lifecycle", func() {
ginkgo.It("should not restart any completed init container, even after the completed init container statuses have been removed and the kubelet restarted", func(ctx context.Context) {
ginkgo.By("stopping the kubelet")
startKubelet := stopKubelet()
// wait until the kubelet health check fails
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("kubelet should be stopped"))
restartKubelet := mustStopKubelet(ctx, f)
ginkgo.By("removing the completed init container statuses from the container runtime")
rs, _, err := getCRIClient()
@@ -1437,7 +1426,7 @@ var _ = SIGDescribe(framework.WithSerial(), "Containers Lifecycle", func() {
}
ginkgo.By("restarting the kubelet")
startKubelet()
restartKubelet(ctx)
// wait until the kubelet health check succeeds
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
@@ -1544,18 +1533,10 @@ var _ = SIGDescribe(framework.WithSerial(), "Containers Lifecycle", func() {
ginkgo.It("should not restart any completed init container after the kubelet restart", func(ctx context.Context) {
ginkgo.By("stopping the kubelet")
startKubelet := stopKubelet()
// wait until the kubelet health check fails
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("kubelet should be stopped"))
restartKubelet := mustStopKubelet(ctx, f)
ginkgo.By("restarting the kubelet")
startKubelet()
// wait until the kubelet health check succeeds
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("kubelet should be restarted"))
restartKubelet(ctx)
ginkgo.By("ensuring that no completed init container is restarted")
gomega.Consistently(ctx, func() bool {
@@ -1588,11 +1569,7 @@ var _ = SIGDescribe(framework.WithSerial(), "Containers Lifecycle", func() {
ginkgo.It("should not restart any completed init container, even after the completed init container statuses have been removed and the kubelet restarted", func(ctx context.Context) {
ginkgo.By("stopping the kubelet")
startKubelet := stopKubelet()
// wait until the kubelet health check fails
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("kubelet should be stopped"))
restartKubelet := mustStopKubelet(ctx, f)
ginkgo.By("removing the completed init container statuses from the container runtime")
rs, _, err := getCRIClient()
@@ -1616,11 +1593,7 @@ var _ = SIGDescribe(framework.WithSerial(), "Containers Lifecycle", func() {
}
ginkgo.By("restarting the kubelet")
startKubelet()
// wait until the kubelet health check succeeds
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("kubelet should be restarted"))
restartKubelet(ctx)
ginkgo.By("ensuring that no completed init container is restarted")
gomega.Consistently(ctx, func() bool {
@@ -5517,17 +5490,14 @@ var _ = SIGDescribe(nodefeature.SidecarContainers, framework.WithSerial(), "Cont
podSandboxID := sandboxes[0].Id
ginkgo.By("Stopping the kubelet")
restartKubelet := stopKubelet()
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("expected kubelet would have been stopped but it is still running"))
restartKubelet := mustStopKubelet(ctx, f)
ginkgo.By("Stopping the pod sandbox to simulate the node reboot")
err = rs.StopPodSandbox(ctx, podSandboxID)
framework.ExpectNoError(err)
ginkgo.By("Restarting the kubelet")
restartKubelet()
restartKubelet(ctx)
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("kubelet was expected to be healthy"))
@@ -5662,10 +5632,7 @@ var _ = SIGDescribe(nodefeature.SidecarContainers, framework.WithSerial(), "Cont
podSandboxID := sandboxes[0].Id
ginkgo.By("Stopping the kubelet")
restartKubelet := stopKubelet()
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("expected kubelet would have been stopped but it is still running"))
restartKubelet := mustStopKubelet(ctx, f)
if nodeReboot {
ginkgo.By("Stopping the pod sandbox to simulate the node reboot")
@@ -5674,7 +5641,7 @@ var _ = SIGDescribe(nodefeature.SidecarContainers, framework.WithSerial(), "Cont
}
ginkgo.By("Restarting the kubelet")
restartKubelet()
restartKubelet(ctx)
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("kubelet was expected to be healthy"))


@@ -194,7 +194,7 @@ var _ = SIGDescribe("Device Manager", framework.WithSerial(), nodefeature.Device
framework.Logf("pod %s/%s running", testPod.Namespace, testPod.Name)
ginkgo.By("stopping the kubelet")
startKubelet := stopKubelet()
restartKubelet := mustStopKubelet(ctx, f)
ginkgo.By("stopping all the local containers - using CRI")
rs, _, err := getCRIClient()
@@ -210,7 +210,7 @@ var _ = SIGDescribe("Device Manager", framework.WithSerial(), nodefeature.Device
}
ginkgo.By("restarting the kubelet")
startKubelet()
restartKubelet(ctx)
ginkgo.By("waiting for the kubelet to be ready again")
// Wait for the Kubelet to be ready.


@@ -211,7 +211,7 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
e2epod.NewPodClient(f).DeleteSync(ctx, p.Name, metav1.DeleteOptions{}, 2*time.Minute)
}
restartKubelet(true)
restartKubelet(ctx, true)
ginkgo.By("Waiting for devices to become unavailable on the local node")
gomega.Eventually(ctx, func(ctx context.Context) bool {
@@ -365,7 +365,7 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
framework.Logf("testing pod: pre-restart UID=%s namespace=%s name=%s ready=%v", pod1.UID, pod1.Namespace, pod1.Name, podutils.IsPodReady(pod1))
ginkgo.By("Restarting Kubelet")
restartKubelet(true)
restartKubelet(ctx, true)
ginkgo.By("Wait for node to be ready again")
e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, 5*time.Minute)
@@ -430,7 +430,7 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
gomega.Expect(devIDRestart1).To(gomega.Equal(devID1))
ginkgo.By("Restarting Kubelet")
restartKubelet(true)
restartKubelet(ctx, true)
ginkgo.By("Wait for node to be ready again")
e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, 5*time.Minute)
@@ -499,7 +499,7 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
gomega.Eventually(getNodeResourceValues, devicePluginGracefulTimeout, f.Timeouts.Poll).WithContext(ctx).WithArguments(SampleDeviceResourceName).Should(gomega.Equal(ResourceValue{Allocatable: 0, Capacity: int(expectedSampleDevsAmount)}))
ginkgo.By("Restarting Kubelet")
restartKubelet(true)
restartKubelet(ctx, true)
ginkgo.By("Wait for node to be ready again")
gomega.Expect(e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, 5*time.Minute)).To(gomega.Succeed())
@@ -587,7 +587,7 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
framework.ExpectNoError(err)
ginkgo.By("Restarting Kubelet")
restartKubelet(true)
restartKubelet(ctx, true)
ginkgo.By("Wait for node to be ready again")
e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, 5*time.Minute)
@@ -652,7 +652,7 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
framework.ExpectNoError(err)
ginkgo.By("stopping the kubelet")
startKubelet := stopKubelet()
restartKubelet := mustStopKubelet(ctx, f)
// wait until the kubelet health check fails
gomega.Eventually(ctx, func() bool {
@@ -666,7 +666,7 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
deletePodSyncByName(ctx, f, pod.Name)
framework.Logf("Starting the kubelet")
startKubelet()
restartKubelet(ctx)
// wait until the kubelet health check succeeds
gomega.Eventually(ctx, func() bool {
@@ -947,7 +947,7 @@ func testDevicePluginNodeReboot(f *framework.Framework, pluginSockDir string) {
framework.ExpectNoError(err)
ginkgo.By("stopping the kubelet")
startKubelet := stopKubelet()
restartKubelet := mustStopKubelet(ctx, f)
ginkgo.By("stopping all the local containers - using CRI")
rs, _, err := getCRIClient()
@@ -963,7 +963,7 @@ func testDevicePluginNodeReboot(f *framework.Framework, pluginSockDir string) {
}
ginkgo.By("restarting the kubelet")
startKubelet()
restartKubelet(ctx)
ginkgo.By("Wait for node to be ready again")
e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, 5*time.Minute)


@@ -107,7 +107,7 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
}
ginkgo.By("restarting Kubelet")
restartKubelet(true)
restartKubelet(ctx, true)
ginkgo.By("wait for Kubelet plugin re-registration")
gomega.Eventually(getNewCalls).WithTimeout(pluginRegistrationTimeout).Should(testdriver.BeRegistered)
@@ -129,16 +129,15 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
// Stop Kubelet
ginkgo.By("stop kubelet")
startKubelet := stopKubelet()
restartKubelet := mustStopKubelet(ctx, f)
pod := createTestObjects(ctx, f.ClientSet, getNodeName(ctx, f), f.Namespace.Name, "draclass", "external-claim", "drapod", true, []string{driverName})
// Pod must be in pending state
err := e2epod.WaitForPodCondition(ctx, f.ClientSet, f.Namespace.Name, pod.Name, "Pending", framework.PodStartShortTimeout, func(pod *v1.Pod) (bool, error) {
return pod.Status.Phase == v1.PodPending, nil
})
framework.ExpectNoError(err)
// Start Kubelet
ginkgo.By("restart kubelet")
startKubelet()
restartKubelet(ctx)
// Pod should succeed
err = e2epod.WaitForPodSuccessInNamespaceTimeout(ctx, f.ClientSet, pod.Name, f.Namespace.Name, framework.PodStartShortTimeout)
framework.ExpectNoError(err)
@@ -227,12 +226,12 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
gomega.Eventually(kubeletPlugin.GetGRPCCalls).WithTimeout(retryTestTimeout).Should(testdriver.NodePrepareResourcesFailed)
ginkgo.By("stop Kubelet")
startKubelet := stopKubelet()
restartKubelet := mustStopKubelet(ctx, f)
unsetNodePrepareResourcesFailureMode()
ginkgo.By("start Kubelet")
startKubelet()
ginkgo.By("restart Kubelet")
restartKubelet(ctx)
ginkgo.By("wait for NodePrepareResources call to succeed")
gomega.Eventually(kubeletPlugin.GetGRPCCalls).WithTimeout(retryTestTimeout).Should(testdriver.NodePrepareResourcesSucceeded)
@@ -254,12 +253,12 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
gomega.Eventually(kubeletPlugin.GetGRPCCalls).WithTimeout(retryTestTimeout).Should(testdriver.NodeUnprepareResourcesFailed)
ginkgo.By("stop Kubelet")
startKubelet := stopKubelet()
restartKubelet := mustStopKubelet(ctx, f)
unsetNodeUnprepareResourcesFailureMode()
ginkgo.By("start Kubelet")
startKubelet()
ginkgo.By("restart Kubelet")
restartKubelet(ctx)
ginkgo.By("wait for NodeUnprepareResources call to succeed")
gomega.Eventually(kubeletPlugin.GetGRPCCalls).WithTimeout(retryTestTimeout).Should(testdriver.NodeUnprepareResourcesSucceeded)
@@ -313,7 +312,7 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
gomega.Eventually(kubeletPlugin.GetGRPCCalls).WithTimeout(retryTestTimeout).Should(testdriver.NodeUnprepareResourcesFailed)
ginkgo.By("restart Kubelet")
stopKubelet()()
restartKubelet(ctx, true)
ginkgo.By("wait for NodeUnprepareResources call to fail")
gomega.Eventually(kubeletPlugin.GetGRPCCalls).WithTimeout(retryTestTimeout).Should(testdriver.NodeUnprepareResourcesFailed)
@@ -337,15 +336,15 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
framework.ExpectNoError(err)
ginkgo.By("stop Kubelet")
startKubelet := stopKubelet()
restartKubelet := mustStopKubelet(ctx, f)
ginkgo.By("delete pod")
e2epod.DeletePodOrFail(ctx, f.ClientSet, f.Namespace.Name, pod.Name)
unblockNodePrepareResources()
ginkgo.By("start Kubelet")
startKubelet()
ginkgo.By("restart Kubelet")
restartKubelet(ctx)
calls := kubeletPlugin.CountCalls("/NodePrepareResources")
ginkgo.By("make sure NodePrepareResources is not called again")
@@ -447,7 +446,7 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
framework.ExpectNoError(err)
ginkgo.By("restart Kubelet")
restartKubelet(true)
restartKubelet(ctx, true)
unblockNodePrepareResources()
@@ -472,7 +471,7 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
gomega.Eventually(kubeletPlugin2.GetGRPCCalls).WithTimeout(retryTestTimeout).Should(testdriver.NodePrepareResourcesSucceeded)
ginkgo.By("restart Kubelet")
restartKubelet(true)
restartKubelet(ctx, true)
unblockNodeUnprepareResources()
@@ -500,10 +499,10 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
f.It("must be removed on kubelet startup", f.WithDisruptive(), func(ctx context.Context) {
ginkgo.By("stop kubelet")
startKubelet := stopKubelet()
restartKubelet := mustStopKubelet(ctx, f)
ginkgo.DeferCleanup(func() {
if startKubelet != nil {
startKubelet()
if restartKubelet != nil {
restartKubelet(ctx)
}
})
@@ -518,9 +517,9 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
gomega.Consistently(ctx, listResources).WithTimeout(5*time.Second).Should(matchAll, "ResourceSlices without kubelet")
ginkgo.By("start kubelet")
startKubelet()
startKubelet = nil
ginkgo.By("restart kubelet")
restartKubelet(ctx)
restartKubelet = nil
ginkgo.By("wait for exactly the node's ResourceSlice to get deleted")
gomega.Eventually(ctx, listResources).Should(matchOtherNode, "ResourceSlices with kubelet")


@@ -222,7 +222,7 @@ var _ = SIGDescribe("HugePages", framework.WithSerial(), feature.HugePages, "[No
gomega.Expect(value.String()).To(gomega.Equal("9Mi"), "huge pages with size 3Mi should be supported")
ginkgo.By("restarting the node and verifying that huge pages with size 3Mi are not supported")
restartKubelet(true)
restartKubelet(ctx, true)
ginkgo.By("verifying that the hugepages-3Mi resource no longer is present")
gomega.Eventually(ctx, func() bool {
@@ -235,14 +235,14 @@ var _ = SIGDescribe("HugePages", framework.WithSerial(), feature.HugePages, "[No
ginkgo.It("should add resources for new huge page sizes on kubelet restart", func(ctx context.Context) {
ginkgo.By("Stopping kubelet")
startKubelet := stopKubelet()
restartKubelet := mustStopKubelet(ctx, f)
ginkgo.By(`Patching away support for hugepage resource "hugepages-2Mi"`)
patch := []byte(`[{"op": "remove", "path": "/status/capacity/hugepages-2Mi"}, {"op": "remove", "path": "/status/allocatable/hugepages-2Mi"}]`)
result := f.ClientSet.CoreV1().RESTClient().Patch(types.JSONPatchType).Resource("nodes").Name(framework.TestContext.NodeName).SubResource("status").Body(patch).Do(ctx)
framework.ExpectNoError(result.Error(), "while patching")
ginkgo.By("Starting kubelet again")
startKubelet()
ginkgo.By("Restarting kubelet again")
restartKubelet(ctx)
ginkgo.By("verifying that the hugepages-2Mi resource is present")
gomega.Eventually(ctx, func() bool {
@@ -352,7 +352,7 @@ var _ = SIGDescribe("HugePages", framework.WithSerial(), feature.HugePages, "[No
setHugepages(ctx)
ginkgo.By("restarting kubelet to pick up pre-allocated hugepages")
restartKubelet(true)
restartKubelet(ctx, true)
waitForHugepages(ctx)
@@ -370,7 +370,7 @@ var _ = SIGDescribe("HugePages", framework.WithSerial(), feature.HugePages, "[No
releaseHugepages(ctx)
ginkgo.By("restarting kubelet to pick up pre-allocated hugepages")
restartKubelet(true)
restartKubelet(ctx, true)
waitForHugepages(ctx)
})


@@ -94,8 +94,7 @@ var _ = SIGDescribe("ImageGarbageCollect", framework.WithSerial(), framework.Wit
e2epod.NewPodClient(f).DeleteSync(ctx, pod.ObjectMeta.Name, metav1.DeleteOptions{}, e2epod.DefaultPodDeletionTimeout)
restartKubelet(true)
waitForKubeletToStart(ctx, f)
restartKubelet(ctx, true)
// Wait until the image's maxAge has elapsed after the kubelet restart to ensure the image
// isn't GC'd too early.


@@ -54,12 +54,7 @@ var _ = SIGDescribe("Kubelet Config", framework.WithSlow(), framework.WithSerial
framework.ExpectNoError(err)
ginkgo.By("Stopping the kubelet")
restartKubelet := stopKubelet()
// wait until the kubelet health check fails
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("expected kubelet health check to be failed"))
restartKubelet := mustStopKubelet(ctx, f)
configDir := framework.TestContext.KubeletConfigDropinDir
@@ -128,7 +123,7 @@ featureGates:
DynamicResourceAllocation: true`)
framework.ExpectNoError(os.WriteFile(filepath.Join(configDir, "20-kubelet.conf"), contents, 0755))
ginkgo.By("Restarting the kubelet")
restartKubelet()
restartKubelet(ctx)
// wait until the kubelet health check succeeds
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)


@@ -101,12 +101,9 @@ var _ = SIGDescribe("Node Container Manager", framework.WithSerial(), func() {
framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(oldCfg))
ginkgo.By("Restarting the kubelet")
restartKubelet(true)
restartKubelet(ctx, true)
// wait until the kubelet health check succeeds
gomega.Eventually(ctx, func(ctx context.Context) bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}).WithTimeout(2 * time.Minute).WithPolling(5 * time.Second).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state"))
waitForKubeletToStart(ctx, f)
ginkgo.By("Started the kubelet")
}
})
@@ -121,12 +118,9 @@ var _ = SIGDescribe("Node Container Manager", framework.WithSerial(), func() {
framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(newCfg))
ginkgo.By("Restarting the kubelet")
restartKubelet(true)
restartKubelet(ctx, true)
// wait until the kubelet health check succeeds
gomega.Eventually(ctx, func(ctx context.Context) bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}).WithTimeout(2 * time.Minute).WithPolling(5 * time.Second).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state"))
waitForKubeletToStart(ctx, f)
ginkgo.By("Started the kubelet")
gomega.Consistently(ctx, func(ctx context.Context) bool {
@@ -243,7 +237,7 @@ func runTest(ctx context.Context, f *framework.Framework) error {
if oldCfg != nil {
// Update the Kubelet configuration.
ginkgo.By("Stopping the kubelet")
startKubelet := stopKubelet()
restartKubelet := mustStopKubelet(ctx, f)
// wait until the kubelet health check fails
gomega.Eventually(ctx, func() bool {
@@ -252,8 +246,8 @@ func runTest(ctx context.Context, f *framework.Framework) error {
framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(oldCfg))
ginkgo.By("Starting the kubelet")
startKubelet()
ginkgo.By("Restarting the kubelet")
restartKubelet(ctx)
// wait until the kubelet health check succeeds
gomega.Eventually(ctx, func(ctx context.Context) bool {
@@ -271,12 +265,7 @@ func runTest(ctx context.Context, f *framework.Framework) error {
// Set the new kubelet configuration.
// Update the Kubelet configuration.
ginkgo.By("Stopping the kubelet")
startKubelet := stopKubelet()
// wait until the kubelet health check fails
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, time.Minute, time.Second).Should(gomega.BeFalseBecause("expected kubelet health check to be failed"))
restartKubelet := mustStopKubelet(ctx, f)
expectedNAPodCgroup := cm.NewCgroupName(cm.RootCgroupName, nodeAllocatableCgroup)
@@ -293,7 +282,7 @@ func runTest(ctx context.Context, f *framework.Framework) error {
framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(newCfg))
ginkgo.By("Starting the kubelet")
startKubelet()
restartKubelet(ctx)
// wait until the kubelet health check succeeds
gomega.Eventually(ctx, func() bool {


@@ -52,22 +52,12 @@ func setKubeletConfig(ctx context.Context, f *framework.Framework, cfg *kubeletc
if cfg != nil {
// Update the Kubelet configuration.
ginkgo.By("Stopping the kubelet")
startKubelet := stopKubelet()
// wait until the kubelet health check fails
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, time.Minute, time.Second).Should(gomega.BeFalseBecause("expected kubelet health check to be failed"))
restartKubelet := mustStopKubelet(ctx, f)
framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(cfg))
ginkgo.By("Starting the kubelet")
startKubelet()
// wait until the kubelet health check succeeds
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, 2*time.Minute, 5*time.Second).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state"))
ginkgo.By("Restarting the kubelet")
restartKubelet(ctx)
}
// Wait for the Kubelet to be ready.


@@ -49,7 +49,7 @@ var _ = SIGDescribe("OSArchLabelReconciliation", framework.WithSerial(), framewo
ginkgo.By("killing and restarting kubelet")
// Let's kill the kubelet
startKubelet := stopKubelet()
restartKubelet := mustStopKubelet(ctx, f)
// Update labels
newNode := node.DeepCopy()
newNode.Labels[v1.LabelOSStable] = "dummyOS"
@@ -57,7 +57,7 @@ var _ = SIGDescribe("OSArchLabelReconciliation", framework.WithSerial(), framewo
_, _, err := nodeutil.PatchNodeStatus(f.ClientSet.CoreV1(), types.NodeName(node.Name), node, newNode)
framework.ExpectNoError(err)
// Restart kubelet
startKubelet()
restartKubelet(ctx)
framework.ExpectNoError(e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, framework.RestartNodeReadyAgainTimeout))
// If this happens right, node should have all the labels reset properly
err = waitForNodeLabels(ctx, f.ClientSet.CoreV1(), node.Name, 5*time.Minute)


@@ -1146,7 +1146,7 @@ var _ = SIGDescribe("POD Resources", framework.WithSerial(), feature.PodResource
expectPodResources(ctx, 1, cli, []podDesc{desc})
ginkgo.By("Restarting Kubelet")
restartKubelet(true)
restartKubelet(ctx, true)
// we need to wait for the node to be reported ready before we can safely query
// the podresources endpoint again. Otherwise we will have false negatives.


@@ -200,8 +200,8 @@ var _ = SIGDescribe("Restart", framework.WithSerial(), framework.WithSlow(), fra
ginkgo.By("killing and restarting kubelet")
// We want to kill the kubelet rather than gracefully restart it
startKubelet := stopKubelet()
startKubelet()
restartKubelet := mustStopKubelet(ctx, f)
restartKubelet(ctx)
// If this test works correctly, each of these pods will exit
// with no issue. But if accounting breaks, pods scheduled after
@@ -309,19 +309,10 @@ var _ = SIGDescribe("Restart", framework.WithSerial(), framework.WithSlow(), fra
// As soon as the pod enters the succeeded phase (detected by the watch above), kill the kubelet.
// This is a bit racy, but the goal is to stop the kubelet before it can delete the pod from the API server, in order to repro https://issues.k8s.io/116925
ginkgo.By("Stopping the kubelet")
startKubelet := stopKubelet()
// wait until the kubelet health check fails
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("expected kubelet health check to be failed"))
restartKubelet := mustStopKubelet(ctx, f)
ginkgo.By("Starting the kubelet")
startKubelet()
// wait until the kubelet health check succeeds
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state"))
ginkgo.By("Restarting the kubelet")
restartKubelet(ctx)
// Wait for the Kubelet to be ready.
gomega.Eventually(ctx, func(ctx context.Context) bool {
@@ -361,12 +352,7 @@ var _ = SIGDescribe("Restart", framework.WithSerial(), framework.WithSlow(), fra
},
})
ginkgo.By("Stopping the kubelet")
startKubelet := stopKubelet()
// wait until the kubelet health check fails
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("expected kubelet health check to be failed"))
restartKubelet := mustStopKubelet(ctx, f)
// Create the pod bound to the node. It will remain in the Pending
// phase as Kubelet is down.
@@ -379,19 +365,7 @@ var _ = SIGDescribe("Restart", framework.WithSerial(), framework.WithSlow(), fra
// Restart Kubelet so that it proceeds with deletion
ginkgo.By("Starting the kubelet")
startKubelet()
// wait until the kubelet health check succeeds
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state"))
// Wait for the Kubelet to be ready.
gomega.Eventually(ctx, func(ctx context.Context) bool {
nodes, err := e2enode.TotalReady(ctx, f.ClientSet)
framework.ExpectNoError(err)
return nodes == 1
}, time.Minute, f.Timeouts.Poll).Should(gomega.BeTrueBecause("expected kubelet to be in ready state"))
restartKubelet(ctx)
ginkgo.By(fmt.Sprintf("After the kubelet is restarted, verify the pod (%v/%v) is deleted by kubelet", pod.Namespace, pod.Name))
gomega.Eventually(ctx, func(ctx context.Context) error {
@@ -439,12 +413,7 @@ var _ = SIGDescribe("Restart", framework.WithSerial(), framework.WithSlow(), fra
framework.ExpectNoError(err, "Failed to await for the pod to be running: (%v/%v)", f.Namespace.Name, pod.Name)
ginkgo.By("Stopping the kubelet")
startKubelet := stopKubelet()
// wait until the kubelet health check fails
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("expected kubelet health check to be failed"))
restartKubelet := mustStopKubelet(ctx, f)
ginkgo.By(fmt.Sprintf("Deleting the pod (%v/%v) to set a deletion timestamp", pod.Namespace, pod.Name))
err = e2epod.NewPodClient(f).Delete(ctx, pod.Name, metav1.DeleteOptions{GracePeriodSeconds: &gracePeriod})
@@ -454,13 +423,8 @@ var _ = SIGDescribe("Restart", framework.WithSerial(), framework.WithSlow(), fra
e2enode.RemoveLabelOffNode(f.ClientSet, nodeName, nodeLabelKey)
// Restart Kubelet so that it proceeds with deletion
ginkgo.By("Starting the kubelet")
startKubelet()
// wait until the kubelet health check succeeds
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state"))
ginkgo.By("Restarting the kubelet")
restartKubelet(ctx)
// Wait for the Kubelet to be ready.
gomega.Eventually(ctx, func(ctx context.Context) bool {


@@ -69,29 +69,19 @@ var _ = SIGDescribe("Unknown Pods", framework.WithSerial(), framework.WithDisrup
ginkgo.It("the static pod should be terminated and cleaned up due to becoming an unknown pod due to being force deleted while the kubelet is not running", func(ctx context.Context) {
framework.Logf("Stopping the kubelet")
startKubelet := stopKubelet()
restartKubelet := mustStopKubelet(ctx, f)
pod, err := f.ClientSet.CoreV1().Pods(ns).Get(ctx, mirrorPodName, metav1.GetOptions{})
framework.ExpectNoError(err)
// wait until the kubelet health check fails
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("expected kubelet health check to be failed"))
framework.Logf("Delete the static pod manifest while the kubelet is not running")
file := staticPodPath(podPath, staticPodName, ns)
framework.Logf("deleting static pod manifest %q", file)
err = os.Remove(file)
framework.ExpectNoError(err)
framework.Logf("Starting the kubelet")
startKubelet()
// wait until the kubelet health check succeeds
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state"))
framework.Logf("Restarting the kubelet")
restartKubelet(ctx)
framework.Logf("wait for the mirror pod %v to disappear", mirrorPodName)
gomega.Eventually(ctx, func(ctx context.Context) error {
@@ -140,27 +130,17 @@ var _ = SIGDescribe("Unknown Pods", framework.WithSerial(), framework.WithDisrup
ginkgo.It("the api pod should be terminated and cleaned up due to becoming an unknown pod due to being force deleted while the kubelet is not running", func(ctx context.Context) {
framework.Logf("Stopping the kubelet")
startKubelet := stopKubelet()
restartKubelet := mustStopKubelet(ctx, f)
pod, err := f.ClientSet.CoreV1().Pods(ns).Get(ctx, podName, metav1.GetOptions{})
framework.ExpectNoError(err)
// wait until the kubelet health check fails
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("expected kubelet health check to be failed"))
framework.Logf("Delete the pod while the kubelet is not running")
// deletePodSyncByName force-deletes the pod, removing it from the kubelet's config
deletePodSyncByName(ctx, f, podName)
framework.Logf("Starting the kubelet")
startKubelet()
// wait until the kubelet health check succeeds
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state"))
framework.Logf("Restarting the kubelet")
restartKubelet(ctx)
framework.Logf("wait for the pod %v to disappear", podName)
gomega.Eventually(ctx, func(ctx context.Context) error {


@@ -216,12 +216,7 @@ func tempSetCurrentKubeletConfig(f *framework.Framework, updateFunction func(ctx
func updateKubeletConfig(ctx context.Context, f *framework.Framework, kubeletConfig *kubeletconfig.KubeletConfiguration, deleteStateFiles bool) {
// Update the Kubelet configuration.
ginkgo.By("Stopping the kubelet")
startKubelet := stopKubelet()
// wait until the kubelet health check fails
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, time.Minute, time.Second).Should(gomega.BeFalseBecause("expected kubelet health check to be failed"))
restartKubelet := mustStopKubelet(ctx, f)
// Delete CPU and memory manager state files to be sure it will not prevent the kubelet restart
if deleteStateFiles {
@@ -231,9 +226,8 @@ func updateKubeletConfig(ctx context.Context, f *framework.Framework, kubeletCon
framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(kubeletConfig))
ginkgo.By("Starting the kubelet")
startKubelet()
waitForKubeletToStart(ctx, f)
ginkgo.By("Restarting the kubelet")
restartKubelet(ctx)
}
func waitForKubeletToStart(ctx context.Context, f *framework.Framework) {
@@ -433,31 +427,37 @@ func startContainerRuntime() error {
// Warning: the "current" kubelet is poorly defined. The "current" kubelet is assumed to be the most
// recent kubelet service unit; in other words, there is no unique ID we can use to explicitly bind a kubelet
// instance to a test run.
func restartKubelet(running bool) {
func restartKubelet(ctx context.Context, running bool) {
kubeletServiceName := findKubeletServiceName(running)
// reset the kubelet service start-limit-hit
stdout, err := exec.Command("sudo", "systemctl", "reset-failed", kubeletServiceName).CombinedOutput()
stdout, err := exec.CommandContext(ctx, "sudo", "systemctl", "reset-failed", kubeletServiceName).CombinedOutput()
framework.ExpectNoError(err, "Failed to reset kubelet start-limit-hit with systemctl: %v, %s", err, string(stdout))
stdout, err = exec.Command("sudo", "systemctl", "restart", kubeletServiceName).CombinedOutput()
stdout, err = exec.CommandContext(ctx, "sudo", "systemctl", "restart", kubeletServiceName).CombinedOutput()
framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %s", err, string(stdout))
}
// stopKubelet kills the running kubelet and returns a func that restarts it
func stopKubelet() func() {
// mustStopKubelet kills the running kubelet, waits until the health check fails, and returns a func that restarts it
func mustStopKubelet(ctx context.Context, f *framework.Framework) func(ctx context.Context) {
kubeletServiceName := findKubeletServiceName(true)
// reset the kubelet service start-limit-hit
stdout, err := exec.Command("sudo", "systemctl", "reset-failed", kubeletServiceName).CombinedOutput()
stdout, err := exec.CommandContext(ctx, "sudo", "systemctl", "reset-failed", kubeletServiceName).CombinedOutput()
framework.ExpectNoError(err, "Failed to reset kubelet start-limit-hit with systemctl: %v, %s", err, string(stdout))
stdout, err = exec.Command("sudo", "systemctl", "kill", kubeletServiceName).CombinedOutput()
stdout, err = exec.CommandContext(ctx, "sudo", "systemctl", "kill", kubeletServiceName).CombinedOutput()
framework.ExpectNoError(err, "Failed to stop kubelet with systemctl: %v, %s", err, string(stdout))
return func() {
// wait until the kubelet health check fails
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("kubelet was expected to be stopped but it is still running"))
return func(ctx context.Context) {
// we should restart the service, otherwise the transient service start will fail
stdout, err := exec.Command("sudo", "systemctl", "restart", kubeletServiceName).CombinedOutput()
stdout, err := exec.CommandContext(ctx, "sudo", "systemctl", "restart", kubeletServiceName).CombinedOutput()
framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %v", err, stdout)
waitForKubeletToStart(ctx, f)
}
}
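
One design consequence is visible in the DRA hunk above: because the restart closure now both restarts the kubelet and waits for it to become healthy, a test that restarts mid-test nils out the closure after calling it, so a deferred cleanup doesn't restart the kubelet a second time. A hedged sketch of that pattern, with a plain defer standing in for ginkgo.DeferCleanup and illustrative names (restartFunc, runWithKubeletStopped) that are not in the commit:

package main

import "context"

// restartFunc matches the shape of the closure mustStopKubelet returns.
type restartFunc func(ctx context.Context)

// runWithKubeletStopped mirrors the DeferCleanup nil-guard from the DRA hunk:
// the deferred restart fires only if the test body did not already restart.
func runWithKubeletStopped(ctx context.Context, mustStop func(context.Context) restartFunc) {
	restartKubelet := mustStop(ctx)
	defer func() {
		if restartKubelet != nil {
			restartKubelet(ctx)
		}
	}()
	// ... assertions while the kubelet is down ...
	restartKubelet(ctx)  // blocks until the kubelet is healthy again
	restartKubelet = nil // keep the deferred cleanup from restarting twice
}

func main() {
	runWithKubeletStopped(context.Background(), func(ctx context.Context) restartFunc {
		return func(ctx context.Context) {} // stub restart
	})
}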