e2e_node: refactor stopping and restarting kubelet

Moved the kubelet health checks from the test cases into the stop/restart kubelet helpers (stopKubelet is now mustStopKubelet).
This should make the API cleaner and easier to use; the new call-site pattern is sketched below.
Ed Bartosh 2024-11-05 16:09:04 +02:00
parent ab4b869b52
commit 3aa95dafea
14 changed files with 104 additions and 221 deletions
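
For orientation, the before/after call-site pattern looks roughly like this (illustrative sketch assembled from the hunks below; all identifiers come from the e2e_node test utilities shown in this diff):

// Before: each test stopped the kubelet and open-coded the health-check wait.
startKubelet := stopKubelet()
gomega.Eventually(ctx, func() bool {
	return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("kubelet should be stopped"))
// ... work that requires the kubelet to be down ...
startKubelet()

// After: mustStopKubelet waits for the health check to fail before returning, and
// the returned func takes a context, restarts the kubelet, and waits for it to be ready.
restartKubelet := mustStopKubelet(ctx, f)
// ... work that requires the kubelet to be down ...
restartKubelet(ctx)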

View File

@ -1234,17 +1234,14 @@ var _ = SIGDescribe(framework.WithSerial(), "Containers Lifecycle", func() {
podSandboxID := sandboxes[0].Id
ginkgo.By("Stopping the kubelet")
restartKubelet := stopKubelet()
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("kubelet was expected to be stopped but it is still running"))
restartKubelet := mustStopKubelet(ctx, f)
ginkgo.By("Stopping the pod sandbox to simulate the node reboot")
err = rs.StopPodSandbox(ctx, podSandboxID)
framework.ExpectNoError(err)
ginkgo.By("Restarting the kubelet")
restartKubelet()
restartKubelet(ctx)
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("kubelet was expected to be healthy"))
@ -1361,14 +1358,10 @@ var _ = SIGDescribe(framework.WithSerial(), "Containers Lifecycle", func() {
ginkgo.It("should not restart any completed init container after the kubelet restart", func(ctx context.Context) {
ginkgo.By("stopping the kubelet")
startKubelet := stopKubelet()
// wait until the kubelet health check will fail
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("kubelet should be stopped"))
restartKubelet := mustStopKubelet(ctx, f)
ginkgo.By("restarting the kubelet")
startKubelet()
restartKubelet(ctx)
// wait until the kubelet health check will succeed
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
@ -1409,11 +1402,7 @@ var _ = SIGDescribe(framework.WithSerial(), "Containers Lifecycle", func() {
ginkgo.It("should not restart any completed init container, even after the completed init container statuses have been removed and the kubelet restarted", func(ctx context.Context) {
ginkgo.By("stopping the kubelet")
startKubelet := stopKubelet()
// wait until the kubelet health check will fail
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("kubelet should be stopped"))
restartKubelet := mustStopKubelet(ctx, f)
ginkgo.By("removing the completed init container statuses from the container runtime")
rs, _, err := getCRIClient()
@ -1437,7 +1426,7 @@ var _ = SIGDescribe(framework.WithSerial(), "Containers Lifecycle", func() {
}
ginkgo.By("restarting the kubelet")
startKubelet()
restartKubelet(ctx)
// wait until the kubelet health check will succeed
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
@ -1544,18 +1533,10 @@ var _ = SIGDescribe(framework.WithSerial(), "Containers Lifecycle", func() {
ginkgo.It("should not restart any completed init container after the kubelet restart", func(ctx context.Context) {
ginkgo.By("stopping the kubelet")
startKubelet := stopKubelet()
// wait until the kubelet health check will fail
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("kubelet should be stopped"))
restartKubelet := mustStopKubelet(ctx, f)
ginkgo.By("restarting the kubelet")
startKubelet()
// wait until the kubelet health check will succeed
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("kubelet should be restarted"))
restartKubelet(ctx)
ginkgo.By("ensuring that no completed init container is restarted")
gomega.Consistently(ctx, func() bool {
@ -1588,11 +1569,7 @@ var _ = SIGDescribe(framework.WithSerial(), "Containers Lifecycle", func() {
ginkgo.It("should not restart any completed init container, even after the completed init container statuses have been removed and the kubelet restarted", func(ctx context.Context) {
ginkgo.By("stopping the kubelet")
startKubelet := stopKubelet()
// wait until the kubelet health check will fail
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("kubelet should be stopped"))
restartKubelet := mustStopKubelet(ctx, f)
ginkgo.By("removing the completed init container statuses from the container runtime")
rs, _, err := getCRIClient()
@ -1616,11 +1593,7 @@ var _ = SIGDescribe(framework.WithSerial(), "Containers Lifecycle", func() {
}
ginkgo.By("restarting the kubelet")
startKubelet()
// wait until the kubelet health check will succeed
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("kubelet should be restarted"))
restartKubelet(ctx)
ginkgo.By("ensuring that no completed init container is restarted")
gomega.Consistently(ctx, func() bool {
@ -5517,17 +5490,14 @@ var _ = SIGDescribe(nodefeature.SidecarContainers, framework.WithSerial(), "Cont
podSandboxID := sandboxes[0].Id
ginkgo.By("Stopping the kubelet")
restartKubelet := stopKubelet()
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("expected kubelet would have been stopped but it is still running"))
restartKubelet := mustStopKubelet(ctx, f)
ginkgo.By("Stopping the pod sandbox to simulate the node reboot")
err = rs.StopPodSandbox(ctx, podSandboxID)
framework.ExpectNoError(err)
ginkgo.By("Restarting the kubelet")
restartKubelet()
restartKubelet(ctx)
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("kubelet was expected to be healthy"))
@ -5662,10 +5632,7 @@ var _ = SIGDescribe(nodefeature.SidecarContainers, framework.WithSerial(), "Cont
podSandboxID := sandboxes[0].Id
ginkgo.By("Stopping the kubelet")
restartKubelet := stopKubelet()
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("expected kubelet would have been stopped but it is still running"))
restartKubelet := mustStopKubelet(ctx, f)
if nodeReboot {
ginkgo.By("Stopping the pod sandbox to simulate the node reboot")
@ -5674,7 +5641,7 @@ var _ = SIGDescribe(nodefeature.SidecarContainers, framework.WithSerial(), "Cont
}
ginkgo.By("Restarting the kubelet")
restartKubelet()
restartKubelet(ctx)
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("kubelet was expected to be healthy"))

View File

@ -194,7 +194,7 @@ var _ = SIGDescribe("Device Manager", framework.WithSerial(), nodefeature.Device
framework.Logf("pod %s/%s running", testPod.Namespace, testPod.Name)
ginkgo.By("stopping the kubelet")
startKubelet := stopKubelet()
restartKubelet := mustStopKubelet(ctx, f)
ginkgo.By("stopping all the local containers - using CRI")
rs, _, err := getCRIClient()
@ -210,7 +210,7 @@ var _ = SIGDescribe("Device Manager", framework.WithSerial(), nodefeature.Device
}
ginkgo.By("restarting the kubelet")
startKubelet()
restartKubelet(ctx)
ginkgo.By("waiting for the kubelet to be ready again")
// Wait for the Kubelet to be ready.

View File

@ -211,7 +211,7 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
e2epod.NewPodClient(f).DeleteSync(ctx, p.Name, metav1.DeleteOptions{}, 2*time.Minute)
}
restartKubelet(true)
restartKubelet(ctx, true)
ginkgo.By("Waiting for devices to become unavailable on the local node")
gomega.Eventually(ctx, func(ctx context.Context) bool {
@ -365,7 +365,7 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
framework.Logf("testing pod: pre-restart UID=%s namespace=%s name=%s ready=%v", pod1.UID, pod1.Namespace, pod1.Name, podutils.IsPodReady(pod1))
ginkgo.By("Restarting Kubelet")
restartKubelet(true)
restartKubelet(ctx, true)
ginkgo.By("Wait for node to be ready again")
e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, 5*time.Minute)
@ -430,7 +430,7 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
gomega.Expect(devIDRestart1).To(gomega.Equal(devID1))
ginkgo.By("Restarting Kubelet")
restartKubelet(true)
restartKubelet(ctx, true)
ginkgo.By("Wait for node to be ready again")
e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, 5*time.Minute)
@ -499,7 +499,7 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
gomega.Eventually(getNodeResourceValues, devicePluginGracefulTimeout, f.Timeouts.Poll).WithContext(ctx).WithArguments(SampleDeviceResourceName).Should(gomega.Equal(ResourceValue{Allocatable: 0, Capacity: int(expectedSampleDevsAmount)}))
ginkgo.By("Restarting Kubelet")
restartKubelet(true)
restartKubelet(ctx, true)
ginkgo.By("Wait for node to be ready again")
gomega.Expect(e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, 5*time.Minute)).To(gomega.Succeed())
@ -587,7 +587,7 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
framework.ExpectNoError(err)
ginkgo.By("Restarting Kubelet")
restartKubelet(true)
restartKubelet(ctx, true)
ginkgo.By("Wait for node to be ready again")
e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, 5*time.Minute)
@ -652,7 +652,7 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
framework.ExpectNoError(err)
ginkgo.By("stopping the kubelet")
startKubelet := stopKubelet()
restartKubelet := mustStopKubelet(ctx, f)
// wait until the kubelet health check will fail
gomega.Eventually(ctx, func() bool {
@ -666,7 +666,7 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
deletePodSyncByName(ctx, f, pod.Name)
framework.Logf("Starting the kubelet")
startKubelet()
restartKubelet(ctx)
// wait until the kubelet health check will succeed
gomega.Eventually(ctx, func() bool {
@ -947,7 +947,7 @@ func testDevicePluginNodeReboot(f *framework.Framework, pluginSockDir string) {
framework.ExpectNoError(err)
ginkgo.By("stopping the kubelet")
startKubelet := stopKubelet()
restartKubelet := mustStopKubelet(ctx, f)
ginkgo.By("stopping all the local containers - using CRI")
rs, _, err := getCRIClient()
@ -963,7 +963,7 @@ func testDevicePluginNodeReboot(f *framework.Framework, pluginSockDir string) {
}
ginkgo.By("restarting the kubelet")
startKubelet()
restartKubelet(ctx)
ginkgo.By("Wait for node to be ready again")
e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, 5*time.Minute)

View File

@ -107,7 +107,7 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
}
ginkgo.By("restarting Kubelet")
restartKubelet(true)
restartKubelet(ctx, true)
ginkgo.By("wait for Kubelet plugin re-registration")
gomega.Eventually(getNewCalls).WithTimeout(pluginRegistrationTimeout).Should(testdriver.BeRegistered)
@ -129,16 +129,15 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
// Stop Kubelet
ginkgo.By("stop kubelet")
startKubelet := stopKubelet()
restartKubelet := mustStopKubelet(ctx, f)
pod := createTestObjects(ctx, f.ClientSet, getNodeName(ctx, f), f.Namespace.Name, "draclass", "external-claim", "drapod", true, []string{driverName})
// Pod must be in pending state
err := e2epod.WaitForPodCondition(ctx, f.ClientSet, f.Namespace.Name, pod.Name, "Pending", framework.PodStartShortTimeout, func(pod *v1.Pod) (bool, error) {
return pod.Status.Phase == v1.PodPending, nil
})
framework.ExpectNoError(err)
// Start Kubelet
ginkgo.By("restart kubelet")
startKubelet()
restartKubelet(ctx)
// Pod should succeed
err = e2epod.WaitForPodSuccessInNamespaceTimeout(ctx, f.ClientSet, pod.Name, f.Namespace.Name, framework.PodStartShortTimeout)
framework.ExpectNoError(err)
@ -227,12 +226,12 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
gomega.Eventually(kubeletPlugin.GetGRPCCalls).WithTimeout(retryTestTimeout).Should(testdriver.NodePrepareResourcesFailed)
ginkgo.By("stop Kubelet")
startKubelet := stopKubelet()
restartKubelet := mustStopKubelet(ctx, f)
unsetNodePrepareResourcesFailureMode()
ginkgo.By("start Kubelet")
startKubelet()
ginkgo.By("restart Kubelet")
restartKubelet(ctx)
ginkgo.By("wait for NodePrepareResources call to succeed")
gomega.Eventually(kubeletPlugin.GetGRPCCalls).WithTimeout(retryTestTimeout).Should(testdriver.NodePrepareResourcesSucceeded)
@ -254,12 +253,12 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
gomega.Eventually(kubeletPlugin.GetGRPCCalls).WithTimeout(retryTestTimeout).Should(testdriver.NodeUnprepareResourcesFailed)
ginkgo.By("stop Kubelet")
startKubelet := stopKubelet()
restartKubelet := mustStopKubelet(ctx, f)
unsetNodeUnprepareResourcesFailureMode()
ginkgo.By("start Kubelet")
startKubelet()
ginkgo.By("restart Kubelet")
restartKubelet(ctx)
ginkgo.By("wait for NodeUnprepareResources call to succeed")
gomega.Eventually(kubeletPlugin.GetGRPCCalls).WithTimeout(retryTestTimeout).Should(testdriver.NodeUnprepareResourcesSucceeded)
@ -313,7 +312,7 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
gomega.Eventually(kubeletPlugin.GetGRPCCalls).WithTimeout(retryTestTimeout).Should(testdriver.NodeUnprepareResourcesFailed)
ginkgo.By("restart Kubelet")
stopKubelet()()
restartKubelet(ctx, true)
ginkgo.By("wait for NodeUnprepareResources call to fail")
gomega.Eventually(kubeletPlugin.GetGRPCCalls).WithTimeout(retryTestTimeout).Should(testdriver.NodeUnprepareResourcesFailed)
@ -337,15 +336,15 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
framework.ExpectNoError(err)
ginkgo.By("stop Kubelet")
startKubelet := stopKubelet()
restartKubelet := mustStopKubelet(ctx, f)
ginkgo.By("delete pod")
e2epod.DeletePodOrFail(ctx, f.ClientSet, f.Namespace.Name, pod.Name)
unblockNodePrepareResources()
ginkgo.By("start Kubelet")
startKubelet()
ginkgo.By("restart Kubelet")
restartKubelet(ctx)
calls := kubeletPlugin.CountCalls("/NodePrepareResources")
ginkgo.By("make sure NodePrepareResources is not called again")
@ -447,7 +446,7 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
framework.ExpectNoError(err)
ginkgo.By("restart Kubelet")
restartKubelet(true)
restartKubelet(ctx, true)
unblockNodePrepareResources()
@ -472,7 +471,7 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
gomega.Eventually(kubeletPlugin2.GetGRPCCalls).WithTimeout(retryTestTimeout).Should(testdriver.NodePrepareResourcesSucceeded)
ginkgo.By("restart Kubelet")
restartKubelet(true)
restartKubelet(ctx, true)
unblockNodeUnprepareResources()
@ -500,10 +499,10 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
f.It("must be removed on kubelet startup", f.WithDisruptive(), func(ctx context.Context) {
ginkgo.By("stop kubelet")
startKubelet := stopKubelet()
restartKubelet := mustStopKubelet(ctx, f)
ginkgo.DeferCleanup(func() {
if startKubelet != nil {
startKubelet()
if restartKubelet != nil {
restartKubelet(ctx)
}
})
@ -518,9 +517,9 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
gomega.Consistently(ctx, listResources).WithTimeout(5*time.Second).Should(matchAll, "ResourceSlices without kubelet")
ginkgo.By("start kubelet")
startKubelet()
startKubelet = nil
ginkgo.By("restart kubelet")
restartKubelet(ctx)
restartKubelet = nil
ginkgo.By("wait for exactly the node's ResourceSlice to get deleted")
gomega.Eventually(ctx, listResources).Should(matchOtherNode, "ResourceSlices with kubelet")

View File

@ -222,7 +222,7 @@ var _ = SIGDescribe("HugePages", framework.WithSerial(), feature.HugePages, "[No
gomega.Expect(value.String()).To(gomega.Equal("9Mi"), "huge pages with size 3Mi should be supported")
ginkgo.By("restarting the node and verifying that huge pages with size 3Mi are not supported")
restartKubelet(true)
restartKubelet(ctx, true)
ginkgo.By("verifying that the hugepages-3Mi resource no longer is present")
gomega.Eventually(ctx, func() bool {
@ -235,14 +235,14 @@ var _ = SIGDescribe("HugePages", framework.WithSerial(), feature.HugePages, "[No
ginkgo.It("should add resources for new huge page sizes on kubelet restart", func(ctx context.Context) {
ginkgo.By("Stopping kubelet")
startKubelet := stopKubelet()
restartKubelet := mustStopKubelet(ctx, f)
ginkgo.By(`Patching away support for hugepage resource "hugepages-2Mi"`)
patch := []byte(`[{"op": "remove", "path": "/status/capacity/hugepages-2Mi"}, {"op": "remove", "path": "/status/allocatable/hugepages-2Mi"}]`)
result := f.ClientSet.CoreV1().RESTClient().Patch(types.JSONPatchType).Resource("nodes").Name(framework.TestContext.NodeName).SubResource("status").Body(patch).Do(ctx)
framework.ExpectNoError(result.Error(), "while patching")
ginkgo.By("Starting kubelet again")
startKubelet()
ginkgo.By("Restarting kubelet again")
restartKubelet(ctx)
ginkgo.By("verifying that the hugepages-2Mi resource is present")
gomega.Eventually(ctx, func() bool {
@ -352,7 +352,7 @@ var _ = SIGDescribe("HugePages", framework.WithSerial(), feature.HugePages, "[No
setHugepages(ctx)
ginkgo.By("restarting kubelet to pick up pre-allocated hugepages")
restartKubelet(true)
restartKubelet(ctx, true)
waitForHugepages(ctx)
@ -370,7 +370,7 @@ var _ = SIGDescribe("HugePages", framework.WithSerial(), feature.HugePages, "[No
releaseHugepages(ctx)
ginkgo.By("restarting kubelet to pick up pre-allocated hugepages")
restartKubelet(true)
restartKubelet(ctx, true)
waitForHugepages(ctx)
})

View File

@ -94,8 +94,7 @@ var _ = SIGDescribe("ImageGarbageCollect", framework.WithSerial(), framework.Wit
e2epod.NewPodClient(f).DeleteSync(ctx, pod.ObjectMeta.Name, metav1.DeleteOptions{}, e2epod.DefaultPodDeletionTimeout)
restartKubelet(true)
waitForKubeletToStart(ctx, f)
restartKubelet(ctx, true)
// Wait until the maxAge of the image after the kubelet is restarted to ensure it doesn't
// GC too early.

View File

@ -54,12 +54,7 @@ var _ = SIGDescribe("Kubelet Config", framework.WithSlow(), framework.WithSerial
framework.ExpectNoError(err)
ginkgo.By("Stopping the kubelet")
restartKubelet := stopKubelet()
// wait until the kubelet health check will fail
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("expected kubelet health check to be failed"))
restartKubelet := mustStopKubelet(ctx, f)
configDir := framework.TestContext.KubeletConfigDropinDir
@ -128,7 +123,7 @@ featureGates:
DynamicResourceAllocation: true`)
framework.ExpectNoError(os.WriteFile(filepath.Join(configDir, "20-kubelet.conf"), contents, 0755))
ginkgo.By("Restarting the kubelet")
restartKubelet()
restartKubelet(ctx)
// wait until the kubelet health check will succeed
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)

View File

@ -101,12 +101,9 @@ var _ = SIGDescribe("Node Container Manager", framework.WithSerial(), func() {
framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(oldCfg))
ginkgo.By("Restarting the kubelet")
restartKubelet(true)
restartKubelet(ctx, true)
// wait until the kubelet health check will succeed
gomega.Eventually(ctx, func(ctx context.Context) bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}).WithTimeout(2 * time.Minute).WithPolling(5 * time.Second).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state"))
waitForKubeletToStart(ctx, f)
ginkgo.By("Started the kubelet")
}
})
@ -121,12 +118,9 @@ var _ = SIGDescribe("Node Container Manager", framework.WithSerial(), func() {
framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(newCfg))
ginkgo.By("Restarting the kubelet")
restartKubelet(true)
restartKubelet(ctx, true)
// wait until the kubelet health check will succeed
gomega.Eventually(ctx, func(ctx context.Context) bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}).WithTimeout(2 * time.Minute).WithPolling(5 * time.Second).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state"))
waitForKubeletToStart(ctx, f)
ginkgo.By("Started the kubelet")
gomega.Consistently(ctx, func(ctx context.Context) bool {
@ -243,7 +237,7 @@ func runTest(ctx context.Context, f *framework.Framework) error {
if oldCfg != nil {
// Update the Kubelet configuration.
ginkgo.By("Stopping the kubelet")
startKubelet := stopKubelet()
restartKubelet := mustStopKubelet(ctx, f)
// wait until the kubelet health check will fail
gomega.Eventually(ctx, func() bool {
@ -252,8 +246,8 @@ func runTest(ctx context.Context, f *framework.Framework) error {
framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(oldCfg))
ginkgo.By("Starting the kubelet")
startKubelet()
ginkgo.By("Restarting the kubelet")
restartKubelet(ctx)
// wait until the kubelet health check will succeed
gomega.Eventually(ctx, func(ctx context.Context) bool {
@ -271,12 +265,7 @@ func runTest(ctx context.Context, f *framework.Framework) error {
// Set the new kubelet configuration.
// Update the Kubelet configuration.
ginkgo.By("Stopping the kubelet")
startKubelet := stopKubelet()
// wait until the kubelet health check will fail
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, time.Minute, time.Second).Should(gomega.BeFalseBecause("expected kubelet health check to be failed"))
restartKubelet := mustStopKubelet(ctx, f)
expectedNAPodCgroup := cm.NewCgroupName(cm.RootCgroupName, nodeAllocatableCgroup)
@ -293,7 +282,7 @@ func runTest(ctx context.Context, f *framework.Framework) error {
framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(newCfg))
ginkgo.By("Starting the kubelet")
startKubelet()
restartKubelet(ctx)
// wait until the kubelet health check will succeed
gomega.Eventually(ctx, func() bool {

View File

@ -52,22 +52,12 @@ func setKubeletConfig(ctx context.Context, f *framework.Framework, cfg *kubeletc
if cfg != nil {
// Update the Kubelet configuration.
ginkgo.By("Stopping the kubelet")
startKubelet := stopKubelet()
// wait until the kubelet health check will fail
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, time.Minute, time.Second).Should(gomega.BeFalseBecause("expected kubelet health check to be failed"))
restartKubelet := mustStopKubelet(ctx, f)
framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(cfg))
ginkgo.By("Starting the kubelet")
startKubelet()
// wait until the kubelet health check will succeed
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, 2*time.Minute, 5*time.Second).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state"))
ginkgo.By("Restarting the kubelet")
restartKubelet(ctx)
}
// Wait for the Kubelet to be ready.

View File

@ -49,7 +49,7 @@ var _ = SIGDescribe("OSArchLabelReconciliation", framework.WithSerial(), framewo
ginkgo.By("killing and restarting kubelet")
// Let's kill the kubelet
startKubelet := stopKubelet()
restartKubelet := mustStopKubelet(ctx, f)
// Update labels
newNode := node.DeepCopy()
newNode.Labels[v1.LabelOSStable] = "dummyOS"
@ -57,7 +57,7 @@ var _ = SIGDescribe("OSArchLabelReconciliation", framework.WithSerial(), framewo
_, _, err := nodeutil.PatchNodeStatus(f.ClientSet.CoreV1(), types.NodeName(node.Name), node, newNode)
framework.ExpectNoError(err)
// Restart kubelet
startKubelet()
restartKubelet(ctx)
framework.ExpectNoError(e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, framework.RestartNodeReadyAgainTimeout))
// If this happens right, node should have all the labels reset properly
err = waitForNodeLabels(ctx, f.ClientSet.CoreV1(), node.Name, 5*time.Minute)

View File

@ -1146,7 +1146,7 @@ var _ = SIGDescribe("POD Resources", framework.WithSerial(), feature.PodResource
expectPodResources(ctx, 1, cli, []podDesc{desc})
ginkgo.By("Restarting Kubelet")
restartKubelet(true)
restartKubelet(ctx, true)
// we need to wait for the node to be reported ready before we can safely query
// the podresources endpoint again. Otherwise we will have false negatives.

View File

@ -200,8 +200,8 @@ var _ = SIGDescribe("Restart", framework.WithSerial(), framework.WithSlow(), fra
ginkgo.By("killing and restarting kubelet")
// We want to kill the kubelet rather than a graceful restart
startKubelet := stopKubelet()
startKubelet()
restartKubelet := mustStopKubelet(ctx, f)
restartKubelet(ctx)
// If this test works correctly, each of these pods will exit
// with no issue. But if accounting breaks, pods scheduled after
@ -309,19 +309,10 @@ var _ = SIGDescribe("Restart", framework.WithSerial(), framework.WithSlow(), fra
// As soon as the pod enters succeeded phase (detected by the watch above); kill the kubelet.
// This is a bit racy, but the goal is to stop the kubelet before the kubelet is able to delete the pod from the API-sever in order to repro https://issues.k8s.io/116925
ginkgo.By("Stopping the kubelet")
startKubelet := stopKubelet()
// wait until the kubelet health check will fail
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("expected kubelet health check to be failed"))
restartKubelet := mustStopKubelet(ctx, f)
ginkgo.By("Starting the kubelet")
startKubelet()
// wait until the kubelet health check will succeed
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state"))
ginkgo.By("Restarting the kubelet")
restartKubelet(ctx)
// Wait for the Kubelet to be ready.
gomega.Eventually(ctx, func(ctx context.Context) bool {
@ -361,12 +352,7 @@ var _ = SIGDescribe("Restart", framework.WithSerial(), framework.WithSlow(), fra
},
})
ginkgo.By("Stopping the kubelet")
startKubelet := stopKubelet()
// wait until the kubelet health check will fail
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("expected kubelet health check to be failed"))
restartKubelet := mustStopKubelet(ctx, f)
// Create the pod bound to the node. It will remain in the Pending
// phase as Kubelet is down.
@ -379,19 +365,7 @@ var _ = SIGDescribe("Restart", framework.WithSerial(), framework.WithSlow(), fra
// Restart Kubelet so that it proceeds with deletion
ginkgo.By("Starting the kubelet")
startKubelet()
// wait until the kubelet health check will succeed
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state"))
// Wait for the Kubelet to be ready.
gomega.Eventually(ctx, func(ctx context.Context) bool {
nodes, err := e2enode.TotalReady(ctx, f.ClientSet)
framework.ExpectNoError(err)
return nodes == 1
}, time.Minute, f.Timeouts.Poll).Should(gomega.BeTrueBecause("expected kubelet to be in ready state"))
restartKubelet(ctx)
ginkgo.By(fmt.Sprintf("After the kubelet is restarted, verify the pod (%v/%v) is deleted by kubelet", pod.Namespace, pod.Name))
gomega.Eventually(ctx, func(ctx context.Context) error {
@ -439,12 +413,7 @@ var _ = SIGDescribe("Restart", framework.WithSerial(), framework.WithSlow(), fra
framework.ExpectNoError(err, "Failed to await for the pod to be running: (%v/%v)", f.Namespace.Name, pod.Name)
ginkgo.By("Stopping the kubelet")
startKubelet := stopKubelet()
// wait until the kubelet health check will fail
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("expected kubelet health check to be failed"))
restartKubelet := mustStopKubelet(ctx, f)
ginkgo.By(fmt.Sprintf("Deleting the pod (%v/%v) to set a deletion timestamp", pod.Namespace, pod.Name))
err = e2epod.NewPodClient(f).Delete(ctx, pod.Name, metav1.DeleteOptions{GracePeriodSeconds: &gracePeriod})
@ -454,13 +423,8 @@ var _ = SIGDescribe("Restart", framework.WithSerial(), framework.WithSlow(), fra
e2enode.RemoveLabelOffNode(f.ClientSet, nodeName, nodeLabelKey)
// Restart Kubelet so that it proceeds with deletion
ginkgo.By("Starting the kubelet")
startKubelet()
// wait until the kubelet health check will succeed
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state"))
ginkgo.By("Restarting the kubelet")
restartKubelet(ctx)
// Wait for the Kubelet to be ready.
gomega.Eventually(ctx, func(ctx context.Context) bool {

View File

@ -69,29 +69,19 @@ var _ = SIGDescribe("Unknown Pods", framework.WithSerial(), framework.WithDisrup
ginkgo.It("the static pod should be terminated and cleaned up due to becoming a unknown pod due to being force deleted while kubelet is not running", func(ctx context.Context) {
framework.Logf("Stopping the kubelet")
startKubelet := stopKubelet()
restartKubelet := mustStopKubelet(ctx, f)
pod, err := f.ClientSet.CoreV1().Pods(ns).Get(ctx, mirrorPodName, metav1.GetOptions{})
framework.ExpectNoError(err)
// wait until the kubelet health check will fail
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("expected kubelet health check to be failed"))
framework.Logf("Delete the static pod manifest while the kubelet is not running")
file := staticPodPath(podPath, staticPodName, ns)
framework.Logf("deleting static pod manifest %q", file)
err = os.Remove(file)
framework.ExpectNoError(err)
framework.Logf("Starting the kubelet")
startKubelet()
// wait until the kubelet health check will succeed
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state"))
framework.Logf("Restarting the kubelet")
restartKubelet(ctx)
framework.Logf("wait for the mirror pod %v to disappear", mirrorPodName)
gomega.Eventually(ctx, func(ctx context.Context) error {
@ -140,27 +130,17 @@ var _ = SIGDescribe("Unknown Pods", framework.WithSerial(), framework.WithDisrup
ginkgo.It("the api pod should be terminated and cleaned up due to becoming a unknown pod due to being force deleted while kubelet is not running", func(ctx context.Context) {
framework.Logf("Stopping the kubelet")
startKubelet := stopKubelet()
restartKubelet := mustStopKubelet(ctx, f)
pod, err := f.ClientSet.CoreV1().Pods(ns).Get(ctx, podName, metav1.GetOptions{})
framework.ExpectNoError(err)
// wait until the kubelet health check will fail
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("expected kubelet health check to be failed"))
framework.Logf("Delete the pod while the kubelet is not running")
// Delete pod sync by name will force delete the pod, removing it from kubelet's config
deletePodSyncByName(ctx, f, podName)
framework.Logf("Starting the kubelet")
startKubelet()
// wait until the kubelet health check will succeed
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state"))
framework.Logf("Restarting the kubelet")
restartKubelet(ctx)
framework.Logf("wait for the pod %v to disappear", podName)
gomega.Eventually(ctx, func(ctx context.Context) error {

View File

@ -216,12 +216,7 @@ func tempSetCurrentKubeletConfig(f *framework.Framework, updateFunction func(ctx
func updateKubeletConfig(ctx context.Context, f *framework.Framework, kubeletConfig *kubeletconfig.KubeletConfiguration, deleteStateFiles bool) {
// Update the Kubelet configuration.
ginkgo.By("Stopping the kubelet")
startKubelet := stopKubelet()
// wait until the kubelet health check will fail
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, time.Minute, time.Second).Should(gomega.BeFalseBecause("expected kubelet health check to be failed"))
restartKubelet := mustStopKubelet(ctx, f)
// Delete CPU and memory manager state files to be sure it will not prevent the kubelet restart
if deleteStateFiles {
@ -231,9 +226,8 @@ func updateKubeletConfig(ctx context.Context, f *framework.Framework, kubeletCon
framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(kubeletConfig))
ginkgo.By("Starting the kubelet")
startKubelet()
waitForKubeletToStart(ctx, f)
ginkgo.By("Restarting the kubelet")
restartKubelet(ctx)
}
func waitForKubeletToStart(ctx context.Context, f *framework.Framework) {
@ -433,31 +427,37 @@ func startContainerRuntime() error {
// Warning: the "current" kubelet is poorly defined. The "current" kubelet is assumed to be the most
// recent kubelet service unit, IOW there is not a unique ID we use to bind explicitly a kubelet
// instance to a test run.
func restartKubelet(running bool) {
func restartKubelet(ctx context.Context, running bool) {
kubeletServiceName := findKubeletServiceName(running)
// reset the kubelet service start-limit-hit
stdout, err := exec.Command("sudo", "systemctl", "reset-failed", kubeletServiceName).CombinedOutput()
stdout, err := exec.CommandContext(ctx, "sudo", "systemctl", "reset-failed", kubeletServiceName).CombinedOutput()
framework.ExpectNoError(err, "Failed to reset kubelet start-limit-hit with systemctl: %v, %s", err, string(stdout))
stdout, err = exec.Command("sudo", "systemctl", "restart", kubeletServiceName).CombinedOutput()
stdout, err = exec.CommandContext(ctx, "sudo", "systemctl", "restart", kubeletServiceName).CombinedOutput()
framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %s", err, string(stdout))
}
// stopKubelet will kill the running kubelet, and returns a func that will restart the process again
func stopKubelet() func() {
// mustStopKubelet kills the running kubelet, waits for its health check to fail, and returns a func that restarts it
func mustStopKubelet(ctx context.Context, f *framework.Framework) func(ctx context.Context) {
kubeletServiceName := findKubeletServiceName(true)
// reset the kubelet service start-limit-hit
stdout, err := exec.Command("sudo", "systemctl", "reset-failed", kubeletServiceName).CombinedOutput()
stdout, err := exec.CommandContext(ctx, "sudo", "systemctl", "reset-failed", kubeletServiceName).CombinedOutput()
framework.ExpectNoError(err, "Failed to reset kubelet start-limit-hit with systemctl: %v, %s", err, string(stdout))
stdout, err = exec.Command("sudo", "systemctl", "kill", kubeletServiceName).CombinedOutput()
stdout, err = exec.CommandContext(ctx, "sudo", "systemctl", "kill", kubeletServiceName).CombinedOutput()
framework.ExpectNoError(err, "Failed to stop kubelet with systemctl: %v, %s", err, string(stdout))
return func() {
// wait until the kubelet health check fails
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("kubelet was expected to be stopped but it is still running"))
return func(ctx context.Context) {
// we should restart service, otherwise the transient service start will fail
stdout, err := exec.Command("sudo", "systemctl", "restart", kubeletServiceName).CombinedOutput()
stdout, err := exec.CommandContext(ctx, "sudo", "systemctl", "restart", kubeletServiceName).CombinedOutput()
framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %v", err, stdout)
waitForKubeletToStart(ctx, f)
}
}
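
A typical consumer of these helpers, mirroring the DRA test above, guards the restart with a DeferCleanup so the kubelet is brought back even if the test fails early (usage sketch, not part of the diff):

restartKubelet := mustStopKubelet(ctx, f)
ginkgo.DeferCleanup(func() {
	// Restart the kubelet only if the test body has not already done so.
	if restartKubelet != nil {
		restartKubelet(ctx)
	}
})
// ... assertions that require the kubelet to be stopped ...
restartKubelet(ctx)
restartKubelet = nil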