e2e: node: expose the running flag
Each e2e test knows whether it wants to restart a running kubelet or a non-running kubelet. The vast majority of the time we want to restart a running kubelet (e.g. to change config or to check that some properties hold across kubelet crashes/restarts), but sometimes we stop the kubelet, perform some actions, and only then restart it.

To accommodate both use cases, we simply expose the `running` boolean flag to the e2e tests.

Having `restartKubelet` explicitly restart a running kubelet helps us troubleshoot e2e failures in which the kubelet was supposed to be running but was not; attempting a restart in such cases only muddied the waters further, making the troubleshooting and the eventual fix harder.

In the happy path, no change in behaviour is expected.

Signed-off-by: Francesco Romani <fromani@redhat.com>
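For context, a minimal sketch of the two call patterns the flag enables, pieced together from the hunks below; the stopKubelet helper in the second pattern is assumed for illustration and is not part of this diff.

	// Common case: the kubelet is expected to be up; restart it to pick up a
	// config change or to verify that state survives a restart.
	ginkgo.By("Restarting Kubelet")
	restartKubelet(true)

	// Stop-then-start case (see the Memory Manager hunk below): the kubelet was
	// deliberately stopped earlier in the test, so the service is looked up
	// without requiring it to be in the "running" state.
	stopKubelet() // assumed helper that stops the kubelet service unit
	gomega.Eventually(func() bool {
		return kubeletHealthCheck(kubeletHealthCheckURL)
	}, time.Minute, time.Second).Should(gomega.BeFalse())
	restartKubelet(false)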
parent e878c20ac7
commit d15bff2839
@@ -220,7 +220,7 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
 		restartTime := time.Now()
 		ginkgo.By("Restarting Kubelet")
-		restartKubelet()
+		restartKubelet(true)

 		// We need to wait for node to be ready before re-registering stub device plugin.
 		// Otherwise, Kubelet DeviceManager may remove the re-registered sockets after it starts.
@@ -103,7 +103,7 @@ var _ = SIGDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugin][NodeFeat
 			f.PodClient().DeleteSync(p.Name, metav1.DeleteOptions{}, 2*time.Minute)
 		}

-		restartKubelet()
+		restartKubelet(true)

 		ginkgo.By("Waiting for GPUs to become unavailable on the local node")
 		gomega.Eventually(func() bool {
@@ -142,7 +142,7 @@ var _ = SIGDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugin][NodeFeat
 		framework.ExpectEqual(devIDRestart1, devID1)

 		ginkgo.By("Restarting Kubelet")
-		restartKubelet()
+		restartKubelet(true)
 		framework.WaitForAllNodesSchedulable(f.ClientSet, 30*time.Minute)

 		ginkgo.By("Checking that scheduled pods can continue to run even after we delete device plugin and restart Kubelet.")
@@ -172,7 +172,7 @@ var _ = SIGDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugin][NodeFeat
 		}

 		ginkgo.By("Restarting Kubelet")
-		restartKubelet()
+		restartKubelet(true)

 		ginkgo.By("Confirming that after a kubelet and pod restart, GPU assignment is kept")
 		ensurePodContainerRestart(f, p1.Name, p1.Name)
@@ -181,7 +181,7 @@ var _ = SIGDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugin][NodeFeat

 		ginkgo.By("Restarting Kubelet and creating another pod")

-		restartKubelet()
+		restartKubelet(true)
 		framework.WaitForAllNodesSchedulable(f.ClientSet, 30*time.Minute)

 		ensurePodContainerRestart(f, p1.Name, p1.Name)
@@ -207,7 +207,7 @@ var _ = SIGDescribe("HugePages [Serial] [Feature:HugePages][NodeSpecialFeature:H
 		framework.ExpectEqual(value.String(), "9Mi", "huge pages with size 3Mi should be supported")

 		ginkgo.By("restarting the node and verifying that huge pages with size 3Mi are not supported")
-		restartKubelet()
+		restartKubelet(true)

 		ginkgo.By("verifying that the hugepages-3Mi resource no longer is present")
 		gomega.Eventually(func() bool {
@@ -348,7 +348,7 @@ var _ = SIGDescribe("Memory Manager [Serial] [Feature:MemoryManager]", func() {
 			return kubeletHealthCheck(kubeletHealthCheckURL)
 		}, time.Minute, time.Second).Should(gomega.BeFalse())

-		restartKubelet()
+		restartKubelet(false)

 		// wait until the kubelet health check will pass
 		gomega.Eventually(func() bool {
@@ -731,7 +731,7 @@ var _ = SIGDescribe("POD Resources [Serial] [Feature:PodResources][NodeFeature:P
 		expectPodResources(1, cli, []podDesc{desc})

 		ginkgo.By("Restarting Kubelet")
-		restartKubelet()
+		restartKubelet(true)
 		framework.WaitForAllNodesSchedulable(f.ClientSet, framework.TestContext.NodeSchedulableTimeout)
 		expectPodResources(1, cli, []podDesc{desc})
 		tpd.deletePodsForTest(f)
@@ -414,8 +414,8 @@ func findKubletServiceName(running bool) string {
 	return kubeletServiceName
 }

-func restartKubelet() {
-	kubeletServiceName := findKubletServiceName(false)
+func restartKubelet(running bool) {
+	kubeletServiceName := findKubletServiceName(running)
 	// reset the kubelet service start-limit-hit
 	stdout, err := exec.Command("sudo", "systemctl", "reset-failed", kubeletServiceName).CombinedOutput()
 	framework.ExpectNoError(err, "Failed to reset kubelet start-limit-hit with systemctl: %v, %s", err, string(stdout))
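For reference, a sketch of the updated helper in full; the hunk above is truncated by the diff viewer, so the trailing systemctl restart call is an assumption about how the function plausibly completes, not something shown on this page.

	// Sketch of the updated helper. Everything after the first ExpectNoError
	// call is assumed, since the hunk above only shows the start of the body.
	func restartKubelet(running bool) {
		// Look up the unit name; per the commit message, passing running=true
		// expects the kubelet service to actually be running at this point.
		kubeletServiceName := findKubletServiceName(running)
		// reset the kubelet service start-limit-hit
		stdout, err := exec.Command("sudo", "systemctl", "reset-failed", kubeletServiceName).CombinedOutput()
		framework.ExpectNoError(err, "Failed to reset kubelet start-limit-hit with systemctl: %v, %s", err, string(stdout))
		// assumed: restart the unit once the failure counter has been cleared
		stdout, err = exec.Command("sudo", "systemctl", "restart", kubeletServiceName).CombinedOutput()
		framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %s", err, string(stdout))
	}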