mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-24 12:15:52 +00:00
Merge pull request #105516 from fromanirh/e2e-kubelet-restart-improvements
e2e: node: kubelet restart improvements
This commit is contained in:
commit
fe62fcc9b4
@ -220,7 +220,7 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
|
|||||||
|
|
||||||
restartTime := time.Now()
|
restartTime := time.Now()
|
||||||
ginkgo.By("Restarting Kubelet")
|
ginkgo.By("Restarting Kubelet")
|
||||||
restartKubelet()
|
restartKubelet(true)
|
||||||
|
|
||||||
// We need to wait for node to be ready before re-registering stub device plugin.
|
// We need to wait for node to be ready before re-registering stub device plugin.
|
||||||
// Otherwise, Kubelet DeviceManager may remove the re-registered sockets after it starts.
|
// Otherwise, Kubelet DeviceManager may remove the re-registered sockets after it starts.
|
||||||
|
@ -103,7 +103,7 @@ var _ = SIGDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugin][NodeFeat
|
|||||||
f.PodClient().DeleteSync(p.Name, metav1.DeleteOptions{}, 2*time.Minute)
|
f.PodClient().DeleteSync(p.Name, metav1.DeleteOptions{}, 2*time.Minute)
|
||||||
}
|
}
|
||||||
|
|
||||||
restartKubelet()
|
restartKubelet(true)
|
||||||
|
|
||||||
ginkgo.By("Waiting for GPUs to become unavailable on the local node")
|
ginkgo.By("Waiting for GPUs to become unavailable on the local node")
|
||||||
gomega.Eventually(func() bool {
|
gomega.Eventually(func() bool {
|
||||||
@ -142,7 +142,7 @@ var _ = SIGDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugin][NodeFeat
|
|||||||
framework.ExpectEqual(devIDRestart1, devID1)
|
framework.ExpectEqual(devIDRestart1, devID1)
|
||||||
|
|
||||||
ginkgo.By("Restarting Kubelet")
|
ginkgo.By("Restarting Kubelet")
|
||||||
restartKubelet()
|
restartKubelet(true)
|
||||||
framework.WaitForAllNodesSchedulable(f.ClientSet, 30*time.Minute)
|
framework.WaitForAllNodesSchedulable(f.ClientSet, 30*time.Minute)
|
||||||
|
|
||||||
ginkgo.By("Checking that scheduled pods can continue to run even after we delete device plugin and restart Kubelet.")
|
ginkgo.By("Checking that scheduled pods can continue to run even after we delete device plugin and restart Kubelet.")
|
||||||
@ -172,7 +172,7 @@ var _ = SIGDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugin][NodeFeat
|
|||||||
}
|
}
|
||||||
|
|
||||||
ginkgo.By("Restarting Kubelet")
|
ginkgo.By("Restarting Kubelet")
|
||||||
restartKubelet()
|
restartKubelet(true)
|
||||||
|
|
||||||
ginkgo.By("Confirming that after a kubelet and pod restart, GPU assignment is kept")
|
ginkgo.By("Confirming that after a kubelet and pod restart, GPU assignment is kept")
|
||||||
ensurePodContainerRestart(f, p1.Name, p1.Name)
|
ensurePodContainerRestart(f, p1.Name, p1.Name)
|
||||||
@ -181,7 +181,7 @@ var _ = SIGDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugin][NodeFeat
|
|||||||
|
|
||||||
ginkgo.By("Restarting Kubelet and creating another pod")
|
ginkgo.By("Restarting Kubelet and creating another pod")
|
||||||
|
|
||||||
restartKubelet()
|
restartKubelet(true)
|
||||||
framework.WaitForAllNodesSchedulable(f.ClientSet, 30*time.Minute)
|
framework.WaitForAllNodesSchedulable(f.ClientSet, 30*time.Minute)
|
||||||
|
|
||||||
ensurePodContainerRestart(f, p1.Name, p1.Name)
|
ensurePodContainerRestart(f, p1.Name, p1.Name)
|
||||||
|
@ -207,7 +207,7 @@ var _ = SIGDescribe("HugePages [Serial] [Feature:HugePages][NodeSpecialFeature:H
|
|||||||
framework.ExpectEqual(value.String(), "9Mi", "huge pages with size 3Mi should be supported")
|
framework.ExpectEqual(value.String(), "9Mi", "huge pages with size 3Mi should be supported")
|
||||||
|
|
||||||
ginkgo.By("restarting the node and verifying that huge pages with size 3Mi are not supported")
|
ginkgo.By("restarting the node and verifying that huge pages with size 3Mi are not supported")
|
||||||
restartKubelet()
|
restartKubelet(true)
|
||||||
|
|
||||||
ginkgo.By("verifying that the hugepages-3Mi resource no longer is present")
|
ginkgo.By("verifying that the hugepages-3Mi resource no longer is present")
|
||||||
gomega.Eventually(func() bool {
|
gomega.Eventually(func() bool {
|
||||||
|
@ -344,7 +344,7 @@ var _ = SIGDescribe("Memory Manager [Serial] [Feature:MemoryManager]", func() {
|
|||||||
return kubeletHealthCheck(kubeletHealthCheckURL)
|
return kubeletHealthCheck(kubeletHealthCheckURL)
|
||||||
}, time.Minute, time.Second).Should(gomega.BeFalse())
|
}, time.Minute, time.Second).Should(gomega.BeFalse())
|
||||||
|
|
||||||
restartKubelet()
|
restartKubelet(false)
|
||||||
|
|
||||||
// wait until the kubelet health check will pass
|
// wait until the kubelet health check will pass
|
||||||
gomega.Eventually(func() bool {
|
gomega.Eventually(func() bool {
|
||||||
|
@ -765,7 +765,7 @@ var _ = SIGDescribe("POD Resources [Serial] [Feature:PodResources][NodeFeature:P
|
|||||||
expectPodResources(1, cli, []podDesc{desc})
|
expectPodResources(1, cli, []podDesc{desc})
|
||||||
|
|
||||||
ginkgo.By("Restarting Kubelet")
|
ginkgo.By("Restarting Kubelet")
|
||||||
restartKubelet()
|
restartKubelet(true)
|
||||||
framework.WaitForAllNodesSchedulable(f.ClientSet, framework.TestContext.NodeSchedulableTimeout)
|
framework.WaitForAllNodesSchedulable(f.ClientSet, framework.TestContext.NodeSchedulableTimeout)
|
||||||
expectPodResources(1, cli, []podDesc{desc})
|
expectPodResources(1, cli, []podDesc{desc})
|
||||||
tpd.deletePodsForTest(f)
|
tpd.deletePodsForTest(f)
|
||||||
|
@ -414,14 +414,14 @@ func findKubletServiceName(running bool) string {
|
|||||||
return kubeletServiceName
|
return kubeletServiceName
|
||||||
}
|
}
|
||||||
|
|
||||||
func restartKubelet() {
|
func restartKubelet(running bool) {
|
||||||
kubeletServiceName := findKubletServiceName(false)
|
kubeletServiceName := findKubletServiceName(running)
|
||||||
// reset the kubelet service start-limit-hit
|
// reset the kubelet service start-limit-hit
|
||||||
stdout, err := exec.Command("sudo", "systemctl", "reset-failed", kubeletServiceName).CombinedOutput()
|
stdout, err := exec.Command("sudo", "systemctl", "reset-failed", kubeletServiceName).CombinedOutput()
|
||||||
framework.ExpectNoError(err, "Failed to reset kubelet start-limit-hit with systemctl: %v, %v", err, stdout)
|
framework.ExpectNoError(err, "Failed to reset kubelet start-limit-hit with systemctl: %v, %s", err, string(stdout))
|
||||||
|
|
||||||
stdout, err = exec.Command("sudo", "systemctl", "restart", kubeletServiceName).CombinedOutput()
|
stdout, err = exec.Command("sudo", "systemctl", "restart", kubeletServiceName).CombinedOutput()
|
||||||
framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %v", err, stdout)
|
framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %s", err, string(stdout))
|
||||||
}
|
}
|
||||||
|
|
||||||
// stopKubelet will kill the running kubelet, and returns a func that will restart the process again
|
// stopKubelet will kill the running kubelet, and returns a func that will restart the process again
|
||||||
@ -430,14 +430,14 @@ func stopKubelet() func() {
|
|||||||
|
|
||||||
// reset the kubelet service start-limit-hit
|
// reset the kubelet service start-limit-hit
|
||||||
stdout, err := exec.Command("sudo", "systemctl", "reset-failed", kubeletServiceName).CombinedOutput()
|
stdout, err := exec.Command("sudo", "systemctl", "reset-failed", kubeletServiceName).CombinedOutput()
|
||||||
framework.ExpectNoError(err, "Failed to reset kubelet start-limit-hit with systemctl: %v, %v", err, stdout)
|
framework.ExpectNoError(err, "Failed to reset kubelet start-limit-hit with systemctl: %v, %s", err, string(stdout))
|
||||||
|
|
||||||
stdout, err = exec.Command("sudo", "systemctl", "kill", kubeletServiceName).CombinedOutput()
|
stdout, err = exec.Command("sudo", "systemctl", "kill", kubeletServiceName).CombinedOutput()
|
||||||
framework.ExpectNoError(err, "Failed to stop kubelet with systemctl: %v, %v", err, stdout)
|
framework.ExpectNoError(err, "Failed to stop kubelet with systemctl: %v, %s", err, string(stdout))
|
||||||
|
|
||||||
return func() {
|
return func() {
|
||||||
stdout, err := exec.Command("sudo", "systemctl", "start", kubeletServiceName).CombinedOutput()
|
stdout, err := exec.Command("sudo", "systemctl", "start", kubeletServiceName).CombinedOutput()
|
||||||
framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %v", err, stdout)
|
framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %s", err, string(stdout))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user