Merge pull request #105516 from fromanirh/e2e-kubelet-restart-improvements

e2e: node: kubelet restart improvements
Kubernetes Prow Robot authored 2021-10-14 17:58:54 -07:00, committed by GitHub
commit fe62fcc9b4
6 changed files with 15 additions and 15 deletions
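
At most call sites the change is mechanical: restartKubelet gains a single boolean saying whether the kubelet systemd unit is expected to be running when the restart is issued, and that flag is forwarded to the existing findKubletServiceName helper. A minimal, self-contained sketch of the resulting shape follows; the unit lookup and the hard-coded "kubelet.service" fallback are assumptions for illustration, not the real helper from the e2e node test utilities.

package main

import (
	"fmt"
	"os/exec"
)

// findKubeletServiceNameSketch stands in for findKubletServiceName: it
// resolves the kubelet unit name, filtering on whether the unit is
// expected to be active. The fixed return value is an assumption.
func findKubeletServiceNameSketch(running bool) string {
	state := "inactive"
	if running {
		state = "running"
	}
	// Illustrative lookup only; the real helper parses this listing.
	out, err := exec.Command("systemctl", "list-units", "--state="+state, "*kubelet*").CombinedOutput()
	if err != nil {
		fmt.Printf("list-units failed: %v, %s\n", err, string(out))
	}
	return "kubelet.service"
}

// restartKubeletSketch mirrors the post-PR shape of restartKubelet:
// callers declare the expected service state up front.
func restartKubeletSketch(running bool) error {
	name := findKubeletServiceNameSketch(running)
	// Clear a possible start-limit-hit so systemd allows the restart.
	if out, err := exec.Command("sudo", "systemctl", "reset-failed", name).CombinedOutput(); err != nil {
		return fmt.Errorf("reset-failed %s: %v, %s", name, err, string(out))
	}
	if out, err := exec.Command("sudo", "systemctl", "restart", name).CombinedOutput(); err != nil {
		return fmt.Errorf("restart %s: %v, %s", name, err, string(out))
	}
	return nil
}

func main() {
	// true is the common case: bounce a kubelet that is currently up.
	// The Memory Manager test below is the one caller that passes false,
	// because it stops the kubelet before asking for the restart.
	if err := restartKubeletSketch(true); err != nil {
		fmt.Println(err)
	}
}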

@@ -220,7 +220,7 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
 restartTime := time.Now()
 ginkgo.By("Restarting Kubelet")
-restartKubelet()
+restartKubelet(true)
 // We need to wait for node to be ready before re-registering stub device plugin.
 // Otherwise, Kubelet DeviceManager may remove the re-registered sockets after it starts.

@@ -103,7 +103,7 @@ var _ = SIGDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugin][NodeFeat
 f.PodClient().DeleteSync(p.Name, metav1.DeleteOptions{}, 2*time.Minute)
 }
-restartKubelet()
+restartKubelet(true)
 ginkgo.By("Waiting for GPUs to become unavailable on the local node")
 gomega.Eventually(func() bool {
@@ -142,7 +142,7 @@ var _ = SIGDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugin][NodeFeat
 framework.ExpectEqual(devIDRestart1, devID1)
 ginkgo.By("Restarting Kubelet")
-restartKubelet()
+restartKubelet(true)
 framework.WaitForAllNodesSchedulable(f.ClientSet, 30*time.Minute)
 ginkgo.By("Checking that scheduled pods can continue to run even after we delete device plugin and restart Kubelet.")
@@ -172,7 +172,7 @@ var _ = SIGDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugin][NodeFeat
 }
 ginkgo.By("Restarting Kubelet")
-restartKubelet()
+restartKubelet(true)
 ginkgo.By("Confirming that after a kubelet and pod restart, GPU assignment is kept")
 ensurePodContainerRestart(f, p1.Name, p1.Name)
@@ -181,7 +181,7 @@ var _ = SIGDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugin][NodeFeat
 ginkgo.By("Restarting Kubelet and creating another pod")
-restartKubelet()
+restartKubelet(true)
 framework.WaitForAllNodesSchedulable(f.ClientSet, 30*time.Minute)
 ensurePodContainerRestart(f, p1.Name, p1.Name)

@@ -207,7 +207,7 @@ var _ = SIGDescribe("HugePages [Serial] [Feature:HugePages][NodeSpecialFeature:H
 framework.ExpectEqual(value.String(), "9Mi", "huge pages with size 3Mi should be supported")
 ginkgo.By("restarting the node and verifying that huge pages with size 3Mi are not supported")
-restartKubelet()
+restartKubelet(true)
 ginkgo.By("verifying that the hugepages-3Mi resource no longer is present")
 gomega.Eventually(func() bool {

@@ -344,7 +344,7 @@ var _ = SIGDescribe("Memory Manager [Serial] [Feature:MemoryManager]", func() {
 return kubeletHealthCheck(kubeletHealthCheckURL)
 }, time.Minute, time.Second).Should(gomega.BeFalse())
-restartKubelet()
+restartKubelet(false)
 // wait until the kubelet health check will pass
 gomega.Eventually(func() bool {
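
This Memory Manager hunk is the one call site that passes false: the preceding Eventually has just asserted that the kubelet health check fails, so the unit to restart is not running and the lookup inside restartKubelet must not filter for an active unit. Below is a hedged standalone sketch of that wait-down, restart, wait-up sequence; the healthz URL and the one-minute/one-second timings mirror the test's pattern but are assumptions here.

package main

import (
	"fmt"
	"net/http"
	"time"
)

// healthURL approximates kubeletHealthCheckURL; the kubelet's default
// healthz port is 10248, but this value is an assumption.
const healthURL = "http://127.0.0.1:10248/healthz"

func kubeletHealthy() bool {
	resp, err := http.Get(healthURL)
	if err != nil {
		return false
	}
	defer resp.Body.Close()
	return resp.StatusCode == http.StatusOK
}

// waitFor polls cond once per second until it returns want or the
// timeout expires, mirroring gomega.Eventually(...).Should(...).
func waitFor(cond func() bool, want bool, timeout time.Duration) bool {
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		if cond() == want {
			return true
		}
		time.Sleep(time.Second)
	}
	return false
}

func main() {
	// 1. The test first confirms the kubelet is down...
	fmt.Println("down:", waitFor(kubeletHealthy, false, time.Minute))
	// 2. ...then calls restartKubelet(false), since the unit is inactive...
	// 3. ...and finally waits for the health check to pass again.
	fmt.Println("up:", waitFor(kubeletHealthy, true, time.Minute))
}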

@@ -765,7 +765,7 @@ var _ = SIGDescribe("POD Resources [Serial] [Feature:PodResources][NodeFeature:P
 expectPodResources(1, cli, []podDesc{desc})
 ginkgo.By("Restarting Kubelet")
-restartKubelet()
+restartKubelet(true)
 framework.WaitForAllNodesSchedulable(f.ClientSet, framework.TestContext.NodeSchedulableTimeout)
 expectPodResources(1, cli, []podDesc{desc})
 tpd.deletePodsForTest(f)

@@ -414,14 +414,14 @@ func findKubletServiceName(running bool) string {
 return kubeletServiceName
 }
-func restartKubelet() {
-kubeletServiceName := findKubletServiceName(false)
+func restartKubelet(running bool) {
+kubeletServiceName := findKubletServiceName(running)
 // reset the kubelet service start-limit-hit
 stdout, err := exec.Command("sudo", "systemctl", "reset-failed", kubeletServiceName).CombinedOutput()
-framework.ExpectNoError(err, "Failed to reset kubelet start-limit-hit with systemctl: %v, %v", err, stdout)
+framework.ExpectNoError(err, "Failed to reset kubelet start-limit-hit with systemctl: %v, %s", err, string(stdout))
 stdout, err = exec.Command("sudo", "systemctl", "restart", kubeletServiceName).CombinedOutput()
-framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %v", err, stdout)
+framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %s", err, string(stdout))
 }
 // stopKubelet will kill the running kubelet, and returns a func that will restart the process again
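
The companion %v-to-%s change in these error messages is worth a note: exec.Cmd.CombinedOutput returns a []byte, and fmt's %v verb renders a byte slice as decimal numbers, while %s with an explicit string conversion prints the systemctl output as readable text. A short illustration:

package main

import "fmt"

func main() {
	out := []byte("Job for kubelet.service failed.")
	fmt.Printf("%v\n", out)         // [74 111 98 32 ...]: bytes as numbers
	fmt.Printf("%s\n", string(out)) // Job for kubelet.service failed.
}
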
@@ -430,14 +430,14 @@ func stopKubelet() func() {
 // reset the kubelet service start-limit-hit
 stdout, err := exec.Command("sudo", "systemctl", "reset-failed", kubeletServiceName).CombinedOutput()
-framework.ExpectNoError(err, "Failed to reset kubelet start-limit-hit with systemctl: %v, %v", err, stdout)
+framework.ExpectNoError(err, "Failed to reset kubelet start-limit-hit with systemctl: %v, %s", err, string(stdout))
 stdout, err = exec.Command("sudo", "systemctl", "kill", kubeletServiceName).CombinedOutput()
-framework.ExpectNoError(err, "Failed to stop kubelet with systemctl: %v, %v", err, stdout)
+framework.ExpectNoError(err, "Failed to stop kubelet with systemctl: %v, %s", err, string(stdout))
 return func() {
 stdout, err := exec.Command("sudo", "systemctl", "start", kubeletServiceName).CombinedOutput()
-framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %v", err, stdout)
+framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %s", err, string(stdout))
 }
 }
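
stopKubelet pairs naturally with the restart closure it returns, which lets a test guarantee the kubelet comes back even if the assertions in between fail. A hedged usage sketch, with stopKubeletSketch standing in for the real helper; the fixed "kubelet.service" name is an assumption, since the real code resolves it with findKubletServiceName(true).

package main

import (
	"fmt"
	"os/exec"
)

// stopKubeletSketch mirrors the shape of stopKubelet: kill the unit and
// hand back a func that starts it again.
func stopKubeletSketch() (func() error, error) {
	const name = "kubelet.service" // assumed unit name
	// reset the kubelet service start-limit-hit, as in the real helper
	if out, err := exec.Command("sudo", "systemctl", "reset-failed", name).CombinedOutput(); err != nil {
		return nil, fmt.Errorf("reset-failed: %v, %s", err, string(out))
	}
	if out, err := exec.Command("sudo", "systemctl", "kill", name).CombinedOutput(); err != nil {
		return nil, fmt.Errorf("kill: %v, %s", err, string(out))
	}
	return func() error {
		out, err := exec.Command("sudo", "systemctl", "start", name).CombinedOutput()
		if err != nil {
			return fmt.Errorf("start: %v, %s", err, string(out))
		}
		return nil
	}, nil
}

func main() {
	restart, err := stopKubeletSketch()
	if err != nil {
		fmt.Println(err)
		return
	}
	// Deferred restart: the kubelet is brought back no matter what the
	// test body does while it is down.
	defer func() {
		if err := restart(); err != nil {
			fmt.Println(err)
		}
	}()
	fmt.Println("kubelet stopped; run assertions while it is down")
}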