Merge pull request #105516 from fromanirh/e2e-kubelet-restart-improvements

e2e: node: kubelet restart improvements
Kubernetes Prow Robot, 2021-10-14 17:58:54 -07:00 (committed by GitHub)
commit fe62fcc9b4
6 changed files with 15 additions and 15 deletions


@@ -220,7 +220,7 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
 	restartTime := time.Now()
 	ginkgo.By("Restarting Kubelet")
-	restartKubelet()
+	restartKubelet(true)
 	// We need to wait for node to be ready before re-registering stub device plugin.
 	// Otherwise, Kubelet DeviceManager may remove the re-registered sockets after it starts.
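
The comment retained above is the reason for the wait: if the stub plugin re-registers while the node is still NotReady, the DeviceManager can prune the fresh sockets during startup. A minimal sketch of such a readiness wait, using plain client-go; the helper name, its parameters, and the polling interval are illustrative assumptions, not code from this PR:

package e2esketch

import (
	"context"
	"time"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
	clientset "k8s.io/client-go/kubernetes"
)

// waitForNodeReady polls until the named node reports Ready, so that a
// re-registered device plugin socket is not pruned while the kubelet is
// still starting. Hypothetical helper; not part of this PR.
func waitForNodeReady(cs clientset.Interface, nodeName string, timeout time.Duration) error {
	return wait.PollImmediate(time.Second, timeout, func() (bool, error) {
		node, err := cs.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
		if err != nil {
			return false, nil // tolerate transient API errors while the node recovers
		}
		for _, cond := range node.Status.Conditions {
			if cond.Type == v1.NodeReady {
				return cond.Status == v1.ConditionTrue, nil
			}
		}
		return false, nil
	})
}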


@@ -103,7 +103,7 @@ var _ = SIGDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugin][NodeFeat
 	f.PodClient().DeleteSync(p.Name, metav1.DeleteOptions{}, 2*time.Minute)
 	}
-	restartKubelet()
+	restartKubelet(true)
 	ginkgo.By("Waiting for GPUs to become unavailable on the local node")
 	gomega.Eventually(func() bool {
@@ -142,7 +142,7 @@ var _ = SIGDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugin][NodeFeat
 	framework.ExpectEqual(devIDRestart1, devID1)
 	ginkgo.By("Restarting Kubelet")
-	restartKubelet()
+	restartKubelet(true)
 	framework.WaitForAllNodesSchedulable(f.ClientSet, 30*time.Minute)
 	ginkgo.By("Checking that scheduled pods can continue to run even after we delete device plugin and restart Kubelet.")
@@ -172,7 +172,7 @@ var _ = SIGDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugin][NodeFeat
 	}
 	ginkgo.By("Restarting Kubelet")
-	restartKubelet()
+	restartKubelet(true)
 	ginkgo.By("Confirming that after a kubelet and pod restart, GPU assignment is kept")
 	ensurePodContainerRestart(f, p1.Name, p1.Name)
@@ -181,7 +181,7 @@ var _ = SIGDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugin][NodeFeat
 	ginkgo.By("Restarting Kubelet and creating another pod")
-	restartKubelet()
+	restartKubelet(true)
 	framework.WaitForAllNodesSchedulable(f.ClientSet, 30*time.Minute)
 	ensurePodContainerRestart(f, p1.Name, p1.Name)
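
All four GPU hunks drive the same scenario: record which device a pod was given, restart the kubelet, and assert the assignment survived. Condensed as a sketch inside the test file above (getDeviceID, which would parse the device ID out of the pod's logs, is a hypothetical stand-in; every other call appears in the hunks):

// Hypothetical condensation of the GPU assignment-stability check.
devID1 := getDeviceID(f, p1)        // device granted before the restart (hypothetical helper)
restartKubelet(true)                // running=true: the kubelet service is active here
framework.WaitForAllNodesSchedulable(f.ClientSet, 30*time.Minute)
ensurePodContainerRestart(f, p1.Name, p1.Name)
devIDRestart1 := getDeviceID(f, p1) // device reported after the restart
framework.ExpectEqual(devIDRestart1, devID1)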


@@ -207,7 +207,7 @@ var _ = SIGDescribe("HugePages [Serial] [Feature:HugePages][NodeSpecialFeature:H
 	framework.ExpectEqual(value.String(), "9Mi", "huge pages with size 3Mi should be supported")
 	ginkgo.By("restarting the node and verifying that huge pages with size 3Mi are not supported")
-	restartKubelet()
+	restartKubelet(true)
 	ginkgo.By("verifying that the hugepages-3Mi resource no longer is present")
 	gomega.Eventually(func() bool {


@@ -344,7 +344,7 @@ var _ = SIGDescribe("Memory Manager [Serial] [Feature:MemoryManager]", func() {
 	return kubeletHealthCheck(kubeletHealthCheckURL)
 	}, time.Minute, time.Second).Should(gomega.BeFalse())
-	restartKubelet()
+	restartKubelet(false)
 	// wait until the kubelet health check will pass
 	gomega.Eventually(func() bool {
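
This is the one call site that passes false: by the time restartKubelet runs, the test has already killed the kubelet (the Eventually above waits for the health check to start failing), so findKubletServiceName must resolve the unit among non-running services. The before/after symmetry as a sketch, built only from helpers visible in this diff (the second timeout is an assumption):

// The kubelet was killed earlier in the test; confirm it is really down.
gomega.Eventually(func() bool {
	return kubeletHealthCheck(kubeletHealthCheckURL)
}, time.Minute, time.Second).Should(gomega.BeFalse())

// running=false: look the service unit up without requiring it to be active.
restartKubelet(false)

// Wait until the health check passes again before asserting anything else.
gomega.Eventually(func() bool {
	return kubeletHealthCheck(kubeletHealthCheckURL)
}, 2*time.Minute, time.Second).Should(gomega.BeTrue())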


@@ -765,7 +765,7 @@ var _ = SIGDescribe("POD Resources [Serial] [Feature:PodResources][NodeFeature:P
 	expectPodResources(1, cli, []podDesc{desc})
 	ginkgo.By("Restarting Kubelet")
-	restartKubelet()
+	restartKubelet(true)
 	framework.WaitForAllNodesSchedulable(f.ClientSet, framework.TestContext.NodeSchedulableTimeout)
 	expectPodResources(1, cli, []podDesc{desc})
 	tpd.deletePodsForTest(f)


@@ -414,14 +414,14 @@ func findKubletServiceName(running bool) string {
 	return kubeletServiceName
 }
 
-func restartKubelet() {
-	kubeletServiceName := findKubletServiceName(false)
+func restartKubelet(running bool) {
+	kubeletServiceName := findKubletServiceName(running)
 	// reset the kubelet service start-limit-hit
 	stdout, err := exec.Command("sudo", "systemctl", "reset-failed", kubeletServiceName).CombinedOutput()
-	framework.ExpectNoError(err, "Failed to reset kubelet start-limit-hit with systemctl: %v, %v", err, stdout)
+	framework.ExpectNoError(err, "Failed to reset kubelet start-limit-hit with systemctl: %v, %s", err, string(stdout))
 	stdout, err = exec.Command("sudo", "systemctl", "restart", kubeletServiceName).CombinedOutput()
-	framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %v", err, stdout)
+	framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %s", err, string(stdout))
 }
 
 // stopKubelet will kill the running kubelet, and returns a func that will restart the process again
@@ -430,14 +430,14 @@ func stopKubelet() func() {
 	// reset the kubelet service start-limit-hit
 	stdout, err := exec.Command("sudo", "systemctl", "reset-failed", kubeletServiceName).CombinedOutput()
-	framework.ExpectNoError(err, "Failed to reset kubelet start-limit-hit with systemctl: %v, %v", err, stdout)
+	framework.ExpectNoError(err, "Failed to reset kubelet start-limit-hit with systemctl: %v, %s", err, string(stdout))
 	stdout, err = exec.Command("sudo", "systemctl", "kill", kubeletServiceName).CombinedOutput()
-	framework.ExpectNoError(err, "Failed to stop kubelet with systemctl: %v, %v", err, stdout)
+	framework.ExpectNoError(err, "Failed to stop kubelet with systemctl: %v, %s", err, string(stdout))
 	return func() {
 		stdout, err := exec.Command("sudo", "systemctl", "start", kubeletServiceName).CombinedOutput()
-		framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %v", err, stdout)
+		framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %s", err, string(stdout))
	}
 }
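
The net contract after this PR: restartKubelet(true) when the kubelet service is expected to be active (every call site except one), restartKubelet(false) when it has already been stopped or killed, and stopKubelet still returns a closure that restarts the process. The format-string change from %v to %s with string(stdout) additionally makes systemctl's combined output print as text rather than as a byte-slice dump. A usage sketch (the calls are from this file; the surrounding test body is illustrative):

// Common case: the kubelet is up, bounce it in place.
restartKubelet(true)

// Already-down case (e.g. after a systemctl kill earlier in the test):
// resolve the unit among non-running services, then restart it.
restartKubelet(false)

// Stop now and guarantee a restart when the test returns.
restart := stopKubelet()
defer restart()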