Explicitly restart kubelet to stabilize serial-containerd job

Signed-off-by: Davanum Srinivas <davanum@gmail.com>
This commit is contained in:
Davanum Srinivas 2021-08-02 11:24:11 -04:00
parent 5be21c50c2
commit dab19517e5
No known key found for this signature in database
GPG Key ID: 80D83A796103BF59
2 changed files with 15 additions and 4 deletions

View File

@ -339,12 +339,17 @@ var _ = SIGDescribe("Memory Manager [Serial] [Feature:MemoryManager]", func() {
}, 30*time.Second, framework.Poll).Should(gomega.BeNil())
ginkgo.By("restarting kubelet to pick up pre-allocated hugepages")
// stop the kubelet; it is restarted explicitly below once its health check fails
stopKubelet()
// wait until the kubelet health check fails
gomega.Eventually(func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, time.Minute, time.Second).Should(gomega.BeFalse())
restartKubelet()
// wait until the kubelet health check passes
gomega.Eventually(func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)

View File

@ -397,8 +397,14 @@ func getCRIClient() (internalapi.RuntimeService, internalapi.ImageManagerService
}
// TODO: Find a uniform way to deal with systemctl/initctl/service operations. #34494
func findRunningKubletServiceName() string {
stdout, err := exec.Command("sudo", "systemctl", "list-units", "*kubelet*", "--state=running").CombinedOutput()
func findKubletServiceName(running bool) string {
cmdLine := []string{
"systemctl", "list-units", "*kubelet*",
}
if running {
cmdLine = append(cmdLine, "--state=running")
}
stdout, err := exec.Command("sudo", cmdLine...).CombinedOutput()
framework.ExpectNoError(err)
regex := regexp.MustCompile("(kubelet-\\w+)")
matches := regex.FindStringSubmatch(string(stdout))
@ -409,7 +415,7 @@ func findRunningKubletServiceName() string {
}
func restartKubelet() {
kubeletServiceName := findRunningKubletServiceName()
kubeletServiceName := findKubletServiceName(false)
// reset the kubelet service start-limit-hit
stdout, err := exec.Command("sudo", "systemctl", "reset-failed", kubeletServiceName).CombinedOutput()
framework.ExpectNoError(err, "Failed to reset kubelet start-limit-hit with systemctl: %v, %v", err, stdout)
@ -420,7 +426,7 @@ func restartKubelet() {
// stopKubelet kills the running kubelet and returns a func that restarts the process.
func stopKubelet() func() {
kubeletServiceName := findRunningKubletServiceName()
kubeletServiceName := findKubletServiceName(true)
// reset the kubelet service start-limit-hit
stdout, err := exec.Command("sudo", "systemctl", "reset-failed", kubeletServiceName).CombinedOutput()