Merge pull request #104076 from dims/explicitly-restart-kubelet-to-stabilize-serial-containerd-job

Explicitly restart kubelet to stabilize serial-containerd job
This commit is contained in:
Kubernetes Prow Robot
2021-08-02 10:21:22 -07:00
committed by GitHub
2 changed files with 15 additions and 4 deletions

View File

@@ -339,12 +339,17 @@ var _ = SIGDescribe("Memory Manager [Serial] [Feature:MemoryManager]", func() {
}, 30*time.Second, framework.Poll).Should(gomega.BeNil())
ginkgo.By("restarting kubelet to pick up pre-allocated hugepages")
// stop the kubelet and wait until the server restarts it automatically
stopKubelet()
// wait until the kubelet health check fails
gomega.Eventually(func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, time.Minute, time.Second).Should(gomega.BeFalse())
restartKubelet()
// wait until the kubelet health check passes
gomega.Eventually(func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)

View File

@@ -397,8 +397,14 @@ func getCRIClient() (internalapi.RuntimeService, internalapi.ImageManagerService
}
// TODO: Find a uniform way to deal with systemctl/initctl/service operations. #34494
func findRunningKubletServiceName() string {
stdout, err := exec.Command("sudo", "systemctl", "list-units", "*kubelet*", "--state=running").CombinedOutput()
func findKubletServiceName(running bool) string {
cmdLine := []string{
"systemctl", "list-units", "*kubelet*",
}
if running {
cmdLine = append(cmdLine, "--state=running")
}
stdout, err := exec.Command("sudo", cmdLine...).CombinedOutput()
framework.ExpectNoError(err)
regex := regexp.MustCompile("(kubelet-\\w+)")
matches := regex.FindStringSubmatch(string(stdout))
@@ -409,7 +415,7 @@ func findRunningKubletServiceName() string {
}
func restartKubelet() {
kubeletServiceName := findRunningKubletServiceName()
kubeletServiceName := findKubletServiceName(false)
// reset the kubelet service start-limit-hit
stdout, err := exec.Command("sudo", "systemctl", "reset-failed", kubeletServiceName).CombinedOutput()
framework.ExpectNoError(err, "Failed to reset kubelet start-limit-hit with systemctl: %v, %v", err, stdout)
@@ -420,7 +426,7 @@ func restartKubelet() {
// stopKubelet kills the running kubelet and returns a func that restarts the process
func stopKubelet() func() {
kubeletServiceName := findRunningKubletServiceName()
kubeletServiceName := findKubletServiceName(true)
// reset the kubelet service start-limit-hit
stdout, err := exec.Command("sudo", "systemctl", "reset-failed", kubeletServiceName).CombinedOutput()