Merge pull request #99584 from cynepco3hahue/e2e_fix_hugepages_tests

e2e node: fix hugepages test flakiness
This commit is contained in:
Kubernetes Prow Robot 2021-03-04 11:00:48 -08:00 committed by GitHub
commit 3cab9f5d74
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 43 additions and 3 deletions

View File

@ -337,7 +337,16 @@ var _ = SIGDescribe("HugePages [Serial] [Feature:HugePages][NodeSpecialFeature:H
setHugepages() setHugepages()
ginkgo.By("restarting kubelet to pick up pre-allocated hugepages") ginkgo.By("restarting kubelet to pick up pre-allocated hugepages")
restartKubelet() // stop the kubelet and wait until the server will restart it automatically
stopKubelet()
// wait until the kubelet health check fails
gomega.Eventually(func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, time.Minute, time.Second).Should(gomega.BeFalse())
// wait until the kubelet health check passes
gomega.Eventually(func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, 2*time.Minute, 10*time.Second).Should(gomega.BeTrue())
waitForHugepages() waitForHugepages()
@ -352,7 +361,8 @@ var _ = SIGDescribe("HugePages [Serial] [Feature:HugePages][NodeSpecialFeature:H
releaseHugepages() releaseHugepages()
ginkgo.By("restarting kubelet to pick up pre-allocated hugepages") ginkgo.By("restarting kubelet to pick up pre-allocated hugepages")
restartKubelet() // stop the kubelet and wait until the server will restart it automatically
stopKubelet()
waitForHugepages() waitForHugepages()
}) })

View File

@ -18,6 +18,7 @@ package e2enode
import ( import (
"context" "context"
"crypto/tls"
"encoding/json" "encoding/json"
"flag" "flag"
"fmt" "fmt"
@ -70,6 +71,8 @@ const (
defaultPodResourcesPath = "/var/lib/kubelet/pod-resources" defaultPodResourcesPath = "/var/lib/kubelet/pod-resources"
defaultPodResourcesTimeout = 10 * time.Second defaultPodResourcesTimeout = 10 * time.Second
defaultPodResourcesMaxSize = 1024 * 1024 * 16 // 16 Mb defaultPodResourcesMaxSize = 1024 * 1024 * 16 // 16 Mb
kubeletReadOnlyPort = "10255"
kubeletHealthCheckURL = "http://127.0.0.1:" + kubeletReadOnlyPort + "/healthz"
) )
func getNodeSummary() (*stats.Summary, error) { func getNodeSummary() (*stats.Summary, error) {
@ -418,14 +421,41 @@ func restartKubelet() {
// stopKubelet will kill the running kubelet, and returns a func that will restart the process again // stopKubelet will kill the running kubelet, and returns a func that will restart the process again
func stopKubelet() func() { func stopKubelet() func() {
kubeletServiceName := findRunningKubletServiceName() kubeletServiceName := findRunningKubletServiceName()
stdout, err := exec.Command("sudo", "systemctl", "kill", kubeletServiceName).CombinedOutput()
// reset the kubelet service start-limit-hit
stdout, err := exec.Command("sudo", "systemctl", "reset-failed", kubeletServiceName).CombinedOutput()
framework.ExpectNoError(err, "Failed to reset kubelet start-limit-hit with systemctl: %v, %v", err, stdout)
stdout, err = exec.Command("sudo", "systemctl", "kill", kubeletServiceName).CombinedOutput()
framework.ExpectNoError(err, "Failed to stop kubelet with systemctl: %v, %v", err, stdout) framework.ExpectNoError(err, "Failed to stop kubelet with systemctl: %v, %v", err, stdout)
return func() { return func() {
stdout, err := exec.Command("sudo", "systemctl", "start", kubeletServiceName).CombinedOutput() stdout, err := exec.Command("sudo", "systemctl", "start", kubeletServiceName).CombinedOutput()
framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %v", err, stdout) framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %v", err, stdout)
} }
} }
func kubeletHealthCheck(url string) bool {
insecureTransport := http.DefaultTransport.(*http.Transport).Clone()
insecureTransport.TLSClientConfig = &tls.Config{InsecureSkipVerify: true}
insecureHTTPClient := &http.Client{
Transport: insecureTransport,
}
req, err := http.NewRequest("HEAD", url, nil)
if err != nil {
return false
}
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", framework.TestContext.BearerToken))
resp, err := insecureHTTPClient.Do(req)
if err != nil {
klog.Warningf("Health check on %q failed, error=%v", url, err)
} else if resp.StatusCode != http.StatusOK {
klog.Warningf("Health check on %q failed, status=%d", url, resp.StatusCode)
}
return err == nil && resp.StatusCode == http.StatusOK
}
func toCgroupFsName(cgroupName cm.CgroupName) string { func toCgroupFsName(cgroupName cm.CgroupName) string {
if framework.TestContext.KubeletConfig.CgroupDriver == "systemd" { if framework.TestContext.KubeletConfig.CgroupDriver == "systemd" {
return cgroupName.ToSystemd() return cgroupName.ToSystemd()