Merge pull request #99027 from wojtek-t/enable_npd_test

Fix NPD test to run it in private and large clusters
This commit is contained in:
Kubernetes Prow Robot 2021-02-18 00:36:51 -08:00 committed by GitHub
commit 9ead4bf2ee
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -40,11 +40,12 @@ import (
)
// This test checks if node-problem-detector (NPD) runs fine without error on
// the nodes in the cluster. NPD's functionality is tested in e2e_node tests.
var _ = SIGDescribe("NodeProblemDetector [DisabledForLargeClusters]", func() {
// up to 10 nodes in the cluster. NPD's functionality is tested in e2e_node tests.
var _ = SIGDescribe("NodeProblemDetector", func() {
const (
pollInterval = 1 * time.Second
pollTimeout = 1 * time.Minute
pollInterval = 1 * time.Second
pollTimeout = 1 * time.Minute
maxNodesToProcess = 10
)
f := framework.NewDefaultFramework("node-problem-detector")
@ -60,18 +61,34 @@ var _ = SIGDescribe("NodeProblemDetector [DisabledForLargeClusters]", func() {
e2eskipper.SkipUnlessSSHKeyPresent()
ginkgo.By("Getting all nodes and their SSH-able IP addresses")
nodes, err := e2enode.GetReadySchedulableNodes(f.ClientSet)
readyNodes, err := e2enode.GetReadySchedulableNodes(f.ClientSet)
framework.ExpectNoError(err)
nodes := []v1.Node{}
hosts := []string{}
for _, node := range nodes.Items {
for _, node := range readyNodes.Items {
host := ""
for _, addr := range node.Status.Addresses {
if addr.Type == v1.NodeExternalIP {
hosts = append(hosts, net.JoinHostPort(addr.Address, "22"))
host = net.JoinHostPort(addr.Address, "22")
break
}
}
// Not every node has to have an external IP address.
if len(host) > 0 {
nodes = append(nodes, node)
hosts = append(hosts, host)
}
}
if len(nodes) == 0 {
ginkgo.Skip("Skipping test due to lack of ready nodes with public IP")
}
if len(nodes) > maxNodesToProcess {
nodes = nodes[:maxNodesToProcess]
hosts = hosts[:maxNodesToProcess]
}
framework.ExpectEqual(len(hosts), len(nodes.Items))
isStandaloneMode := make(map[string]bool)
cpuUsageStats := make(map[string][]float64)
@ -121,7 +138,7 @@ var _ = SIGDescribe("NodeProblemDetector [DisabledForLargeClusters]", func() {
}
ginkgo.By("Check node-problem-detector can post conditions and events to API server")
for _, node := range nodes.Items {
for _, node := range nodes {
ginkgo.By(fmt.Sprintf("Check node-problem-detector posted KernelDeadlock condition on node %q", node.Name))
gomega.Eventually(func() error {
return verifyNodeCondition(f, "KernelDeadlock", v1.ConditionTrue, "AUFSUmountHung", node.Name)
@ -156,7 +173,7 @@ var _ = SIGDescribe("NodeProblemDetector [DisabledForLargeClusters]", func() {
uptimeStats[host] = append(uptimeStats[host], uptime)
}
} else {
cpuUsage, rss, workingSet := getNpdPodStat(f, nodes.Items[j].Name)
cpuUsage, rss, workingSet := getNpdPodStat(f, nodes[j].Name)
cpuUsageStats[host] = append(cpuUsageStats[host], cpuUsage)
rssStats[host] = append(rssStats[host], rss)
workingSetStats[host] = append(workingSetStats[host], workingSet)
@ -174,19 +191,19 @@ var _ = SIGDescribe("NodeProblemDetector [DisabledForLargeClusters]", func() {
// calculate its cpu usage from cgroup cpuacct value differences.
cpuUsage := cpuUsageStats[host][1] - cpuUsageStats[host][0]
totaltime := uptimeStats[host][1] - uptimeStats[host][0]
cpuStatsMsg += fmt.Sprintf(" %s[%.3f];", nodes.Items[i].Name, cpuUsage/totaltime)
cpuStatsMsg += fmt.Sprintf(" %s[%.3f];", nodes[i].Name, cpuUsage/totaltime)
} else {
sort.Float64s(cpuUsageStats[host])
cpuStatsMsg += fmt.Sprintf(" %s[%.3f|%.3f|%.3f];", nodes.Items[i].Name,
cpuStatsMsg += fmt.Sprintf(" %s[%.3f|%.3f|%.3f];", nodes[i].Name,
cpuUsageStats[host][0], cpuUsageStats[host][len(cpuUsageStats[host])/2], cpuUsageStats[host][len(cpuUsageStats[host])-1])
}
sort.Float64s(rssStats[host])
rssStatsMsg += fmt.Sprintf(" %s[%.1f|%.1f|%.1f];", nodes.Items[i].Name,
rssStatsMsg += fmt.Sprintf(" %s[%.1f|%.1f|%.1f];", nodes[i].Name,
rssStats[host][0], rssStats[host][len(rssStats[host])/2], rssStats[host][len(rssStats[host])-1])
sort.Float64s(workingSetStats[host])
workingSetStatsMsg += fmt.Sprintf(" %s[%.1f|%.1f|%.1f];", nodes.Items[i].Name,
workingSetStatsMsg += fmt.Sprintf(" %s[%.1f|%.1f|%.1f];", nodes[i].Name,
workingSetStats[host][0], workingSetStats[host][len(workingSetStats[host])/2], workingSetStats[host][len(workingSetStats[host])-1])
}
framework.Logf("Node-Problem-Detector CPU and Memory Stats:\n\t%s\n\t%s\n\t%s", cpuStatsMsg, rssStatsMsg, workingSetStatsMsg)