Merge pull request #99027 from wojtek-t/enable_npd_test

Fix NPD test to run in private and large clusters
commit 9ead4bf2ee
Kubernetes Prow Robot, 2021-02-18 00:36:51 -08:00 (committed by GitHub)


@@ -40,11 +40,12 @@ import (
 )
 // This test checks if node-problem-detector (NPD) runs fine without error on
-// the nodes in the cluster. NPD's functionality is tested in e2e_node tests.
-var _ = SIGDescribe("NodeProblemDetector [DisabledForLargeClusters]", func() {
+// the up to 10 nodes in the cluster. NPD's functionality is tested in e2e_node tests.
+var _ = SIGDescribe("NodeProblemDetector", func() {
 	const (
 		pollInterval = 1 * time.Second
 		pollTimeout  = 1 * time.Minute
+		maxNodesToProcess = 10
 	)
 	f := framework.NewDefaultFramework("node-problem-detector")
@@ -60,18 +61,34 @@ var _ = SIGDescribe("NodeProblemDetector [DisabledForLargeClusters]", func() {
 		e2eskipper.SkipUnlessSSHKeyPresent()
 		ginkgo.By("Getting all nodes and their SSH-able IP addresses")
-		nodes, err := e2enode.GetReadySchedulableNodes(f.ClientSet)
+		readyNodes, err := e2enode.GetReadySchedulableNodes(f.ClientSet)
 		framework.ExpectNoError(err)
+		nodes := []v1.Node{}
 		hosts := []string{}
-		for _, node := range nodes.Items {
+		for _, node := range readyNodes.Items {
+			host := ""
 			for _, addr := range node.Status.Addresses {
 				if addr.Type == v1.NodeExternalIP {
-					hosts = append(hosts, net.JoinHostPort(addr.Address, "22"))
+					host = net.JoinHostPort(addr.Address, "22")
 					break
 				}
 			}
+			// Not every node has to have an external IP address.
+			if len(host) > 0 {
+				nodes = append(nodes, node)
+				hosts = append(hosts, host)
+			}
+		}
+		if len(nodes) == 0 {
+			ginkgo.Skip("Skipping test due to lack of ready nodes with public IP")
+		}
+		if len(nodes) > maxNodesToProcess {
+			nodes = nodes[:maxNodesToProcess]
+			hosts = hosts[:maxNodesToProcess]
 		}
-		framework.ExpectEqual(len(hosts), len(nodes.Items))
 		isStandaloneMode := make(map[string]bool)
 		cpuUsageStats := make(map[string][]float64)
@@ -121,7 +138,7 @@ var _ = SIGDescribe("NodeProblemDetector [DisabledForLargeClusters]", func() {
 		}
 		ginkgo.By("Check node-problem-detector can post conditions and events to API server")
-		for _, node := range nodes.Items {
+		for _, node := range nodes {
 			ginkgo.By(fmt.Sprintf("Check node-problem-detector posted KernelDeadlock condition on node %q", node.Name))
 			gomega.Eventually(func() error {
 				return verifyNodeCondition(f, "KernelDeadlock", v1.ConditionTrue, "AUFSUmountHung", node.Name)
@@ -156,7 +173,7 @@ var _ = SIGDescribe("NodeProblemDetector [DisabledForLargeClusters]", func() {
 					uptimeStats[host] = append(uptimeStats[host], uptime)
 				}
 			} else {
-				cpuUsage, rss, workingSet := getNpdPodStat(f, nodes.Items[j].Name)
+				cpuUsage, rss, workingSet := getNpdPodStat(f, nodes[j].Name)
 				cpuUsageStats[host] = append(cpuUsageStats[host], cpuUsage)
 				rssStats[host] = append(rssStats[host], rss)
 				workingSetStats[host] = append(workingSetStats[host], workingSet)
@@ -174,19 +191,19 @@ var _ = SIGDescribe("NodeProblemDetector [DisabledForLargeClusters]", func() {
 			// calculate its cpu usage from cgroup cpuacct value differences.
 			cpuUsage := cpuUsageStats[host][1] - cpuUsageStats[host][0]
 			totaltime := uptimeStats[host][1] - uptimeStats[host][0]
-			cpuStatsMsg += fmt.Sprintf(" %s[%.3f];", nodes.Items[i].Name, cpuUsage/totaltime)
+			cpuStatsMsg += fmt.Sprintf(" %s[%.3f];", nodes[i].Name, cpuUsage/totaltime)
 		} else {
 			sort.Float64s(cpuUsageStats[host])
-			cpuStatsMsg += fmt.Sprintf(" %s[%.3f|%.3f|%.3f];", nodes.Items[i].Name,
+			cpuStatsMsg += fmt.Sprintf(" %s[%.3f|%.3f|%.3f];", nodes[i].Name,
 				cpuUsageStats[host][0], cpuUsageStats[host][len(cpuUsageStats[host])/2], cpuUsageStats[host][len(cpuUsageStats[host])-1])
 		}
 		sort.Float64s(rssStats[host])
-		rssStatsMsg += fmt.Sprintf(" %s[%.1f|%.1f|%.1f];", nodes.Items[i].Name,
+		rssStatsMsg += fmt.Sprintf(" %s[%.1f|%.1f|%.1f];", nodes[i].Name,
 			rssStats[host][0], rssStats[host][len(rssStats[host])/2], rssStats[host][len(rssStats[host])-1])
 		sort.Float64s(workingSetStats[host])
-		workingSetStatsMsg += fmt.Sprintf(" %s[%.1f|%.1f|%.1f];", nodes.Items[i].Name,
+		workingSetStatsMsg += fmt.Sprintf(" %s[%.1f|%.1f|%.1f];", nodes[i].Name,
 			workingSetStats[host][0], workingSetStats[host][len(workingSetStats[host])/2], workingSetStats[host][len(workingSetStats[host])-1])
 	}
 	framework.Logf("Node-Problem-Detector CPU and Memory Stats:\n\t%s\n\t%s\n\t%s", cpuStatsMsg, rssStatsMsg, workingSetStatsMsg)
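
For readers skimming the diff, the core of the change is a filter-and-cap step over the ready nodes: keep only nodes that expose an external IP (private clusters may have none, hence the skip), and process at most maxNodesToProcess of them so the test stays cheap on large clusters. The sketch below restates that pattern as a standalone helper; the function name pickSSHableNodes, its package, and its signature are illustrative only and do not appear in the test file.

package npd

import (
	"net"

	v1 "k8s.io/api/core/v1"
)

// pickSSHableNodes mirrors the selection logic introduced by this PR:
// pair each node that has an external IP with its "ip:22" SSH endpoint,
// skip nodes without one, and cap the result at maxNodes.
// (Illustrative helper, not part of the actual test file.)
func pickSSHableNodes(ready []v1.Node, maxNodes int) ([]v1.Node, []string) {
	nodes := []v1.Node{}
	hosts := []string{}
	for _, node := range ready {
		host := ""
		for _, addr := range node.Status.Addresses {
			if addr.Type == v1.NodeExternalIP {
				host = net.JoinHostPort(addr.Address, "22")
				break
			}
		}
		// Nodes without an external IP (common in private clusters) are skipped.
		if host == "" {
			continue
		}
		nodes = append(nodes, node)
		hosts = append(hosts, host)
	}
	// Bound the amount of work on large clusters.
	if len(nodes) > maxNodes {
		nodes = nodes[:maxNodes]
		hosts = hosts[:maxNodes]
	}
	return nodes, hosts
}

A caller following the diff would then skip the test when the returned slice is empty (as the diff does with ginkgo.Skip) and iterate over the capped slices everywhere the test previously used nodes.Items.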