Mirror of https://github.com/k3s-io/kubernetes.git, synced 2025-07-24 12:15:52 +00:00
Merge pull request #99027 from wojtek-t/enable_npd_test
Fix NPD test to run it in private and large clusters
Commit 9ead4bf2ee
@@ -40,11 +40,12 @@ import (
 )
 
 // This test checks if node-problem-detector (NPD) runs fine without error on
-// the nodes in the cluster. NPD's functionality is tested in e2e_node tests.
-var _ = SIGDescribe("NodeProblemDetector [DisabledForLargeClusters]", func() {
+// the up to 10 nodes in the cluster. NPD's functionality is tested in e2e_node tests.
+var _ = SIGDescribe("NodeProblemDetector", func() {
 	const (
 		pollInterval = 1 * time.Second
 		pollTimeout  = 1 * time.Minute
+		maxNodesToProcess = 10
 	)
 	f := framework.NewDefaultFramework("node-problem-detector")
 
@@ -60,18 +61,34 @@ var _ = SIGDescribe("NodeProblemDetector [DisabledForLargeClusters]", func() {
 		e2eskipper.SkipUnlessSSHKeyPresent()
 
 		ginkgo.By("Getting all nodes and their SSH-able IP addresses")
-		nodes, err := e2enode.GetReadySchedulableNodes(f.ClientSet)
+		readyNodes, err := e2enode.GetReadySchedulableNodes(f.ClientSet)
 		framework.ExpectNoError(err)
 
+		nodes := []v1.Node{}
 		hosts := []string{}
-		for _, node := range nodes.Items {
+		for _, node := range readyNodes.Items {
+			host := ""
 			for _, addr := range node.Status.Addresses {
 				if addr.Type == v1.NodeExternalIP {
-					hosts = append(hosts, net.JoinHostPort(addr.Address, "22"))
+					host = net.JoinHostPort(addr.Address, "22")
 					break
 				}
 			}
+			// Not every node has to have an external IP address.
+			if len(host) > 0 {
+				nodes = append(nodes, node)
+				hosts = append(hosts, host)
+			}
+		}
+
+		if len(nodes) == 0 {
+			ginkgo.Skip("Skipping test due to lack of ready nodes with public IP")
+		}
+
+		if len(nodes) > maxNodesToProcess {
+			nodes = nodes[:maxNodesToProcess]
+			hosts = hosts[:maxNodesToProcess]
 		}
-		framework.ExpectEqual(len(hosts), len(nodes.Items))
+
 		isStandaloneMode := make(map[string]bool)
 		cpuUsageStats := make(map[string][]float64)
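For reference, the node-selection change above is a plain filter-then-cap pattern: keep only nodes that expose an external IP, remember their SSH endpoints, and truncate both lists to maxNodesToProcess. Below is a minimal standalone sketch of that pattern, using a simplified nodeInfo type and a hypothetical filterAndCapNodes helper instead of the e2e framework's v1.Node and host handling.

package main

import (
	"fmt"
	"net"
)

// nodeInfo is a stand-in for v1.Node, carrying only what the sketch needs.
type nodeInfo struct {
	Name       string
	ExternalIP string // empty if the node has no external address
}

// filterAndCapNodes (hypothetical helper) keeps only nodes with an external IP,
// pairs each with its SSH endpoint, and truncates both slices to maxNodes,
// mirroring the maxNodesToProcess cap introduced in the test.
func filterAndCapNodes(all []nodeInfo, maxNodes int) ([]nodeInfo, []string) {
	nodes := []nodeInfo{}
	hosts := []string{}
	for _, n := range all {
		// Not every node has to have an external IP address.
		if n.ExternalIP == "" {
			continue
		}
		nodes = append(nodes, n)
		hosts = append(hosts, net.JoinHostPort(n.ExternalIP, "22"))
	}
	if len(nodes) > maxNodes {
		nodes = nodes[:maxNodes]
		hosts = hosts[:maxNodes]
	}
	return nodes, hosts
}

func main() {
	all := []nodeInfo{
		{Name: "node-a", ExternalIP: "203.0.113.10"},
		{Name: "node-b"}, // private node, skipped
		{Name: "node-c", ExternalIP: "203.0.113.12"},
	}
	nodes, hosts := filterAndCapNodes(all, 10)
	fmt.Println(len(nodes), hosts) // 2 [203.0.113.10:22 203.0.113.12:22]
}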
@@ -121,7 +138,7 @@ var _ = SIGDescribe("NodeProblemDetector [DisabledForLargeClusters]", func() {
 		}
 
 		ginkgo.By("Check node-problem-detector can post conditions and events to API server")
-		for _, node := range nodes.Items {
+		for _, node := range nodes {
 			ginkgo.By(fmt.Sprintf("Check node-problem-detector posted KernelDeadlock condition on node %q", node.Name))
 			gomega.Eventually(func() error {
 				return verifyNodeCondition(f, "KernelDeadlock", v1.ConditionTrue, "AUFSUmountHung", node.Name)
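The condition checks in this hunk keep using gomega.Eventually with the pollInterval and pollTimeout constants from the first hunk. Here is a minimal sketch of that polling pattern outside Ginkgo, assuming a recent gomega release; checkCondition is a hypothetical stand-in for verifyNodeCondition.

package main

import (
	"fmt"
	"time"

	"github.com/onsi/gomega"
)

func main() {
	// Outside Ginkgo, gomega needs an explicit Gomega instance with a fail handler.
	g := gomega.NewGomega(func(message string, _ ...int) { panic(message) })

	const (
		pollInterval = 1 * time.Second
		pollTimeout  = 1 * time.Minute
	)

	start := time.Now()
	// checkCondition is a hypothetical stand-in for verifyNodeCondition:
	// it returns nil once the condition has been observed.
	checkCondition := func() error {
		if time.Since(start) < 3*time.Second {
			return fmt.Errorf("condition not posted yet")
		}
		return nil
	}

	// Retry checkCondition every pollInterval until it succeeds or pollTimeout
	// elapses, the same pattern the test applies per node and per condition.
	g.Eventually(checkCondition, pollTimeout, pollInterval).Should(gomega.Succeed())
	fmt.Println("condition observed")
}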
@@ -156,7 +173,7 @@ var _ = SIGDescribe("NodeProblemDetector [DisabledForLargeClusters]", func() {
 					uptimeStats[host] = append(uptimeStats[host], uptime)
 				}
 			} else {
-				cpuUsage, rss, workingSet := getNpdPodStat(f, nodes.Items[j].Name)
+				cpuUsage, rss, workingSet := getNpdPodStat(f, nodes[j].Name)
 				cpuUsageStats[host] = append(cpuUsageStats[host], cpuUsage)
 				rssStats[host] = append(rssStats[host], rss)
 				workingSetStats[host] = append(workingSetStats[host], workingSet)
@@ -174,19 +191,19 @@ var _ = SIGDescribe("NodeProblemDetector [DisabledForLargeClusters]", func() {
 				// calculate its cpu usage from cgroup cpuacct value differences.
 				cpuUsage := cpuUsageStats[host][1] - cpuUsageStats[host][0]
 				totaltime := uptimeStats[host][1] - uptimeStats[host][0]
-				cpuStatsMsg += fmt.Sprintf(" %s[%.3f];", nodes.Items[i].Name, cpuUsage/totaltime)
+				cpuStatsMsg += fmt.Sprintf(" %s[%.3f];", nodes[i].Name, cpuUsage/totaltime)
 			} else {
 				sort.Float64s(cpuUsageStats[host])
-				cpuStatsMsg += fmt.Sprintf(" %s[%.3f|%.3f|%.3f];", nodes.Items[i].Name,
+				cpuStatsMsg += fmt.Sprintf(" %s[%.3f|%.3f|%.3f];", nodes[i].Name,
 					cpuUsageStats[host][0], cpuUsageStats[host][len(cpuUsageStats[host])/2], cpuUsageStats[host][len(cpuUsageStats[host])-1])
 			}
 
 			sort.Float64s(rssStats[host])
-			rssStatsMsg += fmt.Sprintf(" %s[%.1f|%.1f|%.1f];", nodes.Items[i].Name,
+			rssStatsMsg += fmt.Sprintf(" %s[%.1f|%.1f|%.1f];", nodes[i].Name,
 				rssStats[host][0], rssStats[host][len(rssStats[host])/2], rssStats[host][len(rssStats[host])-1])
 
 			sort.Float64s(workingSetStats[host])
-			workingSetStatsMsg += fmt.Sprintf(" %s[%.1f|%.1f|%.1f];", nodes.Items[i].Name,
+			workingSetStatsMsg += fmt.Sprintf(" %s[%.1f|%.1f|%.1f];", nodes[i].Name,
 				workingSetStats[host][0], workingSetStats[host][len(workingSetStats[host])/2], workingSetStats[host][len(workingSetStats[host])-1])
 		}
 		framework.Logf("Node-Problem-Detector CPU and Memory Stats:\n\t%s\n\t%s\n\t%s", cpuStatsMsg, rssStatsMsg, workingSetStatsMsg)
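The reporting code above summarizes each host either as a rate (delta of cgroup cpuacct samples over delta of uptime, standalone mode) or as min|median|max of the sorted samples (pod mode). Below is a small sketch of both computations on plain float64 slices; the sample values are made up for illustration.

package main

import (
	"fmt"
	"sort"
)

// summarize returns the smallest, median, and largest sample, matching the
// " name[min|median|max];" entries in the test's log message.
func summarize(samples []float64) (min, median, max float64) {
	sort.Float64s(samples)
	return samples[0], samples[len(samples)/2], samples[len(samples)-1]
}

func main() {
	// Standalone mode: two cgroup cpuacct readings and two uptime readings per
	// host; CPU usage is reported as the ratio of the deltas.
	cpuUsageStats := []float64{100.0, 130.0} // hypothetical cpuacct samples (seconds)
	uptimeStats := []float64{1000.0, 1600.0} // hypothetical uptime samples (seconds)
	cpuUsage := cpuUsageStats[1] - cpuUsageStats[0]
	totaltime := uptimeStats[1] - uptimeStats[0]
	fmt.Printf("CPU: node-a[%.3f];\n", cpuUsage/totaltime) // CPU: node-a[0.050];

	// Pod mode: several sampled values, reported as min|median|max.
	rssStats := []float64{21.5, 20.1, 22.3} // hypothetical RSS samples (MB)
	lo, mid, hi := summarize(rssStats)
	fmt.Printf("RSS: node-a[%.1f|%.1f|%.1f];\n", lo, mid, hi) // RSS: node-a[20.1|21.5|22.3];
}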