e2e: set expected cpu usage upper bounds
Fail the test if the actual cpu usage is greater than expected.
commit 7e8f4d831d
parent c1a0502487
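
The gist of the change: expected CPU usage is expressed per container as a map of percentile -> upper bound (in CPU cores), and the test fails if any observed percentile exceeds its bound. A minimal, self-contained sketch of that check follows; the type name mirrors the containersCPUSummary type added below, but the exceedsLimits helper and the sample numbers are illustrative only and not part of the commit.

package main

import "fmt"

// Mirrors the containersCPUSummary type added by the diff:
// container name -> (percentile -> CPU usage in cores).
type containersCPUSummary map[string]map[float64]float64

// exceedsLimits is a hypothetical helper for illustration; the commit itself
// performs this comparison inside verifyCPULimits and fails the test via Failf.
func exceedsLimits(expected, actual containersCPUSummary) []string {
    var errs []string
    for name, limits := range expected {
        usage, ok := actual[name]
        if !ok {
            errs = append(errs, fmt.Sprintf("container %q: no data", name))
            continue
        }
        for p, limit := range limits {
            if got, ok := usage[p]; ok && got > limit {
                errs = append(errs, fmt.Sprintf("container %q: %.0fth%% usage %.3f > %.3f",
                    name, p*100, got, limit))
            }
        }
    }
    return errs
}

func main() {
    // Sample numbers only: the 95th-percentile usage violates its bound.
    expected := containersCPUSummary{"/kubelet": {0.50: 0.15, 0.95: 0.35}}
    actual := containersCPUSummary{"/kubelet": {0.50: 0.12, 0.95: 0.41}}
    fmt.Println(exceedsLimits(expected, actual))
    // [container "/kubelet": 95th% usage 0.410 > 0.350]
}
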

@@ -18,6 +18,7 @@ package e2e

 import (
     "fmt"
+    "strings"
     "time"

     "k8s.io/kubernetes/pkg/api/unversioned"
@@ -31,13 +32,18 @@ import (

 const (
     // Interval to poll /stats/container on a node
-    containerStatsPollingPeriod = 10 * time.Second
+    containerStatsPollingPeriod = 3 * time.Second
     // The monitoring time for one test.
     monitoringTime = 20 * time.Minute
     // The periodic reporting period.
     reportingPeriod = 5 * time.Minute
 )

+type resourceTest struct {
+    podsPerNode int
+    limits      containersCPUSummary
+}
+
 func logPodsOnNodes(c *client.Client, nodeNames []string) {
     for _, n := range nodeNames {
         podList, err := GetKubeletPods(c, n)
@@ -49,7 +55,7 @@ func logPodsOnNodes(c *client.Client, nodeNames []string) {
     }
 }

-func runResourceTrackingTest(framework *Framework, podsPerNode int, nodeNames sets.String, resourceMonitor *resourceMonitor) {
+func runResourceTrackingTest(framework *Framework, podsPerNode int, nodeNames sets.String, rm *resourceMonitor, expected map[string]map[float64]float64) {
     numNodes := nodeNames.Len()
     totalPods := podsPerNode * numNodes
     By(fmt.Sprintf("Creating a RC of %d pods and wait until all pods of this RC are running", totalPods))
@@ -65,8 +71,8 @@ func runResourceTrackingTest(framework *Framework, podsPerNode int, nodeNames se
     })).NotTo(HaveOccurred())

     // Log once and flush the stats.
-    resourceMonitor.LogLatest()
-    resourceMonitor.Reset()
+    rm.LogLatest()
+    rm.Reset()

     By("Start monitoring resource usage")
     // Periodically dump the cpu summary until the deadline is met.
@@ -76,8 +82,6 @@ func runResourceTrackingTest(framework *Framework, podsPerNode int, nodeNames se
     // entries if we plan to monitor longer (e.g., 8 hours).
     deadline := time.Now().Add(monitoringTime)
     for time.Now().Before(deadline) {
-        Logf("Still running...%v left", deadline.Sub(time.Now()))
-        time.Sleep(reportingPeriod)
         timeLeft := deadline.Sub(time.Now())
         Logf("Still running...%v left", timeLeft)
         if timeLeft < reportingPeriod {
@@ -90,17 +94,54 @@ func runResourceTrackingTest(framework *Framework, podsPerNode int, nodeNames se

     By("Reporting overall resource usage")
     logPodsOnNodes(framework.Client, nodeNames.List())
-    resourceMonitor.LogCPUSummary()
-    resourceMonitor.LogLatest()
+    rm.LogLatest()
+    summary := rm.GetCPUSummary()
+    Logf("%s", rm.FormatCPUSummary(summary))
+    verifyCPULimits(expected, summary)

     By("Deleting the RC")
     DeleteRC(framework.Client, framework.Namespace.Name, rcName)
 }

+func verifyCPULimits(expected containersCPUSummary, actual nodesCPUSummary) {
+    if expected == nil {
+        return
+    }
+    var errList []string
+    for nodeName, perNodeSummary := range actual {
+        var nodeErrs []string
+        for cName, expectedResult := range expected {
+            perContainerSummary, ok := perNodeSummary[cName]
+            if !ok {
+                nodeErrs = append(nodeErrs, fmt.Sprintf("container %q: missing", cName))
+                continue
+            }
+            for p, expectedValue := range expectedResult {
+                actualValue, ok := perContainerSummary[p]
+                if !ok {
+                    nodeErrs = append(nodeErrs, fmt.Sprintf("container %q: missing percentile %v", cName, p))
+                    continue
+                }
+                if actualValue > expectedValue {
+                    nodeErrs = append(nodeErrs, fmt.Sprintf("container %q: expected %.0fth%% usage < %.3f; got %.3f",
+                        cName, p*100, expectedValue, actualValue))
+                }
+            }
+        }
+        if len(nodeErrs) > 0 {
+            errList = append(errList, fmt.Sprintf("node %v:\n %s", nodeName, strings.Join(nodeErrs, ", ")))
+        }
+    }
+    if len(errList) > 0 {
+        Failf("CPU usage exceeding limits:\n %s", strings.Join(errList, "\n"))
+    }
+}
+
 var _ = Describe("Kubelet", func() {
     var nodeNames sets.String
     framework := NewFramework("kubelet-perf")
-    var resourceMonitor *resourceMonitor
+    var rm *resourceMonitor

     BeforeEach(func() {
         nodes, err := framework.Client.Nodes().List(unversioned.ListOptions{})
@@ -109,22 +150,35 @@ var _ = Describe("Kubelet", func() {
         for _, node := range nodes.Items {
             nodeNames.Insert(node.Name)
         }
-        resourceMonitor = newResourceMonitor(framework.Client, targetContainers(), containerStatsPollingPeriod)
-        resourceMonitor.Start()
+        rm = newResourceMonitor(framework.Client, targetContainers(), containerStatsPollingPeriod)
+        rm.Start()
     })

     AfterEach(func() {
-        resourceMonitor.Stop()
+        rm.Stop()
     })

     Describe("regular resource usage tracking", func() {
-        density := []int{0, 40}
-        for i := range density {
-            podsPerNode := density[i]
+        rTests := []resourceTest{
+            {podsPerNode: 0,
+                limits: containersCPUSummary{
+                    "/kubelet":       {0.50: 0.05, 0.95: 0.15},
+                    "/docker-daemon": {0.50: 0.03, 0.95: 0.06},
+                },
+            },
+            {podsPerNode: 40,
+                limits: containersCPUSummary{
+                    "/kubelet":       {0.50: 0.15, 0.95: 0.35},
+                    "/docker-daemon": {0.50: 0.06, 0.95: 0.30},
+                },
+            },
+        }
+        for _, testArg := range rTests {
+            itArg := testArg
+            podsPerNode := itArg.podsPerNode
             name := fmt.Sprintf(
-                "over %v with %d pods per node", monitoringTime, podsPerNode)
+                "for %d pods per node over %v", podsPerNode, monitoringTime)
             It(name, func() {
-                runResourceTrackingTest(framework, podsPerNode, nodeNames, resourceMonitor)
+                runResourceTrackingTest(framework, podsPerNode, nodeNames, rm, itArg.limits)
             })
         }
     })
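
A side note on the loop above: the itArg := testArg copy is deliberate, because the It closures run only after the loop has finished building the spec, so each iteration needs its own copy of the test arguments. A standalone illustration of the same closure pitfall in plain Go (not Ginkgo; the names here are made up for the example):

package main

import "fmt"

func main() {
    tests := []int{0, 40}
    var fns []func()
    for _, t := range tests {
        tt := t // per-iteration copy, same idea as `itArg := testArg` above
        fns = append(fns, func() { fmt.Println(tt) })
    }
    for _, f := range fns {
        f() // prints 0 then 40; without the copy (pre-Go 1.22) both would print 40
    }
}
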
@@ -133,9 +187,9 @@ var _ = Describe("Kubelet", func() {
         for i := range density {
             podsPerNode := density[i]
             name := fmt.Sprintf(
-                "over %v with %d pods per node.", monitoringTime, podsPerNode)
+                "for %d pods per node over %v", podsPerNode, monitoringTime)
             It(name, func() {
-                runResourceTrackingTest(framework, podsPerNode, nodeNames, resourceMonitor)
+                runResourceTrackingTest(framework, podsPerNode, nodeNames, rm, nil)
             })
         }
     })

@@ -692,7 +692,7 @@ func (r *resourceMonitor) Start() {
     }
     r.collectors = make(map[string]*resourceCollector, 0)
     for _, node := range nodes.Items {
-        collector := newResourceCollector(r.client, node.Name, r.containers, pollInterval)
+        collector := newResourceCollector(r.client, node.Name, r.containers, r.pollingInterval)
         r.collectors[node.Name] = collector
         collector.Start()
     }
@@ -716,33 +716,64 @@ func (r *resourceMonitor) LogLatest() {
     }
 }

-func (r *resourceMonitor) LogCPUSummary() {
+// containersCPUSummary is indexed by the container name with each entry a
+// (percentile, value) map.
+type containersCPUSummary map[string]map[float64]float64
+
+// nodesCPUSummary is indexed by the node name with each entry a
+// containersCPUSummary map.
+type nodesCPUSummary map[string]containersCPUSummary
+
+func (r *resourceMonitor) FormatCPUSummary(summary nodesCPUSummary) string {
     // Example output for a node (the percentiles may differ):
-    // CPU usage of containers on node "e2e-test-yjhong-minion-0vj7":
+    // CPU usage of containers on node "e2e-test-foo-minion-0vj7":
     // container        5th%  50th% 90th% 95th%
     // "/"              0.051 0.159 0.387 0.455
     // "/docker-daemon" 0.000 0.000 0.146 0.166
     // "/kubelet"       0.036 0.053 0.091 0.154
     // "/system"        0.001 0.001 0.001 0.002
+    var summaryStrings []string
     var header []string
     header = append(header, "container")
     for _, p := range percentiles {
         header = append(header, fmt.Sprintf("%.0fth%%", p*100))
     }
-    for nodeName, collector := range r.collectors {
+    for nodeName, containers := range summary {
         buf := &bytes.Buffer{}
         w := tabwriter.NewWriter(buf, 1, 0, 1, ' ', 0)
         fmt.Fprintf(w, "%s\n", strings.Join(header, "\t"))
         for _, containerName := range targetContainers() {
-            data := collector.GetBasicCPUStats(containerName)
             var s []string
             s = append(s, fmt.Sprintf("%q", containerName))
+            data, ok := containers[containerName]
             for _, p := range percentiles {
-                s = append(s, fmt.Sprintf("%.3f", data[p]))
+                value := "N/A"
+                if ok {
+                    value = fmt.Sprintf("%.3f", data[p])
+                }
+                s = append(s, value)
             }
             fmt.Fprintf(w, "%s\n", strings.Join(s, "\t"))
         }
         w.Flush()
-        Logf("\nCPU usage of containers on node %q:\n%s", nodeName, buf.String())
+        summaryStrings = append(summaryStrings, fmt.Sprintf("CPU usage of containers on node %q\n:%s", nodeName, buf.String()))
     }
+    return strings.Join(summaryStrings, "\n")
+}
+
+func (r *resourceMonitor) LogCPUSummary() {
+    summary := r.GetCPUSummary()
+    Logf(r.FormatCPUSummary(summary))
+}
+
+func (r *resourceMonitor) GetCPUSummary() nodesCPUSummary {
+    result := make(nodesCPUSummary)
+    for nodeName, collector := range r.collectors {
+        result[nodeName] = make(containersCPUSummary)
+        for _, containerName := range targetContainers() {
+            data := collector.GetBasicCPUStats(containerName)
+            result[nodeName][containerName] = data
+        }
+    }
+    return result
 }
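
FormatCPUSummary builds its per-node table with text/tabwriter, using the writer settings shown above (minwidth 1, tabwidth 0, padding 1, space as the pad character). A small standalone sketch of that formatting approach; the container names and numbers here are placeholders, not measured data:

package main

import (
    "bytes"
    "fmt"
    "text/tabwriter"
)

func main() {
    buf := &bytes.Buffer{}
    // Same tabwriter settings as in the diff: minwidth 1, tabwidth 0, padding 1.
    w := tabwriter.NewWriter(buf, 1, 0, 1, ' ', 0)
    fmt.Fprintln(w, "container\t50th%\t95th%")
    fmt.Fprintln(w, "\"/kubelet\"\t0.053\t0.154")
    fmt.Fprintln(w, "\"/docker-daemon\"\t0.000\t0.166")
    w.Flush()
    fmt.Print(buf.String())
    // container        50th% 95th%
    // "/kubelet"       0.053 0.154
    // "/docker-daemon" 0.000 0.166
}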