mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-23 11:50:44 +00:00
add logging time series data to benchmark tests
This commit is contained in:
parent
ff58d04a34
commit
5498130255
103
test/e2e_node/benchmark_util.go
Normal file
103
test/e2e_node/benchmark_util.go
Normal file
@ -0,0 +1,103 @@
|
||||
// +build linux
|
||||
|
||||
/*
|
||||
Copyright 2015 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package e2e_node
|
||||
|
||||
import (
|
||||
"sort"
|
||||
|
||||
"k8s.io/kubernetes/pkg/api/unversioned"
|
||||
"k8s.io/kubernetes/test/e2e/framework"
|
||||
"k8s.io/kubernetes/test/e2e/perftype"
|
||||
)
|
||||
|
||||
const (
|
||||
// TODO(coufon): be consistent with perf_util.go version (not exposed)
|
||||
currentTimeSeriesVersion = "v1"
|
||||
TimeSeriesTag = "[Result:TimeSeries]"
|
||||
TimeSeriesEnd = "[Finish:TimeSeries]"
|
||||
)
|
||||
|
||||
type NodeTimeSeries struct {
|
||||
// value in OperationData is an array of timestamps
|
||||
OperationData map[string][]int64 `json:"op_data,omitempty"`
|
||||
ResourceData map[string]*ResourceSeries `json:"resource_data,omitempty"`
|
||||
Labels map[string]string `json:"labels"`
|
||||
Version string `json:"version"`
|
||||
}
|
||||
|
||||
// logDensityTimeSeries logs the time series data of operation and resource usage
|
||||
func logDensityTimeSeries(rc *ResourceCollector, create, watch map[string]unversioned.Time, testName string) {
|
||||
timeSeries := &NodeTimeSeries{
|
||||
Labels: map[string]string{
|
||||
"node": framework.TestContext.NodeName,
|
||||
"datatype": "timeseries",
|
||||
"test": testName,
|
||||
},
|
||||
Version: currentTimeSeriesVersion,
|
||||
}
|
||||
// Attach operation time series.
|
||||
timeSeries.OperationData = map[string][]int64{
|
||||
"create": getCumulatedPodTimeSeries(create),
|
||||
"running": getCumulatedPodTimeSeries(watch),
|
||||
}
|
||||
// Attach resource time series.
|
||||
timeSeries.ResourceData = rc.GetResourceTimeSeries()
|
||||
// Log time series with tags
|
||||
framework.Logf("%s %s\n%s", TimeSeriesTag, framework.PrettyPrintJSON(timeSeries), TimeSeriesEnd)
|
||||
}
|
||||
|
||||
type int64arr []int64
|
||||
|
||||
func (a int64arr) Len() int { return len(a) }
|
||||
func (a int64arr) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
|
||||
func (a int64arr) Less(i, j int) bool { return a[i] < a[j] }
|
||||
|
||||
// getCumulatedPodTimeSeries gets the cumulative pod number time series.
|
||||
func getCumulatedPodTimeSeries(timePerPod map[string]unversioned.Time) []int64 {
|
||||
timeSeries := make(int64arr, 0)
|
||||
for _, ts := range timePerPod {
|
||||
timeSeries = append(timeSeries, ts.Time.UnixNano())
|
||||
}
|
||||
// Sort all timestamps.
|
||||
sort.Sort(timeSeries)
|
||||
return timeSeries
|
||||
}
|
||||
|
||||
// getLatencyPerfData returns perf data from latency
|
||||
func getLatencyPerfData(latency framework.LatencyMetric, testName string) *perftype.PerfData {
|
||||
return &perftype.PerfData{
|
||||
Version: "v1",
|
||||
DataItems: []perftype.DataItem{
|
||||
{
|
||||
Data: map[string]float64{
|
||||
"Perc50": float64(latency.Perc50) / 1000000,
|
||||
"Perc90": float64(latency.Perc90) / 1000000,
|
||||
"Perc99": float64(latency.Perc99) / 1000000,
|
||||
},
|
||||
Unit: "ms",
|
||||
Labels: map[string]string{
|
||||
"datatype": "latency",
|
||||
"latencytype": "test-e2e",
|
||||
"node": framework.TestContext.NodeName,
|
||||
"test": testName,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
@ -106,14 +106,15 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() {
|
||||
itArg := testArg
|
||||
It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval",
|
||||
itArg.podsNr, itArg.interval), func() {
|
||||
|
||||
batchLag, e2eLags := runDensityBatchTest(f, rc, itArg)
|
||||
itArg.createMethod = "batch"
|
||||
testName := itArg.getTestName()
|
||||
batchLag, e2eLags := runDensityBatchTest(f, rc, itArg, false)
|
||||
|
||||
By("Verifying latency")
|
||||
printAndVerifyLatency(batchLag, e2eLags, itArg, true)
|
||||
logAndVerifyLatency(batchLag, e2eLags, itArg.podStartupLimits, itArg.podBatchStartupLimit, testName, true)
|
||||
|
||||
By("Verifying resource")
|
||||
printAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, true)
|
||||
logAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, testName, true)
|
||||
})
|
||||
}
|
||||
})
|
||||
@ -138,14 +139,15 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() {
|
||||
itArg := testArg
|
||||
It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval",
|
||||
itArg.podsNr, itArg.interval), func() {
|
||||
|
||||
batchLag, e2eLags := runDensityBatchTest(f, rc, itArg)
|
||||
itArg.createMethod = "batch"
|
||||
testName := itArg.getTestName()
|
||||
batchLag, e2eLags := runDensityBatchTest(f, rc, itArg, true)
|
||||
|
||||
By("Verifying latency")
|
||||
printAndVerifyLatency(batchLag, e2eLags, itArg, false)
|
||||
logAndVerifyLatency(batchLag, e2eLags, itArg.podStartupLimits, itArg.podBatchStartupLimit, testName, false)
|
||||
|
||||
By("Verifying resource")
|
||||
printAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, false)
|
||||
logAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, testName, false)
|
||||
})
|
||||
}
|
||||
})
|
||||
@ -175,14 +177,15 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() {
|
||||
itArg := testArg
|
||||
It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %d background pods",
|
||||
itArg.podsNr, itArg.bgPodsNr), func() {
|
||||
|
||||
itArg.createMethod = "sequence"
|
||||
testName := itArg.getTestName()
|
||||
batchlag, e2eLags := runDensitySeqTest(f, rc, itArg)
|
||||
|
||||
By("Verifying latency")
|
||||
printAndVerifyLatency(batchlag, e2eLags, itArg, true)
|
||||
logAndVerifyLatency(batchlag, e2eLags, itArg.podStartupLimits, itArg.podBatchStartupLimit, testName, true)
|
||||
|
||||
By("Verifying resource")
|
||||
printAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, true)
|
||||
logAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, testName, true)
|
||||
})
|
||||
}
|
||||
})
|
||||
@ -207,14 +210,15 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() {
|
||||
itArg := testArg
|
||||
It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %d background pods",
|
||||
itArg.podsNr, itArg.bgPodsNr), func() {
|
||||
|
||||
itArg.createMethod = "sequence"
|
||||
testName := itArg.getTestName()
|
||||
batchlag, e2eLags := runDensitySeqTest(f, rc, itArg)
|
||||
|
||||
By("Verifying latency")
|
||||
printAndVerifyLatency(batchlag, e2eLags, itArg, false)
|
||||
logAndVerifyLatency(batchlag, e2eLags, itArg.podStartupLimits, itArg.podBatchStartupLimit, testName, false)
|
||||
|
||||
By("Verifying resource")
|
||||
printAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, false)
|
||||
logAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, testName, false)
|
||||
})
|
||||
}
|
||||
})
|
||||
@ -227,6 +231,8 @@ type densityTest struct {
|
||||
bgPodsNr int
|
||||
// interval between creating pod (rate control)
|
||||
interval time.Duration
|
||||
// create pods in 'batch' or 'sequence'
|
||||
createMethod string
|
||||
// performance limits
|
||||
cpuLimits framework.ContainersCPUSummary
|
||||
memLimits framework.ResourceUsagePerContainer
|
||||
@ -234,8 +240,13 @@ type densityTest struct {
|
||||
podBatchStartupLimit time.Duration
|
||||
}
|
||||
|
||||
func (dt *densityTest) getTestName() string {
|
||||
return fmt.Sprintf("create_%s_%d_%d_%d", dt.createMethod, dt.podsNr, dt.bgPodsNr, dt.interval.Nanoseconds()/1000000)
|
||||
}
|
||||
|
||||
// runDensityBatchTest runs the density batch pod creation test
|
||||
func runDensityBatchTest(f *framework.Framework, rc *ResourceCollector, testArg densityTest) (time.Duration, []framework.PodLatencyData) {
|
||||
func runDensityBatchTest(f *framework.Framework, rc *ResourceCollector, testArg densityTest,
|
||||
isLogTimeSeries bool) (time.Duration, []framework.PodLatencyData) {
|
||||
const (
|
||||
podType = "density_test_pod"
|
||||
sleepBeforeCreatePods = 30 * time.Second
|
||||
@ -308,6 +319,13 @@ func runDensityBatchTest(f *framework.Framework, rc *ResourceCollector, testArg
|
||||
sort.Sort(framework.LatencySlice(e2eLags))
|
||||
batchLag := lastRunning.Time.Sub(firstCreate.Time)
|
||||
|
||||
// Log time series data.
|
||||
if isLogTimeSeries {
|
||||
logDensityTimeSeries(rc, createTimes, watchTimes, testArg.getTestName())
|
||||
}
|
||||
// Log throughput data.
|
||||
logPodCreateThroughput(batchLag, e2eLags, testArg.podsNr)
|
||||
|
||||
return batchLag, e2eLags
|
||||
}
|
||||
|
||||
@ -333,6 +351,9 @@ func runDensitySeqTest(f *framework.Framework, rc *ResourceCollector, testArg de
|
||||
// create pods sequentially (back-to-back)
|
||||
batchlag, e2eLags := createBatchPodSequential(f, testPods)
|
||||
|
||||
// Log throughput data.
|
||||
logPodCreateThroughput(batchlag, e2eLags, testArg.podsNr)
|
||||
|
||||
return batchlag, e2eLags
|
||||
}
|
||||
|
||||
@ -454,31 +475,36 @@ func createBatchPodSequential(f *framework.Framework, pods []*api.Pod) (time.Dur
|
||||
return batchLag, e2eLags
|
||||
}
|
||||
|
||||
// printAndVerifyLatency verifies that whether pod creation latency satisfies the limit.
|
||||
func printAndVerifyLatency(batchLag time.Duration, e2eLags []framework.PodLatencyData, testArg densityTest, isVerify bool) {
|
||||
// logAndVerifyLatency verifies that whether pod creation latency satisfies the limit.
|
||||
func logAndVerifyLatency(batchLag time.Duration, e2eLags []framework.PodLatencyData, podStartupLimits framework.LatencyMetric,
|
||||
podBatchStartupLimit time.Duration, testName string, isVerify bool) {
|
||||
framework.PrintLatencies(e2eLags, "worst client e2e total latencies")
|
||||
|
||||
// TODO(coufon): do not trust `kubelet' metrics since they are not reset!
|
||||
// TODO(coufon): do not trust 'kubelet' metrics since they are not reset!
|
||||
latencyMetrics, _ := getPodStartLatency(kubeletAddr)
|
||||
framework.Logf("Kubelet Prometheus metrics (not reset):\n%s", framework.PrettyPrintJSON(latencyMetrics))
|
||||
|
||||
// check whether e2e pod startup time is acceptable.
|
||||
podCreateLatency := framework.PodStartupLatency{Latency: framework.ExtractLatencyMetrics(e2eLags)}
|
||||
framework.Logf("Pod create latency: %s", framework.PrettyPrintJSON(podCreateLatency))
|
||||
|
||||
// calculate and log throughput
|
||||
throughputBatch := float64(testArg.podsNr) / batchLag.Minutes()
|
||||
framework.Logf("Batch creation throughput is %.1f pods/min", throughputBatch)
|
||||
throughputSequential := 1.0 / e2eLags[len(e2eLags)-1].Latency.Minutes()
|
||||
framework.Logf("Sequential creation throughput is %.1f pods/min", throughputSequential)
|
||||
// log latency perf data
|
||||
framework.PrintPerfData(getLatencyPerfData(podCreateLatency.Latency, testName))
|
||||
|
||||
if isVerify {
|
||||
framework.ExpectNoError(verifyPodStartupLatency(testArg.podStartupLimits, podCreateLatency.Latency))
|
||||
// check whether e2e pod startup time is acceptable.
|
||||
framework.ExpectNoError(verifyPodStartupLatency(podStartupLimits, podCreateLatency.Latency))
|
||||
|
||||
// check bactch pod creation latency
|
||||
if testArg.podBatchStartupLimit > 0 {
|
||||
Expect(batchLag <= testArg.podBatchStartupLimit).To(Equal(true), "Batch creation startup time %v exceed limit %v",
|
||||
batchLag, testArg.podBatchStartupLimit)
|
||||
if podBatchStartupLimit > 0 {
|
||||
Expect(batchLag <= podBatchStartupLimit).To(Equal(true), "Batch creation startup time %v exceed limit %v",
|
||||
batchLag, podBatchStartupLimit)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// logThroughput calculates and logs pod creation throughput.
|
||||
func logPodCreateThroughput(batchLag time.Duration, e2eLags []framework.PodLatencyData, podsNr int) {
|
||||
throughputBatch := float64(podsNr) / batchLag.Minutes()
|
||||
framework.Logf("Batch creation throughput is %.1f pods/min", throughputBatch)
|
||||
throughputSequential := 1.0 / e2eLags[len(e2eLags)-1].Latency.Minutes()
|
||||
framework.Logf("Sequential creation throughput is %.1f pods/min", throughputSequential)
|
||||
}
|
||||
|
@ -53,8 +53,6 @@ const (
|
||||
cadvisorPort = 8090
|
||||
// housekeeping interval of Cadvisor (second)
|
||||
houseKeepingInterval = 1
|
||||
// TODO(coufon): be consistent with perf_util.go version (not exposed)
|
||||
currentTimeSeriesVersion = "v1"
|
||||
)
|
||||
|
||||
var (
|
||||
@ -426,39 +424,22 @@ type ResourceSeries struct {
|
||||
Units map[string]string `json:"unit"`
|
||||
}
|
||||
|
||||
// Time series of resource usage per container
|
||||
type ResourceSeriesPerContainer struct {
|
||||
Data map[string]*ResourceSeries `json:"data"`
|
||||
Labels map[string]string `json:"labels"`
|
||||
Version string `json:"version"`
|
||||
}
|
||||
|
||||
// GetResourceSeriesWithLabels gets the time series of resource usage of each container.
|
||||
// TODO(coufon): the labels are to be re-defined based on benchmark dashboard.
|
||||
func (r *ResourceCollector) GetResourceSeriesWithLabels(labels map[string]string) *ResourceSeriesPerContainer {
|
||||
seriesPerContainer := &ResourceSeriesPerContainer{
|
||||
Data: map[string]*ResourceSeries{},
|
||||
Labels: map[string]string{
|
||||
"node": framework.TestContext.NodeName,
|
||||
},
|
||||
Version: currentTimeSeriesVersion,
|
||||
}
|
||||
func (r *ResourceCollector) GetResourceTimeSeries() map[string]*ResourceSeries {
|
||||
resourceSeries := make(map[string]*ResourceSeries)
|
||||
for key, name := range systemContainers {
|
||||
newSeries := &ResourceSeries{Units: map[string]string{
|
||||
"cpu": "mCPU",
|
||||
"memory": "MB",
|
||||
}}
|
||||
seriesPerContainer.Data[key] = newSeries
|
||||
resourceSeries[key] = newSeries
|
||||
for _, usage := range r.buffers[name] {
|
||||
newSeries.Timestamp = append(newSeries.Timestamp, usage.Timestamp.UnixNano())
|
||||
newSeries.CPUUsageInMilliCores = append(newSeries.CPUUsageInMilliCores, int64(usage.CPUUsageInCores*1000))
|
||||
newSeries.MemoryRSSInMegaBytes = append(newSeries.MemoryRSSInMegaBytes, int64(float64(usage.MemoryUsageInBytes)/(1024*1024)))
|
||||
}
|
||||
}
|
||||
for k, v := range labels {
|
||||
seriesPerContainer.Labels[k] = v
|
||||
}
|
||||
return seriesPerContainer
|
||||
return resourceSeries
|
||||
}
|
||||
|
||||
// Code for getting container name of docker, copied from pkg/kubelet/cm/container_manager_linux.go
|
||||
|
@ -67,7 +67,7 @@ var _ = framework.KubeDescribe("Resource-usage [Serial] [Slow]", func() {
|
||||
Context("regular resource usage tracking", func() {
|
||||
rTests := []resourceTest{
|
||||
{
|
||||
pods: 10,
|
||||
podsNr: 10,
|
||||
cpuLimits: framework.ContainersCPUSummary{
|
||||
stats.SystemContainerKubelet: {0.50: 0.25, 0.95: 0.30},
|
||||
stats.SystemContainerRuntime: {0.50: 0.30, 0.95: 0.40},
|
||||
@ -82,10 +82,10 @@ var _ = framework.KubeDescribe("Resource-usage [Serial] [Slow]", func() {
|
||||
for _, testArg := range rTests {
|
||||
itArg := testArg
|
||||
|
||||
It(fmt.Sprintf("resource tracking for %d pods per node", itArg.pods), func() {
|
||||
It(fmt.Sprintf("resource tracking for %d pods per node", itArg.podsNr), func() {
|
||||
runResourceUsageTest(f, rc, itArg)
|
||||
// Log and verify resource usage
|
||||
printAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, true)
|
||||
logAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, itArg.getTestName(), true)
|
||||
})
|
||||
}
|
||||
})
|
||||
@ -93,34 +93,38 @@ var _ = framework.KubeDescribe("Resource-usage [Serial] [Slow]", func() {
|
||||
Context("regular resource usage tracking [Benchmark]", func() {
|
||||
rTests := []resourceTest{
|
||||
{
|
||||
pods: 10,
|
||||
podsNr: 10,
|
||||
},
|
||||
{
|
||||
pods: 35,
|
||||
podsNr: 35,
|
||||
},
|
||||
{
|
||||
pods: 105,
|
||||
podsNr: 105,
|
||||
},
|
||||
}
|
||||
|
||||
for _, testArg := range rTests {
|
||||
itArg := testArg
|
||||
|
||||
It(fmt.Sprintf("resource tracking for %d pods per node", itArg.pods), func() {
|
||||
It(fmt.Sprintf("resource tracking for %d pods per node", itArg.podsNr), func() {
|
||||
runResourceUsageTest(f, rc, itArg)
|
||||
// Log and verify resource usage
|
||||
printAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, true)
|
||||
logAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, itArg.getTestName(), true)
|
||||
})
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
type resourceTest struct {
|
||||
pods int
|
||||
podsNr int
|
||||
cpuLimits framework.ContainersCPUSummary
|
||||
memLimits framework.ResourceUsagePerContainer
|
||||
}
|
||||
|
||||
func (rt *resourceTest) getTestName() string {
|
||||
return fmt.Sprintf("resource_%d", rt.podsNr)
|
||||
}
|
||||
|
||||
// runResourceUsageTest runs the resource usage test
|
||||
func runResourceUsageTest(f *framework.Framework, rc *ResourceCollector, testArg resourceTest) {
|
||||
const (
|
||||
@ -136,7 +140,7 @@ func runResourceUsageTest(f *framework.Framework, rc *ResourceCollector, testArg
|
||||
defer rc.Stop()
|
||||
|
||||
By("Creating a batch of Pods")
|
||||
pods := newTestPods(testArg.pods, ImageRegistry[pauseImage], "test_pod")
|
||||
pods := newTestPods(testArg.podsNr, ImageRegistry[pauseImage], "test_pod")
|
||||
f.PodClient().CreateBatch(pods)
|
||||
|
||||
// wait for a while to let the node be steady
|
||||
@ -168,9 +172,9 @@ func runResourceUsageTest(f *framework.Framework, rc *ResourceCollector, testArg
|
||||
logPods(f.Client)
|
||||
}
|
||||
|
||||
// printAndVerifyResource prints the resource usage as perf data and verifies whether resource usage satisfies the limit.
|
||||
func printAndVerifyResource(f *framework.Framework, rc *ResourceCollector, cpuLimits framework.ContainersCPUSummary,
|
||||
memLimits framework.ResourceUsagePerContainer, isVerify bool) {
|
||||
// logAndVerifyResource prints the resource usage as perf data and verifies whether resource usage satisfies the limit.
|
||||
func logAndVerifyResource(f *framework.Framework, rc *ResourceCollector, cpuLimits framework.ContainersCPUSummary,
|
||||
memLimits framework.ResourceUsagePerContainer, testName string, isVerify bool) {
|
||||
nodeName := framework.TestContext.NodeName
|
||||
|
||||
// Obtain memory PerfData
|
||||
@ -189,8 +193,16 @@ func printAndVerifyResource(f *framework.Framework, rc *ResourceCollector, cpuLi
|
||||
cpuSummaryPerNode[nodeName] = cpuSummary
|
||||
|
||||
// Print resource usage
|
||||
framework.PrintPerfData(framework.ResourceUsageToPerfData(usagePerNode))
|
||||
framework.PrintPerfData(framework.CPUUsageToPerfData(cpuSummaryPerNode))
|
||||
framework.PrintPerfData(framework.ResourceUsageToPerfDataWithLabels(usagePerNode,
|
||||
map[string]string{
|
||||
"datatype": "resource",
|
||||
"test": testName,
|
||||
}))
|
||||
framework.PrintPerfData(framework.CPUUsageToPerfDataWithLabels(cpuSummaryPerNode,
|
||||
map[string]string{
|
||||
"datatype": "resource",
|
||||
"test": testName,
|
||||
}))
|
||||
|
||||
// Verify resource usage
|
||||
if isVerify {
|
||||
|
Loading…
Reference in New Issue
Block a user