From 54981302554c80f05e292beac4496b164bc959f5 Mon Sep 17 00:00:00 2001 From: Zhou Fang Date: Wed, 17 Aug 2016 10:38:43 -0700 Subject: [PATCH] add logging time series data to benchmark tests --- test/e2e_node/benchmark_util.go | 103 +++++++++++++++++++++++++++ test/e2e_node/density_test.go | 84 ++++++++++++++-------- test/e2e_node/resource_collector.go | 27 ++----- test/e2e_node/resource_usage_test.go | 42 +++++++---- 4 files changed, 189 insertions(+), 67 deletions(-) create mode 100644 test/e2e_node/benchmark_util.go diff --git a/test/e2e_node/benchmark_util.go b/test/e2e_node/benchmark_util.go new file mode 100644 index 00000000000..7a30cd982ec --- /dev/null +++ b/test/e2e_node/benchmark_util.go @@ -0,0 +1,103 @@ +// +build linux + +/* +Copyright 2015 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2e_node + +import ( + "sort" + + "k8s.io/kubernetes/pkg/api/unversioned" + "k8s.io/kubernetes/test/e2e/framework" + "k8s.io/kubernetes/test/e2e/perftype" +) + +const ( + // TODO(coufon): be consistent with perf_util.go version (not exposed) + currentTimeSeriesVersion = "v1" + TimeSeriesTag = "[Result:TimeSeries]" + TimeSeriesEnd = "[Finish:TimeSeries]" +) + +type NodeTimeSeries struct { + // value in OperationData is an array of timestamps + OperationData map[string][]int64 `json:"op_data,omitempty"` + ResourceData map[string]*ResourceSeries `json:"resource_data,omitempty"` + Labels map[string]string `json:"labels"` + Version string `json:"version"` +} + +// logDensityTimeSeries logs the time series data of operation and resource usage +func logDensityTimeSeries(rc *ResourceCollector, create, watch map[string]unversioned.Time, testName string) { + timeSeries := &NodeTimeSeries{ + Labels: map[string]string{ + "node": framework.TestContext.NodeName, + "datatype": "timeseries", + "test": testName, + }, + Version: currentTimeSeriesVersion, + } + // Attach operation time series. + timeSeries.OperationData = map[string][]int64{ + "create": getCumulatedPodTimeSeries(create), + "running": getCumulatedPodTimeSeries(watch), + } + // Attach resource time series. + timeSeries.ResourceData = rc.GetResourceTimeSeries() + // Log time series with tags + framework.Logf("%s %s\n%s", TimeSeriesTag, framework.PrettyPrintJSON(timeSeries), TimeSeriesEnd) +} + +type int64arr []int64 + +func (a int64arr) Len() int { return len(a) } +func (a int64arr) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a int64arr) Less(i, j int) bool { return a[i] < a[j] } + +// getCumulatedPodTimeSeries gets the cumulative pod number time series. +func getCumulatedPodTimeSeries(timePerPod map[string]unversioned.Time) []int64 { + timeSeries := make(int64arr, 0) + for _, ts := range timePerPod { + timeSeries = append(timeSeries, ts.Time.UnixNano()) + } + // Sort all timestamps. 
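+	// Go randomizes map iteration order, so the timestamps arrive in no
+	// particular order and must be sorted to form a monotonic series.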
+ sort.Sort(timeSeries) + return timeSeries +} + +// getLatencyPerfData returns perf data from latency +func getLatencyPerfData(latency framework.LatencyMetric, testName string) *perftype.PerfData { + return &perftype.PerfData{ + Version: "v1", + DataItems: []perftype.DataItem{ + { + Data: map[string]float64{ + "Perc50": float64(latency.Perc50) / 1000000, + "Perc90": float64(latency.Perc90) / 1000000, + "Perc99": float64(latency.Perc99) / 1000000, + }, + Unit: "ms", + Labels: map[string]string{ + "datatype": "latency", + "latencytype": "test-e2e", + "node": framework.TestContext.NodeName, + "test": testName, + }, + }, + }, + } +} diff --git a/test/e2e_node/density_test.go b/test/e2e_node/density_test.go index 12bfe3695e2..d6e4126f586 100644 --- a/test/e2e_node/density_test.go +++ b/test/e2e_node/density_test.go @@ -106,14 +106,15 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() { itArg := testArg It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval", itArg.podsNr, itArg.interval), func() { - - batchLag, e2eLags := runDensityBatchTest(f, rc, itArg) + itArg.createMethod = "batch" + testName := itArg.getTestName() + batchLag, e2eLags := runDensityBatchTest(f, rc, itArg, false) By("Verifying latency") - printAndVerifyLatency(batchLag, e2eLags, itArg, true) + logAndVerifyLatency(batchLag, e2eLags, itArg.podStartupLimits, itArg.podBatchStartupLimit, testName, true) By("Verifying resource") - printAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, true) + logAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, testName, true) }) } }) @@ -138,14 +139,15 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() { itArg := testArg It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval", itArg.podsNr, itArg.interval), func() { - - batchLag, e2eLags := runDensityBatchTest(f, rc, itArg) + itArg.createMethod = "batch" + testName := itArg.getTestName() + batchLag, e2eLags := runDensityBatchTest(f, rc, itArg, true) By("Verifying latency") - printAndVerifyLatency(batchLag, e2eLags, itArg, false) + logAndVerifyLatency(batchLag, e2eLags, itArg.podStartupLimits, itArg.podBatchStartupLimit, testName, false) By("Verifying resource") - printAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, false) + logAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, testName, false) }) } }) @@ -175,14 +177,15 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() { itArg := testArg It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %d background pods", itArg.podsNr, itArg.bgPodsNr), func() { - + itArg.createMethod = "sequence" + testName := itArg.getTestName() batchlag, e2eLags := runDensitySeqTest(f, rc, itArg) By("Verifying latency") - printAndVerifyLatency(batchlag, e2eLags, itArg, true) + logAndVerifyLatency(batchlag, e2eLags, itArg.podStartupLimits, itArg.podBatchStartupLimit, testName, true) By("Verifying resource") - printAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, true) + logAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, testName, true) }) } }) @@ -207,14 +210,15 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() { itArg := testArg It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %d background pods", itArg.podsNr, itArg.bgPodsNr), func() { - + itArg.createMethod = "sequence" + testName := itArg.getTestName() batchlag, e2eLags := runDensitySeqTest(f, rc, itArg) 
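+				// Benchmark runs pass isVerify=false below, so results are only
+				// logged as perf data and are not gated on the latency limits.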
By("Verifying latency") - printAndVerifyLatency(batchlag, e2eLags, itArg, false) + logAndVerifyLatency(batchlag, e2eLags, itArg.podStartupLimits, itArg.podBatchStartupLimit, testName, false) By("Verifying resource") - printAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, false) + logAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, testName, false) }) } }) @@ -227,6 +231,8 @@ type densityTest struct { bgPodsNr int // interval between creating pod (rate control) interval time.Duration + // create pods in 'batch' or 'sequence' + createMethod string // performance limits cpuLimits framework.ContainersCPUSummary memLimits framework.ResourceUsagePerContainer @@ -234,8 +240,13 @@ type densityTest struct { podBatchStartupLimit time.Duration } +func (dt *densityTest) getTestName() string { + return fmt.Sprintf("create_%s_%d_%d_%d", dt.createMethod, dt.podsNr, dt.bgPodsNr, dt.interval.Nanoseconds()/1000000) +} + // runDensityBatchTest runs the density batch pod creation test -func runDensityBatchTest(f *framework.Framework, rc *ResourceCollector, testArg densityTest) (time.Duration, []framework.PodLatencyData) { +func runDensityBatchTest(f *framework.Framework, rc *ResourceCollector, testArg densityTest, + isLogTimeSeries bool) (time.Duration, []framework.PodLatencyData) { const ( podType = "density_test_pod" sleepBeforeCreatePods = 30 * time.Second @@ -308,6 +319,13 @@ func runDensityBatchTest(f *framework.Framework, rc *ResourceCollector, testArg sort.Sort(framework.LatencySlice(e2eLags)) batchLag := lastRunning.Time.Sub(firstCreate.Time) + // Log time series data. + if isLogTimeSeries { + logDensityTimeSeries(rc, createTimes, watchTimes, testArg.getTestName()) + } + // Log throughput data. + logPodCreateThroughput(batchLag, e2eLags, testArg.podsNr) + return batchLag, e2eLags } @@ -333,6 +351,9 @@ func runDensitySeqTest(f *framework.Framework, rc *ResourceCollector, testArg de // create pods sequentially (back-to-back) batchlag, e2eLags := createBatchPodSequential(f, testPods) + // Log throughput data. + logPodCreateThroughput(batchlag, e2eLags, testArg.podsNr) + return batchlag, e2eLags } @@ -454,31 +475,36 @@ func createBatchPodSequential(f *framework.Framework, pods []*api.Pod) (time.Dur return batchLag, e2eLags } -// printAndVerifyLatency verifies that whether pod creation latency satisfies the limit. -func printAndVerifyLatency(batchLag time.Duration, e2eLags []framework.PodLatencyData, testArg densityTest, isVerify bool) { +// logAndVerifyLatency verifies that whether pod creation latency satisfies the limit. +func logAndVerifyLatency(batchLag time.Duration, e2eLags []framework.PodLatencyData, podStartupLimits framework.LatencyMetric, + podBatchStartupLimit time.Duration, testName string, isVerify bool) { framework.PrintLatencies(e2eLags, "worst client e2e total latencies") - // TODO(coufon): do not trust `kubelet' metrics since they are not reset! + // TODO(coufon): do not trust 'kubelet' metrics since they are not reset! latencyMetrics, _ := getPodStartLatency(kubeletAddr) framework.Logf("Kubelet Prometheus metrics (not reset):\n%s", framework.PrettyPrintJSON(latencyMetrics)) - // check whether e2e pod startup time is acceptable. 
 	podCreateLatency := framework.PodStartupLatency{Latency: framework.ExtractLatencyMetrics(e2eLags)}
-	framework.Logf("Pod create latency: %s", framework.PrettyPrintJSON(podCreateLatency))
-	// calculate and log throughput
-	throughputBatch := float64(testArg.podsNr) / batchLag.Minutes()
-	framework.Logf("Batch creation throughput is %.1f pods/min", throughputBatch)
-	throughputSequential := 1.0 / e2eLags[len(e2eLags)-1].Latency.Minutes()
-	framework.Logf("Sequential creation throughput is %.1f pods/min", throughputSequential)
+	// log latency perf data
+	framework.PrintPerfData(getLatencyPerfData(podCreateLatency.Latency, testName))

 	if isVerify {
-		framework.ExpectNoError(verifyPodStartupLatency(testArg.podStartupLimits, podCreateLatency.Latency))
-		// check bactch pod creation latency
-		if testArg.podBatchStartupLimit > 0 {
-			Expect(batchLag <= testArg.podBatchStartupLimit).To(Equal(true), "Batch creation startup time %v exceed limit %v",
-				batchLag, testArg.podBatchStartupLimit)
+		// check whether e2e pod startup time is acceptable.
+		framework.ExpectNoError(verifyPodStartupLatency(podStartupLimits, podCreateLatency.Latency))
+		// check batch pod creation latency
+		if podBatchStartupLimit > 0 {
+			Expect(batchLag <= podBatchStartupLimit).To(Equal(true), "Batch creation startup time %v exceeds limit %v",
+				batchLag, podBatchStartupLimit)
 		}
 	}
 }
+
+// logPodCreateThroughput calculates and logs pod creation throughput.
+func logPodCreateThroughput(batchLag time.Duration, e2eLags []framework.PodLatencyData, podsNr int) {
+	throughputBatch := float64(podsNr) / batchLag.Minutes()
+	framework.Logf("Batch creation throughput is %.1f pods/min", throughputBatch)
+	throughputSequential := 1.0 / e2eLags[len(e2eLags)-1].Latency.Minutes()
+	framework.Logf("Sequential creation throughput is %.1f pods/min", throughputSequential)
+}
diff --git a/test/e2e_node/resource_collector.go b/test/e2e_node/resource_collector.go
index 9d0ab37c637..9d3a396646a 100644
--- a/test/e2e_node/resource_collector.go
+++ b/test/e2e_node/resource_collector.go
@@ -53,8 +53,6 @@ const (
 	cadvisorPort = 8090
 	// housekeeping interval of Cadvisor (second)
 	houseKeepingInterval = 1
-	// TODO(coufon): be consistent with perf_util.go version (not exposed)
-	currentTimeSeriesVersion = "v1"
 )

 var (
@@ -426,39 +424,22 @@ type ResourceSeries struct {
 	Units map[string]string `json:"unit"`
 }

-// Time series of resource usage per container
-type ResourceSeriesPerContainer struct {
-	Data map[string]*ResourceSeries `json:"data"`
-	Labels map[string]string `json:"labels"`
-	Version string `json:"version"`
-}
-
-// GetResourceSeriesWithLabels gets the time series of resource usage of each container.
-// TODO(coufon): the labels are to be re-defined based on benchmark dashboard.
-func (r *ResourceCollector) GetResourceSeriesWithLabels(labels map[string]string) *ResourceSeriesPerContainer {
-	seriesPerContainer := &ResourceSeriesPerContainer{
-		Data: map[string]*ResourceSeries{},
-		Labels: map[string]string{
-			"node": framework.TestContext.NodeName,
-		},
-		Version: currentTimeSeriesVersion,
-	}
+// GetResourceTimeSeries gets the time series of resource usage of each container.
+func (r *ResourceCollector) GetResourceTimeSeries() map[string]*ResourceSeries {
+	resourceSeries := make(map[string]*ResourceSeries)
 	for key, name := range systemContainers {
 		newSeries := &ResourceSeries{Units: map[string]string{
 			"cpu": "mCPU",
 			"memory": "MB",
 		}}
-		seriesPerContainer.Data[key] = newSeries
+		resourceSeries[key] = newSeries
 		for _, usage := range r.buffers[name] {
 			newSeries.Timestamp = append(newSeries.Timestamp, usage.Timestamp.UnixNano())
 			newSeries.CPUUsageInMilliCores = append(newSeries.CPUUsageInMilliCores, int64(usage.CPUUsageInCores*1000))
 			newSeries.MemoryRSSInMegaBytes = append(newSeries.MemoryRSSInMegaBytes, int64(float64(usage.MemoryUsageInBytes)/(1024*1024)))
 		}
 	}
-	for k, v := range labels {
-		seriesPerContainer.Labels[k] = v
-	}
-	return seriesPerContainer
+	return resourceSeries
 }

 // Code for getting container name of docker, copied from pkg/kubelet/cm/container_manager_linux.go
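GetResourceTimeSeries now returns the raw per-container sample arrays and leaves labeling to the caller (logDensityTimeSeries attaches the node/test labels itself). Since the samples are unaggregated, a dashboard consumer can derive whatever summary it needs; for example, a percentile reduction might look like this sketch (percentileMemory is hypothetical and not part of this patch, reusing the int64arr sorter from benchmark_util.go):

func percentileMemory(s *ResourceSeries, perc float64) int64 {
	if len(s.MemoryRSSInMegaBytes) == 0 {
		return 0
	}
	// Copy before sorting so the original sample order is preserved.
	data := append(int64arr(nil), s.MemoryRSSInMegaBytes...)
	sort.Sort(data)
	return data[int(perc*float64(len(data)-1))]
}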
diff --git a/test/e2e_node/resource_usage_test.go b/test/e2e_node/resource_usage_test.go
index bed8a3810be..da5dcfeb9f7 100644
--- a/test/e2e_node/resource_usage_test.go
+++ b/test/e2e_node/resource_usage_test.go
@@ -67,7 +67,7 @@ var _ = framework.KubeDescribe("Resource-usage [Serial] [Slow]", func() {
 	Context("regular resource usage tracking", func() {
 		rTests := []resourceTest{
 			{
-				pods: 10,
+				podsNr: 10,
 				cpuLimits: framework.ContainersCPUSummary{
 					stats.SystemContainerKubelet: {0.50: 0.25, 0.95: 0.30},
 					stats.SystemContainerRuntime: {0.50: 0.30, 0.95: 0.40},
@@ -82,10 +82,10 @@ var _ = framework.KubeDescribe("Resource-usage [Serial] [Slow]", func() {

 		for _, testArg := range rTests {
 			itArg := testArg
-			It(fmt.Sprintf("resource tracking for %d pods per node", itArg.pods), func() {
+			It(fmt.Sprintf("resource tracking for %d pods per node", itArg.podsNr), func() {
 				runResourceUsageTest(f, rc, itArg)
 				// Log and verify resource usage
-				printAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, true)
+				logAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, itArg.getTestName(), true)
 			})
 		}
 	})
@@ -93,34 +93,38 @@ var _ = framework.KubeDescribe("Resource-usage [Serial] [Slow]", func() {
 	Context("regular resource usage tracking [Benchmark]", func() {
 		rTests := []resourceTest{
 			{
-				pods: 10,
+				podsNr: 10,
 			},
 			{
-				pods: 35,
+				podsNr: 35,
 			},
 			{
-				pods: 105,
+				podsNr: 105,
 			},
 		}

 		for _, testArg := range rTests {
 			itArg := testArg
-			It(fmt.Sprintf("resource tracking for %d pods per node", itArg.pods), func() {
+			It(fmt.Sprintf("resource tracking for %d pods per node", itArg.podsNr), func() {
 				runResourceUsageTest(f, rc, itArg)
 				// Log and verify resource usage
-				printAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, true)
+				logAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, itArg.getTestName(), true)
 			})
 		}
 	})
 })

 type resourceTest struct {
-	pods int
+	podsNr int
 	cpuLimits framework.ContainersCPUSummary
 	memLimits framework.ResourceUsagePerContainer
 }

+func (rt *resourceTest) getTestName() string {
+	return fmt.Sprintf("resource_%d", rt.podsNr)
+}
+
 // runResourceUsageTest runs the resource usage test
 func runResourceUsageTest(f *framework.Framework, rc *ResourceCollector, testArg resourceTest) {
 	const (
@@ -136,7 +140,7 @@ func runResourceUsageTest(f
*framework.Framework, rc *ResourceCollector, testArg defer rc.Stop() By("Creating a batch of Pods") - pods := newTestPods(testArg.pods, ImageRegistry[pauseImage], "test_pod") + pods := newTestPods(testArg.podsNr, ImageRegistry[pauseImage], "test_pod") f.PodClient().CreateBatch(pods) // wait for a while to let the node be steady @@ -168,9 +172,9 @@ func runResourceUsageTest(f *framework.Framework, rc *ResourceCollector, testArg logPods(f.Client) } -// printAndVerifyResource prints the resource usage as perf data and verifies whether resource usage satisfies the limit. -func printAndVerifyResource(f *framework.Framework, rc *ResourceCollector, cpuLimits framework.ContainersCPUSummary, - memLimits framework.ResourceUsagePerContainer, isVerify bool) { +// logAndVerifyResource prints the resource usage as perf data and verifies whether resource usage satisfies the limit. +func logAndVerifyResource(f *framework.Framework, rc *ResourceCollector, cpuLimits framework.ContainersCPUSummary, + memLimits framework.ResourceUsagePerContainer, testName string, isVerify bool) { nodeName := framework.TestContext.NodeName // Obtain memory PerfData @@ -189,8 +193,16 @@ func printAndVerifyResource(f *framework.Framework, rc *ResourceCollector, cpuLi cpuSummaryPerNode[nodeName] = cpuSummary // Print resource usage - framework.PrintPerfData(framework.ResourceUsageToPerfData(usagePerNode)) - framework.PrintPerfData(framework.CPUUsageToPerfData(cpuSummaryPerNode)) + framework.PrintPerfData(framework.ResourceUsageToPerfDataWithLabels(usagePerNode, + map[string]string{ + "datatype": "resource", + "test": testName, + })) + framework.PrintPerfData(framework.CPUUsageToPerfDataWithLabels(cpuSummaryPerNode, + map[string]string{ + "datatype": "resource", + "test": testName, + })) // Verify resource usage if isVerify {
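For reference, the getTestName helpers introduced above produce the keys under which these results are grouped on the benchmark dashboard. A hypothetical illustration, assuming the densityTest and resourceTest types from this patch:

func ExampleTestNames() {
	// Prints "create_batch_10_0_0": batch creation, 10 pods, 0 background pods, 0ms interval.
	fmt.Println((&densityTest{podsNr: 10, createMethod: "batch"}).getTestName())
	// Prints "resource_35": resource tracking with 35 pods.
	fmt.Println((&resourceTest{podsNr: 35}).getTestName())
}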