From 54981302554c80f05e292beac4496b164bc959f5 Mon Sep 17 00:00:00 2001 From: Zhou Fang Date: Wed, 17 Aug 2016 10:38:43 -0700 Subject: [PATCH] add logging time series data to benchmark tests --- test/e2e_node/benchmark_util.go | 103 +++++++++++++++++++++++++++ test/e2e_node/density_test.go | 84 ++++++++++++++-------- test/e2e_node/resource_collector.go | 27 ++----- test/e2e_node/resource_usage_test.go | 42 +++++++---- 4 files changed, 189 insertions(+), 67 deletions(-) create mode 100644 test/e2e_node/benchmark_util.go diff --git a/test/e2e_node/benchmark_util.go b/test/e2e_node/benchmark_util.go new file mode 100644 index 00000000000..7a30cd982ec --- /dev/null +++ b/test/e2e_node/benchmark_util.go @@ -0,0 +1,103 @@ +// +build linux + +/* +Copyright 2015 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2e_node + +import ( + "sort" + + "k8s.io/kubernetes/pkg/api/unversioned" + "k8s.io/kubernetes/test/e2e/framework" + "k8s.io/kubernetes/test/e2e/perftype" +) + +const ( + // TODO(coufon): be consistent with perf_util.go version (not exposed) + currentTimeSeriesVersion = "v1" + TimeSeriesTag = "[Result:TimeSeries]" + TimeSeriesEnd = "[Finish:TimeSeries]" +) + +type NodeTimeSeries struct { + // value in OperationData is an array of timestamps + OperationData map[string][]int64 `json:"op_data,omitempty"` + ResourceData map[string]*ResourceSeries `json:"resource_data,omitempty"` + Labels map[string]string `json:"labels"` + Version string `json:"version"` +} + +// logDensityTimeSeries logs the time series data of operation and resource usage +func logDensityTimeSeries(rc *ResourceCollector, create, watch map[string]unversioned.Time, testName string) { + timeSeries := &NodeTimeSeries{ + Labels: map[string]string{ + "node": framework.TestContext.NodeName, + "datatype": "timeseries", + "test": testName, + }, + Version: currentTimeSeriesVersion, + } + // Attach operation time series. + timeSeries.OperationData = map[string][]int64{ + "create": getCumulatedPodTimeSeries(create), + "running": getCumulatedPodTimeSeries(watch), + } + // Attach resource time series. + timeSeries.ResourceData = rc.GetResourceTimeSeries() + // Log time series with tags + framework.Logf("%s %s\n%s", TimeSeriesTag, framework.PrettyPrintJSON(timeSeries), TimeSeriesEnd) +} + +type int64arr []int64 + +func (a int64arr) Len() int { return len(a) } +func (a int64arr) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a int64arr) Less(i, j int) bool { return a[i] < a[j] } + +// getCumulatedPodTimeSeries gets the cumulative pod number time series. +func getCumulatedPodTimeSeries(timePerPod map[string]unversioned.Time) []int64 { + timeSeries := make(int64arr, 0) + for _, ts := range timePerPod { + timeSeries = append(timeSeries, ts.Time.UnixNano()) + } + // Sort all timestamps. 
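+	// Go randomizes map iteration order, so the timestamps arrive in no
+	// particular order and must be sorted to form a monotonic series.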
+ sort.Sort(timeSeries) + return timeSeries +} + +// getLatencyPerfData returns perf data from latency +func getLatencyPerfData(latency framework.LatencyMetric, testName string) *perftype.PerfData { + return &perftype.PerfData{ + Version: "v1", + DataItems: []perftype.DataItem{ + { + Data: map[string]float64{ + "Perc50": float64(latency.Perc50) / 1000000, + "Perc90": float64(latency.Perc90) / 1000000, + "Perc99": float64(latency.Perc99) / 1000000, + }, + Unit: "ms", + Labels: map[string]string{ + "datatype": "latency", + "latencytype": "test-e2e", + "node": framework.TestContext.NodeName, + "test": testName, + }, + }, + }, + } +} diff --git a/test/e2e_node/density_test.go b/test/e2e_node/density_test.go index 12bfe3695e2..d6e4126f586 100644 --- a/test/e2e_node/density_test.go +++ b/test/e2e_node/density_test.go @@ -106,14 +106,15 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() { itArg := testArg It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval", itArg.podsNr, itArg.interval), func() { - - batchLag, e2eLags := runDensityBatchTest(f, rc, itArg) + itArg.createMethod = "batch" + testName := itArg.getTestName() + batchLag, e2eLags := runDensityBatchTest(f, rc, itArg, false) By("Verifying latency") - printAndVerifyLatency(batchLag, e2eLags, itArg, true) + logAndVerifyLatency(batchLag, e2eLags, itArg.podStartupLimits, itArg.podBatchStartupLimit, testName, true) By("Verifying resource") - printAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, true) + logAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, testName, true) }) } }) @@ -138,14 +139,15 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() { itArg := testArg It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval", itArg.podsNr, itArg.interval), func() { - - batchLag, e2eLags := runDensityBatchTest(f, rc, itArg) + itArg.createMethod = "batch" + testName := itArg.getTestName() + batchLag, e2eLags := runDensityBatchTest(f, rc, itArg, true) By("Verifying latency") - printAndVerifyLatency(batchLag, e2eLags, itArg, false) + logAndVerifyLatency(batchLag, e2eLags, itArg.podStartupLimits, itArg.podBatchStartupLimit, testName, false) By("Verifying resource") - printAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, false) + logAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, testName, false) }) } }) @@ -175,14 +177,15 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() { itArg := testArg It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %d background pods", itArg.podsNr, itArg.bgPodsNr), func() { - + itArg.createMethod = "sequence" + testName := itArg.getTestName() batchlag, e2eLags := runDensitySeqTest(f, rc, itArg) By("Verifying latency") - printAndVerifyLatency(batchlag, e2eLags, itArg, true) + logAndVerifyLatency(batchlag, e2eLags, itArg.podStartupLimits, itArg.podBatchStartupLimit, testName, true) By("Verifying resource") - printAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, true) + logAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, testName, true) }) } }) @@ -207,14 +210,15 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() { itArg := testArg It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %d background pods", itArg.podsNr, itArg.bgPodsNr), func() { - + itArg.createMethod = "sequence" + testName := itArg.getTestName() batchlag, e2eLags := runDensitySeqTest(f, rc, itArg) 
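+				// Benchmark runs pass isVerify=false below, so results are only
+				// logged as perf data and are not gated on the latency limits.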
By("Verifying latency") - printAndVerifyLatency(batchlag, e2eLags, itArg, false) + logAndVerifyLatency(batchlag, e2eLags, itArg.podStartupLimits, itArg.podBatchStartupLimit, testName, false) By("Verifying resource") - printAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, false) + logAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, testName, false) }) } }) @@ -227,6 +231,8 @@ type densityTest struct { bgPodsNr int // interval between creating pod (rate control) interval time.Duration + // create pods in 'batch' or 'sequence' + createMethod string // performance limits cpuLimits framework.ContainersCPUSummary memLimits framework.ResourceUsagePerContainer @@ -234,8 +240,13 @@ type densityTest struct { podBatchStartupLimit time.Duration } +func (dt *densityTest) getTestName() string { + return fmt.Sprintf("create_%s_%d_%d_%d", dt.createMethod, dt.podsNr, dt.bgPodsNr, dt.interval.Nanoseconds()/1000000) +} + // runDensityBatchTest runs the density batch pod creation test -func runDensityBatchTest(f *framework.Framework, rc *ResourceCollector, testArg densityTest) (time.Duration, []framework.PodLatencyData) { +func runDensityBatchTest(f *framework.Framework, rc *ResourceCollector, testArg densityTest, + isLogTimeSeries bool) (time.Duration, []framework.PodLatencyData) { const ( podType = "density_test_pod" sleepBeforeCreatePods = 30 * time.Second @@ -308,6 +319,13 @@ func runDensityBatchTest(f *framework.Framework, rc *ResourceCollector, testArg sort.Sort(framework.LatencySlice(e2eLags)) batchLag := lastRunning.Time.Sub(firstCreate.Time) + // Log time series data. + if isLogTimeSeries { + logDensityTimeSeries(rc, createTimes, watchTimes, testArg.getTestName()) + } + // Log throughput data. + logPodCreateThroughput(batchLag, e2eLags, testArg.podsNr) + return batchLag, e2eLags } @@ -333,6 +351,9 @@ func runDensitySeqTest(f *framework.Framework, rc *ResourceCollector, testArg de // create pods sequentially (back-to-back) batchlag, e2eLags := createBatchPodSequential(f, testPods) + // Log throughput data. + logPodCreateThroughput(batchlag, e2eLags, testArg.podsNr) + return batchlag, e2eLags } @@ -454,31 +475,36 @@ func createBatchPodSequential(f *framework.Framework, pods []*api.Pod) (time.Dur return batchLag, e2eLags } -// printAndVerifyLatency verifies that whether pod creation latency satisfies the limit. -func printAndVerifyLatency(batchLag time.Duration, e2eLags []framework.PodLatencyData, testArg densityTest, isVerify bool) { +// logAndVerifyLatency verifies that whether pod creation latency satisfies the limit. +func logAndVerifyLatency(batchLag time.Duration, e2eLags []framework.PodLatencyData, podStartupLimits framework.LatencyMetric, + podBatchStartupLimit time.Duration, testName string, isVerify bool) { framework.PrintLatencies(e2eLags, "worst client e2e total latencies") - // TODO(coufon): do not trust `kubelet' metrics since they are not reset! + // TODO(coufon): do not trust 'kubelet' metrics since they are not reset! latencyMetrics, _ := getPodStartLatency(kubeletAddr) framework.Logf("Kubelet Prometheus metrics (not reset):\n%s", framework.PrettyPrintJSON(latencyMetrics)) - // check whether e2e pod startup time is acceptable. 
 	podCreateLatency := framework.PodStartupLatency{Latency: framework.ExtractLatencyMetrics(e2eLags)}
-	framework.Logf("Pod create latency: %s", framework.PrettyPrintJSON(podCreateLatency))
-	// calculate and log throughput
-	throughputBatch := float64(testArg.podsNr) / batchLag.Minutes()
-	framework.Logf("Batch creation throughput is %.1f pods/min", throughputBatch)
-	throughputSequential := 1.0 / e2eLags[len(e2eLags)-1].Latency.Minutes()
-	framework.Logf("Sequential creation throughput is %.1f pods/min", throughputSequential)
+	// log latency perf data
+	framework.PrintPerfData(getLatencyPerfData(podCreateLatency.Latency, testName))

 	if isVerify {
-		framework.ExpectNoError(verifyPodStartupLatency(testArg.podStartupLimits, podCreateLatency.Latency))
-		// check bactch pod creation latency
-		if testArg.podBatchStartupLimit > 0 {
-			Expect(batchLag <= testArg.podBatchStartupLimit).To(Equal(true), "Batch creation startup time %v exceed limit %v",
-				batchLag, testArg.podBatchStartupLimit)
+		// check whether e2e pod startup time is acceptable.
+		framework.ExpectNoError(verifyPodStartupLatency(podStartupLimits, podCreateLatency.Latency))
+		// check batch pod creation latency
+		if podBatchStartupLimit > 0 {
+			Expect(batchLag <= podBatchStartupLimit).To(Equal(true), "Batch creation startup time %v exceeds limit %v",
+				batchLag, podBatchStartupLimit)
 		}
 	}
 }
+
+// logPodCreateThroughput calculates and logs pod creation throughput.
+func logPodCreateThroughput(batchLag time.Duration, e2eLags []framework.PodLatencyData, podsNr int) {
+	throughputBatch := float64(podsNr) / batchLag.Minutes()
+	framework.Logf("Batch creation throughput is %.1f pods/min", throughputBatch)
+	throughputSequential := 1.0 / e2eLags[len(e2eLags)-1].Latency.Minutes()
+	framework.Logf("Sequential creation throughput is %.1f pods/min", throughputSequential)
+}
diff --git a/test/e2e_node/resource_collector.go b/test/e2e_node/resource_collector.go
index 9d0ab37c637..9d3a396646a 100644
--- a/test/e2e_node/resource_collector.go
+++ b/test/e2e_node/resource_collector.go
@@ -53,8 +53,6 @@ const (
 	cadvisorPort = 8090
 	// housekeeping interval of Cadvisor (second)
 	houseKeepingInterval = 1
-	// TODO(coufon): be consistent with perf_util.go version (not exposed)
-	currentTimeSeriesVersion = "v1"
 )

 var (
@@ -426,39 +424,22 @@ type ResourceSeries struct {
 	Units map[string]string `json:"unit"`
 }

-// Time series of resource usage per container
-type ResourceSeriesPerContainer struct {
-	Data map[string]*ResourceSeries `json:"data"`
-	Labels map[string]string `json:"labels"`
-	Version string `json:"version"`
-}
-
-// GetResourceSeriesWithLabels gets the time series of resource usage of each container.
-// TODO(coufon): the labels are to be re-defined based on benchmark dashboard.
-func (r *ResourceCollector) GetResourceSeriesWithLabels(labels map[string]string) *ResourceSeriesPerContainer {
-	seriesPerContainer := &ResourceSeriesPerContainer{
-		Data: map[string]*ResourceSeries{},
-		Labels: map[string]string{
-			"node": framework.TestContext.NodeName,
-		},
-		Version: currentTimeSeriesVersion,
-	}
+// GetResourceTimeSeries gets the time series of resource usage of each container.
+func (r *ResourceCollector) GetResourceTimeSeries() map[string]*ResourceSeries {
+	resourceSeries := make(map[string]*ResourceSeries)
 	for key, name := range systemContainers {
 		newSeries := &ResourceSeries{Units: map[string]string{
 			"cpu": "mCPU",
 			"memory": "MB",
 		}}
-		seriesPerContainer.Data[key] = newSeries
+		resourceSeries[key] = newSeries
 		for _, usage := range r.buffers[name] {
 			newSeries.Timestamp = append(newSeries.Timestamp, usage.Timestamp.UnixNano())
 			newSeries.CPUUsageInMilliCores = append(newSeries.CPUUsageInMilliCores, int64(usage.CPUUsageInCores*1000))
 			newSeries.MemoryRSSInMegaBytes = append(newSeries.MemoryRSSInMegaBytes, int64(float64(usage.MemoryUsageInBytes)/(1024*1024)))
 		}
 	}
-	for k, v := range labels {
-		seriesPerContainer.Labels[k] = v
-	}
-	return seriesPerContainer
+	return resourceSeries
 }

 // Code for getting container name of docker, copied from pkg/kubelet/cm/container_manager_linux.go
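GetResourceTimeSeries now returns the raw per-container sample arrays and leaves labeling to the caller (logDensityTimeSeries attaches the node/test labels itself). Since the samples are unaggregated, a dashboard consumer can derive whatever summary it needs; for example, a percentile reduction might look like this sketch (percentileMemory is hypothetical and not part of this patch, reusing the int64arr sorter from benchmark_util.go):

func percentileMemory(s *ResourceSeries, perc float64) int64 {
	if len(s.MemoryRSSInMegaBytes) == 0 {
		return 0
	}
	// Copy before sorting so the original sample order is preserved.
	data := append(int64arr(nil), s.MemoryRSSInMegaBytes...)
	sort.Sort(data)
	return data[int(perc*float64(len(data)-1))]
}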
diff --git a/test/e2e_node/resource_usage_test.go b/test/e2e_node/resource_usage_test.go
index bed8a3810be..da5dcfeb9f7 100644
--- a/test/e2e_node/resource_usage_test.go
+++ b/test/e2e_node/resource_usage_test.go
@@ -67,7 +67,7 @@ var _ = framework.KubeDescribe("Resource-usage [Serial] [Slow]", func() {
 	Context("regular resource usage tracking", func() {
 		rTests := []resourceTest{
 			{
-				pods: 10,
+				podsNr: 10,
 				cpuLimits: framework.ContainersCPUSummary{
 					stats.SystemContainerKubelet: {0.50: 0.25, 0.95: 0.30},
 					stats.SystemContainerRuntime: {0.50: 0.30, 0.95: 0.40},
@@ -82,10 +82,10 @@ var _ = framework.KubeDescribe("Resource-usage [Serial] [Slow]", func() {

 		for _, testArg := range rTests {
 			itArg := testArg
-			It(fmt.Sprintf("resource tracking for %d pods per node", itArg.pods), func() {
+			It(fmt.Sprintf("resource tracking for %d pods per node", itArg.podsNr), func() {
 				runResourceUsageTest(f, rc, itArg)
 				// Log and verify resource usage
-				printAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, true)
+				logAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, itArg.getTestName(), true)
 			})
 		}
 	})
@@ -93,34 +93,38 @@ var _ = framework.KubeDescribe("Resource-usage [Serial] [Slow]", func() {
 	Context("regular resource usage tracking [Benchmark]", func() {
 		rTests := []resourceTest{
 			{
-				pods: 10,
+				podsNr: 10,
 			},
 			{
-				pods: 35,
+				podsNr: 35,
 			},
 			{
-				pods: 105,
+				podsNr: 105,
 			},
 		}

 		for _, testArg := range rTests {
 			itArg := testArg
-			It(fmt.Sprintf("resource tracking for %d pods per node", itArg.pods), func() {
+			It(fmt.Sprintf("resource tracking for %d pods per node", itArg.podsNr), func() {
 				runResourceUsageTest(f, rc, itArg)
 				// Log and verify resource usage
-				printAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, true)
+				logAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, itArg.getTestName(), true)
 			})
 		}
 	})
 })

 type resourceTest struct {
-	pods int
+	podsNr int
 	cpuLimits framework.ContainersCPUSummary
 	memLimits framework.ResourceUsagePerContainer
 }

+func (rt *resourceTest) getTestName() string {
+	return fmt.Sprintf("resource_%d", rt.podsNr)
+}
+
 // runResourceUsageTest runs the resource usage test
 func runResourceUsageTest(f *framework.Framework, rc *ResourceCollector, testArg resourceTest) {
 	const (
@@ -136,7 +140,7 @@ func runResourceUsageTest(f
*framework.Framework, rc *ResourceCollector, testArg defer rc.Stop() By("Creating a batch of Pods") - pods := newTestPods(testArg.pods, ImageRegistry[pauseImage], "test_pod") + pods := newTestPods(testArg.podsNr, ImageRegistry[pauseImage], "test_pod") f.PodClient().CreateBatch(pods) // wait for a while to let the node be steady @@ -168,9 +172,9 @@ func runResourceUsageTest(f *framework.Framework, rc *ResourceCollector, testArg logPods(f.Client) } -// printAndVerifyResource prints the resource usage as perf data and verifies whether resource usage satisfies the limit. -func printAndVerifyResource(f *framework.Framework, rc *ResourceCollector, cpuLimits framework.ContainersCPUSummary, - memLimits framework.ResourceUsagePerContainer, isVerify bool) { +// logAndVerifyResource prints the resource usage as perf data and verifies whether resource usage satisfies the limit. +func logAndVerifyResource(f *framework.Framework, rc *ResourceCollector, cpuLimits framework.ContainersCPUSummary, + memLimits framework.ResourceUsagePerContainer, testName string, isVerify bool) { nodeName := framework.TestContext.NodeName // Obtain memory PerfData @@ -189,8 +193,16 @@ func printAndVerifyResource(f *framework.Framework, rc *ResourceCollector, cpuLi cpuSummaryPerNode[nodeName] = cpuSummary // Print resource usage - framework.PrintPerfData(framework.ResourceUsageToPerfData(usagePerNode)) - framework.PrintPerfData(framework.CPUUsageToPerfData(cpuSummaryPerNode)) + framework.PrintPerfData(framework.ResourceUsageToPerfDataWithLabels(usagePerNode, + map[string]string{ + "datatype": "resource", + "test": testName, + })) + framework.PrintPerfData(framework.CPUUsageToPerfDataWithLabels(cpuSummaryPerNode, + map[string]string{ + "datatype": "resource", + "test": testName, + })) // Verify resource usage if isVerify {
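For reference, the getTestName helpers introduced above produce the keys under which these results are grouped on the benchmark dashboard. A hypothetical illustration, assuming the densityTest and resourceTest types from this patch:

func ExampleTestNames() {
	// Prints "create_batch_10_0_0": batch creation, 10 pods, 0 background pods, 0ms interval.
	fmt.Println((&densityTest{podsNr: 10, createMethod: "batch"}).getTestName())
	// Prints "resource_35": resource tracking with 35 pods.
	fmt.Println((&resourceTest{podsNr: 35}).getTestName())
}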