diff --git a/test/e2e_node/benchmark_util.go b/test/e2e_node/benchmark_util.go index 60713f29b2c..4083b2926ec 100644 --- a/test/e2e_node/benchmark_util.go +++ b/test/e2e_node/benchmark_util.go @@ -19,12 +19,15 @@ limitations under the License. package e2e_node import ( + "fmt" "sort" "time" "k8s.io/kubernetes/pkg/api/unversioned" "k8s.io/kubernetes/test/e2e/framework" "k8s.io/kubernetes/test/e2e/perftype" + + . "github.com/onsi/gomega" ) const ( @@ -43,12 +46,9 @@ type NodeTimeSeries struct { } // logDensityTimeSeries logs the time series data of operation and resource usage -func logDensityTimeSeries(rc *ResourceCollector, create, watch map[string]unversioned.Time, testName string) { +func logDensityTimeSeries(rc *ResourceCollector, create, watch map[string]unversioned.Time, testInfo map[string]string) { timeSeries := &NodeTimeSeries{ - Labels: map[string]string{ - "node": framework.TestContext.NodeName, - "test": testName, - }, + Labels: testInfo, Version: currentDataVersion, } // Attach operation time series. @@ -80,7 +80,7 @@ func getCumulatedPodTimeSeries(timePerPod map[string]unversioned.Time) []int64 { } // getLatencyPerfData returns perf data of pod startup latency. -func getLatencyPerfData(latency framework.LatencyMetric, testName string) *perftype.PerfData { +func getLatencyPerfData(latency framework.LatencyMetric, testInfo map[string]string) *perftype.PerfData { return &perftype.PerfData{ Version: currentDataVersion, DataItems: []perftype.DataItem{ @@ -98,15 +98,12 @@ func getLatencyPerfData(latency framework.LatencyMetric, testName string) *perft }, }, }, - Labels: map[string]string{ - "node": framework.TestContext.NodeName, - "test": testName, - }, + Labels: testInfo, } } // getThroughputPerfData returns perf data of pod creation startup throughput. 
-func getThroughputPerfData(batchLag time.Duration, e2eLags []framework.PodLatencyData, podsNr int, testName string) *perftype.PerfData { +func getThroughputPerfData(batchLag time.Duration, e2eLags []framework.PodLatencyData, podsNr int, testInfo map[string]string) *perftype.PerfData { return &perftype.PerfData{ Version: currentDataVersion, DataItems: []perftype.DataItem{ @@ -122,9 +119,40 @@ func getThroughputPerfData(batchLag time.Duration, e2eLags []framework.PodLatenc }, }, }, - Labels: map[string]string{ - "node": framework.TestContext.NodeName, - "test": testName, - }, + Labels: testInfo, + } +} + +// getTestNodeInfo fetches the test node from the API server and returns perf data labels: node name, test name, OS image, and machine capacity (CPU cores and memory in GB). +func getTestNodeInfo(f *framework.Framework, testName string) map[string]string { + nodeName := framework.TestContext.NodeName + node, err := f.Client.Nodes().Get(nodeName) + Expect(err).NotTo(HaveOccurred()) + + cpu, ok := node.Status.Capacity["cpu"] + if !ok { + framework.Failf("Fail to fetch CPU capacity value of test node.") + } + + memory, ok := node.Status.Capacity["memory"] + if !ok { + framework.Failf("Fail to fetch Memory capacity value of test node.") + } + + cpuValue, ok := cpu.AsInt64() + if !ok { + framework.Failf("Fail to fetch CPU capacity value as Int64.") + } + + memoryValue, ok := memory.AsInt64() + if !ok { + framework.Failf("Fail to fetch Memory capacity value as Int64.") + } + + return map[string]string{ + "node": nodeName, + "test": testName, + "image": node.Status.NodeInfo.OSImage, + "machine": fmt.Sprintf("cpu:%dcore,memory:%.1fGB", cpuValue, float32(memoryValue)/(1024*1024*1024)), } } diff --git a/test/e2e_node/density_test.go b/test/e2e_node/density_test.go index a6d28ceee0f..644ac924d56 100644 --- a/test/e2e_node/density_test.go +++ b/test/e2e_node/density_test.go @@ -54,16 +54,12 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() { ) var ( - ns string - nodeName string - rc *ResourceCollector + rc *ResourceCollector ) f := 
framework.NewDefaultFramework("density-test") BeforeEach(func() { - ns = f.Namespace.Name - nodeName = framework.TestContext.NodeName // Start a standalone cadvisor pod using 'createSync', the pod is running when it returns f.PodClient().CreateSync(getCadvisorPod()) // Resource collector monitors fine-grain CPU/memory usage by a standalone Cadvisor with @@ -102,14 +98,15 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() { It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval", itArg.podsNr, itArg.interval), func() { itArg.createMethod = "batch" - testName := itArg.getTestName() - batchLag, e2eLags := runDensityBatchTest(f, rc, itArg, false) + testInfo := getTestNodeInfo(f, itArg.getTestName()) + + batchLag, e2eLags := runDensityBatchTest(f, rc, itArg, testInfo, false) By("Verifying latency") - logAndVerifyLatency(batchLag, e2eLags, itArg.podStartupLimits, itArg.podBatchStartupLimit, testName, true) + logAndVerifyLatency(batchLag, e2eLags, itArg.podStartupLimits, itArg.podBatchStartupLimit, testInfo, true) By("Verifying resource") - logAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, testName, true) + logAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, testInfo, true) }) } }) @@ -159,14 +156,15 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() { It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval [Benchmark]", itArg.podsNr, itArg.interval), func() { itArg.createMethod = "batch" - testName := itArg.getTestName() - batchLag, e2eLags := runDensityBatchTest(f, rc, itArg, true) + testInfo := getTestNodeInfo(f, itArg.getTestName()) + + batchLag, e2eLags := runDensityBatchTest(f, rc, itArg, testInfo, true) By("Verifying latency") - logAndVerifyLatency(batchLag, e2eLags, itArg.podStartupLimits, itArg.podBatchStartupLimit, testName, false) + logAndVerifyLatency(batchLag, e2eLags, itArg.podStartupLimits, itArg.podBatchStartupLimit, testInfo, 
false) By("Verifying resource") - logAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, testName, false) + logAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, testInfo, false) }) } }) @@ -195,19 +193,19 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() { It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval (QPS %d) [Benchmark]", itArg.podsNr, itArg.interval, itArg.APIQPSLimit), func() { itArg.createMethod = "batch" - testName := itArg.getTestName() + testInfo := getTestNodeInfo(f, itArg.getTestName()) // The latency caused by API QPS limit takes a large portion (up to ~33%) of e2e latency. // It makes the pod startup latency of Kubelet (creation throughput as well) under-estimated. // Here we set API QPS limit from default 5 to 60 in order to test real Kubelet performance. // Note that it will cause higher resource usage. setKubeletAPIQPSLimit(f, int32(itArg.APIQPSLimit)) - batchLag, e2eLags := runDensityBatchTest(f, rc, itArg, true) + batchLag, e2eLags := runDensityBatchTest(f, rc, itArg, testInfo, true) By("Verifying latency") - logAndVerifyLatency(batchLag, e2eLags, itArg.podStartupLimits, itArg.podBatchStartupLimit, testName, false) + logAndVerifyLatency(batchLag, e2eLags, itArg.podStartupLimits, itArg.podBatchStartupLimit, testInfo, false) By("Verifying resource") - logAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, testName, false) + logAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, testInfo, false) }) } }) @@ -238,14 +236,14 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() { It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %d background pods", itArg.podsNr, itArg.bgPodsNr), func() { itArg.createMethod = "sequence" - testName := itArg.getTestName() - batchlag, e2eLags := runDensitySeqTest(f, rc, itArg) + testInfo := getTestNodeInfo(f, itArg.getTestName()) + batchlag, e2eLags := runDensitySeqTest(f, rc, itArg, 
testInfo) By("Verifying latency") - logAndVerifyLatency(batchlag, e2eLags, itArg.podStartupLimits, itArg.podBatchStartupLimit, testName, true) + logAndVerifyLatency(batchlag, e2eLags, itArg.podStartupLimits, itArg.podBatchStartupLimit, testInfo, true) By("Verifying resource") - logAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, testName, true) + logAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, testInfo, true) }) } }) @@ -271,14 +269,14 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() { It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %d background pods [Benchmark]", itArg.podsNr, itArg.bgPodsNr), func() { itArg.createMethod = "sequence" - testName := itArg.getTestName() - batchlag, e2eLags := runDensitySeqTest(f, rc, itArg) + testInfo := getTestNodeInfo(f, itArg.getTestName()) + batchlag, e2eLags := runDensitySeqTest(f, rc, itArg, testInfo) By("Verifying latency") - logAndVerifyLatency(batchlag, e2eLags, itArg.podStartupLimits, itArg.podBatchStartupLimit, testName, false) + logAndVerifyLatency(batchlag, e2eLags, itArg.podStartupLimits, itArg.podBatchStartupLimit, testInfo, false) By("Verifying resource") - logAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, testName, false) + logAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, testInfo, false) }) } }) @@ -314,7 +312,7 @@ func (dt *densityTest) getTestName() string { } // runDensityBatchTest runs the density batch pod creation test -func runDensityBatchTest(f *framework.Framework, rc *ResourceCollector, testArg densityTest, +func runDensityBatchTest(f *framework.Framework, rc *ResourceCollector, testArg densityTest, testInfo map[string]string, isLogTimeSeries bool) (time.Duration, []framework.PodLatencyData) { const ( podType = "density_test_pod" @@ -390,19 +388,18 @@ func runDensityBatchTest(f *framework.Framework, rc *ResourceCollector, testArg sort.Sort(framework.LatencySlice(e2eLags)) batchLag := 
lastRunning.Time.Sub(firstCreate.Time) - testName := testArg.getTestName() // Log time series data. if isLogTimeSeries { - logDensityTimeSeries(rc, createTimes, watchTimes, testName) + logDensityTimeSeries(rc, createTimes, watchTimes, testInfo) } // Log throughput data. - logPodCreateThroughput(batchLag, e2eLags, testArg.podsNr, testName) + logPodCreateThroughput(batchLag, e2eLags, testArg.podsNr, testInfo) return batchLag, e2eLags } // runDensitySeqTest runs the density sequential pod creation test -func runDensitySeqTest(f *framework.Framework, rc *ResourceCollector, testArg densityTest) (time.Duration, []framework.PodLatencyData) { +func runDensitySeqTest(f *framework.Framework, rc *ResourceCollector, testArg densityTest, testInfo map[string]string) (time.Duration, []framework.PodLatencyData) { const ( podType = "density_test_pod" sleepBeforeCreatePods = 30 * time.Second @@ -426,7 +423,7 @@ func runDensitySeqTest(f *framework.Framework, rc *ResourceCollector, testArg de batchlag, e2eLags := createBatchPodSequential(f, testPods) // Log throughput data. - logPodCreateThroughput(batchlag, e2eLags, testArg.podsNr, testArg.getTestName()) + logPodCreateThroughput(batchlag, e2eLags, testArg.podsNr, testInfo) return batchlag, e2eLags } @@ -541,7 +538,7 @@ func createBatchPodSequential(f *framework.Framework, pods []*api.Pod) (time.Dur // logAndVerifyLatency verifies that whether pod creation latency satisfies the limit. func logAndVerifyLatency(batchLag time.Duration, e2eLags []framework.PodLatencyData, podStartupLimits framework.LatencyMetric, - podBatchStartupLimit time.Duration, testName string, isVerify bool) { + podBatchStartupLimit time.Duration, testInfo map[string]string, isVerify bool) { framework.PrintLatencies(e2eLags, "worst client e2e total latencies") // TODO(coufon): do not trust 'kubelet' metrics since they are not reset! 
@@ -551,7 +548,7 @@ func logAndVerifyLatency(batchLag time.Duration, e2eLags []framework.PodLatencyD podCreateLatency := framework.PodStartupLatency{Latency: framework.ExtractLatencyMetrics(e2eLags)} // log latency perf data - framework.PrintPerfData(getLatencyPerfData(podCreateLatency.Latency, testName)) + framework.PrintPerfData(getLatencyPerfData(podCreateLatency.Latency, testInfo)) if isVerify { // check whether e2e pod startup time is acceptable. @@ -566,8 +563,8 @@ func logAndVerifyLatency(batchLag time.Duration, e2eLags []framework.PodLatencyD } // logThroughput calculates and logs pod creation throughput. -func logPodCreateThroughput(batchLag time.Duration, e2eLags []framework.PodLatencyData, podsNr int, testName string) { - framework.PrintPerfData(getThroughputPerfData(batchLag, e2eLags, podsNr, testName)) +func logPodCreateThroughput(batchLag time.Duration, e2eLags []framework.PodLatencyData, podsNr int, testInfo map[string]string) { + framework.PrintPerfData(getThroughputPerfData(batchLag, e2eLags, podsNr, testInfo)) } // increaseKubeletAPIQPSLimit sets Kubelet API QPS via ConfigMap. Kubelet will restart with the new QPS. diff --git a/test/e2e_node/resource_usage_test.go b/test/e2e_node/resource_usage_test.go index ebb28589260..2ebfbe1f214 100644 --- a/test/e2e_node/resource_usage_test.go +++ b/test/e2e_node/resource_usage_test.go @@ -38,7 +38,6 @@ var _ = framework.KubeDescribe("Resource-usage [Serial] [Slow]", func() { ) var ( - ns string rc *ResourceCollector om *framework.RuntimeOperationMonitor ) @@ -46,7 +45,6 @@ var _ = framework.KubeDescribe("Resource-usage [Serial] [Slow]", func() { f := framework.NewDefaultFramework("resource-usage") BeforeEach(func() { - ns = f.Namespace.Name om = framework.NewRuntimeOperationMonitor(f.Client) // The test collects resource usage from a standalone Cadvisor pod. 
// The Cadvsior of Kubelet has a housekeeping interval of 10s, which is too long to @@ -83,9 +81,12 @@ var _ = framework.KubeDescribe("Resource-usage [Serial] [Slow]", func() { itArg := testArg It(fmt.Sprintf("resource tracking for %d pods per node", itArg.podsNr), func() { + testInfo := getTestNodeInfo(f, itArg.getTestName()) + runResourceUsageTest(f, rc, itArg) + // Log and verify resource usage - logAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, itArg.getTestName(), true) + logAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, testInfo, true) }) } }) @@ -107,9 +108,12 @@ var _ = framework.KubeDescribe("Resource-usage [Serial] [Slow]", func() { itArg := testArg It(fmt.Sprintf("resource tracking for %d pods per node [Benchmark]", itArg.podsNr), func() { + testInfo := getTestNodeInfo(f, itArg.getTestName()) + runResourceUsageTest(f, rc, itArg) + // Log and verify resource usage - logAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, itArg.getTestName(), true) + logAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, testInfo, false) }) } }) @@ -176,7 +180,7 @@ func runResourceUsageTest(f *framework.Framework, rc *ResourceCollector, testArg // logAndVerifyResource prints the resource usage as perf data and verifies whether resource usage satisfies the limit. 
func logAndVerifyResource(f *framework.Framework, rc *ResourceCollector, cpuLimits framework.ContainersCPUSummary, - memLimits framework.ResourceUsagePerContainer, testName string, isVerify bool) { + memLimits framework.ResourceUsagePerContainer, testInfo map[string]string, isVerify bool) { nodeName := framework.TestContext.NodeName // Obtain memory PerfData @@ -195,10 +199,8 @@ func logAndVerifyResource(f *framework.Framework, rc *ResourceCollector, cpuLimi cpuSummaryPerNode[nodeName] = cpuSummary // Print resource usage - framework.PrintPerfData(framework.ResourceUsageToPerfDataWithLabels(usagePerNode, - map[string]string{"test": testName, "node": nodeName})) - framework.PrintPerfData(framework.CPUUsageToPerfDataWithLabels(cpuSummaryPerNode, - map[string]string{"test": testName, "node": nodeName})) + framework.PrintPerfData(framework.ResourceUsageToPerfDataWithLabels(usagePerNode, testInfo)) + framework.PrintPerfData(framework.CPUUsageToPerfDataWithLabels(cpuSummaryPerNode, testInfo)) // Verify resource usage if isVerify { diff --git a/test/e2e_node/runner/remote/run_remote.go b/test/e2e_node/runner/remote/run_remote.go index 77e587eec56..193e697479f 100644 --- a/test/e2e_node/runner/remote/run_remote.go +++ b/test/e2e_node/runner/remote/run_remote.go @@ -598,8 +598,8 @@ func imageToInstanceName(imageConfig *internalGCEImage) string { if imageConfig.machine == "" { return *instanceNamePrefix + "-" + imageConfig.image } - // For benchmark test, node name has the format 'machine-image-uuid'. - // Node name is added to test data item labels and used for benchmark dashboard. + // For benchmark tests, the node name has the format 'machine-image-uuid' so that + // different machine types can run with the same image in parallel. return imageConfig.machine + "-" + imageConfig.image + "-" + uuid.NewUUID().String()[:8] }