Mirror of https://github.com/k3s-io/kubernetes.git (synced 2025-07-23 19:56:01 +00:00)
Add perfdash support in kubelet_perf and metrics_util
commit 5b2ddc9f2c
parent f2db7d07a3
@@ -651,6 +651,40 @@ func (r *ResourceMonitor) GetLatest() (ResourceUsagePerNode, error) {
	return result, utilerrors.NewAggregate(errs)
}

func (r *ResourceMonitor) GetMasterNodeLatest(usagePerNode ResourceUsagePerNode) ResourceUsagePerNode {
	result := make(ResourceUsagePerNode)
	var masterUsage ResourceUsagePerContainer
	var nodesUsage []ResourceUsagePerContainer
	for node, usage := range usagePerNode {
		if strings.HasSuffix(node, "master") {
			masterUsage = usage
		} else {
			nodesUsage = append(nodesUsage, usage)
		}
	}
	nodeAvgUsage := make(ResourceUsagePerContainer)
	for _, nodeUsage := range nodesUsage {
		for c, usage := range nodeUsage {
			if _, found := nodeAvgUsage[c]; !found {
				nodeAvgUsage[c] = &ContainerResourceUsage{Name: usage.Name}
			}
			nodeAvgUsage[c].CPUUsageInCores += usage.CPUUsageInCores
			nodeAvgUsage[c].MemoryUsageInBytes += usage.MemoryUsageInBytes
			nodeAvgUsage[c].MemoryWorkingSetInBytes += usage.MemoryWorkingSetInBytes
			nodeAvgUsage[c].MemoryRSSInBytes += usage.MemoryRSSInBytes
		}
	}
	for c := range nodeAvgUsage {
		nodeAvgUsage[c].CPUUsageInCores /= float64(len(nodesUsage))
		nodeAvgUsage[c].MemoryUsageInBytes /= uint64(len(nodesUsage))
		nodeAvgUsage[c].MemoryWorkingSetInBytes /= uint64(len(nodesUsage))
		nodeAvgUsage[c].MemoryRSSInBytes /= uint64(len(nodesUsage))
	}
	result["master"] = masterUsage
	result["node"] = nodeAvgUsage
	return result
}

// ContainersCPUSummary is indexed by the container name with each entry a
// (percentile, value) map.
type ContainersCPUSummary map[string]map[float64]float64
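GetMasterNodeLatest collapses the per-node usage map into exactly two entries: the master node's usage is kept as-is under "master", and every other node is averaged per container under "node". Below is a minimal, self-contained sketch of that shape; the node names, numbers, and trimmed-down types are hypothetical stand-ins rather than the framework's real ResourceUsagePerNode/ContainerResourceUsage.

package main

import (
	"fmt"
	"strings"
)

// Trimmed-down stand-ins for the framework's ContainerResourceUsage and
// ResourceUsagePerNode, keeping only the fields this sketch touches.
type containerUsage struct {
	Name             string
	CPUUsageInCores  float64
	MemoryRSSInBytes uint64
}

type usagePerContainer map[string]*containerUsage
type usagePerNode map[string]usagePerContainer

func main() {
	// Hypothetical node names; only the "master" suffix matters for the split.
	usage := usagePerNode{
		"e2e-test-master": {"/kubelet": {Name: "/kubelet", CPUUsageInCores: 0.10, MemoryRSSInBytes: 100 << 20}},
		"e2e-test-node-1": {"/kubelet": {Name: "/kubelet", CPUUsageInCores: 0.02, MemoryRSSInBytes: 60 << 20}},
		"e2e-test-node-2": {"/kubelet": {Name: "/kubelet", CPUUsageInCores: 0.04, MemoryRSSInBytes: 80 << 20}},
	}

	// Same result shape as GetMasterNodeLatest: the master kept as-is under
	// "master", every other node averaged per container under "node".
	result := usagePerNode{"node": usagePerContainer{}}
	var nodes []usagePerContainer
	for name, u := range usage {
		if strings.HasSuffix(name, "master") {
			result["master"] = u
		} else {
			nodes = append(nodes, u)
		}
	}
	for _, u := range nodes {
		for c, cu := range u {
			if _, ok := result["node"][c]; !ok {
				result["node"][c] = &containerUsage{Name: cu.Name}
			}
			result["node"][c].CPUUsageInCores += cu.CPUUsageInCores
			result["node"][c].MemoryRSSInBytes += cu.MemoryRSSInBytes
		}
	}
	for c := range result["node"] {
		result["node"][c].CPUUsageInCores /= float64(len(nodes))
		result["node"][c].MemoryRSSInBytes /= uint64(len(nodes))
	}

	// The averaged kubelet entry: about 0.03 cores and 70 MiB RSS.
	fmt.Printf("%+v\n", *result["node"]["/kubelet"])
}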
@@ -712,3 +746,36 @@ func (r *ResourceMonitor) GetCPUSummary() NodesCPUSummary {
	}
	return result
}

func (r *ResourceMonitor) GetMasterNodeCPUSummary(summaryPerNode NodesCPUSummary) NodesCPUSummary {
	result := make(NodesCPUSummary)
	var masterSummary ContainersCPUSummary
	var nodesSummaries []ContainersCPUSummary
	for node, summary := range summaryPerNode {
		if strings.HasSuffix(node, "master") {
			masterSummary = summary
		} else {
			nodesSummaries = append(nodesSummaries, summary)
		}
	}

	nodeAvgSummary := make(ContainersCPUSummary)
	for _, nodeSummary := range nodesSummaries {
		for c, summary := range nodeSummary {
			if _, found := nodeAvgSummary[c]; !found {
				nodeAvgSummary[c] = map[float64]float64{}
			}
			for perc, value := range summary {
				nodeAvgSummary[c][perc] += value
			}
		}
	}
	for c := range nodeAvgSummary {
		for perc := range nodeAvgSummary[c] {
			nodeAvgSummary[c][perc] /= float64(len(nodesSummaries))
		}
	}
	result["master"] = masterSummary
	result["node"] = nodeAvgSummary
	return result
}
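GetMasterNodeCPUSummary applies the same master/node split to CPU summaries, except that the values are (percentile -> usage) maps which are averaged entry-wise across the non-master nodes. A small runnable sketch of just that averaging step, with invented numbers:

package main

import "fmt"

func main() {
	// Hypothetical (percentile -> CPU usage in cores) maps for one container on two worker nodes.
	node1 := map[float64]float64{0.50: 0.25, 0.95: 0.50}
	node2 := map[float64]float64{0.50: 0.75, 0.95: 1.00}

	// Entry-wise average, as GetMasterNodeCPUSummary does for every non-master node.
	avg := map[float64]float64{}
	for _, m := range []map[float64]float64{node1, node2} {
		for perc, v := range m {
			avg[perc] += v
		}
	}
	for perc := range avg {
		avg[perc] /= 2
	}
	fmt.Println(avg) // map[0.5:0.5 0.95:0.75]
}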
@@ -287,7 +287,10 @@ func HighLatencyRequests(c *client.Client) (int, error) {
		}
	}

	// TODO(random-liu): Remove the log when we migrate to new perfdash
	Logf("API calls latencies: %s", PrettyPrintJSON(metrics))
	// Log perf data
	PrintPerfData(ApiCallToPerfData(metrics))

	return badMetrics, nil
}
test/e2e/framework/perf_util.go (new file, 104 lines)
@@ -0,0 +1,104 @@
/*
Copyright 2016 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package framework

import (
	"fmt"

	"k8s.io/kubernetes/test/e2e/perftype"
)

// TODO(random-liu): Change the tests to actually use PerfData from the beginning instead of
// translating one to the other here.

// ApiCallToPerfData transforms APIResponsiveness to PerfData.
func ApiCallToPerfData(apicalls APIResponsiveness) *perftype.PerfData {
	perfData := &perftype.PerfData{}
	for _, apicall := range apicalls.APICalls {
		item := perftype.DataItem{
			Data: map[string]float64{
				"Perc50": float64(apicall.Latency.Perc50) / 1000000, // us -> ms
				"Perc90": float64(apicall.Latency.Perc90) / 1000000,
				"Perc99": float64(apicall.Latency.Perc99) / 1000000,
			},
			Unit: "ms",
			Labels: map[string]string{
				"Verb":     apicall.Verb,
				"Resource": apicall.Resource,
			},
		}
		perfData.DataItems = append(perfData.DataItems, item)
	}
	return perfData
}

// ResourceUsageToPerfData transforms ResourceUsagePerNode to PerfData. Notice that this function
// only cares about memory usage, because cpu usage information will be extracted from NodesCPUSummary.
func ResourceUsageToPerfData(usagePerNode ResourceUsagePerNode) *perftype.PerfData {
	items := []perftype.DataItem{}
	for node, usages := range usagePerNode {
		for c, usage := range usages {
			item := perftype.DataItem{
				Data: map[string]float64{
					"memory":     float64(usage.MemoryUsageInBytes) / (1024 * 1024),
					"workingset": float64(usage.MemoryWorkingSetInBytes) / (1024 * 1024),
					"rss":        float64(usage.MemoryRSSInBytes) / (1024 * 1024),
				},
				Unit: "MB",
				Labels: map[string]string{
					"node":      node,
					"container": c,
					"resource":  "memory",
				},
			}
			items = append(items, item)
		}
	}
	return &perftype.PerfData{DataItems: items}
}

// CPUUsageToPerfData transforms NodesCPUSummary to PerfData.
func CPUUsageToPerfData(usagePerNode NodesCPUSummary) *perftype.PerfData {
	items := []perftype.DataItem{}
	for node, usages := range usagePerNode {
		for c, usage := range usages {
			data := map[string]float64{}
			for perc, value := range usage {
				data[fmt.Sprintf("Perc%02.0f", perc*100)] = value * 100
			}
			item := perftype.DataItem{
				Data: data,
				Unit: "%",
				Labels: map[string]string{
					"node":      node,
					"container": c,
					"resource":  "cpu",
				},
			}
			items = append(items, item)
		}
	}
	return &perftype.PerfData{DataItems: items}
}

// PrintPerfData prints the perfdata in json format with PerfResultTag prefix.
// If an error occurs, nothing will be printed.
func PrintPerfData(p *perftype.PerfData) {
	if str := PrettyPrintJSON(p); str != "" {
		Logf("%s", perftype.PerfResultTag+" "+str)
	}
}
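For reference, this is roughly the kind of line PrintPerfData emits, which is what a dashboard such as Perfdash would scrape from the test log. The sketch below uses local copies of the perftype structs and made-up latency numbers, and it prints the JSON on one line, whereas PrettyPrintJSON in the framework emits it indented.

package main

import (
	"encoding/json"
	"fmt"
)

// Local copies of the perftype structs so the sketch compiles on its own.
type dataItem struct {
	Data   map[string]float64 `json:"data"`
	Unit   string             `json:"unit"`
	Labels map[string]string  `json:"labels"`
}

type perfData struct {
	DataItems []dataItem `json:"dataItems"`
}

func main() {
	// One hypothetical API-latency item, shaped like ApiCallToPerfData's output.
	p := perfData{DataItems: []dataItem{{
		Data:   map[string]float64{"Perc50": 1.2, "Perc90": 3.4, "Perc99": 8.9},
		Unit:   "ms",
		Labels: map[string]string{"Verb": "GET", "Resource": "pods"},
	}}}
	b, _ := json.Marshal(p)
	// The e2e framework logs the same payload pretty-printed, prefixed by PerfResultTag.
	fmt.Println("[Result:Performance] " + string(b))
	// Output (single line here):
	// [Result:Performance] {"dataItems":[{"data":{"Perc50":1.2,"Perc90":3.4,"Perc99":8.9},"unit":"ms","labels":{"Resource":"pods","Verb":"GET"}}]}
}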
@@ -99,11 +99,16 @@ func runResourceTrackingTest(f *framework.Framework, podsPerNode int, nodeNames
	logPodsOnNodes(f.Client, nodeNames.List())
	usageSummary, err := rm.GetLatest()
	Expect(err).NotTo(HaveOccurred())
	// TODO(random-liu): Remove the original log when we migrate to new perfdash
	framework.Logf("%s", rm.FormatResourceUsage(usageSummary))
	// Log perf result
	framework.PrintPerfData(framework.ResourceUsageToPerfData(rm.GetMasterNodeLatest(usageSummary)))
	verifyMemoryLimits(f.Client, expectedMemory, usageSummary)

	cpuSummary := rm.GetCPUSummary()
	framework.Logf("%s", rm.FormatCPUSummary(cpuSummary))
	// Log perf result
	framework.PrintPerfData(framework.CPUUsageToPerfData(rm.GetMasterNodeCPUSummary(cpuSummary)))
	verifyCPULimits(expectedCPU, cpuSummary)

	By("Deleting the RC")
@@ -243,7 +248,7 @@ var _ = framework.KubeDescribe("Kubelet [Serial] [Slow]", func() {
	itArg := testArg
	podsPerNode := itArg.podsPerNode
	name := fmt.Sprintf(
-		"for %d pods per node over %v", podsPerNode, monitoringTime)
+		"resource tracking for %d pods per node", podsPerNode)
	It(name, func() {
		runResourceTrackingTest(f, podsPerNode, nodeNames, rm, itArg.cpuLimits, itArg.memLimits)
	})
@@ -254,7 +259,7 @@ var _ = framework.KubeDescribe("Kubelet [Serial] [Slow]", func() {
	for i := range density {
		podsPerNode := density[i]
		name := fmt.Sprintf(
-			"for %d pods per node over %v", podsPerNode, monitoringTime)
+			"resource tracking for %d pods per node", podsPerNode)
		It(name, func() {
			runResourceTrackingTest(f, podsPerNode, nodeNames, rm, nil, nil)
		})
test/e2e/perftype/perftype.go (new file, 44 lines)
@@ -0,0 +1,44 @@
/*
Copyright 2016 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package perftype

// TODO(random-liu): Replace this with prometheus' data model.

// The following performance data structures are generalized and well-formatted.
// They can be pretty printed in json format and be analyzed by other performance
// analyzing tools, such as Perfdash (k8s.io/contrib/perfdash).

// DataItem is the data point.
type DataItem struct {
	// Data is a map from bucket to real data point (e.g. "Perc90" -> 23.5). Notice
	// that all data items with the same label combination should have the same buckets.
	Data map[string]float64 `json:"data"`
	// Unit is the data unit. Notice that all data items with the same label combination
	// should have the same unit.
	Unit string `json:"unit"`
	// Labels is the labels of the data item.
	Labels map[string]string `json:"labels"`
}

// PerfData contains all data items generated in the current test.
type PerfData struct {
	DataItems []DataItem `json:"dataItems"`
}

// PerfResultTag is the prefix of generated perfdata. Analyzing tools can find the perf result
// with this tag.
const PerfResultTag = "[Result:Performance]"
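On the consuming side, a tool only needs to locate PerfResultTag in the test log and decode the JSON document that follows it. The sketch below is one hypothetical way to do that; extractPerfData and the embedded log text are invented for illustration and this is not Perfdash's actual implementation.

package main

import (
	"encoding/json"
	"fmt"
	"strings"
)

// Local copies of the perftype structs so the sketch compiles on its own.
type dataItem struct {
	Data   map[string]float64 `json:"data"`
	Unit   string             `json:"unit"`
	Labels map[string]string  `json:"labels"`
}

type perfData struct {
	DataItems []dataItem `json:"dataItems"`
}

const perfResultTag = "[Result:Performance]"

// extractPerfData scans a test log for the tag and decodes the JSON that follows it.
func extractPerfData(log string) (perfData, error) {
	var p perfData
	i := strings.Index(log, perfResultTag)
	if i < 0 {
		return p, fmt.Errorf("no %q tag found", perfResultTag)
	}
	// Decode exactly one JSON value after the tag; anything after it is ignored.
	dec := json.NewDecoder(strings.NewReader(log[i+len(perfResultTag):]))
	err := dec.Decode(&p)
	return p, err
}

func main() {
	log := `... test output ...
[Result:Performance] {"dataItems":[{"data":{"Perc50":1.2},"unit":"ms","labels":{"Verb":"GET","Resource":"pods"}}]}
... more output ...`
	p, err := extractPerfData(log)
	fmt.Println(p, err) // {[{map[Perc50:1.2] ms map[Resource:pods Verb:GET]}]} <nil>
}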