migrate UsageToPerfDataWithLabels to perf_util.go

This commit is contained in:
Zhou Fang 2016-08-10 10:48:32 -07:00
parent 67a071eb6c
commit ad81b6da80
4 changed files with 148 additions and 203 deletions

View File

@ -57,56 +57,12 @@ const currentKubeletPerfMetricsVersion = "v1"
// ResourceUsageToPerfData transforms ResourceUsagePerNode to PerfData. Notice that this function
// only cares about memory usage, because cpu usage information will be extracted from NodesCPUSummary.
func ResourceUsageToPerfData(usagePerNode ResourceUsagePerNode) *perftype.PerfData {
items := []perftype.DataItem{}
for node, usages := range usagePerNode {
for c, usage := range usages {
item := perftype.DataItem{
Data: map[string]float64{
"memory": float64(usage.MemoryUsageInBytes) / (1024 * 1024),
"workingset": float64(usage.MemoryWorkingSetInBytes) / (1024 * 1024),
"rss": float64(usage.MemoryRSSInBytes) / (1024 * 1024),
},
Unit: "MB",
Labels: map[string]string{
"node": node,
"container": c,
"resource": "memory",
},
}
items = append(items, item)
}
}
return &perftype.PerfData{
Version: currentKubeletPerfMetricsVersion,
DataItems: items,
}
return ResourceUsageToPerfDataWithLabels(usagePerNode, nil)
}
// CPUUsageToPerfData transforms NodesCPUSummary to PerfData.
func CPUUsageToPerfData(usagePerNode NodesCPUSummary) *perftype.PerfData {
items := []perftype.DataItem{}
for node, usages := range usagePerNode {
for c, usage := range usages {
data := map[string]float64{}
for perc, value := range usage {
data[fmt.Sprintf("Perc%02.0f", perc*100)] = value * 1000
}
item := perftype.DataItem{
Data: data,
Unit: "mCPU",
Labels: map[string]string{
"node": node,
"container": c,
"resource": "cpu",
},
}
items = append(items, item)
}
}
return &perftype.PerfData{
Version: currentKubeletPerfMetricsVersion,
DataItems: items,
}
return CPUUsageToPerfDataWithLabels(usagePerNode, nil)
}
// PrintPerfData prints the perfdata in json format with PerfResultTag prefix.
@ -117,3 +73,73 @@ func PrintPerfData(p *perftype.PerfData) {
Logf("%s %s\n%s", perftype.PerfResultTag, str, perftype.PerfResultEnd)
}
}
// ResourceUsageToPerfDataWithLabels transforms ResourceUsagePerNode to PerfData with additional labels.
// Notice that this function only cares about memory usage, because cpu usage information will be extracted from NodesCPUSummary.
func ResourceUsageToPerfDataWithLabels(usagePerNode ResourceUsagePerNode, labels map[string]string) *perftype.PerfData {
items := []perftype.DataItem{}
for node, usages := range usagePerNode {
for c, usage := range usages {
newLabels := map[string]string{
"node": node,
"container": c,
"resource": "memory",
}
if labels != nil {
for k, v := range labels {
newLabels[k] = v
}
}
item := perftype.DataItem{
Data: map[string]float64{
"memory": float64(usage.MemoryUsageInBytes) / (1024 * 1024),
"workingset": float64(usage.MemoryWorkingSetInBytes) / (1024 * 1024),
"rss": float64(usage.MemoryRSSInBytes) / (1024 * 1024),
},
Unit: "MB",
Labels: newLabels,
}
items = append(items, item)
}
}
return &perftype.PerfData{
Version: currentKubeletPerfMetricsVersion,
DataItems: items,
}
}
// CPUUsageToPerfDataWithLabels transforms NodesCPUSummary to PerfData with additional labels.
func CPUUsageToPerfDataWithLabels(usagePerNode NodesCPUSummary, labels map[string]string) *perftype.PerfData {
items := []perftype.DataItem{}
for node, usages := range usagePerNode {
for c, usage := range usages {
newLabels := map[string]string{
"node": node,
"container": c,
"resource": "cpu",
}
if labels != nil {
for k, v := range labels {
newLabels[k] = v
}
}
data := map[string]float64{}
for perc, value := range usage {
data[fmt.Sprintf("Perc%02.0f", perc*100)] = value * 1000
}
item := perftype.DataItem{
Data: data,
Unit: "mCPU",
Labels: newLabels,
}
items = append(items, item)
}
}
return &perftype.PerfData{
Version: currentKubeletPerfMetricsVersion,
DataItems: items,
}
}

View File

@ -73,8 +73,8 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() {
})
Context("create a batch of pods", func() {
// Zhou(ToDo): add more tests and the values are generous, set more precise limits after benchmark
densityTests := []DensityTest{
// TODO(coufon): add more tests and the values are generous, set more precise limits after benchmark
dTests := []densityTest{
{
podsNr: 10,
interval: 0 * time.Millisecond,
@ -97,7 +97,7 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() {
},
}
for _, testArg := range densityTests {
for _, testArg := range dTests {
itArg := testArg
It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval",
itArg.podsNr, itArg.interval), func() {
@ -186,14 +186,14 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() {
// verify resource
By("Verifying resource")
verifyResource(f, itArg, rc)
verifyResource(f, itArg.cpuLimits, itArg.memLimits, rc)
})
}
})
Context("create a sequence of pods", func() {
// Zhou(ToDo): add more tests and the values are generous, set more precise limits after benchmark
densityTests := []DensityTest{
// TODO(coufon): add more tests and the values are generous, set more precise limits after benchmark
dTests := []densityTest{
{
podsNr: 10,
bgPodsNr: 10,
@ -213,7 +213,7 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() {
},
}
for _, testArg := range densityTests {
for _, testArg := range dTests {
itArg := testArg
It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %d background pods",
itArg.podsNr, itArg.bgPodsNr), func() {
@ -244,13 +244,13 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() {
// verify resource
By("Verifying resource")
verifyResource(f, itArg, rc)
verifyResource(f, itArg.cpuLimits, itArg.memLimits, rc)
})
}
})
})
type DensityTest struct {
type densityTest struct {
// number of pods
podsNr int
// number of background pods
@ -276,6 +276,7 @@ func createBatchPodWithRateControl(f *framework.Framework, pods []*api.Pod, inte
return createTimes
}
// checkPodDeleted checks whether a pod has been successfully deleted
func checkPodDeleted(f *framework.Framework, podName string) error {
ns := f.Namespace.Name
_, err := f.Client.Pods(ns).Get(podName)
@ -306,7 +307,7 @@ func getPodStartLatency(node string) (framework.KubeletLatencyMetrics, error) {
return latencyMetrics, nil
}
// Verifies whether 50, 90 and 99th percentiles of PodStartupLatency are
// verifyPodStartupLatency verifies whether 50, 90 and 99th percentiles of PodStartupLatency are
// within the threshold.
func verifyPodStartupLatency(expect, actual framework.LatencyMetric) error {
if actual.Perc50 > expect.Perc50 {
@ -321,6 +322,7 @@ func verifyPodStartupLatency(expect, actual framework.LatencyMetric) error {
return nil
}
// newInformerWatchPod creates an informer to check whether all pods are running.
func newInformerWatchPod(f *framework.Framework, mutex *sync.Mutex, watchTimes map[string]unversioned.Time,
podType string) *controllerframework.Controller {
ns := f.Namespace.Name
@ -365,7 +367,8 @@ func newInformerWatchPod(f *framework.Framework, mutex *sync.Mutex, watchTimes m
return controller
}
func verifyLatency(batchLag time.Duration, e2eLags []framework.PodLatencyData, testArg DensityTest) {
// verifyLatency verifies that whether pod creation latency satisfies the limit.
func verifyLatency(batchLag time.Duration, e2eLags []framework.PodLatencyData, testArg densityTest) {
framework.PrintLatencies(e2eLags, "worst client e2e total latencies")
// Zhou: do not trust `kubelet' metrics since they are not reset!
@ -390,35 +393,7 @@ func verifyLatency(batchLag time.Duration, e2eLags []framework.PodLatencyData, t
framework.Logf("Sequential creation throughput is %.1f pods/min", throughputSequential)
}
func verifyResource(f *framework.Framework, testArg DensityTest, rc *ResourceCollector) {
nodeName := framework.TestContext.NodeName
// verify and log memory
usagePerContainer, err := rc.GetLatest()
Expect(err).NotTo(HaveOccurred())
framework.Logf("%s", formatResourceUsageStats(usagePerContainer))
usagePerNode := make(framework.ResourceUsagePerNode)
usagePerNode[nodeName] = usagePerContainer
memPerfData := framework.ResourceUsageToPerfData(usagePerNode)
framework.PrintPerfData(memPerfData)
verifyMemoryLimits(f.Client, testArg.memLimits, usagePerNode)
// verify and log cpu
cpuSummary := rc.GetCPUSummary()
framework.Logf("%s", formatCPUSummary(cpuSummary))
cpuSummaryPerNode := make(framework.NodesCPUSummary)
cpuSummaryPerNode[nodeName] = cpuSummary
cpuPerfData := framework.CPUUsageToPerfData(cpuSummaryPerNode)
framework.PrintPerfData(cpuPerfData)
verifyCPULimits(testArg.cpuLimits, cpuSummaryPerNode)
}
// createBatchPodSequential creats pods back-to-back in sequence.
func createBatchPodSequential(f *framework.Framework, pods []*api.Pod) (time.Duration, []framework.PodLatencyData) {
batchStartTime := unversioned.Now()
e2eLags := make([]framework.PodLatencyData, 0)

View File

@ -42,7 +42,6 @@ import (
"k8s.io/kubernetes/pkg/util/uuid"
"k8s.io/kubernetes/pkg/util/wait"
"k8s.io/kubernetes/test/e2e/framework"
"k8s.io/kubernetes/test/e2e/perftype"
. "github.com/onsi/gomega"
)
@ -54,8 +53,8 @@ const (
cadvisorPort = 8090
// housekeeping interval of Cadvisor (second)
houseKeepingInterval = 1
// Zhou(ToDo): be consistent with perf_util.go version (not exposed)
currentKubeletPerfMetricsVersion = "v1"
// TODO(coufon): be consistent with perf_util.go version (not exposed)
currentTimeSeriesVersion = "v1"
)
var (
@ -72,6 +71,8 @@ type ResourceCollector struct {
stopCh chan struct{}
}
// NewResourceCollector creates a resource collector object which collects
// resource usage periodically from Cadvisor
func NewResourceCollector(interval time.Duration) *ResourceCollector {
buffers := make(map[string][]*framework.ContainerResourceUsage)
return &ResourceCollector{
@ -80,10 +81,10 @@ func NewResourceCollector(interval time.Duration) *ResourceCollector {
}
}
// Start resource collector
// It connects to the standalone Cadvisor pod
// Start starts resource collector and connects to the standalone Cadvisor pod
// then repeatedly runs collectStats.
func (r *ResourceCollector) Start() {
// Get the cgroup containers for kubelet and docker
// Get the cgroup container names for kubelet and docker
kubeletContainer, err := getContainerNameForProcess(kubeletProcessName, "")
dockerContainer, err := getContainerNameForProcess(dockerProcessName, dockerPidFile)
if err == nil {
@ -113,12 +114,12 @@ func (r *ResourceCollector) Start() {
go wait.Until(func() { r.collectStats(oldStatsMap) }, r.pollingInterval, r.stopCh)
}
// Stop resource collector
// Stop stops resource collector collecting stats. It does not clear the buffer
func (r *ResourceCollector) Stop() {
close(r.stopCh)
}
// Clear resource collector buffer
// Reset clears the stats buffer of resource collector.
func (r *ResourceCollector) Reset() {
r.lock.Lock()
defer r.lock.Unlock()
@ -127,7 +128,7 @@ func (r *ResourceCollector) Reset() {
}
}
// Get CPU usage in percentile
// GetCPUSummary gets CPU usage in percentile.
func (r *ResourceCollector) GetCPUSummary() framework.ContainersCPUSummary {
result := make(framework.ContainersCPUSummary)
for key, name := range systemContainers {
@ -137,6 +138,7 @@ func (r *ResourceCollector) GetCPUSummary() framework.ContainersCPUSummary {
return result
}
// LogLatest logs the latest resource usage.
func (r *ResourceCollector) LogLatest() {
summary, err := r.GetLatest()
if err != nil {
@ -145,7 +147,7 @@ func (r *ResourceCollector) LogLatest() {
framework.Logf("%s", formatResourceUsageStats(summary))
}
// Collect resource usage from Cadvisor
// collectStats collects resource usage from Cadvisor.
func (r *ResourceCollector) collectStats(oldStatsMap map[string]*cadvisorapiv2.ContainerStats) {
for _, name := range systemContainers {
ret, err := r.client.Stats(name, r.request)
@ -171,7 +173,7 @@ func (r *ResourceCollector) collectStats(oldStatsMap map[string]*cadvisorapiv2.C
}
}
// Compute resource usage based on new data
// computeContainerResourceUsage computes resource usage based on new data sample.
func computeContainerResourceUsage(name string, oldStats, newStats *cadvisorapiv2.ContainerStats) *framework.ContainerResourceUsage {
return &framework.ContainerResourceUsage{
Name: name,
@ -184,7 +186,7 @@ func computeContainerResourceUsage(name string, oldStats, newStats *cadvisorapiv
}
}
// Get the latest resource usage
// GetLatest gets the latest resource usage from stats buffer.
func (r *ResourceCollector) GetLatest() (framework.ResourceUsagePerContainer, error) {
r.lock.RLock()
defer r.lock.RUnlock()
@ -214,11 +216,13 @@ func (r *ResourceCollector) GetBasicCPUStats(containerName string) map[float64]f
r.lock.RLock()
defer r.lock.RUnlock()
result := make(map[float64]float64, len(percentiles))
usages := make([]*framework.ContainerResourceUsage, len(r.buffers[containerName]))
// must make a copy of array, otherwise the timeseries order is changed
for i, usage := range r.buffers[containerName] {
usages[i] = usage
// We must make a copy of array, otherwise the timeseries order is changed.
usages := make([]*framework.ContainerResourceUsage, 0)
for _, usage := range r.buffers[containerName] {
usages = append(usages, usage)
}
sort.Sort(resourceUsageByCPU(usages))
for _, q := range percentiles {
index := int(float64(len(usages))*q) - 1
@ -289,8 +293,7 @@ func formatCPUSummary(summary framework.ContainersCPUSummary) string {
return strings.Join(summaryStrings, "\n")
}
// Create a standalone cadvisor pod for fine-grain resource monitoring
// It uses the host-network port
// createCadvisorPod creates a standalone cadvisor pod for fine-grain resource monitoring.
func createCadvisorPod(f *framework.Framework) {
f.PodClient().CreateSync(&api.Pod{
ObjectMeta: api.ObjectMeta{
@ -365,7 +368,7 @@ func createCadvisorPod(f *framework.Framework) {
})
}
// Delete a batch of pods (synchronous)
// deleteBatchPod deletes a batch of pods (synchronous).
func deleteBatchPod(f *framework.Framework, pods []*api.Pod) {
ns := f.Namespace.Name
var wg sync.WaitGroup
@ -386,7 +389,7 @@ func deleteBatchPod(f *framework.Framework, pods []*api.Pod) {
return
}
// Create a list of pods (specification) for test
// newTestPods creates a list of pods (specification) for test.
func newTestPods(numPods int, imageName, podType string) []*api.Pod {
var pods []*api.Pod
for i := 0; i < numPods; i++ {
@ -402,8 +405,7 @@ func newTestPods(numPods int, imageName, podType string) []*api.Pod {
Labels: labels,
},
Spec: api.PodSpec{
// ToDo: restart policy is always
// Check whether pods restart at the end of tests
// Restart policy is always (default).
Containers: []api.Container{
{
Image: imageName,
@ -431,15 +433,15 @@ type ResourceSeriesPerContainer struct {
Version string `json:"version"`
}
// Get the time series of resource usage of each container
// Zhou(ToDo): the labels are to be re-defined based on benchmark dashboard
// GetResourceSeriesWithLabels gets the time series of resource usage of each container.
// TODO(coufon): the labels are to be re-defined based on benchmark dashboard.
func (r *ResourceCollector) GetResourceSeriesWithLabels(labels map[string]string) *ResourceSeriesPerContainer {
seriesPerContainer := &ResourceSeriesPerContainer{
Data: map[string]*ResourceSeries{},
Labels: map[string]string{
"node": framework.TestContext.NodeName,
},
Version: currentKubeletPerfMetricsVersion,
Version: currentTimeSeriesVersion,
}
for key, name := range systemContainers {
newSeries := &ResourceSeries{Units: map[string]string{
@ -459,71 +461,6 @@ func (r *ResourceCollector) GetResourceSeriesWithLabels(labels map[string]string
return seriesPerContainer
}
// Use additional labels to pass in test information for benchmark
func ResourceUsageToPerfDataWithLabels(usagePerNode framework.ResourceUsagePerNode, labels map[string]string) *perftype.PerfData {
items := []perftype.DataItem{}
for node, usages := range usagePerNode {
for c, usage := range usages {
newLabels := map[string]string{
"node": node,
"container": c,
"resource": "memory",
}
for k, v := range labels {
newLabels[k] = v
}
item := perftype.DataItem{
Data: map[string]float64{
"memory": float64(usage.MemoryUsageInBytes) / (1024 * 1024),
"workingset": float64(usage.MemoryWorkingSetInBytes) / (1024 * 1024),
"rss": float64(usage.MemoryRSSInBytes) / (1024 * 1024),
},
Unit: "MB",
Labels: newLabels,
}
items = append(items, item)
}
}
return &perftype.PerfData{
Version: currentKubeletPerfMetricsVersion,
DataItems: items,
}
}
// Use additional labels to pass in test information for benchmark
func CPUUsageToPerfDataWithLabels(usagePerNode framework.NodesCPUSummary, labels map[string]string) *perftype.PerfData {
items := []perftype.DataItem{}
for node, usages := range usagePerNode {
for c, usage := range usages {
newLabels := map[string]string{
"node": node,
"container": c,
"resource": "cpu",
}
for k, v := range labels {
newLabels[k] = v
}
data := map[string]float64{}
for perc, value := range usage {
data[fmt.Sprintf("Perc%02.0f", perc*100)] = value * 1000
}
item := perftype.DataItem{
Data: data,
Unit: "mCPU",
Labels: newLabels,
}
items = append(items, item)
}
}
return &perftype.PerfData{
Version: currentKubeletPerfMetricsVersion,
DataItems: items,
}
}
// Zhou: code for getting container name of docker, copied from pkg/kubelet/cm/container_manager_linux.go
// since they are not exposed
const (

View File

@ -88,8 +88,6 @@ var _ = framework.KubeDescribe("Resource-usage [Serial] [Slow]", func() {
name := fmt.Sprintf("resource tracking for %d pods per node", podsPerNode)
It(name, func() {
expectedCPU, expectedMemory := itArg.cpuLimits, itArg.memLimits
// The test collects resource usage from a standalone Cadvisor pod.
// The Cadvsior of Kubelet has a housekeeping interval of 10s, which is too long to
// show the resource usage spikes. But changing its interval increases the overhead
@ -126,36 +124,16 @@ var _ = framework.KubeDescribe("Resource-usage [Serial] [Slow]", func() {
} else {
time.Sleep(reportingPeriod)
}
logPodsOnNode(f.Client)
logPods(f.Client)
}
rc.Stop()
By("Reporting overall resource usage")
logPodsOnNode(f.Client)
logPods(f.Client)
usagePerContainer, err := rc.GetLatest()
Expect(err).NotTo(HaveOccurred())
// TODO(random-liu): Remove the original log when we migrate to new perfdash
nodeName := framework.TestContext.NodeName
framework.Logf("%s", formatResourceUsageStats(usagePerContainer))
// Log perf result
usagePerNode := make(framework.ResourceUsagePerNode)
usagePerNode[nodeName] = usagePerContainer
framework.PrintPerfData(framework.ResourceUsageToPerfData(usagePerNode))
verifyMemoryLimits(f.Client, expectedMemory, usagePerNode)
cpuSummary := rc.GetCPUSummary()
framework.Logf("%s", formatCPUSummary(cpuSummary))
// Log perf result
cpuSummaryPerNode := make(framework.NodesCPUSummary)
cpuSummaryPerNode[nodeName] = cpuSummary
framework.PrintPerfData(framework.CPUUsageToPerfData(cpuSummaryPerNode))
verifyCPULimits(expectedCPU, cpuSummaryPerNode)
// Log and verify resource usage
verifyResource(f, itArg.cpuLimits, itArg.memLimits, rc)
})
}
})
@ -167,6 +145,35 @@ type resourceTest struct {
memLimits framework.ResourceUsagePerContainer
}
// verifyResource verifies whether resource usage satisfies the limit.
func verifyResource(f *framework.Framework, cpuLimits framework.ContainersCPUSummary,
memLimits framework.ResourceUsagePerContainer, rc *ResourceCollector) {
nodeName := framework.TestContext.NodeName
// Obtain memory PerfData
usagePerContainer, err := rc.GetLatest()
Expect(err).NotTo(HaveOccurred())
framework.Logf("%s", formatResourceUsageStats(usagePerContainer))
usagePerNode := make(framework.ResourceUsagePerNode)
usagePerNode[nodeName] = usagePerContainer
// Obtain cpu PerfData
cpuSummary := rc.GetCPUSummary()
framework.Logf("%s", formatCPUSummary(cpuSummary))
cpuSummaryPerNode := make(framework.NodesCPUSummary)
cpuSummaryPerNode[nodeName] = cpuSummary
// Log resource usage
framework.PrintPerfData(framework.ResourceUsageToPerfData(usagePerNode))
framework.PrintPerfData(framework.CPUUsageToPerfData(cpuSummaryPerNode))
// Verify resource usage
verifyMemoryLimits(f.Client, memLimits, usagePerNode)
verifyCPULimits(cpuLimits, cpuSummaryPerNode)
}
func verifyMemoryLimits(c *client.Client, expected framework.ResourceUsagePerContainer, actual framework.ResourceUsagePerNode) {
if expected == nil {
return
@ -237,7 +244,7 @@ func verifyCPULimits(expected framework.ContainersCPUSummary, actual framework.N
}
}
func logPodsOnNode(c *client.Client) {
func logPods(c *client.Client) {
nodeName := framework.TestContext.NodeName
podList, err := framework.GetKubeletRunningPods(c, nodeName)
if err != nil {