diff --git a/test/e2e/framework/perf_util.go b/test/e2e/framework/perf_util.go index 83f15c8aa90..6ce84164ae7 100644 --- a/test/e2e/framework/perf_util.go +++ b/test/e2e/framework/perf_util.go @@ -57,56 +57,12 @@ const currentKubeletPerfMetricsVersion = "v1" // ResourceUsageToPerfData transforms ResourceUsagePerNode to PerfData. Notice that this function // only cares about memory usage, because cpu usage information will be extracted from NodesCPUSummary. func ResourceUsageToPerfData(usagePerNode ResourceUsagePerNode) *perftype.PerfData { - items := []perftype.DataItem{} - for node, usages := range usagePerNode { - for c, usage := range usages { - item := perftype.DataItem{ - Data: map[string]float64{ - "memory": float64(usage.MemoryUsageInBytes) / (1024 * 1024), - "workingset": float64(usage.MemoryWorkingSetInBytes) / (1024 * 1024), - "rss": float64(usage.MemoryRSSInBytes) / (1024 * 1024), - }, - Unit: "MB", - Labels: map[string]string{ - "node": node, - "container": c, - "resource": "memory", - }, - } - items = append(items, item) - } - } - return &perftype.PerfData{ - Version: currentKubeletPerfMetricsVersion, - DataItems: items, - } + return ResourceUsageToPerfDataWithLabels(usagePerNode, nil) } // CPUUsageToPerfData transforms NodesCPUSummary to PerfData. 
func CPUUsageToPerfData(usagePerNode NodesCPUSummary) *perftype.PerfData { - items := []perftype.DataItem{} - for node, usages := range usagePerNode { - for c, usage := range usages { - data := map[string]float64{} - for perc, value := range usage { - data[fmt.Sprintf("Perc%02.0f", perc*100)] = value * 1000 - } - item := perftype.DataItem{ - Data: data, - Unit: "mCPU", - Labels: map[string]string{ - "node": node, - "container": c, - "resource": "cpu", - }, - } - items = append(items, item) - } - } - return &perftype.PerfData{ - Version: currentKubeletPerfMetricsVersion, - DataItems: items, - } + return CPUUsageToPerfDataWithLabels(usagePerNode, nil) } // PrintPerfData prints the perfdata in json format with PerfResultTag prefix. @@ -117,3 +73,73 @@ func PrintPerfData(p *perftype.PerfData) { Logf("%s %s\n%s", perftype.PerfResultTag, str, perftype.PerfResultEnd) } } + +// ResourceUsageToPerfDataWithLabels transforms ResourceUsagePerNode to PerfData with additional labels. +// Notice that this function only cares about memory usage, because cpu usage information will be extracted from NodesCPUSummary. 
+func ResourceUsageToPerfDataWithLabels(usagePerNode ResourceUsagePerNode, labels map[string]string) *perftype.PerfData { + items := []perftype.DataItem{} + for node, usages := range usagePerNode { + for c, usage := range usages { + newLabels := map[string]string{ + "node": node, + "container": c, + "resource": "memory", + } + if labels != nil { + for k, v := range labels { + newLabels[k] = v + } + } + + item := perftype.DataItem{ + Data: map[string]float64{ + "memory": float64(usage.MemoryUsageInBytes) / (1024 * 1024), + "workingset": float64(usage.MemoryWorkingSetInBytes) / (1024 * 1024), + "rss": float64(usage.MemoryRSSInBytes) / (1024 * 1024), + }, + Unit: "MB", + Labels: newLabels, + } + items = append(items, item) + } + } + return &perftype.PerfData{ + Version: currentKubeletPerfMetricsVersion, + DataItems: items, + } +} + +// CPUUsageToPerfDataWithLabels transforms NodesCPUSummary to PerfData with additional labels. +func CPUUsageToPerfDataWithLabels(usagePerNode NodesCPUSummary, labels map[string]string) *perftype.PerfData { + items := []perftype.DataItem{} + for node, usages := range usagePerNode { + for c, usage := range usages { + newLabels := map[string]string{ + "node": node, + "container": c, + "resource": "cpu", + } + if labels != nil { + for k, v := range labels { + newLabels[k] = v + } + } + + data := map[string]float64{} + for perc, value := range usage { + data[fmt.Sprintf("Perc%02.0f", perc*100)] = value * 1000 + } + + item := perftype.DataItem{ + Data: data, + Unit: "mCPU", + Labels: newLabels, + } + items = append(items, item) + } + } + return &perftype.PerfData{ + Version: currentKubeletPerfMetricsVersion, + DataItems: items, + } +} diff --git a/test/e2e_node/density_test.go b/test/e2e_node/density_test.go index ef5ea23846d..560f45f3770 100644 --- a/test/e2e_node/density_test.go +++ b/test/e2e_node/density_test.go @@ -73,8 +73,8 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() { }) Context("create a batch of pods", func() { 
- // Zhou(ToDo): add more tests and the values are generous, set more precise limits after benchmark - densityTests := []DensityTest{ + // TODO(coufon): add more tests and the values are generous, set more precise limits after benchmark + dTests := []densityTest{ { podsNr: 10, interval: 0 * time.Millisecond, @@ -97,7 +97,7 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() { }, } - for _, testArg := range densityTests { + for _, testArg := range dTests { itArg := testArg It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval", itArg.podsNr, itArg.interval), func() { @@ -186,14 +186,14 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() { // verify resource By("Verifying resource") - verifyResource(f, itArg, rc) + verifyResource(f, itArg.cpuLimits, itArg.memLimits, rc) }) } }) Context("create a sequence of pods", func() { - // Zhou(ToDo): add more tests and the values are generous, set more precise limits after benchmark - densityTests := []DensityTest{ + // TODO(coufon): add more tests and the values are generous, set more precise limits after benchmark + dTests := []densityTest{ { podsNr: 10, bgPodsNr: 10, @@ -213,7 +213,7 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() { }, } - for _, testArg := range densityTests { + for _, testArg := range dTests { itArg := testArg It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %d background pods", itArg.podsNr, itArg.bgPodsNr), func() { @@ -244,13 +244,13 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() { // verify resource By("Verifying resource") - verifyResource(f, itArg, rc) + verifyResource(f, itArg.cpuLimits, itArg.memLimits, rc) }) } }) }) -type DensityTest struct { +type densityTest struct { // number of pods podsNr int // number of background pods @@ -276,6 +276,7 @@ func createBatchPodWithRateControl(f *framework.Framework, pods []*api.Pod, inte return createTimes } +// 
checkPodDeleted checks whether a pod has been successfully deleted func checkPodDeleted(f *framework.Framework, podName string) error { ns := f.Namespace.Name _, err := f.Client.Pods(ns).Get(podName) @@ -306,7 +307,7 @@ func getPodStartLatency(node string) (framework.KubeletLatencyMetrics, error) { return latencyMetrics, nil } -// Verifies whether 50, 90 and 99th percentiles of PodStartupLatency are +// verifyPodStartupLatency verifies whether 50, 90 and 99th percentiles of PodStartupLatency are // within the threshold. func verifyPodStartupLatency(expect, actual framework.LatencyMetric) error { if actual.Perc50 > expect.Perc50 { @@ -321,6 +322,7 @@ func verifyPodStartupLatency(expect, actual framework.LatencyMetric) error { return nil } +// newInformerWatchPod creates an informer to check whether all pods are running. func newInformerWatchPod(f *framework.Framework, mutex *sync.Mutex, watchTimes map[string]unversioned.Time, podType string) *controllerframework.Controller { ns := f.Namespace.Name @@ -365,7 +367,8 @@ func newInformerWatchPod(f *framework.Framework, mutex *sync.Mutex, watchTimes m return controller } -func verifyLatency(batchLag time.Duration, e2eLags []framework.PodLatencyData, testArg DensityTest) { +// verifyLatency verifies whether pod creation latency satisfies the limit. +func verifyLatency(batchLag time.Duration, e2eLags []framework.PodLatencyData, testArg densityTest) { framework.PrintLatencies(e2eLags, "worst client e2e total latencies") // Zhou: do not trust `kubelet' metrics since they are not reset! 
@@ -390,35 +393,7 @@ func verifyLatency(batchLag time.Duration, e2eLags []framework.PodLatencyData, t framework.Logf("Sequential creation throughput is %.1f pods/min", throughputSequential) } -func verifyResource(f *framework.Framework, testArg DensityTest, rc *ResourceCollector) { - nodeName := framework.TestContext.NodeName - - // verify and log memory - usagePerContainer, err := rc.GetLatest() - Expect(err).NotTo(HaveOccurred()) - framework.Logf("%s", formatResourceUsageStats(usagePerContainer)) - - usagePerNode := make(framework.ResourceUsagePerNode) - usagePerNode[nodeName] = usagePerContainer - - memPerfData := framework.ResourceUsageToPerfData(usagePerNode) - framework.PrintPerfData(memPerfData) - - verifyMemoryLimits(f.Client, testArg.memLimits, usagePerNode) - - // verify and log cpu - cpuSummary := rc.GetCPUSummary() - framework.Logf("%s", formatCPUSummary(cpuSummary)) - - cpuSummaryPerNode := make(framework.NodesCPUSummary) - cpuSummaryPerNode[nodeName] = cpuSummary - - cpuPerfData := framework.CPUUsageToPerfData(cpuSummaryPerNode) - framework.PrintPerfData(cpuPerfData) - - verifyCPULimits(testArg.cpuLimits, cpuSummaryPerNode) -} - +// createBatchPodSequential creates pods back-to-back in sequence. func createBatchPodSequential(f *framework.Framework, pods []*api.Pod) (time.Duration, []framework.PodLatencyData) { batchStartTime := unversioned.Now() e2eLags := make([]framework.PodLatencyData, 0) diff --git a/test/e2e_node/resource_collector.go b/test/e2e_node/resource_collector.go index 2d12f31185d..7699e33ac6e 100644 --- a/test/e2e_node/resource_collector.go +++ b/test/e2e_node/resource_collector.go @@ -42,7 +42,6 @@ import ( "k8s.io/kubernetes/pkg/util/uuid" "k8s.io/kubernetes/pkg/util/wait" "k8s.io/kubernetes/test/e2e/framework" - "k8s.io/kubernetes/test/e2e/perftype" . 
"github.com/onsi/gomega" ) @@ -54,8 +53,8 @@ const ( cadvisorPort = 8090 // housekeeping interval of Cadvisor (second) houseKeepingInterval = 1 - // Zhou(ToDo): be consistent with perf_util.go version (not exposed) - currentKubeletPerfMetricsVersion = "v1" + // TODO(coufon): be consistent with perf_util.go version (not exposed) + currentTimeSeriesVersion = "v1" ) var ( @@ -72,6 +71,8 @@ type ResourceCollector struct { stopCh chan struct{} } +// NewResourceCollector creates a resource collector object which collects +// resource usage periodically from Cadvisor func NewResourceCollector(interval time.Duration) *ResourceCollector { buffers := make(map[string][]*framework.ContainerResourceUsage) return &ResourceCollector{ @@ -80,10 +81,10 @@ func NewResourceCollector(interval time.Duration) *ResourceCollector { } } -// Start resource collector -// It connects to the standalone Cadvisor pod +// Start starts resource collector and connects to the standalone Cadvisor pod +// then repeatedly runs collectStats. func (r *ResourceCollector) Start() { - // Get the cgroup containers for kubelet and docker + // Get the cgroup container names for kubelet and docker kubeletContainer, err := getContainerNameForProcess(kubeletProcessName, "") dockerContainer, err := getContainerNameForProcess(dockerProcessName, dockerPidFile) if err == nil { @@ -113,12 +114,12 @@ func (r *ResourceCollector) Start() { go wait.Until(func() { r.collectStats(oldStatsMap) }, r.pollingInterval, r.stopCh) } -// Stop resource collector +// Stop stops resource collector collecting stats. It does not clear the buffer func (r *ResourceCollector) Stop() { close(r.stopCh) } -// Clear resource collector buffer +// Reset clears the stats buffer of resource collector. func (r *ResourceCollector) Reset() { r.lock.Lock() defer r.lock.Unlock() @@ -127,7 +128,7 @@ func (r *ResourceCollector) Reset() { } } -// Get CPU usage in percentile +// GetCPUSummary gets CPU usage in percentile. 
func (r *ResourceCollector) GetCPUSummary() framework.ContainersCPUSummary { result := make(framework.ContainersCPUSummary) for key, name := range systemContainers { @@ -137,6 +138,7 @@ func (r *ResourceCollector) GetCPUSummary() framework.ContainersCPUSummary { return result } +// LogLatest logs the latest resource usage. func (r *ResourceCollector) LogLatest() { summary, err := r.GetLatest() if err != nil { @@ -145,7 +147,7 @@ func (r *ResourceCollector) LogLatest() { framework.Logf("%s", formatResourceUsageStats(summary)) } -// Collect resource usage from Cadvisor +// collectStats collects resource usage from Cadvisor. func (r *ResourceCollector) collectStats(oldStatsMap map[string]*cadvisorapiv2.ContainerStats) { for _, name := range systemContainers { ret, err := r.client.Stats(name, r.request) @@ -171,7 +173,7 @@ func (r *ResourceCollector) collectStats(oldStatsMap map[string]*cadvisorapiv2.C } } -// Compute resource usage based on new data +// computeContainerResourceUsage computes resource usage based on new data sample. func computeContainerResourceUsage(name string, oldStats, newStats *cadvisorapiv2.ContainerStats) *framework.ContainerResourceUsage { return &framework.ContainerResourceUsage{ Name: name, @@ -184,7 +186,7 @@ func computeContainerResourceUsage(name string, oldStats, newStats *cadvisorapiv } } -// Get the latest resource usage +// GetLatest gets the latest resource usage from stats buffer. 
func (r *ResourceCollector) GetLatest() (framework.ResourceUsagePerContainer, error) { r.lock.RLock() defer r.lock.RUnlock() @@ -214,11 +216,13 @@ func (r *ResourceCollector) GetBasicCPUStats(containerName string) map[float64]f r.lock.RLock() defer r.lock.RUnlock() result := make(map[float64]float64, len(percentiles)) - usages := make([]*framework.ContainerResourceUsage, len(r.buffers[containerName])) - // must make a copy of array, otherwise the timeseries order is changed - for i, usage := range r.buffers[containerName] { - usages[i] = usage + + // We must make a copy of array, otherwise the timeseries order is changed. + usages := make([]*framework.ContainerResourceUsage, 0) + for _, usage := range r.buffers[containerName] { + usages = append(usages, usage) } + sort.Sort(resourceUsageByCPU(usages)) for _, q := range percentiles { index := int(float64(len(usages))*q) - 1 @@ -289,8 +293,7 @@ func formatCPUSummary(summary framework.ContainersCPUSummary) string { return strings.Join(summaryStrings, "\n") } -// Create a standalone cadvisor pod for fine-grain resource monitoring -// It uses the host-network port +// createCadvisorPod creates a standalone cadvisor pod for fine-grain resource monitoring. func createCadvisorPod(f *framework.Framework) { f.PodClient().CreateSync(&api.Pod{ ObjectMeta: api.ObjectMeta{ @@ -365,7 +368,7 @@ func createCadvisorPod(f *framework.Framework) { }) } -// Delete a batch of pods (synchronous) +// deleteBatchPod deletes a batch of pods (synchronous). func deleteBatchPod(f *framework.Framework, pods []*api.Pod) { ns := f.Namespace.Name var wg sync.WaitGroup @@ -386,7 +389,7 @@ func deleteBatchPod(f *framework.Framework, pods []*api.Pod) { return } -// Create a list of pods (specification) for test +// newTestPods creates a list of pods (specification) for test. 
func newTestPods(numPods int, imageName, podType string) []*api.Pod { var pods []*api.Pod for i := 0; i < numPods; i++ { @@ -402,8 +405,7 @@ func newTestPods(numPods int, imageName, podType string) []*api.Pod { Labels: labels, }, Spec: api.PodSpec{ - // ToDo: restart policy is always - // Check whether pods restart at the end of tests + // Restart policy is always (default). Containers: []api.Container{ { Image: imageName, @@ -431,15 +433,15 @@ type ResourceSeriesPerContainer struct { Version string `json:"version"` } -// Get the time series of resource usage of each container -// Zhou(ToDo): the labels are to be re-defined based on benchmark dashboard +// GetResourceSeriesWithLabels gets the time series of resource usage of each container. +// TODO(coufon): the labels are to be re-defined based on benchmark dashboard. func (r *ResourceCollector) GetResourceSeriesWithLabels(labels map[string]string) *ResourceSeriesPerContainer { seriesPerContainer := &ResourceSeriesPerContainer{ Data: map[string]*ResourceSeries{}, Labels: map[string]string{ "node": framework.TestContext.NodeName, }, - Version: currentKubeletPerfMetricsVersion, + Version: currentTimeSeriesVersion, } for key, name := range systemContainers { newSeries := &ResourceSeries{Units: map[string]string{ @@ -459,71 +461,6 @@ func (r *ResourceCollector) GetResourceSeriesWithLabels(labels map[string]string return seriesPerContainer } -// Use additional labels to pass in test information for benchmark -func ResourceUsageToPerfDataWithLabels(usagePerNode framework.ResourceUsagePerNode, labels map[string]string) *perftype.PerfData { - items := []perftype.DataItem{} - for node, usages := range usagePerNode { - for c, usage := range usages { - newLabels := map[string]string{ - "node": node, - "container": c, - "resource": "memory", - } - for k, v := range labels { - newLabels[k] = v - } - - item := perftype.DataItem{ - Data: map[string]float64{ - "memory": float64(usage.MemoryUsageInBytes) / (1024 * 1024), - 
"workingset": float64(usage.MemoryWorkingSetInBytes) / (1024 * 1024), - "rss": float64(usage.MemoryRSSInBytes) / (1024 * 1024), - }, - Unit: "MB", - Labels: newLabels, - } - items = append(items, item) - } - } - return &perftype.PerfData{ - Version: currentKubeletPerfMetricsVersion, - DataItems: items, - } -} - -// Use additional labels to pass in test information for benchmark -func CPUUsageToPerfDataWithLabels(usagePerNode framework.NodesCPUSummary, labels map[string]string) *perftype.PerfData { - items := []perftype.DataItem{} - for node, usages := range usagePerNode { - for c, usage := range usages { - newLabels := map[string]string{ - "node": node, - "container": c, - "resource": "cpu", - } - for k, v := range labels { - newLabels[k] = v - } - - data := map[string]float64{} - for perc, value := range usage { - data[fmt.Sprintf("Perc%02.0f", perc*100)] = value * 1000 - } - - item := perftype.DataItem{ - Data: data, - Unit: "mCPU", - Labels: newLabels, - } - items = append(items, item) - } - } - return &perftype.PerfData{ - Version: currentKubeletPerfMetricsVersion, - DataItems: items, - } -} - // Zhou: code for getting container name of docker, copied from pkg/kubelet/cm/container_manager_linux.go // since they are not exposed const ( diff --git a/test/e2e_node/resource_usage_test.go b/test/e2e_node/resource_usage_test.go index f95f7cb0851..663a8804a23 100644 --- a/test/e2e_node/resource_usage_test.go +++ b/test/e2e_node/resource_usage_test.go @@ -88,8 +88,6 @@ var _ = framework.KubeDescribe("Resource-usage [Serial] [Slow]", func() { name := fmt.Sprintf("resource tracking for %d pods per node", podsPerNode) It(name, func() { - expectedCPU, expectedMemory := itArg.cpuLimits, itArg.memLimits - // The test collects resource usage from a standalone Cadvisor pod. // The Cadvsior of Kubelet has a housekeeping interval of 10s, which is too long to // show the resource usage spikes. 
But changing its interval increases the overhead @@ -126,36 +124,16 @@ var _ = framework.KubeDescribe("Resource-usage [Serial] [Slow]", func() { } else { time.Sleep(reportingPeriod) } - logPodsOnNode(f.Client) + logPods(f.Client) } rc.Stop() By("Reporting overall resource usage") - logPodsOnNode(f.Client) + logPods(f.Client) - usagePerContainer, err := rc.GetLatest() - Expect(err).NotTo(HaveOccurred()) - - // TODO(random-liu): Remove the original log when we migrate to new perfdash - nodeName := framework.TestContext.NodeName - framework.Logf("%s", formatResourceUsageStats(usagePerContainer)) - - // Log perf result - usagePerNode := make(framework.ResourceUsagePerNode) - usagePerNode[nodeName] = usagePerContainer - - framework.PrintPerfData(framework.ResourceUsageToPerfData(usagePerNode)) - verifyMemoryLimits(f.Client, expectedMemory, usagePerNode) - - cpuSummary := rc.GetCPUSummary() - framework.Logf("%s", formatCPUSummary(cpuSummary)) - - // Log perf result - cpuSummaryPerNode := make(framework.NodesCPUSummary) - cpuSummaryPerNode[nodeName] = cpuSummary - framework.PrintPerfData(framework.CPUUsageToPerfData(cpuSummaryPerNode)) - verifyCPULimits(expectedCPU, cpuSummaryPerNode) + // Log and verify resource usage + verifyResource(f, itArg.cpuLimits, itArg.memLimits, rc) }) } }) @@ -167,6 +145,35 @@ type resourceTest struct { memLimits framework.ResourceUsagePerContainer } +// verifyResource verifies whether resource usage satisfies the limit. 
+func verifyResource(f *framework.Framework, cpuLimits framework.ContainersCPUSummary, + memLimits framework.ResourceUsagePerContainer, rc *ResourceCollector) { + nodeName := framework.TestContext.NodeName + + // Obtain memory PerfData + usagePerContainer, err := rc.GetLatest() + Expect(err).NotTo(HaveOccurred()) + framework.Logf("%s", formatResourceUsageStats(usagePerContainer)) + + usagePerNode := make(framework.ResourceUsagePerNode) + usagePerNode[nodeName] = usagePerContainer + + // Obtain cpu PerfData + cpuSummary := rc.GetCPUSummary() + framework.Logf("%s", formatCPUSummary(cpuSummary)) + + cpuSummaryPerNode := make(framework.NodesCPUSummary) + cpuSummaryPerNode[nodeName] = cpuSummary + + // Log resource usage + framework.PrintPerfData(framework.ResourceUsageToPerfData(usagePerNode)) + framework.PrintPerfData(framework.CPUUsageToPerfData(cpuSummaryPerNode)) + + // Verify resource usage + verifyMemoryLimits(f.Client, memLimits, usagePerNode) + verifyCPULimits(cpuLimits, cpuSummaryPerNode) +} + func verifyMemoryLimits(c *client.Client, expected framework.ResourceUsagePerContainer, actual framework.ResourceUsagePerNode) { if expected == nil { return @@ -237,7 +244,7 @@ func verifyCPULimits(expected framework.ContainersCPUSummary, actual framework.N } } -func logPodsOnNode(c *client.Client) { +func logPods(c *client.Client) { nodeName := framework.TestContext.NodeName podList, err := framework.GetKubeletRunningPods(c, nodeName) if err != nil {