diff --git a/pkg/kubelet/metrics/collectors/resource_metrics.go b/pkg/kubelet/metrics/collectors/resource_metrics.go
index 0c3932f801e..0a98bd85882 100644
--- a/pkg/kubelet/metrics/collectors/resource_metrics.go
+++ b/pkg/kubelet/metrics/collectors/resource_metrics.go
@@ -54,7 +54,21 @@ var (
 		metrics.ALPHA,
 		"")
 
-	resouceScrapeResultDesc = metrics.NewDesc("scrape_error",
+	podCPUUsageDesc = metrics.NewDesc("pod_cpu_usage_seconds_total",
+		"Cumulative cpu time consumed by the pod in core-seconds",
+		[]string{"pod", "namespace"},
+		nil,
+		metrics.ALPHA,
+		"")
+
+	podMemoryUsageDesc = metrics.NewDesc("pod_memory_working_set_bytes",
+		"Current working set of the pod in bytes",
+		[]string{"pod", "namespace"},
+		nil,
+		metrics.ALPHA,
+		"")
+
+	resourceScrapeResultDesc = metrics.NewDesc("scrape_error",
 		"1 if there was an error while getting container metrics, 0 otherwise",
 		nil,
 		nil,
@@ -84,7 +98,9 @@ func (rc *resourceMetricsCollector) DescribeWithStability(ch chan<- *metrics.Des
 	ch <- nodeMemoryUsageDesc
 	ch <- containerCPUUsageDesc
 	ch <- containerMemoryUsageDesc
-	ch <- resouceScrapeResultDesc
+	ch <- podCPUUsageDesc
+	ch <- podMemoryUsageDesc
+	ch <- resourceScrapeResultDesc
 }
 
 // CollectWithStability implements metrics.StableCollector
@@ -94,7 +110,7 @@ func (rc *resourceMetricsCollector) DescribeWithStability(ch chan<- *metrics.Des
 func (rc *resourceMetricsCollector) CollectWithStability(ch chan<- metrics.Metric) {
 	var errorCount float64
 	defer func() {
-		ch <- metrics.NewLazyConstMetric(resouceScrapeResultDesc, metrics.GaugeValue, errorCount)
+		ch <- metrics.NewLazyConstMetric(resourceScrapeResultDesc, metrics.GaugeValue, errorCount)
 	}()
 	statsSummary, err := rc.provider.GetCPUAndMemoryStats()
 	if err != nil {
@@ -111,6 +127,8 @@ func (rc *resourceMetricsCollector) CollectWithStability(ch chan<- metrics.Metri
 			rc.collectContainerCPUMetrics(ch, pod, container)
 			rc.collectContainerMemoryMetrics(ch, pod, container)
 		}
+		rc.collectPodCPUMetrics(ch, pod)
+		rc.collectPodMemoryMetrics(ch, pod)
 	}
 }
 
@@ -151,3 +169,27 @@ func (rc *resourceMetricsCollector) collectContainerMemoryMetrics(ch chan<- metr
 		metrics.NewLazyConstMetric(containerMemoryUsageDesc, metrics.GaugeValue,
 			float64(*s.Memory.WorkingSetBytes), s.Name, pod.PodRef.Name, pod.PodRef.Namespace))
 }
+
+// collectPodCPUMetrics sends the pod's cumulative CPU usage, in core-seconds,
+// stamped with the sample time. Pods with no CPU usage reading are skipped.
+func (rc *resourceMetricsCollector) collectPodCPUMetrics(ch chan<- metrics.Metric, pod summary.PodStats) {
+	if pod.CPU == nil || pod.CPU.UsageCoreNanoSeconds == nil {
+		return
+	}
+
+	ch <- metrics.NewLazyMetricWithTimestamp(pod.CPU.Time.Time,
+		metrics.NewLazyConstMetric(podCPUUsageDesc, metrics.CounterValue,
+			float64(*pod.CPU.UsageCoreNanoSeconds)/float64(time.Second), pod.PodRef.Name, pod.PodRef.Namespace))
+}
+
+// collectPodMemoryMetrics sends the pod's working set size in bytes, stamped
+// with the sample time. Pods with no working set reading are skipped.
+func (rc *resourceMetricsCollector) collectPodMemoryMetrics(ch chan<- metrics.Metric, pod summary.PodStats) {
+	if pod.Memory == nil || pod.Memory.WorkingSetBytes == nil {
+		return
+	}
+
+	ch <- metrics.NewLazyMetricWithTimestamp(pod.Memory.Time.Time,
+		metrics.NewLazyConstMetric(podMemoryUsageDesc, metrics.GaugeValue,
+			float64(*pod.Memory.WorkingSetBytes), pod.PodRef.Name, pod.PodRef.Namespace))
+}
diff --git a/pkg/kubelet/metrics/collectors/resource_metrics_test.go b/pkg/kubelet/metrics/collectors/resource_metrics_test.go
index b0a3fb972dc..f98546471e1 100644
--- a/pkg/kubelet/metrics/collectors/resource_metrics_test.go
+++ b/pkg/kubelet/metrics/collectors/resource_metrics_test.go
@@ -51,6 +51,8 @@ func TestCollectResourceMetrics(t *testing.T) {
 		"node_memory_working_set_bytes",
 		"container_cpu_usage_seconds_total",
 		"container_memory_working_set_bytes",
+		"pod_cpu_usage_seconds_total",
+		"pod_memory_working_set_bytes",
 	}
 
 	tests := []struct {
@@ -168,6 +170,39 @@ func TestCollectResourceMetrics(t *testing.T) {
 				container_memory_working_set_bytes{container="container_b",namespace="namespace_a",pod="pod_a"} 1000 2000
 			`,
 		},
+		{
+			name: "arbitrary pod metrics",
+			summary: &statsapi.Summary{
+				Pods: []statsapi.PodStats{
+					{
+						PodRef: statsapi.PodReference{
+							Name:      "pod_a",
+							Namespace: "namespace_a",
+						},
+						CPU: &statsapi.CPUStats{
+							Time:                 testTime,
+							UsageCoreNanoSeconds: uint64Ptr(10000000000),
+						},
+						Memory: &statsapi.MemoryStats{
+							Time:            testTime,
+							WorkingSetBytes: uint64Ptr(1000),
+						},
+					},
+				},
+			},
+			summaryErr: nil,
+			expectedMetrics: `
+				# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
+				# TYPE scrape_error gauge
+				scrape_error 0
+				# HELP pod_cpu_usage_seconds_total [ALPHA] Cumulative cpu time consumed by the pod in core-seconds
+				# TYPE pod_cpu_usage_seconds_total counter
+				pod_cpu_usage_seconds_total{namespace="namespace_a",pod="pod_a"} 10 2000
+				# HELP pod_memory_working_set_bytes [ALPHA] Current working set of the pod in bytes
+				# TYPE pod_memory_working_set_bytes gauge
+				pod_memory_working_set_bytes{namespace="namespace_a",pod="pod_a"} 1000 2000
+			`,
+		},
 	}
 
 	for _, test := range tests {
diff --git a/test/e2e_node/resource_metrics_test.go b/test/e2e_node/resource_metrics_test.go
index c1c4b638c0f..f6a076bf66e 100644
--- a/test/e2e_node/resource_metrics_test.go
+++ b/test/e2e_node/resource_metrics_test.go
@@ -87,6 +87,18 @@ var _ = framework.KubeDescribe("ResourceMetricsAPI [NodeFeature:ResourceMetrics]
 					fmt.Sprintf("%s::%s::%s", f.Namespace.Name, pod0, "busybox-container"): boundedSample(10*e2evolume.Kb, 80*e2evolume.Mb),
 					fmt.Sprintf("%s::%s::%s", f.Namespace.Name, pod1, "busybox-container"): boundedSample(10*e2evolume.Kb, 80*e2evolume.Mb),
 				}),
+
+				// NOTE(review): containerID is reused as the identifier for pod-level samples, whose
+				// expected keys below are "namespace::pod" - confirm it produces keys of that form.
+				"pod_cpu_usage_seconds_total": gstruct.MatchElements(containerID, gstruct.IgnoreExtras, gstruct.Elements{
+					fmt.Sprintf("%s::%s", f.Namespace.Name, pod0): boundedSample(0, 100),
+					fmt.Sprintf("%s::%s", f.Namespace.Name, pod1): boundedSample(0, 100),
+				}),
+
+				"pod_memory_working_set_bytes": gstruct.MatchAllElements(containerID, gstruct.Elements{
+					fmt.Sprintf("%s::%s", f.Namespace.Name, pod0): boundedSample(10*e2evolume.Kb, 80*e2evolume.Mb),
+					fmt.Sprintf("%s::%s", f.Namespace.Name, pod1): boundedSample(10*e2evolume.Kb, 80*e2evolume.Mb),
+				}),
 			})
 			ginkgo.By("Giving pods a minute to start up and produce metrics")
 			gomega.Eventually(getResourceMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)