Merge pull request #95839 from egernst/pod-usage

resource-metrics: add pod/sandbox metrics to endpoint
This commit is contained in:
Kubernetes Prow Robot 2020-11-12 19:36:23 -08:00 committed by GitHub
commit 0e0cc1ead8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 86 additions and 3 deletions

View File

@ -54,7 +54,21 @@ var (
metrics.ALPHA,
"")
resouceScrapeResultDesc = metrics.NewDesc("scrape_error",
podCPUUsageDesc = metrics.NewDesc("pod_cpu_usage_seconds_total",
"Cumulative cpu time consumed by the pod in core-seconds",
[]string{"pod", "namespace"},
nil,
metrics.ALPHA,
"")
podMemoryUsageDesc = metrics.NewDesc("pod_memory_working_set_bytes",
"Current working set of the pod in bytes",
[]string{"pod", "namespace"},
nil,
metrics.ALPHA,
"")
resourceScrapeResultDesc = metrics.NewDesc("scrape_error",
"1 if there was an error while getting container metrics, 0 otherwise",
nil,
nil,
@ -84,7 +98,9 @@ func (rc *resourceMetricsCollector) DescribeWithStability(ch chan<- *metrics.Des
ch <- nodeMemoryUsageDesc
ch <- containerCPUUsageDesc
ch <- containerMemoryUsageDesc
ch <- resouceScrapeResultDesc
ch <- podCPUUsageDesc
ch <- podMemoryUsageDesc
ch <- resourceScrapeResultDesc
}
// CollectWithStability implements metrics.StableCollector
@ -94,7 +110,7 @@ func (rc *resourceMetricsCollector) DescribeWithStability(ch chan<- *metrics.Des
func (rc *resourceMetricsCollector) CollectWithStability(ch chan<- metrics.Metric) {
var errorCount float64
defer func() {
ch <- metrics.NewLazyConstMetric(resouceScrapeResultDesc, metrics.GaugeValue, errorCount)
ch <- metrics.NewLazyConstMetric(resourceScrapeResultDesc, metrics.GaugeValue, errorCount)
}()
statsSummary, err := rc.provider.GetCPUAndMemoryStats()
if err != nil {
@ -111,6 +127,8 @@ func (rc *resourceMetricsCollector) CollectWithStability(ch chan<- metrics.Metri
rc.collectContainerCPUMetrics(ch, pod, container)
rc.collectContainerMemoryMetrics(ch, pod, container)
}
rc.collectPodCPUMetrics(ch, pod)
rc.collectPodMemoryMetrics(ch, pod)
}
}
@ -151,3 +169,23 @@ func (rc *resourceMetricsCollector) collectContainerMemoryMetrics(ch chan<- metr
metrics.NewLazyConstMetric(containerMemoryUsageDesc, metrics.GaugeValue,
float64(*s.Memory.WorkingSetBytes), s.Name, pod.PodRef.Name, pod.PodRef.Namespace))
}
// collectPodCPUMetrics emits the pod_cpu_usage_seconds_total sample for pod,
// stamped with the time recorded in the pod's CPU stats and labelled with the
// pod's name and namespace.
//
// Nothing is emitted when the pod carries no CPU stats or when the cumulative
// usage value is absent: UsageCoreNanoSeconds is a pointer, and dereferencing
// it unguarded would panic in the middle of a scrape.
func (rc *resourceMetricsCollector) collectPodCPUMetrics(ch chan<- metrics.Metric, pod summary.PodStats) {
	if pod.CPU == nil || pod.CPU.UsageCoreNanoSeconds == nil {
		return
	}

	// Usage is reported in core-nanoseconds; the metric is exposed in core-seconds.
	usageSeconds := float64(*pod.CPU.UsageCoreNanoSeconds) / float64(time.Second)

	ch <- metrics.NewLazyMetricWithTimestamp(pod.CPU.Time.Time,
		metrics.NewLazyConstMetric(podCPUUsageDesc, metrics.CounterValue,
			usageSeconds, pod.PodRef.Name, pod.PodRef.Namespace))
}
// collectPodMemoryMetrics emits the pod_memory_working_set_bytes sample for
// pod, stamped with the time recorded in the pod's memory stats and labelled
// with the pod's name and namespace.
//
// Nothing is emitted when the pod carries no memory stats or when the working
// set value is absent: WorkingSetBytes is a pointer, and dereferencing it
// unguarded would panic in the middle of a scrape.
func (rc *resourceMetricsCollector) collectPodMemoryMetrics(ch chan<- metrics.Metric, pod summary.PodStats) {
	if pod.Memory == nil || pod.Memory.WorkingSetBytes == nil {
		return
	}

	workingSetBytes := float64(*pod.Memory.WorkingSetBytes)

	ch <- metrics.NewLazyMetricWithTimestamp(pod.Memory.Time.Time,
		metrics.NewLazyConstMetric(podMemoryUsageDesc, metrics.GaugeValue,
			workingSetBytes, pod.PodRef.Name, pod.PodRef.Namespace))
}

View File

@ -51,6 +51,8 @@ func TestCollectResourceMetrics(t *testing.T) {
"node_memory_working_set_bytes",
"container_cpu_usage_seconds_total",
"container_memory_working_set_bytes",
"pod_cpu_usage_seconds_total",
"pod_memory_working_set_bytes",
}
tests := []struct {
@ -168,6 +170,39 @@ func TestCollectResourceMetrics(t *testing.T) {
container_memory_working_set_bytes{container="container_b",namespace="namespace_a",pod="pod_a"} 1000 2000
`,
},
{
name: "arbitrary pod metrics",
summary: &statsapi.Summary{
Pods: []statsapi.PodStats{
{
PodRef: statsapi.PodReference{
Name: "pod_a",
Namespace: "namespace_a",
},
CPU: &statsapi.CPUStats{
Time: testTime,
UsageCoreNanoSeconds: uint64Ptr(10000000000),
},
Memory: &statsapi.MemoryStats{
Time: testTime,
WorkingSetBytes: uint64Ptr(1000),
},
},
},
},
summaryErr: nil,
expectedMetrics: `
# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
# TYPE scrape_error gauge
scrape_error 0
# HELP pod_cpu_usage_seconds_total [ALPHA] Cumulative cpu time consumed by the pod in core-seconds
# TYPE pod_cpu_usage_seconds_total counter
pod_cpu_usage_seconds_total{namespace="namespace_a",pod="pod_a"} 10 2000
# HELP pod_memory_working_set_bytes [ALPHA] Current working set of the pod in bytes
# TYPE pod_memory_working_set_bytes gauge
pod_memory_working_set_bytes{namespace="namespace_a",pod="pod_a"} 1000 2000
`,
},
}
for _, test := range tests {

View File

@ -87,6 +87,16 @@ var _ = framework.KubeDescribe("ResourceMetricsAPI [NodeFeature:ResourceMetrics]
fmt.Sprintf("%s::%s::%s", f.Namespace.Name, pod0, "busybox-container"): boundedSample(10*e2evolume.Kb, 80*e2evolume.Mb),
fmt.Sprintf("%s::%s::%s", f.Namespace.Name, pod1, "busybox-container"): boundedSample(10*e2evolume.Kb, 80*e2evolume.Mb),
}),
"pod_cpu_usage_seconds_total": gstruct.MatchElements(containerID, gstruct.IgnoreExtras, gstruct.Elements{
fmt.Sprintf("%s::%s", f.Namespace.Name, pod0): boundedSample(0, 100),
fmt.Sprintf("%s::%s", f.Namespace.Name, pod1): boundedSample(0, 100),
}),
"pod_memory_working_set_bytes": gstruct.MatchAllElements(containerID, gstruct.Elements{
fmt.Sprintf("%s::%s", f.Namespace.Name, pod0): boundedSample(10*e2evolume.Kb, 80*e2evolume.Mb),
fmt.Sprintf("%s::%s", f.Namespace.Name, pod1): boundedSample(10*e2evolume.Kb, 80*e2evolume.Mb),
}),
})
ginkgo.By("Giving pods a minute to start up and produce metrics")
gomega.Eventually(getResourceMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)