Merge pull request #102444 from sanwishe/resourceStartTime

Expose container start time in kubelet /metrics/resource endpoint
This commit is contained in:
Kubernetes Prow Robot 2021-07-01 14:27:51 -07:00 committed by GitHub
commit 062bc359ca
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 47 additions and 14 deletions

View File

@ -74,6 +74,13 @@ var (
nil, nil,
metrics.ALPHA, metrics.ALPHA,
"") "")
// containerStartTimeDesc describes the container_start_time_seconds metric,
// which carries a container's start time in seconds since the Unix epoch.
// Samples are labelled by container, pod and namespace, and the metric is
// registered at ALPHA stability.
containerStartTimeDesc = metrics.NewDesc("container_start_time_seconds",
"Start time of the container since unix epoch in seconds",
[]string{"container", "pod", "namespace"},
nil,
metrics.ALPHA,
"")
) )
// NewResourceMetricsCollector returns a metrics.StableCollector which exports resource metrics // NewResourceMetricsCollector returns a metrics.StableCollector which exports resource metrics
@ -96,6 +103,7 @@ var _ metrics.StableCollector = &resourceMetricsCollector{}
func (rc *resourceMetricsCollector) DescribeWithStability(ch chan<- *metrics.Desc) { func (rc *resourceMetricsCollector) DescribeWithStability(ch chan<- *metrics.Desc) {
ch <- nodeCPUUsageDesc ch <- nodeCPUUsageDesc
ch <- nodeMemoryUsageDesc ch <- nodeMemoryUsageDesc
ch <- containerStartTimeDesc
ch <- containerCPUUsageDesc ch <- containerCPUUsageDesc
ch <- containerMemoryUsageDesc ch <- containerMemoryUsageDesc
ch <- podCPUUsageDesc ch <- podCPUUsageDesc
@ -124,6 +132,7 @@ func (rc *resourceMetricsCollector) CollectWithStability(ch chan<- metrics.Metri
for _, pod := range statsSummary.Pods { for _, pod := range statsSummary.Pods {
for _, container := range pod.Containers { for _, container := range pod.Containers {
rc.collectContainerStartTime(ch, pod, container)
rc.collectContainerCPUMetrics(ch, pod, container) rc.collectContainerCPUMetrics(ch, pod, container)
rc.collectContainerMemoryMetrics(ch, pod, container) rc.collectContainerMemoryMetrics(ch, pod, container)
} }
@ -150,6 +159,14 @@ func (rc *resourceMetricsCollector) collectNodeMemoryMetrics(ch chan<- metrics.M
metrics.NewLazyConstMetric(nodeMemoryUsageDesc, metrics.GaugeValue, float64(*s.Memory.WorkingSetBytes))) metrics.NewLazyConstMetric(nodeMemoryUsageDesc, metrics.GaugeValue, float64(*s.Memory.WorkingSetBytes)))
} }
// collectContainerStartTime sends one container_start_time_seconds gauge sample
// for container s of the given pod to ch. The value is the container start time
// in (fractional) seconds since the Unix epoch, labelled with the container
// name, pod name and pod namespace.
//
// Nothing is sent when the start time is not after the Unix epoch.
func (rc *resourceMetricsCollector) collectContainerStartTime(ch chan<- metrics.Metric, pod summary.PodStats, s summary.ContainerStats) {
	// An unset start time is the zero time (January 1, year 1), for which Unix()
	// is a large negative number rather than 0, and for which UnixNano() is
	// undefined. Comparing with <= 0 skips both the unset case and an
	// epoch-valued timestamp instead of exporting a meaningless sample.
	if s.StartTime.Unix() <= 0 {
		return
	}

	// Divide the nanosecond timestamp by one second (in nanoseconds) so that
	// sub-second precision is kept in the exported float value.
	ch <- metrics.NewLazyConstMetric(containerStartTimeDesc, metrics.GaugeValue, float64(s.StartTime.UnixNano())/float64(time.Second), s.Name, pod.PodRef.Name, pod.PodRef.Namespace)
}
func (rc *resourceMetricsCollector) collectContainerCPUMetrics(ch chan<- metrics.Metric, pod summary.PodStats, s summary.ContainerStats) { func (rc *resourceMetricsCollector) collectContainerCPUMetrics(ch chan<- metrics.Metric, pod summary.PodStats, s summary.ContainerStats) {
if s.CPU == nil { if s.CPU == nil {
return return

View File

@ -44,13 +44,16 @@ func (m *mockSummaryProvider) GetCPUAndMemoryStats() (*statsapi.Summary, error)
} }
func TestCollectResourceMetrics(t *testing.T) { func TestCollectResourceMetrics(t *testing.T) {
testTime := metav1.NewTime(time.Unix(2, 0)) // a static timestamp: 2000 // a static timestamp: 2021-06-23 05:11:18.302091597 +0800
staticTimestamp := time.Unix(0, 1624396278302091597)
testTime := metav1.NewTime(staticTimestamp)
interestedMetrics := []string{ interestedMetrics := []string{
"scrape_error", "scrape_error",
"node_cpu_usage_seconds_total", "node_cpu_usage_seconds_total",
"node_memory_working_set_bytes", "node_memory_working_set_bytes",
"container_cpu_usage_seconds_total", "container_cpu_usage_seconds_total",
"container_memory_working_set_bytes", "container_memory_working_set_bytes",
"container_start_time_seconds",
"pod_cpu_usage_seconds_total", "pod_cpu_usage_seconds_total",
"pod_memory_working_set_bytes", "pod_memory_working_set_bytes",
} }
@ -89,10 +92,10 @@ func TestCollectResourceMetrics(t *testing.T) {
expectedMetrics: ` expectedMetrics: `
# HELP node_cpu_usage_seconds_total [ALPHA] Cumulative cpu time consumed by the node in core-seconds # HELP node_cpu_usage_seconds_total [ALPHA] Cumulative cpu time consumed by the node in core-seconds
# TYPE node_cpu_usage_seconds_total counter # TYPE node_cpu_usage_seconds_total counter
node_cpu_usage_seconds_total 10 2000 node_cpu_usage_seconds_total 10 1624396278302
# HELP node_memory_working_set_bytes [ALPHA] Current working set of the node in bytes # HELP node_memory_working_set_bytes [ALPHA] Current working set of the node in bytes
# TYPE node_memory_working_set_bytes gauge # TYPE node_memory_working_set_bytes gauge
node_memory_working_set_bytes 1000 2000 node_memory_working_set_bytes 1000 1624396278302
# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise # HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
# TYPE scrape_error gauge # TYPE scrape_error gauge
scrape_error 0 scrape_error 0
@ -109,7 +112,8 @@ func TestCollectResourceMetrics(t *testing.T) {
}, },
Containers: []statsapi.ContainerStats{ Containers: []statsapi.ContainerStats{
{ {
Name: "container_a", Name: "container_a",
StartTime: metav1.NewTime(staticTimestamp.Add(-30 * time.Second)),
CPU: &statsapi.CPUStats{ CPU: &statsapi.CPUStats{
Time: testTime, Time: testTime,
UsageCoreNanoSeconds: uint64Ptr(10000000000), UsageCoreNanoSeconds: uint64Ptr(10000000000),
@ -120,7 +124,8 @@ func TestCollectResourceMetrics(t *testing.T) {
}, },
}, },
{ {
Name: "container_b", Name: "container_b",
StartTime: metav1.NewTime(staticTimestamp.Add(-2 * time.Minute)),
CPU: &statsapi.CPUStats{ CPU: &statsapi.CPUStats{
Time: testTime, Time: testTime,
UsageCoreNanoSeconds: uint64Ptr(10000000000), UsageCoreNanoSeconds: uint64Ptr(10000000000),
@ -139,7 +144,8 @@ func TestCollectResourceMetrics(t *testing.T) {
}, },
Containers: []statsapi.ContainerStats{ Containers: []statsapi.ContainerStats{
{ {
Name: "container_a", Name: "container_a",
StartTime: metav1.NewTime(staticTimestamp.Add(-10 * time.Minute)),
CPU: &statsapi.CPUStats{ CPU: &statsapi.CPUStats{
Time: testTime, Time: testTime,
UsageCoreNanoSeconds: uint64Ptr(10000000000), UsageCoreNanoSeconds: uint64Ptr(10000000000),
@ -160,14 +166,19 @@ func TestCollectResourceMetrics(t *testing.T) {
scrape_error 0 scrape_error 0
# HELP container_cpu_usage_seconds_total [ALPHA] Cumulative cpu time consumed by the container in core-seconds # HELP container_cpu_usage_seconds_total [ALPHA] Cumulative cpu time consumed by the container in core-seconds
# TYPE container_cpu_usage_seconds_total counter # TYPE container_cpu_usage_seconds_total counter
container_cpu_usage_seconds_total{container="container_a",namespace="namespace_a",pod="pod_a"} 10 2000 container_cpu_usage_seconds_total{container="container_a",namespace="namespace_a",pod="pod_a"} 10 1624396278302
container_cpu_usage_seconds_total{container="container_a",namespace="namespace_b",pod="pod_b"} 10 2000 container_cpu_usage_seconds_total{container="container_a",namespace="namespace_b",pod="pod_b"} 10 1624396278302
container_cpu_usage_seconds_total{container="container_b",namespace="namespace_a",pod="pod_a"} 10 2000 container_cpu_usage_seconds_total{container="container_b",namespace="namespace_a",pod="pod_a"} 10 1624396278302
# HELP container_memory_working_set_bytes [ALPHA] Current working set of the container in bytes # HELP container_memory_working_set_bytes [ALPHA] Current working set of the container in bytes
# TYPE container_memory_working_set_bytes gauge # TYPE container_memory_working_set_bytes gauge
container_memory_working_set_bytes{container="container_a",namespace="namespace_a",pod="pod_a"} 1000 2000 container_memory_working_set_bytes{container="container_a",namespace="namespace_a",pod="pod_a"} 1000 1624396278302
container_memory_working_set_bytes{container="container_a",namespace="namespace_b",pod="pod_b"} 1000 2000 container_memory_working_set_bytes{container="container_a",namespace="namespace_b",pod="pod_b"} 1000 1624396278302
container_memory_working_set_bytes{container="container_b",namespace="namespace_a",pod="pod_a"} 1000 2000 container_memory_working_set_bytes{container="container_b",namespace="namespace_a",pod="pod_a"} 1000 1624396278302
# HELP container_start_time_seconds [ALPHA] Start time of the container since unix epoch in seconds
# TYPE container_start_time_seconds gauge
container_start_time_seconds{container="container_a",namespace="namespace_a",pod="pod_a"} 1.6243962483020916e+09
container_start_time_seconds{container="container_a",namespace="namespace_b",pod="pod_b"} 1.6243956783020916e+09
container_start_time_seconds{container="container_b",namespace="namespace_a",pod="pod_a"} 1.6243961583020916e+09
`, `,
}, },
{ {
@ -197,10 +208,10 @@ func TestCollectResourceMetrics(t *testing.T) {
scrape_error 0 scrape_error 0
# HELP pod_cpu_usage_seconds_total [ALPHA] Cumulative cpu time consumed by the pod in core-seconds # HELP pod_cpu_usage_seconds_total [ALPHA] Cumulative cpu time consumed by the pod in core-seconds
# TYPE pod_cpu_usage_seconds_total counter # TYPE pod_cpu_usage_seconds_total counter
pod_cpu_usage_seconds_total{namespace="namespace_a",pod="pod_a"} 10 2000 pod_cpu_usage_seconds_total{namespace="namespace_a",pod="pod_a"} 10 1624396278302
# HELP pod_memory_working_set_bytes [ALPHA] Current working set of the pod in bytes # HELP pod_memory_working_set_bytes [ALPHA] Current working set of the pod in bytes
# TYPE pod_memory_working_set_bytes gauge # TYPE pod_memory_working_set_bytes gauge
pod_memory_working_set_bytes{namespace="namespace_a",pod="pod_a"} 1000 2000 pod_memory_working_set_bytes{namespace="namespace_a",pod="pod_a"} 1000 1624396278302
`, `,
}, },
} }

View File

@ -88,6 +88,11 @@ var _ = SIGDescribe("ResourceMetricsAPI [NodeFeature:ResourceMetrics]", func() {
fmt.Sprintf("%s::%s::%s", f.Namespace.Name, pod1, "busybox-container"): boundedSample(10*e2evolume.Kb, 80*e2evolume.Mb), fmt.Sprintf("%s::%s::%s", f.Namespace.Name, pod1, "busybox-container"): boundedSample(10*e2evolume.Kb, 80*e2evolume.Mb),
}), }),
// container_start_time_seconds is exported in seconds since the Unix epoch, so
// the accepted window must be expressed in seconds as well (Unix, not UnixNano);
// nanosecond bounds could never be satisfied by a seconds-valued sample.
"container_start_time_seconds": gstruct.MatchElements(containerID, gstruct.IgnoreExtras, gstruct.Elements{
	fmt.Sprintf("%s::%s::%s", f.Namespace.Name, pod0, "busybox-container"): boundedSample(time.Now().Add(-maxStatsAge).Unix(), time.Now().Add(2*time.Minute).Unix()),
	fmt.Sprintf("%s::%s::%s", f.Namespace.Name, pod1, "busybox-container"): boundedSample(time.Now().Add(-maxStatsAge).Unix(), time.Now().Add(2*time.Minute).Unix()),
}),
"pod_cpu_usage_seconds_total": gstruct.MatchElements(podID, gstruct.IgnoreExtras, gstruct.Elements{ "pod_cpu_usage_seconds_total": gstruct.MatchElements(podID, gstruct.IgnoreExtras, gstruct.Elements{
fmt.Sprintf("%s::%s", f.Namespace.Name, pod0): boundedSample(0, 100), fmt.Sprintf("%s::%s", f.Namespace.Name, pod0): boundedSample(0, 100),
fmt.Sprintf("%s::%s", f.Namespace.Name, pod1): boundedSample(0, 100), fmt.Sprintf("%s::%s", f.Namespace.Name, pod1): boundedSample(0, 100),