Merge pull request #102444 from sanwishe/resourceStartTime

Expose container start time in kubelet /metrics/resource endpoint
This commit is contained in:
Kubernetes Prow Robot 2021-07-01 14:27:51 -07:00 committed by GitHub
commit 062bc359ca
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 47 additions and 14 deletions

View File

@ -74,6 +74,13 @@ var (
nil,
metrics.ALPHA,
"")
containerStartTimeDesc = metrics.NewDesc("container_start_time_seconds",
"Start time of the container since unix epoch in seconds",
[]string{"container", "pod", "namespace"},
nil,
metrics.ALPHA,
"")
)
// NewResourceMetricsCollector returns a metrics.StableCollector which exports resource metrics
@ -96,6 +103,7 @@ var _ metrics.StableCollector = &resourceMetricsCollector{}
func (rc *resourceMetricsCollector) DescribeWithStability(ch chan<- *metrics.Desc) {
ch <- nodeCPUUsageDesc
ch <- nodeMemoryUsageDesc
ch <- containerStartTimeDesc
ch <- containerCPUUsageDesc
ch <- containerMemoryUsageDesc
ch <- podCPUUsageDesc
@ -124,6 +132,7 @@ func (rc *resourceMetricsCollector) CollectWithStability(ch chan<- metrics.Metri
for _, pod := range statsSummary.Pods {
for _, container := range pod.Containers {
rc.collectContainerStartTime(ch, pod, container)
rc.collectContainerCPUMetrics(ch, pod, container)
rc.collectContainerMemoryMetrics(ch, pod, container)
}
@ -150,6 +159,14 @@ func (rc *resourceMetricsCollector) collectNodeMemoryMetrics(ch chan<- metrics.M
metrics.NewLazyConstMetric(nodeMemoryUsageDesc, metrics.GaugeValue, float64(*s.Memory.WorkingSetBytes)))
}
// collectContainerStartTime sends the container_start_time_seconds gauge for a
// single container to ch, labeled with the container name, the pod name and
// the pod namespace. The value is the container start time in seconds since
// the Unix epoch; sub-second precision is kept as a fractional part.
//
// Containers whose StartTime is unset, or otherwise not after the epoch, are
// skipped. The zero time.Time lies in year 1, so its Unix() is negative rather
// than 0, and its UnixNano() cannot be represented by an int64; an equality
// check against 0 would let such a value through and publish a bogus sample.
func (rc *resourceMetricsCollector) collectContainerStartTime(ch chan<- metrics.Metric, pod summary.PodStats, s summary.ContainerStats) {
	if s.StartTime.Unix() <= 0 {
		return
	}

	// Divide the nanosecond timestamp instead of using Unix() so the reported
	// value keeps its fractional seconds.
	ch <- metrics.NewLazyConstMetric(containerStartTimeDesc, metrics.GaugeValue, float64(s.StartTime.UnixNano())/float64(time.Second), s.Name, pod.PodRef.Name, pod.PodRef.Namespace)
}
func (rc *resourceMetricsCollector) collectContainerCPUMetrics(ch chan<- metrics.Metric, pod summary.PodStats, s summary.ContainerStats) {
if s.CPU == nil {
return

View File

@ -44,13 +44,16 @@ func (m *mockSummaryProvider) GetCPUAndMemoryStats() (*statsapi.Summary, error)
}
func TestCollectResourceMetrics(t *testing.T) {
testTime := metav1.NewTime(time.Unix(2, 0)) // a static timestamp: 2000
// a static timestamp: 2021-06-23 05:11:18.302091597 +0800
staticTimestamp := time.Unix(0, 1624396278302091597)
testTime := metav1.NewTime(staticTimestamp)
interestedMetrics := []string{
"scrape_error",
"node_cpu_usage_seconds_total",
"node_memory_working_set_bytes",
"container_cpu_usage_seconds_total",
"container_memory_working_set_bytes",
"container_start_time_seconds",
"pod_cpu_usage_seconds_total",
"pod_memory_working_set_bytes",
}
@ -89,10 +92,10 @@ func TestCollectResourceMetrics(t *testing.T) {
expectedMetrics: `
# HELP node_cpu_usage_seconds_total [ALPHA] Cumulative cpu time consumed by the node in core-seconds
# TYPE node_cpu_usage_seconds_total counter
node_cpu_usage_seconds_total 10 2000
node_cpu_usage_seconds_total 10 1624396278302
# HELP node_memory_working_set_bytes [ALPHA] Current working set of the node in bytes
# TYPE node_memory_working_set_bytes gauge
node_memory_working_set_bytes 1000 2000
node_memory_working_set_bytes 1000 1624396278302
# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
# TYPE scrape_error gauge
scrape_error 0
@ -109,7 +112,8 @@ func TestCollectResourceMetrics(t *testing.T) {
},
Containers: []statsapi.ContainerStats{
{
Name: "container_a",
Name: "container_a",
StartTime: metav1.NewTime(staticTimestamp.Add(-30 * time.Second)),
CPU: &statsapi.CPUStats{
Time: testTime,
UsageCoreNanoSeconds: uint64Ptr(10000000000),
@ -120,7 +124,8 @@ func TestCollectResourceMetrics(t *testing.T) {
},
},
{
Name: "container_b",
Name: "container_b",
StartTime: metav1.NewTime(staticTimestamp.Add(-2 * time.Minute)),
CPU: &statsapi.CPUStats{
Time: testTime,
UsageCoreNanoSeconds: uint64Ptr(10000000000),
@ -139,7 +144,8 @@ func TestCollectResourceMetrics(t *testing.T) {
},
Containers: []statsapi.ContainerStats{
{
Name: "container_a",
Name: "container_a",
StartTime: metav1.NewTime(staticTimestamp.Add(-10 * time.Minute)),
CPU: &statsapi.CPUStats{
Time: testTime,
UsageCoreNanoSeconds: uint64Ptr(10000000000),
@ -160,14 +166,19 @@ func TestCollectResourceMetrics(t *testing.T) {
scrape_error 0
# HELP container_cpu_usage_seconds_total [ALPHA] Cumulative cpu time consumed by the container in core-seconds
# TYPE container_cpu_usage_seconds_total counter
container_cpu_usage_seconds_total{container="container_a",namespace="namespace_a",pod="pod_a"} 10 2000
container_cpu_usage_seconds_total{container="container_a",namespace="namespace_b",pod="pod_b"} 10 2000
container_cpu_usage_seconds_total{container="container_b",namespace="namespace_a",pod="pod_a"} 10 2000
container_cpu_usage_seconds_total{container="container_a",namespace="namespace_a",pod="pod_a"} 10 1624396278302
container_cpu_usage_seconds_total{container="container_a",namespace="namespace_b",pod="pod_b"} 10 1624396278302
container_cpu_usage_seconds_total{container="container_b",namespace="namespace_a",pod="pod_a"} 10 1624396278302
# HELP container_memory_working_set_bytes [ALPHA] Current working set of the container in bytes
# TYPE container_memory_working_set_bytes gauge
container_memory_working_set_bytes{container="container_a",namespace="namespace_a",pod="pod_a"} 1000 2000
container_memory_working_set_bytes{container="container_a",namespace="namespace_b",pod="pod_b"} 1000 2000
container_memory_working_set_bytes{container="container_b",namespace="namespace_a",pod="pod_a"} 1000 2000
container_memory_working_set_bytes{container="container_a",namespace="namespace_a",pod="pod_a"} 1000 1624396278302
container_memory_working_set_bytes{container="container_a",namespace="namespace_b",pod="pod_b"} 1000 1624396278302
container_memory_working_set_bytes{container="container_b",namespace="namespace_a",pod="pod_a"} 1000 1624396278302
# HELP container_start_time_seconds [ALPHA] Start time of the container since unix epoch in seconds
# TYPE container_start_time_seconds gauge
container_start_time_seconds{container="container_a",namespace="namespace_a",pod="pod_a"} 1.6243962483020916e+09
container_start_time_seconds{container="container_a",namespace="namespace_b",pod="pod_b"} 1.6243956783020916e+09
container_start_time_seconds{container="container_b",namespace="namespace_a",pod="pod_a"} 1.6243961583020916e+09
`,
},
{
@ -197,10 +208,10 @@ func TestCollectResourceMetrics(t *testing.T) {
scrape_error 0
# HELP pod_cpu_usage_seconds_total [ALPHA] Cumulative cpu time consumed by the pod in core-seconds
# TYPE pod_cpu_usage_seconds_total counter
pod_cpu_usage_seconds_total{namespace="namespace_a",pod="pod_a"} 10 2000
pod_cpu_usage_seconds_total{namespace="namespace_a",pod="pod_a"} 10 1624396278302
# HELP pod_memory_working_set_bytes [ALPHA] Current working set of the pod in bytes
# TYPE pod_memory_working_set_bytes gauge
pod_memory_working_set_bytes{namespace="namespace_a",pod="pod_a"} 1000 2000
pod_memory_working_set_bytes{namespace="namespace_a",pod="pod_a"} 1000 1624396278302
`,
},
}

View File

@ -88,6 +88,11 @@ var _ = SIGDescribe("ResourceMetricsAPI [NodeFeature:ResourceMetrics]", func() {
fmt.Sprintf("%s::%s::%s", f.Namespace.Name, pod1, "busybox-container"): boundedSample(10*e2evolume.Kb, 80*e2evolume.Mb),
}),
"container_start_time_seconds": gstruct.MatchElements(containerID, gstruct.IgnoreExtras, gstruct.Elements{
fmt.Sprintf("%s::%s::%s", f.Namespace.Name, pod0, "busybox-container"): boundedSample(time.Now().Add(-maxStatsAge).UnixNano(), time.Now().Add(2*time.Minute).UnixNano()),
fmt.Sprintf("%s::%s::%s", f.Namespace.Name, pod1, "busybox-container"): boundedSample(time.Now().Add(-maxStatsAge).UnixNano(), time.Now().Add(2*time.Minute).UnixNano()),
}),
"pod_cpu_usage_seconds_total": gstruct.MatchElements(podID, gstruct.IgnoreExtras, gstruct.Elements{
fmt.Sprintf("%s::%s", f.Namespace.Name, pod0): boundedSample(0, 100),
fmt.Sprintf("%s::%s", f.Namespace.Name, pod1): boundedSample(0, 100),