Fix kubelet panic when accessing metrics/resource endpoint

Author: JunYang
Date:   2022-07-14 16:38:48 +08:00
Parent: f3654386ab
Commit: f33652ce61
2 changed files with 117 additions and 6 deletions
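The panic is a nil-pointer dereference: in the stats summary API, the sampled CPU and memory values are *uint64 fields that can be nil even when their parent CPUStats/MemoryStats struct is non-nil, and the collectors below dereferenced them unconditionally. A minimal, self-contained sketch of the failure mode, using stand-in types rather than the real statsapi ones:

package main

import "fmt"

// Stand-ins for the statsapi structs touched in the diff below: the
// sampled value is a pointer and may be absent even when CPU is set.
type CPUStats struct {
	UsageCoreNanoSeconds *uint64
}

type NodeStats struct {
	CPU *CPUStats
}

// Mirrors the pre-fix guard: it checks the outer struct pointer only.
func collectNodeCPU(s NodeStats) {
	if s.CPU == nil { // old check: necessary but not sufficient
		return
	}
	// Nil-pointer dereference (runtime panic) when the counter was never sampled.
	fmt.Println(float64(*s.CPU.UsageCoreNanoSeconds) / 1e9)
}

func main() {
	collectNodeCPU(NodeStats{CPU: &CPUStats{}}) // CPU present, value nil: panics
}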

--- a/pkg/kubelet/metrics/collectors/resource_metrics.go
+++ b/pkg/kubelet/metrics/collectors/resource_metrics.go

@@ -142,7 +142,7 @@ func (rc *resourceMetricsCollector) CollectWithStability(ch chan<- metrics.Metri
 }
 
 func (rc *resourceMetricsCollector) collectNodeCPUMetrics(ch chan<- metrics.Metric, s summary.NodeStats) {
-	if s.CPU == nil {
+	if s.CPU == nil || s.CPU.UsageCoreNanoSeconds == nil {
 		return
 	}
@@ -151,7 +151,7 @@ func (rc *resourceMetricsCollector) collectNodeCPUMetrics(ch chan<- metrics.Metr
 }
 
 func (rc *resourceMetricsCollector) collectNodeMemoryMetrics(ch chan<- metrics.Metric, s summary.NodeStats) {
-	if s.Memory == nil {
+	if s.Memory == nil || s.Memory.WorkingSetBytes == nil {
 		return
 	}
@@ -169,7 +169,7 @@ func (rc *resourceMetricsCollector) collectContainerStartTime(ch chan<- metrics.
 }
 
 func (rc *resourceMetricsCollector) collectContainerCPUMetrics(ch chan<- metrics.Metric, pod summary.PodStats, s summary.ContainerStats) {
-	if s.CPU == nil {
+	if s.CPU == nil || s.CPU.UsageCoreNanoSeconds == nil {
 		return
 	}
@@ -179,7 +179,7 @@ func (rc *resourceMetricsCollector) collectContainerCPUMetrics(ch chan<- metrics
 }
 
 func (rc *resourceMetricsCollector) collectContainerMemoryMetrics(ch chan<- metrics.Metric, pod summary.PodStats, s summary.ContainerStats) {
-	if s.Memory == nil {
+	if s.Memory == nil || s.Memory.WorkingSetBytes == nil {
 		return
 	}
@@ -189,7 +189,7 @@ func (rc *resourceMetricsCollector) collectContainerMemoryMetrics(ch chan<- metr
 }
 
 func (rc *resourceMetricsCollector) collectPodCPUMetrics(ch chan<- metrics.Metric, pod summary.PodStats) {
-	if pod.CPU == nil {
+	if pod.CPU == nil || pod.CPU.UsageCoreNanoSeconds == nil {
 		return
 	}
@@ -199,7 +199,7 @@ func (rc *resourceMetricsCollector) collectPodCPUMetrics(ch chan<- metrics.Metri
 }
 
 func (rc *resourceMetricsCollector) collectPodMemoryMetrics(ch chan<- metrics.Metric, pod summary.PodStats) {
-	if pod.Memory == nil {
+	if pod.Memory == nil || pod.Memory.WorkingSetBytes == nil {
 		return
 	}
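All six collectors get the same shape of fix: guard the whole pointer chain before dereferencing, then return silently so the series is simply absent for that scrape. As a hedged sketch of how one fixed collector reads in full, assuming the metric emission keeps following the file's existing NewLazyMetricWithTimestamp/NewLazyConstMetric pattern and the nodeCPUUsageDesc descriptor defined elsewhere in this file:

func (rc *resourceMetricsCollector) collectNodeCPUMetrics(ch chan<- metrics.Metric, s summary.NodeStats) {
	// Both the stats struct and the sampled value inside it must be present.
	if s.CPU == nil || s.CPU.UsageCoreNanoSeconds == nil {
		return
	}

	// Convert cumulative core-nanoseconds to core-seconds and stamp the
	// sample with the time the reading was taken.
	ch <- metrics.NewLazyMetricWithTimestamp(s.CPU.Time.Time,
		metrics.NewLazyConstMetric(nodeCPUUsageDesc, metrics.CounterValue,
			float64(*s.CPU.UsageCoreNanoSeconds)/float64(time.Second)))
}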

--- a/pkg/kubelet/metrics/collectors/resource_metrics_test.go
+++ b/pkg/kubelet/metrics/collectors/resource_metrics_test.go

@@ -89,6 +89,27 @@ func TestCollectResourceMetrics(t *testing.T) {
 			scrape_error 0
 			`,
 		},
+		{
+			name: "nil node metrics",
+			summary: &statsapi.Summary{
+				Node: statsapi.NodeStats{
+					CPU: &statsapi.CPUStats{
+						Time:                 testTime,
+						UsageCoreNanoSeconds: nil,
+					},
+					Memory: &statsapi.MemoryStats{
+						Time:            testTime,
+						WorkingSetBytes: nil,
+					},
+				},
+			},
+			summaryErr: nil,
+			expectedMetrics: `
+			# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
+			# TYPE scrape_error gauge
+			scrape_error 0
+			`,
+		},
 		{
 			name: "arbitrary container metrics for different container, pods and namespaces",
 			summary: &statsapi.Summary{
@@ -169,6 +190,69 @@ func TestCollectResourceMetrics(t *testing.T) {
 			container_start_time_seconds{container="container_b",namespace="namespace_a",pod="pod_a"} 1.6243961583020916e+09 1624396158302
 			`,
 		},
+		{
+			name: "nil container metrics",
+			summary: &statsapi.Summary{
+				Pods: []statsapi.PodStats{
+					{
+						PodRef: statsapi.PodReference{
+							Name:      "pod_a",
+							Namespace: "namespace_a",
+						},
+						Containers: []statsapi.ContainerStats{
+							{
+								Name:      "container_a",
+								StartTime: metav1.NewTime(staticTimestamp.Add(-30 * time.Second)),
+								CPU: &statsapi.CPUStats{
+									Time:                 testTime,
+									UsageCoreNanoSeconds: nil,
+								},
+								Memory: &statsapi.MemoryStats{
+									Time:            testTime,
+									WorkingSetBytes: nil,
+								},
+							},
+						},
+					},
+					{
+						PodRef: statsapi.PodReference{
+							Name:      "pod_b",
+							Namespace: "namespace_b",
+						},
+						Containers: []statsapi.ContainerStats{
+							{
+								Name:      "container_a",
+								StartTime: metav1.NewTime(staticTimestamp.Add(-10 * time.Minute)),
+								CPU: &statsapi.CPUStats{
+									Time:                 testTime,
+									UsageCoreNanoSeconds: uint64Ptr(10000000000),
+								},
+								Memory: &statsapi.MemoryStats{
+									Time:            testTime,
+									WorkingSetBytes: uint64Ptr(1000),
+								},
+							},
+						},
+					},
+				},
+			},
+			summaryErr: nil,
+			expectedMetrics: `
+			# HELP container_cpu_usage_seconds_total [ALPHA] Cumulative cpu time consumed by the container in core-seconds
+			# TYPE container_cpu_usage_seconds_total counter
+			container_cpu_usage_seconds_total{container="container_a",namespace="namespace_b",pod="pod_b"} 10 1624396278302
+			# HELP container_memory_working_set_bytes [ALPHA] Current working set of the container in bytes
+			# TYPE container_memory_working_set_bytes gauge
+			container_memory_working_set_bytes{container="container_a",namespace="namespace_b",pod="pod_b"} 1000 1624396278302
+			# HELP container_start_time_seconds [ALPHA] Start time of the container since unix epoch in seconds
+			# TYPE container_start_time_seconds gauge
+			container_start_time_seconds{container="container_a",namespace="namespace_a",pod="pod_a"} 1.6243962483020916e+09 1624396248302
+			container_start_time_seconds{container="container_a",namespace="namespace_b",pod="pod_b"} 1.6243956783020916e+09 1624395678302
+			# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
+			# TYPE scrape_error gauge
+			scrape_error 0
+			`,
+		},
 		{
 			name: "arbitrary pod metrics",
 			summary: &statsapi.Summary{
@@ -202,6 +286,33 @@ func TestCollectResourceMetrics(t *testing.T) {
 			pod_memory_working_set_bytes{namespace="namespace_a",pod="pod_a"} 1000 1624396278302
 			`,
 		},
+		{
+			name: "nil pod metrics",
+			summary: &statsapi.Summary{
+				Pods: []statsapi.PodStats{
+					{
+						PodRef: statsapi.PodReference{
+							Name:      "pod_a",
+							Namespace: "namespace_a",
+						},
+						CPU: &statsapi.CPUStats{
+							Time:                 testTime,
+							UsageCoreNanoSeconds: nil,
+						},
+						Memory: &statsapi.MemoryStats{
+							Time:            testTime,
+							WorkingSetBytes: nil,
+						},
+					},
+				},
+			},
+			summaryErr: nil,
+			expectedMetrics: `
+			# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
+			# TYPE scrape_error gauge
+			scrape_error 0
+			`,
+		},
 	}
 
 	for _, test := range tests {
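The three added cases assert that a summary carrying nil value pointers yields no panic and no spurious series, only scrape_error 0, while a fully populated sibling (pod_b in the container case) still reports its metrics. Assuming the standard tree layout for this file, go test ./pkg/kubelet/metrics/collectors/ -run TestCollectResourceMetrics should exercise all of them.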