Merge pull request #84987 from RainbowMango/pr_migrate_custom_collector_kubelet_part2

migrate kubelet custom metrics to stability framework part 2

Commit ec86baf00b
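For context, the "stability framework" named in the commit message replaces raw prometheus.Collector implementations with metrics.StableCollector, so every exported metric carries an explicit stability level (ALPHA here). Below is a minimal sketch of the target pattern, using only the k8s.io/component-base/metrics APIs that appear in this diff; the collector type and metric name are hypothetical, not part of the PR:

    package main

    import (
    	"k8s.io/component-base/metrics"
    )

    // exampleDesc is a hypothetical descriptor; note the explicit ALPHA level.
    var exampleDesc = metrics.NewDesc("example_total",
    	"An example metric registered through the stability framework",
    	nil, nil, metrics.ALPHA, "")

    type exampleCollector struct {
    	metrics.BaseStableCollector // supplies the Describe/Collect plumbing
    }

    // DescribeWithStability replaces prometheus.Collector's Describe.
    func (c *exampleCollector) DescribeWithStability(ch chan<- *metrics.Desc) {
    	ch <- exampleDesc
    }

    // CollectWithStability replaces prometheus.Collector's Collect.
    func (c *exampleCollector) CollectWithStability(ch chan<- metrics.Metric) {
    	ch <- metrics.NewLazyConstMetric(exampleDesc, metrics.GaugeValue, 1)
    }

    func main() {
    	r := metrics.NewKubeRegistry()
    	r.CustomMustRegister(&exampleCollector{})
    }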
@@ -8,6 +8,7 @@ go_library(
     deps = [
         "//pkg/kubelet/apis/stats/v1alpha1:go_default_library",
         "//pkg/kubelet/server/stats:go_default_library",
+        "//staging/src/k8s.io/component-base/metrics:go_default_library",
     ],
 )
@@ -19,6 +19,7 @@ package v1alpha1

 import (
 	"time"

+	"k8s.io/component-base/metrics"
 	summary "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
 	"k8s.io/kubernetes/pkg/kubelet/server/stats"
 )
@@ -26,55 +27,93 @@ import (

 // Version is the string representation of the version of this configuration
 const Version = "v1alpha1"

+var (
+	nodeCPUUsageDesc = metrics.NewDesc("node_cpu_usage_seconds_total",
+		"Cumulative cpu time consumed by the node in core-seconds",
+		nil,
+		nil,
+		metrics.ALPHA,
+		"")
+
+	nodeMemoryUsageDesc = metrics.NewDesc("node_memory_working_set_bytes",
+		"Current working set of the node in bytes",
+		nil,
+		nil,
+		metrics.ALPHA,
+		"")
+
+	containerCPUUsageDesc = metrics.NewDesc("container_cpu_usage_seconds_total",
+		"Cumulative cpu time consumed by the container in core-seconds",
+		[]string{"container", "pod", "namespace"},
+		nil,
+		metrics.ALPHA,
+		"")
+
+	containerMemoryUsageDesc = metrics.NewDesc("container_memory_working_set_bytes",
+		"Current working set of the container in bytes",
+		[]string{"container", "pod", "namespace"},
+		nil,
+		metrics.ALPHA,
+		"")
+)
+
+// getNodeCPUMetrics returns CPU utilization of a node.
+func getNodeCPUMetrics(s summary.NodeStats) (*float64, time.Time) {
+	if s.CPU == nil {
+		return nil, time.Time{}
+	}
+	v := float64(*s.CPU.UsageCoreNanoSeconds) / float64(time.Second)
+	return &v, s.CPU.Time.Time
+}
+
+// getNodeMemoryMetrics returns memory utilization of a node.
+func getNodeMemoryMetrics(s summary.NodeStats) (*float64, time.Time) {
+	if s.Memory == nil {
+		return nil, time.Time{}
+	}
+	v := float64(*s.Memory.WorkingSetBytes)
+	return &v, s.Memory.Time.Time
+}
+
+// getContainerCPUMetrics returns CPU utilization of a container.
+func getContainerCPUMetrics(s summary.ContainerStats) (*float64, time.Time) {
+	if s.CPU == nil {
+		return nil, time.Time{}
+	}
+	v := float64(*s.CPU.UsageCoreNanoSeconds) / float64(time.Second)
+	return &v, s.CPU.Time.Time
+}
+
+// getContainerMemoryMetrics returns memory utilization of a container.
+func getContainerMemoryMetrics(s summary.ContainerStats) (*float64, time.Time) {
+	if s.Memory == nil {
+		return nil, time.Time{}
+	}
+	v := float64(*s.Memory.WorkingSetBytes)
+	return &v, s.Memory.Time.Time
+}
+
 // Config is the v1alpha1 resource metrics definition
 func Config() stats.ResourceMetricsConfig {
 	return stats.ResourceMetricsConfig{
 		NodeMetrics: []stats.NodeResourceMetric{
 			{
-				Name:        "node_cpu_usage_seconds_total",
-				Description: "Cumulative cpu time consumed by the node in core-seconds",
-				ValueFn: func(s summary.NodeStats) (*float64, time.Time) {
-					if s.CPU == nil {
-						return nil, time.Time{}
-					}
-					v := float64(*s.CPU.UsageCoreNanoSeconds) / float64(time.Second)
-					return &v, s.CPU.Time.Time
-				},
+				Desc:    nodeCPUUsageDesc,
+				ValueFn: getNodeCPUMetrics,
 			},
 			{
-				Name:        "node_memory_working_set_bytes",
-				Description: "Current working set of the node in bytes",
-				ValueFn: func(s summary.NodeStats) (*float64, time.Time) {
-					if s.Memory == nil {
-						return nil, time.Time{}
-					}
-					v := float64(*s.Memory.WorkingSetBytes)
-					return &v, s.Memory.Time.Time
-				},
+				Desc:    nodeMemoryUsageDesc,
+				ValueFn: getNodeMemoryMetrics,
 			},
 		},
 		ContainerMetrics: []stats.ContainerResourceMetric{
 			{
-				Name:        "container_cpu_usage_seconds_total",
-				Description: "Cumulative cpu time consumed by the container in core-seconds",
-				ValueFn: func(s summary.ContainerStats) (*float64, time.Time) {
-					if s.CPU == nil {
-						return nil, time.Time{}
-					}
-					v := float64(*s.CPU.UsageCoreNanoSeconds) / float64(time.Second)
-					return &v, s.CPU.Time.Time
-				},
+				Desc:    containerCPUUsageDesc,
+				ValueFn: getContainerCPUMetrics,
 			},
 			{
-				Name:        "container_memory_working_set_bytes",
-				Description: "Current working set of the container in bytes",
-				ValueFn: func(s summary.ContainerStats) (*float64, time.Time) {
-					if s.Memory == nil {
-						return nil, time.Time{}
-					}
-					v := float64(*s.Memory.WorkingSetBytes)
-					return &v, s.Memory.Time.Time
-				},
+				Desc:    containerMemoryUsageDesc,
+				ValueFn: getContainerMemoryMetrics,
 			},
 		},
 	}
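The package-level getters above share one nil-guard convention: when the relevant stats section is absent, the ValueFn reports no sample (nil) rather than a zero. A hypothetical in-package test sketching that contract, assuming only the getters and imports shown in the hunk:

    // Hypothetical test, not part of this diff: an empty NodeStats (no CPU
    // section populated) must yield a nil value and zero time, so no sample
    // is emitted for it.
    func TestGetNodeCPUMetricsNilGuard(t *testing.T) {
    	v, ts := getNodeCPUMetrics(summary.NodeStats{})
    	if v != nil || !ts.IsZero() {
    		t.Fatalf("expected nil value and zero time, got %v %v", v, ts)
    	}
    }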
@@ -52,7 +52,6 @@ go_library(
         "//vendor/github.com/google/cadvisor/container:go_default_library",
         "//vendor/github.com/google/cadvisor/info/v1:go_default_library",
         "//vendor/github.com/google/cadvisor/metrics:go_default_library",
-        "//vendor/github.com/prometheus/client_golang/prometheus:go_default_library",
         "//vendor/google.golang.org/grpc:go_default_library",
         "//vendor/k8s.io/klog:go_default_library",
     ],
@@ -36,7 +36,6 @@ import (
 	cadvisormetrics "github.com/google/cadvisor/container"
 	cadvisorapi "github.com/google/cadvisor/info/v1"
 	"github.com/google/cadvisor/metrics"
-	"github.com/prometheus/client_golang/prometheus"
 	"google.golang.org/grpc"
 	"k8s.io/klog"
@@ -302,7 +301,7 @@ func (s *Server) InstallDefaultHandlers(enableCAdvisorJSONEndpoints bool) {
 	s.restfulCont.Handle(metricsPath, legacyregistry.Handler())

 	// cAdvisor metrics are exposed under the secured handler as well
-	r := prometheus.NewRegistry()
+	r := compbasemetrics.NewKubeRegistry()

 	includedMetrics := cadvisormetrics.MetricSet{
 		cadvisormetrics.CpuUsageMetrics: struct{}{},
@@ -315,13 +314,13 @@ func (s *Server) InstallDefaultHandlers(enableCAdvisorJSONEndpoints bool) {
 		cadvisormetrics.AppMetrics:     struct{}{},
 		cadvisormetrics.ProcessMetrics: struct{}{},
 	}
-	r.MustRegister(metrics.NewPrometheusCollector(prometheusHostAdapter{s.host}, containerPrometheusLabelsFunc(s.host), includedMetrics))
+	r.RawMustRegister(metrics.NewPrometheusCollector(prometheusHostAdapter{s.host}, containerPrometheusLabelsFunc(s.host), includedMetrics))
 	s.restfulCont.Handle(cadvisorMetricsPath,
 		compbasemetrics.HandlerFor(r, compbasemetrics.HandlerOpts{ErrorHandling: compbasemetrics.ContinueOnError}),
 	)

-	v1alpha1ResourceRegistry := prometheus.NewRegistry()
-	v1alpha1ResourceRegistry.MustRegister(stats.NewPrometheusResourceMetricCollector(s.resourceAnalyzer, v1alpha1.Config()))
+	v1alpha1ResourceRegistry := compbasemetrics.NewKubeRegistry()
+	v1alpha1ResourceRegistry.CustomMustRegister(stats.NewPrometheusResourceMetricCollector(s.resourceAnalyzer, v1alpha1.Config()))
 	s.restfulCont.Handle(path.Join(resourceMetricsPathPrefix, v1alpha1.Version),
 		compbasemetrics.HandlerFor(v1alpha1ResourceRegistry, compbasemetrics.HandlerOpts{ErrorHandling: compbasemetrics.ContinueOnError}),
 	)
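The registry swap above is the crux of the server change: the KubeRegistry offers distinct registration paths, and the hunk uses a different one for each collector. A short sketch of the distinction, with placeholder collector variables (the comments describe the split as used in this diff; they are an editor's reading, not PR text):

    import (
    	"github.com/prometheus/client_golang/prometheus"
    	compbasemetrics "k8s.io/component-base/metrics"
    )

    var (
    	cadvisorCollector prometheus.Collector            // placeholder: a pre-existing raw Prometheus collector
    	resourceCollector compbasemetrics.StableCollector // placeholder: a hand-written stable collector
    )

    func installSketch() {
    	r := compbasemetrics.NewKubeRegistry()
    	// RawMustRegister keeps third-party collectors such as cAdvisor's
    	// outside the stability framework; no stability annotation is applied.
    	r.RawMustRegister(cadvisorCollector)
    	// CustomMustRegister wires a metrics.StableCollector through the
    	// framework, so its descriptors keep their declared stability level.
    	r.CustomMustRegister(resourceCollector)
    }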
@@ -26,9 +26,9 @@ go_library(
         "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library",
+        "//staging/src/k8s.io/component-base/metrics:go_default_library",
         "//vendor/github.com/emicklei/go-restful:go_default_library",
         "//vendor/github.com/google/cadvisor/info/v1:go_default_library",
-        "//vendor/github.com/prometheus/client_golang/prometheus:go_default_library",
         "//vendor/k8s.io/klog:go_default_library",
     ],
 )
@@ -49,8 +49,8 @@ go_test(
         "//staging/src/k8s.io/api/core/v1:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
-        "//vendor/github.com/prometheus/client_golang/prometheus:go_default_library",
-        "//vendor/github.com/prometheus/client_model/go:go_default_library",
+        "//staging/src/k8s.io/component-base/metrics:go_default_library",
+        "//staging/src/k8s.io/component-base/metrics/testutil:go_default_library",
         "//vendor/github.com/stretchr/testify/assert:go_default_library",
         "//vendor/github.com/stretchr/testify/mock:go_default_library",
     ] + select({
@@ -19,32 +19,29 @@ package stats

 import (
 	"time"

+	"k8s.io/component-base/metrics"
 	"k8s.io/klog"
 	stats "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
-
-	"github.com/prometheus/client_golang/prometheus"
 )

 // NodeResourceMetric describes a metric for the node
 type NodeResourceMetric struct {
-	Name        string
-	Description string
-	ValueFn     func(stats.NodeStats) (*float64, time.Time)
+	Desc    *metrics.Desc
+	ValueFn func(stats.NodeStats) (*float64, time.Time)
 }

-func (n *NodeResourceMetric) desc() *prometheus.Desc {
-	return prometheus.NewDesc(n.Name, n.Description, []string{}, nil)
+func (n *NodeResourceMetric) desc() *metrics.Desc {
+	return n.Desc
 }

 // ContainerResourceMetric describes a metric for containers
 type ContainerResourceMetric struct {
-	Name        string
-	Description string
-	ValueFn     func(stats.ContainerStats) (*float64, time.Time)
+	Desc    *metrics.Desc
+	ValueFn func(stats.ContainerStats) (*float64, time.Time)
 }

-func (n *ContainerResourceMetric) desc() *prometheus.Desc {
-	return prometheus.NewDesc(n.Name, n.Description, []string{"container", "pod", "namespace"}, nil)
+func (n *ContainerResourceMetric) desc() *metrics.Desc {
+	return n.Desc
 }

 // ResourceMetricsConfig specifies which metrics to collect and export
@@ -53,29 +50,34 @@ type ResourceMetricsConfig struct {
 	ContainerMetrics []ContainerResourceMetric
 }

-// NewPrometheusResourceMetricCollector returns a prometheus.Collector which exports resource metrics
-func NewPrometheusResourceMetricCollector(provider SummaryProvider, config ResourceMetricsConfig) prometheus.Collector {
+// NewPrometheusResourceMetricCollector returns a metrics.StableCollector which exports resource metrics
+func NewPrometheusResourceMetricCollector(provider SummaryProvider, config ResourceMetricsConfig) metrics.StableCollector {
 	return &resourceMetricCollector{
 		provider: provider,
 		config:   config,
-		errors: prometheus.NewGauge(prometheus.GaugeOpts{
-			Name: "scrape_error",
-			Help: "1 if there was an error while getting container metrics, 0 otherwise",
-		}),
+		errors: metrics.NewDesc("scrape_error",
+			"1 if there was an error while getting container metrics, 0 otherwise",
+			nil,
+			nil,
+			metrics.ALPHA,
+			""),
 	}
 }

 type resourceMetricCollector struct {
+	metrics.BaseStableCollector
+
 	provider SummaryProvider
 	config   ResourceMetricsConfig
-	errors   prometheus.Gauge
+	errors   *metrics.Desc
 }

-var _ prometheus.Collector = &resourceMetricCollector{}
+var _ metrics.StableCollector = &resourceMetricCollector{}

-// Describe implements prometheus.Collector
-func (rc *resourceMetricCollector) Describe(ch chan<- *prometheus.Desc) {
-	rc.errors.Describe(ch)
+// DescribeWithStability implements metrics.StableCollector
+func (rc *resourceMetricCollector) DescribeWithStability(ch chan<- *metrics.Desc) {
+	ch <- rc.errors
 	for _, metric := range rc.config.NodeMetrics {
 		ch <- metric.desc()
 	}
@@ -84,24 +86,26 @@ func (rc *resourceMetricCollector) Describe(ch chan<- *prometheus.Desc) {
 	}
 }

-// Collect implements prometheus.Collector
-// Since new containers are frequently created and removed, using the prometheus.Gauge Collector would
+// CollectWithStability implements metrics.StableCollector
+// Since new containers are frequently created and removed, using the Gauge would
 // leak metric collectors for containers or pods that no longer exist.  Instead, implement
-// prometheus.Collector in a way that only collects metrics for active containers.
-func (rc *resourceMetricCollector) Collect(ch chan<- prometheus.Metric) {
-	rc.errors.Set(0)
-	defer rc.errors.Collect(ch)
+// custom collector in a way that only collects metrics for active containers.
+func (rc *resourceMetricCollector) CollectWithStability(ch chan<- metrics.Metric) {
+	var errorCount float64
+	defer func() {
+		ch <- metrics.NewLazyConstMetric(rc.errors, metrics.GaugeValue, errorCount)
+	}()
 	summary, err := rc.provider.GetCPUAndMemoryStats()
 	if err != nil {
-		rc.errors.Set(1)
+		errorCount = 1
 		klog.Warningf("Error getting summary for resourceMetric prometheus endpoint: %v", err)
 		return
 	}

 	for _, metric := range rc.config.NodeMetrics {
 		if value, timestamp := metric.ValueFn(summary.Node); value != nil {
-			ch <- prometheus.NewMetricWithTimestamp(timestamp,
-				prometheus.MustNewConstMetric(metric.desc(), prometheus.GaugeValue, *value))
+			ch <- metrics.NewLazyMetricWithTimestamp(timestamp,
+				metrics.NewLazyConstMetric(metric.desc(), metrics.GaugeValue, *value))
 		}
 	}
@@ -109,8 +113,8 @@ func (rc *resourceMetricCollector) Collect(ch chan<- prometheus.Metric) {
 		for _, container := range pod.Containers {
 			for _, metric := range rc.config.ContainerMetrics {
 				if value, timestamp := metric.ValueFn(container); value != nil {
-					ch <- prometheus.NewMetricWithTimestamp(timestamp,
-						prometheus.MustNewConstMetric(metric.desc(), prometheus.GaugeValue, *value, container.Name, pod.PodRef.Name, pod.PodRef.Namespace))
+					ch <- metrics.NewLazyMetricWithTimestamp(timestamp,
+						metrics.NewLazyConstMetric(metric.desc(), metrics.GaugeValue, *value, container.Name, pod.PodRef.Name, pod.PodRef.Namespace))
 				}
 			}
 		}
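The comment in the hunk above carries the key design point: per-scrape const metrics instead of long-lived gauges, so samples for containers that have been deleted simply stop appearing at the next scrape. A hedged sketch of the emit step, factored into a hypothetical helper but using only the component-base calls shown in the hunk:

    import (
    	"time"

    	"k8s.io/component-base/metrics"
    )

    // emitSample is a hypothetical helper illustrating the per-scrape emission
    // used above: the metric is built fresh on every scrape from a descriptor,
    // a value, and the sample's own timestamp, so nothing is retained for
    // containers that have gone away.
    func emitSample(ch chan<- metrics.Metric, desc *metrics.Desc, value float64, ts time.Time, labels ...string) {
    	ch <- metrics.NewLazyMetricWithTimestamp(ts,
    		metrics.NewLazyConstMetric(desc, metrics.GaugeValue, value, labels...))
    }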
@@ -18,28 +18,114 @@ package stats

 import (
 	"fmt"
+	"strings"
 	"testing"
 	"time"

-	"github.com/prometheus/client_golang/prometheus"
-	dto "github.com/prometheus/client_model/go"
-	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/mock"

 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/component-base/metrics"
+	"k8s.io/component-base/metrics/testutil"
 	statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
+	summary "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
 )

 const (
 	errorName = "scrape_error"
 	errorHelp = "1 if there was an error while getting container metrics, 0 otherwise"
 )

+// TODO(RainbowMango): The Desc variables and value functions should be shared with source code.
+// It can not be shared now because there is a import cycle.
+// Consider deprecate endpoint `/resource/v1alpha1` as stability framework could offer guarantee now.
 var (
 	noError  = float64(0)
 	hasError = float64(1)
+
+	nodeCPUUsageDesc = metrics.NewDesc("node_cpu_usage_seconds_total",
+		"Cumulative cpu time consumed by the node in core-seconds",
+		nil,
+		nil,
+		metrics.ALPHA,
+		"")
+
+	nodeMemoryUsageDesc = metrics.NewDesc("node_memory_working_set_bytes",
+		"Current working set of the node in bytes",
+		nil,
+		nil,
+		metrics.ALPHA,
+		"")
+
+	containerCPUUsageDesc = metrics.NewDesc("container_cpu_usage_seconds_total",
+		"Cumulative cpu time consumed by the container in core-seconds",
+		[]string{"container", "pod", "namespace"},
+		nil,
+		metrics.ALPHA,
+		"")
+
+	containerMemoryUsageDesc = metrics.NewDesc("container_memory_working_set_bytes",
+		"Current working set of the container in bytes",
+		[]string{"container", "pod", "namespace"},
+		nil,
+		metrics.ALPHA,
+		"")
 )

+// getNodeCPUMetrics returns CPU utilization of a node.
+func getNodeCPUMetrics(s summary.NodeStats) (*float64, time.Time) {
+	if s.CPU == nil {
+		return nil, time.Time{}
+	}
+	v := float64(*s.CPU.UsageCoreNanoSeconds) / float64(time.Second)
+	return &v, s.CPU.Time.Time
+}
+
+// getNodeMemoryMetrics returns memory utilization of a node.
+func getNodeMemoryMetrics(s summary.NodeStats) (*float64, time.Time) {
+	if s.Memory == nil {
+		return nil, time.Time{}
+	}
+	v := float64(*s.Memory.WorkingSetBytes)
+	return &v, s.Memory.Time.Time
+}
+
+// getContainerCPUMetrics returns CPU utilization of a container.
+func getContainerCPUMetrics(s summary.ContainerStats) (*float64, time.Time) {
+	if s.CPU == nil {
+		return nil, time.Time{}
+	}
+	v := float64(*s.CPU.UsageCoreNanoSeconds) / float64(time.Second)
+	return &v, s.CPU.Time.Time
+}
+
+// getContainerMemoryMetrics returns memory utilization of a container.
+func getContainerMemoryMetrics(s summary.ContainerStats) (*float64, time.Time) {
+	if s.Memory == nil {
+		return nil, time.Time{}
+	}
+	v := float64(*s.Memory.WorkingSetBytes)
+	return &v, s.Memory.Time.Time
+}
+
+// Config is the v1alpha1 resource metrics definition
+func Config() ResourceMetricsConfig {
+	return ResourceMetricsConfig{
+		NodeMetrics: []NodeResourceMetric{
+			{
+				Desc:    nodeCPUUsageDesc,
+				ValueFn: getNodeCPUMetrics,
+			},
+			{
+				Desc:    nodeMemoryUsageDesc,
+				ValueFn: getNodeMemoryMetrics,
+			},
+		},
+		ContainerMetrics: []ContainerResourceMetric{
+			{
+				Desc:    containerCPUUsageDesc,
+				ValueFn: getContainerCPUMetrics,
+			},
+			{
+				Desc:    containerMemoryUsageDesc,
+				ValueFn: getContainerMemoryMetrics,
+			},
+		},
+	}
+}
+
 type mockSummaryProvider struct {
 	mock.Mock
 }
@@ -54,60 +140,32 @@ func (m *mockSummaryProvider) GetCPUAndMemoryStats() (*statsapi.Summary, error)
 	return args.Get(0).(*statsapi.Summary), args.Error(1)
 }

-type collectResult struct {
-	desc   *prometheus.Desc
-	metric *dto.Metric
-}
-
 func TestCollectResourceMetrics(t *testing.T) {
-	testTime := metav1.Now()
-	for _, tc := range []struct {
-		description     string
-		config          ResourceMetricsConfig
-		summary         *statsapi.Summary
-		summaryErr      error
-		expectedMetrics []collectResult
+	testTime := metav1.NewTime(time.Unix(2, 0)) // a static timestamp: 2000
+
+	tests := []struct {
+		name                 string
+		config               ResourceMetricsConfig
+		summary              *statsapi.Summary
+		summaryErr           error
+		expectedMetricsNames []string
+		expectedMetrics      string
 	}{
 		{
-			description: "error getting summary",
-			config:      ResourceMetricsConfig{},
-			summary:     nil,
-			summaryErr:  fmt.Errorf("failed to get summary"),
-			expectedMetrics: []collectResult{
-				{
-					desc:   prometheus.NewDesc(errorName, errorHelp, []string{}, nil),
-					metric: &dto.Metric{Gauge: &dto.Gauge{Value: &hasError}},
-				},
-			},
+			name:                 "error getting summary",
+			config:               Config(),
+			summary:              nil,
+			summaryErr:           fmt.Errorf("failed to get summary"),
+			expectedMetricsNames: []string{"scrape_error"},
+			expectedMetrics: `
+				# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
+				# TYPE scrape_error gauge
+				scrape_error 1
+			`,
 		},
 		{
-			description: "arbitrary node metrics",
-			config: ResourceMetricsConfig{
-				NodeMetrics: []NodeResourceMetric{
-					{
-						Name:        "node_foo",
-						Description: "a metric from nodestats",
-						ValueFn: func(s statsapi.NodeStats) (*float64, time.Time) {
-							if s.CPU == nil {
-								return nil, time.Time{}
-							}
-							v := float64(*s.CPU.UsageCoreNanoSeconds) / float64(time.Second)
-							return &v, s.CPU.Time.Time
-						},
-					},
-					{
-						Name:        "node_bar",
-						Description: "another metric from nodestats",
-						ValueFn: func(s statsapi.NodeStats) (*float64, time.Time) {
-							if s.Memory == nil {
-								return nil, time.Time{}
-							}
-							v := float64(*s.Memory.WorkingSetBytes)
-							return &v, s.Memory.Time.Time
-						},
-					},
-				},
-			},
+			name:   "arbitrary node metrics",
+			config: Config(),
 			summary: &statsapi.Summary{
 				Node: statsapi.NodeStats{
 					CPU: &statsapi.CPUStats{
|
||||
},
|
||||
},
|
||||
summaryErr: nil,
|
||||
expectedMetrics: []collectResult{
|
||||
{
|
||||
desc: prometheus.NewDesc("node_foo", "a metric from nodestats", []string{}, nil),
|
||||
metric: &dto.Metric{Gauge: &dto.Gauge{Value: float64Ptr(10)}},
|
||||
},
|
||||
{
|
||||
desc: prometheus.NewDesc("node_bar", "another metric from nodestats", []string{}, nil),
|
||||
metric: &dto.Metric{Gauge: &dto.Gauge{Value: float64Ptr(1000)}},
|
||||
},
|
||||
{
|
||||
desc: prometheus.NewDesc(errorName, errorHelp, []string{}, nil),
|
||||
metric: &dto.Metric{Gauge: &dto.Gauge{Value: &noError}},
|
||||
},
|
||||
expectedMetricsNames: []string{
|
||||
"node_cpu_usage_seconds_total",
|
||||
"node_memory_working_set_bytes",
|
||||
"scrape_error",
|
||||
},
|
||||
expectedMetrics: `
|
||||
# HELP node_cpu_usage_seconds_total [ALPHA] Cumulative cpu time consumed by the node in core-seconds
|
||||
# TYPE node_cpu_usage_seconds_total gauge
|
||||
node_cpu_usage_seconds_total 10 2000
|
||||
# HELP node_memory_working_set_bytes [ALPHA] Current working set of the node in bytes
|
||||
# TYPE node_memory_working_set_bytes gauge
|
||||
node_memory_working_set_bytes 1000 2000
|
||||
# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
|
||||
# TYPE scrape_error gauge
|
||||
scrape_error 0
|
||||
`,
|
||||
},
|
||||
{
|
||||
description: "arbitrary container metrics for different container, pods and namespaces",
|
||||
config: ResourceMetricsConfig{
|
||||
ContainerMetrics: []ContainerResourceMetric{
|
||||
{
|
||||
Name: "container_foo",
|
||||
Description: "a metric from container stats",
|
||||
ValueFn: func(s statsapi.ContainerStats) (*float64, time.Time) {
|
||||
if s.CPU == nil {
|
||||
return nil, time.Time{}
|
||||
}
|
||||
v := float64(*s.CPU.UsageCoreNanoSeconds) / float64(time.Second)
|
||||
return &v, s.CPU.Time.Time
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "container_bar",
|
||||
Description: "another metric from container stats",
|
||||
ValueFn: func(s statsapi.ContainerStats) (*float64, time.Time) {
|
||||
if s.Memory == nil {
|
||||
return nil, time.Time{}
|
||||
}
|
||||
v := float64(*s.Memory.WorkingSetBytes)
|
||||
return &v, s.Memory.Time.Time
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
name: "arbitrary container metrics for different container, pods and namespaces",
|
||||
config: Config(),
|
||||
summary: &statsapi.Summary{
|
||||
Pods: []statsapi.PodStats{
|
||||
{
|
||||
@ -218,131 +253,43 @@ func TestCollectResourceMetrics(t *testing.T) {
|
||||
},
|
||||
},
|
||||
summaryErr: nil,
|
||||
expectedMetrics: []collectResult{
|
||||
{
|
||||
desc: prometheus.NewDesc("container_foo", "a metric from container stats", []string{"container", "pod", "namespace"}, nil),
|
||||
metric: &dto.Metric{
|
||||
Gauge: &dto.Gauge{Value: float64Ptr(10)},
|
||||
Label: []*dto.LabelPair{
|
||||
{Name: stringPtr("container"), Value: stringPtr("container_a")},
|
||||
{Name: stringPtr("namespace"), Value: stringPtr("namespace_a")},
|
||||
{Name: stringPtr("pod"), Value: stringPtr("pod_a")},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: prometheus.NewDesc("container_bar", "another metric from container stats", []string{"container", "pod", "namespace"}, nil),
|
||||
metric: &dto.Metric{
|
||||
Gauge: &dto.Gauge{Value: float64Ptr(1000)},
|
||||
Label: []*dto.LabelPair{
|
||||
{Name: stringPtr("container"), Value: stringPtr("container_a")},
|
||||
{Name: stringPtr("namespace"), Value: stringPtr("namespace_a")},
|
||||
{Name: stringPtr("pod"), Value: stringPtr("pod_a")},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: prometheus.NewDesc("container_foo", "a metric from container stats", []string{"container", "pod", "namespace"}, nil),
|
||||
metric: &dto.Metric{
|
||||
Gauge: &dto.Gauge{Value: float64Ptr(10)},
|
||||
Label: []*dto.LabelPair{
|
||||
{Name: stringPtr("container"), Value: stringPtr("container_b")},
|
||||
{Name: stringPtr("namespace"), Value: stringPtr("namespace_a")},
|
||||
{Name: stringPtr("pod"), Value: stringPtr("pod_a")},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: prometheus.NewDesc("container_bar", "another metric from container stats", []string{"container", "pod", "namespace"}, nil),
|
||||
metric: &dto.Metric{
|
||||
Gauge: &dto.Gauge{Value: float64Ptr(1000)},
|
||||
Label: []*dto.LabelPair{
|
||||
{Name: stringPtr("container"), Value: stringPtr("container_b")},
|
||||
{Name: stringPtr("namespace"), Value: stringPtr("namespace_a")},
|
||||
{Name: stringPtr("pod"), Value: stringPtr("pod_a")},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: prometheus.NewDesc("container_foo", "a metric from container stats", []string{"container", "pod", "namespace"}, nil),
|
||||
metric: &dto.Metric{
|
||||
Gauge: &dto.Gauge{Value: float64Ptr(10)},
|
||||
Label: []*dto.LabelPair{
|
||||
{Name: stringPtr("container"), Value: stringPtr("container_a")},
|
||||
{Name: stringPtr("namespace"), Value: stringPtr("namespace_b")},
|
||||
{Name: stringPtr("pod"), Value: stringPtr("pod_b")},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: prometheus.NewDesc("container_bar", "another metric from container stats", []string{"container", "pod", "namespace"}, nil),
|
||||
metric: &dto.Metric{
|
||||
Gauge: &dto.Gauge{Value: float64Ptr(1000)},
|
||||
Label: []*dto.LabelPair{
|
||||
{Name: stringPtr("container"), Value: stringPtr("container_a")},
|
||||
{Name: stringPtr("namespace"), Value: stringPtr("namespace_b")},
|
||||
{Name: stringPtr("pod"), Value: stringPtr("pod_b")},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: prometheus.NewDesc(errorName, errorHelp, []string{}, nil),
|
||||
metric: &dto.Metric{Gauge: &dto.Gauge{Value: &noError}},
|
||||
},
|
||||
expectedMetricsNames: []string{
|
||||
"container_cpu_usage_seconds_total",
|
||||
"container_memory_working_set_bytes",
|
||||
"scrape_error",
|
||||
},
|
||||
expectedMetrics: `
|
||||
# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
|
||||
# TYPE scrape_error gauge
|
||||
scrape_error 0
|
||||
# HELP container_cpu_usage_seconds_total [ALPHA] Cumulative cpu time consumed by the container in core-seconds
|
||||
# TYPE container_cpu_usage_seconds_total gauge
|
||||
container_cpu_usage_seconds_total{container="container_a",namespace="namespace_a",pod="pod_a"} 10 2000
|
||||
container_cpu_usage_seconds_total{container="container_a",namespace="namespace_b",pod="pod_b"} 10 2000
|
||||
container_cpu_usage_seconds_total{container="container_b",namespace="namespace_a",pod="pod_a"} 10 2000
|
||||
# HELP container_memory_working_set_bytes [ALPHA] Current working set of the container in bytes
|
||||
# TYPE container_memory_working_set_bytes gauge
|
||||
container_memory_working_set_bytes{container="container_a",namespace="namespace_a",pod="pod_a"} 1000 2000
|
||||
container_memory_working_set_bytes{container="container_a",namespace="namespace_b",pod="pod_b"} 1000 2000
|
||||
container_memory_working_set_bytes{container="container_b",namespace="namespace_a",pod="pod_a"} 1000 2000
|
||||
`,
|
||||
},
|
||||
} {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
tc := test
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
provider := &mockSummaryProvider{}
|
||||
provider.On("GetCPUAndMemoryStats").Return(tc.summary, tc.summaryErr)
|
||||
collector := NewPrometheusResourceMetricCollector(provider, tc.config)
|
||||
metrics := collectMetrics(t, collector, len(tc.expectedMetrics))
|
||||
for i := range metrics {
|
||||
assertEqual(t, metrics[i], tc.expectedMetrics[i])
|
||||
|
||||
if err := testutil.CustomCollectAndCompare(collector, strings.NewReader(tc.expectedMetrics), tc.expectedMetricsNames...); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// collectMetrics is a wrapper around a prometheus.Collector which returns the metrics added to the metric channel as a slice.metric
|
||||
// It will block indefinitely if the collector does not collect exactly numMetrics.
|
||||
func collectMetrics(t *testing.T, collector prometheus.Collector, numMetrics int) (results []collectResult) {
|
||||
metricsCh := make(chan prometheus.Metric)
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
collector.Collect(metricsCh)
|
||||
done <- struct{}{}
|
||||
}()
|
||||
for i := 0; i < numMetrics; i++ {
|
||||
metric := <-metricsCh
|
||||
metricProto := &dto.Metric{}
|
||||
assert.NoError(t, metric.Write(metricProto))
|
||||
results = append(results, collectResult{desc: metric.Desc(), metric: metricProto})
|
||||
}
|
||||
<-done
|
||||
return
|
||||
}
|
||||
|
||||
// assertEqual asserts for semanitic equality for fields we care about
|
||||
func assertEqual(t *testing.T, expected, actual collectResult) {
|
||||
assert.Equal(t, expected.desc.String(), actual.desc.String())
|
||||
assert.Equal(t, *expected.metric.Gauge.Value, *actual.metric.Gauge.Value, "for desc: %v", expected.desc.String())
|
||||
assert.Equal(t, len(expected.metric.Label), len(actual.metric.Label))
|
||||
if len(expected.metric.Label) == len(actual.metric.Label) {
|
||||
for i := range expected.metric.Label {
|
||||
assert.Equal(t, *expected.metric.Label[i], *actual.metric.Label[i], "for desc: %v", expected.desc.String())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func stringPtr(s string) *string {
|
||||
return &s
|
||||
}
|
||||
|
||||
func uint64Ptr(u uint64) *uint64 {
|
||||
return &u
|
||||
}
|
||||
|
||||
func float64Ptr(f float64) *float64 {
|
||||
return &f
|
||||
}
|
||||
|
@@ -17,6 +17,8 @@ limitations under the License.
 package metrics

 import (
+	"time"
+
 	"github.com/prometheus/client_golang/prometheus"
 )
@@ -44,3 +46,15 @@ func NewLazyConstMetric(desc *Desc, valueType ValueType, value float64, labelValues ...string) Metric {
 	}
 	return prometheus.MustNewConstMetric(desc.toPrometheusDesc(), valueType.toPromValueType(), value, labelValues...)
 }
+
+// NewLazyMetricWithTimestamp is a helper of NewMetricWithTimestamp.
+//
+// Warning: the Metric 'm' must be the one created by NewLazyConstMetric(),
+// otherwise, no stability guarantees would be offered.
+func NewLazyMetricWithTimestamp(t time.Time, m Metric) Metric {
+	if m == nil {
+		return nil
+	}
+
+	return prometheus.NewMetricWithTimestamp(t, m)
+}
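A short usage sketch of the new helper, respecting the warning in its doc comment that the wrapped Metric must come from NewLazyConstMetric; the function and its parameters are hypothetical, written as if inside the same metrics package:

    // Hypothetical in-package usage of NewLazyMetricWithTimestamp.
    func emitWithTimestamp(ch chan<- Metric, d *Desc, v float64, ts time.Time) {
    	// Wrap only metrics produced by NewLazyConstMetric, per the warning above.
    	ch <- NewLazyMetricWithTimestamp(ts, NewLazyConstMetric(d, GaugeValue, v))
    }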