mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-04 18:00:08 +00:00
Graduate kubelet resource metrics to GA
This commit is contained in:
parent
93bf570d46
commit
4712025ea8
@ -31,14 +31,14 @@ var (
|
|||||||
"Cumulative cpu time consumed by the node in core-seconds",
|
"Cumulative cpu time consumed by the node in core-seconds",
|
||||||
nil,
|
nil,
|
||||||
nil,
|
nil,
|
||||||
metrics.ALPHA,
|
metrics.STABLE,
|
||||||
"")
|
"")
|
||||||
|
|
||||||
nodeMemoryUsageDesc = metrics.NewDesc("node_memory_working_set_bytes",
|
nodeMemoryUsageDesc = metrics.NewDesc("node_memory_working_set_bytes",
|
||||||
"Current working set of the node in bytes",
|
"Current working set of the node in bytes",
|
||||||
nil,
|
nil,
|
||||||
nil,
|
nil,
|
||||||
metrics.ALPHA,
|
metrics.STABLE,
|
||||||
"")
|
"")
|
||||||
|
|
||||||
nodeSwapUsageDesc = metrics.NewDesc("node_swap_usage_bytes",
|
nodeSwapUsageDesc = metrics.NewDesc("node_swap_usage_bytes",
|
||||||
@ -52,14 +52,14 @@ var (
|
|||||||
"Cumulative cpu time consumed by the container in core-seconds",
|
"Cumulative cpu time consumed by the container in core-seconds",
|
||||||
[]string{"container", "pod", "namespace"},
|
[]string{"container", "pod", "namespace"},
|
||||||
nil,
|
nil,
|
||||||
metrics.ALPHA,
|
metrics.STABLE,
|
||||||
"")
|
"")
|
||||||
|
|
||||||
containerMemoryUsageDesc = metrics.NewDesc("container_memory_working_set_bytes",
|
containerMemoryUsageDesc = metrics.NewDesc("container_memory_working_set_bytes",
|
||||||
"Current working set of the container in bytes",
|
"Current working set of the container in bytes",
|
||||||
[]string{"container", "pod", "namespace"},
|
[]string{"container", "pod", "namespace"},
|
||||||
nil,
|
nil,
|
||||||
metrics.ALPHA,
|
metrics.STABLE,
|
||||||
"")
|
"")
|
||||||
|
|
||||||
containerSwapUsageDesc = metrics.NewDesc("container_swap_usage_bytes",
|
containerSwapUsageDesc = metrics.NewDesc("container_swap_usage_bytes",
|
||||||
@ -73,14 +73,14 @@ var (
|
|||||||
"Cumulative cpu time consumed by the pod in core-seconds",
|
"Cumulative cpu time consumed by the pod in core-seconds",
|
||||||
[]string{"pod", "namespace"},
|
[]string{"pod", "namespace"},
|
||||||
nil,
|
nil,
|
||||||
metrics.ALPHA,
|
metrics.STABLE,
|
||||||
"")
|
"")
|
||||||
|
|
||||||
podMemoryUsageDesc = metrics.NewDesc("pod_memory_working_set_bytes",
|
podMemoryUsageDesc = metrics.NewDesc("pod_memory_working_set_bytes",
|
||||||
"Current working set of the pod in bytes",
|
"Current working set of the pod in bytes",
|
||||||
[]string{"pod", "namespace"},
|
[]string{"pod", "namespace"},
|
||||||
nil,
|
nil,
|
||||||
metrics.ALPHA,
|
metrics.STABLE,
|
||||||
"")
|
"")
|
||||||
|
|
||||||
podSwapUsageDesc = metrics.NewDesc("pod_swap_usage_bytes",
|
podSwapUsageDesc = metrics.NewDesc("pod_swap_usage_bytes",
|
||||||
@ -95,13 +95,20 @@ var (
|
|||||||
nil,
|
nil,
|
||||||
nil,
|
nil,
|
||||||
metrics.ALPHA,
|
metrics.ALPHA,
|
||||||
|
"1.29.0")
|
||||||
|
|
||||||
|
resourceScrapeErrorResultDesc = metrics.NewDesc("resource_scrape_error",
|
||||||
|
"1 if there was an error while getting container metrics, 0 otherwise",
|
||||||
|
nil,
|
||||||
|
nil,
|
||||||
|
metrics.STABLE,
|
||||||
"")
|
"")
|
||||||
|
|
||||||
containerStartTimeDesc = metrics.NewDesc("container_start_time_seconds",
|
containerStartTimeDesc = metrics.NewDesc("container_start_time_seconds",
|
||||||
"Start time of the container since unix epoch in seconds",
|
"Start time of the container since unix epoch in seconds",
|
||||||
[]string{"container", "pod", "namespace"},
|
[]string{"container", "pod", "namespace"},
|
||||||
nil,
|
nil,
|
||||||
metrics.ALPHA,
|
metrics.STABLE,
|
||||||
"")
|
"")
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -134,6 +141,7 @@ func (rc *resourceMetricsCollector) DescribeWithStability(ch chan<- *metrics.Des
|
|||||||
ch <- podMemoryUsageDesc
|
ch <- podMemoryUsageDesc
|
||||||
ch <- podSwapUsageDesc
|
ch <- podSwapUsageDesc
|
||||||
ch <- resourceScrapeResultDesc
|
ch <- resourceScrapeResultDesc
|
||||||
|
ch <- resourceScrapeErrorResultDesc
|
||||||
}
|
}
|
||||||
|
|
||||||
// CollectWithStability implements metrics.StableCollector
|
// CollectWithStability implements metrics.StableCollector
|
||||||
@ -145,6 +153,7 @@ func (rc *resourceMetricsCollector) CollectWithStability(ch chan<- metrics.Metri
|
|||||||
var errorCount float64
|
var errorCount float64
|
||||||
defer func() {
|
defer func() {
|
||||||
ch <- metrics.NewLazyConstMetric(resourceScrapeResultDesc, metrics.GaugeValue, errorCount)
|
ch <- metrics.NewLazyConstMetric(resourceScrapeResultDesc, metrics.GaugeValue, errorCount)
|
||||||
|
ch <- metrics.NewLazyConstMetric(resourceScrapeErrorResultDesc, metrics.GaugeValue, errorCount)
|
||||||
}()
|
}()
|
||||||
statsSummary, err := rc.provider.GetCPUAndMemoryStats(ctx)
|
statsSummary, err := rc.provider.GetCPUAndMemoryStats(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -36,6 +36,7 @@ func TestCollectResourceMetrics(t *testing.T) {
|
|||||||
testTime := metav1.NewTime(staticTimestamp)
|
testTime := metav1.NewTime(staticTimestamp)
|
||||||
interestedMetrics := []string{
|
interestedMetrics := []string{
|
||||||
"scrape_error",
|
"scrape_error",
|
||||||
|
"resource_scrape_error",
|
||||||
"node_cpu_usage_seconds_total",
|
"node_cpu_usage_seconds_total",
|
||||||
"node_memory_working_set_bytes",
|
"node_memory_working_set_bytes",
|
||||||
"node_swap_usage_bytes",
|
"node_swap_usage_bytes",
|
||||||
@ -64,6 +65,9 @@ func TestCollectResourceMetrics(t *testing.T) {
|
|||||||
# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
|
# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
|
||||||
# TYPE scrape_error gauge
|
# TYPE scrape_error gauge
|
||||||
scrape_error 1
|
scrape_error 1
|
||||||
|
# HELP resource_scrape_error [STABLE] 1 if there was an error while getting container metrics, 0 otherwise
|
||||||
|
# TYPE resource_scrape_error gauge
|
||||||
|
resource_scrape_error 1
|
||||||
`,
|
`,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -86,10 +90,10 @@ func TestCollectResourceMetrics(t *testing.T) {
|
|||||||
},
|
},
|
||||||
summaryErr: nil,
|
summaryErr: nil,
|
||||||
expectedMetrics: `
|
expectedMetrics: `
|
||||||
# HELP node_cpu_usage_seconds_total [ALPHA] Cumulative cpu time consumed by the node in core-seconds
|
# HELP node_cpu_usage_seconds_total [STABLE] Cumulative cpu time consumed by the node in core-seconds
|
||||||
# TYPE node_cpu_usage_seconds_total counter
|
# TYPE node_cpu_usage_seconds_total counter
|
||||||
node_cpu_usage_seconds_total 10 1624396278302
|
node_cpu_usage_seconds_total 10 1624396278302
|
||||||
# HELP node_memory_working_set_bytes [ALPHA] Current working set of the node in bytes
|
# HELP node_memory_working_set_bytes [STABLE] Current working set of the node in bytes
|
||||||
# TYPE node_memory_working_set_bytes gauge
|
# TYPE node_memory_working_set_bytes gauge
|
||||||
node_memory_working_set_bytes 1000 1624396278302
|
node_memory_working_set_bytes 1000 1624396278302
|
||||||
# HELP node_swap_usage_bytes [ALPHA] Current swap usage of the node in bytes. Reported only on non-windows systems
|
# HELP node_swap_usage_bytes [ALPHA] Current swap usage of the node in bytes. Reported only on non-windows systems
|
||||||
@ -98,6 +102,9 @@ func TestCollectResourceMetrics(t *testing.T) {
|
|||||||
# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
|
# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
|
||||||
# TYPE scrape_error gauge
|
# TYPE scrape_error gauge
|
||||||
scrape_error 0
|
scrape_error 0
|
||||||
|
# HELP resource_scrape_error [STABLE] 1 if there was an error while getting container metrics, 0 otherwise
|
||||||
|
# TYPE resource_scrape_error gauge
|
||||||
|
resource_scrape_error 0
|
||||||
`,
|
`,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -119,6 +126,9 @@ func TestCollectResourceMetrics(t *testing.T) {
|
|||||||
# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
|
# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
|
||||||
# TYPE scrape_error gauge
|
# TYPE scrape_error gauge
|
||||||
scrape_error 0
|
scrape_error 0
|
||||||
|
# HELP resource_scrape_error [STABLE] 1 if there was an error while getting container metrics, 0 otherwise
|
||||||
|
# TYPE resource_scrape_error gauge
|
||||||
|
resource_scrape_error 0
|
||||||
`,
|
`,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -188,17 +198,20 @@ func TestCollectResourceMetrics(t *testing.T) {
|
|||||||
# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
|
# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
|
||||||
# TYPE scrape_error gauge
|
# TYPE scrape_error gauge
|
||||||
scrape_error 0
|
scrape_error 0
|
||||||
# HELP container_cpu_usage_seconds_total [ALPHA] Cumulative cpu time consumed by the container in core-seconds
|
# HELP resource_scrape_error [STABLE] 1 if there was an error while getting container metrics, 0 otherwise
|
||||||
|
# TYPE resource_scrape_error gauge
|
||||||
|
resource_scrape_error 0
|
||||||
|
# HELP container_cpu_usage_seconds_total [STABLE] Cumulative cpu time consumed by the container in core-seconds
|
||||||
# TYPE container_cpu_usage_seconds_total counter
|
# TYPE container_cpu_usage_seconds_total counter
|
||||||
container_cpu_usage_seconds_total{container="container_a",namespace="namespace_a",pod="pod_a"} 10 1624396278302
|
container_cpu_usage_seconds_total{container="container_a",namespace="namespace_a",pod="pod_a"} 10 1624396278302
|
||||||
container_cpu_usage_seconds_total{container="container_a",namespace="namespace_b",pod="pod_b"} 10 1624396278302
|
container_cpu_usage_seconds_total{container="container_a",namespace="namespace_b",pod="pod_b"} 10 1624396278302
|
||||||
container_cpu_usage_seconds_total{container="container_b",namespace="namespace_a",pod="pod_a"} 10 1624396278302
|
container_cpu_usage_seconds_total{container="container_b",namespace="namespace_a",pod="pod_a"} 10 1624396278302
|
||||||
# HELP container_memory_working_set_bytes [ALPHA] Current working set of the container in bytes
|
# HELP container_memory_working_set_bytes [STABLE] Current working set of the container in bytes
|
||||||
# TYPE container_memory_working_set_bytes gauge
|
# TYPE container_memory_working_set_bytes gauge
|
||||||
container_memory_working_set_bytes{container="container_a",namespace="namespace_a",pod="pod_a"} 1000 1624396278302
|
container_memory_working_set_bytes{container="container_a",namespace="namespace_a",pod="pod_a"} 1000 1624396278302
|
||||||
container_memory_working_set_bytes{container="container_a",namespace="namespace_b",pod="pod_b"} 1000 1624396278302
|
container_memory_working_set_bytes{container="container_a",namespace="namespace_b",pod="pod_b"} 1000 1624396278302
|
||||||
container_memory_working_set_bytes{container="container_b",namespace="namespace_a",pod="pod_a"} 1000 1624396278302
|
container_memory_working_set_bytes{container="container_b",namespace="namespace_a",pod="pod_a"} 1000 1624396278302
|
||||||
# HELP container_start_time_seconds [ALPHA] Start time of the container since unix epoch in seconds
|
# HELP container_start_time_seconds [STABLE] Start time of the container since unix epoch in seconds
|
||||||
# TYPE container_start_time_seconds gauge
|
# TYPE container_start_time_seconds gauge
|
||||||
container_start_time_seconds{container="container_a",namespace="namespace_a",pod="pod_a"} 1.6243962483020916e+09 1624396248302
|
container_start_time_seconds{container="container_a",namespace="namespace_a",pod="pod_a"} 1.6243962483020916e+09 1624396248302
|
||||||
container_start_time_seconds{container="container_a",namespace="namespace_b",pod="pod_b"} 1.6243956783020916e+09 1624395678302
|
container_start_time_seconds{container="container_a",namespace="namespace_b",pod="pod_b"} 1.6243956783020916e+09 1624395678302
|
||||||
@ -239,10 +252,13 @@ func TestCollectResourceMetrics(t *testing.T) {
|
|||||||
# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
|
# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
|
||||||
# TYPE scrape_error gauge
|
# TYPE scrape_error gauge
|
||||||
scrape_error 0
|
scrape_error 0
|
||||||
# HELP container_cpu_usage_seconds_total [ALPHA] Cumulative cpu time consumed by the container in core-seconds
|
# HELP resource_scrape_error [STABLE] 1 if there was an error while getting container metrics, 0 otherwise
|
||||||
|
# TYPE resource_scrape_error gauge
|
||||||
|
resource_scrape_error 0
|
||||||
|
# HELP container_cpu_usage_seconds_total [STABLE] Cumulative cpu time consumed by the container in core-seconds
|
||||||
# TYPE container_cpu_usage_seconds_total counter
|
# TYPE container_cpu_usage_seconds_total counter
|
||||||
container_cpu_usage_seconds_total{container="container_a",namespace="namespace_a",pod="pod_a"} 10 1624396278302
|
container_cpu_usage_seconds_total{container="container_a",namespace="namespace_a",pod="pod_a"} 10 1624396278302
|
||||||
# HELP container_memory_working_set_bytes [ALPHA] Current working set of the container in bytes
|
# HELP container_memory_working_set_bytes [STABLE] Current working set of the container in bytes
|
||||||
# TYPE container_memory_working_set_bytes gauge
|
# TYPE container_memory_working_set_bytes gauge
|
||||||
container_memory_working_set_bytes{container="container_a",namespace="namespace_a",pod="pod_a"} 1000 1624396278302
|
container_memory_working_set_bytes{container="container_a",namespace="namespace_a",pod="pod_a"} 1000 1624396278302
|
||||||
`,
|
`,
|
||||||
@ -295,19 +311,22 @@ func TestCollectResourceMetrics(t *testing.T) {
|
|||||||
},
|
},
|
||||||
summaryErr: nil,
|
summaryErr: nil,
|
||||||
expectedMetrics: `
|
expectedMetrics: `
|
||||||
# HELP container_cpu_usage_seconds_total [ALPHA] Cumulative cpu time consumed by the container in core-seconds
|
# HELP container_cpu_usage_seconds_total [STABLE] Cumulative cpu time consumed by the container in core-seconds
|
||||||
# TYPE container_cpu_usage_seconds_total counter
|
# TYPE container_cpu_usage_seconds_total counter
|
||||||
container_cpu_usage_seconds_total{container="container_a",namespace="namespace_b",pod="pod_b"} 10 1624396278302
|
container_cpu_usage_seconds_total{container="container_a",namespace="namespace_b",pod="pod_b"} 10 1624396278302
|
||||||
# HELP container_memory_working_set_bytes [ALPHA] Current working set of the container in bytes
|
# HELP container_memory_working_set_bytes [STABLE] Current working set of the container in bytes
|
||||||
# TYPE container_memory_working_set_bytes gauge
|
# TYPE container_memory_working_set_bytes gauge
|
||||||
container_memory_working_set_bytes{container="container_a",namespace="namespace_b",pod="pod_b"} 1000 1624396278302
|
container_memory_working_set_bytes{container="container_a",namespace="namespace_b",pod="pod_b"} 1000 1624396278302
|
||||||
# HELP container_start_time_seconds [ALPHA] Start time of the container since unix epoch in seconds
|
# HELP container_start_time_seconds [STABLE] Start time of the container since unix epoch in seconds
|
||||||
# TYPE container_start_time_seconds gauge
|
# TYPE container_start_time_seconds gauge
|
||||||
container_start_time_seconds{container="container_a",namespace="namespace_a",pod="pod_a"} 1.6243962483020916e+09 1624396248302
|
container_start_time_seconds{container="container_a",namespace="namespace_a",pod="pod_a"} 1.6243962483020916e+09 1624396248302
|
||||||
container_start_time_seconds{container="container_a",namespace="namespace_b",pod="pod_b"} 1.6243956783020916e+09 1624395678302
|
container_start_time_seconds{container="container_a",namespace="namespace_b",pod="pod_b"} 1.6243956783020916e+09 1624395678302
|
||||||
# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
|
# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
|
||||||
# TYPE scrape_error gauge
|
# TYPE scrape_error gauge
|
||||||
scrape_error 0
|
scrape_error 0
|
||||||
|
# HELP resource_scrape_error [STABLE] 1 if there was an error while getting container metrics, 0 otherwise
|
||||||
|
# TYPE resource_scrape_error gauge
|
||||||
|
resource_scrape_error 0
|
||||||
`,
|
`,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -339,10 +358,13 @@ func TestCollectResourceMetrics(t *testing.T) {
|
|||||||
# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
|
# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
|
||||||
# TYPE scrape_error gauge
|
# TYPE scrape_error gauge
|
||||||
scrape_error 0
|
scrape_error 0
|
||||||
# HELP pod_cpu_usage_seconds_total [ALPHA] Cumulative cpu time consumed by the pod in core-seconds
|
# HELP resource_scrape_error [STABLE] 1 if there was an error while getting container metrics, 0 otherwise
|
||||||
|
# TYPE resource_scrape_error gauge
|
||||||
|
resource_scrape_error 0
|
||||||
|
# HELP pod_cpu_usage_seconds_total [STABLE] Cumulative cpu time consumed by the pod in core-seconds
|
||||||
# TYPE pod_cpu_usage_seconds_total counter
|
# TYPE pod_cpu_usage_seconds_total counter
|
||||||
pod_cpu_usage_seconds_total{namespace="namespace_a",pod="pod_a"} 10 1624396278302
|
pod_cpu_usage_seconds_total{namespace="namespace_a",pod="pod_a"} 10 1624396278302
|
||||||
# HELP pod_memory_working_set_bytes [ALPHA] Current working set of the pod in bytes
|
# HELP pod_memory_working_set_bytes [STABLE] Current working set of the pod in bytes
|
||||||
# TYPE pod_memory_working_set_bytes gauge
|
# TYPE pod_memory_working_set_bytes gauge
|
||||||
pod_memory_working_set_bytes{namespace="namespace_a",pod="pod_a"} 1000 1624396278302
|
pod_memory_working_set_bytes{namespace="namespace_a",pod="pod_a"} 1000 1624396278302
|
||||||
# HELP pod_swap_usage_bytes [ALPHA] Current amount of the pod swap usage in bytes. Reported only on non-windows systems
|
# HELP pod_swap_usage_bytes [ALPHA] Current amount of the pod swap usage in bytes. Reported only on non-windows systems
|
||||||
@ -375,6 +397,9 @@ func TestCollectResourceMetrics(t *testing.T) {
|
|||||||
# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
|
# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
|
||||||
# TYPE scrape_error gauge
|
# TYPE scrape_error gauge
|
||||||
scrape_error 0
|
scrape_error 0
|
||||||
|
# HELP resource_scrape_error [STABLE] 1 if there was an error while getting container metrics, 0 otherwise
|
||||||
|
# TYPE resource_scrape_error gauge
|
||||||
|
resource_scrape_error 0
|
||||||
`,
|
`,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
@ -181,21 +181,34 @@ func (g *Grabber) GrabFromKubelet(ctx context.Context, nodeName string) (Kubelet
|
|||||||
return KubeletMetrics{}, fmt.Errorf("Error listing nodes with name %v, got %v", nodeName, nodes.Items)
|
return KubeletMetrics{}, fmt.Errorf("Error listing nodes with name %v, got %v", nodeName, nodes.Items)
|
||||||
}
|
}
|
||||||
kubeletPort := nodes.Items[0].Status.DaemonEndpoints.KubeletEndpoint.Port
|
kubeletPort := nodes.Items[0].Status.DaemonEndpoints.KubeletEndpoint.Port
|
||||||
return g.grabFromKubeletInternal(ctx, nodeName, int(kubeletPort))
|
return g.grabFromKubeletInternal(ctx, nodeName, int(kubeletPort), "metrics")
|
||||||
}
|
}
|
||||||
|
|
||||||
func (g *Grabber) grabFromKubeletInternal(ctx context.Context, nodeName string, kubeletPort int) (KubeletMetrics, error) {
|
// GrabResourceMetricsFromKubelet returns resource metrics from kubelet
|
||||||
|
func (g *Grabber) GrabResourceMetricsFromKubelet(ctx context.Context, nodeName string) (KubeletMetrics, error) {
|
||||||
|
nodes, err := g.client.CoreV1().Nodes().List(ctx, metav1.ListOptions{FieldSelector: fields.Set{"metadata.name": nodeName}.AsSelector().String()})
|
||||||
|
if err != nil {
|
||||||
|
return KubeletMetrics{}, err
|
||||||
|
}
|
||||||
|
if len(nodes.Items) != 1 {
|
||||||
|
return KubeletMetrics{}, fmt.Errorf("Error listing nodes with name %v, got %v", nodeName, nodes.Items)
|
||||||
|
}
|
||||||
|
kubeletPort := nodes.Items[0].Status.DaemonEndpoints.KubeletEndpoint.Port
|
||||||
|
return g.grabFromKubeletInternal(ctx, nodeName, int(kubeletPort), "metrics/resource")
|
||||||
|
}
|
||||||
|
|
||||||
|
func (g *Grabber) grabFromKubeletInternal(ctx context.Context, nodeName string, kubeletPort int, pathSuffix string) (KubeletMetrics, error) {
|
||||||
if kubeletPort <= 0 || kubeletPort > 65535 {
|
if kubeletPort <= 0 || kubeletPort > 65535 {
|
||||||
return KubeletMetrics{}, fmt.Errorf("Invalid Kubelet port %v. Skipping Kubelet's metrics gathering", kubeletPort)
|
return KubeletMetrics{}, fmt.Errorf("Invalid Kubelet port %v. Skipping Kubelet's metrics gathering", kubeletPort)
|
||||||
}
|
}
|
||||||
output, err := g.getMetricsFromNode(ctx, nodeName, int(kubeletPort))
|
output, err := g.getMetricsFromNode(ctx, nodeName, int(kubeletPort), pathSuffix)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return KubeletMetrics{}, err
|
return KubeletMetrics{}, err
|
||||||
}
|
}
|
||||||
return parseKubeletMetrics(output)
|
return parseKubeletMetrics(output)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (g *Grabber) getMetricsFromNode(ctx context.Context, nodeName string, kubeletPort int) (string, error) {
|
func (g *Grabber) getMetricsFromNode(ctx context.Context, nodeName string, kubeletPort int, pathSuffix string) (string, error) {
|
||||||
// There's a problem with timing out during proxy. Wrapping this in a goroutine to prevent deadlock.
|
// There's a problem with timing out during proxy. Wrapping this in a goroutine to prevent deadlock.
|
||||||
finished := make(chan struct{}, 1)
|
finished := make(chan struct{}, 1)
|
||||||
var err error
|
var err error
|
||||||
@ -205,7 +218,7 @@ func (g *Grabber) getMetricsFromNode(ctx context.Context, nodeName string, kubel
|
|||||||
Resource("nodes").
|
Resource("nodes").
|
||||||
SubResource("proxy").
|
SubResource("proxy").
|
||||||
Name(fmt.Sprintf("%v:%v", nodeName, kubeletPort)).
|
Name(fmt.Sprintf("%v:%v", nodeName, kubeletPort)).
|
||||||
Suffix("metrics").
|
Suffix(pathSuffix).
|
||||||
Do(ctx).Raw()
|
Do(ctx).Raw()
|
||||||
finished <- struct{}{}
|
finished <- struct{}{}
|
||||||
}()
|
}()
|
||||||
@ -432,7 +445,7 @@ func (g *Grabber) Grab(ctx context.Context) (Collection, error) {
|
|||||||
} else {
|
} else {
|
||||||
for _, node := range nodes.Items {
|
for _, node := range nodes.Items {
|
||||||
kubeletPort := node.Status.DaemonEndpoints.KubeletEndpoint.Port
|
kubeletPort := node.Status.DaemonEndpoints.KubeletEndpoint.Port
|
||||||
metrics, err := g.grabFromKubeletInternal(ctx, node.Name, int(kubeletPort))
|
metrics, err := g.grabFromKubeletInternal(ctx, node.Name, int(kubeletPort), "metrics")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
errs = append(errs, err)
|
errs = append(errs, err)
|
||||||
}
|
}
|
||||||
|
70
test/e2e/instrumentation/metrics.go
Normal file
70
test/e2e/instrumentation/metrics.go
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
/*
|
||||||
|
Copyright 2023 The Kubernetes Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package instrumentation
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/onsi/gomega"
|
||||||
|
clientset "k8s.io/client-go/kubernetes"
|
||||||
|
"k8s.io/kubernetes/test/e2e/framework"
|
||||||
|
e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics"
|
||||||
|
e2enode "k8s.io/kubernetes/test/e2e/framework/node"
|
||||||
|
e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
|
||||||
|
"k8s.io/kubernetes/test/e2e/instrumentation/common"
|
||||||
|
admissionapi "k8s.io/pod-security-admission/api"
|
||||||
|
|
||||||
|
"github.com/onsi/ginkgo/v2"
|
||||||
|
)
|
||||||
|
|
||||||
|
var _ = common.SIGDescribe("Metrics", func() {
|
||||||
|
f := framework.NewDefaultFramework("metrics")
|
||||||
|
f.NamespacePodSecurityEnforceLevel = admissionapi.LevelPrivileged
|
||||||
|
var c, ec clientset.Interface
|
||||||
|
var grabber *e2emetrics.Grabber
|
||||||
|
ginkgo.BeforeEach(func(ctx context.Context) {
|
||||||
|
var err error
|
||||||
|
c = f.ClientSet
|
||||||
|
ec = f.KubemarkExternalClusterClientSet
|
||||||
|
gomega.Eventually(ctx, func() error {
|
||||||
|
grabber, err = e2emetrics.NewMetricsGrabber(ctx, c, ec, f.ClientConfig(), true, true, true, true, true, true)
|
||||||
|
if err != nil {
|
||||||
|
framework.ExpectNoError(err, "failed to create metrics grabber")
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}, 5*time.Minute, 10*time.Second).Should(gomega.BeNil())
|
||||||
|
})
|
||||||
|
|
||||||
|
/*
|
||||||
|
Release: v1.29
|
||||||
|
Testname: Kubelet resource metrics
|
||||||
|
Description: Should attempt to grab all resource metrics from the kubelet's /metrics/resource endpoint.
|
||||||
|
*/
|
||||||
|
ginkgo.It("should grab all metrics from kubelet /metrics/resource endpoint", func(ctx context.Context) {
|
||||||
|
ginkgo.By("Connecting to kubelet's /metrics/resource endpoint")
|
||||||
|
node, err := e2enode.GetRandomReadySchedulableNode(ctx, f.ClientSet)
|
||||||
|
if errors.Is(err, e2emetrics.MetricsGrabbingDisabledError) {
|
||||||
|
e2eskipper.Skipf("%v", err)
|
||||||
|
}
|
||||||
|
framework.ExpectNoError(err)
|
||||||
|
response, err := grabber.GrabResourceMetricsFromKubelet(ctx, node.Name)
|
||||||
|
framework.ExpectNoError(err)
|
||||||
|
gomega.Expect(response).NotTo(gomega.BeEmpty())
|
||||||
|
})
|
||||||
|
})
|
@ -74,6 +74,56 @@
|
|||||||
stabilityLevel: STABLE
|
stabilityLevel: STABLE
|
||||||
labels:
|
labels:
|
||||||
- zone
|
- zone
|
||||||
|
- name: container_cpu_usage_seconds_total
|
||||||
|
help: Cumulative cpu time consumed by the container in core-seconds
|
||||||
|
type: Custom
|
||||||
|
stabilityLevel: STABLE
|
||||||
|
labels:
|
||||||
|
- container
|
||||||
|
- pod
|
||||||
|
- namespace
|
||||||
|
- name: container_memory_working_set_bytes
|
||||||
|
help: Current working set of the container in bytes
|
||||||
|
type: Custom
|
||||||
|
stabilityLevel: STABLE
|
||||||
|
labels:
|
||||||
|
- container
|
||||||
|
- pod
|
||||||
|
- namespace
|
||||||
|
- name: container_start_time_seconds
|
||||||
|
help: Start time of the container since unix epoch in seconds
|
||||||
|
type: Custom
|
||||||
|
stabilityLevel: STABLE
|
||||||
|
labels:
|
||||||
|
- container
|
||||||
|
- pod
|
||||||
|
- namespace
|
||||||
|
- name: node_cpu_usage_seconds_total
|
||||||
|
help: Cumulative cpu time consumed by the node in core-seconds
|
||||||
|
type: Custom
|
||||||
|
stabilityLevel: STABLE
|
||||||
|
- name: node_memory_working_set_bytes
|
||||||
|
help: Current working set of the node in bytes
|
||||||
|
type: Custom
|
||||||
|
stabilityLevel: STABLE
|
||||||
|
- name: pod_cpu_usage_seconds_total
|
||||||
|
help: Cumulative cpu time consumed by the pod in core-seconds
|
||||||
|
type: Custom
|
||||||
|
stabilityLevel: STABLE
|
||||||
|
labels:
|
||||||
|
- pod
|
||||||
|
- namespace
|
||||||
|
- name: pod_memory_working_set_bytes
|
||||||
|
help: Current working set of the pod in bytes
|
||||||
|
type: Custom
|
||||||
|
stabilityLevel: STABLE
|
||||||
|
labels:
|
||||||
|
- pod
|
||||||
|
- namespace
|
||||||
|
- name: resource_scrape_error
|
||||||
|
help: 1 if there was an error while getting container metrics, 0 otherwise
|
||||||
|
type: Custom
|
||||||
|
stabilityLevel: STABLE
|
||||||
- name: pod_scheduling_sli_duration_seconds
|
- name: pod_scheduling_sli_duration_seconds
|
||||||
subsystem: scheduler
|
subsystem: scheduler
|
||||||
help: E2e latency for a pod being scheduled, from the time the pod enters the scheduling
|
help: E2e latency for a pod being scheduled, from the time the pod enters the scheduling
|
||||||
|
Loading…
Reference in New Issue
Block a user