mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-05 18:24:07 +00:00
Merge pull request #93066 from AnishShah/network-plugin-metrics
kubelet: add operations count and error count metrics to network plugin manager
This commit is contained in:
commit
0969e666dc
@ -1,4 +1,4 @@
|
||||
load("@io_bazel_rules_go//go:def.bzl", "go_library")
|
||||
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
|
||||
|
||||
go_library(
|
||||
name = "go_default_library",
|
||||
@ -46,3 +46,14 @@ filegroup(
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
go_test(
|
||||
name = "go_default_test",
|
||||
srcs = ["plugins_test.go"],
|
||||
embed = [":go_default_library"],
|
||||
deps = [
|
||||
"//pkg/kubelet/dockershim/network/metrics:go_default_library",
|
||||
"//staging/src/k8s.io/component-base/metrics/legacyregistry:go_default_library",
|
||||
"//staging/src/k8s.io/component-base/metrics/testutil:go_default_library",
|
||||
],
|
||||
)
|
||||
|
@ -28,9 +28,11 @@ import (
|
||||
|
||||
const (
|
||||
// NetworkPluginOperationsKey is the key for operation count metrics.
|
||||
NetworkPluginOperationsKey = "network_plugin_operations"
|
||||
NetworkPluginOperationsKey = "network_plugin_operations_total"
|
||||
// NetworkPluginOperationsLatencyKey is the key for the operation latency metrics.
|
||||
NetworkPluginOperationsLatencyKey = "network_plugin_operations_duration_seconds"
|
||||
// NetworkPluginOperationsErrorsKey is the key for the operations error metrics.
|
||||
NetworkPluginOperationsErrorsKey = "network_plugin_operations_errors_total"
|
||||
|
||||
// Keep the "kubelet" subsystem for backward compatibility.
|
||||
kubeletSubsystem = "kubelet"
|
||||
@ -49,6 +51,28 @@ var (
|
||||
},
|
||||
[]string{"operation_type"},
|
||||
)
|
||||
|
||||
// NetworkPluginOperations collects operation counts by operation type.
|
||||
NetworkPluginOperations = metrics.NewCounterVec(
|
||||
&metrics.CounterOpts{
|
||||
Subsystem: kubeletSubsystem,
|
||||
Name: NetworkPluginOperationsKey,
|
||||
Help: "Cumulative number of network plugin operations by operation type.",
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
},
|
||||
[]string{"operation_type"},
|
||||
)
|
||||
|
||||
// NetworkPluginOperationsErrors collects operation errors by operation type.
|
||||
NetworkPluginOperationsErrors = metrics.NewCounterVec(
|
||||
&metrics.CounterOpts{
|
||||
Subsystem: kubeletSubsystem,
|
||||
Name: NetworkPluginOperationsErrorsKey,
|
||||
Help: "Cumulative number of network plugin operation errors by operation type.",
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
},
|
||||
[]string{"operation_type"},
|
||||
)
|
||||
)
|
||||
|
||||
var registerMetrics sync.Once
|
||||
@ -57,6 +81,8 @@ var registerMetrics sync.Once
|
||||
func Register() {
|
||||
registerMetrics.Do(func() {
|
||||
legacyregistry.MustRegister(NetworkPluginOperationsLatency)
|
||||
legacyregistry.MustRegister(NetworkPluginOperations)
|
||||
legacyregistry.MustRegister(NetworkPluginOperationsErrors)
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -382,17 +382,25 @@ func (pm *PluginManager) podUnlock(fullPodName string) {
|
||||
|
||||
// recordOperation records operation and duration
|
||||
func recordOperation(operation string, start time.Time) {
|
||||
metrics.NetworkPluginOperations.WithLabelValues(operation).Inc()
|
||||
metrics.NetworkPluginOperationsLatency.WithLabelValues(operation).Observe(metrics.SinceInSeconds(start))
|
||||
}
|
||||
|
||||
// recordError records errors for metric.
|
||||
func recordError(operation string) {
|
||||
metrics.NetworkPluginOperationsErrors.WithLabelValues(operation).Inc()
|
||||
}
|
||||
|
||||
func (pm *PluginManager) GetPodNetworkStatus(podNamespace, podName string, id kubecontainer.ContainerID) (*PodNetworkStatus, error) {
|
||||
defer recordOperation("get_pod_network_status", time.Now())
|
||||
const operation = "get_pod_network_status"
|
||||
defer recordOperation(operation, time.Now())
|
||||
fullPodName := kubecontainer.BuildPodFullName(podName, podNamespace)
|
||||
pm.podLock(fullPodName).Lock()
|
||||
defer pm.podUnlock(fullPodName)
|
||||
|
||||
netStatus, err := pm.plugin.GetPodNetworkStatus(podNamespace, podName, id)
|
||||
if err != nil {
|
||||
recordError(operation)
|
||||
return nil, fmt.Errorf("networkPlugin %s failed on the status hook for pod %q: %v", pm.plugin.Name(), fullPodName, err)
|
||||
}
|
||||
|
||||
@ -400,13 +408,15 @@ func (pm *PluginManager) GetPodNetworkStatus(podNamespace, podName string, id ku
|
||||
}
|
||||
|
||||
func (pm *PluginManager) SetUpPod(podNamespace, podName string, id kubecontainer.ContainerID, annotations, options map[string]string) error {
|
||||
defer recordOperation("set_up_pod", time.Now())
|
||||
const operation = "set_up_pod"
|
||||
defer recordOperation(operation, time.Now())
|
||||
fullPodName := kubecontainer.BuildPodFullName(podName, podNamespace)
|
||||
pm.podLock(fullPodName).Lock()
|
||||
defer pm.podUnlock(fullPodName)
|
||||
|
||||
klog.V(3).Infof("Calling network plugin %s to set up pod %q", pm.plugin.Name(), fullPodName)
|
||||
if err := pm.plugin.SetUpPod(podNamespace, podName, id, annotations, options); err != nil {
|
||||
recordError(operation)
|
||||
return fmt.Errorf("networkPlugin %s failed to set up pod %q network: %v", pm.plugin.Name(), fullPodName, err)
|
||||
}
|
||||
|
||||
@ -414,13 +424,15 @@ func (pm *PluginManager) SetUpPod(podNamespace, podName string, id kubecontainer
|
||||
}
|
||||
|
||||
func (pm *PluginManager) TearDownPod(podNamespace, podName string, id kubecontainer.ContainerID) error {
|
||||
defer recordOperation("tear_down_pod", time.Now())
|
||||
const operation = "tear_down_pod"
|
||||
defer recordOperation(operation, time.Now())
|
||||
fullPodName := kubecontainer.BuildPodFullName(podName, podNamespace)
|
||||
pm.podLock(fullPodName).Lock()
|
||||
defer pm.podUnlock(fullPodName)
|
||||
|
||||
klog.V(3).Infof("Calling network plugin %s to tear down pod %q", pm.plugin.Name(), fullPodName)
|
||||
if err := pm.plugin.TearDownPod(podNamespace, podName, id); err != nil {
|
||||
recordError(operation)
|
||||
return fmt.Errorf("networkPlugin %s failed to teardown pod %q network: %v", pm.plugin.Name(), fullPodName, err)
|
||||
}
|
||||
|
||||
|
67
pkg/kubelet/dockershim/network/plugins_test.go
Normal file
67
pkg/kubelet/dockershim/network/plugins_test.go
Normal file
@ -0,0 +1,67 @@
|
||||
// +build !dockerless
|
||||
|
||||
/*
|
||||
Copyright 2020 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package network
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"k8s.io/component-base/metrics/legacyregistry"
|
||||
"k8s.io/component-base/metrics/testutil"
|
||||
"k8s.io/kubernetes/pkg/kubelet/dockershim/network/metrics"
|
||||
)
|
||||
|
||||
// TestNetworkPluginManagerMetrics records one operation and one error under a
// test operation type, then checks the gathered output of the operation and
// error counters against the expected exposition text.
func TestNetworkPluginManagerMetrics(t *testing.T) {
|
||||
// Make sure the network plugin metrics are registered before recording.
metrics.Register()
|
||||
|
||||
operation := "test_operation"
|
||||
// Exactly one operation and one error are recorded, so each expected
// counter value below is 1.
recordOperation(operation, time.Now())
|
||||
recordError(operation)
|
||||
|
||||
// Each case names the metric to gather and the text it is compared against.
cases := []struct {
|
||||
metricName string
|
||||
want string
|
||||
}{
|
||||
{
|
||||
metricName: "kubelet_network_plugin_operations_total",
|
||||
want: `
|
||||
# HELP kubelet_network_plugin_operations_total [ALPHA] Cumulative number of network plugin operations by operation type.
|
||||
# TYPE kubelet_network_plugin_operations_total counter
|
||||
kubelet_network_plugin_operations_total{operation_type="test_operation"} 1
|
||||
`,
|
||||
},
|
||||
{
|
||||
metricName: "kubelet_network_plugin_operations_errors_total",
|
||||
want: `
|
||||
# HELP kubelet_network_plugin_operations_errors_total [ALPHA] Cumulative number of network plugin operation errors by operation type.
|
||||
# TYPE kubelet_network_plugin_operations_errors_total counter
|
||||
kubelet_network_plugin_operations_errors_total{operation_type="test_operation"} 1
|
||||
`,
|
||||
},
|
||||
}
|
||||
|
||||
// One subtest per metric; a non-nil error from GatherAndCompare fails it.
for _, tc := range cases {
|
||||
t.Run(tc.metricName, func(t *testing.T) {
|
||||
if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(tc.want), tc.metricName); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user