Patch summary (text before the first "diff --git" header is ignored by patch tools):
  - BUILD: loads go_test and adds a go_default_test target for plugins_test.go.
  - metrics/metrics.go: changes the value of NetworkPluginOperationsKey to
    "network_plugin_operations_total", adds NetworkPluginOperationsErrorsKey, defines the
    NetworkPluginOperations and NetworkPluginOperationsErrors counter vectors (both labelled
    by "operation_type"), and registers both in Register().
  - plugins.go: recordOperation now also increments the operations counter; the new
    recordError helper increments the errors counter and is called by GetPodNetworkStatus,
    SetUpPod and TearDownPod when the underlying plugin call returns an error.
  - plugins_test.go: new test asserting that one recordOperation call and one recordError
    call each leave the corresponding "kubelet_"-prefixed counter at 1 for the
    "test_operation" label value.

diff --git a/pkg/kubelet/dockershim/network/BUILD b/pkg/kubelet/dockershim/network/BUILD
index fa8fddd2b8b..96dc3441384 100644
--- a/pkg/kubelet/dockershim/network/BUILD
+++ b/pkg/kubelet/dockershim/network/BUILD
@@ -1,4 +1,4 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_library")
+load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
 
 go_library(
     name = "go_default_library",
@@ -46,3 +46,14 @@ filegroup(
     tags = ["automanaged"],
     visibility = ["//visibility:public"],
 )
+
+go_test(
+    name = "go_default_test",
+    srcs = ["plugins_test.go"],
+    embed = [":go_default_library"],
+    deps = [
+        "//pkg/kubelet/dockershim/network/metrics:go_default_library",
+        "//staging/src/k8s.io/component-base/metrics/legacyregistry:go_default_library",
+        "//staging/src/k8s.io/component-base/metrics/testutil:go_default_library",
+    ],
+)
diff --git a/pkg/kubelet/dockershim/network/metrics/metrics.go b/pkg/kubelet/dockershim/network/metrics/metrics.go
index eaa05f0649d..db086e4b955 100644
--- a/pkg/kubelet/dockershim/network/metrics/metrics.go
+++ b/pkg/kubelet/dockershim/network/metrics/metrics.go
@@ -28,9 +28,11 @@ import (
 
 const (
 	// NetworkPluginOperationsKey is the key for operation count metrics.
-	NetworkPluginOperationsKey = "network_plugin_operations"
+	NetworkPluginOperationsKey = "network_plugin_operations_total"
 	// NetworkPluginOperationsLatencyKey is the key for the operation latency metrics.
 	NetworkPluginOperationsLatencyKey = "network_plugin_operations_duration_seconds"
+	// NetworkPluginOperationsErrorsKey is the key for the operations error metrics.
+	NetworkPluginOperationsErrorsKey = "network_plugin_operations_errors_total"
 
 	// Keep the "kubelet" subsystem for backward compatibility.
 	kubeletSubsystem = "kubelet"
@@ -49,6 +51,28 @@ var (
 		},
 		[]string{"operation_type"},
 	)
+
+	// NetworkPluginOperations collects operation counts by operation type.
+	NetworkPluginOperations = metrics.NewCounterVec(
+		&metrics.CounterOpts{
+			Subsystem:      kubeletSubsystem,
+			Name:           NetworkPluginOperationsKey,
+			Help:           "Cumulative number of network plugin operations by operation type.",
+			StabilityLevel: metrics.ALPHA,
+		},
+		[]string{"operation_type"},
+	)
+
+	// NetworkPluginOperationsErrors collects operation errors by operation type.
+	NetworkPluginOperationsErrors = metrics.NewCounterVec(
+		&metrics.CounterOpts{
+			Subsystem:      kubeletSubsystem,
+			Name:           NetworkPluginOperationsErrorsKey,
+			Help:           "Cumulative number of network plugin operation errors by operation type.",
+			StabilityLevel: metrics.ALPHA,
+		},
+		[]string{"operation_type"},
+	)
 )
 
 var registerMetrics sync.Once
@@ -57,6 +81,8 @@ var registerMetrics sync.Once
 func Register() {
 	registerMetrics.Do(func() {
 		legacyregistry.MustRegister(NetworkPluginOperationsLatency)
+		legacyregistry.MustRegister(NetworkPluginOperations)
+		legacyregistry.MustRegister(NetworkPluginOperationsErrors)
 	})
 }
 
diff --git a/pkg/kubelet/dockershim/network/plugins.go b/pkg/kubelet/dockershim/network/plugins.go
index 29b8a7e2c46..85b5146019e 100644
--- a/pkg/kubelet/dockershim/network/plugins.go
+++ b/pkg/kubelet/dockershim/network/plugins.go
@@ -382,17 +382,25 @@ func (pm *PluginManager) podUnlock(fullPodName string) {
 
 // recordOperation records operation and duration
 func recordOperation(operation string, start time.Time) {
+	metrics.NetworkPluginOperations.WithLabelValues(operation).Inc()
 	metrics.NetworkPluginOperationsLatency.WithLabelValues(operation).Observe(metrics.SinceInSeconds(start))
 }
 
+// recordError increments the error counter for the given operation type.
+func recordError(operation string) {
+	metrics.NetworkPluginOperationsErrors.WithLabelValues(operation).Inc()
+}
+
 func (pm *PluginManager) GetPodNetworkStatus(podNamespace, podName string, id kubecontainer.ContainerID) (*PodNetworkStatus, error) {
-	defer recordOperation("get_pod_network_status", time.Now())
+	const operation = "get_pod_network_status"
+	defer recordOperation(operation, time.Now())
 	fullPodName := kubecontainer.BuildPodFullName(podName, podNamespace)
 	pm.podLock(fullPodName).Lock()
 	defer pm.podUnlock(fullPodName)
 
 	netStatus, err := pm.plugin.GetPodNetworkStatus(podNamespace, podName, id)
 	if err != nil {
+		recordError(operation)
 		return nil, fmt.Errorf("networkPlugin %s failed on the status hook for pod %q: %v", pm.plugin.Name(), fullPodName, err)
 	}
 
@@ -400,13 +408,15 @@ func (pm *PluginManager) GetPodNetworkStatus(podNamespace, podName string, id ku
 }
 
 func (pm *PluginManager) SetUpPod(podNamespace, podName string, id kubecontainer.ContainerID, annotations, options map[string]string) error {
-	defer recordOperation("set_up_pod", time.Now())
+	const operation = "set_up_pod"
+	defer recordOperation(operation, time.Now())
 	fullPodName := kubecontainer.BuildPodFullName(podName, podNamespace)
 	pm.podLock(fullPodName).Lock()
 	defer pm.podUnlock(fullPodName)
 
 	klog.V(3).Infof("Calling network plugin %s to set up pod %q", pm.plugin.Name(), fullPodName)
 	if err := pm.plugin.SetUpPod(podNamespace, podName, id, annotations, options); err != nil {
+		recordError(operation)
 		return fmt.Errorf("networkPlugin %s failed to set up pod %q network: %v", pm.plugin.Name(), fullPodName, err)
 	}
 
@@ -414,13 +424,15 @@ func (pm *PluginManager) SetUpPod(podNamespace, podName string, id kubecontainer
 }
 
 func (pm *PluginManager) TearDownPod(podNamespace, podName string, id kubecontainer.ContainerID) error {
-	defer recordOperation("tear_down_pod", time.Now())
+	const operation = "tear_down_pod"
+	defer recordOperation(operation, time.Now())
 	fullPodName := kubecontainer.BuildPodFullName(podName, podNamespace)
 	pm.podLock(fullPodName).Lock()
 	defer pm.podUnlock(fullPodName)
 
 	klog.V(3).Infof("Calling network plugin %s to tear down pod %q", pm.plugin.Name(), fullPodName)
 	if err := pm.plugin.TearDownPod(podNamespace, podName, id); err != nil {
+		recordError(operation)
 		return fmt.Errorf("networkPlugin %s failed to teardown pod %q network: %v", pm.plugin.Name(), fullPodName, err)
 	}
 
diff --git a/pkg/kubelet/dockershim/network/plugins_test.go b/pkg/kubelet/dockershim/network/plugins_test.go
new file mode 100644
index 00000000000..3c01f787d6f
--- /dev/null
+++ b/pkg/kubelet/dockershim/network/plugins_test.go
@@ -0,0 +1,67 @@
+// +build !dockerless
+
+/*
+Copyright 2020 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package network
+
+import (
+	"strings"
+	"testing"
+	"time"
+
+	"k8s.io/component-base/metrics/legacyregistry"
+	"k8s.io/component-base/metrics/testutil"
+	"k8s.io/kubernetes/pkg/kubelet/dockershim/network/metrics"
+)
+
+func TestNetworkPluginManagerMetrics(t *testing.T) {
+	metrics.Register()
+
+	operation := "test_operation"
+	recordOperation(operation, time.Now())
+	recordError(operation)
+
+	cases := []struct {
+		metricName string
+		want       string
+	}{
+		{
+			metricName: "kubelet_network_plugin_operations_total",
+			want: `
+# HELP kubelet_network_plugin_operations_total [ALPHA] Cumulative number of network plugin operations by operation type.
+# TYPE kubelet_network_plugin_operations_total counter
+kubelet_network_plugin_operations_total{operation_type="test_operation"} 1
+`,
+		},
+		{
+			metricName: "kubelet_network_plugin_operations_errors_total",
+			want: `
+# HELP kubelet_network_plugin_operations_errors_total [ALPHA] Cumulative number of network plugin operation errors by operation type.
+# TYPE kubelet_network_plugin_operations_errors_total counter
+kubelet_network_plugin_operations_errors_total{operation_type="test_operation"} 1
+`,
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.metricName, func(t *testing.T) {
+			if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(tc.want), tc.metricName); err != nil {
+				t.Fatal(err)
+			}
+		})
+	}
+}