kubelet: add operations count and error count metrics to network plugin manager

This commit is contained in:
Anish Shah 2020-07-13 17:23:57 -07:00
parent 6079cebfae
commit 0ffe89ed0b
2 changed files with 42 additions and 4 deletions

View File

@ -28,9 +28,11 @@ import (
const (
// NetworkPluginOperationsKey is the key for operation count metrics.
NetworkPluginOperationsKey = "network_plugin_operations"
NetworkPluginOperationsKey = "network_plugin_operations_total"
// NetworkPluginOperationsLatencyKey is the key for the operation latency metrics.
NetworkPluginOperationsLatencyKey = "network_plugin_operations_duration_seconds"
// NetworkPluginOperationsErrorsKey is the key for the operations error metrics.
NetworkPluginOperationsErrorsKey = "network_plugin_operations_errors_total"
// Keep the "kubelet" subsystem for backward compatibility.
kubeletSubsystem = "kubelet"
@ -49,6 +51,28 @@ var (
},
[]string{"operation_type"},
)
// NetworkPluginOperations is a counter vector tallying network plugin
// operations, partitioned by the "operation_type" label.
NetworkPluginOperations = metrics.NewCounterVec(
	&metrics.CounterOpts{
		Name:           NetworkPluginOperationsKey,
		Subsystem:      kubeletSubsystem,
		StabilityLevel: metrics.ALPHA,
		Help:           "Cumulative number of network plugin operations by operation type.",
	},
	[]string{"operation_type"},
)
// NetworkPluginOperationsErrors is a counter vector tallying failed network
// plugin operations, partitioned by the "operation_type" label.
NetworkPluginOperationsErrors = metrics.NewCounterVec(
	&metrics.CounterOpts{
		Name:           NetworkPluginOperationsErrorsKey,
		Subsystem:      kubeletSubsystem,
		StabilityLevel: metrics.ALPHA,
		Help:           "Cumulative number of network plugin operation errors by operation type.",
	},
	[]string{"operation_type"},
)
)
var registerMetrics sync.Once
@ -57,6 +81,8 @@ var registerMetrics sync.Once
func Register() {
registerMetrics.Do(func() {
legacyregistry.MustRegister(NetworkPluginOperationsLatency)
legacyregistry.MustRegister(NetworkPluginOperations)
legacyregistry.MustRegister(NetworkPluginOperationsErrors)
})
}

View File

@ -382,17 +382,25 @@ func (pm *PluginManager) podUnlock(fullPodName string) {
// recordOperation increments the operation counter for the given operation
// label and then observes metrics.SinceInSeconds(start) on the latency metric
// under the same label.
func recordOperation(operation string, start time.Time) {
	opCount := metrics.NetworkPluginOperations.WithLabelValues(operation)
	opCount.Inc()

	opLatency := metrics.NetworkPluginOperationsLatency.WithLabelValues(operation)
	opLatency.Observe(metrics.SinceInSeconds(start))
}
// recordError increments the error counter for the given operation label.
func recordError(operation string) {
	errCount := metrics.NetworkPluginOperationsErrors.WithLabelValues(operation)
	errCount.Inc()
}
func (pm *PluginManager) GetPodNetworkStatus(podNamespace, podName string, id kubecontainer.ContainerID) (*PodNetworkStatus, error) {
defer recordOperation("get_pod_network_status", time.Now())
const operation = "get_pod_network_status"
defer recordOperation(operation, time.Now())
fullPodName := kubecontainer.BuildPodFullName(podName, podNamespace)
pm.podLock(fullPodName).Lock()
defer pm.podUnlock(fullPodName)
netStatus, err := pm.plugin.GetPodNetworkStatus(podNamespace, podName, id)
if err != nil {
recordError(operation)
return nil, fmt.Errorf("networkPlugin %s failed on the status hook for pod %q: %v", pm.plugin.Name(), fullPodName, err)
}
@ -400,13 +408,15 @@ func (pm *PluginManager) GetPodNetworkStatus(podNamespace, podName string, id ku
}
func (pm *PluginManager) SetUpPod(podNamespace, podName string, id kubecontainer.ContainerID, annotations, options map[string]string) error {
defer recordOperation("set_up_pod", time.Now())
const operation = "set_up_pod"
defer recordOperation(operation, time.Now())
fullPodName := kubecontainer.BuildPodFullName(podName, podNamespace)
pm.podLock(fullPodName).Lock()
defer pm.podUnlock(fullPodName)
klog.V(3).Infof("Calling network plugin %s to set up pod %q", pm.plugin.Name(), fullPodName)
if err := pm.plugin.SetUpPod(podNamespace, podName, id, annotations, options); err != nil {
recordError(operation)
return fmt.Errorf("networkPlugin %s failed to set up pod %q network: %v", pm.plugin.Name(), fullPodName, err)
}
@ -414,13 +424,15 @@ func (pm *PluginManager) SetUpPod(podNamespace, podName string, id kubecontainer
}
func (pm *PluginManager) TearDownPod(podNamespace, podName string, id kubecontainer.ContainerID) error {
defer recordOperation("tear_down_pod", time.Now())
const operation = "tear_down_pod"
defer recordOperation(operation, time.Now())
fullPodName := kubecontainer.BuildPodFullName(podName, podNamespace)
pm.podLock(fullPodName).Lock()
defer pm.podUnlock(fullPodName)
klog.V(3).Infof("Calling network plugin %s to tear down pod %q", pm.plugin.Name(), fullPodName)
if err := pm.plugin.TearDownPod(podNamespace, podName, id); err != nil {
recordError(operation)
return fmt.Errorf("networkPlugin %s failed to teardown pod %q network: %v", pm.plugin.Name(), fullPodName, err)
}