diff --git a/pkg/kubelet/dockershim/libdocker/instrumented_client.go b/pkg/kubelet/dockershim/libdocker/instrumented_client.go index 78a0d696481..29c0f9e6b86 100644 --- a/pkg/kubelet/dockershim/libdocker/instrumented_client.go +++ b/pkg/kubelet/dockershim/libdocker/instrumented_client.go @@ -42,7 +42,9 @@ func NewInstrumentedInterface(dockerClient Interface) Interface { // recordOperation records the duration of the operation. func recordOperation(operation string, start time.Time) { metrics.DockerOperations.WithLabelValues(operation).Inc() - metrics.DockerOperationsLatency.WithLabelValues(operation).Observe(metrics.SinceInMicroseconds(start)) + metrics.DeprecatedDockerOperations.WithLabelValues(operation).Inc() + metrics.DockerOperationsLatency.WithLabelValues(operation).Observe(metrics.SinceInSeconds(start)) + metrics.DeprecatedDockerOperationsLatency.WithLabelValues(operation).Observe(metrics.SinceInMicroseconds(start)) } // recordError records error for metric if an error occurred. @@ -50,9 +52,11 @@ func recordError(operation string, err error) { if err != nil { if _, ok := err.(operationTimeout); ok { metrics.DockerOperationsTimeout.WithLabelValues(operation).Inc() + metrics.DeprecatedDockerOperationsTimeout.WithLabelValues(operation).Inc() } // Docker operation timeout error is also a docker error, so we don't add else here. metrics.DockerOperationsErrors.WithLabelValues(operation).Inc() + metrics.DeprecatedDockerOperationsErrors.WithLabelValues(operation).Inc() } } diff --git a/pkg/kubelet/dockershim/metrics/metrics.go b/pkg/kubelet/dockershim/metrics/metrics.go index 907647970d5..d36eeba473d 100644 --- a/pkg/kubelet/dockershim/metrics/metrics.go +++ b/pkg/kubelet/dockershim/metrics/metrics.go @@ -25,13 +25,22 @@ import ( const ( // DockerOperationsKey is the key for docker operation metrics. - DockerOperationsKey = "docker_operations" + DockerOperationsKey = "docker_operations_total" // DockerOperationsLatencyKey is the key for the operation latency metrics. - DockerOperationsLatencyKey = "docker_operations_latency_microseconds" + DockerOperationsLatencyKey = "docker_operations_latency_seconds" // DockerOperationsErrorsKey is the key for the operation error metrics. - DockerOperationsErrorsKey = "docker_operations_errors" + DockerOperationsErrorsKey = "docker_operations_errors_total" // DockerOperationsTimeoutKey is the key for the operation timeout metrics. - DockerOperationsTimeoutKey = "docker_operations_timeout" + DockerOperationsTimeoutKey = "docker_operations_timeout_total" + + // DeprecatedDockerOperationsKey is the deprecated key for docker operation metrics. + DeprecatedDockerOperationsKey = "docker_operations" + // DeprecatedDockerOperationsLatencyKey is the deprecated key for the operation latency metrics. + DeprecatedDockerOperationsLatencyKey = "docker_operations_latency_microseconds" + // DeprecatedDockerOperationsErrorsKey is the deprecated key for the operation error metrics. + DeprecatedDockerOperationsErrorsKey = "docker_operations_errors" + // DeprecatedDockerOperationsTimeoutKey is the deprecated key for the operation timeout metrics. + DeprecatedDockerOperationsTimeoutKey = "docker_operations_timeout" // Keep the "kubelet" subsystem for backward compatibility. kubeletSubsystem = "kubelet" @@ -40,11 +49,12 @@ const ( var ( // DockerOperationsLatency collects operation latency numbers by operation // type. - DockerOperationsLatency = prometheus.NewSummaryVec( - prometheus.SummaryOpts{ + DockerOperationsLatency = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ Subsystem: kubeletSubsystem, Name: DockerOperationsLatencyKey, - Help: "Latency in microseconds of Docker operations. Broken down by operation type.", + Help: "Latency in seconds of Docker operations. Broken down by operation type.", + Buckets: prometheus.DefBuckets, }, []string{"operation_type"}, ) @@ -76,6 +86,45 @@ var ( }, []string{"operation_type"}, ) + + // DeprecatedDockerOperationsLatency collects operation latency numbers by operation + // type. + DeprecatedDockerOperationsLatency = prometheus.NewSummaryVec( + prometheus.SummaryOpts{ + Subsystem: kubeletSubsystem, + Name: DeprecatedDockerOperationsLatencyKey, + Help: "(Deprecated) Latency in microseconds of Docker operations. Broken down by operation type.", + }, + []string{"operation_type"}, + ) + // DeprecatedDockerOperations collects operation counts by operation type. + DeprecatedDockerOperations = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Subsystem: kubeletSubsystem, + Name: DeprecatedDockerOperationsKey, + Help: "(Deprecated) Cumulative number of Docker operations by operation type.", + }, + []string{"operation_type"}, + ) + // DeprecatedDockerOperationsErrors collects operation errors by operation + // type. + DeprecatedDockerOperationsErrors = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Subsystem: kubeletSubsystem, + Name: DeprecatedDockerOperationsErrorsKey, + Help: "(Deprecated) Cumulative number of Docker operation errors by operation type.", + }, + []string{"operation_type"}, + ) + // DeprecatedDockerOperationsTimeout collects operation timeouts by operation type. + DeprecatedDockerOperationsTimeout = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Subsystem: kubeletSubsystem, + Name: DeprecatedDockerOperationsTimeoutKey, + Help: "(Deprecated) Cumulative number of Docker operation timeout by operation type.", + }, + []string{"operation_type"}, + ) ) var registerMetrics sync.Once @@ -87,6 +136,10 @@ func Register() { prometheus.MustRegister(DockerOperations) prometheus.MustRegister(DockerOperationsErrors) prometheus.MustRegister(DockerOperationsTimeout) + prometheus.MustRegister(DeprecatedDockerOperationsLatency) + prometheus.MustRegister(DeprecatedDockerOperations) + prometheus.MustRegister(DeprecatedDockerOperationsErrors) + prometheus.MustRegister(DeprecatedDockerOperationsTimeout) }) } @@ -94,3 +147,8 @@ func Register() { func SinceInMicroseconds(start time.Time) float64 { return float64(time.Since(start).Nanoseconds() / time.Microsecond.Nanoseconds()) } + +// SinceInSeconds gets the time since the specified start in seconds. +func SinceInSeconds(start time.Time) float64 { + return time.Since(start).Seconds() +} diff --git a/pkg/kubelet/dockershim/network/metrics/metrics.go b/pkg/kubelet/dockershim/network/metrics/metrics.go index 9e4ff185517..b7cc13c88e4 100644 --- a/pkg/kubelet/dockershim/network/metrics/metrics.go +++ b/pkg/kubelet/dockershim/network/metrics/metrics.go @@ -27,7 +27,9 @@ const ( // NetworkPluginOperationsKey is the key for operation count metrics. NetworkPluginOperationsKey = "network_plugin_operations" // NetworkPluginOperationsLatencyKey is the key for the operation latency metrics. - NetworkPluginOperationsLatencyKey = "network_plugin_operations_latency_microseconds" + NetworkPluginOperationsLatencyKey = "network_plugin_operations_latency_seconds" + // DeprecatedNetworkPluginOperationsLatencyKey is the deprecated key for the operation latency metrics. + DeprecatedNetworkPluginOperationsLatencyKey = "network_plugin_operations_latency_microseconds" // Keep the "kubelet" subsystem for backward compatibility. kubeletSubsystem = "kubelet" @@ -36,11 +38,23 @@ const ( var ( // NetworkPluginOperationsLatency collects operation latency numbers by operation // type. - NetworkPluginOperationsLatency = prometheus.NewSummaryVec( - prometheus.SummaryOpts{ + NetworkPluginOperationsLatency = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ Subsystem: kubeletSubsystem, Name: NetworkPluginOperationsLatencyKey, - Help: "Latency in microseconds of network plugin operations. Broken down by operation type.", + Help: "Latency in seconds of network plugin operations. Broken down by operation type.", + Buckets: prometheus.DefBuckets, + }, + []string{"operation_type"}, + ) + + // DeprecatedNetworkPluginOperationsLatency collects operation latency numbers by operation + // type. + DeprecatedNetworkPluginOperationsLatency = prometheus.NewSummaryVec( + prometheus.SummaryOpts{ + Subsystem: kubeletSubsystem, + Name: DeprecatedNetworkPluginOperationsLatencyKey, + Help: "(Deprecated) Latency in microseconds of network plugin operations. Broken down by operation type.", }, []string{"operation_type"}, ) @@ -52,6 +66,7 @@ var registerMetrics sync.Once func Register() { registerMetrics.Do(func() { prometheus.MustRegister(NetworkPluginOperationsLatency) + prometheus.MustRegister(DeprecatedNetworkPluginOperationsLatency) }) } @@ -59,3 +74,8 @@ func Register() { func SinceInMicroseconds(start time.Time) float64 { return float64(time.Since(start).Nanoseconds() / time.Microsecond.Nanoseconds()) } + +// SinceInSeconds gets the time since the specified start in seconds. +func SinceInSeconds(start time.Time) float64 { + return time.Since(start).Seconds() +} diff --git a/pkg/kubelet/dockershim/network/plugins.go b/pkg/kubelet/dockershim/network/plugins.go index c67c1a355b6..bc1173b3ff2 100644 --- a/pkg/kubelet/dockershim/network/plugins.go +++ b/pkg/kubelet/dockershim/network/plugins.go @@ -351,7 +351,8 @@ func (pm *PluginManager) podUnlock(fullPodName string) { // recordOperation records operation and duration func recordOperation(operation string, start time.Time) { - metrics.NetworkPluginOperationsLatency.WithLabelValues(operation).Observe(metrics.SinceInMicroseconds(start)) + metrics.NetworkPluginOperationsLatency.WithLabelValues(operation).Observe(metrics.SinceInSeconds(start)) + metrics.DeprecatedNetworkPluginOperationsLatency.WithLabelValues(operation).Observe(metrics.SinceInMicroseconds(start)) } func (pm *PluginManager) GetPodNetworkStatus(podNamespace, podName string, id kubecontainer.ContainerID) (*PodNetworkStatus, error) { diff --git a/test/e2e/framework/metrics_util.go b/test/e2e/framework/metrics_util.go index 83adb80b4c6..74c0e559821 100644 --- a/test/e2e/framework/metrics_util.go +++ b/test/e2e/framework/metrics_util.go @@ -171,7 +171,7 @@ var InterestingControllerManagerMetrics = []string{ var InterestingKubeletMetrics = []string{ "kubelet_container_manager_latency_microseconds", "kubelet_docker_errors", - "kubelet_docker_operations_latency_microseconds", + "kubelet_docker_operations_latency_seconds", "kubelet_generate_pod_status_latency_microseconds", "kubelet_pod_start_latency_microseconds", "kubelet_pod_worker_latency_microseconds",