From 94d1050303c56e0252dfe57c26324796d5c74095 Mon Sep 17 00:00:00 2001 From: danielqsj Date: Wed, 26 Dec 2018 15:58:04 +0800 Subject: [PATCH 1/3] Change docker metrics to conform guideline --- .../libdocker/instrumented_client.go | 6 +- pkg/kubelet/dockershim/metrics/metrics.go | 67 +++++++++++++++++-- .../dockershim/network/metrics/metrics.go | 21 +++++- pkg/kubelet/dockershim/network/plugins.go | 3 +- test/e2e/framework/metrics_util.go | 2 +- 5 files changed, 90 insertions(+), 9 deletions(-) diff --git a/pkg/kubelet/dockershim/libdocker/instrumented_client.go b/pkg/kubelet/dockershim/libdocker/instrumented_client.go index 78a0d696481..29c0f9e6b86 100644 --- a/pkg/kubelet/dockershim/libdocker/instrumented_client.go +++ b/pkg/kubelet/dockershim/libdocker/instrumented_client.go @@ -42,7 +42,9 @@ func NewInstrumentedInterface(dockerClient Interface) Interface { // recordOperation records the duration of the operation. func recordOperation(operation string, start time.Time) { metrics.DockerOperations.WithLabelValues(operation).Inc() - metrics.DockerOperationsLatency.WithLabelValues(operation).Observe(metrics.SinceInMicroseconds(start)) + metrics.DeprecatedDockerOperations.WithLabelValues(operation).Inc() + metrics.DockerOperationsLatency.WithLabelValues(operation).Observe(metrics.SinceInSeconds(start)) + metrics.DeprecatedDockerOperationsLatency.WithLabelValues(operation).Observe(metrics.SinceInMicroseconds(start)) } // recordError records error for metric if an error occurred. @@ -50,9 +52,11 @@ func recordError(operation string, err error) { if err != nil { if _, ok := err.(operationTimeout); ok { metrics.DockerOperationsTimeout.WithLabelValues(operation).Inc() + metrics.DeprecatedDockerOperationsTimeout.WithLabelValues(operation).Inc() } // Docker operation timeout error is also a docker error, so we don't add else here. metrics.DockerOperationsErrors.WithLabelValues(operation).Inc() + metrics.DeprecatedDockerOperationsErrors.WithLabelValues(operation).Inc() } } diff --git a/pkg/kubelet/dockershim/metrics/metrics.go b/pkg/kubelet/dockershim/metrics/metrics.go index 907647970d5..51be147d043 100644 --- a/pkg/kubelet/dockershim/metrics/metrics.go +++ b/pkg/kubelet/dockershim/metrics/metrics.go @@ -25,13 +25,22 @@ import ( const ( // DockerOperationsKey is the key for docker operation metrics. - DockerOperationsKey = "docker_operations" + DockerOperationsKey = "docker_operations_total" // DockerOperationsLatencyKey is the key for the operation latency metrics. - DockerOperationsLatencyKey = "docker_operations_latency_microseconds" + DockerOperationsLatencyKey = "docker_operations_latency_seconds" // DockerOperationsErrorsKey is the key for the operation error metrics. - DockerOperationsErrorsKey = "docker_operations_errors" + DockerOperationsErrorsKey = "docker_operations_errors_total" // DockerOperationsTimeoutKey is the key for the operation timeout metrics. - DockerOperationsTimeoutKey = "docker_operations_timeout" + DockerOperationsTimeoutKey = "docker_operations_timeout_total" + + // DeprecatedDockerOperationsKey is the deprecated key for docker operation metrics. + DeprecatedDockerOperationsKey = "docker_operations" + // DeprecatedDockerOperationsLatencyKey is the deprecated key for the operation latency metrics. + DeprecatedDockerOperationsLatencyKey = "docker_operations_latency_microseconds" + // DeprecatedDockerOperationsErrorsKey is the deprecated key for the operation error metrics. + DeprecatedDockerOperationsErrorsKey = "docker_operations_errors" + // DeprecatedDockerOperationsTimeoutKey is the deprecated key for the operation timeout metrics. + DeprecatedDockerOperationsTimeoutKey = "docker_operations_timeout" // Keep the "kubelet" subsystem for backward compatibility. kubeletSubsystem = "kubelet" @@ -44,7 +53,7 @@ var ( prometheus.SummaryOpts{ Subsystem: kubeletSubsystem, Name: DockerOperationsLatencyKey, - Help: "Latency in microseconds of Docker operations. Broken down by operation type.", + Help: "Latency in seconds of Docker operations. Broken down by operation type.", }, []string{"operation_type"}, ) @@ -76,6 +85,45 @@ var ( }, []string{"operation_type"}, ) + + // DeprecatedDockerOperationsLatency collects operation latency numbers by operation + // type. + DeprecatedDockerOperationsLatency = prometheus.NewSummaryVec( + prometheus.SummaryOpts{ + Subsystem: kubeletSubsystem, + Name: DeprecatedDockerOperationsLatencyKey, + Help: "Latency in microseconds of Docker operations. Broken down by operation type.", + }, + []string{"operation_type"}, + ) + // DeprecatedDockerOperations collects operation counts by operation type. + DeprecatedDockerOperations = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Subsystem: kubeletSubsystem, + Name: DeprecatedDockerOperationsKey, + Help: "Cumulative number of Docker operations by operation type.", + }, + []string{"operation_type"}, + ) + // DeprecatedDockerOperationsErrors collects operation errors by operation + // type. + DeprecatedDockerOperationsErrors = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Subsystem: kubeletSubsystem, + Name: DeprecatedDockerOperationsErrorsKey, + Help: "Cumulative number of Docker operation errors by operation type.", + }, + []string{"operation_type"}, + ) + // DeprecatedDockerOperationsTimeout collects operation timeouts by operation type. + DeprecatedDockerOperationsTimeout = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Subsystem: kubeletSubsystem, + Name: DeprecatedDockerOperationsTimeoutKey, + Help: "Cumulative number of Docker operation timeout by operation type.", + }, + []string{"operation_type"}, + ) ) var registerMetrics sync.Once @@ -87,6 +135,10 @@ func Register() { prometheus.MustRegister(DockerOperations) prometheus.MustRegister(DockerOperationsErrors) prometheus.MustRegister(DockerOperationsTimeout) + prometheus.MustRegister(DeprecatedDockerOperationsLatency) + prometheus.MustRegister(DeprecatedDockerOperations) + prometheus.MustRegister(DeprecatedDockerOperationsErrors) + prometheus.MustRegister(DeprecatedDockerOperationsTimeout) }) } @@ -94,3 +146,8 @@ func Register() { func SinceInMicroseconds(start time.Time) float64 { return float64(time.Since(start).Nanoseconds() / time.Microsecond.Nanoseconds()) } + +// SinceInSeconds gets the time since the specified start in seconds. +func SinceInSeconds(start time.Time) float64 { + return time.Since(start).Seconds() +} diff --git a/pkg/kubelet/dockershim/network/metrics/metrics.go b/pkg/kubelet/dockershim/network/metrics/metrics.go index 9e4ff185517..d6082048145 100644 --- a/pkg/kubelet/dockershim/network/metrics/metrics.go +++ b/pkg/kubelet/dockershim/network/metrics/metrics.go @@ -27,7 +27,9 @@ const ( // NetworkPluginOperationsKey is the key for operation count metrics. NetworkPluginOperationsKey = "network_plugin_operations" // NetworkPluginOperationsLatencyKey is the key for the operation latency metrics. - NetworkPluginOperationsLatencyKey = "network_plugin_operations_latency_microseconds" + NetworkPluginOperationsLatencyKey = "network_plugin_operations_latency_seconds" + // DeprecatedNetworkPluginOperationsLatencyKey is the deprecated key for the operation latency metrics. + DeprecatedNetworkPluginOperationsLatencyKey = "network_plugin_operations_latency_microseconds" // Keep the "kubelet" subsystem for backward compatibility. kubeletSubsystem = "kubelet" @@ -40,6 +42,17 @@ var ( prometheus.SummaryOpts{ Subsystem: kubeletSubsystem, Name: NetworkPluginOperationsLatencyKey, + Help: "Latency in seconds of network plugin operations. Broken down by operation type.", + }, + []string{"operation_type"}, + ) + + // DeprecatedNetworkPluginOperationsLatency collects operation latency numbers by operation + // type. + DeprecatedNetworkPluginOperationsLatency = prometheus.NewSummaryVec( + prometheus.SummaryOpts{ + Subsystem: kubeletSubsystem, + Name: DeprecatedNetworkPluginOperationsLatencyKey, Help: "Latency in microseconds of network plugin operations. Broken down by operation type.", }, []string{"operation_type"}, @@ -52,6 +65,7 @@ var registerMetrics sync.Once func Register() { registerMetrics.Do(func() { prometheus.MustRegister(NetworkPluginOperationsLatency) + prometheus.MustRegister(DeprecatedNetworkPluginOperationsLatency) }) } @@ -59,3 +73,8 @@ func Register() { func SinceInMicroseconds(start time.Time) float64 { return float64(time.Since(start).Nanoseconds() / time.Microsecond.Nanoseconds()) } + +// SinceInSeconds gets the time since the specified start in seconds. +func SinceInSeconds(start time.Time) float64 { + return time.Since(start).Seconds() +} diff --git a/pkg/kubelet/dockershim/network/plugins.go b/pkg/kubelet/dockershim/network/plugins.go index c67c1a355b6..bc1173b3ff2 100644 --- a/pkg/kubelet/dockershim/network/plugins.go +++ b/pkg/kubelet/dockershim/network/plugins.go @@ -351,7 +351,8 @@ func (pm *PluginManager) podUnlock(fullPodName string) { // recordOperation records operation and duration func recordOperation(operation string, start time.Time) { - metrics.NetworkPluginOperationsLatency.WithLabelValues(operation).Observe(metrics.SinceInMicroseconds(start)) + metrics.NetworkPluginOperationsLatency.WithLabelValues(operation).Observe(metrics.SinceInSeconds(start)) + metrics.DeprecatedNetworkPluginOperationsLatency.WithLabelValues(operation).Observe(metrics.SinceInMicroseconds(start)) } func (pm *PluginManager) GetPodNetworkStatus(podNamespace, podName string, id kubecontainer.ContainerID) (*PodNetworkStatus, error) { diff --git a/test/e2e/framework/metrics_util.go b/test/e2e/framework/metrics_util.go index 9f651df0d37..762fd34afc5 100644 --- a/test/e2e/framework/metrics_util.go +++ b/test/e2e/framework/metrics_util.go @@ -169,7 +169,7 @@ var InterestingControllerManagerMetrics = []string{ var InterestingKubeletMetrics = []string{ "kubelet_container_manager_latency_microseconds", "kubelet_docker_errors", - "kubelet_docker_operations_latency_microseconds", + "kubelet_docker_operations_latency_seconds", "kubelet_generate_pod_status_latency_microseconds", "kubelet_pod_start_latency_microseconds", "kubelet_pod_worker_latency_microseconds", From 65aec219c8f7c36de9e25857b8a64b374a6c4f44 Mon Sep 17 00:00:00 2001 From: danielqsj Date: Wed, 26 Dec 2018 16:00:38 +0800 Subject: [PATCH 2/3] Move docker metrics to histogram metrics --- pkg/kubelet/dockershim/metrics/metrics.go | 5 +++-- pkg/kubelet/dockershim/network/metrics/metrics.go | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/pkg/kubelet/dockershim/metrics/metrics.go b/pkg/kubelet/dockershim/metrics/metrics.go index 51be147d043..e513846bc81 100644 --- a/pkg/kubelet/dockershim/metrics/metrics.go +++ b/pkg/kubelet/dockershim/metrics/metrics.go @@ -49,11 +49,12 @@ const ( var ( // DockerOperationsLatency collects operation latency numbers by operation // type. - DockerOperationsLatency = prometheus.NewSummaryVec( - prometheus.SummaryOpts{ + DockerOperationsLatency = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ Subsystem: kubeletSubsystem, Name: DockerOperationsLatencyKey, Help: "Latency in seconds of Docker operations. Broken down by operation type.", + Buckets: prometheus.DefBuckets, }, []string{"operation_type"}, ) diff --git a/pkg/kubelet/dockershim/network/metrics/metrics.go b/pkg/kubelet/dockershim/network/metrics/metrics.go index d6082048145..8ece2169da1 100644 --- a/pkg/kubelet/dockershim/network/metrics/metrics.go +++ b/pkg/kubelet/dockershim/network/metrics/metrics.go @@ -38,11 +38,12 @@ const ( var ( // NetworkPluginOperationsLatency collects operation latency numbers by operation // type. - NetworkPluginOperationsLatency = prometheus.NewSummaryVec( - prometheus.SummaryOpts{ + NetworkPluginOperationsLatency = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ Subsystem: kubeletSubsystem, Name: NetworkPluginOperationsLatencyKey, Help: "Latency in seconds of network plugin operations. Broken down by operation type.", + Buckets: prometheus.DefBuckets, }, []string{"operation_type"}, ) From 7aef2efe5b3f5502c89aa7db6decdd53d0db1f25 Mon Sep 17 00:00:00 2001 From: danielqsj Date: Tue, 8 Jan 2019 15:24:44 +0800 Subject: [PATCH 3/3] Mark deprecated in related dockershim metrics --- pkg/kubelet/dockershim/metrics/metrics.go | 8 ++++---- pkg/kubelet/dockershim/network/metrics/metrics.go | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/kubelet/dockershim/metrics/metrics.go b/pkg/kubelet/dockershim/metrics/metrics.go index e513846bc81..d36eeba473d 100644 --- a/pkg/kubelet/dockershim/metrics/metrics.go +++ b/pkg/kubelet/dockershim/metrics/metrics.go @@ -93,7 +93,7 @@ var ( prometheus.SummaryOpts{ Subsystem: kubeletSubsystem, Name: DeprecatedDockerOperationsLatencyKey, - Help: "Latency in microseconds of Docker operations. Broken down by operation type.", + Help: "(Deprecated) Latency in microseconds of Docker operations. Broken down by operation type.", }, []string{"operation_type"}, ) @@ -102,7 +102,7 @@ var ( prometheus.CounterOpts{ Subsystem: kubeletSubsystem, Name: DeprecatedDockerOperationsKey, - Help: "Cumulative number of Docker operations by operation type.", + Help: "(Deprecated) Cumulative number of Docker operations by operation type.", }, []string{"operation_type"}, ) @@ -112,7 +112,7 @@ var ( prometheus.CounterOpts{ Subsystem: kubeletSubsystem, Name: DeprecatedDockerOperationsErrorsKey, - Help: "Cumulative number of Docker operation errors by operation type.", + Help: "(Deprecated) Cumulative number of Docker operation errors by operation type.", }, []string{"operation_type"}, ) @@ -121,7 +121,7 @@ var ( prometheus.CounterOpts{ Subsystem: kubeletSubsystem, Name: DeprecatedDockerOperationsTimeoutKey, - Help: "Cumulative number of Docker operation timeout by operation type.", + Help: "(Deprecated) Cumulative number of Docker operation timeout by operation type.", }, []string{"operation_type"}, ) diff --git a/pkg/kubelet/dockershim/network/metrics/metrics.go b/pkg/kubelet/dockershim/network/metrics/metrics.go index 8ece2169da1..b7cc13c88e4 100644 --- a/pkg/kubelet/dockershim/network/metrics/metrics.go +++ b/pkg/kubelet/dockershim/network/metrics/metrics.go @@ -54,7 +54,7 @@ var ( prometheus.SummaryOpts{ Subsystem: kubeletSubsystem, Name: DeprecatedNetworkPluginOperationsLatencyKey, - Help: "Latency in microseconds of network plugin operations. Broken down by operation type.", + Help: "(Deprecated) Latency in microseconds of network plugin operations. Broken down by operation type.", }, []string{"operation_type"}, )