From 47938c373301a3a506d73f793ea3bc8256beb428 Mon Sep 17 00:00:00 2001 From: danielqsj Date: Wed, 26 Dec 2018 17:51:16 +0800 Subject: [PATCH] Change storage metrics to conform guideline --- .../pkg/storage/etcd/metrics/metrics.go | 89 ++++++++++++++++--- .../apiserver/pkg/storage/value/metrics.go | 42 +++++++-- 2 files changed, 112 insertions(+), 19 deletions(-) diff --git a/staging/src/k8s.io/apiserver/pkg/storage/etcd/metrics/metrics.go b/staging/src/k8s.io/apiserver/pkg/storage/etcd/metrics/metrics.go index 96385f6e601..e1c37fb509e 100644 --- a/staging/src/k8s.io/apiserver/pkg/storage/etcd/metrics/metrics.go +++ b/staging/src/k8s.io/apiserver/pkg/storage/etcd/metrics/metrics.go @@ -25,37 +25,37 @@ import ( var ( cacheHitCounterOpts = prometheus.CounterOpts{ - Name: "etcd_helper_cache_hit_count", + Name: "etcd_helper_cache_hit_total", Help: "Counter of etcd helper cache hits.", } cacheHitCounter = prometheus.NewCounter(cacheHitCounterOpts) cacheMissCounterOpts = prometheus.CounterOpts{ - Name: "etcd_helper_cache_miss_count", + Name: "etcd_helper_cache_miss_total", Help: "Counter of etcd helper cache miss.", } cacheMissCounter = prometheus.NewCounter(cacheMissCounterOpts) cacheEntryCounterOpts = prometheus.CounterOpts{ - Name: "etcd_helper_cache_entry_count", + Name: "etcd_helper_cache_entry_total", Help: "Counter of etcd helper cache entries. This can be different from etcd_helper_cache_miss_count " + "because two concurrent threads can miss the cache and generate the same entry twice.", } cacheEntryCounter = prometheus.NewCounter(cacheEntryCounterOpts) cacheGetLatency = prometheus.NewSummary( prometheus.SummaryOpts{ - Name: "etcd_request_cache_get_latencies_summary", - Help: "Latency in microseconds of getting an object from etcd cache", + Name: "etcd_request_cache_get_latency_seconds", + Help: "Latency in seconds of getting an object from etcd cache", }, ) cacheAddLatency = prometheus.NewSummary( prometheus.SummaryOpts{ - Name: "etcd_request_cache_add_latencies_summary", - Help: "Latency in microseconds of adding an object to etcd cache", + Name: "etcd_request_cache_add_latency_seconds", + Help: "Latency in seconds of adding an object to etcd cache", }, ) etcdRequestLatenciesSummary = prometheus.NewSummaryVec( prometheus.SummaryOpts{ - Name: "etcd_request_latencies_summary", - Help: "Etcd request latency summary in microseconds for each operation and object type.", + Name: "etcd_request_latency_seconds", + Help: "Etcd request latency summary in seconds for each operation and object type.", }, []string{"operation", "type"}, ) @@ -66,6 +66,42 @@ var ( }, []string{"resource"}, ) + + deprecatedCacheHitCounterOpts = prometheus.CounterOpts{ + Name: "etcd_helper_cache_hit_count", + Help: "Counter of etcd helper cache hits.", + } + deprecatedCacheHitCounter = prometheus.NewCounter(deprecatedCacheHitCounterOpts) + deprecatedCacheMissCounterOpts = prometheus.CounterOpts{ + Name: "etcd_helper_cache_miss_count", + Help: "Counter of etcd helper cache miss.", + } + deprecatedCacheMissCounter = prometheus.NewCounter(deprecatedCacheMissCounterOpts) + deprecatedCacheEntryCounterOpts = prometheus.CounterOpts{ + Name: "etcd_helper_cache_entry_count", + Help: "Counter of etcd helper cache entries. This can be different from etcd_helper_cache_miss_count " + + "because two concurrent threads can miss the cache and generate the same entry twice.", + } + deprecatedCacheEntryCounter = prometheus.NewCounter(deprecatedCacheEntryCounterOpts) + deprecatedCacheGetLatency = prometheus.NewSummary( + prometheus.SummaryOpts{ + Name: "etcd_request_cache_get_latencies_summary", + Help: "Latency in microseconds of getting an object from etcd cache", + }, + ) + deprecatedCacheAddLatency = prometheus.NewSummary( + prometheus.SummaryOpts{ + Name: "etcd_request_cache_add_latencies_summary", + Help: "Latency in microseconds of adding an object to etcd cache", + }, + ) + deprecatedEtcdRequestLatenciesSummary = prometheus.NewSummaryVec( + prometheus.SummaryOpts{ + Name: "etcd_request_latencies_summary", + Help: "Etcd request latency summary in microseconds for each operation and object type.", + }, + []string{"operation", "type"}, + ) ) var registerMetrics sync.Once @@ -81,6 +117,14 @@ func Register() { prometheus.MustRegister(cacheGetLatency) prometheus.MustRegister(etcdRequestLatenciesSummary) prometheus.MustRegister(objectCounts) + + // TODO(danielqsj): Remove the following metrics, they are deprecated + prometheus.MustRegister(deprecatedCacheHitCounter) + prometheus.MustRegister(deprecatedCacheMissCounter) + prometheus.MustRegister(deprecatedCacheEntryCounter) + prometheus.MustRegister(deprecatedCacheAddLatency) + prometheus.MustRegister(deprecatedCacheGetLatency) + prometheus.MustRegister(deprecatedEtcdRequestLatenciesSummary) }) } @@ -89,27 +133,33 @@ func UpdateObjectCount(resourcePrefix string, count int64) { } func RecordEtcdRequestLatency(verb, resource string, startTime time.Time) { - etcdRequestLatenciesSummary.WithLabelValues(verb, resource).Observe(float64(time.Since(startTime) / time.Microsecond)) + etcdRequestLatenciesSummary.WithLabelValues(verb, resource).Observe(sinceInSeconds(startTime)) + deprecatedEtcdRequestLatenciesSummary.WithLabelValues(verb, resource).Observe(sinceInMicroseconds(startTime)) } func ObserveGetCache(startTime time.Time) { - cacheGetLatency.Observe(float64(time.Since(startTime) / time.Microsecond)) + cacheGetLatency.Observe(sinceInSeconds(startTime)) + deprecatedCacheGetLatency.Observe(sinceInMicroseconds(startTime)) } func ObserveAddCache(startTime time.Time) { - cacheAddLatency.Observe(float64(time.Since(startTime) / time.Microsecond)) + cacheAddLatency.Observe(sinceInSeconds(startTime)) + deprecatedCacheAddLatency.Observe(sinceInMicroseconds(startTime)) } func ObserveCacheHit() { cacheHitCounter.Inc() + deprecatedCacheHitCounter.Inc() } func ObserveCacheMiss() { cacheMissCounter.Inc() + deprecatedCacheMissCounter.Inc() } func ObserveNewEntry() { cacheEntryCounter.Inc() + deprecatedCacheEntryCounter.Inc() } func Reset() { @@ -119,4 +169,19 @@ func Reset() { // TODO: Reset cacheAddLatency. // TODO: Reset cacheGetLatency. etcdRequestLatenciesSummary.Reset() + + deprecatedCacheHitCounter = prometheus.NewCounter(deprecatedCacheHitCounterOpts) + deprecatedCacheMissCounter = prometheus.NewCounter(deprecatedCacheMissCounterOpts) + deprecatedCacheEntryCounter = prometheus.NewCounter(deprecatedCacheEntryCounterOpts) + deprecatedEtcdRequestLatenciesSummary.Reset() +} + +// sinceInMicroseconds gets the time since the specified start in microseconds. +func sinceInMicroseconds(start time.Time) float64 { + return float64(time.Since(start).Nanoseconds() / time.Microsecond.Nanoseconds()) +} + +// sinceInSeconds gets the time since the specified start in seconds. +func sinceInSeconds(start time.Time) float64 { + return time.Since(start).Seconds() } diff --git a/staging/src/k8s.io/apiserver/pkg/storage/value/metrics.go b/staging/src/k8s.io/apiserver/pkg/storage/value/metrics.go index 1fe31678214..f16cc7344d0 100644 --- a/staging/src/k8s.io/apiserver/pkg/storage/value/metrics.go +++ b/staging/src/k8s.io/apiserver/pkg/storage/value/metrics.go @@ -30,6 +30,18 @@ const ( var ( transformerLatencies = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "transformation_latencies_seconds", + Help: "Latencies in seconds of value transformation operations.", + // In-process transformations (ex. AES CBC) complete on the order of 20 microseconds. However, when + // external KMS is involved latencies may climb into milliseconds. + Buckets: prometheus.ExponentialBuckets(5e-6, 2, 14), + }, + []string{"transformation_type"}, + ) + deprecatedTransformerLatencies = prometheus.NewHistogramVec( prometheus.HistogramOpts{ Namespace: namespace, Subsystem: subsystem, @@ -61,6 +73,15 @@ var ( ) dataKeyGenerationLatencies = prometheus.NewHistogram( + prometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "data_key_generation_latencies_seconds", + Help: "Latencies in seconds of data encryption key(DEK) generation operations.", + Buckets: prometheus.ExponentialBuckets(5e-6, 2, 14), + }, + ) + deprecatedDataKeyGenerationLatencies = prometheus.NewHistogram( prometheus.HistogramOpts{ Namespace: namespace, Subsystem: subsystem, @@ -84,9 +105,11 @@ var registerMetrics sync.Once func RegisterMetrics() { registerMetrics.Do(func() { prometheus.MustRegister(transformerLatencies) + prometheus.MustRegister(deprecatedTransformerLatencies) prometheus.MustRegister(transformerFailuresTotal) prometheus.MustRegister(envelopeTransformationCacheMissTotal) prometheus.MustRegister(dataKeyGenerationLatencies) + prometheus.MustRegister(deprecatedDataKeyGenerationLatencies) prometheus.MustRegister(dataKeyGenerationFailuresTotal) }) } @@ -98,8 +121,8 @@ func RecordTransformation(transformationType string, start time.Time, err error) return } - since := sinceInMicroseconds(start) - transformerLatencies.WithLabelValues(transformationType).Observe(float64(since)) + transformerLatencies.WithLabelValues(transformationType).Observe(sinceInSeconds(start)) + deprecatedTransformerLatencies.WithLabelValues(transformationType).Observe(sinceInMicroseconds(start)) } // RecordCacheMiss records a miss on Key Encryption Key(KEK) - call to KMS was required to decrypt KEK. @@ -114,11 +137,16 @@ func RecordDataKeyGeneration(start time.Time, err error) { return } - since := sinceInMicroseconds(start) - dataKeyGenerationLatencies.Observe(float64(since)) + dataKeyGenerationLatencies.Observe(sinceInSeconds(start)) + deprecatedDataKeyGenerationLatencies.Observe(sinceInMicroseconds(start)) } -func sinceInMicroseconds(start time.Time) int64 { - elapsedNanoseconds := time.Since(start).Nanoseconds() - return elapsedNanoseconds / int64(time.Microsecond) +// sinceInMicroseconds gets the time since the specified start in microseconds. +func sinceInMicroseconds(start time.Time) float64 { + return float64(time.Since(start).Nanoseconds() / time.Microsecond.Nanoseconds()) +} + +// sinceInSeconds gets the time since the specified start in seconds. +func sinceInSeconds(start time.Time) float64 { + return time.Since(start).Seconds() }