Mirror of https://github.com/k3s-io/kubernetes.git, synced 2025-07-26 21:17:23 +00:00
Merge pull request #72336 from danielqsj/apimetrics
Change apiserver metrics to conform to metrics guidelines
Commit: 24643fd116
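In outline, the change follows the Prometheus/Kubernetes metrics guidelines: counter names gain a _total suffix, latency metrics are reported in base units (seconds) as histograms instead of microsecond summaries, and each old series stays registered with a "(Deprecated)" help string so existing dashboards keep working while consumers migrate. A minimal, self-contained sketch of that dual-registration pattern follows; the metric names and buckets are taken from the diff below, but the single "verb" label, the standalone package, and the registration wiring are simplified for illustration and are not the apiserver's actual code.

package main

import (
	"time"

	"github.com/prometheus/client_golang/prometheus"
)

var (
	// New-style metric: histogram in base units (seconds), name carries the unit.
	requestLatencies = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "apiserver_request_latency_seconds",
			Help:    "Response latency distribution in seconds.",
			Buckets: prometheus.ExponentialBuckets(0.125, 2.0, 7), // 125 ms .. 8 s
		},
		[]string{"verb"},
	)

	// Old metric kept registered during the deprecation window, flagged in its help text.
	deprecatedRequestLatencies = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "apiserver_request_latencies",
			Help:    "(Deprecated) Response latency distribution in microseconds.",
			Buckets: prometheus.ExponentialBuckets(125000, 2.0, 7),
		},
		[]string{"verb"},
	)
)

// monitor feeds one observation into both the new and the deprecated series,
// so both stay populated until the old one is removed.
func monitor(verb string, elapsed time.Duration) {
	requestLatencies.WithLabelValues(verb).Observe(elapsed.Seconds())
	deprecatedRequestLatencies.WithLabelValues(verb).Observe(float64(elapsed / time.Microsecond))
}

func main() {
	prometheus.MustRegister(requestLatencies, deprecatedRequestLatencies)
	monitor("GET", 250*time.Millisecond)
}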
@@ -49,11 +49,18 @@ var (
 	// the upstream library supports it.
 	requestCounter = prometheus.NewCounterVec(
 		prometheus.CounterOpts{
-			Name: "apiserver_request_count",
+			Name: "apiserver_request_total",
 			Help: "Counter of apiserver requests broken out for each verb, group, version, resource, scope, component, client, and HTTP response contentType and code.",
 		},
 		[]string{"verb", "group", "version", "resource", "subresource", "scope", "component", "client", "contentType", "code"},
 	)
+	deprecatedRequestCounter = prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Name: "apiserver_request_count",
+			Help: "(Deprecated) Counter of apiserver requests broken out for each verb, group, version, resource, scope, component, client, and HTTP response contentType and code.",
+		},
+		[]string{"verb", "group", "version", "resource", "subresource", "scope", "component", "client", "contentType", "code"},
+	)
 	longRunningRequestGauge = prometheus.NewGaugeVec(
 		prometheus.GaugeOpts{
 			Name: "apiserver_longrunning_gauge",

@@ -62,18 +69,27 @@ var (
 		[]string{"verb", "group", "version", "resource", "subresource", "scope", "component"},
 	)
 	requestLatencies = prometheus.NewHistogramVec(
+		prometheus.HistogramOpts{
+			Name: "apiserver_request_latency_seconds",
+			Help: "Response latency distribution in seconds for each verb, group, version, resource, subresource, scope and component.",
+			// Use buckets ranging from 125 ms to 8 seconds.
+			Buckets: prometheus.ExponentialBuckets(0.125, 2.0, 7),
+		},
+		[]string{"verb", "group", "version", "resource", "subresource", "scope", "component"},
+	)
+	deprecatedRequestLatencies = prometheus.NewHistogramVec(
 		prometheus.HistogramOpts{
 			Name: "apiserver_request_latencies",
-			Help: "Response latency distribution in microseconds for each verb, group, version, resource, subresource, scope and component.",
+			Help: "(Deprecated) Response latency distribution in microseconds for each verb, group, version, resource, subresource, scope and component.",
 			// Use buckets ranging from 125 ms to 8 seconds.
 			Buckets: prometheus.ExponentialBuckets(125000, 2.0, 7),
 		},
 		[]string{"verb", "group", "version", "resource", "subresource", "scope", "component"},
 	)
-	requestLatenciesSummary = prometheus.NewSummaryVec(
+	deprecatedRequestLatenciesSummary = prometheus.NewSummaryVec(
 		prometheus.SummaryOpts{
 			Name: "apiserver_request_latencies_summary",
-			Help: "Response latency summary in microseconds for each verb, group, version, resource, subresource, scope and component.",
+			Help: "(Deprecated) Response latency summary in microseconds for each verb, group, version, resource, subresource, scope and component.",
 			// Make the sliding window of 5h.
 			// TODO: The value for this should be based on our SLI definition (medium term).
 			MaxAge: 5 * time.Hour,
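For reference, the new bucket layout prometheus.ExponentialBuckets(0.125, 2.0, 7) expands to upper bounds of 0.125, 0.25, 0.5, 1, 2, 4 and 8 seconds (each bucket doubling the previous one). That is the same 125 ms to 8 s range the deprecated microsecond buckets (125000 × 2^k for k = 0..6) covered, just expressed in base units of seconds.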
@@ -92,11 +108,18 @@ var (
 	// DroppedRequests is a number of requests dropped with 'Try again later' response"
 	DroppedRequests = prometheus.NewCounterVec(
 		prometheus.CounterOpts{
-			Name: "apiserver_dropped_requests",
+			Name: "apiserver_dropped_requests_total",
 			Help: "Number of requests dropped with 'Try again later' response",
 		},
 		[]string{"requestKind"},
 	)
+	DeprecatedDroppedRequests = prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Name: "apiserver_dropped_requests",
+			Help: "(Deprecated) Number of requests dropped with 'Try again later' response",
+		},
+		[]string{"requestKind"},
+	)
 	// RegisteredWatchers is a number of currently registered watchers splitted by resource.
 	RegisteredWatchers = prometheus.NewGaugeVec(
 		prometheus.GaugeOpts{

@@ -118,11 +141,14 @@ var (

 	metrics = []resettableCollector{
 		requestCounter,
+		deprecatedRequestCounter,
 		longRunningRequestGauge,
 		requestLatencies,
-		requestLatenciesSummary,
+		deprecatedRequestLatencies,
+		deprecatedRequestLatenciesSummary,
 		responseSizes,
 		DroppedRequests,
+		DeprecatedDroppedRequests,
 		RegisteredWatchers,
 		currentInflightRequests,
 	}

@@ -198,9 +224,12 @@ func MonitorRequest(req *http.Request, verb, group, version, resource, subresour
 	reportedVerb := cleanVerb(verb, req)
 	client := cleanUserAgent(utilnet.GetHTTPClient(req))
 	elapsedMicroseconds := float64(elapsed / time.Microsecond)
+	elapsedSeconds := elapsed.Seconds()
 	requestCounter.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component, client, contentType, codeToString(httpCode)).Inc()
-	requestLatencies.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(elapsedMicroseconds)
-	requestLatenciesSummary.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(elapsedMicroseconds)
+	deprecatedRequestCounter.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component, client, contentType, codeToString(httpCode)).Inc()
+	requestLatencies.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(elapsedSeconds)
+	deprecatedRequestLatencies.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(elapsedMicroseconds)
+	deprecatedRequestLatenciesSummary.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(elapsedMicroseconds)
 	// We are only interested in response sizes of read requests.
 	if verb == "GET" || verb == "LIST" {
 		responseSizes.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(float64(respSize))

@@ -163,8 +163,10 @@ func WithMaxInFlightLimit(
 	// We need to split this data between buckets used for throttling.
 	if isMutatingRequest {
 		metrics.DroppedRequests.WithLabelValues(metrics.MutatingKind).Inc()
+		metrics.DeprecatedDroppedRequests.WithLabelValues(metrics.MutatingKind).Inc()
 	} else {
 		metrics.DroppedRequests.WithLabelValues(metrics.ReadOnlyKind).Inc()
+		metrics.DeprecatedDroppedRequests.WithLabelValues(metrics.ReadOnlyKind).Inc()
 	}
 	// at this point we're about to return a 429, BUT not all actors should be rate limited. A system:master is so powerful
 	// that they should always get an answer. It's a super-admin or a loopback connection.
@@ -25,37 +25,37 @@ import (

 var (
 	cacheHitCounterOpts = prometheus.CounterOpts{
-		Name: "etcd_helper_cache_hit_count",
+		Name: "etcd_helper_cache_hit_total",
 		Help: "Counter of etcd helper cache hits.",
 	}
 	cacheHitCounter = prometheus.NewCounter(cacheHitCounterOpts)
 	cacheMissCounterOpts = prometheus.CounterOpts{
-		Name: "etcd_helper_cache_miss_count",
+		Name: "etcd_helper_cache_miss_total",
 		Help: "Counter of etcd helper cache miss.",
 	}
 	cacheMissCounter = prometheus.NewCounter(cacheMissCounterOpts)
 	cacheEntryCounterOpts = prometheus.CounterOpts{
-		Name: "etcd_helper_cache_entry_count",
+		Name: "etcd_helper_cache_entry_total",
 		Help: "Counter of etcd helper cache entries. This can be different from etcd_helper_cache_miss_count " +
 			"because two concurrent threads can miss the cache and generate the same entry twice.",
 	}
 	cacheEntryCounter = prometheus.NewCounter(cacheEntryCounterOpts)
-	cacheGetLatency = prometheus.NewSummary(
-		prometheus.SummaryOpts{
-			Name: "etcd_request_cache_get_latencies_summary",
-			Help: "Latency in microseconds of getting an object from etcd cache",
+	cacheGetLatency = prometheus.NewHistogram(
+		prometheus.HistogramOpts{
+			Name: "etcd_request_cache_get_latency_seconds",
+			Help: "Latency in seconds of getting an object from etcd cache",
 		},
 	)
-	cacheAddLatency = prometheus.NewSummary(
-		prometheus.SummaryOpts{
-			Name: "etcd_request_cache_add_latencies_summary",
-			Help: "Latency in microseconds of adding an object to etcd cache",
+	cacheAddLatency = prometheus.NewHistogram(
+		prometheus.HistogramOpts{
+			Name: "etcd_request_cache_add_latency_seconds",
+			Help: "Latency in seconds of adding an object to etcd cache",
 		},
 	)
-	etcdRequestLatenciesSummary = prometheus.NewSummaryVec(
-		prometheus.SummaryOpts{
-			Name: "etcd_request_latencies_summary",
-			Help: "Etcd request latency summary in microseconds for each operation and object type.",
+	etcdRequestLatency = prometheus.NewHistogramVec(
+		prometheus.HistogramOpts{
+			Name: "etcd_request_latency_seconds",
+			Help: "Etcd request latency in seconds for each operation and object type.",
 		},
 		[]string{"operation", "type"},
 	)

@@ -66,6 +66,42 @@ var (
 		},
 		[]string{"resource"},
 	)
+
+	deprecatedCacheHitCounterOpts = prometheus.CounterOpts{
+		Name: "etcd_helper_cache_hit_count",
+		Help: "(Deprecated) Counter of etcd helper cache hits.",
+	}
+	deprecatedCacheHitCounter = prometheus.NewCounter(deprecatedCacheHitCounterOpts)
+	deprecatedCacheMissCounterOpts = prometheus.CounterOpts{
+		Name: "etcd_helper_cache_miss_count",
+		Help: "(Deprecated) Counter of etcd helper cache miss.",
+	}
+	deprecatedCacheMissCounter = prometheus.NewCounter(deprecatedCacheMissCounterOpts)
+	deprecatedCacheEntryCounterOpts = prometheus.CounterOpts{
+		Name: "etcd_helper_cache_entry_count",
+		Help: "(Deprecated) Counter of etcd helper cache entries. This can be different from etcd_helper_cache_miss_count " +
+			"because two concurrent threads can miss the cache and generate the same entry twice.",
+	}
+	deprecatedCacheEntryCounter = prometheus.NewCounter(deprecatedCacheEntryCounterOpts)
+	deprecatedCacheGetLatency = prometheus.NewSummary(
+		prometheus.SummaryOpts{
+			Name: "etcd_request_cache_get_latencies_summary",
+			Help: "(Deprecated) Latency in microseconds of getting an object from etcd cache",
+		},
+	)
+	deprecatedCacheAddLatency = prometheus.NewSummary(
+		prometheus.SummaryOpts{
+			Name: "etcd_request_cache_add_latencies_summary",
+			Help: "(Deprecated) Latency in microseconds of adding an object to etcd cache",
+		},
+	)
+	deprecatedEtcdRequestLatenciesSummary = prometheus.NewSummaryVec(
+		prometheus.SummaryOpts{
+			Name: "etcd_request_latencies_summary",
+			Help: "(Deprecated) Etcd request latency summary in microseconds for each operation and object type.",
+		},
+		[]string{"operation", "type"},
+	)
 )

 var registerMetrics sync.Once
@@ -79,8 +115,16 @@ func Register() {
 		prometheus.MustRegister(cacheEntryCounter)
 		prometheus.MustRegister(cacheAddLatency)
 		prometheus.MustRegister(cacheGetLatency)
-		prometheus.MustRegister(etcdRequestLatenciesSummary)
+		prometheus.MustRegister(etcdRequestLatency)
 		prometheus.MustRegister(objectCounts)
+
+		// TODO(danielqsj): Remove the following metrics, they are deprecated
+		prometheus.MustRegister(deprecatedCacheHitCounter)
+		prometheus.MustRegister(deprecatedCacheMissCounter)
+		prometheus.MustRegister(deprecatedCacheEntryCounter)
+		prometheus.MustRegister(deprecatedCacheAddLatency)
+		prometheus.MustRegister(deprecatedCacheGetLatency)
+		prometheus.MustRegister(deprecatedEtcdRequestLatenciesSummary)
 	})
 }


@@ -89,27 +133,33 @@ func UpdateObjectCount(resourcePrefix string, count int64) {
 }

 func RecordEtcdRequestLatency(verb, resource string, startTime time.Time) {
-	etcdRequestLatenciesSummary.WithLabelValues(verb, resource).Observe(float64(time.Since(startTime) / time.Microsecond))
+	etcdRequestLatency.WithLabelValues(verb, resource).Observe(sinceInSeconds(startTime))
+	deprecatedEtcdRequestLatenciesSummary.WithLabelValues(verb, resource).Observe(sinceInMicroseconds(startTime))
 }

 func ObserveGetCache(startTime time.Time) {
-	cacheGetLatency.Observe(float64(time.Since(startTime) / time.Microsecond))
+	cacheGetLatency.Observe(sinceInSeconds(startTime))
+	deprecatedCacheGetLatency.Observe(sinceInMicroseconds(startTime))
 }

 func ObserveAddCache(startTime time.Time) {
-	cacheAddLatency.Observe(float64(time.Since(startTime) / time.Microsecond))
+	cacheAddLatency.Observe(sinceInSeconds(startTime))
+	deprecatedCacheAddLatency.Observe(sinceInMicroseconds(startTime))
 }

 func ObserveCacheHit() {
 	cacheHitCounter.Inc()
+	deprecatedCacheHitCounter.Inc()
 }

 func ObserveCacheMiss() {
 	cacheMissCounter.Inc()
+	deprecatedCacheMissCounter.Inc()
 }

 func ObserveNewEntry() {
 	cacheEntryCounter.Inc()
+	deprecatedCacheEntryCounter.Inc()
 }

 func Reset() {

@@ -118,5 +168,20 @@ func Reset() {
 	cacheEntryCounter = prometheus.NewCounter(cacheEntryCounterOpts)
 	// TODO: Reset cacheAddLatency.
 	// TODO: Reset cacheGetLatency.
-	etcdRequestLatenciesSummary.Reset()
+	etcdRequestLatency.Reset()
+
+	deprecatedCacheHitCounter = prometheus.NewCounter(deprecatedCacheHitCounterOpts)
+	deprecatedCacheMissCounter = prometheus.NewCounter(deprecatedCacheMissCounterOpts)
+	deprecatedCacheEntryCounter = prometheus.NewCounter(deprecatedCacheEntryCounterOpts)
+	deprecatedEtcdRequestLatenciesSummary.Reset()
+}
+
+// sinceInMicroseconds gets the time since the specified start in microseconds.
+func sinceInMicroseconds(start time.Time) float64 {
+	return float64(time.Since(start).Nanoseconds() / time.Microsecond.Nanoseconds())
+}
+
+// sinceInSeconds gets the time since the specified start in seconds.
+func sinceInSeconds(start time.Time) float64 {
+	return time.Since(start).Seconds()
 }
@@ -30,11 +30,23 @@ const (

 var (
 	transformerLatencies = prometheus.NewHistogramVec(
+		prometheus.HistogramOpts{
+			Namespace: namespace,
+			Subsystem: subsystem,
+			Name:      "transformation_latencies_seconds",
+			Help:      "Latencies in seconds of value transformation operations.",
+			// In-process transformations (ex. AES CBC) complete on the order of 20 microseconds. However, when
+			// external KMS is involved latencies may climb into milliseconds.
+			Buckets: prometheus.ExponentialBuckets(5e-6, 2, 14),
+		},
+		[]string{"transformation_type"},
+	)
+	deprecatedTransformerLatencies = prometheus.NewHistogramVec(
 		prometheus.HistogramOpts{
 			Namespace: namespace,
 			Subsystem: subsystem,
 			Name:      "transformation_latencies_microseconds",
-			Help:      "Latencies in microseconds of value transformation operations.",
+			Help:      "(Deprecated) Latencies in microseconds of value transformation operations.",
 			// In-process transformations (ex. AES CBC) complete on the order of 20 microseconds. However, when
 			// external KMS is involved latencies may climb into milliseconds.
 			Buckets: prometheus.ExponentialBuckets(5, 2, 14),

@@ -61,11 +73,20 @@ var (
 	)

 	dataKeyGenerationLatencies = prometheus.NewHistogram(
+		prometheus.HistogramOpts{
+			Namespace: namespace,
+			Subsystem: subsystem,
+			Name:      "data_key_generation_latencies_seconds",
+			Help:      "Latencies in seconds of data encryption key(DEK) generation operations.",
+			Buckets:   prometheus.ExponentialBuckets(5e-6, 2, 14),
+		},
+	)
+	deprecatedDataKeyGenerationLatencies = prometheus.NewHistogram(
 		prometheus.HistogramOpts{
 			Namespace: namespace,
 			Subsystem: subsystem,
 			Name:      "data_key_generation_latencies_microseconds",
-			Help:      "Latencies in microseconds of data encryption key(DEK) generation operations.",
+			Help:      "(Deprecated) Latencies in microseconds of data encryption key(DEK) generation operations.",
 			Buckets:   prometheus.ExponentialBuckets(5, 2, 14),
 		},
 	)

@@ -84,9 +105,11 @@ var registerMetrics sync.Once
 func RegisterMetrics() {
 	registerMetrics.Do(func() {
 		prometheus.MustRegister(transformerLatencies)
+		prometheus.MustRegister(deprecatedTransformerLatencies)
 		prometheus.MustRegister(transformerFailuresTotal)
 		prometheus.MustRegister(envelopeTransformationCacheMissTotal)
 		prometheus.MustRegister(dataKeyGenerationLatencies)
+		prometheus.MustRegister(deprecatedDataKeyGenerationLatencies)
 		prometheus.MustRegister(dataKeyGenerationFailuresTotal)
 	})
 }

@@ -98,8 +121,8 @@ func RecordTransformation(transformationType string, start time.Time, err error)
 		return
 	}

-	since := sinceInMicroseconds(start)
-	transformerLatencies.WithLabelValues(transformationType).Observe(float64(since))
+	transformerLatencies.WithLabelValues(transformationType).Observe(sinceInSeconds(start))
+	deprecatedTransformerLatencies.WithLabelValues(transformationType).Observe(sinceInMicroseconds(start))
 }

 // RecordCacheMiss records a miss on Key Encryption Key(KEK) - call to KMS was required to decrypt KEK.

@@ -114,11 +137,16 @@ func RecordDataKeyGeneration(start time.Time, err error) {
 		return
 	}

-	since := sinceInMicroseconds(start)
-	dataKeyGenerationLatencies.Observe(float64(since))
+	dataKeyGenerationLatencies.Observe(sinceInSeconds(start))
+	deprecatedDataKeyGenerationLatencies.Observe(sinceInMicroseconds(start))
 }

-func sinceInMicroseconds(start time.Time) int64 {
-	elapsedNanoseconds := time.Since(start).Nanoseconds()
-	return elapsedNanoseconds / int64(time.Microsecond)
+// sinceInMicroseconds gets the time since the specified start in microseconds.
+func sinceInMicroseconds(start time.Time) float64 {
+	return float64(time.Since(start).Nanoseconds() / time.Microsecond.Nanoseconds())
+}
+
+// sinceInSeconds gets the time since the specified start in seconds.
+func sinceInSeconds(start time.Time) float64 {
+	return time.Since(start).Seconds()
 }
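As a quick sanity check on the two helpers: for an elapsed time of 1.5 ms, sinceInSeconds returns 0.0015 while sinceInMicroseconds returns 1500 (1,500,000 ns divided by 1,000 ns per microsecond), so the new seconds-based series and the deprecated microsecond series differ only in unit, not in what they measure.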
@@ -136,14 +136,14 @@ func (m *MetricsForE2E) SummaryKind() string {
 var SchedulingLatencyMetricName = model.LabelValue(schedulermetric.SchedulerSubsystem + "_" + schedulermetric.SchedulingLatencyName)

 var InterestingApiServerMetrics = []string{
-	"apiserver_request_count",
-	"apiserver_request_latencies_summary",
-	"etcd_helper_cache_entry_count",
-	"etcd_helper_cache_hit_count",
-	"etcd_helper_cache_miss_count",
-	"etcd_request_cache_add_latencies_summary",
-	"etcd_request_cache_get_latencies_summary",
-	"etcd_request_latencies_summary",
+	"apiserver_request_total",
+	"apiserver_request_latency_seconds_summary",
+	"etcd_helper_cache_entry_total",
+	"etcd_helper_cache_hit_total",
+	"etcd_helper_cache_miss_total",
+	"etcd_request_cache_add_latency_seconds",
+	"etcd_request_cache_get_latency_seconds",
+	"etcd_request_latency_seconds",
 }

 var InterestingControllerManagerMetrics = []string{

@@ -475,10 +475,10 @@ func readLatencyMetrics(c clientset.Interface) (*APIResponsiveness, error) {

 	for _, sample := range samples {
 		// Example line:
-		// apiserver_request_latencies_summary{resource="namespaces",verb="LIST",quantile="0.99"} 908
-		// apiserver_request_count{resource="pods",verb="LIST",client="kubectl",code="200",contentType="json"} 233
-		if sample.Metric[model.MetricNameLabel] != "apiserver_request_latencies_summary" &&
-			sample.Metric[model.MetricNameLabel] != "apiserver_request_count" {
+		// apiserver_request_latency_seconds_summary{resource="namespaces",verb="LIST",quantile="0.99"} 0.000908
+		// apiserver_request_total{resource="pods",verb="LIST",client="kubectl",code="200",contentType="json"} 233
+		if sample.Metric[model.MetricNameLabel] != "apiserver_request_latency_seconds_summary" &&
+			sample.Metric[model.MetricNameLabel] != "apiserver_request_total" {
 			continue
 		}


@@ -491,14 +491,14 @@ func readLatencyMetrics(c clientset.Interface) (*APIResponsiveness, error) {
 		}

 		switch sample.Metric[model.MetricNameLabel] {
-		case "apiserver_request_latencies_summary":
+		case "apiserver_request_latency_seconds_summary":
 			latency := sample.Value
 			quantile, err := strconv.ParseFloat(string(sample.Metric[model.QuantileLabel]), 64)
 			if err != nil {
 				return nil, err
 			}
-			a.addMetricRequestLatency(resource, subresource, verb, scope, quantile, time.Duration(int64(latency))*time.Microsecond)
-		case "apiserver_request_count":
+			a.addMetricRequestLatency(resource, subresource, verb, scope, quantile, time.Duration(int64(latency))*time.Second)
+		case "apiserver_request_total":
 			count := sample.Value
 			a.addMetricRequestCount(resource, subresource, verb, scope, int(count))


@@ -120,7 +120,7 @@ func TestApiserverMetrics(t *testing.T) {
 		t.Fatal(err)
 	}
 	checkForExpectedMetrics(t, metrics, []string{
-		"apiserver_request_count",
-		"apiserver_request_latencies",
+		"apiserver_request_total",
+		"apiserver_request_latency_seconds",
 	})
 }