From bd0f7f8ee8f7f1c7809e17fa60804bb37f65c495 Mon Sep 17 00:00:00 2001 From: Rita Zhang Date: Sun, 29 Jan 2023 22:40:18 -0800 Subject: [PATCH] kmsv2: add metrics Signed-off-by: Rita Zhang --- .../server/options/encryptionconfig/config.go | 4 +- .../value/encrypt/envelope/kmsv2/envelope.go | 33 +++- .../encrypt/envelope/kmsv2/envelope_test.go | 85 +++++++++- .../value/encrypt/envelope/metrics/metrics.go | 154 ++++++++++++++++- .../encrypt/envelope/metrics/metrics_test.go | 158 ++++++++++++++++++ .../apiserver/pkg/storage/value/metrics.go | 6 +- .../pkg/storage/value/metrics_test.go | 108 ++++++++++++ .../pkg/storage/value/transformer.go | 8 +- .../documentation/documentation-list.yaml | 2 + .../documentation/documentation.md | 2 +- 10 files changed, 529 insertions(+), 31 deletions(-) diff --git a/staging/src/k8s.io/apiserver/pkg/server/options/encryptionconfig/config.go b/staging/src/k8s.io/apiserver/pkg/server/options/encryptionconfig/config.go index e1ff774ea0f..20bb1550535 100644 --- a/staging/src/k8s.io/apiserver/pkg/server/options/encryptionconfig/config.go +++ b/staging/src/k8s.io/apiserver/pkg/server/options/encryptionconfig/config.go @@ -46,6 +46,7 @@ import ( aestransformer "k8s.io/apiserver/pkg/storage/value/encrypt/aes" "k8s.io/apiserver/pkg/storage/value/encrypt/envelope" envelopekmsv2 "k8s.io/apiserver/pkg/storage/value/encrypt/envelope/kmsv2" + "k8s.io/apiserver/pkg/storage/value/encrypt/envelope/metrics" "k8s.io/apiserver/pkg/storage/value/encrypt/identity" "k8s.io/apiserver/pkg/storage/value/encrypt/secretbox" utilfeature "k8s.io/apiserver/pkg/util/feature" @@ -279,6 +280,7 @@ func (h *kmsv2PluginProbe) check(ctx context.Context) error { // we coast on the last valid key ID that we have observed if err := envelopekmsv2.ValidateKeyID(p.KeyID); err == nil { h.keyID.Store(&p.KeyID) + metrics.RecordKeyIDFromStatus(h.name, p.KeyID) } if err := isKMSv2ProviderHealthy(h.name, p); err != nil { @@ -598,7 +600,7 @@ func kmsPrefixTransformer(ctx context.Context, config *apiserverconfig.KMSConfig // using AES-GCM by default for encrypting data with KMSv2 transformer := value.PrefixTransformer{ - Transformer: envelopekmsv2.NewEnvelopeTransformer(envelopeService, probe.getCurrentKeyID, aestransformer.NewGCMTransformer), + Transformer: envelopekmsv2.NewEnvelopeTransformer(envelopeService, kmsName, probe.getCurrentKeyID, probe.check, aestransformer.NewGCMTransformer), Prefix: []byte(kmsTransformerPrefixV2 + kmsName + ":"), } diff --git a/staging/src/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/kmsv2/envelope.go b/staging/src/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/kmsv2/envelope.go index 4ee71852701..b0af7ebb1cf 100644 --- a/staging/src/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/kmsv2/envelope.go +++ b/staging/src/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/kmsv2/envelope.go @@ -38,6 +38,11 @@ import ( "k8s.io/utils/clock" ) +func init() { + value.RegisterMetrics() + metrics.RegisterMetrics() +} + const ( // KMSAPIVersion is the version of the KMS API. KMSAPIVersion = "v2alpha1" @@ -52,11 +57,13 @@ const ( ) type KeyIDGetterFunc func(context.Context) (keyID string, err error) +type ProbeHealthzCheckFunc func(context.Context) (err error) type envelopeTransformer struct { - envelopeService kmsservice.Service - - keyIDGetter KeyIDGetterFunc + envelopeService kmsservice.Service + providerName string + keyIDGetter KeyIDGetterFunc + probeHealthzCheck ProbeHealthzCheckFunc // baseTransformerFunc creates a new transformer for encrypting the data with the DEK. baseTransformerFunc func(cipher.Block) value.Transformer @@ -67,14 +74,16 @@ type envelopeTransformer struct { // NewEnvelopeTransformer returns a transformer which implements a KEK-DEK based envelope encryption scheme. // It uses envelopeService to encrypt and decrypt DEKs. Respective DEKs (in encrypted form) are prepended to // the data items they encrypt. -func NewEnvelopeTransformer(envelopeService kmsservice.Service, keyIDGetter KeyIDGetterFunc, baseTransformerFunc func(cipher.Block) value.Transformer) value.Transformer { - return newEnvelopeTransformerWithClock(envelopeService, keyIDGetter, baseTransformerFunc, cacheTTL, clock.RealClock{}) +func NewEnvelopeTransformer(envelopeService kmsservice.Service, providerName string, keyIDGetter KeyIDGetterFunc, probeHealthzCheck ProbeHealthzCheckFunc, baseTransformerFunc func(cipher.Block) value.Transformer) value.Transformer { + return newEnvelopeTransformerWithClock(envelopeService, providerName, keyIDGetter, probeHealthzCheck, baseTransformerFunc, cacheTTL, clock.RealClock{}) } -func newEnvelopeTransformerWithClock(envelopeService kmsservice.Service, keyIDGetter KeyIDGetterFunc, baseTransformerFunc func(cipher.Block) value.Transformer, cacheTTL time.Duration, clock clock.Clock) value.Transformer { +func newEnvelopeTransformerWithClock(envelopeService kmsservice.Service, providerName string, keyIDGetter KeyIDGetterFunc, probeHealthzCheck ProbeHealthzCheckFunc, baseTransformerFunc func(cipher.Block) value.Transformer, cacheTTL time.Duration, clock clock.Clock) value.Transformer { return &envelopeTransformer{ envelopeService: envelopeService, + providerName: providerName, keyIDGetter: keyIDGetter, + probeHealthzCheck: probeHealthzCheck, cache: newSimpleCache(clock, cacheTTL), baseTransformerFunc: baseTransformerFunc, } @@ -111,6 +120,8 @@ func (t *envelopeTransformer) TransformFromStorage(ctx context.Context, data []b return nil, false, err } } + // It's possible to record empty keyID + metrics.RecordKeyID(metrics.FromStorageLabel, t.providerName, encryptedObject.KeyID) out, stale, err := transformer.TransformFromStorage(ctx, encryptedObject.EncryptedData, dataCtx) if err != nil { @@ -125,6 +136,7 @@ func (t *envelopeTransformer) TransformFromStorage(ctx context.Context, data []b if err != nil { return nil, false, err } + return out, encryptedObject.KeyID != keyID, nil } @@ -154,6 +166,8 @@ func (t *envelopeTransformer) TransformToStorage(ctx context.Context, data []byt return nil, err } + metrics.RecordKeyID(metrics.ToStorageLabel, t.providerName, resp.KeyID) + encObject := &kmstypes.EncryptedObject{ KeyID: resp.KeyID, EncryptedDEK: resp.Ciphertext, @@ -164,7 +178,12 @@ func (t *envelopeTransformer) TransformToStorage(ctx context.Context, data []byt // Check keyID freshness and write to log if key IDs are different statusKeyID, err := t.keyIDGetter(ctx) if err == nil && encObject.KeyID != statusKeyID { - klog.V(2).InfoS("observed different key IDs when encrypting content using kms v2 envelope service", "uid", uid, "objectKeyID", encObject.KeyID, "statusKeyID", statusKeyID) + klog.V(2).InfoS("observed different key IDs when encrypting content using kms v2 envelope service", "uid", uid, "objectKeyID", encObject.KeyID, "statusKeyID", statusKeyID, "providerName", t.providerName) + + // trigger health probe check immediately to ensure keyID freshness + if err := t.probeHealthzCheck(ctx); err != nil { + klog.V(2).ErrorS(err, "kms plugin failed health check probe", "name", t.providerName) + } } // Serialize the EncryptedObject to a byte array. diff --git a/staging/src/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/kmsv2/envelope_test.go b/staging/src/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/kmsv2/envelope_test.go index d84b0bc8963..0670a6afdae 100644 --- a/staging/src/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/kmsv2/envelope_test.go +++ b/staging/src/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/kmsv2/envelope_test.go @@ -31,6 +31,9 @@ import ( "k8s.io/apiserver/pkg/storage/value" aestransformer "k8s.io/apiserver/pkg/storage/value/encrypt/aes" kmstypes "k8s.io/apiserver/pkg/storage/value/encrypt/envelope/kmsv2/v2alpha1" + "k8s.io/apiserver/pkg/storage/value/encrypt/envelope/metrics" + "k8s.io/component-base/metrics/legacyregistry" + "k8s.io/component-base/metrics/testutil" kmsservice "k8s.io/kms/service" testingclock "k8s.io/utils/clock/testing" ) @@ -38,6 +41,7 @@ import ( const ( testText = "abcdefghijklmnopqrstuvwxyz" testContextText = "0123456789" + testKeyHash = "sha256:6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b" testKeyVersion = "1" testCacheTTL = 10 * time.Second ) @@ -132,13 +136,16 @@ func TestEnvelopeCaching(t *testing.T) { t.Run(tt.desc, func(t *testing.T) { envelopeService := newTestEnvelopeService() fakeClock := testingclock.NewFakeClock(time.Now()) - envelopeTransformer := newEnvelopeTransformerWithClock(envelopeService, + envelopeTransformer := newEnvelopeTransformerWithClock(envelopeService, testProviderName, func(ctx context.Context) (string, error) { return "", nil }, + func(ctx context.Context) error { + return nil + }, aestransformer.NewGCMTransformer, tt.cacheTTL, fakeClock) - ctx := context.Background() + ctx := testContext(t) dataCtx := value.DefaultContext([]byte(testContextText)) originalText := []byte(testText) @@ -211,13 +218,16 @@ func TestEnvelopeTransformerKeyIDGetter(t *testing.T) { t.Run(tt.desc, func(t *testing.T) { t.Parallel() envelopeService := newTestEnvelopeService() - envelopeTransformer := NewEnvelopeTransformer(envelopeService, + envelopeTransformer := NewEnvelopeTransformer(envelopeService, testProviderName, func(ctx context.Context) (string, error) { return tt.testKeyID, tt.testErr }, + func(ctx context.Context) error { + return nil + }, aestransformer.NewGCMTransformer) - ctx := context.Background() + ctx := testContext(t) dataCtx := value.DefaultContext([]byte(testContextText)) originalText := []byte(testText) @@ -279,12 +289,15 @@ func TestTransformToStorageError(t *testing.T) { t.Parallel() envelopeService := newTestEnvelopeService() envelopeService.SetAnnotations(tt.annotations) - envelopeTransformer := NewEnvelopeTransformer(envelopeService, + envelopeTransformer := NewEnvelopeTransformer(envelopeService, testProviderName, func(ctx context.Context) (string, error) { return "", nil }, + func(ctx context.Context) error { + return nil + }, aestransformer.NewGCMTransformer) - ctx := context.Background() + ctx := testContext(t) dataCtx := value.DefaultContext([]byte(testContextText)) _, err := envelopeTransformer.TransformToStorage(ctx, []byte(testText), dataCtx) @@ -556,3 +569,63 @@ func TestValidateEncryptedDEK(t *testing.T) { }) } } + +func TestEnvelopeMetrics(t *testing.T) { + envelopeService := newTestEnvelopeService() + envelopeTransformer := NewEnvelopeTransformer(envelopeService, testProviderName, + func(ctx context.Context) (string, error) { + return testKeyVersion, nil + }, + func(ctx context.Context) error { + return fmt.Errorf("health check probe called when encryption keyID is different") + }, + aestransformer.NewGCMTransformer) + + dataCtx := value.DefaultContext([]byte(testContextText)) + + kmsv2Transformer := value.PrefixTransformer{Prefix: []byte("k8s:enc:kms:v2:"), Transformer: envelopeTransformer} + + testCases := []struct { + desc string + keyVersionFromEncrypt string + prefix value.Transformer + metrics []string + want string + }{ + { + desc: "keyIDHash total", + keyVersionFromEncrypt: testKeyVersion, + prefix: value.NewPrefixTransformers(nil, kmsv2Transformer), + metrics: []string{ + "apiserver_envelope_encryption_key_id_hash_total", + }, + want: fmt.Sprintf(` + # HELP apiserver_envelope_encryption_key_id_hash_total [ALPHA] Number of times a keyID is used split by transformation type and provider. + # TYPE apiserver_envelope_encryption_key_id_hash_total counter + apiserver_envelope_encryption_key_id_hash_total{key_id_hash="%s",provider_name="%s",transformation_type="%s"} 1 + apiserver_envelope_encryption_key_id_hash_total{key_id_hash="%s",provider_name="%s",transformation_type="%s"} 1 + `, testKeyHash, testProviderName, metrics.FromStorageLabel, testKeyHash, testProviderName, metrics.ToStorageLabel), + }, + } + + metrics.DekCacheInterArrivals.Reset() + metrics.KeyIDHashTotal.Reset() + + for _, tt := range testCases { + t.Run(tt.desc, func(t *testing.T) { + defer metrics.DekCacheInterArrivals.Reset() + defer metrics.KeyIDHashTotal.Reset() + ctx := testContext(t) + envelopeService.keyVersion = tt.keyVersionFromEncrypt + transformedData, err := tt.prefix.TransformToStorage(ctx, []byte(testText), dataCtx) + if err != nil { + t.Fatal(err) + } + tt.prefix.TransformFromStorage(ctx, transformedData, dataCtx) + + if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(tt.want), tt.metrics...); err != nil { + t.Fatal(err) + } + }) + } +} diff --git a/staging/src/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/metrics/metrics.go b/staging/src/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/metrics/metrics.go index 70f65d50116..27825388aeb 100644 --- a/staging/src/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/metrics/metrics.go +++ b/staging/src/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/metrics/metrics.go @@ -17,14 +17,20 @@ limitations under the License. package metrics import ( + "crypto/sha256" "errors" + "fmt" + "hash" "sync" "time" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" + "k8s.io/component-base/metrics" "k8s.io/component-base/metrics/legacyregistry" + "k8s.io/klog/v2" + "k8s.io/utils/lru" ) const ( @@ -34,6 +40,12 @@ const ( ToStorageLabel = "to_storage" ) +type metricLabels struct { + transformationType string + providerName string + keyIDHash string +} + /* * By default, all the following metrics are defined as falling under * ALPHA stability level https://github.com/kubernetes/enhancements/blob/master/keps/sig-instrumentation/1209-metrics-stability/kubernetes-control-plane-metrics-stability.md#stability-classes) @@ -43,11 +55,16 @@ const ( * the metric stability policy. */ var ( - lockLastFromStorage sync.Mutex - lockLastToStorage sync.Mutex + lockLastFromStorage sync.Mutex + lockLastToStorage sync.Mutex + lockRecordKeyID sync.Mutex + lockRecordKeyIDStatus sync.Mutex - lastFromStorage time.Time - lastToStorage time.Time + lastFromStorage time.Time + lastToStorage time.Time + keyIDHashTotalMetricLabels *lru.Cache + keyIDHashStatusLastTimestampSecondsMetricLabels *lru.Cache + cacheSize int = 10 dekCacheFillPercent = metrics.NewGauge( &metrics.GaugeOpts{ @@ -58,8 +75,8 @@ var ( StabilityLevel: metrics.ALPHA, }, ) - - dekCacheInterArrivals = metrics.NewHistogramVec( + // These metrics are made public to be used by unit tests. + DekCacheInterArrivals = metrics.NewHistogramVec( &metrics.HistogramOpts{ Namespace: namespace, Subsystem: subsystem, @@ -84,18 +101,114 @@ var ( }, []string{"provider_name", "method_name", "grpc_status_code"}, ) + + // keyIDHashTotal is the number of times a keyID is used + // e.g. apiserver_envelope_encryption_key_id_hash_total counter + // apiserver_envelope_encryption_key_id_hash_total{key_id_hash="sha256", + // provider_name="providerName",transformation_type="from_storage"} 1 + KeyIDHashTotal = metrics.NewCounterVec( + &metrics.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "key_id_hash_total", + Help: "Number of times a keyID is used split by transformation type and provider.", + StabilityLevel: metrics.ALPHA, + }, + []string{"transformation_type", "provider_name", "key_id_hash"}, + ) + + // keyIDHashLastTimestampSeconds is the last time in seconds when a keyID was used + // e.g. apiserver_envelope_encryption_key_id_hash_last_timestamp_seconds{key_id_hash="sha256", provider_name="providerName",transformation_type="from_storage"} 1.674865558833728e+09 + KeyIDHashLastTimestampSeconds = metrics.NewGaugeVec( + &metrics.GaugeOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "key_id_hash_last_timestamp_seconds", + Help: "The last time in seconds when a keyID was used.", + StabilityLevel: metrics.ALPHA, + }, + []string{"transformation_type", "provider_name", "key_id_hash"}, + ) + + // keyIDHashStatusLastTimestampSeconds is the last time in seconds when a keyID was returned by the Status RPC call. + // e.g. apiserver_envelope_encryption_key_id_hash_status_last_timestamp_seconds{key_id_hash="sha256", provider_name="providerName"} 1.674865558833728e+09 + KeyIDHashStatusLastTimestampSeconds = metrics.NewGaugeVec( + &metrics.GaugeOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "key_id_hash_status_last_timestamp_seconds", + Help: "The last time in seconds when a keyID was returned by the Status RPC call.", + StabilityLevel: metrics.ALPHA, + }, + []string{"provider_name", "key_id_hash"}, + ) ) var registerMetricsFunc sync.Once +var hashPool *sync.Pool +func registerLRUMetrics() { + if keyIDHashTotalMetricLabels != nil { + keyIDHashTotalMetricLabels.Clear() + } + if keyIDHashStatusLastTimestampSecondsMetricLabels != nil { + keyIDHashStatusLastTimestampSecondsMetricLabels.Clear() + } + + keyIDHashTotalMetricLabels = lru.NewWithEvictionFunc(cacheSize, func(key lru.Key, _ interface{}) { + item := key.(metricLabels) + if deleted := KeyIDHashTotal.DeleteLabelValues(item.transformationType, item.providerName, item.keyIDHash); deleted { + klog.InfoS("Deleted keyIDHashTotalMetricLabels", "transformationType", item.transformationType, + "providerName", item.providerName, "keyIDHash", item.keyIDHash) + } + if deleted := KeyIDHashLastTimestampSeconds.DeleteLabelValues(item.transformationType, item.providerName, item.keyIDHash); deleted { + klog.InfoS("Deleted keyIDHashLastTimestampSecondsMetricLabels", "transformationType", item.transformationType, + "providerName", item.providerName, "keyIDHash", item.keyIDHash) + } + }) + keyIDHashStatusLastTimestampSecondsMetricLabels = lru.NewWithEvictionFunc(cacheSize, func(key lru.Key, _ interface{}) { + item := key.(metricLabels) + if deleted := KeyIDHashStatusLastTimestampSeconds.DeleteLabelValues(item.providerName, item.keyIDHash); deleted { + klog.InfoS("Deleted keyIDHashStatusLastTimestampSecondsMetricLabels", "providerName", item.providerName, "keyIDHash", item.keyIDHash) + } + }) +} func RegisterMetrics() { registerMetricsFunc.Do(func() { + registerLRUMetrics() + hashPool = &sync.Pool{ + New: func() interface{} { + return sha256.New() + }, + } legacyregistry.MustRegister(dekCacheFillPercent) - legacyregistry.MustRegister(dekCacheInterArrivals) + legacyregistry.MustRegister(DekCacheInterArrivals) + legacyregistry.MustRegister(KeyIDHashTotal) + legacyregistry.MustRegister(KeyIDHashLastTimestampSeconds) + legacyregistry.MustRegister(KeyIDHashStatusLastTimestampSeconds) legacyregistry.MustRegister(KMSOperationsLatencyMetric) }) } +// RecordKeyID records total count and last time in seconds when a KeyID was used for TransformFromStorage and TransformToStorage operations +func RecordKeyID(transformationType, providerName, keyID string) { + lockRecordKeyID.Lock() + defer lockRecordKeyID.Unlock() + + keyIDHash := addLabelToCache(keyIDHashTotalMetricLabels, transformationType, providerName, keyID) + KeyIDHashTotal.WithLabelValues(transformationType, providerName, keyIDHash).Inc() + KeyIDHashLastTimestampSeconds.WithLabelValues(transformationType, providerName, keyIDHash).SetToCurrentTime() +} + +// RecordKeyIDFromStatus records last time in seconds when a KeyID was returned by the Status RPC call. +func RecordKeyIDFromStatus(providerName, keyID string) { + lockRecordKeyIDStatus.Lock() + defer lockRecordKeyIDStatus.Unlock() + + keyIDHash := addLabelToCache(keyIDHashStatusLastTimestampSecondsMetricLabels, "", providerName, keyID) + KeyIDHashStatusLastTimestampSeconds.WithLabelValues(providerName, keyIDHash).SetToCurrentTime() +} + func RecordArrival(transformationType string, start time.Time) { switch transformationType { case FromStorageLabel: @@ -105,7 +218,7 @@ func RecordArrival(transformationType string, start time.Time) { if lastFromStorage.IsZero() { lastFromStorage = start } - dekCacheInterArrivals.WithLabelValues(transformationType).Observe(start.Sub(lastFromStorage).Seconds()) + DekCacheInterArrivals.WithLabelValues(transformationType).Observe(start.Sub(lastFromStorage).Seconds()) lastFromStorage = start case ToStorageLabel: lockLastToStorage.Lock() @@ -114,7 +227,7 @@ func RecordArrival(transformationType string, start time.Time) { if lastToStorage.IsZero() { lastToStorage = start } - dekCacheInterArrivals.WithLabelValues(transformationType).Observe(start.Sub(lastToStorage).Seconds()) + DekCacheInterArrivals.WithLabelValues(transformationType).Observe(start.Sub(lastToStorage).Seconds()) lastToStorage = start } } @@ -147,3 +260,26 @@ func getErrorCode(err error) string { // method was called, otherwise we would get gRPC error. return "unknown-non-grpc" } + +func getHash(data string) string { + h := hashPool.Get().(hash.Hash) + h.Reset() + h.Write([]byte(data)) + result := fmt.Sprintf("sha256:%x", h.Sum(nil)) + hashPool.Put(h) + return result +} + +func addLabelToCache(c *lru.Cache, transformationType, providerName, keyID string) string { + keyIDHash := "" + // only get hash if the keyID is not empty + if len(keyID) > 0 { + keyIDHash = getHash(keyID) + } + c.Add(metricLabels{ + transformationType: transformationType, + providerName: providerName, + keyIDHash: keyIDHash, + }, nil) // value is irrelevant, this is a set and not a map + return keyIDHash +} diff --git a/staging/src/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/metrics/metrics_test.go b/staging/src/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/metrics/metrics_test.go index 8eeb0dae539..146c102ea98 100644 --- a/staging/src/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/metrics/metrics_test.go +++ b/staging/src/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/metrics/metrics_test.go @@ -19,15 +19,25 @@ package metrics import ( "fmt" "strings" + "sync" "testing" "time" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" + + "k8s.io/apimachinery/pkg/util/rand" "k8s.io/component-base/metrics/legacyregistry" "k8s.io/component-base/metrics/testutil" ) +const ( + testKeyHash1 = "sha256:6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b" + testKeyHash2 = "sha256:d4735e3a265e16eee03f59718b9b5d03019c07d8b6c51f90da3a666eec13ab35" + testKeyHash3 = "sha256:4e07408562bedb8b60ce05c1decfe3ad16b72230967de01f640b7e4729b49fce" + testProviderNameForMetric = "providerName" +) + func TestRecordKMSOperationLatency(t *testing.T) { testCases := []struct { name string @@ -174,3 +184,151 @@ func TestRecordKMSOperationLatency(t *testing.T) { }) } } + +func TestEnvelopeMetrics_Serial(t *testing.T) { + testCases := []struct { + desc string + keyID string + metrics []string + providerName string + transformationType string + want string + }{ + { + desc: "keyIDHash total", + keyID: "1", + metrics: []string{ + "apiserver_envelope_encryption_key_id_hash_total", + }, + providerName: testProviderNameForMetric, + transformationType: FromStorageLabel, + want: fmt.Sprintf(` + # HELP apiserver_envelope_encryption_key_id_hash_total [ALPHA] Number of times a keyID is used split by transformation type and provider. + # TYPE apiserver_envelope_encryption_key_id_hash_total counter + apiserver_envelope_encryption_key_id_hash_total{key_id_hash="%s",provider_name="%s",transformation_type="%s"} 1 + `, testKeyHash1, testProviderNameForMetric, FromStorageLabel), + }, + { + desc: "keyIDHash total more labels", + keyID: "2", + metrics: []string{ + "apiserver_envelope_encryption_key_id_hash_total", + }, + providerName: testProviderNameForMetric, + transformationType: FromStorageLabel, + want: fmt.Sprintf(` + # HELP apiserver_envelope_encryption_key_id_hash_total [ALPHA] Number of times a keyID is used split by transformation type and provider. + # TYPE apiserver_envelope_encryption_key_id_hash_total counter + apiserver_envelope_encryption_key_id_hash_total{key_id_hash="%s",provider_name="%s",transformation_type="%s"} 1 + apiserver_envelope_encryption_key_id_hash_total{key_id_hash="%s",provider_name="%s",transformation_type="%s"} 1 + `, testKeyHash1, testProviderNameForMetric, FromStorageLabel, testKeyHash2, testProviderNameForMetric, FromStorageLabel), + }, + { + desc: "keyIDHash total same labels", + keyID: "2", + metrics: []string{ + "apiserver_envelope_encryption_key_id_hash_total", + }, + providerName: testProviderNameForMetric, + transformationType: FromStorageLabel, + want: fmt.Sprintf(` + # HELP apiserver_envelope_encryption_key_id_hash_total [ALPHA] Number of times a keyID is used split by transformation type and provider. + # TYPE apiserver_envelope_encryption_key_id_hash_total counter + apiserver_envelope_encryption_key_id_hash_total{key_id_hash="%s",provider_name="%s",transformation_type="%s"} 1 + apiserver_envelope_encryption_key_id_hash_total{key_id_hash="%s",provider_name="%s",transformation_type="%s"} 2 + `, testKeyHash1, testProviderNameForMetric, FromStorageLabel, testKeyHash2, testProviderNameForMetric, FromStorageLabel), + }, + { + desc: "keyIDHash total exceeds limit, remove first label, and empty keyID", + keyID: "", + metrics: []string{ + "apiserver_envelope_encryption_key_id_hash_total", + }, + providerName: testProviderNameForMetric, + transformationType: FromStorageLabel, + want: fmt.Sprintf(` + # HELP apiserver_envelope_encryption_key_id_hash_total [ALPHA] Number of times a keyID is used split by transformation type and provider. + # TYPE apiserver_envelope_encryption_key_id_hash_total counter + apiserver_envelope_encryption_key_id_hash_total{key_id_hash="%s",provider_name="%s",transformation_type="%s"} 2 + apiserver_envelope_encryption_key_id_hash_total{key_id_hash="%s",provider_name="%s",transformation_type="%s"} 1 + `, testKeyHash2, testProviderNameForMetric, FromStorageLabel, "", testProviderNameForMetric, FromStorageLabel), + }, + { + desc: "keyIDHash total exceeds limit 2, remove first label", + keyID: "1", + metrics: []string{ + "apiserver_envelope_encryption_key_id_hash_total", + }, + providerName: testProviderNameForMetric, + transformationType: FromStorageLabel, + want: fmt.Sprintf(` + # HELP apiserver_envelope_encryption_key_id_hash_total [ALPHA] Number of times a keyID is used split by transformation type and provider. + # TYPE apiserver_envelope_encryption_key_id_hash_total counter + apiserver_envelope_encryption_key_id_hash_total{key_id_hash="%s",provider_name="%s",transformation_type="%s"} 1 + apiserver_envelope_encryption_key_id_hash_total{key_id_hash="%s",provider_name="%s",transformation_type="%s"} 1 + `, "", testProviderNameForMetric, FromStorageLabel, testKeyHash1, testProviderNameForMetric, FromStorageLabel), + }, + } + + KeyIDHashTotal.Reset() + cacheSize = 2 + RegisterMetrics() + registerLRUMetrics() + + for _, tt := range testCases { + t.Run(tt.desc, func(t *testing.T) { + RecordKeyID(tt.transformationType, tt.providerName, tt.keyID) + // We are not resetting the metric here as each test is not independent in order to validate the behavior + // when the metric labels exceed the limit to ensure the labels are not unbounded. + if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(tt.want), tt.metrics...); err != nil { + t.Fatal(err) + } + }) + } +} + +func TestEnvelopeMetricsLRUKey(t *testing.T) { + RegisterMetrics() + + cacheSize = 3 + registerLRUMetrics() + KeyIDHashTotal.Reset() + defer KeyIDHashTotal.Reset() + + var wg sync.WaitGroup + for i := 1; i < 100; i++ { + wg.Add(1) + go func() { + defer wg.Done() + keyID := rand.String(32) + key := metricLabels{ + transformationType: rand.String(32), + providerName: rand.String(32), + keyIDHash: getHash(keyID), + } + RecordKeyID(key.transformationType, key.providerName, keyID) + }() + } + wg.Wait() + + validMetrics := 0 + metricFamilies, err := legacyregistry.DefaultGatherer.Gather() + if err != nil { + t.Fatal(err) + } + for _, family := range metricFamilies { + if family.GetName() != "apiserver_envelope_encryption_key_id_hash_total" { + continue + } + for _, metric := range family.GetMetric() { + if metric.Counter.GetValue() != 1 { + t.Errorf("invalid metric seen: %s", metric.String()) + } else { + validMetrics++ + } + } + } + if validMetrics != cacheSize { + t.Fatalf("expected total valid metrics to be the same as cacheSize %d, got %d", cacheSize, validMetrics) + } +} diff --git a/staging/src/k8s.io/apiserver/pkg/storage/value/metrics.go b/staging/src/k8s.io/apiserver/pkg/storage/value/metrics.go index 63ab526af19..995aa7f3433 100644 --- a/staging/src/k8s.io/apiserver/pkg/storage/value/metrics.go +++ b/staging/src/k8s.io/apiserver/pkg/storage/value/metrics.go @@ -51,7 +51,7 @@ var ( Buckets: metrics.ExponentialBuckets(5e-6, 2, 25), StabilityLevel: metrics.ALPHA, }, - []string{"transformation_type"}, + []string{"transformation_type", "transformer_prefix"}, ) transformerOperationsTotal = metrics.NewCounterVec( @@ -111,12 +111,12 @@ func RegisterMetrics() { // RecordTransformation records latencies and count of TransformFromStorage and TransformToStorage operations. // Note that transformation_failures_total metric is deprecated, use transformation_operations_total instead. -func RecordTransformation(transformationType, transformerPrefix string, start time.Time, err error) { +func RecordTransformation(transformationType, transformerPrefix string, elapsed time.Duration, err error) { transformerOperationsTotal.WithLabelValues(transformationType, transformerPrefix, status.Code(err).String()).Inc() switch { case err == nil: - transformerLatencies.WithLabelValues(transformationType).Observe(sinceInSeconds(start)) + transformerLatencies.WithLabelValues(transformationType, transformerPrefix).Observe(elapsed.Seconds()) } } diff --git a/staging/src/k8s.io/apiserver/pkg/storage/value/metrics_test.go b/staging/src/k8s.io/apiserver/pkg/storage/value/metrics_test.go index 2a84e558ea7..acd21c7216a 100644 --- a/staging/src/k8s.io/apiserver/pkg/storage/value/metrics_test.go +++ b/staging/src/k8s.io/apiserver/pkg/storage/value/metrics_test.go @@ -21,6 +21,7 @@ import ( "errors" "strings" "testing" + "time" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" @@ -112,3 +113,110 @@ func TestTotals(t *testing.T) { }) } } + +func TestLatency(t *testing.T) { + testCases := []struct { + desc string + prefix string + transformationType string + elapsed time.Duration + metrics []string + want string + }{ + { + desc: "transformation latency", + prefix: "k8s:enc:kms:v1:", + transformationType: "from_storage", + elapsed: time.Duration(10) * time.Second, + metrics: []string{ + "apiserver_storage_transformation_duration_seconds", + }, + want: ` + # HELP apiserver_storage_transformation_duration_seconds [ALPHA] Latencies in seconds of value transformation operations. + # TYPE apiserver_storage_transformation_duration_seconds histogram + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v1:",le="5e-06"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v1:",le="1e-05"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v1:",le="2e-05"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v1:",le="4e-05"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v1:",le="8e-05"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v1:",le="0.00016"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v1:",le="0.00032"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v1:",le="0.00064"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v1:",le="0.00128"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v1:",le="0.00256"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v1:",le="0.00512"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v1:",le="0.01024"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v1:",le="0.02048"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v1:",le="0.04096"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v1:",le="0.08192"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v1:",le="0.16384"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v1:",le="0.32768"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v1:",le="0.65536"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v1:",le="1.31072"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v1:",le="2.62144"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v1:",le="5.24288"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v1:",le="10.48576"} 1 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v1:",le="20.97152"} 1 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v1:",le="41.94304"} 1 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v1:",le="83.88608"} 1 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v1:",le="+Inf"} 1 + apiserver_storage_transformation_duration_seconds_sum{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v1:"} 10 + apiserver_storage_transformation_duration_seconds_count{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v1:"} 1 + `, + }, + { + desc: "transformation latency 2", + prefix: "k8s:enc:kms:v2:", + transformationType: "from_storage", + elapsed: time.Duration(5) * time.Second, + metrics: []string{ + "apiserver_storage_transformation_duration_seconds", + }, + want: ` + # HELP apiserver_storage_transformation_duration_seconds [ALPHA] Latencies in seconds of value transformation operations. + # TYPE apiserver_storage_transformation_duration_seconds histogram + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v2:",le="5e-06"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v2:",le="1e-05"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v2:",le="2e-05"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v2:",le="4e-05"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v2:",le="8e-05"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v2:",le="0.00016"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v2:",le="0.00032"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v2:",le="0.00064"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v2:",le="0.00128"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v2:",le="0.00256"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v2:",le="0.00512"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v2:",le="0.01024"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v2:",le="0.02048"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v2:",le="0.04096"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v2:",le="0.08192"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v2:",le="0.16384"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v2:",le="0.32768"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v2:",le="0.65536"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v2:",le="1.31072"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v2:",le="2.62144"} 0 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v2:",le="5.24288"} 1 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v2:",le="10.48576"} 1 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v2:",le="20.97152"} 1 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v2:",le="41.94304"} 1 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v2:",le="83.88608"} 1 + apiserver_storage_transformation_duration_seconds_bucket{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v2:",le="+Inf"} 1 + apiserver_storage_transformation_duration_seconds_sum{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v2:"} 5 + apiserver_storage_transformation_duration_seconds_count{transformation_type="from_storage",transformer_prefix="k8s:enc:kms:v2:"} 1 + `, + }, + } + + RegisterMetrics() + transformerLatencies.Reset() + + for _, tt := range testCases { + t.Run(tt.desc, func(t *testing.T) { + RecordTransformation(tt.transformationType, tt.prefix, tt.elapsed, nil) + defer transformerLatencies.Reset() + if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(tt.want), tt.metrics...); err != nil { + t.Fatal(err) + } + }) + } +} diff --git a/staging/src/k8s.io/apiserver/pkg/storage/value/transformer.go b/staging/src/k8s.io/apiserver/pkg/storage/value/transformer.go index 8e5bf94c916..a6a4aa184d6 100644 --- a/staging/src/k8s.io/apiserver/pkg/storage/value/transformer.go +++ b/staging/src/k8s.io/apiserver/pkg/storage/value/transformer.go @@ -100,9 +100,9 @@ func (t *prefixTransformers) TransformFromStorage(ctx context.Context, data []by continue } if len(transformer.Prefix) == 0 { - RecordTransformation("from_storage", "identity", start, err) + RecordTransformation("from_storage", "identity", time.Since(start), err) } else { - RecordTransformation("from_storage", string(transformer.Prefix), start, err) + RecordTransformation("from_storage", string(transformer.Prefix), time.Since(start), err) } // It is valid to have overlapping prefixes when the same encryption provider @@ -146,7 +146,7 @@ func (t *prefixTransformers) TransformFromStorage(ctx context.Context, data []by if err := errors.Reduce(errors.NewAggregate(errs)); err != nil { return nil, false, err } - RecordTransformation("from_storage", "unknown", start, t.err) + RecordTransformation("from_storage", "unknown", time.Since(start), t.err) return nil, false, t.err } @@ -155,7 +155,7 @@ func (t *prefixTransformers) TransformToStorage(ctx context.Context, data []byte start := time.Now() transformer := t.transformers[0] result, err := transformer.Transformer.TransformToStorage(ctx, data, dataCtx) - RecordTransformation("to_storage", string(transformer.Prefix), start, err) + RecordTransformation("to_storage", string(transformer.Prefix), time.Since(start), err) if err != nil { return nil, err } diff --git a/test/instrumentation/documentation/documentation-list.yaml b/test/instrumentation/documentation/documentation-list.yaml index cb603fd57f7..ebd11e45a2a 100644 --- a/test/instrumentation/documentation/documentation-list.yaml +++ b/test/instrumentation/documentation/documentation-list.yaml @@ -3251,7 +3251,9 @@ type: Histogram stabilityLevel: ALPHA labels: + - status - transformation_type + - transformer_prefix buckets: - 5e-06 - 1e-05 diff --git a/test/instrumentation/documentation/documentation.md b/test/instrumentation/documentation/documentation.md index f762a46a4b6..0a7f030e4a7 100644 --- a/test/instrumentation/documentation/documentation.md +++ b/test/instrumentation/documentation/documentation.md @@ -822,7 +822,7 @@ components using an HTTP scrape, and fetch the current metrics data in Prometheu ALPHA Histogram Latencies in seconds of value transformation operations. -
transformation_type
+<
status
transformation_type
transformer_prefix
apiserver_storage_transformation_operations_total