From 5292542b72df0665fb73bb0060fe73553dbf9b05 Mon Sep 17 00:00:00 2001 From: Rita Zhang Date: Thu, 16 Feb 2023 11:56:31 -0800 Subject: [PATCH] kmsv2: add metrics for invalid_key_id_from_status_total Signed-off-by: Rita Zhang --- .../server/options/encryptionconfig/config.go | 6 +- .../options/encryptionconfig/config_test.go | 103 +++++++++++++++++- .../value/encrypt/envelope/kmsv2/envelope.go | 21 ++-- .../encrypt/envelope/kmsv2/envelope_test.go | 60 +++++++--- .../value/encrypt/envelope/metrics/metrics.go | 16 +++ .../encrypt/envelope/metrics/metrics_test.go | 67 +++++++++++- 6 files changed, 242 insertions(+), 31 deletions(-) diff --git a/staging/src/k8s.io/apiserver/pkg/server/options/encryptionconfig/config.go b/staging/src/k8s.io/apiserver/pkg/server/options/encryptionconfig/config.go index 20bb1550535..7b15de0d01e 100644 --- a/staging/src/k8s.io/apiserver/pkg/server/options/encryptionconfig/config.go +++ b/staging/src/k8s.io/apiserver/pkg/server/options/encryptionconfig/config.go @@ -278,9 +278,11 @@ func (h *kmsv2PluginProbe) check(ctx context.Context) error { return fmt.Errorf("failed to perform status section of the healthz check for KMS Provider %s, error: %w", h.name, err) } // we coast on the last valid key ID that we have observed - if err := envelopekmsv2.ValidateKeyID(p.KeyID); err == nil { + if errCode, err := envelopekmsv2.ValidateKeyID(p.KeyID); err == nil { h.keyID.Store(&p.KeyID) metrics.RecordKeyIDFromStatus(h.name, p.KeyID) + } else { + metrics.RecordInvalidKeyIDFromStatus(h.name, string(errCode)) } if err := isKMSv2ProviderHealthy(h.name, p); err != nil { @@ -312,7 +314,7 @@ func isKMSv2ProviderHealthy(name string, response *kmsservice.StatusResponse) er if response.Version != envelopekmsv2.KMSAPIVersion { errs = append(errs, fmt.Errorf("expected KMSv2 API version %s, got %s", envelopekmsv2.KMSAPIVersion, response.Version)) } - if err := envelopekmsv2.ValidateKeyID(response.KeyID); err != nil { + if _, err := envelopekmsv2.ValidateKeyID(response.KeyID); err != nil { errs = append(errs, fmt.Errorf("expected KMSv2 KeyID to be set, got %s", response.KeyID)) } diff --git a/staging/src/k8s.io/apiserver/pkg/server/options/encryptionconfig/config_test.go b/staging/src/k8s.io/apiserver/pkg/server/options/encryptionconfig/config_test.go index 44221973ffa..8eb8d74c4f8 100644 --- a/staging/src/k8s.io/apiserver/pkg/server/options/encryptionconfig/config_test.go +++ b/staging/src/k8s.io/apiserver/pkg/server/options/encryptionconfig/config_test.go @@ -21,6 +21,7 @@ import ( "context" "encoding/base64" "errors" + "strings" "sync" "testing" "time" @@ -33,8 +34,12 @@ import ( "k8s.io/apiserver/pkg/features" "k8s.io/apiserver/pkg/storage/value" "k8s.io/apiserver/pkg/storage/value/encrypt/envelope" + envelopekmsv2 "k8s.io/apiserver/pkg/storage/value/encrypt/envelope/kmsv2" + "k8s.io/apiserver/pkg/storage/value/encrypt/envelope/metrics" utilfeature "k8s.io/apiserver/pkg/util/feature" featuregatetesting "k8s.io/component-base/featuregate/testing" + "k8s.io/component-base/metrics/legacyregistry" + "k8s.io/component-base/metrics/testutil" kmsservice "k8s.io/kms/service" ) @@ -43,6 +48,10 @@ const ( sampleContextText = "0123456789" ) +var ( + sampleInvalidKeyID = string(make([]byte, envelopekmsv2.KeyIDMaxSize+1)) +) + // testEnvelopeService is a mock envelope service which can be used to simulate remote Envelope services // for testing of the envelope transformer with other transformers. type testEnvelopeService struct { @@ -66,7 +75,8 @@ func (t *testEnvelopeService) Encrypt(data []byte) ([]byte, error) { // testKMSv2EnvelopeService is a mock kmsv2 envelope service which can be used to simulate remote Envelope v2 services // for testing of the envelope transformer with other transformers. type testKMSv2EnvelopeService struct { - err error + err error + keyID string } func (t *testKMSv2EnvelopeService) Decrypt(ctx context.Context, uid string, req *kmsservice.DecryptRequest) ([]byte, error) { @@ -82,7 +92,7 @@ func (t *testKMSv2EnvelopeService) Encrypt(ctx context.Context, uid string, data } return &kmsservice.EncryptResponse{ Ciphertext: []byte(base64.StdEncoding.EncodeToString(data)), - KeyID: "1", + KeyID: t.keyID, }, nil } @@ -90,7 +100,7 @@ func (t *testKMSv2EnvelopeService) Status(ctx context.Context) (*kmsservice.Stat if t.err != nil { return nil, t.err } - return &kmsservice.StatusResponse{Healthz: "ok", KeyID: "1", Version: "v2alpha1"}, nil + return &kmsservice.StatusResponse{Healthz: "ok", KeyID: t.keyID, Version: "v2alpha1"}, nil } // The factory method to create mock envelope service. @@ -105,12 +115,17 @@ func newMockErrorEnvelopeService(endpoint string, timeout time.Duration) (envelo // The factory method to create mock envelope kmsv2 service. func newMockEnvelopeKMSv2Service(ctx context.Context, endpoint, providerName string, timeout time.Duration) (kmsservice.Service, error) { - return &testKMSv2EnvelopeService{nil}, nil + return &testKMSv2EnvelopeService{nil, "1"}, nil } // The factory method to create mock envelope kmsv2 service which always returns error. func newMockErrorEnvelopeKMSv2Service(endpoint string, timeout time.Duration) (kmsservice.Service, error) { - return &testKMSv2EnvelopeService{errors.New("test")}, nil + return &testKMSv2EnvelopeService{errors.New("test"), "1"}, nil +} + +// The factory method to create mock envelope kmsv2 service that always returns invalid keyID. +func newMockInvalidKeyIDEnvelopeKMSv2Service(ctx context.Context, endpoint string, timeout time.Duration, keyID string) (kmsservice.Service, error) { + return &testKMSv2EnvelopeService{nil, keyID}, nil } func TestLegacyConfig(t *testing.T) { @@ -721,6 +736,84 @@ func TestKMSv2PluginHealthzTTL(t *testing.T) { } } +func TestKMSv2InvalidKeyID(t *testing.T) { + ctx := testContext(t) + invalidKeyIDService, _ := newMockInvalidKeyIDEnvelopeKMSv2Service(ctx, "unix:///tmp/testprovider.sock", 3*time.Second, "") + invalidLongKeyIDService, _ := newMockInvalidKeyIDEnvelopeKMSv2Service(ctx, "unix:///tmp/testprovider.sock", 3*time.Second, sampleInvalidKeyID) + service, _ := newMockInvalidKeyIDEnvelopeKMSv2Service(ctx, "unix:///tmp/testprovider.sock", 3*time.Second, "1") + + testCases := []struct { + desc string + probe *kmsv2PluginProbe + metrics []string + want string + }{ + { + desc: "kmsv2 provider returns an invalid empty keyID", + probe: &kmsv2PluginProbe{ + name: "test", + ttl: kmsPluginHealthzNegativeTTL, + service: invalidKeyIDService, + l: &sync.Mutex{}, + lastResponse: &kmsPluginHealthzResponse{}, + }, + metrics: []string{ + "apiserver_envelope_encryption_invalid_key_id_from_status_total", + }, + want: ` + # HELP apiserver_envelope_encryption_invalid_key_id_from_status_total [ALPHA] Number of times an invalid keyID is returned by the Status RPC call split by error. + # TYPE apiserver_envelope_encryption_invalid_key_id_from_status_total counter + apiserver_envelope_encryption_invalid_key_id_from_status_total{error="empty",provider_name="test"} 1 + `, + }, + { + desc: "kmsv2 provider returns a valid keyID", + probe: &kmsv2PluginProbe{ + name: "test", + ttl: kmsPluginHealthzNegativeTTL, + service: service, + l: &sync.Mutex{}, + lastResponse: &kmsPluginHealthzResponse{}, + }, + metrics: []string{ + "apiserver_envelope_encryption_invalid_key_id_from_status_total", + }, + want: ``, + }, + { + desc: "kmsv2 provider returns an invalid long keyID", + probe: &kmsv2PluginProbe{ + name: "test", + ttl: kmsPluginHealthzNegativeTTL, + service: invalidLongKeyIDService, + l: &sync.Mutex{}, + lastResponse: &kmsPluginHealthzResponse{}, + }, + metrics: []string{ + "apiserver_envelope_encryption_invalid_key_id_from_status_total", + }, + want: ` + # HELP apiserver_envelope_encryption_invalid_key_id_from_status_total [ALPHA] Number of times an invalid keyID is returned by the Status RPC call split by error. + # TYPE apiserver_envelope_encryption_invalid_key_id_from_status_total counter + apiserver_envelope_encryption_invalid_key_id_from_status_total{error="too_long",provider_name="test"} 1 + `, + }, + } + + metrics.InvalidKeyIDFromStatusTotal.Reset() + metrics.RegisterMetrics() + + for _, tt := range testCases { + t.Run(tt.desc, func(t *testing.T) { + defer metrics.InvalidKeyIDFromStatusTotal.Reset() + _ = tt.probe.check(ctx) + if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(tt.want), tt.metrics...); err != nil { + t.Fatal(err) + } + }) + } +} + func TestCBCKeyRotationWithOverlappingProviders(t *testing.T) { testCBCKeyRotationWithProviders( t, diff --git a/staging/src/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/kmsv2/envelope.go b/staging/src/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/kmsv2/envelope.go index b0af7ebb1cf..3c9f3068882 100644 --- a/staging/src/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/kmsv2/envelope.go +++ b/staging/src/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/kmsv2/envelope.go @@ -48,16 +48,21 @@ const ( KMSAPIVersion = "v2alpha1" // annotationsMaxSize is the maximum size of the annotations. annotationsMaxSize = 32 * 1024 // 32 kB - // keyIDMaxSize is the maximum size of the keyID. - keyIDMaxSize = 1 * 1024 // 1 kB + // KeyIDMaxSize is the maximum size of the keyID. + KeyIDMaxSize = 1 * 1024 // 1 kB // encryptedDEKMaxSize is the maximum size of the encrypted DEK. encryptedDEKMaxSize = 1 * 1024 // 1 kB // cacheTTL is the default time-to-live for the cache entry. cacheTTL = 1 * time.Hour + // error code + errKeyIDOKCode ErrCodeKeyID = "ok" + errKeyIDEmptyCode ErrCodeKeyID = "empty" + errKeyIDTooLongCode ErrCodeKeyID = "too_long" ) type KeyIDGetterFunc func(context.Context) (keyID string, err error) type ProbeHealthzCheckFunc func(context.Context) (err error) +type ErrCodeKeyID string type envelopeTransformer struct { envelopeService kmsservice.Service @@ -247,7 +252,7 @@ func validateEncryptedObject(o *kmstypes.EncryptedObject) error { if err := validateEncryptedDEK(o.EncryptedDEK); err != nil { return fmt.Errorf("failed to validate encrypted DEK: %w", err) } - if err := ValidateKeyID(o.KeyID); err != nil { + if _, err := ValidateKeyID(o.KeyID); err != nil { return fmt.Errorf("failed to validate key id: %w", err) } if err := validateAnnotations(o.Annotations); err != nil { @@ -290,12 +295,12 @@ func validateAnnotations(annotations map[string][]byte) error { // ValidateKeyID tests the following: // 1. The keyID is not empty. // 2. The size of keyID is less than 1 kB. -func ValidateKeyID(keyID string) error { +func ValidateKeyID(keyID string) (ErrCodeKeyID, error) { if len(keyID) == 0 { - return fmt.Errorf("keyID is empty") + return errKeyIDEmptyCode, fmt.Errorf("keyID is empty") } - if len(keyID) > keyIDMaxSize { - return fmt.Errorf("keyID is %d bytes, which exceeds the max size of %d", len(keyID), keyIDMaxSize) + if len(keyID) > KeyIDMaxSize { + return errKeyIDTooLongCode, fmt.Errorf("keyID is %d bytes, which exceeds the max size of %d", len(keyID), KeyIDMaxSize) } - return nil + return errKeyIDOKCode, nil } diff --git a/staging/src/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/kmsv2/envelope_test.go b/staging/src/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/kmsv2/envelope_test.go index 0670a6afdae..2c9b64e618c 100644 --- a/staging/src/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/kmsv2/envelope_test.go +++ b/staging/src/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/kmsv2/envelope_test.go @@ -46,6 +46,10 @@ const ( testCacheTTL = 10 * time.Second ) +var ( + errCode = "empty" +) + // testEnvelopeService is a mock Envelope service which can be used to simulate remote Envelope services // for testing of Envelope based encryption providers. type testEnvelopeService struct { @@ -478,24 +482,28 @@ func TestValidateAnnotations(t *testing.T) { func TestValidateKeyID(t *testing.T) { t.Parallel() testCases := []struct { - name string - keyID string - expectedError string + name string + keyID string + expectedError string + expectedErrorCode string }{ { - name: "valid key ID", - keyID: "1234", - expectedError: "", + name: "valid key ID", + keyID: "1234", + expectedError: "", + expectedErrorCode: "ok", }, { - name: "empty key ID", - keyID: "", - expectedError: "keyID is empty", + name: "empty key ID", + keyID: "", + expectedError: "keyID is empty", + expectedErrorCode: "empty", }, { - name: "keyID size is greater than 1 kB", - keyID: strings.Repeat("a", 1024+1), - expectedError: "which exceeds the max size of", + name: "keyID size is greater than 1 kB", + keyID: strings.Repeat("a", 1024+1), + expectedError: "which exceeds the max size of", + expectedErrorCode: "too_long", }, } @@ -503,7 +511,7 @@ func TestValidateKeyID(t *testing.T) { tt := tt t.Run(tt.name, func(t *testing.T) { t.Parallel() - err := ValidateKeyID(tt.keyID) + errCode, err := ValidateKeyID(tt.keyID) if tt.expectedError != "" { if err == nil { t.Fatalf("expected error %q, got nil", tt.expectedError) @@ -516,6 +524,9 @@ func TestValidateKeyID(t *testing.T) { t.Fatalf("expected no error, got %q", err) } } + if tt.expectedErrorCode != string(errCode) { + t.Fatalf("expected %s errCode, got %s", tt.expectedErrorCode, string(errCode)) + } }) } } @@ -576,8 +587,10 @@ func TestEnvelopeMetrics(t *testing.T) { func(ctx context.Context) (string, error) { return testKeyVersion, nil }, + // health probe check to ensure keyID freshness func(ctx context.Context) error { - return fmt.Errorf("health check probe called when encryption keyID is different") + metrics.RecordInvalidKeyIDFromStatus(testProviderName, errCode) + return nil }, aestransformer.NewGCMTransformer) @@ -606,15 +619,34 @@ func TestEnvelopeMetrics(t *testing.T) { apiserver_envelope_encryption_key_id_hash_total{key_id_hash="%s",provider_name="%s",transformation_type="%s"} 1 `, testKeyHash, testProviderName, metrics.FromStorageLabel, testKeyHash, testProviderName, metrics.ToStorageLabel), }, + { + // keyVersionFromEncrypt is returned from kms v2 envelope service + // when it is different from the key ID returned from last status call + // it will trigger health probe check immediately to ensure keyID freshness + // during probe check above, it will call RecordInvalidKeyIDFromStatus + desc: "invalid KeyID From Status Total", + keyVersionFromEncrypt: "2", + prefix: value.NewPrefixTransformers(nil, kmsv2Transformer), + metrics: []string{ + "apiserver_envelope_encryption_invalid_key_id_from_status_total", + }, + want: fmt.Sprintf(` + # HELP apiserver_envelope_encryption_invalid_key_id_from_status_total [ALPHA] Number of times an invalid keyID is returned by the Status RPC call split by error. + # TYPE apiserver_envelope_encryption_invalid_key_id_from_status_total counter + apiserver_envelope_encryption_invalid_key_id_from_status_total{error="%s",provider_name="%s"} 1 + `, errCode, testProviderName), + }, } metrics.DekCacheInterArrivals.Reset() metrics.KeyIDHashTotal.Reset() + metrics.InvalidKeyIDFromStatusTotal.Reset() for _, tt := range testCases { t.Run(tt.desc, func(t *testing.T) { defer metrics.DekCacheInterArrivals.Reset() defer metrics.KeyIDHashTotal.Reset() + defer metrics.InvalidKeyIDFromStatusTotal.Reset() ctx := testContext(t) envelopeService.keyVersion = tt.keyVersionFromEncrypt transformedData, err := tt.prefix.TransformToStorage(ctx, []byte(testText), dataCtx) diff --git a/staging/src/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/metrics/metrics.go b/staging/src/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/metrics/metrics.go index 27825388aeb..d25a5ee4b5f 100644 --- a/staging/src/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/metrics/metrics.go +++ b/staging/src/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/metrics/metrics.go @@ -142,6 +142,17 @@ var ( }, []string{"provider_name", "key_id_hash"}, ) + + InvalidKeyIDFromStatusTotal = metrics.NewCounterVec( + &metrics.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "invalid_key_id_from_status_total", + Help: "Number of times an invalid keyID is returned by the Status RPC call split by error.", + StabilityLevel: metrics.ALPHA, + }, + []string{"provider_name", "error"}, + ) ) var registerMetricsFunc sync.Once @@ -186,6 +197,7 @@ func RegisterMetrics() { legacyregistry.MustRegister(KeyIDHashTotal) legacyregistry.MustRegister(KeyIDHashLastTimestampSeconds) legacyregistry.MustRegister(KeyIDHashStatusLastTimestampSeconds) + legacyregistry.MustRegister(InvalidKeyIDFromStatusTotal) legacyregistry.MustRegister(KMSOperationsLatencyMetric) }) } @@ -209,6 +221,10 @@ func RecordKeyIDFromStatus(providerName, keyID string) { KeyIDHashStatusLastTimestampSeconds.WithLabelValues(providerName, keyIDHash).SetToCurrentTime() } +func RecordInvalidKeyIDFromStatus(providerName, errCode string) { + InvalidKeyIDFromStatusTotal.WithLabelValues(providerName, errCode).Inc() +} + func RecordArrival(transformationType string, start time.Time) { switch transformationType { case FromStorageLabel: diff --git a/staging/src/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/metrics/metrics_test.go b/staging/src/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/metrics/metrics_test.go index 146c102ea98..2bd9d65788b 100644 --- a/staging/src/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/metrics/metrics_test.go +++ b/staging/src/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/metrics/metrics_test.go @@ -38,6 +38,10 @@ const ( testProviderNameForMetric = "providerName" ) +var ( + errCode = "empty" +) + func TestRecordKMSOperationLatency(t *testing.T) { testCases := []struct { name string @@ -185,7 +189,7 @@ func TestRecordKMSOperationLatency(t *testing.T) { } } -func TestEnvelopeMetrics_Serial(t *testing.T) { +func TestRecordKeyID_Serial(t *testing.T) { testCases := []struct { desc string keyID string @@ -287,7 +291,7 @@ func TestEnvelopeMetrics_Serial(t *testing.T) { } } -func TestEnvelopeMetricsLRUKey(t *testing.T) { +func TestRecordKeyIDLRUKey(t *testing.T) { RegisterMetrics() cacheSize = 3 @@ -332,3 +336,62 @@ func TestEnvelopeMetricsLRUKey(t *testing.T) { t.Fatalf("expected total valid metrics to be the same as cacheSize %d, got %d", cacheSize, validMetrics) } } + +func TestRecordInvalidKeyIDFromStatus(t *testing.T) { + testCases := []struct { + desc string + count int + metrics []string + providerName string + want string + }{ + { + desc: "invalid KeyID From Status Total 3", + count: 3, + metrics: []string{ + "apiserver_envelope_encryption_invalid_key_id_from_status_total", + }, + providerName: testProviderNameForMetric, + want: fmt.Sprintf(` + # HELP apiserver_envelope_encryption_invalid_key_id_from_status_total [ALPHA] Number of times an invalid keyID is returned by the Status RPC call split by error. + # TYPE apiserver_envelope_encryption_invalid_key_id_from_status_total counter + apiserver_envelope_encryption_invalid_key_id_from_status_total{error="%s",provider_name="%s"} %d + `, errCode, testProviderNameForMetric, 3), + }, + { + desc: "invalid KeyID From Status Total 10", + count: 10, + metrics: []string{ + "apiserver_envelope_encryption_invalid_key_id_from_status_total", + }, + providerName: testProviderNameForMetric, + want: fmt.Sprintf(` + # HELP apiserver_envelope_encryption_invalid_key_id_from_status_total [ALPHA] Number of times an invalid keyID is returned by the Status RPC call split by error. + # TYPE apiserver_envelope_encryption_invalid_key_id_from_status_total counter + apiserver_envelope_encryption_invalid_key_id_from_status_total{error="%s",provider_name="%s"} %d + `, errCode, testProviderNameForMetric, 10), + }, + } + + InvalidKeyIDFromStatusTotal.Reset() + RegisterMetrics() + + for _, tt := range testCases { + t.Run(tt.desc, func(t *testing.T) { + defer InvalidKeyIDFromStatusTotal.Reset() + var wg sync.WaitGroup + for i := 0; i < tt.count; i++ { + wg.Add(1) + go func() { + defer wg.Done() + RecordInvalidKeyIDFromStatus(tt.providerName, errCode) + }() + } + wg.Wait() + + if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(tt.want), tt.metrics...); err != nil { + t.Fatal(err) + } + }) + } +}