From 79b344d85e3e2f8f3192a3dcabb384cfe87136a6 Mon Sep 17 00:00:00 2001 From: Jordan Liggitt Date: Sat, 2 Mar 2024 01:44:28 -0500 Subject: [PATCH] Add authorization webhook duration/count/failopen metrics --- pkg/kubeapiserver/authorizer/reload.go | 4 +- .../authorizerfactory/metrics.go | 2 + .../pkg/authorizer/webhook/metrics/metrics.go | 117 ++++++++++++++++ .../webhook/metrics/metrics_test.go | 86 ++++++++++++ .../pkg/authorizer/webhook/metrics_test.go | 79 ++++++++++- .../plugin/pkg/authorizer/webhook/webhook.go | 23 ++++ .../pkg/authorizer/webhook/webhook_v1_test.go | 8 ++ test/integration/auth/authz_config_test.go | 130 +++++++++++++++--- 8 files changed, 426 insertions(+), 23 deletions(-) create mode 100644 staging/src/k8s.io/apiserver/plugin/pkg/authorizer/webhook/metrics/metrics_test.go diff --git a/pkg/kubeapiserver/authorizer/reload.go b/pkg/kubeapiserver/authorizer/reload.go index d9b75e29450..b7cb17a94a3 100644 --- a/pkg/kubeapiserver/authorizer/reload.go +++ b/pkg/kubeapiserver/authorizer/reload.go @@ -145,7 +145,7 @@ func (r *reloadableAuthorizerResolver) newForConfig(authzConfig *authzconfig.Aut decisionOnError, configuredAuthorizer.Webhook.MatchConditions, configuredAuthorizer.Name, - kubeapiserverWebhookMetrics{MatcherMetrics: cel.NewMatcherMetrics()}, + kubeapiserverWebhookMetrics{WebhookMetrics: webhookmetrics.NewWebhookMetrics(), MatcherMetrics: cel.NewMatcherMetrics()}, ) if err != nil { return nil, nil, err @@ -169,6 +169,8 @@ func (r *reloadableAuthorizerResolver) newForConfig(authzConfig *authzconfig.Aut type kubeapiserverWebhookMetrics struct { // kube-apiserver doesn't report request metrics webhookmetrics.NoopRequestMetrics + // kube-apiserver does report webhook metrics + webhookmetrics.WebhookMetrics // kube-apiserver does report matchCondition metrics cel.MatcherMetrics } diff --git a/staging/src/k8s.io/apiserver/pkg/authorization/authorizerfactory/metrics.go b/staging/src/k8s.io/apiserver/pkg/authorization/authorizerfactory/metrics.go index df30479b722..3f72a25b78b 100644 --- a/staging/src/k8s.io/apiserver/pkg/authorization/authorizerfactory/metrics.go +++ b/staging/src/k8s.io/apiserver/pkg/authorization/authorizerfactory/metrics.go @@ -60,6 +60,8 @@ var ( var _ = webhookmetrics.AuthorizerMetrics(delegatingAuthorizerMetrics{}) type delegatingAuthorizerMetrics struct { + // no-op for webhook metrics for now, delegating authorization reports original total/latency metrics + webhookmetrics.NoopWebhookMetrics // no-op for matchCondition metrics for now, delegating authorization doesn't configure match conditions celmetrics.NoopMatcherMetrics } diff --git a/staging/src/k8s.io/apiserver/plugin/pkg/authorizer/webhook/metrics/metrics.go b/staging/src/k8s.io/apiserver/plugin/pkg/authorizer/webhook/metrics/metrics.go index 312f6ed8946..23f82cc6579 100644 --- a/staging/src/k8s.io/apiserver/plugin/pkg/authorizer/webhook/metrics/metrics.go +++ b/staging/src/k8s.io/apiserver/plugin/pkg/authorizer/webhook/metrics/metrics.go @@ -18,20 +18,26 @@ package metrics import ( "context" + "sync" "k8s.io/apiserver/pkg/authorization/cel" + compbasemetrics "k8s.io/component-base/metrics" + "k8s.io/component-base/metrics/legacyregistry" ) // AuthorizerMetrics specifies a set of methods that are used to register various metrics for the webhook authorizer type AuthorizerMetrics interface { // Request total and latency metrics RequestMetrics + // Webhook count, latency, and fail open metrics + WebhookMetrics // match condition metrics cel.MatcherMetrics } type NoopAuthorizerMetrics struct { NoopRequestMetrics + NoopWebhookMetrics cel.NoopMatcherMetrics } @@ -47,3 +53,114 @@ type NoopRequestMetrics struct{} func (NoopRequestMetrics) RecordRequestTotal(context.Context, string) {} func (NoopRequestMetrics) RecordRequestLatency(context.Context, string, float64) {} + +type WebhookMetrics interface { + // RecordWebhookEvaluation increments with each round-trip of a webhook authorizer. + // result is one of: + // - canceled: the call invoking the webhook request was canceled + // - timeout: the webhook request timed out + // - error: the webhook response completed and was invalid + // - success: the webhook response completed and was well-formed + RecordWebhookEvaluation(ctx context.Context, name, result string) + // RecordWebhookDuration records latency for each round-trip of a webhook authorizer. + // result is one of: + // - canceled: the call invoking the webhook request was canceled + // - timeout: the webhook request timed out + // - error: the webhook response completed and was invalid + // - success: the webhook response completed and was well-formed + RecordWebhookDuration(ctx context.Context, name, result string, duration float64) + // RecordWebhookFailOpen increments when a webhook timeout or error results in a fail open + // of a request which has not been canceled. + // result is one of: + // - timeout: the webhook request timed out + // - error: the webhook response completed and was invalid + RecordWebhookFailOpen(ctx context.Context, name, result string) +} + +type NoopWebhookMetrics struct{} + +func (NoopWebhookMetrics) RecordWebhookEvaluation(ctx context.Context, name, result string) {} +func (NoopWebhookMetrics) RecordWebhookDuration(ctx context.Context, name, result string, duration float64) { +} +func (NoopWebhookMetrics) RecordWebhookFailOpen(ctx context.Context, name, result string) {} + +var registerWebhookMetrics sync.Once + +// RegisterMetrics registers authorizer metrics. +func RegisterWebhookMetrics() { + registerWebhookMetrics.Do(func() { + legacyregistry.MustRegister(webhookEvaluations) + legacyregistry.MustRegister(webhookDuration) + legacyregistry.MustRegister(webhookFailOpen) + }) +} + +func ResetMetricsForTest() { + webhookEvaluations.Reset() + webhookDuration.Reset() + webhookFailOpen.Reset() +} + +const ( + namespace = "apiserver" + subsystem = "authorization" +) + +var ( + webhookEvaluations = compbasemetrics.NewCounterVec( + &compbasemetrics.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "webhook_evaluations_total", + Help: "Round-trips to authorization webhooks.", + StabilityLevel: compbasemetrics.ALPHA, + }, + []string{"name", "result"}, + ) + + webhookDuration = compbasemetrics.NewHistogramVec( + &compbasemetrics.HistogramOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "webhook_duration_seconds", + Help: "Request latency in seconds.", + Buckets: compbasemetrics.DefBuckets, + StabilityLevel: compbasemetrics.ALPHA, + }, + []string{"name", "result"}, + ) + + webhookFailOpen = compbasemetrics.NewCounterVec( + &compbasemetrics.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "webhook_evaluations_fail_open_total", + Help: "NoOpinion results due to webhook timeout or error.", + StabilityLevel: compbasemetrics.ALPHA, + }, + []string{"name", "result"}, + ) +) + +type webhookMetrics struct{} + +func NewWebhookMetrics() WebhookMetrics { + RegisterWebhookMetrics() + return webhookMetrics{} +} + +func ResetWebhookMetricsForTest() { + webhookEvaluations.Reset() + webhookDuration.Reset() + webhookFailOpen.Reset() +} + +func (webhookMetrics) RecordWebhookEvaluation(ctx context.Context, name, result string) { + webhookEvaluations.WithContext(ctx).WithLabelValues(name, result).Inc() +} +func (webhookMetrics) RecordWebhookDuration(ctx context.Context, name, result string, duration float64) { + webhookDuration.WithContext(ctx).WithLabelValues(name, result).Observe(duration) +} +func (webhookMetrics) RecordWebhookFailOpen(ctx context.Context, name, result string) { + webhookFailOpen.WithContext(ctx).WithLabelValues(name, result).Inc() +} diff --git a/staging/src/k8s.io/apiserver/plugin/pkg/authorizer/webhook/metrics/metrics_test.go b/staging/src/k8s.io/apiserver/plugin/pkg/authorizer/webhook/metrics/metrics_test.go new file mode 100644 index 00000000000..1af0435aa0c --- /dev/null +++ b/staging/src/k8s.io/apiserver/plugin/pkg/authorizer/webhook/metrics/metrics_test.go @@ -0,0 +1,86 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import ( + "context" + "strings" + "testing" + + "k8s.io/component-base/metrics/legacyregistry" + "k8s.io/component-base/metrics/testutil" +) + +func TestRecordWebhookMetrics(t *testing.T) { + testCases := []struct { + desc string + metrics []string + name string + result string + duration float64 + want string + }{ + { + desc: "evaluation failure total", + metrics: []string{ + "apiserver_authorization_webhook_duration_seconds", + "apiserver_authorization_webhook_evaluations_total", + "apiserver_authorization_webhook_evaluations_fail_open_total", + }, + name: "wh1.example.com", + result: "timeout", + duration: 1.5, + want: ` + # HELP apiserver_authorization_webhook_duration_seconds [ALPHA] Request latency in seconds. + # TYPE apiserver_authorization_webhook_duration_seconds histogram + apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="0.005"} 0 + apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="0.01"} 0 + apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="0.025"} 0 + apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="0.05"} 0 + apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="0.1"} 0 + apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="0.25"} 0 + apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="0.5"} 0 + apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="1"} 0 + apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="2.5"} 1 + apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="5"} 1 + apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="10"} 1 + apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="+Inf"} 1 + apiserver_authorization_webhook_duration_seconds_sum{name="wh1.example.com",result="timeout"} 1.5 + apiserver_authorization_webhook_duration_seconds_count{name="wh1.example.com",result="timeout"} 1 + # HELP apiserver_authorization_webhook_evaluations_fail_open_total [ALPHA] NoOpinion results due to webhook timeout or error. + # TYPE apiserver_authorization_webhook_evaluations_fail_open_total counter + apiserver_authorization_webhook_evaluations_fail_open_total{name="wh1.example.com",result="timeout"} 1 + # HELP apiserver_authorization_webhook_evaluations_total [ALPHA] Round-trips to authorization webhooks. + # TYPE apiserver_authorization_webhook_evaluations_total counter + apiserver_authorization_webhook_evaluations_total{name="wh1.example.com",result="timeout"} 1 + `, + }, + } + + for _, tt := range testCases { + t.Run(tt.desc, func(t *testing.T) { + ResetWebhookMetricsForTest() + m := NewWebhookMetrics() + m.RecordWebhookDuration(context.Background(), tt.name, tt.result, tt.duration) + m.RecordWebhookEvaluation(context.Background(), tt.name, tt.result) + m.RecordWebhookFailOpen(context.Background(), tt.name, tt.result) + if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(tt.want), tt.metrics...); err != nil { + t.Fatal(err) + } + }) + } +} diff --git a/staging/src/k8s.io/apiserver/plugin/pkg/authorizer/webhook/metrics_test.go b/staging/src/k8s.io/apiserver/plugin/pkg/authorizer/webhook/metrics_test.go index d8385bb0a40..422358f1483 100644 --- a/staging/src/k8s.io/apiserver/plugin/pkg/authorizer/webhook/metrics_test.go +++ b/staging/src/k8s.io/apiserver/plugin/pkg/authorizer/webhook/metrics_test.go @@ -18,8 +18,10 @@ package webhook import ( "context" + "net/http" "testing" + authorizationv1 "k8s.io/api/authorization/v1" "k8s.io/apiserver/pkg/apis/apiserver" "k8s.io/apiserver/pkg/authentication/user" "k8s.io/apiserver/pkg/authorization/authorizer" @@ -29,11 +31,15 @@ import ( func TestAuthorizerMetrics(t *testing.T) { scenarios := []struct { name string + canceledRequest bool clientCert, clientKey, clientCA []byte serverCert, serverKey, serverCA []byte authzFakeServiceStatusCode int authFakeServiceDeny bool expectedRegisteredStatusCode string + expectEvalutionResult string + expectDurationResult string + expectFailOpenResult string wantErr bool }{ { @@ -41,6 +47,31 @@ func TestAuthorizerMetrics(t *testing.T) { clientCert: clientCert, clientKey: clientKey, clientCA: caCert, serverCert: serverCert, serverKey: serverKey, serverCA: caCert, expectedRegisteredStatusCode: "200", + expectEvalutionResult: "success", + expectDurationResult: "success", + expectFailOpenResult: "", + }, + + { + name: "timed out request", + clientCert: clientCert, clientKey: clientKey, clientCA: caCert, + serverCert: serverCert, serverKey: serverKey, serverCA: caCert, + authzFakeServiceStatusCode: http.StatusGatewayTimeout, + expectedRegisteredStatusCode: "504", + expectEvalutionResult: "timeout", + expectDurationResult: "timeout", + expectFailOpenResult: "timeout", + }, + + { + name: "canceled request", + clientCert: clientCert, clientKey: clientKey, clientCA: caCert, + serverCert: serverCert, serverKey: serverKey, serverCA: caCert, + canceledRequest: true, + expectedRegisteredStatusCode: "", + expectEvalutionResult: "canceled", + expectDurationResult: "canceled", + expectFailOpenResult: "", }, { @@ -49,6 +80,9 @@ func TestAuthorizerMetrics(t *testing.T) { serverCert: serverCert, serverKey: serverKey, serverCA: caCert, authzFakeServiceStatusCode: 500, expectedRegisteredStatusCode: "500", + expectEvalutionResult: "error", + expectDurationResult: "error", + expectFailOpenResult: "error", }, { @@ -56,17 +90,28 @@ func TestAuthorizerMetrics(t *testing.T) { clientCert: clientCert, clientKey: clientKey, clientCA: caCert, serverCert: serverCert, serverKey: serverKey, serverCA: badCACert, expectedRegisteredStatusCode: "", + expectEvalutionResult: "error", + expectDurationResult: "error", + expectFailOpenResult: "error", wantErr: true, }, } for _, scenario := range scenarios { t.Run(scenario.name, func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + service := new(mockV1Service) service.statusCode = scenario.authzFakeServiceStatusCode if service.statusCode == 0 { service.statusCode = 200 } + service.reviewHook = func(*authorizationv1.SubjectAccessReview) { + if scenario.canceledRequest { + cancel() + } + } service.allow = !scenario.authFakeServiceDeny server, err := NewV1TestServer(service, scenario.serverCert, scenario.serverKey, scenario.serverCA) @@ -84,7 +129,7 @@ func TestAuthorizerMetrics(t *testing.T) { } attr := authorizer.AttributesRecord{User: &user.DefaultInfo{}} - _, _, err = wh.Authorize(context.Background(), attr) + _, _, err = wh.Authorize(ctx, attr) if scenario.wantErr { if err == nil { t.Errorf("expected error making authorization request: %v", err) @@ -98,6 +143,16 @@ func TestAuthorizerMetrics(t *testing.T) { if fakeAuthzMetrics.latencyCode != scenario.expectedRegisteredStatusCode { t.Errorf("incorrect status code recorded for RecordRequestLatency method, expected = %v, got %v", scenario.expectedRegisteredStatusCode, fakeAuthzMetrics.latencyCode) } + + if fakeAuthzMetrics.evaluationsResult != scenario.expectEvalutionResult { + t.Errorf("expected evaluationsResult %q, got %q", scenario.expectEvalutionResult, fakeAuthzMetrics.evaluationsResult) + } + if fakeAuthzMetrics.durationResult != scenario.expectDurationResult { + t.Errorf("expected durationResult %q, got %q", scenario.expectDurationResult, fakeAuthzMetrics.durationResult) + } + if fakeAuthzMetrics.failOpenResult != scenario.expectFailOpenResult { + t.Errorf("expected failOpenResult %q, got %q", scenario.expectFailOpenResult, fakeAuthzMetrics.failOpenResult) + } }) } } @@ -108,6 +163,15 @@ type fakeAuthorizerMetrics struct { latency float64 latencyCode string + evaluations int + evaluationsResult string + + duration float64 + durationResult string + + failOpen int + failOpenResult string + cel.NoopMatcherMetrics } @@ -119,3 +183,16 @@ func (f *fakeAuthorizerMetrics) RecordRequestLatency(_ context.Context, code str f.latency = latency f.latencyCode = code } + +func (f *fakeAuthorizerMetrics) RecordWebhookEvaluation(ctx context.Context, name, result string) { + f.evaluations += 1 + f.evaluationsResult = result +} +func (f *fakeAuthorizerMetrics) RecordWebhookDuration(ctx context.Context, name, result string, duration float64) { + f.duration = duration + f.durationResult = result +} +func (f *fakeAuthorizerMetrics) RecordWebhookFailOpen(ctx context.Context, name, result string) { + f.failOpen += 1 + f.failOpenResult = result +} diff --git a/staging/src/k8s.io/apiserver/plugin/pkg/authorizer/webhook/webhook.go b/staging/src/k8s.io/apiserver/plugin/pkg/authorizer/webhook/webhook.go index 65c320f6d6c..d97b121453a 100644 --- a/staging/src/k8s.io/apiserver/plugin/pkg/authorizer/webhook/webhook.go +++ b/staging/src/k8s.io/apiserver/plugin/pkg/authorizer/webhook/webhook.go @@ -20,12 +20,15 @@ package webhook import ( "context" "encoding/json" + "errors" "fmt" + "net/http" "strconv" "time" authorizationv1 "k8s.io/api/authorization/v1" authorizationv1beta1 "k8s.io/api/authorization/v1beta1" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" @@ -233,6 +236,7 @@ func (w *WebhookAuthorizer) Authorize(ctx context.Context, attr authorizer.Attri r.Status = entry.(authorizationv1.SubjectAccessReviewStatus) } else { var result *authorizationv1.SubjectAccessReview + var metricsResult string // WithExponentialBackoff will return SAR create error (sarErr) if any. if err := webhook.WithExponentialBackoff(ctx, w.retryBackoff, func() error { var sarErr error @@ -242,6 +246,19 @@ func (w *WebhookAuthorizer) Authorize(ctx context.Context, attr authorizer.Attri result, statusCode, sarErr = w.subjectAccessReview.Create(ctx, r, metav1.CreateOptions{}) latency := time.Since(start) + switch { + case sarErr == nil: + metricsResult = "success" + case ctx.Err() != nil: + metricsResult = "canceled" + case errors.Is(sarErr, context.DeadlineExceeded) || apierrors.IsTimeout(sarErr) || statusCode == http.StatusGatewayTimeout: + metricsResult = "timeout" + default: + metricsResult = "error" + } + w.metrics.RecordWebhookEvaluation(ctx, w.name, metricsResult) + w.metrics.RecordWebhookDuration(ctx, w.name, metricsResult, latency.Seconds()) + if statusCode != 0 { w.metrics.RecordRequestTotal(ctx, strconv.Itoa(statusCode)) w.metrics.RecordRequestLatency(ctx, strconv.Itoa(statusCode), latency.Seconds()) @@ -256,6 +273,12 @@ func (w *WebhookAuthorizer) Authorize(ctx context.Context, attr authorizer.Attri return sarErr }, webhook.DefaultShouldRetry); err != nil { klog.Errorf("Failed to make webhook authorizer request: %v", err) + + // we're returning NoOpinion, and the parent context has not timed out or been canceled + if w.decisionOnError == authorizer.DecisionNoOpinion && ctx.Err() == nil { + w.metrics.RecordWebhookFailOpen(ctx, w.name, metricsResult) + } + return w.decisionOnError, "", err } diff --git a/staging/src/k8s.io/apiserver/plugin/pkg/authorizer/webhook/webhook_v1_test.go b/staging/src/k8s.io/apiserver/plugin/pkg/authorizer/webhook/webhook_v1_test.go index a130a2f154b..2f5125e37b8 100644 --- a/staging/src/k8s.io/apiserver/plugin/pkg/authorizer/webhook/webhook_v1_test.go +++ b/staging/src/k8s.io/apiserver/plugin/pkg/authorizer/webhook/webhook_v1_test.go @@ -315,11 +315,18 @@ type mockV1Service struct { allow bool statusCode int called int + + // reviewHook is called just before returning from the Review() method + reviewHook func(*authorizationv1.SubjectAccessReview) } func (m *mockV1Service) Review(r *authorizationv1.SubjectAccessReview) { m.called++ r.Status.Allowed = m.allow + + if m.reviewHook != nil { + m.reviewHook(r) + } } func (m *mockV1Service) Allow() { m.allow = true } func (m *mockV1Service) Deny() { m.allow = false } @@ -1414,5 +1421,6 @@ func celAuthorizerMetrics() metrics.AuthorizerMetrics { type celAuthorizerMetricsType struct { metrics.NoopRequestMetrics + metrics.NoopWebhookMetrics celmetrics.MatcherMetrics } diff --git a/test/integration/auth/authz_config_test.go b/test/integration/auth/authz_config_test.go index 2ea7fe5fb1d..213955fa5dc 100644 --- a/test/integration/auth/authz_config_test.go +++ b/test/integration/auth/authz_config_test.go @@ -25,6 +25,7 @@ import ( "net/http/httptest" "os" "path/filepath" + "reflect" "regexp" "strconv" "strings" @@ -41,6 +42,7 @@ import ( "k8s.io/apiserver/pkg/features" authzmetrics "k8s.io/apiserver/pkg/server/options/authorizationconfig/metrics" utilfeature "k8s.io/apiserver/pkg/util/feature" + webhookmetrics "k8s.io/apiserver/plugin/pkg/authorizer/webhook/metrics" clientset "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" featuregatetesting "k8s.io/component-base/featuregate/testing" @@ -145,6 +147,7 @@ users: ` // returns malformed responses when called + errorName := "error.example.com" serverErrorCalled := atomic.Int32{} serverError := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { serverErrorCalled.Add(1) @@ -164,6 +167,7 @@ users: } // hangs for 2 seconds when called + timeoutName := "timeout.example.com" serverTimeoutCalled := atomic.Int32{} serverTimeout := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { serverTimeoutCalled.Add(1) @@ -204,6 +208,7 @@ users: } // returns a no opinion response when called + noOpinionName := "noopinion.example.com" serverNoOpinionCalled := atomic.Int32{} serverNoOpinion := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { serverNoOpinionCalled.Add(1) @@ -224,6 +229,26 @@ users: t.Fatal(err) } + // returns malformed responses when called, which is then configured to fail open + failOpenName := "failopen.example.com" + serverFailOpenCalled := atomic.Int32{} + serverFailOpen := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { + serverFailOpenCalled.Add(1) + sar := &authorizationv1.SubjectAccessReview{} + if err := json.NewDecoder(req.Body).Decode(sar); err != nil { + t.Error(err) + } + t.Log("serverFailOpen", sar) + if _, err := w.Write([]byte(`malformed response`)); err != nil { + t.Error(err) + } + })) + defer serverFailOpen.Close() + serverFailOpenKubeconfigName := filepath.Join(dir, "failOpen.yaml") + if err := os.WriteFile(serverFailOpenKubeconfigName, []byte(fmt.Sprintf(kubeconfigTemplate, serverFailOpen.URL)), os.FileMode(0644)); err != nil { + t.Fatal(err) + } + // returns an allow response when called allowName := "allow.example.com" serverAllowCalled := atomic.Int32{} @@ -273,14 +298,16 @@ users: serverTimeoutCalled.Store(0) serverDenyCalled.Store(0) serverNoOpinionCalled.Store(0) + serverFailOpenCalled.Store(0) serverAllowCalled.Store(0) serverAllowReloadedCalled.Store(0) authorizationmetrics.ResetMetricsForTest() celmetrics.ResetMetricsForTest() + webhookmetrics.ResetMetricsForTest() } var adminClient *clientset.Clientset type counts struct { - errorCount, timeoutCount, denyCount, noOpinionCount, allowCount, allowReloadedCount, webhookExclusionCount, evalErrorsCount int32 + errorCount, timeoutCount, denyCount, noOpinionCount, failOpenCount, allowCount, allowReloadedCount, webhookExclusionCount, evalErrorsCount int32 } assertCounts := func(c counts) { t.Helper() @@ -288,30 +315,40 @@ users: if err != nil { t.Fatalf("error getting metrics: %v", err) } - if e, a := c.errorCount, serverErrorCalled.Load(); e != a { - t.Fatalf("expected fail webhook calls: %d, got %d", e, a) - } - if e, a := c.timeoutCount, serverTimeoutCalled.Load(); e != a { - t.Fatalf("expected timeout webhook calls: %d, got %d", e, a) - } - if e, a := c.denyCount, serverDenyCalled.Load(); e != a { - t.Fatalf("expected deny webhook calls: %d, got %d", e, a) + + assertCount := func(name string, expected int32, serverCalls *atomic.Int32) { + t.Helper() + if actual := serverCalls.Load(); expected != actual { + t.Fatalf("expected %q webhook calls: %d, got %d", name, expected, actual) + } + if actual := int32(metrics.whTotal[name]); expected != actual { + t.Fatalf("expected %q webhook metric call count: %d, got %d (%#v)", name, expected, actual, metrics.whTotal) + } + if actual := int32(metrics.whDurationCount[name]); expected != actual { + t.Fatalf("expected %q webhook metric duration count: %d, got %d (%#v)", name, expected, actual, metrics.whDurationCount) + } } + + assertCount(errorName, c.errorCount, &serverErrorCalled) + assertCount(timeoutName, c.timeoutCount, &serverTimeoutCalled) + assertCount(denyName, c.denyCount, &serverDenyCalled) if e, a := c.denyCount, metrics.decisions[authorizerKey{authorizerType: "Webhook", authorizerName: denyName}]["denied"]; e != int32(a) { t.Fatalf("expected deny webhook denied metrics calls: %d, got %d", e, a) } - if e, a := c.noOpinionCount, serverNoOpinionCalled.Load(); e != a { - t.Fatalf("expected noOpinion webhook calls: %d, got %d", e, a) + assertCount(noOpinionName, c.noOpinionCount, &serverNoOpinionCalled) + assertCount(failOpenName, c.failOpenCount, &serverFailOpenCalled) + expectedFailOpenCounts := map[string]int{} + if c.failOpenCount > 0 { + expectedFailOpenCounts[failOpenName] = int(c.failOpenCount) } - if e, a := c.allowCount, serverAllowCalled.Load(); e != a { - t.Fatalf("expected allow webhook calls: %d, got %d", e, a) + if !reflect.DeepEqual(expectedFailOpenCounts, metrics.whFailOpenTotal) { + t.Fatalf("expected fail open %#v, got %#v", expectedFailOpenCounts, metrics.whFailOpenTotal) } + assertCount(allowName, c.allowCount, &serverAllowCalled) if e, a := c.allowCount, metrics.decisions[authorizerKey{authorizerType: "Webhook", authorizerName: allowName}]["allowed"]; e != int32(a) { t.Fatalf("expected allow webhook allowed metrics calls: %d, got %d", e, a) } - if e, a := c.allowReloadedCount, serverAllowReloadedCalled.Load(); e != a { - t.Fatalf("expected allowReloaded webhook calls: %d, got %d", e, a) - } + assertCount(allowReloadedName, c.allowReloadedCount, &serverAllowReloadedCalled) if e, a := c.allowReloadedCount, metrics.decisions[authorizerKey{authorizerType: "Webhook", authorizerName: allowReloadedName}]["allowed"]; e != int32(a) { t.Fatalf("expected allowReloaded webhook allowed metrics calls: %d, got %d", e, a) } @@ -330,7 +367,7 @@ apiVersion: apiserver.config.k8s.io/v1alpha1 kind: AuthorizationConfiguration authorizers: - type: Webhook - name: error.example.com + name: `+errorName+` webhook: timeout: 5s failurePolicy: Deny @@ -347,7 +384,7 @@ authorizers: - expression: 'request.resourceAttributes.name == "error"' - type: Webhook - name: timeout.example.com + name: `+timeoutName+` webhook: timeout: 1s failurePolicy: Deny @@ -381,7 +418,7 @@ authorizers: - expression: 'request.resourceAttributes.namespace == "fail"' - type: Webhook - name: noopinion.example.com + name: `+noOpinionName+` webhook: timeout: 5s failurePolicy: Deny @@ -392,6 +429,19 @@ authorizers: type: KubeConfigFile kubeConfigFile: `+serverNoOpinionKubeconfigName+` +- type: Webhook + name: `+failOpenName+` + webhook: + timeout: 5s + failurePolicy: NoOpinion + subjectAccessReviewVersion: v1 + matchConditionSubjectAccessReviewVersion: v1 + authorizedTTL: 1ms + unauthorizedTTL: 1ms + connectionInfo: + type: KubeConfigFile + kubeConfigFile: `+serverFailOpenKubeconfigName+` + - type: Webhook name: `+allowName+` webhook: @@ -498,7 +548,7 @@ authorizers: t.Fatal("expected allowed, got denied") } else { t.Log(result.Status.Reason) - assertCounts(counts{noOpinionCount: 1, allowCount: 1, webhookExclusionCount: 3}) + assertCounts(counts{noOpinionCount: 1, failOpenCount: 1, allowCount: 1, webhookExclusionCount: 3}) } // the timeout webhook results in match condition eval errors when evaluating a non-resource request @@ -581,7 +631,7 @@ authorizers: t.Fatal("expected allowed, got denied") } else { t.Log(result.Status.Reason) - assertCounts(counts{noOpinionCount: 1, allowCount: 1, webhookExclusionCount: 3}) + assertCounts(counts{noOpinionCount: 1, failOpenCount: 1, allowCount: 1, webhookExclusionCount: 3}) } // write good config with different webhook @@ -718,6 +768,10 @@ type metrics struct { decisions map[authorizerKey]map[string]int exclusions int evalErrors int + + whTotal map[string]int + whFailOpenTotal map[string]int + whDurationCount map[string]int } type authorizerKey struct { authorizerType string @@ -727,6 +781,9 @@ type authorizerKey struct { var decisionMetric = regexp.MustCompile(`apiserver_authorization_decisions_total\{decision="(.*?)",name="(.*?)",type="(.*?)"\} (\d+)`) var webhookExclusionMetric = regexp.MustCompile(`apiserver_authorization_match_condition_exclusions_total\{name="(.*?)",type="(.*?)"\} (\d+)`) var webhookMatchConditionEvalErrorMetric = regexp.MustCompile(`apiserver_authorization_match_condition_evaluation_errors_total\{name="(.*?)",type="(.*?)"\} (\d+)`) +var whTotalMetric = regexp.MustCompile(`apiserver_authorization_webhook_evaluations_total{name="(.*?)",result="(.*?)"} (\d+)`) +var webhookDurationMetric = regexp.MustCompile(`apiserver_authorization_webhook_duration_seconds_count{name="(.*?)",result="(.*?)"} (\d+)`) +var webhookFailOpenMetric = regexp.MustCompile(`apiserver_authorization_webhook_evaluations_fail_open_total{name="(.*?)",result="(.*?)"} (\d+)`) func getMetrics(t *testing.T, client *clientset.Clientset) (*metrics, error) { data, err := client.RESTClient().Get().AbsPath("/metrics").DoRaw(context.TODO()) @@ -744,6 +801,10 @@ func getMetrics(t *testing.T, client *clientset.Clientset) (*metrics, error) { } var m metrics + + m.whTotal = map[string]int{} + m.whFailOpenTotal = map[string]int{} + m.whDurationCount = map[string]int{} m.exclusions = 0 for _, line := range strings.Split(string(data), "\n") { if matches := decisionMetric.FindStringSubmatch(line); matches != nil { @@ -780,6 +841,33 @@ func getMetrics(t *testing.T, client *clientset.Clientset) (*metrics, error) { t.Log(count) m.evalErrors += count } + if matches := whTotalMetric.FindStringSubmatch(line); matches != nil { + t.Log(matches) + count, err := strconv.Atoi(matches[3]) + if err != nil { + return nil, err + } + t.Log(count) + m.whTotal[matches[1]] += count + } + if matches := webhookDurationMetric.FindStringSubmatch(line); matches != nil { + t.Log(matches) + count, err := strconv.Atoi(matches[3]) + if err != nil { + return nil, err + } + t.Log(count) + m.whDurationCount[matches[1]] += count + } + if matches := webhookFailOpenMetric.FindStringSubmatch(line); matches != nil { + t.Log(matches) + count, err := strconv.Atoi(matches[3]) + if err != nil { + return nil, err + } + t.Log(count) + m.whFailOpenTotal[matches[1]] += count + } if strings.HasPrefix(line, "apiserver_authorization_config_controller_automatic_reload_last_timestamp_seconds") { t.Log(line) values := strings.Split(line, " ")