Add authorization webhook duration/count/failopen metrics

This commit is contained in:
Jordan Liggitt 2024-03-02 01:44:28 -05:00
parent 95875b7723
commit 79b344d85e
No known key found for this signature in database
8 changed files with 426 additions and 23 deletions

View File

@ -145,7 +145,7 @@ func (r *reloadableAuthorizerResolver) newForConfig(authzConfig *authzconfig.Aut
decisionOnError, decisionOnError,
configuredAuthorizer.Webhook.MatchConditions, configuredAuthorizer.Webhook.MatchConditions,
configuredAuthorizer.Name, configuredAuthorizer.Name,
kubeapiserverWebhookMetrics{MatcherMetrics: cel.NewMatcherMetrics()}, kubeapiserverWebhookMetrics{WebhookMetrics: webhookmetrics.NewWebhookMetrics(), MatcherMetrics: cel.NewMatcherMetrics()},
) )
if err != nil { if err != nil {
return nil, nil, err return nil, nil, err
@ -169,6 +169,8 @@ func (r *reloadableAuthorizerResolver) newForConfig(authzConfig *authzconfig.Aut
type kubeapiserverWebhookMetrics struct { type kubeapiserverWebhookMetrics struct {
// kube-apiserver doesn't report request metrics // kube-apiserver doesn't report request metrics
webhookmetrics.NoopRequestMetrics webhookmetrics.NoopRequestMetrics
// kube-apiserver does report webhook metrics
webhookmetrics.WebhookMetrics
// kube-apiserver does report matchCondition metrics // kube-apiserver does report matchCondition metrics
cel.MatcherMetrics cel.MatcherMetrics
} }

View File

@ -60,6 +60,8 @@ var (
var _ = webhookmetrics.AuthorizerMetrics(delegatingAuthorizerMetrics{}) var _ = webhookmetrics.AuthorizerMetrics(delegatingAuthorizerMetrics{})
type delegatingAuthorizerMetrics struct { type delegatingAuthorizerMetrics struct {
// no-op for webhook metrics for now, delegating authorization reports original total/latency metrics
webhookmetrics.NoopWebhookMetrics
// no-op for matchCondition metrics for now, delegating authorization doesn't configure match conditions // no-op for matchCondition metrics for now, delegating authorization doesn't configure match conditions
celmetrics.NoopMatcherMetrics celmetrics.NoopMatcherMetrics
} }

View File

@ -18,20 +18,26 @@ package metrics
import ( import (
"context" "context"
"sync"
"k8s.io/apiserver/pkg/authorization/cel" "k8s.io/apiserver/pkg/authorization/cel"
compbasemetrics "k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
) )
// AuthorizerMetrics specifies a set of methods that are used to register various metrics for the webhook authorizer // AuthorizerMetrics specifies a set of methods that are used to register various metrics for the webhook authorizer
type AuthorizerMetrics interface { type AuthorizerMetrics interface {
// Request total and latency metrics // Request total and latency metrics
RequestMetrics RequestMetrics
// Webhook count, latency, and fail open metrics
WebhookMetrics
// match condition metrics // match condition metrics
cel.MatcherMetrics cel.MatcherMetrics
} }
type NoopAuthorizerMetrics struct { type NoopAuthorizerMetrics struct {
NoopRequestMetrics NoopRequestMetrics
NoopWebhookMetrics
cel.NoopMatcherMetrics cel.NoopMatcherMetrics
} }
@ -47,3 +53,114 @@ type NoopRequestMetrics struct{}
func (NoopRequestMetrics) RecordRequestTotal(context.Context, string) {} func (NoopRequestMetrics) RecordRequestTotal(context.Context, string) {}
func (NoopRequestMetrics) RecordRequestLatency(context.Context, string, float64) {} func (NoopRequestMetrics) RecordRequestLatency(context.Context, string, float64) {}
type WebhookMetrics interface {
// RecordWebhookEvaluation increments with each round-trip of a webhook authorizer.
// result is one of:
// - canceled: the call invoking the webhook request was canceled
// - timeout: the webhook request timed out
// - error: the webhook response completed and was invalid
// - success: the webhook response completed and was well-formed
RecordWebhookEvaluation(ctx context.Context, name, result string)
// RecordWebhookDuration records latency for each round-trip of a webhook authorizer.
// result is one of:
// - canceled: the call invoking the webhook request was canceled
// - timeout: the webhook request timed out
// - error: the webhook response completed and was invalid
// - success: the webhook response completed and was well-formed
RecordWebhookDuration(ctx context.Context, name, result string, duration float64)
// RecordWebhookFailOpen increments when a webhook timeout or error results in a fail open
// of a request which has not been canceled.
// result is one of:
// - timeout: the webhook request timed out
// - error: the webhook response completed and was invalid
RecordWebhookFailOpen(ctx context.Context, name, result string)
}
type NoopWebhookMetrics struct{}
func (NoopWebhookMetrics) RecordWebhookEvaluation(ctx context.Context, name, result string) {}
func (NoopWebhookMetrics) RecordWebhookDuration(ctx context.Context, name, result string, duration float64) {
}
func (NoopWebhookMetrics) RecordWebhookFailOpen(ctx context.Context, name, result string) {}
var registerWebhookMetrics sync.Once
// RegisterMetrics registers authorizer metrics.
func RegisterWebhookMetrics() {
registerWebhookMetrics.Do(func() {
legacyregistry.MustRegister(webhookEvaluations)
legacyregistry.MustRegister(webhookDuration)
legacyregistry.MustRegister(webhookFailOpen)
})
}
func ResetMetricsForTest() {
webhookEvaluations.Reset()
webhookDuration.Reset()
webhookFailOpen.Reset()
}
const (
namespace = "apiserver"
subsystem = "authorization"
)
var (
webhookEvaluations = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "webhook_evaluations_total",
Help: "Round-trips to authorization webhooks.",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"name", "result"},
)
webhookDuration = compbasemetrics.NewHistogramVec(
&compbasemetrics.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "webhook_duration_seconds",
Help: "Request latency in seconds.",
Buckets: compbasemetrics.DefBuckets,
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"name", "result"},
)
webhookFailOpen = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "webhook_evaluations_fail_open_total",
Help: "NoOpinion results due to webhook timeout or error.",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"name", "result"},
)
)
type webhookMetrics struct{}
func NewWebhookMetrics() WebhookMetrics {
RegisterWebhookMetrics()
return webhookMetrics{}
}
func ResetWebhookMetricsForTest() {
webhookEvaluations.Reset()
webhookDuration.Reset()
webhookFailOpen.Reset()
}
func (webhookMetrics) RecordWebhookEvaluation(ctx context.Context, name, result string) {
webhookEvaluations.WithContext(ctx).WithLabelValues(name, result).Inc()
}
func (webhookMetrics) RecordWebhookDuration(ctx context.Context, name, result string, duration float64) {
webhookDuration.WithContext(ctx).WithLabelValues(name, result).Observe(duration)
}
func (webhookMetrics) RecordWebhookFailOpen(ctx context.Context, name, result string) {
webhookFailOpen.WithContext(ctx).WithLabelValues(name, result).Inc()
}

View File

@ -0,0 +1,86 @@
/*
Copyright 2024 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
import (
"context"
"strings"
"testing"
"k8s.io/component-base/metrics/legacyregistry"
"k8s.io/component-base/metrics/testutil"
)
func TestRecordWebhookMetrics(t *testing.T) {
testCases := []struct {
desc string
metrics []string
name string
result string
duration float64
want string
}{
{
desc: "evaluation failure total",
metrics: []string{
"apiserver_authorization_webhook_duration_seconds",
"apiserver_authorization_webhook_evaluations_total",
"apiserver_authorization_webhook_evaluations_fail_open_total",
},
name: "wh1.example.com",
result: "timeout",
duration: 1.5,
want: `
# HELP apiserver_authorization_webhook_duration_seconds [ALPHA] Request latency in seconds.
# TYPE apiserver_authorization_webhook_duration_seconds histogram
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="0.005"} 0
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="0.01"} 0
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="0.025"} 0
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="0.05"} 0
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="0.1"} 0
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="0.25"} 0
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="0.5"} 0
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="1"} 0
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="2.5"} 1
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="5"} 1
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="10"} 1
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="+Inf"} 1
apiserver_authorization_webhook_duration_seconds_sum{name="wh1.example.com",result="timeout"} 1.5
apiserver_authorization_webhook_duration_seconds_count{name="wh1.example.com",result="timeout"} 1
# HELP apiserver_authorization_webhook_evaluations_fail_open_total [ALPHA] NoOpinion results due to webhook timeout or error.
# TYPE apiserver_authorization_webhook_evaluations_fail_open_total counter
apiserver_authorization_webhook_evaluations_fail_open_total{name="wh1.example.com",result="timeout"} 1
# HELP apiserver_authorization_webhook_evaluations_total [ALPHA] Round-trips to authorization webhooks.
# TYPE apiserver_authorization_webhook_evaluations_total counter
apiserver_authorization_webhook_evaluations_total{name="wh1.example.com",result="timeout"} 1
`,
},
}
for _, tt := range testCases {
t.Run(tt.desc, func(t *testing.T) {
ResetWebhookMetricsForTest()
m := NewWebhookMetrics()
m.RecordWebhookDuration(context.Background(), tt.name, tt.result, tt.duration)
m.RecordWebhookEvaluation(context.Background(), tt.name, tt.result)
m.RecordWebhookFailOpen(context.Background(), tt.name, tt.result)
if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(tt.want), tt.metrics...); err != nil {
t.Fatal(err)
}
})
}
}

View File

@ -18,8 +18,10 @@ package webhook
import ( import (
"context" "context"
"net/http"
"testing" "testing"
authorizationv1 "k8s.io/api/authorization/v1"
"k8s.io/apiserver/pkg/apis/apiserver" "k8s.io/apiserver/pkg/apis/apiserver"
"k8s.io/apiserver/pkg/authentication/user" "k8s.io/apiserver/pkg/authentication/user"
"k8s.io/apiserver/pkg/authorization/authorizer" "k8s.io/apiserver/pkg/authorization/authorizer"
@ -29,11 +31,15 @@ import (
func TestAuthorizerMetrics(t *testing.T) { func TestAuthorizerMetrics(t *testing.T) {
scenarios := []struct { scenarios := []struct {
name string name string
canceledRequest bool
clientCert, clientKey, clientCA []byte clientCert, clientKey, clientCA []byte
serverCert, serverKey, serverCA []byte serverCert, serverKey, serverCA []byte
authzFakeServiceStatusCode int authzFakeServiceStatusCode int
authFakeServiceDeny bool authFakeServiceDeny bool
expectedRegisteredStatusCode string expectedRegisteredStatusCode string
expectEvalutionResult string
expectDurationResult string
expectFailOpenResult string
wantErr bool wantErr bool
}{ }{
{ {
@ -41,6 +47,31 @@ func TestAuthorizerMetrics(t *testing.T) {
clientCert: clientCert, clientKey: clientKey, clientCA: caCert, clientCert: clientCert, clientKey: clientKey, clientCA: caCert,
serverCert: serverCert, serverKey: serverKey, serverCA: caCert, serverCert: serverCert, serverKey: serverKey, serverCA: caCert,
expectedRegisteredStatusCode: "200", expectedRegisteredStatusCode: "200",
expectEvalutionResult: "success",
expectDurationResult: "success",
expectFailOpenResult: "",
},
{
name: "timed out request",
clientCert: clientCert, clientKey: clientKey, clientCA: caCert,
serverCert: serverCert, serverKey: serverKey, serverCA: caCert,
authzFakeServiceStatusCode: http.StatusGatewayTimeout,
expectedRegisteredStatusCode: "504",
expectEvalutionResult: "timeout",
expectDurationResult: "timeout",
expectFailOpenResult: "timeout",
},
{
name: "canceled request",
clientCert: clientCert, clientKey: clientKey, clientCA: caCert,
serverCert: serverCert, serverKey: serverKey, serverCA: caCert,
canceledRequest: true,
expectedRegisteredStatusCode: "<error>",
expectEvalutionResult: "canceled",
expectDurationResult: "canceled",
expectFailOpenResult: "",
}, },
{ {
@ -49,6 +80,9 @@ func TestAuthorizerMetrics(t *testing.T) {
serverCert: serverCert, serverKey: serverKey, serverCA: caCert, serverCert: serverCert, serverKey: serverKey, serverCA: caCert,
authzFakeServiceStatusCode: 500, authzFakeServiceStatusCode: 500,
expectedRegisteredStatusCode: "500", expectedRegisteredStatusCode: "500",
expectEvalutionResult: "error",
expectDurationResult: "error",
expectFailOpenResult: "error",
}, },
{ {
@ -56,17 +90,28 @@ func TestAuthorizerMetrics(t *testing.T) {
clientCert: clientCert, clientKey: clientKey, clientCA: caCert, clientCert: clientCert, clientKey: clientKey, clientCA: caCert,
serverCert: serverCert, serverKey: serverKey, serverCA: badCACert, serverCert: serverCert, serverKey: serverKey, serverCA: badCACert,
expectedRegisteredStatusCode: "<error>", expectedRegisteredStatusCode: "<error>",
expectEvalutionResult: "error",
expectDurationResult: "error",
expectFailOpenResult: "error",
wantErr: true, wantErr: true,
}, },
} }
for _, scenario := range scenarios { for _, scenario := range scenarios {
t.Run(scenario.name, func(t *testing.T) { t.Run(scenario.name, func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
service := new(mockV1Service) service := new(mockV1Service)
service.statusCode = scenario.authzFakeServiceStatusCode service.statusCode = scenario.authzFakeServiceStatusCode
if service.statusCode == 0 { if service.statusCode == 0 {
service.statusCode = 200 service.statusCode = 200
} }
service.reviewHook = func(*authorizationv1.SubjectAccessReview) {
if scenario.canceledRequest {
cancel()
}
}
service.allow = !scenario.authFakeServiceDeny service.allow = !scenario.authFakeServiceDeny
server, err := NewV1TestServer(service, scenario.serverCert, scenario.serverKey, scenario.serverCA) server, err := NewV1TestServer(service, scenario.serverCert, scenario.serverKey, scenario.serverCA)
@ -84,7 +129,7 @@ func TestAuthorizerMetrics(t *testing.T) {
} }
attr := authorizer.AttributesRecord{User: &user.DefaultInfo{}} attr := authorizer.AttributesRecord{User: &user.DefaultInfo{}}
_, _, err = wh.Authorize(context.Background(), attr) _, _, err = wh.Authorize(ctx, attr)
if scenario.wantErr { if scenario.wantErr {
if err == nil { if err == nil {
t.Errorf("expected error making authorization request: %v", err) t.Errorf("expected error making authorization request: %v", err)
@ -98,6 +143,16 @@ func TestAuthorizerMetrics(t *testing.T) {
if fakeAuthzMetrics.latencyCode != scenario.expectedRegisteredStatusCode { if fakeAuthzMetrics.latencyCode != scenario.expectedRegisteredStatusCode {
t.Errorf("incorrect status code recorded for RecordRequestLatency method, expected = %v, got %v", scenario.expectedRegisteredStatusCode, fakeAuthzMetrics.latencyCode) t.Errorf("incorrect status code recorded for RecordRequestLatency method, expected = %v, got %v", scenario.expectedRegisteredStatusCode, fakeAuthzMetrics.latencyCode)
} }
if fakeAuthzMetrics.evaluationsResult != scenario.expectEvalutionResult {
t.Errorf("expected evaluationsResult %q, got %q", scenario.expectEvalutionResult, fakeAuthzMetrics.evaluationsResult)
}
if fakeAuthzMetrics.durationResult != scenario.expectDurationResult {
t.Errorf("expected durationResult %q, got %q", scenario.expectDurationResult, fakeAuthzMetrics.durationResult)
}
if fakeAuthzMetrics.failOpenResult != scenario.expectFailOpenResult {
t.Errorf("expected failOpenResult %q, got %q", scenario.expectFailOpenResult, fakeAuthzMetrics.failOpenResult)
}
}) })
} }
} }
@ -108,6 +163,15 @@ type fakeAuthorizerMetrics struct {
latency float64 latency float64
latencyCode string latencyCode string
evaluations int
evaluationsResult string
duration float64
durationResult string
failOpen int
failOpenResult string
cel.NoopMatcherMetrics cel.NoopMatcherMetrics
} }
@ -119,3 +183,16 @@ func (f *fakeAuthorizerMetrics) RecordRequestLatency(_ context.Context, code str
f.latency = latency f.latency = latency
f.latencyCode = code f.latencyCode = code
} }
func (f *fakeAuthorizerMetrics) RecordWebhookEvaluation(ctx context.Context, name, result string) {
f.evaluations += 1
f.evaluationsResult = result
}
func (f *fakeAuthorizerMetrics) RecordWebhookDuration(ctx context.Context, name, result string, duration float64) {
f.duration = duration
f.durationResult = result
}
func (f *fakeAuthorizerMetrics) RecordWebhookFailOpen(ctx context.Context, name, result string) {
f.failOpen += 1
f.failOpenResult = result
}

View File

@ -20,12 +20,15 @@ package webhook
import ( import (
"context" "context"
"encoding/json" "encoding/json"
"errors"
"fmt" "fmt"
"net/http"
"strconv" "strconv"
"time" "time"
authorizationv1 "k8s.io/api/authorization/v1" authorizationv1 "k8s.io/api/authorization/v1"
authorizationv1beta1 "k8s.io/api/authorization/v1beta1" authorizationv1beta1 "k8s.io/api/authorization/v1beta1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/runtime/schema"
@ -233,6 +236,7 @@ func (w *WebhookAuthorizer) Authorize(ctx context.Context, attr authorizer.Attri
r.Status = entry.(authorizationv1.SubjectAccessReviewStatus) r.Status = entry.(authorizationv1.SubjectAccessReviewStatus)
} else { } else {
var result *authorizationv1.SubjectAccessReview var result *authorizationv1.SubjectAccessReview
var metricsResult string
// WithExponentialBackoff will return SAR create error (sarErr) if any. // WithExponentialBackoff will return SAR create error (sarErr) if any.
if err := webhook.WithExponentialBackoff(ctx, w.retryBackoff, func() error { if err := webhook.WithExponentialBackoff(ctx, w.retryBackoff, func() error {
var sarErr error var sarErr error
@ -242,6 +246,19 @@ func (w *WebhookAuthorizer) Authorize(ctx context.Context, attr authorizer.Attri
result, statusCode, sarErr = w.subjectAccessReview.Create(ctx, r, metav1.CreateOptions{}) result, statusCode, sarErr = w.subjectAccessReview.Create(ctx, r, metav1.CreateOptions{})
latency := time.Since(start) latency := time.Since(start)
switch {
case sarErr == nil:
metricsResult = "success"
case ctx.Err() != nil:
metricsResult = "canceled"
case errors.Is(sarErr, context.DeadlineExceeded) || apierrors.IsTimeout(sarErr) || statusCode == http.StatusGatewayTimeout:
metricsResult = "timeout"
default:
metricsResult = "error"
}
w.metrics.RecordWebhookEvaluation(ctx, w.name, metricsResult)
w.metrics.RecordWebhookDuration(ctx, w.name, metricsResult, latency.Seconds())
if statusCode != 0 { if statusCode != 0 {
w.metrics.RecordRequestTotal(ctx, strconv.Itoa(statusCode)) w.metrics.RecordRequestTotal(ctx, strconv.Itoa(statusCode))
w.metrics.RecordRequestLatency(ctx, strconv.Itoa(statusCode), latency.Seconds()) w.metrics.RecordRequestLatency(ctx, strconv.Itoa(statusCode), latency.Seconds())
@ -256,6 +273,12 @@ func (w *WebhookAuthorizer) Authorize(ctx context.Context, attr authorizer.Attri
return sarErr return sarErr
}, webhook.DefaultShouldRetry); err != nil { }, webhook.DefaultShouldRetry); err != nil {
klog.Errorf("Failed to make webhook authorizer request: %v", err) klog.Errorf("Failed to make webhook authorizer request: %v", err)
// we're returning NoOpinion, and the parent context has not timed out or been canceled
if w.decisionOnError == authorizer.DecisionNoOpinion && ctx.Err() == nil {
w.metrics.RecordWebhookFailOpen(ctx, w.name, metricsResult)
}
return w.decisionOnError, "", err return w.decisionOnError, "", err
} }

View File

@ -315,11 +315,18 @@ type mockV1Service struct {
allow bool allow bool
statusCode int statusCode int
called int called int
// reviewHook is called just before returning from the Review() method
reviewHook func(*authorizationv1.SubjectAccessReview)
} }
func (m *mockV1Service) Review(r *authorizationv1.SubjectAccessReview) { func (m *mockV1Service) Review(r *authorizationv1.SubjectAccessReview) {
m.called++ m.called++
r.Status.Allowed = m.allow r.Status.Allowed = m.allow
if m.reviewHook != nil {
m.reviewHook(r)
}
} }
func (m *mockV1Service) Allow() { m.allow = true } func (m *mockV1Service) Allow() { m.allow = true }
func (m *mockV1Service) Deny() { m.allow = false } func (m *mockV1Service) Deny() { m.allow = false }
@ -1414,5 +1421,6 @@ func celAuthorizerMetrics() metrics.AuthorizerMetrics {
type celAuthorizerMetricsType struct { type celAuthorizerMetricsType struct {
metrics.NoopRequestMetrics metrics.NoopRequestMetrics
metrics.NoopWebhookMetrics
celmetrics.MatcherMetrics celmetrics.MatcherMetrics
} }

View File

@ -25,6 +25,7 @@ import (
"net/http/httptest" "net/http/httptest"
"os" "os"
"path/filepath" "path/filepath"
"reflect"
"regexp" "regexp"
"strconv" "strconv"
"strings" "strings"
@ -41,6 +42,7 @@ import (
"k8s.io/apiserver/pkg/features" "k8s.io/apiserver/pkg/features"
authzmetrics "k8s.io/apiserver/pkg/server/options/authorizationconfig/metrics" authzmetrics "k8s.io/apiserver/pkg/server/options/authorizationconfig/metrics"
utilfeature "k8s.io/apiserver/pkg/util/feature" utilfeature "k8s.io/apiserver/pkg/util/feature"
webhookmetrics "k8s.io/apiserver/plugin/pkg/authorizer/webhook/metrics"
clientset "k8s.io/client-go/kubernetes" clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest" "k8s.io/client-go/rest"
featuregatetesting "k8s.io/component-base/featuregate/testing" featuregatetesting "k8s.io/component-base/featuregate/testing"
@ -145,6 +147,7 @@ users:
` `
// returns malformed responses when called // returns malformed responses when called
errorName := "error.example.com"
serverErrorCalled := atomic.Int32{} serverErrorCalled := atomic.Int32{}
serverError := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { serverError := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
serverErrorCalled.Add(1) serverErrorCalled.Add(1)
@ -164,6 +167,7 @@ users:
} }
// hangs for 2 seconds when called // hangs for 2 seconds when called
timeoutName := "timeout.example.com"
serverTimeoutCalled := atomic.Int32{} serverTimeoutCalled := atomic.Int32{}
serverTimeout := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { serverTimeout := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
serverTimeoutCalled.Add(1) serverTimeoutCalled.Add(1)
@ -204,6 +208,7 @@ users:
} }
// returns a no opinion response when called // returns a no opinion response when called
noOpinionName := "noopinion.example.com"
serverNoOpinionCalled := atomic.Int32{} serverNoOpinionCalled := atomic.Int32{}
serverNoOpinion := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { serverNoOpinion := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
serverNoOpinionCalled.Add(1) serverNoOpinionCalled.Add(1)
@ -224,6 +229,26 @@ users:
t.Fatal(err) t.Fatal(err)
} }
// returns malformed responses when called, which is then configured to fail open
failOpenName := "failopen.example.com"
serverFailOpenCalled := atomic.Int32{}
serverFailOpen := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
serverFailOpenCalled.Add(1)
sar := &authorizationv1.SubjectAccessReview{}
if err := json.NewDecoder(req.Body).Decode(sar); err != nil {
t.Error(err)
}
t.Log("serverFailOpen", sar)
if _, err := w.Write([]byte(`malformed response`)); err != nil {
t.Error(err)
}
}))
defer serverFailOpen.Close()
serverFailOpenKubeconfigName := filepath.Join(dir, "failOpen.yaml")
if err := os.WriteFile(serverFailOpenKubeconfigName, []byte(fmt.Sprintf(kubeconfigTemplate, serverFailOpen.URL)), os.FileMode(0644)); err != nil {
t.Fatal(err)
}
// returns an allow response when called // returns an allow response when called
allowName := "allow.example.com" allowName := "allow.example.com"
serverAllowCalled := atomic.Int32{} serverAllowCalled := atomic.Int32{}
@ -273,14 +298,16 @@ users:
serverTimeoutCalled.Store(0) serverTimeoutCalled.Store(0)
serverDenyCalled.Store(0) serverDenyCalled.Store(0)
serverNoOpinionCalled.Store(0) serverNoOpinionCalled.Store(0)
serverFailOpenCalled.Store(0)
serverAllowCalled.Store(0) serverAllowCalled.Store(0)
serverAllowReloadedCalled.Store(0) serverAllowReloadedCalled.Store(0)
authorizationmetrics.ResetMetricsForTest() authorizationmetrics.ResetMetricsForTest()
celmetrics.ResetMetricsForTest() celmetrics.ResetMetricsForTest()
webhookmetrics.ResetMetricsForTest()
} }
var adminClient *clientset.Clientset var adminClient *clientset.Clientset
type counts struct { type counts struct {
errorCount, timeoutCount, denyCount, noOpinionCount, allowCount, allowReloadedCount, webhookExclusionCount, evalErrorsCount int32 errorCount, timeoutCount, denyCount, noOpinionCount, failOpenCount, allowCount, allowReloadedCount, webhookExclusionCount, evalErrorsCount int32
} }
assertCounts := func(c counts) { assertCounts := func(c counts) {
t.Helper() t.Helper()
@ -288,30 +315,40 @@ users:
if err != nil { if err != nil {
t.Fatalf("error getting metrics: %v", err) t.Fatalf("error getting metrics: %v", err)
} }
if e, a := c.errorCount, serverErrorCalled.Load(); e != a {
t.Fatalf("expected fail webhook calls: %d, got %d", e, a) assertCount := func(name string, expected int32, serverCalls *atomic.Int32) {
} t.Helper()
if e, a := c.timeoutCount, serverTimeoutCalled.Load(); e != a { if actual := serverCalls.Load(); expected != actual {
t.Fatalf("expected timeout webhook calls: %d, got %d", e, a) t.Fatalf("expected %q webhook calls: %d, got %d", name, expected, actual)
} }
if e, a := c.denyCount, serverDenyCalled.Load(); e != a { if actual := int32(metrics.whTotal[name]); expected != actual {
t.Fatalf("expected deny webhook calls: %d, got %d", e, a) t.Fatalf("expected %q webhook metric call count: %d, got %d (%#v)", name, expected, actual, metrics.whTotal)
}
if actual := int32(metrics.whDurationCount[name]); expected != actual {
t.Fatalf("expected %q webhook metric duration count: %d, got %d (%#v)", name, expected, actual, metrics.whDurationCount)
}
} }
assertCount(errorName, c.errorCount, &serverErrorCalled)
assertCount(timeoutName, c.timeoutCount, &serverTimeoutCalled)
assertCount(denyName, c.denyCount, &serverDenyCalled)
if e, a := c.denyCount, metrics.decisions[authorizerKey{authorizerType: "Webhook", authorizerName: denyName}]["denied"]; e != int32(a) { if e, a := c.denyCount, metrics.decisions[authorizerKey{authorizerType: "Webhook", authorizerName: denyName}]["denied"]; e != int32(a) {
t.Fatalf("expected deny webhook denied metrics calls: %d, got %d", e, a) t.Fatalf("expected deny webhook denied metrics calls: %d, got %d", e, a)
} }
if e, a := c.noOpinionCount, serverNoOpinionCalled.Load(); e != a { assertCount(noOpinionName, c.noOpinionCount, &serverNoOpinionCalled)
t.Fatalf("expected noOpinion webhook calls: %d, got %d", e, a) assertCount(failOpenName, c.failOpenCount, &serverFailOpenCalled)
expectedFailOpenCounts := map[string]int{}
if c.failOpenCount > 0 {
expectedFailOpenCounts[failOpenName] = int(c.failOpenCount)
} }
if e, a := c.allowCount, serverAllowCalled.Load(); e != a { if !reflect.DeepEqual(expectedFailOpenCounts, metrics.whFailOpenTotal) {
t.Fatalf("expected allow webhook calls: %d, got %d", e, a) t.Fatalf("expected fail open %#v, got %#v", expectedFailOpenCounts, metrics.whFailOpenTotal)
} }
assertCount(allowName, c.allowCount, &serverAllowCalled)
if e, a := c.allowCount, metrics.decisions[authorizerKey{authorizerType: "Webhook", authorizerName: allowName}]["allowed"]; e != int32(a) { if e, a := c.allowCount, metrics.decisions[authorizerKey{authorizerType: "Webhook", authorizerName: allowName}]["allowed"]; e != int32(a) {
t.Fatalf("expected allow webhook allowed metrics calls: %d, got %d", e, a) t.Fatalf("expected allow webhook allowed metrics calls: %d, got %d", e, a)
} }
if e, a := c.allowReloadedCount, serverAllowReloadedCalled.Load(); e != a { assertCount(allowReloadedName, c.allowReloadedCount, &serverAllowReloadedCalled)
t.Fatalf("expected allowReloaded webhook calls: %d, got %d", e, a)
}
if e, a := c.allowReloadedCount, metrics.decisions[authorizerKey{authorizerType: "Webhook", authorizerName: allowReloadedName}]["allowed"]; e != int32(a) { if e, a := c.allowReloadedCount, metrics.decisions[authorizerKey{authorizerType: "Webhook", authorizerName: allowReloadedName}]["allowed"]; e != int32(a) {
t.Fatalf("expected allowReloaded webhook allowed metrics calls: %d, got %d", e, a) t.Fatalf("expected allowReloaded webhook allowed metrics calls: %d, got %d", e, a)
} }
@ -330,7 +367,7 @@ apiVersion: apiserver.config.k8s.io/v1alpha1
kind: AuthorizationConfiguration kind: AuthorizationConfiguration
authorizers: authorizers:
- type: Webhook - type: Webhook
name: error.example.com name: `+errorName+`
webhook: webhook:
timeout: 5s timeout: 5s
failurePolicy: Deny failurePolicy: Deny
@ -347,7 +384,7 @@ authorizers:
- expression: 'request.resourceAttributes.name == "error"' - expression: 'request.resourceAttributes.name == "error"'
- type: Webhook - type: Webhook
name: timeout.example.com name: `+timeoutName+`
webhook: webhook:
timeout: 1s timeout: 1s
failurePolicy: Deny failurePolicy: Deny
@ -381,7 +418,7 @@ authorizers:
- expression: 'request.resourceAttributes.namespace == "fail"' - expression: 'request.resourceAttributes.namespace == "fail"'
- type: Webhook - type: Webhook
name: noopinion.example.com name: `+noOpinionName+`
webhook: webhook:
timeout: 5s timeout: 5s
failurePolicy: Deny failurePolicy: Deny
@ -392,6 +429,19 @@ authorizers:
type: KubeConfigFile type: KubeConfigFile
kubeConfigFile: `+serverNoOpinionKubeconfigName+` kubeConfigFile: `+serverNoOpinionKubeconfigName+`
- type: Webhook
name: `+failOpenName+`
webhook:
timeout: 5s
failurePolicy: NoOpinion
subjectAccessReviewVersion: v1
matchConditionSubjectAccessReviewVersion: v1
authorizedTTL: 1ms
unauthorizedTTL: 1ms
connectionInfo:
type: KubeConfigFile
kubeConfigFile: `+serverFailOpenKubeconfigName+`
- type: Webhook - type: Webhook
name: `+allowName+` name: `+allowName+`
webhook: webhook:
@ -498,7 +548,7 @@ authorizers:
t.Fatal("expected allowed, got denied") t.Fatal("expected allowed, got denied")
} else { } else {
t.Log(result.Status.Reason) t.Log(result.Status.Reason)
assertCounts(counts{noOpinionCount: 1, allowCount: 1, webhookExclusionCount: 3}) assertCounts(counts{noOpinionCount: 1, failOpenCount: 1, allowCount: 1, webhookExclusionCount: 3})
} }
// the timeout webhook results in match condition eval errors when evaluating a non-resource request // the timeout webhook results in match condition eval errors when evaluating a non-resource request
@ -581,7 +631,7 @@ authorizers:
t.Fatal("expected allowed, got denied") t.Fatal("expected allowed, got denied")
} else { } else {
t.Log(result.Status.Reason) t.Log(result.Status.Reason)
assertCounts(counts{noOpinionCount: 1, allowCount: 1, webhookExclusionCount: 3}) assertCounts(counts{noOpinionCount: 1, failOpenCount: 1, allowCount: 1, webhookExclusionCount: 3})
} }
// write good config with different webhook // write good config with different webhook
@ -718,6 +768,10 @@ type metrics struct {
decisions map[authorizerKey]map[string]int decisions map[authorizerKey]map[string]int
exclusions int exclusions int
evalErrors int evalErrors int
whTotal map[string]int
whFailOpenTotal map[string]int
whDurationCount map[string]int
} }
type authorizerKey struct { type authorizerKey struct {
authorizerType string authorizerType string
@ -727,6 +781,9 @@ type authorizerKey struct {
var decisionMetric = regexp.MustCompile(`apiserver_authorization_decisions_total\{decision="(.*?)",name="(.*?)",type="(.*?)"\} (\d+)`) var decisionMetric = regexp.MustCompile(`apiserver_authorization_decisions_total\{decision="(.*?)",name="(.*?)",type="(.*?)"\} (\d+)`)
var webhookExclusionMetric = regexp.MustCompile(`apiserver_authorization_match_condition_exclusions_total\{name="(.*?)",type="(.*?)"\} (\d+)`) var webhookExclusionMetric = regexp.MustCompile(`apiserver_authorization_match_condition_exclusions_total\{name="(.*?)",type="(.*?)"\} (\d+)`)
var webhookMatchConditionEvalErrorMetric = regexp.MustCompile(`apiserver_authorization_match_condition_evaluation_errors_total\{name="(.*?)",type="(.*?)"\} (\d+)`) var webhookMatchConditionEvalErrorMetric = regexp.MustCompile(`apiserver_authorization_match_condition_evaluation_errors_total\{name="(.*?)",type="(.*?)"\} (\d+)`)
var whTotalMetric = regexp.MustCompile(`apiserver_authorization_webhook_evaluations_total{name="(.*?)",result="(.*?)"} (\d+)`)
var webhookDurationMetric = regexp.MustCompile(`apiserver_authorization_webhook_duration_seconds_count{name="(.*?)",result="(.*?)"} (\d+)`)
var webhookFailOpenMetric = regexp.MustCompile(`apiserver_authorization_webhook_evaluations_fail_open_total{name="(.*?)",result="(.*?)"} (\d+)`)
func getMetrics(t *testing.T, client *clientset.Clientset) (*metrics, error) { func getMetrics(t *testing.T, client *clientset.Clientset) (*metrics, error) {
data, err := client.RESTClient().Get().AbsPath("/metrics").DoRaw(context.TODO()) data, err := client.RESTClient().Get().AbsPath("/metrics").DoRaw(context.TODO())
@ -744,6 +801,10 @@ func getMetrics(t *testing.T, client *clientset.Clientset) (*metrics, error) {
} }
var m metrics var m metrics
m.whTotal = map[string]int{}
m.whFailOpenTotal = map[string]int{}
m.whDurationCount = map[string]int{}
m.exclusions = 0 m.exclusions = 0
for _, line := range strings.Split(string(data), "\n") { for _, line := range strings.Split(string(data), "\n") {
if matches := decisionMetric.FindStringSubmatch(line); matches != nil { if matches := decisionMetric.FindStringSubmatch(line); matches != nil {
@ -780,6 +841,33 @@ func getMetrics(t *testing.T, client *clientset.Clientset) (*metrics, error) {
t.Log(count) t.Log(count)
m.evalErrors += count m.evalErrors += count
} }
if matches := whTotalMetric.FindStringSubmatch(line); matches != nil {
t.Log(matches)
count, err := strconv.Atoi(matches[3])
if err != nil {
return nil, err
}
t.Log(count)
m.whTotal[matches[1]] += count
}
if matches := webhookDurationMetric.FindStringSubmatch(line); matches != nil {
t.Log(matches)
count, err := strconv.Atoi(matches[3])
if err != nil {
return nil, err
}
t.Log(count)
m.whDurationCount[matches[1]] += count
}
if matches := webhookFailOpenMetric.FindStringSubmatch(line); matches != nil {
t.Log(matches)
count, err := strconv.Atoi(matches[3])
if err != nil {
return nil, err
}
t.Log(count)
m.whFailOpenTotal[matches[1]] += count
}
if strings.HasPrefix(line, "apiserver_authorization_config_controller_automatic_reload_last_timestamp_seconds") { if strings.HasPrefix(line, "apiserver_authorization_config_controller_automatic_reload_last_timestamp_seconds") {
t.Log(line) t.Log(line)
values := strings.Split(line, " ") values := strings.Split(line, " ")