From 8ed1628a6e75f4029853502dbac44fdb0edac5fc Mon Sep 17 00:00:00 2001 From: Ryan Moriarty Date: Tue, 22 Jun 2021 22:32:47 +0000 Subject: [PATCH] Add a new webhook metric tracking request totals. Also add a 1.0s bucket boundary to the webhook latency metric. --- .../pkg/admission/metrics/metrics.go | 25 +++++++++-- .../pkg/admission/metrics/metrics_test.go | 43 +++++++++++++++++-- .../plugin/webhook/mutating/dispatcher.go | 26 +++++++---- .../plugin/webhook/validating/dispatcher.go | 26 +++++++---- .../apiserver/pkg/util/webhook/error.go | 1 + 5 files changed, 96 insertions(+), 25 deletions(-) diff --git a/staging/src/k8s.io/apiserver/pkg/admission/metrics/metrics.go b/staging/src/k8s.io/apiserver/pkg/admission/metrics/metrics.go index 82752fe08cc..4ec5c9b357c 100644 --- a/staging/src/k8s.io/apiserver/pkg/admission/metrics/metrics.go +++ b/staging/src/k8s.io/apiserver/pkg/admission/metrics/metrics.go @@ -46,7 +46,7 @@ const ( var ( // Use buckets ranging from 5 ms to 2.5 seconds (admission webhooks timeout at 30 seconds by default). - latencyBuckets = []float64{0.005, 0.025, 0.1, 0.5, 2.5} + latencyBuckets = []float64{0.005, 0.025, 0.1, 0.5, 1.0, 2.5} latencySummaryMaxAge = 5 * time.Hour // Metrics provides access to all admission metrics. @@ -119,6 +119,7 @@ type AdmissionMetrics struct { controller *metricSet webhook *metricSet webhookRejection *metrics.CounterVec + webhookRequest *metrics.CounterVec } // newAdmissionMetrics create a new AdmissionMetrics, configured with default metric names. @@ -149,11 +150,22 @@ func newAdmissionMetrics() *AdmissionMetrics { }, []string{"name", "type", "operation", "error_type", "rejection_code"}) + webhookRequest := metrics.NewCounterVec( + &metrics.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "webhook_request_total", + Help: "Admission webhook request total, identified by name and broken out for each admission type (validating or mutating) and operation. Additional labels specify whether the request was rejected or not and an HTTP status code. Codes greater than 600 are truncated to 600, to keep the metrics cardinality bounded.", + StabilityLevel: metrics.ALPHA, + }, + []string{"name", "type", "operation", "code", "rejected"}) + step.mustRegister() controller.mustRegister() webhook.mustRegister() legacyregistry.MustRegister(webhookRejection) - return &AdmissionMetrics{step: step, controller: controller, webhook: webhook, webhookRejection: webhookRejection} + legacyregistry.MustRegister(webhookRequest) + return &AdmissionMetrics{step: step, controller: controller, webhook: webhook, webhookRejection: webhookRejection, webhookRequest: webhookRequest} } func (m *AdmissionMetrics) reset() { @@ -173,8 +185,13 @@ func (m *AdmissionMetrics) ObserveAdmissionController(ctx context.Context, elaps } // ObserveWebhook records admission related metrics for a admission webhook. -func (m *AdmissionMetrics) ObserveWebhook(ctx context.Context, elapsed time.Duration, rejected bool, attr admission.Attributes, stepType string, extraLabels ...string) { - m.webhook.observe(ctx, elapsed, append(extraLabels, stepType, string(attr.GetOperation()), strconv.FormatBool(rejected))...) +func (m *AdmissionMetrics) ObserveWebhook(ctx context.Context, name string, elapsed time.Duration, rejected bool, attr admission.Attributes, stepType string, code int) { + // We truncate codes greater than 600 to keep the cardinality bounded. + if code > 600 { + code = 600 + } + m.webhookRequest.WithContext(ctx).WithLabelValues(name, stepType, string(attr.GetOperation()), strconv.Itoa(code), strconv.FormatBool(rejected)).Inc() + m.webhook.observe(ctx, elapsed, name, stepType, string(attr.GetOperation()), strconv.FormatBool(rejected)) } // ObserveWebhookRejection records admission related metrics for an admission webhook rejection. diff --git a/staging/src/k8s.io/apiserver/pkg/admission/metrics/metrics_test.go b/staging/src/k8s.io/apiserver/pkg/admission/metrics/metrics_test.go index b5a07a37906..6eb5a77430d 100644 --- a/staging/src/k8s.io/apiserver/pkg/admission/metrics/metrics_test.go +++ b/staging/src/k8s.io/apiserver/pkg/admission/metrics/metrics_test.go @@ -83,21 +83,48 @@ func TestObserveAdmissionController(t *testing.T) { func TestObserveWebhook(t *testing.T) { defer Metrics.reset() defer legacyregistry.Reset() - Metrics.ObserveWebhook(context.TODO(), 2*time.Second, false, attr, stepAdmit, "x") - wantLabels := map[string]string{ + Metrics.ObserveWebhook(context.TODO(), "x", 2*time.Second, false, attr, stepAdmit, 200) + Metrics.ObserveWebhook(context.TODO(), "x", 2*time.Second, true, attr, stepValidate, 654) + wantLabelsCounterAdmit := map[string]string{ + "name": "x", + "operation": string(admission.Create), + "type": "admit", + "code": "200", + "rejected": "false", + } + wantLabelsCounterValidate := map[string]string{ + "name": "x", + "operation": string(admission.Create), + "type": "validate", + "code": "600", + "rejected": "true", + } + wantLabelsHistogramAdmit := map[string]string{ "name": "x", "operation": string(admission.Create), "type": "admit", "rejected": "false", } - expectHistogramCountTotal(t, "apiserver_admission_webhook_admission_duration_seconds", wantLabels, 1) + wantLabelsHistogramValidate := map[string]string{ + "name": "x", + "operation": string(admission.Create), + "type": "validate", + "rejected": "true", + } + + expectCounterValue(t, "apiserver_admission_webhook_request_total", wantLabelsCounterAdmit, 1) + expectCounterValue(t, "apiserver_admission_webhook_request_total", wantLabelsCounterValidate, 1) + + expectHistogramCountTotal(t, "apiserver_admission_webhook_admission_duration_seconds", wantLabelsHistogramAdmit, 1) + expectHistogramCountTotal(t, "apiserver_admission_webhook_admission_duration_seconds", wantLabelsHistogramValidate, 1) } func TestObserveWebhookRejection(t *testing.T) { Metrics.reset() Metrics.ObserveWebhookRejection(context.TODO(), "x", stepAdmit, string(admission.Create), WebhookRejectionNoError, 500) Metrics.ObserveWebhookRejection(context.TODO(), "x", stepAdmit, string(admission.Create), WebhookRejectionNoError, 654) - Metrics.ObserveWebhookRejection(context.TODO(), "x", stepValidate, string(admission.Update), WebhookRejectionCallingWebhookError, 0) + Metrics.ObserveWebhookRejection(context.TODO(), "x", stepValidate, string(admission.Update), WebhookRejectionCallingWebhookError, 501) + Metrics.ObserveWebhookRejection(context.TODO(), "x", stepValidate, string(admission.Update), WebhookRejectionAPIServerInternalError, 0) wantLabels := map[string]string{ "name": "x", "operation": string(admission.Create), @@ -117,11 +144,19 @@ func TestObserveWebhookRejection(t *testing.T) { "operation": string(admission.Update), "type": "validate", "error_type": "calling_webhook_error", + "rejection_code": "501", + } + wantLabelsAPIServerInternalError := map[string]string{ + "name": "x", + "operation": string(admission.Update), + "type": "validate", + "error_type": "apiserver_internal_error", "rejection_code": "0", } expectCounterValue(t, "apiserver_admission_webhook_rejection_count", wantLabels, 1) expectCounterValue(t, "apiserver_admission_webhook_rejection_count", wantLabels600, 1) expectCounterValue(t, "apiserver_admission_webhook_rejection_count", wantLabelsCallingWebhookError, 1) + expectCounterValue(t, "apiserver_admission_webhook_rejection_count", wantLabelsAPIServerInternalError, 1) } func TestWithMetrics(t *testing.T) { diff --git a/staging/src/k8s.io/apiserver/pkg/admission/plugin/webhook/mutating/dispatcher.go b/staging/src/k8s.io/apiserver/pkg/admission/plugin/webhook/mutating/dispatcher.go index 3b35bb31ece..a419328f436 100644 --- a/staging/src/k8s.io/apiserver/pkg/admission/plugin/webhook/mutating/dispatcher.go +++ b/staging/src/k8s.io/apiserver/pkg/admission/plugin/webhook/mutating/dispatcher.go @@ -148,17 +148,21 @@ func (a *mutatingDispatcher) Dispatch(ctx context.Context, attr admission.Attrib case *webhookutil.ErrCallingWebhook: if !ignoreClientCallFailures { rejected = true - admissionmetrics.Metrics.ObserveWebhookRejection(ctx, hook.Name, "admit", string(versionedAttr.Attributes.GetOperation()), admissionmetrics.WebhookRejectionCallingWebhookError, 0) + admissionmetrics.Metrics.ObserveWebhookRejection(ctx, hook.Name, "admit", string(versionedAttr.Attributes.GetOperation()), admissionmetrics.WebhookRejectionCallingWebhookError, int(err.Status.ErrStatus.Code)) } + admissionmetrics.Metrics.ObserveWebhook(ctx, hook.Name, time.Since(t), rejected, versionedAttr.Attributes, "admit", int(err.Status.ErrStatus.Code)) case *webhookutil.ErrWebhookRejection: rejected = true admissionmetrics.Metrics.ObserveWebhookRejection(ctx, hook.Name, "admit", string(versionedAttr.Attributes.GetOperation()), admissionmetrics.WebhookRejectionNoError, int(err.Status.ErrStatus.Code)) + admissionmetrics.Metrics.ObserveWebhook(ctx, hook.Name, time.Since(t), rejected, versionedAttr.Attributes, "admit", int(err.Status.ErrStatus.Code)) default: rejected = true admissionmetrics.Metrics.ObserveWebhookRejection(ctx, hook.Name, "admit", string(versionedAttr.Attributes.GetOperation()), admissionmetrics.WebhookRejectionAPIServerInternalError, 0) + admissionmetrics.Metrics.ObserveWebhook(ctx, hook.Name, time.Since(t), rejected, versionedAttr.Attributes, "admit", 0) } + } else { + admissionmetrics.Metrics.ObserveWebhook(ctx, hook.Name, time.Since(t), rejected, versionedAttr.Attributes, "admit", 200) } - admissionmetrics.Metrics.ObserveWebhook(ctx, time.Since(t), rejected, versionedAttr.Attributes, "admit", hook.Name) if changed { // Patch had changed the object. Prepare to reinvoke all previous webhooks that are eligible for re-invocation. webhookReinvokeCtx.RequireReinvokingPreviouslyInvokedPlugins() @@ -213,7 +217,7 @@ func (a *mutatingDispatcher) callAttrMutatingHook(ctx context.Context, h *admiss defer func() { annotator.addMutationAnnotation(changed) }() if attr.Attributes.IsDryRun() { if h.SideEffects == nil { - return false, &webhookutil.ErrCallingWebhook{WebhookName: h.Name, Reason: fmt.Errorf("Webhook SideEffects is nil")} + return false, &webhookutil.ErrCallingWebhook{WebhookName: h.Name, Reason: fmt.Errorf("Webhook SideEffects is nil"), Status: apierrors.NewBadRequest("Webhook SideEffects is nil")} } if !(*h.SideEffects == admissionregistrationv1.SideEffectClassNone || *h.SideEffects == admissionregistrationv1.SideEffectClassNoneOnDryRun) { return false, webhookerrors.NewDryRunUnsupportedErr(h.Name) @@ -222,12 +226,12 @@ func (a *mutatingDispatcher) callAttrMutatingHook(ctx context.Context, h *admiss uid, request, response, err := webhookrequest.CreateAdmissionObjects(attr, invocation) if err != nil { - return false, &webhookutil.ErrCallingWebhook{WebhookName: h.Name, Reason: fmt.Errorf("could not create admission objects: %w", err)} + return false, &webhookutil.ErrCallingWebhook{WebhookName: h.Name, Reason: fmt.Errorf("could not create admission objects: %w", err), Status: apierrors.NewBadRequest("error creating admission objects")} } // Make the webhook request client, err := invocation.Webhook.GetRESTClient(a.cm) if err != nil { - return false, &webhookutil.ErrCallingWebhook{WebhookName: h.Name, Reason: fmt.Errorf("could not get REST client: %w", err)} + return false, &webhookutil.ErrCallingWebhook{WebhookName: h.Name, Reason: fmt.Errorf("could not get REST client: %w", err), Status: apierrors.NewBadRequest("error getting REST client")} } trace := utiltrace.New("Call mutating webhook", utiltrace.Field{"configuration", configurationName}, @@ -261,13 +265,19 @@ func (a *mutatingDispatcher) callAttrMutatingHook(ctx context.Context, h *admiss } if err := r.Do(ctx).Into(response); err != nil { - return false, &webhookutil.ErrCallingWebhook{WebhookName: h.Name, Reason: fmt.Errorf("failed to call webhook: %w", err)} + var status *apierrors.StatusError + if se, ok := err.(*apierrors.StatusError); ok { + status = se + } else { + status = apierrors.NewBadRequest("error calling webhook") + } + return false, &webhookutil.ErrCallingWebhook{WebhookName: h.Name, Reason: fmt.Errorf("failed to call webhook: %w", err), Status: status} } trace.Step("Request completed") result, err := webhookrequest.VerifyAdmissionResponse(uid, true, response) if err != nil { - return false, &webhookutil.ErrCallingWebhook{WebhookName: h.Name, Reason: fmt.Errorf("received invalid webhook response: %w", err)} + return false, &webhookutil.ErrCallingWebhook{WebhookName: h.Name, Reason: fmt.Errorf("received invalid webhook response: %w", err), Status: apierrors.NewServiceUnavailable("error validating webhook response")} } for k, v := range result.AuditAnnotations { @@ -315,7 +325,7 @@ func (a *mutatingDispatcher) callAttrMutatingHook(ctx context.Context, h *admiss return false, apierrors.NewInternalError(err) } default: - return false, &webhookutil.ErrCallingWebhook{WebhookName: h.Name, Reason: fmt.Errorf("unsupported patch type %q", result.PatchType)} + return false, &webhookutil.ErrCallingWebhook{WebhookName: h.Name, Reason: fmt.Errorf("unsupported patch type %q", result.PatchType), Status: webhookerrors.ToStatusErr(h.Name, result.Result)} } var newVersionedObject runtime.Object diff --git a/staging/src/k8s.io/apiserver/pkg/admission/plugin/webhook/validating/dispatcher.go b/staging/src/k8s.io/apiserver/pkg/admission/plugin/webhook/validating/dispatcher.go index 250bb02a783..1a1c521a6ec 100644 --- a/staging/src/k8s.io/apiserver/pkg/admission/plugin/webhook/validating/dispatcher.go +++ b/staging/src/k8s.io/apiserver/pkg/admission/plugin/webhook/validating/dispatcher.go @@ -119,18 +119,20 @@ func (d *validatingDispatcher) Dispatch(ctx context.Context, attr admission.Attr case *webhookutil.ErrCallingWebhook: if !ignoreClientCallFailures { rejected = true - admissionmetrics.Metrics.ObserveWebhookRejection(ctx, hook.Name, "validating", string(versionedAttr.Attributes.GetOperation()), admissionmetrics.WebhookRejectionCallingWebhookError, 0) + admissionmetrics.Metrics.ObserveWebhookRejection(ctx, hook.Name, "validating", string(versionedAttr.Attributes.GetOperation()), admissionmetrics.WebhookRejectionCallingWebhookError, int(err.Status.ErrStatus.Code)) } + admissionmetrics.Metrics.ObserveWebhook(ctx, hook.Name, time.Since(t), rejected, versionedAttr.Attributes, "validating", int(err.Status.ErrStatus.Code)) case *webhookutil.ErrWebhookRejection: rejected = true admissionmetrics.Metrics.ObserveWebhookRejection(ctx, hook.Name, "validating", string(versionedAttr.Attributes.GetOperation()), admissionmetrics.WebhookRejectionNoError, int(err.Status.ErrStatus.Code)) + admissionmetrics.Metrics.ObserveWebhook(ctx, hook.Name, time.Since(t), rejected, versionedAttr.Attributes, "validating", int(err.Status.ErrStatus.Code)) default: rejected = true admissionmetrics.Metrics.ObserveWebhookRejection(ctx, hook.Name, "validating", string(versionedAttr.Attributes.GetOperation()), admissionmetrics.WebhookRejectionAPIServerInternalError, 0) + admissionmetrics.Metrics.ObserveWebhook(ctx, hook.Name, time.Since(t), rejected, versionedAttr.Attributes, "validating", 0) } - } - admissionmetrics.Metrics.ObserveWebhook(ctx, time.Since(t), rejected, versionedAttr.Attributes, "validating", hook.Name) - if err == nil { + } else { + admissionmetrics.Metrics.ObserveWebhook(ctx, hook.Name, time.Since(t), rejected, versionedAttr.Attributes, "validating", 200) return } @@ -181,7 +183,7 @@ func (d *validatingDispatcher) Dispatch(ctx context.Context, attr admission.Attr func (d *validatingDispatcher) callHook(ctx context.Context, h *v1.ValidatingWebhook, invocation *generic.WebhookInvocation, attr *generic.VersionedAttributes) error { if attr.Attributes.IsDryRun() { if h.SideEffects == nil { - return &webhookutil.ErrCallingWebhook{WebhookName: h.Name, Reason: fmt.Errorf("Webhook SideEffects is nil")} + return &webhookutil.ErrCallingWebhook{WebhookName: h.Name, Reason: fmt.Errorf("Webhook SideEffects is nil"), Status: apierrors.NewBadRequest("Webhook SideEffects is nil")} } if !(*h.SideEffects == v1.SideEffectClassNone || *h.SideEffects == v1.SideEffectClassNoneOnDryRun) { return webhookerrors.NewDryRunUnsupportedErr(h.Name) @@ -190,12 +192,12 @@ func (d *validatingDispatcher) callHook(ctx context.Context, h *v1.ValidatingWeb uid, request, response, err := webhookrequest.CreateAdmissionObjects(attr, invocation) if err != nil { - return &webhookutil.ErrCallingWebhook{WebhookName: h.Name, Reason: err} + return &webhookutil.ErrCallingWebhook{WebhookName: h.Name, Reason: fmt.Errorf("could not create admission objects: %w", err), Status: apierrors.NewBadRequest("error creating admission objects")} } // Make the webhook request client, err := invocation.Webhook.GetRESTClient(d.cm) if err != nil { - return &webhookutil.ErrCallingWebhook{WebhookName: h.Name, Reason: err} + return &webhookutil.ErrCallingWebhook{WebhookName: h.Name, Reason: fmt.Errorf("could not get REST client: %w", err), Status: apierrors.NewBadRequest("error getting REST client")} } trace := utiltrace.New("Call validating webhook", utiltrace.Field{"configuration", invocation.Webhook.GetConfigurationName()}, @@ -229,13 +231,19 @@ func (d *validatingDispatcher) callHook(ctx context.Context, h *v1.ValidatingWeb } if err := r.Do(ctx).Into(response); err != nil { - return &webhookutil.ErrCallingWebhook{WebhookName: h.Name, Reason: err} + var status *apierrors.StatusError + if se, ok := err.(*apierrors.StatusError); ok { + status = se + } else { + status = apierrors.NewBadRequest("error calling webhook") + } + return &webhookutil.ErrCallingWebhook{WebhookName: h.Name, Reason: fmt.Errorf("failed to call webhook: %w", err), Status: status} } trace.Step("Request completed") result, err := webhookrequest.VerifyAdmissionResponse(uid, false, response) if err != nil { - return &webhookutil.ErrCallingWebhook{WebhookName: h.Name, Reason: err} + return &webhookutil.ErrCallingWebhook{WebhookName: h.Name, Reason: fmt.Errorf("received invalid webhook response: %w", err), Status: apierrors.NewServiceUnavailable("error validating webhook response")} } for k, v := range result.AuditAnnotations { diff --git a/staging/src/k8s.io/apiserver/pkg/util/webhook/error.go b/staging/src/k8s.io/apiserver/pkg/util/webhook/error.go index 9c6f780e2f8..3c5d39ae3c6 100644 --- a/staging/src/k8s.io/apiserver/pkg/util/webhook/error.go +++ b/staging/src/k8s.io/apiserver/pkg/util/webhook/error.go @@ -28,6 +28,7 @@ import ( type ErrCallingWebhook struct { WebhookName string Reason error + Status *apierrors.StatusError } func (e *ErrCallingWebhook) Error() string {