mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-23 19:56:01 +00:00
Merge pull request #81399 from roycaihw/webhook-rejection-metrics
Fix the rejected label semantics in webhook metrics, add a counter metrics for webhook rejection with details
This commit is contained in:
commit
34605737b9
@ -27,9 +27,21 @@ import (
|
||||
"k8s.io/component-base/metrics/legacyregistry"
|
||||
)
|
||||
|
||||
// WebhookRejectionErrorType defines different error types that happen in a webhook rejection.
|
||||
type WebhookRejectionErrorType string
|
||||
|
||||
const (
|
||||
namespace = "apiserver"
|
||||
subsystem = "admission"
|
||||
|
||||
// WebhookRejectionCallingWebhookError identifies a calling webhook error which causes
|
||||
// a webhook admission to reject a request
|
||||
WebhookRejectionCallingWebhookError WebhookRejectionErrorType = "calling_webhook_error"
|
||||
// WebhookRejectionAPIServerInternalError identifies an apiserver internal error which
|
||||
// causes a webhook admission to reject a request
|
||||
WebhookRejectionAPIServerInternalError WebhookRejectionErrorType = "apiserver_internal_error"
|
||||
// WebhookRejectionNoError identifies a webhook properly rejected a request
|
||||
WebhookRejectionNoError WebhookRejectionErrorType = "no_error"
|
||||
)
|
||||
|
||||
var (
|
||||
@ -103,9 +115,10 @@ func (p pluginHandlerWithMetrics) Validate(ctx context.Context, a admission.Attr
|
||||
|
||||
// AdmissionMetrics instruments admission with prometheus metrics.
|
||||
type AdmissionMetrics struct {
|
||||
step *metricSet
|
||||
controller *metricSet
|
||||
webhook *metricSet
|
||||
step *metricSet
|
||||
controller *metricSet
|
||||
webhook *metricSet
|
||||
webhookRejection *metrics.CounterVec
|
||||
}
|
||||
|
||||
// newAdmissionMetrics create a new AdmissionMetrics, configured with default metric names.
|
||||
@ -126,10 +139,21 @@ func newAdmissionMetrics() *AdmissionMetrics {
|
||||
[]string{"name", "type", "operation", "rejected"},
|
||||
"Admission webhook %s, identified by name and broken out for each operation and API resource and type (validate or admit).", false)
|
||||
|
||||
webhookRejection := metrics.NewCounterVec(
|
||||
&metrics.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "webhook_rejection_count",
|
||||
Help: "Admission webhook rejection count, identified by name and broken out for each admission type (validating or admit) and operation. Additional labels specify an error type (calling_webhook_error or apiserver_internal_error if an error occurred; no_error otherwise) and optionally a non-zero rejection code if the webhook rejects the request with an HTTP status code (honored by the apiserver when the code is greater or equal to 400). Codes greater than 600 are truncated to 600, to keep the metrics cardinality bounded.",
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
},
|
||||
[]string{"name", "type", "operation", "error_type", "rejection_code"})
|
||||
|
||||
step.mustRegister()
|
||||
controller.mustRegister()
|
||||
webhook.mustRegister()
|
||||
return &AdmissionMetrics{step: step, controller: controller, webhook: webhook}
|
||||
legacyregistry.MustRegister(webhookRejection)
|
||||
return &AdmissionMetrics{step: step, controller: controller, webhook: webhook, webhookRejection: webhookRejection}
|
||||
}
|
||||
|
||||
func (m *AdmissionMetrics) reset() {
|
||||
@ -153,6 +177,16 @@ func (m *AdmissionMetrics) ObserveWebhook(elapsed time.Duration, rejected bool,
|
||||
m.webhook.observe(elapsed, append(extraLabels, stepType, string(attr.GetOperation()), strconv.FormatBool(rejected))...)
|
||||
}
|
||||
|
||||
// ObserveWebhookRejection records admission related metrics for an admission webhook rejection.
|
||||
func (m *AdmissionMetrics) ObserveWebhookRejection(name, stepType, operation string, errorType WebhookRejectionErrorType, rejectionCode int) {
|
||||
// We truncate codes greater than 600 to keep the cardinality bounded.
|
||||
// This should be rarely done by a malfunctioning webhook server.
|
||||
if rejectionCode > 600 {
|
||||
rejectionCode = 600
|
||||
}
|
||||
m.webhookRejection.WithLabelValues(name, stepType, operation, string(errorType), strconv.Itoa(rejectionCode)).Inc()
|
||||
}
|
||||
|
||||
type metricSet struct {
|
||||
latencies *metrics.HistogramVec
|
||||
latenciesSummary *metrics.SummaryVec
|
||||
|
@ -81,6 +81,37 @@ func TestObserveWebhook(t *testing.T) {
|
||||
expectHistogramCountTotal(t, "apiserver_admission_webhook_admission_duration_seconds", wantLabels, 1)
|
||||
}
|
||||
|
||||
func TestObserveWebhookRejection(t *testing.T) {
|
||||
Metrics.reset()
|
||||
Metrics.ObserveWebhookRejection("x", stepAdmit, string(admission.Create), WebhookRejectionNoError, 500)
|
||||
Metrics.ObserveWebhookRejection("x", stepAdmit, string(admission.Create), WebhookRejectionNoError, 654)
|
||||
Metrics.ObserveWebhookRejection("x", stepValidate, string(admission.Update), WebhookRejectionCallingWebhookError, 0)
|
||||
wantLabels := map[string]string{
|
||||
"name": "x",
|
||||
"operation": string(admission.Create),
|
||||
"type": "admit",
|
||||
"error_type": "no_error",
|
||||
"rejection_code": "500",
|
||||
}
|
||||
wantLabels600 := map[string]string{
|
||||
"name": "x",
|
||||
"operation": string(admission.Create),
|
||||
"type": "admit",
|
||||
"error_type": "no_error",
|
||||
"rejection_code": "600",
|
||||
}
|
||||
wantLabelsCallingWebhookError := map[string]string{
|
||||
"name": "x",
|
||||
"operation": string(admission.Update),
|
||||
"type": "validate",
|
||||
"error_type": "calling_webhook_error",
|
||||
"rejection_code": "0",
|
||||
}
|
||||
expectCounterValue(t, "apiserver_admission_webhook_rejection_count", wantLabels, 1)
|
||||
expectCounterValue(t, "apiserver_admission_webhook_rejection_count", wantLabels600, 1)
|
||||
expectCounterValue(t, "apiserver_admission_webhook_rejection_count", wantLabelsCallingWebhookError, 1)
|
||||
}
|
||||
|
||||
func TestWithMetrics(t *testing.T) {
|
||||
Metrics.reset()
|
||||
|
||||
|
@ -89,3 +89,35 @@ func expectHistogramCountTotal(t *testing.T, name string, labelFilter map[string
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// expectCounterValue ensures that the counts of metrics matching the labelFilter is as
|
||||
// expected.
|
||||
func expectCounterValue(t *testing.T, name string, labelFilter map[string]string, wantCount int) {
|
||||
metrics, err := prometheus.DefaultGatherer.Gather()
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to gather metrics: %s", err)
|
||||
}
|
||||
|
||||
counterSum := 0
|
||||
for _, mf := range metrics {
|
||||
if mf.GetName() != name {
|
||||
continue // Ignore other metrics.
|
||||
}
|
||||
for _, metric := range mf.GetMetric() {
|
||||
if !labelsMatch(metric, labelFilter) {
|
||||
continue
|
||||
}
|
||||
counterSum += int(metric.GetCounter().GetValue())
|
||||
}
|
||||
}
|
||||
if wantCount != counterSum {
|
||||
t.Errorf("Wanted count %d, got %d for metric %s with labels %#+v", wantCount, counterSum, name, labelFilter)
|
||||
for _, mf := range metrics {
|
||||
if mf.GetName() == name {
|
||||
for _, metric := range mf.GetMetric() {
|
||||
t.Logf("\tnear match: %s", metric.String())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -133,7 +133,24 @@ func (a *mutatingDispatcher) Dispatch(ctx context.Context, attr admission.Attrib
|
||||
round = 1
|
||||
}
|
||||
changed, err := a.callAttrMutatingHook(ctx, hook, invocation, versionedAttr, o, round, i)
|
||||
admissionmetrics.Metrics.ObserveWebhook(time.Since(t), err != nil, versionedAttr.Attributes, "admit", hook.Name)
|
||||
ignoreClientCallFailures := hook.FailurePolicy != nil && *hook.FailurePolicy == v1beta1.Ignore
|
||||
rejected := false
|
||||
if err != nil {
|
||||
switch err := err.(type) {
|
||||
case *webhookutil.ErrCallingWebhook:
|
||||
if !ignoreClientCallFailures {
|
||||
rejected = true
|
||||
admissionmetrics.Metrics.ObserveWebhookRejection(hook.Name, "admit", string(versionedAttr.Attributes.GetOperation()), admissionmetrics.WebhookRejectionCallingWebhookError, 0)
|
||||
}
|
||||
case *webhookutil.ErrWebhookRejection:
|
||||
rejected = true
|
||||
admissionmetrics.Metrics.ObserveWebhookRejection(hook.Name, "admit", string(versionedAttr.Attributes.GetOperation()), admissionmetrics.WebhookRejectionNoError, int(err.Status.ErrStatus.Code))
|
||||
default:
|
||||
rejected = true
|
||||
admissionmetrics.Metrics.ObserveWebhookRejection(hook.Name, "admit", string(versionedAttr.Attributes.GetOperation()), admissionmetrics.WebhookRejectionAPIServerInternalError, 0)
|
||||
}
|
||||
}
|
||||
admissionmetrics.Metrics.ObserveWebhook(time.Since(t), rejected, versionedAttr.Attributes, "admit", hook.Name)
|
||||
if changed {
|
||||
// Patch had changed the object. Prepare to reinvoke all previous webhooks that are eligible for re-invocation.
|
||||
webhookReinvokeCtx.RequireReinvokingPreviouslyInvokedPlugins()
|
||||
@ -146,7 +163,6 @@ func (a *mutatingDispatcher) Dispatch(ctx context.Context, attr admission.Attrib
|
||||
continue
|
||||
}
|
||||
|
||||
ignoreClientCallFailures := hook.FailurePolicy != nil && *hook.FailurePolicy == v1beta1.Ignore
|
||||
if callErr, ok := err.(*webhookutil.ErrCallingWebhook); ok {
|
||||
if ignoreClientCallFailures {
|
||||
klog.Warningf("Failed calling webhook, failing open %v: %v", hook.Name, callErr)
|
||||
@ -164,6 +180,9 @@ func (a *mutatingDispatcher) Dispatch(ctx context.Context, attr admission.Attrib
|
||||
klog.Warningf("Failed calling webhook, failing closed %v: %v", hook.Name, err)
|
||||
return apierrors.NewInternalError(err)
|
||||
}
|
||||
if rejectionErr, ok := err.(*webhookutil.ErrWebhookRejection); ok {
|
||||
return rejectionErr.Status
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
@ -249,7 +268,7 @@ func (a *mutatingDispatcher) callAttrMutatingHook(ctx context.Context, h *v1beta
|
||||
}
|
||||
|
||||
if !result.Allowed {
|
||||
return false, webhookerrors.ToStatusErr(h.Name, result.Result)
|
||||
return false, &webhookutil.ErrWebhookRejection{Status: webhookerrors.ToStatusErr(h.Name, result.Result)}
|
||||
}
|
||||
|
||||
if len(result.Patch) == 0 {
|
||||
|
@ -101,12 +101,28 @@ func (d *validatingDispatcher) Dispatch(ctx context.Context, attr admission.Attr
|
||||
versionedAttr := versionedAttrs[invocation.Kind]
|
||||
t := time.Now()
|
||||
err := d.callHook(ctx, hook, invocation, versionedAttr)
|
||||
admissionmetrics.Metrics.ObserveWebhook(time.Since(t), err != nil, versionedAttr.Attributes, "validating", hook.Name)
|
||||
ignoreClientCallFailures := hook.FailurePolicy != nil && *hook.FailurePolicy == v1beta1.Ignore
|
||||
rejected := false
|
||||
if err != nil {
|
||||
switch err := err.(type) {
|
||||
case *webhookutil.ErrCallingWebhook:
|
||||
if !ignoreClientCallFailures {
|
||||
rejected = true
|
||||
admissionmetrics.Metrics.ObserveWebhookRejection(hook.Name, "validating", string(versionedAttr.Attributes.GetOperation()), admissionmetrics.WebhookRejectionCallingWebhookError, 0)
|
||||
}
|
||||
case *webhookutil.ErrWebhookRejection:
|
||||
rejected = true
|
||||
admissionmetrics.Metrics.ObserveWebhookRejection(hook.Name, "validating", string(versionedAttr.Attributes.GetOperation()), admissionmetrics.WebhookRejectionNoError, int(err.Status.ErrStatus.Code))
|
||||
default:
|
||||
rejected = true
|
||||
admissionmetrics.Metrics.ObserveWebhookRejection(hook.Name, "validating", string(versionedAttr.Attributes.GetOperation()), admissionmetrics.WebhookRejectionAPIServerInternalError, 0)
|
||||
}
|
||||
}
|
||||
admissionmetrics.Metrics.ObserveWebhook(time.Since(t), rejected, versionedAttr.Attributes, "validating", hook.Name)
|
||||
if err == nil {
|
||||
return
|
||||
}
|
||||
|
||||
ignoreClientCallFailures := hook.FailurePolicy != nil && *hook.FailurePolicy == v1beta1.Ignore
|
||||
if callErr, ok := err.(*webhookutil.ErrCallingWebhook); ok {
|
||||
if ignoreClientCallFailures {
|
||||
klog.Warningf("Failed calling webhook, failing open %v: %v", hook.Name, callErr)
|
||||
@ -119,6 +135,9 @@ func (d *validatingDispatcher) Dispatch(ctx context.Context, attr admission.Attr
|
||||
return
|
||||
}
|
||||
|
||||
if rejectionErr, ok := err.(*webhookutil.ErrWebhookRejection); ok {
|
||||
err = rejectionErr.Status
|
||||
}
|
||||
klog.Warningf("rejected by webhook %q: %#v", hook.Name, err)
|
||||
errCh <- err
|
||||
}(relevantHooks[i])
|
||||
@ -211,5 +230,5 @@ func (d *validatingDispatcher) callHook(ctx context.Context, h *v1beta1.Validati
|
||||
if result.Allowed {
|
||||
return nil
|
||||
}
|
||||
return webhookerrors.ToStatusErr(h.Name, result.Result)
|
||||
return &webhookutil.ErrWebhookRejection{Status: webhookerrors.ToStatusErr(h.Name, result.Result)}
|
||||
}
|
||||
|
@ -16,7 +16,11 @@ limitations under the License.
|
||||
|
||||
package webhook
|
||||
|
||||
import "fmt"
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
||||
)
|
||||
|
||||
// ErrCallingWebhook is returned for transport-layer errors calling webhooks. It
|
||||
// represents a failure to talk to the webhook, not the webhook rejecting a
|
||||
@ -32,3 +36,12 @@ func (e *ErrCallingWebhook) Error() string {
|
||||
}
|
||||
return fmt.Sprintf("failed calling webhook %q; no further details available", e.WebhookName)
|
||||
}
|
||||
|
||||
// ErrWebhookRejection represents a webhook properly rejecting a request.
|
||||
type ErrWebhookRejection struct {
|
||||
Status *apierrors.StatusError
|
||||
}
|
||||
|
||||
func (e *ErrWebhookRejection) Error() string {
|
||||
return e.Status.Error()
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user