Merge pull request #81399 from roycaihw/webhook-rejection-metrics

Fix the rejected label semantics in webhook metrics, add a counter metrics for webhook rejection with details
This commit is contained in:
Kubernetes Prow Robot 2019-08-29 19:55:31 -07:00 committed by GitHub
commit 34605737b9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 159 additions and 11 deletions

View File

@ -27,9 +27,21 @@ import (
"k8s.io/component-base/metrics/legacyregistry"
)
// WebhookRejectionErrorType defines different error types that happen in a webhook rejection.
type WebhookRejectionErrorType string
const (
namespace = "apiserver"
subsystem = "admission"
// WebhookRejectionCallingWebhookError identifies a calling webhook error which causes
// a webhook admission to reject a request
WebhookRejectionCallingWebhookError WebhookRejectionErrorType = "calling_webhook_error"
// WebhookRejectionAPIServerInternalError identifies an apiserver internal error which
// causes a webhook admission to reject a request
WebhookRejectionAPIServerInternalError WebhookRejectionErrorType = "apiserver_internal_error"
// WebhookRejectionNoError identifies a webhook properly rejected a request
WebhookRejectionNoError WebhookRejectionErrorType = "no_error"
)
var (
@ -103,9 +115,10 @@ func (p pluginHandlerWithMetrics) Validate(ctx context.Context, a admission.Attr
// AdmissionMetrics instruments admission with prometheus metrics.
type AdmissionMetrics struct {
step *metricSet
controller *metricSet
webhook *metricSet
step *metricSet
controller *metricSet
webhook *metricSet
webhookRejection *metrics.CounterVec
}
// newAdmissionMetrics create a new AdmissionMetrics, configured with default metric names.
@ -126,10 +139,21 @@ func newAdmissionMetrics() *AdmissionMetrics {
[]string{"name", "type", "operation", "rejected"},
"Admission webhook %s, identified by name and broken out for each operation and API resource and type (validate or admit).", false)
webhookRejection := metrics.NewCounterVec(
&metrics.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "webhook_rejection_count",
Help: "Admission webhook rejection count, identified by name and broken out for each admission type (validating or admit) and operation. Additional labels specify an error type (calling_webhook_error or apiserver_internal_error if an error occurred; no_error otherwise) and optionally a non-zero rejection code if the webhook rejects the request with an HTTP status code (honored by the apiserver when the code is greater or equal to 400). Codes greater than 600 are truncated to 600, to keep the metrics cardinality bounded.",
StabilityLevel: metrics.ALPHA,
},
[]string{"name", "type", "operation", "error_type", "rejection_code"})
step.mustRegister()
controller.mustRegister()
webhook.mustRegister()
return &AdmissionMetrics{step: step, controller: controller, webhook: webhook}
legacyregistry.MustRegister(webhookRejection)
return &AdmissionMetrics{step: step, controller: controller, webhook: webhook, webhookRejection: webhookRejection}
}
func (m *AdmissionMetrics) reset() {
@ -153,6 +177,16 @@ func (m *AdmissionMetrics) ObserveWebhook(elapsed time.Duration, rejected bool,
m.webhook.observe(elapsed, append(extraLabels, stepType, string(attr.GetOperation()), strconv.FormatBool(rejected))...)
}
// ObserveWebhookRejection records admission related metrics for an admission webhook rejection.
func (m *AdmissionMetrics) ObserveWebhookRejection(name, stepType, operation string, errorType WebhookRejectionErrorType, rejectionCode int) {
// We truncate codes greater than 600 to keep the cardinality bounded.
// This should be rarely done by a malfunctioning webhook server.
if rejectionCode > 600 {
rejectionCode = 600
}
m.webhookRejection.WithLabelValues(name, stepType, operation, string(errorType), strconv.Itoa(rejectionCode)).Inc()
}
type metricSet struct {
latencies *metrics.HistogramVec
latenciesSummary *metrics.SummaryVec

View File

@ -81,6 +81,37 @@ func TestObserveWebhook(t *testing.T) {
expectHistogramCountTotal(t, "apiserver_admission_webhook_admission_duration_seconds", wantLabels, 1)
}
func TestObserveWebhookRejection(t *testing.T) {
Metrics.reset()
Metrics.ObserveWebhookRejection("x", stepAdmit, string(admission.Create), WebhookRejectionNoError, 500)
Metrics.ObserveWebhookRejection("x", stepAdmit, string(admission.Create), WebhookRejectionNoError, 654)
Metrics.ObserveWebhookRejection("x", stepValidate, string(admission.Update), WebhookRejectionCallingWebhookError, 0)
wantLabels := map[string]string{
"name": "x",
"operation": string(admission.Create),
"type": "admit",
"error_type": "no_error",
"rejection_code": "500",
}
wantLabels600 := map[string]string{
"name": "x",
"operation": string(admission.Create),
"type": "admit",
"error_type": "no_error",
"rejection_code": "600",
}
wantLabelsCallingWebhookError := map[string]string{
"name": "x",
"operation": string(admission.Update),
"type": "validate",
"error_type": "calling_webhook_error",
"rejection_code": "0",
}
expectCounterValue(t, "apiserver_admission_webhook_rejection_count", wantLabels, 1)
expectCounterValue(t, "apiserver_admission_webhook_rejection_count", wantLabels600, 1)
expectCounterValue(t, "apiserver_admission_webhook_rejection_count", wantLabelsCallingWebhookError, 1)
}
func TestWithMetrics(t *testing.T) {
Metrics.reset()

View File

@ -89,3 +89,35 @@ func expectHistogramCountTotal(t *testing.T, name string, labelFilter map[string
}
}
}
// expectCounterValue ensures that the counts of metrics matching the labelFilter is as
// expected.
func expectCounterValue(t *testing.T, name string, labelFilter map[string]string, wantCount int) {
metrics, err := prometheus.DefaultGatherer.Gather()
if err != nil {
t.Fatalf("Failed to gather metrics: %s", err)
}
counterSum := 0
for _, mf := range metrics {
if mf.GetName() != name {
continue // Ignore other metrics.
}
for _, metric := range mf.GetMetric() {
if !labelsMatch(metric, labelFilter) {
continue
}
counterSum += int(metric.GetCounter().GetValue())
}
}
if wantCount != counterSum {
t.Errorf("Wanted count %d, got %d for metric %s with labels %#+v", wantCount, counterSum, name, labelFilter)
for _, mf := range metrics {
if mf.GetName() == name {
for _, metric := range mf.GetMetric() {
t.Logf("\tnear match: %s", metric.String())
}
}
}
}
}

View File

@ -133,7 +133,24 @@ func (a *mutatingDispatcher) Dispatch(ctx context.Context, attr admission.Attrib
round = 1
}
changed, err := a.callAttrMutatingHook(ctx, hook, invocation, versionedAttr, o, round, i)
admissionmetrics.Metrics.ObserveWebhook(time.Since(t), err != nil, versionedAttr.Attributes, "admit", hook.Name)
ignoreClientCallFailures := hook.FailurePolicy != nil && *hook.FailurePolicy == v1beta1.Ignore
rejected := false
if err != nil {
switch err := err.(type) {
case *webhookutil.ErrCallingWebhook:
if !ignoreClientCallFailures {
rejected = true
admissionmetrics.Metrics.ObserveWebhookRejection(hook.Name, "admit", string(versionedAttr.Attributes.GetOperation()), admissionmetrics.WebhookRejectionCallingWebhookError, 0)
}
case *webhookutil.ErrWebhookRejection:
rejected = true
admissionmetrics.Metrics.ObserveWebhookRejection(hook.Name, "admit", string(versionedAttr.Attributes.GetOperation()), admissionmetrics.WebhookRejectionNoError, int(err.Status.ErrStatus.Code))
default:
rejected = true
admissionmetrics.Metrics.ObserveWebhookRejection(hook.Name, "admit", string(versionedAttr.Attributes.GetOperation()), admissionmetrics.WebhookRejectionAPIServerInternalError, 0)
}
}
admissionmetrics.Metrics.ObserveWebhook(time.Since(t), rejected, versionedAttr.Attributes, "admit", hook.Name)
if changed {
// Patch had changed the object. Prepare to reinvoke all previous webhooks that are eligible for re-invocation.
webhookReinvokeCtx.RequireReinvokingPreviouslyInvokedPlugins()
@ -146,7 +163,6 @@ func (a *mutatingDispatcher) Dispatch(ctx context.Context, attr admission.Attrib
continue
}
ignoreClientCallFailures := hook.FailurePolicy != nil && *hook.FailurePolicy == v1beta1.Ignore
if callErr, ok := err.(*webhookutil.ErrCallingWebhook); ok {
if ignoreClientCallFailures {
klog.Warningf("Failed calling webhook, failing open %v: %v", hook.Name, callErr)
@ -164,6 +180,9 @@ func (a *mutatingDispatcher) Dispatch(ctx context.Context, attr admission.Attrib
klog.Warningf("Failed calling webhook, failing closed %v: %v", hook.Name, err)
return apierrors.NewInternalError(err)
}
if rejectionErr, ok := err.(*webhookutil.ErrWebhookRejection); ok {
return rejectionErr.Status
}
return err
}
@ -249,7 +268,7 @@ func (a *mutatingDispatcher) callAttrMutatingHook(ctx context.Context, h *v1beta
}
if !result.Allowed {
return false, webhookerrors.ToStatusErr(h.Name, result.Result)
return false, &webhookutil.ErrWebhookRejection{Status: webhookerrors.ToStatusErr(h.Name, result.Result)}
}
if len(result.Patch) == 0 {

View File

@ -101,12 +101,28 @@ func (d *validatingDispatcher) Dispatch(ctx context.Context, attr admission.Attr
versionedAttr := versionedAttrs[invocation.Kind]
t := time.Now()
err := d.callHook(ctx, hook, invocation, versionedAttr)
admissionmetrics.Metrics.ObserveWebhook(time.Since(t), err != nil, versionedAttr.Attributes, "validating", hook.Name)
ignoreClientCallFailures := hook.FailurePolicy != nil && *hook.FailurePolicy == v1beta1.Ignore
rejected := false
if err != nil {
switch err := err.(type) {
case *webhookutil.ErrCallingWebhook:
if !ignoreClientCallFailures {
rejected = true
admissionmetrics.Metrics.ObserveWebhookRejection(hook.Name, "validating", string(versionedAttr.Attributes.GetOperation()), admissionmetrics.WebhookRejectionCallingWebhookError, 0)
}
case *webhookutil.ErrWebhookRejection:
rejected = true
admissionmetrics.Metrics.ObserveWebhookRejection(hook.Name, "validating", string(versionedAttr.Attributes.GetOperation()), admissionmetrics.WebhookRejectionNoError, int(err.Status.ErrStatus.Code))
default:
rejected = true
admissionmetrics.Metrics.ObserveWebhookRejection(hook.Name, "validating", string(versionedAttr.Attributes.GetOperation()), admissionmetrics.WebhookRejectionAPIServerInternalError, 0)
}
}
admissionmetrics.Metrics.ObserveWebhook(time.Since(t), rejected, versionedAttr.Attributes, "validating", hook.Name)
if err == nil {
return
}
ignoreClientCallFailures := hook.FailurePolicy != nil && *hook.FailurePolicy == v1beta1.Ignore
if callErr, ok := err.(*webhookutil.ErrCallingWebhook); ok {
if ignoreClientCallFailures {
klog.Warningf("Failed calling webhook, failing open %v: %v", hook.Name, callErr)
@ -119,6 +135,9 @@ func (d *validatingDispatcher) Dispatch(ctx context.Context, attr admission.Attr
return
}
if rejectionErr, ok := err.(*webhookutil.ErrWebhookRejection); ok {
err = rejectionErr.Status
}
klog.Warningf("rejected by webhook %q: %#v", hook.Name, err)
errCh <- err
}(relevantHooks[i])
@ -211,5 +230,5 @@ func (d *validatingDispatcher) callHook(ctx context.Context, h *v1beta1.Validati
if result.Allowed {
return nil
}
return webhookerrors.ToStatusErr(h.Name, result.Result)
return &webhookutil.ErrWebhookRejection{Status: webhookerrors.ToStatusErr(h.Name, result.Result)}
}

View File

@ -16,7 +16,11 @@ limitations under the License.
package webhook
import "fmt"
import (
"fmt"
apierrors "k8s.io/apimachinery/pkg/api/errors"
)
// ErrCallingWebhook is returned for transport-layer errors calling webhooks. It
// represents a failure to talk to the webhook, not the webhook rejecting a
@ -32,3 +36,12 @@ func (e *ErrCallingWebhook) Error() string {
}
return fmt.Sprintf("failed calling webhook %q; no further details available", e.WebhookName)
}
// ErrWebhookRejection represents a webhook properly rejecting a request.
type ErrWebhookRejection struct {
Status *apierrors.StatusError
}
func (e *ErrWebhookRejection) Error() string {
return e.Status.Error()
}