From b7821078b36f1cb25d903774ddf37a97966c2eac Mon Sep 17 00:00:00 2001 From: Cici Huang Date: Tue, 16 Jul 2024 08:27:36 -0700 Subject: [PATCH] Fix the error type, Add into observation, Fix tests. --- .../plugin/policy/validating/dispatcher.go | 13 +++--- .../plugin/policy/validating/errors.go | 2 +- .../policy/validating/metrics/errors.go | 2 +- .../policy/validating/metrics/metrics.go | 30 +++++++------- .../policy/validating/metrics/metrics_test.go | 40 +++++++++---------- 5 files changed, 44 insertions(+), 43 deletions(-) diff --git a/staging/src/k8s.io/apiserver/pkg/admission/plugin/policy/validating/dispatcher.go b/staging/src/k8s.io/apiserver/pkg/admission/plugin/policy/validating/dispatcher.go index 7b2349ddff6..f0601142530 100644 --- a/staging/src/k8s.io/apiserver/pkg/admission/plugin/policy/validating/dispatcher.go +++ b/staging/src/k8s.io/apiserver/pkg/admission/plugin/policy/validating/dispatcher.go @@ -234,13 +234,13 @@ func (c *dispatcher) Dispatch(ctx context.Context, a admission.Attributes, o adm Binding: binding, PolicyDecision: decision, }) - celmetrics.Metrics.ObserveRejection(ctx, decision.Elapsed, definition.Name, binding.Name) + celmetrics.Metrics.ObserveRejection(ctx, decision.Elapsed, definition.Name, binding.Name, ErrorType(&decision)) case admissionregistrationv1.Audit: publishValidationFailureAnnotation(binding, i, decision, versionedAttr) - celmetrics.Metrics.ObserveAudit(ctx, decision.Elapsed, definition.Name, binding.Name) + celmetrics.Metrics.ObserveAudit(ctx, decision.Elapsed, definition.Name, binding.Name, ErrorType(&decision)) case admissionregistrationv1.Warn: warning.AddWarning(ctx, "", fmt.Sprintf("Validation failed for ValidatingAdmissionPolicy '%s' with binding '%s': %s", definition.Name, binding.Name, decision.Message)) - celmetrics.Metrics.ObserveWarn(ctx, decision.Elapsed, definition.Name, binding.Name) + celmetrics.Metrics.ObserveWarn(ctx, decision.Elapsed, definition.Name, binding.Name, ErrorType(&decision)) } } default: @@ -259,7 +259,7 @@ func (c *dispatcher) Dispatch(ctx context.Context, a admission.Attributes, o adm auditAnnotationCollector.add(auditAnnotation.Key, value) case AuditAnnotationActionError: // When failurePolicy=fail, audit annotation errors result in deny - deniedDecisions = append(deniedDecisions, policyDecisionWithMetadata{ + d := policyDecisionWithMetadata{ Definition: definition, Binding: binding, PolicyDecision: PolicyDecision{ @@ -268,8 +268,9 @@ func (c *dispatcher) Dispatch(ctx context.Context, a admission.Attributes, o adm Message: auditAnnotation.Error, Elapsed: auditAnnotation.Elapsed, }, - }) - celmetrics.Metrics.ObserveRejection(ctx, auditAnnotation.Elapsed, definition.Name, binding.Name) + } + deniedDecisions = append(deniedDecisions, d) + celmetrics.Metrics.ObserveRejection(ctx, auditAnnotation.Elapsed, definition.Name, binding.Name, ErrorType(&d.PolicyDecision)) case AuditAnnotationActionExclude: // skip it default: return fmt.Errorf("unsupported AuditAnnotation Action: %s", auditAnnotation.Action) diff --git a/staging/src/k8s.io/apiserver/pkg/admission/plugin/policy/validating/errors.go b/staging/src/k8s.io/apiserver/pkg/admission/plugin/policy/validating/errors.go index c8380baa9bf..0fcf40ff0d4 100644 --- a/staging/src/k8s.io/apiserver/pkg/admission/plugin/policy/validating/errors.go +++ b/staging/src/k8s.io/apiserver/pkg/admission/plugin/policy/validating/errors.go @@ -34,5 +34,5 @@ func ErrorType(decision *PolicyDecision) celmetrics.ValidationErrorType { if strings.HasPrefix(decision.Message, "validation failed due to running out of cost budget") { return celmetrics.ValidatingOutOfBudget } - return celmetrics.ValidatingInternalError + return celmetrics.ValidatingInvalidError } diff --git a/staging/src/k8s.io/apiserver/pkg/admission/plugin/policy/validating/metrics/errors.go b/staging/src/k8s.io/apiserver/pkg/admission/plugin/policy/validating/metrics/errors.go index e6dac566929..4327252610a 100644 --- a/staging/src/k8s.io/apiserver/pkg/admission/plugin/policy/validating/metrics/errors.go +++ b/staging/src/k8s.io/apiserver/pkg/admission/plugin/policy/validating/metrics/errors.go @@ -34,5 +34,5 @@ func ErrorType(err error) ValidationErrorType { if errors.Is(err, apiservercel.ErrOutOfBudget) { return ValidatingOutOfBudget } - return ValidatingInternalError + return ValidatingInvalidError } diff --git a/staging/src/k8s.io/apiserver/pkg/admission/plugin/policy/validating/metrics/metrics.go b/staging/src/k8s.io/apiserver/pkg/admission/plugin/policy/validating/metrics/metrics.go index 3be9afa8e65..15e6fbe5421 100644 --- a/staging/src/k8s.io/apiserver/pkg/admission/plugin/policy/validating/metrics/metrics.go +++ b/staging/src/k8s.io/apiserver/pkg/admission/plugin/policy/validating/metrics/metrics.go @@ -35,9 +35,9 @@ type ValidationErrorType string const ( // ValidationCompileError indicates that the expression fails to compile. ValidationCompileError ValidationErrorType = "compile_error" - // ValidatingInternalError indicates that the expression fails due to internal + // ValidatingInvalidError indicates that the expression fails due to internal // errors that are out of the control of the user. - ValidatingInternalError ValidationErrorType = "internal_error" + ValidatingInvalidError ValidationErrorType = "invalid_error" // ValidatingOutOfBudget indicates that the expression fails due to running // out of cost budget, or the budget cannot be obtained. ValidatingOutOfBudget ValidationErrorType = "out_of_budget" @@ -65,7 +65,7 @@ func newValidationAdmissionMetrics() *ValidatingAdmissionPolicyMetrics { Help: "Validation admission policy check total, labeled by policy and further identified by binding and enforcement action taken.", StabilityLevel: metrics.ALPHA, }, - []string{"policy", "policy_binding", "enforcement_action"}, + []string{"policy", "policy_binding", "error_type", "enforcement_action"}, ) latency := metrics.NewHistogramVec(&metrics.HistogramOpts{ Namespace: metricsNamespace, @@ -83,7 +83,7 @@ func newValidationAdmissionMetrics() *ValidatingAdmissionPolicyMetrics { Buckets: []float64{0.0000005, 0.001, 0.01, 0.1, 1.0}, StabilityLevel: metrics.ALPHA, }, - []string{"policy", "policy_binding", "enforcement_action"}, + []string{"policy", "policy_binding", "error_type", "enforcement_action"}, ) legacyregistry.MustRegister(check) @@ -99,24 +99,24 @@ func (m *ValidatingAdmissionPolicyMetrics) Reset() { // ObserveAdmission observes a policy validation, with an optional error to indicate the error that may occur but ignored. func (m *ValidatingAdmissionPolicyMetrics) ObserveAdmission(ctx context.Context, elapsed time.Duration, policy, binding string, errorType ValidationErrorType) { - m.policyCheck.WithContext(ctx).WithLabelValues(policy, binding, "allow").Inc() - m.policyLatency.WithContext(ctx).WithLabelValues(policy, binding, "allow").Observe(elapsed.Seconds()) + m.policyCheck.WithContext(ctx).WithLabelValues(policy, binding, string(errorType), "allow").Inc() + m.policyLatency.WithContext(ctx).WithLabelValues(policy, binding, string(errorType), "allow").Observe(elapsed.Seconds()) } // ObserveRejection observes a policy validation error that was at least one of the reasons for a deny. -func (m *ValidatingAdmissionPolicyMetrics) ObserveRejection(ctx context.Context, elapsed time.Duration, policy, binding string) { - m.policyCheck.WithContext(ctx).WithLabelValues(policy, binding, "deny").Inc() - m.policyLatency.WithContext(ctx).WithLabelValues(policy, binding, "deny").Observe(elapsed.Seconds()) +func (m *ValidatingAdmissionPolicyMetrics) ObserveRejection(ctx context.Context, elapsed time.Duration, policy, binding string, errorType ValidationErrorType) { + m.policyCheck.WithContext(ctx).WithLabelValues(policy, binding, string(errorType), "deny").Inc() + m.policyLatency.WithContext(ctx).WithLabelValues(policy, binding, string(errorType), "deny").Observe(elapsed.Seconds()) } // ObserveAudit observes a policy validation audit annotation was published for a validation failure. -func (m *ValidatingAdmissionPolicyMetrics) ObserveAudit(ctx context.Context, elapsed time.Duration, policy, binding string) { - m.policyCheck.WithContext(ctx).WithLabelValues(policy, binding, "audit").Inc() - m.policyLatency.WithContext(ctx).WithLabelValues(policy, binding, "audit").Observe(elapsed.Seconds()) +func (m *ValidatingAdmissionPolicyMetrics) ObserveAudit(ctx context.Context, elapsed time.Duration, policy, binding string, errorType ValidationErrorType) { + m.policyCheck.WithContext(ctx).WithLabelValues(policy, binding, string(errorType), "audit").Inc() + m.policyLatency.WithContext(ctx).WithLabelValues(policy, binding, string(errorType), "audit").Observe(elapsed.Seconds()) } // ObserveWarn observes a policy validation warning was published for a validation failure. -func (m *ValidatingAdmissionPolicyMetrics) ObserveWarn(ctx context.Context, elapsed time.Duration, policy, binding string) { - m.policyCheck.WithContext(ctx).WithLabelValues(policy, binding, "warn").Inc() - m.policyLatency.WithContext(ctx).WithLabelValues(policy, binding, "warn").Observe(elapsed.Seconds()) +func (m *ValidatingAdmissionPolicyMetrics) ObserveWarn(ctx context.Context, elapsed time.Duration, policy, binding string, errorType ValidationErrorType) { + m.policyCheck.WithContext(ctx).WithLabelValues(policy, binding, string(errorType), "warn").Inc() + m.policyLatency.WithContext(ctx).WithLabelValues(policy, binding, string(errorType), "warn").Observe(elapsed.Seconds()) } diff --git a/staging/src/k8s.io/apiserver/pkg/admission/plugin/policy/validating/metrics/metrics_test.go b/staging/src/k8s.io/apiserver/pkg/admission/plugin/policy/validating/metrics/metrics_test.go index e575e0faad0..ad450001b90 100644 --- a/staging/src/k8s.io/apiserver/pkg/admission/plugin/policy/validating/metrics/metrics_test.go +++ b/staging/src/k8s.io/apiserver/pkg/admission/plugin/policy/validating/metrics/metrics_test.go @@ -45,20 +45,20 @@ func TestNoUtils(t *testing.T) { want: ` # HELP apiserver_validating_admission_policy_check_duration_seconds [ALPHA] Validation admission latency for individual validation expressions in seconds, labeled by policy and further including binding and enforcement action taken. # TYPE apiserver_validating_admission_policy_check_duration_seconds histogram - apiserver_validating_admission_policy_check_duration_seconds_bucket{enforcement_action="allow",policy="policy.example.com",policy_binding="binding.example.com",le="0.0000005"} 0 - apiserver_validating_admission_policy_check_duration_seconds_bucket{enforcement_action="allow",policy="policy.example.com",policy_binding="binding.example.com",le="0.001"} 0 - apiserver_validating_admission_policy_check_duration_seconds_bucket{enforcement_action="allow",policy="policy.example.com",policy_binding="binding.example.com",le="0.01"} 0 - apiserver_validating_admission_policy_check_duration_seconds_bucket{enforcement_action="allow",policy="policy.example.com",policy_binding="binding.example.com",le="0.1"} 0 - apiserver_validating_admission_policy_check_duration_seconds_bucket{enforcement_action="allow",policy="policy.example.com",policy_binding="binding.example.com",le="1"} 0 - apiserver_validating_admission_policy_check_duration_seconds_bucket{enforcement_action="allow",policy="policy.example.com",policy_binding="binding.example.com",le="+Inf"} 1 - apiserver_validating_admission_policy_check_duration_seconds_sum{enforcement_action="allow",policy="policy.example.com",policy_binding="binding.example.com"} 10 - apiserver_validating_admission_policy_check_duration_seconds_count{enforcement_action="allow",policy="policy.example.com",policy_binding="binding.example.com"} 1 + apiserver_validating_admission_policy_check_duration_seconds_bucket{enforcement_action="allow",error_type="invalid_error",policy="policy.example.com",policy_binding="binding.example.com",le="0.0000005"} 0 + apiserver_validating_admission_policy_check_duration_seconds_bucket{enforcement_action="allow",error_type="invalid_error",policy="policy.example.com",policy_binding="binding.example.com",le="0.001"} 0 + apiserver_validating_admission_policy_check_duration_seconds_bucket{enforcement_action="allow",error_type="invalid_error",policy="policy.example.com",policy_binding="binding.example.com",le="0.01"} 0 + apiserver_validating_admission_policy_check_duration_seconds_bucket{enforcement_action="allow",error_type="invalid_error",policy="policy.example.com",policy_binding="binding.example.com",le="0.1"} 0 + apiserver_validating_admission_policy_check_duration_seconds_bucket{enforcement_action="allow",error_type="invalid_error",policy="policy.example.com",policy_binding="binding.example.com",le="1"} 0 + apiserver_validating_admission_policy_check_duration_seconds_bucket{enforcement_action="allow",error_type="invalid_error",policy="policy.example.com",policy_binding="binding.example.com",le="+Inf"} 1 + apiserver_validating_admission_policy_check_duration_seconds_sum{enforcement_action="allow",error_type="invalid_error",policy="policy.example.com",policy_binding="binding.example.com"} 10 + apiserver_validating_admission_policy_check_duration_seconds_count{enforcement_action="allow",error_type="invalid_error",policy="policy.example.com",policy_binding="binding.example.com"} 1 # HELP apiserver_validating_admission_policy_check_total [ALPHA] Validation admission policy check total, labeled by policy and further identified by binding and enforcement action taken. # TYPE apiserver_validating_admission_policy_check_total counter - apiserver_validating_admission_policy_check_total{enforcement_action="allow",policy="policy.example.com",policy_binding="binding.example.com"} 1 + apiserver_validating_admission_policy_check_total{enforcement_action="allow",error_type="invalid_error",policy="policy.example.com",policy_binding="binding.example.com"} 1 `, observer: func() { - Metrics.ObserveAdmission(context.TODO(), time.Duration(10)*time.Second, "policy.example.com", "binding.example.com") + Metrics.ObserveAdmission(context.TODO(), time.Duration(10)*time.Second, "policy.example.com", "binding.example.com", ValidatingInvalidError) }, }, { @@ -66,20 +66,20 @@ func TestNoUtils(t *testing.T) { want: ` # HELP apiserver_validating_admission_policy_check_duration_seconds [ALPHA] Validation admission latency for individual validation expressions in seconds, labeled by policy and further including binding and enforcement action taken. # TYPE apiserver_validating_admission_policy_check_duration_seconds histogram - apiserver_validating_admission_policy_check_duration_seconds_bucket{enforcement_action="deny",policy="policy.example.com",policy_binding="binding.example.com",le="0.0000005"} 0 - apiserver_validating_admission_policy_check_duration_seconds_bucket{enforcement_action="deny",policy="policy.example.com",policy_binding="binding.example.com",le="0.001"} 0 - apiserver_validating_admission_policy_check_duration_seconds_bucket{enforcement_action="deny",policy="policy.example.com",policy_binding="binding.example.com",le="0.01"} 0 - apiserver_validating_admission_policy_check_duration_seconds_bucket{enforcement_action="deny",policy="policy.example.com",policy_binding="binding.example.com",le="0.1"} 0 - apiserver_validating_admission_policy_check_duration_seconds_bucket{enforcement_action="deny",policy="policy.example.com",policy_binding="binding.example.com",le="1"} 0 - apiserver_validating_admission_policy_check_duration_seconds_bucket{enforcement_action="deny",policy="policy.example.com",policy_binding="binding.example.com",le="+Inf"} 1 - apiserver_validating_admission_policy_check_duration_seconds_sum{enforcement_action="deny",policy="policy.example.com",policy_binding="binding.example.com"} 10 - apiserver_validating_admission_policy_check_duration_seconds_count{enforcement_action="deny",policy="policy.example.com",policy_binding="binding.example.com"} 1 + apiserver_validating_admission_policy_check_duration_seconds_bucket{enforcement_action="deny",error_type="invalid_error",policy="policy.example.com",policy_binding="binding.example.com",le="0.0000005"} 0 + apiserver_validating_admission_policy_check_duration_seconds_bucket{enforcement_action="deny",error_type="invalid_error",policy="policy.example.com",policy_binding="binding.example.com",le="0.001"} 0 + apiserver_validating_admission_policy_check_duration_seconds_bucket{enforcement_action="deny",error_type="invalid_error",policy="policy.example.com",policy_binding="binding.example.com",le="0.01"} 0 + apiserver_validating_admission_policy_check_duration_seconds_bucket{enforcement_action="deny",error_type="invalid_error",policy="policy.example.com",policy_binding="binding.example.com",le="0.1"} 0 + apiserver_validating_admission_policy_check_duration_seconds_bucket{enforcement_action="deny",error_type="invalid_error",policy="policy.example.com",policy_binding="binding.example.com",le="1"} 0 + apiserver_validating_admission_policy_check_duration_seconds_bucket{enforcement_action="deny",error_type="invalid_error",policy="policy.example.com",policy_binding="binding.example.com",le="+Inf"} 1 + apiserver_validating_admission_policy_check_duration_seconds_sum{enforcement_action="deny",error_type="invalid_error",policy="policy.example.com",policy_binding="binding.example.com"} 10 + apiserver_validating_admission_policy_check_duration_seconds_count{enforcement_action="deny",error_type="invalid_error",policy="policy.example.com",policy_binding="binding.example.com"} 1 # HELP apiserver_validating_admission_policy_check_total [ALPHA] Validation admission policy check total, labeled by policy and further identified by binding and enforcement action taken. # TYPE apiserver_validating_admission_policy_check_total counter - apiserver_validating_admission_policy_check_total{enforcement_action="deny",policy="policy.example.com",policy_binding="binding.example.com"} 1 + apiserver_validating_admission_policy_check_total{enforcement_action="deny",error_type="invalid_error",policy="policy.example.com",policy_binding="binding.example.com"} 1 `, observer: func() { - Metrics.ObserveRejection(context.TODO(), time.Duration(10)*time.Second, "policy.example.com", "binding.example.com") + Metrics.ObserveRejection(context.TODO(), time.Duration(10)*time.Second, "policy.example.com", "binding.example.com", ValidatingInvalidError) }, }, }