From 21692e1683afc243cf5322ebb2e430797000ca69 Mon Sep 17 00:00:00 2001 From: Tim Allclair Date: Mon, 25 Oct 2021 23:10:12 -0700 Subject: [PATCH] [PodSecurity] Add error & exemption metrics --- .../admission/admission.go | 15 +++++ .../admission/admission_test.go | 5 +- .../pod-security-admission/metrics/metrics.go | 58 ++++++++++++++--- .../metrics/metrics_test.go | 62 +++++++++++++++++++ test/integration/auth/podsecurity_test.go | 2 +- 5 files changed, 131 insertions(+), 11 deletions(-) diff --git a/staging/src/k8s.io/pod-security-admission/admission/admission.go b/staging/src/k8s.io/pod-security-admission/admission/admission.go index 722aee1955d..1bb3a9b7af7 100644 --- a/staging/src/k8s.io/pod-security-admission/admission/admission.go +++ b/staging/src/k8s.io/pod-security-admission/admission/admission.go @@ -320,10 +320,12 @@ func (a *Admission) ValidatePod(ctx context.Context, attrs api.Attributes) *admi } // short-circuit on exempt namespaces and users if a.exemptNamespace(attrs.GetNamespace()) { + a.Metrics.RecordExemption(attrs) return sharedAllowedByNamespaceExemptionResponse() } if a.exemptUser(attrs.GetUserName()) { + a.Metrics.RecordExemption(attrs) return sharedAllowedByUserExemptionResponse() } @@ -331,32 +333,38 @@ func (a *Admission) ValidatePod(ctx context.Context, attrs api.Attributes) *admi namespace, err := a.NamespaceGetter.GetNamespace(ctx, attrs.GetNamespace()) if err != nil { klog.ErrorS(err, "failed to fetch pod namespace", "namespace", attrs.GetNamespace()) + a.Metrics.RecordError(true, attrs) return internalErrorResponse(fmt.Sprintf("failed to lookup namespace %s", attrs.GetNamespace())) } nsPolicy, nsPolicyErrs := a.PolicyToEvaluate(namespace.Labels) if len(nsPolicyErrs) == 0 && nsPolicy.Enforce.Level == api.LevelPrivileged && nsPolicy.Warn.Level == api.LevelPrivileged && nsPolicy.Audit.Level == api.LevelPrivileged { + a.Metrics.RecordEvaluation(metrics.DecisionAllow, nsPolicy.Enforce, metrics.ModeEnforce, attrs) return sharedAllowedResponse() } obj, err := attrs.GetObject() if err != nil { klog.ErrorS(err, "failed to decode object") + a.Metrics.RecordError(true, attrs) return badRequestResponse("failed to decode object") } pod, ok := obj.(*corev1.Pod) if !ok { klog.InfoS("failed to assert pod type", "type", reflect.TypeOf(obj)) + a.Metrics.RecordError(true, attrs) return badRequestResponse("failed to decode pod") } if attrs.GetOperation() == admissionv1.Update { oldObj, err := attrs.GetOldObject() if err != nil { klog.ErrorS(err, "failed to decode old object") + a.Metrics.RecordError(true, attrs) return badRequestResponse("failed to decode old object") } oldPod, ok := oldObj.(*corev1.Pod) if !ok { klog.InfoS("failed to assert old pod type", "type", reflect.TypeOf(oldObj)) + a.Metrics.RecordError(true, attrs) return badRequestResponse("failed to decode old pod") } if !isSignificantPodUpdate(pod, oldPod) { @@ -376,10 +384,12 @@ func (a *Admission) ValidatePodController(ctx context.Context, attrs api.Attribu } // short-circuit on exempt namespaces and users if a.exemptNamespace(attrs.GetNamespace()) { + a.Metrics.RecordExemption(attrs) return sharedAllowedByNamespaceExemptionResponse() } if a.exemptUser(attrs.GetUserName()) { + a.Metrics.RecordExemption(attrs) return sharedAllowedByUserExemptionResponse() } @@ -387,6 +397,7 @@ func (a *Admission) ValidatePodController(ctx context.Context, attrs api.Attribu namespace, err := a.NamespaceGetter.GetNamespace(ctx, attrs.GetNamespace()) if err != nil { klog.ErrorS(err, "failed to fetch pod namespace", "namespace", attrs.GetNamespace()) + a.Metrics.RecordError(true, attrs) return internalErrorResponse(fmt.Sprintf("failed to lookup namespace %s", attrs.GetNamespace())) } nsPolicy, nsPolicyErrs := a.PolicyToEvaluate(namespace.Labels) @@ -397,11 +408,13 @@ func (a *Admission) ValidatePodController(ctx context.Context, attrs api.Attribu obj, err := attrs.GetObject() if err != nil { klog.ErrorS(err, "failed to decode object") + a.Metrics.RecordError(true, attrs) return badRequestResponse("failed to decode object") } podMetadata, podSpec, err := a.PodSpecExtractor.ExtractPodSpec(obj) if err != nil { klog.ErrorS(err, "failed to extract pod spec") + a.Metrics.RecordError(true, attrs) return badRequestResponse("failed to extract pod template") } if podMetadata == nil && podSpec == nil { @@ -417,6 +430,7 @@ func (a *Admission) ValidatePodController(ctx context.Context, attrs api.Attribu func (a *Admission) EvaluatePod(ctx context.Context, nsPolicy api.Policy, nsPolicyErr error, podMetadata *metav1.ObjectMeta, podSpec *corev1.PodSpec, attrs api.Attributes, enforce bool) *admissionv1.AdmissionResponse { // short-circuit on exempt runtimeclass if a.exemptRuntimeClass(podSpec.RuntimeClassName) { + a.Metrics.RecordExemption(attrs) return sharedAllowedByRuntimeClassExemptionResponse() } @@ -424,6 +438,7 @@ func (a *Admission) EvaluatePod(ctx context.Context, nsPolicy api.Policy, nsPoli if nsPolicyErr != nil { klog.V(2).InfoS("failed to parse PodSecurity namespace labels", "err", nsPolicyErr) auditAnnotations["error"] = fmt.Sprintf("Failed to parse policy: %v", nsPolicyErr) + a.Metrics.RecordError(false, attrs) } if klog.V(5).Enabled() { diff --git a/staging/src/k8s.io/pod-security-admission/admission/admission_test.go b/staging/src/k8s.io/pod-security-admission/admission/admission_test.go index 68b7cf5a887..f0f4f027e25 100644 --- a/staging/src/k8s.io/pod-security-admission/admission/admission_test.go +++ b/staging/src/k8s.io/pod-security-admission/admission/admission_test.go @@ -731,7 +731,7 @@ func TestValidatePodController(t *testing.T) { assert.Equal(t, tc.expectWarnings, result.Warnings, "unexpected Warnings") expectedEvaluations := []EvaluationRecord{} - if len(tc.expectAuditAnnotations) > 0 { + if _, ok := tc.expectAuditAnnotations["audit-violations"]; ok { expectedEvaluations = append(expectedEvaluations, EvaluationRecord{testName, metrics.DecisionDeny, nsLevelVersion, metrics.ModeAudit}) } if len(tc.expectWarnings) > 0 { @@ -757,6 +757,9 @@ func (r *FakeRecorder) RecordEvaluation(decision metrics.Decision, policy api.Le r.evaluations = append(r.evaluations, EvaluationRecord{attrs.GetName(), decision, policy, evalMode}) } +func (r *FakeRecorder) RecordExemption(api.Attributes) {} +func (r *FakeRecorder) RecordError(bool, api.Attributes) {} + // ExpectEvaluation asserts that the evaluation was recorded, and clears the record. func (r *FakeRecorder) ExpectEvaluations(t *testing.T, expected []EvaluationRecord) { t.Helper() diff --git a/staging/src/k8s.io/pod-security-admission/metrics/metrics.go b/staging/src/k8s.io/pod-security-admission/metrics/metrics.go index 7fc3fd49bce..f84388a736b 100644 --- a/staging/src/k8s.io/pod-security-admission/metrics/metrics.go +++ b/staging/src/k8s.io/pod-security-admission/metrics/metrics.go @@ -42,6 +42,8 @@ type Mode string type Recorder interface { RecordEvaluation(Decision, api.LevelVersion, Mode, api.Attributes) + RecordExemption(api.Attributes) + RecordError(fatal bool, attrs api.Attributes) } var defaultRecorder = NewPrometheusRecorder(api.GetAPIVersion()) @@ -59,6 +61,8 @@ type PrometheusRecorder struct { apiVersion api.Version evaluationsCounter *metrics.CounterVec + exemptionsCounter *metrics.CounterVec + errorsCounter *metrics.CounterVec registerOnce sync.Once } @@ -74,21 +78,43 @@ func NewPrometheusRecorder(version api.Version) *PrometheusRecorder { }, []string{"decision", "policy_level", "policy_version", "mode", "request_operation", "resource", "subresource"}, ) + exemptionsCounter := metrics.NewCounterVec( + &metrics.CounterOpts{ + Name: "pod_security_exemptions_total", + Help: "Number of exempt requests, not counting ignored or out of scope requests.", + StabilityLevel: metrics.ALPHA, + }, + []string{"request_operation", "resource", "subresource"}, + ) + errorsCounter := metrics.NewCounterVec( + &metrics.CounterOpts{ + Name: "pod_security_errors_total", + Help: "Number of errors prevent normal evaluation. Non-fatal errors are evaluated against a default policy.", + StabilityLevel: metrics.ALPHA, + }, + []string{"fatal", "request_operation", "resource", "subresource"}, + ) return &PrometheusRecorder{ apiVersion: version, evaluationsCounter: evaluationsCounter, + exemptionsCounter: exemptionsCounter, + errorsCounter: errorsCounter, } } func (r *PrometheusRecorder) MustRegister(registerFunc func(...metrics.Registerable)) { r.registerOnce.Do(func() { registerFunc(r.evaluationsCounter) + registerFunc(r.exemptionsCounter) + registerFunc(r.errorsCounter) }) } func (r *PrometheusRecorder) Reset() { r.evaluationsCounter.Reset() + r.exemptionsCounter.Reset() + r.errorsCounter.Reset() } func (r *PrometheusRecorder) RecordEvaluation(decision Decision, policy api.LevelVersion, evalMode Mode, attrs api.Attributes) { @@ -98,18 +124,32 @@ func (r *PrometheusRecorder) RecordEvaluation(decision Decision, policy api.Leve subresource := attrs.GetSubresource() var version string - if policy.Version.Latest() { - version = "latest" + if policy.Version.Latest() { + version = "latest" + } else { + if !r.apiVersion.Older(policy.Version) { + version = policy.Version.String() } else { - if !r.apiVersion.Older(policy.Version) { - version = policy.Version.String() - } else { - version = "future" - } + version = "future" } + } - r.evaluationsCounter.WithLabelValues(dec, string(policy.Level), - version, string(evalMode), operation, resource, subresource).Inc() + r.evaluationsCounter.WithLabelValues(dec, string(policy.Level), + version, string(evalMode), operation, resource, subresource).Inc() +} + +func (r *PrometheusRecorder) RecordExemption(attrs api.Attributes) { + operation := operationLabel(attrs.GetOperation()) + resource := resourceLabel(attrs.GetResource()) + subresource := attrs.GetSubresource() + r.exemptionsCounter.WithLabelValues(operation, resource, subresource).Inc() +} + +func (r *PrometheusRecorder) RecordError(fatal bool, attrs api.Attributes) { + operation := operationLabel(attrs.GetOperation()) + resource := resourceLabel(attrs.GetResource()) + subresource := attrs.GetSubresource() + r.errorsCounter.WithLabelValues(strconv.FormatBool(fatal), operation, resource, subresource).Inc() } func resourceLabel(resource schema.GroupVersionResource) string { diff --git a/staging/src/k8s.io/pod-security-admission/metrics/metrics_test.go b/staging/src/k8s.io/pod-security-admission/metrics/metrics_test.go index 0a50195e058..72081a89434 100644 --- a/staging/src/k8s.io/pod-security-admission/metrics/metrics_test.go +++ b/staging/src/k8s.io/pod-security-admission/metrics/metrics_test.go @@ -17,6 +17,7 @@ limitations under the License. package metrics import ( + "strconv" "strings" "testing" @@ -95,6 +96,65 @@ func TestRecordEvaluation(t *testing.T) { } } +func TestRecordExemption(t *testing.T) { + recorder := NewPrometheusRecorder(testVersion) + registry := testutil.NewFakeKubeRegistry("1.23.0") + recorder.MustRegister(registry.MustRegister) + + for _, op := range operations { + for resource, expectedResource := range resourceExpectations { + recorder.RecordExemption(&api.AttributesRecord{ + Resource: resource, + Operation: op, + }) + expectedLabels := map[string]string{ + "request_operation": strings.ToLower(string(op)), + "resource": expectedResource, + "subresource": "", + } + val, err := testutil.GetCounterMetricValue(recorder.exemptionsCounter.With(expectedLabels)) + require.NoError(t, err, expectedLabels) + + if !assert.EqualValues(t, 1, val, expectedLabels) { + findMetric(t, registry, "pod_security_exemptions_total") + } + + recorder.Reset() + } + } +} + +func TestRecordError(t *testing.T) { + recorder := NewPrometheusRecorder(testVersion) + registry := testutil.NewFakeKubeRegistry("1.23.0") + recorder.MustRegister(registry.MustRegister) + + for _, fatal := range []bool{true, false} { + for _, op := range operations { + for resource, expectedResource := range resourceExpectations { + recorder.RecordError(fatal, &api.AttributesRecord{ + Resource: resource, + Operation: op, + }) + expectedLabels := map[string]string{ + "fatal": strconv.FormatBool(fatal), + "request_operation": strings.ToLower(string(op)), + "resource": expectedResource, + "subresource": "", + } + val, err := testutil.GetCounterMetricValue(recorder.errorsCounter.With(expectedLabels)) + require.NoError(t, err, expectedLabels) + + if !assert.EqualValues(t, 1, val, expectedLabels) { + findMetric(t, registry, "pod_security_errors_total") + } + + recorder.Reset() + } + } + } +} + func levelVersion(level api.Level, version string) api.LevelVersion { lv := api.LevelVersion{Level: level} var err error @@ -115,6 +175,8 @@ func findMetric(t *testing.T, gatherer metrics.Gatherer, metricName string) { t.Logf("Found metric: %s", metric.String()) } } + return } } + t.Errorf("Expected metric %s not found", metricName) } diff --git a/test/integration/auth/podsecurity_test.go b/test/integration/auth/podsecurity_test.go index 78ed4b9f3b7..4acdcde7d3e 100644 --- a/test/integration/auth/podsecurity_test.go +++ b/test/integration/auth/podsecurity_test.go @@ -340,6 +340,6 @@ func validateMetrics(t *testing.T, rawMetrics []byte) { if err := testutil.ValidateMetrics(metrics, "pod_security_evaluations_total", "decision", "policy_level", "policy_version", "mode", "request_operation", "resource", "subresource"); err != nil { - t.Fatalf("Metric validation failed: %v", err) + t.Errorf("Metric validation failed: %v", err) } }