From e46928c0b1eb9a90b01753a274c8aeaa5a075009 Mon Sep 17 00:00:00 2001 From: Tim Allclair Date: Mon, 25 Oct 2021 21:43:15 -0700 Subject: [PATCH 1/4] [PodSecurity] Fix up metrics & add tests Update pod security metrics to match the spec in the KEP. --- .../security/podsecurity/admission.go | 5 +- .../admission/admission_test.go | 38 +++++-- .../{admission => api}/attributes.go | 36 +++++- .../pod-security-admission/api/interfaces.go | 50 --------- .../cmd/webhook/server/server.go | 11 +- .../pod-security-admission/metrics/metrics.go | 100 +++++++++++++---- .../metrics/metrics_test.go | 103 ++++++++++++++++++ test/integration/auth/podsecurity_test.go | 58 ++++++++++ 8 files changed, 313 insertions(+), 88 deletions(-) rename staging/src/k8s.io/pod-security-admission/{admission => api}/attributes.go (64%) delete mode 100644 staging/src/k8s.io/pod-security-admission/api/interfaces.go diff --git a/plugin/pkg/admission/security/podsecurity/admission.go b/plugin/pkg/admission/security/podsecurity/admission.go index f701517e8b5..3407c42038e 100644 --- a/plugin/pkg/admission/security/podsecurity/admission.go +++ b/plugin/pkg/admission/security/podsecurity/admission.go @@ -51,7 +51,7 @@ import ( podsecurityadmission "k8s.io/pod-security-admission/admission" podsecurityconfigloader "k8s.io/pod-security-admission/admission/api/load" podsecurityadmissionapi "k8s.io/pod-security-admission/api" - podsecuritymetrics "k8s.io/pod-security-admission/metrics" + "k8s.io/pod-security-admission/metrics" "k8s.io/pod-security-admission/policy" ) @@ -94,13 +94,14 @@ func newPlugin(reader io.Reader) (*Plugin, error) { if err != nil { return nil, fmt.Errorf("could not create PodSecurityRegistry: %w", err) } + metrics.LegacyMustRegister() return &Plugin{ Handler: admission.NewHandler(admission.Create, admission.Update), delegate: &podsecurityadmission.Admission{ Configuration: config, Evaluator: evaluator, - Metrics: podsecuritymetrics.NewPrometheusRecorder(podsecurityadmissionapi.GetAPIVersion()), + Metrics: metrics.DefaultRecorder(), PodSpecExtractor: podsecurityadmission.DefaultPodSpecExtractor{}, }, }, nil diff --git a/staging/src/k8s.io/pod-security-admission/admission/admission_test.go b/staging/src/k8s.io/pod-security-admission/admission/admission_test.go index e3f652fcb63..68b7cf5a887 100644 --- a/staging/src/k8s.io/pod-security-admission/admission/admission_test.go +++ b/staging/src/k8s.io/pod-security-admission/admission/admission_test.go @@ -457,7 +457,7 @@ func TestValidateNamespace(t *testing.T) { } } - attrs := &AttributesRecord{ + attrs := &api.AttributesRecord{ Object: newObject, OldObject: oldObject, Name: newObject.Name, @@ -508,7 +508,7 @@ func TestValidateNamespace(t *testing.T) { RuntimeClasses: tc.exemptRuntimeClasses, }, }, - Metrics: NewMockRecorder(), + Metrics: &FakeRecorder{}, defaultPolicy: defaultPolicy, namespacePodCheckTimeout: time.Second, @@ -582,6 +582,7 @@ func TestValidatePodController(t *testing.T) { api.WarnLevelLabel: string(api.LevelBaseline), api.AuditLevelLabel: string(api.LevelBaseline), } + nsLevelVersion := api.LevelVersion{api.LevelBaseline, api.LatestVersion()} testCases := []struct { desc string @@ -671,7 +672,7 @@ func TestValidatePodController(t *testing.T) { operation = admissionv1.Update } - attrs := &AttributesRecord{ + attrs := &api.AttributesRecord{ testName, testNamespace, tc.gvk, @@ -700,6 +701,7 @@ func TestValidatePodController(t *testing.T) { Labels: nsLabels}}, } PodSpecExtractor := &DefaultPodSpecExtractor{} + recorder := &FakeRecorder{} a := &Admission{ PodLister: podLister, Evaluator: evaluator, @@ -711,7 +713,7 @@ func TestValidatePodController(t *testing.T) { Usernames: tc.exemptUsers, }, }, - Metrics: NewMockRecorder(), + Metrics: recorder, defaultPolicy: defaultPolicy, NamespaceGetter: nsGetter, } @@ -727,16 +729,36 @@ func TestValidatePodController(t *testing.T) { assert.Empty(t, resultError) assert.Equal(t, tc.expectAuditAnnotations, result.AuditAnnotations, "unexpected AuditAnnotations") assert.Equal(t, tc.expectWarnings, result.Warnings, "unexpected Warnings") + + expectedEvaluations := []EvaluationRecord{} + if len(tc.expectAuditAnnotations) > 0 { + expectedEvaluations = append(expectedEvaluations, EvaluationRecord{testName, metrics.DecisionDeny, nsLevelVersion, metrics.ModeAudit}) + } + if len(tc.expectWarnings) > 0 { + expectedEvaluations = append(expectedEvaluations, EvaluationRecord{testName, metrics.DecisionDeny, nsLevelVersion, metrics.ModeWarn}) + } + recorder.ExpectEvaluations(t, expectedEvaluations) }) } } -type MockRecorder struct { +type FakeRecorder struct { + evaluations []EvaluationRecord } -func NewMockRecorder() *MockRecorder { - return &MockRecorder{} +type EvaluationRecord struct { + ObjectName string + Decision metrics.Decision + Policy api.LevelVersion + Mode metrics.Mode } -func (r MockRecorder) RecordEvaluation(decision metrics.Decision, policy api.LevelVersion, evalMode metrics.Mode, attrs api.Attributes) { +func (r *FakeRecorder) RecordEvaluation(decision metrics.Decision, policy api.LevelVersion, evalMode metrics.Mode, attrs api.Attributes) { + r.evaluations = append(r.evaluations, EvaluationRecord{attrs.GetName(), decision, policy, evalMode}) +} + +// ExpectEvaluation asserts that the evaluation was recorded, and clears the record. +func (r *FakeRecorder) ExpectEvaluations(t *testing.T, expected []EvaluationRecord) { + t.Helper() + assert.ElementsMatch(t, expected, r.evaluations) } diff --git a/staging/src/k8s.io/pod-security-admission/admission/attributes.go b/staging/src/k8s.io/pod-security-admission/api/attributes.go similarity index 64% rename from staging/src/k8s.io/pod-security-admission/admission/attributes.go rename to staging/src/k8s.io/pod-security-admission/api/attributes.go index f55c5a92505..ce6c16fbb61 100644 --- a/staging/src/k8s.io/pod-security-admission/admission/attributes.go +++ b/staging/src/k8s.io/pod-security-admission/api/attributes.go @@ -14,15 +14,41 @@ See the License for the specific language governing permissions and limitations under the License. */ -package admission +package api import ( admissionv1 "k8s.io/api/admission/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" - "k8s.io/pod-security-admission/api" ) +// Attributes exposes the admission request parameters consumed by the PodSecurity admission controller. +type Attributes interface { + // GetName is the name of the object associated with the request. + GetName() string + // GetNamespace is the namespace associated with the request (if any) + GetNamespace() string + // GetResource is the name of the resource being requested. This is not the kind. For example: pods + GetResource() schema.GroupVersionResource + // GetKind is the name of the kind being requested. For example: Pod + GetKind() schema.GroupVersionKind + // GetSubresource is the name of the subresource being requested. This is a different resource, scoped to the parent resource, but it may have a different kind. + // For instance, /pods has the resource "pods" and the kind "Pod", while /pods/foo/status has the resource "pods", the sub resource "status", and the kind "Pod" + // (because status operates on pods). The binding resource for a pod though may be /pods/foo/binding, which has resource "pods", subresource "binding", and kind "Binding". + GetSubresource() string + // GetOperation is the operation being performed + GetOperation() admissionv1.Operation + + // GetObject returns the typed Object from incoming request. + // For objects in the core API group, the result must use the v1 API. + GetObject() (runtime.Object, error) + // GetOldObject returns the typed existing object. Only populated for UPDATE requests. + // For objects in the core API group, the result must use the v1 API. + GetOldObject() (runtime.Object, error) + // GetUserName is the requesting user's authenticated name. + GetUserName() string +} + // AttributesRecord is a simple struct implementing the Attributes interface. type AttributesRecord struct { Name string @@ -64,8 +90,10 @@ func (a *AttributesRecord) GetOldObject() (runtime.Object, error) { return a.OldObject, nil } +var _ Attributes = &AttributesRecord{} + // RequestAttributes adapts an admission.Request to the Attributes interface. -func RequestAttributes(request *admissionv1.AdmissionRequest, decoder runtime.Decoder) api.Attributes { +func RequestAttributes(request *admissionv1.AdmissionRequest, decoder runtime.Decoder) Attributes { return &attributes{ r: request, decoder: decoder, @@ -114,3 +142,5 @@ func (a *attributes) decode(in runtime.RawExtension) (runtime.Object, error) { out, _, err := a.decoder.Decode(in.Raw, &gvk, nil) return out, err } + +var _ Attributes = &attributes{} diff --git a/staging/src/k8s.io/pod-security-admission/api/interfaces.go b/staging/src/k8s.io/pod-security-admission/api/interfaces.go deleted file mode 100644 index 30fd002eaec..00000000000 --- a/staging/src/k8s.io/pod-security-admission/api/interfaces.go +++ /dev/null @@ -1,50 +0,0 @@ -/* -Copyright 2021 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package api - -import ( - admissionv1 "k8s.io/api/admission/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/runtime/schema" -) - -// Attributes exposes the admission request parameters consumed by the PodSecurity admission controller. -type Attributes interface { - // GetName is the name of the object associated with the request. - GetName() string - // GetNamespace is the namespace associated with the request (if any) - GetNamespace() string - // GetResource is the name of the resource being requested. This is not the kind. For example: pods - GetResource() schema.GroupVersionResource - // GetKind is the name of the kind being requested. For example: Pod - GetKind() schema.GroupVersionKind - // GetSubresource is the name of the subresource being requested. This is a different resource, scoped to the parent resource, but it may have a different kind. - // For instance, /pods has the resource "pods" and the kind "Pod", while /pods/foo/status has the resource "pods", the sub resource "status", and the kind "Pod" - // (because status operates on pods). The binding resource for a pod though may be /pods/foo/binding, which has resource "pods", subresource "binding", and kind "Binding". - GetSubresource() string - // GetOperation is the operation being performed - GetOperation() admissionv1.Operation - - // GetObject returns the typed Object from incoming request. - // For objects in the core API group, the result must use the v1 API. - GetObject() (runtime.Object, error) - // GetOldObject returns the typed existing object. Only populated for UPDATE requests. - // For objects in the core API group, the result must use the v1 API. - GetOldObject() (runtime.Object, error) - // GetUserName is the requesting user's authenticated name. - GetUserName() string -} diff --git a/staging/src/k8s.io/pod-security-admission/cmd/webhook/server/server.go b/staging/src/k8s.io/pod-security-admission/cmd/webhook/server/server.go index f6b72beaa4d..21b50b0db7a 100644 --- a/staging/src/k8s.io/pod-security-admission/cmd/webhook/server/server.go +++ b/staging/src/k8s.io/pod-security-admission/cmd/webhook/server/server.go @@ -40,6 +40,8 @@ import ( clientset "k8s.io/client-go/kubernetes" restclient "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" + compbasemetrics "k8s.io/component-base/metrics" + "k8s.io/component-base/metrics/legacyregistry" "k8s.io/component-base/version/verflag" "k8s.io/klog/v2" "k8s.io/pod-security-admission/admission" @@ -117,6 +119,11 @@ func (s *Server) Start(ctx context.Context) error { // debugging or proxy purposes. The API server will not connect to an http webhook. mux.HandleFunc("/", s.HandleValidate) + // Serve the global metrics registry. + metrics.LegacyMustRegister() + mux.Handle("/metrics", + compbasemetrics.HandlerFor(legacyregistry.DefaultGatherer, compbasemetrics.HandlerOpts{ErrorHandling: compbasemetrics.ContinueOnError})) + if s.insecureServing != nil { if err := s.insecureServing.Serve(mux, 0, ctx.Done()); err != nil { return fmt.Errorf("failed to start insecure server: %w", err) @@ -206,7 +213,7 @@ func (s *Server) HandleValidate(w http.ResponseWriter, r *http.Request) { } klog.V(1).InfoS("received request", "UID", review.Request.UID, "kind", review.Request.Kind, "resource", review.Request.Resource) - attributes := admission.RequestAttributes(review.Request, codecs.UniversalDeserializer()) + attributes := api.RequestAttributes(review.Request, codecs.UniversalDeserializer()) response := s.delegate.Validate(ctx, attributes) response.UID = review.Request.UID // Response UID must match request UID review.Response = response @@ -276,7 +283,7 @@ func Setup(c *Config) (*Server, error) { s.delegate = &admission.Admission{ Configuration: c.PodSecurityConfig, Evaluator: evaluator, - Metrics: metrics.NewPrometheusRecorder(api.GetAPIVersion()), + Metrics: metrics.DefaultRecorder(), PodSpecExtractor: admission.DefaultPodSpecExtractor{}, PodLister: admission.PodListerFromClient(client), NamespaceGetter: admission.NamespaceGetterFromListerAndClient(namespaceLister, client), diff --git a/staging/src/k8s.io/pod-security-admission/metrics/metrics.go b/staging/src/k8s.io/pod-security-admission/metrics/metrics.go index 60adc51aaeb..7fc3fd49bce 100644 --- a/staging/src/k8s.io/pod-security-admission/metrics/metrics.go +++ b/staging/src/k8s.io/pod-security-admission/metrics/metrics.go @@ -17,7 +17,15 @@ limitations under the License. package metrics import ( + "strconv" + "strings" + "sync" + + admissionv1 "k8s.io/api/admission/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/component-base/metrics" + "k8s.io/component-base/metrics/legacyregistry" "k8s.io/pod-security-admission/api" ) @@ -29,45 +37,67 @@ const ( DecisionDeny = "deny" // Policy evaluated, request denied ) -var ( - SecurityEvaluation = metrics.NewCounterVec( - &metrics.CounterOpts{ - Name: "pod_security_evaluations_total", - Help: "Counter of pod security evaluations.", - StabilityLevel: metrics.ALPHA, - }, - []string{"decision", "policy_level", "policy_version", "mode", "operation", "resource", "subresource"}, - ) - - Registry = metrics.NewKubeRegistry() -) - type Decision string type Mode string type Recorder interface { - RecordEvaluation(decision Decision, policy api.LevelVersion, evalMode Mode, attrs api.Attributes) + RecordEvaluation(Decision, api.LevelVersion, Mode, api.Attributes) +} + +var defaultRecorder = NewPrometheusRecorder(api.GetAPIVersion()) + +func DefaultRecorder() Recorder { + return defaultRecorder +} + +// MustRegister registers the global DefaultMetrics against the legacy registry. +func LegacyMustRegister() { + defaultRecorder.MustRegister(legacyregistry.MustRegister) } type PrometheusRecorder struct { apiVersion api.Version + + evaluationsCounter *metrics.CounterVec + + registerOnce sync.Once } -func init() { - Registry.MustRegister(SecurityEvaluation) -} +var _ Recorder = &PrometheusRecorder{} func NewPrometheusRecorder(version api.Version) *PrometheusRecorder { - return &PrometheusRecorder{apiVersion: version} + evaluationsCounter := metrics.NewCounterVec( + &metrics.CounterOpts{ + Name: "pod_security_evaluations_total", + Help: "Number of policy evaluations that occurred, not counting ignored or exempt requests.", + StabilityLevel: metrics.ALPHA, + }, + []string{"decision", "policy_level", "policy_version", "mode", "request_operation", "resource", "subresource"}, + ) + + return &PrometheusRecorder{ + apiVersion: version, + evaluationsCounter: evaluationsCounter, + } } -func (r PrometheusRecorder) RecordEvaluation(decision Decision, policy api.LevelVersion, evalMode Mode, attrs api.Attributes) { +func (r *PrometheusRecorder) MustRegister(registerFunc func(...metrics.Registerable)) { + r.registerOnce.Do(func() { + registerFunc(r.evaluationsCounter) + }) +} + +func (r *PrometheusRecorder) Reset() { + r.evaluationsCounter.Reset() +} + +func (r *PrometheusRecorder) RecordEvaluation(decision Decision, policy api.LevelVersion, evalMode Mode, attrs api.Attributes) { dec := string(decision) - operation := string(attrs.GetOperation()) - resource := attrs.GetResource().String() + operation := operationLabel(attrs.GetOperation()) + resource := resourceLabel(attrs.GetResource()) subresource := attrs.GetSubresource() + var version string - if policy.Valid() { if policy.Version.Latest() { version = "latest" } else { @@ -77,7 +107,31 @@ func (r PrometheusRecorder) RecordEvaluation(decision Decision, policy api.Level version = "future" } } - SecurityEvaluation.WithLabelValues(dec, string(policy.Level), + + r.evaluationsCounter.WithLabelValues(dec, string(policy.Level), version, string(evalMode), operation, resource, subresource).Inc() +} + +func resourceLabel(resource schema.GroupVersionResource) string { + switch resource.GroupResource() { + case corev1.Resource("pods"): + return "pod" + case corev1.Resource("namespace"): + return "namespace" + default: + // Assume any other resource is a valid input to pod-security, and therefore a controller. + return "controller" + } +} + +func operationLabel(op admissionv1.Operation) string { + switch op { + case admissionv1.Create: + return "create" + case admissionv1.Update: + return "update" + default: + // This is a slower operation, but never used in the default implementation. + return strings.ToLower(string(op)) } } diff --git a/staging/src/k8s.io/pod-security-admission/metrics/metrics_test.go b/staging/src/k8s.io/pod-security-admission/metrics/metrics_test.go index 17911b6bc6c..0a50195e058 100644 --- a/staging/src/k8s.io/pod-security-admission/metrics/metrics_test.go +++ b/staging/src/k8s.io/pod-security-admission/metrics/metrics_test.go @@ -15,3 +15,106 @@ limitations under the License. */ package metrics + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + admissionv1 "k8s.io/api/admission/v1" + appsv1 "k8s.io/api/apps/v1" + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/component-base/metrics" + "k8s.io/component-base/metrics/testutil" + "k8s.io/pod-security-admission/api" +) + +var ( + decisions = []Decision{DecisionAllow, DecisionDeny} + modes = []Mode{ModeEnforce, ModeAudit, ModeWarn} + operations = []admissionv1.Operation{admissionv1.Create, admissionv1.Update} + levels = []api.Level{api.LevelPrivileged, api.LevelBaseline, api.LevelRestricted} + + // Map of resource types to test to expected label value. + resourceExpectations = map[schema.GroupVersionResource]string{ + corev1.SchemeGroupVersion.WithResource("pods"): "pod", + appsv1.SchemeGroupVersion.WithResource("deployments"): "controller", + batchv1.SchemeGroupVersion.WithResource("cronjobs"): "controller", + } + + // Map of versions to expected label value (compared against testVersion). + versionExpectations = map[string]string{ + "latest": "latest", + "v1.22": "v1.22", + "v1.23": "v1.23", + "v1.24": "future", + } + testVersion = api.MajorMinorVersion(1, 23) +) + +func TestRecordEvaluation(t *testing.T) { + recorder := NewPrometheusRecorder(testVersion) + registry := testutil.NewFakeKubeRegistry("1.23.0") + recorder.MustRegister(registry.MustRegister) + + for _, decision := range decisions { + for _, mode := range modes { + for _, op := range operations { + for _, level := range levels { + for version, expectedVersion := range versionExpectations { + for resource, expectedResource := range resourceExpectations { + recorder.RecordEvaluation(decision, levelVersion(level, version), mode, &api.AttributesRecord{ + Resource: resource, + Operation: op, + }) + expectedLabels := map[string]string{ + "decision": string(decision), + "policy_level": string(level), + "policy_version": expectedVersion, + "mode": string(mode), + "request_operation": strings.ToLower(string(op)), + "resource": expectedResource, + "subresource": "", + } + val, err := testutil.GetCounterMetricValue(recorder.evaluationsCounter.With(expectedLabels)) + require.NoError(t, err, expectedLabels) + + if !assert.EqualValues(t, 1, val, expectedLabels) { + findMetric(t, registry, "pod_security_evaluations_total") + } + + recorder.Reset() + } + } + } + } + } + } +} + +func levelVersion(level api.Level, version string) api.LevelVersion { + lv := api.LevelVersion{Level: level} + var err error + if lv.Version, err = api.ParseVersion(version); err != nil { + panic(err) + } + return lv +} + +// findMetric dumps non-zero metric samples for the metric with the given name, to help with debugging. +func findMetric(t *testing.T, gatherer metrics.Gatherer, metricName string) { + t.Helper() + m, _ := gatherer.Gather() + for _, mFamily := range m { + if mFamily.GetName() == metricName { + for _, metric := range mFamily.GetMetric() { + if metric.GetCounter().GetValue() > 0 { + t.Logf("Found metric: %s", metric.String()) + } + } + } + } +} diff --git a/test/integration/auth/podsecurity_test.go b/test/integration/auth/podsecurity_test.go index a01bd53261e..78ed4b9f3b7 100644 --- a/test/integration/auth/podsecurity_test.go +++ b/test/integration/auth/podsecurity_test.go @@ -18,7 +18,9 @@ package auth import ( "context" + "crypto/tls" "fmt" + "io/ioutil" "net" "net/http" "net/url" @@ -37,6 +39,7 @@ import ( "k8s.io/client-go/rest" "k8s.io/component-base/featuregate" featuregatetesting "k8s.io/component-base/featuregate/testing" + "k8s.io/component-base/metrics/testutil" kubeapiservertesting "k8s.io/kubernetes/cmd/kube-apiserver/app/testing" "k8s.io/kubernetes/pkg/capabilities" "k8s.io/kubernetes/pkg/features" @@ -67,6 +70,8 @@ func TestPodSecurity(t *testing.T) { ExemptRuntimeClasses: []string{}, } podsecuritytest.Run(t, opts) + + ValidatePluginMetrics(t, opts.ClientConfig) } // TestPodSecurityGAOnly ensures policies pass with only GA features enabled @@ -88,6 +93,8 @@ func TestPodSecurityGAOnly(t *testing.T) { Features: utilfeature.DefaultFeatureGate, } podsecuritytest.Run(t, opts) + + ValidatePluginMetrics(t, opts.ClientConfig) } func TestPodSecurityWebhook(t *testing.T) { @@ -125,6 +132,8 @@ func TestPodSecurityWebhook(t *testing.T) { ExemptRuntimeClasses: []string{}, } podsecuritytest.Run(t, opts) + + ValidateWebhookMetrics(t, webhookAddr) } func startPodSecurityServer(t *testing.T) *kubeapiservertesting.TestServer { @@ -285,3 +294,52 @@ func installWebhook(t *testing.T, clientConfig *rest.Config, addr string) error return nil } + +func ValidatePluginMetrics(t *testing.T, clientConfig *rest.Config) { + client, err := kubernetes.NewForConfig(clientConfig) + if err != nil { + t.Fatalf("Error creating client: %v", err) + } + ctx := context.Background() + data, err := client.CoreV1().RESTClient().Get().AbsPath("metrics").DoRaw(ctx) + if err != nil { + t.Fatalf("Failed to read metrics: %v", err) + } + validateMetrics(t, data) +} + +func ValidateWebhookMetrics(t *testing.T, webhookAddr string) { + endpoint := &url.URL{ + Scheme: "https", + Host: webhookAddr, + Path: "/metrics", + } + client := &http.Client{Transport: &http.Transport{ + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, + }} + resp, err := client.Get(endpoint.String()) + if err != nil { + t.Fatalf("Failed to fetch metrics from %s: %v", endpoint.String(), err) + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + t.Fatalf("Non-200 response trying to scrape metrics from %s: %v", endpoint.String(), resp) + } + data, err := ioutil.ReadAll(resp.Body) + if err != nil { + t.Fatalf("Unable to read metrics response: %v", err) + } + validateMetrics(t, data) +} + +func validateMetrics(t *testing.T, rawMetrics []byte) { + metrics := testutil.NewMetrics() + if err := testutil.ParseMetrics(string(rawMetrics), &metrics); err != nil { + t.Fatalf("Failed to parse metrics: %v", err) + } + + if err := testutil.ValidateMetrics(metrics, "pod_security_evaluations_total", + "decision", "policy_level", "policy_version", "mode", "request_operation", "resource", "subresource"); err != nil { + t.Fatalf("Metric validation failed: %v", err) + } +} From 21692e1683afc243cf5322ebb2e430797000ca69 Mon Sep 17 00:00:00 2001 From: Tim Allclair Date: Mon, 25 Oct 2021 23:10:12 -0700 Subject: [PATCH 2/4] [PodSecurity] Add error & exemption metrics --- .../admission/admission.go | 15 +++++ .../admission/admission_test.go | 5 +- .../pod-security-admission/metrics/metrics.go | 58 ++++++++++++++--- .../metrics/metrics_test.go | 62 +++++++++++++++++++ test/integration/auth/podsecurity_test.go | 2 +- 5 files changed, 131 insertions(+), 11 deletions(-) diff --git a/staging/src/k8s.io/pod-security-admission/admission/admission.go b/staging/src/k8s.io/pod-security-admission/admission/admission.go index 722aee1955d..1bb3a9b7af7 100644 --- a/staging/src/k8s.io/pod-security-admission/admission/admission.go +++ b/staging/src/k8s.io/pod-security-admission/admission/admission.go @@ -320,10 +320,12 @@ func (a *Admission) ValidatePod(ctx context.Context, attrs api.Attributes) *admi } // short-circuit on exempt namespaces and users if a.exemptNamespace(attrs.GetNamespace()) { + a.Metrics.RecordExemption(attrs) return sharedAllowedByNamespaceExemptionResponse() } if a.exemptUser(attrs.GetUserName()) { + a.Metrics.RecordExemption(attrs) return sharedAllowedByUserExemptionResponse() } @@ -331,32 +333,38 @@ func (a *Admission) ValidatePod(ctx context.Context, attrs api.Attributes) *admi namespace, err := a.NamespaceGetter.GetNamespace(ctx, attrs.GetNamespace()) if err != nil { klog.ErrorS(err, "failed to fetch pod namespace", "namespace", attrs.GetNamespace()) + a.Metrics.RecordError(true, attrs) return internalErrorResponse(fmt.Sprintf("failed to lookup namespace %s", attrs.GetNamespace())) } nsPolicy, nsPolicyErrs := a.PolicyToEvaluate(namespace.Labels) if len(nsPolicyErrs) == 0 && nsPolicy.Enforce.Level == api.LevelPrivileged && nsPolicy.Warn.Level == api.LevelPrivileged && nsPolicy.Audit.Level == api.LevelPrivileged { + a.Metrics.RecordEvaluation(metrics.DecisionAllow, nsPolicy.Enforce, metrics.ModeEnforce, attrs) return sharedAllowedResponse() } obj, err := attrs.GetObject() if err != nil { klog.ErrorS(err, "failed to decode object") + a.Metrics.RecordError(true, attrs) return badRequestResponse("failed to decode object") } pod, ok := obj.(*corev1.Pod) if !ok { klog.InfoS("failed to assert pod type", "type", reflect.TypeOf(obj)) + a.Metrics.RecordError(true, attrs) return badRequestResponse("failed to decode pod") } if attrs.GetOperation() == admissionv1.Update { oldObj, err := attrs.GetOldObject() if err != nil { klog.ErrorS(err, "failed to decode old object") + a.Metrics.RecordError(true, attrs) return badRequestResponse("failed to decode old object") } oldPod, ok := oldObj.(*corev1.Pod) if !ok { klog.InfoS("failed to assert old pod type", "type", reflect.TypeOf(oldObj)) + a.Metrics.RecordError(true, attrs) return badRequestResponse("failed to decode old pod") } if !isSignificantPodUpdate(pod, oldPod) { @@ -376,10 +384,12 @@ func (a *Admission) ValidatePodController(ctx context.Context, attrs api.Attribu } // short-circuit on exempt namespaces and users if a.exemptNamespace(attrs.GetNamespace()) { + a.Metrics.RecordExemption(attrs) return sharedAllowedByNamespaceExemptionResponse() } if a.exemptUser(attrs.GetUserName()) { + a.Metrics.RecordExemption(attrs) return sharedAllowedByUserExemptionResponse() } @@ -387,6 +397,7 @@ func (a *Admission) ValidatePodController(ctx context.Context, attrs api.Attribu namespace, err := a.NamespaceGetter.GetNamespace(ctx, attrs.GetNamespace()) if err != nil { klog.ErrorS(err, "failed to fetch pod namespace", "namespace", attrs.GetNamespace()) + a.Metrics.RecordError(true, attrs) return internalErrorResponse(fmt.Sprintf("failed to lookup namespace %s", attrs.GetNamespace())) } nsPolicy, nsPolicyErrs := a.PolicyToEvaluate(namespace.Labels) @@ -397,11 +408,13 @@ func (a *Admission) ValidatePodController(ctx context.Context, attrs api.Attribu obj, err := attrs.GetObject() if err != nil { klog.ErrorS(err, "failed to decode object") + a.Metrics.RecordError(true, attrs) return badRequestResponse("failed to decode object") } podMetadata, podSpec, err := a.PodSpecExtractor.ExtractPodSpec(obj) if err != nil { klog.ErrorS(err, "failed to extract pod spec") + a.Metrics.RecordError(true, attrs) return badRequestResponse("failed to extract pod template") } if podMetadata == nil && podSpec == nil { @@ -417,6 +430,7 @@ func (a *Admission) ValidatePodController(ctx context.Context, attrs api.Attribu func (a *Admission) EvaluatePod(ctx context.Context, nsPolicy api.Policy, nsPolicyErr error, podMetadata *metav1.ObjectMeta, podSpec *corev1.PodSpec, attrs api.Attributes, enforce bool) *admissionv1.AdmissionResponse { // short-circuit on exempt runtimeclass if a.exemptRuntimeClass(podSpec.RuntimeClassName) { + a.Metrics.RecordExemption(attrs) return sharedAllowedByRuntimeClassExemptionResponse() } @@ -424,6 +438,7 @@ func (a *Admission) EvaluatePod(ctx context.Context, nsPolicy api.Policy, nsPoli if nsPolicyErr != nil { klog.V(2).InfoS("failed to parse PodSecurity namespace labels", "err", nsPolicyErr) auditAnnotations["error"] = fmt.Sprintf("Failed to parse policy: %v", nsPolicyErr) + a.Metrics.RecordError(false, attrs) } if klog.V(5).Enabled() { diff --git a/staging/src/k8s.io/pod-security-admission/admission/admission_test.go b/staging/src/k8s.io/pod-security-admission/admission/admission_test.go index 68b7cf5a887..f0f4f027e25 100644 --- a/staging/src/k8s.io/pod-security-admission/admission/admission_test.go +++ b/staging/src/k8s.io/pod-security-admission/admission/admission_test.go @@ -731,7 +731,7 @@ func TestValidatePodController(t *testing.T) { assert.Equal(t, tc.expectWarnings, result.Warnings, "unexpected Warnings") expectedEvaluations := []EvaluationRecord{} - if len(tc.expectAuditAnnotations) > 0 { + if _, ok := tc.expectAuditAnnotations["audit-violations"]; ok { expectedEvaluations = append(expectedEvaluations, EvaluationRecord{testName, metrics.DecisionDeny, nsLevelVersion, metrics.ModeAudit}) } if len(tc.expectWarnings) > 0 { @@ -757,6 +757,9 @@ func (r *FakeRecorder) RecordEvaluation(decision metrics.Decision, policy api.Le r.evaluations = append(r.evaluations, EvaluationRecord{attrs.GetName(), decision, policy, evalMode}) } +func (r *FakeRecorder) RecordExemption(api.Attributes) {} +func (r *FakeRecorder) RecordError(bool, api.Attributes) {} + // ExpectEvaluation asserts that the evaluation was recorded, and clears the record. func (r *FakeRecorder) ExpectEvaluations(t *testing.T, expected []EvaluationRecord) { t.Helper() diff --git a/staging/src/k8s.io/pod-security-admission/metrics/metrics.go b/staging/src/k8s.io/pod-security-admission/metrics/metrics.go index 7fc3fd49bce..f84388a736b 100644 --- a/staging/src/k8s.io/pod-security-admission/metrics/metrics.go +++ b/staging/src/k8s.io/pod-security-admission/metrics/metrics.go @@ -42,6 +42,8 @@ type Mode string type Recorder interface { RecordEvaluation(Decision, api.LevelVersion, Mode, api.Attributes) + RecordExemption(api.Attributes) + RecordError(fatal bool, attrs api.Attributes) } var defaultRecorder = NewPrometheusRecorder(api.GetAPIVersion()) @@ -59,6 +61,8 @@ type PrometheusRecorder struct { apiVersion api.Version evaluationsCounter *metrics.CounterVec + exemptionsCounter *metrics.CounterVec + errorsCounter *metrics.CounterVec registerOnce sync.Once } @@ -74,21 +78,43 @@ func NewPrometheusRecorder(version api.Version) *PrometheusRecorder { }, []string{"decision", "policy_level", "policy_version", "mode", "request_operation", "resource", "subresource"}, ) + exemptionsCounter := metrics.NewCounterVec( + &metrics.CounterOpts{ + Name: "pod_security_exemptions_total", + Help: "Number of exempt requests, not counting ignored or out of scope requests.", + StabilityLevel: metrics.ALPHA, + }, + []string{"request_operation", "resource", "subresource"}, + ) + errorsCounter := metrics.NewCounterVec( + &metrics.CounterOpts{ + Name: "pod_security_errors_total", + Help: "Number of errors prevent normal evaluation. Non-fatal errors are evaluated against a default policy.", + StabilityLevel: metrics.ALPHA, + }, + []string{"fatal", "request_operation", "resource", "subresource"}, + ) return &PrometheusRecorder{ apiVersion: version, evaluationsCounter: evaluationsCounter, + exemptionsCounter: exemptionsCounter, + errorsCounter: errorsCounter, } } func (r *PrometheusRecorder) MustRegister(registerFunc func(...metrics.Registerable)) { r.registerOnce.Do(func() { registerFunc(r.evaluationsCounter) + registerFunc(r.exemptionsCounter) + registerFunc(r.errorsCounter) }) } func (r *PrometheusRecorder) Reset() { r.evaluationsCounter.Reset() + r.exemptionsCounter.Reset() + r.errorsCounter.Reset() } func (r *PrometheusRecorder) RecordEvaluation(decision Decision, policy api.LevelVersion, evalMode Mode, attrs api.Attributes) { @@ -98,18 +124,32 @@ func (r *PrometheusRecorder) RecordEvaluation(decision Decision, policy api.Leve subresource := attrs.GetSubresource() var version string - if policy.Version.Latest() { - version = "latest" + if policy.Version.Latest() { + version = "latest" + } else { + if !r.apiVersion.Older(policy.Version) { + version = policy.Version.String() } else { - if !r.apiVersion.Older(policy.Version) { - version = policy.Version.String() - } else { - version = "future" - } + version = "future" } + } - r.evaluationsCounter.WithLabelValues(dec, string(policy.Level), - version, string(evalMode), operation, resource, subresource).Inc() + r.evaluationsCounter.WithLabelValues(dec, string(policy.Level), + version, string(evalMode), operation, resource, subresource).Inc() +} + +func (r *PrometheusRecorder) RecordExemption(attrs api.Attributes) { + operation := operationLabel(attrs.GetOperation()) + resource := resourceLabel(attrs.GetResource()) + subresource := attrs.GetSubresource() + r.exemptionsCounter.WithLabelValues(operation, resource, subresource).Inc() +} + +func (r *PrometheusRecorder) RecordError(fatal bool, attrs api.Attributes) { + operation := operationLabel(attrs.GetOperation()) + resource := resourceLabel(attrs.GetResource()) + subresource := attrs.GetSubresource() + r.errorsCounter.WithLabelValues(strconv.FormatBool(fatal), operation, resource, subresource).Inc() } func resourceLabel(resource schema.GroupVersionResource) string { diff --git a/staging/src/k8s.io/pod-security-admission/metrics/metrics_test.go b/staging/src/k8s.io/pod-security-admission/metrics/metrics_test.go index 0a50195e058..72081a89434 100644 --- a/staging/src/k8s.io/pod-security-admission/metrics/metrics_test.go +++ b/staging/src/k8s.io/pod-security-admission/metrics/metrics_test.go @@ -17,6 +17,7 @@ limitations under the License. package metrics import ( + "strconv" "strings" "testing" @@ -95,6 +96,65 @@ func TestRecordEvaluation(t *testing.T) { } } +func TestRecordExemption(t *testing.T) { + recorder := NewPrometheusRecorder(testVersion) + registry := testutil.NewFakeKubeRegistry("1.23.0") + recorder.MustRegister(registry.MustRegister) + + for _, op := range operations { + for resource, expectedResource := range resourceExpectations { + recorder.RecordExemption(&api.AttributesRecord{ + Resource: resource, + Operation: op, + }) + expectedLabels := map[string]string{ + "request_operation": strings.ToLower(string(op)), + "resource": expectedResource, + "subresource": "", + } + val, err := testutil.GetCounterMetricValue(recorder.exemptionsCounter.With(expectedLabels)) + require.NoError(t, err, expectedLabels) + + if !assert.EqualValues(t, 1, val, expectedLabels) { + findMetric(t, registry, "pod_security_exemptions_total") + } + + recorder.Reset() + } + } +} + +func TestRecordError(t *testing.T) { + recorder := NewPrometheusRecorder(testVersion) + registry := testutil.NewFakeKubeRegistry("1.23.0") + recorder.MustRegister(registry.MustRegister) + + for _, fatal := range []bool{true, false} { + for _, op := range operations { + for resource, expectedResource := range resourceExpectations { + recorder.RecordError(fatal, &api.AttributesRecord{ + Resource: resource, + Operation: op, + }) + expectedLabels := map[string]string{ + "fatal": strconv.FormatBool(fatal), + "request_operation": strings.ToLower(string(op)), + "resource": expectedResource, + "subresource": "", + } + val, err := testutil.GetCounterMetricValue(recorder.errorsCounter.With(expectedLabels)) + require.NoError(t, err, expectedLabels) + + if !assert.EqualValues(t, 1, val, expectedLabels) { + findMetric(t, registry, "pod_security_errors_total") + } + + recorder.Reset() + } + } + } +} + func levelVersion(level api.Level, version string) api.LevelVersion { lv := api.LevelVersion{Level: level} var err error @@ -115,6 +175,8 @@ func findMetric(t *testing.T, gatherer metrics.Gatherer, metricName string) { t.Logf("Found metric: %s", metric.String()) } } + return } } + t.Errorf("Expected metric %s not found", metricName) } diff --git a/test/integration/auth/podsecurity_test.go b/test/integration/auth/podsecurity_test.go index 78ed4b9f3b7..4acdcde7d3e 100644 --- a/test/integration/auth/podsecurity_test.go +++ b/test/integration/auth/podsecurity_test.go @@ -340,6 +340,6 @@ func validateMetrics(t *testing.T, rawMetrics []byte) { if err := testutil.ValidateMetrics(metrics, "pod_security_evaluations_total", "decision", "policy_level", "policy_version", "mode", "request_operation", "resource", "subresource"); err != nil { - t.Fatalf("Metric validation failed: %v", err) + t.Errorf("Metric validation failed: %v", err) } } From 6c273020d3a5d1f74e6f948f4ed7938ce78bd34b Mon Sep 17 00:00:00 2001 From: Tim Allclair Date: Tue, 26 Oct 2021 18:49:13 -0700 Subject: [PATCH 3/4] [PodSecurity] Avoid the LegcayRegistry for metrics serving --- .../security/podsecurity/admission.go | 18 +++++++++++++-- .../cmd/webhook/server/server.go | 13 +++++++---- .../pod-security-admission/metrics/metrics.go | 23 +++---------------- test/integration/auth/podsecurity_test.go | 4 ++++ 4 files changed, 31 insertions(+), 27 deletions(-) diff --git a/plugin/pkg/admission/security/podsecurity/admission.go b/plugin/pkg/admission/security/podsecurity/admission.go index 3407c42038e..f4b4859e34f 100644 --- a/plugin/pkg/admission/security/podsecurity/admission.go +++ b/plugin/pkg/admission/security/podsecurity/admission.go @@ -43,6 +43,7 @@ import ( "k8s.io/client-go/kubernetes" corev1listers "k8s.io/client-go/listers/core/v1" "k8s.io/component-base/featuregate" + "k8s.io/component-base/metrics/legacyregistry" "k8s.io/kubernetes/pkg/api/legacyscheme" "k8s.io/kubernetes/pkg/apis/apps" "k8s.io/kubernetes/pkg/apis/batch" @@ -83,6 +84,20 @@ var _ admission.ValidationInterface = &Plugin{} var _ genericadmissioninit.WantsExternalKubeInformerFactory = &Plugin{} var _ genericadmissioninit.WantsExternalKubeClientSet = &Plugin{} +var ( + defaultRecorder *metrics.PrometheusRecorder + defaultRecorderInit sync.Once +) + +func getDefaultRecorder() metrics.Recorder { + // initialize and register to legacy metrics once + defaultRecorderInit.Do(func() { + defaultRecorder = metrics.NewPrometheusRecorder(podsecurityadmissionapi.GetAPIVersion()) + defaultRecorder.MustRegister(legacyregistry.MustRegister) + }) + return defaultRecorder +} + // newPlugin creates a new admission plugin. func newPlugin(reader io.Reader) (*Plugin, error) { config, err := podsecurityconfigloader.LoadFromReader(reader) @@ -94,14 +109,13 @@ func newPlugin(reader io.Reader) (*Plugin, error) { if err != nil { return nil, fmt.Errorf("could not create PodSecurityRegistry: %w", err) } - metrics.LegacyMustRegister() return &Plugin{ Handler: admission.NewHandler(admission.Create, admission.Update), delegate: &podsecurityadmission.Admission{ Configuration: config, Evaluator: evaluator, - Metrics: metrics.DefaultRecorder(), + Metrics: getDefaultRecorder(), PodSpecExtractor: podsecurityadmission.DefaultPodSpecExtractor{}, }, }, nil diff --git a/staging/src/k8s.io/pod-security-admission/cmd/webhook/server/server.go b/staging/src/k8s.io/pod-security-admission/cmd/webhook/server/server.go index 21b50b0db7a..8ea6d5cf202 100644 --- a/staging/src/k8s.io/pod-security-admission/cmd/webhook/server/server.go +++ b/staging/src/k8s.io/pod-security-admission/cmd/webhook/server/server.go @@ -41,7 +41,6 @@ import ( restclient "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" compbasemetrics "k8s.io/component-base/metrics" - "k8s.io/component-base/metrics/legacyregistry" "k8s.io/component-base/version/verflag" "k8s.io/klog/v2" "k8s.io/pod-security-admission/admission" @@ -107,6 +106,8 @@ type Server struct { informerFactory kubeinformers.SharedInformerFactory delegate *admission.Admission + + metricsRegistry compbasemetrics.KubeRegistry } func (s *Server) Start(ctx context.Context) error { @@ -119,10 +120,9 @@ func (s *Server) Start(ctx context.Context) error { // debugging or proxy purposes. The API server will not connect to an http webhook. mux.HandleFunc("/", s.HandleValidate) - // Serve the global metrics registry. - metrics.LegacyMustRegister() + // Serve the metrics. mux.Handle("/metrics", - compbasemetrics.HandlerFor(legacyregistry.DefaultGatherer, compbasemetrics.HandlerOpts{ErrorHandling: compbasemetrics.ContinueOnError})) + compbasemetrics.HandlerFor(s.metricsRegistry, compbasemetrics.HandlerOpts{ErrorHandling: compbasemetrics.ContinueOnError})) if s.insecureServing != nil { if err := s.insecureServing.Serve(mux, 0, ctx.Done()); err != nil { @@ -279,11 +279,14 @@ func Setup(c *Config) (*Server, error) { if err != nil { return nil, fmt.Errorf("could not create PodSecurityRegistry: %w", err) } + metrics := metrics.NewPrometheusRecorder(api.GetAPIVersion()) + s.metricsRegistry = compbasemetrics.NewKubeRegistry() + metrics.MustRegister(s.metricsRegistry.MustRegister) s.delegate = &admission.Admission{ Configuration: c.PodSecurityConfig, Evaluator: evaluator, - Metrics: metrics.DefaultRecorder(), + Metrics: metrics, PodSpecExtractor: admission.DefaultPodSpecExtractor{}, PodLister: admission.PodListerFromClient(client), NamespaceGetter: admission.NamespaceGetterFromListerAndClient(namespaceLister, client), diff --git a/staging/src/k8s.io/pod-security-admission/metrics/metrics.go b/staging/src/k8s.io/pod-security-admission/metrics/metrics.go index f84388a736b..d6ab481f098 100644 --- a/staging/src/k8s.io/pod-security-admission/metrics/metrics.go +++ b/staging/src/k8s.io/pod-security-admission/metrics/metrics.go @@ -19,13 +19,11 @@ package metrics import ( "strconv" "strings" - "sync" admissionv1 "k8s.io/api/admission/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/component-base/metrics" - "k8s.io/component-base/metrics/legacyregistry" "k8s.io/pod-security-admission/api" ) @@ -46,25 +44,12 @@ type Recorder interface { RecordError(fatal bool, attrs api.Attributes) } -var defaultRecorder = NewPrometheusRecorder(api.GetAPIVersion()) - -func DefaultRecorder() Recorder { - return defaultRecorder -} - -// MustRegister registers the global DefaultMetrics against the legacy registry. -func LegacyMustRegister() { - defaultRecorder.MustRegister(legacyregistry.MustRegister) -} - type PrometheusRecorder struct { apiVersion api.Version evaluationsCounter *metrics.CounterVec exemptionsCounter *metrics.CounterVec errorsCounter *metrics.CounterVec - - registerOnce sync.Once } var _ Recorder = &PrometheusRecorder{} @@ -104,11 +89,9 @@ func NewPrometheusRecorder(version api.Version) *PrometheusRecorder { } func (r *PrometheusRecorder) MustRegister(registerFunc func(...metrics.Registerable)) { - r.registerOnce.Do(func() { - registerFunc(r.evaluationsCounter) - registerFunc(r.exemptionsCounter) - registerFunc(r.errorsCounter) - }) + registerFunc(r.evaluationsCounter) + registerFunc(r.exemptionsCounter) + registerFunc(r.errorsCounter) } func (r *PrometheusRecorder) Reset() { diff --git a/test/integration/auth/podsecurity_test.go b/test/integration/auth/podsecurity_test.go index 4acdcde7d3e..ad7c8e215ca 100644 --- a/test/integration/auth/podsecurity_test.go +++ b/test/integration/auth/podsecurity_test.go @@ -342,4 +342,8 @@ func validateMetrics(t *testing.T, rawMetrics []byte) { "decision", "policy_level", "policy_version", "mode", "request_operation", "resource", "subresource"); err != nil { t.Errorf("Metric validation failed: %v", err) } + if err := testutil.ValidateMetrics(metrics, "pod_security_exemptions_total", + "request_operation", "resource", "subresource"); err != nil { + t.Errorf("Metric validation failed: %v", err) + } } From afad3417596327fe3c8814808415c59b4d06414f Mon Sep 17 00:00:00 2001 From: Tim Allclair Date: Tue, 26 Oct 2021 18:19:10 -0700 Subject: [PATCH 4/4] Cache fast-path metrics & update unit tests --- .../pod-security-admission/api/helpers.go | 7 +- .../src/k8s.io/pod-security-admission/go.mod | 1 + .../pod-security-admission/metrics/metrics.go | 220 ++++++++++++++---- .../metrics/metrics_test.go | 126 +++++----- 4 files changed, 258 insertions(+), 96 deletions(-) diff --git a/staging/src/k8s.io/pod-security-admission/api/helpers.go b/staging/src/k8s.io/pod-security-admission/api/helpers.go index ee175993d9f..5e309fddc2c 100644 --- a/staging/src/k8s.io/pod-security-admission/api/helpers.go +++ b/staging/src/k8s.io/pod-security-admission/api/helpers.go @@ -21,6 +21,7 @@ import ( "regexp" "strconv" "strings" + "unicode" "k8s.io/apimachinery/pkg/util/validation/field" "k8s.io/component-base/version" @@ -76,7 +77,11 @@ func GetAPIVersion() Version { if err != nil { return v } - minor, err := strconv.Atoi(apiVersion.Minor) + // split the "normal" + and - for semver stuff to get the leading minor number + minorString := strings.FieldsFunc(apiVersion.Minor, func(r rune) bool { + return !unicode.IsDigit(r) + })[0] + minor, err := strconv.Atoi(minorString) if err != nil { return v } diff --git a/staging/src/k8s.io/pod-security-admission/go.mod b/staging/src/k8s.io/pod-security-admission/go.mod index f8156b818c0..38c4358b67f 100644 --- a/staging/src/k8s.io/pod-security-admission/go.mod +++ b/staging/src/k8s.io/pod-security-admission/go.mod @@ -5,6 +5,7 @@ module k8s.io/pod-security-admission go 1.16 require ( + github.com/blang/semver v3.5.1+incompatible github.com/google/go-cmp v0.5.5 github.com/spf13/cobra v1.2.1 github.com/spf13/pflag v1.0.5 diff --git a/staging/src/k8s.io/pod-security-admission/metrics/metrics.go b/staging/src/k8s.io/pod-security-admission/metrics/metrics.go index d6ab481f098..bb6d384b21d 100644 --- a/staging/src/k8s.io/pod-security-admission/metrics/metrics.go +++ b/staging/src/k8s.io/pod-security-admission/metrics/metrics.go @@ -19,7 +19,9 @@ package metrics import ( "strconv" "strings" + "sync" + "github.com/blang/semver" admissionv1 "k8s.io/api/admission/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime/schema" @@ -47,34 +49,18 @@ type Recorder interface { type PrometheusRecorder struct { apiVersion api.Version - evaluationsCounter *metrics.CounterVec - exemptionsCounter *metrics.CounterVec + evaluationsCounter *evaluationsCounter + exemptionsCounter *exemptionsCounter errorsCounter *metrics.CounterVec } var _ Recorder = &PrometheusRecorder{} func NewPrometheusRecorder(version api.Version) *PrometheusRecorder { - evaluationsCounter := metrics.NewCounterVec( - &metrics.CounterOpts{ - Name: "pod_security_evaluations_total", - Help: "Number of policy evaluations that occurred, not counting ignored or exempt requests.", - StabilityLevel: metrics.ALPHA, - }, - []string{"decision", "policy_level", "policy_version", "mode", "request_operation", "resource", "subresource"}, - ) - exemptionsCounter := metrics.NewCounterVec( - &metrics.CounterOpts{ - Name: "pod_security_exemptions_total", - Help: "Number of exempt requests, not counting ignored or out of scope requests.", - StabilityLevel: metrics.ALPHA, - }, - []string{"request_operation", "resource", "subresource"}, - ) errorsCounter := metrics.NewCounterVec( &metrics.CounterOpts{ Name: "pod_security_errors_total", - Help: "Number of errors prevent normal evaluation. Non-fatal errors are evaluated against a default policy.", + Help: "Number of errors preventing normal evaluation. Non-fatal errors may result in the latest restricted profile being used for evaluation.", StabilityLevel: metrics.ALPHA, }, []string{"fatal", "request_operation", "resource", "subresource"}, @@ -82,8 +68,8 @@ func NewPrometheusRecorder(version api.Version) *PrometheusRecorder { return &PrometheusRecorder{ apiVersion: version, - evaluationsCounter: evaluationsCounter, - exemptionsCounter: exemptionsCounter, + evaluationsCounter: newEvaluationsCounter(), + exemptionsCounter: newExemptionsCounter(), errorsCounter: errorsCounter, } } @@ -101,13 +87,8 @@ func (r *PrometheusRecorder) Reset() { } func (r *PrometheusRecorder) RecordEvaluation(decision Decision, policy api.LevelVersion, evalMode Mode, attrs api.Attributes) { - dec := string(decision) - operation := operationLabel(attrs.GetOperation()) - resource := resourceLabel(attrs.GetResource()) - subresource := attrs.GetSubresource() - var version string - if policy.Version.Latest() { + if policy.Version.Latest() || policy.Level == api.LevelPrivileged { // Privileged is always effectively latest. version = "latest" } else { if !r.apiVersion.Older(policy.Version) { @@ -117,29 +98,44 @@ func (r *PrometheusRecorder) RecordEvaluation(decision Decision, policy api.Leve } } - r.evaluationsCounter.WithLabelValues(dec, string(policy.Level), - version, string(evalMode), operation, resource, subresource).Inc() + r.evaluationsCounter.CachedInc(evaluationsLabels{ + decision: string(decision), + level: string(policy.Level), + version: version, + mode: string(evalMode), + operation: operationLabel(attrs.GetOperation()), + resource: resourceLabel(attrs.GetResource()), + subresource: attrs.GetSubresource(), + }) } func (r *PrometheusRecorder) RecordExemption(attrs api.Attributes) { - operation := operationLabel(attrs.GetOperation()) - resource := resourceLabel(attrs.GetResource()) - subresource := attrs.GetSubresource() - r.exemptionsCounter.WithLabelValues(operation, resource, subresource).Inc() + r.exemptionsCounter.CachedInc(exemptionsLabels{ + operation: operationLabel(attrs.GetOperation()), + resource: resourceLabel(attrs.GetResource()), + subresource: attrs.GetSubresource(), + }) } func (r *PrometheusRecorder) RecordError(fatal bool, attrs api.Attributes) { - operation := operationLabel(attrs.GetOperation()) - resource := resourceLabel(attrs.GetResource()) - subresource := attrs.GetSubresource() - r.errorsCounter.WithLabelValues(strconv.FormatBool(fatal), operation, resource, subresource).Inc() + r.errorsCounter.WithLabelValues( + strconv.FormatBool(fatal), + operationLabel(attrs.GetOperation()), + resourceLabel(attrs.GetResource()), + attrs.GetSubresource(), + ).Inc() } +var ( + podResource = corev1.Resource("pods") + namespaceResource = corev1.Resource("namespaces") +) + func resourceLabel(resource schema.GroupVersionResource) string { switch resource.GroupResource() { - case corev1.Resource("pods"): + case podResource: return "pod" - case corev1.Resource("namespace"): + case namespaceResource: return "namespace" default: // Assume any other resource is a valid input to pod-security, and therefore a controller. @@ -158,3 +154,149 @@ func operationLabel(op admissionv1.Operation) string { return strings.ToLower(string(op)) } } + +type evaluationsLabels struct { + decision string + level string + version string + mode string + operation string + resource string + subresource string +} + +func (l *evaluationsLabels) labels() []string { + return []string{l.decision, l.level, l.version, l.mode, l.operation, l.resource, l.subresource} +} + +type exemptionsLabels struct { + operation string + resource string + subresource string +} + +func (l *exemptionsLabels) labels() []string { + return []string{l.operation, l.resource, l.subresource} +} + +type evaluationsCounter struct { + *metrics.CounterVec + + cache map[evaluationsLabels]metrics.CounterMetric + cacheLock sync.RWMutex +} + +func newEvaluationsCounter() *evaluationsCounter { + return &evaluationsCounter{ + CounterVec: metrics.NewCounterVec( + &metrics.CounterOpts{ + Name: "pod_security_evaluations_total", + Help: "Number of policy evaluations that occurred, not counting ignored or exempt requests.", + StabilityLevel: metrics.ALPHA, + }, + []string{"decision", "policy_level", "policy_version", "mode", "request_operation", "resource", "subresource"}, + ), + cache: make(map[evaluationsLabels]metrics.CounterMetric), + } +} + +func (c *evaluationsCounter) CachedInc(l evaluationsLabels) { + c.cacheLock.RLock() + defer c.cacheLock.RUnlock() + + if cachedCounter, ok := c.cache[l]; ok { + cachedCounter.Inc() + } else { + c.CounterVec.WithLabelValues(l.labels()...).Inc() + } +} + +func (c *evaluationsCounter) Create(version *semver.Version) bool { + c.cacheLock.Lock() + defer c.cacheLock.Unlock() + if c.CounterVec.Create(version) { + c.populateCache() + return true + } else { + return false + } +} + +func (c *evaluationsCounter) Reset() { + c.cacheLock.Lock() + defer c.cacheLock.Unlock() + c.CounterVec.Reset() + c.populateCache() +} + +func (c *evaluationsCounter) populateCache() { + labelsToCache := []evaluationsLabels{ + {decision: "allow", level: "privileged", version: "latest", mode: "enforce", operation: "create", resource: "pod", subresource: ""}, + {decision: "allow", level: "privileged", version: "latest", mode: "enforce", operation: "update", resource: "pod", subresource: ""}, + } + for _, l := range labelsToCache { + c.cache[l] = c.CounterVec.WithLabelValues(l.labels()...) + } +} + +type exemptionsCounter struct { + *metrics.CounterVec + + cache map[exemptionsLabels]metrics.CounterMetric + cacheLock sync.RWMutex +} + +func newExemptionsCounter() *exemptionsCounter { + return &exemptionsCounter{ + CounterVec: metrics.NewCounterVec( + &metrics.CounterOpts{ + Name: "pod_security_exemptions_total", + Help: "Number of exempt requests, not counting ignored or out of scope requests.", + StabilityLevel: metrics.ALPHA, + }, + []string{"request_operation", "resource", "subresource"}, + ), + cache: make(map[exemptionsLabels]metrics.CounterMetric), + } +} + +func (c *exemptionsCounter) CachedInc(l exemptionsLabels) { + c.cacheLock.RLock() + defer c.cacheLock.RUnlock() + + if cachedCounter, ok := c.cache[l]; ok { + cachedCounter.Inc() + } else { + c.CounterVec.WithLabelValues(l.labels()...).Inc() + } +} + +func (c *exemptionsCounter) Create(version *semver.Version) bool { + c.cacheLock.Lock() + defer c.cacheLock.Unlock() + if c.CounterVec.Create(version) { + c.populateCache() + return true + } else { + return false + } +} + +func (c *exemptionsCounter) Reset() { + c.cacheLock.Lock() + defer c.cacheLock.Unlock() + c.CounterVec.Reset() + c.populateCache() +} + +func (c *exemptionsCounter) populateCache() { + labelsToCache := []exemptionsLabels{ + {operation: "create", resource: "pod", subresource: ""}, + {operation: "update", resource: "pod", subresource: ""}, + {operation: "create", resource: "controller", subresource: ""}, + {operation: "update", resource: "controller", subresource: ""}, + } + for _, l := range labelsToCache { + c.cache[l] = c.CounterVec.WithLabelValues(l.labels()...) + } +} diff --git a/staging/src/k8s.io/pod-security-admission/metrics/metrics_test.go b/staging/src/k8s.io/pod-security-admission/metrics/metrics_test.go index 72081a89434..657e1aa6d56 100644 --- a/staging/src/k8s.io/pod-security-admission/metrics/metrics_test.go +++ b/staging/src/k8s.io/pod-security-admission/metrics/metrics_test.go @@ -17,20 +17,21 @@ limitations under the License. package metrics import ( - "strconv" + "bytes" + "fmt" + "sort" "strings" "testing" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" admissionv1 "k8s.io/api/admission/v1" appsv1 "k8s.io/api/apps/v1" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime/schema" - "k8s.io/component-base/metrics" "k8s.io/component-base/metrics/testutil" "k8s.io/pod-security-admission/api" + + "github.com/stretchr/testify/assert" ) var ( @@ -71,21 +72,18 @@ func TestRecordEvaluation(t *testing.T) { Resource: resource, Operation: op, }) - expectedLabels := map[string]string{ - "decision": string(decision), - "policy_level": string(level), - "policy_version": expectedVersion, - "mode": string(mode), - "request_operation": strings.ToLower(string(op)), - "resource": expectedResource, - "subresource": "", - } - val, err := testutil.GetCounterMetricValue(recorder.evaluationsCounter.With(expectedLabels)) - require.NoError(t, err, expectedLabels) - if !assert.EqualValues(t, 1, val, expectedLabels) { - findMetric(t, registry, "pod_security_evaluations_total") + if level == api.LevelPrivileged { + expectedVersion = "latest" } + expected := fmt.Sprintf(` + # HELP pod_security_evaluations_total [ALPHA] Number of policy evaluations that occurred, not counting ignored or exempt requests. + # TYPE pod_security_evaluations_total counter + pod_security_evaluations_total{decision="%s",mode="%s",policy_level="%s",policy_version="%s",request_operation="%s",resource="%s",subresource=""} 1 + `, decision, mode, level, expectedVersion, strings.ToLower(string(op)), expectedResource) + expected = expectCachedMetrics("pod_security_evaluations_total", expected) + + assert.NoError(t, testutil.GatherAndCompare(registry, bytes.NewBufferString(expected), "pod_security_evaluations_total")) recorder.Reset() } @@ -103,23 +101,24 @@ func TestRecordExemption(t *testing.T) { for _, op := range operations { for resource, expectedResource := range resourceExpectations { - recorder.RecordExemption(&api.AttributesRecord{ - Resource: resource, - Operation: op, - }) - expectedLabels := map[string]string{ - "request_operation": strings.ToLower(string(op)), - "resource": expectedResource, - "subresource": "", - } - val, err := testutil.GetCounterMetricValue(recorder.exemptionsCounter.With(expectedLabels)) - require.NoError(t, err, expectedLabels) + for _, subresource := range []string{"", "ephemeralcontainers"} { + recorder.RecordExemption(&api.AttributesRecord{ + Resource: resource, + Operation: op, + Subresource: subresource, + }) - if !assert.EqualValues(t, 1, val, expectedLabels) { - findMetric(t, registry, "pod_security_exemptions_total") - } + expected := fmt.Sprintf(` + # HELP pod_security_exemptions_total [ALPHA] Number of exempt requests, not counting ignored or out of scope requests. + # TYPE pod_security_exemptions_total counter + pod_security_exemptions_total{request_operation="%s",resource="%s",subresource="%s"} 1 + `, strings.ToLower(string(op)), expectedResource, subresource) + expected = expectCachedMetrics("pod_security_exemptions_total", expected) - recorder.Reset() + assert.NoError(t, testutil.GatherAndCompare(registry, bytes.NewBufferString(expected), "pod_security_exemptions_total")) + + recorder.Reset() + } } } } @@ -136,18 +135,14 @@ func TestRecordError(t *testing.T) { Resource: resource, Operation: op, }) - expectedLabels := map[string]string{ - "fatal": strconv.FormatBool(fatal), - "request_operation": strings.ToLower(string(op)), - "resource": expectedResource, - "subresource": "", - } - val, err := testutil.GetCounterMetricValue(recorder.errorsCounter.With(expectedLabels)) - require.NoError(t, err, expectedLabels) - if !assert.EqualValues(t, 1, val, expectedLabels) { - findMetric(t, registry, "pod_security_errors_total") - } + expected := bytes.NewBufferString(fmt.Sprintf(` + # HELP pod_security_errors_total [ALPHA] Number of errors preventing normal evaluation. Non-fatal errors may result in the latest restricted profile being used for evaluation. + # TYPE pod_security_errors_total counter + pod_security_errors_total{fatal="%t",request_operation="%s",resource="%s",subresource=""} 1 + `, fatal, strings.ToLower(string(op)), expectedResource)) + + assert.NoError(t, testutil.GatherAndCompare(registry, expected, "pod_security_errors_total")) recorder.Reset() } @@ -164,19 +159,38 @@ func levelVersion(level api.Level, version string) api.LevelVersion { return lv } -// findMetric dumps non-zero metric samples for the metric with the given name, to help with debugging. -func findMetric(t *testing.T, gatherer metrics.Gatherer, metricName string) { - t.Helper() - m, _ := gatherer.Gather() - for _, mFamily := range m { - if mFamily.GetName() == metricName { - for _, metric := range mFamily.GetMetric() { - if metric.GetCounter().GetValue() > 0 { - t.Logf("Found metric: %s", metric.String()) - } - } - return +// The cached metrics should always be present (value 0 if not counted). +var expectedCachedMetrics = map[string][]string{ + "pod_security_evaluations_total": { + `pod_security_evaluations_total{decision="allow",mode="enforce",policy_level="privileged",policy_version="latest",request_operation="create",resource="pod",subresource=""}`, + `pod_security_evaluations_total{decision="allow",mode="enforce",policy_level="privileged",policy_version="latest",request_operation="update",resource="pod",subresource=""}`, + }, + "pod_security_exemptions_total": { + `pod_security_exemptions_total{request_operation="create",resource="controller",subresource=""}`, + `pod_security_exemptions_total{request_operation="create",resource="pod",subresource=""}`, + `pod_security_exemptions_total{request_operation="update",resource="controller",subresource=""}`, + `pod_security_exemptions_total{request_operation="update",resource="pod",subresource=""}`, + }, +} + +func expectCachedMetrics(metricName, expected string) string { + expectations := strings.Split(strings.TrimSpace(expected), "\n") + for i, expectation := range expectations { + expectations[i] = strings.TrimSpace(expectation) // Whitespace messes with sorting. + } + for _, cached := range expectedCachedMetrics[metricName] { + expectations = addZeroExpectation(expectations, cached) + } + sort.Strings(expectations[:len(expectations)-1]) + return "\n" + strings.Join(expectations, "\n") + "\n" +} + +// addZeroExpectation adds the mixin as an empty sample if not already present. +func addZeroExpectation(currentExpectations []string, mixin string) []string { + for _, current := range currentExpectations { + if strings.HasPrefix(current, mixin) { + return currentExpectations // Mixin value already present. } } - t.Errorf("Expected metric %s not found", metricName) + return append(currentExpectations, fmt.Sprintf("%s 0", mixin)) }