Cache fast-path metrics & update unit tests

2025-07-29 22:46:12 +00:00 · 2021-10-26 18:19:10 -07:00 · 2021-10-26 18:19:10 -07:00 · afad341759
commit afad341759
parent 6c273020d3
4 changed files with 258 additions and 96 deletions
--- a/staging/src/k8s.io/pod-security-admission/api/helpers.go
+++ b/staging/src/k8s.io/pod-security-admission/api/helpers.go
@ -21,6 +21,7 @@ import (
 	"regexp"
 	"strconv"
 	"strings"
 	"unicode"
 	"k8s.io/apimachinery/pkg/util/validation/field"
 	"k8s.io/component-base/version"
@ -76,7 +77,11 @@ func GetAPIVersion() Version {
 	if err != nil {
 		return v
 	}
-	minor, err := strconv.Atoi(apiVersion.Minor)
+	// split the "normal" + and - for semver stuff to get the leading minor number
 	minorString := strings.FieldsFunc(apiVersion.Minor, func(r rune) bool {
 		return !unicode.IsDigit(r)
 	})[0]
 	minor, err := strconv.Atoi(minorString)
 	if err != nil {
 		return v
 	}
--- a/staging/src/k8s.io/pod-security-admission/go.mod
+++ b/staging/src/k8s.io/pod-security-admission/go.mod
@ -5,6 +5,7 @@ module k8s.io/pod-security-admission
 go 1.16
 require (
 	github.com/blang/semver v3.5.1+incompatible
 	github.com/google/go-cmp v0.5.5
 	github.com/spf13/cobra v1.2.1
 	github.com/spf13/pflag v1.0.5
--- a/staging/src/k8s.io/pod-security-admission/metrics/metrics.go
+++ b/staging/src/k8s.io/pod-security-admission/metrics/metrics.go
@ -19,7 +19,9 @@ package metrics
 import (
 	"strconv"
 	"strings"
 	"sync"
 	"github.com/blang/semver"
 	admissionv1 "k8s.io/api/admission/v1"
 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/runtime/schema"
@ -47,34 +49,18 @@ type Recorder interface {
 type PrometheusRecorder struct {
 	apiVersion api.Version
-	evaluationsCounter *metrics.CounterVec
+	evaluationsCounter *evaluationsCounter
-	exemptionsCounter  *metrics.CounterVec
+	exemptionsCounter  *exemptionsCounter
 	errorsCounter      *metrics.CounterVec
 }
 var _ Recorder = &PrometheusRecorder{}
 func NewPrometheusRecorder(version api.Version) *PrometheusRecorder {
 	evaluationsCounter := metrics.NewCounterVec(
 		&metrics.CounterOpts{
 			Name:           "pod_security_evaluations_total",
 			Help:           "Number of policy evaluations that occurred, not counting ignored or exempt requests.",
 			StabilityLevel: metrics.ALPHA,
 		},
 		[]string{"decision", "policy_level", "policy_version", "mode", "request_operation", "resource", "subresource"},
 	)
 	exemptionsCounter := metrics.NewCounterVec(
 		&metrics.CounterOpts{
 			Name:           "pod_security_exemptions_total",
 			Help:           "Number of exempt requests, not counting ignored or out of scope requests.",
 			StabilityLevel: metrics.ALPHA,
 		},
 		[]string{"request_operation", "resource", "subresource"},
 	)
 	errorsCounter := metrics.NewCounterVec(
 		&metrics.CounterOpts{
 			Name:           "pod_security_errors_total",
-			Help:           "Number of errors prevent normal evaluation. Non-fatal errors are evaluated against a default policy.",
+			Help:           "Number of errors preventing normal evaluation. Non-fatal errors may result in the latest restricted profile being used for evaluation.",
 			StabilityLevel: metrics.ALPHA,
 		},
 		[]string{"fatal", "request_operation", "resource", "subresource"},
@ -82,8 +68,8 @@ func NewPrometheusRecorder(version api.Version) *PrometheusRecorder {
 	return &PrometheusRecorder{
 		apiVersion:         version,
-		evaluationsCounter: evaluationsCounter,
+		evaluationsCounter: newEvaluationsCounter(),
-		exemptionsCounter:  exemptionsCounter,
+		exemptionsCounter:  newExemptionsCounter(),
 		errorsCounter:      errorsCounter,
 	}
 }
@ -101,13 +87,8 @@ func (r *PrometheusRecorder) Reset() {
 }
 func (r *PrometheusRecorder) RecordEvaluation(decision Decision, policy api.LevelVersion, evalMode Mode, attrs api.Attributes) {
 	dec := string(decision)
 	operation := operationLabel(attrs.GetOperation())
 	resource := resourceLabel(attrs.GetResource())
 	subresource := attrs.GetSubresource()
 	var version string
-	if policy.Version.Latest() {
+	if policy.Version.Latest() || policy.Level == api.LevelPrivileged { // Privileged is always effectively latest.
 		version = "latest"
 	} else {
 		if !r.apiVersion.Older(policy.Version) {
@ -117,29 +98,44 @@ func (r *PrometheusRecorder) RecordEvaluation(decision Decision, policy api.Leve
 		}
 	}
-	r.evaluationsCounter.WithLabelValues(dec, string(policy.Level),
+	r.evaluationsCounter.CachedInc(evaluationsLabels{
-		version, string(evalMode), operation, resource, subresource).Inc()
+		decision:    string(decision),
 		level:       string(policy.Level),
 		version:     version,
 		mode:        string(evalMode),
 		operation:   operationLabel(attrs.GetOperation()),
 		resource:    resourceLabel(attrs.GetResource()),
 		subresource: attrs.GetSubresource(),
 	})
 }
 func (r *PrometheusRecorder) RecordExemption(attrs api.Attributes) {
-	operation := operationLabel(attrs.GetOperation())
+	r.exemptionsCounter.CachedInc(exemptionsLabels{
-	resource := resourceLabel(attrs.GetResource())
+		operation:   operationLabel(attrs.GetOperation()),
-	subresource := attrs.GetSubresource()
+		resource:    resourceLabel(attrs.GetResource()),
-	r.exemptionsCounter.WithLabelValues(operation, resource, subresource).Inc()
+		subresource: attrs.GetSubresource(),
 	})
 }
 func (r *PrometheusRecorder) RecordError(fatal bool, attrs api.Attributes) {
-	operation := operationLabel(attrs.GetOperation())
+	r.errorsCounter.WithLabelValues(
-	resource := resourceLabel(attrs.GetResource())
+		strconv.FormatBool(fatal),
-	subresource := attrs.GetSubresource()
+		operationLabel(attrs.GetOperation()),
-	r.errorsCounter.WithLabelValues(strconv.FormatBool(fatal), operation, resource, subresource).Inc()
+		resourceLabel(attrs.GetResource()),
 		attrs.GetSubresource(),
 	).Inc()
 }
 var (
 	podResource       = corev1.Resource("pods")
 	namespaceResource = corev1.Resource("namespaces")
 )
 func resourceLabel(resource schema.GroupVersionResource) string {
 	switch resource.GroupResource() {
-	case corev1.Resource("pods"):
+	case podResource:
 		return "pod"
-	case corev1.Resource("namespace"):
+	case namespaceResource:
 		return "namespace"
 	default:
 		// Assume any other resource is a valid input to pod-security, and therefore a controller.
@ -158,3 +154,149 @@ func operationLabel(op admissionv1.Operation) string {
 		return strings.ToLower(string(op))
 	}
 }
 type evaluationsLabels struct {
 	decision    string
 	level       string
 	version     string
 	mode        string
 	operation   string
 	resource    string
 	subresource string
 }
 func (l *evaluationsLabels) labels() []string {
 	return []string{l.decision, l.level, l.version, l.mode, l.operation, l.resource, l.subresource}
 }
 type exemptionsLabels struct {
 	operation   string
 	resource    string
 	subresource string
 }
 func (l *exemptionsLabels) labels() []string {
 	return []string{l.operation, l.resource, l.subresource}
 }
 type evaluationsCounter struct {
 	*metrics.CounterVec
 	cache     map[evaluationsLabels]metrics.CounterMetric
 	cacheLock sync.RWMutex
 }
 func newEvaluationsCounter() *evaluationsCounter {
 	return &evaluationsCounter{
 		CounterVec: metrics.NewCounterVec(
 			&metrics.CounterOpts{
 				Name:           "pod_security_evaluations_total",
 				Help:           "Number of policy evaluations that occurred, not counting ignored or exempt requests.",
 				StabilityLevel: metrics.ALPHA,
 			},
 			[]string{"decision", "policy_level", "policy_version", "mode", "request_operation", "resource", "subresource"},
 		),
 		cache: make(map[evaluationsLabels]metrics.CounterMetric),
 	}
 }
 func (c *evaluationsCounter) CachedInc(l evaluationsLabels) {
 	c.cacheLock.RLock()
 	defer c.cacheLock.RUnlock()
 	if cachedCounter, ok := c.cache[l]; ok {
 		cachedCounter.Inc()
 	} else {
 		c.CounterVec.WithLabelValues(l.labels()...).Inc()
 	}
 }
 func (c *evaluationsCounter) Create(version *semver.Version) bool {
 	c.cacheLock.Lock()
 	defer c.cacheLock.Unlock()
 	if c.CounterVec.Create(version) {
 		c.populateCache()
 		return true
 	} else {
 		return false
 	}
 }
 func (c *evaluationsCounter) Reset() {
 	c.cacheLock.Lock()
 	defer c.cacheLock.Unlock()
 	c.CounterVec.Reset()
 	c.populateCache()
 }
 func (c *evaluationsCounter) populateCache() {
 	labelsToCache := []evaluationsLabels{
 		{decision: "allow", level: "privileged", version: "latest", mode: "enforce", operation: "create", resource: "pod", subresource: ""},
 		{decision: "allow", level: "privileged", version: "latest", mode: "enforce", operation: "update", resource: "pod", subresource: ""},
 	}
 	for _, l := range labelsToCache {
 		c.cache[l] = c.CounterVec.WithLabelValues(l.labels()...)
 	}
 }
 type exemptionsCounter struct {
 	*metrics.CounterVec
 	cache     map[exemptionsLabels]metrics.CounterMetric
 	cacheLock sync.RWMutex
 }
 func newExemptionsCounter() *exemptionsCounter {
 	return &exemptionsCounter{
 		CounterVec: metrics.NewCounterVec(
 			&metrics.CounterOpts{
 				Name:           "pod_security_exemptions_total",
 				Help:           "Number of exempt requests, not counting ignored or out of scope requests.",
 				StabilityLevel: metrics.ALPHA,
 			},
 			[]string{"request_operation", "resource", "subresource"},
 		),
 		cache: make(map[exemptionsLabels]metrics.CounterMetric),
 	}
 }
 func (c *exemptionsCounter) CachedInc(l exemptionsLabels) {
 	c.cacheLock.RLock()
 	defer c.cacheLock.RUnlock()
 	if cachedCounter, ok := c.cache[l]; ok {
 		cachedCounter.Inc()
 	} else {
 		c.CounterVec.WithLabelValues(l.labels()...).Inc()
 	}
 }
 func (c *exemptionsCounter) Create(version *semver.Version) bool {
 	c.cacheLock.Lock()
 	defer c.cacheLock.Unlock()
 	if c.CounterVec.Create(version) {
 		c.populateCache()
 		return true
 	} else {
 		return false
 	}
 }
 func (c *exemptionsCounter) Reset() {
 	c.cacheLock.Lock()
 	defer c.cacheLock.Unlock()
 	c.CounterVec.Reset()
 	c.populateCache()
 }
 func (c *exemptionsCounter) populateCache() {
 	labelsToCache := []exemptionsLabels{
 		{operation: "create", resource: "pod", subresource: ""},
 		{operation: "update", resource: "pod", subresource: ""},
 		{operation: "create", resource: "controller", subresource: ""},
 		{operation: "update", resource: "controller", subresource: ""},
 	}
 	for _, l := range labelsToCache {
 		c.cache[l] = c.CounterVec.WithLabelValues(l.labels()...)
 	}
 }
--- a/staging/src/k8s.io/pod-security-admission/metrics/metrics_test.go
+++ b/staging/src/k8s.io/pod-security-admission/metrics/metrics_test.go
@ -17,20 +17,21 @@ limitations under the License.
 package metrics
 import (
-	"strconv"
+	"bytes"
 	"fmt"
 	"sort"
 	"strings"
 	"testing"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 	admissionv1 "k8s.io/api/admission/v1"
 	appsv1 "k8s.io/api/apps/v1"
 	batchv1 "k8s.io/api/batch/v1"
 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/runtime/schema"
 	"k8s.io/component-base/metrics"
 	"k8s.io/component-base/metrics/testutil"
 	"k8s.io/pod-security-admission/api"
 	"github.com/stretchr/testify/assert"
 )
 var (
@ -71,21 +72,18 @@ func TestRecordEvaluation(t *testing.T) {
 								Resource:  resource,
 								Operation: op,
 							})
 							expectedLabels := map[string]string{
 								"decision":          string(decision),
 								"policy_level":      string(level),
 								"policy_version":    expectedVersion,
 								"mode":              string(mode),
 								"request_operation": strings.ToLower(string(op)),
 								"resource":          expectedResource,
 								"subresource":       "",
 							}
 							val, err := testutil.GetCounterMetricValue(recorder.evaluationsCounter.With(expectedLabels))
 							require.NoError(t, err, expectedLabels)
-							if !assert.EqualValues(t, 1, val, expectedLabels) {
+							if level == api.LevelPrivileged {
-								findMetric(t, registry, "pod_security_evaluations_total")
+								expectedVersion = "latest"
 							}
 							expected := fmt.Sprintf(`
 							# HELP pod_security_evaluations_total [ALPHA] Number of policy evaluations that occurred, not counting ignored or exempt requests.
        	            	# TYPE pod_security_evaluations_total counter
 							pod_security_evaluations_total{decision="%s",mode="%s",policy_level="%s",policy_version="%s",request_operation="%s",resource="%s",subresource=""} 1
 							`, decision, mode, level, expectedVersion, strings.ToLower(string(op)), expectedResource)
 							expected = expectCachedMetrics("pod_security_evaluations_total", expected)
 							assert.NoError(t, testutil.GatherAndCompare(registry, bytes.NewBufferString(expected), "pod_security_evaluations_total"))
 							recorder.Reset()
 						}
@ -103,23 +101,24 @@ func TestRecordExemption(t *testing.T) {
 	for _, op := range operations {
 		for resource, expectedResource := range resourceExpectations {
-			recorder.RecordExemption(&api.AttributesRecord{
+			for _, subresource := range []string{"", "ephemeralcontainers"} {
-				Resource:  resource,
+				recorder.RecordExemption(&api.AttributesRecord{
-				Operation: op,
+					Resource:    resource,
-			})
+					Operation:   op,
-			expectedLabels := map[string]string{
+					Subresource: subresource,
-				"request_operation": strings.ToLower(string(op)),
+				})
 				"resource":          expectedResource,
 				"subresource":       "",
 			}
 			val, err := testutil.GetCounterMetricValue(recorder.exemptionsCounter.With(expectedLabels))
 			require.NoError(t, err, expectedLabels)
-			if !assert.EqualValues(t, 1, val, expectedLabels) {
+				expected := fmt.Sprintf(`
-				findMetric(t, registry, "pod_security_exemptions_total")
+				# HELP pod_security_exemptions_total [ALPHA] Number of exempt requests, not counting ignored or out of scope requests.
-			}
+				# TYPE pod_security_exemptions_total counter
 				pod_security_exemptions_total{request_operation="%s",resource="%s",subresource="%s"} 1
 				`, strings.ToLower(string(op)), expectedResource, subresource)
 				expected = expectCachedMetrics("pod_security_exemptions_total", expected)
-			recorder.Reset()
+				assert.NoError(t, testutil.GatherAndCompare(registry, bytes.NewBufferString(expected), "pod_security_exemptions_total"))
 				recorder.Reset()
 			}
 		}
 	}
 }
@ -136,18 +135,14 @@ func TestRecordError(t *testing.T) {
 					Resource:  resource,
 					Operation: op,
 				})
 				expectedLabels := map[string]string{
 					"fatal":             strconv.FormatBool(fatal),
 					"request_operation": strings.ToLower(string(op)),
 					"resource":          expectedResource,
 					"subresource":       "",
 				}
 				val, err := testutil.GetCounterMetricValue(recorder.errorsCounter.With(expectedLabels))
 				require.NoError(t, err, expectedLabels)
-				if !assert.EqualValues(t, 1, val, expectedLabels) {
+				expected := bytes.NewBufferString(fmt.Sprintf(`
-					findMetric(t, registry, "pod_security_errors_total")
+				# HELP pod_security_errors_total [ALPHA] Number of errors preventing normal evaluation. Non-fatal errors may result in the latest restricted profile being used for evaluation.
-				}
+				# TYPE pod_security_errors_total counter
 				pod_security_errors_total{fatal="%t",request_operation="%s",resource="%s",subresource=""} 1
 				`, fatal, strings.ToLower(string(op)), expectedResource))
 				assert.NoError(t, testutil.GatherAndCompare(registry, expected, "pod_security_errors_total"))
 				recorder.Reset()
 			}
@ -164,19 +159,38 @@ func levelVersion(level api.Level, version string) api.LevelVersion {
 	return lv
 }
-// findMetric dumps non-zero metric samples for the metric with the given name, to help with debugging.
+// The cached metrics should always be present (value 0 if not counted).
-func findMetric(t *testing.T, gatherer metrics.Gatherer, metricName string) {
+var expectedCachedMetrics = map[string][]string{
-	t.Helper()
+	"pod_security_evaluations_total": {
-	m, _ := gatherer.Gather()
+		`pod_security_evaluations_total{decision="allow",mode="enforce",policy_level="privileged",policy_version="latest",request_operation="create",resource="pod",subresource=""}`,
-	for _, mFamily := range m {
+		`pod_security_evaluations_total{decision="allow",mode="enforce",policy_level="privileged",policy_version="latest",request_operation="update",resource="pod",subresource=""}`,
-		if mFamily.GetName() == metricName {
+	},
-			for _, metric := range mFamily.GetMetric() {
+	"pod_security_exemptions_total": {
-				if metric.GetCounter().GetValue() > 0 {
+		`pod_security_exemptions_total{request_operation="create",resource="controller",subresource=""}`,
-					t.Logf("Found metric: %s", metric.String())
+		`pod_security_exemptions_total{request_operation="create",resource="pod",subresource=""}`,
-				}
+		`pod_security_exemptions_total{request_operation="update",resource="controller",subresource=""}`,
-			}
+		`pod_security_exemptions_total{request_operation="update",resource="pod",subresource=""}`,
-			return
+	},
 }
 func expectCachedMetrics(metricName, expected string) string {
 	expectations := strings.Split(strings.TrimSpace(expected), "\n")
 	for i, expectation := range expectations {
 		expectations[i] = strings.TrimSpace(expectation) // Whitespace messes with sorting.
 	}
 	for _, cached := range expectedCachedMetrics[metricName] {
 		expectations = addZeroExpectation(expectations, cached)
 	}
 	sort.Strings(expectations[:len(expectations)-1])
 	return "\n" + strings.Join(expectations, "\n") + "\n"
 }
 // addZeroExpectation adds the mixin as an empty sample if not already present.
 func addZeroExpectation(currentExpectations []string, mixin string) []string {
 	for _, current := range currentExpectations {
 		if strings.HasPrefix(current, mixin) {
 			return currentExpectations // Mixin value already present.
 		}
 	}
-	t.Errorf("Expected metric %s not found", metricName)
+	return append(currentExpectations, fmt.Sprintf("%s 0", mixin))
 }