Cache fast-path metrics & update unit tests

This commit is contained in:
Tim Allclair 2021-10-26 18:19:10 -07:00
parent 6c273020d3
commit afad341759
4 changed files with 258 additions and 96 deletions

View File

@ -21,6 +21,7 @@ import (
"regexp" "regexp"
"strconv" "strconv"
"strings" "strings"
"unicode"
"k8s.io/apimachinery/pkg/util/validation/field" "k8s.io/apimachinery/pkg/util/validation/field"
"k8s.io/component-base/version" "k8s.io/component-base/version"
@ -76,7 +77,11 @@ func GetAPIVersion() Version {
if err != nil { if err != nil {
return v return v
} }
minor, err := strconv.Atoi(apiVersion.Minor) // split the "normal" + and - for semver stuff to get the leading minor number
minorString := strings.FieldsFunc(apiVersion.Minor, func(r rune) bool {
return !unicode.IsDigit(r)
})[0]
minor, err := strconv.Atoi(minorString)
if err != nil { if err != nil {
return v return v
} }

View File

@ -5,6 +5,7 @@ module k8s.io/pod-security-admission
go 1.16 go 1.16
require ( require (
github.com/blang/semver v3.5.1+incompatible
github.com/google/go-cmp v0.5.5 github.com/google/go-cmp v0.5.5
github.com/spf13/cobra v1.2.1 github.com/spf13/cobra v1.2.1
github.com/spf13/pflag v1.0.5 github.com/spf13/pflag v1.0.5

View File

@ -19,7 +19,9 @@ package metrics
import ( import (
"strconv" "strconv"
"strings" "strings"
"sync"
"github.com/blang/semver"
admissionv1 "k8s.io/api/admission/v1" admissionv1 "k8s.io/api/admission/v1"
corev1 "k8s.io/api/core/v1" corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/runtime/schema"
@ -47,34 +49,18 @@ type Recorder interface {
type PrometheusRecorder struct { type PrometheusRecorder struct {
apiVersion api.Version apiVersion api.Version
evaluationsCounter *metrics.CounterVec evaluationsCounter *evaluationsCounter
exemptionsCounter *metrics.CounterVec exemptionsCounter *exemptionsCounter
errorsCounter *metrics.CounterVec errorsCounter *metrics.CounterVec
} }
var _ Recorder = &PrometheusRecorder{} var _ Recorder = &PrometheusRecorder{}
func NewPrometheusRecorder(version api.Version) *PrometheusRecorder { func NewPrometheusRecorder(version api.Version) *PrometheusRecorder {
evaluationsCounter := metrics.NewCounterVec(
&metrics.CounterOpts{
Name: "pod_security_evaluations_total",
Help: "Number of policy evaluations that occurred, not counting ignored or exempt requests.",
StabilityLevel: metrics.ALPHA,
},
[]string{"decision", "policy_level", "policy_version", "mode", "request_operation", "resource", "subresource"},
)
exemptionsCounter := metrics.NewCounterVec(
&metrics.CounterOpts{
Name: "pod_security_exemptions_total",
Help: "Number of exempt requests, not counting ignored or out of scope requests.",
StabilityLevel: metrics.ALPHA,
},
[]string{"request_operation", "resource", "subresource"},
)
errorsCounter := metrics.NewCounterVec( errorsCounter := metrics.NewCounterVec(
&metrics.CounterOpts{ &metrics.CounterOpts{
Name: "pod_security_errors_total", Name: "pod_security_errors_total",
Help: "Number of errors prevent normal evaluation. Non-fatal errors are evaluated against a default policy.", Help: "Number of errors preventing normal evaluation. Non-fatal errors may result in the latest restricted profile being used for evaluation.",
StabilityLevel: metrics.ALPHA, StabilityLevel: metrics.ALPHA,
}, },
[]string{"fatal", "request_operation", "resource", "subresource"}, []string{"fatal", "request_operation", "resource", "subresource"},
@ -82,8 +68,8 @@ func NewPrometheusRecorder(version api.Version) *PrometheusRecorder {
return &PrometheusRecorder{ return &PrometheusRecorder{
apiVersion: version, apiVersion: version,
evaluationsCounter: evaluationsCounter, evaluationsCounter: newEvaluationsCounter(),
exemptionsCounter: exemptionsCounter, exemptionsCounter: newExemptionsCounter(),
errorsCounter: errorsCounter, errorsCounter: errorsCounter,
} }
} }
@ -101,13 +87,8 @@ func (r *PrometheusRecorder) Reset() {
} }
func (r *PrometheusRecorder) RecordEvaluation(decision Decision, policy api.LevelVersion, evalMode Mode, attrs api.Attributes) { func (r *PrometheusRecorder) RecordEvaluation(decision Decision, policy api.LevelVersion, evalMode Mode, attrs api.Attributes) {
dec := string(decision)
operation := operationLabel(attrs.GetOperation())
resource := resourceLabel(attrs.GetResource())
subresource := attrs.GetSubresource()
var version string var version string
if policy.Version.Latest() { if policy.Version.Latest() || policy.Level == api.LevelPrivileged { // Privileged is always effectively latest.
version = "latest" version = "latest"
} else { } else {
if !r.apiVersion.Older(policy.Version) { if !r.apiVersion.Older(policy.Version) {
@ -117,29 +98,44 @@ func (r *PrometheusRecorder) RecordEvaluation(decision Decision, policy api.Leve
} }
} }
r.evaluationsCounter.WithLabelValues(dec, string(policy.Level), r.evaluationsCounter.CachedInc(evaluationsLabels{
version, string(evalMode), operation, resource, subresource).Inc() decision: string(decision),
level: string(policy.Level),
version: version,
mode: string(evalMode),
operation: operationLabel(attrs.GetOperation()),
resource: resourceLabel(attrs.GetResource()),
subresource: attrs.GetSubresource(),
})
} }
func (r *PrometheusRecorder) RecordExemption(attrs api.Attributes) { func (r *PrometheusRecorder) RecordExemption(attrs api.Attributes) {
operation := operationLabel(attrs.GetOperation()) r.exemptionsCounter.CachedInc(exemptionsLabels{
resource := resourceLabel(attrs.GetResource()) operation: operationLabel(attrs.GetOperation()),
subresource := attrs.GetSubresource() resource: resourceLabel(attrs.GetResource()),
r.exemptionsCounter.WithLabelValues(operation, resource, subresource).Inc() subresource: attrs.GetSubresource(),
})
} }
func (r *PrometheusRecorder) RecordError(fatal bool, attrs api.Attributes) { func (r *PrometheusRecorder) RecordError(fatal bool, attrs api.Attributes) {
operation := operationLabel(attrs.GetOperation()) r.errorsCounter.WithLabelValues(
resource := resourceLabel(attrs.GetResource()) strconv.FormatBool(fatal),
subresource := attrs.GetSubresource() operationLabel(attrs.GetOperation()),
r.errorsCounter.WithLabelValues(strconv.FormatBool(fatal), operation, resource, subresource).Inc() resourceLabel(attrs.GetResource()),
attrs.GetSubresource(),
).Inc()
} }
var (
podResource = corev1.Resource("pods")
namespaceResource = corev1.Resource("namespaces")
)
func resourceLabel(resource schema.GroupVersionResource) string { func resourceLabel(resource schema.GroupVersionResource) string {
switch resource.GroupResource() { switch resource.GroupResource() {
case corev1.Resource("pods"): case podResource:
return "pod" return "pod"
case corev1.Resource("namespace"): case namespaceResource:
return "namespace" return "namespace"
default: default:
// Assume any other resource is a valid input to pod-security, and therefore a controller. // Assume any other resource is a valid input to pod-security, and therefore a controller.
@ -158,3 +154,149 @@ func operationLabel(op admissionv1.Operation) string {
return strings.ToLower(string(op)) return strings.ToLower(string(op))
} }
} }
type evaluationsLabels struct {
decision string
level string
version string
mode string
operation string
resource string
subresource string
}
func (l *evaluationsLabels) labels() []string {
return []string{l.decision, l.level, l.version, l.mode, l.operation, l.resource, l.subresource}
}
type exemptionsLabels struct {
operation string
resource string
subresource string
}
func (l *exemptionsLabels) labels() []string {
return []string{l.operation, l.resource, l.subresource}
}
type evaluationsCounter struct {
*metrics.CounterVec
cache map[evaluationsLabels]metrics.CounterMetric
cacheLock sync.RWMutex
}
func newEvaluationsCounter() *evaluationsCounter {
return &evaluationsCounter{
CounterVec: metrics.NewCounterVec(
&metrics.CounterOpts{
Name: "pod_security_evaluations_total",
Help: "Number of policy evaluations that occurred, not counting ignored or exempt requests.",
StabilityLevel: metrics.ALPHA,
},
[]string{"decision", "policy_level", "policy_version", "mode", "request_operation", "resource", "subresource"},
),
cache: make(map[evaluationsLabels]metrics.CounterMetric),
}
}
func (c *evaluationsCounter) CachedInc(l evaluationsLabels) {
c.cacheLock.RLock()
defer c.cacheLock.RUnlock()
if cachedCounter, ok := c.cache[l]; ok {
cachedCounter.Inc()
} else {
c.CounterVec.WithLabelValues(l.labels()...).Inc()
}
}
func (c *evaluationsCounter) Create(version *semver.Version) bool {
c.cacheLock.Lock()
defer c.cacheLock.Unlock()
if c.CounterVec.Create(version) {
c.populateCache()
return true
} else {
return false
}
}
func (c *evaluationsCounter) Reset() {
c.cacheLock.Lock()
defer c.cacheLock.Unlock()
c.CounterVec.Reset()
c.populateCache()
}
func (c *evaluationsCounter) populateCache() {
labelsToCache := []evaluationsLabels{
{decision: "allow", level: "privileged", version: "latest", mode: "enforce", operation: "create", resource: "pod", subresource: ""},
{decision: "allow", level: "privileged", version: "latest", mode: "enforce", operation: "update", resource: "pod", subresource: ""},
}
for _, l := range labelsToCache {
c.cache[l] = c.CounterVec.WithLabelValues(l.labels()...)
}
}
type exemptionsCounter struct {
*metrics.CounterVec
cache map[exemptionsLabels]metrics.CounterMetric
cacheLock sync.RWMutex
}
func newExemptionsCounter() *exemptionsCounter {
return &exemptionsCounter{
CounterVec: metrics.NewCounterVec(
&metrics.CounterOpts{
Name: "pod_security_exemptions_total",
Help: "Number of exempt requests, not counting ignored or out of scope requests.",
StabilityLevel: metrics.ALPHA,
},
[]string{"request_operation", "resource", "subresource"},
),
cache: make(map[exemptionsLabels]metrics.CounterMetric),
}
}
func (c *exemptionsCounter) CachedInc(l exemptionsLabels) {
c.cacheLock.RLock()
defer c.cacheLock.RUnlock()
if cachedCounter, ok := c.cache[l]; ok {
cachedCounter.Inc()
} else {
c.CounterVec.WithLabelValues(l.labels()...).Inc()
}
}
func (c *exemptionsCounter) Create(version *semver.Version) bool {
c.cacheLock.Lock()
defer c.cacheLock.Unlock()
if c.CounterVec.Create(version) {
c.populateCache()
return true
} else {
return false
}
}
func (c *exemptionsCounter) Reset() {
c.cacheLock.Lock()
defer c.cacheLock.Unlock()
c.CounterVec.Reset()
c.populateCache()
}
func (c *exemptionsCounter) populateCache() {
labelsToCache := []exemptionsLabels{
{operation: "create", resource: "pod", subresource: ""},
{operation: "update", resource: "pod", subresource: ""},
{operation: "create", resource: "controller", subresource: ""},
{operation: "update", resource: "controller", subresource: ""},
}
for _, l := range labelsToCache {
c.cache[l] = c.CounterVec.WithLabelValues(l.labels()...)
}
}

View File

@ -17,20 +17,21 @@ limitations under the License.
package metrics package metrics
import ( import (
"strconv" "bytes"
"fmt"
"sort"
"strings" "strings"
"testing" "testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
admissionv1 "k8s.io/api/admission/v1" admissionv1 "k8s.io/api/admission/v1"
appsv1 "k8s.io/api/apps/v1" appsv1 "k8s.io/api/apps/v1"
batchv1 "k8s.io/api/batch/v1" batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1" corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/testutil" "k8s.io/component-base/metrics/testutil"
"k8s.io/pod-security-admission/api" "k8s.io/pod-security-admission/api"
"github.com/stretchr/testify/assert"
) )
var ( var (
@ -71,21 +72,18 @@ func TestRecordEvaluation(t *testing.T) {
Resource: resource, Resource: resource,
Operation: op, Operation: op,
}) })
expectedLabels := map[string]string{
"decision": string(decision),
"policy_level": string(level),
"policy_version": expectedVersion,
"mode": string(mode),
"request_operation": strings.ToLower(string(op)),
"resource": expectedResource,
"subresource": "",
}
val, err := testutil.GetCounterMetricValue(recorder.evaluationsCounter.With(expectedLabels))
require.NoError(t, err, expectedLabels)
if !assert.EqualValues(t, 1, val, expectedLabels) { if level == api.LevelPrivileged {
findMetric(t, registry, "pod_security_evaluations_total") expectedVersion = "latest"
} }
expected := fmt.Sprintf(`
# HELP pod_security_evaluations_total [ALPHA] Number of policy evaluations that occurred, not counting ignored or exempt requests.
# TYPE pod_security_evaluations_total counter
pod_security_evaluations_total{decision="%s",mode="%s",policy_level="%s",policy_version="%s",request_operation="%s",resource="%s",subresource=""} 1
`, decision, mode, level, expectedVersion, strings.ToLower(string(op)), expectedResource)
expected = expectCachedMetrics("pod_security_evaluations_total", expected)
assert.NoError(t, testutil.GatherAndCompare(registry, bytes.NewBufferString(expected), "pod_security_evaluations_total"))
recorder.Reset() recorder.Reset()
} }
@ -103,23 +101,24 @@ func TestRecordExemption(t *testing.T) {
for _, op := range operations { for _, op := range operations {
for resource, expectedResource := range resourceExpectations { for resource, expectedResource := range resourceExpectations {
recorder.RecordExemption(&api.AttributesRecord{ for _, subresource := range []string{"", "ephemeralcontainers"} {
Resource: resource, recorder.RecordExemption(&api.AttributesRecord{
Operation: op, Resource: resource,
}) Operation: op,
expectedLabels := map[string]string{ Subresource: subresource,
"request_operation": strings.ToLower(string(op)), })
"resource": expectedResource,
"subresource": "",
}
val, err := testutil.GetCounterMetricValue(recorder.exemptionsCounter.With(expectedLabels))
require.NoError(t, err, expectedLabels)
if !assert.EqualValues(t, 1, val, expectedLabels) { expected := fmt.Sprintf(`
findMetric(t, registry, "pod_security_exemptions_total") # HELP pod_security_exemptions_total [ALPHA] Number of exempt requests, not counting ignored or out of scope requests.
} # TYPE pod_security_exemptions_total counter
pod_security_exemptions_total{request_operation="%s",resource="%s",subresource="%s"} 1
`, strings.ToLower(string(op)), expectedResource, subresource)
expected = expectCachedMetrics("pod_security_exemptions_total", expected)
recorder.Reset() assert.NoError(t, testutil.GatherAndCompare(registry, bytes.NewBufferString(expected), "pod_security_exemptions_total"))
recorder.Reset()
}
} }
} }
} }
@ -136,18 +135,14 @@ func TestRecordError(t *testing.T) {
Resource: resource, Resource: resource,
Operation: op, Operation: op,
}) })
expectedLabels := map[string]string{
"fatal": strconv.FormatBool(fatal),
"request_operation": strings.ToLower(string(op)),
"resource": expectedResource,
"subresource": "",
}
val, err := testutil.GetCounterMetricValue(recorder.errorsCounter.With(expectedLabels))
require.NoError(t, err, expectedLabels)
if !assert.EqualValues(t, 1, val, expectedLabels) { expected := bytes.NewBufferString(fmt.Sprintf(`
findMetric(t, registry, "pod_security_errors_total") # HELP pod_security_errors_total [ALPHA] Number of errors preventing normal evaluation. Non-fatal errors may result in the latest restricted profile being used for evaluation.
} # TYPE pod_security_errors_total counter
pod_security_errors_total{fatal="%t",request_operation="%s",resource="%s",subresource=""} 1
`, fatal, strings.ToLower(string(op)), expectedResource))
assert.NoError(t, testutil.GatherAndCompare(registry, expected, "pod_security_errors_total"))
recorder.Reset() recorder.Reset()
} }
@ -164,19 +159,38 @@ func levelVersion(level api.Level, version string) api.LevelVersion {
return lv return lv
} }
// findMetric dumps non-zero metric samples for the metric with the given name, to help with debugging. // The cached metrics should always be present (value 0 if not counted).
func findMetric(t *testing.T, gatherer metrics.Gatherer, metricName string) { var expectedCachedMetrics = map[string][]string{
t.Helper() "pod_security_evaluations_total": {
m, _ := gatherer.Gather() `pod_security_evaluations_total{decision="allow",mode="enforce",policy_level="privileged",policy_version="latest",request_operation="create",resource="pod",subresource=""}`,
for _, mFamily := range m { `pod_security_evaluations_total{decision="allow",mode="enforce",policy_level="privileged",policy_version="latest",request_operation="update",resource="pod",subresource=""}`,
if mFamily.GetName() == metricName { },
for _, metric := range mFamily.GetMetric() { "pod_security_exemptions_total": {
if metric.GetCounter().GetValue() > 0 { `pod_security_exemptions_total{request_operation="create",resource="controller",subresource=""}`,
t.Logf("Found metric: %s", metric.String()) `pod_security_exemptions_total{request_operation="create",resource="pod",subresource=""}`,
} `pod_security_exemptions_total{request_operation="update",resource="controller",subresource=""}`,
} `pod_security_exemptions_total{request_operation="update",resource="pod",subresource=""}`,
return },
}
func expectCachedMetrics(metricName, expected string) string {
expectations := strings.Split(strings.TrimSpace(expected), "\n")
for i, expectation := range expectations {
expectations[i] = strings.TrimSpace(expectation) // Whitespace messes with sorting.
}
for _, cached := range expectedCachedMetrics[metricName] {
expectations = addZeroExpectation(expectations, cached)
}
sort.Strings(expectations[:len(expectations)-1])
return "\n" + strings.Join(expectations, "\n") + "\n"
}
// addZeroExpectation adds the mixin as an empty sample if not already present.
func addZeroExpectation(currentExpectations []string, mixin string) []string {
for _, current := range currentExpectations {
if strings.HasPrefix(current, mixin) {
return currentExpectations // Mixin value already present.
} }
} }
t.Errorf("Expected metric %s not found", metricName) return append(currentExpectations, fmt.Sprintf("%s 0", mixin))
} }