Cache fast-path metrics & update unit tests

This commit is contained in:
Tim Allclair 2021-10-26 18:19:10 -07:00
parent 6c273020d3
commit afad341759
4 changed files with 258 additions and 96 deletions

View File

@ -21,6 +21,7 @@ import (
"regexp"
"strconv"
"strings"
"unicode"
"k8s.io/apimachinery/pkg/util/validation/field"
"k8s.io/component-base/version"
@ -76,7 +77,11 @@ func GetAPIVersion() Version {
if err != nil {
return v
}
minor, err := strconv.Atoi(apiVersion.Minor)
// split the "normal" + and - for semver stuff to get the leading minor number
minorString := strings.FieldsFunc(apiVersion.Minor, func(r rune) bool {
return !unicode.IsDigit(r)
})[0]
minor, err := strconv.Atoi(minorString)
if err != nil {
return v
}

View File

@ -5,6 +5,7 @@ module k8s.io/pod-security-admission
go 1.16
require (
github.com/blang/semver v3.5.1+incompatible
github.com/google/go-cmp v0.5.5
github.com/spf13/cobra v1.2.1
github.com/spf13/pflag v1.0.5

View File

@ -19,7 +19,9 @@ package metrics
import (
"strconv"
"strings"
"sync"
"github.com/blang/semver"
admissionv1 "k8s.io/api/admission/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
@ -47,34 +49,18 @@ type Recorder interface {
type PrometheusRecorder struct {
apiVersion api.Version
evaluationsCounter *metrics.CounterVec
exemptionsCounter *metrics.CounterVec
evaluationsCounter *evaluationsCounter
exemptionsCounter *exemptionsCounter
errorsCounter *metrics.CounterVec
}
var _ Recorder = &PrometheusRecorder{}
func NewPrometheusRecorder(version api.Version) *PrometheusRecorder {
evaluationsCounter := metrics.NewCounterVec(
&metrics.CounterOpts{
Name: "pod_security_evaluations_total",
Help: "Number of policy evaluations that occurred, not counting ignored or exempt requests.",
StabilityLevel: metrics.ALPHA,
},
[]string{"decision", "policy_level", "policy_version", "mode", "request_operation", "resource", "subresource"},
)
exemptionsCounter := metrics.NewCounterVec(
&metrics.CounterOpts{
Name: "pod_security_exemptions_total",
Help: "Number of exempt requests, not counting ignored or out of scope requests.",
StabilityLevel: metrics.ALPHA,
},
[]string{"request_operation", "resource", "subresource"},
)
errorsCounter := metrics.NewCounterVec(
&metrics.CounterOpts{
Name: "pod_security_errors_total",
Help: "Number of errors prevent normal evaluation. Non-fatal errors are evaluated against a default policy.",
Help: "Number of errors preventing normal evaluation. Non-fatal errors may result in the latest restricted profile being used for evaluation.",
StabilityLevel: metrics.ALPHA,
},
[]string{"fatal", "request_operation", "resource", "subresource"},
@ -82,8 +68,8 @@ func NewPrometheusRecorder(version api.Version) *PrometheusRecorder {
return &PrometheusRecorder{
apiVersion: version,
evaluationsCounter: evaluationsCounter,
exemptionsCounter: exemptionsCounter,
evaluationsCounter: newEvaluationsCounter(),
exemptionsCounter: newExemptionsCounter(),
errorsCounter: errorsCounter,
}
}
@ -101,13 +87,8 @@ func (r *PrometheusRecorder) Reset() {
}
func (r *PrometheusRecorder) RecordEvaluation(decision Decision, policy api.LevelVersion, evalMode Mode, attrs api.Attributes) {
dec := string(decision)
operation := operationLabel(attrs.GetOperation())
resource := resourceLabel(attrs.GetResource())
subresource := attrs.GetSubresource()
var version string
if policy.Version.Latest() {
if policy.Version.Latest() || policy.Level == api.LevelPrivileged { // Privileged is always effectively latest.
version = "latest"
} else {
if !r.apiVersion.Older(policy.Version) {
@ -117,29 +98,44 @@ func (r *PrometheusRecorder) RecordEvaluation(decision Decision, policy api.Leve
}
}
r.evaluationsCounter.WithLabelValues(dec, string(policy.Level),
version, string(evalMode), operation, resource, subresource).Inc()
r.evaluationsCounter.CachedInc(evaluationsLabels{
decision: string(decision),
level: string(policy.Level),
version: version,
mode: string(evalMode),
operation: operationLabel(attrs.GetOperation()),
resource: resourceLabel(attrs.GetResource()),
subresource: attrs.GetSubresource(),
})
}
func (r *PrometheusRecorder) RecordExemption(attrs api.Attributes) {
operation := operationLabel(attrs.GetOperation())
resource := resourceLabel(attrs.GetResource())
subresource := attrs.GetSubresource()
r.exemptionsCounter.WithLabelValues(operation, resource, subresource).Inc()
r.exemptionsCounter.CachedInc(exemptionsLabels{
operation: operationLabel(attrs.GetOperation()),
resource: resourceLabel(attrs.GetResource()),
subresource: attrs.GetSubresource(),
})
}
func (r *PrometheusRecorder) RecordError(fatal bool, attrs api.Attributes) {
operation := operationLabel(attrs.GetOperation())
resource := resourceLabel(attrs.GetResource())
subresource := attrs.GetSubresource()
r.errorsCounter.WithLabelValues(strconv.FormatBool(fatal), operation, resource, subresource).Inc()
r.errorsCounter.WithLabelValues(
strconv.FormatBool(fatal),
operationLabel(attrs.GetOperation()),
resourceLabel(attrs.GetResource()),
attrs.GetSubresource(),
).Inc()
}
var (
podResource = corev1.Resource("pods")
namespaceResource = corev1.Resource("namespaces")
)
func resourceLabel(resource schema.GroupVersionResource) string {
switch resource.GroupResource() {
case corev1.Resource("pods"):
case podResource:
return "pod"
case corev1.Resource("namespace"):
case namespaceResource:
return "namespace"
default:
// Assume any other resource is a valid input to pod-security, and therefore a controller.
@ -158,3 +154,149 @@ func operationLabel(op admissionv1.Operation) string {
return strings.ToLower(string(op))
}
}
type evaluationsLabels struct {
decision string
level string
version string
mode string
operation string
resource string
subresource string
}
func (l *evaluationsLabels) labels() []string {
return []string{l.decision, l.level, l.version, l.mode, l.operation, l.resource, l.subresource}
}
type exemptionsLabels struct {
operation string
resource string
subresource string
}
func (l *exemptionsLabels) labels() []string {
return []string{l.operation, l.resource, l.subresource}
}
type evaluationsCounter struct {
*metrics.CounterVec
cache map[evaluationsLabels]metrics.CounterMetric
cacheLock sync.RWMutex
}
func newEvaluationsCounter() *evaluationsCounter {
return &evaluationsCounter{
CounterVec: metrics.NewCounterVec(
&metrics.CounterOpts{
Name: "pod_security_evaluations_total",
Help: "Number of policy evaluations that occurred, not counting ignored or exempt requests.",
StabilityLevel: metrics.ALPHA,
},
[]string{"decision", "policy_level", "policy_version", "mode", "request_operation", "resource", "subresource"},
),
cache: make(map[evaluationsLabels]metrics.CounterMetric),
}
}
func (c *evaluationsCounter) CachedInc(l evaluationsLabels) {
c.cacheLock.RLock()
defer c.cacheLock.RUnlock()
if cachedCounter, ok := c.cache[l]; ok {
cachedCounter.Inc()
} else {
c.CounterVec.WithLabelValues(l.labels()...).Inc()
}
}
func (c *evaluationsCounter) Create(version *semver.Version) bool {
c.cacheLock.Lock()
defer c.cacheLock.Unlock()
if c.CounterVec.Create(version) {
c.populateCache()
return true
} else {
return false
}
}
func (c *evaluationsCounter) Reset() {
c.cacheLock.Lock()
defer c.cacheLock.Unlock()
c.CounterVec.Reset()
c.populateCache()
}
func (c *evaluationsCounter) populateCache() {
labelsToCache := []evaluationsLabels{
{decision: "allow", level: "privileged", version: "latest", mode: "enforce", operation: "create", resource: "pod", subresource: ""},
{decision: "allow", level: "privileged", version: "latest", mode: "enforce", operation: "update", resource: "pod", subresource: ""},
}
for _, l := range labelsToCache {
c.cache[l] = c.CounterVec.WithLabelValues(l.labels()...)
}
}
type exemptionsCounter struct {
*metrics.CounterVec
cache map[exemptionsLabels]metrics.CounterMetric
cacheLock sync.RWMutex
}
func newExemptionsCounter() *exemptionsCounter {
return &exemptionsCounter{
CounterVec: metrics.NewCounterVec(
&metrics.CounterOpts{
Name: "pod_security_exemptions_total",
Help: "Number of exempt requests, not counting ignored or out of scope requests.",
StabilityLevel: metrics.ALPHA,
},
[]string{"request_operation", "resource", "subresource"},
),
cache: make(map[exemptionsLabels]metrics.CounterMetric),
}
}
func (c *exemptionsCounter) CachedInc(l exemptionsLabels) {
c.cacheLock.RLock()
defer c.cacheLock.RUnlock()
if cachedCounter, ok := c.cache[l]; ok {
cachedCounter.Inc()
} else {
c.CounterVec.WithLabelValues(l.labels()...).Inc()
}
}
func (c *exemptionsCounter) Create(version *semver.Version) bool {
c.cacheLock.Lock()
defer c.cacheLock.Unlock()
if c.CounterVec.Create(version) {
c.populateCache()
return true
} else {
return false
}
}
func (c *exemptionsCounter) Reset() {
c.cacheLock.Lock()
defer c.cacheLock.Unlock()
c.CounterVec.Reset()
c.populateCache()
}
func (c *exemptionsCounter) populateCache() {
labelsToCache := []exemptionsLabels{
{operation: "create", resource: "pod", subresource: ""},
{operation: "update", resource: "pod", subresource: ""},
{operation: "create", resource: "controller", subresource: ""},
{operation: "update", resource: "controller", subresource: ""},
}
for _, l := range labelsToCache {
c.cache[l] = c.CounterVec.WithLabelValues(l.labels()...)
}
}

View File

@ -17,20 +17,21 @@ limitations under the License.
package metrics
import (
"strconv"
"bytes"
"fmt"
"sort"
"strings"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
admissionv1 "k8s.io/api/admission/v1"
appsv1 "k8s.io/api/apps/v1"
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/testutil"
"k8s.io/pod-security-admission/api"
"github.com/stretchr/testify/assert"
)
var (
@ -71,21 +72,18 @@ func TestRecordEvaluation(t *testing.T) {
Resource: resource,
Operation: op,
})
expectedLabels := map[string]string{
"decision": string(decision),
"policy_level": string(level),
"policy_version": expectedVersion,
"mode": string(mode),
"request_operation": strings.ToLower(string(op)),
"resource": expectedResource,
"subresource": "",
}
val, err := testutil.GetCounterMetricValue(recorder.evaluationsCounter.With(expectedLabels))
require.NoError(t, err, expectedLabels)
if !assert.EqualValues(t, 1, val, expectedLabels) {
findMetric(t, registry, "pod_security_evaluations_total")
if level == api.LevelPrivileged {
expectedVersion = "latest"
}
expected := fmt.Sprintf(`
# HELP pod_security_evaluations_total [ALPHA] Number of policy evaluations that occurred, not counting ignored or exempt requests.
# TYPE pod_security_evaluations_total counter
pod_security_evaluations_total{decision="%s",mode="%s",policy_level="%s",policy_version="%s",request_operation="%s",resource="%s",subresource=""} 1
`, decision, mode, level, expectedVersion, strings.ToLower(string(op)), expectedResource)
expected = expectCachedMetrics("pod_security_evaluations_total", expected)
assert.NoError(t, testutil.GatherAndCompare(registry, bytes.NewBufferString(expected), "pod_security_evaluations_total"))
recorder.Reset()
}
@ -103,23 +101,24 @@ func TestRecordExemption(t *testing.T) {
for _, op := range operations {
for resource, expectedResource := range resourceExpectations {
recorder.RecordExemption(&api.AttributesRecord{
Resource: resource,
Operation: op,
})
expectedLabels := map[string]string{
"request_operation": strings.ToLower(string(op)),
"resource": expectedResource,
"subresource": "",
}
val, err := testutil.GetCounterMetricValue(recorder.exemptionsCounter.With(expectedLabels))
require.NoError(t, err, expectedLabels)
for _, subresource := range []string{"", "ephemeralcontainers"} {
recorder.RecordExemption(&api.AttributesRecord{
Resource: resource,
Operation: op,
Subresource: subresource,
})
if !assert.EqualValues(t, 1, val, expectedLabels) {
findMetric(t, registry, "pod_security_exemptions_total")
}
expected := fmt.Sprintf(`
# HELP pod_security_exemptions_total [ALPHA] Number of exempt requests, not counting ignored or out of scope requests.
# TYPE pod_security_exemptions_total counter
pod_security_exemptions_total{request_operation="%s",resource="%s",subresource="%s"} 1
`, strings.ToLower(string(op)), expectedResource, subresource)
expected = expectCachedMetrics("pod_security_exemptions_total", expected)
recorder.Reset()
assert.NoError(t, testutil.GatherAndCompare(registry, bytes.NewBufferString(expected), "pod_security_exemptions_total"))
recorder.Reset()
}
}
}
}
@ -136,18 +135,14 @@ func TestRecordError(t *testing.T) {
Resource: resource,
Operation: op,
})
expectedLabels := map[string]string{
"fatal": strconv.FormatBool(fatal),
"request_operation": strings.ToLower(string(op)),
"resource": expectedResource,
"subresource": "",
}
val, err := testutil.GetCounterMetricValue(recorder.errorsCounter.With(expectedLabels))
require.NoError(t, err, expectedLabels)
if !assert.EqualValues(t, 1, val, expectedLabels) {
findMetric(t, registry, "pod_security_errors_total")
}
expected := bytes.NewBufferString(fmt.Sprintf(`
# HELP pod_security_errors_total [ALPHA] Number of errors preventing normal evaluation. Non-fatal errors may result in the latest restricted profile being used for evaluation.
# TYPE pod_security_errors_total counter
pod_security_errors_total{fatal="%t",request_operation="%s",resource="%s",subresource=""} 1
`, fatal, strings.ToLower(string(op)), expectedResource))
assert.NoError(t, testutil.GatherAndCompare(registry, expected, "pod_security_errors_total"))
recorder.Reset()
}
@ -164,19 +159,38 @@ func levelVersion(level api.Level, version string) api.LevelVersion {
return lv
}
// findMetric dumps non-zero metric samples for the metric with the given name, to help with debugging.
func findMetric(t *testing.T, gatherer metrics.Gatherer, metricName string) {
t.Helper()
m, _ := gatherer.Gather()
for _, mFamily := range m {
if mFamily.GetName() == metricName {
for _, metric := range mFamily.GetMetric() {
if metric.GetCounter().GetValue() > 0 {
t.Logf("Found metric: %s", metric.String())
}
}
return
// The cached metrics should always be present (value 0 if not counted).
var expectedCachedMetrics = map[string][]string{
"pod_security_evaluations_total": {
`pod_security_evaluations_total{decision="allow",mode="enforce",policy_level="privileged",policy_version="latest",request_operation="create",resource="pod",subresource=""}`,
`pod_security_evaluations_total{decision="allow",mode="enforce",policy_level="privileged",policy_version="latest",request_operation="update",resource="pod",subresource=""}`,
},
"pod_security_exemptions_total": {
`pod_security_exemptions_total{request_operation="create",resource="controller",subresource=""}`,
`pod_security_exemptions_total{request_operation="create",resource="pod",subresource=""}`,
`pod_security_exemptions_total{request_operation="update",resource="controller",subresource=""}`,
`pod_security_exemptions_total{request_operation="update",resource="pod",subresource=""}`,
},
}
func expectCachedMetrics(metricName, expected string) string {
expectations := strings.Split(strings.TrimSpace(expected), "\n")
for i, expectation := range expectations {
expectations[i] = strings.TrimSpace(expectation) // Whitespace messes with sorting.
}
for _, cached := range expectedCachedMetrics[metricName] {
expectations = addZeroExpectation(expectations, cached)
}
sort.Strings(expectations[:len(expectations)-1])
return "\n" + strings.Join(expectations, "\n") + "\n"
}
// addZeroExpectation adds the mixin as an empty sample if not already present.
func addZeroExpectation(currentExpectations []string, mixin string) []string {
for _, current := range currentExpectations {
if strings.HasPrefix(current, mixin) {
return currentExpectations // Mixin value already present.
}
}
t.Errorf("Expected metric %s not found", metricName)
return append(currentExpectations, fmt.Sprintf("%s 0", mixin))
}