[PodSecurity] Add error & exemption metrics

This commit is contained in:
Tim Allclair 2021-10-25 23:10:12 -07:00
parent e46928c0b1
commit 21692e1683
5 changed files with 131 additions and 11 deletions

View File

@ -320,10 +320,12 @@ func (a *Admission) ValidatePod(ctx context.Context, attrs api.Attributes) *admi
}
// short-circuit on exempt namespaces and users
if a.exemptNamespace(attrs.GetNamespace()) {
a.Metrics.RecordExemption(attrs)
return sharedAllowedByNamespaceExemptionResponse()
}
if a.exemptUser(attrs.GetUserName()) {
a.Metrics.RecordExemption(attrs)
return sharedAllowedByUserExemptionResponse()
}
@ -331,32 +333,38 @@ func (a *Admission) ValidatePod(ctx context.Context, attrs api.Attributes) *admi
namespace, err := a.NamespaceGetter.GetNamespace(ctx, attrs.GetNamespace())
if err != nil {
klog.ErrorS(err, "failed to fetch pod namespace", "namespace", attrs.GetNamespace())
a.Metrics.RecordError(true, attrs)
return internalErrorResponse(fmt.Sprintf("failed to lookup namespace %s", attrs.GetNamespace()))
}
nsPolicy, nsPolicyErrs := a.PolicyToEvaluate(namespace.Labels)
if len(nsPolicyErrs) == 0 && nsPolicy.Enforce.Level == api.LevelPrivileged && nsPolicy.Warn.Level == api.LevelPrivileged && nsPolicy.Audit.Level == api.LevelPrivileged {
a.Metrics.RecordEvaluation(metrics.DecisionAllow, nsPolicy.Enforce, metrics.ModeEnforce, attrs)
return sharedAllowedResponse()
}
obj, err := attrs.GetObject()
if err != nil {
klog.ErrorS(err, "failed to decode object")
a.Metrics.RecordError(true, attrs)
return badRequestResponse("failed to decode object")
}
pod, ok := obj.(*corev1.Pod)
if !ok {
klog.InfoS("failed to assert pod type", "type", reflect.TypeOf(obj))
a.Metrics.RecordError(true, attrs)
return badRequestResponse("failed to decode pod")
}
if attrs.GetOperation() == admissionv1.Update {
oldObj, err := attrs.GetOldObject()
if err != nil {
klog.ErrorS(err, "failed to decode old object")
a.Metrics.RecordError(true, attrs)
return badRequestResponse("failed to decode old object")
}
oldPod, ok := oldObj.(*corev1.Pod)
if !ok {
klog.InfoS("failed to assert old pod type", "type", reflect.TypeOf(oldObj))
a.Metrics.RecordError(true, attrs)
return badRequestResponse("failed to decode old pod")
}
if !isSignificantPodUpdate(pod, oldPod) {
@ -376,10 +384,12 @@ func (a *Admission) ValidatePodController(ctx context.Context, attrs api.Attribu
}
// short-circuit on exempt namespaces and users
if a.exemptNamespace(attrs.GetNamespace()) {
a.Metrics.RecordExemption(attrs)
return sharedAllowedByNamespaceExemptionResponse()
}
if a.exemptUser(attrs.GetUserName()) {
a.Metrics.RecordExemption(attrs)
return sharedAllowedByUserExemptionResponse()
}
@ -387,6 +397,7 @@ func (a *Admission) ValidatePodController(ctx context.Context, attrs api.Attribu
namespace, err := a.NamespaceGetter.GetNamespace(ctx, attrs.GetNamespace())
if err != nil {
klog.ErrorS(err, "failed to fetch pod namespace", "namespace", attrs.GetNamespace())
a.Metrics.RecordError(true, attrs)
return internalErrorResponse(fmt.Sprintf("failed to lookup namespace %s", attrs.GetNamespace()))
}
nsPolicy, nsPolicyErrs := a.PolicyToEvaluate(namespace.Labels)
@ -397,11 +408,13 @@ func (a *Admission) ValidatePodController(ctx context.Context, attrs api.Attribu
obj, err := attrs.GetObject()
if err != nil {
klog.ErrorS(err, "failed to decode object")
a.Metrics.RecordError(true, attrs)
return badRequestResponse("failed to decode object")
}
podMetadata, podSpec, err := a.PodSpecExtractor.ExtractPodSpec(obj)
if err != nil {
klog.ErrorS(err, "failed to extract pod spec")
a.Metrics.RecordError(true, attrs)
return badRequestResponse("failed to extract pod template")
}
if podMetadata == nil && podSpec == nil {
@ -417,6 +430,7 @@ func (a *Admission) ValidatePodController(ctx context.Context, attrs api.Attribu
func (a *Admission) EvaluatePod(ctx context.Context, nsPolicy api.Policy, nsPolicyErr error, podMetadata *metav1.ObjectMeta, podSpec *corev1.PodSpec, attrs api.Attributes, enforce bool) *admissionv1.AdmissionResponse {
// short-circuit on exempt runtimeclass
if a.exemptRuntimeClass(podSpec.RuntimeClassName) {
a.Metrics.RecordExemption(attrs)
return sharedAllowedByRuntimeClassExemptionResponse()
}
@ -424,6 +438,7 @@ func (a *Admission) EvaluatePod(ctx context.Context, nsPolicy api.Policy, nsPoli
if nsPolicyErr != nil {
klog.V(2).InfoS("failed to parse PodSecurity namespace labels", "err", nsPolicyErr)
auditAnnotations["error"] = fmt.Sprintf("Failed to parse policy: %v", nsPolicyErr)
a.Metrics.RecordError(false, attrs)
}
if klog.V(5).Enabled() {

View File

@ -731,7 +731,7 @@ func TestValidatePodController(t *testing.T) {
assert.Equal(t, tc.expectWarnings, result.Warnings, "unexpected Warnings")
expectedEvaluations := []EvaluationRecord{}
if len(tc.expectAuditAnnotations) > 0 {
if _, ok := tc.expectAuditAnnotations["audit-violations"]; ok {
expectedEvaluations = append(expectedEvaluations, EvaluationRecord{testName, metrics.DecisionDeny, nsLevelVersion, metrics.ModeAudit})
}
if len(tc.expectWarnings) > 0 {
@ -757,6 +757,9 @@ func (r *FakeRecorder) RecordEvaluation(decision metrics.Decision, policy api.Le
r.evaluations = append(r.evaluations, EvaluationRecord{attrs.GetName(), decision, policy, evalMode})
}
func (r *FakeRecorder) RecordExemption(api.Attributes) {}
func (r *FakeRecorder) RecordError(bool, api.Attributes) {}
// ExpectEvaluation asserts that the evaluation was recorded, and clears the record.
func (r *FakeRecorder) ExpectEvaluations(t *testing.T, expected []EvaluationRecord) {
t.Helper()

View File

@ -42,6 +42,8 @@ type Mode string
type Recorder interface {
RecordEvaluation(Decision, api.LevelVersion, Mode, api.Attributes)
RecordExemption(api.Attributes)
RecordError(fatal bool, attrs api.Attributes)
}
var defaultRecorder = NewPrometheusRecorder(api.GetAPIVersion())
@ -59,6 +61,8 @@ type PrometheusRecorder struct {
apiVersion api.Version
evaluationsCounter *metrics.CounterVec
exemptionsCounter *metrics.CounterVec
errorsCounter *metrics.CounterVec
registerOnce sync.Once
}
@ -74,21 +78,43 @@ func NewPrometheusRecorder(version api.Version) *PrometheusRecorder {
},
[]string{"decision", "policy_level", "policy_version", "mode", "request_operation", "resource", "subresource"},
)
exemptionsCounter := metrics.NewCounterVec(
&metrics.CounterOpts{
Name: "pod_security_exemptions_total",
Help: "Number of exempt requests, not counting ignored or out of scope requests.",
StabilityLevel: metrics.ALPHA,
},
[]string{"request_operation", "resource", "subresource"},
)
errorsCounter := metrics.NewCounterVec(
&metrics.CounterOpts{
Name: "pod_security_errors_total",
Help: "Number of errors prevent normal evaluation. Non-fatal errors are evaluated against a default policy.",
StabilityLevel: metrics.ALPHA,
},
[]string{"fatal", "request_operation", "resource", "subresource"},
)
return &PrometheusRecorder{
apiVersion: version,
evaluationsCounter: evaluationsCounter,
exemptionsCounter: exemptionsCounter,
errorsCounter: errorsCounter,
}
}
func (r *PrometheusRecorder) MustRegister(registerFunc func(...metrics.Registerable)) {
r.registerOnce.Do(func() {
registerFunc(r.evaluationsCounter)
registerFunc(r.exemptionsCounter)
registerFunc(r.errorsCounter)
})
}
func (r *PrometheusRecorder) Reset() {
r.evaluationsCounter.Reset()
r.exemptionsCounter.Reset()
r.errorsCounter.Reset()
}
func (r *PrometheusRecorder) RecordEvaluation(decision Decision, policy api.LevelVersion, evalMode Mode, attrs api.Attributes) {
@ -98,18 +124,32 @@ func (r *PrometheusRecorder) RecordEvaluation(decision Decision, policy api.Leve
subresource := attrs.GetSubresource()
var version string
if policy.Version.Latest() {
version = "latest"
if policy.Version.Latest() {
version = "latest"
} else {
if !r.apiVersion.Older(policy.Version) {
version = policy.Version.String()
} else {
if !r.apiVersion.Older(policy.Version) {
version = policy.Version.String()
} else {
version = "future"
}
version = "future"
}
}
r.evaluationsCounter.WithLabelValues(dec, string(policy.Level),
version, string(evalMode), operation, resource, subresource).Inc()
r.evaluationsCounter.WithLabelValues(dec, string(policy.Level),
version, string(evalMode), operation, resource, subresource).Inc()
}
func (r *PrometheusRecorder) RecordExemption(attrs api.Attributes) {
operation := operationLabel(attrs.GetOperation())
resource := resourceLabel(attrs.GetResource())
subresource := attrs.GetSubresource()
r.exemptionsCounter.WithLabelValues(operation, resource, subresource).Inc()
}
func (r *PrometheusRecorder) RecordError(fatal bool, attrs api.Attributes) {
operation := operationLabel(attrs.GetOperation())
resource := resourceLabel(attrs.GetResource())
subresource := attrs.GetSubresource()
r.errorsCounter.WithLabelValues(strconv.FormatBool(fatal), operation, resource, subresource).Inc()
}
func resourceLabel(resource schema.GroupVersionResource) string {

View File

@ -17,6 +17,7 @@ limitations under the License.
package metrics
import (
"strconv"
"strings"
"testing"
@ -95,6 +96,65 @@ func TestRecordEvaluation(t *testing.T) {
}
}
func TestRecordExemption(t *testing.T) {
recorder := NewPrometheusRecorder(testVersion)
registry := testutil.NewFakeKubeRegistry("1.23.0")
recorder.MustRegister(registry.MustRegister)
for _, op := range operations {
for resource, expectedResource := range resourceExpectations {
recorder.RecordExemption(&api.AttributesRecord{
Resource: resource,
Operation: op,
})
expectedLabels := map[string]string{
"request_operation": strings.ToLower(string(op)),
"resource": expectedResource,
"subresource": "",
}
val, err := testutil.GetCounterMetricValue(recorder.exemptionsCounter.With(expectedLabels))
require.NoError(t, err, expectedLabels)
if !assert.EqualValues(t, 1, val, expectedLabels) {
findMetric(t, registry, "pod_security_exemptions_total")
}
recorder.Reset()
}
}
}
func TestRecordError(t *testing.T) {
recorder := NewPrometheusRecorder(testVersion)
registry := testutil.NewFakeKubeRegistry("1.23.0")
recorder.MustRegister(registry.MustRegister)
for _, fatal := range []bool{true, false} {
for _, op := range operations {
for resource, expectedResource := range resourceExpectations {
recorder.RecordError(fatal, &api.AttributesRecord{
Resource: resource,
Operation: op,
})
expectedLabels := map[string]string{
"fatal": strconv.FormatBool(fatal),
"request_operation": strings.ToLower(string(op)),
"resource": expectedResource,
"subresource": "",
}
val, err := testutil.GetCounterMetricValue(recorder.errorsCounter.With(expectedLabels))
require.NoError(t, err, expectedLabels)
if !assert.EqualValues(t, 1, val, expectedLabels) {
findMetric(t, registry, "pod_security_errors_total")
}
recorder.Reset()
}
}
}
}
func levelVersion(level api.Level, version string) api.LevelVersion {
lv := api.LevelVersion{Level: level}
var err error
@ -115,6 +175,8 @@ func findMetric(t *testing.T, gatherer metrics.Gatherer, metricName string) {
t.Logf("Found metric: %s", metric.String())
}
}
return
}
}
t.Errorf("Expected metric %s not found", metricName)
}

View File

@ -340,6 +340,6 @@ func validateMetrics(t *testing.T, rawMetrics []byte) {
if err := testutil.ValidateMetrics(metrics, "pod_security_evaluations_total",
"decision", "policy_level", "policy_version", "mode", "request_operation", "resource", "subresource"); err != nil {
t.Fatalf("Metric validation failed: %v", err)
t.Errorf("Metric validation failed: %v", err)
}
}