From 771c9be291c1916c7c7f077d4ae70e2f20654f2b Mon Sep 17 00:00:00 2001
From: Jan Safranek
Date: Thu, 9 Mar 2023 14:02:25 +0100
Subject: [PATCH 1/3] Add e2e test for SELinux metrics

---
 .../e2e/storage/csi_mock/csi_selinux_mount.go | 217 ++++++++++++++++++
 1 file changed, 217 insertions(+)

diff --git a/test/e2e/storage/csi_mock/csi_selinux_mount.go b/test/e2e/storage/csi_mock/csi_selinux_mount.go
index b6b74c78c69..5322d551816 100644
--- a/test/e2e/storage/csi_mock/csi_selinux_mount.go
+++ b/test/e2e/storage/csi_mock/csi_selinux_mount.go
@@ -18,16 +18,22 @@ package csi_mock
 
 import (
 	"context"
+	"fmt"
+	"sort"
 	"sync/atomic"
+	"time"
 
 	"github.com/onsi/ginkgo/v2"
 	"github.com/onsi/gomega"
 	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/fields"
+	"k8s.io/apimachinery/pkg/util/sets"
+	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/kubernetes/pkg/kubelet/events"
 	"k8s.io/kubernetes/test/e2e/framework"
 	e2eevents "k8s.io/kubernetes/test/e2e/framework/events"
+	e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics"
 	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
 	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
 	"k8s.io/kubernetes/test/e2e/storage/utils"
@@ -237,3 +243,214 @@ var _ = utils.SIGDescribe("CSI Mock selinux on mount", func() {
 		}
 	})
 })
+
+var _ = utils.SIGDescribe("CSI Mock selinux on mount metrics", func() {
+	f := framework.NewDefaultFramework("csi-mock-volumes-selinux-metrics")
+	f.NamespacePodSecurityEnforceLevel = admissionapi.LevelPrivileged
+	m := newMockDriverSetup(f)
+
+	// [Serial]: the tests read global kubelet metrics, so no other test changes them in parallel.
+	ginkgo.Context("SELinuxMount metrics [LinuxOnly][Feature:SELinux][Feature:SELinuxMountReadWriteOncePod][Serial]", func() {
+
+		// All SELinux metrics. Unless explicitly mentioned in test.expectIncreases, these metrics must not grow during
+		// a test.
+		allMetrics := sets.NewString(
+			"volume_manager_selinux_container_errors_total",
+			"volume_manager_selinux_container_warnings_total",
+			"volume_manager_selinux_pod_context_mismatch_errors_total",
+			"volume_manager_selinux_pod_context_mismatch_warnings_total",
+			"volume_manager_selinux_volume_context_mismatch_errors_total",
+			"volume_manager_selinux_volume_context_mismatch_warnings_total",
+			"volume_manager_selinux_volumes_admitted_total",
+		)
+
+		// Make sure all options are set so system specific defaults are not used.
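+		// The two contexts below differ only in the MCS level (s0:c0,c1 vs. s0:c98,c99),
+		// so any conflict the tests trigger comes purely from that label difference.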
+ seLinuxOpts1 := v1.SELinuxOptions{ + User: "system_u", + Role: "object_r", + Type: "container_file_t", + Level: "s0:c0,c1", + } + seLinuxOpts2 := v1.SELinuxOptions{ + User: "system_u", + Role: "object_r", + Type: "container_file_t", + Level: "s0:c98,c99", + } + + tests := []struct { + name string + csiDriverSELinuxEnabled bool + firstPodSELinuxOpts *v1.SELinuxOptions + secondPodSELinuxOpts *v1.SELinuxOptions + volumeMode v1.PersistentVolumeAccessMode + waitForSecondPodStart bool + secondPodFailureEvent string + expectIncreases sets.String + }{ + { + name: "warning is not bumped on two Pods with the same context on RWO volume", + csiDriverSELinuxEnabled: true, + firstPodSELinuxOpts: &seLinuxOpts1, + secondPodSELinuxOpts: &seLinuxOpts1, + volumeMode: v1.ReadWriteOnce, + waitForSecondPodStart: true, + expectIncreases: sets.NewString( /* no metric is increased, admitted_total was already increased when the first pod started */ ), + }, + { + name: "warning is bumped on two Pods with a different context on RWO volume", + csiDriverSELinuxEnabled: true, + firstPodSELinuxOpts: &seLinuxOpts1, + secondPodSELinuxOpts: &seLinuxOpts2, + volumeMode: v1.ReadWriteOnce, + waitForSecondPodStart: true, + expectIncreases: sets.NewString("volume_manager_selinux_volume_context_mismatch_warnings_total"), + }, + { + name: "error is bumped on two Pods with a different context on RWOP volume", + csiDriverSELinuxEnabled: true, + firstPodSELinuxOpts: &seLinuxOpts1, + secondPodSELinuxOpts: &seLinuxOpts2, + secondPodFailureEvent: "conflicting SELinux labels of volume", + volumeMode: v1.ReadWriteOncePod, + waitForSecondPodStart: false, + expectIncreases: sets.NewString("volume_manager_selinux_volume_context_mismatch_errors_total"), + }, + } + for _, t := range tests { + t := t + ginkgo.It(t.name, func(ctx context.Context) { + if framework.NodeOSDistroIs("windows") { + e2eskipper.Skipf("SELinuxMount is only applied on linux nodes -- skipping") + } + grabber, err := e2emetrics.NewMetricsGrabber(ctx, f.ClientSet, nil, f.ClientConfig(), true, false, false, false, false, false) + framework.ExpectNoError(err, "creating the metrics grabber") + + var nodeStageMountOpts, nodePublishMountOpts []string + var unstageCalls, stageCalls, unpublishCalls, publishCalls atomic.Int32 + m.init(ctx, testParameters{ + disableAttach: true, + registerDriver: true, + enableSELinuxMount: &t.csiDriverSELinuxEnabled, + hooks: createSELinuxMountPreHook(&nodeStageMountOpts, &nodePublishMountOpts, &stageCalls, &unstageCalls, &publishCalls, &unpublishCalls), + }) + ginkgo.DeferCleanup(m.cleanup) + + ginkgo.By("Starting the first pod") + accessModes := []v1.PersistentVolumeAccessMode{t.volumeMode} + _, claim, pod := m.createPodWithSELinux(ctx, accessModes, []string{}, t.firstPodSELinuxOpts) + err = e2epod.WaitForPodNameRunningInNamespace(ctx, m.cs, pod.Name, pod.Namespace) + framework.ExpectNoError(err, "starting the initial pod") + + ginkgo.By("Grabbing initial metrics") + pod, err = m.cs.CoreV1().Pods(pod.Namespace).Get(ctx, pod.Name, metav1.GetOptions{}) + framework.ExpectNoError(err, "getting the initial pod") + metrics, err := grabMetrics(ctx, grabber, pod.Spec.NodeName, allMetrics) + framework.ExpectNoError(err, "collecting the initial metrics") + dumpMetrics(metrics) + + // Act + ginkgo.By("Starting the second pod") + // Skip scheduler, it would block scheduling the second pod with ReadWriteOncePod PV. 
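+				// The second pod is created directly on the first pod's node, so any SELinux
+				// conflict is detected by kubelet's volume manager and reported in its metrics.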
+ nodeSelection := e2epod.NodeSelection{Name: pod.Spec.NodeName} + pod2, err := startPausePodWithSELinuxOptions(f.ClientSet, claim, nodeSelection, f.Namespace.Name, t.secondPodSELinuxOpts) + framework.ExpectNoError(err, "creating second pod with SELinux context %s", t.secondPodSELinuxOpts) + m.pods = append(m.pods, pod2) + + if t.waitForSecondPodStart { + err := e2epod.WaitForPodNameRunningInNamespace(ctx, m.cs, pod2.Name, pod2.Namespace) + framework.ExpectNoError(err, "starting the second pod") + } else { + ginkgo.By("Waiting for the second pod to fail to start") + eventSelector := fields.Set{ + "involvedObject.kind": "Pod", + "involvedObject.name": pod2.Name, + "involvedObject.namespace": pod2.Namespace, + "reason": events.FailedMountVolume, + }.AsSelector().String() + err = e2eevents.WaitTimeoutForEvent(ctx, m.cs, pod2.Namespace, eventSelector, t.secondPodFailureEvent, f.Timeouts.PodStart) + framework.ExpectNoError(err, "waiting for event %q in the second test pod", t.secondPodFailureEvent) + } + + // Assert: count the metrics + ginkgo.By("Waiting for expected metric changes") + err = waitForMetricIncrease(ctx, grabber, pod.Spec.NodeName, allMetrics, t.expectIncreases, metrics, framework.PodStartShortTimeout) + framework.ExpectNoError(err, "waiting for metrics %s to increase", t.expectIncreases) + }) + } + }) +}) + +func grabMetrics(ctx context.Context, grabber *e2emetrics.Grabber, nodeName string, metricNames sets.String) (map[string]float64, error) { + response, err := grabber.GrabFromKubelet(ctx, nodeName) + framework.ExpectNoError(err) + + metrics := map[string]float64{} + for method, samples := range response { + if metricNames.Has(method) { + if len(samples) == 0 { + return nil, fmt.Errorf("metric %s has no samples", method) + } + lastSample := samples[len(samples)-1] + metrics[method] = float64(lastSample.Value) + } + } + + // Ensure all metrics were provided + for name := range metricNames { + if _, found := metrics[name]; !found { + return nil, fmt.Errorf("metric %s not found", name) + } + } + + return metrics, nil +} + +func waitForMetricIncrease(ctx context.Context, grabber *e2emetrics.Grabber, nodeName string, allMetricNames, expectedIncreaseNames sets.String, initialValues map[string]float64, timeout time.Duration) error { + var noIncreaseMetrics sets.String + var metrics map[string]float64 + + err := wait.Poll(time.Second, timeout, func() (bool, error) { + var err error + metrics, err = grabMetrics(ctx, grabber, nodeName, allMetricNames) + if err != nil { + return false, err + } + + noIncreaseMetrics = sets.NewString() + // Always evaluate all SELinux metrics to check that the other metrics are not unexpectedly increased. 
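+		// Metrics listed in expectedIncreaseNames must rise above their initial value;
+		// any other metric that changes at all fails the wait immediately.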
+ for name := range allMetricNames { + if expectedIncreaseNames.Has(name) { + if metrics[name] <= initialValues[name] { + noIncreaseMetrics.Insert(name) + } + } else { + if initialValues[name] != metrics[name] { + return false, fmt.Errorf("metric %s unexpectedly increased to %v", name, metrics[name]) + } + } + } + return noIncreaseMetrics.Len() == 0, nil + }) + + ginkgo.By("Dumping final metrics") + dumpMetrics(metrics) + + if err == context.DeadlineExceeded { + return fmt.Errorf("timed out waiting for metrics %v", noIncreaseMetrics.List()) + } + return err +} + +func dumpMetrics(metrics map[string]float64) { + // Print the metrics sorted by metric name for better readability + keys := make([]string, 0, len(metrics)) + for key := range metrics { + keys = append(keys, key) + } + sort.Strings(keys) + + for _, key := range keys { + framework.Logf("Metric %s: %v", key, metrics[key]) + } +} From 48ea6a3f3a74ea1fd9a356918f63a135cad87b60 Mon Sep 17 00:00:00 2001 From: Jan Safranek Date: Thu, 9 Mar 2023 14:15:58 +0100 Subject: [PATCH 2/3] Fix SELinux mismatch metrics DesiredStateOfWorld must remember both - the effective SELinux label to apply as a mount option (non-empty for RWOP volumes, empty otherwise) - and the label that _would_ be used if the mount option would be used by all access modes. Mismatch warning metrics must be generated from the second label. --- .../cache/desired_state_of_world.go | 47 ++++++++++++------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/pkg/kubelet/volumemanager/cache/desired_state_of_world.go b/pkg/kubelet/volumemanager/cache/desired_state_of_world.go index 0d8faf32351..dae89ab401f 100644 --- a/pkg/kubelet/volumemanager/cache/desired_state_of_world.go +++ b/pkg/kubelet/volumemanager/cache/desired_state_of_world.go @@ -206,12 +206,23 @@ type volumeToMount struct { // Usually this value reflects size recorded in pv.Spec.Capacity persistentVolumeSize *resource.Quantity - // seLinuxFileLabel is desired SELinux label on files on the volume. If empty, then + // effectiveSELinuxMountFileLabel is the SELinux label that will be applied to the volume using mount options. + // If empty, then: + // - either the context+label is unknown (assigned randomly by the container runtime) + // - or the volume plugin responsible for this volume does not support mounting with -o context + // - or the volume is not ReadWriteOncePod + // - or the OS does not support SELinux + // In all cases, the SELinux context does not matter when mounting the volume. + effectiveSELinuxMountFileLabel string + + // originalSELinuxLabel is the SELinux label that would be used if SELinux mount was supported for all access modes. + // For RWOP volumes it's the same as effectiveSELinuxMountFileLabel. + // It is used only to report potential SELinux mismatch metrics. + // If empty, then: // - either the context+label is unknown (assigned randomly by the container runtime) // - or the volume plugin responsible for this volume does not support mounting with -o context // - or the OS does not support SELinux - // In all cases, the SELinux context does not matter when mounting the volume. - seLinuxFileLabel string + originalSELinuxLabel string } // The pod object represents a pod that references the underlying volume and @@ -307,23 +318,25 @@ func (dsw *desiredStateOfWorld) AddPodToVolume( } } } + effectiveSELinuxMountLabel := seLinuxFileLabel if !util.VolumeSupportsSELinuxMount(volumeSpec) { // Clear SELinux label for the volume with unsupported access modes. 
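+			// Note: only the label used as a mount option is cleared here; originalSELinuxLabel
+			// below keeps the pod's label so mismatch metrics can still be reported.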
klog.V(4).InfoS("volume does not support SELinux context mount, clearing the expected label", "volume", volumeSpec.Name()) - seLinuxFileLabel = "" + effectiveSELinuxMountLabel = "" } if seLinuxFileLabel != "" { seLinuxVolumesAdmitted.Add(1.0) } vmt := volumeToMount{ - volumeName: volumeName, - podsToMount: make(map[types.UniquePodName]podToMount), - pluginIsAttachable: attachable, - pluginIsDeviceMountable: deviceMountable, - volumeGidValue: volumeGidValue, - reportedInUse: false, - desiredSizeLimit: sizeLimit, - seLinuxFileLabel: seLinuxFileLabel, + volumeName: volumeName, + podsToMount: make(map[types.UniquePodName]podToMount), + pluginIsAttachable: attachable, + pluginIsDeviceMountable: deviceMountable, + volumeGidValue: volumeGidValue, + reportedInUse: false, + desiredSizeLimit: sizeLimit, + effectiveSELinuxMountFileLabel: effectiveSELinuxMountLabel, + originalSELinuxLabel: seLinuxFileLabel, } // record desired size of the volume if volumeSpec.PersistentVolume != nil { @@ -337,9 +350,9 @@ func (dsw *desiredStateOfWorld) AddPodToVolume( } else { // volume exists if pluginSupportsSELinuxContextMount { - if seLinuxFileLabel != vol.seLinuxFileLabel { + if seLinuxFileLabel != vol.originalSELinuxLabel { // TODO: update the error message after tests, e.g. add at least the conflicting pod names. - fullErr := fmt.Errorf("conflicting SELinux labels of volume %s: %q and %q", volumeSpec.Name(), vol.seLinuxFileLabel, seLinuxFileLabel) + fullErr := fmt.Errorf("conflicting SELinux labels of volume %s: %q and %q", volumeSpec.Name(), vol.originalSELinuxLabel, seLinuxFileLabel) supported := util.VolumeSupportsSELinuxMount(volumeSpec) if err := handleSELinuxMetricError(fullErr, supported, seLinuxVolumeContextMismatchWarnings, seLinuxVolumeContextMismatchErrors); err != nil { return "", err @@ -499,7 +512,7 @@ func (dsw *desiredStateOfWorld) VolumeExists( // and mounted with new SELinux mount options for pod B. // Without SELinux, kubelet can (and often does) reuse device mounted // for A. - return vol.seLinuxFileLabel == seLinuxMountContext + return vol.effectiveSELinuxMountFileLabel == seLinuxMountContext } return true } @@ -515,7 +528,7 @@ func (dsw *desiredStateOfWorld) PodExistsInVolume( } if feature.DefaultFeatureGate.Enabled(features.SELinuxMountReadWriteOncePod) { - if volumeObj.seLinuxFileLabel != seLinuxMountOption { + if volumeObj.effectiveSELinuxMountFileLabel != seLinuxMountOption { // The volume is in DSW, but with a different SELinux mount option. // Report it as unused, so the volume is unmounted and mounted back // with the right SELinux option. @@ -573,7 +586,7 @@ func (dsw *desiredStateOfWorld) GetVolumesToMount() []VolumeToMount { ReportedInUse: volumeObj.reportedInUse, MountRequestTime: podObj.mountRequestTime, DesiredSizeLimit: volumeObj.desiredSizeLimit, - SELinuxLabel: volumeObj.seLinuxFileLabel, + SELinuxLabel: volumeObj.effectiveSELinuxMountFileLabel, }, } if volumeObj.persistentVolumeSize != nil { From 05cd2ba8631d612a983e0a1642308aaa2c5ba64c Mon Sep 17 00:00:00 2001 From: Jan Safranek Date: Thu, 9 Mar 2023 15:45:21 +0100 Subject: [PATCH 3/3] Don't bump nr. of admitted volumes on retry AddPodToVolume is called periodically, it does not make sense to bump volume_manager_selinux_volumes_admitted_total on each call. 
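Instead, bump it only when a volume is newly added to the desired state of world.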
--- pkg/kubelet/volumemanager/cache/desired_state_of_world.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pkg/kubelet/volumemanager/cache/desired_state_of_world.go b/pkg/kubelet/volumemanager/cache/desired_state_of_world.go index dae89ab401f..fc77c675572 100644 --- a/pkg/kubelet/volumemanager/cache/desired_state_of_world.go +++ b/pkg/kubelet/volumemanager/cache/desired_state_of_world.go @@ -357,10 +357,6 @@ func (dsw *desiredStateOfWorld) AddPodToVolume( if err := handleSELinuxMetricError(fullErr, supported, seLinuxVolumeContextMismatchWarnings, seLinuxVolumeContextMismatchErrors); err != nil { return "", err } - } else { - if seLinuxFileLabel != "" { - seLinuxVolumesAdmitted.Add(1.0) - } } } }
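
Taken together, patches 2 and 3 separate the SELinux label used for -o context mounts from the label used for mismatch metrics, and stop bumping the admitted counter on periodic retries. The standalone Go sketch below illustrates that bookkeeping; the types, counters, and the simplified addPodToVolume signature are illustrative stand-ins, not the actual kubelet code.

package main

import "fmt"

// volumeEntry is a simplified stand-in for the kubelet's per-volume bookkeeping.
type volumeEntry struct {
	effectiveMountLabel string // label applied via -o context; empty unless SELinux mount is supported
	originalLabel       string // label used only for mismatch warning/error metrics
}

// desiredState is a toy model of the desired state of world; the two counters
// stand in for volume_manager_selinux_volumes_admitted_total and the
// volume_manager_selinux_volume_context_mismatch_* metrics.
type desiredState struct {
	volumes        map[string]*volumeEntry
	admittedTotal  int
	mismatchEvents int
}

// addPodToVolume mimics the intent of the patched AddPodToVolume: remember both
// labels when a volume is first seen, bump the admitted counter only on that
// first add, and compare later pods against originalLabel so a mismatch is
// reported even when the volume is not mounted with -o context.
func (d *desiredState) addPodToVolume(volName, podLabel string, supportsSELinuxMount bool) error {
	effective := podLabel
	if !supportsSELinuxMount {
		// Only the label used for mounting is cleared; the original label is
		// still tracked for mismatch metrics.
		effective = ""
	}

	vol, exists := d.volumes[volName]
	if !exists {
		d.volumes[volName] = &volumeEntry{effectiveMountLabel: effective, originalLabel: podLabel}
		if podLabel != "" {
			d.admittedTotal++ // bumped once per volume, not on every periodic sync
		}
		return nil
	}

	if podLabel != vol.originalLabel {
		d.mismatchEvents++
		return fmt.Errorf("conflicting SELinux labels of volume %s: %q and %q", volName, vol.originalLabel, podLabel)
	}
	return nil
}

func main() {
	d := &desiredState{volumes: map[string]*volumeEntry{}}
	// A first pod admits the volume; a second pod with the same label changes nothing.
	_ = d.addPodToVolume("vol-1", "system_u:object_r:container_file_t:s0:c0,c1", false)
	_ = d.addPodToVolume("vol-1", "system_u:object_r:container_file_t:s0:c0,c1", false)
	// A pod with a different label triggers a mismatch even without -o context mounts.
	err := d.addPodToVolume("vol-1", "system_u:object_r:container_file_t:s0:c98,c99", false)
	fmt.Println("admitted:", d.admittedTotal, "mismatches:", d.mismatchEvents, "err:", err)
}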