Merge pull request #116424 from jsafrane/add-selinux-metric-test

Add e2e tests for SELinux metrics
Kubernetes Prow Robot 2023-03-10 12:41:06 -08:00 committed by GitHub
commit 1f2d49972c
2 changed files with 247 additions and 21 deletions


@@ -207,12 +207,23 @@ type volumeToMount struct {
 	// Usually this value reflects size recorded in pv.Spec.Capacity
 	persistentVolumeSize *resource.Quantity

-	// seLinuxFileLabel is desired SELinux label on files on the volume. If empty, then
+	// effectiveSELinuxMountFileLabel is the SELinux label that will be applied to the volume using mount options.
+	// If empty, then:
 	// - either the context+label is unknown (assigned randomly by the container runtime)
 	// - or the volume plugin responsible for this volume does not support mounting with -o context
+	// - or the volume is not ReadWriteOncePod
 	// - or the OS does not support SELinux
 	// In all cases, the SELinux context does not matter when mounting the volume.
-	seLinuxFileLabel string
+	effectiveSELinuxMountFileLabel string
+	// originalSELinuxLabel is the SELinux label that would be used if SELinux mount was supported for all access modes.
+	// For RWOP volumes it's the same as effectiveSELinuxMountFileLabel.
+	// It is used only to report potential SELinux mismatch metrics.
+	// If empty, then:
+	// - either the context+label is unknown (assigned randomly by the container runtime)
+	// - or the volume plugin responsible for this volume does not support mounting with -o context
+	// - or the OS does not support SELinux
+	// In all cases, the SELinux context does not matter when mounting the volume.
+	originalSELinuxLabel string
 }

 // The pod object represents a pod that references the underlying volume and
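For context: an effectiveSELinuxMountFileLabel such as "system_u:object_r:container_file_t:s0:c0,c1" ends up as a -o context mount option, so the kernel presents every file on the volume with that label instead of the container runtime relabeling files one by one. A minimal, purely illustrative sketch of that translation (the helper name is made up and is not part of this commit):

package main

import "fmt"

// seLinuxMountOption renders an SELinux label as the context= mount option
// used for -o context mounts. Hypothetical helper, for illustration only.
func seLinuxMountOption(label string) string {
	if label == "" {
		// Empty label: the SELinux context does not matter for this mount.
		return ""
	}
	return fmt.Sprintf("context=%q", label)
}

func main() {
	fmt.Println(seLinuxMountOption("system_u:object_r:container_file_t:s0:c0,c1"))
	// Prints: context="system_u:object_r:container_file_t:s0:c0,c1"
}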
@@ -308,10 +319,11 @@ func (dsw *desiredStateOfWorld) AddPodToVolume(
 			}
 		}
 	}
+	effectiveSELinuxMountLabel := seLinuxFileLabel
 	if !util.VolumeSupportsSELinuxMount(volumeSpec) {
 		// Clear SELinux label for the volume with unsupported access modes.
 		klog.V(4).InfoS("volume does not support SELinux context mount, clearing the expected label", "volume", volumeSpec.Name())
-		seLinuxFileLabel = ""
+		effectiveSELinuxMountLabel = ""
 	}
 	if seLinuxFileLabel != "" {
 		seLinuxVolumesAdmitted.Add(1.0)
@@ -324,7 +336,8 @@ func (dsw *desiredStateOfWorld) AddPodToVolume(
 		volumeGidValue: volumeGidValue,
 		reportedInUse: false,
 		desiredSizeLimit: sizeLimit,
-		seLinuxFileLabel: seLinuxFileLabel,
+		effectiveSELinuxMountFileLabel: effectiveSELinuxMountLabel,
+		originalSELinuxLabel: seLinuxFileLabel,
 	}
 	// record desired size of the volume
 	if volumeSpec.PersistentVolume != nil {
@@ -338,17 +351,13 @@ func (dsw *desiredStateOfWorld) AddPodToVolume(
 	} else {
 		// volume exists
 		if pluginSupportsSELinuxContextMount {
-			if seLinuxFileLabel != vol.seLinuxFileLabel {
+			if seLinuxFileLabel != vol.originalSELinuxLabel {
 				// TODO: update the error message after tests, e.g. add at least the conflicting pod names.
-				fullErr := fmt.Errorf("conflicting SELinux labels of volume %s: %q and %q", volumeSpec.Name(), vol.seLinuxFileLabel, seLinuxFileLabel)
+				fullErr := fmt.Errorf("conflicting SELinux labels of volume %s: %q and %q", volumeSpec.Name(), vol.originalSELinuxLabel, seLinuxFileLabel)
 				supported := util.VolumeSupportsSELinuxMount(volumeSpec)
 				if err := handleSELinuxMetricError(fullErr, supported, seLinuxVolumeContextMismatchWarnings, seLinuxVolumeContextMismatchErrors); err != nil {
 					return "", err
 				}
-			} else {
-				if seLinuxFileLabel != "" {
-					seLinuxVolumesAdmitted.Add(1.0)
-				}
 			}
 		}
 	}
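Reading the call above together with the e2e expectations added below: handleSELinuxMetricError appears to escalate a label conflict to a hard error only when SELinux context mounts are actually used for the volume (ReadWriteOncePod), and otherwise only counts a warning. A rough package-level sketch of that shape, inferred from the call site rather than taken from the real implementation:

// Counter is a stand-in for the metric type used by the real code.
type Counter interface{ Add(float64) }

// handleSELinuxMetricErrorSketch: if SELinux mounts apply to the volume, the
// mismatch increments the error counter and fails the mount (the pod later
// sees a FailedMountVolume event); otherwise only the warning counter grows.
func handleSELinuxMetricErrorSketch(err error, seLinuxMountSupported bool, warning, errorMetric Counter) error {
	if seLinuxMountSupported {
		errorMetric.Add(1.0)
		return err
	}
	warning.Add(1.0)
	return nil
}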
@@ -500,7 +509,7 @@ func (dsw *desiredStateOfWorld) VolumeExists(
 		// and mounted with new SELinux mount options for pod B.
 		// Without SELinux, kubelet can (and often does) reuse device mounted
 		// for A.
-		return vol.seLinuxFileLabel == seLinuxMountContext
+		return vol.effectiveSELinuxMountFileLabel == seLinuxMountContext
 	}
 	return true
 }
@@ -516,7 +525,7 @@ func (dsw *desiredStateOfWorld) PodExistsInVolume(
 	}
 	if feature.DefaultFeatureGate.Enabled(features.SELinuxMountReadWriteOncePod) {
-		if volumeObj.seLinuxFileLabel != seLinuxMountOption {
+		if volumeObj.effectiveSELinuxMountFileLabel != seLinuxMountOption {
 			// The volume is in DSW, but with a different SELinux mount option.
 			// Report it as unused, so the volume is unmounted and mounted back
 			// with the right SELinux option.
@@ -574,7 +583,7 @@ func (dsw *desiredStateOfWorld) GetVolumesToMount() []VolumeToMount {
 			ReportedInUse: volumeObj.reportedInUse,
 			MountRequestTime: podObj.mountRequestTime,
 			DesiredSizeLimit: volumeObj.desiredSizeLimit,
-			SELinuxLabel: volumeObj.seLinuxFileLabel,
+			SELinuxLabel: volumeObj.effectiveSELinuxMountFileLabel,
 		},
 	}
 	if volumeObj.persistentVolumeSize != nil {


@@ -18,16 +18,22 @@ package csi_mock
 import (
 	"context"
+	"fmt"
+	"sort"
 	"sync/atomic"
+	"time"

 	"github.com/onsi/ginkgo/v2"
 	"github.com/onsi/gomega"
 	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/fields"
+	"k8s.io/apimachinery/pkg/util/sets"
+	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/kubernetes/pkg/kubelet/events"
 	"k8s.io/kubernetes/test/e2e/framework"
 	e2eevents "k8s.io/kubernetes/test/e2e/framework/events"
+	e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics"
 	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
 	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
 	"k8s.io/kubernetes/test/e2e/storage/utils"
@@ -237,3 +243,214 @@ var _ = utils.SIGDescribe("CSI Mock selinux on mount", func() {
	}
})
})
var _ = utils.SIGDescribe("CSI Mock selinux on mount metrics", func() {
f := framework.NewDefaultFramework("csi-mock-volumes-selinux-metrics")
f.NamespacePodSecurityEnforceLevel = admissionapi.LevelPrivileged
m := newMockDriverSetup(f)
// [Serial]: the tests read global kubelet metrics, so no other test changes them in parallel.
ginkgo.Context("SELinuxMount metrics [LinuxOnly][Feature:SELinux][Feature:SELinuxMountReadWriteOncePod][Serial]", func() {
// All SELinux metrics. Unless explicitly mentioned in test.expectIncreases, these metrics must not grow during
// a test.
allMetrics := sets.NewString(
"volume_manager_selinux_container_errors_total",
"volume_manager_selinux_container_warnings_total",
"volume_manager_selinux_pod_context_mismatch_errors_total",
"volume_manager_selinux_pod_context_mismatch_warnings_total",
"volume_manager_selinux_volume_context_mismatch_errors_total",
"volume_manager_selinux_volume_context_mismatch_warnings_total",
"volume_manager_selinux_volumes_admitted_total",
)
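// These counters come from the kubelet volume manager; the test scrapes them
// from the kubelet /metrics endpoint via the metrics grabber and compares the
// values captured before and after the second pod is created.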
// Make sure all options are set so system specific defaults are not used.
seLinuxOpts1 := v1.SELinuxOptions{
User: "system_u",
Role: "object_r",
Type: "container_file_t",
Level: "s0:c0,c1",
}
seLinuxOpts2 := v1.SELinuxOptions{
User: "system_u",
Role: "object_r",
Type: "container_file_t",
Level: "s0:c98,c99",
}
tests := []struct {
name string
csiDriverSELinuxEnabled bool
firstPodSELinuxOpts *v1.SELinuxOptions
secondPodSELinuxOpts *v1.SELinuxOptions
volumeMode v1.PersistentVolumeAccessMode
waitForSecondPodStart bool
secondPodFailureEvent string
expectIncreases sets.String
}{
{
name: "warning is not bumped on two Pods with the same context on RWO volume",
csiDriverSELinuxEnabled: true,
firstPodSELinuxOpts: &seLinuxOpts1,
secondPodSELinuxOpts: &seLinuxOpts1,
volumeMode: v1.ReadWriteOnce,
waitForSecondPodStart: true,
expectIncreases: sets.NewString( /* no metric is increased, admitted_total was already increased when the first pod started */ ),
},
{
name: "warning is bumped on two Pods with a different context on RWO volume",
csiDriverSELinuxEnabled: true,
firstPodSELinuxOpts: &seLinuxOpts1,
secondPodSELinuxOpts: &seLinuxOpts2,
volumeMode: v1.ReadWriteOnce,
waitForSecondPodStart: true,
expectIncreases: sets.NewString("volume_manager_selinux_volume_context_mismatch_warnings_total"),
},
{
name: "error is bumped on two Pods with a different context on RWOP volume",
csiDriverSELinuxEnabled: true,
firstPodSELinuxOpts: &seLinuxOpts1,
secondPodSELinuxOpts: &seLinuxOpts2,
secondPodFailureEvent: "conflicting SELinux labels of volume",
volumeMode: v1.ReadWriteOncePod,
waitForSecondPodStart: false,
expectIncreases: sets.NewString("volume_manager_selinux_volume_context_mismatch_errors_total"),
},
}
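// Each case starts a first pod with firstPodSELinuxOpts, records the SELinux
// metrics on its node, starts a second pod with secondPodSELinuxOpts against
// the same claim and node, and then verifies that exactly the metrics listed
// in expectIncreases grew while the remaining SELinux metrics stayed flat.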
for _, t := range tests {
t := t
ginkgo.It(t.name, func(ctx context.Context) {
if framework.NodeOSDistroIs("windows") {
e2eskipper.Skipf("SELinuxMount is only applied on linux nodes -- skipping")
}
grabber, err := e2emetrics.NewMetricsGrabber(ctx, f.ClientSet, nil, f.ClientConfig(), true, false, false, false, false, false)
framework.ExpectNoError(err, "creating the metrics grabber")
var nodeStageMountOpts, nodePublishMountOpts []string
var unstageCalls, stageCalls, unpublishCalls, publishCalls atomic.Int32
m.init(ctx, testParameters{
disableAttach: true,
registerDriver: true,
enableSELinuxMount: &t.csiDriverSELinuxEnabled,
hooks: createSELinuxMountPreHook(&nodeStageMountOpts, &nodePublishMountOpts, &stageCalls, &unstageCalls, &publishCalls, &unpublishCalls),
})
ginkgo.DeferCleanup(m.cleanup)
ginkgo.By("Starting the first pod")
accessModes := []v1.PersistentVolumeAccessMode{t.volumeMode}
_, claim, pod := m.createPodWithSELinux(ctx, accessModes, []string{}, t.firstPodSELinuxOpts)
err = e2epod.WaitForPodNameRunningInNamespace(ctx, m.cs, pod.Name, pod.Namespace)
framework.ExpectNoError(err, "starting the initial pod")
ginkgo.By("Grabbing initial metrics")
pod, err = m.cs.CoreV1().Pods(pod.Namespace).Get(ctx, pod.Name, metav1.GetOptions{})
framework.ExpectNoError(err, "getting the initial pod")
metrics, err := grabMetrics(ctx, grabber, pod.Spec.NodeName, allMetrics)
framework.ExpectNoError(err, "collecting the initial metrics")
dumpMetrics(metrics)
// Act
ginkgo.By("Starting the second pod")
// Skip scheduler, it would block scheduling the second pod with ReadWriteOncePod PV.
nodeSelection := e2epod.NodeSelection{Name: pod.Spec.NodeName}
pod2, err := startPausePodWithSELinuxOptions(f.ClientSet, claim, nodeSelection, f.Namespace.Name, t.secondPodSELinuxOpts)
framework.ExpectNoError(err, "creating second pod with SELinux context %s", t.secondPodSELinuxOpts)
m.pods = append(m.pods, pod2)
if t.waitForSecondPodStart {
err := e2epod.WaitForPodNameRunningInNamespace(ctx, m.cs, pod2.Name, pod2.Namespace)
framework.ExpectNoError(err, "starting the second pod")
} else {
ginkgo.By("Waiting for the second pod to fail to start")
eventSelector := fields.Set{
"involvedObject.kind": "Pod",
"involvedObject.name": pod2.Name,
"involvedObject.namespace": pod2.Namespace,
"reason": events.FailedMountVolume,
}.AsSelector().String()
err = e2eevents.WaitTimeoutForEvent(ctx, m.cs, pod2.Namespace, eventSelector, t.secondPodFailureEvent, f.Timeouts.PodStart)
framework.ExpectNoError(err, "waiting for event %q in the second test pod", t.secondPodFailureEvent)
}
// Assert: count the metrics
ginkgo.By("Waiting for expected metric changes")
err = waitForMetricIncrease(ctx, grabber, pod.Spec.NodeName, allMetrics, t.expectIncreases, metrics, framework.PodStartShortTimeout)
framework.ExpectNoError(err, "waiting for metrics %s to increase", t.expectIncreases)
})
}
})
})
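// grabMetrics reads the requested metrics from the kubelet on nodeName and
// returns the last sample of each. It fails the test if the kubelet cannot be
// scraped and returns an error when a requested metric is missing or empty.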
func grabMetrics(ctx context.Context, grabber *e2emetrics.Grabber, nodeName string, metricNames sets.String) (map[string]float64, error) {
response, err := grabber.GrabFromKubelet(ctx, nodeName)
framework.ExpectNoError(err)
metrics := map[string]float64{}
for method, samples := range response {
if metricNames.Has(method) {
if len(samples) == 0 {
return nil, fmt.Errorf("metric %s has no samples", method)
}
lastSample := samples[len(samples)-1]
metrics[method] = float64(lastSample.Value)
}
}
// Ensure all metrics were provided
for name := range metricNames {
if _, found := metrics[name]; !found {
return nil, fmt.Errorf("metric %s not found", name)
}
}
return metrics, nil
}
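// waitForMetricIncrease polls the kubelet metrics until every metric in
// expectedIncreaseNames has risen above its value in initialValues; any change
// to a metric outside that set, or hitting the timeout, is an error.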
func waitForMetricIncrease(ctx context.Context, grabber *e2emetrics.Grabber, nodeName string, allMetricNames, expectedIncreaseNames sets.String, initialValues map[string]float64, timeout time.Duration) error {
var noIncreaseMetrics sets.String
var metrics map[string]float64
err := wait.Poll(time.Second, timeout, func() (bool, error) {
var err error
metrics, err = grabMetrics(ctx, grabber, nodeName, allMetricNames)
if err != nil {
return false, err
}
noIncreaseMetrics = sets.NewString()
// Always evaluate all SELinux metrics to check that the other metrics are not unexpectedly increased.
for name := range allMetricNames {
if expectedIncreaseNames.Has(name) {
if metrics[name] <= initialValues[name] {
noIncreaseMetrics.Insert(name)
}
} else {
if initialValues[name] != metrics[name] {
return false, fmt.Errorf("metric %s unexpectedly increased to %v", name, metrics[name])
}
}
}
return noIncreaseMetrics.Len() == 0, nil
})
ginkgo.By("Dumping final metrics")
dumpMetrics(metrics)
if err == context.DeadlineExceeded {
return fmt.Errorf("timed out waiting for metrics %v", noIncreaseMetrics.List())
}
return err
}
func dumpMetrics(metrics map[string]float64) {
// Print the metrics sorted by metric name for better readability
keys := make([]string, 0, len(metrics))
for key := range metrics {
keys = append(keys, key)
}
sort.Strings(keys)
for _, key := range keys {
framework.Logf("Metric %s: %v", key, metrics[key])
}
}