From 771c9be291c1916c7c7f077d4ae70e2f20654f2b Mon Sep 17 00:00:00 2001
From: Jan Safranek
Date: Thu, 9 Mar 2023 14:02:25 +0100
Subject: [PATCH 1/3] Add e2e test for SELinux metrics

---
 .../e2e/storage/csi_mock/csi_selinux_mount.go | 217 ++++++++++++++++++
 1 file changed, 217 insertions(+)

diff --git a/test/e2e/storage/csi_mock/csi_selinux_mount.go b/test/e2e/storage/csi_mock/csi_selinux_mount.go
index b6b74c78c69..5322d551816 100644
--- a/test/e2e/storage/csi_mock/csi_selinux_mount.go
+++ b/test/e2e/storage/csi_mock/csi_selinux_mount.go
@@ -18,16 +18,22 @@ package csi_mock
 
 import (
 	"context"
+	"fmt"
+	"sort"
 	"sync/atomic"
+	"time"
 
 	"github.com/onsi/ginkgo/v2"
 	"github.com/onsi/gomega"
 	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/fields"
+	"k8s.io/apimachinery/pkg/util/sets"
+	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/kubernetes/pkg/kubelet/events"
 	"k8s.io/kubernetes/test/e2e/framework"
 	e2eevents "k8s.io/kubernetes/test/e2e/framework/events"
+	e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics"
 	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
 	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
 	"k8s.io/kubernetes/test/e2e/storage/utils"
@@ -237,3 +243,214 @@ var _ = utils.SIGDescribe("CSI Mock selinux on mount", func() {
 		}
 	})
 })
+
+var _ = utils.SIGDescribe("CSI Mock selinux on mount metrics", func() {
+	f := framework.NewDefaultFramework("csi-mock-volumes-selinux-metrics")
+	f.NamespacePodSecurityEnforceLevel = admissionapi.LevelPrivileged
+	m := newMockDriverSetup(f)
+
+	// [Serial]: the tests read global kubelet metrics, so no other test changes them in parallel.
+	ginkgo.Context("SELinuxMount metrics [LinuxOnly][Feature:SELinux][Feature:SELinuxMountReadWriteOncePod][Serial]", func() {
+
+		// All SELinux metrics. Unless explicitly mentioned in test.expectIncreases, these metrics must not grow during
+		// a test.
+		allMetrics := sets.NewString(
+			"volume_manager_selinux_container_errors_total",
+			"volume_manager_selinux_container_warnings_total",
+			"volume_manager_selinux_pod_context_mismatch_errors_total",
+			"volume_manager_selinux_pod_context_mismatch_warnings_total",
+			"volume_manager_selinux_volume_context_mismatch_errors_total",
+			"volume_manager_selinux_volume_context_mismatch_warnings_total",
+			"volume_manager_selinux_volumes_admitted_total",
+		)
+
+		// Make sure all options are set so system specific defaults are not used.
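+		// The two contexts below differ only in the MCS level (s0:c0,c1 vs. s0:c98,c99),
+		// so any conflict the tests trigger comes purely from that label difference.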
+ seLinuxOpts1 := v1.SELinuxOptions{ + User: "system_u", + Role: "object_r", + Type: "container_file_t", + Level: "s0:c0,c1", + } + seLinuxOpts2 := v1.SELinuxOptions{ + User: "system_u", + Role: "object_r", + Type: "container_file_t", + Level: "s0:c98,c99", + } + + tests := []struct { + name string + csiDriverSELinuxEnabled bool + firstPodSELinuxOpts *v1.SELinuxOptions + secondPodSELinuxOpts *v1.SELinuxOptions + volumeMode v1.PersistentVolumeAccessMode + waitForSecondPodStart bool + secondPodFailureEvent string + expectIncreases sets.String + }{ + { + name: "warning is not bumped on two Pods with the same context on RWO volume", + csiDriverSELinuxEnabled: true, + firstPodSELinuxOpts: &seLinuxOpts1, + secondPodSELinuxOpts: &seLinuxOpts1, + volumeMode: v1.ReadWriteOnce, + waitForSecondPodStart: true, + expectIncreases: sets.NewString( /* no metric is increased, admitted_total was already increased when the first pod started */ ), + }, + { + name: "warning is bumped on two Pods with a different context on RWO volume", + csiDriverSELinuxEnabled: true, + firstPodSELinuxOpts: &seLinuxOpts1, + secondPodSELinuxOpts: &seLinuxOpts2, + volumeMode: v1.ReadWriteOnce, + waitForSecondPodStart: true, + expectIncreases: sets.NewString("volume_manager_selinux_volume_context_mismatch_warnings_total"), + }, + { + name: "error is bumped on two Pods with a different context on RWOP volume", + csiDriverSELinuxEnabled: true, + firstPodSELinuxOpts: &seLinuxOpts1, + secondPodSELinuxOpts: &seLinuxOpts2, + secondPodFailureEvent: "conflicting SELinux labels of volume", + volumeMode: v1.ReadWriteOncePod, + waitForSecondPodStart: false, + expectIncreases: sets.NewString("volume_manager_selinux_volume_context_mismatch_errors_total"), + }, + } + for _, t := range tests { + t := t + ginkgo.It(t.name, func(ctx context.Context) { + if framework.NodeOSDistroIs("windows") { + e2eskipper.Skipf("SELinuxMount is only applied on linux nodes -- skipping") + } + grabber, err := e2emetrics.NewMetricsGrabber(ctx, f.ClientSet, nil, f.ClientConfig(), true, false, false, false, false, false) + framework.ExpectNoError(err, "creating the metrics grabber") + + var nodeStageMountOpts, nodePublishMountOpts []string + var unstageCalls, stageCalls, unpublishCalls, publishCalls atomic.Int32 + m.init(ctx, testParameters{ + disableAttach: true, + registerDriver: true, + enableSELinuxMount: &t.csiDriverSELinuxEnabled, + hooks: createSELinuxMountPreHook(&nodeStageMountOpts, &nodePublishMountOpts, &stageCalls, &unstageCalls, &publishCalls, &unpublishCalls), + }) + ginkgo.DeferCleanup(m.cleanup) + + ginkgo.By("Starting the first pod") + accessModes := []v1.PersistentVolumeAccessMode{t.volumeMode} + _, claim, pod := m.createPodWithSELinux(ctx, accessModes, []string{}, t.firstPodSELinuxOpts) + err = e2epod.WaitForPodNameRunningInNamespace(ctx, m.cs, pod.Name, pod.Namespace) + framework.ExpectNoError(err, "starting the initial pod") + + ginkgo.By("Grabbing initial metrics") + pod, err = m.cs.CoreV1().Pods(pod.Namespace).Get(ctx, pod.Name, metav1.GetOptions{}) + framework.ExpectNoError(err, "getting the initial pod") + metrics, err := grabMetrics(ctx, grabber, pod.Spec.NodeName, allMetrics) + framework.ExpectNoError(err, "collecting the initial metrics") + dumpMetrics(metrics) + + // Act + ginkgo.By("Starting the second pod") + // Skip scheduler, it would block scheduling the second pod with ReadWriteOncePod PV. 
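+				// The second pod is created directly on the first pod's node, so any SELinux
+				// conflict is detected by kubelet's volume manager and reported in its metrics.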
+ nodeSelection := e2epod.NodeSelection{Name: pod.Spec.NodeName} + pod2, err := startPausePodWithSELinuxOptions(f.ClientSet, claim, nodeSelection, f.Namespace.Name, t.secondPodSELinuxOpts) + framework.ExpectNoError(err, "creating second pod with SELinux context %s", t.secondPodSELinuxOpts) + m.pods = append(m.pods, pod2) + + if t.waitForSecondPodStart { + err := e2epod.WaitForPodNameRunningInNamespace(ctx, m.cs, pod2.Name, pod2.Namespace) + framework.ExpectNoError(err, "starting the second pod") + } else { + ginkgo.By("Waiting for the second pod to fail to start") + eventSelector := fields.Set{ + "involvedObject.kind": "Pod", + "involvedObject.name": pod2.Name, + "involvedObject.namespace": pod2.Namespace, + "reason": events.FailedMountVolume, + }.AsSelector().String() + err = e2eevents.WaitTimeoutForEvent(ctx, m.cs, pod2.Namespace, eventSelector, t.secondPodFailureEvent, f.Timeouts.PodStart) + framework.ExpectNoError(err, "waiting for event %q in the second test pod", t.secondPodFailureEvent) + } + + // Assert: count the metrics + ginkgo.By("Waiting for expected metric changes") + err = waitForMetricIncrease(ctx, grabber, pod.Spec.NodeName, allMetrics, t.expectIncreases, metrics, framework.PodStartShortTimeout) + framework.ExpectNoError(err, "waiting for metrics %s to increase", t.expectIncreases) + }) + } + }) +}) + +func grabMetrics(ctx context.Context, grabber *e2emetrics.Grabber, nodeName string, metricNames sets.String) (map[string]float64, error) { + response, err := grabber.GrabFromKubelet(ctx, nodeName) + framework.ExpectNoError(err) + + metrics := map[string]float64{} + for method, samples := range response { + if metricNames.Has(method) { + if len(samples) == 0 { + return nil, fmt.Errorf("metric %s has no samples", method) + } + lastSample := samples[len(samples)-1] + metrics[method] = float64(lastSample.Value) + } + } + + // Ensure all metrics were provided + for name := range metricNames { + if _, found := metrics[name]; !found { + return nil, fmt.Errorf("metric %s not found", name) + } + } + + return metrics, nil +} + +func waitForMetricIncrease(ctx context.Context, grabber *e2emetrics.Grabber, nodeName string, allMetricNames, expectedIncreaseNames sets.String, initialValues map[string]float64, timeout time.Duration) error { + var noIncreaseMetrics sets.String + var metrics map[string]float64 + + err := wait.Poll(time.Second, timeout, func() (bool, error) { + var err error + metrics, err = grabMetrics(ctx, grabber, nodeName, allMetricNames) + if err != nil { + return false, err + } + + noIncreaseMetrics = sets.NewString() + // Always evaluate all SELinux metrics to check that the other metrics are not unexpectedly increased. 
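+		// Metrics listed in expectedIncreaseNames must rise above their initial value;
+		// any other metric that changes at all fails the wait immediately.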
+ for name := range allMetricNames { + if expectedIncreaseNames.Has(name) { + if metrics[name] <= initialValues[name] { + noIncreaseMetrics.Insert(name) + } + } else { + if initialValues[name] != metrics[name] { + return false, fmt.Errorf("metric %s unexpectedly increased to %v", name, metrics[name]) + } + } + } + return noIncreaseMetrics.Len() == 0, nil + }) + + ginkgo.By("Dumping final metrics") + dumpMetrics(metrics) + + if err == context.DeadlineExceeded { + return fmt.Errorf("timed out waiting for metrics %v", noIncreaseMetrics.List()) + } + return err +} + +func dumpMetrics(metrics map[string]float64) { + // Print the metrics sorted by metric name for better readability + keys := make([]string, 0, len(metrics)) + for key := range metrics { + keys = append(keys, key) + } + sort.Strings(keys) + + for _, key := range keys { + framework.Logf("Metric %s: %v", key, metrics[key]) + } +} From 48ea6a3f3a74ea1fd9a356918f63a135cad87b60 Mon Sep 17 00:00:00 2001 From: Jan Safranek Date: Thu, 9 Mar 2023 14:15:58 +0100 Subject: [PATCH 2/3] Fix SELinux mismatch metrics DesiredStateOfWorld must remember both - the effective SELinux label to apply as a mount option (non-empty for RWOP volumes, empty otherwise) - and the label that _would_ be used if the mount option would be used by all access modes. Mismatch warning metrics must be generated from the second label. --- .../cache/desired_state_of_world.go | 47 ++++++++++++------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/pkg/kubelet/volumemanager/cache/desired_state_of_world.go b/pkg/kubelet/volumemanager/cache/desired_state_of_world.go index 0d8faf32351..dae89ab401f 100644 --- a/pkg/kubelet/volumemanager/cache/desired_state_of_world.go +++ b/pkg/kubelet/volumemanager/cache/desired_state_of_world.go @@ -206,12 +206,23 @@ type volumeToMount struct { // Usually this value reflects size recorded in pv.Spec.Capacity persistentVolumeSize *resource.Quantity - // seLinuxFileLabel is desired SELinux label on files on the volume. If empty, then + // effectiveSELinuxMountFileLabel is the SELinux label that will be applied to the volume using mount options. + // If empty, then: + // - either the context+label is unknown (assigned randomly by the container runtime) + // - or the volume plugin responsible for this volume does not support mounting with -o context + // - or the volume is not ReadWriteOncePod + // - or the OS does not support SELinux + // In all cases, the SELinux context does not matter when mounting the volume. + effectiveSELinuxMountFileLabel string + + // originalSELinuxLabel is the SELinux label that would be used if SELinux mount was supported for all access modes. + // For RWOP volumes it's the same as effectiveSELinuxMountFileLabel. + // It is used only to report potential SELinux mismatch metrics. + // If empty, then: // - either the context+label is unknown (assigned randomly by the container runtime) // - or the volume plugin responsible for this volume does not support mounting with -o context // - or the OS does not support SELinux - // In all cases, the SELinux context does not matter when mounting the volume. - seLinuxFileLabel string + originalSELinuxLabel string } // The pod object represents a pod that references the underlying volume and @@ -307,23 +318,25 @@ func (dsw *desiredStateOfWorld) AddPodToVolume( } } } + effectiveSELinuxMountLabel := seLinuxFileLabel if !util.VolumeSupportsSELinuxMount(volumeSpec) { // Clear SELinux label for the volume with unsupported access modes. 
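+			// Note: only the label used as a mount option is cleared here; originalSELinuxLabel
+			// below keeps the pod's label so mismatch metrics can still be reported.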
klog.V(4).InfoS("volume does not support SELinux context mount, clearing the expected label", "volume", volumeSpec.Name()) - seLinuxFileLabel = "" + effectiveSELinuxMountLabel = "" } if seLinuxFileLabel != "" { seLinuxVolumesAdmitted.Add(1.0) } vmt := volumeToMount{ - volumeName: volumeName, - podsToMount: make(map[types.UniquePodName]podToMount), - pluginIsAttachable: attachable, - pluginIsDeviceMountable: deviceMountable, - volumeGidValue: volumeGidValue, - reportedInUse: false, - desiredSizeLimit: sizeLimit, - seLinuxFileLabel: seLinuxFileLabel, + volumeName: volumeName, + podsToMount: make(map[types.UniquePodName]podToMount), + pluginIsAttachable: attachable, + pluginIsDeviceMountable: deviceMountable, + volumeGidValue: volumeGidValue, + reportedInUse: false, + desiredSizeLimit: sizeLimit, + effectiveSELinuxMountFileLabel: effectiveSELinuxMountLabel, + originalSELinuxLabel: seLinuxFileLabel, } // record desired size of the volume if volumeSpec.PersistentVolume != nil { @@ -337,9 +350,9 @@ func (dsw *desiredStateOfWorld) AddPodToVolume( } else { // volume exists if pluginSupportsSELinuxContextMount { - if seLinuxFileLabel != vol.seLinuxFileLabel { + if seLinuxFileLabel != vol.originalSELinuxLabel { // TODO: update the error message after tests, e.g. add at least the conflicting pod names. - fullErr := fmt.Errorf("conflicting SELinux labels of volume %s: %q and %q", volumeSpec.Name(), vol.seLinuxFileLabel, seLinuxFileLabel) + fullErr := fmt.Errorf("conflicting SELinux labels of volume %s: %q and %q", volumeSpec.Name(), vol.originalSELinuxLabel, seLinuxFileLabel) supported := util.VolumeSupportsSELinuxMount(volumeSpec) if err := handleSELinuxMetricError(fullErr, supported, seLinuxVolumeContextMismatchWarnings, seLinuxVolumeContextMismatchErrors); err != nil { return "", err @@ -499,7 +512,7 @@ func (dsw *desiredStateOfWorld) VolumeExists( // and mounted with new SELinux mount options for pod B. // Without SELinux, kubelet can (and often does) reuse device mounted // for A. - return vol.seLinuxFileLabel == seLinuxMountContext + return vol.effectiveSELinuxMountFileLabel == seLinuxMountContext } return true } @@ -515,7 +528,7 @@ func (dsw *desiredStateOfWorld) PodExistsInVolume( } if feature.DefaultFeatureGate.Enabled(features.SELinuxMountReadWriteOncePod) { - if volumeObj.seLinuxFileLabel != seLinuxMountOption { + if volumeObj.effectiveSELinuxMountFileLabel != seLinuxMountOption { // The volume is in DSW, but with a different SELinux mount option. // Report it as unused, so the volume is unmounted and mounted back // with the right SELinux option. @@ -573,7 +586,7 @@ func (dsw *desiredStateOfWorld) GetVolumesToMount() []VolumeToMount { ReportedInUse: volumeObj.reportedInUse, MountRequestTime: podObj.mountRequestTime, DesiredSizeLimit: volumeObj.desiredSizeLimit, - SELinuxLabel: volumeObj.seLinuxFileLabel, + SELinuxLabel: volumeObj.effectiveSELinuxMountFileLabel, }, } if volumeObj.persistentVolumeSize != nil { From 05cd2ba8631d612a983e0a1642308aaa2c5ba64c Mon Sep 17 00:00:00 2001 From: Jan Safranek Date: Thu, 9 Mar 2023 15:45:21 +0100 Subject: [PATCH 3/3] Don't bump nr. of admitted volumes on retry AddPodToVolume is called periodically, it does not make sense to bump volume_manager_selinux_volumes_admitted_total on each call. 
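Instead, bump it only when a volume is newly added to the desired state of world.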
--- pkg/kubelet/volumemanager/cache/desired_state_of_world.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pkg/kubelet/volumemanager/cache/desired_state_of_world.go b/pkg/kubelet/volumemanager/cache/desired_state_of_world.go index dae89ab401f..fc77c675572 100644 --- a/pkg/kubelet/volumemanager/cache/desired_state_of_world.go +++ b/pkg/kubelet/volumemanager/cache/desired_state_of_world.go @@ -357,10 +357,6 @@ func (dsw *desiredStateOfWorld) AddPodToVolume( if err := handleSELinuxMetricError(fullErr, supported, seLinuxVolumeContextMismatchWarnings, seLinuxVolumeContextMismatchErrors); err != nil { return "", err } - } else { - if seLinuxFileLabel != "" { - seLinuxVolumesAdmitted.Add(1.0) - } } } }
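
Taken together, patches 2 and 3 separate the SELinux label used for -o context mounts from the label used for mismatch metrics, and stop bumping the admitted counter on periodic retries. The standalone Go sketch below illustrates that bookkeeping; the types, counters, and the simplified addPodToVolume signature are illustrative stand-ins, not the actual kubelet code.

package main

import "fmt"

// volumeEntry is a simplified stand-in for the kubelet's per-volume bookkeeping.
type volumeEntry struct {
	effectiveMountLabel string // label applied via -o context; empty unless SELinux mount is supported
	originalLabel       string // label used only for mismatch warning/error metrics
}

// desiredState is a toy model of the desired state of world; the two counters
// stand in for volume_manager_selinux_volumes_admitted_total and the
// volume_manager_selinux_volume_context_mismatch_* metrics.
type desiredState struct {
	volumes        map[string]*volumeEntry
	admittedTotal  int
	mismatchEvents int
}

// addPodToVolume mimics the intent of the patched AddPodToVolume: remember both
// labels when a volume is first seen, bump the admitted counter only on that
// first add, and compare later pods against originalLabel so a mismatch is
// reported even when the volume is not mounted with -o context.
func (d *desiredState) addPodToVolume(volName, podLabel string, supportsSELinuxMount bool) error {
	effective := podLabel
	if !supportsSELinuxMount {
		// Only the label used for mounting is cleared; the original label is
		// still tracked for mismatch metrics.
		effective = ""
	}

	vol, exists := d.volumes[volName]
	if !exists {
		d.volumes[volName] = &volumeEntry{effectiveMountLabel: effective, originalLabel: podLabel}
		if podLabel != "" {
			d.admittedTotal++ // bumped once per volume, not on every periodic sync
		}
		return nil
	}

	if podLabel != vol.originalLabel {
		d.mismatchEvents++
		return fmt.Errorf("conflicting SELinux labels of volume %s: %q and %q", volName, vol.originalLabel, podLabel)
	}
	return nil
}

func main() {
	d := &desiredState{volumes: map[string]*volumeEntry{}}
	// A first pod admits the volume; a second pod with the same label changes nothing.
	_ = d.addPodToVolume("vol-1", "system_u:object_r:container_file_t:s0:c0,c1", false)
	_ = d.addPodToVolume("vol-1", "system_u:object_r:container_file_t:s0:c0,c1", false)
	// A pod with a different label triggers a mismatch even without -o context mounts.
	err := d.addPodToVolume("vol-1", "system_u:object_r:container_file_t:s0:c98,c99", false)
	fmt.Println("admitted:", d.admittedTotal, "mismatches:", d.mismatchEvents, "err:", err)
}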