From 0d71dc677e74f3bf42db578f6d88bf818cdeafeb Mon Sep 17 00:00:00 2001
From: Jan Safranek
Date: Mon, 14 Oct 2024 17:47:02 +0200
Subject: [PATCH 01/14] Refactor CreateVolumeSpec

Rename the old CreateVolumeSpec to CreateVolumeSpecWithNodeMigration, which
extracts a volume.Spec with node-specific CSI migration applied. Add a new
CreateVolumeSpec that does the same, only without evaluating node-specific
CSI migration.
---
 .../attachdetach/attach_detach_controller.go  |   2 +-
 .../volume/attachdetach/metrics/metrics.go    |   2 +-
 .../volume/attachdetach/util/util.go          | 154 ++++----
 .../volume/attachdetach/util/util_test.go     |   2 +-
 .../cache/desired_state_of_world.go           |  60 +---
 pkg/volume/testing/testing.go                 |   3 +-
 pkg/volume/util/selinux.go                    | 109 +++++-
 pkg/volume/util/selinux_test.go               | 333 ++++++++++++++++++
 8 files changed, 547 insertions(+), 118 deletions(-)
 create mode 100644 pkg/volume/util/selinux_test.go

diff --git a/pkg/controller/volume/attachdetach/attach_detach_controller.go b/pkg/controller/volume/attachdetach/attach_detach_controller.go
index 76eb4bf6ade..667804d1f55 100644
--- a/pkg/controller/volume/attachdetach/attach_detach_controller.go
+++ b/pkg/controller/volume/attachdetach/attach_detach_controller.go
@@ -437,7 +437,7 @@ func (adc *attachDetachController) populateDesiredStateOfWorld(logger klog.Logge
 			// The volume specs present in the ActualStateOfWorld are nil, let's replace those
 			// with the correct ones found on pods. The present in the ASW with no corresponding
 			// pod will be detached and the spec is irrelevant.
-			volumeSpec, err := util.CreateVolumeSpec(logger, podVolume, podToAdd, nodeName, &adc.volumePluginMgr, adc.pvcLister, adc.pvLister, adc.csiMigratedPluginManager, adc.intreeToCSITranslator)
+			volumeSpec, err := util.CreateVolumeSpecWithNodeMigration(logger, podVolume, podToAdd, nodeName, &adc.volumePluginMgr, adc.pvcLister, adc.pvLister, adc.csiMigratedPluginManager, adc.intreeToCSITranslator)
 			if err != nil {
 				logger.Error(
 					err,
diff --git a/pkg/controller/volume/attachdetach/metrics/metrics.go b/pkg/controller/volume/attachdetach/metrics/metrics.go
index f0c487a7a2b..709ef9d124d 100644
--- a/pkg/controller/volume/attachdetach/metrics/metrics.go
+++ b/pkg/controller/volume/attachdetach/metrics/metrics.go
@@ -181,7 +181,7 @@ func (collector *attachDetachStateCollector) getVolumeInUseCount(logger klog.Log
 				continue
 			}
 			for _, podVolume := range pod.Spec.Volumes {
-				volumeSpec, err := util.CreateVolumeSpec(logger, podVolume, pod, types.NodeName(pod.Spec.NodeName), collector.volumePluginMgr, collector.pvcLister, collector.pvLister, collector.csiMigratedPluginManager, collector.intreeToCSITranslator)
+				volumeSpec, err := util.CreateVolumeSpecWithNodeMigration(logger, podVolume, pod, types.NodeName(pod.Spec.NodeName), collector.volumePluginMgr, collector.pvcLister, collector.pvLister, collector.csiMigratedPluginManager, collector.intreeToCSITranslator)
 				if err != nil {
 					continue
 				}
diff --git a/pkg/controller/volume/attachdetach/util/util.go b/pkg/controller/volume/attachdetach/util/util.go
index b075382e2d3..a47b431b606 100644
--- a/pkg/controller/volume/attachdetach/util/util.go
+++ b/pkg/controller/volume/attachdetach/util/util.go
@@ -33,12 +33,7 @@ import (
 	"k8s.io/kubernetes/pkg/volume/util"
 )

-// CreateVolumeSpec creates and returns a mutatable volume.Spec object for the
-// specified volume. It dereference any PVC to get PV objects, if needed.
-// A volume.Spec that refers to an in-tree plugin spec is translated to refer -// to a migrated CSI plugin spec if all conditions for CSI migration on a node -// for the in-tree plugin is satisfied. -func CreateVolumeSpec(logger klog.Logger, podVolume v1.Volume, pod *v1.Pod, nodeName types.NodeName, vpm *volume.VolumePluginMgr, pvcLister corelisters.PersistentVolumeClaimLister, pvLister corelisters.PersistentVolumeLister, csiMigratedPluginManager csimigration.PluginManager, csiTranslator csimigration.InTreeToCSITranslator) (*volume.Spec, error) { +func createInTreeVolumeSpec(logger klog.Logger, podVolume *v1.Volume, pod *v1.Pod, vpm *volume.VolumePluginMgr, pvcLister corelisters.PersistentVolumeClaimLister, pvLister corelisters.PersistentVolumeLister, csiMigratedPluginManager csimigration.PluginManager, csiTranslator csimigration.InTreeToCSITranslator) (*volume.Spec, string, error) { claimName := "" readOnly := false if pvcSource := podVolume.VolumeSource.PersistentVolumeClaim; pvcSource != nil { @@ -47,67 +42,83 @@ func CreateVolumeSpec(logger klog.Logger, podVolume v1.Volume, pod *v1.Pod, node } isEphemeral := podVolume.VolumeSource.Ephemeral != nil if isEphemeral { - claimName = ephemeral.VolumeClaimName(pod, &podVolume) + claimName = ephemeral.VolumeClaimName(pod, podVolume) } - if claimName != "" { - logger.V(10).Info("Found PVC", "PVC", klog.KRef(pod.Namespace, claimName)) - - // If podVolume is a PVC, fetch the real PV behind the claim - pvc, err := getPVCFromCache(pod.Namespace, claimName, pvcLister) - if err != nil { - return nil, fmt.Errorf( - "error processing PVC %q/%q: %v", - pod.Namespace, - claimName, - err) - } - if isEphemeral { - if err := ephemeral.VolumeIsForPod(pod, pvc); err != nil { - return nil, err - } - } - - pvName, pvcUID := pvc.Spec.VolumeName, pvc.UID - logger.V(10).Info("Found bound PV for PVC", "PVC", klog.KRef(pod.Namespace, claimName), "pvcUID", pvcUID, "PV", klog.KRef("", pvName)) - - // Fetch actual PV object - volumeSpec, err := getPVSpecFromCache( - pvName, readOnly, pvcUID, pvLister) - if err != nil { - return nil, fmt.Errorf( - "error processing PVC %q/%q: %v", - pod.Namespace, - claimName, - err) - } - - volumeSpec, err = translateInTreeSpecToCSIIfNeeded(logger, volumeSpec, nodeName, vpm, csiMigratedPluginManager, csiTranslator, pod.Namespace) - if err != nil { - return nil, fmt.Errorf( - "error performing CSI migration checks and translation for PVC %q/%q: %v", - pod.Namespace, - claimName, - err) - } - - logger.V(10).Info("Extracted volumeSpec from bound PV and PVC", "PVC", klog.KRef(pod.Namespace, claimName), "pvcUID", pvcUID, "PV", klog.KRef("", pvName), "volumeSpecName", volumeSpec.Name()) - - return volumeSpec, nil + if claimName == "" { + // In-line volume + return volume.NewSpecFromVolume(podVolume), "", nil } + // The volume is a PVC, dereference the PVC + PV + logger.V(10).Info("Found PVC", "PVC", klog.KRef(pod.Namespace, claimName)) - // Do not return the original volume object, since it's from the shared - // informer it may be mutated by another consumer. 
clonedPodVolume := podVolume.DeepCopy()
-
-	origspec := volume.NewSpecFromVolume(clonedPodVolume)
-	spec, err := translateInTreeSpecToCSIIfNeeded(logger, origspec, nodeName, vpm, csiMigratedPluginManager, csiTranslator, pod.Namespace)
+	// If podVolume is a PVC, fetch the real PV behind the claim
+	pvc, err := getPVCFromCache(pod.Namespace, claimName, pvcLister)
 	if err != nil {
-		return nil, fmt.Errorf(
-			"error performing CSI migration checks and translation for inline volume %q: %v",
-			podVolume.Name,
+		return nil, claimName, fmt.Errorf(
+			"error processing PVC %q/%q: %v",
+			pod.Namespace,
+			claimName,
 			err)
 	}
-	return spec, nil
+	if isEphemeral {
+		if err := ephemeral.VolumeIsForPod(pod, pvc); err != nil {
+			return nil, claimName, err
+		}
+	}
+
+	pvName, pvcUID := pvc.Spec.VolumeName, pvc.UID
+	logger.V(10).Info("Found bound PV for PVC", "PVC", klog.KRef(pod.Namespace, claimName), "pvcUID", pvcUID, "PV", klog.KRef("", pvName))
+
+	// Fetch actual PV object
+	volumeSpec, err := getPVSpecFromCache(
+		pvName, readOnly, pvcUID, pvLister)
+	if err != nil {
+		return nil, claimName, fmt.Errorf(
+			"error processing PVC %q/%q: %v",
+			pod.Namespace,
+			claimName,
+			err)
+	}
+
+	logger.V(10).Info("Extracted volumeSpec from bound PV and PVC", "PVC", klog.KRef(pod.Namespace, claimName), "pvcUID", pvcUID, "PV", klog.KRef("", pvName), "volumeSpecName", volumeSpec.Name())
+	return volumeSpec, claimName, nil
+}
+
+func CreateVolumeSpec(logger klog.Logger, podVolume v1.Volume, pod *v1.Pod, vpm *volume.VolumePluginMgr, pvcLister corelisters.PersistentVolumeClaimLister, pvLister corelisters.PersistentVolumeLister, csiMigratedPluginManager csimigration.PluginManager, csiTranslator csimigration.InTreeToCSITranslator) (*volume.Spec, error) {
+	volumeSpec, claimName, err := createInTreeVolumeSpec(logger, &podVolume, pod, vpm, pvcLister, pvLister, csiMigratedPluginManager, csiTranslator)
+	if err != nil {
+		return nil, err
+	}
+	volumeSpec, err = translateInTreeSpecToCSIIfNeeded(logger, volumeSpec, vpm, csiMigratedPluginManager, csiTranslator, pod.Namespace)
+	if err != nil {
+		return nil, fmt.Errorf(
+			"error performing CSI migration checks and translation for PVC %q/%q: %v",
+			pod.Namespace,
+			claimName,
+			err)
+	}
+	return volumeSpec, nil
+}
+
+// CreateVolumeSpecWithNodeMigration creates and returns a mutable volume.Spec
+// object for the specified volume. It dereferences any PVC to get PV objects, if needed.
+// A volume.Spec that refers to an in-tree plugin spec is translated to refer
+// to a migrated CSI plugin spec if all conditions for CSI migration on a node
+// for the in-tree plugin are satisfied.
+func CreateVolumeSpecWithNodeMigration(logger klog.Logger, podVolume v1.Volume, pod *v1.Pod, nodeName types.NodeName, vpm *volume.VolumePluginMgr, pvcLister corelisters.PersistentVolumeClaimLister, pvLister corelisters.PersistentVolumeLister, csiMigratedPluginManager csimigration.PluginManager, csiTranslator csimigration.InTreeToCSITranslator) (*volume.Spec, error) { + volumeSpec, claimName, err := createInTreeVolumeSpec(logger, &podVolume, pod, vpm, pvcLister, pvLister, csiMigratedPluginManager, csiTranslator) + if err != nil { + return nil, err + } + volumeSpec, err = translateInTreeSpecToCSIOnNodeIfNeeded(logger, volumeSpec, nodeName, vpm, csiMigratedPluginManager, csiTranslator, pod.Namespace) + if err != nil { + return nil, fmt.Errorf( + "error performing CSI migration checks and translation for PVC %q/%q: %v", + pod.Namespace, + claimName, + err) + } + return volumeSpec, nil } // getPVCFromCache fetches the PVC object with the given namespace and @@ -144,7 +155,6 @@ func getPVSpecFromCache(name string, pvcReadOnly bool, expectedClaimUID types.UI if err != nil { return nil, fmt.Errorf("failed to find PV %q in PVInformer cache: %v", name, err) } - if pv.Spec.ClaimRef == nil { return nil, fmt.Errorf( "found PV object %q but it has a nil pv.Spec.ClaimRef indicating it is not yet bound to the claim", @@ -204,7 +214,7 @@ func ProcessPodVolumes(logger klog.Logger, pod *v1.Pod, addVolumes bool, desired // Process volume spec for each volume defined in pod for _, podVolume := range pod.Spec.Volumes { - volumeSpec, err := CreateVolumeSpec(logger, podVolume, pod, nodeName, volumePluginMgr, pvcLister, pvLister, csiMigratedPluginManager, csiTranslator) + volumeSpec, err := CreateVolumeSpecWithNodeMigration(logger, podVolume, pod, nodeName, volumePluginMgr, pvcLister, pvLister, csiMigratedPluginManager, csiTranslator) if err != nil { logger.V(10).Info("Error processing volume for pod", "pod", klog.KObj(pod), "volumeName", podVolume.Name, "err", err) continue @@ -240,7 +250,7 @@ func ProcessPodVolumes(logger klog.Logger, pod *v1.Pod, addVolumes bool, desired } } -func translateInTreeSpecToCSIIfNeeded(logger klog.Logger, spec *volume.Spec, nodeName types.NodeName, vpm *volume.VolumePluginMgr, csiMigratedPluginManager csimigration.PluginManager, csiTranslator csimigration.InTreeToCSITranslator, podNamespace string) (*volume.Spec, error) { +func translateInTreeSpecToCSIOnNodeIfNeeded(logger klog.Logger, spec *volume.Spec, nodeName types.NodeName, vpm *volume.VolumePluginMgr, csiMigratedPluginManager csimigration.PluginManager, csiTranslator csimigration.InTreeToCSITranslator, podNamespace string) (*volume.Spec, error) { translatedSpec := spec migratable, err := csiMigratedPluginManager.IsMigratable(spec) if err != nil { @@ -263,6 +273,22 @@ func translateInTreeSpecToCSIIfNeeded(logger klog.Logger, spec *volume.Spec, nod return translatedSpec, nil } +func translateInTreeSpecToCSIIfNeeded(logger klog.Logger, spec *volume.Spec, vpm *volume.VolumePluginMgr, csiMigratedPluginManager csimigration.PluginManager, csiTranslator csimigration.InTreeToCSITranslator, podNamespace string) (*volume.Spec, error) { + migratable, err := csiMigratedPluginManager.IsMigratable(spec) + if err != nil { + return nil, err + } + if !migratable { + // Jump out of translation fast so we don't check the node if the spec itself is not migratable + return spec, nil + } + translatedSpec, err := csimigration.TranslateInTreeSpecToCSI(logger, spec, podNamespace, csiTranslator) + if err != nil { + return nil, err + } + return 
translatedSpec, nil +} + func isCSIMigrationSupportedOnNode(nodeName types.NodeName, spec *volume.Spec, vpm *volume.VolumePluginMgr, csiMigratedPluginManager csimigration.PluginManager) (bool, error) { pluginName, err := csiMigratedPluginManager.GetInTreePluginNameFromSpec(spec.PersistentVolume, spec.Volume) if err != nil { diff --git a/pkg/controller/volume/attachdetach/util/util_test.go b/pkg/controller/volume/attachdetach/util/util_test.go index 7d5b8c9e8db..d14e2e3b89a 100644 --- a/pkg/controller/volume/attachdetach/util/util_test.go +++ b/pkg/controller/volume/attachdetach/util/util_test.go @@ -243,7 +243,7 @@ func Test_CreateVolumeSpec(t *testing.T) { t.Run(test.desc, func(t *testing.T) { logger, _ := ktesting.NewTestContext(t) plugMgr, intreeToCSITranslator, csiTranslator, pvLister, pvcLister := setup(testNodeName, t) - actualSpec, err := CreateVolumeSpec(logger, test.pod.Spec.Volumes[0], test.pod, test.createNodeName, plugMgr, pvcLister, pvLister, intreeToCSITranslator, csiTranslator) + actualSpec, err := CreateVolumeSpecWithNodeMigration(logger, test.pod.Spec.Volumes[0], test.pod, test.createNodeName, plugMgr, pvcLister, pvLister, intreeToCSITranslator, csiTranslator) if actualSpec == nil && (test.wantPersistentVolume != nil || test.wantVolume != nil) { t.Errorf("got volume spec is nil") diff --git a/pkg/kubelet/volumemanager/cache/desired_state_of_world.go b/pkg/kubelet/volumemanager/cache/desired_state_of_world.go index f59b58c4cfc..6daa49693f6 100644 --- a/pkg/kubelet/volumemanager/cache/desired_state_of_world.go +++ b/pkg/kubelet/volumemanager/cache/desired_state_of_world.go @@ -397,63 +397,31 @@ func (dsw *desiredStateOfWorld) AddPodToVolume( // It returns error if the SELinux label cannot be constructed or when the volume is used with multiple SELinux // labels. func (dsw *desiredStateOfWorld) getSELinuxLabel(volumeSpec *volume.Spec, seLinuxContainerContexts []*v1.SELinuxOptions, podSecurityContext *v1.PodSecurityContext) (seLinuxFileLabel string, pluginSupportsSELinuxContextMount bool, err error) { - if !dsw.seLinuxTranslator.SELinuxEnabled() { - return "", false, nil - } - - pluginSupportsSELinuxContextMount, err = dsw.getSELinuxMountSupport(volumeSpec) + labelInfo, err := util.GetMountSELinuxLabel(volumeSpec, seLinuxContainerContexts, podSecurityContext, dsw.volumePluginMgr, dsw.seLinuxTranslator) if err != nil { - return "", false, err - } + accessMode := getVolumeAccessMode(volumeSpec) + seLinuxSupported := util.VolumeSupportsSELinuxMount(volumeSpec) - if feature.DefaultFeatureGate.Enabled(features.SELinuxChangePolicy) && - podSecurityContext != nil && - podSecurityContext.SELinuxChangePolicy != nil && - *podSecurityContext.SELinuxChangePolicy == v1.SELinuxChangePolicyRecursive { - // The pod has opted into recursive SELinux label changes. Do not mount with -o context. - return "", pluginSupportsSELinuxContextMount, nil - } - - if !pluginSupportsSELinuxContextMount { - return "", pluginSupportsSELinuxContextMount, nil - } - - seLinuxSupported := util.VolumeSupportsSELinuxMount(volumeSpec) - // Ensure that a volume that can be mounted with "-o context=XYZ" is - // used only by containers with the same SELinux contexts. 
- for _, containerContext := range seLinuxContainerContexts { - newLabel, err := dsw.seLinuxTranslator.SELinuxOptionsToFileLabel(containerContext) - if err != nil { - fullErr := fmt.Errorf("failed to construct SELinux label from context %q: %w", containerContext, err) - accessMode := getVolumeAccessMode(volumeSpec) + if util.IsSELinuxLabelTranslationError(err) { err := handleSELinuxMetricError( - fullErr, + err, seLinuxSupported, seLinuxContainerContextWarnings.WithLabelValues(accessMode), seLinuxContainerContextErrors.WithLabelValues(accessMode)) - if err != nil { - return "", false, err - } + return "", labelInfo.PluginSupportsSELinuxContextMount, err } - if seLinuxFileLabel == "" { - seLinuxFileLabel = newLabel - continue - } - if seLinuxFileLabel != newLabel { - accessMode := getVolumeAccessMode(volumeSpec) - - fullErr := fmt.Errorf("volume %s is used with two different SELinux contexts in the same pod: %q, %q", volumeSpec.Name(), seLinuxFileLabel, newLabel) + if util.IsMultipleSELinuxLabelsError(err) { err := handleSELinuxMetricError( - fullErr, + err, seLinuxSupported, seLinuxPodContextMismatchWarnings.WithLabelValues(accessMode), seLinuxPodContextMismatchErrors.WithLabelValues(accessMode)) - if err != nil { - return "", false, err - } + return "", false, err } + return "", labelInfo.PluginSupportsSELinuxContextMount, err } - return seLinuxFileLabel, pluginSupportsSELinuxContextMount, nil + + return labelInfo.SELinuxMountLabel, labelInfo.PluginSupportsSELinuxContextMount, nil } func (dsw *desiredStateOfWorld) MarkVolumesReportedInUse( @@ -668,10 +636,6 @@ func (dsw *desiredStateOfWorld) MarkVolumeAttachability(volumeName v1.UniqueVolu dsw.volumesToMount[volumeName] = volumeObj } -func (dsw *desiredStateOfWorld) getSELinuxMountSupport(volumeSpec *volume.Spec) (bool, error) { - return util.SupportsSELinuxContextMount(volumeSpec, dsw.volumePluginMgr) -} - // Based on isRWOP, bump the right warning / error metric and either consume the error or return it. func handleSELinuxMetricError(err error, seLinuxSupported bool, warningMetric, errorMetric metrics.GaugeMetric) error { if seLinuxSupported { diff --git a/pkg/volume/testing/testing.go b/pkg/volume/testing/testing.go index 30c4fa730b9..fd1777c5a1f 100644 --- a/pkg/volume/testing/testing.go +++ b/pkg/volume/testing/testing.go @@ -40,7 +40,6 @@ import ( "k8s.io/apimachinery/pkg/util/uuid" utiltesting "k8s.io/client-go/util/testing" "k8s.io/kubernetes/pkg/volume" - "k8s.io/kubernetes/pkg/volume/util" "k8s.io/kubernetes/pkg/volume/util/recyclerclient" volumetypes "k8s.io/kubernetes/pkg/volume/util/types" "k8s.io/kubernetes/pkg/volume/util/volumepathhandler" @@ -1188,7 +1187,7 @@ func (fc *FakeProvisioner) Provision(selectedNode *v1.Node, allowedTopologies [] ObjectMeta: metav1.ObjectMeta{ Name: fc.Options.PVName, Annotations: map[string]string{ - util.VolumeDynamicallyCreatedByKey: "fakeplugin-provisioner", + "kubernetes.io/createdby": "fakeplugin-provisioner", }, }, Spec: v1.PersistentVolumeSpec{ diff --git a/pkg/volume/util/selinux.go b/pkg/volume/util/selinux.go index 9f567dbef74..d1a088cc4a2 100644 --- a/pkg/volume/util/selinux.go +++ b/pkg/volume/util/selinux.go @@ -17,11 +17,14 @@ limitations under the License. 
package util

 import (
+	"errors"
 	"fmt"
+	"strings"

 	"github.com/opencontainers/selinux/go-selinux"
 	"github.com/opencontainers/selinux/go-selinux/label"
 	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/util/sets"
 	utilfeature "k8s.io/apiserver/pkg/util/feature"
 	v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
 	"k8s.io/kubernetes/pkg/features"
@@ -71,7 +74,7 @@ func (l *translator) SELinuxOptionsToFileLabel(opts *v1.SELinuxOptions) (string,
 	if err != nil {
 		// In theory, this should be unreachable. InitLabels can fail only when args contain an unknown option,
 		// and all options returned by contextOptions are known.
-		return "", err
+		return "", &SELinuxLabelTranslationError{msg: err.Error()}
 	}
 	// InitLabels() may allocate a new unique SELinux label in kubelet memory. The label is *not* allocated
 	// in the container runtime. Clear it to avoid memory problems.
@@ -156,6 +159,19 @@ func (l *fakeTranslator) SELinuxEnabled() bool {
 	return true
 }

+type SELinuxLabelTranslationError struct {
+	msg string
+}
+
+func (e *SELinuxLabelTranslationError) Error() string {
+	return e.msg
+}
+
+func IsSELinuxLabelTranslationError(err error) bool {
+	var seLinuxError *SELinuxLabelTranslationError
+	return errors.As(err, &seLinuxError)
+}
+
 // SupportsSELinuxContextMount checks if the given volumeSpec supports with mount -o context
 func SupportsSELinuxContextMount(volumeSpec *volume.Spec, volumePluginMgr *volume.VolumePluginMgr) (bool, error) {
 	plugin, _ := volumePluginMgr.FindPluginBySpec(volumeSpec)
@@ -191,6 +207,24 @@ func VolumeSupportsSELinuxMount(volumeSpec *volume.Spec) bool {
 	return true
 }

+// MultipleSELinuxLabelsError indicates that a single volume in a pod is mounted in multiple containers, each with a different SELinux label.
+type MultipleSELinuxLabelsError struct {
+	labels []string
+}
+
+func (e *MultipleSELinuxLabelsError) Error() string {
+	return fmt.Sprintf("multiple SELinux labels found: %s", strings.Join(e.labels, ","))
+}
+
+func (e *MultipleSELinuxLabelsError) Labels() []string {
+	return e.labels
+}
+
+func IsMultipleSELinuxLabelsError(err error) bool {
+	var multiError *MultipleSELinuxLabelsError
+	return errors.As(err, &multiError)
+}
+
 // AddSELinuxMountOption adds -o context="XYZ" mount option to a given list
 func AddSELinuxMountOption(options []string, seLinuxContext string) []string {
 	if !utilfeature.DefaultFeatureGate.Enabled(features.SELinuxMountReadWriteOncePod) {
@@ -200,3 +234,76 @@ func AddSELinuxMountOption(options []string, seLinuxContext string) []string {
 	// For example: dirsync,context="system_u:object_r:container_file_t:s0:c15,c25",noatime
 	return append(options, fmt.Sprintf("context=%q", seLinuxContext))
 }
+
+// SELinuxLabelInfo contains information about SELinux labels that should be used to mount a volume for a Pod.
+type SELinuxLabelInfo struct {
+	// SELinuxMountLabel is the SELinux label that should be used to mount the volume.
+	// It is set only when the volume plugin supports SELinuxMount and the Pod did not opt out via SELinuxChangePolicy.
+	// Empty string otherwise.
+	SELinuxMountLabel string
+	// SELinuxProcessLabel is the SELinux label that the container runtime will use for the Pod.
+	// It is set regardless of whether the volume plugin supports SELinuxMount or the Pod opted out via SELinuxChangePolicy.
+	SELinuxProcessLabel string
+	// PluginSupportsSELinuxContextMount is true if the volume plugin supports SELinux mount.
+	PluginSupportsSELinuxContextMount bool
+}
+
+// GetMountSELinuxLabel returns the SELinux labels that should be used to mount the given volumeSpec, based on the given container SELinux contexts and podSecurityContext.
+// It does not evaluate the volume access mode! It is up to the caller to check the SELinuxMount feature gate and the access mode,
+// since the caller may need to bump different metrics based on feature gates / access modes / labels anyway.
+func GetMountSELinuxLabel(volumeSpec *volume.Spec, seLinuxContainerContexts []*v1.SELinuxOptions, podSecurityContext *v1.PodSecurityContext, volumePluginMgr *volume.VolumePluginMgr, seLinuxTranslator SELinuxLabelTranslator) (SELinuxLabelInfo, error) {
+	info := SELinuxLabelInfo{}
+	if !utilfeature.DefaultFeatureGate.Enabled(features.SELinuxMountReadWriteOncePod) {
+		return info, nil
+	}
+
+	if !seLinuxTranslator.SELinuxEnabled() {
+		return info, nil
+	}
+
+	pluginSupportsSELinuxContextMount, err := SupportsSELinuxContextMount(volumeSpec, volumePluginMgr)
+	if err != nil {
+		return info, err
+	}
+
+	info.PluginSupportsSELinuxContextMount = pluginSupportsSELinuxContextMount
+
+	// Collect all SELinux options from all containers that use this volume.
+	labels := sets.New[string]()
+	for _, containerContext := range seLinuxContainerContexts {
+		lbl, err := seLinuxTranslator.SELinuxOptionsToFileLabel(containerContext)
+		if err != nil {
+			fullErr := fmt.Errorf("failed to construct SELinux label from context %q: %w", containerContext, err)
+			return info, fullErr
+		}
+		labels.Insert(lbl)
+	}
+
+	// Ensure that all containers use the same SELinux label.
+	if labels.Len() > 1 {
+		// This volume is used with more than one SELinux label in the pod.
+		return info, &MultipleSELinuxLabelsError{labels: labels.UnsortedList()}
+	}
+	if labels.Len() == 0 {
+		return info, nil
+	}
+
+	lbl, _ := labels.PopAny()
+	info.SELinuxProcessLabel = lbl
+	info.SELinuxMountLabel = lbl
+
+	if utilfeature.DefaultFeatureGate.Enabled(features.SELinuxChangePolicy) &&
+		podSecurityContext != nil &&
+		podSecurityContext.SELinuxChangePolicy != nil &&
+		*podSecurityContext.SELinuxChangePolicy == v1.SELinuxChangePolicyRecursive {
+		// The pod has opted into recursive SELinux label changes. Do not mount with -o context.
+		info.SELinuxMountLabel = ""
+	}
+
+	if !pluginSupportsSELinuxContextMount {
+		// The volume plugin does not support SELinux mount. Do not mount with -o context.
+		info.SELinuxMountLabel = ""
+	}
+
+	return info, nil
+}
diff --git a/pkg/volume/util/selinux_test.go b/pkg/volume/util/selinux_test.go
new file mode 100644
index 00000000000..1e9a389805c
--- /dev/null
+++ b/pkg/volume/util/selinux_test.go
@@ -0,0 +1,333 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ + +package util + +import ( + "testing" + + v1 "k8s.io/api/core/v1" + utilfeature "k8s.io/apiserver/pkg/util/feature" + "k8s.io/component-base/featuregate" + featuregatetesting "k8s.io/component-base/featuregate/testing" + "k8s.io/kubernetes/pkg/features" + "k8s.io/kubernetes/pkg/volume" + volumetesting "k8s.io/kubernetes/pkg/volume/testing" + "k8s.io/utils/ptr" +) + +func TestGetMountSELinuxLabel(t *testing.T) { + pvRWOP := &v1.PersistentVolume{ + Spec: v1.PersistentVolumeSpec{ + AccessModes: []v1.PersistentVolumeAccessMode{v1.ReadWriteOncePod}, + PersistentVolumeSource: v1.PersistentVolumeSource{ + HostPath: &v1.HostPathVolumeSource{}, + }, + }, + } + pvRWX := &v1.PersistentVolume{ + Spec: v1.PersistentVolumeSpec{ + AccessModes: []v1.PersistentVolumeAccessMode{v1.ReadWriteMany}, + PersistentVolumeSource: v1.PersistentVolumeSource{ + HostPath: &v1.HostPathVolumeSource{}, + }, + }, + } + + seLinuxOpts1 := v1.SELinuxOptions{ + Level: "s0:c123,c456", + } + seLinuxOpts2 := v1.SELinuxOptions{ + Level: "s0:c234,c567", + } + seLinuxOpts3 := v1.SELinuxOptions{ + Level: "s0:c345,c678", + } + label1 := "system_u:object_r:container_file_t:s0:c123,c456" + + tests := []struct { + name string + featureGates []featuregate.Feature // SELinuxMountReadWriteOncePod is always enabled + pluginSupportsSELinux bool + volume *volume.Spec + podSecurityContext *v1.PodSecurityContext + seLinuxOptions []*v1.SELinuxOptions + expectError bool + expectedInfo SELinuxLabelInfo + }{ + // Tests with no labels + { + name: "no label, no changePolicy", + featureGates: nil, + pluginSupportsSELinux: true, + volume: &volume.Spec{PersistentVolume: pvRWOP}, + podSecurityContext: nil, + seLinuxOptions: nil, + expectError: false, + expectedInfo: SELinuxLabelInfo{ + SELinuxMountLabel: "", // no SELinuxOptions + the default policy is recursive + SELinuxProcessLabel: "", // no SELinuxOptions + PluginSupportsSELinuxContextMount: true, + }, + }, + { + name: "no label, Recursive change policy", + featureGates: nil, + pluginSupportsSELinux: true, + volume: &volume.Spec{PersistentVolume: pvRWOP}, + podSecurityContext: &v1.PodSecurityContext{SELinuxChangePolicy: ptr.To(v1.SELinuxChangePolicyRecursive)}, + seLinuxOptions: nil, + expectError: false, + expectedInfo: SELinuxLabelInfo{ + SELinuxMountLabel: "", // no SELinuxOptions + recursive policy + SELinuxProcessLabel: "", // SELinuxOptions + PluginSupportsSELinuxContextMount: true, + }, + }, + // Tests with one label and RWOP volume + { + name: "one label, Recursive change policy, no feature gate", + featureGates: nil, + pluginSupportsSELinux: true, + volume: &volume.Spec{PersistentVolume: pvRWOP}, + podSecurityContext: &v1.PodSecurityContext{SELinuxChangePolicy: ptr.To(v1.SELinuxChangePolicyRecursive)}, + seLinuxOptions: []*v1.SELinuxOptions{&seLinuxOpts1}, + expectError: false, + expectedInfo: SELinuxLabelInfo{ + SELinuxMountLabel: label1, // Recursive policy is not observed when SELinuxChangePolicy is off + SELinuxProcessLabel: label1, // Pod has a label assigned + PluginSupportsSELinuxContextMount: true, + }, + }, + { + name: "one label, Recursive change policy, SELinuxChangePolicy", + featureGates: []featuregate.Feature{features.SELinuxChangePolicy}, + pluginSupportsSELinux: true, + volume: &volume.Spec{PersistentVolume: pvRWOP}, + podSecurityContext: &v1.PodSecurityContext{SELinuxChangePolicy: ptr.To(v1.SELinuxChangePolicyRecursive)}, + seLinuxOptions: []*v1.SELinuxOptions{&seLinuxOpts1}, + expectError: false, + expectedInfo: SELinuxLabelInfo{ + SELinuxMountLabel: "", // Recursive 
policy is effective with SELinuxChangePolicy, affects RWOP too. + SELinuxProcessLabel: label1, // Pod has a label assigned + PluginSupportsSELinuxContextMount: true, + }, + }, + { + name: "one label, no policy", + featureGates: nil, + pluginSupportsSELinux: true, + volume: &volume.Spec{PersistentVolume: pvRWOP}, + podSecurityContext: nil, + seLinuxOptions: []*v1.SELinuxOptions{&seLinuxOpts1}, + expectError: false, + expectedInfo: SELinuxLabelInfo{ + SELinuxMountLabel: label1, // The default policy is MountOption + SELinuxProcessLabel: label1, // Pod has a label assigned + PluginSupportsSELinuxContextMount: true, + }, + }, + { + name: "one label, MountOption policy", + featureGates: nil, + pluginSupportsSELinux: true, + volume: &volume.Spec{PersistentVolume: pvRWOP}, + podSecurityContext: &v1.PodSecurityContext{SELinuxChangePolicy: ptr.To(v1.SELinuxChangePolicyMountOption)}, + seLinuxOptions: []*v1.SELinuxOptions{&seLinuxOpts1}, + expectError: false, + expectedInfo: SELinuxLabelInfo{ + SELinuxMountLabel: label1, // SELinuxChangePolicy feature is disabled, but the default policy is MountOption anyway + SELinuxProcessLabel: label1, // Pod has a label assigned + PluginSupportsSELinuxContextMount: true, + }, + }, + // Tests with RWX volume + { + name: "one label, no policy, RWX", + featureGates: nil, + pluginSupportsSELinux: true, + volume: &volume.Spec{PersistentVolume: pvRWX}, + podSecurityContext: nil, + seLinuxOptions: []*v1.SELinuxOptions{&seLinuxOpts1}, + expectError: false, + expectedInfo: SELinuxLabelInfo{ + SELinuxMountLabel: label1, // GetMountSELinuxLabel() does not check the access mode, it's up to the caller + SELinuxProcessLabel: label1, // Pod has a label assigned + PluginSupportsSELinuxContextMount: true, + }, + }, + { + name: "one label, no policy, RWX, SELinuxChangePolicy", + featureGates: []featuregate.Feature{features.SELinuxChangePolicy}, + pluginSupportsSELinux: true, + volume: &volume.Spec{PersistentVolume: pvRWX}, + podSecurityContext: nil, + seLinuxOptions: []*v1.SELinuxOptions{&seLinuxOpts1}, + expectError: false, + expectedInfo: SELinuxLabelInfo{ + SELinuxMountLabel: label1, // GetMountSELinuxLabel() does not check the access mode, it's up to the caller + SELinuxProcessLabel: label1, // Pod has a label assigned + PluginSupportsSELinuxContextMount: true, + }, + }, + { + name: "one label, MountOption policy, RWX, SELinuxChangePolicy", + featureGates: []featuregate.Feature{features.SELinuxChangePolicy}, + pluginSupportsSELinux: true, + volume: &volume.Spec{PersistentVolume: pvRWX}, + podSecurityContext: &v1.PodSecurityContext{SELinuxChangePolicy: ptr.To(v1.SELinuxChangePolicyMountOption)}, + seLinuxOptions: []*v1.SELinuxOptions{&seLinuxOpts1}, + expectError: false, + expectedInfo: SELinuxLabelInfo{ + SELinuxMountLabel: label1, // GetMountSELinuxLabel() does not check the access mode, it's up to the caller + SELinuxProcessLabel: label1, // Pod has a label assigned + PluginSupportsSELinuxContextMount: true, + }, + }, + { + name: "one label, no policy, RWX, SELinuxMount", + featureGates: []featuregate.Feature{features.SELinuxChangePolicy, features.SELinuxMount}, + pluginSupportsSELinux: true, + volume: &volume.Spec{PersistentVolume: pvRWX}, + podSecurityContext: nil, + seLinuxOptions: []*v1.SELinuxOptions{&seLinuxOpts1}, + expectError: false, + expectedInfo: SELinuxLabelInfo{ + SELinuxMountLabel: label1, // SELinuxMount FG + MountOption policy + SELinuxProcessLabel: label1, // Pod has a label assigned + PluginSupportsSELinuxContextMount: true, + }, + }, + // No plugin 
support + { + name: "one label, Recursive change policy, SELinuxChangePolicy, no plugin support", + featureGates: []featuregate.Feature{features.SELinuxChangePolicy}, + pluginSupportsSELinux: false, + volume: &volume.Spec{PersistentVolume: pvRWOP}, + podSecurityContext: &v1.PodSecurityContext{SELinuxChangePolicy: ptr.To(v1.SELinuxChangePolicyRecursive)}, + seLinuxOptions: []*v1.SELinuxOptions{&seLinuxOpts1}, + expectError: false, + expectedInfo: SELinuxLabelInfo{ + SELinuxMountLabel: "", // No plugin support + SELinuxProcessLabel: label1, // Pod has a label assigned + PluginSupportsSELinuxContextMount: false, + }, + }, + { + name: "one label, no policy, no plugin support", + featureGates: nil, + pluginSupportsSELinux: false, + volume: &volume.Spec{PersistentVolume: pvRWOP}, + podSecurityContext: nil, + seLinuxOptions: []*v1.SELinuxOptions{&seLinuxOpts1}, + expectError: false, + expectedInfo: SELinuxLabelInfo{ + SELinuxMountLabel: "", // No plugin support + SELinuxProcessLabel: label1, // Pod has a label assigned + PluginSupportsSELinuxContextMount: false, + }, + }, + { + name: "one label, MountOption policy, no plugin support", + featureGates: nil, + pluginSupportsSELinux: false, + volume: &volume.Spec{PersistentVolume: pvRWOP}, + podSecurityContext: &v1.PodSecurityContext{SELinuxChangePolicy: ptr.To(v1.SELinuxChangePolicyMountOption)}, + seLinuxOptions: []*v1.SELinuxOptions{&seLinuxOpts1}, + expectError: false, + expectedInfo: SELinuxLabelInfo{ + SELinuxMountLabel: "", // No plugin support + SELinuxProcessLabel: label1, // Pod has a label assigned + PluginSupportsSELinuxContextMount: false, + }, + }, + // Corner cases + { + name: "multiple same labels, no policy", + featureGates: nil, + pluginSupportsSELinux: true, + volume: &volume.Spec{PersistentVolume: pvRWOP}, + podSecurityContext: nil, + seLinuxOptions: []*v1.SELinuxOptions{&seLinuxOpts1, &seLinuxOpts1, &seLinuxOpts1, &seLinuxOpts1}, + expectError: false, + expectedInfo: SELinuxLabelInfo{ + SELinuxMountLabel: label1, // The default policy is MountOption + SELinuxProcessLabel: label1, // Pod has a label assigned + PluginSupportsSELinuxContextMount: true, + }, + }, + // Error cases + { + name: "multiple different labels, no policy", + featureGates: nil, + pluginSupportsSELinux: true, + volume: &volume.Spec{PersistentVolume: pvRWOP}, + podSecurityContext: nil, + seLinuxOptions: []*v1.SELinuxOptions{&seLinuxOpts1, &seLinuxOpts2, &seLinuxOpts3}, + expectError: true, + expectedInfo: SELinuxLabelInfo{ + SELinuxMountLabel: "", + SELinuxProcessLabel: "", + PluginSupportsSELinuxContextMount: true, + }, + }, + { + name: "multiple different labels, Recursive policy", + featureGates: nil, + pluginSupportsSELinux: true, + volume: &volume.Spec{PersistentVolume: pvRWOP}, + podSecurityContext: &v1.PodSecurityContext{SELinuxChangePolicy: ptr.To(v1.SELinuxChangePolicyRecursive)}, + seLinuxOptions: []*v1.SELinuxOptions{&seLinuxOpts1, &seLinuxOpts2, &seLinuxOpts3}, + expectError: true, + expectedInfo: SELinuxLabelInfo{ + SELinuxMountLabel: "", + SELinuxProcessLabel: "", + PluginSupportsSELinuxContextMount: true, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Arrange + for _, fg := range tt.featureGates { + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, fg, true) + } + seLinuxTranslator := NewFakeSELinuxLabelTranslator() + pluginMgr, plugin := volumetesting.GetTestKubeletVolumePluginMgr(t) + plugin.SupportsSELinux = tt.pluginSupportsSELinux + + // Act + info, err := 
GetMountSELinuxLabel(tt.volume, tt.seLinuxOptions, tt.podSecurityContext, pluginMgr, seLinuxTranslator) + + // Assert + if err != nil { + if !tt.expectError { + t.Errorf("GetMountSELinuxLabel() unexpected error: %v", err) + } + return + } + if tt.expectError { + t.Errorf("GetMountSELinuxLabel() expected error, got none") + return + } + + if info != tt.expectedInfo { + t.Errorf("GetMountSELinuxLabel() expected %+v, got %+v", tt.expectedInfo, info) + } + }) + } +} From aa8872d7a330b4f52b7505b579f7e821eebbac43 Mon Sep 17 00:00:00 2001 From: Jan Safranek Date: Wed, 23 Oct 2024 17:20:32 +0200 Subject: [PATCH 02/14] Add SELinux warning controller --- .../app/controllermanager.go | 1 + .../app/controllermanager_test.go | 1 + cmd/kube-controller-manager/app/core.go | 46 +- cmd/kube-controller-manager/app/plugins.go | 5 + .../names/controller_names.go | 1 + .../volume/selinuxwarning/cache/conflict.go | 52 ++ .../selinuxwarning/cache/volumecache.go | 260 ++++++++ .../selinuxwarning/cache/volumecache_test.go | 462 ++++++++++++++ .../volume/selinuxwarning/metrics.go | 81 +++ .../selinux_warning_controller.go | 507 ++++++++++++++++ .../selinux_warning_controller_test.go | 569 ++++++++++++++++++ .../volume/selinuxwarning/volumehost.go | 143 +++++ pkg/volume/csi/csi_plugin.go | 11 +- pkg/volume/plugins.go | 11 +- .../rbac/bootstrappolicy/controller_policy.go | 9 + 15 files changed, 2148 insertions(+), 11 deletions(-) create mode 100644 pkg/controller/volume/selinuxwarning/cache/conflict.go create mode 100644 pkg/controller/volume/selinuxwarning/cache/volumecache.go create mode 100644 pkg/controller/volume/selinuxwarning/cache/volumecache_test.go create mode 100644 pkg/controller/volume/selinuxwarning/metrics.go create mode 100644 pkg/controller/volume/selinuxwarning/selinux_warning_controller.go create mode 100644 pkg/controller/volume/selinuxwarning/selinux_warning_controller_test.go create mode 100644 pkg/controller/volume/selinuxwarning/volumehost.go diff --git a/cmd/kube-controller-manager/app/controllermanager.go b/cmd/kube-controller-manager/app/controllermanager.go index e75edc50bd2..eed0a6988eb 100644 --- a/cmd/kube-controller-manager/app/controllermanager.go +++ b/cmd/kube-controller-manager/app/controllermanager.go @@ -580,6 +580,7 @@ func NewControllerDescriptors() map[string]*ControllerDescriptor { register(newTaintEvictionControllerDescriptor()) register(newServiceCIDRsControllerDescriptor()) register(newStorageVersionMigratorControllerDescriptor()) + register(newSELinuxWarningControllerDescriptor()) for _, alias := range aliases.UnsortedList() { if _, ok := controllers[alias]; ok { diff --git a/cmd/kube-controller-manager/app/controllermanager_test.go b/cmd/kube-controller-manager/app/controllermanager_test.go index 0b1aeb6e9d1..be7d26ba660 100644 --- a/cmd/kube-controller-manager/app/controllermanager_test.go +++ b/cmd/kube-controller-manager/app/controllermanager_test.go @@ -96,6 +96,7 @@ func TestControllerNamesDeclaration(t *testing.T) { names.ValidatingAdmissionPolicyStatusController, names.ServiceCIDRController, names.StorageVersionMigratorController, + names.SELinuxWarningController, ) for _, name := range KnownControllers() { diff --git a/cmd/kube-controller-manager/app/core.go b/cmd/kube-controller-manager/app/core.go index 61c9c296f02..2df27c8eec6 100644 --- a/cmd/kube-controller-manager/app/core.go +++ b/cmd/kube-controller-manager/app/core.go @@ -27,8 +27,6 @@ import ( "strings" "time" - "k8s.io/klog/v2" - v1 "k8s.io/api/core/v1" genericfeatures "k8s.io/apiserver/pkg/features" 
"k8s.io/apiserver/pkg/quota/v1/generic" @@ -40,6 +38,7 @@ import ( "k8s.io/component-base/featuregate" "k8s.io/controller-manager/controller" csitrans "k8s.io/csi-translation-lib" + "k8s.io/klog/v2" "k8s.io/kubernetes/cmd/kube-controller-manager/names" pkgcontroller "k8s.io/kubernetes/pkg/controller" endpointcontroller "k8s.io/kubernetes/pkg/controller/endpoint" @@ -64,6 +63,7 @@ import ( persistentvolumecontroller "k8s.io/kubernetes/pkg/controller/volume/persistentvolume" "k8s.io/kubernetes/pkg/controller/volume/pvcprotection" "k8s.io/kubernetes/pkg/controller/volume/pvprotection" + "k8s.io/kubernetes/pkg/controller/volume/selinuxwarning" "k8s.io/kubernetes/pkg/controller/volume/vacprotection" "k8s.io/kubernetes/pkg/features" quotainstall "k8s.io/kubernetes/pkg/quota/v1/install" @@ -141,7 +141,7 @@ func startNodeIpamController(ctx context.Context, controllerContext ControllerCo // should be dual stack (from different IPFamilies) dualstackServiceCIDR, err := netutils.IsDualStackCIDRs([]*net.IPNet{serviceCIDR, secondaryServiceCIDR}) if err != nil { - return nil, false, fmt.Errorf("failed to perform dualstack check on serviceCIDR and secondaryServiceCIDR error:%v", err) + return nil, false, fmt.Errorf("failed to perform dualstack check on serviceCIDR and secondaryServiceCIDR error: %v", err) } if !dualstackServiceCIDR { return nil, false, fmt.Errorf("serviceCIDR and secondaryServiceCIDR are not dualstack (from different IPfamiles)") @@ -891,3 +891,43 @@ func startStorageVersionGarbageCollectorController(ctx context.Context, controll ).Run(ctx) return nil, true, nil } + +func newSELinuxWarningControllerDescriptor() *ControllerDescriptor { + return &ControllerDescriptor{ + name: names.SELinuxWarningController, + aliases: []string{"selinux-warning"}, + initFunc: startSELinuxWarningController, + isDisabledByDefault: true, + } +} + +func startSELinuxWarningController(ctx context.Context, controllerContext ControllerContext, controllerName string) (controller.Interface, bool, error) { + if !utilfeature.DefaultFeatureGate.Enabled(features.SELinuxChangePolicy) { + return nil, false, nil + } + + logger := klog.FromContext(ctx) + csiDriverInformer := controllerContext.InformerFactory.Storage().V1().CSIDrivers() + plugins, err := ProbePersistentVolumePlugins(logger, controllerContext.ComponentConfig.PersistentVolumeBinderController.VolumeConfiguration) + if err != nil { + return nil, true, fmt.Errorf("failed to probe volume plugins when starting SELinux warning controller: %w", err) + } + + ctx = klog.NewContext(ctx, logger) + seLinuxController, err := + selinuxwarning.NewController( + ctx, + controllerContext.ClientBuilder.ClientOrDie(controllerName), + controllerContext.InformerFactory.Core().V1().Pods(), + controllerContext.InformerFactory.Core().V1().PersistentVolumeClaims(), + controllerContext.InformerFactory.Core().V1().PersistentVolumes(), + csiDriverInformer, + plugins, + GetDynamicPluginProber(controllerContext.ComponentConfig.PersistentVolumeBinderController.VolumeConfiguration), + ) + if err != nil { + return nil, true, fmt.Errorf("failed to start SELinux warning controller: %w", err) + } + go seLinuxController.Run(ctx, 1) + return nil, true, nil +} diff --git a/cmd/kube-controller-manager/app/plugins.go b/cmd/kube-controller-manager/app/plugins.go index cf87b5b6e6b..2c83e7e6b2a 100644 --- a/cmd/kube-controller-manager/app/plugins.go +++ b/cmd/kube-controller-manager/app/plugins.go @@ -79,6 +79,11 @@ func ProbeProvisionableRecyclableVolumePlugins(logger klog.Logger, config persis }) } +// 
ProbePersistentVolumePlugins collects all persistent volume plugins used by KCM controllers into an easy to use list.
+func ProbePersistentVolumePlugins(logger klog.Logger, config persistentvolumeconfig.VolumeConfiguration) ([]volume.VolumePlugin, error) {
+	return probeControllerVolumePlugins(logger, config, nil)
+}
+
 // probeControllerVolumePlugins collects all persistent volume plugins
 // used by KCM controllers into an easy to use list.
 func probeControllerVolumePlugins(logger klog.Logger, config persistentvolumeconfig.VolumeConfiguration, filter func(plugin volume.VolumePlugin) bool) ([]volume.VolumePlugin, error) {
diff --git a/cmd/kube-controller-manager/names/controller_names.go b/cmd/kube-controller-manager/names/controller_names.go
index bcdea21bbae..d553e4e1385 100644
--- a/cmd/kube-controller-manager/names/controller_names.go
+++ b/cmd/kube-controller-manager/names/controller_names.go
@@ -85,4 +85,5 @@ const (
 	VolumeAttributesClassProtectionController = "volumeattributesclass-protection-controller"
 	ServiceCIDRController = "service-cidr-controller"
 	StorageVersionMigratorController = "storage-version-migrator-controller"
+	SELinuxWarningController = "selinux-warning-controller"
 )
diff --git a/pkg/controller/volume/selinuxwarning/cache/conflict.go b/pkg/controller/volume/selinuxwarning/cache/conflict.go
new file mode 100644
index 00000000000..45f6121cdf3
--- /dev/null
+++ b/pkg/controller/volume/selinuxwarning/cache/conflict.go
@@ -0,0 +1,52 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package cache
+
+import (
+	"fmt"
+
+	"k8s.io/client-go/tools/cache"
+)
+
+// Conflict represents a single conflict between two Pods using the same volume with different SELinux labels or policies.
+// An event should be sent to both of them.
+type Conflict struct {
+	// Human-readable name of the conflicting property, like "SELinux label"
+	PropertyName string
+	// Reason for the event, to be set as the Event.Reason field.
+	EventReason string
+
+	// Pod to generate the event on
+	Pod cache.ObjectName
+	PropertyValue string
+	// only for logging / messaging
+	OtherPod cache.ObjectName
+	OtherPropertyValue string
+}
+
+// EventMessage generates a message about this conflict.
+func (c *Conflict) EventMessage() string {
+	// Quote the values for better readability.
+	value := "\"" + c.PropertyValue + "\""
+	otherValue := "\"" + c.OtherPropertyValue + "\""
+	if c.Pod.Namespace == c.OtherPod.Namespace {
+		// In the same namespace, be very specific about the pod names.
+		return fmt.Sprint(c.PropertyName, " ", value, " conflicts with pod ", c.OtherPod.Name, " that uses the same volume as this pod with ", c.PropertyName, " ", otherValue, ". If both pods land on the same node, only one of them may access the volume.")
+	}
+	// Pods are in different namespaces, do not reveal the other namespace or pod name.
+	return fmt.Sprint(c.PropertyName, " ", value, " conflicts with another pod that uses the same volume as this pod with a different ", c.PropertyName, ". 
If both pods land on the same node, only one of them may access the volume.") +} diff --git a/pkg/controller/volume/selinuxwarning/cache/volumecache.go b/pkg/controller/volume/selinuxwarning/cache/volumecache.go new file mode 100644 index 00000000000..a250a33cb98 --- /dev/null +++ b/pkg/controller/volume/selinuxwarning/cache/volumecache.go @@ -0,0 +1,260 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cache + +import ( + "sort" + "sync" + + v1 "k8s.io/api/core/v1" + "k8s.io/client-go/tools/cache" + "k8s.io/klog/v2" +) + +const ( + // Log level at which the volume cache will be dumped after each change. + dumpLogLevel = 10 +) + +type VolumeCache interface { + // Add a single volume to the cache. Returns list of conflicts it caused. + AddVolume(logger klog.Logger, volumeName v1.UniqueVolumeName, podKey cache.ObjectName, label string, changePolicy v1.PodSELinuxChangePolicy, csiDriver string) []Conflict + + // Remove a pod from the cache. Prunes all empty structures. + DeletePod(logger klog.Logger, podKey cache.ObjectName) + + // GetPodsForCSIDriver returns all pods that use volumes with the given CSI driver. + GetPodsForCSIDriver(driverName string) []cache.ObjectName + + // SendConflicts sends all current conflicts to the given channel. + SendConflicts(logger klog.Logger, ch chan<- Conflict) +} + +// VolumeCache stores all volumes used by Pods and their properties that the controller needs to track, +// like SELinux labels and SELinuxChangePolicies. +type volumeCache struct { + mutex sync.Mutex + // All volumes of all existing Pods. + volumes map[v1.UniqueVolumeName]usedVolume +} + +var _ VolumeCache = &volumeCache{} + +// NewVolumeLabelCache creates a new VolumeCache. +func NewVolumeLabelCache() VolumeCache { + return &volumeCache{ + volumes: make(map[v1.UniqueVolumeName]usedVolume), + } +} + +// usedVolume is a volume that is used by one or more existing pods. +// It stores information about these pods to detect conflicts and generate events. +type usedVolume struct { + csiDriver string + // List of pods that use this volume. Indexed by pod key for faster deletion. + pods map[cache.ObjectName]podInfo +} + +// Information about a Pod that uses a volume. +type podInfo struct { + // SELinux label to be applied to the volume in the Pod. + // Either as mount option or recursively by the container runtime. + label string + // SELinuxChangePolicy of the Pod. + changePolicy v1.PodSELinuxChangePolicy +} + +func newPodInfoListForPod(podKey cache.ObjectName, label string, changePolicy v1.PodSELinuxChangePolicy) map[cache.ObjectName]podInfo { + return map[cache.ObjectName]podInfo{ + podKey: { + label: label, + changePolicy: changePolicy, + }, + } +} + +// Add a single volume to the cache. Returns list of conflicts it caused. 
+func (c *volumeCache) AddVolume(logger klog.Logger, volumeName v1.UniqueVolumeName, podKey cache.ObjectName, label string, changePolicy v1.PodSELinuxChangePolicy, csiDriver string) []Conflict { + c.mutex.Lock() + defer c.mutex.Unlock() + defer c.dump(logger) + + conflicts := make([]Conflict, 0) + + volume, found := c.volumes[volumeName] + if !found { + // This is a new volume + volume = usedVolume{ + csiDriver: csiDriver, + pods: newPodInfoListForPod(podKey, label, changePolicy), + } + c.volumes[volumeName] = volume + return conflicts + } + + // The volume is already known + // Add the pod to the cache or update its properties + volume.pods[podKey] = podInfo{ + label: label, + changePolicy: changePolicy, + } + + // Emit conflicts for the pod + for otherPodKey, otherPodInfo := range volume.pods { + if otherPodInfo.changePolicy != changePolicy { + // Send conflict to both pods + conflicts = append(conflicts, Conflict{ + PropertyName: "SELinuxChangePolicy", + EventReason: "SELinuxChangePolicyConflict", + Pod: podKey, + PropertyValue: string(changePolicy), + OtherPod: otherPodKey, + OtherPropertyValue: string(otherPodInfo.changePolicy), + }, Conflict{ + PropertyName: "SELinuxChangePolicy", + EventReason: "SELinuxChangePolicyConflict", + Pod: otherPodKey, + PropertyValue: string(otherPodInfo.changePolicy), + OtherPod: podKey, + OtherPropertyValue: string(changePolicy), + }) + } + if otherPodInfo.label != label { + // Send conflict to both pods + conflicts = append(conflicts, Conflict{ + PropertyName: "SELinux label", + EventReason: "SELinuxLabelConflict", + Pod: podKey, + PropertyValue: label, + OtherPod: otherPodKey, + OtherPropertyValue: otherPodInfo.label, + }, Conflict{ + PropertyName: "SELinux label", + EventReason: "SELinuxLabelConflict", + Pod: otherPodKey, + PropertyValue: otherPodInfo.label, + OtherPod: podKey, + OtherPropertyValue: label, + }) + } + } + return conflicts +} + +// Remove a pod from the cache. Prunes all empty structures. +func (c *volumeCache) DeletePod(logger klog.Logger, podKey cache.ObjectName) { + c.mutex.Lock() + defer c.mutex.Unlock() + defer c.dump(logger) + + for volumeName, volume := range c.volumes { + delete(volume.pods, podKey) + if len(volume.pods) == 0 { + delete(c.volumes, volumeName) + } + } +} + +func (c *volumeCache) dump(logger klog.Logger) { + if !logger.V(dumpLogLevel).Enabled() { + return + } + logger.Info("VolumeCache dump:") + + // sort the volume to have consistent output + volumeIDs := make([]v1.UniqueVolumeName, 0, len(c.volumes)) + for volumeID := range c.volumes { + volumeIDs = append(volumeIDs, volumeID) + } + sort.Slice(volumeIDs, func(i, j int) bool { + return volumeIDs[i] < volumeIDs[j] + }) + for _, volumeID := range volumeIDs { + volume := c.volumes[volumeID] + logger.Info("Cached volume", "volume", volumeID, "csiDriver", volume.csiDriver) + + // Sort the pods to have consistent output + podKeys := make([]cache.ObjectName, 0, len(volume.pods)) + for podKey := range volume.pods { + podKeys = append(podKeys, podKey) + } + sort.Slice(podKeys, func(i, j int) bool { + return podKeys[i].String() < podKeys[j].String() + }) + for _, podKey := range podKeys { + podInfo := volume.pods[podKey] + logger.Info(" pod", "pod", podKey, "label", podInfo.label, "changePolicy", podInfo.changePolicy) + } + } +} + +// GetPodsForCSIDriver returns all pods that use volumes with the given CSI driver. 
+func (c *volumeCache) GetPodsForCSIDriver(driverName string) []cache.ObjectName { + c.mutex.Lock() + defer c.mutex.Unlock() + + var pods []cache.ObjectName + for _, volume := range c.volumes { + if volume.csiDriver != driverName { + continue + } + for podKey := range volume.pods { + pods = append(pods, podKey) + } + } + return pods +} + +// SendConflicts sends all current conflicts to the given channel. +func (c *volumeCache) SendConflicts(logger klog.Logger, ch chan<- Conflict) { + c.mutex.Lock() + defer c.mutex.Unlock() + logger.V(4).Info("Scraping conflicts") + c.dump(logger) + + for _, volume := range c.volumes { + // compare pods that use the same volume with each other + for podKey, podInfo := range volume.pods { + for otherPodKey, otherPodInfo := range volume.pods { + if podKey == otherPodKey { + continue + } + // create conflict only for the first pod. The other pod will get the same conflict in its own iteration of `volume.pods` loop. + if podInfo.changePolicy != otherPodInfo.changePolicy { + ch <- Conflict{ + PropertyName: "SELinuxChangePolicy", + EventReason: "SELinuxChangePolicyConflict", + Pod: podKey, + PropertyValue: string(podInfo.changePolicy), + OtherPod: otherPodKey, + OtherPropertyValue: string(otherPodInfo.changePolicy), + } + } + if podInfo.label != otherPodInfo.label { + ch <- Conflict{ + PropertyName: "SELinux label", + EventReason: "SELinuxLabelConflict", + Pod: podKey, + PropertyValue: podInfo.label, + OtherPod: otherPodKey, + OtherPropertyValue: otherPodInfo.label, + } + } + } + } + } +} diff --git a/pkg/controller/volume/selinuxwarning/cache/volumecache_test.go b/pkg/controller/volume/selinuxwarning/cache/volumecache_test.go new file mode 100644 index 00000000000..03a1e49c094 --- /dev/null +++ b/pkg/controller/volume/selinuxwarning/cache/volumecache_test.go @@ -0,0 +1,462 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package cache + +import ( + "reflect" + "sort" + "testing" + + v1 "k8s.io/api/core/v1" + "k8s.io/client-go/tools/cache" + "k8s.io/klog/v2" + "k8s.io/klog/v2/ktesting" +) + +func getTestLoggers(t *testing.T) (klog.Logger, klog.Logger) { + // Logger with the default V(5), does not log dumps + logger, _ := ktesting.NewTestContext(t) + + // Logger with V(dumpLogLevel), logs dumps + logConfig := ktesting.NewConfig(ktesting.Verbosity(dumpLogLevel)) + dumpLogger := ktesting.NewLogger(t, logConfig) + + return logger, dumpLogger +} + +func sortConflicts(conflicts []Conflict) { + sort.Slice(conflicts, func(i, j int) bool { + return conflicts[i].Pod.String() < conflicts[j].Pod.String() + }) +} + +// Delete all items in a bigger cache and check it's empty +func TestVolumeCache_DeleteAll(t *testing.T) { + var podsToDelete []cache.ObjectName + c := NewVolumeLabelCache().(*volumeCache) + logger, dumpLogger := getTestLoggers(t) + + // Arrange: add a lot of volumes to the cache + for _, namespace := range []string{"ns1", "ns2", "ns3", "ns4"} { + for _, name := range []string{"pod1", "pod2", "pod3", "pod4"} { + for _, volumeName := range []v1.UniqueVolumeName{"vol1", "vol2", "vol3", "vol4"} { + podKey := cache.ObjectName{Namespace: namespace, Name: name} + podsToDelete = append(podsToDelete, podKey) + conflicts := c.AddVolume(logger, volumeName, podKey, "label1", v1.SELinuxChangePolicyMountOption, "csiDriver1") + if len(conflicts) != 0 { + // All volumes have the same labels and policy, there should be no conflicts + t.Errorf("AddVolume %s/%s %s returned unexpected conflicts: %+v", namespace, name, volumeName, conflicts) + } + } + } + } + t.Log("Before deleting all pods:") + c.dump(dumpLogger) + + // Act: delete all pods + for _, podKey := range podsToDelete { + c.DeletePod(logger, podKey) + } + + // Assert: the cache is empty + if len(c.volumes) != 0 { + t.Errorf("Expected cache to be empty, got %d volumes", len(c.volumes)) + c.dump(dumpLogger) + } +} + +type podWithVolume struct { + podNamespace string + podName string + volumeName v1.UniqueVolumeName + label string + changePolicy v1.PodSELinuxChangePolicy +} + +func addReverseConflict(conflicts []Conflict) []Conflict { + newConflicts := make([]Conflict, 0, len(conflicts)*2) + for _, c := range conflicts { + reversedConflict := Conflict{ + PropertyName: c.PropertyName, + EventReason: c.EventReason, + Pod: c.OtherPod, + PropertyValue: c.OtherPropertyValue, + OtherPod: c.Pod, + OtherPropertyValue: c.PropertyValue, + } + newConflicts = append(newConflicts, c, reversedConflict) + } + return newConflicts +} + +// Test AddVolume and SendConflicts together, they both provide []conflict with the same data +func TestVolumeCache_AddVolumeSendConflicts(t *testing.T) { + existingPods := []podWithVolume{ + { + podNamespace: "ns1", + podName: "pod1-mountOption", + volumeName: "vol1", + label: "label1", + changePolicy: v1.SELinuxChangePolicyMountOption, + }, + { + podNamespace: "ns2", + podName: "pod2-recursive", + volumeName: "vol2", + label: "label2", + changePolicy: v1.SELinuxChangePolicyRecursive, + }, + { + podNamespace: "ns3", + podName: "pod3-1", + volumeName: "vol3", // vol3 is used by 2 pods with the same label + recursive policy + label: "label3", + changePolicy: v1.SELinuxChangePolicyRecursive, + }, + { + podNamespace: "ns3", + podName: "pod3-2", + volumeName: "vol3", // vol3 is used by 2 pods with the same label + recursive policy + label: "label3", + changePolicy: v1.SELinuxChangePolicyRecursive, + }, + { + podNamespace: "ns4", + podName: "pod4-1", + 
volumeName: "vol4", // vol4 is used by 2 pods with the same label + mount policy + label: "label4", + changePolicy: v1.SELinuxChangePolicyMountOption, + }, + { + podNamespace: "ns4", + podName: "pod4-2", + volumeName: "vol4", // vol4 is used by 2 pods with the same label + mount policy + label: "label4", + changePolicy: v1.SELinuxChangePolicyMountOption, + }, + } + + tests := []struct { + name string + initialPods []podWithVolume + podToAdd podWithVolume + expectedConflicts []Conflict // conflicts for the other direction (pod, otherPod = otherPod, pod) will be added automatically + }{ + { + name: "new volume in empty cache", + initialPods: nil, + podToAdd: podWithVolume{ + podNamespace: "testns", + podName: "testpod", + volumeName: "vol-new", + label: "label-new", + changePolicy: v1.SELinuxChangePolicyMountOption, + }, + expectedConflicts: nil, + }, + { + name: "new volume", + initialPods: existingPods, + podToAdd: podWithVolume{ + podNamespace: "testns", + podName: "testpod", + volumeName: "vol-new", + label: "label-new", + changePolicy: v1.SELinuxChangePolicyMountOption, + }, + expectedConflicts: nil, + }, + { + name: "existing volume in a new pod with existing policy and label", + initialPods: existingPods, + podToAdd: podWithVolume{ + podNamespace: "testns", + podName: "testpod", + volumeName: "vol1", + label: "label1", + changePolicy: v1.SELinuxChangePolicyMountOption, + }, + expectedConflicts: nil, + }, + { + name: "existing volume in a new pod with existing policy and new conflicting label", + initialPods: existingPods, + podToAdd: podWithVolume{ + podNamespace: "testns", + podName: "testpod", + volumeName: "vol1", + label: "label-new", + changePolicy: v1.SELinuxChangePolicyMountOption, + }, + expectedConflicts: []Conflict{ + { + PropertyName: "SELinux label", + EventReason: "SELinuxLabelConflict", + Pod: cache.ObjectName{Namespace: "testns", Name: "testpod"}, + PropertyValue: "label-new", + OtherPod: cache.ObjectName{Namespace: "ns1", Name: "pod1-mountOption"}, + OtherPropertyValue: "label1", + }, + }, + }, + { + name: "existing volume in a new pod with new conflicting policy and existing label", + initialPods: existingPods, + podToAdd: podWithVolume{ + podNamespace: "testns", + podName: "testpod", + volumeName: "vol1", + label: "label1", + changePolicy: v1.SELinuxChangePolicyRecursive, + }, + expectedConflicts: []Conflict{ + { + PropertyName: "SELinuxChangePolicy", + EventReason: "SELinuxChangePolicyConflict", + Pod: cache.ObjectName{Namespace: "testns", Name: "testpod"}, + PropertyValue: "Recursive", + OtherPod: cache.ObjectName{Namespace: "ns1", Name: "pod1-mountOption"}, + OtherPropertyValue: "MountOption", + }, + }, + }, + { + name: "existing volume in a new pod with new conflicting policy and new conflicting label", + initialPods: existingPods, + podToAdd: podWithVolume{ + podNamespace: "testns", + podName: "testpod", + volumeName: "vol1", + label: "label-new", + changePolicy: v1.SELinuxChangePolicyRecursive, + }, + expectedConflicts: []Conflict{ + { + PropertyName: "SELinuxChangePolicy", + EventReason: "SELinuxChangePolicyConflict", + Pod: cache.ObjectName{Namespace: "testns", Name: "testpod"}, + PropertyValue: "Recursive", + OtherPod: cache.ObjectName{Namespace: "ns1", Name: "pod1-mountOption"}, + OtherPropertyValue: "MountOption", + }, + { + PropertyName: "SELinux label", + EventReason: "SELinuxLabelConflict", + Pod: cache.ObjectName{Namespace: "testns", Name: "testpod"}, + PropertyValue: "label-new", + OtherPod: cache.ObjectName{Namespace: "ns1", Name: 
"pod1-mountOption"}, + OtherPropertyValue: "label1", + }, + }, + }, + { + name: "existing pod is replaced with different non-conflicting policy and label", + initialPods: existingPods, + podToAdd: podWithVolume{ + + podNamespace: "ns2", + podName: "pod2-recursive", + volumeName: "vol2", // there is no other pod that uses vol2 -> change of policy and label is possible + label: "label-new", // was label2 in the original pod2 + changePolicy: v1.SELinuxChangePolicyMountOption, // was Recursive in the original pod2 + }, + expectedConflicts: nil, + }, + { + name: "existing pod is replaced with conflicting policy and label", + initialPods: existingPods, + podToAdd: podWithVolume{ + + podNamespace: "ns3", + podName: "pod3-1", + volumeName: "vol3", // vol3 is used by pod3-2 with label3 and Recursive policy + label: "label-new", // Technically, it's not possible to change a label of an existing pod, but we still check for conflicts + changePolicy: v1.SELinuxChangePolicyMountOption, // ChangePolicy change can happen when CSIDriver is updated from SELinuxMount: false to SELinuxMount: true + }, + expectedConflicts: []Conflict{ + { + PropertyName: "SELinuxChangePolicy", + EventReason: "SELinuxChangePolicyConflict", + Pod: cache.ObjectName{Namespace: "ns3", Name: "pod3-1"}, + PropertyValue: "MountOption", + OtherPod: cache.ObjectName{Namespace: "ns3", Name: "pod3-2"}, + OtherPropertyValue: "Recursive", + }, + { + PropertyName: "SELinux label", + EventReason: "SELinuxLabelConflict", + Pod: cache.ObjectName{Namespace: "ns3", Name: "pod3-1"}, + PropertyValue: "label-new", + OtherPod: cache.ObjectName{Namespace: "ns3", Name: "pod3-2"}, + OtherPropertyValue: "label3", + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + logger, dumpLogger := getTestLoggers(t) + // Arrange: add initial pods to the cache + c := NewVolumeLabelCache().(*volumeCache) + for _, podToAdd := range tt.initialPods { + conflicts := c.AddVolume(logger, podToAdd.volumeName, cache.ObjectName{Namespace: podToAdd.podNamespace, Name: podToAdd.podName}, podToAdd.label, podToAdd.changePolicy, "csiDriver1") + if len(conflicts) != 0 { + t.Errorf("Initial AddVolume %s/%s %s returned unexpected conflicts: %+v", podToAdd.podNamespace, podToAdd.podName, podToAdd.volumeName, conflicts) + } + } + + // Act + conflicts := c.AddVolume(logger, tt.podToAdd.volumeName, cache.ObjectName{Namespace: tt.podToAdd.podNamespace, Name: tt.podToAdd.podName}, tt.podToAdd.label, tt.podToAdd.changePolicy, "csiDriver1") + + // Assert + expectedConflicts := addReverseConflict(tt.expectedConflicts) + sortConflicts(conflicts) + sortConflicts(expectedConflicts) + if !reflect.DeepEqual(conflicts, expectedConflicts) { + t.Errorf("AddVolume returned unexpected conflicts: %+v", conflicts) + c.dump(dumpLogger) + } + // Expect the pod + volume to be present in the cache + volume, ok := c.volumes[tt.podToAdd.volumeName] + if !ok { + t.Errorf("volume %s is not present in the cache", tt.podToAdd.volumeName) + } + podKey := cache.ObjectName{Namespace: tt.podToAdd.podNamespace, Name: tt.podToAdd.podName} + existingInfo, ok := volume.pods[podKey] + if !ok { + t.Errorf("pod %s is not present in the cache", podKey) + } + expectedPodInfo := podInfo{ + label: tt.podToAdd.label, + changePolicy: tt.podToAdd.changePolicy, + } + if !reflect.DeepEqual(existingInfo, expectedPodInfo) { + t.Errorf("pod %s has unexpected info: %+v", podKey, existingInfo) + } + + // Act again: get the conflicts via SendConflicts + ch := make(chan Conflict) + go func() { + 
c.SendConflicts(logger, ch) + close(ch) + }() + + // Assert + receivedConflicts := []Conflict{} + for c := range ch { + receivedConflicts = append(receivedConflicts, c) + } + sortConflicts(receivedConflicts) + if !reflect.DeepEqual(receivedConflicts, expectedConflicts) { + t.Errorf("SendConflicts returned unexpected conflicts: %+v", receivedConflicts) + c.dump(dumpLogger) + } + }) + } +} + +func TestVolumeCache_GetPodsForCSIDriver(t *testing.T) { + c := NewVolumeLabelCache().(*volumeCache) + logger, dumpLogger := getTestLoggers(t) + + existingPods := map[string][]podWithVolume{ + "csiDriver1": { + { + podNamespace: "ns1", + podName: "pod1-mountOption", + volumeName: "vol1", + label: "label1", + changePolicy: v1.SELinuxChangePolicyMountOption, + }, + }, + "csiDriver2": { + { + podNamespace: "ns2", + podName: "pod2-recursive", + volumeName: "vol2", + label: "label2", + changePolicy: v1.SELinuxChangePolicyRecursive, + }, + { + podNamespace: "ns3", + podName: "pod3-1", + volumeName: "vol3", // vol3 is used by 2 pods with the same label + recursive policy + label: "label3", + changePolicy: v1.SELinuxChangePolicyRecursive, + }, + { + podNamespace: "ns3", + podName: "pod3-2", + volumeName: "vol3", // vol3 is used by 2 pods with the same label + recursive policy + label: "label3", + changePolicy: v1.SELinuxChangePolicyRecursive, + }, + }, + "csiDriver3": { + { + podNamespace: "ns4", + podName: "pod4-1", + volumeName: "vol4", // vol4 is used by 2 pods with the same label + mount policy + label: "label4", + changePolicy: v1.SELinuxChangePolicyMountOption, + }, + { + podNamespace: "ns4", + podName: "pod4-2", + volumeName: "vol4", // vol4 is used by 2 pods with the same label + mount policy + label: "label4", + changePolicy: v1.SELinuxChangePolicyMountOption, + }, + }, + } + + for csiDriverName, pods := range existingPods { + for _, podToAdd := range pods { + conflicts := c.AddVolume(logger, podToAdd.volumeName, cache.ObjectName{Namespace: podToAdd.podNamespace, Name: podToAdd.podName}, podToAdd.label, podToAdd.changePolicy, csiDriverName) + if len(conflicts) != 0 { + t.Errorf("Initial AddVolume %s/%s %s returned unexpected conflicts: %+v", podToAdd.podNamespace, podToAdd.podName, podToAdd.volumeName, conflicts) + } + } + } + + // Act + expectedPods := map[string][]cache.ObjectName{ + "csiDriver1": { + {Namespace: "ns1", Name: "pod1-mountOption"}, + }, + "csiDriver2": { + {Namespace: "ns2", Name: "pod2-recursive"}, + {Namespace: "ns3", Name: "pod3-1"}, + {Namespace: "ns3", Name: "pod3-2"}, + }, + "csiDriver3": { + {Namespace: "ns4", Name: "pod4-1"}, + {Namespace: "ns4", Name: "pod4-2"}, + }, + "csiDriver4": nil, // totally unknown CSI driver + } + for csiDriverName, expectedPodsForDriver := range expectedPods { + podsForDriver := c.GetPodsForCSIDriver(csiDriverName) + sort.Slice(podsForDriver, func(i, j int) bool { + return podsForDriver[i].String() < podsForDriver[j].String() + }) + if !reflect.DeepEqual(podsForDriver, expectedPodsForDriver) { + t.Errorf("GetPodsForCSIDriver(%s) returned unexpected pods: %+v", csiDriverName, podsForDriver) + c.dump(dumpLogger) + } + } +} diff --git a/pkg/controller/volume/selinuxwarning/metrics.go b/pkg/controller/volume/selinuxwarning/metrics.go new file mode 100644 index 00000000000..d95665c9162 --- /dev/null +++ b/pkg/controller/volume/selinuxwarning/metrics.go @@ -0,0 +1,81 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package selinuxwarning + +import ( + "sync" + + "k8s.io/component-base/metrics" + "k8s.io/component-base/metrics/legacyregistry" + "k8s.io/klog/v2" + "k8s.io/kubernetes/pkg/controller/volume/selinuxwarning/cache" +) + +var ( + seLinuxConflictDesc = metrics.NewDesc( + metrics.BuildFQName("", "selinux_warning_controller", "selinux_volume_conflict"), + "Conflict between two Pods using the same volume", + []string{"property", "pod1_namespace", "pod1_name", "pod1_value", "pod2_namespace", "pod2_name", "pod2_value"}, nil, + metrics.ALPHA, "") +) +var registerMetrics sync.Once + +func RegisterMetrics(logger klog.Logger, cache cache.VolumeCache) { + registerMetrics.Do(func() { + legacyregistry.CustomMustRegister(newCollector(logger, cache)) + }) +} +func newCollector(logger klog.Logger, cache cache.VolumeCache) *collector { + return &collector{ + logger: logger, + cache: cache, + } +} + +var _ metrics.StableCollector = &collector{} + +type collector struct { + metrics.BaseStableCollector + cache cache.VolumeCache + logger klog.Logger +} + +func (c *collector) DescribeWithStability(ch chan<- *metrics.Desc) { + ch <- seLinuxConflictDesc +} + +func (c *collector) CollectWithStability(ch chan<- metrics.Metric) { + conflictCh := make(chan cache.Conflict) + go func() { + c.cache.SendConflicts(c.logger, conflictCh) + close(conflictCh) + }() + + for conflict := range conflictCh { + ch <- metrics.NewLazyConstMetric(seLinuxConflictDesc, + metrics.GaugeValue, + 1.0, + conflict.PropertyName, + conflict.Pod.Namespace, + conflict.Pod.Name, + conflict.PropertyValue, + conflict.OtherPod.Namespace, + conflict.OtherPod.Name, + conflict.OtherPropertyValue, + ) + } +} diff --git a/pkg/controller/volume/selinuxwarning/selinux_warning_controller.go b/pkg/controller/volume/selinuxwarning/selinux_warning_controller.go new file mode 100644 index 00000000000..e531e3c1c3d --- /dev/null +++ b/pkg/controller/volume/selinuxwarning/selinux_warning_controller.go @@ -0,0 +1,507 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package selinuxwarning
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	v1 "k8s.io/api/core/v1"
+	storagev1 "k8s.io/api/storage/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	errorutils "k8s.io/apimachinery/pkg/util/errors"
+	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
+	"k8s.io/apimachinery/pkg/util/wait"
+	utilfeature "k8s.io/apiserver/pkg/util/feature"
+	coreinformers "k8s.io/client-go/informers/core/v1"
+	storageinformersv1 "k8s.io/client-go/informers/storage/v1"
+	clientset "k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/kubernetes/scheme"
+	v1core "k8s.io/client-go/kubernetes/typed/core/v1"
+	corelisters "k8s.io/client-go/listers/core/v1"
+	storagelisters "k8s.io/client-go/listers/storage/v1"
+	"k8s.io/client-go/tools/cache"
+	"k8s.io/client-go/tools/record"
+	"k8s.io/client-go/util/workqueue"
+	csitrans "k8s.io/csi-translation-lib"
+	"k8s.io/klog/v2"
+	"k8s.io/kubernetes/pkg/controller/volume/attachdetach/util"
+	"k8s.io/kubernetes/pkg/controller/volume/common"
+	volumecache "k8s.io/kubernetes/pkg/controller/volume/selinuxwarning/cache"
+	"k8s.io/kubernetes/pkg/volume"
+	"k8s.io/kubernetes/pkg/volume/csi"
+	"k8s.io/kubernetes/pkg/volume/csimigration"
+	volumeutil "k8s.io/kubernetes/pkg/volume/util"
+	"k8s.io/utils/ptr"
+)
+
+// The SELinuxWarning controller emits a warning event and metrics when
+// two pods *might* use the same volume with different SELinux labels.
+// It is optional. It does nothing on a cluster that has SELinux disabled.
+// It does not modify any API objects except for the warning events.
+//
+// The controller watches *all* Pods and reports issues on all their volumes,
+// regardless of whether the Pods actually run, are Pending forever, or have
+// correct node anti-affinity and never run on the same node.
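+//
+// A detected conflict is reported as an event on both affected Pods, for example:
+//
+//	SELinuxLabelConflict: SELinux label "label1" conflicts with pod pod2 that
+//	uses the same volume as this pod with SELinux label "label2". If both pods
+//	land on the same node, only one of them may access the volume.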
+type Controller struct { + kubeClient clientset.Interface + podLister corelisters.PodLister + podIndexer cache.Indexer + podsSynced cache.InformerSynced + pvLister corelisters.PersistentVolumeLister + pvsSynced cache.InformerSynced + pvcLister corelisters.PersistentVolumeClaimLister + pvcsSynced cache.InformerSynced + csiDriverLister storagelisters.CSIDriverLister + csiDriversSynced cache.InformerSynced + + vpm *volume.VolumePluginMgr + cmpm csimigration.PluginManager + csiTranslator csimigration.InTreeToCSITranslator + seLinuxTranslator volumeutil.SELinuxLabelTranslator + eventRecorder record.EventRecorder + + recorder record.EventRecorder + queue workqueue.TypedRateLimitingInterface[cache.ObjectName] + + labelCache volumecache.VolumeCache +} + +func NewController( + ctx context.Context, + kubeClient clientset.Interface, + podInformer coreinformers.PodInformer, + pvcInformer coreinformers.PersistentVolumeClaimInformer, + pvInformer coreinformers.PersistentVolumeInformer, + csiDriverInformer storageinformersv1.CSIDriverInformer, + plugins []volume.VolumePlugin, + prober volume.DynamicPluginProber, +) (*Controller, error) { + + eventBroadcaster := record.NewBroadcaster(record.WithContext(ctx)) + eventBroadcaster.StartLogging(klog.Infof) + eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")}) + recorder := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "selinux_warning"}) + c := &Controller{ + kubeClient: kubeClient, + podLister: podInformer.Lister(), + podIndexer: podInformer.Informer().GetIndexer(), + podsSynced: podInformer.Informer().HasSynced, + pvLister: pvInformer.Lister(), + pvsSynced: pvInformer.Informer().HasSynced, + pvcLister: pvcInformer.Lister(), + pvcsSynced: pvcInformer.Informer().HasSynced, + csiDriverLister: csiDriverInformer.Lister(), + csiDriversSynced: csiDriverInformer.Informer().HasSynced, + vpm: &volume.VolumePluginMgr{}, + seLinuxTranslator: volumeutil.NewSELinuxLabelTranslator(), + + recorder: recorder, + queue: workqueue.NewTypedRateLimitingQueueWithConfig[cache.ObjectName]( + workqueue.DefaultTypedControllerRateLimiter[cache.ObjectName](), + workqueue.TypedRateLimitingQueueConfig[cache.ObjectName]{ + Name: "selinux_warning", + }, + ), + labelCache: volumecache.NewVolumeLabelCache(), + eventRecorder: recorder, + } + + err := c.vpm.InitPlugins(plugins, prober, c) + if err != nil { + return nil, fmt.Errorf("could not initialize volume plugins for SELinux warning controller: %w", err) + } + csiTranslator := csitrans.New() + c.csiTranslator = csiTranslator + c.cmpm = csimigration.NewPluginManager(csiTranslator, utilfeature.DefaultFeatureGate) + + err = common.AddPodPVCIndexerIfNotPresent(c.podIndexer) + if err != nil { + return nil, fmt.Errorf("could not initialize SELinux warning controller: %w", err) + } + + logger := klog.FromContext(ctx) + _, err = podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: func(obj interface{}) { c.addPod(logger, obj) }, + DeleteFunc: func(obj interface{}) { c.deletePod(logger, obj) }, + // Not watching updates: Pod volumes and SecurityContext are immutable after creation + }) + if err != nil { + return nil, err + } + + _, err = pvcInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: func(obj interface{}) { c.addPVC(logger, obj) }, + UpdateFunc: func(oldObj, newObj interface{}) { c.updatePVC(logger, oldObj, newObj) }, + }) + if err != nil { + return nil, err + } + + _, err = 
pvInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
+		AddFunc:    func(obj interface{}) { c.addPV(logger, obj) },
+		UpdateFunc: func(oldObj, newObj interface{}) { c.updatePV(logger, oldObj, newObj) },
+	})
+	if err != nil {
+		return nil, err
+	}
+
+	_, err = csiDriverInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
+		AddFunc:    func(obj interface{}) { c.addCSIDriver(logger, obj) },
+		UpdateFunc: func(oldObj, newObj interface{}) { c.updateCSIDriver(logger, oldObj, newObj) },
+		DeleteFunc: func(obj interface{}) { c.deleteCSIDriver(logger, obj) },
+	})
+	if err != nil {
+		return nil, err
+	}
+
+	RegisterMetrics(logger, c.labelCache)
+
+	return c, nil
+}
+
+func (c *Controller) addPod(logger klog.Logger, obj interface{}) {
+	podRef, err := cache.DeletionHandlingObjectToName(obj)
+	if err != nil {
+		utilruntime.HandleError(fmt.Errorf("couldn't get key for pod %#v: %w", obj, err))
+		return
+	}
+	c.queue.Add(podRef)
+}
+
+func (c *Controller) deletePod(logger klog.Logger, obj interface{}) {
+	podRef, err := cache.DeletionHandlingObjectToName(obj)
+	if err != nil {
+		utilruntime.HandleError(fmt.Errorf("couldn't get key for pod %#v: %w", obj, err))
+		return
+	}
+	c.queue.Add(podRef)
+}
+
+func (c *Controller) addPVC(logger klog.Logger, obj interface{}) {
+	pvc, ok := obj.(*v1.PersistentVolumeClaim)
+	if !ok {
+		return
+	}
+	if pvc.Spec.VolumeName == "" {
+		// Ignore unbound PVCs
+		return
+	}
+	c.enqueueAllPodsForPVC(logger, pvc.Namespace, pvc.Name)
+}
+
+func (c *Controller) updatePVC(logger klog.Logger, oldObj, newObj interface{}) {
+	oldPVC, ok := oldObj.(*v1.PersistentVolumeClaim)
+	if !ok {
+		return
+	}
+	newPVC, ok := newObj.(*v1.PersistentVolumeClaim)
+	if !ok {
+		return
+	}
+	if oldPVC.Spec.VolumeName != newPVC.Spec.VolumeName {
+		// The PVC was just bound
+		c.enqueueAllPodsForPVC(logger, newPVC.Namespace, newPVC.Name)
+	}
+}
+
+func (c *Controller) addPV(logger klog.Logger, obj interface{}) {
+	pv, ok := obj.(*v1.PersistentVolume)
+	if !ok {
+		return
+	}
+	claimRef := pv.Spec.ClaimRef
+	if claimRef == nil {
+		// Ignore unbound PVs
+		return
+	}
+	if claimRef.UID == "" {
+		// Ignore PVs with incomplete binding.
+		return
+	}
+	c.enqueueAllPodsForPVC(logger, claimRef.Namespace, claimRef.Name)
+}
+
+func (c *Controller) updatePV(logger klog.Logger, oldObj, newObj interface{}) {
+	oldPV, ok := oldObj.(*v1.PersistentVolume)
+	if !ok {
+		return
+	}
+	newPV, ok := newObj.(*v1.PersistentVolume)
+	if !ok {
+		return
+	}
+	newClaimRef := newPV.Spec.ClaimRef
+	if newClaimRef == nil {
+		// Ignore unbound PVs
+		return
+	}
+	if newClaimRef.UID == "" {
+		// Ignore PVs with incomplete binding.
+		return
+	}
+	oldClaimRef := oldPV.Spec.ClaimRef
+	if oldClaimRef == nil || oldClaimRef.UID != newClaimRef.UID {
+		// The PV was just bound (or un-bound)
+		// Re-queue all Pods in the same namespace as the PVC bound to this PV
+		c.enqueueAllPodsForPVC(logger, newClaimRef.Namespace, newClaimRef.Name)
+	}
+}
+
+func (c *Controller) enqueueAllPodsForPVC(logger klog.Logger, namespace, name string) {
+	// Re-queue all Pods in the same namespace as the PVC.
+	// As a consequence, all events for Pods in the namespace will be re-sent.
+	// Possible optimizations:
+	// - Resolve which Pods use the PVC here, in the informer hook, using the Pod informer. That could block the hook for longer than necessary.
+	// - Resolve which Pods use the PVC here, in the informer hook, using a new cache (map?) of Pods that wait for a PVC with a given name.
+	// - Enqueue the PVC name and find Pods that use the PVC in a worker thread.
That would mean that the queue can have either PVCs or Pods.
+	objs, err := c.podIndexer.ByIndex(common.PodPVCIndex, fmt.Sprintf("%s/%s", namespace, name))
+	if err != nil {
+		logger.Error(err, "listing pods from cache")
+		return
+	}
+	for _, obj := range objs {
+		podRef, err := cache.DeletionHandlingObjectToName(obj)
+		if err != nil {
+			utilruntime.HandleError(fmt.Errorf("couldn't get key for pod %#v: %w", obj, err))
+			continue
+		}
+		c.queue.Add(podRef)
+	}
+}
+
+func (c *Controller) addCSIDriver(logger klog.Logger, obj interface{}) {
+	csiDriver, ok := obj.(*storagev1.CSIDriver)
+	if !ok {
+		return
+	}
+	// SELinuxMount may have changed. Pods that use volumes of this driver may have
+	// a different effective SELinuxChangePolicy.
+	// With SELinuxMount: false / nil, Pod SELinuxChangePolicy is ignored and `Recursive` is implied.
+	// With SELinuxMount: true, the actual SELinuxChangePolicy is used.
+	// Re-queue all pods that use this CSIDriver to re-evaluate their conflicts.
+	c.enqueueAllPodsForCSIDriver(csiDriver.Name)
+}
+
+func (c *Controller) updateCSIDriver(logger klog.Logger, oldObj, newObj interface{}) {
+	oldCSIDriver, ok := oldObj.(*storagev1.CSIDriver)
+	if !ok {
+		return
+	}
+	newCSIDriver, ok := newObj.(*storagev1.CSIDriver)
+	if !ok {
+		return
+	}
+	oldSELinuxMount := ptr.Deref(oldCSIDriver.Spec.SELinuxMount, false)
+	newSELinuxMount := ptr.Deref(newCSIDriver.Spec.SELinuxMount, false)
+	if oldSELinuxMount != newSELinuxMount {
+		// SELinuxMount changed. Pods that use volumes of this driver may have
+		// a different effective SELinuxChangePolicy.
+		// With SELinuxMount: false / nil, Pod SELinuxChangePolicy is ignored and `Recursive` is implied.
+		// With SELinuxMount: true, the actual SELinuxChangePolicy is used.
+		// Re-queue all pods that use this CSIDriver to re-evaluate their conflicts.
+		c.enqueueAllPodsForCSIDriver(newCSIDriver.Name)
+	}
+}
+
+func (c *Controller) deleteCSIDriver(logger klog.Logger, obj interface{}) {
+	csiDriver, ok := obj.(*storagev1.CSIDriver)
+	if !ok {
+		return
+	}
+	if ptr.Deref(csiDriver.Spec.SELinuxMount, false) {
+		// The deleted CSIDriver announced SELinuxMount support. Drivers without a CSIDriver instance default to `SELinuxMount: false`.
+		// Pods that use volumes of this driver may have a different effective SELinuxChangePolicy now.
+		// With SELinuxMount: true, the actual SELinuxChangePolicy was used.
+		// With a missing CSIDriver (= SELinuxMount: false), Pod SELinuxChangePolicy is ignored and `Recursive` is implied.
+		// Re-queue all pods that use this CSIDriver to re-evaluate their conflicts.
+		c.enqueueAllPodsForCSIDriver(csiDriver.Name)
+	}
+}
+
+func (c *Controller) enqueueAllPodsForCSIDriver(csiDriverName string) {
+	podKeys := c.labelCache.GetPodsForCSIDriver(csiDriverName)
+	for _, podKey := range podKeys {
+		c.queue.Add(podKey)
+	}
+}
+
+func (c *Controller) Run(ctx context.Context, workers int) {
+	defer utilruntime.HandleCrash()
+	defer c.queue.ShutDown()
+	logger := klog.FromContext(ctx)
+	logger.Info("Starting SELinux warning controller")
+	defer logger.Info("Shutting down SELinux warning controller")
+
+	if !cache.WaitForNamedCacheSync("selinux_warning", ctx.Done(), c.podsSynced, c.pvcsSynced, c.pvsSynced, c.csiDriversSynced) {
+		return
+	}
+
+	for i := 0; i < workers; i++ {
+		go wait.UntilWithContext(ctx, c.runWorker, time.Second)
+	}
+
+	<-ctx.Done()
+}
+
+func (c *Controller) runWorker(ctx context.Context) {
+	for c.processNextWorkItem(ctx) {
+	}
+}
+
+func (c *Controller) processNextWorkItem(ctx context.Context) bool {
+	item, shutdown := c.queue.Get()
+	if shutdown {
+		return false
+	}
+	defer c.queue.Done(item)
+
+	err := c.sync(ctx, item)
+	if err == nil {
+		c.queue.Forget(item)
+		return true
+	}
+
+	utilruntime.HandleError(fmt.Errorf("%v failed with: %w", item, err))
+	c.queue.AddRateLimited(item)
+
+	return true
+}
+
+// sync is invoked for each pod that might need to be processed.
+// If an error is returned from this function, the pod will be requeued.
+func (c *Controller) sync(ctx context.Context, podRef cache.ObjectName) error {
+	logger := klog.FromContext(ctx)
+
+	pod, err := c.podLister.Pods(podRef.Namespace).Get(podRef.Name)
+	if err != nil {
+		if apierrors.IsNotFound(err) {
+			// The pod must have been deleted
+			return c.syncPodDelete(ctx, podRef)
+		}
+		// pod is nil on error, log the pod reference instead.
+		logger.V(5).Info("Error getting pod from informer", "pod", podRef, "err", err)
+		return err
+	}
+
+	return c.syncPod(ctx, pod)
+}
+
+func (c *Controller) syncPodDelete(ctx context.Context, podKey cache.ObjectName) error {
+	logger := klog.FromContext(ctx)
+	logger.V(4).Info("Deleting pod", "key", podKey)
+
+	c.labelCache.DeletePod(logger, podKey)
+	return nil
+}
+
+func (c *Controller) syncPod(ctx context.Context, pod *v1.Pod) error {
+	logger := klog.FromContext(ctx)
+	logger.V(4).Info("Syncing pod", "pod", klog.KObj(pod))
+
+	errs := []error{}
+	volumeSpecs := make(map[string]*volume.Spec)
+
+	// Pre-compute volumes
+	for i := range pod.Spec.Volumes {
+		spec, err := util.CreateVolumeSpec(logger, pod.Spec.Volumes[i], pod, c.vpm, c.pvcLister, c.pvLister, c.cmpm, c.csiTranslator)
+		if err != nil {
+			// This can happen frequently when PVC or PV do not exist yet.
+			// Report it, but continue further.
+			errs = append(errs, err)
+			continue
+		}
+		volumeSpecs[pod.Spec.Volumes[i].Name] = spec
+	}
+
+	mounts, _, seLinuxLabels := volumeutil.GetPodVolumeNames(pod, true /* collectSELinuxOptions */)
+	for _, mount := range mounts.UnsortedList() {
+		opts := seLinuxLabels[mount]
+		spec, found := volumeSpecs[mount]
+		if !found {
+			// This must be a volume that failed CreateVolumeSpec above. Error will be reported.
+			logger.V(4).Info("skipping not found volume", "pod", klog.KObj(pod), "volume", mount)
+			continue
+		}
+		mountInfo, err := volumeutil.GetMountSELinuxLabel(spec, opts, pod.Spec.SecurityContext, c.vpm, c.seLinuxTranslator)
+		if err != nil {
+			if volumeutil.IsMultipleSELinuxLabelsError(err) {
+				c.eventRecorder.Eventf(pod, v1.EventTypeWarning, "MultipleSELinuxLabels", "Volume %q is mounted twice with different SELinux labels inside this pod", mount)
+			}
+			logger.V(4).Error(err, "failed to get SELinux label", "pod", klog.KObj(pod), "volume", mount)
+			errs = append(errs, err)
+			continue
+		}
+
+		// Ignore how the volume is going to be mounted.
+		// Report any errors when a volume is used by two pods with different SELinux labels, regardless of their
+		// SELinuxChangePolicy.
+		label := mountInfo.SELinuxProcessLabel
+
+		err = c.syncVolume(logger, pod, spec, label, mountInfo.PluginSupportsSELinuxContextMount)
+		if err != nil {
+			errs = append(errs, err)
+		}
+	}
+
+	return errorutils.NewAggregate(errs)
+}
+
+func (c *Controller) syncVolume(logger klog.Logger, pod *v1.Pod, spec *volume.Spec, label string, pluginSupportsSELinuxContextMount bool) error {
+	plugin, err := c.vpm.FindPluginBySpec(spec)
+	if err != nil {
+		// The controller does not have all volume plugins, only those that affect SELinux.
+		logger.V(4).Info("Skipping volume of unknown plugin", "volume", spec.Name(), "err", err)
+		return nil
+	}
+
+	uniqueVolumeName, err := volumeutil.GetUniqueVolumeNameFromSpec(plugin, spec)
+	if err != nil {
+		return fmt.Errorf("failed to get unique volume name for volume %q: %w", spec.Name(), err)
+	}
+
+	changePolicy := v1.SELinuxChangePolicyMountOption
+	if pod.Spec.SecurityContext != nil && pod.Spec.SecurityContext.SELinuxChangePolicy != nil {
+		changePolicy = *pod.Spec.SecurityContext.SELinuxChangePolicy
+	}
+	if !pluginSupportsSELinuxContextMount {
+		changePolicy = v1.SELinuxChangePolicyRecursive
+	}
+
+	csiDriver, err := csi.GetCSIDriverName(spec)
+	if err != nil {
+		// This is likely not a CSI volume
+		csiDriver = ""
+	}
+	logger.V(4).Info("Syncing pod volume", "pod", klog.KObj(pod), "volume", spec.Name(), "label", label, "uniqueVolumeName", uniqueVolumeName, "changePolicy", changePolicy, "csiDriver", csiDriver)
+
+	conflicts := c.labelCache.AddVolume(logger, uniqueVolumeName, cache.MetaObjectToName(pod), label, changePolicy, csiDriver)
+	c.reportConflictEvents(logger, conflicts)
+
+	return nil
+}
+
+func (c *Controller) reportConflictEvents(logger klog.Logger, conflicts []volumecache.Conflict) {
+	for _, conflict := range conflicts {
+		pod, err := c.podLister.Pods(conflict.Pod.Namespace).Get(conflict.Pod.Name)
+		if err != nil {
+			logger.V(2).Error(err, "failed to get first pod for event", "pod", conflict.Pod)
+			// It does not make sense to report a conflict that has been resolved by deleting one of the pods.
+			return
+		}
+		c.eventRecorder.Event(pod, v1.EventTypeNormal, conflict.EventReason, conflict.EventMessage())
+	}
+}
diff --git a/pkg/controller/volume/selinuxwarning/selinux_warning_controller_test.go b/pkg/controller/volume/selinuxwarning/selinux_warning_controller_test.go
new file mode 100644
index 00000000000..ee44834da4b
--- /dev/null
+++ b/pkg/controller/volume/selinuxwarning/selinux_warning_controller_test.go
@@ -0,0 +1,569 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package selinuxwarning + +import ( + "reflect" + "sort" + "testing" + + v1 "k8s.io/api/core/v1" + storagev1 "k8s.io/api/storage/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/client-go/informers" + "k8s.io/client-go/kubernetes/fake" + "k8s.io/client-go/tools/cache" + "k8s.io/client-go/tools/record" + "k8s.io/klog/v2" + "k8s.io/klog/v2/ktesting" + "k8s.io/kubernetes/pkg/controller" + volumecache "k8s.io/kubernetes/pkg/controller/volume/selinuxwarning/cache" + "k8s.io/kubernetes/pkg/volume" + volumetesting "k8s.io/kubernetes/pkg/volume/testing" + "k8s.io/kubernetes/pkg/volume/util" + "k8s.io/utils/ptr" +) + +const ( + namespace = "ns1" + pvcUID = "uid1" +) + +func TestSELinuxWarningController_Sync(t *testing.T) { + tests := []struct { + name string + existingPVCs []*v1.PersistentVolumeClaim + existingPVs []*v1.PersistentVolume + existingCSIDrivers []*storagev1.CSIDriver + existingPods []*v1.Pod + + pod cache.ObjectName + conflicts []volumecache.Conflict + expectError bool + expectedAddedVolumes []addedVolume + expectedEvents []string + expectedDeletedPods []cache.ObjectName + }{ + { + name: "existing pod with no volumes", + existingPods: []*v1.Pod{ + pod("pod1", "label1", nil), + }, + pod: cache.ObjectName{Namespace: namespace, Name: "pod1"}, + expectedEvents: nil, + expectedAddedVolumes: nil, + }, + { + name: "existing pod with unbound PVC", + existingPods: []*v1.Pod{ + podWithPVC("pod1", "label1", nil, "non-existing-pvc", "vol1"), + }, + pod: cache.ObjectName{Namespace: namespace, Name: "pod1"}, + expectError: true, // PVC is missing, add back to queue with exp. 
backoff + expectedEvents: nil, + expectedAddedVolumes: nil, + }, + { + name: "existing pod with fully bound PVC", + existingPVCs: []*v1.PersistentVolumeClaim{ + pvcBoundToPV("pv1", "pvc1"), + }, + existingPVs: []*v1.PersistentVolume{ + pvBoundToPVC("pv1", "pvc1"), + }, + existingPods: []*v1.Pod{ + podWithPVC("pod1", "label1", nil, "pvc1", "vol1"), + }, + pod: cache.ObjectName{Namespace: namespace, Name: "pod1"}, + expectedEvents: nil, + expectedAddedVolumes: []addedVolume{ + { + volumeName: "fake-plugin/pv1", + podKey: cache.ObjectName{Namespace: namespace, Name: "pod1"}, + label: "system_u:object_r:container_file_t:label1", + changePolicy: v1.SELinuxChangePolicyMountOption, + csiDriver: "ebs.csi.aws.com", // The PV is a fake EBS volume + }, + }, + }, + { + name: "existing pod with fully bound PVC, Recursive change policy", + existingPVCs: []*v1.PersistentVolumeClaim{ + pvcBoundToPV("pv1", "pvc1"), + }, + existingPVs: []*v1.PersistentVolume{ + pvBoundToPVC("pv1", "pvc1"), + }, + existingPods: []*v1.Pod{ + podWithPVC("pod1", "label1", ptr.To(v1.SELinuxChangePolicyRecursive), "pvc1", "vol1"), + }, + pod: cache.ObjectName{Namespace: namespace, Name: "pod1"}, + expectedEvents: nil, + expectedAddedVolumes: []addedVolume{ + { + volumeName: "fake-plugin/pv1", + podKey: cache.ObjectName{Namespace: namespace, Name: "pod1"}, + label: "system_u:object_r:container_file_t:label1", + changePolicy: v1.SELinuxChangePolicyRecursive, + csiDriver: "ebs.csi.aws.com", // The PV is a fake EBS volume + }, + }, + }, + { + name: "existing pod with inline volume", + existingPVCs: []*v1.PersistentVolumeClaim{ + pvcBoundToPV("pv1", "pvc1"), + }, + existingPVs: []*v1.PersistentVolume{ + pvBoundToPVC("pv1", "pvc1"), + }, + existingPods: []*v1.Pod{ + addInlineVolume(pod("pod1", "label1", nil)), + }, + pod: cache.ObjectName{Namespace: namespace, Name: "pod1"}, + expectedEvents: nil, + expectedAddedVolumes: []addedVolume{ + { + volumeName: "fake-plugin/ebs.csi.aws.com-inlinevol1", + podKey: cache.ObjectName{Namespace: namespace, Name: "pod1"}, + label: "system_u:object_r:container_file_t:label1", + changePolicy: v1.SELinuxChangePolicyMountOption, + csiDriver: "ebs.csi.aws.com", // The inline volume is AWS EBS + }, + }, + }, + { + name: "existing pod with inline volume and PVC", + existingPVCs: []*v1.PersistentVolumeClaim{ + pvcBoundToPV("pv1", "pvc1"), + }, + existingPVs: []*v1.PersistentVolume{ + pvBoundToPVC("pv1", "pvc1"), + }, + existingPods: []*v1.Pod{ + addInlineVolume(podWithPVC("pod1", "label1", nil, "pvc1", "vol1")), + }, + pod: cache.ObjectName{Namespace: namespace, Name: "pod1"}, + expectedEvents: nil, + expectedAddedVolumes: []addedVolume{ + { + volumeName: "fake-plugin/pv1", + podKey: cache.ObjectName{Namespace: namespace, Name: "pod1"}, + label: "system_u:object_r:container_file_t:label1", + changePolicy: v1.SELinuxChangePolicyMountOption, + csiDriver: "ebs.csi.aws.com", // The PV is a fake EBS volume + }, + { + volumeName: "fake-plugin/ebs.csi.aws.com-inlinevol1", + podKey: cache.ObjectName{Namespace: namespace, Name: "pod1"}, + label: "system_u:object_r:container_file_t:label1", + changePolicy: v1.SELinuxChangePolicyMountOption, + csiDriver: "ebs.csi.aws.com", // The inline volume is AWS EBS + }, + }, + }, + { + name: "existing pod with PVC generates conflict, the other pod exists", + existingPVCs: []*v1.PersistentVolumeClaim{ + pvcBoundToPV("pv1", "pvc1"), + }, + existingPVs: []*v1.PersistentVolume{ + pvBoundToPVC("pv1", "pvc1"), + }, + existingPods: []*v1.Pod{ + podWithPVC("pod1", "label1", nil, "pvc1", 
"vol1"), + pod("pod2", "label2", nil), + }, + pod: cache.ObjectName{Namespace: namespace, Name: "pod1"}, + conflicts: []volumecache.Conflict{ + { + PropertyName: "SELinux label", + EventReason: "SELinuxLabelConflict", + Pod: cache.ObjectName{Namespace: namespace, Name: "pod1"}, + PropertyValue: "label1", + OtherPod: cache.ObjectName{Namespace: namespace, Name: "pod2"}, + OtherPropertyValue: "label2", + }, + { + PropertyName: "SELinux label", + EventReason: "SELinuxLabelConflict", + Pod: cache.ObjectName{Namespace: namespace, Name: "pod2"}, + PropertyValue: "label2", + OtherPod: cache.ObjectName{Namespace: namespace, Name: "pod1"}, + OtherPropertyValue: "label1", + }, + }, + expectedAddedVolumes: []addedVolume{ + { + volumeName: "fake-plugin/pv1", + podKey: cache.ObjectName{Namespace: namespace, Name: "pod1"}, + label: "system_u:object_r:container_file_t:label1", + changePolicy: v1.SELinuxChangePolicyMountOption, + csiDriver: "ebs.csi.aws.com", // The PV is a fake EBS volume + }, + }, + expectedEvents: []string{ + `Normal SELinuxLabelConflict SELinux label "label1" conflicts with pod pod2 that uses the same volume as this pod with SELinux label "label2". If both pods land on the same node, only one of them may access the volume.`, + `Normal SELinuxLabelConflict SELinux label "label2" conflicts with pod pod1 that uses the same volume as this pod with SELinux label "label1". If both pods land on the same node, only one of them may access the volume.`, + }, + }, + { + name: "existing pod with PVC generates conflict, the other pod doesn't exist", + existingPVCs: []*v1.PersistentVolumeClaim{ + pvcBoundToPV("pv1", "pvc1"), + }, + existingPVs: []*v1.PersistentVolume{ + pvBoundToPVC("pv1", "pvc1"), + }, + existingPods: []*v1.Pod{ + podWithPVC("pod1", "label1", nil, "pvc1", "vol1"), + // "pod2" does not exist + }, + pod: cache.ObjectName{Namespace: namespace, Name: "pod1"}, + conflicts: []volumecache.Conflict{ + { + PropertyName: "SELinux label", + EventReason: "SELinuxLabelConflict", + Pod: cache.ObjectName{Namespace: namespace, Name: "pod1"}, + PropertyValue: "label1", + OtherPod: cache.ObjectName{Namespace: namespace, Name: "pod2"}, + OtherPropertyValue: "label2", + }, + { + PropertyName: "SELinux label", + EventReason: "SELinuxLabelConflict", + Pod: cache.ObjectName{Namespace: namespace, Name: "pod2"}, + PropertyValue: "label2", + OtherPod: cache.ObjectName{Namespace: namespace, Name: "pod1"}, + OtherPropertyValue: "label1", + }, + }, + expectedAddedVolumes: []addedVolume{ + { + volumeName: "fake-plugin/pv1", + podKey: cache.ObjectName{Namespace: namespace, Name: "pod1"}, + label: "system_u:object_r:container_file_t:label1", + changePolicy: v1.SELinuxChangePolicyMountOption, + csiDriver: "ebs.csi.aws.com", // The PV is a fake EBS volume + }, + }, + expectedEvents: []string{ + // Event for the missing pod is not sent + `Normal SELinuxLabelConflict SELinux label "label1" conflicts with pod pod2 that uses the same volume as this pod with SELinux label "label2". 
If both pods land on the same node, only one of them may access the volume.`, + }, + }, + { + name: "deleted pod", + existingPods: []*v1.Pod{ + // "pod1" does not exist in the informer + }, + pod: cache.ObjectName{Namespace: namespace, Name: "pod1"}, + expectError: false, + expectedEvents: nil, + expectedAddedVolumes: nil, + expectedDeletedPods: []cache.ObjectName{{Namespace: namespace, Name: "pod1"}}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, ctx := ktesting.NewTestContext(t) + seLinuxTranslator := util.NewFakeSELinuxLabelTranslator() + _, plugin := volumetesting.GetTestKubeletVolumePluginMgr(t) + plugin.SupportsSELinux = true + + fakeClient := fake.NewClientset() + fakeInformerFactory := informers.NewSharedInformerFactory(fakeClient, controller.NoResyncPeriodFunc()) + podInformer := fakeInformerFactory.Core().V1().Pods() + pvcInformer := fakeInformerFactory.Core().V1().PersistentVolumeClaims() + pvInformer := fakeInformerFactory.Core().V1().PersistentVolumes() + csiDriverInformer := fakeInformerFactory.Storage().V1().CSIDrivers() + + c, err := NewController( + ctx, + fakeClient, + podInformer, + pvcInformer, + pvInformer, + csiDriverInformer, + []volume.VolumePlugin{plugin}, + nil, + ) + if err != nil { + t.Fatalf("failed to create controller: %v", err) + } + // Use the fake translator, it pretends to support SELinux on non-selinux systems + c.seLinuxTranslator = seLinuxTranslator + // Use a fake volume cache + labelCache := &fakeVolumeCache{ + conflictsToSend: map[cache.ObjectName][]volumecache.Conflict{ + {Namespace: tt.pod.Namespace, Name: tt.pod.Name}: tt.conflicts, + }, + } + c.labelCache = labelCache + fakeRecorder := record.NewFakeRecorder(10) + c.eventRecorder = fakeRecorder + + // Start the informers + fakeInformerFactory.Start(ctx.Done()) + fakeInformerFactory.WaitForCacheSync(ctx.Done()) + // Start the controller + go c.Run(ctx, 1) + + // Inject fake existing objects + for _, pvc := range tt.existingPVCs { + _ = pvcInformer.Informer().GetStore().Add(pvc) + } + for _, pv := range tt.existingPVs { + _ = pvInformer.Informer().GetStore().Add(pv) + } + for _, pod := range tt.existingPods { + _ = podInformer.Informer().GetStore().Add(pod) + } + + // Act: call sync() on the pod that *is* in the informer cache + err = c.sync(ctx, tt.pod) + + // Assert: + if tt.expectError { + if err == nil { + t.Fatalf("expected error, got nil") + } + return // do not check the rest on error + } + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + sortAddedVolumes(tt.expectedAddedVolumes) + sortAddedVolumes(labelCache.addedVolumes) + if !reflect.DeepEqual(tt.expectedAddedVolumes, labelCache.addedVolumes) { + t.Errorf("unexpected added volumes, expected \n%+v\ngot\n%+v", tt.expectedAddedVolumes, labelCache.addedVolumes) + } + + events := collectEvents(fakeRecorder.Events) + receivedSet := sets.New(events...) + expectedSet := sets.New(tt.expectedEvents...) 
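+			// The fake recorder may deliver events in any order, so compare them as sets.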
+ if !receivedSet.Equal(expectedSet) { + t.Errorf("got unexpected events: %+v", receivedSet.Difference(expectedSet)) + t.Errorf("missing events: %+v", expectedSet.Difference(receivedSet)) + } + + if !reflect.DeepEqual(tt.expectedDeletedPods, labelCache.deletedPods) { + t.Errorf("unexpected deleted pods, expected \n%+v\ngot\n%+v", tt.expectedDeletedPods, labelCache.deletedPods) + } + }) + } +} + +func pv(name string) *v1.PersistentVolume { + return &v1.PersistentVolume{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, + Spec: v1.PersistentVolumeSpec{ + PersistentVolumeSource: v1.PersistentVolumeSource{ + AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{ + VolumeID: name, + }, + }, + }, + } +} + +func pvc(name string) *v1.PersistentVolumeClaim { + return &v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + UID: pvcUID, + }, + Spec: v1.PersistentVolumeClaimSpec{ + AccessModes: []v1.PersistentVolumeAccessMode{v1.ReadWriteOnce}, + }, + } +} + +func pvBoundToPVC(pvName, pvcName string) *v1.PersistentVolume { + pv := pv(pvName) + pv.Spec.ClaimRef = &v1.ObjectReference{ + Kind: "PersistentVolumeClaim", + Namespace: namespace, + Name: pvcName, + UID: pvcUID, + APIVersion: "v1", + } + pv.Status.Phase = v1.VolumeBound + return pv +} + +func pvcBoundToPV(pvName, pvcName string) *v1.PersistentVolumeClaim { + pvc := pvc(pvcName) + pvc.Spec.VolumeName = pvName + pvc.Status.Phase = v1.ClaimBound + + return pvc +} + +func pod(podName, label string, changePolicy *v1.PodSELinuxChangePolicy) *v1.Pod { + var opts *v1.SELinuxOptions + if label != "" { + opts = &v1.SELinuxOptions{ + Level: label, + } + } + return &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "ns1", + Name: podName, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "container1", + Image: "image1", + VolumeMounts: []v1.VolumeMount{ + { + Name: "vol1", + MountPath: "/mnt", + }, + }, + }, + }, + SecurityContext: &v1.PodSecurityContext{ + SELinuxChangePolicy: changePolicy, + SELinuxOptions: opts, + }, + Volumes: []v1.Volume{ + { + Name: "emptyDir1", + VolumeSource: v1.VolumeSource{ + EmptyDir: &v1.EmptyDirVolumeSource{}, + }, + }, + }, + }, + } +} + +func addInlineVolume(pod *v1.Pod) *v1.Pod { + pod.Spec.Volumes = append(pod.Spec.Volumes, v1.Volume{ + Name: "inlineVolume", + VolumeSource: v1.VolumeSource{ + AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{ + VolumeID: "inlinevol1", + }, + }, + }) + pod.Spec.Containers[0].VolumeMounts = append(pod.Spec.Containers[0].VolumeMounts, v1.VolumeMount{ + Name: "inlineVolume", + MountPath: "/mnt", + }) + + return pod +} + +func podWithPVC(podName, label string, changePolicy *v1.PodSELinuxChangePolicy, pvcName, volumeName string) *v1.Pod { + pod := pod(podName, label, changePolicy) + pod.Spec.Volumes = append(pod.Spec.Volumes, v1.Volume{ + Name: volumeName, + VolumeSource: v1.VolumeSource{ + PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ + ClaimName: pvcName, + }, + }, + }) + pod.Spec.Containers[0].VolumeMounts = append(pod.Spec.Containers[0].VolumeMounts, v1.VolumeMount{ + Name: volumeName, + MountPath: "/mnt", + }) + return pod +} + +type addedVolume struct { + volumeName v1.UniqueVolumeName + podKey cache.ObjectName + label string + changePolicy v1.PodSELinuxChangePolicy + csiDriver string +} + +func sortAddedVolumes(a []addedVolume) { + sort.Slice(a, func(i, j int) bool { + ikey := string(a[i].volumeName) + "/" + a[i].podKey.String() + jkey := string(a[j].volumeName) + "/" + a[j].podKey.String() + 
return ikey < jkey
+	})
+}
+
+type fakeVolumeCache struct {
+	addedVolumes []addedVolume
+	// Conflicts to send when AddVolume is called with the given pod key.
+	conflictsToSend map[cache.ObjectName][]volumecache.Conflict
+	deletedPods     []cache.ObjectName
+}
+
+var _ volumecache.VolumeCache = &fakeVolumeCache{}
+
+func (f *fakeVolumeCache) AddVolume(logger klog.Logger, volumeName v1.UniqueVolumeName, podKey cache.ObjectName, label string, changePolicy v1.PodSELinuxChangePolicy, csiDriver string) []volumecache.Conflict {
+	f.addedVolumes = append(f.addedVolumes, addedVolume{
+		volumeName:   volumeName,
+		podKey:       podKey,
+		label:        label,
+		changePolicy: changePolicy,
+		csiDriver:    csiDriver,
+	})
+	conflicts := f.conflictsToSend[podKey]
+	return conflicts
+}
+
+func (f *fakeVolumeCache) DeletePod(logger klog.Logger, podKey cache.ObjectName) {
+	f.deletedPods = append(f.deletedPods, podKey)
+}
+
+func (f *fakeVolumeCache) GetPodsForCSIDriver(driverName string) []cache.ObjectName {
+	pods := []cache.ObjectName{}
+	for _, v := range f.addedVolumes {
+		if v.csiDriver == driverName {
+			pods = append(pods, v.podKey)
+		}
+	}
+	return pods
+}
+
+func (f *fakeVolumeCache) SendConflicts(logger klog.Logger, ch chan<- volumecache.Conflict) {
+	for _, conflicts := range f.conflictsToSend {
+		for _, conflict := range conflicts {
+			ch <- conflict
+		}
+	}
+}
+
+func collectEvents(source <-chan string) []string {
+	done := false
+	events := make([]string, 0)
+	for !done {
+		select {
+		case event := <-source:
+			events = append(events, event)
+		default:
+			done = true
+		}
+	}
+	return events
+}
diff --git a/pkg/controller/volume/selinuxwarning/volumehost.go b/pkg/controller/volume/selinuxwarning/volumehost.go
new file mode 100644
index 00000000000..cba33ea3b53
--- /dev/null
+++ b/pkg/controller/volume/selinuxwarning/volumehost.go
@@ -0,0 +1,143 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package selinuxwarning
+
+import (
+	"fmt"
+	"net"
+
+	authenticationv1 "k8s.io/api/authentication/v1"
+	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/types"
+	clientset "k8s.io/client-go/kubernetes"
+	storagelisters "k8s.io/client-go/listers/storage/v1"
+	"k8s.io/client-go/tools/record"
+	"k8s.io/klog/v2"
+	"k8s.io/kubernetes/pkg/volume"
+	"k8s.io/kubernetes/pkg/volume/util/subpath"
+	"k8s.io/mount-utils"
+	utilexec "k8s.io/utils/exec"
+)
+
+var _ volume.VolumeHost = &Controller{}
+var _ volume.CSIDriverVolumeHost = &Controller{}
+
+// VolumeHost implementation. It requires a lot of kubelet-specific methods that are not used in the controller.
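+// Only GetKubeClient, GetExec and CSIDriverLister below return usable values;
+// the remaining methods are stubs that return empty values or errors.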
+func (c *Controller) GetPluginDir(podUID string) string { + return "" +} + +func (c *Controller) GetVolumeDevicePluginDir(podUID string) string { + return "" +} + +func (c *Controller) GetPodsDir() string { + return "" +} + +func (c *Controller) GetPodVolumeDir(podUID types.UID, pluginName, volumeName string) string { + return "" +} + +func (c *Controller) GetPodPluginDir(podUID types.UID, pluginName string) string { + return "" +} + +func (c *Controller) GetPodVolumeDeviceDir(podUID types.UID, pluginName string) string { + return "" +} + +func (c *Controller) GetKubeClient() clientset.Interface { + return c.kubeClient +} + +func (c *Controller) NewWrapperMounter(volName string, spec volume.Spec, pod *v1.Pod) (volume.Mounter, error) { + return nil, fmt.Errorf("NewWrapperMounter not supported by SELinux controller VolumeHost implementation") +} + +func (c *Controller) NewWrapperUnmounter(volName string, spec volume.Spec, podUID types.UID) (volume.Unmounter, error) { + return nil, fmt.Errorf("NewWrapperUnmounter not supported by SELinux controller VolumeHost implementation") +} + +func (c *Controller) GetMounter(pluginName string) mount.Interface { + return nil +} + +func (c *Controller) GetHostName() string { + return "" +} + +func (c *Controller) GetHostIP() (net.IP, error) { + return nil, fmt.Errorf("GetHostIP() not supported by SELinux controller VolumeHost implementation") +} + +func (c *Controller) GetNodeAllocatable() (v1.ResourceList, error) { + return v1.ResourceList{}, nil +} + +func (c *Controller) GetAttachedVolumesFromNodeStatus() (map[v1.UniqueVolumeName]string, error) { + return map[v1.UniqueVolumeName]string{}, nil +} + +func (c *Controller) GetSecretFunc() func(namespace, name string) (*v1.Secret, error) { + return func(_, _ string) (*v1.Secret, error) { + return nil, fmt.Errorf("GetSecret unsupported in SELinux controller") + } +} + +func (c *Controller) GetConfigMapFunc() func(namespace, name string) (*v1.ConfigMap, error) { + return func(_, _ string) (*v1.ConfigMap, error) { + return nil, fmt.Errorf("GetConfigMap unsupported in SELinux controller") + } +} + +func (c *Controller) GetServiceAccountTokenFunc() func(_, _ string, _ *authenticationv1.TokenRequest) (*authenticationv1.TokenRequest, error) { + return func(_, _ string, _ *authenticationv1.TokenRequest) (*authenticationv1.TokenRequest, error) { + return nil, fmt.Errorf("GetServiceAccountToken unsupported in SELinux controller") + } +} + +func (c *Controller) DeleteServiceAccountTokenFunc() func(types.UID) { + return func(types.UID) { + // nolint:logcheck + klog.ErrorS(nil, "DeleteServiceAccountToken unsupported in SELinux controller") + } +} + +func (c *Controller) GetExec(pluginName string) utilexec.Interface { + return utilexec.New() +} + +func (c *Controller) GetNodeLabels() (map[string]string, error) { + return nil, fmt.Errorf("GetNodeLabels() unsupported in SELinux controller") +} + +func (c *Controller) GetNodeName() types.NodeName { + return "" +} + +func (c *Controller) GetEventRecorder() record.EventRecorder { + return nil +} + +func (c *Controller) GetSubpather() subpath.Interface { + return nil +} + +func (c *Controller) CSIDriverLister() storagelisters.CSIDriverLister { + return c.csiDriverLister +} diff --git a/pkg/volume/csi/csi_plugin.go b/pkg/volume/csi/csi_plugin.go index 9afcd8cde20..59fa6245b7f 100644 --- a/pkg/volume/csi/csi_plugin.go +++ b/pkg/volume/csi/csi_plugin.go @@ -202,12 +202,15 @@ func (p *csiPlugin) Init(host volume.VolumeHost) error { klog.Warning(log("kubeclient not set, assuming 
standalone kubelet")) } else { // set CSIDriverLister and volumeAttachmentLister + seLinuxHost, ok := host.(volume.CSIDriverVolumeHost) + if ok { + p.csiDriverLister = seLinuxHost.CSIDriverLister() + if p.csiDriverLister == nil { + klog.Error(log("CSIDriverLister not found on CSIDriverVolumeHost")) + } + } adcHost, ok := host.(volume.AttachDetachVolumeHost) if ok { - p.csiDriverLister = adcHost.CSIDriverLister() - if p.csiDriverLister == nil { - klog.Error(log("CSIDriverLister not found on AttachDetachVolumeHost")) - } p.volumeAttachmentLister = adcHost.VolumeAttachmentLister() if p.volumeAttachmentLister == nil { klog.Error(log("VolumeAttachmentLister not found on AttachDetachVolumeHost")) diff --git a/pkg/volume/plugins.go b/pkg/volume/plugins.go index ee9a692ed22..4dfb5e658d5 100644 --- a/pkg/volume/plugins.go +++ b/pkg/volume/plugins.go @@ -309,15 +309,18 @@ type KubeletVolumeHost interface { GetTrustAnchorsBySigner(signerName string, labelSelector *metav1.LabelSelector, allowMissing bool) ([]byte, error) } +// CSIDriverVolumeHost is a volume host that has access to CSIDriverLister +type CSIDriverVolumeHost interface { + // CSIDriverLister returns the informer lister for the CSIDriver API Object + CSIDriverLister() storagelistersv1.CSIDriverLister +} + // AttachDetachVolumeHost is a AttachDetach Controller specific interface that plugins can use // to access methods on the Attach Detach Controller. type AttachDetachVolumeHost interface { + CSIDriverVolumeHost // CSINodeLister returns the informer lister for the CSINode API Object CSINodeLister() storagelistersv1.CSINodeLister - - // CSIDriverLister returns the informer lister for the CSIDriver API Object - CSIDriverLister() storagelistersv1.CSIDriverLister - // VolumeAttachmentLister returns the informer lister for the VolumeAttachment API Object VolumeAttachmentLister() storagelistersv1.VolumeAttachmentLister // IsAttachDetachController is an interface marker to strictly tie AttachDetachVolumeHost diff --git a/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/controller_policy.go b/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/controller_policy.go index 90deb4a5d25..a6e10a663b5 100644 --- a/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/controller_policy.go +++ b/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/controller_policy.go @@ -501,6 +501,15 @@ func buildControllerRoles() ([]rbacv1.ClusterRole, []rbacv1.ClusterRoleBinding) }) } + if utilfeature.DefaultFeatureGate.Enabled(features.SELinuxChangePolicy) { + addControllerRole(&controllerRoles, &controllerRoleBindings, rbacv1.ClusterRole{ + ObjectMeta: metav1.ObjectMeta{Name: saRolePrefix + "selinux-warning-controller"}, + Rules: []rbacv1.PolicyRule{ + eventsRule(), + }, + }) + } + return controllerRoles, controllerRoleBindings } From da2d9fa16e8e8fef477c5726de0affac4a03e818 Mon Sep 17 00:00:00 2001 From: Jan Safranek Date: Fri, 25 Oct 2024 15:37:52 +0200 Subject: [PATCH 03/14] Fix golint errors Revealed by the new SELinux warning controller, but not related to it. 
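
For context: unlike %v, the %w verb wraps the original error, so callers can
still match it with errors.Is / errors.As after the message is annotated.
A minimal, hypothetical sketch:

    base := errors.New("PVC not in cache")
    wrapped := fmt.Errorf("error processing PVC %q/%q: %w", "ns1", "pvc1", base)
    fmt.Println(errors.Is(wrapped, base)) // true with %w; false with %v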
---
 cmd/kube-controller-manager/app/core.go         | 2 +-
 pkg/controller/volume/attachdetach/util/util.go | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/cmd/kube-controller-manager/app/core.go b/cmd/kube-controller-manager/app/core.go
index 2df27c8eec6..353557e071e 100644
--- a/cmd/kube-controller-manager/app/core.go
+++ b/cmd/kube-controller-manager/app/core.go
@@ -141,7 +141,7 @@ func startNodeIpamController(ctx context.Context, controllerContext ControllerCo
 	// should be dual stack (from different IPFamilies)
 	dualstackServiceCIDR, err := netutils.IsDualStackCIDRs([]*net.IPNet{serviceCIDR, secondaryServiceCIDR})
 	if err != nil {
-		return nil, false, fmt.Errorf("failed to perform dualstack check on serviceCIDR and secondaryServiceCIDR error: %v", err)
+		return nil, false, fmt.Errorf("failed to perform dualstack check on serviceCIDR and secondaryServiceCIDR error: %w", err)
 	}
 	if !dualstackServiceCIDR {
 		return nil, false, fmt.Errorf("serviceCIDR and secondaryServiceCIDR are not dualstack (from different IPfamiles)")
diff --git a/pkg/controller/volume/attachdetach/util/util.go b/pkg/controller/volume/attachdetach/util/util.go
index a47b431b606..b08f41135bd 100644
--- a/pkg/controller/volume/attachdetach/util/util.go
+++ b/pkg/controller/volume/attachdetach/util/util.go
@@ -55,7 +55,7 @@ func createInTreeVolumeSpec(logger klog.Logger, podVolume *v1.Volume, pod *v1.Po
 	pvc, err := getPVCFromCache(pod.Namespace, claimName, pvcLister)
 	if err != nil {
 		return nil, claimName, fmt.Errorf(
-			"error processing PVC %q/%q: %v",
+			"error processing PVC %q/%q: %w",
 			pod.Namespace,
 			claimName,
 			err)
@@ -74,7 +74,7 @@ func createInTreeVolumeSpec(logger klog.Logger, podVolume *v1.Volume, pod *v1.Po
 		pvName, readOnly, pvcUID, pvLister)
 	if err != nil {
 		return nil, claimName, fmt.Errorf(
-			"error processing PVC %q/%q: %v",
+			"error processing PVC %q/%q: %w",
 			pod.Namespace,
 			claimName,
 			err)
@@ -92,7 +92,7 @@ func CreateVolumeSpec(logger klog.Logger, podVolume v1.Volume, pod *v1.Pod, vpm
 	volumeSpec, err = translateInTreeSpecToCSIIfNeeded(logger, volumeSpec, vpm, csiMigratedPluginManager, csiTranslator, pod.Namespace)
 	if err != nil {
 		return nil, fmt.Errorf(
-			"error performing CSI migration checks and translation for PVC %q/%q: %v",
+			"error performing CSI migration checks and translation for PVC %q/%q: %w",
 			pod.Namespace,
 			claimName,
 			err)
@@ -113,7 +113,7 @@ func CreateVolumeSpecWithNodeMigration(logger klog.Logger, podVolume v1.Volume,
 	volumeSpec, err = translateInTreeSpecToCSIOnNodeIfNeeded(logger, volumeSpec, nodeName, vpm, csiMigratedPluginManager, csiTranslator, pod.Namespace)
 	if err != nil {
 		return nil, fmt.Errorf(
-			"error performing CSI migration checks and translation for PVC %q/%q: %v",
+			"error performing CSI migration checks and translation for PVC %q/%q: %w",
 			pod.Namespace,
 			claimName,
 			err)

From e438bc0561a65ff958594e56eafd4598d0cc09ab Mon Sep 17 00:00:00 2001
From: Jan Safranek
Date: Tue, 5 Nov 2024 10:52:52 +0100
Subject: [PATCH 04/14] Rework event recorder startup

* Remove the Controller.recorder field; there already is eventRecorder.
* Start the event broadcaster in Run(), to save a bit of CPU and memory when
something initializes the controller, but does not Run() it.
* Log events with log level 3, as the other controllers usually do.
* Use StartStructuredLogging(), which looks fancier than StartLogging --- .../selinuxwarning/selinux_warning_controller.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pkg/controller/volume/selinuxwarning/selinux_warning_controller.go b/pkg/controller/volume/selinuxwarning/selinux_warning_controller.go index e531e3c1c3d..9e82899d1dd 100644 --- a/pkg/controller/volume/selinuxwarning/selinux_warning_controller.go +++ b/pkg/controller/volume/selinuxwarning/selinux_warning_controller.go @@ -75,10 +75,9 @@ type Controller struct { cmpm csimigration.PluginManager csiTranslator csimigration.InTreeToCSITranslator seLinuxTranslator volumeutil.SELinuxLabelTranslator + eventBroadcaster record.EventBroadcaster eventRecorder record.EventRecorder - - recorder record.EventRecorder - queue workqueue.TypedRateLimitingInterface[cache.ObjectName] + queue workqueue.TypedRateLimitingInterface[cache.ObjectName] labelCache volumecache.VolumeCache } @@ -95,8 +94,6 @@ func NewController( ) (*Controller, error) { eventBroadcaster := record.NewBroadcaster(record.WithContext(ctx)) - eventBroadcaster.StartLogging(klog.Infof) - eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")}) recorder := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "selinux_warning"}) c := &Controller{ kubeClient: kubeClient, @@ -112,15 +109,15 @@ func NewController( vpm: &volume.VolumePluginMgr{}, seLinuxTranslator: volumeutil.NewSELinuxLabelTranslator(), - recorder: recorder, + eventBroadcaster: eventBroadcaster, + eventRecorder: recorder, queue: workqueue.NewTypedRateLimitingQueueWithConfig[cache.ObjectName]( workqueue.DefaultTypedControllerRateLimiter[cache.ObjectName](), workqueue.TypedRateLimitingQueueConfig[cache.ObjectName]{ Name: "selinux_warning", }, ), - labelCache: volumecache.NewVolumeLabelCache(), - eventRecorder: recorder, + labelCache: volumecache.NewVolumeLabelCache(), } err := c.vpm.InitPlugins(plugins, prober, c) @@ -346,6 +343,9 @@ func (c *Controller) Run(ctx context.Context, workers int) { logger.Info("Starting SELinux warning controller") defer logger.Info("Shutting down SELinux warning controller") + c.eventBroadcaster.StartStructuredLogging(3) // verbosity level 3 is used by the other KCM controllers + c.eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: c.kubeClient.CoreV1().Events("")}) + if !cache.WaitForNamedCacheSync("selinux_warning", ctx.Done(), c.podsSynced, c.pvcsSynced, c.pvsSynced, c.csiDriversSynced) { return } From dfb88095b0d0883a046398e39e070c4a2771650c Mon Sep 17 00:00:00 2001 From: Jan Safranek Date: Tue, 5 Nov 2024 11:10:36 +0100 Subject: [PATCH 05/14] Rename label to seLinuxLabel In various parameters, variables and fields. To make the name more obvious. --- .../selinuxwarning/cache/volumecache.go | 26 +++++++++---------- .../selinuxwarning/cache/volumecache_test.go | 2 +- .../selinux_warning_controller.go | 12 ++++----- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/pkg/controller/volume/selinuxwarning/cache/volumecache.go b/pkg/controller/volume/selinuxwarning/cache/volumecache.go index a250a33cb98..4c178074054 100644 --- a/pkg/controller/volume/selinuxwarning/cache/volumecache.go +++ b/pkg/controller/volume/selinuxwarning/cache/volumecache.go @@ -32,7 +32,7 @@ const ( type VolumeCache interface { // Add a single volume to the cache. Returns list of conflicts it caused. 
- AddVolume(logger klog.Logger, volumeName v1.UniqueVolumeName, podKey cache.ObjectName, label string, changePolicy v1.PodSELinuxChangePolicy, csiDriver string) []Conflict + AddVolume(logger klog.Logger, volumeName v1.UniqueVolumeName, podKey cache.ObjectName, seLinuxLabel string, changePolicy v1.PodSELinuxChangePolicy, csiDriver string) []Conflict // Remove a pod from the cache. Prunes all empty structures. DeletePod(logger klog.Logger, podKey cache.ObjectName) @@ -71,17 +71,17 @@ type usedVolume struct { // Information about a Pod that uses a volume. type podInfo struct { - // SELinux label to be applied to the volume in the Pod. + // SELinux seLinuxLabel to be applied to the volume in the Pod. // Either as mount option or recursively by the container runtime. - label string + seLinuxLabel string // SELinuxChangePolicy of the Pod. changePolicy v1.PodSELinuxChangePolicy } -func newPodInfoListForPod(podKey cache.ObjectName, label string, changePolicy v1.PodSELinuxChangePolicy) map[cache.ObjectName]podInfo { +func newPodInfoListForPod(podKey cache.ObjectName, seLinuxLabel string, changePolicy v1.PodSELinuxChangePolicy) map[cache.ObjectName]podInfo { return map[cache.ObjectName]podInfo{ podKey: { - label: label, + seLinuxLabel: seLinuxLabel, changePolicy: changePolicy, }, } @@ -109,7 +109,7 @@ func (c *volumeCache) AddVolume(logger klog.Logger, volumeName v1.UniqueVolumeNa // The volume is already known // Add the pod to the cache or update its properties volume.pods[podKey] = podInfo{ - label: label, + seLinuxLabel: label, changePolicy: changePolicy, } @@ -133,7 +133,7 @@ func (c *volumeCache) AddVolume(logger klog.Logger, volumeName v1.UniqueVolumeNa OtherPropertyValue: string(changePolicy), }) } - if otherPodInfo.label != label { + if otherPodInfo.seLinuxLabel != label { // Send conflict to both pods conflicts = append(conflicts, Conflict{ PropertyName: "SELinux label", @@ -141,12 +141,12 @@ func (c *volumeCache) AddVolume(logger klog.Logger, volumeName v1.UniqueVolumeNa Pod: podKey, PropertyValue: label, OtherPod: otherPodKey, - OtherPropertyValue: otherPodInfo.label, + OtherPropertyValue: otherPodInfo.seLinuxLabel, }, Conflict{ PropertyName: "SELinux label", EventReason: "SELinuxLabelConflict", Pod: otherPodKey, - PropertyValue: otherPodInfo.label, + PropertyValue: otherPodInfo.seLinuxLabel, OtherPod: podKey, OtherPropertyValue: label, }) @@ -197,7 +197,7 @@ func (c *volumeCache) dump(logger klog.Logger) { }) for _, podKey := range podKeys { podInfo := volume.pods[podKey] - logger.Info(" pod", "pod", podKey, "label", podInfo.label, "changePolicy", podInfo.changePolicy) + logger.Info(" pod", "pod", podKey, "seLinuxLabel", podInfo.seLinuxLabel, "changePolicy", podInfo.changePolicy) } } } @@ -244,14 +244,14 @@ func (c *volumeCache) SendConflicts(logger klog.Logger, ch chan<- Conflict) { OtherPropertyValue: string(otherPodInfo.changePolicy), } } - if podInfo.label != otherPodInfo.label { + if podInfo.seLinuxLabel != otherPodInfo.seLinuxLabel { ch <- Conflict{ PropertyName: "SELinux label", EventReason: "SELinuxLabelConflict", Pod: podKey, - PropertyValue: podInfo.label, + PropertyValue: podInfo.seLinuxLabel, OtherPod: otherPodKey, - OtherPropertyValue: otherPodInfo.label, + OtherPropertyValue: otherPodInfo.seLinuxLabel, } } } diff --git a/pkg/controller/volume/selinuxwarning/cache/volumecache_test.go b/pkg/controller/volume/selinuxwarning/cache/volumecache_test.go index 03a1e49c094..92f2d9d0a7a 100644 --- a/pkg/controller/volume/selinuxwarning/cache/volumecache_test.go +++ 
b/pkg/controller/volume/selinuxwarning/cache/volumecache_test.go @@ -341,7 +341,7 @@ func TestVolumeCache_AddVolumeSendConflicts(t *testing.T) { t.Errorf("pod %s is not present in the cache", podKey) } expectedPodInfo := podInfo{ - label: tt.podToAdd.label, + seLinuxLabel: tt.podToAdd.label, changePolicy: tt.podToAdd.changePolicy, } if !reflect.DeepEqual(existingInfo, expectedPodInfo) { diff --git a/pkg/controller/volume/selinuxwarning/selinux_warning_controller.go b/pkg/controller/volume/selinuxwarning/selinux_warning_controller.go index 9e82899d1dd..1a779ef2d9f 100644 --- a/pkg/controller/volume/selinuxwarning/selinux_warning_controller.go +++ b/pkg/controller/volume/selinuxwarning/selinux_warning_controller.go @@ -447,11 +447,11 @@ func (c *Controller) syncPod(ctx context.Context, pod *v1.Pod) error { } // Ignore how the volume is going to be mounted. - // Report any errors when a volume is used by two pdos with different SELinux labels regardless of their + // Report any errors when a volume is used by two pods with different SELinux labels regardless of their // SELinuxChangePolicy - label := mountInfo.SELinuxProcessLabel + seLinuxLabel := mountInfo.SELinuxProcessLabel - err = c.syncVolume(logger, pod, spec, label, mountInfo.PluginSupportsSELinuxContextMount) + err = c.syncVolume(logger, pod, spec, seLinuxLabel, mountInfo.PluginSupportsSELinuxContextMount) if err != nil { errs = append(errs, err) } @@ -460,7 +460,7 @@ func (c *Controller) syncPod(ctx context.Context, pod *v1.Pod) error { return errorutils.NewAggregate(errs) } -func (c *Controller) syncVolume(logger klog.Logger, pod *v1.Pod, spec *volume.Spec, label string, pluginSupportsSELinuxContextMount bool) error { +func (c *Controller) syncVolume(logger klog.Logger, pod *v1.Pod, spec *volume.Spec, seLinuxLabel string, pluginSupportsSELinuxContextMount bool) error { plugin, err := c.vpm.FindPluginBySpec(spec) if err != nil { // The controller does not have all volume plugins, only those that affect SELinux. @@ -486,9 +486,9 @@ func (c *Controller) syncVolume(logger klog.Logger, pod *v1.Pod, spec *volume.Sp // This is likely not a CSI volume csiDriver = "" } - logger.V(4).Info("Syncing pod volume", "pod", klog.KObj(pod), "volume", spec.Name(), "label", label, "uniqueVolumeName", uniqueVolumeName, "changePolicy", changePolicy, "csiDriver", csiDriver) + logger.V(4).Info("Syncing pod volume", "pod", klog.KObj(pod), "volume", spec.Name(), "label", seLinuxLabel, "uniqueVolumeName", uniqueVolumeName, "changePolicy", changePolicy, "csiDriver", csiDriver) - conflicts := c.labelCache.AddVolume(logger, uniqueVolumeName, cache.MetaObjectToName(pod), label, changePolicy, csiDriver) + conflicts := c.labelCache.AddVolume(logger, uniqueVolumeName, cache.MetaObjectToName(pod), seLinuxLabel, changePolicy, csiDriver) c.reportConflictEvents(logger, conflicts) return nil From e6807a8e4f7a7463de10dbdf2bdf38407e82e5d5 Mon Sep 17 00:00:00 2001 From: Jan Safranek Date: Tue, 5 Nov 2024 11:27:09 +0100 Subject: [PATCH 06/14] Use _ for unused parameters Sometimes the logger is not used. This fixes some linter warnings. 
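A tiny illustration (hypothetical types, not from this change): handlers must
keep a fixed signature even when they ignore an argument, and naming the unused
one "_" documents that and silences the linter.

    package main

    import "fmt"

    // podHandler is a hypothetical callback type with a fixed signature.
    type podHandler func(logger string, obj interface{})

    // addPod ignores the logger; "_" makes that explicit.
    func addPod(_ string, obj interface{}) {
        fmt.Println("queued:", obj)
    }

    func main() {
        var h podHandler = addPod
        h("ignored", "default/pod1")
    }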
--- .../selinuxwarning/selinux_warning_controller.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/controller/volume/selinuxwarning/selinux_warning_controller.go b/pkg/controller/volume/selinuxwarning/selinux_warning_controller.go index 1a779ef2d9f..c45e61f3314 100644 --- a/pkg/controller/volume/selinuxwarning/selinux_warning_controller.go +++ b/pkg/controller/volume/selinuxwarning/selinux_warning_controller.go @@ -173,7 +173,7 @@ func NewController( return c, nil } -func (c *Controller) addPod(logger klog.Logger, obj interface{}) { +func (c *Controller) addPod(_ klog.Logger, obj interface{}) { podRef, err := cache.DeletionHandlingObjectToName(obj) if err != nil { utilruntime.HandleError(fmt.Errorf("couldn't get key for pod %#v: %w", obj, err)) @@ -181,7 +181,7 @@ func (c *Controller) addPod(logger klog.Logger, obj interface{}) { c.queue.Add(podRef) } -func (c *Controller) deletePod(logger klog.Logger, obj interface{}) { +func (c *Controller) deletePod(_ klog.Logger, obj interface{}) { podRef, err := cache.DeletionHandlingObjectToName(obj) if err != nil { utilruntime.HandleError(fmt.Errorf("couldn't get key for pod %#v: %w", obj, err)) @@ -280,7 +280,7 @@ func (c *Controller) enqueueAllPodsForPVC(logger klog.Logger, namespace, name st } } -func (c *Controller) addCSIDriver(logger klog.Logger, obj interface{}) { +func (c *Controller) addCSIDriver(_ klog.Logger, obj interface{}) { csiDriver, ok := obj.(*storagev1.CSIDriver) if !ok { return @@ -293,7 +293,7 @@ func (c *Controller) addCSIDriver(logger klog.Logger, obj interface{}) { c.enqueueAllPodsForCSIDriver(csiDriver.Name) } -func (c *Controller) updateCSIDriver(logger klog.Logger, oldObj, newObj interface{}) { +func (c *Controller) updateCSIDriver(_ klog.Logger, oldObj, newObj interface{}) { oldCSIDriver, ok := oldObj.(*storagev1.CSIDriver) if !ok { return @@ -314,7 +314,7 @@ func (c *Controller) updateCSIDriver(logger klog.Logger, oldObj, newObj interfac } } -func (c *Controller) deleteCSIDriver(logger klog.Logger, obj interface{}) { +func (c *Controller) deleteCSIDriver(_ klog.Logger, obj interface{}) { csiDriver, ok := obj.(*storagev1.CSIDriver) if !ok { return From 6eab8a86910ea257bd3a8e102e0dd3a6c4275b25 Mon Sep 17 00:00:00 2001 From: Jan Safranek Date: Tue, 5 Nov 2024 12:38:08 +0100 Subject: [PATCH 07/14] Use RWLock for the controller cache It could help a tiny bit with parallel operations. --- .../volume/selinuxwarning/cache/volumecache.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/controller/volume/selinuxwarning/cache/volumecache.go b/pkg/controller/volume/selinuxwarning/cache/volumecache.go index 4c178074054..661f5cc3a0b 100644 --- a/pkg/controller/volume/selinuxwarning/cache/volumecache.go +++ b/pkg/controller/volume/selinuxwarning/cache/volumecache.go @@ -47,7 +47,7 @@ type VolumeCache interface { // VolumeCache stores all volumes used by Pods and their properties that the controller needs to track, // like SELinux labels and SELinuxChangePolicies. type volumeCache struct { - mutex sync.Mutex + mutex sync.RWMutex // All volumes of all existing Pods. volumes map[v1.UniqueVolumeName]usedVolume } @@ -204,8 +204,8 @@ func (c *volumeCache) dump(logger klog.Logger) { // GetPodsForCSIDriver returns all pods that use volumes with the given CSI driver. 
func (c *volumeCache) GetPodsForCSIDriver(driverName string) []cache.ObjectName {
-	c.mutex.Lock()
-	defer c.mutex.Unlock()
+	c.mutex.RLock()
+	defer c.mutex.RUnlock()
 
 	var pods []cache.ObjectName
 	for _, volume := range c.volumes {
@@ -221,8 +221,8 @@ func (c *volumeCache) GetPodsForCSIDriver(driverName string) []cache.ObjectName
 
 // SendConflicts sends all current conflicts to the given channel.
 func (c *volumeCache) SendConflicts(logger klog.Logger, ch chan<- Conflict) {
-	c.mutex.Lock()
-	defer c.mutex.Unlock()
+	c.mutex.RLock()
+	defer c.mutex.RUnlock()
 
 	logger.V(4).Info("Scraping conflicts")
 	c.dump(logger)

From 3ff3ed4b6d8b6f6d3a4cf0dae23cc67fe5ecd226 Mon Sep 17 00:00:00 2001
From: Jan Safranek
Date: Tue, 5 Nov 2024 12:59:42 +0100
Subject: [PATCH 08/14] Add a comment on how GetPodsForCSIDriver is useful

--- pkg/controller/volume/selinuxwarning/cache/volumecache.go | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pkg/controller/volume/selinuxwarning/cache/volumecache.go b/pkg/controller/volume/selinuxwarning/cache/volumecache.go
index 661f5cc3a0b..df8b1169e59 100644
--- a/pkg/controller/volume/selinuxwarning/cache/volumecache.go
+++ b/pkg/controller/volume/selinuxwarning/cache/volumecache.go
@@ -38,6 +38,10 @@ type VolumeCache interface {
 	DeletePod(logger klog.Logger, podKey cache.ObjectName)
 
 	// GetPodsForCSIDriver returns all pods that use volumes with the given CSI driver.
+	// This is useful when a CSIDriver changes its spec.seLinuxMount and the controller
+	// needs to reevaluate all pods that use volumes with this driver.
+	// The controller doesn't need to track in-tree volume plugins, because they don't
+	// change their SELinux support dynamically.
 	GetPodsForCSIDriver(driverName string) []cache.ObjectName
 
 	// SendConflicts sends all current conflicts to the given channel.

From cf7a2c7d35ce0ff0dc00054264e30bf02c8133e8 Mon Sep 17 00:00:00 2001
From: Jan Safranek
Date: Tue, 5 Nov 2024 13:05:07 +0100
Subject: [PATCH 09/14] Add a comment on why the PVC indexer is used

--- .../volume/selinuxwarning/selinux_warning_controller.go | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pkg/controller/volume/selinuxwarning/selinux_warning_controller.go b/pkg/controller/volume/selinuxwarning/selinux_warning_controller.go
index c45e61f3314..b214c7cc72e 100644
--- a/pkg/controller/volume/selinuxwarning/selinux_warning_controller.go
+++ b/pkg/controller/volume/selinuxwarning/selinux_warning_controller.go
@@ -128,6 +128,8 @@ func NewController(
 	c.csiTranslator = csiTranslator
 	c.cmpm = csimigration.NewPluginManager(csiTranslator, utilfeature.DefaultFeatureGate)
 
+	// Index pods by their PVC keys, so we don't need to iterate over all pods
+	// every time to find the pods that reference a given PVC.
 	err = common.AddPodPVCIndexerIfNotPresent(c.podIndexer)
 	if err != nil {
 		return nil, fmt.Errorf("could not initialize SELinux warning controller: %w", err)
 	}

From 8791efc7324f2b37f5ebf22c1cf1915bda562cdc Mon Sep 17 00:00:00 2001
From: Jan Safranek
Date: Tue, 5 Nov 2024 20:03:07 +0100
Subject: [PATCH 10/14] Update property name in metrics

selinux_volume_conflict should not have a space in its label value - a space
makes the value harder to query. Use SELinuxLabel both as the human-friendly
property name (in events) and as the metric label value.
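As an illustration (a hypothetical metric definition, only to show the label
value; the real metric is defined elsewhere in Kubernetes), a space-free value
can be selected in PromQL as selinux_volume_conflict{property="SELinuxLabel"}
without any escaping:

    package main

    import (
        "fmt"

        "github.com/prometheus/client_golang/prometheus"
    )

    func main() {
        // Hypothetical counter with a "property" label, for illustration only.
        conflicts := prometheus.NewCounterVec(
            prometheus.CounterOpts{
                Name: "selinux_volume_conflict",
                Help: "Conflicts between pods sharing a volume.",
            },
            []string{"property"},
        )
        prometheus.MustRegister(conflicts)

        // PromQL: selinux_volume_conflict{property="SELinuxLabel"}
        conflicts.WithLabelValues("SELinuxLabel").Inc()
        fmt.Println("conflict recorded")
    }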
--- .../volume/selinuxwarning/cache/conflict.go | 2 +- .../volume/selinuxwarning/cache/volumecache.go | 6 +++--- .../selinuxwarning/cache/volumecache_test.go | 6 +++--- .../selinux_warning_controller_test.go | 14 +++++++------- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/pkg/controller/volume/selinuxwarning/cache/conflict.go b/pkg/controller/volume/selinuxwarning/cache/conflict.go index 45f6121cdf3..34d874cfbbb 100644 --- a/pkg/controller/volume/selinuxwarning/cache/conflict.go +++ b/pkg/controller/volume/selinuxwarning/cache/conflict.go @@ -25,7 +25,7 @@ import ( // A single conflict between two Pods using the same volume with different SELinux labels or policies. // Event should be sent to both of them. type Conflict struct { - // Human-readable name of the conflicting property, like "SELinux label" + // Human-readable name of the conflicting property + value of "property" label of selinux_volume_conflict metric. PropertyName string // Reason for the event, to be set as the Event.Reason field. EventReason string diff --git a/pkg/controller/volume/selinuxwarning/cache/volumecache.go b/pkg/controller/volume/selinuxwarning/cache/volumecache.go index df8b1169e59..7d372399895 100644 --- a/pkg/controller/volume/selinuxwarning/cache/volumecache.go +++ b/pkg/controller/volume/selinuxwarning/cache/volumecache.go @@ -140,14 +140,14 @@ func (c *volumeCache) AddVolume(logger klog.Logger, volumeName v1.UniqueVolumeNa if otherPodInfo.seLinuxLabel != label { // Send conflict to both pods conflicts = append(conflicts, Conflict{ - PropertyName: "SELinux label", + PropertyName: "SELinuxLabel", EventReason: "SELinuxLabelConflict", Pod: podKey, PropertyValue: label, OtherPod: otherPodKey, OtherPropertyValue: otherPodInfo.seLinuxLabel, }, Conflict{ - PropertyName: "SELinux label", + PropertyName: "SELinuxLabel", EventReason: "SELinuxLabelConflict", Pod: otherPodKey, PropertyValue: otherPodInfo.seLinuxLabel, @@ -250,7 +250,7 @@ func (c *volumeCache) SendConflicts(logger klog.Logger, ch chan<- Conflict) { } if podInfo.seLinuxLabel != otherPodInfo.seLinuxLabel { ch <- Conflict{ - PropertyName: "SELinux label", + PropertyName: "SELinuxLabel", EventReason: "SELinuxLabelConflict", Pod: podKey, PropertyValue: podInfo.seLinuxLabel, diff --git a/pkg/controller/volume/selinuxwarning/cache/volumecache_test.go b/pkg/controller/volume/selinuxwarning/cache/volumecache_test.go index 92f2d9d0a7a..660206676cf 100644 --- a/pkg/controller/volume/selinuxwarning/cache/volumecache_test.go +++ b/pkg/controller/volume/selinuxwarning/cache/volumecache_test.go @@ -204,7 +204,7 @@ func TestVolumeCache_AddVolumeSendConflicts(t *testing.T) { }, expectedConflicts: []Conflict{ { - PropertyName: "SELinux label", + PropertyName: "SELinuxLabel", EventReason: "SELinuxLabelConflict", Pod: cache.ObjectName{Namespace: "testns", Name: "testpod"}, PropertyValue: "label-new", @@ -254,7 +254,7 @@ func TestVolumeCache_AddVolumeSendConflicts(t *testing.T) { OtherPropertyValue: "MountOption", }, { - PropertyName: "SELinux label", + PropertyName: "SELinuxLabel", EventReason: "SELinuxLabelConflict", Pod: cache.ObjectName{Namespace: "testns", Name: "testpod"}, PropertyValue: "label-new", @@ -297,7 +297,7 @@ func TestVolumeCache_AddVolumeSendConflicts(t *testing.T) { OtherPropertyValue: "Recursive", }, { - PropertyName: "SELinux label", + PropertyName: "SELinuxLabel", EventReason: "SELinuxLabelConflict", Pod: cache.ObjectName{Namespace: "ns3", Name: "pod3-1"}, PropertyValue: "label-new", diff --git 
a/pkg/controller/volume/selinuxwarning/selinux_warning_controller_test.go b/pkg/controller/volume/selinuxwarning/selinux_warning_controller_test.go index ee44834da4b..21e897cd159 100644 --- a/pkg/controller/volume/selinuxwarning/selinux_warning_controller_test.go +++ b/pkg/controller/volume/selinuxwarning/selinux_warning_controller_test.go @@ -192,7 +192,7 @@ func TestSELinuxWarningController_Sync(t *testing.T) { pod: cache.ObjectName{Namespace: namespace, Name: "pod1"}, conflicts: []volumecache.Conflict{ { - PropertyName: "SELinux label", + PropertyName: "SELinuxLabel", EventReason: "SELinuxLabelConflict", Pod: cache.ObjectName{Namespace: namespace, Name: "pod1"}, PropertyValue: "label1", @@ -200,7 +200,7 @@ func TestSELinuxWarningController_Sync(t *testing.T) { OtherPropertyValue: "label2", }, { - PropertyName: "SELinux label", + PropertyName: "SELinuxLabel", EventReason: "SELinuxLabelConflict", Pod: cache.ObjectName{Namespace: namespace, Name: "pod2"}, PropertyValue: "label2", @@ -218,8 +218,8 @@ func TestSELinuxWarningController_Sync(t *testing.T) { }, }, expectedEvents: []string{ - `Normal SELinuxLabelConflict SELinux label "label1" conflicts with pod pod2 that uses the same volume as this pod with SELinux label "label2". If both pods land on the same node, only one of them may access the volume.`, - `Normal SELinuxLabelConflict SELinux label "label2" conflicts with pod pod1 that uses the same volume as this pod with SELinux label "label1". If both pods land on the same node, only one of them may access the volume.`, + `Normal SELinuxLabelConflict SELinuxLabel "label1" conflicts with pod pod2 that uses the same volume as this pod with SELinuxLabel "label2". If both pods land on the same node, only one of them may access the volume.`, + `Normal SELinuxLabelConflict SELinuxLabel "label2" conflicts with pod pod1 that uses the same volume as this pod with SELinuxLabel "label1". If both pods land on the same node, only one of them may access the volume.`, }, }, { @@ -237,7 +237,7 @@ func TestSELinuxWarningController_Sync(t *testing.T) { pod: cache.ObjectName{Namespace: namespace, Name: "pod1"}, conflicts: []volumecache.Conflict{ { - PropertyName: "SELinux label", + PropertyName: "SELinuxLabel", EventReason: "SELinuxLabelConflict", Pod: cache.ObjectName{Namespace: namespace, Name: "pod1"}, PropertyValue: "label1", @@ -245,7 +245,7 @@ func TestSELinuxWarningController_Sync(t *testing.T) { OtherPropertyValue: "label2", }, { - PropertyName: "SELinux label", + PropertyName: "SELinuxLabel", EventReason: "SELinuxLabelConflict", Pod: cache.ObjectName{Namespace: namespace, Name: "pod2"}, PropertyValue: "label2", @@ -264,7 +264,7 @@ func TestSELinuxWarningController_Sync(t *testing.T) { }, expectedEvents: []string{ // Event for the missing pod is not sent - `Normal SELinuxLabelConflict SELinux label "label1" conflicts with pod pod2 that uses the same volume as this pod with SELinux label "label2". If both pods land on the same node, only one of them may access the volume.`, + `Normal SELinuxLabelConflict SELinuxLabel "label1" conflicts with pod pod2 that uses the same volume as this pod with SELinuxLabel "label2". 
If both pods land on the same node, only one of them may access the volume.`,
 			},
 		},
 		{

From 8875f4daf0de20200bbd179a3d0df3bd2bcbef6b Mon Sep 17 00:00:00 2001
From: Jan Safranek
Date: Tue, 5 Nov 2024 20:14:22 +0100
Subject: [PATCH 11/14] Describe what the input list of SELinux options is

--- pkg/volume/util/selinux.go | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/pkg/volume/util/selinux.go b/pkg/volume/util/selinux.go
index d1a088cc4a2..f3507054f3a 100644
--- a/pkg/volume/util/selinux.go
+++ b/pkg/volume/util/selinux.go
@@ -249,9 +249,12 @@ type SELinuxLabelInfo struct {
 }
 
 // GetMountSELinuxLabel returns SELinux labels that should be used to mount the given volume volumeSpec and podSecurityContext.
+// It expects effectiveSELinuxContainerLabels as returned by volumeutil.GetPodVolumeNames, i.e. with all SELinuxOptions
+// from all containers that use the volume in the pod, potentially expanded with PodSecurityContext.SELinuxOptions,
+// if a container's SELinuxOptions are nil.
 // It does not evaluate the volume access mode! It's up to the caller to check SELinuxMount feature gate,
 // it may need to bump different metrics based on feature gates / access modes / label anyway.
-func GetMountSELinuxLabel(volumeSpec *volume.Spec, seLinuxContainerContexts []*v1.SELinuxOptions, podSecurityContext *v1.PodSecurityContext, volumePluginMgr *volume.VolumePluginMgr, seLinuxTranslator SELinuxLabelTranslator) (SELinuxLabelInfo, error) {
+func GetMountSELinuxLabel(volumeSpec *volume.Spec, effectiveSELinuxContainerLabels []*v1.SELinuxOptions, podSecurityContext *v1.PodSecurityContext, volumePluginMgr *volume.VolumePluginMgr, seLinuxTranslator SELinuxLabelTranslator) (SELinuxLabelInfo, error) {
 	info := SELinuxLabelInfo{}
 	if !utilfeature.DefaultFeatureGate.Enabled(features.SELinuxMountReadWriteOncePod) {
 		return info, nil
@@ -269,11 +272,12 @@ func GetMountSELinuxLabel(volumeSpec *volume.Spec, seLinuxContainerContexts []*v
 	info.PluginSupportsSELinuxContextMount = pluginSupportsSELinuxContextMount
 
 	// Collect all SELinux options from all containers that use this volume.
+	// A set will squash any duplicates.
labels := sets.New[string]() - for _, containerContext := range seLinuxContainerContexts { - lbl, err := seLinuxTranslator.SELinuxOptionsToFileLabel(containerContext) + for _, containerLabel := range effectiveSELinuxContainerLabels { + lbl, err := seLinuxTranslator.SELinuxOptionsToFileLabel(containerLabel) if err != nil { - fullErr := fmt.Errorf("failed to construct SELinux label from context %q: %w", containerContext, err) + fullErr := fmt.Errorf("failed to construct SELinux label from context %q: %w", containerLabel, err) return info, fullErr } labels.Insert(lbl) From 4b99a342f49114bf356be415adbaa4a7026e762d Mon Sep 17 00:00:00 2001 From: Jan Safranek Date: Wed, 6 Nov 2024 11:20:14 +0100 Subject: [PATCH 12/14] Move feature gate to ControllerDescriptor --- cmd/kube-controller-manager/app/core.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cmd/kube-controller-manager/app/core.go b/cmd/kube-controller-manager/app/core.go index 353557e071e..e2837d4398a 100644 --- a/cmd/kube-controller-manager/app/core.go +++ b/cmd/kube-controller-manager/app/core.go @@ -895,9 +895,11 @@ func startStorageVersionGarbageCollectorController(ctx context.Context, controll func newSELinuxWarningControllerDescriptor() *ControllerDescriptor { return &ControllerDescriptor{ name: names.SELinuxWarningController, - aliases: []string{"selinux-warning"}, initFunc: startSELinuxWarningController, isDisabledByDefault: true, + requiredFeatureGates: []featuregate.Feature{ + features.SELinuxChangePolicy, + }, } } From 52b47ea4def737fc491df4a6f61eac8890788ac5 Mon Sep 17 00:00:00 2001 From: Jan Safranek Date: Wed, 6 Nov 2024 11:20:48 +0100 Subject: [PATCH 13/14] Don't create a new controller context --- cmd/kube-controller-manager/app/core.go | 1 - 1 file changed, 1 deletion(-) diff --git a/cmd/kube-controller-manager/app/core.go b/cmd/kube-controller-manager/app/core.go index e2837d4398a..9386b4dd11f 100644 --- a/cmd/kube-controller-manager/app/core.go +++ b/cmd/kube-controller-manager/app/core.go @@ -915,7 +915,6 @@ func startSELinuxWarningController(ctx context.Context, controllerContext Contro return nil, true, fmt.Errorf("failed to probe volume plugins when starting SELinux warning controller: %w", err) } - ctx = klog.NewContext(ctx, logger) seLinuxController, err := selinuxwarning.NewController( ctx, From df88b1a771b91a98caa8b1a1bb4280fb07795bfc Mon Sep 17 00:00:00 2001 From: Jan Safranek Date: Wed, 6 Nov 2024 13:05:36 +0100 Subject: [PATCH 14/14] Add all RBAC rules for the SELinux controller The SELinuxWarningController does not necessarily need permissions to read the objects, because it gets them through a shared informer instantiated by KCM itself, but let's list the permissions for completeness. 
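For illustration, a rough standalone sketch of that read path (simplified; KCM
wires its informers differently): the controller only consumes listers, while
the shared informer factory issues the list/watch calls that need these verbs.

    package main

    import (
        "context"
        "fmt"
        "time"

        "k8s.io/apimachinery/pkg/labels"
        "k8s.io/client-go/informers"
        "k8s.io/client-go/kubernetes"
        "k8s.io/client-go/tools/clientcmd"
    )

    func main() {
        config, err := clientcmd.BuildConfigFromFlags("", clientcmd.RecommendedHomeFile)
        if err != nil {
            panic(err)
        }
        client := kubernetes.NewForConfigOrDie(config)

        // One shared factory serves all controllers; its watches are what
        // actually need the get/list/watch verbs granted below.
        factory := informers.NewSharedInformerFactory(client, 10*time.Minute)
        podLister := factory.Core().V1().Pods().Lister()

        ctx, cancel := context.WithCancel(context.Background())
        defer cancel()
        factory.Start(ctx.Done())
        factory.WaitForCacheSync(ctx.Done())

        // Reads are served from the informer cache, not from the API server.
        pods, err := podLister.Pods("default").List(labels.Everything())
        if err != nil {
            panic(err)
        }
        fmt.Println("pods in default:", len(pods))
    }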
--- .../auth/authorizer/rbac/bootstrappolicy/controller_policy.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/controller_policy.go b/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/controller_policy.go index a6e10a663b5..c3db5899194 100644 --- a/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/controller_policy.go +++ b/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/controller_policy.go @@ -506,6 +506,10 @@ func buildControllerRoles() ([]rbacv1.ClusterRole, []rbacv1.ClusterRoleBinding) ObjectMeta: metav1.ObjectMeta{Name: saRolePrefix + "selinux-warning-controller"}, Rules: []rbacv1.PolicyRule{ eventsRule(), + rbacv1helpers.NewRule("get", "list", "watch").Groups(legacyGroup).Resources("persistentvolumes").RuleOrDie(), + rbacv1helpers.NewRule("get", "list", "watch").Groups(legacyGroup).Resources("persistentvolumeclaims").RuleOrDie(), + rbacv1helpers.NewRule("get", "list", "watch").Groups(legacyGroup).Resources("pods").RuleOrDie(), + rbacv1helpers.NewRule("get", "list", "watch").Groups(storageGroup).Resources("csidrivers").RuleOrDie(), }, }) }
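One possible way to sanity-check grants like these (not part of this patch, and
it assumes the controller runs as the selinux-warning-controller service
account in kube-system) is to ask the API server directly with a
SubjectAccessReview:

    package main

    import (
        "context"
        "fmt"

        authorizationv1 "k8s.io/api/authorization/v1"
        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
        "k8s.io/client-go/kubernetes"
        "k8s.io/client-go/tools/clientcmd"
    )

    func main() {
        config, err := clientcmd.BuildConfigFromFlags("", clientcmd.RecommendedHomeFile)
        if err != nil {
            panic(err)
        }
        client := kubernetes.NewForConfigOrDie(config)

        // Ask whether the controller's service account may watch CSIDrivers,
        // one of the rules added above.
        sar := &authorizationv1.SubjectAccessReview{
            Spec: authorizationv1.SubjectAccessReviewSpec{
                User: "system:serviceaccount:kube-system:selinux-warning-controller",
                ResourceAttributes: &authorizationv1.ResourceAttributes{
                    Group:    "storage.k8s.io",
                    Resource: "csidrivers",
                    Verb:     "watch",
                },
            },
        }
        resp, err := client.AuthorizationV1().SubjectAccessReviews().Create(context.TODO(), sar, metav1.CreateOptions{})
        if err != nil {
            panic(err)
        }
        fmt.Println("allowed:", resp.Status.Allowed)
    }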