From c73308e90ac1a1587a2b7c95c1a199515e65851d Mon Sep 17 00:00:00 2001 From: Jan Safranek Date: Thu, 3 Oct 2024 10:38:07 +0200 Subject: [PATCH] Introduce SELinuxChangePolicy Feature gate + the API --- pkg/apis/core/types.go | 42 +++++++++++++++++++ pkg/features/kube_features.go | 9 ++++ pkg/features/versioned_kube_features.go | 4 ++ staging/src/k8s.io/api/core/v1/types.go | 42 +++++++++++++++++++ .../test_data/versioned_feature_list.yaml | 6 +++ 5 files changed, 103 insertions(+) diff --git a/pkg/apis/core/types.go b/pkg/apis/core/types.go index cf3879fd78b..bfad02c78f8 100644 --- a/pkg/apis/core/types.go +++ b/pkg/apis/core/types.go @@ -3684,6 +3684,22 @@ const ( SupplementalGroupsPolicyStrict SupplementalGroupsPolicy = "Strict" ) +// PodSELinuxChangePolicy defines how the container's SELinux label is applied to all volumes used by the Pod. +type PodSELinuxChangePolicy string + +const ( + // Recursive relabeling of all Pod volumes by the container runtime. + // This may be slow for large volumes, but allows mixing privileged and unprivileged Pods sharing the same volume on the same node. + SELinuxChangePolicyRecursive PodSELinuxChangePolicy = "Recursive" + // MountOption mounts all eligible Pod volumes with `-o context` mount option. + // This requires all Pods that share the same volume to use the same SELinux label. + // It is not possible to share the same volume among privileged and unprivileged Pods. + // Eligible volumes are in-tree FibreChannel and iSCSI volumes, and all CSI volumes + // whose CSI driver announces SELinux support by setting spec.seLinuxMount: true in their + // CSIDriver instance. Other volumes are always re-labelled recursively. + SELinuxChangePolicyMountOption PodSELinuxChangePolicy = "MountOption" +) + // PodSecurityContext holds pod-level security attributes and common container settings. // Some fields are also present in container.securityContext. 
Field values of // container.securityContext take precedence over field values of PodSecurityContext. @@ -3820,6 +3836,32 @@ type PodSecurityContext struct { // Note that this field cannot be set when spec.os.name is windows. // +optional AppArmorProfile *AppArmorProfile + // seLinuxChangePolicy defines how the container's SELinux label is applied to all volumes used by the Pod. + // It has no effect on nodes that do not support SELinux or on volumes that do not support SELinux. + // Valid values are "MountOption" and "Recursive". + // + // "Recursive" means relabeling of all files on all Pod volumes by the container runtime. + // This may be slow for large volumes, but allows mixing privileged and unprivileged Pods sharing the same volume on the same node. + // + // "MountOption" mounts all eligible Pod volumes with `-o context` mount option. + // This requires all Pods that share the same volume to use the same SELinux label. + // It is not possible to share the same volume among privileged and unprivileged Pods. + // Eligible volumes are in-tree FibreChannel and iSCSI volumes, and all CSI volumes + // whose CSI driver announces SELinux support by setting spec.seLinuxMount: true in their + // CSIDriver instance. Other volumes are always re-labelled recursively. + // "MountOption" value is allowed only when SELinuxMount feature gate is enabled. + // + // If not specified and SELinuxMount feature gate is enabled, "MountOption" is used. + // If not specified and SELinuxMount feature gate is disabled, "MountOption" is used for ReadWriteOncePod volumes + // and "Recursive" for all other volumes. + // + // This field affects only Pods that have SELinux label set, either in PodSecurityContext or in SecurityContext of all containers. + // + // All Pods that use the same volume should use the same seLinuxChangePolicy, otherwise some pods can get stuck in ContainerCreating state. + // Note that this field cannot be set when spec.os.name is windows. 
+ // +featureGate=SELinuxChangePolicy + // +optional + SELinuxChangePolicy *PodSELinuxChangePolicy } // SeccompProfile defines a pod/container's seccomp profile settings. diff --git a/pkg/features/kube_features.go b/pkg/features/kube_features.go index 69258b83528..fbecc1c3d9c 100644 --- a/pkg/features/kube_features.go +++ b/pkg/features/kube_features.go @@ -788,6 +788,15 @@ const ( // unresponsive. The feature gate is enabled by default, but should only be used // if the system supports the systemd watchdog feature and has it configured properly. SystemdWatchdog = featuregate.Feature("SystemdWatchdog") + + // owner: @jsafrane + // kep: https://kep.k8s.io/1710 + // alpha: v1.32 + // + // Speed up container startup by mounting volumes with the correct SELinux label + // instead of changing each file on the volumes recursively. + // Enables the SELinuxChangePolicy field in PodSecurityContext before SELinuxMount feature gate is enabled. + SELinuxChangePolicy featuregate.Feature = "SELinuxChangePolicy" ) func init() { diff --git a/pkg/features/versioned_kube_features.go b/pkg/features/versioned_kube_features.go index 0ed4bef24fc..7cd8227077e 100644 --- a/pkg/features/versioned_kube_features.go +++ b/pkg/features/versioned_kube_features.go @@ -634,6 +634,10 @@ var defaultVersionedKubernetesFeatureGates = map[featuregate.Feature]featuregate {Version: version.MustParse("1.28"), Default: false, PreRelease: featuregate.Beta}, }, + SELinuxChangePolicy: { + {Version: version.MustParse("1.32"), Default: false, PreRelease: featuregate.Alpha}, + }, + SELinuxMount: { {Version: version.MustParse("1.30"), Default: false, PreRelease: featuregate.Alpha}, }, diff --git a/staging/src/k8s.io/api/core/v1/types.go b/staging/src/k8s.io/api/core/v1/types.go index 66df4250867..637182c64d9 100644 --- a/staging/src/k8s.io/api/core/v1/types.go +++ b/staging/src/k8s.io/api/core/v1/types.go @@ -4310,6 +4310,22 @@ const ( SupplementalGroupsPolicyStrict SupplementalGroupsPolicy = "Strict" ) +// 
PodSELinuxChangePolicy defines how the container's SELinux label is applied to all volumes used by the Pod. +type PodSELinuxChangePolicy string + +const ( + // Recursive relabeling of all Pod volumes by the container runtime. + // This may be slow for large volumes, but allows mixing privileged and unprivileged Pods sharing the same volume on the same node. + SELinuxChangePolicyRecursive PodSELinuxChangePolicy = "Recursive" + // MountOption mounts all eligible Pod volumes with `-o context` mount option. + // This requires all Pods that share the same volume to use the same SELinux label. + // It is not possible to share the same volume among privileged and unprivileged Pods. + // Eligible volumes are in-tree FibreChannel and iSCSI volumes, and all CSI volumes + // whose CSI driver announces SELinux support by setting spec.seLinuxMount: true in their + // CSIDriver instance. Other volumes are always re-labelled recursively. + SELinuxChangePolicyMountOption PodSELinuxChangePolicy = "MountOption" +) + // PodSecurityContext holds pod-level security attributes and common container settings. // Some fields are also present in container.securityContext. Field values of // container.securityContext take precedence over field values of PodSecurityContext. @@ -4408,6 +4424,32 @@ type PodSecurityContext struct { // Note that this field cannot be set when spec.os.name is windows. // +optional AppArmorProfile *AppArmorProfile `json:"appArmorProfile,omitempty" protobuf:"bytes,11,opt,name=appArmorProfile"` + // seLinuxChangePolicy defines how the container's SELinux label is applied to all volumes used by the Pod. + // It has no effect on nodes that do not support SELinux or on volumes that do not support SELinux. + // Valid values are "MountOption" and "Recursive". + // + // "Recursive" means relabeling of all files on all Pod volumes by the container runtime. 
+ // This may be slow for large volumes, but allows mixing privileged and unprivileged Pods sharing the same volume on the same node. + // + // "MountOption" mounts all eligible Pod volumes with `-o context` mount option. + // This requires all Pods that share the same volume to use the same SELinux label. + // It is not possible to share the same volume among privileged and unprivileged Pods. + // Eligible volumes are in-tree FibreChannel and iSCSI volumes, and all CSI volumes + // whose CSI driver announces SELinux support by setting spec.seLinuxMount: true in their + // CSIDriver instance. Other volumes are always re-labelled recursively. + // "MountOption" value is allowed only when SELinuxMount feature gate is enabled. + // + // If not specified and SELinuxMount feature gate is enabled, "MountOption" is used. + // If not specified and SELinuxMount feature gate is disabled, "MountOption" is used for ReadWriteOncePod volumes + // and "Recursive" for all other volumes. + // + // This field affects only Pods that have SELinux label set, either in PodSecurityContext or in SecurityContext of all containers. + // + // All Pods that use the same volume should use the same seLinuxChangePolicy, otherwise some pods can get stuck in ContainerCreating state. + // Note that this field cannot be set when spec.os.name is windows. + // +featureGate=SELinuxChangePolicy + // +optional + SELinuxChangePolicy *PodSELinuxChangePolicy `json:"seLinuxChangePolicy,omitempty" protobuf:"bytes,12,opt,name=seLinuxChangePolicy"` } // SeccompProfile defines a pod/container's seccomp profile settings. 
diff --git a/test/featuregates_linter/test_data/versioned_feature_list.yaml b/test/featuregates_linter/test_data/versioned_feature_list.yaml index ca7fdf1590c..0c73dbfebf1 100644 --- a/test/featuregates_linter/test_data/versioned_feature_list.yaml +++ b/test/featuregates_linter/test_data/versioned_feature_list.yaml @@ -1046,6 +1046,12 @@ lockToDefault: false preRelease: Beta version: "1.28" +- name: SELinuxChangePolicy + versionedSpecs: + - default: false + lockToDefault: false + preRelease: Alpha + version: "1.32" - name: SELinuxMount versionedSpecs: - default: false