kubelet: KEP-3857: Recursive Read-only (RRO) mounts

See <https://kep.k8s.io/3857>.

An example manifest:
```yaml
apiVersion: v1
kind: Pod
metadata:
  name: rro
spec:
  volumes:
    - name: mnt
      hostPath:
        # tmpfs is mounted on /mnt/tmpfs
        path: /mnt
  containers:
    - name: busybox
      image: busybox
      args: ["sleep", "infinity"]
      volumeMounts:
        # /mnt-rro/tmpfs is not writable
        - name: mnt
          mountPath: /mnt-rro
          readOnly: true
          mountPropagation: None
          recursiveReadOnly: IfPossible
        # /mnt-ro/tmpfs is writable
        - name: mnt
          mountPath: /mnt-ro
          readOnly: true
        # /mnt-rw/tmpfs is writable
        - name: mnt
          mountPath: /mnt-rw
```

Requirements:
- Feature gate "RecursiveReadOnlyMounts" to be enabled
- Linux kernel >= 5.12
- runc >= 1.1

Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
This commit is contained in:
Akihiro Suda 2024-03-09 09:48:17 +09:00
parent 6f12e1d8e5
commit c7f52b34f3
No known key found for this signature in database
GPG Key ID: 49524C6F9F638F1A
6 changed files with 213 additions and 15 deletions

View File

@ -440,6 +440,9 @@ type Mount struct {
HostPath string HostPath string
// Whether the mount is read-only. // Whether the mount is read-only.
ReadOnly bool ReadOnly bool
// Whether the mount is recursive read-only.
// Must not be true if ReadOnly is false.
RecursiveReadOnly bool
// Whether the mount needs SELinux relabeling // Whether the mount needs SELinux relabeling
SELinuxRelabel bool SELinuxRelabel bool
// Requested propagation mode // Requested propagation mode

View File

@ -242,7 +242,7 @@ func shouldMountHostsFile(pod *v1.Pod, podIPs []string) bool {
} }
// makeMounts determines the mount points for the given container. // makeMounts determines the mount points for the given container.
func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, hostDomain string, podIPs []string, podVolumes kubecontainer.VolumeMap, hu hostutil.HostUtils, subpather subpath.Interface, expandEnvs []kubecontainer.EnvVar) ([]kubecontainer.Mount, func(), error) { func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, hostDomain string, podIPs []string, podVolumes kubecontainer.VolumeMap, hu hostutil.HostUtils, subpather subpath.Interface, expandEnvs []kubecontainer.EnvVar, supportsRRO bool) ([]kubecontainer.Mount, func(), error) {
mountEtcHostsFile := shouldMountHostsFile(pod, podIPs) mountEtcHostsFile := shouldMountHostsFile(pod, podIPs)
klog.V(3).InfoS("Creating hosts mount for container", "pod", klog.KObj(pod), "containerName", container.Name, "podIPs", podIPs, "path", mountEtcHostsFile) klog.V(3).InfoS("Creating hosts mount for container", "pod", klog.KObj(pod), "containerName", container.Name, "podIPs", podIPs, "path", mountEtcHostsFile)
mounts := []kubecontainer.Mount{} mounts := []kubecontainer.Mount{}
@ -343,13 +343,22 @@ func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, h
klog.V(5).InfoS("Mount has propagation", "pod", klog.KObj(pod), "containerName", container.Name, "volumeMountName", mount.Name, "propagation", propagation) klog.V(5).InfoS("Mount has propagation", "pod", klog.KObj(pod), "containerName", container.Name, "volumeMountName", mount.Name, "propagation", propagation)
mustMountRO := vol.Mounter.GetAttributes().ReadOnly mustMountRO := vol.Mounter.GetAttributes().ReadOnly
rro, err := resolveRecursiveReadOnly(mount, supportsRRO)
if err != nil {
return nil, cleanupAction, fmt.Errorf("failed to resolve recursive read-only mode: %w", err)
}
if rro && !utilfeature.DefaultFeatureGate.Enabled(features.RecursiveReadOnlyMounts) {
return nil, cleanupAction, fmt.Errorf("recursive read-only mount needs feature gate %q to be enabled", features.RecursiveReadOnlyMounts)
}
mounts = append(mounts, kubecontainer.Mount{ mounts = append(mounts, kubecontainer.Mount{
Name: mount.Name, Name: mount.Name,
ContainerPath: containerPath, ContainerPath: containerPath,
HostPath: hostPath, HostPath: hostPath,
ReadOnly: mount.ReadOnly || mustMountRO, ReadOnly: mount.ReadOnly || mustMountRO,
SELinuxRelabel: relabelVolume, RecursiveReadOnly: rro,
Propagation: propagation, SELinuxRelabel: relabelVolume,
Propagation: propagation,
}) })
} }
if mountEtcHostsFile { if mountEtcHostsFile {
@ -554,6 +563,8 @@ func (kl *Kubelet) GetPodCgroupParent(pod *v1.Pod) string {
// GenerateRunContainerOptions generates the RunContainerOptions, which can be used by // GenerateRunContainerOptions generates the RunContainerOptions, which can be used by
// the container runtime to set parameters for launching a container. // the container runtime to set parameters for launching a container.
func (kl *Kubelet) GenerateRunContainerOptions(ctx context.Context, pod *v1.Pod, container *v1.Container, podIP string, podIPs []string) (*kubecontainer.RunContainerOptions, func(), error) { func (kl *Kubelet) GenerateRunContainerOptions(ctx context.Context, pod *v1.Pod, container *v1.Container, podIP string, podIPs []string) (*kubecontainer.RunContainerOptions, func(), error) {
supportsRRO := kl.runtimeClassSupportsRecursiveReadOnlyMounts(pod)
opts, err := kl.containerManager.GetResources(pod, container) opts, err := kl.containerManager.GetResources(pod, container)
if err != nil { if err != nil {
return nil, nil, err return nil, nil, err
@ -587,7 +598,7 @@ func (kl *Kubelet) GenerateRunContainerOptions(ctx context.Context, pod *v1.Pod,
opts.Envs = append(opts.Envs, envs...) opts.Envs = append(opts.Envs, envs...)
// only podIPs is sent to makeMounts, as podIPs is populated even if dual-stack feature flag is not enabled. // only podIPs is sent to makeMounts, as podIPs is populated even if dual-stack feature flag is not enabled.
mounts, cleanupAction, err := makeMounts(pod, kl.getPodDir(pod.UID), container, hostname, hostDomainName, podIPs, volumes, kl.hostutil, kl.subpather, opts.Envs) mounts, cleanupAction, err := makeMounts(pod, kl.getPodDir(pod.UID), container, hostname, hostDomainName, podIPs, volumes, kl.hostutil, kl.subpather, opts.Envs, supportsRRO)
if err != nil { if err != nil {
return nil, cleanupAction, err return nil, cleanupAction, err
} }
@ -2114,6 +2125,8 @@ func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecon
defaultWaitingState = v1.ContainerState{Waiting: &v1.ContainerStateWaiting{Reason: PodInitializing}} defaultWaitingState = v1.ContainerState{Waiting: &v1.ContainerStateWaiting{Reason: PodInitializing}}
} }
supportsRRO := kl.runtimeClassSupportsRecursiveReadOnlyMounts(pod)
for _, container := range containers { for _, container := range containers {
status := &v1.ContainerStatus{ status := &v1.ContainerStatus{
Name: container.Name, Name: container.Name,
@ -2131,6 +2144,16 @@ func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecon
} }
if vol.ReadOnly { if vol.ReadOnly {
rroMode := v1.RecursiveReadOnlyDisabled rroMode := v1.RecursiveReadOnlyDisabled
if b, err := resolveRecursiveReadOnly(vol, supportsRRO); err != nil {
klog.ErrorS(err, "failed to resolve recursive read-only mode", "mode", *vol.RecursiveReadOnly)
} else if b {
if utilfeature.DefaultFeatureGate.Enabled(features.RecursiveReadOnlyMounts) {
rroMode = v1.RecursiveReadOnlyEnabled
} else {
klog.ErrorS(nil, "recursive read-only mount needs feature gate to be enabled",
"featureGate", features.RecursiveReadOnlyMounts)
}
}
volStatus.RecursiveReadOnly = &rroMode // Disabled or Enabled volStatus.RecursiveReadOnly = &rroMode // Disabled or Enabled
} }
status.VolumeMounts = append(status.VolumeMounts, volStatus) status.VolumeMounts = append(status.VolumeMounts, volStatus)
@ -2420,3 +2443,49 @@ func (kl *Kubelet) cleanupOrphanedPodCgroups(pcm cm.PodContainerManager, cgroupP
go pcm.Destroy(val) go pcm.Destroy(val)
} }
} }
// runtimeClassSupportsRecursiveReadOnlyMounts reports whether the runtime
// handler matching the pod's runtime class advertises support for recursive
// read-only mounts. A pod without spec.runtimeClassName is looked up under the
// empty name.
//
// NOTE(review): the pod's runtime class name is passed through unchanged and
// compared against runtime handler names; confirm that the two always coincide
// (presumably a RuntimeClass can point at a differently named handler).
func (kl *Kubelet) runtimeClassSupportsRecursiveReadOnlyMounts(pod *v1.Pod) bool {
	name := ""
	if rc := pod.Spec.RuntimeClassName; rc != nil {
		name = *rc
	}
	return runtimeClassSupportsRecursiveReadOnlyMounts(name, kl.runtimeState.runtimeHandlers())
}
// runtimeClassSupportsRecursiveReadOnlyMounts looks up runtimeClassName among
// runtimeHandlers and returns the SupportsRecursiveReadOnlyMounts flag of the
// first handler whose Name matches. When no handler matches, an error is
// logged and false is returned.
//
// The kubelet feature gate is not consulted here; callers check it separately.
func runtimeClassSupportsRecursiveReadOnlyMounts(runtimeClassName string, runtimeHandlers []kubecontainer.RuntimeHandler) bool {
	for i := range runtimeHandlers {
		handler := &runtimeHandlers[i]
		if handler.Name != runtimeClassName {
			continue
		}
		return handler.SupportsRecursiveReadOnlyMounts
	}
	klog.ErrorS(nil, "unknown runtime class", "runtimeClassName", runtimeClassName)
	return false
}
// resolveRecursiveReadOnly decides whether the volume mount m should be made
// recursively read-only, given whether the runtime supports RRO mounts.
//
// Results:
//   - (false, nil) when m.RecursiveReadOnly is unset or Disabled.
//   - an error when RRO is requested but m is not read-only, or when
//     m.MountPropagation is set to anything other than None.
//   - for IfPossible: (runtimeSupportsRRO, nil).
//   - for Enabled: (true, nil), or an error if the runtime lacks support.
//   - an error for any other mode value.
//
// The checks run in the order listed, so a non-read-only mount is reported as
// such even if its mode value is also unknown.
func resolveRecursiveReadOnly(m v1.VolumeMount, runtimeSupportsRRO bool) (bool, error) {
	// An unset field is treated the same as an explicit Disabled.
	requested := v1.RecursiveReadOnlyDisabled
	if m.RecursiveReadOnly != nil {
		requested = *m.RecursiveReadOnly
	}
	if requested == v1.RecursiveReadOnlyDisabled {
		return false, nil
	}

	// Preconditions shared by every non-Disabled mode.
	if !m.ReadOnly {
		return false, fmt.Errorf("volume %q requested recursive read-only mode, but it is not read-only", m.Name)
	}
	if prop := m.MountPropagation; prop != nil && *prop != v1.MountPropagationNone {
		return false, fmt.Errorf("volume %q requested recursive read-only mode, but it is not compatible with propagation %q", m.Name, *prop)
	}

	switch requested {
	case v1.RecursiveReadOnlyIfPossible:
		// Best effort: silently fall back when the runtime cannot do RRO.
		return runtimeSupportsRRO, nil
	case v1.RecursiveReadOnlyEnabled:
		if runtimeSupportsRRO {
			return true, nil
		}
		return false, fmt.Errorf("volume %q requested recursive read-only mode, but it is not supported by the runtime", m.Name)
	}
	return false, fmt.Errorf("unknown recursive read-only mode %q", requested)
}

View File

@ -42,6 +42,7 @@ func TestMakeMounts(t *testing.T) {
testCases := map[string]struct { testCases := map[string]struct {
container v1.Container container v1.Container
podVolumes kubecontainer.VolumeMap podVolumes kubecontainer.VolumeMap
supportsRRO bool
expectErr bool expectErr bool
expectedErrMsg string expectedErrMsg string
expectedMounts []kubecontainer.Mount expectedMounts []kubecontainer.Mount
@ -250,7 +251,7 @@ func TestMakeMounts(t *testing.T) {
}, },
} }
mounts, _, err := makeMounts(&pod, "/pod", &tc.container, "fakepodname", "", []string{""}, tc.podVolumes, fhu, fsp, nil) mounts, _, err := makeMounts(&pod, "/pod", &tc.container, "fakepodname", "", []string{""}, tc.podVolumes, fhu, fsp, nil, tc.supportsRRO)
// validate only the error if we expect an error // validate only the error if we expect an error
if tc.expectErr { if tc.expectErr {

View File

@ -58,6 +58,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/status" "k8s.io/kubernetes/pkg/kubelet/status"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types" kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
netutils "k8s.io/utils/net" netutils "k8s.io/utils/net"
"k8s.io/utils/ptr"
) )
var containerRestartPolicyAlways = v1.ContainerRestartPolicyAlways var containerRestartPolicyAlways = v1.ContainerRestartPolicyAlways
@ -6075,3 +6076,126 @@ func TestParseGetSubIdsOutput(t *testing.T) {
}) })
} }
} }
// TestResolveRecursiveReadOnly exercises resolveRecursiveReadOnly across the
// combinations of ReadOnly, RecursiveReadOnly mode, MountPropagation and
// runtime support.
//
// Each case runs as a named subtest so a failure identifies the exact input.
// The returned bool is checked in every case, including error cases, where the
// table expects false.
func TestResolveRecursiveReadOnly(t *testing.T) {
	testCases := []struct {
		name               string
		m                  v1.VolumeMount
		runtimeSupportsRRO bool
		expected           bool
		expectedErr        string // empty means no error is expected
	}{
		// RRO not requested.
		{
			name:               "rw mount without RRO mode",
			m:                  v1.VolumeMount{Name: "rw"},
			runtimeSupportsRRO: true,
			expected:           false,
		},
		{
			name:               "ro mount without RRO mode",
			m:                  v1.VolumeMount{Name: "ro", ReadOnly: true},
			runtimeSupportsRRO: true,
			expected:           false,
		},
		{
			name:               "ro mount with RRO Disabled",
			m:                  v1.VolumeMount{Name: "ro", ReadOnly: true, RecursiveReadOnly: ptr.To(v1.RecursiveReadOnlyDisabled)},
			runtimeSupportsRRO: true,
			expected:           false,
		},
		// IfPossible.
		{
			name:               "IfPossible with runtime support",
			m:                  v1.VolumeMount{Name: "rro-if-possible", ReadOnly: true, RecursiveReadOnly: ptr.To(v1.RecursiveReadOnlyIfPossible)},
			runtimeSupportsRRO: true,
			expected:           true,
		},
		{
			// Fallback path: the request is silently downgraded, not rejected.
			name:               "IfPossible without runtime support",
			m:                  v1.VolumeMount{Name: "rro-if-possible", ReadOnly: true, RecursiveReadOnly: ptr.To(v1.RecursiveReadOnlyIfPossible)},
			runtimeSupportsRRO: false,
			expected:           false,
		},
		{
			name:               "IfPossible with propagation None",
			m:                  v1.VolumeMount{Name: "rro-if-possible", ReadOnly: true, RecursiveReadOnly: ptr.To(v1.RecursiveReadOnlyIfPossible), MountPropagation: ptr.To(v1.MountPropagationNone)},
			runtimeSupportsRRO: true,
			expected:           true,
		},
		{
			name:               "IfPossible with propagation HostToContainer",
			m:                  v1.VolumeMount{Name: "rro-if-possible", ReadOnly: true, RecursiveReadOnly: ptr.To(v1.RecursiveReadOnlyIfPossible), MountPropagation: ptr.To(v1.MountPropagationHostToContainer)},
			runtimeSupportsRRO: true,
			expected:           false,
			expectedErr:        "not compatible with propagation",
		},
		{
			name:               "IfPossible with propagation Bidirectional",
			m:                  v1.VolumeMount{Name: "rro-if-possible", ReadOnly: true, RecursiveReadOnly: ptr.To(v1.RecursiveReadOnlyIfPossible), MountPropagation: ptr.To(v1.MountPropagationBidirectional)},
			runtimeSupportsRRO: true,
			expected:           false,
			expectedErr:        "not compatible with propagation",
		},
		{
			name:               "IfPossible on non-read-only mount with runtime support",
			m:                  v1.VolumeMount{Name: "rro-if-possible", ReadOnly: false, RecursiveReadOnly: ptr.To(v1.RecursiveReadOnlyIfPossible)},
			runtimeSupportsRRO: true,
			expected:           false,
			expectedErr:        "not read-only",
		},
		{
			name:               "IfPossible on non-read-only mount without runtime support",
			m:                  v1.VolumeMount{Name: "rro-if-possible", ReadOnly: false, RecursiveReadOnly: ptr.To(v1.RecursiveReadOnlyIfPossible)},
			runtimeSupportsRRO: false,
			expected:           false,
			expectedErr:        "not read-only",
		},
		// Enabled.
		{
			name:               "Enabled with runtime support",
			m:                  v1.VolumeMount{Name: "rro", ReadOnly: true, RecursiveReadOnly: ptr.To(v1.RecursiveReadOnlyEnabled)},
			runtimeSupportsRRO: true,
			expected:           true,
		},
		{
			name:               "Enabled with propagation None",
			m:                  v1.VolumeMount{Name: "rro", ReadOnly: true, RecursiveReadOnly: ptr.To(v1.RecursiveReadOnlyEnabled), MountPropagation: ptr.To(v1.MountPropagationNone)},
			runtimeSupportsRRO: true,
			expected:           true,
		},
		{
			name:               "Enabled with propagation HostToContainer",
			m:                  v1.VolumeMount{Name: "rro", ReadOnly: true, RecursiveReadOnly: ptr.To(v1.RecursiveReadOnlyEnabled), MountPropagation: ptr.To(v1.MountPropagationHostToContainer)},
			runtimeSupportsRRO: true,
			expected:           false,
			expectedErr:        "not compatible with propagation",
		},
		{
			name:               "Enabled with propagation Bidirectional",
			m:                  v1.VolumeMount{Name: "rro", ReadOnly: true, RecursiveReadOnly: ptr.To(v1.RecursiveReadOnlyEnabled), MountPropagation: ptr.To(v1.MountPropagationBidirectional)},
			runtimeSupportsRRO: true,
			expected:           false,
			expectedErr:        "not compatible with propagation",
		},
		{
			name:               "Enabled on non-read-only mount",
			m:                  v1.VolumeMount{Name: "rro", RecursiveReadOnly: ptr.To(v1.RecursiveReadOnlyEnabled)},
			runtimeSupportsRRO: true,
			expected:           false,
			expectedErr:        "not read-only",
		},
		{
			name:               "Enabled without runtime support",
			m:                  v1.VolumeMount{Name: "rro", ReadOnly: true, RecursiveReadOnly: ptr.To(v1.RecursiveReadOnlyEnabled)},
			runtimeSupportsRRO: false,
			expected:           false,
			expectedErr:        "not supported by the runtime",
		},
		// Invalid mode value.
		{
			name:               "unknown mode",
			m:                  v1.VolumeMount{Name: "invalid", ReadOnly: true, RecursiveReadOnly: ptr.To(v1.RecursiveReadOnlyMode("foo"))},
			runtimeSupportsRRO: true,
			expected:           false,
			expectedErr:        "unknown recursive read-only mode",
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			got, err := resolveRecursiveReadOnly(tc.m, tc.runtimeSupportsRRO)
			t.Logf("resolveRecursiveReadOnly(%+v, %v) = (%v, %v)", tc.m, tc.runtimeSupportsRRO, got, err)
			if tc.expectedErr != "" {
				assert.ErrorContains(t, err, tc.expectedErr)
			} else {
				assert.NoError(t, err)
			}
			// Checked on the error path too: a failed resolution must not
			// report RRO as enabled.
			assert.Equal(t, tc.expected, got)
		})
	}
}

View File

@ -90,7 +90,7 @@ func TestMakeMountsWindows(t *testing.T) {
podDir, err := os.MkdirTemp("", "test-rotate-logs") podDir, err := os.MkdirTemp("", "test-rotate-logs")
require.NoError(t, err) require.NoError(t, err)
defer os.RemoveAll(podDir) defer os.RemoveAll(podDir)
mounts, _, err := makeMounts(&pod, podDir, &container, "fakepodname", "", []string{""}, podVolumes, fhu, fsp, nil) mounts, _, err := makeMounts(&pod, podDir, &container, "fakepodname", "", []string{""}, podVolumes, fhu, fsp, nil, false)
require.NoError(t, err) require.NoError(t, err)
expectedMounts := []kubecontainer.Mount{ expectedMounts := []kubecontainer.Mount{

View File

@ -430,11 +430,12 @@ func (m *kubeGenericRuntimeManager) makeMounts(opts *kubecontainer.RunContainerO
v := opts.Mounts[idx] v := opts.Mounts[idx]
selinuxRelabel := v.SELinuxRelabel && selinux.GetEnabled() selinuxRelabel := v.SELinuxRelabel && selinux.GetEnabled()
mount := &runtimeapi.Mount{ mount := &runtimeapi.Mount{
HostPath: v.HostPath, HostPath: v.HostPath,
ContainerPath: v.ContainerPath, ContainerPath: v.ContainerPath,
Readonly: v.ReadOnly, Readonly: v.ReadOnly,
SelinuxRelabel: selinuxRelabel, SelinuxRelabel: selinuxRelabel,
Propagation: v.Propagation, Propagation: v.Propagation,
RecursiveReadOnly: v.RecursiveReadOnly,
} }
volumeMounts = append(volumeMounts, mount) volumeMounts = append(volumeMounts, mount)