kubelet: KEP-3857: Recursive Read-only (RRO) mounts

See <https://kep.k8s.io/3857>.

An example manifest:
```yaml
apiVersion: v1
kind: Pod
metadata:
  name: rro
spec:
  volumes:
    - name: mnt
      hostPath:
        # tmpfs is mounted on /mnt/tmpfs
        path: /mnt
  containers:
    - name: busybox
      image: busybox
      args: ["sleep", "infinity"]
      volumeMounts:
        # /mnt-rro/tmpfs is not writable
        - name: mnt
          mountPath: /mnt-rro
          readOnly: true
          mountPropagation: None
          recursiveReadOnly: IfPossible
        # /mnt-ro/tmpfs is writable
        - name: mnt
          mountPath: /mnt-ro
          readOnly: true
        # /mnt-rw/tmpfs is writable
        - name: mnt
          mountPath: /mnt-rw
```

Requirements:
- Feature gate "RecursiveReadOnlyMounts" to be enabled
- Linux kernel >= 5.12
- runc >= 1.1

Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
This commit is contained in:
Akihiro Suda 2024-03-09 09:48:17 +09:00
parent 6f12e1d8e5
commit c7f52b34f3
No known key found for this signature in database
GPG Key ID: 49524C6F9F638F1A
6 changed files with 213 additions and 15 deletions

View File

@ -440,6 +440,9 @@ type Mount struct {
HostPath string
// Whether the mount is read-only.
ReadOnly bool
// Whether the mount is recursive read-only.
// Must not be true if ReadOnly is false.
RecursiveReadOnly bool
// Whether the mount needs SELinux relabeling
SELinuxRelabel bool
// Requested propagation mode

View File

@ -242,7 +242,7 @@ func shouldMountHostsFile(pod *v1.Pod, podIPs []string) bool {
}
// makeMounts determines the mount points for the given container.
func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, hostDomain string, podIPs []string, podVolumes kubecontainer.VolumeMap, hu hostutil.HostUtils, subpather subpath.Interface, expandEnvs []kubecontainer.EnvVar) ([]kubecontainer.Mount, func(), error) {
func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, hostDomain string, podIPs []string, podVolumes kubecontainer.VolumeMap, hu hostutil.HostUtils, subpather subpath.Interface, expandEnvs []kubecontainer.EnvVar, supportsRRO bool) ([]kubecontainer.Mount, func(), error) {
mountEtcHostsFile := shouldMountHostsFile(pod, podIPs)
klog.V(3).InfoS("Creating hosts mount for container", "pod", klog.KObj(pod), "containerName", container.Name, "podIPs", podIPs, "path", mountEtcHostsFile)
mounts := []kubecontainer.Mount{}
@ -343,13 +343,22 @@ func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, h
klog.V(5).InfoS("Mount has propagation", "pod", klog.KObj(pod), "containerName", container.Name, "volumeMountName", mount.Name, "propagation", propagation)
mustMountRO := vol.Mounter.GetAttributes().ReadOnly
rro, err := resolveRecursiveReadOnly(mount, supportsRRO)
if err != nil {
return nil, cleanupAction, fmt.Errorf("failed to resolve recursive read-only mode: %w", err)
}
if rro && !utilfeature.DefaultFeatureGate.Enabled(features.RecursiveReadOnlyMounts) {
return nil, cleanupAction, fmt.Errorf("recursive read-only mount needs feature gate %q to be enabled", features.RecursiveReadOnlyMounts)
}
mounts = append(mounts, kubecontainer.Mount{
Name: mount.Name,
ContainerPath: containerPath,
HostPath: hostPath,
ReadOnly: mount.ReadOnly || mustMountRO,
SELinuxRelabel: relabelVolume,
Propagation: propagation,
Name: mount.Name,
ContainerPath: containerPath,
HostPath: hostPath,
ReadOnly: mount.ReadOnly || mustMountRO,
RecursiveReadOnly: rro,
SELinuxRelabel: relabelVolume,
Propagation: propagation,
})
}
if mountEtcHostsFile {
@ -554,6 +563,8 @@ func (kl *Kubelet) GetPodCgroupParent(pod *v1.Pod) string {
// GenerateRunContainerOptions generates the RunContainerOptions, which can be used by
// the container runtime to set parameters for launching a container.
func (kl *Kubelet) GenerateRunContainerOptions(ctx context.Context, pod *v1.Pod, container *v1.Container, podIP string, podIPs []string) (*kubecontainer.RunContainerOptions, func(), error) {
supportsRRO := kl.runtimeClassSupportsRecursiveReadOnlyMounts(pod)
opts, err := kl.containerManager.GetResources(pod, container)
if err != nil {
return nil, nil, err
@ -587,7 +598,7 @@ func (kl *Kubelet) GenerateRunContainerOptions(ctx context.Context, pod *v1.Pod,
opts.Envs = append(opts.Envs, envs...)
// only podIPs is sent to makeMounts, as podIPs is populated even if dual-stack feature flag is not enabled.
mounts, cleanupAction, err := makeMounts(pod, kl.getPodDir(pod.UID), container, hostname, hostDomainName, podIPs, volumes, kl.hostutil, kl.subpather, opts.Envs)
mounts, cleanupAction, err := makeMounts(pod, kl.getPodDir(pod.UID), container, hostname, hostDomainName, podIPs, volumes, kl.hostutil, kl.subpather, opts.Envs, supportsRRO)
if err != nil {
return nil, cleanupAction, err
}
@ -2114,6 +2125,8 @@ func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecon
defaultWaitingState = v1.ContainerState{Waiting: &v1.ContainerStateWaiting{Reason: PodInitializing}}
}
supportsRRO := kl.runtimeClassSupportsRecursiveReadOnlyMounts(pod)
for _, container := range containers {
status := &v1.ContainerStatus{
Name: container.Name,
@ -2131,6 +2144,16 @@ func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecon
}
if vol.ReadOnly {
rroMode := v1.RecursiveReadOnlyDisabled
if b, err := resolveRecursiveReadOnly(vol, supportsRRO); err != nil {
klog.ErrorS(err, "failed to resolve recursive read-only mode", "mode", *vol.RecursiveReadOnly)
} else if b {
if utilfeature.DefaultFeatureGate.Enabled(features.RecursiveReadOnlyMounts) {
rroMode = v1.RecursiveReadOnlyEnabled
} else {
klog.ErrorS(nil, "recursive read-only mount needs feature gate to be enabled",
"featureGate", features.RecursiveReadOnlyMounts)
}
}
volStatus.RecursiveReadOnly = &rroMode // Disabled or Enabled
}
status.VolumeMounts = append(status.VolumeMounts, volStatus)
@ -2420,3 +2443,49 @@ func (kl *Kubelet) cleanupOrphanedPodCgroups(pcm cm.PodContainerManager, cgroupP
go pcm.Destroy(val)
}
}
func (kl *Kubelet) runtimeClassSupportsRecursiveReadOnlyMounts(pod *v1.Pod) bool {
var runtimeClassName string
if pod.Spec.RuntimeClassName != nil {
runtimeClassName = *pod.Spec.RuntimeClassName
}
runtimeHandlers := kl.runtimeState.runtimeHandlers()
return runtimeClassSupportsRecursiveReadOnlyMounts(runtimeClassName, runtimeHandlers)
}
// runtimeClassSupportsRecursiveReadOnlyMounts checks whether the runtime class supports recursive read-only mounts.
// The kubelet feature gate is not checked here.
func runtimeClassSupportsRecursiveReadOnlyMounts(runtimeClassName string, runtimeHandlers []kubecontainer.RuntimeHandler) bool {
for _, h := range runtimeHandlers {
if h.Name == runtimeClassName {
return h.SupportsRecursiveReadOnlyMounts
}
}
klog.ErrorS(nil, "unknown runtime class", "runtimeClassName", runtimeClassName)
return false
}
// resolveRecursiveReadOnly resolves the recursive read-only mount mode.
func resolveRecursiveReadOnly(m v1.VolumeMount, runtimeSupportsRRO bool) (bool, error) {
if m.RecursiveReadOnly == nil || *m.RecursiveReadOnly == v1.RecursiveReadOnlyDisabled {
return false, nil
}
if !m.ReadOnly {
return false, fmt.Errorf("volume %q requested recursive read-only mode, but it is not read-only", m.Name)
}
if m.MountPropagation != nil && *m.MountPropagation != v1.MountPropagationNone {
return false, fmt.Errorf("volume %q requested recursive read-only mode, but it is not compatible with propagation %q",
m.Name, *m.MountPropagation)
}
switch rroMode := *m.RecursiveReadOnly; rroMode {
case v1.RecursiveReadOnlyIfPossible:
return runtimeSupportsRRO, nil
case v1.RecursiveReadOnlyEnabled:
if !runtimeSupportsRRO {
return false, fmt.Errorf("volume %q requested recursive read-only mode, but it is not supported by the runtime", m.Name)
}
return true, nil
default:
return false, fmt.Errorf("unknown recursive read-only mode %q", rroMode)
}
}

View File

@ -42,6 +42,7 @@ func TestMakeMounts(t *testing.T) {
testCases := map[string]struct {
container v1.Container
podVolumes kubecontainer.VolumeMap
supportsRRO bool
expectErr bool
expectedErrMsg string
expectedMounts []kubecontainer.Mount
@ -250,7 +251,7 @@ func TestMakeMounts(t *testing.T) {
},
}
mounts, _, err := makeMounts(&pod, "/pod", &tc.container, "fakepodname", "", []string{""}, tc.podVolumes, fhu, fsp, nil)
mounts, _, err := makeMounts(&pod, "/pod", &tc.container, "fakepodname", "", []string{""}, tc.podVolumes, fhu, fsp, nil, tc.supportsRRO)
// validate only the error if we expect an error
if tc.expectErr {

View File

@ -58,6 +58,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/status"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
netutils "k8s.io/utils/net"
"k8s.io/utils/ptr"
)
var containerRestartPolicyAlways = v1.ContainerRestartPolicyAlways
@ -6075,3 +6076,126 @@ func TestParseGetSubIdsOutput(t *testing.T) {
})
}
}
func TestResolveRecursiveReadOnly(t *testing.T) {
testCases := []struct {
m v1.VolumeMount
runtimeSupportsRRO bool
expected bool
expectedErr string
}{
{
m: v1.VolumeMount{Name: "rw"},
runtimeSupportsRRO: true,
expected: false,
expectedErr: "",
},
{
m: v1.VolumeMount{Name: "ro", ReadOnly: true},
runtimeSupportsRRO: true,
expected: false,
expectedErr: "",
},
{
m: v1.VolumeMount{Name: "ro", ReadOnly: true, RecursiveReadOnly: ptr.To(v1.RecursiveReadOnlyDisabled)},
runtimeSupportsRRO: true,
expected: false,
expectedErr: "",
},
{
m: v1.VolumeMount{Name: "rro-if-possible", ReadOnly: true, RecursiveReadOnly: ptr.To(v1.RecursiveReadOnlyIfPossible)},
runtimeSupportsRRO: true,
expected: true,
expectedErr: "",
},
{
m: v1.VolumeMount{Name: "rro-if-possible", ReadOnly: true, RecursiveReadOnly: ptr.To(v1.RecursiveReadOnlyIfPossible),
MountPropagation: ptr.To(v1.MountPropagationNone)},
runtimeSupportsRRO: true,
expected: true,
expectedErr: "",
},
{
m: v1.VolumeMount{Name: "rro-if-possible", ReadOnly: true, RecursiveReadOnly: ptr.To(v1.RecursiveReadOnlyIfPossible),
MountPropagation: ptr.To(v1.MountPropagationHostToContainer)},
runtimeSupportsRRO: true,
expected: false,
expectedErr: "not compatible with propagation",
},
{
m: v1.VolumeMount{Name: "rro-if-possible", ReadOnly: true, RecursiveReadOnly: ptr.To(v1.RecursiveReadOnlyIfPossible),
MountPropagation: ptr.To(v1.MountPropagationBidirectional)},
runtimeSupportsRRO: true,
expected: false,
expectedErr: "not compatible with propagation",
},
{
m: v1.VolumeMount{Name: "rro-if-possible", ReadOnly: false, RecursiveReadOnly: ptr.To(v1.RecursiveReadOnlyIfPossible)},
runtimeSupportsRRO: true,
expected: false,
expectedErr: "not read-only",
},
{
m: v1.VolumeMount{Name: "rro-if-possible", ReadOnly: false, RecursiveReadOnly: ptr.To(v1.RecursiveReadOnlyIfPossible)},
runtimeSupportsRRO: false,
expected: false,
expectedErr: "not read-only",
},
{
m: v1.VolumeMount{Name: "rro", ReadOnly: true, RecursiveReadOnly: ptr.To(v1.RecursiveReadOnlyEnabled)},
runtimeSupportsRRO: true,
expected: true,
expectedErr: "",
},
{
m: v1.VolumeMount{Name: "rro", ReadOnly: true, RecursiveReadOnly: ptr.To(v1.RecursiveReadOnlyEnabled),
MountPropagation: ptr.To(v1.MountPropagationNone)},
runtimeSupportsRRO: true,
expected: true,
expectedErr: "",
},
{
m: v1.VolumeMount{Name: "rro", ReadOnly: true, RecursiveReadOnly: ptr.To(v1.RecursiveReadOnlyEnabled),
MountPropagation: ptr.To(v1.MountPropagationHostToContainer)},
runtimeSupportsRRO: true,
expected: false,
expectedErr: "not compatible with propagation",
},
{
m: v1.VolumeMount{Name: "rro", ReadOnly: true, RecursiveReadOnly: ptr.To(v1.RecursiveReadOnlyEnabled),
MountPropagation: ptr.To(v1.MountPropagationBidirectional)},
runtimeSupportsRRO: true,
expected: false,
expectedErr: "not compatible with propagation",
},
{
m: v1.VolumeMount{Name: "rro", RecursiveReadOnly: ptr.To(v1.RecursiveReadOnlyEnabled)},
runtimeSupportsRRO: true,
expected: false,
expectedErr: "not read-only",
},
{
m: v1.VolumeMount{Name: "rro", ReadOnly: true, RecursiveReadOnly: ptr.To(v1.RecursiveReadOnlyEnabled)},
runtimeSupportsRRO: false,
expected: false,
expectedErr: "not supported by the runtime",
},
{
m: v1.VolumeMount{Name: "invalid", ReadOnly: true, RecursiveReadOnly: ptr.To(v1.RecursiveReadOnlyMode("foo"))},
runtimeSupportsRRO: true,
expected: false,
expectedErr: "unknown recursive read-only mode",
},
}
for _, tc := range testCases {
got, err := resolveRecursiveReadOnly(tc.m, tc.runtimeSupportsRRO)
t.Logf("resolveRecursiveReadOnly(%+v, %v) = (%v, %v)", tc.m, tc.runtimeSupportsRRO, got, err)
if tc.expectedErr == "" {
assert.Equal(t, tc.expected, got)
assert.NoError(t, err)
} else {
assert.ErrorContains(t, err, tc.expectedErr)
}
}
}

View File

@ -90,7 +90,7 @@ func TestMakeMountsWindows(t *testing.T) {
podDir, err := os.MkdirTemp("", "test-rotate-logs")
require.NoError(t, err)
defer os.RemoveAll(podDir)
mounts, _, err := makeMounts(&pod, podDir, &container, "fakepodname", "", []string{""}, podVolumes, fhu, fsp, nil)
mounts, _, err := makeMounts(&pod, podDir, &container, "fakepodname", "", []string{""}, podVolumes, fhu, fsp, nil, false)
require.NoError(t, err)
expectedMounts := []kubecontainer.Mount{

View File

@ -430,11 +430,12 @@ func (m *kubeGenericRuntimeManager) makeMounts(opts *kubecontainer.RunContainerO
v := opts.Mounts[idx]
selinuxRelabel := v.SELinuxRelabel && selinux.GetEnabled()
mount := &runtimeapi.Mount{
HostPath: v.HostPath,
ContainerPath: v.ContainerPath,
Readonly: v.ReadOnly,
SelinuxRelabel: selinuxRelabel,
Propagation: v.Propagation,
HostPath: v.HostPath,
ContainerPath: v.ContainerPath,
Readonly: v.ReadOnly,
SelinuxRelabel: selinuxRelabel,
Propagation: v.Propagation,
RecursiveReadOnly: v.RecursiveReadOnly,
}
volumeMounts = append(volumeMounts, mount)