mirror of
https://github.com/kata-containers/kata-containers.git
synced 2026-03-01 02:02:11 +00:00
runtime: Simplify mounting guest devices when using hostPath volumes
This change crystallizes and simplifies the current handling of /dev hostPath mounts with virtually no functional change. Before this change: - If a mount DESTINATION is in /dev and it is a non-regular file on the HOST, the shim passes the OCI bind mount as is to the guest (e.g. /dev/kmsg:/dev/kmsg). The container rightfully sees the GUEST device. - If the mount DESTINATION does not exist on the host, the shim relies on k8s/containerd to automatically create a directory (i.e. a non-regular file) on the HOST. The shim then also passes the OCI bind mount as is to the guest. The container rightfully sees the GUEST device. - For other /dev mounts, the shim passes the device major/minor to the guest over virtio-fs. The container rightfully sees the GUEST device. After this change: - If a mount SOURCE is in /dev and it is a non-regular file on the HOST, the shim passes the OCI bind mount as is to the guest. The container rightfully sees the GUEST device. - The shim no longer relies on k8s/containerd to create missing mount directories. Instead, it explicitly handles missing mount SOURCES, and treats them like the previous bullet point. - The shim no longer uses virtio-fs to pass /dev device major/minor to the guest; instead, it passes the OCI bind mount as is. Signed-off-by: Aurélien Bombo <abombo@microsoft.com>
This commit is contained in:
@@ -166,6 +166,15 @@ moment.
|
||||
See [this issue](https://github.com/kata-containers/runtime/issues/2812) for more details.
|
||||
[Another issue](https://github.com/kata-containers/kata-containers/issues/1728) focuses on the case of `emptyDir`.
|
||||
|
||||
### Kubernetes [hostPath][k8s-hostpath] volumes
|
||||
|
||||
When the source path of a hostPath volume is under `/dev`, and the path
|
||||
either corresponds to a host device or is not accessible by the Kata
|
||||
shim, the Kata agent bind mounts the source path directly from the
|
||||
*guest* filesystem into the container.
|
||||
|
||||
[k8s-hostpath]: https://kubernetes.io/docs/concepts/storage/volumes/#hostpath
|
||||
|
||||
## Host resource sharing
|
||||
|
||||
### Privileged containers
|
||||
|
||||
@@ -469,8 +469,15 @@ func (c *Container) mountSharedDirMounts(ctx context.Context, sharedDirMounts, i
|
||||
|
||||
// Ignore /dev, directories and all other device files. We handle
|
||||
// only regular files in /dev. It does not make sense to pass the host
|
||||
// device nodes to the guest.
|
||||
if isHostDevice(m.Destination) {
|
||||
// device nodes to the guest. We also ignore inaccessible host
|
||||
// devices in case we're mounting a device that is only
|
||||
// accessible in the guest.
|
||||
//
|
||||
// Note: K8s/containerd seems to create the source path as a
|
||||
// directory on the host if it does not already exist.
|
||||
// isHostDevice() will still return true in that case, so the
|
||||
// above contract holds.
|
||||
if isDevice, err := isHostDevice(m.Source); isDevice || err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
|
||||
@@ -54,33 +54,32 @@ func isSystemMount(m string) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func isHostDevice(m string) bool {
|
||||
// isHostDevice returns whether the given path is a non-regular file
|
||||
// under /dev (or /dev itself) on the host. If os.Stat fails on the
|
||||
// file, it returns false plus the error from os.Stat.
|
||||
func isHostDevice(m string) (bool, error) {
|
||||
m = filepath.Clean(m)
|
||||
if m == "/dev" {
|
||||
return true
|
||||
return true, nil
|
||||
}
|
||||
|
||||
if strings.HasPrefix(m, "/dev/") {
|
||||
// Check if regular file
|
||||
s, err := os.Stat(m)
|
||||
|
||||
// This should not happen. In case file does not exist let the
|
||||
// error be handled by the agent, simply return false here.
|
||||
if err != nil {
|
||||
return false
|
||||
return false, err
|
||||
}
|
||||
|
||||
if s.Mode().IsRegular() {
|
||||
return false
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// This is not a regular file in /dev. It is either a
|
||||
// device file, directory or any other special file which is
|
||||
// specific to the host system.
|
||||
return true
|
||||
return true, nil
|
||||
}
|
||||
|
||||
return false
|
||||
return false, nil
|
||||
}
|
||||
|
||||
func major(dev uint64) int {
|
||||
@@ -131,7 +130,7 @@ func getDeviceForPath(path string) (device, error) {
|
||||
return device{}, err
|
||||
}
|
||||
|
||||
if isHostDevice(path) {
|
||||
if isDevice, _ := isHostDevice(path); isDevice {
|
||||
// stat.Rdev describes the device that this file (inode) represents.
|
||||
devMajor = major(uint64(stat.Rdev))
|
||||
devMinor = minor(uint64(stat.Rdev))
|
||||
|
||||
@@ -255,7 +255,7 @@ func TestIsHostDevice(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
result := isHostDevice(test.mnt)
|
||||
result, _ := isHostDevice(test.mnt)
|
||||
assert.Equal(result, test.expected)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -65,7 +65,9 @@ func TestIsHostDeviceCreateFile(t *testing.T) {
|
||||
assert.NoError(err)
|
||||
f.Close()
|
||||
|
||||
assert.False(isHostDevice(path))
|
||||
isDevice, err := isHostDevice(path)
|
||||
assert.False(isDevice)
|
||||
assert.NoError(err)
|
||||
assert.NoError(os.Remove(path))
|
||||
}
|
||||
|
||||
|
||||
57
tests/integration/kubernetes/k8s-hostpath-volume.bats
Normal file
57
tests/integration/kubernetes/k8s-hostpath-volume.bats
Normal file
@@ -0,0 +1,57 @@
|
||||
#!/usr/bin/env bats
|
||||
#
|
||||
# Copyright (c) 2025 Microsoft Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
load "${BATS_TEST_DIRNAME}/../../common.bash"
|
||||
load "${BATS_TEST_DIRNAME}/lib.sh"
|
||||
load "${BATS_TEST_DIRNAME}/tests_common.sh"
|
||||
|
||||
setup() {
|
||||
setup_common
|
||||
get_pod_config_dir
|
||||
|
||||
pod_name="hostpath-kmsg"
|
||||
yaml_file="${pod_config_dir}/pod-hostpath-kmsg.yaml"
|
||||
|
||||
cmd_mountinfo=(sh -c "grep /dev/kmsg /proc/self/mountinfo")
|
||||
cmd_stat=(sh -c "stat -c '%t,%T' /dev/kmsg")
|
||||
cmd_head=(sh -c "head -10 /dev/kmsg")
|
||||
|
||||
policy_settings_dir="$(create_tmp_policy_settings_dir "${pod_config_dir}")"
|
||||
add_exec_to_policy_settings "${policy_settings_dir}" "${cmd_mountinfo[@]}"
|
||||
add_exec_to_policy_settings "${policy_settings_dir}" "${cmd_stat[@]}"
|
||||
add_exec_to_policy_settings "${policy_settings_dir}" "${cmd_head[@]}"
|
||||
add_requests_to_policy_settings "${policy_settings_dir}" "ReadStreamRequest"
|
||||
auto_generate_policy "${policy_settings_dir}" "${yaml_file}"
|
||||
}
|
||||
|
||||
@test "/dev hostPath volume bind mounts the guest device and skips virtio-fs" {
|
||||
kubectl apply -f "${yaml_file}"
|
||||
kubectl wait --for=condition=Ready --timeout="${timeout}" pod "${pod_name}"
|
||||
|
||||
# Check the mount info.
|
||||
|
||||
mount_info="$(kubectl exec "${pod_name}" -- "${cmd_mountinfo[@]}")"
|
||||
read root mountpoint fstype < <(awk '{print $4, $5, $9}' <<< "$mount_info")
|
||||
|
||||
[ "$root" == "/kmsg" ] # Would look like "/<CONTAINER_ID>-<RANDOM_ID>-kmsg" with virtio-fs.
|
||||
[ "$mountpoint" == "/dev/kmsg" ]
|
||||
[ "$fstype" == "devtmpfs" ] # Would be "virtiofs" with virtio-fs.
|
||||
|
||||
# Check the device major/minor.
|
||||
|
||||
majminor="$(kubectl exec "${pod_name}" -- "${cmd_stat[@]}")"
|
||||
[ "$majminor" == "1,b" ]
|
||||
|
||||
# Check that the device is actually accessible.
|
||||
|
||||
kubectl exec "${pod_name}" -- "${cmd_head[@]}"
|
||||
}
|
||||
|
||||
teardown() {
|
||||
delete_tmp_policy_settings_dir "${policy_settings_dir}"
|
||||
teardown_common "${node}" "${node_start_time:-}"
|
||||
}
|
||||
@@ -60,6 +60,7 @@ else
|
||||
"k8s-exec.bats" \
|
||||
"k8s-file-volume.bats" \
|
||||
"k8s-hostname.bats" \
|
||||
"k8s-hostpath-volume.bats" \
|
||||
"k8s-inotify.bats" \
|
||||
"k8s-ip6tables.bats" \
|
||||
"k8s-job.bats" \
|
||||
|
||||
@@ -0,0 +1,24 @@
|
||||
#
|
||||
# Copyright (c) 2025 Microsoft Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: hostpath-kmsg
|
||||
spec:
|
||||
terminationGracePeriodSeconds: 0
|
||||
runtimeClassName: kata
|
||||
restartPolicy: Never
|
||||
volumes:
|
||||
- name: dev-kmsg
|
||||
hostPath:
|
||||
path: /dev/kmsg
|
||||
containers:
|
||||
- image: quay.io/prometheus/busybox:latest
|
||||
name: container
|
||||
volumeMounts:
|
||||
- name: dev-kmsg
|
||||
mountPath: /dev/kmsg
|
||||
Reference in New Issue
Block a user