diff --git a/docs/design/architecture/storage.md b/docs/design/architecture/storage.md index 3aefc7ecf9..d3cb71ad85 100644 --- a/docs/design/architecture/storage.md +++ b/docs/design/architecture/storage.md @@ -51,6 +51,7 @@ containers started after the VM has been launched. Users can check to see if the container uses the `devicemapper` block device as its rootfs by calling `mount(8)` within the container. If the `devicemapper` block device is used, the root filesystem (`/`) -will be mounted from `/dev/vda`. Users can disable direct mounting of -the underlying block device through the runtime -[configuration](README.md#configuration). +will be mounted from `/dev/vda`. Users can enable direct mounting of +the underlying block device by setting the runtime +[configuration](README.md#configuration) flag `disable_block_device_use` to +`false`. diff --git a/docs/how-to/how-to-set-sandbox-config-kata.md b/docs/how-to/how-to-set-sandbox-config-kata.md index aa044367b3..37c454cba4 100644 --- a/docs/how-to/how-to-set-sandbox-config-kata.md +++ b/docs/how-to/how-to-set-sandbox-config-kata.md @@ -50,7 +50,7 @@ There are several kinds of Kata configurations and they are listed below. 
| `io.katacontainers.config.hypervisor.default_max_vcpus` | uint32| the maximum number of vCPUs allocated for the VM by the hypervisor | | `io.katacontainers.config.hypervisor.default_memory` | uint32| the memory assigned for a VM by the hypervisor in `MiB` | | `io.katacontainers.config.hypervisor.default_vcpus` | float32| the default vCPUs assigned for a VM by the hypervisor | -| `io.katacontainers.config.hypervisor.disable_block_device_use` | `boolean` | disallow a block device from being used | +| `io.katacontainers.config.hypervisor.disable_block_device_use` | `boolean` | disable hotplugging host block devices to guest VMs for container rootfs | | `io.katacontainers.config.hypervisor.disable_image_nvdimm` | `boolean` | specify if a `nvdimm` device should be used as rootfs for the guest (QEMU) | | `io.katacontainers.config.hypervisor.disable_vhost_net` | `boolean` | specify if `vhost-net` is not available on the host | | `io.katacontainers.config.hypervisor.enable_hugepages` | `boolean` | if the memory should be `pre-allocated` from huge pages | diff --git a/src/runtime/Makefile b/src/runtime/Makefile index 270943e152..662f8dcb84 100644 --- a/src/runtime/Makefile +++ b/src/runtime/Makefile @@ -250,7 +250,7 @@ DEFSECCOMPSANDBOXPARAM := DEFENTROPYSOURCE := /dev/urandom DEFVALIDENTROPYSOURCES := [\"/dev/urandom\",\"/dev/random\",\"\"] -DEFDISABLEBLOCK := false +DEFDISABLEBLOCK := true DEFSHAREDFS_CLH_VIRTIOFS := virtio-fs DEFSHAREDFS_QEMU_VIRTIOFS := virtio-fs # Please keep DEFSHAREDFS_QEMU_COCO_DEV_VIRTIOFS in sync with TDX/SNP diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in index fe5fca4883..937d25d6ba 100644 --- a/src/runtime/config/configuration-clh.toml.in +++ b/src/runtime/config/configuration-clh.toml.in @@ -109,6 +109,20 @@ memory_slots = @DEFMEMSLOTS@ # > amount of physical RAM --> will be set to the actual amount of physical RAM default_maxmemory = @DEFMAXMEMSZ@ +# Disable hotplugging host block 
devices to guest VMs for container rootfs. +# In case of a storage driver like devicemapper where a container's +# root file system is backed by a block device, the block device is passed +# directly to the hypervisor for performance reasons. +# This flag prevents the block device from being passed to the hypervisor, +# virtio-fs is used instead to pass the rootfs. +# WARNING: +# Do not set this flag to false unless you fully understand the behavior of +# your container runtime and image snapshotter. Some snapshotters might use +# container image storage devices that are not meant to be hotplugged into a +# guest VM - e.g., because they contain files used by the host or by other +# guests. +disable_block_device_use = @DEFDISABLEBLOCK@ + # Shared file system type: # - virtio-fs (default) # - virtio-fs-nydus diff --git a/src/runtime/config/configuration-qemu-cca.toml.in b/src/runtime/config/configuration-qemu-cca.toml.in index e1469b59ce..7d71dc1d47 100644 --- a/src/runtime/config/configuration-qemu-cca.toml.in +++ b/src/runtime/config/configuration-qemu-cca.toml.in @@ -159,12 +159,18 @@ memory_offset = 0 # Default false enable_virtio_mem = false -# Disable block device from being used for a container's rootfs. +# Disable hotplugging host block devices to guest VMs for container rootfs. # In case of a storage driver like devicemapper where a container's # root file system is backed by a block device, the block device is passed # directly to the hypervisor for performance reasons. # This flag prevents the block device from being passed to the hypervisor, # virtio-fs is used instead to pass the rootfs. +# WARNING: +# Do not set this flag to false unless you fully understand the behavior of +# your container runtime and image snapshotter. Some snapshotters might use +# container image storage devices that are not meant to be hotplugged into a +# guest VM - e.g., because they contain files used by the host or by other +# guests. 
disable_block_device_use = @DEFDISABLEBLOCK@ # Shared file system type: diff --git a/src/runtime/config/configuration-qemu-coco-dev.toml.in b/src/runtime/config/configuration-qemu-coco-dev.toml.in index 64b3917124..1a792d9bf8 100644 --- a/src/runtime/config/configuration-qemu-coco-dev.toml.in +++ b/src/runtime/config/configuration-qemu-coco-dev.toml.in @@ -145,12 +145,18 @@ memory_offset = 0 # Default false enable_virtio_mem = false -# Disable block device from being used for a container's rootfs. +# Disable hotplugging host block devices to guest VMs for container rootfs. # In case of a storage driver like devicemapper where a container's # root file system is backed by a block device, the block device is passed # directly to the hypervisor for performance reasons. # This flag prevents the block device from being passed to the hypervisor, # virtio-fs is used instead to pass the rootfs. +# WARNING: +# Do not set this flag to false unless you fully understand the behavior of +# your container runtime and image snapshotter. Some snapshotters might use +# container image storage devices that are not meant to be hotplugged into a +# guest VM - e.g., because they contain files used by the host or by other +# guests. disable_block_device_use = @DEFDISABLEBLOCK@ # Shared file system type: diff --git a/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in b/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in index ef0333d75a..0da0ccd413 100644 --- a/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in +++ b/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in @@ -185,12 +185,18 @@ memory_offset = 0 # Default false enable_virtio_mem = false -# Disable block device from being used for a container's rootfs. +# Disable hotplugging host block devices to guest VMs for container rootfs. 
# In case of a storage driver like devicemapper where a container's # root file system is backed by a block device, the block device is passed # directly to the hypervisor for performance reasons. # This flag prevents the block device from being passed to the hypervisor, # virtio-fs is used instead to pass the rootfs. +# WARNING: +# Do not set this flag to false unless you fully understand the behavior of +# your container runtime and image snapshotter. Some snapshotters might use +# container image storage devices that are not meant to be hotplugged into a +# guest VM - e.g., because they contain files used by the host or by other +# guests. disable_block_device_use = @DEFDISABLEBLOCK@ # Shared file system type: diff --git a/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in b/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in index 90a0707800..3b3fc11d27 100644 --- a/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in +++ b/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in @@ -162,12 +162,18 @@ memory_offset = 0 # Default false enable_virtio_mem = false -# Disable block device from being used for a container's rootfs. +# Disable hotplugging host block devices to guest VMs for container rootfs. # In case of a storage driver like devicemapper where a container's # root file system is backed by a block device, the block device is passed # directly to the hypervisor for performance reasons. # This flag prevents the block device from being passed to the hypervisor, # virtio-fs is used instead to pass the rootfs. +# WARNING: +# Do not set this flag to false unless you fully understand the behavior of +# your container runtime and image snapshotter. Some snapshotters might use +# container image storage devices that are not meant to be hotplugged into a +# guest VM - e.g., because they contain files used by the host or by other +# guests. 
disable_block_device_use = @DEFDISABLEBLOCK@ # Shared file system type: diff --git a/src/runtime/config/configuration-qemu-nvidia-gpu.toml.in b/src/runtime/config/configuration-qemu-nvidia-gpu.toml.in index 65323638e7..ea03eff328 100644 --- a/src/runtime/config/configuration-qemu-nvidia-gpu.toml.in +++ b/src/runtime/config/configuration-qemu-nvidia-gpu.toml.in @@ -144,12 +144,18 @@ memory_offset = 0 # Default false enable_virtio_mem = false -# Disable block device from being used for a container's rootfs. +# Disable hotplugging host block devices to guest VMs for container rootfs. # In case of a storage driver like devicemapper where a container's # root file system is backed by a block device, the block device is passed # directly to the hypervisor for performance reasons. # This flag prevents the block device from being passed to the hypervisor, # virtio-fs is used instead to pass the rootfs. +# WARNING: +# Do not set this flag to false unless you fully understand the behavior of +# your container runtime and image snapshotter. Some snapshotters might use +# container image storage devices that are not meant to be hotplugged into a +# guest VM - e.g., because they contain files used by the host or by other +# guests. disable_block_device_use = @DEFDISABLEBLOCK@ # Shared file system type: diff --git a/src/runtime/config/configuration-qemu-se.toml.in b/src/runtime/config/configuration-qemu-se.toml.in index a7732bd1f9..483fb348b3 100644 --- a/src/runtime/config/configuration-qemu-se.toml.in +++ b/src/runtime/config/configuration-qemu-se.toml.in @@ -153,12 +153,18 @@ memory_offset = 0 # Default false enable_virtio_mem = false -# Disable block device from being used for a container's rootfs. +# Disable hotplugging host block devices to guest VMs for container rootfs. # In case of a storage driver like devicemapper where a container's # root file system is backed by a block device, the block device is passed # directly to the hypervisor for performance reasons. 
# This flag prevents the block device from being passed to the hypervisor, # virtio-fs is used instead to pass the rootfs. +# WARNING: +# Do not set this flag to false unless you fully understand the behavior of +# your container runtime and image snapshotter. Some snapshotters might use +# container image storage devices that are not meant to be hotplugged into a +# guest VM - e.g., because they contain files used by the host or by other +# guests. disable_block_device_use = @DEFDISABLEBLOCK@ # Shared file system type: diff --git a/src/runtime/config/configuration-qemu-snp.toml.in b/src/runtime/config/configuration-qemu-snp.toml.in index e79051fec6..13a364de9a 100644 --- a/src/runtime/config/configuration-qemu-snp.toml.in +++ b/src/runtime/config/configuration-qemu-snp.toml.in @@ -184,12 +184,18 @@ memory_offset = 0 # Default false enable_virtio_mem = false -# Disable block device from being used for a container's rootfs. +# Disable hotplugging host block devices to guest VMs for container rootfs. # In case of a storage driver like devicemapper where a container's # root file system is backed by a block device, the block device is passed # directly to the hypervisor for performance reasons. # This flag prevents the block device from being passed to the hypervisor, # virtio-fs is used instead to pass the rootfs. +# WARNING: +# Do not set this flag to false unless you fully understand the behavior of +# your container runtime and image snapshotter. Some snapshotters might use +# container image storage devices that are not meant to be hotplugged into a +# guest VM - e.g., because they contain files used by the host or by other +# guests. 
disable_block_device_use = @DEFDISABLEBLOCK@ # Shared file system type: diff --git a/src/runtime/config/configuration-qemu-tdx.toml.in b/src/runtime/config/configuration-qemu-tdx.toml.in index 287d356a55..5029c3ec92 100644 --- a/src/runtime/config/configuration-qemu-tdx.toml.in +++ b/src/runtime/config/configuration-qemu-tdx.toml.in @@ -161,12 +161,18 @@ memory_offset = 0 # Default false enable_virtio_mem = false -# Disable block device from being used for a container's rootfs. +# Disable hotplugging host block devices to guest VMs for container rootfs. # In case of a storage driver like devicemapper where a container's # root file system is backed by a block device, the block device is passed # directly to the hypervisor for performance reasons. # This flag prevents the block device from being passed to the hypervisor, # virtio-fs is used instead to pass the rootfs. +# WARNING: +# Do not set this flag to false unless you fully understand the behavior of +# your container runtime and image snapshotter. Some snapshotters might use +# container image storage devices that are not meant to be hotplugged into a +# guest VM - e.g., because they contain files used by the host or by other +# guests. disable_block_device_use = @DEFDISABLEBLOCK@ # Shared file system type: diff --git a/src/runtime/config/configuration-qemu.toml.in b/src/runtime/config/configuration-qemu.toml.in index c31d17f489..af971558ca 100644 --- a/src/runtime/config/configuration-qemu.toml.in +++ b/src/runtime/config/configuration-qemu.toml.in @@ -144,12 +144,18 @@ memory_offset = 0 # Default false enable_virtio_mem = false -# Disable block device from being used for a container's rootfs. +# Disable hotplugging host block devices to guest VMs for container rootfs. # In case of a storage driver like devicemapper where a container's # root file system is backed by a block device, the block device is passed # directly to the hypervisor for performance reasons. 
# This flag prevents the block device from being passed to the hypervisor, # virtio-fs is used instead to pass the rootfs. +# WARNING: +# Do not set this flag to false unless you fully understand the behavior of +# your container runtime and image snapshotter. Some snapshotters might use +# container image storage devices that are not meant to be hotplugged into a +# guest VM - e.g., because they contain files used by the host or by other +# guests. disable_block_device_use = @DEFDISABLEBLOCK@ # Shared file system type: diff --git a/src/runtime/config/configuration-stratovirt.toml.in b/src/runtime/config/configuration-stratovirt.toml.in index a86a584a52..b9f28a74a6 100644 --- a/src/runtime/config/configuration-stratovirt.toml.in +++ b/src/runtime/config/configuration-stratovirt.toml.in @@ -103,12 +103,18 @@ default_maxmemory = @DEFMAXMEMSZ@ # Default 0 memory_offset = 0 -# Disable block device from being used for a container's rootfs. +# Disable hotplugging host block devices to guest VMs for container rootfs. # In case of a storage driver like devicemapper where a container's # root file system is backed by a block device, the block device is passed # directly to the hypervisor for performance reasons. # This flag prevents the block device from being passed to the hypervisor, # virtio-fs is used instead to pass the rootfs. +# WARNING: +# Do not set this flag to false unless you fully understand the behavior of +# your container runtime and image snapshotter. Some snapshotters might use +# container image storage devices that are not meant to be hotplugged into a +# guest VM - e.g., because they contain files used by the host or by other +# guests. 
disable_block_device_use = @DEFDISABLEBLOCK@ # Shared file system type: diff --git a/tests/integration/kubernetes/k8s-empty-image.bats b/tests/integration/kubernetes/k8s-empty-image.bats new file mode 100644 index 0000000000..6d003b2aad --- /dev/null +++ b/tests/integration/kubernetes/k8s-empty-image.bats @@ -0,0 +1,59 @@ +#!/usr/bin/env bats +# +# Copyright (c) 2025 NVIDIA Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +load "${BATS_TEST_DIRNAME}/../../common.bash" +load "${BATS_TEST_DIRNAME}/lib.sh" +load "${BATS_TEST_DIRNAME}/tests_common.sh" + +setup() { + setup_common || die "setup_common failed" + pod_name="no-layer-image" + get_pod_config_dir + + yaml_file="${pod_config_dir}/${pod_name}.yaml" + + # genpolicy fails for this unusual container image, so use the allow_all policy. + add_allow_all_policy_to_yaml "${yaml_file}" +} + +@test "Test image with no layers cannot run" { + # Error from run-k8s-tests (ubuntu, qemu, small): + # + # failed to create containerd task: failed to create shim task: the file sleep was not found + # + # Error from run-k8s-tests-on-tee (sev-snp, qemu-snp): + # + # failed to create containerd task: failed to create shim task: rpc status: + # Status { code: INTERNAL, message: "[CDH] [ERROR]: Image Pull error: Failed to pull image + # ghcr.io/kata-containers/no-layer-image:latest from all mirror/mapping locations or original location: image: + # ghcr.io/kata-containers/no-layer-image:latest, error: Internal error", details: [], special_fields: + # SpecialFields { unknown_fields: UnknownFields { fields: None }, cached_size: CachedSize { size: 0 } } } + # + # Error from run-k8s-tests-coco-nontee-with-erofs-snapshotter (qemu-coco-dev, erofs, default): + # + # failed to create containerd task: failed to create shim task: failed to mount + # /run/kata-containers/shared/containers/fadd1af7ea2a7bfc6caf26471f70e9a913a2989fd4a1be9d001b59e48c0781aa/rootfs + # to 
/run/kata-containers/fadd1af7ea2a7bfc6caf26471f70e9a913a2989fd4a1be9d001b59e48c0781aa/rootfs, with error: + # ENOENT: No such file or directory + + kubectl create -f "${yaml_file}" + + local -r command="kubectl describe pod/${pod_name} | grep -E \ + 'the file sleep was not found|\[CDH\] \[ERROR\]: Image Pull error|ENOENT: No such file or directory'" + info "Waiting ${wait_time} seconds for: ${command}" + waitForProcess "${wait_time}" "${sleep_time}" "${command}" >/dev/null 2>/dev/null +} + +teardown() { + # Debugging information + kubectl describe "pod/${pod_name}" + kubectl get "pod/${pod_name}" -o yaml + + kubectl delete pod "${pod_name}" + + teardown_common "${node}" "${node_start_time:-}" +} diff --git a/tests/integration/kubernetes/run_kubernetes_tests.sh b/tests/integration/kubernetes/run_kubernetes_tests.sh index a1c24c11d7..bdd6a79def 100755 --- a/tests/integration/kubernetes/run_kubernetes_tests.sh +++ b/tests/integration/kubernetes/run_kubernetes_tests.sh @@ -42,6 +42,7 @@ else ) K8S_TEST_SMALL_HOST_UNION=( \ + "k8s-empty-image.bats" \ "k8s-guest-pull-image.bats" \ "k8s-confidential.bats" \ "k8s-sealed-secret.bats" \ diff --git a/tests/integration/kubernetes/runtimeclass_workloads/no-layer-image.yaml b/tests/integration/kubernetes/runtimeclass_workloads/no-layer-image.yaml new file mode 100644 index 0000000000..0e552eb5d3 --- /dev/null +++ b/tests/integration/kubernetes/runtimeclass_workloads/no-layer-image.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Pod +metadata: + name: no-layer-image +spec: + runtimeClassName: kata + containers: + - name: no-layer-image + image: ghcr.io/kata-containers/no-layer-image:latest + resources: {} + command: + - sleep + - infinity