mirror of
https://github.com/kata-containers/kata-containers.git
synced 2026-03-17 18:22:14 +00:00
Compare commits
1 Commits
3.25.0
...
topic/arm6
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ca1e365a27 |
@@ -65,7 +65,7 @@ jobs:
|
||||
run: bash tests/integration/kubernetes/gha-run.sh install-bats
|
||||
|
||||
- name: Run tests
|
||||
timeout-minutes: 30
|
||||
timeout-minutes: 60
|
||||
run: bash tests/integration/kubernetes/gha-run.sh run-tests
|
||||
|
||||
- name: Report tests
|
||||
|
||||
@@ -133,17 +133,6 @@ PKGLIBEXECDIR := $(LIBEXECDIR)/$(PROJECT_DIR)
|
||||
FIRMWAREPATH :=
|
||||
FIRMWAREVOLUMEPATH :=
|
||||
|
||||
ROOTMEASURECONFIG ?= ""
|
||||
KERNELTDXPARAMS += $(ROOTMEASURECONFIG)
|
||||
|
||||
# TDX
|
||||
DEFSHAREDFS_QEMU_TDX_VIRTIOFS := none
|
||||
FIRMWARETDXPATH := $(PREFIXDEPS)/share/ovmf/OVMF.inteltdx.fd
|
||||
|
||||
# SEV-SNP
|
||||
FIRMWARE_SNP_PATH := $(PREFIXDEPS)/share/ovmf/AMDSEV.fd
|
||||
FIRMWARE_VOLUME_SNP_PATH :=
|
||||
|
||||
##VAR DEFVCPUS=<number> Default number of vCPUs
|
||||
DEFVCPUS := 1
|
||||
##VAR DEFMAXVCPUS=<number> Default maximum number of vCPUs
|
||||
@@ -187,7 +176,6 @@ DEFVIRTIOFSQUEUESIZE ?= 1024
|
||||
# Make sure you quote args.
|
||||
DEFVIRTIOFSEXTRAARGS ?= [\"--thread-pool-size=1\", \"-o\", \"announce_submounts\"]
|
||||
DEFENABLEIOTHREADS := false
|
||||
DEFINDEPIOTHREADS := 0
|
||||
DEFENABLEVHOSTUSERSTORE := false
|
||||
DEFVHOSTUSERSTOREPATH := $(PKGRUNDIR)/vhost-user
|
||||
DEFVALIDVHOSTUSERSTOREPATHS := [\"$(DEFVHOSTUSERSTOREPATH)\"]
|
||||
@@ -204,8 +192,6 @@ QEMUTDXQUOTEGENERATIONSERVICESOCKETPORT := 4050
|
||||
DEFCREATECONTAINERTIMEOUT ?= 30
|
||||
DEFCREATECONTAINERTIMEOUT_COCO ?= 60
|
||||
DEFSTATICRESOURCEMGMT_COCO = true
|
||||
DEFDISABLEIMAGENVDIMM ?= false
|
||||
DEFPODRESOURCEAPISOCK := ""
|
||||
|
||||
SED = sed
|
||||
CLI_DIR = cmd
|
||||
@@ -306,30 +292,6 @@ ifneq (,$(QEMUCMD))
|
||||
|
||||
CONFIGS += $(CONFIG_QEMU)
|
||||
|
||||
CONFIG_FILE_QEMU_TDX = configuration-qemu-tdx-runtime-rs.toml
|
||||
CONFIG_QEMU_TDX = config/$(CONFIG_FILE_QEMU_TDX)
|
||||
CONFIG_QEMU_TDX_IN = $(CONFIG_QEMU_TDX).in
|
||||
|
||||
CONFIG_PATH_QEMU_TDX = $(abspath $(CONFDIR)/$(CONFIG_FILE_QEMU_TDX))
|
||||
CONFIG_PATHS += $(CONFIG_PATH_QEMU_TDX)
|
||||
|
||||
SYSCONFIG_QEMU_TDX = $(abspath $(SYSCONFDIR)/$(CONFIG_FILE_QEMU_TDX))
|
||||
SYSCONFIG_PATHS += $(SYSCONFIG_QEMU_TDX)
|
||||
|
||||
CONFIGS += $(CONFIG_QEMU_TDX)
|
||||
|
||||
CONFIG_FILE_QEMU_SNP = configuration-qemu-snp-runtime-rs.toml
|
||||
CONFIG_QEMU_SNP = config/$(CONFIG_FILE_QEMU_SNP)
|
||||
CONFIG_QEMU_SNP_IN = $(CONFIG_QEMU_SNP).in
|
||||
|
||||
CONFIG_PATH_QEMU_SNP = $(abspath $(CONFDIR)/$(CONFIG_FILE_QEMU_SNP))
|
||||
CONFIG_PATHS += $(CONFIG_PATH_QEMU_SNP)
|
||||
|
||||
SYSCONFIG_QEMU_SNP = $(abspath $(SYSCONFDIR)/$(CONFIG_FILE_QEMU_SNP))
|
||||
SYSCONFIG_PATHS += $(SYSCONFIG_QEMU_SNP)
|
||||
|
||||
CONFIGS += $(CONFIG_QEMU_SNP)
|
||||
|
||||
CONFIG_FILE_QEMU_SE = configuration-qemu-se-runtime-rs.toml
|
||||
CONFIG_QEMU_SE = config/$(CONFIG_FILE_QEMU_SE)
|
||||
CONFIG_QEMU_SE_IN = $(CONFIG_QEMU_SE).in
|
||||
@@ -560,7 +522,6 @@ USER_VARS += DEFVIRTIOFSEXTRAARGS
|
||||
USER_VARS += DEFENABLEANNOTATIONS
|
||||
USER_VARS += DEFENABLEANNOTATIONS_COCO
|
||||
USER_VARS += DEFENABLEIOTHREADS
|
||||
USER_VARS += DEFINDEPIOTHREADS
|
||||
USER_VARS += DEFSECCOMPSANDBOXPARAM
|
||||
USER_VARS += DEFGUESTSELINUXLABEL
|
||||
USER_VARS += DEFENABLEVHOSTUSERSTORE
|
||||
@@ -581,7 +542,6 @@ USER_VARS += DEFSTATICRESOURCEMGMT_FC
|
||||
USER_VARS += DEFSTATICRESOURCEMGMT_CLH
|
||||
USER_VARS += DEFSTATICRESOURCEMGMT_QEMU
|
||||
USER_VARS += DEFSTATICRESOURCEMGMT_COCO
|
||||
USER_VARS += DEFDISABLEIMAGENVDIMM
|
||||
USER_VARS += DEFBINDMOUNTS
|
||||
USER_VARS += DEFVFIOMODE
|
||||
USER_VARS += DEFVFIOMODE_SE
|
||||
@@ -602,13 +562,6 @@ USER_VARS += DEFFORCEGUESTPULL
|
||||
USER_VARS += QEMUTDXQUOTEGENERATIONSERVICESOCKETPORT
|
||||
USER_VARS += DEFCREATECONTAINERTIMEOUT
|
||||
USER_VARS += DEFCREATECONTAINERTIMEOUT_COCO
|
||||
USER_VARS += QEMUTDXEXPERIMENTALCMD
|
||||
USER_VARS += FIRMWARE_SNP_PATH
|
||||
USER_VARS += FIRMWARE_VOLUME_SNP_PATH
|
||||
USER_VARS += KERNELTDXPARAMS
|
||||
USER_VARS += DEFSHAREDFS_QEMU_TDX_VIRTIOFS
|
||||
USER_VARS += FIRMWARETDXPATH
|
||||
USER_VARS += DEFPODRESOURCEAPISOCK
|
||||
|
||||
SOURCES := \
|
||||
$(shell find . 2>&1 | grep -E '.*\.rs$$') \
|
||||
@@ -646,8 +599,6 @@ GENERATED_VARS = \
|
||||
VERSION \
|
||||
CONFIG_DB_IN \
|
||||
CONFIG_FC_IN \
|
||||
CONFIG_QEMU_TDX_IN \
|
||||
CONFIG_QEMU_SNP_IN \
|
||||
$(USER_VARS)
|
||||
|
||||
|
||||
|
||||
@@ -1,770 +0,0 @@
|
||||
# Copyright (c) 2017-2019 Intel Corporation
|
||||
# Copyright (c) 2021 Adobe Inc.
|
||||
# Copyright (c) 2024 IBM Corp.
|
||||
# Copyright (c) 2025-2026 Ant Group
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
# XXX: WARNING: this file is auto-generated.
|
||||
# XXX:
|
||||
# XXX: Source file: "@CONFIG_QEMU_IN@"
|
||||
# XXX: Project:
|
||||
# XXX: Name: @PROJECT_NAME@
|
||||
# XXX: Type: @PROJECT_TYPE@
|
||||
|
||||
[hypervisor.qemu]
|
||||
path = "@QEMUPATH@"
|
||||
kernel = "@KERNELPATH_COCO@"
|
||||
initrd = "@INITRDCONFIDENTIALPATH@"
|
||||
# image = "@IMAGECONFIDENTIALPATH@"
|
||||
machine_type = "@MACHINETYPE@"
|
||||
|
||||
# Enable confidential guest support.
|
||||
# Toggling that setting may trigger different hardware features, ranging
|
||||
# from memory encryption to both memory and CPU-state encryption and integrity.
|
||||
# The Kata Containers runtime dynamically detects the available feature set and
|
||||
# aims at enabling the largest possible one, returning an error if none is
|
||||
# available, or none is supported by the hypervisor.
|
||||
#
|
||||
# Known limitations:
|
||||
# * Does not work by design:
|
||||
# - CPU Hotplug
|
||||
# - Memory Hotplug
|
||||
# - NVDIMM devices
|
||||
#
|
||||
# Default false
|
||||
confidential_guest = true
|
||||
|
||||
# Enable AMD SEV-SNP confidential guests
|
||||
# In case of using confidential guests on AMD hardware that supports SEV-SNP,
|
||||
# the following enables SEV-SNP guests. Default true
|
||||
sev_snp_guest = true
|
||||
|
||||
# SNP 'ID Block' and 'ID Authentication Information Structure'.
|
||||
# If one of snp_id_block or snp_id_auth is specified, the other must be specified, too.
|
||||
# Notice that the default SNP policy of QEMU (0x30000) is used by Kata, if not explicitly
|
||||
# set via 'snp_guest_policy' option. The IDBlock contains the guest policy as field, and
|
||||
# it must match the value from 'snp_guest_policy' or, if unset, the QEMU default policy.
|
||||
#
|
||||
# 96-byte, base64-encoded blob to provide the ‘ID Block’ structure for the
|
||||
# SNP_LAUNCH_FINISH command defined in the SEV-SNP firmware ABI (QEMU default: all-zero)
|
||||
snp_id_block = ""
|
||||
# 4096-byte, base64-encoded blob to provide the ‘ID Authentication Information Structure’
|
||||
# for the SNP_LAUNCH_FINISH command defined in the SEV-SNP firmware ABI (QEMU default: all-zero)
|
||||
snp_id_auth = ""
|
||||
|
||||
# SNP Guest Policy, the ‘POLICY’ parameter to the SNP_LAUNCH_START command.
|
||||
# If unset, the QEMU default policy (0x30000) will be used.
|
||||
# Notice that the guest policy is enforced at VM launch, and your pod VMs
|
||||
# won't start at all if the policy denys it. This will be indicated by a
|
||||
# 'SNP_LAUNCH_START' error.
|
||||
snp_guest_policy = 196608
|
||||
|
||||
# rootfs filesystem type:
|
||||
# - ext4 (default)
|
||||
# - xfs
|
||||
# - erofs
|
||||
rootfs_type = @DEFROOTFSTYPE@
|
||||
|
||||
# Block storage driver to be used for the VM rootfs is backed
|
||||
# by a block device. This is virtio-blk-pci, virtio-blk-mmio or nvdimm
|
||||
vm_rootfs_driver = "virtio-blk-pci"
|
||||
|
||||
# Enable running QEMU VMM as a non-root user.
|
||||
# By default QEMU VMM run as root. When this is set to true, QEMU VMM process runs as
|
||||
# a non-root random user. See documentation for the limitations of this mode.
|
||||
rootless = false
|
||||
|
||||
# List of valid annotation names for the hypervisor
|
||||
# Each member of the list is a regular expression, which is the base name
|
||||
# of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path"
|
||||
enable_annotations = @DEFENABLEANNOTATIONS_COCO@
|
||||
|
||||
# List of valid annotations values for the hypervisor
|
||||
# Each member of the list is a path pattern as described by glob(3).
|
||||
# The default if not set is empty (all annotations rejected.)
|
||||
# Your distribution recommends: @QEMUVALIDHYPERVISORPATHS@
|
||||
valid_hypervisor_paths = @QEMUVALIDHYPERVISORPATHS@
|
||||
|
||||
# Optional space-separated list of options to pass to the guest kernel.
|
||||
# For example, use `kernel_params = "vsyscall=emulate"` if you are having
|
||||
# trouble running pre-2.15 glibc.
|
||||
#
|
||||
# WARNING: - any parameter specified here will take priority over the default
|
||||
# parameter value of the same name used to start the virtual machine.
|
||||
# Do not set values here unless you understand the impact of doing so as you
|
||||
# may stop the virtual machine from booting.
|
||||
# To see the list of default parameters, enable hypervisor debug, create a
|
||||
# container and look for 'default-kernel-parameters' log entries.
|
||||
kernel_params = "@KERNELPARAMS@"
|
||||
|
||||
# Path to the firmware.
|
||||
# If you want that qemu uses the default firmware leave this option empty
|
||||
firmware = "@FIRMWARE_SNP_PATH@"
|
||||
|
||||
# Path to the firmware volume.
|
||||
# firmware TDVF or OVMF can be split into FIRMWARE_VARS.fd (UEFI variables
|
||||
# as configuration) and FIRMWARE_CODE.fd (UEFI program image). UEFI variables
|
||||
# can be customized per each user while UEFI code is kept same.
|
||||
firmware_volume = "@FIRMWARE_VOLUME_SNP_PATH@"
|
||||
|
||||
# Machine accelerators
|
||||
# comma-separated list of machine accelerators to pass to the hypervisor.
|
||||
# For example, `machine_accelerators = "nosmm,nosmbus,nosata,nopit,static-prt,nofw"`
|
||||
machine_accelerators = "@MACHINEACCELERATORS@"
|
||||
|
||||
# Qemu seccomp sandbox feature
|
||||
# comma-separated list of seccomp sandbox features to control the syscall access.
|
||||
# For example, `seccompsandbox= "on,obsolete=deny,spawn=deny,resourcecontrol=deny"`
|
||||
# Note: "elevateprivileges=deny" doesn't work with daemonize option, so it's removed from the seccomp sandbox
|
||||
# Another note: enabling this feature may reduce performance, you may enable
|
||||
# /proc/sys/net/core/bpf_jit_enable to reduce the impact. see https://man7.org/linux/man-pages/man8/bpfc.8.html
|
||||
# Recommended value when enabling: "on,obsolete=deny,spawn=deny,resourcecontrol=deny"
|
||||
seccompsandbox = "@DEFSECCOMPSANDBOXPARAM@"
|
||||
|
||||
# CPU features
|
||||
# comma-separated list of cpu features to pass to the cpu
|
||||
# For example, `cpu_features = "pmu=off,vmx=off"
|
||||
cpu_features = "@CPUFEATURES@"
|
||||
|
||||
# Default number of vCPUs per SB/VM:
|
||||
# unspecified or 0 --> will be set to @DEFVCPUS@
|
||||
# < 0 --> will be set to the actual number of physical cores
|
||||
# > 0 <= number of physical cores --> will be set to the specified number
|
||||
# > number of physical cores --> will be set to the actual number of physical cores
|
||||
default_vcpus = @DEFVCPUS_QEMU@
|
||||
|
||||
# Default maximum number of vCPUs per SB/VM:
|
||||
# unspecified or == 0 --> will be set to the actual number of physical cores or to the maximum number
|
||||
# of vCPUs supported by KVM if that number is exceeded
|
||||
# > 0 <= number of physical cores --> will be set to the specified number
|
||||
# > number of physical cores --> will be set to the actual number of physical cores or to the maximum number
|
||||
# of vCPUs supported by KVM if that number is exceeded
|
||||
# WARNING: Depending of the architecture, the maximum number of vCPUs supported by KVM is used when
|
||||
# the actual number of physical cores is greater than it.
|
||||
# WARNING: Be aware that this value impacts the virtual machine's memory footprint and CPU
|
||||
# the hotplug functionality. For example, `default_maxvcpus = 240` specifies that until 240 vCPUs
|
||||
# can be added to a SB/VM, but the memory footprint will be big. Another example, with
|
||||
# `default_maxvcpus = 8` the memory footprint will be small, but 8 will be the maximum number of
|
||||
# vCPUs supported by the SB/VM. In general, we recommend that you do not edit this variable,
|
||||
# unless you know what are you doing.
|
||||
# NOTICE: on arm platform with gicv2 interrupt controller, set it to 8.
|
||||
default_maxvcpus = @DEFMAXVCPUS_QEMU@
|
||||
|
||||
# Bridges can be used to hot plug devices.
|
||||
# Limitations:
|
||||
# * Currently only pci bridges are supported
|
||||
# * Until 30 devices per bridge can be hot plugged.
|
||||
# * Until 5 PCI bridges can be cold plugged per VM.
|
||||
# This limitation could be a bug in qemu or in the kernel
|
||||
# Default number of bridges per SB/VM:
|
||||
# unspecified or 0 --> will be set to @DEFBRIDGES@
|
||||
# > 1 <= 5 --> will be set to the specified number
|
||||
# > 5 --> will be set to 5
|
||||
default_bridges = @DEFBRIDGES@
|
||||
|
||||
# Default memory size in MiB for SB/VM.
|
||||
# If unspecified then it will be set @DEFMEMSZ@ MiB.
|
||||
default_memory = @DEFMEMSZ@
|
||||
|
||||
#
|
||||
# Default memory slots per SB/VM.
|
||||
# If unspecified then it will be set @DEFMEMSLOTS@.
|
||||
# This is will determine the times that memory will be hotadded to sandbox/VM.
|
||||
memory_slots = @DEFMEMSLOTS@
|
||||
|
||||
# Default maximum memory in MiB per SB / VM
|
||||
# unspecified or == 0 --> will be set to the actual amount of physical RAM
|
||||
# > 0 <= amount of physical RAM --> will be set to the specified number
|
||||
# > amount of physical RAM --> will be set to the actual amount of physical RAM
|
||||
default_maxmemory = @DEFMAXMEMSZ@
|
||||
|
||||
# The size in MiB will be plused to max memory of hypervisor.
|
||||
# It is the memory address space for the NVDIMM device.
|
||||
# If set block storage driver (block_device_driver) to "nvdimm",
|
||||
# should set memory_offset to the size of block device.
|
||||
# Default 0
|
||||
memory_offset = 0
|
||||
|
||||
# Specifies virtio-mem will be enabled or not.
|
||||
# Please note that this option should be used with the command
|
||||
# "echo 1 > /proc/sys/vm/overcommit_memory".
|
||||
# Default false
|
||||
enable_virtio_mem = false
|
||||
|
||||
# Disable block device from being used for a container's rootfs.
|
||||
# In case of a storage driver like devicemapper where a container's
|
||||
# root file system is backed by a block device, the block device is passed
|
||||
# directly to the hypervisor for performance reasons.
|
||||
# This flag prevents the block device from being passed to the hypervisor,
|
||||
# virtio-fs is used instead to pass the rootfs.
|
||||
disable_block_device_use = @DEFDISABLEBLOCK@
|
||||
|
||||
# Shared file system type:
|
||||
# - virtio-fs (default)
|
||||
# - virtio-fs-nydus
|
||||
# - none
|
||||
shared_fs = "none"
|
||||
|
||||
# Path to vhost-user-fs daemon.
|
||||
virtio_fs_daemon = "@DEFVIRTIOFSDAEMON@"
|
||||
|
||||
# List of valid annotations values for the virtiofs daemon
|
||||
# The default if not set is empty (all annotations rejected.)
|
||||
# Your distribution recommends: @DEFVALIDVIRTIOFSDAEMONPATHS@
|
||||
valid_virtio_fs_daemon_paths = @DEFVALIDVIRTIOFSDAEMONPATHS@
|
||||
|
||||
# Default size of DAX cache in MiB
|
||||
virtio_fs_cache_size = @DEFVIRTIOFSCACHESIZE@
|
||||
|
||||
# Default size of virtqueues
|
||||
virtio_fs_queue_size = @DEFVIRTIOFSQUEUESIZE@
|
||||
|
||||
# Extra args for virtiofsd daemon
|
||||
#
|
||||
# Format example:
|
||||
# ["-o", "arg1=xxx,arg2", "-o", "hello world", "--arg3=yyy"]
|
||||
# Examples:
|
||||
# Set virtiofsd log level to debug : ["-o", "log_level=debug"] or ["-d"]
|
||||
#
|
||||
# see `virtiofsd -h` for possible options.
|
||||
virtio_fs_extra_args = @DEFVIRTIOFSEXTRAARGS@
|
||||
|
||||
# Cache mode:
|
||||
#
|
||||
# - never
|
||||
# Metadata, data, and pathname lookup are not cached in guest. They are
|
||||
# always fetched from host and any changes are immediately pushed to host.
|
||||
#
|
||||
# - metadata
|
||||
# Metadata and pathname lookup are cached in guest and never expire.
|
||||
# Data is never cached in guest.
|
||||
#
|
||||
# - auto
|
||||
# Metadata and pathname lookup cache expires after a configured amount of
|
||||
# time (default is 1 second). Data is cached while the file is open (close
|
||||
# to open consistency).
|
||||
#
|
||||
# - always
|
||||
# Metadata, data, and pathname lookup are cached in guest and never expire.
|
||||
virtio_fs_cache = "@DEFVIRTIOFSCACHE@"
|
||||
|
||||
# Block storage driver to be used for the hypervisor in case the container
|
||||
# rootfs is backed by a block device. This is virtio-scsi, virtio-blk
|
||||
# or nvdimm.
|
||||
block_device_driver = "@DEFBLOCKSTORAGEDRIVER_QEMU@"
|
||||
|
||||
# aio is the I/O mechanism used by qemu
|
||||
# Options:
|
||||
#
|
||||
# - threads
|
||||
# Pthread based disk I/O.
|
||||
#
|
||||
# - native
|
||||
# Native Linux I/O.
|
||||
#
|
||||
# - io_uring
|
||||
# Linux io_uring API. This provides the fastest I/O operations on Linux, requires kernel>5.1 and
|
||||
# qemu >=5.0.
|
||||
block_device_aio = "@DEFBLOCKDEVICEAIO_QEMU@"
|
||||
|
||||
# Specifies cache-related options will be set to block devices or not.
|
||||
# Default false
|
||||
block_device_cache_set = false
|
||||
|
||||
# Specifies cache-related options for block devices.
|
||||
# Denotes whether use of O_DIRECT (bypass the host page cache) is enabled.
|
||||
# Default false
|
||||
block_device_cache_direct = false
|
||||
|
||||
# Specifies cache-related options for block devices.
|
||||
# Denotes whether flush requests for the device are ignored.
|
||||
# Default false
|
||||
block_device_cache_noflush = false
|
||||
|
||||
# Enable iothreads (data-plane) to be used. This causes IO to be
|
||||
# handled in a separate IO thread. This is currently only implemented
|
||||
# for SCSI.
|
||||
#
|
||||
enable_iothreads = @DEFENABLEIOTHREADS@
|
||||
|
||||
# Independent IOThreads enables IO to be processed in a separate thread, it is
|
||||
# for QEMU hotplug device attach to iothread, like virtio-blk.
|
||||
indep_iothreads = @DEFINDEPIOTHREADS@
|
||||
|
||||
# Enable pre allocation of VM RAM, default false
|
||||
# Enabling this will result in lower container density
|
||||
# as all of the memory will be allocated and locked
|
||||
# This is useful when you want to reserve all the memory
|
||||
# upfront or in the cases where you want memory latencies
|
||||
# to be very predictable
|
||||
# Default false
|
||||
enable_mem_prealloc = false
|
||||
|
||||
# Reclaim guest freed memory.
|
||||
# Enabling this will result in the VM balloon device having f_reporting=on set.
|
||||
# Then the hypervisor will use it to reclaim guest freed memory.
|
||||
# This is useful for reducing the amount of memory used by a VM.
|
||||
# Enabling this feature may sometimes reduce the speed of memory access in
|
||||
# the VM.
|
||||
#
|
||||
# Default false
|
||||
reclaim_guest_freed_memory = false
|
||||
|
||||
# Enable huge pages for VM RAM, default false
|
||||
# Enabling this will result in the VM memory
|
||||
# being allocated using huge pages.
|
||||
# This is useful when you want to use vhost-user network
|
||||
# stacks within the container. This will automatically
|
||||
# result in memory pre allocation
|
||||
enable_hugepages = false
|
||||
|
||||
# Enable vhost-user storage device, default false
|
||||
# Enabling this will result in some Linux reserved block type
|
||||
# major range 240-254 being chosen to represent vhost-user devices.
|
||||
enable_vhost_user_store = @DEFENABLEVHOSTUSERSTORE@
|
||||
|
||||
# The base directory specifically used for vhost-user devices.
|
||||
# Its sub-path "block" is used for block devices; "block/sockets" is
|
||||
# where we expect vhost-user sockets to live; "block/devices" is where
|
||||
# simulated block device nodes for vhost-user devices to live.
|
||||
vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@"
|
||||
|
||||
# Enable vIOMMU, default false
|
||||
# Enabling this will result in the VM having a vIOMMU device
|
||||
# This will also add the following options to the kernel's
|
||||
# command line: intel_iommu=on,iommu=pt
|
||||
enable_iommu = false
|
||||
|
||||
# Enable IOMMU_PLATFORM, default false
|
||||
# Enabling this will result in the VM device having iommu_platform=on set
|
||||
enable_iommu_platform = false
|
||||
|
||||
# List of valid annotations values for the vhost user store path
|
||||
# The default if not set is empty (all annotations rejected.)
|
||||
# Your distribution recommends: @DEFVALIDVHOSTUSERSTOREPATHS@
|
||||
valid_vhost_user_store_paths = @DEFVALIDVHOSTUSERSTOREPATHS@
|
||||
|
||||
# The timeout for reconnecting on non-server spdk sockets when the remote end goes away.
|
||||
# qemu will delay this many seconds and then attempt to reconnect.
|
||||
# Zero disables reconnecting, and the default is zero.
|
||||
vhost_user_reconnect_timeout_sec = 0
|
||||
|
||||
# Enable file based guest memory support. The default is an empty string which
|
||||
# will disable this feature. In the case of virtio-fs, this is enabled
|
||||
# automatically and '/dev/shm' is used as the backing folder.
|
||||
# This option will be ignored if VM templating is enabled.
|
||||
file_mem_backend = ""
|
||||
|
||||
# List of valid annotations values for the file_mem_backend annotation
|
||||
# The default if not set is empty (all annotations rejected.)
|
||||
# Your distribution recommends: @DEFVALIDFILEMEMBACKENDS@
|
||||
valid_file_mem_backends = @DEFVALIDFILEMEMBACKENDS@
|
||||
|
||||
# -pflash can add image file to VM. The arguments of it should be in format
|
||||
# of ["/path/to/flash0.img", "/path/to/flash1.img"]
|
||||
pflashes = []
|
||||
|
||||
# This option changes the default hypervisor and kernel parameters
|
||||
# to enable debug output where available. And Debug also enable the hmp socket.
|
||||
#
|
||||
# Default false
|
||||
enable_debug = false
|
||||
|
||||
# Disable the customizations done in the runtime when it detects
|
||||
# that it is running on top a VMM. This will result in the runtime
|
||||
# behaving as it would when running on bare metal.
|
||||
#
|
||||
disable_nesting_checks = true
|
||||
|
||||
# If false and nvdimm is supported, use nvdimm device to plug guest image.
|
||||
# Otherwise virtio-block device is used.
|
||||
#
|
||||
# nvdimm is not supported when `confidential_guest = true`.
|
||||
disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM@
|
||||
|
||||
# Before hot plugging a PCIe device, you need to add a pcie_root_port device.
|
||||
# Use this parameter when using some large PCI bar devices, such as Nvidia GPU
|
||||
# The value means the number of pcie_root_port
|
||||
# Default 0
|
||||
pcie_root_port = 0
|
||||
|
||||
# If vhost-net backend for virtio-net is not desired, set to true. Default is false, which trades off
|
||||
# security (vhost-net runs ring0) for network I/O performance.
|
||||
disable_vhost_net = false
|
||||
|
||||
# This option allows to add an extra HMP or QMP socket when `enable_debug = true`
|
||||
#
|
||||
# WARNING: Anyone with access to the extra socket can take full control of
|
||||
# Qemu. This is for debugging purpose only and must *NEVER* be used in
|
||||
# production.
|
||||
#
|
||||
# Valid values are :
|
||||
# - "hmp"
|
||||
# - "qmp"
|
||||
# - "qmp-pretty" (same as "qmp" with pretty json formatting)
|
||||
#
|
||||
# If set to the empty string "", no extra monitor socket is added. This is
|
||||
# the default.
|
||||
#extra_monitor_socket = "hmp"
|
||||
|
||||
#
|
||||
# Default entropy source.
|
||||
# The path to a host source of entropy (including a real hardware RNG)
|
||||
# /dev/urandom and /dev/random are two main options.
|
||||
# Be aware that /dev/random is a blocking source of entropy. If the host
|
||||
# runs out of entropy, the VMs boot time will increase leading to get startup
|
||||
# timeouts.
|
||||
# The source of entropy /dev/urandom is non-blocking and provides a
|
||||
# generally acceptable source of entropy. It should work well for pretty much
|
||||
# all practical purposes.
|
||||
entropy_source = "@DEFENTROPYSOURCE@"
|
||||
|
||||
# List of valid annotations values for entropy_source
|
||||
# The default if not set is empty (all annotations rejected.)
|
||||
# Your distribution recommends: @DEFVALIDENTROPYSOURCES@
|
||||
valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
|
||||
|
||||
# Path to OCI hook binaries in the *guest rootfs*.
|
||||
# This does not affect host-side hooks which must instead be added to
|
||||
# the OCI spec passed to the runtime.
|
||||
#
|
||||
# You can create a rootfs with hooks by customizing the osbuilder scripts:
|
||||
# https://github.com/kata-containers/kata-containers/tree/main/tools/osbuilder
|
||||
#
|
||||
# Hooks must be stored in a subdirectory of guest_hook_path according to their
|
||||
# hook type, i.e. "guest_hook_path/{prestart,poststart,poststop}".
|
||||
# The agent will scan these directories for executable files and add them, in
|
||||
# lexicographical order, to the lifecycle of the guest container.
|
||||
# Hooks are executed in the runtime namespace of the guest. See the official documentation:
|
||||
# https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks
|
||||
# Warnings will be logged if any error is encountered while scanning for hooks,
|
||||
# but it will not abort container execution.
|
||||
# Recommended value when enabling: "/usr/share/oci/hooks"
|
||||
guest_hook_path = ""
|
||||
|
||||
#
|
||||
# Use rx Rate Limiter to control network I/O inbound bandwidth(size in bits/sec for SB/VM).
|
||||
# In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) to discipline traffic.
|
||||
# Default 0-sized value means unlimited rate.
|
||||
rx_rate_limiter_max_rate = 0
|
||||
# Use tx Rate Limiter to control network I/O outbound bandwidth(size in bits/sec for SB/VM).
|
||||
# In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) and ifb(Intermediate Functional Block)
|
||||
# to discipline traffic.
|
||||
# Default 0-sized value means unlimited rate.
|
||||
tx_rate_limiter_max_rate = 0
|
||||
|
||||
# Set where to save the guest memory dump file.
|
||||
# If set, when GUEST_PANICKED event occurred,
|
||||
# guest memeory will be dumped to host filesystem under guest_memory_dump_path,
|
||||
# This directory will be created automatically if it does not exist.
|
||||
#
|
||||
# The dumped file(also called vmcore) can be processed with crash or gdb.
|
||||
#
|
||||
# WARNING:
|
||||
# Dump guest's memory can take very long depending on the amount of guest memory
|
||||
# and use much disk space.
|
||||
# Recommended value when enabling: "/var/crash/kata"
|
||||
guest_memory_dump_path = ""
|
||||
|
||||
# If enable paging.
|
||||
# Basically, if you want to use "gdb" rather than "crash",
|
||||
# or need the guest-virtual addresses in the ELF vmcore,
|
||||
# then you should enable paging.
|
||||
#
|
||||
# See: https://www.qemu.org/docs/master/qemu-qmp-ref.html#Dump-guest-memory for details
|
||||
guest_memory_dump_paging = false
|
||||
|
||||
# Enable swap in the guest. Default false.
|
||||
# When enable_guest_swap is enabled, insert a raw file to the guest as the swap device
|
||||
# if the swappiness of a container (set by annotation "io.katacontainers.container.resource.swappiness")
|
||||
# is bigger than 0.
|
||||
# The size of the swap device should be
|
||||
# swap_in_bytes (set by annotation "io.katacontainers.container.resource.swap_in_bytes") - memory_limit_in_bytes.
|
||||
# If swap_in_bytes is not set, the size should be memory_limit_in_bytes.
|
||||
# If swap_in_bytes and memory_limit_in_bytes is not set, the size should
|
||||
# be default_memory.
|
||||
enable_guest_swap = false
|
||||
|
||||
# use legacy serial for guest console if available and implemented for architecture. Default false
|
||||
use_legacy_serial = false
|
||||
|
||||
# disable applying SELinux on the VMM process (default false)
|
||||
disable_selinux = @DEFDISABLESELINUX@
|
||||
|
||||
# disable applying SELinux on the container process
|
||||
# If set to false, the type `container_t` is applied to the container process by default.
|
||||
# Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built
|
||||
# with `SELINUX=yes`.
|
||||
# (default: true)
|
||||
disable_guest_selinux = @DEFDISABLEGUESTSELINUX@
|
||||
|
||||
|
||||
[factory]
|
||||
# VM templating support. Once enabled, new VMs are created from template
|
||||
# using vm cloning. They will share the same initial kernel, initramfs and
|
||||
# agent memory by mapping it readonly. It helps speeding up new container
|
||||
# creation and saves a lot of memory if there are many kata containers running
|
||||
# on the same host.
|
||||
#
|
||||
# When disabled, new VMs are created from scratch.
|
||||
#
|
||||
# Note: Requires "initrd=" to be set ("image=" is not supported).
|
||||
#
|
||||
# Default false
|
||||
enable_template = false
|
||||
|
||||
# Specifies the path of template.
|
||||
#
|
||||
# Default "/run/vc/vm/template"
|
||||
template_path = "/run/vc/vm/template"
|
||||
|
||||
# The number of caches of VMCache:
|
||||
# unspecified or == 0 --> VMCache is disabled
|
||||
# > 0 --> will be set to the specified number
|
||||
#
|
||||
# VMCache is a function that creates VMs as caches before using it.
|
||||
# It helps speed up new container creation.
|
||||
# The function consists of a server and some clients communicating
|
||||
# through Unix socket. The protocol is gRPC in protocols/cache/cache.proto.
|
||||
# The VMCache server will create some VMs and cache them by factory cache.
|
||||
# It will convert the VM to gRPC format and transport it when it
|
||||
# receives requests from clients.
|
||||
# Factory grpccache is the VMCache client. It will request gRPC format
|
||||
# VM and convert it back to a VM. If VMCache function is enabled,
|
||||
# kata-runtime will request VM from factory grpccache when it creates
|
||||
# a new sandbox.
|
||||
#
|
||||
# Default 0
|
||||
vm_cache_number = 0
|
||||
|
||||
# Specify the address of the Unix socket that is used by VMCache.
|
||||
#
|
||||
# Default /var/run/kata-containers/cache.sock
|
||||
vm_cache_endpoint = "/var/run/kata-containers/cache.sock"
|
||||
|
||||
[agent.@PROJECT_TYPE@]
|
||||
# If enabled, make the agent display debug-level messages.
|
||||
# (default: disabled)
|
||||
enable_debug = false
|
||||
|
||||
# Enable agent tracing.
|
||||
#
|
||||
# If enabled, the agent will generate OpenTelemetry trace spans.
|
||||
#
|
||||
# Notes:
|
||||
#
|
||||
# - If the runtime also has tracing enabled, the agent spans will be
|
||||
# associated with the appropriate runtime parent span.
|
||||
# - If enabled, the runtime will wait for the container to shutdown,
|
||||
# increasing the container shutdown time slightly.
|
||||
#
|
||||
# (default: disabled)
|
||||
enable_tracing = false
|
||||
|
||||
# Comma separated list of kernel modules and their parameters.
|
||||
# These modules will be loaded in the guest kernel using modprobe(8).
|
||||
# The following example can be used to load two kernel modules with parameters
|
||||
# - kernel_modules=["e1000e InterruptThrottleRate=3000,3000,3000 EEE=1", "i915 enable_ppgtt=0"]
|
||||
# The first word is considered as the module name and the rest as its parameters.
|
||||
# Container will not be started when:
|
||||
# * A kernel module is specified and the modprobe command is not installed in the guest
|
||||
# or it fails loading the module.
|
||||
# * The module is not available in the guest or it doesn't meet the guest kernel
|
||||
# requirements, like architecture and version.
|
||||
#
|
||||
kernel_modules = []
|
||||
|
||||
# Enable debug console.
|
||||
|
||||
# If enabled, users can connect to the guest OS running inside the hypervisor
|
||||
# through "kata-runtime exec <sandbox-id>" command
|
||||
debug_console_enabled = false
|
||||
|
||||
# Agent dial timeout in millisecond.
|
||||
# (default: 10)
|
||||
dial_timeout_ms = 10
|
||||
|
||||
# Agent reconnect timeout in millisecond.
|
||||
# Retry times = reconnect_timeout_ms / dial_timeout_ms (default: 300)
|
||||
# If you find pod cannot connect to the agent when starting, please
|
||||
# consider increasing this value to increase the retry times.
|
||||
# Do not change the value of dial_timeout_ms unless you know
|
||||
# what you are doing.
|
||||
# (default: 3000)
|
||||
reconnect_timeout_ms = 3000
|
||||
|
||||
# Create Container Request Timeout
|
||||
# This timeout value is used to set the maximum duration for the agent to process a CreateContainerRequest.
|
||||
# It's also used to ensure that workloads, especially those involving large image pulls within the guest,
|
||||
# have sufficient time to complete.
|
||||
#
|
||||
# Effective Timeout Determination:
|
||||
# The effective timeout for a CreateContainerRequest is determined by taking the minimum of the following two values:
|
||||
# - create_container_timeout: The timeout value configured for creating containers (default: 30,000 milliseconds).
|
||||
# - runtime-request-timeout: The timeout value specified in the Kubelet configuration described as the link below:
|
||||
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout)
|
||||
# Defaults to @DEFCREATECONTAINERTIMEOUT_COCO@ second(s)
|
||||
create_container_timeout = @DEFCREATECONTAINERTIMEOUT_COCO@
|
||||
|
||||
[runtime]
|
||||
# If enabled, the runtime will log additional debug messages to the
|
||||
# system log
|
||||
# (default: disabled)
|
||||
enable_debug = false
|
||||
#
|
||||
# Internetworking model
|
||||
# Determines how the VM should be connected to the
|
||||
# container network interface
|
||||
# Options:
|
||||
#
|
||||
# - macvtap
|
||||
# Used when the Container network interface can be bridged using
|
||||
# macvtap.
|
||||
#
|
||||
# - none
|
||||
#   Used when customizing the network. Only creates a tap device. No veth pair.
|
||||
#
|
||||
# - tcfilter
|
||||
# Uses tc filter rules to redirect traffic from the network interface
|
||||
# provided by plugin to a tap interface connected to the VM.
|
||||
#
|
||||
internetworking_model="@DEFNETWORKMODEL_QEMU@"
|
||||
|
||||
name="@RUNTIMENAME@"
|
||||
hypervisor_name="@HYPERVISOR_QEMU@"
|
||||
agent_name="@PROJECT_TYPE@"
|
||||
|
||||
# disable guest seccomp
|
||||
# Determines whether container seccomp profiles are passed to the virtual
|
||||
# machine and applied by the kata agent. If set to true, seccomp is not applied
|
||||
# within the guest
|
||||
# (default: true)
|
||||
disable_guest_seccomp = @DEFDISABLEGUESTSECCOMP@
|
||||
|
||||
# vCPUs pinning settings
|
||||
# if enabled, each vCPU thread will be scheduled to a fixed CPU
|
||||
# qualified condition: num(vCPU threads) == num(CPUs in sandbox's CPUSet)
|
||||
enable_vcpus_pinning = false
|
||||
|
||||
# Apply a custom SELinux security policy to the container process inside the VM.
|
||||
# This is used when you want to apply a type other than the default `container_t`,
|
||||
# so general users should not uncomment and apply it.
|
||||
# (format: "user:role:type")
|
||||
# Note: You cannot specify MCS policy with the label because the sensitivity levels and
|
||||
# categories are determined automatically by high-level container runtimes such as containerd.
|
||||
guest_selinux_label = "@DEFGUESTSELINUXLABEL@"
|
||||
|
||||
# If enabled, the runtime will create opentracing.io traces and spans.
|
||||
# (See https://www.jaegertracing.io/docs/getting-started).
|
||||
# (default: disabled)
|
||||
enable_tracing = false
|
||||
|
||||
# Set the full url to the Jaeger HTTP Thrift collector.
|
||||
# The default if not set will be "http://localhost:14268/api/traces"
|
||||
jaeger_endpoint = ""
|
||||
|
||||
# Sets the username to be used if basic auth is required for Jaeger.
|
||||
jaeger_user = ""
|
||||
|
||||
# Sets the password to be used if basic auth is required for Jaeger.
|
||||
jaeger_password = ""
|
||||
|
||||
# If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
|
||||
# This option may have some potential impacts to your host. It should only be used when you know what you're doing.
|
||||
# `disable_new_netns` conflicts with `internetworking_model=tcfilter` and `internetworking_model=macvtap`. It works only
|
||||
# with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge
|
||||
# (like OVS) directly.
|
||||
# (default: false)
|
||||
disable_new_netns = false
|
||||
|
||||
# if enabled, the runtime will add all the kata processes inside one dedicated cgroup.
|
||||
# The container cgroups in the host are not created, just one single cgroup per sandbox.
|
||||
# The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox.
|
||||
# The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
|
||||
# The sandbox cgroup is constrained if there is no container type annotation.
|
||||
# See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
|
||||
sandbox_cgroup_only = @DEFSANDBOXCGROUPONLY_QEMU@
|
||||
|
||||
# If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
|
||||
# this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
|
||||
# when a hardware architecture or hypervisor solutions is utilized which does not support CPU and/or memory hotplug.
|
||||
# Compatibility for determining appropriate sandbox (VM) size:
|
||||
# - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O
|
||||
# does not yet support sandbox sizing annotations.
|
||||
# - When running single containers using a tool like ctr, container sizing information will be available.
|
||||
static_sandbox_resource_mgmt = @DEFSTATICRESOURCEMGMT_COCO@
|
||||
|
||||
# If specified, sandbox_bind_mounts identifies host paths to be mounted (ro) into the sandbox's shared path.
|
||||
# This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
|
||||
# If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts`
|
||||
# These will not be exposed to the container workloads, and are only provided for potential guest services.
|
||||
sandbox_bind_mounts = @DEFBINDMOUNTS@
|
||||
|
||||
# VFIO Mode
|
||||
# Determines how VFIO devices should be presented to the container.
|
||||
# Options:
|
||||
#
|
||||
# - vfio
|
||||
# Matches behaviour of OCI runtimes (e.g. runc) as much as
|
||||
# possible. VFIO devices will appear in the container as VFIO
|
||||
# character devices under /dev/vfio. The exact names may differ
|
||||
# from the host (they need to match the VM's IOMMU group numbers
|
||||
# rather than the host's)
|
||||
#
|
||||
# - guest-kernel
|
||||
# This is a Kata-specific behaviour that's useful in certain cases.
|
||||
# The VFIO device is managed by whatever driver in the VM kernel
|
||||
# claims it. This means it will appear as one or more device nodes
|
||||
# or network interfaces depending on the nature of the device.
|
||||
# Using this mode requires specially built workloads that know how
|
||||
# to locate the relevant device interfaces within the VM.
|
||||
#
|
||||
vfio_mode = "@DEFVFIOMODE@"
|
||||
|
||||
# If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will
|
||||
# be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest.
|
||||
disable_guest_empty_dir = @DEFDISABLEGUESTEMPTYDIR@
|
||||
|
||||
# Enabled experimental feature list, format: ["a", "b"].
|
||||
# Experimental features are features not stable enough for production,
|
||||
# they may break compatibility, and are prepared for a big version bump.
|
||||
# Supported experimental features:
|
||||
# for example:
|
||||
# experimental=["force_guest_pull"]
|
||||
# which is for enable force_guest_pull mode in CoCo scenarios.
|
||||
# (default: [])
|
||||
experimental = @DEFAULTEXPFEATURES@
|
||||
|
||||
# If enabled, user can run pprof tools with shim v2 process through kata-monitor.
|
||||
# (default: false)
|
||||
enable_pprof = false
|
||||
|
||||
# Base directory of directly attachable network config.
|
||||
# Network devices for VM-based containers are allowed to be placed in the
|
||||
# host netns to eliminate as many hops as possible, which is what we
|
||||
# called a "Directly Attachable Network". The config, set by special CNI
|
||||
# plugins, is used to tell the Kata containers what devices are attached
|
||||
# to the hypervisor.
|
||||
# (default: /run/kata-containers/dans)
|
||||
dan_conf = "@DEFDANCONF@"
|
||||
|
||||
# pod_resource_api_sock specifies the unix socket for the Kubelet's
|
||||
# PodResource API endpoint. If empty, kubernetes based cold plug
|
||||
# will not be attempted. In order for this feature to work, the
|
||||
# KubeletPodResourcesGet featureGate must be enabled in Kubelet,
|
||||
# if using Kubelet older than 1.34.
|
||||
#
|
||||
# The pod resource API's socket is relative to the Kubelet's root-dir,
|
||||
# which is defined by the cluster admin, and its location is:
|
||||
# ${KubeletRootDir}/pod-resources/kubelet.sock
|
||||
#
|
||||
# cold_plug_vfio(see hypervisor config) acts as a feature gate:
|
||||
# cold_plug_vfio = no_port (default) => no cold plug
|
||||
# cold_plug_vfio != no_port AND pod_resource_api_sock = "" => need
|
||||
# explicit CDI annotation for cold plug (applies mainly
|
||||
# to non-k8s cases)
|
||||
# cold_plug_vfio != no_port AND pod_resource_api_sock != "" => kubelet
|
||||
# based cold plug.
|
||||
pod_resource_api_sock = "@DEFPODRESOURCEAPISOCK@"
|
||||
@@ -1,746 +0,0 @@
|
||||
# Copyright (c) 2017-2019 Intel Corporation
|
||||
# Copyright (c) 2021 Adobe Inc.
|
||||
# Copyright (c) 2025-2026 Ant Group
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
# XXX: WARNING: this file is auto-generated.
|
||||
# XXX:
|
||||
# XXX: Source file: "@CONFIG_QEMU_IN@"
|
||||
# XXX: Project:
|
||||
# XXX: Name: @PROJECT_NAME@
|
||||
# XXX: Type: @PROJECT_TYPE@
|
||||
|
||||
[hypervisor.qemu]
|
||||
path = "@QEMUPATH@"
|
||||
kernel = "@KERNELPATH_COCO@"
|
||||
image = "@IMAGECONFIDENTIALPATH@"
|
||||
# initrd = "@INITRDPATH@"
|
||||
machine_type = "@MACHINETYPE@"
|
||||
tdx_quote_generation_service_socket_port = @QEMUTDXQUOTEGENERATIONSERVICESOCKETPORT@
|
||||
|
||||
# rootfs filesystem type:
|
||||
# - ext4 (default)
|
||||
# - xfs
|
||||
# - erofs
|
||||
rootfs_type = @DEFROOTFSTYPE@
|
||||
|
||||
# Block storage driver to be used for the VM rootfs is backed
|
||||
# by a block device. This is virtio-blk-pci, virtio-blk-mmio or nvdimm
|
||||
vm_rootfs_driver = "virtio-blk-pci"
|
||||
|
||||
# Enable confidential guest support.
|
||||
# Toggling that setting may trigger different hardware features, ranging
|
||||
# from memory encryption to both memory and CPU-state encryption and integrity.
|
||||
# The Kata Containers runtime dynamically detects the available feature set and
|
||||
# aims at enabling the largest possible one, returning an error if none is
|
||||
# available, or none is supported by the hypervisor.
|
||||
#
|
||||
# Known limitations:
|
||||
# * Does not work by design:
|
||||
# - CPU Hotplug
|
||||
# - Memory Hotplug
|
||||
# - NVDIMM devices
|
||||
#
|
||||
# Default false
|
||||
confidential_guest = true
|
||||
|
||||
# Enable running QEMU VMM as a non-root user.
|
||||
# By default QEMU VMM run as root. When this is set to true, QEMU VMM process runs as
|
||||
# a non-root random user. See documentation for the limitations of this mode.
|
||||
rootless = false
|
||||
|
||||
# List of valid annotation names for the hypervisor
|
||||
# Each member of the list is a regular expression, which is the base name
|
||||
# of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path"
|
||||
enable_annotations = @DEFENABLEANNOTATIONS_COCO@
|
||||
|
||||
# List of valid annotations values for the hypervisor
|
||||
# Each member of the list is a path pattern as described by glob(3).
|
||||
# The default if not set is empty (all annotations rejected.)
|
||||
# Your distribution recommends: @QEMUVALIDHYPERVISORPATHS@
|
||||
valid_hypervisor_paths = @QEMUVALIDHYPERVISORPATHS@
|
||||
|
||||
# Optional space-separated list of options to pass to the guest kernel.
|
||||
# For example, use `kernel_params = "vsyscall=emulate"` if you are having
|
||||
# trouble running pre-2.15 glibc.
|
||||
#
|
||||
# WARNING: - any parameter specified here will take priority over the default
|
||||
# parameter value of the same name used to start the virtual machine.
|
||||
# Do not set values here unless you understand the impact of doing so as you
|
||||
# may stop the virtual machine from booting.
|
||||
# To see the list of default parameters, enable hypervisor debug, create a
|
||||
# container and look for 'default-kernel-parameters' log entries.
|
||||
kernel_params = "@KERNELTDXPARAMS@"
|
||||
|
||||
# Path to the firmware.
|
||||
# If you want that qemu uses the default firmware leave this option empty
|
||||
firmware = "@FIRMWARETDXPATH@"
|
||||
|
||||
# Path to the firmware volume.
|
||||
# firmware TDVF or OVMF can be split into FIRMWARE_VARS.fd (UEFI variables
|
||||
# as configuration) and FIRMWARE_CODE.fd (UEFI program image). UEFI variables
|
||||
# can be customized per each user while UEFI code is kept same.
|
||||
firmware_volume = "@FIRMWAREVOLUMEPATH@"
|
||||
|
||||
# Machine accelerators
|
||||
# comma-separated list of machine accelerators to pass to the hypervisor.
|
||||
# For example, `machine_accelerators = "nosmm,nosmbus,nosata,nopit,static-prt,nofw"`
|
||||
machine_accelerators = "@MACHINEACCELERATORS@"
|
||||
|
||||
# Qemu seccomp sandbox feature
|
||||
# comma-separated list of seccomp sandbox features to control the syscall access.
|
||||
# For example, `seccompsandbox= "on,obsolete=deny,spawn=deny,resourcecontrol=deny"`
|
||||
# Note: "elevateprivileges=deny" doesn't work with daemonize option, so it's removed from the seccomp sandbox
|
||||
# Another note: enabling this feature may reduce performance, you may enable
|
||||
# /proc/sys/net/core/bpf_jit_enable to reduce the impact. see https://man7.org/linux/man-pages/man8/bpfc.8.html
|
||||
# Recommended value when enabling: "on,obsolete=deny,spawn=deny,resourcecontrol=deny"
|
||||
seccompsandbox = "@DEFSECCOMPSANDBOXPARAM@"
|
||||
|
||||
# CPU features
|
||||
# comma-separated list of cpu features to pass to the cpu
|
||||
# For example, `cpu_features = "pmu=off,vmx=off"
|
||||
cpu_features = "@CPUFEATURES@"
|
||||
|
||||
# Default number of vCPUs per SB/VM:
|
||||
# unspecified or 0 --> will be set to @DEFVCPUS@
|
||||
# < 0 --> will be set to the actual number of physical cores
|
||||
# > 0 <= number of physical cores --> will be set to the specified number
|
||||
# > number of physical cores --> will be set to the actual number of physical cores
|
||||
default_vcpus = 1
|
||||
|
||||
# Default maximum number of vCPUs per SB/VM:
|
||||
# unspecified or == 0 --> will be set to the actual number of physical cores or to the maximum number
|
||||
# of vCPUs supported by KVM if that number is exceeded
|
||||
# > 0 <= number of physical cores --> will be set to the specified number
|
||||
# > number of physical cores --> will be set to the actual number of physical cores or to the maximum number
|
||||
# of vCPUs supported by KVM if that number is exceeded
|
||||
# WARNING: Depending of the architecture, the maximum number of vCPUs supported by KVM is used when
|
||||
# the actual number of physical cores is greater than it.
|
||||
# WARNING: Be aware that this value impacts the virtual machine's memory footprint and CPU
|
||||
# hotplug functionality. For example, `default_maxvcpus = 240` specifies that up to 240 vCPUs
|
||||
# can be added to a SB/VM, but the memory footprint will be big. Another example, with
|
||||
# `default_maxvcpus = 8` the memory footprint will be small, but 8 will be the maximum number of
|
||||
# vCPUs supported by the SB/VM. In general, we recommend that you do not edit this variable,
|
||||
# unless you know what you are doing.
|
||||
# NOTICE: on arm platform with gicv2 interrupt controller, set it to 8.
|
||||
default_maxvcpus = @DEFMAXVCPUS@
|
||||
|
||||
# Bridges can be used to hot plug devices.
|
||||
# Limitations:
|
||||
# * Currently only pci bridges are supported
|
||||
# * Up to 30 devices per bridge can be hot plugged.
|
||||
# * Up to 5 PCI bridges can be cold plugged per VM.
|
||||
# This limitation could be a bug in qemu or in the kernel
|
||||
# Default number of bridges per SB/VM:
|
||||
# unspecified or 0 --> will be set to @DEFBRIDGES@
|
||||
# > 1 <= 5 --> will be set to the specified number
|
||||
# > 5 --> will be set to 5
|
||||
default_bridges = @DEFBRIDGES@
|
||||
|
||||
# Default memory size in MiB for SB/VM.
|
||||
# If unspecified then it will be set @DEFMEMSZ@ MiB.
|
||||
default_memory = @DEFMEMSZ@
|
||||
#
|
||||
# Default memory slots per SB/VM.
|
||||
# If unspecified then it will be set @DEFMEMSLOTS@.
|
||||
# This determines how many times memory can be hot-added to the sandbox/VM.
|
||||
memory_slots = @DEFMEMSLOTS@
|
||||
|
||||
# Default maximum memory in MiB per SB / VM
|
||||
# unspecified or == 0 --> will be set to the actual amount of physical RAM
|
||||
# > 0 <= amount of physical RAM --> will be set to the specified number
|
||||
# > amount of physical RAM --> will be set to the actual amount of physical RAM
|
||||
default_maxmemory = @DEFMAXMEMSZ@
|
||||
|
||||
# This size in MiB will be added to the hypervisor's maximum memory.
|
||||
# It is the memory address space for the NVDIMM device.
|
||||
# If set block storage driver (block_device_driver) to "nvdimm",
|
||||
# should set memory_offset to the size of block device.
|
||||
# Default 0
|
||||
memory_offset = 0
|
||||
|
||||
# Specifies whether virtio-mem will be enabled.
|
||||
# Please note that this option should be used with the command
|
||||
# "echo 1 > /proc/sys/vm/overcommit_memory".
|
||||
# Default false
|
||||
enable_virtio_mem = false
|
||||
|
||||
# Disable block device from being used for a container's rootfs.
|
||||
# In case of a storage driver like devicemapper where a container's
|
||||
# root file system is backed by a block device, the block device is passed
|
||||
# directly to the hypervisor for performance reasons.
|
||||
# This flag prevents the block device from being passed to the hypervisor,
|
||||
# virtio-fs is used instead to pass the rootfs.
|
||||
disable_block_device_use = @DEFDISABLEBLOCK@
|
||||
|
||||
# Shared file system type:
|
||||
# - virtio-fs (default)
|
||||
# - virtio-fs-nydus
|
||||
# - none
|
||||
shared_fs = "@DEFSHAREDFS_QEMU_TDX_VIRTIOFS@"
|
||||
|
||||
# Path to vhost-user-fs daemon.
|
||||
virtio_fs_daemon = "@DEFVIRTIOFSDAEMON@"
|
||||
|
||||
# List of valid annotations values for the virtiofs daemon
|
||||
# The default if not set is empty (all annotations rejected.)
|
||||
# Your distribution recommends: @DEFVALIDVIRTIOFSDAEMONPATHS@
|
||||
valid_virtio_fs_daemon_paths = @DEFVALIDVIRTIOFSDAEMONPATHS@
|
||||
|
||||
# Default size of DAX cache in MiB
|
||||
virtio_fs_cache_size = @DEFVIRTIOFSCACHESIZE@
|
||||
|
||||
# Default size of virtqueues
|
||||
virtio_fs_queue_size = @DEFVIRTIOFSQUEUESIZE@
|
||||
|
||||
# Extra args for virtiofsd daemon
|
||||
#
|
||||
# Format example:
|
||||
# ["-o", "arg1=xxx,arg2", "-o", "hello world", "--arg3=yyy"]
|
||||
# Examples:
|
||||
# Set virtiofsd log level to debug : ["-o", "log_level=debug"] or ["-d"]
|
||||
#
|
||||
# see `virtiofsd -h` for possible options.
|
||||
virtio_fs_extra_args = @DEFVIRTIOFSEXTRAARGS@
|
||||
|
||||
# Cache mode:
|
||||
#
|
||||
# - never
|
||||
# Metadata, data, and pathname lookup are not cached in guest. They are
|
||||
# always fetched from host and any changes are immediately pushed to host.
|
||||
#
|
||||
# - metadata
|
||||
# Metadata and pathname lookup are cached in guest and never expire.
|
||||
# Data is never cached in guest.
|
||||
#
|
||||
# - auto
|
||||
# Metadata and pathname lookup cache expires after a configured amount of
|
||||
# time (default is 1 second). Data is cached while the file is open (close
|
||||
# to open consistency).
|
||||
#
|
||||
# - always
|
||||
# Metadata, data, and pathname lookup are cached in guest and never expire.
|
||||
virtio_fs_cache = "@DEFVIRTIOFSCACHE@"
|
||||
|
||||
# Block storage driver to be used for the hypervisor in case the container
|
||||
# rootfs is backed by a block device. This is virtio-scsi, virtio-blk
|
||||
# or nvdimm.
|
||||
block_device_driver = "@DEFBLOCKSTORAGEDRIVER_QEMU@"
|
||||
|
||||
# aio is the I/O mechanism used by qemu
|
||||
# Options:
|
||||
#
|
||||
# - threads
|
||||
# Pthread based disk I/O.
|
||||
#
|
||||
# - native
|
||||
# Native Linux I/O.
|
||||
#
|
||||
# - io_uring
|
||||
# Linux io_uring API. This provides the fastest I/O operations on Linux, requires kernel>5.1 and
|
||||
# qemu >=5.0.
|
||||
block_device_aio = "@DEFBLOCKDEVICEAIO_QEMU@"
|
||||
|
||||
# Specifies whether cache-related options will be set on block devices.
|
||||
# Default false
|
||||
block_device_cache_set = false
|
||||
|
||||
# Specifies cache-related options for block devices.
|
||||
# Denotes whether use of O_DIRECT (bypass the host page cache) is enabled.
|
||||
# Default false
|
||||
block_device_cache_direct = false
|
||||
|
||||
# Specifies cache-related options for block devices.
|
||||
# Denotes whether flush requests for the device are ignored.
|
||||
# Default false
|
||||
block_device_cache_noflush = false
|
||||
|
||||
# Enable iothreads (data-plane) to be used. This causes IO to be
|
||||
# handled in a separate IO thread. This is currently implemented
|
||||
# for virtio-scsi and virtio-blk.
|
||||
#
|
||||
enable_iothreads = @DEFENABLEIOTHREADS@
|
||||
|
||||
# Independent IOThreads enables IO to be processed in a separate thread, it is
|
||||
# for QEMU hotplug device attach to iothread, like virtio-blk.
|
||||
indep_iothreads = @DEFINDEPIOTHREADS@
|
||||
|
||||
# Enable pre allocation of VM RAM, default false
|
||||
# Enabling this will result in lower container density
|
||||
# as all of the memory will be allocated and locked
|
||||
# This is useful when you want to reserve all the memory
|
||||
# upfront or in the cases where you want memory latencies
|
||||
# to be very predictable
|
||||
# Default false
|
||||
enable_mem_prealloc = false
|
||||
|
||||
# Reclaim guest freed memory.
|
||||
# Enabling this will result in the VM balloon device having f_reporting=on set.
|
||||
# Then the hypervisor will use it to reclaim guest freed memory.
|
||||
# This is useful for reducing the amount of memory used by a VM.
|
||||
# Enabling this feature may sometimes reduce the speed of memory access in
|
||||
# the VM.
|
||||
#
|
||||
# Default false
|
||||
reclaim_guest_freed_memory = false
|
||||
|
||||
# Enable huge pages for VM RAM, default false
|
||||
# Enabling this will result in the VM memory
|
||||
# being allocated using huge pages.
|
||||
# This is useful when you want to use vhost-user network
|
||||
# stacks within the container. This will automatically
|
||||
# result in memory pre allocation
|
||||
enable_hugepages = false
|
||||
|
||||
# Enable vhost-user storage device, default false
|
||||
# Enabling this will result in some Linux reserved block type
|
||||
# major range 240-254 being chosen to represent vhost-user devices.
|
||||
enable_vhost_user_store = @DEFENABLEVHOSTUSERSTORE@
|
||||
|
||||
# The base directory specifically used for vhost-user devices.
|
||||
# Its sub-path "block" is used for block devices; "block/sockets" is
|
||||
# where we expect vhost-user sockets to live; "block/devices" is where
|
||||
# simulated block device nodes for vhost-user devices live.
|
||||
vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@"
|
||||
|
||||
# Enable vIOMMU, default false
|
||||
# Enabling this will result in the VM having a vIOMMU device
|
||||
# This will also add the following options to the kernel's
|
||||
# command line: intel_iommu=on,iommu=pt
|
||||
enable_iommu = false
|
||||
|
||||
# Enable IOMMU_PLATFORM, default false
|
||||
# Enabling this will result in the VM device having iommu_platform=on set
|
||||
enable_iommu_platform = false
|
||||
|
||||
# List of valid annotations values for the vhost user store path
|
||||
# The default if not set is empty (all annotations rejected.)
|
||||
# Your distribution recommends: @DEFVALIDVHOSTUSERSTOREPATHS@
|
||||
valid_vhost_user_store_paths = @DEFVALIDVHOSTUSERSTOREPATHS@
|
||||
|
||||
# The timeout for reconnecting on non-server spdk sockets when the remote end goes away.
|
||||
# qemu will delay this many seconds and then attempt to reconnect.
|
||||
# Zero disables reconnecting, and the default is zero.
|
||||
vhost_user_reconnect_timeout_sec = 0
|
||||
|
||||
# Enable file based guest memory support. The default is an empty string which
|
||||
# will disable this feature. In the case of virtio-fs, this is enabled
|
||||
# automatically and '/dev/shm' is used as the backing folder.
|
||||
# This option will be ignored if VM templating is enabled.
|
||||
file_mem_backend = "@DEFFILEMEMBACKEND@"
|
||||
|
||||
# List of valid annotations values for the file_mem_backend annotation
|
||||
# The default if not set is empty (all annotations rejected.)
|
||||
# Your distribution recommends: @DEFVALIDFILEMEMBACKENDS@
|
||||
valid_file_mem_backends = @DEFVALIDFILEMEMBACKENDS@
|
||||
|
||||
# -pflash can add image file to VM. The arguments of it should be in format
|
||||
# of ["/path/to/flash0.img", "/path/to/flash1.img"]
|
||||
pflashes = []
|
||||
|
||||
# This option changes the default hypervisor and kernel parameters
|
||||
# to enable debug output where available. Debug also enables the HMP socket.
|
||||
#
|
||||
# Default false
|
||||
enable_debug = false
|
||||
|
||||
# This option allows to add an extra HMP or QMP socket when `enable_debug = true`
|
||||
#
|
||||
# WARNING: Anyone with access to the extra socket can take full control of
|
||||
# Qemu. This is for debugging purpose only and must *NEVER* be used in
|
||||
# production.
|
||||
#
|
||||
# Valid values are :
|
||||
# - "hmp"
|
||||
# - "qmp"
|
||||
# - "qmp-pretty" (same as "qmp" with pretty json formatting)
|
||||
#
|
||||
# If set to the empty string "", no extra monitor socket is added. This is
|
||||
# the default.
|
||||
extra_monitor_socket = ""
|
||||
|
||||
# Disable the customizations done in the runtime when it detects
|
||||
# that it is running on top a VMM. This will result in the runtime
|
||||
# behaving as it would when running on bare metal.
|
||||
#
|
||||
disable_nesting_checks = false
|
||||
|
||||
# If false and nvdimm is supported, use nvdimm device to plug guest image.
|
||||
# Otherwise virtio-block device is used.
|
||||
#
|
||||
# nvdimm is not supported when `confidential_guest = true`.
|
||||
disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM@
|
||||
|
||||
# Before hot plugging a PCIe device, you need to add a pcie_root_port device.
|
||||
# Use this parameter when using some large PCI bar devices, such as Nvidia GPU
|
||||
# The value means the number of pcie_root_port
|
||||
# Default 0
|
||||
pcie_root_port = 0
|
||||
|
||||
# If vhost-net backend for virtio-net is not desired, set to true. Default is false, which trades off
|
||||
# security (vhost-net runs ring0) for network I/O performance.
|
||||
disable_vhost_net = false
|
||||
|
||||
#
|
||||
# Default entropy source.
|
||||
# The path to a host source of entropy (including a real hardware RNG)
|
||||
# /dev/urandom and /dev/random are two main options.
|
||||
# Be aware that /dev/random is a blocking source of entropy. If the host
|
||||
# runs out of entropy, the VMs boot time will increase leading to get startup
|
||||
# timeouts.
|
||||
# The source of entropy /dev/urandom is non-blocking and provides a
|
||||
# generally acceptable source of entropy. It should work well for pretty much
|
||||
# all practical purposes.
|
||||
entropy_source = "@DEFENTROPYSOURCE@"
|
||||
|
||||
|
||||
# List of valid annotations values for entropy_source
|
||||
# The default if not set is empty (all annotations rejected.)
|
||||
# Your distribution recommends: @DEFVALIDENTROPYSOURCES@
|
||||
valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
|
||||
|
||||
# Path to OCI hook binaries in the *guest rootfs*.
|
||||
# This does not affect host-side hooks which must instead be added to
|
||||
# the OCI spec passed to the runtime.
|
||||
#
|
||||
# You can create a rootfs with hooks by customizing the osbuilder scripts:
|
||||
# https://github.com/kata-containers/kata-containers/tree/main/tools/osbuilder
|
||||
#
|
||||
# Hooks must be stored in a subdirectory of guest_hook_path according to their
|
||||
# hook type, i.e. "guest_hook_path/{prestart,poststart,poststop}".
|
||||
# The agent will scan these directories for executable files and add them, in
|
||||
# lexicographical order, to the lifecycle of the guest container.
|
||||
# Hooks are executed in the runtime namespace of the guest. See the official documentation:
|
||||
# https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks
|
||||
# Warnings will be logged if any error is encountered while scanning for hooks,
|
||||
# but it will not abort container execution.
|
||||
# Recommended value when enabling: "/usr/share/oci/hooks"
|
||||
guest_hook_path = ""
|
||||
#
|
||||
# Use rx Rate Limiter to control network I/O inbound bandwidth(size in bits/sec for SB/VM).
|
||||
# In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) to discipline traffic.
|
||||
# Default 0-sized value means unlimited rate.
|
||||
rx_rate_limiter_max_rate = 0
|
||||
# Use tx Rate Limiter to control network I/O outbound bandwidth(size in bits/sec for SB/VM).
|
||||
# In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) and ifb(Intermediate Functional Block)
|
||||
# to discipline traffic.
|
||||
# Default 0-sized value means unlimited rate.
|
||||
tx_rate_limiter_max_rate = 0
|
||||
|
||||
# Set where to save the guest memory dump file.
|
||||
# If set, when GUEST_PANICKED event occurred,
|
||||
# guest memeory will be dumped to host filesystem under guest_memory_dump_path,
|
||||
# This directory will be created automatically if it does not exist.
|
||||
#
|
||||
# The dumped file(also called vmcore) can be processed with crash or gdb.
|
||||
#
|
||||
# WARNING:
|
||||
# Dump guest's memory can take very long depending on the amount of guest memory
|
||||
# and use much disk space.
|
||||
# Recommended value when enabling: "/var/crash/kata"
|
||||
guest_memory_dump_path = ""
|
||||
|
||||
# If enable paging.
|
||||
# Basically, if you want to use "gdb" rather than "crash",
|
||||
# or need the guest-virtual addresses in the ELF vmcore,
|
||||
# then you should enable paging.
|
||||
#
|
||||
# See: https://www.qemu.org/docs/master/qemu-qmp-ref.html#Dump-guest-memory for details
|
||||
guest_memory_dump_paging = false
|
||||
|
||||
# Enable swap in the guest. Default false.
|
||||
# When enable_guest_swap is enabled, insert a raw file to the guest as the swap device
|
||||
# if the swappiness of a container (set by annotation "io.katacontainers.container.resource.swappiness")
|
||||
# is bigger than 0.
|
||||
# The size of the swap device should be
|
||||
# swap_in_bytes (set by annotation "io.katacontainers.container.resource.swap_in_bytes") - memory_limit_in_bytes.
|
||||
# If swap_in_bytes is not set, the size should be memory_limit_in_bytes.
|
||||
# If swap_in_bytes and memory_limit_in_bytes is not set, the size should
|
||||
# be default_memory.
|
||||
enable_guest_swap = false
|
||||
|
||||
# use legacy serial for guest console if available and implemented for architecture. Default false
|
||||
use_legacy_serial = false
|
||||
|
||||
# disable applying SELinux on the VMM process (default false)
|
||||
disable_selinux = @DEFDISABLESELINUX@
|
||||
|
||||
# disable applying SELinux on the container process
|
||||
# If set to false, the type `container_t` is applied to the container process by default.
|
||||
# Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built
|
||||
# with `SELINUX=yes`.
|
||||
# (default: true)
|
||||
disable_guest_selinux = @DEFDISABLEGUESTSELINUX@
|
||||
|
||||
|
||||
[factory]
|
||||
# VM templating support. Once enabled, new VMs are created from template
|
||||
# using vm cloning. They will share the same initial kernel, initramfs and
|
||||
# agent memory by mapping it readonly. It helps speeding up new container
|
||||
# creation and saves a lot of memory if there are many kata containers running
|
||||
# on the same host.
|
||||
#
|
||||
# When disabled, new VMs are created from scratch.
|
||||
#
|
||||
# Note: Requires "initrd=" to be set ("image=" is not supported).
|
||||
#
|
||||
# Default false
|
||||
enable_template = false
|
||||
|
||||
# Specifies the path of template.
|
||||
#
|
||||
# Default "/run/vc/vm/template"
|
||||
template_path = "/run/vc/vm/template"
|
||||
|
||||
# The number of caches of VMCache:
|
||||
# unspecified or == 0 --> VMCache is disabled
|
||||
# > 0 --> will be set to the specified number
|
||||
#
|
||||
# VMCache is a function that creates VMs as caches before using it.
|
||||
# It helps speed up new container creation.
|
||||
# The function consists of a server and some clients communicating
|
||||
# through Unix socket. The protocol is gRPC in protocols/cache/cache.proto.
|
||||
# The VMCache server will create some VMs and cache them by factory cache.
|
||||
# It will convert the VM to gRPC format and transport it when gets
|
||||
# requestion from clients.
|
||||
# Factory grpccache is the VMCache client. It will request gRPC format
|
||||
# VM and convert it back to a VM. If VMCache function is enabled,
|
||||
# kata-runtime will request VM from factory grpccache when it creates
|
||||
# a new sandbox.
|
||||
#
|
||||
# Default 0
|
||||
vm_cache_number = 0
|
||||
|
||||
# Specify the address of the Unix socket that is used by VMCache.
|
||||
#
|
||||
# Default /var/run/kata-containers/cache.sock
|
||||
vm_cache_endpoint = "/var/run/kata-containers/cache.sock"
|
||||
|
||||
[agent.@PROJECT_TYPE@]
|
||||
# If enabled, make the agent display debug-level messages.
|
||||
# (default: disabled)
|
||||
enable_debug = false
|
||||
|
||||
# Enable agent tracing.
|
||||
#
|
||||
# If enabled, the agent will generate OpenTelemetry trace spans.
|
||||
#
|
||||
# Notes:
|
||||
#
|
||||
# - If the runtime also has tracing enabled, the agent spans will be
|
||||
# associated with the appropriate runtime parent span.
|
||||
# - If enabled, the runtime will wait for the container to shutdown,
|
||||
# increasing the container shutdown time slightly.
|
||||
#
|
||||
# (default: disabled)
|
||||
enable_tracing = false
|
||||
|
||||
# Comma separated list of kernel modules and their parameters.
|
||||
# These modules will be loaded in the guest kernel using modprobe(8).
|
||||
# The following example can be used to load two kernel modules with parameters
|
||||
# - kernel_modules=["e1000e InterruptThrottleRate=3000,3000,3000 EEE=1", "i915 enable_ppgtt=0"]
|
||||
# The first word is considered as the module name and the rest as its parameters.
|
||||
# Container will not be started when:
|
||||
# * A kernel module is specified and the modprobe command is not installed in the guest
|
||||
# or it fails loading the module.
|
||||
# * The module is not available in the guest or it doesn't met the guest kernel
|
||||
# requirements, like architecture and version.
|
||||
#
|
||||
kernel_modules = []
|
||||
|
||||
# Enable debug console.
|
||||
|
||||
# If enabled, user can connect guest OS running inside hypervisor
|
||||
# through "kata-runtime exec <sandbox-id>" command
|
||||
|
||||
debug_console_enabled = false
|
||||
|
||||
# Agent dial timeout in millisecond.
|
||||
# (default: 10)
|
||||
dial_timeout_ms = 10
|
||||
|
||||
# Agent reconnect timeout in millisecond.
|
||||
# Retry times = reconnect_timeout_ms / dial_timeout_ms (default: 300)
|
||||
# If you find pod cannot connect to the agent when starting, please
|
||||
# consider increasing this value to increase the retry times.
|
||||
# You'd better not change the value of dial_timeout_ms, unless you have an
|
||||
# idea of what you are doing.
|
||||
# (default: 3000)
|
||||
reconnect_timeout_ms = 3000
|
||||
|
||||
# Create Container Request Timeout
|
||||
# This timeout value is used to set the maximum duration for the agent to process a CreateContainerRequest.
|
||||
# It's also used to ensure that workloads, especially those involving large image pulls within the guest,
|
||||
# have sufficient time to complete.
|
||||
#
|
||||
# Effective Timeout Determination:
|
||||
# The effective timeout for a CreateContainerRequest is determined by taking the minimum of the following two values:
|
||||
# - create_container_timeout: The timeout value configured for creating containers (default: 30,000 milliseconds).
|
||||
# - runtime-request-timeout: The timeout value specified in the Kubelet configuration described as the link below:
|
||||
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout)
|
||||
# Defaults to @DEFCREATECONTAINERTIMEOUT_COCO@ second(s)
|
||||
create_container_timeout = @DEFCREATECONTAINERTIMEOUT_COCO@
|
||||
|
||||
[runtime]
|
||||
# If enabled, the runtime will log additional debug messages to the
|
||||
# system log
|
||||
# (default: disabled)
|
||||
enable_debug = false
|
||||
#
|
||||
# Internetworking model
|
||||
# Determines how the VM should be connected to the
|
||||
# the container network interface
|
||||
# Options:
|
||||
#
|
||||
# - macvtap
|
||||
# Used when the Container network interface can be bridged using
|
||||
# macvtap.
|
||||
#
|
||||
# - none
|
||||
# Used when customize network. Only creates a tap device. No veth pair.
|
||||
#
|
||||
# - tcfilter
|
||||
# Uses tc filter rules to redirect traffic from the network interface
|
||||
# provided by plugin to a tap interface connected to the VM.
|
||||
#
|
||||
internetworking_model = "@DEFNETWORKMODEL_QEMU@"
|
||||
|
||||
name="@RUNTIMENAME@"
|
||||
hypervisor_name="@HYPERVISOR_QEMU@"
|
||||
agent_name="@PROJECT_TYPE@"
|
||||
|
||||
# disable guest seccomp
|
||||
# Determines whether container seccomp profiles are passed to the virtual
|
||||
# machine and applied by the kata agent. If set to true, seccomp is not applied
|
||||
# within the guest
|
||||
# (default: true)
|
||||
disable_guest_seccomp = @DEFDISABLEGUESTSECCOMP@
|
||||
|
||||
# vCPUs pinning settings
|
||||
# if enabled, each vCPU thread will be scheduled to a fixed CPU
|
||||
# qualified condition: num(vCPU threads) == num(CPUs in sandbox's CPUSet)
|
||||
enable_vcpus_pinning = false
|
||||
|
||||
# Apply a custom SELinux security policy to the container process inside the VM.
|
||||
# This is used when you want to apply a type other than the default `container_t`,
|
||||
# so general users should not uncomment and apply it.
|
||||
# (format: "user:role:type")
|
||||
# Note: You cannot specify MCS policy with the label because the sensitivity levels and
|
||||
# categories are determined automatically by high-level container runtimes such as containerd.
|
||||
# Example value when enabling: "system_u:system_r:container_t"
|
||||
guest_selinux_label = "@DEFGUESTSELINUXLABEL@"
|
||||
|
||||
# If enabled, the runtime will create opentracing.io traces and spans.
|
||||
# (See https://www.jaegertracing.io/docs/getting-started).
|
||||
# (default: disabled)
|
||||
enable_tracing = false
|
||||
|
||||
# Set the full url to the Jaeger HTTP Thrift collector.
|
||||
# The default if not set will be "http://localhost:14268/api/traces"
|
||||
jaeger_endpoint = ""
|
||||
|
||||
# Sets the username to be used if basic auth is required for Jaeger.
|
||||
jaeger_user = ""
|
||||
|
||||
# Sets the password to be used if basic auth is required for Jaeger.
|
||||
jaeger_password = ""
|
||||
|
||||
# If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
|
||||
# This option may have some potential impacts to your host. It should only be used when you know what you're doing.
|
||||
# `disable_new_netns` conflicts with `internetworking_model=tcfilter` and `internetworking_model=macvtap`. It works only
|
||||
# with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge
|
||||
# (like OVS) directly.
|
||||
# (default: false)
|
||||
disable_new_netns = false
|
||||
|
||||
# if enabled, the runtime will add all the kata processes inside one dedicated cgroup.
|
||||
# The container cgroups in the host are not created, just one single cgroup per sandbox.
|
||||
# The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox.
|
||||
# The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
|
||||
# The sandbox cgroup is constrained if there is no container type annotation.
|
||||
# See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
|
||||
sandbox_cgroup_only = @DEFSANDBOXCGROUPONLY_QEMU@
|
||||
|
||||
# If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
|
||||
# this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
|
||||
# when a hardware architecture or hypervisor solutions is utilized which does not support CPU and/or memory hotplug.
|
||||
# Compatibility for determining appropriate sandbox (VM) size:
|
||||
# - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O
|
||||
# does not yet support sandbox sizing annotations.
|
||||
# - When running single containers using a tool like ctr, container sizing information will be available.
|
||||
static_sandbox_resource_mgmt = @DEFSTATICRESOURCEMGMT_COCO@
|
||||
|
||||
# If specified, sandbox_bind_mounts identifieds host paths to be mounted (ro) into the sandboxes shared path.
|
||||
# This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
|
||||
# If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts`
|
||||
# These will not be exposed to the container workloads, and are only provided for potential guest services.
|
||||
sandbox_bind_mounts = @DEFBINDMOUNTS@
|
||||
|
||||
# VFIO Mode
|
||||
# Determines how VFIO devices should be be presented to the container.
|
||||
# Options:
|
||||
#
|
||||
# - vfio
|
||||
# Matches behaviour of OCI runtimes (e.g. runc) as much as
|
||||
# possible. VFIO devices will appear in the container as VFIO
|
||||
# character devices under /dev/vfio. The exact names may differ
|
||||
# from the host (they need to match the VM's IOMMU group numbers
|
||||
# rather than the host's)
|
||||
#
|
||||
# - guest-kernel
|
||||
# This is a Kata-specific behaviour that's useful in certain cases.
|
||||
# The VFIO device is managed by whatever driver in the VM kernel
|
||||
# claims it. This means it will appear as one or more device nodes
|
||||
# or network interfaces depending on the nature of the device.
|
||||
# Using this mode requires specially built workloads that know how
|
||||
# to locate the relevant device interfaces within the VM.
|
||||
#
|
||||
vfio_mode = "@DEFVFIOMODE@"
|
||||
|
||||
# If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will
|
||||
# be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest.
|
||||
disable_guest_empty_dir = @DEFDISABLEGUESTEMPTYDIR@
|
||||
|
||||
# Enabled experimental feature list, format: ["a", "b"].
|
||||
# Experimental features are features not stable enough for production,
|
||||
# they may break compatibility, and are prepared for a big version bump.
|
||||
# Supported experimental features:
|
||||
# for example:
|
||||
# experimental=["force_guest_pull"]
|
||||
# which is for enable force_guest_pull mode in CoCo scenarios.
|
||||
# (default: [])
|
||||
experimental = @DEFAULTEXPFEATURES@
|
||||
|
||||
# If enabled, user can run pprof tools with shim v2 process through kata-monitor.
|
||||
# (default: false)
|
||||
enable_pprof = false
|
||||
|
||||
# Base directory of directly attachable network config.
|
||||
# Network devices for VM-based containers are allowed to be placed in the
|
||||
# host netns to eliminate as many hops as possible, which is what we
|
||||
# called a "Directly Attachable Network". The config, set by special CNI
|
||||
# plugins, is used to tell the Kata containers what devices are attached
|
||||
# to the hypervisor.
|
||||
# (default: /run/kata-containers/dans)
|
||||
dan_conf = "@DEFDANCONF@"
|
||||
|
||||
# pod_resource_api_sock specifies the unix socket for the Kubelet's
|
||||
# PodResource API endpoint. If empty, kubernetes based cold plug
|
||||
# will not be attempted. In order for this feature to work, the
|
||||
# KubeletPodResourcesGet featureGate must be enabled in Kubelet,
|
||||
# if using Kubelet older than 1.34.
|
||||
#
|
||||
# The pod resource API's socket is relative to the Kubelet's root-dir,
|
||||
# which is defined by the cluster admin, and its location is:
|
||||
# ${KubeletRootDir}/pod-resources/kubelet.sock
|
||||
#
|
||||
# cold_plug_vfio(see hypervisor config) acts as a feature gate:
|
||||
# cold_plug_vfio = no_port (default) => no cold plug
|
||||
# cold_plug_vfio != no_port AND pod_resource_api_sock = "" => need
|
||||
# explicit CDI annotation for cold plug (applies mainly
|
||||
# to non-k8s cases)
|
||||
# cold_plug_vfio != no_port AND pod_resource_api_sock != "" => kubelet
|
||||
# based cold plug.
|
||||
pod_resource_api_sock = "@DEFPODRESOURCEAPISOCK@"
|
||||
@@ -74,21 +74,43 @@ impl KernelParams {
|
||||
pub(crate) fn new_rootfs_kernel_params(rootfs_driver: &str, rootfs_type: &str) -> Result<Self> {
|
||||
let mut params = vec![];
|
||||
|
||||
// DAX is disabled on aarch64 due to kernel panic in dax_disassociate_entry
|
||||
// with virtio-pmem on kernel 6.18.x
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
let use_dax = false;
|
||||
#[cfg(not(target_arch = "aarch64"))]
|
||||
let use_dax = true;
|
||||
|
||||
match rootfs_driver {
|
||||
VM_ROOTFS_DRIVER_PMEM => {
|
||||
params.push(Param::new("root", VM_ROOTFS_ROOT_PMEM));
|
||||
match rootfs_type {
|
||||
VM_ROOTFS_FILESYSTEM_EXT4 => {
|
||||
params.push(Param::new(
|
||||
"rootflags",
|
||||
"dax,data=ordered,errors=remount-ro ro",
|
||||
));
|
||||
if use_dax {
|
||||
params.push(Param::new(
|
||||
"rootflags",
|
||||
"dax,data=ordered,errors=remount-ro ro",
|
||||
));
|
||||
} else {
|
||||
params.push(Param::new(
|
||||
"rootflags",
|
||||
"data=ordered,errors=remount-ro ro",
|
||||
));
|
||||
}
|
||||
}
|
||||
VM_ROOTFS_FILESYSTEM_XFS => {
|
||||
params.push(Param::new("rootflags", "dax ro"));
|
||||
if use_dax {
|
||||
params.push(Param::new("rootflags", "dax ro"));
|
||||
} else {
|
||||
params.push(Param::new("rootflags", "ro"));
|
||||
}
|
||||
}
|
||||
VM_ROOTFS_FILESYSTEM_EROFS => {
|
||||
params.push(Param::new("rootflags", "dax ro"));
|
||||
if use_dax {
|
||||
params.push(Param::new("rootflags", "dax ro"));
|
||||
} else {
|
||||
params.push(Param::new("rootflags", "ro"));
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
return Err(anyhow!("Unsupported rootfs type {}", rootfs_type));
|
||||
@@ -233,6 +255,22 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_rootfs_kernel_params() {
|
||||
// DAX is disabled on aarch64
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
let ext4_pmem_rootflags = "data=ordered,errors=remount-ro ro";
|
||||
#[cfg(not(target_arch = "aarch64"))]
|
||||
let ext4_pmem_rootflags = "dax,data=ordered,errors=remount-ro ro";
|
||||
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
let xfs_pmem_rootflags = "ro";
|
||||
#[cfg(not(target_arch = "aarch64"))]
|
||||
let xfs_pmem_rootflags = "dax ro";
|
||||
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
let erofs_pmem_rootflags = "ro";
|
||||
#[cfg(not(target_arch = "aarch64"))]
|
||||
let erofs_pmem_rootflags = "dax ro";
|
||||
|
||||
let tests = &[
|
||||
// EXT4
|
||||
TestData {
|
||||
@@ -241,7 +279,7 @@ mod tests {
|
||||
expect_params: KernelParams {
|
||||
params: [
|
||||
Param::new("root", VM_ROOTFS_ROOT_PMEM),
|
||||
Param::new("rootflags", "dax,data=ordered,errors=remount-ro ro"),
|
||||
Param::new("rootflags", ext4_pmem_rootflags),
|
||||
Param::new("rootfstype", VM_ROOTFS_FILESYSTEM_EXT4),
|
||||
]
|
||||
.to_vec(),
|
||||
@@ -268,7 +306,7 @@ mod tests {
|
||||
expect_params: KernelParams {
|
||||
params: [
|
||||
Param::new("root", VM_ROOTFS_ROOT_PMEM),
|
||||
Param::new("rootflags", "dax ro"),
|
||||
Param::new("rootflags", xfs_pmem_rootflags),
|
||||
Param::new("rootfstype", VM_ROOTFS_FILESYSTEM_XFS),
|
||||
]
|
||||
.to_vec(),
|
||||
@@ -295,7 +333,7 @@ mod tests {
|
||||
expect_params: KernelParams {
|
||||
params: [
|
||||
Param::new("root", VM_ROOTFS_ROOT_PMEM),
|
||||
Param::new("rootflags", "dax ro"),
|
||||
Param::new("rootflags", erofs_pmem_rootflags),
|
||||
Param::new("rootfstype", VM_ROOTFS_FILESYSTEM_EROFS),
|
||||
]
|
||||
.to_vec(),
|
||||
|
||||
@@ -585,7 +585,9 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net
|
||||
clh.vmconfig.Cpus = chclient.NewCpusConfig(int32(clh.config.NumVCPUs()), int32(clh.config.DefaultMaxVCPUs))
|
||||
|
||||
disableNvdimm := (clh.config.DisableImageNvdimm || clh.config.ConfidentialGuest)
|
||||
enableDax := !disableNvdimm
|
||||
// DAX is disabled on aarch64 due to kernel panic in dax_disassociate_entry
|
||||
// with virtio-pmem on kernel 6.18.x
|
||||
enableDax := !disableNvdimm && runtime.GOARCH != "arm64"
|
||||
|
||||
params, err := getNonUserDefinedKernelParams(hypervisorConfig.RootfsType, disableNvdimm, enableDax, clh.config.Debug, clh.config.ConfidentialGuest, clh.config.IOMMU)
|
||||
if err != nil {
|
||||
|
||||
@@ -69,9 +69,11 @@ func newQemuArch(config HypervisorConfig) (qemuArch, error) {
|
||||
kernelParamsDebug: kernelParamsDebug,
|
||||
kernelParams: kernelParams,
|
||||
disableNvdimm: config.DisableImageNvdimm,
|
||||
dax: true,
|
||||
protection: noneProtection,
|
||||
legacySerial: config.LegacySerial,
|
||||
// DAX is disabled on aarch64 due to kernel panic in dax_disassociate_entry
|
||||
// with virtio-pmem on kernel 6.18.x
|
||||
dax: false,
|
||||
protection: noneProtection,
|
||||
legacySerial: config.LegacySerial,
|
||||
},
|
||||
measurementAlgo: config.MeasurementAlgo,
|
||||
}
|
||||
|
||||
@@ -1415,13 +1415,6 @@ func (s *Sandbox) startVM(ctx context.Context, prestartHookFunc func(context.Con
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// If we want the network, scan the netns again to update the network
|
||||
// configuration after the prestart hooks have run.
|
||||
if !s.config.NetworkConfig.DisableNewNetwork {
|
||||
if _, err := s.network.AddEndpoints(ctx, s, nil, false); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if err := s.network.Run(ctx, func() error {
|
||||
|
||||
@@ -74,6 +74,8 @@ Extra environment variables:
|
||||
AGENT_BIN: Use it to change the expected agent binary name
|
||||
AGENT_INIT: Use kata agent as init process
|
||||
BLOCK_SIZE: Use to specify the size of blocks in bytes. DEFAULT: 4096
|
||||
DAX_DISABLE: If set to "yes", skip DAX metadata header (for kernels without FS_DAX support).
|
||||
DEFAULT: not set
|
||||
IMAGE_REGISTRY: Hostname for the image registry used to pull down the rootfs build image.
|
||||
NSDAX_BIN: Use to specify path to pre-compiled 'nsdax' tool.
|
||||
USE_DOCKER: If set will build image in a Docker Container (requries docker)
|
||||
@@ -169,6 +171,7 @@ build_with_container() {
|
||||
--env BLOCK_SIZE="${block_size}" \
|
||||
--env ROOT_FREE_SPACE="${root_free_space}" \
|
||||
--env NSDAX_BIN="${nsdax_bin}" \
|
||||
--env DAX_DISABLE="${DAX_DISABLE:-no}" \
|
||||
--env MEASURED_ROOTFS="${MEASURED_ROOTFS}" \
|
||||
--env SELINUX="${SELINUX}" \
|
||||
--env DEBUG="${DEBUG}" \
|
||||
@@ -304,8 +307,12 @@ calculate_img_size() {
|
||||
local fs_type="$3"
|
||||
local block_size="$4"
|
||||
|
||||
# rootfs start + DAX header size + rootfs end
|
||||
local reserved_size_mb=$((rootfs_start + dax_header_sz + rootfs_end))
|
||||
# rootfs start + DAX header size (if enabled) + rootfs end
|
||||
local dax_sz=0
|
||||
if [ "${DAX_DISABLE:-no}" != "yes" ]; then
|
||||
dax_sz="${dax_header_sz}"
|
||||
fi
|
||||
local reserved_size_mb=$((rootfs_start + dax_sz + rootfs_end))
|
||||
|
||||
disk_size="$(calculate_required_disk_size "${rootfs}" "${fs_type}" "${block_size}")"
|
||||
|
||||
@@ -624,25 +631,35 @@ main() {
|
||||
die "Invalid rootfs"
|
||||
fi
|
||||
|
||||
# Determine DAX header size based on DAX_DISABLE setting
|
||||
local dax_sz=0
|
||||
if [ "${DAX_DISABLE:-no}" != "yes" ]; then
|
||||
dax_sz="${dax_header_sz}"
|
||||
fi
|
||||
|
||||
if [ "${fs_type}" == 'erofs' ]; then
|
||||
# mkfs.erofs accepts an src root dir directory as an input
|
||||
# rather than some device, so no need to guess the device dest size first.
|
||||
create_erofs_rootfs_image "${rootfs}" "${image}" \
|
||||
"${block_size}" "${agent_bin}"
|
||||
rootfs_img_size=$?
|
||||
img_size=$((rootfs_img_size + dax_header_sz))
|
||||
img_size=$((rootfs_img_size + dax_sz))
|
||||
else
|
||||
img_size=$(calculate_img_size "${rootfs}" "${root_free_space}" \
|
||||
"${fs_type}" "${block_size}")
|
||||
|
||||
# the first 2M are for the first MBR + NVDIMM metadata and were already
|
||||
# consider in calculate_img_size
|
||||
rootfs_img_size=$((img_size - dax_header_sz))
|
||||
# consider in calculate_img_size (if DAX is enabled)
|
||||
rootfs_img_size=$((img_size - dax_sz))
|
||||
create_rootfs_image "${rootfs}" "${image}" "${rootfs_img_size}" \
|
||||
"${fs_type}" "${block_size}" "${agent_bin}"
|
||||
fi
|
||||
# insert at the beginning of the image the MBR + DAX header
|
||||
set_dax_header "${image}" "${img_size}" "${fs_type}" "${nsdax_bin}"
|
||||
if [ "${DAX_DISABLE:-no}" != "yes" ]; then
|
||||
set_dax_header "${image}" "${img_size}" "${fs_type}" "${nsdax_bin}"
|
||||
else
|
||||
info "Skipping DAX header (DAX_DISABLE=yes)"
|
||||
fi
|
||||
|
||||
chown "${USER}:${GROUP}" "${image}"
|
||||
}
|
||||
|
||||
@@ -18,9 +18,12 @@ die() {
|
||||
exit 1
|
||||
}
|
||||
|
||||
arch_target=$1
|
||||
nvidia_gpu_stack="$2"
|
||||
base_os="$3"
|
||||
run_file_name=$2
|
||||
run_fm_file_name=$3
|
||||
arch_target=$4
|
||||
nvidia_gpu_stack="$5"
|
||||
driver_version=""
|
||||
base_os="noble"
|
||||
|
||||
APT_INSTALL="apt -o Dpkg::Options::='--force-confdef' -o Dpkg::Options::='--force-confold' -yqq --no-install-recommends install"
|
||||
|
||||
@@ -31,6 +34,31 @@ is_feature_enabled() {
|
||||
[[ ",${nvidia_gpu_stack}," == *",${feature},"* ]]
|
||||
}
|
||||
|
||||
set_driver_version() {
|
||||
# Extract the driver=XXX part first, then get the value
|
||||
if [[ "${nvidia_gpu_stack}" =~ driver=([^,]+) ]]; then
|
||||
driver_version="${BASH_REMATCH[1]}"
|
||||
fi
|
||||
echo "chroot: driver_version: ${driver_version}"
|
||||
echo "chroot: TODO remove with new NVRC"
|
||||
cat <<-CHROOT_EOF > "/supported-gpu.devids"
|
||||
0x230E
|
||||
0x2321
|
||||
0x2322
|
||||
0x2324
|
||||
0x2329
|
||||
0x232C
|
||||
0x2330
|
||||
0x2331
|
||||
0x2335
|
||||
0x2339
|
||||
0x233A
|
||||
0x233B
|
||||
0x2342
|
||||
0x2348
|
||||
CHROOT_EOF
|
||||
}
|
||||
|
||||
install_nvidia_ctk() {
|
||||
echo "chroot: Installing NVIDIA GPU container runtime"
|
||||
# Base gives a nvidia-ctk and the nvidia-container-runtime
|
||||
@@ -48,19 +76,13 @@ install_nvidia_fabricmanager() {
|
||||
}
|
||||
|
||||
install_userspace_components() {
|
||||
# Extract the driver=XXX part first, then get the value
|
||||
if [[ "${nvidia_gpu_stack}" =~ driver=([^,]+) ]]; then
|
||||
driver_version="${BASH_REMATCH[1]}"
|
||||
fi
|
||||
echo "chroot: driver_version: ${driver_version}"
|
||||
|
||||
eval "${APT_INSTALL}" nvidia-driver-pinning-"${driver_version}"
|
||||
eval "${APT_INSTALL}" nvidia-imex nvidia-firmware \
|
||||
eval "${APT_INSTALL}" nvidia-imex nvidia-firmware \
|
||||
libnvidia-cfg1 libnvidia-gl libnvidia-extra \
|
||||
libnvidia-decode libnvidia-fbc1 libnvidia-encode \
|
||||
libnvidia-nscq
|
||||
|
||||
apt-mark hold nvidia-imex nvidia-firmware \
|
||||
apt-mark hold nvidia-imex nvidia-firmware \
|
||||
libnvidia-cfg1 libnvidia-gl libnvidia-extra \
|
||||
libnvidia-decode libnvidia-fbc1 libnvidia-encode \
|
||||
libnvidia-nscq
|
||||
@@ -89,13 +111,12 @@ setup_apt_repositories() {
|
||||
rm -f /etc/apt/sources.list.d/*
|
||||
|
||||
key="/usr/share/keyrings/ubuntu-archive-keyring.gpg"
|
||||
comp="main restricted universe multiverse"
|
||||
|
||||
cat <<-CHROOT_EOF > /etc/apt/sources.list.d/"${base_os}".list
|
||||
deb [arch=${deb_arch} signed-by=${key}] http://${mirror} ${base_os} ${comp}
|
||||
deb [arch=${deb_arch} signed-by=${key}] http://${mirror} ${base_os}-updates ${comp}
|
||||
deb [arch=${deb_arch} signed-by=${key}] http://${mirror} ${base_os}-security ${comp}
|
||||
deb [arch=${deb_arch} signed-by=${key}] http://${mirror} ${base_os}-backports ${comp}
|
||||
deb [arch=${deb_arch} signed-by=${key}] http://${mirror} ${base_os} main restricted universe multiverse
|
||||
deb [arch=${deb_arch} signed-by=${key}] http://${mirror} ${base_os}-updates main restricted universe multiverse
|
||||
deb [arch=${deb_arch} signed-by=${key}] http://${mirror} ${base_os}-security main restricted universe multiverse
|
||||
deb [arch=${deb_arch} signed-by=${key}] http://${mirror} ${base_os}-backports main restricted universe multiverse
|
||||
CHROOT_EOF
|
||||
|
||||
local arch="${arch_target}"
|
||||
@@ -108,7 +129,7 @@ setup_apt_repositories() {
|
||||
curl -fsSL -O "https://developer.download.nvidia.com/compute/cuda/repos/${osver}/${arch}/${keyring}"
|
||||
dpkg -i "${keyring}" && rm -f "${keyring}"
|
||||
|
||||
# Set priorities: CUDA repos highest, Ubuntu non-driver next, Ubuntu blocked for driver packages
|
||||
# Set priorities: Ubuntu repos highest, NVIDIA Container Toolkit next, CUDA repo blocked for driver packages
|
||||
cat <<-CHROOT_EOF > /etc/apt/preferences.d/nvidia-priority
|
||||
Package: *
|
||||
Pin: $(dirname "${mirror}")
|
||||
@@ -160,6 +181,7 @@ cleanup_rootfs() {
|
||||
# Start of script
|
||||
echo "chroot: Setup NVIDIA GPU rootfs stage one"
|
||||
|
||||
set_driver_version
|
||||
setup_apt_repositories
|
||||
install_userspace_components
|
||||
install_nvidia_fabricmanager
|
||||
|
||||
@@ -94,11 +94,25 @@ setup_nvidia_gpu_rootfs_stage_one() {
|
||||
mkdir -p ./lib/modules/
|
||||
tar --zstd -xvf "${BUILD_DIR}"/kata-static-kernel-nvidia-gpu"${appendix}"-modules.tar.zst -C ./lib/modules/
|
||||
|
||||
# If we find a local downloaded run file build the kernel modules
|
||||
# with it, otherwise use the distribution packages. Run files may have
|
||||
# more recent drivers available then the distribution packages.
|
||||
local run_file_name="nvidia-driver.run"
|
||||
if [[ -f ${BUILD_DIR}/${run_file_name} ]]; then
|
||||
cp -L "${BUILD_DIR}"/"${run_file_name}" ./"${run_file_name}"
|
||||
fi
|
||||
|
||||
local run_fm_file_name="nvidia-fabricmanager.run"
|
||||
if [[ -f ${BUILD_DIR}/${run_fm_file_name} ]]; then
|
||||
cp -L "${BUILD_DIR}"/"${run_fm_file_name}" ./"${run_fm_file_name}"
|
||||
fi
|
||||
|
||||
mount --rbind /dev ./dev
|
||||
mount --make-rslave ./dev
|
||||
mount -t proc /proc ./proc
|
||||
|
||||
chroot . /bin/bash -c "/nvidia_chroot.sh ${machine_arch} ${NVIDIA_GPU_STACK} noble"
|
||||
chroot . /bin/bash -c "/nvidia_chroot.sh $(uname -r) ${run_file_name} \
|
||||
${run_fm_file_name} ${machine_arch} ${NVIDIA_GPU_STACK}"
|
||||
|
||||
umount -R ./dev
|
||||
umount ./proc
|
||||
@@ -245,6 +259,7 @@ chisseled_init() {
|
||||
cp -a "${stage_one}"/etc/kata-opa etc/.
|
||||
fi
|
||||
cp -a "${stage_one}"/etc/resolv.conf etc/.
|
||||
cp -a "${stage_one}"/supported-gpu.devids .
|
||||
|
||||
cp -a "${stage_one}"/lib/firmware/nvidia lib/firmware/.
|
||||
cp -a "${stage_one}"/sbin/ldconfig.real sbin/ldconfig
|
||||
|
||||
@@ -15,13 +15,13 @@ type: application
|
||||
# This is the chart version. This version number should be incremented each time you make changes
|
||||
# to the chart and its templates, including the app version.
|
||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||
version: "3.25.0"
|
||||
version: "3.24.0"
|
||||
|
||||
# This is the version number of the application being deployed. This version number should be
|
||||
# incremented each time you make changes to the application. Versions are not expected to
|
||||
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||
# It is recommended to use it with quotes.
|
||||
appVersion: "3.25.0"
|
||||
appVersion: "3.24.0"
|
||||
|
||||
dependencies:
|
||||
- name: node-feature-discovery
|
||||
|
||||
@@ -114,6 +114,7 @@ RUNTIME_CHOICE="${RUNTIME_CHOICE:-both}"
|
||||
IMAGE_SIZE_ALIGNMENT_MB=${IMAGE_SIZE_ALIGNMENT_MB:-}
|
||||
KERNEL_DEBUG_ENABLED="${KERNEL_DEBUG_ENABLED:-}"
|
||||
INIT_DATA="${INIT_DATA:-yes}"
|
||||
DAX_DISABLE="${DAX_DISABLE:-no}"
|
||||
|
||||
docker run \
|
||||
-v $HOME/.docker:/root/.docker \
|
||||
@@ -160,6 +161,7 @@ docker run \
|
||||
--env CROSS_BUILD="${CROSS_BUILD}" \
|
||||
--env TARGET_ARCH="${TARGET_ARCH}" \
|
||||
--env ARCH="${ARCH}" \
|
||||
--env DAX_DISABLE="${DAX_DISABLE}" \
|
||||
--rm \
|
||||
-w ${script_dir} \
|
||||
build-kata-deploy "${kata_deploy_create}" "$@"
|
||||
|
||||
@@ -468,6 +468,12 @@ install_image() {
|
||||
export REPO_COMPONENTS
|
||||
fi
|
||||
|
||||
# Disable DAX for ARM64 due to kernel panic in dax_disassociate_entry
|
||||
# with virtio-pmem on kernel 6.18.x
|
||||
if [ "${ARCH}" == "aarch64" ]; then
|
||||
export DAX_DISABLE=yes
|
||||
fi
|
||||
|
||||
"${rootfs_builder}" --osname="${os_name}" --osversion="${os_version}" --imagetype=image --prefix="${prefix}" --destdir="${destdir}" --image_initrd_suffix="${variant}"
|
||||
}
|
||||
|
||||
|
||||
@@ -1 +1 @@
|
||||
176
|
||||
175
|
||||
|
||||
@@ -1,170 +0,0 @@
|
||||
From mboxrd@z Thu Jan 1 00:00:00 1970
|
||||
Received: from foss.arm.com (foss.arm.com [217.140.110.172])
|
||||
by smtp.subspace.kernel.org (Postfix) with ESMTP id BCE6D2FFDD5
|
||||
for <nvdimm@lists.linux.dev>; Wed, 14 Jan 2026 17:49:59 +0000 (UTC)
|
||||
Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=217.140.110.172
|
||||
ARC-Seal:i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
|
||||
t=1768413001; cv=none; b=mYidLRrJZn5ooS7h+lyKLsbA8/GKL/ZqDOCHo66hKab86eV5cgpwbWPeudpYGPiMp/QhczPxwDzq1J9qP57FU8xy5AmIFwF6cAn6FPN0BzBWxBQUeT/pxDwfOkvh33RigAd/HACiTa+9waLWNn94H1tPpUOn1SUKYcC2anb/EMA=
|
||||
ARC-Message-Signature:i=1; a=rsa-sha256; d=subspace.kernel.org;
|
||||
s=arc-20240116; t=1768413001; c=relaxed/simple;
|
||||
bh=/AHMpb4+3MfzIhYBPESp8KGt8HTeUy14LUGKGDaY7Jc=;
|
||||
h=Message-ID:Date:MIME-Version:To:Cc:From:Subject:Content-Type; b=nYCvtwNmfNPRI2kMt6FzMOpG8Xv2GrytpiVJh3K4jRBpJFo3NO0icdYoGz0yjvq1G4vQvh/VrhrLhOVAEHdNkuGQz1yL6qHm/9KniwafY98ihbvaadCAZtdiNtjhZ646/irNi48nnwxquGqUkiPk2n9PqYSVLR9VYf60nr/nAOA=
|
||||
ARC-Authentication-Results:i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=arm.com; spf=pass smtp.mailfrom=arm.com; arc=none smtp.client-ip=217.140.110.172
|
||||
Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=arm.com
|
||||
Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=arm.com
|
||||
Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14])
|
||||
by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 4F3F21515;
|
||||
Wed, 14 Jan 2026 09:49:52 -0800 (PST)
|
||||
Received: from [10.1.37.132] (unknown [10.1.37.132])
|
||||
by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPSA id 0CA253F59E;
|
||||
Wed, 14 Jan 2026 09:49:57 -0800 (PST)
|
||||
Message-ID: <18af3213-6c46-4611-ba75-da5be5a1c9b0@arm.com>
|
||||
Date: Wed, 14 Jan 2026 17:49:30 +0000
|
||||
Precedence: bulk
|
||||
X-Mailing-List: nvdimm@lists.linux.dev
|
||||
List-Id: <nvdimm.lists.linux.dev>
|
||||
List-Subscribe: <mailto:nvdimm+subscribe@lists.linux.dev>
|
||||
List-Unsubscribe: <mailto:nvdimm+unsubscribe@lists.linux.dev>
|
||||
MIME-Version: 1.0
|
||||
User-Agent: Mozilla Thunderbird
|
||||
Content-Language: en-US
|
||||
To: linux-kernel@vger.kernel.org
|
||||
Cc: linux-fsdevel@vger.kernel.org, nvdimm@lists.linux.dev,
|
||||
dan.j.williams@intel.com, willy@infradead.org, jack@suse.cz,
|
||||
Nick.Connolly@arm.com, ffidencio@nvidia.com
|
||||
From: Seunguk Shin <seunguk.shin@arm.com>
|
||||
Subject: [PATCH] fs/dax: check zero or empty entry before converting xarray
|
||||
Content-Type: text/plain; charset=UTF-8; format=flowed
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Trying to convert zero or empty xarray entry causes kernel panic.
|
||||
|
||||
[ 0.737679] EXT4-fs (pmem0p1): mounted filesystem
|
||||
79676804-7c8b-491a-b2a6-9bae3c72af70 ro with ordered data mode. Quota
|
||||
mode: disabled.
|
||||
[ 0.737891] VFS: Mounted root (ext4 filesystem) readonly on device 259:1.
|
||||
[ 0.739119] devtmpfs: mounted
|
||||
[ 0.739476] Freeing unused kernel memory: 1920K
|
||||
[ 0.740156] Run /sbin/init as init process
|
||||
[ 0.740229] with arguments:
|
||||
[ 0.740286] /sbin/init
|
||||
[ 0.740321] with environment:
|
||||
[ 0.740369] HOME=/
|
||||
[ 0.740400] TERM=linux
|
||||
[ 0.743162] Unable to handle kernel paging request at virtual address
|
||||
fffffdffbf000008
|
||||
[ 0.743285] Mem abort info:
|
||||
[ 0.743316] ESR = 0x0000000096000006
|
||||
[ 0.743371] EC = 0x25: DABT (current EL), IL = 32 bits
|
||||
[ 0.743444] SET = 0, FnV = 0
|
||||
[ 0.743489] EA = 0, S1PTW = 0
|
||||
[ 0.743545] FSC = 0x06: level 2 translation fault
|
||||
[ 0.743610] Data abort info:
|
||||
[ 0.743656] ISV = 0, ISS = 0x00000006, ISS2 = 0x00000000
|
||||
[ 0.743720] CM = 0, WnR = 0, TnD = 0, TagAccess = 0
|
||||
[ 0.743785] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
|
||||
[ 0.743848] swapper pgtable: 4k pages, 48-bit VAs, pgdp=00000000b9d17000
|
||||
[ 0.743931] [fffffdffbf000008] pgd=10000000bfa3d403,
|
||||
p4d=10000000bfa3d403, pud=1000000040bfe403, pmd=0000000000000000
|
||||
[ 0.744070] Internal error: Oops: 0000000096000006 [#1] SMP
|
||||
[ 0.748888] CPU: 0 UID: 0 PID: 1 Comm: init Not tainted 6.18.4 #1 NONE
|
||||
[ 0.749421] pstate: 004000c5 (nzcv daIF +PAN -UAO -TCO -DIT -SSBS
|
||||
BTYPE=--)
|
||||
[ 0.749969] pc : dax_disassociate_entry.constprop.0+0x20/0x50
|
||||
[ 0.750444] lr : dax_insert_entry+0xcc/0x408
|
||||
[ 0.750802] sp : ffff80008000b9e0
|
||||
[ 0.751083] x29: ffff80008000b9e0 x28: 0000000000000000 x27:
|
||||
0000000000000000
|
||||
[ 0.751682] x26: 0000000001963d01 x25: ffff0000004f7d90 x24:
|
||||
0000000000000000
|
||||
[ 0.752264] x23: 0000000000000000 x22: ffff80008000bcc8 x21:
|
||||
0000000000000011
|
||||
[ 0.752836] x20: ffff80008000ba90 x19: 0000000001963d01 x18:
|
||||
0000000000000000
|
||||
[ 0.753407] x17: 0000000000000000 x16: 0000000000000000 x15:
|
||||
0000000000000000
|
||||
[ 0.753970] x14: ffffbf3154b9ae70 x13: 0000000000000000 x12:
|
||||
ffffbf3154b9ae70
|
||||
[ 0.754548] x11: ffffffffffffffff x10: 0000000000000000 x9 :
|
||||
0000000000000000
|
||||
[ 0.755122] x8 : 000000000000000d x7 : 000000000000001f x6 :
|
||||
0000000000000000
|
||||
[ 0.755707] x5 : 0000000000000000 x4 : 0000000000000000 x3 :
|
||||
fffffdffc0000000
|
||||
[ 0.756287] x2 : 0000000000000008 x1 : 0000000040000000 x0 :
|
||||
fffffdffbf000000
|
||||
[ 0.756871] Call trace:
|
||||
[ 0.757107] dax_disassociate_entry.constprop.0+0x20/0x50 (P)
|
||||
[ 0.757592] dax_iomap_pte_fault+0x4fc/0x808
|
||||
[ 0.757951] dax_iomap_fault+0x28/0x30
|
||||
[ 0.758258] ext4_dax_huge_fault+0x80/0x2dc
|
||||
[ 0.758594] ext4_dax_fault+0x10/0x3c
|
||||
[ 0.758892] __do_fault+0x38/0x12c
|
||||
[ 0.759175] __handle_mm_fault+0x530/0xcf0
|
||||
[ 0.759518] handle_mm_fault+0xe4/0x230
|
||||
[ 0.759833] do_page_fault+0x17c/0x4dc
|
||||
[ 0.760144] do_translation_fault+0x30/0x38
|
||||
[ 0.760483] do_mem_abort+0x40/0x8c
|
||||
[ 0.760771] el0_ia+0x4c/0x170
|
||||
[ 0.761032] el0t_64_sync_handler+0xd8/0xdc
|
||||
[ 0.761371] el0t_64_sync+0x168/0x16c
|
||||
[ 0.761677] Code: f9453021 f2dfbfe3 cb813080 8b001860 (f9400401)
|
||||
[ 0.762168] ---[ end trace 0000000000000000 ]---
|
||||
[ 0.762550] note: init[1] exited with irqs disabled
|
||||
[ 0.762631] Kernel panic - not syncing: Attempted to kill init!
|
||||
exitcode=0x0000000b
|
||||
|
||||
This patch just reorders checking and converting.
|
||||
|
||||
Signed-off-by: Seunguk Shin <seunguk.shin@arm.com>
|
||||
---
|
||||
fs/dax.c | 9 ++++++---
|
||||
1 file changed, 6 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/fs/dax.c b/fs/dax.c
|
||||
index 516f995a9..69ac2562c 100644
|
||||
--- a/fs/dax.c
|
||||
+++ b/fs/dax.c
|
||||
@@ -443,11 +443,12 @@ static void dax_associate_entry(void *entry, struct address_space *mapping,
|
||||
unsigned long address, bool shared)
|
||||
{
|
||||
unsigned long size = dax_entry_size(entry), index;
|
||||
- struct folio *folio = dax_to_folio(entry);
|
||||
+ struct folio *folio;
|
||||
|
||||
if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry))
|
||||
return;
|
||||
|
||||
+ folio = dax_to_folio(entry);
|
||||
index = linear_page_index(vma, address & ~(size - 1));
|
||||
if (shared && (folio->mapping || dax_folio_is_shared(folio))) {
|
||||
if (folio->mapping)
|
||||
@@ -468,21 +469,23 @@ static void dax_associate_entry(void *entry, struct address_space *mapping,
|
||||
static void dax_disassociate_entry(void *entry, struct address_space *mapping,
|
||||
bool trunc)
|
||||
{
|
||||
- struct folio *folio = dax_to_folio(entry);
|
||||
+ struct folio *folio;
|
||||
|
||||
if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry))
|
||||
return;
|
||||
|
||||
+ folio = dax_to_folio(entry);
|
||||
dax_folio_put(folio);
|
||||
}
|
||||
|
||||
static struct page *dax_busy_page(void *entry)
|
||||
{
|
||||
- struct folio *folio = dax_to_folio(entry);
|
||||
+ struct folio *folio;
|
||||
|
||||
if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry))
|
||||
return NULL;
|
||||
|
||||
+ folio = dax_to_folio(entry);
|
||||
if (folio_ref_count(folio) - folio_mapcount(folio))
|
||||
return &folio->page;
|
||||
else
|
||||
--
|
||||
2.43.0
|
||||
|
||||
@@ -40,7 +40,6 @@ function _check_required_env_var()
|
||||
KATA_STATIC_TARBALL) env_var="${KATA_STATIC_TARBALL}" ;;
|
||||
KATA_DEPLOY_IMAGE_TAGS) env_var="${KATA_DEPLOY_IMAGE_TAGS}" ;;
|
||||
KATA_DEPLOY_REGISTRIES) env_var="${KATA_DEPLOY_REGISTRIES}" ;;
|
||||
KATA_TOOLS_STATIC_TARBALL) env_var="${KATA_TOOLS_STATIC_TARBALL}" ;;
|
||||
*) >&2 _die "Invalid environment variable \"${1}\"" ;;
|
||||
esac
|
||||
|
||||
|
||||
Reference in New Issue
Block a user