runtime: generate dedicated CLH Azure config variants

Create configuration-clh-azure{,-runtime-rs}.toml from the base CLH configs during build. This keeps Mariner-specific defaults in explicit config artifacts instead of ad-hoc runtime mutation. Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-07-02 07:02:16 +00:00 · 2026-05-28 13:31:05 +02:00
parent a423cf9526
commit f36c383b4f
4 changed files with 1119 additions and 0 deletions
--- a/src/runtime-rs/Makefile
+++ b/src/runtime-rs/Makefile
@@ -291,9 +291,24 @@ ifneq (,$(CLHCMD))

    CONFIGS += $(CONFIG_CLH)

+    CONFIG_FILE_CLH_AZURE = configuration-clh-azure-runtime-rs.toml
+    CONFIG_CLH_AZURE = config/$(CONFIG_FILE_CLH_AZURE)
+    CONFIG_CLH_AZURE_IN = $(CONFIG_CLH_AZURE).in
+
+    CONFIG_PATH_CLH_AZURE = $(abspath $(CONFDIR)/$(CONFIG_FILE_CLH_AZURE))
+    CONFIG_PATHS += $(CONFIG_PATH_CLH_AZURE)
+
+    SYSCONFIG_CLH_AZURE = $(abspath $(SYSCONFDIR)/$(CONFIG_FILE_CLH_AZURE))
+    SYSCONFIG_PATHS += $(SYSCONFIG_CLH_AZURE)
+
+    CONFIGS += $(CONFIG_CLH_AZURE)
+
    # CLH-specific options (all should be suffixed by "_CLH")
    # currently, huge pages are required for virtiofsd support
    DEFNETWORKMODEL_CLH := tcfilter
+    IMAGEPATH_CLH_AZURE := $(PKGDATADIR)/kata-containers-mariner.img
+    KERNELPATH_CLH_AZURE := /usr/share/cloud-hypervisor/vmlinux.bin
+    DEFSTATICRESOURCEMGMT_CLH_AZURE := true
    KERNELTYPE_CLH = uncompressed
    KERNEL_NAME_CLH = $(call MAKE_KERNEL_NAME,$(KERNELTYPE_CLH))
    KERNELPATH_CLH = $(KERNELDIR)/$(KERNEL_NAME_CLH)
@@ -596,6 +611,7 @@ USER_VARS += QEMUTDXPATH
 USER_VARS += QEMUTDXVALIDHYPERVISORPATHS
 USER_VARS += FIRMWAREPATH_CLH
 USER_VARS += KERNELPATH_CLH
+USER_VARS += KERNELPATH_CLH_AZURE
 USER_VARS += FCCMD
 USER_VARS += FCPATH
 USER_VARS += FCVALIDHYPERVISORPATHS
@@ -608,6 +624,7 @@ USER_VARS += IMAGENAME
 USER_VARS += IMAGENAME_NV
 USER_VARS += IMAGECONFIDENTIALNAME
 USER_VARS += IMAGEPATH
+USER_VARS += IMAGEPATH_CLH_AZURE
 USER_VARS += IMAGEPATH_NV
 USER_VARS += IMAGECONFIDENTIALPATH
 USER_VARS += INITRDNAME
@@ -711,6 +728,7 @@ USER_VARS += DEFENABLEVCPUSPINNING_QEMU
 USER_VARS += DEFSTATICRESOURCEMGMT_DB
 USER_VARS += DEFSTATICRESOURCEMGMT_FC
 USER_VARS += DEFSTATICRESOURCEMGMT_CLH
+USER_VARS += DEFSTATICRESOURCEMGMT_CLH_AZURE
 USER_VARS += DEFSTATICRESOURCEMGMT_QEMU
 USER_VARS += DEFSTATICRESOURCEMGMT_COCO
 USER_VARS += DEFDISABLEIMAGENVDIMM
--- a/src/runtime-rs/config/configuration-clh-azure-runtime-rs.toml.in
+++ b/src/runtime-rs/config/configuration-clh-azure-runtime-rs.toml.in
@@ -0,0 +1,545 @@
+# Copyright (c) 2022 Intel Corporation
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# XXX: WARNING: this file is auto-generated.
+# XXX:
+# XXX: Source file: "@CONFIG_CLH_AZURE_IN@"
+# XXX: Project:
+# XXX:   Name: @PROJECT_NAME@
+# XXX:   Type: @PROJECT_TYPE@
+
+[hypervisor.clh]
+path = "@CLHPATH@"
+kernel = "@KERNELPATH_CLH_AZURE@"
+image = "@IMAGEPATH_CLH_AZURE@"
+
+# rootfs filesystem type:
+#   - ext4 (default)
+#   - xfs
+#   - erofs
+rootfs_type = @DEFROOTFSTYPE@
+
+# Block storage driver to be used for the VM rootfs when it is backed
+# by a block device.
+#
+# virtio-pmem is not supported with Cloud Hypervisor.
+vm_rootfs_driver = "@VMROOTFSDRIVER_CLH@"
+
+# Path to the firmware.
+# If you want Cloud Hypervisor to use a specific firmware, set its path below.
+firmware = "@FIRMWAREPATH@"
+
+# List of valid annotation names for the hypervisor
+# Each member of the list is a regular expression, which is the base name
+# of the annotation, e.g. "path" for "io.katacontainers.config.hypervisor.path"
+enable_annotations = @DEFENABLEANNOTATIONS@
+
+# List of valid annotations values for the hypervisor
+# Each member of the list is a path pattern as described by glob(3).
+# The default if not set is empty (all annotations rejected.)
+# Your distribution recommends: @CLHVALIDHYPERVISORPATHS@
+valid_hypervisor_paths = @CLHVALIDHYPERVISORPATHS@
+
+# List of valid annotations values for ctlpath
+# The default if not set is empty (all annotations rejected.)
+# Your distribution recommends:
+valid_ctlpaths = []
+
+# Optional space-separated list of options to pass to the guest kernel.
+# For example, use `kernel_params = "vsyscall=emulate"` if you are having
+# trouble running pre-2.15 glibc.
+#
+# WARNING: - any parameter specified here will take priority over the default
+# parameter value of the same name used to start the virtual machine.
+# Do not set values here unless you understand the impact of doing so as you
+# may stop the virtual machine from booting.
+# To see the list of default parameters, enable hypervisor debug, create a
+# container and look for 'default-kernel-parameters' log entries.
+kernel_params = "@KERNELPARAMS@"
+
+# Default number of vCPUs per SB/VM:
+# unspecified or 0                --> will be set to 1
+# < 0                             --> will be set to the actual number of physical cores
+# > 0 <= number of physical cores --> will be set to the specified number
+# > number of physical cores      --> will be set to the actual number of physical cores
+default_vcpus = @DEFVCPUS@
+
+# Default maximum number of vCPUs per SB/VM:
+# unspecified or == 0             --> will be set to the actual number of physical cores or to the maximum number
+#                                     of vCPUs supported by KVM if that number is exceeded
+# > 0 <= number of physical cores --> will be set to the specified number
+# > number of physical cores      --> will be set to the actual number of physical cores or to the maximum number
+#                                     of vCPUs supported by KVM if that number is exceeded
+# WARNING: Depending on the architecture, the maximum number of vCPUs supported by KVM is used when
+# the actual number of physical cores is greater than it.
+# WARNING: Be aware that this value impacts the virtual machine's memory footprint and
+# the CPU hotplug functionality. For example, `default_maxvcpus = 240` specifies that up to 240 vCPUs
+# can be added to a SB/VM, but the memory footprint will be big. Another example, with
+# `default_maxvcpus = 8` the memory footprint will be small, but 8 will be the maximum number of
+# vCPUs supported by the SB/VM. In general, we recommend that you do not edit this variable,
+# unless you know what you are doing.
+default_maxvcpus = @DEFMAXVCPUS@
+
+# Default memory size in MiB for SB/VM.
+# If unspecified then it will be set to @DEFMEMSZ@ MiB.
+default_memory = @DEFMEMSZ@
+
+# Shared file system type:
+#   - virtio-fs
+#   - virtio-fs-nydus
+#   - none
+shared_fs = "@DEFSHAREDFS_CLH_VIRTIOFS@"
+
+# Path to vhost-user-fs daemon.
+virtio_fs_daemon = "@DEFVIRTIOFSDAEMON@"
+
+# Default size of DAX cache in MiB
+virtio_fs_cache_size = @DEFVIRTIOFSCACHESIZE@
+
+# Default size of virtqueues
+virtio_fs_queue_size = @DEFVIRTIOFSQUEUESIZE@
+
+# Extra args for virtiofsd daemon
+#
+# Format example:
+#   ["-o", "arg1=xxx,arg2", "-o", "hello world", "--arg3=yyy"]
+# Examples:
+#   Set virtiofsd log level to debug : ["-o", "log_level=debug"] or ["-d"]
+#
+# see `virtiofsd -h` for possible options.
+virtio_fs_extra_args = @DEFVIRTIOFSEXTRAARGS@
+
+# Cache mode:
+#
+#  - never
+#    Metadata, data, and pathname lookup are not cached in guest. They are
+#    always fetched from host and any changes are immediately pushed to host.
+#
+#  - auto
+#    Metadata and pathname lookup cache expires after a configured amount of
+#    time (default is 1 second). Data is cached while the file is open (close
+#    to open consistency).
+#
+#  - always
+#    Metadata, data, and pathname lookup are cached in guest and never expire.
+virtio_fs_cache = "@DEFVIRTIOFSCACHE@"
+
+# Bridges can be used to hot plug devices.
+# Limitations:
+# * Currently only pci bridges are supported
+# * Up to 30 devices per bridge can be hot plugged.
+# * Up to 5 PCI bridges can be cold plugged per VM.
+#   This limitation could be a bug in the kernel
+# Default number of bridges per SB/VM:
+# unspecified or 0   --> will be set to @DEFBRIDGES@
+# > 1 <= 5           --> will be set to the specified number
+# > 5                --> will be set to 5
+default_bridges = @DEFBRIDGES@
+
+# Reclaim guest freed memory.
+# Enabling this will result in the VM balloon device having f_reporting=on set.
+# Then the hypervisor will use it to reclaim guest freed memory.
+# This is useful for reducing the amount of memory used by a VM.
+# Enabling this feature may sometimes reduce the speed of memory access in
+# the VM.
+#
+# Default false
+reclaim_guest_freed_memory = false
+
+# Block device driver to be used by the hypervisor when a container's storage
+# is backed by a block device or a file. This driver facilitates attaching the
+# storage directly to the guest VM.
+block_device_driver = "virtio-blk-pci"
+
+# Specifies cache-related options for block devices.
+# Denotes whether use of O_DIRECT (bypass the host page cache) is enabled.
+# Default false
+block_device_cache_direct = false
+
+# Bandwidth rate limiter options
+#
+# disk_rate_limiter_bw_max_rate controls disk I/O bandwidth (size in bits/sec
+# for SB/VM).
+# The same value is used for inbound and outbound bandwidth.
+# Default 0-sized value means unlimited rate.
+disk_rate_limiter_bw_max_rate = 0
+
+# disk_rate_limiter_bw_one_time_burst increases the initial max rate and this
+# initial extra credit does *NOT* affect the overall limit and can be used for
+# an *initial* burst of data.
+# This is *optional* and only takes effect if disk_rate_limiter_bw_max_rate is
+# set to a non zero value.
+disk_rate_limiter_bw_one_time_burst = 0
+
+# Operation rate limiter options
+#
+# disk_rate_limiter_ops_max_rate controls disk I/O bandwidth (size in ops/sec
+# for SB/VM).
+# The same value is used for inbound and outbound bandwidth.
+# Default 0-sized value means unlimited rate.
+disk_rate_limiter_ops_max_rate = 0
+
+# disk_rate_limiter_ops_one_time_burst increases the initial max rate and this
+# initial extra credit does *NOT* affect the overall limit and can be used for
+# an *initial* burst of data.
+# This is *optional* and only takes effect if disk_rate_limiter_ops_max_rate is
+# set to a non zero value.
+disk_rate_limiter_ops_one_time_burst = 0
+
+# Virtio queue size. Size: byte. default 128
+queue_size = 128
+
+# Block device multi-queue, default 1
+num_queues = 1
+
+# network_queues configures the number of virtio-net queue pairs (RX/TX) exposed to the guest.
+# Setting network_queues = N creates N RX queues and N TX queues (i.e., N queue pairs).
+# More queues can improve network throughput and reduce per-queue contention by allowing packet processing to scale
+# across multiple vCPUs/threads (subject to host/guest capabilities and backend configuration such as vhost-net).
+# Increasing this value consumes more resources (e.g., virtqueue state, interrupts/MSI-X vectors, backend threads),
+# so it should typically not exceed the number of vCPUs or the practical parallelism of the networking backend.
+# Default: 1, Range: 1..=256
+network_queues = @DEFNETQUEUES@
+
+# Enable pre allocation of VM RAM, default false
+# Enabling this will result in lower container density
+# as all of the memory will be allocated and locked
+# This is useful when you want to reserve all the memory
+# upfront or in the cases where you want memory latencies
+# to be very predictable
+# Default false
+enable_mem_prealloc = false
+
+# Enable huge pages for VM RAM, default false
+# Enabling this will result in the VM memory
+# being allocated using huge pages.
+# This is useful when you want to use vhost-user network
+# stacks within the container. This will automatically
+# result in memory pre allocation
+enable_hugepages = false
+
+# Enable running clh VMM as a non-root user.
+# By default clh VMM run as root. When this is set to true, clh VMM process runs as
+# a non-root random user. See documentation for the limitations of this mode.
+rootless = false
+
+# Disable the 'seccomp' feature from Cloud Hypervisor, firecracker or dragonball, default false
+disable_seccomp = false
+
+# This option changes the default hypervisor and kernel parameters
+# to enable debug output where available.
+#
+# Default false
+enable_debug = false
+
+# Disable the customizations done in the runtime when it detects
+# that it is running on top of a VMM. This will result in the runtime
+# behaving as it would when running on bare metal.
+#
+disable_nesting_checks = false
+
+# Path to OCI hook binaries in the *guest rootfs*.
+# This does not affect host-side hooks which must instead be added to
+# the OCI spec passed to the runtime.
+#
+# You can create a rootfs with hooks by customizing the osbuilder scripts:
+# https://github.com/kata-containers/kata-containers/tree/main/tools/osbuilder
+#
+# Hooks must be stored in a subdirectory of guest_hook_path according to their
+# hook type, i.e. "guest_hook_path/{prestart,poststart,poststop}".
+# The agent will scan these directories for executable files and add them, in
+# lexicographical order, to the lifecycle of the guest container.
+# Hooks are executed in the runtime namespace of the guest. See the official documentation:
+# https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks
+# Warnings will be logged if any error is encountered while scanning for hooks,
+# but it will not abort container execution.
+# Recommended value when enabling: "/usr/share/oci/hooks"
+guest_hook_path = ""
+
+# Enable swap in the guest. Default false.
+# When enable_guest_swap is enabled, insert a raw file to the guest as the swap device.
+enable_guest_swap = false
+
+# If enable_guest_swap is enabled, the swap device will be created in the guest
+# at this path. Default "/run/kata-containers/swap".
+guest_swap_path = "/run/kata-containers/swap"
+
+# The percentage of the total memory to be used as swap device.
+# Default 100.
+guest_swap_size_percent = 100
+
+# The threshold in seconds to create swap device in the guest.
+# Kata will wait guest_swap_create_threshold_secs seconds before creating swap device.
+# Default 60.
+guest_swap_create_threshold_secs = 60
+
+[agent.@PROJECT_TYPE@]
+container_pipe_size = @PIPESIZE@
+# If enabled, make the agent display debug-level messages.
+# (default: disabled)
+enable_debug = false
+
+# Enable agent tracing.
+#
+# If enabled, the agent will generate OpenTelemetry trace spans.
+#
+# Notes:
+#
+# - If the runtime also has tracing enabled, the agent spans will be
+#   associated with the appropriate runtime parent span.
+# - If enabled, the runtime will wait for the container to shutdown,
+#   increasing the container shutdown time slightly.
+#
+# (default: disabled)
+enable_tracing = false
+
+# Enable debug console.
+
+# If enabled, user can connect guest OS running inside hypervisor
+# through "kata-runtime exec <sandbox-id>" command
+
+debug_console_enabled = false
+
+# Agent dial timeout in milliseconds.
+# (default: 10)
+dial_timeout_ms = 10
+
+# Agent reconnect timeout in milliseconds.
+# Retry times = reconnect_timeout_ms / dial_timeout_ms (default: 300)
+# If you find pod cannot connect to the agent when starting, please
+# consider increasing this value to increase the retry times.
+# You'd better not change the value of dial_timeout_ms, unless you have an
+# idea of what you are doing.
+# (default: 3000)
+reconnect_timeout_ms = 3000
+
+# Create Container Request Timeout
+# This timeout value is used to set the maximum duration for the agent to process a CreateContainerRequest.
+# It's also used to ensure that workloads, especially those involving large image pulls within the guest,
+# have sufficient time to complete.
+#
+# Effective Timeout Determination:
+# The effective timeout for a CreateContainerRequest is determined by taking the minimum of the following two values:
+# - create_container_timeout: The timeout value configured for creating containers (default: 30 seconds).
+# - runtime-request-timeout: The timeout value specified in the Kubelet configuration described as the link below:
+# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout)
+# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
+create_container_timeout = @DEFCREATECONTAINERTIMEOUT@
+
+[agent.@PROJECT_TYPE@.mem_agent]
+# Control the mem-agent function enable or disable.
+# Default to false
+mem_agent_enable = false
+
+# Control the mem-agent memcg function disable or enable
+# Default to false
+memcg_disable = false
+
+# Control the mem-agent function swap enable or disable.
+# Default to false
+memcg_swap = false
+
+# Control the mem-agent function swappiness max number.
+# Default to 50
+memcg_swappiness_max = 50
+
+# Control the mem-agent memcg function wait period seconds
+# Default to 600
+memcg_period_secs = 600
+
+# Control the mem-agent memcg wait period PSI percent limit.
+# If the percentage of memory and IO PSI stall time within
+# the memcg waiting period for a cgroup exceeds this value,
+# then the aging and eviction for this cgroup will not be
+# executed after this waiting period.
+# Default to 1
+memcg_period_psi_percent_limit = 1
+
+# Control the mem-agent memcg eviction PSI percent limit.
+# If the percentage of memory and IO PSI stall time for a cgroup
+# exceeds this value during an eviction cycle, the eviction for
+# this cgroup will immediately stop and will not resume until
+# the next memcg waiting period.
+# Default to 1
+memcg_eviction_psi_percent_limit = 1
+
+# Control the mem-agent memcg eviction run aging count min.
+# A cgroup will only perform eviction when the number of aging cycles
+# in memcg is greater than or equal to memcg_eviction_run_aging_count_min.
+# Default to 3
+memcg_eviction_run_aging_count_min = 3
+
+# Control the mem-agent compact function disable or enable
+# Default to false
+compact_disable = false
+
+# Control the mem-agent compaction function wait period seconds
+# Default to 600
+compact_period_secs = 600
+
+# Control the mem-agent compaction function wait period PSI percent limit.
+# If the percentage of memory and IO PSI stall time within
+# the compaction waiting period exceeds this value,
+# then the compaction will not be executed after this waiting period.
+# Default to 1
+compact_period_psi_percent_limit = 1
+
+# Control the mem-agent compaction function compact PSI percent limit.
+# During compaction, the percentage of memory and IO PSI stall time
+# is checked every second. If this percentage exceeds
+# compact_psi_percent_limit, the compaction process will stop.
+# Default to 5
+compact_psi_percent_limit = 5
+
+# Control the maximum number of seconds for each compaction of mem-agent compact function.
+# Default to 300
+compact_sec_max = 300
+
+# Control the mem-agent compaction function compact order.
+# compact_order is used with compact_threshold.
+# Default to 9
+compact_order = 9
+
+# Control the mem-agent compaction function compact threshold.
+# compact_threshold is the pages number.
+# When examining the /proc/pagetypeinfo, if there's an increase in the
+# number of movable pages of orders smaller than the compact_order
+# compared to the amount following the previous compaction,
+# and this increase surpasses a certain threshold (specifically,
+# more than 'compact_threshold' number of pages),
+# or the number of free pages has decreased by 'compact_threshold'
+# since the previous compaction,
+# then the system should initiate another round of memory compaction.
+# Default to 1024
+compact_threshold = 1024
+
+# Control the mem-agent compaction function force compact times.
+# After one compaction, if there has not been a compaction within
+# the next compact_force_times times, a compaction will be forced
+# regardless of the system's memory situation.
+# If compact_force_times is set to 0, will do force compaction each time.
+# If compact_force_times is set to 18446744073709551615, will never do force compaction.
+# Default to 18446744073709551615
+# Note: Using a large but valid u64 value (within i64::MAX range) instead of u64::MAX to avoid TOML parser issues
+# Using 9223372036854775807 (i64::MAX) which is effectively "never" for practical purposes
+compact_force_times = 9223372036854775807
+
+[runtime]
+# If enabled, the runtime will log additional debug messages to the
+# system log
+# (default: disabled)
+enable_debug = false
+
+# If enabled, it means that 1) if the runtime exits abnormally,
+# the cleanup process will be skipped, and 2) the runtime will not exit
+# even if the health check fails.
+# This option is typically used to retain abnormal information for debugging.
+# (default: false)
+keep_abnormal = false
+
+# Internetworking model
+# Determines how the VM should be connected to
+# the container network interface
+# Options:
+#
+#   - bridged (Deprecated)
+#     Uses a linux bridge to interconnect the container interface to
+#     the VM. Works for most cases except macvlan and ipvlan.
+#     ***NOTE: This feature has been deprecated with plans to remove this
+#     feature in the future. Please use other network models listed below.
+#
+#
+#   - macvtap
+#     Used when the Container network interface can be bridged using
+#     macvtap.
+#
+#   - none
+#     Used when customizing the network. Only creates a tap device. No veth pair.
+#
+#   - tcfilter
+#     Uses tc filter rules to redirect traffic from the network interface
+#     provided by plugin to a tap interface connected to the VM.
+#
+internetworking_model = "@DEFNETWORKMODEL_CLH@"
+
+name = "@RUNTIMENAME@"
+hypervisor_name = "@HYPERVISOR_NAME_CLH@"
+agent_name = "@PROJECT_TYPE@"
+
+# disable guest seccomp
+# Determines whether container seccomp profiles are passed to the virtual
+# machine and applied by the kata agent. If set to true, seccomp is not applied
+# within the guest
+# (default: true)
+disable_guest_seccomp = @DEFDISABLEGUESTSECCOMP@
+
+# If enabled, the runtime will create opentracing.io traces and spans.
+# (See https://www.jaegertracing.io/docs/getting-started).
+# (default: disabled)
+enable_tracing = false
+
+# Set the full url to the Jaeger HTTP Thrift collector.
+# The default if not set will be "http://localhost:14268/api/traces"
+jaeger_endpoint = ""
+
+# Sets the username to be used if basic auth is required for Jaeger.
+jaeger_user = ""
+
+# Sets the password to be used if basic auth is required for Jaeger.
+jaeger_password = ""
+
+# If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
+# This option may have some potential impacts to your host. It should only be used when you know what you're doing.
+# `disable_new_netns` conflicts with `internetworking_model=bridged` and `internetworking_model=macvtap`. It works only
+# with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge
+# (like OVS) directly.
+# (default: false)
+disable_new_netns = false
+
+# if enabled, the runtime will add all the kata processes inside one dedicated cgroup.
+# The container cgroups in the host are not created, just one single cgroup per sandbox.
+# The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox.
+# The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
+# The sandbox cgroup is constrained if there is no container type annotation.
+# See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
+sandbox_cgroup_only = @DEFSANDBOXCGROUPONLY_CLH@
+
+# Enabled experimental feature list, format: ["a", "b"].
+# Experimental features are features not stable enough for production,
+# they may break compatibility, and are prepared for a big version bump.
+# Supported experimental features:
+# (default: [])
+experimental = @DEFAULTEXPFEATURES@
+
+# If enabled, user can run pprof tools with shim v2 process through kata-monitor.
+# (default: false)
+enable_pprof = false
+
+# If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
+# this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
+# when a hardware architecture or hypervisor solution is utilized which does not support CPU and/or memory hotplug.
+# Compatibility for determining appropriate sandbox (VM) size:
+# - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O
+#   does not yet support sandbox sizing annotations.
+# - When running single containers using a tool like ctr, container sizing information will be available.
+static_sandbox_resource_mgmt = @DEFSTATICRESOURCEMGMT_CLH_AZURE@
+
+# If specified, sandbox_bind_mounts identifies host paths to be mounted(ro, rw) into the sandboxes shared path.
+# This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
+# If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts`
+# These will not be exposed to the container workloads, and are only provided for potential guest services.
+# Now it supports three kinds of bind mount format:
+# - "/path/to", default readonly mode.
+# - "/path/to:ro", readonly mode.
+# - "/path/to:rw", readwrite mode.
+sandbox_bind_mounts = @DEFBINDMOUNTS@
+
+# Base directory of directly attachable network config.
+# Network devices for VM-based containers are allowed to be placed in the
+# host netns to eliminate as many hops as possible, which is what we
+# called a "Directly Attachable Network". The config, set by special CNI
+# plugins, is used to tell the Kata containers what devices are attached
+# to the hypervisor.
+# (default: /run/kata-containers/dans)
+dan_conf = "@DEFDANCONF@"
--- a/src/runtime/Makefile
+++ b/src/runtime/Makefile
@@ -539,9 +539,24 @@ ifneq (,$(CLHCMD))

    CONFIGS += $(CONFIG_CLH)

+    CONFIG_FILE_CLH_AZURE = configuration-clh-azure.toml
+    CONFIG_CLH_AZURE = config/$(CONFIG_FILE_CLH_AZURE)
+    CONFIG_CLH_AZURE_IN = $(CONFIG_CLH_AZURE).in
+
+    CONFIG_PATH_CLH_AZURE = $(abspath $(CONFDIR)/$(CONFIG_FILE_CLH_AZURE))
+    CONFIG_PATHS += $(CONFIG_PATH_CLH_AZURE)
+
+    SYSCONFIG_CLH_AZURE = $(abspath $(SYSCONFDIR)/$(CONFIG_FILE_CLH_AZURE))
+    SYSCONFIG_PATHS += $(SYSCONFIG_CLH_AZURE)
+
+    CONFIGS += $(CONFIG_CLH_AZURE)
+
    # CLH-specific options (all should be suffixed by "_CLH")
    # currently, huge pages are required for virtiofsd support
    DEFNETWORKMODEL_CLH := tcfilter
+    IMAGEPATH_CLH_AZURE := $(PKGDATADIR)/kata-containers-mariner.img
+    KERNELPATH_CLH_AZURE := /usr/share/cloud-hypervisor/vmlinux.bin
+    DEFSTATICRESOURCEMGMT_CLH_AZURE := true
    KERNELTYPE_CLH = uncompressed
    KERNEL_NAME_CLH = $(call MAKE_KERNEL_NAME,$(KERNELTYPE_CLH))
    KERNELPATH_CLH = $(KERNELDIR)/$(KERNEL_NAME_CLH)
@@ -666,6 +681,7 @@ USER_VARS += SYSCONFIG
 USER_VARS += IMAGENAME
 USER_VARS += IMAGECONFIDENTIALNAME
 USER_VARS += IMAGEPATH
+USER_VARS += IMAGEPATH_CLH_AZURE
 USER_VARS += IMAGECONFIDENTIALPATH
 USER_VARS += INITRDNAME
 USER_VARS += INITRDCONFIDENTIALNAME
@@ -704,6 +720,7 @@ USER_VARS += KERNELCONFIDENTIALPATH
 USER_VARS += KERNELCONFIDENTIALPATH_CCA
 USER_VARS += KERNELSEPATH
 USER_VARS += KERNELPATH_CLH
+USER_VARS += KERNELPATH_CLH_AZURE
 USER_VARS += KERNELPATH_FC
 USER_VARS += KERNELPATH_STRATOVIRT
 USER_VARS += KERNELVIRTIOFSPATH
@@ -811,6 +828,7 @@ USER_VARS += DEFSANDBOXCGROUPONLY
 USER_VARS += DEFSTATICRESOURCEMGMT
 USER_VARS += DEFSTATICRESOURCEMGMT_QEMU
 USER_VARS += DEFSTATICRESOURCEMGMT_CLH
+USER_VARS += DEFSTATICRESOURCEMGMT_CLH_AZURE
 USER_VARS += DEFSTATICRESOURCEMGMT_FC
 USER_VARS += DEFSTATICRESOURCEMGMT_STRATOVIRT
 USER_VARS += DEFSTATICRESOURCEMGMT_TEE
--- a/src/runtime/config/configuration-clh-azure.toml.in
+++ b/src/runtime/config/configuration-clh-azure.toml.in
@@ -0,0 +1,538 @@
+# Copyright (c) 2019 Ericsson Eurolab Deutschland GmbH
+# Copyright (c) 2021 Adobe Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+# XXX: WARNING: this file is auto-generated.
+# XXX:
+# XXX: Source file: "@CONFIG_CLH_AZURE_IN@"
+# XXX: Project:
+# XXX:   Name: @PROJECT_NAME@
+# XXX:   Type: @PROJECT_TYPE@
+
+[hypervisor.clh]
+path = "@CLHPATH@"
+kernel = "@KERNELPATH_CLH_AZURE@"
+image = "@IMAGEPATH_CLH_AZURE@"
+
+# rootfs filesystem type:
+#   - ext4 (default)
+#   - xfs
+#   - erofs
+rootfs_type = @DEFROOTFSTYPE@
+
+# Enable running clh VMM as a non-root user.
+# By default the clh VMM runs as root. When this is set to true, the clh VMM process runs as
+# a non-root random user. See documentation for the limitations of this mode.
+rootless = false
+
+# disable applying SELinux on the VMM process (default false)
+disable_selinux = @DEFDISABLESELINUX@
+
+# disable applying SELinux on the container process
+# If set to false, the type `container_t` is applied to the container process by default.
+# Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built
+# with `SELINUX=yes`.
+# (default: true)
+disable_guest_selinux = @DEFDISABLEGUESTSELINUX@
+
+# Path to the firmware.
+# If you want Cloud Hypervisor to use a specific firmware, set its path below.
+# This option is only used when confidential_guest is enabled.
+#
+# For more information about firmware that can be used with specific TEEs,
+# please, refer to:
+# * Intel TDX:
+#   - td-shim: https://github.com/confidential-containers/td-shim
+#
+# firmware = "@FIRMWAREPATH@"
+
+# List of valid annotation names for the hypervisor
+# Each member of the list is a regular expression, which is the base name
+# of the annotation, e.g. "path" for "io.katacontainers.config.hypervisor.path"
+enable_annotations = @DEFENABLEANNOTATIONS@
+
+# List of valid annotations values for the hypervisor
+# Each member of the list is a path pattern as described by glob(3).
+# The default if not set is empty (all annotations rejected.)
+# Your distribution recommends: @CLHVALIDHYPERVISORPATHS@
+valid_hypervisor_paths = @CLHVALIDHYPERVISORPATHS@
+
+# Optional space-separated list of options to pass to the guest kernel.
+# For example, use `kernel_params = "vsyscall=emulate"` if you are having
+# trouble running pre-2.15 glibc.
+#
+# WARNING: - any parameter specified here will take priority over the default
+# parameter value of the same name used to start the virtual machine.
+# Do not set values here unless you understand the impact of doing so as you
+# may stop the virtual machine from booting.
+# To see the list of default parameters, enable hypervisor debug, create a
+# container and look for 'default-kernel-parameters' log entries.
+kernel_params = "@KERNELPARAMS@"
+
+# Default number of vCPUs per SB/VM:
+# unspecified or 0                --> will be set to @DEFVCPUS@
+# < 0                             --> will be set to the actual number of physical cores
+# > 0 <= number of physical cores --> will be set to the specified number
+# > number of physical cores      --> will be set to the actual number of physical cores
+default_vcpus = 1
+
+# Default maximum number of vCPUs per SB/VM:
+# unspecified or == 0             --> will be set to the actual number of physical cores or to the maximum number
+#                                     of vCPUs supported by KVM if that number is exceeded
+# > 0 <= number of physical cores --> will be set to the specified number
+# > number of physical cores      --> will be set to the actual number of physical cores or to the maximum number
+#                                     of vCPUs supported by KVM if that number is exceeded
+# WARNING: Depending on the architecture, the maximum number of vCPUs supported by KVM is used when
+# the actual number of physical cores is greater than it.
+# WARNING: Be aware that this value impacts the virtual machine's memory footprint and the CPU
+# hotplug functionality. For example, `default_maxvcpus = 240` specifies that up to 240 vCPUs
+# can be added to a SB/VM, but the memory footprint will be big. As another example, with
+# `default_maxvcpus = 8` the memory footprint will be small, but 8 will be the maximum number of
+# vCPUs supported by the SB/VM. In general, we recommend that you do not edit this variable
+# unless you know what you are doing.
+default_maxvcpus = @DEFMAXVCPUS@
+
+# Default memory size in MiB for SB/VM.
+# If unspecified then it will be set to @DEFMEMSZ@ MiB.
+default_memory = @DEFMEMSZ@
+
+# Default memory slots per SB/VM.
+# If unspecified then it will be set to @DEFMEMSLOTS@.
+# This determines how many times memory can be hot-added to the sandbox/VM.
+memory_slots = @DEFMEMSLOTS@
+
+# Default maximum memory in MiB per SB / VM
+# unspecified or == 0           --> will be set to the actual amount of physical RAM
+# > 0 <= amount of physical RAM --> will be set to the specified number
+# > amount of physical RAM      --> will be set to the actual amount of physical RAM
+default_maxmemory = @DEFMAXMEMSZ@
+
+# Disable hotplugging host block devices to guest VMs for container rootfs.
+# In case of a storage driver like devicemapper where a container's
+# root file system is backed by a block device, the block device is passed
+# directly to the hypervisor for performance reasons.
+# This flag prevents the block device from being passed to the hypervisor,
+# virtio-fs is used instead to pass the rootfs.
+# WARNING:
+#   Don't set this flag to false if you don't understand well the behavior of
+#   your container runtime and image snapshotter. Some snapshotters might use
+#   container image storage devices that are not meant to be hotplugged into a
+#   guest VM - e.g., because they contain files used by the host or by other
+#   guests.
+disable_block_device_use = @DEFDISABLEBLOCK@
+
+# Shared file system type:
+#   - virtio-fs (default)
+#   - virtio-fs-nydus
+#   - none
+shared_fs = "@DEFSHAREDFS_CLH_VIRTIOFS@"
+
+# Path to vhost-user-fs daemon.
+virtio_fs_daemon = "@DEFVIRTIOFSDAEMON@"
+
+# List of valid annotations values for the virtiofs daemon
+# The default if not set is empty (all annotations rejected.)
+# Your distribution recommends: @DEFVALIDVIRTIOFSDAEMONPATHS@
+valid_virtio_fs_daemon_paths = @DEFVALIDVIRTIOFSDAEMONPATHS@
+
+# Default size of DAX cache in MiB
+virtio_fs_cache_size = @DEFVIRTIOFSCACHESIZE@
+
+# Default size of virtqueues
+virtio_fs_queue_size = @DEFVIRTIOFSQUEUESIZE@
+
+# Extra args for virtiofsd daemon
+#
+# Format example:
+#   ["--arg1=xxx", "--arg2=yyy"]
+# Examples:
+#   Set virtiofsd log level to debug : ["--log-level=debug"]
+# see `virtiofsd -h` for possible options.
+virtio_fs_extra_args = @DEFVIRTIOFSEXTRAARGS@
+
+# Cache mode:
+#
+#  - never
+#    Metadata, data, and pathname lookup are not cached in guest. They are
+#    always fetched from host and any changes are immediately pushed to host.
+#
+#  - metadata
+#    Metadata and pathname lookup are cached in guest and never expire.
+#    Data is never cached in guest.
+#
+#  - auto
+#    Metadata and pathname lookup cache expires after a configured amount of
+#    time (default is 1 second). Data is cached while the file is open (close
+#    to open consistency).
+#
+#  - always
+#    Metadata, data, and pathname lookup are cached in guest and never expire.
+virtio_fs_cache = "@DEFVIRTIOFSCACHE@"
+
+# Block storage driver to be used for the hypervisor in case the container
+# rootfs is backed by a block device. This is virtio-blk.
+block_device_driver = "virtio-blk"
+
+# Specifies whether cache-related options will be set for block devices.
+# Default false
+block_device_cache_set = false
+
+# Specifies cache-related options for block devices.
+# Denotes whether use of O_DIRECT (bypass the host page cache) is enabled.
+# Default false
+block_device_cache_direct = false
+
+# Reclaim guest freed memory.
+# Enabling this will result in the VM balloon device having f_reporting=on set.
+# Then the hypervisor will use it to reclaim guest freed memory.
+# This is useful for reducing the amount of memory used by a VM.
+# Enabling this feature may sometimes reduce the speed of memory access in
+# the VM.
+#
+# Default false
+reclaim_guest_freed_memory = false
+
+# Enable huge pages for VM RAM, default false
+# Enabling this will result in the VM memory
+# being allocated using huge pages.
+enable_hugepages = false
+
+# Disable the 'seccomp' feature from Cloud Hypervisor, default false
+disable_seccomp = false
+
+# Enable vIOMMU, default false
+# Enabling this will result in the VM having a vIOMMU device
+# This will also add the following options to the kernel's
+# command line: iommu=pt
+enable_iommu = false
+
+# This option changes the default hypervisor and kernel parameters
+# to enable debug output where available.
+#
+# Default false
+enable_debug = false
+
+# This option specifies the loglevel of the hypervisor
+#
+# Default 1
+hypervisor_loglevel = 1
+
+# If false and nvdimm is supported, use nvdimm device to plug guest image.
+# Otherwise virtio-block device is used.
+#
+# nvdimm is not supported with Cloud Hypervisor or when `confidential_guest = true`.
+disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM_CLH@
+
+# Enable hot-plugging of VFIO devices to a root-port.
+# The default setting is  "no-port"
+hot_plug_vfio = "no-port"
+
+# In a confidential compute environment hot-plugging can compromise
+# security.
+# Enable cold-plugging of VFIO devices to a root-port.
+# The default setting is  "no-port", which means disabled.
+cold_plug_vfio = "no-port"
+
+# Path to OCI hook binaries in the *guest rootfs*.
+# This does not affect host-side hooks which must instead be added to
+# the OCI spec passed to the runtime.
+#
+# You can create a rootfs with hooks by customizing the osbuilder scripts:
+# https://github.com/kata-containers/kata-containers/tree/main/tools/osbuilder
+#
+# Hooks must be stored in a subdirectory of guest_hook_path according to their
+# hook type, i.e. "guest_hook_path/{prestart,poststart,poststop}".
+# The agent will scan these directories for executable files and add them, in
+# lexicographical order, to the lifecycle of the guest container.
+# Hooks are executed in the runtime namespace of the guest. See the official documentation:
+# https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks
+# Warnings will be logged if any error is encountered while scanning for hooks,
+# but it will not abort container execution.
+guest_hook_path = ""
+#
+# These options are related to network rate limiter at the VMM level, and are
+# based on the Cloud Hypervisor I/O throttling.  Those are disabled by default
+# and we strongly advise users to refer to the Cloud Hypervisor official
+# documentation for a better understanding of its internals:
+# https://github.com/cloud-hypervisor/cloud-hypervisor/blob/main/docs/io_throttling.md
+#
+# Bandwidth rate limiter options
+#
+# net_rate_limiter_bw_max_rate controls network I/O bandwidth (size in bits/sec
+# for SB/VM).
+# The same value is used for inbound and outbound bandwidth.
+# Default 0-sized value means unlimited rate.
+net_rate_limiter_bw_max_rate = 0
+#
+# net_rate_limiter_bw_one_time_burst increases the initial max rate and this
+# initial extra credit does *NOT* affect the overall limit and can be used for
+# an *initial* burst of data.
+# This is *optional* and only takes effect if net_rate_limiter_bw_max_rate is
+# set to a non zero value.
+net_rate_limiter_bw_one_time_burst = 0
+#
+# Operation rate limiter options
+#
+# net_rate_limiter_ops_max_rate controls the network I/O operation rate (in ops/sec
+# for SB/VM).
+# The same value is used for inbound and outbound operations.
+# Default 0-sized value means unlimited rate.
+net_rate_limiter_ops_max_rate = 0
+#
+# net_rate_limiter_ops_one_time_burst increases the initial max rate and this
+# initial extra credit does *NOT* affect the overall limit and can be used for
+# an *initial* burst of data.
+# This is *optional* and only takes effect if net_rate_limiter_ops_max_rate is
+# set to a non zero value.
+net_rate_limiter_ops_one_time_burst = 0
+#
+# These options are related to disk rate limiter at the VMM level, and are
+# based on the Cloud Hypervisor I/O throttling.  Those are disabled by default
+# and we strongly advise users to refer to the Cloud Hypervisor official
+# documentation for a better understanding of its internals:
+# https://github.com/cloud-hypervisor/cloud-hypervisor/blob/main/docs/io_throttling.md
+#
+# Bandwidth rate limiter options
+#
+# disk_rate_limiter_bw_max_rate controls disk I/O bandwidth (size in bits/sec
+# for SB/VM).
+# The same value is used for inbound and outbound bandwidth.
+# Default 0-sized value means unlimited rate.
+disk_rate_limiter_bw_max_rate = 0
+#
+# disk_rate_limiter_bw_one_time_burst increases the initial max rate and this
+# initial extra credit does *NOT* affect the overall limit and can be used for
+# an *initial* burst of data.
+# This is *optional* and only takes effect if disk_rate_limiter_bw_max_rate is
+# set to a non zero value.
+disk_rate_limiter_bw_one_time_burst = 0
+#
+# Operation rate limiter options
+#
+# disk_rate_limiter_ops_max_rate controls the disk I/O operation rate (in ops/sec
+# for SB/VM).
+# The same value is used for inbound and outbound operations.
+# Default 0-sized value means unlimited rate.
+disk_rate_limiter_ops_max_rate = 0
+#
+# disk_rate_limiter_ops_one_time_burst increases the initial max rate and this
+# initial extra credit does *NOT* affect the overall limit and can be used for
+# an *initial* burst of data.
+# This is *optional* and only takes effect if disk_rate_limiter_ops_max_rate is
+# set to a non zero value.
+disk_rate_limiter_ops_one_time_burst = 0
+
+[agent.@PROJECT_TYPE@]
+# If enabled, make the agent display debug-level messages.
+# (default: disabled)
+enable_debug = false
+
+# Enable agent tracing.
+#
+# If enabled, the agent will generate OpenTelemetry trace spans.
+#
+# Notes:
+#
+# - If the runtime also has tracing enabled, the agent spans will be
+#   associated with the appropriate runtime parent span.
+# - If enabled, the runtime will wait for the container to shutdown,
+#   increasing the container shutdown time slightly.
+#
+# (default: disabled)
+enable_tracing = false
+
+# Enable debug console.
+
+# If enabled, users can connect to the guest OS running inside the hypervisor
+# through the "kata-runtime exec <sandbox-id>" command
+
+debug_console_enabled = false
+
+# Agent connection dialing timeout value in seconds
+# (default: 45)
+dial_timeout = 45
+
+# Confidential Data Hub API timeout value in seconds
+# (default: 50)
+cdh_api_timeout = 50
+
+[runtime]
+# If enabled, the runtime will log additional debug messages to the
+# system log
+# (default: disabled)
+enable_debug = false
+#
+# Internetworking model
+# Determines how the VM should be connected to the
+# container network interface
+# Options:
+#
+#   - macvtap
+#     Used when the Container network interface can be bridged using
+#     macvtap.
+#
+#   - none
+#     Used with a customized network. Only creates a tap device. No veth pair.
+#
+#   - tcfilter
+#     Uses tc filter rules to redirect traffic from the network interface
+#     provided by plugin to a tap interface connected to the VM.
+#
+internetworking_model = "@DEFNETWORKMODEL_CLH@"
+
+# disable guest seccomp
+# Determines whether container seccomp profiles are passed to the virtual
+# machine and applied by the kata agent. If set to true, seccomp is not applied
+# within the guest
+# (default: true)
+disable_guest_seccomp = @DEFDISABLEGUESTSECCOMP@
+
+# Apply a custom SELinux security policy to the container process inside the VM.
+# This is used when you want to apply a type other than the default `container_t`,
+# so general users should not uncomment and apply it.
+# (format: "user:role:type")
+# Note: You cannot specify MCS policy with the label because the sensitivity levels and
+# categories are determined automatically by high-level container runtimes such as containerd.
+# Example value when enabling: "system_u:system_r:container_t"
+guest_selinux_label = "@DEFGUESTSELINUXLABEL@"
+
+# If enabled, the runtime will create opentracing.io traces and spans.
+# (See https://www.jaegertracing.io/docs/getting-started).
+# (default: disabled)
+enable_tracing = false
+
+# Set the full url to the Jaeger HTTP Thrift collector.
+# The default if not set will be "http://localhost:14268/api/traces"
+jaeger_endpoint = ""
+
+# Sets the username to be used if basic auth is required for Jaeger.
+jaeger_user = ""
+
+# Sets the password to be used if basic auth is required for Jaeger.
+jaeger_password = ""
+
+# If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
+# This option may have some potential impacts to your host. It should only be used when you know what you're doing.
+# `disable_new_netns` conflicts with `internetworking_model=tcfilter` and `internetworking_model=macvtap`. It works only
+# with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge
+# (like OVS) directly.
+# (default: false)
+disable_new_netns = false
+
+# if enabled, the runtime will add all the kata processes inside one dedicated cgroup.
+# The container cgroups in the host are not created, just one single cgroup per sandbox.
+# The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox.
+# The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
+# The sandbox cgroup is constrained if there is no container type annotation.
+# See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
+sandbox_cgroup_only = @DEFSANDBOXCGROUPONLY@
+
+# If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
+# this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
+# when a hardware architecture or hypervisor solution is utilized which does not support CPU and/or memory hotplug.
+# Compatibility for determining appropriate sandbox (VM) size:
+# - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O
+#   does not yet support sandbox sizing annotations.
+# - When running single containers using a tool like ctr, container sizing information will be available.
+static_sandbox_resource_mgmt = @DEFSTATICRESOURCEMGMT_CLH_AZURE@
+
+# If specified, sandbox_bind_mounts identifies host paths to be mounted (ro) into the sandbox's shared path.
+# This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
+# If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts`
+# These will not be exposed to the container workloads, and are only provided for potential guest services.
+sandbox_bind_mounts = @DEFBINDMOUNTS@
+
+# VFIO Mode
+# Determines how VFIO devices should be presented to the container.
+# Options:
+#
+#  - vfio
+#    Matches behaviour of OCI runtimes (e.g. runc) as much as
+#    possible.  VFIO devices will appear in the container as VFIO
+#    character devices under /dev/vfio.  The exact names may differ
+#    from the host (they need to match the VM's IOMMU group numbers
+#    rather than the host's)
+#
+#  - guest-kernel
+#    This is a Kata-specific behaviour that's useful in certain cases.
+#    The VFIO device is managed by whatever driver in the VM kernel
+#    claims it.  This means it will appear as one or more device nodes
+#    or network interfaces depending on the nature of the device.
+#    Using this mode requires specially built workloads that know how
+#    to locate the relevant device interfaces within the VM.
+#
+vfio_mode = "@DEFVFIOMODE@"
+
+# If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will
+# be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest.
+disable_guest_empty_dir = @DEFDISABLEGUESTEMPTYDIR@
+
+# Specifies how Kubernetes emptyDir volumes are handled.
+# Options:
+#
+#   - shared-fs (default)
+#     Shares the emptyDir folder with the guest using the method given
+#     by the `shared_fs` setting.
+#
+#   - block-encrypted
+#     Plugs a block device to be encrypted in the guest.
+#
+emptydir_mode = "@DEFEMPTYDIRMODE@"
+
+# Enabled experimental feature list, format: ["a", "b"].
+# Experimental features are features not stable enough for production,
+# they may break compatibility, and are prepared for a big version bump.
+# Supported experimental features:
+# (default: [])
+experimental = @DEFAULTEXPFEATURES@
+
+# If enabled, user can run pprof tools with shim v2 process through kata-monitor.
+# (default: false)
+enable_pprof = false
+
+# Indicates the CreateContainer request timeout needed for the workload(s).
+# If using guest_pull, this includes the time to pull the image inside the guest.
+# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
+# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
+# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
+# In essence, the timeout used for guest pull=runtime-request-timeout<create_container_timeout?runtime-request-timeout:create_container_timeout.
+create_container_timeout = @DEFCREATECONTAINERTIMEOUT@
+
+# Base directory of directly attachable network config.
+# Network devices for VM-based containers are allowed to be placed in the
+# host netns to eliminate as many hops as possible, which is what we
+# called a "Directly Attachable Network". The config, set by special CNI
+# plugins, is used to tell the Kata containers what devices are attached
+# to the hypervisor.
+# (default: /run/kata-containers/dans)
+dan_conf = "@DEFDANCONF@"
+
+# kubelet_root_dir is the kubelet root directory used to match ConfigMap/Secret
+# volume paths for propagation. Override for distros that use a different path
+# (e.g. k0s: /var/lib/k0s/kubelet).
+kubelet_root_dir = "@DEFKUBELETROOTDIR@"
+
+# pod_resource_api_sock specifies the unix socket for the Kubelet's
+# PodResource API endpoint. If empty, kubernetes based cold plug
+# will not be attempted. In order for this feature to work, the
+# KubeletPodResourcesGet featureGate must be enabled in Kubelet,
+# if using Kubelet older than 1.34.
+#
+# The pod resource API's socket is relative to the Kubelet's root-dir,
+# which is defined by the cluster admin, and its location is:
+# ${KubeletRootDir}/pod-resources/kubelet.sock
+#
+# The pod resource API's socket is relative to the Kubelet's root-dir,
+# which is defined by the cluster admin, and its location is:
+# ${KubeletRootDir}/pod-resources/kubelet.sock
+#
+# cold_plug_vfio (see hypervisor config) acts as a feature gate:
+#      cold_plug_vfio = no-port (default) => no cold plug
+#      cold_plug_vfio != no-port AND pod_resource_api_sock = "" => need
+#              explicit CDI annotation for cold plug (applies mainly
+#              to non-k8s cases)
+#      cold_plug_vfio != no-port AND pod_resource_api_sock != "" => kubelet
+#              based cold plug.
+pod_resource_api_sock = "@DEFPODRESOURCEAPISOCK@"