diff --git a/.github/workflows/build-kata-static-tarball-arm64.yaml b/.github/workflows/build-kata-static-tarball-arm64.yaml index fc9376c935..1cd5e467e3 100644 --- a/.github/workflows/build-kata-static-tarball-arm64.yaml +++ b/.github/workflows/build-kata-static-tarball-arm64.yaml @@ -45,6 +45,7 @@ jobs: - kernel - kernel-dragonball-experimental - kernel-nvidia-gpu + - kernel-cca-confidential - nydus - ovmf - qemu @@ -126,7 +127,7 @@ jobs: if-no-files-found: error - name: store-extratarballs-artifact ${{ matrix.asset }} - if: ${{ startsWith(matrix.asset, 'kernel-nvidia-gpu') }} + if: ${{ startsWith(matrix.asset, 'kernel-nvidia-gpu') || startsWith(matrix.asset, 'kernel-cca-confidential') }} uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 with: name: kata-artifacts-arm64-${{ matrix.asset }}-headers${{ inputs.tarball-suffix }} diff --git a/src/runtime/Makefile b/src/runtime/Makefile index 940338c8e6..a4295227e1 100644 --- a/src/runtime/Makefile +++ b/src/runtime/Makefile @@ -108,6 +108,7 @@ GENERATED_VARS = \ CONFIG_QEMU_NVIDIA_GPU_TDX_IN \ CONFIG_QEMU_TDX_IN \ CONFIG_QEMU_SNP_IN \ + CONFIG_QEMU_CCA_IN \ CONFIG_CLH_IN \ CONFIG_FC_IN \ CONFIG_STRATOVIRT_IN \ @@ -186,6 +187,8 @@ QEMUSNPVALIDHYPERVISORPATHS := [\"$(QEMUSNPPATH)\"] QEMUVIRTIOFSPATH := $(QEMUBINDIR)/$(QEMUVIRTIOFSCMD) +DEFCCAMEASUREMENTALGO := sha512 + CLHPATH := $(CLHBINDIR)/$(CLHCMD) CLHVALIDHYPERVISORPATHS := [\"$(CLHPATH)\"] @@ -246,6 +249,7 @@ DEFSHAREDFS_STRATOVIRT_VIRTIOFS := virtio-fs DEFSHAREDFS_QEMU_TDX_VIRTIOFS := none DEFSHAREDFS_QEMU_SNP_VIRTIOFS := none DEFSHAREDFS_QEMU_SEL_VIRTIOFS := none +DEFSHAREDFS_QEMU_CCA_VIRTIOFS := none DEFVIRTIOFSDAEMON := $(LIBEXECDIR)/virtiofsd DEFVALIDVIRTIOFSDAEMONPATHS := [\"$(DEFVIRTIOFSDAEMON)\"] # Default DAX mapping cache size in MiB @@ -359,6 +363,18 @@ ifneq (,$(QEMUCMD)) CONFIGS += $(CONFIG_QEMU_SNP) + CONFIG_FILE_QEMU_CCA = configuration-qemu-cca.toml + CONFIG_QEMU_CCA = config/$(CONFIG_FILE_QEMU_CCA) + 
CONFIG_QEMU_CCA_IN = $(CONFIG_QEMU_CCA).in + + CONFIG_PATH_QEMU_CCA = $(abspath $(CONFDIR)/$(CONFIG_FILE_QEMU_CCA)) + CONFIG_PATHS += $(CONFIG_PATH_QEMU_CCA) + + SYSCONFIG_QEMU_CCA = $(abspath $(SYSCONFDIR)/$(CONFIG_FILE_QEMU_CCA)) + SYSCONFIG_PATHS_CCA += $(SYSCONFIG_QEMU_CCA) + + CONFIGS += $(CONFIG_QEMU_CCA) + CONFIG_FILE_QEMU_NVIDIA_GPU = configuration-qemu-nvidia-gpu.toml CONFIG_QEMU_NVIDIA_GPU = config/$(CONFIG_FILE_QEMU_NVIDIA_GPU) CONFIG_QEMU_NVIDIA_GPU_IN = $(CONFIG_QEMU_NVIDIA_GPU).in @@ -754,6 +770,8 @@ USER_VARS += DEFVFIOMODE USER_VARS += DEFVFIOMODE_SE USER_VARS += BUILDFLAGS USER_VARS += DEFDISABLEIMAGENVDIMM +USER_VARS += DEFCCAMEASUREMENTALGO +USER_VARS += DEFSHAREDFS_QEMU_CCA_VIRTIOFS V = @ diff --git a/src/runtime/config/configuration-qemu-cca.toml.in b/src/runtime/config/configuration-qemu-cca.toml.in new file mode 100644 index 0000000000..ed39e62144 --- /dev/null +++ b/src/runtime/config/configuration-qemu-cca.toml.in @@ -0,0 +1,648 @@ +# Copyright 2022 Advanced Micro Devices, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +# XXX: WARNING: this file is auto-generated. +# XXX: +# XXX: Source file: "@CONFIG_QEMU_CCA_IN@" +# XXX: Project: +# XXX: Name: @PROJECT_NAME@ +# XXX: Type: @PROJECT_TYPE@ + +[hypervisor.qemu] +path = "@QEMUPATH@" +kernel = "@KERNELCONFIDENTIALPATH@" +initrd = "@INITRDCONFIDENTIALPATH@" +machine_type = "@MACHINETYPE@" + +# rootfs filesystem type: +# - ext4 (default) +# - xfs +# - erofs +rootfs_type=@DEFROOTFSTYPE@ + +# Enable confidential guest support. +# Toggling that setting may trigger different hardware features, ranging +# from memory encryption to both memory and CPU-state encryption and integrity. +# The Kata Containers runtime dynamically detects the available feature set and +# aims at enabling the largest possible one, returning an error if none is +# available, or none is supported by the hypervisor. 
+# +# Known limitations: +# * Does not work by design: +# - CPU Hotplug +# - Memory Hotplug +# - NVDIMM devices +# +# Default false +confidential_guest = true + +# Enable running QEMU VMM as a non-root user. +# By default QEMU VMM run as root. When this is set to true, QEMU VMM process runs as +# a non-root random user. See documentation for the limitations of this mode. +# rootless = true + +# List of valid annotation names for the hypervisor +# Each member of the list is a regular expression, which is the base name +# of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path" +enable_annotations = @DEFENABLEANNOTATIONS@ + +# List of valid annotations values for the hypervisor +# Each member of the list is a path pattern as described by glob(3). +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: @QEMUVALIDHYPERVISORPATHS@ +valid_hypervisor_paths = @QEMUVALIDHYPERVISORPATHS@ + +# Optional space-separated list of options to pass to the guest kernel. +# For example, use `kernel_params = "vsyscall=emulate"` if you are having +# trouble running pre-2.15 glibc. +# +# WARNING: - any parameter specified here will take priority over the default +# parameter value of the same name used to start the virtual machine. +# Do not set values here unless you understand the impact of doing so as you +# may stop the virtual machine from booting. +# To see the list of default parameters, enable hypervisor debug, create a +# container and look for 'default-kernel-parameters' log entries. +kernel_params = "@KERNELPARAMS@" + +# Path to the firmware. +# If you want that qemu uses the default firmware leave this option empty +firmware = "@FIRMWAREPATH@" + +# Path to the firmware volume. +# firmware TDVF or OVMF can be split into FIRMWARE_VARS.fd (UEFI variables +# as configuration) and FIRMWARE_CODE.fd (UEFI program image). UEFI variables +# can be customized per each user while UEFI code is kept same. 
+firmware_volume = "@FIRMWAREVOLUMEPATH@" + +# Machine accelerators +# comma-separated list of machine accelerators to pass to the hypervisor. +# For example, `machine_accelerators = "nosmm,nosmbus,nosata,nopit,static-prt,nofw"` +machine_accelerators="@MACHINEACCELERATORS@" + +# Qemu seccomp sandbox feature +# comma-separated list of seccomp sandbox features to control the syscall access. +# For example, `seccompsandbox= "on,obsolete=deny,spawn=deny,resourcecontrol=deny"` +# Note: "elevateprivileges=deny" doesn't work with daemonize option, so it's removed from the seccomp sandbox +# Another note: enabling this feature may reduce performance, you may enable +# /proc/sys/net/core/bpf_jit_enable to reduce the impact. see https://man7.org/linux/man-pages/man8/bpfc.8.html +#seccompsandbox="@DEFSECCOMPSANDBOXPARAM@" + +# CPU features +# comma-separated list of cpu features to pass to the cpu +# For example, `cpu_features = "pmu=off,vmx=off" +cpu_features="@CPUFEATURES@" + +# Default number of vCPUs per SB/VM: +# unspecified or 0 --> will be set to @DEFVCPUS@ +# < 0 --> will be set to the actual number of physical cores +# > 0 <= number of physical cores --> will be set to the specified number +# > number of physical cores --> will be set to the actual number of physical cores +default_vcpus = 1 + +# Default maximum number of vCPUs per SB/VM: +# unspecified or == 0 --> will be set to the actual number of physical cores or to the maximum number +# of vCPUs supported by KVM if that number is exceeded +# > 0 <= number of physical cores --> will be set to the specified number +# > number of physical cores --> will be set to the actual number of physical cores or to the maximum number +# of vCPUs supported by KVM if that number is exceeded +# WARNING: Depending of the architecture, the maximum number of vCPUs supported by KVM is used when +# the actual number of physical cores is greater than it. 
+# WARNING: Be aware that this value impacts the virtual machine's memory footprint and the CPU +# hotplug functionality. For example, `default_maxvcpus = 240` specifies that up to 240 vCPUs +# can be added to a SB/VM, but the memory footprint will be big. Another example, with +# `default_maxvcpus = 8` the memory footprint will be small, but 8 will be the maximum number of +# vCPUs supported by the SB/VM. In general, we recommend that you do not edit this variable, +# unless you know what you are doing. +# NOTICE: on arm platform with gicv2 interrupt controller, set it to 8. +default_maxvcpus = @DEFMAXVCPUS@ + +# Bridges can be used to hot plug devices. +# Limitations: +# * Currently only pci bridges are supported +# * Up to 30 devices per bridge can be hot plugged. +# * Up to 5 PCI bridges can be cold plugged per VM. +# This limitation could be a bug in qemu or in the kernel +# Default number of bridges per SB/VM: +# unspecified or 0 --> will be set to @DEFBRIDGES@ +# > 1 <= 5 --> will be set to the specified number +# > 5 --> will be set to 5 +default_bridges = @DEFBRIDGES@ + +# Default memory size in MiB for SB/VM. +# If unspecified then it will be set to @DEFMEMSZ@ MiB. +default_memory = @DEFMEMSZ@ +# +# Default memory slots per SB/VM. +# If unspecified then it will be set to @DEFMEMSLOTS@. +# This will determine the number of times that memory can be hot-added to the sandbox/VM. +#memory_slots = @DEFMEMSLOTS@ + +# Default maximum memory in MiB per SB / VM +# unspecified or == 0 --> will be set to the actual amount of physical RAM +# > 0 <= amount of physical RAM --> will be set to the specified number +# > amount of physical RAM --> will be set to the actual amount of physical RAM +default_maxmemory = @DEFMAXMEMSZ@ + +# The size in MiB will be added to the max memory of the hypervisor. +# It is the memory address space for the NVDIMM device. +# If the block storage driver (block_device_driver) is set to "nvdimm", +# memory_offset should be set to the size of the block device. 
+# Default 0 +#memory_offset = 0 + +# Specifies virtio-mem will be enabled or not. +# Please note that this option should be used with the command +# "echo 1 > /proc/sys/vm/overcommit_memory". +# Default false +#enable_virtio_mem = true + +# Disable block device from being used for a container's rootfs. +# In case of a storage driver like devicemapper where a container's +# root file system is backed by a block device, the block device is passed +# directly to the hypervisor for performance reasons. +# This flag prevents the block device from being passed to the hypervisor, +# virtio-fs is used instead to pass the rootfs. +disable_block_device_use = @DEFDISABLEBLOCK@ + +# Shared file system type: +# - virtio-fs (default) +# - virtio-9p +# - virtio-fs-nydus +# - none +shared_fs = "@DEFSHAREDFS_QEMU_CCA_VIRTIOFS@" + +# Path to vhost-user-fs daemon. +virtio_fs_daemon = "@DEFVIRTIOFSDAEMON@" + +# List of valid annotations values for the virtiofs daemon +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: @DEFVALIDVIRTIOFSDAEMONPATHS@ +valid_virtio_fs_daemon_paths = @DEFVALIDVIRTIOFSDAEMONPATHS@ + +# Default size of DAX cache in MiB +virtio_fs_cache_size = @DEFVIRTIOFSCACHESIZE@ + +# Extra args for virtiofsd daemon +# +# Format example: +# ["-o", "arg1=xxx,arg2", "-o", "hello world", "--arg3=yyy"] +# Examples: +# Set virtiofsd log level to debug : ["-o", "log_level=debug"] or ["-d"] +# +# see `virtiofsd -h` for possible options. +virtio_fs_extra_args = @DEFVIRTIOFSEXTRAARGS@ + +# Cache mode: +# +# - none +# Metadata, data, and pathname lookup are not cached in guest. They are +# always fetched from host and any changes are immediately pushed to host. +# +# - auto +# Metadata and pathname lookup cache expires after a configured amount of +# time (default is 1 second). Data is cached while the file is open (close +# to open consistency). +# +# - always +# Metadata, data, and pathname lookup are cached in guest and never expire. 
+virtio_fs_cache = "@DEFVIRTIOFSCACHE@" + +# Block storage driver to be used for the hypervisor in case the container +# rootfs is backed by a block device. This is virtio-scsi, virtio-blk +# or nvdimm. +block_device_driver = "@DEFBLOCKSTORAGEDRIVER_QEMU@" + +# Specifies cache-related options will be set to block devices or not. +# Default false +#block_device_cache_set = true + +# Specifies cache-related options for block devices. +# Denotes whether use of O_DIRECT (bypass the host page cache) is enabled. +# Default false +#block_device_cache_direct = true + +# Specifies cache-related options for block devices. +# Denotes whether flush requests for the device are ignored. +# Default false +#block_device_cache_noflush = true + +# Enable iothreads (data-plane) to be used. This causes IO to be +# handled in a separate IO thread. This is currently only implemented +# for SCSI. +# +enable_iothreads = @DEFENABLEIOTHREADS@ + +# Enable pre allocation of VM RAM, default false +# Enabling this will result in lower container density +# as all of the memory will be allocated and locked +# This is useful when you want to reserve all the memory +# upfront or in the cases where you want memory latencies +# to be very predictable +# Default false +#enable_mem_prealloc = true + +# Enable huge pages for VM RAM, default false +# Enabling this will result in the VM memory +# being allocated using huge pages. +# This is useful when you want to use vhost-user network +# stacks within the container. This will automatically +# result in memory pre allocation +#enable_hugepages = true + +# Enable vhost-user storage device, default false +# Enabling this will result in some Linux reserved block type +# major range 240-254 being chosen to represent vhost-user devices. +enable_vhost_user_store = @DEFENABLEVHOSTUSERSTORE@ + +# The base directory specifically used for vhost-user devices. 
+# Its sub-path "block" is used for block devices; "block/sockets" is +# where we expect vhost-user sockets to live; "block/devices" is where +# simulated block device nodes for vhost-user devices to live. +vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@" + +# Enable vIOMMU, default false +# Enabling this will result in the VM having a vIOMMU device +# This will also add the following options to the kernel's +# command line: intel_iommu=on,iommu=pt +#enable_iommu = true + +# Enable IOMMU_PLATFORM, default false +# Enabling this will result in the VM device having iommu_platform=on set +#enable_iommu_platform = true + +# List of valid annotations values for the vhost user store path +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: @DEFVALIDVHOSTUSERSTOREPATHS@ +valid_vhost_user_store_paths = @DEFVALIDVHOSTUSERSTOREPATHS@ + +# Enable file based guest memory support. The default is an empty string which +# will disable this feature. In the case of virtio-fs, this is enabled +# automatically and '/dev/shm' is used as the backing folder. +# This option will be ignored if VM templating is enabled. +#file_mem_backend = "@DEFFILEMEMBACKEND@" + +# List of valid annotations values for the file_mem_backend annotation +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: @DEFVALIDFILEMEMBACKENDS@ +valid_file_mem_backends = @DEFVALIDFILEMEMBACKENDS@ + +# -pflash can add image file to VM. The arguments of it should be in format +# of ["/path/to/flash0.img", "/path/to/flash1.img"] +pflashes = [] + +# This option changes the default hypervisor and kernel parameters +# to enable debug output where available. +# +# Default false +#enable_debug = true + +# Disable the customizations done in the runtime when it detects +# that it is running on top a VMM. This will result in the runtime +# behaving as it would when running on bare metal. 
+# +#disable_nesting_checks = true + +# This is the msize used for 9p shares. It is the number of bytes +# used for 9p packet payload. +#msize_9p = @DEFMSIZE9P@ + +# If false and nvdimm is supported, use nvdimm device to plug guest image. +# Otherwise virtio-block device is used. +# +# nvdimm is not supported when `confidential_guest = true`. +disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM@ + +# Before hot plugging a PCIe device, you need to add a pcie_root_port device. +# Use this parameter when using some large PCI bar devices, such as Nvidia GPU +# The value means the number of pcie_root_port +# Default 0 +#pcie_root_port = 2 + +# If vhost-net backend for virtio-net is not desired, set to true. Default is false, which trades off +# security (vhost-net runs ring0) for network I/O performance. +#disable_vhost_net = true + +# +# Default entropy source. +# The path to a host source of entropy (including a real hardware RNG) +# /dev/urandom and /dev/random are two main options. +# Be aware that /dev/random is a blocking source of entropy. If the host +# runs out of entropy, the VMs boot time will increase leading to get startup +# timeouts. +# The source of entropy /dev/urandom is non-blocking and provides a +# generally acceptable source of entropy. It should work well for pretty much +# all practical purposes. +#entropy_source= "@DEFENTROPYSOURCE@" + +# List of valid annotations values for entropy_source +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: @DEFVALIDENTROPYSOURCES@ +valid_entropy_sources = @DEFVALIDENTROPYSOURCES@ + +# Path to OCI hook binaries in the *guest rootfs*. +# This does not affect host-side hooks which must instead be added to +# the OCI spec passed to the runtime. 
+# +# You can create a rootfs with hooks by customizing the osbuilder scripts: +# https://github.com/kata-containers/kata-containers/tree/main/tools/osbuilder +# +# Hooks must be stored in a subdirectory of guest_hook_path according to their +# hook type, i.e. "guest_hook_path/{prestart,poststart,poststop}". +# The agent will scan these directories for executable files and add them, in +# lexicographical order, to the lifecycle of the guest container. +# Hooks are executed in the runtime namespace of the guest. See the official documentation: +# https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks +# Warnings will be logged if any error is encountered while scanning for hooks, +# but it will not abort container execution. +#guest_hook_path = "/usr/share/oci/hooks" +# +# Use rx Rate Limiter to control network I/O inbound bandwidth(size in bits/sec for SB/VM). +# In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) to discipline traffic. +# Default 0-sized value means unlimited rate. +#rx_rate_limiter_max_rate = 0 +# Use tx Rate Limiter to control network I/O outbound bandwidth(size in bits/sec for SB/VM). +# In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) and ifb(Intermediate Functional Block) +# to discipline traffic. +# Default 0-sized value means unlimited rate. +#tx_rate_limiter_max_rate = 0 + +# Set where to save the guest memory dump file. +# If set, when a GUEST_PANICKED event occurs, +# guest memory will be dumped to host filesystem under guest_memory_dump_path, +# This directory will be created automatically if it does not exist. +# +# The dumped file(also called vmcore) can be processed with crash or gdb. +# +# WARNING: +# Dumping the guest’s memory can take very long depending on the amount of guest memory +# and use much disk space. +#guest_memory_dump_path="/var/crash/kata" + +# Whether to enable paging. 
+# Basically, if you want to use "gdb" rather than "crash", +# or need the guest-virtual addresses in the ELF vmcore, +# then you should enable paging. +# +# See: https://www.qemu.org/docs/master/qemu-qmp-ref.html#Dump-guest-memory for details +#guest_memory_dump_paging=false + +# Enable swap in the guest. Default false. +# When enable_guest_swap is enabled, insert a raw file to the guest as the swap device +# if the swappiness of a container (set by annotation "io.katacontainers.container.resource.swappiness") +# is bigger than 0. +# The size of the swap device should be +# swap_in_bytes (set by annotation "io.katacontainers.container.resource.swap_in_bytes") - memory_limit_in_bytes. +# If swap_in_bytes is not set, the size should be memory_limit_in_bytes. +# If swap_in_bytes and memory_limit_in_bytes are not set, the size should +# be default_memory. +#enable_guest_swap = true + +# use legacy serial for guest console if available and implemented for architecture. Default false +#use_legacy_serial = true + +# disable applying SELinux on the VMM process (default false) +disable_selinux=@DEFDISABLESELINUX@ + +# disable applying SELinux on the container process +# If set to false, the type `container_t` is applied to the container process by default. +# Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built +# with `SELINUX=yes`. +# (default: true) +disable_guest_selinux=@DEFDISABLEGUESTSELINUX@ + +# In QEMU, the Realm Management Extension (RME) measurement algorithm is used for attestation, and it supports +# SHA256 and SHA512 as options. The default is SHA512. This algorithm is crucial for verifying the integrity of a +# Realm, a secure execution environment within the larger system. QEMU supports SHA256 and SHA512 for CCA RME +# measurements. SHA512 is generally preferred on 64-bit architectures due to potential hardware acceleration. +measurement_algo = @DEFCCAMEASUREMENTALGO@ + +[factory] +# VM templating support. 
Once enabled, new VMs are created from template +# using vm cloning. They will share the same initial kernel, initramfs and +# agent memory by mapping it readonly. It helps speeding up new container +# creation and saves a lot of memory if there are many kata containers running +# on the same host. +# +# When disabled, new VMs are created from scratch. +# +# Note: Requires "initrd=" to be set ("image=" is not supported). +# +# Default false +#enable_template = true + +# Specifies the path of template. +# +# Default "/run/vc/vm/template" +#template_path = "/run/vc/vm/template" + +# The number of caches of VMCache: +# unspecified or == 0 --> VMCache is disabled +# > 0 --> will be set to the specified number +# +# VMCache is a function that creates VMs as caches before using it. +# It helps speed up new container creation. +# The function consists of a server and some clients communicating +# through Unix socket. The protocol is gRPC in protocols/cache/cache.proto. +# The VMCache server will create some VMs and cache them by factory cache. +# It will convert the VM to gRPC format and transport it when gets +# requestion from clients. +# Factory grpccache is the VMCache client. It will request gRPC format +# VM and convert it back to a VM. If VMCache function is enabled, +# kata-runtime will request VM from factory grpccache when it creates +# a new sandbox. +# +# Default 0 +#vm_cache_number = 0 + +# Specify the address of the Unix socket that is used by VMCache. +# +# Default /var/run/kata-containers/cache.sock +#vm_cache_endpoint = "/var/run/kata-containers/cache.sock" + +[agent.@PROJECT_TYPE@] +# If enabled, make the agent display debug-level messages. +# (default: disabled) +#enable_debug = true + +# Enable agent tracing. +# +# If enabled, the agent will generate OpenTelemetry trace spans. +# +# Notes: +# +# - If the runtime also has tracing enabled, the agent spans will be +# associated with the appropriate runtime parent span. 
+# - If enabled, the runtime will wait for the container to shut down, +# increasing the container shutdown time slightly. +# +# (default: disabled) +#enable_tracing = true + +# Comma separated list of kernel modules and their parameters. +# These modules will be loaded in the guest kernel using modprobe(8). +# The following example can be used to load two kernel modules with parameters +# - kernel_modules=["e1000e InterruptThrottleRate=3000,3000,3000 EEE=1", "i915 enable_ppgtt=0"] +# The first word is considered as the module name and the rest as its parameters. +# Container will not be started when: +# * A kernel module is specified and the modprobe command is not installed in the guest +# or it fails loading the module. +# * The module is not available in the guest or it doesn't meet the guest kernel +# requirements, like architecture and version. +# +kernel_modules=[] + +# Enable debug console. + +# If enabled, user can connect guest OS running inside hypervisor +# through "kata-runtime exec " command + +#debug_console_enabled = true + +# Agent connection dialing timeout value in seconds +# (default: 90) +dial_timeout = 90 + +[runtime] +# If enabled, the runtime will log additional debug messages to the +# system log +# (default: disabled) +#enable_debug = true +# +# Internetworking model +# Determines how the VM should be connected to +# the container network interface +# Options: +# +# - macvtap +# Used when the Container network interface can be bridged using +# macvtap. +# +# - none +# Used when customizing the network. Only creates a tap device. No veth pair. +# +# - tcfilter +# Uses tc filter rules to redirect traffic from the network interface +# provided by plugin to a tap interface connected to the VM. +# +internetworking_model="@DEFNETWORKMODEL_QEMU@" + +# disable guest seccomp +# Determines whether container seccomp profiles are passed to the virtual +# machine and applied by the kata agent. 
If set to true, seccomp is not applied +# within the guest +# (default: true) +disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ + +# Apply a custom SELinux security policy to the container process inside the VM. +# This is used when you want to apply a type other than the default `container_t`, +# so general users should not uncomment and apply it. +# (format: "user:role:type") +# Note: You cannot specify MCS policy with the label because the sensitivity levels and +# categories are determined automatically by high-level container runtimes such as containerd. +#guest_selinux_label="@DEFGUESTSELINUXLABEL@" + +# If enabled, the runtime will create opentracing.io traces and spans. +# (See https://www.jaegertracing.io/docs/getting-started). +# (default: disabled) +#enable_tracing = true + +# Set the full url to the Jaeger HTTP Thrift collector. +# The default if not set will be "http://localhost:14268/api/traces" +#jaeger_endpoint = "" + +# Sets the username to be used if basic auth is required for Jaeger. +#jaeger_user = "" + +# Sets the password to be used if basic auth is required for Jaeger. +#jaeger_password = "" + +# If enabled, the runtime will not create a network namespace for shim and hypervisor processes. +# This option may have some potential impacts to your host. It should only be used when you know what you're doing. +# `disable_new_netns` conflicts with `internetworking_model=tcfilter` and `internetworking_model=macvtap`. It works only +# with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge +# (like OVS) directly. +# (default: false) +#disable_new_netns = true + +# if enabled, the runtime will add all the kata processes inside one dedicated cgroup. +# The container cgroups in the host are not created, just one single cgroup per sandbox. +# The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox. 
+# The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation. +# The sandbox cgroup is constrained if there is no container type annotation. +# See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType +sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@ + +# If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In +# this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful +# when a hardware architecture or hypervisor solution is utilized which does not support CPU and/or memory hotplug. +# Compatibility for determining appropriate sandbox (VM) size: +# - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O +# does not yet support sandbox sizing annotations. +# - When running single containers using a tool like ctr, container sizing information will be available. +static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT_TEE@ + +# If specified, sandbox_bind_mounts identifies host paths to be mounted (ro) into the sandbox's shared path. +# This is only valid if filesystem sharing is utilized. The provided path(s) will be bind-mounted into the shared fs directory. +# If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts` +# These will not be exposed to the container workloads, and are only provided for potential guest services. +sandbox_bind_mounts=@DEFBINDMOUNTS@ + +# VFIO Mode +# Determines how VFIO devices should be presented to the container. +# Options: +# +# - vfio +# Matches behaviour of OCI runtimes (e.g. runc) as much as +# possible. VFIO devices will appear in the container as VFIO +# character devices under /dev/vfio. 
The exact names may differ +# from the host (they need to match the VM's IOMMU group numbers +# rather than the host's) +# +# - guest-kernel +# This is a Kata-specific behaviour that's useful in certain cases. +# The VFIO device is managed by whatever driver in the VM kernel +# claims it. This means it will appear as one or more device nodes +# or network interfaces depending on the nature of the device. +# Using this mode requires specially built workloads that know how +# to locate the relevant device interfaces within the VM. +# +vfio_mode="@DEFVFIOMODE@" + +# If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will +# be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest. +disable_guest_empty_dir=@DEFDISABLEGUESTEMPTYDIR@ + +# Enabled experimental feature list, format: ["a", "b"]. +# Experimental features are features not stable enough for production, +# they may break compatibility, and are prepared for a big version bump. +# Supported experimental features: +# (default: []) +experimental=@DEFAULTEXPFEATURES@ + +# If enabled, user can run pprof tools with shim v2 process through kata-monitor. +# (default: false) +# enable_pprof = true + +# Indicates the CreateContainer request timeout needed for the workload(s) +# If using guest_pull, this includes the time to pull the image inside the guest +# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s) +# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config +# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout. 
+# In essence, the timeout used for guest pull=runtime-request-timeout 0 { + // PersonalizationValue in Arm-CCA should be exactly 64 bytes + personalizationValueSlice := adjustProperLength(object.InitdataDigest, 64) + personalizationValue := base64.StdEncoding.EncodeToString(personalizationValueSlice) + objectParams = append(objectParams, fmt.Sprintf("personalization-value=%s", personalizationValue)) + } } if len(deviceParams) > 0 { diff --git a/src/runtime/pkg/katautils/config-settings.go.in b/src/runtime/pkg/katautils/config-settings.go.in index d7680dd1bd..cd8dafb1fe 100644 --- a/src/runtime/pkg/katautils/config-settings.go.in +++ b/src/runtime/pkg/katautils/config-settings.go.in @@ -101,6 +101,7 @@ const defaultDisableSeccomp = false const defaultDisableGuestSeLinux = true const defaultVfioMode = "guest-kernel" const defaultLegacySerial = false +const defaultMeasurementAlgo = "sha512" var defaultSGXEPCSize = int64(0) diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index f15d945ca9..b9f81ee377 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -109,6 +109,7 @@ type hypervisor struct { RemoteHypervisorSocket string `toml:"remote_hypervisor_socket"` SnpIdBlock string `toml:"snp_id_block"` SnpIdAuth string `toml:"snp_id_auth"` + MeasurementAlgo string `toml:"measurement_algo"` HypervisorPathList []string `toml:"valid_hypervisor_paths"` JailerPathList []string `toml:"valid_jailer_paths"` VirtioFSDaemonList []string `toml:"valid_virtio_fs_daemon_paths"` @@ -407,6 +408,14 @@ func (h hypervisor) GetEntropySource() string { return h.EntropySource } +func (h hypervisor) GetMeasurementAlgo() string { + if h.MeasurementAlgo == "" { + return defaultMeasurementAlgo + } + + return h.MeasurementAlgo +} + var procCPUInfo = "/proc/cpuinfo" func getHostCPUs() uint32 { @@ -992,6 +1001,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { ExtraMonitorSocket: 
extraMonitorSocket, SnpIdBlock: h.SnpIdBlock, SnpIdAuth: h.SnpIdAuth, + MeasurementAlgo: h.GetMeasurementAlgo(), }, nil } @@ -1118,6 +1128,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { DiskRateLimiterBwOneTimeBurst: h.getDiskRateLimiterBwOneTimeBurst(), DiskRateLimiterOpsMaxRate: h.getDiskRateLimiterOpsMaxRate(), DiskRateLimiterOpsOneTimeBurst: h.getDiskRateLimiterOpsOneTimeBurst(), + MeasurementAlgo: h.GetMeasurementAlgo(), }, nil } diff --git a/src/runtime/pkg/katautils/config_test.go b/src/runtime/pkg/katautils/config_test.go index b7e8b049d0..9732e7895d 100644 --- a/src/runtime/pkg/katautils/config_test.go +++ b/src/runtime/pkg/katautils/config_test.go @@ -190,6 +190,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (testConfig testRuntime PFlash: []string{}, SGXEPCSize: epcSize, QgsPort: defaultQgsPort, + MeasurementAlgo: defaultMeasurementAlgo, } if goruntime.GOARCH == "arm64" && len(hypervisorConfig.PFlash) == 0 && hypervisorConfig.FirmwarePath == "" { diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index 22423ab122..7c39e85bb8 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -702,6 +702,10 @@ type HypervisorConfig struct { DefaultGPUs uint32 // DefaultGPUModel specifies GPU model like tesla, h100, readeon etc. 
DefaultGPUModel string + + // MeasurementAlgo is the algorithm for measurement + // This is only relevant for Arm CCA cca-guest objects + MeasurementAlgo string } // vcpu mapping from vcpu number to thread number @@ -1080,6 +1084,10 @@ const ( // https://www.kernel.org/doc/html/latest/virt/kvm/s390-pv.html // Exclude from lint checking for it won't be used on arm64 code seProtection + + // Arm Realm Management Extension (Arm Confidential Computing Architecture) + // https://www.arm.com/architecture/security-features/arm-confidential-compute-architecture + ccaProtection ) var guestProtectionStr = [...]string{ @@ -1089,6 +1097,7 @@ var guestProtectionStr = [...]string{ sevProtection: "sev", snpProtection: "snp", tdxProtection: "tdx", + ccaProtection: "cca", } func (gp guestProtection) String() string { diff --git a/src/runtime/virtcontainers/hypervisor_linux_arm64.go b/src/runtime/virtcontainers/hypervisor_linux_arm64.go index 9c6a13ea91..956d36aaa7 100644 --- a/src/runtime/virtcontainers/hypervisor_linux_arm64.go +++ b/src/runtime/virtcontainers/hypervisor_linux_arm64.go @@ -4,7 +4,62 @@ package virtcontainers -// Guest protection is not supported on ARM64. 
+/* +#include + +const int KVM_CAP_ARM_RME_ID = KVM_CAP_ARM_RME; +*/ +import "C" + +import ( + "github.com/sirupsen/logrus" + "syscall" +) + +// variables rather than consts to allow tests to modify them +var ( + kvmDevice = "/dev/kvm" +) + func availableGuestProtection() (guestProtection, error) { - return noneProtection, nil + ret, err := checkKVMExtensionsRME() + if err != nil { + return noneProtection, err + } + if ret == true { + return ccaProtection, nil + } else { + return noneProtection, nil + } +} + +// checkKVMExtensionsRME allows to query about the specific kvm extensions +func checkKVMExtensionsRME() (bool, error) { + flags := syscall.O_RDWR | syscall.O_CLOEXEC + kvm, err := syscall.Open(kvmDevice, flags, 0) + if err != nil { + return false, err + } + defer syscall.Close(kvm) + + logger := hvLogger.WithFields(logrus.Fields{ + "type": "kvm extension", + "description": "Realm Management Extension", + "id": C.KVM_CAP_ARM_RME_ID, + }) + ret, _, errno := syscall.Syscall(syscall.SYS_IOCTL, + uintptr(kvm), + uintptr(C.KVM_CHECK_EXTENSION), + uintptr(C.KVM_CAP_ARM_RME_ID)) + + // Generally return value(ret) 0 means no and 1 means yes, + // but some extensions may report additional information in the integer return value. 
+ if errno != 0 { + logger.Error("is not supported") + return false, errno + } + if int(ret) == 1 { + return true, nil + } + return false, nil } diff --git a/src/runtime/virtcontainers/hypervisor_linux_arm64_test.go b/src/runtime/virtcontainers/hypervisor_linux_arm64_test.go index 9b1d94cc66..b2bd540947 100644 --- a/src/runtime/virtcontainers/hypervisor_linux_arm64_test.go +++ b/src/runtime/virtcontainers/hypervisor_linux_arm64_test.go @@ -25,10 +25,3 @@ func TestRunningOnVMM(t *testing.T) { assert.NoError(err) assert.Equal(expectedOutput, running) } - -func TestAvailableGuestProtection(t *testing.T) { - assert := assert.New(t) - - out, _ := availableGuestProtection() - assert.Equal(out, noneProtection) -} diff --git a/src/runtime/virtcontainers/qemu_amd64_test.go b/src/runtime/virtcontainers/qemu_amd64_test.go index 2756cb2be0..f8cb53844b 100644 --- a/src/runtime/virtcontainers/qemu_amd64_test.go +++ b/src/runtime/virtcontainers/qemu_amd64_test.go @@ -275,6 +275,12 @@ func TestQemuAmd64AppendProtectionDevice(t *testing.T) { assert.Error(err) assert.Empty(bios) + // CCA protection + amd64.(*qemuAmd64).protection = ccaProtection + devices, bios, err = amd64.appendProtectionDevice(devices, firmware, "", []byte("")) + assert.Error(err) + assert.Empty(bios) + // sev protection amd64.(*qemuAmd64).protection = sevProtection diff --git a/src/runtime/virtcontainers/qemu_arm64.go b/src/runtime/virtcontainers/qemu_arm64.go index bfe6e2d405..6029e7e69f 100644 --- a/src/runtime/virtcontainers/qemu_arm64.go +++ b/src/runtime/virtcontainers/qemu_arm64.go @@ -11,16 +11,18 @@ import ( "context" "fmt" "os" - "runtime" "time" govmmQemu "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm/qemu" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" + "github.com/sirupsen/logrus" ) type qemuArm64 struct { // inherit from qemuArchBase, overwrite methods if needed qemuArchBase + + measurementAlgo string } const defaultQemuPath = 
"/usr/bin/qemu-system-aarch64" @@ -31,6 +33,14 @@ const qmpMigrationWaitTimeout = 10 * time.Second const defaultQemuMachineOptions = "usb=off,accel=kvm,gic-version=host" +const ( + // sha512 measurement Algorithm for Arm CCA RME + measurementAlgoSha512 string = "sha512" + + // sha256 measurement Algorithm for Arm CCA RME + measurementAlgoSha256 string = "sha256" +) + var kernelParams = []Param{ {"iommu.passthrough", "0"}, } @@ -51,7 +61,7 @@ func newQemuArch(config HypervisorConfig) (qemuArch, error) { } q := &qemuArm64{ - qemuArchBase{ + qemuArchBase: qemuArchBase{ qemuMachine: supportedQemuMachine, qemuExePath: defaultQemuPath, memoryOffset: config.MemOffset, @@ -63,6 +73,22 @@ func newQemuArch(config HypervisorConfig) (qemuArch, error) { protection: noneProtection, legacySerial: config.LegacySerial, }, + measurementAlgo: config.MeasurementAlgo, + } + + if config.ConfidentialGuest { + if err := q.enableProtection(); err != nil { + return nil, err + } + + if !q.qemuArchBase.disableNvdimm { + hvLogger.WithField("subsystem", "qemuArm64").Warn("Nvdimm is not supported with confidential guest, disabling it.") + q.qemuArchBase.disableNvdimm = true + } + + if q.measurementAlgo != measurementAlgoSha512 && q.measurementAlgo != measurementAlgoSha256 { + return nil, fmt.Errorf("invalid measurement algo: %v, should be sha512 or sha256", q.measurementAlgo) + } } if err := q.handleImagePath(config); err != nil { @@ -146,20 +172,46 @@ func (q *qemuArm64) getPFlash() ([]string, error) { } func (q *qemuArm64) enableProtection() error { - q.protection, _ = availableGuestProtection() - if q.protection != noneProtection { - return fmt.Errorf("Protection %v is not supported on arm64", q.protection) + var err error + q.protection, err = availableGuestProtection() + if err != nil { + return err } + if q.protection != ccaProtection { + return fmt.Errorf("Configured confidential guest but kvm does not supported") + } + logger := hvLogger.WithFields(logrus.Fields{ + "subsystem": 
"qemuArm64", + "machine": q.qemuMachine, + "kernel-params-debug": q.kernelParamsDebug, + "kernel-params-non-debug": q.kernelParamsNonDebug, + "kernel-params": q.kernelParams}) + if q.qemuMachine.Options != "" { + q.qemuMachine.Options += "," + } + q.qemuMachine.Options += "confidential-guest-support=rme0" + logger.Info("Enabling Arm CCA Realm protection") return nil } func (q *qemuArm64) appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string, initdataDigest []byte) ([]govmmQemu.Device, string, error) { - err := q.enableProtection() - if err != nil { - hvLogger.WithField("arch", runtime.GOARCH).Error(err) + switch q.protection { + case ccaProtection: + return append(devices, + govmmQemu.Object{ + Type: govmmQemu.CCAGuest, + ID: "rme0", + Debug: false, + File: firmware, + MeasurementAlgo: q.measurementAlgo, + InitdataDigest: initdataDigest, + }), "", nil + case noneProtection: + return devices, firmware, nil + default: + return devices, "", fmt.Errorf("Unsupported guest protection technology: %v", q.protection) } - return devices, firmware, err } func (q *qemuArm64) memoryTopology(memoryMb, hostMemoryMb uint64, slots uint8) govmmQemu.Memory { diff --git a/src/runtime/virtcontainers/qemu_arm64_test.go b/src/runtime/virtcontainers/qemu_arm64_test.go index aca9497412..56d23ea794 100644 --- a/src/runtime/virtcontainers/qemu_arm64_test.go +++ b/src/runtime/virtcontainers/qemu_arm64_test.go @@ -183,43 +183,61 @@ func TestQemuArm64AppendProtectionDevice(t *testing.T) { var err error // no protection - devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "", []byte(nil)) + devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "", []byte("")) assert.Empty(devices) assert.Empty(bios) assert.NoError(err) // PEF protection arm64.(*qemuArm64).protection = pefProtection - devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "", []byte(nil)) + devices, bios, err = arm64.appendProtectionDevice(devices, 
firmware, "", []byte("")) assert.Empty(devices) assert.Empty(bios) assert.NoError(err) // Secure Execution protection arm64.(*qemuArm64).protection = seProtection - devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "", []byte(nil)) + devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "", []byte("")) assert.Empty(devices) assert.Empty(bios) assert.NoError(err) // SEV protection arm64.(*qemuArm64).protection = sevProtection - devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "", []byte(nil)) + devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "", []byte("")) assert.Empty(devices) assert.Empty(bios) assert.NoError(err) // SNP protection arm64.(*qemuArm64).protection = snpProtection - devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "", []byte(nil)) + devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "", []byte("")) assert.Empty(devices) assert.Empty(bios) assert.NoError(err) // TDX protection arm64.(*qemuArm64).protection = tdxProtection - devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "", []byte(nil)) + devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "", []byte("")) assert.Empty(devices) assert.Empty(bios) assert.NoError(err) + + // CCA RME protection + arm64.(*qemuArm64).protection = ccaProtection + devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "", []byte("")) + assert.Empty(bios) + assert.NoError(err) + + expectedOut := []govmmQemu.Device{ + govmmQemu.Object{ + Type: govmmQemu.CCAGuest, + ID: "rme0", + Debug: false, + File: firmware, + MeasurementAlgo: "", + InitdataDigest: []byte(""), + }, + } + assert.Equal(expectedOut, devices) } diff --git a/src/runtime/virtcontainers/qemu_ppc64le_test.go b/src/runtime/virtcontainers/qemu_ppc64le_test.go index 7bb79bc0e5..f2e0e2f3e7 100644 --- a/src/runtime/virtcontainers/qemu_ppc64le_test.go +++ b/src/runtime/virtcontainers/qemu_ppc64le_test.go @@ 
-90,6 +90,12 @@ func TestQemuPPC64leAppendProtectionDevice(t *testing.T) { assert.Error(err) assert.Empty(bios) + // CCA protection + ppc64le.(*qemuPPC64le).protection = ccaProtection + devices, bios, err = ppc64le.appendProtectionDevice(devices, firmware, "", []byte(nil)) + assert.Error(err) + assert.Empty(bios) + //PEF protection ppc64le.(*qemuPPC64le).protection = pefProtection devices, bios, err = ppc64le.appendProtectionDevice(devices, firmware, "", []byte(nil)) diff --git a/src/runtime/virtcontainers/qemu_s390x_test.go b/src/runtime/virtcontainers/qemu_s390x_test.go index db88b4690f..90fdff8603 100644 --- a/src/runtime/virtcontainers/qemu_s390x_test.go +++ b/src/runtime/virtcontainers/qemu_s390x_test.go @@ -141,6 +141,12 @@ func TestQemuS390xAppendProtectionDevice(t *testing.T) { assert.Error(err) assert.Empty(bios) + // CCA protection + s390x.(*qemuS390x).protection = ccaProtection + devices, bios, err = s390x.appendProtectionDevice(devices, firmware, "", []byte(nil)) + assert.Error(err) + assert.Empty(bios) + // Secure Execution protection s390x.(*qemuS390x).protection = seProtection diff --git a/tools/packaging/kata-deploy/local-build/Makefile b/tools/packaging/kata-deploy/local-build/Makefile index bd1fb7031b..de4852e2ac 100644 --- a/tools/packaging/kata-deploy/local-build/Makefile +++ b/tools/packaging/kata-deploy/local-build/Makefile @@ -49,6 +49,19 @@ BASE_TARBALLS = serial-targets \ virtiofsd-tarball BASE_SERIAL_TARBALLS = rootfs-image-tarball \ rootfs-initrd-tarball +else ifeq ($(ARCH), aarch64) +BASE_TARBALLS = serial-targets \ + kernel-confidential-tarball \ + kernel-cca-confidential-tarball \ + kernel-tarball \ + qemu-tarball \ + qemu-cca-experimental-tarball \ + shim-v2-tarball \ + virtiofsd-tarball +BASE_SERIAL_TARBALLS = rootfs-image-tarball \ + rootfs-cca-confidential-image-tarball \ + rootfs-cca-confidential-initrd-tarball \ + rootfs-initrd-tarball endif define BUILD @@ -135,6 +148,9 @@ kernel-tarball: kernel-confidential-tarball: ${MAKE} $@-build
+kernel-cca-confidential-tarball: + ${MAKE} $@-build + nydus-tarball: ${MAKE} $@-build @@ -150,6 +166,9 @@ qemu-snp-experimental-tarball: qemu-tdx-experimental-tarball: ${MAKE} $@-build +qemu-cca-experimental-tarball: + ${MAKE} $@-build + qemu-tarball: ${MAKE} $@-build @@ -188,9 +207,21 @@ rootfs-image-nvidia-gpu-confidential-tarball: agent-tarball busybox-tarball paus rootfs-initrd-nvidia-gpu-confidential-tarball: agent-tarball busybox-tarball pause-image-tarball coco-guest-components-tarball kernel-nvidia-gpu-confidential-tarball ${MAKE} $@-build +rootfs-cca-confidential-image-tarball: agent-tarball pause-image-tarball coco-guest-components-tarball kernel-cca-confidential-tarball + ${MAKE} $@-build + +rootfs-cca-confidential-initrd-tarball: agent-tarball pause-image-tarball coco-guest-components-tarball kernel-cca-confidential-tarball + ${MAKE} $@-build + shim-v2-tarball: ${MAKE} $@-build +# The shim-v2 build for aarch64 needs the kernel-headers tar file from kernel-cca-confidential kernel. 
+ifeq ($(ARCH), aarch64) +shim-v2-tarball-build: kernel-cca-confidential-tarball-build + $(call BUILD,shim-v2) +endif + trace-forwarder-tarball: copy-scripts-for-the-tools-build ${MAKE} $@-build diff --git a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh index 8ec9a7884b..9c5609a1bc 100755 --- a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh +++ b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh @@ -114,6 +114,7 @@ options: kata-manager kernel kernel-confidential + kernel-cca-confidential kernel-dragonball-experimental kernel-experimental kernel-nvidia-gpu @@ -123,6 +124,7 @@ options: ovmf ovmf-sev qemu + qemu-cca-experimental qemu-snp-experimental qemu-tdx-experimental stratovirt @@ -160,17 +162,22 @@ get_kernel_modules_dir() { local version=${kernel_version#v} local numeric_final_version=${version} - # Every first release of a kernel is x.y, while the resulting folder would be x.y.0 - local rc=$(echo ${version} | grep -oE "\-rc[0-9]+$") - if [ -n "${rc}" ]; then - numeric_final_version="${numeric_final_version%"${rc}"}" - fi + if [ -z "${kernel_ref}" ]; then + # Every first release of a kernel is x.y, while the resulting folder would be x.y.0 + local rc=$(echo ${version} | grep -oE "\-rc[0-9]+$") + if [ -n "${rc}" ]; then + numeric_final_version="${numeric_final_version%"${rc}"}" + fi - local dots=$(echo ${version} | grep -o '\.' | wc -l) - [ "${dots}" == "1" ] && numeric_final_version="${numeric_final_version}.0" + local dots=$(echo ${version} | grep -o '\.' 
| wc -l) + [ "${dots}" == "1" ] && numeric_final_version="${numeric_final_version}.0" - if [ -n "${rc}" ]; then - numeric_final_version="${numeric_final_version}${rc}" + if [ -n "${rc}" ]; then + numeric_final_version="${numeric_final_version}${rc}" + fi + else + # kernel_version should be vx.y.z-rcn-hash format when git is used + numeric_final_version="${numeric_final_version%-*}+" fi local kernel_modules_dir="${repo_root_dir}/tools/packaging/kata-deploy/local-build/build/${kernel_name}/builddir/kata-linux-${version}-${kernel_kata_config_version}/lib/modules/${numeric_final_version}" @@ -608,7 +615,7 @@ install_cached_kernel_tarball_component() { || return 1 case ${kernel_name} in - "kernel-nvidia-gpu"*"") + "kernel-nvidia-gpu"*"" | "kernel-cca-confidential") local kernel_headers_dir=$(get_kernel_headers_dir "${kernel_name}") mkdir -p ${kernel_headers_dir} || true tar xvf ${workdir}/${kernel_name}/builddir/kata-static-${kernel_name}-headers.tar.xz -C "${kernel_headers_dir}" || return 1 @@ -632,9 +639,10 @@ install_kernel_helper() { export kernel_version="$(get_from_kata_deps .${kernel_yaml_path}.version)" export kernel_url="$(get_from_kata_deps .${kernel_yaml_path}.url)" + export kernel_ref="$(get_from_kata_deps .${kernel_yaml_path}.ref)" export kernel_kata_config_version="$(cat ${repo_root_dir}/tools/packaging/kernel/kata_config_version)" - if [[ "${kernel_name}" == "kernel"*"-confidential" ]]; then + if [[ "${kernel_name}" == "kernel"*"-confidential" ]] && [[ "${ARCH}" == "x86_64" ]]; then kernel_version="$(get_from_kata_deps .assets.kernel.confidential.version)" kernel_url="$(get_from_kata_deps .assets.kernel.confidential.url)" fi @@ -645,7 +653,7 @@ install_kernel_helper() { extra_tarballs="${kernel_modules_tarball_name}:${kernel_modules_tarball_path}" fi - if [[ "${kernel_name}" == "kernel-nvidia-gpu*" ]]; then + if [[ "${kernel_name}" == "kernel-nvidia-gpu*" ]] || [[ "${kernel_name}" == "kernel-cca-confidential" ]]; then local 
kernel_headers_tarball_name="kata-static-${kernel_name}-headers.tar.xz" local kernel_headers_tarball_path="${workdir}/${kernel_headers_tarball_name}" extra_tarballs+=" ${kernel_headers_tarball_name}:${kernel_headers_tarball_path}" @@ -657,6 +665,9 @@ install_kernel_helper() { info "build ${kernel_name}" info "Kernel version ${kernel_version}" + if [ -n "${kernel_ref}" ]; then + extra_cmd+=" -r ${kernel_ref}" + fi DESTDIR="${destdir}" PREFIX="${prefix}" "${kernel_builder}" -v "${kernel_version}" -f -u "${kernel_url}" "${extra_cmd}" } @@ -681,6 +692,15 @@ install_kernel_confidential() { "-x" } +install_kernel_cca_confidential() { + export MEASURED_ROOTFS=yes + + install_kernel_helper \ + "assets.kernel-arm-experimental.confidential" \ + "kernel-confidential" \ + "-x -H deb" +} + install_kernel_dragonball_experimental() { install_kernel_helper \ "assets.kernel-dragonball-experimental" \ @@ -746,6 +766,17 @@ install_qemu() { "${qemu_builder}" } +install_qemu_cca_experimental() { + export qemu_suffix="cca-experimental" + export qemu_tarball_name="kata-static-qemu-${qemu_suffix}.tar.gz" + + install_qemu_helper \ + "assets.hypervisor.qemu-${qemu_suffix}.url" \ + "assets.hypervisor.qemu-${qemu_suffix}.tag" \ + "qemu-${qemu_suffix}" \ + "${qemu_experimental_builder}" +} + install_qemu_snp_experimental() { export qemu_suffix="snp-experimental" export qemu_tarball_name="kata-static-qemu-${qemu_suffix}.tar.gz" @@ -1190,6 +1221,7 @@ handle_build() { install_kata_manager install_kernel install_kernel_confidential + install_kernel_cca_confidential install_kernel_dragonball_experimental install_log_parser_rs install_nydus @@ -1233,6 +1265,8 @@ handle_build() { kernel-confidential) install_kernel_confidential ;; + kernel-cca-confidential) install_kernel_cca_confidential ;; + kernel-dragonball-experimental) install_kernel_dragonball_experimental ;; kernel-nvidia-gpu-dragonball-experimental) install_kernel_nvidia_gpu_dragonball_experimental ;; @@ -1251,6 +1285,8 @@ handle_build() { 
qemu) install_qemu ;; + qemu-cca-experimental) install_qemu_cca_experimental ;; + qemu-snp-experimental) install_qemu_snp_experimental ;; qemu-tdx-experimental) install_qemu_tdx_experimental ;; @@ -1275,6 +1311,10 @@ handle_build() { rootfs-initrd-nvidia-gpu-confidential) install_initrd_nvidia_gpu_confidential ;; + rootfs-cca-confidential-image) install_image_confidential ;; + + rootfs-cca-confidential-initrd) install_initrd_confidential ;; + runk) install_runk ;; shim-v2) install_shimv2 ;; @@ -1299,7 +1339,7 @@ handle_build() { tar tvf "${final_tarball_path}" case ${build_target} in - kernel-nvidia-gpu*) + kernel-nvidia-gpu* | kernel-cca-confidential) local kernel_headers_final_tarball_path="${workdir}/kata-static-${build_target}-headers.tar.xz" if [ ! -f "${kernel_headers_final_tarball_path}" ]; then local kernel_headers_dir @@ -1401,7 +1441,7 @@ handle_build() { "kata-static-${build_target}-headers.tar.xz" ) ;; - kernel-nvidia-gpu-confidential) + kernel-nvidia-gpu-confidential | kernel-cca-confidential) files_to_push+=( "kata-static-${build_target}-modules.tar.xz" "kata-static-${build_target}-headers.tar.xz" diff --git a/tools/packaging/kernel/build-kernel.sh b/tools/packaging/kernel/build-kernel.sh index f08a5d4e30..671c459cb7 100755 --- a/tools/packaging/kernel/build-kernel.sh +++ b/tools/packaging/kernel/build-kernel.sh @@ -65,6 +65,8 @@ PREFIX="${PREFIX:-/usr}" kernel_url="" #Linux headers for GPU guest fs module building linux_headers="" +# Kernel Reference to download using git +kernel_ref="" # Enable measurement of the guest rootfs at boot. measured_rootfs="false" @@ -109,6 +111,7 @@ Options: -m : Enable measured rootfs. -k : Path to kernel to build. -p : Path to a directory with patches to apply to kernel. + -r : Enable git mode to download kernel using ref. -s : Skip .config checks -t : Hypervisor_target. -u : Kernel URL to be used to download the kernel tarball. 
@@ -138,6 +141,26 @@ check_initramfs_or_die() { die "Initramfs for measured rootfs not found at ${default_initramfs}" } +get_git_kernel() { + local kernel_path="${2:-}" + + if [ ! -d "${kernel_path}" ] ; then + mkdir -p "${kernel_path}" + pushd "${kernel_path}" + local kernel_git_url="https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git" + if [ -n "${kernel_url}" ]; then + kernel_git_url="${kernel_url}" + fi + git init + git remote add origin "${kernel_git_url}" + popd + fi + pushd "${kernel_path}" + git fetch --depth 1 origin "${kernel_ref}" + git checkout "${kernel_ref}" + popd +} + get_kernel() { local version="${1:-}" @@ -341,6 +364,10 @@ get_kernel_frag_path() { results=$(grep "${not_in_string}" <<< "$results") # Do not care about options that are in whitelist results=$(grep -v -f ${default_config_whitelist} <<< "$results") + local version_config_whitelist="${default_config_whitelist%.*}-${kernel_version}.conf" + if [ -f ${version_config_whitelist} ]; then + results=$(grep -v -f ${version_config_whitelist} <<< "$results") + fi [[ "${skip_config_checks}" == "true" ]] && echo "${config_path}" && return @@ -441,7 +468,11 @@ setup_kernel() { [ -n "$kernel_version" ] || die "failed to get kernel version: Kernel version is emtpy" if [[ ${download_kernel} == "true" ]]; then - get_kernel "${kernel_version}" "${kernel_path}" + if [ -z "${kernel_ref}" ]; then + get_kernel "${kernel_version}" "${kernel_path}" + else + get_git_kernel "${kernel_version}" "${kernel_path}" + fi fi [ -n "$kernel_path" ] || die "failed to find kernel source path" @@ -591,7 +622,7 @@ install_kata() { } main() { - while getopts "a:b:c:dD:eEfg:hH:k:mp:st:u:v:x" opt; do + while getopts "a:b:c:dD:eEfg:hH:k:mp:r:st:u:v:x" opt; do case "$opt" in a) arch_target="${OPTARG}" @@ -638,6 +669,9 @@ main() { p) patches_path="${OPTARG}" ;; + r) + kernel_ref="${OPTARG}" + ;; s) skip_config_checks="true" ;; diff --git a/tools/packaging/kernel/configs/fragments/arm64/confidential/cca.conf 
b/tools/packaging/kernel/configs/fragments/arm64/confidential/cca.conf new file mode 100644 index 0000000000..c4eaf2c839 --- /dev/null +++ b/tools/packaging/kernel/configs/fragments/arm64/confidential/cca.conf @@ -0,0 +1,9 @@ +CONFIG_VIRT_DRIVERS=y +CONFIG_TSM_REPORTS=y +CONFIG_ARM_CCA_GUEST=y +CONFIG_HW_RANDOM=y +CONFIG_HW_RANDOM_VIRTIO=y +CONFIG_ACPI_PROCESSOR=y +CONFIG_HOTPLUG_CPU=y +CONFIG_ACPI_HOTPLUG_CPU=y +CONFIG_RODATA_FULL_DEFAULT_ENABLED=y diff --git a/tools/packaging/kernel/configs/fragments/arm64/confidential/hotplug.conf b/tools/packaging/kernel/configs/fragments/arm64/confidential/hotplug.conf new file mode 100644 index 0000000000..3a71fa8b58 --- /dev/null +++ b/tools/packaging/kernel/configs/fragments/arm64/confidential/hotplug.conf @@ -0,0 +1,3 @@ +# Define hotplugs to be online immediately. Speeds things up, and makes things +# work smoother on some arch's. +CONFIG_MHP_DEFAULT_ONLINE_TYPE_ONLINE_AUTO=y diff --git a/tools/packaging/kernel/configs/fragments/arm64/confidential/rme.conf b/tools/packaging/kernel/configs/fragments/arm64/confidential/rme.conf new file mode 100644 index 0000000000..58091ddecd --- /dev/null +++ b/tools/packaging/kernel/configs/fragments/arm64/confidential/rme.conf @@ -0,0 +1,3 @@ +CONFIG_ARCH_HAS_CC_PLATFORM=y +CONFIG_ARCH_HAS_MEM_ENCRYPT=y +CONFIG_ARCH_HAS_FORCE_DMA_UNENCRYPTED=y diff --git a/tools/packaging/kernel/configs/fragments/whitelist-6.15.0-rc1-916aeec68dd4500a1cdf4ebf214c5620955daf3f.conf b/tools/packaging/kernel/configs/fragments/whitelist-6.15.0-rc1-916aeec68dd4500a1cdf4ebf214c5620955daf3f.conf new file mode 100644 index 0000000000..bcdae9ed16 --- /dev/null +++ b/tools/packaging/kernel/configs/fragments/whitelist-6.15.0-rc1-916aeec68dd4500a1cdf4ebf214c5620955daf3f.conf @@ -0,0 +1,9 @@ +# CONFIG_RANDOM_TRUST_CPU is removed from config since v6.2 +# https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=b9b01a5625b5a9e9d96d14d4a813a54e8a124f4b +CONFIG_RANDOM_TRUST_CPU +# 
CONFIG_ACPI_HOTPLUG_CPU is disabled arm64 and riscv since v6.8 +# https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=a02f66bb3cf475947b58dd3851b987b8ccd998c1 +CONFIG_ACPI_HOTPLUG_CPU +# CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE is disabled since v6.14 +# https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=44d46b76c3a4b514a0cc9dab147ed430e5c1d699 +CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE diff --git a/tools/packaging/kernel/kata_config_version b/tools/packaging/kernel/kata_config_version index a76256037d..9386c220a1 100644 --- a/tools/packaging/kernel/kata_config_version +++ b/tools/packaging/kernel/kata_config_version @@ -1 +1 @@ -160 +161 diff --git a/tools/packaging/kernel/patches/6.15.x/no_patches.txt b/tools/packaging/kernel/patches/6.15.x/no_patches.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tools/packaging/qemu/patches/tag_patches/97345ddc501d3eb45bbbf15d97608fba0c2c0c7b/no_patches.txt b/tools/packaging/qemu/patches/tag_patches/97345ddc501d3eb45bbbf15d97608fba0c2c0c7b/no_patches.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tools/packaging/static-build/coco-guest-components/build.sh b/tools/packaging/static-build/coco-guest-components/build.sh index ff7143c54d..abc9ed1ac7 100755 --- a/tools/packaging/static-build/coco-guest-components/build.sh +++ b/tools/packaging/static-build/coco-guest-components/build.sh @@ -46,6 +46,7 @@ RESOURCE_PROVIDER="kbs,sev" case "$(uname -m)" in x86_64) ATTESTER="snp-attester,tdx-attester" ;; s390x) ATTESTER="se-attester" ;; + aarch64) ATTESTER="cca-attester" ;; *) ATTESTER="none" ;; esac diff --git a/tools/packaging/static-build/kernel/Dockerfile b/tools/packaging/static-build/kernel/Dockerfile index d5eba7b2df..cb44138165 100644 --- a/tools/packaging/static-build/kernel/Dockerfile +++ b/tools/packaging/static-build/kernel/Dockerfile @@ -22,6 +22,7 @@ RUN apt-get update && \ kmod \ libelf-dev \ libssl-dev \ + python3 \ gettext \ rsync \ cpio \ 
diff --git a/tools/packaging/static-build/shim-v2/Dockerfile b/tools/packaging/static-build/shim-v2/Dockerfile index 90d3181c88..67364a8e4f 100644 --- a/tools/packaging/static-build/shim-v2/Dockerfile +++ b/tools/packaging/static-build/shim-v2/Dockerfile @@ -58,3 +58,6 @@ RUN ARCH=$(uname -m); \ curl -OL "https://storage.googleapis.com/golang/go${GO_VERSION}.${kernelname}-${goarch}.tar.gz" && \ tar -C "${GO_HOME}" -xzf "go${GO_VERSION}.${kernelname}-${goarch}.tar.gz" && \ rm "go${GO_VERSION}.${kernelname}-${goarch}.tar.gz" + +COPY pkg pkg +RUN dpkg -i pkg/linux-* || true diff --git a/tools/packaging/static-build/shim-v2/build.sh b/tools/packaging/static-build/shim-v2/build.sh index 3baef6d807..4d566ef3c6 100755 --- a/tools/packaging/static-build/shim-v2/build.sh +++ b/tools/packaging/static-build/shim-v2/build.sh @@ -49,6 +49,11 @@ if [ "${MEASURED_ROOTFS}" == "yes" ]; then EXTRA_OPTS+=" ROOTMEASURECONFIG=\"${root_measure_config}\"" fi +# add kernel header package +rm -rf "${script_dir}/pkg" +mkdir -p "${script_dir}/pkg" +find ${repo_root_dir}/tools/packaging/kata-deploy/local-build/build/ -maxdepth 1 -name "kata-static-*-headers.tar.xz" -exec tar -Jxvf {} -C ${script_dir}/pkg \; + docker pull ${container_image} || \ (docker ${BUILDX} build ${PLATFORM} \ --build-arg GO_VERSION="${GO_VERSION}" \ diff --git a/versions.yaml b/versions.yaml index 18af53c4ae..c58b0e3739 100644 --- a/versions.yaml +++ b/versions.yaml @@ -99,6 +99,11 @@ assets: https://github.com/qemu/qemu/tags .*/v?(\d\S+)\.tar\.gz + qemu-cca-experimental: + description: "QEMU with experimental CCA support" + url: "https://git.codelinaro.org/linaro/dcap/qemu.git" + tag: "97345ddc501d3eb45bbbf15d97608fba0c2c0c7b" + qemu-snp-experimental: description: "QEMU with GPU+SNP support" url: "https://github.com/confidential-containers/qemu.git" @@ -123,6 +128,9 @@ assets: aarch64: name: "ubuntu" version: "noble" # 24.04 LTS + confidential: + name: "ubuntu" + version: "noble" # 24.04 LTS nvidia-gpu: name: "ubuntu" 
version: "noble" # 24.04 LTS @@ -163,6 +171,9 @@ assets: aarch64: name: "alpine" version: "3.18" + confidential: + name: "ubuntu" + version: "noble" # 24.04 LTS nvidia-gpu: name: "ubuntu" version: "noble" # 24.04 LTS @@ -206,6 +217,11 @@ assets: description: "Linux kernel with cpu/mem hotplug support on arm64" url: "https://cdn.kernel.org/pub/linux/kernel/v5.x/" version: "v5.15.138" + confidential: + description: "Linux kernel with RME support on arm64" + url: "https://gitlab.arm.com/linux-arm/linux-cca" + version: "v6.15.0-rc1-916aeec68dd4500a1cdf4ebf214c5620955daf3f" + ref: "916aeec68dd4500a1cdf4ebf214c5620955daf3f" kernel-dragonball-experimental: description: "Linux kernel with Dragonball VMM optimizations like upcall"