diff --git a/.github/workflows/build-kata-static-tarball-amd64.yaml b/.github/workflows/build-kata-static-tarball-amd64.yaml index 8432e77148..a7f3bdc197 100644 --- a/.github/workflows/build-kata-static-tarball-amd64.yaml +++ b/.github/workflows/build-kata-static-tarball-amd64.yaml @@ -20,11 +20,14 @@ jobs: - firecracker - kernel - kernel-dragonball-experimental + - kernel-tdx-experimental - nydus - qemu + - qemu-tdx-experimental - rootfs-image - rootfs-initrd - shim-v2 + - tdvf - virtiofsd steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/ci-on-push.yaml b/.github/workflows/ci-on-push.yaml index 2dfd6c728d..6db1cda72f 100644 --- a/.github/workflows/ci-on-push.yaml +++ b/.github/workflows/ci-on-push.yaml @@ -28,3 +28,11 @@ jobs: repo: ${{ github.repository_owner }}/kata-deploy-ci tag: ${{ github.event.pull_request.number }}-${{ github.event.pull_request.head.sha }}-amd64 secrets: inherit + + run-k8s-tests-on-tdx: + needs: publish-kata-deploy-payload-amd64 + uses: ./.github/workflows/run-k8s-tests-on-tdx.yaml + with: + registry: ghcr.io + repo: ${{ github.repository_owner }}/kata-deploy-ci + tag: ${{ github.event.pull_request.number }}-${{ github.event.pull_request.head.sha }}-amd64 diff --git a/.github/workflows/run-k8s-tests-on-tdx.yaml b/.github/workflows/run-k8s-tests-on-tdx.yaml new file mode 100644 index 0000000000..78e5d5a89a --- /dev/null +++ b/.github/workflows/run-k8s-tests-on-tdx.yaml @@ -0,0 +1,50 @@ +name: CI | Run kubernetes tests on TDX +on: + workflow_call: + inputs: + registry: + required: true + type: string + repo: + required: true + type: string + tag: + required: true + type: string + +jobs: + run-k8s-tests: + strategy: + fail-fast: false + matrix: + vmm: + - qemu-tdx + runs-on: tdx + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.head.sha }} + + - name: Deploy kata-deploy + run: | + sed -i -e "s|quay.io/kata-containers/kata-deploy:latest|${{ inputs.registry }}/${{ inputs.repo }}:${{ inputs.tag }}|g" tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml + cat tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml + cat tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml | grep "${{ inputs.registry }}/${{ inputs.repo }}:${{ inputs.tag }}" || die "Failed to setup the tests image" + + kubectl apply -f tools/packaging/kata-deploy/kata-rbac/base/kata-rbac.yaml + kubectl apply -f tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml + kubectl -n kube-system wait --timeout=10m --for=condition=Ready -l name=kata-deploy pod + kubectl apply -f tools/packaging/kata-deploy/runtimeclasses/kata-runtimeClasses.yaml + env: + KUBECONFIG: /etc/rancher/k3s/k3s.yaml + + - name: Run tests + timeout-minutes: 30 + run: | + pushd tests/integration/kubernetes + sed -i -e 's|runtimeClassName: kata|runtimeClassName: kata-${{ matrix.vmm }}|' runtimeclass_workloads/*.yaml + bash run_kubernetes_tests.sh + popd + env: + KATA_HYPERVISOR: ${{ matrix.vmm }} + KUBECONFIG: /etc/rancher/k3s/k3s.yaml diff --git a/src/runtime/Makefile b/src/runtime/Makefile index 95efaff78f..6f719f1cbd 100644 --- a/src/runtime/Makefile +++ b/src/runtime/Makefile @@ -95,6 +95,7 @@ GENERATED_VARS = \ VERSION \ CONFIG_ACRN_IN \ CONFIG_QEMU_IN \ + CONFIG_QEMU_TDX_IN \ CONFIG_CLH_IN \ CONFIG_FC_IN \ $(USER_VARS) @@ -121,6 +122,9 @@ DEFROOTFSTYPE := $(ROOTFSTYPE_EXT4) FIRMWAREPATH := FIRMWAREVOLUMEPATH := +FIRMWARETDVFPATH := $(PREFIXDEPS)/share/tdvf/OVMF.fd +FIRMWARETDVFVOLUMEPATH := + # Name of default configuration file the runtime will use. CONFIG_FILE = configuration.toml @@ -138,6 +142,9 @@ HYPERVISORS := $(HYPERVISOR_ACRN) $(HYPERVISOR_FC) $(HYPERVISOR_QEMU) $(HYPERVIS QEMUPATH := $(QEMUBINDIR)/$(QEMUCMD) QEMUVALIDHYPERVISORPATHS := [\"$(QEMUPATH)\"] +QEMUTDXPATH := $(QEMUBINDIR)/$(QEMUTDXCMD) +QEMUTDXVALIDHYPERVISORPATHS := [\"$(QEMUTDXPATH)\"] + QEMUVIRTIOFSPATH := $(QEMUBINDIR)/$(QEMUVIRTIOFSCMD) CLHPATH := $(CLHBINDIR)/$(CLHCMD) @@ -195,6 +202,7 @@ DEFVALIDENTROPYSOURCES := [\"/dev/urandom\",\"/dev/random\",\"\"] DEFDISABLEBLOCK := false DEFSHAREDFS_CLH_VIRTIOFS := virtio-fs DEFSHAREDFS_QEMU_VIRTIOFS := virtio-fs +DEFSHAREDFS_QEMU_TDX_VIRTIOFS := virtio-9p DEFVIRTIOFSDAEMON := $(LIBEXECDIR)/virtiofsd ifeq ($(ARCH),ppc64le) DEFVIRTIOFSDAEMON := $(LIBEXECDIR)/qemu/virtiofsd @@ -265,13 +273,30 @@ ifneq (,$(QEMUCMD)) CONFIGS += $(CONFIG_QEMU) + CONFIG_FILE_QEMU_TDX = configuration-qemu-tdx.toml + CONFIG_QEMU_TDX = config/$(CONFIG_FILE_QEMU_TDX) + CONFIG_QEMU_TDX_IN = $(CONFIG_QEMU_TDX).in + + CONFIG_PATH_QEMU_TDX = $(abspath $(CONFDIR)/$(CONFIG_FILE_QEMU_TDX)) + CONFIG_PATHS += $(CONFIG_PATH_QEMU_TDX) + + SYSCONFIG_QEMU_TDX = $(abspath $(SYSCONFDIR)/$(CONFIG_FILE_QEMU_TDX)) + SYSCONFIG_PATHS_TDX += $(SYSCONFIG_QEMU_TDX) + + CONFIGS += $(CONFIG_QEMU_TDX) + # qemu-specific options (all should be suffixed by "_QEMU") DEFBLOCKSTORAGEDRIVER_QEMU := virtio-scsi DEFBLOCKDEVICEAIO_QEMU := io_uring DEFNETWORKMODEL_QEMU := tcfilter + KERNELTYPE = uncompressed KERNELNAME = $(call MAKE_KERNEL_NAME,$(KERNELTYPE)) KERNELPATH = $(KERNELDIR)/$(KERNELNAME) + + KERNELTDXTYPE = compressed + KERNELTDXNAME = $(call MAKE_KERNEL_TDX_NAME,$(KERNELTDXTYPE)) + KERNELTDXPATH = $(KERNELDIR)/$(KERNELTDXNAME) endif ifneq (,$(CLHCMD)) @@ -427,15 +452,20 @@ USER_VARS += KERNELTYPE_ACRN USER_VARS += KERNELTYPE_CLH USER_VARS += KERNELPATH_ACRN USER_VARS += KERNELPATH +USER_VARS += KERNELTDXPATH USER_VARS += KERNELPATH_CLH USER_VARS += KERNELPATH_FC USER_VARS += KERNELVIRTIOFSPATH USER_VARS += FIRMWAREPATH +USER_VARS += FIRMWARETDVFPATH USER_VARS += FIRMWAREVOLUMEPATH +USER_VARS += FIRMWARETDVFVOLUMEPATH USER_VARS += MACHINEACCELERATORS USER_VARS += CPUFEATURES +USER_VARS += TDXCPUFEATURES USER_VARS += DEFMACHINETYPE_CLH USER_VARS += KERNELPARAMS +USER_VARS += KERNELTDXPARAMS USER_VARS += LIBEXECDIR USER_VARS += LOCALSTATEDIR USER_VARS += PKGDATADIR @@ -451,8 +481,11 @@ USER_VARS += PROJECT_TYPE USER_VARS += PROJECT_URL USER_VARS += QEMUBINDIR USER_VARS += QEMUCMD +USER_VARS += QEMUTDXCMD USER_VARS += QEMUPATH +USER_VARS += QEMUTDXPATH USER_VARS += QEMUVALIDHYPERVISORPATHS +USER_VARS += QEMUTDXVALIDHYPERVISORPATHS USER_VARS += QEMUVIRTIOFSCMD USER_VARS += QEMUVIRTIOFSPATH USER_VARS += RUNTIME_NAME @@ -482,6 +515,7 @@ USER_VARS += DEFBLOCKSTORAGEDRIVER_QEMU USER_VARS += DEFBLOCKDEVICEAIO_QEMU USER_VARS += DEFSHAREDFS_CLH_VIRTIOFS USER_VARS += DEFSHAREDFS_QEMU_VIRTIOFS +USER_VARS += DEFSHAREDFS_QEMU_TDX_VIRTIOFS USER_VARS += DEFVIRTIOFSDAEMON USER_VARS += DEFVALIDVIRTIOFSDAEMONPATHS USER_VARS += DEFVIRTIOFSCACHESIZE @@ -587,6 +621,10 @@ define MAKE_KERNEL_VIRTIOFS_NAME $(if $(findstring uncompressed,$1),vmlinux-virtiofs.container,vmlinuz-virtiofs.container) endef +define MAKE_KERNEL_TDX_NAME +$(if $(findstring uncompressed,$1),vmlinux-tdx.container,vmlinuz-tdx.container) +endef + GENERATED_FILES += pkg/katautils/config-settings.go $(RUNTIME_OUTPUT): $(SOURCES) $(GENERATED_FILES) $(MAKEFILE_LIST) | show-summary diff --git a/src/runtime/arch/amd64-options.mk b/src/runtime/arch/amd64-options.mk index 6c1c9e9671..ab82539ba1 100644 --- a/src/runtime/arch/amd64-options.mk +++ b/src/runtime/arch/amd64-options.mk @@ -11,6 +11,8 @@ MACHINEACCELERATORS := CPUFEATURES := pmu=off QEMUCMD := qemu-system-x86_64 +QEMUTDXCMD := qemu-system-x86_64-tdx-experimental +TDXCPUFEATURES := -vmx-rdseed-exit,pmu=off # Firecracker binary name FCCMD := firecracker diff --git a/src/runtime/config/configuration-qemu-tdx.toml.in b/src/runtime/config/configuration-qemu-tdx.toml.in new file mode 100644 index 0000000000..6cecabdba9 --- /dev/null +++ b/src/runtime/config/configuration-qemu-tdx.toml.in @@ -0,0 +1,686 @@ +# Copyright (c) 2017-2019 Intel Corporation +# Copyright (c) 2021 Adobe Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +# XXX: WARNING: this file is auto-generated. +# XXX: +# XXX: Source file: "@CONFIG_QEMU_IN@" +# XXX: Project: +# XXX: Name: @PROJECT_NAME@ +# XXX: Type: @PROJECT_TYPE@ + +[hypervisor.qemu] +path = "@QEMUTDXPATH@" +kernel = "@KERNELTDXPATH@" +image = "@IMAGEPATH@" +# initrd = "@INITRDPATH@" +machine_type = "@MACHINETYPE@" + +# rootfs filesystem type: +# - ext4 (default) +# - xfs +# - erofs +rootfs_type=@DEFROOTFSTYPE@ + +# Enable confidential guest support. +# Toggling that setting may trigger different hardware features, ranging +# from memory encryption to both memory and CPU-state encryption and integrity. +# The Kata Containers runtime dynamically detects the available feature set and +# aims at enabling the largest possible one, returning an error if none is +# available, or none is supported by the hypervisor. +# +# Known limitations: +# * Does not work by design: +# - CPU Hotplug +# - Memory Hotplug +# - NVDIMM devices +# +# Default false +confidential_guest = true + +# Enable running QEMU VMM as a non-root user. +# By default QEMU VMM run as root. When this is set to true, QEMU VMM process runs as +# a non-root random user. See documentation for the limitations of this mode. +# rootless = true + +# List of valid annotation names for the hypervisor +# Each member of the list is a regular expression, which is the base name +# of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path" +enable_annotations = @DEFENABLEANNOTATIONS@ + +# List of valid annotations values for the hypervisor +# Each member of the list is a path pattern as described by glob(3). +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: @QEMUVALIDHYPERVISORPATHS@ +valid_hypervisor_paths = @QEMUTDXVALIDHYPERVISORPATHS@ + +# Optional space-separated list of options to pass to the guest kernel. +# For example, use `kernel_params = "vsyscall=emulate"` if you are having +# trouble running pre-2.15 glibc. +# +# WARNING: - any parameter specified here will take priority over the default +# parameter value of the same name used to start the virtual machine. +# Do not set values here unless you understand the impact of doing so as you +# may stop the virtual machine from booting. +# To see the list of default parameters, enable hypervisor debug, create a +# container and look for 'default-kernel-parameters' log entries. +kernel_params = "@KERNELTDXPARAMS@" + +# Path to the firmware. +# If you want that qemu uses the default firmware leave this option empty +firmware = "@FIRMWARETDVFPATH@" + +# Path to the firmware volume. +# firmware TDVF or OVMF can be split into FIRMWARE_VARS.fd (UEFI variables +# as configuration) and FIRMWARE_CODE.fd (UEFI program image). UEFI variables +# can be customized per each user while UEFI code is kept same. +firmware_volume = "@FIRMWARETDVFVOLUMEPATH@" + +# Machine accelerators +# comma-separated list of machine accelerators to pass to the hypervisor. +# For example, `machine_accelerators = "nosmm,nosmbus,nosata,nopit,static-prt,nofw"` +machine_accelerators="@MACHINEACCELERATORS@" + +# Qemu seccomp sandbox feature +# comma-separated list of seccomp sandbox features to control the syscall access. +# For example, `seccompsandbox= "on,obsolete=deny,spawn=deny,resourcecontrol=deny"` +# Note: "elevateprivileges=deny" doesn't work with daemonize option, so it's removed from the seccomp sandbox +# Another note: enabling this feature may reduce performance, you may enable +# /proc/sys/net/core/bpf_jit_enable to reduce the impact. see https://man7.org/linux/man-pages/man8/bpfc.8.html +#seccompsandbox="@DEFSECCOMPSANDBOXPARAM@" + +# CPU features +# comma-separated list of cpu features to pass to the cpu +# For example, `cpu_features = "pmu=off,vmx=off" +cpu_features="@TDXCPUFEATURES@" + +# Default number of vCPUs per SB/VM: +# unspecified or 0 --> will be set to @DEFVCPUS@ +# < 0 --> will be set to the actual number of physical cores +# > 0 <= number of physical cores --> will be set to the specified number +# > number of physical cores --> will be set to the actual number of physical cores +default_vcpus = 1 + +# Default maximum number of vCPUs per SB/VM: +# unspecified or == 0 --> will be set to the actual number of physical cores or to the maximum number +# of vCPUs supported by KVM if that number is exceeded +# > 0 <= number of physical cores --> will be set to the specified number +# > number of physical cores --> will be set to the actual number of physical cores or to the maximum number +# of vCPUs supported by KVM if that number is exceeded +# WARNING: Depending of the architecture, the maximum number of vCPUs supported by KVM is used when +# the actual number of physical cores is greater than it. +# WARNING: Be aware that this value impacts the virtual machine's memory footprint and CPU +# the hotplug functionality. For example, `default_maxvcpus = 240` specifies that until 240 vCPUs +# can be added to a SB/VM, but the memory footprint will be big. Another example, with +# `default_maxvcpus = 8` the memory footprint will be small, but 8 will be the maximum number of +# vCPUs supported by the SB/VM. In general, we recommend that you do not edit this variable, +# unless you know what are you doing. +# NOTICE: on arm platform with gicv2 interrupt controller, set it to 8. +default_maxvcpus = @DEFMAXVCPUS@ + +# Bridges can be used to hot plug devices. +# Limitations: +# * Currently only pci bridges are supported +# * Until 30 devices per bridge can be hot plugged. +# * Until 5 PCI bridges can be cold plugged per VM. +# This limitation could be a bug in qemu or in the kernel +# Default number of bridges per SB/VM: +# unspecified or 0 --> will be set to @DEFBRIDGES@ +# > 1 <= 5 --> will be set to the specified number +# > 5 --> will be set to 5 +default_bridges = @DEFBRIDGES@ + +# Default memory size in MiB for SB/VM. +# If unspecified then it will be set @DEFMEMSZ@ MiB. +default_memory = @DEFMEMSZ@ +# +# Default memory slots per SB/VM. +# If unspecified then it will be set @DEFMEMSLOTS@. +# This is will determine the times that memory will be hotadded to sandbox/VM. +#memory_slots = @DEFMEMSLOTS@ + +# Default maximum memory in MiB per SB / VM +# unspecified or == 0 --> will be set to the actual amount of physical RAM +# > 0 <= amount of physical RAM --> will be set to the specified number +# > amount of physical RAM --> will be set to the actual amount of physical RAM +default_maxmemory = @DEFMAXMEMSZ@ + +# The size in MiB will be plused to max memory of hypervisor. +# It is the memory address space for the NVDIMM devie. +# If set block storage driver (block_device_driver) to "nvdimm", +# should set memory_offset to the size of block device. +# Default 0 +#memory_offset = 0 + +# Specifies virtio-mem will be enabled or not. +# Please note that this option should be used with the command +# "echo 1 > /proc/sys/vm/overcommit_memory". +# Default false +#enable_virtio_mem = true + +# Disable block device from being used for a container's rootfs. +# In case of a storage driver like devicemapper where a container's +# root file system is backed by a block device, the block device is passed +# directly to the hypervisor for performance reasons. +# This flag prevents the block device from being passed to the hypervisor, +# virtio-fs is used instead to pass the rootfs. +disable_block_device_use = @DEFDISABLEBLOCK@ + +# Shared file system type: +# - virtio-fs (default) +# - virtio-9p +# - virtio-fs-nydus +shared_fs = "@DEFSHAREDFS_QEMU_TDX_VIRTIOFS@" + +# Path to vhost-user-fs daemon. +virtio_fs_daemon = "@DEFVIRTIOFSDAEMON@" + +# List of valid annotations values for the virtiofs daemon +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: @DEFVALIDVIRTIOFSDAEMONPATHS@ +valid_virtio_fs_daemon_paths = @DEFVALIDVIRTIOFSDAEMONPATHS@ + +# Default size of DAX cache in MiB +virtio_fs_cache_size = @DEFVIRTIOFSCACHESIZE@ + +# Default size of virtqueues +virtio_fs_queue_size = @DEFVIRTIOFSQUEUESIZE@ + +# Extra args for virtiofsd daemon +# +# Format example: +# ["-o", "arg1=xxx,arg2", "-o", "hello world", "--arg3=yyy"] +# Examples: +# Set virtiofsd log level to debug : ["-o", "log_level=debug"] or ["-d"] +# +# see `virtiofsd -h` for possible options. +virtio_fs_extra_args = @DEFVIRTIOFSEXTRAARGS@ + +# Cache mode: +# +# - never +# Metadata, data, and pathname lookup are not cached in guest. They are +# always fetched from host and any changes are immediately pushed to host. +# +# - auto +# Metadata and pathname lookup cache expires after a configured amount of +# time (default is 1 second). Data is cached while the file is open (close +# to open consistency). +# +# - always +# Metadata, data, and pathname lookup are cached in guest and never expire. +virtio_fs_cache = "@DEFVIRTIOFSCACHE@" + +# Block storage driver to be used for the hypervisor in case the container +# rootfs is backed by a block device. This is virtio-scsi, virtio-blk +# or nvdimm. +block_device_driver = "@DEFBLOCKSTORAGEDRIVER_QEMU@" + +# aio is the I/O mechanism used by qemu +# Options: +# +# - threads +# Pthread based disk I/O. +# +# - native +# Native Linux I/O. +# +# - io_uring +# Linux io_uring API. This provides the fastest I/O operations on Linux, requires kernel>5.1 and +# qemu >=5.0. +block_device_aio = "@DEFBLOCKDEVICEAIO_QEMU@" + +# Specifies cache-related options will be set to block devices or not. +# Default false +#block_device_cache_set = true + +# Specifies cache-related options for block devices. +# Denotes whether use of O_DIRECT (bypass the host page cache) is enabled. +# Default false +#block_device_cache_direct = true + +# Specifies cache-related options for block devices. +# Denotes whether flush requests for the device are ignored. +# Default false +#block_device_cache_noflush = true + +# Enable iothreads (data-plane) to be used. This causes IO to be +# handled in a separate IO thread. This is currently only implemented +# for SCSI. +# +enable_iothreads = @DEFENABLEIOTHREADS@ + +# Enable pre allocation of VM RAM, default false +# Enabling this will result in lower container density +# as all of the memory will be allocated and locked +# This is useful when you want to reserve all the memory +# upfront or in the cases where you want memory latencies +# to be very predictable +# Default false +#enable_mem_prealloc = true + +# Enable huge pages for VM RAM, default false +# Enabling this will result in the VM memory +# being allocated using huge pages. +# This is useful when you want to use vhost-user network +# stacks within the container. This will automatically +# result in memory pre allocation +#enable_hugepages = true + +# Enable vhost-user storage device, default false +# Enabling this will result in some Linux reserved block type +# major range 240-254 being chosen to represent vhost-user devices. +enable_vhost_user_store = @DEFENABLEVHOSTUSERSTORE@ + +# The base directory specifically used for vhost-user devices. +# Its sub-path "block" is used for block devices; "block/sockets" is +# where we expect vhost-user sockets to live; "block/devices" is where +# simulated block device nodes for vhost-user devices to live. +vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@" + +# Enable vIOMMU, default false +# Enabling this will result in the VM having a vIOMMU device +# This will also add the following options to the kernel's +# command line: intel_iommu=on,iommu=pt +#enable_iommu = true + +# Enable IOMMU_PLATFORM, default false +# Enabling this will result in the VM device having iommu_platform=on set +#enable_iommu_platform = true + +# List of valid annotations values for the vhost user store path +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: @DEFVALIDVHOSTUSERSTOREPATHS@ +valid_vhost_user_store_paths = @DEFVALIDVHOSTUSERSTOREPATHS@ + +# The timeout for reconnecting on non-server spdk sockets when the remote end goes away. +# qemu will delay this many seconds and then attempt to reconnect. +# Zero disables reconnecting, and the default is zero. +vhost_user_reconnect_timeout_sec = 0 + +# Enable file based guest memory support. The default is an empty string which +# will disable this feature. In the case of virtio-fs, this is enabled +# automatically and '/dev/shm' is used as the backing folder. +# This option will be ignored if VM templating is enabled. +#file_mem_backend = "@DEFFILEMEMBACKEND@" + +# List of valid annotations values for the file_mem_backend annotation +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: @DEFVALIDFILEMEMBACKENDS@ +valid_file_mem_backends = @DEFVALIDFILEMEMBACKENDS@ + +# -pflash can add image file to VM. The arguments of it should be in format +# of ["/path/to/flash0.img", "/path/to/flash1.img"] +pflashes = [] + +# This option changes the default hypervisor and kernel parameters +# to enable debug output where available. And Debug also enable the hmp socket. +# +# Default false +#enable_debug = true + +# Disable the customizations done in the runtime when it detects +# that it is running on top a VMM. This will result in the runtime +# behaving as it would when running on bare metal. +# +#disable_nesting_checks = true + +# This is the msize used for 9p shares. It is the number of bytes +# used for 9p packet payload. +#msize_9p = @DEFMSIZE9P@ + +# If false and nvdimm is supported, use nvdimm device to plug guest image. +# Otherwise virtio-block device is used. +# +# nvdimm is not supported when `confidential_guest = true`. +# +# Default is false +#disable_image_nvdimm = true + +# VFIO devices are hotplugged on a bridge by default. +# Enable hotplugging on root bus. This may be required for devices with +# a large PCI bar, as this is a current limitation with hotplugging on +# a bridge. +# Default false +#hotplug_vfio_on_root_bus = true + +# Before hot plugging a PCIe device, you need to add a pcie_root_port device. +# Use this parameter when using some large PCI bar devices, such as Nvidia GPU +# The value means the number of pcie_root_port +# This value is valid when hotplug_vfio_on_root_bus is true and machine_type is "q35" +# Default 0 +#pcie_root_port = 2 + +# If vhost-net backend for virtio-net is not desired, set to true. Default is false, which trades off +# security (vhost-net runs ring0) for network I/O performance. +#disable_vhost_net = true + +# +# Default entropy source. +# The path to a host source of entropy (including a real hardware RNG) +# /dev/urandom and /dev/random are two main options. +# Be aware that /dev/random is a blocking source of entropy. If the host +# runs out of entropy, the VMs boot time will increase leading to get startup +# timeouts. +# The source of entropy /dev/urandom is non-blocking and provides a +# generally acceptable source of entropy. It should work well for pretty much +# all practical purposes. +#entropy_source= "@DEFENTROPYSOURCE@" + +# List of valid annotations values for entropy_source +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: @DEFVALIDENTROPYSOURCES@ +valid_entropy_sources = @DEFVALIDENTROPYSOURCES@ + +# Path to OCI hook binaries in the *guest rootfs*. +# This does not affect host-side hooks which must instead be added to +# the OCI spec passed to the runtime. +# +# You can create a rootfs with hooks by customizing the osbuilder scripts: +# https://github.com/kata-containers/kata-containers/tree/main/tools/osbuilder +# +# Hooks must be stored in a subdirectory of guest_hook_path according to their +# hook type, i.e. "guest_hook_path/{prestart,poststart,poststop}". +# The agent will scan these directories for executable files and add them, in +# lexicographical order, to the lifecycle of the guest container. +# Hooks are executed in the runtime namespace of the guest. See the official documentation: +# https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks +# Warnings will be logged if any error is encountered while scanning for hooks, +# but it will not abort container execution. +#guest_hook_path = "/usr/share/oci/hooks" +# +# Use rx Rate Limiter to control network I/O inbound bandwidth(size in bits/sec for SB/VM). +# In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) to discipline traffic. +# Default 0-sized value means unlimited rate. +#rx_rate_limiter_max_rate = 0 +# Use tx Rate Limiter to control network I/O outbound bandwidth(size in bits/sec for SB/VM). +# In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) and ifb(Intermediate Functional Block) +# to discipline traffic. +# Default 0-sized value means unlimited rate. +#tx_rate_limiter_max_rate = 0 + +# Set where to save the guest memory dump file. +# If set, when GUEST_PANICKED event occurred, +# guest memeory will be dumped to host filesystem under guest_memory_dump_path, +# This directory will be created automatically if it does not exist. +# +# The dumped file(also called vmcore) can be processed with crash or gdb. +# +# WARNING: +# Dump guest’s memory can take very long depending on the amount of guest memory +# and use much disk space. +#guest_memory_dump_path="/var/crash/kata" + +# If enable paging. +# Basically, if you want to use "gdb" rather than "crash", +# or need the guest-virtual addresses in the ELF vmcore, +# then you should enable paging. +# +# See: https://www.qemu.org/docs/master/qemu-qmp-ref.html#Dump-guest-memory for details +#guest_memory_dump_paging=false + +# Enable swap in the guest. Default false. +# When enable_guest_swap is enabled, insert a raw file to the guest as the swap device +# if the swappiness of a container (set by annotation "io.katacontainers.container.resource.swappiness") +# is bigger than 0. +# The size of the swap device should be +# swap_in_bytes (set by annotation "io.katacontainers.container.resource.swap_in_bytes") - memory_limit_in_bytes. +# If swap_in_bytes is not set, the size should be memory_limit_in_bytes. +# If swap_in_bytes and memory_limit_in_bytes is not set, the size should +# be default_memory. +#enable_guest_swap = true + +# use legacy serial for guest console if available and implemented for architecture. Default false +#use_legacy_serial = true + +# disable applying SELinux on the VMM process (default false) +disable_selinux=@DEFDISABLESELINUX@ + +# disable applying SELinux on the container process +# If set to false, the type `container_t` is applied to the container process by default. +# Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built +# with `SELINUX=yes`. +# (default: true) +disable_guest_selinux=@DEFDISABLEGUESTSELINUX@ + + +[factory] +# VM templating support. Once enabled, new VMs are created from template +# using vm cloning. They will share the same initial kernel, initramfs and +# agent memory by mapping it readonly. It helps speeding up new container +# creation and saves a lot of memory if there are many kata containers running +# on the same host. +# +# When disabled, new VMs are created from scratch. +# +# Note: Requires "initrd=" to be set ("image=" is not supported). +# +# Default false +#enable_template = true + +# Specifies the path of template. +# +# Default "/run/vc/vm/template" +#template_path = "/run/vc/vm/template" + +# The number of caches of VMCache: +# unspecified or == 0 --> VMCache is disabled +# > 0 --> will be set to the specified number +# +# VMCache is a function that creates VMs as caches before using it. +# It helps speed up new container creation. +# The function consists of a server and some clients communicating +# through Unix socket. The protocol is gRPC in protocols/cache/cache.proto. +# The VMCache server will create some VMs and cache them by factory cache. +# It will convert the VM to gRPC format and transport it when gets +# requestion from clients. +# Factory grpccache is the VMCache client. It will request gRPC format +# VM and convert it back to a VM. If VMCache function is enabled, +# kata-runtime will request VM from factory grpccache when it creates +# a new sandbox. +# +# Default 0 +#vm_cache_number = 0 + +# Specify the address of the Unix socket that is used by VMCache. +# +# Default /var/run/kata-containers/cache.sock +#vm_cache_endpoint = "/var/run/kata-containers/cache.sock" + +[agent.@PROJECT_TYPE@] +# If enabled, make the agent display debug-level messages. +# (default: disabled) +#enable_debug = true + +# Enable agent tracing. +# +# If enabled, the agent will generate OpenTelemetry trace spans. +# +# Notes: +# +# - If the runtime also has tracing enabled, the agent spans will be +# associated with the appropriate runtime parent span. +# - If enabled, the runtime will wait for the container to shutdown, +# increasing the container shutdown time slightly. +# +# (default: disabled) +#enable_tracing = true + +# Comma separated list of kernel modules and their parameters. +# These modules will be loaded in the guest kernel using modprobe(8). +# The following example can be used to load two kernel modules with parameters +# - kernel_modules=["e1000e InterruptThrottleRate=3000,3000,3000 EEE=1", "i915 enable_ppgtt=0"] +# The first word is considered as the module name and the rest as its parameters. +# Container will not be started when: +# * A kernel module is specified and the modprobe command is not installed in the guest +# or it fails loading the module. +# * The module is not available in the guest or it doesn't met the guest kernel +# requirements, like architecture and version. +# +kernel_modules=[] + +# Enable debug console. + +# If enabled, user can connect guest OS running inside hypervisor +# through "kata-runtime exec " command + +#debug_console_enabled = true + +# Agent connection dialing timeout value in seconds +# (default: 30) +#dial_timeout = 30 + +[runtime] +# If enabled, the runtime will log additional debug messages to the +# system log +# (default: disabled) +#enable_debug = true +# +# Internetworking model +# Determines how the VM should be connected to the +# the container network interface +# Options: +# +# - macvtap +# Used when the Container network interface can be bridged using +# macvtap. +# +# - none +# Used when customize network. Only creates a tap device. No veth pair. +# +# - tcfilter +# Uses tc filter rules to redirect traffic from the network interface +# provided by plugin to a tap interface connected to the VM. +# +internetworking_model="@DEFNETWORKMODEL_QEMU@" + +# disable guest seccomp +# Determines whether container seccomp profiles are passed to the virtual +# machine and applied by the kata agent. If set to true, seccomp is not applied +# within the guest +# (default: true) +disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ + +# vCPUs pinning settings +# if enabled, each vCPU thread will be scheduled to a fixed CPU +# qualified condition: num(vCPU threads) == num(CPUs in sandbox's CPUSet) +# enable_vcpus_pinning = false + +# Apply a custom SELinux security policy to the container process inside the VM. +# This is used when you want to apply a type other than the default `container_t`, +# so general users should not uncomment and apply it. +# (format: "user:role:type") +# Note: You cannot specify MCS policy with the label because the sensitivity levels and +# categories are determined automatically by high-level container runtimes such as containerd. +#guest_selinux_label="@DEFGUESTSELINUXLABEL@" + +# If enabled, the runtime will create opentracing.io traces and spans. +# (See https://www.jaegertracing.io/docs/getting-started). +# (default: disabled) +#enable_tracing = true + +# Set the full url to the Jaeger HTTP Thrift collector. +# The default if not set will be "http://localhost:14268/api/traces" +#jaeger_endpoint = "" + +# Sets the username to be used if basic auth is required for Jaeger. +#jaeger_user = "" + +# Sets the password to be used if basic auth is required for Jaeger. +#jaeger_password = "" + +# If enabled, the runtime will not create a network namespace for shim and hypervisor processes. +# This option may have some potential impacts to your host. It should only be used when you know what you're doing. +# `disable_new_netns` conflicts with `internetworking_model=tcfilter` and `internetworking_model=macvtap`. It works only +# with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge +# (like OVS) directly. +# (default: false) +#disable_new_netns = true + +# if enabled, the runtime will add all the kata processes inside one dedicated cgroup. +# The container cgroups in the host are not created, just one single cgroup per sandbox. +# The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox. +# The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation. +# The sandbox cgroup is constrained if there is no container type annotation. +# See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType +sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@ + +# If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In +# this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful +# when a hardware architecture or hypervisor solutions is utilized which does not support CPU and/or memory hotplug. +# Compatibility for determining appropriate sandbox (VM) size: +# - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O +# does not yet support sandbox sizing annotations. +# - When running single containers using a tool like ctr, container sizing information will be available. +static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT@ + +# If specified, sandbox_bind_mounts identifieds host paths to be mounted (ro) into the sandboxes shared path. +# This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory. +# If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts` +# These will not be exposed to the container workloads, and are only provided for potential guest services. +sandbox_bind_mounts=@DEFBINDMOUNTS@ + +# VFIO Mode +# Determines how VFIO devices should be be presented to the container. +# Options: +# +# - vfio +# Matches behaviour of OCI runtimes (e.g. runc) as much as +# possible. VFIO devices will appear in the container as VFIO +# character devices under /dev/vfio. The exact names may differ +# from the host (they need to match the VM's IOMMU group numbers +# rather than the host's) +# +# - guest-kernel +# This is a Kata-specific behaviour that's useful in certain cases. +# The VFIO device is managed by whatever driver in the VM kernel +# claims it. This means it will appear as one or more device nodes +# or network interfaces depending on the nature of the device. +# Using this mode requires specially built workloads that know how +# to locate the relevant device interfaces within the VM. +# +vfio_mode="@DEFVFIOMODE@" + +# If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will +# be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest. +disable_guest_empty_dir=@DEFDISABLEGUESTEMPTYDIR@ + +# Enabled experimental feature list, format: ["a", "b"]. +# Experimental features are features not stable enough for production, +# they may break compatibility, and are prepared for a big version bump. +# Supported experimental features: +# (default: []) +experimental=@DEFAULTEXPFEATURES@ + +# If enabled, user can run pprof tools with shim v2 process through kata-monitor. +# (default: false) +# enable_pprof = true + +# WARNING: All the options in the following section have not been implemented yet. +# This section was added as a placeholder. DO NOT USE IT! +[image] +# Container image service. +# +# Offload the CRI image management service to the Kata agent. +# (default: false) +#service_offload = true + +# Container image decryption keys provisioning. +# Applies only if service_offload is true. +# Keys can be provisioned locally (e.g. through a special command or +# a local file) or remotely (usually after the guest is remotely attested). +# The provision setting is a complete URL that lets the Kata agent decide +# which method to use in order to fetch the keys. +# +# Keys can be stored in a local file, in a measured and attested initrd: +#provision=data:///local/key/file +# +# Keys could be fetched through a special command or binary from the +# initrd (guest) image, e.g. a firmware call: +#provision=file:///path/to/bin/fetcher/in/guest +# +# Keys can be remotely provisioned. The Kata agent fetches them from e.g. +# a HTTPS URL: +#provision=https://my-key-broker.foo/tenant/ diff --git a/src/runtime/pkg/govmm/qemu/qemu.go b/src/runtime/pkg/govmm/qemu/qemu.go index 43707c65ed..06d7e58957 100644 --- a/src/runtime/pkg/govmm/qemu/qemu.go +++ b/src/runtime/pkg/govmm/qemu/qemu.go @@ -343,16 +343,12 @@ func (object Object) QemuParams(config *Config) []string { case TDXGuest: objectParams = append(objectParams, string(object.Type)) + objectParams = append(objectParams, "sept-ve-disable=on") objectParams = append(objectParams, fmt.Sprintf("id=%s", object.ID)) if object.Debug { objectParams = append(objectParams, "debug=on") } - deviceParams = append(deviceParams, string(object.Driver)) - deviceParams = append(deviceParams, fmt.Sprintf("id=%s", object.DeviceID)) - deviceParams = append(deviceParams, fmt.Sprintf("file=%s", object.File)) - if object.FirmwareVolume != "" { - deviceParams = append(deviceParams, fmt.Sprintf("config-firmware-volume=%s", object.FirmwareVolume)) - } + config.Bios = object.File case SEVGuest: fallthrough case SNPGuest: diff --git a/src/runtime/virtcontainers/hypervisor_linux_amd64.go b/src/runtime/virtcontainers/hypervisor_linux_amd64.go index 8cfc9aca9a..043b36c9f5 100644 --- a/src/runtime/virtcontainers/hypervisor_linux_amd64.go +++ b/src/runtime/virtcontainers/hypervisor_linux_amd64.go @@ -8,15 +8,28 @@ package virtcontainers import "os" const ( - tdxSysFirmwareDir = "/sys/firmware/tdx_seam/" + tdxSeamSysFirmwareDir = "/sys/firmware/tdx_seam/" - tdxCPUFlag = "tdx" + tdxSysFirmwareDir = "/sys/firmware/tdx/" sevKvmParameterPath = "/sys/module/kvm_amd/parameters/sev" snpKvmParameterPath = "/sys/module/kvm_amd/parameters/sev_snp" ) +// TDX is supported and properly loaded when the firmware directory (either tdx or tdx_seam) exists or `tdx` is part of the CPU flag +func checkTdxGuestProtection(flags map[string]bool) bool { + if d, err := os.Stat(tdxSysFirmwareDir); err == nil && d.IsDir() { + return true + } + + if d, err := os.Stat(tdxSeamSysFirmwareDir); err == nil && d.IsDir() { + return true + } + + return false +} + // Implementation of this function is architecture specific func availableGuestProtection() (guestProtection, error) { flags, err := CPUFlags(procCPUInfo) @@ -24,10 +37,10 @@ func availableGuestProtection() (guestProtection, error) { return noneProtection, err } - // TDX is supported and properly loaded when the firmware directory exists or `tdx` is part of the CPU flags - if d, err := os.Stat(tdxSysFirmwareDir); (err == nil && d.IsDir()) || flags[tdxCPUFlag] { + if checkTdxGuestProtection(flags) { return tdxProtection, nil } + // SEV-SNP is supported and enabled when the kvm module `sev_snp` parameter is set to `Y` // SEV-SNP support infers SEV (-ES) support if _, err := os.Stat(snpKvmParameterPath); err == nil { diff --git a/src/runtime/virtcontainers/qemu_amd64.go b/src/runtime/virtcontainers/qemu_amd64.go index ff8104b8e1..4ecf0804a4 100644 --- a/src/runtime/virtcontainers/qemu_amd64.go +++ b/src/runtime/virtcontainers/qemu_amd64.go @@ -233,7 +233,7 @@ func (q *qemuAmd64) enableProtection() error { if q.qemuMachine.Options != "" { q.qemuMachine.Options += "," } - q.qemuMachine.Options += "kvm-type=tdx,confidential-guest-support=tdx" + q.qemuMachine.Options += "confidential-guest-support=tdx" logger.Info("Enabling TDX guest protection") return nil case sevProtection: diff --git a/tools/packaging/kata-deploy/local-build/Makefile b/tools/packaging/kata-deploy/local-build/Makefile index 3e391f48e1..2ad324b787 100644 --- a/tools/packaging/kata-deploy/local-build/Makefile +++ b/tools/packaging/kata-deploy/local-build/Makefile @@ -25,9 +25,12 @@ all: serial-targets \ firecracker-tarball \ kernel-tarball \ kernel-dragonball-experimental-tarball \ + kernel-tdx-experimental-tarball \ nydus-tarball \ qemu-tarball \ + qemu-tdx-experimental-tarball \ shim-v2-tarball \ + tdvf-tarball \ virtiofsd-tarball serial-targets: @@ -54,12 +57,18 @@ kernel-dragonball-experimental-tarball: kernel-experimental-tarball: ${MAKE} $@-build +kernel-tdx-experimental-tarball: + ${MAKE} $@-build + nydus-tarball: ${MAKE} $@-build qemu-tarball: ${MAKE} $@-build +qemu-tdx-experimental-tarball: + ${MAKE} $@-build + rootfs-image-tarball: ${MAKE} $@-build @@ -69,6 +78,9 @@ rootfs-initrd-tarball: shim-v2-tarball: ${MAKE} $@-build +tdvf-tarball: + ${MAKE} $@-build + virtiofsd-tarball: ${MAKE} $@-build diff --git a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh index 1da2d4f8e6..816c8e01e2 100755 --- a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh +++ b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh @@ -25,7 +25,9 @@ readonly versions_yaml="${repo_root_dir}/versions.yaml" readonly clh_builder="${static_build_dir}/cloud-hypervisor/build-static-clh.sh" readonly firecracker_builder="${static_build_dir}/firecracker/build-static-firecracker.sh" readonly kernel_builder="${static_build_dir}/kernel/build.sh" +readonly ovmf_builder="${static_build_dir}/ovmf/build.sh" readonly qemu_builder="${static_build_dir}/qemu/build-static-qemu.sh" +readonly qemu_experimental_builder="${static_build_dir}/qemu/build-static-qemu-experimental.sh" readonly shimv2_builder="${static_build_dir}/shim-v2/build.sh" readonly virtiofsd_builder="${static_build_dir}/virtiofsd/build.sh" readonly nydus_builder="${static_build_dir}/nydus/build.sh" @@ -79,11 +81,14 @@ options: kernel kernel-dragonball-experimental kernel-experimental + kernel-tdx-experimental nydus qemu + qemu-tdx-experimental rootfs-image rootfs-initrd shim-v2 + tdvf virtiofsd EOF @@ -171,77 +176,103 @@ install_initrd() { } #Install kernel asset -install_kernel() { - export kernel_version="$(yq r $versions_yaml assets.kernel.version)" +install_kernel_helper() { + local kernel_version_yaml_path="${1}" + local kernel_name="${2}" + local extra_cmd=${3} + + export kernel_version="$(get_from_kata_deps ${kernel_version_yaml_path})" local kernel_kata_config_version="$(cat ${repo_root_dir}/tools/packaging/kernel/kata_config_version)" install_cached_tarball_component \ - "kernel" \ - "${jenkins_url}/job/kata-containers-main-kernel-$(uname -m)/${cached_artifacts_path}" \ + "${kernel_name}" \ + "${jenkins_url}/job/kata-containers-main-${kernel_name}-$(uname -m)/${cached_artifacts_path}" \ "${kernel_version}-${kernel_kata_config_version}" \ "$(get_kernel_image_name)" \ "${final_tarball_name}" \ "${final_tarball_path}" \ && return 0 - DESTDIR="${destdir}" PREFIX="${prefix}" "${kernel_builder}" -f -v "${kernel_version}" + info "build ${kernel_name}" + info "Kernel version ${kernel_version}" + DESTDIR="${destdir}" PREFIX="${prefix}" "${kernel_builder}" -v "${kernel_version}" ${extra_cmd} } -#Install dragonball experimental kernel asset -install_dragonball_experimental_kernel() { - info "build dragonball experimental kernel" - export kernel_version="$(yq r $versions_yaml assets.kernel-dragonball-experimental.version)" - local kernel_kata_config_version="$(cat ${repo_root_dir}/tools/packaging/kernel/kata_config_version)" +#Install kernel asset +install_kernel() { + install_kernel_helper \ + "assets.kernel.version" \ + "kernel" \ + "-f" +} - install_cached_tarball_component \ +install_kernel_dragonball_experimental() { + install_kernel_helper \ + "assets.kernel-dragonball-experimental.version" \ "kernel-dragonball-experimental" \ - "${jenkins_url}/job/kata-containers-main-kernel-dragonball-experimental-$(uname -m)/${cached_artifacts_path}" \ - "${kernel_version}-${kernel_kata_config_version}" \ - "$(get_kernel_image_name)" \ - "${final_tarball_name}" \ - "${final_tarball_path}" \ - && return 0 - - info "kernel version ${kernel_version}" - DESTDIR="${destdir}" PREFIX="${prefix}" "${kernel_builder}" -e -t dragonball -v ${kernel_version} + "-e -t dragonball" } #Install experimental kernel asset -install_experimental_kernel() { - info "build experimental kernel" - export kernel_version="$(yq r $versions_yaml assets.kernel-experimental.tag)" - local kernel_kata_config_version="$(cat ${repo_root_dir}/tools/packaging/kernel/kata_config_version)" - - install_cached_tarball_component \ +install_kernel_experimental() { + install_kernel_helper \ + "assets.kernel-experimental.version" \ "kernel-experimental" \ - "${jenkins_url}/job/kata-containers-main-kernel-experimental-$(uname -m)/${cached_artifacts_path}" \ - "${kernel_version}-${kernel_kata_config_version}" \ - "$(get_kernel_image_name)" \ - "${final_tarball_name}" \ - "${final_tarball_path}" \ - && return 0 - - info "Kernel version ${kernel_version}" - DESTDIR="${destdir}" PREFIX="${prefix}" "${kernel_builder}" -f -b experimental -v ${kernel_version} + "-f -b experimental" } -# Install static qemu asset -install_qemu() { - export qemu_repo="$(yq r $versions_yaml assets.hypervisor.qemu.url)" - export qemu_version="$(yq r $versions_yaml assets.hypervisor.qemu.version)" +#Install experimental TDX kernel asset +install_kernel_tdx_experimental() { + local kernel_url="$(get_from_kata_deps assets.kernel-tdx-experimental.url)" + + install_kernel_helper \ + "assets.kernel-tdx-experimental.version" \ + "kernel-tdx-experimental" \ + "-x tdx -u ${kernel_url}" +} + +install_qemu_helper() { + local qemu_repo_yaml_path="${1}" + local qemu_version_yaml_path="${2}" + local qemu_name="${3}" + local builder="${4}" + local qemu_tarball_name="${qemu_tarball_name:-kata-static-qemu.tar.gz}" + + export qemu_repo="$(get_from_kata_deps ${qemu_repo_yaml_path})" + export qemu_version="$(get_from_kata_deps ${qemu_version_yaml_path})" install_cached_tarball_component \ - "QEMU" \ - "${jenkins_url}/job/kata-containers-main-qemu-$(uname -m)/${cached_artifacts_path}" \ + "${qemu_name}" \ + "${jenkins_url}/job/kata-containers-main-${qemu_name}-$(uname -m)/${cached_artifacts_path}" \ "${qemu_version}-$(calc_qemu_files_sha256sum)" \ "$(get_qemu_image_name)" \ "${final_tarball_name}" \ "${final_tarball_path}" \ && return 0 - info "build static qemu" - "${qemu_builder}" - tar xvf "${builddir}/kata-static-qemu.tar.gz" -C "${destdir}" + info "build static ${qemu_name}" + "${builder}" + tar xvf "${qemu_tarball_name}" -C "${destdir}" +} + +# Install static qemu asset +install_qemu() { + install_qemu_helper \ + "assets.hypervisor.qemu.url" \ + "assets.hypervisor.qemu.version" \ + "qemu" \ + "${qemu_builder}" +} + +install_qemu_tdx_experimental() { + export qemu_suffix="tdx-experimental" + export qemu_tarball_name="kata-static-qemu-${qemu_suffix}.tar.gz" + + install_qemu_helper \ + "assets.hypervisor.qemu-${qemu_suffix}.url" \ + "assets.hypervisor.qemu-${qemu_suffix}.tag" \ + "qemu-${qemu_suffix}" \ + "${qemu_experimental_builder}" } # Install static firecracker asset @@ -346,6 +377,31 @@ install_shimv2() { DESTDIR="${destdir}" PREFIX="${prefix}" "${shimv2_builder}" } +install_ovmf() { + ovmf_type="${1:-x86_64}" + tarball_name="${2:-edk2.tar.xz}" + + local component_name="ovmf" + local component_version="$(get_from_kata_deps "externals.ovmf.${ovmf_type}.version")" + [ "${ovmf_type}" == "tdx" ] && component_name="tdvf" + install_cached_tarball_component \ + "${component_name}" \ + "${jenkins_url}/job/kata-containers-main-ovmf-${ovmf_type}-$(uname -m)/${cached_artifacts_path}" \ + "${component_version}" \ + "$(get_ovmf_image_name)" \ + "${final_tarball_name}" \ + "${final_tarball_path}" \ + && return 0 + + DESTDIR="${destdir}" PREFIX="${prefix}" ovmf_build="${ovmf_type}" "${ovmf_builder}" + tar xvf "${builddir}/${tarball_name}" -C "${destdir}" +} + +# Install TDVF +install_tdvf() { + install_ovmf "tdx" "edk2-tdx.tar.gz" +} + get_kata_version() { local v v=$(cat "${version_file}") @@ -368,9 +424,13 @@ handle_build() { install_image install_initrd install_kernel + install_kernel_dragonball_experimental + install_kernel_tdx_experimental install_nydus install_qemu + install_qemu_tdx_experimental install_shimv2 + install_tdvf install_virtiofsd ;; @@ -382,18 +442,24 @@ handle_build() { nydus) install_nydus ;; - kernel-dragonball-experimental) install_dragonball_experimental_kernel;; + kernel-dragonball-experimental) install_kernel_dragonball_experimental ;; - kernel-experimental) install_experimental_kernel;; + kernel-experimental) install_kernel_experimental ;; + + kernel-tdx-experimental) install_kernel_tdx_experimental ;; qemu) install_qemu ;; + qemu-tdx-experimental) install_qemu_tdx_experimental ;; + rootfs-image) install_image ;; rootfs-initrd) install_initrd ;; shim-v2) install_shimv2 ;; + tdvf) install_tdvf ;; + virtiofsd) install_virtiofsd ;; *) diff --git a/tools/packaging/kata-deploy/runtimeclasses/kata-runtimeClasses.yaml b/tools/packaging/kata-deploy/runtimeclasses/kata-runtimeClasses.yaml index d3260d4a8e..daa4d1e2f9 100644 --- a/tools/packaging/kata-deploy/runtimeclasses/kata-runtimeClasses.yaml +++ b/tools/packaging/kata-deploy/runtimeclasses/kata-runtimeClasses.yaml @@ -14,6 +14,19 @@ scheduling: --- kind: RuntimeClass apiVersion: node.k8s.io/v1 +metadata: + name: kata-qemu-tdx +handler: kata-qemu-tdx +overhead: + podFixed: + memory: "160Mi" + cpu: "250m" +scheduling: + nodeSelector: + katacontainers.io/kata-runtime: "true" +--- +kind: RuntimeClass +apiVersion: node.k8s.io/v1 metadata: name: kata-clh handler: kata-clh diff --git a/tools/packaging/kata-deploy/scripts/kata-deploy.sh b/tools/packaging/kata-deploy/scripts/kata-deploy.sh index e4e48732bc..8991e04fcf 100755 --- a/tools/packaging/kata-deploy/scripts/kata-deploy.sh +++ b/tools/packaging/kata-deploy/scripts/kata-deploy.sh @@ -16,6 +16,7 @@ containerd_conf_file_backup="${containerd_conf_file}.bak" shims=( "fc" "qemu" + "qemu-tdx" "clh" "dragonball" ) diff --git a/tools/packaging/kernel/configs/fragments/x86_64/tdx/tdx.conf b/tools/packaging/kernel/configs/fragments/x86_64/tdx/tdx.conf index 1b1f8751ef..2f877a5c99 100644 --- a/tools/packaging/kernel/configs/fragments/x86_64/tdx/tdx.conf +++ b/tools/packaging/kernel/configs/fragments/x86_64/tdx/tdx.conf @@ -5,13 +5,9 @@ CONFIG_DMA_RESTRICTED_POOL=y CONFIG_EFI=y CONFIG_EFI_STUB=y CONFIG_INTEL_IOMMU_SVM=y -CONFIG_INTEL_TDX_ATTESTATION=y -CONFIG_INTEL_TDX_FIXES=y CONFIG_INTEL_TDX_GUEST=y CONFIG_OF=y CONFIG_OF_RESERVED_MEM=y CONFIG_X86_5LEVEL=y CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS=y -CONFIG_X86_MEM_ENCRYPT_COMMON=y CONFIG_X86_PLATFORM_DEVICES=y -CONFIG_X86_PLATFORM_DRIVERS_INTEL=y diff --git a/tools/packaging/kernel/kata_config_version b/tools/packaging/kernel/kata_config_version index a9c8fe8292..b16e5f75e3 100644 --- a/tools/packaging/kernel/kata_config_version +++ b/tools/packaging/kernel/kata_config_version @@ -1 +1 @@ -103 +104 diff --git a/tools/packaging/kernel/patches/5.19-TDX-v2.x/no_patches.txt b/tools/packaging/kernel/patches/5.19-TDX-v2.x/no_patches.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tools/packaging/qemu/patches/tag_patches/ad4c7f529a279685da84297773b4ec8080153c2d-plus-TDX-v1.3/no_patches.txt b/tools/packaging/qemu/patches/tag_patches/ad4c7f529a279685da84297773b4ec8080153c2d-plus-TDX-v1.3/no_patches.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tools/packaging/scripts/configure-hypervisor.sh b/tools/packaging/scripts/configure-hypervisor.sh index 61e479237c..751b2866f9 100755 --- a/tools/packaging/scripts/configure-hypervisor.sh +++ b/tools/packaging/scripts/configure-hypervisor.sh @@ -242,9 +242,12 @@ generate_qemu_options() { # Disable graphical network access qemu_options+=(size:--disable-vnc) qemu_options+=(size:--disable-vnc-jpeg) - if ! gt_eq "${qemu_version}" "7.2.0" ; then + if ! gt_eq "${qemu_version}" "7.0.50" ; then qemu_options+=(size:--disable-vnc-png) + else + qemu_options+=(size:--disable-png) fi + qemu_options+=(size:--disable-vnc-sasl) # Disable PAM authentication: it's a feature used together with VNC access @@ -358,7 +361,7 @@ generate_qemu_options() { qemu_options+=(size:--disable-vde) # Don't build other options which can't be depent on build server. - if ! gt_eq "${qemu_version}" "7.2.0" ; then + if ! gt_eq "${qemu_version}" "7.0.50" ; then qemu_options+=(size:--disable-xfsctl) qemu_options+=(size:--disable-libxml2) fi diff --git a/tools/packaging/scripts/lib.sh b/tools/packaging/scripts/lib.sh index aa101b870e..b6874bf647 100644 --- a/tools/packaging/scripts/lib.sh +++ b/tools/packaging/scripts/lib.sh @@ -183,6 +183,11 @@ get_shim_v2_image_name() { echo "${BUILDER_REGISTRY}:shim-v2-go-$(get_from_kata_deps "languages.golang.meta.newest-version")-rust-$(get_from_kata_deps "languages.rust.meta.newest-version")-$(get_last_modification ${shim_v2_script_dir})-$(uname -m)" } +get_ovmf_image_name() { + ovmf_script_dir="${repo_root_dir}/tools/packaging/static-build/ovmf" + echo "${BUILDER_REGISTRY}:ovmf-$(get_last_modification ${ovmf_script_dir})-$(uname -m)" +} + get_virtiofsd_image_name() { ARCH=$(uname -m) case ${ARCH} in diff --git a/tools/packaging/static-build/cache_components_main.sh b/tools/packaging/static-build/cache_components_main.sh index e447ab4bfa..804df1cbf4 100755 --- a/tools/packaging/static-build/cache_components_main.sh +++ b/tools/packaging/static-build/cache_components_main.sh @@ -12,7 +12,9 @@ script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" source "${script_dir}/../scripts/lib.sh" -KERNEL_FLAVOUR="${KERNEL_FLAVOUR:-kernel}" # kernel | kernel-experimental | kernel-arm-experimetnal | kernel-dragonball-experimental +KERNEL_FLAVOUR="${KERNEL_FLAVOUR:-kernel}" # kernel | kernel-experimental | kernel-arm-experimental | kernel-dragonball-experimental | kernel-tdx-experimental +OVMF_FLAVOUR="${OVMF_FLAVOUR:-x86_64}" # x86_64 | tdx +QEMU_FLAVOUR="${QEMU_FLAVOUR:-qemu}" # qemu | qemu-tdx-experimental ROOTFS_IMAGE_TYPE="${ROOTFS_IMAGE_TYPE:-image}" # image | initrd cache_clh_artifacts() { @@ -41,9 +43,16 @@ cache_nydus_artifacts() { create_cache_asset "${nydus_tarball_name}" "${current_nydus_version}" "" } +cache_ovmf_artifacts() { + local ovmf_tarball_name="kata-static-${OVMF_FLAVOUR}.tar.xz" + local current_ovmf_version="$(get_from_kata_deps "externals.ovmf.${OVMF_FLAVOUR}.version")" + local current_ovmf_image="$(get_ovmf_image_name)" + create_cache_asset "${ovmf_tarball_name}" "${current_ovmf_version}" "${current_ovmf_image}" +} + cache_qemu_artifacts() { - local qemu_tarball_name="kata-static-qemu.tar.xz" - local current_qemu_version=$(get_from_kata_deps "assets.hypervisor.qemu.version") + local qemu_tarball_name="kata-static-${QEMU_FLAVOUR}.tar.xz" + local current_qemu_version=$(get_from_kata_deps "assets.hypervisor.${QEMU_FLAVOUR}.version") local qemu_sha=$(calc_qemu_files_sha256sum) local current_qemu_image="$(get_qemu_image_name)" create_cache_asset "${qemu_tarball_name}" "${current_qemu_version}-${qemu_sha}" "${current_qemu_image}" @@ -105,10 +114,12 @@ Usage: $0 "[options]" -c Cloud hypervisor cache -F Firecracker cache -k Kernel cache - * Export KERNEL_FLAVOUR="kernel|kernek-experimental|kernel-arm-experimental|kernel-dragonball-experimental" for a specific build + * Export KERNEL_FLAVOUR="kernel | kernel-experimental | kernel-arm-experimental | kernel-dragonball-experimental | kernel-tdx-experimental" for a specific build The default KERNEL_FLAVOUR value is "kernel" -n Nydus cache -q QEMU cache + * Export QEMU_FLAVOUR="qemu | qemu-tdx-experimental" for a specific build + The default QEMU_FLAVOUR value is "qemu" -r RootFS cache * Export ROOTFS_IMAGE_TYPE="image|initrd" for one of those two types The default ROOTFS_IMAGE_TYPE value is "image" @@ -124,12 +135,13 @@ main() { local firecracker_component="${firecracker_component:-}" local kernel_component="${kernel_component:-}" local nydus_component="${nydus_component:-}" + local ovmf_component="${ovmf_component:-}" local qemu_component="${qemu_component:-}" local rootfs_component="${rootfs_component:-}" local shim_v2_component="${shim_v2_component:-}" local virtiofsd_component="${virtiofsd_component:-}" local OPTIND - while getopts ":cFknqrsvh:" opt + while getopts ":cFknoqrsvh:" opt do case "$opt" in c) @@ -144,6 +156,9 @@ main() { n) nydus_component="1" ;; + o) + ovmf_component="1" + ;; q) qemu_component="1" ;; @@ -173,6 +188,7 @@ main() { [[ -z "${firecracker_component}" ]] && \ [[ -z "${kernel_component}" ]] && \ [[ -z "${nydus_component}" ]] && \ + [[ -z "${ovmf_component}" ]] && \ [[ -z "${qemu_component}" ]] && \ [[ -z "${rootfs_component}" ]] && \ [[ -z "${shim_v2_component}" ]] && \ @@ -187,6 +203,7 @@ main() { [ "${firecracker_component}" == "1" ] && cache_firecracker_artifacts [ "${kernel_component}" == "1" ] && cache_kernel_artifacts [ "${nydus_component}" == "1" ] && cache_nydus_artifacts + [ "${ovmf_component}" == "1" ] && cache_ovmf_artifacts [ "${qemu_component}" == "1" ] && cache_qemu_artifacts [ "${rootfs_component}" == "1" ] && cache_rootfs_artifacts [ "${shim_v2_component}" == "1" ] && cache_shim_v2_artifacts diff --git a/tools/packaging/static-build/ovmf/Dockerfile b/tools/packaging/static-build/ovmf/Dockerfile index a9a148a756..78acaccfe7 100644 --- a/tools/packaging/static-build/ovmf/Dockerfile +++ b/tools/packaging/static-build/ovmf/Dockerfile @@ -14,9 +14,15 @@ RUN apt-get update && \ git \ iasl \ make \ - nasm \ python \ python3 \ python3-distutils \ uuid-dev && \ - apt-get clean && rm -rf /var/lib/lists/ + apt-get clean && rm -rf /var/lib/lists/ && \ + cd /tmp && curl -fsLO https://www.nasm.us/pub/nasm/releasebuilds/2.15.05/nasm-2.15.05.tar.gz && \ + tar xf nasm-2.15.05.tar.gz && \ + cd nasm-2.15.05 && \ + ./configure && \ + make -j"$(nproc)" && \ + make install && \ + cd /tmp && rm -r nasm-2.15.05* diff --git a/tools/packaging/static-build/ovmf/build-ovmf.sh b/tools/packaging/static-build/ovmf/build-ovmf.sh index 936b53be31..ebdf0669f3 100755 --- a/tools/packaging/static-build/ovmf/build-ovmf.sh +++ b/tools/packaging/static-build/ovmf/build-ovmf.sh @@ -56,7 +56,7 @@ fi info "Building ovmf" build_cmd="build -b ${build_target} -t ${toolchain} -a ${architecture} -p ${ovmf_package}" if [ "${ovmf_build}" == "tdx" ]; then - build_cmd+=" -D DEBUG_ON_SERIAL_PORT=TRUE -D TDX_MEM_PARTIAL_ACCEPT=512 -D TDX_EMULATION_ENABLE=FALSE -D TDX_ACCEPT_PAGE_SIZE=2M" + build_cmd+=" -D DEBUG_ON_SERIAL_PORT=FALSE -D TDX_MEM_PARTIAL_ACCEPT=512 -D TDX_EMULATION_ENABLE=FALSE -D SECURE_BOOT_ENABLE=TRUE -D TDX_ACCEPT_PAGE_SIZE=2M" fi eval "${build_cmd}" @@ -70,7 +70,6 @@ if [ "${ovmf_build}" == "tdx" ]; then build_path_arch="${build_path_target_toolchain}/X64" stat "${build_path_fv}/OVMF_CODE.fd" stat "${build_path_fv}/OVMF_VARS.fd" - stat "${build_path_arch}/DumpTdxEventLog.efi" fi #need to leave tmp dir @@ -87,7 +86,6 @@ install $build_root/$ovmf_dir/"${build_path_fv}"/OVMF.fd "${install_dir}" if [ "${ovmf_build}" == "tdx" ]; then install $build_root/$ovmf_dir/"${build_path_fv}"/OVMF_CODE.fd ${install_dir} install $build_root/$ovmf_dir/"${build_path_fv}"/OVMF_VARS.fd ${install_dir} - install $build_root/$ovmf_dir/"${build_path_arch}"/DumpTdxEventLog.efi ${install_dir} fi local_dir=${PWD} diff --git a/tools/packaging/static-build/ovmf/build.sh b/tools/packaging/static-build/ovmf/build.sh index 2dfbe5a20e..4640e2ac78 100755 --- a/tools/packaging/static-build/ovmf/build.sh +++ b/tools/packaging/static-build/ovmf/build.sh @@ -15,7 +15,7 @@ source "${script_dir}/../../scripts/lib.sh" DESTDIR=${DESTDIR:-${PWD}} PREFIX=${PREFIX:-/opt/kata} -container_image="${OVMF_CONTAINER_BUILDER:-${BUILDER_REGISTRY}:ovmf-$(get_last_modification ${script_dir})-$(uname -m)}" +container_image="${OVMF_CONTAINER_BUILDER:-$(get_ovmf_image_name)}" ovmf_build="${ovmf_build:-x86_64}" kata_version="${kata_version:-}" ovmf_repo="${ovmf_repo:-}" @@ -24,11 +24,7 @@ ovmf_package="${ovmf_package:-}" package_output_dir="${package_output_dir:-}" if [ -z "$ovmf_repo" ]; then - if [ "${ovmf_build}" == "tdx" ]; then - ovmf_repo=$(get_from_kata_deps "externals.ovmf.tdx.url" "${kata_version}") - else - ovmf_repo=$(get_from_kata_deps "externals.ovmf.url" "${kata_version}") - fi + ovmf_repo=$(get_from_kata_deps "externals.ovmf.url" "${kata_version}") fi [ -n "$ovmf_repo" ] || die "failed to get ovmf repo" diff --git a/tools/packaging/static-build/qemu/build-static-qemu-experimental.sh b/tools/packaging/static-build/qemu/build-static-qemu-experimental.sh index be50fb9770..1e0541c541 100755 --- a/tools/packaging/static-build/qemu/build-static-qemu-experimental.sh +++ b/tools/packaging/static-build/qemu/build-static-qemu-experimental.sh @@ -14,6 +14,8 @@ source "${script_dir}/../../scripts/lib.sh" qemu_repo="${qemu_repo:-}" qemu_version="${qemu_version:-}" +qemu_suffix="${qemu_suffix:-experimental}" +qemu_tarball_name="${qemu_tarball_name:-kata-static-qemu-experimental.tar.gz}" if [ -z "$qemu_repo" ]; then info "Get qemu information from runtime versions.yaml" @@ -26,4 +28,4 @@ fi [ -n "$qemu_version" ] || qemu_version=$(get_from_kata_deps "assets.hypervisor.qemu-experimental.version") [ -n "$qemu_version" ] || die "failed to get qemu version" -"${script_dir}/build-base-qemu.sh" "${qemu_repo}" "${qemu_version}" "experimental" "kata-static-qemu-experimental.tar.gz" +"${script_dir}/build-base-qemu.sh" "${qemu_repo}" "${qemu_version}" "${qemu_suffix}" "${qemu_tarball_name}" diff --git a/versions.yaml b/versions.yaml index a71d29dfb8..1c92412cbe 100644 --- a/versions.yaml +++ b/versions.yaml @@ -98,10 +98,6 @@ assets: uscan-url: >- https://github.com/qemu/qemu/tags .*/v?(\d\S+)\.tar\.gz - tdx: - description: "VMM that uses KVM and supports TDX" - url: "https://github.com/kata-containers/qemu" - tag: "TDX-v3.1" snp: description: "VMM that uses KVM and supports AMD SEV-SNP" url: "https://github.com/AMDESE/qemu" @@ -113,6 +109,12 @@ assets: url: "https://github.com/qemu/qemu" version: "7a800cf9496fddddf71b21a00991e0ec757a170a" + qemu-tdx-experimental: + # yamllint disable-line rule:line-length + description: "QEMU with TDX support - based on https://github.com/intel/tdx-tools/releases/tag/2023ww01" + url: "https://github.com/kata-containers/qemu" + tag: "ad4c7f529a279685da84297773b4ec8080153c2d-plus-TDX-v1.3" + image: description: | Root filesystem disk image used to boot the guest virtual @@ -159,10 +161,6 @@ assets: description: "Linux kernel optimised for virtual machines" url: "https://cdn.kernel.org/pub/linux/kernel/v5.x/" version: "v5.19.2" - tdx: - description: "Linux kernel that supports TDX" - url: "https://github.com/kata-containers/linux/archive/refs/tags" - tag: "5.15-plus-TDX" sev: description: "Linux kernel that supports SEV" url: "https://cdn.kernel.org/pub/linux/kernel/v5.x/" @@ -187,6 +185,12 @@ assets: url: "https://cdn.kernel.org/pub/linux/kernel/v5.x/" version: "v5.10.25" + kernel-tdx-experimental: + # yamllint disable-line rule:line-length + description: "Linux kernel with TDX support -- based on https://github.com/intel/tdx-tools/releases/tag/2023ww01" + url: "https://github.com/kata-containers/linux/archive/refs/tags" + version: "5.19-TDX-v2.2" + externals: description: "Third-party projects used by the system" @@ -275,11 +279,11 @@ externals: package: "OvmfPkg/AmdSev/AmdSevX64.dsc" package_output_dir: "AmdSev" tdx: - url: "https://github.com/tianocore/edk2-staging" - description: "TDVF build needed for TDX measured direct boot." - version: "2022-tdvf-ww28.5" - package: "OvmfPkg/OvmfPkgX64.dsc" - package_output_dir: "OvmfX64" + # yamllint disable-line rule:line-length + description: "QEMU with TDX support - based on https://github.com/intel/tdx-tools/releases/tag/2023ww01" + version: "edk2-stable202211" + package: "OvmfPkg/IntelTdx/IntelTdxX64.dsc" + package_output_dir: "IntelTdx" td-shim: description: "Confidential Containers Shim Firmware"