From 9ede2bcd951679b62c3dcf101b2638536649f1ac Mon Sep 17 00:00:00 2001 From: Hyounggyu Choi Date: Mon, 22 May 2023 18:02:43 +0200 Subject: [PATCH 01/12] local-build: differentiate build targets based on architecture This is to rule out unnecessary build targets for s390x. Signed-off-by: Hyounggyu Choi --- .../kata-deploy/local-build/Makefile | 51 +++++++++++++------ 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/tools/packaging/kata-deploy/local-build/Makefile b/tools/packaging/kata-deploy/local-build/Makefile index 0d64cd4cb7..7e007a227c 100644 --- a/tools/packaging/kata-deploy/local-build/Makefile +++ b/tools/packaging/kata-deploy/local-build/Makefile @@ -9,19 +9,16 @@ MK_DIR := $(dir $(MK_PATH)) # Verbose build V := 1 -define BUILD - $(MK_DIR)/kata-deploy-binaries-in-docker.sh $(if $(V),,-s) --build=$1 -endef +ifeq ($(CROSS_BUILD),) + CROSS_BUILD = false +endif -kata-tarball: | all-parallel merge-builds +ifeq ($(CROSS_BUILD),false) + ARCH := $(shell uname -m) +endif -$(MK_DIR)/dockerbuild/install_yq.sh: - $(MK_DIR)/kata-deploy-copy-yq-installer.sh - -all-parallel: $(MK_DIR)/dockerbuild/install_yq.sh - ${MAKE} -f $(MK_PATH) all -j $(shell nproc ${CI:+--ignore 1}) V= - -all: serial-targets \ +ifeq ($(ARCH), x86_64) +BASE_TARBALLS = serial-targets \ firecracker-tarball \ kernel-dragonball-experimental-tarball \ kernel-nvidia-gpu-tarball \ @@ -39,16 +36,40 @@ all: serial-targets \ shim-v2-tarball \ tdvf-tarball \ virtiofsd-tarball - -serial-targets: - ${MAKE} -f $(MK_PATH) -j 1 V= \ - rootfs-image-tarball \ +BASE_SERIAL_TARBALLS = rootfs-image-tarball \ rootfs-image-tdx-tarball \ rootfs-initrd-mariner-tarball \ rootfs-initrd-sev-tarball \ rootfs-initrd-tarball \ cloud-hypervisor-tarball \ cloud-hypervisor-glibc-tarball +else ifeq ($(ARCH), s390x) +BASE_TARBALLS = serial-targets \ + kernel-tarball \ + qemu-tarball \ + shim-v2-tarball \ + virtiofsd-tarball +BASE_SERIAL_TARBALLS = rootfs-image-tarball \ + rootfs-initrd-tarball +endif + +define BUILD + 
$(MK_DIR)/kata-deploy-binaries-in-docker.sh $(if $(V),,-s) --build=$1 +endef + +kata-tarball: | all-parallel merge-builds + +$(MK_DIR)/dockerbuild/install_yq.sh: + $(MK_DIR)/kata-deploy-copy-yq-installer.sh + +all-parallel: $(MK_DIR)/dockerbuild/install_yq.sh + ${MAKE} -f $(MK_PATH) all -j $(shell nproc ${CI:+--ignore 1}) V= + +all: ${BASE_TARBALLS} + +serial-targets: + ${MAKE} -f $(MK_PATH) -j 1 V= \ + ${BASE_SERIAL_TARBALLS} %-tarball-build: $(MK_DIR)/dockerbuild/install_yq.sh $(call BUILD,$*) From 8de4241d3bf7641290ecba4a0ffa6082e9ff8d1c Mon Sep 17 00:00:00 2001 From: Hyounggyu Choi Date: Mon, 22 May 2023 18:14:49 +0200 Subject: [PATCH 02/12] kata-deploy: add kata-qemu-se runtimeclass This is to increase resources for relaxing the limitation of hotplug for SE. Signed-off-by: Hyounggyu Choi --- .../runtimeclasses/kata-runtimeClasses.yaml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tools/packaging/kata-deploy/runtimeclasses/kata-runtimeClasses.yaml b/tools/packaging/kata-deploy/runtimeclasses/kata-runtimeClasses.yaml index b96ac44f8e..1182182376 100644 --- a/tools/packaging/kata-deploy/runtimeclasses/kata-runtimeClasses.yaml +++ b/tools/packaging/kata-deploy/runtimeclasses/kata-runtimeClasses.yaml @@ -92,6 +92,19 @@ scheduling: --- kind: RuntimeClass apiVersion: node.k8s.io/v1 +metadata: + name: kata-qemu-se +handler: kata-qemu-se +overhead: + podFixed: + memory: "2048Mi" + cpu: "1.0" +scheduling: + nodeSelector: + katacontainers.io/kata-runtime: "true" +--- +kind: RuntimeClass +apiVersion: node.k8s.io/v1 metadata: name: kata-qemu-tdx handler: kata-qemu-tdx From bb1d4adaa9b6a24bc998398ef14729968fc2c42b Mon Sep 17 00:00:00 2001 From: Hyounggyu Choi Date: Mon, 22 May 2023 18:28:39 +0200 Subject: [PATCH 03/12] config: add SE configuration This is to add SE configuration which is used by kata runtime. 
Signed-off-by: Hyounggyu Choi --- src/runtime/Makefile | 16 + .../config/configuration-qemu-se.toml.in | 673 ++++++++++++++++++ 2 files changed, 689 insertions(+) create mode 100644 src/runtime/config/configuration-qemu-se.toml.in diff --git a/src/runtime/Makefile b/src/runtime/Makefile index c9cf7ac584..197fec8b4e 100644 --- a/src/runtime/Makefile +++ b/src/runtime/Makefile @@ -106,6 +106,7 @@ GENERATED_VARS = \ CONFIG_FC_IN \ CONFIG_STRATOVIRT_IN \ CONFIG_REMOTE_IN \ + CONFIG_QEMU_SE_IN \ $(USER_VARS) SCRIPTS += $(COLLECT_SCRIPT) SCRIPTS_DIR := $(BINDIR) @@ -356,6 +357,18 @@ ifneq (,$(QEMUCMD)) CONFIGS += $(CONFIG_REMOTE) + CONFIG_FILE_QEMU_SE = configuration-qemu-se.toml + CONFIG_QEMU_SE = config/$(CONFIG_FILE_QEMU_SE) + CONFIG_QEMU_SE_IN = $(CONFIG_QEMU_SE).in + + CONFIG_PATH_QEMU_SE = $(abspath $(CONFDIR)/$(CONFIG_FILE_QEMU_SE)) + CONFIG_PATHS += $(CONFIG_PATH_QEMU_SE) + + SYSCONFIG_QEMU_SE = $(abspath $(SYSCONFDIR)/$(CONFIG_FILE_QEMU_SE)) + SYSCONFIG_PATHS += $(SYSCONFIG_QEMU_SE) + + CONFIGS += $(CONFIG_QEMU_SE) + # qemu-specific options (all should be suffixed by "_QEMU") DEFBLOCKSTORAGEDRIVER_QEMU := virtio-scsi DEFBLOCKDEVICEAIO_QEMU := io_uring @@ -377,6 +390,8 @@ ifneq (,$(QEMUCMD)) KERNELSNPNAME = $(call MAKE_KERNEL_SNP_NAME,$(KERNELSNPTYPE)) KERNELSNPPATH = $(KERNELDIR)/$(KERNELSNPNAME) + KERNELSENAME = kata-containers-se.img + KERNELSEPATH = $(KERNELDIR)/$(KERNELSENAME) endif ifneq (,$(CLHCMD)) @@ -573,6 +588,7 @@ USER_VARS += KERNELPATH USER_VARS += KERNELSEVPATH USER_VARS += KERNELTDXPATH USER_VARS += KERNELSNPPATH +USER_VARS += KERNELSEPATH USER_VARS += KERNELPATH_CLH USER_VARS += KERNELPATH_FC USER_VARS += KERNELPATH_STRATOVIRT diff --git a/src/runtime/config/configuration-qemu-se.toml.in b/src/runtime/config/configuration-qemu-se.toml.in new file mode 100644 index 0000000000..8725d4e3ab --- /dev/null +++ b/src/runtime/config/configuration-qemu-se.toml.in @@ -0,0 +1,673 @@ +# Copyright (c) 2017-2019 Intel Corporation +# Copyright (c) 2021 Adobe 
Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +# XXX: WARNING: this file is auto-generated. +# XXX: +# XXX: Source file: "@CONFIG_QEMU_SE_IN@" +# XXX: Project: +# XXX: Name: @PROJECT_NAME@ +# XXX: Type: @PROJECT_TYPE@ + +[hypervisor.qemu] +path = "@QEMUPATH@" +kernel = "@KERNELSEPATH@" +machine_type = "@MACHINETYPE@" + +# Enable confidential guest support. +# Toggling that setting may trigger different hardware features, ranging +# from memory encryption to both memory and CPU-state encryption and integrity. +# The Kata Containers runtime dynamically detects the available feature set and +# aims at enabling the largest possible one, returning an error if none is +# available, or none is supported by the hypervisor. +# +# Known limitations: +# * Does not work by design: +# - CPU Hotplug +# - Memory Hotplug +# - NVDIMM devices +# +# Default false +confidential_guest = true + +# Enable running QEMU VMM as a non-root user. +# By default QEMU VMM run as root. When this is set to true, QEMU VMM process runs as +# a non-root random user. See documentation for the limitations of this mode. +# rootless = true + +# List of valid annotation names for the hypervisor +# Each member of the list is a regular expression, which is the base name +# of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path" +enable_annotations = @DEFENABLEANNOTATIONS@ + +# List of valid annotations values for the hypervisor +# Each member of the list is a path pattern as described by glob(3). +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: @QEMUVALIDHYPERVISORPATHS@ +valid_hypervisor_paths = @QEMUVALIDHYPERVISORPATHS@ + +# Optional space-separated list of options to pass to the guest kernel. +# For example, use `kernel_params = "vsyscall=emulate"` if you are having +# trouble running pre-2.15 glibc. 
+# +# WARNING: - any parameter specified here will take priority over the default +# parameter value of the same name used to start the virtual machine. +# Do not set values here unless you understand the impact of doing so as you +# may stop the virtual machine from booting. +# To see the list of default parameters, enable hypervisor debug, create a +# container and look for 'default-kernel-parameters' log entries. +kernel_params = "@KERNELPARAMS@" + +# Path to the firmware. +# If you want that qemu uses the default firmware leave this option empty +firmware = "@FIRMWAREPATH@" + +# Path to the firmware volume. +# firmware TDVF or OVMF can be split into FIRMWARE_VARS.fd (UEFI variables +# as configuration) and FIRMWARE_CODE.fd (UEFI program image). UEFI variables +# can be customized per each user while UEFI code is kept same. +firmware_volume = "@FIRMWAREVOLUMEPATH@" + +# Machine accelerators +# comma-separated list of machine accelerators to pass to the hypervisor. +# For example, `machine_accelerators = "nosmm,nosmbus,nosata,nopit,static-prt,nofw"` +machine_accelerators="@MACHINEACCELERATORS@" + +# Qemu seccomp sandbox feature +# comma-separated list of seccomp sandbox features to control the syscall access. +# For example, `seccompsandbox= "on,obsolete=deny,spawn=deny,resourcecontrol=deny"` +# Note: "elevateprivileges=deny" doesn't work with daemonize option, so it's removed from the seccomp sandbox +# Another note: enabling this feature may reduce performance, you may enable +# /proc/sys/net/core/bpf_jit_enable to reduce the impact. 
see https://man7.org/linux/man-pages/man8/bpfc.8.html +#seccompsandbox="@DEFSECCOMPSANDBOXPARAM@" + +# CPU features +# comma-separated list of cpu features to pass to the cpu +# For example, `cpu_features = "pmu=off,vmx=off"` +cpu_features="@CPUFEATURES@" + +# Default number of vCPUs per SB/VM: +# unspecified or 0 --> will be set to @DEFVCPUS@ +# < 0 --> will be set to the actual number of physical cores +# > 0 <= number of physical cores --> will be set to the specified number +# > number of physical cores --> will be set to the actual number of physical cores +default_vcpus = 1 + +# Default maximum number of vCPUs per SB/VM: +# unspecified or == 0 --> will be set to the actual number of physical cores or to the maximum number +# of vCPUs supported by KVM if that number is exceeded +# > 0 <= number of physical cores --> will be set to the specified number +# > number of physical cores --> will be set to the actual number of physical cores or to the maximum number +# of vCPUs supported by KVM if that number is exceeded +# WARNING: Depending on the architecture, the maximum number of vCPUs supported by KVM is used when +# the actual number of physical cores is greater than it. +# WARNING: Be aware that this value impacts the virtual machine's memory footprint and +# the CPU hotplug functionality. For example, `default_maxvcpus = 240` specifies that up to 240 vCPUs +# can be added to a SB/VM, but the memory footprint will be big. Another example, with +# `default_maxvcpus = 8` the memory footprint will be small, but 8 will be the maximum number of +# vCPUs supported by the SB/VM. In general, we recommend that you do not edit this variable, +# unless you know what you are doing. +# NOTICE: on arm platform with gicv2 interrupt controller, set it to 8. +default_maxvcpus = @DEFMAXVCPUS@ + +# Bridges can be used to hot plug devices. +# Limitations: +# * Currently only pci bridges are supported +# * Up to 30 devices per bridge can be hot plugged.
+# * Up to 5 PCI bridges can be cold plugged per VM. +# This limitation could be a bug in qemu or in the kernel +# Default number of bridges per SB/VM: +# unspecified or 0 --> will be set to @DEFBRIDGES@ +# > 1 <= 5 --> will be set to the specified number +# > 5 --> will be set to 5 +default_bridges = @DEFBRIDGES@ + +# Default memory size in MiB for SB/VM. +# If unspecified then it will be set to @DEFMEMSZ@ MiB. +default_memory = @DEFMEMSZ@ +# +# Default memory slots per SB/VM. +# If unspecified then it will be set to @DEFMEMSLOTS@. +# This will determine the number of times that memory can be hotadded to the sandbox/VM. +#memory_slots = @DEFMEMSLOTS@ + +# Default maximum memory in MiB per SB / VM +# unspecified or == 0 --> will be set to the actual amount of physical RAM +# > 0 <= amount of physical RAM --> will be set to the specified number +# > amount of physical RAM --> will be set to the actual amount of physical RAM +default_maxmemory = @DEFMAXMEMSZ@ + +# The size in MiB that will be added to the max memory of the hypervisor. +# It is the memory address space for the NVDIMM device. +# If the block storage driver (block_device_driver) is set to "nvdimm", +# memory_offset should be set to the size of the block device. +# Default 0 +#memory_offset = 0 + +# Specifies whether virtio-mem will be enabled or not. +# Please note that this option should be used with the command +# "echo 1 > /proc/sys/vm/overcommit_memory". +# Default false +#enable_virtio_mem = true + +# Disable block device from being used for a container's rootfs. +# In case of a storage driver like devicemapper where a container's +# root file system is backed by a block device, the block device is passed +# directly to the hypervisor for performance reasons. +# This flag prevents the block device from being passed to the hypervisor, +# virtio-fs is used instead to pass the rootfs.
+disable_block_device_use = @DEFDISABLEBLOCK@ + +# Shared file system type: +# - virtio-fs (default) +# - virtio-9p +# - virtio-fs-nydus +shared_fs = "@DEFSHAREDFS_QEMU_VIRTIOFS@" + +# Path to vhost-user-fs daemon. +virtio_fs_daemon = "@DEFVIRTIOFSDAEMON@" + +# List of valid annotations values for the virtiofs daemon +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: @DEFVALIDVIRTIOFSDAEMONPATHS@ +valid_virtio_fs_daemon_paths = @DEFVALIDVIRTIOFSDAEMONPATHS@ + +# Default size of DAX cache in MiB +virtio_fs_cache_size = @DEFVIRTIOFSCACHESIZE@ + +# Default size of virtqueues +virtio_fs_queue_size = @DEFVIRTIOFSQUEUESIZE@ + +# Extra args for virtiofsd daemon +# +# Format example: +# ["-o", "arg1=xxx,arg2", "-o", "hello world", "--arg3=yyy"] +# Examples: +# Set virtiofsd log level to debug : ["-o", "log_level=debug"] or ["-d"] +# +# see `virtiofsd -h` for possible options. +virtio_fs_extra_args = @DEFVIRTIOFSEXTRAARGS@ + +# Cache mode: +# +# - never +# Metadata, data, and pathname lookup are not cached in guest. They are +# always fetched from host and any changes are immediately pushed to host. +# +# - auto +# Metadata and pathname lookup cache expires after a configured amount of +# time (default is 1 second). Data is cached while the file is open (close +# to open consistency). +# +# - always +# Metadata, data, and pathname lookup are cached in guest and never expire. +virtio_fs_cache = "@DEFVIRTIOFSCACHE@" + +# Block storage driver to be used for the hypervisor in case the container +# rootfs is backed by a block device. This is virtio-scsi, virtio-blk +# or nvdimm. +block_device_driver = "@DEFBLOCKSTORAGEDRIVER_QEMU@" + +# aio is the I/O mechanism used by qemu +# Options: +# +# - threads +# Pthread based disk I/O. +# +# - native +# Native Linux I/O. +# +# - io_uring +# Linux io_uring API. This provides the fastest I/O operations on Linux, requires kernel>5.1 and +# qemu >=5.0. 
+block_device_aio = "@DEFBLOCKDEVICEAIO_QEMU@" + +# Specifies cache-related options will be set to block devices or not. +# Default false +#block_device_cache_set = true + +# Specifies cache-related options for block devices. +# Denotes whether use of O_DIRECT (bypass the host page cache) is enabled. +# Default false +#block_device_cache_direct = true + +# Specifies cache-related options for block devices. +# Denotes whether flush requests for the device are ignored. +# Default false +#block_device_cache_noflush = true + +# Enable iothreads (data-plane) to be used. This causes IO to be +# handled in a separate IO thread. This is currently only implemented +# for SCSI. +# +enable_iothreads = @DEFENABLEIOTHREADS@ + +# Enable pre allocation of VM RAM, default false +# Enabling this will result in lower container density +# as all of the memory will be allocated and locked +# This is useful when you want to reserve all the memory +# upfront or in the cases where you want memory latencies +# to be very predictable +# Default false +#enable_mem_prealloc = true + +# Enable huge pages for VM RAM, default false +# Enabling this will result in the VM memory +# being allocated using huge pages. +# This is useful when you want to use vhost-user network +# stacks within the container. This will automatically +# result in memory pre allocation +#enable_hugepages = true + +# Enable vhost-user storage device, default false +# Enabling this will result in some Linux reserved block type +# major range 240-254 being chosen to represent vhost-user devices. +enable_vhost_user_store = @DEFENABLEVHOSTUSERSTORE@ + +# The base directory specifically used for vhost-user devices. +# Its sub-path "block" is used for block devices; "block/sockets" is +# where we expect vhost-user sockets to live; "block/devices" is where +# simulated block device nodes for vhost-user devices to live. 
+vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@" + +# Enable vIOMMU, default false +# Enabling this will result in the VM having a vIOMMU device +# This will also add the following options to the kernel's +# command line: intel_iommu=on,iommu=pt +#enable_iommu = true + +# Enable IOMMU_PLATFORM, default false +# Enabling this will result in the VM device having iommu_platform=on set +#enable_iommu_platform = true + +# List of valid annotations values for the vhost user store path +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: @DEFVALIDVHOSTUSERSTOREPATHS@ +valid_vhost_user_store_paths = @DEFVALIDVHOSTUSERSTOREPATHS@ + +# Enable file based guest memory support. The default is an empty string which +# will disable this feature. In the case of virtio-fs, this is enabled +# automatically and '/dev/shm' is used as the backing folder. +# This option will be ignored if VM templating is enabled. +#file_mem_backend = "@DEFFILEMEMBACKEND@" + +# List of valid annotations values for the file_mem_backend annotation +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: @DEFVALIDFILEMEMBACKENDS@ +valid_file_mem_backends = @DEFVALIDFILEMEMBACKENDS@ + +# -pflash can add image file to VM. The arguments of it should be in format +# of ["/path/to/flash0.img", "/path/to/flash1.img"] +pflashes = [] + +# This option changes the default hypervisor and kernel parameters +# to enable debug output where available. +# +# Default false +#enable_debug = true + +# Disable the customizations done in the runtime when it detects +# that it is running on top a VMM. This will result in the runtime +# behaving as it would when running on bare metal. +# +#disable_nesting_checks = true + +# This is the msize used for 9p shares. It is the number of bytes +# used for 9p packet payload. +#msize_9p = @DEFMSIZE9P@ + +# If false and nvdimm is supported, use nvdimm device to plug guest image. 
+# Otherwise virtio-block device is used. +# +# nvdimm is not supported when `confidential_guest = true`. +# +# Default is false +#disable_image_nvdimm = true + +# VFIO devices are hotplugged on a bridge by default. +# Enable hotplugging on root bus. This may be required for devices with +# a large PCI bar, as this is a current limitation with hotplugging on +# a bridge. +# Default false +#hotplug_vfio_on_root_bus = true + +# Before hot plugging a PCIe device, you need to add a pcie_root_port device. +# Use this parameter when using some large PCI bar devices, such as Nvidia GPU +# The value means the number of pcie_root_port +# This value is valid when hotplug_vfio_on_root_bus is true and machine_type is "q35" +# Default 0 +#pcie_root_port = 2 + +# If vhost-net backend for virtio-net is not desired, set to true. Default is false, which trades off +# security (vhost-net runs ring0) for network I/O performance. +#disable_vhost_net = true + +# +# Default entropy source. +# The path to a host source of entropy (including a real hardware RNG) +# /dev/urandom and /dev/random are two main options. +# Be aware that /dev/random is a blocking source of entropy. If the host +# runs out of entropy, the VMs boot time will increase leading to get startup +# timeouts. +# The source of entropy /dev/urandom is non-blocking and provides a +# generally acceptable source of entropy. It should work well for pretty much +# all practical purposes. +#entropy_source= "@DEFENTROPYSOURCE@" + +# List of valid annotations values for entropy_source +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: @DEFVALIDENTROPYSOURCES@ +valid_entropy_sources = @DEFVALIDENTROPYSOURCES@ + +# Path to OCI hook binaries in the *guest rootfs*. +# This does not affect host-side hooks which must instead be added to +# the OCI spec passed to the runtime. 
+# +# You can create a rootfs with hooks by customizing the osbuilder scripts: +# https://github.com/kata-containers/kata-containers/tree/main/tools/osbuilder +# +# Hooks must be stored in a subdirectory of guest_hook_path according to their +# hook type, i.e. "guest_hook_path/{prestart,poststart,poststop}". +# The agent will scan these directories for executable files and add them, in +# lexicographical order, to the lifecycle of the guest container. +# Hooks are executed in the runtime namespace of the guest. See the official documentation: +# https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks +# Warnings will be logged if any error is encountered while scanning for hooks, +# but it will not abort container execution. +#guest_hook_path = "/usr/share/oci/hooks" +# +# Use rx Rate Limiter to control network I/O inbound bandwidth(size in bits/sec for SB/VM). +# In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) to discipline traffic. +# Default 0-sized value means unlimited rate. +#rx_rate_limiter_max_rate = 0 +# Use tx Rate Limiter to control network I/O outbound bandwidth(size in bits/sec for SB/VM). +# In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) and ifb(Intermediate Functional Block) +# to discipline traffic. +# Default 0-sized value means unlimited rate. +#tx_rate_limiter_max_rate = 0 + +# Set where to save the guest memory dump file. +# If set, when a GUEST_PANICKED event occurs, +# guest memory will be dumped to host filesystem under guest_memory_dump_path. +# This directory will be created automatically if it does not exist. +# +# The dumped file(also called vmcore) can be processed with crash or gdb. +# +# WARNING: +# Dumping the guest’s memory can take very long depending on the amount of guest memory +# and use much disk space. +#guest_memory_dump_path="/var/crash/kata" + +# If enable paging.
+# Basically, if you want to use "gdb" rather than "crash", +# or need the guest-virtual addresses in the ELF vmcore, +# then you should enable paging. +# +# See: https://www.qemu.org/docs/master/qemu-qmp-ref.html#Dump-guest-memory for details +#guest_memory_dump_paging=false + +# Enable swap in the guest. Default false. +# When enable_guest_swap is enabled, insert a raw file to the guest as the swap device +# if the swappiness of a container (set by annotation "io.katacontainers.container.resource.swappiness") +# is bigger than 0. +# The size of the swap device should be +# swap_in_bytes (set by annotation "io.katacontainers.container.resource.swap_in_bytes") - memory_limit_in_bytes. +# If swap_in_bytes is not set, the size should be memory_limit_in_bytes. +# If swap_in_bytes and memory_limit_in_bytes is not set, the size should +# be default_memory. +#enable_guest_swap = true + +# use legacy serial for guest console if available and implemented for architecture. Default false +#use_legacy_serial = true + +# disable applying SELinux on the VMM process (default false) +disable_selinux=@DEFDISABLESELINUX@ + +# disable applying SELinux on the container process +# If set to false, the type `container_t` is applied to the container process by default. +# Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built +# with `SELINUX=yes`. +# (default: true) +disable_guest_selinux=@DEFDISABLEGUESTSELINUX@ + + +[factory] +# VM templating support. Once enabled, new VMs are created from template +# using vm cloning. They will share the same initial kernel, initramfs and +# agent memory by mapping it readonly. It helps speeding up new container +# creation and saves a lot of memory if there are many kata containers running +# on the same host. +# +# When disabled, new VMs are created from scratch. +# +# Note: Requires "initrd=" to be set ("image=" is not supported). +# +# Default false +#enable_template = true + +# Specifies the path of template. 
+# +# Default "/run/vc/vm/template" +#template_path = "/run/vc/vm/template" + +# The number of caches of VMCache: +# unspecified or == 0 --> VMCache is disabled +# > 0 --> will be set to the specified number +# +# VMCache is a function that creates VMs as caches before using it. +# It helps speed up new container creation. +# The function consists of a server and some clients communicating +# through a Unix socket. The protocol is gRPC in protocols/cache/cache.proto. +# The VMCache server will create some VMs and cache them by factory cache. +# It will convert the VM to gRPC format and transport it when it gets +# a request from clients. +# Factory grpccache is the VMCache client. It will request gRPC format +# VM and convert it back to a VM. If VMCache function is enabled, +# kata-runtime will request VM from factory grpccache when it creates +# a new sandbox. +# +# Default 0 +#vm_cache_number = 0 + +# Specify the address of the Unix socket that is used by VMCache. +# +# Default /var/run/kata-containers/cache.sock +#vm_cache_endpoint = "/var/run/kata-containers/cache.sock" + +[agent.@PROJECT_TYPE@] +# If enabled, make the agent display debug-level messages. +# (default: disabled) +#enable_debug = true + +# Enable agent tracing. +# +# If enabled, the agent will generate OpenTelemetry trace spans. +# +# Notes: +# +# - If the runtime also has tracing enabled, the agent spans will be +# associated with the appropriate runtime parent span. +# - If enabled, the runtime will wait for the container to shut down, +# increasing the container shutdown time slightly. +# +# (default: disabled) +#enable_tracing = true + +# Comma separated list of kernel modules and their parameters. +# These modules will be loaded in the guest kernel using modprobe(8).
+# The following example can be used to load two kernel modules with parameters +# - kernel_modules=["e1000e InterruptThrottleRate=3000,3000,3000 EEE=1", "i915 enable_ppgtt=0"] +# The first word is considered as the module name and the rest as its parameters. +# Container will not be started when: +# * A kernel module is specified and the modprobe command is not installed in the guest +# or it fails loading the module. +# * The module is not available in the guest or it doesn't meet the guest kernel +# requirements, like architecture and version. +# +kernel_modules=[] + +# Enable debug console. + +# If enabled, user can connect guest OS running inside hypervisor +# through "kata-runtime exec <sandbox-id>" command + +#debug_console_enabled = true + +# Agent connection dialing timeout value in seconds +# (default: 30) +dial_timeout = 90 + +[runtime] +# If enabled, the runtime will log additional debug messages to the +# system log +# (default: disabled) +#enable_debug = true +# +# Internetworking model +# Determines how the VM should be connected to the +# container network interface +# Options: +# +# - macvtap +# Used when the Container network interface can be bridged using +# macvtap. +# +# - none +# Used when customize network. Only creates a tap device. No veth pair. +# +# - tcfilter +# Uses tc filter rules to redirect traffic from the network interface +# provided by plugin to a tap interface connected to the VM. +# +internetworking_model="@DEFNETWORKMODEL_QEMU@" + +# disable guest seccomp +# Determines whether container seccomp profiles are passed to the virtual +# machine and applied by the kata agent.
If set to true, seccomp is not applied +# within the guest +# (default: true) +disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ + +# vCPUs pinning settings +# if enabled, each vCPU thread will be scheduled to a fixed CPU +# qualified condition: num(vCPU threads) == num(CPUs in sandbox's CPUSet) +# enable_vcpus_pinning = false + +# Apply a custom SELinux security policy to the container process inside the VM. +# This is used when you want to apply a type other than the default `container_t`, +# so general users should not uncomment and apply it. +# (format: "user:role:type") +# Note: You cannot specify MCS policy with the label because the sensitivity levels and +# categories are determined automatically by high-level container runtimes such as containerd. +#guest_selinux_label="@DEFGUESTSELINUXLABEL@" + +# If enabled, the runtime will create opentracing.io traces and spans. +# (See https://www.jaegertracing.io/docs/getting-started). +# (default: disabled) +#enable_tracing = true + +# Set the full url to the Jaeger HTTP Thrift collector. +# The default if not set will be "http://localhost:14268/api/traces" +#jaeger_endpoint = "" + +# Sets the username to be used if basic auth is required for Jaeger. +#jaeger_user = "" + +# Sets the password to be used if basic auth is required for Jaeger. +#jaeger_password = "" + +# If enabled, the runtime will not create a network namespace for shim and hypervisor processes. +# This option may have some potential impacts to your host. It should only be used when you know what you're doing. +# `disable_new_netns` conflicts with `internetworking_model=tcfilter` and `internetworking_model=macvtap`. It works only +# with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge +# (like OVS) directly. +# (default: false) +#disable_new_netns = true + +# if enabled, the runtime will add all the kata processes inside one dedicated cgroup. 
+# The container cgroups in the host are not created, just one single cgroup per sandbox. +# The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox. +# The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation. +# The sandbox cgroup is constrained if there is no container type annotation. +# See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType +sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@ + +# If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In +# this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful +# when a hardware architecture or hypervisor solution is utilized which does not support CPU and/or memory hotplug. +# Compatibility for determining appropriate sandbox (VM) size: +# - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O +# does not yet support sandbox sizing annotations. +# - When running single containers using a tool like ctr, container sizing information will be available. +static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT@ + +# If specified, sandbox_bind_mounts identifies host paths to be mounted (ro) into the sandboxes shared path. +# This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory. +# If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts` +# These will not be exposed to the container workloads, and are only provided for potential guest services. +sandbox_bind_mounts=@DEFBINDMOUNTS@ + +# VFIO Mode +# Determines how VFIO devices should be presented to the container. +# Options: +# +# - vfio +# Matches behaviour of OCI runtimes (e.g.
runc) as much as +# possible. VFIO devices will appear in the container as VFIO +# character devices under /dev/vfio. The exact names may differ +# from the host (they need to match the VM's IOMMU group numbers +# rather than the host's) +# +# - guest-kernel +# This is a Kata-specific behaviour that's useful in certain cases. +# The VFIO device is managed by whatever driver in the VM kernel +# claims it. This means it will appear as one or more device nodes +# or network interfaces depending on the nature of the device. +# Using this mode requires specially built workloads that know how +# to locate the relevant device interfaces within the VM. +# +vfio_mode="@DEFVFIOMODE@" + +# If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will +# be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest. +disable_guest_empty_dir=@DEFDISABLEGUESTEMPTYDIR@ + +# Enabled experimental feature list, format: ["a", "b"]. +# Experimental features are features not stable enough for production, +# they may break compatibility, and are prepared for a big version bump. +# Supported experimental features: +# (default: []) +experimental=@DEFAULTEXPFEATURES@ + +# If enabled, user can run pprof tools with shim v2 process through kata-monitor. +# (default: false) +# enable_pprof = true + +# WARNING: All the options in the following section have not been implemented yet. +# This section was added as a placeholder. DO NOT USE IT! +[image] +# Container image service. +# +# Offload the CRI image management service to the Kata agent. +# (default: false) +service_offload = @DEFSERVICEOFFLOAD@ + +# Container image decryption keys provisioning. +# Applies only if service_offload is true. +# Keys can be provisioned locally (e.g. through a special command or +# a local file) or remotely (usually after the guest is remotely attested). 
+# The provision setting is a complete URL that lets the Kata agent decide +# which method to use in order to fetch the keys. +# +# Keys can be stored in a local file, in a measured and attested initrd: +#provision=data:///local/key/file +# +# Keys could be fetched through a special command or binary from the +# initrd (guest) image, e.g. a firmware call: +#provision=file:///path/to/bin/fetcher/in/guest +# +# Keys can be remotely provisioned. The Kata agent fetches them from e.g. +# a HTTPS URL: +#provision=https://my-key-broker.foo/tenant/ From 6d0dabd81ec37aa4e6f09762c22d0a0cd39a6206 Mon Sep 17 00:00:00 2001 From: Hyounggyu Choi Date: Mon, 22 May 2023 18:31:56 +0200 Subject: [PATCH 04/12] gha: build secure image for s390x release This is to add a build target boot-image-se with a host-key-document config for s390x. Signed-off-by: Hyounggyu Choi --- .github/workflows/build-kata-static-tarball-s390x.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/build-kata-static-tarball-s390x.yaml b/.github/workflows/build-kata-static-tarball-s390x.yaml index f37056cc68..f9f3f7aade 100644 --- a/.github/workflows/build-kata-static-tarball-s390x.yaml +++ b/.github/workflows/build-kata-static-tarball-s390x.yaml @@ -27,6 +27,7 @@ jobs: strategy: matrix: asset: + - boot-image-se - kernel - qemu - rootfs-image @@ -59,6 +60,13 @@ jobs: env: TARGET_BRANCH: ${{ inputs.target-branch }} + - name: Place a host key document + run: | + mkdir -p "host-key-document" + cp "${CI_HKD_PATH}" "host-key-document" + env: + CI_HKD_PATH: ${{ secrets.CI_HKD_PATH }} + - name: Build ${{ matrix.asset }} run: | make "${KATA_ASSET}-tarball" @@ -74,6 +82,7 @@ jobs: ARTEFACT_REGISTRY_USERNAME: ${{ github.actor }} ARTEFACT_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }} TARGET_BRANCH: ${{ inputs.target-branch }} + HKD_PATH: "host-key-document" - name: store-artifact ${{ matrix.asset }} uses: actions/upload-artifact@v3 From a63a6959d1e5ca76b08b72b48225e0b03edcf5c7 Mon Sep 17 00:00:00
2001 From: Hyounggyu Choi Date: Mon, 22 May 2023 18:36:18 +0200 Subject: [PATCH 05/12] local-build: install s390-tools in Dockerfile This is to install s390-tools including genprotimg during the docker build. Signed-off-by: Hyounggyu Choi --- tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile b/tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile index cef937b6d6..c5a9a2f527 100644 --- a/tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile +++ b/tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile @@ -48,6 +48,7 @@ RUN apt-get update && \ gcc \ unzip \ xz-utils && \ + if uname -m | grep -Eq 's390x'; then apt-get install -y s390-tools; fi && \ apt-get clean && rm -rf /var/lib/apt/lists ENV USER ${IMG_USER} From 4de8ef3d1832d0f20a9f36448a23963880df08a1 Mon Sep 17 00:00:00 2001 From: Hyounggyu Choi Date: Mon, 22 May 2023 18:41:19 +0200 Subject: [PATCH 06/12] local-build: add build target boot-image-se This is to add a build target boot-image-se for s390x. 
Signed-off-by: Hyounggyu Choi --- tools/packaging/kata-deploy/local-build/Makefile | 3 +++ .../local-build/kata-deploy-binaries-in-docker.sh | 2 ++ .../kata-deploy/local-build/kata-deploy-binaries.sh | 9 +++++++++ 3 files changed, 14 insertions(+) diff --git a/tools/packaging/kata-deploy/local-build/Makefile b/tools/packaging/kata-deploy/local-build/Makefile index 7e007a227c..5282b68bae 100644 --- a/tools/packaging/kata-deploy/local-build/Makefile +++ b/tools/packaging/kata-deploy/local-build/Makefile @@ -134,6 +134,9 @@ qemu-snp-experimental-tarball: qemu-tarball: ${MAKE} $@-build +boot-image-se-tarball: kernel-tarball rootfs-initrd-tarball + ${MAKE} $@-build + qemu-tdx-experimental-tarball: ${MAKE} $@-build diff --git a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh index 19653720ea..f01744c406 100755 --- a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh +++ b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh @@ -119,6 +119,8 @@ docker run \ --env VIRTIOFSD_CONTAINER_BUILDER="${VIRTIOFSD_CONTAINER_BUILDER}" \ --env MEASURED_ROOTFS="${MEASURED_ROOTFS}" \ --env USE_CACHE="${USE_CACHE}" \ + --env AA_KBC="${AA_KBC:-}" \ + --env HKD_PATH="$(realpath "${HKD_PATH:-}" 2> /dev/null || true)" \ --env CROSS_BUILD="${CROSS_BUILD}" \ --env TARGET_ARCH="${TARGET_ARCH}" \ --env ARCH="${ARCH}" \ diff --git a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh index cb93fd1a15..0345f86fd2 100755 --- a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh +++ b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh @@ -36,6 +36,7 @@ readonly virtiofsd_builder="${static_build_dir}/virtiofsd/build.sh" readonly nydus_builder="${static_build_dir}/nydus/build.sh" readonly 
rootfs_builder="${repo_root_dir}/tools/packaging/guest-image/build_image.sh" readonly tools_builder="${static_build_dir}/tools/build.sh" +readonly se_image_builder="${repo_root_dir}/tools/packaging/guest-image/build_se_image.sh" ARCH=${ARCH:-$(uname -m)} MEASURED_ROOTFS=${MEASURED_ROOTFS:-no} @@ -86,6 +87,7 @@ options: agent agent-opa agent-ctl + boot-image-se cloud-hypervisor cloud-hypervisor-glibc firecracker @@ -259,6 +261,11 @@ install_initrd_sev() { install_initrd "sev" } +install_se_image() { + info "Create IBM SE image configured with AA_KBC=${AA_KBC}" + "${se_image_builder}" --destdir="${destdir}" +} + #Install kernel component helper install_cached_kernel_tarball_component() { local kernel_name=${1} @@ -779,6 +786,8 @@ handle_build() { agent-opa) install_agent_opa ;; agent-ctl) install_agent_ctl ;; + + boot-image-se) install_se_image ;; cloud-hypervisor) install_clh ;; From 511dd5feacbe3fe278763e55262ba11e694bf643 Mon Sep 17 00:00:00 2001 From: Hyounggyu Choi Date: Mon, 22 May 2023 18:43:36 +0200 Subject: [PATCH 07/12] local-build: add support to build IBM Z SE image This is to add an artifact for IBM Z SE(TEE) to main. Fixes: #6754 Signed-off-by: Hyounggyu Choi --- ci/lib.sh | 21 +++ tools/packaging/guest-image/build_se_image.sh | 165 ++++++++++++++++++ 2 files changed, 186 insertions(+) create mode 100755 tools/packaging/guest-image/build_se_image.sh diff --git a/ci/lib.sh b/ci/lib.sh index 29b640a6ae..3dca29b7ec 100644 --- a/ci/lib.sh +++ b/ci/lib.sh @@ -64,3 +64,24 @@ run_get_pr_changed_file_details() source "$tests_repo_dir/.ci/lib.sh" get_pr_changed_file_details } + +# Check if the 1st argument version is greater than and equal to 2nd one +# Version format: [0-9]+ separated by period (e.g. 2.4.6, 1.11.3 and etc.) 
+# +# Parameters: +# $1 - a version to be tested +# $2 - a target version +# +# Return: +# 0 if $1 is greater than or equal to $2 +# 1 otherwise +version_greater_than_equal() { + local current_version=$1 + local target_version=$2 + smaller_version=$(echo -e "$current_version\n$target_version" | sort -V | head -1) + if [ "${smaller_version}" = "${target_version}" ]; then + return 0 + else + return 1 + fi +} diff --git a/tools/packaging/guest-image/build_se_image.sh b/tools/packaging/guest-image/build_se_image.sh new file mode 100755 index 0000000000..e25ce84822 --- /dev/null +++ b/tools/packaging/guest-image/build_se_image.sh @@ -0,0 +1,165 @@ +#!/usr/bin/env bash +# Copyright (c) 2023 IBM Corp. +# +# SPDX-License-Identifier: Apache-2.0 + +[ -n "${DEBUG:-}" ] && set -x + +set -o errexit +set -o nounset +set -o pipefail + +readonly script_name="$(basename "${BASH_SOURCE[0]}")" +readonly script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +readonly packaging_root_dir="$(cd "${script_dir}/../" && pwd)" +readonly kata_root_dir="$(cd "${packaging_root_dir}/../../" && pwd)" + +source "$kata_root_dir/ci/lib.sh" +source "${packaging_root_dir}/scripts/lib.sh" + +[ "$(uname -m)" = s390x ] || die "Building a Secure Execution image is currently only supported on s390x." + +finish() { + if [ -e "${parmfile}" ]; then + rm -f "${parmfile}" + fi +} + +trap finish EXIT + +usage() { + cat >&2 << EOF +Usage: + ${script_name} [options] + +Options: + --builddir=${builddir} + --destdir=${destdir} + +Environment variables: + HKD_PATH (required): Secure Execution host key document, generally specific to your machine. See + https://www.ibm.com/docs/en/linux-on-systems?topic=tasks-verify-host-key-document + for information on how to retrieve and verify this document. + DEBUG : If set, display debug information.
+EOF + exit "${1:-0}" +} + +# Build a IBM zSystem secure execution (SE) image +# +# Parameters: +# $1 - kernel_parameters +# $2 - a source directory where kernel and initrd are located +# $3 - a destination directory where a SE image is built +# +# Return: +# 0 if the image is successfully built +# 1 otherwise +build_secure_image() { + kernel_params="${1:-}" + install_src_dir="${2:-}" + install_dest_dir="${3:-}" + + if [ ! -f "${install_src_dir}/vmlinuz.container" ] || + [ ! -f "${install_src_dir}/kata-containers-initrd.img" ]; then + cat << EOF >&2 +Either kernel or initrd does not exist or is mistakenly named +A file name for kernel must be vmlinuz.container (raw binary) +A file name for initrd must be kata-containers-initrd.img +EOF + return 1 + fi + + cmdline="${kernel_params} panic=1 scsi_mod.scan=none swiotlb=262144" + parmfile="$(mktemp --suffix=-cmdline)" + echo "${cmdline}" > "${parmfile}" + chmod 600 "${parmfile}" + + [ -n "${HKD_PATH:-}" ] || (echo >&2 "No host key document specified." && return 1) + cert_list=($(ls -1 $HKD_PATH)) + declare hkd_options + eval "for cert in ${cert_list[*]}; do + hkd_options+=\"--host-key-document=\\\"\$HKD_PATH/\$cert\\\" \" + done" + + command -v genprotimg > /dev/null 2>&1 || die "A package s390-tools is not installed." + extra_arguments="" + genprotimg_version=$(genprotimg --version | grep -Po '(?<=version )[^-]+') + if ! version_greater_than_equal "${genprotimg_version}" "2.17.0"; then + extra_arguments="--x-pcf '0xe0'" + fi + + eval genprotimg \ + "${extra_arguments}" \ + "${hkd_options}" \ + --output="${install_dest_dir}/kata-containers-se.img" \ + --image="${install_src_dir}/vmlinuz.container" \ + --ramdisk="${install_src_dir}/kata-containers-initrd.img" \ + --parmfile="${parmfile}" \ + --no-verify # no verification for CI testing purposes + + build_result=$? 
+ if [ $build_result -eq 0 ]; then + return 0 + else + return 1 + fi +} + +build_image() { + image_source_dir="${builddir}/secure-image" + mkdir -p "${image_source_dir}" + pushd "${tarball_dir}" + for tarball_id in kernel rootfs-initrd; do + tar xvf kata-static-${tarball_id}.tar.xz -C "${image_source_dir}" + done + popd + + protimg_source_dir="${image_source_dir}${prefix}/share/kata-containers" + local kernel_params="" + if ! build_secure_image "${kernel_params}" "${protimg_source_dir}" "${install_dir}"; then + usage 1 + fi +} + +main() { + readonly prefix="/opt/kata" + builddir="${PWD}" + tarball_dir="${builddir}/../.." + while getopts "h-:" opt; do + case "$opt" in + -) + case "${OPTARG}" in + builddir=*) + builddir=${OPTARG#*=} + ;; + destdir=*) + destdir=${OPTARG#*=} + ;; + *) + echo >&2 "ERROR: Invalid option -$opt${OPTARG}" + usage 1 + ;; + esac + ;; + h) usage 0 ;; + *) + echo "Invalid option $opt" >&2 + usage 1 + ;; + esac + done + readonly destdir + readonly builddir + + info "Build IBM zSystems & LinuxONE SE image" + + install_dir="${destdir}${prefix}/share/kata-containers" + readonly install_dir + + mkdir -p "${install_dir}" + + build_image +} + +main $* From 9ceb2c27e08b4e6f76145b0c30858fd9908c9497 Mon Sep 17 00:00:00 2001 From: Hyounggyu Choi Date: Thu, 1 Jun 2023 10:23:14 +0200 Subject: [PATCH 08/12] local-build: consider cross-compilation env This is to make a base builder image build genprotimg without a package manager under the cross-compilation environment. 
Signed-off-by: Hyounggyu Choi --- tools/packaging/guest-image/build_se_image.sh | 5 +- .../local-build/dockerbuild/Dockerfile | 46 +++++++++++++++---- .../kata-deploy-binaries-in-docker.sh | 1 + 3 files changed, 43 insertions(+), 9 deletions(-) diff --git a/tools/packaging/guest-image/build_se_image.sh b/tools/packaging/guest-image/build_se_image.sh index e25ce84822..9fb3ed8a57 100755 --- a/tools/packaging/guest-image/build_se_image.sh +++ b/tools/packaging/guest-image/build_se_image.sh @@ -17,7 +17,10 @@ readonly kata_root_dir="$(cd "${packaging_root_dir}/../../" && pwd)" source "$kata_root_dir/ci/lib.sh" source "${packaging_root_dir}/scripts/lib.sh" -[ "$(uname -m)" = s390x ] || die "Building a Secure Execution image is currently only supported on s390x." +ARCH=${ARCH:-$(uname -m)} +if [ $(uname -m) == "${ARCH}" ]; then + [ "${ARCH}" == "s390x" ] || die "Building a Secure Execution image is currently only supported on s390x." +fi finish() { if [ -e "${parmfile}" ]; then diff --git a/tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile b/tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile index c5a9a2f527..f4ba5cc60e 100644 --- a/tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile +++ b/tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile @@ -5,6 +5,9 @@ FROM ubuntu:20.04 ENV DEBIAN_FRONTEND=noninteractive ENV INSTALL_IN_GOPATH=false +# Required for libxml2-dev +ENV TZ=Etc/UTC +ARG ARCH COPY install_yq.sh /usr/bin/install_yq.sh COPY install_oras.sh /usr/bin/install_oras.sh @@ -40,16 +43,43 @@ RUN if [ ${IMG_USER} != "root" ] && [ ! -z ${HOST_DOCKER_GID} ]; then groupadd - RUN if [ ${IMG_USER} != "root" ] && [ ! 
-z ${HOST_DOCKER_GID} ]; then usermod -a -G docker_on_host ${IMG_USER};fi RUN sh -c "echo '${IMG_USER} ALL=NOPASSWD: ALL' >> /etc/sudoers" +RUN if [ "${ARCH}" != "$(uname -m)" ] && [ "${ARCH}" == "s390x" ]; then sed -i 's/^deb/deb [arch=amd64]/g' /etc/apt/sources.list && \ + dpkg --add-architecture "s390x" && \ + echo "deb [arch=s390x] http://ports.ubuntu.com/ focal main multiverse universe" >> /etc/apt/sources.list && \ + echo "deb [arch=s390x] http://ports.ubuntu.com/ focal-security main multiverse universe" >> /etc/apt/sources.list && \ + echo "deb [arch=s390x] http://ports.ubuntu.com/ focal-backports main multiverse universe" >> /etc/apt/sources.list && \ + echo "deb [arch=s390x] http://ports.ubuntu.com/ focal-updates main multiverse universe" >> /etc/apt/sources.list; fi + #FIXME: gcc is required as agent is build out of a container build. RUN apt-get update && \ - apt-get install --no-install-recommends -y \ - build-essential \ - cpio \ - gcc \ - unzip \ - xz-utils && \ - if uname -m | grep -Eq 's390x'; then apt-get install -y s390-tools; fi && \ - apt-get clean && rm -rf /var/lib/apt/lists + apt-get install --no-install-recommends -y \ + build-essential \ + cpio \ + gcc \ + unzip \ + xz-utils && \ + if [ "${ARCH}" != "$(uname -m)" ] && [ "${ARCH}" == "s390x" ]; then \ + apt-get install -y --no-install-recommends \ + gcc-s390x-linux-gnu \ + g++-s390x-linux-gnu \ + binutils-s390x-linux-gnu \ + dpkg-dev \ + apt-utils \ + libssl-dev:s390x \ + libcurl4-openssl-dev:s390x \ + libjson-c-dev:s390x \ + pkg-config:s390x \ + libxml2-dev:s390x \ + libjson-c-dev:s390x \ + libglib2.0-0:s390x \ + libglib2.0-dev:s390x; \ + elif uname -m | grep -Eq 's390x'; then apt-get install -y s390-tools; fi && \ + apt-get clean && rm -rf /var/lib/apt/lists + +RUN if [ "${ARCH}" != "$(uname -m)" ] && [ "${ARCH}" == "s390x" ]; then \ + git clone -b v2.25.0 https://github.com/ibm-s390-linux/s390-tools.git && cd s390-tools && \ + pushd genprotimg && pushd boot && make 
CROSS_COMPILE=s390x-linux-gnu- && popd && pushd src && \ + make CROSS_COMPILE=s390x-linux-gnu- && popd && make install && popd || return; fi ENV USER ${IMG_USER} USER ${IMG_USER} diff --git a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh index f01744c406..46133159ab 100755 --- a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh +++ b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh @@ -75,6 +75,7 @@ docker build -q -t build-kata-deploy \ --build-arg http_proxy="${http_proxy}" \ --build-arg https_proxy="${https_proxy}" \ --build-arg HOST_DOCKER_GID=${docker_gid} \ + --build-arg ARCH="${ARCH}" \ "${script_dir}/dockerbuild/" CI="${CI:-}" From 52bdc87fe92aac8e8611be103538393f50832380 Mon Sep 17 00:00:00 2001 From: Hyounggyu Choi Date: Wed, 7 Jun 2023 21:53:48 +0200 Subject: [PATCH 09/12] local-build: make kernel parameters configurable This is to make kernel parameters configurable during the secure image build by adding an environment variable SE_KERNEL_PARAMS. Signed-off-by: Hyounggyu Choi --- tools/packaging/guest-image/build_se_image.sh | 2 +- .../kata-deploy/local-build/kata-deploy-binaries-in-docker.sh | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/packaging/guest-image/build_se_image.sh b/tools/packaging/guest-image/build_se_image.sh index 9fb3ed8a57..d1b342ca6a 100755 --- a/tools/packaging/guest-image/build_se_image.sh +++ b/tools/packaging/guest-image/build_se_image.sh @@ -119,7 +119,7 @@ build_image() { popd protimg_source_dir="${image_source_dir}${prefix}/share/kata-containers" - local kernel_params="" + local kernel_params="${SE_KERNEL_PARAMS:-}" if ! 
build_secure_image "${kernel_params}" "${protimg_source_dir}" "${install_dir}"; then usage 1 fi diff --git a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh index 46133159ab..7e7a6d65b7 100755 --- a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh +++ b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh @@ -122,6 +122,7 @@ docker run \ --env USE_CACHE="${USE_CACHE}" \ --env AA_KBC="${AA_KBC:-}" \ --env HKD_PATH="$(realpath "${HKD_PATH:-}" 2> /dev/null || true)" \ + --env SE_KERNEL_PARAMS="${SE_KERNEL_PARAMS:-}" \ --env CROSS_BUILD="${CROSS_BUILD}" \ --env TARGET_ARCH="${TARGET_ARCH}" \ --env ARCH="${ARCH}" \ From 31db56207bf1b5d3bf1f3bfae00cdd2366c7118e Mon Sep 17 00:00:00 2001 From: Hyounggyu Choi Date: Mon, 26 Jun 2023 16:24:50 +0200 Subject: [PATCH 10/12] local-build: add support for key verification for IBM Secure Execution This is to make `build_se_image.sh` incorporate the key verification originally supported by `genprotimg`. It can be achieved by specifying two environment variables called `SIGNING_KEY_CERT_PATH` and `INTERMEDIATE_CA_CERT_PATH`. Signed-off-by: Hyounggyu Choi --- tools/packaging/guest-image/build_se_image.sh | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tools/packaging/guest-image/build_se_image.sh b/tools/packaging/guest-image/build_se_image.sh index d1b342ca6a..aae3c78fe0 100755 --- a/tools/packaging/guest-image/build_se_image.sh +++ b/tools/packaging/guest-image/build_se_image.sh @@ -40,9 +40,12 @@ Options: --destdir=${destdir} Environment variables: - HKD_PATH (required): Secure Execution host key document, generally specific to your machine. See + HKD_PATH (required): a path for a directory which includes at least one host key document + for Secure Execution, generally specific to your machine. 
See https://www.ibm.com/docs/en/linux-on-systems?topic=tasks-verify-host-key-document for information on how to retrieve and verify this document. + SIGNING_KEY_CERT_PATH: a path for the IBM zSystem signing key certificate + INTERMEDIATE_CA_CERT_PATH: a path for the intermediate CA certificate signed by the root CA DEBUG : If set, display debug information. EOF exit "${1:-0}" @@ -62,6 +65,15 @@ build_secure_image() { kernel_params="${1:-}" install_src_dir="${2:-}" install_dest_dir="${3:-}" + key_verify_option="--no-verify" # no verification for CI testing purposes + + if [ -n "${SIGNING_KEY_CERT_PATH:-}" ] && [ -n "${INTERMEDIATE_CA_CERT_PATH:-}" ]; then + if [ -e "${SIGNING_KEY_CERT_PATH}" ] && [ -e "${INTERMEDIATE_CA_CERT_PATH}" ]; then + key_verify_option="--cert=${SIGNING_KEY_CERT_PATH} --cert=${INTERMEDIATE_CA_CERT_PATH}" + else + die "Specified certificate(s) not found" + fi + fi if [ ! -f "${install_src_dir}/vmlinuz.container" ] || [ ! -f "${install_src_dir}/kata-containers-initrd.img" ]; then @@ -99,7 +111,7 @@ EOF --image="${install_src_dir}/vmlinuz.container" \ --ramdisk="${install_src_dir}/kata-containers-initrd.img" \ --parmfile="${parmfile}" \ - --no-verify # no verification for CI testing purposes + "${key_verify_option}" build_result=$? if [ $build_result -eq 0 ]; then From f38c7f14c5793e25e44b726b2005af470b4818c4 Mon Sep 17 00:00:00 2001 From: Hyounggyu Choi Date: Mon, 23 Oct 2023 08:59:56 +0200 Subject: [PATCH 11/12] gha: remove build redundancy of kernel and rootfs-initrd It is to remove the build redundancy of `kernel` and `rootfs-initrd` by making `boot-image-se` built based on them at the second build stage. 
Signed-off-by: Hyounggyu Choi --- .../build-kata-static-tarball-s390x.yaml | 55 +++++++++++++++---- 1 file changed, 45 insertions(+), 10 deletions(-) diff --git a/.github/workflows/build-kata-static-tarball-s390x.yaml b/.github/workflows/build-kata-static-tarball-s390x.yaml index f9f3f7aade..d73438535e 100644 --- a/.github/workflows/build-kata-static-tarball-s390x.yaml +++ b/.github/workflows/build-kata-static-tarball-s390x.yaml @@ -27,7 +27,6 @@ jobs: strategy: matrix: asset: - - boot-image-se - kernel - qemu - rootfs-image @@ -60,13 +59,6 @@ jobs: env: TARGET_BRANCH: ${{ inputs.target-branch }} - - name: Place a host key document - run: | - mkdir -p "host-key-document" - cp "${CI_HKD_PATH}" "host-key-document" - env: - CI_HKD_PATH: ${{ secrets.CI_HKD_PATH }} - - name: Build ${{ matrix.asset }} run: | make "${KATA_ASSET}-tarball" @@ -82,7 +74,6 @@ jobs: ARTEFACT_REGISTRY_USERNAME: ${{ github.actor }} ARTEFACT_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }} TARGET_BRANCH: ${{ inputs.target-branch }} - HKD_PATH: "host-key-document" - name: store-artifact ${{ matrix.asset }} uses: actions/upload-artifact@v3 @@ -92,9 +83,53 @@ jobs: retention-days: 15 if-no-files-found: error - create-kata-tarball: + build-asset-boot-image-se: runs-on: s390x needs: build-asset + steps: + - name: Adjust a permission for repo + run: | + sudo chown -R $USER:$USER $GITHUB_WORKSPACE + - uses: actions/checkout@v3 + + - name: get-artifacts + uses: actions/download-artifact@v3 + with: + name: kata-artifacts-s390x${{ inputs.tarball-suffix }} + path: kata-artifacts + + - name: Place a host key document + run: | + mkdir -p "host-key-document" + cp "${CI_HKD_PATH}" "host-key-document" + env: + CI_HKD_PATH: ${{ secrets.CI_HKD_PATH }} + + - name: Build boot-image-se + run: | + base_dir=tools/packaging/kata-deploy/local-build/ + cp -r kata-artifacts ${base_dir}/build + # Skip building dependant artifacts of boot-image-se-tarball + # because we already have them from the previous build + sed -i 
's/\(^boot-image-se-tarball:\).*/\1/g' ${base_dir}/Makefile + make boot-image-se-tarball + build_dir=$(readlink -f build) + sudo cp -r "${build_dir}" "kata-build" + sudo chown -R $(id -u):$(id -g) "kata-build" + env: + HKD_PATH: "host-key-document" + + - name: store-artifact boot-image-se + uses: actions/upload-artifact@v3 + with: + name: kata-artifacts-s390x${{ inputs.tarball-suffix }} + path: kata-build/kata-static-boot-image-se.tar.xz + retention-days: 1 + if-no-files-found: error + + create-kata-tarball: + runs-on: s390x + needs: [build-asset, build-asset-boot-image-se] steps: - name: Adjust a permission for repo run: | From 3fab1690a44e591db7d3feefde5323f4cb3cdf0f Mon Sep 17 00:00:00 2001 From: Hyounggyu Choi Date: Mon, 23 Oct 2023 13:09:00 +0200 Subject: [PATCH 12/12] local-build: make strip support for cross-compilation This is to adjust a name of the binary `strip` to a target architecture for cross-compilation. Signed-off-by: Hyounggyu Choi --- tools/osbuilder/rootfs-builder/rootfs.sh | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/tools/osbuilder/rootfs-builder/rootfs.sh b/tools/osbuilder/rootfs-builder/rootfs.sh index 2a8f069804..b6cae53da9 100755 --- a/tools/osbuilder/rootfs-builder/rootfs.sh +++ b/tools/osbuilder/rootfs-builder/rootfs.sh @@ -40,7 +40,17 @@ TARGET_ARCH=${TARGET_ARCH:-$(uname -m)} ARCH=${ARCH:-$(uname -m)} [ "${TARGET_ARCH}" == "aarch64" ] && TARGET_ARCH=arm64 TARGET_OS=${TARGET_OS:-linux} -[ "${CROSS_BUILD}" == "true" ] && BUILDX=buildx && PLATFORM="--platform=${TARGET_OS}/${TARGET_ARCH}" +stripping_tool="strip" +if [ "${CROSS_BUILD}" == "true" ]; then + BUILDX=buildx + PLATFORM="--platform=${TARGET_OS}/${TARGET_ARCH}" + if command -v "${TARGET_ARCH}-linux-gnu-strip" >/dev/null; then + stripping_tool="${TARGET_ARCH}-linux-gnu-strip" + else + die "Could not find ${TARGET_ARCH}-linux-gnu-strip for cross build" + fi +fi + handle_error() { local exit_code="${?}" @@ -457,6 +467,8 @@ build_rootfs_distro() 
--env SECCOMP="${SECCOMP}" \ --env SELINUX="${SELINUX}" \ --env DEBUG="${DEBUG}" \ + --env CROSS_BUILD="${CROSS_BUILD}" \ + --env TARGET_ARCH="${TARGET_ARCH}" \ --env HOME="/root" \ --env AGENT_POLICY="${AGENT_POLICY}" \ -v "${repo_dir}":"/kata-containers" \ @@ -619,7 +631,7 @@ EOF make clean make LIBC=${LIBC} INIT=${AGENT_INIT} SECCOMP=${SECCOMP} AGENT_POLICY=${AGENT_POLICY} make install DESTDIR="${ROOTFS_DIR}" LIBC=${LIBC} INIT=${AGENT_INIT} - strip ${ROOTFS_DIR}/usr/bin/kata-agent + ${stripping_tool} ${ROOTFS_DIR}/usr/bin/kata-agent if [ "${SECCOMP}" == "yes" ]; then rm -rf "${libseccomp_install_dir}" "${gperf_install_dir}" fi @@ -664,7 +676,7 @@ EOF local opa_bin="${ROOTFS_DIR}${opa_bin_dir}/opa" info "Installing OPA binary to ${opa_bin}" install -D -o root -g root -m 0755 opa -T "${opa_bin}" - strip ${ROOTFS_DIR}${opa_bin_dir}/opa + ${stripping_tool} ${ROOTFS_DIR}${opa_bin_dir}/opa else info "OPA binary already exists in ${opa_bin_dir}" fi