diff --git a/.github/workflows/run-kata-coco-tests.yaml b/.github/workflows/run-kata-coco-tests.yaml index 22580f7e12..295da14167 100644 --- a/.github/workflows/run-kata-coco-tests.yaml +++ b/.github/workflows/run-kata-coco-tests.yaml @@ -126,7 +126,6 @@ jobs: timeout-minutes: 5 run: bash tests/integration/kubernetes/gha-run.sh delete-csi-driver - # AMD has deprecated SEV support on Kata and henceforth SNP will be the only feature supported for Kata Containers. run-k8s-tests-sev-snp: strategy: fail-fast: false diff --git a/src/runtime/Makefile b/src/runtime/Makefile index 273364d267..fa1dd4ddaa 100644 --- a/src/runtime/Makefile +++ b/src/runtime/Makefile @@ -106,7 +106,6 @@ GENERATED_VARS = \ CONFIG_QEMU_NVIDIA_GPU_IN \ CONFIG_QEMU_NVIDIA_GPU_SNP_IN \ CONFIG_QEMU_NVIDIA_GPU_TDX_IN \ - CONFIG_QEMU_SEV_IN \ CONFIG_QEMU_TDX_IN \ CONFIG_QEMU_SNP_IN \ CONFIG_CLH_IN \ @@ -149,7 +148,6 @@ FIRMWAREVOLUMEPATH := FIRMWARETDVFPATH := PLACEHOLDER_FOR_DISTRO_OVMF_WITH_TDX_SUPPORT FIRMWARETDVFVOLUMEPATH := -FIRMWARESEVPATH := $(PREFIXDEPS)/share/ovmf/OVMF.fd FIRMWARESNPPATH := $(PREFIXDEPS)/share/ovmf/AMDSEV.fd ROOTMEASURECONFIG ?= "" @@ -238,11 +236,10 @@ DEFVALIDENTROPYSOURCES := [\"/dev/urandom\",\"/dev/random\",\"\"] DEFDISABLEBLOCK := false DEFSHAREDFS_CLH_VIRTIOFS := virtio-fs DEFSHAREDFS_QEMU_VIRTIOFS := virtio-fs -# Please keep DEFSHAREDFS_QEMU_COCO_DEV_VIRTIOFS in sync with TDX/SEV/SNP +# Please keep DEFSHAREDFS_QEMU_COCO_DEV_VIRTIOFS in sync with TDX/SNP DEFSHAREDFS_QEMU_COCO_DEV_VIRTIOFS := none DEFSHAREDFS_STRATOVIRT_VIRTIOFS := virtio-fs DEFSHAREDFS_QEMU_TDX_VIRTIOFS := none -DEFSHAREDFS_QEMU_SEV_VIRTIOFS := none DEFSHAREDFS_QEMU_SNP_VIRTIOFS := none DEFVIRTIOFSDAEMON := $(LIBEXECDIR)/virtiofsd DEFVALIDVIRTIOFSDAEMONPATHS := [\"$(DEFVIRTIOFSDAEMON)\"] @@ -345,18 +342,6 @@ ifneq (,$(QEMUCMD)) CONFIGS += $(CONFIG_QEMU_TDX) - CONFIG_FILE_QEMU_SEV = configuration-qemu-sev.toml - CONFIG_QEMU_SEV = config/$(CONFIG_FILE_QEMU_SEV) - CONFIG_QEMU_SEV_IN = $(CONFIG_QEMU_SEV).in - - CONFIG_PATH_QEMU_SEV = $(abspath $(CONFDIR)/$(CONFIG_FILE_QEMU_SEV)) - CONFIG_PATHS += $(CONFIG_PATH_QEMU_SEV) - - SYSCONFIG_QEMU_SEV = $(abspath $(SYSCONFDIR)/$(CONFIG_FILE_QEMU_SEV)) - SYSCONFIG_PATHS_SEV += $(SYSCONFIG_QEMU_SEV) - - CONFIGS += $(CONFIG_QEMU_SEV) - CONFIG_FILE_QEMU_SNP = configuration-qemu-snp.toml CONFIG_QEMU_SNP = config/$(CONFIG_FILE_QEMU_SNP) CONFIG_QEMU_SNP_IN = $(CONFIG_QEMU_SNP).in @@ -660,7 +645,6 @@ USER_VARS += KERNELPATH_FC USER_VARS += KERNELPATH_STRATOVIRT USER_VARS += KERNELVIRTIOFSPATH USER_VARS += FIRMWAREPATH -USER_VARS += FIRMWARESEVPATH USER_VARS += FIRMWARETDVFPATH USER_VARS += FIRMWAREVOLUMEPATH USER_VARS += FIRMWARETDVFVOLUMEPATH @@ -728,7 +712,6 @@ USER_VARS += DEFSHAREDFS_QEMU_VIRTIOFS USER_VARS += DEFSHAREDFS_QEMU_COCO_DEV_VIRTIOFS USER_VARS += DEFSHAREDFS_STRATOVIRT_VIRTIOFS USER_VARS += DEFSHAREDFS_QEMU_TDX_VIRTIOFS -USER_VARS += DEFSHAREDFS_QEMU_SEV_VIRTIOFS USER_VARS += DEFSHAREDFS_QEMU_SNP_VIRTIOFS USER_VARS += DEFVIRTIOFSDAEMON USER_VARS += DEFVALIDVIRTIOFSDAEMONPATHS diff --git a/src/runtime/config/configuration-qemu-sev.toml.in b/src/runtime/config/configuration-qemu-sev.toml.in deleted file mode 100644 index e0ea53133d..0000000000 --- a/src/runtime/config/configuration-qemu-sev.toml.in +++ /dev/null @@ -1,636 +0,0 @@ -# Copyright 2022 Advanced Micro Devices, Inc. -# -# SPDX-License-Identifier: Apache-2.0 -# - -# XXX: WARNING: this file is auto-generated. -# XXX: -# XXX: Source file: "@CONFIG_QEMU_SEV_IN@" -# XXX: Project: -# XXX: Name: @PROJECT_NAME@ -# XXX: Type: @PROJECT_TYPE@ - -[hypervisor.qemu] -path = "@QEMUPATH@" -kernel = "@KERNELCONFIDENTIALPATH@" -initrd = "@INITRDCONFIDENTIALPATH@" -machine_type = "@MACHINETYPE@" - -# Enable confidential guest support. -# Toggling that setting may trigger different hardware features, ranging -# from memory encryption to both memory and CPU-state encryption and integrity. -# The Kata Containers runtime dynamically detects the available feature set and -# aims at enabling the largest possible one, returning an error if none is -# available, or none is supported by the hypervisor. -# -# Known limitations: -# * Does not work by design: -# - CPU Hotplug -# - Memory Hotplug -# - NVDIMM devices -# -# Default false -confidential_guest = true - -# Enable running QEMU VMM as a non-root user. -# By default QEMU VMM run as root. When this is set to true, QEMU VMM process runs as -# a non-root random user. See documentation for the limitations of this mode. -# rootless = true - -# List of valid annotation names for the hypervisor -# Each member of the list is a regular expression, which is the base name -# of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path" -enable_annotations = @DEFENABLEANNOTATIONS@ - -# List of valid annotations values for the hypervisor -# Each member of the list is a path pattern as described by glob(3). -# The default if not set is empty (all annotations rejected.) -# Your distribution recommends: @QEMUVALIDHYPERVISORPATHS@ -valid_hypervisor_paths = @QEMUVALIDHYPERVISORPATHS@ - -# Optional space-separated list of options to pass to the guest kernel. -# For example, use `kernel_params = "vsyscall=emulate"` if you are having -# trouble running pre-2.15 glibc. -# -# WARNING: - any parameter specified here will take priority over the default -# parameter value of the same name used to start the virtual machine. -# Do not set values here unless you understand the impact of doing so as you -# may stop the virtual machine from booting. -# To see the list of default parameters, enable hypervisor debug, create a -# container and look for 'default-kernel-parameters' log entries. -kernel_params = "@KERNELPARAMS@" - -# Path to the firmware. -# If you want that qemu uses the default firmware leave this option empty -firmware = "@FIRMWARESEVPATH@" - -# Path to the firmware volume. -# firmware TDVF or OVMF can be split into FIRMWARE_VARS.fd (UEFI variables -# as configuration) and FIRMWARE_CODE.fd (UEFI program image). UEFI variables -# can be customized per each user while UEFI code is kept same. -firmware_volume = "@FIRMWAREVOLUMEPATH@" - -# Machine accelerators -# comma-separated list of machine accelerators to pass to the hypervisor. -# For example, `machine_accelerators = "nosmm,nosmbus,nosata,nopit,static-prt,nofw"` -machine_accelerators="@MACHINEACCELERATORS@" - -# Qemu seccomp sandbox feature -# comma-separated list of seccomp sandbox features to control the syscall access. -# For example, `seccompsandbox= "on,obsolete=deny,spawn=deny,resourcecontrol=deny"` -# Note: "elevateprivileges=deny" doesn't work with daemonize option, so it's removed from the seccomp sandbox -# Another note: enabling this feature may reduce performance, you may enable -# /proc/sys/net/core/bpf_jit_enable to reduce the impact. see https://man7.org/linux/man-pages/man8/bpfc.8.html -#seccompsandbox="@DEFSECCOMPSANDBOXPARAM@" - -# CPU features -# comma-separated list of cpu features to pass to the cpu -# For example, `cpu_features = "pmu=off,vmx=off" -cpu_features="@CPUFEATURES@" - -# Default number of vCPUs per SB/VM: -# unspecified or 0 --> will be set to @DEFVCPUS@ -# < 0 --> will be set to the actual number of physical cores -# > 0 <= number of physical cores --> will be set to the specified number -# > number of physical cores --> will be set to the actual number of physical cores -default_vcpus = 1 - -# Default maximum number of vCPUs per SB/VM: -# unspecified or == 0 --> will be set to the actual number of physical cores or to the maximum number -# of vCPUs supported by KVM if that number is exceeded -# > 0 <= number of physical cores --> will be set to the specified number -# > number of physical cores --> will be set to the actual number of physical cores or to the maximum number -# of vCPUs supported by KVM if that number is exceeded -# WARNING: Depending of the architecture, the maximum number of vCPUs supported by KVM is used when -# the actual number of physical cores is greater than it. -# WARNING: Be aware that this value impacts the virtual machine's memory footprint and CPU -# the hotplug functionality. For example, `default_maxvcpus = 240` specifies that until 240 vCPUs -# can be added to a SB/VM, but the memory footprint will be big. Another example, with -# `default_maxvcpus = 8` the memory footprint will be small, but 8 will be the maximum number of -# vCPUs supported by the SB/VM. In general, we recommend that you do not edit this variable, -# unless you know what are you doing. -# NOTICE: on arm platform with gicv2 interrupt controller, set it to 8. -default_maxvcpus = @DEFMAXVCPUS@ - -# Bridges can be used to hot plug devices. -# Limitations: -# * Currently only pci bridges are supported -# * Until 30 devices per bridge can be hot plugged. -# * Until 5 PCI bridges can be cold plugged per VM. -# This limitation could be a bug in qemu or in the kernel -# Default number of bridges per SB/VM: -# unspecified or 0 --> will be set to @DEFBRIDGES@ -# > 1 <= 5 --> will be set to the specified number -# > 5 --> will be set to 5 -default_bridges = @DEFBRIDGES@ - -# Default memory size in MiB for SB/VM. -# If unspecified then it will be set @DEFMEMSZ@ MiB. -default_memory = @DEFMEMSZ@ -# -# Default memory slots per SB/VM. -# If unspecified then it will be set @DEFMEMSLOTS@. -# This is will determine the times that memory will be hotadded to sandbox/VM. -#memory_slots = @DEFMEMSLOTS@ - -# Default maximum memory in MiB per SB / VM -# unspecified or == 0 --> will be set to the actual amount of physical RAM -# > 0 <= amount of physical RAM --> will be set to the specified number -# > amount of physical RAM --> will be set to the actual amount of physical RAM -default_maxmemory = @DEFMAXMEMSZ@ - -# The size in MiB will be plused to max memory of hypervisor. -# It is the memory address space for the NVDIMM device. -# If set block storage driver (block_device_driver) to "nvdimm", -# should set memory_offset to the size of block device. -# Default 0 -#memory_offset = 0 - -# Specifies virtio-mem will be enabled or not. -# Please note that this option should be used with the command -# "echo 1 > /proc/sys/vm/overcommit_memory". -# Default false -#enable_virtio_mem = true - -# Disable block device from being used for a container's rootfs. -# In case of a storage driver like devicemapper where a container's -# root file system is backed by a block device, the block device is passed -# directly to the hypervisor for performance reasons. -# This flag prevents the block device from being passed to the hypervisor, -# virtio-fs is used instead to pass the rootfs. -disable_block_device_use = @DEFDISABLEBLOCK@ - -# Shared file system type: -# - virtio-fs (default) -# - virtio-9p -# - virtio-fs-nydus -# - none -shared_fs = "@DEFSHAREDFS_QEMU_SEV_VIRTIOFS@" - -# Path to vhost-user-fs daemon. -virtio_fs_daemon = "@DEFVIRTIOFSDAEMON@" - -# List of valid annotations values for the virtiofs daemon -# The default if not set is empty (all annotations rejected.) -# Your distribution recommends: @DEFVALIDVIRTIOFSDAEMONPATHS@ -valid_virtio_fs_daemon_paths = @DEFVALIDVIRTIOFSDAEMONPATHS@ - -# Default size of DAX cache in MiB -virtio_fs_cache_size = @DEFVIRTIOFSCACHESIZE@ - -# Extra args for virtiofsd daemon -# -# Format example: -# ["-o", "arg1=xxx,arg2", "-o", "hello world", "--arg3=yyy"] -# Examples: -# Set virtiofsd log level to debug : ["-o", "log_level=debug"] or ["-d"] -# -# see `virtiofsd -h` for possible options. -virtio_fs_extra_args = @DEFVIRTIOFSEXTRAARGS@ - -# Cache mode: -# -# - none -# Metadata, data, and pathname lookup are not cached in guest. They are -# always fetched from host and any changes are immediately pushed to host. -# -# - auto -# Metadata and pathname lookup cache expires after a configured amount of -# time (default is 1 second). Data is cached while the file is open (close -# to open consistency). -# -# - always -# Metadata, data, and pathname lookup are cached in guest and never expire. -virtio_fs_cache = "@DEFVIRTIOFSCACHE@" - -# Block storage driver to be used for the hypervisor in case the container -# rootfs is backed by a block device. This is virtio-scsi, virtio-blk -# or nvdimm. -block_device_driver = "@DEFBLOCKSTORAGEDRIVER_QEMU@" - -# Specifies cache-related options will be set to block devices or not. -# Default false -#block_device_cache_set = true - -# Specifies cache-related options for block devices. -# Denotes whether use of O_DIRECT (bypass the host page cache) is enabled. -# Default false -#block_device_cache_direct = true - -# Specifies cache-related options for block devices. -# Denotes whether flush requests for the device are ignored. -# Default false -#block_device_cache_noflush = true - -# Enable iothreads (data-plane) to be used. This causes IO to be -# handled in a separate IO thread. This is currently only implemented -# for SCSI. -# -enable_iothreads = @DEFENABLEIOTHREADS@ - -# Enable pre allocation of VM RAM, default false -# Enabling this will result in lower container density -# as all of the memory will be allocated and locked -# This is useful when you want to reserve all the memory -# upfront or in the cases where you want memory latencies -# to be very predictable -# Default false -#enable_mem_prealloc = true - -# Enable huge pages for VM RAM, default false -# Enabling this will result in the VM memory -# being allocated using huge pages. -# This is useful when you want to use vhost-user network -# stacks within the container. This will automatically -# result in memory pre allocation -#enable_hugepages = true - -# Enable vhost-user storage device, default false -# Enabling this will result in some Linux reserved block type -# major range 240-254 being chosen to represent vhost-user devices. -enable_vhost_user_store = @DEFENABLEVHOSTUSERSTORE@ - -# The base directory specifically used for vhost-user devices. -# Its sub-path "block" is used for block devices; "block/sockets" is -# where we expect vhost-user sockets to live; "block/devices" is where -# simulated block device nodes for vhost-user devices to live. -vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@" - -# Enable vIOMMU, default false -# Enabling this will result in the VM having a vIOMMU device -# This will also add the following options to the kernel's -# command line: intel_iommu=on,iommu=pt -#enable_iommu = true - -# Enable IOMMU_PLATFORM, default false -# Enabling this will result in the VM device having iommu_platform=on set -#enable_iommu_platform = true - -# List of valid annotations values for the vhost user store path -# The default if not set is empty (all annotations rejected.) -# Your distribution recommends: @DEFVALIDVHOSTUSERSTOREPATHS@ -valid_vhost_user_store_paths = @DEFVALIDVHOSTUSERSTOREPATHS@ - -# Enable file based guest memory support. The default is an empty string which -# will disable this feature. In the case of virtio-fs, this is enabled -# automatically and '/dev/shm' is used as the backing folder. -# This option will be ignored if VM templating is enabled. -#file_mem_backend = "@DEFFILEMEMBACKEND@" - -# List of valid annotations values for the file_mem_backend annotation -# The default if not set is empty (all annotations rejected.) -# Your distribution recommends: @DEFVALIDFILEMEMBACKENDS@ -valid_file_mem_backends = @DEFVALIDFILEMEMBACKENDS@ - -# -pflash can add image file to VM. The arguments of it should be in format -# of ["/path/to/flash0.img", "/path/to/flash1.img"] -pflashes = [] - -# This option changes the default hypervisor and kernel parameters -# to enable debug output where available. -# -# Default false -#enable_debug = true - -# Disable the customizations done in the runtime when it detects -# that it is running on top a VMM. This will result in the runtime -# behaving as it would when running on bare metal. -# -#disable_nesting_checks = true - -# This is the msize used for 9p shares. It is the number of bytes -# used for 9p packet payload. -#msize_9p = @DEFMSIZE9P@ - -# If false and nvdimm is supported, use nvdimm device to plug guest image. -# Otherwise virtio-block device is used. -# -# nvdimm is not supported when `confidential_guest = true`. -disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM@ - -# Before hot plugging a PCIe device, you need to add a pcie_root_port device. -# Use this parameter when using some large PCI bar devices, such as Nvidia GPU -# The value means the number of pcie_root_port -# Default 0 -#pcie_root_port = 2 - -# If vhost-net backend for virtio-net is not desired, set to true. Default is false, which trades off -# security (vhost-net runs ring0) for network I/O performance. -#disable_vhost_net = true - -# -# Default entropy source. -# The path to a host source of entropy (including a real hardware RNG) -# /dev/urandom and /dev/random are two main options. -# Be aware that /dev/random is a blocking source of entropy. If the host -# runs out of entropy, the VMs boot time will increase leading to get startup -# timeouts. -# The source of entropy /dev/urandom is non-blocking and provides a -# generally acceptable source of entropy. It should work well for pretty much -# all practical purposes. -#entropy_source= "@DEFENTROPYSOURCE@" - -# List of valid annotations values for entropy_source -# The default if not set is empty (all annotations rejected.) -# Your distribution recommends: @DEFVALIDENTROPYSOURCES@ -valid_entropy_sources = @DEFVALIDENTROPYSOURCES@ - -# Path to OCI hook binaries in the *guest rootfs*. -# This does not affect host-side hooks which must instead be added to -# the OCI spec passed to the runtime. -# -# You can create a rootfs with hooks by customizing the osbuilder scripts: -# https://github.com/kata-containers/kata-containers/tree/main/tools/osbuilder -# -# Hooks must be stored in a subdirectory of guest_hook_path according to their -# hook type, i.e. "guest_hook_path/{prestart,poststart,poststop}". -# The agent will scan these directories for executable files and add them, in -# lexicographical order, to the lifecycle of the guest container. -# Hooks are executed in the runtime namespace of the guest. See the official documentation: -# https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks -# Warnings will be logged if any error is encountered while scanning for hooks, -# but it will not abort container execution. -#guest_hook_path = "/usr/share/oci/hooks" -# -# Use rx Rate Limiter to control network I/O inbound bandwidth(size in bits/sec for SB/VM). -# In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) to discipline traffic. -# Default 0-sized value means unlimited rate. -#rx_rate_limiter_max_rate = 0 -# Use tx Rate Limiter to control network I/O outbound bandwidth(size in bits/sec for SB/VM). -# In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) and ifb(Intermediate Functional Block) -# to discipline traffic. -# Default 0-sized value means unlimited rate. -#tx_rate_limiter_max_rate = 0 - -# Set where to save the guest memory dump file. -# If set, when GUEST_PANICKED event occurred, -# guest memeory will be dumped to host filesystem under guest_memory_dump_path, -# This directory will be created automatically if it does not exist. -# -# The dumped file(also called vmcore) can be processed with crash or gdb. -# -# WARNING: -# Dump guest’s memory can take very long depending on the amount of guest memory -# and use much disk space. -#guest_memory_dump_path="/var/crash/kata" - -# If enable paging. -# Basically, if you want to use "gdb" rather than "crash", -# or need the guest-virtual addresses in the ELF vmcore, -# then you should enable paging. -# -# See: https://www.qemu.org/docs/master/qemu-qmp-ref.html#Dump-guest-memory for details -#guest_memory_dump_paging=false - -# Enable swap in the guest. Default false. -# When enable_guest_swap is enabled, insert a raw file to the guest as the swap device -# if the swappiness of a container (set by annotation "io.katacontainers.container.resource.swappiness") -# is bigger than 0. -# The size of the swap device should be -# swap_in_bytes (set by annotation "io.katacontainers.container.resource.swap_in_bytes") - memory_limit_in_bytes. -# If swap_in_bytes is not set, the size should be memory_limit_in_bytes. -# If swap_in_bytes and memory_limit_in_bytes is not set, the size should -# be default_memory. -#enable_guest_swap = true - -# use legacy serial for guest console if available and implemented for architecture. Default false -#use_legacy_serial = true - -# disable applying SELinux on the VMM process (default false) -disable_selinux=@DEFDISABLESELINUX@ - -# disable applying SELinux on the container process -# If set to false, the type `container_t` is applied to the container process by default. -# Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built -# with `SELINUX=yes`. -# (default: true) -disable_guest_selinux=@DEFDISABLEGUESTSELINUX@ - -[factory] -# VM templating support. Once enabled, new VMs are created from template -# using vm cloning. They will share the same initial kernel, initramfs and -# agent memory by mapping it readonly. It helps speeding up new container -# creation and saves a lot of memory if there are many kata containers running -# on the same host. -# -# When disabled, new VMs are created from scratch. -# -# Note: Requires "initrd=" to be set ("image=" is not supported). -# -# Default false -#enable_template = true - -# Specifies the path of template. -# -# Default "/run/vc/vm/template" -#template_path = "/run/vc/vm/template" - -# The number of caches of VMCache: -# unspecified or == 0 --> VMCache is disabled -# > 0 --> will be set to the specified number -# -# VMCache is a function that creates VMs as caches before using it. -# It helps speed up new container creation. -# The function consists of a server and some clients communicating -# through Unix socket. The protocol is gRPC in protocols/cache/cache.proto. -# The VMCache server will create some VMs and cache them by factory cache. -# It will convert the VM to gRPC format and transport it when gets -# requestion from clients. -# Factory grpccache is the VMCache client. It will request gRPC format -# VM and convert it back to a VM. If VMCache function is enabled, -# kata-runtime will request VM from factory grpccache when it creates -# a new sandbox. -# -# Default 0 -#vm_cache_number = 0 - -# Specify the address of the Unix socket that is used by VMCache. -# -# Default /var/run/kata-containers/cache.sock -#vm_cache_endpoint = "/var/run/kata-containers/cache.sock" - -[agent.@PROJECT_TYPE@] -# If enabled, make the agent display debug-level messages. -# (default: disabled) -#enable_debug = true - -# Enable agent tracing. -# -# If enabled, the agent will generate OpenTelemetry trace spans. -# -# Notes: -# -# - If the runtime also has tracing enabled, the agent spans will be -# associated with the appropriate runtime parent span. -# - If enabled, the runtime will wait for the container to shutdown, -# increasing the container shutdown time slightly. -# -# (default: disabled) -#enable_tracing = true - -# Comma separated list of kernel modules and their parameters. -# These modules will be loaded in the guest kernel using modprobe(8). -# The following example can be used to load two kernel modules with parameters -# - kernel_modules=["e1000e InterruptThrottleRate=3000,3000,3000 EEE=1", "i915 enable_ppgtt=0"] -# The first word is considered as the module name and the rest as its parameters. -# Container will not be started when: -# * A kernel module is specified and the modprobe command is not installed in the guest -# or it fails loading the module. -# * The module is not available in the guest or it doesn't met the guest kernel -# requirements, like architecture and version. -# -kernel_modules=[] - -# Enable debug console. - -# If enabled, user can connect guest OS running inside hypervisor -# through "kata-runtime exec " command - -#debug_console_enabled = true - -# Agent connection dialing timeout value in seconds -# (default: 90) -dial_timeout = 90 - -[runtime] -# If enabled, the runtime will log additional debug messages to the -# system log -# (default: disabled) -#enable_debug = true -# -# Internetworking model -# Determines how the VM should be connected to the -# the container network interface -# Options: -# -# - macvtap -# Used when the Container network interface can be bridged using -# macvtap. -# -# - none -# Used when customize network. Only creates a tap device. No veth pair. -# -# - tcfilter -# Uses tc filter rules to redirect traffic from the network interface -# provided by plugin to a tap interface connected to the VM. -# -internetworking_model="@DEFNETWORKMODEL_QEMU@" - -# disable guest seccomp -# Determines whether container seccomp profiles are passed to the virtual -# machine and applied by the kata agent. If set to true, seccomp is not applied -# within the guest -# (default: true) -disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ - -# Apply a custom SELinux security policy to the container process inside the VM. -# This is used when you want to apply a type other than the default `container_t`, -# so general users should not uncomment and apply it. -# (format: "user:role:type") -# Note: You cannot specify MCS policy with the label because the sensitivity levels and -# categories are determined automatically by high-level container runtimes such as containerd. -#guest_selinux_label="@DEFGUESTSELINUXLABEL@" - -# If enabled, the runtime will create opentracing.io traces and spans. -# (See https://www.jaegertracing.io/docs/getting-started). -# (default: disabled) -#enable_tracing = true - -# Set the full url to the Jaeger HTTP Thrift collector. -# The default if not set will be "http://localhost:14268/api/traces" -#jaeger_endpoint = "" - -# Sets the username to be used if basic auth is required for Jaeger. -#jaeger_user = "" - -# Sets the password to be used if basic auth is required for Jaeger. -#jaeger_password = "" - -# If enabled, the runtime will not create a network namespace for shim and hypervisor processes. -# This option may have some potential impacts to your host. It should only be used when you know what you're doing. -# `disable_new_netns` conflicts with `internetworking_model=tcfilter` and `internetworking_model=macvtap`. It works only -# with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge -# (like OVS) directly. -# (default: false) -#disable_new_netns = true - -# if enabled, the runtime will add all the kata processes inside one dedicated cgroup. -# The container cgroups in the host are not created, just one single cgroup per sandbox. -# The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox. -# The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation. -# The sandbox cgroup is constrained if there is no container type annotation. -# See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType -sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@ - -# If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In -# this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful -# when a hardware architecture or hypervisor solutions is utilized which does not support CPU and/or memory hotplug. -# Compatibility for determining appropriate sandbox (VM) size: -# - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O -# does not yet support sandbox sizing annotations. -# - When running single containers using a tool like ctr, container sizing information will be available. -static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT_TEE@ - -# If specified, sandbox_bind_mounts identifieds host paths to be mounted (ro) into the sandboxes shared path. -# This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory. -# If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts` -# These will not be exposed to the container workloads, and are only provided for potential guest services. -sandbox_bind_mounts=@DEFBINDMOUNTS@ - -# VFIO Mode -# Determines how VFIO devices should be be presented to the container. -# Options: -# -# - vfio -# Matches behaviour of OCI runtimes (e.g. runc) as much as -# possible. VFIO devices will appear in the container as VFIO -# character devices under /dev/vfio. The exact names may differ -# from the host (they need to match the VM's IOMMU group numbers -# rather than the host's) -# -# - guest-kernel -# This is a Kata-specific behaviour that's useful in certain cases. -# The VFIO device is managed by whatever driver in the VM kernel -# claims it. This means it will appear as one or more device nodes -# or network interfaces depending on the nature of the device. -# Using this mode requires specially built workloads that know how -# to locate the relevant device interfaces within the VM. -# -vfio_mode="@DEFVFIOMODE@" - -# If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will -# be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest. -disable_guest_empty_dir=@DEFDISABLEGUESTEMPTYDIR@ - -# Enabled experimental feature list, format: ["a", "b"]. -# Experimental features are features not stable enough for production, -# they may break compatibility, and are prepared for a big version bump. -# Supported experimental features: -# (default: []) -experimental=@DEFAULTEXPFEATURES@ - -# If enabled, user can run pprof tools with shim v2 process through kata-monitor. -# (default: false) -# enable_pprof = true - -# Indicates the CreateContainer request timeout needed for the workload(s) -# It using guest_pull this includes the time to pull the image inside the guest -# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s) -# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config -# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout. -# In essence, the timeout used for guest pull=runtime-request-timeout= entryHeaderSize { - tsize := len(tableBytes) - entryBytes := bytes.NewReader(tableBytes[tsize-entryHeaderSize:]) - var entry ovmfFooterTableEntry - err := binary.Read(entryBytes, binary.LittleEndian, &entry) - if err != nil { - return table, err - } - if int(entry.Size) < entryHeaderSize { - return table, errors.New("Invalid entry size") - } - entryData := tableBytes[tsize-int(entry.Size) : tsize-entryHeaderSize] - table[entry.Guid] = entryData - tableBytes = tableBytes[:tsize-int(entry.Size)] - } - return table, nil -} - -func (o *ovmf) tableItem(guid guidLE) ([]byte, error) { - value, ok := o.table[guid] - if !ok { - return []byte{}, errors.New("OVMF footer table entry not found") - } - return value, nil -} - -func (o *ovmf) sevEsResetEip() (uint32, error) { - value, err := o.tableItem(sevEsResetBlockGuid) - if err != nil { - return 0, err - } - return binary.LittleEndian.Uint32(value), nil -} diff --git a/src/runtime/pkg/sev/sev.go b/src/runtime/pkg/sev/sev.go deleted file mode 100644 index bdf73cf603..0000000000 --- a/src/runtime/pkg/sev/sev.go +++ /dev/null @@ -1,203 +0,0 @@ -// Copyright contributors to AMD SEV/-ES in Go -// -// SPDX-License-Identifier: Apache-2.0 -// - -// Package sev can be used to compute the expected hash values for -// SEV/-ES pre-launch attestation -package sev - -import ( - "bytes" - "crypto/sha256" - "encoding/binary" - "io" - "os" -) - -type guidLE [16]byte - -// The following definitions must be identical to those in QEMU target/i386/sev.c - -// GUID: 9438d606-4f22-4cc9-b479-a793d411fd21 -var sevHashTableHeaderGuid = guidLE{0x06, 0xd6, 0x38, 0x94, 0x22, 0x4f, 0xc9, 0x4c, 0xb4, 0x79, 0xa7, 0x93, 0xd4, 0x11, 0xfd, 0x21} - -// GUID: 4de79437-abd2-427f-b835-d5b172d2045b -var sevKernelEntryGuid = guidLE{0x37, 0x94, 0xe7, 0x4d, 0xd2, 0xab, 0x7f, 0x42, 0xb8, 0x35, 0xd5, 0xb1, 0x72, 0xd2, 0x04, 0x5b} - -// GUID: 44baf731-3a2f-4bd7-9af1-41e29169781d -var sevInitrdEntryGuid = guidLE{0x31, 0xf7, 0xba, 0x44, 0x2f, 0x3a, 0xd7, 0x4b, 0x9a, 0xf1, 0x41, 0xe2, 0x91, 0x69, 0x78, 0x1d} - -// GUID: 97d02dd8-bd20-4c94-aa78-e7714d36ab2a -var sevCmdlineEntryGuid = guidLE{0xd8, 0x2d, 0xd0, 0x97, 0x20, 0xbd, 0x94, 0x4c, 0xaa, 0x78, 0xe7, 0x71, 0x4d, 0x36, 0xab, 0x2a} - -type sevHashTableEntry struct { - entryGuid guidLE - length uint16 - hash [sha256.Size]byte -} - -type sevHashTable struct { - tableGuid guidLE - length uint16 - cmdline sevHashTableEntry - initrd sevHashTableEntry - kernel sevHashTableEntry -} - -type paddedSevHashTable struct { - table sevHashTable - padding [8]byte -} - -func fileSha256(filename string) (res [sha256.Size]byte, err error) { - f, err := os.Open(filename) - if err != nil { - return res, err - } - defer f.Close() - - digest := sha256.New() - if _, err := io.Copy(digest, f); err != nil { - return res, err - } - - copy(res[:], digest.Sum(nil)) - return res, nil -} - -func constructSevHashesTable(kernelPath, initrdPath, cmdline string) ([]byte, error) { - kernelHash, err := fileSha256(kernelPath) - if err != nil { - return []byte{}, err - } - - initrdHash, err := fileSha256(initrdPath) - if err != nil { - return []byte{}, err - } - - cmdlineHash := sha256.Sum256(append([]byte(cmdline), 0)) - - buf := new(bytes.Buffer) - err = binary.Write(buf, binary.LittleEndian, sevHashTableEntry{}) - if err != nil { - return []byte{}, err - } - entrySize := uint16(buf.Len()) - - buf = new(bytes.Buffer) - err = binary.Write(buf, binary.LittleEndian, sevHashTable{}) - if err != nil { - return []byte{}, err - } - tableSize := uint16(buf.Len()) - - ht := paddedSevHashTable{ - table: sevHashTable{ - tableGuid: sevHashTableHeaderGuid, - length: tableSize, - cmdline: sevHashTableEntry{ - entryGuid: sevCmdlineEntryGuid, - length: entrySize, - hash: cmdlineHash, - }, - initrd: sevHashTableEntry{ - entryGuid: sevInitrdEntryGuid, - length: entrySize, - hash: initrdHash, - }, - kernel: sevHashTableEntry{ - entryGuid: sevKernelEntryGuid, - length: entrySize, - hash: kernelHash, - }, - }, - padding: [8]byte{0, 0, 0, 0, 0, 0, 0, 0}, - } - - htBuf := new(bytes.Buffer) - err = binary.Write(htBuf, binary.LittleEndian, ht) - if err != nil { - return []byte{}, err - } - return htBuf.Bytes(), nil -} - -// CalculateLaunchDigest returns the sha256 encoded SEV launch digest based off -// the current firmware, kernel, initrd, and the kernel cmdline -func CalculateLaunchDigest(firmwarePath, kernelPath, initrdPath, cmdline string) (res [sha256.Size]byte, err error) { - f, err := os.Open(firmwarePath) - if err != nil { - return res, err - } - defer f.Close() - - digest := sha256.New() - if _, err := io.Copy(digest, f); err != nil { - return res, err - } - - // When used for confidential containers in kata-containers, kernelPath - // is always set (direct boot). However, this current package can also - // be used by other programs which may calculate launch digests of - // arbitrary SEV guests without SEV kernel hashes table. - if kernelPath != "" { - ht, err := constructSevHashesTable(kernelPath, initrdPath, cmdline) - if err != nil { - return res, err - } - digest.Write(ht) - } - - copy(res[:], digest.Sum(nil)) - return res, nil -} - -// CalculateSEVESLaunchDigest returns the sha256 encoded SEV-ES launch digest -// based off the current firmware, kernel, initrd, and the kernel cmdline, and -// the number of vcpus and their type -func CalculateSEVESLaunchDigest(vcpus int, vcpuSig VCPUSig, firmwarePath, kernelPath, initrdPath, cmdline string) (res [sha256.Size]byte, err error) { - f, err := os.Open(firmwarePath) - if err != nil { - return res, err - } - defer f.Close() - - digest := sha256.New() - if _, err := io.Copy(digest, f); err != nil { - return res, err - } - - // When used for confidential containers in kata-containers, kernelPath - // is always set (direct boot). However, this current package can also - // be used by other programs which may calculate launch digests of - // arbitrary SEV guests without SEV kernel hashes table. - if kernelPath != "" { - ht, err := constructSevHashesTable(kernelPath, initrdPath, cmdline) - if err != nil { - return res, err - } - digest.Write(ht) - } - - o, err := NewOvmf(firmwarePath) - if err != nil { - return res, err - } - resetEip, err := o.sevEsResetEip() - if err != nil { - return res, err - } - v := vmsaBuilder{uint64(resetEip), vcpuSig} - for i := 0; i < vcpus; i++ { - vmsaPage, err := v.buildPage(i) - if err != nil { - return res, err - } - digest.Write(vmsaPage) - } - - copy(res[:], digest.Sum(nil)) - return res, nil -} diff --git a/src/runtime/pkg/sev/sev_test.go b/src/runtime/pkg/sev/sev_test.go deleted file mode 100644 index 68a82ea90d..0000000000 --- a/src/runtime/pkg/sev/sev_test.go +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright contributors to AMD SEV/-ES in Go -// -// SPDX-License-Identifier: Apache-2.0 - -package sev - -import ( - "encoding/hex" - "testing" -) - -func TestCalculateLaunchDigestWithoutKernelHashes(t *testing.T) { - ld, err := CalculateLaunchDigest("testdata/ovmf_suffix.bin", "", "", "") - if err != nil { - t.Fatalf("unexpected err value: %s", err) - } - hexld := hex.EncodeToString(ld[:]) - if hexld != "b184e06e012366fd7b33ebfb361a515d05f00d354dca07b36abbc1e1e177ced5" { - t.Fatalf("wrong measurement: %s", hexld) - } -} - -func TestCalculateLaunchDigestWithKernelHashes(t *testing.T) { - ld, err := CalculateLaunchDigest("testdata/ovmf_suffix.bin", "/dev/null", "/dev/null", "") - if err != nil { - t.Fatalf("unexpected err value: %s", err) - } - hexld := hex.EncodeToString(ld[:]) - if hexld != "d59d7696efd7facfaa653758586e6120c4b6eaec3e327771d278cc6a44786ba5" { - t.Fatalf("wrong measurement: %s", hexld) - } -} - -func TestCalculateLaunchDigestWithKernelHashesSevEs(t *testing.T) { - ld, err := CalculateSEVESLaunchDigest(1, SigEpycV4, "testdata/ovmf_suffix.bin", "/dev/null", "/dev/null", "") - if err != nil { - t.Fatalf("unexpected err value: %s", err) - } - hexld := hex.EncodeToString(ld[:]) - if hexld != "7e5c26fb454621eb466978b4d0242b3c04b44a034de7fc0a2d8dac60ea2b6403" { - t.Fatalf("wrong measurement: %s", hexld) - } -} - -func TestCalculateLaunchDigestWithKernelHashesSevEsAndSmp(t *testing.T) { - ld, err := CalculateSEVESLaunchDigest(4, SigEpycV4, "testdata/ovmf_suffix.bin", "/dev/null", "/dev/null", "") - if err != nil { - t.Fatalf("unexpected err value: %s", err) - } - hexld := hex.EncodeToString(ld[:]) - if hexld != "b2111b0051fc3a06ec216899b2c78da99fb9d56c6ff2e8261dd3fe6cff79ecbc" { - t.Fatalf("wrong measurement: %s", hexld) - } -} diff --git a/src/runtime/pkg/sev/testdata/README.md b/src/runtime/pkg/sev/testdata/README.md deleted file mode 100644 index 34554dc8e2..0000000000 --- a/src/runtime/pkg/sev/testdata/README.md +++ /dev/null @@ -1,9 +0,0 @@ -# sev/testdata - -The `ovmf_suffix.bin` contains the last 4KB of the `OVMF.fd` binary from edk2's -`OvmfPkg/AmdSev/AmdSevX64.dsc` build. To save space, we committed only the -last 4KB instead of the the full 4MB binary. - -The end of the file contains a GUIDed footer table with entries that hold the -SEV-ES AP reset vector address, which is needed in order to compute VMSAs for -SEV-ES guests. diff --git a/src/runtime/pkg/sev/testdata/ovmf_suffix.bin b/src/runtime/pkg/sev/testdata/ovmf_suffix.bin deleted file mode 100644 index cc6d7ca7f0..0000000000 Binary files a/src/runtime/pkg/sev/testdata/ovmf_suffix.bin and /dev/null differ diff --git a/src/runtime/pkg/sev/vcpu_sigs.go b/src/runtime/pkg/sev/vcpu_sigs.go deleted file mode 100644 index 9cee59e2b1..0000000000 --- a/src/runtime/pkg/sev/vcpu_sigs.go +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright contributors to AMD SEV/-ES in Go -// -// SPDX-License-Identifier: Apache-2.0 - -package sev - -type VCPUSig uint64 - -const ( - // 'EPYC': family=23, model=1, stepping=2 - SigEpyc VCPUSig = 0x800f12 - - // 'EPYC-v1': family=23, model=1, stepping=2 - SigEpycV1 VCPUSig = 0x800f12 - - // 'EPYC-v2': family=23, model=1, stepping=2 - SigEpycV2 VCPUSig = 0x800f12 - - // 'EPYC-IBPB': family=23, model=1, stepping=2 - SigEpycIBPB VCPUSig = 0x800f12 - - // 'EPYC-v3': family=23, model=1, stepping=2 - SigEpycV3 VCPUSig = 0x800f12 - - // 'EPYC-v4': family=23, model=1, stepping=2 - SigEpycV4 VCPUSig = 0x800f12 - - // 'EPYC-Rome': family=23, model=49, stepping=0 - SigEpycRome VCPUSig = 0x830f10 - - // 'EPYC-Rome-v1': family=23, model=49, stepping=0 - SigEpycRomeV1 VCPUSig = 0x830f10 - - // 'EPYC-Rome-v2': family=23, model=49, stepping=0 - SigEpycRomeV2 VCPUSig = 0x830f10 - - // 'EPYC-Rome-v3': family=23, model=49, stepping=0 - SigEpycRomeV3 VCPUSig = 0x830f10 - - // 'EPYC-Milan': family=25, model=1, stepping=1 - SigEpycMilan VCPUSig = 0xa00f11 - - // 'EPYC-Milan-v1': family=25, model=1, stepping=1 - SigEpycMilanV1 VCPUSig = 0xa00f11 - - // 'EPYC-Milan-v2': family=25, model=1, stepping=1 - SigEpycMilanV2 VCPUSig = 0xa00f11 -) - -// NewVCPUSig computes the CPU signature (32-bit value) from the given family, -// model, and stepping. -// -// This computation is described in AMD's CPUID Specification, publication #25481 -// https://www.amd.com/system/files/TechDocs/25481.pdf -// See section: CPUID Fn0000_0001_EAX Family, Model, Stepping Identifiers -func NewVCPUSig(family, model, stepping uint32) VCPUSig { - var family_low, family_high uint32 - if family > 0xf { - family_low = 0xf - family_high = (family - 0x0f) & 0xff - } else { - family_low = family - family_high = 0 - } - - model_low := model & 0xf - model_high := (model >> 4) & 0xf - - stepping_low := stepping & 0xf - - return VCPUSig((family_high << 20) | - (model_high << 16) | - (family_low << 8) | - (model_low << 4) | - stepping_low) -} diff --git a/src/runtime/pkg/sev/vcpu_sigs_test.go b/src/runtime/pkg/sev/vcpu_sigs_test.go deleted file mode 100644 index 70f8487509..0000000000 --- a/src/runtime/pkg/sev/vcpu_sigs_test.go +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright contributors to AMD SEV/-ES in Go -// -// SPDX-License-Identifier: Apache-2.0 - -package sev - -import ( - "testing" -) - -func TestNewVCPUSig(t *testing.T) { - if NewVCPUSig(23, 1, 2) != SigEpyc { - t.Errorf("wrong EPYC CPU signature") - } - if NewVCPUSig(23, 49, 0) != SigEpycRome { - t.Errorf("wrong EPYC-Rome CPU signature") - } - if NewVCPUSig(25, 1, 1) != SigEpycMilan { - t.Errorf("wrong EPYC-Milan CPU signature") - } -} diff --git a/src/runtime/pkg/sev/vmsa.go b/src/runtime/pkg/sev/vmsa.go deleted file mode 100644 index c2bbc4122b..0000000000 --- a/src/runtime/pkg/sev/vmsa.go +++ /dev/null @@ -1,172 +0,0 @@ -// Copyright contributors to AMD SEV/-ES in Go -// -// SPDX-License-Identifier: Apache-2.0 - -package sev - -import ( - "bytes" - "encoding/binary" -) - -// VMCB Segment (struct vmcb_seg in the linux kernel) -type vmcbSeg struct { - selector uint16 - attrib uint16 - limit uint32 - base uint64 -} - -// VMSA page -// -// The names of the fields are taken from struct sev_es_work_area in the linux kernel: -// https://github.com/AMDESE/linux/blob/sev-snp-v12/arch/x86/include/asm/svm.h#L318 -// (following the definitions in AMD APM Vol 2 Table B-4) -type sevEsSaveArea struct { - es vmcbSeg - cs vmcbSeg - ss vmcbSeg - ds vmcbSeg - fs vmcbSeg - gs vmcbSeg - gdtr vmcbSeg - ldtr vmcbSeg - idtr vmcbSeg - tr vmcbSeg - vmpl0_ssp uint64 // nolint: unused - vmpl1_ssp uint64 // nolint: unused - vmpl2_ssp uint64 // nolint: unused - vmpl3_ssp uint64 // nolint: unused - u_cet uint64 // nolint: unused - reserved_1 [2]uint8 // nolint: unused - vmpl uint8 // nolint: unused - cpl uint8 // nolint: unused - reserved_2 [4]uint8 // nolint: unused - efer uint64 - reserved_3 [104]uint8 // nolint: unused - xss uint64 // nolint: unused - cr4 uint64 - cr3 uint64 // nolint: unused - cr0 uint64 - dr7 uint64 - dr6 uint64 - rflags uint64 - rip uint64 - dr0 uint64 // nolint: unused - dr1 uint64 // nolint: unused - dr2 uint64 // nolint: unused - dr3 uint64 // nolint: unused - dr0_addr_mask uint64 // nolint: unused - dr1_addr_mask uint64 // nolint: unused - dr2_addr_mask uint64 // nolint: unused - dr3_addr_mask uint64 // nolint: unused - reserved_4 [24]uint8 // nolint: unused - rsp uint64 // nolint: unused - s_cet uint64 // nolint: unused - ssp uint64 // nolint: unused - isst_addr uint64 // nolint: unused - rax uint64 // nolint: unused - star uint64 // nolint: unused - lstar uint64 // nolint: unused - cstar uint64 // nolint: unused - sfmask uint64 // nolint: unused - kernel_gs_base uint64 // nolint: unused - sysenter_cs uint64 // nolint: unused - sysenter_esp uint64 // nolint: unused - sysenter_eip uint64 // nolint: unused - cr2 uint64 // nolint: unused - reserved_5 [32]uint8 // nolint: unused - g_pat uint64 - dbgctrl uint64 // nolint: unused - br_from uint64 // nolint: unused - br_to uint64 // nolint: unused - last_excp_from uint64 // nolint: unused - last_excp_to uint64 // nolint: unused - reserved_7 [80]uint8 // nolint: unused - pkru uint32 // nolint: unused - reserved_8 [20]uint8 // nolint: unused - reserved_9 uint64 // nolint: unused - rcx uint64 // nolint: unused - rdx uint64 - rbx uint64 // nolint: unused - reserved_10 uint64 // nolint: unused - rbp uint64 // nolint: unused - rsi uint64 // nolint: unused - rdi uint64 // nolint: unused - r8 uint64 // nolint: unused - r9 uint64 // nolint: unused - r10 uint64 // nolint: unused - r11 uint64 // nolint: unused - r12 uint64 // nolint: unused - r13 uint64 // nolint: unused - r14 uint64 // nolint: unused - r15 uint64 // nolint: unused - reserved_11 [16]uint8 // nolint: unused - guest_exit_info_1 uint64 // nolint: unused - guest_exit_info_2 uint64 // nolint: unused - guest_exit_int_info uint64 // nolint: unused - guest_nrip uint64 // nolint: unused - sev_features uint64 - vintr_ctrl uint64 // nolint: unused - guest_exit_code uint64 // nolint: unused - virtual_tom uint64 // nolint: unused - tlb_id uint64 // nolint: unused - pcpu_id uint64 // nolint: unused - event_inj uint64 // nolint: unused - xcr0 uint64 - reserved_12 [16]uint8 // nolint: unused - x87_dp uint64 // nolint: unused - mxcsr uint32 // nolint: unused - x87_ftw uint16 // nolint: unused - x87_fsw uint16 // nolint: unused - x87_fcw uint16 // nolint: unused - x87_fop uint16 // nolint: unused - x87_ds uint16 // nolint: unused - x87_cs uint16 // nolint: unused - x87_rip uint64 // nolint: unused - fpreg_x87 [80]uint8 // nolint: unused - fpreg_xmm [256]uint8 // nolint: unused - fpreg_ymm [256]uint8 // nolint: unused - unused [2448]uint8 // nolint: unused -} - -type vmsaBuilder struct { - apEIP uint64 - vcpuSig VCPUSig -} - -func (v *vmsaBuilder) buildPage(i int) ([]byte, error) { - eip := uint64(0xfffffff0) // BSP (first vcpu) - if i > 0 { - eip = v.apEIP - } - saveArea := sevEsSaveArea{ - es: vmcbSeg{0, 0x93, 0xffff, 0}, - cs: vmcbSeg{0xf000, 0x9b, 0xffff, eip & 0xffff0000}, - ss: vmcbSeg{0, 0x93, 0xffff, 0}, - ds: vmcbSeg{0, 0x93, 0xffff, 0}, - fs: vmcbSeg{0, 0x93, 0xffff, 0}, - gs: vmcbSeg{0, 0x93, 0xffff, 0}, - gdtr: vmcbSeg{0, 0, 0xffff, 0}, - idtr: vmcbSeg{0, 0, 0xffff, 0}, - ldtr: vmcbSeg{0, 0x82, 0xffff, 0}, - tr: vmcbSeg{0, 0x8b, 0xffff, 0}, - efer: 0x1000, // KVM enables EFER_SVME - cr4: 0x40, // KVM enables X86_CR4_MCE - cr0: 0x10, - dr7: 0x400, - dr6: 0xffff0ff0, - rflags: 0x2, - rip: eip & 0xffff, - g_pat: 0x7040600070406, // PAT MSR: See AMD APM Vol 2, Section A.3 - rdx: uint64(v.vcpuSig), - sev_features: 0, // SEV-ES - xcr0: 0x1, - } - page := new(bytes.Buffer) - err := binary.Write(page, binary.LittleEndian, saveArea) - if err != nil { - return []byte{}, err - } - return page.Bytes(), nil -} diff --git a/src/runtime/virtcontainers/hypervisor_linux_amd64.go b/src/runtime/virtcontainers/hypervisor_linux_amd64.go index ac84540166..6cf48e60fe 100644 --- a/src/runtime/virtcontainers/hypervisor_linux_amd64.go +++ b/src/runtime/virtcontainers/hypervisor_linux_amd64.go @@ -10,8 +10,6 @@ import "os" const ( tdxKvmParameterPath = "/sys/module/kvm_intel/parameters/tdx" - sevKvmParameterPath = "/sys/module/kvm_amd/parameters/sev" - snpKvmParameterPath = "/sys/module/kvm_amd/parameters/sev_snp" ) @@ -30,12 +28,6 @@ func availableGuestProtection() (guestProtection, error) { return snpProtection, nil } } - // SEV is supported and enabled when the kvm module `sev` parameter is set to `1` (or `Y` for linux >= 5.12) - if _, err := os.Stat(sevKvmParameterPath); err == nil { - if c, err := os.ReadFile(sevKvmParameterPath); err == nil && len(c) > 0 && (c[0] == '1' || c[0] == 'Y') { - return sevProtection, nil - } - } return noneProtection, nil }