From 88cf3db601e10bb2682e8ce9dc750066bab04e20 Mon Sep 17 00:00:00 2001 From: Julio Montes Date: Mon, 22 Mar 2021 09:27:05 -0600 Subject: [PATCH 1/4] runtime: implement CPUFlags function `CPUFlags` returns a map with all the CPU flags, these CPU flags may help us to identify whether a system supports confidential computing or not. Signed-off-by: Julio Montes --- src/runtime/virtcontainers/hypervisor.go | 39 +++++++++++++----------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index 767215b689..05d1117bac 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -717,21 +717,16 @@ func getHostMemorySizeKb(memInfoPath string) (uint64, error) { return 0, fmt.Errorf("unable get MemTotal from %s", memInfoPath) } -// RunningOnVMM checks if the system is running inside a VM. -func RunningOnVMM(cpuInfoPath string) (bool, error) { - if runtime.GOARCH == "arm64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "s390x" { - virtLog.Info("Unable to know if the system is running inside a VM") - return false, nil - } - +func CPUFlags(cpuInfoPath string) (map[string]bool, error) { flagsField := "flags" f, err := os.Open(cpuInfoPath) if err != nil { - return false, err + return map[string]bool{}, err } defer f.Close() + flags := make(map[string]bool) scanner := bufio.NewScanner(f) for scanner.Scan() { // Expected format: ["flags", ":", ...] or ["flags:", ...] @@ -745,23 +740,31 @@ func RunningOnVMM(cpuInfoPath string) (bool, error) { } for _, field := range fields[1:] { - if field == "hypervisor" { - return true, nil - } + flags[field] = true } - // As long as we have been able to analyze the fields from - // "flags", there is no reason to check what comes next from - // /proc/cpuinfo, because we already know we are not running - // on a VMM. 
- return false, nil + return flags, nil } if err := scanner.Err(); err != nil { - return false, err + return map[string]bool{}, err } - return false, fmt.Errorf("Couldn't find %q from %q output", flagsField, cpuInfoPath) + return map[string]bool{}, fmt.Errorf("Couldn't find %q from %q output", flagsField, cpuInfoPath) +} + +// RunningOnVMM checks if the system is running inside a VM. +func RunningOnVMM(cpuInfoPath string) (bool, error) { + if runtime.GOARCH == "amd64" { + flags, err := CPUFlags(cpuInfoPath) + if err != nil { + return false, err + } + return flags["hypervisor"], nil + } + + virtLog.WithField("arch", runtime.GOARCH).Info("Unable to know if the system is running inside a VM") + return false, nil } func getHypervisorPid(h hypervisor) int { From 539afba03daa341474a6223a852bed09eb835bc4 Mon Sep 17 00:00:00 2001 From: Julio Montes Date: Mon, 22 Mar 2021 09:32:45 -0600 Subject: [PATCH 2/4] runtime: define config options to enable confidential computing Define config options to enable or disable confidential computing and its features, for example: * Image service offloading * Image decryption keys Signed-off-by: Julio Montes --- .../cli/config/configuration-qemu.toml.in | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/runtime/cli/config/configuration-qemu.toml.in b/src/runtime/cli/config/configuration-qemu.toml.in index b195701c89..8975206b43 100644 --- a/src/runtime/cli/config/configuration-qemu.toml.in +++ b/src/runtime/cli/config/configuration-qemu.toml.in @@ -16,6 +16,14 @@ kernel = "@KERNELPATH@" image = "@IMAGEPATH@" machine_type = "@MACHINETYPE@" +# Enable confidential guest support. +# Toggling that setting may trigger different hardware features, ranging +# from memory encryption to both memory and CPU-state encryption and integrity. +# The Kata Containers runtime dynamically detects the available feature set and +# aims at enabling the largest possible one. 
+# Default false +# confidential_guest = true + # List of valid annotation names for the hypervisor # Each member of the list is a regular expression, which is the base name # of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path" @@ -532,3 +540,30 @@ experimental=@DEFAULTEXPFEATURES@ # If enabled, user can run pprof tools with shim v2 process through kata-monitor. # (default: false) # enable_pprof = true + +# WARNING: All the options in the following section have not been implemented yet. +# This section was added as a placeholder. DO NOT USE IT! +[image] +# Container image service. +# +# Offload the CRI image management service to the Kata agent. +# (default: false) +#service_offload = true + +# Container image decryption keys provisioning. +# Applies only if service_offload is true. +# Keys can be provisioned locally (e.g. through a special command or +# a local file) or remotely (usually after the guest is remotely attested). +# The provision setting is a complete URL that lets the Kata agent decide +# which method to use in order to fetch the keys. +# +# Keys can be stored in a local file, in a measured and attested initrd: +#provision=data:///local/key/file +# +# Keys could be fetched through a special command or binary from the +# initrd (guest) image, e.g. a firmware call: +#provision=file:///path/to/bin/fetcher/in/guest +# +# Keys can be remotely provisioned. The Kata agent fetches them from e.g. +# a HTTPS URL: +#provision=https://my-key-broker.foo/tenant/ From 0affe8860dd93fd5404f7d80c1f6cbe73ec2a199 Mon Sep 17 00:00:00 2001 From: Julio Montes Date: Wed, 5 May 2021 09:12:36 -0500 Subject: [PATCH 3/4] virtcontainers: define confidential guest framework Define the structure and functions needed to support confidential guests, this commit doesn't add support for any specific technology, support for TDX, SEV, PEF and others will be added in following commits. 
Signed-off-by: Julio Montes --- .../pkg/katautils/config-settings.go.in | 1 + src/runtime/pkg/katautils/config.go | 11 ++++++- src/runtime/virtcontainers/hypervisor.go | 5 +++ src/runtime/virtcontainers/qemu.go | 5 +++ src/runtime/virtcontainers/qemu_amd64.go | 29 ++++++++++++---- src/runtime/virtcontainers/qemu_arch_base.go | 33 +++++++++++++++++++ 6 files changed, 77 insertions(+), 7 deletions(-) diff --git a/src/runtime/pkg/katautils/config-settings.go.in b/src/runtime/pkg/katautils/config-settings.go.in index 7cd9138baa..470527998f 100644 --- a/src/runtime/pkg/katautils/config-settings.go.in +++ b/src/runtime/pkg/katautils/config-settings.go.in @@ -54,6 +54,7 @@ const defaultDisableImageNvdimm = false const defaultVhostUserStorePath string = "/var/run/kata-containers/vhost-user/" const defaultRxRateLimiterMaxRate = uint64(0) const defaultTxRateLimiterMaxRate = uint64(0) +const defaultConfidentialGuest = false var defaultSGXEPCSize = int64(0) diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index e3f3b3bc9c..e1db0da8c0 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -1,4 +1,4 @@ -// Copyright (c) 2018 Intel Corporation +// Copyright (c) 2018-2021 Intel Corporation // Copyright (c) 2018 HyperHQ Inc. 
// // SPDX-License-Identifier: Apache-2.0 @@ -61,6 +61,12 @@ type tomlConfig struct { Runtime runtime Factory factory Netmon netmon + Image image +} + +type image struct { + ServiceOffload bool `toml:"service_offload"` + Provision string `toml:"provision"` } type factory struct { @@ -130,6 +136,7 @@ type hypervisor struct { HotplugVFIOOnRootBus bool `toml:"hotplug_vfio_on_root_bus"` DisableVhostNet bool `toml:"disable_vhost_net"` GuestMemoryDumpPaging bool `toml:"guest_memory_dump_paging"` + ConfidentialGuest bool `toml:"confidential_guest"` } type runtime struct { @@ -702,6 +709,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { EnableAnnotations: h.EnableAnnotations, GuestMemoryDumpPath: h.GuestMemoryDumpPath, GuestMemoryDumpPaging: h.GuestMemoryDumpPaging, + ConfidentialGuest: h.ConfidentialGuest, }, nil } @@ -1055,6 +1063,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig { RxRateLimiterMaxRate: defaultRxRateLimiterMaxRate, TxRateLimiterMaxRate: defaultTxRateLimiterMaxRate, SGXEPCSize: defaultSGXEPCSize, + ConfidentialGuest: defaultConfidentialGuest, } } diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index 05d1117bac..0b8cac7761 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -453,6 +453,11 @@ type HypervisorConfig struct { // GuestMemoryDumpPaging is used to indicate if enable paging // for QEMU dump-guest-memory command GuestMemoryDumpPaging bool + + // Enable confidential guest support. + // Enable or disable different hardware features, ranging + // from memory encryption to both memory and CPU-state encryption and integrity. 
+ ConfidentialGuest bool } // vcpu mapping from vcpu number to thread number diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index 9f00a12a44..126fb0d36f 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -605,6 +605,11 @@ func (q *qemu) createSandbox(ctx context.Context, id string, networkNS NetworkNa PidFile: filepath.Join(q.store.RunVMStoragePath(), q.id, "pid"), } + qemuConfig.Devices, qemuConfig.Bios, err = q.arch.appendProtectionDevice(qemuConfig.Devices, firmwarePath) + if err != nil { + return err + } + if ioThread != nil { qemuConfig.IOThreads = []govmmQemu.IOThread{*ioThread} } diff --git a/src/runtime/virtcontainers/qemu_amd64.go b/src/runtime/virtcontainers/qemu_amd64.go index 1a045fae08..ec2e764083 100644 --- a/src/runtime/virtcontainers/qemu_amd64.go +++ b/src/runtime/virtcontainers/qemu_amd64.go @@ -106,17 +106,17 @@ func newQemuArch(config HypervisorConfig) (qemuArch, error) { factory = true } - if config.IOMMU { - var q35QemuIOMMUOptions = "accel=kvm,kernel_irqchip=split" + // IOMMU and Guest Protection require a split IRQ controller for handling interrupts + // otherwise QEMU won't be able to create the kernel irqchip + if config.IOMMU || config.ConfidentialGuest { + mp.Options = "accel=kvm,kernel_irqchip=split" + } + if config.IOMMU { kernelParams = append(kernelParams, Param{"intel_iommu", "on"}) kernelParams = append(kernelParams, Param{"iommu", "pt"}) - - if mp.Type == QemuQ35 { - mp.Options = q35QemuIOMMUOptions - } } q := &qemuAmd64{ @@ -129,10 +129,17 @@ func newQemuArch(config HypervisorConfig) (qemuArch, error) { kernelParams: kernelParams, disableNvdimm: config.DisableImageNvdimm, dax: true, + protection: noneProtection, }, vmFactory: factory, } + if config.ConfidentialGuest { + if err := q.enableProtection(); err != nil { + return nil, err + } + } + q.handleImagePath(config) return q, nil @@ -191,3 +198,13 @@ func (q *qemuAmd64) appendImage(ctx 
context.Context, devices []govmmQemu.Device, func (q *qemuAmd64) appendBridges(devices []govmmQemu.Device) []govmmQemu.Device { return genericAppendBridges(devices, q.Bridges, q.qemuMachine.Type) } + +// enable protection +func (q *qemuAmd64) enableProtection() error { + return nil +} + +// append protection device +func (q *qemuAmd64) appendProtectionDevice(devices []govmmQemu.Device, firmware string) ([]govmmQemu.Device, string, error) { + return devices, firmware, nil +} diff --git a/src/runtime/virtcontainers/qemu_arch_base.go b/src/runtime/virtcontainers/qemu_arch_base.go index d2ffac4a1d..5c7b1218c4 100644 --- a/src/runtime/virtcontainers/qemu_arch_base.go +++ b/src/runtime/virtcontainers/qemu_arch_base.go @@ -11,6 +11,7 @@ import ( "errors" "fmt" "os" + "runtime" "strconv" "strings" @@ -142,8 +143,33 @@ type qemuArch interface { // append pvpanic device appendPVPanicDevice(devices []govmmQemu.Device) ([]govmmQemu.Device, error) + + // append protection device. + // This implementation is architecture specific, some archs may need + // a firmware, returns a string containing the path to the firmware that should + // be used with the -bios option, omit -bios option if the path is empty. 
+ appendProtectionDevice(devices []govmmQemu.Device, firmware string) ([]govmmQemu.Device, string, error) } +// Kind of guest protection +type guestProtection uint8 + +const ( + noneProtection guestProtection = iota + + //Intel Trust Domain Extensions + //https://software.intel.com/content/www/us/en/develop/articles/intel-trust-domain-extensions.html + tdxProtection + + // AMD Secure Encrypted Virtualization + // https://developer.amd.com/sev/ + sevProtection + + // IBM POWER 9 Protected Execution Facility + // https://www.kernel.org/doc/html/latest/powerpc/ultravisor.html + pefProtection +) + type qemuArchBase struct { qemuMachine govmmQemu.Machine qemuExePath string @@ -158,6 +184,7 @@ type qemuArchBase struct { kernelParams []Param Bridges []types.Bridge PFlash []string + protection guestProtection } const ( @@ -813,3 +840,9 @@ func (q *qemuArchBase) getPFlash() ([]string, error) { func (q *qemuArchBase) setPFlash(p []string) { q.PFlash = p } + +// append protection device +func (q *qemuArchBase) appendProtectionDevice(devices []govmmQemu.Device, firmware string) ([]govmmQemu.Device, string, error) { + virtLog.WithField("arch", runtime.GOARCH).Warnf("Confidential Computing has not been implemented for this architecture") + return devices, firmware, nil +} From 4f61f4b4908c59afa76a80233426720152dcccbc Mon Sep 17 00:00:00 2001 From: Julio Montes Date: Wed, 5 May 2021 09:37:15 -0500 Subject: [PATCH 4/4] virtcontainers: Support TDX Add support for Intel TDX confidential guests fixes #1332 Signed-off-by: Julio Montes --- .../virtcontainers/hypervisor_amd64.go | 25 +++++++++ src/runtime/virtcontainers/qemu_amd64.go | 53 ++++++++++++++++++- src/runtime/virtcontainers/qemu_amd64_test.go | 50 +++++++++++++++++ 3 files changed, 126 insertions(+), 2 deletions(-) create mode 100644 src/runtime/virtcontainers/hypervisor_amd64.go diff --git a/src/runtime/virtcontainers/hypervisor_amd64.go b/src/runtime/virtcontainers/hypervisor_amd64.go new file mode 100644 index 
0000000000..4b75a08cfd --- /dev/null +++ b/src/runtime/virtcontainers/hypervisor_amd64.go @@ -0,0 +1,25 @@ +// Copyright (c) 2021 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +package virtcontainers + +import "os" + +// Implementation of this function is architecture specific +func availableGuestProtection() (guestProtection, error) { + flags, err := CPUFlags(procCPUInfo) + if err != nil { + return noneProtection, err + } + + // TDX is supported and properly loaded when the firmware directory exists or `tdx` is part of the CPU flags + if d, err := os.Stat(tdxSysFirmwareDir); (err == nil && d.IsDir()) || flags[tdxCPUFlag] { + return tdxProtection, nil + } + + // TODO: Add support for other technologies: SEV + + return noneProtection, nil +} diff --git a/src/runtime/virtcontainers/qemu_amd64.go b/src/runtime/virtcontainers/qemu_amd64.go index ec2e764083..37b0748b31 100644 --- a/src/runtime/virtcontainers/qemu_amd64.go +++ b/src/runtime/virtcontainers/qemu_amd64.go @@ -11,6 +11,7 @@ import ( "time" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" + "github.com/sirupsen/logrus" govmmQemu "github.com/kata-containers/govmm/qemu" ) @@ -20,6 +21,8 @@ type qemuAmd64 struct { qemuArchBase vmFactory bool + + devLoadersCount uint32 } const ( @@ -30,6 +33,10 @@ const ( defaultQemuMachineOptions = "accel=kvm,kernel_irqchip" qmpMigrationWaitTimeout = 5 * time.Second + + tdxSysFirmwareDir = "/sys/firmware/tdx_seam/" + + tdxCPUFlag = "tdx" ) var qemuPaths = map[string]string{ @@ -201,10 +208,52 @@ func (q *qemuAmd64) appendBridges(devices []govmmQemu.Device) []govmmQemu.Device // enable protection func (q *qemuAmd64) enableProtection() error { - return nil + var err error + q.protection, err = availableGuestProtection() + if err != nil { + return err + } + + switch q.protection { + case tdxProtection: + if q.qemuMachine.Options != "" { + q.qemuMachine.Options += "," + } + q.qemuMachine.Options += 
"kvm-type=tdx,confidential-guest-support=tdx" + q.kernelParams = append(q.kernelParams, Param{"tdx_guest", ""}) + virtLog.WithFields(logrus.Fields{ + "subsystem": "qemuAmd64", + "machine": q.qemuMachine, + "kernel-params": q.kernelParameters}). + Info("Enabling TDX guest protection") + return nil + + // TODO: Add support for other x86_64 technologies: SEV + + default: + return fmt.Errorf("This system doesn't support Confidential Computing (Guest Protection)") + } } // append protection device func (q *qemuAmd64) appendProtectionDevice(devices []govmmQemu.Device, firmware string) ([]govmmQemu.Device, string, error) { - return devices, firmware, nil + switch q.protection { + case tdxProtection: + id := q.devLoadersCount + q.devLoadersCount += 1 + return append(devices, + govmmQemu.Object{ + Driver: govmmQemu.Loader, + Type: govmmQemu.TDXGuest, + ID: "tdx", + DeviceID: fmt.Sprintf("fd%d", id), + Debug: false, + File: firmware, + }), "", nil + case noneProtection: + return devices, firmware, nil + + default: + return devices, "", fmt.Errorf("Unsupported guest protection technology: %v", q.protection) + } } diff --git a/src/runtime/virtcontainers/qemu_amd64_test.go b/src/runtime/virtcontainers/qemu_amd64_test.go index 1d321e9353..8772361cba 100644 --- a/src/runtime/virtcontainers/qemu_amd64_test.go +++ b/src/runtime/virtcontainers/qemu_amd64_test.go @@ -276,3 +276,53 @@ func TestQemuAmd64Microvm(t *testing.T) { assert.False(amd64.supportGuestMemoryHotplug()) } + +func TestQemuAmd64AppendProtectionDevice(t *testing.T) { + var devices []govmmQemu.Device + assert := assert.New(t) + + amd64 := newTestQemu(assert, QemuPC) + + id := amd64.(*qemuAmd64).devLoadersCount + firmware := "tdvf.fd" + var bios string + var err error + devices, bios, err = amd64.appendProtectionDevice(devices, firmware) + assert.NoError(err) + + // non-protection + assert.NotEmpty(bios) + + // pef protection + amd64.(*qemuAmd64).protection = pefProtection + devices, bios, err = 
amd64.appendProtectionDevice(devices, firmware) + assert.Error(err) + assert.Empty(bios) + + // sev protection + // TODO: update once it's supported + amd64.(*qemuAmd64).protection = sevProtection + devices, bios, err = amd64.appendProtectionDevice(devices, firmware) + assert.Error(err) + assert.Empty(bios) + + // tdxProtection + amd64.(*qemuAmd64).protection = tdxProtection + + devices, bios, err = amd64.appendProtectionDevice(devices, firmware) + assert.NoError(err) + assert.Empty(bios) + + expectedOut := []govmmQemu.Device{ + govmmQemu.Object{ + Driver: govmmQemu.Loader, + Type: govmmQemu.TDXGuest, + ID: "tdx", + DeviceID: fmt.Sprintf("fd%d", id), + Debug: false, + File: firmware, + }, + } + + assert.Equal(expectedOut, devices) +}