Merge pull request #1590 from devimc/2021-02-02/ConfidentialComputing

Support TDx
This commit is contained in:
Samuel Ortiz 2021-05-10 22:19:40 +02:00 committed by GitHub
commit 2c4e4ca1ac
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 257 additions and 25 deletions

View File

@ -16,6 +16,14 @@ kernel = "@KERNELPATH@"
image = "@IMAGEPATH@"
machine_type = "@MACHINETYPE@"
# Enable confidential guest support.
# Toggling that setting may trigger different hardware features, ranging
# from memory encryption to both memory and CPU-state encryption and integrity.
# The Kata Containers runtime dynamically detects the available feature set and
# aims at enabling the largest possible one.
# Default false
# confidential_guest = true
# List of valid annotation names for the hypervisor
# Each member of the list is a regular expression, which is the base name
# of the annotation, e.g. "path" for "io.katacontainers.config.hypervisor.path"
@ -532,3 +540,30 @@ experimental=@DEFAULTEXPFEATURES@
# If enabled, user can run pprof tools with shim v2 process through kata-monitor.
# (default: false)
# enable_pprof = true
# WARNING: All the options in the following section have not been implemented yet.
# This section was added as a placeholder. DO NOT USE IT!
[image]
# Container image service.
#
# Offload the CRI image management service to the Kata agent.
# (default: false)
#service_offload = true
# Container image decryption keys provisioning.
# Applies only if service_offload is true.
# Keys can be provisioned locally (e.g. through a special command or
# a local file) or remotely (usually after the guest is remotely attested).
# The provision setting is a complete URL that lets the Kata agent decide
# which method to use in order to fetch the keys.
#
# Keys can be stored in a local file, in a measured and attested initrd:
#provision=data:///local/key/file
#
# Keys could be fetched through a special command or binary from the
# initrd (guest) image, e.g. a firmware call:
#provision=file:///path/to/bin/fetcher/in/guest
#
# Keys can be remotely provisioned. The Kata agent fetches them from e.g.
# an HTTPS URL:
#provision=https://my-key-broker.foo/tenant/<tenant-id>

View File

@ -54,6 +54,7 @@ const defaultDisableImageNvdimm = false
const defaultVhostUserStorePath string = "/var/run/kata-containers/vhost-user/"
const defaultRxRateLimiterMaxRate = uint64(0)
const defaultTxRateLimiterMaxRate = uint64(0)
const defaultConfidentialGuest = false
var defaultSGXEPCSize = int64(0)

View File

@ -1,4 +1,4 @@
// Copyright (c) 2018 Intel Corporation
// Copyright (c) 2018-2021 Intel Corporation
// Copyright (c) 2018 HyperHQ Inc.
//
// SPDX-License-Identifier: Apache-2.0
@ -61,6 +61,12 @@ type tomlConfig struct {
Runtime runtime
Factory factory
Netmon netmon
Image image
}
// image mirrors the [image] section of the runtime configuration file.
//
// The configuration template marks every option of that section as a
// placeholder that has not been implemented yet.
type image struct {
	// ServiceOffload is read from the `service_offload` key: offload the
	// CRI image management service to the Kata agent.
	ServiceOffload bool `toml:"service_offload"`

	// Provision is read from the `provision` key: a URL describing how the
	// container image decryption keys are provisioned.
	Provision string `toml:"provision"`
}
type factory struct {
@ -130,6 +136,7 @@ type hypervisor struct {
HotplugVFIOOnRootBus bool `toml:"hotplug_vfio_on_root_bus"`
DisableVhostNet bool `toml:"disable_vhost_net"`
GuestMemoryDumpPaging bool `toml:"guest_memory_dump_paging"`
ConfidentialGuest bool `toml:"confidential_guest"`
}
type runtime struct {
@ -702,6 +709,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
EnableAnnotations: h.EnableAnnotations,
GuestMemoryDumpPath: h.GuestMemoryDumpPath,
GuestMemoryDumpPaging: h.GuestMemoryDumpPaging,
ConfidentialGuest: h.ConfidentialGuest,
}, nil
}
@ -1055,6 +1063,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig {
RxRateLimiterMaxRate: defaultRxRateLimiterMaxRate,
TxRateLimiterMaxRate: defaultTxRateLimiterMaxRate,
SGXEPCSize: defaultSGXEPCSize,
ConfidentialGuest: defaultConfidentialGuest,
}
}

View File

@ -453,6 +453,11 @@ type HypervisorConfig struct {
// GuestMemoryDumpPaging is used to indicate if enable paging
// for QEMU dump-guest-memory command
GuestMemoryDumpPaging bool
// Enable confidential guest support.
// Enable or disable different hardware features, ranging
// from memory encryption to both memory and CPU-state encryption and integrity.
ConfidentialGuest bool
}
// vcpu mapping from vcpu number to thread number
@ -717,21 +722,16 @@ func getHostMemorySizeKb(memInfoPath string) (uint64, error) {
return 0, fmt.Errorf("unable get MemTotal from %s", memInfoPath)
}
// RunningOnVMM checks if the system is running inside a VM.
func RunningOnVMM(cpuInfoPath string) (bool, error) {
if runtime.GOARCH == "arm64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "s390x" {
virtLog.Info("Unable to know if the system is running inside a VM")
return false, nil
}
func CPUFlags(cpuInfoPath string) (map[string]bool, error) {
flagsField := "flags"
f, err := os.Open(cpuInfoPath)
if err != nil {
return false, err
return map[string]bool{}, err
}
defer f.Close()
flags := make(map[string]bool)
scanner := bufio.NewScanner(f)
for scanner.Scan() {
// Expected format: ["flags", ":", ...] or ["flags:", ...]
@ -745,23 +745,31 @@ func RunningOnVMM(cpuInfoPath string) (bool, error) {
}
for _, field := range fields[1:] {
if field == "hypervisor" {
return true, nil
}
flags[field] = true
}
// As long as we have been able to analyze the fields from
// "flags", there is no reason to check what comes next from
// /proc/cpuinfo, because we already know we are not running
// on a VMM.
return false, nil
return flags, nil
}
if err := scanner.Err(); err != nil {
return false, err
return map[string]bool{}, err
}
return false, fmt.Errorf("Couldn't find %q from %q output", flagsField, cpuInfoPath)
return map[string]bool{}, fmt.Errorf("Couldn't find %q from %q output", flagsField, cpuInfoPath)
}
// RunningOnVMM checks if the system is running inside a VM.
func RunningOnVMM(cpuInfoPath string) (bool, error) {
if runtime.GOARCH == "amd64" {
flags, err := CPUFlags(cpuInfoPath)
if err != nil {
return false, err
}
return flags["hypervisor"], nil
}
virtLog.WithField("arch", runtime.GOARCH).Info("Unable to know if the system is running inside a VM")
return false, nil
}
func getHypervisorPid(h hypervisor) int {

View File

@ -0,0 +1,25 @@
// Copyright (c) 2021 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
package virtcontainers
import "os"
// availableGuestProtection reports which guest protection technology the
// host offers. The implementation of this function is architecture specific.
//
// It returns tdxProtection when tdxSysFirmwareDir exists and is a directory,
// or when tdxCPUFlag is among the CPU flags read from procCPUInfo. Otherwise
// it returns noneProtection. An error is returned only when the CPU flags
// cannot be read.
func availableGuestProtection() (guestProtection, error) {
	cpuFlags, err := CPUFlags(procCPUInfo)
	if err != nil {
		return noneProtection, err
	}

	// TDX is supported and properly loaded when the firmware directory
	// exists or `tdx` is part of the CPU flags.
	firmwareDirPresent := false
	if info, statErr := os.Stat(tdxSysFirmwareDir); statErr == nil {
		firmwareDirPresent = info.IsDir()
	}

	if firmwareDirPresent || cpuFlags[tdxCPUFlag] {
		return tdxProtection, nil
	}

	// TODO: Add support for other technologies: SEV
	return noneProtection, nil
}

View File

@ -605,6 +605,11 @@ func (q *qemu) createSandbox(ctx context.Context, id string, networkNS NetworkNa
PidFile: filepath.Join(q.store.RunVMStoragePath(), q.id, "pid"),
}
qemuConfig.Devices, qemuConfig.Bios, err = q.arch.appendProtectionDevice(qemuConfig.Devices, firmwarePath)
if err != nil {
return err
}
if ioThread != nil {
qemuConfig.IOThreads = []govmmQemu.IOThread{*ioThread}
}

View File

@ -11,6 +11,7 @@ import (
"time"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types"
"github.com/sirupsen/logrus"
govmmQemu "github.com/kata-containers/govmm/qemu"
)
@ -20,6 +21,8 @@ type qemuAmd64 struct {
qemuArchBase
vmFactory bool
devLoadersCount uint32
}
const (
@ -30,6 +33,10 @@ const (
defaultQemuMachineOptions = "accel=kvm,kernel_irqchip"
qmpMigrationWaitTimeout = 5 * time.Second
tdxSysFirmwareDir = "/sys/firmware/tdx_seam/"
tdxCPUFlag = "tdx"
)
var qemuPaths = map[string]string{
@ -106,17 +113,17 @@ func newQemuArch(config HypervisorConfig) (qemuArch, error) {
factory = true
}
if config.IOMMU {
var q35QemuIOMMUOptions = "accel=kvm,kernel_irqchip=split"
// IOMMU and Guest Protection require a split IRQ controller for handling interrupts
// otherwise QEMU won't be able to create the kernel irqchip
if config.IOMMU || config.ConfidentialGuest {
mp.Options = "accel=kvm,kernel_irqchip=split"
}
if config.IOMMU {
kernelParams = append(kernelParams,
Param{"intel_iommu", "on"})
kernelParams = append(kernelParams,
Param{"iommu", "pt"})
if mp.Type == QemuQ35 {
mp.Options = q35QemuIOMMUOptions
}
}
q := &qemuAmd64{
@ -129,10 +136,17 @@ func newQemuArch(config HypervisorConfig) (qemuArch, error) {
kernelParams: kernelParams,
disableNvdimm: config.DisableImageNvdimm,
dax: true,
protection: noneProtection,
},
vmFactory: factory,
}
if config.ConfidentialGuest {
if err := q.enableProtection(); err != nil {
return nil, err
}
}
q.handleImagePath(config)
return q, nil
@ -191,3 +205,55 @@ func (q *qemuAmd64) appendImage(ctx context.Context, devices []govmmQemu.Device,
func (q *qemuAmd64) appendBridges(devices []govmmQemu.Device) []govmmQemu.Device {
return genericAppendBridges(devices, q.Bridges, q.qemuMachine.Type)
}
// enableProtection detects the guest protection technology available on the
// host and configures the machine options and kernel parameters for it.
//
// The detected technology is stored in q.protection. For TDX,
// "kvm-type=tdx,confidential-guest-support=tdx" is appended to the machine
// options and the "tdx_guest" kernel parameter is added. An error is returned
// when detection fails or when the detected technology is not handled here.
func (q *qemuAmd64) enableProtection() error {
	var err error
	q.protection, err = availableGuestProtection()
	if err != nil {
		return err
	}

	switch q.protection {
	case tdxProtection:
		// Machine options form a comma separated list: only add the
		// separator when there is already something in it.
		if q.qemuMachine.Options != "" {
			q.qemuMachine.Options += ","
		}
		q.qemuMachine.Options += "kvm-type=tdx,confidential-guest-support=tdx"
		q.kernelParams = append(q.kernelParams, Param{"tdx_guest", ""})

		// Log the kernelParams field that was just updated, so the entry
		// shows the actual parameter list.
		virtLog.WithFields(logrus.Fields{
			"subsystem":     "qemuAmd64",
			"machine":       q.qemuMachine,
			"kernel-params": q.kernelParams,
		}).Info("Enabling TDX guest protection")
		return nil

	// TODO: Add support for other x86_64 technologies: SEV

	default:
		return fmt.Errorf("This system doesn't support Confidential Computing (Guest Protection)")
	}
}
// appendProtectionDevice appends the device needed by the guest protection
// technology stored in q.protection and returns the firmware path that the
// caller should use as BIOS.
//
//   - noneProtection: devices and firmware are returned untouched.
//   - tdxProtection: a loader object carrying the firmware is appended and an
//     empty firmware path is returned; q.devLoadersCount is incremented.
//   - anything else: devices are returned untouched together with an error.
func (q *qemuAmd64) appendProtectionDevice(devices []govmmQemu.Device, firmware string) ([]govmmQemu.Device, string, error) {
	if q.protection == noneProtection {
		return devices, firmware, nil
	}

	if q.protection != tdxProtection {
		return devices, "", fmt.Errorf("Unsupported guest protection technology: %v", q.protection)
	}

	// Each loader device gets its own numeric suffix.
	loaderID := q.devLoadersCount
	q.devLoadersCount++

	tdxLoader := govmmQemu.Object{
		Driver:   govmmQemu.Loader,
		Type:     govmmQemu.TDXGuest,
		ID:       "tdx",
		DeviceID: fmt.Sprintf("fd%d", loaderID),
		Debug:    false,
		File:     firmware,
	}

	return append(devices, tdxLoader), "", nil
}

View File

@ -276,3 +276,53 @@ func TestQemuAmd64Microvm(t *testing.T) {
assert.False(amd64.supportGuestMemoryHotplug())
}
func TestQemuAmd64AppendProtectionDevice(t *testing.T) {
var devices []govmmQemu.Device
assert := assert.New(t)
amd64 := newTestQemu(assert, QemuPC)
id := amd64.(*qemuAmd64).devLoadersCount
firmware := "tdvf.fd"
var bios string
var err error
devices, bios, err = amd64.appendProtectionDevice(devices, firmware)
assert.NoError(err)
// non-protection
assert.NotEmpty(bios)
// pef protection
amd64.(*qemuAmd64).protection = pefProtection
devices, bios, err = amd64.appendProtectionDevice(devices, firmware)
assert.Error(err)
assert.Empty(bios)
// sev protection
// TODO: update once it's supported
amd64.(*qemuAmd64).protection = sevProtection
devices, bios, err = amd64.appendProtectionDevice(devices, firmware)
assert.Error(err)
assert.Empty(bios)
// tdxProtection
amd64.(*qemuAmd64).protection = tdxProtection
devices, bios, err = amd64.appendProtectionDevice(devices, firmware)
assert.NoError(err)
assert.Empty(bios)
expectedOut := []govmmQemu.Device{
govmmQemu.Object{
Driver: govmmQemu.Loader,
Type: govmmQemu.TDXGuest,
ID: "tdx",
DeviceID: fmt.Sprintf("fd%d", id),
Debug: false,
File: firmware,
},
}
assert.Equal(expectedOut, devices)
}

View File

@ -11,6 +11,7 @@ import (
"errors"
"fmt"
"os"
"runtime"
"strconv"
"strings"
@ -142,8 +143,33 @@ type qemuArch interface {
// append pvpanic device
appendPVPanicDevice(devices []govmmQemu.Device) ([]govmmQemu.Device, error)
// append protection device.
// This implementation is architecture specific: some archs may need
// a firmware. It returns a string containing the path to the firmware that
// should be used with the -bios option; omit the -bios option if the path is empty.
appendProtectionDevice(devices []govmmQemu.Device, firmware string) ([]govmmQemu.Device, string, error)
}
// guestProtection identifies a kind of guest protection technology.
type guestProtection uint8

const (
	// noneProtection is the zero value: no guest protection.
	noneProtection guestProtection = iota

	// tdxProtection is Intel Trust Domain Extensions.
	// https://software.intel.com/content/www/us/en/develop/articles/intel-trust-domain-extensions.html
	tdxProtection

	// sevProtection is AMD Secure Encrypted Virtualization.
	// https://developer.amd.com/sev/
	sevProtection

	// pefProtection is IBM POWER 9 Protected Execution Facility.
	// https://www.kernel.org/doc/html/latest/powerpc/ultravisor.html
	pefProtection
)
type qemuArchBase struct {
qemuMachine govmmQemu.Machine
qemuExePath string
@ -158,6 +184,7 @@ type qemuArchBase struct {
kernelParams []Param
Bridges []types.Bridge
PFlash []string
protection guestProtection
}
const (
@ -813,3 +840,9 @@ func (q *qemuArchBase) getPFlash() ([]string, error) {
func (q *qemuArchBase) setPFlash(p []string) {
q.PFlash = p
}
// appendProtectionDevice is the generic implementation on qemuArchBase: it
// logs a warning that Confidential Computing is not implemented for the
// running architecture and returns devices and firmware unchanged, with a nil
// error.
func (q *qemuArchBase) appendProtectionDevice(devices []govmmQemu.Device, firmware string) ([]govmmQemu.Device, string, error) {
	archLog := virtLog.WithField("arch", runtime.GOARCH)
	archLog.Warnf("Confidential Computing has not been implemented for this architecture")

	return devices, firmware, nil
}