mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-07-17 00:43:36 +00:00
Merge pull request #1590 from devimc/2021-02-02/ConfidentialComputing
Support TDx
This commit is contained in:
commit
2c4e4ca1ac
@ -16,6 +16,14 @@ kernel = "@KERNELPATH@"
|
||||
image = "@IMAGEPATH@"
|
||||
machine_type = "@MACHINETYPE@"
|
||||
|
||||
# Enable confidential guest support.
|
||||
# Toggling that setting may trigger different hardware features, ranging
|
||||
# from memory encryption to both memory and CPU-state encryption and integrity.
|
||||
# The Kata Containers runtime dynamically detects the available feature set and
|
||||
# aims at enabling the largest possible one.
|
||||
# Default false
|
||||
# confidential_guest = true
|
||||
|
||||
# List of valid annotation names for the hypervisor
|
||||
# Each member of the list is a regular expression, which is the base name
|
||||
# of the annotation, e.g. "path" for "io.katacontainers.config.hypervisor.path"
|
||||
@ -532,3 +540,30 @@ experimental=@DEFAULTEXPFEATURES@
|
||||
# If enabled, user can run pprof tools with shim v2 process through kata-monitor.
|
||||
# (default: false)
|
||||
# enable_pprof = true
|
||||
|
||||
# WARNING: All the options in the following section have not been implemented yet.
|
||||
# This section was added as a placeholder. DO NOT USE IT!
|
||||
[image]
|
||||
# Container image service.
|
||||
#
|
||||
# Offload the CRI image management service to the Kata agent.
|
||||
# (default: false)
|
||||
#service_offload = true
|
||||
|
||||
# Container image decryption keys provisioning.
|
||||
# Applies only if service_offload is true.
|
||||
# Keys can be provisioned locally (e.g. through a special command or
|
||||
# a local file) or remotely (usually after the guest is remotely attested).
|
||||
# The provision setting is a complete URL that lets the Kata agent decide
|
||||
# which method to use in order to fetch the keys.
|
||||
#
|
||||
# Keys can be stored in a local file, in a measured and attested initrd:
|
||||
#provision=data:///local/key/file
|
||||
#
|
||||
# Keys could be fetched through a special command or binary from the
|
||||
# initrd (guest) image, e.g. a firmware call:
|
||||
#provision=file:///path/to/bin/fetcher/in/guest
|
||||
#
|
||||
# Keys can be remotely provisioned. The Kata agent fetches them from e.g.
|
||||
# an HTTPS URL:
|
||||
#provision=https://my-key-broker.foo/tenant/<tenant-id>
|
||||
|
@ -54,6 +54,7 @@ const defaultDisableImageNvdimm = false
|
||||
const defaultVhostUserStorePath string = "/var/run/kata-containers/vhost-user/"
|
||||
const defaultRxRateLimiterMaxRate = uint64(0)
|
||||
const defaultTxRateLimiterMaxRate = uint64(0)
|
||||
const defaultConfidentialGuest = false
|
||||
|
||||
var defaultSGXEPCSize = int64(0)
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
// Copyright (c) 2018-2021 Intel Corporation
|
||||
// Copyright (c) 2018 HyperHQ Inc.
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
@ -61,6 +61,12 @@ type tomlConfig struct {
|
||||
Runtime runtime
|
||||
Factory factory
|
||||
Netmon netmon
|
||||
Image image
|
||||
}
|
||||
|
||||
// image mirrors the [image] section of the configuration file.
// The configuration template marks every option of that section as a
// placeholder that has not been implemented yet.
type image struct {
|
||||
// ServiceOffload asks for the CRI image management service to be
// offloaded to the Kata agent (documented default: false).
ServiceOffload bool `toml:"service_offload"`
|
||||
// Provision is the complete URL the Kata agent uses to decide how to
// fetch container image decryption keys, e.g. data://, file:// or https://.
// It applies only when ServiceOffload is true.
Provision string `toml:"provision"`
|
||||
}
|
||||
|
||||
type factory struct {
|
||||
@ -130,6 +136,7 @@ type hypervisor struct {
|
||||
HotplugVFIOOnRootBus bool `toml:"hotplug_vfio_on_root_bus"`
|
||||
DisableVhostNet bool `toml:"disable_vhost_net"`
|
||||
GuestMemoryDumpPaging bool `toml:"guest_memory_dump_paging"`
|
||||
ConfidentialGuest bool `toml:"confidential_guest"`
|
||||
}
|
||||
|
||||
type runtime struct {
|
||||
@ -702,6 +709,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
|
||||
EnableAnnotations: h.EnableAnnotations,
|
||||
GuestMemoryDumpPath: h.GuestMemoryDumpPath,
|
||||
GuestMemoryDumpPaging: h.GuestMemoryDumpPaging,
|
||||
ConfidentialGuest: h.ConfidentialGuest,
|
||||
}, nil
|
||||
}
|
||||
|
||||
@ -1055,6 +1063,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig {
|
||||
RxRateLimiterMaxRate: defaultRxRateLimiterMaxRate,
|
||||
TxRateLimiterMaxRate: defaultTxRateLimiterMaxRate,
|
||||
SGXEPCSize: defaultSGXEPCSize,
|
||||
ConfidentialGuest: defaultConfidentialGuest,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -453,6 +453,11 @@ type HypervisorConfig struct {
|
||||
// GuestMemoryDumpPaging indicates whether to enable paging
|
||||
// for QEMU dump-guest-memory command
|
||||
GuestMemoryDumpPaging bool
|
||||
|
||||
// Enable confidential guest support.
|
||||
// Enable or disable different hardware features, ranging
|
||||
// from memory encryption to both memory and CPU-state encryption and integrity.
|
||||
ConfidentialGuest bool
|
||||
}
|
||||
|
||||
// vcpu mapping from vcpu number to thread number
|
||||
@ -717,21 +722,16 @@ func getHostMemorySizeKb(memInfoPath string) (uint64, error) {
|
||||
return 0, fmt.Errorf("unable get MemTotal from %s", memInfoPath)
|
||||
}
|
||||
|
||||
// RunningOnVMM checks if the system is running inside a VM.
|
||||
func RunningOnVMM(cpuInfoPath string) (bool, error) {
|
||||
if runtime.GOARCH == "arm64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "s390x" {
|
||||
virtLog.Info("Unable to know if the system is running inside a VM")
|
||||
return false, nil
|
||||
}
|
||||
|
||||
func CPUFlags(cpuInfoPath string) (map[string]bool, error) {
|
||||
flagsField := "flags"
|
||||
|
||||
f, err := os.Open(cpuInfoPath)
|
||||
if err != nil {
|
||||
return false, err
|
||||
return map[string]bool{}, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
flags := make(map[string]bool)
|
||||
scanner := bufio.NewScanner(f)
|
||||
for scanner.Scan() {
|
||||
// Expected format: ["flags", ":", ...] or ["flags:", ...]
|
||||
@ -745,23 +745,31 @@ func RunningOnVMM(cpuInfoPath string) (bool, error) {
|
||||
}
|
||||
|
||||
for _, field := range fields[1:] {
|
||||
if field == "hypervisor" {
|
||||
return true, nil
|
||||
}
|
||||
flags[field] = true
|
||||
}
|
||||
|
||||
// As long as we have been able to analyze the fields from
|
||||
// "flags", there is no reason to check what comes next from
|
||||
// /proc/cpuinfo, because we already know we are not running
|
||||
// on a VMM.
|
||||
return false, nil
|
||||
return flags, nil
|
||||
}
|
||||
|
||||
if err := scanner.Err(); err != nil {
|
||||
return false, err
|
||||
return map[string]bool{}, err
|
||||
}
|
||||
|
||||
return false, fmt.Errorf("Couldn't find %q from %q output", flagsField, cpuInfoPath)
|
||||
return map[string]bool{}, fmt.Errorf("Couldn't find %q from %q output", flagsField, cpuInfoPath)
|
||||
}
|
||||
|
||||
// RunningOnVMM checks if the system is running inside a VM.
|
||||
func RunningOnVMM(cpuInfoPath string) (bool, error) {
|
||||
if runtime.GOARCH == "amd64" {
|
||||
flags, err := CPUFlags(cpuInfoPath)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return flags["hypervisor"], nil
|
||||
}
|
||||
|
||||
virtLog.WithField("arch", runtime.GOARCH).Info("Unable to know if the system is running inside a VM")
|
||||
return false, nil
|
||||
}
|
||||
|
||||
func getHypervisorPid(h hypervisor) int {
|
||||
|
25
src/runtime/virtcontainers/hypervisor_amd64.go
Normal file
25
src/runtime/virtcontainers/hypervisor_amd64.go
Normal file
@ -0,0 +1,25 @@
|
||||
// Copyright (c) 2021 Intel Corporation
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
package virtcontainers
|
||||
|
||||
import "os"
|
||||
|
||||
// Implementation of this function is architecture specific
|
||||
func availableGuestProtection() (guestProtection, error) {
|
||||
flags, err := CPUFlags(procCPUInfo)
|
||||
if err != nil {
|
||||
return noneProtection, err
|
||||
}
|
||||
|
||||
// TDX is supported and properly loaded when the firmware directory exists or `tdx` is part of the CPU flags
|
||||
if d, err := os.Stat(tdxSysFirmwareDir); (err == nil && d.IsDir()) || flags[tdxCPUFlag] {
|
||||
return tdxProtection, nil
|
||||
}
|
||||
|
||||
// TODO: Add support for other technologies: SEV
|
||||
|
||||
return noneProtection, nil
|
||||
}
|
@ -605,6 +605,11 @@ func (q *qemu) createSandbox(ctx context.Context, id string, networkNS NetworkNa
|
||||
PidFile: filepath.Join(q.store.RunVMStoragePath(), q.id, "pid"),
|
||||
}
|
||||
|
||||
qemuConfig.Devices, qemuConfig.Bios, err = q.arch.appendProtectionDevice(qemuConfig.Devices, firmwarePath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if ioThread != nil {
|
||||
qemuConfig.IOThreads = []govmmQemu.IOThread{*ioThread}
|
||||
}
|
||||
|
@ -11,6 +11,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types"
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
govmmQemu "github.com/kata-containers/govmm/qemu"
|
||||
)
|
||||
@ -20,6 +21,8 @@ type qemuAmd64 struct {
|
||||
qemuArchBase
|
||||
|
||||
vmFactory bool
|
||||
|
||||
devLoadersCount uint32
|
||||
}
|
||||
|
||||
const (
|
||||
@ -30,6 +33,10 @@ const (
|
||||
defaultQemuMachineOptions = "accel=kvm,kernel_irqchip"
|
||||
|
||||
qmpMigrationWaitTimeout = 5 * time.Second
|
||||
|
||||
tdxSysFirmwareDir = "/sys/firmware/tdx_seam/"
|
||||
|
||||
tdxCPUFlag = "tdx"
|
||||
)
|
||||
|
||||
var qemuPaths = map[string]string{
|
||||
@ -106,17 +113,17 @@ func newQemuArch(config HypervisorConfig) (qemuArch, error) {
|
||||
factory = true
|
||||
}
|
||||
|
||||
if config.IOMMU {
|
||||
var q35QemuIOMMUOptions = "accel=kvm,kernel_irqchip=split"
|
||||
// IOMMU and Guest Protection require a split IRQ controller for handling interrupts
|
||||
// otherwise QEMU won't be able to create the kernel irqchip
|
||||
if config.IOMMU || config.ConfidentialGuest {
|
||||
mp.Options = "accel=kvm,kernel_irqchip=split"
|
||||
}
|
||||
|
||||
if config.IOMMU {
|
||||
kernelParams = append(kernelParams,
|
||||
Param{"intel_iommu", "on"})
|
||||
kernelParams = append(kernelParams,
|
||||
Param{"iommu", "pt"})
|
||||
|
||||
if mp.Type == QemuQ35 {
|
||||
mp.Options = q35QemuIOMMUOptions
|
||||
}
|
||||
}
|
||||
|
||||
q := &qemuAmd64{
|
||||
@ -129,10 +136,17 @@ func newQemuArch(config HypervisorConfig) (qemuArch, error) {
|
||||
kernelParams: kernelParams,
|
||||
disableNvdimm: config.DisableImageNvdimm,
|
||||
dax: true,
|
||||
protection: noneProtection,
|
||||
},
|
||||
vmFactory: factory,
|
||||
}
|
||||
|
||||
if config.ConfidentialGuest {
|
||||
if err := q.enableProtection(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
q.handleImagePath(config)
|
||||
|
||||
return q, nil
|
||||
@ -191,3 +205,55 @@ func (q *qemuAmd64) appendImage(ctx context.Context, devices []govmmQemu.Device,
|
||||
func (q *qemuAmd64) appendBridges(devices []govmmQemu.Device) []govmmQemu.Device {
|
||||
return genericAppendBridges(devices, q.Bridges, q.qemuMachine.Type)
|
||||
}
|
||||
|
||||
// enable protection
|
||||
func (q *qemuAmd64) enableProtection() error {
|
||||
var err error
|
||||
q.protection, err = availableGuestProtection()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
switch q.protection {
|
||||
case tdxProtection:
|
||||
if q.qemuMachine.Options != "" {
|
||||
q.qemuMachine.Options += ","
|
||||
}
|
||||
q.qemuMachine.Options += "kvm-type=tdx,confidential-guest-support=tdx"
|
||||
q.kernelParams = append(q.kernelParams, Param{"tdx_guest", ""})
|
||||
virtLog.WithFields(logrus.Fields{
|
||||
"subsystem": "qemuAmd64",
|
||||
"machine": q.qemuMachine,
|
||||
"kernel-params": q.kernelParameters}).
|
||||
Info("Enabling TDX guest protection")
|
||||
return nil
|
||||
|
||||
// TODO: Add support for other x86_64 technologies: SEV
|
||||
|
||||
default:
|
||||
return fmt.Errorf("This system doesn't support Confidential Computing (Guest Protection)")
|
||||
}
|
||||
}
|
||||
|
||||
// append protection device
|
||||
func (q *qemuAmd64) appendProtectionDevice(devices []govmmQemu.Device, firmware string) ([]govmmQemu.Device, string, error) {
|
||||
switch q.protection {
|
||||
case tdxProtection:
|
||||
id := q.devLoadersCount
|
||||
q.devLoadersCount += 1
|
||||
return append(devices,
|
||||
govmmQemu.Object{
|
||||
Driver: govmmQemu.Loader,
|
||||
Type: govmmQemu.TDXGuest,
|
||||
ID: "tdx",
|
||||
DeviceID: fmt.Sprintf("fd%d", id),
|
||||
Debug: false,
|
||||
File: firmware,
|
||||
}), "", nil
|
||||
case noneProtection:
|
||||
return devices, firmware, nil
|
||||
|
||||
default:
|
||||
return devices, "", fmt.Errorf("Unsupported guest protection technology: %v", q.protection)
|
||||
}
|
||||
}
|
||||
|
@ -276,3 +276,53 @@ func TestQemuAmd64Microvm(t *testing.T) {
|
||||
|
||||
assert.False(amd64.supportGuestMemoryHotplug())
|
||||
}
|
||||
|
||||
func TestQemuAmd64AppendProtectionDevice(t *testing.T) {
|
||||
var devices []govmmQemu.Device
|
||||
assert := assert.New(t)
|
||||
|
||||
amd64 := newTestQemu(assert, QemuPC)
|
||||
|
||||
id := amd64.(*qemuAmd64).devLoadersCount
|
||||
firmware := "tdvf.fd"
|
||||
var bios string
|
||||
var err error
|
||||
devices, bios, err = amd64.appendProtectionDevice(devices, firmware)
|
||||
assert.NoError(err)
|
||||
|
||||
// non-protection
|
||||
assert.NotEmpty(bios)
|
||||
|
||||
// pef protection
|
||||
amd64.(*qemuAmd64).protection = pefProtection
|
||||
devices, bios, err = amd64.appendProtectionDevice(devices, firmware)
|
||||
assert.Error(err)
|
||||
assert.Empty(bios)
|
||||
|
||||
// sev protection
|
||||
// TODO: update once it's supported
|
||||
amd64.(*qemuAmd64).protection = sevProtection
|
||||
devices, bios, err = amd64.appendProtectionDevice(devices, firmware)
|
||||
assert.Error(err)
|
||||
assert.Empty(bios)
|
||||
|
||||
// tdxProtection
|
||||
amd64.(*qemuAmd64).protection = tdxProtection
|
||||
|
||||
devices, bios, err = amd64.appendProtectionDevice(devices, firmware)
|
||||
assert.NoError(err)
|
||||
assert.Empty(bios)
|
||||
|
||||
expectedOut := []govmmQemu.Device{
|
||||
govmmQemu.Object{
|
||||
Driver: govmmQemu.Loader,
|
||||
Type: govmmQemu.TDXGuest,
|
||||
ID: "tdx",
|
||||
DeviceID: fmt.Sprintf("fd%d", id),
|
||||
Debug: false,
|
||||
File: firmware,
|
||||
},
|
||||
}
|
||||
|
||||
assert.Equal(expectedOut, devices)
|
||||
}
|
||||
|
@ -11,6 +11,7 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
@ -142,8 +143,33 @@ type qemuArch interface {
|
||||
|
||||
// append pvpanic device
|
||||
appendPVPanicDevice(devices []govmmQemu.Device) ([]govmmQemu.Device, error)
|
||||
|
||||
// append protection device.
|
||||
// This implementation is architecture specific, some archs may need
|
||||
// a firmware, returns a string containing the path to the firmware that should
|
||||
// be used with the -bios option; omit the -bios option if the path is empty.
|
||||
appendProtectionDevice(devices []govmmQemu.Device, firmware string) ([]govmmQemu.Device, string, error)
|
||||
}
|
||||
|
||||
// Kind of guest protection
|
||||
type guestProtection uint8
|
||||
|
||||
const (
|
||||
noneProtection guestProtection = iota
|
||||
|
||||
// Intel Trust Domain Extensions
|
||||
// https://software.intel.com/content/www/us/en/develop/articles/intel-trust-domain-extensions.html
|
||||
tdxProtection
|
||||
|
||||
// AMD Secure Encrypted Virtualization
|
||||
// https://developer.amd.com/sev/
|
||||
sevProtection
|
||||
|
||||
// IBM POWER 9 Protected Execution Facility
|
||||
// https://www.kernel.org/doc/html/latest/powerpc/ultravisor.html
|
||||
pefProtection
|
||||
)
|
||||
|
||||
type qemuArchBase struct {
|
||||
qemuMachine govmmQemu.Machine
|
||||
qemuExePath string
|
||||
@ -158,6 +184,7 @@ type qemuArchBase struct {
|
||||
kernelParams []Param
|
||||
Bridges []types.Bridge
|
||||
PFlash []string
|
||||
protection guestProtection
|
||||
}
|
||||
|
||||
const (
|
||||
@ -813,3 +840,9 @@ func (q *qemuArchBase) getPFlash() ([]string, error) {
|
||||
func (q *qemuArchBase) setPFlash(p []string) {
|
||||
q.PFlash = p
|
||||
}
|
||||
|
||||
// append protection device
|
||||
func (q *qemuArchBase) appendProtectionDevice(devices []govmmQemu.Device, firmware string) ([]govmmQemu.Device, string, error) {
|
||||
virtLog.WithField("arch", runtime.GOARCH).Warnf("Confidential Computing has not been implemented for this architecture")
|
||||
return devices, firmware, nil
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user