Merge pull request #10610 from Xynnn007/faet-initdata-rbd

Feat | Implement initdata for bare-metal/qemu hypervisor
This commit is contained in:
Alex Lyn
2025-04-24 09:59:14 +08:00
committed by GitHub
24 changed files with 858 additions and 64 deletions

View File

@@ -682,6 +682,14 @@ type HypervisorConfig struct {
// Initdata defines the initdata passed into guest when CreateVM
Initdata string
// InitdataDigest represents opaque binary data attached to a TEE and typically used
// for Guest attestation. This will be encoded in the format expected by QEMU for each TEE type.
InitdataDigest []byte
// InitdataImage is the path of the host-side image file that stores the
// initdata; the image is attached to the guest as a raw block device.
InitdataImage string
// GPU specific annotations (currently only applicable for Remote Hypervisor)
//DefaultGPUs specifies the number of GPUs required for the Kata VM
DefaultGPUs uint32

View File

@@ -9,7 +9,10 @@ package virtcontainers
import (
"bufio"
"bytes"
"compress/gzip"
"context"
"encoding/binary"
"encoding/hex"
"encoding/json"
"fmt"
@@ -394,6 +397,23 @@ func (q *qemu) createQmpSocket() ([]govmmQemu.QMPSocket, error) {
return sockets, nil
}
// buildInitdataDevice returns devices extended with one extra block device
// that exposes the image file at InitdataImage to the guest. The device is a
// virtio-blk device on the PCI transport, identified as "initdata" and backed
// by the image in raw format.
func (q *qemu) buildInitdataDevice(devices []govmmQemu.Device, InitdataImage string) []govmmQemu.Device {
	return append(devices, govmmQemu.BlockDevice{
		ID:        "initdata",
		File:      InitdataImage,
		Format:    "raw",
		Interface: "none",
		Driver:    govmmQemu.VirtioBlock,
		Transport: govmmQemu.TransportPCI,
		AIO:       govmmQemu.Threads,
		SCSI:      false,
		WCE:       false,
	})
}
func (q *qemu) buildDevices(ctx context.Context, kernelPath string) ([]govmmQemu.Device, *govmmQemu.IOThread, *govmmQemu.Kernel, error) {
var devices []govmmQemu.Device
@@ -540,6 +560,94 @@ func (q *qemu) createVirtiofsDaemon(sharedPath string) (VirtiofsDaemon, error) {
}, nil
}
// prepareInitdataImage creates (or overwrites) the image file at imagePath
// with a very simple layout:
//
//	offset  0                        8                     16
//	     0  'i' 'n' 'i' 't' 'd' 'a' 't' 'a' | gzip length (uint64, LE) |
//	    16  gzip(initdata) ...
//	        (up to the end of the last sector) '\0' padding
//
// The first 8 bytes are the magic number "initdata", followed by an 8-byte
// little-endian length of the gzipped payload and then the gzipped initdata
// itself. The image is zero-padded to an integer multiple of the sector size
// (512 bytes).
//
// Any previous content of imagePath is discarded, so the resulting file
// always has exactly the padded size. An error is returned if compression
// fails or if the image cannot be written.
func prepareInitdataImage(initdata string, imagePath string) error {
	const (
		sectorSize = 512
		magic      = "initdata"
		// magic number plus the 8-byte length field
		headerSize = len(magic) + 8
	)

	var compressed bytes.Buffer
	gzipper := gzip.NewWriter(&compressed)
	if _, err := gzipper.Write([]byte(initdata)); err != nil {
		return fmt.Errorf("failed to compress initdata: %w", err)
	}
	// Close flushes the remaining data and the gzip footer; the payload is
	// only complete once it has succeeded.
	if err := gzipper.Close(); err != nil {
		return fmt.Errorf("failed to compress initdata: %w", err)
	}

	// Round the used size up to the next sector boundary. The buffer comes
	// zero-initialized, so everything past the payload is already padding.
	usedSize := headerSize + compressed.Len()
	imageSize := (usedSize + sectorSize - 1) / sectorSize * sectorSize
	image := make([]byte, imageSize)

	copy(image, magic)
	binary.LittleEndian.PutUint64(image[len(magic):headerSize], uint64(compressed.Len()))
	copy(image[headerSize:], compressed.Bytes())

	// os.WriteFile truncates an existing file and reports write/close errors,
	// so a stale, larger image can never leak trailing bytes into the new one.
	if err := os.WriteFile(imagePath, image, 0640); err != nil {
		return fmt.Errorf("failed to write initdata image: %w", err)
	}

	return nil
}
// prepareInitdataMount stores the initdata from config in a per-sandbox image
// file on the host and records the path of that image in config.InitdataImage.
//
// The image is written to "data.img" inside a work directory named after the
// sandbox id under /run/kata-containers/shared/initdata. When config.Initdata
// is empty nothing is created and nil is returned. Failures to create the
// work directory or the image are logged and returned to the caller.
func (q *qemu) prepareInitdataMount(config *HypervisorConfig) error {
	if len(config.Initdata) == 0 {
		q.Logger().Info("No initdata provided. Skip prepare initdata device")
		return nil
	}

	q.Logger().Info("Start to prepare initdata")
	initdataWorkdir := filepath.Join("/run/kata-containers/shared/initdata", q.id)
	initdataImagePath := filepath.Join(initdataWorkdir, "data.img")

	if err := os.MkdirAll(initdataWorkdir, 0755); err != nil {
		// The entry must be emitted explicitly; building it with
		// WithField/WithError alone does not log anything.
		q.Logger().WithField("initdata", "create initdata image path").WithError(err).Error("failed to create initdata work dir")
		return err
	}

	if err := prepareInitdataImage(config.Initdata, initdataImagePath); err != nil {
		q.Logger().WithField("initdata", "prepare initdata image").WithError(err).Error("failed to prepare initdata image")
		return err
	}

	config.InitdataImage = initdataImagePath
	return nil
}
// CreateVM is the Hypervisor VM creation implementation for govmmQemu.
func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervisorConfig *HypervisorConfig) error {
// Save the tracing context
@@ -552,6 +660,10 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi
return err
}
if err := q.prepareInitdataMount(hypervisorConfig); err != nil {
return err
}
machine, err := q.getQemuMachine()
if err != nil {
return err
@@ -650,6 +762,10 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi
return err
}
if len(hypervisorConfig.Initdata) > 0 {
devices = q.buildInitdataDevice(devices, hypervisorConfig.InitdataImage)
}
// some devices configuration may also change kernel params, make sure this is called afterwards
kernel.Params = q.kernelParameters()
q.checkBpfEnabled()
@@ -681,7 +797,7 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi
Debug: hypervisorConfig.Debug,
}
qemuConfig.Devices, qemuConfig.Bios, err = q.arch.appendProtectionDevice(qemuConfig.Devices, firmwarePath, firmwareVolumePath)
qemuConfig.Devices, qemuConfig.Bios, err = q.arch.appendProtectionDevice(qemuConfig.Devices, firmwarePath, firmwareVolumePath, hypervisorConfig.InitdataDigest)
if err != nil {
return err
}
@@ -1254,6 +1370,7 @@ func (q *qemu) StopVM(ctx context.Context, waitOnly bool) (err error) {
}
}
}
if q.config.SharedFS == config.VirtioFS || q.config.SharedFS == config.VirtioFSNydus {
if err := q.stopVirtiofsDaemon(ctx); err != nil {
return err
@@ -1318,6 +1435,15 @@ func (q *qemu) cleanupVM() error {
}).Debug("successfully removed the non root user")
}
// If we have initdata, we should drop initdata image path
hypervisorConfig := q.HypervisorConfig()
if len(hypervisorConfig.Initdata) > 0 {
initdataWorkdir := filepath.Join(string(filepath.Separator), "/run/kata-containers/shared/initdata", q.id)
if err := os.RemoveAll(initdataWorkdir); err != nil {
q.Logger().WithError(err).Warnf("failed to remove initdata work dir %s", initdataWorkdir)
}
}
return nil
}

View File

@@ -274,7 +274,7 @@ func (q *qemuAmd64) enableProtection() error {
}
// append protection device
func (q *qemuAmd64) appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string) ([]govmmQemu.Device, string, error) {
func (q *qemuAmd64) appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string, initdataDigest []byte) ([]govmmQemu.Device, string, error) {
if q.sgxEPCSize != 0 {
devices = append(devices,
govmmQemu.Object{
@@ -299,6 +299,7 @@ func (q *qemuAmd64) appendProtectionDevice(devices []govmmQemu.Device, firmware,
Debug: false,
File: firmware,
FirmwareVolume: firmwareVolume,
InitdataDigest: initdataDigest,
}), "", nil
case sevProtection:
return append(devices,
@@ -318,6 +319,7 @@ func (q *qemuAmd64) appendProtectionDevice(devices []govmmQemu.Device, firmware,
File: firmware,
CBitPos: cpuid.AMDMemEncrypt.CBitPosition,
ReducedPhysBits: 1,
InitdataDigest: initdataDigest,
}
if q.snpIdBlock != "" && q.snpIdAuth != "" {
obj.SnpIdBlock = q.snpIdBlock

View File

@@ -257,7 +257,7 @@ func TestQemuAmd64AppendProtectionDevice(t *testing.T) {
firmware := "tdvf.fd"
var bios string
var err error
devices, bios, err = amd64.appendProtectionDevice(devices, firmware, "")
devices, bios, err = amd64.appendProtectionDevice(devices, firmware, "", []byte(""))
assert.NoError(err)
// non-protection
@@ -265,20 +265,20 @@ func TestQemuAmd64AppendProtectionDevice(t *testing.T) {
// pef protection
amd64.(*qemuAmd64).protection = pefProtection
devices, bios, err = amd64.appendProtectionDevice(devices, firmware, "")
devices, bios, err = amd64.appendProtectionDevice(devices, firmware, "", []byte(""))
assert.Error(err)
assert.Empty(bios)
// Secure Execution protection
amd64.(*qemuAmd64).protection = seProtection
devices, bios, err = amd64.appendProtectionDevice(devices, firmware, "")
devices, bios, err = amd64.appendProtectionDevice(devices, firmware, "", []byte(""))
assert.Error(err)
assert.Empty(bios)
// sev protection
amd64.(*qemuAmd64).protection = sevProtection
devices, bios, err = amd64.appendProtectionDevice(devices, firmware, "")
devices, bios, err = amd64.appendProtectionDevice(devices, firmware, "", []byte(""))
assert.NoError(err)
assert.Empty(bios)
@@ -298,7 +298,7 @@ func TestQemuAmd64AppendProtectionDevice(t *testing.T) {
// snp protection
amd64.(*qemuAmd64).protection = snpProtection
devices, bios, err = amd64.appendProtectionDevice(devices, firmware, "")
devices, bios, err = amd64.appendProtectionDevice(devices, firmware, "", []uint8(nil))
assert.NoError(err)
assert.Empty(bios)
@@ -318,18 +318,19 @@ func TestQemuAmd64AppendProtectionDevice(t *testing.T) {
// tdxProtection
amd64.(*qemuAmd64).protection = tdxProtection
devices, bios, err = amd64.appendProtectionDevice(devices, firmware, "")
devices, bios, err = amd64.appendProtectionDevice(devices, firmware, "", []byte(""))
assert.NoError(err)
assert.Empty(bios)
expectedOut = append(expectedOut,
govmmQemu.Object{
Driver: govmmQemu.Loader,
Type: govmmQemu.TDXGuest,
ID: "tdx",
DeviceID: fmt.Sprintf("fd%d", id),
Debug: false,
File: firmware,
Driver: govmmQemu.Loader,
Type: govmmQemu.TDXGuest,
ID: "tdx",
DeviceID: fmt.Sprintf("fd%d", id),
Debug: false,
File: firmware,
InitdataDigest: []byte(""),
},
)

View File

@@ -165,7 +165,7 @@ type qemuArch interface {
// This implementation is architecture specific, some archs may need
// a firmware, returns a string containing the path to the firmware that should
// be used with the -bios option, omit -bios option if the path is empty.
appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string) ([]govmmQemu.Device, string, error)
appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string, initdataDigest []byte) ([]govmmQemu.Device, string, error)
// scans the PCIe space and returns the biggest BAR sizes for 32-bit
// and 64-bit addressable memory
@@ -920,7 +920,7 @@ func (q *qemuArchBase) setPFlash(p []string) {
}
// append protection device
func (q *qemuArchBase) appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string) ([]govmmQemu.Device, string, error) {
func (q *qemuArchBase) appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string, initdataDigest []byte) ([]govmmQemu.Device, string, error) {
	// Base implementation: no protection device is added. A warning tagged
	// with the build architecture is emitted, and the device list and
	// firmware path are handed back unchanged; firmwareVolume and
	// initdataDigest are not used here.
	archLogger := hvLogger.WithField("arch", runtime.GOARCH)
	archLogger.Warnf("Confidential Computing has not been implemented for this architecture")

	return devices, firmware, nil
}

View File

@@ -154,7 +154,7 @@ func (q *qemuArm64) enableProtection() error {
return nil
}
func (q *qemuArm64) appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string) ([]govmmQemu.Device, string, error) {
func (q *qemuArm64) appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string, initdataDigest []byte) ([]govmmQemu.Device, string, error) {
err := q.enableProtection()
if err != nil {
hvLogger.WithField("arch", runtime.GOARCH).Error(err)

View File

@@ -183,42 +183,42 @@ func TestQemuArm64AppendProtectionDevice(t *testing.T) {
var err error
// no protection
devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "")
devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "", []byte(nil))
assert.Empty(devices)
assert.Empty(bios)
assert.NoError(err)
// PEF protection
arm64.(*qemuArm64).protection = pefProtection
devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "")
devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "", []byte(nil))
assert.Empty(devices)
assert.Empty(bios)
assert.NoError(err)
// Secure Execution protection
arm64.(*qemuArm64).protection = seProtection
devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "")
devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "", []byte(nil))
assert.Empty(devices)
assert.Empty(bios)
assert.NoError(err)
// SEV protection
arm64.(*qemuArm64).protection = sevProtection
devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "")
devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "", []byte(nil))
assert.Empty(devices)
assert.Empty(bios)
assert.NoError(err)
// SNP protection
arm64.(*qemuArm64).protection = snpProtection
devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "")
devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "", []byte(nil))
assert.Empty(devices)
assert.Empty(bios)
assert.NoError(err)
// TDX protection
arm64.(*qemuArm64).protection = tdxProtection
devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "")
devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "", []byte(nil))
assert.Empty(devices)
assert.Empty(bios)
assert.NoError(err)

View File

@@ -157,7 +157,7 @@ func (q *qemuPPC64le) enableProtection() error {
}
// append protection device
func (q *qemuPPC64le) appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string) ([]govmmQemu.Device, string, error) {
func (q *qemuPPC64le) appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string, initdataDigest []byte) ([]govmmQemu.Device, string, error) {
switch q.protection {
case pefProtection:
return append(devices,

View File

@@ -60,7 +60,7 @@ func TestQemuPPC64leAppendProtectionDevice(t *testing.T) {
var devices []govmmQemu.Device
var bios, firmware string
var err error
devices, bios, err = ppc64le.appendProtectionDevice(devices, firmware, "")
devices, bios, err = ppc64le.appendProtectionDevice(devices, firmware, "", []byte(nil))
assert.NoError(err)
//no protection
@@ -68,31 +68,31 @@ func TestQemuPPC64leAppendProtectionDevice(t *testing.T) {
//Secure Execution protection
ppc64le.(*qemuPPC64le).protection = seProtection
devices, bios, err = ppc64le.appendProtectionDevice(devices, firmware, "")
devices, bios, err = ppc64le.appendProtectionDevice(devices, firmware, "", []byte(nil))
assert.Error(err)
assert.Empty(bios)
//SEV protection
ppc64le.(*qemuPPC64le).protection = sevProtection
devices, bios, err = ppc64le.appendProtectionDevice(devices, firmware, "")
devices, bios, err = ppc64le.appendProtectionDevice(devices, firmware, "", []byte(nil))
assert.Error(err)
assert.Empty(bios)
//SNP protection
ppc64le.(*qemuPPC64le).protection = snpProtection
devices, bios, err = ppc64le.appendProtectionDevice(devices, firmware, "")
devices, bios, err = ppc64le.appendProtectionDevice(devices, firmware, "", []byte(nil))
assert.Error(err)
assert.Empty(bios)
//TDX protection
ppc64le.(*qemuPPC64le).protection = tdxProtection
devices, bios, err = ppc64le.appendProtectionDevice(devices, firmware, "")
devices, bios, err = ppc64le.appendProtectionDevice(devices, firmware, "", []byte(nil))
assert.Error(err)
assert.Empty(bios)
//PEF protection
ppc64le.(*qemuPPC64le).protection = pefProtection
devices, bios, err = ppc64le.appendProtectionDevice(devices, firmware, "")
devices, bios, err = ppc64le.appendProtectionDevice(devices, firmware, "", []byte(nil))
assert.NoError(err)
assert.Empty(bios)

View File

@@ -344,7 +344,7 @@ func (q *qemuS390x) enableProtection() error {
// appendProtectionDevice appends a QEMU object for Secure Execution.
// Takes devices and returns updated version. Takes BIOS and returns it (no modification on s390x).
func (q *qemuS390x) appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string) ([]govmmQemu.Device, string, error) {
func (q *qemuS390x) appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string, initdataDigest []byte) ([]govmmQemu.Device, string, error) {
switch q.protection {
case seProtection:
return append(devices,

View File

@@ -111,7 +111,7 @@ func TestQemuS390xAppendProtectionDevice(t *testing.T) {
var devices []govmmQemu.Device
var bios, firmware string
var err error
devices, bios, err = s390x.appendProtectionDevice(devices, firmware, "")
devices, bios, err = s390x.appendProtectionDevice(devices, firmware, "", []byte(nil))
assert.NoError(err)
// no protection
@@ -119,32 +119,32 @@ func TestQemuS390xAppendProtectionDevice(t *testing.T) {
// PEF protection
s390x.(*qemuS390x).protection = pefProtection
devices, bios, err = s390x.appendProtectionDevice(devices, firmware, "")
devices, bios, err = s390x.appendProtectionDevice(devices, firmware, "", []byte(nil))
assert.Error(err)
assert.Empty(bios)
// TDX protection
s390x.(*qemuS390x).protection = tdxProtection
devices, bios, err = s390x.appendProtectionDevice(devices, firmware, "")
devices, bios, err = s390x.appendProtectionDevice(devices, firmware, "", []byte(nil))
assert.Error(err)
assert.Empty(bios)
// SEV protection
s390x.(*qemuS390x).protection = sevProtection
devices, bios, err = s390x.appendProtectionDevice(devices, firmware, "")
devices, bios, err = s390x.appendProtectionDevice(devices, firmware, "", []byte(nil))
assert.Error(err)
assert.Empty(bios)
// SNP protection
s390x.(*qemuS390x).protection = snpProtection
devices, bios, err = s390x.appendProtectionDevice(devices, firmware, "")
devices, bios, err = s390x.appendProtectionDevice(devices, firmware, "", []byte(nil))
assert.Error(err)
assert.Empty(bios)
// Secure Execution protection
s390x.(*qemuS390x).protection = seProtection
devices, bios, err = s390x.appendProtectionDevice(devices, firmware, "")
devices, bios, err = s390x.appendProtectionDevice(devices, firmware, "", []byte(nil))
assert.NoError(err)
assert.Empty(bios)

View File

@@ -8,9 +8,14 @@
package virtcontainers
import (
"bytes"
"compress/gzip"
"context"
"encoding/binary"
"fmt"
"io"
"os"
"path"
"path/filepath"
"testing"
@@ -770,3 +775,53 @@ func TestQemuStartSandbox(t *testing.T) {
err = q.StartVM(context.Background(), 10)
assert.Error(err)
}
// TestPrepareInitdataImage checks the on-disk layout produced by
// prepareInitdataImage: the "initdata" magic number, the little-endian length
// field, the gzipped payload and the sector alignment of the whole image.
func TestPrepareInitdataImage(t *testing.T) {
	// Offsets of the image header: 8 bytes of magic followed by an 8-byte
	// length field; the gzipped payload starts right after it.
	const (
		magicSize  = 8
		headerSize = 16
		sectorSize = 512
	)

	tests := []struct {
		name    string
		content string
	}{
		{
			"create an initdata image",
			"some content",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// t.TempDir is removed automatically when the subtest ends.
			imageDir := t.TempDir()
			imagePath := path.Join(imageDir, "initdata.img")

			// Every step below depends on the previous one, so failures are
			// fatal instead of letting the test run into a panic.
			if err := prepareInitdataImage(tt.content, imagePath); err != nil {
				t.Fatalf("prepareInitdataImage() error = %v", err)
			}

			fullContent, err := os.ReadFile(imagePath)
			if err != nil {
				t.Fatalf("read initdata image failed: %v", err)
			}

			if len(fullContent) < headerSize {
				t.Fatalf("initdata image is too short, got %d bytes, want at least %d", len(fullContent), headerSize)
			}
			if len(fullContent)%sectorSize != 0 {
				t.Errorf("initdata image size %d is not a multiple of the sector size %d", len(fullContent), sectorSize)
			}

			magicNumber := fullContent[:magicSize]
			if string(magicNumber) != "initdata" {
				t.Errorf("initdata magic number is not correct, got %s, want initdata", string(magicNumber))
			}

			length := binary.LittleEndian.Uint64(fullContent[magicSize:headerSize])
			if length > uint64(len(fullContent)-headerSize) {
				t.Fatalf("initdata length field %d exceeds the image payload size %d", length, len(fullContent)-headerSize)
			}

			contentSlice := fullContent[headerSize : headerSize+length]
			gzipReader, err := gzip.NewReader(bytes.NewBuffer(contentSlice))
			if err != nil {
				t.Fatalf("read gzipped initdata failed: %v", err)
			}
			defer gzipReader.Close()

			content, err := io.ReadAll(gzipReader)
			if err != nil {
				t.Fatalf("read gzipped initdata failed: %v", err)
			}

			if string(content) != tt.content {
				t.Errorf("initdata content is not correct, got %s, want %s", string(content), tt.content)
			}
		})
	}
}