runtime: add support for io.katacontainers.config.runtime.cc_init_data

io.katacontainers.config.runtime.cc_init_data specifies the initdata used
by the pod, in base64(gzip(initdata toml)) format. The initdata will be
encapsulated into an initdata image, which is then attached to the guest
as a raw block device.

The initdata image will be padded to a multiple of 512 bytes, which is
chosen because it is the usual sector size supported by different
hypervisors such as qemu, clh and dragonball.

Note that this patch only adds support for qemu hypervisor.

Signed-off-by: Xynnn007 <xynnn@linux.alibaba.com>
This commit is contained in:
Xynnn007
2025-04-03 12:52:25 +08:00
parent 17d0db9865
commit 91bb6b7c34
16 changed files with 355 additions and 45 deletions

View File

@@ -9,7 +9,10 @@ package virtcontainers
import (
"bufio"
"bytes"
"compress/gzip"
"context"
"encoding/binary"
"encoding/hex"
"encoding/json"
"fmt"
@@ -394,6 +397,23 @@ func (q *qemu) createQmpSocket() ([]govmmQemu.QMPSocket, error) {
return sockets, nil
}
// buildInitdataDevice returns devices extended with one block device that
// exposes the initdata image to the guest.
//
// The device is a virtio-blk device on the PCI transport with the fixed ID
// "initdata", backed by the file InitdataImage in raw format.
func (q *qemu) buildInitdataDevice(devices []govmmQemu.Device, InitdataImage string) []govmmQemu.Device {
	return append(devices, govmmQemu.BlockDevice{
		ID:        "initdata",
		File:      InitdataImage,
		Format:    "raw",
		Interface: "none",
		Driver:    govmmQemu.VirtioBlock,
		Transport: govmmQemu.TransportPCI,
		AIO:       govmmQemu.Threads,
		SCSI:      false,
		WCE:       false,
	})
}
func (q *qemu) buildDevices(ctx context.Context, kernelPath string) ([]govmmQemu.Device, *govmmQemu.IOThread, *govmmQemu.Kernel, error) {
var devices []govmmQemu.Device
@@ -540,6 +560,94 @@ func (q *qemu) createVirtiofsDaemon(sharedPath string) (VirtiofsDaemon, error) {
}, nil
}
// prepareInitdataImage creates (or overwrites) the image at imagePath with a
// very simple layout.
//
// There will be one or more sectors. The first 8 bytes are the magic number
// "initdata". Then a "length" field of 8 bytes follows (unsigned 64-bit,
// little endian) holding the size of the gzipped payload. Finally comes the
// gzipped initdata toml. The image is zero-padded to an integer multiple of
// the sector size (512 bytes) for alignment.
//
//	offset 0                                 8                    16
//	0      'i' 'n' 'i' 't' 'd' 'a' 't' 'a' | gzip length in le |
//	16     gzip(initdata toml) ...
//	(end of the last sector) '\0' paddings
//
// initdata is the plain initdata content to compress; imagePath is the file
// to write. Any previous content of imagePath is discarded. A non-nil error
// is returned when compression or writing the file fails.
func prepareInitdataImage(initdata string, imagePath string) error {
	const (
		sectorSize = 512
		magic      = "initdata"
		// headerSize covers the magic number plus the 8-byte length field.
		headerSize = len(magic) + 8
	)

	var compressed bytes.Buffer
	gzipper := gzip.NewWriter(&compressed)
	if _, err := gzipper.Write([]byte(initdata)); err != nil {
		return fmt.Errorf("failed to compress initdata: %w", err)
	}
	// Close flushes the remaining compressed data and the gzip trailer, so
	// it must succeed before the buffer is read.
	if err := gzipper.Close(); err != nil {
		return fmt.Errorf("failed to compress initdata: %w", err)
	}
	payload := compressed.Bytes()

	// Round header+payload up to the next sector boundary. make() zero-fills
	// the slice, which provides the trailing '\0' padding.
	imageSize := (headerSize + len(payload) + sectorSize - 1) / sectorSize * sectorSize
	image := make([]byte, imageSize)
	copy(image, magic)
	binary.LittleEndian.PutUint64(image[len(magic):headerSize], uint64(len(payload)))
	copy(image[headerSize:], payload)

	// os.WriteFile truncates an existing file, so a stale, larger image can
	// never leave garbage behind the padding, and it reports close errors.
	if err := os.WriteFile(imagePath, image, 0640); err != nil {
		return fmt.Errorf("failed to write initdata image: %w", err)
	}

	return nil
}
// prepareInitdataMount materializes config.Initdata as an image file that can
// later be attached to the guest.
//
// When config.Initdata is empty nothing is created and nil is returned.
// Otherwise the directory /run/kata-containers/shared/initdata/<q.id> is
// created, the image is written to data.img inside it, and on success
// config.InitdataImage is set to that path. On failure the returned error
// says which step failed and wraps the underlying cause.
func (q *qemu) prepareInitdataMount(config *HypervisorConfig) error {
	if len(config.Initdata) == 0 {
		q.Logger().Info("No initdata provided. Skip prepare initdata device")
		return nil
	}

	q.Logger().Info("Start to prepare initdata")
	initdataWorkdir := filepath.Join("/run/kata-containers/shared/initdata", q.id)
	initdataImagePath := filepath.Join(initdataWorkdir, "data.img")

	// The failure context travels with the returned error; the previous
	// logger chains built a log entry but never emitted it.
	if err := os.MkdirAll(initdataWorkdir, 0755); err != nil {
		return fmt.Errorf("create initdata image path %q: %w", initdataWorkdir, err)
	}

	if err := prepareInitdataImage(config.Initdata, initdataImagePath); err != nil {
		return fmt.Errorf("prepare initdata image %q: %w", initdataImagePath, err)
	}

	config.InitdataImage = initdataImagePath
	return nil
}
// CreateVM is the Hypervisor VM creation implementation for govmmQemu.
func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervisorConfig *HypervisorConfig) error {
// Save the tracing context
@@ -552,6 +660,10 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi
return err
}
if err := q.prepareInitdataMount(hypervisorConfig); err != nil {
return err
}
machine, err := q.getQemuMachine()
if err != nil {
return err
@@ -650,6 +762,10 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi
return err
}
if len(hypervisorConfig.Initdata) > 0 {
devices = q.buildInitdataDevice(devices, hypervisorConfig.InitdataImage)
}
// some devices configuration may also change kernel params, make sure this is called afterwards
kernel.Params = q.kernelParameters()
q.checkBpfEnabled()
@@ -681,7 +797,7 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi
Debug: hypervisorConfig.Debug,
}
qemuConfig.Devices, qemuConfig.Bios, err = q.arch.appendProtectionDevice(qemuConfig.Devices, firmwarePath, firmwareVolumePath)
qemuConfig.Devices, qemuConfig.Bios, err = q.arch.appendProtectionDevice(qemuConfig.Devices, firmwarePath, firmwareVolumePath, hypervisorConfig.InitdataDigest)
if err != nil {
return err
}
@@ -1254,6 +1370,7 @@ func (q *qemu) StopVM(ctx context.Context, waitOnly bool) (err error) {
}
}
}
if q.config.SharedFS == config.VirtioFS || q.config.SharedFS == config.VirtioFSNydus {
if err := q.stopVirtiofsDaemon(ctx); err != nil {
return err
@@ -1318,6 +1435,15 @@ func (q *qemu) cleanupVM() error {
}).Debug("successfully removed the non root user")
}
// If we have initdata, we should drop initdata image path
hypervisorConfig := q.HypervisorConfig()
if len(hypervisorConfig.Initdata) > 0 {
initdataWorkdir := filepath.Join(string(filepath.Separator), "/run/kata-containers/shared/initdata", q.id)
if err := os.RemoveAll(initdataWorkdir); err != nil {
q.Logger().WithError(err).Warnf("failed to remove initdata work dir %s", initdataWorkdir)
}
}
return nil
}