Merge pull request #13196 from microsoft/cameronbaird/upstream/runtime-go-clh-templating

runtime: Enable VM Templating Support for CLH
This commit is contained in:
Fabiano Fidêncio
2026-06-21 16:31:19 +02:00
committed by GitHub
17 changed files with 945 additions and 86 deletions

View File

@@ -41,10 +41,12 @@ jobs:
matrix:
environment: [
{ vmm: clh, containerd_version: latest },
{ vmm: clh, containerd_version: latest, snapshotter: erofs, erofs_mode: disk, erofs_merge_mode: unmerged },
{ vmm: clh, containerd_version: minimum },
{ vmm: dragonball, containerd_version: latest },
{ vmm: dragonball, containerd_version: minimum },
{ vmm: qemu, containerd_version: latest },
{ vmm: qemu, containerd_version: latest, snapshotter: erofs, erofs_mode: disk, erofs_merge_mode: unmerged },
{ vmm: qemu, containerd_version: minimum },
{ vmm: qemu-runtime-rs, containerd_version: latest },
{ vmm: qemu-runtime-rs, containerd_version: minimum },
@@ -68,6 +70,9 @@ jobs:
K8S_TEST_HOST_TYPE: baremetal-no-attestation
CONTAINER_ENGINE: containerd
CONTAINER_ENGINE_VERSION: ${{ matrix.environment.containerd_version }}
SNAPSHOTTER: ${{ matrix.environment.snapshotter }}
EROFS_SNAPSHOTTER_MODE: ${{ matrix.environment.erofs_mode }}
EROFS_MERGE_MODE: ${{ matrix.environment.erofs_merge_mode }}
GH_TOKEN: ${{ github.token }}
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

View File

@@ -2085,12 +2085,6 @@ func checkNetNsConfig(config oci.RuntimeConfig) error {
// checkFactoryConfig ensures the VM factory configuration is valid.
func checkFactoryConfig(config oci.RuntimeConfig) error {
if config.FactoryConfig.Template {
if config.HypervisorConfig.InitrdPath == "" {
return errors.New("Factory option enable_template requires an initrd image")
}
}
if config.FactoryConfig.VMCacheNumber > 0 {
if config.HypervisorType != vc.QemuHypervisor {
return errors.New("VM cache just support qemu")

View File

@@ -1696,7 +1696,7 @@ func TestCheckFactoryConfig(t *testing.T) {
{false, false, "", "initrd"},
{true, false, "", "initrd"},
{true, true, "image", ""},
{true, false, "image", ""},
}
for i, d := range data {

View File

@@ -82,12 +82,8 @@ func HandleFactory(ctx context.Context, vci vc.VC, runtimeConfig *oci.RuntimeCon
kataUtilsLogger.WithField("factory", factoryConfig).Info("load vm factory")
f, err := vf.NewFactory(ctx, factoryConfig, true)
if err != nil && !factoryConfig.VMCache {
kataUtilsLogger.WithError(err).Warn("load vm factory failed, about to create new one")
f, err = vf.NewFactory(ctx, factoryConfig, false)
}
if err != nil {
kataUtilsLogger.WithError(err).Warn("create vm factory failed")
kataUtilsLogger.WithError(err).Warn("load vm factory failed, will use direct boot")
return
}

View File

@@ -68,6 +68,7 @@ const (
// VM state names used when inspecting the state reported for the VM.
// clhStatePaused is the state restoreVM expects to see right after a
// restore from a template snapshot.
const (
clhStateCreated = "Created"
clhStateRunning = "Running"
clhStatePaused = "Paused"
)
const (
@@ -112,8 +113,16 @@ type clhClient interface {
VmAddDevicePut(ctx context.Context, deviceConfig chclient.DeviceConfig) (chclient.PciDeviceInfo, *http.Response, error)
// Add a new disk device to the VM
VmAddDiskPut(ctx context.Context, diskConfig chclient.DiskConfig) (chclient.PciDeviceInfo, *http.Response, error)
// Pause the VM
VmPausePut(ctx context.Context) (*http.Response, error)
// Create a snapshot of the VM
VmSnapshotPut(ctx context.Context, vmSnapshotConfig chclient.VmSnapshotConfig) (*http.Response, error)
// Remove a device from the VM
VmRemoveDevicePut(ctx context.Context, vmRemoveDevice chclient.VmRemoveDevice) (*http.Response, error)
// Restore VM from a snapshot
VmRestorePut(ctx context.Context, restoreConfig chclient.RestoreConfig) (*http.Response, error)
// Resume a paused VM
ResumeVM(ctx context.Context) (*http.Response, error)
}
type clhClientApi struct {
@@ -153,10 +162,26 @@ func (c *clhClientApi) VmAddDiskPut(ctx context.Context, diskConfig chclient.Dis
return c.ApiInternal.VmAddDiskPut(ctx).DiskConfig(diskConfig).Execute()
}
// VmPausePut issues the pause request by building the PauseVM call on the
// wrapped API client and executing it.
func (c *clhClientApi) VmPausePut(ctx context.Context) (*http.Response, error) {
return c.ApiInternal.PauseVM(ctx).Execute()
}
// VmSnapshotPut issues a snapshot request carrying vmSnapshotConfig through
// the wrapped API client and returns the raw HTTP response and error.
func (c *clhClientApi) VmSnapshotPut(ctx context.Context, vmSnapshotConfig chclient.VmSnapshotConfig) (*http.Response, error) {
return c.ApiInternal.VmSnapshotPut(ctx).VmSnapshotConfig(vmSnapshotConfig).Execute()
}
// VmRemoveDevicePut issues a device removal request carrying vmRemoveDevice
// through the wrapped API client and returns the raw HTTP response and error.
func (c *clhClientApi) VmRemoveDevicePut(ctx context.Context, vmRemoveDevice chclient.VmRemoveDevice) (*http.Response, error) {
return c.ApiInternal.VmRemoveDevicePut(ctx).VmRemoveDevice(vmRemoveDevice).Execute()
}
// VmRestorePut issues a restore request carrying restoreConfig through the
// wrapped API client and returns the raw HTTP response and error.
func (c *clhClientApi) VmRestorePut(ctx context.Context, restoreConfig chclient.RestoreConfig) (*http.Response, error) {
return c.ApiInternal.VmRestorePut(ctx).RestoreConfig(restoreConfig).Execute()
}
// ResumeVM issues the resume request by building the ResumeVM call on the
// wrapped API client and executing it.
func (c *clhClientApi) ResumeVM(ctx context.Context) (*http.Response, error) {
return c.ApiInternal.ResumeVM(ctx).Execute()
}
// This is done in order to be able to override such a function as part of
// our unit tests, as when testing bootVM we're on a mocked scenario already.
var vmAddNetPutRequest = func(clh *cloudHypervisor) ([]chclient.PciDeviceInfo, error) {
@@ -255,12 +280,14 @@ type CloudHypervisorState struct {
PID int
VirtiofsDaemonPid int
state clhState
isRestoring bool
}
// reset clears the tracked process IDs, puts the state back to clhNotReady
// and drops any pending "restore from template" marker.
func (s *CloudHypervisorState) reset() {
s.PID = 0
s.VirtiofsDaemonPid = 0
s.state = clhNotReady
// A reset state must not carry over a restore decision made for a previous VM.
s.isRestoring = false
}
type cloudHypervisor struct {
@@ -501,7 +528,7 @@ func getNonUserDefinedKernelParams(rootfstype string, disableNvdimm bool, dax bo
}
// For cloudHypervisor this call only sets the internal structure up.
// The VM will be created and started through StartVM().
// The VM will be created and started through StartVM(), or restored from template if template files exist.
func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Network, hypervisorConfig *HypervisorConfig) error {
clh.ctx = ctx
@@ -559,29 +586,78 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net
}
}
// Create the VM memory config via the constructor to ensure default values are properly assigned
clh.vmconfig.Memory = chclient.NewMemoryConfig(int64((utils.MemUnit(clh.config.MemorySize) * utils.MiB).ToBytes()))
// Memory config shared is to be enabled when using vhost_user backends, ex. virtio-fs
// or when using HugePages.
// If such features are disabled, turn off shared memory config.
if clh.config.SharedFS == config.NoSharedFS && !clh.config.HugePages {
clh.vmconfig.Memory.Shared = func(b bool) *bool { return &b }(false)
} else {
clh.vmconfig.Memory.Shared = func(b bool) *bool { return &b }(true)
}
// Enable hugepages if needed
clh.vmconfig.Memory.Hugepages = func(b bool) *bool { return &b }(clh.config.HugePages)
if !clh.config.ConfidentialGuest {
hotplugSize := clh.config.DefaultMaxMemorySize
// OpenAPI only supports int64 values
clh.vmconfig.Memory.HotplugSize = func(i int64) *int64 { return &i }(int64((utils.MemUnit(hotplugSize) * utils.MiB).ToBytes()))
if clh.config.ReclaimGuestFreedMemory {
// Create VM with a balloon config so we can enable free page reporting (size of the balloon can be set to zero)
clh.vmconfig.Balloon = chclient.NewBalloonConfig(0)
// Set the free page reporting flag for ballooning to be true
clh.vmconfig.Balloon.SetFreePageReporting(true)
// If the VM is booting from a template, or if the VM is going to be used as a template
// the memory is to be backed by a file, so we need to configure the memory zones accordingly.
if clh.config.BootFromTemplate || clh.config.BootToBeTemplate {
// VM templating is incompatible with virtio-fs because virtio-fs requires shared memory,
// while templating needs COW/private memory on restore.
if clh.config.SharedFS == config.VirtioFS || clh.config.SharedFS == config.VirtioFSNydus {
return errors.New("VM templating has been enabled with virtio-fs and this configuration will not work")
}
// Double-check that the clh.config.MemoryPath file is accessible before using it in the VM config, to avoid hitting a less clear error from cloud hypervisor when it tries to access the memory file.
if _, err := os.Stat(clh.config.MemoryPath); err != nil {
return fmt.Errorf("memory file %s is not accessible: %w", clh.config.MemoryPath, err)
}
// Set the size to be 0 since we are going to configure actual size via zones
clh.vmconfig.Memory = chclient.NewMemoryConfig(0)
memoryZoneConfig := chclient.NewMemoryZoneConfig("mem0", int64((utils.MemUnit(clh.config.MemorySize) * utils.MiB).ToBytes()))
if clh.config.BootToBeTemplate {
// When BootToBeTemplate is true, the memory file backing the VM memory is shared between multiple VMs created from the same template.
// So we need to set shared to true in this case.
memoryZoneConfig.SetShared(true)
clh.vmconfig.Memory.Shared = func(b bool) *bool { return &b }(true)
if !clh.config.ConfidentialGuest {
// TODO: Remove this warning once memory hotplugging is supported
// for template VMs.
//
// Memory hotplug is intentionally not configured for template VMs.
// Resizing a memory zone requires the virtio-mem hotplug method
// (cloud-hypervisor rejects the default ACPI hotplug on a zone that
// carries a hotplug_size), which is not currently supported in the
// templating path. As a result, VMs restored from this template
// cannot grow their memory beyond the template's boot size.
clh.Logger().Warn("memory hotplugging is currently unsupported for template VMs")
}
} else {
// When BootFromTemplate is true, set shared=false to ensure Copy-On-Write is used for the memory file.
// So that the VM can have its own private memory.
memoryZoneConfig.SetShared(false)
clh.vmconfig.Memory.Shared = func(b bool) *bool { return &b }(false)
}
memoryZoneConfig.SetFile(clh.config.MemoryPath)
clh.vmconfig.Memory.Zones = &[]chclient.MemoryZoneConfig{
*memoryZoneConfig,
}
} else { // Normal (non-template) VM creation
// Create the VM memory config via the constructor to ensure default values are properly assigned
clh.vmconfig.Memory = chclient.NewMemoryConfig(int64((utils.MemUnit(clh.config.MemorySize) * utils.MiB).ToBytes()))
// Memory config shared is to be enabled when using vhost_user backends, ex. virtio-fs
// or when using HugePages.
// If such features are disabled, turn off shared memory config.
if clh.config.SharedFS == config.NoSharedFS && !clh.config.HugePages {
clh.vmconfig.Memory.Shared = func(b bool) *bool { return &b }(false)
} else {
clh.vmconfig.Memory.Shared = func(b bool) *bool { return &b }(true)
}
// Enable hugepages if needed
clh.vmconfig.Memory.Hugepages = func(b bool) *bool { return &b }(clh.config.HugePages)
if !clh.config.ConfidentialGuest {
hotplugSize := clh.config.DefaultMaxMemorySize
// OpenAPI only supports int64 values
clh.vmconfig.Memory.HotplugSize = func(i int64) *int64 { return &i }(int64((utils.MemUnit(hotplugSize) * utils.MiB).ToBytes()))
}
}
// Configure balloon device for free page reporting. This is set unconditionally
// (for both template and non-template paths) so that template VMs include the
// balloon in their snapshot, and VMs restored from a template inherit it.
if !clh.config.ConfidentialGuest && clh.config.ReclaimGuestFreedMemory {
clh.vmconfig.Balloon = chclient.NewBalloonConfig(0)
clh.vmconfig.Balloon.SetFreePageReporting(true)
}
// Set initial amount of cpu's for the virtual machine
@@ -700,9 +776,118 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net
return err
}
// Check if we should restore from template instead of creating new VM
if clh.config.BootFromTemplate && clh.shouldRestoreFromTemplate() {
clh.Logger().Info("Template files found, will restore VM instead of creating new")
// Mark this as a restore operation for StartVM to use RestoreVM instead
clh.state.isRestoring = true
return nil
}
return nil
}
// shouldRestoreFromTemplate reports whether every file needed for a template
// restore is accessible: config.json and state.json in the directory that
// holds MemoryPath, plus the memory file itself. The first inaccessible file
// is logged at debug level and makes the function return false.
func (clh *cloudHypervisor) shouldRestoreFromTemplate() bool {
	// The snapshot lives next to the memory file.
	templateDir := filepath.Dir(clh.config.MemoryPath)

	// Checked in this order; each entry carries its own log field and message.
	required := []struct {
		field   string
		path    string
		missing string
	}{
		{"configFile", filepath.Join(templateDir, "config.json"), "Template config file not accessible"},
		{"stateFile", filepath.Join(templateDir, "state.json"), "Template state file not accessible"},
		{"memoryFile", clh.config.MemoryPath, "Template memory file not accessible"},
	}

	found := log.Fields{}
	for _, f := range required {
		if _, err := os.Stat(f.path); err != nil {
			clh.Logger().WithError(err).WithField(f.field, f.path).Debug(f.missing)
			return false
		}
		found[f.field] = f.path
	}

	clh.Logger().WithFields(found).Info("Template files found, can restore VM from template")
	return true
}
// copyFile copies the contents of src to dst and gives dst the same
// permission bits as src.
//
// dst is created if missing and truncated otherwise. The mode passed to
// os.OpenFile is only honoured when the file is created (and is filtered by
// the process umask), so the permissions are applied explicitly afterwards;
// this also covers the case where dst already existed with a different mode.
//
// The data is synced to stable storage before returning. Any error from
// opening, copying, chmod, sync or closing dst is returned.
func (clh *cloudHypervisor) copyFile(src, dst string) (err error) {
	srcFile, err := os.Open(src)
	if err != nil {
		return err
	}
	// Read-only handle: a close failure cannot lose data, so it is ignored.
	defer srcFile.Close()

	srcInfo, err := srcFile.Stat()
	if err != nil {
		return err
	}

	dstFile, err := os.OpenFile(dst, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, srcInfo.Mode())
	if err != nil {
		return err
	}
	// Report a failed close of the written file unless an earlier error
	// is already being returned.
	defer func() {
		if cerr := dstFile.Close(); err == nil {
			err = cerr
		}
	}()

	if _, err = io.Copy(dstFile, srcFile); err != nil {
		return err
	}

	// Enforce the source permissions regardless of umask or a pre-existing dst.
	if err = dstFile.Chmod(srcInfo.Mode()); err != nil {
		return err
	}

	return dstFile.Sync()
}
// updateVsockSocketPath rewrites the JSON document at configPath so that, when
// it has a "vsock" object, that object's "socket" entry is the vsock socket
// path computed for vmID. The document is re-serialized and written back with
// mode 0600 whether or not a "vsock" object was present.
func (clh *cloudHypervisor) updateVsockSocketPath(configPath, vmID string) error {
	raw, err := os.ReadFile(configPath)
	if err != nil {
		return err
	}

	// UseNumber keeps numeric values as json.Number instead of float64.
	decoder := json.NewDecoder(bytes.NewReader(raw))
	decoder.UseNumber()

	var snapshotCfg map[string]interface{}
	if err := decoder.Decode(&snapshotCfg); err != nil {
		return err
	}

	vsockCfg, hasVsock := snapshotCfg["vsock"].(map[string]interface{})
	if hasVsock {
		// Point the copied configuration at this VM's own socket.
		socketPath, err := clh.vsockSocketPath(vmID)
		if err != nil {
			return err
		}
		vsockCfg["socket"] = socketPath

		fields := log.Fields{
			"vmID":         vmID,
			"newVsockPath": socketPath,
		}
		clh.Logger().WithFields(fields).Debug("Updated vsock socket path in config.json")
	}

	rewritten, err := json.Marshal(snapshotCfg)
	if err != nil {
		return err
	}

	return os.WriteFile(configPath, rewritten, 0600)
}
// setupInitdata prepares and attaches the initdata disk if present.
func setupInitdata(clh *cloudHypervisor, hypervisorConfig *HypervisorConfig) error {
if len(hypervisorConfig.Initdata) == 0 {
@@ -771,8 +956,37 @@ func (clh *cloudHypervisor) StartVM(ctx context.Context, timeout int) error {
ctx, cancel := context.WithTimeout(ctx, bootTimeout*time.Second)
defer cancel()
if err := clh.bootVM(ctx); err != nil {
return err
// Check if we should restore from template or create new VM
if clh.state.isRestoring {
// Copy template files to VM directory
snapshotDir := filepath.Dir(clh.config.MemoryPath)
// Copy config.json from template to VM directory
srcConfig := filepath.Join(snapshotDir, "config.json")
dstConfig := filepath.Join(vmPath, "config.json")
if err := clh.copyFile(srcConfig, dstConfig); err != nil {
return fmt.Errorf("failed to copy config.json: %v", err)
}
// Copy state.json from template to VM directory
srcState := filepath.Join(snapshotDir, "state.json")
dstState := filepath.Join(vmPath, "state.json")
if err := clh.copyFile(srcState, dstState); err != nil {
return fmt.Errorf("failed to copy state.json: %v", err)
}
// Update vsock socket path in the copied config.json
if err := clh.updateVsockSocketPath(dstConfig, clh.id); err != nil {
return fmt.Errorf("failed to update vsock socket path: %v", err)
}
if err := clh.restoreVM(ctx); err != nil {
return err
}
} else {
if err := clh.bootVM(ctx); err != nil {
return err
}
}
clh.state.state = clhReady
@@ -1287,16 +1501,109 @@ func (clh *cloudHypervisor) Cleanup(ctx context.Context) error {
// PauseVM sends a pause request for the VM through the API client, bounded by
// the configured API timeout. A failed request is logged and returned after
// conversion by openAPIClientError.
func (clh *cloudHypervisor) PauseVM(ctx context.Context) error {
	clh.Logger().WithField("function", "PauseVM").Info("Pause Sandbox")

	api := clh.client()

	apiCtx, cancelAPI := context.WithTimeout(ctx, clh.getClhAPITimeout()*time.Second)
	defer cancelAPI()

	if _, pauseErr := api.VmPausePut(apiCtx); pauseErr != nil {
		clh.Logger().WithError(pauseErr).Error("Failed to pause VM")
		return openAPIClientError(pauseErr)
	}

	return nil
}
// SaveVM asks the hypervisor to snapshot the VM into the directory that holds
// MemoryPath (passed as a file:// destination URL), bounded by the configured
// API timeout.
//
// When the VM is being booted to become a template (BootToBeTemplate), the
// config.json written into that directory is then patched so that the
// "memory" section and every entry of its "zones" array have "shared" set to
// false, and the file is written back with mode 0600.
//
// It returns an error if the snapshot request fails, if config.json cannot be
// read, parsed or written, or if it lacks a "memory" object with a "zones"
// array of objects.
func (clh *cloudHypervisor) SaveVM() error {
	clh.Logger().WithField("function", "SaveVM").Info("Save Sandbox")

	cl := clh.client()
	ctx, cancel := context.WithTimeout(context.Background(), clh.getClhAPITimeout()*time.Second)
	defer cancel()

	// When creating a template, MemoryPath points into the template
	// directory, so its parent is used as the snapshot destination.
	snapshotDir := filepath.Dir(clh.config.MemoryPath)
	fileURL := "file://" + snapshotDir

	vmSnapshotConfig := *chclient.NewVmSnapshotConfig()
	vmSnapshotConfig.SetDestinationUrl(fileURL)

	if _, err := cl.VmSnapshotPut(ctx, vmSnapshotConfig); err != nil {
		clh.Logger().WithError(err).Error("Failed to save VM snapshot")
		return openAPIClientError(err)
	}

	if !clh.config.BootToBeTemplate {
		return nil
	}

	// Update the config.json file in the snapshotDir to set memory shared=false.
	snapshotConfigPath := filepath.Join(snapshotDir, "config.json")
	snapshotConfig, err := os.ReadFile(snapshotConfigPath)
	if err != nil {
		clh.Logger().WithError(err).Error("Failed to read snapshot config")
		return err
	}

	// UseNumber keeps numeric values as json.Number so they are written
	// back unchanged rather than passing through float64.
	var snapshotConfigData map[string]interface{}
	dec := json.NewDecoder(bytes.NewReader(snapshotConfig))
	dec.UseNumber()
	if err := dec.Decode(&snapshotConfigData); err != nil {
		clh.Logger().WithError(err).Error("Failed to unmarshal snapshot config")
		return err
	}

	memorySection, ok := snapshotConfigData["memory"].(map[string]interface{})
	if !ok {
		clh.Logger().Error("Unable to access memory section in snapshot config")
		return fmt.Errorf("invalid snapshot config structure: memory section not found or invalid")
	}
	memorySection["shared"] = false

	// Apply the same change to every element of the "zones" array.
	zones, ok := memorySection["zones"].([]interface{})
	if !ok {
		clh.Logger().Error("Unable to access zones array in snapshot config memory section")
		return fmt.Errorf("invalid snapshot config structure: zones array in memory section not found or invalid")
	}
	for _, zone := range zones {
		zoneMap, ok := zone.(map[string]interface{})
		if !ok {
			clh.Logger().Error("Unable to access zone in snapshot config memory section")
			return fmt.Errorf("invalid snapshot config structure: zone in memory section not found or invalid")
		}
		zoneMap["shared"] = false
	}

	// Write the modified config back to file.
	modifiedConfig, err := json.Marshal(snapshotConfigData)
	if err != nil {
		clh.Logger().WithError(err).Error("Failed to marshal modified snapshot config")
		return err
	}
	if err := os.WriteFile(snapshotConfigPath, modifiedConfig, 0600); err != nil {
		clh.Logger().WithError(err).Error("Failed to write modified snapshot config")
		return err
	}

	return nil
}
// ResumeVM sends a resume request for the VM through the API client, bounded
// by the configured API timeout. A failed request is logged and returned after
// conversion by openAPIClientError.
func (clh *cloudHypervisor) ResumeVM(ctx context.Context) error {
	clh.Logger().WithField("function", "ResumeVM").Info("Resume Sandbox")

	api := clh.client()

	apiCtx, cancelAPI := context.WithTimeout(ctx, clh.getClhAPITimeout()*time.Second)
	defer cancelAPI()

	if _, resumeErr := api.ResumeVM(apiCtx); resumeErr != nil {
		clh.Logger().WithError(resumeErr).Error("Failed to resume VM")
		return openAPIClientError(resumeErr)
	}

	return nil
}
@@ -1509,11 +1816,11 @@ func (clh *cloudHypervisor) clhPath() (string, error) {
p = defaultClhPath
}
if _, err = os.Stat(p); os.IsNotExist(err) {
return "", fmt.Errorf("Cloud-Hypervisor path (%s) does not exist", p)
if _, err = os.Stat(p); err != nil {
return "", fmt.Errorf("Cloud-Hypervisor path (%s) is not accessible: %w", p, err)
}
return p, err
return p, nil
}
func (clh *cloudHypervisor) launchClh() error {
@@ -1741,6 +2048,60 @@ func (clh *cloudHypervisor) bootVM(ctx context.Context) error {
return nil
}
// restoreVM restores a VM from the snapshot files (state.json and config.json)
// found in this VM's directory under VMStorePath. The restored VM is expected
// to be in Paused state. The caller (factory layer, via factory.GetVM →
// vm.Resume) is responsible for resuming the VM, reseeding the RNG, and syncing
// the guest clock before the VM is used. See factory_linux.go GetVM().
//
// It returns an error if either snapshot file is inaccessible (the underlying
// error is wrapped, so errors.Is/errors.As work on it), if the restore request
// fails, or if the VM info cannot be queried afterwards. A state other than
// Paused after the restore is only logged as a warning.
func (clh *cloudHypervisor) restoreVM(ctx context.Context) error {
	clh.Logger().Info("Restoring VM from template")

	cl := clh.client()

	// The per-VM directory under VMStorePath is the restore source.
	vmPath := filepath.Join(clh.config.VMStorePath, clh.id)
	sourceURL := "file://" + vmPath

	// Fail early with a clear message if the VM state or configuration
	// file is missing from the restore source.
	stateFile := filepath.Join(vmPath, "state.json")
	configFile := filepath.Join(vmPath, "config.json")

	if _, err := os.Stat(stateFile); err != nil {
		return fmt.Errorf("failed to access state file %s: %w", stateFile, err)
	}
	if _, err := os.Stat(configFile); err != nil {
		return fmt.Errorf("failed to access config file %s: %w", configFile, err)
	}

	// Prepare restore configuration
	restoreConfig := *chclient.NewRestoreConfig(sourceURL)

	clh.Logger().WithField("sourceURL", sourceURL).Debug("Restore configuration")

	// Restore VM from template (uses the caller's ctx, which already has the boot timeout)
	_, err := cl.VmRestorePut(ctx, restoreConfig)
	if err != nil {
		clh.Logger().WithError(err).Error("failed to restore VM from template")
		return openAPIClientError(err)
	}

	// Check VM state after restoration
	info, err := clh.vmInfo()
	if err != nil {
		return err
	}

	clh.Logger().Debugf("VM state after restore: %#v", info)

	if info.State != clhStatePaused {
		clh.Logger().Warnf("VM state is '%s' after restore, expected 'Paused'", info.State)
	}

	clh.Logger().Info("Successfully restored VM from template")
	return nil
}
func (clh *cloudHypervisor) addVSock(cid int64, path string) {
clh.Logger().WithFields(log.Fields{
"path": path,

View File

@@ -74,7 +74,9 @@ func newClhConfig() (HypervisorConfig, error) {
}
// clhClientMock is a test double for the Cloud Hypervisor API client.
type clhClientMock struct {
	// vmInfo holds the mocked VM information; the pause, restore and resume
	// mock methods update its State field.
	vmInfo chclient.VmInfo
	// restoreRequest is the last configuration passed to VmRestorePut.
	restoreRequest *chclient.RestoreConfig
	// snapshotRequest is the last configuration passed to VmSnapshotPut.
	snapshotRequest *chclient.VmSnapshotConfig
}
func (c *clhClientMock) VmmPingGet(ctx context.Context) (chclient.VmmPingResponse, *http.Response, error) {
@@ -115,11 +117,35 @@ func (c *clhClientMock) VmAddDiskPut(ctx context.Context, diskConfig chclient.Di
return chclient.PciDeviceInfo{Bdf: "0000:00:0a.0"}, nil, nil
}
// VmPausePut simulates a successful pause by marking the mocked VM as Paused.
//
//nolint:golint
func (c *clhClientMock) VmPausePut(ctx context.Context) (*http.Response, error) {
c.vmInfo.State = clhStatePaused
return nil, nil
}
// VmSnapshotPut records the snapshot configuration it receives so tests can
// inspect it, and reports success.
//
//nolint:golint
func (c *clhClientMock) VmSnapshotPut(ctx context.Context, vmSnapshotConfig chclient.VmSnapshotConfig) (*http.Response, error) {
c.snapshotRequest = &vmSnapshotConfig
return nil, nil
}
// VmRemoveDevicePut is a no-op that always reports success.
//
//nolint:golint
func (c *clhClientMock) VmRemoveDevicePut(ctx context.Context, vmRemoveDevice chclient.VmRemoveDevice) (*http.Response, error) {
return nil, nil
}
// VmRestorePut records the restore configuration it receives so tests can
// inspect it, marks the mocked VM as Paused, and reports success.
func (c *clhClientMock) VmRestorePut(ctx context.Context, restoreConfig chclient.RestoreConfig) (*http.Response, error) {
c.restoreRequest = &restoreConfig
// restoreVM() verifies Paused after restore.
c.vmInfo.State = clhStatePaused
return nil, nil
}
// ResumeVM simulates a successful resume by marking the mocked VM as Running.
func (c *clhClientMock) ResumeVM(ctx context.Context) (*http.Response, error) {
c.vmInfo.State = clhStateRunning
return nil, nil
}
func TestCloudHypervisorAddVSock(t *testing.T) {
assert := assert.New(t)
clh := cloudHypervisor{}
@@ -431,7 +457,8 @@ func TestCloudHypervisorCleanupVM(t *testing.T) {
assert.NoError(err, "persist.GetDriver() unexpected error")
dir := filepath.Join(store.RunVMStoragePath(), clh.id)
os.MkdirAll(dir, os.ModePerm)
err = os.MkdirAll(dir, os.ModePerm)
assert.NoError(err, "failed to create dir %s", dir)
err = clh.cleanupVM(false)
assert.NoError(err, "persist.GetDriver() unexpected error")
@@ -516,6 +543,81 @@ func TestClhCreateVM(t *testing.T) {
}
}
// TestClhRestoreVM checks that restoreVM fails while the snapshot files
// (state.json, config.json) are absent and that, once they exist, it sends a
// restore request whose source URL is the VM store path and leaves the mocked
// VM in the Paused state.
func TestClhRestoreVM(t *testing.T) {
	assert := assert.New(t)

	store, err := persist.GetDriver()
	assert.NoError(err)

	clhConfig, err := newClhConfig()
	assert.NoError(err)
	clhConfig.VMStorePath = store.RunVMStoragePath()
	clhConfig.RunStorePath = store.RunStoragePath()

	mockClient := &clhClientMock{}
	clh := &cloudHypervisor{
		config:    clhConfig,
		APIClient: mockClient,
	}

	stateFile := filepath.Join(clhConfig.VMStorePath, "state.json")
	configFile := filepath.Join(clhConfig.VMStorePath, "config.json")
	// Best-effort cleanup so a later run starts again without the snapshot
	// files; a removal failure does not affect this run's assertions.
	defer os.Remove(stateFile)
	defer os.Remove(configFile)

	// First call restoreVM without the VM snapshot files (state.json, config.json) present.
	err = clh.restoreVM(context.Background())
	// An error is expected because restoreVM expects the VM snapshot files to be present.
	// Only inspect the message when there is an error, to avoid a nil dereference.
	if assert.Error(err) {
		assert.Contains(err.Error(), stateFile)
	}

	// Now create the VM snapshot files and call restoreVM again.
	err = os.MkdirAll(clhConfig.VMStorePath, os.ModePerm)
	assert.NoError(err, "failed to create dir %s", clhConfig.VMStorePath)

	err = os.WriteFile(stateFile, []byte("{}"), 0o600)
	assert.NoError(err)
	err = os.WriteFile(configFile, []byte("{}"), 0o600)
	assert.NoError(err)

	// Call restoreVM again, this time it should succeed.
	err = clh.restoreVM(context.Background())
	assert.NoError(err)

	if assert.NotNil(mockClient.restoreRequest) {
		expectedSourceURL := "file://" + clhConfig.VMStorePath
		assert.Equal(expectedSourceURL, mockClient.restoreRequest.GetSourceUrl())
	}

	info, err := clh.vmInfo()
	assert.NoError(err)
	assert.Equal(clhStatePaused, info.State)
}
// TestClhSaveVM checks that SaveVM succeeds against the mocked client and
// sends a snapshot request whose destination URL is the directory containing
// the configured memory path.
func TestClhSaveVM(t *testing.T) {
	assert := assert.New(t)

	driver, err := persist.GetDriver()
	assert.NoError(err)

	cfg, err := newClhConfig()
	assert.NoError(err)

	// For testing, assume the memory path is located within the VM store path.
	cfg.MemoryPath = filepath.Join(driver.RunVMStoragePath(), "memory")
	cfg.VMStorePath = driver.RunVMStoragePath()
	cfg.RunStorePath = driver.RunStoragePath()

	client := &clhClientMock{}
	hv := &cloudHypervisor{
		config:    cfg,
		APIClient: client,
	}

	err = hv.SaveVM()
	assert.NoError(err)

	if !assert.NotNil(client.snapshotRequest) {
		return
	}
	wantURL := "file://" + filepath.Dir(cfg.MemoryPath)
	assert.Equal(wantURL, client.snapshotRequest.GetDestinationUrl())
}
func TestCloudHypervisorStartSandbox(t *testing.T) {
assert := assert.New(t)
clhConfig, err := newClhConfig()

View File

@@ -80,6 +80,8 @@ func resetHypervisorConfig(config *vc.VMConfig) {
config.HypervisorConfig.SharedPath = ""
config.HypervisorConfig.VMStorePath = ""
config.HypervisorConfig.RunStorePath = ""
config.HypervisorConfig.SandboxName = ""
config.HypervisorConfig.SandboxNamespace = ""
}
// It's important that baseConfig and newConfig are passed by value!

View File

@@ -11,6 +11,7 @@ import (
"context"
"fmt"
"os"
"path/filepath"
"syscall"
"time"
@@ -115,6 +116,15 @@ func (t *template) prepareTemplateFiles() error {
}
f.Close()
// truncate the memory file to the exact size of the VM memory
memoryInBytes := int64(t.config.HypervisorConfig.MemorySize) * 1024 * 1024
t.Logger().Infof("truncating memory file %s to %d bytes", t.statePath+"/memory", memoryInBytes)
err = os.Truncate(t.statePath+"/memory", memoryInBytes)
if err != nil {
t.close()
return err
}
return nil
}
@@ -124,7 +134,8 @@ func (t *template) createTemplateVM(ctx context.Context) error {
config.HypervisorConfig.BootToBeTemplate = true
config.HypervisorConfig.BootFromTemplate = false
config.HypervisorConfig.MemoryPath = t.statePath + "/memory"
config.HypervisorConfig.DevicesStatePath = t.statePath + "/state"
config.HypervisorConfig.DevicesStatePath = t.deviceStatePath()
config.HypervisorConfig.VMStorePath = t.statePath
vm, err := vc.NewVM(ctx, config)
if err != nil {
@@ -161,7 +172,7 @@ func (t *template) createFromTemplateVM(ctx context.Context, c vc.VMConfig) (*vc
config.HypervisorConfig.BootToBeTemplate = false
config.HypervisorConfig.BootFromTemplate = true
config.HypervisorConfig.MemoryPath = t.statePath + "/memory"
config.HypervisorConfig.DevicesStatePath = t.statePath + "/state"
config.HypervisorConfig.DevicesStatePath = t.deviceStatePath()
config.HypervisorConfig.SharedPath = c.HypervisorConfig.SharedPath
config.HypervisorConfig.VMStorePath = c.HypervisorConfig.VMStorePath
config.HypervisorConfig.RunStorePath = c.HypervisorConfig.RunStorePath
@@ -175,6 +186,15 @@ func (t *template) checkTemplateVM() error {
return err
}
_, err = os.Stat(t.statePath + "/state")
_, err = os.Stat(t.deviceStatePath())
return err
}
// deviceStatePath returns the path of the device state file inside the
// template's state directory: "state.json" when the configured hypervisor
// type is vc.ClhHypervisor, "state" otherwise.
func (t *template) deviceStatePath() string {
	if t.config.HypervisorType == vc.ClhHypervisor {
		return filepath.Join(t.statePath, "state.json")
	}
	return filepath.Join(t.statePath, "state")
}

View File

@@ -57,15 +57,26 @@ func TestTemplateFactory(t *testing.T) {
assert.NoError(err)
defer hybridVSockTTRPCMock.Stop()
// New
// Create 2 sets of instance-specific directories for per-VM storage
runStorePath1 := t.TempDir()
vmStorePath1 := t.TempDir()
runStorePath2 := t.TempDir()
vmStorePath2 := t.TempDir()
// Create a new Template Factory
f, err := New(ctx, vmConfig, testDir)
assert.Nil(err)
// Config
assert.Equal(f.Config(), vmConfig)
// GetBaseVM
vm, err := f.GetBaseVM(ctx, vmConfig)
// GetBaseVM with first instance paths
vmConfig1 := vmConfig
vmConfig1.HypervisorConfig.RunStorePath = runStorePath1
vmConfig1.HypervisorConfig.VMStorePath = vmStorePath1
// Test the creation of a new VM from the template factory
vm, err := f.GetBaseVM(ctx, vmConfig1)
assert.Nil(err)
err = vm.Stop(ctx)
@@ -79,44 +90,59 @@ func TestTemplateFactory(t *testing.T) {
assert.Equal(tt.Config(), vmConfig)
// Checking that template VM check fails
// if the corresponding memory and state files are absent
err = tt.checkTemplateVM()
assert.Error(err)
_, err = os.Create(tt.statePath + "/memory")
memFile, err := os.Create(tt.statePath + "/memory")
assert.Nil(err)
memFile.Close()
err = tt.checkTemplateVM()
assert.Error(err)
_, err = os.Create(tt.statePath + "/state")
devFile, err := os.Create(tt.deviceStatePath())
assert.Nil(err)
devFile.Close()
// After creating state and memory files, checkTemplateVM should succeed
err = tt.checkTemplateVM()
assert.Nil(err)
// Recreate the template VM, which should succeed
err = tt.createTemplateVM(ctx)
assert.Nil(err)
vm, err = tt.GetBaseVM(ctx, vmConfig)
// Ensuring that directly calling template's GetBaseVM function
// returns a VM instance similar to the one returned by the factory's GetBaseVM function
vm, err = tt.GetBaseVM(ctx, vmConfig1)
assert.Nil(err)
err = vm.Stop(ctx)
assert.Nil(err)
vm, err = f.GetBaseVM(ctx, vmConfig)
vm, err = f.GetBaseVM(ctx, vmConfig1)
assert.Nil(err)
err = vm.Stop(ctx)
assert.Nil(err)
// Overwriting the template VM should succeed
err = tt.createTemplateVM(ctx)
assert.Nil(err)
vm, err = tt.GetBaseVM(ctx, vmConfig)
// Create second instance with different storage paths
vmConfig2 := vmConfig
vmConfig2.HypervisorConfig.RunStorePath = runStorePath2
vmConfig2.HypervisorConfig.VMStorePath = vmStorePath2
vm, err = tt.GetBaseVM(ctx, vmConfig2)
assert.Nil(err)
err = vm.Stop(ctx)
assert.Nil(err)
vm, err = f.GetBaseVM(ctx, vmConfig)
vm, err = f.GetBaseVM(ctx, vmConfig2)
assert.Nil(err)
err = vm.Stop(ctx)

View File

@@ -43,6 +43,7 @@ TEST_CLUSTER_NAMESPACE="${TEST_CLUSTER_NAMESPACE:-}"
CONTAINER_RUNTIME="${CONTAINER_RUNTIME:-containerd}"
SNAPSHOTTER="${SNAPSHOTTER:-}"
EROFS_SNAPSHOTTER_MODE="${EROFS_SNAPSHOTTER_MODE:-}"
EROFS_MERGE_MODE="${EROFS_MERGE_MODE:-}"
# Wait for the Kubernetes API to recover after kata-deploy uninstall, then
# retry the uninstall to purge any stale helm release state. On k3s/rke2,
@@ -851,6 +852,26 @@ function helm_helper() {
yq -i '.containerd.userDropIn = strenv(HELM_CONTAINERD_USER_DROP_IN)' "${values_yaml}"
fi
# EROFS merge mode ("merged" default, or "unmerged"). This is orthogonal
# to EROFS_SNAPSHOTTER_MODE (which controls default_size): it controls
# whether containerd merges layers into a single fsmeta.erofs (merged,
# runtime-rs only) or keeps per-layer layer.erofs (unmerged, required by
# the Go runtime).
if [[ -n "${EROFS_MERGE_MODE}" ]]; then
if [[ "${SNAPSHOTTER}" != "erofs" ]]; then
die "EROFS_MERGE_MODE is only supported with SNAPSHOTTER=erofs"
fi
case "${EROFS_MERGE_MODE}" in
merged|unmerged) ;;
*)
die "Unsupported EROFS_MERGE_MODE: ${EROFS_MERGE_MODE}"
;;
esac
yq -i ".snapshotter.erofsMergeMode = \"${EROFS_MERGE_MODE}\"" "${values_yaml}"
fi
if [[ -z "${HELM_SHIMS}" ]]; then
die "A list of shims is expected but none was provided"
fi

View File

@@ -0,0 +1,114 @@
#!/usr/bin/env bats
#
# Copyright (c) 2024 Kata Containers
#
# SPDX-License-Identifier: Apache-2.0
#
# Tests for Kata VM templating (factory) functionality in Kubernetes integration mode
load "${BATS_TEST_DIRNAME}/lib.sh"
load "${BATS_TEST_DIRNAME}/../../common.bash"
load "${BATS_TEST_DIRNAME}/confidential_common.sh"
load "${BATS_TEST_DIRNAME}/tests_common.sh"
# Reports whether VM templating can be exercised in this environment.
# Succeeds (0) only when all of the following hold:
#   - KATA_HYPERVISOR is "clh" or "qemu";
#   - the runtime class is not a confidential one;
#   - SNAPSHOTTER is "blockfile" or "erofs" (templating runs with
#     shared_fs="none", so a block-device-based snapshotter is needed).
# Returns 1 otherwise.
vm_templating_supported() {
	case "${KATA_HYPERVISOR}" in
		clh|qemu) ;;
		*) return 1 ;;
	esac

	if is_confidential_runtime_class; then
		return 1
	fi

	case "${SNAPSHOTTER:-}" in
		blockfile|erofs) return 0 ;;
	esac
	return 1
}
setup() {
	vm_templating_supported \
		|| skip "VM templating requires a non-confidential clh/qemu hypervisor and a blockfile/erofs snapshotter (KATA_HYPERVISOR=${KATA_HYPERVISOR}, SNAPSHOTTER=${SNAPSHOTTER:-unset})"

	setup_common || die "setup_common failed"

	# Only QEMU needs an initrd for VM templating; for CLH the rootfs
	# settings are left untouched, so the override stays empty.
	local rootfs_override=""
	case "${KATA_HYPERVISOR}" in
		qemu)
			rootfs_override=$'image = ""\ninitrd = "/opt/kata/share/kata-containers/kata-containers-initrd.img"'
			;;
	esac

	# Generate the runtime config drop-in: it turns on the VM template factory
	# and sets shared_fs to "none" (shared_fs is incompatible with templating).
	local runtime_config_dropin_file="${BATS_TEST_TMPDIR}/99-k8s-vm-templating.toml"
	cat > "${runtime_config_dropin_file}" <<DROPIN
[hypervisor.${KATA_HYPERVISOR}]
shared_fs = "none"
default_vcpus = 1
default_memory = 512
${rootfs_override}
[factory]
enable_template = true
template_path = "/run/vc/vm/template"
DROPIN

	# Push the drop-in to the node chosen by setup_common. The returned remote
	# path is kept in dropin_path so teardown can delete it again.
	if ! dropin_path="$(set_kata_runtime_config_dropin_file "$node" "${runtime_config_dropin_file}")"; then
		die "Failed to install Kata runtime config drop-in on node $node"
	fi

	# kata-runtime would otherwise pick the QEMU configuration; factory
	# init/destroy must be pointed at the config of the hypervisor under test.
	kata_config_path="/opt/kata/share/defaults/kata-containers/runtimes/${KATA_HYPERVISOR}/configuration-${KATA_HYPERVISOR}.toml"
}
@test "Pod can be created with a templated VM" {
# Initialize the VM template on the target node. kata-runtime is invoked in
# PID 1's mount namespace with the hypervisor-specific config chosen in setup().
exec_host "$node" "nsenter --mount=/proc/1/ns/mnt /opt/kata/bin/kata-runtime --config ${kata_config_path} factory init"
# The factory init above must have created the template's "memory" file under
# /run/vc/vm/template. exec_host pipes the remote output through `tr`, so the
# pipeline's exit status is not the remote command's; assert on the output
# instead. Check inside PID 1's mount namespace, where the template tmpfs was
# actually mounted.
exec_host "$node" "nsenter --mount=/proc/1/ns/mnt test -f /run/vc/vm/template/memory && echo present" | grep -q present
pod_name="test-templated-pod"
ctr_name="test-container"
# Instantiate the pod manifest from the busybox-template.yaml fixture by
# substituting the POD_NAME and CTR_NAME placeholders in a temporary copy.
pod_config=$(mktemp --tmpdir pod_config.XXXXXX.yaml)
cp "$pod_config_dir/busybox-template.yaml" "$pod_config"
sed -i "s/POD_NAME/$pod_name/" "$pod_config"
sed -i "s/CTR_NAME/$ctr_name/" "$pod_config"
# Create the pod, wait for it to become Ready, then verify that a command can
# be executed inside it and produces the expected output.
kubectl create -f "${pod_config}"
kubectl wait --for=condition=Ready --timeout="$timeout" "pod/${pod_name}"
grep_pod_exec_output "${pod_name}" "Hello from templated VM" sh -c "echo 'Hello from templated VM'"
# Confirm at least one VM sandbox under /run/vc/vm/ is a symlink, which
# proves the factory/template path was used. A non-templated VM creates a
# real directory at /run/vc/vm/<sandbox-id>/, whereas a factory-spawned VM
# stores its state under a generated UUID and /run/vc/vm/<sandbox-id> is a
# symlink pointing at it (see assignSandbox() in
# src/runtime/virtcontainers/vm.go). Inspect PID 1's mount namespace, where
# the shim creates these entries alongside the template tmpfs.
# As above, the result is asserted through the echoed marker ("symlink")
# rather than through the remote exit status.
exec_host "$node" \
"nsenter --mount=/proc/1/ns/mnt find /run/vc/vm -maxdepth 1 -mindepth 1 -type l ! -name template | grep -q . && echo symlink" \
| grep -q symlink
}
teardown() {
	# Nothing was set up when the environment does not support VM templating.
	if ! vm_templating_supported; then
		return 0
	fi

	rm -f "${pod_config:-}"

	# Best-effort cleanup on the target node: tear down the VM template, then
	# delete the config drop-in. factory destroy has to run in PID 1's mount
	# namespace so it can unmount the template tmpfs that factory init mounted
	# there. Failures are reported as warnings and do not abort the teardown.
	if ! exec_host "$node" "nsenter --mount=/proc/1/ns/mnt /opt/kata/bin/kata-runtime --config ${kata_config_path} factory destroy"; then
		echo "Warning: Failed to destroy VM template on node $node"
	fi

	if ! remove_kata_runtime_config_dropin_file "$node" "${dropin_path:-}"; then
		echo "Warning: Failed to remove Kata runtime config drop-in on node $node"
	fi

	teardown_common "${node:-}" "${node_start_time:-}"
}

View File

@@ -104,6 +104,7 @@ else
"k8s-security-context.bats" \
"k8s-shared-volume.bats" \
"k8s-volume.bats" \
"k8s-vm-templating.bats" \
"k8s-nginx-connectivity.bats" \
)

View File

@@ -15,36 +15,50 @@ use std::path::Path;
pub async fn configure_erofs_snapshotter(config: &Config, configuration_file: &Path) -> Result<()> {
info!("Configuring erofs-snapshotter");
// "unmerged" mode keeps each image layer as its own per-layer `layer.erofs`
// (containerd's default, non-fsmerged layout), which is the only layout the
// Go runtime can consume. In the default "merged" mode we force containerd
// to merge layers into a single `fsmeta.erofs`, which is runtime-rs only.
let unmerged = config.erofs_merge_mode.as_deref() == Some("unmerged");
// The Go runtime does not support fsmerged EROFS (fsmeta.erofs).
// If the snapshotter handler mapping explicitly pairs a Go shim with
// erofs, that is a hard misconfiguration — bail out so the operator
// fixes the mapping instead of hitting cryptic runtime errors later.
if let Some(mapping) = config.snapshotter_handler_mapping_for_arch.as_ref() {
let mut go_shims_on_erofs = Vec::new();
for entry in mapping.split(',') {
let parts: Vec<&str> = entry.split(':').collect();
if parts.len() == 2 && parts[1] == "erofs" && !utils::is_rust_shim(parts[0]) {
go_shims_on_erofs.push(parts[0].to_string());
// erofs in the (default) merged mode, that is a hard misconfiguration —
// bail out so the operator fixes the mapping instead of hitting cryptic
// runtime errors later. In "unmerged" mode the Go runtime is supported, so
// skip this guard.
if !unmerged {
if let Some(mapping) = config.snapshotter_handler_mapping_for_arch.as_ref() {
let mut go_shims_on_erofs = Vec::new();
for entry in mapping.split(',') {
let parts: Vec<&str> = entry.split(':').collect();
if parts.len() == 2 && parts[1] == "erofs" && !utils::is_rust_shim(parts[0]) {
go_shims_on_erofs.push(parts[0].to_string());
}
}
}
if !go_shims_on_erofs.is_empty() {
warn!("##########################################################################");
warn!("# #");
warn!("# Go runtime shim(s) mapped to the erofs snapshotter: #");
for s in &go_shims_on_erofs {
warn!("# - {:<64} #", s);
if !go_shims_on_erofs.is_empty() {
warn!("##########################################################################");
warn!("# #");
warn!("# Go runtime shim(s) mapped to the erofs snapshotter: #");
for s in &go_shims_on_erofs {
warn!("# - {:<64} #", s);
}
warn!("# #");
warn!(
"# The Go runtime does NOT support fsmerged EROFS (fsmeta.erofs). #"
);
warn!("# Only runtime-rs shims are supported with merged erofs. Set #");
warn!("# EROFS_MERGE_MODE=unmerged to use the Go runtime with erofs. #");
warn!("# #");
warn!("##########################################################################");
return Err(anyhow::anyhow!(
"erofs snapshotter: Go runtime shim(s) [{}] cannot be mapped to merged erofs. \
The Go runtime does not support fsmerged EROFS. \
Set EROFS_MERGE_MODE=unmerged, remove these shims from \
SNAPSHOTTER_HANDLER_MAPPING, or switch them to runtime-rs.",
go_shims_on_erofs.join(", ")
));
}
warn!("# #");
warn!("# The Go runtime does NOT support fsmerged EROFS (fsmeta.erofs). #");
warn!("# Only runtime-rs shims are supported with the erofs snapshotter. #");
warn!("# #");
warn!("##########################################################################");
return Err(anyhow::anyhow!(
"erofs snapshotter: Go runtime shim(s) [{}] cannot be mapped to erofs. \
The Go runtime does not support fsmerged EROFS. \
Remove these shims from SNAPSHOTTER_HANDLER_MAPPING or switch them to runtime-rs.",
go_shims_on_erofs.join(", ")
));
}
}
@@ -88,11 +102,27 @@ pub async fn configure_erofs_snapshotter(config: &Config, configuration_file: &P
".plugins.\"io.containerd.snapshotter.v1.erofs\".default_size",
"\"10G\"",
)?;
toml_utils::set_toml_value(
configuration_file,
".plugins.\"io.containerd.snapshotter.v1.erofs\".max_unmerged_layers",
"0",
)?;
// In the default "merged" mode, force containerd to merge all layers into a
// single fsmeta.erofs (max_unmerged_layers = 0). In "unmerged" mode we delete
// any previously-written value so each layer stays a separate layer.erofs,
// which the Go runtime requires.
//
// Because kata-deploy edits the containerd config in place, switching from
// merged to unmerged must actively remove the old `max_unmerged_layers = 0`
// left behind by a previous install. Otherwise the stale `0` would keep
// forcing the merged layout and break Go-runtime compatibility.
if !unmerged {
toml_utils::set_toml_value(
configuration_file,
".plugins.\"io.containerd.snapshotter.v1.erofs\".max_unmerged_layers",
"0",
)?;
} else {
toml_utils::delete_toml_value(
configuration_file,
".plugins.\"io.containerd.snapshotter.v1.erofs\".max_unmerged_layers",
)?;
}
Ok(())
}

View File

@@ -178,6 +178,14 @@ pub struct Config {
pub multi_install_suffix: Option<String>,
pub helm_post_delete_hook: bool,
pub experimental_setup_snapshotter: Option<Vec<String>>,
/// EROFS snapshotter merge mode: "merged" (default) or "unmerged".
///
/// In "unmerged" mode kata-deploy does not force containerd's erofs
/// snapshotter to merge layers (it leaves `max_unmerged_layers` at the
/// containerd default), so each image layer is exposed as its own
/// per-layer `layer.erofs`. This is the only layout the Go runtime can
/// consume; the merged (`fsmeta.erofs`) layout is runtime-rs only.
pub erofs_merge_mode: Option<String>,
pub experimental_force_guest_pull_for_arch: Vec<String>,
pub dest_dir: String,
pub host_install_dir: String,
@@ -307,6 +315,11 @@ impl Config {
.filter(|s| !s.is_empty())
.map(|s| s.split(',').map(|s| s.trim().to_string()).collect());
let erofs_merge_mode = env::var("EROFS_MERGE_MODE")
.ok()
.map(|s| s.trim().to_lowercase())
.filter(|s| !s.is_empty());
// Only use arch-specific variable for experimental force guest pull
let experimental_force_guest_pull_for_arch =
get_arch_var("EXPERIMENTAL_FORCE_GUEST_PULL", "", &arch)
@@ -338,6 +351,7 @@ impl Config {
multi_install_suffix,
helm_post_delete_hook,
experimental_setup_snapshotter,
erofs_merge_mode,
experimental_force_guest_pull_for_arch,
dest_dir,
host_install_dir,
@@ -508,6 +522,17 @@ impl Config {
_ => {}
}
// Validate EROFS_MERGE_MODE
// Only "merged" (default) and "unmerged" are accepted.
if let Some(mode) = self.erofs_merge_mode.as_ref() {
if mode != "merged" && mode != "unmerged" {
return Err(anyhow::anyhow!(
"EROFS_MERGE_MODE must be either 'merged' or 'unmerged', got '{}'",
mode
));
}
}
// Validate EXPERIMENTAL_FORCE_GUEST_PULL_FOR_ARCH
// This is a list of shim names
for shim in &self.experimental_force_guest_pull_for_arch {
@@ -551,6 +576,7 @@ impl Config {
"* EXPERIMENTAL_SETUP_SNAPSHOTTER: {:?}",
self.experimental_setup_snapshotter
);
info!("* EROFS_MERGE_MODE: {:?}", self.erofs_merge_mode);
info!(
"* EXPERIMENTAL_FORCE_GUEST_PULL: {}",
self.experimental_force_guest_pull_for_arch.join(",")

View File

@@ -121,6 +121,47 @@ pub fn set_toml_value(file_path: &Path, path: &str, value: &str) -> Result<()> {
Ok(())
}
/// Remove the entry (value or table) addressed by `path` from a TOML file.
///
/// All path components except the last are resolved as nested tables, starting
/// at the document root; the last component is the key removed from the table
/// reached that way.
///
/// Missing entries are not an error:
/// * if an intermediate component is absent (or is not a table), the function
///   returns `Ok(())` immediately and the file is not rewritten;
/// * if only the final key is absent, the removal is a no-op and the document
///   is still written back together with its non-TOML header.
///
/// # Errors
///
/// Fails when the file cannot be read, its TOML body cannot be parsed, `path`
/// is rejected by `parse_toml_path`, or the updated document cannot be written.
pub fn delete_toml_value(file_path: &Path, path: &str) -> Result<()> {
    let content = std::fs::read_to_string(file_path)
        .with_context(|| format!("Failed to read TOML file: {file_path:?}"))?;

    let (header, toml_content) = split_non_toml_header(&content);
    let mut doc = toml_content
        .parse::<DocumentMut>()
        .context("Failed to parse TOML")?;

    let parts = parse_toml_path(path)?;

    if let Some((leaf, parents)) = parts.split_last() {
        // Descend through the parent tables; a missing link means there is
        // nothing to delete, so leave the file untouched.
        let mut table = doc.as_table_mut();
        for parent in parents {
            table = match table
                .get_mut(parent.as_str())
                .and_then(|item| item.as_table_mut())
            {
                Some(child) => child,
                None => return Ok(()),
            };
        }

        // Dropping a key that is not there is harmless.
        table.remove(leaf.as_str());
    }

    write_toml_with_header(file_path, header, &doc)?;

    Ok(())
}
/// Get a TOML value at a given path
pub fn get_toml_value(file_path: &Path, path: &str) -> Result<String> {
let content = std::fs::read_to_string(file_path)
@@ -1714,4 +1755,100 @@ imports = ["/etc/containerd/conf.d/*.toml", "/opt/kata/containerd/config.d/kata-
.unwrap();
assert_eq!(runtime_type, "io.containerd.kata-qemu.v2");
}
#[test]
fn test_delete_toml_value() {
    // Deleting one key must remove exactly that key and keep its siblings.
    const LAYERS_KEY: &str =
        ".plugins.\"io.containerd.snapshotter.v1.erofs\".max_unmerged_layers";
    const FSVERITY_KEY: &str = ".plugins.\"io.containerd.snapshotter.v1.erofs\".enable_fsverity";

    let config = NamedTempFile::new().unwrap();
    let config_path = config.path();
    std::fs::write(
        config_path,
        "[plugins.\"io.containerd.snapshotter.v1.erofs\"]\nmax_unmerged_layers = 0\nenable_fsverity = true\n",
    )
    .unwrap();

    // Precondition: the key is readable before it is deleted.
    assert_eq!(get_toml_value(config_path, LAYERS_KEY).unwrap(), "0");

    delete_toml_value(config_path, LAYERS_KEY).unwrap();

    // Postcondition: the key can no longer be looked up...
    assert!(
        get_toml_value(config_path, LAYERS_KEY).is_err(),
        "deleted key should no longer be found"
    );
    // ...while the neighbouring key is untouched.
    assert_eq!(
        get_toml_value(config_path, FSVERITY_KEY).unwrap(),
        "true",
        "sibling keys must be preserved"
    );
}
#[test]
fn test_delete_toml_value_missing_key_is_noop() {
    // Deleting something that is not there must succeed and keep existing data.
    let scratch = NamedTempFile::new().unwrap();
    let toml_path = scratch.path();
    std::fs::write(
        toml_path,
        "[plugins.\"io.containerd.snapshotter.v1.erofs\"]\nenable_fsverity = true\n",
    )
    .unwrap();

    // Case 1: the parent table exists but the final key does not.
    let absent_key = ".plugins.\"io.containerd.snapshotter.v1.erofs\".max_unmerged_layers";
    delete_toml_value(toml_path, absent_key).unwrap();

    // Case 2: an intermediate table on the path does not exist.
    let absent_table = ".plugins.\"nonexistent.plugin\".some_key";
    delete_toml_value(toml_path, absent_table).unwrap();

    // The pre-existing key is still readable afterwards.
    let fsverity = get_toml_value(
        toml_path,
        ".plugins.\"io.containerd.snapshotter.v1.erofs\".enable_fsverity",
    )
    .unwrap();
    assert_eq!(fsverity, "true");
}
#[test]
fn test_delete_toml_value_preserves_k3s_header() {
    // A leading non-TOML template line must survive the rewrite of the file.
    const HEADER: &str = "{{ template \"base\" . }}\n";
    const BODY: &str =
        "[plugins.\"io.containerd.snapshotter.v1.erofs\"]\nmax_unmerged_layers = 0\n";

    let scratch = NamedTempFile::new().unwrap();
    let toml_path = scratch.path();
    std::fs::write(toml_path, [HEADER, BODY].concat()).unwrap();

    delete_toml_value(
        toml_path,
        ".plugins.\"io.containerd.snapshotter.v1.erofs\".max_unmerged_layers",
    )
    .unwrap();

    let rewritten = std::fs::read_to_string(toml_path).unwrap();
    assert!(
        rewritten.starts_with(HEADER),
        "non-TOML header must be preserved"
    );
    assert!(
        !rewritten.contains("max_unmerged_layers"),
        "value must be removed"
    );
}
#[test]
fn test_delete_toml_value_nonexistent_file() {
    // An unreadable file is reported as an error, not treated as a no-op.
    let missing = Path::new("/nonexistent/file.toml");
    assert!(delete_toml_value(missing, "some.path").is_err());
}
}

View File

@@ -413,6 +413,13 @@ Get snapshotter setup list from structured config
{{- join "," .Values.snapshotter.setup -}}
{{- end -}}
{{/*
Return the EROFS merge mode ("merged" or "unmerged") configured under
.Values.snapshotter.erofsMergeMode; renders an empty string when it is unset.
*/}}
{{- define "kata-deploy.getErofsMergeMode" -}}
{{- default "" .Values.snapshotter.erofsMergeMode -}}
{{- end -}}
{{/*
Get debug value from structured config
*/}}
@@ -569,6 +576,11 @@ e.g. `{{- include "kata-deploy.commonEnv" . | nindent 8 }}`.
- name: EXPERIMENTAL_SETUP_SNAPSHOTTER
value: {{ $snapshotterSetup | quote }}
{{- end }}
{{- $erofsMergeMode := include "kata-deploy.getErofsMergeMode" . | trim -}}
{{- if $erofsMergeMode }}
- name: EROFS_MERGE_MODE
value: {{ $erofsMergeMode | quote }}
{{- end }}
{{- $forceGuestPullAmd64 := include "kata-deploy.getForceGuestPullForArch" (dict "root" . "arch" "amd64") | trim -}}
{{- if $forceGuestPullAmd64 }}
- name: EXPERIMENTAL_FORCE_GUEST_PULL_X86_64

View File

@@ -271,6 +271,18 @@ health:
snapshotter:
setup: ["nydus"] # ["nydus", "erofs"] or []
# EROFS merge mode: "merged" (default) or "unmerged".
#
# "merged" forces containerd's erofs snapshotter to merge all image layers
# into a single fsmeta.erofs (max_unmerged_layers = 0). This layout is only
# supported by runtime-rs shims.
#
# "unmerged" leaves max_unmerged_layers at the containerd default so each
# image layer is exposed as its own per-layer layer.erofs. This is the only
# layout the Go runtime can consume, so set this when mapping a Go shim to the
# erofs snapshotter. When empty, kata-deploy uses its built-in default
# (merged).
erofsMergeMode: ""
# Shim configuration
# By default (disableAll: false), all shims with enabled: ~ (null) are enabled.