mirror of
https://github.com/kata-containers/kata-containers.git
synced 2026-07-01 22:50:54 +00:00
Merge pull request #13196 from microsoft/cameronbaird/upstream/runtime-go-clh-templating
runtime: Enable VM Templating Support for CLH
This commit is contained in:
@@ -41,10 +41,12 @@ jobs:
|
||||
matrix:
|
||||
environment: [
|
||||
{ vmm: clh, containerd_version: latest },
|
||||
{ vmm: clh, containerd_version: latest, snapshotter: erofs, erofs_mode: disk, erofs_merge_mode: unmerged },
|
||||
{ vmm: clh, containerd_version: minimum },
|
||||
{ vmm: dragonball, containerd_version: latest },
|
||||
{ vmm: dragonball, containerd_version: minimum },
|
||||
{ vmm: qemu, containerd_version: latest },
|
||||
{ vmm: qemu, containerd_version: latest, snapshotter: erofs, erofs_mode: disk, erofs_merge_mode: unmerged },
|
||||
{ vmm: qemu, containerd_version: minimum },
|
||||
{ vmm: qemu-runtime-rs, containerd_version: latest },
|
||||
{ vmm: qemu-runtime-rs, containerd_version: minimum },
|
||||
@@ -68,6 +70,9 @@ jobs:
|
||||
K8S_TEST_HOST_TYPE: baremetal-no-attestation
|
||||
CONTAINER_ENGINE: containerd
|
||||
CONTAINER_ENGINE_VERSION: ${{ matrix.environment.containerd_version }}
|
||||
SNAPSHOTTER: ${{ matrix.environment.snapshotter }}
|
||||
EROFS_SNAPSHOTTER_MODE: ${{ matrix.environment.erofs_mode }}
|
||||
EROFS_MERGE_MODE: ${{ matrix.environment.erofs_merge_mode }}
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
@@ -2085,12 +2085,6 @@ func checkNetNsConfig(config oci.RuntimeConfig) error {
|
||||
|
||||
// checkFactoryConfig ensures the VM factory configuration is valid.
|
||||
func checkFactoryConfig(config oci.RuntimeConfig) error {
|
||||
if config.FactoryConfig.Template {
|
||||
if config.HypervisorConfig.InitrdPath == "" {
|
||||
return errors.New("Factory option enable_template requires an initrd image")
|
||||
}
|
||||
}
|
||||
|
||||
if config.FactoryConfig.VMCacheNumber > 0 {
|
||||
if config.HypervisorType != vc.QemuHypervisor {
|
||||
return errors.New("VM cache just support qemu")
|
||||
|
||||
@@ -1696,7 +1696,7 @@ func TestCheckFactoryConfig(t *testing.T) {
|
||||
{false, false, "", "initrd"},
|
||||
|
||||
{true, false, "", "initrd"},
|
||||
{true, true, "image", ""},
|
||||
{true, false, "image", ""},
|
||||
}
|
||||
|
||||
for i, d := range data {
|
||||
|
||||
@@ -82,12 +82,8 @@ func HandleFactory(ctx context.Context, vci vc.VC, runtimeConfig *oci.RuntimeCon
|
||||
kataUtilsLogger.WithField("factory", factoryConfig).Info("load vm factory")
|
||||
|
||||
f, err := vf.NewFactory(ctx, factoryConfig, true)
|
||||
if err != nil && !factoryConfig.VMCache {
|
||||
kataUtilsLogger.WithError(err).Warn("load vm factory failed, about to create new one")
|
||||
f, err = vf.NewFactory(ctx, factoryConfig, false)
|
||||
}
|
||||
if err != nil {
|
||||
kataUtilsLogger.WithError(err).Warn("create vm factory failed")
|
||||
kataUtilsLogger.WithError(err).Warn("load vm factory failed, will use direct boot")
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@@ -68,6 +68,7 @@ const (
|
||||
const (
|
||||
clhStateCreated = "Created"
|
||||
clhStateRunning = "Running"
|
||||
clhStatePaused = "Paused"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -112,8 +113,16 @@ type clhClient interface {
|
||||
VmAddDevicePut(ctx context.Context, deviceConfig chclient.DeviceConfig) (chclient.PciDeviceInfo, *http.Response, error)
|
||||
// Add a new disk device to the VM
|
||||
VmAddDiskPut(ctx context.Context, diskConfig chclient.DiskConfig) (chclient.PciDeviceInfo, *http.Response, error)
|
||||
// Pause the VM
|
||||
VmPausePut(ctx context.Context) (*http.Response, error)
|
||||
// Create a snapshot of the VM
|
||||
VmSnapshotPut(ctx context.Context, vmSnapshotConfig chclient.VmSnapshotConfig) (*http.Response, error)
|
||||
// Remove a device from the VM
|
||||
VmRemoveDevicePut(ctx context.Context, vmRemoveDevice chclient.VmRemoveDevice) (*http.Response, error)
|
||||
// Restore VM from a snapshot
|
||||
VmRestorePut(ctx context.Context, restoreConfig chclient.RestoreConfig) (*http.Response, error)
|
||||
// Resume a paused VM
|
||||
ResumeVM(ctx context.Context) (*http.Response, error)
|
||||
}
|
||||
|
||||
type clhClientApi struct {
|
||||
@@ -153,10 +162,26 @@ func (c *clhClientApi) VmAddDiskPut(ctx context.Context, diskConfig chclient.Dis
|
||||
return c.ApiInternal.VmAddDiskPut(ctx).DiskConfig(diskConfig).Execute()
|
||||
}
|
||||
|
||||
func (c *clhClientApi) VmPausePut(ctx context.Context) (*http.Response, error) {
|
||||
return c.ApiInternal.PauseVM(ctx).Execute()
|
||||
}
|
||||
|
||||
func (c *clhClientApi) VmSnapshotPut(ctx context.Context, vmSnapshotConfig chclient.VmSnapshotConfig) (*http.Response, error) {
|
||||
return c.ApiInternal.VmSnapshotPut(ctx).VmSnapshotConfig(vmSnapshotConfig).Execute()
|
||||
}
|
||||
|
||||
func (c *clhClientApi) VmRemoveDevicePut(ctx context.Context, vmRemoveDevice chclient.VmRemoveDevice) (*http.Response, error) {
|
||||
return c.ApiInternal.VmRemoveDevicePut(ctx).VmRemoveDevice(vmRemoveDevice).Execute()
|
||||
}
|
||||
|
||||
func (c *clhClientApi) VmRestorePut(ctx context.Context, restoreConfig chclient.RestoreConfig) (*http.Response, error) {
|
||||
return c.ApiInternal.VmRestorePut(ctx).RestoreConfig(restoreConfig).Execute()
|
||||
}
|
||||
|
||||
func (c *clhClientApi) ResumeVM(ctx context.Context) (*http.Response, error) {
|
||||
return c.ApiInternal.ResumeVM(ctx).Execute()
|
||||
}
|
||||
|
||||
// This is done in order to be able to override such a function as part of
|
||||
// our unit tests, as when testing bootVM we're on a mocked scenario already.
|
||||
var vmAddNetPutRequest = func(clh *cloudHypervisor) ([]chclient.PciDeviceInfo, error) {
|
||||
@@ -255,12 +280,14 @@ type CloudHypervisorState struct {
|
||||
PID int
|
||||
VirtiofsDaemonPid int
|
||||
state clhState
|
||||
isRestoring bool
|
||||
}
|
||||
|
||||
func (s *CloudHypervisorState) reset() {
|
||||
s.PID = 0
|
||||
s.VirtiofsDaemonPid = 0
|
||||
s.state = clhNotReady
|
||||
s.isRestoring = false
|
||||
}
|
||||
|
||||
type cloudHypervisor struct {
|
||||
@@ -501,7 +528,7 @@ func getNonUserDefinedKernelParams(rootfstype string, disableNvdimm bool, dax bo
|
||||
}
|
||||
|
||||
// For cloudHypervisor this call only sets the internal structure up.
|
||||
// The VM will be created and started through StartVM(), or restored from template if template files exist.
|
||||
func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Network, hypervisorConfig *HypervisorConfig) error {
|
||||
clh.ctx = ctx
|
||||
|
||||
@@ -559,29 +586,78 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net
|
||||
}
|
||||
}
|
||||
|
||||
// Create the VM memory config via the constructor to ensure default values are properly assigned
|
||||
clh.vmconfig.Memory = chclient.NewMemoryConfig(int64((utils.MemUnit(clh.config.MemorySize) * utils.MiB).ToBytes()))
|
||||
// Memory config shared is to be enabled when using vhost_user backends, ex. virtio-fs
|
||||
// or when using HugePages.
|
||||
// If such features are disabled, turn off shared memory config.
|
||||
if clh.config.SharedFS == config.NoSharedFS && !clh.config.HugePages {
|
||||
clh.vmconfig.Memory.Shared = func(b bool) *bool { return &b }(false)
|
||||
} else {
|
||||
clh.vmconfig.Memory.Shared = func(b bool) *bool { return &b }(true)
|
||||
}
|
||||
// Enable hugepages if needed
|
||||
clh.vmconfig.Memory.Hugepages = func(b bool) *bool { return &b }(clh.config.HugePages)
|
||||
if !clh.config.ConfidentialGuest {
|
||||
hotplugSize := clh.config.DefaultMaxMemorySize
|
||||
// OpenAPI only supports int64 values
|
||||
clh.vmconfig.Memory.HotplugSize = func(i int64) *int64 { return &i }(int64((utils.MemUnit(hotplugSize) * utils.MiB).ToBytes()))
|
||||
|
||||
if clh.config.ReclaimGuestFreedMemory {
|
||||
// Create VM with a balloon config so we can enable free page reporting (size of the balloon can be set to zero)
|
||||
clh.vmconfig.Balloon = chclient.NewBalloonConfig(0)
|
||||
// Set the free page reporting flag for ballooning to be true
|
||||
clh.vmconfig.Balloon.SetFreePageReporting(true)
|
||||
// If the VM is booting from a template, or if the VM is going to be used as a template
|
||||
// the memory is to be backed by a file, so we need to configure the memory zones accordingly.
|
||||
if clh.config.BootFromTemplate || clh.config.BootToBeTemplate {
|
||||
// VM templating is incompatible with virtio-fs because virtio-fs requires shared memory,
|
||||
// while templating needs COW/private memory on restore.
|
||||
if clh.config.SharedFS == config.VirtioFS || clh.config.SharedFS == config.VirtioFSNydus {
|
||||
return errors.New("VM templating has been enabled with virtio-fs and this configuration will not work")
|
||||
}
|
||||
|
||||
// Double-check that the clh.config.MemoryPath file is accessible before using it in the VM config, to avoid hitting a less clear error from cloud hypervisor when it tries to access the memory file.
|
||||
if _, err := os.Stat(clh.config.MemoryPath); err != nil {
|
||||
return fmt.Errorf("memory file %s is not accessible: %w", clh.config.MemoryPath, err)
|
||||
}
|
||||
|
||||
// Set the size to be 0 since we are going to configure actual size via zones
|
||||
clh.vmconfig.Memory = chclient.NewMemoryConfig(0)
|
||||
|
||||
memoryZoneConfig := chclient.NewMemoryZoneConfig("mem0", int64((utils.MemUnit(clh.config.MemorySize) * utils.MiB).ToBytes()))
|
||||
if clh.config.BootToBeTemplate {
|
||||
// When BootToBeTemplate is true, the memory file backing the VM memory is shared between multiple VMs created from the same template.
|
||||
// So we need to set shared to true in this case.
|
||||
memoryZoneConfig.SetShared(true)
|
||||
clh.vmconfig.Memory.Shared = func(b bool) *bool { return &b }(true)
|
||||
|
||||
if !clh.config.ConfidentialGuest {
|
||||
// TODO: Remove this warning once memory hotplugging is supported
|
||||
// for template VMs.
|
||||
//
|
||||
// Memory hotplug is intentionally not configured for template VMs.
|
||||
// Resizing a memory zone requires the virtio-mem hotplug method
|
||||
// (cloud-hypervisor rejects the default ACPI hotplug on a zone that
|
||||
// carries a hotplug_size), which is not currently supported in the
|
||||
// templating path. As a result, VMs restored from this template
|
||||
// cannot grow their memory beyond the template's boot size.
|
||||
clh.Logger().Warn("memory hotplugging is currently unsupported for template VMs")
|
||||
}
|
||||
} else {
|
||||
// When BootFromTemplate is true, set shared=false to ensure Copy-On-Write is used for the memory file.
|
||||
// So that the VM can have its own private memory.
|
||||
memoryZoneConfig.SetShared(false)
|
||||
clh.vmconfig.Memory.Shared = func(b bool) *bool { return &b }(false)
|
||||
}
|
||||
memoryZoneConfig.SetFile(clh.config.MemoryPath)
|
||||
clh.vmconfig.Memory.Zones = &[]chclient.MemoryZoneConfig{
|
||||
*memoryZoneConfig,
|
||||
}
|
||||
} else { // Normal (non-template) VM creation
|
||||
// Create the VM memory config via the constructor to ensure default values are properly assigned
|
||||
clh.vmconfig.Memory = chclient.NewMemoryConfig(int64((utils.MemUnit(clh.config.MemorySize) * utils.MiB).ToBytes()))
|
||||
// Memory config shared is to be enabled when using vhost_user backends, ex. virtio-fs
|
||||
// or when using HugePages.
|
||||
// If such features are disabled, turn off shared memory config.
|
||||
if clh.config.SharedFS == config.NoSharedFS && !clh.config.HugePages {
|
||||
clh.vmconfig.Memory.Shared = func(b bool) *bool { return &b }(false)
|
||||
} else {
|
||||
clh.vmconfig.Memory.Shared = func(b bool) *bool { return &b }(true)
|
||||
}
|
||||
// Enable hugepages if needed
|
||||
clh.vmconfig.Memory.Hugepages = func(b bool) *bool { return &b }(clh.config.HugePages)
|
||||
if !clh.config.ConfidentialGuest {
|
||||
hotplugSize := clh.config.DefaultMaxMemorySize
|
||||
// OpenAPI only supports int64 values
|
||||
clh.vmconfig.Memory.HotplugSize = func(i int64) *int64 { return &i }(int64((utils.MemUnit(hotplugSize) * utils.MiB).ToBytes()))
|
||||
}
|
||||
}
|
||||
|
||||
// Configure balloon device for free page reporting. This is set unconditionally
|
||||
// (for both template and non-template paths) so that template VMs include the
|
||||
// balloon in their snapshot, and VMs restored from a template inherit it.
|
||||
if !clh.config.ConfidentialGuest && clh.config.ReclaimGuestFreedMemory {
|
||||
clh.vmconfig.Balloon = chclient.NewBalloonConfig(0)
|
||||
clh.vmconfig.Balloon.SetFreePageReporting(true)
|
||||
}
|
||||
|
||||
// Set initial amount of cpu's for the virtual machine
|
||||
@@ -700,9 +776,118 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net
|
||||
return err
|
||||
}
|
||||
|
||||
// Check if we should restore from template instead of creating new VM
|
||||
if clh.config.BootFromTemplate && clh.shouldRestoreFromTemplate() {
|
||||
clh.Logger().Info("Template files found, will restore VM instead of creating new")
|
||||
// Mark this as a restore operation for StartVM to use RestoreVM instead
|
||||
clh.state.isRestoring = true
|
||||
return nil
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// shouldRestoreFromTemplate checks if template snapshot files exist and we should restore instead of creating new VM
|
||||
func (clh *cloudHypervisor) shouldRestoreFromTemplate() bool {
|
||||
// For template restore, we need the snapshot directory to contain the necessary files
|
||||
// The snapshotDir is derived from the MemoryPath directory
|
||||
snapshotDir := filepath.Dir(clh.config.MemoryPath)
|
||||
|
||||
// Check for required template files (config.json, state.json, and memory file)
|
||||
configFile := filepath.Join(snapshotDir, "config.json")
|
||||
stateFile := filepath.Join(snapshotDir, "state.json")
|
||||
memoryFile := clh.config.MemoryPath
|
||||
|
||||
if _, err := os.Stat(configFile); err != nil {
|
||||
clh.Logger().WithError(err).WithField("configFile", configFile).Debug("Template config file not accessible")
|
||||
return false
|
||||
}
|
||||
|
||||
if _, err := os.Stat(stateFile); err != nil {
|
||||
clh.Logger().WithError(err).WithField("stateFile", stateFile).Debug("Template state file not accessible")
|
||||
return false
|
||||
}
|
||||
|
||||
if _, err := os.Stat(memoryFile); err != nil {
|
||||
clh.Logger().WithError(err).WithField("memoryFile", memoryFile).Debug("Template memory file not accessible")
|
||||
return false
|
||||
}
|
||||
|
||||
clh.Logger().WithFields(log.Fields{
|
||||
"configFile": configFile,
|
||||
"stateFile": stateFile,
|
||||
"memoryFile": memoryFile,
|
||||
}).Info("Template files found, can restore VM from template")
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// copyFile copies a file from src to dst, preserving the source file's permissions.
|
||||
func (clh *cloudHypervisor) copyFile(src, dst string) error {
|
||||
srcFile, err := os.Open(src)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer srcFile.Close()
|
||||
|
||||
srcInfo, err := srcFile.Stat()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
dstFile, err := os.OpenFile(dst, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, srcInfo.Mode())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer dstFile.Close()
|
||||
|
||||
_, err = io.Copy(dstFile, srcFile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return dstFile.Sync()
|
||||
}
|
||||
|
||||
// updateVsockSocketPath updates the vsock socket path in the config.json file
|
||||
func (clh *cloudHypervisor) updateVsockSocketPath(configPath, vmID string) error {
|
||||
// Read the config file
|
||||
configData, err := os.ReadFile(configPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var config map[string]interface{}
|
||||
dec := json.NewDecoder(bytes.NewReader(configData))
|
||||
dec.UseNumber()
|
||||
if err := dec.Decode(&config); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Update vsock socket path if vsock exists
|
||||
if vsock, ok := config["vsock"].(map[string]interface{}); ok {
|
||||
// Generate new vsock socket path for this VM
|
||||
newVsockPath, err := clh.vsockSocketPath(vmID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
vsock["socket"] = newVsockPath
|
||||
|
||||
clh.Logger().WithFields(log.Fields{
|
||||
"vmID": vmID,
|
||||
"newVsockPath": newVsockPath,
|
||||
}).Debug("Updated vsock socket path in config.json")
|
||||
}
|
||||
|
||||
// Write the updated config back to file
|
||||
updatedConfig, err := json.Marshal(config)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return os.WriteFile(configPath, updatedConfig, 0600)
|
||||
}
|
||||
|
||||
// setupInitdata prepares and attaches the initdata disk if present.
|
||||
func setupInitdata(clh *cloudHypervisor, hypervisorConfig *HypervisorConfig) error {
|
||||
if len(hypervisorConfig.Initdata) == 0 {
|
||||
@@ -771,8 +956,37 @@ func (clh *cloudHypervisor) StartVM(ctx context.Context, timeout int) error {
|
||||
ctx, cancel := context.WithTimeout(ctx, bootTimeout*time.Second)
|
||||
defer cancel()
|
||||
|
||||
if err := clh.bootVM(ctx); err != nil {
|
||||
return err
|
||||
// Check if we should restore from template or create new VM
|
||||
if clh.state.isRestoring {
|
||||
// Copy template files to VM directory
|
||||
snapshotDir := filepath.Dir(clh.config.MemoryPath)
|
||||
|
||||
// Copy config.json from template to VM directory
|
||||
srcConfig := filepath.Join(snapshotDir, "config.json")
|
||||
dstConfig := filepath.Join(vmPath, "config.json")
|
||||
if err := clh.copyFile(srcConfig, dstConfig); err != nil {
|
||||
return fmt.Errorf("failed to copy config.json: %v", err)
|
||||
}
|
||||
|
||||
// Copy state.json from template to VM directory
|
||||
srcState := filepath.Join(snapshotDir, "state.json")
|
||||
dstState := filepath.Join(vmPath, "state.json")
|
||||
if err := clh.copyFile(srcState, dstState); err != nil {
|
||||
return fmt.Errorf("failed to copy state.json: %v", err)
|
||||
}
|
||||
|
||||
// Update vsock socket path in the copied config.json
|
||||
if err := clh.updateVsockSocketPath(dstConfig, clh.id); err != nil {
|
||||
return fmt.Errorf("failed to update vsock socket path: %v", err)
|
||||
}
|
||||
|
||||
if err := clh.restoreVM(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
if err := clh.bootVM(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
clh.state.state = clhReady
|
||||
@@ -1287,16 +1501,109 @@ func (clh *cloudHypervisor) Cleanup(ctx context.Context) error {
|
||||
|
||||
func (clh *cloudHypervisor) PauseVM(ctx context.Context) error {
|
||||
clh.Logger().WithField("function", "PauseVM").Info("Pause Sandbox")
|
||||
|
||||
cl := clh.client()
|
||||
ctx, cancel := context.WithTimeout(ctx, clh.getClhAPITimeout()*time.Second)
|
||||
defer cancel()
|
||||
|
||||
_, err := cl.VmPausePut(ctx)
|
||||
if err != nil {
|
||||
clh.Logger().WithError(err).Error("Failed to pause VM")
|
||||
return openAPIClientError(err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (clh *cloudHypervisor) SaveVM() error {
|
||||
clh.Logger().WithField("function", "saveSandboxC").Info("Save Sandbox")
|
||||
clh.Logger().WithField("function", "SaveVM").Info("Save Sandbox")
|
||||
|
||||
cl := clh.client()
|
||||
ctx, cancel := context.WithTimeout(context.Background(), clh.getClhAPITimeout()*time.Second)
|
||||
defer cancel()
|
||||
|
||||
snapshotDir := filepath.Dir(clh.config.MemoryPath)
|
||||
// Create snapshot config with file URL to template path
|
||||
// Use MemoryPath as base for snapshot destination
|
||||
// When creating a template, the MemoryPath is set to the template path, so we can use it to save the snapshot.
|
||||
fileURL := "file://" + snapshotDir
|
||||
|
||||
vmSnapshotConfig := *chclient.NewVmSnapshotConfig()
|
||||
vmSnapshotConfig.SetDestinationUrl(fileURL)
|
||||
|
||||
_, err := cl.VmSnapshotPut(ctx, vmSnapshotConfig)
|
||||
if err != nil {
|
||||
clh.Logger().WithError(err).Error("Failed to save VM snapshot")
|
||||
return openAPIClientError(err)
|
||||
}
|
||||
|
||||
if clh.config.BootToBeTemplate {
|
||||
// Update the config.json file in the snapshotDir to set memory shared=false
|
||||
snapshotConfigPath := filepath.Join(snapshotDir, "config.json")
|
||||
snapshotConfig, err := os.ReadFile(snapshotConfigPath)
|
||||
if err != nil {
|
||||
clh.Logger().WithError(err).Error("Failed to read snapshot config")
|
||||
return err
|
||||
}
|
||||
|
||||
var snapshotConfigData map[string]interface{}
|
||||
dec := json.NewDecoder(bytes.NewReader(snapshotConfig))
|
||||
dec.UseNumber()
|
||||
if err := dec.Decode(&snapshotConfigData); err != nil {
|
||||
clh.Logger().WithError(err).Error("Failed to unmarshal snapshot config")
|
||||
return err
|
||||
}
|
||||
|
||||
// Access the memory section and cast it to a map
|
||||
if memorySection, ok := snapshotConfigData["memory"].(map[string]interface{}); ok {
|
||||
memorySection["shared"] = false
|
||||
// Do the same update for each element for the "zones" array in the memorySection
|
||||
if zones, ok := memorySection["zones"].([]interface{}); ok {
|
||||
for _, zone := range zones {
|
||||
if zoneMap, ok := zone.(map[string]interface{}); ok {
|
||||
zoneMap["shared"] = false
|
||||
} else {
|
||||
clh.Logger().Error("Unable to access zone in snapshot config memory section")
|
||||
return fmt.Errorf("invalid snapshot config structure: zone in memory section not found or invalid")
|
||||
}
|
||||
}
|
||||
} else {
|
||||
clh.Logger().Error("Unable to access zones array in snapshot config memory section")
|
||||
return fmt.Errorf("invalid snapshot config structure: zones array in memory section not found or invalid")
|
||||
}
|
||||
} else {
|
||||
clh.Logger().Error("Unable to access memory section in snapshot config")
|
||||
return fmt.Errorf("invalid snapshot config structure: memory section not found or invalid")
|
||||
}
|
||||
|
||||
// Write the modified config back to file
|
||||
modifiedConfig, err := json.Marshal(snapshotConfigData)
|
||||
if err != nil {
|
||||
clh.Logger().WithError(err).Error("Failed to marshal modified snapshot config")
|
||||
return err
|
||||
}
|
||||
|
||||
if err := os.WriteFile(snapshotConfigPath, modifiedConfig, 0600); err != nil {
|
||||
clh.Logger().WithError(err).Error("Failed to write modified snapshot config")
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (clh *cloudHypervisor) ResumeVM(ctx context.Context) error {
|
||||
clh.Logger().WithField("function", "ResumeVM").Info("Resume Sandbox")
|
||||
cl := clh.client()
|
||||
ctx, cancel := context.WithTimeout(ctx, clh.getClhAPITimeout()*time.Second)
|
||||
defer cancel()
|
||||
|
||||
_, err := cl.ResumeVM(ctx)
|
||||
if err != nil {
|
||||
clh.Logger().WithError(err).Error("Failed to resume VM")
|
||||
return openAPIClientError(err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -1509,11 +1816,11 @@ func (clh *cloudHypervisor) clhPath() (string, error) {
|
||||
p = defaultClhPath
|
||||
}
|
||||
|
||||
if _, err = os.Stat(p); os.IsNotExist(err) {
|
||||
return "", fmt.Errorf("Cloud-Hypervisor path (%s) does not exist", p)
|
||||
if _, err = os.Stat(p); err != nil {
|
||||
return "", fmt.Errorf("Cloud-Hypervisor path (%s) is not accessible: %w", p, err)
|
||||
}
|
||||
|
||||
return p, err
|
||||
return p, nil
|
||||
}
|
||||
|
||||
func (clh *cloudHypervisor) launchClh() error {
|
||||
@@ -1741,6 +2048,60 @@ func (clh *cloudHypervisor) bootVM(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// restoreVM restores a VM from a template snapshot. The restored VM will be in
|
||||
// Paused state. The caller (factory layer, via factory.GetVM → vm.Resume) is
|
||||
// responsible for resuming the VM, reseeding the RNG, and syncing the guest clock
|
||||
// before the VM is used. See factory_linux.go GetVM().
|
||||
func (clh *cloudHypervisor) restoreVM(ctx context.Context) error {
|
||||
clh.Logger().Info("Restoring VM from template")
|
||||
|
||||
cl := clh.client()
|
||||
|
||||
// use the VMStorePath as the base for the restore source URL
|
||||
vmPath := filepath.Join(clh.config.VMStorePath, clh.id)
|
||||
sourceURL := "file://" + vmPath
|
||||
|
||||
// check if the snapshot directory contains the state.json and config.json files
|
||||
// which contain the VM state and configuration respectively
|
||||
stateFile := filepath.Join(vmPath, "state.json")
|
||||
configFile := filepath.Join(vmPath, "config.json")
|
||||
|
||||
if _, err := os.Stat(stateFile); err != nil {
|
||||
return fmt.Errorf("failed to access state file %s: %v", stateFile, err)
|
||||
}
|
||||
|
||||
if _, err := os.Stat(configFile); err != nil {
|
||||
return fmt.Errorf("failed to access config file %s: %v", configFile, err)
|
||||
}
|
||||
|
||||
// Prepare restore configuration
|
||||
restoreConfig := *chclient.NewRestoreConfig(sourceURL)
|
||||
|
||||
clh.Logger().WithField("sourceURL", sourceURL).Debug("Restore configuration")
|
||||
|
||||
// Restore VM from template (uses the caller's ctx, which already has the boot timeout)
|
||||
_, err := cl.VmRestorePut(ctx, restoreConfig)
|
||||
if err != nil {
|
||||
clh.Logger().WithError(err).Error("failed to restore VM from template")
|
||||
return openAPIClientError(err)
|
||||
}
|
||||
|
||||
// Check VM state after restoration
|
||||
info, err := clh.vmInfo()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
clh.Logger().Debugf("VM state after restore: %#v", info)
|
||||
|
||||
if info.State != clhStatePaused {
|
||||
clh.Logger().Warnf("VM state is '%s' after restore, expected 'Paused'", info.State)
|
||||
}
|
||||
|
||||
clh.Logger().Info("Successfully restored VM from template")
|
||||
return nil
|
||||
}
|
||||
|
||||
func (clh *cloudHypervisor) addVSock(cid int64, path string) {
|
||||
clh.Logger().WithFields(log.Fields{
|
||||
"path": path,
|
||||
|
||||
@@ -74,7 +74,9 @@ func newClhConfig() (HypervisorConfig, error) {
|
||||
}
|
||||
|
||||
type clhClientMock struct {
|
||||
vmInfo chclient.VmInfo
|
||||
vmInfo chclient.VmInfo
|
||||
restoreRequest *chclient.RestoreConfig
|
||||
snapshotRequest *chclient.VmSnapshotConfig
|
||||
}
|
||||
|
||||
func (c *clhClientMock) VmmPingGet(ctx context.Context) (chclient.VmmPingResponse, *http.Response, error) {
|
||||
@@ -115,11 +117,35 @@ func (c *clhClientMock) VmAddDiskPut(ctx context.Context, diskConfig chclient.Di
|
||||
return chclient.PciDeviceInfo{Bdf: "0000:00:0a.0"}, nil, nil
|
||||
}
|
||||
|
||||
//nolint:golint
|
||||
func (c *clhClientMock) VmPausePut(ctx context.Context) (*http.Response, error) {
|
||||
c.vmInfo.State = clhStatePaused
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
//nolint:golint
|
||||
func (c *clhClientMock) VmSnapshotPut(ctx context.Context, vmSnapshotConfig chclient.VmSnapshotConfig) (*http.Response, error) {
|
||||
c.snapshotRequest = &vmSnapshotConfig
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
//nolint:golint
|
||||
func (c *clhClientMock) VmRemoveDevicePut(ctx context.Context, vmRemoveDevice chclient.VmRemoveDevice) (*http.Response, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (c *clhClientMock) VmRestorePut(ctx context.Context, restoreConfig chclient.RestoreConfig) (*http.Response, error) {
|
||||
c.restoreRequest = &restoreConfig
|
||||
// restoreVM() verifies Paused after restore.
|
||||
c.vmInfo.State = clhStatePaused
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (c *clhClientMock) ResumeVM(ctx context.Context) (*http.Response, error) {
|
||||
c.vmInfo.State = clhStateRunning
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func TestCloudHypervisorAddVSock(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
clh := cloudHypervisor{}
|
||||
@@ -431,7 +457,8 @@ func TestCloudHypervisorCleanupVM(t *testing.T) {
|
||||
assert.NoError(err, "persist.GetDriver() unexpected error")
|
||||
|
||||
dir := filepath.Join(store.RunVMStoragePath(), clh.id)
|
||||
os.MkdirAll(dir, os.ModePerm)
|
||||
err = os.MkdirAll(dir, os.ModePerm)
|
||||
assert.NoError(err, "failed to create dir %s", dir)
|
||||
|
||||
err = clh.cleanupVM(false)
|
||||
assert.NoError(err, "persist.GetDriver() unexpected error")
|
||||
@@ -516,6 +543,81 @@ func TestClhCreateVM(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestClhRestoreVM(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
store, err := persist.GetDriver()
|
||||
assert.NoError(err)
|
||||
|
||||
clhConfig, err := newClhConfig()
|
||||
assert.NoError(err)
|
||||
clhConfig.VMStorePath = store.RunVMStoragePath()
|
||||
clhConfig.RunStorePath = store.RunStoragePath()
|
||||
|
||||
mockClient := &clhClientMock{}
|
||||
clh := &cloudHypervisor{
|
||||
config: clhConfig,
|
||||
APIClient: mockClient,
|
||||
}
|
||||
|
||||
// First call restoreVM without the VM snapshot files (state.json, config.json) present.
|
||||
err = clh.restoreVM(context.Background())
|
||||
// An error is expected because restoreVM expects the VM snapshot files to be present.
|
||||
assert.Error(err)
|
||||
assert.Contains(err.Error(), filepath.Join(clhConfig.VMStorePath, "state.json"))
|
||||
|
||||
// Now create the VM snapshot files and call restoreVM again.
|
||||
err = os.MkdirAll(clhConfig.VMStorePath, os.ModePerm)
|
||||
assert.NoError(err, "failed to create dir %s", clhConfig.VMStorePath)
|
||||
stateFile := filepath.Join(clhConfig.VMStorePath, "state.json")
|
||||
configFile := filepath.Join(clhConfig.VMStorePath, "config.json")
|
||||
err = os.WriteFile(stateFile, []byte("{}"), 0o600)
|
||||
assert.NoError(err)
|
||||
err = os.WriteFile(configFile, []byte("{}"), 0o600)
|
||||
assert.NoError(err)
|
||||
|
||||
// Call restoreVM again, this time it should succeed.
|
||||
err = clh.restoreVM(context.Background())
|
||||
assert.NoError(err)
|
||||
|
||||
if assert.NotNil(mockClient.restoreRequest) {
|
||||
expectedSourceURL := "file://" + clhConfig.VMStorePath
|
||||
assert.Equal(expectedSourceURL, mockClient.restoreRequest.GetSourceUrl())
|
||||
}
|
||||
|
||||
info, err := clh.vmInfo()
|
||||
assert.NoError(err)
|
||||
assert.Equal(clhStatePaused, info.State)
|
||||
}
|
||||
|
||||
func TestClhSaveVM(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
store, err := persist.GetDriver()
|
||||
assert.NoError(err)
|
||||
|
||||
clhConfig, err := newClhConfig()
|
||||
assert.NoError(err)
|
||||
// For testing, assume the memory path is located within the VM store path.
|
||||
clhConfig.MemoryPath = filepath.Join(store.RunVMStoragePath(), "memory")
|
||||
clhConfig.VMStorePath = store.RunVMStoragePath()
|
||||
clhConfig.RunStorePath = store.RunStoragePath()
|
||||
|
||||
mockClient := &clhClientMock{}
|
||||
clh := &cloudHypervisor{
|
||||
config: clhConfig,
|
||||
APIClient: mockClient,
|
||||
}
|
||||
|
||||
err = clh.SaveVM()
|
||||
assert.NoError(err)
|
||||
|
||||
if assert.NotNil(mockClient.snapshotRequest) {
|
||||
expectedDestinationURL := "file://" + filepath.Dir(clhConfig.MemoryPath)
|
||||
assert.Equal(expectedDestinationURL, mockClient.snapshotRequest.GetDestinationUrl())
|
||||
}
|
||||
}
|
||||
|
||||
func TestCloudHypervisorStartSandbox(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
clhConfig, err := newClhConfig()
|
||||
|
||||
@@ -80,6 +80,8 @@ func resetHypervisorConfig(config *vc.VMConfig) {
|
||||
config.HypervisorConfig.SharedPath = ""
|
||||
config.HypervisorConfig.VMStorePath = ""
|
||||
config.HypervisorConfig.RunStorePath = ""
|
||||
config.HypervisorConfig.SandboxName = ""
|
||||
config.HypervisorConfig.SandboxNamespace = ""
|
||||
}
|
||||
|
||||
// It's important that baseConfig and newConfig are passed by value!
|
||||
|
||||
@@ -11,6 +11,7 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
@@ -115,6 +116,15 @@ func (t *template) prepareTemplateFiles() error {
|
||||
}
|
||||
f.Close()
|
||||
|
||||
// truncate the memory file to the exact size of the VM memory
|
||||
memoryInBytes := int64(t.config.HypervisorConfig.MemorySize) * 1024 * 1024
|
||||
t.Logger().Infof("truncating memory file %s to %d bytes", t.statePath+"/memory", memoryInBytes)
|
||||
err = os.Truncate(t.statePath+"/memory", memoryInBytes)
|
||||
if err != nil {
|
||||
t.close()
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -124,7 +134,8 @@ func (t *template) createTemplateVM(ctx context.Context) error {
|
||||
config.HypervisorConfig.BootToBeTemplate = true
|
||||
config.HypervisorConfig.BootFromTemplate = false
|
||||
config.HypervisorConfig.MemoryPath = t.statePath + "/memory"
|
||||
config.HypervisorConfig.DevicesStatePath = t.statePath + "/state"
|
||||
config.HypervisorConfig.DevicesStatePath = t.deviceStatePath()
|
||||
config.HypervisorConfig.VMStorePath = t.statePath
|
||||
|
||||
vm, err := vc.NewVM(ctx, config)
|
||||
if err != nil {
|
||||
@@ -161,7 +172,7 @@ func (t *template) createFromTemplateVM(ctx context.Context, c vc.VMConfig) (*vc
|
||||
config.HypervisorConfig.BootToBeTemplate = false
|
||||
config.HypervisorConfig.BootFromTemplate = true
|
||||
config.HypervisorConfig.MemoryPath = t.statePath + "/memory"
|
||||
config.HypervisorConfig.DevicesStatePath = t.statePath + "/state"
|
||||
config.HypervisorConfig.DevicesStatePath = t.deviceStatePath()
|
||||
config.HypervisorConfig.SharedPath = c.HypervisorConfig.SharedPath
|
||||
config.HypervisorConfig.VMStorePath = c.HypervisorConfig.VMStorePath
|
||||
config.HypervisorConfig.RunStorePath = c.HypervisorConfig.RunStorePath
|
||||
@@ -175,6 +186,15 @@ func (t *template) checkTemplateVM() error {
|
||||
return err
|
||||
}
|
||||
|
||||
_, err = os.Stat(t.statePath + "/state")
|
||||
_, err = os.Stat(t.deviceStatePath())
|
||||
return err
|
||||
}
|
||||
|
||||
func (t *template) deviceStatePath() string {
|
||||
stateFileName := "state"
|
||||
if t.config.HypervisorType == vc.ClhHypervisor {
|
||||
stateFileName = "state.json"
|
||||
}
|
||||
|
||||
return filepath.Join(t.statePath, stateFileName)
|
||||
}
|
||||
|
||||
@@ -57,15 +57,26 @@ func TestTemplateFactory(t *testing.T) {
|
||||
assert.NoError(err)
|
||||
defer hybridVSockTTRPCMock.Stop()
|
||||
|
||||
// New
|
||||
// Create 2 sets of instance-specific directories for per-VM storage
|
||||
runStorePath1 := t.TempDir()
|
||||
vmStorePath1 := t.TempDir()
|
||||
runStorePath2 := t.TempDir()
|
||||
vmStorePath2 := t.TempDir()
|
||||
|
||||
// Create a new Template Factory
|
||||
f, err := New(ctx, vmConfig, testDir)
|
||||
assert.Nil(err)
|
||||
|
||||
// Config
|
||||
assert.Equal(f.Config(), vmConfig)
|
||||
|
||||
// GetBaseVM
|
||||
vm, err := f.GetBaseVM(ctx, vmConfig)
|
||||
// GetBaseVM with first instance paths
|
||||
vmConfig1 := vmConfig
|
||||
vmConfig1.HypervisorConfig.RunStorePath = runStorePath1
|
||||
vmConfig1.HypervisorConfig.VMStorePath = vmStorePath1
|
||||
|
||||
// Test the creation of a new VM from the template factory
|
||||
vm, err := f.GetBaseVM(ctx, vmConfig1)
|
||||
assert.Nil(err)
|
||||
|
||||
err = vm.Stop(ctx)
|
||||
@@ -79,44 +90,59 @@ func TestTemplateFactory(t *testing.T) {
|
||||
|
||||
assert.Equal(tt.Config(), vmConfig)
|
||||
|
||||
// Checking that template VM check fails
|
||||
// if the corresponding memory and state files are absent
|
||||
err = tt.checkTemplateVM()
|
||||
assert.Error(err)
|
||||
|
||||
_, err = os.Create(tt.statePath + "/memory")
|
||||
memFile, err := os.Create(tt.statePath + "/memory")
|
||||
assert.Nil(err)
|
||||
memFile.Close()
|
||||
err = tt.checkTemplateVM()
|
||||
assert.Error(err)
|
||||
|
||||
_, err = os.Create(tt.statePath + "/state")
|
||||
devFile, err := os.Create(tt.deviceStatePath())
|
||||
assert.Nil(err)
|
||||
devFile.Close()
|
||||
|
||||
// After creating state and memory files, checkTemplateVM should succeed
|
||||
err = tt.checkTemplateVM()
|
||||
assert.Nil(err)
|
||||
|
||||
// Recreate the template VM, which should succeed
|
||||
err = tt.createTemplateVM(ctx)
|
||||
assert.Nil(err)
|
||||
|
||||
vm, err = tt.GetBaseVM(ctx, vmConfig)
|
||||
// Ensuring that directly calling template's GetBaseVM function
|
||||
// returns a VM instance similar to the one returned by the factory's GetBaseVM function
|
||||
vm, err = tt.GetBaseVM(ctx, vmConfig1)
|
||||
assert.Nil(err)
|
||||
|
||||
err = vm.Stop(ctx)
|
||||
assert.Nil(err)
|
||||
|
||||
vm, err = f.GetBaseVM(ctx, vmConfig)
|
||||
vm, err = f.GetBaseVM(ctx, vmConfig1)
|
||||
assert.Nil(err)
|
||||
|
||||
err = vm.Stop(ctx)
|
||||
assert.Nil(err)
|
||||
|
||||
// Overwriting the template VM should succeed
|
||||
err = tt.createTemplateVM(ctx)
|
||||
assert.Nil(err)
|
||||
|
||||
vm, err = tt.GetBaseVM(ctx, vmConfig)
|
||||
// Create second instance with different storage paths
|
||||
vmConfig2 := vmConfig
|
||||
vmConfig2.HypervisorConfig.RunStorePath = runStorePath2
|
||||
vmConfig2.HypervisorConfig.VMStorePath = vmStorePath2
|
||||
|
||||
vm, err = tt.GetBaseVM(ctx, vmConfig2)
|
||||
assert.Nil(err)
|
||||
|
||||
err = vm.Stop(ctx)
|
||||
assert.Nil(err)
|
||||
|
||||
vm, err = f.GetBaseVM(ctx, vmConfig)
|
||||
vm, err = f.GetBaseVM(ctx, vmConfig2)
|
||||
assert.Nil(err)
|
||||
|
||||
err = vm.Stop(ctx)
|
||||
|
||||
@@ -43,6 +43,7 @@ TEST_CLUSTER_NAMESPACE="${TEST_CLUSTER_NAMESPACE:-}"
|
||||
CONTAINER_RUNTIME="${CONTAINER_RUNTIME:-containerd}"
|
||||
SNAPSHOTTER="${SNAPSHOTTER:-}"
|
||||
EROFS_SNAPSHOTTER_MODE="${EROFS_SNAPSHOTTER_MODE:-}"
|
||||
EROFS_MERGE_MODE="${EROFS_MERGE_MODE:-}"
|
||||
|
||||
# Wait for the Kubernetes API to recover after kata-deploy uninstall, then
|
||||
# retry the uninstall to purge any stale helm release state. On k3s/rke2,
|
||||
@@ -851,6 +852,26 @@ function helm_helper() {
|
||||
yq -i '.containerd.userDropIn = strenv(HELM_CONTAINERD_USER_DROP_IN)' "${values_yaml}"
|
||||
fi
|
||||
|
||||
# EROFS merge mode ("merged" default, or "unmerged"). This is orthogonal
|
||||
# to EROFS_SNAPSHOTTER_MODE (which controls default_size): it controls
|
||||
# whether containerd merges layers into a single fsmeta.erofs (merged,
|
||||
# runtime-rs only) or keeps per-layer layer.erofs (unmerged, required by
|
||||
# the Go runtime).
|
||||
if [[ -n "${EROFS_MERGE_MODE}" ]]; then
|
||||
if [[ "${SNAPSHOTTER}" != "erofs" ]]; then
|
||||
die "EROFS_MERGE_MODE is only supported with SNAPSHOTTER=erofs"
|
||||
fi
|
||||
|
||||
case "${EROFS_MERGE_MODE}" in
|
||||
merged|unmerged) ;;
|
||||
*)
|
||||
die "Unsupported EROFS_MERGE_MODE: ${EROFS_MERGE_MODE}"
|
||||
;;
|
||||
esac
|
||||
|
||||
yq -i ".snapshotter.erofsMergeMode = \"${EROFS_MERGE_MODE}\"" "${values_yaml}"
|
||||
fi
|
||||
|
||||
if [[ -z "${HELM_SHIMS}" ]]; then
|
||||
die "A list of shims is expected but none was provided"
|
||||
fi
|
||||
|
||||
114
tests/integration/kubernetes/k8s-vm-templating.bats
Normal file
114
tests/integration/kubernetes/k8s-vm-templating.bats
Normal file
@@ -0,0 +1,114 @@
|
||||
#!/usr/bin/env bats
|
||||
#
|
||||
# Copyright (c) 2024 Kata Containers
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# Tests for Kata VM templating (factory) functionality in Kubernetes integration mode
|
||||
|
||||
load "${BATS_TEST_DIRNAME}/lib.sh"
|
||||
load "${BATS_TEST_DIRNAME}/../../common.bash"
|
||||
load "${BATS_TEST_DIRNAME}/confidential_common.sh"
|
||||
load "${BATS_TEST_DIRNAME}/tests_common.sh"
|
||||
|
||||
# Reports whether VM templating can be exercised in this environment.
# Returns 0 only when all of the following hold, and 1 otherwise:
#   - KATA_HYPERVISOR is "clh" or "qemu";
#   - is_confidential_runtime_class does not succeed;
#   - SNAPSHOTTER is "blockfile" or "erofs" (templating runs with
#     shared_fs="none", so a block-device-based snapshotter is required).
vm_templating_supported() {
	case "${KATA_HYPERVISOR}" in
		clh|qemu) ;;
		*) return 1 ;;
	esac

	if is_confidential_runtime_class; then
		return 1
	fi

	case "${SNAPSHOTTER:-}" in
		blockfile|erofs) return 0 ;;
		*) return 1 ;;
	esac
}
|
||||
|
||||
# Per-test setup: skips unsupported environments, runs the common setup,
# installs a Kata runtime config drop-in that turns on VM templating, and
# records the paths that the test body and teardown rely on
# (dropin_path, kata_config_path).
setup() {
	vm_templating_supported \
		|| skip "VM templating requires a non-confidential clh/qemu hypervisor and a blockfile/erofs snapshotter (KATA_HYPERVISOR=${KATA_HYPERVISOR}, SNAPSHOTTER=${SNAPSHOTTER:-unset})"

	if ! setup_common; then
		die "setup_common failed"
	fi

	# The drop-in enables VM templating and turns shared_fs off, since the two
	# are incompatible. Only QEMU needs the rootfs switched to an initrd; CLH
	# templating works without one.
	local rootfs_settings=""
	case "${KATA_HYPERVISOR}" in
		qemu)
			rootfs_settings=$'image = ""\ninitrd = "/opt/kata/share/kata-containers/kata-containers-initrd.img"'
			;;
	esac

	local local_dropin="${BATS_TEST_TMPDIR}/99-k8s-vm-templating.toml"
	cat > "${local_dropin}" <<DROPIN
[hypervisor.${KATA_HYPERVISOR}]
shared_fs = "none"
default_vcpus = 1
default_memory = 512
${rootfs_settings}

[factory]
enable_template = true
template_path = "/run/vc/vm/template"
DROPIN

	# Push the drop-in to the node chosen by setup_common; the returned remote
	# path is kept so teardown can delete it again.
	if ! dropin_path="$(set_kata_runtime_config_dropin_file "$node" "${local_dropin}")"; then
		die "Failed to install Kata runtime config drop-in on node $node"
	fi

	# kata-runtime would otherwise pick the QEMU config by default, so factory
	# init/destroy are pointed explicitly at the active hypervisor's config.
	kata_config_path="/opt/kata/share/defaults/kata-containers/runtimes/${KATA_HYPERVISOR}/configuration-${KATA_HYPERVISOR}.toml"
}
|
||||
|
||||
@test "Pod can be created with a templated VM" {
|
||||
# Initialize the VM template on the target node.
|
||||
exec_host "$node" "nsenter --mount=/proc/1/ns/mnt /opt/kata/bin/kata-runtime --config ${kata_config_path} factory init"
|
||||
|
||||
# The factory init above must have created the template directory. exec_host
|
||||
# pipes the remote output through `tr`, so the pipeline's exit status is not
|
||||
# the remote command's; assert on the output instead. Check inside PID 1's
|
||||
# mount namespace, where the template tmpfs was actually mounted.
|
||||
exec_host "$node" "nsenter --mount=/proc/1/ns/mnt test -f /run/vc/vm/template/memory && echo present" | grep -q present
|
||||
|
||||
pod_name="test-templated-pod"
|
||||
ctr_name="test-container"
|
||||
|
||||
pod_config=$(mktemp --tmpdir pod_config.XXXXXX.yaml)
|
||||
cp "$pod_config_dir/busybox-template.yaml" "$pod_config"
|
||||
|
||||
sed -i "s/POD_NAME/$pod_name/" "$pod_config"
|
||||
sed -i "s/CTR_NAME/$ctr_name/" "$pod_config"
|
||||
|
||||
kubectl create -f "${pod_config}"
|
||||
kubectl wait --for=condition=Ready --timeout="$timeout" "pod/${pod_name}"
|
||||
|
||||
grep_pod_exec_output "${pod_name}" "Hello from templated VM" sh -c "echo 'Hello from templated VM'"
|
||||
|
||||
# Confirm at least one VM sandbox under /run/vc/vm/ is a symlink, which
|
||||
# proves the factory/template path was used. A non-templated VM creates a
|
||||
# real directory at /run/vc/vm/<sandbox-id>/, whereas a factory-spawned VM
|
||||
# stores its state under a generated UUID and /run/vc/vm/<sandbox-id> is a
|
||||
# symlink pointing at it (see assignSandbox() in
|
||||
# src/runtime/virtcontainers/vm.go). Inspect PID 1's mount namespace, where
|
||||
# the shim creates these entries alongside the template tmpfs.
|
||||
exec_host "$node" \
|
||||
"nsenter --mount=/proc/1/ns/mnt find /run/vc/vm -maxdepth 1 -mindepth 1 -type l ! -name template | grep -q . && echo symlink" \
|
||||
| grep -q symlink
|
||||
}
|
||||
|
||||
# Per-test cleanup: removes the temporary pod manifest, destroys the VM
# template, removes the runtime config drop-in, and runs the common teardown.
# Does nothing in environments where the test was skipped as unsupported.
teardown() {
	if ! vm_templating_supported; then
		return 0
	fi

	rm -f "${pod_config:-}"

	# factory destroy has to run in PID 1's mount namespace so it can unmount
	# the template tmpfs that factory init mounted there. A failure is only
	# reported; the remaining cleanup still runs.
	if ! exec_host "$node" "nsenter --mount=/proc/1/ns/mnt /opt/kata/bin/kata-runtime --config ${kata_config_path} factory destroy"; then
		echo "Warning: Failed to destroy VM template on node $node"
	fi

	# Likewise best-effort: drop the config drop-in installed by setup.
	if ! remove_kata_runtime_config_dropin_file "$node" "${dropin_path:-}"; then
		echo "Warning: Failed to remove Kata runtime config drop-in on node $node"
	fi

	teardown_common "${node:-}" "${node_start_time:-}"
}
|
||||
@@ -104,6 +104,7 @@ else
|
||||
"k8s-security-context.bats" \
|
||||
"k8s-shared-volume.bats" \
|
||||
"k8s-volume.bats" \
|
||||
"k8s-vm-templating.bats" \
|
||||
"k8s-nginx-connectivity.bats" \
|
||||
)
|
||||
|
||||
|
||||
@@ -15,36 +15,50 @@ use std::path::Path;
|
||||
pub async fn configure_erofs_snapshotter(config: &Config, configuration_file: &Path) -> Result<()> {
|
||||
info!("Configuring erofs-snapshotter");
|
||||
|
||||
// "unmerged" mode keeps each image layer as its own per-layer `layer.erofs`
|
||||
// (containerd's default, non-fsmerged layout), which is the only layout the
|
||||
// Go runtime can consume. In the default "merged" mode we force containerd
|
||||
// to merge layers into a single `fsmeta.erofs`, which is runtime-rs only.
|
||||
let unmerged = config.erofs_merge_mode.as_deref() == Some("unmerged");
|
||||
|
||||
// The Go runtime does not support fsmerged EROFS (fsmeta.erofs).
|
||||
// If the snapshotter handler mapping explicitly pairs a Go shim with
|
||||
// erofs, that is a hard misconfiguration — bail out so the operator
|
||||
// fixes the mapping instead of hitting cryptic runtime errors later.
|
||||
if let Some(mapping) = config.snapshotter_handler_mapping_for_arch.as_ref() {
|
||||
let mut go_shims_on_erofs = Vec::new();
|
||||
for entry in mapping.split(',') {
|
||||
let parts: Vec<&str> = entry.split(':').collect();
|
||||
if parts.len() == 2 && parts[1] == "erofs" && !utils::is_rust_shim(parts[0]) {
|
||||
go_shims_on_erofs.push(parts[0].to_string());
|
||||
// erofs in the (default) merged mode, that is a hard misconfiguration —
|
||||
// bail out so the operator fixes the mapping instead of hitting cryptic
|
||||
// runtime errors later. In "unmerged" mode the Go runtime is supported, so
|
||||
// skip this guard.
|
||||
if !unmerged {
|
||||
if let Some(mapping) = config.snapshotter_handler_mapping_for_arch.as_ref() {
|
||||
let mut go_shims_on_erofs = Vec::new();
|
||||
for entry in mapping.split(',') {
|
||||
let parts: Vec<&str> = entry.split(':').collect();
|
||||
if parts.len() == 2 && parts[1] == "erofs" && !utils::is_rust_shim(parts[0]) {
|
||||
go_shims_on_erofs.push(parts[0].to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
if !go_shims_on_erofs.is_empty() {
|
||||
warn!("##########################################################################");
|
||||
warn!("# #");
|
||||
warn!("# Go runtime shim(s) mapped to the erofs snapshotter: #");
|
||||
for s in &go_shims_on_erofs {
|
||||
warn!("# - {:<64} #", s);
|
||||
if !go_shims_on_erofs.is_empty() {
|
||||
warn!("##########################################################################");
|
||||
warn!("# #");
|
||||
warn!("# Go runtime shim(s) mapped to the erofs snapshotter: #");
|
||||
for s in &go_shims_on_erofs {
|
||||
warn!("# - {:<64} #", s);
|
||||
}
|
||||
warn!("# #");
|
||||
warn!(
|
||||
"# The Go runtime does NOT support fsmerged EROFS (fsmeta.erofs). #"
|
||||
);
|
||||
warn!("# Only runtime-rs shims are supported with merged erofs. Set #");
|
||||
warn!("# EROFS_MERGE_MODE=unmerged to use the Go runtime with erofs. #");
|
||||
warn!("# #");
|
||||
warn!("##########################################################################");
|
||||
return Err(anyhow::anyhow!(
|
||||
"erofs snapshotter: Go runtime shim(s) [{}] cannot be mapped to merged erofs. \
|
||||
The Go runtime does not support fsmerged EROFS. \
|
||||
Set EROFS_MERGE_MODE=unmerged, remove these shims from \
|
||||
SNAPSHOTTER_HANDLER_MAPPING, or switch them to runtime-rs.",
|
||||
go_shims_on_erofs.join(", ")
|
||||
));
|
||||
}
|
||||
warn!("# #");
|
||||
warn!("# The Go runtime does NOT support fsmerged EROFS (fsmeta.erofs). #");
|
||||
warn!("# Only runtime-rs shims are supported with the erofs snapshotter. #");
|
||||
warn!("# #");
|
||||
warn!("##########################################################################");
|
||||
return Err(anyhow::anyhow!(
|
||||
"erofs snapshotter: Go runtime shim(s) [{}] cannot be mapped to erofs. \
|
||||
The Go runtime does not support fsmerged EROFS. \
|
||||
Remove these shims from SNAPSHOTTER_HANDLER_MAPPING or switch them to runtime-rs.",
|
||||
go_shims_on_erofs.join(", ")
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -88,11 +102,27 @@ pub async fn configure_erofs_snapshotter(config: &Config, configuration_file: &P
|
||||
".plugins.\"io.containerd.snapshotter.v1.erofs\".default_size",
|
||||
"\"10G\"",
|
||||
)?;
|
||||
toml_utils::set_toml_value(
|
||||
configuration_file,
|
||||
".plugins.\"io.containerd.snapshotter.v1.erofs\".max_unmerged_layers",
|
||||
"0",
|
||||
)?;
|
||||
// In the default "merged" mode, force containerd to merge all layers into a
|
||||
// single fsmeta.erofs (max_unmerged_layers = 0). In "unmerged" mode we delete
|
||||
// any previously-written value so each layer stays a separate layer.erofs,
|
||||
// which the Go runtime requires.
|
||||
//
|
||||
// Because kata-deploy edits the containerd config in place, switching from
|
||||
// merged to unmerged must actively remove the old `max_unmerged_layers = 0`
|
||||
// left behind by a previous install. Otherwise the stale `0` would keep
|
||||
// forcing the merged layout and break Go-runtime compatibility.
|
||||
if !unmerged {
|
||||
toml_utils::set_toml_value(
|
||||
configuration_file,
|
||||
".plugins.\"io.containerd.snapshotter.v1.erofs\".max_unmerged_layers",
|
||||
"0",
|
||||
)?;
|
||||
} else {
|
||||
toml_utils::delete_toml_value(
|
||||
configuration_file,
|
||||
".plugins.\"io.containerd.snapshotter.v1.erofs\".max_unmerged_layers",
|
||||
)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -178,6 +178,14 @@ pub struct Config {
|
||||
pub multi_install_suffix: Option<String>,
|
||||
pub helm_post_delete_hook: bool,
|
||||
pub experimental_setup_snapshotter: Option<Vec<String>>,
|
||||
/// EROFS snapshotter merge mode: "merged" (default) or "unmerged".
|
||||
///
|
||||
/// In "unmerged" mode kata-deploy does not force containerd's erofs
|
||||
/// snapshotter to merge layers (it leaves `max_unmerged_layers` at the
|
||||
/// containerd default), so each image layer is exposed as its own
|
||||
/// per-layer `layer.erofs`. This is the only layout the Go runtime can
|
||||
/// consume; the merged (`fsmeta.erofs`) layout is runtime-rs only.
|
||||
pub erofs_merge_mode: Option<String>,
|
||||
pub experimental_force_guest_pull_for_arch: Vec<String>,
|
||||
pub dest_dir: String,
|
||||
pub host_install_dir: String,
|
||||
@@ -307,6 +315,11 @@ impl Config {
|
||||
.filter(|s| !s.is_empty())
|
||||
.map(|s| s.split(',').map(|s| s.trim().to_string()).collect());
|
||||
|
||||
let erofs_merge_mode = env::var("EROFS_MERGE_MODE")
|
||||
.ok()
|
||||
.map(|s| s.trim().to_lowercase())
|
||||
.filter(|s| !s.is_empty());
|
||||
|
||||
// Only use arch-specific variable for experimental force guest pull
|
||||
let experimental_force_guest_pull_for_arch =
|
||||
get_arch_var("EXPERIMENTAL_FORCE_GUEST_PULL", "", &arch)
|
||||
@@ -338,6 +351,7 @@ impl Config {
|
||||
multi_install_suffix,
|
||||
helm_post_delete_hook,
|
||||
experimental_setup_snapshotter,
|
||||
erofs_merge_mode,
|
||||
experimental_force_guest_pull_for_arch,
|
||||
dest_dir,
|
||||
host_install_dir,
|
||||
@@ -508,6 +522,17 @@ impl Config {
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// Validate EROFS_MERGE_MODE
|
||||
// Only "merged" (default) and "unmerged" are accepted.
|
||||
if let Some(mode) = self.erofs_merge_mode.as_ref() {
|
||||
if mode != "merged" && mode != "unmerged" {
|
||||
return Err(anyhow::anyhow!(
|
||||
"EROFS_MERGE_MODE must be either 'merged' or 'unmerged', got '{}'",
|
||||
mode
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// Validate EXPERIMENTAL_FORCE_GUEST_PULL_FOR_ARCH
|
||||
// This is a list of shim names
|
||||
for shim in &self.experimental_force_guest_pull_for_arch {
|
||||
@@ -551,6 +576,7 @@ impl Config {
|
||||
"* EXPERIMENTAL_SETUP_SNAPSHOTTER: {:?}",
|
||||
self.experimental_setup_snapshotter
|
||||
);
|
||||
info!("* EROFS_MERGE_MODE: {:?}", self.erofs_merge_mode);
|
||||
info!(
|
||||
"* EXPERIMENTAL_FORCE_GUEST_PULL: {}",
|
||||
self.experimental_force_guest_pull_for_arch.join(",")
|
||||
|
||||
@@ -121,6 +121,47 @@ pub fn set_toml_value(file_path: &Path, path: &str, value: &str) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Delete a TOML value (or table) at a given path.
|
||||
///
|
||||
/// Navigates to the parent table and removes the final key. This is a no-op if
|
||||
/// any path component (including the final key) does not exist, so callers can
|
||||
/// unconditionally remove a value that may or may not be present.
|
||||
pub fn delete_toml_value(file_path: &Path, path: &str) -> Result<()> {
|
||||
let content = std::fs::read_to_string(file_path)
|
||||
.with_context(|| format!("Failed to read TOML file: {file_path:?}"))?;
|
||||
|
||||
let (header, toml_content) = split_non_toml_header(&content);
|
||||
let mut doc = toml_content
|
||||
.parse::<DocumentMut>()
|
||||
.context("Failed to parse TOML")?;
|
||||
|
||||
let parts = parse_toml_path(path)?;
|
||||
|
||||
let mut current_table = doc.as_table_mut();
|
||||
for (i, part) in parts.iter().enumerate() {
|
||||
let is_last = i == parts.len() - 1;
|
||||
|
||||
if is_last {
|
||||
// Remove the value; absent key is fine (no-op).
|
||||
current_table.remove(part.as_str());
|
||||
} else {
|
||||
// Navigate into the intermediate table. If it does not exist, there
|
||||
// is nothing to delete.
|
||||
match current_table
|
||||
.get_mut(part.as_str())
|
||||
.and_then(|item| item.as_table_mut())
|
||||
{
|
||||
Some(table) => current_table = table,
|
||||
None => return Ok(()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
write_toml_with_header(file_path, header, &doc)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get a TOML value at a given path
|
||||
pub fn get_toml_value(file_path: &Path, path: &str) -> Result<String> {
|
||||
let content = std::fs::read_to_string(file_path)
|
||||
@@ -1714,4 +1755,100 @@ imports = ["/etc/containerd/conf.d/*.toml", "/opt/kata/containerd/config.d/kata-
|
||||
.unwrap();
|
||||
assert_eq!(runtime_type, "io.containerd.kata-qemu.v2");
|
||||
}
|
||||
|
||||
#[test]
fn test_delete_toml_value() {
    // Path of the key being deleted and of a sibling that must survive.
    let target = ".plugins.\"io.containerd.snapshotter.v1.erofs\".max_unmerged_layers";
    let neighbour = ".plugins.\"io.containerd.snapshotter.v1.erofs\".enable_fsverity";

    let temp_file = NamedTempFile::new().unwrap();
    let toml_path = temp_file.path();
    std::fs::write(
        toml_path,
        "[plugins.\"io.containerd.snapshotter.v1.erofs\"]\nmax_unmerged_layers = 0\nenable_fsverity = true\n",
    )
    .unwrap();

    // Precondition: the key exists before it is deleted.
    assert_eq!(get_toml_value(toml_path, target).unwrap(), "0");

    delete_toml_value(toml_path, target).unwrap();

    // The key can no longer be looked up...
    assert!(
        get_toml_value(toml_path, target).is_err(),
        "deleted key should no longer be found"
    );

    // ...while the other key in the same table is untouched.
    assert_eq!(
        get_toml_value(toml_path, neighbour).unwrap(),
        "true",
        "sibling keys must be preserved"
    );
}
|
||||
|
||||
#[test]
fn test_delete_toml_value_missing_key_is_noop() {
    let temp_file = NamedTempFile::new().unwrap();
    let toml_path = temp_file.path();
    std::fs::write(
        toml_path,
        "[plugins.\"io.containerd.snapshotter.v1.erofs\"]\nenable_fsverity = true\n",
    )
    .unwrap();

    // Neither an absent final key nor an absent intermediate table may be
    // reported as an error.
    let absent_paths = [
        ".plugins.\"io.containerd.snapshotter.v1.erofs\".max_unmerged_layers",
        ".plugins.\"nonexistent.plugin\".some_key",
    ];
    for missing in absent_paths {
        delete_toml_value(toml_path, missing).unwrap();
    }

    // The file is still readable and the existing key kept its value.
    let kept = get_toml_value(
        toml_path,
        ".plugins.\"io.containerd.snapshotter.v1.erofs\".enable_fsverity",
    )
    .unwrap();
    assert_eq!(kept, "true");
}
|
||||
|
||||
#[test]
fn test_delete_toml_value_preserves_k3s_header() {
    let temp_file = NamedTempFile::new().unwrap();
    let toml_path = temp_file.path();

    // The first line is a template directive, not TOML.
    std::fs::write(
        toml_path,
        "{{ template \"base\" . }}\n[plugins.\"io.containerd.snapshotter.v1.erofs\"]\nmax_unmerged_layers = 0\n",
    )
    .unwrap();

    delete_toml_value(
        toml_path,
        ".plugins.\"io.containerd.snapshotter.v1.erofs\".max_unmerged_layers",
    )
    .unwrap();

    let rewritten = std::fs::read_to_string(toml_path).unwrap();

    // The directive line must still lead the file after the rewrite.
    assert!(
        rewritten.starts_with("{{ template \"base\" . }}\n"),
        "non-TOML header must be preserved"
    );
    // The deleted key must not appear anywhere in the output.
    assert!(
        !rewritten.contains("max_unmerged_layers"),
        "value must be removed"
    );
}
|
||||
|
||||
#[test]
fn test_delete_toml_value_nonexistent_file() {
    // Deleting from a file that does not exist must surface an error.
    let missing_file = Path::new("/nonexistent/file.toml");
    assert!(delete_toml_value(missing_file, "some.path").is_err());
}
|
||||
}
|
||||
|
||||
@@ -413,6 +413,13 @@ Get snapshotter setup list from structured config
|
||||
{{- join "," .Values.snapshotter.setup -}}
|
||||
{{- end -}}
|
||||
|
||||
{{/*
Return the EROFS merge mode configured under .Values.snapshotter.erofsMergeMode
("merged" or "unmerged"), or an empty string when it is not set.
*/}}
{{- define "kata-deploy.getErofsMergeMode" -}}
{{- default "" .Values.snapshotter.erofsMergeMode -}}
{{- end -}}
|
||||
|
||||
{{/*
|
||||
Get debug value from structured config
|
||||
*/}}
|
||||
@@ -569,6 +576,11 @@ e.g. `{{- include "kata-deploy.commonEnv" . | nindent 8 }}`.
|
||||
- name: EXPERIMENTAL_SETUP_SNAPSHOTTER
|
||||
value: {{ $snapshotterSetup | quote }}
|
||||
{{- end }}
|
||||
{{- $erofsMergeMode := include "kata-deploy.getErofsMergeMode" . | trim -}}
|
||||
{{- if $erofsMergeMode }}
|
||||
- name: EROFS_MERGE_MODE
|
||||
value: {{ $erofsMergeMode | quote }}
|
||||
{{- end }}
|
||||
{{- $forceGuestPullAmd64 := include "kata-deploy.getForceGuestPullForArch" (dict "root" . "arch" "amd64") | trim -}}
|
||||
{{- if $forceGuestPullAmd64 }}
|
||||
- name: EXPERIMENTAL_FORCE_GUEST_PULL_X86_64
|
||||
|
||||
@@ -271,6 +271,18 @@ health:
|
||||
|
||||
snapshotter:
|
||||
setup: ["nydus"] # ["nydus", "erofs"] or []
|
||||
# EROFS merge mode: "merged" (default) or "unmerged".
|
||||
#
|
||||
# "merged" forces containerd's erofs snapshotter to merge all image layers
|
||||
# into a single fsmeta.erofs (max_unmerged_layers = 0). This layout is only
|
||||
# supported by runtime-rs shims.
|
||||
#
|
||||
# "unmerged" leaves max_unmerged_layers at the containerd default so each
|
||||
# image layer is exposed as its own per-layer layer.erofs. This is the only
|
||||
# layout the Go runtime can consume, so set this when mapping a Go shim to the
|
||||
# erofs snapshotter. When empty, kata-deploy uses its built-in default
|
||||
# (merged).
|
||||
erofsMergeMode: ""
|
||||
|
||||
# Shim configuration
|
||||
# By default (disableAll: false), all shims with enabled: ~ (null) are enabled.
|
||||
|
||||
Reference in New Issue
Block a user