From c0f9744225c0b58640892afd0c8f951bba709dac Mon Sep 17 00:00:00 2001 From: Cameron Baird Date: Tue, 9 Jun 2026 18:53:58 +0000 Subject: [PATCH] runtime: Implement support for VM Template factory in clh Add support for VM Template factory on the clh path. In order to support snapshot/restore-based VM templating, the following changes were needed: 1. For clh.go, implement SaveVM, PauseVM, restoreVM, ResumeVM 2. Remove initrd config check for VM Templating path. The root disk image (when using image mode) is created in memory and therefore captured in the VM snapshot. 3. Truncate the memory file to the size of the VM at factory VM create time. This allows CLH to use the memory file as the backing for the template VM memory, allowing O(1) snapshot times. 4. CLH uses memory zones as backing for its memory on the template paths 5. Update StartVM in CLH to use the restore path when template is configured and available Signed-off-by: Cameron Baird --- src/runtime/pkg/katautils/config.go | 6 - src/runtime/pkg/katautils/config_test.go | 2 +- src/runtime/virtcontainers/clh.go | 414 ++++++++++++++++-- src/runtime/virtcontainers/clh_test.go | 102 ++++- .../virtcontainers/factory/factory_linux.go | 3 + .../factory/template/template_linux.go | 26 +- .../factory/template/template_test.go | 44 +- 7 files changed, 548 insertions(+), 49 deletions(-) diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index de0b525c59..e78e699e01 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -2085,12 +2085,6 @@ func checkNetNsConfig(config oci.RuntimeConfig) error { // checkFactoryConfig ensures the VM factory configuration is valid. func checkFactoryConfig(config oci.RuntimeConfig) error { - if config.FactoryConfig.Template { - if config.HypervisorConfig.InitrdPath == "" { - return errors.New("Factory option enable_template requires an initrd image") - } - } - if config.FactoryConfig.VMCacheNumber > 0 { if config.HypervisorType != vc.QemuHypervisor { return errors.New("VM cache just support qemu") diff --git a/src/runtime/pkg/katautils/config_test.go b/src/runtime/pkg/katautils/config_test.go index ef9e70f8ec..187af6cb70 100644 --- a/src/runtime/pkg/katautils/config_test.go +++ b/src/runtime/pkg/katautils/config_test.go @@ -1696,7 +1696,7 @@ func TestCheckFactoryConfig(t *testing.T) { {false, false, "", "initrd"}, {true, false, "", "initrd"}, - {true, true, "image", ""}, + {true, false, "image", ""}, } for i, d := range data { diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index bffaa34385..28d5ef63d5 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -68,6 +68,7 @@ const ( const ( clhStateCreated = "Created" clhStateRunning = "Running" + clhStatePaused = "Paused" ) const ( @@ -112,8 +113,16 @@ type clhClient interface { VmAddDevicePut(ctx context.Context, deviceConfig chclient.DeviceConfig) (chclient.PciDeviceInfo, *http.Response, error) // Add a new disk device to the VM VmAddDiskPut(ctx context.Context, diskConfig chclient.DiskConfig) (chclient.PciDeviceInfo, *http.Response, error) + // Pause the VM + VmPausePut(ctx context.Context) (*http.Response, error) + // Create a snapshot of the VM + VmSnapshotPut(ctx context.Context, vmSnapshotConfig chclient.VmSnapshotConfig) (*http.Response, error) // Remove a device from the VM VmRemoveDevicePut(ctx context.Context, vmRemoveDevice chclient.VmRemoveDevice) (*http.Response, error) + // Restore VM from a snapshot + VmRestorePut(ctx context.Context, restoreConfig chclient.RestoreConfig) (*http.Response, error) + // Resume a paused VM + ResumeVM(ctx context.Context) (*http.Response, error) } type clhClientApi struct { @@ -153,10 +162,26 @@ func (c *clhClientApi) VmAddDiskPut(ctx context.Context, diskConfig chclient.Dis return c.ApiInternal.VmAddDiskPut(ctx).DiskConfig(diskConfig).Execute() } +func (c *clhClientApi) VmPausePut(ctx context.Context) (*http.Response, error) { + return c.ApiInternal.PauseVM(ctx).Execute() +} + +func (c *clhClientApi) VmSnapshotPut(ctx context.Context, vmSnapshotConfig chclient.VmSnapshotConfig) (*http.Response, error) { + return c.ApiInternal.VmSnapshotPut(ctx).VmSnapshotConfig(vmSnapshotConfig).Execute() +} + func (c *clhClientApi) VmRemoveDevicePut(ctx context.Context, vmRemoveDevice chclient.VmRemoveDevice) (*http.Response, error) { return c.ApiInternal.VmRemoveDevicePut(ctx).VmRemoveDevice(vmRemoveDevice).Execute() } +func (c *clhClientApi) VmRestorePut(ctx context.Context, restoreConfig chclient.RestoreConfig) (*http.Response, error) { + return c.ApiInternal.VmRestorePut(ctx).RestoreConfig(restoreConfig).Execute() +} + +func (c *clhClientApi) ResumeVM(ctx context.Context) (*http.Response, error) { + return c.ApiInternal.ResumeVM(ctx).Execute() +} + // This is done in order to be able to override such a function as part of // our unit tests, as when testing bootVM we're on a mocked scenario already. var vmAddNetPutRequest = func(clh *cloudHypervisor) ([]chclient.PciDeviceInfo, error) { @@ -255,12 +280,14 @@ type CloudHypervisorState struct { PID int VirtiofsDaemonPid int state clhState + isRestoring bool } func (s *CloudHypervisorState) reset() { s.PID = 0 s.VirtiofsDaemonPid = 0 s.state = clhNotReady + s.isRestoring = false } type cloudHypervisor struct { @@ -501,7 +528,7 @@ func getNonUserDefinedKernelParams(rootfstype string, disableNvdimm bool, dax bo } // For cloudHypervisor this call only sets the internal structure up. -// The VM will be created and started through StartVM(). +// The VM will be created and started through StartVM(), or restored from template if template files exist. func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Network, hypervisorConfig *HypervisorConfig) error { clh.ctx = ctx @@ -559,29 +586,78 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net } } - // Create the VM memory config via the constructor to ensure default values are properly assigned - clh.vmconfig.Memory = chclient.NewMemoryConfig(int64((utils.MemUnit(clh.config.MemorySize) * utils.MiB).ToBytes())) - // Memory config shared is to be enabled when using vhost_user backends, ex. virtio-fs - // or when using HugePages. - // If such features are disabled, turn off shared memory config. - if clh.config.SharedFS == config.NoSharedFS && !clh.config.HugePages { - clh.vmconfig.Memory.Shared = func(b bool) *bool { return &b }(false) - } else { - clh.vmconfig.Memory.Shared = func(b bool) *bool { return &b }(true) - } - // Enable hugepages if needed - clh.vmconfig.Memory.Hugepages = func(b bool) *bool { return &b }(clh.config.HugePages) - if !clh.config.ConfidentialGuest { - hotplugSize := clh.config.DefaultMaxMemorySize - // OpenAPI only supports int64 values - clh.vmconfig.Memory.HotplugSize = func(i int64) *int64 { return &i }(int64((utils.MemUnit(hotplugSize) * utils.MiB).ToBytes())) - - if clh.config.ReclaimGuestFreedMemory { - // Create VM with a balloon config so we can enable free page reporting (size of the balloon can be set to zero) - clh.vmconfig.Balloon = chclient.NewBalloonConfig(0) - // Set the free page reporting flag for ballooning to be true - clh.vmconfig.Balloon.SetFreePageReporting(true) + // If the VM is booting from a template, or if the VM is going to be used as a template + // the memory is to be backed by a file, so we need to configure the memory zones accordingly. + if clh.config.BootFromTemplate || clh.config.BootToBeTemplate { + // VM templating is incompatible with virtio-fs because virtio-fs requires shared memory, + // while templating needs COW/private memory on restore. + if clh.config.SharedFS == config.VirtioFS || clh.config.SharedFS == config.VirtioFSNydus { + return errors.New("VM templating has been enabled with virtio-fs and this configuration will not work") } + + // Double-check that the clh.config.MemoryPath file is accessible before using it in the VM config, to avoid hitting a less clear error from cloud hypervisor when it tries to access the memory file. + if _, err := os.Stat(clh.config.MemoryPath); err != nil { + return fmt.Errorf("memory file %s is not accessible: %w", clh.config.MemoryPath, err) + } + + // Set the size to be 0 since we are going to configure actual size via zones + clh.vmconfig.Memory = chclient.NewMemoryConfig(0) + + memoryZoneConfig := chclient.NewMemoryZoneConfig("mem0", int64((utils.MemUnit(clh.config.MemorySize) * utils.MiB).ToBytes())) + if clh.config.BootToBeTemplate { + // When BootToBeTemplate is true, the memory file backing the VM memory is shared between multiple VMs created from the same template. + // So we need to set shared to true in this case. + memoryZoneConfig.SetShared(true) + clh.vmconfig.Memory.Shared = func(b bool) *bool { return &b }(true) + + if !clh.config.ConfidentialGuest { + // TODO: Remove this warning once memory hotplugging is supported + // for template VMs. + // + // Memory hotplug is intentionally not configured for template VMs. + // Resizing a memory zone requires the virtio-mem hotplug method + // (cloud-hypervisor rejects the default ACPI hotplug on a zone that + // carries a hotplug_size), which is not currently supported in the + // templating path. As a result, VMs restored from this template + // cannot grow their memory beyond the template's boot size. + clh.Logger().Warn("memory hotplugging is currently unsupported for template VMs") + } + } else { + // When BootFromTemplate is true, set shared=false to ensure Copy-On-Write is used for the memory file. + // So that the VM can have its own private memory. + memoryZoneConfig.SetShared(false) + clh.vmconfig.Memory.Shared = func(b bool) *bool { return &b }(false) + } + memoryZoneConfig.SetFile(clh.config.MemoryPath) + clh.vmconfig.Memory.Zones = &[]chclient.MemoryZoneConfig{ + *memoryZoneConfig, + } + } else { // Normal (non-template) VM creation + // Create the VM memory config via the constructor to ensure default values are properly assigned + clh.vmconfig.Memory = chclient.NewMemoryConfig(int64((utils.MemUnit(clh.config.MemorySize) * utils.MiB).ToBytes())) + // Memory config shared is to be enabled when using vhost_user backends, ex. virtio-fs + // or when using HugePages. + // If such features are disabled, turn off shared memory config. + if clh.config.SharedFS == config.NoSharedFS && !clh.config.HugePages { + clh.vmconfig.Memory.Shared = func(b bool) *bool { return &b }(false) + } else { + clh.vmconfig.Memory.Shared = func(b bool) *bool { return &b }(true) + } + // Enable hugepages if needed + clh.vmconfig.Memory.Hugepages = func(b bool) *bool { return &b }(clh.config.HugePages) + if !clh.config.ConfidentialGuest { + hotplugSize := clh.config.DefaultMaxMemorySize + // OpenAPI only supports int64 values + clh.vmconfig.Memory.HotplugSize = func(i int64) *int64 { return &i }(int64((utils.MemUnit(hotplugSize) * utils.MiB).ToBytes())) + } + } + + // Configure balloon device for free page reporting. This is set unconditionally + // (for both template and non-template paths) so that template VMs include the + // balloon in their snapshot, and VMs restored from a template inherit it. + if !clh.config.ConfidentialGuest && clh.config.ReclaimGuestFreedMemory { + clh.vmconfig.Balloon = chclient.NewBalloonConfig(0) + clh.vmconfig.Balloon.SetFreePageReporting(true) } // Set initial amount of cpu's for the virtual machine @@ -700,9 +776,113 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net return err } + // Check if we should restore from template instead of creating new VM + if clh.config.BootFromTemplate && clh.shouldRestoreFromTemplate() { + clh.Logger().Info("Template files found, will restore VM instead of creating new") + // Mark this as a restore operation for StartVM to use RestoreVM instead + clh.state.isRestoring = true + return nil + } + return nil } +// shouldRestoreFromTemplate checks if template snapshot files exist and we should restore instead of creating new VM +func (clh *cloudHypervisor) shouldRestoreFromTemplate() bool { + // For template restore, we need the snapshot directory to contain the necessary files + // The snapshotDir is derived from the MemoryPath directory + snapshotDir := filepath.Dir(clh.config.MemoryPath) + + // Check for required template files (config.json, state.json, and memory file) + configFile := filepath.Join(snapshotDir, "config.json") + stateFile := filepath.Join(snapshotDir, "state.json") + memoryFile := clh.config.MemoryPath + + if _, err := os.Stat(configFile); err != nil { + clh.Logger().WithError(err).WithField("configFile", configFile).Debug("Template config file not accessible") + return false + } + + if _, err := os.Stat(stateFile); err != nil { + clh.Logger().WithError(err).WithField("stateFile", stateFile).Debug("Template state file not accessible") + return false + } + + if _, err := os.Stat(memoryFile); err != nil { + clh.Logger().WithError(err).WithField("memoryFile", memoryFile).Debug("Template memory file not accessible") + return false + } + + clh.Logger().WithFields(log.Fields{ + "configFile": configFile, + "stateFile": stateFile, + "memoryFile": memoryFile, + }).Info("Template files found, can restore VM from template") + + return true +} + +// copyFile copies a file from src to dst +func (clh *cloudHypervisor) copyFile(src, dst string) error { + srcFile, err := os.Open(src) + if err != nil { + return err + } + defer srcFile.Close() + + dstFile, err := os.Create(dst) + if err != nil { + return err + } + defer dstFile.Close() + + _, err = io.Copy(dstFile, srcFile) + if err != nil { + return err + } + + return dstFile.Sync() +} + +// updateVsockSocketPath updates the vsock socket path in the config.json file +func (clh *cloudHypervisor) updateVsockSocketPath(configPath, vmID string) error { + // Read the config file + configData, err := os.ReadFile(configPath) + if err != nil { + return err + } + + var config map[string]interface{} + dec := json.NewDecoder(bytes.NewReader(configData)) + dec.UseNumber() + if err := dec.Decode(&config); err != nil { + return err + } + + // Update vsock socket path if vsock exists + if vsock, ok := config["vsock"].(map[string]interface{}); ok { + // Generate new vsock socket path for this VM + newVsockPath, err := clh.vsockSocketPath(vmID) + if err != nil { + return err + } + vsock["socket"] = newVsockPath + + clh.Logger().WithFields(log.Fields{ + "vmID": vmID, + "newVsockPath": newVsockPath, + }).Debug("Updated vsock socket path in config.json") + } + + // Write the updated config back to file + updatedConfig, err := json.Marshal(config) + if err != nil { + return err + } + + return os.WriteFile(configPath, updatedConfig, 0644) +} + // setupInitdata prepares and attaches the initdata disk if present. func setupInitdata(clh *cloudHypervisor, hypervisorConfig *HypervisorConfig) error { if len(hypervisorConfig.Initdata) == 0 { @@ -771,8 +951,37 @@ func (clh *cloudHypervisor) StartVM(ctx context.Context, timeout int) error { ctx, cancel := context.WithTimeout(ctx, bootTimeout*time.Second) defer cancel() - if err := clh.bootVM(ctx); err != nil { - return err + // Check if we should restore from template or create new VM + if clh.state.isRestoring { + // Copy template files to VM directory + snapshotDir := filepath.Dir(clh.config.MemoryPath) + + // Copy config.json from template to VM directory + srcConfig := filepath.Join(snapshotDir, "config.json") + dstConfig := filepath.Join(vmPath, "config.json") + if err := clh.copyFile(srcConfig, dstConfig); err != nil { + return fmt.Errorf("failed to copy config.json: %v", err) + } + + // Copy state.json from template to VM directory + srcState := filepath.Join(snapshotDir, "state.json") + dstState := filepath.Join(vmPath, "state.json") + if err := clh.copyFile(srcState, dstState); err != nil { + return fmt.Errorf("failed to copy state.json: %v", err) + } + + // Update vsock socket path in the copied config.json + if err := clh.updateVsockSocketPath(dstConfig, clh.id); err != nil { + return fmt.Errorf("failed to update vsock socket path: %v", err) + } + + if err := clh.restoreVM(ctx); err != nil { + return err + } + } else { + if err := clh.bootVM(ctx); err != nil { + return err + } } clh.state.state = clhReady @@ -1287,16 +1496,109 @@ func (clh *cloudHypervisor) Cleanup(ctx context.Context) error { func (clh *cloudHypervisor) PauseVM(ctx context.Context) error { clh.Logger().WithField("function", "PauseVM").Info("Pause Sandbox") + + cl := clh.client() + ctx, cancel := context.WithTimeout(ctx, clh.getClhAPITimeout()*time.Second) + defer cancel() + + _, err := cl.VmPausePut(ctx) + if err != nil { + clh.Logger().WithError(err).Error("Failed to pause VM") + return openAPIClientError(err) + } + return nil } func (clh *cloudHypervisor) SaveVM() error { - clh.Logger().WithField("function", "saveSandboxC").Info("Save Sandbox") + clh.Logger().WithField("function", "SaveVM").Info("Save Sandbox") + + cl := clh.client() + ctx, cancel := context.WithTimeout(context.Background(), clh.getClhAPITimeout()*time.Second) + defer cancel() + + snapshotDir := filepath.Dir(clh.config.MemoryPath) + // Create snapshot config with file URL to template path + // Use MemoryPath as base for snapshot destination + // When creating a template, the MemoryPath is set to the template path, so we can use it to save the snapshot. + fileURL := "file://" + snapshotDir + + vmSnapshotConfig := *chclient.NewVmSnapshotConfig() + vmSnapshotConfig.SetDestinationUrl(fileURL) + + _, err := cl.VmSnapshotPut(ctx, vmSnapshotConfig) + if err != nil { + clh.Logger().WithError(err).Error("Failed to save VM snapshot") + return openAPIClientError(err) + } + + if clh.config.BootToBeTemplate { + // Update the config.json file in the snapshotDir to set memory shared=false + snapshotConfigPath := filepath.Join(snapshotDir, "config.json") + snapshotConfig, err := os.ReadFile(snapshotConfigPath) + if err != nil { + clh.Logger().WithError(err).Error("Failed to read snapshot config") + return err + } + + var snapshotConfigData map[string]interface{} + dec := json.NewDecoder(bytes.NewReader(snapshotConfig)) + dec.UseNumber() + if err := dec.Decode(&snapshotConfigData); err != nil { + clh.Logger().WithError(err).Error("Failed to unmarshal snapshot config") + return err + } + + // Access the memory section and cast it to a map + if memorySection, ok := snapshotConfigData["memory"].(map[string]interface{}); ok { + memorySection["shared"] = false + // Do the same update for each element for the "zones" array in the memorySection + if zones, ok := memorySection["zones"].([]interface{}); ok { + for _, zone := range zones { + if zoneMap, ok := zone.(map[string]interface{}); ok { + zoneMap["shared"] = false + } else { + clh.Logger().Error("Unable to access zone in snapshot config memory section") + return fmt.Errorf("invalid snapshot config structure: zone in memory section not found or invalid") + } + } + } else { + clh.Logger().Error("Unable to access zones array in snapshot config memory section") + return fmt.Errorf("invalid snapshot config structure: zones array in memory section not found or invalid") + } + } else { + clh.Logger().Error("Unable to access memory section in snapshot config") + return fmt.Errorf("invalid snapshot config structure: memory section not found or invalid") + } + + // Write the modified config back to file + modifiedConfig, err := json.Marshal(snapshotConfigData) + if err != nil { + clh.Logger().WithError(err).Error("Failed to marshal modified snapshot config") + return err + } + + if err := os.WriteFile(snapshotConfigPath, modifiedConfig, 0644); err != nil { + clh.Logger().WithError(err).Error("Failed to write modified snapshot config") + return err + } + } + return nil } func (clh *cloudHypervisor) ResumeVM(ctx context.Context) error { clh.Logger().WithField("function", "ResumeVM").Info("Resume Sandbox") + cl := clh.client() + ctx, cancel := context.WithTimeout(ctx, clh.getClhAPITimeout()*time.Second) + defer cancel() + + _, err := cl.ResumeVM(ctx) + if err != nil { + clh.Logger().WithError(err).Error("Failed to resume VM") + return openAPIClientError(err) + } + return nil } @@ -1509,11 +1811,11 @@ func (clh *cloudHypervisor) clhPath() (string, error) { p = defaultClhPath } - if _, err = os.Stat(p); os.IsNotExist(err) { - return "", fmt.Errorf("Cloud-Hypervisor path (%s) does not exist", p) + if _, err = os.Stat(p); err != nil { + return "", fmt.Errorf("Cloud-Hypervisor path (%s) is not accessible: %w", p, err) } - return p, err + return p, nil } func (clh *cloudHypervisor) launchClh() error { @@ -1741,6 +2043,60 @@ func (clh *cloudHypervisor) bootVM(ctx context.Context) error { return nil } +// restoreVM restores a VM from a template snapshot. The restored VM will be in +// Paused state. The caller (factory layer, via factory.GetVM → vm.Resume) is +// responsible for resuming the VM, reseeding the RNG, and syncing the guest clock +// before the VM is used. See factory_linux.go GetVM(). +func (clh *cloudHypervisor) restoreVM(ctx context.Context) error { + clh.Logger().Info("Restoring VM from template") + + cl := clh.client() + + // use the VMStorePath as the base for the restore source URL + vmPath := filepath.Join(clh.config.VMStorePath, clh.id) + sourceURL := "file://" + vmPath + + // check if the snapshot directory contains the state.json and config.json files + // which contain the VM state and configuration respectively + stateFile := filepath.Join(vmPath, "state.json") + configFile := filepath.Join(vmPath, "config.json") + + if _, err := os.Stat(stateFile); err != nil { + return fmt.Errorf("failed to access state file %s: %v", stateFile, err) + } + + if _, err := os.Stat(configFile); err != nil { + return fmt.Errorf("failed to access config file %s: %v", configFile, err) + } + + // Prepare restore configuration + restoreConfig := *chclient.NewRestoreConfig(sourceURL) + + clh.Logger().WithField("sourceURL", sourceURL).Debug("Restore configuration") + + // Restore VM from template (uses the caller's ctx, which already has the boot timeout) + _, err := cl.VmRestorePut(ctx, restoreConfig) + if err != nil { + clh.Logger().WithError(err).Error("failed to restore VM from template") + return openAPIClientError(err) + } + + // Check VM state after restoration + info, err := clh.vmInfo() + if err != nil { + return err + } + + clh.Logger().Debugf("VM state after restore: %#v", info) + + if info.State != clhStatePaused { + clh.Logger().Warnf("VM state is '%s' after restore, expected 'Paused'", info.State) + } + + clh.Logger().Info("Successfully restored VM from template") + return nil +} + func (clh *cloudHypervisor) addVSock(cid int64, path string) { clh.Logger().WithFields(log.Fields{ "path": path, diff --git a/src/runtime/virtcontainers/clh_test.go b/src/runtime/virtcontainers/clh_test.go index 7ddbcff206..93cf3b1a70 100644 --- a/src/runtime/virtcontainers/clh_test.go +++ b/src/runtime/virtcontainers/clh_test.go @@ -74,7 +74,9 @@ func newClhConfig() (HypervisorConfig, error) { } type clhClientMock struct { - vmInfo chclient.VmInfo + vmInfo chclient.VmInfo + restoreRequest *chclient.RestoreConfig + snapshotRequest *chclient.VmSnapshotConfig } func (c *clhClientMock) VmmPingGet(ctx context.Context) (chclient.VmmPingResponse, *http.Response, error) { @@ -115,11 +117,35 @@ func (c *clhClientMock) VmAddDiskPut(ctx context.Context, diskConfig chclient.Di return chclient.PciDeviceInfo{Bdf: "0000:00:0a.0"}, nil, nil } +//nolint:golint +func (c *clhClientMock) VmPausePut(ctx context.Context) (*http.Response, error) { + c.vmInfo.State = clhStatePaused + return nil, nil +} + +//nolint:golint +func (c *clhClientMock) VmSnapshotPut(ctx context.Context, vmSnapshotConfig chclient.VmSnapshotConfig) (*http.Response, error) { + c.snapshotRequest = &vmSnapshotConfig + return nil, nil +} + //nolint:golint func (c *clhClientMock) VmRemoveDevicePut(ctx context.Context, vmRemoveDevice chclient.VmRemoveDevice) (*http.Response, error) { return nil, nil } +func (c *clhClientMock) VmRestorePut(ctx context.Context, restoreConfig chclient.RestoreConfig) (*http.Response, error) { + c.restoreRequest = &restoreConfig + // restoreVM() verifies Paused after restore. + c.vmInfo.State = clhStatePaused + return nil, nil +} + +func (c *clhClientMock) ResumeVM(ctx context.Context) (*http.Response, error) { + c.vmInfo.State = clhStateRunning + return nil, nil +} + func TestCloudHypervisorAddVSock(t *testing.T) { assert := assert.New(t) clh := cloudHypervisor{} @@ -516,6 +542,80 @@ func TestClhCreateVM(t *testing.T) { } } +func TestClhRestoreVM(t *testing.T) { + assert := assert.New(t) + + store, err := persist.GetDriver() + assert.NoError(err) + + clhConfig, err := newClhConfig() + assert.NoError(err) + clhConfig.VMStorePath = store.RunVMStoragePath() + clhConfig.RunStorePath = store.RunStoragePath() + + mockClient := &clhClientMock{} + clh := &cloudHypervisor{ + config: clhConfig, + APIClient: mockClient, + } + + // First call restoreVM without the VM snapshot files (state.json, config.json) present. + err = clh.restoreVM(context.Background()) + // An error is expected because restoreVM expects the VM snapshot files to be present. + assert.Error(err) + assert.Contains(err.Error(), filepath.Join(clhConfig.VMStorePath, "state.json")) + + // Now create the VM snapshot files and call restoreVM again. + os.MkdirAll(clhConfig.VMStorePath, os.ModePerm) + stateFile := filepath.Join(clhConfig.VMStorePath, "state.json") + configFile := filepath.Join(clhConfig.VMStorePath, "config.json") + err = os.WriteFile(stateFile, []byte("{}"), 0o600) + assert.NoError(err) + err = os.WriteFile(configFile, []byte("{}"), 0o600) + assert.NoError(err) + + // Call restoreVM again, this time it should succeed. + err = clh.restoreVM(context.Background()) + assert.NoError(err) + + if assert.NotNil(mockClient.restoreRequest) { + expectedSourceURL := "file://" + clhConfig.VMStorePath + assert.Equal(expectedSourceURL, mockClient.restoreRequest.GetSourceUrl()) + } + + info, err := clh.vmInfo() + assert.NoError(err) + assert.Equal(clhStatePaused, info.State) +} + +func TestClhSaveVM(t *testing.T) { + assert := assert.New(t) + + store, err := persist.GetDriver() + assert.NoError(err) + + clhConfig, err := newClhConfig() + assert.NoError(err) + // For testing, assume the memory path is located within the VM store path. + clhConfig.MemoryPath = filepath.Join(store.RunVMStoragePath(), "memory") + clhConfig.VMStorePath = store.RunVMStoragePath() + clhConfig.RunStorePath = store.RunStoragePath() + + mockClient := &clhClientMock{} + clh := &cloudHypervisor{ + config: clhConfig, + APIClient: mockClient, + } + + err = clh.SaveVM() + assert.NoError(err) + + if assert.NotNil(mockClient.snapshotRequest) { + expectedDestinationURL := "file://" + filepath.Dir(clhConfig.MemoryPath) + assert.Equal(expectedDestinationURL, mockClient.snapshotRequest.GetDestinationUrl()) + } +} + func TestCloudHypervisorStartSandbox(t *testing.T) { assert := assert.New(t) clhConfig, err := newClhConfig() diff --git a/src/runtime/virtcontainers/factory/factory_linux.go b/src/runtime/virtcontainers/factory/factory_linux.go index 4a0cfcfe38..c010916943 100644 --- a/src/runtime/virtcontainers/factory/factory_linux.go +++ b/src/runtime/virtcontainers/factory/factory_linux.go @@ -80,6 +80,9 @@ func resetHypervisorConfig(config *vc.VMConfig) { config.HypervisorConfig.SharedPath = "" config.HypervisorConfig.VMStorePath = "" config.HypervisorConfig.RunStorePath = "" + config.HypervisorConfig.SandboxName = "" + config.HypervisorConfig.SandboxNamespace = "" + config.HypervisorConfig.DefaultMaxVCPUs = 0 } // It's important that baseConfig and newConfig are passed by value! diff --git a/src/runtime/virtcontainers/factory/template/template_linux.go b/src/runtime/virtcontainers/factory/template/template_linux.go index d48ce5c50b..2a23cdbccd 100644 --- a/src/runtime/virtcontainers/factory/template/template_linux.go +++ b/src/runtime/virtcontainers/factory/template/template_linux.go @@ -11,6 +11,7 @@ import ( "context" "fmt" "os" + "path/filepath" "syscall" "time" @@ -115,6 +116,15 @@ func (t *template) prepareTemplateFiles() error { } f.Close() + // truncate the memory file to the exact size of the VM memory + memoryInBytes := int64(t.config.HypervisorConfig.MemorySize) * 1024 * 1024 + t.Logger().Infof("truncating memory file %s to %d bytes", t.statePath+"/memory", memoryInBytes) + err = os.Truncate(t.statePath+"/memory", memoryInBytes) + if err != nil { + t.close() + return err + } + return nil } @@ -124,7 +134,8 @@ func (t *template) createTemplateVM(ctx context.Context) error { config.HypervisorConfig.BootToBeTemplate = true config.HypervisorConfig.BootFromTemplate = false config.HypervisorConfig.MemoryPath = t.statePath + "/memory" - config.HypervisorConfig.DevicesStatePath = t.statePath + "/state" + config.HypervisorConfig.DevicesStatePath = t.deviceStatePath() + config.HypervisorConfig.VMStorePath = t.statePath vm, err := vc.NewVM(ctx, config) if err != nil { @@ -161,7 +172,7 @@ func (t *template) createFromTemplateVM(ctx context.Context, c vc.VMConfig) (*vc config.HypervisorConfig.BootToBeTemplate = false config.HypervisorConfig.BootFromTemplate = true config.HypervisorConfig.MemoryPath = t.statePath + "/memory" - config.HypervisorConfig.DevicesStatePath = t.statePath + "/state" + config.HypervisorConfig.DevicesStatePath = t.deviceStatePath() config.HypervisorConfig.SharedPath = c.HypervisorConfig.SharedPath config.HypervisorConfig.VMStorePath = c.HypervisorConfig.VMStorePath config.HypervisorConfig.RunStorePath = c.HypervisorConfig.RunStorePath @@ -175,6 +186,15 @@ func (t *template) checkTemplateVM() error { return err } - _, err = os.Stat(t.statePath + "/state") + _, err = os.Stat(t.deviceStatePath()) return err } + +func (t *template) deviceStatePath() string { + stateFileName := "state" + if t.config.HypervisorType == vc.ClhHypervisor { + stateFileName = "state.json" + } + + return filepath.Join(t.statePath, stateFileName) +} diff --git a/src/runtime/virtcontainers/factory/template/template_test.go b/src/runtime/virtcontainers/factory/template/template_test.go index c067c793e6..a9c78162b9 100644 --- a/src/runtime/virtcontainers/factory/template/template_test.go +++ b/src/runtime/virtcontainers/factory/template/template_test.go @@ -57,15 +57,26 @@ func TestTemplateFactory(t *testing.T) { assert.NoError(err) defer hybridVSockTTRPCMock.Stop() - // New + // Create 2 sets of instance-specific directories for per-VM storage + runStorePath1 := t.TempDir() + vmStorePath1 := t.TempDir() + runStorePath2 := t.TempDir() + vmStorePath2 := t.TempDir() + + // Create a new Template Factory f, err := New(ctx, vmConfig, testDir) assert.Nil(err) // Config assert.Equal(f.Config(), vmConfig) - // GetBaseVM - vm, err := f.GetBaseVM(ctx, vmConfig) + // GetBaseVM with first instance paths + vmConfig1 := vmConfig + vmConfig1.HypervisorConfig.RunStorePath = runStorePath1 + vmConfig1.HypervisorConfig.VMStorePath = vmStorePath1 + + // Test the creation of a new VM from the template factory + vm, err := f.GetBaseVM(ctx, vmConfig1) assert.Nil(err) err = vm.Stop(ctx) @@ -79,44 +90,59 @@ func TestTemplateFactory(t *testing.T) { assert.Equal(tt.Config(), vmConfig) + // Checking that template VM check fails + // if the corresponding memory and state files are absent err = tt.checkTemplateVM() assert.Error(err) - _, err = os.Create(tt.statePath + "/memory") + memFile, err := os.Create(tt.statePath + "/memory") assert.Nil(err) + memFile.Close() err = tt.checkTemplateVM() assert.Error(err) - _, err = os.Create(tt.statePath + "/state") + devFile, err := os.Create(tt.deviceStatePath()) assert.Nil(err) + devFile.Close() + + // After creating state and memory files, checkTemplateVM should succeed err = tt.checkTemplateVM() assert.Nil(err) + // Recreate the template VM, which should succeed err = tt.createTemplateVM(ctx) assert.Nil(err) - vm, err = tt.GetBaseVM(ctx, vmConfig) + // Ensuring that directly calling template's GetBaseVM function + // returns a VM instance similar to the one returned by the factory's GetBaseVM function + vm, err = tt.GetBaseVM(ctx, vmConfig1) assert.Nil(err) err = vm.Stop(ctx) assert.Nil(err) - vm, err = f.GetBaseVM(ctx, vmConfig) + vm, err = f.GetBaseVM(ctx, vmConfig1) assert.Nil(err) err = vm.Stop(ctx) assert.Nil(err) + // Overwriting the template VM should succeed err = tt.createTemplateVM(ctx) assert.Nil(err) - vm, err = tt.GetBaseVM(ctx, vmConfig) + // Create second instance with different storage paths + vmConfig2 := vmConfig + vmConfig2.HypervisorConfig.RunStorePath = runStorePath2 + vmConfig2.HypervisorConfig.VMStorePath = vmStorePath2 + + vm, err = tt.GetBaseVM(ctx, vmConfig2) assert.Nil(err) err = vm.Stop(ctx) assert.Nil(err) - vm, err = f.GetBaseVM(ctx, vmConfig) + vm, err = f.GetBaseVM(ctx, vmConfig2) assert.Nil(err) err = vm.Stop(ctx)