From 9f76467cb77bbefbe04017d3ff1fbd52b6f3c7dd Mon Sep 17 00:00:00 2001 From: Champ-Goblem Date: Thu, 24 Apr 2025 10:56:39 +0200 Subject: [PATCH 1/2] runtime: clh: Add reclaim_guest_freed_memory [BACKPORT] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We're bringing to *Cloud Hypervisor only* the reclaim_guest_freed_memory option already present in runtime-rs. This allows us to use virtio-balloon for the hypervisor to reclaim memory freed by the guest. The reason we're not touching other hypervisors is that we want to avoid cluttering the go code at this point, so we'll leave it for whoever really needs this on other hypervisors (and trust me, we really do need it for Cloud Hypervisor right now ;-)). Signed-off-by: Champ-Goblem Signed-off-by: Fabiano Fidêncio --- src/runtime/config/configuration-clh.toml.in | 10 ++++++++++ src/runtime/pkg/katautils/config-settings.go.in | 1 + src/runtime/pkg/katautils/config.go | 3 +++ src/runtime/pkg/oci/utils.go | 6 ++++++ src/runtime/virtcontainers/clh.go | 8 ++++++++ src/runtime/virtcontainers/hypervisor.go | 3 +++ .../virtcontainers/pkg/annotations/annotations.go | 3 +++ 7 files changed, 34 insertions(+) diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in index 204b2fb700..38c97c01f6 100644 --- a/src/runtime/config/configuration-clh.toml.in +++ b/src/runtime/config/configuration-clh.toml.in @@ -185,6 +185,16 @@ block_device_driver = "virtio-blk" # Default false #block_device_cache_direct = true +# Reclaim guest freed memory. +# Enabling this will result in the VM balloon device having f_reporting=on set. +# Then the hypervisor will use it to reclaim guest freed memory. +# This is useful for reducing the amount of memory used by a VM. +# Enabling this feature may sometimes reduce the speed of memory access in +# the VM. 
+# +# Default false +#reclaim_guest_freed_memory = true + # Enable huge pages for VM RAM, default false # Enabling this will result in the VM memory # being allocated using huge pages. diff --git a/src/runtime/pkg/katautils/config-settings.go.in b/src/runtime/pkg/katautils/config-settings.go.in index 55438f5007..d7680dd1bd 100644 --- a/src/runtime/pkg/katautils/config-settings.go.in +++ b/src/runtime/pkg/katautils/config-settings.go.in @@ -76,6 +76,7 @@ const defaultBlockDeviceCacheDirect bool = false const defaultBlockDeviceCacheNoflush bool = false const defaultEnableIOThreads bool = false const defaultEnableMemPrealloc bool = false +const defaultEnableReclaimGuestFreedMemory bool = false const defaultEnableHugePages bool = false const defaultEnableIOMMU bool = false const defaultEnableIOMMUPlatform bool = false diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index 79665e2593..0c1e211c57 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -147,6 +147,7 @@ type hypervisor struct { VhostUserDeviceReconnect uint32 `toml:"vhost_user_reconnect_timeout_sec"` DisableBlockDeviceUse bool `toml:"disable_block_device_use"` MemPrealloc bool `toml:"enable_mem_prealloc"` + ReclaimGuestFreedMemory bool `toml:"reclaim_guest_freed_memory"` HugePages bool `toml:"enable_hugepages"` VirtioMem bool `toml:"enable_virtio_mem"` IOMMU bool `toml:"enable_iommu"` @@ -1082,6 +1083,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { VirtioFSCacheSize: h.VirtioFSCacheSize, VirtioFSCache: h.VirtioFSCache, MemPrealloc: h.MemPrealloc, + ReclaimGuestFreedMemory: h.ReclaimGuestFreedMemory, HugePages: h.HugePages, FileBackedMemRootDir: h.FileBackedMemRootDir, FileBackedMemRootList: h.FileBackedMemRootList, @@ -1434,6 +1436,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig { DisableBlockDeviceUse: defaultDisableBlockDeviceUse, DefaultBridges: defaultBridgesCount, MemPrealloc: 
defaultEnableMemPrealloc, + ReclaimGuestFreedMemory: defaultEnableReclaimGuestFreedMemory, HugePages: defaultEnableHugePages, IOMMU: defaultEnableIOMMU, IOMMUPlatform: defaultEnableIOMMUPlatform, diff --git a/src/runtime/pkg/oci/utils.go b/src/runtime/pkg/oci/utils.go index e8f792ef58..c997cf1118 100644 --- a/src/runtime/pkg/oci/utils.go +++ b/src/runtime/pkg/oci/utils.go @@ -705,6 +705,12 @@ func addHypervisorMemoryOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig sbConfig.HypervisorConfig.FileBackedMemRootDir = value } + if err := newAnnotationConfiguration(ocispec, vcAnnotations.ReclaimGuestFreedMemory).setBool(func(reclaimGuestFreedMemory bool) { + sbConfig.HypervisorConfig.ReclaimGuestFreedMemory = reclaimGuestFreedMemory + }); err != nil { + return err + } + if err := newAnnotationConfiguration(ocispec, vcAnnotations.HugePages).setBool(func(hugePages bool) { sbConfig.HypervisorConfig.HugePages = hugePages }); err != nil { diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index cbb9460dfe..a622a695ad 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -527,7 +527,15 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net hotplugSize := clh.config.DefaultMaxMemorySize // OpenAPI only supports int64 values clh.vmconfig.Memory.HotplugSize = func(i int64) *int64 { return &i }(int64((utils.MemUnit(hotplugSize) * utils.MiB).ToBytes())) + + if clh.config.ReclaimGuestFreedMemory { + // Create VM with a balloon config so we can enable free page reporting (size of the balloon can be set to zero) + clh.vmconfig.Balloon = chclient.NewBalloonConfig(0) + // Set the free page reporting flag for ballooning to be true + clh.vmconfig.Balloon.SetFreePageReporting(true) + } } + // Set initial amount of cpu's for the virtual machine clh.vmconfig.Cpus = chclient.NewCpusConfig(int32(clh.config.NumVCPUs()), int32(clh.config.DefaultMaxVCPUs)) diff --git 
a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index b0ac28b287..64989cb7a9 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -612,6 +612,9 @@ type HypervisorConfig struct { // MemPrealloc specifies if the memory should be pre-allocated MemPrealloc bool + // ReclaimGuestFreedMemory specifies whether the memory freed by the guest will be reclaimed by the hypervisor or not. + ReclaimGuestFreedMemory bool + // HugePages specifies if the memory should be pre-allocated from huge pages HugePages bool diff --git a/src/runtime/virtcontainers/pkg/annotations/annotations.go b/src/runtime/virtcontainers/pkg/annotations/annotations.go index e71b0525c1..353daabdec 100644 --- a/src/runtime/virtcontainers/pkg/annotations/annotations.go +++ b/src/runtime/virtcontainers/pkg/annotations/annotations.go @@ -167,6 +167,9 @@ const ( // MemPrealloc is a sandbox annotation that specifies the memory space used for nvdimm device by the hypervisor. MemPrealloc = kataAnnotHypervisorPrefix + "enable_mem_prealloc" + // ReclaimGuestFreedMemory is a sandbox annotation that specifies whether the memory freed by the guest will be reclaimed by the hypervisor or not. + ReclaimGuestFreedMemory = kataAnnotHypervisorPrefix + "reclaim_guest_freed_memory" + // HugePages is a sandbox annotation to specify if the memory should be pre-allocated from huge pages HugePages = kataAnnotHypervisorPrefix + "enable_hugepages" From b747f8380e29ab246d647790427af334174eb65c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 25 Apr 2025 21:30:47 +0200 Subject: [PATCH 2/2] clh: Rework CreateVM to reduce the amount of cycles MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise the static checks will whip us as hard as possible. 
Signed-off-by: Fabiano Fidêncio --- src/runtime/virtcontainers/clh.go | 53 +++++++++++++++++++------------ 1 file changed, 32 insertions(+), 21 deletions(-) diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index a622a695ad..29c8001256 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -452,6 +452,37 @@ func (clh *cloudHypervisor) enableProtection() error { } } +func getNonUserDefinedKernelParams(rootfstype string, disableNvdimm bool, dax bool, debug bool, confidential bool, iommu bool) ([]Param, error) { + params, err := GetKernelRootParams(rootfstype, disableNvdimm, dax) + if err != nil { + return []Param{}, err + } + params = append(params, clhKernelParams...) + + if iommu { + params = append(params, Param{"iommu", "pt"}) + } + + if !debug { + // start the guest kernel with 'quiet' in non-debug mode + params = append(params, Param{"quiet", ""}) + return params, nil + } + + // In case of debug ... + + // Followed by extra debug parameters if debug enabled in configuration file + if confidential { + params = append(params, clhDebugConfidentialGuestKernelParams...) + } else if runtime.GOARCH == "arm64" { + params = append(params, clhArmDebugKernelParams...) + } else { + params = append(params, clhDebugKernelParams...) + } + params = append(params, clhDebugKernelParamsCommon...) + return params, nil +} + // For cloudHypervisor this call only sets the internal structure up. // The VM will be created and started through StartVM(). 
func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Network, hypervisorConfig *HypervisorConfig) error { @@ -539,30 +570,10 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net // Set initial amount of cpu's for the virtual machine clh.vmconfig.Cpus = chclient.NewCpusConfig(int32(clh.config.NumVCPUs()), int32(clh.config.DefaultMaxVCPUs)) - params, err := GetKernelRootParams(hypervisorConfig.RootfsType, clh.config.ConfidentialGuest, !clh.config.ConfidentialGuest) + params, err := getNonUserDefinedKernelParams(hypervisorConfig.RootfsType, clh.config.ConfidentialGuest, !clh.config.ConfidentialGuest, clh.config.Debug, clh.config.ConfidentialGuest, clh.config.IOMMU) if err != nil { return err } - params = append(params, clhKernelParams...) - - // Followed by extra debug parameters if debug enabled in configuration file - if clh.config.Debug { - if clh.config.ConfidentialGuest { - params = append(params, clhDebugConfidentialGuestKernelParams...) - } else if runtime.GOARCH == "arm64" { - params = append(params, clhArmDebugKernelParams...) - } else { - params = append(params, clhDebugKernelParams...) - } - params = append(params, clhDebugKernelParamsCommon...) - } else { - // start the guest kernel with 'quiet' in non-debug mode - params = append(params, Param{"quiet", ""}) - } - if clh.config.IOMMU { - params = append(params, Param{"iommu", "pt"}) - } - // Followed by extra kernel parameters defined in the configuration file params = append(params, clh.config.KernelParams...)