From 9f76467cb77bbefbe04017d3ff1fbd52b6f3c7dd Mon Sep 17 00:00:00 2001
From: Champ-Goblem <cameron@northflank.com>
Date: Thu, 24 Apr 2025 10:56:39 +0200
Subject: [PATCH 1/2] runtime: clh: Add reclaim_guest_freed_memory [BACKPORT]
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We're bringing to *Cloud Hypervisor only* the reclaim_guest_freed_memory
option already present in the runtime-rs.

This allows us to use virtio-balloon for the hypervisor to reclaim
memory freed by the guest.

We're not touching the other hypervisors because we want to avoid
cluttering the Go code at this point, so we'll leave that to whoever
really needs this on another hypervisor (and trust me, we really do
need it for Cloud Hypervisor right now ;-)).

Signed-off-by: Champ-Goblem <cameron@northflank.com>
Signed-off-by: Fabiano Fidêncio <fidencio@northflank.com>
---
 src/runtime/config/configuration-clh.toml.in           | 10 ++++++++++
 src/runtime/pkg/katautils/config-settings.go.in        |  1 +
 src/runtime/pkg/katautils/config.go                    |  3 +++
 src/runtime/pkg/oci/utils.go                           |  6 ++++++
 src/runtime/virtcontainers/clh.go                      |  8 ++++++++
 src/runtime/virtcontainers/hypervisor.go               |  3 +++
 .../virtcontainers/pkg/annotations/annotations.go      |  3 +++
 7 files changed, 34 insertions(+)

diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in
index 204b2fb700..38c97c01f6 100644
--- a/src/runtime/config/configuration-clh.toml.in
+++ b/src/runtime/config/configuration-clh.toml.in
@@ -185,6 +185,16 @@ block_device_driver = "virtio-blk"
 # Default false
 #block_device_cache_direct = true
 
+# Reclaim guest freed memory.
+# Enabling this will result in the VM balloon device having f_reporting=on set.
+# Then the hypervisor will use it to reclaim guest freed memory.
+# This is useful for reducing the amount of memory used by a VM.
+# Enabling this feature may sometimes reduce the speed of memory access in
+# the VM.
+#
+# Default false
+#reclaim_guest_freed_memory = true
+
 # Enable huge pages for VM RAM, default false
 # Enabling this will result in the VM memory
 # being allocated using huge pages.
diff --git a/src/runtime/pkg/katautils/config-settings.go.in b/src/runtime/pkg/katautils/config-settings.go.in
index 55438f5007..d7680dd1bd 100644
--- a/src/runtime/pkg/katautils/config-settings.go.in
+++ b/src/runtime/pkg/katautils/config-settings.go.in
@@ -76,6 +76,7 @@ const defaultBlockDeviceCacheDirect bool = false
 const defaultBlockDeviceCacheNoflush bool = false
 const defaultEnableIOThreads bool = false
 const defaultEnableMemPrealloc bool = false
+const defaultEnableReclaimGuestFreedMemory bool = false
 const defaultEnableHugePages bool = false
 const defaultEnableIOMMU bool = false
 const defaultEnableIOMMUPlatform bool = false
diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go
index 79665e2593..0c1e211c57 100644
--- a/src/runtime/pkg/katautils/config.go
+++ b/src/runtime/pkg/katautils/config.go
@@ -147,6 +147,7 @@ type hypervisor struct {
 	VhostUserDeviceReconnect       uint32                    `toml:"vhost_user_reconnect_timeout_sec"`
 	DisableBlockDeviceUse          bool                      `toml:"disable_block_device_use"`
 	MemPrealloc                    bool                      `toml:"enable_mem_prealloc"`
+	ReclaimGuestFreedMemory        bool                      `toml:"reclaim_guest_freed_memory"`
 	HugePages                      bool                      `toml:"enable_hugepages"`
 	VirtioMem                      bool                      `toml:"enable_virtio_mem"`
 	IOMMU                          bool                      `toml:"enable_iommu"`
@@ -1082,6 +1083,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
 		VirtioFSCacheSize:              h.VirtioFSCacheSize,
 		VirtioFSCache:                  h.VirtioFSCache,
 		MemPrealloc:                    h.MemPrealloc,
+		ReclaimGuestFreedMemory:        h.ReclaimGuestFreedMemory,
 		HugePages:                      h.HugePages,
 		FileBackedMemRootDir:           h.FileBackedMemRootDir,
 		FileBackedMemRootList:          h.FileBackedMemRootList,
@@ -1434,6 +1436,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig {
 		DisableBlockDeviceUse:    defaultDisableBlockDeviceUse,
 		DefaultBridges:           defaultBridgesCount,
 		MemPrealloc:              defaultEnableMemPrealloc,
+		ReclaimGuestFreedMemory:  defaultEnableReclaimGuestFreedMemory,
 		HugePages:                defaultEnableHugePages,
 		IOMMU:                    defaultEnableIOMMU,
 		IOMMUPlatform:            defaultEnableIOMMUPlatform,
diff --git a/src/runtime/pkg/oci/utils.go b/src/runtime/pkg/oci/utils.go
index e8f792ef58..c997cf1118 100644
--- a/src/runtime/pkg/oci/utils.go
+++ b/src/runtime/pkg/oci/utils.go
@@ -705,6 +705,12 @@ func addHypervisorMemoryOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig
 		sbConfig.HypervisorConfig.FileBackedMemRootDir = value
 	}
 
+	if err := newAnnotationConfiguration(ocispec, vcAnnotations.ReclaimGuestFreedMemory).setBool(func(reclaimGuestFreedMemory bool) {
+		sbConfig.HypervisorConfig.ReclaimGuestFreedMemory = reclaimGuestFreedMemory
+	}); err != nil {
+		return err
+	}
+
 	if err := newAnnotationConfiguration(ocispec, vcAnnotations.HugePages).setBool(func(hugePages bool) {
 		sbConfig.HypervisorConfig.HugePages = hugePages
 	}); err != nil {
diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go
index cbb9460dfe..a622a695ad 100644
--- a/src/runtime/virtcontainers/clh.go
+++ b/src/runtime/virtcontainers/clh.go
@@ -527,7 +527,15 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net
 		hotplugSize := clh.config.DefaultMaxMemorySize
 		// OpenAPI only supports int64 values
 		clh.vmconfig.Memory.HotplugSize = func(i int64) *int64 { return &i }(int64((utils.MemUnit(hotplugSize) * utils.MiB).ToBytes()))
+
+		if clh.config.ReclaimGuestFreedMemory {
+			// Create VM with a balloon config so we can enable free page reporting (size of the balloon can be set to zero)
+			clh.vmconfig.Balloon = chclient.NewBalloonConfig(0)
+			// Set the free page reporting flag for ballooning to be true
+			clh.vmconfig.Balloon.SetFreePageReporting(true)
+		}
 	}
+
 	// Set initial amount of cpu's for the virtual machine
 	clh.vmconfig.Cpus = chclient.NewCpusConfig(int32(clh.config.NumVCPUs()), int32(clh.config.DefaultMaxVCPUs))
 
diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go
index b0ac28b287..64989cb7a9 100644
--- a/src/runtime/virtcontainers/hypervisor.go
+++ b/src/runtime/virtcontainers/hypervisor.go
@@ -612,6 +612,9 @@ type HypervisorConfig struct {
 	// MemPrealloc specifies if the memory should be pre-allocated
 	MemPrealloc bool
 
+	// ReclaimGuestFreedMemory specifies whether the memory freed by the guest will be reclaimed by the hypervisor or not.
+	ReclaimGuestFreedMemory bool
+
 	// HugePages specifies if the memory should be pre-allocated from huge pages
 	HugePages bool
 
diff --git a/src/runtime/virtcontainers/pkg/annotations/annotations.go b/src/runtime/virtcontainers/pkg/annotations/annotations.go
index e71b0525c1..353daabdec 100644
--- a/src/runtime/virtcontainers/pkg/annotations/annotations.go
+++ b/src/runtime/virtcontainers/pkg/annotations/annotations.go
@@ -167,6 +167,9 @@ const (
 	// MemPrealloc is a sandbox annotation that specifies the memory space used for nvdimm device by the hypervisor.
 	MemPrealloc = kataAnnotHypervisorPrefix + "enable_mem_prealloc"
 
+	// ReclaimGuestFreedMemory is a sandbox annotation that specifies whether the memory freed by the guest will be reclaimed by the hypervisor or not.
+	ReclaimGuestFreedMemory = kataAnnotHypervisorPrefix + "reclaim_guest_freed_memory"
+
 	// HugePages is a sandbox annotation to specify if the memory should be pre-allocated from huge pages
 	HugePages = kataAnnotHypervisorPrefix + "enable_hugepages"
 

From b747f8380e29ab246d647790427af334174eb65c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= <fidencio@northflank.com>
Date: Fri, 25 Apr 2025 21:30:47 +0200
Subject: [PATCH 2/2] clh: Rework CreateVM to reduce the amount of cycles
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

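Move the assembly of the non-user-defined kernel parameters out of
CreateVM and into a helper function. Otherwise the static checks will
whip us as hard as possible.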

Signed-off-by: Fabiano Fidêncio <fidencio@northflank.com>
---
 src/runtime/virtcontainers/clh.go | 53 +++++++++++++++++++------------
 1 file changed, 32 insertions(+), 21 deletions(-)

diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go
index a622a695ad..29c8001256 100644
--- a/src/runtime/virtcontainers/clh.go
+++ b/src/runtime/virtcontainers/clh.go
@@ -452,6 +452,37 @@ func (clh *cloudHypervisor) enableProtection() error {
 	}
 }
 
+func getNonUserDefinedKernelParams(rootfstype string, disableNvdimm bool, dax bool, debug bool, confidential bool, iommu bool) ([]Param, error) {
+	params, err := GetKernelRootParams(rootfstype, disableNvdimm, dax)
+	if err != nil {
+		return []Param{}, err
+	}
+	params = append(params, clhKernelParams...)
+
+	if iommu {
+		params = append(params, Param{"iommu", "pt"})
+	}
+
+	if !debug {
+		// Non-debug mode: boot the guest kernel with 'quiet' and return early.
+		params = append(params, Param{"quiet", ""})
+		return params, nil
+	}
+
+	// From here on debug is enabled; the non-debug case returned above.
+
+	// Append the debug parameters for a confidential guest, arm64, or the default case, then the common debug ones.
+	if confidential {
+		params = append(params, clhDebugConfidentialGuestKernelParams...)
+	} else if runtime.GOARCH == "arm64" {
+		params = append(params, clhArmDebugKernelParams...)
+	} else {
+		params = append(params, clhDebugKernelParams...)
+	}
+	params = append(params, clhDebugKernelParamsCommon...)
+	return params, nil
+}
+
 // For cloudHypervisor this call only sets the internal structure up.
 // The VM will be created and started through StartVM().
 func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Network, hypervisorConfig *HypervisorConfig) error {
@@ -539,30 +570,10 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net
 	// Set initial amount of cpu's for the virtual machine
 	clh.vmconfig.Cpus = chclient.NewCpusConfig(int32(clh.config.NumVCPUs()), int32(clh.config.DefaultMaxVCPUs))
 
-	params, err := GetKernelRootParams(hypervisorConfig.RootfsType, clh.config.ConfidentialGuest, !clh.config.ConfidentialGuest)
+	params, err := getNonUserDefinedKernelParams(hypervisorConfig.RootfsType, clh.config.ConfidentialGuest, !clh.config.ConfidentialGuest, clh.config.Debug, clh.config.ConfidentialGuest, clh.config.IOMMU)
 	if err != nil {
 		return err
 	}
-	params = append(params, clhKernelParams...)
-
-	// Followed by extra debug parameters if debug enabled in configuration file
-	if clh.config.Debug {
-		if clh.config.ConfidentialGuest {
-			params = append(params, clhDebugConfidentialGuestKernelParams...)
-		} else if runtime.GOARCH == "arm64" {
-			params = append(params, clhArmDebugKernelParams...)
-		} else {
-			params = append(params, clhDebugKernelParams...)
-		}
-		params = append(params, clhDebugKernelParamsCommon...)
-	} else {
-		// start the guest kernel with 'quiet' in non-debug mode
-		params = append(params, Param{"quiet", ""})
-	}
-	if clh.config.IOMMU {
-		params = append(params, Param{"iommu", "pt"})
-	}
-
 	// Followed by extra kernel parameters defined in the configuration file
 	params = append(params, clh.config.KernelParams...)