diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in index 204b2fb700..38c97c01f6 100644 --- a/src/runtime/config/configuration-clh.toml.in +++ b/src/runtime/config/configuration-clh.toml.in @@ -185,6 +185,16 @@ block_device_driver = "virtio-blk" # Default false #block_device_cache_direct = true +# Reclaim guest freed memory. +# Enabling this will result in the VM balloon device having f_reporting=on set. +# Then the hypervisor will use it to reclaim guest freed memory. +# This is useful for reducing the amount of memory used by a VM. +# Enabling this feature may sometimes reduce the speed of memory access in +# the VM. +# +# Default false +#reclaim_guest_freed_memory = true + # Enable huge pages for VM RAM, default false # Enabling this will result in the VM memory # being allocated using huge pages. diff --git a/src/runtime/pkg/katautils/config-settings.go.in b/src/runtime/pkg/katautils/config-settings.go.in index 55438f5007..d7680dd1bd 100644 --- a/src/runtime/pkg/katautils/config-settings.go.in +++ b/src/runtime/pkg/katautils/config-settings.go.in @@ -76,6 +76,7 @@ const defaultBlockDeviceCacheDirect bool = false const defaultBlockDeviceCacheNoflush bool = false const defaultEnableIOThreads bool = false const defaultEnableMemPrealloc bool = false +const defaultEnableReclaimGuestFreedMemory bool = false const defaultEnableHugePages bool = false const defaultEnableIOMMU bool = false const defaultEnableIOMMUPlatform bool = false diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index 79665e2593..0c1e211c57 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -147,6 +147,7 @@ type hypervisor struct { VhostUserDeviceReconnect uint32 `toml:"vhost_user_reconnect_timeout_sec"` DisableBlockDeviceUse bool `toml:"disable_block_device_use"` MemPrealloc bool `toml:"enable_mem_prealloc"` + ReclaimGuestFreedMemory bool 
`toml:"reclaim_guest_freed_memory"` HugePages bool `toml:"enable_hugepages"` VirtioMem bool `toml:"enable_virtio_mem"` IOMMU bool `toml:"enable_iommu"` @@ -1082,6 +1083,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { VirtioFSCacheSize: h.VirtioFSCacheSize, VirtioFSCache: h.VirtioFSCache, MemPrealloc: h.MemPrealloc, + ReclaimGuestFreedMemory: h.ReclaimGuestFreedMemory, HugePages: h.HugePages, FileBackedMemRootDir: h.FileBackedMemRootDir, FileBackedMemRootList: h.FileBackedMemRootList, @@ -1434,6 +1436,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig { DisableBlockDeviceUse: defaultDisableBlockDeviceUse, DefaultBridges: defaultBridgesCount, MemPrealloc: defaultEnableMemPrealloc, + ReclaimGuestFreedMemory: defaultEnableReclaimGuestFreedMemory, HugePages: defaultEnableHugePages, IOMMU: defaultEnableIOMMU, IOMMUPlatform: defaultEnableIOMMUPlatform, diff --git a/src/runtime/pkg/oci/utils.go b/src/runtime/pkg/oci/utils.go index e8f792ef58..c997cf1118 100644 --- a/src/runtime/pkg/oci/utils.go +++ b/src/runtime/pkg/oci/utils.go @@ -705,6 +705,12 @@ func addHypervisorMemoryOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig sbConfig.HypervisorConfig.FileBackedMemRootDir = value } + if err := newAnnotationConfiguration(ocispec, vcAnnotations.ReclaimGuestFreedMemory).setBool(func(reclaimGuestFreedMemory bool) { + sbConfig.HypervisorConfig.ReclaimGuestFreedMemory = reclaimGuestFreedMemory + }); err != nil { + return err + } + if err := newAnnotationConfiguration(ocispec, vcAnnotations.HugePages).setBool(func(hugePages bool) { sbConfig.HypervisorConfig.HugePages = hugePages }); err != nil { diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index cbb9460dfe..29c8001256 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -452,6 +452,37 @@ func (clh *cloudHypervisor) enableProtection() error { } } +func getNonUserDefinedKernelParams(rootfstype string, disableNvdimm 
bool, dax bool, debug bool, confidential bool, iommu bool) ([]Param, error) { + params, err := GetKernelRootParams(rootfstype, disableNvdimm, dax) + if err != nil { + return []Param{}, err + } + params = append(params, clhKernelParams...) + + if iommu { + params = append(params, Param{"iommu", "pt"}) + } + + if !debug { + // start the guest kernel with 'quiet' in non-debug mode + params = append(params, Param{"quiet", ""}) + return params, nil + } + + // In case of debug ... + + // Followed by extra debug parameters if debug enabled in configuration file + if confidential { + params = append(params, clhDebugConfidentialGuestKernelParams...) + } else if runtime.GOARCH == "arm64" { + params = append(params, clhArmDebugKernelParams...) + } else { + params = append(params, clhDebugKernelParams...) + } + params = append(params, clhDebugKernelParamsCommon...) + return params, nil +} + // For cloudHypervisor this call only sets the internal structure up. // The VM will be created and started through StartVM(). 
func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Network, hypervisorConfig *HypervisorConfig) error { @@ -527,34 +558,22 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net hotplugSize := clh.config.DefaultMaxMemorySize // OpenAPI only supports int64 values clh.vmconfig.Memory.HotplugSize = func(i int64) *int64 { return &i }(int64((utils.MemUnit(hotplugSize) * utils.MiB).ToBytes())) + + if clh.config.ReclaimGuestFreedMemory { + // Create VM with a balloon config so we can enable free page reporting (size of the balloon can be set to zero) + clh.vmconfig.Balloon = chclient.NewBalloonConfig(0) + // Set the free page reporting flag for ballooning to be true + clh.vmconfig.Balloon.SetFreePageReporting(true) + } } + // Set initial amount of cpu's for the virtual machine clh.vmconfig.Cpus = chclient.NewCpusConfig(int32(clh.config.NumVCPUs()), int32(clh.config.DefaultMaxVCPUs)) - params, err := GetKernelRootParams(hypervisorConfig.RootfsType, clh.config.ConfidentialGuest, !clh.config.ConfidentialGuest) + params, err := getNonUserDefinedKernelParams(hypervisorConfig.RootfsType, clh.config.ConfidentialGuest, !clh.config.ConfidentialGuest, clh.config.Debug, clh.config.ConfidentialGuest, clh.config.IOMMU) if err != nil { return err } - params = append(params, clhKernelParams...) - - // Followed by extra debug parameters if debug enabled in configuration file - if clh.config.Debug { - if clh.config.ConfidentialGuest { - params = append(params, clhDebugConfidentialGuestKernelParams...) - } else if runtime.GOARCH == "arm64" { - params = append(params, clhArmDebugKernelParams...) - } else { - params = append(params, clhDebugKernelParams...) - } - params = append(params, clhDebugKernelParamsCommon...) 
- } else { - // start the guest kernel with 'quiet' in non-debug mode - params = append(params, Param{"quiet", ""}) - } - if clh.config.IOMMU { - params = append(params, Param{"iommu", "pt"}) - } - // Followed by extra kernel parameters defined in the configuration file params = append(params, clh.config.KernelParams...) diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index b0ac28b287..64989cb7a9 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -612,6 +612,9 @@ type HypervisorConfig struct { // MemPrealloc specifies if the memory should be pre-allocated MemPrealloc bool + // ReclaimGuestFreedMemory specifies whether the memory freed by the guest will be reclaimed by the hypervisor or not. + ReclaimGuestFreedMemory bool + // HugePages specifies if the memory should be pre-allocated from huge pages HugePages bool diff --git a/src/runtime/virtcontainers/pkg/annotations/annotations.go b/src/runtime/virtcontainers/pkg/annotations/annotations.go index e71b0525c1..353daabdec 100644 --- a/src/runtime/virtcontainers/pkg/annotations/annotations.go +++ b/src/runtime/virtcontainers/pkg/annotations/annotations.go @@ -167,6 +167,9 @@ const ( // MemPrealloc is a sandbox annotation that specifies the memory space used for nvdimm device by the hypervisor. MemPrealloc = kataAnnotHypervisorPrefix + "enable_mem_prealloc" + // ReclaimGuestFreedMemory is a sandbox annotation that specifies whether the memory freed by the guest will be reclaimed by the hypervisor or not. + ReclaimGuestFreedMemory = kataAnnotHypervisorPrefix + "reclaim_guest_freed_memory" + // HugePages is a sandbox annotation to specify if the memory should be pre-allocated from huge pages HugePages = kataAnnotHypervisorPrefix + "enable_hugepages"