runtime: Add option "enable_guest_swap" to config hypervisor.qemu

This commit adds the option "enable_guest_swap" to the hypervisor.qemu
config section. It enables swap in the guest. Default: false.
When enable_guest_swap is enabled, a raw file is inserted into the guest as
the swap device if the swappiness of a container (set by annotation
"io.katacontainers.container.resource.swappiness") is greater than 0.
The size of the swap device is
swap_in_bytes (set by annotation
"io.katacontainers.container.resource.swap_in_bytes") - memory_limit_in_bytes.
If swap_in_bytes is not set, the size is memory_limit_in_bytes.
If neither swap_in_bytes nor memory_limit_in_bytes is set, the size is
default_memory.
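
For example (hypothetical values): with memory_limit_in_bytes = 2 GiB and
swap_in_bytes = 3 GiB, the guest gets a 3 GiB - 2 GiB = 1 GiB swap device;
with only memory_limit_in_bytes = 2 GiB set, the swap device is 2 GiB.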

Fixes: #2201

Signed-off-by: Hui Zhu <teawater@antfin.com>
Author: Hui Zhu, 2021-07-08 18:10:39 +08:00
parent a733f537e5
commit cb6b7667cd
8 changed files with 113 additions and 10 deletions


@@ -356,6 +356,17 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
# See: https://www.qemu.org/docs/master/qemu-qmp-ref.html#Dump-guest-memory for details
#guest_memory_dump_paging=false
# Enable swap in the guest. Default false.
# When enable_guest_swap is enabled, a raw file is inserted into the guest as the swap device
# if the swappiness of a container (set by annotation "io.katacontainers.container.resource.swappiness")
# is greater than 0.
# The size of the swap device is
# swap_in_bytes (set by annotation "io.katacontainers.container.resource.swap_in_bytes") - memory_limit_in_bytes.
# If swap_in_bytes is not set, the size is memory_limit_in_bytes.
# If neither swap_in_bytes nor memory_limit_in_bytes is set, the size is
# default_memory.
#enable_guest_swap = true
[factory]
# VM templating support. Once enabled, new VMs are created from template
# using vm cloning. They will share the same initial kernel, initramfs and


@@ -55,6 +55,7 @@ const defaultVhostUserStorePath string = "/var/run/kata-containers/vhost-user/"
const defaultRxRateLimiterMaxRate = uint64(0)
const defaultTxRateLimiterMaxRate = uint64(0)
const defaultConfidentialGuest = false
const defaultGuestSwap = false

var defaultSGXEPCSize = int64(0)


@@ -133,6 +133,7 @@ type hypervisor struct {
DisableVhostNet bool `toml:"disable_vhost_net"`
GuestMemoryDumpPaging bool `toml:"guest_memory_dump_paging"`
ConfidentialGuest bool `toml:"confidential_guest"`
GuestSwap bool `toml:"enable_guest_swap"`
}

type runtime struct {

@@ -711,6 +712,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
GuestMemoryDumpPath: h.GuestMemoryDumpPath,
GuestMemoryDumpPaging: h.GuestMemoryDumpPaging,
ConfidentialGuest: h.ConfidentialGuest,
GuestSwap: h.GuestSwap,
}, nil
}

@@ -1066,6 +1068,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig {
TxRateLimiterMaxRate: defaultTxRateLimiterMaxRate,
SGXEPCSize: defaultSGXEPCSize,
ConfidentialGuest: defaultConfidentialGuest,
GuestSwap: defaultGuestSwap,
}
}


@@ -458,6 +458,9 @@ type HypervisorConfig struct {
// MemOffset specifies memory space for nvdimm device
MemOffset uint64

// GuestSwap Used to enable/disable swap in the guest
GuestSwap bool
}

// vcpu mapping from vcpu number to thread number


@@ -220,6 +220,9 @@ const (
// TxRateLimiter is a sandbox annotation that specifies max rate on network I/O outbound bandwidth
TxRateLimiterMaxRate = kataAnnotHypervisorPrefix + "tx_rate_limiter_max_rate"

// EnableGuestSwap is a sandbox annotation to enable swap in the guest.
EnableGuestSwap = kataAnnotHypervisorPrefix + "enable_guest_swap"
)

// Runtime related annotations
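
Assuming kataAnnotHypervisorPrefix keeps its existing value of
"io.katacontainers.config.hypervisor.", the full sandbox annotation key is
"io.katacontainers.config.hypervisor.enable_guest_swap".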


@@ -539,6 +539,7 @@ func addHypervisorPathOverrides(ocispec specs.Spec, config *vc.SandboxConfig, ru
}
}
}

return nil
}

@@ -616,6 +617,12 @@ func addHypervisorMemoryOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig
return err
}

if err := newAnnotationConfiguration(ocispec, vcAnnotations.EnableGuestSwap).setBool(func(enableGuestSwap bool) {
sbConfig.HypervisorConfig.GuestSwap = enableGuestSwap
}); err != nil {
return err
}

return nil
}
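
For reference, the following minimal Go sketch shows what a setBool-style
annotation override does; the helper name parseBoolAnnotation and the plain
map of annotations are assumptions for illustration, not the runtime's actual
newAnnotationConfiguration implementation.

package main

import (
	"fmt"
	"strconv"
)

// parseBoolAnnotation is a hypothetical helper: it looks up an annotation key
// and, when the key is present, parses the value as a bool and hands it to
// the supplied setter, mirroring the setBool override above.
func parseBoolAnnotation(annotations map[string]string, key string, set func(bool)) error {
	value, ok := annotations[key]
	if !ok {
		return nil // annotation not set, keep the configured default
	}
	b, err := strconv.ParseBool(value)
	if err != nil {
		return fmt.Errorf("annotation %q: %w", key, err)
	}
	set(b)
	return nil
}

func main() {
	guestSwap := false
	annotations := map[string]string{
		"io.katacontainers.config.hypervisor.enable_guest_swap": "true",
	}
	if err := parseBoolAnnotation(annotations, "io.katacontainers.config.hypervisor.enable_guest_swap",
		func(v bool) { guestSwap = v }); err != nil {
		panic(err)
	}
	fmt.Println("GuestSwap:", guestSwap)
}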


@@ -8,6 +8,7 @@ package virtcontainers
import (
"bufio"
"bytes"
"context"
"fmt"
"io"

@@ -65,6 +66,8 @@ const (
// DirMode is the permission bits used for creating a directory
DirMode = os.FileMode(0750) | os.ModeDir

mkswapPath = "/sbin/mkswap"
)

var (

@@ -200,6 +203,10 @@ type Sandbox struct {
ctx context.Context
cw *consoleWatcher

swapDeviceNum uint
swapSizeBytes int64
swapDevices []*config.BlockDrive
}

// ID returns the sandbox identifier string.

@@ -519,6 +526,9 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
sharePidNs: sandboxConfig.SharePidNs,
networkNS: NetworkNamespace{NetNsPath: sandboxConfig.NetworkConfig.NetNSPath},
ctx: ctx,
swapDeviceNum: 0,
swapSizeBytes: 0,
swapDevices: []*config.BlockDrive{},
}

hypervisor.setSandbox(s)

@@ -1028,9 +1038,13 @@ func (s *Sandbox) addSwap(ctx context.Context, swapID string, size int64) (*conf
return nil, err
}

var outbuf, errbuf bytes.Buffer
cmd := exec.CommandContext(ctx, mkswapPath, swapFile)
cmd.Stdout = &outbuf
cmd.Stderr = &errbuf
err = cmd.Run()
if err != nil {
err = fmt.Errorf("mkswap swapfile %s fail %s stdout %s stderr %s", swapFile, err.Error(), outbuf.String(), errbuf.String())
s.Logger().WithError(err).Error("addSwap")
return nil, err
}

@@ -1079,6 +1093,30 @@ func (s *Sandbox) removeSwap(ctx context.Context, blockDevice *config.BlockDrive
return err
}

func (s *Sandbox) setupSwap(ctx context.Context, sizeBytes int64) error {
if sizeBytes > s.swapSizeBytes {
dev, err := s.addSwap(ctx, fmt.Sprintf("swap%d", s.swapDeviceNum), sizeBytes-s.swapSizeBytes)
if err != nil {
return err
}
s.swapDeviceNum += 1
s.swapSizeBytes = sizeBytes
s.swapDevices = append(s.swapDevices, dev)
}
return nil
}

func (s *Sandbox) cleanSwap(ctx context.Context) {
for _, dev := range s.swapDevices {
err := s.removeSwap(ctx, dev)
if err != nil {
s.Logger().Warnf("remove swap device %+v got error %s", dev, err)
}
}
}
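
As the later hunks show, setupSwap is called from updateResources when the
accumulated sandboxSwapByte is greater than 0, and cleanSwap is called from
Stop to remove the swap devices that were added.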

// startVM starts the VM.
func (s *Sandbox) startVM(ctx context.Context) (err error) {
span, ctx := katatrace.Trace(ctx, s.Logger(), "startVM", s.tracingTags())

@@ -1641,6 +1679,8 @@ func (s *Sandbox) Stop(ctx context.Context, force bool) error {
return err
}

s.cleanSwap(ctx)

return nil
}

@@ -1894,9 +1934,21 @@ func (s *Sandbox) updateResources(ctx context.Context) error {
// Add default vcpus for sandbox
sandboxVCPUs += s.hypervisor.hypervisorConfig().NumVCPUs

sandboxMemoryByte, sandboxneedPodSwap, sandboxSwapByte := s.calculateSandboxMemory()
// Add default / rsvd memory for sandbox.
hypervisorMemoryByte := int64(s.hypervisor.hypervisorConfig().MemorySize) << utils.MibToBytesShift
sandboxMemoryByte += hypervisorMemoryByte
if sandboxneedPodSwap {
sandboxSwapByte += hypervisorMemoryByte
}
// Setup the SWAP in the guest
if sandboxSwapByte > 0 {
err = s.setupSwap(ctx, sandboxSwapByte)
if err != nil {
return err
}
}
// Update VCPUs
s.Logger().WithField("cpus-sandbox", sandboxVCPUs).Debugf("Request to hypervisor to update vCPUs")

@@ -1941,8 +1993,10 @@ func (s *Sandbox) updateResources(ctx context.Context) error {
return nil
}

func (s *Sandbox) calculateSandboxMemory() (int64, bool, int64) {
memorySandbox := int64(0)
needPodSwap := false
swapSandbox := int64(0)
for _, c := range s.config.Containers {
// Do not hot add again non-running containers resources
if cont, ok := s.containers[c.ID]; ok && cont.state.State == types.StateStopped {

@@ -1950,11 +2004,30 @@ func (s *Sandbox) calculateSandboxMemory() int64 {
continue
}

if m := c.Resources.Memory; m != nil {
currentLimit := int64(0)
if m.Limit != nil {
currentLimit = *m.Limit
memorySandbox += currentLimit
}
if s.config.HypervisorConfig.GuestSwap && m.Swappiness != nil && *m.Swappiness > 0 {
currentSwap := int64(0)
if m.Swap != nil {
currentSwap = *m.Swap
}
if currentSwap == 0 {
if currentLimit == 0 {
needPodSwap = true
} else {
swapSandbox += currentLimit
}
} else if currentSwap > currentLimit {
swapSandbox = currentSwap - currentLimit
}
}
}
}

return memorySandbox, needPodSwap, swapSandbox
}

func (s *Sandbox) calculateSandboxCPUs() (uint32, error) {
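
The per-container sizing rule in calculateSandboxMemory can be read in
isolation. The standalone Go sketch below only illustrates that rule (the
helper containerSwap and its parameters are assumptions, not part of the
commit); it assumes swappiness, the memory limit and the swap limit come
from the container's memory resources.

package main

import "fmt"

// containerSwap mirrors the per-container rule used by calculateSandboxMemory:
// swap is only considered when guest swap is enabled and swappiness > 0.
// It returns the swap bytes contributed by one container and whether the
// sandbox must fall back to pod-level swap sized from default_memory.
func containerSwap(guestSwap bool, swappiness uint64, limit, swap int64) (swapBytes int64, needPodSwap bool) {
	if !guestSwap || swappiness == 0 {
		return 0, false
	}
	if swap == 0 {
		if limit == 0 {
			// Neither swap_in_bytes nor memory_limit_in_bytes is set:
			// size the swap device from default_memory instead.
			return 0, true
		}
		// Only memory_limit_in_bytes is set: the swap device matches the limit.
		return limit, false
	}
	if swap > limit {
		// Both are set: the swap device is swap_in_bytes - memory_limit_in_bytes.
		return swap - limit, false
	}
	return 0, false
}

func main() {
	const gib = int64(1) << 30
	bytes, needPodSwap := containerSwap(true, 60, 2*gib, 3*gib)
	fmt.Printf("swap device: %d GiB, needPodSwap: %v\n", bytes/gib, needPodSwap)
}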


@@ -168,8 +168,10 @@ func TestCalculateSandboxMem(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
sandbox.config.Containers = tt.containers
mem, needSwap, swap := sandbox.calculateSandboxMemory()
assert.Equal(t, mem, tt.want)
assert.Equal(t, needSwap, false)
assert.Equal(t, swap, int64(0))
})
}
}