From cb6b7667cdf4299d4ef73b4a5350d0179c27e802 Mon Sep 17 00:00:00 2001 From: Hui Zhu Date: Thu, 8 Jul 2021 18:10:39 +0800 Subject: [PATCH] runtime: Add option "enable_guest_swap" to config hypervisor.qemu This commit adds the option "enable_guest_swap" to config hypervisor.qemu. It will enable swap in the guest. Default false. When enable_guest_swap is enabled, insert a raw file to the guest as the swap device if the swappiness of a container (set by annotation "io.katacontainers.container.resource.swappiness") is greater than 0. The size of the swap device should be swap_in_bytes (set by annotation "io.katacontainers.container.resource.swap_in_bytes") - memory_limit_in_bytes. If swap_in_bytes is not set, the size should be memory_limit_in_bytes. If swap_in_bytes and memory_limit_in_bytes are not set, the size should be default_memory. Fixes: #2201 Signed-off-by: Hui Zhu --- .../cli/config/configuration-qemu.toml.in | 11 +++ .../pkg/katautils/config-settings.go.in | 1 + src/runtime/pkg/katautils/config.go | 3 + src/runtime/virtcontainers/hypervisor.go | 3 + .../pkg/annotations/annotations.go | 3 + src/runtime/virtcontainers/pkg/oci/utils.go | 7 ++ src/runtime/virtcontainers/sandbox.go | 89 +++++++++++++++++-- src/runtime/virtcontainers/sandbox_test.go | 6 +- 8 files changed, 113 insertions(+), 10 deletions(-) diff --git a/src/runtime/cli/config/configuration-qemu.toml.in b/src/runtime/cli/config/configuration-qemu.toml.in index 19fa2b3b65..2113a527ee 100644 --- a/src/runtime/cli/config/configuration-qemu.toml.in +++ b/src/runtime/cli/config/configuration-qemu.toml.in @@ -356,6 +356,17 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@ # See: https://www.qemu.org/docs/master/qemu-qmp-ref.html#Dump-guest-memory for details #guest_memory_dump_paging=false +# Enable swap in the guest. Default false. 
+# When enable_guest_swap is enabled, insert a raw file to the guest as the swap device +# if the swappiness of a container (set by annotation "io.katacontainers.container.resource.swappiness") +# is greater than 0. +# The size of the swap device should be +# swap_in_bytes (set by annotation "io.katacontainers.container.resource.swap_in_bytes") - memory_limit_in_bytes. +# If swap_in_bytes is not set, the size should be memory_limit_in_bytes. +# If swap_in_bytes and memory_limit_in_bytes are not set, the size should +# be default_memory. +#enable_guest_swap = true + [factory] # VM templating support. Once enabled, new VMs are created from template # using vm cloning. They will share the same initial kernel, initramfs and diff --git a/src/runtime/pkg/katautils/config-settings.go.in b/src/runtime/pkg/katautils/config-settings.go.in index e92d8741c9..6231ddd393 100644 --- a/src/runtime/pkg/katautils/config-settings.go.in +++ b/src/runtime/pkg/katautils/config-settings.go.in @@ -55,6 +55,7 @@ const defaultVhostUserStorePath string = "/var/run/kata-containers/vhost-user/" const defaultRxRateLimiterMaxRate = uint64(0) const defaultTxRateLimiterMaxRate = uint64(0) const defaultConfidentialGuest = false +const defaultGuestSwap = false var defaultSGXEPCSize = int64(0) diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index fc9e5a013b..41ff6c3bdf 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -133,6 +133,7 @@ type hypervisor struct { DisableVhostNet bool `toml:"disable_vhost_net"` GuestMemoryDumpPaging bool `toml:"guest_memory_dump_paging"` ConfidentialGuest bool `toml:"confidential_guest"` + GuestSwap bool `toml:"enable_guest_swap"` } type runtime struct { @@ -711,6 +712,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { GuestMemoryDumpPath: h.GuestMemoryDumpPath, GuestMemoryDumpPaging: h.GuestMemoryDumpPaging, ConfidentialGuest: h.ConfidentialGuest, + GuestSwap: 
h.GuestSwap, }, nil } @@ -1066,6 +1068,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig { TxRateLimiterMaxRate: defaultTxRateLimiterMaxRate, SGXEPCSize: defaultSGXEPCSize, ConfidentialGuest: defaultConfidentialGuest, + GuestSwap: defaultGuestSwap, } } diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index 2c75d7f4a5..525d77ceb9 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -458,6 +458,9 @@ type HypervisorConfig struct { // MemOffset specifies memory space for nvdimm device MemOffset uint64 + + // GuestSwap Used to enable/disable swap in the guest + GuestSwap bool } // vcpu mapping from vcpu number to thread number diff --git a/src/runtime/virtcontainers/pkg/annotations/annotations.go b/src/runtime/virtcontainers/pkg/annotations/annotations.go index 1ac2497b6a..838b2994fd 100644 --- a/src/runtime/virtcontainers/pkg/annotations/annotations.go +++ b/src/runtime/virtcontainers/pkg/annotations/annotations.go @@ -220,6 +220,9 @@ const ( // TxRateLimiter is a sandbox annotation that specifies max rate on network I/O outbound bandwidth TxRateLimiterMaxRate = kataAnnotHypervisorPrefix + "tx_rate_limiter_max_rate" + + // EnableGuestSwap is a sandbox annotation to enable swap in the guest. 
+ EnableGuestSwap = kataAnnotHypervisorPrefix + "enable_guest_swap" ) // Runtime related annotations diff --git a/src/runtime/virtcontainers/pkg/oci/utils.go b/src/runtime/virtcontainers/pkg/oci/utils.go index 1240b9bb06..bbdd5dcae1 100644 --- a/src/runtime/virtcontainers/pkg/oci/utils.go +++ b/src/runtime/virtcontainers/pkg/oci/utils.go @@ -539,6 +539,7 @@ func addHypervisorPathOverrides(ocispec specs.Spec, config *vc.SandboxConfig, ru } } } + return nil } @@ -616,6 +617,12 @@ func addHypervisorMemoryOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig return err } + if err := newAnnotationConfiguration(ocispec, vcAnnotations.EnableGuestSwap).setBool(func(enableGuestSwap bool) { + sbConfig.HypervisorConfig.GuestSwap = enableGuestSwap + }); err != nil { + return err + } + return nil } diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index c5ab8d5647..7784a66b88 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -8,6 +8,7 @@ package virtcontainers import ( "bufio" + "bytes" "context" "fmt" "io" @@ -65,6 +66,8 @@ const ( // DirMode is the permission bits used for creating a directory DirMode = os.FileMode(0750) | os.ModeDir + + mkswapPath = "/sbin/mkswap" ) var ( @@ -200,6 +203,10 @@ type Sandbox struct { ctx context.Context cw *consoleWatcher + + swapDeviceNum uint + swapSizeBytes int64 + swapDevices []*config.BlockDrive } // ID returns the sandbox identifier string. 
@@ -519,6 +526,9 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor sharePidNs: sandboxConfig.SharePidNs, networkNS: NetworkNamespace{NetNsPath: sandboxConfig.NetworkConfig.NetNSPath}, ctx: ctx, + swapDeviceNum: 0, + swapSizeBytes: 0, + swapDevices: []*config.BlockDrive{}, } hypervisor.setSandbox(s) @@ -1028,9 +1038,13 @@ func (s *Sandbox) addSwap(ctx context.Context, swapID string, size int64) (*conf return nil, err } - err = exec.CommandContext(ctx, "/sbin/mkswap", swapFile).Run() + var outbuf, errbuf bytes.Buffer + cmd := exec.CommandContext(ctx, mkswapPath, swapFile) + cmd.Stdout = &outbuf + cmd.Stderr = &errbuf + err = cmd.Run() if err != nil { - err = fmt.Errorf("mkswap swapfile %s fail %s", swapFile, err.Error()) + err = fmt.Errorf("mkswap swapfile %s fail %s stdout %s stderr %s", swapFile, err.Error(), outbuf.String(), errbuf.String()) s.Logger().WithError(err).Error("addSwap") return nil, err } @@ -1079,6 +1093,30 @@ func (s *Sandbox) removeSwap(ctx context.Context, blockDevice *config.BlockDrive return err } +func (s *Sandbox) setupSwap(ctx context.Context, sizeBytes int64) error { + if sizeBytes > s.swapSizeBytes { + dev, err := s.addSwap(ctx, fmt.Sprintf("swap%d", s.swapDeviceNum), sizeBytes-s.swapSizeBytes) + if err != nil { + return err + } + + s.swapDeviceNum += 1 + s.swapSizeBytes = sizeBytes + s.swapDevices = append(s.swapDevices, dev) + } + + return nil +} + +func (s *Sandbox) cleanSwap(ctx context.Context) { + for _, dev := range s.swapDevices { + err := s.removeSwap(ctx, dev) + if err != nil { + s.Logger().Warnf("remove swap device %+v got error %s", dev, err) + } + } +} + // startVM starts the VM. 
func (s *Sandbox) startVM(ctx context.Context) (err error) { span, ctx := katatrace.Trace(ctx, s.Logger(), "startVM", s.tracingTags()) @@ -1641,6 +1679,8 @@ func (s *Sandbox) Stop(ctx context.Context, force bool) error { return err } + s.cleanSwap(ctx) + return nil } @@ -1894,9 +1934,21 @@ func (s *Sandbox) updateResources(ctx context.Context) error { // Add default vcpus for sandbox sandboxVCPUs += s.hypervisor.hypervisorConfig().NumVCPUs - sandboxMemoryByte := s.calculateSandboxMemory() + sandboxMemoryByte, sandboxneedPodSwap, sandboxSwapByte := s.calculateSandboxMemory() // Add default / rsvd memory for sandbox. - sandboxMemoryByte += int64(s.hypervisor.hypervisorConfig().MemorySize) << utils.MibToBytesShift + hypervisorMemoryByte := int64(s.hypervisor.hypervisorConfig().MemorySize) << utils.MibToBytesShift + sandboxMemoryByte += hypervisorMemoryByte + if sandboxneedPodSwap { + sandboxSwapByte += hypervisorMemoryByte + } + + // Setup the SWAP in the guest + if sandboxSwapByte > 0 { + err = s.setupSwap(ctx, sandboxSwapByte) + if err != nil { + return err + } + } // Update VCPUs s.Logger().WithField("cpus-sandbox", sandboxVCPUs).Debugf("Request to hypervisor to update vCPUs") @@ -1941,8 +1993,10 @@ func (s *Sandbox) updateResources(ctx context.Context) error { return nil } -func (s *Sandbox) calculateSandboxMemory() int64 { +func (s *Sandbox) calculateSandboxMemory() (int64, bool, int64) { memorySandbox := int64(0) + needPodSwap := false + swapSandbox := int64(0) for _, c := range s.config.Containers { // Do not hot add again non-running containers resources if cont, ok := s.containers[c.ID]; ok && cont.state.State == types.StateStopped { @@ -1950,11 +2004,30 @@ func (s *Sandbox) calculateSandboxMemory() int64 { continue } - if m := c.Resources.Memory; m != nil && m.Limit != nil { - memorySandbox += *m.Limit + if m := c.Resources.Memory; m != nil { + currentLimit := int64(0) + if m.Limit != nil { + currentLimit = *m.Limit + memorySandbox += currentLimit + } + if 
s.config.HypervisorConfig.GuestSwap && m.Swappiness != nil && *m.Swappiness > 0 { + currentSwap := int64(0) + if m.Swap != nil { + currentSwap = *m.Swap + } + if currentSwap == 0 { + if currentLimit == 0 { + needPodSwap = true + } else { + swapSandbox += currentLimit + } + } else if currentSwap > currentLimit { + swapSandbox = currentSwap - currentLimit + } + } } } - return memorySandbox + return memorySandbox, needPodSwap, swapSandbox } func (s *Sandbox) calculateSandboxCPUs() (uint32, error) { diff --git a/src/runtime/virtcontainers/sandbox_test.go b/src/runtime/virtcontainers/sandbox_test.go index 27df559d36..77475be96e 100644 --- a/src/runtime/virtcontainers/sandbox_test.go +++ b/src/runtime/virtcontainers/sandbox_test.go @@ -168,8 +168,10 @@ func TestCalculateSandboxMem(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { sandbox.config.Containers = tt.containers - got := sandbox.calculateSandboxMemory() - assert.Equal(t, got, tt.want) + mem, needSwap, swap := sandbox.calculateSandboxMemory() + assert.Equal(t, mem, tt.want) + assert.Equal(t, needSwap, false) + assert.Equal(t, swap, int64(0)) }) } }