runtime: Add option "enable_guest_swap" to config hypervisor.qemu

This commit adds the option "enable_guest_swap" to the hypervisor.qemu
config section. It enables swap in the guest. Default: false.
When enable_guest_swap is enabled, a raw file is inserted into the guest as
the swap device if the swappiness of a container (set by annotation
"io.katacontainers.container.resource.swappiness") is greater than 0.
The size of the swap device is
swap_in_bytes (set by annotation
"io.katacontainers.container.resource.swap_in_bytes") - memory_limit_in_bytes.
If swap_in_bytes is not set, the size is memory_limit_in_bytes.
If neither swap_in_bytes nor memory_limit_in_bytes is set, the size is
default_memory.
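
For example (hypothetical values): with memory_limit_in_bytes = 2 GiB and
swap_in_bytes = 3 GiB, the guest gets a 3 GiB - 2 GiB = 1 GiB swap device;
with only memory_limit_in_bytes = 2 GiB set, the swap device is 2 GiB.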

Fixes: #2201

Signed-off-by: Hui Zhu <teawater@antfin.com>
Author: Hui Zhu, 2021-07-08 18:10:39 +08:00
parent a733f537e5
commit cb6b7667cd
8 changed files with 113 additions and 10 deletions


@@ -356,6 +356,17 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
# See: https://www.qemu.org/docs/master/qemu-qmp-ref.html#Dump-guest-memory for details
#guest_memory_dump_paging=false
# Enable swap in the guest. Default false.
# When enable_guest_swap is enabled, a raw file is inserted into the guest as the swap device
# if the swappiness of a container (set by annotation "io.katacontainers.container.resource.swappiness")
# is greater than 0.
# The size of the swap device is
# swap_in_bytes (set by annotation "io.katacontainers.container.resource.swap_in_bytes") - memory_limit_in_bytes.
# If swap_in_bytes is not set, the size is memory_limit_in_bytes.
# If neither swap_in_bytes nor memory_limit_in_bytes is set, the size is
# default_memory.
#enable_guest_swap = true
[factory]
# VM templating support. Once enabled, new VMs are created from template
# using vm cloning. They will share the same initial kernel, initramfs and


@@ -55,6 +55,7 @@ const defaultVhostUserStorePath string = "/var/run/kata-containers/vhost-user/"
const defaultRxRateLimiterMaxRate = uint64(0)
const defaultTxRateLimiterMaxRate = uint64(0)
const defaultConfidentialGuest = false
const defaultGuestSwap = false

var defaultSGXEPCSize = int64(0)


@@ -133,6 +133,7 @@ type hypervisor struct {
DisableVhostNet bool `toml:"disable_vhost_net"`
GuestMemoryDumpPaging bool `toml:"guest_memory_dump_paging"`
ConfidentialGuest bool `toml:"confidential_guest"`
GuestSwap bool `toml:"enable_guest_swap"`
}

type runtime struct {

@@ -711,6 +712,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
GuestMemoryDumpPath: h.GuestMemoryDumpPath,
GuestMemoryDumpPaging: h.GuestMemoryDumpPaging,
ConfidentialGuest: h.ConfidentialGuest,
GuestSwap: h.GuestSwap,
}, nil
}

@@ -1066,6 +1068,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig {
TxRateLimiterMaxRate: defaultTxRateLimiterMaxRate,
SGXEPCSize: defaultSGXEPCSize,
ConfidentialGuest: defaultConfidentialGuest,
GuestSwap: defaultGuestSwap,
}
}


@@ -458,6 +458,9 @@ type HypervisorConfig struct {
// MemOffset specifies memory space for nvdimm device
MemOffset uint64

// GuestSwap Used to enable/disable swap in the guest
GuestSwap bool
}

// vcpu mapping from vcpu number to thread number


@@ -220,6 +220,9 @@ const (
// TxRateLimiter is a sandbox annotation that specifies max rate on network I/O outbound bandwidth
TxRateLimiterMaxRate = kataAnnotHypervisorPrefix + "tx_rate_limiter_max_rate"

// EnableGuestSwap is a sandbox annotation to enable swap in the guest.
EnableGuestSwap = kataAnnotHypervisorPrefix + "enable_guest_swap"
)

// Runtime related annotations
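
Assuming kataAnnotHypervisorPrefix keeps its existing value of
"io.katacontainers.config.hypervisor.", the full sandbox annotation key is
"io.katacontainers.config.hypervisor.enable_guest_swap".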


@@ -539,6 +539,7 @@ func addHypervisorPathOverrides(ocispec specs.Spec, config *vc.SandboxConfig, ru
}
}
}

return nil
}

@@ -616,6 +617,12 @@ func addHypervisorMemoryOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig
return err
}

if err := newAnnotationConfiguration(ocispec, vcAnnotations.EnableGuestSwap).setBool(func(enableGuestSwap bool) {
sbConfig.HypervisorConfig.GuestSwap = enableGuestSwap
}); err != nil {
return err
}

return nil
}
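
For reference, the following minimal Go sketch shows what a setBool-style
annotation override does; the helper name parseBoolAnnotation and the plain
map of annotations are assumptions for illustration, not the runtime's actual
newAnnotationConfiguration implementation.

package main

import (
	"fmt"
	"strconv"
)

// parseBoolAnnotation is a hypothetical helper: it looks up an annotation key
// and, when the key is present, parses the value as a bool and hands it to
// the supplied setter, mirroring the setBool override above.
func parseBoolAnnotation(annotations map[string]string, key string, set func(bool)) error {
	value, ok := annotations[key]
	if !ok {
		return nil // annotation not set, keep the configured default
	}
	b, err := strconv.ParseBool(value)
	if err != nil {
		return fmt.Errorf("annotation %q: %w", key, err)
	}
	set(b)
	return nil
}

func main() {
	guestSwap := false
	annotations := map[string]string{
		"io.katacontainers.config.hypervisor.enable_guest_swap": "true",
	}
	if err := parseBoolAnnotation(annotations, "io.katacontainers.config.hypervisor.enable_guest_swap",
		func(v bool) { guestSwap = v }); err != nil {
		panic(err)
	}
	fmt.Println("GuestSwap:", guestSwap)
}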


@@ -8,6 +8,7 @@ package virtcontainers
import (
"bufio"
"bytes"
"context"
"fmt"
"io"

@@ -65,6 +66,8 @@ const (
// DirMode is the permission bits used for creating a directory
DirMode = os.FileMode(0750) | os.ModeDir

mkswapPath = "/sbin/mkswap"
)

var (

@@ -200,6 +203,10 @@ type Sandbox struct {
ctx context.Context
cw *consoleWatcher

swapDeviceNum uint
swapSizeBytes int64
swapDevices []*config.BlockDrive
}

// ID returns the sandbox identifier string.

@@ -519,6 +526,9 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
sharePidNs: sandboxConfig.SharePidNs,
networkNS: NetworkNamespace{NetNsPath: sandboxConfig.NetworkConfig.NetNSPath},
ctx: ctx,
swapDeviceNum: 0,
swapSizeBytes: 0,
swapDevices: []*config.BlockDrive{},
}

hypervisor.setSandbox(s)

@@ -1028,9 +1038,13 @@ func (s *Sandbox) addSwap(ctx context.Context, swapID string, size int64) (*conf
return nil, err
}

var outbuf, errbuf bytes.Buffer
cmd := exec.CommandContext(ctx, mkswapPath, swapFile)
cmd.Stdout = &outbuf
cmd.Stderr = &errbuf
err = cmd.Run()
if err != nil {
err = fmt.Errorf("mkswap swapfile %s fail %s stdout %s stderr %s", swapFile, err.Error(), outbuf.String(), errbuf.String())
s.Logger().WithError(err).Error("addSwap")
return nil, err
}

@@ -1079,6 +1093,30 @@ func (s *Sandbox) removeSwap(ctx context.Context, blockDevice *config.BlockDrive
return err
}

func (s *Sandbox) setupSwap(ctx context.Context, sizeBytes int64) error {
if sizeBytes > s.swapSizeBytes {
dev, err := s.addSwap(ctx, fmt.Sprintf("swap%d", s.swapDeviceNum), sizeBytes-s.swapSizeBytes)
if err != nil {
return err
}
s.swapDeviceNum += 1
s.swapSizeBytes = sizeBytes
s.swapDevices = append(s.swapDevices, dev)
}
return nil
}

func (s *Sandbox) cleanSwap(ctx context.Context) {
for _, dev := range s.swapDevices {
err := s.removeSwap(ctx, dev)
if err != nil {
s.Logger().Warnf("remove swap device %+v got error %s", dev, err)
}
}
}
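
As the later hunks show, setupSwap is called from updateResources when the
accumulated sandboxSwapByte is greater than 0, and cleanSwap is called from
Stop to remove the swap devices that were added.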

// startVM starts the VM.
func (s *Sandbox) startVM(ctx context.Context) (err error) {
span, ctx := katatrace.Trace(ctx, s.Logger(), "startVM", s.tracingTags())

@@ -1641,6 +1679,8 @@ func (s *Sandbox) Stop(ctx context.Context, force bool) error {
return err
}

s.cleanSwap(ctx)

return nil
}

@@ -1894,9 +1934,21 @@ func (s *Sandbox) updateResources(ctx context.Context) error {
// Add default vcpus for sandbox
sandboxVCPUs += s.hypervisor.hypervisorConfig().NumVCPUs

sandboxMemoryByte, sandboxneedPodSwap, sandboxSwapByte := s.calculateSandboxMemory()
// Add default / rsvd memory for sandbox.
hypervisorMemoryByte := int64(s.hypervisor.hypervisorConfig().MemorySize) << utils.MibToBytesShift
sandboxMemoryByte += hypervisorMemoryByte
if sandboxneedPodSwap {
sandboxSwapByte += hypervisorMemoryByte
}
// Setup the SWAP in the guest
if sandboxSwapByte > 0 {
err = s.setupSwap(ctx, sandboxSwapByte)
if err != nil {
return err
}
}
// Update VCPUs
s.Logger().WithField("cpus-sandbox", sandboxVCPUs).Debugf("Request to hypervisor to update vCPUs")

@@ -1941,8 +1993,10 @@ func (s *Sandbox) updateResources(ctx context.Context) error {
return nil
}

func (s *Sandbox) calculateSandboxMemory() (int64, bool, int64) {
memorySandbox := int64(0)
needPodSwap := false
swapSandbox := int64(0)
for _, c := range s.config.Containers {
// Do not hot add again non-running containers resources
if cont, ok := s.containers[c.ID]; ok && cont.state.State == types.StateStopped {

@@ -1950,11 +2004,30 @@ func (s *Sandbox) calculateSandboxMemory() int64 {
continue
}

if m := c.Resources.Memory; m != nil {
currentLimit := int64(0)
if m.Limit != nil {
currentLimit = *m.Limit
memorySandbox += currentLimit
}
if s.config.HypervisorConfig.GuestSwap && m.Swappiness != nil && *m.Swappiness > 0 {
currentSwap := int64(0)
if m.Swap != nil {
currentSwap = *m.Swap
}
if currentSwap == 0 {
if currentLimit == 0 {
needPodSwap = true
} else {
swapSandbox += currentLimit
}
} else if currentSwap > currentLimit {
swapSandbox = currentSwap - currentLimit
}
}
}
}

return memorySandbox, needPodSwap, swapSandbox
}

func (s *Sandbox) calculateSandboxCPUs() (uint32, error) {
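
The per-container sizing rule in calculateSandboxMemory can be read in
isolation. The standalone Go sketch below only illustrates that rule (the
helper containerSwap and its parameters are assumptions, not part of the
commit); it assumes swappiness, the memory limit and the swap limit come
from the container's memory resources.

package main

import "fmt"

// containerSwap mirrors the per-container rule used by calculateSandboxMemory:
// swap is only considered when guest swap is enabled and swappiness > 0.
// It returns the swap bytes contributed by one container and whether the
// sandbox must fall back to pod-level swap sized from default_memory.
func containerSwap(guestSwap bool, swappiness uint64, limit, swap int64) (swapBytes int64, needPodSwap bool) {
	if !guestSwap || swappiness == 0 {
		return 0, false
	}
	if swap == 0 {
		if limit == 0 {
			// Neither swap_in_bytes nor memory_limit_in_bytes is set:
			// size the swap device from default_memory instead.
			return 0, true
		}
		// Only memory_limit_in_bytes is set: the swap device matches the limit.
		return limit, false
	}
	if swap > limit {
		// Both are set: the swap device is swap_in_bytes - memory_limit_in_bytes.
		return swap - limit, false
	}
	return 0, false
}

func main() {
	const gib = int64(1) << 30
	bytes, needPodSwap := containerSwap(true, 60, 2*gib, 3*gib)
	fmt.Printf("swap device: %d GiB, needPodSwap: %v\n", bytes/gib, needPodSwap)
}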


@@ -168,8 +168,10 @@ func TestCalculateSandboxMem(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
sandbox.config.Containers = tt.containers
mem, needSwap, swap := sandbox.calculateSandboxMemory()
assert.Equal(t, mem, tt.want)
assert.Equal(t, needSwap, false)
assert.Equal(t, swap, int64(0))
})
}
}