mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-07-19 09:51:29 +00:00
runtime: Add option "enable_guest_swap" to config hypervisor.qemu
This commit adds the option "enable_guest_swap" to the hypervisor.qemu configuration. It enables swap in the guest and defaults to false. When enable_guest_swap is enabled, a raw file is inserted into the guest as the swap device if the swappiness of a container (set by annotation "io.katacontainers.container.resource.swappiness") is greater than 0. The size of the swap device should be swap_in_bytes (set by annotation "io.katacontainers.container.resource.swap_in_bytes") - memory_limit_in_bytes. If swap_in_bytes is not set, the size should be memory_limit_in_bytes. If neither swap_in_bytes nor memory_limit_in_bytes is set, the size should be default_memory. Fixes: #2201 Signed-off-by: Hui Zhu <teawater@antfin.com>
This commit is contained in:
parent
a733f537e5
commit
cb6b7667cd
@ -356,6 +356,17 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
|
||||
# See: https://www.qemu.org/docs/master/qemu-qmp-ref.html#Dump-guest-memory for details
|
||||
#guest_memory_dump_paging=false
|
||||
|
||||
# Enable swap in the guest. Default false.
|
||||
# When enable_guest_swap is enabled, a raw file is inserted into the guest as the swap device
|
||||
# if the swappiness of a container (set by annotation "io.katacontainers.container.resource.swappiness")
|
||||
# is bigger than 0.
|
||||
# The size of the swap device should be
|
||||
# swap_in_bytes (set by annotation "io.katacontainers.container.resource.swap_in_bytes") - memory_limit_in_bytes.
|
||||
# If swap_in_bytes is not set, the size should be memory_limit_in_bytes.
|
||||
# If neither swap_in_bytes nor memory_limit_in_bytes is set, the size should
|
||||
# be default_memory.
|
||||
#enable_guest_swap = true
|
||||
|
||||
[factory]
|
||||
# VM templating support. Once enabled, new VMs are created from template
|
||||
# using vm cloning. They will share the same initial kernel, initramfs and
|
||||
|
@ -55,6 +55,7 @@ const defaultVhostUserStorePath string = "/var/run/kata-containers/vhost-user/"
|
||||
const defaultRxRateLimiterMaxRate = uint64(0)
|
||||
const defaultTxRateLimiterMaxRate = uint64(0)
|
||||
const defaultConfidentialGuest = false
|
||||
const defaultGuestSwap = false
|
||||
|
||||
var defaultSGXEPCSize = int64(0)
|
||||
|
||||
|
@ -133,6 +133,7 @@ type hypervisor struct {
|
||||
DisableVhostNet bool `toml:"disable_vhost_net"`
|
||||
GuestMemoryDumpPaging bool `toml:"guest_memory_dump_paging"`
|
||||
ConfidentialGuest bool `toml:"confidential_guest"`
|
||||
GuestSwap bool `toml:"enable_guest_swap"`
|
||||
}
|
||||
|
||||
type runtime struct {
|
||||
@ -711,6 +712,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
|
||||
GuestMemoryDumpPath: h.GuestMemoryDumpPath,
|
||||
GuestMemoryDumpPaging: h.GuestMemoryDumpPaging,
|
||||
ConfidentialGuest: h.ConfidentialGuest,
|
||||
GuestSwap: h.GuestSwap,
|
||||
}, nil
|
||||
}
|
||||
|
||||
@ -1066,6 +1068,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig {
|
||||
TxRateLimiterMaxRate: defaultTxRateLimiterMaxRate,
|
||||
SGXEPCSize: defaultSGXEPCSize,
|
||||
ConfidentialGuest: defaultConfidentialGuest,
|
||||
GuestSwap: defaultGuestSwap,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -458,6 +458,9 @@ type HypervisorConfig struct {
|
||||
|
||||
// MemOffset specifies memory space for nvdimm device
|
||||
MemOffset uint64
|
||||
|
||||
// GuestSwap is used to enable/disable swap in the guest
|
||||
GuestSwap bool
|
||||
}
|
||||
|
||||
// vcpu mapping from vcpu number to thread number
|
||||
|
@ -220,6 +220,9 @@ const (
|
||||
|
||||
// TxRateLimiter is a sandbox annotation that specifies max rate on network I/O outbound bandwidth
|
||||
TxRateLimiterMaxRate = kataAnnotHypervisorPrefix + "tx_rate_limiter_max_rate"
|
||||
|
||||
// EnableGuestSwap is a sandbox annotation to enable swap in the guest.
|
||||
EnableGuestSwap = kataAnnotHypervisorPrefix + "enable_guest_swap"
|
||||
)
|
||||
|
||||
// Runtime related annotations
|
||||
|
@ -539,6 +539,7 @@ func addHypervisorPathOverrides(ocispec specs.Spec, config *vc.SandboxConfig, ru
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -616,6 +617,12 @@ func addHypervisorMemoryOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig
|
||||
return err
|
||||
}
|
||||
|
||||
if err := newAnnotationConfiguration(ocispec, vcAnnotations.EnableGuestSwap).setBool(func(enableGuestSwap bool) {
|
||||
sbConfig.HypervisorConfig.GuestSwap = enableGuestSwap
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -8,6 +8,7 @@ package virtcontainers
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
@ -65,6 +66,8 @@ const (
|
||||
|
||||
// DirMode is the permission bits used for creating a directory
|
||||
DirMode = os.FileMode(0750) | os.ModeDir
|
||||
|
||||
mkswapPath = "/sbin/mkswap"
|
||||
)
|
||||
|
||||
var (
|
||||
@ -200,6 +203,10 @@ type Sandbox struct {
|
||||
ctx context.Context
|
||||
|
||||
cw *consoleWatcher
|
||||
|
||||
swapDeviceNum uint
|
||||
swapSizeBytes int64
|
||||
swapDevices []*config.BlockDrive
|
||||
}
|
||||
|
||||
// ID returns the sandbox identifier string.
|
||||
@ -519,6 +526,9 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
|
||||
sharePidNs: sandboxConfig.SharePidNs,
|
||||
networkNS: NetworkNamespace{NetNsPath: sandboxConfig.NetworkConfig.NetNSPath},
|
||||
ctx: ctx,
|
||||
swapDeviceNum: 0,
|
||||
swapSizeBytes: 0,
|
||||
swapDevices: []*config.BlockDrive{},
|
||||
}
|
||||
|
||||
hypervisor.setSandbox(s)
|
||||
@ -1028,9 +1038,13 @@ func (s *Sandbox) addSwap(ctx context.Context, swapID string, size int64) (*conf
|
||||
return nil, err
|
||||
}
|
||||
|
||||
err = exec.CommandContext(ctx, "/sbin/mkswap", swapFile).Run()
|
||||
var outbuf, errbuf bytes.Buffer
|
||||
cmd := exec.CommandContext(ctx, mkswapPath, swapFile)
|
||||
cmd.Stdout = &outbuf
|
||||
cmd.Stderr = &errbuf
|
||||
err = cmd.Run()
|
||||
if err != nil {
|
||||
err = fmt.Errorf("mkswap swapfile %s fail %s", swapFile, err.Error())
|
||||
err = fmt.Errorf("mkswap swapfile %s fail %s stdout %s stderr %s", swapFile, err.Error(), outbuf.String(), errbuf.String())
|
||||
s.Logger().WithError(err).Error("addSwap")
|
||||
return nil, err
|
||||
}
|
||||
@ -1079,6 +1093,30 @@ func (s *Sandbox) removeSwap(ctx context.Context, blockDevice *config.BlockDrive
|
||||
return err
|
||||
}
|
||||
|
||||
func (s *Sandbox) setupSwap(ctx context.Context, sizeBytes int64) error {
|
||||
if sizeBytes > s.swapSizeBytes {
|
||||
dev, err := s.addSwap(ctx, fmt.Sprintf("swap%d", s.swapDeviceNum), sizeBytes-s.swapSizeBytes)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
s.swapDeviceNum += 1
|
||||
s.swapSizeBytes = sizeBytes
|
||||
s.swapDevices = append(s.swapDevices, dev)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Sandbox) cleanSwap(ctx context.Context) {
|
||||
for _, dev := range s.swapDevices {
|
||||
err := s.removeSwap(ctx, dev)
|
||||
if err != nil {
|
||||
s.Logger().Warnf("remove swap device %+v got error %s", dev, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// startVM starts the VM.
|
||||
func (s *Sandbox) startVM(ctx context.Context) (err error) {
|
||||
span, ctx := katatrace.Trace(ctx, s.Logger(), "startVM", s.tracingTags())
|
||||
@ -1641,6 +1679,8 @@ func (s *Sandbox) Stop(ctx context.Context, force bool) error {
|
||||
return err
|
||||
}
|
||||
|
||||
s.cleanSwap(ctx)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -1894,9 +1934,21 @@ func (s *Sandbox) updateResources(ctx context.Context) error {
|
||||
// Add default vcpus for sandbox
|
||||
sandboxVCPUs += s.hypervisor.hypervisorConfig().NumVCPUs
|
||||
|
||||
sandboxMemoryByte := s.calculateSandboxMemory()
|
||||
sandboxMemoryByte, sandboxneedPodSwap, sandboxSwapByte := s.calculateSandboxMemory()
|
||||
// Add default / rsvd memory for sandbox.
|
||||
sandboxMemoryByte += int64(s.hypervisor.hypervisorConfig().MemorySize) << utils.MibToBytesShift
|
||||
hypervisorMemoryByte := int64(s.hypervisor.hypervisorConfig().MemorySize) << utils.MibToBytesShift
|
||||
sandboxMemoryByte += hypervisorMemoryByte
|
||||
if sandboxneedPodSwap {
|
||||
sandboxSwapByte += hypervisorMemoryByte
|
||||
}
|
||||
|
||||
// Setup the SWAP in the guest
|
||||
if sandboxSwapByte > 0 {
|
||||
err = s.setupSwap(ctx, sandboxSwapByte)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Update VCPUs
|
||||
s.Logger().WithField("cpus-sandbox", sandboxVCPUs).Debugf("Request to hypervisor to update vCPUs")
|
||||
@ -1941,8 +1993,10 @@ func (s *Sandbox) updateResources(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Sandbox) calculateSandboxMemory() int64 {
|
||||
func (s *Sandbox) calculateSandboxMemory() (int64, bool, int64) {
|
||||
memorySandbox := int64(0)
|
||||
needPodSwap := false
|
||||
swapSandbox := int64(0)
|
||||
for _, c := range s.config.Containers {
|
||||
// Do not hot add again non-running containers resources
|
||||
if cont, ok := s.containers[c.ID]; ok && cont.state.State == types.StateStopped {
|
||||
@ -1950,11 +2004,30 @@ func (s *Sandbox) calculateSandboxMemory() int64 {
|
||||
continue
|
||||
}
|
||||
|
||||
if m := c.Resources.Memory; m != nil && m.Limit != nil {
|
||||
memorySandbox += *m.Limit
|
||||
if m := c.Resources.Memory; m != nil {
|
||||
currentLimit := int64(0)
|
||||
if m.Limit != nil {
|
||||
currentLimit = *m.Limit
|
||||
memorySandbox += currentLimit
|
||||
}
|
||||
if s.config.HypervisorConfig.GuestSwap && m.Swappiness != nil && *m.Swappiness > 0 {
|
||||
currentSwap := int64(0)
|
||||
if m.Swap != nil {
|
||||
currentSwap = *m.Swap
|
||||
}
|
||||
if currentSwap == 0 {
|
||||
if currentLimit == 0 {
|
||||
needPodSwap = true
|
||||
} else {
|
||||
swapSandbox += currentLimit
|
||||
}
|
||||
} else if currentSwap > currentLimit {
|
||||
swapSandbox = currentSwap - currentLimit
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return memorySandbox
|
||||
return memorySandbox, needPodSwap, swapSandbox
|
||||
}
|
||||
|
||||
func (s *Sandbox) calculateSandboxCPUs() (uint32, error) {
|
||||
|
@ -168,8 +168,10 @@ func TestCalculateSandboxMem(t *testing.T) {
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
sandbox.config.Containers = tt.containers
|
||||
got := sandbox.calculateSandboxMemory()
|
||||
assert.Equal(t, got, tt.want)
|
||||
mem, needSwap, swap := sandbox.calculateSandboxMemory()
|
||||
assert.Equal(t, mem, tt.want)
|
||||
assert.Equal(t, needSwap, false)
|
||||
assert.Equal(t, swap, int64(0))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user