runtime: Add option "enable_guest_swap" to config hypervisor.qemu
This commit adds the option "enable_guest_swap" to the hypervisor.qemu config section. It enables swap inside the guest and defaults to false.

When enable_guest_swap is enabled, a raw file is inserted into the guest as the swap device if the swappiness of a container (set by the annotation "io.katacontainers.container.resource.swappiness") is greater than 0. The size of the swap device is swap_in_bytes (set by the annotation "io.katacontainers.container.resource.swap_in_bytes") minus memory_limit_in_bytes. If swap_in_bytes is not set, the size is memory_limit_in_bytes. If neither swap_in_bytes nor memory_limit_in_bytes is set, the size is default_memory.

Fixes: #2201

Signed-off-by: Hui Zhu <teawater@antfin.com>
Commit: cb6b7667cd (parent: a733f537e5)
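The sizing rule described above, as a minimal standalone Go sketch. guestSwapSize is a hypothetical helper for illustration only; the actual change aggregates these per-container values in calculateSandboxMemory, shown in the diff below.

package main

import "fmt"

// guestSwapSize mirrors the per-container rule from the commit message.
// All sizes are in bytes; zero means "not set".
func guestSwapSize(swappiness uint64, swapBytes, limitBytes, defaultMemoryBytes int64) int64 {
	if swappiness == 0 {
		// No swap device is added for this container.
		return 0
	}
	if swapBytes > 0 {
		// swap_in_bytes is a memory+swap limit, so subtract the memory limit.
		if swapBytes > limitBytes {
			return swapBytes - limitBytes
		}
		return 0
	}
	if limitBytes > 0 {
		// swap_in_bytes not set: fall back to memory_limit_in_bytes.
		return limitBytes
	}
	// Neither swap_in_bytes nor memory_limit_in_bytes set: use default_memory.
	return defaultMemoryBytes
}

func main() {
	gib := int64(1 << 30)
	// swappiness=60, swap_in_bytes=3 GiB, memory_limit_in_bytes=1 GiB -> 2 GiB of guest swap
	fmt.Println(guestSwapSize(60, 3*gib, 1*gib, 2*gib) / gib)
}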
@@ -356,6 +356,17 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # See: https://www.qemu.org/docs/master/qemu-qmp-ref.html#Dump-guest-memory for details
 #guest_memory_dump_paging=false
 
+# Enable swap in the guest. Default false.
+# When enable_guest_swap is enabled, insert a raw file to the guest as the swap device
+# if the swappiness of a container (set by annotation "io.katacontainers.container.resource.swappiness")
+# is bigger than 0.
+# The size of the swap device should be
+# swap_in_bytes (set by annotation "io.katacontainers.container.resource.swap_in_bytes") - memory_limit_in_bytes.
+# If swap_in_bytes is not set, the size should be memory_limit_in_bytes.
+# If swap_in_bytes and memory_limit_in_bytes is not set, the size should
+# be default_memory.
+#enable_guest_swap = true
+
 [factory]
 # VM templating support. Once enabled, new VMs are created from template
 # using vm cloning. They will share the same initial kernel, initramfs and
@@ -55,6 +55,7 @@ const defaultVhostUserStorePath string = "/var/run/kata-containers/vhost-user/"
 const defaultRxRateLimiterMaxRate = uint64(0)
 const defaultTxRateLimiterMaxRate = uint64(0)
 const defaultConfidentialGuest = false
+const defaultGuestSwap = false
 
 var defaultSGXEPCSize = int64(0)
 
@@ -133,6 +133,7 @@ type hypervisor struct {
     DisableVhostNet       bool `toml:"disable_vhost_net"`
     GuestMemoryDumpPaging bool `toml:"guest_memory_dump_paging"`
     ConfidentialGuest     bool `toml:"confidential_guest"`
+    GuestSwap             bool `toml:"enable_guest_swap"`
 }
 
 type runtime struct {
@@ -711,6 +712,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
         GuestMemoryDumpPath:   h.GuestMemoryDumpPath,
         GuestMemoryDumpPaging: h.GuestMemoryDumpPaging,
         ConfidentialGuest:     h.ConfidentialGuest,
+        GuestSwap:             h.GuestSwap,
     }, nil
 }
 
@@ -1066,6 +1068,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig {
         TxRateLimiterMaxRate: defaultTxRateLimiterMaxRate,
         SGXEPCSize:           defaultSGXEPCSize,
         ConfidentialGuest:    defaultConfidentialGuest,
+        GuestSwap:            defaultGuestSwap,
     }
 }
 
@@ -458,6 +458,9 @@ type HypervisorConfig struct {
 
     // MemOffset specifies memory space for nvdimm device
     MemOffset uint64
+
+    // GuestSwap Used to enable/disable swap in the guest
+    GuestSwap bool
 }
 
 // vcpu mapping from vcpu number to thread number
@@ -220,6 +220,9 @@ const (
 
     // TxRateLimiter is a sandbox annotation that specifies max rate on network I/O outbound bandwidth
     TxRateLimiterMaxRate = kataAnnotHypervisorPrefix + "tx_rate_limiter_max_rate"
+
+    // EnableGuestSwap is a sandbox annotation to enable swap in the guest.
+    EnableGuestSwap = kataAnnotHypervisorPrefix + "enable_guest_swap"
 )
 
 // Runtime related annotations
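For reference, a minimal sketch of how a pod might request guest swap through these annotations. This is hypothetical usage, assuming kataAnnotHypervisorPrefix expands to "io.katacontainers.config.hypervisor." and that the runtime allows the option via its annotation whitelist; the swappiness key is the container-level annotation quoted in the commit message.

package main

import "fmt"

func main() {
	// Annotations as they would appear on the OCI spec of the pod/container.
	annotations := map[string]string{
		"io.katacontainers.config.hypervisor.enable_guest_swap": "true",
		"io.katacontainers.container.resource.swappiness":       "60",
	}
	fmt.Println(annotations)
}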
@@ -539,6 +539,7 @@ func addHypervisorPathOverrides(ocispec specs.Spec, config *vc.SandboxConfig, ru
         }
     }
 }
 
     return nil
 }
 
@@ -616,6 +617,12 @@ func addHypervisorMemoryOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig
         return err
     }
 
+    if err := newAnnotationConfiguration(ocispec, vcAnnotations.EnableGuestSwap).setBool(func(enableGuestSwap bool) {
+        sbConfig.HypervisorConfig.GuestSwap = enableGuestSwap
+    }); err != nil {
+        return err
+    }
+
     return nil
 }
 
@@ -8,6 +8,7 @@ package virtcontainers
 
 import (
     "bufio"
+    "bytes"
     "context"
     "fmt"
     "io"
@@ -65,6 +66,8 @@ const (
 
     // DirMode is the permission bits used for creating a directory
     DirMode = os.FileMode(0750) | os.ModeDir
+
+    mkswapPath = "/sbin/mkswap"
 )
 
 var (
@@ -200,6 +203,10 @@ type Sandbox struct {
     ctx context.Context
 
     cw *consoleWatcher
+
+    swapDeviceNum uint
+    swapSizeBytes int64
+    swapDevices   []*config.BlockDrive
 }
 
 // ID returns the sandbox identifier string.
@@ -519,6 +526,9 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
         sharePidNs: sandboxConfig.SharePidNs,
         networkNS:  NetworkNamespace{NetNsPath: sandboxConfig.NetworkConfig.NetNSPath},
         ctx:        ctx,
+        swapDeviceNum: 0,
+        swapSizeBytes: 0,
+        swapDevices:   []*config.BlockDrive{},
     }
 
     hypervisor.setSandbox(s)
@@ -1028,9 +1038,13 @@ func (s *Sandbox) addSwap(ctx context.Context, swapID string, size int64) (*conf
         return nil, err
     }
 
-    err = exec.CommandContext(ctx, "/sbin/mkswap", swapFile).Run()
+    var outbuf, errbuf bytes.Buffer
+    cmd := exec.CommandContext(ctx, mkswapPath, swapFile)
+    cmd.Stdout = &outbuf
+    cmd.Stderr = &errbuf
+    err = cmd.Run()
     if err != nil {
-        err = fmt.Errorf("mkswap swapfile %s fail %s", swapFile, err.Error())
+        err = fmt.Errorf("mkswap swapfile %s fail %s stdout %s stderr %s", swapFile, err.Error(), outbuf.String(), errbuf.String())
         s.Logger().WithError(err).Error("addSwap")
         return nil, err
     }
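The hunk above replaces a bare Run() with explicit stdout/stderr capture so a failing mkswap is diagnosable from the error message. A self-contained sketch of the same pattern; runAndCapture is a hypothetical helper, not part of the commit.

package main

import (
	"bytes"
	"context"
	"fmt"
	"os/exec"
)

// runAndCapture runs a command and, on failure, folds its captured
// stdout and stderr into the returned error.
func runAndCapture(ctx context.Context, path string, args ...string) error {
	var outbuf, errbuf bytes.Buffer
	cmd := exec.CommandContext(ctx, path, args...)
	cmd.Stdout = &outbuf
	cmd.Stderr = &errbuf
	if err := cmd.Run(); err != nil {
		return fmt.Errorf("%s failed: %s stdout %q stderr %q", path, err, outbuf.String(), errbuf.String())
	}
	return nil
}

func main() {
	if err := runAndCapture(context.Background(), "/sbin/mkswap", "/nonexistent"); err != nil {
		fmt.Println(err)
	}
}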
@@ -1079,6 +1093,30 @@ func (s *Sandbox) removeSwap(ctx context.Context, blockDevice *config.BlockDrive
     return err
 }
 
+func (s *Sandbox) setupSwap(ctx context.Context, sizeBytes int64) error {
+    if sizeBytes > s.swapSizeBytes {
+        dev, err := s.addSwap(ctx, fmt.Sprintf("swap%d", s.swapDeviceNum), sizeBytes-s.swapSizeBytes)
+        if err != nil {
+            return err
+        }
+
+        s.swapDeviceNum += 1
+        s.swapSizeBytes = sizeBytes
+        s.swapDevices = append(s.swapDevices, dev)
+    }
+
+    return nil
+}
+
+func (s *Sandbox) cleanSwap(ctx context.Context) {
+    for _, dev := range s.swapDevices {
+        err := s.removeSwap(ctx, dev)
+        if err != nil {
+            s.Logger().Warnf("remove swap device %+v got error %s", dev, err)
+        }
+    }
+}
+
 // startVM starts the VM.
 func (s *Sandbox) startVM(ctx context.Context) (err error) {
     span, ctx := katatrace.Trace(ctx, s.Logger(), "startVM", s.tracingTags())
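As the following hunks show, setupSwap is invoked from updateResources once the aggregated swap requirement is known (and only hot-adds the delta above what is already attached), while cleanSwap is invoked from Stop so any hot-added swap devices are removed when the sandbox goes away.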
@@ -1641,6 +1679,8 @@ func (s *Sandbox) Stop(ctx context.Context, force bool) error {
         return err
     }
 
+    s.cleanSwap(ctx)
+
     return nil
 }
 
@@ -1894,9 +1934,21 @@ func (s *Sandbox) updateResources(ctx context.Context) error {
     // Add default vcpus for sandbox
     sandboxVCPUs += s.hypervisor.hypervisorConfig().NumVCPUs
 
-    sandboxMemoryByte := s.calculateSandboxMemory()
+    sandboxMemoryByte, sandboxneedPodSwap, sandboxSwapByte := s.calculateSandboxMemory()
     // Add default / rsvd memory for sandbox.
-    sandboxMemoryByte += int64(s.hypervisor.hypervisorConfig().MemorySize) << utils.MibToBytesShift
+    hypervisorMemoryByte := int64(s.hypervisor.hypervisorConfig().MemorySize) << utils.MibToBytesShift
+    sandboxMemoryByte += hypervisorMemoryByte
+    if sandboxneedPodSwap {
+        sandboxSwapByte += hypervisorMemoryByte
+    }
+
+    // Setup the SWAP in the guest
+    if sandboxSwapByte > 0 {
+        err = s.setupSwap(ctx, sandboxSwapByte)
+        if err != nil {
+            return err
+        }
+    }
 
     // Update VCPUs
     s.Logger().WithField("cpus-sandbox", sandboxVCPUs).Debugf("Request to hypervisor to update vCPUs")
@@ -1941,8 +1993,10 @@ func (s *Sandbox) updateResources(ctx context.Context) error {
     return nil
 }
 
-func (s *Sandbox) calculateSandboxMemory() int64 {
+func (s *Sandbox) calculateSandboxMemory() (int64, bool, int64) {
     memorySandbox := int64(0)
+    needPodSwap := false
+    swapSandbox := int64(0)
     for _, c := range s.config.Containers {
         // Do not hot add again non-running containers resources
         if cont, ok := s.containers[c.ID]; ok && cont.state.State == types.StateStopped {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if m := c.Resources.Memory; m != nil && m.Limit != nil {
|
if m := c.Resources.Memory; m != nil {
|
||||||
memorySandbox += *m.Limit
|
currentLimit := int64(0)
|
||||||
|
if m.Limit != nil {
|
||||||
|
currentLimit = *m.Limit
|
||||||
|
memorySandbox += currentLimit
|
||||||
|
}
|
||||||
|
if s.config.HypervisorConfig.GuestSwap && m.Swappiness != nil && *m.Swappiness > 0 {
|
||||||
|
currentSwap := int64(0)
|
||||||
|
if m.Swap != nil {
|
||||||
|
currentSwap = *m.Swap
|
||||||
|
}
|
||||||
|
if currentSwap == 0 {
|
||||||
|
if currentLimit == 0 {
|
||||||
|
needPodSwap = true
|
||||||
|
} else {
|
||||||
|
swapSandbox += currentLimit
|
||||||
|
}
|
||||||
|
} else if currentSwap > currentLimit {
|
||||||
|
swapSandbox = currentSwap - currentLimit
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return memorySandbox
|
return memorySandbox, needPodSwap, swapSandbox
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Sandbox) calculateSandboxCPUs() (uint32, error) {
|
func (s *Sandbox) calculateSandboxCPUs() (uint32, error) {
|
||||||
|
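To make the aggregation concrete, an illustrative scenario (numbers chosen for illustration, not taken from the commit): with enable_guest_swap on and default_memory = 2048 MiB, a container with memory_limit_in_bytes = 1 GiB, swappiness = 60 and no swap_in_bytes contributes 1 GiB to swapSandbox, while a second container with swappiness = 60 but no memory limit sets needPodSwap, so updateResources adds the 2048 MiB hypervisor default memory on top; setupSwap then creates and hot-plugs a single swap file of 1 GiB + 2 GiB = 3 GiB.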
@@ -168,8 +168,10 @@ func TestCalculateSandboxMem(t *testing.T) {
     for _, tt := range tests {
         t.Run(tt.name, func(t *testing.T) {
             sandbox.config.Containers = tt.containers
-            got := sandbox.calculateSandboxMemory()
-            assert.Equal(t, got, tt.want)
+            mem, needSwap, swap := sandbox.calculateSandboxMemory()
+            assert.Equal(t, mem, tt.want)
+            assert.Equal(t, needSwap, false)
+            assert.Equal(t, swap, int64(0))
         })
     }
 }