mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-07-19 18:01:01 +00:00
qemu: Add virtio-mem support
This commit adds virtio-mem support for qemu, so that qemu can use virtio-mem to resize memory. To enable this feature, a Linux kernel and a qemu that both support virtio-mem are required. Because the qemu virtio-mem device needs to allocate a lot of memory, use the command "echo 1 > /proc/sys/vm/overcommit_memory" to enable memory overcommitment of the Linux kernel, and set "enable_virtio_mem" to true in the kata configuration. Fixes: #2406 Signed-off-by: Hui Zhu <teawater@antfin.com>
This commit is contained in:
parent
b337428947
commit
01a12b003b
@ -89,6 +89,12 @@ default_memory = @DEFMEMSZ@
|
||||
# Default 0
|
||||
#memory_offset = 0
|
||||
|
||||
# Specifies whether virtio-mem will be enabled or not.
|
||||
# Please note that this option should be used with the command
|
||||
# "echo 1 > /proc/sys/vm/overcommit_memory".
|
||||
# Default false
|
||||
#enable_virtio_mem = true
|
||||
|
||||
# Disable block device from being used for a container's rootfs.
|
||||
# In case of a storage driver like devicemapper where a container's
|
||||
# root file system is backed by a block device, the block device is passed
|
||||
|
@ -27,6 +27,7 @@ const defaultMaxVCPUCount uint32 = 0
|
||||
const defaultMemSize uint32 = 2048 // MiB
|
||||
const defaultMemSlots uint32 = 10
|
||||
const defaultMemOffset uint32 = 0 // MiB
|
||||
const defaultVirtioMem bool = false
|
||||
const defaultBridgesCount uint32 = 1
|
||||
const defaultInterNetworkingModel = "tcfilter"
|
||||
const defaultDisableBlockDeviceUse bool = false
|
||||
|
@ -114,6 +114,7 @@ type hypervisor struct {
|
||||
DisableBlockDeviceUse bool `toml:"disable_block_device_use"`
|
||||
MemPrealloc bool `toml:"enable_mem_prealloc"`
|
||||
HugePages bool `toml:"enable_hugepages"`
|
||||
VirtioMem bool `toml:"enable_virtio_mem"`
|
||||
FileBackedMemRootDir string `toml:"file_mem_backend"`
|
||||
Swap bool `toml:"enable_swap"`
|
||||
Debug bool `toml:"enable_debug"`
|
||||
@ -623,6 +624,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
|
||||
MemorySize: h.defaultMemSz(),
|
||||
MemSlots: h.defaultMemSlots(),
|
||||
MemOffset: h.defaultMemOffset(),
|
||||
VirtioMem: h.VirtioMem,
|
||||
EntropySource: h.GetEntropySource(),
|
||||
DefaultBridges: h.defaultBridges(),
|
||||
DisableBlockDeviceUse: h.DisableBlockDeviceUse,
|
||||
@ -773,6 +775,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
|
||||
MemorySize: h.defaultMemSz(),
|
||||
MemSlots: h.defaultMemSlots(),
|
||||
MemOffset: h.defaultMemOffset(),
|
||||
VirtioMem: h.VirtioMem,
|
||||
EntropySource: h.GetEntropySource(),
|
||||
DefaultBridges: h.defaultBridges(),
|
||||
DisableBlockDeviceUse: h.DisableBlockDeviceUse,
|
||||
@ -1054,6 +1057,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig {
|
||||
DefaultMaxVCPUs: defaultMaxVCPUCount,
|
||||
MemorySize: defaultMemSize,
|
||||
MemOffset: defaultMemOffset,
|
||||
VirtioMem: defaultVirtioMem,
|
||||
DisableBlockDeviceUse: defaultDisableBlockDeviceUse,
|
||||
DefaultBridges: defaultBridgesCount,
|
||||
MemPrealloc: defaultEnableMemPrealloc,
|
||||
|
@ -307,6 +307,9 @@ type HypervisorConfig struct {
|
||||
// VirtioFSExtraArgs passes options to virtiofsd daemon
|
||||
VirtioFSExtraArgs []string
|
||||
|
||||
// File based memory backend root directory
|
||||
FileBackedMemRootDir string
|
||||
|
||||
// customAssets is a map of assets.
|
||||
// Each value in that map takes precedence over the configured assets.
|
||||
// For example, if there is a value for the "kernel" key in this map,
|
||||
@ -341,8 +344,8 @@ type HypervisorConfig struct {
|
||||
// HugePages specifies if the memory should be pre-allocated from huge pages
|
||||
HugePages bool
|
||||
|
||||
// File based memory backend root directory
|
||||
FileBackedMemRootDir string
|
||||
// VirtioMem is used to enable/disable virtio-mem
|
||||
VirtioMem bool
|
||||
|
||||
// Realtime Used to enable/disable realtime
|
||||
Realtime bool
|
||||
|
@ -214,6 +214,7 @@ func (s *Sandbox) dumpConfig(ss *persistapi.SandboxState) {
|
||||
Msize9p: sconfig.HypervisorConfig.Msize9p,
|
||||
MemSlots: sconfig.HypervisorConfig.MemSlots,
|
||||
MemOffset: sconfig.HypervisorConfig.MemOffset,
|
||||
VirtioMem: sconfig.HypervisorConfig.VirtioMem,
|
||||
VirtioFSCacheSize: sconfig.HypervisorConfig.VirtioFSCacheSize,
|
||||
KernelPath: sconfig.HypervisorConfig.KernelPath,
|
||||
ImagePath: sconfig.HypervisorConfig.ImagePath,
|
||||
@ -499,6 +500,7 @@ func loadSandboxConfig(id string) (*SandboxConfig, error) {
|
||||
Msize9p: hconf.Msize9p,
|
||||
MemSlots: hconf.MemSlots,
|
||||
MemOffset: hconf.MemOffset,
|
||||
VirtioMem: hconf.VirtioMem,
|
||||
VirtioFSCacheSize: hconf.VirtioFSCacheSize,
|
||||
KernelPath: hconf.KernelPath,
|
||||
ImagePath: hconf.ImagePath,
|
||||
|
@ -97,6 +97,9 @@ type HypervisorConfig struct {
|
||||
// VirtioFSExtraArgs passes options to virtiofsd daemon
|
||||
VirtioFSExtraArgs []string
|
||||
|
||||
// File based memory backend root directory
|
||||
FileBackedMemRootDir string
|
||||
|
||||
// BlockDeviceCacheSet specifies cache-related options will be set to block devices or not.
|
||||
BlockDeviceCacheSet bool
|
||||
|
||||
@ -125,8 +128,8 @@ type HypervisorConfig struct {
|
||||
// HugePages specifies if the memory should be pre-allocated from huge pages
|
||||
HugePages bool
|
||||
|
||||
// File based memory backend root directory
|
||||
FileBackedMemRootDir string
|
||||
// VirtioMem is used to enable/disable virtio-mem
|
||||
VirtioMem bool
|
||||
|
||||
// Realtime Used to enable/disable realtime
|
||||
Realtime bool
|
||||
|
@ -124,6 +124,9 @@ const (
|
||||
// MemOffset is a sandbox annotation that specifies the memory space used for nvdimm device by the hypervisor.
|
||||
MemOffset = kataAnnotHypervisorPrefix + "memory_offset"
|
||||
|
||||
// VirtioMem is a sandbox annotation that is used to enable/disable virtio-mem.
|
||||
VirtioMem = kataAnnotHypervisorPrefix + "enable_virtio_mem"
|
||||
|
||||
// MemPrealloc is a sandbox annotation that specifies whether the memory should be pre-allocated.
|
||||
MemPrealloc = kataAnnotHypervisorPrefix + "enable_mem_prealloc"
|
||||
|
||||
|
@ -492,6 +492,15 @@ func addHypervisorMemoryOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig
|
||||
}
|
||||
}
|
||||
|
||||
if value, ok := ocispec.Annotations[vcAnnotations.VirtioMem]; ok {
|
||||
virtioMem, err := strconv.ParseBool(value)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Error parsing annotation for enable_virtio_mem: Please specify boolean value 'true|false'")
|
||||
}
|
||||
|
||||
sbConfig.HypervisorConfig.VirtioMem = virtioMem
|
||||
}
|
||||
|
||||
if value, ok := ocispec.Annotations[vcAnnotations.MemPrealloc]; ok {
|
||||
memPrealloc, err := strconv.ParseBool(value)
|
||||
if err != nil {
|
||||
|
@ -741,6 +741,7 @@ func TestAddHypervisorAnnotations(t *testing.T) {
|
||||
ocispec.Annotations[vcAnnotations.DefaultMemory] = "1024"
|
||||
ocispec.Annotations[vcAnnotations.MemSlots] = "20"
|
||||
ocispec.Annotations[vcAnnotations.MemOffset] = "512"
|
||||
ocispec.Annotations[vcAnnotations.VirtioMem] = "true"
|
||||
ocispec.Annotations[vcAnnotations.MemPrealloc] = "true"
|
||||
ocispec.Annotations[vcAnnotations.EnableSwap] = "true"
|
||||
ocispec.Annotations[vcAnnotations.FileBackedMemRootDir] = "/dev/shm"
|
||||
@ -770,6 +771,7 @@ func TestAddHypervisorAnnotations(t *testing.T) {
|
||||
assert.Equal(config.HypervisorConfig.MemorySize, uint32(1024))
|
||||
assert.Equal(config.HypervisorConfig.MemSlots, uint32(20))
|
||||
assert.Equal(config.HypervisorConfig.MemOffset, uint32(512))
|
||||
assert.Equal(config.HypervisorConfig.VirtioMem, true)
|
||||
assert.Equal(config.HypervisorConfig.MemPrealloc, true)
|
||||
assert.Equal(config.HypervisorConfig.Mlock, false)
|
||||
assert.Equal(config.HypervisorConfig.FileBackedMemRootDir, "/dev/shm")
|
||||
|
@ -668,6 +668,56 @@ func (q *qemu) setupVirtiofsd() (err error) {
|
||||
return err
|
||||
}
|
||||
|
||||
func (q *qemu) getMemArgs() (bool, string, string) {
|
||||
share := false
|
||||
target := ""
|
||||
memoryBack := "memory-backend-ram"
|
||||
|
||||
if q.qemuConfig.Knobs.HugePages {
|
||||
// we are setting all the bits that govmm sets when hugepages are enabled.
|
||||
// https://github.com/intel/govmm/blob/master/qemu/qemu.go#L1677
|
||||
target = "/dev/hugepages"
|
||||
memoryBack = "memory-backend-file"
|
||||
share = true
|
||||
} else if q.config.SharedFS == config.VirtioFS || q.config.FileBackedMemRootDir != "" {
|
||||
target = q.qemuConfig.Memory.Path
|
||||
memoryBack = "memory-backend-file"
|
||||
}
|
||||
if q.qemuConfig.Knobs.MemShared {
|
||||
share = true
|
||||
}
|
||||
|
||||
return share, target, memoryBack
|
||||
}
|
||||
|
||||
func (q *qemu) setupVirtioMem() error {
|
||||
maxMem, err := q.hostMemMB()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// 1024 is size for nvdimm
|
||||
sizeMB := int(maxMem) - int(q.config.MemorySize)
|
||||
|
||||
share, target, memoryBack := q.getMemArgs()
|
||||
err = q.qmpSetup()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = q.qmpMonitorCh.qmp.ExecMemdevAdd(q.qmpMonitorCh.ctx, memoryBack, "virtiomem", target, sizeMB, share, "virtio-mem-pci", "virtiomem0")
|
||||
if err == nil {
|
||||
q.config.VirtioMem = true
|
||||
q.Logger().Infof("Setup %dMB virtio-mem-pci success", sizeMB)
|
||||
} else {
|
||||
help := ""
|
||||
if strings.Contains(err.Error(), "Cannot allocate memory") {
|
||||
help = ". Please use command \"echo 1 > /proc/sys/vm/overcommit_memory\" handle it."
|
||||
}
|
||||
err = fmt.Errorf("Add %dMB virtio-mem-pci fail %s%s", sizeMB, err.Error(), help)
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
// startSandbox will start the Sandbox's VM.
|
||||
func (q *qemu) startSandbox(timeout int) error {
|
||||
span, _ := q.trace("startSandbox")
|
||||
@ -744,6 +794,10 @@ func (q *qemu) startSandbox(timeout int) error {
|
||||
}
|
||||
}
|
||||
|
||||
if q.config.VirtioMem {
|
||||
err = q.setupVirtioMem()
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
@ -1449,9 +1503,6 @@ func (q *qemu) hotplugMemory(memDev *memoryDevice, op operation) (int, error) {
|
||||
|
||||
func (q *qemu) hotplugAddMemory(memDev *memoryDevice) (int, error) {
|
||||
memoryDevices, err := q.qmpMonitorCh.qmp.ExecQueryMemoryDevices(q.qmpMonitorCh.ctx)
|
||||
share := false
|
||||
target := ""
|
||||
memoryBack := "memory-backend-ram"
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("failed to query memory devices: %v", err)
|
||||
}
|
||||
@ -1465,19 +1516,8 @@ func (q *qemu) hotplugAddMemory(memDev *memoryDevice) (int, error) {
|
||||
}
|
||||
memDev.slot = maxSlot + 1
|
||||
}
|
||||
if q.qemuConfig.Knobs.HugePages {
|
||||
// we are setting all the bits that govmm sets when hugepages are enabled.
|
||||
// https://github.com/intel/govmm/blob/master/qemu/qemu.go#L1677
|
||||
target = "/dev/hugepages"
|
||||
memoryBack = "memory-backend-file"
|
||||
share = true
|
||||
} else if q.config.SharedFS == config.VirtioFS || q.config.FileBackedMemRootDir != "" {
|
||||
target = q.qemuConfig.Memory.Path
|
||||
memoryBack = "memory-backend-file"
|
||||
}
|
||||
if q.qemuConfig.Knobs.MemShared {
|
||||
share = true
|
||||
}
|
||||
|
||||
share, target, memoryBack := q.getMemArgs()
|
||||
err = q.qmpMonitorCh.qmp.ExecHotplugMemory(q.qmpMonitorCh.ctx, memoryBack, "mem"+strconv.Itoa(memDev.slot), target, memDev.sizeMB, share)
|
||||
if err != nil {
|
||||
q.Logger().WithError(err).Error("hotplug memory")
|
||||
@ -1661,6 +1701,17 @@ func (q *qemu) resizeMemory(reqMemMB uint32, memoryBlockSizeMB uint32, probe boo
|
||||
return 0, memoryDevice{}, err
|
||||
}
|
||||
var addMemDevice memoryDevice
|
||||
if q.config.VirtioMem && currentMemory != reqMemMB {
|
||||
q.Logger().WithField("hotplug", "memory").Debugf("resize memory from %dMB to %dMB", currentMemory, reqMemMB)
|
||||
sizeByte := (reqMemMB - q.config.MemorySize) * 1024 * 1024
|
||||
err = q.qmpMonitorCh.qmp.ExecQomSet(q.qmpMonitorCh.ctx, "virtiomem0", "requested-size", uint64(sizeByte))
|
||||
if err != nil {
|
||||
return 0, memoryDevice{}, err
|
||||
}
|
||||
q.state.HotpluggedMemory = int(sizeByte / 1024 / 1024)
|
||||
return reqMemMB, memoryDevice{}, nil
|
||||
}
|
||||
|
||||
switch {
|
||||
case currentMemory < reqMemMB:
|
||||
//hotplug
|
||||
|
Loading…
Reference in New Issue
Block a user