mirror of
https://github.com/kata-containers/kata-containers.git
synced 2026-07-01 06:28:11 +00:00
govmm: Add multi-NUMA memory backend and distance matrix support
Introduce NUMANode and NUMADist types, add NUMANodes/NUMADists fields to Config, and implement appendMultiNUMAMemoryKnobs() to generate per-node memory-backend objects with host-nodes/policy=bind, -numa node entries with cpus= ranges, and -numa dist entries for the distance matrix. Gate the multi-NUMA path in appendMemoryKnobs() behind isDimmSupported() to ensure architectures without DIMM support (s390x, riscv64) fall back to the single-node path. Drop 386 from isDimmSupported since 32-bit x86 is not a supported Kata target. Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com> Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
This commit is contained in:
committed by
Fabiano Fidêncio
parent
8d2ecaabb5
commit
1e9da61d48
@@ -152,7 +152,7 @@ const (
|
||||
|
||||
func isDimmSupported(config *Config) bool {
|
||||
switch runtime.GOARCH {
|
||||
case "amd64", "386", "ppc64le", "arm64":
|
||||
case "amd64", "ppc64le", "arm64":
|
||||
if config != nil && config.Machine.Type == MachineTypeMicrovm {
|
||||
// microvm does not support NUMA
|
||||
return false
|
||||
@@ -1586,8 +1586,13 @@ func (vhostuserDev VhostUserDevice) QemuNetParams(config *Config) []string {
|
||||
deviceParams = append(deviceParams, fmt.Sprintf("netdev=%s", vhostuserDev.TypeDevID))
|
||||
deviceParams = append(deviceParams, fmt.Sprintf("mac=%s", vhostuserDev.Address))
|
||||
|
||||
if vhostuserDev.Transport.isVirtioPCI(config) && vhostuserDev.ROMFile != "" {
|
||||
deviceParams = append(deviceParams, fmt.Sprintf("romfile=%s", vhostuserDev.ROMFile))
|
||||
if vhostuserDev.Transport.isVirtioPCI(config) {
|
||||
// Pin to pcie.0 so pxb-pcie (when present) doesn't capture
|
||||
// this leaf device as the default bus.
|
||||
deviceParams = append(deviceParams, "bus=pcie.0")
|
||||
if vhostuserDev.ROMFile != "" {
|
||||
deviceParams = append(deviceParams, fmt.Sprintf("romfile=%s", vhostuserDev.ROMFile))
|
||||
}
|
||||
}
|
||||
|
||||
qemuParams = append(qemuParams, "-netdev")
|
||||
@@ -1612,8 +1617,11 @@ func (vhostuserDev VhostUserDevice) QemuSCSIParams(config *Config) []string {
|
||||
deviceParams = append(deviceParams, fmt.Sprintf("id=%s", vhostuserDev.TypeDevID))
|
||||
deviceParams = append(deviceParams, fmt.Sprintf("chardev=%s", vhostuserDev.CharDevID))
|
||||
|
||||
if vhostuserDev.Transport.isVirtioPCI(config) && vhostuserDev.ROMFile != "" {
|
||||
deviceParams = append(deviceParams, fmt.Sprintf("romfile=%s", vhostuserDev.ROMFile))
|
||||
if vhostuserDev.Transport.isVirtioPCI(config) {
|
||||
deviceParams = append(deviceParams, "bus=pcie.0")
|
||||
if vhostuserDev.ROMFile != "" {
|
||||
deviceParams = append(deviceParams, fmt.Sprintf("romfile=%s", vhostuserDev.ROMFile))
|
||||
}
|
||||
}
|
||||
|
||||
qemuParams = append(qemuParams, "-device")
|
||||
@@ -1637,8 +1645,11 @@ func (vhostuserDev VhostUserDevice) QemuBlkParams(config *Config) []string {
|
||||
deviceParams = append(deviceParams, "size=512M")
|
||||
deviceParams = append(deviceParams, fmt.Sprintf("chardev=%s", vhostuserDev.CharDevID))
|
||||
|
||||
if vhostuserDev.Transport.isVirtioPCI(config) && vhostuserDev.ROMFile != "" {
|
||||
deviceParams = append(deviceParams, fmt.Sprintf("romfile=%s", vhostuserDev.ROMFile))
|
||||
if vhostuserDev.Transport.isVirtioPCI(config) {
|
||||
deviceParams = append(deviceParams, "bus=pcie.0")
|
||||
if vhostuserDev.ROMFile != "" {
|
||||
deviceParams = append(deviceParams, fmt.Sprintf("romfile=%s", vhostuserDev.ROMFile))
|
||||
}
|
||||
}
|
||||
|
||||
qemuParams = append(qemuParams, "-device")
|
||||
@@ -1674,8 +1685,11 @@ func (vhostuserDev VhostUserDevice) QemuFSParams(config *Config) []string {
|
||||
}
|
||||
deviceParams = append(deviceParams, fmt.Sprintf("devno=%s", vhostuserDev.DevNo))
|
||||
}
|
||||
if vhostuserDev.Transport.isVirtioPCI(config) && vhostuserDev.ROMFile != "" {
|
||||
deviceParams = append(deviceParams, fmt.Sprintf("romfile=%s", vhostuserDev.ROMFile))
|
||||
if vhostuserDev.Transport.isVirtioPCI(config) {
|
||||
deviceParams = append(deviceParams, "bus=pcie.0")
|
||||
if vhostuserDev.ROMFile != "" {
|
||||
deviceParams = append(deviceParams, fmt.Sprintf("romfile=%s", vhostuserDev.ROMFile))
|
||||
}
|
||||
}
|
||||
|
||||
qemuParams = append(qemuParams, "-device")
|
||||
@@ -2689,7 +2703,8 @@ type SMP struct {
|
||||
Sockets uint32
|
||||
|
||||
// MaxCPUs is the maximum number of VCPUs that a VM can have.
|
||||
// This value, if non-zero, MUST BE equal to or greater than CPUs
|
||||
// This value, if non-zero, MUST BE equal to or greater than CPUs,
|
||||
// and, when more than one NUMA node is configured, must equal Sockets * Cores * Threads if all three are non-zero.
|
||||
MaxCPUs uint32
|
||||
}
|
||||
|
||||
@@ -2775,6 +2790,36 @@ func (fwcfg FwCfg) QemuParams(config *Config) []string {
|
||||
return qemuParams
|
||||
}
|
||||
|
||||
// NUMANode describes a guest NUMA node and its mapping to host resources.
// Each entry is turned by appendMultiNUMAMemoryKnobs into one "-object"
// memory backend plus one "-numa node" argument.
|
||||
type NUMANode struct {
|
||||
// NodeID is the guest NUMA node identifier (0-based).
// It is emitted as nodeid= and also used to build the backend id
// "numa-mem<NodeID>".
|
||||
NodeID uint32
|
||||
|
||||
// CPUs is the guest vCPU range assigned to this node (e.g. "0-3").
// It is emitted as cpus=; when empty, no cpus= option is added.
|
||||
CPUs string
|
||||
|
||||
// MemSize is the amount of memory for this node (e.g. "512M", "1G").
// It is emitted verbatim as the backend's size= option.
|
||||
MemSize string
|
||||
|
||||
// HostNodes is the host NUMA node(s) this guest node maps to (e.g. "0" or "0-1").
// When non-empty it is emitted as host-nodes= together with policy=bind;
// when empty, neither option is added.
|
||||
HostNodes string
|
||||
|
||||
// MemBackendType selects the QEMU memory backend object type.
|
||||
// Typical values: "memory-backend-ram" or "memory-backend-file".
// When empty, appendMultiNUMAMemoryKnobs substitutes a default backend type.
|
||||
MemBackendType string
|
||||
|
||||
// MemBackendPath is the mem-path for file-backed memory (hugepages, file-backed).
|
||||
// Empty when using memory-backend-ram.
// When non-empty it is emitted as the backend's mem-path= option.
|
||||
MemBackendPath string
|
||||
}
|
||||
|
||||
// NUMADist describes a NUMA distance entry for `-numa dist`.
// Each entry is emitted as "dist,src=<Src>,dst=<Dst>,val=<Val>".
|
||||
type NUMADist struct {
|
||||
// Src is the source guest NUMA node ID (emitted as src=).
Src uint32
|
||||
// Dst is the destination guest NUMA node ID (emitted as dst=).
Dst uint32
|
||||
// Val is the distance between Src and Dst (emitted as val=).
Val uint32
|
||||
}
|
||||
|
||||
// Knobs regroups a set of qemu boolean settings
|
||||
type Knobs struct {
|
||||
// NoUserConfig prevents qemu from loading user config files.
|
||||
@@ -2922,6 +2967,14 @@ type Config struct {
|
||||
|
||||
IOThreads []IOThread
|
||||
|
||||
// NUMANodes defines multi-NUMA guest topology. When non-empty,
|
||||
// appendMemoryKnobs creates per-node memory backends and -numa entries
|
||||
// instead of a single flat memory region.
|
||||
NUMANodes []NUMANode
|
||||
|
||||
// NUMADists defines inter-node distance entries emitted as -numa dist.
|
||||
NUMADists []NUMADist
|
||||
|
||||
// PidFile is the -pidfile parameter
|
||||
PidFile string
|
||||
|
||||
@@ -3096,6 +3149,13 @@ func (config *Config) appendCPUs() error {
|
||||
return fmt.Errorf("MaxCPUs %d must be equal to or greater than CPUs %d",
|
||||
config.SMP.MaxCPUs, config.SMP.CPUs)
|
||||
}
|
||||
if len(config.NUMANodes) > 1 && config.SMP.Sockets > 0 && config.SMP.Cores > 0 && config.SMP.Threads > 0 {
|
||||
expected := config.SMP.Sockets * config.SMP.Cores * config.SMP.Threads
|
||||
if config.SMP.MaxCPUs != expected {
|
||||
return fmt.Errorf("MaxCPUs %d must equal Sockets(%d) * Cores(%d) * Threads(%d) = %d",
|
||||
config.SMP.MaxCPUs, config.SMP.Sockets, config.SMP.Cores, config.SMP.Threads, expected)
|
||||
}
|
||||
}
|
||||
SMPParams = append(SMPParams, fmt.Sprintf("maxcpus=%d", config.SMP.MaxCPUs))
|
||||
}
|
||||
|
||||
@@ -3169,6 +3229,12 @@ func (config *Config) appendMemoryKnobs() {
|
||||
if config.Memory.Size == "" {
|
||||
return
|
||||
}
|
||||
|
||||
if len(config.NUMANodes) > 0 && isDimmSupported(config) {
|
||||
config.appendMultiNUMAMemoryKnobs()
|
||||
return
|
||||
}
|
||||
|
||||
var objMemParam, numaMemParam string
|
||||
dimmName := "dimm1"
|
||||
if config.Knobs.HugePages {
|
||||
@@ -3200,6 +3266,49 @@ func (config *Config) appendMemoryKnobs() {
|
||||
}
|
||||
}
|
||||
|
||||
func (config *Config) appendMultiNUMAMemoryKnobs() {
|
||||
for _, node := range config.NUMANodes {
|
||||
memID := fmt.Sprintf("numa-mem%d", node.NodeID)
|
||||
|
||||
backendType := node.MemBackendType
|
||||
if backendType == "" {
|
||||
backendType = "memory-backend-ram"
|
||||
}
|
||||
|
||||
objMemParam := fmt.Sprintf("%s,id=%s,size=%s", backendType, memID, node.MemSize)
|
||||
|
||||
if node.MemBackendPath != "" {
|
||||
objMemParam += ",mem-path=" + node.MemBackendPath
|
||||
}
|
||||
|
||||
if node.HostNodes != "" {
|
||||
objMemParam += ",host-nodes=" + node.HostNodes + ",policy=bind"
|
||||
}
|
||||
|
||||
if config.Knobs.MemShared {
|
||||
objMemParam += ",share=on"
|
||||
}
|
||||
if config.Knobs.MemPrealloc {
|
||||
objMemParam += ",prealloc=on"
|
||||
}
|
||||
|
||||
config.qemuParams = append(config.qemuParams, "-object")
|
||||
config.qemuParams = append(config.qemuParams, objMemParam)
|
||||
|
||||
numaParam := fmt.Sprintf("node,nodeid=%d,memdev=%s", node.NodeID, memID)
|
||||
if node.CPUs != "" {
|
||||
numaParam += ",cpus=" + node.CPUs
|
||||
}
|
||||
config.qemuParams = append(config.qemuParams, "-numa")
|
||||
config.qemuParams = append(config.qemuParams, numaParam)
|
||||
}
|
||||
|
||||
for _, dist := range config.NUMADists {
|
||||
config.qemuParams = append(config.qemuParams, "-numa")
|
||||
config.qemuParams = append(config.qemuParams, fmt.Sprintf("dist,src=%d,dst=%d,val=%d", dist.Src, dist.Dst, dist.Val))
|
||||
}
|
||||
}
|
||||
|
||||
func (config *Config) appendKnobs() {
|
||||
if config.Knobs.NoUserConfig {
|
||||
config.qemuParams = append(config.qemuParams, "-no-user-config")
|
||||
|
||||
@@ -14,8 +14,8 @@ var (
|
||||
deviceNetworkString = "-netdev tap,id=tap0,vhost=on,ifname=ceth0,downscript=no,script=no -device driver=virtio-net-pci,netdev=tap0,mac=01:02:de:ad:be:ef,bus=/pci-bus/pcie.0,addr=ff,disable-modern=true,romfile=efi-virtio.rom"
|
||||
deviceNetworkStringMq = "-netdev tap,id=tap0,vhost=on,fds=3:4 -device driver=virtio-net-pci,netdev=tap0,mac=01:02:de:ad:be:ef,bus=/pci-bus/pcie.0,addr=ff,disable-modern=true,mq=on,vectors=6,romfile=efi-virtio.rom"
|
||||
deviceSerialString = "-device virtio-serial-pci,disable-modern=true,id=serial0,romfile=efi-virtio.rom,max_ports=2"
|
||||
deviceVhostUserNetString = "-chardev socket,id=char1,path=/tmp/nonexistentsocket.socket -netdev type=vhost-user,id=net1,chardev=char1,vhostforce -device virtio-net-pci,netdev=net1,mac=00:11:22:33:44:55,romfile=efi-virtio.rom"
|
||||
deviceVSOCKString = "-device vhost-vsock-pci,disable-modern=true,id=vhost-vsock-pci0,guest-cid=4,romfile=efi-virtio.rom"
|
||||
deviceVhostUserNetString = "-chardev socket,id=char1,path=/tmp/nonexistentsocket.socket -netdev type=vhost-user,id=net1,chardev=char1,vhostforce -device virtio-net-pci,netdev=net1,mac=00:11:22:33:44:55,bus=pcie.0,romfile=efi-virtio.rom"
|
||||
deviceVSOCKString = "-device vhost-vsock-pci,disable-modern=true,id=vhost-vsock-pci0,guest-cid=4,bus=pcie.0,romfile=efi-virtio.rom"
|
||||
deviceVFIOString = "-device vfio-pci,host=02:10.0,x-pci-vendor-id=0x1234,x-pci-device-id=0x5678,romfile=efi-virtio.rom"
|
||||
devicePCIeRootPortSimpleString = "-device pcie-root-port,id=rp1,bus=pcie.0,chassis=0x00,slot=0x00,multifunction=off"
|
||||
devicePCIeRootPortFullString = "-device pcie-root-port,id=rp2,bus=pcie.0,chassis=0x0,slot=0x1,addr=0x2,multifunction=on,bus-reserve=0x3,pref64-reserve=16G,mem-reserve=1G,io-reserve=512M,romfile=efi-virtio.rom"
|
||||
@@ -23,8 +23,8 @@ var (
|
||||
deviceVFIOPCIeFullString = "-device vfio-pci,host=02:00.0,x-pci-vendor-id=0x10de,x-pci-device-id=0x15f8,romfile=efi-virtio.rom,bus=rp1"
|
||||
deviceSCSIControllerStr = "-device virtio-scsi-pci,id=foo,disable-modern=false,romfile=efi-virtio.rom"
|
||||
deviceSCSIControllerBusAddrStr = "-device virtio-scsi-pci,id=foo,bus=pci.0,addr=00:04.0,disable-modern=true,iothread=iothread1,romfile=efi-virtio.rom"
|
||||
deviceVhostUserSCSIString = "-chardev socket,id=char1,path=/tmp/nonexistentsocket.socket -device vhost-user-scsi-pci,id=scsi1,chardev=char1,romfile=efi-virtio.rom"
|
||||
deviceVhostUserBlkString = "-chardev socket,id=char2,path=/tmp/nonexistentsocket.socket -device vhost-user-blk-pci,logical_block_size=4096,size=512M,chardev=char2,romfile=efi-virtio.rom"
|
||||
deviceVhostUserSCSIString = "-chardev socket,id=char1,path=/tmp/nonexistentsocket.socket -device vhost-user-scsi-pci,id=scsi1,chardev=char1,bus=pcie.0,romfile=efi-virtio.rom"
|
||||
deviceVhostUserBlkString = "-chardev socket,id=char2,path=/tmp/nonexistentsocket.socket -device vhost-user-blk-pci,logical_block_size=4096,size=512M,chardev=char2,bus=pcie.0,romfile=efi-virtio.rom"
|
||||
deviceBlockString = "-device virtio-blk-pci,disable-modern=true,drive=hd0,config-wce=off,romfile=efi-virtio.rom,share-rw=on,serial=hd0 -drive id=hd0,file=/var/lib/vm.img,aio=threads,format=qcow2,if=none,readonly=on"
|
||||
devicePCIBridgeString = "-device pci-bridge,bus=/pci-bus/pcie.0,id=mybridge,chassis_nr=5,shpc=on,addr=ff,romfile=efi-virtio.rom"
|
||||
devicePCIBridgeStringReserved = "-device pci-bridge,bus=/pci-bus/pcie.0,id=mybridge,chassis_nr=5,shpc=off,addr=ff,romfile=efi-virtio.rom,io-reserve=4k,mem-reserve=1m,pref64-reserve=1m"
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"fmt"
|
||||
"os"
|
||||
"reflect"
|
||||
"runtime"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
@@ -1117,6 +1118,140 @@ func TestBadMemoryKnobs(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestAppendMultiNUMAMemoryKnobs(t *testing.T) {
|
||||
if runtime.GOARCH != "amd64" && runtime.GOARCH != "arm64" {
|
||||
t.Skipf("multi-NUMA not supported on %s", runtime.GOARCH)
|
||||
}
|
||||
c := &Config{
|
||||
Memory: Memory{
|
||||
Size: "2G",
|
||||
Slots: 8,
|
||||
MaxMem: "4G",
|
||||
},
|
||||
NUMANodes: []NUMANode{
|
||||
{
|
||||
NodeID: 0,
|
||||
CPUs: "0-3",
|
||||
MemSize: "1G",
|
||||
HostNodes: "0",
|
||||
MemBackendType: "memory-backend-ram",
|
||||
},
|
||||
{
|
||||
NodeID: 1,
|
||||
CPUs: "4-7",
|
||||
MemSize: "1G",
|
||||
HostNodes: "1",
|
||||
MemBackendType: "memory-backend-ram",
|
||||
},
|
||||
},
|
||||
Knobs: Knobs{
|
||||
MemShared: true,
|
||||
MemPrealloc: true,
|
||||
},
|
||||
}
|
||||
|
||||
c.appendMemoryKnobs()
|
||||
|
||||
expected := []string{
|
||||
"-object", "memory-backend-ram,id=numa-mem0,size=1G,host-nodes=0,policy=bind,share=on,prealloc=on",
|
||||
"-numa", "node,nodeid=0,memdev=numa-mem0,cpus=0-3",
|
||||
"-object", "memory-backend-ram,id=numa-mem1,size=1G,host-nodes=1,policy=bind,share=on,prealloc=on",
|
||||
"-numa", "node,nodeid=1,memdev=numa-mem1,cpus=4-7",
|
||||
}
|
||||
if len(c.qemuParams) != len(expected) {
|
||||
t.Fatalf("Expected %d params, got %d: %v", len(expected), len(c.qemuParams), c.qemuParams)
|
||||
}
|
||||
for i, p := range expected {
|
||||
if c.qemuParams[i] != p {
|
||||
t.Errorf("Param %d: expected %q, got %q", i, p, c.qemuParams[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestAppendMultiNUMAHugePages(t *testing.T) {
|
||||
if runtime.GOARCH != "amd64" && runtime.GOARCH != "arm64" {
|
||||
t.Skipf("multi-NUMA not supported on %s", runtime.GOARCH)
|
||||
}
|
||||
c := &Config{
|
||||
Memory: Memory{
|
||||
Size: "2G",
|
||||
Slots: 8,
|
||||
MaxMem: "4G",
|
||||
},
|
||||
NUMANodes: []NUMANode{
|
||||
{
|
||||
NodeID: 0,
|
||||
CPUs: "0-1",
|
||||
MemSize: "1G",
|
||||
HostNodes: "0",
|
||||
MemBackendType: "memory-backend-file",
|
||||
MemBackendPath: "/dev/hugepages",
|
||||
},
|
||||
{
|
||||
NodeID: 1,
|
||||
CPUs: "2-3",
|
||||
MemSize: "1G",
|
||||
HostNodes: "1",
|
||||
MemBackendType: "memory-backend-file",
|
||||
MemBackendPath: "/dev/hugepages",
|
||||
},
|
||||
},
|
||||
Knobs: Knobs{
|
||||
MemShared: true,
|
||||
},
|
||||
}
|
||||
|
||||
c.appendMemoryKnobs()
|
||||
|
||||
expected := []string{
|
||||
"-object", "memory-backend-file,id=numa-mem0,size=1G,mem-path=/dev/hugepages,host-nodes=0,policy=bind,share=on",
|
||||
"-numa", "node,nodeid=0,memdev=numa-mem0,cpus=0-1",
|
||||
"-object", "memory-backend-file,id=numa-mem1,size=1G,mem-path=/dev/hugepages,host-nodes=1,policy=bind,share=on",
|
||||
"-numa", "node,nodeid=1,memdev=numa-mem1,cpus=2-3",
|
||||
}
|
||||
if len(c.qemuParams) != len(expected) {
|
||||
t.Fatalf("Expected %d params, got %d: %v", len(expected), len(c.qemuParams), c.qemuParams)
|
||||
}
|
||||
for i, p := range expected {
|
||||
if c.qemuParams[i] != p {
|
||||
t.Errorf("Param %d: expected %q, got %q", i, p, c.qemuParams[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestAppendNUMADist(t *testing.T) {
|
||||
if runtime.GOARCH != "amd64" && runtime.GOARCH != "arm64" {
|
||||
t.Skipf("multi-NUMA not supported on %s", runtime.GOARCH)
|
||||
}
|
||||
c := &Config{
|
||||
Memory: Memory{
|
||||
Size: "2G",
|
||||
},
|
||||
NUMANodes: []NUMANode{
|
||||
{NodeID: 0, CPUs: "0-1", MemSize: "1G", MemBackendType: "memory-backend-ram"},
|
||||
{NodeID: 1, CPUs: "2-3", MemSize: "1G", MemBackendType: "memory-backend-ram"},
|
||||
},
|
||||
NUMADists: []NUMADist{
|
||||
{Src: 0, Dst: 1, Val: 20},
|
||||
{Src: 1, Dst: 0, Val: 20},
|
||||
},
|
||||
}
|
||||
|
||||
c.appendMemoryKnobs()
|
||||
|
||||
expectedDist := []string{
|
||||
"-numa", "dist,src=0,dst=1,val=20",
|
||||
"-numa", "dist,src=1,dst=0,val=20",
|
||||
}
|
||||
params := c.qemuParams
|
||||
distParams := params[len(params)-4:]
|
||||
for i, p := range expectedDist {
|
||||
if distParams[i] != p {
|
||||
t.Errorf("Dist param %d: expected %q, got %q", i, p, distParams[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestBadBios(t *testing.T) {
|
||||
c := &Config{}
|
||||
c.appendBios()
|
||||
|
||||
Reference in New Issue
Block a user