govmm: Add multi-NUMA memory backend and distance matrix support

Introduce NUMANode and NUMADist types, add NUMANodes/NUMADists fields to
Config, and implement appendMultiNUMAMemoryKnobs() to generate per-node
memory-backend objects with host-nodes/policy=bind, -numa node entries
with cpus= ranges, and -numa dist entries for the distance matrix.

Gate the multi-NUMA path in appendMemoryKnobs() behind isDimmSupported()
to ensure architectures without DIMM support (s390x, riscv64) fall back
to the single-node path. Drop 386 from isDimmSupported since 32-bit x86
is not a supported Kata target.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
This commit is contained in:
Fabiano Fidêncio
2026-04-14 14:58:11 +02:00
committed by Fabiano Fidêncio
parent 8d2ecaabb5
commit 1e9da61d48
3 changed files with 258 additions and 14 deletions

View File

@@ -152,7 +152,7 @@ const (
func isDimmSupported(config *Config) bool {
switch runtime.GOARCH {
case "amd64", "386", "ppc64le", "arm64":
case "amd64", "ppc64le", "arm64":
if config != nil && config.Machine.Type == MachineTypeMicrovm {
// microvm does not support NUMA
return false
@@ -1586,8 +1586,13 @@ func (vhostuserDev VhostUserDevice) QemuNetParams(config *Config) []string {
deviceParams = append(deviceParams, fmt.Sprintf("netdev=%s", vhostuserDev.TypeDevID))
deviceParams = append(deviceParams, fmt.Sprintf("mac=%s", vhostuserDev.Address))
if vhostuserDev.Transport.isVirtioPCI(config) && vhostuserDev.ROMFile != "" {
deviceParams = append(deviceParams, fmt.Sprintf("romfile=%s", vhostuserDev.ROMFile))
if vhostuserDev.Transport.isVirtioPCI(config) {
// Pin to pcie.0 so pxb-pcie (when present) doesn't capture
// this leaf device as the default bus.
deviceParams = append(deviceParams, "bus=pcie.0")
if vhostuserDev.ROMFile != "" {
deviceParams = append(deviceParams, fmt.Sprintf("romfile=%s", vhostuserDev.ROMFile))
}
}
qemuParams = append(qemuParams, "-netdev")
@@ -1612,8 +1617,11 @@ func (vhostuserDev VhostUserDevice) QemuSCSIParams(config *Config) []string {
deviceParams = append(deviceParams, fmt.Sprintf("id=%s", vhostuserDev.TypeDevID))
deviceParams = append(deviceParams, fmt.Sprintf("chardev=%s", vhostuserDev.CharDevID))
if vhostuserDev.Transport.isVirtioPCI(config) && vhostuserDev.ROMFile != "" {
deviceParams = append(deviceParams, fmt.Sprintf("romfile=%s", vhostuserDev.ROMFile))
if vhostuserDev.Transport.isVirtioPCI(config) {
deviceParams = append(deviceParams, "bus=pcie.0")
if vhostuserDev.ROMFile != "" {
deviceParams = append(deviceParams, fmt.Sprintf("romfile=%s", vhostuserDev.ROMFile))
}
}
qemuParams = append(qemuParams, "-device")
@@ -1637,8 +1645,11 @@ func (vhostuserDev VhostUserDevice) QemuBlkParams(config *Config) []string {
deviceParams = append(deviceParams, "size=512M")
deviceParams = append(deviceParams, fmt.Sprintf("chardev=%s", vhostuserDev.CharDevID))
if vhostuserDev.Transport.isVirtioPCI(config) && vhostuserDev.ROMFile != "" {
deviceParams = append(deviceParams, fmt.Sprintf("romfile=%s", vhostuserDev.ROMFile))
if vhostuserDev.Transport.isVirtioPCI(config) {
deviceParams = append(deviceParams, "bus=pcie.0")
if vhostuserDev.ROMFile != "" {
deviceParams = append(deviceParams, fmt.Sprintf("romfile=%s", vhostuserDev.ROMFile))
}
}
qemuParams = append(qemuParams, "-device")
@@ -1674,8 +1685,11 @@ func (vhostuserDev VhostUserDevice) QemuFSParams(config *Config) []string {
}
deviceParams = append(deviceParams, fmt.Sprintf("devno=%s", vhostuserDev.DevNo))
}
if vhostuserDev.Transport.isVirtioPCI(config) && vhostuserDev.ROMFile != "" {
deviceParams = append(deviceParams, fmt.Sprintf("romfile=%s", vhostuserDev.ROMFile))
if vhostuserDev.Transport.isVirtioPCI(config) {
deviceParams = append(deviceParams, "bus=pcie.0")
if vhostuserDev.ROMFile != "" {
deviceParams = append(deviceParams, fmt.Sprintf("romfile=%s", vhostuserDev.ROMFile))
}
}
qemuParams = append(qemuParams, "-device")
@@ -2689,7 +2703,8 @@ type SMP struct {
Sockets uint32
// MaxCPUs is the maximum number of VCPUs that a VM can have.
// This value, if non-zero, MUST BE equal to or greater than CPUs
// This value, if non-zero, MUST BE equal to or greater than CPUs,
// and must be equal to Sockets * Cores * Threads if all are non-zero.
MaxCPUs uint32
}
@@ -2775,6 +2790,36 @@ func (fwcfg FwCfg) QemuParams(config *Config) []string {
return qemuParams
}
// NUMANode describes a guest NUMA node and its mapping to host resources.
type NUMANode struct {
// NodeID is the guest NUMA node identifier (0-based).
NodeID uint32
// CPUs is the guest vCPU range assigned to this node (e.g. "0-3").
CPUs string
// MemSize is the amount of memory for this node (e.g. "512M", "1G").
MemSize string
// HostNodes is the host NUMA node(s) this guest node maps to (e.g. "0" or "0-1").
HostNodes string
// MemBackendType selects the QEMU memory backend object type.
// Typical values: "memory-backend-ram" or "memory-backend-file".
MemBackendType string
// MemBackendPath is the mem-path for file-backed memory (hugepages, file-backed).
// Empty when using memory-backend-ram.
MemBackendPath string
}
// NUMADist describes a NUMA distance entry for `-numa dist`.
type NUMADist struct {
Src uint32
Dst uint32
Val uint32
}
// Knobs regroups a set of qemu boolean settings
type Knobs struct {
// NoUserConfig prevents qemu from loading user config files.
@@ -2922,6 +2967,14 @@ type Config struct {
IOThreads []IOThread
// NUMANodes defines multi-NUMA guest topology. When non-empty,
// appendMemoryKnobs creates per-node memory backends and -numa entries
// instead of a single flat memory region.
NUMANodes []NUMANode
// NUMADists defines inter-node distance entries emitted as -numa dist.
NUMADists []NUMADist
// PidFile is the -pidfile parameter
PidFile string
@@ -3096,6 +3149,13 @@ func (config *Config) appendCPUs() error {
return fmt.Errorf("MaxCPUs %d must be equal to or greater than CPUs %d",
config.SMP.MaxCPUs, config.SMP.CPUs)
}
if len(config.NUMANodes) > 1 && config.SMP.Sockets > 0 && config.SMP.Cores > 0 && config.SMP.Threads > 0 {
expected := config.SMP.Sockets * config.SMP.Cores * config.SMP.Threads
if config.SMP.MaxCPUs != expected {
return fmt.Errorf("MaxCPUs %d must equal Sockets(%d) * Cores(%d) * Threads(%d) = %d",
config.SMP.MaxCPUs, config.SMP.Sockets, config.SMP.Cores, config.SMP.Threads, expected)
}
}
SMPParams = append(SMPParams, fmt.Sprintf("maxcpus=%d", config.SMP.MaxCPUs))
}
@@ -3169,6 +3229,12 @@ func (config *Config) appendMemoryKnobs() {
if config.Memory.Size == "" {
return
}
if len(config.NUMANodes) > 0 && isDimmSupported(config) {
config.appendMultiNUMAMemoryKnobs()
return
}
var objMemParam, numaMemParam string
dimmName := "dimm1"
if config.Knobs.HugePages {
@@ -3200,6 +3266,49 @@ func (config *Config) appendMemoryKnobs() {
}
}
func (config *Config) appendMultiNUMAMemoryKnobs() {
for _, node := range config.NUMANodes {
memID := fmt.Sprintf("numa-mem%d", node.NodeID)
backendType := node.MemBackendType
if backendType == "" {
backendType = "memory-backend-ram"
}
objMemParam := fmt.Sprintf("%s,id=%s,size=%s", backendType, memID, node.MemSize)
if node.MemBackendPath != "" {
objMemParam += ",mem-path=" + node.MemBackendPath
}
if node.HostNodes != "" {
objMemParam += ",host-nodes=" + node.HostNodes + ",policy=bind"
}
if config.Knobs.MemShared {
objMemParam += ",share=on"
}
if config.Knobs.MemPrealloc {
objMemParam += ",prealloc=on"
}
config.qemuParams = append(config.qemuParams, "-object")
config.qemuParams = append(config.qemuParams, objMemParam)
numaParam := fmt.Sprintf("node,nodeid=%d,memdev=%s", node.NodeID, memID)
if node.CPUs != "" {
numaParam += ",cpus=" + node.CPUs
}
config.qemuParams = append(config.qemuParams, "-numa")
config.qemuParams = append(config.qemuParams, numaParam)
}
for _, dist := range config.NUMADists {
config.qemuParams = append(config.qemuParams, "-numa")
config.qemuParams = append(config.qemuParams, fmt.Sprintf("dist,src=%d,dst=%d,val=%d", dist.Src, dist.Dst, dist.Val))
}
}
func (config *Config) appendKnobs() {
if config.Knobs.NoUserConfig {
config.qemuParams = append(config.qemuParams, "-no-user-config")

View File

@@ -14,8 +14,8 @@ var (
deviceNetworkString = "-netdev tap,id=tap0,vhost=on,ifname=ceth0,downscript=no,script=no -device driver=virtio-net-pci,netdev=tap0,mac=01:02:de:ad:be:ef,bus=/pci-bus/pcie.0,addr=ff,disable-modern=true,romfile=efi-virtio.rom"
deviceNetworkStringMq = "-netdev tap,id=tap0,vhost=on,fds=3:4 -device driver=virtio-net-pci,netdev=tap0,mac=01:02:de:ad:be:ef,bus=/pci-bus/pcie.0,addr=ff,disable-modern=true,mq=on,vectors=6,romfile=efi-virtio.rom"
deviceSerialString = "-device virtio-serial-pci,disable-modern=true,id=serial0,romfile=efi-virtio.rom,max_ports=2"
deviceVhostUserNetString = "-chardev socket,id=char1,path=/tmp/nonexistentsocket.socket -netdev type=vhost-user,id=net1,chardev=char1,vhostforce -device virtio-net-pci,netdev=net1,mac=00:11:22:33:44:55,romfile=efi-virtio.rom"
deviceVSOCKString = "-device vhost-vsock-pci,disable-modern=true,id=vhost-vsock-pci0,guest-cid=4,romfile=efi-virtio.rom"
deviceVhostUserNetString = "-chardev socket,id=char1,path=/tmp/nonexistentsocket.socket -netdev type=vhost-user,id=net1,chardev=char1,vhostforce -device virtio-net-pci,netdev=net1,mac=00:11:22:33:44:55,bus=pcie.0,romfile=efi-virtio.rom"
deviceVSOCKString = "-device vhost-vsock-pci,disable-modern=true,id=vhost-vsock-pci0,guest-cid=4,bus=pcie.0,romfile=efi-virtio.rom"
deviceVFIOString = "-device vfio-pci,host=02:10.0,x-pci-vendor-id=0x1234,x-pci-device-id=0x5678,romfile=efi-virtio.rom"
devicePCIeRootPortSimpleString = "-device pcie-root-port,id=rp1,bus=pcie.0,chassis=0x00,slot=0x00,multifunction=off"
devicePCIeRootPortFullString = "-device pcie-root-port,id=rp2,bus=pcie.0,chassis=0x0,slot=0x1,addr=0x2,multifunction=on,bus-reserve=0x3,pref64-reserve=16G,mem-reserve=1G,io-reserve=512M,romfile=efi-virtio.rom"
@@ -23,8 +23,8 @@ var (
deviceVFIOPCIeFullString = "-device vfio-pci,host=02:00.0,x-pci-vendor-id=0x10de,x-pci-device-id=0x15f8,romfile=efi-virtio.rom,bus=rp1"
deviceSCSIControllerStr = "-device virtio-scsi-pci,id=foo,disable-modern=false,romfile=efi-virtio.rom"
deviceSCSIControllerBusAddrStr = "-device virtio-scsi-pci,id=foo,bus=pci.0,addr=00:04.0,disable-modern=true,iothread=iothread1,romfile=efi-virtio.rom"
deviceVhostUserSCSIString = "-chardev socket,id=char1,path=/tmp/nonexistentsocket.socket -device vhost-user-scsi-pci,id=scsi1,chardev=char1,romfile=efi-virtio.rom"
deviceVhostUserBlkString = "-chardev socket,id=char2,path=/tmp/nonexistentsocket.socket -device vhost-user-blk-pci,logical_block_size=4096,size=512M,chardev=char2,romfile=efi-virtio.rom"
deviceVhostUserSCSIString = "-chardev socket,id=char1,path=/tmp/nonexistentsocket.socket -device vhost-user-scsi-pci,id=scsi1,chardev=char1,bus=pcie.0,romfile=efi-virtio.rom"
deviceVhostUserBlkString = "-chardev socket,id=char2,path=/tmp/nonexistentsocket.socket -device vhost-user-blk-pci,logical_block_size=4096,size=512M,chardev=char2,bus=pcie.0,romfile=efi-virtio.rom"
deviceBlockString = "-device virtio-blk-pci,disable-modern=true,drive=hd0,config-wce=off,romfile=efi-virtio.rom,share-rw=on,serial=hd0 -drive id=hd0,file=/var/lib/vm.img,aio=threads,format=qcow2,if=none,readonly=on"
devicePCIBridgeString = "-device pci-bridge,bus=/pci-bus/pcie.0,id=mybridge,chassis_nr=5,shpc=on,addr=ff,romfile=efi-virtio.rom"
devicePCIBridgeStringReserved = "-device pci-bridge,bus=/pci-bus/pcie.0,id=mybridge,chassis_nr=5,shpc=off,addr=ff,romfile=efi-virtio.rom,io-reserve=4k,mem-reserve=1m,pref64-reserve=1m"

View File

@@ -9,6 +9,7 @@ import (
"fmt"
"os"
"reflect"
"runtime"
"strings"
"testing"
)
@@ -1117,6 +1118,140 @@ func TestBadMemoryKnobs(t *testing.T) {
}
}
func TestAppendMultiNUMAMemoryKnobs(t *testing.T) {
if runtime.GOARCH != "amd64" && runtime.GOARCH != "arm64" {
t.Skipf("multi-NUMA not supported on %s", runtime.GOARCH)
}
c := &Config{
Memory: Memory{
Size: "2G",
Slots: 8,
MaxMem: "4G",
},
NUMANodes: []NUMANode{
{
NodeID: 0,
CPUs: "0-3",
MemSize: "1G",
HostNodes: "0",
MemBackendType: "memory-backend-ram",
},
{
NodeID: 1,
CPUs: "4-7",
MemSize: "1G",
HostNodes: "1",
MemBackendType: "memory-backend-ram",
},
},
Knobs: Knobs{
MemShared: true,
MemPrealloc: true,
},
}
c.appendMemoryKnobs()
expected := []string{
"-object", "memory-backend-ram,id=numa-mem0,size=1G,host-nodes=0,policy=bind,share=on,prealloc=on",
"-numa", "node,nodeid=0,memdev=numa-mem0,cpus=0-3",
"-object", "memory-backend-ram,id=numa-mem1,size=1G,host-nodes=1,policy=bind,share=on,prealloc=on",
"-numa", "node,nodeid=1,memdev=numa-mem1,cpus=4-7",
}
if len(c.qemuParams) != len(expected) {
t.Fatalf("Expected %d params, got %d: %v", len(expected), len(c.qemuParams), c.qemuParams)
}
for i, p := range expected {
if c.qemuParams[i] != p {
t.Errorf("Param %d: expected %q, got %q", i, p, c.qemuParams[i])
}
}
}
func TestAppendMultiNUMAHugePages(t *testing.T) {
if runtime.GOARCH != "amd64" && runtime.GOARCH != "arm64" {
t.Skipf("multi-NUMA not supported on %s", runtime.GOARCH)
}
c := &Config{
Memory: Memory{
Size: "2G",
Slots: 8,
MaxMem: "4G",
},
NUMANodes: []NUMANode{
{
NodeID: 0,
CPUs: "0-1",
MemSize: "1G",
HostNodes: "0",
MemBackendType: "memory-backend-file",
MemBackendPath: "/dev/hugepages",
},
{
NodeID: 1,
CPUs: "2-3",
MemSize: "1G",
HostNodes: "1",
MemBackendType: "memory-backend-file",
MemBackendPath: "/dev/hugepages",
},
},
Knobs: Knobs{
MemShared: true,
},
}
c.appendMemoryKnobs()
expected := []string{
"-object", "memory-backend-file,id=numa-mem0,size=1G,mem-path=/dev/hugepages,host-nodes=0,policy=bind,share=on",
"-numa", "node,nodeid=0,memdev=numa-mem0,cpus=0-1",
"-object", "memory-backend-file,id=numa-mem1,size=1G,mem-path=/dev/hugepages,host-nodes=1,policy=bind,share=on",
"-numa", "node,nodeid=1,memdev=numa-mem1,cpus=2-3",
}
if len(c.qemuParams) != len(expected) {
t.Fatalf("Expected %d params, got %d: %v", len(expected), len(c.qemuParams), c.qemuParams)
}
for i, p := range expected {
if c.qemuParams[i] != p {
t.Errorf("Param %d: expected %q, got %q", i, p, c.qemuParams[i])
}
}
}
func TestAppendNUMADist(t *testing.T) {
if runtime.GOARCH != "amd64" && runtime.GOARCH != "arm64" {
t.Skipf("multi-NUMA not supported on %s", runtime.GOARCH)
}
c := &Config{
Memory: Memory{
Size: "2G",
},
NUMANodes: []NUMANode{
{NodeID: 0, CPUs: "0-1", MemSize: "1G", MemBackendType: "memory-backend-ram"},
{NodeID: 1, CPUs: "2-3", MemSize: "1G", MemBackendType: "memory-backend-ram"},
},
NUMADists: []NUMADist{
{Src: 0, Dst: 1, Val: 20},
{Src: 1, Dst: 0, Val: 20},
},
}
c.appendMemoryKnobs()
expectedDist := []string{
"-numa", "dist,src=0,dst=1,val=20",
"-numa", "dist,src=1,dst=0,val=20",
}
params := c.qemuParams
distParams := params[len(params)-4:]
for i, p := range expectedDist {
if distParams[i] != p {
t.Errorf("Dist param %d: expected %q, got %q", i, p, distParams[i])
}
}
}
func TestBadBios(t *testing.T) {
c := &Config{}
c.appendBios()