runtime: Add NUMA-aware SMP topology

Make cpuTopology() NUMA-aware by accepting a numNUMANodes parameter.

When multiple NUMA nodes are configured (numNUMANodes > 1), restructure the SMP topology so that Sockets=numNUMANodes and Cores=ceil(maxvcpus/numNUMANodes), grouping vCPUs by socket per NUMA node. Ceiling division is used so that uneven vCPU counts (e.g. the +1 VMM overhead vCPU that Kata adds) still produce a QEMU-valid SMP topology where MaxCPUs == Sockets * Cores * Threads. As a consequence, MaxCPUs may be rounded up above maxvcpus: for example, maxvcpus=5 with 2 NUMA nodes yields 2 sockets of 3 cores each.

The previous round-down of DefaultMaxVCPUs to a multiple of the NUMA node count in validateHypervisorConfig() is removed.

When numNUMANodes <= 1, the existing flat topology (Sockets=maxvcpus, Cores=1) is preserved.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
2026-06-30 14:06:46 +00:00 · 2026-04-14 15:02:01 +02:00
parent 1e9da61d48
commit 1ee8bb5740
4 changed files with 61 additions and 12 deletions
--- a/src/runtime/virtcontainers/hypervisor_config_linux.go
+++ b/src/runtime/virtcontainers/hypervisor_config_linux.go
@@ -63,10 +63,6 @@ func validateHypervisorConfig(conf *HypervisorConfig) error {
 		conf.DefaultMaxVCPUs = defaultMaxVCPUs
 	}

-	if numNUMA := conf.NumGuestNUMANodes(); numNUMA > 1 {
-		conf.DefaultMaxVCPUs -= conf.DefaultMaxVCPUs % numNUMA
-	}
-
 	if conf.Msize9p == 0 && conf.SharedFS != config.VirtioFS {
 		conf.Msize9p = defaultMsize9p
 	}
--- a/src/runtime/virtcontainers/qemu.go
+++ b/src/runtime/virtcontainers/qemu.go
@@ -326,7 +326,7 @@ func (q *qemu) setup(ctx context.Context, id string, hypervisorConfig *Hyperviso
 }

 func (q *qemu) cpuTopology() govmmQemu.SMP {
-	return q.arch.cpuTopology(q.config.NumVCPUs(), q.config.DefaultMaxVCPUs)
+	return q.arch.cpuTopology(q.config.NumVCPUs(), q.config.DefaultMaxVCPUs, q.config.NumGuestNUMANodes())
 }

 func (q *qemu) memoryTopology() (govmmQemu.Memory, error) {
--- a/src/runtime/virtcontainers/qemu_arch_base.go
+++ b/src/runtime/virtcontainers/qemu_arch_base.go
@@ -61,8 +61,9 @@ type qemuArch interface {
 	// bridges sets the number bridges for the machine type
 	bridges(number uint32)

-	// cpuTopology returns the CPU topology for the given amount of vcpus
-	cpuTopology(vcpus, maxvcpus uint32) govmmQemu.SMP
+	// cpuTopology returns the CPU topology for the given amount of vcpus.
+	// numNUMANodes > 1 restructures the topology so vCPUs are grouped by socket per NUMA node.
+	cpuTopology(vcpus, maxvcpus uint32, numNUMANodes uint32) govmmQemu.SMP

 	// cpuModel returns the CPU model for the machine type
 	cpuModel() string
@@ -324,16 +325,29 @@ func (q *qemuArchBase) bridges(number uint32) {
 	}
 }

-func (q *qemuArchBase) cpuTopology(vcpus, maxvcpus uint32) govmmQemu.SMP {
-	smp := govmmQemu.SMP{
+func (q *qemuArchBase) cpuTopology(vcpus, maxvcpus uint32, numNUMANodes uint32) govmmQemu.SMP {
+	if numNUMANodes > 1 {
+		coresPerSocket := (maxvcpus + numNUMANodes - 1) / numNUMANodes
+		if coresPerSocket == 0 {
+			coresPerSocket = 1
+		}
+		smpMaxCPUs := numNUMANodes * coresPerSocket * defaultThreads
+		return govmmQemu.SMP{
+			CPUs:    vcpus,
+			Sockets: numNUMANodes,
+			Cores:   coresPerSocket,
+			Threads: defaultThreads,
+			MaxCPUs: smpMaxCPUs,
+		}
+	}
+
+	return govmmQemu.SMP{
 		CPUs:    vcpus,
 		Sockets: maxvcpus,
 		Cores:   defaultCores,
 		Threads: defaultThreads,
 		MaxCPUs: maxvcpus,
 	}
-
-	return smp
 }

 func (q *qemuArchBase) cpuModel() string {
--- a/src/runtime/virtcontainers/qemu_arch_base_test.go
+++ b/src/runtime/virtcontainers/qemu_arch_base_test.go
@@ -189,7 +189,46 @@ func TestQemuArchBaseCPUTopology(t *testing.T) {
 		MaxCPUs: defaultMaxVCPUs,
 	}

-	smp := qemuArchBase.cpuTopology(vcpus, defaultMaxVCPUs)
+	smp := qemuArchBase.cpuTopology(vcpus, defaultMaxVCPUs, 0)
+	assert.Equal(expectedSMP, smp)
+}
+
+func TestQemuArchBaseCPUTopologyNUMA(t *testing.T) {
+	assert := assert.New(t)
+	qemuArchBase := newQemuArchBase()
+	vcpus := uint32(2)
+	maxvcpus := uint32(8)
+	numNUMA := uint32(2)
+
+	expectedSMP := govmmQemu.SMP{
+		CPUs:    vcpus,
+		Sockets: numNUMA,
+		Cores:   maxvcpus / numNUMA,
+		Threads: defaultThreads,
+		MaxCPUs: maxvcpus,
+	}
+
+	smp := qemuArchBase.cpuTopology(vcpus, maxvcpus, numNUMA)
+	assert.Equal(expectedSMP, smp)
+}
+
+func TestQemuArchBaseCPUTopologyNUMAUneven(t *testing.T) {
+	assert := assert.New(t)
+	qemuArchBase := newQemuArchBase()
+	vcpus := uint32(2)
+	maxvcpus := uint32(5)
+	numNUMA := uint32(2)
+
+	coresPerSocket := (maxvcpus + numNUMA - 1) / numNUMA
+	expectedSMP := govmmQemu.SMP{
+		CPUs:    vcpus,
+		Sockets: numNUMA,
+		Cores:   coresPerSocket,
+		Threads: defaultThreads,
+		MaxCPUs: numNUMA * coresPerSocket * defaultThreads,
+	}
+
+	smp := qemuArchBase.cpuTopology(vcpus, maxvcpus, numNUMA)
 	assert.Equal(expectedSMP, smp)
 }