runtime: qemu: don't set maxcpus when confidential guest is enabled

QEMU's maxcpus option enables CPU hotplug, but hotplug is unused when confidential guest is enabled. Change the Go runtime code to skip setting maxcpus on the QEMU command line when CPU hotplug is not needed. Commit 07db945b09 built a relationship between nr_cpus on the kernel command line and the maxcpus config. Now that maxcpus is dropped for confidential guests, drop nr_cpus from the kernel command line too. This should also help with computing reference values, since the kernel command line no longer carries the maximum vCPU count. Signed-off-by: Mikko Ylinen <mikko.ylinen@intel.com>
2026-07-01 14:38:33 +00:00 · 2026-02-18 13:37:34 +02:00
parent 2e625d0bab
commit e475d870fb
4 changed files with 122 additions and 34 deletions
--- a/src/runtime/virtcontainers/qemu.go
+++ b/src/runtime/virtcontainers/qemu.go
@@ -195,8 +195,10 @@ func (q *qemu) kernelParameters() string {
 	// use default parameters
 	params = append(params, defaultKernelParameters...)

-	// set the maximum number of vCPUs
-	params = append(params, Param{"nr_cpus", fmt.Sprintf("%d", q.config.DefaultMaxVCPUs)})
+	// set the maximum number of vCPUs (not applicable for confidential guests)
+	if !q.config.ConfidentialGuest {
+		params = append(params, Param{"nr_cpus", fmt.Sprintf("%d", q.config.DefaultMaxVCPUs)})
+	}

 	// set the SELinux params in accordance with the runtime configuration, disable_guest_selinux.
 	if q.config.DisableGuestSeLinux {
@@ -336,7 +338,7 @@ func (q *qemu) setup(ctx context.Context, id string, hypervisorConfig *Hyperviso
 }

 func (q *qemu) cpuTopology(effectiveNUMANodes uint32) govmmQemu.SMP {
-	return q.arch.cpuTopology(q.config.NumVCPUs(), q.config.DefaultMaxVCPUs, effectiveNUMANodes)
+	return q.arch.cpuTopology(q.config.NumVCPUs(), q.config.DefaultMaxVCPUs, effectiveNUMANodes, q.config.ConfidentialGuest)
 }

 func (q *qemu) memoryTopology() (govmmQemu.Memory, error) {
@@ -584,10 +586,16 @@ func (q *qemu) buildNUMATopology() ([]govmmQemu.NUMANode, []govmmQemu.NUMADist,
 	// NumVCPUs == DefaultMaxVCPUs (set in oci/utils.go). All boot vCPUs
 	// are present at VM start, so the per-node CPU ranges below are valid.
 	//
-	// cpuTopology() rounds MaxCPUs up to (numNUMANodes * coresPerSocket)
-	// so that QEMU's SMP topology is consistent. We must cover all CPU
-	// slots in the NUMA map, otherwise QEMU warns about CPUs not present
-	// in any NUMA node. Apply the same ceiling here.
+	// For non-confidential guests, cpuTopology() rounds MaxCPUs up to
+	// (numNUMANodes * coresPerSocket). When vCPUs don't divide evenly across
+	// nodes, the last node gets one fewer boot CPU but the extra CPU slot is
+	// still pre-assigned to that node in the NUMA map so it lands on the
+	// correct node when hotplugged. Apply the same ceiling here.
+	//
+	// For confidential guests, cpuTopology() omits maxcpus so QEMU infers
+	// maxcpus=vcpus. CPU indices in the NUMA map must stay within [0, vcpus-1];
+	// skip the ceiling and distribute exactly DefaultMaxVCPUs. An uneven vCPU
+	// count simply means one node gets one fewer CPU — no hotplug slot needed.
 	numNodes := uint32(len(numaNodes))
 	if q.config.DefaultMaxVCPUs < numNodes {
 		hvLogger.WithFields(logrus.Fields{
@@ -596,8 +604,13 @@ func (q *qemu) buildNUMATopology() ([]govmmQemu.NUMANode, []govmmQemu.NUMADist,
 		}).Warn("DefaultMaxVCPUs < NUMA node count; skipping multi-NUMA topology")
 		return nil, nil, nil
 	}
-	coresPerSocket := (q.config.DefaultMaxVCPUs + numNodes - 1) / numNodes
-	maxVCPUs := numNodes * coresPerSocket
+	var maxVCPUs uint32
+	if q.config.ConfidentialGuest {
+		maxVCPUs = q.config.DefaultMaxVCPUs
+	} else {
+		coresPerSocket := (q.config.DefaultMaxVCPUs + numNodes - 1) / numNodes
+		maxVCPUs = numNodes * coresPerSocket
+	}

 	vcpusPerNode, err := utils.DistributeVCPUsProportionally(numaNodes, maxVCPUs)
 	if err != nil {
--- a/src/runtime/virtcontainers/qemu_arch_base.go
+++ b/src/runtime/virtcontainers/qemu_arch_base.go
@@ -63,7 +63,8 @@ type qemuArch interface {

 	// cpuTopology returns the CPU topology for the given amount of vcpus.
 	// numNUMANodes > 1 restructures the topology so vCPUs are grouped by socket per NUMA node.
-	cpuTopology(vcpus, maxvcpus uint32, numNUMANodes uint32) govmmQemu.SMP
+	// When confidentialGuest is true, CPU hotplug is disabled: MaxCPUs and Sockets are set to 0 and Cores is reset to defaultCores.
+	cpuTopology(vcpus, maxvcpus uint32, numNUMANodes uint32, confidentialGuest bool) govmmQemu.SMP

 	// cpuModel returns the CPU model for the machine type
 	cpuModel() string
@@ -325,29 +326,43 @@ func (q *qemuArchBase) bridges(number uint32) {
 	}
 }

-func (q *qemuArchBase) cpuTopology(vcpus, maxvcpus uint32, numNUMANodes uint32) govmmQemu.SMP {
+func (q *qemuArchBase) cpuTopology(vcpus, maxvcpus uint32, numNUMANodes uint32, confidentialGuest bool) govmmQemu.SMP {
+	var smp govmmQemu.SMP
+
 	if numNUMANodes > 1 {
 		coresPerSocket := (maxvcpus + numNUMANodes - 1) / numNUMANodes
 		if coresPerSocket == 0 {
 			coresPerSocket = 1
 		}
 		smpMaxCPUs := numNUMANodes * coresPerSocket * defaultThreads
-		return govmmQemu.SMP{
+		smp = govmmQemu.SMP{
 			CPUs:    vcpus,
 			Sockets: numNUMANodes,
 			Cores:   coresPerSocket,
 			Threads: defaultThreads,
 			MaxCPUs: smpMaxCPUs,
 		}
+	} else {
+		smp = govmmQemu.SMP{
+			CPUs:    vcpus,
+			Sockets: maxvcpus,
+			Cores:   defaultCores,
+			Threads: defaultThreads,
+			MaxCPUs: maxvcpus,
+		}
 	}

-	return govmmQemu.SMP{
-		CPUs:    vcpus,
-		Sockets: maxvcpus,
-		Cores:   defaultCores,
-		Threads: defaultThreads,
-		MaxCPUs: maxvcpus,
+	// Disable CPU hotplug for confidential guests: zero MaxCPUs and Sockets so
+	// govmmQemu omits them, causing QEMU to set maxcpus=cpus. Cores is reset to
+	// defaultCores (1) so QEMU can infer a valid sockets value (cpus/cores/threads);
+	// a NUMA-derived coresPerSocket left here would violate the topology constraint.
+	if confidentialGuest {
+		smp.MaxCPUs = 0
+		smp.Sockets = 0
+		smp.Cores = defaultCores
 	}
+
+	return smp
 }

 func (q *qemuArchBase) cpuModel() string {
--- a/src/runtime/virtcontainers/qemu_arch_base_test.go
+++ b/src/runtime/virtcontainers/qemu_arch_base_test.go
@@ -181,16 +181,34 @@ func TestQemuArchBaseCPUTopology(t *testing.T) {
 	qemuArchBase := newQemuArchBase()
 	vcpus := uint32(2)

-	expectedSMP := govmmQemu.SMP{
-		CPUs:    vcpus,
-		Sockets: defaultMaxVCPUs,
-		Cores:   defaultCores,
-		Threads: defaultThreads,
-		MaxCPUs: defaultMaxVCPUs,
-	}
+	t.Run("NonConfidentialGuest", func(t *testing.T) {
+		expectedSMP := govmmQemu.SMP{
+			CPUs:    vcpus,
+			Sockets: defaultMaxVCPUs,
+			Cores:   defaultCores,
+			Threads: defaultThreads,
+			MaxCPUs: defaultMaxVCPUs,
+		}

-	smp := qemuArchBase.cpuTopology(vcpus, defaultMaxVCPUs, 0)
-	assert.Equal(expectedSMP, smp)
+		smp := qemuArchBase.cpuTopology(vcpus, defaultMaxVCPUs, 0, false)
+		assert.Equal(expectedSMP, smp)
+	})
+
+	t.Run("ConfidentialGuest", func(t *testing.T) {
+		// When confidential guest is enabled, MaxCPUs and Sockets are both 0 so
+		// govmmQemu omits them from -smp. QEMU then sets maxcpus=cpus (no hotplug)
+		// and infers sockets from cpus / (cores * threads).
+		expectedSMP := govmmQemu.SMP{
+			CPUs:    vcpus,
+			Sockets: 0,
+			Cores:   defaultCores,
+			Threads: defaultThreads,
+			MaxCPUs: 0,
+		}
+
+		smp := qemuArchBase.cpuTopology(vcpus, defaultMaxVCPUs, 0, true)
+		assert.Equal(expectedSMP, smp)
+	})
 }

 func TestQemuArchBaseCPUTopologyNUMA(t *testing.T) {
@@ -208,7 +226,7 @@ func TestQemuArchBaseCPUTopologyNUMA(t *testing.T) {
 		MaxCPUs: maxvcpus,
 	}

-	smp := qemuArchBase.cpuTopology(vcpus, maxvcpus, numNUMA)
+	smp := qemuArchBase.cpuTopology(vcpus, maxvcpus, numNUMA, false)
 	assert.Equal(expectedSMP, smp)
 }

@@ -228,7 +246,7 @@ func TestQemuArchBaseCPUTopologyNUMAUneven(t *testing.T) {
 		MaxCPUs: numNUMA * coresPerSocket * defaultThreads,
 	}

-	smp := qemuArchBase.cpuTopology(vcpus, maxvcpus, numNUMA)
+	smp := qemuArchBase.cpuTopology(vcpus, maxvcpus, numNUMA, false)
 	assert.Equal(expectedSMP, smp)
 }

--- a/src/runtime/virtcontainers/qemu_test.go
+++ b/src/runtime/virtcontainers/qemu_test.go
@@ -49,15 +49,19 @@ func newQemuConfig() HypervisorConfig {
 	}
 }

-func testQemuKernelParameters(t *testing.T, kernelParams []Param, expected string, debug bool) {
+func testQemuKernelParameters(t *testing.T, kernelParams []Param, expected string, debug bool, confidentialGuest bool) {
 	qemuConfig := newQemuConfig()
 	qemuConfig.KernelParams = kernelParams
 	assert := assert.New(t)

-	if debug == true {
+	if debug {
 		qemuConfig.Debug = true
 	}

+	if confidentialGuest {
+		qemuConfig.ConfidentialGuest = true
+	}
+
 	q := &qemu{
 		config: qemuConfig,
 		arch:   &qemuArchBase{},
@@ -68,7 +72,6 @@ func testQemuKernelParameters(t *testing.T, kernelParams []Param, expected strin
 }

 func TestQemuKernelParameters(t *testing.T) {
-	expectedOut := fmt.Sprintf("panic=1 nr_cpus=%d selinux=0 foo=foo bar=bar", govmm.MaxVCPUs())
 	params := []Param{
 		{
 			Key:   "foo",
@@ -80,8 +83,18 @@ func TestQemuKernelParameters(t *testing.T) {
 		},
 	}

-	testQemuKernelParameters(t, params, expectedOut, true)
-	testQemuKernelParameters(t, params, expectedOut, false)
+	t.Run("NonConfidentialGuest", func(t *testing.T) {
+		// nr_cpus is included for non-confidential guests
+		expectedOut := fmt.Sprintf("panic=1 nr_cpus=%d selinux=0 foo=foo bar=bar", govmm.MaxVCPUs())
+		testQemuKernelParameters(t, params, expectedOut, true, false)
+		testQemuKernelParameters(t, params, expectedOut, false, false)
+	})
+
+	t.Run("ConfidentialGuest", func(t *testing.T) {
+		// nr_cpus is omitted for confidential guests (CPU hotplug not applicable)
+		expectedOut := "panic=1 selinux=0 foo=foo bar=bar"
+		testQemuKernelParameters(t, params, expectedOut, false, true)
+	})
 }

 func TestQemuCreateVM(t *testing.T) {
@@ -1471,6 +1484,35 @@ func TestBuildNUMATopologyUnevenVCPUs(t *testing.T) {
 	assert.Equal("2-5", nodes[1].CPUs)
 }

+func TestBuildNUMATopologyUnevenVCPUsConfidentialGuest(t *testing.T) {
+	if runtime.GOARCH != "amd64" && runtime.GOARCH != "arm64" {
+		t.Skipf("multi-NUMA not supported on %s", runtime.GOARCH)
+	}
+	assert := assert.New(t)
+	// Mirror TestBuildNUMATopologyUnevenVCPUs but with confidentialGuest=true.
+	// Without the fix, maxVCPUs=ceil(5/2)*2=6 would assign cpus=2-5 to node 1
+	// while QEMU infers maxcpus=5, making index 5 out of range.
+	// With the fix, maxVCPUs=5 and node 1 gets cpus=2-4.
+	q := &qemu{
+		config: HypervisorConfig{
+			DefaultMaxVCPUs:   5,
+			MemorySize:        1024,
+			ConfidentialGuest: true,
+			GuestNUMANodes: []types.GuestNUMANode{
+				{HostNodes: "0", HostCPUs: "0-1"},
+				{HostNodes: "1", HostCPUs: "2-4"},
+			},
+		},
+	}
+	nodes, _, err := q.buildNUMATopology()
+	assert.NoError(err)
+	assert.Len(nodes, 2)
+	// 5 vCPUs distributed proportionally: 2 host CPUs → 2 vCPUs, 3 → 3.
+	// All indices within [0, 4] — no index ≥ maxcpus(5).
+	assert.Equal("0-1", nodes[0].CPUs)
+	assert.Equal("2-4", nodes[1].CPUs)
+}
+
 func TestBuildNUMATopologyMemMisaligned(t *testing.T) {
 	if runtime.GOARCH != "amd64" && runtime.GOARCH != "arm64" {
 		t.Skipf("multi-NUMA not supported on %s", runtime.GOARCH)