runtime: qemu: don't set maxcpus when confidential guest is enabled

QEMU's maxcpus option enables CPU hotplug, but CPU hotplug is not used when
a confidential guest is enabled.

Change the Go runtime code to skip setting maxcpus on the QEMU command line
when CPU hotplug is not needed.

Commit 07db945b09 tied the kernel command line's nr_cpus parameter to the
maxcpus config. Now that maxcpus is dropped for confidential guests, drop
nr_cpus from the kernel command line as well. This hopefully also helps with
the reference values computation.

Signed-off-by: Mikko Ylinen <mikko.ylinen@intel.com>
This commit is contained in:
Mikko Ylinen
2026-02-18 13:37:34 +02:00
parent 2e625d0bab
commit e475d870fb
4 changed files with 122 additions and 34 deletions

View File

@@ -195,8 +195,10 @@ func (q *qemu) kernelParameters() string {
// use default parameters
params = append(params, defaultKernelParameters...)
// set the maximum number of vCPUs
params = append(params, Param{"nr_cpus", fmt.Sprintf("%d", q.config.DefaultMaxVCPUs)})
// set the maximum number of vCPUs (not applicable for confidential guests)
if !q.config.ConfidentialGuest {
params = append(params, Param{"nr_cpus", fmt.Sprintf("%d", q.config.DefaultMaxVCPUs)})
}
// set the SELinux params in accordance with the runtime configuration, disable_guest_selinux.
if q.config.DisableGuestSeLinux {
@@ -336,7 +338,7 @@ func (q *qemu) setup(ctx context.Context, id string, hypervisorConfig *Hyperviso
}
func (q *qemu) cpuTopology(effectiveNUMANodes uint32) govmmQemu.SMP {
return q.arch.cpuTopology(q.config.NumVCPUs(), q.config.DefaultMaxVCPUs, effectiveNUMANodes)
return q.arch.cpuTopology(q.config.NumVCPUs(), q.config.DefaultMaxVCPUs, effectiveNUMANodes, q.config.ConfidentialGuest)
}
func (q *qemu) memoryTopology() (govmmQemu.Memory, error) {
@@ -584,10 +586,16 @@ func (q *qemu) buildNUMATopology() ([]govmmQemu.NUMANode, []govmmQemu.NUMADist,
// NumVCPUs == DefaultMaxVCPUs (set in oci/utils.go). All boot vCPUs
// are present at VM start, so the per-node CPU ranges below are valid.
//
// cpuTopology() rounds MaxCPUs up to (numNUMANodes * coresPerSocket)
// so that QEMU's SMP topology is consistent. We must cover all CPU
// slots in the NUMA map, otherwise QEMU warns about CPUs not present
// in any NUMA node. Apply the same ceiling here.
// For non-confidential guests, cpuTopology() rounds MaxCPUs up to
// (numNUMANodes * coresPerSocket). When vCPUs don't divide evenly across
// nodes, the last node gets one fewer boot CPU but the extra CPU slot is
// still pre-assigned to that node in the NUMA map so it lands on the
// correct node when hotplugged. Apply the same ceiling here.
//
// For confidential guests, cpuTopology() omits maxcpus so QEMU infers
// maxcpus=vcpus. CPU indices in the NUMA map must stay within [0, vcpus-1];
// skip the ceiling and distribute exactly DefaultMaxVCPUs. An uneven vCPU
// count simply means one node gets one fewer CPU — no hotplug slot needed.
numNodes := uint32(len(numaNodes))
if q.config.DefaultMaxVCPUs < numNodes {
hvLogger.WithFields(logrus.Fields{
@@ -596,8 +604,13 @@ func (q *qemu) buildNUMATopology() ([]govmmQemu.NUMANode, []govmmQemu.NUMADist,
}).Warn("DefaultMaxVCPUs < NUMA node count; skipping multi-NUMA topology")
return nil, nil, nil
}
coresPerSocket := (q.config.DefaultMaxVCPUs + numNodes - 1) / numNodes
maxVCPUs := numNodes * coresPerSocket
var maxVCPUs uint32
if q.config.ConfidentialGuest {
maxVCPUs = q.config.DefaultMaxVCPUs
} else {
coresPerSocket := (q.config.DefaultMaxVCPUs + numNodes - 1) / numNodes
maxVCPUs = numNodes * coresPerSocket
}
vcpusPerNode, err := utils.DistributeVCPUsProportionally(numaNodes, maxVCPUs)
if err != nil {

View File

@@ -63,7 +63,8 @@ type qemuArch interface {
// cpuTopology returns the CPU topology for the given amount of vcpus.
// numNUMANodes > 1 restructures the topology so vCPUs are grouped by socket per NUMA node.
cpuTopology(vcpus, maxvcpus uint32, numNUMANodes uint32) govmmQemu.SMP
// When confidentialGuest is true, CPU hotplug is disabled: the base implementation zeroes MaxCPUs and Sockets and resets Cores to defaultCores.
cpuTopology(vcpus, maxvcpus uint32, numNUMANodes uint32, confidentialGuest bool) govmmQemu.SMP
// cpuModel returns the CPU model for the machine type
cpuModel() string
@@ -325,29 +326,43 @@ func (q *qemuArchBase) bridges(number uint32) {
}
}
func (q *qemuArchBase) cpuTopology(vcpus, maxvcpus uint32, numNUMANodes uint32) govmmQemu.SMP {
func (q *qemuArchBase) cpuTopology(vcpus, maxvcpus uint32, numNUMANodes uint32, confidentialGuest bool) govmmQemu.SMP {
var smp govmmQemu.SMP
if numNUMANodes > 1 {
coresPerSocket := (maxvcpus + numNUMANodes - 1) / numNUMANodes
if coresPerSocket == 0 {
coresPerSocket = 1
}
smpMaxCPUs := numNUMANodes * coresPerSocket * defaultThreads
return govmmQemu.SMP{
smp = govmmQemu.SMP{
CPUs: vcpus,
Sockets: numNUMANodes,
Cores: coresPerSocket,
Threads: defaultThreads,
MaxCPUs: smpMaxCPUs,
}
} else {
smp = govmmQemu.SMP{
CPUs: vcpus,
Sockets: maxvcpus,
Cores: defaultCores,
Threads: defaultThreads,
MaxCPUs: maxvcpus,
}
}
return govmmQemu.SMP{
CPUs: vcpus,
Sockets: maxvcpus,
Cores: defaultCores,
Threads: defaultThreads,
MaxCPUs: maxvcpus,
// Disable CPU hotplug for confidential guests: zero MaxCPUs and Sockets so
// govmmQemu omits them, causing QEMU to set maxcpus=cpus. Cores is reset to
// defaultCores (1) so QEMU can infer a valid sockets value (cpus/cores/threads);
// a NUMA-derived coresPerSocket left here would violate the topology constraint.
if confidentialGuest {
smp.MaxCPUs = 0
smp.Sockets = 0
smp.Cores = defaultCores
}
return smp
}
func (q *qemuArchBase) cpuModel() string {

View File

@@ -181,16 +181,34 @@ func TestQemuArchBaseCPUTopology(t *testing.T) {
qemuArchBase := newQemuArchBase()
vcpus := uint32(2)
expectedSMP := govmmQemu.SMP{
CPUs: vcpus,
Sockets: defaultMaxVCPUs,
Cores: defaultCores,
Threads: defaultThreads,
MaxCPUs: defaultMaxVCPUs,
}
t.Run("NonConfidentialGuest", func(t *testing.T) {
expectedSMP := govmmQemu.SMP{
CPUs: vcpus,
Sockets: defaultMaxVCPUs,
Cores: defaultCores,
Threads: defaultThreads,
MaxCPUs: defaultMaxVCPUs,
}
smp := qemuArchBase.cpuTopology(vcpus, defaultMaxVCPUs, 0)
assert.Equal(expectedSMP, smp)
smp := qemuArchBase.cpuTopology(vcpus, defaultMaxVCPUs, 0, false)
assert.Equal(expectedSMP, smp)
})
t.Run("ConfidentialGuest", func(t *testing.T) {
// When confidential guest is enabled, MaxCPUs and Sockets are both 0 so
// govmmQemu omits them from -smp. QEMU then sets maxcpus=cpus (no hotplug)
// and infers sockets from cpus / (cores * threads).
expectedSMP := govmmQemu.SMP{
CPUs: vcpus,
Sockets: 0,
Cores: defaultCores,
Threads: defaultThreads,
MaxCPUs: 0,
}
smp := qemuArchBase.cpuTopology(vcpus, defaultMaxVCPUs, 0, true)
assert.Equal(expectedSMP, smp)
})
}
func TestQemuArchBaseCPUTopologyNUMA(t *testing.T) {
@@ -208,7 +226,7 @@ func TestQemuArchBaseCPUTopologyNUMA(t *testing.T) {
MaxCPUs: maxvcpus,
}
smp := qemuArchBase.cpuTopology(vcpus, maxvcpus, numNUMA)
smp := qemuArchBase.cpuTopology(vcpus, maxvcpus, numNUMA, false)
assert.Equal(expectedSMP, smp)
}
@@ -228,7 +246,7 @@ func TestQemuArchBaseCPUTopologyNUMAUneven(t *testing.T) {
MaxCPUs: numNUMA * coresPerSocket * defaultThreads,
}
smp := qemuArchBase.cpuTopology(vcpus, maxvcpus, numNUMA)
smp := qemuArchBase.cpuTopology(vcpus, maxvcpus, numNUMA, false)
assert.Equal(expectedSMP, smp)
}

View File

@@ -49,15 +49,19 @@ func newQemuConfig() HypervisorConfig {
}
}
func testQemuKernelParameters(t *testing.T, kernelParams []Param, expected string, debug bool) {
func testQemuKernelParameters(t *testing.T, kernelParams []Param, expected string, debug bool, confidentialGuest bool) {
qemuConfig := newQemuConfig()
qemuConfig.KernelParams = kernelParams
assert := assert.New(t)
if debug == true {
if debug {
qemuConfig.Debug = true
}
if confidentialGuest {
qemuConfig.ConfidentialGuest = true
}
q := &qemu{
config: qemuConfig,
arch: &qemuArchBase{},
@@ -68,7 +72,6 @@ func testQemuKernelParameters(t *testing.T, kernelParams []Param, expected strin
}
func TestQemuKernelParameters(t *testing.T) {
expectedOut := fmt.Sprintf("panic=1 nr_cpus=%d selinux=0 foo=foo bar=bar", govmm.MaxVCPUs())
params := []Param{
{
Key: "foo",
@@ -80,8 +83,18 @@ func TestQemuKernelParameters(t *testing.T) {
},
}
testQemuKernelParameters(t, params, expectedOut, true)
testQemuKernelParameters(t, params, expectedOut, false)
t.Run("NonConfidentialGuest", func(t *testing.T) {
// nr_cpus is included for non-confidential guests
expectedOut := fmt.Sprintf("panic=1 nr_cpus=%d selinux=0 foo=foo bar=bar", govmm.MaxVCPUs())
testQemuKernelParameters(t, params, expectedOut, true, false)
testQemuKernelParameters(t, params, expectedOut, false, false)
})
t.Run("ConfidentialGuest", func(t *testing.T) {
// nr_cpus is omitted for confidential guests (CPU hotplug not applicable)
expectedOut := "panic=1 selinux=0 foo=foo bar=bar"
testQemuKernelParameters(t, params, expectedOut, false, true)
})
}
func TestQemuCreateVM(t *testing.T) {
@@ -1471,6 +1484,35 @@ func TestBuildNUMATopologyUnevenVCPUs(t *testing.T) {
assert.Equal("2-5", nodes[1].CPUs)
}
// TestBuildNUMATopologyUnevenVCPUsConfidentialGuest checks that
// buildNUMATopology, with ConfidentialGuest set and 5 vCPUs spread over two
// guest NUMA nodes, assigns the CPU ranges "0-1" and "2-4", so that no NUMA
// CPU index reaches DefaultMaxVCPUs.
func TestBuildNUMATopologyUnevenVCPUsConfidentialGuest(t *testing.T) {
// The test only runs on amd64 and arm64; other architectures are skipped.
if runtime.GOARCH != "amd64" && runtime.GOARCH != "arm64" {
t.Skipf("multi-NUMA not supported on %s", runtime.GOARCH)
}
assert := assert.New(t)
// Mirror TestBuildNUMATopologyUnevenVCPUs but with confidentialGuest=true.
// Without the fix, maxVCPUs=ceil(5/2)*2=6 would assign cpus=2-5 to node 1
// while QEMU infers maxcpus=5, making index 5 out of range.
// With the fix, maxVCPUs=5 and node 1 gets cpus=2-4.
q := &qemu{
config: HypervisorConfig{
DefaultMaxVCPUs: 5,
MemorySize: 1024,
ConfidentialGuest: true,
GuestNUMANodes: []types.GuestNUMANode{
{HostNodes: "0", HostCPUs: "0-1"},
{HostNodes: "1", HostCPUs: "2-4"},
},
},
}
// The NUMA distance result (second return value) is not checked here.
nodes, _, err := q.buildNUMATopology()
assert.NoError(err)
assert.Len(nodes, 2)
// 5 vCPUs distributed proportionally: 2 host CPUs → 2 vCPUs, 3 → 3.
// All indices within [0, 4] — no index ≥ maxcpus(5).
assert.Equal("0-1", nodes[0].CPUs)
assert.Equal("2-4", nodes[1].CPUs)
}
func TestBuildNUMATopologyMemMisaligned(t *testing.T) {
if runtime.GOARCH != "amd64" && runtime.GOARCH != "arm64" {
t.Skipf("multi-NUMA not supported on %s", runtime.GOARCH)