diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index fe99945f56..20db96f91e 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -195,8 +195,10 @@ func (q *qemu) kernelParameters() string { // use default parameters params = append(params, defaultKernelParameters...) - // set the maximum number of vCPUs - params = append(params, Param{"nr_cpus", fmt.Sprintf("%d", q.config.DefaultMaxVCPUs)}) + // set the maximum number of vCPUs (not applicable for confidential guests) + if !q.config.ConfidentialGuest { + params = append(params, Param{"nr_cpus", fmt.Sprintf("%d", q.config.DefaultMaxVCPUs)}) + } // set the SELinux params in accordance with the runtime configuration, disable_guest_selinux. if q.config.DisableGuestSeLinux { @@ -336,7 +338,7 @@ func (q *qemu) setup(ctx context.Context, id string, hypervisorConfig *Hyperviso } func (q *qemu) cpuTopology(effectiveNUMANodes uint32) govmmQemu.SMP { - return q.arch.cpuTopology(q.config.NumVCPUs(), q.config.DefaultMaxVCPUs, effectiveNUMANodes) + return q.arch.cpuTopology(q.config.NumVCPUs(), q.config.DefaultMaxVCPUs, effectiveNUMANodes, q.config.ConfidentialGuest) } func (q *qemu) memoryTopology() (govmmQemu.Memory, error) { @@ -584,10 +586,16 @@ func (q *qemu) buildNUMATopology() ([]govmmQemu.NUMANode, []govmmQemu.NUMADist, // NumVCPUs == DefaultMaxVCPUs (set in oci/utils.go). All boot vCPUs // are present at VM start, so the per-node CPU ranges below are valid. // - // cpuTopology() rounds MaxCPUs up to (numNUMANodes * coresPerSocket) - // so that QEMU's SMP topology is consistent. We must cover all CPU - // slots in the NUMA map, otherwise QEMU warns about CPUs not present - // in any NUMA node. Apply the same ceiling here. + // For non-confidential guests, cpuTopology() rounds MaxCPUs up to + // (numNUMANodes * coresPerSocket). When vCPUs don't divide evenly across + // nodes, the last node gets one fewer boot CPU but the extra CPU slot is + // still pre-assigned to that node in the NUMA map so it lands on the + // correct node when hotplugged. Apply the same ceiling here. + // + // For confidential guests, cpuTopology() omits maxcpus so QEMU infers + // maxcpus=vcpus. CPU indices in the NUMA map must stay within [0, vcpus-1]; + // skip the ceiling and distribute exactly DefaultMaxVCPUs. An uneven vCPU + // count simply means one node gets one fewer CPU — no hotplug slot needed. numNodes := uint32(len(numaNodes)) if q.config.DefaultMaxVCPUs < numNodes { hvLogger.WithFields(logrus.Fields{ @@ -596,8 +604,13 @@ func (q *qemu) buildNUMATopology() ([]govmmQemu.NUMANode, []govmmQemu.NUMADist, }).Warn("DefaultMaxVCPUs < NUMA node count; skipping multi-NUMA topology") return nil, nil, nil } - coresPerSocket := (q.config.DefaultMaxVCPUs + numNodes - 1) / numNodes - maxVCPUs := numNodes * coresPerSocket + var maxVCPUs uint32 + if q.config.ConfidentialGuest { + maxVCPUs = q.config.DefaultMaxVCPUs + } else { + coresPerSocket := (q.config.DefaultMaxVCPUs + numNodes - 1) / numNodes + maxVCPUs = numNodes * coresPerSocket + } vcpusPerNode, err := utils.DistributeVCPUsProportionally(numaNodes, maxVCPUs) if err != nil { diff --git a/src/runtime/virtcontainers/qemu_arch_base.go b/src/runtime/virtcontainers/qemu_arch_base.go index f3bba704ca..1ad19af1a6 100644 --- a/src/runtime/virtcontainers/qemu_arch_base.go +++ b/src/runtime/virtcontainers/qemu_arch_base.go @@ -63,7 +63,8 @@ type qemuArch interface { // cpuTopology returns the CPU topology for the given amount of vcpus. // numNUMANodes > 1 restructures the topology so vCPUs are grouped by socket per NUMA node. - cpuTopology(vcpus, maxvcpus uint32, numNUMANodes uint32) govmmQemu.SMP + // When confidentialGuest is true, CPU hotplug is disabled by setting MaxCPUs to 0. + cpuTopology(vcpus, maxvcpus uint32, numNUMANodes uint32, confidentialGuest bool) govmmQemu.SMP // cpuModel returns the CPU model for the machine type cpuModel() string @@ -325,29 +326,43 @@ func (q *qemuArchBase) bridges(number uint32) { } } -func (q *qemuArchBase) cpuTopology(vcpus, maxvcpus uint32, numNUMANodes uint32) govmmQemu.SMP { +func (q *qemuArchBase) cpuTopology(vcpus, maxvcpus uint32, numNUMANodes uint32, confidentialGuest bool) govmmQemu.SMP { + var smp govmmQemu.SMP + if numNUMANodes > 1 { coresPerSocket := (maxvcpus + numNUMANodes - 1) / numNUMANodes if coresPerSocket == 0 { coresPerSocket = 1 } smpMaxCPUs := numNUMANodes * coresPerSocket * defaultThreads - return govmmQemu.SMP{ + smp = govmmQemu.SMP{ CPUs: vcpus, Sockets: numNUMANodes, Cores: coresPerSocket, Threads: defaultThreads, MaxCPUs: smpMaxCPUs, } + } else { + smp = govmmQemu.SMP{ + CPUs: vcpus, + Sockets: maxvcpus, + Cores: defaultCores, + Threads: defaultThreads, + MaxCPUs: maxvcpus, + } } - return govmmQemu.SMP{ - CPUs: vcpus, - Sockets: maxvcpus, - Cores: defaultCores, - Threads: defaultThreads, - MaxCPUs: maxvcpus, + // Disable CPU hotplug for confidential guests: zero MaxCPUs and Sockets so + // govmmQemu omits them, causing QEMU to set maxcpus=cpus. Cores is reset to + // defaultCores (1) so QEMU can infer a valid sockets value (cpus/cores/threads); + // a NUMA-derived coresPerSocket left here would violate the topology constraint. + if confidentialGuest { + smp.MaxCPUs = 0 + smp.Sockets = 0 + smp.Cores = defaultCores } + + return smp } func (q *qemuArchBase) cpuModel() string { diff --git a/src/runtime/virtcontainers/qemu_arch_base_test.go b/src/runtime/virtcontainers/qemu_arch_base_test.go index c177ee44a8..c838441198 100644 --- a/src/runtime/virtcontainers/qemu_arch_base_test.go +++ b/src/runtime/virtcontainers/qemu_arch_base_test.go @@ -181,16 +181,34 @@ func TestQemuArchBaseCPUTopology(t *testing.T) { qemuArchBase := newQemuArchBase() vcpus := uint32(2) - expectedSMP := govmmQemu.SMP{ - CPUs: vcpus, - Sockets: defaultMaxVCPUs, - Cores: defaultCores, - Threads: defaultThreads, - MaxCPUs: defaultMaxVCPUs, - } + t.Run("NonConfidentialGuest", func(t *testing.T) { + expectedSMP := govmmQemu.SMP{ + CPUs: vcpus, + Sockets: defaultMaxVCPUs, + Cores: defaultCores, + Threads: defaultThreads, + MaxCPUs: defaultMaxVCPUs, + } - smp := qemuArchBase.cpuTopology(vcpus, defaultMaxVCPUs, 0) - assert.Equal(expectedSMP, smp) + smp := qemuArchBase.cpuTopology(vcpus, defaultMaxVCPUs, 0, false) + assert.Equal(expectedSMP, smp) + }) + + t.Run("ConfidentialGuest", func(t *testing.T) { + // When confidential guest is enabled, MaxCPUs and Sockets are both 0 so + // govmmQemu omits them from -smp. QEMU then sets maxcpus=cpus (no hotplug) + // and infers sockets from cpus / (cores * threads). + expectedSMP := govmmQemu.SMP{ + CPUs: vcpus, + Sockets: 0, + Cores: defaultCores, + Threads: defaultThreads, + MaxCPUs: 0, + } + + smp := qemuArchBase.cpuTopology(vcpus, defaultMaxVCPUs, 0, true) + assert.Equal(expectedSMP, smp) + }) } func TestQemuArchBaseCPUTopologyNUMA(t *testing.T) { @@ -208,7 +226,7 @@ func TestQemuArchBaseCPUTopologyNUMA(t *testing.T) { MaxCPUs: maxvcpus, } - smp := qemuArchBase.cpuTopology(vcpus, maxvcpus, numNUMA) + smp := qemuArchBase.cpuTopology(vcpus, maxvcpus, numNUMA, false) assert.Equal(expectedSMP, smp) } @@ -228,7 +246,7 @@ func TestQemuArchBaseCPUTopologyNUMAUneven(t *testing.T) { MaxCPUs: numNUMA * coresPerSocket * defaultThreads, } - smp := qemuArchBase.cpuTopology(vcpus, maxvcpus, numNUMA) + smp := qemuArchBase.cpuTopology(vcpus, maxvcpus, numNUMA, false) assert.Equal(expectedSMP, smp) } diff --git a/src/runtime/virtcontainers/qemu_test.go b/src/runtime/virtcontainers/qemu_test.go index 9fcb8dc1fa..5c058e6318 100644 --- a/src/runtime/virtcontainers/qemu_test.go +++ b/src/runtime/virtcontainers/qemu_test.go @@ -49,15 +49,19 @@ func newQemuConfig() HypervisorConfig { } } -func testQemuKernelParameters(t *testing.T, kernelParams []Param, expected string, debug bool) { +func testQemuKernelParameters(t *testing.T, kernelParams []Param, expected string, debug bool, confidentialGuest bool) { qemuConfig := newQemuConfig() qemuConfig.KernelParams = kernelParams assert := assert.New(t) - if debug == true { + if debug { qemuConfig.Debug = true } + if confidentialGuest { + qemuConfig.ConfidentialGuest = true + } + q := &qemu{ config: qemuConfig, arch: &qemuArchBase{}, @@ -68,7 +72,6 @@ func testQemuKernelParameters(t *testing.T, kernelParams []Param, expected strin } func TestQemuKernelParameters(t *testing.T) { - expectedOut := fmt.Sprintf("panic=1 nr_cpus=%d selinux=0 foo=foo bar=bar", govmm.MaxVCPUs()) params := []Param{ { Key: "foo", @@ -80,8 +83,18 @@ func TestQemuKernelParameters(t *testing.T) { }, } - testQemuKernelParameters(t, params, expectedOut, true) - testQemuKernelParameters(t, params, expectedOut, false) + t.Run("NonConfidentialGuest", func(t *testing.T) { + // nr_cpus is included for non-confidential guests + expectedOut := fmt.Sprintf("panic=1 nr_cpus=%d selinux=0 foo=foo bar=bar", govmm.MaxVCPUs()) + testQemuKernelParameters(t, params, expectedOut, true, false) + testQemuKernelParameters(t, params, expectedOut, false, false) + }) + + t.Run("ConfidentialGuest", func(t *testing.T) { + // nr_cpus is omitted for confidential guests (CPU hotplug not applicable) + expectedOut := "panic=1 selinux=0 foo=foo bar=bar" + testQemuKernelParameters(t, params, expectedOut, false, true) + }) } func TestQemuCreateVM(t *testing.T) { @@ -1471,6 +1484,35 @@ func TestBuildNUMATopologyUnevenVCPUs(t *testing.T) { assert.Equal("2-5", nodes[1].CPUs) } +func TestBuildNUMATopologyUnevenVCPUsConfidentialGuest(t *testing.T) { + if runtime.GOARCH != "amd64" && runtime.GOARCH != "arm64" { + t.Skipf("multi-NUMA not supported on %s", runtime.GOARCH) + } + assert := assert.New(t) + // Mirror TestBuildNUMATopologyUnevenVCPUs but with confidentialGuest=true. + // Without the fix, maxVCPUs=ceil(5/2)*2=6 would assign cpus=2-5 to node 1 + // while QEMU infers maxcpus=5, making index 5 out of range. + // With the fix, maxVCPUs=5 and node 1 gets cpus=2-4. + q := &qemu{ + config: HypervisorConfig{ + DefaultMaxVCPUs: 5, + MemorySize: 1024, + ConfidentialGuest: true, + GuestNUMANodes: []types.GuestNUMANode{ + {HostNodes: "0", HostCPUs: "0-1"}, + {HostNodes: "1", HostCPUs: "2-4"}, + }, + }, + } + nodes, _, err := q.buildNUMATopology() + assert.NoError(err) + assert.Len(nodes, 2) + // 5 vCPUs distributed proportionally: 2 host CPUs → 2 vCPUs, 3 → 3. + // All indices within [0, 4] — no index ≥ maxcpus(5). + assert.Equal("0-1", nodes[0].CPUs) + assert.Equal("2-4", nodes[1].CPUs) +} + func TestBuildNUMATopologyMemMisaligned(t *testing.T) { if runtime.GOARCH != "amd64" && runtime.GOARCH != "arm64" { t.Skipf("multi-NUMA not supported on %s", runtime.GOARCH)