gpu: Add OVMF setting for MMIO aperture

The default size of OVMF's MMIO aperture is too small to
initialize PCIe devices with huge BARs.

Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
Zvonko Kaiser 2023-04-25 09:53:08 +00:00
parent 2a830177ca
commit dded731db3
3 changed files with 10 additions and 52 deletions
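
For context, OVMF sizes its 64-bit PCI MMIO aperture from the fw_cfg file opt/ovmf/X-PciMmio64Mb, which carries the size in MB as a decimal string. A minimal, self-contained sketch (not runtime code) of the knob and the QEMU flag it corresponds to, using the 262144 MB value that appears in the code below:

package main

import "fmt"

func main() {
    // fw_cfg file that OVMF reads to size its 64-bit PCI MMIO aperture.
    const name = "opt/ovmf/X-PciMmio64Mb"
    // Aperture size in MB as a decimal string; 262144 MB == 256 GiB (illustrative value).
    const sizeMB = "262144"
    // QEMU's command-line syntax for a string-valued fw_cfg entry.
    fmt.Printf("-fw_cfg name=%s,string=%s\n", name, sizeMB)
}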

@@ -509,10 +509,6 @@ type HypervisorConfig struct {
// The PCIe Root Port device is used to hot-plug the PCIe device
PCIeRootPort uint32
// VFIODevices are used to get PCIe device info early before the sandbox
// is started to make better PCIe topology decisions
VFIODevices []config.DeviceInfo
// ColdPlugVFIO is used to indicate if devices need to be coldplugged on the
// root port, switch or no port
ColdPlugVFIO hv.PCIePort

@@ -712,56 +712,23 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi
q.virtiofsDaemon, err = q.createVirtiofsDaemon(hypervisorConfig.SharedPath)
// If we have a VFIO device we need to update the firmware configuration
// if executed in a trusted execution environment.
if hypervisorConfig.ConfidentialGuest {
// At the sandbox level we already checked that we have a
// VFIO device; pass-through of a PCIe device needs allocated
// memory in the firmware, otherwise BARs cannot be mapped.
// First check if we have a PCIe device, otherwise ignore.
err, fwCfg := q.appendFwCfgForConfidentialGuest(hypervisorConfig.VFIODevices)
if err != nil {
return err
}
if fwCfg != nil {
qemuConfig.FwCfg = append(qemuConfig.FwCfg, *fwCfg)
// The default OVMF MMIO aperture is too small for some PCIe devices
// with huge BARs, so we need to increase it.
// memSize64bit is in bytes; convert to MB, since OVMF expects MB as a string.
if strings.Contains(strings.ToLower(hypervisorConfig.FirmwarePath), "ovmf") {
pciMmio64Mb := fmt.Sprintf("%d", (memSize64bit / 1024 / 1024))
fwCfg := govmmQemu.FwCfg{
Name: "opt/ovmf/X-PciMmio64Mb",
Str: pciMmio64Mb,
}
qemuConfig.FwCfg = append(qemuConfig.FwCfg, fwCfg)
}
q.qemuConfig = qemuConfig
return err
}
// appendFwCfgForConfidentialGuest appends the firmware configuration for a
// VFIO PCIe device; non-PCIe devices are ignored.
func (q *qemu) appendFwCfgForConfidentialGuest(vfioDevices []config.DeviceInfo) (error, *govmmQemu.FwCfg) {
var err error
for _, dev := range vfioDevices {
dev.HostPath, err = config.GetHostPath(dev, false, "")
if err != nil {
return err, nil
}
vfioDevs, err := drivers.GetAllVFIODevicesFromIOMMUGroup(dev, true)
if err != nil {
return err, nil
}
fwCfg := govmmQemu.FwCfg{}
for _, vfioDev := range vfioDevs {
switch (*vfioDev).GetType() {
case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType:
if (*vfioDev).(config.VFIOPCIDev).IsPCIe {
fwCfg = govmmQemu.FwCfg{
Name: "opt/ovmf/X-PciMmio64Mb",
Str: "262144",
}
return nil, &fwCfg
}
}
}
}
return nil, nil
}
func (q *qemu) checkBpfEnabled() {
if q.config.SeccompSandbox != "" {
out, err := os.ReadFile("/proc/sys/net/core/bpf_jit_enable")
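
For reference, the added block above converts memSize64bit from bytes to MB and renders it as a decimal string, since OVMF parses the fw_cfg payload as text. A small self-contained sketch of the same conversion, with an assumed 512 GiB value for memSize64bit:

package main

import "fmt"

func main() {
    // Assumed value for illustration: a 512 GiB 64-bit memory region, in bytes.
    var memSize64bit uint64 = 512 * 1024 * 1024 * 1024
    // Same conversion as in CreateVM above: bytes -> MB, formatted as a string.
    pciMmio64Mb := fmt.Sprintf("%d", memSize64bit/1024/1024)
    fmt.Println(pciMmio64Mb) // prints "524288"
}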

@@ -639,11 +639,6 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
}
}
}
// If we have a confidential guest, we need to add a specific
// firmware configuration to the hypervisor. We cannot do it here at
// the sandbox level; we need to do that at the hypervisor level, so we
// capture the devices here and process them in CreateVM().
sandboxConfig.HypervisorConfig.VFIODevices = devs
// store doesn't require hypervisor to be stored immediately
if err = s.hypervisor.CreateVM(ctx, s.id, s.network, &sandboxConfig.HypervisorConfig); err != nil {
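
With the sandbox-level capture of VFIO devices removed, the only gate for the enlarged aperture is the firmware-path check added in CreateVM. A self-contained sketch of that check; the firmware paths are hypothetical examples:

package main

import (
    "fmt"
    "strings"
)

// usesOVMF mirrors the check added in CreateVM: any firmware path that
// contains "ovmf" (case-insensitive) gets the enlarged MMIO aperture entry.
func usesOVMF(firmwarePath string) bool {
    return strings.Contains(strings.ToLower(firmwarePath), "ovmf")
}

func main() {
    fmt.Println(usesOVMF("/usr/share/ovmf/OVMF.fd"))       // true
    fmt.Println(usesOVMF("/usr/share/qemu/bios-256k.bin")) // false
}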