gpu: Add OVMF setting for MMIO aperture

The default size of OVMF's MMIO aperture is too small to
initialize PCIe devices with huge BARs.

Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
Zvonko Kaiser 2023-04-25 09:53:08 +00:00
parent 2a830177ca
commit dded731db3
3 changed files with 10 additions and 52 deletions
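
For context, OVMF sizes its 64-bit PCI MMIO aperture from the fw_cfg file opt/ovmf/X-PciMmio64Mb, which carries the size in MB as a decimal string. A minimal, self-contained sketch (not runtime code) of the knob and the QEMU flag it corresponds to, using the 262144 MB value that appears in the code below:

package main

import "fmt"

func main() {
    // fw_cfg file that OVMF reads to size its 64-bit PCI MMIO aperture.
    const name = "opt/ovmf/X-PciMmio64Mb"
    // Aperture size in MB as a decimal string; 262144 MB == 256 GiB (illustrative value).
    const sizeMB = "262144"
    // QEMU's command-line syntax for a string-valued fw_cfg entry.
    fmt.Printf("-fw_cfg name=%s,string=%s\n", name, sizeMB)
}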

@@ -509,10 +509,6 @@ type HypervisorConfig struct {
// The PCIe Root Port device is used to hot-plug the PCIe device
PCIeRootPort uint32
// VFIODevices are used to get PCIe device info early before the sandbox
// is started to make better PCIe topology decisions
VFIODevices []config.DeviceInfo
// ColdPlugVFIO is used to indicate if devices need to be coldplugged on the
// root port, switch or no port
ColdPlugVFIO hv.PCIePort

@@ -712,56 +712,23 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi
q.virtiofsDaemon, err = q.createVirtiofsDaemon(hypervisorConfig.SharedPath)
// If we have a VFIO device we need to update the firmware configuration
// if executed in a trusted execution environment.
if hypervisorConfig.ConfidentialGuest {
// At the sandbox level we already checked that we have a
// VFIO device; pass-through of a PCIe device needs allocated
// memory in the firmware, otherwise BARs cannot be mapped.
// First check if we have a PCIe device, otherwise ignore.
err, fwCfg := q.appendFwCfgForConfidentialGuest(hypervisorConfig.VFIODevices)
if err != nil {
return err
}
if fwCfg != nil {
qemuConfig.FwCfg = append(qemuConfig.FwCfg, *fwCfg)
// The default OVMF MMIO aperture is too small for some PCIe devices
// with huge BARs, so we need to increase it.
// memSize64bit is in bytes; convert to MB, since OVMF expects MB as a string.
if strings.Contains(strings.ToLower(hypervisorConfig.FirmwarePath), "ovmf") {
pciMmio64Mb := fmt.Sprintf("%d", (memSize64bit / 1024 / 1024))
fwCfg := govmmQemu.FwCfg{
Name: "opt/ovmf/X-PciMmio64Mb",
Str: pciMmio64Mb,
}
qemuConfig.FwCfg = append(qemuConfig.FwCfg, fwCfg)
}
q.qemuConfig = qemuConfig
return err
}
// appendFwCfgForConfidentialGuest appends the firmware configuration for a
// VFIO PCIe device; non-PCIe devices are ignored.
func (q *qemu) appendFwCfgForConfidentialGuest(vfioDevices []config.DeviceInfo) (error, *govmmQemu.FwCfg) {
var err error
for _, dev := range vfioDevices {
dev.HostPath, err = config.GetHostPath(dev, false, "")
if err != nil {
return err, nil
}
vfioDevs, err := drivers.GetAllVFIODevicesFromIOMMUGroup(dev, true)
if err != nil {
return err, nil
}
fwCfg := govmmQemu.FwCfg{}
for _, vfioDev := range vfioDevs {
switch (*vfioDev).GetType() {
case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType:
if (*vfioDev).(config.VFIOPCIDev).IsPCIe {
fwCfg = govmmQemu.FwCfg{
Name: "opt/ovmf/X-PciMmio64Mb",
Str: "262144",
}
return nil, &fwCfg
}
}
}
}
return nil, nil
}
func (q *qemu) checkBpfEnabled() {
if q.config.SeccompSandbox != "" {
out, err := os.ReadFile("/proc/sys/net/core/bpf_jit_enable")
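
For reference, the added block above converts memSize64bit from bytes to MB and renders it as a decimal string, since OVMF parses the fw_cfg payload as text. A small self-contained sketch of the same conversion, with an assumed 512 GiB value for memSize64bit:

package main

import "fmt"

func main() {
    // Assumed value for illustration: a 512 GiB 64-bit memory region, in bytes.
    var memSize64bit uint64 = 512 * 1024 * 1024 * 1024
    // Same conversion as in CreateVM above: bytes -> MB, formatted as a string.
    pciMmio64Mb := fmt.Sprintf("%d", memSize64bit/1024/1024)
    fmt.Println(pciMmio64Mb) // prints "524288"
}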

@@ -639,11 +639,6 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
}
}
}
// If we have a confidential guest, we need to add a specific
// firmware configuration to the hypervisor. We cannot do it here at
// the sandbox level; we need to do that at the hypervisor level, so we
// capture the devices here and process them in CreateVM().
sandboxConfig.HypervisorConfig.VFIODevices = devs
// store doesn't require hypervisor to be stored immediately
if err = s.hypervisor.CreateVM(ctx, s.id, s.network, &sandboxConfig.HypervisorConfig); err != nil {
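
With the sandbox-level capture of VFIO devices removed, the only gate for the enlarged aperture is the firmware-path check added in CreateVM. A self-contained sketch of that check; the firmware paths are hypothetical examples:

package main

import (
    "fmt"
    "strings"
)

// usesOVMF mirrors the check added in CreateVM: any firmware path that
// contains "ovmf" (case-insensitive) gets the enlarged MMIO aperture entry.
func usesOVMF(firmwarePath string) bool {
    return strings.Contains(strings.ToLower(firmwarePath), "ovmf")
}

func main() {
    fmt.Println(usesOVMF("/usr/share/ovmf/OVMF.fd"))       // true
    fmt.Println(usesOVMF("/usr/share/qemu/bios-256k.bin")) // false
}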