mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-06-23 22:17:19 +00:00
qemu: Disable SHPC hotplug
Under certain circumstances[0] Kata will attempt to use SHPC hotplug for PCI devices on the guest. In fact, we explicitly enable SHPC on our PCI-to-PCI bridges, regardless of the qemu default. SHPC was designed a long, long time ago for physical hotplugging and works very poorly in a virtual environment. In particular, it has a mandatory 5s delay to allow a (real, human) operator to back out of the operation if they press a button by mistake. This alone makes it unusable for a fast-startup application like Kata. Worse, the agent forces a PCI rescan during startup. That will race with the SHPC hotplug operation, causing the device to go into a bad state where config space can't be accessed from the guest at all. The only reason we've sort of gotten away with this is that our default guest kernel configuration triggers what's arguably a kernel bug, effectively disabling SHPC. That makes the agent rescan the only reason we see the new device. Now that we require qemu >= 6.1, which includes ACPI PCI hotplug on the q35 machine, we can explicitly disable SHPC in all cases. It's nothing but trouble. Fixes #2174 Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
This commit is contained in:
parent
cc4983eeac
commit
8bbcb06af5
@ -67,7 +67,7 @@ To use large BARs devices (for example, Nvidia Tesla P100), you need Kata versio
|
||||
|
||||
The following configuration in the Kata `configuration.toml` file can work:
|
||||
|
||||
Hotplug for PCI devices by `shpchp` (Linux's SHPC PCI Hotplug driver):
|
||||
Hotplug for PCI devices by `acpi_pcihp` (Linux's ACPI PCI Hotplug driver):
|
||||
```
|
||||
machine_type = "q35"
|
||||
|
||||
@ -91,7 +91,6 @@ The following kernel config options need to be enabled:
|
||||
```
|
||||
# Support PCI/PCIe device hotplug (Required for large-BAR devices)
|
||||
CONFIG_HOTPLUG_PCI_PCIE=y
|
||||
CONFIG_HOTPLUG_PCI_SHPC=y
|
||||
|
||||
# Support for loading modules (Required to load Nvidia drivers)
|
||||
CONFIG_MODULES=y
|
||||
|
@ -2126,8 +2126,23 @@ func genericAppendBridges(devices []govmmQemu.Device, bridges []types.Bridge, ma
|
||||
ID: b.ID,
|
||||
// Each bridge is required to be assigned a unique chassis id > 0
|
||||
Chassis: idx + 1,
|
||||
SHPC: true,
|
||||
SHPC: false,
|
||||
Addr: strconv.FormatInt(int64(bridges[idx].Addr), 10),
|
||||
// Certain guest BIOS versions think
|
||||
// !SHPC means no hotplug, and won't
|
||||
// reserve the IO and memory windows
|
||||
// that will be needed for devices
|
||||
// added underneath this bridge. This
|
||||
// will only break for certain
|
||||
// combinations of exact qemu, BIOS
|
||||
// and guest kernel versions, but for
|
||||
// consistency, just hint the usual
|
||||
// default windows for a bridge (as
|
||||
// the BIOS would use with SHPC) so
|
||||
// that we can do ACPI hotplug.
|
||||
IOReserve: "4k",
|
||||
MemReserve: "1m",
|
||||
Pref64Reserve: "1m",
|
||||
},
|
||||
)
|
||||
}
|
||||
|
@ -187,12 +187,15 @@ func TestQemuAmd64AppendBridges(t *testing.T) {
|
||||
|
||||
expectedOut := []govmmQemu.Device{
|
||||
govmmQemu.BridgeDevice{
|
||||
Type: govmmQemu.PCIBridge,
|
||||
Bus: defaultBridgeBus,
|
||||
ID: bridges[0].ID,
|
||||
Chassis: 1,
|
||||
SHPC: true,
|
||||
Addr: "2",
|
||||
Type: govmmQemu.PCIBridge,
|
||||
Bus: defaultBridgeBus,
|
||||
ID: bridges[0].ID,
|
||||
Chassis: 1,
|
||||
SHPC: false,
|
||||
Addr: "2",
|
||||
IOReserve: "4k",
|
||||
MemReserve: "1m",
|
||||
Pref64Reserve: "1m",
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -307,12 +307,15 @@ func TestQemuArchBaseAppendBridges(t *testing.T) {
|
||||
|
||||
expectedOut := []govmmQemu.Device{
|
||||
govmmQemu.BridgeDevice{
|
||||
Type: govmmQemu.PCIBridge,
|
||||
Bus: defaultBridgeBus,
|
||||
ID: bridges[0].ID,
|
||||
Chassis: 1,
|
||||
SHPC: true,
|
||||
Addr: "2",
|
||||
Type: govmmQemu.PCIBridge,
|
||||
Bus: defaultBridgeBus,
|
||||
ID: bridges[0].ID,
|
||||
Chassis: 1,
|
||||
SHPC: false,
|
||||
Addr: "2",
|
||||
IOReserve: "4k",
|
||||
MemReserve: "1m",
|
||||
Pref64Reserve: "1m",
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -78,12 +78,15 @@ func TestQemuArm64AppendBridges(t *testing.T) {
|
||||
|
||||
expectedOut := []govmmQemu.Device{
|
||||
govmmQemu.BridgeDevice{
|
||||
Type: govmmQemu.PCIBridge,
|
||||
Bus: defaultBridgeBus,
|
||||
ID: bridges[0].ID,
|
||||
Chassis: 1,
|
||||
SHPC: true,
|
||||
Addr: "2",
|
||||
Type: govmmQemu.PCIBridge,
|
||||
Bus: defaultBridgeBus,
|
||||
ID: bridges[0].ID,
|
||||
Chassis: 1,
|
||||
SHPC: false,
|
||||
Addr: "2",
|
||||
IOReserve: "4k",
|
||||
MemReserve: "1m",
|
||||
Pref64Reserve: "1m",
|
||||
},
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user