From 907459c1c169c15749717d522a972bad3f806613 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 14 Apr 2021 14:07:16 +1000 Subject: [PATCH] agent/device: Don't force PCI rescans The agent initiates a PCI rescan from two places. One is triggered for each virtio-blk PCI device, and one is triggered unconditionally when we start a new container. The PCI bus rescan code was added long time ago in Clear Containers due to lack of ACPI support in QEMU 2.9 + q35. Since Kata routinely plugs devices under a PCIe-to-PCI bridge, that left SHPC as the only available hotplug mechanism. However, while Kata was using SHPC on the qemu side, it wasn't actually using it on the guest side. Due to a quirk of our guest kernel configuration, the SHPC driver never bound to the bridge, and *no* hotplug was working at all. To work around that, Kata was forcing the rescan manually, which would discover the new device. That was very fragile (we were arguably relying on a kernel bug). Even if we were using SHPC propertly, it includes a mandatory 5s delay during plug operations (designed for physical cards and human operators), which makes it unsuitable quick start up. Worse, the forced PCI rescans could race with either SHPC or PCIe native hotplug sequences, causing several problems. In some cases this could put the device into an entirely broken state where it wouldn't respond to config space accesses at all. Since pull request #2323 was merged, we have instead used ACPI hotplug which is both fast, and more solid in terms of semantics and races. So, the forced PCI rescans are no longer necessary. Remove them all. fixes #683 Signed-off-by: David Gibson --- src/agent/src/device.rs | 7 ------- src/agent/src/linux_abi.rs | 1 - src/agent/src/rpc.rs | 8 +------- 3 files changed, 1 insertion(+), 15 deletions(-) diff --git a/src/agent/src/device.rs b/src/agent/src/device.rs index 4ee275f300..067c6d03d7 100644 --- a/src/agent/src/device.rs +++ b/src/agent/src/device.rs @@ -56,11 +56,6 @@ struct DevIndexEntry { #[derive(Debug)] struct DevIndex(HashMap); -#[instrument] -pub fn rescan_pci_bus() -> Result<()> { - online_device(SYSFS_PCI_BUS_RESCAN_FILE) -} - #[instrument] pub fn online_device(path: &str) -> Result<()> { fs::write(path, "1")?; @@ -171,8 +166,6 @@ pub async fn get_virtio_blk_pci_device_name( let sysfs_rel_path = pcipath_to_sysfs(&root_bus_sysfs, pcipath)?; let matcher = VirtioBlkPciMatcher::new(&sysfs_rel_path); - rescan_pci_bus()?; - let uev = wait_for_uevent(sandbox, matcher).await?; Ok(format!("{}/{}", SYSTEM_DEV_PATH, &uev.devname)) } diff --git a/src/agent/src/linux_abi.rs b/src/agent/src/linux_abi.rs index 08600d9f7e..e49d901f80 100644 --- a/src/agent/src/linux_abi.rs +++ b/src/agent/src/linux_abi.rs @@ -9,7 +9,6 @@ use std::fs; pub const SYSFS_DIR: &str = "/sys"; -pub const SYSFS_PCI_BUS_RESCAN_FILE: &str = "/sys/bus/pci/rescan"; #[cfg(any( target_arch = "powerpc64", target_arch = "s390x", diff --git a/src/agent/src/rpc.rs b/src/agent/src/rpc.rs index c748beb040..8154975b74 100644 --- a/src/agent/src/rpc.rs +++ b/src/agent/src/rpc.rs @@ -43,9 +43,7 @@ use nix::sys::stat; use nix::unistd::{self, Pid}; use rustjail::process::ProcessOperations; -use crate::device::{ - add_devices, get_virtio_blk_pci_device_name, rescan_pci_bus, update_device_cgroup, -}; +use crate::device::{add_devices, get_virtio_blk_pci_device_name, update_device_cgroup}; use crate::linux_abi::*; use crate::metrics::get_metrics; use crate::mount::{add_storages, baremount, remove_mounts, STORAGE_HANDLER_LIST}; @@ -136,10 +134,6 @@ impl AgentService { info!(sl!(), "receive createcontainer, spec: {:?}", &oci); - // re-scan PCI bus - // looking for hidden devices - rescan_pci_bus().context("Could not rescan PCI bus")?; - // Some devices need some extra processing (the ones invoked with // --device for instance), and that's what this call is doing. It // updates the devices listed in the OCI spec, so that they actually