From 484a36472955e7f2177e1c38897e4a86fb94dacc Mon Sep 17 00:00:00 2001 From: David Gibson Date: Fri, 12 Mar 2021 09:48:55 +1100 Subject: [PATCH 1/4] agent/device: Rework uevent handling for virtio-blk devices There are some problems with get_pci_device_name(): 1) It's misnamed: in fact it is only used for handling virtio-blk PCI devices. It's also only correct for virtio-blk devices, the event matching doesn't locate the "raw" PCI device, but rather the block device created by virtio-blk as a child of the PCI device itself. 2) The uevent matching is imprecise. As all things using the legacy DevAddrMatcher, it matches on a bunch of conditions used across several different device types, not all of which make sense for virtio-blk pci devices specifically. Signed-off-by: David Gibson --- src/agent/src/device.rs | 55 ++++++++++++++++++++++++++++++++++++++--- src/agent/src/mount.rs | 4 +-- 2 files changed, 54 insertions(+), 5 deletions(-) diff --git a/src/agent/src/device.rs b/src/agent/src/device.rs index 750aa46691..5034ace688 100644 --- a/src/agent/src/device.rs +++ b/src/agent/src/device.rs @@ -5,6 +5,7 @@ use libc::{c_uint, major, minor}; use nix::sys::stat; +use regex::Regex; use std::collections::HashMap; use std::fs; use std::os::unix::fs::MetadataExt; @@ -148,15 +149,39 @@ pub async fn get_scsi_device_name( get_device_name(sandbox, &dev_sub_path).await } -pub async fn get_pci_device_name( +#[derive(Debug)] +struct VirtioBlkPciMatcher { + rex: Regex, +} + +impl VirtioBlkPciMatcher { + fn new(relpath: &str) -> Result { + let root_bus = create_pci_root_bus_path(); + let re = format!(r"^{}{}/virtio[0-9]+/block/", root_bus, relpath); + Ok(VirtioBlkPciMatcher { + rex: Regex::new(&re).unwrap(), + }) + } +} + +impl UeventMatcher for VirtioBlkPciMatcher { + fn is_match(&self, uev: &Uevent) -> bool { + uev.subsystem == "block" && self.rex.is_match(&uev.devpath) && !uev.devname.is_empty() + } +} + +pub async fn get_virtio_blk_pci_device_name( sandbox: &Arc>, pcipath: &pci::Path, ) -> Result { let root_bus_sysfs = format!("{}{}", SYSFS_DIR, create_pci_root_bus_path()); let sysfs_rel_path = pcipath_to_sysfs(&root_bus_sysfs, pcipath)?; + let matcher = VirtioBlkPciMatcher::new(&sysfs_rel_path)?; rescan_pci_bus()?; - get_device_name(sandbox, &sysfs_rel_path).await + + let uev = wait_for_uevent(sandbox, matcher).await?; + Ok(format!("{}/{}", SYSTEM_DEV_PATH, &uev.devname)) } pub async fn get_pmem_device_name( @@ -305,7 +330,7 @@ async fn virtio_blk_device_handler( // path for cloud-hypervisor when BDF information is not available if !device.id.is_empty() { let pcipath = pci::Path::from_str(&device.id)?; - dev.vm_path = get_pci_device_name(sandbox, &pcipath).await?; + dev.vm_path = get_virtio_blk_pci_device_name(sandbox, &pcipath).await?; } update_spec_device_list(&dev, spec, devidx) @@ -821,4 +846,28 @@ mod tests { assert!(name.is_ok(), "{}", name.unwrap_err()); assert_eq!(name.unwrap(), format!("{}/{}", SYSTEM_DEV_PATH, devname)); } + + #[tokio::test] + async fn test_virtio_blk_matcher() { + let root_bus = create_pci_root_bus_path(); + let devname = "vda"; + + let mut uev_a = crate::uevent::Uevent::default(); + let relpath_a = "/0000:00:0a.0"; + uev_a.action = crate::linux_abi::U_EVENT_ACTION_ADD.to_string(); + uev_a.subsystem = "block".to_string(); + uev_a.devname = devname.to_string(); + uev_a.devpath = format!("{}{}/virtio4/block/{}", root_bus, relpath_a, devname); + let matcher_a = VirtioBlkPciMatcher::new(&relpath_a).unwrap(); + + let mut uev_b = uev_a.clone(); + let relpath_b = "/0000:00:0a.0/0000:00:0b.0"; + uev_b.devpath = format!("{}{}/virtio0/block/{}", root_bus, relpath_b, devname); + let matcher_b = VirtioBlkPciMatcher::new(&relpath_b).unwrap(); + + assert!(matcher_a.is_match(&uev_a)); + assert!(matcher_b.is_match(&uev_b)); + assert!(!matcher_b.is_match(&uev_a)); + assert!(!matcher_a.is_match(&uev_b)); + } } diff --git a/src/agent/src/mount.rs b/src/agent/src/mount.rs index be4bcfadba..d0b91a740e 100644 --- a/src/agent/src/mount.rs +++ b/src/agent/src/mount.rs @@ -24,7 +24,7 @@ use std::fs::File; use std::io::{BufRead, BufReader}; use crate::device::{ - get_pci_device_name, get_pmem_device_name, get_scsi_device_name, online_device, + get_pmem_device_name, get_scsi_device_name, get_virtio_blk_pci_device_name, online_device, }; use crate::linux_abi::*; use crate::pci; @@ -342,7 +342,7 @@ async fn virtio_blk_storage_handler( } } else { let pcipath = pci::Path::from_str(&storage.source)?; - let dev_path = get_pci_device_name(&sandbox, &pcipath).await?; + let dev_path = get_virtio_blk_pci_device_name(&sandbox, &pcipath).await?; storage.source = dev_path; } From a59e07c1f9ec59d01723ee3d3b694374946b0d20 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Tue, 6 Apr 2021 15:02:21 +1000 Subject: [PATCH 2/4] agent/define: Refine uevent matching for virtio-scsi devices Current get_scsi_device_name() uses the legacy uevent matching which isn't very precise. This refines it to use a specific matcher implementation. While we're at it: - No longer insist on the SCSI controller being under the PCI root. It generally will be, but there's no particular reason to require it. The matcher still has a problem in that it won't work sensibly if there are multiple SCSI busses in the guest. Fixing that requires changes on the runtime side as well, though, so it's beyond scope for this change. Signed-off-by: David Gibson --- src/agent/src/device.rs | 56 ++++++++++++++++++++++++++++++++++++-- src/agent/src/linux_abi.rs | 4 --- 2 files changed, 54 insertions(+), 6 deletions(-) diff --git a/src/agent/src/device.rs b/src/agent/src/device.rs index 5034ace688..60fca50df3 100644 --- a/src/agent/src/device.rs +++ b/src/agent/src/device.rs @@ -139,14 +139,36 @@ async fn get_device_name(sandbox: &Arc>, dev_addr: &str) -> Resul Ok(format!("{}/{}", SYSTEM_DEV_PATH, &uev.devname)) } +// FIXME: This matcher is only correct if the guest has at most one +// SCSI host. +#[derive(Debug)] +struct ScsiBlockMatcher { + search: String, +} + +impl ScsiBlockMatcher { + fn new(scsi_addr: &str) -> Result { + let search = format!(r"/0:0:{}/block/", scsi_addr); + + Ok(ScsiBlockMatcher { search }) + } +} + +impl UeventMatcher for ScsiBlockMatcher { + fn is_match(&self, uev: &Uevent) -> bool { + uev.subsystem == "block" && uev.devpath.contains(&self.search) && !uev.devname.is_empty() + } +} + pub async fn get_scsi_device_name( sandbox: &Arc>, scsi_addr: &str, ) -> Result { - let dev_sub_path = format!("{}{}/{}", SCSI_HOST_CHANNEL, scsi_addr, SCSI_BLOCK_SUFFIX); + let matcher = ScsiBlockMatcher::new(scsi_addr)?; scan_scsi_bus(scsi_addr)?; - get_device_name(sandbox, &dev_sub_path).await + let uev = wait_for_uevent(sandbox, matcher).await?; + Ok(format!("{}/{}", SYSTEM_DEV_PATH, &uev.devname)) } #[derive(Debug)] @@ -870,4 +892,34 @@ mod tests { assert!(!matcher_b.is_match(&uev_a)); assert!(!matcher_a.is_match(&uev_b)); } + + #[tokio::test] + async fn test_scsi_block_matcher() { + let root_bus = create_pci_root_bus_path(); + let devname = "sda"; + + let mut uev_a = crate::uevent::Uevent::default(); + let addr_a = "0:0"; + uev_a.action = crate::linux_abi::U_EVENT_ACTION_ADD.to_string(); + uev_a.subsystem = "block".to_string(); + uev_a.devname = devname.to_string(); + uev_a.devpath = format!( + "{}/0000:00:00.0/virtio0/host0/target0:0:0/0:0:{}/block/sda", + root_bus, addr_a + ); + let matcher_a = ScsiBlockMatcher::new(&addr_a).unwrap(); + + let mut uev_b = uev_a.clone(); + let addr_b = "2:0"; + uev_b.devpath = format!( + "{}/0000:00:00.0/virtio0/host0/target0:0:2/0:0:{}/block/sdb", + root_bus, addr_b + ); + let matcher_b = ScsiBlockMatcher::new(&addr_b).unwrap(); + + assert!(matcher_a.is_match(&uev_a)); + assert!(matcher_b.is_match(&uev_b)); + assert!(!matcher_b.is_match(&uev_a)); + assert!(!matcher_a.is_match(&uev_b)); + } } diff --git a/src/agent/src/linux_abi.rs b/src/agent/src/linux_abi.rs index c6e52fa9e9..c95ea09662 100644 --- a/src/agent/src/linux_abi.rs +++ b/src/agent/src/linux_abi.rs @@ -78,10 +78,6 @@ pub const SYSFS_MEMORY_BLOCK_SIZE_PATH: &str = "/sys/devices/system/memory/block pub const SYSFS_MEMORY_HOTPLUG_PROBE_PATH: &str = "/sys/devices/system/memory/probe"; pub const SYSFS_MEMORY_ONLINE_PATH: &str = "/sys/devices/system/memory"; -// Here in "0:0", the first number is the SCSI host number because -// only one SCSI controller has been plugged, while the second number -// is always 0. -pub const SCSI_HOST_CHANNEL: &str = "0:0:"; pub const SCSI_BLOCK_SUFFIX: &str = "block"; pub const SYSFS_SCSI_HOST_PATH: &str = "/sys/class/scsi_host"; From 5d007743c1821ed3347c17569491b4498927e8bb Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 7 Apr 2021 16:31:48 +1000 Subject: [PATCH 3/4] agent/device: Refine uevent matching for pmem devices Use the new uevent matching infrastructure to refine the matching for pmem devices to something more pinned down to that device type. While we're there, fix a few anciliary problems with get_pmem_device_name(): - The name is poor - the *input* to this function is the expected device name, so the result isn't helpful, except that it needs to wait for the device to be ready in the guest. Change it to wait_for_pmem_device() and explicitly check that the returned device name matches the one expected. - Remove an incorrect comment in nvdimm_storage_handler() (the only caller) which appears to have been copied from the virtio-blk path, but then become stale. Signed-off-by: David Gibson --- src/agent/src/device.rs | 49 ++++++++++++++++++++++++++++++++++++----- src/agent/src/mount.rs | 18 +++------------ 2 files changed, 46 insertions(+), 21 deletions(-) diff --git a/src/agent/src/device.rs b/src/agent/src/device.rs index 60fca50df3..09ead0e49c 100644 --- a/src/agent/src/device.rs +++ b/src/agent/src/device.rs @@ -206,12 +206,49 @@ pub async fn get_virtio_blk_pci_device_name( Ok(format!("{}/{}", SYSTEM_DEV_PATH, &uev.devname)) } -pub async fn get_pmem_device_name( - sandbox: &Arc>, - pmem_devname: &str, -) -> Result { - let dev_sub_path = format!("/{}/{}", SCSI_BLOCK_SUFFIX, pmem_devname); - get_device_name(sandbox, &dev_sub_path).await +#[derive(Debug)] +struct PmemBlockMatcher { + suffix: String, +} + +impl PmemBlockMatcher { + fn new(devname: &str) -> Result { + let suffix = format!(r"/block/{}", devname); + + Ok(PmemBlockMatcher { suffix }) + } +} + +impl UeventMatcher for PmemBlockMatcher { + fn is_match(&self, uev: &Uevent) -> bool { + uev.subsystem == "block" + && uev.devpath.starts_with(ACPI_DEV_PATH) + && uev.devpath.ends_with(&self.suffix) + && !uev.devname.is_empty() + } +} + +pub async fn wait_for_pmem_device(sandbox: &Arc>, devpath: &str) -> Result<()> { + let devname = match devpath.strip_prefix("/dev/") { + Some(dev) => dev, + None => { + return Err(anyhow!( + "Storage source '{}' must start with /dev/", + devpath + )) + } + }; + + let matcher = PmemBlockMatcher::new(devname)?; + let uev = wait_for_uevent(sandbox, matcher).await?; + if uev.devname != devname { + return Err(anyhow!( + "Unexpected device name {} for pmem device (expected {})", + uev.devname, + devname + )); + } + Ok(()) } /// Scan SCSI bus for the given SCSI address(SCSI-Id and LUN) diff --git a/src/agent/src/mount.rs b/src/agent/src/mount.rs index d0b91a740e..0f649773ed 100644 --- a/src/agent/src/mount.rs +++ b/src/agent/src/mount.rs @@ -24,7 +24,7 @@ use std::fs::File; use std::io::{BufRead, BufReader}; use crate::device::{ - get_pmem_device_name, get_scsi_device_name, get_virtio_blk_pci_device_name, online_device, + get_scsi_device_name, get_virtio_blk_pci_device_name, online_device, wait_for_pmem_device, }; use crate::linux_abi::*; use crate::pci; @@ -377,22 +377,10 @@ async fn nvdimm_storage_handler( storage: &Storage, sandbox: Arc>, ) -> Result { - let mut storage = storage.clone(); - // If hot-plugged, get the device node path based on the PCI address else - // use the virt path provided in Storage Source - let pmem_devname = match storage.source.strip_prefix("/dev/") { - Some(dev) => dev, - None => { - return Err(anyhow!( - "Storage source '{}' must start with /dev/", - storage.source - )) - } - }; + let storage = storage.clone(); // Retrieve the device path from NVDIMM address. - let dev_path = get_pmem_device_name(&sandbox, pmem_devname).await?; - storage.source = dev_path; + wait_for_pmem_device(&sandbox, &storage.source).await?; common_storage_handler(logger, &storage) } From 8ea2ce9a317a119cf4149740ff29bee6c25c9421 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 7 Apr 2021 16:48:42 +1000 Subject: [PATCH 4/4] agent/device: Remove legacy uevent matching DevAddrMatcher existed purely as a transitional step as we refined the uevent matching logic for each of the different device types we care about. We've now done that, so it can be removed along with several related pieces. fixes #1628 Signed-off-by: David Gibson --- src/agent/src/device.rs | 73 ++++++++++---------------------------- src/agent/src/linux_abi.rs | 1 - 2 files changed, 18 insertions(+), 56 deletions(-) diff --git a/src/agent/src/device.rs b/src/agent/src/device.rs index 09ead0e49c..28f503b37b 100644 --- a/src/agent/src/device.rs +++ b/src/agent/src/device.rs @@ -88,57 +88,6 @@ fn pcipath_to_sysfs(root_bus_sysfs: &str, pcipath: &pci::Path) -> Result Ok(relpath) } -#[derive(Debug, Clone)] -struct DevAddrMatcher { - dev_addr: String, -} - -impl DevAddrMatcher { - fn new(dev_addr: &str) -> DevAddrMatcher { - DevAddrMatcher { - dev_addr: dev_addr.to_string(), - } - } -} - -impl UeventMatcher for DevAddrMatcher { - fn is_match(&self, uev: &Uevent) -> bool { - let pci_root_bus_path = create_pci_root_bus_path(); - let pci_p = format!("{}{}", pci_root_bus_path, self.dev_addr); - let pmem_suffix = format!("/{}/{}", SCSI_BLOCK_SUFFIX, uev.devname); - - uev.subsystem == "block" - && { - uev.devpath.starts_with(pci_root_bus_path.as_str()) - || uev.devpath.starts_with(ACPI_DEV_PATH) // NVDIMM/PMEM devices - } - && !uev.devname.is_empty() - && { - // blk block device - uev.devpath.starts_with(pci_p.as_str()) - // scsi block device - || ( - self.dev_addr.ends_with(SCSI_BLOCK_SUFFIX) && - uev.devpath.contains(self.dev_addr.as_str()) - ) - // nvdimm/pmem device - || ( - uev.devpath.starts_with(ACPI_DEV_PATH) && - uev.devpath.ends_with(pmem_suffix.as_str()) && - self.dev_addr.ends_with(pmem_suffix.as_str()) - ) - } - } -} - -async fn get_device_name(sandbox: &Arc>, dev_addr: &str) -> Result { - let matcher = DevAddrMatcher::new(dev_addr); - - let uev = wait_for_uevent(sandbox, matcher).await?; - - Ok(format!("{}/{}", SYSTEM_DEV_PATH, &uev.devname)) -} - // FIXME: This matcher is only correct if the guest has at most one // SCSI host. #[derive(Debug)] @@ -871,6 +820,20 @@ mod tests { assert_eq!(relpath.unwrap(), "/0000:00:02.0/0000:01:03.0/0000:02:04.0"); } + // We use device specific variants of this for real cases, but + // they have some complications that make them troublesome to unit + // test + async fn example_get_device_name( + sandbox: &Arc>, + relpath: &str, + ) -> Result { + let matcher = VirtioBlkPciMatcher::new(relpath)?; + + let uev = wait_for_uevent(sandbox, matcher).await?; + + Ok(uev.devname) + } + #[tokio::test] async fn test_get_device_name() { let devname = "vda"; @@ -891,9 +854,9 @@ mod tests { sb.uevent_map.insert(devpath.clone(), uev); drop(sb); // unlock - let name = get_device_name(&sandbox, relpath).await; + let name = example_get_device_name(&sandbox, relpath).await; assert!(name.is_ok(), "{}", name.unwrap_err()); - assert_eq!(name.unwrap(), format!("{}/{}", SYSTEM_DEV_PATH, devname)); + assert_eq!(name.unwrap(), devname); let mut sb = sandbox.lock().await; let uev = sb.uevent_map.remove(&devpath).unwrap(); @@ -901,9 +864,9 @@ mod tests { spawn_test_watcher(sandbox.clone(), uev); - let name = get_device_name(&sandbox, relpath).await; + let name = example_get_device_name(&sandbox, relpath).await; assert!(name.is_ok(), "{}", name.unwrap_err()); - assert_eq!(name.unwrap(), format!("{}/{}", SYSTEM_DEV_PATH, devname)); + assert_eq!(name.unwrap(), devname); } #[tokio::test] diff --git a/src/agent/src/linux_abi.rs b/src/agent/src/linux_abi.rs index c95ea09662..de695a1292 100644 --- a/src/agent/src/linux_abi.rs +++ b/src/agent/src/linux_abi.rs @@ -78,7 +78,6 @@ pub const SYSFS_MEMORY_BLOCK_SIZE_PATH: &str = "/sys/devices/system/memory/block pub const SYSFS_MEMORY_HOTPLUG_PROBE_PATH: &str = "/sys/devices/system/memory/probe"; pub const SYSFS_MEMORY_ONLINE_PATH: &str = "/sys/devices/system/memory"; -pub const SCSI_BLOCK_SUFFIX: &str = "block"; pub const SYSFS_SCSI_HOST_PATH: &str = "/sys/class/scsi_host"; pub const SYSFS_CGROUPPATH: &str = "/sys/fs/cgroup";