From 548f252bc4710dbc9675cda9fc45a67db80cb49c Mon Sep 17 00:00:00 2001 From: "alex.lyn" Date: Mon, 8 Jan 2024 15:30:48 +0800 Subject: [PATCH 1/5] runtime-rs: bugfix incorrect use of refcount before vfio attach When there's a pod with multiple containers, there may be case that attach point more than 2, we should not return Err in that case when we are doing attach ops, but just return Ok. Fixes: #8738 Signed-off-by: Alex Lyn --- .../crates/hypervisor/src/device/driver/vfio.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs b/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs index 7d8abd2e09..578e5d35c2 100644 --- a/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs +++ b/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs @@ -456,7 +456,13 @@ impl Device for VfioDevice { .await .context("failed to increase attach count")? { - return Err(anyhow!("attach count increased failed as some reason.")); + warn!( + sl!(), + "The device {:?} is not allowed to be attached more than one times.", + self.device_id + ); + + return Ok(()); } // do add device for vfio deivce From 5750faaf31adecadcd36cdc637a09e42da0cc08d Mon Sep 17 00:00:00 2001 From: Alex Lyn Date: Mon, 1 Apr 2024 11:04:23 +0800 Subject: [PATCH 2/5] runtime-rs: introduce dedicated function do_increase_count Since there are many implementations of reference counting in the drivers, all of which have the same implementation, we should try to reduce such duplicated code as much as possible. Therefore, a new function is introduced to solve the problem of duplicated code. Fixes: #8738 Signed-off-by: Alex Lyn --- .../crates/hypervisor/src/device/util.rs | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/src/runtime-rs/crates/hypervisor/src/device/util.rs b/src/runtime-rs/crates/hypervisor/src/device/util.rs index 5d999d8f6c..f3c6964f42 100644 --- a/src/runtime-rs/crates/hypervisor/src/device/util.rs +++ b/src/runtime-rs/crates/hypervisor/src/device/util.rs @@ -69,8 +69,24 @@ pub(crate) fn get_virt_drive_name(mut index: i32) -> Result { Ok(String::from(PREFIX) + std::str::from_utf8(&disk_letters)?) } +// Using the return value of do_increase_count to indicate whether a device has been inserted into the guest. +// Specially, Increment the reference count by 1, then check the incremented ref_count: +// If the incremented reference count is not equal to 1, the device has been inserted into the guest. Return true. +// If the reference count is equal to 1, the device has not been inserted into the guest. Return false. +pub fn do_increase_count(ref_count: &mut u64) -> Result { + // ref_count = 0: Device is new and not attached. + // ref_count > 0: Device has been attempted to be attached many times. + *ref_count = (*ref_count) + .checked_add(1) + .ok_or("device reference count overflow") + .map_err(|e| anyhow!(e))?; + + Ok((*ref_count) != 1) +} + #[cfg(test)] mod tests { + use crate::device::util::do_increase_count; use crate::device::util::get_virt_drive_name; #[actix_rt::test] @@ -88,4 +104,45 @@ mod tests { assert_eq!(&out, output); } } + + #[test] + fn test_do_increase_count() { + // First, ref_count is 0 + let ref_count_0 = &mut 0_u64; + assert!(!do_increase_count(ref_count_0).unwrap()); + + // Second, ref_count > 0 + let ref_count_3 = &mut 3_u64; + assert!(do_increase_count(ref_count_3).unwrap()); + + // Third, ref_count is MAX + let mut max_count = std::u64::MAX; + let ref_count_max: &mut u64 = &mut max_count; + assert!(do_increase_count(ref_count_max).is_err()); + } + + #[test] + fn test_data_reference_count() { + #[derive(Default)] + struct TestData { + ref_cnt: u64, + } + + impl TestData { + fn attach(&mut self) -> bool { + do_increase_count(&mut self.ref_cnt).unwrap() + } + } + + let testd = &mut TestData { ref_cnt: 0_u64 }; + + // First, ref_cnt is 0 + assert!(!testd.attach()); + assert_eq!(testd.ref_cnt, 1_u64); + // Second, ref_cnt > 0 + assert!(testd.attach()); + assert_eq!(testd.ref_cnt, 2_u64); + assert!(testd.attach()); + assert_eq!(testd.ref_cnt, 3_u64); + } } From fff64f1c3e3a23723ec6591263891c6db2fdb380 Mon Sep 17 00:00:00 2001 From: Alex Lyn Date: Mon, 1 Apr 2024 11:09:33 +0800 Subject: [PATCH 3/5] runtime-rs: introduce dedicated function do_decrease_count Introduce a dedicated public function do_decrease_count to reduce duplicated code in drivers' decrease_attach_count. Fixes: #8738 Signed-off-by: Alex Lyn --- .../crates/hypervisor/src/device/util.rs | 36 ++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/src/runtime-rs/crates/hypervisor/src/device/util.rs b/src/runtime-rs/crates/hypervisor/src/device/util.rs index f3c6964f42..d719fb9e31 100644 --- a/src/runtime-rs/crates/hypervisor/src/device/util.rs +++ b/src/runtime-rs/crates/hypervisor/src/device/util.rs @@ -84,10 +84,28 @@ pub fn do_increase_count(ref_count: &mut u64) -> Result { Ok((*ref_count) != 1) } +// The return value of do_decrease_count can be used to indicate whether the device is still in use. +// Specifically, the reference count can be decremented by 1 first, then check the decremented ref_count: +// If the decremented reference count is not equal to 0, it indicates that the device is still in use by +// the guest and cannot be detached. Return true. +// If the reference count is equal to 0, it indicates that the device will not be used and can be unplugged +// from the guest. Return false. +pub fn do_decrease_count(ref_count: &mut u64) -> Result { + // ref_count = 0: Device not inserted (cannot decrease further). + // ref_count = 1: Device is attached to the Guest. Decrement ref_count and notify Device Manager of detachment. + // ref_count > 1: Device remains attached to the Guest. Simply decrement ref_count and notify Device Manager. + *ref_count = (*ref_count) + .checked_sub(1) + .ok_or("The device is not attached") + .map_err(|e| anyhow!(e))?; + + Ok((*ref_count) != 0) +} + #[cfg(test)] mod tests { - use crate::device::util::do_increase_count; use crate::device::util::get_virt_drive_name; + use crate::device::util::{do_decrease_count, do_increase_count}; #[actix_rt::test] async fn test_get_virt_drive_name() { @@ -109,11 +127,15 @@ mod tests { fn test_do_increase_count() { // First, ref_count is 0 let ref_count_0 = &mut 0_u64; + assert!(do_decrease_count(ref_count_0).is_err()); + assert!(!do_increase_count(ref_count_0).unwrap()); + assert!(!do_decrease_count(ref_count_0).unwrap()); // Second, ref_count > 0 let ref_count_3 = &mut 3_u64; assert!(do_increase_count(ref_count_3).unwrap()); + assert!(do_decrease_count(ref_count_3).unwrap()); // Third, ref_count is MAX let mut max_count = std::u64::MAX; @@ -132,6 +154,10 @@ mod tests { fn attach(&mut self) -> bool { do_increase_count(&mut self.ref_cnt).unwrap() } + + fn detach(&mut self) -> bool { + do_decrease_count(&mut self.ref_cnt).unwrap() + } } let testd = &mut TestData { ref_cnt: 0_u64 }; @@ -144,5 +170,13 @@ mod tests { assert_eq!(testd.ref_cnt, 2_u64); assert!(testd.attach()); assert_eq!(testd.ref_cnt, 3_u64); + + let testd2 = &mut TestData { ref_cnt: 2_u64 }; + + assert!(testd2.detach()); + assert_eq!(testd2.ref_cnt, 1_u64); + + assert!(!testd2.detach()); + assert_eq!(testd2.ref_cnt, 0_u64); } } From 4f0fab938d2625ef97fd4462adf490aa045fae05 Mon Sep 17 00:00:00 2001 From: Alex Lyn Date: Mon, 1 Apr 2024 11:11:52 +0800 Subject: [PATCH 4/5] runtime-rs: refactor increase_attach_count with do_increase_count Try to reduce duplicated code in increase_attach_count with public new function do_increase_count. Fixes: #8738 Signed-off-by: Alex Lyn --- .../crates/hypervisor/src/device/driver/vfio.rs | 14 ++------------ .../src/device/driver/vhost_user_blk.rs | 15 ++------------- .../hypervisor/src/device/driver/virtio_blk.rs | 14 ++------------ 3 files changed, 6 insertions(+), 37 deletions(-) diff --git a/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs b/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs index 578e5d35c2..8ed6d43864 100644 --- a/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs +++ b/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs @@ -20,6 +20,7 @@ use crate::{ device::{ pci_path::PciPath, topology::{do_add_pcie_endpoint, PCIeTopology}, + util::do_increase_count, Device, DeviceType, PCIeDevice, }, register_pcie_device, unregister_pcie_device, update_pcie_device, Hypervisor as hypervisor, @@ -522,18 +523,7 @@ impl Device for VfioDevice { } async fn increase_attach_count(&mut self) -> Result { - match self.attach_count { - 0 => { - // do real attach - self.attach_count += 1; - Ok(false) - } - std::u64::MAX => Err(anyhow!("device was attached too many times")), - _ => { - self.attach_count += 1; - Ok(true) - } - } + do_increase_count(&mut self.attach_count) } async fn decrease_attach_count(&mut self) -> Result { diff --git a/src/runtime-rs/crates/hypervisor/src/device/driver/vhost_user_blk.rs b/src/runtime-rs/crates/hypervisor/src/device/driver/vhost_user_blk.rs index b2a1d90f92..3ff92ae6da 100644 --- a/src/runtime-rs/crates/hypervisor/src/device/driver/vhost_user_blk.rs +++ b/src/runtime-rs/crates/hypervisor/src/device/driver/vhost_user_blk.rs @@ -9,7 +9,7 @@ use async_trait::async_trait; use super::VhostUserConfig; use crate::{ - device::{topology::PCIeTopology, Device, DeviceType}, + device::{topology::PCIeTopology, util::do_increase_count, Device, DeviceType}, Hypervisor as hypervisor, }; @@ -104,18 +104,7 @@ impl Device for VhostUserBlkDevice { } async fn increase_attach_count(&mut self) -> Result { - match self.attach_count { - 0 => { - // do real attach - self.attach_count += 1; - Ok(false) - } - std::u64::MAX => Err(anyhow!("device was attached too many times")), - _ => { - self.attach_count += 1; - Ok(true) - } - } + do_increase_count(&mut self.attach_count) } async fn decrease_attach_count(&mut self) -> Result { diff --git a/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_blk.rs b/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_blk.rs index fdf0f7ea2d..27388c29bf 100644 --- a/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_blk.rs +++ b/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_blk.rs @@ -6,6 +6,7 @@ use crate::device::pci_path::PciPath; use crate::device::topology::PCIeTopology; +use crate::device::util::do_increase_count; use crate::device::Device; use crate::device::DeviceType; use crate::Hypervisor as hypervisor; @@ -135,18 +136,7 @@ impl Device for BlockDevice { } async fn increase_attach_count(&mut self) -> Result { - match self.attach_count { - 0 => { - // do real attach - self.attach_count += 1; - Ok(false) - } - std::u64::MAX => Err(anyhow!("device was attached too many times")), - _ => { - self.attach_count += 1; - Ok(true) - } - } + do_increase_count(&mut self.attach_count) } async fn decrease_attach_count(&mut self) -> Result { From 935a1a3b4019fbaa7493e2c376f2b6c47744f667 Mon Sep 17 00:00:00 2001 From: Alex Lyn Date: Mon, 1 Apr 2024 11:13:35 +0800 Subject: [PATCH 5/5] runtime-rs: refactor decrease_attach_count with do_decrease_count Try to reduce duplicated code in decrease_attach_count with public new function do_decrease_count. Fixes: #8738 Signed-off-by: Alex Lyn --- .../hypervisor/src/device/driver/vfio.rs | 15 ++----------- .../src/device/driver/vhost_user_blk.rs | 21 +++++++------------ .../src/device/driver/virtio_blk.rs | 16 +++----------- 3 files changed, 12 insertions(+), 40 deletions(-) diff --git a/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs b/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs index 8ed6d43864..1113d2b879 100644 --- a/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs +++ b/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs @@ -20,7 +20,7 @@ use crate::{ device::{ pci_path::PciPath, topology::{do_add_pcie_endpoint, PCIeTopology}, - util::do_increase_count, + util::{do_decrease_count, do_increase_count}, Device, DeviceType, PCIeDevice, }, register_pcie_device, unregister_pcie_device, update_pcie_device, Hypervisor as hypervisor, @@ -527,18 +527,7 @@ impl Device for VfioDevice { } async fn decrease_attach_count(&mut self) -> Result { - match self.attach_count { - 0 => Err(anyhow!("detaching a device that wasn't attached")), - 1 => { - // do real wrok - self.attach_count -= 1; - Ok(false) - } - _ => { - self.attach_count -= 1; - Ok(true) - } - } + do_decrease_count(&mut self.attach_count) } async fn get_device_info(&self) -> DeviceType { diff --git a/src/runtime-rs/crates/hypervisor/src/device/driver/vhost_user_blk.rs b/src/runtime-rs/crates/hypervisor/src/device/driver/vhost_user_blk.rs index 3ff92ae6da..a3335ae32b 100644 --- a/src/runtime-rs/crates/hypervisor/src/device/driver/vhost_user_blk.rs +++ b/src/runtime-rs/crates/hypervisor/src/device/driver/vhost_user_blk.rs @@ -4,12 +4,16 @@ // SPDX-License-Identifier: Apache-2.0 // -use anyhow::{anyhow, Context, Result}; +use anyhow::{Context, Result}; use async_trait::async_trait; use super::VhostUserConfig; use crate::{ - device::{topology::PCIeTopology, util::do_increase_count, Device, DeviceType}, + device::{ + topology::PCIeTopology, + util::{do_decrease_count, do_increase_count}, + Device, DeviceType, + }, Hypervisor as hypervisor, }; @@ -108,17 +112,6 @@ impl Device for VhostUserBlkDevice { } async fn decrease_attach_count(&mut self) -> Result { - match self.attach_count { - 0 => Err(anyhow!("detaching a device that wasn't attached")), - 1 => { - // do real wrok - self.attach_count -= 1; - Ok(false) - } - _ => { - self.attach_count -= 1; - Ok(true) - } - } + do_decrease_count(&mut self.attach_count) } } diff --git a/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_blk.rs b/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_blk.rs index 27388c29bf..56d3fbd639 100644 --- a/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_blk.rs +++ b/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_blk.rs @@ -6,11 +6,12 @@ use crate::device::pci_path::PciPath; use crate::device::topology::PCIeTopology; +use crate::device::util::do_decrease_count; use crate::device::util::do_increase_count; use crate::device::Device; use crate::device::DeviceType; use crate::Hypervisor as hypervisor; -use anyhow::{anyhow, Context, Result}; +use anyhow::{Context, Result}; use async_trait::async_trait; /// VIRTIO_BLOCK_PCI indicates block driver is virtio-pci based @@ -140,17 +141,6 @@ impl Device for BlockDevice { } async fn decrease_attach_count(&mut self) -> Result { - match self.attach_count { - 0 => Err(anyhow!("detaching a device that wasn't attached")), - 1 => { - // do real wrok - self.attach_count -= 1; - Ok(false) - } - _ => { - self.attach_count -= 1; - Ok(true) - } - } + do_decrease_count(&mut self.attach_count) } }