diff --git a/src/libs/kata-types/src/capabilities.rs b/src/libs/kata-types/src/capabilities.rs index 15207e6136..7d24ee2c1d 100644 --- a/src/libs/kata-types/src/capabilities.rs +++ b/src/libs/kata-types/src/capabilities.rs @@ -17,6 +17,8 @@ pub enum CapabilityBits { MultiQueueSupport, /// hypervisor supports filesystem share FsSharingSupport, + /// hypervisor supports hybrid-vsock + HybridVsockSupport, } /// Capabilities describe a virtcontainers hypervisor capabilities through a bit mask. @@ -60,6 +62,11 @@ impl Capabilities { self.flags.and(CapabilityBits::MultiQueueSupport) != 0 } + /// is_hybrid_vsock_supported tells if an hypervisor supports hybrid-vsock. + pub fn is_hybrid_vsock_supported(&self) -> bool { + self.flags.and(CapabilityBits::HybridVsockSupport) != 0 + } + /// is_fs_sharing_supported tells if an hypervisor supports host filesystem sharing. pub fn is_fs_sharing_supported(&self) -> bool { self.flags.and(CapabilityBits::FsSharingSupport) != 0 @@ -77,6 +84,9 @@ mod tests { let mut cap = Capabilities::new(); assert!(!cap.is_block_device_supported()); + // test that hybrid-vsock support is unset by default + assert!(!cap.is_hybrid_vsock_supported()); + // test set block device support cap.set(CapabilityBits::BlockDeviceSupport); assert!(cap.is_block_device_supported()); @@ -102,6 +112,10 @@ mod tests { | CapabilityBits::MultiQueueSupport | CapabilityBits::FsSharingSupport, ); - assert!(cap.is_fs_sharing_supported()) + assert!(cap.is_fs_sharing_supported()); + + // test set hybrid-vsock support + cap.set(CapabilityBits::HybridVsockSupport); + assert!(cap.is_hybrid_vsock_supported()); } } diff --git a/src/runtime-rs/crates/hypervisor/src/ch/inner.rs b/src/runtime-rs/crates/hypervisor/src/ch/inner.rs index dfab644d15..fd771861cf 100644 --- a/src/runtime-rs/crates/hypervisor/src/ch/inner.rs +++ b/src/runtime-rs/crates/hypervisor/src/ch/inner.rs @@ -83,7 +83,8 @@ impl CloudHypervisorInner { capabilities.set( CapabilityBits::BlockDeviceSupport | 
CapabilityBits::BlockDeviceHotplugSupport - | CapabilityBits::FsSharingSupport, + | CapabilityBits::FsSharingSupport + | CapabilityBits::HybridVsockSupport, ); let (tx, rx) = channel(true); diff --git a/src/runtime-rs/crates/hypervisor/src/ch/inner_hypervisor.rs b/src/runtime-rs/crates/hypervisor/src/ch/inner_hypervisor.rs index 717b4dbd21..6aa301495e 100644 --- a/src/runtime-rs/crates/hypervisor/src/ch/inner_hypervisor.rs +++ b/src/runtime-rs/crates/hypervisor/src/ch/inner_hypervisor.rs @@ -712,11 +712,14 @@ impl CloudHypervisorInner { let flags = if guest_protection_is_tdx(self.guest_protection_to_use.clone()) { // TDX does not permit the use of virtio-fs. - CapabilityBits::BlockDeviceSupport | CapabilityBits::BlockDeviceHotplugSupport + CapabilityBits::BlockDeviceSupport + | CapabilityBits::BlockDeviceHotplugSupport + | CapabilityBits::HybridVsockSupport } else { CapabilityBits::BlockDeviceSupport | CapabilityBits::BlockDeviceHotplugSupport | CapabilityBits::FsSharingSupport + | CapabilityBits::HybridVsockSupport }; caps.set(flags); diff --git a/src/runtime-rs/crates/hypervisor/src/device/device_manager.rs b/src/runtime-rs/crates/hypervisor/src/device/device_manager.rs index 8d71ecbccd..31602a01f0 100644 --- a/src/runtime-rs/crates/hypervisor/src/device/device_manager.rs +++ b/src/runtime-rs/crates/hypervisor/src/device/device_manager.rs @@ -12,7 +12,7 @@ use tokio::sync::{Mutex, RwLock}; use crate::{ vhost_user_blk::VhostUserBlkDevice, BlockConfig, BlockDevice, HybridVsockDevice, Hypervisor, - NetworkDevice, ShareFsDevice, VfioDevice, VhostUserConfig, KATA_BLK_DEV_TYPE, + NetworkDevice, ShareFsDevice, VfioDevice, VhostUserConfig, VsockDevice, KATA_BLK_DEV_TYPE, KATA_MMIO_BLK_DEV_TYPE, KATA_NVDIMM_DEV_TYPE, VIRTIO_BLOCK_MMIO, VIRTIO_BLOCK_PCI, VIRTIO_PMEM, }; @@ -330,6 +330,10 @@ impl DeviceManager { // No need to do find device for hybrid vsock device. 
Arc::new(Mutex::new(HybridVsockDevice::new(&device_id, hvconfig))) } + DeviceConfig::VsockCfg(_vconfig) => { + // No need to do find device for vsock device. + Arc::new(Mutex::new(VsockDevice::new(device_id.clone()).await?)) + } DeviceConfig::ShareFsCfg(config) => { // Try to find the sharefs device. If found, just return matched device id. if let Some(device_id_matched) = @@ -346,9 +350,6 @@ impl DeviceManager { Arc::new(Mutex::new(ShareFsDevice::new(&device_id, config))) } - _ => { - return Err(anyhow!("invliad device type")); - } }; // register device to devices diff --git a/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_vsock.rs b/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_vsock.rs index 6ca8879536..f3ef545a96 100644 --- a/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_vsock.rs +++ b/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_vsock.rs @@ -84,16 +84,13 @@ impl Device for HybridVsockDevice { } } -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct VsockConfig { /// A 32-bit Context Identifier (CID) used to identify the guest. pub guest_cid: u32, - - /// Vhost vsock fd. Hold to ensure CID is not used by other VM. - pub vhost_fd: File, } -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct VsockDevice { /// Unique identifier of the device pub id: String, @@ -121,46 +118,87 @@ const CID_RETRY_COUNT: u32 = 50; impl VsockDevice { pub async fn new(id: String) -> Result { - let vhost_fd = OpenOptions::new() - .read(true) - .write(true) - .open(VHOST_VSOCK_DEVICE) + let (guest_cid, _vhost_fd) = generate_vhost_vsock_cid() .await - .context(format!( - "failed to open {}, try to run modprobe vhost_vsock.", - VHOST_VSOCK_DEVICE - ))?; - let mut rng = rand::thread_rng(); + .context("generate vhost vsock cid failed")?; - // Try 50 times to find a context ID that is not in use. 
- for _ in 0..CID_RETRY_COUNT { - // First usable CID above VMADDR_CID_HOST (see vsock(7)) - let first_usable_cid = 3; - let rand_cid = rng.gen_range(first_usable_cid..=(u32::MAX)); - let guest_cid = - unsafe { vhost_vsock_set_guest_cid(vhost_fd.as_raw_fd(), &(rand_cid as u64)) }; - match guest_cid { - Ok(_) => { - return Ok(VsockDevice { - id, - config: VsockConfig { - guest_cid: rand_cid, - vhost_fd, - }, - }); - } - Err(nix::Error::EADDRINUSE) => { - // The CID is already in use. Try another one. - } - Err(err) => { - return Err(err).context("failed to set guest CID"); - } - } - } - - anyhow::bail!( - "failed to find a free vsock context ID after {} attempts", - CID_RETRY_COUNT - ); + Ok(Self { + id, + config: VsockConfig { guest_cid }, + }) } } + +#[async_trait] +impl Device for VsockDevice { + async fn attach(&mut self, h: &dyn hypervisor) -> Result<()> { + h.add_device(DeviceType::Vsock(self.clone())) + .await + .context("add vsock device.")?; + + return Ok(()); + } + + async fn detach(&mut self, _h: &dyn hypervisor) -> Result> { + // no need to do detach, just return Ok(None) + Ok(None) + } + + async fn update(&mut self, _h: &dyn hypervisor) -> Result<()> { + // There's no need to do update for vsock device + Ok(()) + } + + async fn get_device_info(&self) -> DeviceType { + DeviceType::Vsock(self.clone()) + } + + async fn increase_attach_count(&mut self) -> Result { + // vsock devices will not be attached multiple times, Just return Ok(false) + + Ok(false) + } + + async fn decrease_attach_count(&mut self) -> Result { + // vsock devices will not be detached multiple times, Just return Ok(false) + + Ok(false) + } +} + +pub async fn generate_vhost_vsock_cid() -> Result<(u32, File)> { + let vhost_fd = OpenOptions::new() + .read(true) + .write(true) + .open(VHOST_VSOCK_DEVICE) + .await + .context(format!( + "failed to open {}, try to run modprobe vhost_vsock.", + VHOST_VSOCK_DEVICE + ))?; + let mut rng = rand::thread_rng(); + + // Try 50 times to find a context ID 
that is not in use. + for _ in 0..CID_RETRY_COUNT { + // First usable CID above VMADDR_CID_HOST (see vsock(7)) + let first_usable_cid = 3; + let rand_cid = rng.gen_range(first_usable_cid..=(u32::MAX)); + let guest_cid = + unsafe { vhost_vsock_set_guest_cid(vhost_fd.as_raw_fd(), &(rand_cid as u64)) }; + match guest_cid { + Ok(_) => return Ok((rand_cid, vhost_fd)), + Err(nix::Error::EADDRINUSE) => { + // The CID is already in use. Try another one. + continue; + } + Err(err) => { + return Err(err).context("failed to set guest CID"); + } + }; + } + + anyhow::bail!( + "failed to find a free vsock context ID after {} attempts", + CID_RETRY_COUNT + ); +} diff --git a/src/runtime-rs/crates/hypervisor/src/device/mod.rs b/src/runtime-rs/crates/hypervisor/src/device/mod.rs index b40f784ddb..71b9575dbe 100644 --- a/src/runtime-rs/crates/hypervisor/src/device/mod.rs +++ b/src/runtime-rs/crates/hypervisor/src/device/mod.rs @@ -10,7 +10,7 @@ use crate::device::driver::vhost_user_blk::VhostUserBlkDevice; use crate::{ BlockConfig, BlockDevice, HybridVsockConfig, HybridVsockDevice, Hypervisor as hypervisor, NetworkConfig, NetworkDevice, ShareFsConfig, ShareFsDevice, VfioConfig, VfioDevice, - VhostUserConfig, VsockConfig, + VhostUserConfig, VsockConfig, VsockDevice, }; use anyhow::Result; use async_trait::async_trait; @@ -38,6 +38,7 @@ pub enum DeviceType { Network(NetworkDevice), ShareFs(ShareFsDevice), HybridVsock(HybridVsockDevice), + Vsock(VsockDevice), } impl fmt::Display for DeviceType { diff --git a/src/runtime-rs/crates/hypervisor/src/dragonball/inner.rs b/src/runtime-rs/crates/hypervisor/src/dragonball/inner.rs index 68d4d7fbe2..7879f1c44a 100644 --- a/src/runtime-rs/crates/hypervisor/src/dragonball/inner.rs +++ b/src/runtime-rs/crates/hypervisor/src/dragonball/inner.rs @@ -76,7 +76,8 @@ impl DragonballInner { capabilities.set( CapabilityBits::BlockDeviceSupport | CapabilityBits::BlockDeviceHotplugSupport - | CapabilityBits::FsSharingSupport, + | 
CapabilityBits::FsSharingSupport + | CapabilityBits::HybridVsockSupport, ); DragonballInner { id: "".to_string(), diff --git a/src/runtime-rs/crates/hypervisor/src/dragonball/inner_device.rs b/src/runtime-rs/crates/hypervisor/src/dragonball/inner_device.rs index 7f86b4d3a8..9484d3dc42 100644 --- a/src/runtime-rs/crates/hypervisor/src/dragonball/inner_device.rs +++ b/src/runtime-rs/crates/hypervisor/src/dragonball/inner_device.rs @@ -67,6 +67,7 @@ impl DragonballInner { DeviceType::ShareFs(sharefs) => self .add_share_fs_device(&sharefs.config) .context("add share fs device"), + DeviceType::Vsock(_) => todo!(), } } diff --git a/src/runtime-rs/crates/resource/src/lib.rs b/src/runtime-rs/crates/resource/src/lib.rs index f7df9b687b..3a7524da9c 100644 --- a/src/runtime-rs/crates/resource/src/lib.rs +++ b/src/runtime-rs/crates/resource/src/lib.rs @@ -17,7 +17,7 @@ pub mod manager; mod manager_inner; pub mod network; pub mod resource_persist; -use hypervisor::{BlockConfig, HybridVsockConfig}; +use hypervisor::{BlockConfig, HybridVsockConfig, VsockConfig}; use network::NetworkConfig; pub mod rootfs; pub mod share_fs; @@ -33,6 +33,7 @@ pub enum ResourceConfig { ShareFs(SharedFsInfo), VmRootfs(BlockConfig), HybridVsock(HybridVsockConfig), + Vsock(VsockConfig), } #[derive(Debug, Clone, Copy, PartialEq)] diff --git a/src/runtime-rs/crates/resource/src/manager_inner.rs b/src/runtime-rs/crates/resource/src/manager_inner.rs index 2ee6a5a054..592b2f5d4f 100644 --- a/src/runtime-rs/crates/resource/src/manager_inner.rs +++ b/src/runtime-rs/crates/resource/src/manager_inner.rs @@ -134,6 +134,11 @@ impl ResourceManagerInner { .await .context("do handle hybrid-vsock device failed.")?; } + ResourceConfig::Vsock(v) => { + do_handle_device(&self.device_manager, &DeviceConfig::VsockCfg(v)) + .await + .context("do handle vsock device failed.")?; + } }; } diff --git a/src/runtime-rs/crates/runtimes/virt_container/src/sandbox.rs b/src/runtime-rs/crates/runtimes/virt_container/src/sandbox.rs 
index a86aa07d7c..e103f18d70 100644 --- a/src/runtime-rs/crates/runtimes/virt_container/src/sandbox.rs +++ b/src/runtime-rs/crates/runtimes/virt_container/src/sandbox.rs @@ -14,6 +14,7 @@ use async_trait::async_trait; use common::message::{Action, Message}; use common::{Sandbox, SandboxNetworkEnv}; use containerd_shim_protos::events::task::TaskOOM; +use hypervisor::VsockConfig; use hypervisor::{dragonball::Dragonball, BlockConfig, Hypervisor, HYPERVISOR_DRAGONBALL}; use hypervisor::{utils::get_hvsock_path, HybridVsockConfig, DEFAULT_GUEST_VSOCK_CID}; use kata_sys_util::hooks::HookStates; @@ -28,6 +29,7 @@ use tracing::instrument; use crate::health_check::HealthCheck; pub(crate) const VIRTCONTAINER: &str = "virt_container"; + pub struct SandboxRestoreArgs { pub sid: String, pub toml_config: TomlConfig, @@ -102,13 +104,12 @@ impl VirtSandbox { ) -> Result> { let mut resource_configs = vec![]; - // Prepare VM hybrid vsock device config and add the hybrid vsock device first. - info!(sl!(), "prepare hybrid vsock resource for sandbox."); - let vm_hvsock = ResourceConfig::HybridVsock(HybridVsockConfig { - guest_cid: DEFAULT_GUEST_VSOCK_CID, - uds_path: get_hvsock_path(id), - }); - resource_configs.push(vm_hvsock); + info!(sl!(), "prepare vm socket config for sandbox."); + let vm_socket_config = self + .prepare_vm_socket_config() + .await + .context("failed to prepare vm socket config")?; + resource_configs.push(vm_socket_config); // prepare network config if !network_env.network_created { @@ -223,6 +224,30 @@ impl VirtSandbox { }) } + async fn prepare_vm_socket_config(&self) -> Result { + // It will check the hypervisor's capabilities to see if it supports hybrid-vsock. + // If it does not, it'll assume that it only supports legacy vsock. + let vm_socket = if self + .hypervisor + .capabilities() + .await? + .is_hybrid_vsock_supported() + { + // Firecracker/Dragonball/CLH use the hybrid-vsock device model. 
+ ResourceConfig::HybridVsock(HybridVsockConfig { + guest_cid: DEFAULT_GUEST_VSOCK_CID, + uds_path: get_hvsock_path(&self.sid), + }) + } else { + // Qemu uses the vsock device model. + ResourceConfig::Vsock(VsockConfig { + guest_cid: libc::VMADDR_CID_ANY, + }) + }; + + Ok(vm_socket) + } + fn has_prestart_hooks( &self, prestart_hooks: Vec,