Merge pull request #8516 from Apokleos/vsock-dev

move vsock device into device manager
This commit is contained in:
Greg Kurz 2023-12-05 11:28:37 +01:00 committed by GitHub
commit 1650d02b91
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 152 additions and 61 deletions

View File

@ -17,6 +17,8 @@ pub enum CapabilityBits {
MultiQueueSupport,
/// hypervisor supports filesystem share
FsSharingSupport,
/// hypervisor supports hybrid-vsock
HybridVsockSupport,
}
/// Capabilities describe a virtcontainers hypervisor capabilities through a bit mask.
@ -60,6 +62,11 @@ impl Capabilities {
self.flags.and(CapabilityBits::MultiQueueSupport) != 0
}
/// is_hybrid_vsock_supported tells if a hypervisor advertises the hybrid-vsock capability,
/// i.e. whether the `HybridVsockSupport` bit is present in the capability flags.
pub fn is_hybrid_vsock_supported(&self) -> bool {
    let hybrid_vsock_bit = self.flags.and(CapabilityBits::HybridVsockSupport);
    hybrid_vsock_bit != 0
}
/// is_fs_sharing_supported tells if an hypervisor supports host filesystem sharing.
pub fn is_fs_sharing_supported(&self) -> bool {
self.flags.and(CapabilityBits::FsSharingSupport) != 0
@ -77,6 +84,9 @@ mod tests {
let mut cap = Capabilities::new();
assert!(!cap.is_block_device_supported());
// hybrid-vsock must not be reported as supported by default (i.e. legacy vsock is assumed)
assert!(!cap.is_hybrid_vsock_supported());
// test set block device support
cap.set(CapabilityBits::BlockDeviceSupport);
assert!(cap.is_block_device_supported());
@ -102,6 +112,10 @@ mod tests {
| CapabilityBits::MultiQueueSupport
| CapabilityBits::FsSharingSupport,
);
assert!(cap.is_fs_sharing_supported())
assert!(cap.is_fs_sharing_supported());
// test set hybrid-vsock support
cap.set(CapabilityBits::HybridVsockSupport);
assert!(cap.is_hybrid_vsock_supported());
}
}

View File

@ -83,7 +83,8 @@ impl CloudHypervisorInner {
capabilities.set(
CapabilityBits::BlockDeviceSupport
| CapabilityBits::BlockDeviceHotplugSupport
| CapabilityBits::FsSharingSupport,
| CapabilityBits::FsSharingSupport
| CapabilityBits::HybridVsockSupport,
);
let (tx, rx) = channel(true);

View File

@ -712,11 +712,14 @@ impl CloudHypervisorInner {
let flags = if guest_protection_is_tdx(self.guest_protection_to_use.clone()) {
// TDX does not permit the use of virtio-fs.
CapabilityBits::BlockDeviceSupport | CapabilityBits::BlockDeviceHotplugSupport
CapabilityBits::BlockDeviceSupport
| CapabilityBits::BlockDeviceHotplugSupport
| CapabilityBits::HybridVsockSupport
} else {
CapabilityBits::BlockDeviceSupport
| CapabilityBits::BlockDeviceHotplugSupport
| CapabilityBits::FsSharingSupport
| CapabilityBits::HybridVsockSupport
};
caps.set(flags);

View File

@ -12,7 +12,7 @@ use tokio::sync::{Mutex, RwLock};
use crate::{
vhost_user_blk::VhostUserBlkDevice, BlockConfig, BlockDevice, HybridVsockDevice, Hypervisor,
NetworkDevice, ShareFsDevice, VfioDevice, VhostUserConfig, KATA_BLK_DEV_TYPE,
NetworkDevice, ShareFsDevice, VfioDevice, VhostUserConfig, VsockDevice, KATA_BLK_DEV_TYPE,
KATA_MMIO_BLK_DEV_TYPE, KATA_NVDIMM_DEV_TYPE, VIRTIO_BLOCK_MMIO, VIRTIO_BLOCK_PCI, VIRTIO_PMEM,
};
@ -330,6 +330,10 @@ impl DeviceManager {
// No need to do find device for hybrid vsock device.
Arc::new(Mutex::new(HybridVsockDevice::new(&device_id, hvconfig)))
}
DeviceConfig::VsockCfg(_vconfig) => {
// No need to do find device for vsock device.
Arc::new(Mutex::new(VsockDevice::new(device_id.clone()).await?))
}
DeviceConfig::ShareFsCfg(config) => {
// Try to find the sharefs device. If found, just return matched device id.
if let Some(device_id_matched) =
@ -346,9 +350,6 @@ impl DeviceManager {
Arc::new(Mutex::new(ShareFsDevice::new(&device_id, config)))
}
_ => {
return Err(anyhow!("invliad device type"));
}
};
// register device to devices

View File

@ -84,16 +84,13 @@ impl Device for HybridVsockDevice {
}
}
#[derive(Debug)]
#[derive(Clone, Debug)]
pub struct VsockConfig {
/// A 32-bit Context Identifier (CID) used to identify the guest.
pub guest_cid: u32,
/// Vhost vsock fd. Hold to ensure CID is not used by other VM.
pub vhost_fd: File,
}
#[derive(Debug)]
#[derive(Clone, Debug)]
pub struct VsockDevice {
/// Unique identifier of the device
pub id: String,
@ -121,46 +118,87 @@ const CID_RETRY_COUNT: u32 = 50;
impl VsockDevice {
pub async fn new(id: String) -> Result<Self> {
let vhost_fd = OpenOptions::new()
.read(true)
.write(true)
.open(VHOST_VSOCK_DEVICE)
let (guest_cid, _vhost_fd) = generate_vhost_vsock_cid()
.await
.context(format!(
"failed to open {}, try to run modprobe vhost_vsock.",
VHOST_VSOCK_DEVICE
))?;
let mut rng = rand::thread_rng();
.context("generate vhost vsock cid failed")?;
// Try 50 times to find a context ID that is not in use.
for _ in 0..CID_RETRY_COUNT {
// First usable CID above VMADDR_CID_HOST (see vsock(7))
let first_usable_cid = 3;
let rand_cid = rng.gen_range(first_usable_cid..=(u32::MAX));
let guest_cid =
unsafe { vhost_vsock_set_guest_cid(vhost_fd.as_raw_fd(), &(rand_cid as u64)) };
match guest_cid {
Ok(_) => {
return Ok(VsockDevice {
id,
config: VsockConfig {
guest_cid: rand_cid,
vhost_fd,
},
});
}
Err(nix::Error::EADDRINUSE) => {
// The CID is already in use. Try another one.
}
Err(err) => {
return Err(err).context("failed to set guest CID");
}
}
}
anyhow::bail!(
"failed to find a free vsock context ID after {} attempts",
CID_RETRY_COUNT
);
Ok(Self {
id,
config: VsockConfig { guest_cid },
})
}
}
/// `Device` implementation for the legacy vsock device.
///
/// Only `attach` talks to the hypervisor; every other lifecycle hook is a
/// no-op that reports success.
#[async_trait]
impl Device for VsockDevice {
    /// Hands a clone of this device to the hypervisor `h` via `add_device`.
    ///
    /// # Errors
    /// Returns the `add_device` failure wrapped with the context
    /// "add vsock device.".
    async fn attach(&mut self, h: &dyn hypervisor) -> Result<()> {
        h.add_device(DeviceType::Vsock(self.clone()))
            .await
            .context("add vsock device.")?;

        Ok(())
    }

    /// Detaching is a no-op for vsock devices; always yields `Ok(None)`.
    async fn detach(&mut self, _h: &dyn hypervisor) -> Result<Option<u64>> {
        // no need to do detach, just return Ok(None)
        Ok(None)
    }

    /// Updating is a no-op for vsock devices; always yields `Ok(())`.
    async fn update(&mut self, _h: &dyn hypervisor) -> Result<()> {
        // There's no need to do update for vsock device
        Ok(())
    }

    /// Returns a clone of this device wrapped in `DeviceType::Vsock`.
    async fn get_device_info(&self) -> DeviceType {
        DeviceType::Vsock(self.clone())
    }

    /// No attach counting is performed; always yields `Ok(false)`.
    async fn increase_attach_count(&mut self) -> Result<bool> {
        // vsock devices will not be attached multiple times, Just return Ok(false)
        Ok(false)
    }

    /// No attach counting is performed; always yields `Ok(false)`.
    async fn decrease_attach_count(&mut self) -> Result<bool> {
        // vsock devices will not be detached multiple times, Just return Ok(false)
        Ok(false)
    }
}
/// Opens the vhost-vsock device and sets a random, unused guest CID on it.
///
/// Up to `CID_RETRY_COUNT` random CIDs are tried. A CID rejected with
/// `EADDRINUSE` is skipped and a new one is drawn; any other ioctl error
/// aborts immediately.
///
/// # Returns
/// The accepted CID together with the open `File` the CID was set on.
/// NOTE(review): the descriptor presumably has to stay open for as long as
/// the CID should remain reserved for this VM — confirm that callers keep
/// the returned `File` alive instead of dropping it.
///
/// # Errors
/// - `VHOST_VSOCK_DEVICE` cannot be opened for read/write.
/// - Setting the guest CID fails with an error other than `EADDRINUSE`.
/// - No free CID was found within `CID_RETRY_COUNT` attempts.
pub async fn generate_vhost_vsock_cid() -> Result<(u32, File)> {
    // First usable CID above VMADDR_CID_HOST (see vsock(7)).
    const FIRST_USABLE_CID: u32 = 3;

    let vhost_fd = OpenOptions::new()
        .read(true)
        .write(true)
        .open(VHOST_VSOCK_DEVICE)
        .await
        // Build the message lazily so nothing is allocated on the success path.
        .with_context(|| {
            format!(
                "failed to open {}, try to run modprobe vhost_vsock.",
                VHOST_VSOCK_DEVICE
            )
        })?;

    let mut rng = rand::thread_rng();

    // Try CID_RETRY_COUNT times to find a context ID that is not in use.
    for _ in 0..CID_RETRY_COUNT {
        let rand_cid = rng.gen_range(FIRST_USABLE_CID..=u32::MAX);

        // SAFETY: `vhost_fd` is owned by this function and stays open for the
        // whole call, and the pointer refers to a `u64` temporary that outlives
        // the call. (Assumes the ioctl wrapper only reads that value — verify
        // against the definition of `vhost_vsock_set_guest_cid`.)
        let guest_cid =
            unsafe { vhost_vsock_set_guest_cid(vhost_fd.as_raw_fd(), &u64::from(rand_cid)) };

        match guest_cid {
            Ok(_) => return Ok((rand_cid, vhost_fd)),
            Err(nix::Error::EADDRINUSE) => {
                // The CID is already in use. Try another one.
            }
            Err(err) => {
                return Err(err).context("failed to set guest CID");
            }
        }
    }

    anyhow::bail!(
        "failed to find a free vsock context ID after {} attempts",
        CID_RETRY_COUNT
    );
}

View File

@ -10,7 +10,7 @@ use crate::device::driver::vhost_user_blk::VhostUserBlkDevice;
use crate::{
BlockConfig, BlockDevice, HybridVsockConfig, HybridVsockDevice, Hypervisor as hypervisor,
NetworkConfig, NetworkDevice, ShareFsConfig, ShareFsDevice, VfioConfig, VfioDevice,
VhostUserConfig, VsockConfig,
VhostUserConfig, VsockConfig, VsockDevice,
};
use anyhow::Result;
use async_trait::async_trait;
@ -38,6 +38,7 @@ pub enum DeviceType {
Network(NetworkDevice),
ShareFs(ShareFsDevice),
HybridVsock(HybridVsockDevice),
Vsock(VsockDevice),
}
impl fmt::Display for DeviceType {

View File

@ -76,7 +76,8 @@ impl DragonballInner {
capabilities.set(
CapabilityBits::BlockDeviceSupport
| CapabilityBits::BlockDeviceHotplugSupport
| CapabilityBits::FsSharingSupport,
| CapabilityBits::FsSharingSupport
| CapabilityBits::HybridVsockSupport,
);
DragonballInner {
id: "".to_string(),

View File

@ -67,6 +67,7 @@ impl DragonballInner {
DeviceType::ShareFs(sharefs) => self
.add_share_fs_device(&sharefs.config)
.context("add share fs device"),
DeviceType::Vsock(_) => todo!(),
}
}

View File

@ -17,7 +17,7 @@ pub mod manager;
mod manager_inner;
pub mod network;
pub mod resource_persist;
use hypervisor::{BlockConfig, HybridVsockConfig};
use hypervisor::{BlockConfig, HybridVsockConfig, VsockConfig};
use network::NetworkConfig;
pub mod rootfs;
pub mod share_fs;
@ -33,6 +33,7 @@ pub enum ResourceConfig {
ShareFs(SharedFsInfo),
VmRootfs(BlockConfig),
HybridVsock(HybridVsockConfig),
Vsock(VsockConfig),
}
#[derive(Debug, Clone, Copy, PartialEq)]

View File

@ -134,6 +134,11 @@ impl ResourceManagerInner {
.await
.context("do handle hybrid-vsock device failed.")?;
}
ResourceConfig::Vsock(v) => {
do_handle_device(&self.device_manager, &DeviceConfig::VsockCfg(v))
.await
.context("do handle vsock device failed.")?;
}
};
}

View File

@ -14,6 +14,7 @@ use async_trait::async_trait;
use common::message::{Action, Message};
use common::{Sandbox, SandboxNetworkEnv};
use containerd_shim_protos::events::task::TaskOOM;
use hypervisor::VsockConfig;
use hypervisor::{dragonball::Dragonball, BlockConfig, Hypervisor, HYPERVISOR_DRAGONBALL};
use hypervisor::{utils::get_hvsock_path, HybridVsockConfig, DEFAULT_GUEST_VSOCK_CID};
use kata_sys_util::hooks::HookStates;
@ -28,6 +29,7 @@ use tracing::instrument;
use crate::health_check::HealthCheck;
pub(crate) const VIRTCONTAINER: &str = "virt_container";
pub struct SandboxRestoreArgs {
pub sid: String,
pub toml_config: TomlConfig,
@ -102,13 +104,12 @@ impl VirtSandbox {
) -> Result<Vec<ResourceConfig>> {
let mut resource_configs = vec![];
// Prepare VM hybrid vsock device config and add the hybrid vsock device first.
info!(sl!(), "prepare hybrid vsock resource for sandbox.");
let vm_hvsock = ResourceConfig::HybridVsock(HybridVsockConfig {
guest_cid: DEFAULT_GUEST_VSOCK_CID,
uds_path: get_hvsock_path(id),
});
resource_configs.push(vm_hvsock);
info!(sl!(), "prepare vm socket config for sandbox.");
let vm_socket_config = self
.prepare_vm_socket_config()
.await
.context("failed to prepare vm socket config")?;
resource_configs.push(vm_socket_config);
// prepare network config
if !network_env.network_created {
@ -223,6 +224,30 @@ impl VirtSandbox {
})
}
/// Picks the VM socket resource matching the hypervisor's capabilities.
///
/// Hybrid-vsock is chosen when the hypervisor reports support for it;
/// otherwise the hypervisor is assumed to support only legacy vsock.
///
/// # Errors
/// Propagates the failure from querying the hypervisor capabilities.
async fn prepare_vm_socket_config(&self) -> Result<ResourceConfig> {
    let caps = self.hypervisor.capabilities().await?;

    if caps.is_hybrid_vsock_supported() {
        // Hybrid-vsock device model (Firecracker/Dragonball/CLH).
        return Ok(ResourceConfig::HybridVsock(HybridVsockConfig {
            guest_cid: DEFAULT_GUEST_VSOCK_CID,
            uds_path: get_hvsock_path(&self.sid),
        }));
    }

    // Legacy vsock device model (Qemu).
    Ok(ResourceConfig::Vsock(VsockConfig {
        guest_cid: libc::VMADDR_CID_ANY,
    }))
}
fn has_prestart_hooks(
&self,
prestart_hooks: Vec<oci::Hook>,