runtime-rs: add support spdk/vhost-user based volume.

Unlike the previous usage which requires creating
/dev/xxx by mknod on the host, the new approach will
fully utilize the DirectVolume-related usage method,
and pass the spdk controller to vmm.

A user guide about using the SPDK volume when running Kata Containers
is also added; it can be found in docs/how-to.

Fixes: #6526

Signed-off-by: alex.lyn <alex.lyn@antgroup.com>
This commit is contained in:
alex.lyn 2023-06-25 16:17:25 +08:00
parent 4cf552c151
commit 0df2fc2702
15 changed files with 573 additions and 95 deletions

View File

@ -25,8 +25,8 @@ Finally, when running a Kata Containers with `ctr run --mount type=X, src=Y, dst
Now, supported types: Now, supported types:
- `directvol` for direct volume - `directvol` for direct volume
- `spdkvol` for SPDK volume (TBD)
- `vfiovol` for VFIO device based volume - `vfiovol` for VFIO device based volume
- `spdkvol` for SPDK/vhost-user based volume
## Setup Device and Run a Kata-Containers ## Setup Device and Run a Kata-Containers
@ -147,6 +147,80 @@ $ sudo ctr run -t --rm --runtime io.containerd.kata.v2 --mount type=vfiovol,src=
``` ```
### SPDK Device Based Volume ### SPDK Device Based Block Volume
TBD SPDK vhost-user devices in runtime-rs, unlike runtime (golang version), there is no need to `mknod` device node under `/dev/` any more.
Just using the `kata-ctl direct-volume add ..` to make a mount info config is enough.
#### Run SPDK vhost target and Expose vhost block device
Run a SPDK vhost target and get vhost-user block controller as an example:
First, run SPDK vhost target:
> **Tips:** If driver `vfio-pci` supported, you can run SPDK with `DRIVER_OVERRIDE=vfio-pci`
> Otherwise, Just run without it `sudo HUGEMEM=4096 ./scripts/setup.sh`.
```bash
$ SPDK_DEVEL=/xx/spdk
$ VHU_UDS_PATH=/tmp/vhu-targets
$ RAW_DISKS=/xx/rawdisks
$ # Reset first
$ ${SPDK_DEVEL}/scripts/setup.sh reset
$ sudo sysctl -w vm.nr_hugepages=2048
$ #4G Huge Memory for spdk
$ sudo HUGEMEM=4096 DRIVER_OVERRIDE=vfio-pci ${SPDK_DEVEL}/scripts/setup.sh
$ sudo ${SPDK_DEVEL}/build/bin/spdk_tgt -S $VHU_UDS_PATH -s 1024 -m 0x3 &
```
Second, create a vhost controller:
```bash
$ sudo dd if=/dev/zero of=${RAW_DISKS}/rawdisk01.20g bs=1M count=20480
$ sudo ${SPDK_DEVEL}/scripts/rpc.py bdev_aio_create ${RAW_DISKS}/rawdisk01.20g vhu-rawdisk01.20g 512
$ sudo ${SPDK_DEVEL}/scripts/rpc.py vhost_create_blk_controller vhost-blk-rawdisk01.sock vhu-rawdisk01.20g
```
Here, a vhost controller `vhost-blk-rawdisk01.sock` is created, and the controller will
be passed to Hypervisor, such as Dragonball, Cloud-Hypervisor, Firecracker or QEMU.
#### Setup vhost-user block device for Kata Containers
First, `mkdir` a sub-path `kubelet/kata-test-vol-001/` under `/run/kata-containers/shared/direct-volumes/`.
Second, fill fields in `mountinfo.json`, it looks like as below:
```json
{
"device": "/tmp/vhu-targets/vhost-blk-rawdisk01.sock",
"volume_type": "spdkvol",
"fs_type": "ext4",
"metadata":"{}",
"options": []
}
```
Third, with the help of `kata-ctl direct-volume` to add block device to generate `mountinfo.json`, and run a kata container with `--mount`.
```bash
$ # kata-ctl direct-volume add
$ sudo kata-ctl direct-volume add /kubelet/kata-test-vol-001/volume001 "{\"device\": \"/tmp/vhu-targets/vhost-blk-rawdisk01.sock\", \"volume_type\":\"spdkvol\", \"fs_type\": \"ext4\", \"metadata\": \"{}\", \"options\": []}"
$ # /kubelet/kata-test-vol-001/volume001 <==> /run/kata-containers/shared/direct-volumes/L2t1YmVsZXQva2F0YS10ZXN0LXZvbC0wMDEvdm9sdW1lMDAx
$ cat L2t1YmVsZXQva2F0YS10ZXN0LXZvbC0wMDEvdm9sdW1lMDAx/mountInfo.json
$ {"volume_type":"spdkvol","device":"/tmp/vhu-targets/vhost-blk-rawdisk01.sock","fs_type":"ext4","metadata":{},"options":[]}
```
As `/run/kata-containers/shared/direct-volumes/` is a fixed path, we will be able to run a kata pod with `--mount` and set
`src` sub-path. And the `--mount` argument looks like: `--mount type=spdkvol,src=/kubelet/kata-test-vol-001/volume001,dst=/disk001`.
#### Run a Kata container with SPDK vhost-user block device
In the case, `ctr run --mount type=X, src=source, dst=dest`, the X will be set `spdkvol` which is a proprietary type specifically designed for SPDK volumes.
```bash
$ # ctr run with --mount type=spdkvol,src=/kubelet/kata-test-vol-001/volume001,dst=/disk001
$ sudo ctr run -t --rm --runtime io.containerd.kata.v2 --mount type=spdkvol,src=/kubelet/kata-test-vol-001/volume001,dst=/disk001,options=rbind:rw "$image" kata-spdk-vol-xx0530 /bin/bash
```

View File

@ -114,6 +114,8 @@ pub enum BlockDeviceType {
/// SPOOL is a reliable NVMe virtualization system for the cloud environment. /// SPOOL is a reliable NVMe virtualization system for the cloud environment.
/// You could learn more SPOOL here: https://www.usenix.org/conference/atc20/presentation/xue /// You could learn more SPOOL here: https://www.usenix.org/conference/atc20/presentation/xue
Spool, Spool,
/// The standard vhost-user-blk based device such as Spdk device.
Spdk,
/// Local disk/file based low level device. /// Local disk/file based low level device.
RawBlock, RawBlock,
} }
@ -124,6 +126,8 @@ impl BlockDeviceType {
// SPOOL path should be started with "spool", e.g. "spool:/device1" // SPOOL path should be started with "spool", e.g. "spool:/device1"
if path.starts_with("spool:/") { if path.starts_with("spool:/") {
BlockDeviceType::Spool BlockDeviceType::Spool
} else if path.starts_with("spdk:/") {
BlockDeviceType::Spdk
} else { } else {
BlockDeviceType::RawBlock BlockDeviceType::RawBlock
} }
@ -400,6 +404,10 @@ impl BlockDeviceMgr {
BlockDeviceError::DeviceManager(e) BlockDeviceError::DeviceManager(e)
}) })
} }
BlockDeviceType::Spool | BlockDeviceType::Spdk => {
// TBD
todo!()
}
_ => Err(BlockDeviceError::InvalidBlockDeviceType), _ => Err(BlockDeviceError::InvalidBlockDeviceType),
} }
} }

View File

@ -11,8 +11,8 @@ use kata_sys_util::rand::RandomBytes;
use tokio::sync::{Mutex, RwLock}; use tokio::sync::{Mutex, RwLock};
use crate::{ use crate::{
BlockConfig, BlockDevice, Hypervisor, VfioDevice, KATA_BLK_DEV_TYPE, KATA_MMIO_BLK_DEV_TYPE, device::VhostUserBlkDevice, BlockConfig, BlockDevice, Hypervisor, VfioDevice, VhostUserConfig,
VIRTIO_BLOCK_MMIO, VIRTIO_BLOCK_PCI, KATA_BLK_DEV_TYPE, KATA_MMIO_BLK_DEV_TYPE, VIRTIO_BLOCK_MMIO, VIRTIO_BLOCK_PCI,
}; };
use super::{ use super::{
@ -25,17 +25,34 @@ pub type ArcMutexDevice = Arc<Mutex<dyn Device>>;
/// block_index and released_block_index are used to search an available block index /// block_index and released_block_index are used to search an available block index
/// in Sandbox. /// in Sandbox.
/// ///
/// @block_driver to be used for block device;
/// @block_index generally default is 1 for <vdb>; /// @block_index generally default is 1 for <vdb>;
/// @released_block_index for blk devices removed and indexes will released at the same time. /// @released_block_index for blk devices removed and indexes will released at the same time.
#[derive(Clone, Debug, Default)] #[derive(Clone, Debug, Default)]
struct SharedInfo { struct SharedInfo {
block_driver: String,
block_index: u64, block_index: u64,
released_block_index: Vec<u64>, released_block_index: Vec<u64>,
} }
impl SharedInfo { impl SharedInfo {
fn new() -> Self { async fn new(hypervisor: Arc<dyn Hypervisor>) -> Self {
// get hypervisor block driver
let block_driver = match hypervisor
.hypervisor_config()
.await
.blockdev_info
.block_device_driver
.as_str()
{
// convert the block driver to kata type
VIRTIO_BLOCK_MMIO => KATA_MMIO_BLK_DEV_TYPE.to_string(),
VIRTIO_BLOCK_PCI => KATA_BLK_DEV_TYPE.to_string(),
_ => "".to_string(),
};
SharedInfo { SharedInfo {
block_driver,
block_index: 1, block_index: 1,
released_block_index: vec![], released_block_index: vec![],
} }
@ -67,26 +84,24 @@ pub struct DeviceManager {
} }
impl DeviceManager { impl DeviceManager {
pub fn new(hypervisor: Arc<dyn Hypervisor>) -> Result<Self> { pub async fn new(hypervisor: Arc<dyn Hypervisor>) -> Result<Self> {
let devices = HashMap::<String, ArcMutexDevice>::new(); let devices = HashMap::<String, ArcMutexDevice>::new();
Ok(DeviceManager { Ok(DeviceManager {
devices, devices,
hypervisor, hypervisor: hypervisor.clone(),
shared_info: SharedInfo::new(), shared_info: SharedInfo::new(hypervisor.clone()).await,
}) })
} }
async fn try_add_device(&mut self, device_id: &str) -> Result<()> { pub async fn try_add_device(&mut self, device_id: &str) -> Result<()> {
// find the device // find the device
let device = self let device = self
.devices .devices
.get(device_id) .get(device_id)
.context("failed to find device")?; .context("failed to find device")?;
// attach device
let mut device_guard = device.lock().await; let mut device_guard = device.lock().await;
// attach device
let result = device_guard.attach(self.hypervisor.as_ref()).await; let result = device_guard.attach(self.hypervisor.as_ref()).await;
// handle attach error // handle attach error
if let Err(e) = result { if let Err(e) = result {
match device_guard.get_device_info().await { match device_guard.get_device_info().await {
@ -102,6 +117,9 @@ impl DeviceManager {
.release_device_index(device.config.virt_path.unwrap().0); .release_device_index(device.config.virt_path.unwrap().0);
} }
} }
DeviceType::VhostUserBlk(device) => {
self.shared_info.release_device_index(device.config.index);
}
_ => { _ => {
debug!(sl!(), "no need to do release device index."); debug!(sl!(), "no need to do release device index.");
} }
@ -109,6 +127,7 @@ impl DeviceManager {
drop(device_guard); drop(device_guard);
self.devices.remove(device_id); self.devices.remove(device_id);
return Err(e); return Err(e);
} }
@ -168,6 +187,11 @@ impl DeviceManager {
return Some(device_id.to_string()); return Some(device_id.to_string());
} }
} }
DeviceType::VhostUserBlk(device) => {
if device.config.socket_path == host_path {
return Some(device_id.to_string());
}
}
_ => { _ => {
// TODO: support find other device type // TODO: support find other device type
continue; continue;
@ -225,6 +249,23 @@ impl DeviceManager {
&vfio_dev_config, &vfio_dev_config,
))) )))
} }
DeviceConfig::VhostUserBlkCfg(config) => {
// try to find the device, found and just return id.
if let Some(dev_id_matched) = self.find_device(config.socket_path.clone()).await {
info!(
sl!(),
"vhost blk device with path:{:?} found. just return device id: {:?}",
config.socket_path.clone(),
dev_id_matched
);
return Ok(dev_id_matched);
}
self.create_vhost_blk_device(config, device_id.clone())
.await
.context("failed to create vhost blk device")?
}
_ => { _ => {
return Err(anyhow!("invliad device type")); return Err(anyhow!("invliad device type"));
} }
@ -236,27 +277,34 @@ impl DeviceManager {
Ok(device_id) Ok(device_id)
} }
async fn create_vhost_blk_device(
&mut self,
config: &VhostUserConfig,
device_id: String,
) -> Result<ArcMutexDevice> {
let mut vhu_blk_config = config.clone();
vhu_blk_config.driver_option = self.shared_info.block_driver.clone();
// generate block device index and virt path
// safe here, Block device always has virt_path.
if let Some(virt_path) = self.get_dev_virt_path(DEVICE_TYPE_BLOCK)? {
vhu_blk_config.index = virt_path.0;
vhu_blk_config.virt_path = virt_path.1;
}
Ok(Arc::new(Mutex::new(VhostUserBlkDevice::new(
device_id,
vhu_blk_config,
))))
}
async fn create_block_device( async fn create_block_device(
&mut self, &mut self,
config: &BlockConfig, config: &BlockConfig,
device_id: String, device_id: String,
) -> Result<ArcMutexDevice> { ) -> Result<ArcMutexDevice> {
let mut block_config = config.clone(); let mut block_config = config.clone();
// get hypervisor block driver block_config.driver_option = self.shared_info.block_driver.clone();
let block_driver = match self
.hypervisor
.hypervisor_config()
.await
.blockdev_info
.block_device_driver
.as_str()
{
// convert the block driver to kata type
VIRTIO_BLOCK_MMIO => KATA_MMIO_BLK_DEV_TYPE.to_string(),
VIRTIO_BLOCK_PCI => KATA_BLK_DEV_TYPE.to_string(),
_ => "".to_string(),
};
block_config.driver_option = block_driver;
// generate virt path // generate virt path
if let Some(virt_path) = self.get_dev_virt_path(DEVICE_TYPE_BLOCK)? { if let Some(virt_path) = self.get_dev_virt_path(DEVICE_TYPE_BLOCK)? {

View File

@ -1,5 +1,5 @@
// Copyright (c) 2019-2022 Alibaba Cloud // Copyright (c) 2019-2023 Alibaba Cloud
// Copyright (c) 2019-2022 Ant Group // Copyright (c) 2019-2023 Ant Group
// //
// SPDX-License-Identifier: Apache-2.0 // SPDX-License-Identifier: Apache-2.0
// //
@ -26,6 +26,9 @@ pub use virtio_fs::{
pub use virtio_net::{Address, NetworkConfig, NetworkDevice}; pub use virtio_net::{Address, NetworkConfig, NetworkDevice};
pub use virtio_vsock::{HybridVsockConfig, HybridVsockDevice, VsockConfig, VsockDevice}; pub use virtio_vsock::{HybridVsockConfig, HybridVsockDevice, VsockConfig, VsockDevice};
pub mod vhost_user_blk;
pub use vhost_user::{VhostUserConfig, VhostUserDevice, VhostUserType};
use anyhow::{anyhow, Context, Result}; use anyhow::{anyhow, Context, Result};
// Tips: // Tips:

View File

@ -1,34 +1,69 @@
// Copyright (c) 2019-2023 Alibaba Cloud // Copyright (c) 2022-2023 Alibaba Cloud
// Copyright (c) 2019-2023 Ant Group // Copyright (c) 2022-2023 Ant Group
// //
// SPDX-License-Identifier: Apache-2.0 // SPDX-License-Identifier: Apache-2.0
// //
use crate::device::Device; #[derive(Debug, Clone)]
use crate::device::DeviceType; pub enum VhostUserType {
use crate::Hypervisor as hypervisor; /// Blk - represents a block vhostuser device type
use anyhow::Result; /// "vhost-user-blk-pci"
use async_trait::async_trait; Blk(String),
/// SCSI - represents SCSI based vhost-user type
/// "vhost-user-scsi-pci"
SCSI(String),
/// Net - represents Net based vhost-user type
/// "virtio-net-pci"
Net(String),
/// FS - represents a virtio-fs vhostuser device type
/// "vhost-user-fs-pci"
FS(String),
}
impl Default for VhostUserType {
fn default() -> Self {
VhostUserType::Blk("vhost-user-blk-pci".to_owned())
}
}
#[derive(Debug, Clone, Default)] #[derive(Debug, Clone, Default)]
/// VhostUserConfig represents data shared by most vhost-user devices /// VhostUserConfig represents data shared by most vhost-user devices
pub struct VhostUserConfig { pub struct VhostUserConfig {
/// Device id /// device id
pub dev_id: String, pub dev_id: String,
/// Socket path /// socket path
pub socket_path: String, pub socket_path: String,
/// Mac_address is only meaningful for vhost user net device /// mac_address is only meaningful for vhost user net device
pub mac_address: String, pub mac_address: String,
/// These are only meaningful for vhost user fs devices
/// vhost-user-fs is only meaningful for vhost-user-fs device
pub tag: String, pub tag: String,
pub cache: String, /// vhost-user-fs cache mode
pub device_type: String, pub cache_mode: String,
/// Pci_addr is the PCI address used to identify the slot at which the drive is attached. /// vhost-user-fs cache size in MB
pub pci_addr: Option<String>,
/// Block index of the device if assigned
pub index: u8,
pub cache_size: u32, pub cache_size: u32,
pub queue_siez: u32,
/// vhost user device type
pub device_type: VhostUserType,
/// guest block driver
pub driver_option: String,
/// pci_addr is the PCI address used to identify the slot at which the drive is attached.
pub pci_addr: Option<String>,
/// Block index of the device if assigned
/// type u64 is not OK
pub index: u64,
/// Virtio queue size. Size: byte
pub queue_size: u32,
/// Block device multi-queue
pub num_queues: usize,
/// device path in guest
pub virt_path: String,
} }
#[derive(Debug, Clone, Default)] #[derive(Debug, Clone, Default)]
@ -36,26 +71,3 @@ pub struct VhostUserDevice {
pub device_id: String, pub device_id: String,
pub config: VhostUserConfig, pub config: VhostUserConfig,
} }
#[async_trait]
impl Device for VhostUserConfig {
async fn attach(&mut self, _h: &dyn hypervisor) -> Result<()> {
todo!()
}
async fn detach(&mut self, _h: &dyn hypervisor) -> Result<Option<u64>> {
todo!()
}
async fn get_device_info(&self) -> DeviceType {
todo!()
}
async fn increase_attach_count(&mut self) -> Result<bool> {
todo!()
}
async fn decrease_attach_count(&mut self) -> Result<bool> {
todo!()
}
}

View File

@ -0,0 +1,122 @@
// Copyright (c) 2023 Alibaba Cloud
// Copyright (c) 2023 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
use super::VhostUserConfig;
use crate::{
device::{Device, DeviceType},
Hypervisor as hypervisor,
};
#[derive(Debug, Clone, Default)]
pub struct VhostUserBlkDevice {
    /// Unique id of the device inside the device manager.
    pub device_id: String,
    /// If set to true, the drive is opened in read-only mode. Otherwise, the
    /// drive is opened as read-write.
    pub is_readonly: bool,
    /// Don't close `path_on_host` file when dropping the device.
    pub no_drop: bool,
    /// driver type for block device
    pub driver_option: String,
    /// Number of attach() calls made so far; the device is only really
    /// attached/detached when this count crosses zero (see the Device impl).
    pub attach_count: u64,
    /// Shared vhost-user configuration (socket path, queues, virt path, ...).
    pub config: VhostUserConfig,
}
impl VhostUserBlkDevice {
    /// Build a `VhostUserBlkDevice` for the given device id and vhost-user
    /// configuration. The device starts out not attached, writable, and
    /// with no driver option assigned yet.
    pub fn new(device_id: String, config: VhostUserConfig) -> Self {
        VhostUserBlkDevice {
            device_id,
            config,
            is_readonly: false,
            no_drop: false,
            driver_option: String::new(),
            attach_count: 0,
        }
    }
}
#[async_trait]
impl Device for VhostUserBlkDevice {
    /// Attach the vhost-user-blk device to the VM through the hypervisor.
    ///
    /// Reference counted: if the device is already attached (count > 0),
    /// only the counter is bumped and the hypervisor is not called again.
    async fn attach(&mut self, h: &dyn hypervisor) -> Result<()> {
        // increase attach count, skip attaching if already attached
        if self
            .increase_attach_count()
            .await
            .context("failed to increase attach count")?
        {
            return Ok(());
        }

        // Roll the counter back if the hypervisor rejects the device.
        if let Err(e) = h.add_device(DeviceType::VhostUserBlk(self.clone())).await {
            self.decrease_attach_count().await?;
            return Err(e);
        }

        Ok(())
    }

    /// Detach the device from the VM, returning the released block index.
    ///
    /// Reference counted: the hypervisor is only asked to remove the device
    /// once the attach count drops to zero; otherwise `Ok(None)` is returned.
    async fn detach(&mut self, h: &dyn hypervisor) -> Result<Option<u64>> {
        // get the count of device detached, and detach once it reaches 0
        if self
            .decrease_attach_count()
            .await
            .context("failed to decrease attach count")?
        {
            return Ok(None);
        }

        // Roll the counter back if the hypervisor fails to remove the device.
        if let Err(e) = h
            .remove_device(DeviceType::VhostUserBlk(self.clone()))
            .await
        {
            self.increase_attach_count().await?;
            return Err(e);
        }

        Ok(Some(self.config.index))
    }

    /// Snapshot of this device wrapped in the generic `DeviceType` enum.
    async fn get_device_info(&self) -> DeviceType {
        DeviceType::VhostUserBlk(self.clone())
    }

    /// Bump the attach count. Returns `Ok(false)` when the caller must do
    /// the real attach (count was 0), `Ok(true)` when already attached.
    async fn increase_attach_count(&mut self) -> Result<bool> {
        match self.attach_count {
            0 => {
                // do real attach
                self.attach_count += 1;
                Ok(false)
            }
            u64::MAX => Err(anyhow!("device was attached too many times")),
            _ => {
                self.attach_count += 1;
                Ok(true)
            }
        }
    }

    /// Drop the attach count. Returns `Ok(false)` when the caller must do
    /// the real detach (count reached 0), `Ok(true)` otherwise.
    async fn decrease_attach_count(&mut self) -> Result<bool> {
        match self.attach_count {
            0 => Err(anyhow!("detaching a device that wasn't attached")),
            1 => {
                // do real detach work
                self.attach_count -= 1;
                Ok(false)
            }
            _ => {
                self.attach_count -= 1;
                Ok(true)
            }
        }
    }
}

View File

@ -1,17 +1,18 @@
// Copyright (c) 2019-2022 Alibaba Cloud // Copyright (c) 2022-2023 Alibaba Cloud
// Copyright (c) 2019-2022 Ant Group // Copyright (c) 2022-2023 Ant Group
// //
// SPDX-License-Identifier: Apache-2.0 // SPDX-License-Identifier: Apache-2.0
// //
pub const VIRTIO_BLOCK_MMIO: &str = "virtio-blk-mmio";
use crate::device::Device; use crate::device::Device;
use crate::device::DeviceType; use crate::device::DeviceType;
use crate::Hypervisor as hypervisor; use crate::Hypervisor as hypervisor;
use anyhow::{anyhow, Context, Result}; use anyhow::{anyhow, Context, Result};
use async_trait::async_trait; use async_trait::async_trait;
/// VIRTIO_BLOCK_PCI indicates block driver is virtio-pci based /// VIRTIO_BLOCK_PCI indicates block driver is virtio-pci based
pub const VIRTIO_BLOCK_PCI: &str = "virtio-blk-pci"; pub const VIRTIO_BLOCK_PCI: &str = "virtio-blk-pci";
pub const VIRTIO_BLOCK_MMIO: &str = "virtio-blk-mmio";
pub const KATA_MMIO_BLK_DEV_TYPE: &str = "mmioblk"; pub const KATA_MMIO_BLK_DEV_TYPE: &str = "mmioblk";
pub const KATA_BLK_DEV_TYPE: &str = "blk"; pub const KATA_BLK_DEV_TYPE: &str = "blk";

View File

@ -6,10 +6,11 @@
use std::fmt; use std::fmt;
use crate::device::driver::vhost_user_blk::VhostUserBlkDevice;
use crate::{ use crate::{
BlockConfig, BlockDevice, HybridVsockConfig, HybridVsockDevice, Hypervisor as hypervisor, BlockConfig, BlockDevice, HybridVsockConfig, HybridVsockDevice, Hypervisor as hypervisor,
NetworkConfig, NetworkDevice, ShareFsDevice, ShareFsDeviceConfig, ShareFsMountConfig, NetworkConfig, NetworkDevice, ShareFsDevice, ShareFsDeviceConfig, ShareFsMountConfig,
ShareFsMountDevice, VfioConfig, VfioDevice, VsockConfig, VsockDevice, ShareFsMountDevice, VfioConfig, VfioDevice, VhostUserConfig, VsockConfig, VsockDevice,
}; };
use anyhow::Result; use anyhow::Result;
use async_trait::async_trait; use async_trait::async_trait;
@ -21,6 +22,7 @@ pub mod util;
#[derive(Debug)] #[derive(Debug)]
pub enum DeviceConfig { pub enum DeviceConfig {
BlockCfg(BlockConfig), BlockCfg(BlockConfig),
VhostUserBlkCfg(VhostUserConfig),
NetworkCfg(NetworkConfig), NetworkCfg(NetworkConfig),
ShareFsCfg(ShareFsDeviceConfig), ShareFsCfg(ShareFsDeviceConfig),
VfioCfg(VfioConfig), VfioCfg(VfioConfig),
@ -32,6 +34,7 @@ pub enum DeviceConfig {
#[derive(Debug)] #[derive(Debug)]
pub enum DeviceType { pub enum DeviceType {
Block(BlockDevice), Block(BlockDevice),
VhostUserBlk(VhostUserBlkDevice),
Vfio(VfioDevice), Vfio(VfioDevice),
Network(NetworkDevice), Network(NetworkDevice),
ShareFs(ShareFsDevice), ShareFs(ShareFsDevice),

View File

@ -8,9 +8,12 @@ use std::path::PathBuf;
use anyhow::{anyhow, Context, Result}; use anyhow::{anyhow, Context, Result};
use dbs_utils::net::MacAddr; use dbs_utils::net::MacAddr;
use dragonball::api::v1::{ use dragonball::{
BlockDeviceConfigInfo, FsDeviceConfigInfo, FsMountConfigInfo, VirtioNetDeviceConfigInfo, api::v1::{
VsockDeviceConfigInfo, BlockDeviceConfigInfo, FsDeviceConfigInfo, FsMountConfigInfo, VirtioNetDeviceConfigInfo,
VsockDeviceConfigInfo,
},
device_manager::blk_dev_mgr::BlockDeviceType,
}; };
use super::DragonballInner; use super::DragonballInner;
@ -56,6 +59,14 @@ impl DragonballInner {
block.config.no_drop, block.config.no_drop,
) )
.context("add block device"), .context("add block device"),
DeviceType::VhostUserBlk(block) => self
.add_block_device(
block.config.socket_path.as_str(),
block.device_id.as_str(),
block.is_readonly,
block.no_drop,
)
.context("add vhost user based block device"),
DeviceType::HybridVsock(hvsock) => self.add_hvsock(&hvsock.config).context("add vsock"), DeviceType::HybridVsock(hvsock) => self.add_hvsock(&hvsock.config).context("add vsock"),
DeviceType::ShareFs(sharefs) => self DeviceType::ShareFs(sharefs) => self
.add_share_fs_device(&sharefs.config) .add_share_fs_device(&sharefs.config)
@ -161,6 +172,7 @@ impl DragonballInner {
let blk_cfg = BlockDeviceConfigInfo { let blk_cfg = BlockDeviceConfigInfo {
drive_id: id.to_string(), drive_id: id.to_string(),
device_type: BlockDeviceType::get_type(path),
path_on_host: PathBuf::from(jailed_drive), path_on_host: PathBuf::from(jailed_drive),
is_direct: self.config.blockdev_info.block_device_cache_direct, is_direct: self.config.blockdev_info.block_device_cache_direct,
no_drop, no_drop,

View File

@ -35,19 +35,16 @@ pub struct ResourceManager {
} }
impl ResourceManager { impl ResourceManager {
pub fn new( pub async fn new(
sid: &str, sid: &str,
agent: Arc<dyn Agent>, agent: Arc<dyn Agent>,
hypervisor: Arc<dyn Hypervisor>, hypervisor: Arc<dyn Hypervisor>,
toml_config: Arc<TomlConfig>, toml_config: Arc<TomlConfig>,
) -> Result<Self> { ) -> Result<Self> {
Ok(Self { Ok(Self {
inner: Arc::new(RwLock::new(ResourceManagerInner::new( inner: Arc::new(RwLock::new(
sid, ResourceManagerInner::new(sid, agent, hypervisor, toml_config).await?,
agent, )),
hypervisor,
toml_config,
)?)),
}) })
} }

View File

@ -51,15 +51,16 @@ pub(crate) struct ResourceManagerInner {
} }
impl ResourceManagerInner { impl ResourceManagerInner {
pub(crate) fn new( pub(crate) async fn new(
sid: &str, sid: &str,
agent: Arc<dyn Agent>, agent: Arc<dyn Agent>,
hypervisor: Arc<dyn Hypervisor>, hypervisor: Arc<dyn Hypervisor>,
toml_config: Arc<TomlConfig>, toml_config: Arc<TomlConfig>,
) -> Result<Self> { ) -> Result<Self> {
// create device manager // create device manager
let dev_manager = let dev_manager = DeviceManager::new(hypervisor.clone())
DeviceManager::new(hypervisor.clone()).context("failed to create device manager")?; .await
.context("failed to create device manager")?;
let cgroups_resource = CgroupsResource::new(sid, &toml_config)?; let cgroups_resource = CgroupsResource::new(sid, &toml_config)?;
let cpu_resource = CpuResource::new(toml_config.clone())?; let cpu_resource = CpuResource::new(toml_config.clone())?;
@ -473,7 +474,9 @@ impl Persist for ResourceManagerInner {
sid: resource_args.sid, sid: resource_args.sid,
agent: resource_args.agent, agent: resource_args.agent,
hypervisor: resource_args.hypervisor.clone(), hypervisor: resource_args.hypervisor.clone(),
device_manager: Arc::new(RwLock::new(DeviceManager::new(resource_args.hypervisor)?)), device_manager: Arc::new(RwLock::new(
DeviceManager::new(resource_args.hypervisor).await?,
)),
network: None, network: None,
share_fs: None, share_fs: None,
rootfs_resource: RootFsResource::new(), rootfs_resource: RootFsResource::new(),

View File

@ -14,6 +14,9 @@ pub mod utils;
pub mod vfio_volume; pub mod vfio_volume;
use vfio_volume::is_vfio_volume; use vfio_volume::is_vfio_volume;
pub mod spdk_volume;
use spdk_volume::is_spdk_volume;
use std::{sync::Arc, vec::Vec}; use std::{sync::Arc, vec::Vec};
use anyhow::{Context, Result}; use anyhow::{Context, Result};
@ -84,6 +87,12 @@ impl VolumeResource {
.await .await
.with_context(|| format!("new vfio volume {:?}", m))?, .with_context(|| format!("new vfio volume {:?}", m))?,
) )
} else if is_spdk_volume(m) {
Arc::new(
spdk_volume::SPDKVolume::new(d, m, read_only, cid, sid)
.await
.with_context(|| format!("create spdk volume {:?}", m))?,
)
} else if let Some(options) = } else if let Some(options) =
get_huge_page_option(m).context("failed to check huge page")? get_huge_page_option(m).context("failed to check huge page")?
{ {

View File

@ -0,0 +1,189 @@
// Copyright (c) 2023 Alibaba Cloud
// Copyright (c) 2023 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
use nix::sys::{stat, stat::SFlag};
use tokio::sync::RwLock;
use super::Volume;
use crate::volume::utils::{
generate_shared_path, volume_mount_info, DEFAULT_VOLUME_FS_TYPE, KATA_SPDK_VOLUME_TYPE,
KATA_SPOOL_VOLUME_TYPE,
};
use hypervisor::{
device::{
device_manager::{do_handle_device, DeviceManager},
DeviceConfig, DeviceType,
},
VhostUserConfig, VhostUserType,
};
/// SPDKVolume: spdk block device volume
#[derive(Clone)]
pub(crate) struct SPDKVolume {
    /// Agent storage object telling the guest how to mount the device;
    /// `None` when no storage entry is needed.
    storage: Option<agent::Storage>,
    /// Mount seen by the container, pointing at the host-guest shared path.
    mount: oci::Mount,
    /// Id of the vhost-user-blk device registered with the device manager.
    device_id: String,
}
impl SPDKVolume {
    /// Create an SPDK/vhost-user based block volume.
    ///
    /// Reads the `mountinfo.json` referenced by the mount source, validates
    /// the vhost-user socket, registers a vhost-user-blk device with the
    /// device manager, and builds the agent storage plus the guest mount.
    pub(crate) async fn new(
        d: &RwLock<DeviceManager>,
        m: &oci::Mount,
        read_only: bool,
        cid: &str,
        sid: &str,
    ) -> Result<Self> {
        let mnt_src: &str = &m.source;

        // deserialize information from mountinfo.json
        let v = volume_mount_info(mnt_src)
            .context("deserialize information from mountinfo.json")?;

        // Normalize the device into "<scheme>://<socket path>" form.
        let device = match v.volume_type.as_str() {
            KATA_SPDK_VOLUME_TYPE => {
                if v.device.starts_with("spdk://") {
                    v.device.clone()
                } else {
                    format!("spdk://{}", v.device.as_str())
                }
            }
            KATA_SPOOL_VOLUME_TYPE => {
                if v.device.starts_with("spool://") {
                    v.device.clone()
                } else {
                    format!("spool://{}", v.device.as_str())
                }
            }
            _ => return Err(anyhow!("mountinfo.json is invalid")),
        };

        // device format: X:///x/y/z.sock — the scheme was just prepended
        // above, so the split should always succeed; fail loudly otherwise.
        // The path must point at a unix socket (S_IFSOCK), not a plain file.
        {
            // device tokens: (Type, Socket)
            let device_tokens = device
                .split_once("://")
                .ok_or_else(|| anyhow!("device {:?} has no scheme prefix", device))?;
            let fstat = stat::stat(device_tokens.1).context("stat socket failed")?;
            let s_flag = SFlag::from_bits_truncate(fstat.st_mode);
            if s_flag != SFlag::S_IFSOCK {
                return Err(anyhow!("device {:?} is not valid", device));
            }
        }

        let mut vhu_blk_config = VhostUserConfig {
            socket_path: device,
            device_type: VhostUserType::Blk("vhost-user-blk-pci".to_owned()),
            ..Default::default()
        };

        // Optional tuning knobs carried in the mount info metadata.
        if let Some(num) = v.metadata.get("num_queues") {
            vhu_blk_config.num_queues = num
                .parse::<usize>()
                .context("num_queues parse usize failed.")?;
        }
        if let Some(size) = v.metadata.get("queue_size") {
            vhu_blk_config.queue_size = size
                .parse::<u32>()
                .context("queue_size parse u32 failed.")?;
        }

        // create and insert block device into Kata VM
        let device_info =
            do_handle_device(d, &DeviceConfig::VhostUserBlkCfg(vhu_blk_config.clone()))
                .await
                .context("do handle device failed.")?;

        // generate host guest shared path
        let guest_path = generate_shared_path(m.destination.clone(), read_only, cid, sid)
            .await
            .context("generate host-guest shared path failed")?;

        // agent storage entry telling the guest how to mount the device
        let mut storage = agent::Storage {
            mount_point: guest_path.clone(),
            ..Default::default()
        };
        storage.options = if read_only {
            vec!["ro".to_string()]
        } else {
            Vec::new()
        };

        let mut device_id = String::new();
        if let DeviceType::VhostUserBlk(device) = device_info {
            // blk, mmioblk
            storage.driver = device.config.driver_option;
            // /dev/vdX
            storage.source = device.config.virt_path;
            device_id = device.device_id;
        }

        if m.r#type != "bind" {
            storage.fs_type = v.fs_type.clone();
        } else {
            storage.fs_type = DEFAULT_VOLUME_FS_TYPE.to_string();
        }

        // Destinations under /dev are bind-mounted device nodes.
        if m.destination.clone().starts_with("/dev") {
            storage.fs_type = "bind".to_string();
            storage.options.append(&mut m.options.clone());
        }
        storage.fs_group = None;

        let mount = oci::Mount {
            destination: m.destination.clone(),
            r#type: storage.fs_type.clone(),
            source: guest_path,
            options: m.options.clone(),
        };

        Ok(Self {
            storage: Some(storage),
            mount,
            device_id,
        })
    }
}
#[async_trait]
impl Volume for SPDKVolume {
    /// The single guest mount backing this volume.
    fn get_volume_mount(&self) -> Result<Vec<oci::Mount>> {
        Ok(vec![self.mount.clone()])
    }

    /// Zero or one agent storage entries, depending on whether a storage
    /// object was built when the volume was created.
    fn get_storage(&self) -> Result<Vec<agent::Storage>> {
        Ok(self.storage.iter().cloned().collect())
    }

    /// Detach and remove the vhost-user-blk device via the device manager.
    async fn cleanup(&self, device_manager: &RwLock<DeviceManager>) -> Result<()> {
        let mut mgr = device_manager.write().await;
        mgr.try_remove_device(&self.device_id).await
    }

    /// Id of the vhost-user-blk device backing this volume.
    fn get_device_id(&self) -> Result<Option<String>> {
        Ok(Some(self.device_id.clone()))
    }
}
/// Check whether a mount should be handled as an SPDK-backed volume.
///
/// `spdkvol` and `spoolvol` share the same implementation, so both mount
/// types are accepted here.
pub(crate) fn is_spdk_volume(m: &oci::Mount) -> bool {
    m.r#type == KATA_SPDK_VOLUME_TYPE || m.r#type == KATA_SPOOL_VOLUME_TYPE
}

View File

@ -20,6 +20,7 @@ pub const KATA_MOUNT_BIND_TYPE: &str = "bind";
pub const KATA_DIRECT_VOLUME_TYPE: &str = "directvol"; pub const KATA_DIRECT_VOLUME_TYPE: &str = "directvol";
pub const KATA_VFIO_VOLUME_TYPE: &str = "vfiovol"; pub const KATA_VFIO_VOLUME_TYPE: &str = "vfiovol";
pub const KATA_SPDK_VOLUME_TYPE: &str = "spdkvol"; pub const KATA_SPDK_VOLUME_TYPE: &str = "spdkvol";
pub const KATA_SPOOL_VOLUME_TYPE: &str = "spoolvol";
// volume mount info load infomation from mountinfo.json // volume mount info load infomation from mountinfo.json
pub fn volume_mount_info(volume_path: &str) -> Result<DirectVolumeMountInfo> { pub fn volume_mount_info(volume_path: &str) -> Result<DirectVolumeMountInfo> {

View File

@ -74,12 +74,8 @@ impl RuntimeHandler for VirtContainer {
// get uds from hypervisor and get config from toml_config // get uds from hypervisor and get config from toml_config
let agent = new_agent(&config).context("new agent")?; let agent = new_agent(&config).context("new agent")?;
let resource_manager = Arc::new(ResourceManager::new( let resource_manager =
sid, Arc::new(ResourceManager::new(sid, agent.clone(), hypervisor.clone(), config).await?);
agent.clone(),
hypervisor.clone(),
config,
)?);
let pid = std::process::id(); let pid = std::process::id();
let sandbox = sandbox::VirtSandbox::new( let sandbox = sandbox::VirtSandbox::new(