runtime-rs: add support for SPDK/vhost-user based volume.

Unlike the previous usage, which required creating /dev/xxx via
mknod on the host, the new approach fully reuses the
DirectVolume-related workflow and passes the SPDK controller to
the VMM.

A user guide on using an SPDK volume with Kata Containers is also
added; it can be found in docs/how-to.

Fixes: #6526

Signed-off-by: alex.lyn <alex.lyn@antgroup.com>
alex.lyn 2023-06-25 16:17:25 +08:00
parent 4cf552c151
commit 0df2fc2702
15 changed files with 573 additions and 95 deletions

View File

@ -25,8 +25,8 @@ Finally, when running a Kata Containers with `ctr run --mount type=X, src=Y, dst
Now, supported types:
- `directvol` for direct volume
- `spdkvol` for SPDK volume (TBD)
- `vfiovol` for VFIO device based volume
- `spdkvol` for SPDK/vhost-user based volume
## Setup Device and Run a Kata-Containers
@ -147,6 +147,80 @@ $ sudo ctr run -t --rm --runtime io.containerd.kata.v2 --mount type=vfiovol,src=
```
### SPDK Device Based Volume
### SPDK Device Based Block Volume
TBD
Unlike the runtime (golang version), SPDK vhost-user devices in runtime-rs no longer require a `mknod` device node under `/dev/`.
Using `kata-ctl direct-volume add ..` to create a mount info config is enough.
#### Run SPDK vhost target and expose a vhost block device
As an example, run an SPDK vhost target and create a vhost-user block controller.
First, run the SPDK vhost target:
> **Tip:** If the `vfio-pci` driver is supported, you can run SPDK with `DRIVER_OVERRIDE=vfio-pci`.
> Otherwise, just run it without the override: `sudo HUGEMEM=4096 ./scripts/setup.sh`.
```bash
$ SPDK_DEVEL=/xx/spdk
$ VHU_UDS_PATH=/tmp/vhu-targets
$ RAW_DISKS=/xx/rawdisks
$ # Reset first
$ ${SPDK_DEVEL}/scripts/setup.sh reset
$ sudo sysctl -w vm.nr_hugepages=2048
$ # 4G huge pages memory for SPDK
$ sudo HUGEMEM=4096 DRIVER_OVERRIDE=vfio-pci ${SPDK_DEVEL}/scripts/setup.sh
$ sudo ${SPDK_DEVEL}/build/bin/spdk_tgt -S $VHU_UDS_PATH -s 1024 -m 0x3 &
```
Second, create a vhost controller:
```bash
$ sudo dd if=/dev/zero of=${RAW_DISKS}/rawdisk01.20g bs=1M count=20480
$ sudo ${SPDK_DEVEL}/scripts/rpc.py bdev_aio_create ${RAW_DISKS}/rawdisk01.20g vhu-rawdisk01.20g 512
$ sudo ${SPDK_DEVEL}/scripts/rpc.py vhost_create_blk_controller vhost-blk-rawdisk01.sock vhu-rawdisk01.20g
```
Here, a vhost controller `vhost-blk-rawdisk01.sock` is created; this controller will
be passed to the hypervisor, such as Dragonball, Cloud Hypervisor, Firecracker or QEMU.
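Optionally, you can check that the controller exists and that its vhost-user socket was created before handing it to Kata. A minimal check, reusing the paths from the example above (`vhost_get_controllers` is a standard SPDK RPC):
```bash
$ # list the vhost controllers known to the running SPDK target
$ sudo ${SPDK_DEVEL}/scripts/rpc.py vhost_get_controllers
$ # the controller socket is created under the directory given to spdk_tgt -S
$ ls -l ${VHU_UDS_PATH}/vhost-blk-rawdisk01.sock
```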
#### Set up a vhost-user block device for Kata Containers
First, `mkdir` a sub-path `kubelet/kata-test-vol-001/` under `/run/kata-containers/shared/direct-volumes/`.
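A minimal sketch of that step, using the sub-path from this example:
```bash
$ sudo mkdir -p /run/kata-containers/shared/direct-volumes/kubelet/kata-test-vol-001/
```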
Second, fill in the fields of `mountinfo.json`; it looks like the following:
```json
{
"device": "/tmp/vhu-targets/vhost-blk-rawdisk01.sock",
"volume_type": "spdkvol",
"fs_type": "ext4",
"metadata":"{}",
"options": []
}
```
Third, use `kata-ctl direct-volume add` to register the block device and generate `mountinfo.json`, then run a Kata container with `--mount`.
```bash
$ # kata-ctl direct-volume add
$ sudo kata-ctl direct-volume add /kubelet/kata-test-vol-001/volume001 "{\"device\": \"/tmp/vhu-targets/vhost-blk-rawdisk01.sock\", \"volume_type\":\"spdkvol\", \"fs_type\": \"ext4\", \"metadata\":"{}", \"options\": []}"
$ # /kubelet/kata-test-vol-001/volume001 <==> /run/kata-containers/shared/direct-volumes/L2t1YmVsZXQva2F0YS10ZXN0LXZvbC0wMDEvdm9sdW1lMDAx
$ cat L2t1YmVsZXQva2F0YS10ZXN0LXZvbC0wMDEvdm9sdW1lMDAx/mountInfo.json
$ {"volume_type":"spdkvol","device":"/tmp/vhu-targets/vhost-blk-rawdisk01.sock","fs_type":"ext4","metadata":{},"options":[]}
```
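Note that the directory name under `/run/kata-containers/shared/direct-volumes/` is the base64 encoding of the volume path passed to `kata-ctl direct-volume add` (as the example above shows), so it can be reproduced by hand if needed:
```bash
$ echo -n "/kubelet/kata-test-vol-001/volume001" | base64
L2t1YmVsZXQva2F0YS10ZXN0LXZvbC0wMDEvdm9sdW1lMDAx
```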
As `/run/kata-containers/shared/direct-volumes/` is a fixed path, we can run a Kata pod with `--mount` and set
`src` to the sub-path. The `--mount` argument then looks like: `--mount type=spdkvol,src=/kubelet/kata-test-vol-001/volume001,dst=/disk001`.
#### Run a Kata container with SPDK vhost-user block device
In the command `ctr run --mount type=X,src=source,dst=dest`, `X` is set to `spdkvol`, a dedicated type designed for SPDK volumes.
```bash
$ # ctr run with --mount type=spdkvol,src=/kubelet/kata-test-vol-001/volume001,dst=/disk001
$ sudo ctr run -t --rm --runtime io.containerd.kata.v2 --mount type=spdkvol,src=/kubelet/kata-test-vol-001/volume001,dst=/disk001,options=rbind:rw "$image" kata-spdk-vol-xx0530 /bin/bash
```
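Once the container is running, the volume should show up mounted at the `dst` path (`/disk001` in this example). A quick, hypothetical check from inside the container:
```bash
$ mount | grep disk001
```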

View File

@ -114,6 +114,8 @@ pub enum BlockDeviceType {
/// SPOOL is a reliable NVMe virtualization system for the cloud environment.
/// You could learn more SPOOL here: https://www.usenix.org/conference/atc20/presentation/xue
Spool,
/// The standard vhost-user-blk based device such as Spdk device.
Spdk,
/// Local disk/file based low level device.
RawBlock,
}
@ -124,6 +126,8 @@ impl BlockDeviceType {
// SPOOL path should be started with "spool", e.g. "spool:/device1"
if path.starts_with("spool:/") {
BlockDeviceType::Spool
} else if path.starts_with("spdk:/") {
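// SPDK vhost-user path should start with "spdk:/", e.g. "spdk:///x/y/z.sock"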
BlockDeviceType::Spdk
} else {
BlockDeviceType::RawBlock
}
@ -400,6 +404,10 @@ impl BlockDeviceMgr {
BlockDeviceError::DeviceManager(e)
})
}
BlockDeviceType::Spool | BlockDeviceType::Spdk => {
// TBD
todo!()
}
_ => Err(BlockDeviceError::InvalidBlockDeviceType),
}
}

View File

@ -11,8 +11,8 @@ use kata_sys_util::rand::RandomBytes;
use tokio::sync::{Mutex, RwLock};
use crate::{
BlockConfig, BlockDevice, Hypervisor, VfioDevice, KATA_BLK_DEV_TYPE, KATA_MMIO_BLK_DEV_TYPE,
VIRTIO_BLOCK_MMIO, VIRTIO_BLOCK_PCI,
device::VhostUserBlkDevice, BlockConfig, BlockDevice, Hypervisor, VfioDevice, VhostUserConfig,
KATA_BLK_DEV_TYPE, KATA_MMIO_BLK_DEV_TYPE, VIRTIO_BLOCK_MMIO, VIRTIO_BLOCK_PCI,
};
use super::{
@ -25,17 +25,34 @@ pub type ArcMutexDevice = Arc<Mutex<dyn Device>>;
/// block_index and released_block_index are used to search an available block index
/// in Sandbox.
///
/// @block_driver: the block driver to be used for block devices;
/// @block_index: generally defaults to 1, i.e. <vdb>;
/// @released_block_index: indexes released when block devices are removed.
#[derive(Clone, Debug, Default)]
struct SharedInfo {
block_driver: String,
block_index: u64,
released_block_index: Vec<u64>,
}
impl SharedInfo {
fn new() -> Self {
async fn new(hypervisor: Arc<dyn Hypervisor>) -> Self {
// get hypervisor block driver
let block_driver = match hypervisor
.hypervisor_config()
.await
.blockdev_info
.block_device_driver
.as_str()
{
// convert the block driver to kata type
VIRTIO_BLOCK_MMIO => KATA_MMIO_BLK_DEV_TYPE.to_string(),
VIRTIO_BLOCK_PCI => KATA_BLK_DEV_TYPE.to_string(),
_ => "".to_string(),
};
SharedInfo {
block_driver,
block_index: 1,
released_block_index: vec![],
}
@ -67,26 +84,24 @@ pub struct DeviceManager {
}
impl DeviceManager {
pub fn new(hypervisor: Arc<dyn Hypervisor>) -> Result<Self> {
pub async fn new(hypervisor: Arc<dyn Hypervisor>) -> Result<Self> {
let devices = HashMap::<String, ArcMutexDevice>::new();
Ok(DeviceManager {
devices,
hypervisor,
shared_info: SharedInfo::new(),
hypervisor: hypervisor.clone(),
shared_info: SharedInfo::new(hypervisor.clone()).await,
})
}
async fn try_add_device(&mut self, device_id: &str) -> Result<()> {
pub async fn try_add_device(&mut self, device_id: &str) -> Result<()> {
// find the device
let device = self
.devices
.get(device_id)
.context("failed to find device")?;
// attach device
let mut device_guard = device.lock().await;
// attach device
let result = device_guard.attach(self.hypervisor.as_ref()).await;
// handle attach error
if let Err(e) = result {
match device_guard.get_device_info().await {
@ -102,6 +117,9 @@ impl DeviceManager {
.release_device_index(device.config.virt_path.unwrap().0);
}
}
DeviceType::VhostUserBlk(device) => {
self.shared_info.release_device_index(device.config.index);
}
_ => {
debug!(sl!(), "no need to do release device index.");
}
@ -109,6 +127,7 @@ impl DeviceManager {
drop(device_guard);
self.devices.remove(device_id);
return Err(e);
}
@ -168,6 +187,11 @@ impl DeviceManager {
return Some(device_id.to_string());
}
}
DeviceType::VhostUserBlk(device) => {
if device.config.socket_path == host_path {
return Some(device_id.to_string());
}
}
_ => {
// TODO: support find other device type
continue;
@ -225,6 +249,23 @@ impl DeviceManager {
&vfio_dev_config,
)))
}
DeviceConfig::VhostUserBlkCfg(config) => {
// try to find the device; if found, just return its id.
if let Some(dev_id_matched) = self.find_device(config.socket_path.clone()).await {
info!(
sl!(),
"vhost blk device with path:{:?} found. just return device id: {:?}",
config.socket_path.clone(),
dev_id_matched
);
return Ok(dev_id_matched);
}
self.create_vhost_blk_device(config, device_id.clone())
.await
.context("failed to create vhost blk device")?
}
_ => {
return Err(anyhow!("invliad device type"));
}
@ -236,27 +277,34 @@ impl DeviceManager {
Ok(device_id)
}
async fn create_vhost_blk_device(
&mut self,
config: &VhostUserConfig,
device_id: String,
) -> Result<ArcMutexDevice> {
let mut vhu_blk_config = config.clone();
vhu_blk_config.driver_option = self.shared_info.block_driver.clone();
// generate the block device index and virt path
// safe here: a block device always has a virt_path.
if let Some(virt_path) = self.get_dev_virt_path(DEVICE_TYPE_BLOCK)? {
vhu_blk_config.index = virt_path.0;
vhu_blk_config.virt_path = virt_path.1;
}
Ok(Arc::new(Mutex::new(VhostUserBlkDevice::new(
device_id,
vhu_blk_config,
))))
}
async fn create_block_device(
&mut self,
config: &BlockConfig,
device_id: String,
) -> Result<ArcMutexDevice> {
let mut block_config = config.clone();
// get hypervisor block driver
let block_driver = match self
.hypervisor
.hypervisor_config()
.await
.blockdev_info
.block_device_driver
.as_str()
{
// convert the block driver to kata type
VIRTIO_BLOCK_MMIO => KATA_MMIO_BLK_DEV_TYPE.to_string(),
VIRTIO_BLOCK_PCI => KATA_BLK_DEV_TYPE.to_string(),
_ => "".to_string(),
};
block_config.driver_option = block_driver;
block_config.driver_option = self.shared_info.block_driver.clone();
// generate virt path
if let Some(virt_path) = self.get_dev_virt_path(DEVICE_TYPE_BLOCK)? {

View File

@ -1,5 +1,5 @@
// Copyright (c) 2019-2022 Alibaba Cloud
// Copyright (c) 2019-2022 Ant Group
// Copyright (c) 2019-2023 Alibaba Cloud
// Copyright (c) 2019-2023 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
@ -26,6 +26,9 @@ pub use virtio_fs::{
pub use virtio_net::{Address, NetworkConfig, NetworkDevice};
pub use virtio_vsock::{HybridVsockConfig, HybridVsockDevice, VsockConfig, VsockDevice};
pub mod vhost_user_blk;
pub use vhost_user::{VhostUserConfig, VhostUserDevice, VhostUserType};
use anyhow::{anyhow, Context, Result};
// Tips:

View File

@ -1,34 +1,69 @@
// Copyright (c) 2019-2023 Alibaba Cloud
// Copyright (c) 2019-2023 Ant Group
// Copyright (c) 2022-2023 Alibaba Cloud
// Copyright (c) 2022-2023 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
use crate::device::Device;
use crate::device::DeviceType;
use crate::Hypervisor as hypervisor;
use anyhow::Result;
use async_trait::async_trait;
#[derive(Debug, Clone)]
pub enum VhostUserType {
/// Blk - represents a block vhostuser device type
/// "vhost-user-blk-pci"
Blk(String),
/// SCSI - represents SCSI based vhost-user type
/// "vhost-user-scsi-pci"
SCSI(String),
/// Net - represents Net based vhost-user type
/// "virtio-net-pci"
Net(String),
/// FS - represents a virtio-fs vhostuser device type
/// "vhost-user-fs-pci"
FS(String),
}
impl Default for VhostUserType {
fn default() -> Self {
VhostUserType::Blk("vhost-user-blk-pci".to_owned())
}
}
#[derive(Debug, Clone, Default)]
/// VhostUserConfig represents data shared by most vhost-user devices
pub struct VhostUserConfig {
/// Device id
/// device id
pub dev_id: String,
/// Socket path
/// socket path
pub socket_path: String,
/// Mac_address is only meaningful for vhost user net device
/// mac_address is only meaningful for vhost user net device
pub mac_address: String,
/// These are only meaningful for vhost user fs devices
/// tag is only meaningful for vhost-user-fs devices
pub tag: String,
pub cache: String,
pub device_type: String,
/// Pci_addr is the PCI address used to identify the slot at which the drive is attached.
pub pci_addr: Option<String>,
/// Block index of the device if assigned
pub index: u8,
/// vhost-user-fs cache mode
pub cache_mode: String,
/// vhost-user-fs cache size in MB
pub cache_size: u32,
pub queue_siez: u32,
/// vhost user device type
pub device_type: VhostUserType,
/// guest block driver
pub driver_option: String,
/// pci_addr is the PCI address used to identify the slot at which the drive is attached.
pub pci_addr: Option<String>,
/// Block index of the device if assigned
/// (u64, to match the sandbox-wide block index type)
pub index: u64,
/// Virtio queue size. Size: byte
pub queue_size: u32,
/// Block device multi-queue
pub num_queues: usize,
/// device path in guest
pub virt_path: String,
}
#[derive(Debug, Clone, Default)]
@ -36,26 +71,3 @@ pub struct VhostUserDevice {
pub device_id: String,
pub config: VhostUserConfig,
}
#[async_trait]
impl Device for VhostUserConfig {
async fn attach(&mut self, _h: &dyn hypervisor) -> Result<()> {
todo!()
}
async fn detach(&mut self, _h: &dyn hypervisor) -> Result<Option<u64>> {
todo!()
}
async fn get_device_info(&self) -> DeviceType {
todo!()
}
async fn increase_attach_count(&mut self) -> Result<bool> {
todo!()
}
async fn decrease_attach_count(&mut self) -> Result<bool> {
todo!()
}
}

View File

@ -0,0 +1,122 @@
// Copyright (c) 2023 Alibaba Cloud
// Copyright (c) 2023 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
use super::VhostUserConfig;
use crate::{
device::{Device, DeviceType},
Hypervisor as hypervisor,
};
#[derive(Debug, Clone, Default)]
pub struct VhostUserBlkDevice {
pub device_id: String,
/// If set to true, the drive is opened in read-only mode. Otherwise, the
/// drive is opened as read-write.
pub is_readonly: bool,
/// Don't close `path_on_host` file when dropping the device.
pub no_drop: bool,
/// driver type for block device
pub driver_option: String,
pub attach_count: u64,
pub config: VhostUserConfig,
}
impl VhostUserBlkDevice {
// new creates a new VhostUserBlkDevice
pub fn new(device_id: String, config: VhostUserConfig) -> Self {
VhostUserBlkDevice {
device_id,
attach_count: 0,
config,
..Default::default()
}
}
}
#[async_trait]
impl Device for VhostUserBlkDevice {
async fn attach(&mut self, h: &dyn hypervisor) -> Result<()> {
// increase the attach count; skip attaching the device if it is already attached
if self
.increase_attach_count()
.await
.context("failed to increase attach count")?
{
return Ok(());
}
if let Err(e) = h.add_device(DeviceType::VhostUserBlk(self.clone())).await {
self.decrease_attach_count().await?;
return Err(e);
}
return Ok(());
}
async fn detach(&mut self, h: &dyn hypervisor) -> Result<Option<u64>> {
// decrease the attach count; only do the real detach once it reaches 0
if self
.decrease_attach_count()
.await
.context("failed to decrease attach count")?
{
return Ok(None);
}
if let Err(e) = h
.remove_device(DeviceType::VhostUserBlk(self.clone()))
.await
{
self.increase_attach_count().await?;
return Err(e);
}
Ok(Some(self.config.index))
}
async fn get_device_info(&self) -> DeviceType {
DeviceType::VhostUserBlk(self.clone())
}
async fn increase_attach_count(&mut self) -> Result<bool> {
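// Ok(false): this is the first attach, the caller must perform the real attach;
// Ok(true): the device is already attached and the attach can be skipped.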
match self.attach_count {
0 => {
// do real attach
self.attach_count += 1;
Ok(false)
}
std::u64::MAX => Err(anyhow!("device was attached too many times")),
_ => {
self.attach_count += 1;
Ok(true)
}
}
}
async fn decrease_attach_count(&mut self) -> Result<bool> {
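// Ok(false): the count dropped to zero, the caller must perform the real detach;
// Ok(true): other users still reference the device and the detach can be skipped.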
match self.attach_count {
0 => Err(anyhow!("detaching a device that wasn't attached")),
1 => {
// do the real detach work
self.attach_count -= 1;
Ok(false)
}
_ => {
self.attach_count -= 1;
Ok(true)
}
}
}
}

View File

@ -1,17 +1,18 @@
// Copyright (c) 2019-2022 Alibaba Cloud
// Copyright (c) 2019-2022 Ant Group
// Copyright (c) 2022-2023 Alibaba Cloud
// Copyright (c) 2022-2023 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
pub const VIRTIO_BLOCK_MMIO: &str = "virtio-blk-mmio";
use crate::device::Device;
use crate::device::DeviceType;
use crate::Hypervisor as hypervisor;
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
/// VIRTIO_BLOCK_PCI indicates block driver is virtio-pci based
pub const VIRTIO_BLOCK_PCI: &str = "virtio-blk-pci";
pub const VIRTIO_BLOCK_MMIO: &str = "virtio-blk-mmio";
pub const KATA_MMIO_BLK_DEV_TYPE: &str = "mmioblk";
pub const KATA_BLK_DEV_TYPE: &str = "blk";

View File

@ -6,10 +6,11 @@
use std::fmt;
use crate::device::driver::vhost_user_blk::VhostUserBlkDevice;
use crate::{
BlockConfig, BlockDevice, HybridVsockConfig, HybridVsockDevice, Hypervisor as hypervisor,
NetworkConfig, NetworkDevice, ShareFsDevice, ShareFsDeviceConfig, ShareFsMountConfig,
ShareFsMountDevice, VfioConfig, VfioDevice, VsockConfig, VsockDevice,
ShareFsMountDevice, VfioConfig, VfioDevice, VhostUserConfig, VsockConfig, VsockDevice,
};
use anyhow::Result;
use async_trait::async_trait;
@ -21,6 +22,7 @@ pub mod util;
#[derive(Debug)]
pub enum DeviceConfig {
BlockCfg(BlockConfig),
VhostUserBlkCfg(VhostUserConfig),
NetworkCfg(NetworkConfig),
ShareFsCfg(ShareFsDeviceConfig),
VfioCfg(VfioConfig),
@ -32,6 +34,7 @@ pub enum DeviceConfig {
#[derive(Debug)]
pub enum DeviceType {
Block(BlockDevice),
VhostUserBlk(VhostUserBlkDevice),
Vfio(VfioDevice),
Network(NetworkDevice),
ShareFs(ShareFsDevice),

View File

@ -8,9 +8,12 @@ use std::path::PathBuf;
use anyhow::{anyhow, Context, Result};
use dbs_utils::net::MacAddr;
use dragonball::api::v1::{
BlockDeviceConfigInfo, FsDeviceConfigInfo, FsMountConfigInfo, VirtioNetDeviceConfigInfo,
VsockDeviceConfigInfo,
use dragonball::{
api::v1::{
BlockDeviceConfigInfo, FsDeviceConfigInfo, FsMountConfigInfo, VirtioNetDeviceConfigInfo,
VsockDeviceConfigInfo,
},
device_manager::blk_dev_mgr::BlockDeviceType,
};
use super::DragonballInner;
@ -56,6 +59,14 @@ impl DragonballInner {
block.config.no_drop,
)
.context("add block device"),
DeviceType::VhostUserBlk(block) => self
.add_block_device(
block.config.socket_path.as_str(),
block.device_id.as_str(),
block.is_readonly,
block.no_drop,
)
.context("add vhost user based block device"),
DeviceType::HybridVsock(hvsock) => self.add_hvsock(&hvsock.config).context("add vsock"),
DeviceType::ShareFs(sharefs) => self
.add_share_fs_device(&sharefs.config)
@ -161,6 +172,7 @@ impl DragonballInner {
let blk_cfg = BlockDeviceConfigInfo {
drive_id: id.to_string(),
device_type: BlockDeviceType::get_type(path),
path_on_host: PathBuf::from(jailed_drive),
is_direct: self.config.blockdev_info.block_device_cache_direct,
no_drop,

View File

@ -35,19 +35,16 @@ pub struct ResourceManager {
}
impl ResourceManager {
pub fn new(
pub async fn new(
sid: &str,
agent: Arc<dyn Agent>,
hypervisor: Arc<dyn Hypervisor>,
toml_config: Arc<TomlConfig>,
) -> Result<Self> {
Ok(Self {
inner: Arc::new(RwLock::new(ResourceManagerInner::new(
sid,
agent,
hypervisor,
toml_config,
)?)),
inner: Arc::new(RwLock::new(
ResourceManagerInner::new(sid, agent, hypervisor, toml_config).await?,
)),
})
}

View File

@ -51,15 +51,16 @@ pub(crate) struct ResourceManagerInner {
}
impl ResourceManagerInner {
pub(crate) fn new(
pub(crate) async fn new(
sid: &str,
agent: Arc<dyn Agent>,
hypervisor: Arc<dyn Hypervisor>,
toml_config: Arc<TomlConfig>,
) -> Result<Self> {
// create device manager
let dev_manager =
DeviceManager::new(hypervisor.clone()).context("failed to create device manager")?;
let dev_manager = DeviceManager::new(hypervisor.clone())
.await
.context("failed to create device manager")?;
let cgroups_resource = CgroupsResource::new(sid, &toml_config)?;
let cpu_resource = CpuResource::new(toml_config.clone())?;
@ -473,7 +474,9 @@ impl Persist for ResourceManagerInner {
sid: resource_args.sid,
agent: resource_args.agent,
hypervisor: resource_args.hypervisor.clone(),
device_manager: Arc::new(RwLock::new(DeviceManager::new(resource_args.hypervisor)?)),
device_manager: Arc::new(RwLock::new(
DeviceManager::new(resource_args.hypervisor).await?,
)),
network: None,
share_fs: None,
rootfs_resource: RootFsResource::new(),

View File

@ -14,6 +14,9 @@ pub mod utils;
pub mod vfio_volume;
use vfio_volume::is_vfio_volume;
pub mod spdk_volume;
use spdk_volume::is_spdk_volume;
use std::{sync::Arc, vec::Vec};
use anyhow::{Context, Result};
@ -84,6 +87,12 @@ impl VolumeResource {
.await
.with_context(|| format!("new vfio volume {:?}", m))?,
)
} else if is_spdk_volume(m) {
Arc::new(
spdk_volume::SPDKVolume::new(d, m, read_only, cid, sid)
.await
.with_context(|| format!("create spdk volume {:?}", m))?,
)
} else if let Some(options) =
get_huge_page_option(m).context("failed to check huge page")?
{

View File

@ -0,0 +1,189 @@
// Copyright (c) 2023 Alibaba Cloud
// Copyright (c) 2023 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
use nix::sys::{stat, stat::SFlag};
use tokio::sync::RwLock;
use super::Volume;
use crate::volume::utils::{
generate_shared_path, volume_mount_info, DEFAULT_VOLUME_FS_TYPE, KATA_SPDK_VOLUME_TYPE,
KATA_SPOOL_VOLUME_TYPE,
};
use hypervisor::{
device::{
device_manager::{do_handle_device, DeviceManager},
DeviceConfig, DeviceType,
},
VhostUserConfig, VhostUserType,
};
/// SPDKVolume: spdk block device volume
#[derive(Clone)]
pub(crate) struct SPDKVolume {
storage: Option<agent::Storage>,
mount: oci::Mount,
device_id: String,
}
impl SPDKVolume {
pub(crate) async fn new(
d: &RwLock<DeviceManager>,
m: &oci::Mount,
read_only: bool,
cid: &str,
sid: &str,
) -> Result<Self> {
let mnt_src: &str = &m.source;
// deserialize information from mountinfo.json
let v = volume_mount_info(mnt_src).context("deserialize information from mountinfo.json")?;
let device = match v.volume_type.as_str() {
KATA_SPDK_VOLUME_TYPE => {
if v.device.starts_with("spdk://") {
v.device.clone()
} else {
format!("spdk://{}", v.device.as_str())
}
}
KATA_SPOOL_VOLUME_TYPE => {
if v.device.starts_with("spool://") {
v.device.clone()
} else {
format!("spool://{}", v.device.as_str())
}
}
_ => return Err(anyhow!("mountinfo.json is invalid")),
};
// device format: X:///x/y/z.sock, so just unwrap it.
// if file is not S_IFSOCK, return error.
{
// device tokens: (Type, Socket)
let device_tokens = device.split_once("://").unwrap();
let fstat = stat::stat(device_tokens.1).context("stat socket failed")?;
let s_flag = SFlag::from_bits_truncate(fstat.st_mode);
if s_flag != SFlag::S_IFSOCK {
return Err(anyhow!("device {:?} is not valid", device));
}
}
let mut vhu_blk_config = &mut VhostUserConfig {
socket_path: device,
device_type: VhostUserType::Blk("vhost-user-blk-pci".to_owned()),
..Default::default()
};
if let Some(num) = v.metadata.get("num_queues") {
vhu_blk_config.num_queues = num
.parse::<usize>()
.context("num queues parse usize failed.")?;
}
if let Some(size) = v.metadata.get("queue_size") {
vhu_blk_config.queue_size = size
.parse::<u32>()
.context("num queues parse u32 failed.")?;
}
// create and insert block device into Kata VM
let device_info =
do_handle_device(d, &DeviceConfig::VhostUserBlkCfg(vhu_blk_config.clone()))
.await
.context("do handle device failed.")?;
// generate host guest shared path
let guest_path = generate_shared_path(m.destination.clone(), read_only, cid, sid)
.await
.context("generate host-guest shared path failed")?;
// storage
let mut storage = agent::Storage {
mount_point: guest_path.clone(),
..Default::default()
};
storage.options = if read_only {
vec!["ro".to_string()]
} else {
Vec::new()
};
let mut device_id = String::new();
if let DeviceType::VhostUserBlk(device) = device_info {
// blk, mmioblk
storage.driver = device.config.driver_option;
// /dev/vdX
storage.source = device.config.virt_path;
device_id = device.device_id;
}
if m.r#type != "bind" {
storage.fs_type = v.fs_type.clone();
} else {
storage.fs_type = DEFAULT_VOLUME_FS_TYPE.to_string();
}
if m.destination.clone().starts_with("/dev") {
storage.fs_type = "bind".to_string();
storage.options.append(&mut m.options.clone());
}
storage.fs_group = None;
let mount = oci::Mount {
destination: m.destination.clone(),
r#type: storage.fs_type.clone(),
source: guest_path,
options: m.options.clone(),
};
Ok(Self {
storage: Some(storage),
mount,
device_id,
})
}
}
#[async_trait]
impl Volume for SPDKVolume {
fn get_volume_mount(&self) -> Result<Vec<oci::Mount>> {
Ok(vec![self.mount.clone()])
}
fn get_storage(&self) -> Result<Vec<agent::Storage>> {
let s = if let Some(s) = self.storage.as_ref() {
vec![s.clone()]
} else {
vec![]
};
Ok(s)
}
async fn cleanup(&self, device_manager: &RwLock<DeviceManager>) -> Result<()> {
device_manager
.write()
.await
.try_remove_device(&self.device_id)
.await
}
fn get_device_id(&self) -> Result<Option<String>> {
Ok(Some(self.device_id.clone()))
}
}
pub(crate) fn is_spdk_volume(m: &oci::Mount) -> bool {
// spdkvol or spoolvol will share the same implementation
let vol_types = vec![KATA_SPDK_VOLUME_TYPE, KATA_SPOOL_VOLUME_TYPE];
if vol_types.contains(&m.r#type.as_str()) {
return true;
}
false
}

View File

@ -20,6 +20,7 @@ pub const KATA_MOUNT_BIND_TYPE: &str = "bind";
pub const KATA_DIRECT_VOLUME_TYPE: &str = "directvol";
pub const KATA_VFIO_VOLUME_TYPE: &str = "vfiovol";
pub const KATA_SPDK_VOLUME_TYPE: &str = "spdkvol";
pub const KATA_SPOOL_VOLUME_TYPE: &str = "spoolvol";
// volume mount info loads information from mountinfo.json
pub fn volume_mount_info(volume_path: &str) -> Result<DirectVolumeMountInfo> {

View File

@ -74,12 +74,8 @@ impl RuntimeHandler for VirtContainer {
// get uds from hypervisor and get config from toml_config
let agent = new_agent(&config).context("new agent")?;
let resource_manager = Arc::new(ResourceManager::new(
sid,
agent.clone(),
hypervisor.clone(),
config,
)?);
let resource_manager =
Arc::new(ResourceManager::new(sid, agent.clone(), hypervisor.clone(), config).await?);
let pid = std::process::id();
let sandbox = sandbox::VirtSandbox::new(