mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-06-01 11:56:29 +00:00
runtime-rs: add support spdk/vhost-user based volume.
Unlike the previous usage, which required creating /dev/xxx with mknod on the host, the new approach fully utilizes the DirectVolume-related usage method and passes the SPDK controller to the VMM. A user guide on using an SPDK volume when running Kata Containers is also added; it can be found in docs/how-to. Fixes: #6526 Signed-off-by: alex.lyn <alex.lyn@antgroup.com>
This commit is contained in:
parent
4cf552c151
commit
0df2fc2702
@ -25,8 +25,8 @@ Finally, when running a Kata Containers with `ctr run --mount type=X, src=Y, dst
|
||||
Now, supported types:
|
||||
|
||||
- `directvol` for direct volume
|
||||
- `spdkvol` for SPDK volume (TBD)
|
||||
- `vfiovol` for VFIO device based volume
|
||||
- `spdkvol` for SPDK/vhost-user based volume
|
||||
|
||||
|
||||
## Setup Device and Run a Kata-Containers
|
||||
@ -147,6 +147,80 @@ $ sudo ctr run -t --rm --runtime io.containerd.kata.v2 --mount type=vfiovol,src=
|
||||
```
|
||||
|
||||
|
||||
### SPDK Device Based Volume
|
||||
### SPDK Device Based Block Volume
|
||||
|
||||
TBD
|
||||
For SPDK vhost-user devices in runtime-rs, unlike in runtime (the golang version), there is no need to `mknod` a device node under `/dev/` any more.
|
||||
Using `kata-ctl direct-volume add ..` to create the mount info config is enough.
|
||||
|
||||
#### Run SPDK vhost target and Expose vhost block device
|
||||
|
||||
Run a SPDK vhost target and get vhost-user block controller as an example:
|
||||
|
||||
First, run SPDK vhost target:
|
||||
|
||||
> **Tips:** If the `vfio-pci` driver is supported, you can run SPDK with `DRIVER_OVERRIDE=vfio-pci`.
|
||||
> Otherwise, just run without it: `sudo HUGEMEM=4096 ./scripts/setup.sh`.
|
||||
|
||||
```bash
|
||||
$ SPDK_DEVEL=/xx/spdk
|
||||
$ VHU_UDS_PATH=/tmp/vhu-targets
|
||||
$ RAW_DISKS=/xx/rawdisks
|
||||
$ # Reset first
|
||||
$ ${SPDK_DEVEL}/scripts/setup.sh reset
|
||||
$ sudo sysctl -w vm.nr_hugepages=2048
|
||||
$ #4G Huge Memory for spdk
|
||||
$ sudo HUGEMEM=4096 DRIVER_OVERRIDE=vfio-pci ${SPDK_DEVEL}/scripts/setup.sh
|
||||
$ sudo ${SPDK_DEVEL}/build/bin/spdk_tgt -S $VHU_UDS_PATH -s 1024 -m 0x3 &
|
||||
```
|
||||
|
||||
Second, create a vhost controller:
|
||||
|
||||
```bash
|
||||
$ sudo dd if=/dev/zero of=${RAW_DISKS}/rawdisk01.20g bs=1M count=20480
|
||||
$ sudo ${SPDK_DEVEL}/scripts/rpc.py bdev_aio_create ${RAW_DISKS}/rawdisk01.20g vhu-rawdisk01.20g 512
|
||||
$ sudo ${SPDK_DEVEL}/scripts/rpc.py vhost_create_blk_controller vhost-blk-rawdisk01.sock vhu-rawdisk01.20g
|
||||
```
|
||||
|
||||
Here, a vhost controller `vhost-blk-rawdisk01.sock` is created, and the controller will
|
||||
be passed to Hypervisor, such as Dragonball, Cloud-Hypervisor, Firecracker or QEMU.
|
||||
|
||||
|
||||
#### Set up vhost-user block device for Kata Containers
|
||||
|
||||
|
||||
First, `mkdir` a sub-path `kubelet/kata-test-vol-001/` under `/run/kata-containers/shared/direct-volumes/`.
|
||||
|
||||
Second, fill in the fields of `mountinfo.json`. It looks like the following:
|
||||
```json
|
||||
{
|
||||
"device": "/tmp/vhu-targets/vhost-blk-rawdisk01.sock",
|
||||
"volume_type": "spdkvol",
|
||||
"fs_type": "ext4",
|
||||
"metadata": {},
|
||||
"options": []
|
||||
}
|
||||
```
|
||||
|
||||
Third, use `kata-ctl direct-volume add` to add the block device and generate `mountinfo.json`, then run a Kata container with `--mount`.
|
||||
|
||||
```bash
|
||||
$ # kata-ctl direct-volume add
|
||||
$ sudo kata-ctl direct-volume add /kubelet/kata-test-vol-001/volume001 "{\"device\": \"/tmp/vhu-targets/vhost-blk-rawdisk01.sock\", \"volume_type\":\"spdkvol\", \"fs_type\": \"ext4\", \"metadata\": {}, \"options\": []}"
|
||||
$ # /kubelet/kata-test-vol-001/volume001 <==> /run/kata-containers/shared/direct-volumes/L2t1YmVsZXQva2F0YS10ZXN0LXZvbC0wMDEvdm9sdW1lMDAx
|
||||
$ cat L2t1YmVsZXQva2F0YS10ZXN0LXZvbC0wMDEvdm9sdW1lMDAx/mountInfo.json
|
||||
$ {"volume_type":"spdkvol","device":"/tmp/vhu-targets/vhost-blk-rawdisk01.sock","fs_type":"ext4","metadata":{},"options":[]}
|
||||
```
|
||||
|
||||
As `/run/kata-containers/shared/direct-volumes/` is a fixed path, we are able to run a Kata pod with `--mount` and set the
|
||||
`src` sub-path. The `--mount` argument looks like: `--mount type=spdkvol,src=/kubelet/kata-test-vol-001/volume001,dst=/disk001`.
|
||||
|
||||
|
||||
#### Run a Kata container with SPDK vhost-user block device
|
||||
|
||||
|
||||
In this case, for `ctr run --mount type=X, src=source, dst=dest`, X is set to `spdkvol`, a dedicated type designed specifically for SPDK volumes.
|
||||
|
||||
```bash
|
||||
$ # ctr run with --mount type=spdkvol,src=/kubelet/kata-test-vol-001/volume001,dst=/disk001
|
||||
$ sudo ctr run -t --rm --runtime io.containerd.kata.v2 --mount type=spdkvol,src=/kubelet/kata-test-vol-001/volume001,dst=/disk001,options=rbind:rw "$image" kata-spdk-vol-xx0530 /bin/bash
|
||||
```
|
||||
|
@ -114,6 +114,8 @@ pub enum BlockDeviceType {
|
||||
/// SPOOL is a reliable NVMe virtualization system for the cloud environment.
|
||||
/// You could learn more SPOOL here: https://www.usenix.org/conference/atc20/presentation/xue
|
||||
Spool,
|
||||
/// The standard vhost-user-blk based device such as Spdk device.
|
||||
Spdk,
|
||||
/// Local disk/file based low level device.
|
||||
RawBlock,
|
||||
}
|
||||
@ -124,6 +126,8 @@ impl BlockDeviceType {
|
||||
// SPOOL path should be started with "spool", e.g. "spool:/device1"
|
||||
if path.starts_with("spool:/") {
|
||||
BlockDeviceType::Spool
|
||||
} else if path.starts_with("spdk:/") {
|
||||
BlockDeviceType::Spdk
|
||||
} else {
|
||||
BlockDeviceType::RawBlock
|
||||
}
|
||||
@ -400,6 +404,10 @@ impl BlockDeviceMgr {
|
||||
BlockDeviceError::DeviceManager(e)
|
||||
})
|
||||
}
|
||||
BlockDeviceType::Spool | BlockDeviceType::Spdk => {
|
||||
// TBD
|
||||
todo!()
|
||||
}
|
||||
_ => Err(BlockDeviceError::InvalidBlockDeviceType),
|
||||
}
|
||||
}
|
||||
|
@ -11,8 +11,8 @@ use kata_sys_util::rand::RandomBytes;
|
||||
use tokio::sync::{Mutex, RwLock};
|
||||
|
||||
use crate::{
|
||||
BlockConfig, BlockDevice, Hypervisor, VfioDevice, KATA_BLK_DEV_TYPE, KATA_MMIO_BLK_DEV_TYPE,
|
||||
VIRTIO_BLOCK_MMIO, VIRTIO_BLOCK_PCI,
|
||||
device::VhostUserBlkDevice, BlockConfig, BlockDevice, Hypervisor, VfioDevice, VhostUserConfig,
|
||||
KATA_BLK_DEV_TYPE, KATA_MMIO_BLK_DEV_TYPE, VIRTIO_BLOCK_MMIO, VIRTIO_BLOCK_PCI,
|
||||
};
|
||||
|
||||
use super::{
|
||||
@ -25,17 +25,34 @@ pub type ArcMutexDevice = Arc<Mutex<dyn Device>>;
|
||||
/// block_index and released_block_index are used to search an available block index
|
||||
/// in Sandbox.
|
||||
///
|
||||
/// @block_driver to be used for block device;
|
||||
/// @block_index generally default is 1 for <vdb>;
|
||||
/// @released_block_index for blk devices removed and indexes will released at the same time.
|
||||
#[derive(Clone, Debug, Default)]
|
||||
struct SharedInfo {
|
||||
block_driver: String,
|
||||
block_index: u64,
|
||||
released_block_index: Vec<u64>,
|
||||
}
|
||||
|
||||
impl SharedInfo {
|
||||
fn new() -> Self {
|
||||
async fn new(hypervisor: Arc<dyn Hypervisor>) -> Self {
|
||||
// get hypervisor block driver
|
||||
let block_driver = match hypervisor
|
||||
.hypervisor_config()
|
||||
.await
|
||||
.blockdev_info
|
||||
.block_device_driver
|
||||
.as_str()
|
||||
{
|
||||
// convert the block driver to kata type
|
||||
VIRTIO_BLOCK_MMIO => KATA_MMIO_BLK_DEV_TYPE.to_string(),
|
||||
VIRTIO_BLOCK_PCI => KATA_BLK_DEV_TYPE.to_string(),
|
||||
_ => "".to_string(),
|
||||
};
|
||||
|
||||
SharedInfo {
|
||||
block_driver,
|
||||
block_index: 1,
|
||||
released_block_index: vec![],
|
||||
}
|
||||
@ -67,26 +84,24 @@ pub struct DeviceManager {
|
||||
}
|
||||
|
||||
impl DeviceManager {
|
||||
pub fn new(hypervisor: Arc<dyn Hypervisor>) -> Result<Self> {
|
||||
pub async fn new(hypervisor: Arc<dyn Hypervisor>) -> Result<Self> {
|
||||
let devices = HashMap::<String, ArcMutexDevice>::new();
|
||||
Ok(DeviceManager {
|
||||
devices,
|
||||
hypervisor,
|
||||
shared_info: SharedInfo::new(),
|
||||
hypervisor: hypervisor.clone(),
|
||||
shared_info: SharedInfo::new(hypervisor.clone()).await,
|
||||
})
|
||||
}
|
||||
|
||||
async fn try_add_device(&mut self, device_id: &str) -> Result<()> {
|
||||
pub async fn try_add_device(&mut self, device_id: &str) -> Result<()> {
|
||||
// find the device
|
||||
let device = self
|
||||
.devices
|
||||
.get(device_id)
|
||||
.context("failed to find device")?;
|
||||
|
||||
// attach device
|
||||
let mut device_guard = device.lock().await;
|
||||
// attach device
|
||||
let result = device_guard.attach(self.hypervisor.as_ref()).await;
|
||||
|
||||
// handle attach error
|
||||
if let Err(e) = result {
|
||||
match device_guard.get_device_info().await {
|
||||
@ -102,6 +117,9 @@ impl DeviceManager {
|
||||
.release_device_index(device.config.virt_path.unwrap().0);
|
||||
}
|
||||
}
|
||||
DeviceType::VhostUserBlk(device) => {
|
||||
self.shared_info.release_device_index(device.config.index);
|
||||
}
|
||||
_ => {
|
||||
debug!(sl!(), "no need to do release device index.");
|
||||
}
|
||||
@ -109,6 +127,7 @@ impl DeviceManager {
|
||||
|
||||
drop(device_guard);
|
||||
self.devices.remove(device_id);
|
||||
|
||||
return Err(e);
|
||||
}
|
||||
|
||||
@ -168,6 +187,11 @@ impl DeviceManager {
|
||||
return Some(device_id.to_string());
|
||||
}
|
||||
}
|
||||
DeviceType::VhostUserBlk(device) => {
|
||||
if device.config.socket_path == host_path {
|
||||
return Some(device_id.to_string());
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
// TODO: support find other device type
|
||||
continue;
|
||||
@ -225,6 +249,23 @@ impl DeviceManager {
|
||||
&vfio_dev_config,
|
||||
)))
|
||||
}
|
||||
DeviceConfig::VhostUserBlkCfg(config) => {
|
||||
// try to find the device, found and just return id.
|
||||
if let Some(dev_id_matched) = self.find_device(config.socket_path.clone()).await {
|
||||
info!(
|
||||
sl!(),
|
||||
"vhost blk device with path:{:?} found. just return device id: {:?}",
|
||||
config.socket_path.clone(),
|
||||
dev_id_matched
|
||||
);
|
||||
|
||||
return Ok(dev_id_matched);
|
||||
}
|
||||
|
||||
self.create_vhost_blk_device(config, device_id.clone())
|
||||
.await
|
||||
.context("failed to create vhost blk device")?
|
||||
}
|
||||
_ => {
|
||||
return Err(anyhow!("invliad device type"));
|
||||
}
|
||||
@ -236,27 +277,34 @@ impl DeviceManager {
|
||||
Ok(device_id)
|
||||
}
|
||||
|
||||
async fn create_vhost_blk_device(
|
||||
&mut self,
|
||||
config: &VhostUserConfig,
|
||||
device_id: String,
|
||||
) -> Result<ArcMutexDevice> {
|
||||
let mut vhu_blk_config = config.clone();
|
||||
vhu_blk_config.driver_option = self.shared_info.block_driver.clone();
|
||||
|
||||
// generate block device index and virt path
|
||||
// safe here, Block device always has virt_path.
|
||||
if let Some(virt_path) = self.get_dev_virt_path(DEVICE_TYPE_BLOCK)? {
|
||||
vhu_blk_config.index = virt_path.0;
|
||||
vhu_blk_config.virt_path = virt_path.1;
|
||||
}
|
||||
|
||||
Ok(Arc::new(Mutex::new(VhostUserBlkDevice::new(
|
||||
device_id,
|
||||
vhu_blk_config,
|
||||
))))
|
||||
}
|
||||
|
||||
async fn create_block_device(
|
||||
&mut self,
|
||||
config: &BlockConfig,
|
||||
device_id: String,
|
||||
) -> Result<ArcMutexDevice> {
|
||||
let mut block_config = config.clone();
|
||||
// get hypervisor block driver
|
||||
let block_driver = match self
|
||||
.hypervisor
|
||||
.hypervisor_config()
|
||||
.await
|
||||
.blockdev_info
|
||||
.block_device_driver
|
||||
.as_str()
|
||||
{
|
||||
// convert the block driver to kata type
|
||||
VIRTIO_BLOCK_MMIO => KATA_MMIO_BLK_DEV_TYPE.to_string(),
|
||||
VIRTIO_BLOCK_PCI => KATA_BLK_DEV_TYPE.to_string(),
|
||||
_ => "".to_string(),
|
||||
};
|
||||
block_config.driver_option = block_driver;
|
||||
block_config.driver_option = self.shared_info.block_driver.clone();
|
||||
|
||||
// generate virt path
|
||||
if let Some(virt_path) = self.get_dev_virt_path(DEVICE_TYPE_BLOCK)? {
|
||||
|
@ -1,5 +1,5 @@
|
||||
// Copyright (c) 2019-2022 Alibaba Cloud
|
||||
// Copyright (c) 2019-2022 Ant Group
|
||||
// Copyright (c) 2019-2023 Alibaba Cloud
|
||||
// Copyright (c) 2019-2023 Ant Group
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
@ -26,6 +26,9 @@ pub use virtio_fs::{
|
||||
pub use virtio_net::{Address, NetworkConfig, NetworkDevice};
|
||||
pub use virtio_vsock::{HybridVsockConfig, HybridVsockDevice, VsockConfig, VsockDevice};
|
||||
|
||||
pub mod vhost_user_blk;
|
||||
pub use vhost_user::{VhostUserConfig, VhostUserDevice, VhostUserType};
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
|
||||
// Tips:
|
||||
|
@ -1,34 +1,69 @@
|
||||
// Copyright (c) 2019-2023 Alibaba Cloud
|
||||
// Copyright (c) 2019-2023 Ant Group
|
||||
// Copyright (c) 2022-2023 Alibaba Cloud
|
||||
// Copyright (c) 2022-2023 Ant Group
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use crate::device::Device;
|
||||
use crate::device::DeviceType;
|
||||
use crate::Hypervisor as hypervisor;
|
||||
use anyhow::Result;
|
||||
use async_trait::async_trait;
|
||||
/// The flavour of a vhost-user device.
///
/// Every variant carries a `String`; presumably this is the device model
/// name quoted in the variant's documentation (confirm against callers).
#[derive(Debug, Clone)]
pub enum VhostUserType {
    /// Block vhost-user device, e.g. "vhost-user-blk-pci".
    Blk(String),

    /// SCSI based vhost-user device, e.g. "vhost-user-scsi-pci".
    SCSI(String),

    /// Net based vhost-user device, e.g. "virtio-net-pci".
    Net(String),

    /// virtio-fs vhost-user device, e.g. "vhost-user-fs-pci".
    FS(String),
}
|
||||
|
||||
impl Default for VhostUserType {
|
||||
fn default() -> Self {
|
||||
VhostUserType::Blk("vhost-user-blk-pci".to_owned())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
/// VhostUserConfig represents data shared by most vhost-user devices
|
||||
pub struct VhostUserConfig {
|
||||
/// Device id
|
||||
/// device id
|
||||
pub dev_id: String,
|
||||
/// Socket path
|
||||
/// socket path
|
||||
pub socket_path: String,
|
||||
/// Mac_address is only meaningful for vhost user net device
|
||||
/// mac_address is only meaningful for vhost user net device
|
||||
pub mac_address: String,
|
||||
/// These are only meaningful for vhost user fs devices
|
||||
|
||||
/// vhost-user-fs is only meaningful for vhost-user-fs device
|
||||
pub tag: String,
|
||||
pub cache: String,
|
||||
pub device_type: String,
|
||||
/// Pci_addr is the PCI address used to identify the slot at which the drive is attached.
|
||||
pub pci_addr: Option<String>,
|
||||
/// Block index of the device if assigned
|
||||
pub index: u8,
|
||||
/// vhost-user-fs cache mode
|
||||
pub cache_mode: String,
|
||||
/// vhost-user-fs cache size in MB
|
||||
pub cache_size: u32,
|
||||
pub queue_siez: u32,
|
||||
|
||||
/// vhost user device type
|
||||
pub device_type: VhostUserType,
|
||||
/// guest block driver
|
||||
pub driver_option: String,
|
||||
/// pci_addr is the PCI address used to identify the slot at which the drive is attached.
|
||||
pub pci_addr: Option<String>,
|
||||
|
||||
/// Block index of the device if assigned
|
||||
/// type u64 is not OK
|
||||
pub index: u64,
|
||||
|
||||
/// Virtio queue size. Size: byte
|
||||
pub queue_size: u32,
|
||||
/// Block device multi-queue
|
||||
pub num_queues: usize,
|
||||
|
||||
/// device path in guest
|
||||
pub virt_path: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
@ -36,26 +71,3 @@ pub struct VhostUserDevice {
|
||||
pub device_id: String,
|
||||
pub config: VhostUserConfig,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Device for VhostUserConfig {
|
||||
async fn attach(&mut self, _h: &dyn hypervisor) -> Result<()> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
async fn detach(&mut self, _h: &dyn hypervisor) -> Result<Option<u64>> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
async fn get_device_info(&self) -> DeviceType {
|
||||
todo!()
|
||||
}
|
||||
|
||||
async fn increase_attach_count(&mut self) -> Result<bool> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
async fn decrease_attach_count(&mut self) -> Result<bool> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,122 @@
|
||||
// Copyright (c) 2023 Alibaba Cloud
|
||||
// Copyright (c) 2023 Ant Group
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use async_trait::async_trait;
|
||||
|
||||
use super::VhostUserConfig;
|
||||
use crate::{
|
||||
device::{Device, DeviceType},
|
||||
Hypervisor as hypervisor,
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct VhostUserBlkDevice {
|
||||
pub device_id: String,
|
||||
|
||||
/// If set to true, the drive is opened in read-only mode. Otherwise, the
|
||||
/// drive is opened as read-write.
|
||||
pub is_readonly: bool,
|
||||
|
||||
/// Don't close `path_on_host` file when dropping the device.
|
||||
pub no_drop: bool,
|
||||
|
||||
/// driver type for block device
|
||||
pub driver_option: String,
|
||||
|
||||
pub attach_count: u64,
|
||||
pub config: VhostUserConfig,
|
||||
}
|
||||
|
||||
impl VhostUserBlkDevice {
|
||||
// new creates a new VhostUserBlkDevice
|
||||
pub fn new(device_id: String, config: VhostUserConfig) -> Self {
|
||||
VhostUserBlkDevice {
|
||||
device_id,
|
||||
attach_count: 0,
|
||||
config,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Device for VhostUserBlkDevice {
|
||||
async fn attach(&mut self, h: &dyn hypervisor) -> Result<()> {
|
||||
// increase attach count, skip attach the device if the device is already attached
|
||||
if self
|
||||
.increase_attach_count()
|
||||
.await
|
||||
.context("failed to increase attach count")?
|
||||
{
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if let Err(e) = h.add_device(DeviceType::VhostUserBlk(self.clone())).await {
|
||||
self.decrease_attach_count().await?;
|
||||
|
||||
return Err(e);
|
||||
}
|
||||
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
async fn detach(&mut self, h: &dyn hypervisor) -> Result<Option<u64>> {
|
||||
// get the count of device detached, and detach once it reaches 0
|
||||
if self
|
||||
.decrease_attach_count()
|
||||
.await
|
||||
.context("failed to decrease attach count")?
|
||||
{
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
if let Err(e) = h
|
||||
.remove_device(DeviceType::VhostUserBlk(self.clone()))
|
||||
.await
|
||||
{
|
||||
self.increase_attach_count().await?;
|
||||
|
||||
return Err(e);
|
||||
}
|
||||
|
||||
Ok(Some(self.config.index))
|
||||
}
|
||||
|
||||
async fn get_device_info(&self) -> DeviceType {
|
||||
DeviceType::VhostUserBlk(self.clone())
|
||||
}
|
||||
|
||||
async fn increase_attach_count(&mut self) -> Result<bool> {
|
||||
match self.attach_count {
|
||||
0 => {
|
||||
// do real attach
|
||||
self.attach_count += 1;
|
||||
Ok(false)
|
||||
}
|
||||
std::u64::MAX => Err(anyhow!("device was attached too many times")),
|
||||
_ => {
|
||||
self.attach_count += 1;
|
||||
Ok(true)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn decrease_attach_count(&mut self) -> Result<bool> {
|
||||
match self.attach_count {
|
||||
0 => Err(anyhow!("detaching a device that wasn't attached")),
|
||||
1 => {
|
||||
// do real wrok
|
||||
self.attach_count -= 1;
|
||||
Ok(false)
|
||||
}
|
||||
_ => {
|
||||
self.attach_count -= 1;
|
||||
Ok(true)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -1,17 +1,18 @@
|
||||
// Copyright (c) 2019-2022 Alibaba Cloud
|
||||
// Copyright (c) 2019-2022 Ant Group
|
||||
// Copyright (c) 2022-2023 Alibaba Cloud
|
||||
// Copyright (c) 2022-2023 Ant Group
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
pub const VIRTIO_BLOCK_MMIO: &str = "virtio-blk-mmio";
|
||||
use crate::device::Device;
|
||||
use crate::device::DeviceType;
|
||||
use crate::Hypervisor as hypervisor;
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use async_trait::async_trait;
|
||||
|
||||
/// VIRTIO_BLOCK_PCI indicates block driver is virtio-pci based
|
||||
pub const VIRTIO_BLOCK_PCI: &str = "virtio-blk-pci";
|
||||
pub const VIRTIO_BLOCK_MMIO: &str = "virtio-blk-mmio";
|
||||
pub const KATA_MMIO_BLK_DEV_TYPE: &str = "mmioblk";
|
||||
pub const KATA_BLK_DEV_TYPE: &str = "blk";
|
||||
|
||||
|
@ -6,10 +6,11 @@
|
||||
|
||||
use std::fmt;
|
||||
|
||||
use crate::device::driver::vhost_user_blk::VhostUserBlkDevice;
|
||||
use crate::{
|
||||
BlockConfig, BlockDevice, HybridVsockConfig, HybridVsockDevice, Hypervisor as hypervisor,
|
||||
NetworkConfig, NetworkDevice, ShareFsDevice, ShareFsDeviceConfig, ShareFsMountConfig,
|
||||
ShareFsMountDevice, VfioConfig, VfioDevice, VsockConfig, VsockDevice,
|
||||
ShareFsMountDevice, VfioConfig, VfioDevice, VhostUserConfig, VsockConfig, VsockDevice,
|
||||
};
|
||||
use anyhow::Result;
|
||||
use async_trait::async_trait;
|
||||
@ -21,6 +22,7 @@ pub mod util;
|
||||
#[derive(Debug)]
|
||||
pub enum DeviceConfig {
|
||||
BlockCfg(BlockConfig),
|
||||
VhostUserBlkCfg(VhostUserConfig),
|
||||
NetworkCfg(NetworkConfig),
|
||||
ShareFsCfg(ShareFsDeviceConfig),
|
||||
VfioCfg(VfioConfig),
|
||||
@ -32,6 +34,7 @@ pub enum DeviceConfig {
|
||||
#[derive(Debug)]
|
||||
pub enum DeviceType {
|
||||
Block(BlockDevice),
|
||||
VhostUserBlk(VhostUserBlkDevice),
|
||||
Vfio(VfioDevice),
|
||||
Network(NetworkDevice),
|
||||
ShareFs(ShareFsDevice),
|
||||
|
@ -8,9 +8,12 @@ use std::path::PathBuf;
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use dbs_utils::net::MacAddr;
|
||||
use dragonball::api::v1::{
|
||||
BlockDeviceConfigInfo, FsDeviceConfigInfo, FsMountConfigInfo, VirtioNetDeviceConfigInfo,
|
||||
VsockDeviceConfigInfo,
|
||||
use dragonball::{
|
||||
api::v1::{
|
||||
BlockDeviceConfigInfo, FsDeviceConfigInfo, FsMountConfigInfo, VirtioNetDeviceConfigInfo,
|
||||
VsockDeviceConfigInfo,
|
||||
},
|
||||
device_manager::blk_dev_mgr::BlockDeviceType,
|
||||
};
|
||||
|
||||
use super::DragonballInner;
|
||||
@ -56,6 +59,14 @@ impl DragonballInner {
|
||||
block.config.no_drop,
|
||||
)
|
||||
.context("add block device"),
|
||||
DeviceType::VhostUserBlk(block) => self
|
||||
.add_block_device(
|
||||
block.config.socket_path.as_str(),
|
||||
block.device_id.as_str(),
|
||||
block.is_readonly,
|
||||
block.no_drop,
|
||||
)
|
||||
.context("add vhost user based block device"),
|
||||
DeviceType::HybridVsock(hvsock) => self.add_hvsock(&hvsock.config).context("add vsock"),
|
||||
DeviceType::ShareFs(sharefs) => self
|
||||
.add_share_fs_device(&sharefs.config)
|
||||
@ -161,6 +172,7 @@ impl DragonballInner {
|
||||
|
||||
let blk_cfg = BlockDeviceConfigInfo {
|
||||
drive_id: id.to_string(),
|
||||
device_type: BlockDeviceType::get_type(path),
|
||||
path_on_host: PathBuf::from(jailed_drive),
|
||||
is_direct: self.config.blockdev_info.block_device_cache_direct,
|
||||
no_drop,
|
||||
|
@ -35,19 +35,16 @@ pub struct ResourceManager {
|
||||
}
|
||||
|
||||
impl ResourceManager {
|
||||
pub fn new(
|
||||
pub async fn new(
|
||||
sid: &str,
|
||||
agent: Arc<dyn Agent>,
|
||||
hypervisor: Arc<dyn Hypervisor>,
|
||||
toml_config: Arc<TomlConfig>,
|
||||
) -> Result<Self> {
|
||||
Ok(Self {
|
||||
inner: Arc::new(RwLock::new(ResourceManagerInner::new(
|
||||
sid,
|
||||
agent,
|
||||
hypervisor,
|
||||
toml_config,
|
||||
)?)),
|
||||
inner: Arc::new(RwLock::new(
|
||||
ResourceManagerInner::new(sid, agent, hypervisor, toml_config).await?,
|
||||
)),
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -51,15 +51,16 @@ pub(crate) struct ResourceManagerInner {
|
||||
}
|
||||
|
||||
impl ResourceManagerInner {
|
||||
pub(crate) fn new(
|
||||
pub(crate) async fn new(
|
||||
sid: &str,
|
||||
agent: Arc<dyn Agent>,
|
||||
hypervisor: Arc<dyn Hypervisor>,
|
||||
toml_config: Arc<TomlConfig>,
|
||||
) -> Result<Self> {
|
||||
// create device manager
|
||||
let dev_manager =
|
||||
DeviceManager::new(hypervisor.clone()).context("failed to create device manager")?;
|
||||
let dev_manager = DeviceManager::new(hypervisor.clone())
|
||||
.await
|
||||
.context("failed to create device manager")?;
|
||||
|
||||
let cgroups_resource = CgroupsResource::new(sid, &toml_config)?;
|
||||
let cpu_resource = CpuResource::new(toml_config.clone())?;
|
||||
@ -473,7 +474,9 @@ impl Persist for ResourceManagerInner {
|
||||
sid: resource_args.sid,
|
||||
agent: resource_args.agent,
|
||||
hypervisor: resource_args.hypervisor.clone(),
|
||||
device_manager: Arc::new(RwLock::new(DeviceManager::new(resource_args.hypervisor)?)),
|
||||
device_manager: Arc::new(RwLock::new(
|
||||
DeviceManager::new(resource_args.hypervisor).await?,
|
||||
)),
|
||||
network: None,
|
||||
share_fs: None,
|
||||
rootfs_resource: RootFsResource::new(),
|
||||
|
@ -14,6 +14,9 @@ pub mod utils;
|
||||
pub mod vfio_volume;
|
||||
use vfio_volume::is_vfio_volume;
|
||||
|
||||
pub mod spdk_volume;
|
||||
use spdk_volume::is_spdk_volume;
|
||||
|
||||
use std::{sync::Arc, vec::Vec};
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
@ -84,6 +87,12 @@ impl VolumeResource {
|
||||
.await
|
||||
.with_context(|| format!("new vfio volume {:?}", m))?,
|
||||
)
|
||||
} else if is_spdk_volume(m) {
|
||||
Arc::new(
|
||||
spdk_volume::SPDKVolume::new(d, m, read_only, cid, sid)
|
||||
.await
|
||||
.with_context(|| format!("create spdk volume {:?}", m))?,
|
||||
)
|
||||
} else if let Some(options) =
|
||||
get_huge_page_option(m).context("failed to check huge page")?
|
||||
{
|
||||
|
189
src/runtime-rs/crates/resource/src/volume/spdk_volume.rs
Normal file
189
src/runtime-rs/crates/resource/src/volume/spdk_volume.rs
Normal file
@ -0,0 +1,189 @@
|
||||
// Copyright (c) 2023 Alibaba Cloud
|
||||
// Copyright (c) 2023 Ant Group
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use async_trait::async_trait;
|
||||
use nix::sys::{stat, stat::SFlag};
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
use super::Volume;
|
||||
use crate::volume::utils::{
|
||||
generate_shared_path, volume_mount_info, DEFAULT_VOLUME_FS_TYPE, KATA_SPDK_VOLUME_TYPE,
|
||||
KATA_SPOOL_VOLUME_TYPE,
|
||||
};
|
||||
use hypervisor::{
|
||||
device::{
|
||||
device_manager::{do_handle_device, DeviceManager},
|
||||
DeviceConfig, DeviceType,
|
||||
},
|
||||
VhostUserConfig, VhostUserType,
|
||||
};
|
||||
|
||||
/// SPDKVolume: spdk block device volume
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct SPDKVolume {
|
||||
storage: Option<agent::Storage>,
|
||||
mount: oci::Mount,
|
||||
device_id: String,
|
||||
}
|
||||
|
||||
impl SPDKVolume {
|
||||
pub(crate) async fn new(
|
||||
d: &RwLock<DeviceManager>,
|
||||
m: &oci::Mount,
|
||||
read_only: bool,
|
||||
cid: &str,
|
||||
sid: &str,
|
||||
) -> Result<Self> {
|
||||
let mnt_src: &str = &m.source;
|
||||
|
||||
// deserde Information from mountinfo.json
|
||||
let v = volume_mount_info(mnt_src).context("deserde information from mountinfo.json")?;
|
||||
let device = match v.volume_type.as_str() {
|
||||
KATA_SPDK_VOLUME_TYPE => {
|
||||
if v.device.starts_with("spdk://") {
|
||||
v.device.clone()
|
||||
} else {
|
||||
format!("spdk://{}", v.device.as_str())
|
||||
}
|
||||
}
|
||||
KATA_SPOOL_VOLUME_TYPE => {
|
||||
if v.device.starts_with("spool://") {
|
||||
v.device.clone()
|
||||
} else {
|
||||
format!("spool://{}", v.device.as_str())
|
||||
}
|
||||
}
|
||||
_ => return Err(anyhow!("mountinfo.json is invalid")),
|
||||
};
|
||||
|
||||
// device format: X:///x/y/z.sock,so just unwrap it.
|
||||
// if file is not S_IFSOCK, return error.
|
||||
{
|
||||
// device tokens: (Type, Socket)
|
||||
let device_tokens = device.split_once("://").unwrap();
|
||||
|
||||
let fstat = stat::stat(device_tokens.1).context("stat socket failed")?;
|
||||
let s_flag = SFlag::from_bits_truncate(fstat.st_mode);
|
||||
if s_flag != SFlag::S_IFSOCK {
|
||||
return Err(anyhow!("device {:?} is not valid", device));
|
||||
}
|
||||
}
|
||||
|
||||
let mut vhu_blk_config = &mut VhostUserConfig {
|
||||
socket_path: device,
|
||||
device_type: VhostUserType::Blk("vhost-user-blk-pci".to_owned()),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
if let Some(num) = v.metadata.get("num_queues") {
|
||||
vhu_blk_config.num_queues = num
|
||||
.parse::<usize>()
|
||||
.context("num queues parse usize failed.")?;
|
||||
}
|
||||
if let Some(size) = v.metadata.get("queue_size") {
|
||||
vhu_blk_config.queue_size = size
|
||||
.parse::<u32>()
|
||||
.context("num queues parse u32 failed.")?;
|
||||
}
|
||||
|
||||
// create and insert block device into Kata VM
|
||||
let device_info =
|
||||
do_handle_device(d, &DeviceConfig::VhostUserBlkCfg(vhu_blk_config.clone()))
|
||||
.await
|
||||
.context("do handle device failed.")?;
|
||||
|
||||
// generate host guest shared path
|
||||
let guest_path = generate_shared_path(m.destination.clone(), read_only, cid, sid)
|
||||
.await
|
||||
.context("generate host-guest shared path failed")?;
|
||||
|
||||
// storage
|
||||
let mut storage = agent::Storage {
|
||||
mount_point: guest_path.clone(),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
storage.options = if read_only {
|
||||
vec!["ro".to_string()]
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
|
||||
let mut device_id = String::new();
|
||||
if let DeviceType::VhostUserBlk(device) = device_info {
|
||||
// blk, mmioblk
|
||||
storage.driver = device.config.driver_option;
|
||||
// /dev/vdX
|
||||
storage.source = device.config.virt_path;
|
||||
device_id = device.device_id;
|
||||
}
|
||||
|
||||
if m.r#type != "bind" {
|
||||
storage.fs_type = v.fs_type.clone();
|
||||
} else {
|
||||
storage.fs_type = DEFAULT_VOLUME_FS_TYPE.to_string();
|
||||
}
|
||||
|
||||
if m.destination.clone().starts_with("/dev") {
|
||||
storage.fs_type = "bind".to_string();
|
||||
storage.options.append(&mut m.options.clone());
|
||||
}
|
||||
|
||||
storage.fs_group = None;
|
||||
let mount = oci::Mount {
|
||||
destination: m.destination.clone(),
|
||||
r#type: storage.fs_type.clone(),
|
||||
source: guest_path,
|
||||
options: m.options.clone(),
|
||||
};
|
||||
|
||||
Ok(Self {
|
||||
storage: Some(storage),
|
||||
mount,
|
||||
device_id,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Volume for SPDKVolume {
|
||||
fn get_volume_mount(&self) -> Result<Vec<oci::Mount>> {
|
||||
Ok(vec![self.mount.clone()])
|
||||
}
|
||||
|
||||
fn get_storage(&self) -> Result<Vec<agent::Storage>> {
|
||||
let s = if let Some(s) = self.storage.as_ref() {
|
||||
vec![s.clone()]
|
||||
} else {
|
||||
vec![]
|
||||
};
|
||||
|
||||
Ok(s)
|
||||
}
|
||||
|
||||
async fn cleanup(&self, device_manager: &RwLock<DeviceManager>) -> Result<()> {
|
||||
device_manager
|
||||
.write()
|
||||
.await
|
||||
.try_remove_device(&self.device_id)
|
||||
.await
|
||||
}
|
||||
|
||||
fn get_device_id(&self) -> Result<Option<String>> {
|
||||
Ok(Some(self.device_id.clone()))
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn is_spdk_volume(m: &oci::Mount) -> bool {
|
||||
// spdkvol or spoolvol will share the same implementation
|
||||
let vol_types = vec![KATA_SPDK_VOLUME_TYPE, KATA_SPOOL_VOLUME_TYPE];
|
||||
if vol_types.contains(&m.r#type.as_str()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
false
|
||||
}
|
@ -20,6 +20,7 @@ pub const KATA_MOUNT_BIND_TYPE: &str = "bind";
|
||||
pub const KATA_DIRECT_VOLUME_TYPE: &str = "directvol";
|
||||
pub const KATA_VFIO_VOLUME_TYPE: &str = "vfiovol";
|
||||
pub const KATA_SPDK_VOLUME_TYPE: &str = "spdkvol";
|
||||
pub const KATA_SPOOL_VOLUME_TYPE: &str = "spoolvol";
|
||||
|
||||
// volume_mount_info loads the volume information from mountinfo.json
|
||||
pub fn volume_mount_info(volume_path: &str) -> Result<DirectVolumeMountInfo> {
|
||||
|
@ -74,12 +74,8 @@ impl RuntimeHandler for VirtContainer {
|
||||
|
||||
// get uds from hypervisor and get config from toml_config
|
||||
let agent = new_agent(&config).context("new agent")?;
|
||||
let resource_manager = Arc::new(ResourceManager::new(
|
||||
sid,
|
||||
agent.clone(),
|
||||
hypervisor.clone(),
|
||||
config,
|
||||
)?);
|
||||
let resource_manager =
|
||||
Arc::new(ResourceManager::new(sid, agent.clone(), hypervisor.clone(), config).await?);
|
||||
let pid = std::process::id();
|
||||
|
||||
let sandbox = sandbox::VirtSandbox::new(
|
||||
|
Loading…
Reference in New Issue
Block a user