mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-06-24 06:27:39 +00:00
runtime-rs: add support vfio device based volume
A new choice of using a vfio device based volume for kata-containers. With the help of kata-ctl direct-volume, users are able to add a specified device, identified by either its BDF or its IOMMU group ID. To help users use it smoothly, a how-to doc is added in docs/how-to/how-to-run-kata-containers-with-kinds-of-Block-Volumes. Fixes: #6525 Signed-off-by: alex.lyn <alex.lyn@antgroup.com>
This commit is contained in:
parent
1e3b372bbb
commit
59510cfee0
@ -20,13 +20,13 @@ The JSON file `mountinfo.json` placed in a sub-path `/kubelet/kata-test-vol-001/
|
||||
And the full path looks like: `/run/kata-containers/shared/direct-volumes/kubelet/kata-test-vol-001/volume001`, but for security reasons it is
|
||||
encoded as `/run/kata-containers/shared/direct-volumes/L2t1YmVsZXQva2F0YS10ZXN0LXZvbC0wMDEvdm9sdW1lMDAx`.
|
||||
|
||||
Finally, when running a Kata Containers witch `ctr run --mount type=X, src=Y, dst=Z,,options=rbind:rw`, the `type=X` should be specified a proprietary type specifically designed for some kind of volume.
|
||||
Finally, when running a Kata container with `ctr run --mount type=X, src=Y, dst=Z,,options=rbind:rw`, `type=X` should be set to the proprietary type specifically designed for that kind of volume.
|
||||
|
||||
Now, supported types:
|
||||
|
||||
- `directvol` for direct volume
|
||||
- `spdkvol` for SPDK volume (TBD)
|
||||
- `vfiovol` for VFIO device based volume (TBD)
|
||||
- `vfiovol` for VFIO device based volume
|
||||
|
||||
|
||||
## Setup Device and Run a Kata-Containers
|
||||
@ -55,7 +55,7 @@ $ sudo mkfs.ext4 /tmp/stor/rawdisk01.20g
|
||||
```
|
||||
|
||||
```bash
|
||||
$ sudo ./kata-ctl direct-volume add /kubelet/kata-direct-vol-002/directvol002 "{\"device\": \"/tmp/stor/rawdisk01.20g\", \"volume_type\": \"directvol\", \"fs_type\": \"ext4\", \"metadata\":"{}", \"options\": []}"
|
||||
$ sudo kata-ctl direct-volume add /kubelet/kata-direct-vol-002/directvol002 "{\"device\": \"/tmp/stor/rawdisk01.20g\", \"volume_type\": \"directvol\", \"fs_type\": \"ext4\", \"metadata\":"{}", \"options\": []}"
|
||||
$# /kubelet/kata-direct-vol-002/directvol002 <==> /run/kata-containers/shared/direct-volumes/W1lMa2F0ZXQva2F0YS10a2F0DAxvbC0wMDEvdm9sdW1lMDAx
|
||||
$ cat W1lMa2F0ZXQva2F0YS10a2F0DAxvbC0wMDEvdm9sdW1lMDAx/mountInfo.json
|
||||
{"volume_type":"directvol","device":"/tmp/stor/rawdisk01.20g","fs_type":"ext4","metadata":{},"options":[]}
|
||||
@ -65,14 +65,88 @@ $ cat W1lMa2F0ZXQva2F0YS10a2F0DAxvbC0wMDEvdm9sdW1lMDAx/mountInfo.json
|
||||
|
||||
```bash
|
||||
$ # type=directvol,src=/kubelet/kata-direct-vol-002/directvol002,dst=/disk002,options=rbind:rw
|
||||
$sudo ctr run -t --rm --runtime io.containerd.kata.v2 --mount type=directvol,src=/kubelet/kata-direct-vol-002/directvol002,dst=/disk002,options=rbind:rw "$image" kata-direct-vol-xx05302045 /bin/bash
|
||||
$ sudo ctr run -t --rm --runtime io.containerd.kata.v2 --mount type=directvol,src=/kubelet/kata-direct-vol-002/directvol002,dst=/disk002,options=rbind:rw "$image" kata-direct-vol-xx05302045 /bin/bash
|
||||
```
|
||||
|
||||
|
||||
### VFIO Device Based Block Volume
|
||||
|
||||
#### create VFIO device based backend storage
|
||||
|
||||
> **Tip:** It only supports `vfio-pci` based PCI device passthrough mode.
|
||||
|
||||
In this scenario, the device's host kernel driver will be replaced by `vfio-pci`, and an IOMMU group ID will be generated.
|
||||
Either the device's BDF or its VFIO IOMMU group path under `/dev/vfio/` can be used as "device" in `mountinfo.json`.
|
||||
|
||||
```bash
|
||||
$ lspci -nn -k -s 45:00.1
|
||||
45:00.1 SCSI storage controller
|
||||
...
|
||||
Kernel driver in use: vfio-pci
|
||||
...
|
||||
$ ls /dev/vfio/110
|
||||
/dev/vfio/110
|
||||
$ ls /sys/kernel/iommu_groups/110/devices/
|
||||
0000:45:00.1
|
||||
```
|
||||
|
||||
#### setup VFIO device for kata-containers
|
||||
|
||||
First, configure the `mountinfo.json`, as below:
|
||||
|
||||
- (1) device with `BB:DD.F`
|
||||
|
||||
```json
|
||||
{
|
||||
"device": "45:00.1",
|
||||
"volume_type": "vfiovol",
|
||||
"fs_type": "ext4",
|
||||
"metadata": {},
|
||||
"options": []
|
||||
}
|
||||
```
|
||||
|
||||
- (2) device with `DDDD:BB:DD.F`
|
||||
|
||||
```json
|
||||
{
|
||||
"device": "0000:45:00.1",
|
||||
"volume_type": "vfiovol",
|
||||
"fs_type": "ext4",
|
||||
"metadata": {},
|
||||
"options": []
|
||||
}
|
||||
```
|
||||
|
||||
- (3) device with `/dev/vfio/X`
|
||||
|
||||
```json
|
||||
{
|
||||
"device": "/dev/vfio/110",
|
||||
"volume_type": "vfiovol",
|
||||
"fs_type": "ext4",
|
||||
"metadata": {},
|
||||
"options": []
|
||||
}
|
||||
```
|
||||
|
||||
Second, run kata-containers with device(`/dev/vfio/110`) as an example:
|
||||
|
||||
```bash
|
||||
$ sudo kata-ctl direct-volume add /kubelet/kata-vfio-vol-003/vfiovol003 "{\"device\": \"/dev/vfio/110\", \"volume_type\": \"vfiovol\", \"fs_type\": \"ext4\", \"metadata\":"{}", \"options\": []}"
|
||||
$ # /kubelet/kata-vfio-vol-003/vfiovol003 <==> /run/kata-containers/shared/direct-volumes/F0va22F0ZvaS12F0YS10a2F0DAxvbC0F0ZXvdm9sdF0Z0YSx
|
||||
$ cat F0va22F0ZvaS12F0YS10a2F0DAxvbC0F0ZXvdm9sdF0Z0YSx/mountInfo.json
|
||||
{"volume_type":"vfiovol","device":"/dev/vfio/110","fs_type":"ext4","metadata":{},"options":[]}
|
||||
```
|
||||
|
||||
#### Run a Kata container with VFIO block device based volume
|
||||
|
||||
```bash
|
||||
$ # type=vfiovol,src=/kubelet/kata-vfio-vol-003/vfiovol003,dst=/disk003,options=rbind:rw
|
||||
$ sudo ctr run -t --rm --runtime io.containerd.kata.v2 --mount type=vfiovol,src=/kubelet/kata-vfio-vol-003/vfiovol003,dst=/disk003,options=rbind:rw "$image" kata-vfio-vol-xx05302245 /bin/bash
|
||||
```
|
||||
|
||||
|
||||
### SPDK Device Based Volume
|
||||
|
||||
TBD
|
||||
|
||||
### VFIO Device Based Volume
|
||||
|
||||
TBD
|
@ -11,6 +11,9 @@ mod share_fs_volume;
|
||||
mod shm_volume;
|
||||
pub mod utils;
|
||||
|
||||
pub mod vfio_volume;
|
||||
use vfio_volume::is_vfio_volume;
|
||||
|
||||
use std::{sync::Arc, vec::Vec};
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
@ -75,6 +78,12 @@ impl VolumeResource {
|
||||
.await
|
||||
.with_context(|| format!("new share fs volume {:?}", m))?,
|
||||
)
|
||||
} else if is_vfio_volume(m) {
|
||||
Arc::new(
|
||||
vfio_volume::VfioVolume::new(d, m, read_only, cid, sid)
|
||||
.await
|
||||
.with_context(|| format!("new vfio volume {:?}", m))?,
|
||||
)
|
||||
} else if let Some(options) =
|
||||
get_huge_page_option(m).context("failed to check huge page")?
|
||||
{
|
||||
|
141
src/runtime-rs/crates/resource/src/volume/vfio_volume.rs
Normal file
141
src/runtime-rs/crates/resource/src/volume/vfio_volume.rs
Normal file
@ -0,0 +1,141 @@
|
||||
// Copyright (c) 2023 Alibaba Cloud
|
||||
// Copyright (c) 2023 Ant Group
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use async_trait::async_trait;
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
use super::Volume;
|
||||
use crate::volume::utils::{
|
||||
generate_shared_path, volume_mount_info, DEFAULT_VOLUME_FS_TYPE, KATA_VFIO_VOLUME_TYPE,
|
||||
};
|
||||
use hypervisor::{
|
||||
device::{
|
||||
device_manager::{do_handle_device, DeviceManager},
|
||||
DeviceConfig, DeviceType,
|
||||
},
|
||||
get_vfio_device, VfioConfig,
|
||||
};
|
||||
|
||||
pub(crate) struct VfioVolume {
|
||||
storage: Option<agent::Storage>,
|
||||
mount: oci::Mount,
|
||||
device_id: String,
|
||||
}
|
||||
|
||||
// VfioVolume: vfio device based block volume
|
||||
impl VfioVolume {
|
||||
pub(crate) async fn new(
|
||||
d: &RwLock<DeviceManager>,
|
||||
m: &oci::Mount,
|
||||
read_only: bool,
|
||||
cid: &str,
|
||||
sid: &str,
|
||||
) -> Result<Self> {
|
||||
let mnt_src: &str = &m.source;
|
||||
|
||||
// deserde Information from mountinfo.json
|
||||
let v = volume_mount_info(mnt_src).context("deserde information from mountinfo.json")?;
|
||||
if v.volume_type != KATA_VFIO_VOLUME_TYPE {
|
||||
return Err(anyhow!("volume type is invalid"));
|
||||
}
|
||||
|
||||
// support both /dev/vfio/X and BDF<DDDD:BB:DD.F> or BDF<BB:DD.F>
|
||||
let vfio_device = get_vfio_device(v.device).context("get vfio device failed.")?;
|
||||
let vfio_dev_config = &mut VfioConfig {
|
||||
host_path: vfio_device.clone(),
|
||||
dev_type: "b".to_string(),
|
||||
hostdev_prefix: "vfio_vol".to_owned(),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
// create and insert block device into Kata VM
|
||||
let device_info = do_handle_device(d, &DeviceConfig::VfioCfg(vfio_dev_config.clone()))
|
||||
.await
|
||||
.context("do handle device failed.")?;
|
||||
|
||||
// generate host guest shared path
|
||||
let guest_path = generate_shared_path(m.destination.clone(), read_only, cid, sid)
|
||||
.await
|
||||
.context("generate host-guest shared path failed")?;
|
||||
|
||||
let storage_options = if read_only {
|
||||
vec!["ro".to_string()]
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
|
||||
let mut storage = agent::Storage {
|
||||
options: storage_options,
|
||||
mount_point: guest_path.clone(),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let mut device_id = String::new();
|
||||
if let DeviceType::Vfio(device) = device_info {
|
||||
device_id = device.device_id;
|
||||
storage.driver = device.driver_type;
|
||||
// safe here, device_info is correct and only unwrap it.
|
||||
storage.source = device.config.virt_path.unwrap().1;
|
||||
}
|
||||
|
||||
if m.r#type != "bind" {
|
||||
storage.fs_type = v.fs_type.clone();
|
||||
} else {
|
||||
storage.fs_type = DEFAULT_VOLUME_FS_TYPE.to_string();
|
||||
}
|
||||
|
||||
let mount = oci::Mount {
|
||||
destination: m.destination.clone(),
|
||||
r#type: v.fs_type,
|
||||
source: guest_path,
|
||||
options: m.options.clone(),
|
||||
};
|
||||
|
||||
Ok(Self {
|
||||
storage: Some(storage),
|
||||
mount,
|
||||
device_id,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Volume for VfioVolume {
|
||||
fn get_volume_mount(&self) -> Result<Vec<oci::Mount>> {
|
||||
Ok(vec![self.mount.clone()])
|
||||
}
|
||||
|
||||
fn get_storage(&self) -> Result<Vec<agent::Storage>> {
|
||||
let s = if let Some(s) = self.storage.as_ref() {
|
||||
vec![s.clone()]
|
||||
} else {
|
||||
vec![]
|
||||
};
|
||||
|
||||
Ok(s)
|
||||
}
|
||||
|
||||
async fn cleanup(&self, device_manager: &RwLock<DeviceManager>) -> Result<()> {
|
||||
device_manager
|
||||
.write()
|
||||
.await
|
||||
.try_remove_device(&self.device_id)
|
||||
.await
|
||||
}
|
||||
|
||||
fn get_device_id(&self) -> Result<Option<String>> {
|
||||
Ok(Some(self.device_id.clone()))
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn is_vfio_volume(m: &oci::Mount) -> bool {
|
||||
if m.r#type == KATA_VFIO_VOLUME_TYPE {
|
||||
return true;
|
||||
}
|
||||
|
||||
false
|
||||
}
|
Loading…
Reference in New Issue
Block a user