runtime-rs: add the ephemeral memory based volume support

For k8s, there are two types of volumes based on ephemeral memory:
one is the emptydir volume based on ephemeral memory, and the other
is used for shm devices such as /dev/shm. Thus add a new volume
type, the ephemeral volume, to support those two types of volumes and
remove the legacy shm volume.

Signed-off-by: Fupan Li <fupan.lfp@antgroup.com>
This commit is contained in:
Fupan Li 2025-03-28 11:46:38 +08:00
parent 3dfabd42c2
commit 6e5f3cbbeb
5 changed files with 141 additions and 139 deletions

View File

@ -10,27 +10,34 @@
//! to detect K8S EmptyDir medium type from `oci::spec::Mount` objects.
use kata_types::mount;
use oci_spec::runtime::Spec;
use oci_spec::runtime::{Mount, Spec};
use std::path::Path;
use crate::mount::get_linux_mount_info;
pub use kata_types::k8s::is_empty_dir;
/// Check whether the given path is a kubernetes ephemeral volume.
/// Check whether a given volume is an ephemeral volume.
///
/// This method depends on a specific path used by k8s to detect if it's type of ephemeral.
/// As of now, this is a very k8s specific solution that works but in future there should be a
/// better way for this method to determine if the path is for ephemeral volume type.
pub fn is_ephemeral_volume(path: &str) -> bool {
if is_empty_dir(path) {
if let Ok(info) = get_linux_mount_info(path) {
if info.fs_type == "tmpfs" {
return true;
}
}
}
/// For k8s, there are generally two types of ephemeral volumes: one is the
/// volume used as /dev/shm of the container, and the other is the
/// emptydir volume based on the memory type. Both types of volumes
/// are based on tmpfs mount volumes, so we classify them as ephemeral
/// volumes that can be set up in the guest. Any other tmpfs based volume,
/// which may contain some initial files, cannot be treated as ephemeral and
/// should be passed using share fs.
///
/// Returns `true` only when all of the following hold:
/// - the mount type is `"bind"`;
/// - the mount has a source that is valid UTF-8;
/// - the source is backed by a `tmpfs` file system on the host;
/// - the source is a k8s emptydir path, or the destination is `/dev/shm`.
pub fn is_ephemeral_volume(mount: &Mount) -> bool {
    matches!(
        (
            mount.typ().as_deref(),
            mount.source().as_deref().and_then(|s| s.to_str()),
            mount.destination(),
        ),
        // A failed mount-info lookup is treated as "not tmpfs" rather than an error.
        (Some("bind"), Some(source), dest) if get_linux_mount_info(source)
            .map_or(false, |info| info.fs_type == "tmpfs")
            && (is_empty_dir(source) || dest.as_path() == Path::new("/dev/shm"))
    )
}
/// Check whether the given path is a kubernetes empty-dir volume of medium "default".
@ -65,7 +72,8 @@ pub fn update_ephemeral_storage_type(oci_spec: &mut Spec) {
if let Some(source) = &m.source() {
let mnt_src = &source.display().to_string();
if is_ephemeral_volume(mnt_src) {
//here we only care about the "bind" mount volume.
if is_ephemeral_volume(m) {
m.set_typ(Some(String::from(mount::KATA_EPHEMERAL_VOLUME_TYPE)));
} else if is_host_empty_dir(mnt_src) {
// FIXME support disable_guest_empty_dir

View File

@ -19,7 +19,7 @@ pub const KATA_GUEST_MOUNT_PREFIX: &str = "kata:guest-mount:";
/// The sharedfs volume is mounted by guest OS before starting the kata-agent.
pub const KATA_SHAREDFS_GUEST_PREMOUNT_TAG: &str = "kataShared";
/// KATA_EPHEMERAL_DEV_TYPE creates a tmpfs backed volume for sharing files between containers.
/// KATA_EPHEMERAL_VOLUME_TYPE creates a tmpfs backed volume for sharing files between containers.
pub const KATA_EPHEMERAL_VOLUME_TYPE: &str = "ephemeral";
/// KATA_HOST_DIR_TYPE use for host empty dir

View File

@ -0,0 +1,114 @@
// Copyright (c) 2019-2022 Alibaba Cloud
// Copyright (c) 2019-2022 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
use std::path::{Path, PathBuf};
use super::Volume;
use crate::share_fs::DEFAULT_KATA_GUEST_SANDBOX_DIR;
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
use hypervisor::device::device_manager::DeviceManager;
use kata_sys_util::mount::{get_mount_path, get_mount_type};
use kata_types::mount::KATA_EPHEMERAL_VOLUME_TYPE;
use nix::sys::stat::stat;
use oci_spec::runtime as oci;
use tokio::sync::RwLock;
/// A volume whose backing storage is a tmpfs created inside the guest.
#[derive(Debug)]
pub(crate) struct EphemeralVolume {
// OCI mount returned by `get_volume_mount`.
mount: oci::Mount,
// Agent storage description returned by `get_storage`; an empty list is
// returned when this is `None`.
storage: Option<agent::Storage>,
}
impl EphemeralVolume {
pub(crate) fn new(m: &oci::Mount) -> Result<Self> {
if m.source().is_none() {
return Err(anyhow!(format!(
"got a wrong volume without source: {:?}",
m
)));
}
// refer to the golang `handleEphemeralStorage` code at
// https://github.com/kata-containers/kata-containers/blob/9516286f6dd5cfd6b138810e5d7c9e01cf6fc043/src/runtime/virtcontainers/kata_agent.go#L1354
let source = &get_mount_path(m.source());
let file_stat =
stat(Path::new(source)).with_context(|| format!("mount source {}", source))?;
// if volume's gid isn't root group(default group), this means there's
// an specific fsGroup is set on this local volume, then it should pass
// to guest.
let dir_options = if file_stat.st_gid != 0 {
vec![format!("fsgid={}", file_stat.st_gid)]
} else {
vec![]
};
let file_name = Path::new(source)
.file_name()
.context(format!("get file name from {:?}", &m.source()))?;
let source = Path::new(DEFAULT_KATA_GUEST_SANDBOX_DIR)
.join(KATA_EPHEMERAL_VOLUME_TYPE)
.join(file_name)
.into_os_string()
.into_string()
.map_err(|e| anyhow!("failed to get ephemeral path {:?}", e))?;
// Create a storage struct so that kata agent is able to create
// tmpfs backed volume inside the VM
let ephemeral_storage = agent::Storage {
driver: String::from(KATA_EPHEMERAL_VOLUME_TYPE),
driver_options: Vec::new(),
source: String::from("tmpfs"),
fs_type: String::from("tmpfs"),
fs_group: None,
options: dir_options,
mount_point: source.clone(),
};
let mut mount = oci::Mount::default();
mount.set_destination(m.destination().clone());
mount.set_typ(Some("bind".to_string()));
mount.set_source(Some(PathBuf::from(&source)));
mount.set_options(Some(vec!["rbind".to_string()]));
Ok(Self {
mount,
storage: Some(ephemeral_storage),
})
}
}
#[async_trait]
impl Volume for EphemeralVolume {
    /// Returns a copy of the single OCI mount held by this volume.
    fn get_volume_mount(&self) -> anyhow::Result<Vec<oci::Mount>> {
        let mounts = vec![self.mount.clone()];
        Ok(mounts)
    }

    /// Returns a copy of the agent storage, or an empty list when there is none.
    fn get_storage(&self) -> Result<Vec<agent::Storage>> {
        Ok(self.storage.iter().cloned().collect())
    }

    /// Cleanup is not implemented yet; logs a warning and reports success.
    async fn cleanup(&self, _device_manager: &RwLock<DeviceManager>) -> Result<()> {
        // TODO: Clean up EphemeralVolume
        warn!(sl!(), "Cleaning up EphemeralVolume is still unimplemented.");
        Ok(())
    }

    /// This volume reports no device id.
    fn get_device_id(&self) -> Result<Option<String>> {
        Ok(None)
    }
}
/// Returns `true` when the mount type of `m` equals `KATA_EPHEMERAL_VOLUME_TYPE`.
pub(crate) fn is_ephemeral_volume(m: &oci::Mount) -> bool {
    let mount_type = get_mount_type(m);
    mount_type.as_str() == KATA_EPHEMERAL_VOLUME_TYPE
}

View File

@ -6,9 +6,9 @@
mod block_volume;
mod default_volume;
mod ephemeral_volume;
pub mod hugepage;
mod share_fs_volume;
mod shm_volume;
pub mod utils;
pub mod direct_volume;
@ -67,10 +67,9 @@ impl VolumeResource {
// handle mounts
for m in oci_mounts {
let read_only = get_mount_options(m.options()).iter().any(|opt| opt == "ro");
let volume: Arc<dyn Volume> = if shm_volume::is_shm_volume(m) {
let shm_size = shm_volume::DEFAULT_SHM_SIZE;
let volume: Arc<dyn Volume> = if ephemeral_volume::is_ephemeral_volume(m) {
Arc::new(
shm_volume::ShmVolume::new(m, shm_size)
ephemeral_volume::EphemeralVolume::new(m)
.with_context(|| format!("new shm volume {:?}", m))?,
)
} else if is_block_volume(m) {

View File

@ -1,119 +0,0 @@
// Copyright (c) 2019-2022 Alibaba Cloud
// Copyright (c) 2019-2022 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
use std::path::{Path, PathBuf};
use super::Volume;
use crate::share_fs::DEFAULT_KATA_GUEST_SANDBOX_DIR;
use anyhow::Result;
use async_trait::async_trait;
use hypervisor::device::device_manager::DeviceManager;
use kata_sys_util::mount::{get_mount_path, get_mount_type};
use oci_spec::runtime as oci;
use tokio::sync::RwLock;
/// SHM_DIR is the directory name joined onto the guest sandbox directory to
/// form the guest-side shm mount point.
pub const SHM_DIR: &str = "shm";
/// DEFAULT_SHM_SIZE is the default shm size to be used in case host
/// IPC is used. The value is rendered into a tmpfs `size=` mount option
/// (65536 * 1024, i.e. 64 MiB if the option is interpreted in bytes).
pub const DEFAULT_SHM_SIZE: u64 = 65536 * 1024;
/// KATA_EPHEMERAL_DEV_TYPE creates a tmpfs backed volume for sharing files between containers.
pub const KATA_EPHEMERAL_DEV_TYPE: &str = "ephemeral";
/// A volume providing the container's shm mount.
#[derive(Debug)]
pub(crate) struct ShmVolume {
// OCI mount returned by `get_volume_mount`.
mount: oci::Mount,
// Agent storage description returned by `get_storage`; `None` when the
// volume was created with a zero shm size.
storage: Option<agent::Storage>,
}
impl ShmVolume {
    /// Creates the shm volume for the OCI mount `m`.
    ///
    /// With a non-zero `shm_size`, a tmpfs storage of that size is described
    /// for the agent at `<DEFAULT_KATA_GUEST_SANDBOX_DIR>/<SHM_DIR>`, and the
    /// returned mount bind-mounts that path onto the destination of `m`.
    ///
    /// With a zero `shm_size`, no storage is produced and the returned mount
    /// is a plain tmpfs sized with `DEFAULT_SHM_SIZE`.
    pub(crate) fn new(m: &oci::Mount, shm_size: u64) -> Result<Self> {
        // Both variants mount onto the destination requested by `m`.
        let mut mount = oci::Mount::default();
        mount.set_destination(m.destination().clone());

        if shm_size == 0 {
            // No guest storage: the container gets a tmpfs mount directly.
            let tmpfs_options = vec![
                "noexec".to_string(),
                "nosuid".to_string(),
                "nodev".to_string(),
                "mode=1777".to_string(),
                format!("size={}", DEFAULT_SHM_SIZE),
            ];
            mount.set_typ(Some("tmpfs".to_string()));
            mount.set_source(Some(PathBuf::from("shm")));
            mount.set_options(Some(tmpfs_options));

            return Ok(Self {
                storage: None,
                mount,
            });
        }

        // Guest-side directory where the agent mounts the tmpfs.
        let guest_dir = Path::new(DEFAULT_KATA_GUEST_SANDBOX_DIR).join(SHM_DIR);
        let guest_dir = guest_dir.to_str().unwrap();

        let storage = agent::Storage {
            driver: String::from(KATA_EPHEMERAL_DEV_TYPE),
            driver_options: Vec::new(),
            source: String::from("shm"),
            fs_type: String::from("tmpfs"),
            fs_group: None,
            options: vec![
                String::from("noexec"),
                String::from("nosuid"),
                String::from("nodev"),
                String::from("mode=1777"),
                format!("size={}", shm_size),
            ],
            mount_point: guest_dir.to_string(),
        };

        // The container sees the guest tmpfs through a bind mount.
        mount.set_typ(Some("bind".to_string()));
        mount.set_source(Some(PathBuf::from(guest_dir)));
        mount.set_options(Some(vec!["rbind".to_string()]));

        Ok(Self {
            storage: Some(storage),
            mount,
        })
    }
}
#[async_trait]
impl Volume for ShmVolume {
    /// Returns a copy of the single OCI mount held by this volume.
    fn get_volume_mount(&self) -> anyhow::Result<Vec<oci::Mount>> {
        let mounts = vec![self.mount.clone()];
        Ok(mounts)
    }

    /// Returns a copy of the agent storage, or an empty list when there is none.
    fn get_storage(&self) -> Result<Vec<agent::Storage>> {
        let storages = match &self.storage {
            Some(storage) => vec![storage.clone()],
            None => Vec::new(),
        };
        Ok(storages)
    }

    /// Cleanup is not implemented yet; logs a warning and reports success.
    async fn cleanup(&self, _device_manager: &RwLock<DeviceManager>) -> Result<()> {
        // TODO: Clean up ShmVolume
        warn!(sl!(), "Cleaning up ShmVolume is still unimplemented.");
        Ok(())
    }

    /// This volume reports no device id.
    fn get_device_id(&self) -> Result<Option<String>> {
        Ok(None)
    }
}
/// Returns `true` when `m` targets `/dev/shm` and its mount type is not
/// already `KATA_EPHEMERAL_DEV_TYPE`.
pub(crate) fn is_shm_volume(m: &oci::Mount) -> bool {
    let destination = get_mount_path(&Some(m.destination().clone()));
    if destination.as_str() != "/dev/shm" {
        return false;
    }

    get_mount_type(m).as_str() != KATA_EPHEMERAL_DEV_TYPE
}