runtime-rs: add the sandbox's shm volume support

Docker supports specifying the shm size via the --shm-size option and
uses a sandbox-level shm volume, so add support for sandbox shm
volumes. Kubernetes, by contrast, doesn't support specifying the shm
size directly; it typically uses a memory-backed emptyDir as the
container's shm, and that emptyDir's size can be specified.

Signed-off-by: Fupan Li <fupan.lfp@antgroup.com>
This commit is contained in:
Fupan Li
2025-09-09 10:42:59 +08:00
committed by Fabiano Fidêncio
parent d48c542a52
commit 4a92fc1129
12 changed files with 153 additions and 17 deletions

View File

@@ -11,7 +11,6 @@
use kata_types::mount;
use oci_spec::runtime::{Mount, Spec};
use std::path::Path;
use crate::mount::get_linux_mount_info;
@@ -34,9 +33,8 @@ pub fn is_ephemeral_volume(mount: &Mount) -> bool {
mount.destination(),
),
(Some("bind"), Some(source), dest) if get_linux_mount_info(source).is_ok_and(|info| info.fs_type == "tmpfs") &&
(is_empty_dir(source) || dest.as_path() == Path::new("/dev/shm"))
)
(Some("bind"), Some(source), _dest) if get_linux_mount_info(source).is_ok_and(|info| info.fs_type == "tmpfs") &&
is_empty_dir(source))
}
/// Check whether the given path is a kubernetes empty-dir volume of medium "default".

View File

@@ -68,6 +68,12 @@ pub const KATA_VIRTUAL_VOLUME_LAYER_NYDUS_FS: &str = "layer_nydus_fs";
pub const KATA_VIRTUAL_VOLUME_IMAGE_GUEST_PULL: &str = "image_guest_pull";
/// In CoCo scenario, we support force_guest_pull to enforce container image guest pull without remote snapshotter.
pub const KATA_IMAGE_FORCE_GUEST_PULL: &str = "force_guest_pull";
/// kata default guest sandbox dir.
pub const DEFAULT_KATA_GUEST_SANDBOX_DIR: &str = "/run/kata-containers/sandbox/";
/// default shm directory name.
pub const SHM_DIR: &str = "shm";
/// shm device path.
pub const SHM_DEVICE: &str = "/dev/shm";
/// Manager to manage registered storage device handlers.
pub type StorageHandlerManager<H> = HandlerManager<H>;

View File

@@ -90,9 +90,9 @@ impl ResourceManager {
inner.setup_after_start_vm().await
}
pub async fn get_storage_for_sandbox(&self) -> Result<Vec<Storage>> {
pub async fn get_storage_for_sandbox(&self, shm_size: u64) -> Result<Vec<Storage>> {
let inner = self.inner.read().await;
inner.get_storage_for_sandbox().await
inner.get_storage_for_sandbox(shm_size).await
}
pub async fn handler_rootfs(

View File

@@ -17,7 +17,9 @@ use hypervisor::{
},
BlockConfig, Hypervisor, VfioConfig,
};
use kata_types::mount::Mount;
use kata_types::mount::{
Mount, DEFAULT_KATA_GUEST_SANDBOX_DIR, KATA_EPHEMERAL_VOLUME_TYPE, SHM_DIR,
};
use kata_types::{
config::{hypervisor::TopologyConfigInfo, TomlConfig},
mount::{adjust_rootfs_mounts, KATA_IMAGE_FORCE_GUEST_PULL},
@@ -326,12 +328,33 @@ impl ResourceManagerInner {
Ok(())
}
pub async fn get_storage_for_sandbox(&self) -> Result<Vec<Storage>> {
/// Assemble the sandbox-level storages handed to the agent at sandbox start:
/// the share-fs storages (if any) plus a tmpfs backing the sandbox-wide shm.
///
/// `shm_size` is the tmpfs size in bytes (derived on the runtime side from
/// the OCI spec, falling back to DEFAULT_SHM_SIZE).
pub async fn get_storage_for_sandbox(&self, shm_size: u64) -> Result<Vec<Storage>> {
let mut storages = vec![];
if let Some(d) = self.share_fs.as_ref() {
let mut s = d.get_storages().await.context("get storage")?;
storages.append(&mut s);
}
// tmpfs accepts a plain byte count for the size= option.
let shm_size_option = format!("size={}", shm_size);
// The shm tmpfs lives under the guest sandbox dir; containers later
// bind-mount it onto their own /dev/shm (see ShmVolume).
let mount_point = format!("{}/{}", DEFAULT_KATA_GUEST_SANDBOX_DIR, SHM_DIR);
let shm_storage = Storage {
driver: KATA_EPHEMERAL_VOLUME_TYPE.to_string(),
mount_point,
source: "shm".to_string(),
fs_type: "tmpfs".to_string(),
options: vec![
"noexec".to_string(),
"nosuid".to_string(),
"nodev".to_string(),
"mode=1777".to_string(),
shm_size_option,
],
..Default::default()
};
storages.push(shm_storage);
Ok(storages)
}

View File

@@ -40,8 +40,6 @@ const KATA_HOST_SHARED_DIR: &str = "/run/kata-containers/shared/sandboxes/";
/// share fs (for example virtio-fs) mount path in the guest
pub const KATA_GUEST_SHARE_DIR: &str = "/run/kata-containers/shared/containers/";
pub(crate) const DEFAULT_KATA_GUEST_SANDBOX_DIR: &str = "/run/kata-containers/sandbox/";
pub const PASSTHROUGH_FS_DIR: &str = "passthrough";
const RAFS_DIR: &str = "rafs";

View File

@@ -7,11 +7,11 @@
use std::path::{Path, PathBuf};
use super::Volume;
use crate::share_fs::DEFAULT_KATA_GUEST_SANDBOX_DIR;
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
use hypervisor::device::device_manager::DeviceManager;
use kata_sys_util::mount::{get_mount_path, get_mount_type};
use kata_types::mount::DEFAULT_KATA_GUEST_SANDBOX_DIR;
use kata_types::mount::KATA_EPHEMERAL_VOLUME_TYPE;
use nix::sys::stat::stat;
use oci_spec::runtime as oci;

View File

@@ -9,6 +9,7 @@ mod default_volume;
mod ephemeral_volume;
pub mod hugepage;
mod share_fs_volume;
mod shm_volume;
pub mod utils;
pub mod direct_volume;
@@ -67,10 +68,15 @@ impl VolumeResource {
// handle mounts
for m in oci_mounts {
let read_only = get_mount_options(m.options()).iter().any(|opt| opt == "ro");
let volume: Arc<dyn Volume> = if ephemeral_volume::is_ephemeral_volume(m) {
let volume: Arc<dyn Volume> = if shm_volume::is_shm_volume(m) {
Arc::new(
shm_volume::ShmVolume::new(m)
.with_context(|| format!("new shm volume {:?}", m))?,
)
} else if ephemeral_volume::is_ephemeral_volume(m) {
Arc::new(
ephemeral_volume::EphemeralVolume::new(m)
.with_context(|| format!("new shm volume {:?}", m))?,
.with_context(|| format!("new ephemeral volume {:?}", m))?,
)
} else if is_block_volume(m) {
// handle block volume

View File

@@ -0,0 +1,63 @@
// Copyright (c) 2019-2022 Alibaba Cloud
// Copyright (c) 2019-2022 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
use std::path::PathBuf;
use super::Volume;
use anyhow::Result;
use async_trait::async_trait;
use hypervisor::device::device_manager::DeviceManager;
use kata_sys_util::mount::{get_mount_path, get_mount_type};
use kata_types::mount::{
DEFAULT_KATA_GUEST_SANDBOX_DIR, KATA_EPHEMERAL_VOLUME_TYPE, SHM_DEVICE, SHM_DIR,
};
use oci_spec::runtime as oci;
use tokio::sync::RwLock;
/// Volume that bind-mounts the sandbox-wide shm tmpfs (created at sandbox
/// start under the guest sandbox dir) onto a container's /dev/shm.
#[derive(Debug)]
pub(crate) struct ShmVolume {
// The guest-side bind mount handed back to the container spec.
mount: oci::Mount,
}
impl ShmVolume {
/// Build a shm volume: an rbind mount from the sandbox-level shm tmpfs
/// (under the guest sandbox dir) to the destination taken from the
/// container's original OCI mount (its /dev/shm).
pub(crate) fn new(m: &oci::Mount) -> Result<Self> {
// Location of the sandbox-wide shm tmpfs inside the guest.
let guest_shm_src = PathBuf::from(DEFAULT_KATA_GUEST_SANDBOX_DIR).join(SHM_DIR);
let mut guest_mount = oci::Mount::default();
guest_mount.set_typ(Some("bind".to_string()));
guest_mount.set_source(Some(guest_shm_src));
guest_mount.set_destination(m.destination().clone());
guest_mount.set_options(Some(vec!["rbind".to_string()]));
Ok(Self { mount: guest_mount })
}
}
#[async_trait]
impl Volume for ShmVolume {
/// Return the container-facing bind mount for the sandbox shm.
// Use the imported `Result` alias for consistency with the other
// methods of this impl (it is the same `anyhow::Result` type).
fn get_volume_mount(&self) -> Result<Vec<oci::Mount>> {
Ok(vec![self.mount.clone()])
}
/// No agent-side storage is needed here: the sandbox-level shm tmpfs is
/// created when the sandbox starts, so this volume only bind-mounts it.
fn get_storage(&self) -> Result<Vec<agent::Storage>> {
Ok(vec![])
}
async fn cleanup(&self, _device_manager: &RwLock<DeviceManager>) -> Result<()> {
// No cleanup is required for ShmVolume because it is a mount in guest which
// does not require explicit unmounting or deletion in host side.
Ok(())
}
/// A shm volume is not backed by a host device, so there is no device id.
fn get_device_id(&self) -> Result<Option<String>> {
Ok(None)
}
}
/// Check whether an OCI mount targets /dev/shm and should be handled as a
/// sandbox shm volume (ephemeral-typed /dev/shm mounts are handled by the
/// ephemeral volume path instead).
pub(crate) fn is_shm_volume(m: &oci::Mount) -> bool {
let dest_is_shm = get_mount_path(&Some(m.destination().clone())).as_str() == SHM_DEVICE;
let is_ephemeral = get_mount_type(m).as_str() == KATA_EPHEMERAL_VOLUME_TYPE;
dest_is_shm && !is_ephemeral
}

View File

@@ -23,6 +23,10 @@ use kata_types::mount::Mount;
use oci_spec::runtime as oci;
use strum::Display;
// DEFAULT_SHM_SIZE is the default shm size to be used in case host
// IPC is used.
pub const DEFAULT_SHM_SIZE: u64 = 65536 * 1024;
/// TaskRequest: TaskRequest from shim
/// TaskRequest and TaskResponse messages need to be paired
#[derive(Debug, Clone, Display)]
@@ -176,6 +180,7 @@ pub struct SandboxConfig {
pub annotations: HashMap<String, String, RandomState>,
pub hooks: Option<oci::Hooks>,
pub state: runtime_spec::State,
pub shm_size: u64,
}
#[derive(Clone, Debug)]

View File

@@ -8,6 +8,7 @@ use super::{
ContainerConfig, ContainerID, ContainerProcess, ExecProcessRequest, KillRequest,
ResizePTYRequest, SandboxConfig, SandboxID, SandboxNetworkEnv, SandboxRequest,
SandboxStatusRequest, ShutdownRequest, StopSandboxRequest, TaskRequest, UpdateRequest,
DEFAULT_SHM_SIZE,
};
use kata_types::mount::Mount;
@@ -84,6 +85,7 @@ impl TryFrom<sandbox_api::CreateSandboxRequest> for SandboxRequest {
bundle: from.bundle_path,
annotations: config.annotations,
},
shm_size: DEFAULT_SHM_SIZE,
})))
}
}

View File

@@ -9,7 +9,7 @@ use common::{
message::Message,
types::{
ContainerProcess, PlatformInfo, SandboxConfig, SandboxRequest, SandboxResponse,
SandboxStatusInfo, StartSandboxInfo, TaskRequest, TaskResponse,
SandboxStatusInfo, StartSandboxInfo, TaskRequest, TaskResponse, DEFAULT_SHM_SIZE,
},
RuntimeHandler, RuntimeInstance, Sandbox, SandboxNetworkEnv,
};
@@ -17,12 +17,15 @@ use common::{
use hypervisor::Param;
use kata_sys_util::{mount::get_mount_path, spec::load_oci_spec};
use kata_types::{
annotations::Annotation, config::default::DEFAULT_GUEST_DNS_FILE, config::TomlConfig,
annotations::Annotation,
config::{default::DEFAULT_GUEST_DNS_FILE, TomlConfig},
mount::SHM_DEVICE,
};
#[cfg(feature = "linux")]
use linux_container::LinuxContainer;
use logging::FILTER_RULE;
use netns_rs::NetNs;
use nix::sys::statfs;
use oci_spec::runtime as oci;
use persist::sandbox_persist::Persist;
use resource::{
@@ -355,6 +358,8 @@ impl RuntimeHandlerManager {
network_created,
};
let shm_size = get_shm_size(spec)?;
let sandbox_config = SandboxConfig {
sandbox_id: inner.id.clone(),
dns,
@@ -363,6 +368,7 @@ impl RuntimeHandlerManager {
annotations: spec.annotations().clone().unwrap_or_default(),
hooks: spec.hooks().clone(),
state: state.clone(),
shm_size,
};
inner.try_init(sandbox_config, Some(spec), options).await
@@ -714,3 +720,26 @@ fn update_component_log_level(config: &TomlConfig) {
updated_inner
});
}
/// Derive the sandbox shm size (in bytes) from the OCI spec.
///
/// Docker expresses `--shm-size` as a bind mount of a dedicated tmpfs onto
/// /dev/shm; in that case the size is read back with statfs() on the mount
/// source. When no such mount exists, or /dev/shm is bind-mounted from the
/// host's own /dev/shm (host IPC), fall back to DEFAULT_SHM_SIZE.
///
/// # Errors
/// Returns an error if statfs() fails on the mount source.
fn get_shm_size(spec: &oci::Spec) -> Result<u64> {
let mut shm_size = DEFAULT_SHM_SIZE;
if let Some(mounts) = spec.mounts() {
for m in mounts {
if m.destination().as_path() != Path::new(SHM_DEVICE) {
continue;
}
// Compare through `as_deref()` to avoid allocating a String and a
// PathBuf per mount just for the comparison (clippy::cmp_owned).
if m.typ().as_deref() == Some("bind")
&& m.source().as_deref() != Some(Path::new(SHM_DEVICE))
{
if let Some(src) = m.source() {
let stat = statfs::statfs(src)?;
// Total size of the tmpfs backing /dev/shm. block_size()
// is a c_long; it is non-negative for tmpfs, so the cast
// to u64 is safe here.
shm_size = stat.blocks() * stat.block_size() as u64;
}
}
}
}
Ok(shm_size)
}

View File

@@ -12,9 +12,12 @@ use agent::{
};
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
use common::message::{Action, Message};
use common::types::utils::option_system_time_into;
use common::types::ContainerProcess;
use common::{
message::{Action, Message},
types::DEFAULT_SHM_SIZE,
};
use common::{
types::{SandboxConfig, SandboxExitInfo, SandboxStatus},
ContainerManager, Sandbox, SandboxNetworkEnv,
@@ -92,6 +95,7 @@ pub struct VirtSandbox {
hypervisor: Arc<dyn Hypervisor>,
monitor: Arc<HealthCheck>,
sandbox_config: Option<SandboxConfig>,
shm_size: u64,
}
impl std::fmt::Debug for VirtSandbox {
@@ -122,6 +126,7 @@ impl VirtSandbox {
hypervisor,
resource_manager,
monitor: Arc::new(HealthCheck::new(true, keep_abnormal)),
shm_size: sandbox_config.shm_size,
sandbox_config: Some(sandbox_config),
})
}
@@ -607,7 +612,7 @@ impl Sandbox for VirtSandbox {
dns: sandbox_config.dns.clone(),
storages: self
.resource_manager
.get_storage_for_sandbox()
.get_storage_for_sandbox(self.shm_size)
.await
.context("get storages for sandbox")?,
sandbox_pidns: false,
@@ -923,6 +928,7 @@ impl Persist for VirtSandbox {
resource_manager,
monitor: Arc::new(HealthCheck::new(true, keep_abnormal)),
sandbox_config: None,
shm_size: DEFAULT_SHM_SIZE,
})
}
}