diff --git a/src/libs/kata-sys-util/src/k8s.rs b/src/libs/kata-sys-util/src/k8s.rs
index 5550c01d9d..d934c12247 100644
--- a/src/libs/kata-sys-util/src/k8s.rs
+++ b/src/libs/kata-sys-util/src/k8s.rs
@@ -11,7 +11,6 @@
 
 use kata_types::mount;
 use oci_spec::runtime::{Mount, Spec};
-use std::path::Path;
 
 use crate::mount::get_linux_mount_info;
 
@@ -34,9 +33,8 @@ pub fn is_ephemeral_volume(mount: &Mount) -> bool {
             mount.destination(),
         ),
-        (Some("bind"), Some(source), dest) if get_linux_mount_info(source).is_ok_and(|info| info.fs_type == "tmpfs") &&
-        (is_empty_dir(source) || dest.as_path() == Path::new("/dev/shm"))
-    )
+        (Some("bind"), Some(source), _dest) if get_linux_mount_info(source).is_ok_and(|info| info.fs_type == "tmpfs") &&
+        is_empty_dir(source))
 }
 
 /// Check whether the given path is a kubernetes empty-dir volume of medium "default".
diff --git a/src/libs/kata-types/src/mount.rs b/src/libs/kata-types/src/mount.rs
index f0a6c75f47..3429c3aad6 100644
--- a/src/libs/kata-types/src/mount.rs
+++ b/src/libs/kata-types/src/mount.rs
@@ -68,6 +68,12 @@ pub const KATA_VIRTUAL_VOLUME_LAYER_NYDUS_FS: &str = "layer_nydus_fs";
 pub const KATA_VIRTUAL_VOLUME_IMAGE_GUEST_PULL: &str = "image_guest_pull";
 /// In CoCo scenario, we support force_guest_pull to enforce container image guest pull without remote snapshotter.
 pub const KATA_IMAGE_FORCE_GUEST_PULL: &str = "force_guest_pull";
+/// Default Kata sandbox directory inside the guest (note the trailing '/').
+pub const DEFAULT_KATA_GUEST_SANDBOX_DIR: &str = "/run/kata-containers/sandbox/";
+/// Name of the shm directory located under the guest sandbox directory.
+pub const SHM_DIR: &str = "shm";
+/// Path of the shm device; a mount with this destination is treated as a shm volume.
+pub const SHM_DEVICE: &str = "/dev/shm";
 
 /// Manager to manage registered storage device handlers.
 pub type StorageHandlerManager<H> = HandlerManager<H>;
diff --git a/src/runtime-rs/crates/resource/src/manager.rs b/src/runtime-rs/crates/resource/src/manager.rs
index 05e0b04605..ab08c5efbb 100644
--- a/src/runtime-rs/crates/resource/src/manager.rs
+++ b/src/runtime-rs/crates/resource/src/manager.rs
@@ -90,9 +90,9 @@ impl ResourceManager {
         inner.setup_after_start_vm().await
     }
 
-    pub async fn get_storage_for_sandbox(&self) -> Result<Vec<Storage>> {
+    pub async fn get_storage_for_sandbox(&self, shm_size: u64) -> Result<Vec<Storage>> {
         let inner = self.inner.read().await;
-        inner.get_storage_for_sandbox().await
+        inner.get_storage_for_sandbox(shm_size).await
     }
 
     pub async fn handler_rootfs(
diff --git a/src/runtime-rs/crates/resource/src/manager_inner.rs b/src/runtime-rs/crates/resource/src/manager_inner.rs
index c898da56d1..d86c4308ab 100644
--- a/src/runtime-rs/crates/resource/src/manager_inner.rs
+++ b/src/runtime-rs/crates/resource/src/manager_inner.rs
@@ -17,7 +17,9 @@ use hypervisor::{
     },
     BlockConfig, Hypervisor, VfioConfig,
 };
-use kata_types::mount::Mount;
+use kata_types::mount::{
+    Mount, DEFAULT_KATA_GUEST_SANDBOX_DIR, KATA_EPHEMERAL_VOLUME_TYPE, SHM_DIR,
+};
 use kata_types::{
     config::{hypervisor::TopologyConfigInfo, TomlConfig},
     mount::{adjust_rootfs_mounts, KATA_IMAGE_FORCE_GUEST_PULL},
@@ -326,12 +328,39 @@ impl ResourceManagerInner {
         Ok(())
     }
 
-    pub async fn get_storage_for_sandbox(&self) -> Result<Vec<Storage>> {
+    pub async fn get_storage_for_sandbox(&self, shm_size: u64) -> Result<Vec<Storage>> {
         let mut storages = vec![];
         if let Some(d) = self.share_fs.as_ref() {
             let mut s = d.get_storages().await.context("get storage")?;
             storages.append(&mut s);
         }
+
+        let shm_size_option = format!("size={}", shm_size);
+        // DEFAULT_KATA_GUEST_SANDBOX_DIR ends with '/', so trim it to avoid a doubled
+        // separator ("sandbox//shm") and to match the path ShmVolume binds from.
+        let mount_point = format!(
+            "{}/{}",
+            DEFAULT_KATA_GUEST_SANDBOX_DIR.trim_end_matches('/'),
+            SHM_DIR
+        );
+
+        let shm_storage = Storage {
+            driver: KATA_EPHEMERAL_VOLUME_TYPE.to_string(),
+            mount_point,
+            source: "shm".to_string(),
+            fs_type: "tmpfs".to_string(),
+            options: vec![
+                "noexec".to_string(),
+                "nosuid".to_string(),
+                "nodev".to_string(),
+                "mode=1777".to_string(),
+                shm_size_option,
+            ],
+            ..Default::default()
+        };
+
+        storages.push(shm_storage);
+
         Ok(storages)
     }
 
diff --git a/src/runtime-rs/crates/resource/src/share_fs/mod.rs b/src/runtime-rs/crates/resource/src/share_fs/mod.rs
index faf6a20ccf..84155a8289 100644
--- a/src/runtime-rs/crates/resource/src/share_fs/mod.rs
+++ b/src/runtime-rs/crates/resource/src/share_fs/mod.rs
@@ -40,8 +40,6 @@ const KATA_HOST_SHARED_DIR: &str = "/run/kata-containers/shared/sandboxes/";
 /// share fs (for example virtio-fs) mount path in the guest
 pub const KATA_GUEST_SHARE_DIR: &str = "/run/kata-containers/shared/containers/";
 
-pub(crate) const DEFAULT_KATA_GUEST_SANDBOX_DIR: &str = "/run/kata-containers/sandbox/";
-
 pub const PASSTHROUGH_FS_DIR: &str = "passthrough";
 const RAFS_DIR: &str = "rafs";
 
diff --git a/src/runtime-rs/crates/resource/src/volume/ephemeral_volume.rs b/src/runtime-rs/crates/resource/src/volume/ephemeral_volume.rs
index d09ea372b0..384ef2912e 100644
--- a/src/runtime-rs/crates/resource/src/volume/ephemeral_volume.rs
+++ b/src/runtime-rs/crates/resource/src/volume/ephemeral_volume.rs
@@ -7,11 +7,11 @@
 use std::path::{Path, PathBuf};
 
 use super::Volume;
-use crate::share_fs::DEFAULT_KATA_GUEST_SANDBOX_DIR;
 use anyhow::{anyhow, Context, Result};
 use async_trait::async_trait;
 use hypervisor::device::device_manager::DeviceManager;
 use kata_sys_util::mount::{get_mount_path, get_mount_type};
+use kata_types::mount::DEFAULT_KATA_GUEST_SANDBOX_DIR;
 use kata_types::mount::KATA_EPHEMERAL_VOLUME_TYPE;
 use nix::sys::stat::stat;
 use oci_spec::runtime as oci;
diff --git a/src/runtime-rs/crates/resource/src/volume/mod.rs b/src/runtime-rs/crates/resource/src/volume/mod.rs
index fc3afd39c8..81d6f1deaa 100644
--- a/src/runtime-rs/crates/resource/src/volume/mod.rs
+++ b/src/runtime-rs/crates/resource/src/volume/mod.rs
@@ -9,6 +9,7 @@ mod default_volume;
 mod ephemeral_volume;
 pub mod hugepage;
 mod share_fs_volume;
+mod shm_volume;
 pub mod utils;
 
 pub mod direct_volume;
@@ -67,10 +68,15 @@ impl VolumeResource {
         // handle mounts
         for m in oci_mounts {
             let read_only = get_mount_options(m.options()).iter().any(|opt| opt == "ro");
-            let volume: Arc<dyn Volume> = if ephemeral_volume::is_ephemeral_volume(m) {
+            let volume: Arc<dyn Volume> = if shm_volume::is_shm_volume(m) {
+                Arc::new(
+                    shm_volume::ShmVolume::new(m)
+                        .with_context(|| format!("new shm volume {:?}", m))?,
+                )
+            } else if ephemeral_volume::is_ephemeral_volume(m) {
                 Arc::new(
                     ephemeral_volume::EphemeralVolume::new(m)
-                        .with_context(|| format!("new shm volume {:?}", m))?,
+                        .with_context(|| format!("new ephemeral volume {:?}", m))?,
                 )
             } else if is_block_volume(m) {
                 // handle block volume
diff --git a/src/runtime-rs/crates/resource/src/volume/shm_volume.rs b/src/runtime-rs/crates/resource/src/volume/shm_volume.rs
new file mode 100644
index 0000000000..875a583b59
--- /dev/null
+++ b/src/runtime-rs/crates/resource/src/volume/shm_volume.rs
@@ -0,0 +1,70 @@
+// Copyright (c) 2019-2022 Alibaba Cloud
+// Copyright (c) 2019-2022 Ant Group
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+use std::path::PathBuf;
+
+use super::Volume;
+use anyhow::Result;
+use async_trait::async_trait;
+use hypervisor::device::device_manager::DeviceManager;
+use kata_sys_util::mount::{get_mount_path, get_mount_type};
+use kata_types::mount::{
+    DEFAULT_KATA_GUEST_SANDBOX_DIR, KATA_EPHEMERAL_VOLUME_TYPE, SHM_DEVICE, SHM_DIR,
+};
+use oci_spec::runtime as oci;
+use tokio::sync::RwLock;
+
+/// Volume that bind-mounts the shm directory of the guest sandbox
+/// (`DEFAULT_KATA_GUEST_SANDBOX_DIR` joined with `SHM_DIR`) at a container mount destination.
+#[derive(Debug)]
+pub(crate) struct ShmVolume {
+    mount: oci::Mount,
+}
+
+impl ShmVolume {
+    /// Build the bind mount for `m`: the destination is kept, while type, source and
+    /// options are replaced so that the mount binds from the sandbox shm directory.
+    pub(crate) fn new(m: &oci::Mount) -> Result<Self> {
+        let mut mount = oci::Mount::default();
+        mount.set_destination(m.destination().clone());
+        mount.set_typ(Some("bind".to_string()));
+        mount.set_source(Some(
+            PathBuf::from(DEFAULT_KATA_GUEST_SANDBOX_DIR).join(SHM_DIR),
+        ));
+        mount.set_options(Some(vec!["rbind".to_string()]));
+
+        Ok(Self { mount })
+    }
+}
+
+#[async_trait]
+impl Volume for ShmVolume {
+    fn get_volume_mount(&self) -> anyhow::Result<Vec<oci::Mount>> {
+        Ok(vec![self.mount.clone()])
+    }
+
+    fn get_storage(&self) -> Result<Vec<agent::Storage>> {
+        // This volume contributes only the bind mount above, no storage of its own.
+        Ok(vec![])
+    }
+
+    async fn cleanup(&self, _device_manager: &RwLock<DeviceManager>) -> Result<()> {
+        // No cleanup is required for ShmVolume because it is a mount in the guest, which
+        // does not require explicit unmounting or deletion on the host side.
+        Ok(())
+    }
+
+    fn get_device_id(&self) -> Result<Option<String>> {
+        Ok(None)
+    }
+}
+
+/// Returns true when the destination of `m` is `SHM_DEVICE` and its mount type is not
+/// `KATA_EPHEMERAL_VOLUME_TYPE`.
+pub(crate) fn is_shm_volume(m: &oci::Mount) -> bool {
+    get_mount_path(&Some(m.destination().clone())).as_str() == SHM_DEVICE
+        && get_mount_type(m).as_str() != KATA_EPHEMERAL_VOLUME_TYPE
+}
diff --git a/src/runtime-rs/crates/runtimes/common/src/types/mod.rs b/src/runtime-rs/crates/runtimes/common/src/types/mod.rs
index 3883204167..d0f40d78ab 100644
--- a/src/runtime-rs/crates/runtimes/common/src/types/mod.rs
+++ b/src/runtime-rs/crates/runtimes/common/src/types/mod.rs
@@ -23,6 +23,10 @@ use kata_types::mount::Mount;
 use oci_spec::runtime as oci;
 use strum::Display;
 
+/// DEFAULT_SHM_SIZE is the default shm size to be used in case host
+/// IPC is used.
+pub const DEFAULT_SHM_SIZE: u64 = 65536 * 1024;
+
 /// TaskRequest: TaskRequest from shim
 /// TaskRequest and TaskResponse messages need to be paired
 #[derive(Debug, Clone, Display)]
@@ -176,6 +180,8 @@ pub struct SandboxConfig {
     pub annotations: HashMap<String, String>,
     pub hooks: Option<oci::Hooks>,
     pub state: runtime_spec::State,
+    /// Size in bytes requested for the sandbox shm tmpfs.
+    pub shm_size: u64,
 }
 
 #[derive(Clone, Debug)]
diff --git a/src/runtime-rs/crates/runtimes/common/src/types/trans_from_shim.rs b/src/runtime-rs/crates/runtimes/common/src/types/trans_from_shim.rs
index 0ace92610e..d2f6c49637 100644
--- a/src/runtime-rs/crates/runtimes/common/src/types/trans_from_shim.rs
+++ b/src/runtime-rs/crates/runtimes/common/src/types/trans_from_shim.rs
@@ -8,6 +8,7 @@ use super::{
     ContainerConfig, ContainerID, ContainerProcess, ExecProcessRequest, KillRequest,
     ResizePTYRequest, SandboxConfig, SandboxID, SandboxNetworkEnv, SandboxRequest,
     SandboxStatusRequest, ShutdownRequest, StopSandboxRequest, TaskRequest, UpdateRequest,
+    DEFAULT_SHM_SIZE,
 };
 
 use kata_types::mount::Mount;
@@ -84,6 +85,7 @@ impl TryFrom for SandboxRequest {
                 bundle: from.bundle_path,
                 annotations: config.annotations,
             },
+            shm_size: DEFAULT_SHM_SIZE,
         })))
     }
 }
diff --git a/src/runtime-rs/crates/runtimes/src/manager.rs b/src/runtime-rs/crates/runtimes/src/manager.rs
index f092f2c933..df064ddcbf 100644
--- a/src/runtime-rs/crates/runtimes/src/manager.rs
+++ b/src/runtime-rs/crates/runtimes/src/manager.rs
@@ -9,7 +9,7 @@ use common::{
     message::Message,
     types::{
         ContainerProcess, PlatformInfo, SandboxConfig, SandboxRequest, SandboxResponse,
-        SandboxStatusInfo, StartSandboxInfo, TaskRequest, TaskResponse,
+        SandboxStatusInfo, StartSandboxInfo, TaskRequest, TaskResponse, DEFAULT_SHM_SIZE,
     },
     RuntimeHandler, RuntimeInstance, Sandbox, SandboxNetworkEnv,
 };
@@ -17,12 +17,15 @@ use common::{
 use hypervisor::Param;
 use kata_sys_util::{mount::get_mount_path, spec::load_oci_spec};
 use kata_types::{
-    annotations::Annotation, config::default::DEFAULT_GUEST_DNS_FILE, config::TomlConfig,
+    annotations::Annotation,
+    config::{default::DEFAULT_GUEST_DNS_FILE, TomlConfig},
+    mount::SHM_DEVICE,
 };
 #[cfg(feature = "linux")]
 use linux_container::LinuxContainer;
 use logging::FILTER_RULE;
 use netns_rs::NetNs;
+use nix::sys::statfs;
 use oci_spec::runtime as oci;
 use persist::sandbox_persist::Persist;
 use resource::{
@@ -355,6 +358,8 @@ impl RuntimeHandlerManager {
             network_created,
         };
 
+        let shm_size = get_shm_size(spec)?;
+
         let sandbox_config = SandboxConfig {
             sandbox_id: inner.id.clone(),
             dns,
@@ -363,6 +368,7 @@ impl RuntimeHandlerManager {
             annotations: spec.annotations().clone().unwrap_or_default(),
             hooks: spec.hooks().clone(),
             state: state.clone(),
+            shm_size,
         };
 
         inner.try_init(sandbox_config, Some(spec), options).await
@@ -714,3 +720,34 @@ fn update_component_log_level(config: &TomlConfig) {
         updated_inner
     });
 }
+
+/// Work out the size, in bytes, of the sandbox shm tmpfs from the OCI spec.
+///
+/// The result starts at `DEFAULT_SHM_SIZE`. Every mount whose destination is `SHM_DEVICE`,
+/// whose type is "bind" and whose source is something other than `SHM_DEVICE` replaces it
+/// with `blocks * block_size` as reported by `statfs` on that source.
+///
+/// # Errors
+///
+/// Fails if `statfs` on such a mount source fails.
+fn get_shm_size(spec: &oci::Spec) -> Result<u64> {
+    let mut shm_size = DEFAULT_SHM_SIZE;
+
+    for m in spec.mounts().iter().flatten() {
+        if m.destination().as_path() != Path::new(SHM_DEVICE) {
+            continue;
+        }
+
+        // Match on borrowed values instead of allocating a String/PathBuf per mount.
+        let src = match (m.typ().as_deref(), m.source().as_deref()) {
+            (Some("bind"), Some(src)) if src != Path::new(SHM_DEVICE) => src,
+            _ => continue,
+        };
+
+        let stat = statfs::statfs(src)?;
+        // Saturate rather than wrap if the reported geometry is implausibly large.
+        shm_size = stat.blocks().saturating_mul(stat.block_size() as u64);
+    }
+
+    Ok(shm_size)
+}
diff --git a/src/runtime-rs/crates/runtimes/virt_container/src/sandbox.rs b/src/runtime-rs/crates/runtimes/virt_container/src/sandbox.rs
index 605bd7cf97..cc4f403e89 100644
--- a/src/runtime-rs/crates/runtimes/virt_container/src/sandbox.rs
+++ b/src/runtime-rs/crates/runtimes/virt_container/src/sandbox.rs
@@ -12,9 +12,12 @@ use agent::{
 };
 use anyhow::{anyhow, Context, Result};
 use async_trait::async_trait;
-use common::message::{Action, Message};
 use common::types::utils::option_system_time_into;
 use common::types::ContainerProcess;
+use common::{
+    message::{Action, Message},
+    types::DEFAULT_SHM_SIZE,
+};
 use common::{
     types::{SandboxConfig, SandboxExitInfo, SandboxStatus},
     ContainerManager, Sandbox, SandboxNetworkEnv,
@@ -92,6 +95,7 @@ pub struct VirtSandbox {
     hypervisor: Arc<dyn Hypervisor>,
     monitor: Arc<HealthCheck>,
     sandbox_config: Option<SandboxConfig>,
+    shm_size: u64,
 }
 
 impl std::fmt::Debug for VirtSandbox {
@@ -122,6 +126,7 @@ impl VirtSandbox {
             hypervisor,
             resource_manager,
             monitor: Arc::new(HealthCheck::new(true, keep_abnormal)),
+            shm_size: sandbox_config.shm_size,
             sandbox_config: Some(sandbox_config),
         })
     }
@@ -607,7 +612,7 @@ impl Sandbox for VirtSandbox {
             dns: sandbox_config.dns.clone(),
             storages: self
                 .resource_manager
-                .get_storage_for_sandbox()
+                .get_storage_for_sandbox(self.shm_size)
                 .await
                 .context("get storages for sandbox")?,
             sandbox_pidns: false,
@@ -923,6 +928,9 @@ impl Persist for VirtSandbox {
             resource_manager,
             monitor: Arc::new(HealthCheck::new(true, keep_abnormal)),
             sandbox_config: None,
+            // NOTE(review): restore does not recover the original shm size and falls back
+            // to the default here -- confirm this is acceptable for restored sandboxes.
+            shm_size: DEFAULT_SHM_SIZE,
         })
     }
 }