mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-09-24 10:17:21 +00:00
runtime-rs: add the sandbox's shm volume support
Docker containers support specifying the shm size using the --shm-size option and support sandbox-level shm volumes, so we've added support for shm volumes. Since Kubernetes doesn't support specifying the shm size, it typically uses a memory-based emptydir as the container's shm, and its size can be specified. Signed-off-by: Fupan Li <fupan.lfp@antgroup.com>
This commit is contained in:
committed by
Fabiano Fidêncio
parent
d48c542a52
commit
4a92fc1129
@@ -11,7 +11,6 @@
|
||||
|
||||
use kata_types::mount;
|
||||
use oci_spec::runtime::{Mount, Spec};
|
||||
use std::path::Path;
|
||||
|
||||
use crate::mount::get_linux_mount_info;
|
||||
|
||||
@@ -34,9 +33,8 @@ pub fn is_ephemeral_volume(mount: &Mount) -> bool {
|
||||
mount.destination(),
|
||||
|
||||
),
|
||||
(Some("bind"), Some(source), dest) if get_linux_mount_info(source).is_ok_and(|info| info.fs_type == "tmpfs") &&
|
||||
(is_empty_dir(source) || dest.as_path() == Path::new("/dev/shm"))
|
||||
)
|
||||
(Some("bind"), Some(source), _dest) if get_linux_mount_info(source).is_ok_and(|info| info.fs_type == "tmpfs") &&
|
||||
is_empty_dir(source))
|
||||
}
|
||||
|
||||
/// Check whether the given path is a kubernetes empty-dir volume of medium "default".
|
||||
|
@@ -68,6 +68,12 @@ pub const KATA_VIRTUAL_VOLUME_LAYER_NYDUS_FS: &str = "layer_nydus_fs";
|
||||
pub const KATA_VIRTUAL_VOLUME_IMAGE_GUEST_PULL: &str = "image_guest_pull";
|
||||
/// In CoCo scenario, we support force_guest_pull to enforce container image guest pull without remote snapshotter.
|
||||
pub const KATA_IMAGE_FORCE_GUEST_PULL: &str = "force_guest_pull";
|
||||
/// kata default guest sandbox dir.
|
||||
pub const DEFAULT_KATA_GUEST_SANDBOX_DIR: &str = "/run/kata-containers/sandbox/";
|
||||
/// default shm directory name.
|
||||
pub const SHM_DIR: &str = "shm";
|
||||
/// shm device path.
|
||||
pub const SHM_DEVICE: &str = "/dev/shm";
|
||||
|
||||
/// Manager to manage registered storage device handlers.
|
||||
pub type StorageHandlerManager<H> = HandlerManager<H>;
|
||||
|
@@ -90,9 +90,9 @@ impl ResourceManager {
|
||||
inner.setup_after_start_vm().await
|
||||
}
|
||||
|
||||
pub async fn get_storage_for_sandbox(&self) -> Result<Vec<Storage>> {
|
||||
pub async fn get_storage_for_sandbox(&self, shm_size: u64) -> Result<Vec<Storage>> {
|
||||
let inner = self.inner.read().await;
|
||||
inner.get_storage_for_sandbox().await
|
||||
inner.get_storage_for_sandbox(shm_size).await
|
||||
}
|
||||
|
||||
pub async fn handler_rootfs(
|
||||
|
@@ -17,7 +17,9 @@ use hypervisor::{
|
||||
},
|
||||
BlockConfig, Hypervisor, VfioConfig,
|
||||
};
|
||||
use kata_types::mount::Mount;
|
||||
use kata_types::mount::{
|
||||
Mount, DEFAULT_KATA_GUEST_SANDBOX_DIR, KATA_EPHEMERAL_VOLUME_TYPE, SHM_DIR,
|
||||
};
|
||||
use kata_types::{
|
||||
config::{hypervisor::TopologyConfigInfo, TomlConfig},
|
||||
mount::{adjust_rootfs_mounts, KATA_IMAGE_FORCE_GUEST_PULL},
|
||||
@@ -326,12 +328,33 @@ impl ResourceManagerInner {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn get_storage_for_sandbox(&self) -> Result<Vec<Storage>> {
|
||||
pub async fn get_storage_for_sandbox(&self, shm_size: u64) -> Result<Vec<Storage>> {
|
||||
let mut storages = vec![];
|
||||
if let Some(d) = self.share_fs.as_ref() {
|
||||
let mut s = d.get_storages().await.context("get storage")?;
|
||||
storages.append(&mut s);
|
||||
}
|
||||
|
||||
let shm_size_option = format!("size={}", shm_size);
|
||||
let mount_point = format!("{}/{}", DEFAULT_KATA_GUEST_SANDBOX_DIR, SHM_DIR);
|
||||
|
||||
let shm_storage = Storage {
|
||||
driver: KATA_EPHEMERAL_VOLUME_TYPE.to_string(),
|
||||
mount_point,
|
||||
source: "shm".to_string(),
|
||||
fs_type: "tmpfs".to_string(),
|
||||
options: vec![
|
||||
"noexec".to_string(),
|
||||
"nosuid".to_string(),
|
||||
"nodev".to_string(),
|
||||
"mode=1777".to_string(),
|
||||
shm_size_option,
|
||||
],
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
storages.push(shm_storage);
|
||||
|
||||
Ok(storages)
|
||||
}
|
||||
|
||||
|
@@ -40,8 +40,6 @@ const KATA_HOST_SHARED_DIR: &str = "/run/kata-containers/shared/sandboxes/";
|
||||
/// share fs (for example virtio-fs) mount path in the guest
|
||||
pub const KATA_GUEST_SHARE_DIR: &str = "/run/kata-containers/shared/containers/";
|
||||
|
||||
pub(crate) const DEFAULT_KATA_GUEST_SANDBOX_DIR: &str = "/run/kata-containers/sandbox/";
|
||||
|
||||
pub const PASSTHROUGH_FS_DIR: &str = "passthrough";
|
||||
const RAFS_DIR: &str = "rafs";
|
||||
|
||||
|
@@ -7,11 +7,11 @@
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use super::Volume;
|
||||
use crate::share_fs::DEFAULT_KATA_GUEST_SANDBOX_DIR;
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use async_trait::async_trait;
|
||||
use hypervisor::device::device_manager::DeviceManager;
|
||||
use kata_sys_util::mount::{get_mount_path, get_mount_type};
|
||||
use kata_types::mount::DEFAULT_KATA_GUEST_SANDBOX_DIR;
|
||||
use kata_types::mount::KATA_EPHEMERAL_VOLUME_TYPE;
|
||||
use nix::sys::stat::stat;
|
||||
use oci_spec::runtime as oci;
|
||||
|
@@ -9,6 +9,7 @@ mod default_volume;
|
||||
mod ephemeral_volume;
|
||||
pub mod hugepage;
|
||||
mod share_fs_volume;
|
||||
mod shm_volume;
|
||||
pub mod utils;
|
||||
|
||||
pub mod direct_volume;
|
||||
@@ -67,10 +68,15 @@ impl VolumeResource {
|
||||
// handle mounts
|
||||
for m in oci_mounts {
|
||||
let read_only = get_mount_options(m.options()).iter().any(|opt| opt == "ro");
|
||||
let volume: Arc<dyn Volume> = if ephemeral_volume::is_ephemeral_volume(m) {
|
||||
let volume: Arc<dyn Volume> = if shm_volume::is_shm_volume(m) {
|
||||
Arc::new(
|
||||
shm_volume::ShmVolume::new(m)
|
||||
.with_context(|| format!("new shm volume {:?}", m))?,
|
||||
)
|
||||
} else if ephemeral_volume::is_ephemeral_volume(m) {
|
||||
Arc::new(
|
||||
ephemeral_volume::EphemeralVolume::new(m)
|
||||
.with_context(|| format!("new shm volume {:?}", m))?,
|
||||
.with_context(|| format!("new ephemeral volume {:?}", m))?,
|
||||
)
|
||||
} else if is_block_volume(m) {
|
||||
// handle block volume
|
||||
|
63
src/runtime-rs/crates/resource/src/volume/shm_volume.rs
Normal file
63
src/runtime-rs/crates/resource/src/volume/shm_volume.rs
Normal file
@@ -0,0 +1,63 @@
|
||||
// Copyright (c) 2019-2022 Alibaba Cloud
|
||||
// Copyright (c) 2019-2022 Ant Group
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use std::path::PathBuf;
|
||||
|
||||
use super::Volume;
|
||||
use anyhow::Result;
|
||||
use async_trait::async_trait;
|
||||
use hypervisor::device::device_manager::DeviceManager;
|
||||
use kata_sys_util::mount::{get_mount_path, get_mount_type};
|
||||
use kata_types::mount::{
|
||||
DEFAULT_KATA_GUEST_SANDBOX_DIR, KATA_EPHEMERAL_VOLUME_TYPE, SHM_DEVICE, SHM_DIR,
|
||||
};
|
||||
use oci_spec::runtime as oci;
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct ShmVolume {
|
||||
mount: oci::Mount,
|
||||
}
|
||||
|
||||
impl ShmVolume {
|
||||
pub(crate) fn new(m: &oci::Mount) -> Result<Self> {
|
||||
let mut mount = oci::Mount::default();
|
||||
mount.set_destination(m.destination().clone());
|
||||
mount.set_typ(Some("bind".to_string()));
|
||||
mount.set_source(Some(
|
||||
PathBuf::from(DEFAULT_KATA_GUEST_SANDBOX_DIR).join(SHM_DIR),
|
||||
));
|
||||
mount.set_options(Some(vec!["rbind".to_string()]));
|
||||
|
||||
Ok(Self { mount })
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Volume for ShmVolume {
|
||||
fn get_volume_mount(&self) -> anyhow::Result<Vec<oci::Mount>> {
|
||||
Ok(vec![self.mount.clone()])
|
||||
}
|
||||
|
||||
fn get_storage(&self) -> Result<Vec<agent::Storage>> {
|
||||
Ok(vec![])
|
||||
}
|
||||
|
||||
async fn cleanup(&self, _device_manager: &RwLock<DeviceManager>) -> Result<()> {
|
||||
// No cleanup is required for ShmVolume because it is a mount in guest which
|
||||
// does not require explicit unmounting or deletion in host side.
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn get_device_id(&self) -> Result<Option<String>> {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn is_shm_volume(m: &oci::Mount) -> bool {
|
||||
get_mount_path(&Some(m.destination().clone())).as_str() == SHM_DEVICE
|
||||
&& get_mount_type(m).as_str() != KATA_EPHEMERAL_VOLUME_TYPE
|
||||
}
|
@@ -23,6 +23,10 @@ use kata_types::mount::Mount;
|
||||
use oci_spec::runtime as oci;
|
||||
use strum::Display;
|
||||
|
||||
// DEFAULT_SHM_SIZE is the default shm size to be used in case host
|
||||
// IPC is used.
|
||||
pub const DEFAULT_SHM_SIZE: u64 = 65536 * 1024;
|
||||
|
||||
/// TaskRequest: TaskRequest from shim
|
||||
/// TaskRequest and TaskResponse messages need to be paired
|
||||
#[derive(Debug, Clone, Display)]
|
||||
@@ -176,6 +180,7 @@ pub struct SandboxConfig {
|
||||
pub annotations: HashMap<String, String, RandomState>,
|
||||
pub hooks: Option<oci::Hooks>,
|
||||
pub state: runtime_spec::State,
|
||||
pub shm_size: u64,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
|
@@ -8,6 +8,7 @@ use super::{
|
||||
ContainerConfig, ContainerID, ContainerProcess, ExecProcessRequest, KillRequest,
|
||||
ResizePTYRequest, SandboxConfig, SandboxID, SandboxNetworkEnv, SandboxRequest,
|
||||
SandboxStatusRequest, ShutdownRequest, StopSandboxRequest, TaskRequest, UpdateRequest,
|
||||
DEFAULT_SHM_SIZE,
|
||||
};
|
||||
|
||||
use kata_types::mount::Mount;
|
||||
@@ -84,6 +85,7 @@ impl TryFrom<sandbox_api::CreateSandboxRequest> for SandboxRequest {
|
||||
bundle: from.bundle_path,
|
||||
annotations: config.annotations,
|
||||
},
|
||||
shm_size: DEFAULT_SHM_SIZE,
|
||||
})))
|
||||
}
|
||||
}
|
||||
|
@@ -9,7 +9,7 @@ use common::{
|
||||
message::Message,
|
||||
types::{
|
||||
ContainerProcess, PlatformInfo, SandboxConfig, SandboxRequest, SandboxResponse,
|
||||
SandboxStatusInfo, StartSandboxInfo, TaskRequest, TaskResponse,
|
||||
SandboxStatusInfo, StartSandboxInfo, TaskRequest, TaskResponse, DEFAULT_SHM_SIZE,
|
||||
},
|
||||
RuntimeHandler, RuntimeInstance, Sandbox, SandboxNetworkEnv,
|
||||
};
|
||||
@@ -17,12 +17,15 @@ use common::{
|
||||
use hypervisor::Param;
|
||||
use kata_sys_util::{mount::get_mount_path, spec::load_oci_spec};
|
||||
use kata_types::{
|
||||
annotations::Annotation, config::default::DEFAULT_GUEST_DNS_FILE, config::TomlConfig,
|
||||
annotations::Annotation,
|
||||
config::{default::DEFAULT_GUEST_DNS_FILE, TomlConfig},
|
||||
mount::SHM_DEVICE,
|
||||
};
|
||||
#[cfg(feature = "linux")]
|
||||
use linux_container::LinuxContainer;
|
||||
use logging::FILTER_RULE;
|
||||
use netns_rs::NetNs;
|
||||
use nix::sys::statfs;
|
||||
use oci_spec::runtime as oci;
|
||||
use persist::sandbox_persist::Persist;
|
||||
use resource::{
|
||||
@@ -355,6 +358,8 @@ impl RuntimeHandlerManager {
|
||||
network_created,
|
||||
};
|
||||
|
||||
let shm_size = get_shm_size(spec)?;
|
||||
|
||||
let sandbox_config = SandboxConfig {
|
||||
sandbox_id: inner.id.clone(),
|
||||
dns,
|
||||
@@ -363,6 +368,7 @@ impl RuntimeHandlerManager {
|
||||
annotations: spec.annotations().clone().unwrap_or_default(),
|
||||
hooks: spec.hooks().clone(),
|
||||
state: state.clone(),
|
||||
shm_size,
|
||||
};
|
||||
|
||||
inner.try_init(sandbox_config, Some(spec), options).await
|
||||
@@ -714,3 +720,26 @@ fn update_component_log_level(config: &TomlConfig) {
|
||||
updated_inner
|
||||
});
|
||||
}
|
||||
|
||||
fn get_shm_size(spec: &oci::Spec) -> Result<u64> {
|
||||
let mut shm_size = DEFAULT_SHM_SIZE;
|
||||
|
||||
if let Some(mounts) = spec.mounts() {
|
||||
for m in mounts {
|
||||
if m.destination().as_path() != Path::new(SHM_DEVICE) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if m.typ().eq(&Some("bind".to_string()))
|
||||
&& !m.source().eq(&Some(PathBuf::from(SHM_DEVICE)))
|
||||
{
|
||||
if let Some(src) = m.source() {
|
||||
let statfs = statfs::statfs(src)?;
|
||||
shm_size = statfs.blocks() * statfs.block_size() as u64;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(shm_size)
|
||||
}
|
||||
|
@@ -12,9 +12,12 @@ use agent::{
|
||||
};
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use async_trait::async_trait;
|
||||
use common::message::{Action, Message};
|
||||
use common::types::utils::option_system_time_into;
|
||||
use common::types::ContainerProcess;
|
||||
use common::{
|
||||
message::{Action, Message},
|
||||
types::DEFAULT_SHM_SIZE,
|
||||
};
|
||||
use common::{
|
||||
types::{SandboxConfig, SandboxExitInfo, SandboxStatus},
|
||||
ContainerManager, Sandbox, SandboxNetworkEnv,
|
||||
@@ -92,6 +95,7 @@ pub struct VirtSandbox {
|
||||
hypervisor: Arc<dyn Hypervisor>,
|
||||
monitor: Arc<HealthCheck>,
|
||||
sandbox_config: Option<SandboxConfig>,
|
||||
shm_size: u64,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for VirtSandbox {
|
||||
@@ -122,6 +126,7 @@ impl VirtSandbox {
|
||||
hypervisor,
|
||||
resource_manager,
|
||||
monitor: Arc::new(HealthCheck::new(true, keep_abnormal)),
|
||||
shm_size: sandbox_config.shm_size,
|
||||
sandbox_config: Some(sandbox_config),
|
||||
})
|
||||
}
|
||||
@@ -607,7 +612,7 @@ impl Sandbox for VirtSandbox {
|
||||
dns: sandbox_config.dns.clone(),
|
||||
storages: self
|
||||
.resource_manager
|
||||
.get_storage_for_sandbox()
|
||||
.get_storage_for_sandbox(self.shm_size)
|
||||
.await
|
||||
.context("get storages for sandbox")?,
|
||||
sandbox_pidns: false,
|
||||
@@ -923,6 +928,7 @@ impl Persist for VirtSandbox {
|
||||
resource_manager,
|
||||
monitor: Arc::new(HealthCheck::new(true, keep_abnormal)),
|
||||
sandbox_config: None,
|
||||
shm_size: DEFAULT_SHM_SIZE,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user