diff --git a/src/libs/kata-types/src/mount.rs b/src/libs/kata-types/src/mount.rs index 848bde6355..eac2afbffb 100644 --- a/src/libs/kata-types/src/mount.rs +++ b/src/libs/kata-types/src/mount.rs @@ -63,6 +63,8 @@ pub const KATA_VIRTUAL_VOLUME_IMAGE_NYDUS_FS: &str = "image_nydus_fs"; pub const KATA_VIRTUAL_VOLUME_LAYER_NYDUS_FS: &str = "layer_nydus_fs"; /// Download and extra container image inside guest vm. pub const KATA_VIRTUAL_VOLUME_IMAGE_GUEST_PULL: &str = "image_guest_pull"; +/// In CoCo scenario, we support force_guest_pull to enforce container image guest pull without remote snapshotter. +pub const KATA_IMAGE_FORCE_GUEST_PULL: &str = "force_guest_pull"; /// Manager to manage registered storage device handlers. pub type StorageHandlerManager = HandlerManager; @@ -512,6 +514,29 @@ pub fn split_bind_mounts(bindmount: &str) -> (&str, &str) { (real_path, mode) } +/// This function, adjust_rootfs_mounts, manages the root filesystem mounts based on guest-pull mechanism. +/// - the function disregards any provided rootfs_mounts. +/// Instead, it forcefully creates a single, default KataVirtualVolume specifically for guest-pull operations. +/// This volume's representation is then base64-encoded and added as the only option to a new, singular Mount entry, +/// which becomes the sole item in the returned Vec. +/// This ensures that when guest pull is active, the root filesystem is exclusively configured via this virtual volume. +pub fn adjust_rootfs_mounts() -> Result> { + // We enforce a single, default KataVirtualVolume as the exclusive rootfs mount. + let volume = KataVirtualVolume::new(KATA_VIRTUAL_VOLUME_IMAGE_GUEST_PULL.to_string()); + + // Convert the virtual volume to a base64 string for the mount option. + let b64_vol = volume + .to_base64() + .context("failed to base64 encode KataVirtualVolume")?; + + // Create a new Vec with a single Mount entry. + // This Mount's options will contain the base64-encoded virtual volume. + Ok(vec![Mount { + options: vec![format!("{}={}", "io.katacontainers.volume", b64_vol)], + ..Default::default() // Use default values for other Mount fields + }]) +} + #[cfg(test)] mod tests { use super::*; @@ -681,4 +706,36 @@ mod tests { ); assert_eq!(volume.fs_type.as_str(), "rafsv6") } + + #[test] + fn test_adjust_rootfs_mounts_basic_success() { + let result = adjust_rootfs_mounts(); + assert!(result.is_ok()); + let mounts = result.unwrap(); + + // 1. Mount length is 1 + assert_eq!(mounts.len(), 1); + let returned_mount = &mounts[0]; + + // 2. Verify Mount's fields and ensure source, destination, typ with default value + let expected_default_mount = Mount::default(); + assert_eq!(returned_mount.source, expected_default_mount.source); + assert_eq!( + returned_mount.destination, + expected_default_mount.destination + ); + assert_eq!(returned_mount.fs_type, expected_default_mount.fs_type); + + // 3. Mount's options + assert_eq!(returned_mount.options.len(), 1); + let option_str = &returned_mount.options[0]; + assert!(option_str.starts_with("io.katacontainers.volume=")); + + let expected_volume_obj = + KataVirtualVolume::new(KATA_VIRTUAL_VOLUME_IMAGE_GUEST_PULL.to_string()); + let expected_b64_vol = expected_volume_obj.to_base64().unwrap(); + let (_prefix, encoded_vol) = option_str.split_once("io.katacontainers.volume=").unwrap(); + + assert_eq!(encoded_vol, expected_b64_vol); + } } diff --git a/src/runtime-rs/crates/resource/src/manager.rs b/src/runtime-rs/crates/resource/src/manager.rs index f2b35996fb..05e0b04605 100644 --- a/src/runtime-rs/crates/resource/src/manager.rs +++ b/src/runtime-rs/crates/resource/src/manager.rs @@ -4,6 +4,7 @@ // SPDX-License-Identifier: Apache-2.0 // +use std::collections::HashMap; use std::sync::Arc; use agent::{Agent, Storage}; @@ -100,10 +101,11 @@ impl ResourceManager { root: &oci::Root, bundle_path: &str, rootfs_mounts: &[Mount], + annotations: &HashMap, ) -> Result> { let inner = self.inner.read().await; inner - .handler_rootfs(cid, root, bundle_path, rootfs_mounts) + .handler_rootfs(cid, root, bundle_path, rootfs_mounts, annotations) .await } diff --git a/src/runtime-rs/crates/resource/src/manager_inner.rs b/src/runtime-rs/crates/resource/src/manager_inner.rs index ba5eae5cf5..2f419c4100 100644 --- a/src/runtime-rs/crates/resource/src/manager_inner.rs +++ b/src/runtime-rs/crates/resource/src/manager_inner.rs @@ -4,7 +4,7 @@ // SPDX-License-Identifier: Apache-2.0 // -use std::{sync::Arc, thread}; +use std::{collections::HashMap, sync::Arc, thread}; use agent::{types::Device, Agent, OnlineCPUMemRequest, Storage}; use anyhow::{anyhow, Context, Ok, Result}; @@ -17,8 +17,11 @@ use hypervisor::{ }, BlockConfig, Hypervisor, VfioConfig, }; -use kata_types::config::{hypervisor::TopologyConfigInfo, TomlConfig}; use kata_types::mount::Mount; +use kata_types::{ + config::{hypervisor::TopologyConfigInfo, TomlConfig}, + mount::{adjust_rootfs_mounts, KATA_IMAGE_FORCE_GUEST_PULL}, +}; use oci::{Linux, LinuxCpu, LinuxResources}; use oci_spec::runtime::{self as oci, LinuxDeviceType}; use persist::sandbox_persist::Persist; @@ -320,7 +323,18 @@ impl ResourceManagerInner { root: &oci::Root, bundle_path: &str, rootfs_mounts: &[Mount], + annotations: &HashMap, ) -> Result> { + let adjust_rootfs_mounts = if !self + .config() + .runtime + .is_experiment_enabled(KATA_IMAGE_FORCE_GUEST_PULL) + { + rootfs_mounts.to_vec() + } else { + adjust_rootfs_mounts()? + }; + self.rootfs_resource .handler_rootfs( &self.share_fs, @@ -330,7 +344,8 @@ impl ResourceManagerInner { cid, root, bundle_path, - rootfs_mounts, + &adjust_rootfs_mounts, + annotations, ) .await } diff --git a/src/runtime-rs/crates/resource/src/rootfs/mod.rs b/src/runtime-rs/crates/resource/src/rootfs/mod.rs index a9dbf5a827..e0d34e22ba 100644 --- a/src/runtime-rs/crates/resource/src/rootfs/mod.rs +++ b/src/runtime-rs/crates/resource/src/rootfs/mod.rs @@ -11,9 +11,12 @@ use anyhow::{anyhow, Context, Result}; use async_trait::async_trait; use kata_types::mount::Mount; mod block_rootfs; -use hypervisor::{device::device_manager::DeviceManager, Hypervisor}; +pub mod virtual_volume; -use std::{sync::Arc, vec::Vec}; +use hypervisor::{device::device_manager::DeviceManager, Hypervisor}; +use virtual_volume::{is_kata_virtual_volume, VirtualVolume}; + +use std::{collections::HashMap, sync::Arc, vec::Vec}; use tokio::sync::RwLock; use self::{block_rootfs::is_block_rootfs, nydus_rootfs::NYDUS_ROOTFS_TYPE}; @@ -66,6 +69,7 @@ impl RootFsResource { root: &oci::Root, bundle_path: &str, rootfs_mounts: &[Mount], + annotations: &HashMap, ) -> Result> { match rootfs_mounts { // if rootfs_mounts is empty @@ -90,6 +94,17 @@ impl RootFsResource { // Safe as single_layer_rootfs must have one layer let layer = &mounts_vec[0]; let mut inner = self.inner.write().await; + + if is_guest_pull_volume(share_fs, layer) { + let mount_options = layer.options.clone(); + let virtual_volume: Arc = Arc::new( + VirtualVolume::new(cid, annotations, mount_options.to_vec()) + .await + .context("kata virtual volume failed.")?, + ); + return Ok(virtual_volume); + } + let rootfs = if let Some(dev_id) = is_block_rootfs(&layer.source) { // handle block rootfs info!(sl!(), "block device: {}", dev_id); @@ -158,3 +173,10 @@ impl RootFsResource { fn is_single_layer_rootfs(rootfs_mounts: &[Mount]) -> bool { rootfs_mounts.len() == 1 } + +pub fn is_guest_pull_volume( + share_fs: &Option>, + m: &kata_types::mount::Mount, +) -> bool { + share_fs.is_none() && is_kata_virtual_volume(m) +} diff --git a/src/runtime-rs/crates/resource/src/rootfs/virtual_volume.rs b/src/runtime-rs/crates/resource/src/rootfs/virtual_volume.rs new file mode 100644 index 0000000000..32cf69a76a --- /dev/null +++ b/src/runtime-rs/crates/resource/src/rootfs/virtual_volume.rs @@ -0,0 +1,297 @@ +// Copyright (c) 2024-2025 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::path::Path; +use std::str::FromStr; +use std::{collections::HashMap, path::PathBuf}; + +use anyhow::{anyhow, Context, Result}; +use async_trait::async_trait; +use oci_spec::runtime as oci; +use serde_json; +use tokio::sync::RwLock; + +use hypervisor::device::device_manager::DeviceManager; +use kata_types::{ + annotations, + container::ContainerType, + mount::{KataVirtualVolume, KATA_VIRTUAL_VOLUME_IMAGE_GUEST_PULL}, +}; + +/// Image guest-pull related consts +const KUBERNETES_CRI_IMAGE_NAME: &str = "io.kubernetes.cri.image-name"; +const KUBERNETES_CRIO_IMAGE_NAME: &str = "io.kubernetes.cri-o.ImageName"; +const KATA_VIRTUAL_VOLUME_PREFIX: &str = "io.katacontainers.volume="; +const KATA_VIRTUAL_VOLUME_TYPE_OVERLAY_FS: &str = "overlayfs"; +const KATA_GUEST_ROOT_SHARED_FS: &str = "/run/kata-containers/"; + +const CRI_CONTAINER_TYPE_KEY_LIST: &[&str] = &[ + // cri containerd + annotations::cri_containerd::CONTAINER_TYPE_LABEL_KEY, + // cri-o + annotations::crio::CONTAINER_TYPE_LABEL_KEY, +]; + +/// Retrieves the image reference from OCI spec annotations. +/// +/// It checks known Kubernetes CRI and CRI-O annotation keys for the container type. +/// If the container is a PodSandbox, it returns "pause". +/// Otherwise, it attempts to find the image name using the appropriate Kubernetes +/// annotation key. +pub fn get_image_reference(spec_annotations: &HashMap) -> Result<&str> { + info!( + sl!(), + "get image reference from spec annotation: {:?}", spec_annotations + ); + for &key in CRI_CONTAINER_TYPE_KEY_LIST { + if let Some(type_value) = spec_annotations.get(key) { + if let Ok(container_type) = ContainerType::from_str(type_value) { + return match container_type { + ContainerType::PodSandbox => Ok("pause"), + _ => { + let image_name_key = if key == annotations::crio::CONTAINER_TYPE_LABEL_KEY { + KUBERNETES_CRIO_IMAGE_NAME + } else { + KUBERNETES_CRI_IMAGE_NAME + }; + + spec_annotations + .get(image_name_key) + .map(AsRef::as_ref) + .ok_or_else(|| anyhow!("get image reference failed")) + } + }; + } + } + } + + Err(anyhow!("no target image reference found")) +} + +/// Handles storage configuration for KataVirtualVolume based on its type. +/// Specifically processes `KATA_VIRTUAL_VOLUME_IMAGE_GUEST_PULL` type. +/// Processes a virtual volume specifically for image guest-pull scenarios. +/// It enriches the volume info with image reference and metadata, then serializes it into agent.Storage. +fn handle_virtual_volume_storage( + cid: &str, + annotations: &HashMap, + virt_volume: &KataVirtualVolume, +) -> Result { + if virt_volume.volume_type.as_str() == KATA_VIRTUAL_VOLUME_IMAGE_GUEST_PULL { + // get container image reference + let image_ref = get_image_reference(annotations).context("get image reference failed.")?; + + let mut virtual_volume_info = virt_volume.clone(); + // Merge metadata + for (k, v) in annotations.iter() { + if let Some(ref mut image_pull) = virtual_volume_info.image_pull { + image_pull.metadata.insert(k.to_owned(), v.to_owned()); + } + } + // Serialize ImagePull as JSON + let image_pull_info = serde_json::to_string(&virtual_volume_info.image_pull) + .map_err(|e| anyhow!(e.to_string()))?; + + // build the agent Storage Object + Ok(agent::Storage { + source: image_ref.to_string(), + driver: KATA_VIRTUAL_VOLUME_IMAGE_GUEST_PULL.to_string(), + driver_options: vec![format!( + "{}={}", + KATA_VIRTUAL_VOLUME_IMAGE_GUEST_PULL, image_pull_info + )], + fs_type: KATA_VIRTUAL_VOLUME_TYPE_OVERLAY_FS.to_string(), + mount_point: Path::new(KATA_GUEST_ROOT_SHARED_FS) + .join(cid) + .join("rootfs") + .display() + .to_string(), + ..Default::default() + }) + } else { + Err(anyhow!( + "Error in handle virtual volume storage with unsupported type: {:?}", + virt_volume.volume_type + )) + } +} + +#[derive(Clone, Debug, Default)] +pub(crate) struct VirtualVolume { + guest_path: PathBuf, + storages: Vec, +} + +impl VirtualVolume { + #[allow(dead_code)] + pub(crate) async fn new( + cid: &str, + annotations: &HashMap, + options: Vec, + ) -> Result { + let mut volumes = Vec::new(); + + // It's ensured that the virtual volume info is indeed existing in the options, as pre-check with `is_kata_virtual_volume()` is done. + for o in options.iter() { + // Iterate over reference to avoid consuming options + if let Some(stripped_str) = o.strip_prefix(KATA_VIRTUAL_VOLUME_PREFIX) { + // Ensure `from_base64` provides a descriptive error on failure + let virt_volume = KataVirtualVolume::from_base64(stripped_str).context(format!( + "Failed to decode KataVirtualVolume from base64: {}", + stripped_str + ))?; + + let vol = handle_virtual_volume_storage(cid, annotations, &virt_volume) + .context("Failed to handle virtual volume storage object")?; + volumes.push(vol); + + // As there's only one virtual volume supported, when the first one is hit, just break the iteration. + break; + } + } + + let guest_path = Path::new(KATA_GUEST_ROOT_SHARED_FS) + .join(cid) + .join("rootfs") + .to_path_buf(); + + Ok(VirtualVolume { + guest_path, + storages: volumes, + }) + } +} + +#[async_trait] +impl super::Rootfs for VirtualVolume { + async fn get_guest_rootfs_path(&self) -> Result { + Ok(self.guest_path.display().to_string().clone()) + } + + async fn get_rootfs_mount(&self) -> Result> { + Ok(vec![]) + } + + async fn get_storage(&self) -> Option { + Some(self.storages[0].clone()) + } + + async fn get_device_id(&self) -> Result> { + Ok(None) + } + + async fn cleanup(&self, _device_manager: &RwLock) -> Result<()> { + Ok(()) + } +} + +pub fn is_kata_virtual_volume(m: &kata_types::mount::Mount) -> bool { + let options = m.options.clone(); + options + .iter() + .any(|o| o.starts_with(&KATA_VIRTUAL_VOLUME_PREFIX.to_owned())) +} + +#[cfg(test)] +mod tests { + use crate::rootfs::virtual_volume::{ + KATA_GUEST_ROOT_SHARED_FS, KATA_VIRTUAL_VOLUME_PREFIX, KATA_VIRTUAL_VOLUME_TYPE_OVERLAY_FS, + }; + + use super::get_image_reference; + use super::VirtualVolume; + use kata_types::mount::{KataVirtualVolume, KATA_VIRTUAL_VOLUME_IMAGE_GUEST_PULL}; + use std::{collections::HashMap, path::Path}; + + #[test] + fn test_get_image_ref() { + // Test Standard cri-containerd image name + let mut annotations_x = HashMap::new(); + annotations_x.insert( + "io.kubernetes.cri.container-type".to_string(), + "container".to_string(), + ); + annotations_x.insert( + "io.kubernetes.cri.image-name".to_string(), + "example-image-x".to_string(), + ); + let image_ref_result_crio = get_image_reference(&annotations_x); + assert!(image_ref_result_crio.is_ok()); + assert_eq!(image_ref_result_crio.unwrap(), "example-image-x"); + + // Test cri-o image name + let mut annotations_crio = HashMap::new(); + annotations_crio.insert( + "io.kubernetes.cri-o.ContainerType".to_string(), + "container".to_string(), + ); + annotations_crio.insert( + "io.kubernetes.cri-o.ImageName".to_string(), + "example-image-y".to_string(), + ); + let image_ref_result_crio = get_image_reference(&annotations_crio); + assert!(image_ref_result_crio.is_ok()); + assert_eq!(image_ref_result_crio.unwrap(), "example-image-y"); + + // Test PodSandbox type + let mut annotations_pod_sandbox = HashMap::new(); + annotations_pod_sandbox.insert( + "io.kubernetes.cri.container-type".to_string(), + "sandbox".to_string(), + ); + let image_ref_result_pod_sandbox = get_image_reference(&annotations_pod_sandbox); + assert!(image_ref_result_pod_sandbox.is_ok()); + assert_eq!(image_ref_result_pod_sandbox.unwrap(), "pause"); + } + + #[tokio::test] + async fn test_virtual_volume_new_success() { + let virt_vol = KataVirtualVolume { + volume_type: KATA_VIRTUAL_VOLUME_IMAGE_GUEST_PULL.to_string(), + source: KATA_VIRTUAL_VOLUME_TYPE_OVERLAY_FS.to_owned(), + ..Default::default() + }; + let encoded_vol = virt_vol.to_base64().unwrap(); + let options = vec![format!("{}{}", KATA_VIRTUAL_VOLUME_PREFIX, encoded_vol)]; + + let mut annotations = HashMap::new(); + annotations.insert( + "io.kubernetes.cri.container-type".to_string(), + "container".to_string(), + ); + annotations.insert( + "io.kubernetes.cri.image-name".to_string(), + "example-image-x".to_string(), + ); + + let cid = "4adf90201"; + let result = VirtualVolume::new(cid, &annotations, options).await; + assert!(result.is_ok()); + let virt_vol_obj = result.unwrap(); + + // 1. Verify guest_path + let expected_guest_path = Path::new(KATA_GUEST_ROOT_SHARED_FS) + .join(cid) + .join("rootfs"); + assert_eq!(virt_vol_obj.guest_path, expected_guest_path); + + // 2. Verify storages vector length + assert_eq!(virt_vol_obj.storages.len(), 1); + + // 3. Verify the content of the AgentStorage object + let storage = &virt_vol_obj.storages[0]; + + assert_eq!(storage.source, "example-image-x".to_string()); + assert_eq!(storage.driver, KATA_VIRTUAL_VOLUME_IMAGE_GUEST_PULL); + assert_eq!(storage.fs_type, KATA_VIRTUAL_VOLUME_TYPE_OVERLAY_FS); + + let expected_mount_point = Path::new(KATA_GUEST_ROOT_SHARED_FS) + .join(cid) + .join("rootfs") + .display() + .to_string(); + assert_eq!(storage.mount_point, expected_mount_point); + } +} diff --git a/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/container.rs b/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/container.rs index 75c6427c0a..93cd6effd0 100644 --- a/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/container.rs +++ b/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/container.rs @@ -110,6 +110,8 @@ impl Container { // is 'true'. None => true, }; + let annotations = spec.annotations().clone().unwrap_or_default(); + amend_spec( &mut spec, toml_config.runtime.disable_guest_seccomp, @@ -131,6 +133,7 @@ impl Container { root, &config.bundle, &config.rootfs_mounts, + &annotations, ) .await .context("handler rootfs")?;