diff --git a/.github/workflows/basic-ci-amd64.yaml b/.github/workflows/basic-ci-amd64.yaml index 0d79d314b0..1161e08474 100644 --- a/.github/workflows/basic-ci-amd64.yaml +++ b/.github/workflows/basic-ci-amd64.yaml @@ -137,7 +137,7 @@ jobs: fail-fast: false matrix: containerd_version: ['minimum', 'latest'] - vmm: ['clh', 'qemu', 'dragonball', 'qemu-runtime-rs'] + vmm: ['clh', 'qemu', 'dragonball', 'qemu-runtime-rs', 'clh-runtime-rs'] concurrency: group: ${{ github.workflow }}-${{ github.job }}-${{ github.event.pull_request.number || github.ref }}-run-nydus-amd64-${{ toJSON(matrix) }} cancel-in-progress: true diff --git a/Cargo.lock b/Cargo.lock index ed57a852a0..7d8a405b5b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6308,6 +6308,8 @@ dependencies = [ "futures", "futures-lite 2.6.1", "hex", + "hyper 0.14.32", + "hyperlocal", "hypervisor", "inotify 0.11.1", "kata-sys-util", diff --git a/docs/how-to/how-to-use-virtio-fs-nydus-with-kata.md b/docs/how-to/how-to-use-virtio-fs-nydus-with-kata.md index 0ea8bf73f7..dd5bb8d5c0 100644 --- a/docs/how-to/how-to-use-virtio-fs-nydus-with-kata.md +++ b/docs/how-to/how-to-use-virtio-fs-nydus-with-kata.md @@ -2,57 +2,432 @@ ## Introduction -Refer to [kata-`nydus`-design](../design/kata-nydus-design.md) for introduction and `nydus` has supported Kata Containers with hypervisor `QEMU` and `CLH` currently. +Nydus is a container image acceleration service that provides fast container startup and on-demand data loading. Kata Containers integrates with Nydus through virtio-fs, supporting two operational modes: -## How to +### Operational Modes -You can use Kata Containers with `nydus` as follows, +1. **Standalone Mode (virtio-fs-nydus)** + - Used with QEMU and Cloud-Hypervisor + - Nydusd runs as an independent process + - Mounts RAFS through nydusd and lets the guest kernel assemble overlayfs -1. Use [`nydus` latest branch](https://github.com/dragonflyoss/image-service); +2. 
**Inline Mode (inline-virtio-fs) / Builtin Nydus** + - Used with Dragonball VMM + - Nydusd is built into the VMM (builtin nydus) + - Lower resource overhead and better performance + - Suitable for lightweight VM scenarios -2. Deploy `nydus` environment as [`Nydus` Setup for Containerd Environment](https://github.com/dragonflyoss/image-service/blob/master/docs/containerd-env-setup.md); +Refer to [kata-nydus-design](../design/kata-nydus-design.md) for detailed design documentation. -3. Start `nydus-snapshotter` with `enable_nydus_overlayfs` enabled; +## Architecture Overview -4. Use [kata-containers](https://github.com/kata-containers/kata-containers) `latest` branch to compile and build `kata-containers.img`; +### Standalone Mode Architecture -5. Update `configuration-qemu.toml` or `configuration-clh.toml`to include: +``` +┌─────────────────────────────────────────────────────────┐ +│ Host System │ +│ ┌──────────────┐ ┌──────────────────────┐ │ +│ │ nydusd │◄────────┤ nydus-snapshotter │ │ +│ │ (standalone)│ │ │ │ +│ └──────┬───────┘ └──────────────────────┘ │ +│ │ virtiofs │ +│ ▼ │ +│ ┌──────────────┐ │ +│ │ QEMU │ │ +│ │ / Cloud-Hypervisor │ +│ └──────────────┘ │ +└─────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────┐ +│ Guest VM │ +│ ┌──────────────────────────────────────────────┐ │ +│ │ /run/kata-containers/shared/ │ │ +│ │ ├── containers//rootfs (overlay) │ │ +│ │ │ ├── upperdir │ │ +│ │ │ ├── workdir │ │ +│ │ │ └── lowerdir (from rafs) │ │ +│ │ └── rafs//lowerdir (nydus image) │ │ +│ └──────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────┘ +``` + +### Inline Mode (Builtin Nydus) Architecture + +``` +┌─────────────────────────────────────────────────────────┐ +│ Host System │ +│ ┌──────────────────────────────────────────────┐ │ +│ │ nydus-snapshotter │ │ +│ └──────────────────────────────────────────────┘ │ +│ │ +│ 
┌──────────────────────────────────────────────┐ │ +│ │ Dragonball VMM │ │ +│ │ ┌────────────────────────────────────────┐ │ │ +│ │ │ Builtin Nydusd (virtiofs server) │ │ │ +│ │ │ ┌──────────────────────────────────┐ │ │ │ +│ │ │ │ Vfs (Virtual File System) │ │ │ │ +│ │ │ │ ├── Rafs (nydus image backend) │ │ │ │ +│ │ │ │ └── PassthroughFs │ │ │ │ +│ │ │ └──────────────────────────────────┘ │ │ │ +│ │ └────────────────────────────────────────┘ │ │ +│ └──────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────┐ +│ Guest VM │ +│ ┌──────────────────────────────────────────────┐ │ +│ │ /run/kata-containers/shared/containers/ │ │ +│ │ ├── /rootfs (overlay mount) │ │ +│ │ ├── /rootfs_lower/ (Rafs mount) │ │ +│ │ └── /snapshotdir/ │ │ +│ │ ├── fs/ (upperdir) │ │ +│ │ └── work/ (workdir) │ │ +│ │ │ │ +│ │ Guest Kernel Overlay Assembly: │ │ +│ │ overlay lowerdir=rootfs_lower/ │ │ +│ │ upperdir=snapshotdir/fs/ │ │ +│ │ workdir=snapshotdir/work/ │ │ +│ └──────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────┘ +``` + +### Key Components + +- **nydusd**: The nydus daemon that provides: + - Virtiofs server for file sharing + - Rafs (Registry Accelerated File System) image mounting + - Passthrough filesystem for host-guest file sharing + +- **nydus-snapshotter**: Containerd snapshotter that: + - Manages nydus image layers + - Provides RAFS bootstrap/config/snapshot directory information to Kata + - Prepares container rootfs + - Communicates with Kata runtime + +- **Builtin Nydus (Inline Mode)**: Integrated nydus in Dragonball VMM: + - Runs inside the VMM process, no separate daemon needed + - Uses fuse_backend_rs Vfs to manage multiple filesystem backends + - Supports Rafs for nydus image mounting + - Supports PassthroughFs for host directory sharing + - Lower memory footprint and faster startup + - Overlay 
filesystem assembled by guest kernel instead of nydusd + +## Prerequisites + +### 1. Install Nydus + +Use the [nydus latest branch](https://github.com/dragonflyoss/image-service) and build nydusd: + +```bash +git clone https://github.com/dragonflyoss/image-service.git +cd image-service +make nydusd +``` + +### 2. Deploy Nydus Environment + +Deploy nydus environment as described in [Nydus Setup for Containerd Environment](https://github.com/dragonflyoss/image-service/blob/master/docs/containerd-env-setup.md). + +### 3. Start Nydus Snapshotter + +Start `nydus-snapshotter` with `enable_nydus_overlayfs` enabled: + +```bash +./nydus-snapshotter --enable-nydus-overlayfs +``` + +### 4. Build Kata Containers + +Use [kata-containers](https://github.com/kata-containers/kata-containers) `latest` branch to compile and build `kata-containers.img`. + +## Configuration + +### Shared Filesystem Types + +Kata Containers supports the following shared filesystem types: + +| Type | Description | Hypervisor | +|------|-------------|------------| +| `virtio-fs` | Standard virtio-fs with virtiofsd | QEMU, Cloud-Hypervisor | +| `virtio-fs-nydus` | Virtio-fs with standalone nydusd | QEMU, Cloud-Hypervisor | +| `inline-virtio-fs` | Inline virtio-fs with builtin nydus | Dragonball | +| `none` | Disable shared filesystem | All | + +### Configuration for QEMU + +Update `configuration-qemu.toml` (for the go runtime) or `configuration-qemu-runtime-rs.toml` (for runtime-rs): + +```toml +# Enable virtio-fs-nydus for standalone mode +shared_fs = "virtio-fs-nydus" + +# Path to nydusd binary (required for virtio-fs-nydus) +virtio_fs_daemon = "/usr/local/bin/nydusd" + +# Optional: Extra arguments for nydusd +# Example: virtio_fs_extra_args = ["--log-level", "debug", "--threads", "4"] +virtio_fs_extra_args = [] + +# Cache mode for virtio-fs (never, auto, always) +virtio_fs_cache = "never" + +# Optional: Enable DAX for better performance +virtio_fs_is_dax = false +``` + +### Configuration for 
Cloud-Hypervisor + +Update `configuration-clh.toml` (for the go runtime) or `configuration-clh-runtime-rs.toml` (for runtime-rs): ```toml shared_fs = "virtio-fs-nydus" -virtio_fs_daemon = "" +virtio_fs_daemon = "/usr/local/bin/nydusd" virtio_fs_extra_args = [] +virtio_fs_cache = "never" ``` -6. run `crictl run -r kata nydus-container.yaml nydus-sandbox.yaml`; +### Configuration for Dragonball (Inline Mode / Builtin Nydus) -The `nydus-sandbox.yaml` looks like below: +For the Dragonball VMM, use inline mode (builtin nydus) by updating `configuration-dragonball.toml`: + +```toml +shared_fs = "inline-virtio-fs" +# Note: virtio_fs_daemon is not needed for inline mode +# nydusd is built into Dragonball VMM +virtio_fs_cache = "never" +``` + +## How Nydusd Works in Kata + +### Nydusd Startup Process + +#### Standalone Mode + +When using `virtio-fs-nydus`, Kata runtime starts nydusd with the following parameters: + +```bash +nydusd virtiofs \ + --hybrid-mode \ + --log-level info \ + --apisock /path/to/nydusd-api.sock \ + --sock /path/to/virtiofs.sock +``` + +Key features: + +- **Hybrid Mode**: Enables both Rafs and passthrough filesystem support +- **API Socket**: Provides HTTP API for runtime to mount Rafs images +- **Virtiofs Socket**: Used by hypervisor for virtio-fs communication + +After nydusd starts, Kata runtime automatically: + +1. Waits for the nydusd API server to be ready +2. 
Mounts passthrough_fs at `/containers` within the nydusd virtiofs namespace + - This maps to `/run/kata-containers/shared/containers/` in the guest + - The passthrough_fs provides the writable layer for container overlay + +#### Inline Mode (Builtin Nydus) + +When using `inline-virtio-fs` with Dragonball: + +- Nydusd is built into the Dragonball VMM binary +- No separate process or daemon startup required +- Virtiofs server initializes automatically during VMM boot +- Filesystem backends (Rafs, PassthroughFs) are mounted via VMM API +- Lower resource usage and faster initialization + +### Filesystem Layout in Guest + +#### Standalone Mode (virtio-fs-nydus) + +``` +/run/kata-containers/shared/ # virtiofs mount point +├── containers/<cid>/ # passthrough_fs from host +│ ├── rootfs/ # container rootfs mount point +│ └── snapshotdir/ # snapshot directory +│ ├── fs/ # upperdir (writable layer) +│ └── work/ # workdir (overlay work directory) +└── rafs/<cid>/lowerdir/ # Rafs mount (nydus image) +``` + +#### Inline Mode (inline-virtio-fs / Builtin Nydus) + +``` +/run/kata-containers/shared/containers/ +├── <cid>/ +│ ├── rootfs/ # container rootfs mount point +│ ├── rootfs_lower/ # Rafs mount (lowerdir) +│ └── snapshotdir/ +│ ├── fs/ # upperdir +│ └── work/ # workdir +└── passthrough/ # passthrough filesystem +``` + +### Overlay Filesystem Assembly + +#### Standalone Mode (Guest Kernel Overlay) + +The nydus snapshotter provides the RAFS bootstrap path, nydus config, and +snapshot directory through the container rootfs mount options. 
Runtime-rs passes +the RAFS source/config to nydusd, then asks kata-agent to assemble the writable +overlay in the guest kernel with: + +- **Lowerdir**: Rafs mount point (nydus image) +- **Upperdir**: Writable layer from snapshot directory +- **Workdir**: Overlay work directory + +The nydusd mount request only mounts RAFS: + +```json +{ + "fs_type": "rafs", + "source": "/path/to/bootstrap", + "config": "{...nydus config...}" +} +``` + +Runtime-rs then sends an overlay `Storage` to kata-agent whose lowerdir points +at `/run/kata-containers/shared/rafs/<cid>/lowerdir` and whose upper/work dirs +come from `/run/kata-containers/shared/containers/<cid>/snapshotdir/`. + +#### Inline Mode (Guest Kernel Overlay) + +The guest kernel assembles the overlay filesystem: + +- Kata agent receives overlay mount information +- Lowerdir points to Rafs mount (rootfs_lower/) +- Upperdir and workdir from snapshot directory +- No native overlay support in builtin nydusd + +## Usage Examples + +### Running Containers with nerdctl + +```bash +$ sudo nerdctl run --snapshotter nydus --runtime io.containerd.kata.v2 --net=none --rm -it ghcr.io/dragonflyoss/image-service/ubuntu:nydus-nightly-v5 lsblk +NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINTS +pmem0 259:0 0 254M 1 disk +`-pmem0p1 259:1 0 253M 1 part +``` + +### Using with Kubernetes + +Create a Pod specification: ```yaml +apiVersion: v1 +kind: Pod metadata: - attempt: 1 - name: nydus-sandbox - uid: nydus-uid - namespace: default -log_directory: /tmp -linux: - security_context: - namespace_options: - network: 2 + name: nydus-test + annotations: + io.containerd.osfeature: "nydus.remoteimage.v1" +spec: + runtimeClassName: kata + containers: + - name: test + image: ghcr.io/dragonflyoss/image-service/ubuntu:nydus-nightly-v5 + command: ["/bin/sleep", "600"] +``` + +## Advanced Configuration + +### Nydusd Extra Arguments + +You can pass additional arguments to nydusd through configuration: + +```toml +virtio_fs_extra_args = ["--log-level", "debug", "--threads", "4"] 
+``` + +### Prefetch Configuration + +Enable prefetch for faster container startup: + +```toml +# Path to prefetch file list +prefetch_list_path = "/path/to/prefetch_file.list" +``` + +### Performance Tuning + +#### Cache Mode + +Choose the appropriate cache mode based on your use case: + +- **never**: No caching, always fetch from host (default, safest) +- **auto**: Cache with timeout, good for read-heavy workloads +- **always**: Aggressive caching, best performance but may see stale data + +```toml +virtio_fs_cache = "auto" +``` + +#### DAX (Direct Access) + +Enable DAX for memory-mapped I/O: + +```toml +virtio_fs_is_dax = true +virtio_fs_cache_size = 1024 # Size in MiB +``` + +## Debugging and Path Mappings + +### Debug Logging + +Enable debug logging for nydusd: + +```toml +virtio_fs_extra_args = ["--log-level", "debug"] +``` + +Or via annotation: + +```yaml annotations: - "io.containerd.osfeature": "nydus.remoteimage.v1" + io.katacontainers.config.hypervisor.virtio_fs_extra_args: "--log-level=debug" ``` -The `nydus-container.yaml` looks like below: +### Path Mappings -```yaml -metadata: - name: nydus-container -image: - image: localhost:5000/ubuntu-nydus:latest -command: - - /bin/sleep -args: - - 600 -log_path: container.1.log -``` +#### Standalone Mode (virtio-fs-nydus) + +| Component | Host Path | Guest Path | Notes | +|-----------|-----------|------------|-------| +| Virtiofs mount | N/A | `/run/kata-containers/shared/` | Root virtiofs mount point | +| Passthrough FS | `/run/kata-containers/shared/sandboxes/<sid>/rw/` | `/run/kata-containers/shared/containers/` | Mounted at `/containers` in nydusd namespace | +| Rafs mount | Bootstrap path from snapshotter | `/run/kata-containers/shared/rafs/<cid>/lowerdir` | Mounted via nydusd API | +| Container rootfs | `/run/kata-containers/shared/sandboxes/<sid>/rw/<cid>/rootfs` | `/run/kata-containers/shared/containers/<cid>/rootfs` | Overlay mount point | +| Snapshot dir | From snapshotter | `/run/kata-containers/shared/containers/<cid>/snapshotdir/` 
| Contains upperdir and workdir | + +#### Inline Mode (inline-virtio-fs) + +| Component | Host Path | Guest Path | Notes | +|-----------|-----------|------------|-------| +| Virtiofs mount | N/A | `/run/kata-containers/shared/containers/` | Root virtiofs mount point | +| Passthrough FS | `/run/kata-containers/shared/sandboxes/<sid>/rw/passthrough/` | `/run/kata-containers/shared/containers/passthrough/` | Uses PASSTHROUGH_FS_DIR | +| Rafs mount | Bootstrap path from snapshotter | `/run/kata-containers/shared/containers/<cid>/rootfs_lower/` | Mounted via DeviceManager | +| Container rootfs | `/run/kata-containers/shared/sandboxes/<sid>/rw/passthrough/<cid>/rootfs` | `/run/kata-containers/shared/containers/<cid>/rootfs` | Overlay mount point | + +## Comparison: Standalone vs Inline Mode + +| Feature | Standalone (virtio-fs-nydus) | Inline (inline-virtio-fs) | +|---------|------------------------------|---------------------------| +| Hypervisor | QEMU, Cloud-Hypervisor | Dragonball | +| Nydusd Process | Independent process | Built into VMM | +| Overlay Support | Guest kernel | Guest kernel | +| Performance | Good | Good | +| Resource Usage | Higher (separate process) | Lower (integrated) | +| Flexibility | More configurable | Less configurable | +| Use Case | General purpose | Lightweight VMs | +| Startup Time | Slower (daemon startup) | Faster (no daemon) | +| Memory Overhead | Higher | Lower | + +## References + +- [Nydus Image Service](https://github.com/dragonflyoss/image-service) +- [Nydus Setup for Containerd](https://github.com/dragonflyoss/image-service/blob/master/docs/containerd-env-setup.md) +- [Kata Containers with Nydus Design](../design/kata-nydus-design.md) +- [Virtio-fs Documentation](https://virtio-fs.gitlab.io/) diff --git a/src/agent/src/storage/fs_handler.rs b/src/agent/src/storage/fs_handler.rs index 2c7c3c1cc9..6407612ffa 100644 --- a/src/agent/src/storage/fs_handler.rs +++ b/src/agent/src/storage/fs_handler.rs @@ -5,6 +5,7 @@ // use std::fs; +use std::io::ErrorKind; use 
std::path::Path; use std::sync::Arc; @@ -18,6 +19,27 @@ use tracing::instrument; #[derive(Debug)] pub struct OverlayfsHandler {} +fn ensure_directory_exists(path: &Path) -> Result<()> { + match fs::create_dir_all(path) { + Ok(()) => Ok(()), + Err(err) if err.kind() == ErrorKind::AlreadyExists && path.is_dir() => Ok(()), + Err(err) if err.raw_os_error() == Some(libc::ENOSYS) => { + if path.is_dir() { + Ok(()) + } else { + Err(err).context(format!( + "failed to create overlay directory {}: filesystem does not support mkdir", + path.display() + )) + } + } + Err(err) => Err(err).context(format!( + "failed to create overlay directory {}", + path.display() + )), + } +} + #[async_trait::async_trait] impl StorageHandler for OverlayfsHandler { #[instrument] @@ -61,7 +83,7 @@ impl StorageHandler for OverlayfsHandler { .as_str() .strip_prefix(overlay_create_dir_prefix) { - fs::create_dir_all(dir).context("Failed to create directory")?; + ensure_directory_exists(Path::new(dir))?; } } let path = common_storage_handler(ctx.logger, &storage)?; diff --git a/src/libs/kata-sys-util/src/mount.rs b/src/libs/kata-sys-util/src/mount.rs index ce9f807fce..01e0ef284a 100644 --- a/src/libs/kata-sys-util/src/mount.rs +++ b/src/libs/kata-sys-util/src/mount.rs @@ -190,10 +190,20 @@ pub fn create_mount_destination, D: AsRef, R: AsRef>( .parent() .ok_or_else(|| Error::InvalidPath(dst.to_path_buf()))?; let mut builder = fs::DirBuilder::new(); - builder - .mode(MOUNT_DIR_PERM) - .recursive(true) - .create(parent)?; + builder.mode(MOUNT_DIR_PERM).recursive(true); + + // Try to create parent directory, but handle ENOSYS gracefully + // ENOSYS can occur on certain filesystems (e.g., virtio-fs) where mkdir is not fully supported + if let Err(e) = builder.create(parent) { + // If the error is ENOSYS or AlreadyExists, check if parent exists and continue + if e.kind() != std::io::ErrorKind::AlreadyExists && e.raw_os_error() != Some(libc::ENOSYS) { + return Err(e.into()); + } + // Verify parent exists + 
if !parent.exists() { + return Err(e.into()); + } + } if fs_type == "bind" { // The source and destination for bind mounting must be the same type: file or directory. @@ -207,11 +217,17 @@ pub fn create_mount_destination, D: AsRef, R: AsRef>( } } + // Try to create destination directory, but handle ENOSYS gracefully if let Err(e) = builder.create(dst) { - if e.kind() != std::io::ErrorKind::AlreadyExists { + if e.kind() != std::io::ErrorKind::AlreadyExists && e.raw_os_error() != Some(libc::ENOSYS) { return Err(e.into()); } + // If ENOSYS or AlreadyExists, check if dst exists and is a directory + if !dst.exists() || !dst.is_dir() { + return Err(Error::InvalidPath(dst.to_path_buf())); + } } + if !dst.is_dir() { Err(Error::InvalidPath(dst.to_path_buf())) } else { diff --git a/src/libs/kata-types/src/config/hypervisor/mod.rs b/src/libs/kata-types/src/config/hypervisor/mod.rs index 6ff534c2be..4c1a276a4b 100644 --- a/src/libs/kata-types/src/config/hypervisor/mod.rs +++ b/src/libs/kata-types/src/config/hypervisor/mod.rs @@ -70,6 +70,7 @@ pub use self::firecracker::{FirecrackerConfig, HYPERVISOR_NAME_FIRECRACKER}; const NO_VIRTIO_FS: &str = "none"; const VIRTIO_FS: &str = "virtio-fs"; const VIRTIO_FS_INLINE: &str = "inline-virtio-fs"; +const VIRTIO_FS_NYDUS: &str = "virtio-fs-nydus"; const MAX_BRIDGE_SIZE: u32 = 5; const MAX_NETWORK_QUEUES: u32 = 256; @@ -1528,6 +1529,7 @@ impl SharedFsInfo { match self.shared_fs.as_deref() { Some(VIRTIO_FS) => self.adjust_virtio_fs(false)?, Some(VIRTIO_FS_INLINE) => self.adjust_virtio_fs(true)?, + Some(VIRTIO_FS_NYDUS) => self.adjust_virtio_fs(false)?, _ => {} } @@ -1543,6 +1545,7 @@ impl SharedFsInfo { None => Ok(()), Some(VIRTIO_FS) => self.validate_virtio_fs(false), Some(VIRTIO_FS_INLINE) => self.validate_virtio_fs(true), + Some(VIRTIO_FS_NYDUS) => self.validate_virtio_fs(false), Some(v) => Err(std::io::Error::other(format!("Invalid shared_fs type {v}"))), } } diff --git a/src/runtime-rs/crates/resource/Cargo.toml 
b/src/runtime-rs/crates/resource/Cargo.toml index e7e4407daf..29dfa609fc 100644 --- a/src/runtime-rs/crates/resource/Cargo.toml +++ b/src/runtime-rs/crates/resource/Cargo.toml @@ -39,6 +39,8 @@ flate2 = "1.1" tempfile = "3.19.1" hex = "0.4" base64 = { workspace = true } +hyper = { workspace = true, features = ["client", "http1"] } +hyperlocal = { workspace = true } ## Dependencies from `rust-netlink` ## 0.30+ parses IFLA_INET6_CONF on kernels 6.17+ (240-byte blob; DEVCONF_FORCE_FORWARDING). diff --git a/src/runtime-rs/crates/resource/src/manager_inner.rs b/src/runtime-rs/crates/resource/src/manager_inner.rs index c6c4175b03..9f99c0944c 100644 --- a/src/runtime-rs/crates/resource/src/manager_inner.rs +++ b/src/runtime-rs/crates/resource/src/manager_inner.rs @@ -40,7 +40,7 @@ use crate::{ network::{self, dan_config_path, Network, NetworkConfig, NetworkWithNetNsConfig}, resource_persist::ResourceState, rootfs::{RootFsResource, Rootfs}, - share_fs::{self, sandbox_bind_mounts::SandboxBindMounts, ShareFs}, + share_fs::{self, sandbox_bind_mounts::SandboxBindMounts, NydusShareFs, ShareFs}, volume::{Volume, VolumeResource}, ResourceConfig, ResourceUpdateOp, }; @@ -53,6 +53,7 @@ pub(crate) struct ResourceManagerInner { device_manager: Arc>, network: Option>, share_fs: Option>, + nydus_share_fs: Option>, pub rootfs_resource: RootFsResource, pub volume_resource: VolumeResource, @@ -124,6 +125,7 @@ impl ResourceManagerInner { device_manager, network: None, share_fs: None, + nydus_share_fs: None, rootfs_resource: RootFsResource::new(), volume_resource: VolumeResource::new(), cgroups_resource, @@ -148,14 +150,15 @@ impl ResourceManagerInner { for dc in device_configs { match dc { ResourceConfig::ShareFs(c) => { - self.share_fs = if self + if self .hypervisor .capabilities() .await? 
.is_fs_sharing_supported() { - let share_fs = share_fs::new(&self.sid, &c).context("new share fs")?; - share_fs + let instance = share_fs::new(&self.sid, &c).context("new share fs")?; + instance + .share_fs .setup_device_before_start_vm( self.hypervisor.as_ref(), &self.device_manager, @@ -168,9 +171,8 @@ impl ResourceManagerInner { .await .context("failed setup sandbox bindmounts")?; - Some(share_fs) - } else { - None + self.share_fs = Some(instance.share_fs); + self.nydus_share_fs = instance.nydus_share_fs; }; } ResourceConfig::Network(c) => { @@ -468,6 +470,7 @@ impl ResourceManagerInner { self.rootfs_resource .handler_rootfs( &self.share_fs, + &self.nydus_share_fs, self.device_manager.as_ref(), self.hypervisor.as_ref(), &self.sid, @@ -915,6 +918,14 @@ impl ResourceManagerInner { .await .context("failed to cleanup sandbox bindmounts")?; + // stop share fs daemon (e.g., virtiofsd, nydusd) before cleaning up mount + if let Some(share_fs) = &self.share_fs { + share_fs + .stop() + .await + .context("failed to stop share fs daemon")?; + } + // clean up share fs mount if let Some(share_fs) = &self.share_fs { share_fs @@ -1069,6 +1080,7 @@ impl Persist for ResourceManagerInner { device_manager, network: None, share_fs: None, + nydus_share_fs: None, rootfs_resource: RootFsResource::new(), volume_resource: VolumeResource::new(), cgroups_resource: CgroupsResource::restore( diff --git a/src/runtime-rs/crates/resource/src/rootfs/mod.rs b/src/runtime-rs/crates/resource/src/rootfs/mod.rs index f11a5cc71c..7d9821bd29 100644 --- a/src/runtime-rs/crates/resource/src/rootfs/mod.rs +++ b/src/runtime-rs/crates/resource/src/rootfs/mod.rs @@ -24,11 +24,12 @@ use self::{ block_rootfs::is_block_rootfs, erofs_rootfs::ErofsMultiLayerRootfs, nydus_rootfs::NYDUS_ROOTFS_TYPE, }; -use crate::{rootfs::erofs_rootfs::is_erofs_multi_layer, share_fs::ShareFs}; +use crate::rootfs::erofs_rootfs::is_erofs_multi_layer; +use crate::share_fs::{NydusShareFs, ShareFs}; use oci_spec::runtime as oci; const 
ROOTFS: &str = "rootfs"; -const HYBRID_ROOTFS_LOWER_DIR: &str = "rootfs_lower"; +pub const HYBRID_ROOTFS_LOWER_DIR: &str = "rootfs_lower"; const TYPE_OVERLAY_FS: &str = "overlay"; #[async_trait] @@ -66,6 +67,7 @@ impl RootFsResource { pub async fn handler_rootfs( &self, share_fs: &Option>, + nydus_share_fs: &Option>, device_manager: &RwLock, h: &dyn Hypervisor, sid: &str, @@ -136,12 +138,13 @@ impl RootFsResource { ); Ok(block_rootfs) } else if let Some(share_fs) = share_fs { - // handle nydus rootfs + // handle nydus rootfs (unified implementation for both inline and standalone modes) let share_rootfs: Arc = if layer.fs_type == NYDUS_ROOTFS_TYPE { Arc::new( nydus_rootfs::NydusRootfs::new( device_manager, share_fs, + nydus_share_fs, h, sid, cid, diff --git a/src/runtime-rs/crates/resource/src/rootfs/nydus_rootfs.rs b/src/runtime-rs/crates/resource/src/rootfs/nydus_rootfs.rs index db6bbbe59f..17e3662188 100644 --- a/src/runtime-rs/crates/resource/src/rootfs/nydus_rootfs.rs +++ b/src/runtime-rs/crates/resource/src/rootfs/nydus_rootfs.rs @@ -1,16 +1,29 @@ -// Copyright (c) 2019-2022 Alibaba Cloud -// Copyright (c) 2019-2022 Ant Group +// Copyright (c) 2019-2026 Alibaba Cloud +// Copyright (c) 2019-2026 Ant Group // // SPDX-License-Identifier: Apache-2.0 // + +//! Nydus Rootfs Implementation +//! +//! This module provides a unified implementation for nydus rootfs that supports two modes: +//! - **Inline mode**: Used with Dragonball VMM where nydus is built-in +//! - **Standalone mode**: Used with QEMU/Cloud-Hypervisor where nydusd runs as a separate process +//! +//! The mode is determined by whether a `NydusShareFs` instance is provided: +//! - `Some(nydus_fs)`: Standalone mode (external nydusd with guest kernel overlay) +//! 
- `None`: Inline mode (built-in nydusd with guest kernel overlay) + +use std::path::PathBuf; use std::{fs, path::Path, sync::Arc}; use super::{Rootfs, TYPE_OVERLAY_FS}; +use crate::rootfs::HYBRID_ROOTFS_LOWER_DIR; use crate::{ - rootfs::{HYBRID_ROOTFS_LOWER_DIR, ROOTFS}, + rootfs::ROOTFS, share_fs::{ - do_get_guest_path, do_get_guest_share_path, get_host_rw_shared_path, rafs_mount, ShareFs, - ShareFsRootfsConfig, PASSTHROUGH_FS_DIR, + do_get_guest_path, get_host_rw_shared_path, kata_guest_nydus_root_dir, + kata_guest_share_dir, NydusShareFs, ShareFs, ShareFsRootfsConfig, PASSTHROUGH_FS_DIR, }, }; use agent::Storage; @@ -20,27 +33,45 @@ use hypervisor::{device::device_manager::DeviceManager, Hypervisor}; use kata_types::mount::{Mount, NydusExtraOptions}; use oci_spec::runtime as oci; use tokio::sync::RwLock; -// Used for nydus rootfs + +/// Used for nydus rootfs type detection pub(crate) const NYDUS_ROOTFS_TYPE: &str = "fuse.nydus-overlayfs"; -// Used for Nydus v5 rootfs version + +/// Nydus v5 rootfs version const NYDUS_ROOTFS_V5: &str = "v5"; -// Used for Nydus v6 rootfs version +/// Nydus v6 rootfs version const NYDUS_ROOTFS_V6: &str = "v6"; +/// Snapshot directory name const SNAPSHOT_DIR: &str = "snapshotdir"; +/// Overlay device type for kata-agent const KATA_OVERLAY_DEV_TYPE: &str = "overlayfs"; -// nydus prefetch file list name +/// Nydus prefetch file list name const NYDUS_PREFETCH_FILE_LIST: &str = "prefetch_file.list"; +/// The lower directory name used in the rafs mountpoint path within the nydusd namespace. +const LOWER_DIR: &str = "lowerdir"; +/// The nydus image directory under the guest share root: /run/kata-containers/shared/rafs/. +const NYDUS_RAFS_DIR: &str = "rafs"; +/// Unified Nydus Rootfs implementation supporting both inline and standalone modes. pub(crate) struct NydusRootfs { guest_path: String, - rootfs: Storage, + rootfs: Option, + sid: String, + cid: String, + /// Nydus-specific share fs reference for standalone mode cleanup. 
+ /// None in inline mode. + nydus_share_fs: Option>, + /// It's used to track the rafs mount point for cleanup. + /// None in inline mode. + rafs_mountpoint: Option, } impl NydusRootfs { pub async fn new( - d: &RwLock, + device_manager: &RwLock, share_fs: &Arc, + nydus_share_fs: &Option>, h: &dyn Hypervisor, sid: &str, cid: &str, @@ -49,94 +80,295 @@ impl NydusRootfs { let prefetch_list_path = get_nydus_prefetch_files(h.hypervisor_config().await.prefetch_list_path).await; - let share_fs_mount = share_fs.get_share_fs_mount(); let extra_options = NydusExtraOptions::new(rootfs).context("failed to parse nydus extra options")?; - info!(sl!(), "extra_option {:?}", &extra_options); + info!( + sl!(), + "nydus rootfs extra_options: {:?}, is_standalone_nydus: {}", + &extra_options, + nydus_share_fs.is_some() + ); + let rafs_meta = &extra_options.source; - let (rootfs_storage, rootfs_guest_path) = match extra_options.fs_version.as_str() { - // both nydus v5 and v6 can be handled by the builtin nydus in dragonball by using the rafs mode. - // nydus v6 could also be handled by the guest kernel as well, but some kernel patch is not support in the upstream community. 
We will add an option to let runtime-rs handle nydus v6 in the guest kernel optionally once the patch is ready - // see this issue (https://github.com/kata-containers/kata-containers/issues/5143) - NYDUS_ROOTFS_V5 | NYDUS_ROOTFS_V6 => { - // rafs mount the metadata of nydus rootfs - let rafs_mnt = do_get_guest_share_path(HYBRID_ROOTFS_LOWER_DIR, cid, true); - rafs_mount( - d, - sid, - rafs_meta.to_string(), - rafs_mnt, - extra_options.config.clone(), - prefetch_list_path, - ) - .await - .context("failed to do rafs mount")?; - // create rootfs under the share directory - let container_share_dir = get_host_rw_shared_path(sid) - .join(PASSTHROUGH_FS_DIR) - .join(cid); - let rootfs_dir = container_share_dir.join(ROOTFS); - fs::create_dir_all(rootfs_dir).context("failed to create directory")?; - // mount point inside the guest - let rootfs_guest_path = do_get_guest_path(ROOTFS, cid, false, false); - // bind mount the snapshot dir under the share directory - share_fs_mount - .share_rootfs(&ShareFsRootfsConfig { - cid: cid.to_string(), - source: extra_options.snapshot_dir.clone(), - target: SNAPSHOT_DIR.to_string(), - readonly: false, - is_rafs: false, - }) - .await - .context("share nydus rootfs")?; - let mut options: Vec = Vec::new(); - options.push( - "lowerdir=".to_string() - + &do_get_guest_path(HYBRID_ROOTFS_LOWER_DIR, cid, false, true), - ); - options.push( - "workdir=".to_string() - + &do_get_guest_path( - format!("{}/{}", SNAPSHOT_DIR, "work").as_str(), + let (rootfs_storage, rootfs_guest_path, rafs_mountpoint) = + match extra_options.fs_version.as_str() { + // both nydus v5 and v6 can be handled by the builtin nydus in dragonball by using the rafs mode. + // nydus v6 could also be handled by the guest kernel as well, but some kernel patch is not support in the upstream community. 
We will add an option to let runtime-rs handle nydus v6 in the guest kernel optionally once the patch is ready + // see this issue (https://github.com/kata-containers/kata-containers/issues/5143) + NYDUS_ROOTFS_V5 | NYDUS_ROOTFS_V6 => { + // Determine the mode based on whether NydusShareFs is available + if let Some(nydus_fs) = nydus_share_fs { + // Standalone mode: external nydusd with guest kernel overlay + Self::create_standalone_rootfs( + nydus_fs.as_ref(), + sid, cid, - false, - false, - ), - ); - options.push( - "upperdir=".to_string() - + &do_get_guest_path( - format!("{}/{}", SNAPSHOT_DIR, "fs").as_str(), + rafs_meta, + &extra_options, + ) + .await? + } else { + // Inline mode: built-in nydusd with guest kernel overlay + Self::create_inline_rootfs( + device_manager, + share_fs, + sid, cid, - false, - false, - ), - ); - options.push("index=off".to_string()); - Ok(( - Storage { - driver: KATA_OVERLAY_DEV_TYPE.to_string(), - source: TYPE_OVERLAY_FS.to_string(), - fs_type: TYPE_OVERLAY_FS.to_string(), - options, - mount_point: rootfs_guest_path.clone(), - ..Default::default() - }, - rootfs_guest_path, - )) - } - _ => { - let errstr: &str = "new_nydus_rootfs: invalid nydus rootfs type"; - error!(sl!(), "{}", errstr); - Err(anyhow!(errstr)) - } - }?; + rafs_meta, + &extra_options, + prefetch_list_path, + ) + .await? + } + } + _ => { + let errstr = "invalid nydus rootfs version, expected v5 or v6"; + error!(sl!(), "{}", errstr); + return Err(anyhow!(errstr)); + } + }; + + info!( + sl!(), + "nydus rootfs created: guest_path={}, storage={:?}", rootfs_guest_path, rootfs_storage + ); + Ok(NydusRootfs { guest_path: rootfs_guest_path, rootfs: rootfs_storage, + sid: sid.to_string(), + cid: cid.to_string(), + nydus_share_fs: nydus_share_fs.clone(), + rafs_mountpoint, }) } + + /// Create rootfs in standalone mode (external nydusd). 
+ /// + /// In this mode: + /// - nydusd runs as a separate process + /// - nydusd exposes the RAFS lowerdir + /// - the writable overlay is assembled by kata-agent in the guest kernel + /// - virtiofs is mounted at `/run/kata-containers/shared/` + /// - passthrough_fs is mounted at `/containers` within nydusd namespace + async fn create_standalone_rootfs( + nydus_fs: &dyn crate::share_fs::NydusShareFs, + sid: &str, + cid: &str, + rafs_meta: &str, + extra_options: &NydusExtraOptions, + ) -> Result<(Option, String, Option)> { + // Create rootfs directory on the host under the share directory. + // Host/Guest Mapping in Standalone Mode: + // - Host: get_host_rw_shared_path(sid)//rootfs = .../rw//rootfs + // - Guest: /run/kata-containers/shared/containers//rootfs + let container_share_dir = get_host_rw_shared_path(sid).join(cid); + let rootfs_dir = container_share_dir.join(ROOTFS); + fs::create_dir_all(&rootfs_dir).context("failed to create rootfs directory")?; + + // The guest mount point for the overlay rootfs: /run/kata-containers/shared/containers//rootfs + let rootfs_guest_path = Self::guest_shared_path(cid, ROOTFS); + + // Bind mount the snapshot dir (allocated by the snapshotter on the host) to the shared directory + // so it becomes visible in the guest. + let snapshot_share_dir = container_share_dir.join(SNAPSHOT_DIR); + kata_sys_util::mount::bind_mount_unchecked( + &extra_options.snapshot_dir, + &snapshot_share_dir, + false, + nix::mount::MsFlags::MS_SLAVE, + ) + .context("failed to bind mount snapshot dir")?; + + // Guest paths for overlay upper and work directories. + let upper_dir_guest = Self::guest_shared_path(cid, &format!("{}/{}", SNAPSHOT_DIR, "fs")); + let work_dir_guest = Self::guest_shared_path(cid, &format!("{}/{}", SNAPSHOT_DIR, "work")); + + info!( + sl!(), + "mounting rafs (standalone mode): source={}, cid={}", rafs_meta, cid, + ); + + // Go-runtime parity: mount plain RAFS via nydusd. 
The writable overlay + // is assembled by kata-agent in the guest kernel using the Storage below. + // If the rafs mount fails, undo the snapshot bind mount above so we don't + // leak a mount that would block later cleanup/retries. + let rafs_mnt = match nydus_fs + .mount_rafs(cid, rafs_meta, &extra_options.config) + .await + { + Ok(mnt) => mnt, + Err(e) => { + if let Err(umount_err) = nix::mount::umount(&snapshot_share_dir) { + warn!( + sl!(), + "failed to umount snapshot dir {:?} after rafs mount failure: {}", + snapshot_share_dir, + umount_err + ); + } + return Err(e).context("failed to mount rafs in standalone mode"); + } + }; + + let lowerdir_guest = Self::guest_nydus_image_path(cid); + let options = vec![ + format!("upperdir={}", upper_dir_guest), + format!("workdir={}", work_dir_guest), + format!("lowerdir={}", lowerdir_guest), + "index=off".to_string(), + ]; + + info!( + sl!(), + "nydus standalone overlay storage: mount_point={}, lowerdir={}, upperdir={}, workdir={}", + rootfs_guest_path, + lowerdir_guest, + upper_dir_guest, + work_dir_guest + ); + + Ok(( + Storage { + driver: KATA_OVERLAY_DEV_TYPE.to_string(), + source: TYPE_OVERLAY_FS.to_string(), + fs_type: TYPE_OVERLAY_FS.to_string(), + options, + mount_point: rootfs_guest_path.clone(), + ..Default::default() + } + .into(), + rootfs_guest_path, + Some(rafs_mnt), + )) + } + + /// Create rootfs in inline mode (built-in nydusd). 
+ /// + /// In this mode: + /// - nydus is built into Dragonball VMM + /// - overlay is assembled by guest kernel + /// - virtiofs is mounted at `/run/kata-containers/shared/containers/` + /// - passthrough_fs uses PASSTHROUGH_FS_DIR subdirectory + async fn create_inline_rootfs( + device_manager: &RwLock, + share_fs: &Arc, + sid: &str, + cid: &str, + rafs_meta: &str, + extra_options: &NydusExtraOptions, + prefetch_list_path: Option, + ) -> Result<(Option, String, Option)> { + let share_fs_mount = share_fs.get_share_fs_mount(); + + // Mount rafs via DeviceManager (inline mode uses built-in nydusd). + // This is different from standalone mode which uses nydusd API. + let rafs_mnt = crate::share_fs::do_get_guest_share_path(HYBRID_ROOTFS_LOWER_DIR, cid, true); + crate::share_fs::rafs_mount( + device_manager, + sid, + rafs_meta.to_string(), + rafs_mnt.clone(), + extra_options.config.clone(), + prefetch_list_path, + ) + .await + .context("failed to do rafs mount")?; + + // Create rootfs directory on the host side. + // In inline mode, we use PASSTHROUGH_FS_DIR subdirectory. 
+ let container_share_dir = get_host_rw_shared_path(sid) + .join(PASSTHROUGH_FS_DIR) + .join(cid); + let rootfs_dir = container_share_dir.join(ROOTFS); + fs::create_dir_all(rootfs_dir).context("failed to create directory")?; + + // Guest mount point + let rootfs_guest_path = do_get_guest_path(ROOTFS, cid, false, false); + + // Bind mount the snapshot dir under the share directory + share_fs_mount + .share_rootfs(&ShareFsRootfsConfig { + cid: cid.to_string(), + source: extra_options.snapshot_dir.clone(), + target: SNAPSHOT_DIR.to_string(), + readonly: false, + is_rafs: false, + }) + .await + .context("share nydus rootfs")?; + + // Build overlay options for guest kernel overlay + let options = vec![ + format!( + "lowerdir={}", + do_get_guest_path(HYBRID_ROOTFS_LOWER_DIR, cid, false, true) + ), + format!( + "workdir={}", + do_get_guest_path( + format!("{}/{}", SNAPSHOT_DIR, "work").as_str(), + cid, + false, + false + ) + ), + format!( + "upperdir={}", + do_get_guest_path( + format!("{}/{}", SNAPSHOT_DIR, "fs").as_str(), + cid, + false, + false + ) + ), + "index=off".to_string(), + ]; + + info!( + sl!(), + "nydus inline overlay storage: mount_point={}, rafs_mnt={}", + rootfs_guest_path, + rafs_mnt + ); + + Ok(( + Storage { + driver: KATA_OVERLAY_DEV_TYPE.to_string(), + source: TYPE_OVERLAY_FS.to_string(), + fs_type: TYPE_OVERLAY_FS.to_string(), + options, + mount_point: rootfs_guest_path.clone(), + ..Default::default() + } + .into(), + rootfs_guest_path, + None, + )) + } + + /// Generate the nydus image guest path for lowerdir: + /// `/run/kata-containers/shared/rafs//lowerdir` + fn guest_nydus_image_path(cid: &str) -> String { + PathBuf::from(kata_guest_nydus_root_dir()) + .join(NYDUS_RAFS_DIR) + .join(cid) + .join(LOWER_DIR) + .to_str() + .unwrap() + .to_string() + } + + /// Generate the guest shared dir path for containers: `/run/kata-containers/shared/containers//` + fn guest_shared_path(cid: &str, suffix: &str) -> String { + let guest_shared_dir = 
kata_guest_share_dir(); + PathBuf::from(&guest_shared_dir) + .join(cid) + .join(suffix) + .to_str() + .unwrap() + .to_string() + } } #[async_trait] @@ -150,7 +382,7 @@ impl Rootfs for NydusRootfs { } async fn get_storage(&self) -> Option> { - Some(vec![self.rootfs.clone()]) + self.rootfs.clone().map(|rootfs| vec![rootfs]) } async fn get_device_id(&self) -> Result> { @@ -158,8 +390,27 @@ impl Rootfs for NydusRootfs { } async fn cleanup(&self, _device_manager: &RwLock) -> Result<()> { - // TODO: Clean up NydusRootfs after the container is killed - warn!(sl!(), "Cleaning up NydusRootfs is still unimplemented."); + if let (Some(nydus_fs), Some(rafs_mnt)) = (&self.nydus_share_fs, &self.rafs_mountpoint) { + if let Err(e) = nydus_fs.umount_rafs(rafs_mnt).await { + warn!( + sl!(), + "failed to umount rafs at {} with err {}", rafs_mnt, e + ); + } + + let sn_shared_path = get_host_rw_shared_path(&self.sid) + .join(&self.cid) + .join(SNAPSHOT_DIR); + if sn_shared_path.exists() { + if let Err(e) = nix::mount::umount(&sn_shared_path) { + warn!( + sl!(), + "failed to umount snapshot mount at {:?} with err {}", sn_shared_path, e + ); + } + } + } + Ok(()) } } @@ -205,6 +456,40 @@ mod tests { use std::{fs::File, path::PathBuf}; use tempfile::tempdir; + #[test] + fn test_guest_shared_path() { + // "/run/kata-containers/shared/containers//" + let cid = "nydustester"; + let path = NydusRootfs::guest_shared_path(cid, "rootfs"); + assert_eq!( + path, + "/run/kata-containers/shared/containers/nydustester/rootfs" + ); + + let upper_path = NydusRootfs::guest_shared_path(cid, &format!("{}/{}", SNAPSHOT_DIR, "fs")); + assert_eq!( + upper_path, + "/run/kata-containers/shared/containers/nydustester/snapshotdir/fs" + ); + + let work_path = + NydusRootfs::guest_shared_path(cid, &format!("{}/{}", SNAPSHOT_DIR, "work")); + assert_eq!( + work_path, + "/run/kata-containers/shared/containers/nydustester/snapshotdir/work" + ); + } + + #[test] + fn test_guest_nydus_image_path() { + let cid = 
"nydustester"; + let path = NydusRootfs::guest_nydus_image_path(cid); + assert_eq!( + path, + "/run/kata-containers/shared/rafs/nydustester/lowerdir" + ); + } + #[tokio::test] async fn test_get_nydus_prefetch_files() { let temp_dir = tempdir().unwrap(); diff --git a/src/runtime-rs/crates/resource/src/share_fs/mod.rs b/src/runtime-rs/crates/resource/src/share_fs/mod.rs index 98e81839b0..85fa59bbea 100644 --- a/src/runtime-rs/crates/resource/src/share_fs/mod.rs +++ b/src/runtime-rs/crates/resource/src/share_fs/mod.rs @@ -1,8 +1,11 @@ // Copyright (c) 2019-2022 Alibaba Cloud -// Copyright (c) 2019-2022 Ant Group +// Copyright (c) 2019-2026 Ant Group // // SPDX-License-Identifier: Apache-2.0 // +// + +mod nydus; mod share_virtio_fs; pub use share_virtio_fs::rafs_mount; @@ -10,6 +13,9 @@ mod share_virtio_fs_inline; use share_virtio_fs_inline::ShareVirtioFsInline; mod share_virtio_fs_standalone; use share_virtio_fs_standalone::ShareVirtioFsStandalone; +mod share_virtio_fs_nydus; +pub use nydus::nydus_client::NydusClient; +pub use nydus::nydus_daemon::{Nydusd, NydusdConfig}; mod utils; use tokio::sync::Mutex; pub use utils::{ @@ -23,7 +29,7 @@ pub mod sandbox_bind_mounts; use std::{collections::HashMap, fmt::Debug, path::PathBuf, sync::Arc}; use agent::Storage; -use anyhow::{anyhow, Context, Ok, Result}; +use anyhow::{anyhow, Context, Result}; use async_trait::async_trait; use kata_types::{build_path, config::hypervisor::SharedFsInfo}; use oci_spec::runtime as oci; @@ -31,8 +37,10 @@ use tokio::sync::RwLock; use hypervisor::{device::device_manager::DeviceManager, Hypervisor}; +use crate::share_fs::share_virtio_fs_nydus::ShareVirtioFsNydus; + const VIRTIO_FS: &str = "virtio-fs"; -const _VIRTIO_FS_NYDUS: &str = "virtio-fs-nydus"; +pub const VIRTIO_FS_NYDUS: &str = "virtio-fs-nydus"; const INLINE_VIRTIO_FS: &str = "inline-virtio-fs"; const DEFAULT_KATA_HOST_SHARED_DIR: &str = "/run/kata-containers/shared/sandboxes/"; @@ -40,6 +48,10 @@ const DEFAULT_KATA_HOST_SHARED_DIR: 
&str = "/run/kata-containers/shared/sandboxe /// default share fs (for example virtio-fs) mount path in the guest const DEFAULT_KATA_GUEST_SHARE_DIR: &str = "/run/kata-containers/shared/containers/"; +/// The virtiofs mount point in the guest for nydusd mode. +/// In nydusd mode, virtiofs is mounted at `/run/kata-containers/shared/` +const DEFAULT_KATA_GUEST_ROOT_DIR: &str = "/run/kata-containers/shared/"; + pub const PASSTHROUGH_FS_DIR: &str = "passthrough"; const RAFS_DIR: &str = "rafs"; @@ -51,6 +63,11 @@ pub fn kata_guest_share_dir() -> String { build_path(DEFAULT_KATA_GUEST_SHARE_DIR) } +/// The virtiofs mount point in the guest for nydusd mode. +pub fn kata_guest_nydus_root_dir() -> String { + build_path(DEFAULT_KATA_GUEST_ROOT_DIR) +} + #[async_trait] pub trait ShareFs: Send + Sync { fn get_share_fs_mount(&self) -> Arc; @@ -66,6 +83,27 @@ pub trait ShareFs: Send + Sync { ) -> Result<()>; async fn get_storages(&self) -> Result>; fn mounted_info_set(&self) -> Arc>>; + + /// Stop the share fs daemon process (e.g., virtiofsd, nydusd). + /// Called during sandbox cleanup before cleaning up mounts. + /// Default implementation does nothing for inline modes that don't manage external daemons. + async fn stop(&self) -> Result<()> { + Ok(()) + } +} + +/// Trait for nydus-specific data-plane operations (standalone nydusd mode). +/// This trait is implemented by ShareVirtioFsNydus and provides operations +/// that are specific to the nydusd daemon's rafs mount capabilities. +#[async_trait] +pub trait NydusShareFs: Send + Sync { + /// Mount rafs through nydusd. + /// Returns the mount point path within the nydusd namespace. + async fn mount_rafs(&self, cid: &str, rafs_meta: &str, config: &str) -> Result; + + /// Umount rafs from nydusd. + /// Called during container cleanup. 
+ async fn umount_rafs(&self, mountpoint: &str) -> Result<()>; } #[derive(Debug, Clone)] @@ -155,16 +193,39 @@ pub trait ShareFsMount: Send + Sync { async fn cleanup(&self, sid: &str) -> Result<()>; } -pub fn new(id: &str, config: &SharedFsInfo) -> Result> { +/// Result of creating a new share fs instance. +pub struct ShareFsInstance { + /// The share fs trait object (always present). + pub share_fs: Arc, + /// The nydus-specific trait object (present only in standalone nydus mode). + pub nydus_share_fs: Option>, +} + +pub fn new(id: &str, config: &SharedFsInfo) -> Result { let shared_fs = config.shared_fs.clone(); let shared_fs = shared_fs.unwrap_or_default(); match shared_fs.as_str() { - INLINE_VIRTIO_FS => Ok(Arc::new( - ShareVirtioFsInline::new(id, config).context("new inline virtio fs")?, - )), - VIRTIO_FS => Ok(Arc::new( - ShareVirtioFsStandalone::new(id, config).context("new standalone virtio fs")?, - )), + INLINE_VIRTIO_FS => Ok(ShareFsInstance { + share_fs: Arc::new( + ShareVirtioFsInline::new(id, config).context("new inline virtiofs")?, + ), + nydus_share_fs: None, + }), + VIRTIO_FS => Ok(ShareFsInstance { + share_fs: Arc::new( + ShareVirtioFsStandalone::new(id, config).context("new standalone virtiofs")?, + ), + nydus_share_fs: None, + }), + VIRTIO_FS_NYDUS => { + let nydus = Arc::new( + ShareVirtioFsNydus::new(id, config).context("new standalone nydus virtiofs")?, + ); + Ok(ShareFsInstance { + share_fs: nydus.clone() as Arc, + nydus_share_fs: Some(nydus as Arc), + }) + } _ => Err(anyhow!("unsupported shared fs {:?}", &shared_fs)), } } diff --git a/src/runtime-rs/crates/resource/src/share_fs/nydus/mod.rs b/src/runtime-rs/crates/resource/src/share_fs/nydus/mod.rs new file mode 100644 index 0000000000..8dd7bb171c --- /dev/null +++ b/src/runtime-rs/crates/resource/src/share_fs/nydus/mod.rs @@ -0,0 +1,28 @@ +// Copyright (c) 2026 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// +// + +pub mod nydus_client; +pub mod nydus_daemon; + +use 
serde::{Deserialize, Serialize}; +use std::path::{Path, PathBuf}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct MountRequest { + pub fs_type: String, + pub source: PathBuf, + pub config: String, +} + +impl MountRequest { + pub fn new(fs_type: &str, source: &Path, config: &str) -> Self { + Self { + fs_type: fs_type.to_string(), + source: source.to_path_buf(), + config: config.to_string(), + } + } +} diff --git a/src/runtime-rs/crates/resource/src/share_fs/nydus/nydus_client.rs b/src/runtime-rs/crates/resource/src/share_fs/nydus/nydus_client.rs new file mode 100644 index 0000000000..76f300aa3f --- /dev/null +++ b/src/runtime-rs/crates/resource/src/share_fs/nydus/nydus_client.rs @@ -0,0 +1,215 @@ +// Copyright (c) 2026 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::path::{Path, PathBuf}; + +use anyhow::{anyhow, Context, Result}; +use hyper::{body::to_bytes, Body, Client, Method, Request, StatusCode}; +use hyperlocal::{UnixClientExt, Uri}; +use serde::{Deserialize, Serialize}; +use tokio::time::{timeout, Duration}; + +use crate::share_fs::nydus::MountRequest; + +const HTTP_CLIENT_TIMEOUT_SECS: u64 = 30; +// Keep the per-probe timeout short relative to the total readiness timeout so a +// single slow/hung probe cannot consume the whole budget and starve the retry +// loop (which would make `max_attempts` largely ineffective). 
+const HTTP_READY_CHECK_TIMEOUT_SECS: u64 = 1; +const HTTP_READY_TOTAL_TIMEOUT_SECS: u64 = 10; + +const INFO_ENDPOINT: &str = "/api/v1/daemon"; +const MOUNT_ENDPOINT: &str = "/api/v1/mount"; + +const NYDUSD_DAEMON_STATE_RUNNING: &str = "RUNNING"; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct BuildTimeInfo { + pub package_ver: String, + pub git_commit: String, + pub build_time: String, + pub profile: String, + pub rustc: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct DaemonInfo { + pub version: BuildTimeInfo, + #[serde(default)] + pub id: Option, + #[serde(default)] + pub supervisor: Option, + pub state: String, + #[serde(default)] + pub backend_collection: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ErrorMessage { + pub code: String, + pub message: String, +} + +pub struct NydusClient { + sock_path: PathBuf, + client: Client, +} + +impl NydusClient { + pub fn new(sock_path: &Path) -> Self { + Self { + sock_path: sock_path.to_path_buf(), + client: Client::unix(), + } + } + + async fn send_request( + &self, + method: Method, + path: &str, + body: Option<&str>, + ) -> Result<(StatusCode, Vec)> { + self.send_request_with_timeout(method, path, body, HTTP_CLIENT_TIMEOUT_SECS) + .await + } + + async fn send_request_with_timeout( + &self, + method: Method, + path: &str, + body: Option<&str>, + timeout_secs: u64, + ) -> Result<(StatusCode, Vec)> { + let uri: hyper::Uri = Uri::new(&self.sock_path, path).into(); + + let request_builder = Request::builder() + .method(method) + .uri(uri) + .header("Content-Type", "application/json"); + + let req = match body { + Some(b) => request_builder + .body(Body::from(b.to_string())) + .context("failed to build HTTP request with body")?, + None => request_builder + .body(Body::empty()) + .context("failed to build HTTP request")?, + }; + + let response = timeout(Duration::from_secs(timeout_secs), self.client.request(req)) + .await + .context("timeout waiting for 
response")? + .context("failed to send HTTP request")?; + + let status = response.status(); + let body_bytes = to_bytes(response.into_body()) + .await + .context("failed to read response body")?; + + Ok((status, body_bytes.to_vec())) + } + + pub async fn check_status(&self) -> Result { + let (status, body) = self.send_request(Method::GET, INFO_ENDPOINT, None).await?; + + if status != StatusCode::OK { + return Err(anyhow!("nydusd check status failed with code {}", status)); + } + + let info: DaemonInfo = + serde_json::from_slice(&body).context("failed to parse DaemonInfo")?; + Ok(info) + } + + pub async fn mount(&self, mountpoint: &str, req: &MountRequest) -> Result<()> { + let path = format!( + "{}?mountpoint={}", + MOUNT_ENDPOINT, + percent_encode_query_value(mountpoint) + ); + let body = serde_json::to_string(req).context("failed to serialize MountRequest")?; + let (status, resp_body) = self.send_request(Method::POST, &path, Some(&body)).await?; + + if status == StatusCode::NO_CONTENT { + return Ok(()); + } + + let err: ErrorMessage = + serde_json::from_slice(&resp_body).context("failed to parse error message")?; + Err(anyhow!("nydusd mount failed: {}", err.message)) + } + + pub async fn umount(&self, mountpoint: &str) -> Result<()> { + let path = format!( + "{}?mountpoint={}", + MOUNT_ENDPOINT, + percent_encode_query_value(mountpoint) + ); + let (status, resp_body) = self.send_request(Method::DELETE, &path, None).await?; + + if status == StatusCode::NO_CONTENT { + return Ok(()); + } + + let err: ErrorMessage = + serde_json::from_slice(&resp_body).context("failed to parse error message")?; + Err(anyhow!("nydusd umount failed: {}", err.message)) + } + + pub async fn wait_until_ready(&self, max_attempts: u32, delay_ms: u64) -> Result<()> { + timeout(Duration::from_secs(HTTP_READY_TOTAL_TIMEOUT_SECS), async { + for _ in 0..max_attempts { + match self + .check_status_with_timeout(HTTP_READY_CHECK_TIMEOUT_SECS) + .await + { + Ok(info) if info.state == 
NYDUSD_DAEMON_STATE_RUNNING => { + return Ok(()); + } + Ok(info) => { + debug!(sl!(), "nydusd state: {}, waiting...", info.state); + } + Err(e) => { + debug!(sl!(), "nydusd not ready: {}", e); + } + } + tokio::time::sleep(Duration::from_millis(delay_ms)).await; + } + + Err(anyhow!( + "nydusd API server not ready after {} attempts", + max_attempts + )) + }) + .await + .context("timeout waiting for nydusd API server ready")? + } + + async fn check_status_with_timeout(&self, timeout_secs: u64) -> Result { + let (status, body) = self + .send_request_with_timeout(Method::GET, INFO_ENDPOINT, None, timeout_secs) + .await?; + + if status != StatusCode::OK { + return Err(anyhow!("nydusd check status failed with code {}", status)); + } + + let info: DaemonInfo = + serde_json::from_slice(&body).context("failed to parse DaemonInfo")?; + Ok(info) + } +} + +fn percent_encode_query_value(value: &str) -> String { + value + .bytes() + .flat_map(|byte| match byte { + b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'.' 
| b'_' | b'~' => { + vec![byte as char] + } + _ => format!("%{byte:02X}").chars().collect(), + }) + .collect() +} diff --git a/src/runtime-rs/crates/resource/src/share_fs/nydus/nydus_daemon.rs b/src/runtime-rs/crates/resource/src/share_fs/nydus/nydus_daemon.rs new file mode 100644 index 0000000000..5a4b29572a --- /dev/null +++ b/src/runtime-rs/crates/resource/src/share_fs/nydus/nydus_daemon.rs @@ -0,0 +1,409 @@ +// Copyright (c) 2026 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// +// + +use std::io::ErrorKind; +use std::path::{Path, PathBuf}; +use std::process::Stdio; +use std::sync::Arc; + +use anyhow::{anyhow, Context, Result}; +use kata_types::rootless::is_rootless; +use tokio::io::{AsyncBufReadExt, BufReader}; +use tokio::process::{Child, Command}; +use tokio::sync::RwLock; + +use crate::share_fs::nydus::{nydus_client::NydusClient, MountRequest}; + +/// passthrough_fs is a special filesystem type in nydus which simply passthroughs the source directory +/// to the guest without any caching or overlay. +pub const NYDUS_PASSTHROUGH_FS: &str = "passthrough_fs"; +/// RAFS filesystem type for nydus. This is used to tell nydusd to mount a RAFS filesystem. +pub const NYDUS_RAFS: &str = "rafs"; +/// The mountpoint for passthrough_fs inside the nydusd virtiofs namespace. +/// This is NOT a guest absolute path; it's a path within the virtiofs namespace. +/// When the guest mounts virtiofs at `/run/kata-containers/shared/`, this maps to +/// `/run/kata-containers/shared/containers/` in the guest. +pub const SHARED_PATH_IN_GUEST: &str = "/containers"; + +/// The number of attempts to check if nydusd API server is ready after starting nydusd. +const NYDUSD_WAIT_READY_ATTEMPTS: u32 = 100; +/// The delay in milliseconds between each attempt to check if nydusd API server is ready. +const NYDUSD_WAIT_READY_DELAY_MS: u64 = 100; + +/// PathType is used to specify the expected type of a path for validation purposes. 
+/// - Socket: the path is expected to be a socket file and it is used for nydusd's API and data sockets. +/// - File: the path is expected to be a regular file and it is used for the nydusd binary path. +/// - Directory: the path is expected to be a directory and it is used for the source directory of the passthrough_fs. +enum PathType { + Socket, + File, + Directory, +} + +#[derive(Clone, Debug)] +pub struct NydusdConfig { + pub path: PathBuf, + pub sock_path: PathBuf, + pub api_sock_path: PathBuf, + pub source_path: PathBuf, + pub debug: bool, + pub extra_args: Vec, +} + +impl NydusdConfig { + pub fn new( + path: PathBuf, + sock_path: PathBuf, + api_sock_path: PathBuf, + source_path: PathBuf, + debug: bool, + extra_args: Vec, + ) -> Self { + Self { + path, + sock_path, + api_sock_path, + source_path, + debug, + extra_args, + } + } + + pub fn validate(&self) -> Result { + validate_path(&self.path, PathType::File)?; + validate_path(&self.sock_path, PathType::Socket)?; + validate_path(&self.api_sock_path, PathType::Socket)?; + validate_path(&self.source_path, PathType::Directory)?; + + Ok(self.clone()) + } +} + +struct NydusdInner { + pid: Option, + child: Option, +} + +pub struct Nydusd { + config: NydusdConfig, + inner: Arc>, +} + +#[allow(dead_code)] +impl Nydusd { + pub fn new(config: NydusdConfig) -> Self { + Self { + config, + inner: Arc::new(RwLock::new(NydusdInner { + pid: None, + child: None, + })), + } + } + + fn build_args(&self) -> Result> { + let log_level = if self.config.debug { "debug" } else { "info" }; + + // In rootless mode the jailer prefix can make absolute socket paths exceed + // the unix socket path length limit (typically 108 bytes), which would make + // nydusd fail to bind its data/API sockets. Mirror the virtiofsd workaround: + // pass short, relative socket file names and rely on the process working + // directory being set to the socket parent directory (see `start()`). 
+ let (sock_arg, api_sock_arg) = if is_rootless() { + ( + socket_file_name(&self.config.sock_path, "sock")?, + socket_file_name(&self.config.api_sock_path, "api sock")?, + ) + } else { + ( + self.config.sock_path.to_string_lossy().to_string(), + self.config.api_sock_path.to_string_lossy().to_string(), + ) + }; + + let mut args = vec![ + "virtiofs".to_string(), + "--hybrid-mode".to_string(), + "--log-level".to_string(), + log_level.to_string(), + "--apisock".to_string(), + api_sock_arg, + "--sock".to_string(), + sock_arg, + ]; + + for extra_arg in &self.config.extra_args { + args.push(extra_arg.clone()); + } + + Ok(args) + } + + pub async fn start(&self) -> Result { + // Before starting nydusd, we need to clean up any stale socket files + // that might exist from a previous run. + cleanup_socket(&self.config.sock_path).await?; + cleanup_socket(&self.config.api_sock_path).await?; + + let args = self.build_args()?; + info!( + sl!(), + "starting nydusd with path: {:?} args: {:?}", self.config.path, args + ); + + let mut cmd = Command::new(&self.config.path); + cmd.args(&args) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .kill_on_drop(true); + + if is_rootless() { + // `build_args()` uses relative socket file names in rootless mode; run + // nydusd from the socket parent directory so the short names resolve and + // the bound socket files still land at the configured absolute paths. 
+ let work_dir = self + .config + .sock_path + .parent() + .ok_or_else(|| anyhow!("failed to get parent dir of {:?}", self.config.sock_path))?; + cmd.current_dir(work_dir); + } + + let mut child = cmd.spawn().context("failed to spawn nydusd process")?; + let pid = child + .id() + .ok_or_else(|| anyhow!("failed to get nydusd pid"))?; + + let stdout = child + .stdout + .take() + .ok_or_else(|| anyhow!("failed to capture stdout"))?; + let stderr = child + .stderr + .take() + .ok_or_else(|| anyhow!("failed to capture stderr"))?; + + tokio::spawn(async move { + let reader = BufReader::new(stderr); + let mut lines = reader.lines(); + while let Ok(Some(line)) = lines.next_line().await { + // It's not error here. + info!(sl!(), "nydusd start: {}", line); + } + }); + + tokio::spawn(async move { + let reader = BufReader::new(stdout); + let mut lines = reader.lines(); + while let Ok(Some(line)) = lines.next_line().await { + info!(sl!(), "nydusd stdout: {}", line); + } + }); + + { + let mut inner = self.inner.write().await; + inner.pid = Some(pid); + inner.child = Some(child); + } + + info!( + sl!(), + "nydusd started with pid {}, waiting for API server ready", pid + ); + + let startup_result: Result<()> = async { + let client = NydusClient::new(&self.config.api_sock_path); + client + .wait_until_ready(NYDUSD_WAIT_READY_ATTEMPTS, NYDUSD_WAIT_READY_DELAY_MS) + .await + .context("nydusd API server not ready")?; + + info!(sl!(), "nydusd API server ready, setting up passthrough fs"); + self.setup_passthrough_fs().await + } + .await; + + // As `wait_until_ready()` or `setup_passthrough_fs()` can fail after nydusd + // has already been spawned and stored in `self.inner`, so clean it up here + // to avoid leaking the process and stale socket files on startup failure. 
+ if let Err(err) = startup_result { + if let Err(stop_err) = self.stop().await { + warn!( + sl!(), + "failed to clean up nydusd after startup error: {}", stop_err + ); + } + + return Err(err); + } + + info!(sl!(), "nydusd setup completed"); + + Ok(pid) + } + + async fn setup_passthrough_fs(&self) -> Result<()> { + let client = NydusClient::new(&self.config.api_sock_path); + let req = MountRequest::new(NYDUS_PASSTHROUGH_FS, &self.config.source_path, ""); + + info!( + sl!(), + "mounting passthrough fs from {:?} to {}", + self.config.source_path, + SHARED_PATH_IN_GUEST + ); + + client + .mount(SHARED_PATH_IN_GUEST, &req) + .await + .context("failed to mount passthrough fs")?; + + Ok(()) + } + + pub async fn mount_rafs(&self, mountpoint: &str, source: &PathBuf, config: &str) -> Result<()> { + let client = NydusClient::new(&self.config.api_sock_path); + let req = MountRequest::new(NYDUS_RAFS, source, config); + + info!(sl!(), "mounting rafs from {:?} to {}", source, mountpoint); + + client + .mount(mountpoint, &req) + .await + .context("failed to mount rafs")?; + + info!(sl!(), "rafs mounted successfully at {}", mountpoint); + Ok(()) + } + + pub async fn umount(&self, mountpoint: &str) -> Result<()> { + let client = NydusClient::new(&self.config.api_sock_path); + + info!(sl!(), "unmounting {}", mountpoint); + + client + .umount(mountpoint) + .await + .context("failed to umount")?; + + info!(sl!(), "unmounted {}", mountpoint); + Ok(()) + } + + pub async fn stop(&self) -> Result<()> { + let (pid, child) = { + let mut inner = self.inner.write().await; + (inner.pid.take(), inner.child.take()) + }; + + if let Some(pid) = pid { + info!(sl!(), "stopping nydusd with pid {}", pid); + + if let Some(mut child) = child { + let _ = child.kill().await; + let _ = child.wait().await; + } + + // Clean up the socket files created by nydusd + cleanup_socket(&self.config.sock_path).await?; + cleanup_socket(&self.config.api_sock_path).await?; + + info!(sl!(), "nydusd stopped"); + } + + 
Ok(()) + } + + pub async fn get_pid(&self) -> Option { + let inner = self.inner.read().await; + inner.pid + } +} + +/// Extract the file name component of a socket path as a string, used to build a +/// short relative socket path in rootless mode. +fn socket_file_name(path: &Path, name: &str) -> Result { + Ok(path + .file_name() + .ok_or_else(|| anyhow!("failed to get {} file name of {:?}", name, path))? + .to_string_lossy() + .to_string()) +} + +async fn cleanup_socket(path: &Path) -> Result<()> { + match tokio::fs::remove_file(path).await { + Ok(()) => Ok(()), + Err(err) if err.kind() == ErrorKind::NotFound => Ok(()), + Err(err) => Err(err).context(format!("failed to remove socket {:?}", path)), + } +} + +/// validate that the path exists and is of the expected type +fn validate_path(path: &PathBuf, path_type: PathType) -> Result<()> { + if path.as_os_str().is_empty() { + return Err(anyhow!("path is empty")); + } + + let parent = path.parent().unwrap_or(Path::new("/")); + std::fs::canonicalize(parent) + .context(format!("failed to canonicalize parent path {:?}", parent))?; + + match path_type { + PathType::Socket => Ok(()), + PathType::File => { + if !path.exists() { + return Err(anyhow!("path {:?} does not exist", path)); + } + + if !path.is_file() { + return Err(anyhow!("path {:?} is not a file", path)); + } + + Ok(()) + } + PathType::Directory => { + if !path.exists() { + return Err(anyhow!("path {:?} does not exist", path)); + } + + if !path.is_dir() { + return Err(anyhow!("path {:?} is not a directory", path)); + } + + Ok(()) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + use tempfile::tempdir; + + #[test] + fn test_nydusd_config_validate_checks_all_paths() { + let dir = tempdir().unwrap(); + let daemon_path = dir.path().join("nydusd"); + let source_path = dir.path().join("source"); + let sock_path = dir.path().join("nydusd.sock"); + let api_sock_path = dir.path().join("nydusd-api.sock"); + + fs::write(&daemon_path, 
b"binary").unwrap(); + fs::create_dir(&source_path).unwrap(); + + let config = NydusdConfig::new( + daemon_path, + sock_path, + api_sock_path, + source_path, + false, + vec![], + ); + + assert!(config.validate().is_ok()); + } +} diff --git a/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs_nydus.rs b/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs_nydus.rs new file mode 100644 index 0000000000..2eb4dab32a --- /dev/null +++ b/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs_nydus.rs @@ -0,0 +1,185 @@ +// Copyright (c) 2026 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::path::{Path, PathBuf}; +use std::{collections::HashMap, sync::Arc}; + +use anyhow::{anyhow, Context, Result}; +use async_trait::async_trait; +use tokio::sync::{Mutex, RwLock}; + +use agent::Storage; +use hypervisor::{device::device_manager::DeviceManager, Hypervisor}; +use kata_types::config::hypervisor::SharedFsInfo; + +use super::nydus::nydus_daemon::{Nydusd, NydusdConfig}; +use super::share_virtio_fs::{ + prepare_virtiofs, FS_TYPE_VIRTIO_FS, KATA_VIRTIO_FS_DEV_TYPE, MOUNT_GUEST_TAG, +}; +use super::utils::get_host_rw_shared_path; +use super::virtio_fs_share_mount::VirtiofsShareMount; +use super::{kata_guest_nydus_root_dir, MountedInfo, NydusShareFs, ShareFs, ShareFsMount}; + +const NYDUSD_API_SOCK: &str = "nydusd-api.sock"; + +#[derive(Debug, Clone)] +pub struct ShareVirtioFsNydusConfig { + id: String, + pub virtio_fs_daemon: PathBuf, + pub virtio_fs_extra_args: Vec, + pub debug: bool, +} + +pub struct ShareVirtioFsNydus { + config: ShareVirtioFsNydusConfig, + nydusd: Arc>>, + share_fs_mount: Arc, + mounted_info_set: Arc>>, +} + +impl ShareVirtioFsNydus { + pub fn new(id: &str, config: &SharedFsInfo) -> Result { + Ok(Self { + config: ShareVirtioFsNydusConfig { + id: id.to_string(), + virtio_fs_daemon: config.virtio_fs_daemon.clone().into(), + virtio_fs_extra_args: config.virtio_fs_extra_args.clone(), + debug: false, + }, + nydusd: 
Arc::new(RwLock::new(None)), + share_fs_mount: Arc::new(VirtiofsShareMount::new(id)), + mounted_info_set: Arc::new(Mutex::new(HashMap::new())), + }) + } + + async fn setup_nydusd(&self, h: &dyn Hypervisor) -> Result<()> { + let jailer_root = h.get_jailer_root().await?; + let sock_path = Path::new(&jailer_root).join("virtiofsd.sock"); + let api_sock_path = Path::new(&jailer_root).join(NYDUSD_API_SOCK); + + // new and validate nydusd config + let nydusd_config = NydusdConfig::new( + self.config.virtio_fs_daemon.clone(), + sock_path, + api_sock_path, + get_host_rw_shared_path(&self.config.id), + self.config.debug, + self.config.virtio_fs_extra_args.clone(), + ) + .validate() + .context("validate nydusd config")?; + + // start nydusd with the validated config + let nydusd = Nydusd::new(nydusd_config); + let pid = nydusd.start().await.context("failed to start nydusd")?; + + info!(sl!(), "nydusd started with pid {}", pid); + + { + let mut n = self.nydusd.write().await; + *n = Some(nydusd); + } + + Ok(()) + } +} + +#[async_trait] +impl ShareFs for ShareVirtioFsNydus { + fn get_share_fs_mount(&self) -> Arc { + self.share_fs_mount.clone() + } + + async fn setup_device_before_start_vm( + &self, + h: &dyn Hypervisor, + d: &RwLock, + ) -> Result<()> { + let jailer_root = h.get_jailer_root().await?; + + prepare_virtiofs(d, KATA_VIRTIO_FS_DEV_TYPE, &self.config.id, &jailer_root) + .await + .context("prepare virtiofs for nydus")?; + + self.setup_nydusd(h).await.context("setup nydusd")?; + + Ok(()) + } + + async fn setup_device_after_start_vm( + &self, + _h: &dyn Hypervisor, + _d: &RwLock, + ) -> Result<()> { + Ok(()) + } + + async fn get_storages(&self) -> Result> { + let mut storages: Vec = Vec::new(); + + // In nydusd mode, virtiofs is mounted at `/run/kata-containers/shared/`, because nydusd's + // internal passthrough_fs is mounted at `/containers` within the virtiofs namespace, which + // maps to `/run/kata-containers/shared/containers/` in the guest. 
+ let shared_volume = Storage { + driver: String::from(KATA_VIRTIO_FS_DEV_TYPE), + driver_options: Vec::new(), + source: String::from(MOUNT_GUEST_TAG), + fs_type: String::from(FS_TYPE_VIRTIO_FS), + fs_group: None, + options: vec![String::from("nodev")], + mount_point: kata_guest_nydus_root_dir(), + shared: false, + }; + + storages.push(shared_volume); + Ok(storages) + } + + fn mounted_info_set(&self) -> Arc>> { + self.mounted_info_set.clone() + } + + async fn stop(&self) -> Result<()> { + info!(sl!(), "stopping nydusd daemon"); + let nydusd = { + let mut nydusd_guard = self.nydusd.write().await; + nydusd_guard.take() + }; + + if let Some(nydusd) = nydusd { + nydusd.stop().await.context("failed to stop nydusd")?; + } + Ok(()) + } +} + +#[async_trait] +impl NydusShareFs for ShareVirtioFsNydus { + async fn mount_rafs(&self, cid: &str, rafs_meta: &str, config: &str) -> Result { + let mountpoint = format!("/rafs/{}/lowerdir", cid); + let nydusd_guard = self.nydusd.read().await; + let nydusd = nydusd_guard + .as_ref() + .ok_or_else(|| anyhow!("nydusd not initialized"))?; + + nydusd + .mount_rafs(&mountpoint, &PathBuf::from(rafs_meta), config) + .await + .context("failed to mount rafs via nydusd API")?; + + Ok(mountpoint) + } + + async fn umount_rafs(&self, mountpoint: &str) -> Result<()> { + let nydusd_guard = self.nydusd.read().await; + let nydusd = nydusd_guard + .as_ref() + .ok_or_else(|| anyhow!("nydusd not initialized"))?; + nydusd + .umount(mountpoint) + .await + .context("failed to umount rafs via nydusd API") + } +} diff --git a/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs_standalone.rs b/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs_standalone.rs index 8e8cbd5b6d..abd95618ec 100644 --- a/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs_standalone.rs +++ b/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs_standalone.rs @@ -256,4 +256,10 @@ impl ShareFs for ShareVirtioFsStandalone { fn mounted_info_set(&self) -> 
Arc>> { self.mounted_info_set.clone() } + + async fn stop(&self) -> Result<()> { + self.shutdown_virtiofsd() + .await + .context("failed to stop virtiofsd daemon") + } } diff --git a/tests/integration/nydus/nydus_tests.sh b/tests/integration/nydus/nydus_tests.sh index 180ce4e157..755673dd2d 100755 --- a/tests/integration/nydus/nydus_tests.sh +++ b/tests/integration/nydus/nydus_tests.sh @@ -23,6 +23,8 @@ SYSCONFIG_FILE="/etc/kata-containers/configuration.toml" DEFAULT_CONFIG_FILE="/opt/kata/share/defaults/kata-containers/configuration-qemu.toml" CLH_CONFIG_FILE="/opt/kata/share/defaults/kata-containers/configuration-clh.toml" DB_CONFIG_FILE="/opt/kata/share/defaults/kata-containers/runtime-rs/configuration-dragonball.toml" +QEMU_RS_CONFIG_FILE="/opt/kata/share/defaults/kata-containers/runtime-rs/configuration-qemu-runtime-rs.toml" +CLH_RS_CONFIG_FILE="/opt/kata/share/defaults/kata-containers/runtime-rs/configuration-clh-runtime-rs.toml" need_restore_containerd_config=false need_restore_containerd_fragment=false containerd_config="/etc/containerd/config.toml" @@ -33,13 +35,15 @@ containerd_fragment_backup="/tmp/50-nydus.toml.bak" # test image for container IMAGE="${IMAGE:-ghcr.io/dragonflyoss/image-service/alpine:nydus-latest}" -if [[ "${KATA_HYPERVISOR}" != "qemu" ]] && [[ "${KATA_HYPERVISOR}" != "clh" ]] && [[ "${KATA_HYPERVISOR}" != "dragonball" ]]; then - echo "Skip nydus test for ${KATA_HYPERVISOR}, it only works for QEMU/CLH/DB now." +if [[ "${KATA_HYPERVISOR}" != "qemu" ]] && [[ "${KATA_HYPERVISOR}" != "clh" ]] && \ + [[ "${KATA_HYPERVISOR}" != "dragonball" ]] && [[ "${KATA_HYPERVISOR}" != "qemu-runtime-rs" ]] && \ + [[ "${KATA_HYPERVISOR}" != "clh-runtime-rs" ]]; then + echo "Skip nydus test for ${KATA_HYPERVISOR}, it only works for QEMU/CLH/DB/QEMU-runtime-rs/CLH-runtime-rs now." 
exit 0 fi case "${KATA_HYPERVISOR}" in - dragonball) + dragonball|qemu-runtime-rs|clh-runtime-rs) SYSCONFIG_FILE="/etc/kata-containers/runtime-rs/configuration.toml" ;; *) @@ -66,6 +70,10 @@ function config_kata() { sudo cp -a "${DEFAULT_CONFIG_FILE}" "${SYSCONFIG_FILE}" elif [[ "${KATA_HYPERVISOR}" == "dragonball" ]]; then sudo cp -a "${DB_CONFIG_FILE}" "${SYSCONFIG_FILE}" + elif [[ "${KATA_HYPERVISOR}" == "qemu-runtime-rs" ]]; then + sudo cp -a "${QEMU_RS_CONFIG_FILE}" "${SYSCONFIG_FILE}" + elif [[ "${KATA_HYPERVISOR}" == "clh-runtime-rs" ]]; then + sudo cp -a "${CLH_RS_CONFIG_FILE}" "${SYSCONFIG_FILE}" else sudo cp -a "${CLH_CONFIG_FILE}" "${SYSCONFIG_FILE}" fi diff --git a/tests/spellcheck/kata-dictionary.txt b/tests/spellcheck/kata-dictionary.txt index 2e962083f2..68d230242c 100644 --- a/tests/spellcheck/kata-dictionary.txt +++ b/tests/spellcheck/kata-dictionary.txt @@ -76,6 +76,16 @@ Sharedfs Initdata fsmerge fsmerged +Rafs +virtiofs +virtiofsd +Virtiofs +nydusd +Lowerdir +lowerdir +Upperdir +upperdir + # Networking & Communication netns @@ -93,6 +103,7 @@ coredump CPUSET crio nerdctl +crictl dockershim dentries hypercalls