runtime-rs: add generic support for running the VMM in non-root mode

This commit introduces generic support for running the VMM in rootless mode in runtime-rs:
1. Detect whether the VMM is running in rootless mode.
2. Before starting the VMM process, create a non-root user and launch the VMM with that user’s UID and GID; also add the KVM user's group ID to the VMM process's supplementary groups so that the VMM process can access /dev/kvm.
3. Set up the rootless directory under /run/user/<uid>, and turn some path constants into functions that return the path prefixed with the rootless directory when running in rootless mode.

Fixes: #11414

Signed-off-by: stevenfryto <sunzitai_1832@bupt.edu.cn>
This commit is contained in:
stevenfryto
2025-09-24 14:13:01 +00:00
committed by Xuewei Niu
parent 319237e447
commit bde6eb7c3a
23 changed files with 723 additions and 76 deletions

View File

@@ -4,11 +4,11 @@
//
use hypervisor::BlockConfig;
use kata_types::build_path;
/// The path /run/kata-containers/shared/initdata, combined with the sandbox ID,
/// will form the directory for storing the initdata image.
/// Path::new(KATA_SHARED_INIT_DATA_PATH).join(SID)
pub const KATA_SHARED_INIT_DATA_PATH: &str = "/run/kata-containers/shared/initdata";
/// will form the default directory for storing the initdata image.
pub const DEFAULT_KATA_SHARED_INIT_DATA_PATH: &str = "/run/kata-containers/shared/initdata";
/// kata initdata image
pub const KATA_INIT_DATA_IMAGE: &str = "initdata.image";
@@ -17,3 +17,10 @@ pub const KATA_INIT_DATA_IMAGE: &str = "initdata.image";
/// string included in the disk. And, both of them will come up at the same time.
#[derive(Clone, Debug)]
pub struct InitDataConfig(pub BlockConfig, pub String);
/// Returns the base directory used for storing initdata images.
///
/// The base is `DEFAULT_KATA_SHARED_INIT_DATA_PATH`
/// (`/run/kata-containers/shared/initdata`); joining it with the sandbox ID
/// yields the directory that holds the initdata image of that sandbox.
/// When running in rootless mode the returned path is prefixed with the
/// rootless directory.
pub fn kata_shared_init_data_path() -> String {
    let default_dir: &str = DEFAULT_KATA_SHARED_INIT_DATA_PATH;
    build_path(default_dir)
}

View File

@@ -17,9 +17,7 @@ use hypervisor::{
},
BlockConfig, BlockDeviceAio, Hypervisor, VfioConfig,
};
use kata_types::mount::{
Mount, DEFAULT_KATA_GUEST_SANDBOX_DIR, KATA_EPHEMERAL_VOLUME_TYPE, SHM_DIR,
};
use kata_types::mount::{kata_guest_sandbox_dir, Mount, KATA_EPHEMERAL_VOLUME_TYPE, SHM_DIR};
use kata_types::{
config::{hypervisor::TopologyConfigInfo, TomlConfig},
mount::{adjust_rootfs_mounts, KATA_IMAGE_FORCE_GUEST_PULL},
@@ -336,7 +334,7 @@ impl ResourceManagerInner {
}
let shm_size_option = format!("size={}", shm_size);
let mount_point = format!("{}/{}", DEFAULT_KATA_GUEST_SANDBOX_DIR, SHM_DIR);
let mount_point = format!("{}/{}", kata_guest_sandbox_dir(), SHM_DIR);
let shm_storage = Storage {
driver: KATA_EPHEMERAL_VOLUME_TYPE.to_string(),

View File

@@ -9,6 +9,7 @@ use std::{collections::HashMap, path::PathBuf};
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
use kata_types::build_path;
use kata_types::mount::ImagePullVolume;
use oci_spec::runtime as oci;
use serde_json;
@@ -25,7 +26,7 @@ use kata_types::{
const KUBERNETES_CRI_IMAGE_NAME: &str = "io.kubernetes.cri.image-name";
const KUBERNETES_CRIO_IMAGE_NAME: &str = "io.kubernetes.cri-o.ImageName";
const KATA_VIRTUAL_VOLUME_TYPE_OVERLAY_FS: &str = "overlay";
const KATA_GUEST_ROOT_SHARED_FS: &str = "/run/kata-containers/";
const DEFAULT_KATA_GUEST_ROOT_SHARED_FS: &str = "/run/kata-containers/";
const CRI_CONTAINER_TYPE_KEY_LIST: &[&str] = &[
// cri containerd
@@ -34,6 +35,11 @@ const CRI_CONTAINER_TYPE_KEY_LIST: &[&str] = &[
annotations::crio::CONTAINER_TYPE_LABEL_KEY,
];
/// Returns the Kata guest root shared filesystem path.
///
/// This is `DEFAULT_KATA_GUEST_ROOT_SHARED_FS` (`/run/kata-containers/`)
/// passed through `build_path`, which is expected to add the rootless
/// directory prefix when running in rootless mode.
fn kata_guest_root_shared_fs() -> String {
    let default_root: &str = DEFAULT_KATA_GUEST_ROOT_SHARED_FS;
    build_path(default_root)
}
/// Retrieves the image reference from OCI spec annotations.
///
/// It checks known Kubernetes CRI and CRI-O annotation keys for the container type.
@@ -108,7 +114,7 @@ fn handle_virtual_volume_storage(
KATA_VIRTUAL_VOLUME_IMAGE_GUEST_PULL, image_pull_info
)],
fs_type: KATA_VIRTUAL_VOLUME_TYPE_OVERLAY_FS.to_string(),
mount_point: Path::new(KATA_GUEST_ROOT_SHARED_FS)
mount_point: Path::new(kata_guest_root_shared_fs().as_str())
.join(cid)
.join("rootfs")
.display()
@@ -157,7 +163,7 @@ impl VirtualVolume {
}
}
let guest_path = Path::new(KATA_GUEST_ROOT_SHARED_FS)
let guest_path = Path::new(kata_guest_root_shared_fs().as_str())
.join(cid)
.join("rootfs")
.to_path_buf();
@@ -201,8 +207,9 @@ pub fn is_kata_virtual_volume(m: &kata_types::mount::Mount) -> bool {
#[cfg(test)]
mod tests {
use crate::rootfs::virtual_volume::kata_guest_root_shared_fs;
use crate::rootfs::virtual_volume::{
KATA_GUEST_ROOT_SHARED_FS, KATA_VIRTUAL_VOLUME_PREFIX, KATA_VIRTUAL_VOLUME_TYPE_OVERLAY_FS,
KATA_VIRTUAL_VOLUME_PREFIX, KATA_VIRTUAL_VOLUME_TYPE_OVERLAY_FS,
};
use super::get_image_reference;
@@ -277,7 +284,7 @@ mod tests {
let virt_vol_obj = result.unwrap();
// 1. Verify guest_path
let expected_guest_path = Path::new(KATA_GUEST_ROOT_SHARED_FS)
let expected_guest_path = Path::new(kata_guest_root_shared_fs().as_str())
.join(cid)
.join("rootfs");
assert_eq!(virt_vol_obj.guest_path, expected_guest_path);
@@ -292,7 +299,7 @@ mod tests {
assert_eq!(storage.driver, KATA_VIRTUAL_VOLUME_IMAGE_GUEST_PULL);
assert_eq!(storage.fs_type, KATA_VIRTUAL_VOLUME_TYPE_OVERLAY_FS);
let expected_mount_point = Path::new(KATA_GUEST_ROOT_SHARED_FS)
let expected_mount_point = Path::new(kata_guest_root_shared_fs().as_str())
.join(cid)
.join("rootfs")
.display()

View File

@@ -16,8 +16,8 @@ pub use utils::{
do_get_guest_path, do_get_guest_share_path, do_get_host_path, get_host_rw_shared_path,
};
mod virtio_fs_share_mount;
pub use virtio_fs_share_mount::ephemeral_path;
use virtio_fs_share_mount::VirtiofsShareMount;
pub use virtio_fs_share_mount::EPHEMERAL_PATH;
pub mod sandbox_bind_mounts;
use std::{collections::HashMap, fmt::Debug, path::PathBuf, sync::Arc};
@@ -25,7 +25,7 @@ use std::{collections::HashMap, fmt::Debug, path::PathBuf, sync::Arc};
use agent::Storage;
use anyhow::{anyhow, Context, Ok, Result};
use async_trait::async_trait;
use kata_types::config::hypervisor::SharedFsInfo;
use kata_types::{build_path, config::hypervisor::SharedFsInfo};
use oci_spec::runtime as oci;
use tokio::sync::RwLock;
@@ -35,14 +35,22 @@ const VIRTIO_FS: &str = "virtio-fs";
const _VIRTIO_FS_NYDUS: &str = "virtio-fs-nydus";
const INLINE_VIRTIO_FS: &str = "inline-virtio-fs";
const KATA_HOST_SHARED_DIR: &str = "/run/kata-containers/shared/sandboxes/";
const DEFAULT_KATA_HOST_SHARED_DIR: &str = "/run/kata-containers/shared/sandboxes/";
/// share fs (for example virtio-fs) mount path in the guest
pub const KATA_GUEST_SHARE_DIR: &str = "/run/kata-containers/shared/containers/";
/// default share fs (for example virtio-fs) mount path in the guest
const DEFAULT_KATA_GUEST_SHARE_DIR: &str = "/run/kata-containers/shared/containers/";
pub const PASSTHROUGH_FS_DIR: &str = "passthrough";
const RAFS_DIR: &str = "rafs";
/// Returns the host-side directory under which per-sandbox shared
/// directories are created.
///
/// This is `DEFAULT_KATA_HOST_SHARED_DIR`
/// (`/run/kata-containers/shared/sandboxes/`) passed through `build_path`,
/// which is expected to add the rootless directory prefix when running in
/// rootless mode.
pub fn kata_host_shared_dir() -> String {
    let default_dir: &str = DEFAULT_KATA_HOST_SHARED_DIR;
    build_path(default_dir)
}
/// Returns the share fs (for example virtio-fs) mount path in the guest.
///
/// This is `DEFAULT_KATA_GUEST_SHARE_DIR`
/// (`/run/kata-containers/shared/containers/`) passed through `build_path`,
/// which is expected to add the rootless directory prefix when running in
/// rootless mode.
pub fn kata_guest_share_dir() -> String {
    let default_dir: &str = DEFAULT_KATA_GUEST_SHARE_DIR;
    build_path(default_dir)
}
#[async_trait]
pub trait ShareFs: Send + Sync {
fn get_share_fs_mount(&self) -> Arc<dyn ShareFsMount>;

View File

@@ -88,7 +88,7 @@ impl ShareFs for ShareVirtioFsInline {
fs_type: String::from(FS_TYPE_VIRTIO_FS),
fs_group: None,
options: SHARED_DIR_VIRTIO_FS_OPTIONS.clone(),
mount_point: String::from(KATA_GUEST_SHARE_DIR),
mount_point: kata_guest_share_dir(),
};
storages.push(shared_volume);

View File

@@ -4,7 +4,7 @@
// SPDX-License-Identifier: Apache-2.0
//
use std::{collections::HashMap, process::Stdio, sync::Arc};
use std::{collections::HashMap, path::Path, process::Stdio, sync::Arc};
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
@@ -18,18 +18,19 @@ use tokio::{
};
use agent::Storage;
use hypervisor::{device::device_manager::DeviceManager, Hypervisor};
use kata_types::config::hypervisor::SharedFsInfo;
use hypervisor::{device::device_manager::DeviceManager, utils::chown_to_parent, Hypervisor};
use kata_types::{config::hypervisor::SharedFsInfo, rootless::is_rootless};
use super::{
share_virtio_fs::generate_sock_path, utils::ensure_dir_exist, utils::get_host_ro_shared_path,
virtio_fs_share_mount::VirtiofsShareMount, MountedInfo, ShareFs, ShareFsMount,
};
use crate::share_fs::{
kata_guest_share_dir,
share_virtio_fs::{
prepare_virtiofs, FS_TYPE_VIRTIO_FS, KATA_VIRTIO_FS_DEV_TYPE, MOUNT_GUEST_TAG,
},
KATA_GUEST_SHARE_DIR, VIRTIO_FS,
VIRTIO_FS,
};
#[derive(Debug, Clone)]
@@ -106,14 +107,49 @@ impl ShareVirtioFsStandalone {
async fn setup_virtiofsd(&self, h: &dyn Hypervisor) -> Result<()> {
let sock_path = generate_sock_path(&h.get_jailer_root().await?);
let disable_guest_selinux = h.hypervisor_config().await.disable_guest_selinux;
let socket_path = if is_rootless() {
// In rootless mode, we use relative socket paths instead of absolute paths
// because the absolute path with rootless prefix can exceed the unix socket path length limit (typically 108 bytes)
// By using a relative path and changing the working directory, we can keep the socket path short
let sock_file = Path::new(sock_path.as_str())
.file_name()
.ok_or_else(|| anyhow!("failed to get file name of {:?}", sock_path))?;
sock_file.to_string_lossy().to_string()
} else {
sock_path.clone()
};
let args = self
.virtiofsd_args(&sock_path, disable_guest_selinux)
.virtiofsd_args(&socket_path, disable_guest_selinux)
.context("virtiofsd args")?;
let mut cmd = Command::new(&self.config.virtio_fs_daemon);
let child_cmd = cmd.args(&args).stderr(Stdio::piped());
if is_rootless() {
// Change working directory to socket's parent directory
// This allows virtiofsd to create the socket file using the short relative path
// avoiding the unix socket path length limitation
let work_dir = Path::new(&sock_path)
.parent()
.ok_or_else(|| anyhow!("failed to get parent dir of {:?}", sock_path))?;
child_cmd.current_dir(work_dir);
}
let child = child_cmd.spawn().context("spawn virtiofsd")?;
if is_rootless() {
// wait for the socket to be created
for _ in 0..10 {
if Path::new(&sock_path).exists() {
break;
}
tokio::time::sleep(std::time::Duration::from_millis(100)).await;
}
chown_to_parent(&sock_path)?;
}
// update virtiofsd pid
{
let mut inner = self.inner.write().await;
@@ -213,7 +249,7 @@ impl ShareFs for ShareVirtioFsStandalone {
fs_type: String::from(FS_TYPE_VIRTIO_FS),
fs_group: None,
options: vec![String::from("nodev")],
mount_point: String::from(KATA_GUEST_SHARE_DIR),
mount_point: kata_guest_share_dir(),
};
storages.push(shared_volume);

View File

@@ -67,15 +67,19 @@ pub(crate) fn share_to_guest(
//
// 3. host-guest shared files/directories are mounted one level under /run/kata-containers/shared/sandboxes/$sbx_id/rw/passthrough and are thus presented to the guest one level under /run/kata-containers/shared/containers/passthrough.
/// Returns the `ro` sub-directory of the host shared directory for `id`,
/// i.e. `<host shared dir>/<id>/ro`.
pub(crate) fn get_host_ro_shared_path(id: &str) -> PathBuf {
Path::new(KATA_HOST_SHARED_DIR).join(id).join("ro")
Path::new(kata_host_shared_dir().as_str())
.join(id)
.join("ro")
}
/// Returns the `rw` sub-directory of the host shared directory for sandbox
/// `sid`, i.e. `<host shared dir>/<sid>/rw`.
pub fn get_host_rw_shared_path(sid: &str) -> PathBuf {
Path::new(KATA_HOST_SHARED_DIR).join(sid).join("rw")
Path::new(kata_host_shared_dir().as_str())
.join(sid)
.join("rw")
}
/// Returns the host shared directory for sandbox `sid`,
/// i.e. `<host shared dir>/<sid>`.
pub fn get_host_shared_path(sid: &str) -> PathBuf {
Path::new(KATA_HOST_SHARED_DIR).join(sid)
Path::new(kata_host_shared_dir().as_str()).join(sid)
}
fn do_get_guest_any_path(
@@ -93,7 +97,7 @@ fn do_get_guest_any_path(
let guest_share_dir = if is_virtiofs {
Path::new("/").to_path_buf()
} else {
Path::new(KATA_GUEST_SHARE_DIR).to_path_buf()
Path::new(kata_guest_share_dir().as_str()).to_path_buf()
};
let path = if is_volume && !is_virtiofs {

View File

@@ -8,13 +8,15 @@ use agent::Storage;
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
use kata_sys_util::mount::{bind_remount, umount_all, umount_timeout};
use kata_types::k8s::is_watchable_mount;
use kata_types::{build_path, k8s::is_watchable_mount};
use std::fs;
use std::path::Path;
const WATCHABLE_PATH_NAME: &str = "watchable";
const WATCHABLE_BIND_DEV_TYPE: &str = "watchable-bind";
pub const EPHEMERAL_PATH: &str = "/run/kata-containers/sandbox/ephemeral";
const DEFAULT_EPHEMERAL_PATH: &str = "/run/kata-containers/sandbox/ephemeral";
use crate::share_fs::kata_guest_share_dir;
use super::{
get_host_rw_shared_path,
@@ -22,10 +24,13 @@ use super::{
self, do_get_host_path, get_host_ro_shared_path, get_host_shared_path,
mkdir_with_permissions,
},
ShareFsMount, ShareFsMountResult, ShareFsRootfsConfig, ShareFsVolumeConfig,
KATA_GUEST_SHARE_DIR, PASSTHROUGH_FS_DIR,
ShareFsMount, ShareFsMountResult, ShareFsRootfsConfig, ShareFsVolumeConfig, PASSTHROUGH_FS_DIR,
};
/// Returns the directory used for ephemeral volumes.
///
/// This is `DEFAULT_EPHEMERAL_PATH`
/// (`/run/kata-containers/sandbox/ephemeral`) passed through `build_path`,
/// which is expected to add the rootless directory prefix when running in
/// rootless mode.
pub fn ephemeral_path() -> String {
    let default_dir: &str = DEFAULT_EPHEMERAL_PATH;
    build_path(default_dir)
}
#[derive(Debug)]
pub struct VirtiofsShareMount {
id: String,
@@ -88,7 +93,7 @@ impl ShareFsMount for VirtiofsShareMount {
let file_name = Path::new(&guest_path)
.file_name()
.context("get file name from guest path")?;
let watchable_guest_mount = Path::new(KATA_GUEST_SHARE_DIR)
let watchable_guest_mount = Path::new(kata_guest_share_dir().as_str())
.join(PASSTHROUGH_FS_DIR)
.join(WATCHABLE_PATH_NAME)
.join(file_name)

View File

@@ -6,7 +6,7 @@
use anyhow::{anyhow, Context, Result};
use kata_types::mount::{
get_volume_mount_info, join_path, DirectVolumeMountInfo, KATA_DIRECT_VOLUME_ROOT_PATH,
get_volume_mount_info, join_path, kata_direct_volume_root_path, DirectVolumeMountInfo,
};
pub mod rawblock_volume;
@@ -25,8 +25,8 @@ pub fn volume_mount_info(volume_path: &str) -> Result<DirectVolumeMountInfo> {
// get direct volume path whose volume_path encoded with base64
pub fn get_direct_volume_path(volume_path: &str) -> Result<String> {
let volume_full_path =
join_path(KATA_DIRECT_VOLUME_ROOT_PATH, volume_path).context("failed to join path.")?;
let volume_full_path = join_path(kata_direct_volume_root_path().as_str(), volume_path)
.context("failed to join path.")?;
if volume_full_path.exists() {
Ok(volume_full_path.display().to_string())

View File

@@ -11,8 +11,7 @@ use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
use hypervisor::device::device_manager::DeviceManager;
use kata_sys_util::mount::{get_mount_path, get_mount_type};
use kata_types::mount::DEFAULT_KATA_GUEST_SANDBOX_DIR;
use kata_types::mount::KATA_EPHEMERAL_VOLUME_TYPE;
use kata_types::mount::{kata_guest_sandbox_dir, KATA_EPHEMERAL_VOLUME_TYPE};
use nix::sys::stat::stat;
use oci_spec::runtime as oci;
use tokio::sync::RwLock;
@@ -51,7 +50,7 @@ impl EphemeralVolume {
let file_name = Path::new(source)
.file_name()
.context(format!("get file name from {:?}", &m.source()))?;
let source = Path::new(DEFAULT_KATA_GUEST_SANDBOX_DIR)
let source = Path::new(kata_guest_sandbox_dir().as_str())
.join(KATA_EPHEMERAL_VOLUME_TYPE)
.join(file_name)
.into_os_string()

View File

@@ -11,7 +11,7 @@ use std::{
};
use super::{Volume, BIND};
use crate::share_fs::EPHEMERAL_PATH;
use crate::share_fs::ephemeral_path;
use agent::Storage;
use anyhow::{anyhow, Context, Ok, Result};
use async_trait::async_trait;
@@ -63,7 +63,7 @@ impl Hugepage {
let mut mount = mount.clone();
// Set the mount source path to a path that resides inside the VM
mount.set_source(Some(
format!("{}{}{}", EPHEMERAL_PATH, "/", base_name).into(),
format!("{}{}{}", ephemeral_path(), "/", base_name).into(),
));
// Set the mount type to "bind"
mount.set_typ(Some(BIND.to_string()));

View File

@@ -32,7 +32,7 @@ use tokio::{
use walkdir::WalkDir;
use super::Volume;
use crate::share_fs::KATA_GUEST_SHARE_DIR;
use crate::share_fs::kata_guest_share_dir;
use crate::share_fs::{MountedInfo, ShareFs, ShareFsVolumeConfig};
use kata_types::{
k8s::{is_configmap, is_downward_api, is_projected, is_secret},
@@ -988,7 +988,10 @@ fn generate_guest_path(cid: &str, mount_destination: &Path) -> Result<String> {
Ok(format!(
"{}{}-{}-{}",
KATA_GUEST_SHARE_DIR, cid, hex_str, dest_base
kata_guest_share_dir(),
cid,
hex_str,
dest_base
))
}

View File

@@ -11,9 +11,7 @@ use anyhow::Result;
use async_trait::async_trait;
use hypervisor::device::device_manager::DeviceManager;
use kata_sys_util::mount::{get_mount_path, get_mount_type};
use kata_types::mount::{
DEFAULT_KATA_GUEST_SANDBOX_DIR, KATA_EPHEMERAL_VOLUME_TYPE, SHM_DEVICE, SHM_DIR,
};
use kata_types::mount::{kata_guest_sandbox_dir, KATA_EPHEMERAL_VOLUME_TYPE, SHM_DEVICE, SHM_DIR};
use oci_spec::runtime as oci;
use tokio::sync::RwLock;
@@ -27,9 +25,7 @@ impl ShmVolume {
let mut mount = oci::Mount::default();
mount.set_destination(m.destination().clone());
mount.set_typ(Some("bind".to_string()));
mount.set_source(Some(
PathBuf::from(DEFAULT_KATA_GUEST_SANDBOX_DIR).join(SHM_DIR),
));
mount.set_source(Some(PathBuf::from(kata_guest_sandbox_dir()).join(SHM_DIR)));
mount.set_options(Some(vec!["rbind".to_string()]));
Ok(Self { mount })