mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-09-25 18:53:44 +00:00
runtime-rs: Add sandbox-level volume manager for non-sharedfs
This commit introduces a new `VolumeManager` to track the state of shared volumes, including their reference count and whether they have been copied to the guest. The manager's goal is to handle the lifecycle of shared filesystem volumes, including: (1) Volume State Tracking: Tracks the mapping from host source paths to guest destination paths. (2) Reference Counting: Manages reference counts for each volume, preventing premature cleanup when multiple containers share the same source. (3) Deterministic Guest Paths: Generates unique and deterministic guest paths using SHA-256 hashing to avoid naming conflicts. (4) Improved Management: Provides a centralized way to handle volume creation, copying, and release, including aborting file watchers when volumes are no longer in use. Signed-off-by: Alex Lyn <alex.lyn@antgroup.com>
This commit is contained in:
2
src/runtime-rs/Cargo.lock
generated
2
src/runtime-rs/Cargo.lock
generated
@@ -3878,6 +3878,7 @@ dependencies = [
|
|||||||
"cgroups-rs 0.4.0",
|
"cgroups-rs 0.4.0",
|
||||||
"flate2",
|
"flate2",
|
||||||
"futures 0.3.28",
|
"futures 0.3.28",
|
||||||
|
"hex",
|
||||||
"hypervisor",
|
"hypervisor",
|
||||||
"inotify",
|
"inotify",
|
||||||
"kata-sys-util",
|
"kata-sys-util",
|
||||||
@@ -3896,6 +3897,7 @@ dependencies = [
|
|||||||
"scopeguard",
|
"scopeguard",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
|
"sha2 0.9.3",
|
||||||
"slog",
|
"slog",
|
||||||
"slog-scope",
|
"slog-scope",
|
||||||
"tempfile",
|
"tempfile",
|
||||||
|
@@ -37,6 +37,8 @@ inotify = "0.11.0"
|
|||||||
walkdir = "2.5.0"
|
walkdir = "2.5.0"
|
||||||
flate2 = { version = "1.0", features = ["zlib"] }
|
flate2 = { version = "1.0", features = ["zlib"] }
|
||||||
tempfile = "3.19.1"
|
tempfile = "3.19.1"
|
||||||
|
hex = "0.4"
|
||||||
|
sha2 = "=0.9.3"
|
||||||
|
|
||||||
## Dependencies from `rust-netlink`
|
## Dependencies from `rust-netlink`
|
||||||
netlink-packet-route = "0.22"
|
netlink-packet-route = "0.22"
|
||||||
|
@@ -5,7 +5,7 @@
|
|||||||
//
|
//
|
||||||
|
|
||||||
use std::{
|
use std::{
|
||||||
collections::{HashSet, VecDeque},
|
collections::{HashMap, HashSet, VecDeque},
|
||||||
os::unix::fs::MetadataExt,
|
os::unix::fs::MetadataExt,
|
||||||
path::{Path, PathBuf},
|
path::{Path, PathBuf},
|
||||||
str::FromStr,
|
str::FromStr,
|
||||||
@@ -20,6 +20,7 @@ use hypervisor::device::device_manager::DeviceManager;
|
|||||||
use inotify::{EventMask, Inotify, WatchMask};
|
use inotify::{EventMask, Inotify, WatchMask};
|
||||||
use kata_sys_util::mount::{get_mount_options, get_mount_path, get_mount_type};
|
use kata_sys_util::mount::{get_mount_options, get_mount_path, get_mount_type};
|
||||||
use nix::sys::stat::SFlag;
|
use nix::sys::stat::SFlag;
|
||||||
|
use sha2::{Digest, Sha256};
|
||||||
use tokio::{
|
use tokio::{
|
||||||
io::AsyncReadExt,
|
io::AsyncReadExt,
|
||||||
sync::{Mutex, RwLock},
|
sync::{Mutex, RwLock},
|
||||||
@@ -266,6 +267,160 @@ impl FsWatcher {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Sandbox-level volume manager
|
||||||
|
/// Tracks which paths have been copied to the guest on the runtime side
|
||||||
|
#[derive(Clone, Default)]
|
||||||
|
pub struct VolumeManager {
|
||||||
|
// Mapping of source path -> volume
|
||||||
|
volumes: Arc<RwLock<HashMap<String, VolumeState>>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Default)]
|
||||||
|
struct VolumeState {
|
||||||
|
// Source path (on the host)
|
||||||
|
source_path: String,
|
||||||
|
// Guest path
|
||||||
|
guest_path: String,
|
||||||
|
// Whether the volume has been copied to the guest
|
||||||
|
copied_to_guest: bool,
|
||||||
|
// Reference count (how many containers are using it)
|
||||||
|
ref_count: usize,
|
||||||
|
// List of container IDs using this volume
|
||||||
|
containers: HashSet<String>,
|
||||||
|
// Monitor task handle (if any)
|
||||||
|
monitor_task: Option<Arc<JoinHandle<()>>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(dead_code)]
|
||||||
|
impl VolumeManager {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
volumes: Arc::new(RwLock::new(HashMap::new())),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Gets or creates the volume's guest path
|
||||||
|
/// Returns: (guest_path, need_copy)
|
||||||
|
pub async fn get_or_create_volume(
|
||||||
|
&self,
|
||||||
|
source_path: &str,
|
||||||
|
container_id: &str,
|
||||||
|
mount_destination: &Path,
|
||||||
|
) -> Result<(String, bool)> {
|
||||||
|
let mut volumes = self.volumes.write().await;
|
||||||
|
|
||||||
|
// Canonicalize the source path as a key
|
||||||
|
let canonical_source = std::fs::canonicalize(source_path)
|
||||||
|
.map(|p| p.to_string_lossy().to_string())
|
||||||
|
.unwrap_or_else(|_| source_path.to_string());
|
||||||
|
|
||||||
|
if let Some(state) = volumes.get_mut(&canonical_source) {
|
||||||
|
// Existing volume
|
||||||
|
state.ref_count += 1;
|
||||||
|
state.containers.insert(container_id.to_string());
|
||||||
|
|
||||||
|
info!(
|
||||||
|
sl!(),
|
||||||
|
"Reusing existing volume: source={:?}, guest={:?}, ref_count={}, already_copied={}",
|
||||||
|
canonical_source,
|
||||||
|
state.guest_path,
|
||||||
|
state.ref_count,
|
||||||
|
state.copied_to_guest
|
||||||
|
);
|
||||||
|
|
||||||
|
// Return guest path and whether a copy is needed (false, as it's already copied)
|
||||||
|
return Ok((state.guest_path.clone(), false));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a new volume
|
||||||
|
let guest_path = generate_guest_path(&canonical_source, mount_destination);
|
||||||
|
|
||||||
|
let mut containers = HashSet::new();
|
||||||
|
containers.insert(container_id.to_string());
|
||||||
|
|
||||||
|
let state = VolumeState {
|
||||||
|
source_path: canonical_source.clone(),
|
||||||
|
guest_path: guest_path.clone(),
|
||||||
|
copied_to_guest: false, // Not yet copied
|
||||||
|
ref_count: 1,
|
||||||
|
containers,
|
||||||
|
monitor_task: None,
|
||||||
|
};
|
||||||
|
|
||||||
|
volumes.insert(state.source_path.clone(), state.clone());
|
||||||
|
|
||||||
|
info!(
|
||||||
|
sl!(),
|
||||||
|
"Created new volume: source={:?}, guest={:?}", state.source_path, state.guest_path,
|
||||||
|
);
|
||||||
|
|
||||||
|
// Return guest path and whether a copy is needed (true, as it's new)
|
||||||
|
Ok((guest_path, true))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Marks the volume as copied to the guest
|
||||||
|
pub async fn mark_as_copied(
|
||||||
|
&self,
|
||||||
|
source_path: &str,
|
||||||
|
monitor_task: Option<JoinHandle<()>>,
|
||||||
|
) -> Result<()> {
|
||||||
|
let mut states = self.volumes.write().await;
|
||||||
|
|
||||||
|
let canonical_source = std::fs::canonicalize(source_path)
|
||||||
|
.map(|p| p.to_string_lossy().to_string())
|
||||||
|
.unwrap_or_else(|_| source_path.to_string());
|
||||||
|
|
||||||
|
if let Some(state) = states.get_mut(&canonical_source) {
|
||||||
|
state.copied_to_guest = true;
|
||||||
|
if let Some(handle) = monitor_task {
|
||||||
|
state.monitor_task = Some(Arc::new(handle));
|
||||||
|
}
|
||||||
|
|
||||||
|
info!(
|
||||||
|
sl!(),
|
||||||
|
"Marked volume as copied: source={:?}, guest={:?}",
|
||||||
|
canonical_source,
|
||||||
|
state.guest_path
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Releases a volume reference
|
||||||
|
pub async fn release_volume(&self, source_path: &str, container_id: &str) -> Result<bool> {
|
||||||
|
let mut states = self.volumes.write().await;
|
||||||
|
|
||||||
|
let canonical_source = std::fs::canonicalize(source_path)
|
||||||
|
.map(|p| p.to_string_lossy().to_string())
|
||||||
|
.unwrap_or_else(|_| source_path.to_string());
|
||||||
|
|
||||||
|
if let Some(state) = states.get_mut(&canonical_source) {
|
||||||
|
state.containers.remove(container_id);
|
||||||
|
state.ref_count = state.ref_count.saturating_sub(1);
|
||||||
|
|
||||||
|
if state.ref_count == 0 {
|
||||||
|
// Abort the monitor task
|
||||||
|
if let Some(handle) = &state.monitor_task {
|
||||||
|
handle.abort();
|
||||||
|
}
|
||||||
|
|
||||||
|
info!(
|
||||||
|
sl!(),
|
||||||
|
"Volume has no more references, removing: source={:?}, guest={:?}",
|
||||||
|
canonical_source,
|
||||||
|
state.guest_path
|
||||||
|
);
|
||||||
|
|
||||||
|
states.remove(&canonical_source);
|
||||||
|
return Ok(true); // Can be cleaned up
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(false)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl ShareFsVolume {
|
impl ShareFsVolume {
|
||||||
pub(crate) async fn new(
|
pub(crate) async fn new(
|
||||||
share_fs: &Option<Arc<dyn ShareFs>>,
|
share_fs: &Option<Arc<dyn ShareFs>>,
|
||||||
@@ -775,6 +930,25 @@ pub(crate) fn is_watchable_volume(source_path: &PathBuf) -> bool {
|
|||||||
|| is_configmap(source_path)
|
|| is_configmap(source_path)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Generates a guest path with hashed source path
|
||||||
|
fn generate_guest_path(source_path: &str, mount_destination: &Path) -> String {
|
||||||
|
// Use a hash of the source path to generate a unique but deterministic identifier
|
||||||
|
let mut hasher = Sha256::new();
|
||||||
|
hasher.update(source_path.as_bytes());
|
||||||
|
let hash = hasher.finalize();
|
||||||
|
let hash_str = hex::encode(&hash[..8]);
|
||||||
|
|
||||||
|
let dest_base = mount_destination
|
||||||
|
.file_name()
|
||||||
|
.and_then(|n| n.to_str())
|
||||||
|
.unwrap_or("volume");
|
||||||
|
|
||||||
|
format!(
|
||||||
|
"{}/{}/shared-{}-{}",
|
||||||
|
DEFAULT_KATA_GUEST_SANDBOX_DIR, PASSTHROUGH_FS_DIR, hash_str, dest_base
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test {
|
mod test {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
Reference in New Issue
Block a user