Mirror of https://github.com/kata-containers/kata-containers.git (synced 2025-07-31 07:19:06 +00:00)

Merge pull request #7602 from jiangliu/agent-storage

Refine storage device management for kata-agent
This commit is contained in: commit 91db888d83
@@ -35,9 +35,9 @@ const VM_ROOTFS: &str = "/";
 const BLOCK: &str = "block";
 pub const DRIVER_9P_TYPE: &str = "9p";
 pub const DRIVER_VIRTIOFS_TYPE: &str = "virtio-fs";
-pub const DRIVER_BLK_TYPE: &str = "blk";
+pub const DRIVER_BLK_PCI_TYPE: &str = "blk";
 pub const DRIVER_BLK_CCW_TYPE: &str = "blk-ccw";
-pub const DRIVER_MMIO_BLK_TYPE: &str = "mmioblk";
+pub const DRIVER_BLK_MMIO_TYPE: &str = "mmioblk";
 pub const DRIVER_SCSI_TYPE: &str = "scsi";
 pub const DRIVER_NVDIMM_TYPE: &str = "nvdimm";
 pub const DRIVER_EPHEMERAL_TYPE: &str = "ephemeral";
@@ -935,9 +935,9 @@ async fn add_device(device: &Device, sandbox: &Arc<Mutex<Sandbox>>) -> Result<Sp
     }
 
     match device.type_.as_str() {
-        DRIVER_BLK_TYPE => virtio_blk_device_handler(device, sandbox).await,
+        DRIVER_BLK_PCI_TYPE => virtio_blk_device_handler(device, sandbox).await,
         DRIVER_BLK_CCW_TYPE => virtio_blk_ccw_device_handler(device, sandbox).await,
-        DRIVER_MMIO_BLK_TYPE => virtiommio_blk_device_handler(device, sandbox).await,
+        DRIVER_BLK_MMIO_TYPE => virtiommio_blk_device_handler(device, sandbox).await,
         DRIVER_NVDIMM_TYPE => virtio_nvdimm_device_handler(device, sandbox).await,
         DRIVER_SCSI_TYPE => virtio_scsi_device_handler(device, sandbox).await,
         DRIVER_VFIO_PCI_GK_TYPE | DRIVER_VFIO_PCI_TYPE => {
@@ -48,6 +48,7 @@ mod pci;
 pub mod random;
 mod sandbox;
 mod signal;
+mod storage;
 mod uevent;
 mod util;
 mod version;
(File diff suppressed because it is too large)
@@ -7,14 +7,14 @@ use anyhow::{anyhow, Result};
 use nix::mount::MsFlags;
 use nix::sched::{unshare, CloneFlags};
 use nix::unistd::{getpid, gettid};
+use slog::Logger;
 use std::fmt;
 use std::fs;
 use std::fs::File;
 use std::path::{Path, PathBuf};
 use tracing::instrument;
 
-use crate::mount::{baremount, FLAGS};
-use slog::Logger;
+use crate::mount::baremount;
 
 const PERSISTENT_NS_DIR: &str = "/var/run/sandbox-ns";
 pub const NSTYPEIPC: &str = "ipc";
@@ -116,15 +116,7 @@ impl Namespace {
         // Bind mount the new namespace from the current thread onto the mount point to persist it.
 
         let mut flags = MsFlags::empty();
-
-        if let Some(x) = FLAGS.get("rbind") {
-            let (clear, f) = *x;
-            if clear {
-                flags &= !f;
-            } else {
-                flags |= f;
-            }
-        };
+        flags |= MsFlags::MS_BIND | MsFlags::MS_REC;
 
         baremount(source, destination, "none", flags, "", &logger).map_err(|e| {
             anyhow!(
@@ -57,12 +57,13 @@ use crate::device::{
 };
 use crate::linux_abi::*;
 use crate::metrics::get_metrics;
-use crate::mount::{add_storages, baremount, update_ephemeral_mounts, STORAGE_HANDLER_LIST};
+use crate::mount::baremount;
 use crate::namespace::{NSTYPEIPC, NSTYPEPID, NSTYPEUTS};
 use crate::network::setup_guest_dns;
 use crate::pci;
 use crate::random;
 use crate::sandbox::Sandbox;
+use crate::storage::{add_storages, update_ephemeral_mounts, STORAGE_HANDLERS};
 use crate::version::{AGENT_VERSION, API_VERSION};
 use crate::AGENT_CONFIG;
@@ -243,13 +244,7 @@ impl AgentService {
         // After all those storages have been processed, no matter the order
         // here, the agent will rely on rustjail (using the oci.Mounts
         // list) to bind mount all of them inside the container.
-        let m = add_storages(
-            sl(),
-            &req.storages,
-            &self.sandbox,
-            Some(req.container_id.clone()),
-        )
-        .await?;
+        let m = add_storages(sl(), req.storages, &self.sandbox, Some(req.container_id)).await?;
 
         let mut s = self.sandbox.lock().await;
         s.container_mounts.insert(cid.clone(), m);
@@ -308,7 +303,7 @@ impl AgentService {
         if let Err(e) = ctr.destroy().await {
             error!(sl(), "failed to destroy container: {:?}", e);
         }
-        if let Err(e) = remove_container_resources(&mut s, &cid) {
+        if let Err(e) = remove_container_resources(&mut s, &cid).await {
             error!(sl(), "failed to remove container resources: {:?}", e);
         }
         return Err(err);
@@ -360,7 +355,7 @@ impl AgentService {
             .ok_or_else(|| anyhow!("Invalid container id"))?
             .destroy()
             .await?;
-        remove_container_resources(&mut sandbox, &cid)?;
+        remove_container_resources(&mut sandbox, &cid).await?;
         return Ok(());
     }
@@ -382,7 +377,7 @@ impl AgentService {
             .await
             .map_err(|_| anyhow!(nix::Error::ETIME))???;
 
-        remove_container_resources(&mut *self.sandbox.lock().await, &cid)
+        remove_container_resources(&mut *self.sandbox.lock().await, &cid).await
     }
 
     #[instrument]
@@ -1196,7 +1191,7 @@ impl agent_ttrpc::AgentService for AgentService {
             s.setup_shared_namespaces().await.map_ttrpc_err(same)?;
         }
 
-        let m = add_storages(sl(), &req.storages, &self.sandbox, None)
+        let m = add_storages(sl(), req.storages, &self.sandbox, None)
            .await
            .map_ttrpc_err(same)?;
        self.sandbox.lock().await.mounts = m;
@@ -1585,7 +1580,7 @@ fn get_agent_details() -> AgentDetails {
     detail.init_daemon = unistd::getpid() == Pid::from_raw(1);
 
     detail.device_handlers = Vec::new();
-    detail.storage_handlers = STORAGE_HANDLER_LIST.iter().map(|x| x.to_string()).collect();
+    detail.storage_handlers = STORAGE_HANDLERS.get_handlers();
 
     detail
 }
@@ -1679,21 +1674,21 @@ fn update_container_namespaces(
     Ok(())
 }
 
-fn remove_container_resources(sandbox: &mut Sandbox, cid: &str) -> Result<()> {
+async fn remove_container_resources(sandbox: &mut Sandbox, cid: &str) -> Result<()> {
     let mut cmounts: Vec<String> = vec![];
 
     // Find the sandbox storage used by this container
     let mounts = sandbox.container_mounts.get(cid);
     if let Some(mounts) = mounts {
         for m in mounts.iter() {
-            if sandbox.storages.get(m).is_some() {
+            if sandbox.storages.contains_key(m) {
                 cmounts.push(m.to_string());
             }
         }
     }
 
     for m in cmounts.iter() {
-        if let Err(err) = sandbox.unset_and_remove_sandbox_storage(m) {
+        if let Err(err) = sandbox.remove_sandbox_storage(m).await {
             error!(
                 sl(),
                 "failed to unset_and_remove_sandbox_storage for container {}, error: {:?}",
@@ -3,16 +3,20 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
+use std::collections::hash_map::Entry;
 use std::collections::HashMap;
+use std::fmt::{Debug, Formatter};
 use std::fs;
 use std::os::unix::fs::PermissionsExt;
 use std::path::Path;
 use std::str::FromStr;
+use std::sync::atomic::{AtomicU32, Ordering};
 use std::sync::Arc;
 use std::{thread, time};
 
 use anyhow::{anyhow, Context, Result};
 use kata_types::cpu::CpuSet;
+use kata_types::mount::{StorageDevice, StorageDeviceGeneric};
 use libc::pid_t;
 use oci::{Hook, Hooks};
 use protocols::agent::OnlineCPUMemRequest;
@@ -28,7 +32,7 @@ use tokio::sync::Mutex;
 use tracing::instrument;
 
 use crate::linux_abi::*;
-use crate::mount::{get_mount_fs_type, remove_mounts, TYPE_ROOTFS};
+use crate::mount::{get_mount_fs_type, is_mounted, remove_mounts, TYPE_ROOTFS};
 use crate::namespace::Namespace;
 use crate::netlink::Handle;
 use crate::network::Network;
@@ -40,6 +44,46 @@ pub const ERR_INVALID_CONTAINER_ID: &str = "Invalid container id";
 
 type UeventWatcher = (Box<dyn UeventMatcher>, oneshot::Sender<Uevent>);
 
+#[derive(Clone)]
+pub struct StorageState {
+    count: Arc<AtomicU32>,
+    device: Arc<dyn StorageDevice>,
+}
+
+impl Debug for StorageState {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("StorageState").finish()
+    }
+}
+
+impl StorageState {
+    fn new() -> Self {
+        StorageState {
+            count: Arc::new(AtomicU32::new(1)),
+            device: Arc::new(StorageDeviceGeneric::new("".to_string())),
+        }
+    }
+
+    pub fn from_device(device: Arc<dyn StorageDevice>) -> Self {
+        Self {
+            count: Arc::new(AtomicU32::new(1)),
+            device,
+        }
+    }
+
+    pub async fn ref_count(&self) -> u32 {
+        self.count.load(Ordering::Relaxed)
+    }
+
+    async fn inc_ref_count(&self) {
+        self.count.fetch_add(1, Ordering::Acquire);
+    }
+
+    async fn dec_and_test_ref_count(&self) -> bool {
+        self.count.fetch_sub(1, Ordering::AcqRel) == 1
+    }
+}
+
 #[derive(Debug)]
 pub struct Sandbox {
     pub logger: Logger,
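The counting scheme introduced above can be exercised in isolation. The following standalone sketch (illustration only, not part of the commit) models how a StorageState starts at one reference, gains one per additional user, and reports when the last user is gone:

// Standalone model of StorageState's reference counting.
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::Arc;

#[derive(Clone)]
struct RefCounted {
    count: Arc<AtomicU32>,
}

impl RefCounted {
    // A freshly created storage object starts with one user.
    fn new() -> Self {
        Self { count: Arc::new(AtomicU32::new(1)) }
    }

    fn inc_ref_count(&self) {
        self.count.fetch_add(1, Ordering::Acquire);
    }

    // True when the caller just dropped the last reference.
    fn dec_and_test_ref_count(&self) -> bool {
        self.count.fetch_sub(1, Ordering::AcqRel) == 1
    }
}

fn main() {
    let state = RefCounted::new();            // first container: refcount == 1
    state.inc_ref_count();                    // second container shares the storage
    assert!(!state.dec_and_test_ref_count()); // one user left, keep the mount
    assert!(state.dec_and_test_ref_count());  // last user gone, safe to clean up
}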
@@ -54,7 +98,7 @@ pub struct Sandbox {
     pub shared_utsns: Namespace,
     pub shared_ipcns: Namespace,
     pub sandbox_pidns: Option<Namespace>,
-    pub storages: HashMap<String, u32>,
+    pub storages: HashMap<String, StorageState>,
     pub running: bool,
     pub no_pivot_root: bool,
     pub sender: Option<tokio::sync::oneshot::Sender<i32>>,
@@ -100,52 +144,55 @@ impl Sandbox {
         })
     }
 
-    // set_sandbox_storage sets the sandbox level reference
-    // counter for the sandbox storage.
-    // This method also returns a boolean to let
-    // callers know if the storage already existed or not.
-    // It will return true if storage is new.
+    /// Add a new storage object or increase reference count of existing one.
+    /// The caller may detect new storage object by checking `StorageState.refcount == 1`.
     #[instrument]
-    pub fn set_sandbox_storage(&mut self, path: &str) -> bool {
-        match self.storages.get_mut(path) {
-            None => {
-                self.storages.insert(path.to_string(), 1);
-                true
+    pub async fn add_sandbox_storage(&mut self, path: &str) -> StorageState {
+        match self.storages.entry(path.to_string()) {
+            Entry::Occupied(e) => {
+                let state = e.get().clone();
+                state.inc_ref_count().await;
+                state
             }
-            Some(count) => {
-                *count += 1;
-                false
+            Entry::Vacant(e) => {
+                let state = StorageState::new();
+                e.insert(state.clone());
+                state
             }
         }
     }
 
-    // unset_sandbox_storage will decrement the sandbox storage
-    // reference counter. If there aren't any containers using
-    // that sandbox storage, this method will remove the
-    // storage reference from the sandbox and return 'true' to
-    // let the caller know that they can clean up the storage
-    // related directories by calling remove_sandbox_storage
-    #[instrument]
-    pub fn unset_sandbox_storage(&mut self, path: &str) -> Result<bool> {
-        match self.storages.get_mut(path) {
-            None => Err(anyhow!("Sandbox storage with path {} not found", path)),
-            Some(count) => {
-                *count -= 1;
-                if *count == 0 {
-                    self.storages.remove(path);
-                    return Ok(true);
-                }
-                Ok(false)
-            }
-        }
+    /// Update the storage device associated with a path.
+    pub fn update_sandbox_storage(
+        &mut self,
+        path: &str,
+        device: Arc<dyn StorageDevice>,
+    ) -> std::result::Result<Arc<dyn StorageDevice>, Arc<dyn StorageDevice>> {
+        if !self.storages.contains_key(path) {
+            return Err(device);
+        }
+
+        let state = StorageState::from_device(device);
+        // Safe to unwrap() because we have just ensured existence of entry.
+        let state = self.storages.insert(path.to_string(), state).unwrap();
+        Ok(state.device)
     }
 
-    // remove_sandbox_storage removes the sandbox storage if no
-    // containers are using that storage.
+    // Clean mount and directory of a mountpoint.
+    // This is actually StorageDeviceGeneric::cleanup(), kept here due to dependency chain.
     #[instrument]
-    pub fn remove_sandbox_storage(&mut self, path: &str) -> Result<()> {
-        let mounts = vec![path.to_string()];
-        remove_mounts(&mounts)?;
+    fn cleanup_sandbox_storage(&mut self, path: &str) -> Result<()> {
+        if path.is_empty() {
+            return Err(anyhow!("mountpoint path is empty"));
+        } else if !Path::new(path).exists() {
+            return Ok(());
+        }
+
+        if matches!(is_mounted(path), Ok(true)) {
+            let mounts = vec![path.to_string()];
+            remove_mounts(&mounts)?;
+        }
+
         // "remove_dir" will fail if the mount point is backed by a read-only filesystem.
         // This is the case with the device mapper snapshotter, where we mount the block device directly
         // at the underlying sandbox path which was provided from the base RO kataShared path from the host.
@@ -155,16 +202,23 @@ impl Sandbox {
         Ok(())
     }
 
-    // unset_and_remove_sandbox_storage unsets the storage from sandbox
-    // and if there are no containers using this storage it will
-    // remove it from the sandbox.
+    /// Decrease reference count and destroy the storage object if reference count reaches zero.
+    /// Returns `Ok(true)` if the reference count has reached zero and the storage object has been
+    /// removed.
     #[instrument]
-    pub fn unset_and_remove_sandbox_storage(&mut self, path: &str) -> Result<()> {
-        if self.unset_sandbox_storage(path)? {
-            return self.remove_sandbox_storage(path);
-        }
-
-        Ok(())
+    pub async fn remove_sandbox_storage(&mut self, path: &str) -> Result<bool> {
+        match self.storages.get(path) {
+            None => Err(anyhow!("Sandbox storage with path {} not found", path)),
+            Some(state) => {
+                if state.dec_and_test_ref_count().await {
+                    self.storages.remove(path);
+                    self.cleanup_sandbox_storage(path)?;
+                    Ok(true)
+                } else {
+                    Ok(false)
+                }
+            }
+        }
     }
 
     #[instrument]
@@ -493,24 +547,22 @@ mod tests {
         let tmpdir_path = tmpdir.path().to_str().unwrap();
 
         // Add a new sandbox storage
-        let new_storage = s.set_sandbox_storage(tmpdir_path);
+        let new_storage = s.add_sandbox_storage(tmpdir_path).await;
 
         // Check the reference counter
-        let ref_count = s.storages[tmpdir_path];
+        let ref_count = new_storage.ref_count().await;
         assert_eq!(
             ref_count, 1,
             "Invalid refcount, got {} expected 1.",
             ref_count
         );
-        assert!(new_storage);
 
         // Use the existing sandbox storage
-        let new_storage = s.set_sandbox_storage(tmpdir_path);
-        assert!(!new_storage, "Should be false as already exists.");
+        let new_storage = s.add_sandbox_storage(tmpdir_path).await;
 
         // Since we are using existing storage, the reference counter
         // should be 2 by now.
-        let ref_count = s.storages[tmpdir_path];
+        let ref_count = new_storage.ref_count().await;
         assert_eq!(
             ref_count, 2,
             "Invalid refcount, got {} expected 2.",
@@ -546,22 +598,20 @@ mod tests {
             .tempdir_in(tmpdir_path)
             .unwrap();
 
-        assert!(
-            s.remove_sandbox_storage(srcdir_path).is_err(),
-            "Expect Err as the directory is not a mountpoint"
-        );
-
-        assert!(s.remove_sandbox_storage("").is_err());
+        assert!(s.cleanup_sandbox_storage("").is_err());
 
         let invalid_dir = emptydir.path().join("invalid");
 
         assert!(s
-            .remove_sandbox_storage(invalid_dir.to_str().unwrap())
-            .is_err());
+            .cleanup_sandbox_storage(invalid_dir.to_str().unwrap())
+            .is_ok());
 
         assert!(bind_mount(srcdir_path, destdir_path, &logger).is_ok());
 
-        assert!(s.remove_sandbox_storage(destdir_path).is_ok());
+        assert!(s.cleanup_sandbox_storage(destdir_path).is_ok());
+
+        // remove a directory without umount
+        s.cleanup_sandbox_storage(srcdir_path).unwrap();
     }
 
     #[tokio::test]
@@ -573,8 +623,7 @@ mod tests {
         let mut s = Sandbox::new(&logger).unwrap();
 
         assert!(
-            s.unset_and_remove_sandbox_storage("/tmp/testEphePath")
-                .is_err(),
+            s.remove_sandbox_storage("/tmp/testEphePath").await.is_err(),
             "Should fail because sandbox storage doesn't exist"
         );
 
@@ -595,8 +644,8 @@ mod tests {
 
         assert!(bind_mount(srcdir_path, destdir_path, &logger).is_ok());
 
-        assert!(s.set_sandbox_storage(destdir_path));
-        assert!(s.unset_and_remove_sandbox_storage(destdir_path).is_ok());
+        s.add_sandbox_storage(destdir_path).await;
+        assert!(s.remove_sandbox_storage(destdir_path).await.is_ok());
 
         let other_dir_str;
         {
@@ -609,10 +658,10 @@ mod tests {
             let other_dir_path = other_dir.path().to_str().unwrap();
             other_dir_str = other_dir_path.to_string();
 
-            assert!(s.set_sandbox_storage(other_dir_path));
+            s.add_sandbox_storage(other_dir_path).await;
         }
 
-        assert!(s.unset_and_remove_sandbox_storage(&other_dir_str).is_err());
+        assert!(s.remove_sandbox_storage(&other_dir_str).await.is_ok());
     }
 
     #[tokio::test]
@@ -624,28 +673,30 @@ mod tests {
         let storage_path = "/tmp/testEphe";
 
         // Add a new sandbox storage
-        assert!(s.set_sandbox_storage(storage_path));
+        s.add_sandbox_storage(storage_path).await;
+        // Use the existing sandbox storage
+        let state = s.add_sandbox_storage(storage_path).await;
         assert!(
-            !s.set_sandbox_storage(storage_path),
+            state.ref_count().await > 1,
             "Expects false as the storage is not new."
         );
 
         assert!(
-            !s.unset_sandbox_storage(storage_path).unwrap(),
+            !s.remove_sandbox_storage(storage_path).await.unwrap(),
             "Expects false as there is still a storage."
         );
 
         // Reference counter should decrement to 1.
-        let ref_count = s.storages[storage_path];
+        let storage = &s.storages[storage_path];
+        let refcount = storage.ref_count().await;
         assert_eq!(
-            ref_count, 1,
+            refcount, 1,
             "Invalid refcount, got {} expected 1.",
-            ref_count
+            refcount
         );
 
         assert!(
-            s.unset_sandbox_storage(storage_path).unwrap(),
+            s.remove_sandbox_storage(storage_path).await.unwrap(),
             "Expects true as there is still a storage."
         );
 
@@ -661,7 +712,7 @@ mod tests {
         // If no container is using the sandbox storage, the reference
         // counter for it should not exist.
         assert!(
-            s.unset_sandbox_storage(storage_path).is_err(),
+            s.remove_sandbox_storage(storage_path).await.is_err(),
            "Expects false as the reference counter should no exist."
        );
    }
src/agent/src/storage/bind_watcher_handler.rs (new file, 37 lines)
@@ -0,0 +1,37 @@
// Copyright (c) 2019 Ant Financial
// Copyright (c) 2023 Alibaba Cloud
//
// SPDX-License-Identifier: Apache-2.0
//

use anyhow::Result;
use kata_types::mount::StorageDevice;
use protocols::agent::Storage;
use std::iter;
use std::sync::Arc;
use tracing::instrument;

use crate::storage::{new_device, StorageContext, StorageHandler};

#[derive(Debug)]
pub struct BindWatcherHandler {}

#[async_trait::async_trait]
impl StorageHandler for BindWatcherHandler {
    #[instrument]
    async fn create_device(
        &self,
        storage: Storage,
        ctx: &mut StorageContext,
    ) -> Result<Arc<dyn StorageDevice>> {
        if let Some(cid) = ctx.cid {
            ctx.sandbox
                .lock()
                .await
                .bind_watcher
                .add_container(cid.to_string(), iter::once(storage.clone()), ctx.logger)
                .await?;
        }
        new_device("".to_string())
    }
}
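Note that BindWatcherHandler registers the storage with the sandbox's bind watcher and then returns a device with an empty path. That matters because add_storages() (in storage/mod.rs below) only appends non-empty device paths to the returned mount list, so watchable binds are never bind-mounted by rustjail. A minimal illustration, assuming the agent's kata-types dependency and the path() accessor used in the diff:

use kata_types::mount::{StorageDevice, StorageDeviceGeneric};
use std::sync::Arc;

fn main() {
    // Equivalent of new_device("".to_string()) in storage/mod.rs.
    let dev: Arc<dyn StorageDevice> = Arc::new(StorageDeviceGeneric::new("".to_string()));
    // An empty path keeps this device out of the mount list built by add_storages().
    assert!(dev.path().to_string().is_empty());
}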
src/agent/src/storage/block_handler.rs (new file, 146 lines)
@@ -0,0 +1,146 @@
// Copyright (c) 2019 Ant Financial
// Copyright (c) 2023 Alibaba Cloud
//
// SPDX-License-Identifier: Apache-2.0
//

use std::fs;
use std::os::unix::fs::PermissionsExt;
use std::path::Path;
use std::str::FromStr;
use std::sync::Arc;

use anyhow::{anyhow, Context, Result};
use kata_types::mount::StorageDevice;
use protocols::agent::Storage;
use tracing::instrument;

use crate::device::{
    get_scsi_device_name, get_virtio_blk_pci_device_name, get_virtio_mmio_device_name,
    wait_for_pmem_device,
};
use crate::pci;
use crate::storage::{common_storage_handler, new_device, StorageContext, StorageHandler};
#[cfg(target_arch = "s390x")]
use crate::{ccw, device::get_virtio_blk_ccw_device_name};

#[derive(Debug)]
pub struct VirtioBlkMmioHandler {}

#[async_trait::async_trait]
impl StorageHandler for VirtioBlkMmioHandler {
    #[instrument]
    async fn create_device(
        &self,
        storage: Storage,
        ctx: &mut StorageContext,
    ) -> Result<Arc<dyn StorageDevice>> {
        if !Path::new(&storage.source).exists() {
            get_virtio_mmio_device_name(ctx.sandbox, &storage.source)
                .await
                .context("failed to get mmio device name")?;
        }
        let path = common_storage_handler(ctx.logger, &storage)?;
        new_device(path)
    }
}

#[derive(Debug)]
pub struct VirtioBlkPciHandler {}

#[async_trait::async_trait]
impl StorageHandler for VirtioBlkPciHandler {
    #[instrument]
    async fn create_device(
        &self,
        mut storage: Storage,
        ctx: &mut StorageContext,
    ) -> Result<Arc<dyn StorageDevice>> {
        // If hot-plugged, get the device node path based on the PCI path,
        // otherwise use the virt path provided in the storage source.
        if storage.source.starts_with("/dev") {
            let metadata = fs::metadata(&storage.source)
                .context(format!("get metadata on file {:?}", &storage.source))?;
            let mode = metadata.permissions().mode();
            if mode & libc::S_IFBLK == 0 {
                return Err(anyhow!("Invalid device {}", &storage.source));
            }
        } else {
            let pcipath = pci::Path::from_str(&storage.source)?;
            let dev_path = get_virtio_blk_pci_device_name(ctx.sandbox, &pcipath).await?;
            storage.source = dev_path;
        }

        let path = common_storage_handler(ctx.logger, &storage)?;
        new_device(path)
    }
}

#[derive(Debug)]
pub struct VirtioBlkCcwHandler {}

#[async_trait::async_trait]
impl StorageHandler for VirtioBlkCcwHandler {
    #[cfg(target_arch = "s390x")]
    #[instrument]
    async fn create_device(
        &self,
        mut storage: Storage,
        ctx: &mut StorageContext,
    ) -> Result<Arc<dyn StorageDevice>> {
        let ccw_device = ccw::Device::from_str(&storage.source)?;
        let dev_path = get_virtio_blk_ccw_device_name(ctx.sandbox, &ccw_device).await?;
        storage.source = dev_path;
        let path = common_storage_handler(ctx.logger, &storage)?;
        new_device(path)
    }

    #[cfg(not(target_arch = "s390x"))]
    #[instrument]
    async fn create_device(
        &self,
        _storage: Storage,
        _ctx: &mut StorageContext,
    ) -> Result<Arc<dyn StorageDevice>> {
        Err(anyhow!("CCW is only supported on s390x"))
    }
}

#[derive(Debug)]
pub struct ScsiHandler {}

#[async_trait::async_trait]
impl StorageHandler for ScsiHandler {
    #[instrument]
    async fn create_device(
        &self,
        mut storage: Storage,
        ctx: &mut StorageContext,
    ) -> Result<Arc<dyn StorageDevice>> {
        // Retrieve the device path from SCSI address.
        let dev_path = get_scsi_device_name(ctx.sandbox, &storage.source).await?;
        storage.source = dev_path;

        let path = common_storage_handler(ctx.logger, &storage)?;
        new_device(path)
    }
}

#[derive(Debug)]
pub struct PmemHandler {}

#[async_trait::async_trait]
impl StorageHandler for PmemHandler {
    #[instrument]
    async fn create_device(
        &self,
        storage: Storage,
        ctx: &mut StorageContext,
    ) -> Result<Arc<dyn StorageDevice>> {
        // Retrieve the device for pmem storage
        wait_for_pmem_device(ctx.sandbox, &storage.source).await?;

        let path = common_storage_handler(ctx.logger, &storage)?;
        new_device(path)
    }
}
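One caveat worth noting: the `mode & libc::S_IFBLK == 0` test in VirtioBlkPciHandler is a bitmask probe of the file-type field of st_mode. The standard library exposes a more direct check with the same intent; a small standalone sketch (not part of the commit, /dev/vda is a hypothetical device node):

use std::fs;
use std::os::unix::fs::FileTypeExt;

fn main() -> std::io::Result<()> {
    let meta = fs::metadata("/dev/vda")?; // hypothetical block device node
    // Equivalent intent to the S_IFBLK mask test above.
    println!("is block device: {}", meta.file_type().is_block_device());
    Ok(())
}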
src/agent/src/storage/ephemeral_handler.rs (new file, 293 lines)
@@ -0,0 +1,293 @@
// Copyright (c) 2019 Ant Financial
// Copyright (c) 2023 Alibaba Cloud
//
// SPDX-License-Identifier: Apache-2.0
//

use std::fs;
use std::fs::OpenOptions;
use std::io::Write;
use std::os::unix::fs::{MetadataExt, PermissionsExt};
use std::path::Path;
use std::sync::Arc;

use anyhow::{anyhow, Context, Result};
use kata_sys_util::mount::parse_mount_options;
use kata_types::mount::{StorageDevice, KATA_MOUNT_OPTION_FS_GID};
use nix::unistd::Gid;
use protocols::agent::Storage;
use slog::Logger;
use tokio::sync::Mutex;
use tracing::instrument;

use crate::device::{DRIVER_EPHEMERAL_TYPE, FS_TYPE_HUGETLB};
use crate::mount::baremount;
use crate::sandbox::Sandbox;
use crate::storage::{
    common_storage_handler, new_device, parse_options, StorageContext, StorageHandler, MODE_SETGID,
};

const FS_GID_EQ: &str = "fsgid=";
const SYS_FS_HUGEPAGES_PREFIX: &str = "/sys/kernel/mm/hugepages";

#[derive(Debug)]
pub struct EphemeralHandler {}

#[async_trait::async_trait]
impl StorageHandler for EphemeralHandler {
    #[instrument]
    async fn create_device(
        &self,
        mut storage: Storage,
        ctx: &mut StorageContext,
    ) -> Result<Arc<dyn StorageDevice>> {
        // hugetlbfs
        if storage.fstype == FS_TYPE_HUGETLB {
            info!(ctx.logger, "handle hugetlbfs storage");
            // Allocate hugepages before mount
            // /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages
            // /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
            // options e.g. "pagesize=2097152,size=524288000" (2M, 500M)
            Self::allocate_hugepages(ctx.logger, &storage.options.to_vec())
                .context("allocate hugepages")?;
            common_storage_handler(ctx.logger, &storage)?;
        } else if !storage.options.is_empty() {
            // By now we only support one option field: "fsGroup", which
            // isn't a valid mount option, thus we should remove it when
            // doing the mount.
            let opts = parse_options(&storage.options);
            storage.options = Default::default();
            common_storage_handler(ctx.logger, &storage)?;

            // ephemeral_storage doesn't support mount options except fsGroup.
            if let Some(fsgid) = opts.get(KATA_MOUNT_OPTION_FS_GID) {
                let gid = fsgid.parse::<u32>()?;

                nix::unistd::chown(storage.mount_point.as_str(), None, Some(Gid::from_raw(gid)))?;

                let meta = fs::metadata(&storage.mount_point)?;
                let mut permission = meta.permissions();

                let o_mode = meta.mode() | MODE_SETGID;
                permission.set_mode(o_mode);
                fs::set_permissions(&storage.mount_point, permission)?;
            }
        } else {
            common_storage_handler(ctx.logger, &storage)?;
        }

        new_device("".to_string())
    }
}

impl EphemeralHandler {
    // Allocate hugepages by writing to sysfs
    fn allocate_hugepages(logger: &Logger, options: &[String]) -> Result<()> {
        info!(logger, "mounting hugePages storage options: {:?}", options);

        let (pagesize, size) = Self::get_pagesize_and_size_from_option(options)
            .context(format!("parse mount options: {:?}", &options))?;

        info!(
            logger,
            "allocate hugepages. pageSize: {}, size: {}", pagesize, size
        );

        // sysfs entry is always of the form hugepages-${pagesize}kB
        // Ref: https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt
        let path = Path::new(SYS_FS_HUGEPAGES_PREFIX)
            .join(format!("hugepages-{}kB", pagesize / 1024))
            .join("nr_hugepages");

        // write numpages to nr_hugepages file.
        let numpages = format!("{}", size / pagesize);
        info!(logger, "write {} pages to {:?}", &numpages, &path);

        let mut file = OpenOptions::new()
            .write(true)
            .open(&path)
            .context(format!("open nr_hugepages directory {:?}", &path))?;

        file.write_all(numpages.as_bytes())
            .context(format!("write nr_hugepages failed: {:?}", &path))?;

        // Even if the write succeeds, the kernel isn't guaranteed to be
        // able to allocate all the pages we requested. Verify that it
        // did.
        let verify = fs::read_to_string(&path).context(format!("reading {:?}", &path))?;
        let allocated = verify
            .trim_end()
            .parse::<u64>()
            .map_err(|_| anyhow!("Unexpected text {:?} in {:?}", &verify, &path))?;
        if allocated != size / pagesize {
            return Err(anyhow!(
                "Only allocated {} of {} hugepages of size {}",
                allocated,
                numpages,
                pagesize
            ));
        }

        Ok(())
    }

    // Parse filesystem options string to retrieve hugepage details
    // options e.g. "pagesize=2048,size=107374182"
    fn get_pagesize_and_size_from_option(options: &[String]) -> Result<(u64, u64)> {
        let mut pagesize_str: Option<&str> = None;
        let mut size_str: Option<&str> = None;

        for option in options {
            let vars: Vec<&str> = option.trim().split(',').collect();

            for var in vars {
                if let Some(stripped) = var.strip_prefix("pagesize=") {
                    pagesize_str = Some(stripped);
                } else if let Some(stripped) = var.strip_prefix("size=") {
                    size_str = Some(stripped);
                }

                if pagesize_str.is_some() && size_str.is_some() {
                    break;
                }
            }
        }

        if pagesize_str.is_none() || size_str.is_none() {
            return Err(anyhow!("no pagesize/size options found"));
        }

        let pagesize = pagesize_str
            .unwrap()
            .parse::<u64>()
            .context(format!("parse pagesize: {:?}", &pagesize_str))?;
        let size = size_str
            .unwrap()
            .parse::<u64>()
            .context(format!("parse size: {:?}", &pagesize_str))?;

        Ok((pagesize, size))
    }
}

// update_ephemeral_mounts takes a list of ephemeral mounts and remounts them
// with mount options passed by the caller
#[instrument]
pub async fn update_ephemeral_mounts(
    logger: Logger,
    storages: &[Storage],
    _sandbox: &Arc<Mutex<Sandbox>>,
) -> Result<()> {
    for storage in storages {
        let handler_name = &storage.driver;
        let logger = logger.new(o!(
            "msg" => "updating tmpfs storage",
            "subsystem" => "storage",
            "storage-type" => handler_name.to_owned()));

        match handler_name.as_str() {
            DRIVER_EPHEMERAL_TYPE => {
                fs::create_dir_all(&storage.mount_point)?;

                if storage.options.is_empty() {
                    continue;
                } else {
                    // assume that fsGid has already been set
                    let mount_path = Path::new(&storage.mount_point);
                    let src_path = Path::new(&storage.source);
                    let opts: Vec<&String> = storage
                        .options
                        .iter()
                        .filter(|&opt| !opt.starts_with(FS_GID_EQ))
                        .collect();
                    let (flags, options) = parse_mount_options(&opts)?;

                    info!(logger, "mounting storage";
                        "mount-source" => src_path.display(),
                        "mount-destination" => mount_path.display(),
                        "mount-fstype" => storage.fstype.as_str(),
                        "mount-options" => options.as_str(),
                    );

                    baremount(
                        src_path,
                        mount_path,
                        storage.fstype.as_str(),
                        flags,
                        options.as_str(),
                        &logger,
                    )?;
                }
            }
            _ => {
                return Err(anyhow!(
                    "Unsupported storage type for syncing mounts {}. Only ephemeral storage update is supported",
                    storage.driver
                ));
            }
        };
    }

    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_get_pagesize_and_size_from_option() {
        let expected_pagesize = 2048;
        let expected_size = 107374182;
        let expected = (expected_pagesize, expected_size);

        let data = vec![
            // (input, expected, is_ok)
            ("size-1=107374182,pagesize-1=2048", expected, false),
            ("size-1=107374182,pagesize=2048", expected, false),
            ("size=107374182,pagesize-1=2048", expected, false),
            ("size=107374182,pagesize=abc", expected, false),
            ("size=abc,pagesize=2048", expected, false),
            ("size=,pagesize=2048", expected, false),
            ("size=107374182,pagesize=", expected, false),
            ("size=107374182,pagesize=2048", expected, true),
            ("pagesize=2048,size=107374182", expected, true),
            ("foo=bar,pagesize=2048,size=107374182", expected, true),
            (
                "foo=bar,pagesize=2048,foo1=bar1,size=107374182",
                expected,
                true,
            ),
            (
                "pagesize=2048,foo1=bar1,foo=bar,size=107374182",
                expected,
                true,
            ),
            (
                "foo=bar,pagesize=2048,foo1=bar1,size=107374182,foo2=bar2",
                expected,
                true,
            ),
            (
                "foo=bar,size=107374182,foo1=bar1,pagesize=2048",
                expected,
                true,
            ),
        ];

        for case in data {
            let input = case.0;
            let r = EphemeralHandler::get_pagesize_and_size_from_option(&[input.to_string()]);

            let is_ok = case.2;
            if is_ok {
                let expected = case.1;
                let (pagesize, size) = r.unwrap();
                assert_eq!(expected.0, pagesize);
                assert_eq!(expected.1, size);
            } else {
                assert!(r.is_err());
            }
        }
    }
}
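The sysfs path and page count derived by allocate_hugepages() are pure arithmetic; a worked example using the values from the handler comment above ("pagesize=2097152,size=524288000", i.e. 2M pages and 500M total):

fn main() {
    let pagesize: u64 = 2_097_152; // 2 MiB, from the "pagesize=" option
    let size: u64 = 524_288_000; // 500 MiB, from the "size=" option
    // sysfs entries are keyed by the page size in kB.
    assert_eq!(format!("hugepages-{}kB", pagesize / 1024), "hugepages-2048kB");
    // 250 pages are written to nr_hugepages and verified afterwards.
    assert_eq!(size / pagesize, 250);
}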
src/agent/src/storage/fs_handler.rs (new file, 89 lines)
@@ -0,0 +1,89 @@
// Copyright (c) 2019 Ant Financial
// Copyright (c) 2023 Alibaba Cloud
//
// SPDX-License-Identifier: Apache-2.0
//

use std::fs;
use std::path::Path;
use std::sync::Arc;

use anyhow::{anyhow, Context, Result};
use kata_types::mount::StorageDevice;
use protocols::agent::Storage;
use tracing::instrument;

use crate::storage::{common_storage_handler, new_device, StorageContext, StorageHandler};

#[derive(Debug)]
pub struct OverlayfsHandler {}

#[async_trait::async_trait]
impl StorageHandler for OverlayfsHandler {
    #[instrument]
    async fn create_device(
        &self,
        mut storage: Storage,
        ctx: &mut StorageContext,
    ) -> Result<Arc<dyn StorageDevice>> {
        if storage
            .options
            .iter()
            .any(|e| e == "io.katacontainers.fs-opt.overlay-rw")
        {
            let cid = ctx
                .cid
                .clone()
                .ok_or_else(|| anyhow!("No container id in rw overlay"))?;
            let cpath = Path::new(crate::rpc::CONTAINER_BASE).join(cid);
            let work = cpath.join("work");
            let upper = cpath.join("upper");

            fs::create_dir_all(&work).context("Creating overlay work directory")?;
            fs::create_dir_all(&upper).context("Creating overlay upper directory")?;

            storage.fstype = "overlay".into();
            storage
                .options
                .push(format!("upperdir={}", upper.to_string_lossy()));
            storage
                .options
                .push(format!("workdir={}", work.to_string_lossy()));
        }

        let path = common_storage_handler(ctx.logger, &storage)?;
        new_device(path)
    }
}

#[derive(Debug)]
pub struct Virtio9pHandler {}

#[async_trait::async_trait]
impl StorageHandler for Virtio9pHandler {
    #[instrument]
    async fn create_device(
        &self,
        storage: Storage,
        ctx: &mut StorageContext,
    ) -> Result<Arc<dyn StorageDevice>> {
        let path = common_storage_handler(ctx.logger, &storage)?;
        new_device(path)
    }
}

#[derive(Debug)]
pub struct VirtioFsHandler {}

#[async_trait::async_trait]
impl StorageHandler for VirtioFsHandler {
    #[instrument]
    async fn create_device(
        &self,
        storage: Storage,
        ctx: &mut StorageContext,
    ) -> Result<Arc<dyn StorageDevice>> {
        let path = common_storage_handler(ctx.logger, &storage)?;
        new_device(path)
    }
}
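For the read-write overlay case in OverlayfsHandler, the net effect is two extra mount options pointing into the container's scratch area. A sketch of the resulting options (CONTAINER_BASE assumed to be "/run/kata-containers"; the container id is hypothetical):

use std::path::Path;

fn main() {
    let container_base = "/run/kata-containers"; // assumed value of crate::rpc::CONTAINER_BASE
    let cpath = Path::new(container_base).join("abc123"); // hypothetical container id
    let mut options = vec!["io.katacontainers.fs-opt.overlay-rw".to_string()];
    options.push(format!("upperdir={}", cpath.join("upper").to_string_lossy()));
    options.push(format!("workdir={}", cpath.join("work").to_string_lossy()));
    // These options are then passed to mount(2) with fstype "overlay".
    println!("{:?}", options);
}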
src/agent/src/storage/local_handler.rs (new file, 61 lines)
@@ -0,0 +1,61 @@
// Copyright (c) 2019 Ant Financial
// Copyright (c) 2023 Alibaba Cloud
//
// SPDX-License-Identifier: Apache-2.0
//

use std::fs;
use std::os::unix::fs::PermissionsExt;
use std::sync::Arc;

use anyhow::{Context, Result};
use kata_types::mount::{StorageDevice, KATA_MOUNT_OPTION_FS_GID};
use nix::unistd::Gid;
use protocols::agent::Storage;
use tracing::instrument;

use crate::storage::{new_device, parse_options, StorageContext, StorageHandler, MODE_SETGID};

#[derive(Debug)]
pub struct LocalHandler {}

#[async_trait::async_trait]
impl StorageHandler for LocalHandler {
    #[instrument]
    async fn create_device(
        &self,
        storage: Storage,
        _ctx: &mut StorageContext,
    ) -> Result<Arc<dyn StorageDevice>> {
        fs::create_dir_all(&storage.mount_point).context(format!(
            "failed to create dir all {:?}",
            &storage.mount_point
        ))?;

        let opts = parse_options(&storage.options);

        let mut need_set_fsgid = false;
        if let Some(fsgid) = opts.get(KATA_MOUNT_OPTION_FS_GID) {
            let gid = fsgid.parse::<u32>()?;

            nix::unistd::chown(storage.mount_point.as_str(), None, Some(Gid::from_raw(gid)))?;
            need_set_fsgid = true;
        }

        if let Some(mode) = opts.get("mode") {
            let mut permission = fs::metadata(&storage.mount_point)?.permissions();

            let mut o_mode = u32::from_str_radix(mode, 8)?;

            if need_set_fsgid {
                // set SetGid mode mask.
                o_mode |= MODE_SETGID;
            }
            permission.set_mode(o_mode);

            fs::set_permissions(&storage.mount_point, permission)?;
        }

        new_device("".to_string())
    }
}
src/agent/src/storage/mod.rs (new file, 648 lines)
@@ -0,0 +1,648 @@
// Copyright (c) 2019 Ant Financial
// Copyright (c) 2023 Alibaba Cloud
//
// SPDX-License-Identifier: Apache-2.0
//

use std::collections::HashMap;
use std::fs;
use std::os::unix::fs::{MetadataExt, PermissionsExt};
use std::path::Path;
use std::sync::Arc;

use anyhow::{anyhow, Context, Result};
use kata_sys_util::mount::{create_mount_destination, parse_mount_options};
use kata_types::mount::{
    StorageDevice, StorageDeviceGeneric, StorageHandlerManager, KATA_SHAREDFS_GUEST_PREMOUNT_TAG,
};
use nix::unistd::{Gid, Uid};
use protocols::agent::Storage;
use protocols::types::FSGroupChangePolicy;
use slog::Logger;
use tokio::sync::Mutex;
use tracing::instrument;

use self::bind_watcher_handler::BindWatcherHandler;
use self::block_handler::{PmemHandler, ScsiHandler, VirtioBlkMmioHandler, VirtioBlkPciHandler};
use self::ephemeral_handler::EphemeralHandler;
use self::fs_handler::{OverlayfsHandler, Virtio9pHandler, VirtioFsHandler};
use self::local_handler::LocalHandler;
use crate::device::{
    DRIVER_9P_TYPE, DRIVER_BLK_MMIO_TYPE, DRIVER_BLK_PCI_TYPE, DRIVER_EPHEMERAL_TYPE,
    DRIVER_LOCAL_TYPE, DRIVER_NVDIMM_TYPE, DRIVER_OVERLAYFS_TYPE, DRIVER_SCSI_TYPE,
    DRIVER_VIRTIOFS_TYPE, DRIVER_WATCHABLE_BIND_TYPE,
};
use crate::mount::{baremount, is_mounted};
use crate::sandbox::Sandbox;

pub use self::ephemeral_handler::update_ephemeral_mounts;

mod bind_watcher_handler;
mod block_handler;
mod ephemeral_handler;
mod fs_handler;
mod local_handler;

const RW_MASK: u32 = 0o660;
const RO_MASK: u32 = 0o440;
const EXEC_MASK: u32 = 0o110;
const MODE_SETGID: u32 = 0o2000;

#[derive(Debug)]
pub struct StorageContext<'a> {
    cid: &'a Option<String>,
    logger: &'a Logger,
    sandbox: &'a Arc<Mutex<Sandbox>>,
}

/// Trait object to handle storage device.
#[async_trait::async_trait]
pub trait StorageHandler: Send + Sync {
    /// Create a new storage device.
    async fn create_device(
        &self,
        storage: Storage,
        ctx: &mut StorageContext,
    ) -> Result<Arc<dyn StorageDevice>>;
}

#[rustfmt::skip]
lazy_static! {
    pub static ref STORAGE_HANDLERS: StorageHandlerManager<Arc<dyn StorageHandler>> = {
        let mut manager: StorageHandlerManager<Arc<dyn StorageHandler>> = StorageHandlerManager::new();
        manager.add_handler(DRIVER_9P_TYPE, Arc::new(Virtio9pHandler{})).unwrap();
        #[cfg(target_arch = "s390x")]
        manager.add_handler(crate::device::DRIVER_BLK_CCW_TYPE, Arc::new(self::block_handler::VirtioBlkCcwHandler{})).unwrap();
        manager.add_handler(DRIVER_BLK_MMIO_TYPE, Arc::new(VirtioBlkMmioHandler{})).unwrap();
        manager.add_handler(DRIVER_BLK_PCI_TYPE, Arc::new(VirtioBlkPciHandler{})).unwrap();
        manager.add_handler(DRIVER_EPHEMERAL_TYPE, Arc::new(EphemeralHandler{})).unwrap();
        manager.add_handler(DRIVER_LOCAL_TYPE, Arc::new(LocalHandler{})).unwrap();
        manager.add_handler(DRIVER_NVDIMM_TYPE, Arc::new(PmemHandler{})).unwrap();
        manager.add_handler(DRIVER_OVERLAYFS_TYPE, Arc::new(OverlayfsHandler{})).unwrap();
        manager.add_handler(DRIVER_SCSI_TYPE, Arc::new(ScsiHandler{})).unwrap();
        manager.add_handler(DRIVER_VIRTIOFS_TYPE, Arc::new(VirtioFsHandler{})).unwrap();
        manager.add_handler(DRIVER_WATCHABLE_BIND_TYPE, Arc::new(BindWatcherHandler{})).unwrap();
        manager
    };
}
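Every driver is served by one trait object registered in the table above, so supporting a new driver is a matter of implementing StorageHandler and adding a single add_handler() line. A minimal hypothetical handler (sketch only, not part of the commit) that mounts nothing:

#[derive(Debug)]
struct NoopHandler {}

#[async_trait::async_trait]
impl StorageHandler for NoopHandler {
    async fn create_device(
        &self,
        _storage: Storage,
        _ctx: &mut StorageContext,
    ) -> Result<Arc<dyn StorageDevice>> {
        // No device node to wait for and nothing to mount.
        new_device("".to_string())
    }
}

// Registration would go inside the lazy_static block above, e.g.:
// manager.add_handler("noop", Arc::new(NoopHandler {})).unwrap();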
// add_storages takes a list of storages passed by the caller, and performs the
// associated operations such as waiting for the device to show up, and mounting
// it to a specific location, according to the type of handler chosen, and for
// each storage.
#[instrument]
pub async fn add_storages(
    logger: Logger,
    storages: Vec<Storage>,
    sandbox: &Arc<Mutex<Sandbox>>,
    cid: Option<String>,
) -> Result<Vec<String>> {
    let mut mount_list = Vec::new();

    for storage in storages {
        let path = storage.mount_point.clone();
        let state = sandbox.lock().await.add_sandbox_storage(&path).await;
        if state.ref_count().await > 1 {
            // The device already exists.
            continue;
        }

        if let Some(handler) = STORAGE_HANDLERS.handler(&storage.driver) {
            let logger =
                logger.new(o!( "subsystem" => "storage", "storage-type" => storage.driver.clone()));
            let mut ctx = StorageContext {
                cid: &cid,
                logger: &logger,
                sandbox,
            };

            match handler.create_device(storage, &mut ctx).await {
                Ok(device) => {
                    match sandbox
                        .lock()
                        .await
                        .update_sandbox_storage(&path, device.clone())
                    {
                        Ok(d) => {
                            let path = device.path().to_string();
                            if !path.is_empty() {
                                mount_list.push(path.clone());
                            }
                            drop(d);
                        }
                        Err(device) => {
                            error!(logger, "failed to update device for storage");
                            if let Err(e) = sandbox.lock().await.remove_sandbox_storage(&path).await
                            {
                                warn!(logger, "failed to remove dummy sandbox storage {:?}", e);
                            }
                            device.cleanup();
                            return Err(anyhow!("failed to update device for storage"));
                        }
                    }
                }
                Err(e) => {
                    error!(logger, "failed to create device for storage, error: {e:?}");
                    if let Err(e) = sandbox.lock().await.remove_sandbox_storage(&path).await {
                        warn!(logger, "failed to remove dummy sandbox storage {e:?}");
                    }
                    return Err(e);
                }
            }
        } else {
            return Err(anyhow!(
                "Failed to find the storage handler {}",
                storage.driver
            ));
        }
    }

    Ok(mount_list)
}

pub(crate) fn new_device(path: String) -> Result<Arc<dyn StorageDevice>> {
    let device = StorageDeviceGeneric::new(path);
    Ok(Arc::new(device))
}

#[instrument]
pub(crate) fn common_storage_handler(logger: &Logger, storage: &Storage) -> Result<String> {
    mount_storage(logger, storage)?;
    set_ownership(logger, storage)?;
    Ok(storage.mount_point.clone())
}

// mount_storage performs the mount described by the storage structure.
#[instrument]
fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> {
    let logger = logger.new(o!("subsystem" => "mount"));

    // There's a special mechanism to create mountpoint from a `sharedfs` instance before
    // starting the kata-agent. Check for such cases.
    if storage.source == KATA_SHAREDFS_GUEST_PREMOUNT_TAG && is_mounted(&storage.mount_point)? {
        warn!(
            logger,
            "{} already mounted on {}, ignoring...",
            KATA_SHAREDFS_GUEST_PREMOUNT_TAG,
            &storage.mount_point
        );
        return Ok(());
    }

    let (flags, options) = parse_mount_options(&storage.options)?;
    let mount_path = Path::new(&storage.mount_point);
    let src_path = Path::new(&storage.source);
    create_mount_destination(src_path, mount_path, "", &storage.fstype)
        .context("Could not create mountpoint")?;

    info!(logger, "mounting storage";
        "mount-source" => src_path.display(),
        "mount-destination" => mount_path.display(),
        "mount-fstype" => storage.fstype.as_str(),
        "mount-options" => options.as_str(),
    );

    baremount(
        src_path,
        mount_path,
        storage.fstype.as_str(),
        flags,
        options.as_str(),
        &logger,
    )
}

#[instrument]
pub(crate) fn parse_options(option_list: &[String]) -> HashMap<String, String> {
    let mut options = HashMap::new();
    for opt in option_list {
        let fields: Vec<&str> = opt.split('=').collect();
        if fields.len() == 2 {
            options.insert(fields[0].to_string(), fields[1].to_string());
        }
    }
    options
}
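parse_options() keeps only well-formed "key=value" entries and silently drops bare flags; a quick illustration of its behavior (assuming the function above is in scope):

fn demo() {
    let opts = vec![
        "fsgid=3000".to_string(),
        "ro".to_string(),
        "mode=0777".to_string(),
    ];
    let map = parse_options(&opts);
    assert_eq!(map.get("fsgid").map(String::as_str), Some("3000"));
    assert_eq!(map.get("mode").map(String::as_str), Some("0777"));
    assert!(!map.contains_key("ro")); // bare flags are dropped
}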
#[instrument]
pub fn set_ownership(logger: &Logger, storage: &Storage) -> Result<()> {
    let logger = logger.new(o!("subsystem" => "mount", "fn" => "set_ownership"));

    // If fsGroup is not set, skip performing ownership change
    if storage.fs_group.is_none() {
        return Ok(());
    }

    let fs_group = storage.fs_group();
    let read_only = storage.options.contains(&String::from("ro"));
    let mount_path = Path::new(&storage.mount_point);
    let metadata = mount_path.metadata().map_err(|err| {
        error!(logger, "failed to obtain metadata for mount path";
            "mount-path" => mount_path.to_str(),
            "error" => err.to_string(),
        );
        err
    })?;

    if fs_group.group_change_policy == FSGroupChangePolicy::OnRootMismatch.into()
        && metadata.gid() == fs_group.group_id
    {
        let mut mask = if read_only { RO_MASK } else { RW_MASK };
        mask |= EXEC_MASK;

        // With fsGroup change policy to OnRootMismatch, if the current
        // gid of the mount path root directory matches the desired gid
        // and the current permission of mount path root directory is correct,
        // then ownership change will be skipped.
        let current_mode = metadata.permissions().mode();
        if (mask & current_mode == mask) && (current_mode & MODE_SETGID != 0) {
            info!(logger, "skipping ownership change for volume";
                "mount-path" => mount_path.to_str(),
                "fs-group" => fs_group.group_id.to_string(),
            );
            return Ok(());
        }
    }

    info!(logger, "performing recursive ownership change";
        "mount-path" => mount_path.to_str(),
        "fs-group" => fs_group.group_id.to_string(),
    );
    recursive_ownership_change(
        mount_path,
        None,
        Some(Gid::from_raw(fs_group.group_id)),
        read_only,
    )
}

#[instrument]
pub fn recursive_ownership_change(
    path: &Path,
    uid: Option<Uid>,
    gid: Option<Gid>,
    read_only: bool,
) -> Result<()> {
    let mut mask = if read_only { RO_MASK } else { RW_MASK };
    if path.is_dir() {
        for entry in fs::read_dir(path)? {
            recursive_ownership_change(entry?.path().as_path(), uid, gid, read_only)?;
        }
        mask |= EXEC_MASK;
        mask |= MODE_SETGID;
    }

    // We do not want to change the permission of the underlying file
    // using symlink. Hence we skip symlinks from recursive ownership
    // and permission changes.
    if path.is_symlink() {
        return Ok(());
    }

    nix::unistd::chown(path, uid, gid)?;

    if gid.is_some() {
        let metadata = path.metadata()?;
        let mut permission = metadata.permissions();
        let target_mode = metadata.mode() | mask;
        permission.set_mode(target_mode);
        fs::set_permissions(path, permission)?;
    }

    Ok(())
}
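The permission masks used by set_ownership() and recursive_ownership_change() compose by plain bitwise OR; for a read-write directory the bits added on top of the existing mode are 0o2770:

fn main() {
    const RW_MASK: u32 = 0o660;
    const RO_MASK: u32 = 0o440;
    const EXEC_MASK: u32 = 0o110;
    const MODE_SETGID: u32 = 0o2000;
    // Directory case: rw + exec + setgid bits are OR-ed into the current mode.
    assert_eq!(RW_MASK | EXEC_MASK | MODE_SETGID, 0o2770);
    // Read-only case yields 0o2550.
    assert_eq!(RO_MASK | EXEC_MASK | MODE_SETGID, 0o2550);
}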
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use protocols::agent::FSGroup;
|
||||
use std::fs::File;
|
||||
use tempfile::tempdir;
|
||||
use test_utils::{
|
||||
skip_if_not_root, skip_loop_by_user, skip_loop_if_not_root, skip_loop_if_root, TestUserType,
|
||||
};
|
||||
|
||||
#[test]
|
||||
fn test_mount_storage() {
|
||||
#[derive(Debug)]
|
||||
struct TestData<'a> {
|
||||
test_user: TestUserType,
|
||||
storage: Storage,
|
||||
error_contains: &'a str,
|
||||
|
||||
make_source_dir: bool,
|
||||
make_mount_dir: bool,
|
||||
deny_mount_permission: bool,
|
||||
}
|
||||
|
||||
impl Default for TestData<'_> {
|
||||
fn default() -> Self {
|
||||
TestData {
|
||||
test_user: TestUserType::Any,
|
||||
storage: Storage {
|
||||
mount_point: "mnt".to_string(),
|
||||
source: "src".to_string(),
|
||||
fstype: "tmpfs".to_string(),
|
||||
..Default::default()
|
||||
},
|
||||
make_source_dir: true,
|
||||
make_mount_dir: false,
|
||||
deny_mount_permission: false,
|
||||
error_contains: "",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let tests = &[
|
||||
TestData {
|
||||
test_user: TestUserType::NonRootOnly,
|
||||
error_contains: "EPERM: Operation not permitted",
|
||||
..Default::default()
|
||||
},
|
||||
TestData {
|
||||
test_user: TestUserType::RootOnly,
|
||||
..Default::default()
|
||||
},
|
||||
TestData {
|
||||
storage: Storage {
|
||||
mount_point: "mnt".to_string(),
|
||||
source: "src".to_string(),
|
||||
fstype: "bind".to_string(),
|
||||
..Default::default()
|
||||
},
|
||||
make_source_dir: false,
|
||||
make_mount_dir: true,
|
||||
error_contains: "Could not create mountpoint",
|
||||
..Default::default()
|
||||
},
|
||||
TestData {
|
||||
test_user: TestUserType::NonRootOnly,
|
||||
deny_mount_permission: true,
|
||||
error_contains: "Could not create mountpoint",
|
||||
..Default::default()
|
||||
},
|
||||
];
|
||||
|
||||
for (i, d) in tests.iter().enumerate() {
|
||||
let msg = format!("test[{}]: {:?}", i, d);
|
||||
|
||||
skip_loop_by_user!(msg, d.test_user);
|
||||
|
||||
let drain = slog::Discard;
|
||||
let logger = slog::Logger::root(drain, o!());
|
||||
|
||||
let tempdir = tempdir().unwrap();
|
||||
|
||||
let source = tempdir.path().join(&d.storage.source);
|
||||
let mount_point = tempdir.path().join(&d.storage.mount_point);
|
||||
|
||||
let storage = Storage {
|
||||
source: source.to_str().unwrap().to_string(),
|
||||
mount_point: mount_point.to_str().unwrap().to_string(),
|
||||
..d.storage.clone()
|
||||
};
|
||||
|
||||
if d.make_source_dir {
|
||||
fs::create_dir_all(&storage.source).unwrap();
|
||||
}
|
||||
if d.make_mount_dir {
|
||||
fs::create_dir_all(&storage.mount_point).unwrap();
|
||||
}
|
||||
|
||||
if d.deny_mount_permission {
|
||||
fs::set_permissions(
|
||||
mount_point.parent().unwrap(),
|
||||
fs::Permissions::from_mode(0o000),
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
let result = mount_storage(&logger, &storage);
|
||||
|
||||
// restore permissions so tempdir can be cleaned up
|
||||
if d.deny_mount_permission {
|
||||
fs::set_permissions(
|
||||
mount_point.parent().unwrap(),
|
||||
fs::Permissions::from_mode(0o755),
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
if result.is_ok() {
|
||||
nix::mount::umount(&mount_point).unwrap();
|
||||
}
|
||||
|
||||
let msg = format!("{}: result: {:?}", msg, result);
|
||||
if d.error_contains.is_empty() {
|
||||
assert!(result.is_ok(), "{}", msg);
|
||||
} else {
|
||||
assert!(result.is_err(), "{}", msg);
|
||||
let error_msg = format!("{}", result.unwrap_err());
|
||||
assert!(error_msg.contains(d.error_contains), "{}", msg);
|
||||
}
|
||||
}
|
||||
}

    #[test]
    fn test_set_ownership() {
        skip_if_not_root!();

        let logger = slog::Logger::root(slog::Discard, o!());

        #[derive(Debug)]
        struct TestData<'a> {
            mount_path: &'a str,
            fs_group: Option<FSGroup>,
            read_only: bool,
            expected_group_id: u32,
            expected_permission: u32,
        }

        let tests = &[
            TestData {
                mount_path: "foo",
                fs_group: None,
                read_only: false,
                expected_group_id: 0,
                expected_permission: 0,
            },
            TestData {
                mount_path: "rw_mount",
                fs_group: Some(FSGroup {
                    group_id: 3000,
                    group_change_policy: FSGroupChangePolicy::Always.into(),
                    ..Default::default()
                }),
                read_only: false,
                expected_group_id: 3000,
                expected_permission: RW_MASK | EXEC_MASK | MODE_SETGID,
            },
            TestData {
                mount_path: "ro_mount",
                fs_group: Some(FSGroup {
                    group_id: 3000,
                    group_change_policy: FSGroupChangePolicy::OnRootMismatch.into(),
                    ..Default::default()
                }),
                read_only: true,
                expected_group_id: 3000,
                expected_permission: RO_MASK | EXEC_MASK | MODE_SETGID,
            },
        ];

        let tempdir = tempdir().expect("failed to create tmpdir");

        for (i, d) in tests.iter().enumerate() {
            let msg = format!("test[{}]: {:?}", i, d);

            let mount_dir = tempdir.path().join(d.mount_path);
            fs::create_dir(&mount_dir)
                .unwrap_or_else(|_| panic!("{}: failed to create root directory", msg));

            let directory_mode = mount_dir.as_path().metadata().unwrap().permissions().mode();
            let mut storage_data = Storage::new();
            if d.read_only {
                storage_data.set_options(vec!["foo".to_string(), "ro".to_string()]);
            }
            if let Some(fs_group) = d.fs_group.clone() {
                storage_data.set_fs_group(fs_group);
            }
            storage_data.mount_point = mount_dir.clone().into_os_string().into_string().unwrap();

            let result = set_ownership(&logger, &storage_data);
            assert!(result.is_ok());

            assert_eq!(
                mount_dir.as_path().metadata().unwrap().gid(),
                d.expected_group_id
            );
            assert_eq!(
                mount_dir.as_path().metadata().unwrap().permissions().mode(),
                (directory_mode | d.expected_permission)
            );
        }
    }
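The mask constants referenced above are defined elsewhere in the agent. Assuming the conventional values below (an assumption, shown only to make the expected modes concrete), the two fsGroup cases compose like this:

```rust
// Assumed values; the real constants live in the agent's mount/storage code.
const MODE_SETGID: u32 = 0o2000;
const RW_MASK: u32 = 0o660;
const RO_MASK: u32 = 0o440;
const EXEC_MASK: u32 = 0o110;

fn main() {
    // Read-write fsGroup mount: group rw + group exec + setgid bit.
    assert_eq!(RW_MASK | EXEC_MASK | MODE_SETGID, 0o2770);
    // Read-only fsGroup mount: group r + group exec + setgid bit.
    assert_eq!(RO_MASK | EXEC_MASK | MODE_SETGID, 0o2550);
}
```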

    #[test]
    fn test_recursive_ownership_change() {
        skip_if_not_root!();

        const COUNT: usize = 5;

        #[derive(Debug)]
        struct TestData<'a> {
            // Directory where the recursive ownership change should be performed on
            path: &'a str,

            // User ID for ownership change
            uid: u32,

            // Group ID for ownership change
            gid: u32,

            // Set when the permission should be read-only
            read_only: bool,

            // The expected permission of all directories after ownership change
            expected_permission_directory: u32,

            // The expected permission of all files after ownership change
            expected_permission_file: u32,
        }

        let tests = &[
            TestData {
                path: "no_gid_change",
                uid: 0,
                gid: 0,
                read_only: false,
                expected_permission_directory: 0,
                expected_permission_file: 0,
            },
            TestData {
                path: "rw_gid_change",
                uid: 0,
                gid: 3000,
                read_only: false,
                expected_permission_directory: RW_MASK | EXEC_MASK | MODE_SETGID,
                expected_permission_file: RW_MASK,
            },
            TestData {
                path: "ro_gid_change",
                uid: 0,
                gid: 3000,
                read_only: true,
                expected_permission_directory: RO_MASK | EXEC_MASK | MODE_SETGID,
                expected_permission_file: RO_MASK,
            },
        ];

        let tempdir = tempdir().expect("failed to create tmpdir");

        for (i, d) in tests.iter().enumerate() {
            let msg = format!("test[{}]: {:?}", i, d);

            let mount_dir = tempdir.path().join(d.path);
            fs::create_dir(&mount_dir)
                .unwrap_or_else(|_| panic!("{}: failed to create root directory", msg));

            let directory_mode = mount_dir.as_path().metadata().unwrap().permissions().mode();
            let mut file_mode: u32 = 0;

            // create testing directories and files
            for n in 1..COUNT {
                let nest_dir = mount_dir.join(format!("nested{}", n));
                fs::create_dir(&nest_dir)
                    .unwrap_or_else(|_| panic!("{}: failed to create nest directory", msg));

                for f in 1..COUNT {
                    let filename = nest_dir.join(format!("file{}", f));
                    File::create(&filename)
                        .unwrap_or_else(|_| panic!("{}: failed to create file", msg));
                    file_mode = filename.as_path().metadata().unwrap().permissions().mode();
                }
            }

            let uid = if d.uid > 0 {
                Some(Uid::from_raw(d.uid))
            } else {
                None
            };
            let gid = if d.gid > 0 {
                Some(Gid::from_raw(d.gid))
            } else {
                None
            };
            let result = recursive_ownership_change(&mount_dir, uid, gid, d.read_only);

            assert!(result.is_ok());

            assert_eq!(mount_dir.as_path().metadata().unwrap().gid(), d.gid);
            assert_eq!(
                mount_dir.as_path().metadata().unwrap().permissions().mode(),
                (directory_mode | d.expected_permission_directory)
            );

            for n in 1..COUNT {
                let nest_dir = mount_dir.join(format!("nested{}", n));
                for f in 1..COUNT {
                    let filename = nest_dir.join(format!("file{}", f));
                    let file = Path::new(&filename);

                    assert_eq!(file.metadata().unwrap().gid(), d.gid);
                    assert_eq!(
                        file.metadata().unwrap().permissions().mode(),
                        (file_mode | d.expected_permission_file)
                    );
                }

                let dir = Path::new(&nest_dir);
                assert_eq!(dir.metadata().unwrap().gid(), d.gid);
                assert_eq!(
                    dir.metadata().unwrap().permissions().mode(),
                    (directory_mode | d.expected_permission_directory)
                );
            }
        }
    }
}

@ -58,7 +58,8 @@ use crate::fs::is_symlink;
use crate::sl;

/// Default permission for directories created for mountpoint.
const MOUNT_PERM: u32 = 0o755;
const MOUNT_DIR_PERM: u32 = 0o755;
const MOUNT_FILE_PERM: u32 = 0o644;

pub const PROC_MOUNTS_FILE: &str = "/proc/mounts";
const PROC_FIELDS_PER_LINE: usize = 6;

@ -187,13 +188,16 @@ pub fn create_mount_destination<S: AsRef<Path>, D: AsRef<Path>, R: AsRef<Path>>(
        .parent()
        .ok_or_else(|| Error::InvalidPath(dst.to_path_buf()))?;
    let mut builder = fs::DirBuilder::new();
    builder.mode(MOUNT_PERM).recursive(true).create(parent)?;
    builder
        .mode(MOUNT_DIR_PERM)
        .recursive(true)
        .create(parent)?;

    if fs_type == "bind" {
        // The source and destination for bind mounting must be the same type: file or directory.
        if !src.as_ref().is_dir() {
            fs::OpenOptions::new()
                .mode(MOUNT_PERM)
                .mode(MOUNT_FILE_PERM)
                .write(true)
                .create(true)
                .open(dst)?;
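The rule in this hunk, that a bind mount destination must match its source's type, can be sketched in plain std as below. This is an illustrative reimplementation under assumed 0o755/0o644 modes, not the function above.

```rust
use std::fs;
use std::io;
use std::os::unix::fs::{DirBuilderExt, OpenOptionsExt};
use std::path::Path;

// Sketch: prepare a destination for a bind mount so its type (file vs.
// directory) matches the source, creating parent directories as needed.
fn prepare_bind_destination(src: &Path, dst: &Path) -> io::Result<()> {
    if src.is_dir() {
        // Directory sources bind onto a directory destination.
        fs::DirBuilder::new().recursive(true).mode(0o755).create(dst)?;
    } else {
        // File sources bind onto an existing file destination.
        if let Some(parent) = dst.parent() {
            fs::create_dir_all(parent)?;
        }
        fs::OpenOptions::new()
            .mode(0o644)
            .write(true)
            .create(true)
            .open(dst)?;
    }
    Ok(())
}
```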

@ -390,19 +394,17 @@ fn do_rebind_mount<P: AsRef<Path>>(path: P, readonly: bool, flags: MsFlags) -> R
}

/// Takes fstab-style mount options and parses them for use with a standard mount() syscall.
fn parse_mount_options(options: &[String]) -> Result<(MsFlags, String)> {
pub fn parse_mount_options<T: AsRef<str>>(options: &[T]) -> Result<(MsFlags, String)> {
    let mut flags: MsFlags = MsFlags::empty();
    let mut data: Vec<String> = Vec::new();

    for opt in options.iter() {
        if opt == "defaults" {
            continue;
        } else if opt == "loop" {
        if opt.as_ref() == "loop" {
            return Err(Error::InvalidMountOption("loop".to_string()));
        } else if let Some(v) = parse_mount_flags(flags, opt) {
        } else if let Some(v) = parse_mount_flags(flags, opt.as_ref()) {
            flags = v;
        } else {
            data.push(opt.clone());
            data.push(opt.as_ref().to_string());
        }
    }
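A quick illustration of the new generic signature (hypothetical usage, not from the commit, and assuming the module's `Result` alias is in scope): plain string slices now work directly, recognized flag keywords accumulate into `MsFlags`, and anything else falls through into the data string handed to mount(2).

```rust
use nix::mount::MsFlags;

// Assumes the parse_mount_options() defined above is in scope.
fn demo() -> Result<()> {
    // "size=64m" is not a flag keyword, so it lands in the data string.
    let opts = ["ro", "nosuid", "size=64m"];
    let (flags, data) = parse_mount_options(&opts)?;
    assert!(flags.contains(MsFlags::MS_RDONLY | MsFlags::MS_NOSUID));
    assert_eq!(data, "size=64m");
    Ok(())
}
```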

@ -441,6 +443,7 @@ fn parse_mount_flags(mut flags: MsFlags, flag_str: &str) -> Option<MsFlags> {
    // overridden by subsequent options, as in the option line users,exec,dev,suid).
    match flag_str {
        // Clear flags
        "defaults" => {}
        "async" => flags &= !MsFlags::MS_SYNCHRONOUS,
        "atime" => flags &= !MsFlags::MS_NOATIME,
        "dev" => flags &= !MsFlags::MS_NODEV,
@ -464,6 +467,14 @@ fn parse_mount_flags(mut flags: MsFlags, flag_str: &str) -> Option<MsFlags> {
        "noexec" => flags |= MsFlags::MS_NOEXEC,
        "nosuid" => flags |= MsFlags::MS_NOSUID,
        "rbind" => flags |= MsFlags::MS_BIND | MsFlags::MS_REC,
        "unbindable" => flags |= MsFlags::MS_UNBINDABLE,
        "runbindable" => flags |= MsFlags::MS_UNBINDABLE | MsFlags::MS_REC,
        "private" => flags |= MsFlags::MS_PRIVATE,
        "rprivate" => flags |= MsFlags::MS_PRIVATE | MsFlags::MS_REC,
        "shared" => flags |= MsFlags::MS_SHARED,
        "rshared" => flags |= MsFlags::MS_SHARED | MsFlags::MS_REC,
        "slave" => flags |= MsFlags::MS_SLAVE,
        "rslave" => flags |= MsFlags::MS_SLAVE | MsFlags::MS_REC,
        "relatime" => flags |= MsFlags::MS_RELATIME,
        "remount" => flags |= MsFlags::MS_REMOUNT,
        "ro" => flags |= MsFlags::MS_RDONLY,
@ -1030,7 +1041,7 @@ mod tests {

    #[test]
    fn test_parse_mount_options() {
        let options = vec![];
        let options: Vec<&str> = vec![];
        let (flags, data) = parse_mount_options(&options).unwrap();
        assert!(flags.is_empty());
        assert!(data.is_empty());

@ -5,7 +5,9 @@
//

use anyhow::{anyhow, Context, Error, Result};
use std::collections::hash_map::Entry;
use std::convert::TryFrom;
use std::fmt::Formatter;
use std::{collections::HashMap, fs, path::PathBuf};

/// Prefix to mark a volume as Kata special.
@ -14,6 +16,9 @@ pub const KATA_VOLUME_TYPE_PREFIX: &str = "kata:";
/// The Mount should be ignored by the host and handled by the guest.
pub const KATA_GUEST_MOUNT_PREFIX: &str = "kata:guest-mount:";

/// The sharedfs volume is mounted by the guest OS before the kata-agent starts.
pub const KATA_SHAREDFS_GUEST_PREMOUNT_TAG: &str = "kataShared";

/// KATA_EPHEMERAL_VOLUME_TYPE creates a tmpfs-backed volume for sharing files between containers.
pub const KATA_EPHEMERAL_VOLUME_TYPE: &str = "ephemeral";

@ -23,6 +28,9 @@ pub const KATA_HOST_DIR_VOLUME_TYPE: &str = "kata:hostdir";
/// KATA_MOUNT_INFO_FILE_NAME is used for the file that holds direct-volume mount info.
pub const KATA_MOUNT_INFO_FILE_NAME: &str = "mountInfo.json";

/// Specify `fsgid` for a volume or mount, e.g. `fsgid=1`.
pub const KATA_MOUNT_OPTION_FS_GID: &str = "fsgid";

/// KATA_DIRECT_VOLUME_ROOT_PATH is the root path used for concatenating with the direct-volume mount info file path.
pub const KATA_DIRECT_VOLUME_ROOT_PATH: &str = "/run/kata-containers/shared/direct-volumes";
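As a hypothetical sketch (the helper is not in this commit), a consumer of `KATA_MOUNT_OPTION_FS_GID` might extract the group id from a mount's option list like this:

```rust
// Illustrative only: pull the group id out of options like ["rw", "fsgid=1"].
fn parse_fs_gid(options: &[String]) -> Option<u32> {
    options.iter().find_map(|opt| {
        opt.strip_prefix("fsgid=") // KATA_MOUNT_OPTION_FS_GID plus '='
            .and_then(|v| v.parse::<u32>().ok())
    })
}
```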

@ -421,6 +429,84 @@ impl TryFrom<&NydusExtraOptions> for KataVirtualVolume {
    }
}

/// An implementation of a generic storage device.
pub struct StorageDeviceGeneric {
    path: String,
}

impl std::fmt::Debug for StorageDeviceGeneric {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("StorageState")
            .field("path", &self.path)
            .finish()
    }
}

impl StorageDeviceGeneric {
    /// Create a new instance of `StorageDeviceGeneric`.
    pub fn new(path: String) -> Self {
        StorageDeviceGeneric { path }
    }
}

impl StorageDevice for StorageDeviceGeneric {
    fn path(&self) -> &str {
        &self.path
    }

    fn cleanup(&self) {}
}

/// Trait object for storage devices.
pub trait StorageDevice: Send + Sync {
    /// Path of the storage device.
    fn path(&self) -> &str;

    /// Clean up resources related to the storage device.
    fn cleanup(&self);
}
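As a hypothetical illustration of the trait (not part of this commit), a device that unmounts its mount point on cleanup might look like the following; the type name and unmount-on-cleanup behavior are assumptions:

```rust
// Illustrative only: an ephemeral device that unmounts itself on cleanup.
struct EphemeralDevice {
    mount_point: String,
}

impl StorageDevice for EphemeralDevice {
    fn path(&self) -> &str {
        &self.mount_point
    }

    fn cleanup(&self) {
        // Best-effort unmount; errors are deliberately ignored in this sketch.
        let _ = nix::mount::umount(self.mount_point.as_str());
    }
}
```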

/// Manager for registered storage device handlers.
pub struct StorageHandlerManager<H> {
    handlers: HashMap<String, H>,
}

impl<H> Default for StorageHandlerManager<H> {
    fn default() -> Self {
        Self::new()
    }
}

impl<H> StorageHandlerManager<H> {
    /// Create a new instance of `StorageHandlerManager`.
    pub fn new() -> Self {
        Self {
            handlers: HashMap::new(),
        }
    }

    /// Register a storage device handler.
    pub fn add_handler(&mut self, id: &str, handler: H) -> Result<()> {
        match self.handlers.entry(id.to_string()) {
            Entry::Occupied(_) => Err(anyhow!("storage handler for {} already exists", id)),
            Entry::Vacant(entry) => {
                entry.insert(handler);
                Ok(())
            }
        }
    }

    /// Get the storage handler registered under `id`.
    pub fn handler(&self, id: &str) -> Option<&H> {
        self.handlers.get(id)
    }

    /// Get the names of all registered handlers.
    pub fn get_handlers(&self) -> Vec<String> {
        self.handlers.keys().map(|v| v.to_string()).collect()
    }
}
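A hypothetical registration flow, using only the methods defined above (the handler type, ids, and paths are illustrative, and `Result` is assumed to be the module's anyhow alias):

```rust
// Illustrative only: register a function-pointer handler and look it up.
fn handle_ephemeral(mount_point: &str) {
    println!("would mount tmpfs at {}", mount_point);
}

fn demo() -> Result<()> {
    let mut mgr: StorageHandlerManager<fn(&str)> = StorageHandlerManager::new();
    mgr.add_handler("ephemeral", handle_ephemeral)?;

    // Registering the same id twice is rejected.
    assert!(mgr.add_handler("ephemeral", handle_ephemeral).is_err());

    if let Some(handler) = mgr.handler("ephemeral") {
        handler("/run/kata-containers/sandbox/ephemeral");
    }
    Ok(())
}
```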

/// Join a user-provided volume path with the kata direct-volume root path.
///
/// The `volume_path` is base64-encoded and then safely joined to the `prefix`
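A sketch of the documented scheme (the helper name and crate choice are assumptions, not this commit's actual code, here using the base64 0.21 engine API): encoding the user-supplied path before joining keeps `..` components and embedded separators from escaping the root.

```rust
use base64::{engine::general_purpose::STANDARD, Engine as _};
use std::path::PathBuf;

// Illustrative only: join a user-supplied volume path under a fixed root.
fn join_direct_volume_path(prefix: &str, volume_path: &str) -> PathBuf {
    // Base64-encoding turns the whole path into a single safe component.
    PathBuf::from(prefix).join(STANDARD.encode(volume_path))
}
```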