Merge pull request #7602 from jiangliu/agent-storage

Refine storage device management for kata-agent
This commit is contained in:
Jiang Liu 2023-08-25 22:20:18 +08:00 committed by GitHub
commit 91db888d83
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 1557 additions and 1363 deletions

View File

@ -35,9 +35,9 @@ const VM_ROOTFS: &str = "/";
const BLOCK: &str = "block";
pub const DRIVER_9P_TYPE: &str = "9p";
pub const DRIVER_VIRTIOFS_TYPE: &str = "virtio-fs";
pub const DRIVER_BLK_TYPE: &str = "blk";
pub const DRIVER_BLK_PCI_TYPE: &str = "blk";
pub const DRIVER_BLK_CCW_TYPE: &str = "blk-ccw";
pub const DRIVER_MMIO_BLK_TYPE: &str = "mmioblk";
pub const DRIVER_BLK_MMIO_TYPE: &str = "mmioblk";
pub const DRIVER_SCSI_TYPE: &str = "scsi";
pub const DRIVER_NVDIMM_TYPE: &str = "nvdimm";
pub const DRIVER_EPHEMERAL_TYPE: &str = "ephemeral";
@ -935,9 +935,9 @@ async fn add_device(device: &Device, sandbox: &Arc<Mutex<Sandbox>>) -> Result<Sp
}
match device.type_.as_str() {
DRIVER_BLK_TYPE => virtio_blk_device_handler(device, sandbox).await,
DRIVER_BLK_PCI_TYPE => virtio_blk_device_handler(device, sandbox).await,
DRIVER_BLK_CCW_TYPE => virtio_blk_ccw_device_handler(device, sandbox).await,
DRIVER_MMIO_BLK_TYPE => virtiommio_blk_device_handler(device, sandbox).await,
DRIVER_BLK_MMIO_TYPE => virtiommio_blk_device_handler(device, sandbox).await,
DRIVER_NVDIMM_TYPE => virtio_nvdimm_device_handler(device, sandbox).await,
DRIVER_SCSI_TYPE => virtio_scsi_device_handler(device, sandbox).await,
DRIVER_VFIO_PCI_GK_TYPE | DRIVER_VFIO_PCI_TYPE => {

View File

@ -48,6 +48,7 @@ mod pci;
pub mod random;
mod sandbox;
mod signal;
mod storage;
mod uevent;
mod util;
mod version;

File diff suppressed because it is too large Load Diff

View File

@ -7,14 +7,14 @@ use anyhow::{anyhow, Result};
use nix::mount::MsFlags;
use nix::sched::{unshare, CloneFlags};
use nix::unistd::{getpid, gettid};
use slog::Logger;
use std::fmt;
use std::fs;
use std::fs::File;
use std::path::{Path, PathBuf};
use tracing::instrument;
use crate::mount::{baremount, FLAGS};
use slog::Logger;
use crate::mount::baremount;
const PERSISTENT_NS_DIR: &str = "/var/run/sandbox-ns";
pub const NSTYPEIPC: &str = "ipc";
@ -116,15 +116,7 @@ impl Namespace {
// Bind mount the new namespace from the current thread onto the mount point to persist it.
let mut flags = MsFlags::empty();
if let Some(x) = FLAGS.get("rbind") {
let (clear, f) = *x;
if clear {
flags &= !f;
} else {
flags |= f;
}
};
flags |= MsFlags::MS_BIND | MsFlags::MS_REC;
baremount(source, destination, "none", flags, "", &logger).map_err(|e| {
anyhow!(

View File

@ -57,12 +57,13 @@ use crate::device::{
};
use crate::linux_abi::*;
use crate::metrics::get_metrics;
use crate::mount::{add_storages, baremount, update_ephemeral_mounts, STORAGE_HANDLER_LIST};
use crate::mount::baremount;
use crate::namespace::{NSTYPEIPC, NSTYPEPID, NSTYPEUTS};
use crate::network::setup_guest_dns;
use crate::pci;
use crate::random;
use crate::sandbox::Sandbox;
use crate::storage::{add_storages, update_ephemeral_mounts, STORAGE_HANDLERS};
use crate::version::{AGENT_VERSION, API_VERSION};
use crate::AGENT_CONFIG;
@ -243,13 +244,7 @@ impl AgentService {
// After all those storages have been processed, no matter the order
// here, the agent will rely on rustjail (using the oci.Mounts
// list) to bind mount all of them inside the container.
let m = add_storages(
sl(),
&req.storages,
&self.sandbox,
Some(req.container_id.clone()),
)
.await?;
let m = add_storages(sl(), req.storages, &self.sandbox, Some(req.container_id)).await?;
let mut s = self.sandbox.lock().await;
s.container_mounts.insert(cid.clone(), m);
@ -308,7 +303,7 @@ impl AgentService {
if let Err(e) = ctr.destroy().await {
error!(sl(), "failed to destroy container: {:?}", e);
}
if let Err(e) = remove_container_resources(&mut s, &cid) {
if let Err(e) = remove_container_resources(&mut s, &cid).await {
error!(sl(), "failed to remove container resources: {:?}", e);
}
return Err(err);
@ -360,7 +355,7 @@ impl AgentService {
.ok_or_else(|| anyhow!("Invalid container id"))?
.destroy()
.await?;
remove_container_resources(&mut sandbox, &cid)?;
remove_container_resources(&mut sandbox, &cid).await?;
return Ok(());
}
@ -382,7 +377,7 @@ impl AgentService {
.await
.map_err(|_| anyhow!(nix::Error::ETIME))???;
remove_container_resources(&mut *self.sandbox.lock().await, &cid)
remove_container_resources(&mut *self.sandbox.lock().await, &cid).await
}
#[instrument]
@ -1196,7 +1191,7 @@ impl agent_ttrpc::AgentService for AgentService {
s.setup_shared_namespaces().await.map_ttrpc_err(same)?;
}
let m = add_storages(sl(), &req.storages, &self.sandbox, None)
let m = add_storages(sl(), req.storages, &self.sandbox, None)
.await
.map_ttrpc_err(same)?;
self.sandbox.lock().await.mounts = m;
@ -1585,7 +1580,7 @@ fn get_agent_details() -> AgentDetails {
detail.init_daemon = unistd::getpid() == Pid::from_raw(1);
detail.device_handlers = Vec::new();
detail.storage_handlers = STORAGE_HANDLER_LIST.iter().map(|x| x.to_string()).collect();
detail.storage_handlers = STORAGE_HANDLERS.get_handlers();
detail
}
@ -1679,21 +1674,21 @@ fn update_container_namespaces(
Ok(())
}
fn remove_container_resources(sandbox: &mut Sandbox, cid: &str) -> Result<()> {
async fn remove_container_resources(sandbox: &mut Sandbox, cid: &str) -> Result<()> {
let mut cmounts: Vec<String> = vec![];
// Find the sandbox storage used by this container
let mounts = sandbox.container_mounts.get(cid);
if let Some(mounts) = mounts {
for m in mounts.iter() {
if sandbox.storages.get(m).is_some() {
if sandbox.storages.contains_key(m) {
cmounts.push(m.to_string());
}
}
}
for m in cmounts.iter() {
if let Err(err) = sandbox.unset_and_remove_sandbox_storage(m) {
if let Err(err) = sandbox.remove_sandbox_storage(m).await {
error!(
sl(),
"failed to unset_and_remove_sandbox_storage for container {}, error: {:?}",

View File

@ -3,16 +3,20 @@
// SPDX-License-Identifier: Apache-2.0
//
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::fmt::{Debug, Formatter};
use std::fs;
use std::os::unix::fs::PermissionsExt;
use std::path::Path;
use std::str::FromStr;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::Arc;
use std::{thread, time};
use anyhow::{anyhow, Context, Result};
use kata_types::cpu::CpuSet;
use kata_types::mount::{StorageDevice, StorageDeviceGeneric};
use libc::pid_t;
use oci::{Hook, Hooks};
use protocols::agent::OnlineCPUMemRequest;
@ -28,7 +32,7 @@ use tokio::sync::Mutex;
use tracing::instrument;
use crate::linux_abi::*;
use crate::mount::{get_mount_fs_type, remove_mounts, TYPE_ROOTFS};
use crate::mount::{get_mount_fs_type, is_mounted, remove_mounts, TYPE_ROOTFS};
use crate::namespace::Namespace;
use crate::netlink::Handle;
use crate::network::Network;
@ -40,6 +44,46 @@ pub const ERR_INVALID_CONTAINER_ID: &str = "Invalid container id";
type UeventWatcher = (Box<dyn UeventMatcher>, oneshot::Sender<Uevent>);
#[derive(Clone)]
pub struct StorageState {
count: Arc<AtomicU32>,
device: Arc<dyn StorageDevice>,
}
impl Debug for StorageState {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("StorageState").finish()
}
}
impl StorageState {
fn new() -> Self {
StorageState {
count: Arc::new(AtomicU32::new(1)),
device: Arc::new(StorageDeviceGeneric::new("".to_string())),
}
}
pub fn from_device(device: Arc<dyn StorageDevice>) -> Self {
Self {
count: Arc::new(AtomicU32::new(1)),
device,
}
}
pub async fn ref_count(&self) -> u32 {
self.count.load(Ordering::Relaxed)
}
async fn inc_ref_count(&self) {
self.count.fetch_add(1, Ordering::Acquire);
}
async fn dec_and_test_ref_count(&self) -> bool {
self.count.fetch_sub(1, Ordering::AcqRel) == 1
}
}
#[derive(Debug)]
pub struct Sandbox {
pub logger: Logger,
@ -54,7 +98,7 @@ pub struct Sandbox {
pub shared_utsns: Namespace,
pub shared_ipcns: Namespace,
pub sandbox_pidns: Option<Namespace>,
pub storages: HashMap<String, u32>,
pub storages: HashMap<String, StorageState>,
pub running: bool,
pub no_pivot_root: bool,
pub sender: Option<tokio::sync::oneshot::Sender<i32>>,
@ -100,52 +144,55 @@ impl Sandbox {
})
}
// set_sandbox_storage sets the sandbox level reference
// counter for the sandbox storage.
// This method also returns a boolean to let
// callers know if the storage already existed or not.
// It will return true if storage is new.
/// Add a new storage object or increase reference count of existing one.
/// The caller may detect new storage object by checking `StorageState.refcount == 1`.
#[instrument]
pub fn set_sandbox_storage(&mut self, path: &str) -> bool {
match self.storages.get_mut(path) {
None => {
self.storages.insert(path.to_string(), 1);
true
pub async fn add_sandbox_storage(&mut self, path: &str) -> StorageState {
match self.storages.entry(path.to_string()) {
Entry::Occupied(e) => {
let state = e.get().clone();
state.inc_ref_count().await;
state
}
Some(count) => {
*count += 1;
false
Entry::Vacant(e) => {
let state = StorageState::new();
e.insert(state.clone());
state
}
}
}
// unset_sandbox_storage will decrement the sandbox storage
// reference counter. If there aren't any containers using
// that sandbox storage, this method will remove the
// storage reference from the sandbox and return 'true' to
// let the caller know that they can clean up the storage
// related directories by calling remove_sandbox_storage
#[instrument]
pub fn unset_sandbox_storage(&mut self, path: &str) -> Result<bool> {
match self.storages.get_mut(path) {
None => Err(anyhow!("Sandbox storage with path {} not found", path)),
Some(count) => {
*count -= 1;
if *count == 0 {
self.storages.remove(path);
return Ok(true);
}
Ok(false)
}
/// Update the storage device associated with a path.
pub fn update_sandbox_storage(
&mut self,
path: &str,
device: Arc<dyn StorageDevice>,
) -> std::result::Result<Arc<dyn StorageDevice>, Arc<dyn StorageDevice>> {
if !self.storages.contains_key(path) {
return Err(device);
}
let state = StorageState::from_device(device);
// Safe to unwrap() because we have just ensured existence of entry.
let state = self.storages.insert(path.to_string(), state).unwrap();
Ok(state.device)
}
// remove_sandbox_storage removes the sandbox storage if no
// containers are using that storage.
// Clean mount and directory of a mountpoint.
// This is actually StorageDeviceGeneric::cleanup(), kept here due to dependency chain.
#[instrument]
pub fn remove_sandbox_storage(&mut self, path: &str) -> Result<()> {
let mounts = vec![path.to_string()];
remove_mounts(&mounts)?;
fn cleanup_sandbox_storage(&mut self, path: &str) -> Result<()> {
if path.is_empty() {
return Err(anyhow!("mountpoint path is empty"));
} else if !Path::new(path).exists() {
return Ok(());
}
if matches!(is_mounted(path), Ok(true)) {
let mounts = vec![path.to_string()];
remove_mounts(&mounts)?;
}
// "remove_dir" will fail if the mount point is backed by a read-only filesystem.
// This is the case with the device mapper snapshotter, where we mount the block device directly
// at the underlying sandbox path which was provided from the base RO kataShared path from the host.
@ -155,16 +202,23 @@ impl Sandbox {
Ok(())
}
// unset_and_remove_sandbox_storage unsets the storage from sandbox
// and if there are no containers using this storage it will
// remove it from the sandbox.
/// Decrease reference count and destroy the storage object if reference count reaches zero.
/// Returns `Ok(true)` if the reference count has reached zero and the storage object has been
/// removed.
#[instrument]
pub fn unset_and_remove_sandbox_storage(&mut self, path: &str) -> Result<()> {
if self.unset_sandbox_storage(path)? {
return self.remove_sandbox_storage(path);
pub async fn remove_sandbox_storage(&mut self, path: &str) -> Result<bool> {
match self.storages.get(path) {
None => Err(anyhow!("Sandbox storage with path {} not found", path)),
Some(state) => {
if state.dec_and_test_ref_count().await {
self.storages.remove(path);
self.cleanup_sandbox_storage(path)?;
Ok(true)
} else {
Ok(false)
}
}
}
Ok(())
}
#[instrument]
@ -493,24 +547,22 @@ mod tests {
let tmpdir_path = tmpdir.path().to_str().unwrap();
// Add a new sandbox storage
let new_storage = s.set_sandbox_storage(tmpdir_path);
let new_storage = s.add_sandbox_storage(tmpdir_path).await;
// Check the reference counter
let ref_count = s.storages[tmpdir_path];
let ref_count = new_storage.ref_count().await;
assert_eq!(
ref_count, 1,
"Invalid refcount, got {} expected 1.",
ref_count
);
assert!(new_storage);
// Use the existing sandbox storage
let new_storage = s.set_sandbox_storage(tmpdir_path);
assert!(!new_storage, "Should be false as already exists.");
let new_storage = s.add_sandbox_storage(tmpdir_path).await;
// Since we are using existing storage, the reference counter
// should be 2 by now.
let ref_count = s.storages[tmpdir_path];
let ref_count = new_storage.ref_count().await;
assert_eq!(
ref_count, 2,
"Invalid refcount, got {} expected 2.",
@ -546,22 +598,20 @@ mod tests {
.tempdir_in(tmpdir_path)
.unwrap();
assert!(
s.remove_sandbox_storage(srcdir_path).is_err(),
"Expect Err as the directory is not a mountpoint"
);
assert!(s.remove_sandbox_storage("").is_err());
assert!(s.cleanup_sandbox_storage("").is_err());
let invalid_dir = emptydir.path().join("invalid");
assert!(s
.remove_sandbox_storage(invalid_dir.to_str().unwrap())
.is_err());
.cleanup_sandbox_storage(invalid_dir.to_str().unwrap())
.is_ok());
assert!(bind_mount(srcdir_path, destdir_path, &logger).is_ok());
assert!(s.remove_sandbox_storage(destdir_path).is_ok());
assert!(s.cleanup_sandbox_storage(destdir_path).is_ok());
// remove a directory without umount
s.cleanup_sandbox_storage(srcdir_path).unwrap();
}
#[tokio::test]
@ -573,8 +623,7 @@ mod tests {
let mut s = Sandbox::new(&logger).unwrap();
assert!(
s.unset_and_remove_sandbox_storage("/tmp/testEphePath")
.is_err(),
s.remove_sandbox_storage("/tmp/testEphePath").await.is_err(),
"Should fail because sandbox storage doesn't exist"
);
@ -595,8 +644,8 @@ mod tests {
assert!(bind_mount(srcdir_path, destdir_path, &logger).is_ok());
assert!(s.set_sandbox_storage(destdir_path));
assert!(s.unset_and_remove_sandbox_storage(destdir_path).is_ok());
s.add_sandbox_storage(destdir_path).await;
assert!(s.remove_sandbox_storage(destdir_path).await.is_ok());
let other_dir_str;
{
@ -609,10 +658,10 @@ mod tests {
let other_dir_path = other_dir.path().to_str().unwrap();
other_dir_str = other_dir_path.to_string();
assert!(s.set_sandbox_storage(other_dir_path));
s.add_sandbox_storage(other_dir_path).await;
}
assert!(s.unset_and_remove_sandbox_storage(&other_dir_str).is_err());
assert!(s.remove_sandbox_storage(&other_dir_str).await.is_ok());
}
#[tokio::test]
@ -624,28 +673,30 @@ mod tests {
let storage_path = "/tmp/testEphe";
// Add a new sandbox storage
assert!(s.set_sandbox_storage(storage_path));
s.add_sandbox_storage(storage_path).await;
// Use the existing sandbox storage
let state = s.add_sandbox_storage(storage_path).await;
assert!(
!s.set_sandbox_storage(storage_path),
state.ref_count().await > 1,
"Expects false as the storage is not new."
);
assert!(
!s.unset_sandbox_storage(storage_path).unwrap(),
!s.remove_sandbox_storage(storage_path).await.unwrap(),
"Expects false as there is still a storage."
);
// Reference counter should decrement to 1.
let ref_count = s.storages[storage_path];
let storage = &s.storages[storage_path];
let refcount = storage.ref_count().await;
assert_eq!(
ref_count, 1,
refcount, 1,
"Invalid refcount, got {} expected 1.",
ref_count
refcount
);
assert!(
s.unset_sandbox_storage(storage_path).unwrap(),
s.remove_sandbox_storage(storage_path).await.unwrap(),
"Expects true as there is still a storage."
);
@ -661,7 +712,7 @@ mod tests {
// If no container is using the sandbox storage, the reference
// counter for it should not exist.
assert!(
s.unset_sandbox_storage(storage_path).is_err(),
s.remove_sandbox_storage(storage_path).await.is_err(),
"Expects false as the reference counter should no exist."
);
}

View File

@ -0,0 +1,37 @@
// Copyright (c) 2019 Ant Financial
// Copyright (c) 2023 Alibaba Cloud
//
// SPDX-License-Identifier: Apache-2.0
//
use anyhow::Result;
use kata_types::mount::StorageDevice;
use protocols::agent::Storage;
use std::iter;
use std::sync::Arc;
use tracing::instrument;
use crate::storage::{new_device, StorageContext, StorageHandler};
#[derive(Debug)]
pub struct BindWatcherHandler {}
#[async_trait::async_trait]
impl StorageHandler for BindWatcherHandler {
#[instrument]
async fn create_device(
&self,
storage: Storage,
ctx: &mut StorageContext,
) -> Result<Arc<dyn StorageDevice>> {
if let Some(cid) = ctx.cid {
ctx.sandbox
.lock()
.await
.bind_watcher
.add_container(cid.to_string(), iter::once(storage.clone()), ctx.logger)
.await?;
}
new_device("".to_string())
}
}

View File

@ -0,0 +1,146 @@
// Copyright (c) 2019 Ant Financial
// Copyright (c) 2023 Alibaba Cloud
//
// SPDX-License-Identifier: Apache-2.0
//
use std::fs;
use std::os::unix::fs::PermissionsExt;
use std::path::Path;
use std::str::FromStr;
use std::sync::Arc;
use anyhow::{anyhow, Context, Result};
use kata_types::mount::StorageDevice;
use protocols::agent::Storage;
use tracing::instrument;
use crate::device::{
get_scsi_device_name, get_virtio_blk_pci_device_name, get_virtio_mmio_device_name,
wait_for_pmem_device,
};
use crate::pci;
use crate::storage::{common_storage_handler, new_device, StorageContext, StorageHandler};
#[cfg(target_arch = "s390x")]
use crate::{ccw, device::get_virtio_blk_ccw_device_name};
#[derive(Debug)]
pub struct VirtioBlkMmioHandler {}
#[async_trait::async_trait]
impl StorageHandler for VirtioBlkMmioHandler {
#[instrument]
async fn create_device(
&self,
storage: Storage,
ctx: &mut StorageContext,
) -> Result<Arc<dyn StorageDevice>> {
if !Path::new(&storage.source).exists() {
get_virtio_mmio_device_name(ctx.sandbox, &storage.source)
.await
.context("failed to get mmio device name")?;
}
let path = common_storage_handler(ctx.logger, &storage)?;
new_device(path)
}
}
#[derive(Debug)]
pub struct VirtioBlkPciHandler {}
#[async_trait::async_trait]
impl StorageHandler for VirtioBlkPciHandler {
#[instrument]
async fn create_device(
&self,
mut storage: Storage,
ctx: &mut StorageContext,
) -> Result<Arc<dyn StorageDevice>> {
// If hot-plugged, get the device node path based on the PCI path
// otherwise use the virt path provided in Storage Source
if storage.source.starts_with("/dev") {
let metadata = fs::metadata(&storage.source)
.context(format!("get metadata on file {:?}", &storage.source))?;
let mode = metadata.permissions().mode();
if mode & libc::S_IFBLK == 0 {
return Err(anyhow!("Invalid device {}", &storage.source));
}
} else {
let pcipath = pci::Path::from_str(&storage.source)?;
let dev_path = get_virtio_blk_pci_device_name(ctx.sandbox, &pcipath).await?;
storage.source = dev_path;
}
let path = common_storage_handler(ctx.logger, &storage)?;
new_device(path)
}
}
#[derive(Debug)]
pub struct VirtioBlkCcwHandler {}
#[async_trait::async_trait]
impl StorageHandler for VirtioBlkCcwHandler {
#[cfg(target_arch = "s390x")]
#[instrument]
async fn create_device(
&self,
mut storage: Storage,
ctx: &mut StorageContext,
) -> Result<Arc<dyn StorageDevice>> {
let ccw_device = ccw::Device::from_str(&storage.source)?;
let dev_path = get_virtio_blk_ccw_device_name(ctx.sandbox, &ccw_device).await?;
storage.source = dev_path;
let path = common_storage_handler(ctx.logger, &storage)?;
new_device(path)
}
#[cfg(not(target_arch = "s390x"))]
#[instrument]
async fn create_device(
&self,
_storage: Storage,
_ctx: &mut StorageContext,
) -> Result<Arc<dyn StorageDevice>> {
Err(anyhow!("CCW is only supported on s390x"))
}
}
#[derive(Debug)]
pub struct ScsiHandler {}
#[async_trait::async_trait]
impl StorageHandler for ScsiHandler {
#[instrument]
async fn create_device(
&self,
mut storage: Storage,
ctx: &mut StorageContext,
) -> Result<Arc<dyn StorageDevice>> {
// Retrieve the device path from SCSI address.
let dev_path = get_scsi_device_name(ctx.sandbox, &storage.source).await?;
storage.source = dev_path;
let path = common_storage_handler(ctx.logger, &storage)?;
new_device(path)
}
}
#[derive(Debug)]
pub struct PmemHandler {}
#[async_trait::async_trait]
impl StorageHandler for PmemHandler {
#[instrument]
async fn create_device(
&self,
storage: Storage,
ctx: &mut StorageContext,
) -> Result<Arc<dyn StorageDevice>> {
// Retrieve the device for pmem storage
wait_for_pmem_device(ctx.sandbox, &storage.source).await?;
let path = common_storage_handler(ctx.logger, &storage)?;
new_device(path)
}
}

View File

@ -0,0 +1,293 @@
// Copyright (c) 2019 Ant Financial
// Copyright (c) 2023 Alibaba Cloud
//
// SPDX-License-Identifier: Apache-2.0
//
use std::fs;
use std::fs::OpenOptions;
use std::io::Write;
use std::os::unix::fs::{MetadataExt, PermissionsExt};
use std::path::Path;
use std::sync::Arc;
use anyhow::{anyhow, Context, Result};
use kata_sys_util::mount::parse_mount_options;
use kata_types::mount::{StorageDevice, KATA_MOUNT_OPTION_FS_GID};
use nix::unistd::Gid;
use protocols::agent::Storage;
use slog::Logger;
use tokio::sync::Mutex;
use tracing::instrument;
use crate::device::{DRIVER_EPHEMERAL_TYPE, FS_TYPE_HUGETLB};
use crate::mount::baremount;
use crate::sandbox::Sandbox;
use crate::storage::{
common_storage_handler, new_device, parse_options, StorageContext, StorageHandler, MODE_SETGID,
};
const FS_GID_EQ: &str = "fsgid=";
const SYS_FS_HUGEPAGES_PREFIX: &str = "/sys/kernel/mm/hugepages";
#[derive(Debug)]
pub struct EphemeralHandler {}
#[async_trait::async_trait]
impl StorageHandler for EphemeralHandler {
#[instrument]
async fn create_device(
&self,
mut storage: Storage,
ctx: &mut StorageContext,
) -> Result<Arc<dyn StorageDevice>> {
// hugetlbfs
if storage.fstype == FS_TYPE_HUGETLB {
info!(ctx.logger, "handle hugetlbfs storage");
// Allocate hugepages before mount
// /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages
// /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
// options eg "pagesize=2097152,size=524288000"(2M, 500M)
Self::allocate_hugepages(ctx.logger, &storage.options.to_vec())
.context("allocate hugepages")?;
common_storage_handler(ctx.logger, &storage)?;
} else if !storage.options.is_empty() {
// By now we only support one option field: "fsGroup" which
// isn't an valid mount option, thus we should remove it when
// do mount.
let opts = parse_options(&storage.options);
storage.options = Default::default();
common_storage_handler(ctx.logger, &storage)?;
// ephemeral_storage didn't support mount options except fsGroup.
if let Some(fsgid) = opts.get(KATA_MOUNT_OPTION_FS_GID) {
let gid = fsgid.parse::<u32>()?;
nix::unistd::chown(storage.mount_point.as_str(), None, Some(Gid::from_raw(gid)))?;
let meta = fs::metadata(&storage.mount_point)?;
let mut permission = meta.permissions();
let o_mode = meta.mode() | MODE_SETGID;
permission.set_mode(o_mode);
fs::set_permissions(&storage.mount_point, permission)?;
}
} else {
common_storage_handler(ctx.logger, &storage)?;
}
new_device("".to_string())
}
}
impl EphemeralHandler {
// Allocate hugepages by writing to sysfs
fn allocate_hugepages(logger: &Logger, options: &[String]) -> Result<()> {
info!(logger, "mounting hugePages storage options: {:?}", options);
let (pagesize, size) = Self::get_pagesize_and_size_from_option(options)
.context(format!("parse mount options: {:?}", &options))?;
info!(
logger,
"allocate hugepages. pageSize: {}, size: {}", pagesize, size
);
// sysfs entry is always of the form hugepages-${pagesize}kB
// Ref: https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt
let path = Path::new(SYS_FS_HUGEPAGES_PREFIX)
.join(format!("hugepages-{}kB", pagesize / 1024))
.join("nr_hugepages");
// write numpages to nr_hugepages file.
let numpages = format!("{}", size / pagesize);
info!(logger, "write {} pages to {:?}", &numpages, &path);
let mut file = OpenOptions::new()
.write(true)
.open(&path)
.context(format!("open nr_hugepages directory {:?}", &path))?;
file.write_all(numpages.as_bytes())
.context(format!("write nr_hugepages failed: {:?}", &path))?;
// Even if the write succeeds, the kernel isn't guaranteed to be
// able to allocate all the pages we requested. Verify that it
// did.
let verify = fs::read_to_string(&path).context(format!("reading {:?}", &path))?;
let allocated = verify
.trim_end()
.parse::<u64>()
.map_err(|_| anyhow!("Unexpected text {:?} in {:?}", &verify, &path))?;
if allocated != size / pagesize {
return Err(anyhow!(
"Only allocated {} of {} hugepages of size {}",
allocated,
numpages,
pagesize
));
}
Ok(())
}
// Parse filesystem options string to retrieve hugepage details
// options eg "pagesize=2048,size=107374182"
fn get_pagesize_and_size_from_option(options: &[String]) -> Result<(u64, u64)> {
let mut pagesize_str: Option<&str> = None;
let mut size_str: Option<&str> = None;
for option in options {
let vars: Vec<&str> = option.trim().split(',').collect();
for var in vars {
if let Some(stripped) = var.strip_prefix("pagesize=") {
pagesize_str = Some(stripped);
} else if let Some(stripped) = var.strip_prefix("size=") {
size_str = Some(stripped);
}
if pagesize_str.is_some() && size_str.is_some() {
break;
}
}
}
if pagesize_str.is_none() || size_str.is_none() {
return Err(anyhow!("no pagesize/size options found"));
}
let pagesize = pagesize_str
.unwrap()
.parse::<u64>()
.context(format!("parse pagesize: {:?}", &pagesize_str))?;
let size = size_str
.unwrap()
.parse::<u64>()
.context(format!("parse size: {:?}", &pagesize_str))?;
Ok((pagesize, size))
}
}
// update_ephemeral_mounts takes a list of ephemeral mounts and remounts them
// with mount options passed by the caller
#[instrument]
pub async fn update_ephemeral_mounts(
logger: Logger,
storages: &[Storage],
_sandbox: &Arc<Mutex<Sandbox>>,
) -> Result<()> {
for storage in storages {
let handler_name = &storage.driver;
let logger = logger.new(o!(
"msg" => "updating tmpfs storage",
"subsystem" => "storage",
"storage-type" => handler_name.to_owned()));
match handler_name.as_str() {
DRIVER_EPHEMERAL_TYPE => {
fs::create_dir_all(&storage.mount_point)?;
if storage.options.is_empty() {
continue;
} else {
// assume that fsGid has already been set
let mount_path = Path::new(&storage.mount_point);
let src_path = Path::new(&storage.source);
let opts: Vec<&String> = storage
.options
.iter()
.filter(|&opt| !opt.starts_with(FS_GID_EQ))
.collect();
let (flags, options) = parse_mount_options(&opts)?;
info!(logger, "mounting storage";
"mount-source" => src_path.display(),
"mount-destination" => mount_path.display(),
"mount-fstype" => storage.fstype.as_str(),
"mount-options" => options.as_str(),
);
baremount(
src_path,
mount_path,
storage.fstype.as_str(),
flags,
options.as_str(),
&logger,
)?;
}
}
_ => {
return Err(anyhow!(
"Unsupported storage type for syncing mounts {}. Only ephemeral storage update is supported",
storage.driver
));
}
};
}
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_get_pagesize_and_size_from_option() {
let expected_pagesize = 2048;
let expected_size = 107374182;
let expected = (expected_pagesize, expected_size);
let data = vec![
// (input, expected, is_ok)
("size-1=107374182,pagesize-1=2048", expected, false),
("size-1=107374182,pagesize=2048", expected, false),
("size=107374182,pagesize-1=2048", expected, false),
("size=107374182,pagesize=abc", expected, false),
("size=abc,pagesize=2048", expected, false),
("size=,pagesize=2048", expected, false),
("size=107374182,pagesize=", expected, false),
("size=107374182,pagesize=2048", expected, true),
("pagesize=2048,size=107374182", expected, true),
("foo=bar,pagesize=2048,size=107374182", expected, true),
(
"foo=bar,pagesize=2048,foo1=bar1,size=107374182",
expected,
true,
),
(
"pagesize=2048,foo1=bar1,foo=bar,size=107374182",
expected,
true,
),
(
"foo=bar,pagesize=2048,foo1=bar1,size=107374182,foo2=bar2",
expected,
true,
),
(
"foo=bar,size=107374182,foo1=bar1,pagesize=2048",
expected,
true,
),
];
for case in data {
let input = case.0;
let r = EphemeralHandler::get_pagesize_and_size_from_option(&[input.to_string()]);
let is_ok = case.2;
if is_ok {
let expected = case.1;
let (pagesize, size) = r.unwrap();
assert_eq!(expected.0, pagesize);
assert_eq!(expected.1, size);
} else {
assert!(r.is_err());
}
}
}
}

View File

@ -0,0 +1,89 @@
// Copyright (c) 2019 Ant Financial
// Copyright (c) 2023 Alibaba Cloud
//
// SPDX-License-Identifier: Apache-2.0
//
use std::fs;
use std::path::Path;
use std::sync::Arc;
use anyhow::{anyhow, Context, Result};
use kata_types::mount::StorageDevice;
use protocols::agent::Storage;
use tracing::instrument;
use crate::storage::{common_storage_handler, new_device, StorageContext, StorageHandler};
#[derive(Debug)]
pub struct OverlayfsHandler {}
#[async_trait::async_trait]
impl StorageHandler for OverlayfsHandler {
#[instrument]
async fn create_device(
&self,
mut storage: Storage,
ctx: &mut StorageContext,
) -> Result<Arc<dyn StorageDevice>> {
if storage
.options
.iter()
.any(|e| e == "io.katacontainers.fs-opt.overlay-rw")
{
let cid = ctx
.cid
.clone()
.ok_or_else(|| anyhow!("No container id in rw overlay"))?;
let cpath = Path::new(crate::rpc::CONTAINER_BASE).join(cid);
let work = cpath.join("work");
let upper = cpath.join("upper");
fs::create_dir_all(&work).context("Creating overlay work directory")?;
fs::create_dir_all(&upper).context("Creating overlay upper directory")?;
storage.fstype = "overlay".into();
storage
.options
.push(format!("upperdir={}", upper.to_string_lossy()));
storage
.options
.push(format!("workdir={}", work.to_string_lossy()));
}
let path = common_storage_handler(ctx.logger, &storage)?;
new_device(path)
}
}
#[derive(Debug)]
pub struct Virtio9pHandler {}
#[async_trait::async_trait]
impl StorageHandler for Virtio9pHandler {
#[instrument]
async fn create_device(
&self,
storage: Storage,
ctx: &mut StorageContext,
) -> Result<Arc<dyn StorageDevice>> {
let path = common_storage_handler(ctx.logger, &storage)?;
new_device(path)
}
}
#[derive(Debug)]
pub struct VirtioFsHandler {}
#[async_trait::async_trait]
impl StorageHandler for VirtioFsHandler {
#[instrument]
async fn create_device(
&self,
storage: Storage,
ctx: &mut StorageContext,
) -> Result<Arc<dyn StorageDevice>> {
let path = common_storage_handler(ctx.logger, &storage)?;
new_device(path)
}
}

View File

@ -0,0 +1,61 @@
// Copyright (c) 2019 Ant Financial
// Copyright (c) 2023 Alibaba Cloud
//
// SPDX-License-Identifier: Apache-2.0
//
use std::fs;
use std::os::unix::fs::PermissionsExt;
use std::sync::Arc;
use anyhow::{Context, Result};
use kata_types::mount::{StorageDevice, KATA_MOUNT_OPTION_FS_GID};
use nix::unistd::Gid;
use protocols::agent::Storage;
use tracing::instrument;
use crate::storage::{new_device, parse_options, StorageContext, StorageHandler, MODE_SETGID};
#[derive(Debug)]
pub struct LocalHandler {}
#[async_trait::async_trait]
impl StorageHandler for LocalHandler {
#[instrument]
async fn create_device(
&self,
storage: Storage,
_ctx: &mut StorageContext,
) -> Result<Arc<dyn StorageDevice>> {
fs::create_dir_all(&storage.mount_point).context(format!(
"failed to create dir all {:?}",
&storage.mount_point
))?;
let opts = parse_options(&storage.options);
let mut need_set_fsgid = false;
if let Some(fsgid) = opts.get(KATA_MOUNT_OPTION_FS_GID) {
let gid = fsgid.parse::<u32>()?;
nix::unistd::chown(storage.mount_point.as_str(), None, Some(Gid::from_raw(gid)))?;
need_set_fsgid = true;
}
if let Some(mode) = opts.get("mode") {
let mut permission = fs::metadata(&storage.mount_point)?.permissions();
let mut o_mode = u32::from_str_radix(mode, 8)?;
if need_set_fsgid {
// set SetGid mode mask.
o_mode |= MODE_SETGID;
}
permission.set_mode(o_mode);
fs::set_permissions(&storage.mount_point, permission)?;
}
new_device("".to_string())
}
}

View File

@ -0,0 +1,648 @@
// Copyright (c) 2019 Ant Financial
// Copyright (c) 2023 Alibaba Cloud
//
// SPDX-License-Identifier: Apache-2.0
//
use std::collections::HashMap;
use std::fs;
use std::os::unix::fs::{MetadataExt, PermissionsExt};
use std::path::Path;
use std::sync::Arc;
use anyhow::{anyhow, Context, Result};
use kata_sys_util::mount::{create_mount_destination, parse_mount_options};
use kata_types::mount::{
StorageDevice, StorageDeviceGeneric, StorageHandlerManager, KATA_SHAREDFS_GUEST_PREMOUNT_TAG,
};
use nix::unistd::{Gid, Uid};
use protocols::agent::Storage;
use protocols::types::FSGroupChangePolicy;
use slog::Logger;
use tokio::sync::Mutex;
use tracing::instrument;
use self::bind_watcher_handler::BindWatcherHandler;
use self::block_handler::{PmemHandler, ScsiHandler, VirtioBlkMmioHandler, VirtioBlkPciHandler};
use self::ephemeral_handler::EphemeralHandler;
use self::fs_handler::{OverlayfsHandler, Virtio9pHandler, VirtioFsHandler};
use self::local_handler::LocalHandler;
use crate::device::{
DRIVER_9P_TYPE, DRIVER_BLK_MMIO_TYPE, DRIVER_BLK_PCI_TYPE, DRIVER_EPHEMERAL_TYPE,
DRIVER_LOCAL_TYPE, DRIVER_NVDIMM_TYPE, DRIVER_OVERLAYFS_TYPE, DRIVER_SCSI_TYPE,
DRIVER_VIRTIOFS_TYPE, DRIVER_WATCHABLE_BIND_TYPE,
};
use crate::mount::{baremount, is_mounted};
use crate::sandbox::Sandbox;
pub use self::ephemeral_handler::update_ephemeral_mounts;
mod bind_watcher_handler;
mod block_handler;
mod ephemeral_handler;
mod fs_handler;
mod local_handler;
const RW_MASK: u32 = 0o660;
const RO_MASK: u32 = 0o440;
const EXEC_MASK: u32 = 0o110;
const MODE_SETGID: u32 = 0o2000;
#[derive(Debug)]
pub struct StorageContext<'a> {
cid: &'a Option<String>,
logger: &'a Logger,
sandbox: &'a Arc<Mutex<Sandbox>>,
}
/// Trait object to handle storage device.
#[async_trait::async_trait]
pub trait StorageHandler: Send + Sync {
/// Create a new storage device.
async fn create_device(
&self,
storage: Storage,
ctx: &mut StorageContext,
) -> Result<Arc<dyn StorageDevice>>;
}
#[rustfmt::skip]
lazy_static! {
pub static ref STORAGE_HANDLERS: StorageHandlerManager<Arc<dyn StorageHandler>> = {
let mut manager: StorageHandlerManager<Arc<dyn StorageHandler>> = StorageHandlerManager::new();
manager.add_handler(DRIVER_9P_TYPE, Arc::new(Virtio9pHandler{})).unwrap();
#[cfg(target_arch = "s390x")]
manager.add_handler(crate::device::DRIVER_BLK_CCW_TYPE, Arc::new(self::block_handler::VirtioBlkCcwHandler{})).unwrap();
manager.add_handler(DRIVER_BLK_MMIO_TYPE, Arc::new(VirtioBlkMmioHandler{})).unwrap();
manager.add_handler(DRIVER_BLK_PCI_TYPE, Arc::new(VirtioBlkPciHandler{})).unwrap();
manager.add_handler(DRIVER_EPHEMERAL_TYPE, Arc::new(EphemeralHandler{})).unwrap();
manager.add_handler(DRIVER_LOCAL_TYPE, Arc::new(LocalHandler{})).unwrap();
manager.add_handler(DRIVER_NVDIMM_TYPE, Arc::new(PmemHandler{})).unwrap();
manager.add_handler(DRIVER_OVERLAYFS_TYPE, Arc::new(OverlayfsHandler{})).unwrap();
manager.add_handler(DRIVER_SCSI_TYPE, Arc::new(ScsiHandler{})).unwrap();
manager.add_handler(DRIVER_VIRTIOFS_TYPE, Arc::new(VirtioFsHandler{})).unwrap();
manager.add_handler(DRIVER_WATCHABLE_BIND_TYPE, Arc::new(BindWatcherHandler{})).unwrap();
manager
};
}
// add_storages takes a list of storages passed by the caller, and perform the
// associated operations such as waiting for the device to show up, and mount
// it to a specific location, according to the type of handler chosen, and for
// each storage.
#[instrument]
pub async fn add_storages(
logger: Logger,
storages: Vec<Storage>,
sandbox: &Arc<Mutex<Sandbox>>,
cid: Option<String>,
) -> Result<Vec<String>> {
let mut mount_list = Vec::new();
for storage in storages {
let path = storage.mount_point.clone();
let state = sandbox.lock().await.add_sandbox_storage(&path).await;
if state.ref_count().await > 1 {
// The device already exists.
continue;
}
if let Some(handler) = STORAGE_HANDLERS.handler(&storage.driver) {
let logger =
logger.new(o!( "subsystem" => "storage", "storage-type" => storage.driver.clone()));
let mut ctx = StorageContext {
cid: &cid,
logger: &logger,
sandbox,
};
match handler.create_device(storage, &mut ctx).await {
Ok(device) => {
match sandbox
.lock()
.await
.update_sandbox_storage(&path, device.clone())
{
Ok(d) => {
let path = device.path().to_string();
if !path.is_empty() {
mount_list.push(path.clone());
}
drop(d);
}
Err(device) => {
error!(logger, "failed to update device for storage");
if let Err(e) = sandbox.lock().await.remove_sandbox_storage(&path).await
{
warn!(logger, "failed to remove dummy sandbox storage {:?}", e);
}
device.cleanup();
return Err(anyhow!("failed to update device for storage"));
}
}
}
Err(e) => {
error!(logger, "failed to create device for storage, error: {e:?}");
if let Err(e) = sandbox.lock().await.remove_sandbox_storage(&path).await {
warn!(logger, "failed to remove dummy sandbox storage {e:?}");
}
return Err(e);
}
}
} else {
return Err(anyhow!(
"Failed to find the storage handler {}",
storage.driver
));
}
}
Ok(mount_list)
}
pub(crate) fn new_device(path: String) -> Result<Arc<dyn StorageDevice>> {
let device = StorageDeviceGeneric::new(path);
Ok(Arc::new(device))
}
#[instrument]
pub(crate) fn common_storage_handler(logger: &Logger, storage: &Storage) -> Result<String> {
mount_storage(logger, storage)?;
set_ownership(logger, storage)?;
Ok(storage.mount_point.clone())
}
// mount_storage performs the mount described by the storage structure.
#[instrument]
fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> {
let logger = logger.new(o!("subsystem" => "mount"));
// There's a special mechanism to create mountpoint from a `sharedfs` instance before
// starting the kata-agent. Check for such cases.
if storage.source == KATA_SHAREDFS_GUEST_PREMOUNT_TAG && is_mounted(&storage.mount_point)? {
warn!(
logger,
"{} already mounted on {}, ignoring...",
KATA_SHAREDFS_GUEST_PREMOUNT_TAG,
&storage.mount_point
);
return Ok(());
}
let (flags, options) = parse_mount_options(&storage.options)?;
let mount_path = Path::new(&storage.mount_point);
let src_path = Path::new(&storage.source);
create_mount_destination(src_path, mount_path, "", &storage.fstype)
.context("Could not create mountpoint")?;
info!(logger, "mounting storage";
"mount-source" => src_path.display(),
"mount-destination" => mount_path.display(),
"mount-fstype" => storage.fstype.as_str(),
"mount-options" => options.as_str(),
);
baremount(
src_path,
mount_path,
storage.fstype.as_str(),
flags,
options.as_str(),
&logger,
)
}
#[instrument]
pub(crate) fn parse_options(option_list: &[String]) -> HashMap<String, String> {
let mut options = HashMap::new();
for opt in option_list {
let fields: Vec<&str> = opt.split('=').collect();
if fields.len() == 2 {
options.insert(fields[0].to_string(), fields[1].to_string());
}
}
options
}
#[instrument]
pub fn set_ownership(logger: &Logger, storage: &Storage) -> Result<()> {
let logger = logger.new(o!("subsystem" => "mount", "fn" => "set_ownership"));
// If fsGroup is not set, skip performing ownership change
if storage.fs_group.is_none() {
return Ok(());
}
let fs_group = storage.fs_group();
let read_only = storage.options.contains(&String::from("ro"));
let mount_path = Path::new(&storage.mount_point);
let metadata = mount_path.metadata().map_err(|err| {
error!(logger, "failed to obtain metadata for mount path";
"mount-path" => mount_path.to_str(),
"error" => err.to_string(),
);
err
})?;
if fs_group.group_change_policy == FSGroupChangePolicy::OnRootMismatch.into()
&& metadata.gid() == fs_group.group_id
{
let mut mask = if read_only { RO_MASK } else { RW_MASK };
mask |= EXEC_MASK;
// With fsGroup change policy to OnRootMismatch, if the current
// gid of the mount path root directory matches the desired gid
// and the current permission of mount path root directory is correct,
// then ownership change will be skipped.
let current_mode = metadata.permissions().mode();
if (mask & current_mode == mask) && (current_mode & MODE_SETGID != 0) {
info!(logger, "skipping ownership change for volume";
"mount-path" => mount_path.to_str(),
"fs-group" => fs_group.group_id.to_string(),
);
return Ok(());
}
}
info!(logger, "performing recursive ownership change";
"mount-path" => mount_path.to_str(),
"fs-group" => fs_group.group_id.to_string(),
);
recursive_ownership_change(
mount_path,
None,
Some(Gid::from_raw(fs_group.group_id)),
read_only,
)
}
#[instrument]
pub fn recursive_ownership_change(
path: &Path,
uid: Option<Uid>,
gid: Option<Gid>,
read_only: bool,
) -> Result<()> {
let mut mask = if read_only { RO_MASK } else { RW_MASK };
if path.is_dir() {
for entry in fs::read_dir(path)? {
recursive_ownership_change(entry?.path().as_path(), uid, gid, read_only)?;
}
mask |= EXEC_MASK;
mask |= MODE_SETGID;
}
// We do not want to change the permission of the underlying file
// using symlink. Hence we skip symlinks from recursive ownership
// and permission changes.
if path.is_symlink() {
return Ok(());
}
nix::unistd::chown(path, uid, gid)?;
if gid.is_some() {
let metadata = path.metadata()?;
let mut permission = metadata.permissions();
let target_mode = metadata.mode() | mask;
permission.set_mode(target_mode);
fs::set_permissions(path, permission)?;
}
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
use protocols::agent::FSGroup;
use std::fs::File;
use tempfile::tempdir;
use test_utils::{
skip_if_not_root, skip_loop_by_user, skip_loop_if_not_root, skip_loop_if_root, TestUserType,
};
#[test]
fn test_mount_storage() {
#[derive(Debug)]
struct TestData<'a> {
test_user: TestUserType,
storage: Storage,
error_contains: &'a str,
make_source_dir: bool,
make_mount_dir: bool,
deny_mount_permission: bool,
}
impl Default for TestData<'_> {
fn default() -> Self {
TestData {
test_user: TestUserType::Any,
storage: Storage {
mount_point: "mnt".to_string(),
source: "src".to_string(),
fstype: "tmpfs".to_string(),
..Default::default()
},
make_source_dir: true,
make_mount_dir: false,
deny_mount_permission: false,
error_contains: "",
}
}
}
let tests = &[
TestData {
test_user: TestUserType::NonRootOnly,
error_contains: "EPERM: Operation not permitted",
..Default::default()
},
TestData {
test_user: TestUserType::RootOnly,
..Default::default()
},
TestData {
storage: Storage {
mount_point: "mnt".to_string(),
source: "src".to_string(),
fstype: "bind".to_string(),
..Default::default()
},
make_source_dir: false,
make_mount_dir: true,
error_contains: "Could not create mountpoint",
..Default::default()
},
TestData {
test_user: TestUserType::NonRootOnly,
deny_mount_permission: true,
error_contains: "Could not create mountpoint",
..Default::default()
},
];
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{}]: {:?}", i, d);
skip_loop_by_user!(msg, d.test_user);
let drain = slog::Discard;
let logger = slog::Logger::root(drain, o!());
let tempdir = tempdir().unwrap();
let source = tempdir.path().join(&d.storage.source);
let mount_point = tempdir.path().join(&d.storage.mount_point);
let storage = Storage {
source: source.to_str().unwrap().to_string(),
mount_point: mount_point.to_str().unwrap().to_string(),
..d.storage.clone()
};
if d.make_source_dir {
fs::create_dir_all(&storage.source).unwrap();
}
if d.make_mount_dir {
fs::create_dir_all(&storage.mount_point).unwrap();
}
if d.deny_mount_permission {
fs::set_permissions(
mount_point.parent().unwrap(),
fs::Permissions::from_mode(0o000),
)
.unwrap();
}
let result = mount_storage(&logger, &storage);
// restore permissions so tempdir can be cleaned up
if d.deny_mount_permission {
fs::set_permissions(
mount_point.parent().unwrap(),
fs::Permissions::from_mode(0o755),
)
.unwrap();
}
if result.is_ok() {
nix::mount::umount(&mount_point).unwrap();
}
let msg = format!("{}: result: {:?}", msg, result);
if d.error_contains.is_empty() {
assert!(result.is_ok(), "{}", msg);
} else {
assert!(result.is_err(), "{}", msg);
let error_msg = format!("{}", result.unwrap_err());
assert!(error_msg.contains(d.error_contains), "{}", msg);
}
}
}
#[test]
fn test_set_ownership() {
skip_if_not_root!();
let logger = slog::Logger::root(slog::Discard, o!());
#[derive(Debug)]
struct TestData<'a> {
mount_path: &'a str,
fs_group: Option<FSGroup>,
read_only: bool,
expected_group_id: u32,
expected_permission: u32,
}
let tests = &[
TestData {
mount_path: "foo",
fs_group: None,
read_only: false,
expected_group_id: 0,
expected_permission: 0,
},
TestData {
mount_path: "rw_mount",
fs_group: Some(FSGroup {
group_id: 3000,
group_change_policy: FSGroupChangePolicy::Always.into(),
..Default::default()
}),
read_only: false,
expected_group_id: 3000,
expected_permission: RW_MASK | EXEC_MASK | MODE_SETGID,
},
TestData {
mount_path: "ro_mount",
fs_group: Some(FSGroup {
group_id: 3000,
group_change_policy: FSGroupChangePolicy::OnRootMismatch.into(),
..Default::default()
}),
read_only: true,
expected_group_id: 3000,
expected_permission: RO_MASK | EXEC_MASK | MODE_SETGID,
},
];
let tempdir = tempdir().expect("failed to create tmpdir");
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{}]: {:?}", i, d);
let mount_dir = tempdir.path().join(d.mount_path);
fs::create_dir(&mount_dir)
.unwrap_or_else(|_| panic!("{}: failed to create root directory", msg));
let directory_mode = mount_dir.as_path().metadata().unwrap().permissions().mode();
let mut storage_data = Storage::new();
if d.read_only {
storage_data.set_options(vec!["foo".to_string(), "ro".to_string()]);
}
if let Some(fs_group) = d.fs_group.clone() {
storage_data.set_fs_group(fs_group);
}
storage_data.mount_point = mount_dir.clone().into_os_string().into_string().unwrap();
let result = set_ownership(&logger, &storage_data);
assert!(result.is_ok());
assert_eq!(
mount_dir.as_path().metadata().unwrap().gid(),
d.expected_group_id
);
assert_eq!(
mount_dir.as_path().metadata().unwrap().permissions().mode(),
(directory_mode | d.expected_permission)
);
}
}
#[test]
fn test_recursive_ownership_change() {
skip_if_not_root!();
const COUNT: usize = 5;
#[derive(Debug)]
struct TestData<'a> {
// Directory where the recursive ownership change should be performed on
path: &'a str,
// User ID for ownership change
uid: u32,
// Group ID for ownership change
gid: u32,
// Set when the permission should be read-only
read_only: bool,
// The expected permission of all directories after ownership change
expected_permission_directory: u32,
// The expected permission of all files after ownership change
expected_permission_file: u32,
}
let tests = &[
TestData {
path: "no_gid_change",
uid: 0,
gid: 0,
read_only: false,
expected_permission_directory: 0,
expected_permission_file: 0,
},
TestData {
path: "rw_gid_change",
uid: 0,
gid: 3000,
read_only: false,
expected_permission_directory: RW_MASK | EXEC_MASK | MODE_SETGID,
expected_permission_file: RW_MASK,
},
TestData {
path: "ro_gid_change",
uid: 0,
gid: 3000,
read_only: true,
expected_permission_directory: RO_MASK | EXEC_MASK | MODE_SETGID,
expected_permission_file: RO_MASK,
},
];
let tempdir = tempdir().expect("failed to create tmpdir");
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{}]: {:?}", i, d);
let mount_dir = tempdir.path().join(d.path);
fs::create_dir(&mount_dir)
.unwrap_or_else(|_| panic!("{}: failed to create root directory", msg));
let directory_mode = mount_dir.as_path().metadata().unwrap().permissions().mode();
let mut file_mode: u32 = 0;
// create testing directories and files
for n in 1..COUNT {
let nest_dir = mount_dir.join(format!("nested{}", n));
fs::create_dir(&nest_dir)
.unwrap_or_else(|_| panic!("{}: failed to create nest directory", msg));
for f in 1..COUNT {
let filename = nest_dir.join(format!("file{}", f));
File::create(&filename)
.unwrap_or_else(|_| panic!("{}: failed to create file", msg));
file_mode = filename.as_path().metadata().unwrap().permissions().mode();
}
}
let uid = if d.uid > 0 {
Some(Uid::from_raw(d.uid))
} else {
None
};
let gid = if d.gid > 0 {
Some(Gid::from_raw(d.gid))
} else {
None
};
let result = recursive_ownership_change(&mount_dir, uid, gid, d.read_only);
assert!(result.is_ok());
assert_eq!(mount_dir.as_path().metadata().unwrap().gid(), d.gid);
assert_eq!(
mount_dir.as_path().metadata().unwrap().permissions().mode(),
(directory_mode | d.expected_permission_directory)
);
for n in 1..COUNT {
let nest_dir = mount_dir.join(format!("nested{}", n));
for f in 1..COUNT {
let filename = nest_dir.join(format!("file{}", f));
let file = Path::new(&filename);
assert_eq!(file.metadata().unwrap().gid(), d.gid);
assert_eq!(
file.metadata().unwrap().permissions().mode(),
(file_mode | d.expected_permission_file)
);
}
let dir = Path::new(&nest_dir);
assert_eq!(dir.metadata().unwrap().gid(), d.gid);
assert_eq!(
dir.metadata().unwrap().permissions().mode(),
(directory_mode | d.expected_permission_directory)
);
}
}
}
}

View File

@ -58,7 +58,8 @@ use crate::fs::is_symlink;
use crate::sl;
/// Default permission for directories created for mountpoint.
const MOUNT_PERM: u32 = 0o755;
const MOUNT_DIR_PERM: u32 = 0o755;
const MOUNT_FILE_PERM: u32 = 0o644;
pub const PROC_MOUNTS_FILE: &str = "/proc/mounts";
const PROC_FIELDS_PER_LINE: usize = 6;
@ -187,13 +188,16 @@ pub fn create_mount_destination<S: AsRef<Path>, D: AsRef<Path>, R: AsRef<Path>>(
.parent()
.ok_or_else(|| Error::InvalidPath(dst.to_path_buf()))?;
let mut builder = fs::DirBuilder::new();
builder.mode(MOUNT_PERM).recursive(true).create(parent)?;
builder
.mode(MOUNT_DIR_PERM)
.recursive(true)
.create(parent)?;
if fs_type == "bind" {
// The source and destination for bind mounting must be the same type: file or directory.
if !src.as_ref().is_dir() {
fs::OpenOptions::new()
.mode(MOUNT_PERM)
.mode(MOUNT_FILE_PERM)
.write(true)
.create(true)
.open(dst)?;
@ -390,19 +394,17 @@ fn do_rebind_mount<P: AsRef<Path>>(path: P, readonly: bool, flags: MsFlags) -> R
}
/// Take fstab style mount options and parses them for use with a standard mount() syscall.
fn parse_mount_options(options: &[String]) -> Result<(MsFlags, String)> {
pub fn parse_mount_options<T: AsRef<str>>(options: &[T]) -> Result<(MsFlags, String)> {
let mut flags: MsFlags = MsFlags::empty();
let mut data: Vec<String> = Vec::new();
for opt in options.iter() {
if opt == "defaults" {
continue;
} else if opt == "loop" {
if opt.as_ref() == "loop" {
return Err(Error::InvalidMountOption("loop".to_string()));
} else if let Some(v) = parse_mount_flags(flags, opt) {
} else if let Some(v) = parse_mount_flags(flags, opt.as_ref()) {
flags = v;
} else {
data.push(opt.clone());
data.push(opt.as_ref().to_string());
}
}
@ -441,6 +443,7 @@ fn parse_mount_flags(mut flags: MsFlags, flag_str: &str) -> Option<MsFlags> {
// overridden by subsequent options, as in the option line users,exec,dev,suid).
match flag_str {
// Clear flags
"defaults" => {}
"async" => flags &= !MsFlags::MS_SYNCHRONOUS,
"atime" => flags &= !MsFlags::MS_NOATIME,
"dev" => flags &= !MsFlags::MS_NODEV,
@ -464,6 +467,14 @@ fn parse_mount_flags(mut flags: MsFlags, flag_str: &str) -> Option<MsFlags> {
"noexec" => flags |= MsFlags::MS_NOEXEC,
"nosuid" => flags |= MsFlags::MS_NOSUID,
"rbind" => flags |= MsFlags::MS_BIND | MsFlags::MS_REC,
"unbindable" => flags |= MsFlags::MS_UNBINDABLE,
"runbindable" => flags |= MsFlags::MS_UNBINDABLE | MsFlags::MS_REC,
"private" => flags |= MsFlags::MS_PRIVATE,
"rprivate" => flags |= MsFlags::MS_PRIVATE | MsFlags::MS_REC,
"shared" => flags |= MsFlags::MS_SHARED,
"rshared" => flags |= MsFlags::MS_SHARED | MsFlags::MS_REC,
"slave" => flags |= MsFlags::MS_SLAVE,
"rslave" => flags |= MsFlags::MS_SLAVE | MsFlags::MS_REC,
"relatime" => flags |= MsFlags::MS_RELATIME,
"remount" => flags |= MsFlags::MS_REMOUNT,
"ro" => flags |= MsFlags::MS_RDONLY,
@ -1030,7 +1041,7 @@ mod tests {
#[test]
fn test_parse_mount_options() {
let options = vec![];
let options: Vec<&str> = vec![];
let (flags, data) = parse_mount_options(&options).unwrap();
assert!(flags.is_empty());
assert!(data.is_empty());

View File

@ -5,7 +5,9 @@
//
use anyhow::{anyhow, Context, Error, Result};
use std::collections::hash_map::Entry;
use std::convert::TryFrom;
use std::fmt::Formatter;
use std::{collections::HashMap, fs, path::PathBuf};
/// Prefix to mark a volume as Kata special.
@ -14,6 +16,9 @@ pub const KATA_VOLUME_TYPE_PREFIX: &str = "kata:";
/// The Mount should be ignored by the host and handled by the guest.
pub const KATA_GUEST_MOUNT_PREFIX: &str = "kata:guest-mount:";
/// The sharedfs volume is mounted by guest OS before starting the kata-agent.
pub const KATA_SHAREDFS_GUEST_PREMOUNT_TAG: &str = "kataShared";
/// KATA_EPHEMERAL_DEV_TYPE creates a tmpfs backed volume for sharing files between containers.
pub const KATA_EPHEMERAL_VOLUME_TYPE: &str = "ephemeral";
@ -23,6 +28,9 @@ pub const KATA_HOST_DIR_VOLUME_TYPE: &str = "kata:hostdir";
/// KATA_MOUNT_INFO_FILE_NAME is used for the file that holds direct-volume mount info
pub const KATA_MOUNT_INFO_FILE_NAME: &str = "mountInfo.json";
/// Specify `fsgid` for a volume or mount, `fsgid=1`.
pub const KATA_MOUNT_OPTION_FS_GID: &str = "fsgid";
/// KATA_DIRECT_VOLUME_ROOT_PATH is the root path used for concatenating with the direct-volume mount info file path
pub const KATA_DIRECT_VOLUME_ROOT_PATH: &str = "/run/kata-containers/shared/direct-volumes";
@ -421,6 +429,84 @@ impl TryFrom<&NydusExtraOptions> for KataVirtualVolume {
}
}
/// An implementation of generic storage device.
pub struct StorageDeviceGeneric {
path: String,
}
impl std::fmt::Debug for StorageDeviceGeneric {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("StorageState")
.field("path", &self.path)
.finish()
}
}
impl StorageDeviceGeneric {
/// Create a new instance of `StorageStateCommon`.
pub fn new(path: String) -> Self {
StorageDeviceGeneric { path }
}
}
impl StorageDevice for StorageDeviceGeneric {
fn path(&self) -> &str {
&self.path
}
fn cleanup(&self) {}
}
/// Trait object for storage device.
pub trait StorageDevice: Send + Sync {
/// Path
fn path(&self) -> &str;
/// Clean up resources related to the storage device.
fn cleanup(&self);
}
/// Manager to manage registered storage device handlers.
pub struct StorageHandlerManager<H> {
handlers: HashMap<String, H>,
}
impl<H> Default for StorageHandlerManager<H> {
fn default() -> Self {
Self::new()
}
}
impl<H> StorageHandlerManager<H> {
/// Create a new instance of `StorageHandlerManager`.
pub fn new() -> Self {
Self {
handlers: HashMap::new(),
}
}
/// Register a storage device handler.
pub fn add_handler(&mut self, id: &str, handler: H) -> Result<()> {
match self.handlers.entry(id.to_string()) {
Entry::Occupied(_) => Err(anyhow!("storage handler for {} already exists", id)),
Entry::Vacant(entry) => {
entry.insert(handler);
Ok(())
}
}
}
/// Get storage handler with specified `id`.
pub fn handler(&self, id: &str) -> Option<&H> {
self.handlers.get(id)
}
/// Get names of registered handlers.
pub fn get_handlers(&self) -> Vec<String> {
self.handlers.keys().map(|v| v.to_string()).collect()
}
}
/// Join user provided volume path with kata direct-volume root path.
///
/// The `volume_path` is base64-encoded and then safely joined to the `prefix`