mirror of
https://github.com/kata-containers/kata-containers.git
synced 2026-04-12 23:04:33 +00:00
Compare commits
18 Commits
topic/runt
...
topic/runt
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d53ea167b5 | ||
|
|
c90a4572e1 | ||
|
|
3ad3504bfa | ||
|
|
db485c7eae | ||
|
|
d28fb6f527 | ||
|
|
f4fa488b49 | ||
|
|
6880b1f16d | ||
|
|
6e09a77022 | ||
|
|
eeaf1017cd | ||
|
|
d217f2efda | ||
|
|
ae7b83980e | ||
|
|
675185be8c | ||
|
|
b23611459f | ||
|
|
2a104dbada | ||
|
|
2cd94ec92e | ||
|
|
815c3b91c1 | ||
|
|
fd8c6d726f | ||
|
|
18204b18da |
4
Cargo.lock
generated
4
Cargo.lock
generated
@@ -6144,7 +6144,6 @@ dependencies = [
|
||||
"agent",
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
"base64 0.13.1",
|
||||
"byte-unit",
|
||||
"cgroups-rs 0.5.0",
|
||||
"flate2",
|
||||
@@ -6313,6 +6312,7 @@ dependencies = [
|
||||
"containerd-shim-protos",
|
||||
"go-flag",
|
||||
"nix 0.26.4",
|
||||
"pod-resources-rs",
|
||||
"runtimes",
|
||||
"shim",
|
||||
"tokio",
|
||||
@@ -6348,6 +6348,7 @@ dependencies = [
|
||||
"opentelemetry 0.18.0",
|
||||
"opentelemetry-jaeger",
|
||||
"persist",
|
||||
"pod-resources-rs",
|
||||
"procfs 0.12.0",
|
||||
"prometheus",
|
||||
"protobuf",
|
||||
@@ -8381,6 +8382,7 @@ dependencies = [
|
||||
"async-trait",
|
||||
"awaitgroup",
|
||||
"common",
|
||||
"container-device-interface",
|
||||
"containerd-shim-protos",
|
||||
"hypervisor",
|
||||
"kata-sys-util",
|
||||
|
||||
@@ -9,7 +9,6 @@
|
||||
//! This module depends on kubelet internal implementation details; a better way is needed
|
||||
//! to detect K8S EmptyDir medium type from `oci::spec::Mount` objects.
|
||||
|
||||
use kata_types::config::{EMPTYDIR_MODE_BLOCK_ENCRYPTED, EMPTYDIR_MODE_SHARED_FS};
|
||||
use kata_types::mount;
|
||||
use oci_spec::runtime::{Mount, Spec};
|
||||
|
||||
@@ -59,22 +58,7 @@ pub fn is_host_empty_dir(path: &str) -> bool {
|
||||
// For a given pod, the ephemeral volume is created only once
|
||||
// backed by tmpfs inside the VM. For successive containers
|
||||
// of the same pod the already existing volume is reused.
|
||||
//
|
||||
// When emptydir_mode is "block-encrypted", host emptyDir mounts are left as
|
||||
// "bind" so that the volume dispatch layer can route them to the
|
||||
// EncryptedEmptyDirVolume handler instead of the local-storage path.
|
||||
pub fn update_ephemeral_storage_type(
|
||||
oci_spec: &mut Spec,
|
||||
disable_guest_empty_dir: bool,
|
||||
emptydir_mode: &str,
|
||||
) {
|
||||
// Treat an empty value the same as the default ("shared-fs").
|
||||
let mode = if emptydir_mode.is_empty() {
|
||||
EMPTYDIR_MODE_SHARED_FS
|
||||
} else {
|
||||
emptydir_mode
|
||||
};
|
||||
|
||||
pub fn update_ephemeral_storage_type(oci_spec: &mut Spec, disable_guest_empty_dir: bool) {
|
||||
if let Some(mounts) = oci_spec.mounts_mut() {
|
||||
for m in mounts.iter_mut() {
|
||||
if let Some(typ) = &m.typ() {
|
||||
@@ -89,13 +73,7 @@ pub fn update_ephemeral_storage_type(
|
||||
if is_ephemeral_volume(m) {
|
||||
m.set_typ(Some(String::from(mount::KATA_EPHEMERAL_VOLUME_TYPE)));
|
||||
}
|
||||
// Only rewrite host emptyDirs to "local" when NOT using
|
||||
// block-encrypted mode. In block-encrypted mode the mount stays
|
||||
// as "bind" and is handled downstream by EncryptedEmptyDirVolume.
|
||||
if is_host_empty_dir(mnt_src)
|
||||
&& !disable_guest_empty_dir
|
||||
&& mode != EMPTYDIR_MODE_BLOCK_ENCRYPTED
|
||||
{
|
||||
if is_host_empty_dir(mnt_src) && !disable_guest_empty_dir {
|
||||
m.set_typ(Some(mount::KATA_K8S_LOCAL_STORAGE_TYPE.to_string()));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -787,14 +787,6 @@ pub struct DeviceInfo {
|
||||
#[serde(default)]
|
||||
pub hotplug_vfio_on_root_bus: bool,
|
||||
|
||||
/// Cold-plug VFIO devices to a PCIe port type.
|
||||
///
|
||||
/// Accepted values: `"no-port"` (default, disabled), `"root-port"`.
|
||||
/// In confidential compute environments hot-plugging can compromise
|
||||
/// security, so devices are cold-plugged instead.
|
||||
#[serde(default)]
|
||||
pub cold_plug_vfio: String,
|
||||
|
||||
/// Number of PCIe root ports to create during VM creation.
|
||||
///
|
||||
/// Valid when `hotplug_vfio_on_root_bus = true` and `machine_type = "q35"`.
|
||||
|
||||
@@ -30,10 +30,7 @@ pub use self::hypervisor::{
|
||||
};
|
||||
|
||||
mod runtime;
|
||||
pub use self::runtime::{
|
||||
Runtime, RuntimeVendor, EMPTYDIR_MODE_BLOCK_ENCRYPTED, EMPTYDIR_MODE_SHARED_FS,
|
||||
RUNTIME_NAME_VIRTCONTAINER,
|
||||
};
|
||||
pub use self::runtime::{Runtime, RuntimeVendor, RUNTIME_NAME_VIRTCONTAINER};
|
||||
|
||||
pub use self::agent::AGENT_NAME_KATA;
|
||||
|
||||
|
||||
@@ -18,12 +18,6 @@ pub use shared_mount::SharedMount;
|
||||
/// Type of runtime VirtContainer.
|
||||
pub const RUNTIME_NAME_VIRTCONTAINER: &str = "virt_container";
|
||||
|
||||
/// emptydir_mode value: share the emptyDir via the shared filesystem (default).
|
||||
pub const EMPTYDIR_MODE_SHARED_FS: &str = "shared-fs";
|
||||
|
||||
/// emptydir_mode value: plug an encrypted block device (LUKS2 via CDH) for each emptyDir.
|
||||
pub const EMPTYDIR_MODE_BLOCK_ENCRYPTED: &str = "block-encrypted";
|
||||
|
||||
/// Kata runtime configuration information.
|
||||
#[derive(Debug, Default, Deserialize, Serialize)]
|
||||
pub struct Runtime {
|
||||
@@ -140,21 +134,6 @@ pub struct Runtime {
|
||||
#[serde(default)]
|
||||
pub disable_guest_empty_dir: bool,
|
||||
|
||||
/// Specifies how Kubernetes emptyDir volumes are handled.
|
||||
///
|
||||
/// Options:
|
||||
///
|
||||
/// - shared-fs (default)
|
||||
/// Shares the emptyDir folder with the guest using the method given
|
||||
/// by the `shared_fs` setting.
|
||||
///
|
||||
/// - block-encrypted
|
||||
/// Plugs a block device to be encrypted in the guest using LUKS2
|
||||
/// via the Confidential Data Hub (CDH).
|
||||
///
|
||||
#[serde(default)]
|
||||
pub emptydir_mode: String,
|
||||
|
||||
/// Determines how VFIO devices should be presented to the container.
|
||||
///
|
||||
/// Options:
|
||||
@@ -274,17 +253,6 @@ impl ConfigOps for Runtime {
|
||||
)));
|
||||
}
|
||||
|
||||
let emptydir_mode = &conf.runtime.emptydir_mode;
|
||||
if !emptydir_mode.is_empty()
|
||||
&& emptydir_mode != EMPTYDIR_MODE_SHARED_FS
|
||||
&& emptydir_mode != EMPTYDIR_MODE_BLOCK_ENCRYPTED
|
||||
{
|
||||
return Err(std::io::Error::other(format!(
|
||||
"Invalid emptydir_mode `{emptydir_mode}` in configuration file, \
|
||||
allowed values: \"{EMPTYDIR_MODE_SHARED_FS}\", \"{EMPTYDIR_MODE_BLOCK_ENCRYPTED}\"",
|
||||
)));
|
||||
}
|
||||
|
||||
for shared_mount in &conf.runtime.shared_mounts {
|
||||
shared_mount.validate()?;
|
||||
}
|
||||
|
||||
@@ -29,3 +29,4 @@ tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
|
||||
shim = { path = "crates/shim" }
|
||||
common = { workspace = true }
|
||||
runtimes = { workspace = true }
|
||||
pod-resources-rs = { workspace = true }
|
||||
|
||||
@@ -209,8 +209,6 @@ DEFFILEMEMBACKEND := ""
|
||||
DEFVALIDFILEMEMBACKENDS := [\"$(DEFFILEMEMBACKEND)\"]
|
||||
DEFMSIZE9P := 8192
|
||||
DEFVFIOMODE := guest-kernel
|
||||
DEFEMPTYDIRMODE := shared-fs
|
||||
DEFEMPTYDIRMODE_COCO := block-encrypted
|
||||
DEFBINDMOUNTS := []
|
||||
DEFDANCONF := /run/kata-containers/dans
|
||||
DEFFORCEGUESTPULL := false
|
||||
@@ -709,8 +707,6 @@ USER_VARS += DEFDISABLEIMAGENVDIMM
|
||||
USER_VARS += DEFBINDMOUNTS
|
||||
USER_VARS += DEFVFIOMODE
|
||||
USER_VARS += DEFVFIOMODE_SE
|
||||
USER_VARS += DEFEMPTYDIRMODE
|
||||
USER_VARS += DEFEMPTYDIRMODE_COCO
|
||||
USER_VARS += BUILDFLAGS
|
||||
USER_VARS += RUNTIMENAME
|
||||
USER_VARS += HYPERVISOR_DB
|
||||
|
||||
@@ -768,18 +768,6 @@ sandbox_bind_mounts = @DEFBINDMOUNTS@
|
||||
#
|
||||
vfio_mode = "@DEFVFIOMODE@"
|
||||
|
||||
# Controls how Kubernetes emptyDir volumes are handled:
|
||||
#
|
||||
# - "shared-fs" (default): shares the emptyDir folder with the guest via
|
||||
# the shared filesystem backend.
|
||||
#
|
||||
# - "block-encrypted": plugs a sparse disk image as a virtio-blk device
|
||||
# and has the kata-agent format and mount it with LUKS2 encryption via
|
||||
# the Confidential Data Hub (CDH). This is the recommended setting for
|
||||
# confidential computing variants.
|
||||
#
|
||||
emptydir_mode = "@DEFEMPTYDIRMODE_COCO@"
|
||||
|
||||
# If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will
|
||||
# be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest.
|
||||
disable_guest_empty_dir = @DEFDISABLEGUESTEMPTYDIR@
|
||||
|
||||
@@ -375,10 +375,16 @@ disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM_NV@
|
||||
# Default false
|
||||
hotplug_vfio_on_root_bus = false
|
||||
|
||||
# Enable cold-plugging of VFIO devices to a PCIe port type.
|
||||
# Accepted values: "no-port" (default, disabled), "root-port".
|
||||
# When set to "root-port", devices discovered via CDI / Pod Resources
|
||||
# are cold-plugged before VM boot.
|
||||
# Enable hot-plugging of VFIO devices to a bridge-port,
|
||||
# root-port or switch-port.
|
||||
# The default setting is "no-port"
|
||||
hot_plug_vfio = "no-port"
|
||||
|
||||
# In a confidential compute environment hot-plugging can compromise
|
||||
# security.
|
||||
# Enable cold-plugging of VFIO devices to a bridge-port,
|
||||
# root-port or switch-port.
|
||||
# The default setting is "no-port", which means disabled.
|
||||
cold_plug_vfio = "root-port"
|
||||
|
||||
# Before hot plugging a PCIe device, you need to add a pcie_root_port device.
|
||||
@@ -780,18 +786,6 @@ sandbox_bind_mounts = @DEFBINDMOUNTS@
|
||||
#
|
||||
vfio_mode = "@DEFVFIOMODE_NV@"
|
||||
|
||||
# Controls how Kubernetes emptyDir volumes are handled:
|
||||
#
|
||||
# - "shared-fs" (default): shares the emptyDir folder with the guest via
|
||||
# the shared filesystem backend.
|
||||
#
|
||||
# - "block-encrypted": plugs a sparse disk image as a virtio-blk device
|
||||
# and has the kata-agent format and mount it with LUKS2 encryption via
|
||||
# the Confidential Data Hub (CDH). This is the recommended setting for
|
||||
# confidential computing variants.
|
||||
#
|
||||
emptydir_mode = "@DEFEMPTYDIRMODE@"
|
||||
|
||||
# If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will
|
||||
# be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest.
|
||||
disable_guest_empty_dir = @DEFDISABLEGUESTEMPTYDIR@
|
||||
|
||||
@@ -416,10 +416,16 @@ disable_image_nvdimm = true
|
||||
# Default false
|
||||
hotplug_vfio_on_root_bus = false
|
||||
|
||||
# Enable cold-plugging of VFIO devices to a PCIe port type.
|
||||
# Accepted values: "no-port" (default, disabled), "root-port".
|
||||
# When set to "root-port", devices discovered via CDI / Pod Resources
|
||||
# are cold-plugged before VM boot.
|
||||
# Enable hot-plugging of VFIO devices to a bridge-port,
|
||||
# root-port or switch-port.
|
||||
# The default setting is "no-port"
|
||||
hot_plug_vfio = "no-port"
|
||||
|
||||
# In a confidential compute environment hot-plugging can compromise
|
||||
# security.
|
||||
# Enable cold-plugging of VFIO devices to a bridge-port,
|
||||
# root-port or switch-port.
|
||||
# The default setting is "no-port", which means disabled.
|
||||
cold_plug_vfio = "root-port"
|
||||
|
||||
# Before hot plugging a PCIe device, you need to add a pcie_root_port device.
|
||||
@@ -712,18 +718,6 @@ sandbox_bind_mounts = @DEFBINDMOUNTS@
|
||||
#
|
||||
vfio_mode = "@DEFVFIOMODE_NV@"
|
||||
|
||||
# Controls how Kubernetes emptyDir volumes are handled:
|
||||
#
|
||||
# - "shared-fs" (default): shares the emptyDir folder with the guest via
|
||||
# the shared filesystem backend.
|
||||
#
|
||||
# - "block-encrypted": plugs a sparse disk image as a virtio-blk device
|
||||
# and has the kata-agent format and mount it with LUKS2 encryption via
|
||||
# the Confidential Data Hub (CDH). This is the recommended setting for
|
||||
# confidential computing variants.
|
||||
#
|
||||
emptydir_mode = "@DEFEMPTYDIRMODE_COCO@"
|
||||
|
||||
# If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will
|
||||
# be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest.
|
||||
disable_guest_empty_dir = @DEFDISABLEGUESTEMPTYDIR@
|
||||
|
||||
@@ -392,10 +392,16 @@ disable_image_nvdimm = true
|
||||
# Default false
|
||||
hotplug_vfio_on_root_bus = false
|
||||
|
||||
# Enable cold-plugging of VFIO devices to a PCIe port type.
|
||||
# Accepted values: "no-port" (default, disabled), "root-port".
|
||||
# When set to "root-port", devices discovered via CDI / Pod Resources
|
||||
# are cold-plugged before VM boot.
|
||||
# Enable hot-plugging of VFIO devices to a bridge-port,
|
||||
# root-port or switch-port.
|
||||
# The default setting is "no-port"
|
||||
hot_plug_vfio = "no-port"
|
||||
|
||||
# In a confidential compute environment hot-plugging can compromise
|
||||
# security.
|
||||
# Enable cold-plugging of VFIO devices to a bridge-port,
|
||||
# root-port or switch-port.
|
||||
# The default setting is "no-port", which means disabled.
|
||||
cold_plug_vfio = "root-port"
|
||||
|
||||
# Before hot plugging a PCIe device, you need to add a pcie_root_port device.
|
||||
@@ -688,18 +694,6 @@ sandbox_bind_mounts = @DEFBINDMOUNTS@
|
||||
#
|
||||
vfio_mode = "@DEFVFIOMODE_NV@"
|
||||
|
||||
# Controls how Kubernetes emptyDir volumes are handled:
|
||||
#
|
||||
# - "shared-fs" (default): shares the emptyDir folder with the guest via
|
||||
# the shared filesystem backend.
|
||||
#
|
||||
# - "block-encrypted": plugs a sparse disk image as a virtio-blk device
|
||||
# and has the kata-agent format and mount it with LUKS2 encryption via
|
||||
# the Confidential Data Hub (CDH). This is the recommended setting for
|
||||
# confidential computing variants.
|
||||
#
|
||||
emptydir_mode = "@DEFEMPTYDIRMODE_COCO@"
|
||||
|
||||
# If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will
|
||||
# be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest.
|
||||
disable_guest_empty_dir = @DEFDISABLEGUESTEMPTYDIR@
|
||||
|
||||
@@ -770,18 +770,6 @@ sandbox_bind_mounts = @DEFBINDMOUNTS@
|
||||
#
|
||||
vfio_mode = "@DEFVFIOMODE@"
|
||||
|
||||
# Controls how Kubernetes emptyDir volumes are handled:
|
||||
#
|
||||
# - "shared-fs" (default): shares the emptyDir folder with the guest via
|
||||
# the shared filesystem backend.
|
||||
#
|
||||
# - "block-encrypted": plugs a sparse disk image as a virtio-blk device
|
||||
# and has the kata-agent format and mount it with LUKS2 encryption via
|
||||
# the Confidential Data Hub (CDH). This is the recommended setting for
|
||||
# confidential computing variants.
|
||||
#
|
||||
emptydir_mode = "@DEFEMPTYDIRMODE@"
|
||||
|
||||
# If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will
|
||||
# be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest.
|
||||
disable_guest_empty_dir = @DEFDISABLEGUESTEMPTYDIR@
|
||||
|
||||
@@ -653,18 +653,6 @@ sandbox_bind_mounts = @DEFBINDMOUNTS@
|
||||
#
|
||||
vfio_mode = "@DEFVFIOMODE_SE@"
|
||||
|
||||
# Controls how Kubernetes emptyDir volumes are handled:
|
||||
#
|
||||
# - "shared-fs" (default): shares the emptyDir folder with the guest via
|
||||
# the shared filesystem backend.
|
||||
#
|
||||
# - "block-encrypted": plugs a sparse disk image as a virtio-blk device
|
||||
# and has the kata-agent format and mount it with LUKS2 encryption via
|
||||
# the Confidential Data Hub (CDH). This is the recommended setting for
|
||||
# confidential computing variants.
|
||||
#
|
||||
emptydir_mode = "@DEFEMPTYDIRMODE_COCO@"
|
||||
|
||||
# If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will
|
||||
# be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest.
|
||||
disable_guest_empty_dir = @DEFDISABLEGUESTEMPTYDIR@
|
||||
|
||||
@@ -695,18 +695,6 @@ sandbox_bind_mounts = @DEFBINDMOUNTS@
|
||||
#
|
||||
vfio_mode = "@DEFVFIOMODE@"
|
||||
|
||||
# Controls how Kubernetes emptyDir volumes are handled:
|
||||
#
|
||||
# - "shared-fs" (default): shares the emptyDir folder with the guest via
|
||||
# the shared filesystem backend.
|
||||
#
|
||||
# - "block-encrypted": plugs a sparse disk image as a virtio-blk device
|
||||
# and has the kata-agent format and mount it with LUKS2 encryption via
|
||||
# the Confidential Data Hub (CDH). This is the recommended setting for
|
||||
# confidential computing variants.
|
||||
#
|
||||
emptydir_mode = "@DEFEMPTYDIRMODE_COCO@"
|
||||
|
||||
# If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will
|
||||
# be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest.
|
||||
disable_guest_empty_dir = @DEFDISABLEGUESTEMPTYDIR@
|
||||
|
||||
@@ -671,18 +671,6 @@ sandbox_bind_mounts = @DEFBINDMOUNTS@
|
||||
#
|
||||
vfio_mode = "@DEFVFIOMODE@"
|
||||
|
||||
# Controls how Kubernetes emptyDir volumes are handled:
|
||||
#
|
||||
# - "shared-fs" (default): shares the emptyDir folder with the guest via
|
||||
# the shared filesystem backend.
|
||||
#
|
||||
# - "block-encrypted": plugs a sparse disk image as a virtio-blk device
|
||||
# and has the kata-agent format and mount it with LUKS2 encryption via
|
||||
# the Confidential Data Hub (CDH). This is the recommended setting for
|
||||
# confidential computing variants.
|
||||
#
|
||||
emptydir_mode = "@DEFEMPTYDIRMODE_COCO@"
|
||||
|
||||
# If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will
|
||||
# be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest.
|
||||
disable_guest_empty_dir = @DEFDISABLEGUESTEMPTYDIR@
|
||||
|
||||
@@ -268,18 +268,6 @@ static_sandbox_resource_mgmt = true
|
||||
#
|
||||
vfio_mode = "@DEFVFIOMODE@"
|
||||
|
||||
# Controls how Kubernetes emptyDir volumes are handled:
|
||||
#
|
||||
# - "shared-fs" (default): shares the emptyDir folder with the guest via
|
||||
# the shared filesystem backend.
|
||||
#
|
||||
# - "block-encrypted": plugs a sparse disk image as a virtio-blk device
|
||||
# and has the kata-agent format and mount it with LUKS2 encryption via
|
||||
# the Confidential Data Hub (CDH). This is the recommended setting for
|
||||
# confidential computing variants.
|
||||
#
|
||||
emptydir_mode = "@DEFEMPTYDIRMODE@"
|
||||
|
||||
# If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will
|
||||
# be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest.
|
||||
# Note: remote hypervisor has no sharing of emptydir mounts from host to guest
|
||||
|
||||
@@ -114,7 +114,6 @@ impl From<Storage> for agent::Storage {
|
||||
fs_group: from_option(from.fs_group),
|
||||
options: trans_vec(from.options),
|
||||
mount_point: from.mount_point,
|
||||
shared: from.shared,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -60,9 +60,6 @@ pub struct Storage {
|
||||
pub fs_group: Option<FSGroup>,
|
||||
pub options: Vec<String>,
|
||||
pub mount_point: String,
|
||||
/// When true the agent keeps this storage alive for the entire sandbox
|
||||
/// lifetime and does not tear it down when an individual container exits.
|
||||
pub shared: bool,
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Clone, Default)]
|
||||
|
||||
@@ -207,11 +207,11 @@ impl Device for VfioDeviceModernHandle {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Register the device in the virtual PCIe topology
|
||||
let topo = pcie_topo.as_deref_mut().ok_or_else(|| {
|
||||
anyhow::anyhow!("VFIO device requires a PCIe topology but none was provided")
|
||||
})?;
|
||||
self.register(topo).await?;
|
||||
// Register the device in the virtual PCIe topology if provided
|
||||
match pcie_topo {
|
||||
Some(topo) => self.register(topo).await?,
|
||||
None => return Ok(()),
|
||||
}
|
||||
|
||||
// Request Hypervisor to perform the actual hardware passthrough
|
||||
if let Err(e) = h.add_device(DeviceType::VfioModern(self.arc())).await {
|
||||
@@ -219,7 +219,9 @@ impl Device for VfioDeviceModernHandle {
|
||||
|
||||
// Rollback state on failure
|
||||
self.decrease_attach_count().await?;
|
||||
self.unregister(topo).await?;
|
||||
if let Some(topo) = pcie_topo {
|
||||
self.unregister(topo).await?;
|
||||
}
|
||||
return Err(e);
|
||||
}
|
||||
info!(
|
||||
|
||||
@@ -748,18 +748,14 @@ impl QemuInner {
|
||||
|
||||
let is_unaligned = !new_hotplugged_mem.is_multiple_of(guest_mem_block_size);
|
||||
if is_unaligned {
|
||||
let aligned = new_hotplugged_mem
|
||||
.checked_add(guest_mem_block_size - 1)
|
||||
.and_then(|v| v.checked_div(guest_mem_block_size))
|
||||
.and_then(|v| v.checked_mul(guest_mem_block_size))
|
||||
.ok_or_else(|| {
|
||||
anyhow!(
|
||||
"alignment of {} B to the block size of {} B failed",
|
||||
new_hotplugged_mem,
|
||||
guest_mem_block_size
|
||||
)
|
||||
})?;
|
||||
new_hotplugged_mem = aligned;
|
||||
new_hotplugged_mem = ch_config::convert::checked_next_multiple_of(
|
||||
new_hotplugged_mem,
|
||||
guest_mem_block_size,
|
||||
)
|
||||
.ok_or(anyhow!(format!(
|
||||
"alignment of {} B to the block size of {} B failed",
|
||||
new_hotplugged_mem, guest_mem_block_size
|
||||
)))?
|
||||
}
|
||||
let new_hotplugged_mem = new_hotplugged_mem;
|
||||
|
||||
|
||||
@@ -293,6 +293,59 @@ impl Qmp {
|
||||
Ok(hotplugged_mem_size)
|
||||
}
|
||||
|
||||
/// Hotplug an iommufd QOM object in QEMU and return the object id.
|
||||
#[allow(dead_code)]
|
||||
pub fn hotplug_iommufd(
|
||||
&mut self,
|
||||
suffix_or_id: &str,
|
||||
external_fdname: Option<&str>,
|
||||
) -> Result<String> {
|
||||
// Object id in QEMU (also used as fdname for getfd)
|
||||
let obj_id = if suffix_or_id.starts_with("iommufd") {
|
||||
suffix_or_id.to_string()
|
||||
} else {
|
||||
format!("iommufd{suffix_or_id}")
|
||||
};
|
||||
|
||||
{
|
||||
let file = std::fs::OpenOptions::new()
|
||||
.read(true)
|
||||
.write(true)
|
||||
.open("/dev/iommu")
|
||||
.context("open /dev/iommu failed")?;
|
||||
self.pass_fd(file.as_raw_fd(), &obj_id)?;
|
||||
}
|
||||
|
||||
let obj = match external_fdname {
|
||||
None => qmp::object_add(qapi_qmp::ObjectOptions::iommufd {
|
||||
id: obj_id.clone(),
|
||||
iommufd: qapi_qmp::IOMMUFDProperties { fd: None },
|
||||
}),
|
||||
Some(_fdname) => qmp::object_add(qapi_qmp::ObjectOptions::iommufd {
|
||||
id: obj_id.clone(),
|
||||
iommufd: qapi_qmp::IOMMUFDProperties {
|
||||
fd: Some(obj_id.to_string()),
|
||||
},
|
||||
}),
|
||||
};
|
||||
|
||||
match self.qmp.execute(&obj) {
|
||||
Ok(_) => Ok(obj_id),
|
||||
Err(e) => {
|
||||
let msg = format!("{e:#}");
|
||||
if msg.contains("duplicate ID")
|
||||
|| msg.contains("already exists")
|
||||
|| msg.contains("exists")
|
||||
{
|
||||
Ok(obj_id)
|
||||
} else {
|
||||
Err(anyhow!(e))
|
||||
.with_context(|| format!("object-add iommufd failed (id={obj_id})"))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn hotplug_memory(&mut self, size: u64) -> Result<()> {
|
||||
let memdev_idx = self
|
||||
.qmp
|
||||
|
||||
@@ -15,7 +15,6 @@ test-utils = { workspace = true }
|
||||
actix-rt = { workspace = true }
|
||||
anyhow = { workspace = true }
|
||||
async-trait = { workspace = true }
|
||||
base64 = "0.13.0"
|
||||
byte-unit = "5.1.6"
|
||||
cgroups-rs = { version = "0.5.0", features = ["oci"] }
|
||||
futures = "0.3.11"
|
||||
|
||||
@@ -408,7 +408,6 @@ impl ResourceManagerInner {
|
||||
self.device_manager.as_ref(),
|
||||
&self.sid,
|
||||
self.agent.clone(),
|
||||
&self.toml_config.runtime.emptydir_mode,
|
||||
)
|
||||
.await
|
||||
}
|
||||
@@ -496,7 +495,7 @@ impl ResourceManagerInner {
|
||||
.config
|
||||
.guest_pci_path
|
||||
.clone()
|
||||
.context("VFIO device has no guest PCI path assigned")?;
|
||||
.unwrap_or_default();
|
||||
let host_bdf = vfio_device.device.primary.addr.to_string();
|
||||
info!(
|
||||
sl!(),
|
||||
@@ -669,13 +668,6 @@ impl ResourceManagerInner {
|
||||
swap.clean().await;
|
||||
}
|
||||
|
||||
// Remove host-side disk images and direct-volume metadata for any
|
||||
// block-encrypted emptyDir volumes created during this sandbox lifetime.
|
||||
self.volume_resource
|
||||
.cleanup_ephemeral_disks()
|
||||
.await
|
||||
.context("cleanup block-encrypted emptyDir disks")?;
|
||||
|
||||
// TODO cleanup other resources
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -89,7 +89,6 @@ impl ShareFs for ShareVirtioFsInline {
|
||||
fs_group: None,
|
||||
options: SHARED_DIR_VIRTIO_FS_OPTIONS.clone(),
|
||||
mount_point: kata_guest_share_dir(),
|
||||
shared: false,
|
||||
};
|
||||
|
||||
storages.push(shared_volume);
|
||||
|
||||
@@ -250,7 +250,6 @@ impl ShareFs for ShareVirtioFsStandalone {
|
||||
fs_group: None,
|
||||
options: vec![String::from("nodev")],
|
||||
mount_point: kata_guest_share_dir(),
|
||||
shared: false,
|
||||
};
|
||||
|
||||
storages.push(shared_volume);
|
||||
|
||||
@@ -108,7 +108,6 @@ impl ShareFsMount for VirtiofsShareMount {
|
||||
fs_group: None,
|
||||
options: config.mount_options.clone(),
|
||||
mount_point: watchable_guest_mount.clone(),
|
||||
shared: false,
|
||||
};
|
||||
|
||||
// Update the guest_path, in order to identify what will
|
||||
|
||||
@@ -1,399 +0,0 @@
|
||||
// Copyright (c) 2025 NVIDIA Corporation
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
//! Block-encrypted emptyDir volume handler for runtime-rs.
|
||||
//!
|
||||
//! When `emptydir_mode = "block-encrypted"` is set in the runtime configuration,
|
||||
//! each Kubernetes emptyDir volume backed by a host directory is handled here
|
||||
//! instead of the normal "local" shared-filesystem path.
|
||||
//!
|
||||
//! For every such volume this module:
|
||||
//!
|
||||
//! 1. Creates a sparse `disk.img` file inside the kubelet emptyDir folder
|
||||
//! so that Kubelet can enforce `sizeLimit` (idempotent: skipped if a
|
||||
//! previous container in the same pod already did it).
|
||||
//! 2. Writes a `mountInfo.json` file (direct-volume metadata) that records
|
||||
//! the block device path, filesystem type, and `encryption_key=ephemeral`.
|
||||
//! 3. Plugs the disk image into the VM as a virtio-blk block device via the
|
||||
//! hypervisor device manager.
|
||||
//! 4. Sends an `agent::Storage` with `driver_options: ["encryption_key=ephemeral"]`
|
||||
//! and `shared: true` to the kata-agent. The agent delegates formatting and
|
||||
//! mounting to the Confidential Data Hub (CDH) using LUKS2.
|
||||
//!
|
||||
//! The `shared: true` flag instructs the agent to keep the storage alive until
|
||||
//! the sandbox is destroyed. Correspondingly, `EncryptedEmptyDirVolume::cleanup()`
|
||||
//! is a deliberate no-op: the host-side `disk.img` and `mountInfo.json` are
|
||||
//! removed at sandbox teardown by `VolumeResource::cleanup_ephemeral_disks()`.
|
||||
|
||||
use std::{collections::HashMap, fs, io::ErrorKind, path::PathBuf, sync::Arc};
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use async_trait::async_trait;
|
||||
use base64::{encode_config, URL_SAFE};
|
||||
use hypervisor::{
|
||||
device::{
|
||||
device_manager::{do_handle_device, get_block_device_info, DeviceManager},
|
||||
DeviceConfig, DeviceType,
|
||||
},
|
||||
BlockConfigModern,
|
||||
};
|
||||
use kata_sys_util::k8s::is_host_empty_dir;
|
||||
use kata_types::{
|
||||
config::EMPTYDIR_MODE_BLOCK_ENCRYPTED,
|
||||
device::{
|
||||
DRIVER_BLK_CCW_TYPE as KATA_CCW_DEV_TYPE, DRIVER_BLK_PCI_TYPE as KATA_BLK_DEV_TYPE,
|
||||
DRIVER_SCSI_TYPE as KATA_SCSI_DEV_TYPE,
|
||||
},
|
||||
mount::{
|
||||
join_path, kata_direct_volume_root_path, kata_guest_sandbox_dir, DirectVolumeMountInfo,
|
||||
KATA_MOUNT_INFO_FILE_NAME,
|
||||
},
|
||||
};
|
||||
use nix::sys::{stat::stat, statvfs::statvfs};
|
||||
use oci_spec::runtime as oci;
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
use crate::volume::Volume;
|
||||
|
||||
/// OCI mount type for bind mounts.
|
||||
const MOUNT_BIND_TYPE: &str = "bind";
|
||||
|
||||
/// Sub-directory of the guest sandbox dir used for block device mounts.
|
||||
/// Matches genpolicy `spath = /run/kata-containers/sandbox/storage`.
|
||||
const KATA_GUEST_SANDBOX_STORAGE_DIR: &str = "storage";
|
||||
|
||||
/// File name of the sparse disk image created inside each emptyDir folder.
|
||||
const EMPTYDIR_DISK_IMAGE_NAME: &str = "disk.img";
|
||||
|
||||
/// The driver option that tells the kata-agent to encrypt the device via CDH.
|
||||
const ENCRYPTION_KEY_EPHEMERAL: &str = "encryption_key=ephemeral";
|
||||
|
||||
/// Volume-type value written into `mountInfo.json`.
|
||||
const EMPTYDIR_VOLUME_TYPE_BLK: &str = "blk";
|
||||
|
||||
/// Filesystem type used when formatting the encrypted block device.
|
||||
const EMPTYDIR_FSTYPE: &str = "ext4";
|
||||
|
||||
/// Key / value written into the mountInfo.json `metadata` map.
|
||||
/// Must match Go runtime's direct-volume schema (src/runtime/pkg/direct-volume/utils.go).
|
||||
const EMPTYDIR_MKFS_METADATA_KEY: &str = "encryptionKey";
|
||||
const EMPTYDIR_MKFS_METADATA_VAL: &str = "ephemeral";
|
||||
|
||||
/// Key for fsGroup metadata in mountInfo.json.
|
||||
/// Must match Go runtime's direct-volume schema (src/runtime/pkg/direct-volume/utils.go).
|
||||
const FSGID_KEY: &str = "fsGroup";
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────────
|
||||
// Public types
|
||||
// ──────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Descriptor of a block-encrypted emptyDir disk created on the host during a
|
||||
/// sandbox lifetime. Instances are collected in `VolumeResource` so the
|
||||
/// sandbox teardown path can delete the sparse image and its metadata.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct EphemeralDisk {
|
||||
/// Absolute path to the `disk.img` file on the host.
|
||||
pub disk_path: String,
|
||||
/// Absolute path to the kubelet emptyDir folder (also the direct-volume key).
|
||||
pub source_path: String,
|
||||
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────────
|
||||
// Volume implementation
|
||||
// ──────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Handles a single Kubernetes emptyDir volume in `block-encrypted` mode.
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct EncryptedEmptyDirVolume {
|
||||
storage: Option<agent::Storage>,
|
||||
mount: oci::Mount,
|
||||
device_id: String,
|
||||
}
|
||||
|
||||
impl EncryptedEmptyDirVolume {
|
||||
pub(crate) async fn new(
|
||||
d: &RwLock<DeviceManager>,
|
||||
m: &oci::Mount,
|
||||
_sid: &str,
|
||||
ephemeral_disks: Arc<RwLock<Vec<EphemeralDisk>>>,
|
||||
) -> Result<Self> {
|
||||
let source_path = m
|
||||
.source()
|
||||
.as_ref()
|
||||
.and_then(|p| p.to_str())
|
||||
.context("block-encrypted emptyDir mount has no source path")?
|
||||
.to_string();
|
||||
|
||||
let disk_path = format!("{}/{}", source_path, EMPTYDIR_DISK_IMAGE_NAME);
|
||||
|
||||
// Idempotency: if mountInfo.json already exists, a previous container
|
||||
// in this pod already set the volume up. Skip creation.
|
||||
let is_new_disk = match kata_types::mount::get_volume_mount_info(&source_path) {
|
||||
Ok(_) => {
|
||||
if !std::path::Path::new(&disk_path).exists() {
|
||||
return Err(anyhow!(
|
||||
"mountInfo.json exists but disk image {} is missing",
|
||||
disk_path
|
||||
));
|
||||
}
|
||||
info!(
|
||||
sl!(),
|
||||
"encrypted emptyDir: reusing existing disk at {}", disk_path
|
||||
);
|
||||
false
|
||||
}
|
||||
Err(e) => {
|
||||
let is_not_found = e
|
||||
.downcast_ref::<std::io::Error>()
|
||||
.is_some_and(|io| io.kind() == std::io::ErrorKind::NotFound);
|
||||
if !is_not_found {
|
||||
return Err(e).context("failed to read mountInfo for emptyDir");
|
||||
}
|
||||
setup_ephemeral_disk(&source_path, &disk_path)
|
||||
.with_context(|| format!("setup ephemeral disk at {disk_path}"))?;
|
||||
true
|
||||
}
|
||||
};
|
||||
|
||||
// Register the disk image as a virtio-blk block device.
|
||||
let blkdev_info = get_block_device_info(d).await;
|
||||
let block_config = BlockConfigModern {
|
||||
path_on_host: disk_path.clone(),
|
||||
driver_option: blkdev_info.block_device_driver,
|
||||
num_queues: blkdev_info.num_queues,
|
||||
queue_size: blkdev_info.queue_size,
|
||||
..Default::default()
|
||||
};
|
||||
let device_info = do_handle_device(d, &DeviceConfig::BlockCfgModern(block_config))
|
||||
.await
|
||||
.context("register encrypted emptyDir block device with hypervisor")?;
|
||||
|
||||
// Extract the guest-visible device address (PCI path, SCSI addr, etc.)
|
||||
// and the hypervisor driver string.
|
||||
let (source, driver, device_id) = extract_block_source(device_info).await?;
|
||||
|
||||
// Compute the stable in-guest mount path:
|
||||
// /run/kata-containers/sandbox/storage/<base64url(source)>
|
||||
//
|
||||
// This satisfies the genpolicy rules:
|
||||
// i_storage.mount_point == $(spath)/base64url.encode(i_storage.source)
|
||||
// i_storage.mount_point == i_mount.source
|
||||
let spath = format!(
|
||||
"{}/{}",
|
||||
kata_guest_sandbox_dir(),
|
||||
KATA_GUEST_SANDBOX_STORAGE_DIR
|
||||
);
|
||||
let b64_source = encode_config(source.as_bytes(), URL_SAFE);
|
||||
let mount_point = format!("{}/{}", spath, b64_source);
|
||||
|
||||
let storage = agent::Storage {
|
||||
driver,
|
||||
source,
|
||||
fs_type: EMPTYDIR_FSTYPE.to_string(),
|
||||
mount_point: mount_point.clone(),
|
||||
driver_options: vec![ENCRYPTION_KEY_EPHEMERAL.to_string()],
|
||||
// shared=true: the agent keeps this storage alive until the sandbox
|
||||
// is destroyed, not just until the first container using it exits.
|
||||
shared: true,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
// The OCI mount source is the in-guest mount_point. The agent mounts
|
||||
// the LUKS2-formatted device there; the container bind-mounts from
|
||||
// that path to its own destination.
|
||||
let mut mount = oci::Mount::default();
|
||||
mount.set_destination(m.destination().clone());
|
||||
mount.set_typ(Some(MOUNT_BIND_TYPE.to_string()));
|
||||
mount.set_source(Some(PathBuf::from(&mount_point)));
|
||||
mount.set_options(m.options().clone());
|
||||
|
||||
if is_new_disk {
|
||||
info!(sl!(), "encrypted emptyDir: created disk at {}", disk_path);
|
||||
ephemeral_disks.write().await.push(EphemeralDisk {
|
||||
disk_path,
|
||||
source_path,
|
||||
});
|
||||
}
|
||||
|
||||
Ok(Self {
|
||||
storage: Some(storage),
|
||||
mount,
|
||||
device_id,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Volume for EncryptedEmptyDirVolume {
|
||||
fn get_volume_mount(&self) -> Result<Vec<oci::Mount>> {
|
||||
Ok(vec![self.mount.clone()])
|
||||
}
|
||||
|
||||
fn get_storage(&self) -> Result<Vec<agent::Storage>> {
|
||||
Ok(self.storage.iter().cloned().collect())
|
||||
}
|
||||
|
||||
fn get_device_id(&self) -> Result<Option<String>> {
|
||||
Ok(Some(self.device_id.clone()))
|
||||
}
|
||||
|
||||
async fn cleanup(&self, _device_manager: &RwLock<DeviceManager>) -> Result<()> {
|
||||
// Intentional no-op: this volume is shared across all containers in
|
||||
// the pod. Host-side cleanup (disk.img + mountInfo.json) is deferred
|
||||
// to VolumeResource::cleanup_ephemeral_disks() at sandbox teardown.
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────────
|
||||
// Detection predicate
|
||||
// ──────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Returns `true` when `m` is a host emptyDir bind mount that should be
|
||||
/// handled as a block-encrypted volume.
|
||||
pub(crate) fn is_encrypted_emptydir_volume(m: &oci::Mount, emptydir_mode: &str) -> bool {
|
||||
if emptydir_mode != EMPTYDIR_MODE_BLOCK_ENCRYPTED {
|
||||
return false;
|
||||
}
|
||||
|
||||
// update_ephemeral_storage_type() leaves host emptyDirs as "bind" in
|
||||
// block-encrypted mode rather than rewriting them to "local".
|
||||
if m.typ().as_deref() != Some(MOUNT_BIND_TYPE) {
|
||||
return false;
|
||||
}
|
||||
|
||||
m.source()
|
||||
.as_ref()
|
||||
.and_then(|p| p.to_str())
|
||||
.map(is_host_empty_dir)
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────────
|
||||
// Sandbox-level cleanup helper
|
||||
// ──────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Removes the direct-volume `mountInfo.json` directory for `volume_path`.
|
||||
///
|
||||
/// Called at sandbox teardown for each `EphemeralDisk` registered during the
|
||||
/// sandbox lifetime.
|
||||
pub(crate) fn remove_volume_mount_info(volume_path: &str) -> Result<()> {
|
||||
let dir = join_path(kata_direct_volume_root_path().as_str(), volume_path)
|
||||
.with_context(|| format!("build direct-volume path for {volume_path}"))?;
|
||||
match fs::remove_dir_all(&dir) {
|
||||
Ok(()) => Ok(()),
|
||||
Err(e) if e.kind() == ErrorKind::NotFound => Ok(()),
|
||||
Err(e) => Err(e).with_context(|| format!("remove direct-volume dir {dir:?}")),
|
||||
}
|
||||
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────────
|
||||
// Private helpers
|
||||
// ──────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Creates the sparse `disk.img` and writes `mountInfo.json`.
|
||||
fn setup_ephemeral_disk(source_path: &str, disk_path: &str) -> Result<()> {
|
||||
// Use the capacity of the filesystem that backs the emptyDir so that
|
||||
// Kubelet's sizeLimit enforcement still works correctly.
|
||||
let vfs =
|
||||
statvfs(source_path).with_context(|| format!("statvfs on emptyDir {source_path}"))?;
|
||||
let capacity = vfs
|
||||
.blocks()
|
||||
.checked_mul(vfs.fragment_size())
|
||||
.context("emptyDir capacity overflow")?;
|
||||
|
||||
// Create a sparse file: it appears `capacity` bytes large to Kubelet but
|
||||
// consumes negligible real disk space until the guest writes into it.
|
||||
let f = fs::File::create(disk_path).with_context(|| format!("create {disk_path}"))?;
|
||||
f.set_len(capacity)
|
||||
.with_context(|| format!("truncate {disk_path} to {capacity}"))?;
|
||||
drop(f);
|
||||
|
||||
// Capture the directory's gid to honour fsGroup semantics later.
|
||||
let dir_stat =
|
||||
stat(source_path).with_context(|| format!("stat emptyDir {source_path}"))?;
|
||||
let mut metadata: HashMap<String, String> = HashMap::new();
|
||||
metadata.insert(
|
||||
EMPTYDIR_MKFS_METADATA_KEY.to_string(),
|
||||
EMPTYDIR_MKFS_METADATA_VAL.to_string(),
|
||||
);
|
||||
if dir_stat.st_gid != 0 {
|
||||
metadata.insert(FSGID_KEY.to_string(), dir_stat.st_gid.to_string());
|
||||
}
|
||||
|
||||
let mount_info = DirectVolumeMountInfo {
|
||||
volume_type: EMPTYDIR_VOLUME_TYPE_BLK.to_string(),
|
||||
device: disk_path.to_string(),
|
||||
fs_type: EMPTYDIR_FSTYPE.to_string(),
|
||||
metadata,
|
||||
options: vec![],
|
||||
};
|
||||
|
||||
write_volume_mount_info(source_path, &mount_info)
|
||||
.with_context(|| format!("write mountInfo.json for {source_path}"))
|
||||
}
|
||||
|
||||
/// Serialises `info` and writes it as `mountInfo.json` under the Kata
|
||||
/// direct-volume root directory, keyed by `volume_path`.
|
||||
fn write_volume_mount_info(volume_path: &str, info: &DirectVolumeMountInfo) -> Result<()> {
|
||||
let dir = join_path(kata_direct_volume_root_path().as_str(), volume_path)
|
||||
.with_context(|| format!("build direct-volume path for {volume_path}"))?;
|
||||
fs::create_dir_all(&dir).with_context(|| format!("create dir {dir:?}"))?;
|
||||
let file_path = dir.join(KATA_MOUNT_INFO_FILE_NAME);
|
||||
let json = serde_json::to_string(info).context("serialise DirectVolumeMountInfo")?;
|
||||
fs::write(&file_path, &json).with_context(|| format!("write {file_path:?}"))?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Extracts `(source, driver, device_id)` from a `DeviceType` returned by
|
||||
/// `do_handle_device`. Mirrors the logic in `utils::handle_block_volume`.
|
||||
async fn extract_block_source(device_info: DeviceType) -> Result<(String, String, String)> {
|
||||
if let DeviceType::BlockModern(device_mod) = device_info.clone() {
|
||||
let device = device_mod.lock().await;
|
||||
let driver = device.config.driver_option.clone();
|
||||
let source = match driver.as_str() {
|
||||
KATA_BLK_DEV_TYPE => device
|
||||
.config
|
||||
.pci_path
|
||||
.as_ref()
|
||||
.map(|p| p.to_string())
|
||||
.ok_or_else(|| anyhow!("blk device has no PCI path"))?,
|
||||
KATA_SCSI_DEV_TYPE => device
|
||||
.config
|
||||
.scsi_addr
|
||||
.clone()
|
||||
.ok_or_else(|| anyhow!("SCSI device has no SCSI address"))?,
|
||||
_ => device.config.virt_path.clone(),
|
||||
};
|
||||
return Ok((source, driver, device.device_id.clone()));
|
||||
}
|
||||
|
||||
if let DeviceType::Block(device) = device_info {
|
||||
let driver = device.config.driver_option.clone();
|
||||
let source = match driver.as_str() {
|
||||
KATA_BLK_DEV_TYPE => device
|
||||
.config
|
||||
.pci_path
|
||||
.as_ref()
|
||||
.map(|p| p.to_string())
|
||||
.ok_or_else(|| anyhow!("blk device has no PCI path"))?,
|
||||
KATA_SCSI_DEV_TYPE => device
|
||||
.config
|
||||
.scsi_addr
|
||||
.clone()
|
||||
.ok_or_else(|| anyhow!("SCSI device has no SCSI address"))?,
|
||||
KATA_CCW_DEV_TYPE => device
|
||||
.config
|
||||
.ccw_addr
|
||||
.clone()
|
||||
.ok_or_else(|| anyhow!("CCW device has no CCW address"))?,
|
||||
_ => device.config.virt_path.clone(),
|
||||
};
|
||||
return Ok((source, driver, device.device_id));
|
||||
}
|
||||
|
||||
Err(anyhow!(
|
||||
"encrypted emptyDir: unsupported device type from do_handle_device"
|
||||
))
|
||||
}
|
||||
@@ -67,7 +67,6 @@ impl EphemeralVolume {
|
||||
fs_group: None,
|
||||
options: dir_options,
|
||||
mount_point: source.clone(),
|
||||
shared: false,
|
||||
};
|
||||
|
||||
let mut mount = oci::Mount::default();
|
||||
|
||||
@@ -75,7 +75,6 @@ impl LocalStorage {
|
||||
fs_group: None,
|
||||
options: dir_options,
|
||||
mount_point: source.clone(),
|
||||
shared: false,
|
||||
};
|
||||
|
||||
let mounts: Vec<oci::Mount> = if sid != cid {
|
||||
|
||||
@@ -6,7 +6,6 @@
|
||||
|
||||
mod block_volume;
|
||||
mod default_volume;
|
||||
pub(crate) mod encrypted_emptydir_volume;
|
||||
mod ephemeral_volume;
|
||||
pub mod hugepage;
|
||||
mod local_volume;
|
||||
@@ -25,9 +24,6 @@ use crate::{share_fs::ShareFs, volume::block_volume::is_block_volume};
|
||||
use agent::Agent;
|
||||
use anyhow::{Context, Result};
|
||||
use async_trait::async_trait;
|
||||
use encrypted_emptydir_volume::{
|
||||
is_encrypted_emptydir_volume, remove_volume_mount_info, EncryptedEmptyDirVolume, EphemeralDisk,
|
||||
};
|
||||
use hypervisor::device::device_manager::DeviceManager;
|
||||
use kata_sys_util::mount::get_mount_options;
|
||||
use oci_spec::runtime as oci;
|
||||
@@ -48,24 +44,14 @@ pub struct VolumeResourceInner {
|
||||
volumes: Vec<Arc<dyn Volume>>,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct VolumeResource {
|
||||
inner: Arc<RwLock<VolumeResourceInner>>,
|
||||
// The core purpose of introducing `volume_manager` to `VolumeResource` is to centralize
|
||||
// the management of shared file system volumes. By creating a single VolumeManager
|
||||
// instance within VolumeResource, all shared file volumes are managed by one central
|
||||
// entity. This single volume_manager can accurately track the references of all
|
||||
// ShareFsVolume instances to the shared volumes, ensuring correct reference counting,
|
||||
// proper volume lifecycle management, and preventing issues like volumes being overwritten.
|
||||
// The core purpose of introducing `volume_manager` to `VolumeResource` is to centralize the management of shared file system volumes.
|
||||
// By creating a single VolumeManager instance within VolumeResource, all shared file volumes are managed by one central entity.
|
||||
// This single volume_manager can accurately track the references of all ShareFsVolume instances to the shared volumes,
|
||||
// ensuring correct reference counting, proper volume lifecycle management, and preventing issues like volumes being overwritten.
|
||||
volume_manager: Arc<VolumeManager>,
|
||||
// Tracks block-encrypted emptyDir disks that were created during the sandbox lifetime.
|
||||
// Populated by handler_volumes(); drained by cleanup_ephemeral_disks() at sandbox teardown.
|
||||
ephemeral_disks: Arc<RwLock<Vec<EphemeralDisk>>>,
|
||||
}
|
||||
|
||||
impl Default for VolumeResource {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl VolumeResource {
|
||||
@@ -73,11 +59,9 @@ impl VolumeResource {
|
||||
Self {
|
||||
inner: Arc::new(RwLock::new(VolumeResourceInner::default())),
|
||||
volume_manager: Arc::new(VolumeManager::new()),
|
||||
ephemeral_disks: Arc::new(RwLock::new(Vec::new())),
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub async fn handler_volumes(
|
||||
&self,
|
||||
share_fs: &Option<Arc<dyn ShareFs>>,
|
||||
@@ -86,7 +70,6 @@ impl VolumeResource {
|
||||
d: &RwLock<DeviceManager>,
|
||||
sid: &str,
|
||||
agent: Arc<dyn Agent>,
|
||||
emptydir_mode: &str,
|
||||
) -> Result<Vec<Arc<dyn Volume>>> {
|
||||
let mut volumes: Vec<Arc<dyn Volume>> = vec![];
|
||||
let oci_mounts = &spec.mounts().clone().unwrap_or_default();
|
||||
@@ -94,15 +77,7 @@ impl VolumeResource {
|
||||
// handle mounts
|
||||
for m in oci_mounts {
|
||||
let read_only = get_mount_options(m.options()).iter().any(|opt| opt == "ro");
|
||||
let volume: Arc<dyn Volume> = if is_encrypted_emptydir_volume(m, emptydir_mode) {
|
||||
// Block-encrypted Kubernetes emptyDir: plug a LUKS2-encrypted
|
||||
// virtio-blk disk and let CDH format/mount it in the guest.
|
||||
Arc::new(
|
||||
EncryptedEmptyDirVolume::new(d, m, sid, self.ephemeral_disks.clone())
|
||||
.await
|
||||
.with_context(|| format!("new encrypted emptyDir volume {m:?}"))?,
|
||||
)
|
||||
} else if shm_volume::is_shm_volume(m) {
|
||||
let volume: Arc<dyn Volume> = if shm_volume::is_shm_volume(m) {
|
||||
Arc::new(
|
||||
shm_volume::ShmVolume::new(m)
|
||||
.with_context(|| format!("new shm volume {m:?}"))?,
|
||||
@@ -175,37 +150,6 @@ impl VolumeResource {
|
||||
Ok(volumes)
|
||||
}
|
||||
|
||||
/// Removes host-side `disk.img` files and `mountInfo.json` directories for
|
||||
/// all block-encrypted emptyDir volumes created during this sandbox's lifetime.
|
||||
///
|
||||
/// Must be called once at sandbox teardown, after the VM has stopped.
|
||||
pub async fn cleanup_ephemeral_disks(&self) -> Result<()> {
|
||||
let mut disks = self.ephemeral_disks.write().await;
|
||||
for disk in disks.drain(..) {
|
||||
// Remove the sparse disk image.
|
||||
match std::fs::remove_file(&disk.disk_path) {
|
||||
Ok(()) => {
|
||||
info!(sl!(), "removed ephemeral disk {}", disk.disk_path);
|
||||
}
|
||||
Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
|
||||
Err(e) => {
|
||||
warn!(
|
||||
sl!(),
|
||||
"failed to remove ephemeral disk {}: {}", disk.disk_path, e
|
||||
);
|
||||
}
|
||||
}
|
||||
// Remove the direct-volume mountInfo.json directory.
|
||||
if let Err(e) = remove_volume_mount_info(&disk.source_path) {
|
||||
warn!(
|
||||
sl!(),
|
||||
"failed to remove mountInfo.json for {}: {}", disk.source_path, e
|
||||
);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn dump(&self) {
|
||||
let inner = self.inner.read().await;
|
||||
for v in &inner.volumes {
|
||||
|
||||
@@ -41,6 +41,7 @@ kata-types = { workspace = true }
|
||||
protocols = { workspace = true }
|
||||
protobuf = { workspace = true }
|
||||
kata-sys-util = { workspace = true }
|
||||
pod-resources-rs = { workspace = true }
|
||||
logging = { workspace = true }
|
||||
runtime-spec = { workspace = true }
|
||||
shim-interface = { workspace = true }
|
||||
|
||||
@@ -8,6 +8,7 @@ license = { workspace = true }
|
||||
[dependencies]
|
||||
anyhow = { workspace = true }
|
||||
async-trait = { workspace = true }
|
||||
container-device-interface = "0.1.2"
|
||||
awaitgroup = "0.6.0"
|
||||
containerd-shim-protos = { workspace = true }
|
||||
libc = { workspace = true }
|
||||
|
||||
@@ -135,7 +135,6 @@ impl Container {
|
||||
toml_config.runtime.disable_guest_seccomp,
|
||||
disable_guest_selinux,
|
||||
toml_config.runtime.disable_guest_empty_dir,
|
||||
&toml_config.runtime.emptydir_mode,
|
||||
)
|
||||
.context("amend spec")?;
|
||||
|
||||
@@ -673,7 +672,6 @@ fn amend_spec(
|
||||
disable_guest_seccomp: bool,
|
||||
disable_guest_selinux: bool,
|
||||
disable_guest_empty_dir: bool,
|
||||
emptydir_mode: &str,
|
||||
) -> Result<()> {
|
||||
// Only the StartContainer hook needs to be reserved for execution in the guest
|
||||
if let Some(hooks) = spec.hooks().as_ref() {
|
||||
@@ -683,7 +681,7 @@ fn amend_spec(
|
||||
}
|
||||
|
||||
// special process K8s ephemeral volumes.
|
||||
update_ephemeral_storage_type(spec, disable_guest_empty_dir, emptydir_mode);
|
||||
update_ephemeral_storage_type(spec, disable_guest_empty_dir);
|
||||
|
||||
if let Some(linux) = &mut spec.linux_mut() {
|
||||
if disable_guest_seccomp {
|
||||
@@ -756,11 +754,11 @@ mod tests {
|
||||
assert!(spec.linux().as_ref().unwrap().seccomp().is_some());
|
||||
|
||||
// disable_guest_seccomp = false
|
||||
amend_spec(&mut spec, false, false, false, "").unwrap();
|
||||
amend_spec(&mut spec, false, false, false).unwrap();
|
||||
assert!(spec.linux().as_ref().unwrap().seccomp().is_some());
|
||||
|
||||
// disable_guest_seccomp = true
|
||||
amend_spec(&mut spec, true, false, false, "").unwrap();
|
||||
amend_spec(&mut spec, true, false, false).unwrap();
|
||||
assert!(spec.linux().as_ref().unwrap().seccomp().is_none());
|
||||
}
|
||||
|
||||
@@ -783,12 +781,12 @@ mod tests {
|
||||
.unwrap();
|
||||
|
||||
// disable_guest_selinux = false, selinux labels are left alone
|
||||
amend_spec(&mut spec, false, false, false, "").unwrap();
|
||||
amend_spec(&mut spec, false, false, false).unwrap();
|
||||
assert!(spec.process().as_ref().unwrap().selinux_label() == &Some("xxx".to_owned()));
|
||||
assert!(spec.linux().as_ref().unwrap().mount_label() == &Some("yyy".to_owned()));
|
||||
|
||||
// disable_guest_selinux = true, selinux labels are reset
|
||||
amend_spec(&mut spec, false, true, false, "").unwrap();
|
||||
amend_spec(&mut spec, false, true, false).unwrap();
|
||||
assert!(spec.process().as_ref().unwrap().selinux_label().is_none());
|
||||
assert!(spec.linux().as_ref().unwrap().mount_label().is_none());
|
||||
}
|
||||
|
||||
@@ -270,22 +270,6 @@ impl VirtSandbox {
|
||||
&self,
|
||||
sandbox_config: &SandboxConfig,
|
||||
) -> Result<Vec<ResourceConfig>> {
|
||||
let hypervisor_config = self.hypervisor.hypervisor_config().await;
|
||||
let cold_plug_vfio = &hypervisor_config.device_info.cold_plug_vfio;
|
||||
if cold_plug_vfio.is_empty() || cold_plug_vfio == "no-port" {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
let port = match cold_plug_vfio.as_str() {
|
||||
"root-port" => PCIePort::RootPort,
|
||||
other => {
|
||||
return Err(anyhow!(
|
||||
"unsupported cold_plug_vfio value {:?}; only \"root-port\" is supported",
|
||||
other
|
||||
))
|
||||
}
|
||||
};
|
||||
|
||||
let config = self.resource_manager.config().await;
|
||||
let pod_resource_socket = &config.runtime.pod_resource_api_sock;
|
||||
info!(
|
||||
@@ -318,13 +302,18 @@ impl VirtSandbox {
|
||||
.filter_map(pod_resources_rs::device_node_host_path)
|
||||
.collect();
|
||||
|
||||
// FQN: nvidia.com/gpu=X
|
||||
let mut vfio_configs = Vec::new();
|
||||
for path in paths.iter() {
|
||||
let dev_info = VfioDeviceBase {
|
||||
host_path: path.clone(),
|
||||
// CDI passes the per-device cdev (e.g. /dev/vfio/devices/vfio0); device_manager
|
||||
// also copies host_path here — set early so configs are self-consistent in logs
|
||||
// and any code path that runs before that assignment still discovers VFIO correctly.
|
||||
iommu_group_devnode: PathBuf::from(path),
|
||||
dev_type: "c".to_string(),
|
||||
port,
|
||||
// bus_type: bus_type.clone(),
|
||||
port: PCIePort::RootPort,
|
||||
hostdev_prefix: "vfio_device".to_owned(),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
@@ -0,0 +1,7 @@
|
||||
[
|
||||
{
|
||||
"op": "replace",
|
||||
"path": "/cluster_config/encrypted_emptydir",
|
||||
"value": false
|
||||
}
|
||||
]
|
||||
2
src/tools/kata-ctl/Cargo.lock
generated
2
src/tools/kata-ctl/Cargo.lock
generated
@@ -3768,7 +3768,6 @@ dependencies = [
|
||||
"agent",
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
"base64 0.13.1",
|
||||
"byte-unit",
|
||||
"cgroups-rs",
|
||||
"flate2",
|
||||
@@ -5252,6 +5251,7 @@ dependencies = [
|
||||
"async-trait",
|
||||
"awaitgroup",
|
||||
"common",
|
||||
"container-device-interface",
|
||||
"containerd-shim-protos",
|
||||
"hypervisor",
|
||||
"kata-sys-util",
|
||||
|
||||
@@ -182,6 +182,13 @@ setup_file() {
|
||||
export POD_EMBEDQA_YAML_IN="${pod_config_dir}/${POD_NAME_EMBEDQA}.yaml.in"
|
||||
export POD_EMBEDQA_YAML="${pod_config_dir}/${POD_NAME_EMBEDQA}.yaml"
|
||||
|
||||
# runtime-rs does not support trusted storage yet, so use alternative
|
||||
# TEE templates without emptyDir/PVC volumes and higher memory.
|
||||
if is_runtime_rs && [[ "${TEE}" = "true" ]]; then
|
||||
export POD_INSTRUCT_YAML_IN="${pod_config_dir}/${POD_NAME_INSTRUCT}-no-trusted-storage.yaml.in"
|
||||
export POD_EMBEDQA_YAML_IN="${pod_config_dir}/${POD_NAME_EMBEDQA}-no-trusted-storage.yaml.in"
|
||||
fi
|
||||
|
||||
dpkg -s jq >/dev/null 2>&1 || sudo apt -y install jq
|
||||
|
||||
setup_langchain_flow
|
||||
|
||||
@@ -9,6 +9,7 @@ load "${BATS_TEST_DIRNAME}/tests_common.sh"
|
||||
|
||||
setup() {
|
||||
is_confidential_runtime_class || skip "Only supported for CoCo"
|
||||
[[ "${KATA_HYPERVISOR}" == *-runtime-rs ]] && skip "Not supported with runtime-rs"
|
||||
|
||||
setup_common
|
||||
get_pod_config_dir
|
||||
@@ -85,6 +86,7 @@ setup() {
|
||||
|
||||
teardown() {
|
||||
is_confidential_runtime_class || skip "Only supported for CoCo"
|
||||
[[ "${KATA_HYPERVISOR}" == *-runtime-rs ]] && skip "Not supported with runtime-rs"
|
||||
|
||||
confidential_teardown_common "${node}" "${node_start_time:-}"
|
||||
}
|
||||
|
||||
@@ -0,0 +1,98 @@
|
||||
# Copyright (c) 2026 NVIDIA Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# TEE variant without trusted storage support (e.g. for runtime-rs which
|
||||
# does not yet implement block-encrypted emptyDir or trusted PVC).
|
||||
# Uses higher memory to compensate for the lack of offloaded storage.
|
||||
#
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: ${POD_NAME_INSTRUCT}
|
||||
labels:
|
||||
app: ${POD_NAME_INSTRUCT}
|
||||
annotations:
|
||||
# Start CDH process and configure AA for KBS communication
|
||||
# aa_kbc_params tells the Attestation Agent where KBS is located
|
||||
io.katacontainers.config.hypervisor.kernel_params: "agent.guest_components_procs=confidential-data-hub agent.aa_kbc_params=cc_kbc::${CC_KBS_ADDR}"
|
||||
# cc_init_data annotation will be added by genpolicy with CDH configuration
|
||||
# from the custom default-initdata.toml created by create_nim_initdata_file()
|
||||
spec:
|
||||
# Explicit user/group/supplementary groups to support nydus guest-pull.
|
||||
# See issue https://github.com/kata-containers/kata-containers/issues/11162 and
|
||||
# other references to this issue in the genpolicy source folder.
|
||||
securityContext:
|
||||
runAsUser: 1000
|
||||
runAsGroup: 1000
|
||||
fsGroup: 1000
|
||||
supplementalGroups: [4, 20, 24, 25, 27, 29, 30, 44, 46]
|
||||
restartPolicy: Never
|
||||
runtimeClassName: kata
|
||||
imagePullSecrets:
|
||||
- name: ngc-secret-instruct
|
||||
containers:
|
||||
- name: ${POD_NAME_INSTRUCT}
|
||||
image: nvcr.io/nim/meta/llama-3.1-8b-instruct:1.13.1
|
||||
# Ports exposed by the container:
|
||||
ports:
|
||||
- containerPort: 8000
|
||||
name: http-openai
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /v1/health/live
|
||||
port: http-openai
|
||||
initialDelaySeconds: 15
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 1
|
||||
successThreshold: 1
|
||||
failureThreshold: 3
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /v1/health/ready
|
||||
port: http-openai
|
||||
initialDelaySeconds: 15
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 1
|
||||
successThreshold: 1
|
||||
failureThreshold: 3
|
||||
startupProbe:
|
||||
httpGet:
|
||||
path: /v1/health/ready
|
||||
port: http-openai
|
||||
initialDelaySeconds: 360
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 1
|
||||
successThreshold: 1
|
||||
failureThreshold: 30
|
||||
env:
|
||||
- name: NGC_API_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: ngc-api-key-sealed-instruct
|
||||
key: api-key
|
||||
# GPU resource limit (for NVIDIA GPU)
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/pgpu: "1"
|
||||
cpu: "16"
|
||||
memory: "128Gi"
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: ngc-secret-instruct
|
||||
type: kubernetes.io/dockerconfigjson
|
||||
data:
|
||||
.dockerconfigjson: ${DOCKER_CONFIG_JSON}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: ngc-api-key-sealed-instruct
|
||||
type: Opaque
|
||||
data:
|
||||
# Sealed secret pointing to kbs:///default/ngc-api-key/instruct
|
||||
# CDH will unseal this by fetching the actual key from KBS
|
||||
api-key: "${NGC_API_KEY_SEALED_SECRET_INSTRUCT_BASE64}"
|
||||
@@ -0,0 +1,107 @@
|
||||
# Copyright (c) 2026 NVIDIA Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# TEE variant without trusted storage support (e.g. for runtime-rs which
|
||||
# does not yet implement block-encrypted emptyDir or trusted PVC).
|
||||
# Uses higher memory to compensate for the lack of offloaded storage.
|
||||
#
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: ${POD_NAME_EMBEDQA}
|
||||
labels:
|
||||
app: ${POD_NAME_EMBEDQA}
|
||||
annotations:
|
||||
# Start CDH process and configure AA for KBS communication
|
||||
# aa_kbc_params tells the Attestation Agent where KBS is located
|
||||
io.katacontainers.config.hypervisor.kernel_params: "agent.guest_components_procs=confidential-data-hub agent.aa_kbc_params=cc_kbc::${CC_KBS_ADDR}"
|
||||
# cc_init_data annotation will be added by genpolicy with CDH configuration
|
||||
# from the custom default-initdata.toml created by create_nim_initdata_file()
|
||||
spec:
|
||||
# Explicit user/group/supplementary groups to support nydus guest-pull.
|
||||
# See issue https://github.com/kata-containers/kata-containers/issues/11162 and
|
||||
# other references to this issue in the genpolicy source folder.
|
||||
securityContext:
|
||||
runAsUser: 1000
|
||||
runAsGroup: 1000
|
||||
fsGroup: 1000
|
||||
restartPolicy: Always
|
||||
runtimeClassName: kata
|
||||
serviceAccountName: default
|
||||
imagePullSecrets:
|
||||
- name: ngc-secret-embedqa
|
||||
containers:
|
||||
- name: ${POD_NAME_EMBEDQA}
|
||||
image: nvcr.io/nim/nvidia/llama-3.2-nv-embedqa-1b-v2:1.10.1
|
||||
imagePullPolicy: IfNotPresent
|
||||
env:
|
||||
- name: NGC_API_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: ngc-api-key-sealed-embedqa
|
||||
key: api-key
|
||||
- name: NIM_HTTP_API_PORT
|
||||
value: "8000"
|
||||
- name: NIM_JSONL_LOGGING
|
||||
value: "1"
|
||||
- name: NIM_LOG_LEVEL
|
||||
value: "INFO"
|
||||
ports:
|
||||
- containerPort: 8000
|
||||
name: http
|
||||
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /v1/health/live
|
||||
port: 8000
|
||||
initialDelaySeconds: 15
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 1
|
||||
successThreshold: 1
|
||||
failureThreshold: 3
|
||||
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /v1/health/ready
|
||||
port: 8000
|
||||
initialDelaySeconds: 15
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 10
|
||||
successThreshold: 1
|
||||
failureThreshold: 3
|
||||
|
||||
startupProbe:
|
||||
httpGet:
|
||||
path: /v1/health/ready
|
||||
port: 8000
|
||||
initialDelaySeconds: 60
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 1
|
||||
successThreshold: 1
|
||||
failureThreshold: 180
|
||||
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/pgpu: "1"
|
||||
cpu: "16"
|
||||
memory: "48Gi"
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: ngc-secret-embedqa
|
||||
type: kubernetes.io/dockerconfigjson
|
||||
data:
|
||||
.dockerconfigjson: ${DOCKER_CONFIG_JSON}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: ngc-api-key-sealed-embedqa
|
||||
type: Opaque
|
||||
data:
|
||||
# Sealed secret pointing to kbs:///default/ngc-api-key/embedqa
|
||||
# CDH will unseal this by fetching the actual key from KBS
|
||||
api-key: "${NGC_API_KEY_SEALED_SECRET_EMBEDQA_BASE64}"
|
||||
@@ -157,6 +157,10 @@ install_genpolicy_drop_ins() {
|
||||
cp "${examples_dir}/20-experimental-force-guest-pull-drop-in.json" "${settings_d}/"
|
||||
fi
|
||||
|
||||
# 20-* runtime-rs overlay (disable encrypted emptyDir, not supported yet)
|
||||
if is_runtime_rs; then
|
||||
cp "${examples_dir}/20-runtime-rs-drop-in.json" "${settings_d}/"
|
||||
fi
|
||||
}
|
||||
|
||||
# If auto-generated policy testing is enabled, make a copy of the genpolicy settings
|
||||
|
||||
Reference in New Issue
Block a user