genpolicy: Support trusted ephemeral data storage

This introduces a new genpolicy setting to prescribe the behavior of specified
storage classes.

Policy validation is necessary to prevent rogue injection of devices into the
TEE.

Furthermore, the policy specifies the contents of the Storage object sent from
the shim because:

 * Genpolicy has no way to infer the contents by itself when e.g. using a CSI
   driver.
 * The Storage object specifies whether the device will be encrypted by the
   agent.
 * Other use cases (e.g. tardev-snapshotter) rely on non-encrypted
   integrity-protected devices, meaning encryption cannot be hardcoded in the
   agent.

Note that the structure of the new genpolicy setting differs from the original
proposal to give more flexibility to users and accommodate future use cases.

Signed-off-by: Aurélien Bombo <abombo@microsoft.com>
This commit is contained in:
Aurélien Bombo
2025-01-10 15:38:49 -06:00
parent 7b336e9759
commit 0cb7e1895a
10 changed files with 218 additions and 41 deletions

4
.gitignore vendored
View File

@@ -18,3 +18,7 @@ src/tools/log-parser/kata-log-parser
tools/packaging/static-build/agent/install_libseccomp.sh
.envrc
.direnv
# Generated by genpolicy and stored in the working directory, so only
# the basename is ignored.
layers-cache.json

7
src/agent/Cargo.lock generated
View File

@@ -971,6 +971,12 @@ dependencies = [
"parking_lot_core",
]
[[package]]
name = "data-encoding"
version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476"
[[package]]
name = "deranged"
version = "0.4.0"
@@ -3423,6 +3429,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "843c3d97f07e3b5ac0955d53ad0af4c91fe4a4f8525843ece5bf014f27829b73"
dependencies = [
"anyhow",
"data-encoding",
"lazy_static",
"rand",
"regex",

View File

@@ -18,6 +18,8 @@ serde_json.workspace = true
# Agent Policy
regorus = { version = "0.2.8", default-features = false, features = [
"arc",
"base64",
"base64url",
"regex",
"std",
] }

View File

@@ -518,6 +518,12 @@ dependencies = [
"syn 2.0.104",
]
[[package]]
name = "data-encoding"
version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476"
[[package]]
name = "derive-new"
version = "0.5.9"
@@ -2159,6 +2165,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "843c3d97f07e3b5ac0955d53ad0af4c91fe4a4f8525843ece5bf014f27829b73"
dependencies = [
"anyhow",
"data-encoding",
"lazy_static",
"rand",
"regex",

View File

@@ -228,6 +228,7 @@
},
"common": {
"cpath": "/run/kata-containers/shared/containers",
"spath": "/run/kata-containers/sandbox/storage",
"root_path": "/run/kata-containers/$(bundle-id)/rootfs",
"sfprefix": "^$(cpath)/$(bundle-id)-[a-z0-9]{16}-",
"ip_p": "[0-9]{1,5}",
@@ -293,7 +294,18 @@
"CAP_BPF",
"CAP_CHECKPOINT_RESTORE"
],
"image_layer_verification" : "none"
"image_layer_verification" : "none",
"storage_classes": {
"csi-kata-directvolume-sc": {
"driver": "blk",
"driver_options": [
"confidential=true",
"ephemeral=true"
],
"fs_type": "ext4",
"options": []
}
}
},
"kata_config": {
"oci_version": "1.1.0",

View File

@@ -1027,6 +1027,27 @@ mount_source_allows(p_mount, i_mount, bundle_id, sandbox_id) if {
print("mount_source_allows 2: true")
}
# Allows a mount whose source is a sandbox storage path ending in a
# base64-encoded device ID.
#
# The last "/"-separated component of the input mount source is taken as the
# device ID and must be valid base64. The policy source is then expanded by
# substituting $(spath) with policy_data.common.spath and $(b64_device_id)
# with that component, and the result must equal the input source exactly.
#
# NOTE(review): the matching storage mount point is built with
# base64url.encode, whereas this rule validates with base64.is_valid --
# confirm both alphabets are accepted for the device IDs in use.
mount_source_allows(p_mount, i_mount, bundle_id, sandbox_id) if {
    print("mount_source_allows 3: i_mount.source=", i_mount.source)

    # Extract the trailing path component of the input source.
    i_source_parts = split(i_mount.source, "/")
    b64_device_id = i_source_parts[count(i_source_parts) - 1]

    base64.is_valid(b64_device_id)

    source1 := p_mount.source
    print("mount_source_allows 3: source1 =", source1)

    source2 := replace(source1, "$(spath)", policy_data.common.spath)
    print("mount_source_allows 3: source2 =", source2)

    source3 := replace(source2, "$(b64_device_id)", b64_device_id)
    print("mount_source_allows 3: source3 =", source3)

    source3 == i_mount.source

    print("mount_source_allows 3: true")
}
######################################################################
# Create container Storages
@@ -1105,7 +1126,6 @@ allow_storage_source(p_storage, i_storage, bundle_id) if {
allow_storage_options(p_storage, i_storage) if {
print("allow_storage_options 1: start")
p_storage.driver != "blk"
p_storage.driver != "overlayfs"
p_storage.options == i_storage.options
@@ -1154,6 +1174,24 @@ allow_mount_point(p_storage, i_storage, bundle_id, sandbox_id) if {
print("allow_mount_point 3: true")
}
# This rule is for storages shared via the direct volume assignment API.
#
# Allows the input storage when its filesystem type equals the policy's and
# its mount point equals the policy mount point after substituting $(spath)
# with policy_data.common.spath and $(b64_device_id) with the base64url
# encoding of the input storage source (the device ID).
#
# The head takes the same four arguments as the sibling allow_mount_point
# rules so that it is part of the same function.
allow_mount_point(p_storage, i_storage, bundle_id, sandbox_id) if {
    p_storage.fstype == i_storage.fstype

    mount1 := p_storage.mount_point
    print("allow_mount_point 6: mount1 =", mount1)

    mount2 := replace(mount1, "$(spath)", policy_data.common.spath)
    print("allow_mount_point 6: mount2 =", mount2)

    # The device ID is whatever the shim sent as the storage source.
    device_id := i_storage.source
    mount3 := replace(mount2, "$(b64_device_id)", base64url.encode(device_id))
    print("allow_mount_point 6: mount3 =", mount3)

    mount3 == i_storage.mount_point

    print("allow_mount_point 6: true")
}
# ExecProcessRequest.process.Capabilities
allow_exec_caps(i_caps) if {

View File

@@ -111,6 +111,21 @@ pub fn get_mount_and_storage(
&yaml_volume
);
let options = {
let propagation = match &yaml_mount.mountPropagation {
Some(p) if p == "Bidirectional" => "rshared",
_ => "rprivate",
};
let access = if let Some(true) = yaml_mount.readOnly {
"ro"
} else {
"rw"
};
(propagation, access)
};
if let Some(emptyDir) = &yaml_volume.emptyDir {
let settings_volumes = &settings.volumes;
let mut volume: Option<&settings::EmptyDirVolume> = None;
@@ -127,15 +142,24 @@ pub fn get_mount_and_storage(
get_empty_dir_mount_and_storage(settings, p_mounts, storages, yaml_mount, volume.unwrap());
} else if yaml_volume.persistentVolumeClaim.is_some() || yaml_volume.azureFile.is_some() {
get_shared_bind_mount(yaml_mount, p_mounts, "rprivate", "rw");
get_shared_bind_mount(yaml_mount, p_mounts, ("rprivate", "rw"));
} else if yaml_volume.hostPath.is_some() {
get_host_path_mount(yaml_mount, yaml_volume, p_mounts);
get_host_path_mount(yaml_mount, yaml_volume, p_mounts, options);
} else if yaml_volume.configMap.is_some() || yaml_volume.secret.is_some() {
get_config_map_mount_and_storage(settings, p_mounts, storages, yaml_mount);
} else if yaml_volume.projected.is_some() {
get_shared_bind_mount(yaml_mount, p_mounts, "rprivate", "ro");
get_shared_bind_mount(yaml_mount, p_mounts, ("rprivate", "ro"));
} else if yaml_volume.downwardAPI.is_some() {
get_downward_api_mount(yaml_mount, p_mounts);
} else if yaml_volume.ephemeral.is_some() {
get_ephemeral_mount(
settings,
yaml_mount,
yaml_volume,
p_mounts,
storages,
options,
);
} else {
todo!("Unsupported volume type {:?}", yaml_volume);
}
@@ -201,25 +225,11 @@ fn get_host_path_mount(
yaml_mount: &pod::VolumeMount,
yaml_volume: &volume::Volume,
p_mounts: &mut Vec<policy::KataMount>,
mount_options: (&str, &str),
) {
let host_path = yaml_volume.hostPath.as_ref().unwrap().path.clone();
let path = Path::new(&host_path);
let mut biderectional = false;
if let Some(mount_propagation) = &yaml_mount.mountPropagation {
if mount_propagation.eq("Bidirectional") {
debug!("get_host_path_mount: Bidirectional");
biderectional = true;
}
}
let access = match yaml_mount.readOnly {
Some(true) => {
debug!("setting read only access for host path mount");
"ro"
}
_ => "rw",
};
// TODO:
//
// - When volume.hostPath.path: /dev/ttyS0
@@ -230,17 +240,11 @@ fn get_host_path_mount(
// What is the reason for this source path difference in the Guest OS?
if !path.starts_with("/dev/") && !path.starts_with("/sys/") {
debug!("get_host_path_mount: calling get_shared_bind_mount");
let propagation = if biderectional { "rshared" } else { "rprivate" };
get_shared_bind_mount(yaml_mount, p_mounts, propagation, access);
get_shared_bind_mount(yaml_mount, p_mounts, mount_options);
} else {
let dest = yaml_mount.mountPath.clone();
let type_ = "bind".to_string();
let mount_option = if biderectional { "rshared" } else { "rprivate" };
let options = vec![
"rbind".to_string(),
mount_option.to_string(),
access.to_string(),
];
let options = build_options_vec(mount_options);
if let Some(policy_mount) = p_mounts.iter_mut().find(|m| m.destination.eq(&dest)) {
debug!("get_host_path_mount: updating dest = {dest}, source = {host_path}");
@@ -298,8 +302,7 @@ fn get_config_map_mount_and_storage(
fn get_shared_bind_mount(
yaml_mount: &pod::VolumeMount,
p_mounts: &mut Vec<policy::KataMount>,
propagation: &str,
access: &str,
mount_options: (&str, &str),
) {
let mount_path = if let Some(byte_index) = str::rfind(&yaml_mount.mountPath, '/') {
str::from_utf8(&yaml_mount.mountPath.as_bytes()[byte_index + 1..]).unwrap()
@@ -310,11 +313,7 @@ fn get_shared_bind_mount(
let dest = yaml_mount.mountPath.clone();
let type_ = "bind".to_string();
let options = vec![
"rbind".to_string(),
propagation.to_string(),
access.to_string(),
];
let options = build_options_vec(mount_options);
if let Some(policy_mount) = p_mounts.iter_mut().find(|m| m.destination.eq(&dest)) {
debug!("get_shared_bind_mount: updating dest = {dest}, source = {source}");
@@ -364,6 +363,67 @@ fn get_downward_api_mount(yaml_mount: &pod::VolumeMount, p_mounts: &mut Vec<poli
}
}
/// Adds the policy mount (and, when configured, the agent `Storage`) for a
/// pod volume of type `ephemeral`.
///
/// When the volume claim template names a storage class that has an entry in
/// `settings.common.storage_classes`, a `Storage` built from that entry is
/// appended to `storages` and a bind mount of the guest-side path is added to
/// (or updated in) `p_mounts`. Otherwise the volume is handled as a regular
/// shared bind mount.
///
/// `mount_options` is the `(propagation, access)` pair for the bind mount.
///
/// # Panics
///
/// Panics if `yaml_volume.ephemeral` is `None`.
fn get_ephemeral_mount(
    settings: &settings::Settings,
    yaml_mount: &pod::VolumeMount,
    yaml_volume: &volume::Volume,
    p_mounts: &mut Vec<policy::KataMount>,
    storages: &mut Vec<agent::Storage>,
    mount_options: (&str, &str),
) {
    let class_name = yaml_volume
        .ephemeral
        .as_ref()
        .unwrap()
        .volumeClaimTemplate
        .spec
        .storageClassName
        .as_ref();

    // Without a configured storage class there is nothing to prescribe for
    // the Storage object, so fall back to a shared bind mount.
    let sc_config = match class_name.and_then(|sc| settings.common.storage_classes.get(sc)) {
        Some(config) => config,
        None => {
            get_shared_bind_mount(yaml_mount, p_mounts, mount_options);
            return;
        }
    };

    // Mounting a device into a container takes two steps:
    //  1. In the guest: the device from `Storage.source` is mounted on
    //     this path (i.e. `Storage.mount_point`).
    //  2. In the container: this path is bind mounted on the pod spec
    //     mount point (volumeMount).
    let source = "$(spath)/$(b64_device_id)".to_string();

    let storage = agent::Storage {
        driver: sc_config.driver.clone(),
        driver_options: sc_config.driver_options.clone(),
        fstype: sc_config.fs_type.clone(),
        options: sc_config.options.clone(),

        source: "$(device_id)".to_string(),
        mount_point: source.clone(),

        fs_group: protobuf::MessageField::none(),
        special_fields: ::protobuf::SpecialFields::new(),
    };
    storages.push(storage);

    let dest = yaml_mount.mountPath.clone();
    let type_ = "bind".to_string();
    let options = build_options_vec(mount_options);

    // Reuse an existing mount for the same destination, if any.
    match p_mounts.iter_mut().find(|m| m.destination == dest) {
        Some(policy_mount) => {
            debug!("get_ephemeral_mount: updating dest = {dest}, source = {source}");
            policy_mount.type_ = type_;
            policy_mount.source = source;
            policy_mount.options = options;
        }
        None => {
            debug!("get_ephemeral_mount: adding dest = {dest}, source = {source}");
            p_mounts.push(policy::KataMount {
                destination: dest,
                type_,
                source,
                options,
            });
        }
    }
}
pub fn get_image_mount_and_storage(
settings: &settings::Settings,
p_mounts: &mut Vec<policy::KataMount>,
@@ -406,3 +466,12 @@ pub fn get_image_mount_and_storage(
options: settings_image.options.clone(),
});
}
/// Builds the option list for a bind mount: `"rbind"` followed by the
/// propagation and access options taken from `mount_options`, in that order.
fn build_options_vec(mount_options: (&str, &str)) -> Vec<String> {
    ["rbind", mount_options.0, mount_options.1]
        .iter()
        .map(|&opt| opt.to_owned())
        .collect()
}

View File

@@ -21,19 +21,19 @@ pub struct PersistentVolumeClaim {
kind: Option<String>,
pub metadata: obj_meta::ObjectMeta,
spec: PersistentVolumeClaimSpec,
pub spec: PersistentVolumeClaimSpec,
}
/// See Reference / Kubernetes API / Config and Storage Resources / PersistentVolumeClaim.
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
struct PersistentVolumeClaimSpec {
pub struct PersistentVolumeClaimSpec {
resources: ResourceRequirements,
#[serde(skip_serializing_if = "Option::is_none")]
accessModes: Option<Vec<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
storageClassName: Option<String>,
pub storageClassName: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
volumeMode: Option<String>,

View File

@@ -24,7 +24,7 @@ use protocols::agent;
use serde::{Deserialize, Serialize};
use serde_yaml::Value;
use std::boxed;
use std::collections::{BTreeMap, BTreeSet};
use std::collections::{BTreeMap, BTreeSet, HashMap};
use std::fs::read_to_string;
use std::io::Write;
@@ -399,6 +399,9 @@ pub struct CommonData {
/// Regex prefix for shared file paths - e.g., "^$(cpath)/$(bundle-id)-[a-z0-9]{16}-".
pub sfprefix: String,
/// Path to the shared sandbox storage - e.g., "/run/kata-containers/sandbox/storage".
pub spath: String,
/// Regex for an IPv4 address.
pub ipv4_a: String,
@@ -416,6 +419,22 @@ pub struct CommonData {
/// Default capabilities for a privileged container.
pub privileged_caps: Vec<String>,
/// A mapping of storage classes to configurations, which determines
/// the behavior of storage classes.
pub storage_classes: HashMap<String, StorageConfig>,
}
/// A subset of the `agent::Storage` gRPC object sent by the shim. This
/// prescribes the values for the fields of that object. This is
/// necessary with e.g. CSI drivers, as genpolicy cannot possibly infer
/// the content of the gRPC object on its own in such cases.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct StorageConfig {
/// Value prescribed for the `driver` field of `agent::Storage`.
pub driver: String,
/// Values prescribed for the `driver_options` field of `agent::Storage`.
pub driver_options: Vec<String>,
/// Value prescribed for the `fstype` field of `agent::Storage`.
pub fs_type: String,
/// Values prescribed for the `options` field of `agent::Storage`.
pub options: Vec<String>,
}
/// Configuration from "kubectl config".

View File

@@ -6,7 +6,7 @@
// Allow K8s YAML field names.
#![allow(non_snake_case)]
use crate::pod;
use crate::{obj_meta, persistent_volume_claim, pod};
use serde::{Deserialize, Serialize};
@@ -37,7 +37,11 @@ pub struct Volume {
pub secret: Option<SecretVolumeSource>,
#[serde(skip_serializing_if = "Option::is_none")]
pub downwardAPI: Option<DownwardAPIVolumeSource>, // TODO: additional fields.
pub downwardAPI: Option<DownwardAPIVolumeSource>,
#[serde(skip_serializing_if = "Option::is_none")]
pub ephemeral: Option<EphemeralVolumeSource>,
// TODO: additional fields.
}
/// See Reference / Kubernetes API / Config and Storage Resources / Volume.
@@ -129,3 +133,18 @@ pub struct DownwardAPIVolumeFile {
#[serde(skip_serializing_if = "Option::is_none")]
pub fieldRef: Option<pod::ObjectFieldSelector>,
}
/// The `ephemeral` source of a pod volume.
///
/// See Reference / Kubernetes API / Config and Storage Resources / Volume.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct EphemeralVolumeSource {
/// Claim template for the volume; its `spec` carries the storage class name.
pub volumeClaimTemplate: PersistentVolumeClaimTemplate,
}
/// The claim template embedded in an ephemeral volume source.
///
/// See Reference / Kubernetes API / Config and Storage Resources / Volume.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct PersistentVolumeClaimTemplate {
/// Optional object metadata; omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub metadata: Option<obj_meta::ObjectMeta>,
/// Claim specification, using the same type as a standalone PersistentVolumeClaim.
pub spec: persistent_volume_claim::PersistentVolumeClaimSpec,
}