Compare commits

...

14 Commits

Author SHA1 Message Date
Aurélien Bombo
b87b4dc3be relax bind mount regex
The source path can now be cached from the first container.

Signed-off-by: Aurélien Bombo <abombo@microsoft.com>
2026-02-13 12:42:49 -06:00
Aurélien Bombo
11dfe0ffac allow cached bundle-id from pause container
Signed-off-by: Aurélien Bombo <abombo@microsoft.com>
2026-02-13 12:42:49 -06:00
Aurélien Bombo
701e67cfd6 move cache handling to shared_fs=none branch
This should only be needed in that branch, since virtio-fs should already handle duplicates.

Signed-off-by: Aurélien Bombo <abombo@microsoft.com>
2026-02-13 12:42:49 -06:00
Aurélien Bombo
d50f103a13 Revert "debug: fix cache key"
This reverts commit 2c3ee1eda5.
2026-02-13 12:42:49 -06:00
Aurélien Bombo
bd2428e19f Revert "debug: different approach"
This reverts commit c0d3c31ec8.
2026-02-13 12:42:49 -06:00
Aurélien Bombo
13b8dda322 debug: different approach
Signed-off-by: Aurélien Bombo <abombo@microsoft.com>
2026-02-13 12:42:49 -06:00
Aurélien Bombo
36ca7990aa tests: Introduce new env variables to ease development
It can be useful to set these variables during local testing:

 * AZ_REGION: Region for the cluster.
 * AZ_NODEPOOL_TAGS: Node pool tags for the cluster.
 * GENPOLICY_BINARY: Path to the genpolicy binary.
 * GENPOLICY_SETTINGS_DIR: Directory holding the genpolicy settings.

I've also made it so that tests_common.sh modifies the duplicated
genpolicy-settings.json (used for testing) instead of the original git-tracked
one.
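
For local runs, a minimal sketch of how these variables might be exported before invoking the Kubernetes tests (the values and the entry-point script below are illustrative assumptions, not part of this change):

# Illustrative values only; adjust the region, tags, and paths to your setup.
export AZ_REGION="westeurope"
export AZ_NODEPOOL_TAGS="owner=abombo purpose=dev"
export GENPOLICY_BINARY="${HOME}/kata-containers/src/tools/genpolicy/target/release/genpolicy"
export GENPOLICY_SETTINGS_DIR="${HOME}/kata-containers/src/tools/genpolicy"

# Hypothetical entry point; use whichever test runner you normally invoke.
bash tests/integration/kubernetes/run_kubernetes_tests.sh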

Signed-off-by: Aurélien Bombo <abombo@microsoft.com>
2026-02-13 12:42:49 -06:00
Aurélien Bombo
9894e14e99 debug: fix cache key
Signed-off-by: Aurélien Bombo <abombo@microsoft.com>
2026-02-13 12:42:49 -06:00
Aurélien Bombo
7357373dff debug: properly invalidate cache
Signed-off-by: Aurélien Bombo <abombo@microsoft.com>
2026-02-13 12:42:49 -06:00
Aurélien Bombo
56254ecdff debug: smaller mutex critical sections
Signed-off-by: Aurélien Bombo <abombo@microsoft.com>
2026-02-13 12:42:49 -06:00
Aurélien Bombo
be8a112316 debug: enable disable_guest_empty_dir=true and shared_fs=none
Signed-off-by: Aurélien Bombo <abombo@microsoft.com>
2026-02-13 12:42:49 -06:00
Aurélien Bombo
ed415fa91a runtime-rs: Set disable_guest_empty_dir = true by default
Furthermore, this should not be configurable in 4.0.

Signed-off-by: Aurélien Bombo <abombo@microsoft.com>
2026-02-13 12:42:49 -06:00
Aurélien Bombo
4a37f4c673 genpolicy: Assume disable_guest_empty_dir = true
This option should be removed for 4.0, so we don't handle `false`.

Signed-off-by: Aurélien Bombo <abombo@microsoft.com>
2026-02-13 12:42:49 -06:00
Aurélien Bombo
0db136cfa9 runtime: Set disable_guest_empty_dir = true by default
This makes the runtime share the host Kubelet emptyDir folder with the guest
instead of the agent creating an empty folder in the container rootfs. Doing so
enables the Kubelet to track emptyDir usage and evict greedy pods.

In other words, with virtio-fs the container rootfs uses host storage whether this option is true or false; with true, however, Kata uses the k8s emptyDir folder, so the sizeLimit is properly enforced by k8s.

Addresses part of #12203.
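
As a rough illustration (a hypothetical manifest, not part of this change): with the option enabled, a pod like the one below gets its emptyDir sizeLimit enforced by the kubelet, which evicts the pod once the writer exceeds the limit.

kubectl apply -f - <<'EOF'
apiVersion: v1
kind: Pod
metadata:
  name: emptydir-limit-demo
spec:
  runtimeClassName: kata   # assumed runtime class name
  containers:
  - name: writer
    image: busybox
    # Writes 256M into a volume capped at 128Mi, triggering kubelet eviction.
    command: ["sh", "-c", "dd if=/dev/zero of=/data/fill bs=1M count=256; sleep 3600"]
    volumeMounts:
    - name: scratch
      mountPath: /data
  volumes:
  - name: scratch
    emptyDir:
      sizeLimit: 128Mi
EOF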

Signed-off-by: Aurélien Bombo <abombo@microsoft.com>
2026-02-13 12:42:49 -06:00
10 changed files with 64 additions and 93 deletions

View File

@@ -180,7 +180,7 @@ DEFNETQUEUES := 1
DEFENABLEANNOTATIONS := [\"enable_iommu\", \"virtio_fs_extra_args\", \"kernel_params\", \"kernel_verity_params\", \"default_vcpus\", \"default_memory\"]
DEFENABLEANNOTATIONS_COCO := [\"enable_iommu\", \"virtio_fs_extra_args\", \"kernel_params\", \"kernel_verity_params\", \"default_vcpus\", \"default_memory\", \"cc_init_data\"]
DEFDISABLEGUESTSECCOMP := true
DEFDISABLEGUESTEMPTYDIR := false
DEFDISABLEGUESTEMPTYDIR := true
##VAR DEFAULTEXPFEATURES=[features] Default experimental features enabled
DEFAULTEXPFEATURES := []
DEFDISABLESELINUX := false

View File

@@ -220,7 +220,7 @@ DEFBRIDGES := 1
DEFENABLEANNOTATIONS := [\"enable_iommu\", \"virtio_fs_extra_args\", \"kernel_params\", \"kernel_verity_params\"]
DEFENABLEANNOTATIONS_COCO := [\"enable_iommu\", \"virtio_fs_extra_args\", \"kernel_params\", \"kernel_verity_params\", \"default_vcpus\", \"default_memory\", \"cc_init_data\"]
DEFDISABLEGUESTSECCOMP := true
DEFDISABLEGUESTEMPTYDIR := false
DEFDISABLEGUESTEMPTYDIR := true
#Default experimental features enabled
DEFAULTEXPFEATURES := []

View File

@@ -83,9 +83,13 @@ type FilesystemShare struct {
configVolRegex *regexp.Regexp
// Regex to match only the timestamped directory inside the k8's volume mount
timestampDirRegex *regexp.Regexp
// The same volume mount can be shared by multiple containers in the same sandbox (pod)
srcDstMap map[string][]string
srcDstMapLock sync.Mutex
// srcDstMap tracks file-level source to destination mappings for configmap/secret watching
srcDstMap map[string][]string
srcDstMapLock sync.Mutex
// srcGuestMap caches volume source path to guest path, enabling multiple containers
// in the same pod to share the same volume mount
srcGuestMap map[string]string
srcGuestMapLock sync.Mutex
eventLoopStarted bool
eventLoopStartedLock sync.Mutex
watcherDoneChannel chan bool
@@ -108,6 +112,7 @@ func NewFilesystemShare(s *Sandbox) (*FilesystemShare, error) {
sandbox: s,
watcherDoneChannel: make(chan bool),
srcDstMap: make(map[string][]string),
srcGuestMap: make(map[string]string),
watcher: watcher,
configVolRegex: configVolRegex,
timestampDirRegex: timestampDirRegex,
@@ -309,6 +314,13 @@ func (f *FilesystemShare) ShareFile(ctx context.Context, c *Container, m *Mount)
// bind mount it in the shared directory.
caps := f.sandbox.hypervisor.Capabilities(ctx)
if !caps.IsFsSharingSupported() {
f.srcGuestMapLock.Lock()
if guestPath, ok := f.srcGuestMap[m.Source]; ok {
f.srcGuestMapLock.Unlock()
return &SharedFile{guestPath: guestPath}, nil
}
f.srcGuestMapLock.Unlock()
f.Logger().Debug("filesystem sharing is not supported, files will be copied")
var ignored bool
@@ -418,6 +430,11 @@ func (f *FilesystemShare) ShareFile(ctx context.Context, c *Container, m *Mount)
m.HostPath = mountDest
}
// Cache the guestPath for this volume source so other containers can share it
f.srcGuestMapLock.Lock()
defer f.srcGuestMapLock.Unlock()
f.srcGuestMap[m.Source] = guestPath
return &SharedFile{
guestPath: guestPath,
}, nil
@@ -442,6 +459,10 @@ func (f *FilesystemShare) UnshareFile(ctx context.Context, c *Container, m *Moun
}
}
f.srcGuestMapLock.Lock()
delete(f.srcGuestMap, m.Source)
f.srcGuestMapLock.Unlock()
return nil
}

View File

@@ -152,17 +152,6 @@
}
},
"volumes": {
"emptyDir": {
"mount_type": "local",
"mount_source": "^$(cpath)/$(sandbox-id)/rootfs/local/",
"mount_point": "^$(cpath)/$(sandbox-id)/rootfs/local/",
"driver": "local",
"source": "local",
"fstype": "local",
"options": [
"mode=0777"
]
},
"emptyDir_memory": {
"mount_type": "bind",
"mount_source": "^/run/kata-containers/sandbox/ephemeral/",

View File

@@ -1160,7 +1160,7 @@ mount_source_allows(p_mount, i_mount, bundle_id, sandbox_id) if {
regex3 := replace(regex2, "$(cpath)", policy_data.common.cpath)
print("mount_source_allows 2: regex3 =", regex3)
regex4 := replace(regex3, "$(sandbox-id)", sandbox_id)
regex4 := replace(regex3, "$(bundle-id)", "[a-z0-9]{64}")
print("mount_source_allows 2: regex4 =", regex4)
regex.match(regex4, i_mount.source)
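
A made-up sanity check of the relaxed pattern outside the policy engine: substituting "[a-z0-9]{64}" for "$(bundle-id)" means a mount source created under any container's bundle id (for example the pause container's) still matches. The path layout and ids below are fabricated for illustration.

# All paths and ids below are fabricated for illustration.
pattern='^/run/kata-containers/shared/containers/[a-z0-9]{64}-[0-9a-f]+-resolv\.conf$'
sample='/run/kata-containers/shared/containers/0db136cfa90db136cfa90db136cfa90db136cfa90db136cfa90db136cfa9abcd-9f8e7d6c-resolv.conf'
echo "${sample}" | grep -Eq "${pattern}" && echo "allowed by relaxed regex"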

View File

@@ -105,7 +105,6 @@ pub fn get_mount_and_storage(
storages: &mut Vec<agent::Storage>,
yaml_volume: &volume::Volume,
yaml_mount: &pod::VolumeMount,
pod_security_context: &Option<pod::PodSecurityContext>,
) {
debug!(
"get_mount_and_storage: adding mount and storage for: {:?}",
@@ -113,27 +112,18 @@ pub fn get_mount_and_storage(
);
if let Some(emptyDir) = &yaml_volume.emptyDir {
let settings_volumes = &settings.volumes;
let mut volume: Option<&settings::EmptyDirVolume> = None;
if let Some(medium) = &emptyDir.medium {
if medium == "Memory" {
volume = Some(&settings_volumes.emptyDir_memory);
}
let is_tmpfs = emptyDir.medium.as_ref().is_some_and(|m| m == "Memory");
if is_tmpfs {
get_memory_empty_dir_mount_and_storage(settings, p_mounts, storages, yaml_mount);
} else {
let access = if yaml_mount.readOnly == Some(true) {
debug!("setting read only access for emptyDir mount");
"ro"
} else {
"rw"
};
get_shared_bind_mount(yaml_mount, p_mounts, "rprivate", access);
}
if volume.is_none() {
volume = Some(&settings_volumes.emptyDir);
}
get_empty_dir_mount_and_storage(
settings,
p_mounts,
storages,
yaml_mount,
volume.unwrap(),
pod_security_context,
);
} else if yaml_volume.persistentVolumeClaim.is_some() || yaml_volume.azureFile.is_some() {
get_shared_bind_mount(yaml_mount, p_mounts, "rprivate", "rw");
} else if yaml_volume.hostPath.is_some() {
@@ -149,50 +139,25 @@ pub fn get_mount_and_storage(
}
}
fn get_empty_dir_mount_and_storage(
fn get_memory_empty_dir_mount_and_storage(
settings: &settings::Settings,
p_mounts: &mut Vec<policy::KataMount>,
storages: &mut Vec<agent::Storage>,
yaml_mount: &pod::VolumeMount,
settings_empty_dir: &settings::EmptyDirVolume,
pod_security_context: &Option<pod::PodSecurityContext>,
) {
debug!("Settings emptyDir: {:?}", settings_empty_dir);
let settings_empty_dir = &settings.volumes.emptyDir_memory;
debug!("Settings emptyDir_memory: {:?}", settings_empty_dir);
if yaml_mount.subPathExpr.is_none() {
let mut options = settings_empty_dir.options.clone();
if let Some(gid) = pod_security_context.as_ref().and_then(|sc| sc.fsGroup) {
// This matches the runtime behavior of only setting the fsgid if the mountpoint GID is not 0.
// https://github.com/kata-containers/kata-containers/blob/b69da5f3ba8385c5833b31db41a846a203812675/src/runtime/virtcontainers/kata_agent.go#L1602-L1607
if gid != 0 {
options.push(format!("fsgid={gid}"));
}
}
storages.push(agent::Storage {
driver: settings_empty_dir.driver.clone(),
driver_options: Vec::new(),
source: settings_empty_dir.source.clone(),
fstype: settings_empty_dir.fstype.clone(),
options,
mount_point: format!("{}{}$", &settings_empty_dir.mount_point, &yaml_mount.name),
fs_group: protobuf::MessageField::none(),
special_fields: ::protobuf::SpecialFields::new(),
});
}
let source = if yaml_mount.subPathExpr.is_some() {
let file_name = Path::new(&yaml_mount.mountPath).file_name().unwrap();
let name = OsString::from(file_name).into_string().unwrap();
format!("{}{name}$", &settings.volumes.configMap.mount_source)
} else {
format!("{}{}$", &settings_empty_dir.mount_source, &yaml_mount.name)
};
let mount_type = if yaml_mount.subPathExpr.is_some() {
"bind"
} else {
&settings_empty_dir.mount_type
};
storages.push(agent::Storage {
driver: settings_empty_dir.driver.clone(),
driver_options: Vec::new(),
source: settings_empty_dir.source.clone(),
fstype: settings_empty_dir.fstype.clone(),
options: settings_empty_dir.options.clone(),
mount_point: format!("{}{}$", &settings_empty_dir.mount_point, &yaml_mount.name),
fs_group: protobuf::MessageField::none(),
special_fields: ::protobuf::SpecialFields::new(),
});
let access = match yaml_mount.readOnly {
Some(true) => {
@@ -204,8 +169,8 @@ fn get_empty_dir_mount_and_storage(
p_mounts.push(policy::KataMount {
destination: yaml_mount.mountPath.to_string(),
type_: mount_type.to_string(),
source,
type_: settings_empty_dir.mount_type.clone(),
source: format!("{}{}$", &settings_empty_dir.mount_source, &yaml_mount.name),
options: vec![
"rbind".to_string(),
"rprivate".to_string(),
@@ -318,13 +283,7 @@ fn get_shared_bind_mount(
propagation: &str,
access: &str,
) {
// The Kata Shim filepath.Base() to extract the last element of this path, in
// https://github.com/kata-containers/kata-containers/blob/5e46f814dd79ab6b34588a83825260413839735a/src/runtime/virtcontainers/fs_share_linux.go#L305
// In Rust, Path::file_name() has a similar behavior.
let path = Path::new(&yaml_mount.mountPath);
let mount_path = path.file_name().unwrap().to_str().unwrap();
let source = format!("$(sfprefix){mount_path}$");
let source = "$(sfprefix)[a-zA-Z0-9_.-]+$".to_string();
let dest = yaml_mount.mountPath.clone();
let type_ = "bind".to_string();

View File

@@ -31,7 +31,6 @@ pub struct Settings {
/// Volume settings loaded from genpolicy-settings.json.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Volumes {
pub emptyDir: EmptyDirVolume,
pub emptyDir_memory: EmptyDirVolume,
pub configMap: ConfigMapVolume,
pub image_volume: ImageVolume,

View File

@@ -304,7 +304,6 @@ pub fn get_container_mounts_and_storages(
storages,
volume,
volume_mount,
&podSpec.securityContext,
);
}
}

View File

@@ -9,6 +9,8 @@ source "${tests_dir}/common.bash"
kubernetes_dir="${tests_dir}/integration/kubernetes"
helm_chart_dir="${repo_root_dir}/tools/packaging/kata-deploy/helm-chart/kata-deploy"
AZ_REGION="${AZ_REGION:-eastus}"
AZ_NODEPOOL_TAGS="${AZ_NODEPOOL_TAGS:-}"
GENPOLICY_PULL_METHOD="${GENPOLICY_PULL_METHOD:-oci-distribution}"
GH_PR_NUMBER="${GH_PR_NUMBER:-}"
HELM_DEFAULT_INSTALLATION="${HELM_DEFAULT_INSTALLATION:-false}"
@@ -111,7 +113,7 @@ function create_cluster() {
"GENPOLICY_PULL_METHOD=${GENPOLICY_PULL_METHOD:0:1}")
az group create \
-l eastus \
-l "${AZ_REGION}" \
-n "${rg}"
# Required by e.g. AKS App Routing for KBS installation.
@@ -129,7 +131,8 @@ function create_cluster() {
--node-count 1 \
--generate-ssh-keys \
--tags "${tags[@]}" \
$([[ "${KATA_HOST_OS}" = "cbl-mariner" ]] && echo "--os-sku AzureLinux --workload-runtime KataVmIsolation")
$([[ "${KATA_HOST_OS}" = "cbl-mariner" ]] && echo "--os-sku AzureLinux --workload-runtime KataVmIsolation") \
$([ -n "${AZ_NODEPOOL_TAGS}" ] && echo "--nodepool-tags "${AZ_NODEPOOL_TAGS}"")
}
function install_bats() {

View File

@@ -37,6 +37,8 @@ K8S_TEST_DIR="${kubernetes_dir:-"${BATS_TEST_DIRNAME}"}"
AUTO_GENERATE_POLICY="${AUTO_GENERATE_POLICY:-}"
GENPOLICY_PULL_METHOD="${GENPOLICY_PULL_METHOD:-}"
GENPOLICY_BINARY="${GENPOLICY_BINARY:-"/opt/kata/bin/genpolicy"}"
GENPOLICY_SETTINGS_DIR="${GENPOLICY_SETTINGS_DIR:-"/opt/kata/share/defaults/kata-containers"}"
KATA_HYPERVISOR="${KATA_HYPERVISOR:-}"
KATA_HOST_OS="${KATA_HOST_OS:-}"
@@ -191,12 +193,11 @@ adapt_common_policy_settings() {
# and change these settings to use Kata CI cluster's default namespace.
create_common_genpolicy_settings() {
declare -r genpolicy_settings_dir="$1"
declare -r default_genpolicy_settings_dir="/opt/kata/share/defaults/kata-containers"
auto_generate_policy_enabled || return 0
cp "${default_genpolicy_settings_dir}/genpolicy-settings.json" "${genpolicy_settings_dir}"
cp "${default_genpolicy_settings_dir}/rules.rego" "${genpolicy_settings_dir}"
cp "${GENPOLICY_SETTINGS_DIR}/genpolicy-settings.json" "${genpolicy_settings_dir}"
cp "${GENPOLICY_SETTINGS_DIR}/rules.rego" "${genpolicy_settings_dir}"
adapt_common_policy_settings "${genpolicy_settings_dir}"
}
@@ -247,7 +248,7 @@ auto_generate_policy_no_added_flags() {
declare -r additional_flags="${4:-""}"
auto_generate_policy_enabled || return 0
local genpolicy_command="RUST_LOG=info /opt/kata/bin/genpolicy -u -y ${yaml_file}"
local genpolicy_command="RUST_LOG=info ${GENPOLICY_BINARY} -u -y ${yaml_file}"
genpolicy_command+=" -p ${settings_dir}/rules.rego"
genpolicy_command+=" -j ${settings_dir}/genpolicy-settings.json"