Merge pull request #12739 from fidencio/topic/kata-deploy-nydus-use-a-different-namespace

kata-deploy: rename nydus-snapshotter to nydus-for-kata-tee
This commit is contained in:
Fabiano Fidêncio
2026-03-27 14:32:58 +01:00
committed by GitHub
7 changed files with 56 additions and 160 deletions

View File

@@ -790,23 +790,6 @@ function helm_helper() {
# Always unset first to clear any defaults from base file
yq -i ".snapshotter.setup = []" "${values_yaml}"
# For SNP shims, snapshotter.setup must ALWAYS be disabled in CI
# Check if any SNP shims (qemu-snp) are enabled
disable_snapshotter_setup=false
for shim in ${HELM_SHIMS}; do
case "${shim}" in
qemu-snp)
disable_snapshotter_setup=true
break
;;
esac
done
# Safety check: fail if HELM_EXPERIMENTAL_SETUP_SNAPSHOTTER is set when using SNP shims
if [[ "${disable_snapshotter_setup}" == "true" ]] && [[ -n "${HELM_EXPERIMENTAL_SETUP_SNAPSHOTTER}" ]]; then
die "ERROR: HELM_EXPERIMENTAL_SETUP_SNAPSHOTTER cannot be set when using SNP shims (qemu-snp). snapshotter.setup must always be disabled for these shims."
fi
if [[ -n "${HELM_EXPERIMENTAL_SETUP_SNAPSHOTTER}" ]]; then
# Convert space-separated or comma-separated list to YAML array
IFS=', ' read -ra snapshotter_list <<< "${HELM_EXPERIMENTAL_SETUP_SNAPSHOTTER}"

View File

@@ -176,7 +176,7 @@ function deploy_kata() {
# Workaround to avoid modifying the workflow yaml files
case "${KATA_HYPERVISOR}" in
qemu-tdx|qemu-nvidia-gpu-*)
qemu-tdx|qemu-snp|qemu-nvidia-gpu-*)
USE_EXPERIMENTAL_SETUP_SNAPSHOTTER=true
SNAPSHOTTER="nydus"
EXPERIMENTAL_FORCE_GUEST_PULL=false
@@ -208,26 +208,14 @@ function deploy_kata() {
HOST_OS="${KATA_HOST_OS}"
fi
# nydus and erofs are always deployed by kata-deploy; set this unconditionally
# based on the snapshotter so that all architectures and hypervisors work
# without needing per-workflow USE_EXPERIMENTAL_SETUP_SNAPSHOTTER overrides.
EXPERIMENTAL_SETUP_SNAPSHOTTER=""
if [[ "${USE_EXPERIMENTAL_SETUP_SNAPSHOTTER:-false}" == "true" ]]; then
case "${SNAPSHOTTER}" in
nydus|erofs)
ARCH="$(uname -m)"
# We only want to test this for the qemu-coco-dev and
# qemu-coco-dev-runtime-rs runtime classes
# as they are running on a GitHub runner (and not on a BM machine),
# and there the snapshotter is deployed on every run (rather than
# deployed when the machine is configured, as on the BM machines).
if [[ ${ARCH} == "x86_64" ]]; then
case "${KATA_HYPERVISOR}" in
qemu-tdx|qemu-coco-dev*|qemu-nvidia-gpu-*) EXPERIMENTAL_SETUP_SNAPSHOTTER="${SNAPSHOTTER}" ;;
*) ;;
esac
fi
;;
*) ;;
esac
fi
case "${SNAPSHOTTER}" in
nydus|erofs) EXPERIMENTAL_SETUP_SNAPSHOTTER="${SNAPSHOTTER}" ;;
*) ;;
esac
EXPERIMENTAL_FORCE_GUEST_PULL="${EXPERIMENTAL_FORCE_GUEST_PULL:-}"
@@ -476,92 +464,11 @@ function cleanup_snapshotter() {
}
function deploy_nydus_snapshotter() {
echo "::group::deploy_nydus_snapshotter"
ensure_yq
local nydus_snapshotter_install_dir
nydus_snapshotter_install_dir="/tmp/nydus-snapshotter"
if [[ -d "${nydus_snapshotter_install_dir}" ]]; then
rm -rf "${nydus_snapshotter_install_dir}"
fi
mkdir -p "${nydus_snapshotter_install_dir}"
nydus_snapshotter_url=$(get_from_kata_deps ".externals.nydus-snapshotter.url")
nydus_snapshotter_version=$(get_from_kata_deps ".externals.nydus-snapshotter.version")
git clone -b "${nydus_snapshotter_version}" "${nydus_snapshotter_url}" "${nydus_snapshotter_install_dir}"
pushd "${nydus_snapshotter_install_dir}"
if [[ "${K8S_TEST_HOST_TYPE}" = "baremetal" ]]; then
cleanup_nydus_snapshotter || true
fi
if [[ "${PULL_TYPE}" == "guest-pull" ]]; then
# Enable guest pull feature in nydus snapshotter
yq -i \
'select(.kind == "ConfigMap").data.FS_DRIVER = "proxy"' \
misc/snapshotter/base/nydus-snapshotter.yaml
else
>&2 echo "Invalid pull type"; exit 2
fi
# Disable reading the snapshotter config from the configmap
yq -i \
'select(.kind == "ConfigMap").data.ENABLE_CONFIG_FROM_VOLUME = "false"' \
misc/snapshotter/base/nydus-snapshotter.yaml
# Run the snapshotter as a systemd service
yq -i \
'select(.kind == "ConfigMap").data.ENABLE_SYSTEMD_SERVICE = "true"' \
misc/snapshotter/base/nydus-snapshotter.yaml
# Enable the "runtime specific snapshotter" feature in containerd when configuring containerd for the snapshotter
yq -i \
'select(.kind == "ConfigMap").data.ENABLE_RUNTIME_SPECIFIC_SNAPSHOTTER = "true"' \
misc/snapshotter/base/nydus-snapshotter.yaml
# Pin the version of nydus-snapshotter image.
# TODO: replace with a definitive solution (see https://github.com/kata-containers/kata-containers/issues/9742)
yq -i \
"select(.kind == \"DaemonSet\").spec.template.spec.containers[0].image = \"ghcr.io/containerd/nydus-snapshotter:${nydus_snapshotter_version}\"" \
misc/snapshotter/base/nydus-snapshotter.yaml
# Deploy nydus snapshotter as a daemonset
kubectl_retry create -f "misc/snapshotter/nydus-snapshotter-rbac.yaml"
if [[ "${KUBERNETES}" = "k3s" ]]; then
kubectl_retry apply -k "misc/snapshotter/overlays/k3s"
else
kubectl_retry apply -f "misc/snapshotter/base/nydus-snapshotter.yaml"
fi
popd
kubectl rollout status daemonset nydus-snapshotter -n nydus-system --timeout "${SNAPSHOTTER_DEPLOY_WAIT_TIMEOUT}"
echo "::endgroup::"
echo "::group::nydus snapshotter logs"
kubectl_retry logs --selector=app=nydus-snapshotter -n nydus-system
echo "::endgroup::"
echo "::group::nydus snapshotter describe"
kubectl_retry describe pod --selector=app=nydus-snapshotter -n nydus-system
echo "::endgroup::"
echo "nydus-for-kata-tee is now deployed and managed by kata-deploy; nothing to do here."
}
function cleanup_nydus_snapshotter() {
echo "cleanup_nydus_snapshotter"
local nydus_snapshotter_install_dir
nydus_snapshotter_install_dir="/tmp/nydus-snapshotter"
if [[ ! -d "${nydus_snapshotter_install_dir}" ]]; then
>&2 echo "nydus snapshotter dir not found"
exit 1
fi
pushd "${nydus_snapshotter_install_dir}"
if [[ "${KUBERNETES}" = "k3s" ]]; then
kubectl_retry delete --ignore-not-found -k "misc/snapshotter/overlays/k3s"
else
kubectl_retry delete --ignore-not-found -f "misc/snapshotter/base/nydus-snapshotter.yaml"
fi
sleep 180s
kubectl_retry delete --ignore-not-found -f "misc/snapshotter/nydus-snapshotter-rbac.yaml"
popd
sleep 30s
echo "::endgroup::"
echo "nydus-for-kata-tee is now deployed and managed by kata-deploy; nothing to do here."
}
function main() {

View File

@@ -150,7 +150,7 @@ install_genpolicy_drop_ins() {
cp "${examples_dir}/20-oci-1.2.0-drop-in.json" "${settings_d}/"
elif is_k3s_or_rke2; then
cp "${examples_dir}/20-oci-1.2.1-drop-in.json" "${settings_d}/"
elif is_nvidia_gpu_platform || [[ "${KATA_HYPERVISOR}" == "qemu-tdx" ]] || [[ -n "${CONTAINER_ENGINE_VERSION:-}" ]]; then
elif is_nvidia_gpu_platform || [[ "${KATA_HYPERVISOR}" == "qemu-snp" ]] || [[ "${KATA_HYPERVISOR}" == "qemu-tdx" ]] || [[ -n "${CONTAINER_ENGINE_VERSION:-}" ]]; then
cp "${examples_dir}/20-oci-1.3.0-drop-in.json" "${settings_d}/"
fi

View File

@@ -3,7 +3,7 @@
//
// SPDX-License-Identifier: Apache-2.0
use crate::config::Config;
use crate::config::{Config, NYDUS_FOR_KATA_TEE};
use crate::runtime::containerd;
use crate::utils;
use crate::utils::toml as toml_utils;
@@ -49,17 +49,14 @@ pub async fn configure_nydus_snapshotter(
configuration_file: &Path,
pluginid: &str,
) -> Result<()> {
info!("Configuring nydus-snapshotter");
info!("Configuring {NYDUS_FOR_KATA_TEE}");
let nydus = match config.multi_install_suffix.as_ref() {
Some(suffix) if !suffix.is_empty() => format!("nydus-{suffix}"),
_ => "nydus".to_string(),
Some(suffix) if !suffix.is_empty() => format!("{NYDUS_FOR_KATA_TEE}-{suffix}"),
_ => NYDUS_FOR_KATA_TEE.to_string(),
};
let containerd_nydus = match config.multi_install_suffix.as_ref() {
Some(suffix) if !suffix.is_empty() => format!("nydus-snapshotter-{suffix}"),
_ => "nydus-snapshotter".to_string(),
};
let containerd_nydus = nydus.clone();
toml_utils::set_toml_value(
configuration_file,
@@ -118,8 +115,8 @@ pub async fn configure_snapshotter(
configure_nydus_snapshotter(config, &configuration_file, pluginid).await?;
let nydus_snapshotter = match config.multi_install_suffix.as_ref() {
Some(suffix) if !suffix.is_empty() => format!("nydus-snapshotter-{suffix}"),
_ => "nydus-snapshotter".to_string(),
Some(suffix) if !suffix.is_empty() => format!("{NYDUS_FOR_KATA_TEE}-{suffix}"),
_ => NYDUS_FOR_KATA_TEE.to_string(),
};
utils::host_systemctl(&["restart", &nydus_snapshotter])?;
@@ -136,17 +133,17 @@ pub async fn configure_snapshotter(
}
pub async fn install_nydus_snapshotter(config: &Config) -> Result<()> {
info!("Deploying nydus-snapshotter");
info!("Deploying {NYDUS_FOR_KATA_TEE}");
let nydus_snapshotter = match config.multi_install_suffix.as_ref() {
Some(suffix) if !suffix.is_empty() => format!("nydus-snapshotter-{suffix}"),
_ => "nydus-snapshotter".to_string(),
Some(suffix) if !suffix.is_empty() => format!("{NYDUS_FOR_KATA_TEE}-{suffix}"),
_ => NYDUS_FOR_KATA_TEE.to_string(),
};
// Stop the service if it is currently running so we can replace the binaries safely.
let _ = utils::host_systemctl(&["stop", &format!("{nydus_snapshotter}.service")]);
// The nydus data directory (/var/lib/nydus-snapshotter) is intentionally preserved
// The nydus data directory (/var/lib/nydus-for-kata-tee) is intentionally preserved
// across reinstalls. Removing it would create a split-brain state: the nydus backend
// would start empty while containerd's BoltDB (meta.db) still holds snapshot records
// from the previous run. Any subsequent image pull then fails with:
@@ -184,7 +181,7 @@ pub async fn install_nydus_snapshotter(config: &Config) -> Result<()> {
config_content = config_content.replace(
"@NYDUS_OVERLAYFS_PATH@",
&format!(
"{}/nydus-snapshotter/nydus-overlayfs",
"{}/{NYDUS_FOR_KATA_TEE}/nydus-overlayfs",
&config
.host_install_dir
.strip_prefix("/host")
@@ -196,7 +193,7 @@ pub async fn install_nydus_snapshotter(config: &Config) -> Result<()> {
service_content = service_content.replace(
"@CONTAINERD_NYDUS_GRPC_BINARY@",
&format!(
"{}/nydus-snapshotter/containerd-nydus-grpc",
"{}/{NYDUS_FOR_KATA_TEE}/containerd-nydus-grpc",
&config
.host_install_dir
.strip_prefix("/host")
@@ -206,7 +203,7 @@ pub async fn install_nydus_snapshotter(config: &Config) -> Result<()> {
service_content = service_content.replace(
"@CONFIG_GUEST_PULLING@",
&format!(
"{}/nydus-snapshotter/config-guest-pulling.toml",
"{}/{NYDUS_FOR_KATA_TEE}/config-guest-pulling.toml",
&config
.host_install_dir
.strip_prefix("/host")
@@ -214,7 +211,7 @@ pub async fn install_nydus_snapshotter(config: &Config) -> Result<()> {
),
);
fs::create_dir_all(format!("{}/nydus-snapshotter", config.host_install_dir))?;
fs::create_dir_all(format!("{}/{NYDUS_FOR_KATA_TEE}", config.host_install_dir))?;
// Remove existing binaries before copying new ones.
// This is crucial for atomic updates (same pattern as copy_artifacts in install.rs):
@@ -223,13 +220,13 @@ pub async fn install_nydus_snapshotter(config: &Config) -> Result<()> {
// - Running processes keep using the old inode until they exit
// - New processes use the new file immediately
// Without this, fs::copy would fail with ETXTBSY ("Text file busy") if the
// nydus-snapshotter service is still running from a previous installation.
// nydus-for-kata-tee service is still running from a previous installation.
let grpc_binary = format!(
"{}/nydus-snapshotter/containerd-nydus-grpc",
"{}/{NYDUS_FOR_KATA_TEE}/containerd-nydus-grpc",
config.host_install_dir
);
let overlayfs_binary = format!(
"{}/nydus-snapshotter/nydus-overlayfs",
"{}/{NYDUS_FOR_KATA_TEE}/nydus-overlayfs",
config.host_install_dir
);
for binary in [&grpc_binary, &overlayfs_binary] {
@@ -251,7 +248,7 @@ pub async fn install_nydus_snapshotter(config: &Config) -> Result<()> {
fs::write(
format!(
"{}/nydus-snapshotter/config-guest-pulling.toml",
"{}/{NYDUS_FOR_KATA_TEE}/config-guest-pulling.toml",
config.host_install_dir
),
config_content,
@@ -269,11 +266,11 @@ pub async fn install_nydus_snapshotter(config: &Config) -> Result<()> {
}
pub async fn uninstall_nydus_snapshotter(config: &Config) -> Result<()> {
info!("Removing deployed nydus-snapshotter");
info!("Removing deployed {NYDUS_FOR_KATA_TEE}");
let nydus_snapshotter = match config.multi_install_suffix.as_ref() {
Some(suffix) if !suffix.is_empty() => format!("nydus-snapshotter-{suffix}"),
_ => "nydus-snapshotter".to_string(),
Some(suffix) if !suffix.is_empty() => format!("{NYDUS_FOR_KATA_TEE}-{suffix}"),
_ => NYDUS_FOR_KATA_TEE.to_string(),
};
utils::host_systemctl(&["disable", "--now", &format!("{nydus_snapshotter}.service")])?;
@@ -282,9 +279,9 @@ pub async fn uninstall_nydus_snapshotter(config: &Config) -> Result<()> {
"/host/etc/systemd/system/{nydus_snapshotter}.service"
))
.ok();
fs::remove_dir_all(format!("{}/nydus-snapshotter", config.host_install_dir)).ok();
fs::remove_dir_all(format!("{}/{NYDUS_FOR_KATA_TEE}", config.host_install_dir)).ok();
// The nydus data directory (/var/lib/nydus-snapshotter) is intentionally preserved.
// The nydus data directory (/var/lib/nydus-for-kata-tee) is intentionally preserved.
// See install_nydus_snapshotter for the full explanation: meta.db and the nydus backend
// must always agree, and the only way to guarantee that without complex, fragile cleanup
// logic is to never remove the data directory. After uninstall, containerd is

View File

@@ -16,6 +16,11 @@ use crate::k8s;
pub const K3S_RKE2_CONTAINERD_V3_TMPL: &str = "/etc/containerd/config-v3.toml.tmpl";
pub const K3S_RKE2_CONTAINERD_V2_TMPL: &str = "/etc/containerd/config.toml.tmpl";
/// Name of the nydus-snapshotter instance deployed and managed by kata-deploy for TEE workloads.
/// Used as the systemd service name, the containerd proxy plugin key, the runtime class
/// snapshotter field, and the base name for the data directory and socket path on the host.
pub const NYDUS_FOR_KATA_TEE: &str = "nydus-for-kata-tee";
/// Resolves whether to use containerd config v3 (true) or v2 (false) for K3s/RKE2.
/// 1. Tries config.toml (containerd config file): if it exists and contains "version = 3" or "version = 2", use that.
/// 2. Else falls back to the node's containerRuntimeVersion (e.g. "containerd://2.1.5-k3s1").

View File

@@ -88,6 +88,8 @@ pub async fn update_existing_runtimeclasses_for_nfd(config: &Config) -> Result<(
#[cfg(test)]
mod tests {
use crate::config::NYDUS_FOR_KATA_TEE;
#[test]
fn test_runtime_class_name_without_suffix() {
// Test runtime class name without MULTI_INSTALL_SUFFIX
@@ -144,24 +146,24 @@ mod tests {
#[test]
fn test_snapshotter_name_with_suffix() {
// Test snapshotter name adjustment with MULTI_INSTALL_SUFFIX
// Test that the nydus snapshotter produces the nydus-for-kata-tee containerd plugin
// name, with the suffix appended when MULTI_INSTALL_SUFFIX is set.
let suffix = Some("dev".to_string());
let snapshotter = "nydus";
if let Some(s) = suffix {
let adjusted = format!("{}-{}", snapshotter, s);
assert_eq!(adjusted, "nydus-dev");
let adjusted = format!("{NYDUS_FOR_KATA_TEE}-{}", s);
assert_eq!(adjusted, format!("{NYDUS_FOR_KATA_TEE}-dev"));
}
}
#[test]
fn test_nydus_snapshotter_systemd_service_with_suffix() {
// Test nydus-snapshotter systemd service name with suffix
// Test nydus-for-kata-tee systemd service name with suffix
let suffix = Some("test".to_string());
if let Some(s) = suffix {
let service_name = format!("nydus-snapshotter-{}", s);
assert_eq!(service_name, "nydus-snapshotter-test");
let service_name = format!("{NYDUS_FOR_KATA_TEE}-{}", s);
assert_eq!(service_name, format!("{NYDUS_FOR_KATA_TEE}-test"));
}
}
}

View File

@@ -3,7 +3,7 @@
//
// SPDX-License-Identifier: Apache-2.0
use crate::config::{Config, ContainerdPaths, CustomRuntime};
use crate::config::{Config, ContainerdPaths, CustomRuntime, NYDUS_FOR_KATA_TEE};
use crate::k8s;
use crate::utils;
use crate::utils::toml as toml_utils;
@@ -195,8 +195,10 @@ pub async fn configure_containerd_runtime(
let value = parts[1];
let snapshotter_value = if value == "nydus" {
match config.multi_install_suffix.as_ref() {
Some(suffix) if !suffix.is_empty() => format!("\"{value}-{suffix}\""),
_ => format!("\"{value}\""),
Some(suffix) if !suffix.is_empty() => {
format!("\"{NYDUS_FOR_KATA_TEE}-{suffix}\"")
}
_ => format!("\"{NYDUS_FOR_KATA_TEE}\""),
}
} else {
format!("\"{value}\"")
@@ -262,8 +264,8 @@ pub async fn configure_custom_containerd_runtime(
let snapshotter = custom_runtime.containerd_snapshotter.as_ref().map(|s| {
if s == "nydus" {
match config.multi_install_suffix.as_ref() {
Some(suffix) if !suffix.is_empty() => format!("\"{s}-{suffix}\""),
_ => format!("\"{s}\""),
Some(suffix) if !suffix.is_empty() => format!("\"{NYDUS_FOR_KATA_TEE}-{suffix}\""),
_ => format!("\"{NYDUS_FOR_KATA_TEE}\""),
}
} else {
format!("\"{s}\"")