mirror of
https://github.com/kata-containers/kata-containers.git
synced 2026-07-01 14:38:33 +00:00
kata-deploy: split install/cleanup into staged actions
Phase 1 of migrating kata-deploy from a DaemonSet to a staged JobSet
workflow: refactor the binary's install/cleanup flows into discrete,
independently invocable stages while keeping the existing DaemonSet
path fully working.
Add new staged subcommands that each run one step and exit, so a JobSet
can drive them as ordered initContainers/Jobs per node:
install: host-check -> artifacts -> cri -> label
cleanup (reverse): unlabel -> revert-cri -> remove-artifacts
`install` becomes a compatibility wrapper composing the install stages
in the canonical order, so the DaemonSet deployment model is unchanged.
The DaemonSet `cleanup` (with its DaemonSet-presence gating) is left
intact; the staged cleanup actions are added alongside it and skip that
gating since the JobSet workflow only schedules them on a real uninstall.
Each stage has an idempotent skip check so reruns are safe:
- install label / cleanup unlabel: short-circuit via the node label
- cleanup remove-artifacts: skip when the install dir is already gone
- cleanup revert-cri: skip the disruptive runtime restart when the CRI
drop-ins are already absent (new cri_drop_in_present helper)
Introduce a shared KATA_RUNTIME_LABEL constant and add rstest-based
tests covering the subcommand-name -> Action mapping, rejection of
unknown actions, and the visible/hidden help semantics.
Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
Assisted-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
committed by
Fabiano Fidêncio
parent
d4205c7fcc
commit
225ff2209e
@@ -56,6 +56,39 @@ enum Action {
|
||||
Install,
|
||||
Cleanup,
|
||||
Reset,
|
||||
/// Stage 0 of a staged (JobSet) install: validate host/node prerequisites
|
||||
/// without mutating the host. Fails fast with actionable diagnostics when
|
||||
/// the node cannot support installation.
|
||||
#[clap(name = "install-stage-host-check")]
|
||||
InstallStageHostCheck,
|
||||
/// Stage 1 of a staged (JobSet) install: install kata artifacts/config on
|
||||
/// the host and set up configured snapshotters. Does not touch CRI
|
||||
/// configuration, but is still privileged (host writes + snapshotter setup
|
||||
/// shell into the host via nsenter).
|
||||
#[clap(name = "install-stage-artifacts")]
|
||||
InstallStageArtifacts,
|
||||
/// Stage 2 of a staged (JobSet) install: write CRI drop-ins, restart the
|
||||
/// runtime, and wait for node readiness. Privileged + short-lived.
|
||||
#[clap(name = "install-stage-cri")]
|
||||
InstallStageCri,
|
||||
/// Stage 3 of a staged (JobSet) install: apply the kata-runtime node label.
|
||||
/// Unprivileged, Kubernetes API only.
|
||||
#[clap(name = "install-stage-label")]
|
||||
InstallStageLabel,
|
||||
/// Cleanup stage 1 of a staged (JobSet) uninstall: remove the kata-runtime
|
||||
/// node label first so the scheduler stops placing kata workloads here.
|
||||
/// Unprivileged, Kubernetes API only.
|
||||
#[clap(name = "cleanup-stage-unlabel")]
|
||||
CleanupStageUnlabel,
|
||||
/// Cleanup stage 2 of a staged (JobSet) uninstall: remove CRI drop-ins,
|
||||
/// restart the runtime, and wait for readiness. Privileged + short-lived.
|
||||
#[clap(name = "cleanup-stage-revert-cri")]
|
||||
CleanupStageRevertCri,
|
||||
/// Cleanup stage 3 of a staged (JobSet) uninstall: remove kata
|
||||
/// artifacts/config/symlinks from the host. Privileged (mutates the host
|
||||
/// filesystem under the install dir).
|
||||
#[clap(name = "cleanup-stage-remove-artifacts")]
|
||||
CleanupStageRemoveArtifacts,
|
||||
/// Internal: entered via re-exec after install completes. Holds the
|
||||
/// DaemonSet pod alive waiting for SIGTERM, then runs cleanup. Hidden
|
||||
/// from `--help`; users should never invoke this directly.
|
||||
@@ -63,6 +96,10 @@ enum Action {
|
||||
InternalPostInstallWait,
|
||||
}
|
||||
|
||||
/// Node label key used to mark a node as kata-capable. The install label stage
|
||||
/// sets it to "true" and the cleanup/unlabel/reset paths remove it; sharing one constant keeps the key consistent.
|
||||
const KATA_RUNTIME_LABEL: &str = "katacontainers.io/kata-runtime";
|
||||
|
||||
// Cap the tokio runtime to a small fixed number of worker threads. The default
|
||||
// multi-thread runtime allocates `num_cpus()` workers (each with a ~2 MiB
|
||||
// stack), which on a 200+ vCPU GPU node is the dominant contributor to the
|
||||
@@ -107,6 +144,13 @@ async fn main() -> Result<()> {
|
||||
Action::Install => "install",
|
||||
Action::Cleanup => "cleanup",
|
||||
Action::Reset => "reset",
|
||||
Action::InstallStageHostCheck => "install-stage-host-check",
|
||||
Action::InstallStageArtifacts => "install-stage-artifacts",
|
||||
Action::InstallStageCri => "install-stage-cri",
|
||||
Action::InstallStageLabel => "install-stage-label",
|
||||
Action::CleanupStageUnlabel => "cleanup-stage-unlabel",
|
||||
Action::CleanupStageRevertCri => "cleanup-stage-revert-cri",
|
||||
Action::CleanupStageRemoveArtifacts => "cleanup-stage-remove-artifacts",
|
||||
Action::InternalPostInstallWait => "internal-post-install-wait",
|
||||
};
|
||||
config.print_info(action_str);
|
||||
@@ -245,6 +289,42 @@ async fn main() -> Result<()> {
|
||||
// Exit after completion so the job can complete
|
||||
info!("Reset completed, exiting");
|
||||
}
|
||||
// Staged (JobSet) install actions. Each runs one step of the install
|
||||
// pipeline as a short-lived Job/initContainer and exits. The DaemonSet
|
||||
// path does not use these directly; it goes through `install` above,
|
||||
// which composes the same stage functions.
|
||||
Action::InstallStageHostCheck => {
|
||||
install_stage_host_check(&config, &runtime).await?;
|
||||
info!("Install host-check stage completed, exiting");
|
||||
}
|
||||
Action::InstallStageArtifacts => {
|
||||
install_stage_artifacts(&config, &runtime).await?;
|
||||
info!("Install artifacts stage completed, exiting");
|
||||
}
|
||||
Action::InstallStageCri => {
|
||||
install_stage_cri(&config, &runtime).await?;
|
||||
info!("Install CRI stage completed, exiting");
|
||||
}
|
||||
Action::InstallStageLabel => {
|
||||
install_stage_label(&config).await?;
|
||||
info!("Install label stage completed, exiting");
|
||||
}
|
||||
// Staged (JobSet) cleanup actions. These run in reverse order
|
||||
// (unlabel -> revert-cri -> remove-artifacts) and, unlike the DaemonSet
|
||||
// `cleanup` above, do not perform DaemonSet-presence gating: the JobSet
|
||||
// workflow only schedules these when an uninstall is actually intended.
|
||||
Action::CleanupStageUnlabel => {
|
||||
cleanup_stage_unlabel(&config).await?;
|
||||
info!("Cleanup unlabel stage completed, exiting");
|
||||
}
|
||||
Action::CleanupStageRevertCri => {
|
||||
cleanup_stage_revert_cri(&config, &runtime).await?;
|
||||
info!("Cleanup revert-cri stage completed, exiting");
|
||||
}
|
||||
Action::CleanupStageRemoveArtifacts => {
|
||||
cleanup_stage_remove_artifacts(&config).await?;
|
||||
info!("Cleanup remove-artifacts stage completed, exiting");
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -273,20 +353,39 @@ fn reexec_into_post_install_wait(
|
||||
))
|
||||
}
|
||||
|
||||
/// Full install pipeline. Used by the DaemonSet deployment model. Composes the
|
||||
/// same per-stage functions the staged JobSet workflow invokes individually, in
|
||||
/// the canonical order: host-check -> artifacts -> cri -> label.
|
||||
async fn install(config: &config::Config, runtime: &str) -> Result<()> {
|
||||
info!("Installing Kata Containers");
|
||||
|
||||
const SUPPORTED_RUNTIMES: &[&str] = &[
|
||||
"crio",
|
||||
"containerd",
|
||||
"k3s",
|
||||
"k3s-agent",
|
||||
"rke2-agent",
|
||||
"rke2-server",
|
||||
"k0s-worker",
|
||||
"k0s-controller",
|
||||
"microk8s",
|
||||
];
|
||||
install_stage_host_check(config, runtime).await?;
|
||||
install_stage_artifacts(config, runtime).await?;
|
||||
install_stage_cri(config, runtime).await?;
|
||||
install_stage_label(config).await?;
|
||||
|
||||
info!("Kata Containers installation completed successfully");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Runtime names accepted by the install host-check stage, which returns an
/// error for any runtime that is not listed here.
const SUPPORTED_RUNTIMES: &[&str] = &[
|
||||
"crio",
|
||||
"containerd",
|
||||
"k3s",
|
||||
"k3s-agent",
|
||||
"rke2-agent",
|
||||
"rke2-server",
|
||||
"k0s-worker",
|
||||
"k0s-controller",
|
||||
"microk8s",
|
||||
];
|
||||
|
||||
/// Install stage 0 (host-check): validate that this node can support a Kata
|
||||
/// installation before any host mutation happens. This is read-only and safe
|
||||
/// to run repeatedly; it fails fast with actionable diagnostics so a staged
|
||||
/// JobSet can abort the per-node pipeline before the privileged stages run.
|
||||
async fn install_stage_host_check(config: &config::Config, runtime: &str) -> Result<()> {
|
||||
info!("install (host-check): validating node prerequisites for runtime {runtime}");
|
||||
|
||||
if !SUPPORTED_RUNTIMES.contains(&runtime) {
|
||||
return Err(anyhow::anyhow!(
|
||||
@@ -345,16 +444,44 @@ async fn install(config: &config::Config, runtime: &str) -> Result<()> {
|
||||
}
|
||||
}
|
||||
|
||||
runtime::containerd::setup_containerd_config_files(runtime, config).await?;
|
||||
info!("install (host-check): node prerequisites satisfied");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Install stage 1 (artifacts): place kata artifacts/config on the host and set
|
||||
/// up any configured snapshotters. This does not touch CRI configuration, but it
|
||||
/// still needs privileged host access: writing under the host install dir and
|
||||
/// the snapshotter setup (e.g. nydus) shell into the host via nsenter.
|
||||
async fn install_stage_artifacts(config: &config::Config, runtime: &str) -> Result<()> {
|
||||
info!("install (artifacts): installing kata artifacts on host");
|
||||
|
||||
artifacts::install_artifacts(config, runtime).await?;
|
||||
|
||||
if runtime != "crio" {
|
||||
if let Some(snapshotters) = config.experimental_setup_snapshotter.as_ref() {
|
||||
for snapshotter in snapshotters {
|
||||
artifacts::snapshotters::install_snapshotter(snapshotter, config).await?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
info!("install (artifacts): artifacts installed");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Install stage 2 (cri): write CRI drop-ins, configure snapshotters, restart
|
||||
/// the runtime, and wait for the node to become ready. This is the privileged,
|
||||
/// node-disrupting stage and is kept short-lived.
|
||||
async fn install_stage_cri(config: &config::Config, runtime: &str) -> Result<()> {
|
||||
info!("install (cri): configuring CRI runtime");
|
||||
|
||||
runtime::containerd::setup_containerd_config_files(runtime, config).await?;
|
||||
|
||||
runtime::configure_cri_runtime(config, runtime).await?;
|
||||
|
||||
if runtime != "crio" {
|
||||
if let Some(snapshotters) = config.experimental_setup_snapshotter.as_ref() {
|
||||
for snapshotter in snapshotters {
|
||||
artifacts::snapshotters::install_snapshotter(snapshotter, config).await?;
|
||||
artifacts::snapshotters::configure_snapshotter(snapshotter, runtime, config)
|
||||
.await?;
|
||||
}
|
||||
@@ -365,9 +492,29 @@ async fn install(config: &config::Config, runtime: &str) -> Result<()> {
|
||||
runtime::lifecycle::restart_runtime(config, runtime).await?;
|
||||
info!("Runtime restart completed successfully");
|
||||
|
||||
label_node_with_retry(config, "katacontainers.io/kata-runtime", "true").await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Install stage 3 (label): apply the kata-runtime node label. Unprivileged,
|
||||
/// Kubernetes API only. Skips re-applying when the label is already correct.
|
||||
async fn install_stage_label(config: &config::Config) -> Result<()> {
|
||||
info!("install (label): applying node label");
|
||||
|
||||
match k8s::get_node_label(config, KATA_RUNTIME_LABEL).await {
|
||||
Ok(Some(ref val)) if val == "true" => {
|
||||
info!(
|
||||
"install (label): node already labeled {}=true, skipping",
|
||||
KATA_RUNTIME_LABEL
|
||||
);
|
||||
return Ok(());
|
||||
}
|
||||
// Any other state (absent, different value, or a transient read error)
|
||||
// falls through to label_node_with_retry, which applies and verifies.
|
||||
_ => {}
|
||||
}
|
||||
|
||||
label_node_with_retry(config, KATA_RUNTIME_LABEL, "true").await?;
|
||||
|
||||
info!("Kata Containers installation completed successfully");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -539,7 +686,7 @@ async fn cleanup(config: &config::Config, runtime: &str) -> Result<()> {
|
||||
info!("No other kata-deploy DaemonSets found, performing full shared cleanup");
|
||||
|
||||
info!("Removing kata-runtime label from node");
|
||||
k8s::label_node(config, "katacontainers.io/kata-runtime", None, false).await?;
|
||||
k8s::label_node(config, KATA_RUNTIME_LABEL, None, false).await?;
|
||||
info!("Successfully removed kata-runtime label");
|
||||
|
||||
// Restart the CRI runtime last. On k3s/rke2 this restarts the entire
|
||||
@@ -553,10 +700,111 @@ async fn cleanup(config: &config::Config, runtime: &str) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Cleanup stage 1 (unlabel): remove the kata-runtime node label first so the
|
||||
/// scheduler stops placing kata workloads on this node before any host
|
||||
/// mutation. Unprivileged, Kubernetes API only. Skips when already absent.
|
||||
async fn cleanup_stage_unlabel(config: &config::Config) -> Result<()> {
|
||||
info!("cleanup (unlabel): removing node label");
|
||||
|
||||
// If the label is already absent, there is nothing to do. Any other state
|
||||
// (present, or unknown due to a transient read error) falls through to the
|
||||
// removal below.
|
||||
if let Ok(None) = k8s::get_node_label(config, KATA_RUNTIME_LABEL).await {
|
||||
info!(
|
||||
"cleanup (unlabel): label {} already absent, skipping",
|
||||
KATA_RUNTIME_LABEL
|
||||
);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
k8s::label_node(config, KATA_RUNTIME_LABEL, None, false).await?;
|
||||
info!("cleanup (unlabel): label removed");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Cleanup stage 2 (revert-cri): remove CRI drop-ins (and any snapshotter
|
||||
/// config), then restart the runtime and wait for readiness. This is the
|
||||
/// privileged, node-disrupting cleanup stage and is kept short-lived. Skips
|
||||
/// entirely when the CRI drop-ins are already absent, avoiding an unnecessary
|
||||
/// runtime restart.
|
||||
async fn cleanup_stage_revert_cri(config: &config::Config, runtime: &str) -> Result<()> {
|
||||
info!("cleanup (revert-cri): reverting CRI configuration");
|
||||
|
||||
if !cri_drop_in_present(config, runtime).await {
|
||||
info!("cleanup (revert-cri): CRI drop-ins already absent, skipping");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if runtime != "crio" {
|
||||
if let Some(snapshotters) = config.experimental_setup_snapshotter.as_ref() {
|
||||
for snapshotter in snapshotters {
|
||||
info!("cleanup (revert-cri): uninstalling snapshotter {snapshotter}");
|
||||
artifacts::snapshotters::uninstall_snapshotter(snapshotter, config).await?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
runtime::cleanup_cri_runtime_config(config, runtime).await?;
|
||||
|
||||
info!("cleanup (revert-cri): restarting runtime");
|
||||
runtime::restart_and_wait_for_ready(config, runtime).await?;
|
||||
info!("cleanup (revert-cri): runtime restarted");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Cleanup stage 3 (remove-artifacts): delete kata artifacts/config/symlinks
|
||||
/// from the host. Skips when the install directory is already gone.
|
||||
async fn cleanup_stage_remove_artifacts(config: &config::Config) -> Result<()> {
|
||||
info!("cleanup (remove-artifacts): removing kata artifacts from host");
|
||||
|
||||
if !std::path::Path::new(&config.host_install_dir).exists() {
|
||||
info!(
|
||||
"cleanup (remove-artifacts): install dir {} already absent, skipping",
|
||||
config.host_install_dir
|
||||
);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
artifacts::remove_artifacts(config).await?;
|
||||
info!("cleanup (remove-artifacts): artifacts removed");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Best-effort check for whether kata's CRI drop-in configuration is present on
|
||||
/// the host for this runtime. Used by the staged cleanup to skip a disruptive
|
||||
/// runtime restart when there is nothing to revert. On any uncertainty (e.g.
|
||||
/// the containerd paths cannot be resolved) this returns `true` so the caller
|
||||
/// errs on the side of running the revert rather than incorrectly skipping it.
|
||||
async fn cri_drop_in_present(config: &config::Config, runtime: &str) -> bool {
|
||||
if runtime == "crio" {
|
||||
return std::path::Path::new(&config.crio_drop_in_conf_file).exists();
|
||||
}
|
||||
|
||||
match config.get_containerd_paths(runtime).await {
|
||||
Ok(paths) => {
|
||||
// /etc/containerd is mounted directly; other paths live under /host.
|
||||
let resolved = if paths.drop_in_file.starts_with("/etc/containerd/") {
|
||||
std::path::PathBuf::from(&paths.drop_in_file)
|
||||
} else {
|
||||
std::path::Path::new("/host").join(paths.drop_in_file.trim_start_matches('/'))
|
||||
};
|
||||
resolved.exists()
|
||||
}
|
||||
Err(e) => {
|
||||
log::warn!(
|
||||
"cleanup (revert-cri): could not resolve containerd paths to check drop-in \
|
||||
presence ({e}); proceeding with revert"
|
||||
);
|
||||
true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn reset(config: &config::Config, runtime: &str) -> Result<()> {
|
||||
info!("Resetting Kata Containers");
|
||||
|
||||
k8s::label_node(config, "katacontainers.io/kata-runtime", None, false).await?;
|
||||
k8s::label_node(config, KATA_RUNTIME_LABEL, None, false).await?;
|
||||
runtime::lifecycle::restart_cri_runtime(config, runtime).await?;
|
||||
if matches!(runtime, "crio" | "containerd") {
|
||||
utils::host_systemctl(&["restart", "kubelet"])?;
|
||||
@@ -566,3 +814,86 @@ async fn reset(config: &config::Config, runtime: &str) -> Result<()> {
|
||||
info!("Kata Containers reset completed successfully");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    //! CLI action wiring tests. The JobSet workflow invokes the staged
    //! install/cleanup actions by subcommand name, so these tests pin the
    //! exact names (a rename would silently break the chart), the mapping into
    //! the `Action` enum, and which actions show up in `--help`.

    use super::*;
    use clap::ValueEnum;
    use rstest::rstest;

    /// Each subcommand name must parse into its matching `Action` variant.
    /// Keep the cases in sync with the `#[clap(name = ...)]` attributes on
    /// `Action`.
    #[rstest]
    #[case("install", Action::Install)]
    #[case("cleanup", Action::Cleanup)]
    #[case("reset", Action::Reset)]
    #[case("install-stage-host-check", Action::InstallStageHostCheck)]
    #[case("install-stage-artifacts", Action::InstallStageArtifacts)]
    #[case("install-stage-cri", Action::InstallStageCri)]
    #[case("install-stage-label", Action::InstallStageLabel)]
    #[case("cleanup-stage-unlabel", Action::CleanupStageUnlabel)]
    #[case("cleanup-stage-revert-cri", Action::CleanupStageRevertCri)]
    #[case("cleanup-stage-remove-artifacts", Action::CleanupStageRemoveArtifacts)]
    #[case("internal-post-install-wait", Action::InternalPostInstallWait)]
    fn test_action_parses_from_arg(#[case] arg: &str, #[case] expected: Action) {
        let parsed = match Args::try_parse_from(["kata-deploy", arg]) {
            Ok(parsed) => parsed,
            Err(e) => panic!("failed to parse action {arg:?}: {e}"),
        };

        // Compare variants only; `Action` is matched by discriminant here.
        let got = std::mem::discriminant(&parsed.action);
        let want = std::mem::discriminant(&expected);
        assert_eq!(got, want, "arg {arg:?} parsed into the wrong Action variant");
    }

    /// Names that are not actions (including bare stage prefixes) must fail
    /// to parse instead of being silently accepted.
    #[rstest]
    #[case("install-stage")]
    #[case("cleanup-stage")]
    #[case("install-stage-foo")]
    #[case("bogus")]
    fn test_unknown_action_is_rejected(#[case] arg: &str) {
        let outcome = Args::try_parse_from(["kata-deploy", arg]);
        assert!(outcome.is_err(), "expected action {arg:?} to be rejected");
    }

    /// The internal waiter action stays hidden from `--help` (it is still
    /// parseable, which the parsing test above covers).
    #[test]
    fn test_internal_action_is_hidden() {
        let possible = Action::InternalPostInstallWait
            .to_possible_value()
            .expect("internal action should have a possible value");
        let hidden = possible.is_hide_set();
        assert!(
            hidden,
            "internal-post-install-wait should be hidden from --help",
        );
    }

    /// Every staged action must be listed in `--help` so operators can find
    /// and run individual stages.
    #[rstest]
    #[case(Action::InstallStageHostCheck)]
    #[case(Action::InstallStageArtifacts)]
    #[case(Action::InstallStageCri)]
    #[case(Action::InstallStageLabel)]
    #[case(Action::CleanupStageUnlabel)]
    #[case(Action::CleanupStageRevertCri)]
    #[case(Action::CleanupStageRemoveArtifacts)]
    fn test_staged_actions_are_visible(#[case] action: Action) {
        let possible = action
            .to_possible_value()
            .expect("staged action should have a possible value");
        let hidden = possible.is_hide_set();
        assert!(
            !hidden,
            "staged action {:?} should be visible in --help",
            possible.get_name(),
        );
    }
}
|
||||
|
||||
Reference in New Issue
Block a user