Compare commits

...

3 Commits

Author SHA1 Message Date
Anastassios Nanos
dbc51c87b8 runtime-rs: Add FC tests and rename rs-fc to fc-rs
WIP: Add tests for runtime-rs FC support.

Signed-off-by: Anastassios Nanos <ananos@nubificus.co.uk>
2026-04-03 18:32:46 +00:00
Anastassios Nanos
2cd745c1ad runtime-rs: Split FC spawn sequence
To account for network hooks and the lack of device hotplugging on FC, we split the
spawn process into start and boot. This way, the VMM is already up, but the VM sandbox
is not -- we can add devices and then kick off the VM with InstanceStart.

Signed-off-by: Anastassios Nanos <ananos@nubificus.co.uk>
2026-04-03 11:16:15 +00:00
Anastassios Nanos
cb5ba485a2 runtime-rs: Fix FC spawn with network
Refactor network link detection so that FC can boot properly.

Signed-off-by: Anastassios Nanos <ananos@nubificus.co.uk>
2026-04-03 11:16:15 +00:00
31 changed files with 438 additions and 110 deletions

View File

@@ -88,7 +88,7 @@ jobs:
fail-fast: false
matrix:
containerd_version: ['lts', 'active']
vmm: ['clh', 'cloud-hypervisor', 'dragonball', 'qemu', 'qemu-runtime-rs']
vmm: ['clh', 'cloud-hypervisor', 'dragonball', 'qemu', 'qemu-runtime-rs', 'fc-rs']
runs-on: ubuntu-22.04
env:
CONTAINERD_VERSION: ${{ matrix.containerd_version }}
@@ -327,6 +327,7 @@ jobs:
- qemu
- cloud-hypervisor
- qemu-runtime-rs
- fc-rs
runs-on: ubuntu-22.04
env:
KATA_HYPERVISOR: ${{ matrix.vmm }}

View File

@@ -382,11 +382,13 @@ jobs:
{ containerd_version: lts, vmm: qemu },
{ containerd_version: lts, vmm: cloud-hypervisor },
{ containerd_version: lts, vmm: qemu-runtime-rs },
{ containerd_version: lts, vmm: fc-rs },
{ containerd_version: active, vmm: clh },
{ containerd_version: active, vmm: dragonball },
{ containerd_version: active, vmm: qemu },
{ containerd_version: active, vmm: cloud-hypervisor },
{ containerd_version: active, vmm: qemu-runtime-rs },
{ containerd_version: active, vmm: fc-rs },
]
uses: ./.github/workflows/run-cri-containerd-tests.yaml
with:

View File

@@ -46,6 +46,8 @@ jobs:
{ vmm: qemu-runtime-rs, containerd_version: active },
{ vmm: cloud-hypervisor, containerd_version: lts },
{ vmm: cloud-hypervisor, containerd_version: active },
{ vmm: fc-rs, containerd_version: lts, snapshotter: devmapper },
{ vmm: fc-rs, containerd_version: active, snapshotter: devmapper },
]
runs-on: ubuntu-24.04
permissions:
@@ -61,6 +63,7 @@ jobs:
K8S_TEST_HOST_TYPE: baremetal-no-attestation
CONTAINER_ENGINE: containerd
CONTAINER_ENGINE_VERSION: ${{ matrix.environment.containerd_version }}
SNAPSHOTTER: ${{ matrix.environment.snapshotter || '' }}
GH_TOKEN: ${{ github.token }}
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -105,6 +108,10 @@ jobs:
- name: Deploy k8s (kubeadm)
run: bash tests/integration/kubernetes/gha-run.sh deploy-k8s
- name: Configure snapshotter
if: matrix.environment.snapshotter != ''
run: bash tests/integration/kubernetes/gha-run.sh configure-snapshotter
- name: Install `bats`
run: bash tests/integration/kubernetes/gha-run.sh install-bats

View File

@@ -275,7 +275,7 @@ Each hypervisor has a dedicated configuration file:
|------------|----------------|-----------------|
| QEMU |`configuration-qemu-runtime-rs.toml` |`configuration-qemu.toml` |
| Cloud Hypervisor | `configuration-cloud-hypervisor.toml` | `configuration-clh.toml` |
| Firecracker | `configuration-rs-fc.toml` | `configuration-fc.toml` |
| Firecracker | `configuration-fc-rs.toml` | `configuration-fc.toml` |
| Dragonball | `configuration-dragonball.toml` (default) | `No` |
> **Note:** Configuration files are typically installed in `/opt/kata/share/defaults/kata-containers/` or `/opt/kata/share/defaults/kata-containers/runtime-rs/` or `/usr/share/defaults/kata-containers/`.

View File

@@ -419,7 +419,7 @@ endif
ifneq (,$(FCCMD))
KNOWN_HYPERVISORS += $(HYPERVISOR_FC)
CONFIG_FILE_FC = configuration-rs-fc.toml
CONFIG_FILE_FC = configuration-fc-rs.toml
CONFIG_FC = config/$(CONFIG_FILE_FC)
CONFIG_FC_IN = $(CONFIG_FC).in
CONFIG_PATH_FC = $(abspath $(CONFDIR)/$(CONFIG_FILE_FC))

View File

@@ -157,7 +157,7 @@ Configuration files in `config/`:
| `configuration-dragonball.toml.in` | Dragonball | Built-in VMM |
| `configuration-qemu-runtime-rs.toml.in` | QEMU | Default external |
| `configuration-cloud-hypervisor.toml.in` | Cloud Hypervisor | Modern VMM |
| `configuration-rs-fc.toml.in` | Firecracker | Lightweight microVM |
| `configuration-fc-rs.toml.in` | Firecracker | Lightweight microVM |
| `configuration-remote.toml.in` | Remote | Remote hypervisor |
| `configuration-qemu-tdx-runtime-rs.toml.in` | QEMU + TDX | Intel TDX confidential computing |
| `configuration-qemu-snp-runtime-rs.toml.in` | QEMU + SEV-SNP | AMD SEV-SNP confidential computing |

View File

@@ -3,6 +3,8 @@
//
//SPDX-License-Identifier: Apache-2.0
use std::convert::TryFrom;
use crate::{
firecracker::{
inner_hypervisor::{FC_AGENT_SOCKET_NAME, ROOT},
@@ -13,7 +15,7 @@ use crate::{
};
use anyhow::{anyhow, Context, Result};
use dbs_utils::net::MacAddr;
use hyper::{Body, Method, Request, Response};
use hyper::{Body, Method, Request};
use hyperlocal::Uri;
use kata_sys_util::mount;
use kata_types::config::hypervisor::RateLimiterConfig;
@@ -23,6 +25,16 @@ use tokio::{fs, fs::File};
const REQUEST_RETRY: u32 = 500;
const FC_KERNEL: &str = "vmlinux";
/// Distinguishes a transient transport error (FC not ready yet, retry allowed)
/// from a permanent HTTP-level API error returned by FC (no retry).
#[derive(Debug)]
enum FcRequestError {
/// Could not reach the FC API socket (connection refused, etc.)
Transport(String),
/// FC returned a non-2xx HTTP status. (status_code, response_body)
Api(u16, String),
}
const FC_ROOT_FS: &str = "rootfs";
const DRIVE_PREFIX: &str = "drive";
const DISK_POOL_SIZE: u32 = 6;
@@ -111,7 +123,8 @@ impl FcInner {
let body_config: String = json!({
"mem_size_mib": self.config.memory_info.default_memory,
"vcpu_count": self.config.cpu_info.default_vcpus.ceil() as u8,
"vcpu_count": u8::try_from(self.config.cpu_info.default_vcpus.ceil() as u64)
.context("vcpu_count overflows u8")?,
})
.to_string();
let body_kernel: String = json!({
@@ -215,13 +228,29 @@ impl FcInner {
Some(mac) => MacAddr::from_bytes(&mac.0).ok(),
None => None,
};
let rx_rate_limiter = RateLimiterConfig::new(
self.config.network_info.rx_rate_limiter_max_rate,
0,
None,
None,
);
let tx_rate_limiter = RateLimiterConfig::new(
self.config.network_info.tx_rate_limiter_max_rate,
0,
None,
None,
);
let body: String = json!({
"iface_id": &device_id,
"guest_mac": g_mac,
"host_dev_name": &config.host_dev_name
"host_dev_name": &config.host_dev_name,
"rx_rate_limiter": rx_rate_limiter,
"tx_rate_limiter": tx_rate_limiter,
})
.to_string();
info!(sl(), "FC: add network device: iface_id={} guest_mac={:?} host_dev_name={}", device_id, g_mac, config.host_dev_name);
self.request_with_retry(
Method::PUT,
&["/network-interfaces/", &device_id].concat(),
@@ -259,50 +288,54 @@ impl FcInner {
.body(Body::from(data.clone()))?;
match self.send_request(req).await {
Ok(resp) => {
debug!(sl(), "Request sent, resp: {:?}", resp);
return Ok(());
}
Err(resp) => {
debug!(sl(), "Request sent with error, resp: {:?}", resp);
std::thread::sleep(std::time::Duration::from_millis(10));
Ok(_) => return Ok(()),
// A transport error (FC not ready yet) — retry.
Err(FcRequestError::Transport(e)) => {
debug!(sl(), "FC not reachable yet, retrying: {:?}", e);
tokio::time::sleep(std::time::Duration::from_millis(10)).await;
continue;
}
// An HTTP-level error from FC — fail immediately with the
// actual error body so the problem is visible in logs.
Err(FcRequestError::Api(status, body)) => {
return Err(anyhow::anyhow!(
"FC API error: status={} body={}",
status,
body
));
}
}
}
Err(anyhow::anyhow!(
"After {} attempts, it still doesn't work.",
REQUEST_RETRY
"FC not reachable after {} attempts (method={:?} uri={:?})",
REQUEST_RETRY,
method,
uri,
))
}
pub(crate) async fn send_request(&self, req: Request<Body>) -> Result<Response<Body>> {
let resp = self.client.request(req).await?;
async fn send_request(&self, req: Request<Body>) -> Result<(), FcRequestError> {
let resp = self
.client
.request(req)
.await
.map_err(|e| FcRequestError::Transport(e.to_string()))?;
let status = resp.status();
debug!(sl(), "Request RESPONSE {:?} {:?}", &status, resp);
if status.is_success() {
return Ok(resp);
} else {
let body = hyper::body::to_bytes(resp.into_body()).await?;
if body.is_empty() {
debug!(sl(), "Request FAILED WITH STATUS: {:?}", status);
None
} else {
let body = String::from_utf8_lossy(&body).into_owned();
debug!(
sl(),
"Request FAILED WITH STATUS: {:?} and BODY: {:?}", status, body
);
Some(body)
};
debug!(sl(), "FC request succeeded: {:?}", status);
return Ok(());
}
Err(anyhow::anyhow!(
"After {} attempts, it
still doesn't work.",
REQUEST_RETRY
))
let body = hyper::body::to_bytes(resp.into_body())
.await
.map(|b| String::from_utf8_lossy(&b).into_owned())
.unwrap_or_default();
error!(
sl(),
"FC API rejected request: status={:?} body={:?}", status, body
);
Err(FcRequestError::Api(status.as_u16(), body))
}
pub(crate) fn cleanup_resource(&self) {
if self.jailed {

View File

@@ -4,6 +4,7 @@
//SPDX-License-Identifier: Apache-2.0
use crate::firecracker::{inner_hypervisor::FC_API_SOCKET_NAME, sl};
use crate::device::driver::NetworkConfig;
use crate::MemoryConfig;
use crate::HYPERVISOR_FIRECRACKER;
use crate::{device::DeviceType, VmmState};
@@ -43,6 +44,9 @@ pub struct FcInner {
pub(crate) jailed: bool,
pub(crate) run_dir: String,
pub(crate) pending_devices: Vec<DeviceType>,
/// Network devices buffered until start_vm() so they are always sent to FC
/// before InstanceStart, mirroring the Go runtime's batch-configuration approach.
pub(crate) pending_net_devices: Vec<(NetworkConfig, String)>,
pub(crate) capabilities: Capabilities,
pub(crate) fc_process: Mutex<Option<Child>>,
pub(crate) exit_notify: Option<mpsc::Sender<()>>,
@@ -51,7 +55,9 @@ pub struct FcInner {
impl FcInner {
pub fn new(exit_notify: mpsc::Sender<()>) -> FcInner {
let mut capabilities = Capabilities::new();
capabilities.set(CapabilityBits::BlockDeviceSupport);
capabilities.set(
CapabilityBits::BlockDeviceSupport | CapabilityBits::HybridVsockSupport,
);
FcInner {
id: String::default(),
@@ -66,6 +72,7 @@ impl FcInner {
jailed: false,
run_dir: String::default(),
pending_devices: vec![],
pending_net_devices: vec![],
capabilities,
fc_process: Mutex::new(None),
exit_notify: Some(exit_notify),
@@ -80,7 +87,7 @@ impl FcInner {
debug!(sl(), "Running Jailed");
cmd = Command::new(&self.config.jailer_path);
let api_socket = ["/run/", FC_API_SOCKET_NAME].join("/");
let args = [
let mut args = vec![
"--id",
&self.id,
"--gid",
@@ -91,11 +98,16 @@ impl FcInner {
&self.config.path,
"--chroot-base-dir",
&self.jailer_root,
"--",
"--api-sock",
&api_socket,
];
cmd.args(args);
// Pass the network namespace to the jailer so that the FC process
// is placed in the correct netns. This is the recommended approach
// over relying on pre_exec setns inheritance.
let netns_path = netns.clone().unwrap_or_default();
if !netns_path.is_empty() {
args.extend_from_slice(&["--netns", &netns_path]);
}
args.extend_from_slice(&["--", "--api-sock", &api_socket]);
cmd.args(&args);
}
false => {
debug!(sl(), "Running non-Jailed");
@@ -108,15 +120,22 @@ impl FcInner {
}
debug!(sl(), "Exec: {:?}", cmd);
// Make sure we're in the correct Network Namespace
// For the non-jailed case, enter the network namespace via pre_exec so that
// the FC process inherits it. For the jailed case, --netns is passed to the
// jailer above and pre_exec setns is skipped.
let jailed = self.jailed;
unsafe {
let selinux_label = self.config.security_info.selinux_label.clone();
let _pre = cmd.pre_exec(move || {
if let Some(netns_path) = &netns {
debug!(sl(), "set netns for vmm master {:?}", &netns_path);
let netns_fd = std::fs::File::open(netns_path);
let _ = setns(netns_fd?.as_raw_fd(), CloneFlags::CLONE_NEWNET)
.context("set netns failed");
if !jailed {
if let Some(netns_path) = &netns {
debug!(sl(), "set netns for vmm master {:?}", &netns_path);
let netns_fd = std::fs::File::open(netns_path)?;
setns(netns_fd.as_raw_fd(), CloneFlags::CLONE_NEWNET)
.map_err(|e| std::io::Error::other(
format!("setns into {:?} failed: {}", netns_path, e),
))?;
}
}
if let Some(label) = selinux_label.as_ref() {
if let Err(e) = selinux::set_exec_label(label) {
@@ -256,6 +275,7 @@ impl Persist for FcInner {
jailer_root: hypervisor_state.jailer_root,
client: Client::unix(),
pending_devices: vec![],
pending_net_devices: vec![],
run_dir: hypervisor_state.run_dir,
capabilities: Capabilities::new(),
fc_process: Mutex::new(None),

View File

@@ -31,10 +31,29 @@ impl FcInner {
.hotplug_block_device(block.config.path_on_host.as_str(), block.config.index)
.await
.context("add block device"),
DeviceType::Network(network) => self
.add_net_device(&network.config, network.device_id)
.await
.context("add net device"),
DeviceType::Network(network) => {
// Buffer network devices and send them to FC just before InstanceStart
// in boot_vm(). Firecracker rejects PUT /network-interfaces after the
// VM has started, so we must ensure they arrive before InstanceStart.
// This mirrors the Go runtime's batch-configuration approach.
//
// If the VM is already running (e.g. a post-start prestart-hooks rescan
// called add_device again), we cannot do anything useful — FC has already
// started and does not support network-interface hotplug. Log a warning
// and return Ok so the rest of the setup path can continue.
if self.state == VmmState::VmRunning {
warn!(
sl(),
"FC: ignoring late network device add for iface {} — VM already running, hotplug not supported",
network.device_id
);
return Ok(());
}
debug!(sl(), "buffering network device for pre-start flush");
self.pending_net_devices
.push((network.config, network.device_id));
Ok(())
}
DeviceType::HybridVsock(hvsock) => {
self.add_hvsock(&hvsock.config).await.context("add vsock")
}

View File

@@ -76,7 +76,29 @@ impl FcInner {
}
pub(crate) async fn start_vm(&mut self, _timeout: i32) -> Result<()> {
debug!(sl(), "Starting sandbox");
// For Firecracker, the VMM process was already started in prepare_vm.
// Network interfaces must be configured before InstanceStart, but
// OCI hooks (which create the container veth via CNI) have not run
// yet. Defer the network flush and InstanceStart to boot_vm(), which
// sandbox.rs calls after the hooks + network rescan.
debug!(sl(), "FC start_vm: VMM already running; deferring InstanceStart to boot_vm");
Ok(())
}
pub(crate) async fn boot_vm(&mut self) -> Result<()> {
debug!(sl(), "FC boot_vm: flushing network devices and sending InstanceStart");
// Flush all buffered network devices. These were populated by
// add_device(Network) after the OCI hooks ran and the netns was
// rescanned by sandbox.rs. FC rejects PUT /network-interfaces once
// the VM is running, so this must happen before InstanceStart.
let net_devices = std::mem::take(&mut self.pending_net_devices);
for (config, device_id) in net_devices {
self.add_net_device(&config, device_id)
.await
.context("configure network interface before InstanceStart")?;
}
let body: String = serde_json::json!({
"action_type": "InstanceStart"
})

View File

@@ -75,6 +75,11 @@ impl Hypervisor for Firecracker {
inner.start_vm(timeout).await
}
async fn boot_vm(&self) -> Result<()> {
let mut inner = self.inner.write().await;
inner.boot_vm().await
}
async fn stop_vm(&self) -> Result<()> {
let mut inner = self.inner.write().await;
inner.stop_vm().await

View File

@@ -106,6 +106,23 @@ pub trait Hypervisor: std::fmt::Debug + Send + Sync {
selinux_label: Option<String>,
) -> Result<()>;
async fn start_vm(&self, timeout: i32) -> Result<()>;
/// Finalize VM boot after OCI hooks and network setup have run.
///
/// For hypervisors that require all devices (including network) to be
/// registered before the guest boots (e.g. Firecracker, which has no
/// hotplug), `start_vm` only starts the VMM process, while `boot_vm`
/// flushes the device queue and issues the actual boot command
/// (InstanceStart for FC). For hypervisors that start the guest
/// immediately in `start_vm` (QEMU, dragonball, cloud-hypervisor), the
/// default no-op implementation is sufficient.
///
/// sandbox.rs calls this after OCI hooks and the post-hooks network
/// rescan, but before connecting to the kata-agent.
async fn boot_vm(&self) -> Result<()> {
Ok(())
}
async fn stop_vm(&self) -> Result<()>;
async fn wait_vm(&self) -> Result<i32>;
async fn pause_vm(&self) -> Result<()>;

View File

@@ -691,14 +691,12 @@ impl QemuInner {
let is_unaligned = !new_hotplugged_mem.is_multiple_of(guest_mem_block_size);
if is_unaligned {
new_hotplugged_mem = ch_config::convert::checked_next_multiple_of(
new_hotplugged_mem,
guest_mem_block_size,
)
.ok_or(anyhow!(format!(
"alignment of {} B to the block size of {} B failed",
new_hotplugged_mem, guest_mem_block_size
)))?
new_hotplugged_mem = new_hotplugged_mem
.checked_next_multiple_of(guest_mem_block_size)
.ok_or(anyhow!(format!(
"alignment of {} B to the block size of {} B failed",
new_hotplugged_mem, guest_mem_block_size
)))?
}
let new_hotplugged_mem = new_hotplugged_mem;

View File

@@ -249,13 +249,48 @@ impl ResourceManagerInner {
}
async fn handle_interfaces(&self, network: &dyn Network) -> Result<()> {
// The guest virtio-net device may not be visible to the kernel immediately
// after InstanceStart completes. Retry on "Link not found" to allow time
// for virtio-net driver initialisation in the guest.
// Use a generous window (100 × 100 ms = 10 s) since on some systems
// virtio-net initialisation is slower than the Go runtime's 20 × 20 ms.
const MAX_ATTEMPTS: u32 = 100;
const RETRY_DELAY_MS: u64 = 100;
for i in network.interfaces().await.context("get interfaces")? {
// update interface
info!(sl!(), "update interface {:?}", i);
self.agent
.update_interface(agent::UpdateInterfaceRequest { interface: Some(i) })
.await
.context("update interface")?;
info!(sl!(), "update interface: hw_addr={} name={}", i.hw_addr, i.name);
let mut last_err = None;
for attempt in 0..MAX_ATTEMPTS {
let result = self
.agent
.update_interface(agent::UpdateInterfaceRequest {
interface: Some(i.clone()),
})
.await;
if let Err(e) = result {
let msg = e.to_string();
if msg.contains("Link not found") {
info!(
sl!(),
"update interface: link not found (attempt {}/{}), retrying in {}ms",
attempt + 1,
MAX_ATTEMPTS,
RETRY_DELAY_MS,
);
last_err = Some(e);
tokio::time::sleep(std::time::Duration::from_millis(RETRY_DELAY_MS))
.await;
} else {
return Err(e).context("update interface");
}
} else {
last_err = None;
break;
}
}
if let Some(e) = last_err {
return Err(e).context("update interface");
}
}
Ok(())

View File

@@ -49,7 +49,13 @@ impl NetworkPair {
let unique_id = kata_sys_util::rand::UUID::new();
let model = network_model::new(model).context("new network model")?;
let tap_iface_name = format!("tap{idx}{TAP_SUFFIX}");
let virt_iface_name = format!("eth{idx}");
// Use the actual interface name from the netns scan. Fall back to eth{idx}
// only if the caller passed an empty name.
let virt_iface_name = if name.is_empty() {
format!("eth{idx}")
} else {
name.to_string()
};
let tap_link = create_link(handle, &tap_iface_name, queues)
.await
.context("create link")?;
@@ -106,7 +112,7 @@ impl NetworkPair {
.await
.context("set link up")?;
let mut net_pair = NetworkPair {
let net_pair = NetworkPair {
tap: TapInterface {
id: String::from(&unique_id),
name: format!("br{idx}{TAP_SUFFIX}"),
@@ -125,10 +131,6 @@ impl NetworkPair {
network_qos: false,
};
if !name.is_empty() {
net_pair.virt_iface.name = String::from(name);
}
Ok(net_pair)
}

View File

@@ -140,21 +140,22 @@ impl Network for NetworkWithNetns {
async fn remove(&self, h: &dyn Hypervisor) -> Result<()> {
let inner = self.inner.read().await;
// The network namespace would have been deleted at this point
// if it has not been created by virtcontainers.
if !inner.network_created {
return Ok(());
}
{
// Always clean up endpoint resources (TC filter rules, TAP devices) regardless
// of who created the network namespace.
if !inner.netns_path.is_empty() {
let _netns_guard =
netns::NetnsGuard::new(&inner.netns_path).context("net netns guard")?;
for e in &inner.entity_list {
e.endpoint.detach(h).await.context("detach")?;
}
}
let netns = get_from_path(inner.netns_path.clone())?;
netns.remove()?;
fs::remove_dir_all(inner.netns_path.clone()).context("failed to remove netns path")?;
// Only remove the network namespace itself if virtcontainers created it.
if inner.network_created {
let netns = get_from_path(inner.netns_path.clone())?;
netns.remove()?;
fs::remove_dir_all(inner.netns_path.clone())
.context("failed to remove netns path")?;
}
Ok(())
}
}

View File

@@ -613,7 +613,6 @@ impl Sandbox for VirtSandbox {
// start vm
self.hypervisor.start_vm(10_000).await.context("start vm")?;
info!(sl!(), "start vm");
// execute pre-start hook functions, including Prestart Hooks and CreateRuntime Hooks
let (prestart_hooks, create_runtime_hooks) =
@@ -661,6 +660,18 @@ impl Sandbox for VirtSandbox {
}
}
// Give the hypervisor a chance to finalize boot now that OCI hooks and
// the post-hooks network rescan have completed. For hypervisors that
// require all devices (including network) to be registered before the
// guest boots (e.g. Firecracker), start_vm defers the actual boot
// command to this call. For hypervisors that boot the guest in
// start_vm (QEMU, dragonball, cloud-hypervisor), this is a no-op.
self.hypervisor
.boot_vm()
.await
.context("boot vm")?;
info!(sl!(), "start vm");
// connect agent
// set agent socket
let address = self

View File

@@ -178,7 +178,7 @@ function extract_kata_env() {
local req_num_vcpus
case "${KATA_HYPERVISOR}" in
dragonball)
dragonball|fc-rs|cloud-hypervisor|qemu-runtime-rs|qemu-se-runtime-rs)
cmd=kata-ctl
config_path=".runtime.config.path"
runtime_version=".runtime.version"
@@ -205,7 +205,7 @@ function extract_kata_env() {
req_num_vcpus=""
;;
esac
kata_env="$(sudo ${cmd} env --json)"
kata_env="$(sudo ${cmd} env --json 2>/dev/null || true)"
RUNTIME_CONFIG_PATH="$(echo "${kata_env}" | jq -r ${config_path})"
RUNTIME_VERSION="$(echo "${kata_env}" | jq -r ${runtime_version} | grep ${runtime_version_semver} | cut -d'"' -f4)"
@@ -228,8 +228,19 @@ function extract_kata_env() {
VIRTIOFSD_PATH=$(echo "${kata_env}" | jq -r ${virtio_fs_daemon_path})
INITRD_PATH=$(echo "${kata_env}" | jq -r ${initrd_path})
# Fall back to well-known paths when kata-ctl is unavailable (e.g. local builds
# that pre-date kata-ctl or installations that only ship the shim binary).
if [ "${HYPERVISOR_PATH}" = "null" ] || [ -z "${HYPERVISOR_PATH}" ]; then
case "${KATA_HYPERVISOR}" in
fc-rs) HYPERVISOR_PATH="/opt/kata/bin/firecracker" ;;
cloud-hypervisor) HYPERVISOR_PATH="/opt/kata/bin/cloud-hypervisor" ;;
qemu-runtime-rs|\
qemu-se-runtime-rs) HYPERVISOR_PATH="/opt/kata/bin/qemu-system-x86_64" ;;
esac
fi
# TODO: there is no ${cmd} of rust version currently
if [ "${KATA_HYPERVISOR}" != "dragonball" ]; then
if [[ ! "${KATA_HYPERVISOR}" =~ ^(dragonball|cloud-hypervisor|qemu-runtime-rs|qemu-se-runtime-rs)$ ]]; then
if [ "${KATA_HYPERVISOR}" = "stratovirt" ]; then
HYPERVISOR_VERSION=$(sudo -E ${HYPERVISOR_PATH} -version | head -n1)
else
@@ -517,12 +528,14 @@ function enabling_hypervisor() {
declare -r CONTAINERD_SHIM_KATA="/usr/local/bin/containerd-shim-kata-${KATA_HYPERVISOR}-v2"
case "${KATA_HYPERVISOR}" in
dragonball|cloud-hypervisor|qemu-runtime-rs|qemu-se-runtime-rs)
dragonball|cloud-hypervisor|qemu-runtime-rs|qemu-se-runtime-rs|fc-rs)
sudo ln -sf "${KATA_DIR}/runtime-rs/bin/containerd-shim-kata-v2" "${CONTAINERD_SHIM_KATA}"
sudo ln -sf "${KATA_DIR}/runtime-rs/bin/containerd-shim-kata-v2" "/usr/local/bin/containerd-shim-kata-v2"
declare -r CONFIG_DIR="${KATA_DIR}/share/defaults/kata-containers/runtime-rs"
;;
*)
sudo ln -sf "${KATA_DIR}/bin/containerd-shim-kata-v2" "${CONTAINERD_SHIM_KATA}"
sudo ln -sf "${KATA_DIR}/bin/containerd-shim-kata-v2" "/usr/local/bin/containerd-shim-kata-v2"
declare -r CONFIG_DIR="${KATA_DIR}/share/defaults/kata-containers"
;;
esac
@@ -536,6 +549,65 @@ function enabling_hypervisor() {
}
# Sets up a devmapper thin-pool and reconfigures standalone containerd to use
# it as the default snapshotter. Required for block-device based hypervisors
# (e.g. Firecracker / fc-rs) that cannot use the overlayfs snapshotter.
# Expects containerd to already be installed and /etc/containerd/config.toml
# to exist (e.g. after `containerd config default | sudo tee ...`).
function configure_devmapper_for_containerd() {
info "Configuring devmapper snapshotter for standalone containerd"
sudo mkdir -p /var/lib/containerd/devmapper
sudo truncate --size 10G /var/lib/containerd/devmapper/data-disk.img
sudo truncate --size 1G /var/lib/containerd/devmapper/meta-disk.img
# Allocate loop devices dynamically to avoid conflicts with pre-existing ones.
local loop_data loop_meta
loop_data=$(sudo losetup --find --show /var/lib/containerd/devmapper/data-disk.img)
loop_meta=$(sudo losetup --find --show /var/lib/containerd/devmapper/meta-disk.img)
info "devmapper: data=${loop_data} meta=${loop_meta}"
local data_sectors
data_sectors=$(sudo blockdev --getsz "${loop_data}")
sudo dmsetup create contd-thin-pool \
--table "0 ${data_sectors} thin-pool ${loop_meta} ${loop_data} 512 32768 1 skip_block_zeroing"
# Write a complete containerd v2 config combining the kata runtime with the
# devmapper snapshotter. We replace whatever was there before (both the v2
# config from overwrite_containerd_config and the v3 default from
# 'containerd config default') so the result is always correct regardless of
# format or prior content.
sudo tee /etc/containerd/config.toml <<'TOML_EOF'
version = 2
[plugins]
[plugins."io.containerd.grpc.v1.cri"]
[plugins."io.containerd.grpc.v1.cri".containerd]
snapshotter = "devmapper"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.kata]
runtime_type = "io.containerd.kata.v2"
[plugins."io.containerd.snapshotter.v1.devmapper"]
pool_name = "contd-thin-pool"
root_path = "/var/lib/containerd/devmapper"
base_image_size = "4096MB"
discard_blocks = true
TOML_EOF
sudo systemctl restart containerd
# Verify the plugin came up healthy
local dm_status
dm_status=$(sudo ctr plugins ls | awk '$2 ~ /^devmapper$/ { print $4 }' || true)
[ "${dm_status}" = "ok" ] || \
die "containerd devmapper snapshotter not healthy (status: '${dm_status}')"
info "devmapper snapshotter configured and healthy"
}
function check_containerd_config_for_kata() {
# check containerd config
declare -r line1="default_runtime_name = \"kata\""

View File

@@ -66,6 +66,18 @@ function install_dependencies() {
clone_cri_containerd $(get_from_kata_deps ".externals.containerd.${CONTAINERD_VERSION}")
}
function install_kata_for_cri_containerd() {
install_kata
# Firecracker (fc-rs) uses block devices and requires the devmapper
# snapshotter; other hypervisors work fine with the default overlayfs.
# Must run AFTER install_kata since install_kata may overwrite the
# containerd config via overwrite_containerd_config.
if [ "${KATA_HYPERVISOR:-}" = "fc-rs" ]; then
configure_devmapper_for_containerd
fi
}
function run() {
info "Running cri-containerd tests using ${KATA_HYPERVISOR} hypervisor"
@@ -78,7 +90,8 @@ function main() {
action="${1:-}"
case "${action}" in
install-dependencies) install_dependencies ;;
install-kata) install_kata ;;
install-kata) install_kata_for_cri_containerd ;;
configure-snapshotter) configure_devmapper_for_containerd ;;
run) run ;;
*) >&2 die "Invalid argument" ;;
esac

View File

@@ -126,6 +126,9 @@ cat << EOF | sudo tee "${CONTAINERD_CONFIG_FILE}"
echo 'pod_annotations = ["io.katacontainers.*"]' && \
echo ' container_annotations = ["io.katacontainers.*"]'
)
$( [ "${KATA_HYPERVISOR}" = "fc-rs" ] && \
echo 'snapshotter = "devmapper"'
)
[plugins.${pluginid}.containerd.runtimes.${runtime}.options]
ConfigPath = "${runtime_config_path}"
BinaryName = "${runtime_binary_path}"
@@ -133,6 +136,14 @@ $( [[ -n "$containerd_shim_path" ]] && \
echo "[plugins.linux]" && \
echo " shim = \"${containerd_shim_path}\""
)
$( [ "${KATA_HYPERVISOR}" = "fc-rs" ] && \
printf '%s\n' \
'[plugins."io.containerd.snapshotter.v1.devmapper"]' \
' pool_name = "contd-thin-pool"' \
' root_path = "/var/lib/containerd/devmapper"' \
' base_image_size = "4096MB"' \
' discard_blocks = true'
)
EOF
}

View File

@@ -41,8 +41,16 @@ export RUNS_ON_AKS="${RUNS_ON_AKS:-false}"
function configure_devmapper() {
sudo mkdir -p /var/lib/containerd/devmapper
sudo truncate --size 10G /var/lib/containerd/devmapper/data-disk.img
sudo truncate --size 10G /var/lib/containerd/devmapper/meta-disk.img
sudo truncate --size 1G /var/lib/containerd/devmapper/meta-disk.img
# Allocate loop devices dynamically to avoid conflicts with pre-existing ones
# (e.g. snap loop mounts on ubuntu-24.04).
local loop_data loop_meta
loop_data=$(sudo losetup --find --show /var/lib/containerd/devmapper/data-disk.img)
loop_meta=$(sudo losetup --find --show /var/lib/containerd/devmapper/meta-disk.img)
info "devmapper: data=${loop_data} meta=${loop_meta}"
# Persist the loop device mapping across reboots / containerd restarts.
cat<<EOF | sudo tee /etc/systemd/system/containerd-devmapper.service
[Unit]
Description=Setup containerd devmapper device
@@ -53,14 +61,14 @@ Wants=systemd-udev-settle.service
[Service]
Type=oneshot
RemainAfterExit=true
ExecStart=-/sbin/losetup /dev/loop20 /var/lib/containerd/devmapper/data-disk.img
ExecStart=-/sbin/losetup /dev/loop21 /var/lib/containerd/devmapper/meta-disk.img
ExecStart=-/sbin/losetup ${loop_data} /var/lib/containerd/devmapper/data-disk.img
ExecStart=-/sbin/losetup ${loop_meta} /var/lib/containerd/devmapper/meta-disk.img
[Install]
WantedBy=local-fs.target
EOF
sudo systemctl daemon-reload
sudo systemctl enable --now containerd-devmapper
sudo systemctl enable containerd-devmapper
# Time to setup the thin pool for consumption.
# The table arguments are such.
@@ -72,15 +80,17 @@ EOF
# low_water_mark. Copied this from containerd snapshotter test setup
# no. of feature arguments
# Skip zeroing blocks for new volumes.
local data_sectors
data_sectors=$(sudo blockdev --getsz "${loop_data}")
sudo dmsetup create contd-thin-pool \
--table "0 20971520 thin-pool /dev/loop21 /dev/loop20 512 32768 1 skip_block_zeroing"
--table "0 ${data_sectors} thin-pool ${loop_meta} ${loop_data} 512 32768 1 skip_block_zeroing"
case "${KUBERNETES}" in
k3s)
containerd_config_file="/var/lib/rancher/k3s/agent/etc/containerd/config.toml.tmpl"
sudo cp /var/lib/rancher/k3s/agent/etc/containerd/config.toml "${containerd_config_file}"
;;
kubeadm)
kubeadm|vanilla)
containerd_config_file="/etc/containerd/config.toml"
;;
*) >&2 echo "${KUBERNETES} flavour is not supported"; exit 2 ;;
@@ -120,7 +130,7 @@ EOF
case "${KUBERNETES}" in
k3s)
sudo systemctl restart k3s ;;
kubeadm)
kubeadm|vanilla)
sudo systemctl restart containerd ;;
*) >&2 echo "${KUBERNETES} flavour is not supported"; exit 2 ;;
esac

View File

@@ -51,7 +51,7 @@ function install_dependencies() {
# Create the default containerd configuration
sudo mkdir -p /etc/containerd
containerd config default > sudo tee /etc/containerd/config.toml
containerd config default | sudo tee /etc/containerd/config.toml
sudo systemctl restart containerd
}
@@ -106,27 +106,46 @@ function run() {
info "Running nerdctl smoke test tests using ${KATA_HYPERVISOR} hypervisor"
# fc-rs uses devmapper block devices; nerdctl must be told to use the
# devmapper snapshotter explicitly so images are unpacked into the pool.
local snapshotter_flag=""
if [ "${KATA_HYPERVISOR}" = "fc-rs" ]; then
snapshotter_flag="--snapshotter devmapper"
fi
info "Running nerdctl with Kata Containers (${KATA_HYPERVISOR})"
sudo nerdctl run --rm --runtime io.containerd.kata-${KATA_HYPERVISOR}.v2 --entrypoint nping instrumentisto/nmap --tcp-connect -c 2 -p 80 www.github.com
sudo nerdctl run --rm ${snapshotter_flag} --runtime io.containerd.kata-${KATA_HYPERVISOR}.v2 --entrypoint nping instrumentisto/nmap --tcp-connect -c 2 -p 80 www.github.com
info "Running nerdctl with Kata Containers (${KATA_HYPERVISOR}) and multiple bridge nwtorks"
sudo nerdctl run --rm --net ${net1} --net ${net2} --runtime io.containerd.kata-${KATA_HYPERVISOR}.v2 alpine ip a
sudo nerdctl run --rm ${snapshotter_flag} --net ${net1} --net ${net2} --runtime io.containerd.kata-${KATA_HYPERVISOR}.v2 alpine ip a
info "Running nerdctl with Kata Containers (${KATA_HYPERVISOR}) and ipvlan network"
sudo nerdctl run --rm --net ${ipvlan_net_name} --runtime io.containerd.kata-${KATA_HYPERVISOR}.v2 alpine ip a | grep "eth0"
sudo nerdctl run --rm ${snapshotter_flag} --net ${ipvlan_net_name} --runtime io.containerd.kata-${KATA_HYPERVISOR}.v2 alpine ip a | grep "eth0"
info "Running nerdctl with Kata Containers (${KATA_HYPERVISOR}) and macvlan network"
sudo nerdctl run --rm --net ${macvlan_net_name} --runtime io.containerd.kata-${KATA_HYPERVISOR}.v2 alpine ip a | grep "eth0"
sudo nerdctl run --rm ${snapshotter_flag} --net ${macvlan_net_name} --runtime io.containerd.kata-${KATA_HYPERVISOR}.v2 alpine ip a | grep "eth0"
info "Removing networks"
sudo nerdctl network rm ${macvlan_net_name} ${ipvlan_net_name}
}
function install_kata_for_nerdctl() {
install_kata
# Firecracker (fc-rs) uses block devices and requires the devmapper
# snapshotter; other hypervisors work fine with the default overlayfs.
# Must run AFTER install_kata since install_kata may overwrite the
# containerd config via overwrite_containerd_config.
if [ "${KATA_HYPERVISOR:-}" = "fc-rs" ]; then
configure_devmapper_for_containerd
fi
}
function main() {
action="${1:-}"
case "${action}" in
install-dependencies) install_dependencies ;;
install-kata) install_kata ;;
install-kata) install_kata_for_nerdctl ;;
run) run ;;
collect-artifacts) collect_artifacts ;;
*) >&2 die "Invalid argument" ;;

View File

@@ -35,6 +35,12 @@ function install_dependencies() {
function run() {
info "Running soak parallel stability tests using ${KATA_HYPERVISOR} hypervisor"
enabling_hypervisor
export CTR_RUNTIME="io.containerd.kata-${KATA_HYPERVISOR}.v2"
if [ "${KATA_HYPERVISOR}" = "fc-rs" ]; then
export CTR_SNAPSHOTTER="devmapper"
fi
export ITERATIONS=2 MAX_CONTAINERS=20
bash "${stability_dir}/soak_parallel_rm.sh"
@@ -48,11 +54,21 @@ function run() {
# bash "${stability_dir}/agent_stability_test.sh"
}
function install_kata_for_stability() {
install_kata
# Firecracker (fc-rs) uses block devices and requires the devmapper
# snapshotter; other hypervisors work fine with the default overlayfs.
if [ "${KATA_HYPERVISOR:-}" = "fc-rs" ]; then
configure_devmapper_for_containerd
fi
}
function main() {
action="${1:-}"
case "${action}" in
install-dependencies) install_dependencies ;;
install-kata) install_kata ;;
install-kata) install_kata_for_stability ;;
enabling-hypervisor) enabling_hypervisor ;;
run) run ;;
*) >&2 die "Invalid argument" ;;

View File

@@ -46,7 +46,7 @@ function main() {
for ((i=1; i<= "${NUM_CONTAINERS}"; i++)); do
containers+=($(random_name))
sudo -E "${CTR_EXE}" run -d --runtime "${CTR_RUNTIME}" "${IMAGE}" "${containers[-1]}" sh -c "${PAYLOAD_ARGS}"
sudo -E "${CTR_EXE}" run -d --runtime "${CTR_RUNTIME}" ${CTR_SNAPSHOTTER:+--snapshotter "${CTR_SNAPSHOTTER}"} "${IMAGE}" "${containers[-1]}" sh -c "${PAYLOAD_ARGS}"
((not_started_count--))
info "$not_started_count remaining containers"
done

View File

@@ -124,7 +124,7 @@ function go() {
local i
for ((i=1; i<= ${MAX_CONTAINERS}; i++)); do
containers+=($(random_name))
sudo "${CTR_EXE}" run --runtime="${CTR_RUNTIME}" -d "${nginx_image}" "${containers[-1]}" sh -c "${COMMAND}"
sudo "${CTR_EXE}" run --runtime="${CTR_RUNTIME}" ${CTR_SNAPSHOTTER:+--snapshotter "${CTR_SNAPSHOTTER}"} -d "${nginx_image}" "${containers[-1]}" sh -c "${COMMAND}"
((how_many++))
done

View File

@@ -18,7 +18,7 @@ function main() {
init_env
check_cmds "${cmds[@]}"
sudo -E "${CTR_EXE}" run -d --runtime "${CTR_RUNTIME}" "${IMAGE}" "${CONTAINER_NAME}" sh -c "${PAYLOAD_ARGS}"
sudo -E "${CTR_EXE}" run -d --runtime "${CTR_RUNTIME}" ${CTR_SNAPSHOTTER:+--snapshotter "${CTR_SNAPSHOTTER}"} "${IMAGE}" "${CONTAINER_NAME}" sh -c "${PAYLOAD_ARGS}"
# Run 1 iomix stressor (mix of I/O operations) for 20 seconds with verbose output
info "Running iomix stressor test"

View File

@@ -22,6 +22,7 @@ const ALL_SHIMS: &[&str] = &[
"cloud-hypervisor",
"dragonball",
"fc",
"fc-rs",
"firecracker",
"remote",
// QEMU shims
@@ -61,7 +62,7 @@ fn get_hypervisor_name(shim: &str) -> Result<&str> {
"clh" => Ok("clh"),
"cloud-hypervisor" => Ok("cloud-hypervisor"),
"dragonball" => Ok("dragonball"),
"fc" | "firecracker" => Ok("firecracker"),
"fc" | "fc-rs" | "firecracker" => Ok("firecracker"),
"remote" => Ok("remote"),
_ => anyhow::bail!(
"Unknown shim '{}'. Valid shims are: {}",
@@ -642,7 +643,7 @@ fn get_hypervisor_path(config: &Config, shim: &str) -> Result<String> {
// For non-QEMU shims, use the appropriate hypervisor binary
let binary = match shim {
"clh" | "cloud-hypervisor" => "cloud-hypervisor",
"fc" | "firecracker" => "firecracker",
"fc" | "fc-rs" | "firecracker" => "firecracker",
"dragonball" => "dragonball",
"stratovirt" => "stratovirt",
// Remote and other shims don't have a local hypervisor binary
@@ -971,6 +972,7 @@ mod tests {
#[case("cloud-hypervisor", "cloud-hypervisor")]
#[case("dragonball", "dragonball")]
#[case("fc", "firecracker")]
#[case("fc-rs", "firecracker")]
#[case("firecracker", "firecracker")]
#[case("remote", "remote")]
fn test_get_hypervisor_name_other_hypervisors(#[case] shim: &str, #[case] expected: &str) {

View File

@@ -9,6 +9,7 @@ use std::process::Command;
pub const RUST_SHIMS: &[&str] = &[
"cloud-hypervisor",
"dragonball",
"fc-rs",
"qemu-runtime-rs",
"qemu-coco-dev-runtime-rs",
"qemu-se-runtime-rs",
@@ -100,6 +101,7 @@ mod tests {
#[case("qemu", "/opt/kata", "/opt/kata/share/defaults/kata-containers")]
#[case("qemu-tdx", "/opt/kata", "/opt/kata/share/defaults/kata-containers")]
#[case("fc", "/opt/kata", "/opt/kata/share/defaults/kata-containers")]
#[case("fc-rs", "/opt/kata", "/opt/kata/share/defaults/kata-containers/runtime-rs")]
#[case("clh", "/opt/kata", "/opt/kata/share/defaults/kata-containers")]
#[case("cloud-hypervisor", "/opt/kata", "/opt/kata/share/defaults/kata-containers/runtime-rs")]
#[case("qemu-runtime-rs", "/opt/kata", "/opt/kata/share/defaults/kata-containers/runtime-rs")]
@@ -118,6 +120,7 @@ mod tests {
#[case("qemu", "/opt/kata", "/opt/kata/share/defaults/kata-containers/runtimes/qemu")]
#[case("qemu-tdx", "/opt/kata", "/opt/kata/share/defaults/kata-containers/runtimes/qemu-tdx")]
#[case("fc", "/opt/kata", "/opt/kata/share/defaults/kata-containers/runtimes/fc")]
#[case("fc-rs", "/opt/kata", "/opt/kata/share/defaults/kata-containers/runtime-rs/runtimes/fc-rs")]
#[case("cloud-hypervisor", "/opt/kata", "/opt/kata/share/defaults/kata-containers/runtime-rs/runtimes/cloud-hypervisor")]
#[case("qemu-runtime-rs", "/opt/kata", "/opt/kata/share/defaults/kata-containers/runtime-rs/runtimes/qemu-runtime-rs")]
#[case("qemu", "/custom/path", "/custom/path/share/defaults/kata-containers/runtimes/qemu")]

View File

@@ -86,6 +86,7 @@ scheduling:
"cloud-hypervisor" (dict "memory" "130Mi" "cpu" "250m")
"dragonball" (dict "memory" "130Mi" "cpu" "250m")
"fc" (dict "memory" "130Mi" "cpu" "250m")
"fc-rs" (dict "memory" "130Mi" "cpu" "250m")
"qemu" (dict "memory" "160Mi" "cpu" "250m")
"qemu-coco-dev" (dict "memory" "160Mi" "cpu" "250m")
"qemu-coco-dev-runtime-rs" (dict "memory" "160Mi" "cpu" "250m")

View File

@@ -111,6 +111,14 @@ shims:
containerd:
snapshotter: "devmapper" # requires pre-configuration on the user side
fc-rs:
enabled: ~
supportedArches:
- amd64
allowedHypervisorAnnotations: []
containerd:
snapshotter: "devmapper" # requires pre-configuration on the user side
qemu:
enabled: ~
supportedArches: