Compare commits

...

3 Commits

Author SHA1 Message Date
Anastassios Nanos
f48cd23836 runtime-rs: Add FC tests and rename rs-fc to fc-rs
WiP: Add tests for runtime-rs FC support.

Signed-off-by: Anastassios Nanos <ananos@nubificus.co.uk>
2026-04-02 07:45:56 +00:00
Anastassios Nanos
c4a0b8e102 runtime-rs: Split FC spawn sequence
To account for network hooks and the lack of device hotplugging on FC, we split the
spawn process into start and boot. This way, the VMM is already up, but the VM sandbox
is not -- we can add devices and then we can kick off the VM with InstanceStart.

Signed-off-by: Anastassios Nanos <ananos@nubificus.co.uk>
2026-04-02 07:45:51 +00:00
Anastassios Nanos
26b6ad72c1 runtime-rs: Fix FC spawn with network
Refactor network link detection so that FC can boot properly.

Signed-off-by: Anastassios Nanos <ananos@nubificus.co.uk>
2026-04-02 07:45:21 +00:00
19 changed files with 324 additions and 89 deletions

View File

@@ -88,7 +88,7 @@ jobs:
fail-fast: false
matrix:
containerd_version: ['lts', 'active']
vmm: ['clh', 'cloud-hypervisor', 'dragonball', 'qemu', 'qemu-runtime-rs']
vmm: ['clh', 'cloud-hypervisor', 'dragonball', 'qemu', 'qemu-runtime-rs', 'fc-rs']
runs-on: ubuntu-22.04
env:
CONTAINERD_VERSION: ${{ matrix.containerd_version }}
@@ -283,6 +283,7 @@ jobs:
- qemu
- cloud-hypervisor
- qemu-runtime-rs
- fc-rs
runs-on: ubuntu-22.04
env:
KATA_HYPERVISOR: ${{ matrix.vmm }}

View File

@@ -275,7 +275,7 @@ Each hypervisor has a dedicated configuration file:
|------------|----------------|-----------------|
| QEMU |`configuration-qemu-runtime-rs.toml` |`configuration-qemu.toml` |
| Cloud Hypervisor | `configuration-cloud-hypervisor.toml` | `configuration-clh.toml` |
| Firecracker | `configuration-rs-fc.toml` | `configuration-fc.toml` |
| Firecracker | `configuration-fc-rs.toml` | `configuration-fc.toml` |
| Dragonball | `configuration-dragonball.toml` (default) | `No` |
> **Note:** Configuration files are typically installed in `/opt/kata/share/defaults/kata-containers/` or `/opt/kata/share/defaults/kata-containers/runtime-rs/` or `/usr/share/defaults/kata-containers/`.

View File

@@ -423,7 +423,7 @@ endif
ifneq (,$(FCCMD))
KNOWN_HYPERVISORS += $(HYPERVISOR_FC)
CONFIG_FILE_FC = configuration-rs-fc.toml
CONFIG_FILE_FC = configuration-fc-rs.toml
CONFIG_FC = config/$(CONFIG_FILE_FC)
CONFIG_FC_IN = $(CONFIG_FC).in
CONFIG_PATH_FC = $(abspath $(CONFDIR)/$(CONFIG_FILE_FC))

View File

@@ -157,7 +157,7 @@ Configuration files in `config/`:
| `configuration-dragonball.toml.in` | Dragonball | Built-in VMM |
| `configuration-qemu-runtime-rs.toml.in` | QEMU | Default external |
| `configuration-cloud-hypervisor.toml.in` | Cloud Hypervisor | Modern VMM |
| `configuration-rs-fc.toml.in` | Firecracker | Lightweight microVM |
| `configuration-fc-rs.toml.in` | Firecracker | Lightweight microVM |
| `configuration-remote.toml.in` | Remote | Remote hypervisor |
| `configuration-qemu-tdx-runtime-rs.toml.in` | QEMU + TDX | Intel TDX confidential computing |
| `configuration-qemu-snp-runtime-rs.toml.in` | QEMU + SEV-SNP | AMD SEV-SNP confidential computing |

View File

@@ -3,6 +3,8 @@
//
//SPDX-License-Identifier: Apache-2.0
use std::convert::TryFrom;
use crate::{
firecracker::{
inner_hypervisor::{FC_AGENT_SOCKET_NAME, ROOT},
@@ -13,7 +15,7 @@ use crate::{
};
use anyhow::{anyhow, Context, Result};
use dbs_utils::net::MacAddr;
use hyper::{Body, Method, Request, Response};
use hyper::{Body, Method, Request};
use hyperlocal::Uri;
use kata_sys_util::mount;
use kata_types::config::hypervisor::RateLimiterConfig;
@@ -23,6 +25,16 @@ use tokio::{fs, fs::File};
const REQUEST_RETRY: u32 = 500;
const FC_KERNEL: &str = "vmlinux";
/// Distinguishes a transient transport error (FC not ready yet, retry allowed)
/// from a permanent HTTP-level API error returned by FC (no retry).
#[derive(Debug)]
enum FcRequestError {
/// Could not reach the FC API socket (connection refused, etc.)
Transport(String),
/// FC returned a non-2xx HTTP status. (status_code, response_body)
Api(u16, String),
}
const FC_ROOT_FS: &str = "rootfs";
const DRIVE_PREFIX: &str = "drive";
const DISK_POOL_SIZE: u32 = 6;
@@ -111,7 +123,8 @@ impl FcInner {
let body_config: String = json!({
"mem_size_mib": self.config.memory_info.default_memory,
"vcpu_count": self.config.cpu_info.default_vcpus.ceil() as u8,
"vcpu_count": u8::try_from(self.config.cpu_info.default_vcpus.ceil() as u64)
.context("vcpu_count overflows u8")?,
})
.to_string();
let body_kernel: String = json!({
@@ -215,13 +228,29 @@ impl FcInner {
Some(mac) => MacAddr::from_bytes(&mac.0).ok(),
None => None,
};
let rx_rate_limiter = RateLimiterConfig::new(
self.config.network_info.rx_rate_limiter_max_rate,
0,
None,
None,
);
let tx_rate_limiter = RateLimiterConfig::new(
self.config.network_info.tx_rate_limiter_max_rate,
0,
None,
None,
);
let body: String = json!({
"iface_id": &device_id,
"guest_mac": g_mac,
"host_dev_name": &config.host_dev_name
"host_dev_name": &config.host_dev_name,
"rx_rate_limiter": rx_rate_limiter,
"tx_rate_limiter": tx_rate_limiter,
})
.to_string();
info!(sl(), "FC: add network device: iface_id={} guest_mac={:?} host_dev_name={}", device_id, g_mac, config.host_dev_name);
self.request_with_retry(
Method::PUT,
&["/network-interfaces/", &device_id].concat(),
@@ -259,50 +288,54 @@ impl FcInner {
.body(Body::from(data.clone()))?;
match self.send_request(req).await {
Ok(resp) => {
debug!(sl(), "Request sent, resp: {:?}", resp);
return Ok(());
}
Err(resp) => {
debug!(sl(), "Request sent with error, resp: {:?}", resp);
std::thread::sleep(std::time::Duration::from_millis(10));
Ok(_) => return Ok(()),
// A transport error (FC not ready yet) — retry.
Err(FcRequestError::Transport(e)) => {
debug!(sl(), "FC not reachable yet, retrying: {:?}", e);
tokio::time::sleep(std::time::Duration::from_millis(10)).await;
continue;
}
// An HTTP-level error from FC — fail immediately with the
// actual error body so the problem is visible in logs.
Err(FcRequestError::Api(status, body)) => {
return Err(anyhow::anyhow!(
"FC API error: status={} body={}",
status,
body
));
}
}
}
Err(anyhow::anyhow!(
"After {} attempts, it still doesn't work.",
REQUEST_RETRY
"FC not reachable after {} attempts (method={:?} uri={:?})",
REQUEST_RETRY,
method,
uri,
))
}
pub(crate) async fn send_request(&self, req: Request<Body>) -> Result<Response<Body>> {
let resp = self.client.request(req).await?;
async fn send_request(&self, req: Request<Body>) -> Result<(), FcRequestError> {
let resp = self
.client
.request(req)
.await
.map_err(|e| FcRequestError::Transport(e.to_string()))?;
let status = resp.status();
debug!(sl(), "Request RESPONSE {:?} {:?}", &status, resp);
if status.is_success() {
return Ok(resp);
} else {
let body = hyper::body::to_bytes(resp.into_body()).await?;
if body.is_empty() {
debug!(sl(), "Request FAILED WITH STATUS: {:?}", status);
None
} else {
let body = String::from_utf8_lossy(&body).into_owned();
debug!(
sl(),
"Request FAILED WITH STATUS: {:?} and BODY: {:?}", status, body
);
Some(body)
};
debug!(sl(), "FC request succeeded: {:?}", status);
return Ok(());
}
Err(anyhow::anyhow!(
"After {} attempts, it
still doesn't work.",
REQUEST_RETRY
))
let body = hyper::body::to_bytes(resp.into_body())
.await
.map(|b| String::from_utf8_lossy(&b).into_owned())
.unwrap_or_default();
error!(
sl(),
"FC API rejected request: status={:?} body={:?}", status, body
);
Err(FcRequestError::Api(status.as_u16(), body))
}
pub(crate) fn cleanup_resource(&self) {
if self.jailed {

View File

@@ -4,6 +4,7 @@
//SPDX-License-Identifier: Apache-2.0
use crate::firecracker::{inner_hypervisor::FC_API_SOCKET_NAME, sl};
use crate::device::driver::NetworkConfig;
use crate::MemoryConfig;
use crate::HYPERVISOR_FIRECRACKER;
use crate::{device::DeviceType, VmmState};
@@ -43,6 +44,9 @@ pub struct FcInner {
pub(crate) jailed: bool,
pub(crate) run_dir: String,
pub(crate) pending_devices: Vec<DeviceType>,
/// Network devices buffered until start_vm() so they are always sent to FC
/// before InstanceStart, mirroring the Go runtime's batch-configuration approach.
pub(crate) pending_net_devices: Vec<(NetworkConfig, String)>,
pub(crate) capabilities: Capabilities,
pub(crate) fc_process: Mutex<Option<Child>>,
pub(crate) exit_notify: Option<mpsc::Sender<()>>,
@@ -51,7 +55,9 @@ pub struct FcInner {
impl FcInner {
pub fn new(exit_notify: mpsc::Sender<()>) -> FcInner {
let mut capabilities = Capabilities::new();
capabilities.set(CapabilityBits::BlockDeviceSupport);
capabilities.set(
CapabilityBits::BlockDeviceSupport | CapabilityBits::HybridVsockSupport,
);
FcInner {
id: String::default(),
@@ -66,6 +72,7 @@ impl FcInner {
jailed: false,
run_dir: String::default(),
pending_devices: vec![],
pending_net_devices: vec![],
capabilities,
fc_process: Mutex::new(None),
exit_notify: Some(exit_notify),
@@ -80,7 +87,7 @@ impl FcInner {
debug!(sl(), "Running Jailed");
cmd = Command::new(&self.config.jailer_path);
let api_socket = ["/run/", FC_API_SOCKET_NAME].join("/");
let args = [
let mut args = vec![
"--id",
&self.id,
"--gid",
@@ -91,11 +98,16 @@ impl FcInner {
&self.config.path,
"--chroot-base-dir",
&self.jailer_root,
"--",
"--api-sock",
&api_socket,
];
cmd.args(args);
// Pass the network namespace to the jailer so that the FC process
// is placed in the correct netns. This is the recommended approach
// over relying on pre_exec setns inheritance.
let netns_path = netns.clone().unwrap_or_default();
if !netns_path.is_empty() {
args.extend_from_slice(&["--netns", &netns_path]);
}
args.extend_from_slice(&["--", "--api-sock", &api_socket]);
cmd.args(&args);
}
false => {
debug!(sl(), "Running non-Jailed");
@@ -108,15 +120,22 @@ impl FcInner {
}
debug!(sl(), "Exec: {:?}", cmd);
// Make sure we're in the correct Network Namespace
// For the non-jailed case, enter the network namespace via pre_exec so that
// the FC process inherits it. For the jailed case, --netns is passed to the
// jailer above and pre_exec setns is skipped.
let jailed = self.jailed;
unsafe {
let selinux_label = self.config.security_info.selinux_label.clone();
let _pre = cmd.pre_exec(move || {
if let Some(netns_path) = &netns {
debug!(sl(), "set netns for vmm master {:?}", &netns_path);
let netns_fd = std::fs::File::open(netns_path);
let _ = setns(netns_fd?.as_raw_fd(), CloneFlags::CLONE_NEWNET)
.context("set netns failed");
if !jailed {
if let Some(netns_path) = &netns {
debug!(sl(), "set netns for vmm master {:?}", &netns_path);
let netns_fd = std::fs::File::open(netns_path)?;
setns(netns_fd.as_raw_fd(), CloneFlags::CLONE_NEWNET)
.map_err(|e| std::io::Error::other(
format!("setns into {:?} failed: {}", netns_path, e),
))?;
}
}
if let Some(label) = selinux_label.as_ref() {
if let Err(e) = selinux::set_exec_label(label) {
@@ -256,6 +275,7 @@ impl Persist for FcInner {
jailer_root: hypervisor_state.jailer_root,
client: Client::unix(),
pending_devices: vec![],
pending_net_devices: vec![],
run_dir: hypervisor_state.run_dir,
capabilities: Capabilities::new(),
fc_process: Mutex::new(None),

View File

@@ -31,10 +31,29 @@ impl FcInner {
.hotplug_block_device(block.config.path_on_host.as_str(), block.config.index)
.await
.context("add block device"),
DeviceType::Network(network) => self
.add_net_device(&network.config, network.device_id)
.await
.context("add net device"),
DeviceType::Network(network) => {
// Buffer network devices and send them to FC just before InstanceStart
// in boot_vm(). Firecracker rejects PUT /network-interfaces after the
// VM has started, so we must ensure they arrive before InstanceStart.
// This mirrors the Go runtime's batch-configuration approach.
//
// If the VM is already running (e.g. a post-start prestart-hooks rescan
// called add_device again), we cannot do anything useful — FC has already
// started and does not support network-interface hotplug. Log a warning
// and return Ok so the rest of the setup path can continue.
if self.state == VmmState::VmRunning {
warn!(
sl(),
"FC: ignoring late network device add for iface {} — VM already running, hotplug not supported",
network.device_id
);
return Ok(());
}
debug!(sl(), "buffering network device for pre-start flush");
self.pending_net_devices
.push((network.config, network.device_id));
Ok(())
}
DeviceType::HybridVsock(hvsock) => {
self.add_hvsock(&hvsock.config).await.context("add vsock")
}

View File

@@ -76,7 +76,29 @@ impl FcInner {
}
pub(crate) async fn start_vm(&mut self, _timeout: i32) -> Result<()> {
debug!(sl(), "Starting sandbox");
// For Firecracker, the VMM process was already started in prepare_vm.
// Network interfaces must be configured before InstanceStart, but
// OCI hooks (which create the container veth via CNI) have not run
// yet. Defer the network flush and InstanceStart to boot_vm(), which
// sandbox.rs calls after the hooks + network rescan.
debug!(sl(), "FC start_vm: VMM already running; deferring InstanceStart to boot_vm");
Ok(())
}
pub(crate) async fn boot_vm(&mut self) -> Result<()> {
debug!(sl(), "FC boot_vm: flushing network devices and sending InstanceStart");
// Flush all buffered network devices. These were populated by
// add_device(Network) after the OCI hooks ran and the netns was
// rescanned by sandbox.rs. FC rejects PUT /network-interfaces once
// the VM is running, so this must happen before InstanceStart.
let net_devices = std::mem::take(&mut self.pending_net_devices);
for (config, device_id) in net_devices {
self.add_net_device(&config, device_id)
.await
.context("configure network interface before InstanceStart")?;
}
let body: String = serde_json::json!({
"action_type": "InstanceStart"
})

View File

@@ -75,6 +75,11 @@ impl Hypervisor for Firecracker {
inner.start_vm(timeout).await
}
async fn boot_vm(&self) -> Result<()> {
let mut inner = self.inner.write().await;
inner.boot_vm().await
}
async fn stop_vm(&self) -> Result<()> {
let mut inner = self.inner.write().await;
inner.stop_vm().await

View File

@@ -106,6 +106,23 @@ pub trait Hypervisor: std::fmt::Debug + Send + Sync {
selinux_label: Option<String>,
) -> Result<()>;
async fn start_vm(&self, timeout: i32) -> Result<()>;
/// Finalize VM boot after OCI hooks and network setup have run.
///
/// For hypervisors that require all devices (including network) to be
/// registered before the guest boots (e.g. Firecracker, which has no
/// hotplug), `start_vm` only starts the VMM process, while `boot_vm`
/// flushes the device queue and issues the actual boot command
/// (InstanceStart for FC). For hypervisors that start the guest
/// immediately in `start_vm` (QEMU, dragonball, cloud-hypervisor), the
/// default no-op implementation is sufficient.
///
/// sandbox.rs calls this after OCI hooks and the post-hooks network
/// rescan, but before connecting to the kata-agent.
async fn boot_vm(&self) -> Result<()> {
Ok(())
}
async fn stop_vm(&self) -> Result<()>;
async fn wait_vm(&self) -> Result<i32>;
async fn pause_vm(&self) -> Result<()>;

View File

@@ -689,14 +689,12 @@ impl QemuInner {
let is_unaligned = !new_hotplugged_mem.is_multiple_of(guest_mem_block_size);
if is_unaligned {
new_hotplugged_mem = ch_config::convert::checked_next_multiple_of(
new_hotplugged_mem,
guest_mem_block_size,
)
.ok_or(anyhow!(format!(
"alignment of {} B to the block size of {} B failed",
new_hotplugged_mem, guest_mem_block_size
)))?
new_hotplugged_mem = new_hotplugged_mem
.checked_next_multiple_of(guest_mem_block_size)
.ok_or(anyhow!(format!(
"alignment of {} B to the block size of {} B failed",
new_hotplugged_mem, guest_mem_block_size
)))?
}
let new_hotplugged_mem = new_hotplugged_mem;

View File

@@ -249,13 +249,48 @@ impl ResourceManagerInner {
}
async fn handle_interfaces(&self, network: &dyn Network) -> Result<()> {
// The guest virtio-net device may not be visible to the kernel immediately
// after InstanceStart completes. Retry on "Link not found" to allow time
// for virtio-net driver initialisation in the guest.
// Use a generous window (100 × 100 ms = 10 s) since on some systems
// virtio-net initialisation is slower than the Go runtime's 20 × 20 ms.
const MAX_ATTEMPTS: u32 = 100;
const RETRY_DELAY_MS: u64 = 100;
for i in network.interfaces().await.context("get interfaces")? {
// update interface
info!(sl!(), "update interface {:?}", i);
self.agent
.update_interface(agent::UpdateInterfaceRequest { interface: Some(i) })
.await
.context("update interface")?;
info!(sl!(), "update interface: hw_addr={} name={}", i.hw_addr, i.name);
let mut last_err = None;
for attempt in 0..MAX_ATTEMPTS {
let result = self
.agent
.update_interface(agent::UpdateInterfaceRequest {
interface: Some(i.clone()),
})
.await;
if let Err(e) = result {
let msg = e.to_string();
if msg.contains("Link not found") {
info!(
sl!(),
"update interface: link not found (attempt {}/{}), retrying in {}ms",
attempt + 1,
MAX_ATTEMPTS,
RETRY_DELAY_MS,
);
last_err = Some(e);
tokio::time::sleep(std::time::Duration::from_millis(RETRY_DELAY_MS))
.await;
} else {
return Err(e).context("update interface");
}
} else {
last_err = None;
break;
}
}
if let Some(e) = last_err {
return Err(e).context("update interface");
}
}
Ok(())

View File

@@ -49,7 +49,13 @@ impl NetworkPair {
let unique_id = kata_sys_util::rand::UUID::new();
let model = network_model::new(model).context("new network model")?;
let tap_iface_name = format!("tap{idx}{TAP_SUFFIX}");
let virt_iface_name = format!("eth{idx}");
// Use the actual interface name from the netns scan. Fall back to eth{idx}
// only if the caller passed an empty name.
let virt_iface_name = if name.is_empty() {
format!("eth{idx}")
} else {
name.to_string()
};
let tap_link = create_link(handle, &tap_iface_name, queues)
.await
.context("create link")?;
@@ -106,7 +112,7 @@ impl NetworkPair {
.await
.context("set link up")?;
let mut net_pair = NetworkPair {
let net_pair = NetworkPair {
tap: TapInterface {
id: String::from(&unique_id),
name: format!("br{idx}{TAP_SUFFIX}"),
@@ -125,10 +131,6 @@ impl NetworkPair {
network_qos: false,
};
if !name.is_empty() {
net_pair.virt_iface.name = String::from(name);
}
Ok(net_pair)
}

View File

@@ -140,21 +140,22 @@ impl Network for NetworkWithNetns {
async fn remove(&self, h: &dyn Hypervisor) -> Result<()> {
let inner = self.inner.read().await;
// The network namespace would have been deleted at this point
// if it has not been created by virtcontainers.
if !inner.network_created {
return Ok(());
}
{
// Always clean up endpoint resources (TC filter rules, TAP devices) regardless
// of who created the network namespace.
if !inner.netns_path.is_empty() {
let _netns_guard =
netns::NetnsGuard::new(&inner.netns_path).context("net netns guard")?;
for e in &inner.entity_list {
e.endpoint.detach(h).await.context("detach")?;
}
}
let netns = get_from_path(inner.netns_path.clone())?;
netns.remove()?;
fs::remove_dir_all(inner.netns_path.clone()).context("failed to remove netns path")?;
// Only remove the network namespace itself if virtcontainers created it.
if inner.network_created {
let netns = get_from_path(inner.netns_path.clone())?;
netns.remove()?;
fs::remove_dir_all(inner.netns_path.clone())
.context("failed to remove netns path")?;
}
Ok(())
}
}

View File

@@ -613,7 +613,6 @@ impl Sandbox for VirtSandbox {
// start vm
self.hypervisor.start_vm(10_000).await.context("start vm")?;
info!(sl!(), "start vm");
// execute pre-start hook functions, including Prestart Hooks and CreateRuntime Hooks
let (prestart_hooks, create_runtime_hooks) =
@@ -661,6 +660,18 @@ impl Sandbox for VirtSandbox {
}
}
// Give the hypervisor a chance to finalize boot now that OCI hooks and
// the post-hooks network rescan have completed. For hypervisors that
// require all devices (including network) to be registered before the
// guest boots (e.g. Firecracker), start_vm defers the actual boot
// command to this call. For hypervisors that boot the guest in
// start_vm (QEMU, dragonball, cloud-hypervisor), this is a no-op.
self.hypervisor
.boot_vm()
.await
.context("boot vm")?;
info!(sl!(), "start vm");
// connect agent
// set agent socket
let address = self

View File

@@ -517,7 +517,7 @@ function enabling_hypervisor() {
declare -r CONTAINERD_SHIM_KATA="/usr/local/bin/containerd-shim-kata-${KATA_HYPERVISOR}-v2"
case "${KATA_HYPERVISOR}" in
dragonball|cloud-hypervisor|qemu-runtime-rs|qemu-se-runtime-rs)
dragonball|cloud-hypervisor|qemu-runtime-rs|qemu-se-runtime-rs|fc-rs)
sudo ln -sf "${KATA_DIR}/runtime-rs/bin/containerd-shim-kata-v2" "${CONTAINERD_SHIM_KATA}"
declare -r CONFIG_DIR="${KATA_DIR}/share/defaults/kata-containers/runtime-rs"
;;
@@ -536,6 +536,61 @@ function enabling_hypervisor() {
}
# Sets up a devmapper thin-pool and reconfigures standalone containerd to use
# it as the default snapshotter. Required for block-device based hypervisors
# (e.g. Firecracker / fc-rs) that cannot use the overlayfs snapshotter.
# Expects containerd to already be installed and /etc/containerd/config.toml
# to exist (e.g. after `containerd config default | sudo tee ...`).
function configure_devmapper_for_containerd() {
info "Configuring devmapper snapshotter for standalone containerd"
sudo mkdir -p /var/lib/containerd/devmapper
sudo truncate --size 10G /var/lib/containerd/devmapper/data-disk.img
sudo truncate --size 1G /var/lib/containerd/devmapper/meta-disk.img
# Allocate loop devices dynamically to avoid conflicts with pre-existing ones.
local loop_data loop_meta
loop_data=$(sudo losetup --find --show /var/lib/containerd/devmapper/data-disk.img)
loop_meta=$(sudo losetup --find --show /var/lib/containerd/devmapper/meta-disk.img)
info "devmapper: data=${loop_data} meta=${loop_meta}"
# data device size in 512-byte sectors: 10 GiB = 10*1024*1024*1024/512 = 20971520
local data_sectors
data_sectors=$(sudo blockdev --getsz "${loop_data}")
sudo dmsetup create contd-thin-pool \
--table "0 ${data_sectors} thin-pool ${loop_meta} ${loop_data} 512 32768 1 skip_block_zeroing"
# Add the devmapper snapshotter plugin config only if not already present
# (makes the function idempotent on re-runs).
if ! sudo grep -q 'io.containerd.snapshotter.v1.devmapper' /etc/containerd/config.toml; then
cat <<'EOF' | sudo tee -a /etc/containerd/config.toml
[plugins."io.containerd.snapshotter.v1.devmapper"]
pool_name = "contd-thin-pool"
root_path = "/var/lib/containerd/devmapper"
base_image_size = "4096MB"
discard_blocks = true
EOF
fi
# Patch the default snapshotter to devmapper if not already set.
if ! sudo grep -q 'snapshotter = "devmapper"' /etc/containerd/config.toml; then
sudo sed -i \
's|snapshotter = "overlayfs"|snapshotter = "devmapper"|g' \
/etc/containerd/config.toml
fi
sudo systemctl restart containerd
# Verify the plugin came up healthy
local dm_status
dm_status=$(sudo ctr plugins ls | awk '$2 ~ /^devmapper$/ { print $4 }' || true)
[ "${dm_status}" = "ok" ] || \
die "containerd devmapper snapshotter not healthy (status: '${dm_status}')"
info "devmapper snapshotter configured and healthy"
}
function check_containerd_config_for_kata() {
# check containerd config
declare -r line1="default_runtime_name = \"kata\""

View File

@@ -51,8 +51,14 @@ function install_dependencies() {
# Create the default containerd configuration
sudo mkdir -p /etc/containerd
containerd config default > sudo tee /etc/containerd/config.toml
containerd config default | sudo tee /etc/containerd/config.toml
sudo systemctl restart containerd
# Firecracker (fc-rs) uses block devices and requires the devmapper
# snapshotter; other hypervisors work fine with the default overlayfs.
if [ "${KATA_HYPERVISOR:-}" = "fc-rs" ]; then
configure_devmapper_for_containerd
fi
}
function collect_artifacts() {

View File

@@ -48,11 +48,21 @@ function run() {
# bash "${stability_dir}/agent_stability_test.sh"
}
function install_kata_for_stability() {
install_kata
# Firecracker (fc-rs) uses block devices and requires the devmapper
# snapshotter; other hypervisors work fine with the default overlayfs.
if [ "${KATA_HYPERVISOR:-}" = "fc-rs" ]; then
configure_devmapper_for_containerd
fi
}
function main() {
action="${1:-}"
case "${action}" in
install-dependencies) install_dependencies ;;
install-kata) install_kata ;;
install-kata) install_kata_for_stability ;;
enabling-hypervisor) enabling_hypervisor ;;
run) run ;;
*) >&2 die "Invalid argument" ;;