mirror of
https://github.com/kata-containers/kata-containers.git
synced 2026-04-05 19:43:40 +00:00
Compare commits
11 Commits
fix/docker
...
topic/dock
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6031a1219b | ||
|
|
f074ceec6d | ||
|
|
945aa5b43f | ||
|
|
ccfdf5e11b | ||
|
|
26bd5ad754 | ||
|
|
517882f93d | ||
|
|
4a1c2b6620 | ||
|
|
3e23ee9998 | ||
|
|
f7878cc385 | ||
|
|
67af63a540 | ||
|
|
99eaa8fcb1 |
45
.github/workflows/basic-ci-amd64.yaml
vendored
45
.github/workflows/basic-ci-amd64.yaml
vendored
@@ -269,6 +269,51 @@ jobs:
|
||||
timeout-minutes: 15
|
||||
run: bash tests/functional/vfio/gha-run.sh run
|
||||
|
||||
run-docker-tests:
|
||||
name: run-docker-tests
|
||||
strategy:
|
||||
# We can set this to true whenever we're 100% sure that
|
||||
# all the tests are not flaky, otherwise we'll fail them
|
||||
# all due to a single flaky instance.
|
||||
fail-fast: false
|
||||
matrix:
|
||||
vmm:
|
||||
- qemu
|
||||
- qemu-runtime-rs
|
||||
runs-on: ubuntu-22.04
|
||||
env:
|
||||
KATA_HYPERVISOR: ${{ matrix.vmm }}
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
ref: ${{ inputs.commit-hash }}
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
- name: Rebase atop of the latest target branch
|
||||
run: |
|
||||
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
|
||||
env:
|
||||
TARGET_BRANCH: ${{ inputs.target-branch }}
|
||||
|
||||
- name: Install dependencies
|
||||
run: bash tests/integration/docker/gha-run.sh install-dependencies
|
||||
env:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
|
||||
- name: get-kata-tarball
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
with:
|
||||
name: kata-static-tarball-amd64${{ inputs.tarball-suffix }}
|
||||
path: kata-artifacts
|
||||
|
||||
- name: Install kata
|
||||
run: bash tests/integration/docker/gha-run.sh install-kata kata-artifacts
|
||||
|
||||
- name: Run docker smoke test
|
||||
timeout-minutes: 5
|
||||
run: bash tests/integration/docker/gha-run.sh run
|
||||
|
||||
run-nerdctl-tests:
|
||||
name: run-nerdctl-tests
|
||||
strategy:
|
||||
|
||||
43
.github/workflows/basic-ci-s390x.yaml
vendored
43
.github/workflows/basic-ci-s390x.yaml
vendored
@@ -123,3 +123,46 @@ jobs:
|
||||
- name: Run containerd-stability tests
|
||||
timeout-minutes: 15
|
||||
run: bash tests/stability/gha-run.sh run
|
||||
|
||||
run-docker-tests:
|
||||
name: run-docker-tests
|
||||
strategy:
|
||||
# We can set this to true whenever we're 100% sure that
|
||||
# all the tests are not flaky, otherwise we'll fail them
|
||||
# all due to a single flaky instance.
|
||||
fail-fast: false
|
||||
matrix:
|
||||
vmm:
|
||||
- qemu
|
||||
- qemu-runtime-rs
|
||||
runs-on: s390x-large
|
||||
env:
|
||||
KATA_HYPERVISOR: ${{ matrix.vmm }}
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
ref: ${{ inputs.commit-hash }}
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
- name: Rebase atop of the latest target branch
|
||||
run: |
|
||||
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
|
||||
env:
|
||||
TARGET_BRANCH: ${{ inputs.target-branch }}
|
||||
|
||||
- name: Install dependencies
|
||||
run: bash tests/integration/docker/gha-run.sh install-dependencies
|
||||
|
||||
- name: get-kata-tarball
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
with:
|
||||
name: kata-static-tarball-s390x${{ inputs.tarball-suffix }}
|
||||
path: kata-artifacts
|
||||
|
||||
- name: Install kata
|
||||
run: bash tests/integration/docker/gha-run.sh install-kata kata-artifacts
|
||||
|
||||
- name: Run docker smoke test
|
||||
timeout-minutes: 5
|
||||
run: bash tests/integration/docker/gha-run.sh run
|
||||
|
||||
42
.github/workflows/stale_issues.yaml
vendored
Normal file
42
.github/workflows/stale_issues.yaml
vendored
Normal file
@@ -0,0 +1,42 @@
|
||||
name: 'Stale issues with activity before a fixed date'
|
||||
on:
|
||||
schedule:
|
||||
- cron: '0 0 * * *'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
date:
|
||||
description: "Date of stale cut-off. All issues not updated since this date will be marked as stale. Format: YYYY-MM-DD e.g. 2022-10-09"
|
||||
default: "2022-10-09"
|
||||
required: false
|
||||
type: string
|
||||
|
||||
permissions: {}
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
stale:
|
||||
name: stale
|
||||
runs-on: ubuntu-24.04
|
||||
permissions:
|
||||
actions: write # Needed to manage caches for state persistence across runs
|
||||
issues: write # Needed to add/remove labels, post comments, or close issues
|
||||
steps:
|
||||
- name: Calculate the age to stale
|
||||
run: |
|
||||
echo AGE=$(( ( $(date +%s) - $(date -d "${DATE:-2022-10-09}" +%s) ) / 86400 )) >> "$GITHUB_ENV"
|
||||
env:
|
||||
DATE: ${{ inputs.date }}
|
||||
|
||||
- name: Run the stale action
|
||||
uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9.1.0
|
||||
with:
|
||||
stale-pr-message: 'This issue has had no activity since before ${DATE}. Please comment on the issue, or it will be closed in 30 days'
|
||||
days-before-pr-stale: -1
|
||||
days-before-pr-close: -1
|
||||
days-before-issue-stale: ${AGE}
|
||||
days-before-issue-close: 30
|
||||
env:
|
||||
DATE: ${{ inputs.date }}
|
||||
1
Cargo.lock
generated
1
Cargo.lock
generated
@@ -5824,6 +5824,7 @@ dependencies = [
|
||||
"protobuf",
|
||||
"protocols",
|
||||
"resource",
|
||||
"rstest",
|
||||
"runtime-spec",
|
||||
"serde_json",
|
||||
"shim-interface",
|
||||
|
||||
@@ -858,7 +858,12 @@ impl QemuInner {
|
||||
block_device.config.index,
|
||||
&block_device.config.path_on_host,
|
||||
&block_device.config.blkdev_aio.to_string(),
|
||||
block_device.config.is_direct,
|
||||
Some(
|
||||
block_device
|
||||
.config
|
||||
.is_direct
|
||||
.unwrap_or(self.config.blockdev_info.block_device_cache_direct),
|
||||
),
|
||||
block_device.config.is_readonly,
|
||||
block_device.config.no_drop,
|
||||
)
|
||||
|
||||
@@ -84,6 +84,16 @@ impl ResourceManager {
|
||||
inner.handle_network(network_config).await
|
||||
}
|
||||
|
||||
pub async fn has_network_endpoints(&self) -> bool {
|
||||
let inner = self.inner.read().await;
|
||||
inner.has_network_endpoints().await
|
||||
}
|
||||
|
||||
pub async fn setup_network_in_guest(&self) -> Result<()> {
|
||||
let inner = self.inner.read().await;
|
||||
inner.setup_network_in_guest().await
|
||||
}
|
||||
|
||||
#[instrument]
|
||||
pub async fn setup_after_start_vm(&self) -> Result<()> {
|
||||
let mut inner = self.inner.write().await;
|
||||
|
||||
@@ -296,6 +296,33 @@ impl ResourceManagerInner {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn has_network_endpoints(&self) -> bool {
|
||||
if let Some(network) = &self.network {
|
||||
match network.interfaces().await {
|
||||
std::result::Result::Ok(interfaces) => !interfaces.is_empty(),
|
||||
Err(_) => false,
|
||||
}
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn setup_network_in_guest(&self) -> Result<()> {
|
||||
if let Some(network) = self.network.as_ref() {
|
||||
let network = network.as_ref();
|
||||
self.handle_interfaces(network)
|
||||
.await
|
||||
.context("handle interfaces during network rescan")?;
|
||||
self.handle_neighbours(network)
|
||||
.await
|
||||
.context("handle neighbours during network rescan")?;
|
||||
self.handle_routes(network)
|
||||
.await
|
||||
.context("handle routes during network rescan")?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn setup_after_start_vm(&mut self) -> Result<()> {
|
||||
self.cgroups_resource
|
||||
.setup_after_start_vm(self.hypervisor.as_ref())
|
||||
|
||||
@@ -165,6 +165,6 @@ pub fn new(id: &str, config: &SharedFsInfo) -> Result<Arc<dyn ShareFs>> {
|
||||
VIRTIO_FS => Ok(Arc::new(
|
||||
ShareVirtioFsStandalone::new(id, config).context("new standalone virtio fs")?,
|
||||
)),
|
||||
_ => Err(anyhow!("unsupported shred fs {:?}", &shared_fs)),
|
||||
_ => Err(anyhow!("unsupported shared fs {:?}", &shared_fs)),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -53,6 +53,9 @@ linux_container = { workspace = true, optional = true }
|
||||
virt_container = { workspace = true, optional = true }
|
||||
wasm_container = { workspace = true, optional = true }
|
||||
|
||||
[dev-dependencies]
|
||||
rstest = { workspace = true }
|
||||
|
||||
[features]
|
||||
default = ["virt"]
|
||||
linux = ["linux_container"]
|
||||
|
||||
@@ -51,6 +51,13 @@ pub trait Sandbox: Send + Sync {
|
||||
shim_pid: u32,
|
||||
) -> Result<()>;
|
||||
|
||||
/// Re-scan the network namespace for late-discovered endpoints.
|
||||
/// This handles runtimes like Docker 26+ that configure networking
|
||||
/// after the Start response. The default implementation is a no-op.
|
||||
async fn rescan_network(&self) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// metrics function
|
||||
async fn agent_metrics(&self) -> Result<String>;
|
||||
async fn hypervisor_metrics(&self) -> Result<String>;
|
||||
|
||||
@@ -69,6 +69,53 @@ use crate::{
|
||||
tracer::{KataTracer, ROOTSPAN},
|
||||
};
|
||||
|
||||
const DOCKER_LIBNETWORK_SETKEY: &str = "libnetwork-setkey";
|
||||
|
||||
const DOCKER_NETNS_PREFIXES: &[&str] = &["/var/run/docker/netns/", "/run/docker/netns/"];
|
||||
|
||||
fn is_valid_docker_sandbox_id(id: &str) -> bool {
|
||||
id.len() == 64 && id.bytes().all(|b| matches!(b, b'0'..=b'9' | b'a'..=b'f'))
|
||||
}
|
||||
|
||||
/// Discover Docker's pre-created network namespace path from OCI spec hooks.
|
||||
///
|
||||
/// Docker's libnetwork-setkey hook contains the sandbox ID as its
|
||||
/// argument following "libnetwork-setkey", which maps to a netns file
|
||||
/// under /var/run/docker/netns/<sandbox_id> or /run/docker/netns/<sandbox_id>.
|
||||
fn docker_netns_path(spec: &oci::Spec) -> Option<String> {
|
||||
let hooks = spec.hooks().as_ref()?;
|
||||
|
||||
let hook_sets: [&[oci::Hook]; 2] = [
|
||||
hooks.prestart().as_deref().unwrap_or_default(),
|
||||
hooks.create_runtime().as_deref().unwrap_or_default(),
|
||||
];
|
||||
|
||||
for hooks in &hook_sets {
|
||||
for hook in *hooks {
|
||||
if let Some(args) = hook.args() {
|
||||
for (i, arg) in args.iter().enumerate() {
|
||||
if arg == DOCKER_LIBNETWORK_SETKEY && i + 1 < args.len() {
|
||||
let sandbox_id = &args[i + 1];
|
||||
if !is_valid_docker_sandbox_id(sandbox_id) {
|
||||
continue;
|
||||
}
|
||||
for prefix in DOCKER_NETNS_PREFIXES {
|
||||
let ns_path = format!("{}{}", prefix, sandbox_id);
|
||||
if let Ok(metadata) = std::fs::symlink_metadata(&ns_path) {
|
||||
if metadata.is_file() {
|
||||
return Some(ns_path);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
fn convert_string_to_slog_level(string_level: &str) -> slog::Level {
|
||||
match string_level {
|
||||
"trace" => slog::Level::Trace,
|
||||
@@ -377,8 +424,17 @@ impl RuntimeHandlerManager {
|
||||
if ns.path().is_some() {
|
||||
netns = ns.path().clone().map(|p| p.display().to_string());
|
||||
}
|
||||
// if we get empty netns from oci spec, we need to create netns for the VM
|
||||
else {
|
||||
// Docker 26+ may configure networking outside of the OCI
|
||||
// spec namespace path. Try to discover the netns from hook
|
||||
// args before falling back to creating a placeholder.
|
||||
else if let Some(docker_ns) = docker_netns_path(spec) {
|
||||
info!(
|
||||
sl!(),
|
||||
"discovered Docker network namespace from hook args";
|
||||
"netns" => &docker_ns
|
||||
);
|
||||
netns = Some(docker_ns);
|
||||
} else {
|
||||
let ns_name = generate_netns_name();
|
||||
let raw_netns = NetNs::new(ns_name)?;
|
||||
let path = Some(PathBuf::from(raw_netns.path()).display().to_string());
|
||||
@@ -639,6 +695,7 @@ impl RuntimeHandlerManager {
|
||||
Ok(TaskResponse::WaitProcess(exit_status))
|
||||
}
|
||||
TaskRequest::StartProcess(process_id) => {
|
||||
let is_sandbox_container = cm.is_sandbox_container(&process_id).await;
|
||||
let shim_pid = cm
|
||||
.start_process(&process_id)
|
||||
.await
|
||||
@@ -647,6 +704,25 @@ impl RuntimeHandlerManager {
|
||||
let pid = shim_pid.pid;
|
||||
let process_type = process_id.process_type;
|
||||
let container_id = process_id.container_id().to_string();
|
||||
|
||||
// Schedule an async network rescan for sandbox containers.
|
||||
// This handles runtimes that configure networking after the
|
||||
// Start response (e.g. Docker 26+). rescan_network is
|
||||
// idempotent — it returns immediately if endpoints already
|
||||
// exist.
|
||||
if is_sandbox_container {
|
||||
let sandbox_rescan = sandbox.clone();
|
||||
tokio::spawn(async move {
|
||||
if let Err(e) = sandbox_rescan.rescan_network().await {
|
||||
error!(
|
||||
sl!(),
|
||||
"async network rescan failed — container may lack networking: {:?}",
|
||||
e
|
||||
);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
tokio::spawn(async move {
|
||||
let result = sandbox.wait_process(cm, process_id, pid).await;
|
||||
if let Err(e) = result {
|
||||
@@ -920,3 +996,85 @@ fn configure_non_root_hypervisor(config: &mut Hypervisor) -> Result<()> {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use oci_spec::runtime::{HookBuilder, HooksBuilder, SpecBuilder};
|
||||
use rstest::rstest;
|
||||
|
||||
const VALID_SANDBOX_ID: &str =
|
||||
"a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2";
|
||||
|
||||
#[rstest]
|
||||
#[case::all_lowercase_hex(VALID_SANDBOX_ID, true)]
|
||||
#[case::all_zeros("0000000000000000000000000000000000000000000000000000000000000000", true)]
|
||||
#[case::uppercase_hex("A1B2C3D4E5F6A1B2C3D4E5F6A1B2C3D4E5F6A1B2C3D4E5F6A1B2C3D4E5F6A1B2", false)]
|
||||
#[case::too_short("a1b2c3d4", false)]
|
||||
#[case::non_hex("zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", false)]
|
||||
#[case::path_traversal("../../../etc/passwd", false)]
|
||||
#[case::empty("", false)]
|
||||
fn test_is_valid_docker_sandbox_id(#[case] id: &str, #[case] expected: bool) {
|
||||
assert_eq!(is_valid_docker_sandbox_id(id), expected);
|
||||
}
|
||||
|
||||
fn make_hook_with_args(args: Vec<&str>) -> oci::Hook {
|
||||
HookBuilder::default()
|
||||
.path("/usr/bin/test")
|
||||
.args(args.into_iter().map(String::from).collect::<Vec<_>>())
|
||||
.build()
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
#[rstest]
|
||||
#[case::no_hooks(None, None)]
|
||||
#[case::unrelated_hooks(
|
||||
Some(HooksBuilder::default()
|
||||
.prestart(vec![make_hook_with_args(vec!["some-hook", "arg1"])])
|
||||
.build().unwrap()),
|
||||
None
|
||||
)]
|
||||
#[case::invalid_sandbox_id(
|
||||
Some(HooksBuilder::default()
|
||||
.prestart(vec![make_hook_with_args(vec![
|
||||
"/usr/bin/dockerd", "libnetwork-setkey", "not-a-valid-id",
|
||||
])])
|
||||
.build().unwrap()),
|
||||
None
|
||||
)]
|
||||
#[case::setkey_at_end_of_args(
|
||||
Some(HooksBuilder::default()
|
||||
.prestart(vec![make_hook_with_args(vec![
|
||||
"/usr/bin/dockerd", "libnetwork-setkey",
|
||||
])])
|
||||
.build().unwrap()),
|
||||
None
|
||||
)]
|
||||
#[case::valid_prestart_but_no_file(
|
||||
Some(HooksBuilder::default()
|
||||
.prestart(vec![make_hook_with_args(vec![
|
||||
"/usr/bin/dockerd", "libnetwork-setkey", VALID_SANDBOX_ID,
|
||||
])])
|
||||
.build().unwrap()),
|
||||
None
|
||||
)]
|
||||
#[case::valid_create_runtime_but_no_file(
|
||||
Some(HooksBuilder::default()
|
||||
.create_runtime(vec![make_hook_with_args(vec![
|
||||
"/usr/bin/dockerd", "libnetwork-setkey", VALID_SANDBOX_ID,
|
||||
])])
|
||||
.build().unwrap()),
|
||||
None
|
||||
)]
|
||||
fn test_docker_netns_path(
|
||||
#[case] hooks: Option<oci::Hooks>,
|
||||
#[case] expected: Option<String>,
|
||||
) {
|
||||
let mut builder = SpecBuilder::default();
|
||||
if let Some(h) = hooks {
|
||||
builder = builder.hooks(h);
|
||||
}
|
||||
let spec = builder.build().unwrap();
|
||||
assert_eq!(docker_netns_path(&spec), expected);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -58,6 +58,7 @@ use resource::{ResourceConfig, ResourceManager};
|
||||
use runtime_spec as spec;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use strum::Display;
|
||||
use tokio::sync::{mpsc::Sender, Mutex, RwLock};
|
||||
use tracing::instrument;
|
||||
@@ -973,6 +974,71 @@ impl Sandbox for VirtSandbox {
|
||||
self.hypervisor.get_hypervisor_metrics().await
|
||||
}
|
||||
|
||||
async fn rescan_network(&self) -> Result<()> {
|
||||
let config = self.resource_manager.config().await;
|
||||
if config.runtime.disable_new_netns {
|
||||
return Ok(());
|
||||
}
|
||||
if dan_config_path(&config, &self.sid).exists() {
|
||||
return Ok(());
|
||||
}
|
||||
if self.resource_manager.has_network_endpoints().await {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let sandbox_config = match &self.sandbox_config {
|
||||
Some(c) => c,
|
||||
None => return Ok(()),
|
||||
};
|
||||
let netns_path = match &sandbox_config.network_env.netns {
|
||||
Some(p) => p.clone(),
|
||||
None => return Ok(()),
|
||||
};
|
||||
|
||||
const MAX_WAIT: Duration = Duration::from_secs(5);
|
||||
const POLL_INTERVAL: Duration = Duration::from_millis(50);
|
||||
let deadline = tokio::time::Instant::now() + MAX_WAIT;
|
||||
|
||||
info!(sl!(), "waiting for network interfaces in namespace");
|
||||
|
||||
loop {
|
||||
let network_config = NetworkConfig::NetNs(NetworkWithNetNsConfig {
|
||||
network_model: config.runtime.internetworking_model.clone(),
|
||||
netns_path: netns_path.clone(),
|
||||
queues: self
|
||||
.hypervisor
|
||||
.hypervisor_config()
|
||||
.await
|
||||
.network_info
|
||||
.network_queues as usize,
|
||||
network_created: sandbox_config.network_env.network_created,
|
||||
});
|
||||
|
||||
if let Err(e) = self.resource_manager.handle_network(network_config).await {
|
||||
warn!(sl!(), "network rescan attempt failed: {:?}", e);
|
||||
}
|
||||
|
||||
if self.resource_manager.has_network_endpoints().await {
|
||||
info!(sl!(), "network interfaces discovered during rescan");
|
||||
return self
|
||||
.resource_manager
|
||||
.setup_network_in_guest()
|
||||
.await
|
||||
.context("setup network in guest after rescan");
|
||||
}
|
||||
|
||||
if tokio::time::Instant::now() >= deadline {
|
||||
warn!(
|
||||
sl!(),
|
||||
"no network interfaces found after timeout — networking may be configured later"
|
||||
);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
tokio::time::sleep(POLL_INTERVAL).await;
|
||||
}
|
||||
}
|
||||
|
||||
async fn set_policy(&self, policy: &str) -> Result<()> {
|
||||
if policy.is_empty() {
|
||||
debug!(sl!(), "sb: set_policy skipped without policy");
|
||||
|
||||
@@ -9,9 +9,9 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"github.com/containerd/containerd/api/types/task"
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/containerd/containerd/api/types/task"
|
||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
|
||||
)
|
||||
|
||||
@@ -46,6 +46,19 @@ func startContainer(ctx context.Context, s *service, c *container) (retErr error
|
||||
}
|
||||
go watchSandbox(ctx, s)
|
||||
|
||||
// If no network endpoints were discovered during sandbox creation,
|
||||
// schedule an async rescan. This handles runtimes that configure
|
||||
// networking after task creation (e.g. Docker 26+ configures
|
||||
// networking after the Start response, and prestart hooks may
|
||||
// not have run yet on slower architectures).
|
||||
// RescanNetwork is idempotent — it returns immediately if
|
||||
// endpoints already exist.
|
||||
go func() {
|
||||
if err := s.sandbox.RescanNetwork(s.ctx); err != nil {
|
||||
shimLog.WithError(err).Error("async network rescan failed — container may lack networking")
|
||||
}
|
||||
}()
|
||||
|
||||
// We use s.ctx(`ctx` derived from `s.ctx`) to check for cancellation of the
|
||||
// shim context and the context passed to startContainer for tracing.
|
||||
go watchOOMEvents(ctx, s)
|
||||
|
||||
@@ -19,6 +19,7 @@ import (
|
||||
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
|
||||
vf "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/factory"
|
||||
vcAnnotations "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations"
|
||||
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
|
||||
specs "github.com/opencontainers/runtime-spec/specs-go"
|
||||
)
|
||||
|
||||
@@ -140,6 +141,17 @@ func CreateSandbox(ctx context.Context, vci vc.VC, ociSpec specs.Spec, runtimeCo
|
||||
sandboxConfig.Containers[0].RootFs = rootFs
|
||||
}
|
||||
|
||||
// Docker 26+ may set up networking before task creation instead of using
|
||||
// prestart hooks. The netns path is not in the OCI spec but can be
|
||||
// discovered from Docker's libnetwork hook args which contain the sandbox
|
||||
// ID that maps to /var/run/docker/netns/<sandbox_id>.
|
||||
if sandboxConfig.NetworkConfig.NetworkID == "" && !sandboxConfig.NetworkConfig.DisableNewNetwork {
|
||||
if dockerNetns := utils.DockerNetnsPath(&ociSpec); dockerNetns != "" {
|
||||
sandboxConfig.NetworkConfig.NetworkID = dockerNetns
|
||||
kataUtilsLogger.WithField("netns", dockerNetns).Info("discovered Docker network namespace from hook args")
|
||||
}
|
||||
}
|
||||
|
||||
// Important to create the network namespace before the sandbox is
|
||||
// created, because it is not responsible for the creation of the
|
||||
// netns if it does not exist.
|
||||
|
||||
@@ -20,7 +20,9 @@ import (
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const procMountInfoFile = "/proc/self/mountinfo"
|
||||
const (
|
||||
procMountInfoFile = "/proc/self/mountinfo"
|
||||
)
|
||||
|
||||
// EnterNetNS is free from any call to a go routine, and it calls
|
||||
// into runtime.LockOSThread(), meaning it won't be executed in a
|
||||
@@ -29,27 +31,30 @@ func EnterNetNS(networkID string, cb func() error) error {
|
||||
return vc.EnterNetNS(networkID, cb)
|
||||
}
|
||||
|
||||
// SetupNetworkNamespace create a network namespace
|
||||
// SetupNetworkNamespace creates a network namespace if one is not already
|
||||
// provided via NetworkID. When NetworkID is empty and networking is not
|
||||
// disabled, a new namespace is created as a placeholder; the actual
|
||||
// hypervisor namespace will be discovered later by addAllEndpoints after
|
||||
// the VM has started.
|
||||
func SetupNetworkNamespace(config *vc.NetworkConfig) error {
|
||||
if config.DisableNewNetwork {
|
||||
kataUtilsLogger.Info("DisableNewNetNs is on, shim and hypervisor are running in the host netns")
|
||||
return nil
|
||||
}
|
||||
|
||||
var err error
|
||||
var n ns.NetNS
|
||||
|
||||
if config.NetworkID == "" {
|
||||
var (
|
||||
err error
|
||||
n ns.NetNS
|
||||
)
|
||||
|
||||
if rootless.IsRootless() {
|
||||
n, err = rootless.NewNS()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
n, err = testutils.NewNS()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
config.NetworkID = n.Path()
|
||||
@@ -71,11 +76,23 @@ func SetupNetworkNamespace(config *vc.NetworkConfig) error {
|
||||
}
|
||||
|
||||
const (
|
||||
netNsMountType = "nsfs"
|
||||
mountTypeFieldIdx = 8
|
||||
mountDestIdx = 4
|
||||
netNsMountType = "nsfs"
|
||||
mountDestIdx = 4
|
||||
)
|
||||
|
||||
// mountinfoFsType finds the filesystem type in a parsed mountinfo line.
|
||||
// The mountinfo format has optional tagged fields (shared:, master:, etc.)
|
||||
// between field 7 and a "-" separator. The fs type is the field immediately
|
||||
// after "-". Returns "" if the separator is not found.
|
||||
func mountinfoFsType(fields []string) string {
|
||||
for i, f := range fields {
|
||||
if f == "-" && i+1 < len(fields) {
|
||||
return fields[i+1]
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// getNetNsFromBindMount returns the network namespace for the bind-mounted path
|
||||
func getNetNsFromBindMount(nsPath string, procMountFile string) (string, error) {
|
||||
// Resolve all symlinks in the path as the mountinfo file contains
|
||||
@@ -100,16 +117,15 @@ func getNetNsFromBindMount(nsPath string, procMountFile string) (string, error)
|
||||
// "711 26 0:3 net:[4026532009] /run/docker/netns/default rw shared:535 - nsfs nsfs rw"
|
||||
//
|
||||
// Reference: https://www.kernel.org/doc/Documentation/filesystems/proc.txt
|
||||
|
||||
// We are interested in the first 9 fields of this file,
|
||||
// to check for the correct mount type.
|
||||
// The "-" separator has a variable position due to optional tagged
|
||||
// fields, so we locate the fs type dynamically.
|
||||
fields := strings.Split(text, " ")
|
||||
if len(fields) < 9 {
|
||||
continue
|
||||
}
|
||||
|
||||
// We check here if the mount type is a network namespace mount type, namely "nsfs"
|
||||
if fields[mountTypeFieldIdx] != netNsMountType {
|
||||
if mountinfoFsType(fields) != netNsMountType {
|
||||
continue
|
||||
}
|
||||
|
||||
|
||||
@@ -149,3 +149,23 @@ func TestSetupNetworkNamespace(t *testing.T) {
|
||||
err = SetupNetworkNamespace(config)
|
||||
assert.NoError(err)
|
||||
}
|
||||
|
||||
func TestMountinfoFsType(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
// Standard mountinfo line with optional tagged fields
|
||||
fields := []string{"711", "26", "0:3", "net:[4026532009]", "/run/docker/netns/default", "rw", "shared:535", "-", "nsfs", "nsfs", "rw"}
|
||||
assert.Equal("nsfs", mountinfoFsType(fields))
|
||||
|
||||
// Multiple optional tags before separator
|
||||
fields = []string{"711", "26", "0:3", "net:[4026532009]", "/run/docker/netns/default", "rw", "shared:535", "master:1", "-", "nsfs", "nsfs", "rw"}
|
||||
assert.Equal("nsfs", mountinfoFsType(fields))
|
||||
|
||||
// No separator
|
||||
fields = []string{"711", "26", "0:3", "net:[4026532009]", "/run/docker/netns/default", "rw"}
|
||||
assert.Equal("", mountinfoFsType(fields))
|
||||
|
||||
// Separator at end (malformed)
|
||||
fields = []string{"711", "26", "-"}
|
||||
assert.Equal("", mountinfoFsType(fields))
|
||||
}
|
||||
|
||||
@@ -72,6 +72,8 @@ type VCSandbox interface {
|
||||
|
||||
GetOOMEvent(ctx context.Context) (string, error)
|
||||
GetHypervisorPid() (int, error)
|
||||
// RescanNetwork re-scans the network namespace for late-discovered endpoints.
|
||||
RescanNetwork(ctx context.Context) error
|
||||
|
||||
UpdateRuntimeMetrics() error
|
||||
GetAgentMetrics(ctx context.Context) (string, error)
|
||||
|
||||
@@ -17,9 +17,11 @@ import (
|
||||
"runtime"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/containernetworking/plugins/pkg/ns"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/vishvananda/netlink"
|
||||
"github.com/vishvananda/netns"
|
||||
otelTrace "go.opentelemetry.io/otel/trace"
|
||||
@@ -45,6 +47,11 @@ type LinuxNetwork struct {
|
||||
interworkingModel NetInterworkingModel
|
||||
netNSCreated bool
|
||||
danConfigPath string
|
||||
// placeholderNetNS holds the path to a placeholder network namespace
|
||||
// that we created but later abandoned in favour of the hypervisor's
|
||||
// netns. If best-effort deletion in addAllEndpoints fails, teardown
|
||||
// retries the cleanup via RemoveEndpoints.
|
||||
placeholderNetNS string
|
||||
}
|
||||
|
||||
// NewNetwork creates a new Linux Network from a NetworkConfig.
|
||||
@@ -68,11 +75,11 @@ func NewNetwork(configs ...*NetworkConfig) (Network, error) {
|
||||
}
|
||||
|
||||
return &LinuxNetwork{
|
||||
config.NetworkID,
|
||||
[]Endpoint{},
|
||||
config.InterworkingModel,
|
||||
config.NetworkCreated,
|
||||
config.DanConfigPath,
|
||||
netNSPath: config.NetworkID,
|
||||
eps: []Endpoint{},
|
||||
interworkingModel: config.InterworkingModel,
|
||||
netNSCreated: config.NetworkCreated,
|
||||
danConfigPath: config.DanConfigPath,
|
||||
}, nil
|
||||
}
|
||||
|
||||
@@ -325,28 +332,91 @@ func (n *LinuxNetwork) GetEndpointsNum() (int, error) {
|
||||
// Scan the networking namespace through netlink and then:
|
||||
// 1. Create the endpoints for the relevant interfaces found there.
|
||||
// 2. Attach them to the VM.
|
||||
//
|
||||
// If no usable interfaces are found and the hypervisor is running in a
|
||||
// different network namespace (e.g. Docker 26+ places QEMU in its own
|
||||
// pre-configured namespace), switch to the hypervisor's namespace and
|
||||
// rescan there. This handles the case where the OCI spec does not
|
||||
// communicate the network namespace path.
|
||||
func (n *LinuxNetwork) addAllEndpoints(ctx context.Context, s *Sandbox, hotplug bool) error {
|
||||
netnsHandle, err := netns.GetFromPath(n.netNSPath)
|
||||
endpoints, err := n.scanEndpointsInNs(ctx, s, n.netNSPath, hotplug)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// If the scan found no usable endpoints, check whether the
|
||||
// hypervisor is running in a different namespace and retry there.
|
||||
if len(endpoints) == 0 && s != nil {
|
||||
if hypervisorNs, ok := n.detectHypervisorNetns(s); ok {
|
||||
networkLogger().WithFields(logrus.Fields{
|
||||
"original_netns": n.netNSPath,
|
||||
"hypervisor_netns": hypervisorNs,
|
||||
}).Debug("no endpoints in original netns, switching to hypervisor netns")
|
||||
|
||||
origPath := n.netNSPath
|
||||
origCreated := n.netNSCreated
|
||||
n.netNSPath = hypervisorNs
|
||||
|
||||
_, err = n.scanEndpointsInNs(ctx, s, n.netNSPath, hotplug)
|
||||
if err != nil {
|
||||
n.netNSPath = origPath
|
||||
n.netNSCreated = origCreated
|
||||
return err
|
||||
}
|
||||
|
||||
// Clean up the placeholder namespace we created — we're now
|
||||
// using the hypervisor's namespace and the placeholder is empty.
|
||||
// Only clear netNSCreated once deletion succeeds; on failure,
|
||||
// stash the path so RemoveEndpoints can retry during teardown.
|
||||
if origCreated {
|
||||
if delErr := deleteNetNS(origPath); delErr != nil {
|
||||
networkLogger().WithField("netns", origPath).WithError(delErr).Warn("failed to delete placeholder netns, will retry during teardown")
|
||||
n.placeholderNetNS = origPath
|
||||
}
|
||||
}
|
||||
// The hypervisor's namespace was not created by us.
|
||||
n.netNSCreated = false
|
||||
}
|
||||
}
|
||||
|
||||
sort.Slice(n.eps, func(i, j int) bool {
|
||||
return n.eps[i].Name() < n.eps[j].Name()
|
||||
})
|
||||
|
||||
networkLogger().WithField("endpoints", n.eps).Info("endpoints found after scan")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// scanEndpointsInNs scans a network namespace for usable (non-loopback,
|
||||
// configured) interfaces and adds them as endpoints. Returns the list of
|
||||
// newly added endpoints.
|
||||
func (n *LinuxNetwork) scanEndpointsInNs(ctx context.Context, s *Sandbox, nsPath string, hotplug bool) ([]Endpoint, error) {
|
||||
netnsHandle, err := netns.GetFromPath(nsPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer netnsHandle.Close()
|
||||
|
||||
netlinkHandle, err := netlink.NewHandleAt(netnsHandle)
|
||||
if err != nil {
|
||||
return err
|
||||
return nil, err
|
||||
}
|
||||
defer netlinkHandle.Close()
|
||||
|
||||
linkList, err := netlinkHandle.LinkList()
|
||||
if err != nil {
|
||||
return err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
epsBefore := len(n.eps)
|
||||
var added []Endpoint
|
||||
|
||||
for _, link := range linkList {
|
||||
netInfo, err := networkInfoFromLink(netlinkHandle, link)
|
||||
if err != nil {
|
||||
return err
|
||||
// No rollback needed — no endpoints were added in this iteration yet.
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Ignore unconfigured network interfaces. These are
|
||||
@@ -368,22 +438,62 @@ func (n *LinuxNetwork) addAllEndpoints(ctx context.Context, s *Sandbox, hotplug
|
||||
continue
|
||||
}
|
||||
|
||||
if err := doNetNS(n.netNSPath, func(_ ns.NetNS) error {
|
||||
_, err = n.addSingleEndpoint(ctx, s, netInfo, hotplug)
|
||||
return err
|
||||
if err := doNetNS(nsPath, func(_ ns.NetNS) error {
|
||||
ep, addErr := n.addSingleEndpoint(ctx, s, netInfo, hotplug)
|
||||
if addErr == nil {
|
||||
added = append(added, ep)
|
||||
}
|
||||
return addErr
|
||||
}); err != nil {
|
||||
return err
|
||||
// Rollback: remove any endpoints added during this scan
|
||||
// so that a failed scan does not leave partial side effects.
|
||||
n.eps = n.eps[:epsBefore]
|
||||
return nil, err
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
sort.Slice(n.eps, func(i, j int) bool {
|
||||
return n.eps[i].Name() < n.eps[j].Name()
|
||||
})
|
||||
return added, nil
|
||||
}
|
||||
|
||||
networkLogger().WithField("endpoints", n.eps).Info("endpoints found after scan")
|
||||
// detectHypervisorNetns checks whether the hypervisor process is running in a
|
||||
// network namespace different from the one we are currently tracking. If so it
|
||||
// returns the procfs path to the hypervisor's netns and true.
|
||||
func (n *LinuxNetwork) detectHypervisorNetns(s *Sandbox) (string, bool) {
|
||||
pid, err := s.GetHypervisorPid()
|
||||
if err != nil || pid <= 0 {
|
||||
return "", false
|
||||
}
|
||||
|
||||
return nil
|
||||
// Guard against PID recycling: verify the process belongs to this
|
||||
// sandbox by checking its command line for the sandbox ID. QEMU is
|
||||
// started with -name sandbox-<id>, so the ID will appear in cmdline.
|
||||
// /proc/pid/cmdline uses null bytes as argument separators; replace
|
||||
// them so the substring search works on the joined argument string.
|
||||
cmdlineRaw, err := os.ReadFile(fmt.Sprintf("/proc/%d/cmdline", pid))
|
||||
if err != nil {
|
||||
return "", false
|
||||
}
|
||||
cmdline := strings.ReplaceAll(string(cmdlineRaw), "\x00", " ")
|
||||
if !strings.Contains(cmdline, s.id) {
|
||||
return "", false
|
||||
}
|
||||
|
||||
hypervisorNs := fmt.Sprintf("/proc/%d/ns/net", pid)
|
||||
|
||||
// Compare device and inode numbers. Inode numbers are only unique
|
||||
// within a device, so both must match to confirm the same namespace.
|
||||
var currentStat, hvStat unix.Stat_t
|
||||
if err := unix.Stat(n.netNSPath, ¤tStat); err != nil {
|
||||
return "", false
|
||||
}
|
||||
if err := unix.Stat(hypervisorNs, &hvStat); err != nil {
|
||||
return "", false
|
||||
}
|
||||
|
||||
if currentStat.Dev != hvStat.Dev || currentStat.Ino != hvStat.Ino {
|
||||
return hypervisorNs, true
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
func convertDanDeviceToNetworkInfo(device *vctypes.DanDevice) (*NetworkInfo, error) {
|
||||
@@ -571,6 +681,17 @@ func (n *LinuxNetwork) RemoveEndpoints(ctx context.Context, s *Sandbox, endpoint
|
||||
return deleteNetNS(n.netNSPath)
|
||||
}
|
||||
|
||||
// Retry cleanup of a placeholder namespace whose earlier deletion
|
||||
// failed in addAllEndpoints.
|
||||
if n.placeholderNetNS != "" && endpoints == nil {
|
||||
if delErr := deleteNetNS(n.placeholderNetNS); delErr != nil {
|
||||
networkLogger().WithField("netns", n.placeholderNetNS).WithError(delErr).Warn("failed to delete placeholder netns during teardown")
|
||||
} else {
|
||||
networkLogger().WithField("netns", n.placeholderNetNS).Info("placeholder network namespace deleted")
|
||||
n.placeholderNetNS = ""
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -363,11 +363,11 @@ func TestConvertDanDeviceToNetworkInfo(t *testing.T) {
|
||||
func TestAddEndpoints_Dan(t *testing.T) {
|
||||
|
||||
network := &LinuxNetwork{
|
||||
"net-123",
|
||||
[]Endpoint{},
|
||||
NetXConnectDefaultModel,
|
||||
true,
|
||||
"testdata/dan-config.json",
|
||||
netNSPath: "net-123",
|
||||
eps: []Endpoint{},
|
||||
interworkingModel: NetXConnectDefaultModel,
|
||||
netNSCreated: true,
|
||||
danConfigPath: "testdata/dan-config.json",
|
||||
}
|
||||
|
||||
ctx := context.TODO()
|
||||
|
||||
@@ -255,6 +255,10 @@ func (s *Sandbox) GetHypervisorPid() (int, error) {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
func (s *Sandbox) RescanNetwork(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Sandbox) GuestVolumeStats(ctx context.Context, path string) ([]byte, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
@@ -20,6 +20,7 @@ import (
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
v1 "github.com/containerd/cgroups/stats/v1"
|
||||
v2 "github.com/containerd/cgroups/v2/stats"
|
||||
@@ -330,6 +331,81 @@ func (s *Sandbox) GetHypervisorPid() (int, error) {
|
||||
return pids[0], nil
|
||||
}
|
||||
|
||||
// RescanNetwork re-scans the network namespace for endpoints if none have
|
||||
// been discovered yet. This is idempotent: if endpoints already exist it
|
||||
// returns immediately. It enables Docker 26+ support where networking is
|
||||
// configured after task creation but before Start.
|
||||
//
|
||||
// Docker 26+ configures networking (veth pair, IP addresses) between
|
||||
// Create and Start. The interfaces may not be present immediately, so
|
||||
// this method polls until they appear or a timeout is reached.
|
||||
//
|
||||
// When new endpoints are found, the guest agent is informed about the
|
||||
// interfaces and routes so that networking becomes functional inside the VM.
|
||||
func (s *Sandbox) RescanNetwork(ctx context.Context) error {
|
||||
if s.config.NetworkConfig.DisableNewNetwork {
|
||||
return nil
|
||||
}
|
||||
if len(s.network.Endpoints()) > 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
const maxWait = 5 * time.Second
|
||||
const pollInterval = 50 * time.Millisecond
|
||||
deadline := time.NewTimer(maxWait)
|
||||
defer deadline.Stop()
|
||||
ticker := time.NewTicker(pollInterval)
|
||||
defer ticker.Stop()
|
||||
|
||||
s.Logger().Debug("waiting for network interfaces in namespace")
|
||||
|
||||
for {
|
||||
if _, err := s.network.AddEndpoints(ctx, s, nil, true); err != nil {
|
||||
return err
|
||||
}
|
||||
if len(s.network.Endpoints()) > 0 {
|
||||
return s.configureGuestNetwork(ctx)
|
||||
}
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case <-deadline.C:
|
||||
s.Logger().Warn("no network interfaces found after timeout — networking may be configured by prestart hooks")
|
||||
return nil
|
||||
case <-ticker.C:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// configureGuestNetwork informs the guest agent about discovered network
|
||||
// endpoints so that interfaces and routes become functional inside the VM.
|
||||
func (s *Sandbox) configureGuestNetwork(ctx context.Context) error {
|
||||
endpoints := s.network.Endpoints()
|
||||
s.Logger().WithField("endpoints", len(endpoints)).Info("configuring hotplugged network in guest")
|
||||
|
||||
// Note: ARP neighbors (3rd return value) are not propagated here
|
||||
// because the agent interface only exposes per-entry updates. The
|
||||
// full setupNetworks path in kataAgent handles them; this path is
|
||||
// only reached for late-discovered endpoints where neighbor entries
|
||||
// are populated dynamically by the kernel.
|
||||
interfaces, routes, _, err := generateVCNetworkStructures(ctx, endpoints)
|
||||
if err != nil {
|
||||
return fmt.Errorf("generating network structures: %w", err)
|
||||
}
|
||||
for _, ifc := range interfaces {
|
||||
if _, err := s.agent.updateInterface(ctx, ifc); err != nil {
|
||||
return fmt.Errorf("updating interface %s in guest: %w", ifc.Name, err)
|
||||
}
|
||||
}
|
||||
if len(routes) > 0 {
|
||||
if _, err := s.agent.updateRoutes(ctx, routes); err != nil {
|
||||
return fmt.Errorf("updating routes in guest: %w", err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetAllContainers returns all containers.
|
||||
func (s *Sandbox) GetAllContainers() []VCContainer {
|
||||
ifa := make([]VCContainer, len(s.containers))
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
@@ -493,17 +494,38 @@ func RevertBytes(num uint64) uint64 {
|
||||
return 1024*RevertBytes(a) + b
|
||||
}
|
||||
|
||||
// dockerLibnetworkSetkey is the hook argument that identifies Docker's
|
||||
// network configuration hook. The argument following it is the sandbox ID.
|
||||
const dockerLibnetworkSetkey = "libnetwork-setkey"
|
||||
|
||||
// dockerNetnsPrefixes are the well-known filesystem paths where the Docker
|
||||
// daemon bind-mounts container network namespaces.
|
||||
var dockerNetnsPrefixes = []string{"/var/run/docker/netns/", "/run/docker/netns/"}
|
||||
|
||||
// validSandboxID matches Docker sandbox IDs: exactly 64 lowercase hex characters.
|
||||
var validSandboxID = regexp.MustCompile(`^[0-9a-f]{64}$`)
|
||||
|
||||
// IsDockerContainer returns if the container is managed by docker
|
||||
// This is done by checking the prestart hook for `libnetwork` arguments.
|
||||
// This is done by checking the prestart and createRuntime hooks for
|
||||
// `libnetwork` arguments. Docker 26+ may use CreateRuntime hooks
|
||||
// instead of the deprecated Prestart hooks.
|
||||
func IsDockerContainer(spec *specs.Spec) bool {
|
||||
if spec == nil || spec.Hooks == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
for _, hook := range spec.Hooks.Prestart { //nolint:all
|
||||
for _, arg := range hook.Args {
|
||||
if strings.HasPrefix(arg, "libnetwork") {
|
||||
return true
|
||||
// Check both Prestart (Docker < 26) and CreateRuntime (Docker >= 26) hooks.
|
||||
hookSets := [][]specs.Hook{
|
||||
spec.Hooks.Prestart, //nolint:all
|
||||
spec.Hooks.CreateRuntime,
|
||||
}
|
||||
|
||||
for _, hooks := range hookSets {
|
||||
for _, hook := range hooks {
|
||||
for _, arg := range hook.Args {
|
||||
if strings.HasPrefix(arg, "libnetwork") {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -511,6 +533,50 @@ func IsDockerContainer(spec *specs.Spec) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// DockerNetnsPath attempts to discover Docker's pre-created network namespace
|
||||
// path from OCI spec hooks. Docker's libnetwork-setkey hook contains the
|
||||
// sandbox ID as its second argument, which maps to the netns file under
|
||||
// /var/run/docker/netns/<sandbox_id>.
|
||||
func DockerNetnsPath(spec *specs.Spec) string {
|
||||
if spec == nil || spec.Hooks == nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
// Search both Prestart and CreateRuntime hooks for libnetwork-setkey.
|
||||
hookSets := [][]specs.Hook{
|
||||
spec.Hooks.Prestart, //nolint:all
|
||||
spec.Hooks.CreateRuntime,
|
||||
}
|
||||
|
||||
for _, hooks := range hookSets {
|
||||
for _, hook := range hooks {
|
||||
for i, arg := range hook.Args {
|
||||
if arg == dockerLibnetworkSetkey && i+1 < len(hook.Args) {
|
||||
sandboxID := hook.Args[i+1]
|
||||
// Docker sandbox IDs are exactly 64 lowercase hex
|
||||
// characters. Reject anything else to prevent path
|
||||
// traversal and unexpected input.
|
||||
if !validSandboxID.MatchString(sandboxID) {
|
||||
continue
|
||||
}
|
||||
// Docker stores netns under well-known paths.
|
||||
// Use Lstat to reject symlinks (which could point
|
||||
// outside the Docker netns directory) and non-regular
|
||||
// files such as directories.
|
||||
for _, prefix := range dockerNetnsPrefixes {
|
||||
nsPath := prefix + sandboxID
|
||||
if fi, err := os.Lstat(nsPath); err == nil && fi.Mode().IsRegular() {
|
||||
return nsPath
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
// GetGuestNUMANodes constructs guest NUMA nodes mapping to host NUMA nodes and host CPUs.
|
||||
func GetGuestNUMANodes(numaMapping []string) ([]types.GuestNUMANode, error) {
|
||||
// Add guest NUMA node for each specified subsets of host NUMA nodes.
|
||||
|
||||
@@ -579,24 +579,178 @@ func TestRevertBytes(t *testing.T) {
|
||||
assert.Equal(expectedNum, num)
|
||||
}
|
||||
|
||||
// TestIsDockerContainer validates hook-detection logic in isolation.
|
||||
// End-to-end Docker→containerd→kata integration is covered by
|
||||
// external tests (see tests/integration/kubernetes/).
|
||||
func TestIsDockerContainer(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
// nil spec
|
||||
assert.False(IsDockerContainer(nil))
|
||||
|
||||
// nil hooks
|
||||
assert.False(IsDockerContainer(&specs.Spec{}))
|
||||
|
||||
// Unrelated prestart hook
|
||||
ociSpec := &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{
|
||||
{
|
||||
Args: []string{
|
||||
"haha",
|
||||
},
|
||||
},
|
||||
Prestart: []specs.Hook{ //nolint:all
|
||||
{Args: []string{"haha"}},
|
||||
},
|
||||
},
|
||||
}
|
||||
assert.False(IsDockerContainer(ociSpec))
|
||||
|
||||
// Prestart hook with libnetwork (Docker < 26)
|
||||
ociSpec.Hooks.Prestart = append(ociSpec.Hooks.Prestart, specs.Hook{ //nolint:all
|
||||
Args: []string{"libnetwork-xxx"},
|
||||
})
|
||||
assert.True(IsDockerContainer(ociSpec))
|
||||
|
||||
// CreateRuntime hook with libnetwork (Docker >= 26)
|
||||
ociSpec2 := &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
CreateRuntime: []specs.Hook{
|
||||
{Args: []string{"/usr/bin/docker-proxy", "libnetwork-setkey", "abc123", "ctrl"}},
|
||||
},
|
||||
},
|
||||
}
|
||||
assert.True(IsDockerContainer(ociSpec2))
|
||||
|
||||
// CreateRuntime hook without libnetwork
|
||||
ociSpec3 := &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
CreateRuntime: []specs.Hook{
|
||||
{Args: []string{"/some/other/hook"}},
|
||||
},
|
||||
},
|
||||
}
|
||||
assert.False(IsDockerContainer(ociSpec3))
|
||||
}
|
||||
|
||||
// TestDockerNetnsPath validates netns path discovery from OCI hook args.
|
||||
// This does not test the actual namespace opening or endpoint scanning;
|
||||
// see integration tests for full-path coverage.
|
||||
func TestDockerNetnsPath(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
// Valid 64-char hex sandbox IDs for test cases.
|
||||
validID := strings.Repeat("ab", 32) // 64 hex chars
|
||||
validID2 := strings.Repeat("cd", 32) // another 64 hex chars
|
||||
invalidShortID := "abc123" // too short
|
||||
invalidUpperID := strings.Repeat("AB", 32) // uppercase rejected
|
||||
|
||||
// nil spec
|
||||
assert.Equal("", DockerNetnsPath(nil))
|
||||
|
||||
// nil hooks
|
||||
assert.Equal("", DockerNetnsPath(&specs.Spec{}))
|
||||
|
||||
// Hook without libnetwork-setkey
|
||||
spec := &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{ //nolint:all
|
||||
{Args: []string{"/some/binary", "unrelated"}},
|
||||
},
|
||||
},
|
||||
}
|
||||
assert.Equal("", DockerNetnsPath(spec))
|
||||
|
||||
// Prestart hook with libnetwork-setkey but sandbox ID too short (rejected by regex)
|
||||
spec = &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{ //nolint:all
|
||||
{Args: []string{"/usr/bin/proxy", "libnetwork-setkey", invalidShortID, "ctrl"}},
|
||||
},
|
||||
},
|
||||
}
|
||||
assert.Equal("", DockerNetnsPath(spec))
|
||||
|
||||
// Prestart hook with libnetwork-setkey but uppercase hex (rejected by regex)
|
||||
spec = &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{ //nolint:all
|
||||
{Args: []string{"/usr/bin/proxy", "libnetwork-setkey", invalidUpperID, "ctrl"}},
|
||||
},
|
||||
},
|
||||
}
|
||||
assert.Equal("", DockerNetnsPath(spec))
|
||||
|
||||
// Prestart hook with valid sandbox ID but netns file doesn't exist on disk
|
||||
spec = &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{ //nolint:all
|
||||
{Args: []string{"/usr/bin/proxy", "libnetwork-setkey", validID, "ctrl"}},
|
||||
},
|
||||
},
|
||||
}
|
||||
assert.Equal("", DockerNetnsPath(spec))
|
||||
|
||||
// Prestart hook with libnetwork-setkey and existing netns file — success path
|
||||
tmpDir := t.TempDir()
|
||||
fakeNsDir := filepath.Join(tmpDir, "netns")
|
||||
err := os.MkdirAll(fakeNsDir, 0755)
|
||||
assert.NoError(err)
|
||||
fakeNsFile := filepath.Join(fakeNsDir, validID)
|
||||
err = os.WriteFile(fakeNsFile, []byte{}, 0644)
|
||||
assert.NoError(err)
|
||||
|
||||
// Temporarily override dockerNetnsPrefixes so DockerNetnsPath can find
|
||||
// the netns file we created under the temp directory.
|
||||
origPrefixes := dockerNetnsPrefixes
|
||||
dockerNetnsPrefixes = []string{fakeNsDir + "/"}
|
||||
defer func() { dockerNetnsPrefixes = origPrefixes }()
|
||||
|
||||
spec = &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{ //nolint:all
|
||||
{Args: []string{"/usr/bin/proxy", "libnetwork-setkey", validID, "ctrl"}},
|
||||
},
|
||||
},
|
||||
}
|
||||
assert.Equal(fakeNsFile, DockerNetnsPath(spec))
|
||||
|
||||
// Sandbox ID that is a directory rather than a regular file — must be rejected
|
||||
dirID := validID2
|
||||
err = os.MkdirAll(filepath.Join(fakeNsDir, dirID), 0755)
|
||||
assert.NoError(err)
|
||||
spec = &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{ //nolint:all
|
||||
{Args: []string{"/usr/bin/proxy", "libnetwork-setkey", dirID, "ctrl"}},
|
||||
},
|
||||
},
|
||||
}
|
||||
assert.Equal("", DockerNetnsPath(spec))
|
||||
|
||||
// CreateRuntime hook with valid sandbox ID — file doesn't exist
|
||||
validID3 := strings.Repeat("ef", 32)
|
||||
spec = &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
CreateRuntime: []specs.Hook{
|
||||
{Args: []string{"/usr/bin/proxy", "libnetwork-setkey", validID3, "ctrl"}},
|
||||
},
|
||||
},
|
||||
}
|
||||
assert.Equal("", DockerNetnsPath(spec))
|
||||
|
||||
// Hook with libnetwork-setkey as last arg (no sandbox ID follows) — no panic
|
||||
spec = &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{ //nolint:all
|
||||
{Args: []string{"libnetwork-setkey"}},
|
||||
},
|
||||
},
|
||||
}
|
||||
assert.Equal("", DockerNetnsPath(spec))
|
||||
|
||||
// Empty args slice
|
||||
spec = &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{ //nolint:all
|
||||
{Args: []string{}},
|
||||
},
|
||||
},
|
||||
}
|
||||
assert.Equal("", DockerNetnsPath(spec))
|
||||
}
|
||||
|
||||
45
tests/integration/docker/gha-run.sh
Executable file
45
tests/integration/docker/gha-run.sh
Executable file
@@ -0,0 +1,45 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright (c) 2023 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
set -o errexit
|
||||
set -o nounset
|
||||
set -o pipefail
|
||||
|
||||
kata_tarball_dir="${2:-kata-artifacts}"
|
||||
docker_dir="$(dirname "$(readlink -f "$0")")"
|
||||
source "${docker_dir}/../../common.bash"
|
||||
image="${image:-instrumentisto/nmap:latest}"
|
||||
|
||||
function install_dependencies() {
|
||||
info "Installing the dependencies needed for running the docker smoke test"
|
||||
|
||||
sudo -E docker pull "${image}"
|
||||
}
|
||||
|
||||
function run() {
|
||||
info "Running docker smoke test tests using ${KATA_HYPERVISOR} hypervisor"
|
||||
|
||||
enabling_hypervisor
|
||||
|
||||
info "Running docker with runc"
|
||||
sudo docker run --rm --entrypoint nping "${image}" --tcp-connect -c 2 -p 80 www.github.com
|
||||
|
||||
info "Running docker with Kata Containers (${KATA_HYPERVISOR})"
|
||||
sudo docker run --rm --runtime io.containerd.kata-${KATA_HYPERVISOR}.v2 --entrypoint nping "${image}" --tcp-connect -c 2 -p 80 www.github.com
|
||||
}
|
||||
|
||||
function main() {
|
||||
action="${1:-}"
|
||||
case "${action}" in
|
||||
install-dependencies) install_dependencies ;;
|
||||
install-kata) install_kata ;;
|
||||
run) run ;;
|
||||
*) >&2 die "Invalid argument" ;;
|
||||
esac
|
||||
}
|
||||
|
||||
main "$@"
|
||||
@@ -234,7 +234,7 @@ externals:
|
||||
nvrc:
|
||||
# yamllint disable-line rule:line-length
|
||||
desc: "The NVRC project provides a Rust binary that implements a simple init system for microVMs"
|
||||
version: "v0.1.3"
|
||||
version: "v0.1.4"
|
||||
url: "https://github.com/NVIDIA/nvrc/releases/download/"
|
||||
|
||||
nvidia:
|
||||
|
||||
Reference in New Issue
Block a user