diff --git a/src/agent/src/device.rs b/src/agent/src/device.rs index a8faacc065..5f1300bad5 100644 --- a/src/agent/src/device.rs +++ b/src/agent/src/device.rs @@ -380,6 +380,65 @@ pub async fn wait_for_pci_device( Ok(addr) } +#[derive(Debug)] +struct NetPciMatcher { + devpath: String, +} + +impl NetPciMatcher { + fn new(relpath: &str) -> NetPciMatcher { + let root_bus = create_pci_root_bus_path(); + + NetPciMatcher { + devpath: format!("{}{}", root_bus, relpath), + } + } +} + +impl UeventMatcher for NetPciMatcher { + fn is_match(&self, uev: &Uevent) -> bool { + uev.devpath.starts_with(self.devpath.as_str()) + && uev.subsystem == "net" + && !uev.interface.is_empty() + && uev.action == "add" + } +} + +pub async fn wait_for_net_interface( + sandbox: &Arc>, + pcipath: &pci::Path, +) -> Result<()> { + let root_bus_sysfs = format!("{}{}", SYSFS_DIR, create_pci_root_bus_path()); + let sysfs_rel_path = pcipath_to_sysfs(&root_bus_sysfs, pcipath)?; + + let matcher = NetPciMatcher::new(&sysfs_rel_path); + + // Check if the interface is already added in case network is cold-plugged + // or the uevent loop is started before network is added. + // We check for the pci deive in the sysfs directory for network devices. + let pattern = format!( + r"[./]+{}/[a-z0-9/]*net/[a-z0-9/]*", + matcher.devpath.as_str() + ); + let re = Regex::new(&pattern).expect("BUG: Failed to compile regex for NetPciMatcher"); + + for entry in fs::read_dir(SYSFS_NET_PATH)? { + let entry = entry?; + let path = entry.path(); + let target_path = fs::read_link(path)?; + let target_path_str = target_path + .to_str() + .ok_or_else(|| anyhow!("Expected symlink in dir {}", SYSFS_NET_PATH))?; + + if re.is_match(target_path_str) { + return Ok(()); + } + } + let _uev = wait_for_uevent(sandbox, matcher).await?; + + Ok(()) +} + #[derive(Debug)] struct VfioMatcher { syspath: String, @@ -1784,6 +1843,41 @@ mod tests { assert!(!matcher_a.is_match(&uev_b)); } + #[tokio::test] + #[allow(clippy::redundant_clone)] + async fn test_net_pci_matcher() { + let root_bus = create_pci_root_bus_path(); + let relpath_a = "/0000:00:02.0/0000:01:01.0"; + + let mut uev_a = crate::uevent::Uevent::default(); + uev_a.action = crate::linux_abi::U_EVENT_ACTION_ADD.to_string(); + uev_a.devpath = format!("{}{}", root_bus, relpath_a); + uev_a.subsystem = String::from("net"); + uev_a.interface = String::from("eth0"); + let matcher_a = NetPciMatcher::new(relpath_a); + println!("Matcher a : {}", matcher_a.devpath); + + let relpath_b = "/0000:00:02.0/0000:01:02.0"; + let mut uev_b = uev_a.clone(); + uev_b.devpath = format!("{}{}", root_bus, relpath_b); + let matcher_b = NetPciMatcher::new(relpath_b); + + assert!(matcher_a.is_match(&uev_a)); + assert!(matcher_b.is_match(&uev_b)); + assert!(!matcher_b.is_match(&uev_a)); + assert!(!matcher_a.is_match(&uev_b)); + + let relpath_c = "/0000:00:02.0/0000:01:03.0"; + let net_substr = "/net/eth0"; + let mut uev_c = uev_a.clone(); + uev_c.devpath = format!("{}{}{}", root_bus, relpath_c, net_substr); + let matcher_c = NetPciMatcher::new(relpath_c); + + assert!(matcher_c.is_match(&uev_c)); + assert!(!matcher_a.is_match(&uev_c)); + assert!(!matcher_b.is_match(&uev_c)); + } + #[tokio::test] #[allow(clippy::redundant_clone)] async fn test_mmio_block_matcher() { diff --git a/src/agent/src/linux_abi.rs b/src/agent/src/linux_abi.rs index bfd2e31ec2..6c9bc3aaa1 100644 --- a/src/agent/src/linux_abi.rs +++ b/src/agent/src/linux_abi.rs @@ -90,6 +90,7 @@ pub const SYSFS_MEMORY_HOTPLUG_PROBE_PATH: &str = "/sys/devices/system/memory/pr pub const SYSFS_MEMORY_ONLINE_PATH: &str = "/sys/devices/system/memory"; pub const SYSFS_SCSI_HOST_PATH: &str = "/sys/class/scsi_host"; +pub const SYSFS_NET_PATH: &str = "/sys/class/net"; pub const SYSFS_BUS_PCI_PATH: &str = "/sys/bus/pci"; diff --git a/src/agent/src/netlink.rs b/src/agent/src/netlink.rs index b41cf9b096..6bbbab91f1 100644 --- a/src/agent/src/netlink.rs +++ b/src/agent/src/netlink.rs @@ -95,16 +95,30 @@ impl Handle { let mut new_link = None; if link.name() != iface.name { if let Ok(link) = self.find_link(LinkFilter::Name(iface.name.as_str())).await { + // Bring down interface if it is UP + if link.is_up() { + self.enable_link(link.index(), false).await?; + } + // update the existing interface name with a temporary name, otherwise // it would failed to udpate this interface with an existing name. let mut request = self.handle.link().set(link.index()); request.message_mut().header = link.header.clone(); + let link_name = link.name(); + let temp_name = link_name.clone() + "_temp"; request - .name(format!("{}_temp", link.name())) - .up() + .name(temp_name.clone()) .execute() - .await?; + .await + .map_err(|err| { + anyhow!( + "Failed to rename interface {} to {}with error: {}", + link_name, + temp_name, + err + ) + })?; new_link = Some(link); } @@ -120,14 +134,33 @@ impl Handle { .arp(iface.raw_flags & libc::IFF_NOARP as u32 == 0) .up() .execute() - .await?; + .await + .map_err(|err| { + anyhow!( + "Failure in LinkSetRequest for interface {}: {}", + iface.name.as_str(), + err + ) + })?; // swap the updated iface's name. if let Some(nlink) = new_link { let mut request = self.handle.link().set(nlink.index()); request.message_mut().header = nlink.header.clone(); - request.name(link.name()).up().execute().await?; + request + .name(link.name()) + .up() + .execute() + .await + .map_err(|err| { + anyhow!( + "Error swapping back interface name {} to {}: {}", + nlink.name().as_str(), + link.name(), + err + ) + })?; } Ok(()) diff --git a/src/agent/src/rpc.rs b/src/agent/src/rpc.rs index 78dee8b4f1..f731671eca 100644 --- a/src/agent/src/rpc.rs +++ b/src/agent/src/rpc.rs @@ -14,6 +14,7 @@ use std::fmt::Debug; use std::io; use std::os::unix::ffi::OsStrExt; use std::path::Path; +use std::str::FromStr; use std::sync::Arc; use ttrpc::{ self, @@ -54,7 +55,9 @@ use nix::sys::{stat, statfs}; use nix::unistd::{self, Pid}; use rustjail::process::ProcessOperations; -use crate::device::{add_devices, get_virtio_blk_pci_device_name, update_env_pci}; +use crate::device::{ + add_devices, get_virtio_blk_pci_device_name, update_env_pci, wait_for_net_interface, +}; use crate::features::get_build_features; use crate::linux_abi::*; use crate::metrics::get_metrics; @@ -949,6 +952,17 @@ impl agent_ttrpc::AgentService for AgentService { "empty update interface request", )?; + // For network devices passed on the pci bus, check for the network interface + // to be available first. + if !interface.pciPath.is_empty() { + let pcipath = pci::Path::from_str(&interface.pciPath) + .map_ttrpc_err(|e| format!("Unexpected pci-path for network interface: {:?}", e))?; + + wait_for_net_interface(&self.sandbox, &pcipath) + .await + .map_ttrpc_err(|e| format!("interface not available: {:?}", e))?; + } + self.sandbox .lock() .await diff --git a/tests/integration/nerdctl/gha-run.sh b/tests/integration/nerdctl/gha-run.sh index 0a611054f0..91fbff1f52 100755 --- a/tests/integration/nerdctl/gha-run.sh +++ b/tests/integration/nerdctl/gha-run.sh @@ -90,6 +90,12 @@ function run() { info "Creating macvlan network with eth0 interface on host as parent" sudo nerdctl network create ${macvlan_net_name} --driver ipvlan --subnet=10.8.0.0/24 -o parent=${parent_interface} + # Create two bridge networks for testing multiple networks with Kata + local net1="foo" + local net2="bar" + sudo nerdctl network create ${net1} + sudo nerdctl network create ${net2} + enabling_hypervisor if [ -n "${GITHUB_ENV:-}" ]; then @@ -103,6 +109,9 @@ function run() { info "Running nerdctl with Kata Containers (${KATA_HYPERVISOR})" sudo nerdctl run --rm --runtime io.containerd.kata-${KATA_HYPERVISOR}.v2 --entrypoint nping instrumentisto/nmap --tcp-connect -c 2 -p 80 www.github.com + info "Running nerdctl with Kata Containers (${KATA_HYPERVISOR}) and multiple bridge nwtorks" + sudo nerdctl run --rm --net ${net1} --net ${net2} --runtime io.containerd.kata-${KATA_HYPERVISOR}.v2 alpine ip a + info "Running nerdctl with Kata Containers (${KATA_HYPERVISOR}) and ipvlan network" sudo nerdctl run --rm --net ${ipvlan_net_name} --runtime io.containerd.kata-${KATA_HYPERVISOR}.v2 alpine ip a | grep "eth0"