diff --git a/src/libs/kata-sys-util/Cargo.toml b/src/libs/kata-sys-util/Cargo.toml index 7eba88a420..781d0a5bb5 100644 --- a/src/libs/kata-sys-util/Cargo.toml +++ b/src/libs/kata-sys-util/Cargo.toml @@ -11,7 +11,7 @@ license = "Apache-2.0" edition = "2018" [dependencies] -byteorder = "~1" +byteorder = "1.4.3" cgroups = { package = "cgroups-rs", version = "0.2.7" } chrono = "0.4.0" common-path = "=1.0.0" @@ -24,7 +24,7 @@ serde_json = "1.0.73" slog = "2.5.2" slog-scope = "4.4.0" subprocess = "0.2.8" -rand = "^0.7.2" +rand = "0.7.2" thiserror = "1.0.30" kata-types = { path = "../kata-types" } diff --git a/src/libs/kata-sys-util/src/rand/random_bytes.rs b/src/libs/kata-sys-util/src/rand/random_bytes.rs index 44f7929620..183856d6b5 100644 --- a/src/libs/kata-sys-util/src/rand/random_bytes.rs +++ b/src/libs/kata-sys-util/src/rand/random_bytes.rs @@ -46,6 +46,17 @@ mod tests { fn random_bytes() { let b = RandomBytes::new(16); assert_eq!(b.bytes.len(), 16); - println!("{:?}", b.bytes); + + // check lower hex + let lower_hex = format!("{:x}", b); + assert_eq!(lower_hex, lower_hex.to_lowercase()); + + // check upper hex + let upper_hex = format!("{:X}", b); + assert_eq!(upper_hex, upper_hex.to_uppercase()); + + // check new random bytes + let b1 = RandomBytes::new(16); + assert_ne!(b.bytes, b1.bytes); } } diff --git a/src/libs/kata-sys-util/src/rand/uuid.rs b/src/libs/kata-sys-util/src/rand/uuid.rs index a257c94803..905ba05e24 100644 --- a/src/libs/kata-sys-util/src/rand/uuid.rs +++ b/src/libs/kata-sys-util/src/rand/uuid.rs @@ -27,6 +27,7 @@ impl UUID { } } +/// From: convert UUID to string impl From<&UUID> for String { fn from(from: &UUID) -> Self { let time_low = BigEndian::read_u32(&from.0[..4]); @@ -57,13 +58,17 @@ mod tests { #[test] fn test_uuid() { - let uuid = UUID::new(); - let sss: String = String::from(&uuid); - println!("{}", sss); + let uuid1 = UUID::new(); + let s1: String = String::from(&uuid1); - let uuid2 = UUID([0u8, 1u8, 2u8, 3u8, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); - let sss2 = String::from(&uuid2); - println!("Display: {}", uuid2); - assert_eq!(&sss2, "00010203-0405-0607-0809-0a0b0c0d0e0f"); + let uuid2 = UUID::new(); + let s2: String = String::from(&uuid2); + + assert_eq!(s1.len(), s2.len()); + assert_ne!(s1, s2); + + let uuid3 = UUID([0u8, 1u8, 2u8, 3u8, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + let s3 = String::from(&uuid3); + assert_eq!(&s3, "00010203-0405-0607-0809-0a0b0c0d0e0f"); } } diff --git a/src/runtime-rs/Cargo.lock b/src/runtime-rs/Cargo.lock index 09abdaca59..8a59c5df06 100644 --- a/src/runtime-rs/Cargo.lock +++ b/src/runtime-rs/Cargo.lock @@ -792,6 +792,71 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" +[[package]] +name = "netlink-packet-core" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "345b8ab5bd4e71a2986663e88c56856699d060e78e152e6e9d7966fcd5491297" +dependencies = [ + "anyhow", + "byteorder", + "libc", + "netlink-packet-utils", +] + +[[package]] +name = "netlink-packet-route" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "733ea73609acfd7fa7ddadfb7bf709b0471668c456ad9513685af543a06342b2" +dependencies = [ + "anyhow", + "bitflags", + "byteorder", + "libc", + "netlink-packet-core", + "netlink-packet-utils", +] + +[[package]] +name = "netlink-packet-utils" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25af9cf0dc55498b7bd94a1508af7a78706aa0ab715a73c5169273e03c84845e" +dependencies = [ + "anyhow", + "byteorder", + "paste", + "thiserror", +] + +[[package]] +name = "netlink-proto" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef8785b8141e8432aa45fceb922a7e876d7da3fad37fa7e7ec702ace3aa0826b" +dependencies = [ + "bytes 1.1.0", + "futures 0.3.21", + "log", + "netlink-packet-core", + "netlink-sys", + "tokio", +] + +[[package]] +name = "netlink-sys" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e4c9f9547a08241bee7b6558b9b98e1f290d187de8b7cfca2bbb4937bcaa8f8" +dependencies = [ + "bytes 1.1.0", + "futures 0.3.21", + "libc", + "log", + "tokio", +] + [[package]] name = "nix" version = "0.16.1" @@ -805,6 +870,19 @@ dependencies = [ "void", ] +[[package]] +name = "nix" +version = "0.22.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4916f159ed8e5de0082076562152a76b7a1f64a01fd9d1e0fea002c37624faf" +dependencies = [ + "bitflags", + "cc", + "cfg-if 1.0.0", + "libc", + "memoffset", +] + [[package]] name = "nix" version = "0.23.1" @@ -921,6 +999,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "paste" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c520e05135d6e763148b6426a837e239041653ba7becd2e538c076c738025fc" + [[package]] name = "percent-encoding" version = "2.1.0" @@ -1254,22 +1338,43 @@ dependencies = [ "agent", "anyhow", "async-trait", + "bitflags", "cgroups-rs", + "futures 0.3.21", "hypervisor", "kata-sys-util", "kata-types", "lazy_static", "libc", - "log", "logging", + "netlink-packet-route", + "netlink-sys", "nix 0.16.1", "oci", + "rand 0.7.3", + "rtnetlink", + "scopeguard", "slog", "slog-scope", "tokio", "uuid", ] +[[package]] +name = "rtnetlink" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f54290e54521dac3de4149d83ddf9f62a359b3cc93bcb494a794a41e6f4744b" +dependencies = [ + "futures 0.3.21", + "log", + "netlink-packet-route", + "netlink-proto", + "nix 0.22.3", + "thiserror", + "tokio", +] + [[package]] name = "runtimes" version = "0.1.0" diff --git a/src/runtime-rs/crates/hypervisor/src/lib.rs b/src/runtime-rs/crates/hypervisor/src/lib.rs index de8c172375..0889f3322e 100644 --- a/src/runtime-rs/crates/hypervisor/src/lib.rs +++ b/src/runtime-rs/crates/hypervisor/src/lib.rs @@ -10,6 +10,7 @@ extern crate slog; logging::logger_with_subsystem!(sl, "hypervisor"); pub mod device; +pub use device::*; use std::collections::HashMap; diff --git a/src/runtime-rs/crates/resource/Cargo.toml b/src/runtime-rs/crates/resource/Cargo.toml index 115d4fc56b..e6365b1705 100644 --- a/src/runtime-rs/crates/resource/Cargo.toml +++ b/src/runtime-rs/crates/resource/Cargo.toml @@ -7,11 +7,17 @@ edition = "2018" [dependencies] anyhow = "^1.0" async-trait = "0.1.48" +bitflags = "1.2.1" cgroups-rs = "0.2.9" +futures = "0.3.11" lazy_static = "1.4.0" libc = ">=0.2.39" -log = "^0.4.0" +netlink-sys = "0.8.2" +netlink-packet-route = "0.11.0" nix = "0.16.0" +rand = "^0.7.2" +rtnetlink = "0.9.1" +scopeguard = "1.0.0" slog = "2.5.2" slog-scope = "4.4.0" tokio = { version = "1.8.0", features = ["process"] } diff --git a/src/runtime-rs/crates/resource/src/lib.rs b/src/runtime-rs/crates/resource/src/lib.rs index 86dc71fe78..28ffc56019 100644 --- a/src/runtime-rs/crates/resource/src/lib.rs +++ b/src/runtime-rs/crates/resource/src/lib.rs @@ -15,6 +15,8 @@ logging::logger_with_subsystem!(sl, "resource"); pub mod cgroups; pub mod manager; mod manager_inner; +pub mod network; +use network::NetworkConfig; pub mod rootfs; pub mod share_fs; pub mod volume; @@ -24,5 +26,6 @@ use kata_types::config::hypervisor::SharedFsInfo; #[derive(Debug)] pub enum ResourceConfig { + Network(NetworkConfig), ShareFs(SharedFsInfo), } diff --git a/src/runtime-rs/crates/resource/src/manager_inner.rs b/src/runtime-rs/crates/resource/src/manager_inner.rs index 7b7cfea4fd..c254352429 100644 --- a/src/runtime-rs/crates/resource/src/manager_inner.rs +++ b/src/runtime-rs/crates/resource/src/manager_inner.rs @@ -15,6 +15,7 @@ use oci::LinuxResources; use crate::{ cgroups::CgroupsResource, + network::{self, Network}, rootfs::{RootFsResource, Rootfs}, share_fs::{self, ShareFs}, volume::{Volume, VolumeResource}, @@ -23,10 +24,9 @@ use crate::{ pub(crate) struct ResourceManagerInner { sid: String, - // TODO: remove - #[allow(dead_code)] agent: Arc, hypervisor: Arc, + network: Option>, share_fs: Option>, pub rootfs_resource: RootFsResource, @@ -45,6 +45,7 @@ impl ResourceManagerInner { sid: sid.to_string(), agent, hypervisor, + network: None, share_fs: None, rootfs_resource: RootFsResource::new(), volume_resource: VolumeResource::new(), @@ -66,12 +67,60 @@ impl ResourceManagerInner { .context("setup share fs device before start vm")?; self.share_fs = Some(share_fs); } + ResourceConfig::Network(c) => { + let d = network::new(&c).await.context("new network")?; + d.setup(self.hypervisor.as_ref()) + .await + .context("setup network")?; + self.network = Some(d) + } }; } Ok(()) } + async fn handle_interfaces(&self, network: &dyn Network) -> Result<()> { + for i in network.interfaces().await.context("get interfaces")? { + // update interface + info!(sl!(), "update interface {:?}", i); + self.agent + .update_interface(agent::UpdateInterfaceRequest { interface: Some(i) }) + .await + .context("update interface")?; + } + + Ok(()) + } + + async fn handle_neighbours(&self, network: &dyn Network) -> Result<()> { + let neighbors = network.neighs().await.context("neighs")?; + if !neighbors.is_empty() { + info!(sl!(), "update neighbors {:?}", neighbors); + self.agent + .add_arp_neighbors(agent::AddArpNeighborRequest { + neighbors: Some(agent::ARPNeighbors { neighbors }), + }) + .await + .context("update neighbors")?; + } + Ok(()) + } + + async fn handle_routes(&self, network: &dyn Network) -> Result<()> { + let routes = network.routes().await.context("routes")?; + if !routes.is_empty() { + info!(sl!(), "update routes {:?}", routes); + self.agent + .update_routes(agent::UpdateRoutesRequest { + route: Some(agent::Routes { routes }), + }) + .await + .context("update routes")?; + } + Ok(()) + } + pub async fn setup_after_start_vm(&mut self) -> Result<()> { if let Some(share_fs) = self.share_fs.as_ref() { share_fs @@ -80,6 +129,16 @@ impl ResourceManagerInner { .context("setup share fs device after start vm")?; } + if let Some(network) = self.network.as_ref() { + let network = network.as_ref(); + self.handle_interfaces(network) + .await + .context("handle interfaces")?; + self.handle_neighbours(network) + .await + .context("handle neighbors")?; + self.handle_routes(network).await.context("handle routes")?; + } Ok(()) } diff --git a/src/runtime-rs/crates/resource/src/network/endpoint/mod.rs b/src/runtime-rs/crates/resource/src/network/endpoint/mod.rs new file mode 100644 index 0000000000..caaa69dca6 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/endpoint/mod.rs @@ -0,0 +1,22 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +mod physical_endpoint; +pub use physical_endpoint::PhysicalEndpoint; +mod veth_endpoint; +pub use veth_endpoint::VethEndpoint; + +use anyhow::Result; +use async_trait::async_trait; +use hypervisor::Hypervisor; + +#[async_trait] +pub trait Endpoint: std::fmt::Debug + Send + Sync { + async fn name(&self) -> String; + async fn hardware_addr(&self) -> String; + async fn attach(&self, hypervisor: &dyn Hypervisor) -> Result<()>; + async fn detach(&self, hypervisor: &dyn Hypervisor) -> Result<()>; +} diff --git a/src/runtime-rs/crates/resource/src/network/endpoint/physical_endpoint.rs b/src/runtime-rs/crates/resource/src/network/endpoint/physical_endpoint.rs new file mode 100644 index 0000000000..78387a5ce4 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/endpoint/physical_endpoint.rs @@ -0,0 +1,145 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::path::Path; + +use anyhow::{anyhow, Context, Result}; +use async_trait::async_trait; +use hypervisor::{device, Hypervisor}; + +use super::Endpoint; +use crate::network::utils::{self, link}; + +pub const SYS_PCI_DEVICES_PATH: &str = "/sys/bus/pci/devices"; + +#[derive(Debug)] +pub struct VendorDevice { + vendor_id: String, + device_id: String, +} + +impl VendorDevice { + pub fn new(vendor_id: &str, device_id: &str) -> Result { + if vendor_id.is_empty() || device_id.is_empty() { + return Err(anyhow!( + "invalid parameters vendor_id {} device_id {}", + vendor_id, + device_id + )); + } + Ok(Self { + vendor_id: vendor_id.to_string(), + device_id: device_id.to_string(), + }) + } + + pub fn vendor_device_id(&self) -> String { + format!("{}_{}", &self.vendor_id, &self.device_id) + } +} + +#[derive(Debug)] +pub struct PhysicalEndpoint { + iface_name: String, + hard_addr: String, + bdf: String, + driver: String, + vendor_device_id: VendorDevice, +} + +impl PhysicalEndpoint { + pub fn new(name: &str, hardware_addr: &[u8]) -> Result { + let driver_info = link::get_driver_info(name).context("get driver info")?; + let bdf = driver_info.bus_info; + let sys_pci_devices_path = Path::new(SYS_PCI_DEVICES_PATH); + // get driver by following symlink /sys/bus/pci/devices/$bdf/driver + let driver_path = sys_pci_devices_path.join(&bdf).join("driver"); + let link = driver_path.read_link().context("read link")?; + let driver = link + .file_name() + .map_or(String::new(), |v| v.to_str().unwrap().to_owned()); + + // get vendor and device id from pci space (sys/bus/pci/devices/$bdf) + let iface_device_path = sys_pci_devices_path.join(&bdf).join("device"); + let device_id = std::fs::read_to_string(&iface_device_path) + .context(format!("read device path {:?}", &iface_device_path))?; + + let iface_vendor_path = sys_pci_devices_path.join(&bdf).join("vendor"); + let vendor_id = std::fs::read_to_string(&iface_vendor_path) + .context(format!("read vendor path {:?}", &iface_vendor_path))?; + + Ok(Self { + iface_name: name.to_string(), + hard_addr: utils::get_mac_addr(hardware_addr).context("get mac addr")?, + vendor_device_id: VendorDevice::new(&vendor_id, &device_id) + .context("new vendor device")?, + driver, + bdf, + }) + } +} + +#[async_trait] +impl Endpoint for PhysicalEndpoint { + async fn name(&self) -> String { + self.iface_name.clone() + } + + async fn hardware_addr(&self) -> String { + self.hard_addr.clone() + } + + async fn attach(&self, hypervisor: &dyn Hypervisor) -> Result<()> { + // bind physical interface from host driver and bind to vfio + device::bind_device_to_vfio( + &self.bdf, + &self.driver, + &self.vendor_device_id.vendor_device_id(), + ) + .context(format!( + "bind physical endpoint from {} to vfio", + &self.driver + ))?; + + // set vfio's bus type, pci or mmio. Mostly use pci by default. + let mode = match self.driver.as_str() { + "virtio-pci" => "mmio", + _ => "pci", + }; + + // add vfio device + let d = device::Device::Vfio(device::VfioConfig { + id: format!("physical_nic_{}", self.name().await), + sysfs_path: "".to_string(), + bus_slot_func: self.bdf.clone(), + mode: device::VfioBusMode::new(mode) + .context(format!("new vfio bus mode {:?}", mode))?, + }); + hypervisor.add_device(d).await.context("add device")?; + Ok(()) + } + + // detach for physical endpoint unbinds the physical network interface from vfio-pci + // and binds it back to the saved host driver. + async fn detach(&self, _hypervisor: &dyn Hypervisor) -> Result<()> { + // bind back the physical network interface to host. + // we need to do this even if a new network namespace has not + // been created by virt-containers. + + // we do not need to enter the network namespace to bind back the + // physical interface to host driver. + device::bind_device_to_host( + &self.bdf, + &self.driver, + &self.vendor_device_id.vendor_device_id(), + ) + .context(format!( + "bind physical endpoint device from vfio to {}", + &self.driver + ))?; + Ok(()) + } +} diff --git a/src/runtime-rs/crates/resource/src/network/endpoint/veth_endpoint.rs b/src/runtime-rs/crates/resource/src/network/endpoint/veth_endpoint.rs new file mode 100644 index 0000000000..c1bfb4c464 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/endpoint/veth_endpoint.rs @@ -0,0 +1,84 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::io::{self, Error}; + +use anyhow::{Context, Result}; +use async_trait::async_trait; +use hypervisor::{device::NetworkConfig, Device, Hypervisor}; + +use super::Endpoint; +use crate::network::{utils, NetworkPair}; + +#[derive(Debug)] +pub struct VethEndpoint { + net_pair: NetworkPair, +} + +impl VethEndpoint { + pub async fn new( + handle: &rtnetlink::Handle, + name: &str, + idx: u32, + model: &str, + queues: usize, + ) -> Result { + let net_pair = NetworkPair::new(handle, idx, name, model, queues) + .await + .context("new networkInterfacePair")?; + Ok(VethEndpoint { net_pair }) + } + + fn get_network_config(&self) -> Result { + let iface = &self.net_pair.tap.tap_iface; + let guest_mac = utils::parse_mac(&iface.hard_addr).ok_or_else(|| { + Error::new( + io::ErrorKind::InvalidData, + format!("hard_addr {}", &iface.hard_addr), + ) + })?; + Ok(NetworkConfig { + id: self.net_pair.virt_iface.name.clone(), + host_dev_name: iface.name.clone(), + guest_mac: Some(guest_mac), + }) + } +} + +#[async_trait] +impl Endpoint for VethEndpoint { + async fn name(&self) -> String { + self.net_pair.virt_iface.name.clone() + } + + async fn hardware_addr(&self) -> String { + self.net_pair.tap.tap_iface.hard_addr.clone() + } + + async fn attach(&self, h: &dyn Hypervisor) -> Result<()> { + self.net_pair + .add_network_model() + .await + .context("add network model")?; + let config = self.get_network_config().context("get network config")?; + h.add_device(Device::Network(config)) + .await + .context("Error add device")?; + Ok(()) + } + + async fn detach(&self, h: &dyn Hypervisor) -> Result<()> { + self.net_pair + .del_network_model() + .await + .context("del network model")?; + let config = self.get_network_config().context("get network config")?; + h.remove_device(Device::Network(config)) + .await + .context("remove device")?; + Ok(()) + } +} diff --git a/src/runtime-rs/crates/resource/src/network/mod.rs b/src/runtime-rs/crates/resource/src/network/mod.rs new file mode 100644 index 0000000000..7193a6d921 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/mod.rs @@ -0,0 +1,48 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +mod endpoint; +pub use endpoint::Endpoint; +mod network_entity; +mod network_info; +pub use network_info::NetworkInfo; +mod network_model; +pub use network_model::NetworkModel; +mod network_with_netns; +pub use network_with_netns::NetworkWithNetNsConfig; +use network_with_netns::NetworkWithNetns; +mod network_pair; +use network_pair::NetworkPair; +mod utils; + +use std::sync::Arc; + +use anyhow::{Context, Result}; +use async_trait::async_trait; +use hypervisor::Hypervisor; + +#[derive(Debug)] +pub enum NetworkConfig { + NetworkResourceWithNetNs(NetworkWithNetNsConfig), +} + +#[async_trait] +pub trait Network: Send + Sync { + async fn setup(&self, h: &dyn Hypervisor) -> Result<()>; + async fn interfaces(&self) -> Result>; + async fn routes(&self) -> Result>; + async fn neighs(&self) -> Result>; +} + +pub async fn new(config: &NetworkConfig) -> Result> { + match config { + NetworkConfig::NetworkResourceWithNetNs(c) => Ok(Arc::new( + NetworkWithNetns::new(c) + .await + .context("new network with netns")?, + )), + } +} diff --git a/src/runtime-rs/crates/resource/src/network/network_entity.rs b/src/runtime-rs/crates/resource/src/network/network_entity.rs new file mode 100644 index 0000000000..5182dfe4b0 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/network_entity.rs @@ -0,0 +1,24 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::sync::Arc; + +use super::{Endpoint, NetworkInfo}; + +#[derive(Debug)] +pub(crate) struct NetworkEntity { + pub(crate) endpoint: Arc, + pub(crate) network_info: Arc, +} + +impl NetworkEntity { + pub fn new(endpoint: Arc, network_info: Arc) -> Self { + Self { + endpoint, + network_info, + } + } +} diff --git a/src/runtime-rs/crates/resource/src/network/network_info/mod.rs b/src/runtime-rs/crates/resource/src/network/network_info/mod.rs new file mode 100644 index 0000000000..1500d5179e --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/network_info/mod.rs @@ -0,0 +1,18 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +pub(crate) mod network_info_from_link; + +use agent::{ARPNeighbor, Interface, Route}; +use anyhow::Result; +use async_trait::async_trait; + +#[async_trait] +pub trait NetworkInfo: std::fmt::Debug + Send + Sync { + async fn interface(&self) -> Result; + async fn routes(&self) -> Result>; + async fn neighs(&self) -> Result>; +} diff --git a/src/runtime-rs/crates/resource/src/network/network_info/network_info_from_link.rs b/src/runtime-rs/crates/resource/src/network/network_info/network_info_from_link.rs new file mode 100644 index 0000000000..fa341a1ae6 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/network_info/network_info_from_link.rs @@ -0,0 +1,203 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{convert::TryFrom, net::Ipv4Addr}; + +use agent::{ARPNeighbor, IPAddress, IPFamily, Interface, Route}; +use anyhow::{Context, Result}; +use async_trait::async_trait; +use futures::stream::TryStreamExt; +use netlink_packet_route::{ + self, neighbour::NeighbourMessage, nlas::neighbour::Nla, route::RouteMessage, +}; + +use super::NetworkInfo; +use crate::network::utils::{ + address::Address, + link::{self, LinkAttrs}, +}; + +#[derive(Debug)] +pub(crate) struct NetworkInfoFromLink { + interface: Interface, + neighs: Vec, + routes: Vec, +} + +impl NetworkInfoFromLink { + pub async fn new( + handle: &rtnetlink::Handle, + link: &dyn link::Link, + hw_addr: &str, + ) -> Result { + let attrs = link.attrs(); + let name = &attrs.name; + + Ok(Self { + interface: Interface { + device: name.clone(), + name: name.clone(), + ip_addresses: handle_addresses(handle, attrs) + .await + .context("handle addresses")?, + mtu: attrs.mtu as u64, + hw_addr: hw_addr.to_string(), + pci_addr: Default::default(), + field_type: link.r#type().to_string(), + raw_flags: attrs.flags & libc::IFF_NOARP as u32, + }, + neighs: handle_neighbors(handle, attrs) + .await + .context("handle neighbours")?, + routes: handle_routes(handle, attrs) + .await + .context("handle routes")?, + }) + } +} + +async fn handle_addresses(handle: &rtnetlink::Handle, attrs: &LinkAttrs) -> Result> { + let mut addr_msg_list = handle + .address() + .get() + .set_link_index_filter(attrs.index) + .execute(); + + let mut addresses = Vec::new(); + while let Some(addr_msg) = addr_msg_list.try_next().await? { + if addr_msg.header.family as i32 != libc::AF_INET { + warn!(sl!(), "unsupported ipv6 addr. {:?}", addr_msg); + continue; + } + let a = Address::try_from(addr_msg).context("get addr from msg")?; + if a.addr.is_loopback() { + continue; + } + + addresses.push(IPAddress { + family: if a.addr.is_ipv4() { + IPFamily::V4 + } else { + IPFamily::V6 + }, + address: a.addr.to_string(), + mask: a.perfix_len.to_string(), + }); + } + Ok(addresses) +} + +fn generate_neigh(name: &str, n: &NeighbourMessage) -> Result { + let mut neigh = ARPNeighbor { + device: name.to_string(), + state: n.header.state as i32, + ..Default::default() + }; + for nla in &n.nlas { + match nla { + Nla::Destination(addr) => { + if addr.len() != 4 { + continue; + } + let dest = Ipv4Addr::new(addr[0], addr[1], addr[2], addr[3]); + let addr = Some(IPAddress { + family: IPFamily::V4, + address: dest.to_string(), + mask: "".to_string(), + }); + neigh.to_ip_address = addr; + } + Nla::LinkLocalAddress(addr) => { + if addr.len() < 6 { + continue; + } + let lladdr = format!( + "{:<02x}:{:<02x}:{:<02x}:{:<02x}:{:<02x}:{:<02x}", + addr[0], addr[1], addr[2], addr[3], addr[4], addr[5] + ); + neigh.ll_addr = lladdr; + } + _ => { + // skip the unused Nla + } + } + } + + Ok(neigh) +} + +async fn handle_neighbors( + handle: &rtnetlink::Handle, + attrs: &LinkAttrs, +) -> Result> { + let name = &attrs.name; + let mut neighs = vec![]; + let mut neigh_msg_list = handle.neighbours().get().execute(); + while let Some(neigh) = neigh_msg_list.try_next().await? { + // get neigh filter with index + if neigh.header.ifindex == attrs.index { + neighs.push(generate_neigh(name, &neigh).context("generate neigh")?) + } + } + Ok(neighs) +} + +fn generate_route(name: &str, route: &RouteMessage) -> Result> { + if route.header.protocol == libc::RTPROT_KERNEL { + return Ok(None); + } + + Ok(Some(Route { + dest: route + .destination_prefix() + .map(|(addr, _)| addr.to_string()) + .unwrap_or_default(), + gateway: route.gateway().map(|v| v.to_string()).unwrap_or_default(), + device: name.to_string(), + source: route + .source_prefix() + .map(|(addr, _)| addr.to_string()) + .unwrap_or_default(), + scope: route.header.scope as u32, + family: if route.header.address_family == libc::AF_INET as u8 { + IPFamily::V4 + } else { + IPFamily::V6 + }, + })) +} + +async fn handle_routes(handle: &rtnetlink::Handle, attrs: &LinkAttrs) -> Result> { + let name = &attrs.name; + let mut routes = vec![]; + let mut route_msg_list = handle.route().get(rtnetlink::IpVersion::V4).execute(); + while let Some(route) = route_msg_list.try_next().await? { + // get route filter with index + if let Some(index) = route.output_interface() { + if index == attrs.index { + if let Some(route) = generate_route(name, &route).context("generate route")? { + routes.push(route); + } + } + } + } + Ok(routes) +} + +#[async_trait] +impl NetworkInfo for NetworkInfoFromLink { + async fn interface(&self) -> Result { + Ok(self.interface.clone()) + } + + async fn routes(&self) -> Result> { + Ok(self.routes.clone()) + } + + async fn neighs(&self) -> Result> { + Ok(self.neighs.clone()) + } +} diff --git a/src/runtime-rs/crates/resource/src/network/network_model/mod.rs b/src/runtime-rs/crates/resource/src/network/network_model/mod.rs new file mode 100644 index 0000000000..11cda538ca --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/network_model/mod.rs @@ -0,0 +1,46 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +pub mod none_model; +pub mod route_model; +pub mod tc_filter_model; + +use std::sync::Arc; + +use anyhow::{Context, Result}; +use async_trait::async_trait; + +use super::NetworkPair; + +const TC_FILTER_NET_MODEL_STR: &str = "tcfilter"; +const ROUTE_NET_MODEL_STR: &str = "route"; + +pub enum NetworkModelType { + NoneModel, + TcFilter, + Route, +} + +#[async_trait] +pub trait NetworkModel: std::fmt::Debug + Send + Sync { + fn model_type(&self) -> NetworkModelType; + async fn add(&self, net_pair: &NetworkPair) -> Result<()>; + async fn del(&self, net_pair: &NetworkPair) -> Result<()>; +} + +pub fn new(model: &str) -> Result> { + match model { + TC_FILTER_NET_MODEL_STR => Ok(Arc::new( + tc_filter_model::TcFilterModel::new().context("new tc filter model")?, + )), + ROUTE_NET_MODEL_STR => Ok(Arc::new( + route_model::RouteModel::new().context("new route model")?, + )), + _ => Ok(Arc::new( + none_model::NoneModel::new().context("new none model")?, + )), + } +} diff --git a/src/runtime-rs/crates/resource/src/network/network_model/none_model.rs b/src/runtime-rs/crates/resource/src/network/network_model/none_model.rs new file mode 100644 index 0000000000..f68b4d3e22 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/network_model/none_model.rs @@ -0,0 +1,35 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use anyhow::Result; +use async_trait::async_trait; + +use super::{NetworkModel, NetworkModelType}; +use crate::network::NetworkPair; + +#[derive(Debug)] +pub(crate) struct NoneModel {} + +impl NoneModel { + pub fn new() -> Result { + Ok(Self {}) + } +} + +#[async_trait] +impl NetworkModel for NoneModel { + fn model_type(&self) -> NetworkModelType { + NetworkModelType::NoneModel + } + + async fn add(&self, _pair: &NetworkPair) -> Result<()> { + Ok(()) + } + + async fn del(&self, _pair: &NetworkPair) -> Result<()> { + Ok(()) + } +} diff --git a/src/runtime-rs/crates/resource/src/network/network_model/route_model.rs b/src/runtime-rs/crates/resource/src/network/network_model/route_model.rs new file mode 100644 index 0000000000..64b1da2e74 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/network_model/route_model.rs @@ -0,0 +1,88 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use anyhow::{anyhow, Context, Result}; +use async_trait::async_trait; +use tokio::process::Command; + +use super::{NetworkModel, NetworkModelType}; +use crate::network::NetworkPair; + +#[derive(Debug)] +pub(crate) struct RouteModel {} + +impl RouteModel { + pub fn new() -> Result { + Ok(Self {}) + } +} + +#[async_trait] +impl NetworkModel for RouteModel { + fn model_type(&self) -> NetworkModelType { + NetworkModelType::Route + } + + async fn add(&self, pair: &NetworkPair) -> Result<()> { + let tap_name = &pair.tap.tap_iface.name; + let virt_name = &pair.virt_iface.name; + let virt_iface_addr = pair.virt_iface.addrs[0].addr.to_string(); + + let commands_args = vec![ + vec![ + "rule", "add", "pref", "10", "from", "all", "lookup", "local", + ], + vec!["rule", "del", "pref", "0", "from", "all"], + vec!["rule", "add", "pref", "5", "iif", virt_name, "table", "10"], + vec![ + "route", "replace", "default", "dev", tap_name, "table", "10", + ], + vec![ + "neigh", + "replace", + &virt_iface_addr, + "lladdr", + &pair.virt_iface.hard_addr, + "dev", + tap_name, + ], + ]; + + for ca in commands_args { + let output = Command::new("/sbin/ip") + .args(&ca) + .output() + .await + .context(format!("run command ip args {:?}", &ca))?; + if !output.status.success() { + return Err(anyhow!( + "run command ip args {:?} error {}", + &ca, + String::from_utf8(output.stderr)? + )); + } + } + + // TODO: support ipv6 + // change sysctl for tap0_kata + // echo 1 > /proc/sys/net/ipv4/conf/tap0_kata/accept_local + let accept_local_path = format!("/proc/sys/net/ipv4/conf/{}/accept_local", &tap_name); + std::fs::write(&accept_local_path, "1".to_string()) + .with_context(|| format!("Failed to echo 1 > {}", &accept_local_path))?; + + // echo 1 > /proc/sys/net/ipv4/conf/eth0/proxy_arp + // This enabled ARP reply on peer eth0 to prevent without any reply on VPC + let proxy_arp_path = format!("/proc/sys/net/ipv4/conf/{}/proxy_arp", &virt_name); + std::fs::write(&proxy_arp_path, "1".to_string()) + .with_context(|| format!("Failed to echo 1 > {}", &proxy_arp_path))?; + + Ok(()) + } + + async fn del(&self, _pair: &NetworkPair) -> Result<()> { + todo!() + } +} diff --git a/src/runtime-rs/crates/resource/src/network/network_model/tc_filter_model.rs b/src/runtime-rs/crates/resource/src/network/network_model/tc_filter_model.rs new file mode 100644 index 0000000000..ae347e717c --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/network_model/tc_filter_model.rs @@ -0,0 +1,95 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use anyhow::{anyhow, Context, Result}; +use async_trait::async_trait; +use tokio::process::Command; + +use super::{NetworkModel, NetworkModelType}; +use crate::network::NetworkPair; + +#[derive(Debug)] +pub(crate) struct TcFilterModel {} + +impl TcFilterModel { + pub fn new() -> Result { + Ok(Self {}) + } +} + +#[async_trait] +impl NetworkModel for TcFilterModel { + fn model_type(&self) -> NetworkModelType { + NetworkModelType::TcFilter + } + + async fn add(&self, pair: &NetworkPair) -> Result<()> { + let tap_name = &pair.tap.tap_iface.name; + let virt_name = &pair.virt_iface.name; + + add_qdisc_ingress(tap_name) + .await + .context("add qdisc ingress for tap link")?; + add_qdisc_ingress(virt_name) + .await + .context("add qdisc ingress")?; + + add_redirect_tcfilter(tap_name, virt_name) + .await + .context("add tc filter for tap")?; + add_redirect_tcfilter(virt_name, tap_name) + .await + .context("add tc filter")?; + Ok(()) + } + + async fn del(&self, pair: &NetworkPair) -> Result<()> { + del_qdisc(&pair.virt_iface.name) + .await + .context("del qdisc")?; + Ok(()) + } +} + +// TODO: use netlink replace tc command +async fn add_qdisc_ingress(dev: &str) -> Result<()> { + let output = Command::new("/sbin/tc") + .args(&["qdisc", "add", "dev", dev, "handle", "ffff:", "ingress"]) + .output() + .await + .context("add tc")?; + if !output.status.success() { + return Err(anyhow!("{}", String::from_utf8(output.stderr)?)); + } + Ok(()) +} + +async fn add_redirect_tcfilter(src: &str, dst: &str) -> Result<()> { + let output = Command::new("/sbin/tc") + .args(&[ + "filter", "add", "dev", src, "parent", "ffff:", "protocol", "all", "u32", "match", + "u8", "0", "0", "action", "mirred", "egress", "redirect", "dev", dst, + ]) + .output() + .await + .context("add redirect tcfilter")?; + if !output.status.success() { + return Err(anyhow!("{}", String::from_utf8(output.stderr)?)); + } + Ok(()) +} + +async fn del_qdisc(dev: &str) -> Result<()> { + let output = Command::new("/sbin/tc") + .args(&["qdisc", "del", "dev", dev, "handle", "ffff:", "ingress"]) + .output() + .await + .context("del qdisc")?; + if !output.status.success() { + return Err(anyhow!("{}", String::from_utf8(output.stderr)?)); + } + Ok(()) +} diff --git a/src/runtime-rs/crates/resource/src/network/network_pair.rs b/src/runtime-rs/crates/resource/src/network/network_pair.rs new file mode 100644 index 0000000000..71e212907e --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/network_pair.rs @@ -0,0 +1,178 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{convert::TryFrom, sync::Arc, usize}; + +use anyhow::{anyhow, Context, Result}; +use futures::stream::TryStreamExt; + +use super::{ + network_model, + utils::{self, address::Address, link}, +}; + +const TAP_SUFFIX: &str = "_kata"; + +#[derive(Default, Copy, Clone, Debug, PartialEq, Eq)] +pub struct NetInterworkingModel(u32); + +#[derive(Default, Debug, Clone)] +pub struct NetworkInterface { + pub name: String, + pub hard_addr: String, + pub addrs: Vec
, +} + +#[derive(Default, Debug)] +pub struct TapInterface { + pub id: String, + pub name: String, + pub tap_iface: NetworkInterface, +} +#[derive(Debug)] +pub struct NetworkPair { + pub tap: TapInterface, + pub virt_iface: NetworkInterface, + pub model: Arc, + pub network_qos: bool, +} +impl NetworkPair { + pub(crate) async fn new( + handle: &rtnetlink::Handle, + idx: u32, + name: &str, + model: &str, + queues: usize, + ) -> Result { + let unique_id = kata_sys_util::rand::UUID::new(); + let model = network_model::new(model).context("new network model")?; + let tap_iface_name = format!("tap{}{}", idx, TAP_SUFFIX); + let virt_iface_name = format!("eth{}", idx); + let tap_link = create_link(handle, &tap_iface_name, queues) + .await + .context("create link")?; + let virt_link = get_link_by_name(handle, virt_iface_name.clone().as_str()) + .await + .context("get link by name")?; + + let mut virt_addr_msg_list = handle + .address() + .get() + .set_link_index_filter(virt_link.attrs().index) + .execute(); + + let mut virt_address = vec![]; + while let Some(addr_msg) = virt_addr_msg_list.try_next().await? { + let addr = Address::try_from(addr_msg).context("get address from msg")?; + virt_address.push(addr); + } + + // Save the veth MAC address to the TAP so that it can later be used + // to build the hypervisor command line. This MAC address has to be + // the one inside the VM in order to avoid any firewall issues. The + // bridge created by the network plugin on the host actually expects + // to see traffic from this MAC address and not another one. + let tap_hard_addr = + utils::get_mac_addr(&virt_link.attrs().hardware_addr).context("get mac addr")?; + + // Save the TAP Mac address to the virt_iface so that it can later updated + // the guest's gateway IP's mac as this TAP device. This MAC address has + // to be inside the VM in order to the network reach to the gateway. + let virt_hard_addr = + utils::get_mac_addr(&tap_link.attrs().hardware_addr).context("get mac addr")?; + + handle + .link() + .set(tap_link.attrs().index) + .mtu(virt_link.attrs().mtu) + .execute() + .await + .context("set link mtu")?; + + handle + .link() + .set(tap_link.attrs().index) + .up() + .execute() + .await + .context("set link up")?; + + let mut net_pair = NetworkPair { + tap: TapInterface { + id: String::from(&unique_id), + name: format!("br{}{}", idx, TAP_SUFFIX), + tap_iface: NetworkInterface { + name: tap_iface_name, + hard_addr: tap_hard_addr, + ..Default::default() + }, + }, + virt_iface: NetworkInterface { + name: virt_iface_name, + hard_addr: virt_hard_addr, + addrs: virt_address, + }, + model, + network_qos: false, + }; + + if !name.is_empty() { + net_pair.virt_iface.name = String::from(name); + } + + Ok(net_pair) + } + + pub(crate) async fn add_network_model(&self) -> Result<()> { + let model = self.model.clone(); + model.add(self).await.context("add")?; + Ok(()) + } + + pub(crate) async fn del_network_model(&self) -> Result<()> { + let model = self.model.clone(); + model.del(self).await.context("del")?; + Ok(()) + } +} + +pub async fn create_link( + handle: &rtnetlink::Handle, + name: &str, + queues: usize, +) -> Result> { + link::create_link(name, link::LinkType::Tap, queues)?; + + let link = get_link_by_name(handle, name) + .await + .context("get link by name")?; + + let base = link.attrs(); + if base.master_index != 0 { + handle + .link() + .set(base.index) + .master(base.master_index) + .execute() + .await + .context("set index")?; + } + Ok(link) +} + +pub async fn get_link_by_name( + handle: &rtnetlink::Handle, + name: &str, +) -> Result> { + let mut link_msg_list = handle.link().get().match_name(name.to_string()).execute(); + let msg = if let Some(msg) = link_msg_list.try_next().await? { + msg + } else { + return Err(anyhow!("failed to find link by name {}", name)); + }; + + Ok(link::get_link_from_message(msg)) +} diff --git a/src/runtime-rs/crates/resource/src/network/network_with_netns.rs b/src/runtime-rs/crates/resource/src/network/network_with_netns.rs new file mode 100644 index 0000000000..c228a7c8a9 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/network_with_netns.rs @@ -0,0 +1,211 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::sync::{ + atomic::{AtomicU32, Ordering}, + Arc, +}; + +use anyhow::{anyhow, Context, Result}; +use async_trait::async_trait; +use futures::stream::TryStreamExt; +use hypervisor::Hypervisor; +use scopeguard::defer; +use tokio::sync::RwLock; + +use super::{ + endpoint::{Endpoint, PhysicalEndpoint, VethEndpoint}, + network_entity::NetworkEntity, + network_info::network_info_from_link::NetworkInfoFromLink, + utils::{link, netns}, + Network, +}; +use crate::network::NetworkInfo; + +#[derive(Debug)] +pub struct NetworkWithNetNsConfig { + pub network_model: String, + pub netns_path: String, + pub queues: usize, +} + +struct NetworkWithNetnsInner { + netns_path: String, + entity_list: Vec, +} + +impl NetworkWithNetnsInner { + async fn new(config: &NetworkWithNetNsConfig) -> Result { + let entity_list = if config.netns_path.is_empty() { + warn!(sl!(), "skip to scan for empty netns"); + vec![] + } else { + // get endpoint + get_entity_from_netns(config) + .await + .context("get entity from netns")? + }; + Ok(Self { + netns_path: config.netns_path.to_string(), + entity_list, + }) + } +} + +pub(crate) struct NetworkWithNetns { + inner: Arc>, +} + +impl NetworkWithNetns { + pub(crate) async fn new(config: &NetworkWithNetNsConfig) -> Result { + Ok(Self { + inner: Arc::new(RwLock::new(NetworkWithNetnsInner::new(config).await?)), + }) + } +} + +#[async_trait] +impl Network for NetworkWithNetns { + async fn setup(&self, h: &dyn Hypervisor) -> Result<()> { + let inner = self.inner.read().await; + let _netns_guard = netns::NetnsGuard::new(&inner.netns_path).context("net netns guard")?; + for e in &inner.entity_list { + e.endpoint.attach(h).await.context("attach")?; + } + Ok(()) + } + + async fn interfaces(&self) -> Result> { + let inner = self.inner.read().await; + let mut interfaces = vec![]; + for e in &inner.entity_list { + interfaces.push(e.network_info.interface().await.context("interface")?); + } + Ok(interfaces) + } + + async fn routes(&self) -> Result> { + let inner = self.inner.read().await; + let mut routes = vec![]; + for e in &inner.entity_list { + let mut list = e.network_info.routes().await.context("routes")?; + routes.append(&mut list); + } + Ok(routes) + } + + async fn neighs(&self) -> Result> { + let inner = self.inner.read().await; + let mut neighs = vec![]; + for e in &inner.entity_list { + let mut list = e.network_info.neighs().await.context("neighs")?; + neighs.append(&mut list); + } + Ok(neighs) + } +} + +async fn get_entity_from_netns(config: &NetworkWithNetNsConfig) -> Result> { + info!( + sl!(), + "get network entity for config {:?} tid {:?}", + config, + nix::unistd::gettid() + ); + let mut entity_list = vec![]; + let _netns_guard = netns::NetnsGuard::new(&config.netns_path) + .context("net netns guard") + .unwrap(); + let (connection, handle, _) = rtnetlink::new_connection().context("new connection")?; + let thread_handler = tokio::spawn(connection); + defer!({ + thread_handler.abort(); + }); + + let mut links = handle.link().get().execute(); + + let idx = AtomicU32::new(0); + while let Some(link) = links.try_next().await? { + let link = link::get_link_from_message(link); + let attrs = link.attrs(); + + if (attrs.flags & libc::IFF_LOOPBACK as u32) != 0 { + continue; + } + + let idx = idx.fetch_add(1, Ordering::Relaxed); + let (endpoint, network_info) = create_endpoint(&handle, link.as_ref(), idx, config) + .await + .context("create endpoint")?; + + entity_list.push(NetworkEntity::new(endpoint, network_info)); + } + + Ok(entity_list) +} + +async fn create_endpoint( + handle: &rtnetlink::Handle, + link: &dyn link::Link, + idx: u32, + config: &NetworkWithNetNsConfig, +) -> Result<(Arc, Arc)> { + let _netns_guard = netns::NetnsGuard::new(&config.netns_path) + .context("net netns guard") + .unwrap(); + let attrs = link.attrs(); + let link_type = link.r#type(); + let endpoint: Arc = if is_physical_iface(&attrs.name)? { + info!( + sl!(), + "physical network interface found: {} {:?}", + &attrs.name, + nix::unistd::gettid() + ); + let t = PhysicalEndpoint::new(&attrs.name, &attrs.hardware_addr) + .context("new physical endpoint")?; + Arc::new(t) + } else { + info!( + sl!(), + "{} network interface found: {}", &link_type, &attrs.name + ); + match link_type { + "veth" => { + let ret = VethEndpoint::new( + handle, + &attrs.name, + idx, + &config.network_model, + config.queues, + ) + .await + .context("veth endpoint")?; + Arc::new(ret) + } + _ => return Err(anyhow!("unsupported link type: {}", link_type)), + } + }; + + let network_info = Arc::new( + NetworkInfoFromLink::new(handle, link, &endpoint.hardware_addr().await) + .await + .context("network info from link")?, + ); + + Ok((endpoint, network_info)) +} + +fn is_physical_iface(name: &str) -> Result { + if name == "lo" { + return Ok(false); + } + let driver_info = link::get_driver_info(name)?; + if driver_info.bus_info.split(':').count() != 3 { + return Ok(false); + } + Ok(true) +} diff --git a/src/runtime-rs/crates/resource/src/network/utils/address.rs b/src/runtime-rs/crates/resource/src/network/utils/address.rs new file mode 100644 index 0000000000..916d011d58 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/utils/address.rs @@ -0,0 +1,109 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{ + convert::TryFrom, + net::{IpAddr, Ipv4Addr, Ipv6Addr}, +}; + +use anyhow::{anyhow, Result}; +use netlink_packet_route::{nlas::address::Nla, AddressMessage, AF_INET, AF_INET6}; + +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct Address { + pub addr: IpAddr, + pub label: String, + pub flags: u32, + pub scope: u8, + pub perfix_len: u8, + pub peer: IpAddr, + pub broadcast: IpAddr, + pub prefered_lft: u32, + pub valid_ltf: u32, +} + +impl TryFrom for Address { + type Error = anyhow::Error; + fn try_from(msg: AddressMessage) -> Result { + let AddressMessage { header, nlas } = msg; + let mut addr = Address { + addr: IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)), + peer: IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)), + broadcast: IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)), + label: String::default(), + flags: 0, + scope: header.scope, + perfix_len: header.prefix_len, + prefered_lft: 0, + valid_ltf: 0, + }; + + let mut local = IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)); + let mut dst = IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)); + + for nla in nlas.into_iter() { + match nla { + Nla::Address(a) => { + dst = parse_ip(a, header.family)?; + } + Nla::Local(a) => { + local = parse_ip(a, header.family)?; + } + Nla::Broadcast(b) => { + addr.broadcast = parse_ip(b, header.family)?; + } + Nla::Label(l) => { + addr.label = l; + } + Nla::Flags(f) => { + addr.flags = f; + } + Nla::CacheInfo(_c) => {} + _ => {} + } + } + + // IPv6 sends the local address as IFA_ADDRESS with no + // IFA_LOCAL, IPv4 sends both IFA_LOCAL and IFA_ADDRESS + // with IFA_ADDRESS being the peer address if they differ + // + // But obviously, as there are IPv6 PtP addresses, too, + // IFA_LOCAL should also be handled for IPv6. + if local.is_unspecified() { + if header.family == AF_INET as u8 && local == dst { + addr.addr = dst; + } else { + addr.addr = local; + addr.peer = dst; + } + } else { + addr.addr = dst; + } + Ok(addr) + } +} + +fn parse_ip(ip: Vec, family: u8) -> Result { + let support_len = if family as u16 == AF_INET { 4 } else { 16 }; + if ip.len() != support_len { + return Err(anyhow!( + "invalid ip addresses {:?} support {}", + &ip, + support_len + )); + } + match family as u16 { + AF_INET => Ok(IpAddr::V4(Ipv4Addr::new(ip[0], ip[1], ip[2], ip[3]))), + AF_INET6 => { + let mut octets = [0u8; 16]; + octets.copy_from_slice(&ip[..16]); + Ok(IpAddr::V6(Ipv6Addr::from(octets))) + } + _ => { + return Err(anyhow!("unknown IP network family {}", family)); + } + } +} diff --git a/src/runtime-rs/crates/resource/src/network/utils/link/create.rs b/src/runtime-rs/crates/resource/src/network/utils/link/create.rs new file mode 100644 index 0000000000..06bedf79b9 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/utils/link/create.rs @@ -0,0 +1,129 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{ + fs::{File, OpenOptions}, + os::unix::io::AsRawFd, + path::Path, + {io, mem}, +}; + +use anyhow::{Context, Result}; +use nix::ioctl_write_ptr; + +use super::macros::{get_name, set_name}; + +type IfName = [u8; libc::IFNAMSIZ]; + +#[derive(Copy, Clone, Debug)] +#[repr(C)] +struct CreateLinkMap { + pub mem_start: libc::c_ulong, + pub mem_end: libc::c_ulong, + pub base_addr: libc::c_ushort, + pub irq: libc::c_uchar, + pub dma: libc::c_uchar, + pub port: libc::c_uchar, +} + +#[repr(C)] +union CreateLinkIfru { + pub ifr_addr: libc::sockaddr, + pub ifr_dst_addr: libc::sockaddr, + pub ifr_broad_addr: libc::sockaddr, + pub ifr_netmask: libc::sockaddr, + pub ifr_hw_addr: libc::sockaddr, + pub ifr_flags: libc::c_short, + pub ifr_if_index: libc::c_int, + pub ifr_metric: libc::c_int, + pub ifr_mtu: libc::c_int, + pub ifr_map: CreateLinkMap, + pub ifr_slave: IfName, + pub ifr_new_name: IfName, + pub ifr_data: *mut libc::c_char, +} + +#[repr(C)] +struct CreateLinkReq { + pub ifr_name: IfName, + pub ifr_ifru: CreateLinkIfru, +} + +impl CreateLinkReq { + pub fn from_name(name: &str) -> io::Result { + let mut req: CreateLinkReq = unsafe { mem::zeroed() }; + req.set_name(name)?; + Ok(req) + } + + pub fn set_name(&mut self, name: &str) -> io::Result<()> { + set_name!(self.ifr_name, name) + } + + pub fn get_name(&self) -> io::Result { + get_name!(self.ifr_name) + } + + pub unsafe fn set_raw_flags(&mut self, raw_flags: libc::c_short) { + self.ifr_ifru.ifr_flags = raw_flags; + } +} + +const DEVICE_PATH: &str = "/dev/net/tun"; + +ioctl_write_ptr!(tun_set_iff, b'T', 202, libc::c_int); +ioctl_write_ptr!(tun_set_persist, b'T', 203, libc::c_int); + +#[derive(Clone, Copy, Debug)] +pub enum LinkType { + #[allow(dead_code)] + Tun, + Tap, +} + +pub fn create_link(name: &str, link_type: LinkType, queues: usize) -> Result<()> { + let mut flags = libc::IFF_VNET_HDR; + flags |= match link_type { + LinkType::Tun => libc::IFF_TUN, + LinkType::Tap => libc::IFF_TAP, + }; + + let queues = if queues == 0 { 1 } else { queues }; + if queues > 1 { + flags |= libc::IFF_MULTI_QUEUE | libc::IFF_NO_PI; + } else { + flags |= libc::IFF_ONE_QUEUE; + }; + + // create first queue + let mut files = vec![]; + let (file, result_name) = create_queue(name, flags)?; + unsafe { + tun_set_persist(file.as_raw_fd(), &1).context("tun set persist")?; + } + files.push(file); + + // create other queues + if queues > 1 { + for _ in 0..queues - 1 { + files.push(create_queue(&result_name, flags)?.0); + } + } + + info!(sl!(), "create link with fds {:?}", files); + Ok(()) +} + +fn create_queue(name: &str, flags: libc::c_int) -> Result<(File, String)> { + let path = Path::new(DEVICE_PATH); + let file = OpenOptions::new().read(true).write(true).open(&path)?; + let mut req = CreateLinkReq::from_name(name)?; + unsafe { + req.set_raw_flags(flags as libc::c_short); + tun_set_iff(file.as_raw_fd(), &mut req as *mut _ as *mut _).context("tun set iff")?; + }; + Ok((file, req.get_name()?)) +} diff --git a/src/runtime-rs/crates/resource/src/network/utils/link/driver_info.rs b/src/runtime-rs/crates/resource/src/network/utils/link/driver_info.rs new file mode 100644 index 0000000000..a7269d013a --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/utils/link/driver_info.rs @@ -0,0 +1,102 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{io, mem}; + +use anyhow::{Context, Result}; +use nix::sys::socket::{socket, AddressFamily, SockFlag, SockType}; +use scopeguard::defer; + +use super::macros::{get_name, set_name}; + +/// FW version length +const ETHTOOL_FW_VERSION_LEN: usize = 32; + +/// bus info length +const ETHTOOL_BUS_INFO_LEN: usize = 32; + +/// erom version length +const ETHTOOL_EROM_VERSION_LEN: usize = 32; + +/// driver info +const ETHTOOL_DRIVER_INFO: u32 = 0x00000003; + +/// Ethtool interface define 0x8946 +const IOCTL_ETHTOOL_INTERFACE: u32 = 0x8946; + +nix::ioctl_readwrite_bad!(ioctl_ethtool, IOCTL_ETHTOOL_INTERFACE, DeviceInfoReq); + +#[repr(C)] +pub union DeviceInfoIfru { + pub ifr_addr: libc::sockaddr, + pub ifr_data: *mut libc::c_char, +} + +type IfName = [u8; libc::IFNAMSIZ]; + +#[repr(C)] +pub struct DeviceInfoReq { + pub ifr_name: IfName, + pub ifr_ifru: DeviceInfoIfru, +} + +impl DeviceInfoReq { + pub fn from_name(name: &str) -> io::Result { + let mut req: DeviceInfoReq = unsafe { mem::zeroed() }; + req.set_name(name)?; + Ok(req) + } + + pub fn set_name(&mut self, name: &str) -> io::Result<()> { + set_name!(self.ifr_name, name) + } +} + +#[repr(C)] +#[derive(Debug, Clone)] +struct Driver { + pub cmd: u32, + pub driver: [u8; 32], + pub version: [u8; 32], + pub fw_version: [u8; ETHTOOL_FW_VERSION_LEN], + pub bus_info: [u8; ETHTOOL_BUS_INFO_LEN], + pub erom_version: [u8; ETHTOOL_EROM_VERSION_LEN], + pub reserved2: [u8; 12], + pub n_priv_flags: u32, + pub n_stats: u32, + pub test_info_len: u32, + pub eedump_len: u32, + pub regdump_len: u32, +} + +#[derive(Debug, Clone)] +pub struct DriverInfo { + pub driver: String, + pub bus_info: String, +} + +pub fn get_driver_info(name: &str) -> Result { + let mut req = DeviceInfoReq::from_name(name).context(format!("ifreq from name {}", name))?; + let mut ereq: Driver = unsafe { mem::zeroed() }; + ereq.cmd = ETHTOOL_DRIVER_INFO; + req.ifr_ifru.ifr_data = &mut ereq as *mut _ as *mut _; + + let fd = socket( + AddressFamily::Inet, + SockType::Datagram, + SockFlag::empty(), + None, + ) + .context("new socket")?; + defer!({ + let _ = nix::unistd::close(fd); + }); + unsafe { ioctl_ethtool(fd, &mut req).context("ioctl ethtool")? }; + Ok(DriverInfo { + driver: get_name!(ereq.driver).context("get driver name")?, + bus_info: get_name!(ereq.bus_info).context("get bus info name")?, + }) +} diff --git a/src/runtime-rs/crates/resource/src/network/utils/link/macros.rs b/src/runtime-rs/crates/resource/src/network/utils/link/macros.rs new file mode 100644 index 0000000000..128a76bb29 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/utils/link/macros.rs @@ -0,0 +1,48 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +macro_rules! set_name { + ($name_field:expr, $name_str:expr) => {{ + let name_c = &::std::ffi::CString::new($name_str.to_owned()).map_err(|_| { + ::std::io::Error::new( + ::std::io::ErrorKind::InvalidInput, + "malformed interface name", + ) + })?; + let name_slice = name_c.as_bytes_with_nul(); + if name_slice.len() > libc::IFNAMSIZ { + return Err(io::Error::new(::std::io::ErrorKind::InvalidInput, "").into()); + } + $name_field[..name_slice.len()].clone_from_slice(name_slice); + + Ok(()) + }}; +} + +macro_rules! get_name { + ($name_field:expr) => {{ + let nul_pos = match $name_field.iter().position(|x| *x == 0) { + Some(p) => p, + None => { + return Err(::std::io::Error::new( + ::std::io::ErrorKind::InvalidData, + "malformed interface name", + ) + .into()) + } + }; + + std::ffi::CString::new(&$name_field[..nul_pos]) + .unwrap() + .into_string() + .map_err(|_| { + std::io::Error::new(std::io::ErrorKind::InvalidData, "malformed interface name") + }) + }}; +} + +pub(crate) use get_name; +pub(crate) use set_name; diff --git a/src/runtime-rs/crates/resource/src/network/utils/link/manager.rs b/src/runtime-rs/crates/resource/src/network/utils/link/manager.rs new file mode 100644 index 0000000000..fd4a8b1c99 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/utils/link/manager.rs @@ -0,0 +1,288 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use netlink_packet_route::{ + link::nlas::{Info, InfoBridge, InfoData, InfoKind, Nla}, + LinkMessage, +}; + +use super::{Link, LinkAttrs}; + +pub fn get_link_from_message(mut msg: LinkMessage) -> Box { + let mut base = LinkAttrs { + index: msg.header.index, + flags: msg.header.flags, + link_layer_type: msg.header.link_layer_type, + ..Default::default() + }; + if msg.header.flags & libc::IFF_PROMISC as u32 != 0 { + base.promisc = 1; + } + let mut link: Option> = None; + while let Some(attr) = msg.nlas.pop() { + match attr { + Nla::Info(infos) => { + link = Some(link_info(infos)); + } + Nla::Address(a) => { + base.hardware_addr = a; + } + Nla::IfName(i) => { + base.name = i; + } + Nla::Mtu(m) => { + base.mtu = m; + } + Nla::Link(l) => { + base.parent_index = l; + } + Nla::Master(m) => { + base.master_index = m; + } + Nla::TxQueueLen(t) => { + base.txq_len = t; + } + Nla::IfAlias(a) => { + base.alias = a; + } + Nla::Stats(_s) => {} + Nla::Stats64(_s) => {} + Nla::Xdp(_x) => {} + Nla::ProtoInfo(_) => {} + Nla::OperState(_) => {} + Nla::NetnsId(n) => { + base.net_ns_id = n; + } + Nla::GsoMaxSize(i) => { + base.gso_max_size = i; + } + Nla::GsoMaxSegs(e) => { + base.gso_max_seqs = e; + } + Nla::VfInfoList(_) => {} + Nla::NumTxQueues(t) => { + base.num_tx_queues = t; + } + Nla::NumRxQueues(r) => { + base.num_rx_queues = r; + } + Nla::Group(g) => { + base.group = g; + } + _ => { + // skip unused attr + } + } + } + + let mut ret = link.unwrap_or_else(|| Box::new(Device::default())); + ret.set_attrs(base); + ret +} + +fn link_info(mut infos: Vec) -> Box { + let mut link: Option> = None; + while let Some(info) = infos.pop() { + match info { + Info::Kind(kind) => match kind { + InfoKind::Tun => { + if link.is_none() { + link = Some(Box::new(Tuntap::default())); + } + } + InfoKind::Veth => { + if link.is_none() { + link = Some(Box::new(Veth::default())); + } + } + InfoKind::IpVlan => { + if link.is_none() { + link = Some(Box::new(IpVlan::default())); + } + } + InfoKind::Vlan => { + if link.is_none() { + link = Some(Box::new(Vlan::default())); + } + } + InfoKind::Bridge => { + if link.is_none() { + link = Some(Box::new(Bridge::default())); + } + } + _ => { + if link.is_none() { + link = Some(Box::new(Device::default())); + } + } + }, + Info::Data(data) => match data { + InfoData::Tun(_) => { + link = Some(Box::new(Tuntap::default())); + } + InfoData::Veth(_) => { + link = Some(Box::new(Veth::default())); + } + InfoData::IpVlan(_) => { + link = Some(Box::new(IpVlan::default())); + } + InfoData::Vlan(_) => { + link = Some(Box::new(Vlan::default())); + } + InfoData::Bridge(ibs) => { + link = Some(Box::new(parse_bridge(ibs))); + } + _ => { + link = Some(Box::new(Device::default())); + } + }, + Info::SlaveKind(_sk) => { + if link.is_none() { + link = Some(Box::new(Device::default())); + } + } + Info::SlaveData(_sd) => { + link = Some(Box::new(Device::default())); + } + _ => { + link = Some(Box::new(Device::default())); + } + } + } + link.unwrap() +} + +fn parse_bridge(mut ibs: Vec) -> Bridge { + let mut bridge = Bridge::default(); + while let Some(ib) = ibs.pop() { + match ib { + InfoBridge::HelloTime(ht) => { + bridge.hello_time = ht; + } + InfoBridge::MulticastSnooping(m) => { + bridge.multicast_snooping = m == 1; + } + InfoBridge::VlanFiltering(v) => { + bridge.vlan_filtering = v == 1; + } + _ => {} + } + } + bridge +} +#[derive(Debug, PartialEq, Eq, Clone, Default)] +pub struct Device { + attrs: Option, +} + +impl Link for Device { + fn attrs(&self) -> &LinkAttrs { + self.attrs.as_ref().unwrap() + } + fn set_attrs(&mut self, attr: LinkAttrs) { + self.attrs = Some(attr); + } + fn r#type(&self) -> &'static str { + "device" + } +} + +#[derive(Debug, PartialEq, Eq, Clone, Default)] +pub struct Tuntap { + pub attrs: Option, +} + +impl Link for Tuntap { + fn attrs(&self) -> &LinkAttrs { + self.attrs.as_ref().unwrap() + } + fn set_attrs(&mut self, attr: LinkAttrs) { + self.attrs = Some(attr); + } + fn r#type(&self) -> &'static str { + "tuntap" + } +} + +#[derive(Debug, PartialEq, Eq, Clone, Default)] +pub struct Veth { + attrs: Option, + + /// on create only + pub peer_name: String, +} + +impl Link for Veth { + fn attrs(&self) -> &LinkAttrs { + self.attrs.as_ref().unwrap() + } + fn set_attrs(&mut self, attr: LinkAttrs) { + self.attrs = Some(attr); + } + fn r#type(&self) -> &'static str { + "veth" + } +} + +#[derive(Debug, PartialEq, Eq, Clone, Default)] +pub struct IpVlan { + attrs: Option, + + /// on create only + pub peer_name: String, +} + +impl Link for IpVlan { + fn attrs(&self) -> &LinkAttrs { + self.attrs.as_ref().unwrap() + } + fn set_attrs(&mut self, attr: LinkAttrs) { + self.attrs = Some(attr); + } + fn r#type(&self) -> &'static str { + "ipvlan" + } +} + +#[derive(Debug, PartialEq, Eq, Clone, Default)] +pub struct Vlan { + attrs: Option, + + /// on create only + pub peer_name: String, +} + +impl Link for Vlan { + fn attrs(&self) -> &LinkAttrs { + self.attrs.as_ref().unwrap() + } + fn set_attrs(&mut self, attr: LinkAttrs) { + self.attrs = Some(attr); + } + fn r#type(&self) -> &'static str { + "vlan" + } +} + +#[derive(Debug, PartialEq, Eq, Clone, Default)] +pub struct Bridge { + attrs: Option, + pub multicast_snooping: bool, + pub hello_time: u32, + pub vlan_filtering: bool, +} + +impl Link for Bridge { + fn attrs(&self) -> &LinkAttrs { + self.attrs.as_ref().unwrap() + } + fn set_attrs(&mut self, attr: LinkAttrs) { + self.attrs = Some(attr); + } + fn r#type(&self) -> &'static str { + "bridge" + } +} diff --git a/src/runtime-rs/crates/resource/src/network/utils/link/mod.rs b/src/runtime-rs/crates/resource/src/network/utils/link/mod.rs new file mode 100644 index 0000000000..9fcc2b6405 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/utils/link/mod.rs @@ -0,0 +1,145 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +mod create; +pub use create::{create_link, LinkType}; +mod driver_info; +pub use driver_info::{get_driver_info, DriverInfo}; +mod macros; +mod manager; +pub use manager::get_link_from_message; + +use std::os::unix::io::RawFd; + +use netlink_packet_route::link::nlas::State; + +#[derive(Debug, PartialEq, Eq, Clone)] +pub enum Namespace { + NetNsPid(u32), + #[allow(dead_code)] + NetNsFd(RawFd), +} +impl Default for Namespace { + fn default() -> Self { + Self::NetNsPid(0) + } +} + +#[derive(Debug, PartialEq, Eq, Clone)] +pub enum LinkStatistics { + #[allow(dead_code)] + Stats(LinkStatistics32), + Stats64(LinkStatistics64), +} +impl Default for LinkStatistics { + fn default() -> Self { + Self::Stats64(LinkStatistics64::default()) + } +} + +#[derive(Debug, PartialEq, Eq, Clone, Default)] +pub struct LinkStatistics32 { + pub rx_packets: u32, + pub tx_packets: u32, + pub rx_bytes: u32, + pub tx_bytes: u32, + pub rx_errors: u32, + pub tx_errors: u32, + pub rx_dropped: u32, + pub tx_dropped: u32, + pub multicast: u32, + pub collisions: u32, + pub rx_length_errors: u32, + pub rx_over_errors: u32, + pub rx_crc_errors: u32, + pub rx_frame_errors: u32, + pub rx_fifo_errors: u32, + pub rx_missed_errors: u32, + pub tx_aborted_errors: u32, + pub tx_carrier_errors: u32, + pub tx_fifo_errors: u32, + pub tx_heartbeat_errors: u32, + pub tx_window_errors: u32, + pub rx_compressed: u32, + pub tx_compressed: u32, +} + +#[derive(Debug, PartialEq, Eq, Clone, Default)] +pub struct LinkStatistics64 { + pub rx_packets: u64, + pub tx_packets: u64, + pub rx_bytes: u64, + pub tx_bytes: u64, + pub rx_errors: u64, + pub tx_errors: u64, + pub rx_dropped: u64, + pub tx_dropped: u64, + pub multicast: u64, + pub collisions: u64, + pub rx_length_errors: u64, + pub rx_over_errors: u64, + pub rx_crc_errors: u64, + pub rx_frame_errors: u64, + pub rx_fifo_errors: u64, + pub rx_missed_errors: u64, + pub tx_aborted_errors: u64, + pub tx_carrier_errors: u64, + pub tx_fifo_errors: u64, + pub tx_heartbeat_errors: u64, + pub tx_window_errors: u64, + pub rx_compressed: u64, + pub tx_compressed: u64, +} + +#[derive(Debug, PartialEq, Eq, Clone, Default)] +pub struct LinkXdp { + pub fd: RawFd, + pub attached: bool, + pub flags: u32, + pub prog_id: u32, +} + +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct OperState(State); +impl Default for OperState { + fn default() -> Self { + Self(State::Unknown) + } +} + +#[derive(Debug, PartialEq, Eq, Clone, Default)] +pub struct LinkAttrs { + pub index: u32, + pub mtu: u32, + pub txq_len: u32, + + pub name: String, + pub hardware_addr: Vec, + pub flags: u32, + pub parent_index: u32, + pub master_index: u32, + pub namespace: Namespace, + pub alias: String, + pub statistics: LinkStatistics, + pub promisc: u32, + pub xdp: LinkXdp, + pub link_layer_type: u16, + pub proto_info: Vec, + pub oper_state: OperState, + pub net_ns_id: i32, + pub num_tx_queues: u32, + pub num_rx_queues: u32, + pub gso_max_size: u32, + pub gso_max_seqs: u32, + pub vfs: Vec, + pub group: u32, +} + +pub trait Link: Send + Sync { + fn attrs(&self) -> &LinkAttrs; + fn set_attrs(&mut self, attr: LinkAttrs); + fn r#type(&self) -> &str; +} diff --git a/src/runtime-rs/crates/resource/src/network/utils/mod.rs b/src/runtime-rs/crates/resource/src/network/utils/mod.rs new file mode 100644 index 0000000000..574178c3de --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/utils/mod.rs @@ -0,0 +1,35 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +pub(crate) mod address; +pub(crate) mod link; +pub(crate) mod netns; + +use anyhow::{anyhow, Result}; + +pub(crate) fn parse_mac(s: &str) -> Option { + let v: Vec<_> = s.split(':').collect(); + if v.len() != 6 { + return None; + } + let mut bytes = [0u8; 6]; + for i in 0..6 { + bytes[i] = u8::from_str_radix(v[i], 16).ok()?; + } + + Some(hypervisor::Address(bytes)) +} + +pub(crate) fn get_mac_addr(b: &[u8]) -> Result { + if b.len() != 6 { + return Err(anyhow!("invalid mac address {:?}", b)); + } else { + Ok(format!( + "{:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}", + b[0], b[1], b[2], b[3], b[4], b[5] + )) + } +} diff --git a/src/runtime-rs/crates/resource/src/network/utils/netns.rs b/src/runtime-rs/crates/resource/src/network/utils/netns.rs new file mode 100644 index 0000000000..1377a78594 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/utils/netns.rs @@ -0,0 +1,51 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{fs::File, os::unix::io::AsRawFd}; + +use anyhow::{Context, Result}; +use nix::sched::{setns, CloneFlags}; +use nix::unistd::{getpid, gettid}; + +pub(crate) struct NetnsGuard { + old_netns: Option, +} + +impl NetnsGuard { + pub(crate) fn new(new_netns_path: &str) -> Result { + let old_netns = if !new_netns_path.is_empty() { + let current_netns_path = format!("/proc/{}/task/{}/ns/{}", getpid(), gettid(), "net"); + let old_netns = File::open(¤t_netns_path) + .context(format!("open current netns path {}", ¤t_netns_path))?; + let new_netns = File::open(&new_netns_path) + .context(format!("open new netns path {}", &new_netns_path))?; + setns(new_netns.as_raw_fd(), CloneFlags::CLONE_NEWNET) + .context("set netns to new netns")?; + info!( + sl!(), + "set netns from old {:?} to new {:?} tid {}", + old_netns, + new_netns, + gettid().to_string() + ); + Some(old_netns) + } else { + warn!(sl!(), "skip to set netns for empty netns path"); + None + }; + Ok(Self { old_netns }) + } +} + +impl Drop for NetnsGuard { + fn drop(&mut self) { + if let Some(old_netns) = self.old_netns.as_ref() { + let old_netns_fd = old_netns.as_raw_fd(); + setns(old_netns_fd, CloneFlags::CLONE_NEWNET).unwrap(); + info!(sl!(), "set netns to old {:?}", old_netns_fd); + } + } +} diff --git a/src/runtime-rs/crates/resource/src/rootfs/mod.rs b/src/runtime-rs/crates/resource/src/rootfs/mod.rs index 4063c8bf82..7ea27fe0d6 100644 --- a/src/runtime-rs/crates/resource/src/rootfs/mod.rs +++ b/src/runtime-rs/crates/resource/src/rootfs/mod.rs @@ -11,7 +11,6 @@ use std::{sync::Arc, vec::Vec}; use anyhow::{anyhow, Context, Result}; use async_trait::async_trait; use kata_types::mount::Mount; -use log::{error, info}; use nix::sys::stat::{self, SFlag}; use tokio::sync::RwLock; @@ -90,6 +89,7 @@ impl RootFsResource { let inner = self.inner.read().await; for r in &inner.rootfs { info!( + sl!(), "rootfs {:?}: count {}", r.get_guest_rootfs_path().await, Arc::strong_count(r) @@ -114,7 +114,7 @@ fn get_block_device(file_path: &str) -> Option { } } Err(err) => { - error!("failed to stat for {} {:?}", file_path, err); + error!(sl!(), "failed to stat for {} {:?}", file_path, err); return None; } }; diff --git a/src/runtime-rs/crates/resource/src/volume/share_fs_volume.rs b/src/runtime-rs/crates/resource/src/volume/share_fs_volume.rs index 4fa6b341f1..9bf02ddc4f 100644 --- a/src/runtime-rs/crates/resource/src/volume/share_fs_volume.rs +++ b/src/runtime-rs/crates/resource/src/volume/share_fs_volume.rs @@ -7,7 +7,6 @@ use std::{path::Path, sync::Arc}; use anyhow::{anyhow, Context, Result}; -use log::debug; use nix::sys::stat::{stat, SFlag}; use super::Volume; @@ -53,6 +52,7 @@ impl ShareFsVolume { | SFlag::S_IFREG; if !file_type.contains(SFlag::from_bits_truncate(stat.st_mode)) { debug!( + sl!(), "Ignoring non-regular file as FS sharing not supported. mount: {:?}", m );