diff --git a/src/libs/kata-types/src/config/runtime.rs b/src/libs/kata-types/src/config/runtime.rs
index dddd3adc5b..853e4aef3f 100644
--- a/src/libs/kata-types/src/config/runtime.rs
+++ b/src/libs/kata-types/src/config/runtime.rs
@@ -137,6 +137,17 @@ pub struct Runtime {
/// This option is typically used to retain abnormal information for debugging.
#[serde(default)]
pub keep_abnormal: bool,
+
+ /// Base directory of directly attachable network config. The packaged
+ /// configuration file sets it to "/run/kata-containers/dans".
+ ///
+ /// Network devices for VM-based containers are allowed to be placed in the
+ /// host netns to eliminate as many hops as possible, which is what we
+ /// called a "directly attachable network". The config, set by special CNI
+ /// plugins, is used to tell the Kata Containers what devices are attached
+ /// to the hypervisor.
+ #[serde(default)]
+ pub dan_conf: String,
}
impl ConfigOps for Runtime {
diff --git a/src/runtime-rs/Makefile b/src/runtime-rs/Makefile
index 1981a37d9c..496a9b1c20 100644
--- a/src/runtime-rs/Makefile
+++ b/src/runtime-rs/Makefile
@@ -162,6 +162,7 @@ DEFVFIOMODE := guest-kernel
DEFSANDBOXCGROUPONLY ?= false
DEFSTATICRESOURCEMGMT_DB ?= false
DEFBINDMOUNTS := []
+DEFDANCONF := /run/kata-containers/dans
SED = sed
CLI_DIR = cmd
SHIMV2 = containerd-shim-kata-v2
@@ -308,6 +309,7 @@ USER_VARS += DBSHAREDFS
USER_VARS += KATA_INSTALL_GROUP
USER_VARS += KATA_INSTALL_OWNER
USER_VARS += KATA_INSTALL_CFG_PERMS
+USER_VARS += DEFDANCONF
SOURCES := \
$(shell find . 2>&1 | grep -E '.*\.rs$$') \
diff --git a/src/runtime-rs/config/configuration-dragonball.toml.in b/src/runtime-rs/config/configuration-dragonball.toml.in
index 58e29b9dda..f4b6bcfdbd 100644
--- a/src/runtime-rs/config/configuration-dragonball.toml.in
+++ b/src/runtime-rs/config/configuration-dragonball.toml.in
@@ -323,3 +323,12 @@ static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT_DB@
# - "/path/to:ro", readonly mode.
# - "/path/to:rw", readwrite mode.
sandbox_bind_mounts=@DEFBINDMOUNTS@
+
+# Base directory of directly attachable network config.
+# Network devices for VM-based containers are allowed to be placed in the
+# host netns to eliminate as many hops as possible, which is what we
+# called a "Directly Attachable Network". The config, set by special CNI
+# plugins, is used to tell the Kata containers what devices are attached
+# to the hypervisor.
+# (default: /run/kata-containers/dans)
+dan_conf = "@DEFDANCONF@"
diff --git a/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_net.rs b/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_net.rs
index 2fa825b517..3d19625b9f 100644
--- a/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_net.rs
+++ b/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_net.rs
@@ -41,6 +41,12 @@ pub struct NetworkConfig {
/// Guest MAC address.
pub guest_mac: Option
,
+
+ /// Virtio queue size
+ pub queue_size: usize,
+
+ /// Virtio queue num
+ pub queue_num: usize,
}
#[derive(Clone, Debug, Default)]
diff --git a/src/runtime-rs/crates/hypervisor/src/dragonball/inner_device.rs b/src/runtime-rs/crates/hypervisor/src/dragonball/inner_device.rs
index fe7186c760..85ac7bd196 100644
--- a/src/runtime-rs/crates/hypervisor/src/dragonball/inner_device.rs
+++ b/src/runtime-rs/crates/hypervisor/src/dragonball/inner_device.rs
@@ -214,6 +214,8 @@ impl DragonballInner {
Some(mac) => MacAddr::from_bytes(&mac.0).ok(),
None => None,
},
+ num_queues: config.queue_num,
+ queue_size: config.queue_size as u16,
..Default::default()
};
diff --git a/src/runtime-rs/crates/resource/src/network/dan.rs b/src/runtime-rs/crates/resource/src/network/dan.rs
new file mode 100644
index 0000000000..d59875bca1
--- /dev/null
+++ b/src/runtime-rs/crates/resource/src/network/dan.rs
@@ -0,0 +1,406 @@
+// Copyright (c) 2019-2023 Alibaba Cloud
+// Copyright (c) 2019-2023 Ant Group
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+//! Directly Attachable Network (DAN) is a type of network that runs in the host
+//! netns. It supports host-tap, vhost-user (DPDK), etc.
+//! The device information is retrieved from a JSON file, the type of which is
+//! `DanConfig` (an optional netns plus a list of `DanDevice`).
+//! In this module, `IPAddress`, `Interface`, etc., are duplicated mostly from
+//! `agent::IPAddress`, `agent::Interface`, and so on. They can't be referenced
+//! directly because the former represents the structure of the JSON file written
+//! by CNI plugins. They might have some slight differences, and may be revised in
+//! the future.
+
+use std::net::IpAddr;
+use std::path::PathBuf;
+use std::str::FromStr;
+use std::sync::Arc;
+
+use agent::IPFamily;
+use anyhow::{anyhow, Context, Result};
+use async_trait::async_trait;
+use hypervisor::device::device_manager::DeviceManager;
+use hypervisor::Hypervisor;
+use kata_types::config::TomlConfig;
+use scopeguard::defer;
+use serde::{Deserialize, Serialize};
+use tokio::fs;
+use tokio::sync::RwLock;
+
+use super::network_entity::NetworkEntity;
+use super::utils::address::{ip_family_from_ip_addr, parse_ip_cidr};
+use super::{EndpointState, NetnsGuard, Network};
+use crate::network::endpoint::TapEndpoint;
+use crate::network::network_info::network_info_from_dan::NetworkInfoFromDan;
+use crate::network::utils::generate_private_mac_addr;
+
+/// Directly attachable network
+pub struct Dan {
+ inner: Arc>,
+}
+
+pub struct DanInner {
+ netns: Option,
+ entity_list: Vec,
+}
+
+impl Dan {
+ pub async fn new(
+ config: &DanNetworkConfig,
+ dev_mgr: Arc>,
+ ) -> Result {
+ Ok(Self {
+ inner: Arc::new(RwLock::new(DanInner::new(config, &dev_mgr).await?)),
+ })
+ }
+}
+
+impl DanInner {
+ /// DanInner initialization deserializes DAN devices from a file written
+ /// by CNI plugins. Respective endpoint and network_info are retrieved
+ /// from the devices, and compose NetworkEntity.
+ async fn new(config: &DanNetworkConfig, dev_mgr: &Arc>) -> Result {
+ let json_str = fs::read_to_string(&config.dan_conf_path)
+ .await
+ .context("Read DAN config from file")?;
+ let config: DanConfig = serde_json::from_str(&json_str).context("Invalid DAN config")?;
+ info!(sl!(), "Dan config is loaded = {:?}", config);
+
+ let (connection, handle, _) = rtnetlink::new_connection().context("New connection")?;
+ let thread_handler = tokio::spawn(connection);
+ defer!({
+ thread_handler.abort();
+ });
+
+ let mut entity_list = Vec::with_capacity(config.devices.len());
+ for (idx, device) in config.devices.iter().enumerate() {
+ let name = format!("eth{}", idx);
+ let endpoint = match &device.device {
+ // TODO: Support VhostUserNet protocol
+ Device::VhostUser {
+ path,
+ queue_num: _,
+ queue_size: _,
+ } => {
+ warn!(sl!(), "A DAN device whose type is \"vhost-user\" and socket path is {} is ignored.", path);
+ continue;
+ }
+ Device::HostTap {
+ tap_name,
+ queue_num,
+ queue_size,
+ } => Arc::new(
+ TapEndpoint::new(
+ &handle,
+ idx as u32,
+ &name,
+ tap_name,
+ &device.guest_mac,
+ *queue_num,
+ *queue_size,
+ dev_mgr,
+ )
+ .await
+ .with_context(|| format!("New a {} tap endpoint", tap_name))?,
+ ),
+ };
+
+ let network_info = Arc::new(
+ NetworkInfoFromDan::new(device)
+ .await
+ .context("Network info from DAN")?,
+ );
+
+ entity_list.push(NetworkEntity {
+ endpoint,
+ network_info,
+ })
+ }
+
+ Ok(Self {
+ netns: config.netns,
+ entity_list,
+ })
+ }
+}
+
+#[async_trait]
+impl Network for Dan {
+ async fn setup(&self) -> Result<()> {
+ let inner = self.inner.read().await;
+ let _netns_guard;
+ if let Some(netns) = inner.netns.as_ref() {
+ _netns_guard = NetnsGuard::new(netns).context("New netns guard")?;
+ }
+ for e in inner.entity_list.iter() {
+ e.endpoint.attach().await.context("Attach")?;
+ }
+ Ok(())
+ }
+
+ async fn interfaces(&self) -> Result> {
+ let inner = self.inner.read().await;
+ let mut interfaces = vec![];
+ for e in inner.entity_list.iter() {
+ interfaces.push(e.network_info.interface().await.context("Interface")?);
+ }
+ Ok(interfaces)
+ }
+
+ async fn routes(&self) -> Result> {
+ let inner = self.inner.read().await;
+ let mut routes = vec![];
+ for e in inner.entity_list.iter() {
+ let mut list = e.network_info.routes().await.context("Routes")?;
+ routes.append(&mut list);
+ }
+ Ok(routes)
+ }
+
+ async fn neighs(&self) -> Result> {
+ let inner = self.inner.read().await;
+ let mut neighs = vec![];
+ for e in &inner.entity_list {
+ let mut list = e.network_info.neighs().await.context("Neighs")?;
+ neighs.append(&mut list);
+ }
+ Ok(neighs)
+ }
+
+ async fn save(&self) -> Option> {
+ let inner = self.inner.read().await;
+ let mut ep_states = vec![];
+ for e in &inner.entity_list {
+ if let Some(state) = e.endpoint.save().await {
+ ep_states.push(state);
+ }
+ }
+ Some(ep_states)
+ }
+
+ async fn remove(&self, h: &dyn Hypervisor) -> Result<()> {
+ let inner = self.inner.read().await;
+ let _netns_guard;
+ if let Some(netns) = inner.netns.as_ref() {
+ _netns_guard = NetnsGuard::new(netns).context("New netns guard")?;
+ }
+ for e in inner.entity_list.iter() {
+ e.endpoint.detach(h).await.context("Detach")?;
+ }
+ Ok(())
+ }
+}
+
+/// Directly attachable network config
+#[derive(Debug)]
+pub struct DanNetworkConfig {
+ pub dan_conf_path: PathBuf,
+}
+
+/// Directly attachable network config written by CNI plugins
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct DanConfig {
+ netns: Option,
+ devices: Vec,
+}
+
+/// Directly attachable network device
+ /// This struct is deserialized from a file containing device information,
+/// sent from CNI plugins.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub(crate) struct DanDevice {
+ // Name of device (interface name on the guest)
+ pub(crate) name: String,
+ // Mac address of interface on the guest, if it is not specified, a
+ // private address is generated as default.
+ #[serde(default = "generate_private_mac_addr")]
+ pub(crate) guest_mac: String,
+ // Device
+ pub(crate) device: Device,
+ // Network info
+ pub(crate) network_info: NetworkInfo,
+}
+
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+#[serde(tag = "type")]
+pub(crate) enum Device {
+ #[serde(rename = "vhost-user")]
+ VhostUser {
+ // Vhost-user socket path
+ path: String,
+ #[serde(default)]
+ queue_num: usize,
+ #[serde(default)]
+ queue_size: usize,
+ },
+ #[serde(rename = "host-tap")]
+ HostTap {
+ tap_name: String,
+ #[serde(default)]
+ queue_num: usize,
+ #[serde(default)]
+ queue_size: usize,
+ },
+}
+
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub(crate) struct NetworkInfo {
+ pub(crate) interface: Interface,
+ #[serde(default)]
+ pub(crate) routes: Vec,
+ #[serde(default)]
+ pub(crate) neighbors: Vec,
+}
+
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub(crate) struct Interface {
+ // IP addresses in the format of CIDR
+ pub ip_addresses: Vec,
+ #[serde(default = "default_mtu")]
+ pub mtu: u64,
+ #[serde(default)]
+ // Link type
+ pub ntype: String,
+ #[serde(default)]
+ pub flags: u32,
+}
+
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub(crate) struct Route {
+ #[serde(default)]
+ // Destination(CIDR), an empty string denotes no destination
+ pub dest: String,
+ #[serde(default)]
+ // Gateway(IP Address), an empty string denotes no gateway
+ pub gateway: String,
+ // Source(IP Address), an empty string denotes no source
+ #[serde(default)]
+ pub source: String,
+ // Scope
+ #[serde(default)]
+ pub scope: u32,
+}
+
+impl Route {
+ pub(crate) fn ip_family(&self) -> Result {
+ if !self.dest.is_empty() {
+ return Ok(ip_family_from_ip_addr(
+ &parse_ip_cidr(&self.dest)
+ .context("Parse ip addr from dest")?
+ .0,
+ ));
+ }
+
+ if !self.gateway.is_empty() {
+ return Ok(ip_family_from_ip_addr(
+ &IpAddr::from_str(&self.gateway).context("Parse ip addr from gateway")?,
+ ));
+ }
+
+ if !self.source.is_empty() {
+ return Ok(ip_family_from_ip_addr(
+ &IpAddr::from_str(&self.source).context("Parse ip addr from source")?,
+ ));
+ }
+
+ Err(anyhow!("Failed to retrieve IP family from {:?}", self))
+ }
+}
+
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub(crate) struct ARPNeighbor {
+ // IP address in the format of CIDR
+ pub ip_address: Option,
+ #[serde(default)]
+ pub hardware_addr: String,
+ #[serde(default)]
+ pub state: u32,
+ #[serde(default)]
+ pub flags: u32,
+}
+
+fn default_mtu() -> u64 {
+ 1500
+}
+
+/// Path of DAN config, the file contains a DanConfig (netns and DanDevices).
+#[inline]
+pub fn dan_config_path(config: &TomlConfig, sandbox_id: &str) -> PathBuf {
+ PathBuf::from(config.runtime.dan_conf.as_str()).join(format!("{}.json", sandbox_id))
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::network::dan::{ARPNeighbor, DanDevice, Device, Interface, NetworkInfo, Route};
+
+ #[test]
+ fn test_dan_json() {
+ let json_str = r#"{
+ "name": "eth0",
+ "guest_mac": "xx:xx:xx:xx:xx",
+ "device": {
+ "type": "vhost-user",
+ "path": "/tmp/test",
+ "queue_num": 1,
+ "queue_size": 1
+ },
+ "network_info": {
+ "interface": {
+ "ip_addresses": ["192.168.0.1/24"],
+ "mtu": 1500,
+ "ntype": "tuntap",
+ "flags": 0
+ },
+ "routes": [{
+ "dest": "172.18.0.0/16",
+ "source": "172.18.0.1",
+ "gateway": "172.18.31.1",
+ "scope": 0,
+ "flags": 0
+ }],
+ "neighbors": [{
+ "ip_address": "192.168.0.3/16",
+ "device": "",
+ "state": 0,
+ "flags": 0,
+ "hardware_addr": "xx:xx:xx:xx:xx"
+ }]
+ }
+ }"#;
+ let dev_from_json: DanDevice = serde_json::from_str(json_str).unwrap();
+ let dev = DanDevice {
+ name: "eth0".to_owned(),
+ guest_mac: "xx:xx:xx:xx:xx".to_owned(),
+ device: Device::VhostUser {
+ path: "/tmp/test".to_owned(),
+ queue_num: 1,
+ queue_size: 1,
+ },
+ network_info: NetworkInfo {
+ interface: Interface {
+ ip_addresses: vec!["192.168.0.1/24".to_owned()],
+ mtu: 1500,
+ ntype: "tuntap".to_owned(),
+ flags: 0,
+ },
+ routes: vec![Route {
+ dest: "172.18.0.0/16".to_owned(),
+ source: "172.18.0.1".to_owned(),
+ gateway: "172.18.31.1".to_owned(),
+ scope: 0,
+ }],
+ neighbors: vec![ARPNeighbor {
+ ip_address: Some("192.168.0.3/16".to_owned()),
+ hardware_addr: "xx:xx:xx:xx:xx".to_owned(),
+ state: 0,
+ flags: 0,
+ }],
+ },
+ };
+
+ assert_eq!(dev_from_json, dev);
+ }
+}
diff --git a/src/runtime-rs/crates/resource/src/network/endpoint/endpoint_persist.rs b/src/runtime-rs/crates/resource/src/network/endpoint/endpoint_persist.rs
index 1f6fe3c58f..b637b2afe6 100644
--- a/src/runtime-rs/crates/resource/src/network/endpoint/endpoint_persist.rs
+++ b/src/runtime-rs/crates/resource/src/network/endpoint/endpoint_persist.rs
@@ -39,6 +39,11 @@ pub struct IpVlanEndpointState {
pub network_qos: bool,
}
+#[derive(Serialize, Deserialize, Clone, Default)]
+pub struct TapEndpointState {
+ pub if_name: String,
+}
+
#[derive(Serialize, Deserialize, Clone, Default)]
pub struct EndpointState {
pub physical_endpoint: Option,
@@ -46,5 +51,6 @@ pub struct EndpointState {
pub ipvlan_endpoint: Option,
pub macvlan_endpoint: Option,
pub vlan_endpoint: Option,
+ pub tap_endpoint: Option,
// TODO : other endpoint
}
diff --git a/src/runtime-rs/crates/resource/src/network/endpoint/mod.rs b/src/runtime-rs/crates/resource/src/network/endpoint/mod.rs
index 2efd0bc349..1c15f67e03 100644
--- a/src/runtime-rs/crates/resource/src/network/endpoint/mod.rs
+++ b/src/runtime-rs/crates/resource/src/network/endpoint/mod.rs
@@ -16,6 +16,8 @@ mod macvlan_endpoint;
pub use macvlan_endpoint::MacVlanEndpoint;
pub mod endpoint_persist;
mod endpoints_test;
+mod tap_endpoint;
+pub use tap_endpoint::TapEndpoint;
use anyhow::Result;
use async_trait::async_trait;
diff --git a/src/runtime-rs/crates/resource/src/network/endpoint/tap_endpoint.rs b/src/runtime-rs/crates/resource/src/network/endpoint/tap_endpoint.rs
new file mode 100644
index 0000000000..e22a91d922
--- /dev/null
+++ b/src/runtime-rs/crates/resource/src/network/endpoint/tap_endpoint.rs
@@ -0,0 +1,124 @@
+// Copyright (c) 2019-2023 Alibaba Cloud
+// Copyright (c) 2019-2023 Ant Group
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+use std::sync::Arc;
+
+use anyhow::{Context, Result};
+use async_trait::async_trait;
+use hypervisor::device::device_manager::{do_handle_device, DeviceManager};
+use hypervisor::device::{DeviceConfig, DeviceType};
+use hypervisor::{Hypervisor, NetworkConfig, NetworkDevice};
+use tokio::sync::RwLock;
+
+use super::endpoint_persist::TapEndpointState;
+use super::Endpoint;
+use crate::network::network_pair::{get_link_by_name, NetworkInterface};
+use crate::network::{utils, EndpointState};
+
+/// TapEndpoint is used to attach to the hypervisor directly
+#[derive(Debug)]
+pub struct TapEndpoint {
+ // Index
+ #[allow(dead_code)]
+ index: u32,
+ // Name of virt interface
+ name: String,
+ // Hardware address of virt interface
+ guest_mac: String,
+ // Tap interface on the host
+ tap_iface: NetworkInterface,
+ // Device manager
+ dev_mgr: Arc>,
+ // Virtio queue num
+ queue_num: usize,
+ // Virtio queue size
+ queue_size: usize,
+}
+
+impl TapEndpoint {
+ #[allow(clippy::too_many_arguments)]
+ pub async fn new(
+ handle: &rtnetlink::Handle,
+ index: u32,
+ name: &str,
+ tap_name: &str,
+ guest_mac: &str,
+ queue_num: usize,
+ queue_size: usize,
+ dev_mgr: &Arc>,
+ ) -> Result {
+ let tap_link = get_link_by_name(handle, tap_name)
+ .await
+ .context("get link by name")?;
+ let tap_hard_addr =
+ utils::get_mac_addr(&tap_link.attrs().hardware_addr).context("Get mac addr of tap")?;
+
+ Ok(TapEndpoint {
+ index,
+ name: name.to_owned(),
+ guest_mac: guest_mac.to_owned(),
+ tap_iface: NetworkInterface {
+ name: tap_name.to_owned(),
+ hard_addr: tap_hard_addr,
+ ..Default::default()
+ },
+ dev_mgr: dev_mgr.clone(),
+ queue_num,
+ queue_size,
+ })
+ }
+
+ fn get_network_config(&self) -> Result {
+ let guest_mac = utils::parse_mac(&self.guest_mac).context("Parse mac address")?;
+ Ok(NetworkConfig {
+ host_dev_name: self.tap_iface.name.clone(),
+ virt_iface_name: self.name.clone(),
+ guest_mac: Some(guest_mac),
+ queue_num: self.queue_num,
+ queue_size: self.queue_size,
+ ..Default::default()
+ })
+ }
+}
+
+#[async_trait]
+impl Endpoint for TapEndpoint {
+ async fn name(&self) -> String {
+ self.name.clone()
+ }
+
+ async fn hardware_addr(&self) -> String {
+ self.guest_mac.clone()
+ }
+
+ async fn attach(&self) -> Result<()> {
+ let config = self.get_network_config().context("Get network config")?;
+ do_handle_device(&self.dev_mgr, &DeviceConfig::NetworkCfg(config))
+ .await
+ .context("Handle device")?;
+ Ok(())
+ }
+
+ async fn detach(&self, h: &dyn Hypervisor) -> Result<()> {
+ let config = self.get_network_config().context("Get network config")?;
+ h.remove_device(DeviceType::Network(NetworkDevice {
+ config,
+ ..Default::default()
+ }))
+ .await
+ .context("Remove device")?;
+ Ok(())
+ }
+
+ async fn save(&self) -> Option {
+ Some(EndpointState {
+ tap_endpoint: Some(TapEndpointState {
+ if_name: self.name.clone(),
+ }),
+ ..Default::default()
+ })
+ }
+}
diff --git a/src/runtime-rs/crates/resource/src/network/mod.rs b/src/runtime-rs/crates/resource/src/network/mod.rs
index ed9a9e4f98..5a85ee0895 100644
--- a/src/runtime-rs/crates/resource/src/network/mod.rs
+++ b/src/runtime-rs/crates/resource/src/network/mod.rs
@@ -4,9 +4,11 @@
// SPDX-License-Identifier: Apache-2.0
//
-mod endpoint;
use std::sync::Arc;
+mod dan;
+mod endpoint;
+pub use dan::{dan_config_path, Dan, DanNetworkConfig};
pub use endpoint::endpoint_persist::EndpointState;
pub use endpoint::Endpoint;
mod network_entity;
@@ -20,9 +22,8 @@ use network_with_netns::NetworkWithNetns;
mod network_pair;
use network_pair::NetworkPair;
mod utils;
-pub use utils::netns::{generate_netns_name, NetnsGuard};
-
use tokio::sync::RwLock;
+pub use utils::netns::{generate_netns_name, NetnsGuard};
use anyhow::{Context, Result};
use async_trait::async_trait;
@@ -30,7 +31,8 @@ use hypervisor::{device::device_manager::DeviceManager, Hypervisor};
#[derive(Debug)]
pub enum NetworkConfig {
- NetworkResourceWithNetNs(NetworkWithNetNsConfig),
+ NetNs(NetworkWithNetNsConfig),
+ Dan(DanNetworkConfig),
}
#[async_trait]
@@ -48,10 +50,15 @@ pub async fn new(
d: Arc>,
) -> Result> {
match config {
- NetworkConfig::NetworkResourceWithNetNs(c) => Ok(Arc::new(
+ NetworkConfig::NetNs(c) => Ok(Arc::new(
NetworkWithNetns::new(c, d)
.await
.context("new network with netns")?,
)),
+ NetworkConfig::Dan(c) => Ok(Arc::new(
+ Dan::new(c, d)
+ .await
+ .context("New directly attachable network")?,
+ )),
}
}
diff --git a/src/runtime-rs/crates/resource/src/network/network_info/mod.rs b/src/runtime-rs/crates/resource/src/network/network_info/mod.rs
index 1500d5179e..a0e896bb3a 100644
--- a/src/runtime-rs/crates/resource/src/network/network_info/mod.rs
+++ b/src/runtime-rs/crates/resource/src/network/network_info/mod.rs
@@ -4,6 +4,7 @@
// SPDX-License-Identifier: Apache-2.0
//
+pub(crate) mod network_info_from_dan;
pub(crate) mod network_info_from_link;
use agent::{ARPNeighbor, Interface, Route};
diff --git a/src/runtime-rs/crates/resource/src/network/network_info/network_info_from_dan.rs b/src/runtime-rs/crates/resource/src/network/network_info/network_info_from_dan.rs
new file mode 100644
index 0000000000..5ca06d340c
--- /dev/null
+++ b/src/runtime-rs/crates/resource/src/network/network_info/network_info_from_dan.rs
@@ -0,0 +1,213 @@
+// Copyright (c) 2019-2023 Alibaba Cloud
+// Copyright (c) 2019-2023 Ant Group
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+use agent::{ARPNeighbor, IPAddress, Interface, Route};
+use anyhow::Result;
+use async_trait::async_trait;
+use netlink_packet_route::IFF_NOARP;
+
+use super::NetworkInfo;
+use crate::network::dan::DanDevice;
+use crate::network::utils::address::{ip_family_from_ip_addr, parse_ip_cidr};
+
+/// NetworkInfoFromDan is responsible for converting network info in JSON
+/// to agent's network info.
+#[derive(Debug)]
+pub(crate) struct NetworkInfoFromDan {
+ interface: Interface,
+ routes: Vec,
+ neighs: Vec,
+}
+
+impl NetworkInfoFromDan {
+ pub async fn new(dan_device: &DanDevice) -> Result {
+ let ip_addresses = dan_device
+ .network_info
+ .interface
+ .ip_addresses
+ .iter()
+ .filter_map(|addr| {
+ let (ipaddr, mask) = match parse_ip_cidr(addr) {
+ Ok(ip_cidr) => (ip_cidr.0, ip_cidr.1),
+ Err(_) => return None,
+ };
+ // Skip if it is a loopback address
+ if ipaddr.is_loopback() {
+ return None;
+ }
+
+ Some(IPAddress {
+ family: ip_family_from_ip_addr(&ipaddr),
+ address: ipaddr.to_string(),
+ mask: format!("{}", mask),
+ })
+ })
+ .collect();
+
+ let interface = Interface {
+ device: dan_device.name.clone(),
+ name: dan_device.name.clone(),
+ ip_addresses,
+ mtu: dan_device.network_info.interface.mtu,
+ hw_addr: dan_device.guest_mac.clone(),
+ pci_addr: String::default(),
+ field_type: dan_device.network_info.interface.ntype.clone(),
+ raw_flags: dan_device.network_info.interface.flags & IFF_NOARP,
+ };
+
+ let routes = dan_device
+ .network_info
+ .routes
+ .iter()
+ .filter_map(|route| {
+ let family = match route.ip_family() {
+ Ok(family) => family,
+ Err(_) => return None,
+ };
+ Some(Route {
+ dest: route.dest.clone(),
+ gateway: route.gateway.clone(),
+ device: dan_device.name.clone(),
+ source: route.source.clone(),
+ scope: route.scope,
+ family,
+ })
+ })
+ .collect();
+
+ let neighs = dan_device
+ .network_info
+ .neighbors
+ .iter()
+ .map(|neigh| {
+ let to_ip_address = neigh.ip_address.as_ref().and_then(|ip_address| {
+ parse_ip_cidr(ip_address)
+ .ok()
+ .map(|(ipaddr, mask)| IPAddress {
+ family: ip_family_from_ip_addr(&ipaddr),
+ address: ipaddr.to_string(),
+ mask: format!("{}", mask),
+ })
+ });
+
+ ARPNeighbor {
+ to_ip_address,
+ device: dan_device.name.clone(),
+ ll_addr: neigh.hardware_addr.clone(),
+ state: neigh.state as i32,
+ flags: neigh.flags as i32,
+ }
+ })
+ .collect();
+
+ Ok(Self {
+ interface,
+ routes,
+ neighs,
+ })
+ }
+}
+
+#[async_trait]
+impl NetworkInfo for NetworkInfoFromDan {
+ async fn interface(&self) -> Result {
+ Ok(self.interface.clone())
+ }
+
+ async fn routes(&self) -> Result> {
+ Ok(self.routes.clone())
+ }
+
+ async fn neighs(&self) -> Result> {
+ Ok(self.neighs.clone())
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use agent::{ARPNeighbor, IPAddress, IPFamily, Interface, Route};
+
+ use super::NetworkInfoFromDan;
+ use crate::network::dan::{
+ ARPNeighbor as DanARPNeighbor, DanDevice, Device, Interface as DanInterface,
+ NetworkInfo as DanNetworkInfo, Route as DanRoute,
+ };
+ use crate::network::NetworkInfo;
+
+ #[tokio::test]
+ async fn test_network_info_from_dan() {
+ let dan_device = DanDevice {
+ name: "eth0".to_owned(),
+ guest_mac: "xx:xx:xx:xx:xx".to_owned(),
+ device: Device::HostTap {
+ tap_name: "tap0".to_owned(),
+ queue_num: 0,
+ queue_size: 0,
+ },
+ network_info: DanNetworkInfo {
+ interface: DanInterface {
+ ip_addresses: vec!["192.168.0.1/24".to_owned()],
+ mtu: 1500,
+ ntype: "tuntap".to_owned(),
+ flags: 0,
+ },
+ routes: vec![DanRoute {
+ dest: "172.18.0.0/16".to_owned(),
+ source: "172.18.0.1".to_owned(),
+ gateway: "172.18.31.1".to_owned(),
+ scope: 0,
+ }],
+ neighbors: vec![DanARPNeighbor {
+ ip_address: Some("192.168.0.3/16".to_owned()),
+ hardware_addr: "yy:yy:yy:yy:yy".to_owned(),
+ state: 0,
+ flags: 0,
+ }],
+ },
+ };
+
+ let network_info = NetworkInfoFromDan::new(&dan_device).await.unwrap();
+
+ let interface = Interface {
+ device: "eth0".to_owned(),
+ name: "eth0".to_owned(),
+ ip_addresses: vec![IPAddress {
+ family: IPFamily::V4,
+ address: "192.168.0.1".to_owned(),
+ mask: "24".to_owned(),
+ }],
+ mtu: 1500,
+ hw_addr: "xx:xx:xx:xx:xx".to_owned(),
+ pci_addr: String::default(),
+ field_type: "tuntap".to_owned(),
+ raw_flags: 0,
+ };
+ assert_eq!(interface, network_info.interface().await.unwrap());
+
+ let routes = vec![Route {
+ dest: "172.18.0.0/16".to_owned(),
+ gateway: "172.18.31.1".to_owned(),
+ device: "eth0".to_owned(),
+ source: "172.18.0.1".to_owned(),
+ scope: 0,
+ family: IPFamily::V4,
+ }];
+ assert_eq!(routes, network_info.routes().await.unwrap());
+
+ let neighbors = vec![ARPNeighbor {
+ to_ip_address: Some(IPAddress {
+ family: IPFamily::V4,
+ address: "192.168.0.3".to_owned(),
+ mask: "16".to_owned(),
+ }),
+ device: "eth0".to_owned(),
+ ll_addr: "yy:yy:yy:yy:yy".to_owned(),
+ state: 0,
+ flags: 0,
+ }];
+ assert_eq!(neighbors, network_info.neighs().await.unwrap());
+ }
+}
diff --git a/src/runtime-rs/crates/resource/src/network/utils/address.rs b/src/runtime-rs/crates/resource/src/network/utils/address.rs
index 3046d3685b..792c4fbb5f 100644
--- a/src/runtime-rs/crates/resource/src/network/utils/address.rs
+++ b/src/runtime-rs/crates/resource/src/network/utils/address.rs
@@ -4,13 +4,14 @@
// SPDX-License-Identifier: Apache-2.0
//
-use std::{
- convert::TryFrom,
- net::{IpAddr, Ipv4Addr, Ipv6Addr},
-};
+use std::convert::TryFrom;
+use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};
+use std::str::FromStr;
-use anyhow::{anyhow, Result};
-use netlink_packet_route::{nlas::address::Nla, AddressMessage, AF_INET, AF_INET6};
+use agent::IPFamily;
+use anyhow::{anyhow, Context, Result};
+use netlink_packet_route::nlas::address::Nla;
+use netlink_packet_route::{AddressMessage, AF_INET, AF_INET6};
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Address {
@@ -84,6 +85,41 @@ pub(crate) fn parse_ip(ip: &[u8], family: u8) -> Result {
}
}
+pub(crate) fn parse_ip_cidr(ip: &str) -> Result<(IpAddr, u8)> {
+ let items: Vec<&str> = ip.split('/').collect();
+ if items.len() != 2 {
+ return Err(anyhow!(format!(
+ "{} is a bad IP address in format of CIDR",
+ ip
+ )));
+ }
+ let ipaddr = IpAddr::from_str(items[0]).context("Parse IP address from string")?;
+ let mask = u8::from_str(items[1]).context("Parse mask")?;
+ if ipaddr.is_ipv4() && mask > 32 {
+ return Err(anyhow!(format!(
+ "The mask of IPv4 address should be less than or equal to 32, but we got {}.",
+ mask
+ )));
+ }
+ if mask > 128 {
+ return Err(anyhow!(format!(
+ "The mask should be less than or equal to 128, but we got {}.",
+ mask
+ )));
+ }
+ Ok((ipaddr, mask))
+}
+
+/// Retrieve IP Family defined at agent crate from IpAddr.
+#[inline]
+pub(crate) fn ip_family_from_ip_addr(ip_addr: &IpAddr) -> IPFamily {
+ if ip_addr.is_ipv4() {
+ IPFamily::V4
+ } else {
+ IPFamily::V6
+ }
+}
+
#[cfg(test)]
mod tests {
use super::*;
@@ -109,4 +145,28 @@ mod tests {
let fail_ipv6 = [1, 2, 3, 4, 5, 6, 7, 8, 2, 3];
assert!(parse_ip(fail_ipv6.as_slice(), AF_INET6 as u8).is_err());
}
+
+ #[test]
+ fn test_parse_ip_cidr() {
+ let test_cases = vec![
+ ("127.0.0.1/32", ("127.0.0.1", 32u8)),
+ ("2001:4860:4860::8888/32", ("2001:4860:4860::8888", 32u8)),
+ ("2001:4860:4860::8888/128", ("2001:4860:4860::8888", 128u8)),
+ ];
+ for tc in test_cases.iter() {
+ let (ipaddr, mask) = parse_ip_cidr(tc.0).unwrap();
+ assert_eq!(ipaddr.to_string(), tc.1 .0);
+ assert_eq!(mask, tc.1 .1);
+ }
+ let test_cases = vec![
+ "127.0.0.1/33",
+ "2001:4860:4860::8888/129",
+ "2001:4860:4860::8888/300",
+ "127.0.0.1/33/1",
+ "127.0.0.1",
+ ];
+ for tc in test_cases.iter() {
+ assert!(parse_ip_cidr(tc).is_err());
+ }
+ }
}
diff --git a/src/runtime-rs/crates/resource/src/network/utils/mod.rs b/src/runtime-rs/crates/resource/src/network/utils/mod.rs
index 341038cb9f..39a34d6876 100644
--- a/src/runtime-rs/crates/resource/src/network/utils/mod.rs
+++ b/src/runtime-rs/crates/resource/src/network/utils/mod.rs
@@ -9,6 +9,8 @@ pub(crate) mod link;
pub(crate) mod netns;
use anyhow::{anyhow, Result};
+use rand::rngs::OsRng;
+use rand::RngCore;
pub(crate) fn parse_mac(s: &str) -> Option {
let v: Vec<_> = s.split(':').collect();
@@ -34,6 +36,17 @@ pub(crate) fn get_mac_addr(b: &[u8]) -> Result {
}
}
+/// Generate a private mac address.
+/// The range of private mac addresses is
+/// x2-xx-xx-xx-xx-xx, x6-xx-xx-xx-xx-xx, xA-xx-xx-xx-xx-xx, xE-xx-xx-xx-xx-xx.
+pub(crate) fn generate_private_mac_addr() -> String {
+ let mut addr: [u8; 6] = [0, 0, 0, 0, 0, 0];
+ OsRng.fill_bytes(&mut addr);
+ addr[0] = (addr[0] | 2) & 0xfe;
+ // This unwrap is safe since the length of addr is 6
+ get_mac_addr(&addr).unwrap()
+}
+
#[cfg(test)]
mod tests {
use super::*;
@@ -63,4 +76,14 @@ mod tests {
assert!(addr.is_some());
assert_eq!(expected_addr.0, addr.unwrap().0);
}
+
+ #[test]
+ fn test_generate_private_mac_addr() {
+ let addr1 = generate_private_mac_addr();
+ let addr2 = generate_private_mac_addr();
+ assert_ne!(addr1, addr2);
+ let ch1 = addr1.chars().nth(1).unwrap();
+ let is_private = ch1 == '2' || ch1 == '6' || ch1 == 'a' || ch1 == 'e';
+ assert!(is_private)
+ }
}
diff --git a/src/runtime-rs/crates/runtimes/src/manager.rs b/src/runtime-rs/crates/runtimes/src/manager.rs
index 1244b6835a..a5af2a3fdf 100644
--- a/src/runtime-rs/crates/runtimes/src/manager.rs
+++ b/src/runtime-rs/crates/runtimes/src/manager.rs
@@ -21,7 +21,10 @@ use kata_types::{
use linux_container::LinuxContainer;
use netns_rs::NetNs;
use persist::sandbox_persist::Persist;
-use resource::{cpu_mem::initial_size::InitialSizeManager, network::generate_netns_name};
+use resource::{
+ cpu_mem::initial_size::InitialSizeManager,
+ network::{dan_config_path, generate_netns_name},
+};
use shim_interface::shim_mgmt::ERR_NO_SHIM_SERVER;
use tokio::fs;
use tokio::sync::{mpsc::Sender, Mutex, RwLock};
@@ -146,10 +149,14 @@ impl RuntimeHandlerManagerInner {
let config = load_config(spec, options).context("load config")?;
+ let dan_path = dan_config_path(&config, &self.id);
let mut network_created = false;
// set netns to None if we want no network for the VM
let netns = if config.runtime.disable_new_netns {
None
+ } else if dan_path.exists() {
+ info!(sl!(), "Do not create a netns due to DAN");
+ None
} else {
let mut netns_path = None;
if let Some(linux) = &spec.linux {
diff --git a/src/runtime-rs/crates/runtimes/virt_container/src/sandbox.rs b/src/runtime-rs/crates/runtimes/virt_container/src/sandbox.rs
index 9ded5033cf..6ede6fb2cf 100644
--- a/src/runtime-rs/crates/runtimes/virt_container/src/sandbox.rs
+++ b/src/runtime-rs/crates/runtimes/virt_container/src/sandbox.rs
@@ -6,30 +6,25 @@
use std::sync::Arc;
-use agent::{
- self, kata::KataAgent, types::KernelModule, Agent, GetIPTablesRequest, SetIPTablesRequest,
- VolumeStatsRequest,
-};
+use agent::kata::KataAgent;
+use agent::types::KernelModule;
+use agent::{self, Agent, GetIPTablesRequest, SetIPTablesRequest, VolumeStatsRequest};
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
-use common::{
- message::{Action, Message},
- Sandbox, SandboxNetworkEnv,
-};
+use common::message::{Action, Message};
+use common::{Sandbox, SandboxNetworkEnv};
use containerd_shim_protos::events::task::TaskOOM;
use hypervisor::{dragonball::Dragonball, BlockConfig, Hypervisor, HYPERVISOR_DRAGONBALL};
use kata_sys_util::hooks::HookStates;
use kata_types::config::TomlConfig;
-use resource::{
- manager::ManagerArgs,
- network::{NetworkConfig, NetworkWithNetNsConfig},
- ResourceConfig, ResourceManager,
-};
+use persist::{self, sandbox_persist::Persist};
+use resource::manager::ManagerArgs;
+use resource::network::{dan_config_path, DanNetworkConfig, NetworkConfig, NetworkWithNetNsConfig};
+use resource::{ResourceConfig, ResourceManager};
use tokio::sync::{mpsc::Sender, Mutex, RwLock};
use tracing::instrument;
use crate::health_check::HealthCheck;
-use persist::{self, sandbox_persist::Persist};
pub(crate) const VIRTCONTAINER: &str = "virt_container";
pub struct SandboxRestoreArgs {
@@ -101,19 +96,15 @@ impl VirtSandbox {
#[instrument]
async fn prepare_for_start_sandbox(
&self,
- _id: &str,
+ id: &str,
network_env: SandboxNetworkEnv,
) -> Result> {
let mut resource_configs = vec![];
// prepare network config
if !network_env.network_created {
- if let Some(netns_path) = network_env.netns {
- let network_config = ResourceConfig::Network(
- self.prepare_network_config(netns_path, network_env.network_created)
- .await,
- );
- resource_configs.push(network_config);
+ if let Some(network_resource) = self.prepare_network_resource(&network_env).await {
+ resource_configs.push(network_resource);
}
}
@@ -133,6 +124,39 @@ impl VirtSandbox {
Ok(resource_configs)
}
+    /// Select the network resource configuration for this sandbox.
+    ///
+    /// A directly attachable network (DAN) config takes priority: when the
+    /// path returned by `dan_config_path` for this sandbox id exists, a
+    /// `NetworkConfig::Dan` pointing at that path is returned. Otherwise, if
+    /// `network_env.netns` is set, a `NetworkConfig::NetNs` is built from the
+    /// runtime's internetworking model, the netns path, and the hypervisor's
+    /// configured number of network queues.
+    ///
+    /// Returns `None` when there is neither a DAN config nor a netns path.
+    async fn prepare_network_resource(
+        &self,
+        network_env: &SandboxNetworkEnv,
+    ) -> Option<ResourceConfig> {
+        let config = self.resource_manager.config().await;
+        let dan_path = dan_config_path(&config, &self.sid);
+
+        // Network priority: DAN > NetNS
+        // NOTE(review): `exists()` looks like a synchronous filesystem probe
+        // inside an async fn (assuming `dan_path` is a `PathBuf`) - confirm
+        // that briefly blocking here is acceptable.
+        if dan_path.exists() {
+            Some(ResourceConfig::Network(NetworkConfig::Dan(
+                DanNetworkConfig {
+                    dan_conf_path: dan_path,
+                },
+            )))
+        } else if let Some(netns_path) = network_env.netns.as_ref() {
+            Some(ResourceConfig::Network(NetworkConfig::NetNs(
+                NetworkWithNetNsConfig {
+                    network_model: config.runtime.internetworking_model.clone(),
+                    netns_path: netns_path.to_owned(),
+                    // The queue count comes from the hypervisor's network settings.
+                    queues: self
+                        .hypervisor
+                        .hypervisor_config()
+                        .await
+                        .network_info
+                        .network_queues as usize,
+                    network_created: network_env.network_created,
+                },
+            )))
+        } else {
+            None
+        }
+    }
+
async fn execute_oci_hook_functions(
&self,
prestart_hooks: &[oci::Hook],
@@ -166,25 +190,6 @@ impl VirtSandbox {
Ok(())
}
- async fn prepare_network_config(
- &self,
- netns_path: String,
- network_created: bool,
- ) -> NetworkConfig {
- let config = self.resource_manager.config().await;
- NetworkConfig::NetworkResourceWithNetNs(NetworkWithNetNsConfig {
- network_model: config.runtime.internetworking_model.clone(),
- netns_path,
- queues: self
- .hypervisor
- .hypervisor_config()
- .await
- .network_info
- .network_queues as usize,
- network_created,
- })
- }
-
async fn prepare_rootfs_config(&self) -> Result {
let boot_info = self.hypervisor.hypervisor_config().await.boot_info;
@@ -270,18 +275,23 @@ impl Sandbox for VirtSandbox {
// We need to rescan the netns to handle the change.
// 2. Do not scan the netns if we want no network for the VM.
// TODO In case of vm factory, scan the netns to hotplug interfaces after the VM is started.
+ let config = self.resource_manager.config().await;
if self.has_prestart_hooks(prestart_hooks, create_runtime_hooks)
- && !self
- .resource_manager
- .config()
- .await
- .runtime
- .disable_new_netns
+ && !config.runtime.disable_new_netns
+ && !dan_config_path(&config, &self.sid).exists()
{
if let Some(netns_path) = network_env.netns {
- let network_resource = self
- .prepare_network_config(netns_path, network_env.network_created)
- .await;
+ let network_resource = NetworkConfig::NetNs(NetworkWithNetNsConfig {
+ network_model: config.runtime.internetworking_model.clone(),
+ netns_path: netns_path.to_owned(),
+ queues: self
+ .hypervisor
+ .hypervisor_config()
+ .await
+ .network_info
+ .network_queues as usize,
+ network_created: network_env.network_created,
+ });
self.resource_manager
.handle_network(network_resource)
.await