1
0
mirror of https://github.com/kata-containers/kata-containers.git synced 2025-04-29 12:14:48 +00:00

runtime-rs: Introduce directly attachable network

Kata Containers, as VM-based containers, are allowed to run in the host
netns. That is, the network can be isolated at L2. Network performance
benefits from this architecture, which eliminates as many hops as
possible. We call it a Directly Attachable Network (DAN for short).

The network devices are placed in the host netns by the CNI plugins. The
configs are saved in JSON format at {dan_conf}/{sandbox_id}.json,
including device name, type, and network info. At this initial stage,
DAN only supports host tap devices. More devices, such as DPDK, will
be supported in later versions.

The file format looks like this:

```json
{
	"netns": "/path/to/netns",
	"devices": [{
		"name": "eth0",
		"guest_mac": "xx:xx:xx:xx:xx",
		"device": {
			"type": "vhost-user",
			"path": "/tmp/test",
			"queue_num": 1,
			"queue_size": 1
		},
		"network_info": {
			"interface": {
				"ip_addresses": ["192.168.0.1/24"],
				"mtu": 1500,
				"ntype": "tuntap",
				"flags": 0
			},
			"routes": [{
				"dest": "172.18.0.0/16",
				"source": "172.18.0.1",
				"gateway": "172.18.31.1",
				"scope": 0,
				"flags": 0
			}],
			"neighbors": [{
				"ip_address": "192.168.0.3/16",
				"device": "",
				"state": 0,
				"flags": 0,
				"hardware_addr": "xx:xx:xx:xx:xx"
			}]
		}
	}]
}
```

Fixes: 

Signed-off-by: Xuewei Niu <niuxuewei.nxw@antgroup.com>
This commit is contained in:
Xuewei Niu 2023-07-19 16:09:30 +08:00
parent 7d1c48c881
commit 3958a39d07
16 changed files with 950 additions and 61 deletions
src
libs/kata-types/src/config
runtime-rs

View File

@ -137,6 +137,17 @@ pub struct Runtime {
/// This option is typically used to retain abnormal information for debugging.
#[serde(default)]
pub keep_abnormal: bool,
/// Base directory of directly attachable network config, the default value
/// is "/run/kata-containers/dans".
///
/// Network devices for VM-based containers are allowed to be placed in the
/// host netns to eliminate as many hops as possible, which is what we
/// called a "directly attachable network". The config, set by special CNI
/// plugins, is used to tell the Kata Containers what devices are attached
/// to the hypervisor.
#[serde(default)]
pub dan_conf: String,
}
impl ConfigOps for Runtime {

View File

@ -162,6 +162,7 @@ DEFVFIOMODE := guest-kernel
DEFSANDBOXCGROUPONLY ?= false
DEFSTATICRESOURCEMGMT_DB ?= false
DEFBINDMOUNTS := []
DEFDANCONF := /run/kata-containers/dans
SED = sed
CLI_DIR = cmd
SHIMV2 = containerd-shim-kata-v2
@ -308,6 +309,7 @@ USER_VARS += DBSHAREDFS
USER_VARS += KATA_INSTALL_GROUP
USER_VARS += KATA_INSTALL_OWNER
USER_VARS += KATA_INSTALL_CFG_PERMS
USER_VARS += DEFDANCONF
SOURCES := \
$(shell find . 2>&1 | grep -E '.*\.rs$$') \

View File

@ -323,3 +323,12 @@ static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT_DB@
# - "/path/to:ro", readonly mode.
# - "/path/to:rw", readwrite mode.
sandbox_bind_mounts=@DEFBINDMOUNTS@
# Base directory of the directly attachable network config.
# Network devices for VM-based containers are allowed to be placed in the
# host netns to eliminate as many hops as possible, which is what we
# call a "Directly Attachable Network". The config, set by special CNI
# plugins, is used to tell Kata Containers what devices are attached
# to the hypervisor.
# (default: /run/kata-containers/dans)
dan_conf = "@DEFDANCONF@"

View File

@ -41,6 +41,12 @@ pub struct NetworkConfig {
/// Guest MAC address.
pub guest_mac: Option<Address>,
/// Virtio queue size
pub queue_size: usize,
/// Virtio queue num
pub queue_num: usize,
}
#[derive(Clone, Debug, Default)]

View File

@ -214,6 +214,8 @@ impl DragonballInner {
Some(mac) => MacAddr::from_bytes(&mac.0).ok(),
None => None,
},
num_queues: config.queue_num,
queue_size: config.queue_size as u16,
..Default::default()
};

View File

@ -0,0 +1,406 @@
// Copyright (c) 2019-2023 Alibaba Cloud
// Copyright (c) 2019-2023 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
//! Directly Attachable Network (DAN) is a type of network that runs in the host
//! netns. It supports host-tap, vhost-user (DPDK), etc. (for now, vhost-user
//! devices are parsed but skipped when the endpoints are built).
//! The device information is retrieved from a JSON file, which is deserialized
//! into a `DanConfig` (an optional netns plus a list of `DanDevice`s).
//! In this module, `IPAddress`, `Interface`, etc., are duplicated mostly from
//! `agent::IPAddress`, `agent::Interface`, and so on. They can't be referenced
//! directly because the former represent the structure of the JSON file written
//! by CNI plugins. They might have some slight differences, and may be revised in
//! the future.
use std::net::IpAddr;
use std::path::PathBuf;
use std::str::FromStr;
use std::sync::Arc;
use agent::IPFamily;
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
use hypervisor::device::device_manager::DeviceManager;
use hypervisor::Hypervisor;
use kata_types::config::TomlConfig;
use scopeguard::defer;
use serde::{Deserialize, Serialize};
use tokio::fs;
use tokio::sync::RwLock;
use super::network_entity::NetworkEntity;
use super::utils::address::{ip_family_from_ip_addr, parse_ip_cidr};
use super::{EndpointState, NetnsGuard, Network};
use crate::network::endpoint::TapEndpoint;
use crate::network::network_info::network_info_from_dan::NetworkInfoFromDan;
use crate::network::utils::generate_private_mac_addr;
/// Directly attachable network
///
/// Thin handle around the shared, lock-protected `DanInner` state; the
/// `Network` trait implementation reads everything through this lock.
pub struct Dan {
// Shared state built once by `DanInner::new`.
inner: Arc<RwLock<DanInner>>,
}
/// State of a directly attachable network, built from the DAN config file.
pub struct DanInner {
// Optional netns taken from the `netns` field of the DAN config; when set,
// a `NetnsGuard` is created for it while endpoints are attached/detached.
netns: Option<String>,
// One entity (endpoint + network info) per supported device in the config.
entity_list: Vec<NetworkEntity>,
}
impl Dan {
    /// Creates a directly attachable network from the DAN config file
    /// referenced by `config`.
    ///
    /// The actual loading is delegated to `DanInner::new`; any error it
    /// reports is returned unchanged. `dev_mgr` is handed to the endpoints
    /// created for the configured devices.
    pub async fn new(
        config: &DanNetworkConfig,
        dev_mgr: Arc<RwLock<DeviceManager>>,
    ) -> Result<Self> {
        let dan_inner = DanInner::new(config, &dev_mgr).await?;
        let inner = Arc::new(RwLock::new(dan_inner));
        Ok(Self { inner })
    }
}
impl DanInner {
    /// Reads the DAN config file written by CNI plugins, deserializes it into
    /// a `DanConfig`, and turns every supported device into a `NetworkEntity`
    /// made of an endpoint and its network info.
    ///
    /// Devices of type "vhost-user" are not supported yet: a warning is logged
    /// and the device is skipped.
    ///
    /// # Errors
    /// Fails if the file cannot be read or parsed, if the netlink connection
    /// cannot be created, or if an endpoint / network info cannot be built.
    async fn new(config: &DanNetworkConfig, dev_mgr: &Arc<RwLock<DeviceManager>>) -> Result<Self> {
        let raw_json = fs::read_to_string(&config.dan_conf_path)
            .await
            .context("Read DAN config from file")?;
        let dan_config: DanConfig =
            serde_json::from_str(&raw_json).context("Invalid DAN config")?;
        info!(sl!(), "Dan config is loaded = {:?}", dan_config);

        // The netlink connection is driven by a background task that is only
        // needed while the endpoints are being created; abort it on exit.
        let (connection, handle, _) = rtnetlink::new_connection().context("New connection")?;
        let netlink_task = tokio::spawn(connection);
        defer!({
            netlink_task.abort();
        });

        let mut entity_list = Vec::with_capacity(dan_config.devices.len());
        for (idx, device) in dan_config.devices.iter().enumerate() {
            // The endpoint name is derived from the device's position in the list.
            let name = format!("eth{}", idx);

            let (tap_name, queue_num, queue_size) = match &device.device {
                // TODO: Support VhostUserNet protocol
                Device::VhostUser { path, .. } => {
                    warn!(sl!(), "A DAN device whose type is \"vhost-user\" and socket path is {} is ignored.", path);
                    continue;
                }
                Device::HostTap {
                    tap_name,
                    queue_num,
                    queue_size,
                } => (tap_name, *queue_num, *queue_size),
            };

            let tap_endpoint = TapEndpoint::new(
                &handle,
                idx as u32,
                &name,
                tap_name,
                &device.guest_mac,
                queue_num,
                queue_size,
                dev_mgr,
            )
            .await
            .with_context(|| format!("New a {} tap endpoint", tap_name))?;
            let endpoint = Arc::new(tap_endpoint);

            let dan_info = NetworkInfoFromDan::new(device)
                .await
                .context("Network info from DAN")?;
            let network_info = Arc::new(dan_info);

            entity_list.push(NetworkEntity {
                endpoint,
                network_info,
            })
        }

        Ok(Self {
            netns: dan_config.netns,
            entity_list,
        })
    }
}
#[async_trait]
impl Network for Dan {
    /// Attaches every endpoint of the network, stopping at the first failure.
    ///
    /// When the DAN config names a netns, a `NetnsGuard` for it is held until
    /// the function returns.
    async fn setup(&self) -> Result<()> {
        let inner = self.inner.read().await;
        let _netns_guard = match inner.netns.as_ref() {
            Some(netns) => Some(NetnsGuard::new(netns).context("New netns guard")?),
            None => None,
        };
        for entity in &inner.entity_list {
            entity.endpoint.attach().await.context("Attach")?;
        }
        Ok(())
    }

    /// Collects the agent interface description of every entity, in order.
    async fn interfaces(&self) -> Result<Vec<agent::Interface>> {
        let inner = self.inner.read().await;
        let mut interfaces = Vec::new();
        for entity in &inner.entity_list {
            let interface = entity.network_info.interface().await.context("Interface")?;
            interfaces.push(interface);
        }
        Ok(interfaces)
    }

    /// Concatenates the routes of every entity, in order.
    async fn routes(&self) -> Result<Vec<agent::Route>> {
        let inner = self.inner.read().await;
        let mut routes = Vec::new();
        for entity in &inner.entity_list {
            let entity_routes = entity.network_info.routes().await.context("Routes")?;
            routes.extend(entity_routes);
        }
        Ok(routes)
    }

    /// Concatenates the ARP neighbors of every entity, in order.
    async fn neighs(&self) -> Result<Vec<agent::ARPNeighbor>> {
        let inner = self.inner.read().await;
        let mut neighs = Vec::new();
        for entity in inner.entity_list.iter() {
            let entity_neighs = entity.network_info.neighs().await.context("Neighs")?;
            neighs.extend(entity_neighs);
        }
        Ok(neighs)
    }

    /// Gathers the persistable state of every endpoint that provides one.
    /// Always returns `Some`, possibly with an empty list.
    async fn save(&self) -> Option<Vec<EndpointState>> {
        let inner = self.inner.read().await;
        let mut ep_states = Vec::new();
        for entity in inner.entity_list.iter() {
            match entity.endpoint.save().await {
                Some(state) => ep_states.push(state),
                None => continue,
            }
        }
        Some(ep_states)
    }

    /// Detaches every endpoint from hypervisor `h`, stopping at the first
    /// failure. Like `setup`, a `NetnsGuard` is held for the configured netns.
    async fn remove(&self, h: &dyn Hypervisor) -> Result<()> {
        let inner = self.inner.read().await;
        let _netns_guard = match inner.netns.as_ref() {
            Some(netns) => Some(NetnsGuard::new(netns).context("New netns guard")?),
            None => None,
        };
        for entity in &inner.entity_list {
            entity.endpoint.detach(h).await.context("Detach")?;
        }
        Ok(())
    }
}
/// Directly attachable network config
///
/// Runtime-side input used to create a `Dan`: it only carries the location
/// of the JSON file to load.
#[derive(Debug)]
pub struct DanNetworkConfig {
// Path of the DAN JSON file; see `dan_config_path` for how it is derived.
pub dan_conf_path: PathBuf,
}
/// Directly attachable network config written by CNI plugins
///
/// This is the top-level object of the DAN JSON file.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct DanConfig {
// Optional netns; when present, a `NetnsGuard` is created for it while
// endpoints are attached or detached.
netns: Option<String>,
// Devices to attach, in the order they appear in the file.
devices: Vec<DanDevice>,
}
/// Directly attachable network device
/// This struct is deserialized from a file containing devices information,
/// sent from CNI plugins.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub(crate) struct DanDevice {
// Name of device (interface name on the guest)
pub(crate) name: String,
// Mac address of interface on the guest, if it is not specified, a
// private address is generated as default.
#[serde(default = "generate_private_mac_addr")]
pub(crate) guest_mac: String,
// Device backend description (see `Device`)
pub(crate) device: Device,
// Network info (interface, routes and neighbors) of this device
pub(crate) network_info: NetworkInfo,
}
/// Backend of a DAN device. The variant is selected by the `type` key of the
/// JSON object: "vhost-user" or "host-tap".
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[serde(tag = "type")]
pub(crate) enum Device {
// Vhost-user backend. It can be deserialized, but `DanInner::new` currently
// skips devices of this type.
#[serde(rename = "vhost-user")]
VhostUser {
// Vhost-user socket path
path: String,
// Virtio queue num, 0 when the key is absent
#[serde(default)]
queue_num: usize,
// Virtio queue size, 0 when the key is absent
#[serde(default)]
queue_size: usize,
},
// Tap device that already exists on the host.
#[serde(rename = "host-tap")]
HostTap {
// Name of the tap interface on the host
tap_name: String,
// Virtio queue num, 0 when the key is absent
#[serde(default)]
queue_num: usize,
// Virtio queue size, 0 when the key is absent
#[serde(default)]
queue_size: usize,
},
}
/// Network information of a DAN device as written in the JSON file.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub(crate) struct NetworkInfo {
// Interface description (mandatory key)
pub(crate) interface: Interface,
// Routes, empty when the key is absent
#[serde(default)]
pub(crate) routes: Vec<Route>,
// ARP neighbors, empty when the key is absent
#[serde(default)]
pub(crate) neighbors: Vec<ARPNeighbor>,
}
/// Interface description of a DAN device as written in the JSON file.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub(crate) struct Interface {
// IP addresses in the format of CIDR
pub ip_addresses: Vec<String>,
// MTU, falls back to `default_mtu()` (1500) when the key is absent
#[serde(default = "default_mtu")]
pub mtu: u64,
#[serde(default)]
// Link type, an empty string when the key is absent
pub ntype: String,
// Interface flags, 0 when the key is absent
#[serde(default)]
pub flags: u32,
}
/// Route of a DAN device as written in the JSON file. Every field is
/// optional in the JSON and falls back to its default value when absent.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub(crate) struct Route {
#[serde(default)]
// Destination(CIDR), an empty string denotes no destination
pub dest: String,
#[serde(default)]
// Gateway(IP Address), an empty string denotes no gateway
pub gateway: String,
// Source(IP Address), an empty string denotes no source
#[serde(default)]
pub source: String,
// Scope
#[serde(default)]
pub scope: u32,
}
impl Route {
    /// Infers the IP family of this route.
    ///
    /// The first non-empty field among `dest` (CIDR), `gateway` and `source`
    /// (plain IP addresses), in that order, decides the family.
    ///
    /// # Errors
    /// Fails if the selected field cannot be parsed, or if all three fields
    /// are empty.
    pub(crate) fn ip_family(&self) -> Result<IPFamily> {
        let ip_addr = if !self.dest.is_empty() {
            let (dest_addr, _mask) =
                parse_ip_cidr(&self.dest).context("Parse ip addr from dest")?;
            dest_addr
        } else if !self.gateway.is_empty() {
            IpAddr::from_str(&self.gateway).context("Parse ip addr from gateway")?
        } else if !self.source.is_empty() {
            IpAddr::from_str(&self.source).context("Parse ip addr from source")?
        } else {
            return Err(anyhow!("Failed to retrieve IP family from {:?}", self));
        };

        Ok(ip_family_from_ip_addr(&ip_addr))
    }
}
/// ARP neighbor of a DAN device as written in the JSON file.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub(crate) struct ARPNeighbor {
// IP address in the format of CIDR
pub ip_address: Option<String>,
// Hardware address of the neighbor, an empty string when the key is absent
#[serde(default)]
pub hardware_addr: String,
// Neighbor state, 0 when the key is absent
#[serde(default)]
pub state: u32,
// Neighbor flags, 0 when the key is absent
#[serde(default)]
pub flags: u32,
}
/// MTU used for an `Interface` when the JSON omits the `mtu` key
/// (referenced by `#[serde(default = "default_mtu")]`).
fn default_mtu() -> u64 {
    const DEFAULT_MTU: u64 = 1500;
    DEFAULT_MTU
}
/// Path of the DAN config of a sandbox: `{dan_conf}/{sandbox_id}.json`, where
/// `dan_conf` is the `runtime.dan_conf` base directory of the TOML config.
///
/// The file is expected to hold a JSON-serialized `DanConfig`.
#[inline]
pub fn dan_config_path(config: &TomlConfig, sandbox_id: &str) -> PathBuf {
    let mut path = PathBuf::from(config.runtime.dan_conf.as_str());
    path.push(format!("{}.json", sandbox_id));
    path
}
#[cfg(test)]
mod tests {
use crate::network::dan::{ARPNeighbor, DanDevice, Device, Interface, NetworkInfo, Route};
// Deserializes a single device object and compares it field by field with a
// hand-built `DanDevice`.
// Note that the fixture carries a `flags` key in the route and a `device` key
// in the neighbor, neither of which has a matching struct field; the expected
// value below does not include them.
#[test]
fn test_dan_json() {
let json_str = r#"{
"name": "eth0",
"guest_mac": "xx:xx:xx:xx:xx",
"device": {
"type": "vhost-user",
"path": "/tmp/test",
"queue_num": 1,
"queue_size": 1
},
"network_info": {
"interface": {
"ip_addresses": ["192.168.0.1/24"],
"mtu": 1500,
"ntype": "tuntap",
"flags": 0
},
"routes": [{
"dest": "172.18.0.0/16",
"source": "172.18.0.1",
"gateway": "172.18.31.1",
"scope": 0,
"flags": 0
}],
"neighbors": [{
"ip_address": "192.168.0.3/16",
"device": "",
"state": 0,
"flags": 0,
"hardware_addr": "xx:xx:xx:xx:xx"
}]
}
}"#;
let dev_from_json: DanDevice = serde_json::from_str(json_str).unwrap();
// Expected result, mirroring the JSON fixture above.
let dev = DanDevice {
name: "eth0".to_owned(),
guest_mac: "xx:xx:xx:xx:xx".to_owned(),
device: Device::VhostUser {
path: "/tmp/test".to_owned(),
queue_num: 1,
queue_size: 1,
},
network_info: NetworkInfo {
interface: Interface {
ip_addresses: vec!["192.168.0.1/24".to_owned()],
mtu: 1500,
ntype: "tuntap".to_owned(),
flags: 0,
},
routes: vec![Route {
dest: "172.18.0.0/16".to_owned(),
source: "172.18.0.1".to_owned(),
gateway: "172.18.31.1".to_owned(),
scope: 0,
}],
neighbors: vec![ARPNeighbor {
ip_address: Some("192.168.0.3/16".to_owned()),
hardware_addr: "xx:xx:xx:xx:xx".to_owned(),
state: 0,
flags: 0,
}],
},
};
assert_eq!(dev_from_json, dev);
}
}

View File

@ -39,6 +39,11 @@ pub struct IpVlanEndpointState {
pub network_qos: bool,
}
/// Persisted state of a tap endpoint.
#[derive(Serialize, Deserialize, Clone, Default)]
pub struct TapEndpointState {
// Interface name recorded when the endpoint is saved.
pub if_name: String,
}
#[derive(Serialize, Deserialize, Clone, Default)]
pub struct EndpointState {
pub physical_endpoint: Option<PhysicalEndpointState>,
@ -46,5 +51,6 @@ pub struct EndpointState {
pub ipvlan_endpoint: Option<IpVlanEndpointState>,
pub macvlan_endpoint: Option<MacvlanEndpointState>,
pub vlan_endpoint: Option<VlanEndpointState>,
pub tap_endpoint: Option<TapEndpointState>,
// TODO : other endpoint
}

View File

@ -16,6 +16,8 @@ mod macvlan_endpoint;
pub use macvlan_endpoint::MacVlanEndpoint;
pub mod endpoint_persist;
mod endpoints_test;
mod tap_endpoint;
pub use tap_endpoint::TapEndpoint;
use anyhow::Result;
use async_trait::async_trait;

View File

@ -0,0 +1,124 @@
// Copyright (c) 2019-2023 Alibaba Cloud
// Copyright (c) 2019-2023 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
use std::sync::Arc;
use anyhow::{Context, Result};
use async_trait::async_trait;
use hypervisor::device::device_manager::{do_handle_device, DeviceManager};
use hypervisor::device::{DeviceConfig, DeviceType};
use hypervisor::{Hypervisor, NetworkConfig, NetworkDevice};
use tokio::sync::RwLock;
use super::endpoint_persist::TapEndpointState;
use super::Endpoint;
use crate::network::network_pair::{get_link_by_name, NetworkInterface};
use crate::network::{utils, EndpointState};
/// TapEndpoint is used to attach to the hypervisor directly
///
/// It describes an existing tap interface on the host together with the
/// guest-side parameters needed to build a hypervisor `NetworkConfig`.
#[derive(Debug)]
pub struct TapEndpoint {
// Index
// Stored at construction time but not read anywhere in this file, hence
// the `dead_code` allowance.
#[allow(dead_code)]
index: u32,
// Name of virt interface
name: String,
// Hardware address of virt interface
guest_mac: String,
// Tap interface on the host
tap_iface: NetworkInterface,
// Device manager
dev_mgr: Arc<RwLock<DeviceManager>>,
// Virtio queue num
queue_num: usize,
// Virtio queue size
queue_size: usize,
}
impl TapEndpoint {
    /// Creates an endpoint for the host tap interface called `tap_name`.
    ///
    /// The link is looked up through the netlink `handle` so that its
    /// hardware address can be recorded. `name` and `guest_mac` describe the
    /// interface on the guest side.
    ///
    /// # Errors
    /// Fails if the link cannot be found or its hardware address cannot be
    /// converted to a MAC string.
    #[allow(clippy::too_many_arguments)]
    pub async fn new(
        handle: &rtnetlink::Handle,
        index: u32,
        name: &str,
        tap_name: &str,
        guest_mac: &str,
        queue_num: usize,
        queue_size: usize,
        dev_mgr: &Arc<RwLock<DeviceManager>>,
    ) -> Result<Self> {
        let link = get_link_by_name(handle, tap_name)
            .await
            .context("get link by name")?;
        let hard_addr =
            utils::get_mac_addr(&link.attrs().hardware_addr).context("Get mac addr of tap")?;

        let tap_iface = NetworkInterface {
            name: tap_name.to_owned(),
            hard_addr,
            ..Default::default()
        };

        Ok(Self {
            index,
            name: name.to_owned(),
            guest_mac: guest_mac.to_owned(),
            tap_iface,
            dev_mgr: Arc::clone(dev_mgr),
            queue_num,
            queue_size,
        })
    }

    /// Builds the hypervisor network config for this endpoint.
    ///
    /// # Errors
    /// Fails if `guest_mac` cannot be parsed as a MAC address.
    fn get_network_config(&self) -> Result<NetworkConfig> {
        let parsed_mac = utils::parse_mac(&self.guest_mac).context("Parse mac address")?;
        let config = NetworkConfig {
            host_dev_name: self.tap_iface.name.clone(),
            virt_iface_name: self.name.clone(),
            guest_mac: Some(parsed_mac),
            queue_num: self.queue_num,
            queue_size: self.queue_size,
            ..Default::default()
        };
        Ok(config)
    }
}
#[async_trait]
impl Endpoint for TapEndpoint {
    /// Name of the virt interface.
    async fn name(&self) -> String {
        self.name.to_owned()
    }

    /// Hardware address of the virt interface (the guest MAC).
    async fn hardware_addr(&self) -> String {
        self.guest_mac.to_owned()
    }

    /// Hands the network config of this endpoint to the device manager.
    async fn attach(&self) -> Result<()> {
        let net_config = self.get_network_config().context("Get network config")?;
        let device_config = DeviceConfig::NetworkCfg(net_config);
        do_handle_device(&self.dev_mgr, &device_config)
            .await
            .context("Handle device")?;
        Ok(())
    }

    /// Asks hypervisor `h` to remove the network device of this endpoint.
    async fn detach(&self, h: &dyn Hypervisor) -> Result<()> {
        let config = self.get_network_config().context("Get network config")?;
        let device = NetworkDevice {
            config,
            ..Default::default()
        };
        h.remove_device(DeviceType::Network(device))
            .await
            .context("Remove device")?;
        Ok(())
    }

    /// Returns the state to persist: only the interface name is recorded.
    async fn save(&self) -> Option<EndpointState> {
        let tap_state = TapEndpointState {
            if_name: self.name.clone(),
        };
        let state = EndpointState {
            tap_endpoint: Some(tap_state),
            ..Default::default()
        };
        Some(state)
    }
}

View File

@ -4,9 +4,11 @@
// SPDX-License-Identifier: Apache-2.0
//
mod endpoint;
use std::sync::Arc;
mod dan;
mod endpoint;
pub use dan::{dan_config_path, Dan, DanNetworkConfig};
pub use endpoint::endpoint_persist::EndpointState;
pub use endpoint::Endpoint;
mod network_entity;
@ -20,9 +22,8 @@ use network_with_netns::NetworkWithNetns;
mod network_pair;
use network_pair::NetworkPair;
mod utils;
pub use utils::netns::{generate_netns_name, NetnsGuard};
use tokio::sync::RwLock;
pub use utils::netns::{generate_netns_name, NetnsGuard};
use anyhow::{Context, Result};
use async_trait::async_trait;
@ -30,7 +31,8 @@ use hypervisor::{device::device_manager::DeviceManager, Hypervisor};
#[derive(Debug)]
pub enum NetworkConfig {
NetworkResourceWithNetNs(NetworkWithNetNsConfig),
NetNs(NetworkWithNetNsConfig),
Dan(DanNetworkConfig),
}
#[async_trait]
@ -48,10 +50,15 @@ pub async fn new(
d: Arc<RwLock<DeviceManager>>,
) -> Result<Arc<dyn Network>> {
match config {
NetworkConfig::NetworkResourceWithNetNs(c) => Ok(Arc::new(
NetworkConfig::NetNs(c) => Ok(Arc::new(
NetworkWithNetns::new(c, d)
.await
.context("new network with netns")?,
)),
NetworkConfig::Dan(c) => Ok(Arc::new(
Dan::new(c, d)
.await
.context("New directly attachable network")?,
)),
}
}

View File

@ -4,6 +4,7 @@
// SPDX-License-Identifier: Apache-2.0
//
pub(crate) mod network_info_from_dan;
pub(crate) mod network_info_from_link;
use agent::{ARPNeighbor, Interface, Route};

View File

@ -0,0 +1,213 @@
// Copyright (c) 2019-2023 Alibaba Cloud
// Copyright (c) 2019-2023 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
use agent::{ARPNeighbor, IPAddress, Interface, Route};
use anyhow::Result;
use async_trait::async_trait;
use netlink_packet_route::IFF_NOARP;
use super::NetworkInfo;
use crate::network::dan::DanDevice;
use crate::network::utils::address::{ip_family_from_ip_addr, parse_ip_cidr};
/// NetworkInfoFromDan is responsible for converting network info in JSON
/// to agent's network info.
///
/// The conversion happens once in `new`; the trait methods return clones of
/// the stored values.
#[derive(Debug)]
pub(crate) struct NetworkInfoFromDan {
// Agent interface built from the DAN device
interface: Interface,
// Agent routes built from the DAN routes whose IP family could be determined
routes: Vec<Route>,
// Agent ARP neighbors built from the DAN neighbors
neighs: Vec<ARPNeighbor>,
}
impl NetworkInfoFromDan {
    /// Converts the network info of `dan_device` into the agent's types.
    ///
    /// * IP addresses that are not valid CIDR strings, or that are loopback
    ///   addresses, are dropped.
    /// * Routes whose IP family cannot be determined are dropped.
    /// * Neighbors are always kept; an IP address that is missing or cannot be
    ///   parsed results in `to_ip_address` being `None`.
    ///
    /// As written, this function never returns an error.
    pub async fn new(dan_device: &DanDevice) -> Result<Self> {
        let dan_info = &dan_device.network_info;
        let dan_iface = &dan_info.interface;

        let ip_addresses = dan_iface
            .ip_addresses
            .iter()
            .filter_map(|cidr| {
                let (ipaddr, mask) = parse_ip_cidr(cidr).ok()?;
                // Skip if it is a loopback address
                if ipaddr.is_loopback() {
                    return None;
                }
                Some(IPAddress {
                    family: ip_family_from_ip_addr(&ipaddr),
                    address: ipaddr.to_string(),
                    mask: mask.to_string(),
                })
            })
            .collect();

        let interface = Interface {
            device: dan_device.name.clone(),
            name: dan_device.name.clone(),
            ip_addresses,
            mtu: dan_iface.mtu,
            hw_addr: dan_device.guest_mac.clone(),
            pci_addr: String::default(),
            field_type: dan_iface.ntype.clone(),
            // Only the IFF_NOARP bit of the configured flags is kept.
            raw_flags: dan_iface.flags & IFF_NOARP,
        };

        let routes = dan_info
            .routes
            .iter()
            .filter_map(|dan_route| {
                let family = dan_route.ip_family().ok()?;
                Some(Route {
                    dest: dan_route.dest.clone(),
                    gateway: dan_route.gateway.clone(),
                    device: dan_device.name.clone(),
                    source: dan_route.source.clone(),
                    scope: dan_route.scope,
                    family,
                })
            })
            .collect();

        let neighs = dan_info
            .neighbors
            .iter()
            .map(|dan_neigh| {
                let to_ip_address = dan_neigh
                    .ip_address
                    .as_deref()
                    .and_then(|cidr| parse_ip_cidr(cidr).ok())
                    .map(|(ipaddr, mask)| IPAddress {
                        family: ip_family_from_ip_addr(&ipaddr),
                        address: ipaddr.to_string(),
                        mask: mask.to_string(),
                    });
                ARPNeighbor {
                    to_ip_address,
                    device: dan_device.name.clone(),
                    ll_addr: dan_neigh.hardware_addr.clone(),
                    state: dan_neigh.state as i32,
                    flags: dan_neigh.flags as i32,
                }
            })
            .collect();

        Ok(Self {
            interface,
            routes,
            neighs,
        })
    }
}
#[async_trait]
impl NetworkInfo for NetworkInfoFromDan {
    /// Returns a copy of the interface computed in `new`.
    async fn interface(&self) -> Result<Interface> {
        let interface = self.interface.clone();
        Ok(interface)
    }

    /// Returns a copy of the routes computed in `new`.
    async fn routes(&self) -> Result<Vec<Route>> {
        let routes = self.routes.clone();
        Ok(routes)
    }

    /// Returns a copy of the ARP neighbors computed in `new`.
    async fn neighs(&self) -> Result<Vec<ARPNeighbor>> {
        let neighs = self.neighs.clone();
        Ok(neighs)
    }
}
#[cfg(test)]
mod tests {
use agent::{ARPNeighbor, IPAddress, IPFamily, Interface, Route};
use super::NetworkInfoFromDan;
// The DAN-side types share names with the agent-side ones, hence the aliases.
use crate::network::dan::{
ARPNeighbor as DanARPNeighbor, DanDevice, Device, Interface as DanInterface,
NetworkInfo as DanNetworkInfo, Route as DanRoute,
};
use crate::network::NetworkInfo;
// Builds a host-tap DAN device by hand, converts it, and checks the resulting
// agent interface, routes and neighbors.
#[tokio::test]
async fn test_network_info_from_dan() {
let dan_device = DanDevice {
name: "eth0".to_owned(),
guest_mac: "xx:xx:xx:xx:xx".to_owned(),
device: Device::HostTap {
tap_name: "tap0".to_owned(),
queue_num: 0,
queue_size: 0,
},
network_info: DanNetworkInfo {
interface: DanInterface {
ip_addresses: vec!["192.168.0.1/24".to_owned()],
mtu: 1500,
ntype: "tuntap".to_owned(),
flags: 0,
},
routes: vec![DanRoute {
dest: "172.18.0.0/16".to_owned(),
source: "172.18.0.1".to_owned(),
gateway: "172.18.31.1".to_owned(),
scope: 0,
}],
neighbors: vec![DanARPNeighbor {
ip_address: Some("192.168.0.3/16".to_owned()),
hardware_addr: "yy:yy:yy:yy:yy".to_owned(),
state: 0,
flags: 0,
}],
},
};
let network_info = NetworkInfoFromDan::new(&dan_device).await.unwrap();
// The CIDR string is split into address and mask; device and name both come
// from the DAN device name.
let interface = Interface {
device: "eth0".to_owned(),
name: "eth0".to_owned(),
ip_addresses: vec![IPAddress {
family: IPFamily::V4,
address: "192.168.0.1".to_owned(),
mask: "24".to_owned(),
}],
mtu: 1500,
hw_addr: "xx:xx:xx:xx:xx".to_owned(),
pci_addr: String::default(),
field_type: "tuntap".to_owned(),
raw_flags: 0,
};
assert_eq!(interface, network_info.interface().await.unwrap());
// The route keeps its strings as-is and gains the device name and IP family.
let routes = vec![Route {
dest: "172.18.0.0/16".to_owned(),
gateway: "172.18.31.1".to_owned(),
device: "eth0".to_owned(),
source: "172.18.0.1".to_owned(),
scope: 0,
family: IPFamily::V4,
}];
assert_eq!(routes, network_info.routes().await.unwrap());
// The neighbor's CIDR string is parsed into an agent IPAddress.
let neighbors = vec![ARPNeighbor {
to_ip_address: Some(IPAddress {
family: IPFamily::V4,
address: "192.168.0.3".to_owned(),
mask: "16".to_owned(),
}),
device: "eth0".to_owned(),
ll_addr: "yy:yy:yy:yy:yy".to_owned(),
state: 0,
flags: 0,
}];
assert_eq!(neighbors, network_info.neighs().await.unwrap());
}
}

View File

@ -4,13 +4,14 @@
// SPDX-License-Identifier: Apache-2.0
//
use std::{
convert::TryFrom,
net::{IpAddr, Ipv4Addr, Ipv6Addr},
};
use std::convert::TryFrom;
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};
use std::str::FromStr;
use anyhow::{anyhow, Result};
use netlink_packet_route::{nlas::address::Nla, AddressMessage, AF_INET, AF_INET6};
use agent::IPFamily;
use anyhow::{anyhow, Context, Result};
use netlink_packet_route::nlas::address::Nla;
use netlink_packet_route::{AddressMessage, AF_INET, AF_INET6};
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Address {
@ -84,6 +85,41 @@ pub(crate) fn parse_ip(ip: &[u8], family: u8) -> Result<IpAddr> {
}
}
/// Parses an IP address in CIDR notation (`<address>/<mask>`) into the address
/// and the mask length.
///
/// # Errors
/// Fails if `ip` does not consist of exactly two `/`-separated parts, if
/// either part cannot be parsed, or if the mask exceeds 32 for an IPv4
/// address or 128 otherwise.
pub(crate) fn parse_ip_cidr(ip: &str) -> Result<(IpAddr, u8)> {
    let parts: Vec<&str> = ip.split('/').collect();
    let (addr_part, mask_part) = match parts.as_slice() {
        [addr, mask] => (*addr, *mask),
        _ => {
            return Err(anyhow!(
                "{} is a bad IP address in format of CIDR",
                ip
            ))
        }
    };

    let ipaddr = IpAddr::from_str(addr_part).context("Parse IP address from string")?;
    let mask = u8::from_str(mask_part).context("Parse mask")?;

    match ipaddr {
        IpAddr::V4(_) if mask > 32 => Err(anyhow!(
            "The mask of IPv4 address should be less than or equal to 32, but we got {}.",
            mask
        )),
        _ if mask > 128 => Err(anyhow!(
            "The mask should be less than or equal to 128, but we got {}.",
            mask
        )),
        _ => Ok((ipaddr, mask)),
    }
}
/// Maps a standard-library `IpAddr` to the `IPFamily` defined in the agent
/// crate: `V4` for IPv4 addresses, `V6` for IPv6 addresses.
#[inline]
pub(crate) fn ip_family_from_ip_addr(ip_addr: &IpAddr) -> IPFamily {
    match ip_addr {
        IpAddr::V4(_) => IPFamily::V4,
        IpAddr::V6(_) => IPFamily::V6,
    }
}
#[cfg(test)]
mod tests {
use super::*;
@ -109,4 +145,28 @@ mod tests {
let fail_ipv6 = [1, 2, 3, 4, 5, 6, 7, 8, 2, 3];
assert!(parse_ip(fail_ipv6.as_slice(), AF_INET6 as u8).is_err());
}
#[test]
fn test_parse_ip_cidr() {
    // Well-formed inputs: (CIDR string, expected address, expected mask).
    let valid_cases = [
        ("127.0.0.1/32", "127.0.0.1", 32u8),
        ("2001:4860:4860::8888/32", "2001:4860:4860::8888", 32u8),
        ("2001:4860:4860::8888/128", "2001:4860:4860::8888", 128u8),
    ];
    for &(input, expected_ip, expected_mask) in valid_cases.iter() {
        let (ipaddr, mask) = parse_ip_cidr(input).unwrap();
        assert_eq!(ipaddr.to_string(), expected_ip);
        assert_eq!(mask, expected_mask);
    }

    // Malformed inputs: mask out of range, too many parts, or no mask at all.
    let invalid_cases = [
        "127.0.0.1/33",
        "2001:4860:4860::8888/129",
        "2001:4860:4860::8888/300",
        "127.0.0.1/33/1",
        "127.0.0.1",
    ];
    for &input in invalid_cases.iter() {
        assert!(parse_ip_cidr(input).is_err());
    }
}
}

View File

@ -9,6 +9,8 @@ pub(crate) mod link;
pub(crate) mod netns;
use anyhow::{anyhow, Result};
use rand::rngs::OsRng;
use rand::RngCore;
pub(crate) fn parse_mac(s: &str) -> Option<hypervisor::Address> {
let v: Vec<_> = s.split(':').collect();
@ -34,6 +36,17 @@ pub(crate) fn get_mac_addr(b: &[u8]) -> Result<String> {
}
}
/// Generate a private mac address.
/// The range of private mac addresses is
/// x2-xx-xx-xx-xx-xx, x6-xx-xx-xx-xx-xx, xA-xx-xx-xx-xx-xx, xE-xx-xx-xx-xx-xx.
///
/// Six random bytes are drawn from the OS random number generator, then the
/// first byte is adjusted so that its low nibble is 2, 6, A or E.
pub(crate) fn generate_private_mac_addr() -> String {
    let mut addr = [0u8; 6];
    OsRng.fill_bytes(&mut addr);
    // Force bit 1 on and bit 0 off in the first byte.
    addr[0] |= 0x02;
    addr[0] &= 0xfe;
    // This unwrap is considered safe because the length of addr is 6.
    get_mac_addr(&addr).unwrap()
}
#[cfg(test)]
mod tests {
use super::*;
@ -63,4 +76,14 @@ mod tests {
assert!(addr.is_some());
assert_eq!(expected_addr.0, addr.unwrap().0);
}
#[test]
fn test_generate_private_mac_addr() {
    // Two consecutive addresses are expected to differ.
    let first = generate_private_mac_addr();
    let second = generate_private_mac_addr();
    assert_ne!(first, second);

    // The second character of the string is the low nibble of the first byte.
    let low_nibble = first.chars().nth(1).unwrap();
    assert!(matches!(low_nibble, '2' | '6' | 'a' | 'e'))
}
}

View File

@ -21,7 +21,10 @@ use kata_types::{
use linux_container::LinuxContainer;
use netns_rs::NetNs;
use persist::sandbox_persist::Persist;
use resource::{cpu_mem::initial_size::InitialSizeManager, network::generate_netns_name};
use resource::{
cpu_mem::initial_size::InitialSizeManager,
network::{dan_config_path, generate_netns_name},
};
use shim_interface::shim_mgmt::ERR_NO_SHIM_SERVER;
use tokio::fs;
use tokio::sync::{mpsc::Sender, Mutex, RwLock};
@ -146,10 +149,14 @@ impl RuntimeHandlerManagerInner {
let config = load_config(spec, options).context("load config")?;
let dan_path = dan_config_path(&config, &self.id);
let mut network_created = false;
// set netns to None if we want no network for the VM
let netns = if config.runtime.disable_new_netns {
None
} else if dan_path.exists() {
info!(sl!(), "Do not create a netns due to DAN");
None
} else {
let mut netns_path = None;
if let Some(linux) = &spec.linux {

View File

@ -6,30 +6,25 @@
use std::sync::Arc;
use agent::{
self, kata::KataAgent, types::KernelModule, Agent, GetIPTablesRequest, SetIPTablesRequest,
VolumeStatsRequest,
};
use agent::kata::KataAgent;
use agent::types::KernelModule;
use agent::{self, Agent, GetIPTablesRequest, SetIPTablesRequest, VolumeStatsRequest};
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
use common::{
message::{Action, Message},
Sandbox, SandboxNetworkEnv,
};
use common::message::{Action, Message};
use common::{Sandbox, SandboxNetworkEnv};
use containerd_shim_protos::events::task::TaskOOM;
use hypervisor::{dragonball::Dragonball, BlockConfig, Hypervisor, HYPERVISOR_DRAGONBALL};
use kata_sys_util::hooks::HookStates;
use kata_types::config::TomlConfig;
use resource::{
manager::ManagerArgs,
network::{NetworkConfig, NetworkWithNetNsConfig},
ResourceConfig, ResourceManager,
};
use persist::{self, sandbox_persist::Persist};
use resource::manager::ManagerArgs;
use resource::network::{dan_config_path, DanNetworkConfig, NetworkConfig, NetworkWithNetNsConfig};
use resource::{ResourceConfig, ResourceManager};
use tokio::sync::{mpsc::Sender, Mutex, RwLock};
use tracing::instrument;
use crate::health_check::HealthCheck;
use persist::{self, sandbox_persist::Persist};
pub(crate) const VIRTCONTAINER: &str = "virt_container";
pub struct SandboxRestoreArgs {
@ -101,19 +96,15 @@ impl VirtSandbox {
#[instrument]
async fn prepare_for_start_sandbox(
&self,
_id: &str,
id: &str,
network_env: SandboxNetworkEnv,
) -> Result<Vec<ResourceConfig>> {
let mut resource_configs = vec![];
// prepare network config
if !network_env.network_created {
if let Some(netns_path) = network_env.netns {
let network_config = ResourceConfig::Network(
self.prepare_network_config(netns_path, network_env.network_created)
.await,
);
resource_configs.push(network_config);
if let Some(network_resource) = self.prepare_network_resource(&network_env).await {
resource_configs.push(network_resource);
}
}
@ -133,6 +124,39 @@ impl VirtSandbox {
Ok(resource_configs)
}
/// Chooses the network resource for the sandbox.
///
/// A DAN config file for this sandbox takes precedence; otherwise the netns
/// from `network_env` is used. Returns `None` when neither is available.
async fn prepare_network_resource(
    &self,
    network_env: &SandboxNetworkEnv,
) -> Option<ResourceConfig> {
    let config = self.resource_manager.config().await;
    let dan_path = dan_config_path(&config, &self.sid);

    // Network priority: DAN > NetNS
    if dan_path.exists() {
        let dan_config = DanNetworkConfig {
            dan_conf_path: dan_path,
        };
        return Some(ResourceConfig::Network(NetworkConfig::Dan(dan_config)));
    }

    // No DAN config: fall back to the netns, if there is one.
    let netns_path = network_env.netns.as_ref()?;
    let queues = self
        .hypervisor
        .hypervisor_config()
        .await
        .network_info
        .network_queues as usize;
    let netns_config = NetworkWithNetNsConfig {
        network_model: config.runtime.internetworking_model.clone(),
        netns_path: netns_path.to_owned(),
        queues,
        network_created: network_env.network_created,
    };
    Some(ResourceConfig::Network(NetworkConfig::NetNs(netns_config)))
}
async fn execute_oci_hook_functions(
&self,
prestart_hooks: &[oci::Hook],
@ -166,25 +190,6 @@ impl VirtSandbox {
Ok(())
}
async fn prepare_network_config(
&self,
netns_path: String,
network_created: bool,
) -> NetworkConfig {
let config = self.resource_manager.config().await;
NetworkConfig::NetworkResourceWithNetNs(NetworkWithNetNsConfig {
network_model: config.runtime.internetworking_model.clone(),
netns_path,
queues: self
.hypervisor
.hypervisor_config()
.await
.network_info
.network_queues as usize,
network_created,
})
}
async fn prepare_rootfs_config(&self) -> Result<BlockConfig> {
let boot_info = self.hypervisor.hypervisor_config().await.boot_info;
@ -270,18 +275,23 @@ impl Sandbox for VirtSandbox {
// We need to rescan the netns to handle the change.
// 2. Do not scan the netns if we want no network for the VM.
// TODO In case of vm factory, scan the netns to hotplug interfaces after the VM is started.
let config = self.resource_manager.config().await;
if self.has_prestart_hooks(prestart_hooks, create_runtime_hooks)
&& !self
.resource_manager
.config()
.await
.runtime
.disable_new_netns
&& !config.runtime.disable_new_netns
&& !dan_config_path(&config, &self.sid).exists()
{
if let Some(netns_path) = network_env.netns {
let network_resource = self
.prepare_network_config(netns_path, network_env.network_created)
.await;
let network_resource = NetworkConfig::NetNs(NetworkWithNetNsConfig {
network_model: config.runtime.internetworking_model.clone(),
netns_path: netns_path.to_owned(),
queues: self
.hypervisor
.hypervisor_config()
.await
.network_info
.network_queues as usize,
network_created: network_env.network_created,
});
self.resource_manager
.handle_network(network_resource)
.await