runtime-rs: support network resource

Fixes: #3785
Signed-off-by: Quanwei Zhou <quanweiZhou@linux.alibaba.com>
This commit is contained in:
Quanwei Zhou 2022-04-14 12:21:35 +08:00 committed by Fupan Li
parent 4be7185aa4
commit fd4c26f9c1
31 changed files with 2311 additions and 17 deletions

View File

@ -11,7 +11,7 @@ license = "Apache-2.0"
edition = "2018"
[dependencies]
byteorder = "~1"
byteorder = "1.4.3"
cgroups = { package = "cgroups-rs", version = "0.2.7" }
chrono = "0.4.0"
common-path = "=1.0.0"
@ -24,7 +24,7 @@ serde_json = "1.0.73"
slog = "2.5.2"
slog-scope = "4.4.0"
subprocess = "0.2.8"
rand = "^0.7.2"
rand = "0.7.2"
thiserror = "1.0.30"
kata-types = { path = "../kata-types" }

View File

@ -46,6 +46,17 @@ mod tests {
fn random_bytes() {
let b = RandomBytes::new(16);
assert_eq!(b.bytes.len(), 16);
println!("{:?}", b.bytes);
// check lower hex
let lower_hex = format!("{:x}", b);
assert_eq!(lower_hex, lower_hex.to_lowercase());
// check upper hex
let upper_hex = format!("{:X}", b);
assert_eq!(upper_hex, upper_hex.to_uppercase());
// check new random bytes
let b1 = RandomBytes::new(16);
assert_ne!(b.bytes, b1.bytes);
}
}

View File

@ -27,6 +27,7 @@ impl UUID {
}
}
/// From: convert UUID to string
impl From<&UUID> for String {
fn from(from: &UUID) -> Self {
let time_low = BigEndian::read_u32(&from.0[..4]);
@ -57,13 +58,17 @@ mod tests {
#[test]
fn test_uuid() {
let uuid = UUID::new();
let sss: String = String::from(&uuid);
println!("{}", sss);
let uuid1 = UUID::new();
let s1: String = String::from(&uuid1);
let uuid2 = UUID([0u8, 1u8, 2u8, 3u8, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
let sss2 = String::from(&uuid2);
println!("Display: {}", uuid2);
assert_eq!(&sss2, "00010203-0405-0607-0809-0a0b0c0d0e0f");
let uuid2 = UUID::new();
let s2: String = String::from(&uuid2);
assert_eq!(s1.len(), s2.len());
assert_ne!(s1, s2);
let uuid3 = UUID([0u8, 1u8, 2u8, 3u8, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
let s3 = String::from(&uuid3);
assert_eq!(&s3, "00010203-0405-0607-0809-0a0b0c0d0e0f");
}
}

View File

@ -792,6 +792,71 @@ version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a"
[[package]]
name = "netlink-packet-core"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "345b8ab5bd4e71a2986663e88c56856699d060e78e152e6e9d7966fcd5491297"
dependencies = [
"anyhow",
"byteorder",
"libc",
"netlink-packet-utils",
]
[[package]]
name = "netlink-packet-route"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "733ea73609acfd7fa7ddadfb7bf709b0471668c456ad9513685af543a06342b2"
dependencies = [
"anyhow",
"bitflags",
"byteorder",
"libc",
"netlink-packet-core",
"netlink-packet-utils",
]
[[package]]
name = "netlink-packet-utils"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25af9cf0dc55498b7bd94a1508af7a78706aa0ab715a73c5169273e03c84845e"
dependencies = [
"anyhow",
"byteorder",
"paste",
"thiserror",
]
[[package]]
name = "netlink-proto"
version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef8785b8141e8432aa45fceb922a7e876d7da3fad37fa7e7ec702ace3aa0826b"
dependencies = [
"bytes 1.1.0",
"futures 0.3.21",
"log",
"netlink-packet-core",
"netlink-sys",
"tokio",
]
[[package]]
name = "netlink-sys"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3e4c9f9547a08241bee7b6558b9b98e1f290d187de8b7cfca2bbb4937bcaa8f8"
dependencies = [
"bytes 1.1.0",
"futures 0.3.21",
"libc",
"log",
"tokio",
]
[[package]]
name = "nix"
version = "0.16.1"
@ -805,6 +870,19 @@ dependencies = [
"void",
]
[[package]]
name = "nix"
version = "0.22.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e4916f159ed8e5de0082076562152a76b7a1f64a01fd9d1e0fea002c37624faf"
dependencies = [
"bitflags",
"cc",
"cfg-if 1.0.0",
"libc",
"memoffset",
]
[[package]]
name = "nix"
version = "0.23.1"
@ -921,6 +999,12 @@ dependencies = [
"winapi",
]
[[package]]
name = "paste"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c520e05135d6e763148b6426a837e239041653ba7becd2e538c076c738025fc"
[[package]]
name = "percent-encoding"
version = "2.1.0"
@ -1254,22 +1338,43 @@ dependencies = [
"agent",
"anyhow",
"async-trait",
"bitflags",
"cgroups-rs",
"futures 0.3.21",
"hypervisor",
"kata-sys-util",
"kata-types",
"lazy_static",
"libc",
"log",
"logging",
"netlink-packet-route",
"netlink-sys",
"nix 0.16.1",
"oci",
"rand 0.7.3",
"rtnetlink",
"scopeguard",
"slog",
"slog-scope",
"tokio",
"uuid",
]
[[package]]
name = "rtnetlink"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f54290e54521dac3de4149d83ddf9f62a359b3cc93bcb494a794a41e6f4744b"
dependencies = [
"futures 0.3.21",
"log",
"netlink-packet-route",
"netlink-proto",
"nix 0.22.3",
"thiserror",
"tokio",
]
[[package]]
name = "runtimes"
version = "0.1.0"

View File

@ -10,6 +10,7 @@ extern crate slog;
logging::logger_with_subsystem!(sl, "hypervisor");
pub mod device;
pub use device::*;
use std::collections::HashMap;

View File

@ -7,11 +7,17 @@ edition = "2018"
[dependencies]
anyhow = "^1.0"
async-trait = "0.1.48"
bitflags = "1.2.1"
cgroups-rs = "0.2.9"
futures = "0.3.11"
lazy_static = "1.4.0"
libc = ">=0.2.39"
log = "^0.4.0"
netlink-sys = "0.8.2"
netlink-packet-route = "0.11.0"
nix = "0.16.0"
rand = "^0.7.2"
rtnetlink = "0.9.1"
scopeguard = "1.0.0"
slog = "2.5.2"
slog-scope = "4.4.0"
tokio = { version = "1.8.0", features = ["process"] }

View File

@ -15,6 +15,8 @@ logging::logger_with_subsystem!(sl, "resource");
pub mod cgroups;
pub mod manager;
mod manager_inner;
pub mod network;
use network::NetworkConfig;
pub mod rootfs;
pub mod share_fs;
pub mod volume;
@ -24,5 +26,6 @@ use kata_types::config::hypervisor::SharedFsInfo;
#[derive(Debug)]
pub enum ResourceConfig {
Network(NetworkConfig),
ShareFs(SharedFsInfo),
}

View File

@ -15,6 +15,7 @@ use oci::LinuxResources;
use crate::{
cgroups::CgroupsResource,
network::{self, Network},
rootfs::{RootFsResource, Rootfs},
share_fs::{self, ShareFs},
volume::{Volume, VolumeResource},
@ -23,10 +24,9 @@ use crate::{
pub(crate) struct ResourceManagerInner {
sid: String,
// TODO: remove
#[allow(dead_code)]
agent: Arc<dyn Agent>,
hypervisor: Arc<dyn Hypervisor>,
network: Option<Arc<dyn Network>>,
share_fs: Option<Arc<dyn ShareFs>>,
pub rootfs_resource: RootFsResource,
@ -45,6 +45,7 @@ impl ResourceManagerInner {
sid: sid.to_string(),
agent,
hypervisor,
network: None,
share_fs: None,
rootfs_resource: RootFsResource::new(),
volume_resource: VolumeResource::new(),
@ -66,12 +67,60 @@ impl ResourceManagerInner {
.context("setup share fs device before start vm")?;
self.share_fs = Some(share_fs);
}
ResourceConfig::Network(c) => {
let d = network::new(&c).await.context("new network")?;
d.setup(self.hypervisor.as_ref())
.await
.context("setup network")?;
self.network = Some(d)
}
};
}
Ok(())
}
async fn handle_interfaces(&self, network: &dyn Network) -> Result<()> {
for i in network.interfaces().await.context("get interfaces")? {
// update interface
info!(sl!(), "update interface {:?}", i);
self.agent
.update_interface(agent::UpdateInterfaceRequest { interface: Some(i) })
.await
.context("update interface")?;
}
Ok(())
}
async fn handle_neighbours(&self, network: &dyn Network) -> Result<()> {
let neighbors = network.neighs().await.context("neighs")?;
if !neighbors.is_empty() {
info!(sl!(), "update neighbors {:?}", neighbors);
self.agent
.add_arp_neighbors(agent::AddArpNeighborRequest {
neighbors: Some(agent::ARPNeighbors { neighbors }),
})
.await
.context("update neighbors")?;
}
Ok(())
}
async fn handle_routes(&self, network: &dyn Network) -> Result<()> {
let routes = network.routes().await.context("routes")?;
if !routes.is_empty() {
info!(sl!(), "update routes {:?}", routes);
self.agent
.update_routes(agent::UpdateRoutesRequest {
route: Some(agent::Routes { routes }),
})
.await
.context("update routes")?;
}
Ok(())
}
pub async fn setup_after_start_vm(&mut self) -> Result<()> {
if let Some(share_fs) = self.share_fs.as_ref() {
share_fs
@ -80,6 +129,16 @@ impl ResourceManagerInner {
.context("setup share fs device after start vm")?;
}
if let Some(network) = self.network.as_ref() {
let network = network.as_ref();
self.handle_interfaces(network)
.await
.context("handle interfaces")?;
self.handle_neighbours(network)
.await
.context("handle neighbors")?;
self.handle_routes(network).await.context("handle routes")?;
}
Ok(())
}

View File

@ -0,0 +1,22 @@
// Copyright (c) 2019-2022 Alibaba Cloud
// Copyright (c) 2019-2022 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
mod physical_endpoint;
pub use physical_endpoint::PhysicalEndpoint;
mod veth_endpoint;
pub use veth_endpoint::VethEndpoint;
use anyhow::Result;
use async_trait::async_trait;
use hypervisor::Hypervisor;
#[async_trait]
pub trait Endpoint: std::fmt::Debug + Send + Sync {
async fn name(&self) -> String;
async fn hardware_addr(&self) -> String;
async fn attach(&self, hypervisor: &dyn Hypervisor) -> Result<()>;
async fn detach(&self, hypervisor: &dyn Hypervisor) -> Result<()>;
}

View File

@ -0,0 +1,145 @@
// Copyright (c) 2019-2022 Alibaba Cloud
// Copyright (c) 2019-2022 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
use std::path::Path;
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
use hypervisor::{device, Hypervisor};
use super::Endpoint;
use crate::network::utils::{self, link};
pub const SYS_PCI_DEVICES_PATH: &str = "/sys/bus/pci/devices";
#[derive(Debug)]
pub struct VendorDevice {
vendor_id: String,
device_id: String,
}
impl VendorDevice {
pub fn new(vendor_id: &str, device_id: &str) -> Result<Self> {
if vendor_id.is_empty() || device_id.is_empty() {
return Err(anyhow!(
"invalid parameters vendor_id {} device_id {}",
vendor_id,
device_id
));
}
Ok(Self {
vendor_id: vendor_id.to_string(),
device_id: device_id.to_string(),
})
}
pub fn vendor_device_id(&self) -> String {
format!("{}_{}", &self.vendor_id, &self.device_id)
}
}
#[derive(Debug)]
pub struct PhysicalEndpoint {
iface_name: String,
hard_addr: String,
bdf: String,
driver: String,
vendor_device_id: VendorDevice,
}
impl PhysicalEndpoint {
pub fn new(name: &str, hardware_addr: &[u8]) -> Result<Self> {
let driver_info = link::get_driver_info(name).context("get driver info")?;
let bdf = driver_info.bus_info;
let sys_pci_devices_path = Path::new(SYS_PCI_DEVICES_PATH);
// get driver by following symlink /sys/bus/pci/devices/$bdf/driver
let driver_path = sys_pci_devices_path.join(&bdf).join("driver");
let link = driver_path.read_link().context("read link")?;
let driver = link
.file_name()
.map_or(String::new(), |v| v.to_str().unwrap().to_owned());
// get vendor and device id from pci space (sys/bus/pci/devices/$bdf)
let iface_device_path = sys_pci_devices_path.join(&bdf).join("device");
let device_id = std::fs::read_to_string(&iface_device_path)
.context(format!("read device path {:?}", &iface_device_path))?;
let iface_vendor_path = sys_pci_devices_path.join(&bdf).join("vendor");
let vendor_id = std::fs::read_to_string(&iface_vendor_path)
.context(format!("read vendor path {:?}", &iface_vendor_path))?;
Ok(Self {
iface_name: name.to_string(),
hard_addr: utils::get_mac_addr(hardware_addr).context("get mac addr")?,
vendor_device_id: VendorDevice::new(&vendor_id, &device_id)
.context("new vendor device")?,
driver,
bdf,
})
}
}
#[async_trait]
impl Endpoint for PhysicalEndpoint {
async fn name(&self) -> String {
self.iface_name.clone()
}
async fn hardware_addr(&self) -> String {
self.hard_addr.clone()
}
async fn attach(&self, hypervisor: &dyn Hypervisor) -> Result<()> {
// bind physical interface from host driver and bind to vfio
device::bind_device_to_vfio(
&self.bdf,
&self.driver,
&self.vendor_device_id.vendor_device_id(),
)
.context(format!(
"bind physical endpoint from {} to vfio",
&self.driver
))?;
// set vfio's bus type, pci or mmio. Mostly use pci by default.
let mode = match self.driver.as_str() {
"virtio-pci" => "mmio",
_ => "pci",
};
// add vfio device
let d = device::Device::Vfio(device::VfioConfig {
id: format!("physical_nic_{}", self.name().await),
sysfs_path: "".to_string(),
bus_slot_func: self.bdf.clone(),
mode: device::VfioBusMode::new(mode)
.context(format!("new vfio bus mode {:?}", mode))?,
});
hypervisor.add_device(d).await.context("add device")?;
Ok(())
}
// detach for physical endpoint unbinds the physical network interface from vfio-pci
// and binds it back to the saved host driver.
async fn detach(&self, _hypervisor: &dyn Hypervisor) -> Result<()> {
// bind back the physical network interface to host.
// we need to do this even if a new network namespace has not
// been created by virt-containers.
// we do not need to enter the network namespace to bind back the
// physical interface to host driver.
device::bind_device_to_host(
&self.bdf,
&self.driver,
&self.vendor_device_id.vendor_device_id(),
)
.context(format!(
"bind physical endpoint device from vfio to {}",
&self.driver
))?;
Ok(())
}
}

View File

@ -0,0 +1,84 @@
// Copyright (c) 2019-2022 Alibaba Cloud
// Copyright (c) 2019-2022 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
use std::io::{self, Error};
use anyhow::{Context, Result};
use async_trait::async_trait;
use hypervisor::{device::NetworkConfig, Device, Hypervisor};
use super::Endpoint;
use crate::network::{utils, NetworkPair};
#[derive(Debug)]
pub struct VethEndpoint {
net_pair: NetworkPair,
}
impl VethEndpoint {
pub async fn new(
handle: &rtnetlink::Handle,
name: &str,
idx: u32,
model: &str,
queues: usize,
) -> Result<Self> {
let net_pair = NetworkPair::new(handle, idx, name, model, queues)
.await
.context("new networkInterfacePair")?;
Ok(VethEndpoint { net_pair })
}
fn get_network_config(&self) -> Result<NetworkConfig> {
let iface = &self.net_pair.tap.tap_iface;
let guest_mac = utils::parse_mac(&iface.hard_addr).ok_or_else(|| {
Error::new(
io::ErrorKind::InvalidData,
format!("hard_addr {}", &iface.hard_addr),
)
})?;
Ok(NetworkConfig {
id: self.net_pair.virt_iface.name.clone(),
host_dev_name: iface.name.clone(),
guest_mac: Some(guest_mac),
})
}
}
#[async_trait]
impl Endpoint for VethEndpoint {
async fn name(&self) -> String {
self.net_pair.virt_iface.name.clone()
}
async fn hardware_addr(&self) -> String {
self.net_pair.tap.tap_iface.hard_addr.clone()
}
async fn attach(&self, h: &dyn Hypervisor) -> Result<()> {
self.net_pair
.add_network_model()
.await
.context("add network model")?;
let config = self.get_network_config().context("get network config")?;
h.add_device(Device::Network(config))
.await
.context("Error add device")?;
Ok(())
}
async fn detach(&self, h: &dyn Hypervisor) -> Result<()> {
self.net_pair
.del_network_model()
.await
.context("del network model")?;
let config = self.get_network_config().context("get network config")?;
h.remove_device(Device::Network(config))
.await
.context("remove device")?;
Ok(())
}
}

View File

@ -0,0 +1,48 @@
// Copyright (c) 2019-2022 Alibaba Cloud
// Copyright (c) 2019-2022 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
mod endpoint;
pub use endpoint::Endpoint;
mod network_entity;
mod network_info;
pub use network_info::NetworkInfo;
mod network_model;
pub use network_model::NetworkModel;
mod network_with_netns;
pub use network_with_netns::NetworkWithNetNsConfig;
use network_with_netns::NetworkWithNetns;
mod network_pair;
use network_pair::NetworkPair;
mod utils;
use std::sync::Arc;
use anyhow::{Context, Result};
use async_trait::async_trait;
use hypervisor::Hypervisor;
#[derive(Debug)]
pub enum NetworkConfig {
NetworkResourceWithNetNs(NetworkWithNetNsConfig),
}
#[async_trait]
pub trait Network: Send + Sync {
async fn setup(&self, h: &dyn Hypervisor) -> Result<()>;
async fn interfaces(&self) -> Result<Vec<agent::Interface>>;
async fn routes(&self) -> Result<Vec<agent::Route>>;
async fn neighs(&self) -> Result<Vec<agent::ARPNeighbor>>;
}
pub async fn new(config: &NetworkConfig) -> Result<Arc<dyn Network>> {
match config {
NetworkConfig::NetworkResourceWithNetNs(c) => Ok(Arc::new(
NetworkWithNetns::new(c)
.await
.context("new network with netns")?,
)),
}
}

View File

@ -0,0 +1,24 @@
// Copyright (c) 2019-2022 Alibaba Cloud
// Copyright (c) 2019-2022 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
use std::sync::Arc;
use super::{Endpoint, NetworkInfo};
#[derive(Debug)]
pub(crate) struct NetworkEntity {
pub(crate) endpoint: Arc<dyn Endpoint>,
pub(crate) network_info: Arc<dyn NetworkInfo>,
}
impl NetworkEntity {
pub fn new(endpoint: Arc<dyn Endpoint>, network_info: Arc<dyn NetworkInfo>) -> Self {
Self {
endpoint,
network_info,
}
}
}

View File

@ -0,0 +1,18 @@
// Copyright (c) 2019-2022 Alibaba Cloud
// Copyright (c) 2019-2022 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
pub(crate) mod network_info_from_link;
use agent::{ARPNeighbor, Interface, Route};
use anyhow::Result;
use async_trait::async_trait;
#[async_trait]
pub trait NetworkInfo: std::fmt::Debug + Send + Sync {
async fn interface(&self) -> Result<Interface>;
async fn routes(&self) -> Result<Vec<Route>>;
async fn neighs(&self) -> Result<Vec<ARPNeighbor>>;
}

View File

@ -0,0 +1,203 @@
// Copyright (c) 2019-2022 Alibaba Cloud
// Copyright (c) 2019-2022 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
use std::{convert::TryFrom, net::Ipv4Addr};
use agent::{ARPNeighbor, IPAddress, IPFamily, Interface, Route};
use anyhow::{Context, Result};
use async_trait::async_trait;
use futures::stream::TryStreamExt;
use netlink_packet_route::{
self, neighbour::NeighbourMessage, nlas::neighbour::Nla, route::RouteMessage,
};
use super::NetworkInfo;
use crate::network::utils::{
address::Address,
link::{self, LinkAttrs},
};
#[derive(Debug)]
pub(crate) struct NetworkInfoFromLink {
interface: Interface,
neighs: Vec<ARPNeighbor>,
routes: Vec<Route>,
}
impl NetworkInfoFromLink {
pub async fn new(
handle: &rtnetlink::Handle,
link: &dyn link::Link,
hw_addr: &str,
) -> Result<Self> {
let attrs = link.attrs();
let name = &attrs.name;
Ok(Self {
interface: Interface {
device: name.clone(),
name: name.clone(),
ip_addresses: handle_addresses(handle, attrs)
.await
.context("handle addresses")?,
mtu: attrs.mtu as u64,
hw_addr: hw_addr.to_string(),
pci_addr: Default::default(),
field_type: link.r#type().to_string(),
raw_flags: attrs.flags & libc::IFF_NOARP as u32,
},
neighs: handle_neighbors(handle, attrs)
.await
.context("handle neighbours")?,
routes: handle_routes(handle, attrs)
.await
.context("handle routes")?,
})
}
}
async fn handle_addresses(handle: &rtnetlink::Handle, attrs: &LinkAttrs) -> Result<Vec<IPAddress>> {
let mut addr_msg_list = handle
.address()
.get()
.set_link_index_filter(attrs.index)
.execute();
let mut addresses = Vec::new();
while let Some(addr_msg) = addr_msg_list.try_next().await? {
if addr_msg.header.family as i32 != libc::AF_INET {
warn!(sl!(), "unsupported ipv6 addr. {:?}", addr_msg);
continue;
}
let a = Address::try_from(addr_msg).context("get addr from msg")?;
if a.addr.is_loopback() {
continue;
}
addresses.push(IPAddress {
family: if a.addr.is_ipv4() {
IPFamily::V4
} else {
IPFamily::V6
},
address: a.addr.to_string(),
mask: a.perfix_len.to_string(),
});
}
Ok(addresses)
}
fn generate_neigh(name: &str, n: &NeighbourMessage) -> Result<ARPNeighbor> {
let mut neigh = ARPNeighbor {
device: name.to_string(),
state: n.header.state as i32,
..Default::default()
};
for nla in &n.nlas {
match nla {
Nla::Destination(addr) => {
if addr.len() != 4 {
continue;
}
let dest = Ipv4Addr::new(addr[0], addr[1], addr[2], addr[3]);
let addr = Some(IPAddress {
family: IPFamily::V4,
address: dest.to_string(),
mask: "".to_string(),
});
neigh.to_ip_address = addr;
}
Nla::LinkLocalAddress(addr) => {
if addr.len() < 6 {
continue;
}
let lladdr = format!(
"{:<02x}:{:<02x}:{:<02x}:{:<02x}:{:<02x}:{:<02x}",
addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]
);
neigh.ll_addr = lladdr;
}
_ => {
// skip the unused Nla
}
}
}
Ok(neigh)
}
async fn handle_neighbors(
handle: &rtnetlink::Handle,
attrs: &LinkAttrs,
) -> Result<Vec<ARPNeighbor>> {
let name = &attrs.name;
let mut neighs = vec![];
let mut neigh_msg_list = handle.neighbours().get().execute();
while let Some(neigh) = neigh_msg_list.try_next().await? {
// get neigh filter with index
if neigh.header.ifindex == attrs.index {
neighs.push(generate_neigh(name, &neigh).context("generate neigh")?)
}
}
Ok(neighs)
}
fn generate_route(name: &str, route: &RouteMessage) -> Result<Option<Route>> {
if route.header.protocol == libc::RTPROT_KERNEL {
return Ok(None);
}
Ok(Some(Route {
dest: route
.destination_prefix()
.map(|(addr, _)| addr.to_string())
.unwrap_or_default(),
gateway: route.gateway().map(|v| v.to_string()).unwrap_or_default(),
device: name.to_string(),
source: route
.source_prefix()
.map(|(addr, _)| addr.to_string())
.unwrap_or_default(),
scope: route.header.scope as u32,
family: if route.header.address_family == libc::AF_INET as u8 {
IPFamily::V4
} else {
IPFamily::V6
},
}))
}
async fn handle_routes(handle: &rtnetlink::Handle, attrs: &LinkAttrs) -> Result<Vec<Route>> {
let name = &attrs.name;
let mut routes = vec![];
let mut route_msg_list = handle.route().get(rtnetlink::IpVersion::V4).execute();
while let Some(route) = route_msg_list.try_next().await? {
// get route filter with index
if let Some(index) = route.output_interface() {
if index == attrs.index {
if let Some(route) = generate_route(name, &route).context("generate route")? {
routes.push(route);
}
}
}
}
Ok(routes)
}
#[async_trait]
impl NetworkInfo for NetworkInfoFromLink {
async fn interface(&self) -> Result<Interface> {
Ok(self.interface.clone())
}
async fn routes(&self) -> Result<Vec<Route>> {
Ok(self.routes.clone())
}
async fn neighs(&self) -> Result<Vec<ARPNeighbor>> {
Ok(self.neighs.clone())
}
}

View File

@ -0,0 +1,46 @@
// Copyright (c) 2019-2022 Alibaba Cloud
// Copyright (c) 2019-2022 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
pub mod none_model;
pub mod route_model;
pub mod tc_filter_model;
use std::sync::Arc;
use anyhow::{Context, Result};
use async_trait::async_trait;
use super::NetworkPair;
const TC_FILTER_NET_MODEL_STR: &str = "tcfilter";
const ROUTE_NET_MODEL_STR: &str = "route";
pub enum NetworkModelType {
NoneModel,
TcFilter,
Route,
}
#[async_trait]
pub trait NetworkModel: std::fmt::Debug + Send + Sync {
fn model_type(&self) -> NetworkModelType;
async fn add(&self, net_pair: &NetworkPair) -> Result<()>;
async fn del(&self, net_pair: &NetworkPair) -> Result<()>;
}
pub fn new(model: &str) -> Result<Arc<dyn NetworkModel>> {
match model {
TC_FILTER_NET_MODEL_STR => Ok(Arc::new(
tc_filter_model::TcFilterModel::new().context("new tc filter model")?,
)),
ROUTE_NET_MODEL_STR => Ok(Arc::new(
route_model::RouteModel::new().context("new route model")?,
)),
_ => Ok(Arc::new(
none_model::NoneModel::new().context("new none model")?,
)),
}
}

View File

@ -0,0 +1,35 @@
// Copyright (c) 2019-2022 Alibaba Cloud
// Copyright (c) 2019-2022 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
use anyhow::Result;
use async_trait::async_trait;
use super::{NetworkModel, NetworkModelType};
use crate::network::NetworkPair;
#[derive(Debug)]
pub(crate) struct NoneModel {}
impl NoneModel {
pub fn new() -> Result<Self> {
Ok(Self {})
}
}
#[async_trait]
impl NetworkModel for NoneModel {
fn model_type(&self) -> NetworkModelType {
NetworkModelType::NoneModel
}
async fn add(&self, _pair: &NetworkPair) -> Result<()> {
Ok(())
}
async fn del(&self, _pair: &NetworkPair) -> Result<()> {
Ok(())
}
}

View File

@ -0,0 +1,88 @@
// Copyright (c) 2019-2022 Alibaba Cloud
// Copyright (c) 2019-2022 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
use tokio::process::Command;
use super::{NetworkModel, NetworkModelType};
use crate::network::NetworkPair;
#[derive(Debug)]
pub(crate) struct RouteModel {}
impl RouteModel {
pub fn new() -> Result<Self> {
Ok(Self {})
}
}
#[async_trait]
impl NetworkModel for RouteModel {
fn model_type(&self) -> NetworkModelType {
NetworkModelType::Route
}
async fn add(&self, pair: &NetworkPair) -> Result<()> {
let tap_name = &pair.tap.tap_iface.name;
let virt_name = &pair.virt_iface.name;
let virt_iface_addr = pair.virt_iface.addrs[0].addr.to_string();
let commands_args = vec![
vec![
"rule", "add", "pref", "10", "from", "all", "lookup", "local",
],
vec!["rule", "del", "pref", "0", "from", "all"],
vec!["rule", "add", "pref", "5", "iif", virt_name, "table", "10"],
vec![
"route", "replace", "default", "dev", tap_name, "table", "10",
],
vec![
"neigh",
"replace",
&virt_iface_addr,
"lladdr",
&pair.virt_iface.hard_addr,
"dev",
tap_name,
],
];
for ca in commands_args {
let output = Command::new("/sbin/ip")
.args(&ca)
.output()
.await
.context(format!("run command ip args {:?}", &ca))?;
if !output.status.success() {
return Err(anyhow!(
"run command ip args {:?} error {}",
&ca,
String::from_utf8(output.stderr)?
));
}
}
// TODO: support ipv6
// change sysctl for tap0_kata
// echo 1 > /proc/sys/net/ipv4/conf/tap0_kata/accept_local
let accept_local_path = format!("/proc/sys/net/ipv4/conf/{}/accept_local", &tap_name);
std::fs::write(&accept_local_path, "1".to_string())
.with_context(|| format!("Failed to echo 1 > {}", &accept_local_path))?;
// echo 1 > /proc/sys/net/ipv4/conf/eth0/proxy_arp
// This enabled ARP reply on peer eth0 to prevent without any reply on VPC
let proxy_arp_path = format!("/proc/sys/net/ipv4/conf/{}/proxy_arp", &virt_name);
std::fs::write(&proxy_arp_path, "1".to_string())
.with_context(|| format!("Failed to echo 1 > {}", &proxy_arp_path))?;
Ok(())
}
async fn del(&self, _pair: &NetworkPair) -> Result<()> {
todo!()
}
}

View File

@ -0,0 +1,95 @@
// Copyright (c) 2019-2022 Alibaba Cloud
// Copyright (c) 2019-2022 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
use tokio::process::Command;
use super::{NetworkModel, NetworkModelType};
use crate::network::NetworkPair;
#[derive(Debug)]
pub(crate) struct TcFilterModel {}
impl TcFilterModel {
pub fn new() -> Result<Self> {
Ok(Self {})
}
}
#[async_trait]
impl NetworkModel for TcFilterModel {
fn model_type(&self) -> NetworkModelType {
NetworkModelType::TcFilter
}
async fn add(&self, pair: &NetworkPair) -> Result<()> {
let tap_name = &pair.tap.tap_iface.name;
let virt_name = &pair.virt_iface.name;
add_qdisc_ingress(tap_name)
.await
.context("add qdisc ingress for tap link")?;
add_qdisc_ingress(virt_name)
.await
.context("add qdisc ingress")?;
add_redirect_tcfilter(tap_name, virt_name)
.await
.context("add tc filter for tap")?;
add_redirect_tcfilter(virt_name, tap_name)
.await
.context("add tc filter")?;
Ok(())
}
async fn del(&self, pair: &NetworkPair) -> Result<()> {
del_qdisc(&pair.virt_iface.name)
.await
.context("del qdisc")?;
Ok(())
}
}
// TODO: use netlink replace tc command
async fn add_qdisc_ingress(dev: &str) -> Result<()> {
let output = Command::new("/sbin/tc")
.args(&["qdisc", "add", "dev", dev, "handle", "ffff:", "ingress"])
.output()
.await
.context("add tc")?;
if !output.status.success() {
return Err(anyhow!("{}", String::from_utf8(output.stderr)?));
}
Ok(())
}
async fn add_redirect_tcfilter(src: &str, dst: &str) -> Result<()> {
let output = Command::new("/sbin/tc")
.args(&[
"filter", "add", "dev", src, "parent", "ffff:", "protocol", "all", "u32", "match",
"u8", "0", "0", "action", "mirred", "egress", "redirect", "dev", dst,
])
.output()
.await
.context("add redirect tcfilter")?;
if !output.status.success() {
return Err(anyhow!("{}", String::from_utf8(output.stderr)?));
}
Ok(())
}
async fn del_qdisc(dev: &str) -> Result<()> {
let output = Command::new("/sbin/tc")
.args(&["qdisc", "del", "dev", dev, "handle", "ffff:", "ingress"])
.output()
.await
.context("del qdisc")?;
if !output.status.success() {
return Err(anyhow!("{}", String::from_utf8(output.stderr)?));
}
Ok(())
}

View File

@ -0,0 +1,178 @@
// Copyright (c) 2019-2022 Alibaba Cloud
// Copyright (c) 2019-2022 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
use std::{convert::TryFrom, sync::Arc, usize};
use anyhow::{anyhow, Context, Result};
use futures::stream::TryStreamExt;
use super::{
network_model,
utils::{self, address::Address, link},
};
const TAP_SUFFIX: &str = "_kata";
#[derive(Default, Copy, Clone, Debug, PartialEq, Eq)]
pub struct NetInterworkingModel(u32);
#[derive(Default, Debug, Clone)]
pub struct NetworkInterface {
pub name: String,
pub hard_addr: String,
pub addrs: Vec<Address>,
}
#[derive(Default, Debug)]
pub struct TapInterface {
pub id: String,
pub name: String,
pub tap_iface: NetworkInterface,
}
#[derive(Debug)]
pub struct NetworkPair {
pub tap: TapInterface,
pub virt_iface: NetworkInterface,
pub model: Arc<dyn network_model::NetworkModel>,
pub network_qos: bool,
}
impl NetworkPair {
pub(crate) async fn new(
handle: &rtnetlink::Handle,
idx: u32,
name: &str,
model: &str,
queues: usize,
) -> Result<Self> {
let unique_id = kata_sys_util::rand::UUID::new();
let model = network_model::new(model).context("new network model")?;
let tap_iface_name = format!("tap{}{}", idx, TAP_SUFFIX);
let virt_iface_name = format!("eth{}", idx);
let tap_link = create_link(handle, &tap_iface_name, queues)
.await
.context("create link")?;
let virt_link = get_link_by_name(handle, virt_iface_name.clone().as_str())
.await
.context("get link by name")?;
let mut virt_addr_msg_list = handle
.address()
.get()
.set_link_index_filter(virt_link.attrs().index)
.execute();
let mut virt_address = vec![];
while let Some(addr_msg) = virt_addr_msg_list.try_next().await? {
let addr = Address::try_from(addr_msg).context("get address from msg")?;
virt_address.push(addr);
}
// Save the veth MAC address to the TAP so that it can later be used
// to build the hypervisor command line. This MAC address has to be
// the one inside the VM in order to avoid any firewall issues. The
// bridge created by the network plugin on the host actually expects
// to see traffic from this MAC address and not another one.
let tap_hard_addr =
utils::get_mac_addr(&virt_link.attrs().hardware_addr).context("get mac addr")?;
// Save the TAP Mac address to the virt_iface so that it can later updated
// the guest's gateway IP's mac as this TAP device. This MAC address has
// to be inside the VM in order to the network reach to the gateway.
let virt_hard_addr =
utils::get_mac_addr(&tap_link.attrs().hardware_addr).context("get mac addr")?;
handle
.link()
.set(tap_link.attrs().index)
.mtu(virt_link.attrs().mtu)
.execute()
.await
.context("set link mtu")?;
handle
.link()
.set(tap_link.attrs().index)
.up()
.execute()
.await
.context("set link up")?;
let mut net_pair = NetworkPair {
tap: TapInterface {
id: String::from(&unique_id),
name: format!("br{}{}", idx, TAP_SUFFIX),
tap_iface: NetworkInterface {
name: tap_iface_name,
hard_addr: tap_hard_addr,
..Default::default()
},
},
virt_iface: NetworkInterface {
name: virt_iface_name,
hard_addr: virt_hard_addr,
addrs: virt_address,
},
model,
network_qos: false,
};
if !name.is_empty() {
net_pair.virt_iface.name = String::from(name);
}
Ok(net_pair)
}
pub(crate) async fn add_network_model(&self) -> Result<()> {
let model = self.model.clone();
model.add(self).await.context("add")?;
Ok(())
}
pub(crate) async fn del_network_model(&self) -> Result<()> {
let model = self.model.clone();
model.del(self).await.context("del")?;
Ok(())
}
}
pub async fn create_link(
handle: &rtnetlink::Handle,
name: &str,
queues: usize,
) -> Result<Box<dyn link::Link>> {
link::create_link(name, link::LinkType::Tap, queues)?;
let link = get_link_by_name(handle, name)
.await
.context("get link by name")?;
let base = link.attrs();
if base.master_index != 0 {
handle
.link()
.set(base.index)
.master(base.master_index)
.execute()
.await
.context("set index")?;
}
Ok(link)
}
pub async fn get_link_by_name(
handle: &rtnetlink::Handle,
name: &str,
) -> Result<Box<dyn link::Link>> {
let mut link_msg_list = handle.link().get().match_name(name.to_string()).execute();
let msg = if let Some(msg) = link_msg_list.try_next().await? {
msg
} else {
return Err(anyhow!("failed to find link by name {}", name));
};
Ok(link::get_link_from_message(msg))
}

View File

@ -0,0 +1,211 @@
// Copyright (c) 2019-2022 Alibaba Cloud
// Copyright (c) 2019-2022 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
use std::sync::{
atomic::{AtomicU32, Ordering},
Arc,
};
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
use futures::stream::TryStreamExt;
use hypervisor::Hypervisor;
use scopeguard::defer;
use tokio::sync::RwLock;
use super::{
endpoint::{Endpoint, PhysicalEndpoint, VethEndpoint},
network_entity::NetworkEntity,
network_info::network_info_from_link::NetworkInfoFromLink,
utils::{link, netns},
Network,
};
use crate::network::NetworkInfo;
#[derive(Debug)]
pub struct NetworkWithNetNsConfig {
pub network_model: String,
pub netns_path: String,
pub queues: usize,
}
struct NetworkWithNetnsInner {
netns_path: String,
entity_list: Vec<NetworkEntity>,
}
impl NetworkWithNetnsInner {
async fn new(config: &NetworkWithNetNsConfig) -> Result<Self> {
let entity_list = if config.netns_path.is_empty() {
warn!(sl!(), "skip to scan for empty netns");
vec![]
} else {
// get endpoint
get_entity_from_netns(config)
.await
.context("get entity from netns")?
};
Ok(Self {
netns_path: config.netns_path.to_string(),
entity_list,
})
}
}
pub(crate) struct NetworkWithNetns {
inner: Arc<RwLock<NetworkWithNetnsInner>>,
}
impl NetworkWithNetns {
pub(crate) async fn new(config: &NetworkWithNetNsConfig) -> Result<Self> {
Ok(Self {
inner: Arc::new(RwLock::new(NetworkWithNetnsInner::new(config).await?)),
})
}
}
#[async_trait]
impl Network for NetworkWithNetns {
async fn setup(&self, h: &dyn Hypervisor) -> Result<()> {
let inner = self.inner.read().await;
let _netns_guard = netns::NetnsGuard::new(&inner.netns_path).context("net netns guard")?;
for e in &inner.entity_list {
e.endpoint.attach(h).await.context("attach")?;
}
Ok(())
}
async fn interfaces(&self) -> Result<Vec<agent::Interface>> {
let inner = self.inner.read().await;
let mut interfaces = vec![];
for e in &inner.entity_list {
interfaces.push(e.network_info.interface().await.context("interface")?);
}
Ok(interfaces)
}
async fn routes(&self) -> Result<Vec<agent::Route>> {
let inner = self.inner.read().await;
let mut routes = vec![];
for e in &inner.entity_list {
let mut list = e.network_info.routes().await.context("routes")?;
routes.append(&mut list);
}
Ok(routes)
}
async fn neighs(&self) -> Result<Vec<agent::ARPNeighbor>> {
let inner = self.inner.read().await;
let mut neighs = vec![];
for e in &inner.entity_list {
let mut list = e.network_info.neighs().await.context("neighs")?;
neighs.append(&mut list);
}
Ok(neighs)
}
}
async fn get_entity_from_netns(config: &NetworkWithNetNsConfig) -> Result<Vec<NetworkEntity>> {
info!(
sl!(),
"get network entity for config {:?} tid {:?}",
config,
nix::unistd::gettid()
);
let mut entity_list = vec![];
let _netns_guard = netns::NetnsGuard::new(&config.netns_path)
.context("net netns guard")
.unwrap();
let (connection, handle, _) = rtnetlink::new_connection().context("new connection")?;
let thread_handler = tokio::spawn(connection);
defer!({
thread_handler.abort();
});
let mut links = handle.link().get().execute();
let idx = AtomicU32::new(0);
while let Some(link) = links.try_next().await? {
let link = link::get_link_from_message(link);
let attrs = link.attrs();
if (attrs.flags & libc::IFF_LOOPBACK as u32) != 0 {
continue;
}
let idx = idx.fetch_add(1, Ordering::Relaxed);
let (endpoint, network_info) = create_endpoint(&handle, link.as_ref(), idx, config)
.await
.context("create endpoint")?;
entity_list.push(NetworkEntity::new(endpoint, network_info));
}
Ok(entity_list)
}
async fn create_endpoint(
handle: &rtnetlink::Handle,
link: &dyn link::Link,
idx: u32,
config: &NetworkWithNetNsConfig,
) -> Result<(Arc<dyn Endpoint>, Arc<dyn NetworkInfo>)> {
let _netns_guard = netns::NetnsGuard::new(&config.netns_path)
.context("net netns guard")
.unwrap();
let attrs = link.attrs();
let link_type = link.r#type();
let endpoint: Arc<dyn Endpoint> = if is_physical_iface(&attrs.name)? {
info!(
sl!(),
"physical network interface found: {} {:?}",
&attrs.name,
nix::unistd::gettid()
);
let t = PhysicalEndpoint::new(&attrs.name, &attrs.hardware_addr)
.context("new physical endpoint")?;
Arc::new(t)
} else {
info!(
sl!(),
"{} network interface found: {}", &link_type, &attrs.name
);
match link_type {
"veth" => {
let ret = VethEndpoint::new(
handle,
&attrs.name,
idx,
&config.network_model,
config.queues,
)
.await
.context("veth endpoint")?;
Arc::new(ret)
}
_ => return Err(anyhow!("unsupported link type: {}", link_type)),
}
};
let network_info = Arc::new(
NetworkInfoFromLink::new(handle, link, &endpoint.hardware_addr().await)
.await
.context("network info from link")?,
);
Ok((endpoint, network_info))
}
fn is_physical_iface(name: &str) -> Result<bool> {
if name == "lo" {
return Ok(false);
}
let driver_info = link::get_driver_info(name)?;
if driver_info.bus_info.split(':').count() != 3 {
return Ok(false);
}
Ok(true)
}

View File

@ -0,0 +1,109 @@
// Copyright (c) 2019-2022 Alibaba Cloud
// Copyright (c) 2019-2022 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
use std::{
convert::TryFrom,
net::{IpAddr, Ipv4Addr, Ipv6Addr},
};
use anyhow::{anyhow, Result};
use netlink_packet_route::{nlas::address::Nla, AddressMessage, AF_INET, AF_INET6};
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Address {
pub addr: IpAddr,
pub label: String,
pub flags: u32,
pub scope: u8,
pub perfix_len: u8,
pub peer: IpAddr,
pub broadcast: IpAddr,
pub prefered_lft: u32,
pub valid_ltf: u32,
}
impl TryFrom<AddressMessage> for Address {
type Error = anyhow::Error;
fn try_from(msg: AddressMessage) -> Result<Self> {
let AddressMessage { header, nlas } = msg;
let mut addr = Address {
addr: IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)),
peer: IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)),
broadcast: IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)),
label: String::default(),
flags: 0,
scope: header.scope,
perfix_len: header.prefix_len,
prefered_lft: 0,
valid_ltf: 0,
};
let mut local = IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0));
let mut dst = IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0));
for nla in nlas.into_iter() {
match nla {
Nla::Address(a) => {
dst = parse_ip(a, header.family)?;
}
Nla::Local(a) => {
local = parse_ip(a, header.family)?;
}
Nla::Broadcast(b) => {
addr.broadcast = parse_ip(b, header.family)?;
}
Nla::Label(l) => {
addr.label = l;
}
Nla::Flags(f) => {
addr.flags = f;
}
Nla::CacheInfo(_c) => {}
_ => {}
}
}
// IPv6 sends the local address as IFA_ADDRESS with no
// IFA_LOCAL, IPv4 sends both IFA_LOCAL and IFA_ADDRESS
// with IFA_ADDRESS being the peer address if they differ
//
// But obviously, as there are IPv6 PtP addresses, too,
// IFA_LOCAL should also be handled for IPv6.
if local.is_unspecified() {
if header.family == AF_INET as u8 && local == dst {
addr.addr = dst;
} else {
addr.addr = local;
addr.peer = dst;
}
} else {
addr.addr = dst;
}
Ok(addr)
}
}
fn parse_ip(ip: Vec<u8>, family: u8) -> Result<IpAddr> {
let support_len = if family as u16 == AF_INET { 4 } else { 16 };
if ip.len() != support_len {
return Err(anyhow!(
"invalid ip addresses {:?} support {}",
&ip,
support_len
));
}
match family as u16 {
AF_INET => Ok(IpAddr::V4(Ipv4Addr::new(ip[0], ip[1], ip[2], ip[3]))),
AF_INET6 => {
let mut octets = [0u8; 16];
octets.copy_from_slice(&ip[..16]);
Ok(IpAddr::V6(Ipv6Addr::from(octets)))
}
_ => {
return Err(anyhow!("unknown IP network family {}", family));
}
}
}

View File

@ -0,0 +1,129 @@
// Copyright (c) 2019-2022 Alibaba Cloud
// Copyright (c) 2019-2022 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
use std::{
fs::{File, OpenOptions},
os::unix::io::AsRawFd,
path::Path,
{io, mem},
};
use anyhow::{Context, Result};
use nix::ioctl_write_ptr;
use super::macros::{get_name, set_name};
type IfName = [u8; libc::IFNAMSIZ];
#[derive(Copy, Clone, Debug)]
#[repr(C)]
struct CreateLinkMap {
pub mem_start: libc::c_ulong,
pub mem_end: libc::c_ulong,
pub base_addr: libc::c_ushort,
pub irq: libc::c_uchar,
pub dma: libc::c_uchar,
pub port: libc::c_uchar,
}
#[repr(C)]
union CreateLinkIfru {
pub ifr_addr: libc::sockaddr,
pub ifr_dst_addr: libc::sockaddr,
pub ifr_broad_addr: libc::sockaddr,
pub ifr_netmask: libc::sockaddr,
pub ifr_hw_addr: libc::sockaddr,
pub ifr_flags: libc::c_short,
pub ifr_if_index: libc::c_int,
pub ifr_metric: libc::c_int,
pub ifr_mtu: libc::c_int,
pub ifr_map: CreateLinkMap,
pub ifr_slave: IfName,
pub ifr_new_name: IfName,
pub ifr_data: *mut libc::c_char,
}
#[repr(C)]
struct CreateLinkReq {
pub ifr_name: IfName,
pub ifr_ifru: CreateLinkIfru,
}
impl CreateLinkReq {
pub fn from_name(name: &str) -> io::Result<CreateLinkReq> {
let mut req: CreateLinkReq = unsafe { mem::zeroed() };
req.set_name(name)?;
Ok(req)
}
pub fn set_name(&mut self, name: &str) -> io::Result<()> {
set_name!(self.ifr_name, name)
}
pub fn get_name(&self) -> io::Result<String> {
get_name!(self.ifr_name)
}
pub unsafe fn set_raw_flags(&mut self, raw_flags: libc::c_short) {
self.ifr_ifru.ifr_flags = raw_flags;
}
}
const DEVICE_PATH: &str = "/dev/net/tun";
ioctl_write_ptr!(tun_set_iff, b'T', 202, libc::c_int);
ioctl_write_ptr!(tun_set_persist, b'T', 203, libc::c_int);
#[derive(Clone, Copy, Debug)]
pub enum LinkType {
#[allow(dead_code)]
Tun,
Tap,
}
pub fn create_link(name: &str, link_type: LinkType, queues: usize) -> Result<()> {
let mut flags = libc::IFF_VNET_HDR;
flags |= match link_type {
LinkType::Tun => libc::IFF_TUN,
LinkType::Tap => libc::IFF_TAP,
};
let queues = if queues == 0 { 1 } else { queues };
if queues > 1 {
flags |= libc::IFF_MULTI_QUEUE | libc::IFF_NO_PI;
} else {
flags |= libc::IFF_ONE_QUEUE;
};
// create first queue
let mut files = vec![];
let (file, result_name) = create_queue(name, flags)?;
unsafe {
tun_set_persist(file.as_raw_fd(), &1).context("tun set persist")?;
}
files.push(file);
// create other queues
if queues > 1 {
for _ in 0..queues - 1 {
files.push(create_queue(&result_name, flags)?.0);
}
}
info!(sl!(), "create link with fds {:?}", files);
Ok(())
}
fn create_queue(name: &str, flags: libc::c_int) -> Result<(File, String)> {
let path = Path::new(DEVICE_PATH);
let file = OpenOptions::new().read(true).write(true).open(&path)?;
let mut req = CreateLinkReq::from_name(name)?;
unsafe {
req.set_raw_flags(flags as libc::c_short);
tun_set_iff(file.as_raw_fd(), &mut req as *mut _ as *mut _).context("tun set iff")?;
};
Ok((file, req.get_name()?))
}

View File

@ -0,0 +1,102 @@
// Copyright (c) 2019-2022 Alibaba Cloud
// Copyright (c) 2019-2022 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
use std::{io, mem};
use anyhow::{Context, Result};
use nix::sys::socket::{socket, AddressFamily, SockFlag, SockType};
use scopeguard::defer;
use super::macros::{get_name, set_name};
/// FW version length
const ETHTOOL_FW_VERSION_LEN: usize = 32;
/// bus info length
const ETHTOOL_BUS_INFO_LEN: usize = 32;
/// erom version length
const ETHTOOL_EROM_VERSION_LEN: usize = 32;
/// driver info
const ETHTOOL_DRIVER_INFO: u32 = 0x00000003;
/// Ethtool interface define 0x8946
const IOCTL_ETHTOOL_INTERFACE: u32 = 0x8946;
nix::ioctl_readwrite_bad!(ioctl_ethtool, IOCTL_ETHTOOL_INTERFACE, DeviceInfoReq);
#[repr(C)]
pub union DeviceInfoIfru {
pub ifr_addr: libc::sockaddr,
pub ifr_data: *mut libc::c_char,
}
type IfName = [u8; libc::IFNAMSIZ];
#[repr(C)]
pub struct DeviceInfoReq {
pub ifr_name: IfName,
pub ifr_ifru: DeviceInfoIfru,
}
impl DeviceInfoReq {
pub fn from_name(name: &str) -> io::Result<DeviceInfoReq> {
let mut req: DeviceInfoReq = unsafe { mem::zeroed() };
req.set_name(name)?;
Ok(req)
}
pub fn set_name(&mut self, name: &str) -> io::Result<()> {
set_name!(self.ifr_name, name)
}
}
#[repr(C)]
#[derive(Debug, Clone)]
struct Driver {
pub cmd: u32,
pub driver: [u8; 32],
pub version: [u8; 32],
pub fw_version: [u8; ETHTOOL_FW_VERSION_LEN],
pub bus_info: [u8; ETHTOOL_BUS_INFO_LEN],
pub erom_version: [u8; ETHTOOL_EROM_VERSION_LEN],
pub reserved2: [u8; 12],
pub n_priv_flags: u32,
pub n_stats: u32,
pub test_info_len: u32,
pub eedump_len: u32,
pub regdump_len: u32,
}
#[derive(Debug, Clone)]
pub struct DriverInfo {
pub driver: String,
pub bus_info: String,
}
pub fn get_driver_info(name: &str) -> Result<DriverInfo> {
let mut req = DeviceInfoReq::from_name(name).context(format!("ifreq from name {}", name))?;
let mut ereq: Driver = unsafe { mem::zeroed() };
ereq.cmd = ETHTOOL_DRIVER_INFO;
req.ifr_ifru.ifr_data = &mut ereq as *mut _ as *mut _;
let fd = socket(
AddressFamily::Inet,
SockType::Datagram,
SockFlag::empty(),
None,
)
.context("new socket")?;
defer!({
let _ = nix::unistd::close(fd);
});
unsafe { ioctl_ethtool(fd, &mut req).context("ioctl ethtool")? };
Ok(DriverInfo {
driver: get_name!(ereq.driver).context("get driver name")?,
bus_info: get_name!(ereq.bus_info).context("get bus info name")?,
})
}

View File

@ -0,0 +1,48 @@
// Copyright (c) 2019-2022 Alibaba Cloud
// Copyright (c) 2019-2022 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
macro_rules! set_name {
($name_field:expr, $name_str:expr) => {{
let name_c = &::std::ffi::CString::new($name_str.to_owned()).map_err(|_| {
::std::io::Error::new(
::std::io::ErrorKind::InvalidInput,
"malformed interface name",
)
})?;
let name_slice = name_c.as_bytes_with_nul();
if name_slice.len() > libc::IFNAMSIZ {
return Err(io::Error::new(::std::io::ErrorKind::InvalidInput, "").into());
}
$name_field[..name_slice.len()].clone_from_slice(name_slice);
Ok(())
}};
}
macro_rules! get_name {
($name_field:expr) => {{
let nul_pos = match $name_field.iter().position(|x| *x == 0) {
Some(p) => p,
None => {
return Err(::std::io::Error::new(
::std::io::ErrorKind::InvalidData,
"malformed interface name",
)
.into())
}
};
std::ffi::CString::new(&$name_field[..nul_pos])
.unwrap()
.into_string()
.map_err(|_| {
std::io::Error::new(std::io::ErrorKind::InvalidData, "malformed interface name")
})
}};
}
pub(crate) use get_name;
pub(crate) use set_name;

View File

@ -0,0 +1,288 @@
// Copyright (c) 2019-2022 Alibaba Cloud
// Copyright (c) 2019-2022 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
use netlink_packet_route::{
link::nlas::{Info, InfoBridge, InfoData, InfoKind, Nla},
LinkMessage,
};
use super::{Link, LinkAttrs};
pub fn get_link_from_message(mut msg: LinkMessage) -> Box<dyn Link> {
let mut base = LinkAttrs {
index: msg.header.index,
flags: msg.header.flags,
link_layer_type: msg.header.link_layer_type,
..Default::default()
};
if msg.header.flags & libc::IFF_PROMISC as u32 != 0 {
base.promisc = 1;
}
let mut link: Option<Box<dyn Link>> = None;
while let Some(attr) = msg.nlas.pop() {
match attr {
Nla::Info(infos) => {
link = Some(link_info(infos));
}
Nla::Address(a) => {
base.hardware_addr = a;
}
Nla::IfName(i) => {
base.name = i;
}
Nla::Mtu(m) => {
base.mtu = m;
}
Nla::Link(l) => {
base.parent_index = l;
}
Nla::Master(m) => {
base.master_index = m;
}
Nla::TxQueueLen(t) => {
base.txq_len = t;
}
Nla::IfAlias(a) => {
base.alias = a;
}
Nla::Stats(_s) => {}
Nla::Stats64(_s) => {}
Nla::Xdp(_x) => {}
Nla::ProtoInfo(_) => {}
Nla::OperState(_) => {}
Nla::NetnsId(n) => {
base.net_ns_id = n;
}
Nla::GsoMaxSize(i) => {
base.gso_max_size = i;
}
Nla::GsoMaxSegs(e) => {
base.gso_max_seqs = e;
}
Nla::VfInfoList(_) => {}
Nla::NumTxQueues(t) => {
base.num_tx_queues = t;
}
Nla::NumRxQueues(r) => {
base.num_rx_queues = r;
}
Nla::Group(g) => {
base.group = g;
}
_ => {
// skip unused attr
}
}
}
let mut ret = link.unwrap_or_else(|| Box::new(Device::default()));
ret.set_attrs(base);
ret
}
fn link_info(mut infos: Vec<Info>) -> Box<dyn Link> {
let mut link: Option<Box<dyn Link>> = None;
while let Some(info) = infos.pop() {
match info {
Info::Kind(kind) => match kind {
InfoKind::Tun => {
if link.is_none() {
link = Some(Box::new(Tuntap::default()));
}
}
InfoKind::Veth => {
if link.is_none() {
link = Some(Box::new(Veth::default()));
}
}
InfoKind::IpVlan => {
if link.is_none() {
link = Some(Box::new(IpVlan::default()));
}
}
InfoKind::Vlan => {
if link.is_none() {
link = Some(Box::new(Vlan::default()));
}
}
InfoKind::Bridge => {
if link.is_none() {
link = Some(Box::new(Bridge::default()));
}
}
_ => {
if link.is_none() {
link = Some(Box::new(Device::default()));
}
}
},
Info::Data(data) => match data {
InfoData::Tun(_) => {
link = Some(Box::new(Tuntap::default()));
}
InfoData::Veth(_) => {
link = Some(Box::new(Veth::default()));
}
InfoData::IpVlan(_) => {
link = Some(Box::new(IpVlan::default()));
}
InfoData::Vlan(_) => {
link = Some(Box::new(Vlan::default()));
}
InfoData::Bridge(ibs) => {
link = Some(Box::new(parse_bridge(ibs)));
}
_ => {
link = Some(Box::new(Device::default()));
}
},
Info::SlaveKind(_sk) => {
if link.is_none() {
link = Some(Box::new(Device::default()));
}
}
Info::SlaveData(_sd) => {
link = Some(Box::new(Device::default()));
}
_ => {
link = Some(Box::new(Device::default()));
}
}
}
link.unwrap()
}
fn parse_bridge(mut ibs: Vec<InfoBridge>) -> Bridge {
let mut bridge = Bridge::default();
while let Some(ib) = ibs.pop() {
match ib {
InfoBridge::HelloTime(ht) => {
bridge.hello_time = ht;
}
InfoBridge::MulticastSnooping(m) => {
bridge.multicast_snooping = m == 1;
}
InfoBridge::VlanFiltering(v) => {
bridge.vlan_filtering = v == 1;
}
_ => {}
}
}
bridge
}
#[derive(Debug, PartialEq, Eq, Clone, Default)]
pub struct Device {
attrs: Option<LinkAttrs>,
}
impl Link for Device {
fn attrs(&self) -> &LinkAttrs {
self.attrs.as_ref().unwrap()
}
fn set_attrs(&mut self, attr: LinkAttrs) {
self.attrs = Some(attr);
}
fn r#type(&self) -> &'static str {
"device"
}
}
#[derive(Debug, PartialEq, Eq, Clone, Default)]
pub struct Tuntap {
pub attrs: Option<LinkAttrs>,
}
impl Link for Tuntap {
fn attrs(&self) -> &LinkAttrs {
self.attrs.as_ref().unwrap()
}
fn set_attrs(&mut self, attr: LinkAttrs) {
self.attrs = Some(attr);
}
fn r#type(&self) -> &'static str {
"tuntap"
}
}
#[derive(Debug, PartialEq, Eq, Clone, Default)]
pub struct Veth {
attrs: Option<LinkAttrs>,
/// on create only
pub peer_name: String,
}
impl Link for Veth {
fn attrs(&self) -> &LinkAttrs {
self.attrs.as_ref().unwrap()
}
fn set_attrs(&mut self, attr: LinkAttrs) {
self.attrs = Some(attr);
}
fn r#type(&self) -> &'static str {
"veth"
}
}
#[derive(Debug, PartialEq, Eq, Clone, Default)]
pub struct IpVlan {
attrs: Option<LinkAttrs>,
/// on create only
pub peer_name: String,
}
impl Link for IpVlan {
fn attrs(&self) -> &LinkAttrs {
self.attrs.as_ref().unwrap()
}
fn set_attrs(&mut self, attr: LinkAttrs) {
self.attrs = Some(attr);
}
fn r#type(&self) -> &'static str {
"ipvlan"
}
}
#[derive(Debug, PartialEq, Eq, Clone, Default)]
pub struct Vlan {
attrs: Option<LinkAttrs>,
/// on create only
pub peer_name: String,
}
impl Link for Vlan {
fn attrs(&self) -> &LinkAttrs {
self.attrs.as_ref().unwrap()
}
fn set_attrs(&mut self, attr: LinkAttrs) {
self.attrs = Some(attr);
}
fn r#type(&self) -> &'static str {
"vlan"
}
}
#[derive(Debug, PartialEq, Eq, Clone, Default)]
pub struct Bridge {
attrs: Option<LinkAttrs>,
pub multicast_snooping: bool,
pub hello_time: u32,
pub vlan_filtering: bool,
}
impl Link for Bridge {
fn attrs(&self) -> &LinkAttrs {
self.attrs.as_ref().unwrap()
}
fn set_attrs(&mut self, attr: LinkAttrs) {
self.attrs = Some(attr);
}
fn r#type(&self) -> &'static str {
"bridge"
}
}

View File

@ -0,0 +1,145 @@
// Copyright (c) 2019-2022 Alibaba Cloud
// Copyright (c) 2019-2022 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
mod create;
pub use create::{create_link, LinkType};
mod driver_info;
pub use driver_info::{get_driver_info, DriverInfo};
mod macros;
mod manager;
pub use manager::get_link_from_message;
use std::os::unix::io::RawFd;
use netlink_packet_route::link::nlas::State;
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum Namespace {
NetNsPid(u32),
#[allow(dead_code)]
NetNsFd(RawFd),
}
impl Default for Namespace {
fn default() -> Self {
Self::NetNsPid(0)
}
}
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum LinkStatistics {
#[allow(dead_code)]
Stats(LinkStatistics32),
Stats64(LinkStatistics64),
}
impl Default for LinkStatistics {
fn default() -> Self {
Self::Stats64(LinkStatistics64::default())
}
}
#[derive(Debug, PartialEq, Eq, Clone, Default)]
pub struct LinkStatistics32 {
pub rx_packets: u32,
pub tx_packets: u32,
pub rx_bytes: u32,
pub tx_bytes: u32,
pub rx_errors: u32,
pub tx_errors: u32,
pub rx_dropped: u32,
pub tx_dropped: u32,
pub multicast: u32,
pub collisions: u32,
pub rx_length_errors: u32,
pub rx_over_errors: u32,
pub rx_crc_errors: u32,
pub rx_frame_errors: u32,
pub rx_fifo_errors: u32,
pub rx_missed_errors: u32,
pub tx_aborted_errors: u32,
pub tx_carrier_errors: u32,
pub tx_fifo_errors: u32,
pub tx_heartbeat_errors: u32,
pub tx_window_errors: u32,
pub rx_compressed: u32,
pub tx_compressed: u32,
}
#[derive(Debug, PartialEq, Eq, Clone, Default)]
pub struct LinkStatistics64 {
pub rx_packets: u64,
pub tx_packets: u64,
pub rx_bytes: u64,
pub tx_bytes: u64,
pub rx_errors: u64,
pub tx_errors: u64,
pub rx_dropped: u64,
pub tx_dropped: u64,
pub multicast: u64,
pub collisions: u64,
pub rx_length_errors: u64,
pub rx_over_errors: u64,
pub rx_crc_errors: u64,
pub rx_frame_errors: u64,
pub rx_fifo_errors: u64,
pub rx_missed_errors: u64,
pub tx_aborted_errors: u64,
pub tx_carrier_errors: u64,
pub tx_fifo_errors: u64,
pub tx_heartbeat_errors: u64,
pub tx_window_errors: u64,
pub rx_compressed: u64,
pub tx_compressed: u64,
}
#[derive(Debug, PartialEq, Eq, Clone, Default)]
pub struct LinkXdp {
pub fd: RawFd,
pub attached: bool,
pub flags: u32,
pub prog_id: u32,
}
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct OperState(State);
impl Default for OperState {
fn default() -> Self {
Self(State::Unknown)
}
}
#[derive(Debug, PartialEq, Eq, Clone, Default)]
pub struct LinkAttrs {
pub index: u32,
pub mtu: u32,
pub txq_len: u32,
pub name: String,
pub hardware_addr: Vec<u8>,
pub flags: u32,
pub parent_index: u32,
pub master_index: u32,
pub namespace: Namespace,
pub alias: String,
pub statistics: LinkStatistics,
pub promisc: u32,
pub xdp: LinkXdp,
pub link_layer_type: u16,
pub proto_info: Vec<u8>,
pub oper_state: OperState,
pub net_ns_id: i32,
pub num_tx_queues: u32,
pub num_rx_queues: u32,
pub gso_max_size: u32,
pub gso_max_seqs: u32,
pub vfs: Vec<u8>,
pub group: u32,
}
pub trait Link: Send + Sync {
fn attrs(&self) -> &LinkAttrs;
fn set_attrs(&mut self, attr: LinkAttrs);
fn r#type(&self) -> &str;
}

View File

@ -0,0 +1,35 @@
// Copyright (c) 2019-2022 Alibaba Cloud
// Copyright (c) 2019-2022 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
pub(crate) mod address;
pub(crate) mod link;
pub(crate) mod netns;
use anyhow::{anyhow, Result};
pub(crate) fn parse_mac(s: &str) -> Option<hypervisor::Address> {
let v: Vec<_> = s.split(':').collect();
if v.len() != 6 {
return None;
}
let mut bytes = [0u8; 6];
for i in 0..6 {
bytes[i] = u8::from_str_radix(v[i], 16).ok()?;
}
Some(hypervisor::Address(bytes))
}
pub(crate) fn get_mac_addr(b: &[u8]) -> Result<String> {
if b.len() != 6 {
return Err(anyhow!("invalid mac address {:?}", b));
} else {
Ok(format!(
"{:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}",
b[0], b[1], b[2], b[3], b[4], b[5]
))
}
}

View File

@ -0,0 +1,51 @@
// Copyright (c) 2019-2022 Alibaba Cloud
// Copyright (c) 2019-2022 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
use std::{fs::File, os::unix::io::AsRawFd};
use anyhow::{Context, Result};
use nix::sched::{setns, CloneFlags};
use nix::unistd::{getpid, gettid};
pub(crate) struct NetnsGuard {
old_netns: Option<File>,
}
impl NetnsGuard {
pub(crate) fn new(new_netns_path: &str) -> Result<Self> {
let old_netns = if !new_netns_path.is_empty() {
let current_netns_path = format!("/proc/{}/task/{}/ns/{}", getpid(), gettid(), "net");
let old_netns = File::open(&current_netns_path)
.context(format!("open current netns path {}", &current_netns_path))?;
let new_netns = File::open(&new_netns_path)
.context(format!("open new netns path {}", &new_netns_path))?;
setns(new_netns.as_raw_fd(), CloneFlags::CLONE_NEWNET)
.context("set netns to new netns")?;
info!(
sl!(),
"set netns from old {:?} to new {:?} tid {}",
old_netns,
new_netns,
gettid().to_string()
);
Some(old_netns)
} else {
warn!(sl!(), "skip to set netns for empty netns path");
None
};
Ok(Self { old_netns })
}
}
impl Drop for NetnsGuard {
fn drop(&mut self) {
if let Some(old_netns) = self.old_netns.as_ref() {
let old_netns_fd = old_netns.as_raw_fd();
setns(old_netns_fd, CloneFlags::CLONE_NEWNET).unwrap();
info!(sl!(), "set netns to old {:?}", old_netns_fd);
}
}
}

View File

@ -11,7 +11,6 @@ use std::{sync::Arc, vec::Vec};
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
use kata_types::mount::Mount;
use log::{error, info};
use nix::sys::stat::{self, SFlag};
use tokio::sync::RwLock;
@ -90,6 +89,7 @@ impl RootFsResource {
let inner = self.inner.read().await;
for r in &inner.rootfs {
info!(
sl!(),
"rootfs {:?}: count {}",
r.get_guest_rootfs_path().await,
Arc::strong_count(r)
@ -114,7 +114,7 @@ fn get_block_device(file_path: &str) -> Option<u64> {
}
}
Err(err) => {
error!("failed to stat for {} {:?}", file_path, err);
error!(sl!(), "failed to stat for {} {:?}", file_path, err);
return None;
}
};

View File

@ -7,7 +7,6 @@
use std::{path::Path, sync::Arc};
use anyhow::{anyhow, Context, Result};
use log::debug;
use nix::sys::stat::{stat, SFlag};
use super::Volume;
@ -53,6 +52,7 @@ impl ShareFsVolume {
| SFlag::S_IFREG;
if !file_type.contains(SFlag::from_bits_truncate(stat.st_mode)) {
debug!(
sl!(),
"Ignoring non-regular file as FS sharing not supported. mount: {:?}",
m
);