mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-09-18 15:28:10 +00:00
Merge pull request #8866 from Apokleos/netdev-qemu-rs
runtime-rs: add netdev params to cmdline for qemu-rs.
This commit is contained in:
2
src/libs/Cargo.lock
generated
2
src/libs/Cargo.lock
generated
@@ -692,6 +692,7 @@ dependencies = [
|
||||
"chrono",
|
||||
"common-path",
|
||||
"fail",
|
||||
"hex",
|
||||
"kata-types",
|
||||
"lazy_static",
|
||||
"libc",
|
||||
@@ -708,6 +709,7 @@ dependencies = [
|
||||
"slog-scope",
|
||||
"subprocess",
|
||||
"tempfile",
|
||||
"test-utils",
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
|
@@ -28,6 +28,7 @@ slog-scope = "4.4.0"
|
||||
subprocess = "0.2.8"
|
||||
rand = "0.8.5"
|
||||
thiserror = "1.0.30"
|
||||
hex = "0.4.3"
|
||||
|
||||
kata-types = { path = "../kata-types" }
|
||||
oci = { path = "../oci" }
|
||||
@@ -37,3 +38,4 @@ safe-path = { path = "../safe-path" }
|
||||
num_cpus = "1.13.1"
|
||||
serial_test = "0.5.1"
|
||||
tempfile = "3.2.0"
|
||||
test-utils = { path = "../test-utils" }
|
||||
|
@@ -12,6 +12,7 @@ pub mod fs;
|
||||
pub mod hooks;
|
||||
pub mod k8s;
|
||||
pub mod mount;
|
||||
pub mod netns;
|
||||
pub mod numa;
|
||||
pub mod protection;
|
||||
pub mod rand;
|
||||
|
@@ -11,6 +11,8 @@ use nix::sched::{setns, CloneFlags};
|
||||
use nix::unistd::{getpid, gettid};
|
||||
use rand::Rng;
|
||||
|
||||
use kata_types::sl;
|
||||
|
||||
pub struct NetnsGuard {
|
||||
old_netns: Option<File>,
|
||||
}
|
2
src/runtime-rs/Cargo.lock
generated
2
src/runtime-rs/Cargo.lock
generated
@@ -1796,6 +1796,7 @@ dependencies = [
|
||||
"chrono",
|
||||
"common-path",
|
||||
"fail",
|
||||
"hex",
|
||||
"kata-types",
|
||||
"lazy_static",
|
||||
"libc",
|
||||
@@ -3211,7 +3212,6 @@ dependencies = [
|
||||
"byte-unit 4.0.19",
|
||||
"cgroups-rs",
|
||||
"futures 0.3.28",
|
||||
"hex",
|
||||
"hypervisor",
|
||||
"kata-sys-util",
|
||||
"kata-types",
|
||||
|
@@ -29,6 +29,7 @@ path-clean = "1.0.1"
|
||||
lazy_static = "1.4"
|
||||
tracing = "0.1.36"
|
||||
|
||||
dbs-utils = { path = "../../../dragonball/src/dbs_utils" }
|
||||
kata-sys-util = { path = "../../../libs/kata-sys-util" }
|
||||
kata-types = { path = "../../../libs/kata-types" }
|
||||
logging = { path = "../../../libs/logging" }
|
||||
@@ -43,7 +44,6 @@ crossbeam-channel = "0.5.6"
|
||||
|
||||
[target.'cfg(not(target_arch = "s390x"))'.dependencies]
|
||||
dragonball = { path = "../../../dragonball", features = ["atomic-guest-memory", "virtio-vsock", "hotplug", "virtio-blk", "virtio-net", "virtio-fs", "vhost-net", "dbs-upcall", "virtio-mem", "virtio-balloon", "vhost-user-net", "host-device"] }
|
||||
dbs-utils = { path = "../../../dragonball/src/dbs_utils" }
|
||||
|
||||
[features]
|
||||
default = []
|
||||
|
@@ -13,7 +13,7 @@ use crate::device::topology::PCIeTopology;
|
||||
use crate::device::{Device, DeviceType};
|
||||
use crate::Hypervisor as hypervisor;
|
||||
|
||||
#[derive(Clone)]
|
||||
#[derive(Clone, Default)]
|
||||
pub struct Address(pub [u8; 6]);
|
||||
|
||||
impl fmt::Debug for Address {
|
||||
|
@@ -3,12 +3,15 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use crate::{kernel_param::KernelParams, HypervisorConfig};
|
||||
use super::network::{generate_netdev_fds, NetDevice};
|
||||
use crate::utils::clear_fd_flags;
|
||||
use crate::{kernel_param::KernelParams, HypervisorConfig, NetworkConfig};
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use async_trait::async_trait;
|
||||
use nix::fcntl;
|
||||
use std::fs::read_to_string;
|
||||
use kata_types::config::hypervisor::NetworkInfo;
|
||||
use std::fs::{read_to_string, File};
|
||||
use std::os::fd::AsRawFd;
|
||||
use std::os::unix::io::RawFd;
|
||||
|
||||
// These should have been called MiB and GiB for better readability but the
|
||||
@@ -703,6 +706,25 @@ impl ToQemuParams for Serial {
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl ToQemuParams for NetDevice {
|
||||
// qemu_params returns the qemu parameters built out of this network device.
|
||||
async fn qemu_params(&self) -> Result<Vec<String>> {
|
||||
let mut qemu_params: Vec<String> = Vec::new();
|
||||
|
||||
let netdev_params = self.qemu_netdev_params()?;
|
||||
let device_params = self.qemu_device_params()?;
|
||||
|
||||
qemu_params.push("-netdev".to_owned());
|
||||
qemu_params.push(netdev_params.join(","));
|
||||
|
||||
qemu_params.push("-device".to_owned());
|
||||
qemu_params.push(device_params.join(","));
|
||||
|
||||
Ok(qemu_params)
|
||||
}
|
||||
}
|
||||
|
||||
fn is_running_in_vm() -> Result<bool> {
|
||||
let res = read_to_string("/proc/cpuinfo")?
|
||||
.lines()
|
||||
@@ -782,16 +804,7 @@ impl<'a> QemuCmdLine<'a> {
|
||||
}
|
||||
|
||||
pub fn add_vsock(&mut self, vhostfd: RawFd, guest_cid: u32) -> Result<()> {
|
||||
// Clear the O_CLOEXEC which is set by default by Rust standard library
|
||||
// as it would obviously prevent passing the descriptor to the qemu process.
|
||||
if let Err(err) = fcntl::fcntl(vhostfd, fcntl::FcntlArg::F_SETFD(fcntl::FdFlag::empty())) {
|
||||
info!(
|
||||
sl!(),
|
||||
"couldn't clear O_CLOEXEC on vsock, communication with agent will not work: {:?}",
|
||||
err
|
||||
);
|
||||
return Err(err.into());
|
||||
}
|
||||
clear_fd_flags(vhostfd).context("clear flags failed")?;
|
||||
|
||||
let mut vhost_vsock_pci = VhostVsockPci::new(vhostfd, guest_cid);
|
||||
|
||||
@@ -855,6 +868,27 @@ impl<'a> QemuCmdLine<'a> {
|
||||
));
|
||||
}
|
||||
|
||||
pub fn add_network_device(
|
||||
&mut self,
|
||||
config: &NetworkConfig,
|
||||
network_info: &NetworkInfo,
|
||||
) -> Result<Vec<File>> {
|
||||
let disable_vhost_net = network_info.disable_vhost_net;
|
||||
let queues = network_info.network_queues;
|
||||
|
||||
let (tun_files, vhost_files) = generate_netdev_fds(config, queues)?;
|
||||
let tun_fds: Vec<i32> = tun_files.iter().map(|dev| dev.as_raw_fd()).collect();
|
||||
let vhost_fds: Vec<i32> = vhost_files.iter().map(|dev| dev.as_raw_fd()).collect();
|
||||
|
||||
let net_device = NetDevice::new(config, disable_vhost_net, tun_fds, vhost_fds);
|
||||
self.devices.push(Box::new(net_device));
|
||||
|
||||
let dev_files = vec![tun_files, vhost_files];
|
||||
let fds: Vec<File> = dev_files.into_iter().flatten().collect();
|
||||
|
||||
Ok(fds)
|
||||
}
|
||||
|
||||
pub async fn build(&self) -> Result<Vec<String>> {
|
||||
let mut result = Vec::new();
|
||||
|
||||
|
@@ -5,11 +5,12 @@
|
||||
|
||||
use super::cmdline_generator::QemuCmdLine;
|
||||
use crate::{
|
||||
hypervisor_persist::HypervisorState, HypervisorConfig, MemoryConfig, VcpuThreadIds,
|
||||
VsockDevice, HYPERVISOR_QEMU,
|
||||
hypervisor_persist::HypervisorState, utils::enter_netns, HypervisorConfig, MemoryConfig,
|
||||
VcpuThreadIds, VsockDevice, HYPERVISOR_QEMU,
|
||||
};
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use async_trait::async_trait;
|
||||
use kata_sys_util::netns::NetnsGuard;
|
||||
use kata_types::{
|
||||
capabilities::{Capabilities, CapabilityBits},
|
||||
config::KATA_PATH,
|
||||
@@ -34,6 +35,7 @@ pub struct QemuInner {
|
||||
|
||||
config: HypervisorConfig,
|
||||
devices: Vec<DeviceType>,
|
||||
netns: Option<String>,
|
||||
}
|
||||
|
||||
impl QemuInner {
|
||||
@@ -43,12 +45,14 @@ impl QemuInner {
|
||||
qemu_process: None,
|
||||
config: Default::default(),
|
||||
devices: Vec::new(),
|
||||
netns: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn prepare_vm(&mut self, id: &str, _netns: Option<String>) -> Result<()> {
|
||||
pub(crate) async fn prepare_vm(&mut self, id: &str, netns: Option<String>) -> Result<()> {
|
||||
info!(sl!(), "Preparing QEMU VM");
|
||||
self.id = id.to_string();
|
||||
self.netns = netns;
|
||||
|
||||
let vm_path = [KATA_PATH, self.id.as_str()].join("/");
|
||||
std::fs::create_dir_all(vm_path)?;
|
||||
@@ -58,6 +62,7 @@ impl QemuInner {
|
||||
|
||||
pub(crate) async fn start_vm(&mut self, _timeout: i32) -> Result<()> {
|
||||
info!(sl!(), "Starting QEMU VM");
|
||||
let netns = self.netns.clone().unwrap_or_default();
|
||||
|
||||
let mut cmdline = QemuCmdLine::new(&self.id, &self.config)?;
|
||||
|
||||
@@ -65,6 +70,10 @@ impl QemuInner {
|
||||
// descriptor needs to stay open until the qemu process launches.
|
||||
// This is why we need to store it in a variable at this scope.
|
||||
let mut _vhost_fd = None;
|
||||
// We need to keep the vhost-net/tuntap file descriptor open until the QEMU process launches.
|
||||
// However, we're likely not interested in the specific type of file descriptor itself. We just
|
||||
// want to ensure any fds associated with network devices remain open within the current scope.
|
||||
let mut _fds_for_qemu: Vec<std::fs::File> = Vec::new();
|
||||
|
||||
for device in &mut self.devices {
|
||||
match device {
|
||||
@@ -99,6 +108,14 @@ impl QemuInner {
|
||||
}
|
||||
}
|
||||
}
|
||||
DeviceType::Network(network) => {
|
||||
let network_info = &self.config.network_info;
|
||||
|
||||
// we need ensure add_network_device happens in netns.
|
||||
let _netns_guard = NetnsGuard::new(&netns).context("new netns guard")?;
|
||||
|
||||
_fds_for_qemu = cmdline.add_network_device(&network.config, network_info)?;
|
||||
}
|
||||
_ => info!(sl!(), "qemu cmdline: unsupported device: {:?}", device),
|
||||
}
|
||||
}
|
||||
@@ -112,6 +129,16 @@ impl QemuInner {
|
||||
command.args(cmdline.build().await?);
|
||||
|
||||
info!(sl!(), "qemu cmd: {:?}", command);
|
||||
|
||||
// we need move the qemu process into Network Namespace.
|
||||
unsafe {
|
||||
let _pre_exec = command.pre_exec(move || {
|
||||
let _ = enter_netns(&netns);
|
||||
|
||||
Ok(())
|
||||
});
|
||||
}
|
||||
|
||||
self.qemu_process = Some(command.stderr(Stdio::piped()).spawn()?);
|
||||
info!(sl!(), "qemu process started");
|
||||
|
||||
@@ -357,6 +384,7 @@ impl Persist for QemuInner {
|
||||
qemu_process: None,
|
||||
config: hypervisor_state.config,
|
||||
devices: Vec::new(),
|
||||
netns: None,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@@ -5,6 +5,7 @@
|
||||
|
||||
mod cmdline_generator;
|
||||
mod inner;
|
||||
mod network;
|
||||
|
||||
use crate::device::DeviceType;
|
||||
use crate::hypervisor_persist::HypervisorState;
|
||||
|
339
src/runtime-rs/crates/hypervisor/src/qemu/network.rs
Normal file
339
src/runtime-rs/crates/hypervisor/src/qemu/network.rs
Normal file
@@ -0,0 +1,339 @@
|
||||
// Copyright (c) 2024 Ant Group
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::convert::TryFrom;
|
||||
use std::fs::{File, OpenOptions};
|
||||
use std::os::fd::RawFd;
|
||||
|
||||
use crate::utils::{clear_fd_flags, open_named_tuntap};
|
||||
use crate::{Address, NetworkConfig};
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
|
||||
// VirtioTransport is the transport in use for a virtio device.
|
||||
#[derive(Debug, Default, PartialEq)]
|
||||
enum VirtioTransport {
|
||||
#[default]
|
||||
Pci,
|
||||
}
|
||||
|
||||
impl ToString for VirtioTransport {
|
||||
fn to_string(&self) -> String {
|
||||
match self {
|
||||
VirtioTransport::Pci => "pci".to_owned(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<&str> for VirtioTransport {
|
||||
type Error = anyhow::Error;
|
||||
|
||||
fn try_from(_transport: &str) -> Result<Self> {
|
||||
Ok(VirtioTransport::Pci)
|
||||
}
|
||||
}
|
||||
|
||||
// DeviceDriver is set in "-device driver=<DeviceDriver>"
|
||||
#[derive(Debug, Default, PartialEq)]
|
||||
enum DeviceDriver {
|
||||
// VirtioNetPci("virtio-net-pci") is a virtio-net device using PCI transport.
|
||||
#[default]
|
||||
VirtioNetPci,
|
||||
|
||||
// VfioPci("vfio-pci") is an attached host device using PCI transport.
|
||||
VfioPci,
|
||||
}
|
||||
|
||||
impl ToString for DeviceDriver {
|
||||
fn to_string(&self) -> String {
|
||||
match self {
|
||||
DeviceDriver::VirtioNetPci => "virtio-net-pci".to_owned(),
|
||||
DeviceDriver::VfioPci => "vfio-pci".to_owned(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<&str> for DeviceDriver {
|
||||
type Error = anyhow::Error;
|
||||
|
||||
fn try_from(device_driver: &str) -> Result<Self> {
|
||||
Ok(match device_driver {
|
||||
"virtio-net-pci" => DeviceDriver::VirtioNetPci,
|
||||
"vfio-pci" => DeviceDriver::VfioPci,
|
||||
_ => return Err(anyhow!("unsupported transport")),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, PartialEq)]
|
||||
enum NetDev {
|
||||
/// Tap("tap") is a tap networking device type.
|
||||
#[default]
|
||||
Tap,
|
||||
|
||||
/// MacTap("macvtap") is a macvtap networking device type.
|
||||
#[allow(dead_code)]
|
||||
MacvTap,
|
||||
}
|
||||
|
||||
impl ToString for NetDev {
|
||||
fn to_string(&self) -> String {
|
||||
match self {
|
||||
NetDev::Tap | NetDev::MacvTap => "tap".to_owned(),
|
||||
// VhostUser is to be added in future.
|
||||
// NetDev::VhostUser => "vhost-user".to_owned(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// NetDevice represents a guest networking device
|
||||
// -netdev tap,id=hostnet0,vhost=on,vhostfds=x:y:z,fds=a:b:c
|
||||
// -device virtio-net-pci,netdev=hostnet0,id=net0,mac=24:42:54:20:50:46,bus=pci.0,addr=0x7
|
||||
#[derive(Debug, Default)]
|
||||
pub struct NetDevice {
|
||||
// device_type is the netdev type (e.g. tap).
|
||||
device_type: NetDev,
|
||||
|
||||
// driver is the qemu device driver
|
||||
device_driver: DeviceDriver,
|
||||
|
||||
// id is the net device identifier.
|
||||
id: String,
|
||||
|
||||
// if_name is the interface name,
|
||||
if_name: String,
|
||||
|
||||
// bus is the bus path name of a PCI device.
|
||||
bus: String,
|
||||
|
||||
// pci_addr is the address offset of a PCI device.
|
||||
pci_addr: String,
|
||||
|
||||
// fds represents the list of already existing file descriptors to be used.
|
||||
// This is mostly useful for mq support.
|
||||
// {
|
||||
// fds: Vec<File>,
|
||||
// vhost_fds: Vec<File>,
|
||||
// }
|
||||
fds: HashMap<String, Vec<RawFd>>,
|
||||
|
||||
// disable_vhost_net disables virtio device emulation from the host kernel instead of from qemu.
|
||||
disable_vhost_net: bool,
|
||||
|
||||
// mac_address is the networking device interface MAC address.
|
||||
mac_address: Address,
|
||||
|
||||
// disable_modern prevents qemu from relying on fast MMIO.
|
||||
disable_modern: bool,
|
||||
|
||||
// transport is the virtio transport for this device.
|
||||
transport: VirtioTransport,
|
||||
}
|
||||
|
||||
impl NetDevice {
|
||||
#[allow(dead_code)]
|
||||
pub fn new(
|
||||
config: &NetworkConfig,
|
||||
disable_vhost_net: bool,
|
||||
tun_fds: Vec<i32>,
|
||||
vhost_fds: Vec<i32>,
|
||||
) -> Self {
|
||||
// we have only two <Key, Value>s:
|
||||
// {
|
||||
// "fds": vec![fd1, fd2,...],
|
||||
// "vhostfds": vec![fd3, fd4,...],
|
||||
// }
|
||||
let mut fds: HashMap<String, Vec<RawFd>> = HashMap::with_capacity(2);
|
||||
fds.insert("fds".to_owned(), tun_fds);
|
||||
fds.insert("vhostfds".to_owned(), vhost_fds);
|
||||
|
||||
// FIXME(Hard Code): It's safe to unwrap here because of the valid input.
|
||||
// Ideally device_driver should be derived from transport to minimize code duplication.
|
||||
// While currently we focus on PCI for the initial implementation.
|
||||
// And we'll support other transports, e.g. s390x's CCW.
|
||||
let device_driver = DeviceDriver::try_from("virtio-net-pci").unwrap();
|
||||
let transport = VirtioTransport::try_from("pci").unwrap();
|
||||
|
||||
NetDevice {
|
||||
device_type: NetDev::Tap,
|
||||
device_driver,
|
||||
id: format!("network-{}", &config.index),
|
||||
if_name: config.virt_iface_name.clone(),
|
||||
mac_address: config.guest_mac.clone().unwrap(),
|
||||
disable_vhost_net,
|
||||
disable_modern: false,
|
||||
fds,
|
||||
transport,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
fn mq_param(&self) -> String {
|
||||
let mut params = vec!["mq=on".to_owned()];
|
||||
if self.transport == VirtioTransport::Pci {
|
||||
// https://www.linux-kvm.org/page/Multiqueue
|
||||
// -netdev tap,vhost=on,queues=N
|
||||
// enable mq and specify msix vectors in qemu cmdline
|
||||
// (2N+2 vectors, N for tx queues, N for rx queues, 1 for config, and one for possible control vq)
|
||||
// -device virtio-net-pci,mq=on,vectors=2N+2...
|
||||
// enable mq in guest by 'ethtool -L eth0 combined $queue_num'
|
||||
// Clearlinux automatically sets up the queues properly
|
||||
// The agent implementation should do this to ensure that it is
|
||||
// always set
|
||||
|
||||
// vectors = len(netdev.FDs) * 2 + 2
|
||||
if let Some(fds) = self.fds.get("fds") {
|
||||
params.push(format!("vectors={}", 2 * fds.len() + 2));
|
||||
}
|
||||
}
|
||||
|
||||
params.join(",")
|
||||
}
|
||||
|
||||
pub fn qemu_device_params(&self) -> Result<Vec<String>> {
|
||||
let mut device_params: Vec<String> = Vec::new();
|
||||
|
||||
device_params.push(format!("driver={}", &self.device_driver.to_string()));
|
||||
device_params.push(format!("netdev={}", &self.id));
|
||||
|
||||
let mac = self.mac_address.to_string();
|
||||
device_params.push(format!("mac={}", &mac));
|
||||
|
||||
if !self.bus.is_empty() {
|
||||
device_params.push(format!("bus={}", &self.bus));
|
||||
}
|
||||
|
||||
if !self.pci_addr.is_empty() {
|
||||
// FIXME: pci_addr: PciPath
|
||||
device_params.push(format!("addr={}", &self.pci_addr));
|
||||
}
|
||||
|
||||
device_params.push(format!(
|
||||
"disable-modern={}",
|
||||
if self.disable_modern { "true" } else { "false" }
|
||||
));
|
||||
|
||||
if !self.fds.is_empty() {
|
||||
device_params.push(self.mq_param());
|
||||
}
|
||||
|
||||
Ok(device_params)
|
||||
}
|
||||
|
||||
pub fn qemu_netdev_params(&self) -> Result<Vec<String>> {
|
||||
let mut netdev_params: Vec<String> = Vec::new();
|
||||
let netdev_type = self.device_type.to_string();
|
||||
netdev_params.push(netdev_type);
|
||||
netdev_params.push(format!("id={}", self.id));
|
||||
|
||||
if !self.disable_vhost_net {
|
||||
netdev_params.push("vhost=on".to_owned());
|
||||
if let Some(vhost_fds) = self.fds.get("vhostfds") {
|
||||
for fd in vhost_fds.iter() {
|
||||
clear_fd_flags(*fd)?;
|
||||
}
|
||||
let s = vhost_fds
|
||||
.iter()
|
||||
.map(|&n| n.to_string())
|
||||
.collect::<Vec<String>>()
|
||||
.join(":");
|
||||
|
||||
netdev_params.push(format!("vhostfds={}", s));
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(tuntap_fds) = self.fds.get("fds") {
|
||||
for fd in tuntap_fds.iter() {
|
||||
clear_fd_flags(*fd).context("clear flag of fd failed")?;
|
||||
}
|
||||
let s = tuntap_fds
|
||||
.iter()
|
||||
.map(|&n| n.to_string())
|
||||
.collect::<Vec<String>>()
|
||||
.join(":");
|
||||
netdev_params.push(format!("fds={}", s));
|
||||
} else {
|
||||
netdev_params.push(format!("ifname={}", self.if_name));
|
||||
netdev_params.push("script=no".to_owned());
|
||||
netdev_params.push("downscript=no".to_owned());
|
||||
}
|
||||
|
||||
Ok(netdev_params)
|
||||
}
|
||||
}
|
||||
|
||||
impl ToString for Address {
|
||||
fn to_string(&self) -> String {
|
||||
let b: [u8; 6] = self.0;
|
||||
|
||||
format!(
|
||||
"{:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}",
|
||||
b[0], b[1], b[2], b[3], b[4], b[5]
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// /dev/tap$(cat /sys/class/net/macvtap1/ifindex)
|
||||
// for example: /dev/tap2381
|
||||
#[allow(dead_code)]
|
||||
pub fn create_macvtap_fds(ifindex: u32, queues: u32) -> Result<Vec<File>> {
|
||||
let macvtap = format!("/dev/tap{}", ifindex);
|
||||
create_fds(macvtap.as_str(), queues as usize)
|
||||
}
|
||||
|
||||
pub fn create_vhost_net_fds(queues: u32) -> Result<Vec<File>> {
|
||||
let vhost_dev = "/dev/vhost-net";
|
||||
let num_fds = if queues > 1 { queues as usize } else { 1_usize };
|
||||
|
||||
create_fds(vhost_dev, num_fds)
|
||||
}
|
||||
|
||||
// For example: if num_fds = 3; fds = {0xc000012028, 0xc000012030, 0xc000012038}
|
||||
fn create_fds(device: &str, num_fds: usize) -> Result<Vec<File>> {
|
||||
let mut fds: Vec<File> = Vec::with_capacity(num_fds);
|
||||
|
||||
for i in 0..num_fds {
|
||||
match OpenOptions::new().read(true).write(true).open(device) {
|
||||
Ok(f) => {
|
||||
fds.push(f);
|
||||
}
|
||||
Err(e) => {
|
||||
fds.clear();
|
||||
return Err(anyhow!(
|
||||
"It failed with error {:?} when opened the {:?} device.",
|
||||
e,
|
||||
i
|
||||
));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
Ok(fds)
|
||||
}
|
||||
|
||||
pub fn generate_netdev_fds(
|
||||
network_config: &NetworkConfig,
|
||||
queues: u32,
|
||||
) -> Result<(Vec<File>, Vec<File>)> {
|
||||
let if_name = network_config.host_dev_name.as_str();
|
||||
let tun_taps = open_named_tuntap(if_name, queues)?;
|
||||
let vhost_fds = create_vhost_net_fds(queues)?;
|
||||
|
||||
Ok((tun_taps, vhost_fds))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::create_fds;
|
||||
|
||||
#[test]
|
||||
fn test_ctreate_fds() {
|
||||
let device = "/dev/null";
|
||||
let num_fds = 3_usize;
|
||||
let fds = create_fds(device, num_fds);
|
||||
assert!(fds.is_ok());
|
||||
assert_eq!(fds.unwrap().len(), num_fds);
|
||||
}
|
||||
}
|
@@ -4,9 +4,19 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::{
|
||||
collections::HashSet,
|
||||
fs::File,
|
||||
os::fd::{AsRawFd, RawFd},
|
||||
};
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use dbs_utils::net::Tap;
|
||||
use kata_types::config::KATA_PATH;
|
||||
use nix::{
|
||||
fcntl,
|
||||
sched::{setns, CloneFlags},
|
||||
};
|
||||
|
||||
use crate::{DEFAULT_HYBRID_VSOCK_NAME, JAILER_ROOT};
|
||||
|
||||
@@ -47,3 +57,46 @@ pub fn get_jailer_root(sid: &str) -> String {
|
||||
|
||||
[&sandbox_path, JAILER_ROOT].join("/")
|
||||
}
|
||||
|
||||
// Clear the O_CLOEXEC which is set by default by Rust standard library
|
||||
// as it would obviously prevent passing the descriptor to the hypervisor process.
|
||||
pub fn clear_fd_flags(rawfd: RawFd) -> Result<()> {
|
||||
if let Err(err) = fcntl::fcntl(rawfd, fcntl::FcntlArg::F_SETFD(fcntl::FdFlag::empty())) {
|
||||
info!(
|
||||
sl!(),
|
||||
"couldn't clear O_CLOEXEC on device's fd, communication with agent will not work: {:?}",
|
||||
err
|
||||
);
|
||||
return Err(err.into());
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn enter_netns(netns_path: &str) -> Result<()> {
|
||||
if !netns_path.is_empty() {
|
||||
let netns =
|
||||
File::open(netns_path).context(anyhow!("open netns path {:?} failed.", netns_path))?;
|
||||
setns(netns.as_raw_fd(), CloneFlags::CLONE_NEWNET).context("set netns failed")?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn open_named_tuntap(if_name: &str, queues: u32) -> Result<Vec<File>> {
|
||||
let (multi_vq, vq_pairs) = if queues > 1 {
|
||||
(true, queues as usize)
|
||||
} else {
|
||||
(false, 1_usize)
|
||||
};
|
||||
|
||||
let tap: Tap = Tap::open_named(if_name, multi_vq).context("open named tuntap device failed")?;
|
||||
let taps: Vec<Tap> = tap.into_mq_taps(vq_pairs).context("into mq taps failed.")?;
|
||||
|
||||
let mut tap_files: Vec<std::fs::File> = Vec::new();
|
||||
for tap in taps {
|
||||
tap_files.push(tap.tap_file);
|
||||
}
|
||||
|
||||
Ok(tap_files)
|
||||
}
|
||||
|
@@ -17,7 +17,6 @@ bitflags = "1.2.1"
|
||||
byte-unit = "4.0.14"
|
||||
cgroups-rs = "0.3.2"
|
||||
futures = "0.3.11"
|
||||
hex = "0.4.3"
|
||||
lazy_static = "1.4.0"
|
||||
libc = ">=0.2.39"
|
||||
netns-rs = "0.1.0"
|
||||
|
@@ -24,6 +24,7 @@ use anyhow::{anyhow, Context, Result};
|
||||
use async_trait::async_trait;
|
||||
use hypervisor::device::device_manager::DeviceManager;
|
||||
use hypervisor::Hypervisor;
|
||||
use kata_sys_util::netns::NetnsGuard;
|
||||
use kata_types::config::TomlConfig;
|
||||
use scopeguard::defer;
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -32,7 +33,7 @@ use tokio::sync::RwLock;
|
||||
|
||||
use super::network_entity::NetworkEntity;
|
||||
use super::utils::address::{ip_family_from_ip_addr, parse_ip_cidr};
|
||||
use super::{EndpointState, NetnsGuard, Network};
|
||||
use super::{EndpointState, Network};
|
||||
use crate::network::endpoint::{TapEndpoint, VhostUserEndpoint};
|
||||
use crate::network::network_info::network_info_from_dan::NetworkInfoFromDan;
|
||||
use crate::network::utils::generate_private_mac_addr;
|
||||
|
@@ -22,8 +22,8 @@ use network_with_netns::NetworkWithNetns;
|
||||
mod network_pair;
|
||||
use network_pair::NetworkPair;
|
||||
mod utils;
|
||||
pub use kata_sys_util::netns::{generate_netns_name, NetnsGuard};
|
||||
use tokio::sync::RwLock;
|
||||
pub use utils::netns::{generate_netns_name, NetnsGuard};
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use async_trait::async_trait;
|
||||
|
@@ -17,6 +17,7 @@ use anyhow::{anyhow, Context, Result};
|
||||
use async_trait::async_trait;
|
||||
use futures::stream::TryStreamExt;
|
||||
use hypervisor::{device::device_manager::DeviceManager, Hypervisor};
|
||||
use kata_sys_util::netns;
|
||||
use netns_rs::get_from_path;
|
||||
use scopeguard::defer;
|
||||
use tokio::sync::RwLock;
|
||||
@@ -27,7 +28,7 @@ use super::{
|
||||
},
|
||||
network_entity::NetworkEntity,
|
||||
network_info::network_info_from_link::{handle_addresses, NetworkInfoFromLink},
|
||||
utils::{link, netns},
|
||||
utils::link,
|
||||
Network,
|
||||
};
|
||||
use crate::network::NetworkInfo;
|
||||
|
@@ -6,7 +6,6 @@
|
||||
|
||||
pub(crate) mod address;
|
||||
pub(crate) mod link;
|
||||
pub(crate) mod netns;
|
||||
|
||||
use anyhow::{anyhow, Result};
|
||||
use rand::rngs::OsRng;
|
||||
|
@@ -21,12 +21,11 @@ use common::{
|
||||
};
|
||||
use hypervisor::Hypervisor;
|
||||
use oci::Process as OCIProcess;
|
||||
use resource::network::NetnsGuard;
|
||||
use resource::ResourceManager;
|
||||
use tokio::sync::RwLock;
|
||||
use tracing::instrument;
|
||||
|
||||
use kata_sys_util::hooks::HookStates;
|
||||
use kata_sys_util::{hooks::HookStates, netns::NetnsGuard};
|
||||
|
||||
use super::{logger_with_process, Container};
|
||||
|
||||
|
Reference in New Issue
Block a user