mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-09-20 00:07:55 +00:00
Merge pull request #8866 from Apokleos/netdev-qemu-rs
runtime-rs: add netdev params to cmdline for qemu-rs.
This commit is contained in:
2
src/libs/Cargo.lock
generated
2
src/libs/Cargo.lock
generated
@@ -692,6 +692,7 @@ dependencies = [
|
|||||||
"chrono",
|
"chrono",
|
||||||
"common-path",
|
"common-path",
|
||||||
"fail",
|
"fail",
|
||||||
|
"hex",
|
||||||
"kata-types",
|
"kata-types",
|
||||||
"lazy_static",
|
"lazy_static",
|
||||||
"libc",
|
"libc",
|
||||||
@@ -708,6 +709,7 @@ dependencies = [
|
|||||||
"slog-scope",
|
"slog-scope",
|
||||||
"subprocess",
|
"subprocess",
|
||||||
"tempfile",
|
"tempfile",
|
||||||
|
"test-utils",
|
||||||
"thiserror",
|
"thiserror",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@@ -28,6 +28,7 @@ slog-scope = "4.4.0"
|
|||||||
subprocess = "0.2.8"
|
subprocess = "0.2.8"
|
||||||
rand = "0.8.5"
|
rand = "0.8.5"
|
||||||
thiserror = "1.0.30"
|
thiserror = "1.0.30"
|
||||||
|
hex = "0.4.3"
|
||||||
|
|
||||||
kata-types = { path = "../kata-types" }
|
kata-types = { path = "../kata-types" }
|
||||||
oci = { path = "../oci" }
|
oci = { path = "../oci" }
|
||||||
@@ -37,3 +38,4 @@ safe-path = { path = "../safe-path" }
|
|||||||
num_cpus = "1.13.1"
|
num_cpus = "1.13.1"
|
||||||
serial_test = "0.5.1"
|
serial_test = "0.5.1"
|
||||||
tempfile = "3.2.0"
|
tempfile = "3.2.0"
|
||||||
|
test-utils = { path = "../test-utils" }
|
||||||
|
@@ -12,6 +12,7 @@ pub mod fs;
|
|||||||
pub mod hooks;
|
pub mod hooks;
|
||||||
pub mod k8s;
|
pub mod k8s;
|
||||||
pub mod mount;
|
pub mod mount;
|
||||||
|
pub mod netns;
|
||||||
pub mod numa;
|
pub mod numa;
|
||||||
pub mod protection;
|
pub mod protection;
|
||||||
pub mod rand;
|
pub mod rand;
|
||||||
|
@@ -11,6 +11,8 @@ use nix::sched::{setns, CloneFlags};
|
|||||||
use nix::unistd::{getpid, gettid};
|
use nix::unistd::{getpid, gettid};
|
||||||
use rand::Rng;
|
use rand::Rng;
|
||||||
|
|
||||||
|
use kata_types::sl;
|
||||||
|
|
||||||
pub struct NetnsGuard {
|
pub struct NetnsGuard {
|
||||||
old_netns: Option<File>,
|
old_netns: Option<File>,
|
||||||
}
|
}
|
2
src/runtime-rs/Cargo.lock
generated
2
src/runtime-rs/Cargo.lock
generated
@@ -1796,6 +1796,7 @@ dependencies = [
|
|||||||
"chrono",
|
"chrono",
|
||||||
"common-path",
|
"common-path",
|
||||||
"fail",
|
"fail",
|
||||||
|
"hex",
|
||||||
"kata-types",
|
"kata-types",
|
||||||
"lazy_static",
|
"lazy_static",
|
||||||
"libc",
|
"libc",
|
||||||
@@ -3211,7 +3212,6 @@ dependencies = [
|
|||||||
"byte-unit 4.0.19",
|
"byte-unit 4.0.19",
|
||||||
"cgroups-rs",
|
"cgroups-rs",
|
||||||
"futures 0.3.28",
|
"futures 0.3.28",
|
||||||
"hex",
|
|
||||||
"hypervisor",
|
"hypervisor",
|
||||||
"kata-sys-util",
|
"kata-sys-util",
|
||||||
"kata-types",
|
"kata-types",
|
||||||
|
@@ -29,6 +29,7 @@ path-clean = "1.0.1"
|
|||||||
lazy_static = "1.4"
|
lazy_static = "1.4"
|
||||||
tracing = "0.1.36"
|
tracing = "0.1.36"
|
||||||
|
|
||||||
|
dbs-utils = { path = "../../../dragonball/src/dbs_utils" }
|
||||||
kata-sys-util = { path = "../../../libs/kata-sys-util" }
|
kata-sys-util = { path = "../../../libs/kata-sys-util" }
|
||||||
kata-types = { path = "../../../libs/kata-types" }
|
kata-types = { path = "../../../libs/kata-types" }
|
||||||
logging = { path = "../../../libs/logging" }
|
logging = { path = "../../../libs/logging" }
|
||||||
@@ -43,7 +44,6 @@ crossbeam-channel = "0.5.6"
|
|||||||
|
|
||||||
[target.'cfg(not(target_arch = "s390x"))'.dependencies]
|
[target.'cfg(not(target_arch = "s390x"))'.dependencies]
|
||||||
dragonball = { path = "../../../dragonball", features = ["atomic-guest-memory", "virtio-vsock", "hotplug", "virtio-blk", "virtio-net", "virtio-fs", "vhost-net", "dbs-upcall", "virtio-mem", "virtio-balloon", "vhost-user-net", "host-device"] }
|
dragonball = { path = "../../../dragonball", features = ["atomic-guest-memory", "virtio-vsock", "hotplug", "virtio-blk", "virtio-net", "virtio-fs", "vhost-net", "dbs-upcall", "virtio-mem", "virtio-balloon", "vhost-user-net", "host-device"] }
|
||||||
dbs-utils = { path = "../../../dragonball/src/dbs_utils" }
|
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = []
|
default = []
|
||||||
|
@@ -13,7 +13,7 @@ use crate::device::topology::PCIeTopology;
|
|||||||
use crate::device::{Device, DeviceType};
|
use crate::device::{Device, DeviceType};
|
||||||
use crate::Hypervisor as hypervisor;
|
use crate::Hypervisor as hypervisor;
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone, Default)]
|
||||||
pub struct Address(pub [u8; 6]);
|
pub struct Address(pub [u8; 6]);
|
||||||
|
|
||||||
impl fmt::Debug for Address {
|
impl fmt::Debug for Address {
|
||||||
|
@@ -3,12 +3,15 @@
|
|||||||
// SPDX-License-Identifier: Apache-2.0
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
//
|
//
|
||||||
|
|
||||||
use crate::{kernel_param::KernelParams, HypervisorConfig};
|
use super::network::{generate_netdev_fds, NetDevice};
|
||||||
|
use crate::utils::clear_fd_flags;
|
||||||
|
use crate::{kernel_param::KernelParams, HypervisorConfig, NetworkConfig};
|
||||||
|
|
||||||
use anyhow::{anyhow, Context, Result};
|
use anyhow::{anyhow, Context, Result};
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use nix::fcntl;
|
use kata_types::config::hypervisor::NetworkInfo;
|
||||||
use std::fs::read_to_string;
|
use std::fs::{read_to_string, File};
|
||||||
|
use std::os::fd::AsRawFd;
|
||||||
use std::os::unix::io::RawFd;
|
use std::os::unix::io::RawFd;
|
||||||
|
|
||||||
// These should have been called MiB and GiB for better readability but the
|
// These should have been called MiB and GiB for better readability but the
|
||||||
@@ -703,6 +706,25 @@ impl ToQemuParams for Serial {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl ToQemuParams for NetDevice {
|
||||||
|
// qemu_params returns the qemu parameters built out of this network device.
|
||||||
|
async fn qemu_params(&self) -> Result<Vec<String>> {
|
||||||
|
let mut qemu_params: Vec<String> = Vec::new();
|
||||||
|
|
||||||
|
let netdev_params = self.qemu_netdev_params()?;
|
||||||
|
let device_params = self.qemu_device_params()?;
|
||||||
|
|
||||||
|
qemu_params.push("-netdev".to_owned());
|
||||||
|
qemu_params.push(netdev_params.join(","));
|
||||||
|
|
||||||
|
qemu_params.push("-device".to_owned());
|
||||||
|
qemu_params.push(device_params.join(","));
|
||||||
|
|
||||||
|
Ok(qemu_params)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn is_running_in_vm() -> Result<bool> {
|
fn is_running_in_vm() -> Result<bool> {
|
||||||
let res = read_to_string("/proc/cpuinfo")?
|
let res = read_to_string("/proc/cpuinfo")?
|
||||||
.lines()
|
.lines()
|
||||||
@@ -782,16 +804,7 @@ impl<'a> QemuCmdLine<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn add_vsock(&mut self, vhostfd: RawFd, guest_cid: u32) -> Result<()> {
|
pub fn add_vsock(&mut self, vhostfd: RawFd, guest_cid: u32) -> Result<()> {
|
||||||
// Clear the O_CLOEXEC which is set by default by Rust standard library
|
clear_fd_flags(vhostfd).context("clear flags failed")?;
|
||||||
// as it would obviously prevent passing the descriptor to the qemu process.
|
|
||||||
if let Err(err) = fcntl::fcntl(vhostfd, fcntl::FcntlArg::F_SETFD(fcntl::FdFlag::empty())) {
|
|
||||||
info!(
|
|
||||||
sl!(),
|
|
||||||
"couldn't clear O_CLOEXEC on vsock, communication with agent will not work: {:?}",
|
|
||||||
err
|
|
||||||
);
|
|
||||||
return Err(err.into());
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut vhost_vsock_pci = VhostVsockPci::new(vhostfd, guest_cid);
|
let mut vhost_vsock_pci = VhostVsockPci::new(vhostfd, guest_cid);
|
||||||
|
|
||||||
@@ -855,6 +868,27 @@ impl<'a> QemuCmdLine<'a> {
|
|||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn add_network_device(
|
||||||
|
&mut self,
|
||||||
|
config: &NetworkConfig,
|
||||||
|
network_info: &NetworkInfo,
|
||||||
|
) -> Result<Vec<File>> {
|
||||||
|
let disable_vhost_net = network_info.disable_vhost_net;
|
||||||
|
let queues = network_info.network_queues;
|
||||||
|
|
||||||
|
let (tun_files, vhost_files) = generate_netdev_fds(config, queues)?;
|
||||||
|
let tun_fds: Vec<i32> = tun_files.iter().map(|dev| dev.as_raw_fd()).collect();
|
||||||
|
let vhost_fds: Vec<i32> = vhost_files.iter().map(|dev| dev.as_raw_fd()).collect();
|
||||||
|
|
||||||
|
let net_device = NetDevice::new(config, disable_vhost_net, tun_fds, vhost_fds);
|
||||||
|
self.devices.push(Box::new(net_device));
|
||||||
|
|
||||||
|
let dev_files = vec![tun_files, vhost_files];
|
||||||
|
let fds: Vec<File> = dev_files.into_iter().flatten().collect();
|
||||||
|
|
||||||
|
Ok(fds)
|
||||||
|
}
|
||||||
|
|
||||||
pub async fn build(&self) -> Result<Vec<String>> {
|
pub async fn build(&self) -> Result<Vec<String>> {
|
||||||
let mut result = Vec::new();
|
let mut result = Vec::new();
|
||||||
|
|
||||||
|
@@ -5,11 +5,12 @@
|
|||||||
|
|
||||||
use super::cmdline_generator::QemuCmdLine;
|
use super::cmdline_generator::QemuCmdLine;
|
||||||
use crate::{
|
use crate::{
|
||||||
hypervisor_persist::HypervisorState, HypervisorConfig, MemoryConfig, VcpuThreadIds,
|
hypervisor_persist::HypervisorState, utils::enter_netns, HypervisorConfig, MemoryConfig,
|
||||||
VsockDevice, HYPERVISOR_QEMU,
|
VcpuThreadIds, VsockDevice, HYPERVISOR_QEMU,
|
||||||
};
|
};
|
||||||
use anyhow::{anyhow, Context, Result};
|
use anyhow::{anyhow, Context, Result};
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
|
use kata_sys_util::netns::NetnsGuard;
|
||||||
use kata_types::{
|
use kata_types::{
|
||||||
capabilities::{Capabilities, CapabilityBits},
|
capabilities::{Capabilities, CapabilityBits},
|
||||||
config::KATA_PATH,
|
config::KATA_PATH,
|
||||||
@@ -34,6 +35,7 @@ pub struct QemuInner {
|
|||||||
|
|
||||||
config: HypervisorConfig,
|
config: HypervisorConfig,
|
||||||
devices: Vec<DeviceType>,
|
devices: Vec<DeviceType>,
|
||||||
|
netns: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl QemuInner {
|
impl QemuInner {
|
||||||
@@ -43,12 +45,14 @@ impl QemuInner {
|
|||||||
qemu_process: None,
|
qemu_process: None,
|
||||||
config: Default::default(),
|
config: Default::default(),
|
||||||
devices: Vec::new(),
|
devices: Vec::new(),
|
||||||
|
netns: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) async fn prepare_vm(&mut self, id: &str, _netns: Option<String>) -> Result<()> {
|
pub(crate) async fn prepare_vm(&mut self, id: &str, netns: Option<String>) -> Result<()> {
|
||||||
info!(sl!(), "Preparing QEMU VM");
|
info!(sl!(), "Preparing QEMU VM");
|
||||||
self.id = id.to_string();
|
self.id = id.to_string();
|
||||||
|
self.netns = netns;
|
||||||
|
|
||||||
let vm_path = [KATA_PATH, self.id.as_str()].join("/");
|
let vm_path = [KATA_PATH, self.id.as_str()].join("/");
|
||||||
std::fs::create_dir_all(vm_path)?;
|
std::fs::create_dir_all(vm_path)?;
|
||||||
@@ -58,6 +62,7 @@ impl QemuInner {
|
|||||||
|
|
||||||
pub(crate) async fn start_vm(&mut self, _timeout: i32) -> Result<()> {
|
pub(crate) async fn start_vm(&mut self, _timeout: i32) -> Result<()> {
|
||||||
info!(sl!(), "Starting QEMU VM");
|
info!(sl!(), "Starting QEMU VM");
|
||||||
|
let netns = self.netns.clone().unwrap_or_default();
|
||||||
|
|
||||||
let mut cmdline = QemuCmdLine::new(&self.id, &self.config)?;
|
let mut cmdline = QemuCmdLine::new(&self.id, &self.config)?;
|
||||||
|
|
||||||
@@ -65,6 +70,10 @@ impl QemuInner {
|
|||||||
// descriptor needs to stay open until the qemu process launches.
|
// descriptor needs to stay open until the qemu process launches.
|
||||||
// This is why we need to store it in a variable at this scope.
|
// This is why we need to store it in a variable at this scope.
|
||||||
let mut _vhost_fd = None;
|
let mut _vhost_fd = None;
|
||||||
|
// We need to keep the vhost-net/tuntap file descriptor open until the QEMU process launches.
|
||||||
|
// However, we're likely not interested in the specific type of file descriptor itself. We just
|
||||||
|
// want to ensure any fds associated with network devices remain open within the current scope.
|
||||||
|
let mut _fds_for_qemu: Vec<std::fs::File> = Vec::new();
|
||||||
|
|
||||||
for device in &mut self.devices {
|
for device in &mut self.devices {
|
||||||
match device {
|
match device {
|
||||||
@@ -99,6 +108,14 @@ impl QemuInner {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
DeviceType::Network(network) => {
|
||||||
|
let network_info = &self.config.network_info;
|
||||||
|
|
||||||
|
// we need ensure add_network_device happens in netns.
|
||||||
|
let _netns_guard = NetnsGuard::new(&netns).context("new netns guard")?;
|
||||||
|
|
||||||
|
_fds_for_qemu = cmdline.add_network_device(&network.config, network_info)?;
|
||||||
|
}
|
||||||
_ => info!(sl!(), "qemu cmdline: unsupported device: {:?}", device),
|
_ => info!(sl!(), "qemu cmdline: unsupported device: {:?}", device),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -112,6 +129,16 @@ impl QemuInner {
|
|||||||
command.args(cmdline.build().await?);
|
command.args(cmdline.build().await?);
|
||||||
|
|
||||||
info!(sl!(), "qemu cmd: {:?}", command);
|
info!(sl!(), "qemu cmd: {:?}", command);
|
||||||
|
|
||||||
|
// we need move the qemu process into Network Namespace.
|
||||||
|
unsafe {
|
||||||
|
let _pre_exec = command.pre_exec(move || {
|
||||||
|
let _ = enter_netns(&netns);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
self.qemu_process = Some(command.stderr(Stdio::piped()).spawn()?);
|
self.qemu_process = Some(command.stderr(Stdio::piped()).spawn()?);
|
||||||
info!(sl!(), "qemu process started");
|
info!(sl!(), "qemu process started");
|
||||||
|
|
||||||
@@ -357,6 +384,7 @@ impl Persist for QemuInner {
|
|||||||
qemu_process: None,
|
qemu_process: None,
|
||||||
config: hypervisor_state.config,
|
config: hypervisor_state.config,
|
||||||
devices: Vec::new(),
|
devices: Vec::new(),
|
||||||
|
netns: None,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -5,6 +5,7 @@
|
|||||||
|
|
||||||
mod cmdline_generator;
|
mod cmdline_generator;
|
||||||
mod inner;
|
mod inner;
|
||||||
|
mod network;
|
||||||
|
|
||||||
use crate::device::DeviceType;
|
use crate::device::DeviceType;
|
||||||
use crate::hypervisor_persist::HypervisorState;
|
use crate::hypervisor_persist::HypervisorState;
|
||||||
|
339
src/runtime-rs/crates/hypervisor/src/qemu/network.rs
Normal file
339
src/runtime-rs/crates/hypervisor/src/qemu/network.rs
Normal file
@@ -0,0 +1,339 @@
|
|||||||
|
// Copyright (c) 2024 Ant Group
|
||||||
|
//
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use std::convert::TryFrom;
|
||||||
|
use std::fs::{File, OpenOptions};
|
||||||
|
use std::os::fd::RawFd;
|
||||||
|
|
||||||
|
use crate::utils::{clear_fd_flags, open_named_tuntap};
|
||||||
|
use crate::{Address, NetworkConfig};
|
||||||
|
use anyhow::{anyhow, Context, Result};
|
||||||
|
|
||||||
|
// VirtioTransport is the transport in use for a virtio device.
|
||||||
|
#[derive(Debug, Default, PartialEq)]
|
||||||
|
enum VirtioTransport {
|
||||||
|
#[default]
|
||||||
|
Pci,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ToString for VirtioTransport {
|
||||||
|
fn to_string(&self) -> String {
|
||||||
|
match self {
|
||||||
|
VirtioTransport::Pci => "pci".to_owned(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TryFrom<&str> for VirtioTransport {
|
||||||
|
type Error = anyhow::Error;
|
||||||
|
|
||||||
|
fn try_from(_transport: &str) -> Result<Self> {
|
||||||
|
Ok(VirtioTransport::Pci)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// DeviceDriver is set in "-device driver=<DeviceDriver>"
|
||||||
|
#[derive(Debug, Default, PartialEq)]
|
||||||
|
enum DeviceDriver {
|
||||||
|
// VirtioNetPci("virtio-net-pci") is a virtio-net device using PCI transport.
|
||||||
|
#[default]
|
||||||
|
VirtioNetPci,
|
||||||
|
|
||||||
|
// VfioPci("vfio-pci") is an attached host device using PCI transport.
|
||||||
|
VfioPci,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ToString for DeviceDriver {
|
||||||
|
fn to_string(&self) -> String {
|
||||||
|
match self {
|
||||||
|
DeviceDriver::VirtioNetPci => "virtio-net-pci".to_owned(),
|
||||||
|
DeviceDriver::VfioPci => "vfio-pci".to_owned(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TryFrom<&str> for DeviceDriver {
|
||||||
|
type Error = anyhow::Error;
|
||||||
|
|
||||||
|
fn try_from(device_driver: &str) -> Result<Self> {
|
||||||
|
Ok(match device_driver {
|
||||||
|
"virtio-net-pci" => DeviceDriver::VirtioNetPci,
|
||||||
|
"vfio-pci" => DeviceDriver::VfioPci,
|
||||||
|
_ => return Err(anyhow!("unsupported transport")),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Default, PartialEq)]
|
||||||
|
enum NetDev {
|
||||||
|
/// Tap("tap") is a tap networking device type.
|
||||||
|
#[default]
|
||||||
|
Tap,
|
||||||
|
|
||||||
|
/// MacTap("macvtap") is a macvtap networking device type.
|
||||||
|
#[allow(dead_code)]
|
||||||
|
MacvTap,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ToString for NetDev {
|
||||||
|
fn to_string(&self) -> String {
|
||||||
|
match self {
|
||||||
|
NetDev::Tap | NetDev::MacvTap => "tap".to_owned(),
|
||||||
|
// VhostUser is to be added in future.
|
||||||
|
// NetDev::VhostUser => "vhost-user".to_owned(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// NetDevice represents a guest networking device
|
||||||
|
// -netdev tap,id=hostnet0,vhost=on,vhostfds=x:y:z,fds=a:b:c
|
||||||
|
// -device virtio-net-pci,netdev=hostnet0,id=net0,mac=24:42:54:20:50:46,bus=pci.0,addr=0x7
|
||||||
|
#[derive(Debug, Default)]
|
||||||
|
pub struct NetDevice {
|
||||||
|
// device_type is the netdev type (e.g. tap).
|
||||||
|
device_type: NetDev,
|
||||||
|
|
||||||
|
// driver is the qemu device driver
|
||||||
|
device_driver: DeviceDriver,
|
||||||
|
|
||||||
|
// id is the net device identifier.
|
||||||
|
id: String,
|
||||||
|
|
||||||
|
// if_name is the interface name,
|
||||||
|
if_name: String,
|
||||||
|
|
||||||
|
// bus is the bus path name of a PCI device.
|
||||||
|
bus: String,
|
||||||
|
|
||||||
|
// pci_addr is the address offset of a PCI device.
|
||||||
|
pci_addr: String,
|
||||||
|
|
||||||
|
// fds represents the list of already existing file descriptors to be used.
|
||||||
|
// This is mostly useful for mq support.
|
||||||
|
// {
|
||||||
|
// fds: Vec<File>,
|
||||||
|
// vhost_fds: Vec<File>,
|
||||||
|
// }
|
||||||
|
fds: HashMap<String, Vec<RawFd>>,
|
||||||
|
|
||||||
|
// disable_vhost_net disables virtio device emulation from the host kernel instead of from qemu.
|
||||||
|
disable_vhost_net: bool,
|
||||||
|
|
||||||
|
// mac_address is the networking device interface MAC address.
|
||||||
|
mac_address: Address,
|
||||||
|
|
||||||
|
// disable_modern prevents qemu from relying on fast MMIO.
|
||||||
|
disable_modern: bool,
|
||||||
|
|
||||||
|
// transport is the virtio transport for this device.
|
||||||
|
transport: VirtioTransport,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl NetDevice {
|
||||||
|
#[allow(dead_code)]
|
||||||
|
pub fn new(
|
||||||
|
config: &NetworkConfig,
|
||||||
|
disable_vhost_net: bool,
|
||||||
|
tun_fds: Vec<i32>,
|
||||||
|
vhost_fds: Vec<i32>,
|
||||||
|
) -> Self {
|
||||||
|
// we have only two <Key, Value>s:
|
||||||
|
// {
|
||||||
|
// "fds": vec![fd1, fd2,...],
|
||||||
|
// "vhostfds": vec![fd3, fd4,...],
|
||||||
|
// }
|
||||||
|
let mut fds: HashMap<String, Vec<RawFd>> = HashMap::with_capacity(2);
|
||||||
|
fds.insert("fds".to_owned(), tun_fds);
|
||||||
|
fds.insert("vhostfds".to_owned(), vhost_fds);
|
||||||
|
|
||||||
|
// FIXME(Hard Code): It's safe to unwrap here because of the valid input.
|
||||||
|
// Ideally device_driver should be derived from transport to minimize code duplication.
|
||||||
|
// While currently we focus on PCI for the initial implementation.
|
||||||
|
// And we'll support other transports, e.g. s390x's CCW.
|
||||||
|
let device_driver = DeviceDriver::try_from("virtio-net-pci").unwrap();
|
||||||
|
let transport = VirtioTransport::try_from("pci").unwrap();
|
||||||
|
|
||||||
|
NetDevice {
|
||||||
|
device_type: NetDev::Tap,
|
||||||
|
device_driver,
|
||||||
|
id: format!("network-{}", &config.index),
|
||||||
|
if_name: config.virt_iface_name.clone(),
|
||||||
|
mac_address: config.guest_mac.clone().unwrap(),
|
||||||
|
disable_vhost_net,
|
||||||
|
disable_modern: false,
|
||||||
|
fds,
|
||||||
|
transport,
|
||||||
|
..Default::default()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn mq_param(&self) -> String {
|
||||||
|
let mut params = vec!["mq=on".to_owned()];
|
||||||
|
if self.transport == VirtioTransport::Pci {
|
||||||
|
// https://www.linux-kvm.org/page/Multiqueue
|
||||||
|
// -netdev tap,vhost=on,queues=N
|
||||||
|
// enable mq and specify msix vectors in qemu cmdline
|
||||||
|
// (2N+2 vectors, N for tx queues, N for rx queues, 1 for config, and one for possible control vq)
|
||||||
|
// -device virtio-net-pci,mq=on,vectors=2N+2...
|
||||||
|
// enable mq in guest by 'ethtool -L eth0 combined $queue_num'
|
||||||
|
// Clearlinux automatically sets up the queues properly
|
||||||
|
// The agent implementation should do this to ensure that it is
|
||||||
|
// always set
|
||||||
|
|
||||||
|
// vectors = len(netdev.FDs) * 2 + 2
|
||||||
|
if let Some(fds) = self.fds.get("fds") {
|
||||||
|
params.push(format!("vectors={}", 2 * fds.len() + 2));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
params.join(",")
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn qemu_device_params(&self) -> Result<Vec<String>> {
|
||||||
|
let mut device_params: Vec<String> = Vec::new();
|
||||||
|
|
||||||
|
device_params.push(format!("driver={}", &self.device_driver.to_string()));
|
||||||
|
device_params.push(format!("netdev={}", &self.id));
|
||||||
|
|
||||||
|
let mac = self.mac_address.to_string();
|
||||||
|
device_params.push(format!("mac={}", &mac));
|
||||||
|
|
||||||
|
if !self.bus.is_empty() {
|
||||||
|
device_params.push(format!("bus={}", &self.bus));
|
||||||
|
}
|
||||||
|
|
||||||
|
if !self.pci_addr.is_empty() {
|
||||||
|
// FIXME: pci_addr: PciPath
|
||||||
|
device_params.push(format!("addr={}", &self.pci_addr));
|
||||||
|
}
|
||||||
|
|
||||||
|
device_params.push(format!(
|
||||||
|
"disable-modern={}",
|
||||||
|
if self.disable_modern { "true" } else { "false" }
|
||||||
|
));
|
||||||
|
|
||||||
|
if !self.fds.is_empty() {
|
||||||
|
device_params.push(self.mq_param());
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(device_params)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn qemu_netdev_params(&self) -> Result<Vec<String>> {
|
||||||
|
let mut netdev_params: Vec<String> = Vec::new();
|
||||||
|
let netdev_type = self.device_type.to_string();
|
||||||
|
netdev_params.push(netdev_type);
|
||||||
|
netdev_params.push(format!("id={}", self.id));
|
||||||
|
|
||||||
|
if !self.disable_vhost_net {
|
||||||
|
netdev_params.push("vhost=on".to_owned());
|
||||||
|
if let Some(vhost_fds) = self.fds.get("vhostfds") {
|
||||||
|
for fd in vhost_fds.iter() {
|
||||||
|
clear_fd_flags(*fd)?;
|
||||||
|
}
|
||||||
|
let s = vhost_fds
|
||||||
|
.iter()
|
||||||
|
.map(|&n| n.to_string())
|
||||||
|
.collect::<Vec<String>>()
|
||||||
|
.join(":");
|
||||||
|
|
||||||
|
netdev_params.push(format!("vhostfds={}", s));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(tuntap_fds) = self.fds.get("fds") {
|
||||||
|
for fd in tuntap_fds.iter() {
|
||||||
|
clear_fd_flags(*fd).context("clear flag of fd failed")?;
|
||||||
|
}
|
||||||
|
let s = tuntap_fds
|
||||||
|
.iter()
|
||||||
|
.map(|&n| n.to_string())
|
||||||
|
.collect::<Vec<String>>()
|
||||||
|
.join(":");
|
||||||
|
netdev_params.push(format!("fds={}", s));
|
||||||
|
} else {
|
||||||
|
netdev_params.push(format!("ifname={}", self.if_name));
|
||||||
|
netdev_params.push("script=no".to_owned());
|
||||||
|
netdev_params.push("downscript=no".to_owned());
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(netdev_params)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ToString for Address {
|
||||||
|
fn to_string(&self) -> String {
|
||||||
|
let b: [u8; 6] = self.0;
|
||||||
|
|
||||||
|
format!(
|
||||||
|
"{:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}",
|
||||||
|
b[0], b[1], b[2], b[3], b[4], b[5]
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// /dev/tap$(cat /sys/class/net/macvtap1/ifindex)
|
||||||
|
// for example: /dev/tap2381
|
||||||
|
#[allow(dead_code)]
|
||||||
|
pub fn create_macvtap_fds(ifindex: u32, queues: u32) -> Result<Vec<File>> {
|
||||||
|
let macvtap = format!("/dev/tap{}", ifindex);
|
||||||
|
create_fds(macvtap.as_str(), queues as usize)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn create_vhost_net_fds(queues: u32) -> Result<Vec<File>> {
|
||||||
|
let vhost_dev = "/dev/vhost-net";
|
||||||
|
let num_fds = if queues > 1 { queues as usize } else { 1_usize };
|
||||||
|
|
||||||
|
create_fds(vhost_dev, num_fds)
|
||||||
|
}
|
||||||
|
|
||||||
|
// For example: if num_fds = 3; fds = {0xc000012028, 0xc000012030, 0xc000012038}
|
||||||
|
fn create_fds(device: &str, num_fds: usize) -> Result<Vec<File>> {
|
||||||
|
let mut fds: Vec<File> = Vec::with_capacity(num_fds);
|
||||||
|
|
||||||
|
for i in 0..num_fds {
|
||||||
|
match OpenOptions::new().read(true).write(true).open(device) {
|
||||||
|
Ok(f) => {
|
||||||
|
fds.push(f);
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
fds.clear();
|
||||||
|
return Err(anyhow!(
|
||||||
|
"It failed with error {:?} when opened the {:?} device.",
|
||||||
|
e,
|
||||||
|
i
|
||||||
|
));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(fds)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn generate_netdev_fds(
|
||||||
|
network_config: &NetworkConfig,
|
||||||
|
queues: u32,
|
||||||
|
) -> Result<(Vec<File>, Vec<File>)> {
|
||||||
|
let if_name = network_config.host_dev_name.as_str();
|
||||||
|
let tun_taps = open_named_tuntap(if_name, queues)?;
|
||||||
|
let vhost_fds = create_vhost_net_fds(queues)?;
|
||||||
|
|
||||||
|
Ok((tun_taps, vhost_fds))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::create_fds;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_ctreate_fds() {
|
||||||
|
let device = "/dev/null";
|
||||||
|
let num_fds = 3_usize;
|
||||||
|
let fds = create_fds(device, num_fds);
|
||||||
|
assert!(fds.is_ok());
|
||||||
|
assert_eq!(fds.unwrap().len(), num_fds);
|
||||||
|
}
|
||||||
|
}
|
@@ -4,9 +4,19 @@
|
|||||||
// SPDX-License-Identifier: Apache-2.0
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
//
|
//
|
||||||
|
|
||||||
use std::collections::HashSet;
|
use std::{
|
||||||
|
collections::HashSet,
|
||||||
|
fs::File,
|
||||||
|
os::fd::{AsRawFd, RawFd},
|
||||||
|
};
|
||||||
|
|
||||||
|
use anyhow::{anyhow, Context, Result};
|
||||||
|
use dbs_utils::net::Tap;
|
||||||
use kata_types::config::KATA_PATH;
|
use kata_types::config::KATA_PATH;
|
||||||
|
use nix::{
|
||||||
|
fcntl,
|
||||||
|
sched::{setns, CloneFlags},
|
||||||
|
};
|
||||||
|
|
||||||
use crate::{DEFAULT_HYBRID_VSOCK_NAME, JAILER_ROOT};
|
use crate::{DEFAULT_HYBRID_VSOCK_NAME, JAILER_ROOT};
|
||||||
|
|
||||||
@@ -47,3 +57,46 @@ pub fn get_jailer_root(sid: &str) -> String {
|
|||||||
|
|
||||||
[&sandbox_path, JAILER_ROOT].join("/")
|
[&sandbox_path, JAILER_ROOT].join("/")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Clear the O_CLOEXEC which is set by default by Rust standard library
|
||||||
|
// as it would obviously prevent passing the descriptor to the hypervisor process.
|
||||||
|
pub fn clear_fd_flags(rawfd: RawFd) -> Result<()> {
|
||||||
|
if let Err(err) = fcntl::fcntl(rawfd, fcntl::FcntlArg::F_SETFD(fcntl::FdFlag::empty())) {
|
||||||
|
info!(
|
||||||
|
sl!(),
|
||||||
|
"couldn't clear O_CLOEXEC on device's fd, communication with agent will not work: {:?}",
|
||||||
|
err
|
||||||
|
);
|
||||||
|
return Err(err.into());
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn enter_netns(netns_path: &str) -> Result<()> {
|
||||||
|
if !netns_path.is_empty() {
|
||||||
|
let netns =
|
||||||
|
File::open(netns_path).context(anyhow!("open netns path {:?} failed.", netns_path))?;
|
||||||
|
setns(netns.as_raw_fd(), CloneFlags::CLONE_NEWNET).context("set netns failed")?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn open_named_tuntap(if_name: &str, queues: u32) -> Result<Vec<File>> {
|
||||||
|
let (multi_vq, vq_pairs) = if queues > 1 {
|
||||||
|
(true, queues as usize)
|
||||||
|
} else {
|
||||||
|
(false, 1_usize)
|
||||||
|
};
|
||||||
|
|
||||||
|
let tap: Tap = Tap::open_named(if_name, multi_vq).context("open named tuntap device failed")?;
|
||||||
|
let taps: Vec<Tap> = tap.into_mq_taps(vq_pairs).context("into mq taps failed.")?;
|
||||||
|
|
||||||
|
let mut tap_files: Vec<std::fs::File> = Vec::new();
|
||||||
|
for tap in taps {
|
||||||
|
tap_files.push(tap.tap_file);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(tap_files)
|
||||||
|
}
|
||||||
|
@@ -17,7 +17,6 @@ bitflags = "1.2.1"
|
|||||||
byte-unit = "4.0.14"
|
byte-unit = "4.0.14"
|
||||||
cgroups-rs = "0.3.2"
|
cgroups-rs = "0.3.2"
|
||||||
futures = "0.3.11"
|
futures = "0.3.11"
|
||||||
hex = "0.4.3"
|
|
||||||
lazy_static = "1.4.0"
|
lazy_static = "1.4.0"
|
||||||
libc = ">=0.2.39"
|
libc = ">=0.2.39"
|
||||||
netns-rs = "0.1.0"
|
netns-rs = "0.1.0"
|
||||||
|
@@ -24,6 +24,7 @@ use anyhow::{anyhow, Context, Result};
|
|||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use hypervisor::device::device_manager::DeviceManager;
|
use hypervisor::device::device_manager::DeviceManager;
|
||||||
use hypervisor::Hypervisor;
|
use hypervisor::Hypervisor;
|
||||||
|
use kata_sys_util::netns::NetnsGuard;
|
||||||
use kata_types::config::TomlConfig;
|
use kata_types::config::TomlConfig;
|
||||||
use scopeguard::defer;
|
use scopeguard::defer;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
@@ -32,7 +33,7 @@ use tokio::sync::RwLock;
|
|||||||
|
|
||||||
use super::network_entity::NetworkEntity;
|
use super::network_entity::NetworkEntity;
|
||||||
use super::utils::address::{ip_family_from_ip_addr, parse_ip_cidr};
|
use super::utils::address::{ip_family_from_ip_addr, parse_ip_cidr};
|
||||||
use super::{EndpointState, NetnsGuard, Network};
|
use super::{EndpointState, Network};
|
||||||
use crate::network::endpoint::{TapEndpoint, VhostUserEndpoint};
|
use crate::network::endpoint::{TapEndpoint, VhostUserEndpoint};
|
||||||
use crate::network::network_info::network_info_from_dan::NetworkInfoFromDan;
|
use crate::network::network_info::network_info_from_dan::NetworkInfoFromDan;
|
||||||
use crate::network::utils::generate_private_mac_addr;
|
use crate::network::utils::generate_private_mac_addr;
|
||||||
|
@@ -22,8 +22,8 @@ use network_with_netns::NetworkWithNetns;
|
|||||||
mod network_pair;
|
mod network_pair;
|
||||||
use network_pair::NetworkPair;
|
use network_pair::NetworkPair;
|
||||||
mod utils;
|
mod utils;
|
||||||
|
pub use kata_sys_util::netns::{generate_netns_name, NetnsGuard};
|
||||||
use tokio::sync::RwLock;
|
use tokio::sync::RwLock;
|
||||||
pub use utils::netns::{generate_netns_name, NetnsGuard};
|
|
||||||
|
|
||||||
use anyhow::{Context, Result};
|
use anyhow::{Context, Result};
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
|
@@ -17,6 +17,7 @@ use anyhow::{anyhow, Context, Result};
|
|||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use futures::stream::TryStreamExt;
|
use futures::stream::TryStreamExt;
|
||||||
use hypervisor::{device::device_manager::DeviceManager, Hypervisor};
|
use hypervisor::{device::device_manager::DeviceManager, Hypervisor};
|
||||||
|
use kata_sys_util::netns;
|
||||||
use netns_rs::get_from_path;
|
use netns_rs::get_from_path;
|
||||||
use scopeguard::defer;
|
use scopeguard::defer;
|
||||||
use tokio::sync::RwLock;
|
use tokio::sync::RwLock;
|
||||||
@@ -27,7 +28,7 @@ use super::{
|
|||||||
},
|
},
|
||||||
network_entity::NetworkEntity,
|
network_entity::NetworkEntity,
|
||||||
network_info::network_info_from_link::{handle_addresses, NetworkInfoFromLink},
|
network_info::network_info_from_link::{handle_addresses, NetworkInfoFromLink},
|
||||||
utils::{link, netns},
|
utils::link,
|
||||||
Network,
|
Network,
|
||||||
};
|
};
|
||||||
use crate::network::NetworkInfo;
|
use crate::network::NetworkInfo;
|
||||||
|
@@ -6,7 +6,6 @@
|
|||||||
|
|
||||||
pub(crate) mod address;
|
pub(crate) mod address;
|
||||||
pub(crate) mod link;
|
pub(crate) mod link;
|
||||||
pub(crate) mod netns;
|
|
||||||
|
|
||||||
use anyhow::{anyhow, Result};
|
use anyhow::{anyhow, Result};
|
||||||
use rand::rngs::OsRng;
|
use rand::rngs::OsRng;
|
||||||
|
@@ -21,12 +21,11 @@ use common::{
|
|||||||
};
|
};
|
||||||
use hypervisor::Hypervisor;
|
use hypervisor::Hypervisor;
|
||||||
use oci::Process as OCIProcess;
|
use oci::Process as OCIProcess;
|
||||||
use resource::network::NetnsGuard;
|
|
||||||
use resource::ResourceManager;
|
use resource::ResourceManager;
|
||||||
use tokio::sync::RwLock;
|
use tokio::sync::RwLock;
|
||||||
use tracing::instrument;
|
use tracing::instrument;
|
||||||
|
|
||||||
use kata_sys_util::hooks::HookStates;
|
use kata_sys_util::{hooks::HookStates, netns::NetnsGuard};
|
||||||
|
|
||||||
use super::{logger_with_process, Container};
|
use super::{logger_with_process, Container};
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user