mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-08-31 08:28:34 +00:00
Merge pull request #9353 from pmores/pr-8866-follow-up
runtime-rs: refactor qemu driver
This commit is contained in:
@@ -3,17 +3,16 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use super::network::{generate_netdev_fds, NetDevice};
|
||||
use crate::utils::clear_fd_flags;
|
||||
use crate::{kernel_param::KernelParams, HypervisorConfig, NetworkConfig};
|
||||
use crate::utils::{clear_cloexec, create_vhost_net_fds, open_named_tuntap};
|
||||
use crate::{kernel_param::KernelParams, Address, HypervisorConfig};
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use async_trait::async_trait;
|
||||
use kata_types::config::hypervisor::NetworkInfo;
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::Display;
|
||||
use std::fs::{read_to_string, File};
|
||||
use std::os::fd::AsRawFd;
|
||||
use std::os::unix::io::RawFd;
|
||||
use tokio;
|
||||
|
||||
// These should have been called MiB and GiB for better readability but the
|
||||
// more fitting names unfortunately generate linter warnings.
|
||||
@@ -811,13 +810,13 @@ impl ToQemuParams for DeviceVirtioBlk {
|
||||
|
||||
struct VhostVsock {
|
||||
bus_type: VirtioBusType,
|
||||
vhostfd: RawFd,
|
||||
vhostfd: tokio::fs::File,
|
||||
guest_cid: u32,
|
||||
disable_modern: bool,
|
||||
}
|
||||
|
||||
impl VhostVsock {
|
||||
fn new(vhostfd: RawFd, guest_cid: u32, bus_type: VirtioBusType) -> VhostVsock {
|
||||
fn new(vhostfd: tokio::fs::File, guest_cid: u32, bus_type: VirtioBusType) -> VhostVsock {
|
||||
VhostVsock {
|
||||
bus_type,
|
||||
vhostfd,
|
||||
@@ -840,7 +839,7 @@ impl ToQemuParams for VhostVsock {
|
||||
if self.disable_modern {
|
||||
params.push("disable-modern=true".to_owned());
|
||||
}
|
||||
params.push(format!("vhostfd={}", self.vhostfd));
|
||||
params.push(format!("vhostfd={}", self.vhostfd.as_raw_fd()));
|
||||
params.push(format!("guest-cid={}", self.guest_cid));
|
||||
|
||||
Ok(vec!["-device".to_owned(), params.join(",")])
|
||||
@@ -892,22 +891,135 @@ impl ToQemuParams for Serial {
|
||||
}
|
||||
}
|
||||
|
||||
fn format_fds(files: &[File]) -> String {
|
||||
files
|
||||
.iter()
|
||||
.map(|file| file.as_raw_fd().to_string())
|
||||
.collect::<Vec<String>>()
|
||||
.join(":")
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct Netdev {
|
||||
id: String,
|
||||
|
||||
// File descriptors for vhost multi-queue support.
|
||||
// {
|
||||
// queue_fds: Vec<File>,
|
||||
// vhost_fds: Vec<File>,
|
||||
// }
|
||||
fds: HashMap<String, Vec<File>>,
|
||||
|
||||
// disable_vhost_net disables virtio device emulation from the host kernel instead of from qemu.
|
||||
disable_vhost_net: bool,
|
||||
}
|
||||
|
||||
impl Netdev {
|
||||
fn new(id: &str, host_if_name: &str, num_queues: u32) -> Result<Netdev> {
|
||||
let fds = HashMap::from([
|
||||
(
|
||||
"fds".to_owned(),
|
||||
open_named_tuntap(host_if_name, num_queues)?,
|
||||
),
|
||||
("vhostfds".to_owned(), create_vhost_net_fds(num_queues)?),
|
||||
]);
|
||||
for file in fds.values().flatten() {
|
||||
clear_cloexec(file.as_raw_fd()).context("clearing O_CLOEXEC failed")?;
|
||||
}
|
||||
|
||||
Ok(Netdev {
|
||||
id: id.to_owned(),
|
||||
fds,
|
||||
disable_vhost_net: false,
|
||||
})
|
||||
}
|
||||
|
||||
fn set_disable_vhost_net(&mut self, disable_vhost_net: bool) -> &mut Self {
|
||||
self.disable_vhost_net = disable_vhost_net;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl ToQemuParams for NetDevice {
|
||||
// qemu_params returns the qemu parameters built out of this network device.
|
||||
impl ToQemuParams for Netdev {
|
||||
async fn qemu_params(&self) -> Result<Vec<String>> {
|
||||
let mut qemu_params: Vec<String> = Vec::new();
|
||||
let mut params: Vec<String> = Vec::new();
|
||||
params.push("tap".to_owned());
|
||||
params.push(format!("id={}", self.id));
|
||||
|
||||
let netdev_params = self.qemu_netdev_params()?;
|
||||
let device_params = self.qemu_device_params()?;
|
||||
if !self.disable_vhost_net {
|
||||
params.push("vhost=on".to_owned());
|
||||
if let Some(vhost_fds) = self.fds.get("vhostfds") {
|
||||
params.push(format!("vhostfds={}", format_fds(vhost_fds)));
|
||||
}
|
||||
}
|
||||
|
||||
qemu_params.push("-netdev".to_owned());
|
||||
qemu_params.push(netdev_params.join(","));
|
||||
if let Some(tuntap_fds) = self.fds.get("fds") {
|
||||
params.push(format!("fds={}", format_fds(tuntap_fds)));
|
||||
}
|
||||
|
||||
qemu_params.push("-device".to_owned());
|
||||
qemu_params.push(device_params.join(","));
|
||||
Ok(vec!["-netdev".to_owned(), params.join(",")])
|
||||
}
|
||||
}
|
||||
|
||||
Ok(qemu_params)
|
||||
#[derive(Debug)]
|
||||
pub struct DeviceVirtioNet {
|
||||
// driver is the qemu device driver
|
||||
device_driver: String,
|
||||
|
||||
// id is the corresponding backend net device identifier.
|
||||
netdev_id: String,
|
||||
|
||||
// mac_address is the guest-side networking device interface MAC address.
|
||||
mac_address: Address,
|
||||
|
||||
// disable_modern prevents qemu from relying on fast MMIO.
|
||||
disable_modern: bool,
|
||||
|
||||
num_queues: u32,
|
||||
}
|
||||
|
||||
impl DeviceVirtioNet {
|
||||
fn new(netdev_id: &str, mac_address: Address) -> DeviceVirtioNet {
|
||||
DeviceVirtioNet {
|
||||
device_driver: "virtio-net-pci".to_owned(),
|
||||
netdev_id: netdev_id.to_owned(),
|
||||
mac_address,
|
||||
disable_modern: false,
|
||||
num_queues: 1,
|
||||
}
|
||||
}
|
||||
|
||||
fn set_disable_modern(&mut self, disable_modern: bool) -> &mut Self {
|
||||
self.disable_modern = disable_modern;
|
||||
self
|
||||
}
|
||||
|
||||
fn set_num_queues(&mut self, num_queues: u32) -> &mut Self {
|
||||
self.num_queues = num_queues;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl ToQemuParams for DeviceVirtioNet {
|
||||
async fn qemu_params(&self) -> Result<Vec<String>> {
|
||||
let mut params: Vec<String> = Vec::new();
|
||||
|
||||
//params.push(format!("driver={}", &self.device_driver.to_string()));
|
||||
params.push(self.device_driver.clone());
|
||||
params.push(format!("netdev={}", &self.netdev_id));
|
||||
|
||||
params.push(format!("mac={:?}", self.mac_address));
|
||||
|
||||
if self.disable_modern {
|
||||
params.push("disable-modern=true".to_owned());
|
||||
}
|
||||
|
||||
params.push("mq=on".to_owned());
|
||||
params.push(format!("vectors={}", 2 * self.num_queues + 2));
|
||||
|
||||
Ok(vec!["-device".to_owned(), params.join(",")])
|
||||
}
|
||||
}
|
||||
|
||||
@@ -974,6 +1086,19 @@ fn is_running_in_vm() -> Result<bool> {
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
fn should_disable_modern() -> bool {
|
||||
match is_running_in_vm() {
|
||||
Ok(retval) => retval,
|
||||
Err(err) => {
|
||||
info!(
|
||||
sl!(),
|
||||
"unable to check if running in VM, assuming not: {}", err
|
||||
);
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct QemuCmdLine<'a> {
|
||||
id: String,
|
||||
config: &'a HypervisorConfig,
|
||||
@@ -1055,25 +1180,13 @@ impl<'a> QemuCmdLine<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_vsock(&mut self, vhostfd: RawFd, guest_cid: u32) -> Result<()> {
|
||||
clear_fd_flags(vhostfd).context("clear flags failed")?;
|
||||
pub fn add_vsock(&mut self, vhostfd: tokio::fs::File, guest_cid: u32) -> Result<()> {
|
||||
clear_cloexec(vhostfd.as_raw_fd()).context("clearing O_CLOEXEC failed on vsock fd")?;
|
||||
|
||||
let mut vhost_vsock_pci = VhostVsock::new(vhostfd, guest_cid, self.bus_type());
|
||||
|
||||
if !self.config.disable_nesting_checks {
|
||||
let nested = match is_running_in_vm() {
|
||||
Ok(retval) => retval,
|
||||
Err(err) => {
|
||||
info!(
|
||||
sl!(),
|
||||
"unable to check if running in VM, assuming not: {}", err
|
||||
);
|
||||
false
|
||||
}
|
||||
};
|
||||
if nested {
|
||||
vhost_vsock_pci.set_disable_modern(true);
|
||||
}
|
||||
if !self.config.disable_nesting_checks && should_disable_modern() {
|
||||
vhost_vsock_pci.set_disable_modern(true);
|
||||
}
|
||||
|
||||
self.devices.push(Box::new(vhost_vsock_pci));
|
||||
@@ -1130,23 +1243,31 @@ impl<'a> QemuCmdLine<'a> {
|
||||
|
||||
pub fn add_network_device(
|
||||
&mut self,
|
||||
config: &NetworkConfig,
|
||||
network_info: &NetworkInfo,
|
||||
) -> Result<Vec<File>> {
|
||||
let disable_vhost_net = network_info.disable_vhost_net;
|
||||
let queues = network_info.network_queues;
|
||||
dev_index: u64,
|
||||
host_dev_name: &str,
|
||||
guest_mac: Address,
|
||||
) -> Result<()> {
|
||||
let mut netdev = Netdev::new(
|
||||
&format!("network-{}", dev_index),
|
||||
host_dev_name,
|
||||
self.config.network_info.network_queues,
|
||||
)?;
|
||||
if self.config.network_info.disable_vhost_net {
|
||||
netdev.set_disable_vhost_net(true);
|
||||
}
|
||||
|
||||
let (tun_files, vhost_files) = generate_netdev_fds(config, queues)?;
|
||||
let tun_fds: Vec<i32> = tun_files.iter().map(|dev| dev.as_raw_fd()).collect();
|
||||
let vhost_fds: Vec<i32> = vhost_files.iter().map(|dev| dev.as_raw_fd()).collect();
|
||||
let mut virtio_net_device = DeviceVirtioNet::new(&netdev.id, guest_mac);
|
||||
|
||||
let net_device = NetDevice::new(config, disable_vhost_net, tun_fds, vhost_fds);
|
||||
self.devices.push(Box::new(net_device));
|
||||
if should_disable_modern() {
|
||||
virtio_net_device.set_disable_modern(true);
|
||||
}
|
||||
if self.config.network_info.network_queues > 1 {
|
||||
virtio_net_device.set_num_queues(self.config.network_info.network_queues);
|
||||
}
|
||||
|
||||
let dev_files = vec![tun_files, vhost_files];
|
||||
let fds: Vec<File> = dev_files.into_iter().flatten().collect();
|
||||
|
||||
Ok(fds)
|
||||
self.devices.push(Box::new(netdev));
|
||||
self.devices.push(Box::new(virtio_net_device));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn add_console(&mut self, console_socket_path: &str) {
|
||||
|
@@ -17,7 +17,6 @@ use kata_types::{
|
||||
};
|
||||
use persist::sandbox_persist::Persist;
|
||||
use std::collections::HashMap;
|
||||
use std::os::unix::io::AsRawFd;
|
||||
use std::path::Path;
|
||||
use std::process::Stdio;
|
||||
use tokio::{
|
||||
@@ -65,15 +64,11 @@ impl QemuInner {
|
||||
info!(sl!(), "Starting QEMU VM");
|
||||
let netns = self.netns.clone().unwrap_or_default();
|
||||
|
||||
// CAUTION: since 'cmdline' contains file descriptors that have to stay
|
||||
// open until spawn() is called to launch qemu later in this function,
|
||||
// 'cmdline' has to live at least until spawn() is called
|
||||
let mut cmdline = QemuCmdLine::new(&self.id, &self.config)?;
|
||||
|
||||
// CAUTION: File descriptors that are passed to QEMU must stay open until the QEMU process
|
||||
// is started and closed afterwards. This is achieved by collecting them in _fds_for_qemu.
|
||||
// It is mandatory for _fds_for_qemu to last until the QEMU process is forked. Leave it
|
||||
// in the outer scope of this function for this to happen. The files in _fds_for_qemu
|
||||
// should not be used in any way.
|
||||
let mut _fds_for_qemu: Vec<std::fs::File> = Vec::new();
|
||||
|
||||
for device in &mut self.devices {
|
||||
match device {
|
||||
DeviceType::ShareFs(share_fs_dev) => {
|
||||
@@ -87,8 +82,7 @@ impl QemuInner {
|
||||
}
|
||||
DeviceType::Vsock(vsock_dev) => {
|
||||
let fd = vsock_dev.init_config().await?;
|
||||
cmdline.add_vsock(fd.as_raw_fd(), vsock_dev.config.guest_cid)?;
|
||||
_fds_for_qemu.push(fd.into_std().await);
|
||||
cmdline.add_vsock(fd, vsock_dev.config.guest_cid)?;
|
||||
}
|
||||
DeviceType::Block(block_dev) => {
|
||||
if block_dev.config.path_on_host == self.config.boot_info.initrd {
|
||||
@@ -112,13 +106,14 @@ impl QemuInner {
|
||||
}
|
||||
}
|
||||
DeviceType::Network(network) => {
|
||||
let network_info = &self.config.network_info;
|
||||
|
||||
// we need ensure add_network_device happens in netns.
|
||||
let _netns_guard = NetnsGuard::new(&netns).context("new netns guard")?;
|
||||
|
||||
_fds_for_qemu
|
||||
.append(&mut cmdline.add_network_device(&network.config, network_info)?);
|
||||
cmdline.add_network_device(
|
||||
network.config.index,
|
||||
&network.config.host_dev_name,
|
||||
network.config.guest_mac.clone().unwrap(),
|
||||
)?;
|
||||
}
|
||||
_ => info!(sl!(), "qemu cmdline: unsupported device: {:?}", device),
|
||||
}
|
||||
|
@@ -5,7 +5,6 @@
|
||||
|
||||
mod cmdline_generator;
|
||||
mod inner;
|
||||
mod network;
|
||||
|
||||
use crate::device::DeviceType;
|
||||
use crate::hypervisor_persist::HypervisorState;
|
||||
|
@@ -1,339 +0,0 @@
|
||||
// Copyright (c) 2024 Ant Group
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::convert::TryFrom;
|
||||
use std::fs::{File, OpenOptions};
|
||||
use std::os::fd::RawFd;
|
||||
|
||||
use crate::utils::{clear_fd_flags, open_named_tuntap};
|
||||
use crate::{Address, NetworkConfig};
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
|
||||
// VirtioTransport is the transport in use for a virtio device.
|
||||
#[derive(Debug, Default, PartialEq)]
|
||||
enum VirtioTransport {
|
||||
#[default]
|
||||
Pci,
|
||||
}
|
||||
|
||||
impl ToString for VirtioTransport {
|
||||
fn to_string(&self) -> String {
|
||||
match self {
|
||||
VirtioTransport::Pci => "pci".to_owned(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<&str> for VirtioTransport {
|
||||
type Error = anyhow::Error;
|
||||
|
||||
fn try_from(_transport: &str) -> Result<Self> {
|
||||
Ok(VirtioTransport::Pci)
|
||||
}
|
||||
}
|
||||
|
||||
// DeviceDriver is set in "-device driver=<DeviceDriver>"
|
||||
#[derive(Debug, Default, PartialEq)]
|
||||
enum DeviceDriver {
|
||||
// VirtioNetPci("virtio-net-pci") is a virtio-net device using PCI transport.
|
||||
#[default]
|
||||
VirtioNetPci,
|
||||
|
||||
// VfioPci("vfio-pci") is an attached host device using PCI transport.
|
||||
VfioPci,
|
||||
}
|
||||
|
||||
impl ToString for DeviceDriver {
|
||||
fn to_string(&self) -> String {
|
||||
match self {
|
||||
DeviceDriver::VirtioNetPci => "virtio-net-pci".to_owned(),
|
||||
DeviceDriver::VfioPci => "vfio-pci".to_owned(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<&str> for DeviceDriver {
|
||||
type Error = anyhow::Error;
|
||||
|
||||
fn try_from(device_driver: &str) -> Result<Self> {
|
||||
Ok(match device_driver {
|
||||
"virtio-net-pci" => DeviceDriver::VirtioNetPci,
|
||||
"vfio-pci" => DeviceDriver::VfioPci,
|
||||
_ => return Err(anyhow!("unsupported transport")),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, PartialEq)]
|
||||
enum NetDev {
|
||||
/// Tap("tap") is a tap networking device type.
|
||||
#[default]
|
||||
Tap,
|
||||
|
||||
/// MacTap("macvtap") is a macvtap networking device type.
|
||||
#[allow(dead_code)]
|
||||
MacvTap,
|
||||
}
|
||||
|
||||
impl ToString for NetDev {
|
||||
fn to_string(&self) -> String {
|
||||
match self {
|
||||
NetDev::Tap | NetDev::MacvTap => "tap".to_owned(),
|
||||
// VhostUser is to be added in future.
|
||||
// NetDev::VhostUser => "vhost-user".to_owned(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// NetDevice represents a guest networking device
|
||||
// -netdev tap,id=hostnet0,vhost=on,vhostfds=x:y:z,fds=a:b:c
|
||||
// -device virtio-net-pci,netdev=hostnet0,id=net0,mac=24:42:54:20:50:46,bus=pci.0,addr=0x7
|
||||
#[derive(Debug, Default)]
|
||||
pub struct NetDevice {
|
||||
// device_type is the netdev type (e.g. tap).
|
||||
device_type: NetDev,
|
||||
|
||||
// driver is the qemu device driver
|
||||
device_driver: DeviceDriver,
|
||||
|
||||
// id is the net device identifier.
|
||||
id: String,
|
||||
|
||||
// if_name is the interface name,
|
||||
if_name: String,
|
||||
|
||||
// bus is the bus path name of a PCI device.
|
||||
bus: String,
|
||||
|
||||
// pci_addr is the address offset of a PCI device.
|
||||
pci_addr: String,
|
||||
|
||||
// fds represents the list of already existing file descriptors to be used.
|
||||
// This is mostly useful for mq support.
|
||||
// {
|
||||
// fds: Vec<File>,
|
||||
// vhost_fds: Vec<File>,
|
||||
// }
|
||||
fds: HashMap<String, Vec<RawFd>>,
|
||||
|
||||
// disable_vhost_net disables virtio device emulation from the host kernel instead of from qemu.
|
||||
disable_vhost_net: bool,
|
||||
|
||||
// mac_address is the networking device interface MAC address.
|
||||
mac_address: Address,
|
||||
|
||||
// disable_modern prevents qemu from relying on fast MMIO.
|
||||
disable_modern: bool,
|
||||
|
||||
// transport is the virtio transport for this device.
|
||||
transport: VirtioTransport,
|
||||
}
|
||||
|
||||
impl NetDevice {
|
||||
#[allow(dead_code)]
|
||||
pub fn new(
|
||||
config: &NetworkConfig,
|
||||
disable_vhost_net: bool,
|
||||
tun_fds: Vec<i32>,
|
||||
vhost_fds: Vec<i32>,
|
||||
) -> Self {
|
||||
// we have only two <Key, Value>s:
|
||||
// {
|
||||
// "fds": vec![fd1, fd2,...],
|
||||
// "vhostfds": vec![fd3, fd4,...],
|
||||
// }
|
||||
let mut fds: HashMap<String, Vec<RawFd>> = HashMap::with_capacity(2);
|
||||
fds.insert("fds".to_owned(), tun_fds);
|
||||
fds.insert("vhostfds".to_owned(), vhost_fds);
|
||||
|
||||
// FIXME(Hard Code): It's safe to unwrap here because of the valid input.
|
||||
// Ideally device_driver should be derived from transport to minimize code duplication.
|
||||
// While currently we focus on PCI for the initial implementation.
|
||||
// And we'll support other transports, e.g. s390x's CCW.
|
||||
let device_driver = DeviceDriver::try_from("virtio-net-pci").unwrap();
|
||||
let transport = VirtioTransport::try_from("pci").unwrap();
|
||||
|
||||
NetDevice {
|
||||
device_type: NetDev::Tap,
|
||||
device_driver,
|
||||
id: format!("network-{}", &config.index),
|
||||
if_name: config.virt_iface_name.clone(),
|
||||
mac_address: config.guest_mac.clone().unwrap(),
|
||||
disable_vhost_net,
|
||||
disable_modern: false,
|
||||
fds,
|
||||
transport,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
fn mq_param(&self) -> String {
|
||||
let mut params = vec!["mq=on".to_owned()];
|
||||
if self.transport == VirtioTransport::Pci {
|
||||
// https://www.linux-kvm.org/page/Multiqueue
|
||||
// -netdev tap,vhost=on,queues=N
|
||||
// enable mq and specify msix vectors in qemu cmdline
|
||||
// (2N+2 vectors, N for tx queues, N for rx queues, 1 for config, and one for possible control vq)
|
||||
// -device virtio-net-pci,mq=on,vectors=2N+2...
|
||||
// enable mq in guest by 'ethtool -L eth0 combined $queue_num'
|
||||
// Clearlinux automatically sets up the queues properly
|
||||
// The agent implementation should do this to ensure that it is
|
||||
// always set
|
||||
|
||||
// vectors = len(netdev.FDs) * 2 + 2
|
||||
if let Some(fds) = self.fds.get("fds") {
|
||||
params.push(format!("vectors={}", 2 * fds.len() + 2));
|
||||
}
|
||||
}
|
||||
|
||||
params.join(",")
|
||||
}
|
||||
|
||||
pub fn qemu_device_params(&self) -> Result<Vec<String>> {
|
||||
let mut device_params: Vec<String> = Vec::new();
|
||||
|
||||
device_params.push(format!("driver={}", &self.device_driver.to_string()));
|
||||
device_params.push(format!("netdev={}", &self.id));
|
||||
|
||||
let mac = self.mac_address.to_string();
|
||||
device_params.push(format!("mac={}", &mac));
|
||||
|
||||
if !self.bus.is_empty() {
|
||||
device_params.push(format!("bus={}", &self.bus));
|
||||
}
|
||||
|
||||
if !self.pci_addr.is_empty() {
|
||||
// FIXME: pci_addr: PciPath
|
||||
device_params.push(format!("addr={}", &self.pci_addr));
|
||||
}
|
||||
|
||||
device_params.push(format!(
|
||||
"disable-modern={}",
|
||||
if self.disable_modern { "true" } else { "false" }
|
||||
));
|
||||
|
||||
if !self.fds.is_empty() {
|
||||
device_params.push(self.mq_param());
|
||||
}
|
||||
|
||||
Ok(device_params)
|
||||
}
|
||||
|
||||
pub fn qemu_netdev_params(&self) -> Result<Vec<String>> {
|
||||
let mut netdev_params: Vec<String> = Vec::new();
|
||||
let netdev_type = self.device_type.to_string();
|
||||
netdev_params.push(netdev_type);
|
||||
netdev_params.push(format!("id={}", self.id));
|
||||
|
||||
if !self.disable_vhost_net {
|
||||
netdev_params.push("vhost=on".to_owned());
|
||||
if let Some(vhost_fds) = self.fds.get("vhostfds") {
|
||||
for fd in vhost_fds.iter() {
|
||||
clear_fd_flags(*fd)?;
|
||||
}
|
||||
let s = vhost_fds
|
||||
.iter()
|
||||
.map(|&n| n.to_string())
|
||||
.collect::<Vec<String>>()
|
||||
.join(":");
|
||||
|
||||
netdev_params.push(format!("vhostfds={}", s));
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(tuntap_fds) = self.fds.get("fds") {
|
||||
for fd in tuntap_fds.iter() {
|
||||
clear_fd_flags(*fd).context("clear flag of fd failed")?;
|
||||
}
|
||||
let s = tuntap_fds
|
||||
.iter()
|
||||
.map(|&n| n.to_string())
|
||||
.collect::<Vec<String>>()
|
||||
.join(":");
|
||||
netdev_params.push(format!("fds={}", s));
|
||||
} else {
|
||||
netdev_params.push(format!("ifname={}", self.if_name));
|
||||
netdev_params.push("script=no".to_owned());
|
||||
netdev_params.push("downscript=no".to_owned());
|
||||
}
|
||||
|
||||
Ok(netdev_params)
|
||||
}
|
||||
}
|
||||
|
||||
impl ToString for Address {
|
||||
fn to_string(&self) -> String {
|
||||
let b: [u8; 6] = self.0;
|
||||
|
||||
format!(
|
||||
"{:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}",
|
||||
b[0], b[1], b[2], b[3], b[4], b[5]
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// /dev/tap$(cat /sys/class/net/macvtap1/ifindex)
|
||||
// for example: /dev/tap2381
|
||||
#[allow(dead_code)]
|
||||
pub fn create_macvtap_fds(ifindex: u32, queues: u32) -> Result<Vec<File>> {
|
||||
let macvtap = format!("/dev/tap{}", ifindex);
|
||||
create_fds(macvtap.as_str(), queues as usize)
|
||||
}
|
||||
|
||||
pub fn create_vhost_net_fds(queues: u32) -> Result<Vec<File>> {
|
||||
let vhost_dev = "/dev/vhost-net";
|
||||
let num_fds = if queues > 1 { queues as usize } else { 1_usize };
|
||||
|
||||
create_fds(vhost_dev, num_fds)
|
||||
}
|
||||
|
||||
// For example: if num_fds = 3; fds = {0xc000012028, 0xc000012030, 0xc000012038}
|
||||
fn create_fds(device: &str, num_fds: usize) -> Result<Vec<File>> {
|
||||
let mut fds: Vec<File> = Vec::with_capacity(num_fds);
|
||||
|
||||
for i in 0..num_fds {
|
||||
match OpenOptions::new().read(true).write(true).open(device) {
|
||||
Ok(f) => {
|
||||
fds.push(f);
|
||||
}
|
||||
Err(e) => {
|
||||
fds.clear();
|
||||
return Err(anyhow!(
|
||||
"It failed with error {:?} when opened the {:?} device.",
|
||||
e,
|
||||
i
|
||||
));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
Ok(fds)
|
||||
}
|
||||
|
||||
pub fn generate_netdev_fds(
|
||||
network_config: &NetworkConfig,
|
||||
queues: u32,
|
||||
) -> Result<(Vec<File>, Vec<File>)> {
|
||||
let if_name = network_config.host_dev_name.as_str();
|
||||
let tun_taps = open_named_tuntap(if_name, queues)?;
|
||||
let vhost_fds = create_vhost_net_fds(queues)?;
|
||||
|
||||
Ok((tun_taps, vhost_fds))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::create_fds;
|
||||
|
||||
#[test]
|
||||
fn test_ctreate_fds() {
|
||||
let device = "/dev/null";
|
||||
let num_fds = 3_usize;
|
||||
let fds = create_fds(device, num_fds);
|
||||
assert!(fds.is_ok());
|
||||
assert_eq!(fds.unwrap().len(), num_fds);
|
||||
}
|
||||
}
|
@@ -6,7 +6,7 @@
|
||||
|
||||
use std::{
|
||||
collections::HashSet,
|
||||
fs::File,
|
||||
fs::{File, OpenOptions},
|
||||
os::fd::{AsRawFd, RawFd},
|
||||
};
|
||||
|
||||
@@ -58,15 +58,19 @@ pub fn get_jailer_root(sid: &str) -> String {
|
||||
[&sandbox_path, JAILER_ROOT].join("/")
|
||||
}
|
||||
|
||||
// Clear the O_CLOEXEC which is set by default by Rust standard library
|
||||
// as it would obviously prevent passing the descriptor to the hypervisor process.
|
||||
pub fn clear_fd_flags(rawfd: RawFd) -> Result<()> {
|
||||
if let Err(err) = fcntl::fcntl(rawfd, fcntl::FcntlArg::F_SETFD(fcntl::FdFlag::empty())) {
|
||||
info!(
|
||||
sl!(),
|
||||
"couldn't clear O_CLOEXEC on device's fd, communication with agent will not work: {:?}",
|
||||
err
|
||||
);
|
||||
// Clear the O_CLOEXEC which is set by default by Rust standard library on
|
||||
// file descriptors that it opens. This function is mostly meant to be
|
||||
// called on descriptors to be passed to a child (hypervisor) process as
|
||||
// O_CLOEXEC would obviously prevent that.
|
||||
pub fn clear_cloexec(rawfd: RawFd) -> Result<()> {
|
||||
let cur_flags = fcntl::fcntl(rawfd, fcntl::FcntlArg::F_GETFD)?;
|
||||
let mut new_flags = fcntl::FdFlag::from_bits(cur_flags).ok_or(anyhow!(
|
||||
"couldn't construct FdFlag from flags value {:?}",
|
||||
cur_flags
|
||||
))?;
|
||||
new_flags.remove(fcntl::FdFlag::FD_CLOEXEC);
|
||||
if let Err(err) = fcntl::fcntl(rawfd, fcntl::FcntlArg::F_SETFD(new_flags)) {
|
||||
info!(sl!(), "couldn't clear O_CLOEXEC on fd: {:?}", err);
|
||||
return Err(err.into());
|
||||
}
|
||||
|
||||
@@ -100,3 +104,55 @@ pub fn open_named_tuntap(if_name: &str, queues: u32) -> Result<Vec<File>> {
|
||||
|
||||
Ok(tap_files)
|
||||
}
|
||||
|
||||
// /dev/tap$(cat /sys/class/net/macvtap1/ifindex)
|
||||
// for example: /dev/tap2381
|
||||
#[allow(dead_code)]
|
||||
pub fn create_macvtap_fds(ifindex: u32, queues: u32) -> Result<Vec<File>> {
|
||||
let macvtap = format!("/dev/tap{}", ifindex);
|
||||
create_fds(macvtap.as_str(), queues as usize)
|
||||
}
|
||||
|
||||
pub fn create_vhost_net_fds(queues: u32) -> Result<Vec<File>> {
|
||||
let vhost_dev = "/dev/vhost-net";
|
||||
let num_fds = if queues > 1 { queues as usize } else { 1_usize };
|
||||
|
||||
create_fds(vhost_dev, num_fds)
|
||||
}
|
||||
|
||||
// For example: if num_fds = 3; fds = {0xc000012028, 0xc000012030, 0xc000012038}
|
||||
fn create_fds(device: &str, num_fds: usize) -> Result<Vec<File>> {
|
||||
let mut fds: Vec<File> = Vec::with_capacity(num_fds);
|
||||
|
||||
for i in 0..num_fds {
|
||||
match OpenOptions::new().read(true).write(true).open(device) {
|
||||
Ok(f) => {
|
||||
fds.push(f);
|
||||
}
|
||||
Err(e) => {
|
||||
fds.clear();
|
||||
return Err(anyhow!(
|
||||
"It failed with error {:?} when opened the {:?} device.",
|
||||
e,
|
||||
i
|
||||
));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
Ok(fds)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::create_fds;
|
||||
|
||||
#[test]
|
||||
fn test_ctreate_fds() {
|
||||
let device = "/dev/null";
|
||||
let num_fds = 3_usize;
|
||||
let fds = create_fds(device, num_fds);
|
||||
assert!(fds.is_ok());
|
||||
assert_eq!(fds.unwrap().len(), num_fds);
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user