mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-09-03 01:44:29 +00:00
Merge pull request #9353 from pmores/pr-8866-follow-up
runtime-rs: refactor qemu driver
This commit is contained in:
@@ -3,17 +3,16 @@
|
|||||||
// SPDX-License-Identifier: Apache-2.0
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
//
|
//
|
||||||
|
|
||||||
use super::network::{generate_netdev_fds, NetDevice};
|
use crate::utils::{clear_cloexec, create_vhost_net_fds, open_named_tuntap};
|
||||||
use crate::utils::clear_fd_flags;
|
use crate::{kernel_param::KernelParams, Address, HypervisorConfig};
|
||||||
use crate::{kernel_param::KernelParams, HypervisorConfig, NetworkConfig};
|
|
||||||
|
|
||||||
use anyhow::{anyhow, Context, Result};
|
use anyhow::{anyhow, Context, Result};
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use kata_types::config::hypervisor::NetworkInfo;
|
use std::collections::HashMap;
|
||||||
use std::fmt::Display;
|
use std::fmt::Display;
|
||||||
use std::fs::{read_to_string, File};
|
use std::fs::{read_to_string, File};
|
||||||
use std::os::fd::AsRawFd;
|
use std::os::fd::AsRawFd;
|
||||||
use std::os::unix::io::RawFd;
|
use tokio;
|
||||||
|
|
||||||
// These should have been called MiB and GiB for better readability but the
|
// These should have been called MiB and GiB for better readability but the
|
||||||
// more fitting names unfortunately generate linter warnings.
|
// more fitting names unfortunately generate linter warnings.
|
||||||
@@ -811,13 +810,13 @@ impl ToQemuParams for DeviceVirtioBlk {
|
|||||||
|
|
||||||
struct VhostVsock {
|
struct VhostVsock {
|
||||||
bus_type: VirtioBusType,
|
bus_type: VirtioBusType,
|
||||||
vhostfd: RawFd,
|
vhostfd: tokio::fs::File,
|
||||||
guest_cid: u32,
|
guest_cid: u32,
|
||||||
disable_modern: bool,
|
disable_modern: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl VhostVsock {
|
impl VhostVsock {
|
||||||
fn new(vhostfd: RawFd, guest_cid: u32, bus_type: VirtioBusType) -> VhostVsock {
|
fn new(vhostfd: tokio::fs::File, guest_cid: u32, bus_type: VirtioBusType) -> VhostVsock {
|
||||||
VhostVsock {
|
VhostVsock {
|
||||||
bus_type,
|
bus_type,
|
||||||
vhostfd,
|
vhostfd,
|
||||||
@@ -840,7 +839,7 @@ impl ToQemuParams for VhostVsock {
|
|||||||
if self.disable_modern {
|
if self.disable_modern {
|
||||||
params.push("disable-modern=true".to_owned());
|
params.push("disable-modern=true".to_owned());
|
||||||
}
|
}
|
||||||
params.push(format!("vhostfd={}", self.vhostfd));
|
params.push(format!("vhostfd={}", self.vhostfd.as_raw_fd()));
|
||||||
params.push(format!("guest-cid={}", self.guest_cid));
|
params.push(format!("guest-cid={}", self.guest_cid));
|
||||||
|
|
||||||
Ok(vec!["-device".to_owned(), params.join(",")])
|
Ok(vec!["-device".to_owned(), params.join(",")])
|
||||||
@@ -892,22 +891,135 @@ impl ToQemuParams for Serial {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Renders the raw descriptor numbers of `files` as one colon-separated list
// (for instance "5:6:7"). An empty slice yields an empty string.
fn format_fds(files: &[File]) -> String {
    let mut rendered = String::new();
    for (position, handle) in files.iter().enumerate() {
        // The separator goes between entries only, never in front of the first.
        if position > 0 {
            rendered.push(':');
        }
        rendered.push_str(&handle.as_raw_fd().to_string());
    }
    rendered
}
|
||||||
|
|
||||||
|
// Netdev describes the host-side network backend of a guest NIC.
#[derive(Debug)]
struct Netdev {
    // Identifier of this backend.
    id: String,

    // Open files backing the device, grouped by the option name they are
    // passed under. Needed for vhost multi-queue support. Layout:
    // {
    //      "fds":      Vec<File>,   // tun/tap queue descriptors
    //      "vhostfds": Vec<File>,   // vhost-net descriptors
    // }
    // The files are owned here, so the descriptors stay open for as long as
    // this value is alive.
    fds: HashMap<String, Vec<File>>,

    // disable_vhost_net turns off virtio device emulation by the host kernel
    // so that it is done by qemu instead.
    disable_vhost_net: bool,
}
|
||||||
|
|
||||||
|
impl Netdev {
|
||||||
|
fn new(id: &str, host_if_name: &str, num_queues: u32) -> Result<Netdev> {
|
||||||
|
let fds = HashMap::from([
|
||||||
|
(
|
||||||
|
"fds".to_owned(),
|
||||||
|
open_named_tuntap(host_if_name, num_queues)?,
|
||||||
|
),
|
||||||
|
("vhostfds".to_owned(), create_vhost_net_fds(num_queues)?),
|
||||||
|
]);
|
||||||
|
for file in fds.values().flatten() {
|
||||||
|
clear_cloexec(file.as_raw_fd()).context("clearing O_CLOEXEC failed")?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Netdev {
|
||||||
|
id: id.to_owned(),
|
||||||
|
fds,
|
||||||
|
disable_vhost_net: false,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn set_disable_vhost_net(&mut self, disable_vhost_net: bool) -> &mut Self {
|
||||||
|
self.disable_vhost_net = disable_vhost_net;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
impl ToQemuParams for NetDevice {
|
impl ToQemuParams for Netdev {
|
||||||
// qemu_params returns the qemu parameters built out of this network device.
|
|
||||||
async fn qemu_params(&self) -> Result<Vec<String>> {
|
async fn qemu_params(&self) -> Result<Vec<String>> {
|
||||||
let mut qemu_params: Vec<String> = Vec::new();
|
let mut params: Vec<String> = Vec::new();
|
||||||
|
params.push("tap".to_owned());
|
||||||
|
params.push(format!("id={}", self.id));
|
||||||
|
|
||||||
let netdev_params = self.qemu_netdev_params()?;
|
if !self.disable_vhost_net {
|
||||||
let device_params = self.qemu_device_params()?;
|
params.push("vhost=on".to_owned());
|
||||||
|
if let Some(vhost_fds) = self.fds.get("vhostfds") {
|
||||||
|
params.push(format!("vhostfds={}", format_fds(vhost_fds)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
qemu_params.push("-netdev".to_owned());
|
if let Some(tuntap_fds) = self.fds.get("fds") {
|
||||||
qemu_params.push(netdev_params.join(","));
|
params.push(format!("fds={}", format_fds(tuntap_fds)));
|
||||||
|
}
|
||||||
|
|
||||||
qemu_params.push("-device".to_owned());
|
Ok(vec!["-netdev".to_owned(), params.join(",")])
|
||||||
qemu_params.push(device_params.join(","));
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Ok(qemu_params)
|
#[derive(Debug)]
|
||||||
|
pub struct DeviceVirtioNet {
|
||||||
|
// driver is the qemu device driver
|
||||||
|
device_driver: String,
|
||||||
|
|
||||||
|
// id is the corresponding backend net device identifier.
|
||||||
|
netdev_id: String,
|
||||||
|
|
||||||
|
// mac_address is the guest-side networking device interface MAC address.
|
||||||
|
mac_address: Address,
|
||||||
|
|
||||||
|
// disable_modern prevents qemu from relying on fast MMIO.
|
||||||
|
disable_modern: bool,
|
||||||
|
|
||||||
|
num_queues: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl DeviceVirtioNet {
|
||||||
|
fn new(netdev_id: &str, mac_address: Address) -> DeviceVirtioNet {
|
||||||
|
DeviceVirtioNet {
|
||||||
|
device_driver: "virtio-net-pci".to_owned(),
|
||||||
|
netdev_id: netdev_id.to_owned(),
|
||||||
|
mac_address,
|
||||||
|
disable_modern: false,
|
||||||
|
num_queues: 1,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn set_disable_modern(&mut self, disable_modern: bool) -> &mut Self {
|
||||||
|
self.disable_modern = disable_modern;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
fn set_num_queues(&mut self, num_queues: u32) -> &mut Self {
|
||||||
|
self.num_queues = num_queues;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl ToQemuParams for DeviceVirtioNet {
|
||||||
|
async fn qemu_params(&self) -> Result<Vec<String>> {
|
||||||
|
let mut params: Vec<String> = Vec::new();
|
||||||
|
|
||||||
|
//params.push(format!("driver={}", &self.device_driver.to_string()));
|
||||||
|
params.push(self.device_driver.clone());
|
||||||
|
params.push(format!("netdev={}", &self.netdev_id));
|
||||||
|
|
||||||
|
params.push(format!("mac={:?}", self.mac_address));
|
||||||
|
|
||||||
|
if self.disable_modern {
|
||||||
|
params.push("disable-modern=true".to_owned());
|
||||||
|
}
|
||||||
|
|
||||||
|
params.push("mq=on".to_owned());
|
||||||
|
params.push(format!("vectors={}", 2 * self.num_queues + 2));
|
||||||
|
|
||||||
|
Ok(vec!["-device".to_owned(), params.join(",")])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -974,6 +1086,19 @@ fn is_running_in_vm() -> Result<bool> {
|
|||||||
Ok(res)
|
Ok(res)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn should_disable_modern() -> bool {
|
||||||
|
match is_running_in_vm() {
|
||||||
|
Ok(retval) => retval,
|
||||||
|
Err(err) => {
|
||||||
|
info!(
|
||||||
|
sl!(),
|
||||||
|
"unable to check if running in VM, assuming not: {}", err
|
||||||
|
);
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub struct QemuCmdLine<'a> {
|
pub struct QemuCmdLine<'a> {
|
||||||
id: String,
|
id: String,
|
||||||
config: &'a HypervisorConfig,
|
config: &'a HypervisorConfig,
|
||||||
@@ -1055,25 +1180,13 @@ impl<'a> QemuCmdLine<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn add_vsock(&mut self, vhostfd: RawFd, guest_cid: u32) -> Result<()> {
|
pub fn add_vsock(&mut self, vhostfd: tokio::fs::File, guest_cid: u32) -> Result<()> {
|
||||||
clear_fd_flags(vhostfd).context("clear flags failed")?;
|
clear_cloexec(vhostfd.as_raw_fd()).context("clearing O_CLOEXEC failed on vsock fd")?;
|
||||||
|
|
||||||
let mut vhost_vsock_pci = VhostVsock::new(vhostfd, guest_cid, self.bus_type());
|
let mut vhost_vsock_pci = VhostVsock::new(vhostfd, guest_cid, self.bus_type());
|
||||||
|
|
||||||
if !self.config.disable_nesting_checks {
|
if !self.config.disable_nesting_checks && should_disable_modern() {
|
||||||
let nested = match is_running_in_vm() {
|
vhost_vsock_pci.set_disable_modern(true);
|
||||||
Ok(retval) => retval,
|
|
||||||
Err(err) => {
|
|
||||||
info!(
|
|
||||||
sl!(),
|
|
||||||
"unable to check if running in VM, assuming not: {}", err
|
|
||||||
);
|
|
||||||
false
|
|
||||||
}
|
|
||||||
};
|
|
||||||
if nested {
|
|
||||||
vhost_vsock_pci.set_disable_modern(true);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
self.devices.push(Box::new(vhost_vsock_pci));
|
self.devices.push(Box::new(vhost_vsock_pci));
|
||||||
@@ -1130,23 +1243,31 @@ impl<'a> QemuCmdLine<'a> {
|
|||||||
|
|
||||||
pub fn add_network_device(
|
pub fn add_network_device(
|
||||||
&mut self,
|
&mut self,
|
||||||
config: &NetworkConfig,
|
dev_index: u64,
|
||||||
network_info: &NetworkInfo,
|
host_dev_name: &str,
|
||||||
) -> Result<Vec<File>> {
|
guest_mac: Address,
|
||||||
let disable_vhost_net = network_info.disable_vhost_net;
|
) -> Result<()> {
|
||||||
let queues = network_info.network_queues;
|
let mut netdev = Netdev::new(
|
||||||
|
&format!("network-{}", dev_index),
|
||||||
|
host_dev_name,
|
||||||
|
self.config.network_info.network_queues,
|
||||||
|
)?;
|
||||||
|
if self.config.network_info.disable_vhost_net {
|
||||||
|
netdev.set_disable_vhost_net(true);
|
||||||
|
}
|
||||||
|
|
||||||
let (tun_files, vhost_files) = generate_netdev_fds(config, queues)?;
|
let mut virtio_net_device = DeviceVirtioNet::new(&netdev.id, guest_mac);
|
||||||
let tun_fds: Vec<i32> = tun_files.iter().map(|dev| dev.as_raw_fd()).collect();
|
|
||||||
let vhost_fds: Vec<i32> = vhost_files.iter().map(|dev| dev.as_raw_fd()).collect();
|
|
||||||
|
|
||||||
let net_device = NetDevice::new(config, disable_vhost_net, tun_fds, vhost_fds);
|
if should_disable_modern() {
|
||||||
self.devices.push(Box::new(net_device));
|
virtio_net_device.set_disable_modern(true);
|
||||||
|
}
|
||||||
|
if self.config.network_info.network_queues > 1 {
|
||||||
|
virtio_net_device.set_num_queues(self.config.network_info.network_queues);
|
||||||
|
}
|
||||||
|
|
||||||
let dev_files = vec![tun_files, vhost_files];
|
self.devices.push(Box::new(netdev));
|
||||||
let fds: Vec<File> = dev_files.into_iter().flatten().collect();
|
self.devices.push(Box::new(virtio_net_device));
|
||||||
|
Ok(())
|
||||||
Ok(fds)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn add_console(&mut self, console_socket_path: &str) {
|
pub fn add_console(&mut self, console_socket_path: &str) {
|
||||||
|
@@ -17,7 +17,6 @@ use kata_types::{
|
|||||||
};
|
};
|
||||||
use persist::sandbox_persist::Persist;
|
use persist::sandbox_persist::Persist;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::os::unix::io::AsRawFd;
|
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use std::process::Stdio;
|
use std::process::Stdio;
|
||||||
use tokio::{
|
use tokio::{
|
||||||
@@ -65,15 +64,11 @@ impl QemuInner {
|
|||||||
info!(sl!(), "Starting QEMU VM");
|
info!(sl!(), "Starting QEMU VM");
|
||||||
let netns = self.netns.clone().unwrap_or_default();
|
let netns = self.netns.clone().unwrap_or_default();
|
||||||
|
|
||||||
|
// CAUTION: since 'cmdline' contains file descriptors that have to stay
|
||||||
|
// open until spawn() is called to launch qemu later in this function,
|
||||||
|
// 'cmdline' has to live at least until spawn() is called
|
||||||
let mut cmdline = QemuCmdLine::new(&self.id, &self.config)?;
|
let mut cmdline = QemuCmdLine::new(&self.id, &self.config)?;
|
||||||
|
|
||||||
// CAUTION: File descriptors that are passed to QEMU must stay open until the QEMU process
|
|
||||||
// is started and closed afterwards. This is achieved by collecting them in _fds_for_qemu.
|
|
||||||
// It is mandatory for _fds_for_qemu to last until the QEMU process is forked. Leave it
|
|
||||||
// in the outer scope of this function for this to happen. The files in _fds_for_qemu
|
|
||||||
// should not be used in any way.
|
|
||||||
let mut _fds_for_qemu: Vec<std::fs::File> = Vec::new();
|
|
||||||
|
|
||||||
for device in &mut self.devices {
|
for device in &mut self.devices {
|
||||||
match device {
|
match device {
|
||||||
DeviceType::ShareFs(share_fs_dev) => {
|
DeviceType::ShareFs(share_fs_dev) => {
|
||||||
@@ -87,8 +82,7 @@ impl QemuInner {
|
|||||||
}
|
}
|
||||||
DeviceType::Vsock(vsock_dev) => {
|
DeviceType::Vsock(vsock_dev) => {
|
||||||
let fd = vsock_dev.init_config().await?;
|
let fd = vsock_dev.init_config().await?;
|
||||||
cmdline.add_vsock(fd.as_raw_fd(), vsock_dev.config.guest_cid)?;
|
cmdline.add_vsock(fd, vsock_dev.config.guest_cid)?;
|
||||||
_fds_for_qemu.push(fd.into_std().await);
|
|
||||||
}
|
}
|
||||||
DeviceType::Block(block_dev) => {
|
DeviceType::Block(block_dev) => {
|
||||||
if block_dev.config.path_on_host == self.config.boot_info.initrd {
|
if block_dev.config.path_on_host == self.config.boot_info.initrd {
|
||||||
@@ -112,13 +106,14 @@ impl QemuInner {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
DeviceType::Network(network) => {
|
DeviceType::Network(network) => {
|
||||||
let network_info = &self.config.network_info;
|
|
||||||
|
|
||||||
// we need ensure add_network_device happens in netns.
|
// we need ensure add_network_device happens in netns.
|
||||||
let _netns_guard = NetnsGuard::new(&netns).context("new netns guard")?;
|
let _netns_guard = NetnsGuard::new(&netns).context("new netns guard")?;
|
||||||
|
|
||||||
_fds_for_qemu
|
cmdline.add_network_device(
|
||||||
.append(&mut cmdline.add_network_device(&network.config, network_info)?);
|
network.config.index,
|
||||||
|
&network.config.host_dev_name,
|
||||||
|
network.config.guest_mac.clone().unwrap(),
|
||||||
|
)?;
|
||||||
}
|
}
|
||||||
_ => info!(sl!(), "qemu cmdline: unsupported device: {:?}", device),
|
_ => info!(sl!(), "qemu cmdline: unsupported device: {:?}", device),
|
||||||
}
|
}
|
||||||
|
@@ -5,7 +5,6 @@
|
|||||||
|
|
||||||
mod cmdline_generator;
|
mod cmdline_generator;
|
||||||
mod inner;
|
mod inner;
|
||||||
mod network;
|
|
||||||
|
|
||||||
use crate::device::DeviceType;
|
use crate::device::DeviceType;
|
||||||
use crate::hypervisor_persist::HypervisorState;
|
use crate::hypervisor_persist::HypervisorState;
|
||||||
|
@@ -1,339 +0,0 @@
|
|||||||
// Copyright (c) 2024 Ant Group
|
|
||||||
//
|
|
||||||
// SPDX-License-Identifier: Apache-2.0
|
|
||||||
//
|
|
||||||
|
|
||||||
use std::collections::HashMap;
|
|
||||||
use std::convert::TryFrom;
|
|
||||||
use std::fs::{File, OpenOptions};
|
|
||||||
use std::os::fd::RawFd;
|
|
||||||
|
|
||||||
use crate::utils::{clear_fd_flags, open_named_tuntap};
|
|
||||||
use crate::{Address, NetworkConfig};
|
|
||||||
use anyhow::{anyhow, Context, Result};
|
|
||||||
|
|
||||||
// VirtioTransport is the transport in use for a virtio device.
#[derive(Debug, Default, PartialEq)]
enum VirtioTransport {
    #[default]
    Pci,
}

// Display is implemented rather than ToString: the standard blanket impl
// still provides to_string(), and the type additionally becomes usable with
// "{}" in format strings.
impl std::fmt::Display for VirtioTransport {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            VirtioTransport::Pci => "pci",
        };
        f.write_str(name)
    }
}
|
|
||||||
|
|
||||||
impl TryFrom<&str> for VirtioTransport {
|
|
||||||
type Error = anyhow::Error;
|
|
||||||
|
|
||||||
fn try_from(_transport: &str) -> Result<Self> {
|
|
||||||
Ok(VirtioTransport::Pci)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// DeviceDriver is set in "-device driver=<DeviceDriver>"
#[derive(Debug, Default, PartialEq)]
enum DeviceDriver {
    // VirtioNetPci("virtio-net-pci") is a virtio-net device using PCI transport.
    #[default]
    VirtioNetPci,

    // VfioPci("vfio-pci") is an attached host device using PCI transport.
    VfioPci,
}

// Display is implemented rather than ToString: the standard blanket impl
// still provides to_string(), and the type additionally becomes usable with
// "{}" in format strings.
impl std::fmt::Display for DeviceDriver {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            DeviceDriver::VirtioNetPci => "virtio-net-pci",
            DeviceDriver::VfioPci => "vfio-pci",
        };
        f.write_str(name)
    }
}
|
|
||||||
|
|
||||||
impl TryFrom<&str> for DeviceDriver {
|
|
||||||
type Error = anyhow::Error;
|
|
||||||
|
|
||||||
fn try_from(device_driver: &str) -> Result<Self> {
|
|
||||||
Ok(match device_driver {
|
|
||||||
"virtio-net-pci" => DeviceDriver::VirtioNetPci,
|
|
||||||
"vfio-pci" => DeviceDriver::VfioPci,
|
|
||||||
_ => return Err(anyhow!("unsupported transport")),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// NetDev is the kind of host network backend.
#[derive(Debug, Default, PartialEq)]
enum NetDev {
    /// Tap("tap") is a tap networking device type.
    #[default]
    Tap,

    /// MacTap("macvtap") is a macvtap networking device type.
    #[allow(dead_code)]
    MacvTap,
}

// Display is implemented rather than ToString: the standard blanket impl
// still provides to_string(), and the type additionally becomes usable with
// "{}" in format strings.
impl std::fmt::Display for NetDev {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            // Both variants are rendered as "tap".
            NetDev::Tap | NetDev::MacvTap => "tap",
            // VhostUser ("vhost-user") is to be added in future.
        };
        f.write_str(name)
    }
}
|
|
||||||
|
|
||||||
// NetDevice represents a guest networking device
|
|
||||||
// -netdev tap,id=hostnet0,vhost=on,vhostfds=x:y:z,fds=a:b:c
|
|
||||||
// -device virtio-net-pci,netdev=hostnet0,id=net0,mac=24:42:54:20:50:46,bus=pci.0,addr=0x7
|
|
||||||
#[derive(Debug, Default)]
|
|
||||||
pub struct NetDevice {
|
|
||||||
// device_type is the netdev type (e.g. tap).
|
|
||||||
device_type: NetDev,
|
|
||||||
|
|
||||||
// driver is the qemu device driver
|
|
||||||
device_driver: DeviceDriver,
|
|
||||||
|
|
||||||
// id is the net device identifier.
|
|
||||||
id: String,
|
|
||||||
|
|
||||||
// if_name is the interface name,
|
|
||||||
if_name: String,
|
|
||||||
|
|
||||||
// bus is the bus path name of a PCI device.
|
|
||||||
bus: String,
|
|
||||||
|
|
||||||
// pci_addr is the address offset of a PCI device.
|
|
||||||
pci_addr: String,
|
|
||||||
|
|
||||||
// fds represents the list of already existing file descriptors to be used.
|
|
||||||
// This is mostly useful for mq support.
|
|
||||||
// {
|
|
||||||
// fds: Vec<File>,
|
|
||||||
// vhost_fds: Vec<File>,
|
|
||||||
// }
|
|
||||||
fds: HashMap<String, Vec<RawFd>>,
|
|
||||||
|
|
||||||
// disable_vhost_net disables virtio device emulation from the host kernel instead of from qemu.
|
|
||||||
disable_vhost_net: bool,
|
|
||||||
|
|
||||||
// mac_address is the networking device interface MAC address.
|
|
||||||
mac_address: Address,
|
|
||||||
|
|
||||||
// disable_modern prevents qemu from relying on fast MMIO.
|
|
||||||
disable_modern: bool,
|
|
||||||
|
|
||||||
// transport is the virtio transport for this device.
|
|
||||||
transport: VirtioTransport,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl NetDevice {
|
|
||||||
#[allow(dead_code)]
|
|
||||||
pub fn new(
|
|
||||||
config: &NetworkConfig,
|
|
||||||
disable_vhost_net: bool,
|
|
||||||
tun_fds: Vec<i32>,
|
|
||||||
vhost_fds: Vec<i32>,
|
|
||||||
) -> Self {
|
|
||||||
// we have only two <Key, Value>s:
|
|
||||||
// {
|
|
||||||
// "fds": vec![fd1, fd2,...],
|
|
||||||
// "vhostfds": vec![fd3, fd4,...],
|
|
||||||
// }
|
|
||||||
let mut fds: HashMap<String, Vec<RawFd>> = HashMap::with_capacity(2);
|
|
||||||
fds.insert("fds".to_owned(), tun_fds);
|
|
||||||
fds.insert("vhostfds".to_owned(), vhost_fds);
|
|
||||||
|
|
||||||
// FIXME(Hard Code): It's safe to unwrap here because of the valid input.
|
|
||||||
// Ideally device_driver should be derived from transport to minimize code duplication.
|
|
||||||
// While currently we focus on PCI for the initial implementation.
|
|
||||||
// And we'll support other transports, e.g. s390x's CCW.
|
|
||||||
let device_driver = DeviceDriver::try_from("virtio-net-pci").unwrap();
|
|
||||||
let transport = VirtioTransport::try_from("pci").unwrap();
|
|
||||||
|
|
||||||
NetDevice {
|
|
||||||
device_type: NetDev::Tap,
|
|
||||||
device_driver,
|
|
||||||
id: format!("network-{}", &config.index),
|
|
||||||
if_name: config.virt_iface_name.clone(),
|
|
||||||
mac_address: config.guest_mac.clone().unwrap(),
|
|
||||||
disable_vhost_net,
|
|
||||||
disable_modern: false,
|
|
||||||
fds,
|
|
||||||
transport,
|
|
||||||
..Default::default()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn mq_param(&self) -> String {
|
|
||||||
let mut params = vec!["mq=on".to_owned()];
|
|
||||||
if self.transport == VirtioTransport::Pci {
|
|
||||||
// https://www.linux-kvm.org/page/Multiqueue
|
|
||||||
// -netdev tap,vhost=on,queues=N
|
|
||||||
// enable mq and specify msix vectors in qemu cmdline
|
|
||||||
// (2N+2 vectors, N for tx queues, N for rx queues, 1 for config, and one for possible control vq)
|
|
||||||
// -device virtio-net-pci,mq=on,vectors=2N+2...
|
|
||||||
// enable mq in guest by 'ethtool -L eth0 combined $queue_num'
|
|
||||||
// Clearlinux automatically sets up the queues properly
|
|
||||||
// The agent implementation should do this to ensure that it is
|
|
||||||
// always set
|
|
||||||
|
|
||||||
// vectors = len(netdev.FDs) * 2 + 2
|
|
||||||
if let Some(fds) = self.fds.get("fds") {
|
|
||||||
params.push(format!("vectors={}", 2 * fds.len() + 2));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
params.join(",")
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn qemu_device_params(&self) -> Result<Vec<String>> {
|
|
||||||
let mut device_params: Vec<String> = Vec::new();
|
|
||||||
|
|
||||||
device_params.push(format!("driver={}", &self.device_driver.to_string()));
|
|
||||||
device_params.push(format!("netdev={}", &self.id));
|
|
||||||
|
|
||||||
let mac = self.mac_address.to_string();
|
|
||||||
device_params.push(format!("mac={}", &mac));
|
|
||||||
|
|
||||||
if !self.bus.is_empty() {
|
|
||||||
device_params.push(format!("bus={}", &self.bus));
|
|
||||||
}
|
|
||||||
|
|
||||||
if !self.pci_addr.is_empty() {
|
|
||||||
// FIXME: pci_addr: PciPath
|
|
||||||
device_params.push(format!("addr={}", &self.pci_addr));
|
|
||||||
}
|
|
||||||
|
|
||||||
device_params.push(format!(
|
|
||||||
"disable-modern={}",
|
|
||||||
if self.disable_modern { "true" } else { "false" }
|
|
||||||
));
|
|
||||||
|
|
||||||
if !self.fds.is_empty() {
|
|
||||||
device_params.push(self.mq_param());
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(device_params)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn qemu_netdev_params(&self) -> Result<Vec<String>> {
|
|
||||||
let mut netdev_params: Vec<String> = Vec::new();
|
|
||||||
let netdev_type = self.device_type.to_string();
|
|
||||||
netdev_params.push(netdev_type);
|
|
||||||
netdev_params.push(format!("id={}", self.id));
|
|
||||||
|
|
||||||
if !self.disable_vhost_net {
|
|
||||||
netdev_params.push("vhost=on".to_owned());
|
|
||||||
if let Some(vhost_fds) = self.fds.get("vhostfds") {
|
|
||||||
for fd in vhost_fds.iter() {
|
|
||||||
clear_fd_flags(*fd)?;
|
|
||||||
}
|
|
||||||
let s = vhost_fds
|
|
||||||
.iter()
|
|
||||||
.map(|&n| n.to_string())
|
|
||||||
.collect::<Vec<String>>()
|
|
||||||
.join(":");
|
|
||||||
|
|
||||||
netdev_params.push(format!("vhostfds={}", s));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(tuntap_fds) = self.fds.get("fds") {
|
|
||||||
for fd in tuntap_fds.iter() {
|
|
||||||
clear_fd_flags(*fd).context("clear flag of fd failed")?;
|
|
||||||
}
|
|
||||||
let s = tuntap_fds
|
|
||||||
.iter()
|
|
||||||
.map(|&n| n.to_string())
|
|
||||||
.collect::<Vec<String>>()
|
|
||||||
.join(":");
|
|
||||||
netdev_params.push(format!("fds={}", s));
|
|
||||||
} else {
|
|
||||||
netdev_params.push(format!("ifname={}", self.if_name));
|
|
||||||
netdev_params.push("script=no".to_owned());
|
|
||||||
netdev_params.push("downscript=no".to_owned());
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(netdev_params)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl ToString for Address {
|
|
||||||
fn to_string(&self) -> String {
|
|
||||||
let b: [u8; 6] = self.0;
|
|
||||||
|
|
||||||
format!(
|
|
||||||
"{:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}",
|
|
||||||
b[0], b[1], b[2], b[3], b[4], b[5]
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// /dev/tap$(cat /sys/class/net/macvtap1/ifindex)
|
|
||||||
// for example: /dev/tap2381
|
|
||||||
#[allow(dead_code)]
|
|
||||||
pub fn create_macvtap_fds(ifindex: u32, queues: u32) -> Result<Vec<File>> {
|
|
||||||
let macvtap = format!("/dev/tap{}", ifindex);
|
|
||||||
create_fds(macvtap.as_str(), queues as usize)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn create_vhost_net_fds(queues: u32) -> Result<Vec<File>> {
|
|
||||||
let vhost_dev = "/dev/vhost-net";
|
|
||||||
let num_fds = if queues > 1 { queues as usize } else { 1_usize };
|
|
||||||
|
|
||||||
create_fds(vhost_dev, num_fds)
|
|
||||||
}
|
|
||||||
|
|
||||||
// For example: if num_fds = 3; fds = {0xc000012028, 0xc000012030, 0xc000012038}
|
|
||||||
fn create_fds(device: &str, num_fds: usize) -> Result<Vec<File>> {
|
|
||||||
let mut fds: Vec<File> = Vec::with_capacity(num_fds);
|
|
||||||
|
|
||||||
for i in 0..num_fds {
|
|
||||||
match OpenOptions::new().read(true).write(true).open(device) {
|
|
||||||
Ok(f) => {
|
|
||||||
fds.push(f);
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
fds.clear();
|
|
||||||
return Err(anyhow!(
|
|
||||||
"It failed with error {:?} when opened the {:?} device.",
|
|
||||||
e,
|
|
||||||
i
|
|
||||||
));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(fds)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn generate_netdev_fds(
|
|
||||||
network_config: &NetworkConfig,
|
|
||||||
queues: u32,
|
|
||||||
) -> Result<(Vec<File>, Vec<File>)> {
|
|
||||||
let if_name = network_config.host_dev_name.as_str();
|
|
||||||
let tun_taps = open_named_tuntap(if_name, queues)?;
|
|
||||||
let vhost_fds = create_vhost_net_fds(queues)?;
|
|
||||||
|
|
||||||
Ok((tun_taps, vhost_fds))
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
mod tests {
    use super::create_fds;

    // create_fds must hand back exactly as many files as were asked for.
    #[test]
    fn test_create_fds() {
        let device = "/dev/null";
        let num_fds = 3_usize;
        let fds = create_fds(device, num_fds).expect("opening /dev/null should succeed");
        assert_eq!(fds.len(), num_fds);
    }

    // Asking for zero descriptors is not an error and opens nothing.
    #[test]
    fn test_create_fds_zero() {
        let fds = create_fds("/dev/null", 0).expect("zero descriptors should succeed");
        assert!(fds.is_empty());
    }
}
|
|
@@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
use std::{
|
use std::{
|
||||||
collections::HashSet,
|
collections::HashSet,
|
||||||
fs::File,
|
fs::{File, OpenOptions},
|
||||||
os::fd::{AsRawFd, RawFd},
|
os::fd::{AsRawFd, RawFd},
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -58,15 +58,19 @@ pub fn get_jailer_root(sid: &str) -> String {
|
|||||||
[&sandbox_path, JAILER_ROOT].join("/")
|
[&sandbox_path, JAILER_ROOT].join("/")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Clear the O_CLOEXEC which is set by default by Rust standard library
|
// Clear the O_CLOEXEC which is set by default by Rust standard library on
|
||||||
// as it would obviously prevent passing the descriptor to the hypervisor process.
|
// file descriptors that it opens. This function is mostly meant to be
|
||||||
pub fn clear_fd_flags(rawfd: RawFd) -> Result<()> {
|
// called on descriptors to be passed to a child (hypervisor) process as
|
||||||
if let Err(err) = fcntl::fcntl(rawfd, fcntl::FcntlArg::F_SETFD(fcntl::FdFlag::empty())) {
|
// O_CLOEXEC would obviously prevent that.
|
||||||
info!(
|
pub fn clear_cloexec(rawfd: RawFd) -> Result<()> {
|
||||||
sl!(),
|
let cur_flags = fcntl::fcntl(rawfd, fcntl::FcntlArg::F_GETFD)?;
|
||||||
"couldn't clear O_CLOEXEC on device's fd, communication with agent will not work: {:?}",
|
let mut new_flags = fcntl::FdFlag::from_bits(cur_flags).ok_or(anyhow!(
|
||||||
err
|
"couldn't construct FdFlag from flags value {:?}",
|
||||||
);
|
cur_flags
|
||||||
|
))?;
|
||||||
|
new_flags.remove(fcntl::FdFlag::FD_CLOEXEC);
|
||||||
|
if let Err(err) = fcntl::fcntl(rawfd, fcntl::FcntlArg::F_SETFD(new_flags)) {
|
||||||
|
info!(sl!(), "couldn't clear O_CLOEXEC on fd: {:?}", err);
|
||||||
return Err(err.into());
|
return Err(err.into());
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -100,3 +104,55 @@ pub fn open_named_tuntap(if_name: &str, queues: u32) -> Result<Vec<File>> {
|
|||||||
|
|
||||||
Ok(tap_files)
|
Ok(tap_files)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// /dev/tap$(cat /sys/class/net/macvtap1/ifindex)
|
||||||
|
// for example: /dev/tap2381
|
||||||
|
#[allow(dead_code)]
|
||||||
|
pub fn create_macvtap_fds(ifindex: u32, queues: u32) -> Result<Vec<File>> {
|
||||||
|
let macvtap = format!("/dev/tap{}", ifindex);
|
||||||
|
create_fds(macvtap.as_str(), queues as usize)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn create_vhost_net_fds(queues: u32) -> Result<Vec<File>> {
|
||||||
|
let vhost_dev = "/dev/vhost-net";
|
||||||
|
let num_fds = if queues > 1 { queues as usize } else { 1_usize };
|
||||||
|
|
||||||
|
create_fds(vhost_dev, num_fds)
|
||||||
|
}
|
||||||
|
|
||||||
|
// For example: if num_fds = 3; fds = {0xc000012028, 0xc000012030, 0xc000012038}
|
||||||
|
fn create_fds(device: &str, num_fds: usize) -> Result<Vec<File>> {
|
||||||
|
let mut fds: Vec<File> = Vec::with_capacity(num_fds);
|
||||||
|
|
||||||
|
for i in 0..num_fds {
|
||||||
|
match OpenOptions::new().read(true).write(true).open(device) {
|
||||||
|
Ok(f) => {
|
||||||
|
fds.push(f);
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
fds.clear();
|
||||||
|
return Err(anyhow!(
|
||||||
|
"It failed with error {:?} when opened the {:?} device.",
|
||||||
|
e,
|
||||||
|
i
|
||||||
|
));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(fds)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::create_fds;

    // create_fds must hand back exactly as many files as were asked for.
    #[test]
    fn test_create_fds() {
        let device = "/dev/null";
        let num_fds = 3_usize;
        let fds = create_fds(device, num_fds).expect("opening /dev/null should succeed");
        assert_eq!(fds.len(), num_fds);
    }

    // Asking for zero descriptors is not an error and opens nothing.
    #[test]
    fn test_create_fds_zero() {
        let fds = create_fds("/dev/null", 0).expect("zero descriptors should succeed");
        assert!(fds.is_empty());
    }
}
|
||||||
|
Reference in New Issue
Block a user