From 75e282b4c19e49ad7d8ef246a1992e20816f8989 Mon Sep 17 00:00:00 2001
From: Quanwei Zhou
Date: Wed, 30 Mar 2022 10:13:17 +0800
Subject: [PATCH] runtime-rs: hypervisor base define

Responsible for VM manager, such as Qemu, Dragonball

Fixes: #3785

Signed-off-by: Quanwei Zhou
---
 src/runtime-rs/Cargo.lock                     |  15 ++
 src/runtime-rs/Cargo.toml                     |   2 +
 src/runtime-rs/crates/hypervisor/Cargo.toml   |  21 +++
 .../crates/hypervisor/src/device/block.rs     |  24 +++
 .../crates/hypervisor/src/device/mod.rs       |  36 +++++
 .../crates/hypervisor/src/device/network.rs   |  32 ++++
 .../hypervisor/src/device/share_fs_device.rs  |  27 ++++
 .../hypervisor/src/device/share_fs_mount.rs   |  43 ++++++
 .../crates/hypervisor/src/device/vfio.rs      | 145 ++++++++++++++++++
 .../crates/hypervisor/src/device/vsock.rs     |  17 ++
 src/runtime-rs/crates/hypervisor/src/lib.rs   |  49 ++++++
 11 files changed, 411 insertions(+)
 create mode 100644 src/runtime-rs/crates/hypervisor/Cargo.toml
 create mode 100644 src/runtime-rs/crates/hypervisor/src/device/block.rs
 create mode 100644 src/runtime-rs/crates/hypervisor/src/device/mod.rs
 create mode 100644 src/runtime-rs/crates/hypervisor/src/device/network.rs
 create mode 100644 src/runtime-rs/crates/hypervisor/src/device/share_fs_device.rs
 create mode 100644 src/runtime-rs/crates/hypervisor/src/device/share_fs_mount.rs
 create mode 100644 src/runtime-rs/crates/hypervisor/src/device/vfio.rs
 create mode 100644 src/runtime-rs/crates/hypervisor/src/device/vsock.rs
 create mode 100644 src/runtime-rs/crates/hypervisor/src/lib.rs

diff --git a/src/runtime-rs/Cargo.lock b/src/runtime-rs/Cargo.lock
index 4dda25cd7c..f56b90609a 100644
--- a/src/runtime-rs/Cargo.lock
+++ b/src/runtime-rs/Cargo.lock
@@ -513,6 +513,21 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "hypervisor"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "async-trait",
+ "kata-types",
+ "libc",
+ "logging",
+ "serde_json",
+ "slog",
+ "slog-scope",
+ "thiserror",
+]
+
 [[package]]
 name = "idna"
 version = "0.2.3"
diff --git a/src/runtime-rs/Cargo.toml b/src/runtime-rs/Cargo.toml
index b359f9d1fb..176bb222ad 100644
--- a/src/runtime-rs/Cargo.toml
+++ b/src/runtime-rs/Cargo.toml
@@ -3,4 +3,6 @@ members = [
     "crates/shim",
     # TODO: current only for check, delete after use the agent crate
     "crates/agent",
+    # TODO: current only for check, delete after use the hypervisor crate
+    "crates/hypervisor",
 ]
diff --git a/src/runtime-rs/crates/hypervisor/Cargo.toml b/src/runtime-rs/crates/hypervisor/Cargo.toml
new file mode 100644
index 0000000000..4520409d2a
--- /dev/null
+++ b/src/runtime-rs/crates/hypervisor/Cargo.toml
@@ -0,0 +1,21 @@
+[package]
+name = "hypervisor"
+version = "0.1.0"
+authors = ["The Kata Containers community "]
+edition = "2018"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+anyhow = "^1.0"
+async-trait = "0.1.48"
+libc = ">=0.2.39"
+serde_json = ">=1.0.9"
+slog = "2.5.2"
+slog-scope = "4.4.0"
+thiserror = "1.0"
+
+kata-types = { path = "../../../libs/kata-types" }
+logging = { path = "../../../libs/logging" }
+
+[features]
diff --git a/src/runtime-rs/crates/hypervisor/src/device/block.rs b/src/runtime-rs/crates/hypervisor/src/device/block.rs
new file mode 100644
index 0000000000..4f59cc0ea3
--- /dev/null
+++ b/src/runtime-rs/crates/hypervisor/src/device/block.rs
@@ -0,0 +1,24 @@
+// Copyright (c) 2019-2022 Alibaba Cloud
+// Copyright (c) 2019-2022 Ant Group
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#[derive(Debug)]
+pub struct BlockConfig {
+    /// Unique identifier of the drive.
+    pub id: String,
+
+    /// Path of the drive.
+    pub path_on_host: String,
+
+    /// If set to true, the drive is opened in read-only mode. Otherwise, the
+    /// drive is opened as read-write.
+    pub is_readonly: bool,
+
+    /// Don't close `path_on_host` file when dropping the device.
+    pub no_drop: bool,
+
+    /// device index
+    pub index: u64,
+}
diff --git a/src/runtime-rs/crates/hypervisor/src/device/mod.rs b/src/runtime-rs/crates/hypervisor/src/device/mod.rs
new file mode 100644
index 0000000000..49215e0d1a
--- /dev/null
+++ b/src/runtime-rs/crates/hypervisor/src/device/mod.rs
@@ -0,0 +1,36 @@
+// Copyright (c) 2019-2022 Alibaba Cloud
+// Copyright (c) 2019-2022 Ant Group
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+mod block;
+pub use block::BlockConfig;
+mod network;
+pub use network::{Address, NetworkConfig};
+mod share_fs_device;
+pub use share_fs_device::ShareFsDeviceConfig;
+mod vfio;
+pub use vfio::{bind_device_to_host, bind_device_to_vfio, VfioBusMode, VfioConfig};
+mod share_fs_mount;
+pub use share_fs_mount::{ShareFsMountConfig, ShareFsMountType, ShareFsOperation};
+mod vsock;
+pub use vsock::VsockConfig;
+
+use std::fmt;
+
+#[derive(Debug)]
+pub enum Device {
+    Block(BlockConfig),
+    Network(NetworkConfig),
+    ShareFsDevice(ShareFsDeviceConfig),
+    Vfio(VfioConfig),
+    ShareFsMount(ShareFsMountConfig),
+    Vsock(VsockConfig),
+}
+
+impl fmt::Display for Device {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{:?}", self)
+    }
+}
diff --git a/src/runtime-rs/crates/hypervisor/src/device/network.rs b/src/runtime-rs/crates/hypervisor/src/device/network.rs
new file mode 100644
index 0000000000..6c13a9ca1e
--- /dev/null
+++ b/src/runtime-rs/crates/hypervisor/src/device/network.rs
@@ -0,0 +1,32 @@
+// Copyright (c) 2019-2022 Alibaba Cloud
+// Copyright (c) 2019-2022 Ant Group
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+use std::fmt;
+
+pub struct Address(pub [u8; 6]);
+
+impl fmt::Debug for Address {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let b = self.0;
+        write!(
+            f,
+            "{:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}",
+            b[0], b[1], b[2], b[3], b[4], b[5]
+        )
+    }
+}
+
+#[derive(Debug)]
+pub struct NetworkConfig {
+    /// Unique identifier of the device
+    pub id: String,
+
+    /// Host level path for the guest network interface.
+    pub host_dev_name: String,
+
+    /// Guest MAC address.
+    pub guest_mac: Option<Address>,
+}
diff --git a/src/runtime-rs/crates/hypervisor/src/device/share_fs_device.rs b/src/runtime-rs/crates/hypervisor/src/device/share_fs_device.rs
new file mode 100644
index 0000000000..4bf73eab73
--- /dev/null
+++ b/src/runtime-rs/crates/hypervisor/src/device/share_fs_device.rs
@@ -0,0 +1,27 @@
+// Copyright (c) 2019-2022 Alibaba Cloud
+// Copyright (c) 2019-2022 Ant Group
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+/// ShareFsDeviceConfig: share fs device config
+#[derive(Debug)]
+pub struct ShareFsDeviceConfig {
+    /// fs_type: virtiofs or inline-virtiofs
+    pub fs_type: String,
+
+    /// sock_path: socket path for virtiofs
+    pub sock_path: String,
+
+    /// mount_tag: a label used as a hint to the guest.
+    pub mount_tag: String,
+
+    /// host_path: the host filesystem path for this volume.
+    pub host_path: String,
+
+    /// queue_size: queue size
+    pub queue_size: u64,
+
+    /// queue_num: queue number
+    pub queue_num: u64,
+}
diff --git a/src/runtime-rs/crates/hypervisor/src/device/share_fs_mount.rs b/src/runtime-rs/crates/hypervisor/src/device/share_fs_mount.rs
new file mode 100644
index 0000000000..85f5164562
--- /dev/null
+++ b/src/runtime-rs/crates/hypervisor/src/device/share_fs_mount.rs
@@ -0,0 +1,43 @@
+// Copyright (c) 2019-2022 Alibaba Cloud
+// Copyright (c) 2019-2022 Ant Group
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#[derive(Copy, Clone, Debug)]
+pub enum ShareFsOperation {
+    Mount,
+    Umount,
+    Update,
+}
+
+#[derive(Debug)]
+pub enum ShareFsMountType {
+    PASSTHROUGH,
+    RAFS,
+}
+
+/// ShareFsMountConfig: share fs mount config
+#[derive(Debug)]
+pub struct ShareFsMountConfig {
+    /// source: the passthrough fs exported dir or rafs meta file of rafs
+    pub source: String,
+
+    /// fstype: specifies the type of this sub-fs, could be passthrough-fs or rafs
+    pub fstype: ShareFsMountType,
+
+    /// mount_point: the mount point inside guest
+    pub mount_point: String,
+
+    /// config: the rafs backend config file
+    pub config: Option<String>,
+
+    /// tag: is the tag used inside the kata guest.
+    pub tag: String,
+
+    /// op: the operation to take, e.g. mount, umount or update
+    pub op: ShareFsOperation,
+
+    /// prefetch_list_path: path to file that contains file lists that should be prefetched by rafs
+    pub prefetch_list_path: Option<String>,
+}
diff --git a/src/runtime-rs/crates/hypervisor/src/device/vfio.rs b/src/runtime-rs/crates/hypervisor/src/device/vfio.rs
new file mode 100644
index 0000000000..db1a99fb3f
--- /dev/null
+++ b/src/runtime-rs/crates/hypervisor/src/device/vfio.rs
@@ -0,0 +1,145 @@
+// Copyright (c) 2019-2022 Alibaba Cloud
+// Copyright (c) 2019-2022 Ant Group
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+use std::{fs, path::Path, process::Command};
+
+use anyhow::{anyhow, Context, Result};
+
+fn override_driver(bdf: &str, driver: &str) -> Result<()> {
+    let driver_override = format!("/sys/bus/pci/devices/{}/driver_override", bdf);
+    fs::write(&driver_override, driver)
+        .with_context(|| format!("echo {} > {}", driver, &driver_override))?;
+    info!(sl!(), "echo {} > {}", driver, driver_override);
+    Ok(())
+}
+
+const SYS_PCI_DEVICES_PATH: &str = "/sys/bus/pci/devices";
+const PCI_DRIVER_PROBE: &str = "/sys/bus/pci/drivers_probe";
+const VFIO_NEW_ID_PATH: &str = "/sys/bus/pci/drivers/vfio-pci/new_id";
+
+pub const VFIO_PCI: &str = "vfio-pci";
+
+#[derive(Debug)]
+pub enum VfioBusMode {
+    PCI,
+    MMIO,
+}
+
+impl VfioBusMode {
+    pub fn new(mode: &str) -> Result<Self> {
+        Ok(match mode {
+            "mmio" => VfioBusMode::MMIO,
+            _ => VfioBusMode::PCI,
+        })
+    }
+}
+
+#[derive(Debug)]
+pub struct VfioConfig {
+    /// Unique identifier of the device
+    pub id: String,
+
+    /// Sysfs path for mdev bus type device
+    pub sysfs_path: String,
+
+    /// PCI device information: "bus:slot:function"
+    pub bus_slot_func: String,
+
+    /// Bus Mode, PCI or MMIO
+    pub mode: VfioBusMode,
+}
+
+/// binds the device to vfio driver after unbinding from host.
+/// Will be called by a network interface or a generic pcie device.
+pub fn bind_device_to_vfio(bdf: &str, host_driver: &str, _vendor_device_id: &str) -> Result<()> {
+    // Load the vfio-pci module when its sysfs node is absent; spawn errors are returned.
+    if !Path::new(VFIO_NEW_ID_PATH).exists() {
+        Command::new("modprobe")
+            .arg(VFIO_PCI)
+            .output()
+            .context("Failed to run modprobe vfio-pci")?;
+    }
+
+    // x86 needs iommu on the kernel cmdline (e.g. intel_iommu=on); Arm sets it through bios.
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    {
+        let cmdline = fs::read_to_string("/proc/cmdline")
+            .context("Failed to read /proc/cmdline")?;
+        if cmdline.contains("iommu=off") || !cmdline.contains("iommu=") {
+            return Err(anyhow!("iommu isn't set on kernel cmdline"));
+        }
+    }
+
+    // if it's already bound to vfio
+    if is_equal_driver(bdf, VFIO_PCI) {
+        info!(sl!(), "bdf : {} was already bound to vfio-pci", bdf);
+        return Ok(());
+    }
+
+    info!(sl!(), "host driver : {}", host_driver);
+    override_driver(bdf, VFIO_PCI).context("override driver")?;
+
+    let unbind_path = format!("/sys/bus/pci/devices/{}/driver/unbind", bdf);
+
+    // echo bdf > /sys/bus/pci/devices/<bdf>/driver/unbind
+    fs::write(&unbind_path, bdf)
+        .with_context(|| format!("Failed to echo {} > {}", bdf, &unbind_path))?;
+
+    info!(sl!(), "{} is unbound from {}", bdf, host_driver);
+
+    // echo bdf > /sys/bus/pci/drivers_probe
+    fs::write(PCI_DRIVER_PROBE, bdf)
+        .with_context(|| format!("Failed to echo {} > {}", bdf, PCI_DRIVER_PROBE))?;
+
+    info!(sl!(), "echo {} > /sys/bus/pci/drivers_probe", bdf);
+    Ok(())
+}
+
+/// Reports whether the `driver` symlink of PCI device `bdf` points at `host_driver`.
+pub fn is_equal_driver(bdf: &str, host_driver: &str) -> bool {
+    let driver_file = Path::new(SYS_PCI_DEVICES_PATH).join(bdf).join("driver");
+    if !driver_file.exists() {
+        return false;
+    }
+
+    // An unreadable link yields an empty path, which then compares as an empty driver name.
+    let driver_path = fs::read_link(driver_file).unwrap_or_default();
+    let driver_name = driver_path
+        .file_name()
+        .map_or(String::new(), |v| v.to_string_lossy().into_owned());
+    driver_name.eq(host_driver)
+}
+
+/// bind_device_to_host binds the device to the host driver after unbinding from vfio-pci.
+pub fn bind_device_to_host(bdf: &str, host_driver: &str, _vendor_device_id: &str) -> Result<()> {
+    // Unbind from vfio-pci driver to the original host driver
+
+    info!(sl!(), "bind {} to {}", bdf, host_driver);
+
+    // if it's already bound to host_driver
+    if is_equal_driver(bdf, host_driver) {
+        info!(
+            sl!(),
+            "bdf {} was already bound to host driver {}", bdf, host_driver
+        );
+        return Ok(());
+    }
+
+    override_driver(bdf, host_driver).context("override driver")?;
+
+    let unbind_path = "/sys/bus/pci/drivers/vfio-pci/unbind";
+
+    // echo bdf > /sys/bus/pci/drivers/vfio-pci/unbind
+    std::fs::write(unbind_path, bdf).with_context(|| format!("echo {}> {}", bdf, unbind_path))?;
+    info!(sl!(), "echo {} > {}", bdf, unbind_path);
+
+    // echo bdf > /sys/bus/pci/drivers_probe
+    std::fs::write(PCI_DRIVER_PROBE, bdf)
+        .with_context(|| format!("echo {} > {}", bdf, PCI_DRIVER_PROBE))?;
+    info!(sl!(), "echo {} > {}", bdf, PCI_DRIVER_PROBE);
+
+    Ok(())
+}
diff --git a/src/runtime-rs/crates/hypervisor/src/device/vsock.rs b/src/runtime-rs/crates/hypervisor/src/device/vsock.rs
new file mode 100644
index 0000000000..3a5b7c8b3c
--- /dev/null
+++ b/src/runtime-rs/crates/hypervisor/src/device/vsock.rs
@@ -0,0 +1,17 @@
+// Copyright (c) 2019-2022 Alibaba Cloud
+// Copyright (c) 2019-2022 Ant Group
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#[derive(Debug)]
+pub struct VsockConfig {
+    /// Unique identifier of the device
+    pub id: String,
+
+    /// A 32-bit Context Identifier (CID) used to identify the guest.
+    pub guest_cid: u32,
+
+    /// unix domain socket path
+    pub uds_path: String,
+}
diff --git a/src/runtime-rs/crates/hypervisor/src/lib.rs b/src/runtime-rs/crates/hypervisor/src/lib.rs
new file mode 100644
index 0000000000..de8c172375
--- /dev/null
+++ b/src/runtime-rs/crates/hypervisor/src/lib.rs
@@ -0,0 +1,49 @@
+// Copyright (c) 2019-2022 Alibaba Cloud
+// Copyright (c) 2019-2022 Ant Group
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#[macro_use]
+extern crate slog;
+
+logging::logger_with_subsystem!(sl, "hypervisor");
+
+pub mod device;
+
+use std::collections::HashMap;
+
+use anyhow::Result;
+use async_trait::async_trait;
+use kata_types::config::hypervisor::Hypervisor as HypervisorConfig;
+
+#[derive(Debug)]
+pub struct VcpuThreadIds {
+    /// List of tids of vcpu threads (vcpu index, tid)
+    pub vcpus: HashMap<u32, u32>,
+}
+
+#[async_trait]
+pub trait Hypervisor: Send + Sync {
+    // vm manager
+    async fn prepare_vm(&self, id: &str, netns: Option<String>) -> Result<()>;
+    async fn start_vm(&self, timeout: i32) -> Result<()>;
+    async fn stop_vm(&self) -> Result<()>;
+    async fn pause_vm(&self) -> Result<()>;
+    async fn save_vm(&self) -> Result<()>;
+    async fn resume_vm(&self) -> Result<()>;
+
+    // device manager
+    async fn add_device(&self, device: device::Device) -> Result<()>;
+    async fn remove_device(&self, device: device::Device) -> Result<()>;
+
+    // utils
+    async fn get_agent_socket(&self) -> Result<String>;
+    async fn disconnect(&self);
+    async fn hypervisor_config(&self) -> HypervisorConfig;
+    async fn get_thread_ids(&self) -> Result<VcpuThreadIds>;
+    async fn get_pids(&self) -> Result<Vec<u32>>;
+    async fn cleanup(&self) -> Result<()>;
+    async fn check(&self) -> Result<()>;
+    async fn get_jailer_root(&self) -> Result<String>;
+}