diff --git a/README.md b/README.md index b8fa30996f..e66917251e 100644 --- a/README.md +++ b/README.md @@ -117,8 +117,8 @@ The table below lists the core parts of the project: |-|-|-| | [runtime](src/runtime) | core | Main component run by a container manager and providing a containerd shimv2 runtime implementation. | | [agent](src/agent) | core | Management process running inside the virtual machine / POD that sets up the container environment. | -| [libraries](src/libs) | core | Library crates shared by multiple Kata Container components or published to [`crates.io`](https://crates.io/index.ht -ml) | +| [libraries](src/libs) | core | Library crates shared by multiple Kata Container components or published to [`crates.io`](https://crates.io/index.html) | +| [`dragonball`](src/dragonball) | core | An optional built-in VMM brings out-of-the-box Kata Containers experience with optimizations on container workloads | | [documentation](docs) | documentation | Documentation common to all components (such as design and install documentation). | | [libraries](src/libs) | core | Library crates shared by multiple Kata Container components or published to [`crates.io`](https://crates.io/index.html) | | [tests](https://github.com/kata-containers/tests) | tests | Excludes unit tests which live with the main code. | diff --git a/src/dragonball/.gitignore b/src/dragonball/.gitignore new file mode 100644 index 0000000000..64f40ab296 --- /dev/null +++ b/src/dragonball/.gitignore @@ -0,0 +1,3 @@ +target +Cargo.lock +.idea diff --git a/src/dragonball/Cargo.toml b/src/dragonball/Cargo.toml new file mode 100644 index 0000000000..1368c362f0 --- /dev/null +++ b/src/dragonball/Cargo.toml @@ -0,0 +1,53 @@ +[package] +name = "dragonball" +version = "0.1.0" +authors = ["The Kata Containers community "] +description = "A secure sandbox for Kata Containers" +keywords = ["kata-containers", "sandbox", "vmm", "dragonball"] +homepage = "https://katacontainers.io/" +repository = "https://github.com/kata-containers/kata-containers.git" +license = "Apache-2.0" +edition = "2018" + +[dependencies] +arc-swap = "1.5.0" +bytes = "1.1.0" +dbs-address-space = "0.1.0" +dbs-allocator = "0.1.0" +dbs-boot = "0.2.0" +dbs-device = "0.1.0" +dbs-interrupt = { version = "0.1.0", features = ["kvm-irq"] } +dbs-legacy-devices = "0.1.0" +dbs-utils = "0.1.0" +dbs-virtio-devices = { version = "0.1.0", optional = true, features = ["virtio-mmio"] } +kvm-bindings = "0.5.0" +kvm-ioctls = "0.11.0" +libc = "0.2.39" +linux-loader = "0.4.0" +log = "0.4.14" +nix = "0.23.1" +serde = "1.0.27" +serde_derive = "1.0.27" +serde_json = "1.0.9" +slog = "2.5.2" +slog-scope = "4.4.0" +thiserror = "1" +vmm-sys-util = "0.9.0" +virtio-queue = { version = "0.1.0", optional = true } +vm-memory = { version = "0.7.0", features = ["backend-mmap"] } + +[dev-dependencies] +slog-term = "2.9.0" +slog-async = "2.7.0" + +[features] +atomic-guest-memory = [] +virtio-vsock = ["dbs-virtio-devices/virtio-vsock", "virtio-queue"] + +[patch.'crates-io'] +dbs-device = { git = "https://github.com/openanolis/dragonball-sandbox.git", rev = "84eee5737cc7d85f9921c94a93e6b9dc4ae24a39" } +dbs-interrupt = { git = "https://github.com/openanolis/dragonball-sandbox.git", rev = "84eee5737cc7d85f9921c94a93e6b9dc4ae24a39" } +dbs-legacy-devices = { git = "https://github.com/openanolis/dragonball-sandbox.git", rev = "84eee5737cc7d85f9921c94a93e6b9dc4ae24a39" } +dbs-utils = { git = "https://github.com/openanolis/dragonball-sandbox.git", rev = "84eee5737cc7d85f9921c94a93e6b9dc4ae24a39" } 
+dbs-virtio-devices = { git = "https://github.com/openanolis/dragonball-sandbox.git", rev = "84eee5737cc7d85f9921c94a93e6b9dc4ae24a39" } +dbs-upcall = { git = "https://github.com/openanolis/dragonball-sandbox.git", rev = "84eee5737cc7d85f9921c94a93e6b9dc4ae24a39" } diff --git a/src/dragonball/LICENSE b/src/dragonball/LICENSE new file mode 120000 index 0000000000..30cff7403d --- /dev/null +++ b/src/dragonball/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/src/dragonball/Makefile b/src/dragonball/Makefile new file mode 100644 index 0000000000..8acd29de57 --- /dev/null +++ b/src/dragonball/Makefile @@ -0,0 +1,29 @@ +# Copyright (c) 2019-2022 Alibaba Cloud. All rights reserved. +# Copyright (c) 2019-2022 Ant Group. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +default: build + +build: + # FIXME: This line will be removed when we solve the vm-memory dependency problem in Dragonball Sandbox + cargo update -p vm-memory:0.8.0 --precise 0.7.0 + cargo build --all-features + +check: clippy format + +clippy: + @echo "INFO: cargo clippy..." + cargo clippy --all-targets --all-features \ + -- \ + -D warnings + +format: + @echo "INFO: cargo fmt..." + cargo fmt -- --check + +clean: + cargo clean + +test: + @echo "INFO: testing dragonball for development build" + cargo test --all-features -- --nocapture diff --git a/src/dragonball/README.md b/src/dragonball/README.md new file mode 100644 index 0000000000..0e3bcb45a0 --- /dev/null +++ b/src/dragonball/README.md @@ -0,0 +1,37 @@ +# Introduction +`Dragonball Sandbox` is a light-weight virtual machine manager (VMM) based on Linux Kernel-based Virtual Machine (KVM), +which is optimized for container workloads with: +- container image management and acceleration service +- flexible and high-performance virtual device drivers +- low CPU and memory overhead +- minimal startup time +- optimized concurrent startup speed + +`Dragonball Sandbox` aims to provide a simple solution for the Kata Containers community. It is integrated into Kata 3.0 +runtime as a built-in VMM and gives users an out-of-the-box Kata Containers experience without complex environment setup +and configuration process. + +# Getting Started +[TODO](https://github.com/kata-containers/kata-containers/issues/4302) + +# Documentation + +Device: [Device Document](docs/device.md) + +You could see the [official documentation](docs/) page for more details. + +# Supported Architectures +- x86-64 +- aarch64 + +# Supported Kernel +[TODO](https://github.com/kata-containers/kata-containers/issues/4303) + +# Acknowledgement +Part of the code is based on the [Cloud Hypervisor](https://github.com/cloud-hypervisor/cloud-hypervisor) project, [`crosvm`](https://github.com/google/crosvm) project and [Firecracker](https://github.com/firecracker-microvm/firecracker) project. They are all rust written virtual machine managers with advantages on safety and security. + +`Dragonball sandbox` is designed to be a VMM that is customized for Kata Containers and we will focus on optimizing container workloads for Kata ecosystem. The focus on the Kata community is what differentiates us from other rust written virtual machines. + +# License + +`Dragonball` is licensed under [Apache License](http://www.apache.org/licenses/LICENSE-2.0), Version 2.0. 
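The `atomic-guest-memory` and `virtio-vsock` entries in the `[features]` table above are additive compile-time switches rather than runtime configuration. As a rough sketch of how the first one is consumed (it mirrors the type aliases defined in `src/address_space_manager.rs` later in this patch), enabling `atomic-guest-memory` swaps the concrete guest-memory handle shared between VMM threads:

```rust
// Sketch only: mirrors the aliases in src/address_space_manager.rs below.
#[cfg(feature = "atomic-guest-memory")]
pub type GuestAddressSpaceImpl = vm_memory::atomic::GuestMemoryAtomic<vm_memory::GuestMemoryMmap>;

#[cfg(not(feature = "atomic-guest-memory"))]
pub type GuestAddressSpaceImpl = std::sync::Arc<vm_memory::GuestMemoryMmap>;
```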
\ No newline at end of file diff --git a/src/dragonball/THIRD-PARTY b/src/dragonball/THIRD-PARTY new file mode 100644 index 0000000000..c3069125a3 --- /dev/null +++ b/src/dragonball/THIRD-PARTY @@ -0,0 +1,27 @@ +// Copyright 2017 The Chromium OS Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/src/dragonball/docs/device.md b/src/dragonball/docs/device.md new file mode 100644 index 0000000000..8f3fdbe6ed --- /dev/null +++ b/src/dragonball/docs/device.md @@ -0,0 +1,17 @@ +# Device + +## Device Manager + +Currently we have following device manager: +| Name | Description | +| --- | --- | +| [address space manager](../src/address_space_manager.rs) | abstracts virtual machine's physical management and provide mapping for guest virtual memory and MMIO ranges of emulated virtual devices, pass-through devices and vCPU | +| [config manager](../src/config_manager.rs) | provides abstractions for configuration information | +| [console manager](../src/device_manager/console_manager.rs) | provides management for all console devices | +| [resource manager](../src/resource_manager.rs) |provides resource management for `legacy_irq_pool`, `msi_irq_pool`, `pio_pool`, `mmio_pool`, `mem_pool`, `kvm_mem_slot_pool` with builder `ResourceManagerBuilder` | +| [VSOCK device manager](../src/device_manager/vsock_dev_mgr.rs) | provides configuration info for `VIRTIO-VSOCK` and management for all VSOCK devices | + + +## Device supported +`VIRTIO-VSOCK` + diff --git a/src/dragonball/src/address_space_manager.rs b/src/dragonball/src/address_space_manager.rs new file mode 100644 index 0000000000..dc7650e97a --- /dev/null +++ b/src/dragonball/src/address_space_manager.rs @@ -0,0 +1,890 @@ +// Copyright (C) 2019-2022 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Address space abstraction to manage virtual machine's physical address space. +//! +//! The AddressSpace abstraction is introduced to manage virtual machine's physical address space. +//! 
The regions in virtual machine's physical address space may be used to: +//! 1) map guest virtual memory +//! 2) map MMIO ranges for emulated virtual devices, such as virtio-fs DAX window. +//! 3) map MMIO ranges for pass-through devices, such as PCI device BARs. +//! 4) map MMIO ranges for to vCPU, such as local APIC. +//! 5) not used/available +//! +//! A related abstraction, vm_memory::GuestMemory, is used to access guest virtual memory only. +//! In other words, AddressSpace is the resource owner, and GuestMemory is an accessor for guest +//! virtual memory. + +use std::collections::{BTreeMap, HashMap}; +use std::fs::File; +use std::os::unix::io::{AsRawFd, FromRawFd}; +use std::sync::atomic::{AtomicBool, AtomicU8, Ordering}; +use std::sync::{Arc, Mutex}; +use std::thread; + +use dbs_address_space::{ + AddressSpace, AddressSpaceError, AddressSpaceLayout, AddressSpaceRegion, + AddressSpaceRegionType, NumaNode, NumaNodeInfo, MPOL_MF_MOVE, MPOL_PREFERRED, +}; +use dbs_allocator::Constraint; +use kvm_bindings::{kvm_userspace_memory_region, KVM_MEM_LOG_DIRTY_PAGES}; +use kvm_ioctls::VmFd; +use log::{debug, error, info, warn}; +use nix::sys::mman; +use nix::unistd::dup; +#[cfg(feature = "atomic-guest-memory")] +use vm_memory::atomic::GuestMemoryAtomic; +use vm_memory::{ + Address, FileOffset, GuestAddress, GuestAddressSpace, GuestMemoryMmap, GuestMemoryRegion, + GuestRegionMmap, GuestUsize, MemoryRegionAddress, MmapRegion, +}; + +use crate::resource_manager::ResourceManager; +use crate::vm::NumaRegionInfo; + +#[cfg(not(feature = "atomic-guest-memory"))] +/// Concrete GuestAddressSpace type used by the VMM. +pub type GuestAddressSpaceImpl = Arc; + +#[cfg(feature = "atomic-guest-memory")] +/// Concrete GuestAddressSpace type used by the VMM. +pub type GuestAddressSpaceImpl = GuestMemoryAtomic; + +/// Concrete GuestMemory type used by the VMM. +pub type GuestMemoryImpl = as GuestAddressSpace>::M; +/// Concrete GuestRegion type used by the VMM. +pub type GuestRegionImpl = GuestRegionMmap; + +// Maximum number of working threads for memory pre-allocation. +const MAX_PRE_ALLOC_THREAD: u64 = 16; + +// Control the actual number of pre-allocating threads. After several performance tests, we decide to use one thread to do pre-allocating for every 4G memory. +const PRE_ALLOC_GRANULARITY: u64 = 32; + +// We don't have plan to support mainframe computer and only focus on PC servers. +// 64 as max nodes should be enough for now. +const MAX_NODE: u32 = 64; + +// We will split the memory region if it conflicts with the MMIO hole. +// But if the space below the MMIO hole is smaller than the MINIMAL_SPLIT_SPACE, we won't split the memory region in order to enhance performance. +const MINIMAL_SPLIT_SPACE: u64 = 128 << 20; + +/// Errors associated with virtual machine address space management. +#[derive(Debug, thiserror::Error)] +pub enum AddressManagerError { + /// Invalid address space operation. + #[error("invalid address space operation")] + InvalidOperation, + + /// Invalid address range. + #[error("invalid address space region (0x{0:x}, 0x{1:x})")] + InvalidAddressRange(u64, GuestUsize), + + /// No available mem address. + #[error("no available mem address")] + NoAvailableMemAddress, + + /// No available kvm slotse. + #[error("no available kvm slots")] + NoAvailableKvmSlot, + + /// Address manager failed to create memfd to map anonymous memory. 
+ #[error("address manager failed to create memfd to map anonymous memory")] + CreateMemFd(#[source] nix::Error), + + /// Address manager failed to open memory file. + #[error("address manager failed to open memory file")] + OpenFile(#[source] std::io::Error), + + /// Memory file provided is invalid due to empty file path, non-existent file path and other possible mistakes. + #[error("memory file provided to address manager {0} is invalid")] + FileInvalid(String), + + /// Memory file provided is invalid due to empty memory type + #[error("memory type provided to address manager {0} is invalid")] + TypeInvalid(String), + + /// Failed to set size for memory file. + #[error("address manager failed to set size for memory file")] + SetFileSize(#[source] std::io::Error), + + /// Failed to unlink memory file. + #[error("address manager failed to unlink memory file")] + UnlinkFile(#[source] nix::Error), + + /// Failed to duplicate fd of memory file. + #[error("address manager failed to duplicate memory file descriptor")] + DupFd(#[source] nix::Error), + + /// Failure in accessing the memory located at some address. + #[error("address manager failed to access guest memory located at 0x{0:x}")] + AccessGuestMemory(u64, #[source] vm_memory::mmap::Error), + + /// Failed to create GuestMemory + #[error("address manager failed to create guest memory object")] + CreateGuestMemory(#[source] vm_memory::Error), + + /// Failure in initializing guest memory. + #[error("address manager failed to initialize guest memory")] + GuestMemoryNotInitialized, + + /// Failed to mmap() guest memory + #[error("address manager failed to mmap() guest memory into current process")] + MmapGuestMemory(#[source] vm_memory::mmap::MmapRegionError), + + /// Failed to set KVM memory slot. + #[error("address manager failed to configure KVM memory slot")] + KvmSetMemorySlot(#[source] kvm_ioctls::Error), + + /// Failed to set madvise on AddressSpaceRegion + #[error("address manager failed to set madvice() on guest memory region")] + Madvise(#[source] nix::Error), + + /// join threads fail + #[error("address manager failed to join threads")] + JoinFail, + + /// Failed to create Address Space Region + #[error("address manager failed to create Address Space Region {0}")] + CreateAddressSpaceRegion(#[source] AddressSpaceError), +} + +type Result = std::result::Result; + +/// Parameters to configure address space creation operations. +pub struct AddressSpaceMgrBuilder<'a> { + mem_type: &'a str, + mem_file: &'a str, + mem_index: u32, + mem_suffix: bool, + mem_prealloc: bool, + dirty_page_logging: bool, + vmfd: Option>, +} + +impl<'a> AddressSpaceMgrBuilder<'a> { + /// Create a new [`AddressSpaceMgrBuilder`] object. + pub fn new(mem_type: &'a str, mem_file: &'a str) -> Result { + if mem_type.is_empty() { + return Err(AddressManagerError::TypeInvalid(mem_type.to_string())); + } + Ok(AddressSpaceMgrBuilder { + mem_type, + mem_file, + mem_index: 0, + mem_suffix: true, + mem_prealloc: false, + dirty_page_logging: false, + vmfd: None, + }) + } + + /// Enable/disable adding numbered suffix to memory file path. + /// This feature could be useful to generate hugetlbfs files with number suffix. (e.g. shmem0, shmem1) + pub fn toggle_file_suffix(&mut self, enabled: bool) { + self.mem_suffix = enabled; + } + + /// Enable/disable memory pre-allocation. + /// Enable this feature could improve performance stability at the start of workload by avoiding page fault. 
+ /// Disable this feature may influence performance stability but the cpu resource consumption and start-up time will decrease. + pub fn toggle_prealloc(&mut self, prealloc: bool) { + self.mem_prealloc = prealloc; + } + + /// Enable/disable KVM dirty page logging. + pub fn toggle_dirty_page_logging(&mut self, logging: bool) { + self.dirty_page_logging = logging; + } + + /// Set KVM [`VmFd`] handle to configure memory slots. + pub fn set_kvm_vm_fd(&mut self, vmfd: Arc) -> Option> { + let mut existing_vmfd = None; + if self.vmfd.is_some() { + existing_vmfd = self.vmfd.clone(); + } + self.vmfd = Some(vmfd); + existing_vmfd + } + + /// Build a ['AddressSpaceMgr'] using the configured parameters. + pub fn build( + self, + res_mgr: &ResourceManager, + numa_region_infos: &[NumaRegionInfo], + ) -> Result { + let mut mgr = AddressSpaceMgr::default(); + mgr.create_address_space(res_mgr, numa_region_infos, self)?; + Ok(mgr) + } + + fn get_next_mem_file(&mut self) -> String { + if self.mem_suffix { + let path = format!("{}{}", self.mem_file, self.mem_index); + self.mem_index += 1; + path + } else { + self.mem_file.to_string() + } + } +} + +/// Struct to manage virtual machine's physical address space. +pub struct AddressSpaceMgr { + address_space: Option, + vm_as: Option, + base_to_slot: Arc>>, + prealloc_handlers: Vec>, + prealloc_exit: Arc, + numa_nodes: BTreeMap, +} + +impl AddressSpaceMgr { + /// Query address space manager is initialized or not + pub fn is_initialized(&self) -> bool { + self.address_space.is_some() + } + + /// Create the address space for a virtual machine. + /// + /// This method is designed to be called when starting up a virtual machine instead of at + /// runtime, so it's expected the virtual machine will be tore down and no strict error recover. + pub fn create_address_space( + &mut self, + res_mgr: &ResourceManager, + numa_region_infos: &[NumaRegionInfo], + mut param: AddressSpaceMgrBuilder, + ) -> Result<()> { + let mut regions = Vec::new(); + let mut start_addr = dbs_boot::layout::GUEST_MEM_START; + + // Create address space regions. + for info in numa_region_infos.iter() { + info!("numa_region_info {:?}", info); + // convert size_in_mib to bytes + let size = info + .size + .checked_shl(20) + .ok_or_else(|| AddressManagerError::InvalidOperation)?; + + // Guest memory does not intersect with the MMIO hole. + // TODO: make it work for ARM (issue #4307) + if start_addr > dbs_boot::layout::MMIO_LOW_END + || start_addr + size <= dbs_boot::layout::MMIO_LOW_START + { + let region = self.create_region(start_addr, size, info, &mut param)?; + regions.push(region); + start_addr = start_addr + .checked_add(size) + .ok_or_else(|| AddressManagerError::InvalidOperation)?; + } else { + // Add guest memory below the MMIO hole, avoid splitting the memory region + // if the available address region is small than MINIMAL_SPLIT_SPACE MiB. 
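To make the carve-out around the MMIO hole concrete, here is a small worked sketch of the splitting rule described in the comments above. The layout numbers are purely illustrative (the real values come from `dbs_boot::layout`); the point is that the piece below the hole is only kept when it is at least `MINIMAL_SPLIT_SPACE` bytes, and the remainder always restarts just above `MMIO_LOW_END`:

```rust
// Illustrative constants only; the real ones live in dbs_boot::layout.
const MMIO_LOW_START: u64 = 3 << 30;        // assume the hole starts at 3 GiB
const MMIO_LOW_END: u64 = (4 << 30) - 1;    // and ends just below 4 GiB
const MINIMAL_SPLIT_SPACE: u64 = 128 << 20;

/// Returns (start, size) pieces for a region that straddles the MMIO hole.
fn split_across_hole(start: u64, size: u64) -> Vec<(u64, u64)> {
    let mut pieces = Vec::new();
    let mut below = MMIO_LOW_START - start;
    if below < MINIMAL_SPLIT_SPACE {
        below = 0; // too small to be worth its own region
    } else {
        pieces.push((start, below));
    }
    pieces.push((MMIO_LOW_END + 1, size - below));
    pieces
}
```

For example, a 2 GiB request starting 64 MiB below the hole yields a single region above the hole, while a 2 GiB request starting 1 GiB below the hole yields two regions.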
+ let mut below_size = dbs_boot::layout::MMIO_LOW_START + .checked_sub(start_addr) + .ok_or_else(|| AddressManagerError::InvalidOperation)?; + if below_size < (MINIMAL_SPLIT_SPACE) { + below_size = 0; + } else { + let region = self.create_region(start_addr, below_size, info, &mut param)?; + regions.push(region); + } + + // Add guest memory above the MMIO hole + let above_start = dbs_boot::layout::MMIO_LOW_END + 1; + let above_size = size + .checked_sub(below_size) + .ok_or_else(|| AddressManagerError::InvalidOperation)?; + let region = self.create_region(above_start, above_size, info, &mut param)?; + regions.push(region); + start_addr = above_start + .checked_add(above_size) + .ok_or_else(|| AddressManagerError::InvalidOperation)?; + } + } + + // Create GuestMemory object + let mut vm_memory = GuestMemoryMmap::new(); + for reg in regions.iter() { + // Allocate used guest memory addresses. + // These addresses are statically allocated, resource allocation/update should not fail. + let constraint = Constraint::new(reg.len()) + .min(reg.start_addr().raw_value()) + .max(reg.last_addr().raw_value()); + let _key = res_mgr + .allocate_mem_address(&constraint) + .ok_or(AddressManagerError::NoAvailableMemAddress)?; + let mmap_reg = self.create_mmap_region(reg.clone())?; + + vm_memory = vm_memory + .insert_region(mmap_reg.clone()) + .map_err(AddressManagerError::CreateGuestMemory)?; + self.map_to_kvm(res_mgr, ¶m, reg, mmap_reg)?; + } + + #[cfg(feature = "atomic-guest-memory")] + { + self.vm_as = Some(AddressSpace::convert_into_vm_as(vm_memory)); + } + #[cfg(not(feature = "atomic-guest-memory"))] + { + self.vm_as = Some(Arc::new(vm_memory)); + } + + let layout = AddressSpaceLayout::new( + *dbs_boot::layout::GUEST_PHYS_END, + dbs_boot::layout::GUEST_MEM_START, + *dbs_boot::layout::GUEST_MEM_END, + ); + self.address_space = Some(AddressSpace::from_regions(regions, layout)); + + Ok(()) + } + + // size unit: Byte + fn create_region( + &mut self, + start_addr: u64, + size_bytes: u64, + info: &NumaRegionInfo, + param: &mut AddressSpaceMgrBuilder, + ) -> Result> { + let mem_file_path = param.get_next_mem_file(); + let region = AddressSpaceRegion::create_default_memory_region( + GuestAddress(start_addr), + size_bytes, + info.host_numa_node_id, + param.mem_type, + &mem_file_path, + param.mem_prealloc, + false, + ) + .map_err(AddressManagerError::CreateAddressSpaceRegion)?; + let region = Arc::new(region); + + self.insert_into_numa_nodes( + ®ion, + info.guest_numa_node_id.unwrap_or(0), + &info.vcpu_ids, + ); + info!( + "create new region: guest addr 0x{:x}-0x{:x} size {}", + start_addr, + start_addr + size_bytes, + size_bytes + ); + + Ok(region) + } + + fn map_to_kvm( + &mut self, + res_mgr: &ResourceManager, + param: &AddressSpaceMgrBuilder, + reg: &Arc, + mmap_reg: Arc, + ) -> Result<()> { + // Build mapping between GPA <-> HVA, by adding kvm memory slot. 
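The GPA-to-HVA mapping mentioned in the comment above is established by registering each guest region as a KVM userspace memory slot; `set_user_memory_region()` is `unsafe` precisely because KVM trusts the caller that slots do not overlap in guest physical space. A hypothetical helper (not part of this patch) that states that invariant explicitly:

```rust
use kvm_bindings::kvm_userspace_memory_region;

// Hypothetical check illustrating the safety contract relied upon below:
// two slots may not overlap in guest physical address space.
fn slots_overlap(a: &kvm_userspace_memory_region, b: &kvm_userspace_memory_region) -> bool {
    let a_end = a.guest_phys_addr + a.memory_size;
    let b_end = b.guest_phys_addr + b.memory_size;
    a.guest_phys_addr < b_end && b.guest_phys_addr < a_end
}
```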
+ let slot = res_mgr + .allocate_kvm_mem_slot(1, None) + .ok_or(AddressManagerError::NoAvailableKvmSlot)?; + + if let Some(vmfd) = param.vmfd.as_ref() { + let host_addr = mmap_reg + .get_host_address(MemoryRegionAddress(0)) + .map_err(|_e| AddressManagerError::InvalidOperation)?; + let flags = if param.dirty_page_logging { + KVM_MEM_LOG_DIRTY_PAGES + } else { + 0 + }; + let mem_region = kvm_userspace_memory_region { + slot: slot as u32, + guest_phys_addr: reg.start_addr().raw_value(), + memory_size: reg.len() as u64, + userspace_addr: host_addr as u64, + flags, + }; + + info!( + "VM: guest memory region {:x} starts at {:x?}", + reg.start_addr().raw_value(), + host_addr + ); + // Safe because the guest regions are guaranteed not to overlap. + unsafe { vmfd.set_user_memory_region(mem_region) } + .map_err(AddressManagerError::KvmSetMemorySlot)?; + } + + self.base_to_slot + .lock() + .unwrap() + .insert(reg.start_addr().raw_value(), slot as u32); + + Ok(()) + } + + /// Mmap the address space region into current process. + pub fn create_mmap_region( + &mut self, + region: Arc, + ) -> Result> { + // Special check for 32bit host with 64bit virtual machines. + if region.len() > usize::MAX as u64 { + return Err(AddressManagerError::InvalidAddressRange( + region.start_addr().raw_value(), + region.len(), + )); + } + // The device MMIO regions may not be backed by memory files, so refuse to mmap them. + if region.region_type() == AddressSpaceRegionType::DeviceMemory { + return Err(AddressManagerError::InvalidOperation); + } + + // The GuestRegionMmap/MmapRegion will take ownership of the FileOffset object, + // so we have to duplicate the fd here. It's really a dirty design. + let file_offset = match region.file_offset().as_ref() { + Some(fo) => { + let fd = dup(fo.file().as_raw_fd()).map_err(AddressManagerError::DupFd)?; + // Safe because we have just duplicated the raw fd. + let file = unsafe { File::from_raw_fd(fd) }; + let file_offset = FileOffset::new(file, fo.start()); + Some(file_offset) + } + None => None, + }; + let perm_flags = if (region.perm_flags() & libc::MAP_POPULATE) != 0 && region.is_hugepage() + { + // mmap(MAP_POPULATE) conflicts with madive(MADV_HUGEPAGE) because mmap(MAP_POPULATE) + // will pre-fault in all memory with normal pages before madive(MADV_HUGEPAGE) gets + // called. So remove the MAP_POPULATE flag and memory will be faulted in by working + // threads. 
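A few lines above, the `FileOffset` handed to `MmapRegion::build()` has to own its `File`, so the region's backing descriptor is `dup()`ed first (the comment calls this out as a wart of the API). Condensed into a hypothetical standalone helper, the handoff looks like this:

```rust
use std::fs::File;
use std::os::unix::io::{AsRawFd, FromRawFd};
use vm_memory::FileOffset;

// Hypothetical helper mirroring the duplication done in create_mmap_region():
// MmapRegion takes ownership of the FileOffset, so a fresh fd is needed.
fn duplicate_file_offset(fo: &FileOffset) -> nix::Result<FileOffset> {
    let fd = nix::unistd::dup(fo.file().as_raw_fd())?;
    // Safe: `fd` was just created by dup() and is not owned elsewhere.
    let file = unsafe { File::from_raw_fd(fd) };
    Ok(FileOffset::new(file, fo.start()))
}
```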
+ region.perm_flags() & (!libc::MAP_POPULATE) + } else { + region.perm_flags() + }; + let mmap_reg = MmapRegion::build( + file_offset, + region.len() as usize, + libc::PROT_READ | libc::PROT_WRITE, + perm_flags, + ) + .map_err(AddressManagerError::MmapGuestMemory)?; + + if region.is_anonpage() { + self.configure_anon_mem(&mmap_reg)?; + } + if let Some(node_id) = region.host_numa_node_id() { + self.configure_numa(&mmap_reg, node_id)?; + } + if region.is_hugepage() { + self.configure_thp_and_prealloc(®ion, &mmap_reg)?; + } + + let reg = GuestRegionImpl::new(mmap_reg, region.start_addr()) + .map_err(AddressManagerError::CreateGuestMemory)?; + Ok(Arc::new(reg)) + } + + fn configure_anon_mem(&self, mmap_reg: &MmapRegion) -> Result<()> { + unsafe { + mman::madvise( + mmap_reg.as_ptr() as *mut libc::c_void, + mmap_reg.size(), + mman::MmapAdvise::MADV_DONTFORK, + ) + } + .map_err(AddressManagerError::Madvise) + } + + fn configure_numa(&self, mmap_reg: &MmapRegion, node_id: u32) -> Result<()> { + let nodemask = 1_u64 + .checked_shl(node_id) + .ok_or_else(|| AddressManagerError::InvalidOperation)?; + let res = unsafe { + libc::syscall( + libc::SYS_mbind, + mmap_reg.as_ptr() as *mut libc::c_void, + mmap_reg.size(), + MPOL_PREFERRED, + &nodemask as *const u64, + MAX_NODE, + MPOL_MF_MOVE, + ) + }; + if res < 0 { + warn!( + "failed to mbind memory to host_numa_node_id {}: this may affect performance", + node_id + ); + } + Ok(()) + } + + // We set Transparent Huge Page (THP) through mmap to increase performance. + // In order to reduce the impact of page fault on performance, we start several threads (up to MAX_PRE_ALLOC_THREAD) to touch every 4k page of the memory region to manually do memory pre-allocation. + // The reason why we don't use mmap to enable THP and pre-alloction is that THP setting won't take effect in this operation (tested in kernel 4.9) + fn configure_thp_and_prealloc( + &mut self, + region: &Arc, + mmap_reg: &MmapRegion, + ) -> Result<()> { + debug!( + "Setting MADV_HUGEPAGE on AddressSpaceRegion addr {:x?} len {:x?}", + mmap_reg.as_ptr(), + mmap_reg.size() + ); + + // Safe because we just create the MmapRegion + unsafe { + mman::madvise( + mmap_reg.as_ptr() as *mut libc::c_void, + mmap_reg.size(), + mman::MmapAdvise::MADV_HUGEPAGE, + ) + } + .map_err(AddressManagerError::Madvise)?; + + if region.perm_flags() & libc::MAP_POPULATE > 0 { + // Touch every 4k page to trigger allocation. The step is 4K instead of 2M to ensure + // pre-allocation when running out of huge pages. + const PAGE_SIZE: u64 = 4096; + const PAGE_SHIFT: u32 = 12; + let addr = mmap_reg.as_ptr() as u64; + // Here we use >> PAGE_SHIFT to calculate how many 4K pages in the memory region. 
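The worker threads spawned below essentially run the loop sketched here: a volatile read alone could be satisfied by the kernel's shared zero page, so each page is written back to itself with `compare_exchange(old, old)` to force real (ideally huge-page backed) allocation. This is a condensed, single-threaded version for illustration only:

```rust
use std::sync::atomic::{AtomicU8, Ordering};

// Single-threaded sketch of the pre-allocation trick used by the worker
// threads below; `base`/`len` must describe a valid, writable mapping.
unsafe fn touch_every_page(base: *mut u8, len: usize) {
    const PAGE_SIZE: usize = 4096;
    let mut addr = base as usize;
    let end = addr + len;
    while addr < end {
        let ptr = addr as *mut u8;
        let old = std::ptr::read_volatile(ptr);
        let cell = &*(ptr as *const AtomicU8);
        // Writing the value back (even unchanged) forces the page to be allocated.
        let _ = cell.compare_exchange(old, old, Ordering::SeqCst, Ordering::SeqCst);
        addr += PAGE_SIZE;
    }
}
```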
+ let npage = (mmap_reg.size() as u64) >> PAGE_SHIFT; + + let mut touch_thread = ((mmap_reg.size() as u64) >> PRE_ALLOC_GRANULARITY) + 1; + if touch_thread > MAX_PRE_ALLOC_THREAD { + touch_thread = MAX_PRE_ALLOC_THREAD; + } + + let per_npage = npage / touch_thread; + for n in 0..touch_thread { + let start_npage = per_npage * n; + let end_npage = if n == (touch_thread - 1) { + npage + } else { + per_npage * (n + 1) + }; + let mut per_addr = addr + (start_npage * PAGE_SIZE); + let should_stop = self.prealloc_exit.clone(); + + let handler = thread::Builder::new() + .name("PreallocThread".to_string()) + .spawn(move || { + info!("PreallocThread start start_npage: {:?}, end_npage: {:?}, per_addr: {:?}, thread_number: {:?}", + start_npage, end_npage, per_addr, touch_thread ); + for _ in start_npage..end_npage { + if should_stop.load(Ordering::Acquire) { + info!("PreallocThread stop start_npage: {:?}, end_npage: {:?}, per_addr: {:?}, thread_number: {:?}", + start_npage, end_npage, per_addr, touch_thread); + break; + } + + // Reading from a THP page may be served by the zero page, so only + // write operation could ensure THP memory allocation. So use + // the compare_exchange(old_val, old_val) trick to trigger allocation. + let addr_ptr = per_addr as *mut u8; + let read_byte = unsafe { std::ptr::read_volatile(addr_ptr) }; + let atomic_u8 : &AtomicU8 = unsafe {&*(addr_ptr as *mut AtomicU8)}; + let _ = atomic_u8.compare_exchange(read_byte, read_byte, Ordering::SeqCst, Ordering::SeqCst); + per_addr += PAGE_SIZE; + } + + info!("PreallocThread done start_npage: {:?}, end_npage: {:?}, per_addr: {:?}, thread_number: {:?}", + start_npage, end_npage, per_addr, touch_thread ); + }); + + match handler { + Err(e) => error!( + "Failed to create working thread for async pre-allocation, {:?}. This may affect performance stability at the start of the workload.", + e + ), + Ok(hdl) => self.prealloc_handlers.push(hdl), + } + } + } + + Ok(()) + } + + /// Get the address space object + pub fn get_address_space(&self) -> Option<&AddressSpace> { + self.address_space.as_ref() + } + + /// Get the default guest memory object, which will be used to access virtual machine's default + /// guest memory. + pub fn get_vm_as(&self) -> Option<&GuestAddressSpaceImpl> { + self.vm_as.as_ref() + } + + /// Get the base to slot map + pub fn get_base_to_slot_map(&self) -> Arc>> { + self.base_to_slot.clone() + } + + /// get numa nodes infos from address space manager. + pub fn get_numa_nodes(&self) -> &BTreeMap { + &self.numa_nodes + } + + /// add cpu and memory numa informations to BtreeMap + fn insert_into_numa_nodes( + &mut self, + region: &Arc, + guest_numa_node_id: u32, + vcpu_ids: &[u32], + ) { + let node = self + .numa_nodes + .entry(guest_numa_node_id) + .or_insert(NumaNode::new()); + node.add_info(&NumaNodeInfo { + base: region.start_addr(), + size: region.len(), + }); + node.add_vcpu_ids(vcpu_ids); + } + + /// get address space layout from address space manager. + pub fn get_layout(&self) -> Result { + self.address_space + .as_ref() + .map(|v| v.layout()) + .ok_or(AddressManagerError::GuestMemoryNotInitialized) + } + + /// Wait for the pre-allocation working threads to finish work. + /// + /// Force all working threads to exit if `stop` is true. 
+ pub fn wait_prealloc(&mut self, stop: bool) -> Result<()> { + if stop { + self.prealloc_exit.store(true, Ordering::Release); + } + while let Some(handlers) = self.prealloc_handlers.pop() { + if let Err(e) = handlers.join() { + error!("wait_prealloc join fail {:?}", e); + return Err(AddressManagerError::JoinFail); + } + } + Ok(()) + } +} + +impl Default for AddressSpaceMgr { + /// Create a new empty AddressSpaceMgr + fn default() -> Self { + AddressSpaceMgr { + address_space: None, + vm_as: None, + base_to_slot: Arc::new(Mutex::new(HashMap::new())), + prealloc_handlers: Vec::new(), + prealloc_exit: Arc::new(AtomicBool::new(false)), + numa_nodes: BTreeMap::new(), + } + } +} + +#[cfg(test)] +mod tests { + use dbs_boot::layout::GUEST_MEM_START; + use std::ops::Deref; + + use vm_memory::{Bytes, GuestAddressSpace, GuestMemory, GuestMemoryRegion}; + use vmm_sys_util::tempfile::TempFile; + + use super::*; + + #[test] + fn test_create_address_space() { + let res_mgr = ResourceManager::new(None); + let mem_size = 128 << 20; + let numa_region_infos = vec![NumaRegionInfo { + size: mem_size >> 20, + host_numa_node_id: None, + guest_numa_node_id: Some(0), + vcpu_ids: vec![1, 2], + }]; + let builder = AddressSpaceMgrBuilder::new("shmem", "").unwrap(); + let as_mgr = builder.build(&res_mgr, &numa_region_infos).unwrap(); + let vm_as = as_mgr.get_vm_as().unwrap(); + let guard = vm_as.memory(); + let gmem = guard.deref(); + assert_eq!(gmem.num_regions(), 1); + + let reg = gmem + .find_region(GuestAddress(GUEST_MEM_START + mem_size - 1)) + .unwrap(); + assert_eq!(reg.start_addr(), GuestAddress(GUEST_MEM_START)); + assert_eq!(reg.len(), mem_size); + assert!(gmem + .find_region(GuestAddress(GUEST_MEM_START + mem_size)) + .is_none()); + assert!(reg.file_offset().is_some()); + + let buf = [0x1u8, 0x2u8, 0x3u8, 0x4u8, 0x5u8]; + gmem.write_slice(&buf, GuestAddress(GUEST_MEM_START)) + .unwrap(); + + // Update middle of mapped memory region + let mut val = 0xa5u8; + gmem.write_obj(val, GuestAddress(GUEST_MEM_START + 0x1)) + .unwrap(); + val = gmem.read_obj(GuestAddress(GUEST_MEM_START + 0x1)).unwrap(); + assert_eq!(val, 0xa5); + val = gmem.read_obj(GuestAddress(GUEST_MEM_START + 0x0)).unwrap(); + assert_eq!(val, 1); + val = gmem.read_obj(GuestAddress(GUEST_MEM_START + 0x2)).unwrap(); + assert_eq!(val, 3); + val = gmem.read_obj(GuestAddress(GUEST_MEM_START + 0x5)).unwrap(); + assert_eq!(val, 0); + + // Read ahead of mapped memory region + assert!(gmem + .read_obj::(GuestAddress(GUEST_MEM_START + mem_size)) + .is_err()); + + let res_mgr = ResourceManager::new(None); + let mem_size = dbs_boot::layout::MMIO_LOW_START + (1 << 30); + let numa_region_infos = vec![NumaRegionInfo { + size: mem_size >> 20, + host_numa_node_id: None, + guest_numa_node_id: Some(0), + vcpu_ids: vec![1, 2], + }]; + let builder = AddressSpaceMgrBuilder::new("shmem", "").unwrap(); + let as_mgr = builder.build(&res_mgr, &numa_region_infos).unwrap(); + let vm_as = as_mgr.get_vm_as().unwrap(); + let guard = vm_as.memory(); + let gmem = guard.deref(); + #[cfg(target_arch = "x86_64")] + assert_eq!(gmem.num_regions(), 2); + #[cfg(target_arch = "aarch64")] + assert_eq!(gmem.num_regions(), 1); + + // Test dropping GuestMemoryMmap object releases all resources. 
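Putting the pieces of this module together, a typical start-up path (mirroring `test_address_space_mgr_async_prealloc` below) builds the manager once, lets the pre-allocation threads run in the background, and only joins them before the guest starts executing. A minimal sketch, assuming `res_mgr` and `numa_region_infos` are prepared as in the tests; the shmem path and error handling are illustrative:

```rust
// Sketch of the intended call sequence; error handling is simplified.
let mut builder = AddressSpaceMgrBuilder::new("hugeshmem", "/dev/shm/dragonball")?;
builder.toggle_prealloc(true);
let mut as_mgr = builder.build(&res_mgr, &numa_region_infos)?;

// ... set up vCPUs and devices while pre-allocation proceeds ...

// Join the page-touch threads (pass `true` instead to force them to stop).
as_mgr.wait_prealloc(false)?;
```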
+ for _ in 0..10000 { + let res_mgr = ResourceManager::new(None); + let mem_size = 1 << 20; + let numa_region_infos = vec![NumaRegionInfo { + size: mem_size >> 20, + host_numa_node_id: None, + guest_numa_node_id: Some(0), + vcpu_ids: vec![1, 2], + }]; + let builder = AddressSpaceMgrBuilder::new("shmem", "").unwrap(); + let _as_mgr = builder.build(&res_mgr, &numa_region_infos).unwrap(); + } + let file = TempFile::new().unwrap().into_file(); + let fd = file.as_raw_fd(); + // fd should be small enough if there's no leaking of fds. + assert!(fd < 1000); + } + + #[test] + fn test_address_space_mgr_get_boundary() { + let layout = AddressSpaceLayout::new( + *dbs_boot::layout::GUEST_PHYS_END, + dbs_boot::layout::GUEST_MEM_START, + *dbs_boot::layout::GUEST_MEM_END, + ); + let res_mgr = ResourceManager::new(None); + let mem_size = 128 << 20; + let numa_region_infos = vec![NumaRegionInfo { + size: mem_size >> 20, + host_numa_node_id: None, + guest_numa_node_id: Some(0), + vcpu_ids: vec![1, 2], + }]; + let builder = AddressSpaceMgrBuilder::new("shmem", "").unwrap(); + let as_mgr = builder.build(&res_mgr, &numa_region_infos).unwrap(); + assert_eq!(as_mgr.get_layout().unwrap(), layout); + } + + #[test] + fn test_address_space_mgr_get_numa_nodes() { + let res_mgr = ResourceManager::new(None); + let mem_size = 128 << 20; + let cpu_vec = vec![1, 2]; + let numa_region_infos = vec![NumaRegionInfo { + size: mem_size >> 20, + host_numa_node_id: None, + guest_numa_node_id: Some(0), + vcpu_ids: cpu_vec.clone(), + }]; + let builder = AddressSpaceMgrBuilder::new("shmem", "").unwrap(); + let as_mgr = builder.build(&res_mgr, &numa_region_infos).unwrap(); + let mut numa_node = NumaNode::new(); + numa_node.add_info(&NumaNodeInfo { + base: GuestAddress(GUEST_MEM_START), + size: mem_size, + }); + numa_node.add_vcpu_ids(&cpu_vec); + + assert_eq!(*as_mgr.get_numa_nodes().get(&0).unwrap(), numa_node); + } + + #[test] + fn test_address_space_mgr_async_prealloc() { + let res_mgr = ResourceManager::new(None); + let mem_size = 2 << 20; + let cpu_vec = vec![1, 2]; + let numa_region_infos = vec![NumaRegionInfo { + size: mem_size >> 20, + host_numa_node_id: None, + guest_numa_node_id: Some(0), + vcpu_ids: cpu_vec.clone(), + }]; + let mut builder = AddressSpaceMgrBuilder::new("hugeshmem", "").unwrap(); + builder.toggle_prealloc(true); + let mut as_mgr = builder.build(&res_mgr, &numa_region_infos).unwrap(); + as_mgr.wait_prealloc(false).unwrap(); + } + + #[test] + fn test_address_space_mgr_builder() { + let mut builder = AddressSpaceMgrBuilder::new("shmem", "/tmp/shmem").unwrap(); + + assert_eq!(builder.mem_type, "shmem"); + assert_eq!(builder.mem_file, "/tmp/shmem"); + assert_eq!(builder.mem_index, 0); + assert_eq!(builder.mem_suffix, true); + assert_eq!(builder.mem_prealloc, false); + assert_eq!(builder.dirty_page_logging, false); + assert!(builder.vmfd.is_none()); + + assert_eq!(&builder.get_next_mem_file(), "/tmp/shmem0"); + assert_eq!(&builder.get_next_mem_file(), "/tmp/shmem1"); + assert_eq!(&builder.get_next_mem_file(), "/tmp/shmem2"); + assert_eq!(builder.mem_index, 3); + + builder.toggle_file_suffix(false); + assert_eq!(&builder.get_next_mem_file(), "/tmp/shmem"); + assert_eq!(&builder.get_next_mem_file(), "/tmp/shmem"); + assert_eq!(builder.mem_index, 3); + + builder.toggle_prealloc(true); + builder.toggle_dirty_page_logging(true); + assert_eq!(builder.mem_prealloc, true); + assert_eq!(builder.dirty_page_logging, true); + } + + #[test] + fn test_configure_invalid_numa() { + let res_mgr = ResourceManager::new(None); + let 
mem_size = 128 << 20; + let numa_region_infos = vec![NumaRegionInfo { + size: mem_size >> 20, + host_numa_node_id: None, + guest_numa_node_id: Some(0), + vcpu_ids: vec![1, 2], + }]; + let builder = AddressSpaceMgrBuilder::new("shmem", "").unwrap(); + let as_mgr = builder.build(&res_mgr, &numa_region_infos).unwrap(); + let mmap_reg = MmapRegion::new(8).unwrap(); + + assert!(as_mgr.configure_numa(&mmap_reg, u32::MAX).is_err()); + } +} diff --git a/src/dragonball/src/config_manager.rs b/src/dragonball/src/config_manager.rs new file mode 100644 index 0000000000..de16ab0fbf --- /dev/null +++ b/src/dragonball/src/config_manager.rs @@ -0,0 +1,724 @@ +// Copyright (C) 2020-2022 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::convert::TryInto; +use std::io; +use std::ops::{Index, IndexMut}; +use std::sync::Arc; + +use dbs_device::DeviceIo; +use dbs_utils::rate_limiter::{RateLimiter, TokenBucket}; +use serde_derive::{Deserialize, Serialize}; + +/// Trait for generic configuration information. +pub trait ConfigItem { + /// Related errors. + type Err; + + /// Get the unique identifier of the configuration item. + fn id(&self) -> &str; + + /// Check whether current configuration item conflicts with another one. + fn check_conflicts(&self, other: &Self) -> std::result::Result<(), Self::Err>; +} + +/// Struct to manage a group of configuration items. +#[derive(Debug, Default, Deserialize, PartialEq, Serialize)] +pub struct ConfigInfos +where + T: ConfigItem + Clone, +{ + configs: Vec, +} + +impl ConfigInfos +where + T: ConfigItem + Clone + Default, +{ + /// Constructor + pub fn new() -> Self { + ConfigInfos::default() + } + + /// Insert a configuration item in the group. + pub fn insert(&mut self, config: T) -> std::result::Result<(), T::Err> { + for item in self.configs.iter() { + config.check_conflicts(item)?; + } + self.configs.push(config); + + Ok(()) + } + + /// Update a configuration item in the group. + pub fn update(&mut self, config: T, err: T::Err) -> std::result::Result<(), T::Err> { + match self.get_index_by_id(&config) { + None => Err(err), + Some(index) => { + for (idx, item) in self.configs.iter().enumerate() { + if idx != index { + config.check_conflicts(item)?; + } + } + self.configs[index] = config; + Ok(()) + } + } + } + + /// Insert or update a configuration item in the group. + pub fn insert_or_update(&mut self, config: T) -> std::result::Result<(), T::Err> { + match self.get_index_by_id(&config) { + None => { + for item in self.configs.iter() { + config.check_conflicts(item)?; + } + + self.configs.push(config) + } + Some(index) => { + for (idx, item) in self.configs.iter().enumerate() { + if idx != index { + config.check_conflicts(item)?; + } + } + self.configs[index] = config; + } + } + + Ok(()) + } + + /// Remove the matching configuration entry. + pub fn remove(&mut self, config: &T) -> Option { + if let Some(index) = self.get_index_by_id(config) { + Some(self.configs.remove(index)) + } else { + None + } + } + + /// Returns an immutable iterator over the config items + pub fn iter(&self) -> ::std::slice::Iter { + self.configs.iter() + } + + /// Get the configuration entry with matching ID. 
+ pub fn get_by_id(&self, item: &T) -> Option<&T> { + let id = item.id(); + + self.configs.iter().rfind(|cfg| cfg.id() == id) + } + + fn get_index_by_id(&self, item: &T) -> Option { + let id = item.id(); + self.configs.iter().position(|cfg| cfg.id() == id) + } +} + +impl Clone for ConfigInfos +where + T: ConfigItem + Clone, +{ + fn clone(&self) -> Self { + ConfigInfos { + configs: self.configs.clone(), + } + } +} + +/// Struct to maintain configuration information for a device. +pub struct DeviceConfigInfo +where + T: ConfigItem + Clone, +{ + /// Configuration information for the device object. + pub config: T, + /// The associated device object. + pub device: Option>, +} + +impl DeviceConfigInfo +where + T: ConfigItem + Clone, +{ + /// Create a new instance of ['DeviceInfoGroup']. + pub fn new(config: T) -> Self { + DeviceConfigInfo { + config, + device: None, + } + } + + /// Create a new instance of ['DeviceInfoGroup'] with optional device. + pub fn new_with_device(config: T, device: Option>) -> Self { + DeviceConfigInfo { config, device } + } + + /// Set the device object associated with the configuration. + pub fn set_device(&mut self, device: Arc) { + self.device = Some(device); + } +} + +impl Clone for DeviceConfigInfo +where + T: ConfigItem + Clone, +{ + fn clone(&self) -> Self { + DeviceConfigInfo::new_with_device(self.config.clone(), self.device.clone()) + } +} + +/// Struct to maintain configuration information for a group of devices. +pub struct DeviceConfigInfos +where + T: ConfigItem + Clone, +{ + info_list: Vec>, +} + +impl DeviceConfigInfos +where + T: ConfigItem + Clone, +{ + /// Create a new instance of ['DeviceConfigInfos']. + pub fn new() -> Self { + DeviceConfigInfos { + info_list: Vec::new(), + } + } + + /// Insert or update configuration information for a device. + pub fn insert_or_update(&mut self, config: &T) -> std::result::Result { + let device_info = DeviceConfigInfo::new(config.clone()); + Ok(match self.get_index_by_id(config) { + Some(index) => { + for (idx, info) in self.info_list.iter().enumerate() { + if idx != index { + info.config.check_conflicts(config)?; + } + } + self.info_list[index] = device_info; + index + } + None => { + for info in self.info_list.iter() { + info.config.check_conflicts(config)?; + } + self.info_list.push(device_info); + self.info_list.len() - 1 + } + }) + } + + /// Remove a device configuration information object. + pub fn remove(&mut self, index: usize) -> Option> { + if self.info_list.len() > index { + Some(self.info_list.remove(index)) + } else { + None + } + } + + #[allow(dead_code)] + /// Get number of device configuration information objects. + pub fn len(&self) -> usize { + self.info_list.len() + } + + /// Add a device configuration information object at the tail. + pub fn push(&mut self, info: DeviceConfigInfo) { + self.info_list.push(info); + } + + /// Iterator for configuration information objects. + pub fn iter(&self) -> std::slice::Iter> { + self.info_list.iter() + } + + /// Mutable iterator for configuration information objects. 
+ pub fn iter_mut(&mut self) -> std::slice::IterMut> { + self.info_list.iter_mut() + } + + fn get_index_by_id(&self, config: &T) -> Option { + self.info_list + .iter() + .position(|info| info.config.id().eq(config.id())) + } +} + +impl Index for DeviceConfigInfos +where + T: ConfigItem + Clone, +{ + type Output = DeviceConfigInfo; + fn index(&self, idx: usize) -> &Self::Output { + &self.info_list[idx] + } +} + +impl IndexMut for DeviceConfigInfos +where + T: ConfigItem + Clone, +{ + fn index_mut(&mut self, idx: usize) -> &mut Self::Output { + &mut self.info_list[idx] + } +} + +impl Clone for DeviceConfigInfos +where + T: ConfigItem + Clone, +{ + fn clone(&self) -> Self { + DeviceConfigInfos { + info_list: self.info_list.clone(), + } + } +} + +/// Configuration information for RateLimiter token bucket. +#[derive(Clone, Debug, Default, Deserialize, PartialEq, Serialize)] +pub struct TokenBucketConfigInfo { + /// The size for the token bucket. A TokenBucket of `size` total capacity will take `refill_time` + /// milliseconds to go from zero tokens to total capacity. + pub size: u64, + /// Number of free initial tokens, that can be consumed at no cost. + pub one_time_burst: u64, + /// Complete refill time in milliseconds. + pub refill_time: u64, +} + +impl TokenBucketConfigInfo { + fn resize(&mut self, n: u64) { + if n != 0 { + self.size /= n; + self.one_time_burst /= n; + } + } +} + +impl From for TokenBucket { + fn from(t: TokenBucketConfigInfo) -> TokenBucket { + (&t).into() + } +} + +impl From<&TokenBucketConfigInfo> for TokenBucket { + fn from(t: &TokenBucketConfigInfo) -> TokenBucket { + TokenBucket::new(t.size, t.one_time_burst, t.refill_time) + } +} + +/// Configuration information for RateLimiter objects. +#[derive(Clone, Debug, Default, Deserialize, PartialEq, Serialize)] +pub struct RateLimiterConfigInfo { + /// Data used to initialize the RateLimiter::bandwidth bucket. + pub bandwidth: TokenBucketConfigInfo, + /// Data used to initialize the RateLimiter::ops bucket. + pub ops: TokenBucketConfigInfo, +} + +impl RateLimiterConfigInfo { + /// Update the bandwidth budget configuration. + pub fn update_bandwidth(&mut self, new_config: TokenBucketConfigInfo) { + self.bandwidth = new_config; + } + + /// Update the ops budget configuration. + pub fn update_ops(&mut self, new_config: TokenBucketConfigInfo) { + self.ops = new_config; + } + + /// resize the limiter to its 1/n. 
+ pub fn resize(&mut self, n: u64) { + self.bandwidth.resize(n); + self.ops.resize(n); + } +} + +impl TryInto for &RateLimiterConfigInfo { + type Error = io::Error; + + fn try_into(self) -> Result { + RateLimiter::new( + self.bandwidth.size, + self.bandwidth.one_time_burst, + self.bandwidth.refill_time, + self.ops.size, + self.ops.one_time_burst, + self.ops.refill_time, + ) + } +} + +impl TryInto for RateLimiterConfigInfo { + type Error = io::Error; + + fn try_into(self) -> Result { + RateLimiter::new( + self.bandwidth.size, + self.bandwidth.one_time_burst, + self.bandwidth.refill_time, + self.ops.size, + self.ops.one_time_burst, + self.ops.refill_time, + ) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[derive(Debug, thiserror::Error)] + pub enum DummyError { + #[error("configuration entry exists")] + Exist, + } + + #[derive(Clone, Debug)] + pub struct DummyConfigInfo { + id: String, + content: String, + } + + impl ConfigItem for DummyConfigInfo { + type Err = DummyError; + + fn id(&self) -> &str { + &self.id + } + + fn check_conflicts(&self, other: &Self) -> Result<(), DummyError> { + if self.id == other.id || self.content == other.content { + Err(DummyError::Exist) + } else { + Ok(()) + } + } + } + + type DummyConfigInfos = ConfigInfos; + + #[test] + fn test_insert_config_info() { + let mut configs = DummyConfigInfos::new(); + + let config1 = DummyConfigInfo { + id: "1".to_owned(), + content: "a".to_owned(), + }; + configs.insert(config1).unwrap(); + assert_eq!(configs.configs.len(), 1); + assert_eq!(configs.configs[0].id, "1"); + assert_eq!(configs.configs[0].content, "a"); + + // Test case: cannot insert new item with the same id. + let config2 = DummyConfigInfo { + id: "1".to_owned(), + content: "b".to_owned(), + }; + configs.insert(config2).unwrap_err(); + assert_eq!(configs.configs.len(), 1); + assert_eq!(configs.configs[0].id, "1"); + assert_eq!(configs.configs[0].content, "a"); + + let config3 = DummyConfigInfo { + id: "2".to_owned(), + content: "c".to_owned(), + }; + configs.insert(config3).unwrap(); + assert_eq!(configs.configs.len(), 2); + assert_eq!(configs.configs[0].id, "1"); + assert_eq!(configs.configs[0].content, "a"); + assert_eq!(configs.configs[1].id, "2"); + assert_eq!(configs.configs[1].content, "c"); + + // Test case: cannot insert new item with the same content. 
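The `TryInto<RateLimiter>` implementations above are how the serde-facing configuration becomes a live limiter. A minimal sketch, with illustrative bucket values:

```rust
use std::convert::TryInto;
use dbs_utils::rate_limiter::RateLimiter;

// Illustrative values: 1 MiB of bandwidth budget and 1000 ops per 100 ms refill.
let cfg = RateLimiterConfigInfo {
    bandwidth: TokenBucketConfigInfo { size: 1 << 20, one_time_burst: 0, refill_time: 100 },
    ops: TokenBucketConfigInfo { size: 1000, one_time_burst: 0, refill_time: 100 },
};
let limiter: RateLimiter = (&cfg).try_into().expect("valid rate limiter configuration");
```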
+ let config4 = DummyConfigInfo { + id: "3".to_owned(), + content: "c".to_owned(), + }; + configs.insert(config4).unwrap_err(); + assert_eq!(configs.configs.len(), 2); + assert_eq!(configs.configs[0].id, "1"); + assert_eq!(configs.configs[0].content, "a"); + assert_eq!(configs.configs[1].id, "2"); + assert_eq!(configs.configs[1].content, "c"); + } + + #[test] + fn test_update_config_info() { + let mut configs = DummyConfigInfos::new(); + + let config1 = DummyConfigInfo { + id: "1".to_owned(), + content: "a".to_owned(), + }; + configs.insert(config1).unwrap(); + assert_eq!(configs.configs.len(), 1); + assert_eq!(configs.configs[0].id, "1"); + assert_eq!(configs.configs[0].content, "a"); + + // Test case: succeed to update an existing entry + let config2 = DummyConfigInfo { + id: "1".to_owned(), + content: "b".to_owned(), + }; + configs.update(config2, DummyError::Exist).unwrap(); + assert_eq!(configs.configs.len(), 1); + assert_eq!(configs.configs[0].id, "1"); + assert_eq!(configs.configs[0].content, "b"); + + // Test case: cannot update a non-existing entry + let config3 = DummyConfigInfo { + id: "2".to_owned(), + content: "c".to_owned(), + }; + configs.update(config3, DummyError::Exist).unwrap_err(); + assert_eq!(configs.configs.len(), 1); + assert_eq!(configs.configs[0].id, "1"); + assert_eq!(configs.configs[0].content, "b"); + + // Test case: cannot update an entry with conflicting content + let config4 = DummyConfigInfo { + id: "2".to_owned(), + content: "c".to_owned(), + }; + configs.insert(config4).unwrap(); + let config5 = DummyConfigInfo { + id: "1".to_owned(), + content: "c".to_owned(), + }; + configs.update(config5, DummyError::Exist).unwrap_err(); + } + + #[test] + fn test_insert_or_update_config_info() { + let mut configs = DummyConfigInfos::new(); + + let config1 = DummyConfigInfo { + id: "1".to_owned(), + content: "a".to_owned(), + }; + configs.insert_or_update(config1).unwrap(); + assert_eq!(configs.configs.len(), 1); + assert_eq!(configs.configs[0].id, "1"); + assert_eq!(configs.configs[0].content, "a"); + + // Test case: succeed to update an existing entry + let config2 = DummyConfigInfo { + id: "1".to_owned(), + content: "b".to_owned(), + }; + configs.insert_or_update(config2.clone()).unwrap(); + assert_eq!(configs.configs.len(), 1); + assert_eq!(configs.configs[0].id, "1"); + assert_eq!(configs.configs[0].content, "b"); + + // Add a second entry + let config3 = DummyConfigInfo { + id: "2".to_owned(), + content: "c".to_owned(), + }; + configs.insert_or_update(config3.clone()).unwrap(); + assert_eq!(configs.configs.len(), 2); + assert_eq!(configs.configs[0].id, "1"); + assert_eq!(configs.configs[0].content, "b"); + assert_eq!(configs.configs[1].id, "2"); + assert_eq!(configs.configs[1].content, "c"); + + // Lookup the first entry + let config4 = configs + .get_by_id(&DummyConfigInfo { + id: "1".to_owned(), + content: "b".to_owned(), + }) + .unwrap(); + assert_eq!(config4.id, config2.id); + assert_eq!(config4.content, config2.content); + + // Lookup the second entry + let config5 = configs + .get_by_id(&DummyConfigInfo { + id: "2".to_owned(), + content: "c".to_owned(), + }) + .unwrap(); + assert_eq!(config5.id, config3.id); + assert_eq!(config5.content, config3.content); + + // Test case: can't insert an entry with conflicting content + let config6 = DummyConfigInfo { + id: "3".to_owned(), + content: "c".to_owned(), + }; + configs.insert_or_update(config6).unwrap_err(); + assert_eq!(configs.configs.len(), 2); + assert_eq!(configs.configs[0].id, "1"); + 
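`DummyConfigInfo` in these tests shows the whole `ConfigItem` contract: a stable `id()` plus a pairwise `check_conflicts()`. A real device configuration would implement it the same way; the type below is purely hypothetical and only illustrates the shape (it is not part of this patch):

```rust
// Hypothetical device config: conflicts on either the id or the host path.
#[derive(Clone, Debug, Default)]
struct BlockDeviceConfigInfo {
    drive_id: String,
    path_on_host: String,
}

impl ConfigItem for BlockDeviceConfigInfo {
    type Err = DummyError; // reusing the test error type for the sketch

    fn id(&self) -> &str {
        &self.drive_id
    }

    fn check_conflicts(&self, other: &Self) -> std::result::Result<(), Self::Err> {
        if self.drive_id == other.drive_id || self.path_on_host == other.path_on_host {
            Err(DummyError::Exist)
        } else {
            Ok(())
        }
    }
}
```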
assert_eq!(configs.configs[0].content, "b"); + assert_eq!(configs.configs[1].id, "2"); + assert_eq!(configs.configs[1].content, "c"); + } + + #[test] + fn test_remove_config_info() { + let mut configs = DummyConfigInfos::new(); + + let config1 = DummyConfigInfo { + id: "1".to_owned(), + content: "a".to_owned(), + }; + configs.insert_or_update(config1).unwrap(); + let config2 = DummyConfigInfo { + id: "1".to_owned(), + content: "b".to_owned(), + }; + configs.insert_or_update(config2.clone()).unwrap(); + let config3 = DummyConfigInfo { + id: "2".to_owned(), + content: "c".to_owned(), + }; + configs.insert_or_update(config3.clone()).unwrap(); + assert_eq!(configs.configs.len(), 2); + assert_eq!(configs.configs[0].id, "1"); + assert_eq!(configs.configs[0].content, "b"); + assert_eq!(configs.configs[1].id, "2"); + assert_eq!(configs.configs[1].content, "c"); + + let config4 = configs + .remove(&DummyConfigInfo { + id: "1".to_owned(), + content: "no value".to_owned(), + }) + .unwrap(); + assert_eq!(config4.id, config2.id); + assert_eq!(config4.content, config2.content); + assert_eq!(configs.configs.len(), 1); + assert_eq!(configs.configs[0].id, "2"); + assert_eq!(configs.configs[0].content, "c"); + + let config5 = configs + .remove(&DummyConfigInfo { + id: "2".to_owned(), + content: "no value".to_owned(), + }) + .unwrap(); + assert_eq!(config5.id, config3.id); + assert_eq!(config5.content, config3.content); + assert_eq!(configs.configs.len(), 0); + } + + type DummyDeviceInfoList = DeviceConfigInfos; + + #[test] + fn test_insert_or_update_device_info() { + let mut configs = DummyDeviceInfoList::new(); + + let config1 = DummyConfigInfo { + id: "1".to_owned(), + content: "a".to_owned(), + }; + configs.insert_or_update(&config1).unwrap(); + assert_eq!(configs.len(), 1); + assert_eq!(configs[0].config.id, "1"); + assert_eq!(configs[0].config.content, "a"); + + // Test case: succeed to update an existing entry + let config2 = DummyConfigInfo { + id: "1".to_owned(), + content: "b".to_owned(), + }; + configs.insert_or_update(&config2 /* */).unwrap(); + assert_eq!(configs.len(), 1); + assert_eq!(configs[0].config.id, "1"); + assert_eq!(configs[0].config.content, "b"); + + // Add a second entry + let config3 = DummyConfigInfo { + id: "2".to_owned(), + content: "c".to_owned(), + }; + configs.insert_or_update(&config3).unwrap(); + assert_eq!(configs.len(), 2); + assert_eq!(configs[0].config.id, "1"); + assert_eq!(configs[0].config.content, "b"); + assert_eq!(configs[1].config.id, "2"); + assert_eq!(configs[1].config.content, "c"); + + // Lookup the first entry + let config4_id = configs + .get_index_by_id(&DummyConfigInfo { + id: "1".to_owned(), + content: "b".to_owned(), + }) + .unwrap(); + let config4 = &configs[config4_id].config; + assert_eq!(config4.id, config2.id); + assert_eq!(config4.content, config2.content); + + // Lookup the second entry + let config5_id = configs + .get_index_by_id(&DummyConfigInfo { + id: "2".to_owned(), + content: "c".to_owned(), + }) + .unwrap(); + let config5 = &configs[config5_id].config; + assert_eq!(config5.id, config3.id); + assert_eq!(config5.content, config3.content); + + // Test case: can't insert an entry with conflicting content + let config6 = DummyConfigInfo { + id: "3".to_owned(), + content: "c".to_owned(), + }; + configs.insert_or_update(&config6).unwrap_err(); + assert_eq!(configs.len(), 2); + assert_eq!(configs[0].config.id, "1"); + assert_eq!(configs[0].config.content, "b"); + assert_eq!(configs[1].config.id, "2"); + assert_eq!(configs[1].config.content, 
"c"); + } + + #[test] + fn test_remove_device_info() { + let mut configs = DummyDeviceInfoList::new(); + + let config1 = DummyConfigInfo { + id: "1".to_owned(), + content: "a".to_owned(), + }; + configs.insert_or_update(&config1).unwrap(); + let config2 = DummyConfigInfo { + id: "1".to_owned(), + content: "b".to_owned(), + }; + configs.insert_or_update(&config2).unwrap(); + let config3 = DummyConfigInfo { + id: "2".to_owned(), + content: "c".to_owned(), + }; + configs.insert_or_update(&config3).unwrap(); + assert_eq!(configs.len(), 2); + assert_eq!(configs[0].config.id, "1"); + assert_eq!(configs[0].config.content, "b"); + assert_eq!(configs[1].config.id, "2"); + assert_eq!(configs[1].config.content, "c"); + + let config4 = configs.remove(0).unwrap().config; + assert_eq!(config4.id, config2.id); + assert_eq!(config4.content, config2.content); + assert_eq!(configs.len(), 1); + assert_eq!(configs[0].config.id, "2"); + assert_eq!(configs[0].config.content, "c"); + + let config5 = configs.remove(0).unwrap().config; + assert_eq!(config5.id, config3.id); + assert_eq!(config5.content, config3.content); + assert_eq!(configs.len(), 0); + } +} diff --git a/src/dragonball/src/device_manager/console_manager.rs b/src/dragonball/src/device_manager/console_manager.rs new file mode 100644 index 0000000000..617b98b167 --- /dev/null +++ b/src/dragonball/src/device_manager/console_manager.rs @@ -0,0 +1,430 @@ +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +//! Virtual machine console device manager. +//! +//! A virtual console are composed up of two parts: frontend in virtual machine and backend in +//! host OS. A frontend may be serial port, virtio-console etc, a backend may be stdio or Unix +//! domain socket. The manager connects the frontend with the backend. +use std::io::{self, Read}; +use std::os::unix::net::{UnixListener, UnixStream}; +use std::path::Path; +use std::sync::{Arc, Mutex}; + +use bytes::{BufMut, BytesMut}; +use dbs_legacy_devices::{ConsoleHandler, SerialDevice}; +use dbs_utils::epoll_manager::{ + EpollManager, EventOps, EventSet, Events, MutEventSubscriber, SubscriberId, +}; +use vmm_sys_util::terminal::Terminal; + +use super::{DeviceMgrError, Result}; + +const EPOLL_EVENT_SERIAL: u32 = 0; +const EPOLL_EVENT_SERIAL_DATA: u32 = 1; +const EPOLL_EVENT_STDIN: u32 = 2; +// Maximal backend throughput for every data transaction. +const MAX_BACKEND_THROUGHPUT: usize = 64; + +/// Errors related to Console manager operations. +#[derive(Debug, thiserror::Error)] +pub enum ConsoleManagerError { + /// Cannot create unix domain socket for serial port + #[error("cannot create socket for serial console")] + CreateSerialSock(#[source] std::io::Error), + + /// An operation on the epoll instance failed due to resource exhaustion or bad configuration. + #[error("failure while managing epoll event for console fd")] + EpollMgr(#[source] dbs_utils::epoll_manager::Error), + + /// Cannot set mode for terminal. + #[error("failure while setting attribute for terminal")] + StdinHandle(#[source] vmm_sys_util::errno::Error), +} + +enum Backend { + StdinHandle(std::io::Stdin), + SockPath(String), +} + +/// Console manager to manage frontend and backend console devices. 
+pub struct ConsoleManager { + epoll_mgr: EpollManager, + logger: slog::Logger, + subscriber_id: Option, + backend: Option, +} + +impl ConsoleManager { + /// Create a console manager instance. + pub fn new(epoll_mgr: EpollManager, logger: &slog::Logger) -> Self { + let logger = logger.new(slog::o!("subsystem" => "console_manager")); + ConsoleManager { + epoll_mgr, + logger, + subscriber_id: Default::default(), + backend: None, + } + } + + /// Create a console backend device by using stdio streams. + pub fn create_stdio_console(&mut self, device: Arc>) -> Result<()> { + let stdin_handle = std::io::stdin(); + stdin_handle + .lock() + .set_raw_mode() + .map_err(|e| DeviceMgrError::ConsoleManager(ConsoleManagerError::StdinHandle(e)))?; + + let handler = ConsoleEpollHandler::new(device, Some(stdin_handle), None, &self.logger); + self.subscriber_id = Some(self.epoll_mgr.add_subscriber(Box::new(handler))); + self.backend = Some(Backend::StdinHandle(std::io::stdin())); + + Ok(()) + } + + /// Create s console backend device by using Unix Domain socket. + pub fn create_socket_console( + &mut self, + device: Arc>, + sock_path: String, + ) -> Result<()> { + let sock_listener = Self::bind_domain_socket(&sock_path).map_err(|e| { + DeviceMgrError::ConsoleManager(ConsoleManagerError::CreateSerialSock(e)) + })?; + let handler = ConsoleEpollHandler::new(device, None, Some(sock_listener), &self.logger); + + self.subscriber_id = Some(self.epoll_mgr.add_subscriber(Box::new(handler))); + self.backend = Some(Backend::SockPath(sock_path)); + + Ok(()) + } + + /// Reset the host side terminal to canonical mode. + pub fn reset_console(&self) -> Result<()> { + if let Some(Backend::StdinHandle(stdin_handle)) = self.backend.as_ref() { + stdin_handle + .lock() + .set_canon_mode() + .map_err(|e| DeviceMgrError::ConsoleManager(ConsoleManagerError::StdinHandle(e)))?; + } + + Ok(()) + } + + fn bind_domain_socket(serial_path: &str) -> std::result::Result { + let path = Path::new(serial_path); + if path.is_file() { + let _ = std::fs::remove_file(serial_path); + } + + UnixListener::bind(path) + } +} + +struct ConsoleEpollHandler { + device: Arc>, + stdin_handle: Option, + sock_listener: Option, + sock_conn: Option, + logger: slog::Logger, +} + +impl ConsoleEpollHandler { + fn new( + device: Arc>, + stdin_handle: Option, + sock_listener: Option, + logger: &slog::Logger, + ) -> Self { + ConsoleEpollHandler { + device, + stdin_handle, + sock_listener, + sock_conn: None, + logger: logger.new(slog::o!("subsystem" => "console_manager")), + } + } + + fn uds_listener_accept(&mut self, ops: &mut EventOps) -> std::io::Result<()> { + if self.sock_conn.is_some() { + slog::warn!(self.logger, + "UDS for serial port 1 already exists, reject the new connection"; + "subsystem" => "console_mgr", + ); + // Do not expected poisoned lock. + let _ = self.sock_listener.as_mut().unwrap().accept(); + } else { + // Safe to unwrap() because self.sock_conn is Some(). + let (conn_sock, _) = self.sock_listener.as_ref().unwrap().accept()?; + let events = Events::with_data(&conn_sock, EPOLL_EVENT_SERIAL_DATA, EventSet::IN); + if let Err(e) = ops.add(events) { + slog::error!(self.logger, + "failed to register epoll event for serial, {:?}", e; + "subsystem" => "console_mgr", + ); + return Err(std::io::Error::last_os_error()); + } + + let conn_sock_copy = conn_sock.try_clone()?; + // Do not expected poisoned lock. 
+ self.device + .lock() + .unwrap() + .set_output_stream(Some(Box::new(conn_sock_copy))); + + self.sock_conn = Some(conn_sock); + } + + Ok(()) + } + + fn uds_read_in(&mut self, ops: &mut EventOps) -> std::io::Result<()> { + let mut should_drop = true; + + if let Some(conn_sock) = self.sock_conn.as_mut() { + let mut out = [0u8; MAX_BACKEND_THROUGHPUT]; + match conn_sock.read(&mut out[..]) { + Ok(0) => { + // Zero-length read means EOF. Remove this conn sock. + self.device + .lock() + .expect("console: poisoned console lock") + .set_output_stream(None); + } + Ok(count) => { + self.device + .lock() + .expect("console: poisoned console lock") + .raw_input(&out[..count])?; + should_drop = false; + } + Err(e) => { + slog::warn!(self.logger, + "error while reading serial conn sock: {:?}", e; + "subsystem" => "console_mgr" + ); + self.device + .lock() + .expect("console: poisoned console lock") + .set_output_stream(None); + } + } + } + + if should_drop { + assert!(self.sock_conn.is_some()); + // Safe to unwrap() because self.sock_conn is Some(). + let sock_conn = self.sock_conn.take().unwrap(); + let events = Events::with_data(&sock_conn, EPOLL_EVENT_SERIAL_DATA, EventSet::IN); + if let Err(e) = ops.remove(events) { + slog::error!(self.logger, + "failed deregister epoll event for UDS, {:?}", e; + "subsystem" => "console_mgr" + ); + } + } + + Ok(()) + } + + fn stdio_read_in(&mut self, ops: &mut EventOps) -> std::io::Result<()> { + let mut should_drop = true; + + if let Some(handle) = self.stdin_handle.as_ref() { + let mut out = [0u8; MAX_BACKEND_THROUGHPUT]; + // Safe to unwrap() because self.stdin_handle is Some(). + let stdin_lock = handle.lock(); + match stdin_lock.read_raw(&mut out[..]) { + Ok(0) => { + // Zero-length read indicates EOF. Remove from pollables. 
+ self.device + .lock() + .expect("console: poisoned console lock") + .set_output_stream(None); + } + Ok(count) => { + self.device + .lock() + .expect("console: poisoned console lock") + .raw_input(&out[..count])?; + should_drop = false; + } + Err(e) => { + slog::warn!(self.logger, + "error while reading stdin: {:?}", e; + "subsystem" => "console_mgr" + ); + self.device + .lock() + .expect("console: poisoned console lock") + .set_output_stream(None); + } + } + } + + if should_drop { + let events = Events::with_data_raw(libc::STDIN_FILENO, EPOLL_EVENT_STDIN, EventSet::IN); + if let Err(e) = ops.remove(events) { + slog::error!(self.logger, + "failed to deregister epoll event for stdin, {:?}", e; + "subsystem" => "console_mgr" + ); + } + } + + Ok(()) + } +} + +impl MutEventSubscriber for ConsoleEpollHandler { + fn process(&mut self, events: Events, ops: &mut EventOps) { + slog::trace!(self.logger, "ConsoleEpollHandler::process()"); + let slot = events.data(); + match slot { + EPOLL_EVENT_SERIAL => { + if let Err(e) = self.uds_listener_accept(ops) { + slog::warn!(self.logger, "failed to accept incoming connection, {:?}", e); + } + } + EPOLL_EVENT_SERIAL_DATA => { + if let Err(e) = self.uds_read_in(ops) { + slog::warn!(self.logger, "failed to read data from UDS, {:?}", e); + } + } + EPOLL_EVENT_STDIN => { + if let Err(e) = self.stdio_read_in(ops) { + slog::warn!(self.logger, "failed to read data from stdin, {:?}", e); + } + } + _ => slog::error!(self.logger, "unknown epoll slot number {}", slot), + } + } + + fn init(&mut self, ops: &mut EventOps) { + slog::trace!(self.logger, "ConsoleEpollHandler::init()"); + + if self.stdin_handle.is_some() { + slog::info!(self.logger, "ConsoleEpollHandler: stdin handler"); + let events = Events::with_data_raw(libc::STDIN_FILENO, EPOLL_EVENT_STDIN, EventSet::IN); + if let Err(e) = ops.add(events) { + slog::error!( + self.logger, + "failed to register epoll event for stdin, {:?}", + e + ); + } + } + if let Some(sock) = self.sock_listener.as_ref() { + slog::info!(self.logger, "ConsoleEpollHandler: sock listener"); + let events = Events::with_data(sock, EPOLL_EVENT_SERIAL, EventSet::IN); + if let Err(e) = ops.add(events) { + slog::error!( + self.logger, + "failed to register epoll event for UDS listener, {:?}", + e + ); + } + } + + if let Some(conn) = self.sock_conn.as_ref() { + slog::info!(self.logger, "ConsoleEpollHandler: sock connection"); + let events = Events::with_data(conn, EPOLL_EVENT_SERIAL_DATA, EventSet::IN); + if let Err(e) = ops.add(events) { + slog::error!( + self.logger, + "failed to register epoll event for UDS connection, {:?}", + e + ); + } + } + } +} + +/// Writer to process guest kernel dmesg. +pub struct DmesgWriter { + buf: BytesMut, + logger: slog::Logger, +} + +impl io::Write for DmesgWriter { + /// 0000000 [ 0 . 0 3 4 9 1 6 ] R + /// 5b 20 20 20 20 30 2e 30 33 34 39 31 36 5d 20 52 + /// 0000020 u n / s b i n / i n i t a s + /// 75 6e 20 2f 73 62 69 6e 2f 69 6e 69 74 20 61 73 + /// 0000040 i n i t p r o c e s s \r \n [ + /// + /// dmesg message end a line with /r/n . When redirect message to logger, we should + /// remove the /r/n . 
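+    ///
+    /// For example, a guest line such as `[    0.034916] Run /sbin/init as init process\r\n`
+    /// arriving in a single `write()` call is emitted as one log record with the trailing
+    /// `\r\n` stripped, while a line split across several `write()` calls is buffered in
+    /// `self.buf` until the terminating newline arrives.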
+ fn write(&mut self, buf: &[u8]) -> io::Result { + let arr: Vec<&[u8]> = buf.split(|c| *c == b'\n').collect(); + let count = arr.len(); + + for (i, sub) in arr.iter().enumerate() { + if sub.is_empty() { + if !self.buf.is_empty() { + slog::info!( + self.logger, + "{}", + String::from_utf8_lossy(self.buf.as_ref()).trim_end() + ); + self.buf.clear(); + } + } else if sub.len() < buf.len() && i < count - 1 { + slog::info!( + self.logger, + "{}{}", + String::from_utf8_lossy(self.buf.as_ref()).trim_end(), + String::from_utf8_lossy(sub).trim_end(), + ); + self.buf.clear(); + } else { + self.buf.put_slice(sub); + } + } + + Ok(buf.len()) + } + + fn flush(&mut self) -> io::Result<()> { + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use slog::Drain; + use std::io::Write; + + fn create_logger() -> slog::Logger { + let decorator = slog_term::TermDecorator::new().build(); + let drain = slog_term::FullFormat::new(decorator).build().fuse(); + let drain = slog_async::Async::new(drain).build().fuse(); + slog::Logger::root(drain, slog::o!()) + } + + #[test] + fn test_dmesg_writer() { + let mut writer = DmesgWriter { + buf: Default::default(), + logger: create_logger(), + }; + + writer.flush().unwrap(); + writer.write("".as_bytes()).unwrap(); + writer.write("\n".as_bytes()).unwrap(); + writer.write("\n\n".as_bytes()).unwrap(); + writer.write("\n\n\n".as_bytes()).unwrap(); + writer.write("12\n23\n34\n56".as_bytes()).unwrap(); + writer.write("78".as_bytes()).unwrap(); + writer.write("90\n".as_bytes()).unwrap(); + writer.flush().unwrap(); + } + + // TODO: add unit tests for console manager +} diff --git a/src/dragonball/src/device_manager/legacy.rs b/src/dragonball/src/device_manager/legacy.rs new file mode 100644 index 0000000000..9dbb300d4b --- /dev/null +++ b/src/dragonball/src/device_manager/legacy.rs @@ -0,0 +1,155 @@ +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +//! Device Manager for Legacy Devices. + +use std::io; +use std::sync::{Arc, Mutex}; + +use dbs_device::device_manager::Error as IoManagerError; +use dbs_legacy_devices::SerialDevice; +use vmm_sys_util::eventfd::EventFd; + +// The I8042 Data Port (IO Port 0x60) is used for reading data that was received from a I8042 device or from the I8042 controller itself and writing data to a I8042 device or to the I8042 controller itself. +const I8042_DATA_PORT: u16 = 0x60; + +/// Errors generated by legacy device manager. +#[derive(Debug, thiserror::Error)] +pub enum Error { + /// Cannot add legacy device to Bus. + #[error("bus failure while managing legacy device")] + BusError(#[source] IoManagerError), + + /// Cannot create EventFd. + #[error("failure while reading EventFd file descriptor")] + EventFd(#[source] io::Error), + + /// Failed to register/deregister interrupt. + #[error("failure while managing interrupt for legacy device")] + IrqManager(#[source] vmm_sys_util::errno::Error), +} + +/// The `LegacyDeviceManager` is a wrapper that is used for registering legacy devices +/// on an I/O Bus. +/// +/// It currently manages the uart and i8042 devices. The `LegacyDeviceManger` should be initialized +/// only by using the constructor. 
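+///
+/// On x86_64 a boot-time setup looks roughly like the sketch below; `vm_fd` is assumed to be
+/// an already created `Arc<VmFd>`:
+///
+/// ```ignore
+/// let mut bus = dbs_device::device_manager::IoManager::new();
+/// let legacy_mgr = LegacyDeviceManager::create_manager(&mut bus, Some(vm_fd))?;
+/// let com1 = legacy_mgr.get_com1_serial();
+/// let reset_evt = legacy_mgr.get_reset_eventfd()?;
+/// ```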
+pub struct LegacyDeviceManager { + #[cfg(target_arch = "x86_64")] + i8042_reset_eventfd: EventFd, + pub(crate) com1_device: Arc>, + _com1_eventfd: EventFd, + pub(crate) com2_device: Arc>, + _com2_eventfd: EventFd, +} + +impl LegacyDeviceManager { + /// Get the serial device for com1. + pub fn get_com1_serial(&self) -> Arc> { + self.com1_device.clone() + } + + /// Get the serial device for com2 + pub fn get_com2_serial(&self) -> Arc> { + self.com2_device.clone() + } +} + +#[cfg(target_arch = "x86_64")] +pub(crate) mod x86_64 { + use super::*; + use dbs_device::device_manager::IoManager; + use dbs_device::resources::Resource; + use dbs_legacy_devices::{EventFdTrigger, I8042Device, I8042DeviceMetrics}; + use kvm_ioctls::VmFd; + + pub(crate) const COM1_IRQ: u32 = 4; + pub(crate) const COM1_PORT1: u16 = 0x3f8; + pub(crate) const COM2_IRQ: u32 = 3; + pub(crate) const COM2_PORT1: u16 = 0x2f8; + + type Result = ::std::result::Result; + + impl LegacyDeviceManager { + /// Create a LegacyDeviceManager instance handling legacy devices (uart, i8042). + pub fn create_manager(bus: &mut IoManager, vm_fd: Option>) -> Result { + let (com1_device, com1_eventfd) = + Self::create_com_device(bus, vm_fd.as_ref(), COM1_IRQ, COM1_PORT1)?; + let (com2_device, com2_eventfd) = + Self::create_com_device(bus, vm_fd.as_ref(), COM2_IRQ, COM2_PORT1)?; + + let exit_evt = EventFd::new(libc::EFD_NONBLOCK).map_err(Error::EventFd)?; + let i8042_device = Arc::new(Mutex::new(I8042Device::new( + EventFdTrigger::new(exit_evt.try_clone().map_err(Error::EventFd)?), + Arc::new(I8042DeviceMetrics::default()), + ))); + let resources = [Resource::PioAddressRange { + // 0x60 and 0x64 are the io ports that i8042 devices used. + // We register pio address range from 0x60 - 0x64 with base I8042_DATA_PORT for i8042 to use. + base: I8042_DATA_PORT, + size: 0x5, + }]; + bus.register_device_io(i8042_device, &resources) + .map_err(Error::BusError)?; + + Ok(LegacyDeviceManager { + i8042_reset_eventfd: exit_evt, + com1_device, + _com1_eventfd: com1_eventfd, + com2_device, + _com2_eventfd: com2_eventfd, + }) + } + + /// Get the eventfd for exit notification. + pub fn get_reset_eventfd(&self) -> Result { + self.i8042_reset_eventfd.try_clone().map_err(Error::EventFd) + } + + fn create_com_device( + bus: &mut IoManager, + vm_fd: Option<&Arc>, + irq: u32, + port_base: u16, + ) -> Result<(Arc>, EventFd)> { + let eventfd = EventFd::new(libc::EFD_NONBLOCK).map_err(Error::EventFd)?; + let device = Arc::new(Mutex::new(SerialDevice::new( + eventfd.try_clone().map_err(Error::EventFd)?, + ))); + // port_base defines the base port address for the COM devices. + // Since every COM device has 8 data registers so we register the pio address range as size 0x8. 
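+            // With the default bases this maps COM1 to ports 0x3f8-0x3ff and COM2 to 0x2f8-0x2ff.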
+ let resources = [Resource::PioAddressRange { + base: port_base, + size: 0x8, + }]; + bus.register_device_io(device.clone(), &resources) + .map_err(Error::BusError)?; + + if let Some(fd) = vm_fd { + fd.register_irqfd(&eventfd, irq) + .map_err(Error::IrqManager)?; + } + + Ok((device, eventfd)) + } + } +} + +#[cfg(test)] +mod tests { + #[cfg(target_arch = "x86_64")] + use super::*; + + #[test] + #[cfg(target_arch = "x86_64")] + fn test_create_legacy_device_manager() { + let mut bus = dbs_device::device_manager::IoManager::new(); + let mgr = LegacyDeviceManager::create_manager(&mut bus, None).unwrap(); + let _exit_fd = mgr.get_reset_eventfd().unwrap(); + } +} diff --git a/src/dragonball/src/device_manager/mod.rs b/src/dragonball/src/device_manager/mod.rs new file mode 100644 index 0000000000..5691690ea8 --- /dev/null +++ b/src/dragonball/src/device_manager/mod.rs @@ -0,0 +1,602 @@ +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Device manager to manage IO devices for a virtual machine. + +use std::io; +use std::sync::{Arc, Mutex, MutexGuard}; + +use arc_swap::ArcSwap; +use dbs_address_space::AddressSpace; +use dbs_device::device_manager::{Error as IoManagerError, IoManager, IoManagerContext}; +use dbs_device::resources::Resource; +use dbs_device::DeviceIo; +use dbs_interrupt::KvmIrqManager; +use dbs_legacy_devices::ConsoleHandler; +use dbs_utils::epoll_manager::EpollManager; +use kvm_ioctls::VmFd; + +#[cfg(feature = "dbs-virtio-devices")] +use dbs_device::resources::ResourceConstraint; +#[cfg(feature = "dbs-virtio-devices")] +use dbs_virtio_devices as virtio; +#[cfg(feature = "dbs-virtio-devices")] +use dbs_virtio_devices::{ + mmio::{ + MmioV2Device, DRAGONBALL_FEATURE_INTR_USED, DRAGONBALL_FEATURE_PER_QUEUE_NOTIFY, + DRAGONBALL_MMIO_DOORBELL_SIZE, MMIO_DEFAULT_CFG_SIZE, + }, + VirtioDevice, +}; + +use crate::address_space_manager::GuestAddressSpaceImpl; +use crate::error::StartMicrovmError; +use crate::resource_manager::ResourceManager; +use crate::vm::KernelConfigInfo; + +/// Virtual machine console device manager. +pub mod console_manager; +/// Console Manager for virtual machines console device. +pub use self::console_manager::ConsoleManager; + +mod legacy; +pub use self::legacy::{Error as LegacyDeviceError, LegacyDeviceManager}; + +#[cfg(feature = "virtio-vsock")] +/// Device manager for user-space vsock devices. +pub mod vsock_dev_mgr; +#[cfg(feature = "virtio-vsock")] +use self::vsock_dev_mgr::VsockDeviceMgr; + +macro_rules! info( + ($l:expr, $($args:tt)+) => { + slog::info!($l, $($args)+; slog::o!("subsystem" => "device_manager")) + }; +); + +/// Errors related to device manager operations. +#[derive(Debug, thiserror::Error)] +pub enum DeviceMgrError { + /// Invalid operation. + #[error("invalid device manager operation")] + InvalidOperation, + /// Failed to get device resource. + #[error("failed to get device assigned resources")] + GetDeviceResource, + /// Appending to kernel command line failed. + #[error("failed to add kernel command line parameter for device: {0}")] + Cmdline(#[source] linux_loader::cmdline::Error), + /// Failed to manage console devices. + #[error(transparent)] + ConsoleManager(console_manager::ConsoleManagerError), + /// Failed to create the device. + #[error("failed to create virtual device: {0}")] + CreateDevice(#[source] io::Error), + /// Failed to perform an operation on the bus. + #[error(transparent)] + IoManager(IoManagerError), + /// Failure from legacy device manager. 
+ #[error(transparent)] + LegacyManager(legacy::Error), + + #[cfg(feature = "dbs-virtio-devices")] + /// Error from Virtio subsystem. + #[error(transparent)] + Virtio(virtio::Error), +} + +/// Specialized version of `std::result::Result` for device manager operations. +pub type Result = ::std::result::Result; + +/// Type of the dragonball virtio devices. +#[cfg(feature = "dbs-virtio-devices")] +pub type DbsVirtioDevice = Box< + dyn VirtioDevice, +>; + +/// Type of the dragonball virtio mmio devices. +#[cfg(feature = "dbs-virtio-devices")] +pub type DbsMmioV2Device = + MmioV2Device; + +/// Struct to support transactional operations for device management. +pub struct DeviceManagerTx { + io_manager: IoManager, + _io_lock: Arc>, + _guard: MutexGuard<'static, ()>, +} + +impl DeviceManagerTx { + fn new(mgr_ctx: &DeviceManagerContext) -> Self { + // Do not expect poisoned lock. + let guard = mgr_ctx.io_lock.lock().unwrap(); + + // It's really a heavy burden to carry on a lifetime parameter for MutexGuard. + // So we play a tricky here that we hold a reference to the Arc> and transmute + // the MutexGuard<'a, ()> to MutexGuard<'static, ()>. + // It's safe because we hold a reference to the Mutex lock. + let guard = + unsafe { std::mem::transmute::, MutexGuard<'static, ()>>(guard) }; + + DeviceManagerTx { + io_manager: mgr_ctx.io_manager.load().as_ref().clone(), + _io_lock: mgr_ctx.io_lock.clone(), + _guard: guard, + } + } +} + +/// Operation context for device management. +#[derive(Clone)] +pub struct DeviceManagerContext { + io_manager: Arc>, + io_lock: Arc>, +} + +impl DeviceManagerContext { + /// Create a DeviceManagerContext object. + pub fn new(io_manager: Arc>, io_lock: Arc>) -> Self { + DeviceManagerContext { + io_manager, + io_lock, + } + } +} + +impl IoManagerContext for DeviceManagerContext { + type Context = DeviceManagerTx; + + fn begin_tx(&self) -> Self::Context { + DeviceManagerTx::new(self) + } + + fn commit_tx(&self, context: Self::Context) { + self.io_manager.store(Arc::new(context.io_manager)); + } + + fn cancel_tx(&self, context: Self::Context) { + drop(context); + } + + fn register_device_io( + &self, + ctx: &mut Self::Context, + device: Arc, + resources: &[Resource], + ) -> std::result::Result<(), dbs_device::device_manager::Error> { + ctx.io_manager.register_device_io(device, resources) + } + + fn unregister_device_io( + &self, + ctx: &mut Self::Context, + resources: &[Resource], + ) -> std::result::Result<(), dbs_device::device_manager::Error> { + ctx.io_manager.unregister_device_io(resources) + } +} + +/// Context for device addition/removal operations. 
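+///
+/// A `DeviceOpContext` bundles everything a single device create/remove operation needs: a
+/// transactional view of the I/O bus, the interrupt and resource managers, the guest address
+/// space, and a flag telling whether the operation happens at boot time or as a hotplug, so
+/// that the individual device managers do not have to borrow the whole `DeviceManager`.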
+pub struct DeviceOpContext { + epoll_mgr: Option, + io_context: DeviceManagerContext, + irq_manager: Arc, + res_manager: Arc, + vm_fd: Arc, + vm_as: Option, + address_space: Option, + logger: slog::Logger, + is_hotplug: bool, + + #[cfg(feature = "dbs-virtio-devices")] + virtio_devices: Vec>, +} + +impl DeviceOpContext { + pub(crate) fn new( + epoll_mgr: Option, + device_mgr: &DeviceManager, + vm_as: Option, + address_space: Option, + is_hotplug: bool, + ) -> Self { + let irq_manager = device_mgr.irq_manager.clone(); + let res_manager = device_mgr.res_manager.clone(); + + let vm_fd = device_mgr.vm_fd.clone(); + let io_context = DeviceManagerContext { + io_manager: device_mgr.io_manager.clone(), + io_lock: device_mgr.io_lock.clone(), + }; + let logger = device_mgr.logger.new(slog::o!()); + + DeviceOpContext { + epoll_mgr, + io_context, + irq_manager, + res_manager, + vm_fd, + vm_as, + address_space, + logger, + is_hotplug, + #[cfg(feature = "dbs-virtio-devices")] + virtio_devices: Vec::new(), + } + } + + pub(crate) fn get_vm_as(&self) -> Result { + match self.vm_as.as_ref() { + Some(v) => Ok(v.clone()), + None => Err(DeviceMgrError::InvalidOperation), + } + } + + pub(crate) fn logger(&self) -> &slog::Logger { + &self.logger + } + + fn generate_kernel_boot_args(&mut self, kernel_config: &mut KernelConfigInfo) -> Result<()> { + if !self.is_hotplug { + return Err(DeviceMgrError::InvalidOperation); + } + + #[cfg(feature = "dbs-virtio-devices")] + let cmdline = kernel_config.kernel_cmdline_mut(); + + #[cfg(feature = "dbs-virtio-devices")] + for device in self.virtio_devices.iter() { + let (mmio_base, mmio_size, irq) = DeviceManager::get_virtio_device_info(device)?; + + // as per doc, [virtio_mmio.]device=@: needs to be appended + // to kernel commandline for virtio mmio devices to get recognized + // the size parameter has to be transformed to KiB, so dividing hexadecimal value in + // bytes to 1024; further, the '{}' formatting rust construct will automatically + // transform it to decimal + cmdline + .insert( + "virtio_mmio.device", + &format!("{}K@0x{:08x}:{}", mmio_size / 1024, mmio_base, irq), + ) + .map_err(DeviceMgrError::Cmdline)?; + } + + Ok(()) + } +} + +/// Device manager for virtual machines, which manages all device for a virtual machine. +pub struct DeviceManager { + io_manager: Arc>, + io_lock: Arc>, + irq_manager: Arc, + res_manager: Arc, + vm_fd: Arc, + pub(crate) logger: slog::Logger, + + pub(crate) con_manager: ConsoleManager, + pub(crate) legacy_manager: Option, + #[cfg(feature = "virtio-vsock")] + pub(crate) vsock_manager: VsockDeviceMgr, +} + +impl DeviceManager { + /// Create a new device manager instance. + pub fn new( + vm_fd: Arc, + res_manager: Arc, + epoll_manager: EpollManager, + logger: &slog::Logger, + ) -> Self { + DeviceManager { + io_manager: Arc::new(ArcSwap::new(Arc::new(IoManager::new()))), + io_lock: Arc::new(Mutex::new(())), + irq_manager: Arc::new(KvmIrqManager::new(vm_fd.clone())), + res_manager, + vm_fd, + logger: logger.new(slog::o!()), + con_manager: ConsoleManager::new(epoll_manager, logger), + legacy_manager: None, + #[cfg(feature = "virtio-vsock")] + vsock_manager: VsockDeviceMgr::default(), + } + } + + /// Create the underline interrupt manager for the device manager. + pub fn create_interrupt_manager(&mut self) -> Result<()> { + self.irq_manager + .initialize() + .map_err(DeviceMgrError::CreateDevice) + } + + /// Get the underlying logger. 
+ pub fn logger(&self) -> &slog::Logger { + &self.logger + } + + /// Create legacy devices associted virtual machine + pub fn create_legacy_devices( + &mut self, + ctx: &mut DeviceOpContext, + ) -> std::result::Result<(), StartMicrovmError> { + #[cfg(target_arch = "x86_64")] + { + let mut tx = ctx.io_context.begin_tx(); + let legacy_manager = + LegacyDeviceManager::create_manager(&mut tx.io_manager, Some(self.vm_fd.clone())); + + match legacy_manager { + Ok(v) => { + self.legacy_manager = Some(v); + ctx.io_context.commit_tx(tx); + } + Err(e) => { + ctx.io_context.cancel_tx(tx); + return Err(StartMicrovmError::LegacyDevice(e)); + } + } + } + + Ok(()) + } + + /// Init legacy devices with logger stream in associted virtual machine + pub fn init_legacy_devices( + &mut self, + dmesg_fifo: Option>, + com1_sock_path: Option, + _ctx: &mut DeviceOpContext, + ) -> std::result::Result<(), StartMicrovmError> { + // Connect serial ports to the console and dmesg_fifo. + self.set_guest_kernel_log_stream(dmesg_fifo) + .map_err(|_| StartMicrovmError::EventFd)?; + + slog::info!(self.logger, "init console path: {:?}", com1_sock_path); + if let Some(path) = com1_sock_path { + if let Some(legacy_manager) = self.legacy_manager.as_ref() { + let com1 = legacy_manager.get_com1_serial(); + self.con_manager + .create_socket_console(com1, path) + .map_err(StartMicrovmError::DeviceManager)?; + } + } else if let Some(legacy_manager) = self.legacy_manager.as_ref() { + let com1 = legacy_manager.get_com1_serial(); + self.con_manager + .create_stdio_console(com1) + .map_err(StartMicrovmError::DeviceManager)?; + } + + Ok(()) + } + + /// Set the stream for guest kernel log. + /// + /// Note: com2 is used for guest kernel logging. + /// TODO: check whether it works with aarch64. + pub fn set_guest_kernel_log_stream( + &self, + stream: Option>, + ) -> std::result::Result<(), io::Error> { + if let Some(legacy) = self.legacy_manager.as_ref() { + legacy + .get_com2_serial() + .lock() + .unwrap() + .set_output_stream(stream); + } + Ok(()) + } + + /// Restore legacy devices + pub fn restore_legacy_devices( + &mut self, + dmesg_fifo: Option>, + com1_sock_path: Option, + ) -> std::result::Result<(), StartMicrovmError> { + self.set_guest_kernel_log_stream(dmesg_fifo) + .map_err(|_| StartMicrovmError::EventFd)?; + slog::info!(self.logger, "restore console path: {:?}", com1_sock_path); + // TODO: restore console + Ok(()) + } + + /// Reset the console into canonical mode. + pub fn reset_console(&self) -> Result<()> { + self.con_manager.reset_console() + } + + /// Create all registered devices when booting the associated virtual machine. + pub fn create_devices( + &mut self, + vm_as: GuestAddressSpaceImpl, + epoll_mgr: EpollManager, + kernel_config: &mut KernelConfigInfo, + com1_sock_path: Option, + dmesg_fifo: Option>, + address_space: Option<&AddressSpace>, + ) -> std::result::Result<(), StartMicrovmError> { + let mut ctx = DeviceOpContext::new( + Some(epoll_mgr), + self, + Some(vm_as), + address_space.cloned(), + false, + ); + + self.create_legacy_devices(&mut ctx)?; + self.init_legacy_devices(dmesg_fifo, com1_sock_path, &mut ctx)?; + + #[cfg(feature = "virtio-vsock")] + self.vsock_manager.attach_devices(&mut ctx)?; + + ctx.generate_kernel_boot_args(kernel_config) + .map_err(StartMicrovmError::DeviceManager)?; + + Ok(()) + } + + #[cfg(target_arch = "x86_64")] + /// Get the underlying eventfd for vm exit notification. 
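+    ///
+    /// The eventfd is the one wired into the i8042 device by the legacy device manager, so it
+    /// is signalled when the guest requests a reset through the i8042 controller and can be
+    /// used by the VMM event loop to trigger an orderly shutdown.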
+ pub fn get_reset_eventfd(&self) -> Result { + if let Some(legacy) = self.legacy_manager.as_ref() { + legacy + .get_reset_eventfd() + .map_err(DeviceMgrError::LegacyManager) + } else { + Err(DeviceMgrError::LegacyManager(legacy::Error::EventFd( + io::Error::from_raw_os_error(libc::ENOENT), + ))) + } + } +} + +#[cfg(feature = "dbs-virtio-devices")] +impl DeviceManager { + fn get_virtio_device_info(device: &Arc) -> Result<(u64, u64, u32)> { + let resources = device.get_assigned_resources(); + let irq = resources + .get_legacy_irq() + .ok_or(DeviceMgrError::GetDeviceResource)?; + let mmio_address_range = device.get_trapped_io_resources().get_mmio_address_ranges(); + + // Assume the first MMIO region is virtio configuration region. + // Virtio-fs needs to pay attention to this assumption. + if let Some(range) = mmio_address_range.into_iter().next() { + Ok((range.0, range.1, irq)) + } else { + Err(DeviceMgrError::GetDeviceResource) + } + } + + /// Create an Virtio MMIO transport layer device for the virtio backend device. + pub fn create_mmio_virtio_device( + device: DbsVirtioDevice, + ctx: &mut DeviceOpContext, + use_shared_irq: bool, + use_generic_irq: bool, + ) -> std::result::Result, DeviceMgrError> { + let features = DRAGONBALL_FEATURE_INTR_USED | DRAGONBALL_FEATURE_PER_QUEUE_NOTIFY; + DeviceManager::create_mmio_virtio_device_with_features( + device, + ctx, + Some(features), + use_shared_irq, + use_generic_irq, + ) + } + + /// Create an Virtio MMIO transport layer device for the virtio backend device with specified + /// features. + pub fn create_mmio_virtio_device_with_features( + device: DbsVirtioDevice, + ctx: &mut DeviceOpContext, + features: Option, + use_shared_irq: bool, + use_generic_irq: bool, + ) -> std::result::Result, DeviceMgrError> { + // Every emulated Virtio MMIO device needs a 4K configuration space, + // and another 4K space for per queue notification. + const MMIO_ADDRESS_DEFAULT: ResourceConstraint = ResourceConstraint::MmioAddress { + range: None, + align: 0, + size: MMIO_DEFAULT_CFG_SIZE + DRAGONBALL_MMIO_DOORBELL_SIZE, + }; + let mut requests = vec![MMIO_ADDRESS_DEFAULT]; + device.get_resource_requirements(&mut requests, use_generic_irq); + let resources = ctx + .res_manager + .allocate_device_resources(&requests, use_shared_irq) + .map_err(|_| DeviceMgrError::GetDeviceResource)?; + + let virtio_dev = match MmioV2Device::new( + ctx.vm_fd.clone(), + ctx.get_vm_as()?, + ctx.irq_manager.clone(), + device, + resources, + features, + ) { + Ok(d) => d, + Err(e) => return Err(DeviceMgrError::Virtio(e)), + }; + + Self::register_mmio_virtio_device(Arc::new(virtio_dev), ctx) + } + + /// Teardown the Virtio MMIO transport layer device associated with the virtio backend device. + pub fn destroy_mmio_virtio_device( + device: Arc, + ctx: &mut DeviceOpContext, + ) -> std::result::Result<(), DeviceMgrError> { + Self::destroy_mmio_device(device.clone(), ctx)?; + + let mmio_dev = device + .as_any() + .downcast_ref::() + .ok_or(DeviceMgrError::InvalidOperation)?; + + mmio_dev.remove(); + + Ok(()) + } + + fn destroy_mmio_device( + device: Arc, + ctx: &mut DeviceOpContext, + ) -> std::result::Result<(), DeviceMgrError> { + // unregister IoManager + Self::deregister_mmio_virtio_device(&device, ctx)?; + + // unregister Resource manager + let resources = device.get_assigned_resources(); + ctx.res_manager.free_device_resources(&resources); + + Ok(()) + } + + /// Create an Virtio MMIO transport layer device for the virtio backend device. 
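+    ///
+    /// Registration installs the device's trapped MMIO resources on the I/O bus inside a
+    /// transaction and, on success, records the device so that `generate_kernel_boot_args()`
+    /// can later append a matching `virtio_mmio.device=<size>K@<base>:<irq>` entry (for
+    /// example `virtio_mmio.device=4K@0xd0000000:5`, values illustrative) to the guest kernel
+    /// command line.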
+ pub fn register_mmio_virtio_device( + device: Arc, + ctx: &mut DeviceOpContext, + ) -> std::result::Result, DeviceMgrError> { + let (mmio_base, mmio_size, irq) = Self::get_virtio_device_info(&device)?; + info!( + ctx.logger(), + "create virtio mmio device 0x{:x}@0x{:x}, irq: 0x{:x}", mmio_size, mmio_base, irq + ); + let resources = device.get_trapped_io_resources(); + + let mut tx = ctx.io_context.begin_tx(); + if let Err(e) = ctx + .io_context + .register_device_io(&mut tx, device.clone(), &resources) + { + ctx.io_context.cancel_tx(tx); + Err(DeviceMgrError::IoManager(e)) + } else { + ctx.virtio_devices.push(device.clone()); + ctx.io_context.commit_tx(tx); + Ok(device) + } + } + + /// Deregister a Virtio MMIO device from IoManager + pub fn deregister_mmio_virtio_device( + device: &Arc, + ctx: &mut DeviceOpContext, + ) -> std::result::Result<(), DeviceMgrError> { + let resources = device.get_trapped_io_resources(); + info!( + ctx.logger(), + "unregister mmio virtio device: {:?}", resources + ); + let mut tx = ctx.io_context.begin_tx(); + if let Err(e) = ctx.io_context.unregister_device_io(&mut tx, &resources) { + ctx.io_context.cancel_tx(tx); + Err(DeviceMgrError::IoManager(e)) + } else { + ctx.io_context.commit_tx(tx); + Ok(()) + } + } +} diff --git a/src/dragonball/src/device_manager/vsock_dev_mgr.rs b/src/dragonball/src/device_manager/vsock_dev_mgr.rs new file mode 100644 index 0000000000..cec58d7de1 --- /dev/null +++ b/src/dragonball/src/device_manager/vsock_dev_mgr.rs @@ -0,0 +1,285 @@ +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +use std::sync::Arc; + +use dbs_virtio_devices as virtio; +use dbs_virtio_devices::mmio::DRAGONBALL_FEATURE_INTR_USED; +use dbs_virtio_devices::vsock::backend::{ + VsockInnerBackend, VsockInnerConnector, VsockTcpBackend, VsockUnixStreamBackend, +}; +use dbs_virtio_devices::vsock::Vsock; +use dbs_virtio_devices::Error as VirtioError; +use serde_derive::{Deserialize, Serialize}; + +use super::StartMicrovmError; +use crate::config_manager::{ConfigItem, DeviceConfigInfo, DeviceConfigInfos}; +use crate::device_manager::{DeviceManager, DeviceOpContext}; + +pub use dbs_virtio_devices::vsock::QUEUE_SIZES; + +const SUBSYSTEM: &str = "vsock_dev_mgr"; +// The flag of whether to use the shared irq. +const USE_SHARED_IRQ: bool = true; +// The flag of whether to use the generic irq. +const USE_GENERIC_IRQ: bool = true; + +/// Errors associated with `VsockDeviceConfigInfo`. +#[derive(Debug, thiserror::Error)] +pub enum VsockDeviceError { + /// The virtual machine instance ID is invalid. + #[error("the virtual machine instance ID is invalid")] + InvalidVMID, + + /// The Context Identifier is already in use. + #[error("the device ID {0} already exists")] + DeviceIDAlreadyExist(String), + + /// The Context Identifier is invalid. + #[error("the guest CID {0} is invalid")] + GuestCIDInvalid(u32), + + /// The Context Identifier is already in use. + #[error("the guest CID {0} is already in use")] + GuestCIDAlreadyInUse(u32), + + /// The Unix Domain Socket path is already in use. + #[error("the Unix Domain Socket path {0} is already in use")] + UDSPathAlreadyInUse(String), + + /// The net address is already in use. 
+ #[error("the net address {0} is already in use")] + NetAddrAlreadyInUse(String), + + /// The update is not allowed after booting the microvm. + #[error("update operation is not allowed after boot")] + UpdateNotAllowedPostBoot, + + /// The VsockId Already Exists + #[error("vsock id {0} already exists")] + VsockIdAlreadyExists(String), + + /// Inner backend create error + #[error("vsock inner backend create error: {0}")] + CreateInnerBackend(#[source] std::io::Error), +} + +/// Configuration information for a vsock device. +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +pub struct VsockDeviceConfigInfo { + /// ID of the vsock device. + pub id: String, + /// A 32-bit Context Identifier (CID) used to identify the guest. + pub guest_cid: u32, + /// unix domain socket path. + pub uds_path: Option, + /// tcp socket address. + pub tcp_addr: Option, + /// Virtio queue size. + pub queue_size: Vec, + /// Use shared irq + pub use_shared_irq: Option, + /// Use generic irq + pub use_generic_irq: Option, +} + +impl VsockDeviceConfigInfo { + /// Get number and size of queues supported. + pub fn queue_sizes(&self) -> Vec { + self.queue_size.clone() + } +} + +impl ConfigItem for VsockDeviceConfigInfo { + type Err = VsockDeviceError; + + fn id(&self) -> &str { + &self.id + } + + fn check_conflicts(&self, other: &Self) -> Result<(), VsockDeviceError> { + if self.id == other.id { + return Err(VsockDeviceError::DeviceIDAlreadyExist(self.id.clone())); + } + if self.guest_cid == other.guest_cid { + return Err(VsockDeviceError::GuestCIDAlreadyInUse(self.guest_cid)); + } + if let (Some(self_uds_path), Some(other_uds_path)) = + (self.uds_path.as_ref(), other.uds_path.as_ref()) + { + if self_uds_path == other_uds_path { + return Err(VsockDeviceError::UDSPathAlreadyInUse(self_uds_path.clone())); + } + } + if let (Some(self_net_addr), Some(other_net_addr)) = + (self.tcp_addr.as_ref(), other.tcp_addr.as_ref()) + { + if self_net_addr == other_net_addr { + return Err(VsockDeviceError::NetAddrAlreadyInUse(self_net_addr.clone())); + } + } + + Ok(()) + } +} + +/// Vsock Device Info +pub type VsockDeviceInfo = DeviceConfigInfo; + +/// Device manager to manage all vsock devices. +pub struct VsockDeviceMgr { + pub(crate) info_list: DeviceConfigInfos, + pub(crate) default_inner_backend: Option, + pub(crate) default_inner_connector: Option, + pub(crate) use_shared_irq: bool, +} + +impl VsockDeviceMgr { + /// Insert or update a vsock device into the manager. + pub fn insert_device( + &mut self, + ctx: DeviceOpContext, + config: VsockDeviceConfigInfo, + ) -> std::result::Result<(), VsockDeviceError> { + if ctx.is_hotplug { + slog::error!( + ctx.logger(), + "no support of virtio-vsock device hotplug"; + "subsystem" => SUBSYSTEM, + "id" => &config.id, + "uds_path" => &config.uds_path, + ); + + return Err(VsockDeviceError::UpdateNotAllowedPostBoot); + } + + // VMADDR_CID_ANY (-1U) means any address for binding; + // VMADDR_CID_HYPERVISOR (0) is reserved for services built into the hypervisor; + // VMADDR_CID_RESERVED (1) must not be used; + // VMADDR_CID_HOST (2) is the well-known address of the host. 
+ if config.guest_cid <= 2 { + return Err(VsockDeviceError::GuestCIDInvalid(config.guest_cid)); + } + + slog::info!( + ctx.logger(), + "add virtio-vsock device configuration"; + "subsystem" => SUBSYSTEM, + "id" => &config.id, + "uds_path" => &config.uds_path, + ); + + self.lazy_make_default_connector()?; + + self.info_list.insert_or_update(&config)?; + + Ok(()) + } + + /// Attach all configured vsock device to the virtual machine instance. + pub fn attach_devices( + &mut self, + ctx: &mut DeviceOpContext, + ) -> std::result::Result<(), StartMicrovmError> { + let epoll_mgr = ctx + .epoll_mgr + .clone() + .ok_or(StartMicrovmError::CreateVsockDevice( + virtio::Error::InvalidInput, + ))?; + + for info in self.info_list.iter_mut() { + slog::info!( + ctx.logger(), + "attach virtio-vsock device"; + "subsystem" => SUBSYSTEM, + "id" => &info.config.id, + "uds_path" => &info.config.uds_path, + ); + + let mut device = Box::new( + Vsock::new( + info.config.guest_cid as u64, + Arc::new(info.config.queue_sizes()), + epoll_mgr.clone(), + ) + .map_err(VirtioError::VirtioVsockError) + .map_err(StartMicrovmError::CreateVsockDevice)?, + ); + if let Some(uds_path) = info.config.uds_path.as_ref() { + let unix_backend = VsockUnixStreamBackend::new(uds_path.clone()) + .map_err(VirtioError::VirtioVsockError) + .map_err(StartMicrovmError::CreateVsockDevice)?; + device + .add_backend(Box::new(unix_backend), true) + .map_err(VirtioError::VirtioVsockError) + .map_err(StartMicrovmError::CreateVsockDevice)?; + } + if let Some(tcp_addr) = info.config.tcp_addr.as_ref() { + let tcp_backend = VsockTcpBackend::new(tcp_addr.clone()) + .map_err(VirtioError::VirtioVsockError) + .map_err(StartMicrovmError::CreateVsockDevice)?; + device + .add_backend(Box::new(tcp_backend), false) + .map_err(VirtioError::VirtioVsockError) + .map_err(StartMicrovmError::CreateVsockDevice)?; + } + // add inner backend to the the first added vsock device + if let Some(inner_backend) = self.default_inner_backend.take() { + device + .add_backend(Box::new(inner_backend), false) + .map_err(VirtioError::VirtioVsockError) + .map_err(StartMicrovmError::CreateVsockDevice)?; + } + let device = DeviceManager::create_mmio_virtio_device_with_features( + device, + ctx, + Some(DRAGONBALL_FEATURE_INTR_USED), + info.config.use_shared_irq.unwrap_or(self.use_shared_irq), + info.config.use_generic_irq.unwrap_or(USE_GENERIC_IRQ), + ) + .map_err(StartMicrovmError::RegisterVsockDevice)?; + info.device = Some(device); + } + + Ok(()) + } + + // check the default connector is present, or build it. + fn lazy_make_default_connector(&mut self) -> std::result::Result<(), VsockDeviceError> { + if self.default_inner_connector.is_none() { + let inner_backend = + VsockInnerBackend::new().map_err(VsockDeviceError::CreateInnerBackend)?; + self.default_inner_connector = Some(inner_backend.get_connector()); + self.default_inner_backend = Some(inner_backend); + } + Ok(()) + } + + /// Get the default vsock inner connector. + pub fn get_default_connector( + &mut self, + ) -> std::result::Result { + self.lazy_make_default_connector()?; + + // safe to unwrap, because we created the inner connector before + Ok(self.default_inner_connector.clone().unwrap()) + } +} + +impl Default for VsockDeviceMgr { + /// Create a new Vsock device manager. 
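+    ///
+    /// By default no inner backend or connector is created; they are built lazily by
+    /// `lazy_make_default_connector()` the first time a vsock device is inserted or the
+    /// default connector is requested.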
+ fn default() -> Self { + VsockDeviceMgr { + info_list: DeviceConfigInfos::new(), + default_inner_backend: None, + default_inner_connector: None, + use_shared_irq: USE_SHARED_IRQ, + } + } +} diff --git a/src/dragonball/src/error.rs b/src/dragonball/src/error.rs new file mode 100644 index 0000000000..5a497abb6b --- /dev/null +++ b/src/dragonball/src/error.rs @@ -0,0 +1,41 @@ +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file + +//! Error codes for the virtual machine monitor subsystem. + +#[cfg(feature = "dbs-virtio-devices")] +use dbs_virtio_devices::Error as VirtIoError; + +use crate::device_manager; + +/// Errors associated with starting the instance. +#[derive(Debug, thiserror::Error)] +pub enum StartMicrovmError { + /// Cannot read from an Event file descriptor. + #[error("failure while reading from EventFd file descriptor")] + EventFd, + + /// The device manager was not configured. + #[error("the device manager failed to manage devices: {0}")] + DeviceManager(#[source] device_manager::DeviceMgrError), + + /// Cannot add devices to the Legacy I/O Bus. + #[error("failure in managing legacy device: {0}")] + LegacyDevice(#[source] device_manager::LegacyDeviceError), + + #[cfg(feature = "virtio-vsock")] + /// Failed to create the vsock device. + #[error("cannot create virtio-vsock device: {0}")] + CreateVsockDevice(#[source] VirtIoError), + + #[cfg(feature = "virtio-vsock")] + /// Cannot initialize a MMIO Vsock Device or add a device to the MMIO Bus. + #[error("failure while registering virtio-vsock device: {0}")] + RegisterVsockDevice(#[source] device_manager::DeviceMgrError), +} diff --git a/src/dragonball/src/lib.rs b/src/dragonball/src/lib.rs new file mode 100644 index 0000000000..cf528d067e --- /dev/null +++ b/src/dragonball/src/lib.rs @@ -0,0 +1,22 @@ +// Copyright (C) 2018-2022 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Dragonball is a light-weight virtual machine manager(VMM) based on Linux Kernel-based Virtual +//! Machine(KVM) which is optimized for container workloads. + +#![warn(missing_docs)] +//TODO: Remove this, after the rest of dragonball has been committed. +#![allow(dead_code)] + +/// Address space manager for virtual machines. +pub mod address_space_manager; +/// Structs to maintain configuration information. +pub mod config_manager; +/// Device manager for virtual machines. +pub mod device_manager; +/// Errors related to Virtual machine manager. +pub mod error; +/// Resource manager for virtual machines. +pub mod resource_manager; +/// Virtual machine manager for virtual machines. +pub mod vm; diff --git a/src/dragonball/src/resource_manager.rs b/src/dragonball/src/resource_manager.rs new file mode 100644 index 0000000000..2cb32c0546 --- /dev/null +++ b/src/dragonball/src/resource_manager.rs @@ -0,0 +1,785 @@ +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. 
+// +// SPDX-License-Identifier: Apache-2.0 + +use std::sync::Mutex; + +use dbs_allocator::{Constraint, IntervalTree, Range}; +use dbs_boot::layout::{ + GUEST_MEM_END, GUEST_MEM_START, GUEST_PHYS_END, IRQ_BASE as LEGACY_IRQ_BASE, + IRQ_MAX as LEGACY_IRQ_MAX, MMIO_LOW_END, MMIO_LOW_START, +}; +use dbs_device::resources::{DeviceResources, MsiIrqType, Resource, ResourceConstraint}; + +// We reserve the LEGACY_IRQ_BASE(5) for shared IRQ. +const SHARED_IRQ: u32 = LEGACY_IRQ_BASE; +// Since ioapic2 have 24 pins for legacy devices, so irq number 0-23 are used. We will set MSI_IRQ_BASE at 24. +#[cfg(target_arch = "x86_64")] +const MSI_IRQ_BASE: u32 = 24; +#[cfg(target_arch = "aarch64")] +/// We define MSI_IRQ_BASE as LEGACY_IRQ_MAX for aarch64 in order not to conflict with legacy irq numbers. +const MSI_IRQ_BASE: u32 = LEGACY_IRQ_MAX + 1; + +// kvm max irq is defined in arch/x86/include/asm/kvm_host.h +const MSI_IRQ_MAX: u32 = 1023; +// x86's kvm user mem slots is defined in arch/x86/include/asm/kvm_host.h +#[cfg(target_arch = "x86_64")] +const KVM_USER_MEM_SLOTS: u32 = 509; +// aarch64's kvm user mem slots is defined in arch/arm64/include/asm/kvm_host.h +#[cfg(target_arch = "aarch64")] +const KVM_USER_MEM_SLOTS: u32 = 512; +const PIO_MIN: u16 = 0x0; +const PIO_MAX: u16 = 0xFFFF; +// Reserve the 64MB MMIO address range just below 4G, x86 systems have special +// devices, such as LAPIC, IOAPIC, HPET etc, in this range. And we don't explicitly +// allocate MMIO address for those devices. +const MMIO_SPACE_RESERVED: u64 = 0x400_0000; + +/// Errors associated with resource management operations +#[derive(Debug, PartialEq, thiserror::Error)] +pub enum ResourceError { + /// Unknown/unsupported resource type. + #[error("unsupported resource type")] + UnknownResourceType, + + /// Invalid resource range. + #[error("invalid resource range for resource type : {0}")] + InvalidResourceRange(String), + + /// No resource available. + #[error("no resource available")] + NoAvailResource, +} + +#[derive(Default)] +struct ResourceManagerBuilder { + // IntervalTree for allocating legacy irq number. + legacy_irq_pool: IntervalTree<()>, + // IntervalTree for allocating message signal interrupt (MSI) irq number. + msi_irq_pool: IntervalTree<()>, + // IntervalTree for allocating port-mapped io (PIO) address. + pio_pool: IntervalTree<()>, + // IntervalTree for allocating memory-mapped io (MMIO) address. + mmio_pool: IntervalTree<()>, + // IntervalTree for allocating guest memory. + mem_pool: IntervalTree<()>, + // IntervalTree for allocating kvm memory slot. + kvm_mem_slot_pool: IntervalTree<()>, +} + +impl ResourceManagerBuilder { + /// init legacy_irq_pool with arch specific constants. + fn init_legacy_irq_pool(mut self) -> Self { + // The LEGACY_IRQ_BASE irq is reserved for shared IRQ and won't be allocated / reallocated, + // so we don't insert it into the legacy_irq interval tree. + self.legacy_irq_pool + .insert(Range::new(LEGACY_IRQ_BASE + 1, LEGACY_IRQ_MAX), None); + self + } + + /// init msi_irq_pool with arch specific constants. + fn init_msi_irq_pool(mut self) -> Self { + self.msi_irq_pool + .insert(Range::new(MSI_IRQ_BASE, MSI_IRQ_MAX), None); + self + } + + /// init pio_pool with arch specific constants. + fn init_pio_pool(mut self) -> Self { + self.pio_pool.insert(Range::new(PIO_MIN, PIO_MAX), None); + self + } + + /// Create mmio_pool with arch specific constants. + /// allow(clippy) is because `GUEST_MEM_START > MMIO_LOW_END`, we may modify GUEST_MEM_START or + /// MMIO_LOW_END in the future. 
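+    ///
+    /// The low MMIO hole `[MMIO_LOW_START, MMIO_LOW_END]` is inserted first; on x86_64 the top
+    /// `MMIO_SPACE_RESERVED` (64 MiB) of the range below 4 GiB is then carved out for platform
+    /// devices such as the LAPIC, IOAPIC and HPET, and any address space between the end of
+    /// guest memory and `GUEST_PHYS_END` is added as a high MMIO region.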
+ #[allow(clippy::absurd_extreme_comparisons)] + fn init_mmio_pool_helper(mmio: &mut IntervalTree<()>) { + mmio.insert(Range::new(MMIO_LOW_START, MMIO_LOW_END), None); + if !(*GUEST_MEM_END < MMIO_LOW_START + || GUEST_MEM_START > MMIO_LOW_END + || MMIO_LOW_START == MMIO_LOW_END) + { + #[cfg(target_arch = "x86_64")] + { + let constraint = Constraint::new(MMIO_SPACE_RESERVED) + .min(MMIO_LOW_END - MMIO_SPACE_RESERVED) + .max(0xffff_ffffu64); + let key = mmio.allocate(&constraint); + if let Some(k) = key.as_ref() { + mmio.update(k, ()); + } else { + panic!("failed to reserve MMIO address range for x86 system devices"); + } + } + } + + if *GUEST_MEM_END < *GUEST_PHYS_END { + mmio.insert(Range::new(*GUEST_MEM_END + 1, *GUEST_PHYS_END), None); + } + } + + /// init mmio_pool with helper function + fn init_mmio_pool(mut self) -> Self { + Self::init_mmio_pool_helper(&mut self.mmio_pool); + self + } + + /// Create mem_pool with arch specific constants. + /// deny(clippy) is because `GUEST_MEM_START > MMIO_LOW_END`, we may modify GUEST_MEM_START or + /// MMIO_LOW_END in the future. + #[allow(clippy::absurd_extreme_comparisons)] + pub(crate) fn init_mem_pool_helper(mem: &mut IntervalTree<()>) { + if *GUEST_MEM_END < MMIO_LOW_START + || GUEST_MEM_START > MMIO_LOW_END + || MMIO_LOW_START == MMIO_LOW_END + { + mem.insert(Range::new(GUEST_MEM_START, *GUEST_MEM_END), None); + } else { + if MMIO_LOW_START > GUEST_MEM_START { + mem.insert(Range::new(GUEST_MEM_START, MMIO_LOW_START - 1), None); + } + if MMIO_LOW_END < *GUEST_MEM_END { + mem.insert(Range::new(MMIO_LOW_END + 1, *GUEST_MEM_END), None); + } + } + } + + /// init mem_pool with helper function + fn init_mem_pool(mut self) -> Self { + Self::init_mem_pool_helper(&mut self.mem_pool); + self + } + + /// init kvm_mem_slot_pool with arch specific constants. + fn init_kvm_mem_slot_pool(mut self, max_kvm_mem_slot: Option) -> Self { + let max_slots = max_kvm_mem_slot.unwrap_or(KVM_USER_MEM_SLOTS as usize); + self.kvm_mem_slot_pool + .insert(Range::new(0, max_slots as u64), None); + self + } + + fn build(self) -> ResourceManager { + ResourceManager { + legacy_irq_pool: Mutex::new(self.legacy_irq_pool), + msi_irq_pool: Mutex::new(self.msi_irq_pool), + pio_pool: Mutex::new(self.pio_pool), + mmio_pool: Mutex::new(self.mmio_pool), + mem_pool: Mutex::new(self.mem_pool), + kvm_mem_slot_pool: Mutex::new(self.kvm_mem_slot_pool), + } + } +} + +/// Resource manager manages all resources for a virtual machine instance. +pub struct ResourceManager { + legacy_irq_pool: Mutex>, + msi_irq_pool: Mutex>, + pio_pool: Mutex>, + mmio_pool: Mutex>, + mem_pool: Mutex>, + kvm_mem_slot_pool: Mutex>, +} + +impl Default for ResourceManager { + fn default() -> Self { + ResourceManagerBuilder::default().build() + } +} + +impl ResourceManager { + /// Create a resource manager instance. + pub fn new(max_kvm_mem_slot: Option) -> Self { + let res_manager_builder = ResourceManagerBuilder::default(); + res_manager_builder + .init_legacy_irq_pool() + .init_msi_irq_pool() + .init_pio_pool() + .init_mmio_pool() + .init_mem_pool() + .init_kvm_mem_slot_pool(max_kvm_mem_slot) + .build() + } + + /// Init mem_pool with arch specific constants. + pub fn init_mem_pool(&self) { + let mut mem = self.mem_pool.lock().unwrap(); + ResourceManagerBuilder::init_mem_pool_helper(&mut mem); + } + + /// Check if mem_pool is empty. + pub fn is_mem_pool_empty(&self) -> bool { + self.mem_pool.lock().unwrap().is_empty() + } + + /// Allocate one legacy irq number. 
+ /// + /// Allocate the specified irq number if `fixed` contains an irq number. + pub fn allocate_legacy_irq(&self, shared: bool, fixed: Option) -> Option { + // if shared_irq is used, just return the shared irq num. + if shared { + return Some(SHARED_IRQ); + } + + let mut constraint = Constraint::new(1u32); + if let Some(v) = fixed { + if v == SHARED_IRQ { + return None; + } + + constraint.min = v as u64; + constraint.max = v as u64; + } + // Safe to unwrap() because we don't expect poisoned lock here. + let mut legacy_irq_pool = self.legacy_irq_pool.lock().unwrap(); + let key = legacy_irq_pool.allocate(&constraint); + if let Some(k) = key.as_ref() { + legacy_irq_pool.update(k, ()); + } + key.map(|v| v.min as u32) + } + + /// Free a legacy irq number. + /// + /// Panic if the irq number is invalid. + pub fn free_legacy_irq(&self, irq: u32) -> Result<(), ResourceError> { + // if the irq number is shared_irq, we don't need to do anything. + if irq == SHARED_IRQ { + return Ok(()); + } + + if !(LEGACY_IRQ_BASE..=LEGACY_IRQ_MAX).contains(&irq) { + return Err(ResourceError::InvalidResourceRange( + "Legacy IRQ".to_string(), + )); + } + let key = Range::new(irq, irq); + // Safe to unwrap() because we don't expect poisoned lock here. + self.legacy_irq_pool.lock().unwrap().free(&key); + Ok(()) + } + + /// Allocate a group of MSI irq numbers. + /// + /// The allocated MSI irq numbers may or may not be naturally aligned. + pub fn allocate_msi_irq(&self, count: u32) -> Option { + let constraint = Constraint::new(count); + // Safe to unwrap() because we don't expect poisoned lock here. + let mut msi_irq_pool = self.msi_irq_pool.lock().unwrap(); + let key = msi_irq_pool.allocate(&constraint); + if let Some(k) = key.as_ref() { + msi_irq_pool.update(k, ()); + } + key.map(|v| v.min as u32) + } + + /// Allocate a group of MSI irq numbers, naturally aligned to `count`. + /// + /// This may be used to support PCI MSI, which requires the allocated irq number is naturally + /// aligned. + pub fn allocate_msi_irq_aligned(&self, count: u32) -> Option { + let constraint = Constraint::new(count).align(count); + // Safe to unwrap() because we don't expect poisoned lock here. + let mut msi_irq_pool = self.msi_irq_pool.lock().unwrap(); + let key = msi_irq_pool.allocate(&constraint); + if let Some(k) = key.as_ref() { + msi_irq_pool.update(k, ()); + } + key.map(|v| v.min as u32) + } + + /// Free a group of MSI irq numbers. + /// + /// Panic if `irq` or `count` is invalid. + pub fn free_msi_irq(&self, irq: u32, count: u32) -> Result<(), ResourceError> { + if irq < MSI_IRQ_BASE + || count == 0 + || irq.checked_add(count).is_none() + || irq + count - 1 > MSI_IRQ_MAX + { + return Err(ResourceError::InvalidResourceRange("MSI IRQ".to_string())); + } + let key = Range::new(irq, irq + count - 1); + // Safe to unwrap() because we don't expect poisoned lock here. + self.msi_irq_pool.lock().unwrap().free(&key); + Ok(()) + } + + /// Allocate a group of PIO address and returns the allocated PIO base address. + pub fn allocate_pio_address_simple(&self, size: u16) -> Option { + let constraint = Constraint::new(size); + self.allocate_pio_address(&constraint) + } + + /// Allocate a group of PIO address and returns the allocated PIO base address. + pub fn allocate_pio_address(&self, constraint: &Constraint) -> Option { + // Safe to unwrap() because we don't expect poisoned lock here. 
+ let mut pio_pool = self.pio_pool.lock().unwrap(); + let key = pio_pool.allocate(constraint); + if let Some(k) = key.as_ref() { + pio_pool.update(k, ()); + } + key.map(|v| v.min as u16) + } + + /// Free PIO address range `[base, base + size - 1]`. + /// + /// Panic if `base` or `size` is invalid. + pub fn free_pio_address(&self, base: u16, size: u16) -> Result<(), ResourceError> { + if base.checked_add(size).is_none() { + return Err(ResourceError::InvalidResourceRange( + "PIO Address".to_string(), + )); + } + let key = Range::new(base, base + size - 1); + // Safe to unwrap() because we don't expect poisoned lock here. + self.pio_pool.lock().unwrap().free(&key); + Ok(()) + } + + /// Allocate a MMIO address range alinged to `align` and returns the allocated base address. + pub fn allocate_mmio_address_aligned(&self, size: u64, align: u64) -> Option { + let constraint = Constraint::new(size).align(align); + self.allocate_mmio_address(&constraint) + } + + /// Allocate a MMIO address range and returns the allocated base address. + pub fn allocate_mmio_address(&self, constraint: &Constraint) -> Option { + // Safe to unwrap() because we don't expect poisoned lock here. + let mut mmio_pool = self.mmio_pool.lock().unwrap(); + let key = mmio_pool.allocate(constraint); + key.map(|v| v.min) + } + + /// Free MMIO address range `[base, base + size - 1]` + pub fn free_mmio_address(&self, base: u64, size: u64) -> Result<(), ResourceError> { + if base.checked_add(size).is_none() { + return Err(ResourceError::InvalidResourceRange( + "MMIO Address".to_string(), + )); + } + let key = Range::new(base, base + size - 1); + // Safe to unwrap() because we don't expect poisoned lock here. + self.mmio_pool.lock().unwrap().free(&key); + Ok(()) + } + + /// Allocate guest memory address range and returns the allocated base memory address. + pub fn allocate_mem_address(&self, constraint: &Constraint) -> Option { + // Safe to unwrap() because we don't expect poisoned lock here. + let mut mem_pool = self.mem_pool.lock().unwrap(); + let key = mem_pool.allocate(constraint); + + key.map(|v| v.min) + } + + /// Free the guest memory address range `[base, base + size - 1]`. + /// + /// Panic if the guest memory address range is invalid. + /// allow(clippy) is because `base < GUEST_MEM_START`, we may modify GUEST_MEM_START in the future. + #[allow(clippy::absurd_extreme_comparisons)] + pub fn free_mem_address(&self, base: u64, size: u64) -> Result<(), ResourceError> { + if base.checked_add(size).is_none() + || base < GUEST_MEM_START + || base + size > *GUEST_MEM_END + { + return Err(ResourceError::InvalidResourceRange( + "MEM Address".to_string(), + )); + } + let key = Range::new(base, base + size - 1); + // Safe to unwrap() because we don't expect poisoned lock here. + self.mem_pool.lock().unwrap().free(&key); + Ok(()) + } + + /// Allocate a kvm memory slot number. + /// + /// Allocate the specified slot if `fixed` contains a slot number. + pub fn allocate_kvm_mem_slot(&self, size: u32, fixed: Option) -> Option { + let mut constraint = Constraint::new(size); + if let Some(v) = fixed { + constraint.min = v as u64; + constraint.max = v as u64; + } + // Safe to unwrap() because we don't expect poisoned lock here. + let mut kvm_mem_slot_pool = self.kvm_mem_slot_pool.lock().unwrap(); + let key = kvm_mem_slot_pool.allocate(&constraint); + if let Some(k) = key.as_ref() { + kvm_mem_slot_pool.update(k, ()); + } + key.map(|v| v.min as u32) + } + + /// Free a kvm memory slot number. 
+ pub fn free_kvm_mem_slot(&self, slot: u32) -> Result<(), ResourceError> {
+ let key = Range::new(slot, slot);
+ // Safe to unwrap() because we don't expect a poisoned lock here.
+ self.kvm_mem_slot_pool.lock().unwrap().free(&key);
+ Ok(())
+ }
+
+ /// Allocate requested resources for a device.
+ pub fn allocate_device_resources(
+ &self,
+ requests: &[ResourceConstraint],
+ shared_irq: bool,
+ ) -> std::result::Result<DeviceResources, ResourceError> {
+ let mut resources = DeviceResources::new();
+ for resource in requests.iter() {
+ let res = match resource {
+ ResourceConstraint::PioAddress { range, align, size } => {
+ let mut constraint = Constraint::new(*size).align(*align);
+ if let Some(r) = range {
+ constraint.min = r.0 as u64;
+ constraint.max = r.1 as u64;
+ }
+ match self.allocate_pio_address(&constraint) {
+ Some(base) => Resource::PioAddressRange {
+ base: base as u16,
+ size: *size,
+ },
+ None => {
+ if let Err(e) = self.free_device_resources(&resources) {
+ return Err(e);
+ } else {
+ return Err(ResourceError::NoAvailResource);
+ }
+ }
+ }
+ }
+ ResourceConstraint::MmioAddress { range, align, size } => {
+ let mut constraint = Constraint::new(*size).align(*align);
+ if let Some(r) = range {
+ constraint.min = r.0;
+ constraint.max = r.1;
+ }
+ match self.allocate_mmio_address(&constraint) {
+ Some(base) => Resource::MmioAddressRange { base, size: *size },
+ None => {
+ if let Err(e) = self.free_device_resources(&resources) {
+ return Err(e);
+ } else {
+ return Err(ResourceError::NoAvailResource);
+ }
+ }
+ }
+ }
+ ResourceConstraint::MemAddress { range, align, size } => {
+ let mut constraint = Constraint::new(*size).align(*align);
+ if let Some(r) = range {
+ constraint.min = r.0;
+ constraint.max = r.1;
+ }
+ match self.allocate_mem_address(&constraint) {
+ Some(base) => Resource::MemAddressRange { base, size: *size },
+ None => {
+ if let Err(e) = self.free_device_resources(&resources) {
+ return Err(e);
+ } else {
+ return Err(ResourceError::NoAvailResource);
+ }
+ }
+ }
+ }
+ ResourceConstraint::LegacyIrq { irq } => {
+ match self.allocate_legacy_irq(shared_irq, *irq) {
+ Some(v) => Resource::LegacyIrq(v),
+ None => {
+ if let Err(e) = self.free_device_resources(&resources) {
+ return Err(e);
+ } else {
+ return Err(ResourceError::NoAvailResource);
+ }
+ }
+ }
+ }
+ ResourceConstraint::PciMsiIrq { size } => {
+ match self.allocate_msi_irq_aligned(*size) {
+ Some(base) => Resource::MsiIrq {
+ ty: MsiIrqType::PciMsi,
+ base,
+ size: *size,
+ },
+ None => {
+ if let Err(e) = self.free_device_resources(&resources) {
+ return Err(e);
+ } else {
+ return Err(ResourceError::NoAvailResource);
+ }
+ }
+ }
+ }
+ ResourceConstraint::PciMsixIrq { size } => match self.allocate_msi_irq(*size) {
+ Some(base) => Resource::MsiIrq {
+ ty: MsiIrqType::PciMsix,
+ base,
+ size: *size,
+ },
+ None => {
+ if let Err(e) = self.free_device_resources(&resources) {
+ return Err(e);
+ } else {
+ return Err(ResourceError::NoAvailResource);
+ }
+ }
+ },
+ ResourceConstraint::GenericIrq { size } => match self.allocate_msi_irq(*size) {
+ Some(base) => Resource::MsiIrq {
+ ty: MsiIrqType::GenericMsi,
+ base,
+ size: *size,
+ },
+ None => {
+ if let Err(e) = self.free_device_resources(&resources) {
+ return Err(e);
+ } else {
+ return Err(ResourceError::NoAvailResource);
+ }
+ }
+ },
+ ResourceConstraint::KvmMemSlot { slot, size } => {
+ match self.allocate_kvm_mem_slot(*size, *slot) {
+ Some(v) => Resource::KvmMemSlot(v),
+ None => {
+ if let Err(e) = self.free_device_resources(&resources) {
+ return Err(e);
+ } else {
+ return
Err(ResourceError::NoAvailResource); + } + } + } + } + }; + resources.append(res); + } + + Ok(resources) + } + + /// Free resources allocated for a device. + pub fn free_device_resources(&self, resources: &DeviceResources) -> Result<(), ResourceError> { + for res in resources.iter() { + let result = match res { + Resource::PioAddressRange { base, size } => self.free_pio_address(*base, *size), + Resource::MmioAddressRange { base, size } => self.free_mmio_address(*base, *size), + Resource::MemAddressRange { base, size } => self.free_mem_address(*base, *size), + Resource::LegacyIrq(base) => self.free_legacy_irq(*base), + Resource::MsiIrq { ty: _, base, size } => self.free_msi_irq(*base, *size), + Resource::KvmMemSlot(slot) => self.free_kvm_mem_slot(*slot), + Resource::MacAddresss(_) => Ok(()), + }; + if result.is_err() { + return result; + } + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_allocate_legacy_irq() { + let mgr = ResourceManager::new(None); + + // Allocate/free shared IRQs multiple times. + assert_eq!(mgr.allocate_legacy_irq(true, None).unwrap(), SHARED_IRQ); + assert_eq!(mgr.allocate_legacy_irq(true, None).unwrap(), SHARED_IRQ); + mgr.free_legacy_irq(SHARED_IRQ); + mgr.free_legacy_irq(SHARED_IRQ); + mgr.free_legacy_irq(SHARED_IRQ); + + // Allocate specified IRQs. + assert_eq!( + mgr.allocate_legacy_irq(false, Some(LEGACY_IRQ_BASE + 10)) + .unwrap(), + LEGACY_IRQ_BASE + 10 + ); + mgr.free_legacy_irq(LEGACY_IRQ_BASE + 10); + assert_eq!( + mgr.allocate_legacy_irq(false, Some(LEGACY_IRQ_BASE + 10)) + .unwrap(), + LEGACY_IRQ_BASE + 10 + ); + assert!(mgr + .allocate_legacy_irq(false, Some(LEGACY_IRQ_BASE + 10)) + .is_none()); + + assert!(mgr.allocate_legacy_irq(false, None).is_some()); + + assert!(mgr + .allocate_legacy_irq(false, Some(LEGACY_IRQ_BASE - 1)) + .is_none()); + assert!(mgr + .allocate_legacy_irq(false, Some(LEGACY_IRQ_MAX + 1)) + .is_none()); + assert!(mgr.allocate_legacy_irq(false, Some(SHARED_IRQ)).is_none()); + } + + #[test] + fn test_invalid_free_legacy_irq() { + let mgr = ResourceManager::new(None); + assert_eq!( + mgr.free_legacy_irq(LEGACY_IRQ_MAX + 1), + Err(ResourceError::InvalidResourceRange( + "Legacy IRQ".to_string(), + )) + ); + } + + #[test] + fn test_allocate_msi_irq() { + let mgr = ResourceManager::new(None); + + let msi = mgr.allocate_msi_irq(3).unwrap(); + mgr.free_msi_irq(msi, 3); + let msi = mgr.allocate_msi_irq(3).unwrap(); + mgr.free_msi_irq(msi, 3); + + let irq = mgr.allocate_msi_irq_aligned(8).unwrap(); + assert_eq!(irq & 0x7, 0); + mgr.free_msi_irq(msi, 8); + let irq = mgr.allocate_msi_irq_aligned(8).unwrap(); + assert_eq!(irq & 0x7, 0); + + let irq = mgr.allocate_msi_irq_aligned(512).unwrap(); + assert_eq!(irq, 512); + mgr.free_msi_irq(irq, 512); + let irq = mgr.allocate_msi_irq_aligned(512).unwrap(); + assert_eq!(irq, 512); + + assert!(mgr.allocate_msi_irq(4099).is_none()); + } + + #[test] + fn test_invalid_free_msi_irq() { + let mgr = ResourceManager::new(None); + assert_eq!( + mgr.free_msi_irq(MSI_IRQ_MAX, 3), + Err(ResourceError::InvalidResourceRange("MSI IRQ".to_string())) + ); + } + + #[test] + fn test_allocate_pio_addr() { + let mgr = ResourceManager::new(None); + assert!(mgr.allocate_pio_address_simple(10).is_some()); + let mut requests = vec![ + ResourceConstraint::PioAddress { + range: None, + align: 0x1000, + size: 0x2000, + }, + ResourceConstraint::PioAddress { + range: Some((0x8000, 0x9000)), + align: 0x1000, + size: 0x1000, + }, + ResourceConstraint::PioAddress { + range: Some((0x9000, 0xa000)), 
+ align: 0x1000, + size: 0x1000, + }, + ResourceConstraint::PioAddress { + range: Some((0xb000, 0xc000)), + align: 0x1000, + size: 0x1000, + }, + ]; + let resources = mgr.allocate_device_resources(&requests, false).unwrap(); + mgr.free_device_resources(&resources); + let resources = mgr.allocate_device_resources(&requests, false).unwrap(); + mgr.free_device_resources(&resources); + requests.push(ResourceConstraint::PioAddress { + range: Some((0xc000, 0xc000)), + align: 0x1000, + size: 0x1000, + }); + assert!(mgr.allocate_device_resources(&requests, false).is_err()); + let resources = mgr + .allocate_device_resources(&requests[0..requests.len() - 1], false) + .unwrap(); + mgr.free_device_resources(&resources); + } + + #[test] + fn test_invalid_free_pio_addr() { + let mgr = ResourceManager::new(None); + assert_eq!( + mgr.free_pio_address(u16::MAX, 3), + Err(ResourceError::InvalidResourceRange( + "PIO Address".to_string(), + )) + ); + } + + #[test] + fn test_allocate_kvm_mem_slot() { + let mgr = ResourceManager::new(None); + assert_eq!(mgr.allocate_kvm_mem_slot(1, None).unwrap(), 0); + assert_eq!(mgr.allocate_kvm_mem_slot(1, Some(200)).unwrap(), 200); + mgr.free_kvm_mem_slot(200); + assert_eq!(mgr.allocate_kvm_mem_slot(1, Some(200)).unwrap(), 200); + assert_eq!( + mgr.allocate_kvm_mem_slot(1, Some(KVM_USER_MEM_SLOTS)) + .unwrap(), + KVM_USER_MEM_SLOTS + ); + assert!(mgr + .allocate_kvm_mem_slot(1, Some(KVM_USER_MEM_SLOTS + 1)) + .is_none()); + } + + #[test] + fn test_allocate_mmio_address() { + let mgr = ResourceManager::new(None); + + #[cfg(target_arch = "x86_64")] + { + // Can't allocate from reserved region + let constraint = Constraint::new(0x100_0000u64) + .min(0x1_0000_0000u64 - 0x200_0000u64) + .max(0xffff_ffffu64); + assert!(mgr.allocate_mmio_address(&constraint).is_none()); + } + let constraint = Constraint::new(0x100_0000u64).min(0x1_0000_0000u64 - 0x200_0000u64); + assert!(mgr.allocate_mmio_address(&constraint).is_some()); + + #[cfg(target_arch = "x86_64")] + { + // Can't allocate from reserved region + let constraint = Constraint::new(0x100_0000u64) + .min(0x1_0000_0000u64 - 0x200_0000u64) + .max(0xffff_ffffu64); + assert!(mgr.allocate_mem_address(&constraint).is_none()); + } + #[cfg(target_arch = "aarch64")] + { + let constraint = Constraint::new(0x200_0000u64) + .min(0x1_0000_0000u64 - 0x200_0000u64) + .max(0xffff_fffeu64); + assert!(mgr.allocate_mem_address(&constraint).is_none()); + } + let constraint = Constraint::new(0x100_0000u64).min(0x1_0000_0000u64 - 0x200_0000u64); + assert!(mgr.allocate_mem_address(&constraint).is_some()); + } + + #[test] + #[should_panic] + fn test_allocate_duplicate_memory() { + let mgr = ResourceManager::new(None); + + let constraint_1 = Constraint::new(0x100_0000u64) + .min(0x1_0000_0000u64) + .max(0x1_0000_0000u64 + 0x100_0000u64); + let constraint_2 = Constraint::new(0x100_0000u64) + .min(0x1_0000_0000u64) + .max(0x1_0000_0000u64 + 0x100_0000u64); + + assert!(mgr.allocate_mem_address(&constraint_1).is_some()); + assert!(mgr.allocate_mem_address(&constraint_2).is_some()); + } +} diff --git a/src/dragonball/src/vm/kernel_config.rs b/src/dragonball/src/vm/kernel_config.rs new file mode 100644 index 0000000000..f266b48c4d --- /dev/null +++ b/src/dragonball/src/vm/kernel_config.rs @@ -0,0 +1,67 @@ +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::fs::File; + +/// Structure to hold guest kernel configuration information. +pub struct KernelConfigInfo { + /// The descriptor to the kernel file. 
+ kernel_file: File,
+ /// The descriptor to the initrd file, if there is one.
+ initrd_file: Option<File>,
+ /// The command line for the guest kernel.
+ cmdline: linux_loader::cmdline::Cmdline,
+}
+
+impl KernelConfigInfo {
+ /// Create a KernelConfigInfo instance.
+ pub fn new(
+ kernel_file: File,
+ initrd_file: Option<File>,
+ cmdline: linux_loader::cmdline::Cmdline,
+ ) -> Self {
+ KernelConfigInfo {
+ kernel_file,
+ initrd_file,
+ cmdline,
+ }
+ }
+
+ /// Get a mutable reference to the kernel file.
+ pub fn kernel_file_mut(&mut self) -> &mut File {
+ &mut self.kernel_file
+ }
+
+ /// Get a mutable reference to the initrd file.
+ pub fn initrd_file_mut(&mut self) -> Option<&mut File> {
+ self.initrd_file.as_mut()
+ }
+
+ /// Get a shared reference to the guest kernel boot parameter object.
+ pub fn kernel_cmdline(&self) -> &linux_loader::cmdline::Cmdline {
+ &self.cmdline
+ }
+
+ /// Get a mutable reference to the guest kernel boot parameter object.
+ pub fn kernel_cmdline_mut(&mut self) -> &mut linux_loader::cmdline::Cmdline {
+ &mut self.cmdline
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use vmm_sys_util::tempfile::TempFile;
+
+ #[test]
+ fn test_kernel_config_info() {
+ let kernel = TempFile::new().unwrap();
+ let initrd = TempFile::new().unwrap();
+ let mut cmdline = linux_loader::cmdline::Cmdline::new(1024);
+ cmdline.insert_str("ro").unwrap();
+ let mut info = KernelConfigInfo::new(kernel.into_file(), Some(initrd.into_file()), cmdline);
+
+ assert_eq!(info.cmdline.as_str(), "ro");
+ assert!(info.initrd_file_mut().is_some());
+ }
+}
diff --git a/src/dragonball/src/vm/mod.rs b/src/dragonball/src/vm/mod.rs
new file mode 100644
index 0000000000..c1510308da
--- /dev/null
+++ b/src/dragonball/src/vm/mod.rs
@@ -0,0 +1,20 @@
+// Copyright (C) 2021 Alibaba Cloud. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+use serde_derive::{Deserialize, Serialize};
+
+mod kernel_config;
+pub use self::kernel_config::KernelConfigInfo;
+
+/// Configuration information for user defined NUMA nodes.
+#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
+pub struct NumaRegionInfo {
+ /// memory size for this region (unit: MiB)
+ pub size: u64,
+ /// numa node id on host for this region
+ pub host_numa_node_id: Option<u32>,
+ /// numa node id on guest for this region
+ pub guest_numa_node_id: Option<u32>,
+ /// vcpu ids belonging to this region
+ pub vcpu_ids: Vec<u32>,
+}
diff --git a/src/libs/safe-path/src/pinned_path_buf.rs b/src/libs/safe-path/src/pinned_path_buf.rs
index 4310637df5..d1816f450d 100644
--- a/src/libs/safe-path/src/pinned_path_buf.rs
+++ b/src/libs/safe-path/src/pinned_path_buf.rs
@@ -253,7 +253,7 @@ mod tests {
fs::write(rootfs_path.join("endpoint"), "test").unwrap();

// Pin the target and validate the path/content.
- let path = PinnedPathBuf::new(rootfs_path.to_path_buf(), "symlink_dir/endpoint").unwrap(); + let path = PinnedPathBuf::new(rootfs_path, "symlink_dir/endpoint").unwrap(); assert!(!path.is_dir()); let path_ref = path.deref(); let target = fs::read_link(path_ref).unwrap(); @@ -344,6 +344,7 @@ mod tests { PinnedPathBuf::new(rootfs_path, "does_not_exist").unwrap_err(); } + #[allow(clippy::zero_prefixed_literal)] #[test] fn test_new_pinned_path_buf_without_read_perm() { let rootfs_dir = tempfile::tempdir().expect("failed to create tmpdir"); diff --git a/src/libs/safe-path/src/scoped_dir_builder.rs b/src/libs/safe-path/src/scoped_dir_builder.rs index 39ceac1076..1a4ba189f2 100644 --- a/src/libs/safe-path/src/scoped_dir_builder.rs +++ b/src/libs/safe-path/src/scoped_dir_builder.rs @@ -87,7 +87,7 @@ impl ScopedDirBuilder { ) })?; - self.do_mkdir(&stripped_path) + self.do_mkdir(stripped_path) } /// Creates sub-directory with the options configured in this builder. @@ -134,7 +134,7 @@ impl ScopedDirBuilder { if !self.recursive && idx != levels { return Err(Error::new( ErrorKind::NotFound, - format!("parent directory does not exist"), + "parent directory does not exist".to_string(), )); } dir = dir.mkdir(comp, self.mode)?; @@ -146,6 +146,7 @@ impl ScopedDirBuilder { } } +#[allow(clippy::zero_prefixed_literal)] #[cfg(test)] mod tests { use super::*;