diff --git a/src/dragonball/src/api/v1/boot_source.rs b/src/dragonball/src/api/v1/boot_source.rs new file mode 100644 index 0000000000..e7de030438 --- /dev/null +++ b/src/dragonball/src/api/v1/boot_source.rs @@ -0,0 +1,59 @@ +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use serde_derive::{Deserialize, Serialize}; + +/// Default guest kernel command line: +/// - `reboot=k` shut down the guest on reboot, instead of well... rebooting; +/// - `panic=1` on panic, reboot after 1 second; +/// - `pci=off` do not scan for PCI devices (saves boot time); +/// - `nomodules` disable loadable kernel module support; +/// - `8250.nr_uarts=0` disable 8250 serial interface; +/// - `i8042.noaux` do not probe the i8042 controller for an attached mouse (saves boot time); +/// - `i8042.nomux` do not probe i8042 for a multiplexing controller (saves boot time); +/// - `i8042.nopnp` do not use ACPIPnP to discover KBD/AUX controllers (saves boot time); +/// - `i8042.dumbkbd` do not attempt to control kbd state via the i8042 (saves boot time). +pub const DEFAULT_KERNEL_CMDLINE: &str = "reboot=k panic=1 pci=off nomodules 8250.nr_uarts=0 \ + i8042.noaux i8042.nomux i8042.nopnp i8042.dumbkbd"; + +/// Strongly typed data structure used to configure the boot source of the microvm. +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize, Default)] +#[serde(deny_unknown_fields)] +pub struct BootSourceConfig { + /// Path of the kernel image. + /// Dragonball only supports uncompressed kernel images. + pub kernel_path: String, + /// Path of the initrd, if there is one. + /// Note: the rootfs is not set here; it is set in `BlockDeviceConfigInfo`. + pub initrd_path: Option, + /// The boot arguments to pass to the kernel; when `None`, `DEFAULT_KERNEL_CMDLINE` is used. + #[serde(skip_serializing_if = "Option::is_none")] + pub boot_args: Option, +} + +/// Errors associated with actions on `BootSourceConfig`. 
+#[derive(Debug, thiserror::Error)] +pub enum BootSourceConfigError { + /// The virtual machine instance ID is invalid. + #[error("the virtual machine instance ID is invalid")] + InvalidVMID, + + /// The kernel file cannot be opened. + #[error( + "the kernel file cannot be opened due to invalid kernel path or invalid permissions: {0}" + )] + InvalidKernelPath(#[source] std::io::Error), + + /// The initrd file cannot be opened. + #[error("the initrd file cannot be opened due to invalid path or invalid permissions: {0}")] + InvalidInitrdPath(#[source] std::io::Error), + + /// The kernel command line is invalid. + #[error("the kernel command line is invalid: {0}")] + InvalidKernelCommandLine(#[source] linux_loader::cmdline::Error), + + /// The boot source cannot be updated after boot. + #[error("the update operation is not allowed after boot")] + UpdateNotAllowedPostBoot, +} diff --git a/src/dragonball/src/api/v1/mod.rs b/src/dragonball/src/api/v1/mod.rs index f25fb84364..c1ab3f5d32 100644 --- a/src/dragonball/src/api/v1/mod.rs +++ b/src/dragonball/src/api/v1/mod.rs @@ -3,5 +3,15 @@ //! API Version 1 related data structures to configure the vmm. +mod vmm_action; +pub use self::vmm_action::{ + VmmAction, VmmActionError, VmmData, VmmRequest, VmmResponse, VmmService, +}; + +/// Wrapper for configuring the microVM boot source. +mod boot_source; +pub use self::boot_source::{BootSourceConfig, BootSourceConfigError, DEFAULT_KERNEL_CMDLINE}; + +/// Wrapper over the microVM general information. mod instance_info; pub use self::instance_info::{InstanceInfo, InstanceState}; diff --git a/src/dragonball/src/api/v1/vmm_action.rs b/src/dragonball/src/api/v1/vmm_action.rs new file mode 100644 index 0000000000..1d7ac7e630 --- /dev/null +++ b/src/dragonball/src/api/v1/vmm_action.rs @@ -0,0 +1,148 @@ +// Copyright (C) 2020-2022 Alibaba Cloud. All rights reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +use std::fs::File; +use std::sync::mpsc::{Receiver, Sender, TryRecvError}; + +use log::{debug, error, warn}; +use vmm_sys_util::eventfd::EventFd; + +use crate::error::Result; +use crate::event_manager::EventManager; +use crate::vm::{KernelConfigInfo, VmConfigInfo}; +use crate::vmm::Vmm; + +use super::*; + +/// Wrapper for all errors associated with VMM actions. +#[derive(Debug, thiserror::Error)] +pub enum VmmActionError { + /// The action `ConfigureBootSource` failed either because of bad user input or an internal + /// error. + #[error("failed to configure boot source for VM: {0}")] + BootSource(#[source] BootSourceConfigError), +} + +/// This enum represents the public interface of the VMM. Each action contains various +/// bits of information (ids, paths, etc.). +#[derive(Clone, Debug, PartialEq)] +pub enum VmmAction { + /// Configure the boot source of the microVM using the supplied `BootSourceConfig` as input. This + /// action can only be called before the microVM has booted. + ConfigureBootSource(BootSourceConfig), +} + +/// This enum represents the response sent by the VMM in case of success. The response is either +/// empty, when no data needs to be sent, or an internal VMM structure. +#[derive(Debug)] +pub enum VmmData { + /// No data is sent on the channel. + Empty, +} + +/// Request data type used to communicate between the API and the VMM. +pub type VmmRequest = Box; + +/// Result of handling a request: `VmmData` on success, `VmmActionError` on failure. +pub type VmmRequestResult = std::result::Result; + +/// Response data type used to communicate between the API and the VMM. +pub type VmmResponse = Box; + +/// VMM Service to handle requests from the API server. 
+/// +/// There are two levels of API servers as below: +/// API client <--> VMM API Server <--> VMM Core +pub struct VmmService { + from_api: Receiver, + to_api: Sender, + machine_config: VmConfigInfo, +} + +impl VmmService { + /// Create a new VMM API server instance from the two channel ends, with a default `VmConfigInfo`. + pub fn new(from_api: Receiver, to_api: Sender) -> Self { + VmmService { + from_api, + to_api, + machine_config: VmConfigInfo::default(), + } + } + + /// Handle one pending request from the HTTP API Server (non-blocking `try_recv`) and send back the reply; an empty channel only logs a warning, a disconnected channel panics. + pub fn run_vmm_action(&mut self, vmm: &mut Vmm, _event_mgr: &mut EventManager) -> Result<()> { + let request = match self.from_api.try_recv() { + Ok(t) => *t, + Err(TryRecvError::Empty) => { + warn!("Got a spurious notification from api thread"); + return Ok(()); + } + Err(TryRecvError::Disconnected) => { + panic!("The channel's sending half was disconnected. Cannot receive data."); + } + }; + debug!("receive vmm action: {:?}", request); + + let response = match request { + VmmAction::ConfigureBootSource(boot_source_body) => { + self.configure_boot_source(vmm, boot_source_body) + } + }; + + debug!("send vmm response: {:?}", response); + self.send_response(response) + } + + fn send_response(&self, result: VmmRequestResult) -> Result<()> { + self.to_api + .send(Box::new(result)) + .map_err(|_| ()) + .expect("vmm: one-shot API result channel has been closed"); + + Ok(()) + } + + fn configure_boot_source( + &self, + vmm: &mut Vmm, + boot_source_config: BootSourceConfig, + ) -> VmmRequestResult { + use super::BootSourceConfigError::{ + InvalidInitrdPath, InvalidKernelCommandLine, InvalidKernelPath, InvalidVMID, + UpdateNotAllowedPostBoot, + }; + use super::VmmActionError::BootSource; + + let vm = vmm.get_vm_by_id_mut("").ok_or(BootSource(InvalidVMID))?; + if vm.is_vm_initialized() { + return Err(BootSource(UpdateNotAllowedPostBoot)); + } + + let kernel_file = File::open(&boot_source_config.kernel_path) + .map_err(|e| BootSource(InvalidKernelPath(e)))?; + + let initrd_file = match 
boot_source_config.initrd_path { + None => None, + Some(ref path) => Some(File::open(path).map_err(|e| BootSource(InvalidInitrdPath(e)))?), + }; + + let mut cmdline = linux_loader::cmdline::Cmdline::new(dbs_boot::layout::CMDLINE_MAX_SIZE); + let boot_args = boot_source_config + .boot_args + .clone() + .unwrap_or_else(|| String::from(DEFAULT_KERNEL_CMDLINE)); + cmdline + .insert_str(boot_args) + .map_err(|e| BootSource(InvalidKernelCommandLine(e)))?; + + let kernel_config = KernelConfigInfo::new(kernel_file, initrd_file, cmdline); + vm.set_kernel_config(kernel_config); + + Ok(VmmData::Empty) + } +} diff --git a/src/dragonball/src/error.rs b/src/dragonball/src/error.rs index c50e50b256..9cb27fdd73 100644 --- a/src/dragonball/src/error.rs +++ b/src/dragonball/src/error.rs @@ -178,3 +178,24 @@ pub enum LoadInitrdError { #[error("failed to read the initrd image: {0}")] ReadInitrd(#[source] std::io::Error), } + +/// A dedicated error type to glue with the vmm_epoll crate. +#[derive(Debug, thiserror::Error)] +pub enum EpollError { + /// Generic internal error. + #[error("unclassfied internal error")] + InternalError, + + /// Errors from the epoll manager (`dbs_utils::epoll_manager`). + #[error("failed to issue epoll syscall: {0}")] + EpollMgr(#[from] dbs_utils::epoll_manager::Error), + + /// Generic IO errors, displayed transparently as the wrapped `std::io::Error`. + #[error(transparent)] + IOError(std::io::Error), + + #[cfg(feature = "dbs-virtio-devices")] + /// Errors from virtio devices. + #[error("failed to manager Virtio device: {0}")] + VirtIoDevice(#[source] VirtIoError), +} diff --git a/src/dragonball/src/event_manager.rs b/src/dragonball/src/event_manager.rs new file mode 100644 index 0000000000..cd0da10c87 --- /dev/null +++ b/src/dragonball/src/event_manager.rs @@ -0,0 +1,169 @@ +// Copyright (C) 2020-2022 Alibaba Cloud. All rights reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +//! Event manager to manage and handle IO events and requests from the API server. + +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::{Arc, Mutex}; + +use dbs_utils::epoll_manager::{ + EpollManager, EventOps, EventSet, Events, MutEventSubscriber, SubscriberId, +}; +use log::{error, warn}; +use vmm_sys_util::eventfd::EventFd; + +use crate::error::{EpollError, Result}; +use crate::vmm::Vmm; + +// Statically assigned epoll slots (event data values) for VMM events. +pub(crate) const EPOLL_EVENT_EXIT: u32 = 0; +pub(crate) const EPOLL_EVENT_API_REQUEST: u32 = 1; + +/// Shared information between `Vmm::run_vmm_event_loop()` and `VmmEpollHandler`. +pub(crate) struct EventContext { + pub api_event_fd: EventFd, + pub api_event_flag: bool, + pub exit_evt_flag: bool, +} + +impl EventContext { + /// Create a new instance of [`EventContext`] with both event flags cleared. + pub fn new(api_event_fd: EventFd) -> Result { + Ok(EventContext { + api_event_fd, + api_event_flag: false, + exit_evt_flag: false, + }) + } +} + +/// Event manager for VMM to handle API requests and IO events. +pub struct EventManager { + epoll_mgr: EpollManager, + subscriber_id: SubscriberId, + vmm_event_count: Arc, +} + +impl Drop for EventManager { + fn drop(&mut self) { + // Vmm -> Vm -> EpollManager -> VmmEpollHandler -> Vmm + // We need to remove VmmEpollHandler to break the circular reference + // so that Vmm can drop. + self.epoll_mgr + .remove_subscriber(self.subscriber_id) + .map_err(|e| { + error!("event_manager: remove_subscriber err. {:?}", e); + e + }) + .ok(); + } +} + +impl EventManager { + /// Create a new event manager associated with the VMM object. 
+ pub fn new(vmm: &Arc>, epoll_mgr: EpollManager) -> Result { + let vmm_event_count = Arc::new(AtomicUsize::new(0)); + let handler: Box = Box::new(VmmEpollHandler { + vmm: vmm.clone(), + vmm_event_count: vmm_event_count.clone(), + }); + let subscriber_id = epoll_mgr.add_subscriber(handler); + + Ok(EventManager { + epoll_mgr, + subscriber_id, + vmm_event_count, + }) + } + + /// Get a clone of the underlying epoll event manager. + pub fn epoll_manager(&self) -> EpollManager { + self.epoll_mgr.clone() + } + + /// Register the eventfd for exit notification. + pub fn register_exit_eventfd( + &mut self, + exit_evt: &EventFd, + ) -> std::result::Result<(), EpollError> { + let events = Events::with_data(exit_evt, EPOLL_EVENT_EXIT, EventSet::IN); + + self.epoll_mgr + .add_event(self.subscriber_id, events) + .map_err(EpollError::EpollMgr) + } + + /// Poll pending events and invoke registered event handler. + /// + /// # Arguments: + /// * NOTE(review): a `max_events` argument was documented here, but the signature only takes `timeout` + /// * timeout: maximum time in milliseconds to wait + pub fn handle_events(&self, timeout: i32) -> std::result::Result { + self.epoll_mgr + .handle_events(timeout) + .map_err(EpollError::EpollMgr) + } + + /// Fetch the VMM event count and reset it to zero. + pub fn fetch_vmm_event_count(&self) -> usize { + self.vmm_event_count.swap(0, Ordering::AcqRel) + } +} + +struct VmmEpollHandler { + vmm: Arc>, + vmm_event_count: Arc, +} + +impl MutEventSubscriber for VmmEpollHandler { + fn process(&mut self, events: Events, _ops: &mut EventOps) { + // Do not try to recover when the lock has already been poisoned. + // And be careful to avoid deadlock between process() and Vmm::run_vmm_event_loop(). 
+ let mut vmm = self.vmm.lock().unwrap(); + + match events.data() { + EPOLL_EVENT_API_REQUEST => { + if let Err(e) = vmm.event_ctx.api_event_fd.read() { + error!("event_manager: failed to read API eventfd, {:?}", e); + } + vmm.event_ctx.api_event_flag = true; + self.vmm_event_count.fetch_add(1, Ordering::AcqRel); + } + EPOLL_EVENT_EXIT => { + let vm = vmm.get_vm_by_id("").unwrap(); + match vm.get_reset_eventfd() { + Some(ev) => { + if let Err(e) = ev.read() { + error!("event_manager: failed to read exit eventfd, {:?}", e); + } + } + None => warn!("event_manager: leftover exit event in epoll context!"), + } + vmm.event_ctx.exit_evt_flag = true; + self.vmm_event_count.fetch_add(1, Ordering::AcqRel); + } + _ => error!("event_manager: unknown epoll slot number {}", events.data()), + } + } + + fn init(&mut self, ops: &mut EventOps) { + // A poisoned lock is not expected here; `unwrap()` panics if it is. + let vmm = self.vmm.lock().unwrap(); + let events = Events::with_data( + &vmm.event_ctx.api_event_fd, + EPOLL_EVENT_API_REQUEST, + EventSet::IN, + ); + if let Err(e) = ops.add(events) { + error!( + "event_manager: failed to register epoll event for API server, {:?}", + e + ); + } + } +} diff --git a/src/dragonball/src/lib.rs b/src/dragonball/src/lib.rs index 6bf0b9298a..bd58159ac2 100644 --- a/src/dragonball/src/lib.rs +++ b/src/dragonball/src/lib.rs @@ -32,8 +32,13 @@ pub mod vcpu; /// Virtual machine manager for virtual machines. pub mod vm; +mod event_manager; mod io_manager; +mod vmm; + +pub use self::error::StartMicrovmError; pub use self::io_manager::IoManagerCached; +pub use self::vmm::Vmm; /// Success exit code. pub const EXIT_CODE_OK: u8 = 0; diff --git a/src/dragonball/src/vmm.rs b/src/dragonball/src/vmm.rs new file mode 100644 index 0000000000..a2c75ff716 --- /dev/null +++ b/src/dragonball/src/vmm.rs @@ -0,0 +1,215 @@ +// Copyright (C) 2020-2022 Alibaba Cloud. All rights reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +use std::os::unix::io::RawFd; +use std::sync::{Arc, Mutex, RwLock}; + +use dbs_utils::epoll_manager::EpollManager; +use log::{error, info, warn}; +use seccompiler::BpfProgram; +use vmm_sys_util::eventfd::EventFd; + +use crate::api::v1::{InstanceInfo, VmmService}; +use crate::error::{EpollError, Result}; +use crate::event_manager::{EventContext, EventManager}; +use crate::vm::Vm; +use crate::{EXIT_CODE_GENERIC_ERROR, EXIT_CODE_OK}; + +/// Global coordinator to manage API servers, virtual machines, upgrade etc. +/// +/// Originally Firecracker assumed a VMM manages only one VM and did not distinguish VMM from VM, +/// which led to a mixed and confusing design. Now we explicitly build the object model as: +/// |---Vmm API Server--<-1:1-> HTTP API Server +/// | |----------<-1:1-> Shimv2/CRI API Server +/// | +/// Vmm <-1:N-> Vm <-1:1-> Address Space Manager <-1:N-> GuestMemory +/// ^ ^---1:1-> Device Manager <-1:N-> Device +/// | ^---1:1-> Resource Manager +/// | ^---1:N-> Vcpu +/// |---<-1:N-> Event Manager +pub struct Vmm { + pub(crate) event_ctx: EventContext, + epoll_manager: EpollManager, + + // Will change to a HashMap when enabling 1 VMM with multiple VMs. + vm: Vm, + + vcpu_seccomp_filter: BpfProgram, + vmm_seccomp_filter: BpfProgram, +} + +impl Vmm { + /// Create a Virtual Machine Monitor instance backed by a default `EpollManager`. + pub fn new( + api_shared_info: Arc>, + api_event_fd: EventFd, + vmm_seccomp_filter: BpfProgram, + vcpu_seccomp_filter: BpfProgram, + kvm_fd: Option, + ) -> Result { + let epoll_manager = EpollManager::default(); + Self::new_with_epoll_manager( + api_shared_info, + api_event_fd, + epoll_manager, + vmm_seccomp_filter, + vcpu_seccomp_filter, + kvm_fd, + ) + } + + /// Create a Virtual Machine Monitor instance with the given `epoll_manager`. 
+ pub fn new_with_epoll_manager( + api_shared_info: Arc>, + api_event_fd: EventFd, + epoll_manager: EpollManager, + vmm_seccomp_filter: BpfProgram, + vcpu_seccomp_filter: BpfProgram, + kvm_fd: Option, + ) -> Result { + let vm = Vm::new(kvm_fd, api_shared_info, epoll_manager.clone())?; + let event_ctx = EventContext::new(api_event_fd)?; + + Ok(Vmm { + event_ctx, + epoll_manager, + vm, + vcpu_seccomp_filter, + vmm_seccomp_filter, + }) + } + + /// Get a reference to a virtual machine managed by the VMM; `_id` is currently ignored and the single VM is always returned. + pub fn get_vm_by_id(&self, _id: &str) -> Option<&Vm> { + Some(&self.vm) + } + + /// Get a mutable reference to a virtual machine managed by the VMM; `_id` is currently ignored and the single VM is always returned. + pub fn get_vm_by_id_mut(&mut self, _id: &str) -> Option<&mut Vm> { + Some(&mut self.vm) + } + + /// Get a copy of the seccomp rules for vCPU threads. + pub fn vcpu_seccomp_filter(&self) -> BpfProgram { + self.vcpu_seccomp_filter.clone() + } + + /// Get a copy of the seccomp rules for VMM threads. + pub fn vmm_seccomp_filter(&self) -> BpfProgram { + self.vmm_seccomp_filter.clone() + } + + /// Run the event loop to service API requests until an exit event or a non-retried epoll error, then return the exit code produced by `stop()`. + /// + /// # Arguments + /// + /// * `vmm` - An Arc reference to the global Vmm instance. + /// * `service` - VMM Service provider. + pub fn run_vmm_event_loop(vmm: Arc>, mut service: VmmService) -> i32 { + let epoll_mgr = vmm.lock().unwrap().epoll_manager.clone(); + let mut event_mgr = + EventManager::new(&vmm, epoll_mgr).expect("Cannot create epoll manager"); + + 'poll: loop { + match event_mgr.handle_events(-1) { + Ok(_) => { + // Check whether there are pending vmm events. + if event_mgr.fetch_vmm_event_count() == 0 { + continue; + } + + let mut v = vmm.lock().unwrap(); + if v.event_ctx.api_event_flag { + // The run_vmm_action() needs to access event_mgr, so it could + // not be handled in EpollHandler::handle_events(). It has been + // delayed to the main loop. 
+ v.event_ctx.api_event_flag = false; + service + .run_vmm_action(&mut v, &mut event_mgr) + .unwrap_or_else(|_| { + warn!("got spurious notification from api thread"); + }); + } + if v.event_ctx.exit_evt_flag { + info!("Gracefully terminated VMM control loop"); + return v.stop(EXIT_CODE_OK as i32); + } + } + Err(e) => { + error!("Abruptly exited VMM control loop: {:?}", e); + if let EpollError::EpollMgr(dbs_utils::epoll_manager::Error::Epoll(e)) = e { + if e.errno() == libc::EAGAIN || e.errno() == libc::EINTR { + continue 'poll; + } + } + return vmm.lock().unwrap().stop(EXIT_CODE_GENERIC_ERROR as i32); + } + } + } + } + + /// Stops the VM: if it has been initialized, removes its devices, resets the console, asks the vCPU manager to exit all vCPUs and records `exit_code` on the VM; returns `exit_code` without terminating the process here. + fn stop(&mut self, exit_code: i32) -> i32 { + info!("Vmm is stopping."); + if let Some(vm) = self.get_vm_by_id_mut("") { + if vm.is_vm_initialized() { + if let Err(e) = vm.remove_devices() { + warn!("failed to remove devices: {:?}", e); + } + + if let Err(e) = vm.reset_console() { + warn!("Cannot set canonical mode for the terminal. {:?}", e); + } + + // Now, we use exit_code instead of invoking _exit to + // terminate process, so all of vcpu threads should be stopped + // prior to vmm event loop. + match vm.vcpu_manager() { + Ok(mut mgr) => { + if let Err(e) = mgr.exit_all_vcpus() { + warn!("Failed to exit vcpu thread. {:?}", e); + } + } + Err(e) => warn!("Failed to get vcpu manager {:?}", e), + } + + // Save the exit state to the VM instead of exiting the process. 
+ vm.vm_exit(exit_code); + } + } + + exit_code + } +} + +#[cfg(test)] +pub(crate) mod tests { + use super::*; + + pub fn create_vmm_instance() -> Vmm { + let info = Arc::new(RwLock::new(InstanceInfo::default())); + let event_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap(); + let seccomp_filter: BpfProgram = Vec::new(); + let epoll_manager = EpollManager::default(); + + Vmm::new_with_epoll_manager( + info, + event_fd, + epoll_manager, + seccomp_filter.clone(), + seccomp_filter, + None, + ) + .unwrap() + } + + #[test] + fn test_create_vmm_instance() { + create_vmm_instance(); + } +}