dragonball: add Vmm struct

The Vmm struct is the global coordinator that manages API servers,
virtual machines, etc.

Signed-off-by: wllenyj <wllenyj@linux.alibaba.com>
This commit is contained in:
wllenyj 2022-05-15 23:45:56 +08:00 committed by Chao Wu
parent 4d234f5742
commit 5c1ccc376b
7 changed files with 627 additions and 0 deletions

View File

@ -0,0 +1,59 @@
// Copyright (C) 2022 Alibaba Cloud. All rights reserved.
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
use serde_derive::{Deserialize, Serialize};
/// Default guest kernel command line.
///
/// The individual parameters are:
/// - `reboot=k`: shut down the guest on reboot, instead of well... rebooting;
/// - `panic=1`: on panic, reboot after 1 second;
/// - `pci=off`: do not scan for PCI devices (save boot time);
/// - `nomodules`: disable loadable kernel module support;
/// - `8250.nr_uarts=0`: disable 8250 serial interface;
/// - `i8042.noaux`: do not probe the i8042 controller for an attached mouse (save boot time);
/// - `i8042.nomux`: do not probe i8042 for a multiplexing controller (save boot time);
/// - `i8042.nopnp`: do not use ACPIPnP to discover KBD/AUX controllers (save boot time);
/// - `i8042.dumbkbd`: do not attempt to control kbd state via the i8042 (save boot time).
pub const DEFAULT_KERNEL_CMDLINE: &str = concat!(
    "reboot=k panic=1 pci=off nomodules 8250.nr_uarts=0 ",
    "i8042.noaux i8042.nomux i8042.nopnp i8042.dumbkbd",
);
/// Strongly typed data structure used to configure the boot source of the microvm.
///
/// Unknown fields are rejected during deserialization (`deny_unknown_fields`).
#[derive(Clone, Debug, Deserialize, PartialEq, Serialize, Default)]
#[serde(deny_unknown_fields)]
pub struct BootSourceConfig {
/// Path of the kernel image.
///
/// Only uncompressed kernel images are supported by Dragonball.
pub kernel_path: String,
/// Path of the initrd, if there is one.
///
/// Note: the rootfs is not configured here, it is set in `BlockDeviceConfigInfo`.
pub initrd_path: Option<String>,
/// The boot arguments to pass to the kernel.
///
/// The field is omitted from the serialized output when it is `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub boot_args: Option<String>,
}
/// Errors associated with actions on `BootSourceConfig`.
#[derive(Debug, thiserror::Error)]
pub enum BootSourceConfigError {
/// The virtual machine instance ID is invalid.
#[error("the virtual machine instance ID is invalid")]
InvalidVMID,
/// The kernel file cannot be opened; carries the underlying I/O error.
#[error(
"the kernel file cannot be opened due to invalid kernel path or invalid permissions: {0}"
)]
InvalidKernelPath(#[source] std::io::Error),
/// The initrd file cannot be opened; carries the underlying I/O error.
#[error("the initrd file cannot be opened due to invalid path or invalid permissions: {0}")]
InvalidInitrdPath(#[source] std::io::Error),
/// The kernel command line is invalid; carries the error reported by `linux_loader`.
#[error("the kernel command line is invalid: {0}")]
InvalidKernelCommandLine(#[source] linux_loader::cmdline::Error),
/// The boot source cannot be updated after the microVM has booted.
#[error("the update operation is not allowed after boot")]
UpdateNotAllowedPostBoot,
}

View File

@ -3,5 +3,15 @@
//! API Version 1 related data structures to configure the vmm.
mod vmm_action;
pub use self::vmm_action::{
VmmAction, VmmActionError, VmmData, VmmRequest, VmmResponse, VmmService,
};
/// Wrapper for configuring the microVM boot source.
mod boot_source;
pub use self::boot_source::{BootSourceConfig, BootSourceConfigError, DEFAULT_KERNEL_CMDLINE};
/// Wrapper over the microVM general information.
mod instance_info;
pub use self::instance_info::{InstanceInfo, InstanceState};

View File

@ -0,0 +1,148 @@
// Copyright (C) 2020-2022 Alibaba Cloud. All rights reserved.
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the THIRD-PARTY file.
use std::fs::File;
use std::sync::mpsc::{Receiver, Sender, TryRecvError};
use log::{debug, error, warn};
use vmm_sys_util::eventfd::EventFd;
use crate::error::Result;
use crate::event_manager::EventManager;
use crate::vm::{KernelConfigInfo, VmConfigInfo};
use crate::vmm::Vmm;
use super::*;
/// Wrapper for all errors associated with VMM actions.
///
/// Each variant wraps the error type of one kind of action, so that a single error type
/// can be carried in [`VmmRequestResult`].
#[derive(Debug, thiserror::Error)]
pub enum VmmActionError {
/// The action `ConfigureBootSource` failed either because of bad user input or an internal
/// error.
#[error("failed to configure boot source for VM: {0}")]
BootSource(#[source] BootSourceConfigError),
}
/// This enum represents the public interface of the VMM. Each action contains various
/// bits of information (ids, paths, etc.).
#[derive(Clone, Debug, PartialEq)]
pub enum VmmAction {
/// Configure the boot source of the microVM using the supplied `BootSourceConfig`. This
/// action can only be called before the microVM has booted.
ConfigureBootSource(BootSourceConfig),
}
/// The enum represents the response sent by the VMM in case of success. The response is either
/// empty, when no data needs to be sent, or an internal VMM structure.
///
/// Only the empty response is defined at the moment.
#[derive(Debug)]
pub enum VmmData {
/// No data is sent on the channel.
Empty,
}
/// Request data type used to communicate between the API and the VMM.
///
/// A boxed [`VmmAction`].
pub type VmmRequest = Box<VmmAction>;
/// Data type used to communicate between the API and the VMM.
///
/// The outcome of handling one action: [`VmmData`] on success, [`VmmActionError`] on failure.
pub type VmmRequestResult = std::result::Result<VmmData, VmmActionError>;
/// Response data type used to communicate between the API and the VMM.
///
/// A boxed [`VmmRequestResult`].
pub type VmmResponse = Box<VmmRequestResult>;
/// VMM Service to handle requests from the API server.
///
/// There are two levels of API servers as below:
///
/// ```text
/// API client <--> VMM API Server <--> VMM Core
/// ```
pub struct VmmService {
// Receiving end of the channel carrying requests from the API server.
from_api: Receiver<VmmRequest>,
// Sending end of the channel carrying responses back to the API server.
to_api: Sender<VmmResponse>,
// Machine configuration, initialized to its default value in `new()`.
machine_config: VmConfigInfo,
}
impl VmmService {
    /// Create a new VMM API server instance.
    ///
    /// # Arguments
    ///
    /// * `from_api` - Receiving end of the channel carrying requests from the API server.
    /// * `to_api` - Sending end of the channel carrying responses to the API server.
    pub fn new(from_api: Receiver<VmmRequest>, to_api: Sender<VmmResponse>) -> Self {
        VmmService {
            from_api,
            to_api,
            machine_config: VmConfigInfo::default(),
        }
    }

    /// Handle requests from the HTTP API Server and send back replies.
    ///
    /// Performs a single non-blocking receive on the request channel. When no request is
    /// pending, a warning is logged and `Ok(())` is returned. Otherwise the request is
    /// dispatched and its result is sent back on the response channel.
    ///
    /// # Panics
    ///
    /// Panics if the sending half of the request channel has been disconnected, or if the
    /// response channel has been closed.
    pub fn run_vmm_action(&mut self, vmm: &mut Vmm, _event_mgr: &mut EventManager) -> Result<()> {
        let request = match self.from_api.try_recv() {
            Ok(t) => *t,
            Err(TryRecvError::Empty) => {
                warn!("Got a spurious notification from api thread");
                return Ok(());
            }
            Err(TryRecvError::Disconnected) => {
                panic!("The channel's sending half was disconnected. Cannot receive data.");
            }
        };
        debug!("receive vmm action: {:?}", request);

        let response = match request {
            VmmAction::ConfigureBootSource(boot_source_body) => {
                self.configure_boot_source(vmm, boot_source_body)
            }
        };
        debug!("send vmm response: {:?}", response);

        self.send_response(response)
    }

    /// Box `result` and send it to the API server.
    ///
    /// # Panics
    ///
    /// Panics if the response channel has been closed.
    fn send_response(&self, result: VmmRequestResult) -> Result<()> {
        self.to_api
            .send(Box::new(result))
            .map_err(|_| ())
            .expect("vmm: one-shot API result channel has been closed");

        Ok(())
    }

    /// Validate `boot_source_config` and install it as the kernel configuration of the VM.
    ///
    /// The kernel image and the optional initrd are opened here, and the kernel command line
    /// is built from `boot_args`, falling back to `DEFAULT_KERNEL_CMDLINE` when it is `None`.
    ///
    /// # Errors
    ///
    /// Returns `VmmActionError::BootSource` wrapping:
    /// * `InvalidVMID` if the VM cannot be found;
    /// * `UpdateNotAllowedPostBoot` if the VM is already initialized;
    /// * `InvalidKernelPath` / `InvalidInitrdPath` if the respective file cannot be opened;
    /// * `InvalidKernelCommandLine` if the boot arguments are rejected by the command line.
    fn configure_boot_source(
        &self,
        vmm: &mut Vmm,
        boot_source_config: BootSourceConfig,
    ) -> VmmRequestResult {
        use super::BootSourceConfigError::{
            InvalidInitrdPath, InvalidKernelCommandLine, InvalidKernelPath, InvalidVMID,
            UpdateNotAllowedPostBoot,
        };
        use super::VmmActionError::BootSource;

        let vm = vmm.get_vm_by_id_mut("").ok_or(BootSource(InvalidVMID))?;
        if vm.is_vm_initialized() {
            return Err(BootSource(UpdateNotAllowedPostBoot));
        }

        let kernel_file = File::open(&boot_source_config.kernel_path)
            .map_err(|e| BootSource(InvalidKernelPath(e)))?;

        // `Option<io::Result<File>>` -> `io::Result<Option<File>>`, so that `?` can be used.
        let initrd_file = boot_source_config
            .initrd_path
            .as_ref()
            .map(File::open)
            .transpose()
            .map_err(|e| BootSource(InvalidInitrdPath(e)))?;

        let mut cmdline = linux_loader::cmdline::Cmdline::new(dbs_boot::layout::CMDLINE_MAX_SIZE);
        // The configuration is owned, so the boot arguments can be moved out without cloning.
        let boot_args = boot_source_config
            .boot_args
            .unwrap_or_else(|| String::from(DEFAULT_KERNEL_CMDLINE));
        cmdline
            .insert_str(boot_args)
            .map_err(|e| BootSource(InvalidKernelCommandLine(e)))?;

        let kernel_config = KernelConfigInfo::new(kernel_file, initrd_file, cmdline);
        vm.set_kernel_config(kernel_config);

        Ok(VmmData::Empty)
    }
}

View File

@ -178,3 +178,24 @@ pub enum LoadInitrdError {
#[error("failed to read the initrd image: {0}")]
ReadInitrd(#[source] std::io::Error),
}
/// A dedicated error type to glue with the vmm_epoll crate.
#[derive(Debug, thiserror::Error)]
pub enum EpollError {
/// Generic internal error.
#[error("unclassfied internal error")]
InternalError,
/// Errors from the epoll subsystem.
#[error("failed to issue epoll syscall: {0}")]
EpollMgr(#[from] dbs_utils::epoll_manager::Error),
/// Generic IO errors.
#[error(transparent)]
IOError(std::io::Error),
#[cfg(feature = "dbs-virtio-devices")]
/// Errors from virtio devices.
#[error("failed to manager Virtio device: {0}")]
VirtIoDevice(#[source] VirtIoError),
}

View File

@ -0,0 +1,169 @@
// Copyright (C) 2020-2022 Alibaba Cloud. All rights reserved.
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the THIRD-PARTY file.
//! Event manager to manage and handle IO events and requests from the API server.
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{Arc, Mutex};
use dbs_utils::epoll_manager::{
EpollManager, EventOps, EventSet, Events, MutEventSubscriber, SubscriberId,
};
use log::{error, warn};
use vmm_sys_util::eventfd::EventFd;
use crate::error::{EpollError, Result};
use crate::vmm::Vmm;
// Statically assigned epoll slot for VMM events.
/// Epoll slot used for the exit notification eventfd.
pub(crate) const EPOLL_EVENT_EXIT: u32 = 0;
/// Epoll slot used for the API request notification eventfd.
pub(crate) const EPOLL_EVENT_API_REQUEST: u32 = 1;
/// Shared information between vmm::vmm_thread_event_loop() and VmmEpollHandler.
pub(crate) struct EventContext {
/// Eventfd used to notify the VMM about pending API requests.
pub api_event_fd: EventFd,
/// Set when an API request event has been received and is waiting to be handled.
pub api_event_flag: bool,
/// Set when an exit event has been received.
pub exit_evt_flag: bool,
}
impl EventContext {
    /// Build an [`EventContext`] around `api_event_fd`, with both event flags cleared.
    pub fn new(api_event_fd: EventFd) -> Result<Self> {
        let ctx = Self {
            api_event_fd,
            api_event_flag: false,
            exit_evt_flag: false,
        };
        Ok(ctx)
    }
}
/// Event manager for VMM to handle API requests and IO events.
pub struct EventManager {
// Underlying epoll manager used to register and poll events.
epoll_mgr: EpollManager,
// Id of the `VmmEpollHandler` subscriber registered in `new()`; removed again on drop.
subscriber_id: SubscriberId,
// Number of VMM events seen by the handler since the last `fetch_vmm_event_count()`.
vmm_event_count: Arc<AtomicUsize>,
}
impl Drop for EventManager {
    /// Unregister the VMM epoll handler; a failure is only logged.
    fn drop(&mut self) {
        // Reference cycle: Vmm -> Vm -> EpollManager -> VmmEpollHandler -> Vmm.
        // Removing the VmmEpollHandler breaks the cycle so that the Vmm can be dropped.
        if let Err(e) = self.epoll_mgr.remove_subscriber(self.subscriber_id) {
            error!("event_manager: remove_subscriber err. {:?}", e);
        }
    }
}
impl EventManager {
/// Create a new event manager associated with the VMM object.
pub fn new(vmm: &Arc<Mutex<Vmm>>, epoll_mgr: EpollManager) -> Result<Self> {
let vmm_event_count = Arc::new(AtomicUsize::new(0));
let handler: Box<dyn MutEventSubscriber + Send> = Box::new(VmmEpollHandler {
vmm: vmm.clone(),
vmm_event_count: vmm_event_count.clone(),
});
let subscriber_id = epoll_mgr.add_subscriber(handler);
Ok(EventManager {
epoll_mgr,
subscriber_id,
vmm_event_count,
})
}
/// Get the underlying epoll event manager.
pub fn epoll_manager(&self) -> EpollManager {
self.epoll_mgr.clone()
}
/// Registry the eventfd for exit notification.
pub fn register_exit_eventfd(
&mut self,
exit_evt: &EventFd,
) -> std::result::Result<(), EpollError> {
let events = Events::with_data(exit_evt, EPOLL_EVENT_EXIT, EventSet::IN);
self.epoll_mgr
.add_event(self.subscriber_id, events)
.map_err(EpollError::EpollMgr)
}
/// Poll pending events and invoke registered event handler.
///
/// # Arguments:
/// * max_events: maximum number of pending events to handle
/// * timeout: maximum time in milliseconds to wait
pub fn handle_events(&self, timeout: i32) -> std::result::Result<usize, EpollError> {
self.epoll_mgr
.handle_events(timeout)
.map_err(EpollError::EpollMgr)
}
/// Fetch the VMM event count and reset it to zero.
pub fn fetch_vmm_event_count(&self) -> usize {
self.vmm_event_count.swap(0, Ordering::AcqRel)
}
}
/// Epoll subscriber that records API and exit events on the shared `Vmm`.
struct VmmEpollHandler {
// The VMM whose `event_ctx` flags are updated when events arrive.
vmm: Arc<Mutex<Vmm>>,
// Counter shared with `EventManager`, incremented once per handled VMM event.
vmm_event_count: Arc<AtomicUsize>,
}
impl MutEventSubscriber for VmmEpollHandler {
fn process(&mut self, events: Events, _ops: &mut EventOps) {
// Do not try to recover when the lock has already been poisoned.
// And be careful to avoid deadlock between process() and vmm::vmm_thread_event_loop().
let mut vmm = self.vmm.lock().unwrap();
match events.data() {
EPOLL_EVENT_API_REQUEST => {
if let Err(e) = vmm.event_ctx.api_event_fd.read() {
error!("event_manager: failed to read API eventfd, {:?}", e);
}
vmm.event_ctx.api_event_flag = true;
self.vmm_event_count.fetch_add(1, Ordering::AcqRel);
}
EPOLL_EVENT_EXIT => {
let vm = vmm.get_vm_by_id("").unwrap();
match vm.get_reset_eventfd() {
Some(ev) => {
if let Err(e) = ev.read() {
error!("event_manager: failed to read exit eventfd, {:?}", e);
}
}
None => warn!("event_manager: leftover exit event in epoll context!"),
}
vmm.event_ctx.exit_evt_flag = true;
self.vmm_event_count.fetch_add(1, Ordering::AcqRel);
}
_ => error!("event_manager: unknown epoll slot number {}", events.data()),
}
}
fn init(&mut self, ops: &mut EventOps) {
// Do not expect poisoned lock.
let vmm = self.vmm.lock().unwrap();
let events = Events::with_data(
&vmm.event_ctx.api_event_fd,
EPOLL_EVENT_API_REQUEST,
EventSet::IN,
);
if let Err(e) = ops.add(events) {
error!(
"event_manager: failed to register epoll event for API server, {:?}",
e
);
}
}
}

View File

@ -32,8 +32,13 @@ pub mod vcpu;
/// Virtual machine manager for virtual machines.
pub mod vm;
mod event_manager;
mod io_manager;
mod vmm;
pub use self::error::StartMicrovmError;
pub use self::io_manager::IoManagerCached;
pub use self::vmm::Vmm;
/// Success exit code, used when the VMM terminates without error.
pub const EXIT_CODE_OK: u8 = 0;

215
src/dragonball/src/vmm.rs Normal file
View File

@ -0,0 +1,215 @@
// Copyright (C) 2020-2022 Alibaba Cloud. All rights reserved.
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the THIRD-PARTY file.
use std::os::unix::io::RawFd;
use std::sync::{Arc, Mutex, RwLock};
use dbs_utils::epoll_manager::EpollManager;
use log::{error, info, warn};
use seccompiler::BpfProgram;
use vmm_sys_util::eventfd::EventFd;
use crate::api::v1::{InstanceInfo, VmmService};
use crate::error::{EpollError, Result};
use crate::event_manager::{EventContext, EventManager};
use crate::vm::Vm;
use crate::{EXIT_CODE_GENERIC_ERROR, EXIT_CODE_OK};
/// Global coordinator to manage API servers, virtual machines, upgrade etc.
///
/// Originally firecracker assumed that a VMM only manages a single VM, and did not
/// distinguish between VMM and VM, which resulted in a mixed and confusing design.
/// The object model is now built explicitly as:
///
/// ```text
///  |---Vmm API Server--<-1:1-> HTTP API Server
///  |        |----------<-1:1-> Shimv2/CRI API Server
///  |
/// Vmm <-1:N-> Vm <-1:1-> Address Space Manager <-1:N-> GuestMemory
///  ^           ^---1:1-> Device Manager <-1:N-> Device
///  |           ^---1:1-> Resource Manager
///  |           ^---1:N-> Vcpu
///  |---<-1:N-> Event Manager
/// ```
pub struct Vmm {
// Event state shared with the epoll handler (API eventfd and pending-event flags).
pub(crate) event_ctx: EventContext,
// Epoll manager; a clone is handed to the `Vm` and to the event loop's `EventManager`.
epoll_manager: EpollManager,
// Will change to a HashMap when enabling 1 VMM with multiple VMs.
vm: Vm,
// Seccomp rules for vCPU threads, returned by `vcpu_seccomp_filter()`.
vcpu_seccomp_filter: BpfProgram,
// Seccomp rules for VMM threads, returned by `vmm_seccomp_filter()`.
vmm_seccomp_filter: BpfProgram,
}
impl Vmm {
/// Create a Virtual Machine Monitor instance.
pub fn new(
api_shared_info: Arc<RwLock<InstanceInfo>>,
api_event_fd: EventFd,
vmm_seccomp_filter: BpfProgram,
vcpu_seccomp_filter: BpfProgram,
kvm_fd: Option<RawFd>,
) -> Result<Self> {
let epoll_manager = EpollManager::default();
Self::new_with_epoll_manager(
api_shared_info,
api_event_fd,
epoll_manager,
vmm_seccomp_filter,
vcpu_seccomp_filter,
kvm_fd,
)
}
/// Create a Virtual Machine Monitor instance with a epoll_manager.
pub fn new_with_epoll_manager(
api_shared_info: Arc<RwLock<InstanceInfo>>,
api_event_fd: EventFd,
epoll_manager: EpollManager,
vmm_seccomp_filter: BpfProgram,
vcpu_seccomp_filter: BpfProgram,
kvm_fd: Option<RawFd>,
) -> Result<Self> {
let vm = Vm::new(kvm_fd, api_shared_info, epoll_manager.clone())?;
let event_ctx = EventContext::new(api_event_fd)?;
Ok(Vmm {
event_ctx,
epoll_manager,
vm,
vcpu_seccomp_filter,
vmm_seccomp_filter,
})
}
/// Get a reference to a virtual machine managed by the VMM.
pub fn get_vm_by_id(&self, _id: &str) -> Option<&Vm> {
Some(&self.vm)
}
/// Get a mutable reference to a virtual machine managed by the VMM.
pub fn get_vm_by_id_mut(&mut self, _id: &str) -> Option<&mut Vm> {
Some(&mut self.vm)
}
/// Get the seccomp rules for vCPU threads.
pub fn vcpu_seccomp_filter(&self) -> BpfProgram {
self.vcpu_seccomp_filter.clone()
}
/// Get the seccomp rules for VMM threads.
pub fn vmm_seccomp_filter(&self) -> BpfProgram {
self.vmm_seccomp_filter.clone()
}
/// Run the event loop to service API requests.
///
/// # Arguments
///
/// * `vmm` - An Arc reference to the global Vmm instance.
/// * `service` - VMM Service provider.
pub fn run_vmm_event_loop(vmm: Arc<Mutex<Vmm>>, mut service: VmmService) -> i32 {
let epoll_mgr = vmm.lock().unwrap().epoll_manager.clone();
let mut event_mgr =
EventManager::new(&vmm, epoll_mgr).expect("Cannot create epoll manager");
'poll: loop {
match event_mgr.handle_events(-1) {
Ok(_) => {
// Check whether there are pending vmm events.
if event_mgr.fetch_vmm_event_count() == 0 {
continue;
}
let mut v = vmm.lock().unwrap();
if v.event_ctx.api_event_flag {
// The run_vmm_action() needs to access event_mgr, so it could
// not be handled in EpollHandler::handle_events(). It has been
// delayed to the main loop.
v.event_ctx.api_event_flag = false;
service
.run_vmm_action(&mut v, &mut event_mgr)
.unwrap_or_else(|_| {
warn!("got spurious notification from api thread");
});
}
if v.event_ctx.exit_evt_flag {
info!("Gracefully terminated VMM control loop");
return v.stop(EXIT_CODE_OK as i32);
}
}
Err(e) => {
error!("Abruptly exited VMM control loop: {:?}", e);
if let EpollError::EpollMgr(dbs_utils::epoll_manager::Error::Epoll(e)) = e {
if e.errno() == libc::EAGAIN || e.errno() == libc::EINTR {
continue 'poll;
}
}
return vmm.lock().unwrap().stop(EXIT_CODE_GENERIC_ERROR as i32);
}
}
}
}
/// Waits for all vCPUs to exit and terminates the Dragonball process.
fn stop(&mut self, exit_code: i32) -> i32 {
info!("Vmm is stopping.");
if let Some(vm) = self.get_vm_by_id_mut("") {
if vm.is_vm_initialized() {
if let Err(e) = vm.remove_devices() {
warn!("failed to remove devices: {:?}", e);
}
if let Err(e) = vm.reset_console() {
warn!("Cannot set canonical mode for the terminal. {:?}", e);
}
// Now, we use exit_code instead of invoking _exit to
// terminate process, so all of vcpu threads should be stopped
// prior to vmm event loop.
match vm.vcpu_manager() {
Ok(mut mgr) => {
if let Err(e) = mgr.exit_all_vcpus() {
warn!("Failed to exit vcpu thread. {:?}", e);
}
}
Err(e) => warn!("Failed to get vcpu manager {:?}", e),
}
// save exit state to VM, instead of exit process.
vm.vm_exit(exit_code);
}
}
exit_code
}
}
#[cfg(test)]
pub(crate) mod tests {
    use super::*;

    /// Build a `Vmm` with default instance info, a non-blocking API eventfd, empty
    /// seccomp filters and no KVM fd.
    pub fn create_vmm_instance() -> Vmm {
        let shared_info = Arc::new(RwLock::new(InstanceInfo::default()));
        let api_event_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap();
        let vcpu_filter = BpfProgram::new();
        let vmm_filter = vcpu_filter.clone();
        let epoll_mgr = EpollManager::default();

        let created = Vmm::new_with_epoll_manager(
            shared_info,
            api_event_fd,
            epoll_mgr,
            vmm_filter,
            vcpu_filter,
            None,
        );
        created.unwrap()
    }

    #[test]
    fn test_create_vmm_instance() {
        create_vmm_instance();
    }
}