dragonball: add vm struct

The vm struct manages the resources and controls the state of a virtual
machine instance.

Signed-off-by: wllenyj <wllenyj@linux.alibaba.com>
Signed-off-by: jingshan <jingshan@linux.alibaba.com>
Signed-off-by: Liu Jiang <gerry@linux.alibaba.com>
Signed-off-by: Chao Wu <chaowu@linux.alibaba.com>
This commit is contained in:
wllenyj 2022-05-15 21:42:00 +08:00 committed by Chao Wu
parent 527b73a8e5
commit cfd5dae47c
9 changed files with 1236 additions and 6 deletions

View File

@ -351,6 +351,16 @@ pub struct DmesgWriter {
logger: slog::Logger,
}
impl DmesgWriter {
    /// Builds a writer with a 1024-byte buffer and a child logger of `logger`
    /// tagged with `"subsystem" => "dmesg"`.
    pub fn new(logger: slog::Logger) -> Self {
        let buf = BytesMut::with_capacity(1024);
        let logger = logger.new(slog::o!("subsystem" => "dmesg"));
        Self { buf, logger }
    }
}
impl io::Write for DmesgWriter {
/// 0000000 [ 0 . 0 3 4 9 1 6 ] R
/// 5b 20 20 20 20 30 2e 30 33 34 39 31 36 5d 20 52

View File

@ -8,6 +8,8 @@ use std::sync::{Arc, Mutex, MutexGuard};
use arc_swap::ArcSwap;
use dbs_address_space::AddressSpace;
#[cfg(target_arch = "aarch64")]
use dbs_arch::{DeviceType, MMIODeviceInfo};
use dbs_device::device_manager::{Error as IoManagerError, IoManager, IoManagerContext};
use dbs_device::resources::Resource;
use dbs_device::DeviceIo;
@ -20,6 +22,8 @@ use kvm_ioctls::VmFd;
use dbs_device::resources::ResourceConstraint;
#[cfg(feature = "dbs-virtio-devices")]
use dbs_virtio_devices as virtio;
#[cfg(feature = "virtio-vsock")]
use dbs_virtio_devices::vsock::backend::VsockInnerConnector;
#[cfg(feature = "dbs-virtio-devices")]
use dbs_virtio_devices::{
mmio::{
@ -38,7 +42,8 @@ use dbs_upcall::{
use crate::address_space_manager::GuestAddressSpaceImpl;
use crate::error::StartMicrovmError;
use crate::resource_manager::ResourceManager;
use crate::vm::KernelConfigInfo;
use crate::vm::{KernelConfigInfo, Vm};
use crate::IoManagerCached;
/// Virtual machine console device manager.
pub mod console_manager;
@ -240,6 +245,10 @@ impl DeviceOpContext {
}
}
/// Create a device operation context for the boot path.
///
/// Forwards `epoll_mgr` and the VM's device manager to `Self::new`, passing `None` for the two
/// optional arguments and `false` for the trailing flag.
// NOTE(review): `create_hotplug_ctx` passes guest memory, the address space and `true` in the
// same positions, so the trailing flag is presumably the hotplug marker — confirm.
pub(crate) fn create_boot_ctx(vm: &Vm, epoll_mgr: Option<EpollManager>) -> Self {
Self::new(epoll_mgr, vm.device_manager(), None, None, false)
}
pub(crate) fn get_vm_as(&self) -> Result<GuestAddressSpaceImpl> {
match self.vm_as.as_ref() {
Some(v) => Ok(v.clone()),
@ -303,6 +312,23 @@ impl DeviceOpContext {
#[cfg(all(feature = "hotplug", feature = "dbs-upcall"))]
impl DeviceOpContext {
/// Create a device operation context for the hotplug path.
///
/// The context carries a clone of the VM's guest memory handle, a clone of its address space
/// (if any) and a clone of the VM's upcall client.
///
/// # Panics
///
/// Panics if the VM has no guest memory yet (`vm.vm_as()` returns `None`).
pub(crate) fn create_hotplug_ctx(vm: &Vm, epoll_mgr: Option<EpollManager>) -> Self {
    let vm_as = vm.vm_as().expect("VM should have memory ready").clone();
    let mut ctx = Self::new(
        epoll_mgr,
        vm.device_manager(),
        Some(vm_as),
        vm.vm_address_space().cloned(),
        true,
    );
    ctx.upcall_client = vm.upcall_client().clone();
    ctx
}
fn call_hotplug_device(
&self,
req: DevMgrRequest,
@ -380,6 +406,8 @@ pub struct DeviceManager {
pub(crate) legacy_manager: Option<LegacyDeviceManager>,
#[cfg(feature = "virtio-vsock")]
pub(crate) vsock_manager: VsockDeviceMgr,
#[cfg(target_arch = "aarch64")]
mmio_device_info: HashMap<(DeviceType, String), MMIODeviceInfo>,
}
impl DeviceManager {
@ -401,9 +429,16 @@ impl DeviceManager {
legacy_manager: None,
#[cfg(feature = "virtio-vsock")]
vsock_manager: VsockDeviceMgr::default(),
#[cfg(target_arch = "aarch64")]
mmio_device_info: HashMap::new(),
}
}
/// Get the underlying IoManager to dispatch IO read/write requests.
///
/// Each call wraps a clone of the manager's `io_manager` field in a new `IoManagerCached`.
pub fn io_manager(&self) -> IoManagerCached {
IoManagerCached::new(self.io_manager.clone())
}
/// Create the underlying interrupt manager for the device manager.
pub fn create_interrupt_manager(&mut self) -> Result<()> {
self.irq_manager
@ -494,6 +529,12 @@ impl DeviceManager {
self.con_manager.reset_console()
}
#[cfg(target_arch = "aarch64")]
/// Return mmio device info for FDT build.
///
/// The map is keyed by `(DeviceType, String)`. This accessor currently always returns `Some`,
/// borrowing the manager's `mmio_device_info` map.
pub fn get_mmio_device_info(&self) -> Option<&HashMap<(DeviceType, String), MMIODeviceInfo>> {
Some(&self.mmio_device_info)
}
/// Create all registered devices when booting the associated virtual machine.
pub fn create_devices(
&mut self,
@ -524,6 +565,21 @@ impl DeviceManager {
Ok(())
}
/// Start all registered devices when booting the associated virtual machine.
///
/// Currently a placeholder: it performs no work and always returns `Ok(())`.
pub fn start_devices(&mut self) -> std::result::Result<(), StartMicrovmError> {
Ok(())
}
/// Remove all devices when shutting down the associated virtual machine.
///
/// Currently a placeholder: all three parameters are unused (hence the `_` prefixes) and
/// `Ok(())` is returned unconditionally.
pub fn remove_devices(
&mut self,
_vm_as: GuestAddressSpaceImpl,
_epoll_mgr: EpollManager,
_address_space: Option<&AddressSpace>,
) -> Result<()> {
Ok(())
}
#[cfg(target_arch = "x86_64")]
/// Get the underlying eventfd for vm exit notification.
pub fn get_reset_eventfd(&self) -> Result<vmm_sys_util::eventfd::EventFd> {
@ -689,3 +745,21 @@ impl DeviceManager {
}
}
}
#[cfg(feature = "hotplug")]
impl DeviceManager {
    /// Get the default inner connector of the vsock device, if one is available.
    ///
    /// Returns `None` when the vsock manager cannot provide a default connector, or when the
    /// `virtio-vsock` feature is disabled.
    pub fn get_vsock_inner_connector(&mut self) -> Option<VsockInnerConnector> {
        #[cfg(feature = "virtio-vsock")]
        {
            // The non-success case of `get_default_connector()` is deliberately collapsed
            // into `None`.
            self.vsock_manager
                .get_default_connector()
                .map(Some)
                .unwrap_or(None)
        }
        #[cfg(not(feature = "virtio-vsock"))]
        {
            None
        }
    }
}

View File

@ -12,7 +12,10 @@
#[cfg(feature = "dbs-virtio-devices")]
use dbs_virtio_devices::Error as VirtIoError;
use crate::address_space_manager;
use crate::device_manager;
use crate::vcpu;
use crate::vm;
/// Shorthand result type for internal VMM commands.
pub type Result<T> = std::result::Result<T, Error>;
@ -23,8 +26,20 @@ pub type Result<T> = std::result::Result<T, Error>;
/// of the host (for example if Dragonball doesn't have permissions to open the KVM fd).
#[derive(Debug, thiserror::Error)]
pub enum Error {
/// Empty AddressSpace from parameters.
#[error("Empty AddressSpace from parameters")]
AddressSpace,
/// The zero page extends past the end of guest_mem.
#[error("the guest zero page extends past the end of guest memory")]
ZeroPagePastRamEnd,
/// Error writing the zero page of guest memory.
#[error("failed to write to guest zero page")]
ZeroPageSetup,
/// Failure occurs in issuing KVM ioctls and errors will be returned from kvm_ioctls lib.
#[error("failure in issuing KVM ioctl command")]
#[error("failure in issuing KVM ioctl command: {0}")]
Kvm(#[source] kvm_ioctls::Error),
/// The host kernel reports an unsupported KVM API version.
@ -32,17 +47,30 @@ pub enum Error {
KvmApiVersion(i32),
/// Cannot initialize the KVM context due to missing capabilities.
#[error("missing KVM capability")]
#[error("missing KVM capability: {0:?}")]
KvmCap(kvm_ioctls::Cap),
#[cfg(target_arch = "x86_64")]
#[error("failed to configure MSRs")]
#[error("failed to configure MSRs: {0:?}")]
/// Cannot configure MSRs
GuestMSRs(dbs_arch::msr::Error),
/// MSR inner error
#[error("MSR inner error")]
Msr(vmm_sys_util::fam::Error),
/// Error writing MP table to memory.
#[cfg(target_arch = "x86_64")]
#[error("failed to write MP table to guest memory: {0}")]
MpTableSetup(#[source] dbs_boot::mptable::Error),
/// Fail to boot system
#[error("failed to boot system: {0}")]
BootSystem(#[source] dbs_boot::Error),
/// Cannot open the VM file descriptor.
#[error(transparent)]
Vm(vm::VmError),
}
/// Errors associated with starting the instance.
@ -52,6 +80,48 @@ pub enum StartMicrovmError {
#[error("failure while reading from EventFd file descriptor")]
EventFd,
/// The start command was issued more than once.
#[error("the virtual machine is already running")]
MicroVMAlreadyRunning,
/// Cannot start the VM because the kernel was not configured.
#[error("cannot start the virtual machine without kernel configuration")]
MissingKernelConfig,
#[cfg(feature = "hotplug")]
/// Upcall initialize miss vsock device.
#[error("the upcall client needs a virtio-vsock device for communication")]
UpcallMissVsock,
/// Upcall is not ready
#[error("the upcall client is not ready")]
UpcallNotReady,
/// The configuration passed in is invalid.
#[error("invalid virtual machine configuration: {0} ")]
ConfigureInvalid(String),
/// This error is thrown by the minimal boot loader implementation.
/// It is related to a faulty memory configuration.
#[error("failure while configuring boot information for the virtual machine: {0}")]
ConfigureSystem(#[source] Error),
/// Cannot configure the VM.
#[error("failure while configuring the virtual machine: {0}")]
ConfigureVm(#[source] vm::VmError),
/// Cannot load initrd.
#[error("cannot load Initrd into guest memory: {0}")]
InitrdLoader(#[from] LoadInitrdError),
/// Cannot load kernel due to invalid memory configuration or invalid kernel image.
#[error("cannot load guest kernel into guest memory: {0}")]
KernelLoader(#[source] linux_loader::loader::Error),
/// Cannot load command line string.
#[error("failure while configuring guest kernel commandline: {0}")]
LoadCommandline(#[source] linux_loader::loader::Error),
/// The device manager was not configured.
#[error("the device manager failed to manage devices: {0}")]
DeviceManager(#[source] device_manager::DeviceMgrError),
@ -69,4 +139,45 @@ pub enum StartMicrovmError {
/// Cannot initialize a MMIO Vsock Device or add a device to the MMIO Bus.
#[error("failure while registering virtio-vsock device: {0}")]
RegisterVsockDevice(#[source] device_manager::DeviceMgrError),
/// Address space manager related error, e.g.cannot access guest address space manager.
#[error("address space manager related error: {0}")]
AddressManagerError(#[source] address_space_manager::AddressManagerError),
/// Cannot create a new vCPU file descriptor.
#[error("vCPU related error: {0}")]
Vcpu(#[source] vcpu::VcpuManagerError),
#[cfg(feature = "hotplug")]
/// Upcall initialize Error.
#[error("failure while initializing the upcall client: {0}")]
UpcallInitError(#[source] dbs_upcall::UpcallClientError),
#[cfg(feature = "hotplug")]
/// Upcall connect Error.
#[error("failure while connecting the upcall client: {0}")]
UpcallConnectError(#[source] dbs_upcall::UpcallClientError),
}
/// Errors associated with stopping the instance.
#[derive(Debug, thiserror::Error)]
pub enum StopMicrovmError {
/// Guest memory has not been initialized.
#[error("Guest memory has not been initialized")]
GuestMemoryNotInitialized,
/// Cannot remove devices; wraps the device manager error that caused the failure.
#[error("Failed to remove devices in device_manager {0}")]
DeviceManager(#[source] device_manager::DeviceMgrError),
}
/// Errors associated with loading the initrd image.
#[derive(Debug, thiserror::Error)]
pub enum LoadInitrdError {
/// Cannot load initrd due to an invalid memory configuration.
#[error("failed to load the initrd image to guest memory")]
LoadInitrd,
/// Cannot load initrd due to an invalid image; wraps the underlying I/O error.
#[error("failed to read the initrd image: {0}")]
ReadInitrd(#[source] std::io::Error),
}

View File

@ -6,6 +6,7 @@
mod sm;
pub mod vcpu_impl;
pub mod vcpu_manager;
pub use vcpu_manager::{VcpuManager, VcpuManagerError};
#[cfg(target_arch = "x86_64")]
use dbs_arch::cpuid::VpmuFeatureLevel;

View File

@ -964,7 +964,7 @@ pub mod tests {
#[cfg(target_arch = "x86_64")]
#[test]
fn test_vcpu_check_io_port_info() {
let (vcpu, receiver) = create_vcpu();
let (vcpu, _receiver) = create_vcpu();
// boot complete signal
let res = vcpu

View File

@ -0,0 +1,148 @@
// Copyright (C) 2022 Alibaba Cloud. All rights reserved.
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the THIRD-PARTY file.
use std::collections::HashMap;
use std::ops::Deref;
use dbs_arch::gic::GICDevice;
use dbs_arch::{DeviceInfoForFDT, DeviceType};
use dbs_boot::InitrdConfig;
use dbs_utils::epoll_manager::EpollManager;
use dbs_utils::time::TimestampUs;
use std::fmt::Debug;
use vm_memory::{GuestAddressSpace, GuestMemory};
use vmm_sys_util::eventfd::EventFd;
use super::{Vm, VmError};
use crate::address_space_manager::{GuestAddressSpaceImpl, GuestMemoryImpl};
use crate::error::Error;
use crate::StartMicrovmError;
use linux_loader::loader::Cmdline;
/// Configures the system and should be called once per vm before starting vcpu threads.
/// For aarch64, we only setup the FDT.
///
/// # Arguments
///
/// * `guest_mem` - The memory to be used by the guest.
/// * `cmdline` - The kernel commandline.
/// * `vcpu_mpidr` - Array of MPIDR register values per vcpu.
/// * `device_info` - A hashmap containing the attached devices for building FDT device nodes.
/// * `gic_device` - The GIC device.
/// * `initrd` - Information about an optional initrd.
pub fn configure_system<T: DeviceInfoForFDT + Clone + Debug, M: GuestMemory>(
guest_mem: &M,
cmdline: &str,
vcpu_mpidr: Vec<u64>,
device_info: Option<&HashMap<(DeviceType, String), T>>,
gic_device: &Box<dyn GICDevice>,
initrd: &Option<super::InitrdConfig>,
) -> super::Result<()> {
dbs_boot::fdt::create_fdt(
guest_mem,
vcpu_mpidr,
cmdline,
device_info,
gic_device,
initrd,
)
.map_err(Error::BootSystem)?;
Ok(())
}
#[cfg(target_arch = "aarch64")]
impl Vm {
/// Gets a reference to the irqchip (GIC device) of the VM.
///
/// # Panics
///
/// Panics if no irqchip handle has been stored yet, i.e. before
/// `setup_interrupt_controller()` has succeeded.
pub fn get_irqchip(&self) -> &Box<dyn GICDevice> {
    self.irqchip_handle
        .as_ref()
        .expect("irqchip must be set up before it is accessed")
}
/// Initialize the virtual machine instance.
///
/// On aarch64 this performs, in order:
/// 1) load the guest kernel image into guest memory.
/// 2) create the reset eventfd, keeping a cloned handle on the `Vm` and handing the original
///    to the vCPU manager.
/// 3) create the boot vCPUs.
/// 4) set up the interrupt controller.
/// 5) initialize the devices via `init_devices()`.
///
/// # Errors
///
/// Returns `StartMicrovmError::EventFd` if the eventfd cannot be created or cloned,
/// `StartMicrovmError::Vcpu` for vCPU manager failures, and propagates errors from kernel
/// loading, interrupt controller setup and device initialization.
pub fn init_microvm(
&mut self,
epoll_mgr: EpollManager,
vm_as: GuestAddressSpaceImpl,
request_ts: TimestampUs,
) -> std::result::Result<(), StartMicrovmError> {
let kernel_loader_result = self.load_kernel(vm_as.memory().deref())?;
// On aarch64, the vCPUs need to be created (i.e call KVM_CREATE_VCPU) and configured before
// setting up the IRQ chip because the `KVM_CREATE_VCPU` ioctl will return error if the IRQCHIP
// was already initialized.
// Search for `kvm_arch_vcpu_create` in arch/arm/kvm/arm.c.
let reset_eventfd =
EventFd::new(libc::EFD_NONBLOCK).map_err(|_| StartMicrovmError::EventFd)?;
self.reset_eventfd = Some(
reset_eventfd
.try_clone()
.map_err(|_| StartMicrovmError::EventFd)?,
);
self.vcpu_manager()
.map_err(StartMicrovmError::Vcpu)?
.set_reset_event_fd(reset_eventfd);
self.vcpu_manager()
.map_err(StartMicrovmError::Vcpu)?
.create_boot_vcpus(request_ts, kernel_loader_result.kernel_load)
.map_err(StartMicrovmError::Vcpu)?;
self.setup_interrupt_controller()?;
self.init_devices(epoll_mgr)?;
Ok(())
}
/// Creates the in-kernel GIC device model and stores its handle on the VM.
///
/// # Errors
///
/// A GIC creation failure is reported as
/// `StartMicrovmError::ConfigureVm(VmError::SetupGIC(_))`.
pub fn setup_interrupt_controller(&mut self) -> std::result::Result<(), StartMicrovmError> {
    let gic = dbs_arch::gic::create_gic(&self.fd, self.vm_config.vcpu_count.into())
        .map_err(|err| StartMicrovmError::ConfigureVm(VmError::SetupGIC(err)))?;
    self.irqchip_handle = Some(gic);
    Ok(())
}
/// Runs the aarch64-specific system configuration.
///
/// Collects the MPIDR value of every vCPU and hands it, together with the kernel command
/// line, the MMIO device info, the irqchip and the optional initrd, to `configure_system`.
///
/// # Errors
///
/// Returns `StartMicrovmError::Vcpu` if the vCPU manager is unavailable and
/// `StartMicrovmError::ConfigureSystem` if `configure_system` fails.
pub fn configure_system_arch(
    &self,
    vm_memory: &GuestMemoryImpl,
    cmdline: &Cmdline,
    initrd: Option<InitrdConfig>,
) -> std::result::Result<(), StartMicrovmError> {
    // The vCPU manager guard is kept alive until this function returns.
    let vcpus = self.vcpu_manager().map_err(StartMicrovmError::Vcpu)?;
    let mpidrs: Vec<u64> = vcpus
        .vcpus()
        .into_iter()
        .map(|vcpu| vcpu.get_mpidr())
        .collect();

    configure_system(
        vm_memory.memory(),
        cmdline.as_str(),
        mpidrs,
        self.device_manager.get_mmio_device_info(),
        self.get_irqchip(),
        &initrd,
    )
    .map_err(StartMicrovmError::ConfigureSystem)
}
}

View File

@ -8,7 +8,7 @@ pub struct KernelConfigInfo {
/// The descriptor to the kernel file.
kernel_file: File,
/// The descriptor to the initrd file, if there is one
initrd_file: Option<File>,
pub initrd_file: Option<File>,
/// The commandline for guest kernel.
cmdline: linux_loader::cmdline::Cmdline,
}

View File

@ -1,11 +1,69 @@
// Copyright (C) 2021 Alibaba Cloud. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use std::io::{self, Read, Seek, SeekFrom};
use std::ops::Deref;
use std::os::unix::io::RawFd;
use std::sync::{Arc, Mutex, RwLock};
use dbs_address_space::AddressSpace;
#[cfg(target_arch = "aarch64")]
use dbs_arch::gic::GICDevice;
use dbs_boot::InitrdConfig;
#[cfg(feature = "hotplug")]
use dbs_upcall::{DevMgrService, UpcallClient};
use dbs_utils::epoll_manager::EpollManager;
use dbs_utils::time::TimestampUs;
use kvm_ioctls::VmFd;
use linux_loader::loader::{KernelLoader, KernelLoaderResult};
use seccompiler::BpfProgram;
use serde_derive::{Deserialize, Serialize};
use slog::{error, info};
use vm_memory::{Bytes, GuestAddress, GuestAddressSpace};
use vmm_sys_util::eventfd::EventFd;
use crate::address_space_manager::{
AddressManagerError, AddressSpaceMgr, AddressSpaceMgrBuilder, GuestAddressSpaceImpl,
GuestMemoryImpl,
};
use crate::api::v1::{InstanceInfo, InstanceState};
use crate::device_manager::console_manager::DmesgWriter;
use crate::device_manager::{DeviceManager, DeviceMgrError, DeviceOpContext};
use crate::error::{LoadInitrdError, Result, StartMicrovmError, StopMicrovmError};
use crate::kvm_context::KvmContext;
use crate::resource_manager::ResourceManager;
use crate::vcpu::{VcpuManager, VcpuManagerError};
#[cfg(target_arch = "aarch64")]
use dbs_arch::gic::Error as GICError;
mod kernel_config;
pub use self::kernel_config::KernelConfigInfo;
#[cfg(target_arch = "aarch64")]
#[path = "aarch64.rs"]
mod aarch64;
#[cfg(target_arch = "x86_64")]
#[path = "x86_64.rs"]
mod x86_64;
/// Errors associated with virtual machine instance related operations.
#[derive(Debug, thiserror::Error)]
pub enum VmError {
/// Cannot configure the IRQ.
#[error("failed to configure IRQ fot the virtual machine: {0}")]
Irq(#[source] kvm_ioctls::Error),
/// Cannot configure the microvm.
#[error("failed to initialize the virtual machine: {0}")]
VmSetup(#[source] kvm_ioctls::Error),
/// Cannot setup GIC
#[cfg(target_arch = "aarch64")]
#[error("failed to configure GIC")]
SetupGIC(GICError),
}
/// Configuration information for user defined NUMA nodes.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct NumaRegionInfo {
@ -94,3 +152,555 @@ impl Default for VmConfigInfo {
}
}
}
/// Struct to manage resources and control states of a virtual machine instance.
///
/// A `Vm` instance holds the resources assigned to a virtual machine instance, such as CPU,
/// memory, devices etc. When a `Vm` instance gets dropped, all resources assigned should be
/// released.
///
/// The object model is explicitly built as:
/// ```text
/// |---Vmm API Server--<-1:1-> HTTP API Server
/// | |----------<-1:1-> Shimv2/CRI API Server
/// |
/// Vmm <-1:N-> Vm <-1:1-> Address Space Manager <-1:N-> GuestMemory
/// ^ ^---1:1-> Device Manager <-1:N-> Device
/// | ^---1:1-> Resource Manager
/// | ^---1:N-> Vcpu
/// |---<-1:N-> Event Manager
/// ```
pub struct Vm {
fd: Arc<VmFd>,
kvm: KvmContext,
address_space: AddressSpaceMgr,
device_manager: DeviceManager,
epoll_manager: EpollManager,
resource_manager: Arc<ResourceManager>,
vcpu_manager: Option<Arc<Mutex<VcpuManager>>>,
logger: slog::Logger,
/// Config of virtual machine
vm_config: VmConfigInfo,
kernel_config: Option<KernelConfigInfo>,
shared_info: Arc<RwLock<InstanceInfo>>,
reset_eventfd: Option<EventFd>,
dmesg_fifo: Option<Box<dyn io::Write + Send>>,
start_instance_request_ts: u64,
start_instance_request_cpu_ts: u64,
start_instance_downtime: u64,
// Arm specific fields.
// On aarch64 we need to keep around the fd obtained by creating the VGIC device.
#[cfg(target_arch = "aarch64")]
irqchip_handle: Option<Box<dyn GICDevice>>,
#[cfg(feature = "hotplug")]
upcall_client: Option<Arc<UpcallClient<DevMgrService>>>,
}
impl Vm {
/// Constructs a new `Vm` instance.
///
/// * `kvm_fd` - Optional raw fd forwarded to `KvmContext::new`.
/// * `api_shared_info` - Shared instance information; its `id` tags the VM's logger.
/// * `epoll_manager` - Epoll manager shared with the device manager.
///
/// Creates the KVM context and the VM fd, then builds the resource manager and the device
/// manager on top of them. All remaining state starts out empty or zeroed.
///
/// # Errors
///
/// Propagates failures from `KvmContext::new` and `KvmContext::create_vm`.
///
/// # Panics
///
/// Panics if the `api_shared_info` lock is poisoned.
pub fn new(
kvm_fd: Option<RawFd>,
api_shared_info: Arc<RwLock<InstanceInfo>>,
epoll_manager: EpollManager,
) -> Result<Self> {
let id = api_shared_info.read().unwrap().id.clone();
let logger = slog_scope::logger().new(slog::o!("id" => id));
let kvm = KvmContext::new(kvm_fd)?;
let fd = Arc::new(kvm.create_vm()?);
let resource_manager = Arc::new(ResourceManager::new(Some(kvm.max_memslots())));
let device_manager = DeviceManager::new(
fd.clone(),
resource_manager.clone(),
epoll_manager.clone(),
&logger,
);
Ok(Vm {
fd,
kvm,
address_space: AddressSpaceMgr::default(),
device_manager,
epoll_manager,
resource_manager,
vcpu_manager: None,
logger,
vm_config: Default::default(),
kernel_config: None,
shared_info: api_shared_info,
reset_eventfd: None,
dmesg_fifo: None,
start_instance_request_ts: 0,
start_instance_request_cpu_ts: 0,
start_instance_downtime: 0,
#[cfg(target_arch = "aarch64")]
irqchip_handle: None,
#[cfg(feature = "hotplug")]
upcall_client: None,
})
}
/// Gets a reference to the KVM VM file descriptor owned by this VM.
pub fn vm_fd(&self) -> &VmFd {
&self.fd
}
/// Gets a reference to the `AddressSpace` object for guest memory owned by this VM.
///
/// Returns whatever the address space manager reports, which may be `None`.
pub fn vm_address_space(&self) -> Option<&AddressSpace> {
self.address_space.get_address_space()
}
/// Gets a reference to the device manager owned by this VM.
pub fn device_manager(&self) -> &DeviceManager {
&self.device_manager
}
/// Gets a reference to the address space for guest memory owned by this VM.
///
/// Note that `GuestMemory` does not include any device memory that may have been added after
/// this VM was constructed.
pub fn vm_as(&self) -> Option<&GuestAddressSpaceImpl> {
self.address_space.get_vm_as()
}
/// Get an immutable reference to the virtual machine configuration information.
pub fn vm_config(&self) -> &VmConfigInfo {
&self.vm_config
}
/// Set the virtual machine configuration information, replacing the previous value.
pub fn set_vm_config(&mut self, config: VmConfigInfo) {
self.vm_config = config;
}
/// Set guest kernel boot configurations, replacing any previously stored configuration.
pub fn set_kernel_config(&mut self, kernel_config: KernelConfigInfo) {
self.kernel_config = Some(kernel_config);
}
/// Get virtual machine shared instance information.
pub fn shared_info(&self) -> &Arc<RwLock<InstanceInfo>> {
&self.shared_info
}
/// Get a reference to EpollManager.
pub fn epoll_manager(&self) -> &EpollManager {
&self.epoll_manager
}
/// Get eventfd for exit notification, or `None` if it has not been created yet.
pub fn get_reset_eventfd(&self) -> Option<&EventFd> {
self.reset_eventfd.as_ref()
}
/// Check whether the VM has left the `Uninitialized` state.
///
/// # Panics
///
/// Panics if the shared instance info lock is poisoned.
pub fn is_vm_initialized(&self) -> bool {
    // Crash on purpose if another thread poisoned this lock.
    let state = self
        .shared_info
        .read()
        .expect("Failed to determine if instance is initialized because shared info couldn't be read due to poisoned lock")
        .state;
    state != InstanceState::Uninitialized
}
/// Check whether the VM instance is running.
///
/// # Panics
///
/// Panics if the shared instance info lock is poisoned.
pub fn is_vm_running(&self) -> bool {
    let instance_state = {
        // Use expect() to crash if the other thread poisoned this lock.
        let shared_info = self.shared_info.read()
            .expect("Failed to determine if instance is running because shared info couldn't be read due to poisoned lock");
        shared_info.state
    };
    instance_state == InstanceState::Running
}
/// Returns true if the system upcall service is ready.
///
/// Always returns false when the `hotplug` feature is disabled or no upcall client exists.
pub fn is_upcall_client_ready(&self) -> bool {
    #[cfg(feature = "hotplug")]
    {
        match self.upcall_client() {
            Some(client) => client.is_ready(),
            None => false,
        }
    }
    #[cfg(not(feature = "hotplug"))]
    {
        false
    }
}
/// Create a device operation context matching the current VM state.
///
/// * VM not yet initialized: returns a boot context.
/// * VM initialized, `hotplug` feature disabled: `Err(MicroVMAlreadyRunning)`.
/// * VM initialized, no upcall client: `Err(UpcallMissVsock)`.
/// * VM initialized, upcall client not ready: `Err(UpcallNotReady)`.
/// * VM initialized, upcall client ready: returns a hotplug context.
pub fn create_device_op_context(
    &mut self,
    epoll_mgr: Option<EpollManager>,
) -> std::result::Result<DeviceOpContext, StartMicrovmError> {
    if !self.is_vm_initialized() {
        return Ok(DeviceOpContext::create_boot_ctx(self, epoll_mgr));
    }

    #[cfg(feature = "hotplug")]
    {
        if self.upcall_client().is_none() {
            return Err(StartMicrovmError::UpcallMissVsock);
        }
        if !self.is_upcall_client_ready() {
            return Err(StartMicrovmError::UpcallNotReady);
        }
        Ok(DeviceOpContext::create_hotplug_ctx(self, epoll_mgr))
    }
    #[cfg(not(feature = "hotplug"))]
    {
        Err(StartMicrovmError::MicroVMAlreadyRunning)
    }
}
/// Save VM instance exit state
pub fn vm_exit(&self, exit_code: i32) {
if let Ok(mut info) = self.shared_info.write() {
info.state = InstanceState::Exited(exit_code);
} else {
error!(
self.logger,
"Failed to save exit state, couldn't be written due to poisoned lock"
);
}
}
/// Reset the console into canonical mode.
///
/// Delegates to the device manager and returns its result unchanged.
pub fn reset_console(&self) -> std::result::Result<(), DeviceMgrError> {
self.device_manager.reset_console()
}
/// Returns `(vmm_version, instance_id)` cloned from the shared instance information.
///
/// # Panics
///
/// Panics if the shared info lock is poisoned.
fn get_dragonball_info(&self) -> (String, String) {
    let info = self.shared_info.read().unwrap();
    (info.vmm_version.clone(), info.id.clone())
}
/// Create a dmesg writer and store it in `dmesg_fifo`, replacing any previous writer.
fn init_dmesg_logger(&mut self) {
let writer = self.dmesg_logger();
self.dmesg_fifo = Some(writer);
}
/// Build a boxed `DmesgWriter` that writes to a clone of this VM's logger.
pub fn dmesg_logger(&self) -> Box<dyn io::Write + Send> {
Box::new(DmesgWriter::new(self.logger.clone()))
}
/// Verify that the VM can be started.
///
/// The only check performed is that a kernel configuration has been set; otherwise
/// `StartMicrovmError::MissingKernelConfig` is returned.
pub(crate) fn check_health(&self) -> std::result::Result<(), StartMicrovmError> {
    self.kernel_config
        .as_ref()
        .map(|_| ())
        .ok_or(StartMicrovmError::MissingKernelConfig)
}
/// Create the vCPU manager for this VM and store it in `self.vcpu_manager`.
///
/// * `vm_as` - Guest address space handed to the vCPU manager.
/// * `vcpu_seccomp_filter` - Seccomp BPF program handed to the vCPU manager.
///
/// The manager also receives the VM fd, the KVM context, the VM configuration, the shared
/// instance info, a cached IO manager and a clone of the epoll manager.
///
/// # Errors
///
/// Propagates the error returned by `VcpuManager::new`.
pub(crate) fn init_vcpu_manager(
&mut self,
vm_as: GuestAddressSpaceImpl,
vcpu_seccomp_filter: BpfProgram,
) -> std::result::Result<(), VcpuManagerError> {
let vcpu_manager = VcpuManager::new(
self.fd.clone(),
&self.kvm,
&self.vm_config,
vm_as,
vcpu_seccomp_filter,
self.shared_info.clone(),
self.device_manager.io_manager(),
self.epoll_manager.clone(),
)?;
self.vcpu_manager = Some(vcpu_manager);
Ok(())
}
/// Lock the vCPU manager and return the guard.
///
/// # Errors
///
/// Returns `VcpuManagerError::VcpuManagerNotInitialized` if no vCPU manager has been created.
///
/// # Panics
///
/// Panics if the vCPU manager mutex is poisoned.
pub fn vcpu_manager(
    &self,
) -> std::result::Result<std::sync::MutexGuard<'_, VcpuManager>, VcpuManagerError> {
    match self.vcpu_manager.as_ref() {
        Some(manager) => Ok(manager.lock().unwrap()),
        None => Err(VcpuManagerError::VcpuManagerNotInitialized),
    }
}
/// Pause all vcpus and record the instance downtime
///
/// Stores the `time_us` of a default-constructed `TimestampUs` in `start_instance_downtime`
/// before asking the vCPU manager to pause every vCPU.
// NOTE(review): presumably `TimestampUs::default()` samples the current time — confirm.
pub fn pause_all_vcpus_with_downtime(&mut self) -> std::result::Result<(), VcpuManagerError> {
let ts = TimestampUs::default();
self.start_instance_downtime = ts.time_us;
self.vcpu_manager()?.pause_all_vcpus()?;
Ok(())
}
/// Resume all vcpus.
///
/// NOTE(review): despite its name, this method currently only resumes the vCPUs; it neither
/// reads nor updates `start_instance_downtime`.
pub fn resume_all_vcpus_with_downtime(&mut self) -> std::result::Result<(), VcpuManagerError> {
self.vcpu_manager()?.resume_all_vcpus()?;
Ok(())
}
/// Create the guest address space from the VM configuration.
///
/// Does nothing (and returns `Ok`) if the address space is already initialized. Otherwise a
/// single default NUMA region covering all configured memory and all vCPUs up to
/// `max_vcpu_count` is built and handed to the address space manager.
///
/// # Errors
///
/// * `ConfigureInvalid` if `reserve_memory_bytes` exceeds half of the configured memory.
/// * `AddressManagerError` if the builder or the address space creation fails.
///
/// # Panics
///
/// Panics if the shared instance info lock is poisoned (only read for `hugetlbfs`).
pub(crate) fn init_guest_memory(&mut self) -> std::result::Result<(), StartMicrovmError> {
    info!(self.logger, "VM: initializing guest memory...");

    // Guest memory is set up at most once; repeated calls are accepted silently.
    if self.address_space.is_initialized() {
        return Ok(());
    }

    let mem_size_mib = self.vm_config.mem_size_mib as u64;
    let mem_size = mem_size_mib << 20;
    let reserve_memory_bytes = self.vm_config.reserve_memory_bytes;
    // At most half of the guest memory may be reserved.
    if reserve_memory_bytes > (mem_size >> 1) {
        return Err(StartMicrovmError::ConfigureInvalid(String::from(
            "invalid reserve_memory_bytes",
        )));
    }

    let mem_type = self.vm_config.mem_type.clone();
    // Only hugetlbfs-backed memory gets a backing path, derived from the instance id.
    let mem_file_path = if mem_type == "hugetlbfs" {
        let shared_info = self.shared_info.read()
            .expect("Failed to determine if instance is initialized because shared info couldn't be read due to poisoned lock");
        format!("/dragonball/{}", shared_info.id)
    } else {
        String::new()
    };

    // A single default region that owns every possible vCPU.
    let vcpu_ids: Vec<u32> = (0..self.vm_config().max_vcpu_count)
        .map(|id| id as u32)
        .collect();
    let numa_regions = vec![NumaRegionInfo {
        size: mem_size_mib,
        host_numa_node_id: None,
        guest_numa_node_id: Some(0),
        vcpu_ids,
    }];

    info!(
        self.logger,
        "VM: mem_type:{} mem_file_path:{}, mem_size:{}, reserve_memory_bytes:{}, \
         numa_regions:{:?}",
        mem_type,
        mem_file_path,
        mem_size,
        reserve_memory_bytes,
        numa_regions,
    );

    let mut builder = AddressSpaceMgrBuilder::new(&mem_type, &mem_file_path)
        .map_err(StartMicrovmError::AddressManagerError)?;
    builder.set_kvm_vm_fd(self.fd.clone());
    self.address_space
        .create_address_space(&self.resource_manager, &numa_regions, builder)
        .map_err(StartMicrovmError::AddressManagerError)?;

    info!(self.logger, "VM: initializing guest memory done");
    Ok(())
}
/// Create the interrupt manager, then create and start all devices.
///
/// # Errors
///
/// * `MissingKernelConfig` if no kernel configuration has been set.
/// * `DeviceManager` if the interrupt manager cannot be created.
/// * `AddressManagerError(GuestMemoryNotInitialized)` if guest memory is not available.
/// * Whatever `create_devices` / `start_devices` report.
fn init_devices(
&mut self,
epoll_manager: EpollManager,
) -> std::result::Result<(), StartMicrovmError> {
info!(self.logger, "VM: initializing devices ...");
let com1_sock_path = self.vm_config.serial_path.clone();
// Borrowed mutably because `create_devices` receives the kernel configuration below.
let kernel_config = self
.kernel_config
.as_mut()
.ok_or(StartMicrovmError::MissingKernelConfig)?;
info!(self.logger, "VM: create interrupt manager");
self.device_manager
.create_interrupt_manager()
.map_err(StartMicrovmError::DeviceManager)?;
info!(self.logger, "VM: create devices");
let vm_as =
self.address_space
.get_vm_as()
.ok_or(StartMicrovmError::AddressManagerError(
AddressManagerError::GuestMemoryNotInitialized,
))?;
// `take()` moves the dmesg writer (if any) out of the VM and into the device manager.
self.device_manager.create_devices(
vm_as.clone(),
epoll_manager,
kernel_config,
com1_sock_path,
self.dmesg_fifo.take(),
self.address_space.address_space(),
)?;
info!(self.logger, "VM: start devices");
self.device_manager.start_devices()?;
info!(self.logger, "VM: initializing devices done");
Ok(())
}
/// Remove devices when shutting down the VM.
///
/// # Errors
///
/// * `GuestMemoryNotInitialized` if the guest address space is not available.
/// * `DeviceManager` if the device manager fails to remove the devices.
pub fn remove_devices(&mut self) -> std::result::Result<(), StopMicrovmError> {
info!(self.logger, "VM: remove devices");
let vm_as = self
.address_space
.get_vm_as()
.ok_or(StopMicrovmError::GuestMemoryNotInitialized)?;
self.device_manager
.remove_devices(
vm_as.clone(),
self.epoll_manager.clone(),
self.address_space.address_space(),
)
.map_err(StopMicrovmError::DeviceManager)
}
/// Load the guest kernel image into `vm_memory`.
///
/// Uses the ELF loader on x86_64 and the PE loader on aarch64; both are given
/// `dbs_boot::get_kernel_start()` as the high memory start address.
///
/// # Errors
///
/// * `MissingKernelConfig` if no kernel configuration has been set.
/// * `KernelLoader` if the loader rejects the image or the memory layout.
fn load_kernel(
&mut self,
vm_memory: &GuestMemoryImpl,
) -> std::result::Result<KernelLoaderResult, StartMicrovmError> {
// A mutable borrow is needed because the loader is handed `kernel_file_mut()`.
let kernel_config = self
.kernel_config
.as_mut()
.ok_or(StartMicrovmError::MissingKernelConfig)?;
let high_mem_addr = GuestAddress(dbs_boot::get_kernel_start());
#[cfg(target_arch = "x86_64")]
return linux_loader::loader::elf::Elf::load(
vm_memory,
None,
kernel_config.kernel_file_mut(),
Some(high_mem_addr),
)
.map_err(StartMicrovmError::KernelLoader);
// On aarch64 the second argument is also set to the kernel start address.
#[cfg(target_arch = "aarch64")]
return linux_loader::loader::pe::PE::load(
vm_memory,
Some(GuestAddress(dbs_boot::get_kernel_start())),
kernel_config.kernel_file_mut(),
Some(high_mem_addr),
)
.map_err(StartMicrovmError::KernelLoader);
}
/// Copies an initrd image into guest memory.
///
/// * `vm_memory` - The guest memory the initrd is written to.
/// * `image` - The initrd image; it is measured by seeking to its end, then rewound.
///
/// Returns the guest address and size of the loaded image.
///
/// # Errors
///
/// * `ReadInitrd` if seeking fails or the image is empty.
/// * `LoadInitrd` if no load address can be found or the copy into guest memory fails.
fn load_initrd<F>(
    &self,
    vm_memory: &GuestMemoryImpl,
    image: &mut F,
) -> std::result::Result<InitrdConfig, LoadInitrdError>
where
    F: Read + Seek,
{
    use crate::error::LoadInitrdError::*;

    // The end offset is the image length; an empty image is rejected.
    let size = match image.seek(SeekFrom::End(0)).map_err(ReadInitrd)? {
        0 => {
            return Err(ReadInitrd(io::Error::new(
                io::ErrorKind::InvalidData,
                "Initrd image seek returned a size of zero",
            )))
        }
        len => len as usize,
    };

    // Rewind so the copy below starts at the first byte.
    image.seek(SeekFrom::Start(0)).map_err(ReadInitrd)?;

    let address = dbs_boot::initrd_load_addr(vm_memory, size as u64).map_err(|_| LoadInitrd)?;
    vm_memory
        .read_from(GuestAddress(address), image, size)
        .map_err(|_| LoadInitrd)?;

    Ok(InitrdConfig {
        address: GuestAddress(address),
        size,
    })
}
/// Load the optional initrd and run the architecture-specific system configuration.
///
/// # Errors
///
/// * `MissingKernelConfig` if no kernel configuration has been set.
/// * `InitrdLoader(ReadInitrd(_))` carrying the real I/O error if the initrd file handle
///   cannot be duplicated.
/// * `InitrdLoader(_)` if loading the initrd into guest memory fails.
/// * Whatever `configure_system_arch` reports.
fn init_configure_system(
    &mut self,
    vm_as: &GuestAddressSpaceImpl,
) -> std::result::Result<(), StartMicrovmError> {
    let vm_memory = vm_as.memory();
    let kernel_config = self
        .kernel_config
        .as_ref()
        .ok_or(StartMicrovmError::MissingKernelConfig)?;

    let initrd: Option<InitrdConfig> = match &kernel_config.initrd_file {
        Some(file) => {
            // `load_initrd` needs a mutable reader while the kernel config is only borrowed
            // immutably, so work on a duplicated handle and keep the original I/O error.
            let mut initrd_file = file.try_clone().map_err(LoadInitrdError::ReadInitrd)?;
            Some(self.load_initrd(vm_memory.deref(), &mut initrd_file)?)
        }
        None => None,
    };

    self.configure_system_arch(vm_memory.deref(), kernel_config.kernel_cmdline(), initrd)
}
}
#[cfg(feature = "hotplug")]
impl Vm {
/// Get upcall client, or `None` if it has not been initialized.
pub fn upcall_client(&self) -> &Option<Arc<UpcallClient<DevMgrService>>> {
&self.upcall_client
}
/// Initialize the upcall client for the guest OS.
///
/// Builds the client on top of the vsock inner connector, connects it and stores it in
/// `self.upcall_client`.
///
/// # Errors
///
/// * `UpcallMissVsock` if the device manager has no vsock inner connector.
/// * `UpcallInitError` if the client cannot be created.
/// * `UpcallConnectError` if the client fails to connect.
fn init_upcall(&mut self) -> std::result::Result<(), StartMicrovmError> {
// get vsock inner connector for upcall
let inner_connector = self
.device_manager
.get_vsock_inner_connector()
.ok_or(StartMicrovmError::UpcallMissVsock)?;
let mut upcall_client = UpcallClient::new(
inner_connector,
self.epoll_manager.clone(),
DevMgrService::default(),
)
.map_err(StartMicrovmError::UpcallInitError)?;
upcall_client
.connect()
.map_err(StartMicrovmError::UpcallConnectError)?;
// The client is only published on the VM once the connection attempt has succeeded.
self.upcall_client = Some(Arc::new(upcall_client));
info!(self.logger, "upcall client init success");
Ok(())
}
}

View File

@ -0,0 +1,276 @@
// Copyright (C) 2020-2022 Alibaba Cloud. All rights reserved.
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the THIRD-PARTY file.
use std::convert::TryInto;
use std::mem;
use std::ops::Deref;
use dbs_address_space::AddressSpace;
use dbs_boot::{add_e820_entry, bootparam, layout, mptable, BootParamsWrapper, InitrdConfig};
use dbs_utils::epoll_manager::EpollManager;
use dbs_utils::time::TimestampUs;
use kvm_bindings::{kvm_irqchip, kvm_pit_config, kvm_pit_state2, KVM_PIT_SPEAKER_DUMMY};
use slog::info;
use vm_memory::{Address, Bytes, GuestAddress, GuestAddressSpace, GuestMemory};
use crate::address_space_manager::{GuestAddressSpaceImpl, GuestMemoryImpl};
use crate::error::{Error, Result, StartMicrovmError};
use crate::vm::{Vm, VmError};
use linux_loader::cmdline::Cmdline;
/// Configures the system and should be called once per vm before starting vcpu
/// threads.
///
/// In order, this function sets up the MP table, fills in a default
/// `boot_params` structure (setup header fields, optional initrd location and
/// the e820 memory map) and writes that structure into guest memory at
/// `layout::ZERO_PAGE_START`.
///
/// # Arguments
///
/// * `guest_mem` - The memory to be used by the guest.
/// * `address_space` - Address space whose `last_addr()` is taken as the end of
///   guest memory when building the e820 map. `None` is rejected with
///   `Error::AddressSpace`.
/// * `cmdline_addr` - Address in `guest_mem` where the kernel command line was
/// loaded.
/// * `cmdline_size` - Size of the kernel command line in bytes including the
/// null terminator.
/// * `initrd` - Information about where the ramdisk image was loaded in the
/// `guest_mem`.
/// * `boot_cpus` - Number of virtual CPUs the guest will have at boot time.
/// * `max_cpus` - Max number of virtual CPUs the guest will have.
/// * `rsv_mem_bytes` - Size in bytes of an additional `E820_RESERVED` entry that
///   starts right after the higher of the end of guest memory and the end of the
///   low MMIO window. No entry is added when this is zero.
///
/// # Errors
///
/// * `Error::MpTableSetup` if the MP table cannot be set up.
/// * `Error::BootSystem` if an e820 entry cannot be added.
/// * `Error::AddressSpace` if `address_space` is `None`.
/// * `Error::ZeroPagePastRamEnd` if the boot parameters do not fit in
///   `guest_mem` at the zero page address.
/// * `Error::ZeroPageSetup` if writing the boot parameters fails.
#[allow(clippy::too_many_arguments)]
pub fn configure_system<M: GuestMemory>(
guest_mem: &M,
address_space: Option<&AddressSpace>,
cmdline_addr: GuestAddress,
cmdline_size: usize,
initrd: &Option<InitrdConfig>,
boot_cpus: u8,
max_cpus: u8,
rsv_mem_bytes: u64,
) -> super::Result<()> {
// Values stored into the setup header of the boot parameters below.
const KERNEL_BOOT_FLAG_MAGIC: u16 = 0xaa55;
const KERNEL_HDR_MAGIC: u32 = 0x5372_6448;
const KERNEL_LOADER_OTHER: u8 = 0xff;
const KERNEL_MIN_ALIGNMENT_BYTES: u32 = 0x0100_0000; // Must be non-zero.
// Boundaries of the low MMIO window and start of high memory, used to split
// guest RAM into e820 entries.
let mmio_start = GuestAddress(layout::MMIO_LOW_START);
let mmio_end = GuestAddress(layout::MMIO_LOW_END);
let himem_start = GuestAddress(layout::HIMEM_START);
// Note that this puts the mptable at the last 1k of Linux's 640k base RAM
// This runs before any other validation, so guest memory may already have
// been modified when a later step returns an error.
mptable::setup_mptable(guest_mem, boot_cpus, max_cpus).map_err(Error::MpTableSetup)?;
let mut params: BootParamsWrapper = BootParamsWrapper(bootparam::boot_params::default());
params.0.hdr.type_of_loader = KERNEL_LOADER_OTHER;
params.0.hdr.boot_flag = KERNEL_BOOT_FLAG_MAGIC;
params.0.hdr.header = KERNEL_HDR_MAGIC;
// NOTE(review): the `as u32` casts below truncate silently; presumably the
// command line and initrd always live below 4 GiB — confirm against callers.
params.0.hdr.cmd_line_ptr = cmdline_addr.raw_value() as u32;
params.0.hdr.cmdline_size = cmdline_size as u32;
params.0.hdr.kernel_alignment = KERNEL_MIN_ALIGNMENT_BYTES;
if let Some(initrd_config) = initrd {
params.0.hdr.ramdisk_image = initrd_config.address.raw_value() as u32;
params.0.hdr.ramdisk_size = initrd_config.size as u32;
}
// First RAM entry: from address 0 with a length of `layout::EBDA_START`.
add_e820_entry(&mut params.0, 0, layout::EBDA_START, bootparam::E820_RAM)
.map_err(Error::BootSystem)?;
// The `+ 1` in the first branch below suggests `last_addr()` is the last
// valid (inclusive) address — TODO confirm.
let mem_end = address_space.ok_or(Error::AddressSpace)?.last_addr();
if mem_end < mmio_start {
// All memory above `himem_start` ends before the low MMIO window: one entry.
add_e820_entry(
&mut params.0,
himem_start.raw_value() as u64,
// NOTE(review): unchecked_offset_from assumes mem_end >= himem_start;
// that precondition is not verified in this function.
mem_end.unchecked_offset_from(himem_start) as u64 + 1,
bootparam::E820_RAM,
)
.map_err(Error::BootSystem)?;
} else {
// Memory reaches the low MMIO window: describe the part below it first.
add_e820_entry(
&mut params.0,
himem_start.raw_value(),
// unchecked_offset_from assumes mmio_start > himem_start, which
// presumably holds for the layout constants — TODO confirm.
mmio_start.unchecked_offset_from(himem_start),
bootparam::E820_RAM,
)
.map_err(Error::BootSystem)?;
if mem_end > mmio_end {
// Remaining memory sits above the low MMIO window.
add_e820_entry(
&mut params.0,
mmio_end.raw_value() + 1,
// unchecked_offset_from cannot underflow here: this branch is only
// taken when mem_end > mmio_end.
mem_end.unchecked_offset_from(mmio_end) as u64,
bootparam::E820_RAM,
)
.map_err(Error::BootSystem)?;
}
}
// Optional reserved entry placed after the higher of `mem_end` and `mmio_end`.
if rsv_mem_bytes > 0 {
add_e820_entry(
&mut params.0,
mem_end.raw_value().max(mmio_end.raw_value()) + 1,
rsv_mem_bytes,
bootparam::E820_RESERVED,
)
.map_err(Error::BootSystem)?;
}
let zero_page_addr = GuestAddress(layout::ZERO_PAGE_START);
// Make sure the whole structure fits in guest memory before writing it.
guest_mem
.checked_offset(zero_page_addr, mem::size_of::<bootparam::boot_params>())
.ok_or(Error::ZeroPagePastRamEnd)?;
guest_mem
.write_obj(params, zero_page_addr)
.map_err(|_| Error::ZeroPageSetup)?;
Ok(())
}
impl Vm {
/// Get the status of in-kernel PIT.
pub fn get_pit_state(&self) -> Result<kvm_pit_state2> {
self.fd.get_pit2().map_err(|e| Error::Vm(VmError::Irq(e)))
}
/// Set the status of in-kernel PIT.
pub fn set_pit_state(&self, pit_state: &kvm_pit_state2) -> Result<()> {
self.fd
.set_pit2(pit_state)
.map_err(|e| Error::Vm(VmError::Irq(e)))
}
/// Get the status of in-kernel ioapic.
pub fn get_irqchip_state(&self, chip_id: u32) -> Result<kvm_irqchip> {
let mut irqchip: kvm_irqchip = kvm_irqchip {
chip_id,
..kvm_irqchip::default()
};
self.fd
.get_irqchip(&mut irqchip)
.map(|_| irqchip)
.map_err(|e| Error::Vm(VmError::Irq(e)))
}
/// Set the status of in-kernel ioapic.
pub fn set_irqchip_state(&self, irqchip: &kvm_irqchip) -> Result<()> {
self.fd
.set_irqchip(irqchip)
.map_err(|e| Error::Vm(VmError::Irq(e)))
}
}
impl Vm {
/// Initialize the virtual machine instance.
///
/// The steps are executed in this order and the first failure aborts the
/// sequence:
/// 1) set the TSS address of the VM.
/// 2) create system devices: the in-kernel interrupt controller and the PIT.
/// 3) create IO devices through `init_devices`.
/// 4) hand the device manager's reset eventfd to the vCPU manager.
/// 5) log the CPU power management setting (not implemented yet, see the TODO).
/// 6) load the guest kernel image.
/// 7) create the boot vCPUs, passing them the kernel load address.
///
/// # Arguments
///
/// * `epoll_mgr` - Epoll manager forwarded to `init_devices`.
/// * `vm_as` - Guest address space whose memory the kernel is loaded into.
/// * `request_ts` - Timestamp forwarded to `create_boot_vcpus`.
///
/// # Panics
///
/// Panics if `get_reset_eventfd()` does not yield an eventfd (see the
/// `unwrap()` below).
pub fn init_microvm(
&mut self,
epoll_mgr: EpollManager,
vm_as: GuestAddressSpaceImpl,
request_ts: TimestampUs,
) -> std::result::Result<(), StartMicrovmError> {
info!(self.logger, "VM: start initializing microvm ...");
self.init_tss()?;
// For x86_64 we need to create the interrupt controller before calling `KVM_CREATE_VCPUS`
// while on aarch64 we need to do it the other way around.
self.setup_interrupt_controller()?;
self.create_pit()?;
self.init_devices(epoll_mgr)?;
// NOTE(review): this unwrap() panics instead of returning a
// StartMicrovmError; presumably the eventfd always exists once
// init_devices() has succeeded — confirm.
let reset_event_fd = self.device_manager.get_reset_eventfd().unwrap();
self.vcpu_manager()
.map_err(StartMicrovmError::Vcpu)?
.set_reset_event_fd(reset_event_fd)
.map_err(StartMicrovmError::Vcpu)?;
if self.vm_config.cpu_pm == "on" {
// TODO: add cpu_pm support. issue #4590.
// Nothing is configured here yet; only the log line below is emitted.
info!(self.logger, "VM: enable CPU disable_idle_exits capability");
}
let vm_memory = vm_as.memory();
let kernel_loader_result = self.load_kernel(vm_memory.deref())?;
// The boot vCPUs are created after the kernel is loaded because they
// receive the kernel load address.
self.vcpu_manager()
.map_err(StartMicrovmError::Vcpu)?
.create_boot_vcpus(request_ts, kernel_loader_result.kernel_load)
.map_err(StartMicrovmError::Vcpu)?;
info!(self.logger, "VM: initializing microvm done");
Ok(())
}
/// Execute system architecture specific configurations.
///
/// 1) load the kernel command line into guest memory at
///    `layout::CMDLINE_START`.
/// 2) call `configure_system` to set the guest kernel boot parameters and the
///    BIOS configuration data structs (MP table).
///
/// # Arguments
///
/// * `vm_memory` - Guest memory receiving the command line and boot data.
/// * `cmdline` - Kernel command line to load.
/// * `initrd` - Location of the loaded initrd, if any.
pub fn configure_system_arch(
&self,
vm_memory: &GuestMemoryImpl,
cmdline: &Cmdline,
initrd: Option<InitrdConfig>,
) -> std::result::Result<(), StartMicrovmError> {
let cmdline_addr = GuestAddress(dbs_boot::layout::CMDLINE_START);
linux_loader::loader::load_cmdline(vm_memory, cmdline_addr, cmdline)
.map_err(StartMicrovmError::LoadCommandline)?;
configure_system(
vm_memory,
self.address_space.address_space(),
cmdline_addr,
// `configure_system` expects the size including the null terminator.
cmdline.as_str().len() + 1,
&initrd,
self.vm_config.vcpu_count,
self.vm_config.max_vcpu_count,
self.vm_config.reserve_memory_bytes,
)
.map_err(StartMicrovmError::ConfigureSystem)
}
/// Sets the TSS address of the VM to `layout::KVM_TSS_ADDRESS`.
///
/// Panics if the constant does not fit the integer type expected by
/// `set_tss_address` (see the `try_into().unwrap()` below).
pub(crate) fn init_tss(&mut self) -> std::result::Result<(), StartMicrovmError> {
self.fd
.set_tss_address(dbs_boot::layout::KVM_TSS_ADDRESS.try_into().unwrap())
.map_err(|e| StartMicrovmError::ConfigureVm(VmError::VmSetup(e)))
}
/// Creates the in-kernel irq chip.
///
/// The PIT is created separately by `create_pit`.
pub(crate) fn setup_interrupt_controller(
&mut self,
) -> std::result::Result<(), StartMicrovmError> {
self.fd
.create_irq_chip()
.map_err(|e| StartMicrovmError::ConfigureVm(VmError::VmSetup(e)))
}
/// Creates an in-kernel device model for the PIT.
pub(crate) fn create_pit(&self) -> std::result::Result<(), StartMicrovmError> {
info!(self.logger, "VM: create pit");
// We need to enable the emulation of a dummy speaker port stub so that writing to port 0x61
// (i.e. KVM_SPEAKER_BASE_ADDRESS) does not trigger an exit to user space.
let pit_config = kvm_pit_config {
flags: KVM_PIT_SPEAKER_DUMMY,
..kvm_pit_config::default()
};
// The result of the call is checked and mapped into a `StartMicrovmError`.
self.fd
.create_pit2(pit_config)
.map_err(|e| StartMicrovmError::ConfigureVm(VmError::VmSetup(e)))
}
}