mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-07-31 23:36:12 +00:00
dragonball: add vm struct
The vm struct manages resources and controls the states of a virtual machine instance. Signed-off-by: wllenyj <wllenyj@linux.alibaba.com> Signed-off-by: jingshan <jingshan@linux.alibaba.com> Signed-off-by: Liu Jiang <gerry@linux.alibaba.com> Signed-off-by: Chao Wu <chaowu@linux.alibaba.com>
This commit is contained in:
parent
527b73a8e5
commit
cfd5dae47c
@ -351,6 +351,16 @@ pub struct DmesgWriter {
|
||||
logger: slog::Logger,
|
||||
}
|
||||
|
||||
impl DmesgWriter {
|
||||
/// Creates a new instance.
|
||||
pub fn new(logger: slog::Logger) -> Self {
|
||||
Self {
|
||||
buf: BytesMut::with_capacity(1024),
|
||||
logger: logger.new(slog::o!("subsystem" => "dmesg")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl io::Write for DmesgWriter {
|
||||
/// 0000000 [ 0 . 0 3 4 9 1 6 ] R
|
||||
/// 5b 20 20 20 20 30 2e 30 33 34 39 31 36 5d 20 52
|
||||
|
@ -8,6 +8,8 @@ use std::sync::{Arc, Mutex, MutexGuard};
|
||||
|
||||
use arc_swap::ArcSwap;
|
||||
use dbs_address_space::AddressSpace;
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
use dbs_arch::{DeviceType, MMIODeviceInfo};
|
||||
use dbs_device::device_manager::{Error as IoManagerError, IoManager, IoManagerContext};
|
||||
use dbs_device::resources::Resource;
|
||||
use dbs_device::DeviceIo;
|
||||
@ -20,6 +22,8 @@ use kvm_ioctls::VmFd;
|
||||
use dbs_device::resources::ResourceConstraint;
|
||||
#[cfg(feature = "dbs-virtio-devices")]
|
||||
use dbs_virtio_devices as virtio;
|
||||
#[cfg(feature = "virtio-vsock")]
|
||||
use dbs_virtio_devices::vsock::backend::VsockInnerConnector;
|
||||
#[cfg(feature = "dbs-virtio-devices")]
|
||||
use dbs_virtio_devices::{
|
||||
mmio::{
|
||||
@ -38,7 +42,8 @@ use dbs_upcall::{
|
||||
use crate::address_space_manager::GuestAddressSpaceImpl;
|
||||
use crate::error::StartMicrovmError;
|
||||
use crate::resource_manager::ResourceManager;
|
||||
use crate::vm::KernelConfigInfo;
|
||||
use crate::vm::{KernelConfigInfo, Vm};
|
||||
use crate::IoManagerCached;
|
||||
|
||||
/// Virtual machine console device manager.
|
||||
pub mod console_manager;
|
||||
@ -240,6 +245,10 @@ impl DeviceOpContext {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn create_boot_ctx(vm: &Vm, epoll_mgr: Option<EpollManager>) -> Self {
|
||||
Self::new(epoll_mgr, vm.device_manager(), None, None, false)
|
||||
}
|
||||
|
||||
pub(crate) fn get_vm_as(&self) -> Result<GuestAddressSpaceImpl> {
|
||||
match self.vm_as.as_ref() {
|
||||
Some(v) => Ok(v.clone()),
|
||||
@ -303,6 +312,23 @@ impl DeviceOpContext {
|
||||
|
||||
#[cfg(all(feature = "hotplug", feature = "dbs-upcall"))]
|
||||
impl DeviceOpContext {
|
||||
pub(crate) fn create_hotplug_ctx(vm: &Vm, epoll_mgr: Option<EpollManager>) -> Self {
|
||||
let vm_as = vm.vm_as().expect("VM should have memory ready").clone();
|
||||
|
||||
let vm_config = vm.vm_config().clone();
|
||||
|
||||
let mut ctx = Self::new(
|
||||
epoll_mgr,
|
||||
vm.device_manager(),
|
||||
Some(vm_as),
|
||||
vm.vm_address_space().cloned(),
|
||||
true,
|
||||
);
|
||||
ctx.upcall_client = vm.upcall_client().clone();
|
||||
|
||||
ctx
|
||||
}
|
||||
|
||||
fn call_hotplug_device(
|
||||
&self,
|
||||
req: DevMgrRequest,
|
||||
@ -380,6 +406,8 @@ pub struct DeviceManager {
|
||||
pub(crate) legacy_manager: Option<LegacyDeviceManager>,
|
||||
#[cfg(feature = "virtio-vsock")]
|
||||
pub(crate) vsock_manager: VsockDeviceMgr,
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
mmio_device_info: HashMap<(DeviceType, String), MMIODeviceInfo>,
|
||||
}
|
||||
|
||||
impl DeviceManager {
|
||||
@ -401,9 +429,16 @@ impl DeviceManager {
|
||||
legacy_manager: None,
|
||||
#[cfg(feature = "virtio-vsock")]
|
||||
vsock_manager: VsockDeviceMgr::default(),
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
mmio_device_info: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the underlying IoManager to dispatch IO read/write requests.
|
||||
pub fn io_manager(&self) -> IoManagerCached {
|
||||
IoManagerCached::new(self.io_manager.clone())
|
||||
}
|
||||
|
||||
/// Create the underlying interrupt manager for the device manager.
|
||||
pub fn create_interrupt_manager(&mut self) -> Result<()> {
|
||||
self.irq_manager
|
||||
@ -494,6 +529,12 @@ impl DeviceManager {
|
||||
self.con_manager.reset_console()
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "aarch64")]
/// Returns the MMIO device information map used when building the FDT.
///
/// The map is keyed by `(DeviceType, String)`. The result is always `Some`; the `Option`
/// wrapper is kept so the value can be passed on unchanged by callers expecting an option.
pub fn get_mmio_device_info(&self) -> Option<&HashMap<(DeviceType, String), MMIODeviceInfo>> {
    let info = &self.mmio_device_info;

    Some(info)
}
|
||||
|
||||
/// Create all registered devices when booting the associated virtual machine.
|
||||
pub fn create_devices(
|
||||
&mut self,
|
||||
@ -524,6 +565,21 @@ impl DeviceManager {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Start all registered devices when booting the associated virtual machine.
///
/// Currently a placeholder: no device is touched and `Ok(())` is always returned.
|
||||
pub fn start_devices(&mut self) -> std::result::Result<(), StartMicrovmError> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Remove all devices when shutting down the associated virtual machine.
///
/// Currently a placeholder: all three parameters are unused (hence the leading
/// underscores) and `Ok(())` is always returned.
|
||||
pub fn remove_devices(
|
||||
&mut self,
|
||||
_vm_as: GuestAddressSpaceImpl,
|
||||
_epoll_mgr: EpollManager,
|
||||
_address_space: Option<&AddressSpace>,
|
||||
) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
/// Get the underlying eventfd for vm exit notification.
|
||||
pub fn get_reset_eventfd(&self) -> Result<vmm_sys_util::eventfd::EventFd> {
|
||||
@ -689,3 +745,21 @@ impl DeviceManager {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "hotplug")]
impl DeviceManager {
    /// Get the default inner connector of the vsock device manager, if one is available.
    ///
    /// Returns `None` when the vsock manager cannot provide a default connector, or when
    /// the crate is built without the `virtio-vsock` feature.
    pub fn get_vsock_inner_connector(&mut self) -> Option<VsockInnerConnector> {
        #[cfg(feature = "virtio-vsock")]
        {
            // Anything other than a successfully obtained connector collapses into `None`.
            self.vsock_manager
                .get_default_connector()
                .map(Some)
                .unwrap_or(None)
        }
        #[cfg(not(feature = "virtio-vsock"))]
        {
            None
        }
    }
}
|
||||
|
@ -12,7 +12,10 @@
|
||||
#[cfg(feature = "dbs-virtio-devices")]
|
||||
use dbs_virtio_devices::Error as VirtIoError;
|
||||
|
||||
use crate::address_space_manager;
|
||||
use crate::device_manager;
|
||||
use crate::vcpu;
|
||||
use crate::vm;
|
||||
|
||||
/// Shorthand result type for internal VMM commands.
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@ -23,8 +26,20 @@ pub type Result<T> = std::result::Result<T, Error>;
|
||||
/// of the host (for example if Dragonball doesn't have permissions to open the KVM fd).
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum Error {
|
||||
/// Empty AddressSpace from parameters.
|
||||
#[error("Empty AddressSpace from parameters")]
|
||||
AddressSpace,
|
||||
|
||||
/// The zero page extends past the end of guest_mem.
|
||||
#[error("the guest zero page extends past the end of guest memory")]
|
||||
ZeroPagePastRamEnd,
|
||||
|
||||
/// Error writing the zero page of guest memory.
|
||||
#[error("failed to write to guest zero page")]
|
||||
ZeroPageSetup,
|
||||
|
||||
/// Failure occurs in issuing KVM ioctls and errors will be returned from kvm_ioctls lib.
|
||||
#[error("failure in issuing KVM ioctl command")]
|
||||
#[error("failure in issuing KVM ioctl command: {0}")]
|
||||
Kvm(#[source] kvm_ioctls::Error),
|
||||
|
||||
/// The host kernel reports an unsupported KVM API version.
|
||||
@ -32,17 +47,30 @@ pub enum Error {
|
||||
KvmApiVersion(i32),
|
||||
|
||||
/// Cannot initialize the KVM context due to missing capabilities.
|
||||
#[error("missing KVM capability")]
|
||||
#[error("missing KVM capability: {0:?}")]
|
||||
KvmCap(kvm_ioctls::Cap),
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[error("failed to configure MSRs")]
|
||||
#[error("failed to configure MSRs: {0:?}")]
|
||||
/// Cannot configure MSRs
|
||||
GuestMSRs(dbs_arch::msr::Error),
|
||||
|
||||
/// MSR inner error
|
||||
#[error("MSR inner error")]
|
||||
Msr(vmm_sys_util::fam::Error),
|
||||
|
||||
/// Error writing MP table to memory.
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[error("failed to write MP table to guest memory: {0}")]
|
||||
MpTableSetup(#[source] dbs_boot::mptable::Error),
|
||||
|
||||
/// Fail to boot system
|
||||
#[error("failed to boot system: {0}")]
|
||||
BootSystem(#[source] dbs_boot::Error),
|
||||
|
||||
/// Cannot open the VM file descriptor.
|
||||
#[error(transparent)]
|
||||
Vm(vm::VmError),
|
||||
}
|
||||
|
||||
/// Errors associated with starting the instance.
|
||||
@ -52,6 +80,48 @@ pub enum StartMicrovmError {
|
||||
#[error("failure while reading from EventFd file descriptor")]
|
||||
EventFd,
|
||||
|
||||
/// The start command was issued more than once.
|
||||
#[error("the virtual machine is already running")]
|
||||
MicroVMAlreadyRunning,
|
||||
|
||||
/// Cannot start the VM because the kernel was not configured.
|
||||
#[error("cannot start the virtual machine without kernel configuration")]
|
||||
MissingKernelConfig,
|
||||
|
||||
#[cfg(feature = "hotplug")]
|
||||
/// Upcall initialize miss vsock device.
|
||||
#[error("the upcall client needs a virtio-vsock device for communication")]
|
||||
UpcallMissVsock,
|
||||
|
||||
/// Upcall is not ready
|
||||
#[error("the upcall client is not ready")]
|
||||
UpcallNotReady,
|
||||
|
||||
/// The configuration passed in is invalid.
|
||||
#[error("invalid virtual machine configuration: {0} ")]
|
||||
ConfigureInvalid(String),
|
||||
|
||||
/// This error is thrown by the minimal boot loader implementation.
|
||||
/// It is related to a faulty memory configuration.
|
||||
#[error("failure while configuring boot information for the virtual machine: {0}")]
|
||||
ConfigureSystem(#[source] Error),
|
||||
|
||||
/// Cannot configure the VM.
|
||||
#[error("failure while configuring the virtual machine: {0}")]
|
||||
ConfigureVm(#[source] vm::VmError),
|
||||
|
||||
/// Cannot load initrd.
|
||||
#[error("cannot load Initrd into guest memory: {0}")]
|
||||
InitrdLoader(#[from] LoadInitrdError),
|
||||
|
||||
/// Cannot load kernel due to invalid memory configuration or invalid kernel image.
|
||||
#[error("cannot load guest kernel into guest memory: {0}")]
|
||||
KernelLoader(#[source] linux_loader::loader::Error),
|
||||
|
||||
/// Cannot load command line string.
|
||||
#[error("failure while configuring guest kernel commandline: {0}")]
|
||||
LoadCommandline(#[source] linux_loader::loader::Error),
|
||||
|
||||
/// The device manager was not configured.
|
||||
#[error("the device manager failed to manage devices: {0}")]
|
||||
DeviceManager(#[source] device_manager::DeviceMgrError),
|
||||
@ -69,4 +139,45 @@ pub enum StartMicrovmError {
|
||||
/// Cannot initialize a MMIO Vsock Device or add a device to the MMIO Bus.
|
||||
#[error("failure while registering virtio-vsock device: {0}")]
|
||||
RegisterVsockDevice(#[source] device_manager::DeviceMgrError),
|
||||
|
||||
/// Address space manager related error, e.g. cannot access guest address space manager.
|
||||
#[error("address space manager related error: {0}")]
|
||||
AddressManagerError(#[source] address_space_manager::AddressManagerError),
|
||||
|
||||
/// Cannot create a new vCPU file descriptor.
|
||||
#[error("vCPU related error: {0}")]
|
||||
Vcpu(#[source] vcpu::VcpuManagerError),
|
||||
|
||||
#[cfg(feature = "hotplug")]
|
||||
/// Upcall initialize Error.
|
||||
#[error("failure while initializing the upcall client: {0}")]
|
||||
UpcallInitError(#[source] dbs_upcall::UpcallClientError),
|
||||
|
||||
#[cfg(feature = "hotplug")]
|
||||
/// Upcall connect Error.
|
||||
#[error("failure while connecting the upcall client: {0}")]
|
||||
UpcallConnectError(#[source] dbs_upcall::UpcallClientError),
|
||||
}
|
||||
|
||||
/// Errors associated with stopping the instance.
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum StopMicrovmError {
|
||||
/// Guest memory has not been initialized.
|
||||
#[error("Guest memory has not been initialized")]
|
||||
GuestMemoryNotInitialized,
|
||||
|
||||
/// Cannot remove devices.
|
||||
#[error("Failed to remove devices in device_manager {0}")]
|
||||
DeviceManager(#[source] device_manager::DeviceMgrError),
|
||||
}
|
||||
|
||||
/// Errors associated with loading initrd
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum LoadInitrdError {
|
||||
/// Cannot load initrd due to an invalid memory configuration.
|
||||
#[error("failed to load the initrd image to guest memory")]
|
||||
LoadInitrd,
|
||||
/// Cannot load initrd due to an invalid image.
|
||||
#[error("failed to read the initrd image: {0}")]
|
||||
ReadInitrd(#[source] std::io::Error),
|
||||
}
|
||||
|
@ -6,6 +6,7 @@
|
||||
mod sm;
|
||||
pub mod vcpu_impl;
|
||||
pub mod vcpu_manager;
|
||||
pub use vcpu_manager::{VcpuManager, VcpuManagerError};
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
use dbs_arch::cpuid::VpmuFeatureLevel;
|
||||
|
@ -964,7 +964,7 @@ pub mod tests {
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[test]
|
||||
fn test_vcpu_check_io_port_info() {
|
||||
let (vcpu, receiver) = create_vcpu();
|
||||
let (vcpu, _receiver) = create_vcpu();
|
||||
|
||||
// boot complete signal
|
||||
let res = vcpu
|
||||
|
148
src/dragonball/src/vm/aarch64.rs
Normal file
148
src/dragonball/src/vm/aarch64.rs
Normal file
@ -0,0 +1,148 @@
|
||||
// Copyright (C) 2022 Alibaba Cloud. All rights reserved.
|
||||
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the THIRD-PARTY file.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::ops::Deref;
|
||||
|
||||
use dbs_arch::gic::GICDevice;
|
||||
use dbs_arch::{DeviceInfoForFDT, DeviceType};
|
||||
use dbs_boot::InitrdConfig;
|
||||
use dbs_utils::epoll_manager::EpollManager;
|
||||
use dbs_utils::time::TimestampUs;
|
||||
use std::fmt::Debug;
|
||||
use vm_memory::{GuestAddressSpace, GuestMemory};
|
||||
use vmm_sys_util::eventfd::EventFd;
|
||||
|
||||
use super::{Vm, VmError};
|
||||
use crate::address_space_manager::{GuestAddressSpaceImpl, GuestMemoryImpl};
|
||||
use crate::error::Error;
|
||||
use crate::StartMicrovmError;
|
||||
use linux_loader::loader::Cmdline;
|
||||
|
||||
/// Configures the system and should be called once per vm before starting vcpu threads.
|
||||
/// For aarch64, we only setup the FDT.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `guest_mem` - The memory to be used by the guest.
|
||||
/// * `cmdline` - The kernel commandline.
|
||||
/// * `vcpu_mpidr` - Array of MPIDR register values per vcpu.
|
||||
/// * `device_info` - A hashmap containing the attached devices for building FDT device nodes.
|
||||
/// * `gic_device` - The GIC device.
|
||||
/// * `initrd` - Information about an optional initrd.
|
||||
pub fn configure_system<T: DeviceInfoForFDT + Clone + Debug, M: GuestMemory>(
|
||||
guest_mem: &M,
|
||||
cmdline: &str,
|
||||
vcpu_mpidr: Vec<u64>,
|
||||
device_info: Option<&HashMap<(DeviceType, String), T>>,
|
||||
gic_device: &Box<dyn GICDevice>,
|
||||
initrd: &Option<super::InitrdConfig>,
|
||||
) -> super::Result<()> {
|
||||
dbs_boot::fdt::create_fdt(
|
||||
guest_mem,
|
||||
vcpu_mpidr,
|
||||
cmdline,
|
||||
device_info,
|
||||
gic_device,
|
||||
initrd,
|
||||
)
|
||||
.map_err(Error::BootSystem)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "aarch64")]
impl Vm {
    /// Gets a reference to the irqchip of the VM.
    ///
    /// # Panics
    ///
    /// Panics if `irqchip_handle` is still `None`, i.e. the irqchip has not been created by
    /// [`Vm::setup_interrupt_controller`] yet.
    pub fn get_irqchip(&self) -> &Box<dyn GICDevice> {
        self.irqchip_handle
            .as_ref()
            .expect("the irqchip must be created before it is accessed")
    }

    /// Initialize the virtual machine instance.
    ///
    /// The steps performed here are, in order:
    /// 1) load the guest kernel image into guest memory.
    /// 2) create the reset eventfd, keep a clone in the `Vm` and hand the original to the
    ///    vCPU manager.
    /// 3) create the boot vCPUs.
    /// 4) create the interrupt controller.
    /// 5) initialize the devices.
    ///
    /// # Errors
    ///
    /// Returns `StartMicrovmError::EventFd` if the eventfd cannot be created or cloned,
    /// `StartMicrovmError::Vcpu` for vCPU manager failures, and otherwise propagates the
    /// error of the failing step.
    pub fn init_microvm(
        &mut self,
        epoll_mgr: EpollManager,
        vm_as: GuestAddressSpaceImpl,
        request_ts: TimestampUs,
    ) -> std::result::Result<(), StartMicrovmError> {
        let kernel_loader_result = self.load_kernel(vm_as.memory().deref())?;

        // On aarch64, the vCPUs need to be created (i.e call KVM_CREATE_VCPU) and configured
        // before setting up the IRQ chip because the `KVM_CREATE_VCPU` ioctl will return error
        // if the IRQCHIP was already initialized.
        // Search for `kvm_arch_vcpu_create` in arch/arm/kvm/arm.c.
        let reset_eventfd =
            EventFd::new(libc::EFD_NONBLOCK).map_err(|_| StartMicrovmError::EventFd)?;
        self.reset_eventfd = Some(
            reset_eventfd
                .try_clone()
                .map_err(|_| StartMicrovmError::EventFd)?,
        );

        self.vcpu_manager()
            .map_err(StartMicrovmError::Vcpu)?
            .set_reset_event_fd(reset_eventfd);
        self.vcpu_manager()
            .map_err(StartMicrovmError::Vcpu)?
            .create_boot_vcpus(request_ts, kernel_loader_result.kernel_load)
            .map_err(StartMicrovmError::Vcpu)?;

        self.setup_interrupt_controller()?;
        self.init_devices(epoll_mgr)?;

        Ok(())
    }

    /// Creates the irq chip in-kernel device model.
    ///
    /// The GIC is created for `vm_config.vcpu_count` vCPUs and stored in `irqchip_handle`.
    ///
    /// # Errors
    ///
    /// Returns `StartMicrovmError::ConfigureVm(VmError::SetupGIC(_))` if the GIC cannot be
    /// created.
    pub fn setup_interrupt_controller(&mut self) -> std::result::Result<(), StartMicrovmError> {
        let vcpu_count = self.vm_config.vcpu_count;

        self.irqchip_handle = Some(
            dbs_arch::gic::create_gic(&self.fd, vcpu_count.into())
                .map_err(|e| StartMicrovmError::ConfigureVm(VmError::SetupGIC(e)))?,
        );

        Ok(())
    }

    /// Execute system architecture specific configurations.
    ///
    /// 1) set guest kernel boot parameters
    /// 2) setup FDT data structs.
    ///
    /// # Errors
    ///
    /// Returns `StartMicrovmError::Vcpu` if the vCPU manager is unavailable and
    /// `StartMicrovmError::ConfigureSystem` if `configure_system` fails.
    ///
    /// # Panics
    ///
    /// Panics if the irqchip has not been created yet (see [`Vm::get_irqchip`]).
    pub fn configure_system_arch(
        &self,
        vm_memory: &GuestMemoryImpl,
        cmdline: &Cmdline,
        initrd: Option<InitrdConfig>,
    ) -> std::result::Result<(), StartMicrovmError> {
        let vcpu_manager = self.vcpu_manager().map_err(StartMicrovmError::Vcpu)?;
        // One MPIDR value per vCPU, in the order reported by the vCPU manager.
        let vcpu_mpidr = vcpu_manager
            .vcpus()
            .into_iter()
            .map(|cpu| cpu.get_mpidr())
            .collect();

        let guest_memory = vm_memory.memory();
        configure_system(
            guest_memory,
            cmdline.as_str(),
            vcpu_mpidr,
            self.device_manager.get_mmio_device_info(),
            self.get_irqchip(),
            &initrd,
        )
        .map_err(StartMicrovmError::ConfigureSystem)
    }
}
|
@ -8,7 +8,7 @@ pub struct KernelConfigInfo {
|
||||
/// The descriptor to the kernel file.
|
||||
kernel_file: File,
|
||||
/// The descriptor to the initrd file, if there is one
|
||||
initrd_file: Option<File>,
|
||||
pub initrd_file: Option<File>,
|
||||
/// The commandline for guest kernel.
|
||||
cmdline: linux_loader::cmdline::Cmdline,
|
||||
}
|
||||
|
@ -1,11 +1,69 @@
|
||||
// Copyright (C) 2021 Alibaba Cloud. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
use std::io::{self, Read, Seek, SeekFrom};
|
||||
use std::ops::Deref;
|
||||
use std::os::unix::io::RawFd;
|
||||
use std::sync::{Arc, Mutex, RwLock};
|
||||
|
||||
use dbs_address_space::AddressSpace;
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
use dbs_arch::gic::GICDevice;
|
||||
use dbs_boot::InitrdConfig;
|
||||
#[cfg(feature = "hotplug")]
|
||||
use dbs_upcall::{DevMgrService, UpcallClient};
|
||||
use dbs_utils::epoll_manager::EpollManager;
|
||||
use dbs_utils::time::TimestampUs;
|
||||
use kvm_ioctls::VmFd;
|
||||
use linux_loader::loader::{KernelLoader, KernelLoaderResult};
|
||||
use seccompiler::BpfProgram;
|
||||
use serde_derive::{Deserialize, Serialize};
|
||||
use slog::{error, info};
|
||||
use vm_memory::{Bytes, GuestAddress, GuestAddressSpace};
|
||||
use vmm_sys_util::eventfd::EventFd;
|
||||
|
||||
use crate::address_space_manager::{
|
||||
AddressManagerError, AddressSpaceMgr, AddressSpaceMgrBuilder, GuestAddressSpaceImpl,
|
||||
GuestMemoryImpl,
|
||||
};
|
||||
use crate::api::v1::{InstanceInfo, InstanceState};
|
||||
use crate::device_manager::console_manager::DmesgWriter;
|
||||
use crate::device_manager::{DeviceManager, DeviceMgrError, DeviceOpContext};
|
||||
use crate::error::{LoadInitrdError, Result, StartMicrovmError, StopMicrovmError};
|
||||
use crate::kvm_context::KvmContext;
|
||||
use crate::resource_manager::ResourceManager;
|
||||
use crate::vcpu::{VcpuManager, VcpuManagerError};
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
use dbs_arch::gic::Error as GICError;
|
||||
|
||||
mod kernel_config;
|
||||
pub use self::kernel_config::KernelConfigInfo;
|
||||
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
#[path = "aarch64.rs"]
|
||||
mod aarch64;
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[path = "x86_64.rs"]
|
||||
mod x86_64;
|
||||
|
||||
/// Errors associated with virtual machine instance related operations.
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum VmError {
|
||||
/// Cannot configure the IRQ.
|
||||
#[error("failed to configure IRQ fot the virtual machine: {0}")]
|
||||
Irq(#[source] kvm_ioctls::Error),
|
||||
|
||||
/// Cannot configure the microvm.
|
||||
#[error("failed to initialize the virtual machine: {0}")]
|
||||
VmSetup(#[source] kvm_ioctls::Error),
|
||||
|
||||
/// Cannot setup GIC
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
#[error("failed to configure GIC")]
|
||||
SetupGIC(GICError),
|
||||
}
|
||||
|
||||
/// Configuration information for user defined NUMA nodes.
|
||||
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
|
||||
pub struct NumaRegionInfo {
|
||||
@ -94,3 +152,555 @@ impl Default for VmConfigInfo {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Struct to manage resources and control states of a virtual machine instance.
|
||||
///
|
||||
/// A `Vm` instance holds the resources assigned to a virtual machine instance, such as CPU, memory,
|
||||
/// devices etc. When a `Vm` instance gets dropped, all assigned resources should be
|
||||
/// released.
|
||||
///
|
||||
/// We have explicitly built the object model as:
|
||||
/// |---Vmm API Server--<-1:1-> HTTP API Server
|
||||
/// | |----------<-1:1-> Shimv2/CRI API Server
|
||||
/// |
|
||||
/// Vmm <-1:N-> Vm <-1:1-> Address Space Manager <-1:N-> GuestMemory
|
||||
/// ^ ^---1:1-> Device Manager <-1:N-> Device
|
||||
/// | ^---1:1-> Resource Manager
|
||||
/// | ^---1:N-> Vcpu
|
||||
/// |---<-1:N-> Event Manager
|
||||
pub struct Vm {
|
||||
fd: Arc<VmFd>,
|
||||
kvm: KvmContext,
|
||||
|
||||
address_space: AddressSpaceMgr,
|
||||
device_manager: DeviceManager,
|
||||
epoll_manager: EpollManager,
|
||||
resource_manager: Arc<ResourceManager>,
|
||||
vcpu_manager: Option<Arc<Mutex<VcpuManager>>>,
|
||||
logger: slog::Logger,
|
||||
/// Config of virtual machine
|
||||
vm_config: VmConfigInfo,
|
||||
kernel_config: Option<KernelConfigInfo>,
|
||||
shared_info: Arc<RwLock<InstanceInfo>>,
|
||||
reset_eventfd: Option<EventFd>,
|
||||
dmesg_fifo: Option<Box<dyn io::Write + Send>>,
|
||||
start_instance_request_ts: u64,
|
||||
start_instance_request_cpu_ts: u64,
|
||||
start_instance_downtime: u64,
|
||||
|
||||
// Arm specific fields.
|
||||
// On aarch64 we need to keep around the fd obtained by creating the VGIC device.
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
irqchip_handle: Option<Box<dyn GICDevice>>,
|
||||
|
||||
#[cfg(feature = "hotplug")]
|
||||
upcall_client: Option<Arc<UpcallClient<DevMgrService>>>,
|
||||
}
|
||||
|
||||
impl Vm {
|
||||
/// Constructs a new `Vm` instance using the given `Kvm` instance.
|
||||
pub fn new(
|
||||
kvm_fd: Option<RawFd>,
|
||||
api_shared_info: Arc<RwLock<InstanceInfo>>,
|
||||
epoll_manager: EpollManager,
|
||||
) -> Result<Self> {
|
||||
let id = api_shared_info.read().unwrap().id.clone();
|
||||
let logger = slog_scope::logger().new(slog::o!("id" => id));
|
||||
|
||||
let kvm = KvmContext::new(kvm_fd)?;
|
||||
let fd = Arc::new(kvm.create_vm()?);
|
||||
|
||||
let resource_manager = Arc::new(ResourceManager::new(Some(kvm.max_memslots())));
|
||||
|
||||
let device_manager = DeviceManager::new(
|
||||
fd.clone(),
|
||||
resource_manager.clone(),
|
||||
epoll_manager.clone(),
|
||||
&logger,
|
||||
);
|
||||
|
||||
Ok(Vm {
|
||||
fd,
|
||||
kvm,
|
||||
address_space: AddressSpaceMgr::default(),
|
||||
device_manager,
|
||||
epoll_manager,
|
||||
resource_manager,
|
||||
vcpu_manager: None,
|
||||
logger,
|
||||
vm_config: Default::default(),
|
||||
kernel_config: None,
|
||||
shared_info: api_shared_info,
|
||||
reset_eventfd: None,
|
||||
dmesg_fifo: None,
|
||||
start_instance_request_ts: 0,
|
||||
start_instance_request_cpu_ts: 0,
|
||||
start_instance_downtime: 0,
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
irqchip_handle: None,
|
||||
#[cfg(feature = "hotplug")]
|
||||
upcall_client: None,
|
||||
})
|
||||
}
|
||||
|
||||
/// Gets a reference to the kvm file descriptor owned by this VM.
|
||||
pub fn vm_fd(&self) -> &VmFd {
|
||||
&self.fd
|
||||
}
|
||||
|
||||
/// Gets a reference to the address_space.address_space for guest memory owned by this VM.
|
||||
pub fn vm_address_space(&self) -> Option<&AddressSpace> {
|
||||
self.address_space.get_address_space()
|
||||
}
|
||||
|
||||
/// Gets a reference to the device manager by this VM.
|
||||
pub fn device_manager(&self) -> &DeviceManager {
|
||||
&self.device_manager
|
||||
}
|
||||
|
||||
/// Gets a reference to the address space for guest memory owned by this VM.
|
||||
///
|
||||
/// Note that `GuestMemory` does not include any device memory that may have been added after
|
||||
/// this VM was constructed.
|
||||
pub fn vm_as(&self) -> Option<&GuestAddressSpaceImpl> {
|
||||
self.address_space.get_vm_as()
|
||||
}
|
||||
|
||||
/// Get an immutable reference to the virtual machine configuration information.
|
||||
pub fn vm_config(&self) -> &VmConfigInfo {
|
||||
&self.vm_config
|
||||
}
|
||||
|
||||
/// Set the virtual machine configuration information.
|
||||
pub fn set_vm_config(&mut self, config: VmConfigInfo) {
|
||||
self.vm_config = config;
|
||||
}
|
||||
|
||||
/// Set guest kernel boot configurations.
|
||||
pub fn set_kernel_config(&mut self, kernel_config: KernelConfigInfo) {
|
||||
self.kernel_config = Some(kernel_config);
|
||||
}
|
||||
|
||||
/// Get virtual machine shared instance information.
|
||||
pub fn shared_info(&self) -> &Arc<RwLock<InstanceInfo>> {
|
||||
&self.shared_info
|
||||
}
|
||||
|
||||
/// Get a reference to EpollManager.
|
||||
pub fn epoll_manager(&self) -> &EpollManager {
|
||||
&self.epoll_manager
|
||||
}
|
||||
|
||||
/// Get eventfd for exit notification.
|
||||
pub fn get_reset_eventfd(&self) -> Option<&EventFd> {
|
||||
self.reset_eventfd.as_ref()
|
||||
}
|
||||
|
||||
/// Check whether the VM has been initialized.
|
||||
pub fn is_vm_initialized(&self) -> bool {
|
||||
let instance_state = {
|
||||
// Use expect() to crash if the other thread poisoned this lock.
|
||||
let shared_info = self.shared_info.read()
|
||||
.expect("Failed to determine if instance is initialized because shared info couldn't be read due to poisoned lock");
|
||||
shared_info.state
|
||||
};
|
||||
instance_state != InstanceState::Uninitialized
|
||||
}
|
||||
|
||||
/// Check whether the VM instance is running.
|
||||
pub fn is_vm_running(&self) -> bool {
|
||||
let instance_state = {
|
||||
// Use expect() to crash if the other thread poisoned this lock.
|
||||
let shared_info = self.shared_info.read()
|
||||
.expect("Failed to determine if instance is initialized because shared info couldn't be read due to poisoned lock");
|
||||
shared_info.state
|
||||
};
|
||||
instance_state == InstanceState::Running
|
||||
}
|
||||
|
||||
/// returns true if system upcall service is ready
|
||||
pub fn is_upcall_client_ready(&self) -> bool {
|
||||
#[cfg(feature = "hotplug")]
|
||||
{
|
||||
if let Some(upcall_client) = self.upcall_client() {
|
||||
return upcall_client.is_ready();
|
||||
}
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
/// Create device operation context.
|
||||
/// vm is not running, return false
|
||||
/// vm is running, but hotplug feature is not enable, return error
|
||||
/// vm is running, but upcall initialize failed, return error
|
||||
/// vm is running, upcall initialize OK, return true
|
||||
pub fn create_device_op_context(
|
||||
&mut self,
|
||||
epoll_mgr: Option<EpollManager>,
|
||||
) -> std::result::Result<DeviceOpContext, StartMicrovmError> {
|
||||
if !self.is_vm_initialized() {
|
||||
Ok(DeviceOpContext::create_boot_ctx(self, epoll_mgr))
|
||||
} else {
|
||||
#[cfg(feature = "hotplug")]
|
||||
{
|
||||
if self.upcall_client().is_none() {
|
||||
Err(StartMicrovmError::UpcallMissVsock)
|
||||
} else if self.is_upcall_client_ready() {
|
||||
Ok(DeviceOpContext::create_hotplug_ctx(self, epoll_mgr))
|
||||
} else {
|
||||
Err(StartMicrovmError::UpcallNotReady)
|
||||
}
|
||||
}
|
||||
#[cfg(not(feature = "hotplug"))]
|
||||
{
|
||||
Err(StartMicrovmError::MicroVMAlreadyRunning)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Save VM instance exit state
|
||||
pub fn vm_exit(&self, exit_code: i32) {
|
||||
if let Ok(mut info) = self.shared_info.write() {
|
||||
info.state = InstanceState::Exited(exit_code);
|
||||
} else {
|
||||
error!(
|
||||
self.logger,
|
||||
"Failed to save exit state, couldn't be written due to poisoned lock"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Reset the console into canonical mode.
|
||||
pub fn reset_console(&self) -> std::result::Result<(), DeviceMgrError> {
|
||||
self.device_manager.reset_console()
|
||||
}
|
||||
|
||||
fn get_dragonball_info(&self) -> (String, String) {
|
||||
let guard = self.shared_info.read().unwrap();
|
||||
let instance_id = guard.id.clone();
|
||||
let dragonball_version = guard.vmm_version.clone();
|
||||
|
||||
(dragonball_version, instance_id)
|
||||
}
|
||||
|
||||
fn init_dmesg_logger(&mut self) {
|
||||
let writer = self.dmesg_logger();
|
||||
self.dmesg_fifo = Some(writer);
|
||||
}
|
||||
|
||||
/// dmesg write to logger
|
||||
pub fn dmesg_logger(&self) -> Box<dyn io::Write + Send> {
|
||||
Box::new(DmesgWriter::new(self.logger.clone()))
|
||||
}
|
||||
|
||||
pub(crate) fn check_health(&self) -> std::result::Result<(), StartMicrovmError> {
|
||||
if self.kernel_config.is_none() {
|
||||
return Err(StartMicrovmError::MissingKernelConfig);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn init_vcpu_manager(
|
||||
&mut self,
|
||||
vm_as: GuestAddressSpaceImpl,
|
||||
vcpu_seccomp_filter: BpfProgram,
|
||||
) -> std::result::Result<(), VcpuManagerError> {
|
||||
let vcpu_manager = VcpuManager::new(
|
||||
self.fd.clone(),
|
||||
&self.kvm,
|
||||
&self.vm_config,
|
||||
vm_as,
|
||||
vcpu_seccomp_filter,
|
||||
self.shared_info.clone(),
|
||||
self.device_manager.io_manager(),
|
||||
self.epoll_manager.clone(),
|
||||
)?;
|
||||
self.vcpu_manager = Some(vcpu_manager);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// get the cpu manager's reference
|
||||
pub fn vcpu_manager(
|
||||
&self,
|
||||
) -> std::result::Result<std::sync::MutexGuard<'_, VcpuManager>, VcpuManagerError> {
|
||||
self.vcpu_manager
|
||||
.as_ref()
|
||||
.ok_or(VcpuManagerError::VcpuManagerNotInitialized)
|
||||
.map(|mgr| mgr.lock().unwrap())
|
||||
}
|
||||
|
||||
/// Pause all vcpus and record the instance downtime
|
||||
pub fn pause_all_vcpus_with_downtime(&mut self) -> std::result::Result<(), VcpuManagerError> {
|
||||
let ts = TimestampUs::default();
|
||||
self.start_instance_downtime = ts.time_us;
|
||||
|
||||
self.vcpu_manager()?.pause_all_vcpus()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Resume all vcpus and calc the intance downtime
|
||||
pub fn resume_all_vcpus_with_downtime(&mut self) -> std::result::Result<(), VcpuManagerError> {
|
||||
self.vcpu_manager()?.resume_all_vcpus()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn init_guest_memory(&mut self) -> std::result::Result<(), StartMicrovmError> {
|
||||
info!(self.logger, "VM: initializing guest memory...");
|
||||
|
||||
// We are not allowing reinitialization of vm guest memory.
|
||||
if self.address_space.is_initialized() {
|
||||
return Ok(());
|
||||
}
|
||||
// vcpu boot up require local memory. reserve 100 MiB memory
|
||||
let mem_size = (self.vm_config.mem_size_mib as u64) << 20;
|
||||
let reserve_memory_bytes = self.vm_config.reserve_memory_bytes;
|
||||
if reserve_memory_bytes > (mem_size >> 1) as u64 {
|
||||
return Err(StartMicrovmError::ConfigureInvalid(String::from(
|
||||
"invalid reserve_memory_bytes",
|
||||
)));
|
||||
}
|
||||
|
||||
let mem_type = self.vm_config.mem_type.clone();
|
||||
let mut mem_file_path = String::from("");
|
||||
if mem_type == "hugetlbfs" {
|
||||
let shared_info = self.shared_info.read()
|
||||
.expect("Failed to determine if instance is initialized because shared info couldn't be read due to poisoned lock");
|
||||
mem_file_path.push_str("/dragonball/");
|
||||
mem_file_path.push_str(shared_info.id.as_str());
|
||||
}
|
||||
|
||||
// init default regions.
|
||||
let mut numa_regions = Vec::with_capacity(1);
|
||||
let mut vcpu_ids: Vec<u32> = Vec::new();
|
||||
|
||||
for i in 0..self.vm_config().max_vcpu_count {
|
||||
vcpu_ids.push(i as u32);
|
||||
}
|
||||
let numa_node = NumaRegionInfo {
|
||||
size: self.vm_config.mem_size_mib as u64,
|
||||
host_numa_node_id: None,
|
||||
guest_numa_node_id: Some(0),
|
||||
vcpu_ids,
|
||||
};
|
||||
numa_regions.push(numa_node);
|
||||
|
||||
info!(
|
||||
self.logger,
|
||||
"VM: mem_type:{} mem_file_path:{}, mem_size:{}, reserve_memory_bytes:{}, \
|
||||
numa_regions:{:?}",
|
||||
mem_type,
|
||||
mem_file_path,
|
||||
mem_size,
|
||||
reserve_memory_bytes,
|
||||
numa_regions,
|
||||
);
|
||||
|
||||
let mut address_space_param = AddressSpaceMgrBuilder::new(&mem_type, &mem_file_path)
|
||||
.map_err(StartMicrovmError::AddressManagerError)?;
|
||||
address_space_param.set_kvm_vm_fd(self.fd.clone());
|
||||
self.address_space
|
||||
.create_address_space(&self.resource_manager, &numa_regions, address_space_param)
|
||||
.map_err(StartMicrovmError::AddressManagerError)?;
|
||||
|
||||
info!(self.logger, "VM: initializing guest memory done");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn init_devices(
|
||||
&mut self,
|
||||
epoll_manager: EpollManager,
|
||||
) -> std::result::Result<(), StartMicrovmError> {
|
||||
info!(self.logger, "VM: initializing devices ...");
|
||||
|
||||
let com1_sock_path = self.vm_config.serial_path.clone();
|
||||
let kernel_config = self
|
||||
.kernel_config
|
||||
.as_mut()
|
||||
.ok_or(StartMicrovmError::MissingKernelConfig)?;
|
||||
|
||||
info!(self.logger, "VM: create interrupt manager");
|
||||
self.device_manager
|
||||
.create_interrupt_manager()
|
||||
.map_err(StartMicrovmError::DeviceManager)?;
|
||||
|
||||
info!(self.logger, "VM: create devices");
|
||||
let vm_as =
|
||||
self.address_space
|
||||
.get_vm_as()
|
||||
.ok_or(StartMicrovmError::AddressManagerError(
|
||||
AddressManagerError::GuestMemoryNotInitialized,
|
||||
))?;
|
||||
self.device_manager.create_devices(
|
||||
vm_as.clone(),
|
||||
epoll_manager,
|
||||
kernel_config,
|
||||
com1_sock_path,
|
||||
self.dmesg_fifo.take(),
|
||||
self.address_space.address_space(),
|
||||
)?;
|
||||
|
||||
info!(self.logger, "VM: start devices");
|
||||
self.device_manager.start_devices()?;
|
||||
|
||||
info!(self.logger, "VM: initializing devices done");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Remove devices when shutdown vm
|
||||
pub fn remove_devices(&mut self) -> std::result::Result<(), StopMicrovmError> {
|
||||
info!(self.logger, "VM: remove devices");
|
||||
let vm_as = self
|
||||
.address_space
|
||||
.get_vm_as()
|
||||
.ok_or(StopMicrovmError::GuestMemoryNotInitialized)?;
|
||||
|
||||
self.device_manager
|
||||
.remove_devices(
|
||||
vm_as.clone(),
|
||||
self.epoll_manager.clone(),
|
||||
self.address_space.address_space(),
|
||||
)
|
||||
.map_err(StopMicrovmError::DeviceManager)
|
||||
}
|
||||
|
||||
fn load_kernel(
|
||||
&mut self,
|
||||
vm_memory: &GuestMemoryImpl,
|
||||
) -> std::result::Result<KernelLoaderResult, StartMicrovmError> {
|
||||
// This is the easy way out of consuming the value of the kernel_cmdline.
|
||||
|
||||
let kernel_config = self
|
||||
.kernel_config
|
||||
.as_mut()
|
||||
.ok_or(StartMicrovmError::MissingKernelConfig)?;
|
||||
|
||||
let high_mem_addr = GuestAddress(dbs_boot::get_kernel_start());
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
return linux_loader::loader::elf::Elf::load(
|
||||
vm_memory,
|
||||
None,
|
||||
kernel_config.kernel_file_mut(),
|
||||
Some(high_mem_addr),
|
||||
)
|
||||
.map_err(StartMicrovmError::KernelLoader);
|
||||
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
return linux_loader::loader::pe::PE::load(
|
||||
vm_memory,
|
||||
Some(GuestAddress(dbs_boot::get_kernel_start())),
|
||||
kernel_config.kernel_file_mut(),
|
||||
Some(high_mem_addr),
|
||||
)
|
||||
.map_err(StartMicrovmError::KernelLoader);
|
||||
}
|
||||
|
||||
/// Loads the initrd from a file into the given memory slice.
|
||||
///
|
||||
/// * `vm_memory` - The guest memory the initrd is written to.
|
||||
/// * `image` - The initrd image.
|
||||
///
|
||||
/// Returns the result of initrd loading
|
||||
fn load_initrd<F>(
|
||||
&self,
|
||||
vm_memory: &GuestMemoryImpl,
|
||||
image: &mut F,
|
||||
) -> std::result::Result<InitrdConfig, LoadInitrdError>
|
||||
where
|
||||
F: Read + Seek,
|
||||
{
|
||||
use crate::error::LoadInitrdError::*;
|
||||
|
||||
let size: usize;
|
||||
// Get the image size
|
||||
match image.seek(SeekFrom::End(0)) {
|
||||
Err(e) => return Err(ReadInitrd(e)),
|
||||
Ok(0) => {
|
||||
return Err(ReadInitrd(io::Error::new(
|
||||
io::ErrorKind::InvalidData,
|
||||
"Initrd image seek returned a size of zero",
|
||||
)))
|
||||
}
|
||||
Ok(s) => size = s as usize,
|
||||
};
|
||||
// Go back to the image start
|
||||
image.seek(SeekFrom::Start(0)).map_err(ReadInitrd)?;
|
||||
|
||||
// Get the target address
|
||||
let address = dbs_boot::initrd_load_addr(vm_memory, size as u64).map_err(|_| LoadInitrd)?;
|
||||
|
||||
// Load the image into memory
|
||||
vm_memory
|
||||
.read_from(GuestAddress(address), image, size)
|
||||
.map_err(|_| LoadInitrd)?;
|
||||
|
||||
Ok(InitrdConfig {
|
||||
address: GuestAddress(address),
|
||||
size,
|
||||
})
|
||||
}
|
||||
|
||||
fn init_configure_system(
|
||||
&mut self,
|
||||
vm_as: &GuestAddressSpaceImpl,
|
||||
) -> std::result::Result<(), StartMicrovmError> {
|
||||
let vm_memory = vm_as.memory();
|
||||
let kernel_config = self
|
||||
.kernel_config
|
||||
.as_ref()
|
||||
.ok_or(StartMicrovmError::MissingKernelConfig)?;
|
||||
//let cmdline = kernel_config.cmdline.clone();
|
||||
let initrd: Option<InitrdConfig> = match &kernel_config.initrd_file {
|
||||
Some(f) => {
|
||||
let initrd_file = f.try_clone();
|
||||
if initrd_file.is_err() {
|
||||
return Err(StartMicrovmError::InitrdLoader(
|
||||
LoadInitrdError::ReadInitrd(io::Error::from(io::ErrorKind::InvalidData)),
|
||||
));
|
||||
}
|
||||
let res = self.load_initrd(vm_memory.deref(), &mut initrd_file.unwrap())?;
|
||||
Some(res)
|
||||
}
|
||||
None => None,
|
||||
};
|
||||
|
||||
self.configure_system_arch(vm_memory.deref(), kernel_config.kernel_cmdline(), initrd)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "hotplug")]
impl Vm {
    /// Returns a reference to the optional upcall client.
    pub fn upcall_client(&self) -> &Option<Arc<UpcallClient<DevMgrService>>> {
        &self.upcall_client
    }

    /// Creates the upcall client on top of the vsock inner connector, connects it
    /// and stores it in `self.upcall_client`.
    ///
    /// # Errors
    ///
    /// * `StartMicrovmError::UpcallMissVsock` if the device manager has no vsock
    ///   inner connector.
    /// * `StartMicrovmError::UpcallInitError` if the client cannot be created.
    /// * `StartMicrovmError::UpcallConnectError` if connecting fails.
    fn init_upcall(&mut self) -> std::result::Result<(), StartMicrovmError> {
        // The upcall channel is carried over the vsock inner connector.
        let inner_connector = match self.device_manager.get_vsock_inner_connector() {
            Some(connector) => connector,
            None => return Err(StartMicrovmError::UpcallMissVsock),
        };
        let epoll_manager = self.epoll_manager.clone();

        let mut client =
            UpcallClient::new(inner_connector, epoll_manager, DevMgrService::default())
                .map_err(StartMicrovmError::UpcallInitError)?;
        client
            .connect()
            .map_err(StartMicrovmError::UpcallConnectError)?;

        self.upcall_client = Some(Arc::new(client));

        info!(self.logger, "upcall client init success");
        Ok(())
    }
}
|
||||
|
276
src/dragonball/src/vm/x86_64.rs
Normal file
276
src/dragonball/src/vm/x86_64.rs
Normal file
@ -0,0 +1,276 @@
|
||||
// Copyright (C) 2020-2022 Alibaba Cloud. All rights reserved.
|
||||
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the THIRD-PARTY file.
|
||||
|
||||
use std::convert::TryInto;
|
||||
use std::mem;
|
||||
use std::ops::Deref;
|
||||
|
||||
use dbs_address_space::AddressSpace;
|
||||
use dbs_boot::{add_e820_entry, bootparam, layout, mptable, BootParamsWrapper, InitrdConfig};
|
||||
use dbs_utils::epoll_manager::EpollManager;
|
||||
use dbs_utils::time::TimestampUs;
|
||||
use kvm_bindings::{kvm_irqchip, kvm_pit_config, kvm_pit_state2, KVM_PIT_SPEAKER_DUMMY};
|
||||
use slog::info;
|
||||
use vm_memory::{Address, Bytes, GuestAddress, GuestAddressSpace, GuestMemory};
|
||||
|
||||
use crate::address_space_manager::{GuestAddressSpaceImpl, GuestMemoryImpl};
|
||||
use crate::error::{Error, Result, StartMicrovmError};
|
||||
use crate::vm::{Vm, VmError};
|
||||
|
||||
use linux_loader::cmdline::Cmdline;
|
||||
|
||||
/// Configures the system and should be called once per vm before starting vcpu
|
||||
/// threads.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `guest_mem` - The memory to be used by the guest.
|
||||
/// * `cmdline_addr` - Address in `guest_mem` where the kernel command line was
|
||||
/// loaded.
|
||||
/// * `cmdline_size` - Size of the kernel command line in bytes including the
|
||||
/// null terminator.
|
||||
/// * `initrd` - Information about where the ramdisk image was loaded in the
|
||||
/// `guest_mem`.
|
||||
/// * `boot_cpus` - Number of virtual CPUs the guest will have at boot time.
|
||||
/// * `max_cpus` - Max number of virtual CPUs the guest will have.
|
||||
/// * `rsv_mem_bytes` - Reserve memory from microVM..
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn configure_system<M: GuestMemory>(
|
||||
guest_mem: &M,
|
||||
address_space: Option<&AddressSpace>,
|
||||
cmdline_addr: GuestAddress,
|
||||
cmdline_size: usize,
|
||||
initrd: &Option<InitrdConfig>,
|
||||
boot_cpus: u8,
|
||||
max_cpus: u8,
|
||||
rsv_mem_bytes: u64,
|
||||
) -> super::Result<()> {
|
||||
const KERNEL_BOOT_FLAG_MAGIC: u16 = 0xaa55;
|
||||
const KERNEL_HDR_MAGIC: u32 = 0x5372_6448;
|
||||
const KERNEL_LOADER_OTHER: u8 = 0xff;
|
||||
const KERNEL_MIN_ALIGNMENT_BYTES: u32 = 0x0100_0000; // Must be non-zero.
|
||||
|
||||
let mmio_start = GuestAddress(layout::MMIO_LOW_START);
|
||||
let mmio_end = GuestAddress(layout::MMIO_LOW_END);
|
||||
let himem_start = GuestAddress(layout::HIMEM_START);
|
||||
|
||||
// Note that this puts the mptable at the last 1k of Linux's 640k base RAM
|
||||
mptable::setup_mptable(guest_mem, boot_cpus, max_cpus).map_err(Error::MpTableSetup)?;
|
||||
|
||||
let mut params: BootParamsWrapper = BootParamsWrapper(bootparam::boot_params::default());
|
||||
|
||||
params.0.hdr.type_of_loader = KERNEL_LOADER_OTHER;
|
||||
params.0.hdr.boot_flag = KERNEL_BOOT_FLAG_MAGIC;
|
||||
params.0.hdr.header = KERNEL_HDR_MAGIC;
|
||||
params.0.hdr.cmd_line_ptr = cmdline_addr.raw_value() as u32;
|
||||
params.0.hdr.cmdline_size = cmdline_size as u32;
|
||||
params.0.hdr.kernel_alignment = KERNEL_MIN_ALIGNMENT_BYTES;
|
||||
if let Some(initrd_config) = initrd {
|
||||
params.0.hdr.ramdisk_image = initrd_config.address.raw_value() as u32;
|
||||
params.0.hdr.ramdisk_size = initrd_config.size as u32;
|
||||
}
|
||||
|
||||
add_e820_entry(&mut params.0, 0, layout::EBDA_START, bootparam::E820_RAM)
|
||||
.map_err(Error::BootSystem)?;
|
||||
|
||||
let mem_end = address_space.ok_or(Error::AddressSpace)?.last_addr();
|
||||
if mem_end < mmio_start {
|
||||
add_e820_entry(
|
||||
&mut params.0,
|
||||
himem_start.raw_value() as u64,
|
||||
// it's safe to use unchecked_offset_from because
|
||||
// mem_end > himem_start
|
||||
mem_end.unchecked_offset_from(himem_start) as u64 + 1,
|
||||
bootparam::E820_RAM,
|
||||
)
|
||||
.map_err(Error::BootSystem)?;
|
||||
} else {
|
||||
add_e820_entry(
|
||||
&mut params.0,
|
||||
himem_start.raw_value(),
|
||||
// it's safe to use unchecked_offset_from because
|
||||
// end_32bit_gap_start > himem_start
|
||||
mmio_start.unchecked_offset_from(himem_start),
|
||||
bootparam::E820_RAM,
|
||||
)
|
||||
.map_err(Error::BootSystem)?;
|
||||
if mem_end > mmio_end {
|
||||
add_e820_entry(
|
||||
&mut params.0,
|
||||
mmio_end.raw_value() + 1,
|
||||
// it's safe to use unchecked_offset_from because mem_end > mmio_end
|
||||
mem_end.unchecked_offset_from(mmio_end) as u64,
|
||||
bootparam::E820_RAM,
|
||||
)
|
||||
.map_err(Error::BootSystem)?;
|
||||
}
|
||||
}
|
||||
|
||||
// reserve memory from microVM.
|
||||
if rsv_mem_bytes > 0 {
|
||||
add_e820_entry(
|
||||
&mut params.0,
|
||||
mem_end.raw_value().max(mmio_end.raw_value()) + 1,
|
||||
rsv_mem_bytes,
|
||||
bootparam::E820_RESERVED,
|
||||
)
|
||||
.map_err(Error::BootSystem)?;
|
||||
}
|
||||
|
||||
let zero_page_addr = GuestAddress(layout::ZERO_PAGE_START);
|
||||
guest_mem
|
||||
.checked_offset(zero_page_addr, mem::size_of::<bootparam::boot_params>())
|
||||
.ok_or(Error::ZeroPagePastRamEnd)?;
|
||||
guest_mem
|
||||
.write_obj(params, zero_page_addr)
|
||||
.map_err(|_| Error::ZeroPageSetup)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
impl Vm {
|
||||
/// Get the status of in-kernel PIT.
|
||||
pub fn get_pit_state(&self) -> Result<kvm_pit_state2> {
|
||||
self.fd.get_pit2().map_err(|e| Error::Vm(VmError::Irq(e)))
|
||||
}
|
||||
|
||||
/// Set the status of in-kernel PIT.
|
||||
pub fn set_pit_state(&self, pit_state: &kvm_pit_state2) -> Result<()> {
|
||||
self.fd
|
||||
.set_pit2(pit_state)
|
||||
.map_err(|e| Error::Vm(VmError::Irq(e)))
|
||||
}
|
||||
|
||||
/// Get the status of in-kernel ioapic.
|
||||
pub fn get_irqchip_state(&self, chip_id: u32) -> Result<kvm_irqchip> {
|
||||
let mut irqchip: kvm_irqchip = kvm_irqchip {
|
||||
chip_id,
|
||||
..kvm_irqchip::default()
|
||||
};
|
||||
self.fd
|
||||
.get_irqchip(&mut irqchip)
|
||||
.map(|_| irqchip)
|
||||
.map_err(|e| Error::Vm(VmError::Irq(e)))
|
||||
}
|
||||
|
||||
/// Set the status of in-kernel ioapic.
|
||||
pub fn set_irqchip_state(&self, irqchip: &kvm_irqchip) -> Result<()> {
|
||||
self.fd
|
||||
.set_irqchip(irqchip)
|
||||
.map_err(|e| Error::Vm(VmError::Irq(e)))
|
||||
}
|
||||
}
|
||||
|
||||
impl Vm {
|
||||
/// Initialize the virtual machine instance.
|
||||
///
|
||||
/// It initialize the virtual machine instance by:
|
||||
/// 1) initialize virtual machine global state and configuration.
|
||||
/// 2) create system devices, such as interrupt controller, PIT etc.
|
||||
/// 3) create and start IO devices, such as serial, console, block, net, vsock etc.
|
||||
/// 4) create and initialize vCPUs.
|
||||
/// 5) configure CPU power management features.
|
||||
/// 6) load guest kernel image.
|
||||
pub fn init_microvm(
|
||||
&mut self,
|
||||
epoll_mgr: EpollManager,
|
||||
vm_as: GuestAddressSpaceImpl,
|
||||
request_ts: TimestampUs,
|
||||
) -> std::result::Result<(), StartMicrovmError> {
|
||||
info!(self.logger, "VM: start initializing microvm ...");
|
||||
|
||||
self.init_tss()?;
|
||||
// For x86_64 we need to create the interrupt controller before calling `KVM_CREATE_VCPUS`
|
||||
// while on aarch64 we need to do it the other way around.
|
||||
self.setup_interrupt_controller()?;
|
||||
self.create_pit()?;
|
||||
self.init_devices(epoll_mgr)?;
|
||||
|
||||
let reset_event_fd = self.device_manager.get_reset_eventfd().unwrap();
|
||||
self.vcpu_manager()
|
||||
.map_err(StartMicrovmError::Vcpu)?
|
||||
.set_reset_event_fd(reset_event_fd)
|
||||
.map_err(StartMicrovmError::Vcpu)?;
|
||||
|
||||
if self.vm_config.cpu_pm == "on" {
|
||||
// TODO: add cpu_pm support. issue #4590.
|
||||
info!(self.logger, "VM: enable CPU disable_idle_exits capability");
|
||||
}
|
||||
|
||||
let vm_memory = vm_as.memory();
|
||||
let kernel_loader_result = self.load_kernel(vm_memory.deref())?;
|
||||
|
||||
self.vcpu_manager()
|
||||
.map_err(StartMicrovmError::Vcpu)?
|
||||
.create_boot_vcpus(request_ts, kernel_loader_result.kernel_load)
|
||||
.map_err(StartMicrovmError::Vcpu)?;
|
||||
|
||||
info!(self.logger, "VM: initializing microvm done");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Execute system architecture specific configurations.
|
||||
///
|
||||
/// 1) set guest kernel boot parameters
|
||||
/// 2) setup BIOS configuration data structs, mainly implement the MPSpec.
|
||||
pub fn configure_system_arch(
|
||||
&self,
|
||||
vm_memory: &GuestMemoryImpl,
|
||||
cmdline: &Cmdline,
|
||||
initrd: Option<InitrdConfig>,
|
||||
) -> std::result::Result<(), StartMicrovmError> {
|
||||
let cmdline_addr = GuestAddress(dbs_boot::layout::CMDLINE_START);
|
||||
linux_loader::loader::load_cmdline(vm_memory, cmdline_addr, cmdline)
|
||||
.map_err(StartMicrovmError::LoadCommandline)?;
|
||||
|
||||
configure_system(
|
||||
vm_memory,
|
||||
self.address_space.address_space(),
|
||||
cmdline_addr,
|
||||
cmdline.as_str().len() + 1,
|
||||
&initrd,
|
||||
self.vm_config.vcpu_count,
|
||||
self.vm_config.max_vcpu_count,
|
||||
self.vm_config.reserve_memory_bytes,
|
||||
)
|
||||
.map_err(StartMicrovmError::ConfigureSystem)
|
||||
}
|
||||
|
||||
/// Initializes the guest memory.
|
||||
pub(crate) fn init_tss(&mut self) -> std::result::Result<(), StartMicrovmError> {
|
||||
self.fd
|
||||
.set_tss_address(dbs_boot::layout::KVM_TSS_ADDRESS.try_into().unwrap())
|
||||
.map_err(|e| StartMicrovmError::ConfigureVm(VmError::VmSetup(e)))
|
||||
}
|
||||
|
||||
/// Creates the irq chip and an in-kernel device model for the PIT.
|
||||
pub(crate) fn setup_interrupt_controller(
|
||||
&mut self,
|
||||
) -> std::result::Result<(), StartMicrovmError> {
|
||||
self.fd
|
||||
.create_irq_chip()
|
||||
.map_err(|e| StartMicrovmError::ConfigureVm(VmError::VmSetup(e)))
|
||||
}
|
||||
|
||||
/// Creates an in-kernel device model for the PIT.
|
||||
pub(crate) fn create_pit(&self) -> std::result::Result<(), StartMicrovmError> {
|
||||
info!(self.logger, "VM: create pit");
|
||||
// We need to enable the emulation of a dummy speaker port stub so that writing to port 0x61
|
||||
// (i.e. KVM_SPEAKER_BASE_ADDRESS) does not trigger an exit to user space.
|
||||
let pit_config = kvm_pit_config {
|
||||
flags: KVM_PIT_SPEAKER_DUMMY,
|
||||
..kvm_pit_config::default()
|
||||
};
|
||||
|
||||
// Safe because we know that our file is a VM fd, we know the kernel will only read the
|
||||
// correct amount of memory from our pointer, and we verify the return result.
|
||||
self.fd
|
||||
.create_pit2(pit_config)
|
||||
.map_err(|e| StartMicrovmError::ConfigureVm(VmError::VmSetup(e)))
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user