mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-04-28 19:54:35 +00:00
dragonball: add vcpu
Virtual CPU manager for virtual machines. Signed-off-by: Liu Jiang <gerry@linux.alibaba.com> Signed-off-by: jingshan <jingshan@linux.alibaba.com> Signed-off-by: Chao Wu <chaowu@linux.alibaba.com> Signed-off-by: wllenyj <wllenyj@linux.alibaba.com>
This commit is contained in:
parent
468c73b3cb
commit
7d1953b52e
@ -14,6 +14,7 @@ arc-swap = "1.5.0"
|
||||
bytes = "1.1.0"
|
||||
dbs-address-space = "0.1.0"
|
||||
dbs-allocator = "0.1.0"
|
||||
dbs-arch = "0.1.0"
|
||||
dbs-boot = "0.2.0"
|
||||
dbs-device = "0.1.0"
|
||||
dbs-interrupt = { version = "0.1.0", features = ["kvm-irq"] }
|
||||
|
@ -25,6 +25,8 @@ pub mod metric;
|
||||
pub mod resource_manager;
|
||||
/// Signal handler for virtual machines.
|
||||
pub mod signal_handler;
|
||||
/// Virtual CPU manager for virtual machines.
|
||||
pub mod vcpu;
|
||||
/// Virtual machine manager for virtual machines.
|
||||
pub mod vm;
|
||||
|
||||
|
@ -13,6 +13,23 @@ lazy_static! {
|
||||
pub static ref METRICS: DragonballMetrics = DragonballMetrics::default();
|
||||
}
|
||||
|
||||
/// Metrics specific to VCPUs' mode of functioning.
|
||||
#[derive(Default, Serialize)]
|
||||
pub struct VcpuMetrics {
|
||||
/// Number of KVM exits for handling input IO.
|
||||
pub exit_io_in: SharedIncMetric,
|
||||
/// Number of KVM exits for handling output IO.
|
||||
pub exit_io_out: SharedIncMetric,
|
||||
/// Number of KVM exits for handling MMIO reads.
|
||||
pub exit_mmio_read: SharedIncMetric,
|
||||
/// Number of KVM exits for handling MMIO writes.
|
||||
pub exit_mmio_write: SharedIncMetric,
|
||||
/// Number of errors during this VCPU's run.
|
||||
pub failures: SharedIncMetric,
|
||||
/// Failures in configuring the CPUID.
|
||||
pub filter_cpuid: SharedIncMetric,
|
||||
}
|
||||
|
||||
/// Metrics for the seccomp filtering.
|
||||
#[derive(Default, Serialize)]
|
||||
pub struct SeccompMetrics {
|
||||
@ -32,6 +49,8 @@ pub struct SignalMetrics {
|
||||
/// Structure storing all metrics while enforcing serialization support on them.
|
||||
#[derive(Default, Serialize)]
|
||||
pub struct DragonballMetrics {
|
||||
/// Metrics related to a vcpu's functioning.
|
||||
pub vcpu: VcpuMetrics,
|
||||
/// Metrics related to seccomp filtering.
|
||||
pub seccomp: SeccompMetrics,
|
||||
/// Metrics related to signals.
|
||||
|
94
src/dragonball/src/vcpu/aarch64.rs
Normal file
94
src/dragonball/src/vcpu/aarch64.rs
Normal file
@ -0,0 +1,94 @@
|
||||
// Copyright (C) 2022 Alibaba Cloud. All rights reserved.
|
||||
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the THIRD-PARTY file.
|
||||
|
||||
use std::sync::mpsc::{channel, Sender};
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::IoManagerCached;
|
||||
use dbs_utils::time::TimestampUs;
|
||||
use kvm_ioctls::{VcpuFd, VmFd};
|
||||
use vm_memory::GuestAddress;
|
||||
use vmm_sys_util::eventfd::EventFd;
|
||||
|
||||
use crate::address_space_manager::GuestAddressSpaceImpl;
|
||||
use crate::vcpu::vcpu_impl::{Result, Vcpu, VcpuStateEvent};
|
||||
use crate::vcpu::VcpuConfig;
|
||||
|
||||
#[allow(unused)]
|
||||
impl Vcpu {
|
||||
/// Constructs a new VCPU for `vm`.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `id` - Represents the CPU number between [0, max vcpus).
|
||||
/// * `vcpu_fd` - The kvm `VcpuFd` for the vcpu.
|
||||
/// * `io_mgr` - The io-manager used to access port-io and mmio devices.
|
||||
/// * `exit_evt` - An `EventFd` that will be written into when this vcpu
|
||||
/// exits.
|
||||
/// * `vcpu_state_event` - The eventfd which can notify vmm state of some
|
||||
/// vcpu should change.
|
||||
/// * `vcpu_state_sender` - The channel to send state change message from
|
||||
/// vcpu thread to vmm thread.
|
||||
/// * `create_ts` - A timestamp used by the vcpu to calculate its lifetime.
|
||||
/// * `support_immediate_exit` - whether the KVM in use supports the immediate_exit flag.
|
||||
pub fn new_aarch64(
|
||||
id: u8,
|
||||
vcpu_fd: Arc<VcpuFd>,
|
||||
io_mgr: IoManagerCached,
|
||||
exit_evt: EventFd,
|
||||
vcpu_state_event: EventFd,
|
||||
vcpu_state_sender: Sender<VcpuStateEvent>,
|
||||
create_ts: TimestampUs,
|
||||
support_immediate_exit: bool,
|
||||
) -> Result<Self> {
|
||||
let (event_sender, event_receiver) = channel();
|
||||
let (response_sender, response_receiver) = channel();
|
||||
|
||||
Ok(Vcpu {
|
||||
fd: vcpu_fd,
|
||||
id,
|
||||
io_mgr,
|
||||
create_ts,
|
||||
event_receiver,
|
||||
event_sender: Some(event_sender),
|
||||
response_receiver: Some(response_receiver),
|
||||
response_sender,
|
||||
vcpu_state_event,
|
||||
vcpu_state_sender,
|
||||
support_immediate_exit,
|
||||
mpidr: 0,
|
||||
exit_evt,
|
||||
})
|
||||
}
|
||||
|
||||
/// Configures an aarch64 specific vcpu.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `_vcpu_config` - vCPU configuration (not used yet on aarch64).
|
||||
/// * `vm_fd` - The kvm `VmFd` for this microvm.
|
||||
/// * `vm_as` - The guest memory address space used by this microvm.
|
||||
/// * `kernel_load_addr` - Offset from `guest_mem` at which the kernel is loaded.
|
||||
/// * `_pgtable_addr` - pgtable address for ap vcpu (not used in aarch64)
|
||||
pub fn configure(
|
||||
&mut self,
|
||||
_vcpu_config: &VcpuConfig,
|
||||
vm_fd: &VmFd,
|
||||
vm_as: &GuestAddressSpaceImpl,
|
||||
kernel_load_addr: Option<GuestAddress>,
|
||||
_pgtable_addr: Option<GuestAddress>,
|
||||
) -> Result<()> {
|
||||
// TODO: add arm vcpu configure() function. issue: #4445
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Gets the MPIDR register value.
|
||||
pub fn get_mpidr(&self) -> u64 {
|
||||
self.mpidr
|
||||
}
|
||||
}
|
31
src/dragonball/src/vcpu/mod.rs
Normal file
31
src/dragonball/src/vcpu/mod.rs
Normal file
@ -0,0 +1,31 @@
|
||||
// Copyright (C) 2022 Alibaba Cloud Computing. All rights reserved.
|
||||
// Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
mod sm;
|
||||
pub mod vcpu_impl;
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
use dbs_arch::cpuid::VpmuFeatureLevel;
|
||||
|
||||
/// vcpu config collection
|
||||
pub struct VcpuConfig {
|
||||
/// initial vcpu count
|
||||
pub boot_vcpu_count: u8,
|
||||
/// max vcpu count for hotplug
|
||||
pub max_vcpu_count: u8,
|
||||
/// threads per core for cpu topology information
|
||||
pub threads_per_core: u8,
|
||||
/// cores per die for cpu topology information
|
||||
pub cores_per_die: u8,
|
||||
/// dies per socket for cpu topology information
|
||||
pub dies_per_socket: u8,
|
||||
/// socket number for cpu topology information
|
||||
pub sockets: u8,
|
||||
/// if vpmu feature is Disabled, it means vpmu feature is off (by default)
|
||||
/// if vpmu feature is LimitedlyEnabled, it means minimal vpmu counters are supported (cycles and instructions)
|
||||
/// if vpmu feature is FullyEnabled, it means all vpmu counters are supported
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
pub vpmu_feature: VpmuFeatureLevel,
|
||||
}
|
149
src/dragonball/src/vcpu/sm.rs
Normal file
149
src/dragonball/src/vcpu/sm.rs
Normal file
@ -0,0 +1,149 @@
|
||||
// Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
use std::ops::Deref;
|
||||
|
||||
/// Simple abstraction of a state machine.
///
/// `StateMachine<T>` is a wrapper over `T` that also encodes state information for `T`.
///
/// Each state for `T` is represented by a `StateFn<T>` which is a function that acts as
/// the state handler for that particular state of `T`.
///
/// `StateFn<T>` returns exactly one other `StateMachine<T>` thus each state gets clearly
/// defined transitions to other states.
pub struct StateMachine<T> {
    // Handler invoked by `run()` when the machine is in this state.
    function: StateFn<T>,
    // When `true`, `run()` stops as soon as it reaches this state and does
    // not invoke `function`.
    end_state: bool,
}

/// Type representing a state handler of a `StateMachine<T>` machine. Each state handler
/// is a function from `T` that handles a specific state of `T`.
type StateFn<T> = fn(&mut T) -> StateMachine<T>;

impl<T> StateMachine<T> {
    /// Creates a new state wrapper.
    ///
    /// # Arguments
    ///
    /// `function` - the state handler for this state.
    /// `end_state` - whether this state is final. A final state terminates `run()`
    ///               without its handler being invoked.
    pub fn new(function: StateFn<T>, end_state: bool) -> StateMachine<T> {
        Self {
            function,
            end_state,
        }
    }

    /// Creates a new state wrapper that has further possible transitions.
    ///
    /// # Arguments
    ///
    /// `function` - the state handler for this state.
    pub fn next(function: StateFn<T>) -> StateMachine<T> {
        Self::new(function, false)
    }

    /// Creates a new state wrapper that has no further transitions. `run()` stops as
    /// soon as a handler returns this state; `function` itself is never invoked.
    ///
    /// # Arguments
    ///
    /// `function` - placeholder handler stored in the final state (not called by `run()`).
    pub fn finish(function: StateFn<T>) -> StateMachine<T> {
        Self::new(function, true)
    }

    /// Runs a state machine for `T` starting from the provided state.
    ///
    /// Handlers are invoked one after another, each returning the next state, until a
    /// handler returns a state created with `finish()` (or `new(_, true)`).
    ///
    /// # Arguments
    ///
    /// `machine` - a mutable reference to the object running through the various states.
    /// `starting_state_fn` - a `fn(&mut T) -> StateMachine<T>` that should be the handler for
    ///                       the initial state.
    pub fn run(machine: &mut T, starting_state_fn: StateFn<T>) {
        // The starting state is never final, so its handler always runs once.
        let mut state = Self::next(starting_state_fn);
        // The end-state check happens before the call, hence a final state's
        // handler is skipped.
        while !state.end_state {
            state = (state.function)(machine);
        }
    }
}

// Implement Deref of `StateMachine<T>` so that we can directly call its underlying state handler.
impl<T> Deref for StateMachine<T> {
    type Target = StateFn<T>;

    fn deref(&self) -> &Self::Target {
        &self.function
    }
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
// DummyMachine with states `s1`, `s2` and `s3`.
|
||||
struct DummyMachine {
|
||||
private_data_s1: bool,
|
||||
private_data_s2: bool,
|
||||
private_data_s3: bool,
|
||||
}
|
||||
|
||||
impl DummyMachine {
|
||||
fn new() -> Self {
|
||||
DummyMachine {
|
||||
private_data_s1: false,
|
||||
private_data_s2: false,
|
||||
private_data_s3: false,
|
||||
}
|
||||
}
|
||||
|
||||
// DummyMachine functions here.
|
||||
|
||||
// Simple state-machine: start->s1->s2->s3->done.
|
||||
fn run(&mut self) {
|
||||
// Verify the machine has not run yet.
|
||||
assert!(!self.private_data_s1);
|
||||
assert!(!self.private_data_s2);
|
||||
assert!(!self.private_data_s3);
|
||||
|
||||
// Run the state-machine.
|
||||
StateMachine::run(self, Self::s1);
|
||||
|
||||
// Verify the machine went through all states.
|
||||
assert!(self.private_data_s1);
|
||||
assert!(self.private_data_s2);
|
||||
assert!(self.private_data_s3);
|
||||
}
|
||||
|
||||
fn s1(&mut self) -> StateMachine<Self> {
|
||||
// Verify private data mutates along with the states.
|
||||
assert!(!self.private_data_s1);
|
||||
self.private_data_s1 = true;
|
||||
StateMachine::next(Self::s2)
|
||||
}
|
||||
|
||||
fn s2(&mut self) -> StateMachine<Self> {
|
||||
// Verify private data mutates along with the states.
|
||||
assert!(!self.private_data_s2);
|
||||
self.private_data_s2 = true;
|
||||
StateMachine::next(Self::s3)
|
||||
}
|
||||
|
||||
fn s3(&mut self) -> StateMachine<Self> {
|
||||
// Verify private data mutates along with the states.
|
||||
assert!(!self.private_data_s3);
|
||||
self.private_data_s3 = true;
|
||||
// The machine ends here, adding `s1` as next state to validate this.
|
||||
StateMachine::finish(Self::s1)
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_sm() {
|
||||
let mut machine = DummyMachine::new();
|
||||
machine.run();
|
||||
}
|
||||
}
|
955
src/dragonball/src/vcpu/vcpu_impl.rs
Normal file
955
src/dragonball/src/vcpu/vcpu_impl.rs
Normal file
@ -0,0 +1,955 @@
|
||||
// Copyright (C) 2019-2022 Alibaba Cloud. All rights reserved.
|
||||
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the THIRD-PARTY file.
|
||||
|
||||
//! The per-vCPU implementation.
|
||||
|
||||
use std::cell::Cell;
|
||||
use std::result;
|
||||
use std::sync::atomic::{fence, Ordering};
|
||||
use std::sync::mpsc::{Receiver, Sender, TryRecvError};
|
||||
use std::sync::{Arc, Barrier};
|
||||
use std::thread;
|
||||
|
||||
use dbs_utils::time::TimestampUs;
|
||||
use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN};
|
||||
use kvm_ioctls::{VcpuExit, VcpuFd};
|
||||
use libc::{c_int, c_void, siginfo_t};
|
||||
use log::{error, info, warn};
|
||||
use seccompiler::{apply_filter, BpfProgram, Error as SecError};
|
||||
use vmm_sys_util::eventfd::EventFd;
|
||||
use vmm_sys_util::signal::{register_signal_handler, Killable};
|
||||
|
||||
use super::sm::StateMachine;
|
||||
use crate::metric::{IncMetric, METRICS};
|
||||
use crate::signal_handler::sigrtmin;
|
||||
use crate::IoManagerCached;
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[path = "x86_64.rs"]
|
||||
mod x86_64;
|
||||
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
#[path = "aarch64.rs"]
|
||||
mod aarch64;
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
const MAGIC_IOPORT_BASE: u16 = 0xdbdb;
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
const MAGIC_IOPORT_DEBUG_INFO: u16 = MAGIC_IOPORT_BASE;
|
||||
|
||||
/// Offset added to SIGRTMIN to obtain the signal number used to kick Vcpus.
|
||||
pub const VCPU_RTSIG_OFFSET: i32 = 0;
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
/// Errors associated with the wrappers over KVM ioctls.
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum VcpuError {
|
||||
/// Failed to signal Vcpu.
|
||||
#[error("cannot signal the vCPU thread")]
|
||||
SignalVcpu(#[source] vmm_sys_util::errno::Error),
|
||||
|
||||
/// Cannot open the vCPU file descriptor.
|
||||
#[error("cannot open the vCPU file descriptor")]
|
||||
VcpuFd(#[source] kvm_ioctls::Error),
|
||||
|
||||
/// Cannot spawn a new vCPU thread.
|
||||
#[error("cannot spawn vCPU thread")]
|
||||
VcpuSpawn(#[source] std::io::Error),
|
||||
|
||||
/// Cannot cleanly initialize vCPU TLS.
|
||||
#[error("cannot cleanly initialize TLS fro vCPU")]
|
||||
VcpuTlsInit,
|
||||
|
||||
/// Vcpu not present in TLS.
|
||||
#[error("vCPU not present in the TLS")]
|
||||
VcpuTlsNotPresent,
|
||||
|
||||
/// Unexpected KVM_RUN exit reason
|
||||
#[error("Unexpected KVM_RUN exit reason")]
|
||||
VcpuUnhandledKvmExit,
|
||||
|
||||
/// Pause vcpu failed
|
||||
#[error("failed to pause vcpus")]
|
||||
PauseFailed,
|
||||
|
||||
/// Kvm Ioctl Error
|
||||
#[error("failure in issuing KVM ioctl command")]
|
||||
Kvm(#[source] kvm_ioctls::Error),
|
||||
|
||||
/// Msr error
|
||||
#[error("failure to deal with MSRs")]
|
||||
Msr(vmm_sys_util::fam::Error),
|
||||
|
||||
/// A call to cpuid instruction failed on x86_64.
|
||||
#[error("failure while configuring CPUID for virtual CPU on x86_64")]
|
||||
CpuId(dbs_arch::cpuid::Error),
|
||||
|
||||
/// Error configuring the floating point related registers on x86_64.
|
||||
#[error("failure while configuring the floating point related registers on x86_64")]
|
||||
FPUConfiguration(dbs_arch::regs::Error),
|
||||
|
||||
/// Cannot set the local interruption due to bad configuration on x86_64.
|
||||
#[error("cannot set the local interruption due to bad configuration on x86_64")]
|
||||
LocalIntConfiguration(dbs_arch::interrupts::Error),
|
||||
|
||||
/// Error configuring the MSR registers on x86_64.
|
||||
#[error("failure while configuring the MSR registers on x86_64")]
|
||||
MSRSConfiguration(dbs_arch::regs::Error),
|
||||
|
||||
/// Error configuring the general purpose registers on x86_64.
|
||||
#[error("failure while configuring the general purpose registers on x86_64")]
|
||||
REGSConfiguration(dbs_arch::regs::Error),
|
||||
|
||||
/// Error configuring the special registers on x86_64.
|
||||
#[error("failure while configuring the special registers on x86_64")]
|
||||
SREGSConfiguration(dbs_arch::regs::Error),
|
||||
|
||||
/// Error configuring the page table on x86_64.
|
||||
#[error("failure while configuring the page table on x86_64")]
|
||||
PageTable(dbs_boot::Error),
|
||||
|
||||
/// The call to KVM_SET_CPUID2 failed on x86_64.
|
||||
#[error("failure while calling KVM_SET_CPUID2 on x86_64")]
|
||||
SetSupportedCpusFailed(#[source] kvm_ioctls::Error),
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "aarch64")]
/// Errors associated with the wrappers over KVM ioctls.
#[derive(Debug, thiserror::Error)]
pub enum VcpuError {
    /// Failed to signal Vcpu.
    #[error("cannot signal the vCPU thread")]
    SignalVcpu(#[source] vmm_sys_util::errno::Error),

    /// Cannot open the vCPU file descriptor.
    #[error("cannot open the vCPU file descriptor")]
    VcpuFd(#[source] kvm_ioctls::Error),

    /// Cannot spawn a new vCPU thread.
    #[error("cannot spawn vCPU thread")]
    VcpuSpawn(#[source] std::io::Error),

    /// Cannot cleanly initialize vCPU TLS.
    #[error("cannot cleanly initialize TLS for vCPU")]
    VcpuTlsInit,

    /// Vcpu not present in TLS.
    #[error("vCPU not present in the TLS")]
    VcpuTlsNotPresent,

    /// Unexpected KVM_RUN exit reason
    #[error("Unexpected KVM_RUN exit reason")]
    VcpuUnhandledKvmExit,

    /// Pause vcpu failed
    #[error("failed to pause vcpus")]
    PauseFailed,

    /// Kvm Ioctl Error
    #[error("failure in issuing KVM ioctl command")]
    Kvm(#[source] kvm_ioctls::Error),

    /// Msr error
    #[error("failure to deal with MSRs")]
    Msr(vmm_sys_util::fam::Error),

    // The whole enum is already gated on aarch64, so the variants below need
    // no per-variant `cfg` attribute.
    /// Error configuring the general purpose aarch64 registers on aarch64.
    #[error("failure while configuring the general purpose registers on aarch64")]
    REGSConfiguration(dbs_arch::regs::Error),

    /// Error setting up the global interrupt controller on aarch64.
    #[error("failure while setting up the global interrupt controller on aarch64")]
    SetupGIC(dbs_arch::gic::Error),

    /// Error getting the Vcpu preferred target on aarch64.
    #[error("failure while getting the vCPU preferred target on aarch64")]
    VcpuArmPreferredTarget(kvm_ioctls::Error),

    /// Error doing vCPU Init on aarch64.
    #[error("failure while doing vCPU init on aarch64")]
    VcpuArmInit(kvm_ioctls::Error),
}
|
||||
|
||||
/// Result for Vcpu related operations.
|
||||
pub type Result<T> = result::Result<T, VcpuError>;
|
||||
|
||||
/// List of events that the Vcpu can receive.
|
||||
#[derive(Debug)]
|
||||
pub enum VcpuEvent {
|
||||
/// Kill the Vcpu.
|
||||
Exit,
|
||||
/// Pause the Vcpu.
|
||||
Pause,
|
||||
/// Event that should resume the Vcpu.
|
||||
Resume,
|
||||
/// Get vcpu thread tid
|
||||
Gettid,
|
||||
|
||||
/// Event to revalidate vcpu IoManager cache
|
||||
RevalidateCache,
|
||||
}
|
||||
|
||||
/// List of responses that the Vcpu reports.
|
||||
pub enum VcpuResponse {
|
||||
/// Vcpu is paused.
|
||||
Paused,
|
||||
/// Vcpu is resumed.
|
||||
Resumed,
|
||||
/// Vcpu index and thread tid.
|
||||
Tid(u8, u32),
|
||||
/// Requested Vcpu operation is not allowed.
|
||||
NotAllowed,
|
||||
/// Requested action encountered an error.
|
||||
Error(VcpuError),
|
||||
/// Vcpu IoManager cache is revalidated
|
||||
CacheRevalidated,
|
||||
}
|
||||
|
||||
/// List of events that the vcpu_state_sender can send.
|
||||
pub enum VcpuStateEvent {
|
||||
/// For Hotplug
|
||||
Hotplug((bool, u32)),
|
||||
}
|
||||
|
||||
/// Wrapper over vCPU that hides the underlying interactions with the vCPU thread.
|
||||
pub struct VcpuHandle {
|
||||
event_sender: Sender<VcpuEvent>,
|
||||
response_receiver: Receiver<VcpuResponse>,
|
||||
vcpu_thread: thread::JoinHandle<()>,
|
||||
}
|
||||
|
||||
impl VcpuHandle {
|
||||
/// Send event to vCPU thread
|
||||
pub fn send_event(&self, event: VcpuEvent) -> Result<()> {
|
||||
// Use expect() to crash if the other thread closed this channel.
|
||||
self.event_sender
|
||||
.send(event)
|
||||
.expect("event sender channel closed on vcpu end.");
|
||||
// Kick the vCPU so it picks up the message.
|
||||
self.vcpu_thread
|
||||
.kill(sigrtmin() + VCPU_RTSIG_OFFSET)
|
||||
.map_err(VcpuError::SignalVcpu)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Receive response from vcpu thread
|
||||
pub fn response_receiver(&self) -> &Receiver<VcpuResponse> {
|
||||
&self.response_receiver
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
/// Join the vcpu thread
|
||||
pub fn join_vcpu_thread(self) -> thread::Result<()> {
|
||||
self.vcpu_thread.join()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(PartialEq)]
|
||||
enum VcpuEmulation {
|
||||
Handled,
|
||||
Interrupted,
|
||||
Stopped,
|
||||
}
|
||||
|
||||
/// A wrapper around creating and using a kvm-based VCPU.
|
||||
pub struct Vcpu {
|
||||
// vCPU fd used by the vCPU
|
||||
fd: Arc<VcpuFd>,
|
||||
// vCPU id info
|
||||
id: u8,
|
||||
// Io manager Cached for facilitating IO operations
|
||||
io_mgr: IoManagerCached,
|
||||
// Records vCPU create time stamp
|
||||
create_ts: TimestampUs,
|
||||
|
||||
// The receiving end of events channel owned by the vcpu side.
|
||||
event_receiver: Receiver<VcpuEvent>,
|
||||
// The transmitting end of the events channel which will be given to the handler.
|
||||
event_sender: Option<Sender<VcpuEvent>>,
|
||||
// The receiving end of the responses channel which will be given to the handler.
|
||||
response_receiver: Option<Receiver<VcpuResponse>>,
|
||||
// The transmitting end of the responses channel owned by the vcpu side.
|
||||
response_sender: Sender<VcpuResponse>,
|
||||
// Event notifier for CPU hotplug.
|
||||
// After arm adapts to hotplug vcpu, the `allow(dead_code)` attribute needs to be removed
|
||||
#[cfg_attr(target_arch = "aarch64", allow(dead_code))]
|
||||
vcpu_state_event: EventFd,
|
||||
// CPU hotplug events.
|
||||
// After arm adapts to hotplug vcpu, the `allow(dead_code)` attribute needs to be removed
|
||||
#[cfg_attr(target_arch = "aarch64", allow(dead_code))]
|
||||
vcpu_state_sender: Sender<VcpuStateEvent>,
|
||||
|
||||
// An `EventFd` that will be written into when this vcpu exits.
|
||||
exit_evt: EventFd,
|
||||
// Whether kvm used supports immediate_exit flag.
|
||||
support_immediate_exit: bool,
|
||||
|
||||
// CPUID information for the x86_64 CPU
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
cpuid: kvm_bindings::CpuId,
|
||||
|
||||
/// Multiprocessor affinity register recorded for aarch64
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
pub(crate) mpidr: u64,
|
||||
}
|
||||
|
||||
// Using this for easier explicit type-casting to help IDEs interpret the code.
|
||||
type VcpuCell = Cell<Option<*const Vcpu>>;
|
||||
|
||||
impl Vcpu {
|
||||
thread_local!(static TLS_VCPU_PTR: VcpuCell = Cell::new(None));
|
||||
|
||||
/// Associates `self` with the current thread.
|
||||
///
|
||||
/// It is a prerequisite to successfully run `init_thread_local_data()` before using
|
||||
/// `run_on_thread_local()` on the current thread.
|
||||
/// This function will return an error if there already is a `Vcpu` present in the TLS.
|
||||
fn init_thread_local_data(&mut self) -> Result<()> {
|
||||
Self::TLS_VCPU_PTR.with(|cell: &VcpuCell| {
|
||||
if cell.get().is_some() {
|
||||
return Err(VcpuError::VcpuTlsInit);
|
||||
}
|
||||
cell.set(Some(self as *const Vcpu));
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
|
||||
/// Deassociates `self` from the current thread.
|
||||
///
|
||||
/// Should be called if the current `self` had called `init_thread_local_data()` and
|
||||
/// now needs to move to a different thread.
|
||||
///
|
||||
/// Fails if `self` was not previously associated with the current thread.
|
||||
fn reset_thread_local_data(&mut self) -> Result<()> {
|
||||
// Best-effort to clean up TLS. If the `Vcpu` was moved to another thread
|
||||
// _before_ running this, then there is nothing we can do.
|
||||
Self::TLS_VCPU_PTR.with(|cell: &VcpuCell| {
|
||||
if let Some(vcpu_ptr) = cell.get() {
|
||||
if vcpu_ptr == self as *const Vcpu {
|
||||
Self::TLS_VCPU_PTR.with(|cell: &VcpuCell| cell.take());
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
Err(VcpuError::VcpuTlsNotPresent)
|
||||
})
|
||||
}
|
||||
|
||||
/// Runs `func` for the `Vcpu` associated with the current thread.
|
||||
///
|
||||
/// It requires that `init_thread_local_data()` was run on this thread.
|
||||
///
|
||||
/// Fails if there is no `Vcpu` associated with the current thread.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// This is marked unsafe as it allows temporary aliasing through
|
||||
/// dereferencing from pointer an already borrowed `Vcpu`.
|
||||
unsafe fn run_on_thread_local<F>(func: F) -> Result<()>
|
||||
where
|
||||
F: FnOnce(&Vcpu),
|
||||
{
|
||||
Self::TLS_VCPU_PTR.with(|cell: &VcpuCell| {
|
||||
if let Some(vcpu_ptr) = cell.get() {
|
||||
// Dereferencing here is safe since `TLS_VCPU_PTR` is populated/non-empty,
|
||||
// and it is being cleared on `Vcpu::drop` so there is no dangling pointer.
|
||||
let vcpu_ref: &Vcpu = &*vcpu_ptr;
|
||||
func(vcpu_ref);
|
||||
Ok(())
|
||||
} else {
|
||||
Err(VcpuError::VcpuTlsNotPresent)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Registers a signal handler which makes use of TLS and kvm immediate exit to
|
||||
/// kick the vcpu running on the current thread, if there is one.
|
||||
pub fn register_kick_signal_handler() {
|
||||
extern "C" fn handle_signal(_: c_int, _: *mut siginfo_t, _: *mut c_void) {
|
||||
// This is safe because it's temporarily aliasing the `Vcpu` object, but we are
|
||||
// only reading `vcpu.fd` which does not change for the lifetime of the `Vcpu`.
|
||||
unsafe {
|
||||
let _ = Vcpu::run_on_thread_local(|vcpu| {
|
||||
vcpu.fd.set_kvm_immediate_exit(1);
|
||||
fence(Ordering::Release);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
register_signal_handler(sigrtmin() + VCPU_RTSIG_OFFSET, handle_signal)
|
||||
.expect("Failed to register vcpu signal handler");
|
||||
}
|
||||
|
||||
/// Returns the cpu index as seen by the guest OS.
|
||||
pub fn cpu_index(&self) -> u8 {
|
||||
self.id
|
||||
}
|
||||
|
||||
/// Moves the vcpu to its own thread and constructs a VcpuHandle.
|
||||
/// The handle can be used to control the remote vcpu.
|
||||
pub fn start_threaded(
|
||||
mut self,
|
||||
seccomp_filter: BpfProgram,
|
||||
barrier: Arc<Barrier>,
|
||||
) -> Result<VcpuHandle> {
|
||||
let event_sender = self.event_sender.take().unwrap();
|
||||
let response_receiver = self.response_receiver.take().unwrap();
|
||||
|
||||
let vcpu_thread = thread::Builder::new()
|
||||
.name(format!("db_vcpu{}", self.cpu_index()))
|
||||
.spawn(move || {
|
||||
self.init_thread_local_data()
|
||||
.expect("Cannot cleanly initialize vcpu TLS.");
|
||||
barrier.wait();
|
||||
self.run(seccomp_filter);
|
||||
})
|
||||
.map_err(VcpuError::VcpuSpawn)?;
|
||||
|
||||
Ok(VcpuHandle {
|
||||
event_sender,
|
||||
response_receiver,
|
||||
vcpu_thread,
|
||||
})
|
||||
}
|
||||
|
||||
/// Extract the vcpu running logic for test mocking.
|
||||
#[cfg(not(test))]
|
||||
pub fn emulate(fd: &VcpuFd) -> std::result::Result<VcpuExit<'_>, kvm_ioctls::Error> {
|
||||
fd.run()
|
||||
}
|
||||
|
||||
/// Runs the vCPU in KVM context and handles the kvm exit reason.
|
||||
///
|
||||
/// Returns error or enum specifying whether emulation was handled or interrupted.
|
||||
fn run_emulation(&mut self) -> Result<VcpuEmulation> {
|
||||
match Vcpu::emulate(&self.fd) {
|
||||
Ok(run) => match run {
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
VcpuExit::IoIn(addr, data) => {
|
||||
let _ = self.io_mgr.pio_read(addr, data);
|
||||
METRICS.vcpu.exit_io_in.inc();
|
||||
Ok(VcpuEmulation::Handled)
|
||||
}
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
VcpuExit::IoOut(addr, data) => {
|
||||
if !self.check_io_port_info(addr, data)? {
|
||||
let _ = self.io_mgr.pio_write(addr, data);
|
||||
}
|
||||
METRICS.vcpu.exit_io_out.inc();
|
||||
Ok(VcpuEmulation::Handled)
|
||||
}
|
||||
VcpuExit::MmioRead(addr, data) => {
|
||||
let _ = self.io_mgr.mmio_read(addr, data);
|
||||
METRICS.vcpu.exit_mmio_read.inc();
|
||||
Ok(VcpuEmulation::Handled)
|
||||
}
|
||||
VcpuExit::MmioWrite(addr, data) => {
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
self.check_boot_complete_signal(addr, data);
|
||||
|
||||
let _ = self.io_mgr.mmio_write(addr, data);
|
||||
METRICS.vcpu.exit_mmio_write.inc();
|
||||
Ok(VcpuEmulation::Handled)
|
||||
}
|
||||
VcpuExit::Hlt => {
|
||||
info!("Received KVM_EXIT_HLT signal");
|
||||
Err(VcpuError::VcpuUnhandledKvmExit)
|
||||
}
|
||||
VcpuExit::Shutdown => {
|
||||
info!("Received KVM_EXIT_SHUTDOWN signal");
|
||||
Err(VcpuError::VcpuUnhandledKvmExit)
|
||||
}
|
||||
// Documentation specifies that below kvm exits are considered errors.
|
||||
VcpuExit::FailEntry => {
|
||||
METRICS.vcpu.failures.inc();
|
||||
error!("Received KVM_EXIT_FAIL_ENTRY signal");
|
||||
Err(VcpuError::VcpuUnhandledKvmExit)
|
||||
}
|
||||
VcpuExit::InternalError => {
|
||||
METRICS.vcpu.failures.inc();
|
||||
error!("Received KVM_EXIT_INTERNAL_ERROR signal");
|
||||
Err(VcpuError::VcpuUnhandledKvmExit)
|
||||
}
|
||||
VcpuExit::SystemEvent(event_type, event_flags) => match event_type {
|
||||
KVM_SYSTEM_EVENT_RESET | KVM_SYSTEM_EVENT_SHUTDOWN => {
|
||||
info!(
|
||||
"Received KVM_SYSTEM_EVENT: type: {}, event: {}",
|
||||
event_type, event_flags
|
||||
);
|
||||
Ok(VcpuEmulation::Stopped)
|
||||
}
|
||||
_ => {
|
||||
METRICS.vcpu.failures.inc();
|
||||
error!(
|
||||
"Received KVM_SYSTEM_EVENT signal type: {}, flag: {}",
|
||||
event_type, event_flags
|
||||
);
|
||||
Err(VcpuError::VcpuUnhandledKvmExit)
|
||||
}
|
||||
},
|
||||
r => {
|
||||
METRICS.vcpu.failures.inc();
|
||||
// TODO: Are we sure we want to finish running a vcpu upon
|
||||
// receiving a vm exit that is not necessarily an error?
|
||||
error!("Unexpected exit reason on vcpu run: {:?}", r);
|
||||
Err(VcpuError::VcpuUnhandledKvmExit)
|
||||
}
|
||||
},
|
||||
// KVM_RUN itself failed: classify the failure by its errno.
|
||||
// EAGAIN and EINTR are expected; any other errno is treated as an error for this vcpu.
|
||||
Err(ref e) => {
|
||||
match e.errno() {
|
||||
libc::EAGAIN => Ok(VcpuEmulation::Handled),
|
||||
libc::EINTR => {
|
||||
self.fd.set_kvm_immediate_exit(0);
|
||||
// Notify that this KVM_RUN was interrupted.
|
||||
Ok(VcpuEmulation::Interrupted)
|
||||
}
|
||||
_ => {
|
||||
METRICS.vcpu.failures.inc();
|
||||
error!("Failure during vcpu run: {}", e);
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
{
|
||||
error!(
|
||||
"dump regs: {:?}, dump sregs: {:?}",
|
||||
self.fd.get_regs(),
|
||||
self.fd.get_sregs()
|
||||
);
|
||||
}
|
||||
Err(VcpuError::VcpuUnhandledKvmExit)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
// checkout the io port that dragonball used only
|
||||
fn check_io_port_info(&self, addr: u16, data: &[u8]) -> Result<bool> {
|
||||
let mut checked = false;
|
||||
|
||||
match addr {
|
||||
// debug info signal
|
||||
MAGIC_IOPORT_DEBUG_INFO => {
|
||||
if data.len() == 4 {
|
||||
let data = unsafe { std::ptr::read(data.as_ptr() as *const u32) };
|
||||
warn!("KDBG: guest kernel debug info: 0x{:x}", data);
|
||||
checked = true;
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
};
|
||||
|
||||
Ok(checked)
|
||||
}
|
||||
|
||||
fn gettid() -> u32 {
|
||||
nix::unistd::gettid().as_raw() as u32
|
||||
}
|
||||
|
||||
/// Asks this vCPU's I/O manager to revalidate its cache.
///
/// Always returns `Ok(())`; the `Result` return type lets callers treat this like the
/// other fallible vCPU event handlers.
fn revalidate_cache(&mut self) -> Result<()> {
    let io_mgr = &mut self.io_mgr;
    io_mgr.revalidate_cache();
    Ok(())
}
|
||||
|
||||
/// Main loop of the vCPU thread.
|
||||
///
|
||||
/// Runs the vCPU in KVM context in a loop. Handles KVM_EXITs then goes back in.
|
||||
/// Note that the state of the VCPU and associated VM must be setup first for this to do
|
||||
/// anything useful.
|
||||
pub fn run(&mut self, seccomp_filter: BpfProgram) {
|
||||
// Load seccomp filters for this vCPU thread.
|
||||
// Execution panics if filters cannot be loaded, use --seccomp-level=0 if skipping filters
|
||||
// altogether is the desired behaviour.
|
||||
if let Err(e) = apply_filter(&seccomp_filter) {
|
||||
if matches!(e, SecError::EmptyFilter) {
|
||||
info!("vCPU thread {} use empty seccomp filters.", self.id);
|
||||
} else {
|
||||
panic!(
|
||||
"Failed to set the requested seccomp filters on vCPU {}: Error: {}",
|
||||
self.id, e
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
info!("vcpu {} is running", self.cpu_index());
|
||||
|
||||
// Start running the machine state in the `Paused` state.
|
||||
StateMachine::run(self, Self::paused);
|
||||
}
|
||||
|
||||
// This is the main loop of the `Running` state.
|
||||
fn running(&mut self) -> StateMachine<Self> {
|
||||
// This loop is here just for optimizing the emulation path.
|
||||
// No point in ticking the state machine if there are no external events.
|
||||
loop {
|
||||
match self.run_emulation() {
|
||||
// Emulation ran successfully, continue.
|
||||
Ok(VcpuEmulation::Handled) => {
|
||||
// We need to break here if kvm doesn't support
|
||||
// immediate_exit flag. Because the signal sent from vmm
|
||||
// thread may occurs when handling the vcpu exit events, and
|
||||
// in this case the external vcpu events may not be handled
|
||||
// correctly, so we need to check the event_receiver channel
|
||||
// after handle vcpu exit events to decrease the window that
|
||||
// doesn't handle the vcpu external events.
|
||||
if !self.support_immediate_exit {
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Emulation was interrupted, check external events.
|
||||
Ok(VcpuEmulation::Interrupted) => break,
|
||||
// Emulation was stopped due to reset or shutdown.
|
||||
Ok(VcpuEmulation::Stopped) => return StateMachine::next(Self::waiting_exit),
|
||||
// Emulation errors lead to vCPU exit.
|
||||
Err(e) => {
|
||||
error!("vcpu: {}, run_emulation failed: {:?}", self.id, e);
|
||||
return StateMachine::next(Self::waiting_exit);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// By default don't change state.
|
||||
let mut state = StateMachine::next(Self::running);
|
||||
|
||||
// Break this emulation loop on any transition request/external event.
|
||||
match self.event_receiver.try_recv() {
|
||||
// Running ---- Exit ----> Exited
|
||||
Ok(VcpuEvent::Exit) => {
|
||||
// Move to 'exited' state.
|
||||
state = StateMachine::next(Self::exited);
|
||||
}
|
||||
// Running ---- Pause ----> Paused
|
||||
Ok(VcpuEvent::Pause) => {
|
||||
// Nothing special to do.
|
||||
self.response_sender
|
||||
.send(VcpuResponse::Paused)
|
||||
.expect("failed to send pause status");
|
||||
|
||||
// TODO: we should call `KVM_KVMCLOCK_CTRL` here to make sure
|
||||
// TODO continued: the guest soft lockup watchdog does not panic on Resume.
|
||||
//let _ = self.fd.kvmclock_ctrl();
|
||||
|
||||
// Move to 'paused' state.
|
||||
state = StateMachine::next(Self::paused);
|
||||
}
|
||||
Ok(VcpuEvent::Resume) => {
|
||||
self.response_sender
|
||||
.send(VcpuResponse::Resumed)
|
||||
.expect("failed to send resume status");
|
||||
}
|
||||
Ok(VcpuEvent::Gettid) => {
|
||||
self.response_sender
|
||||
.send(VcpuResponse::Tid(self.cpu_index(), Vcpu::gettid()))
|
||||
.expect("failed to send vcpu thread tid");
|
||||
}
|
||||
Ok(VcpuEvent::RevalidateCache) => {
|
||||
self.revalidate_cache()
|
||||
.map(|()| {
|
||||
self.response_sender
|
||||
.send(VcpuResponse::CacheRevalidated)
|
||||
.expect("failed to revalidate vcpu IoManager cache");
|
||||
})
|
||||
.map_err(|e| self.response_sender.send(VcpuResponse::Error(e)))
|
||||
.expect("failed to revalidate vcpu IoManager cache");
|
||||
}
|
||||
// Unhandled exit of the other end.
|
||||
Err(TryRecvError::Disconnected) => {
|
||||
// Move to 'exited' state.
|
||||
state = StateMachine::next(Self::exited);
|
||||
}
|
||||
// All other events or lack thereof have no effect on current 'running' state.
|
||||
Err(TryRecvError::Empty) => (),
|
||||
}
|
||||
|
||||
state
|
||||
}
|
||||
|
||||
// This is the main loop of the `Paused` state.
|
||||
fn paused(&mut self) -> StateMachine<Self> {
|
||||
match self.event_receiver.recv() {
|
||||
// Paused ---- Exit ----> Exited
|
||||
Ok(VcpuEvent::Exit) => {
|
||||
// Move to 'exited' state.
|
||||
StateMachine::next(Self::exited)
|
||||
}
|
||||
// Paused ---- Resume ----> Running
|
||||
Ok(VcpuEvent::Resume) => {
|
||||
self.response_sender
|
||||
.send(VcpuResponse::Resumed)
|
||||
.expect("failed to send resume status");
|
||||
// Move to 'running' state.
|
||||
StateMachine::next(Self::running)
|
||||
}
|
||||
Ok(VcpuEvent::Pause) => {
|
||||
self.response_sender
|
||||
.send(VcpuResponse::Paused)
|
||||
.expect("failed to send pause status");
|
||||
// continue 'pause' state.
|
||||
StateMachine::next(Self::paused)
|
||||
}
|
||||
Ok(VcpuEvent::Gettid) => {
|
||||
self.response_sender
|
||||
.send(VcpuResponse::Tid(self.cpu_index(), Vcpu::gettid()))
|
||||
.expect("failed to send vcpu thread tid");
|
||||
StateMachine::next(Self::paused)
|
||||
}
|
||||
Ok(VcpuEvent::RevalidateCache) => {
|
||||
self.revalidate_cache()
|
||||
.map(|()| {
|
||||
self.response_sender
|
||||
.send(VcpuResponse::CacheRevalidated)
|
||||
.expect("failed to revalidate vcpu IoManager cache");
|
||||
})
|
||||
.map_err(|e| self.response_sender.send(VcpuResponse::Error(e)))
|
||||
.expect("failed to revalidate vcpu IoManager cache");
|
||||
|
||||
StateMachine::next(Self::paused)
|
||||
}
|
||||
// Unhandled exit of the other end.
|
||||
Err(_) => {
|
||||
// Move to 'exited' state.
|
||||
StateMachine::next(Self::exited)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// This is the main loop of the `WaitingExit` state.
|
||||
fn waiting_exit(&mut self) -> StateMachine<Self> {
|
||||
// trigger vmm to stop machine
|
||||
if let Err(e) = self.exit_evt.write(1) {
|
||||
METRICS.vcpu.failures.inc();
|
||||
error!("Failed signaling vcpu exit event: {}", e);
|
||||
}
|
||||
|
||||
let mut state = StateMachine::next(Self::waiting_exit);
|
||||
|
||||
match self.event_receiver.recv() {
|
||||
Ok(VcpuEvent::Exit) => state = StateMachine::next(Self::exited),
|
||||
Ok(_) => error!(
|
||||
"wrong state received in waiting exit state on vcpu {}",
|
||||
self.id
|
||||
),
|
||||
Err(_) => {
|
||||
error!(
|
||||
"vcpu channel closed in waiting exit state on vcpu {}",
|
||||
self.id
|
||||
);
|
||||
state = StateMachine::next(Self::exited);
|
||||
}
|
||||
}
|
||||
|
||||
state
|
||||
}
|
||||
|
||||
// This is the main loop of the `Exited` state.
|
||||
fn exited(&mut self) -> StateMachine<Self> {
|
||||
// State machine reached its end.
|
||||
StateMachine::finish(Self::exited)
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for Vcpu {
|
||||
fn drop(&mut self) {
|
||||
let _ = self.reset_thread_local_data();
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod tests {
|
||||
use std::os::unix::io::AsRawFd;
|
||||
use std::sync::mpsc::{channel, Receiver};
|
||||
use std::sync::Mutex;
|
||||
|
||||
use arc_swap::ArcSwap;
|
||||
use dbs_device::device_manager::IoManager;
|
||||
use kvm_ioctls::Kvm;
|
||||
use lazy_static::lazy_static;
|
||||
|
||||
use super::*;
|
||||
use crate::kvm_context::KvmContext;
|
||||
|
||||
pub enum EmulationCase {
|
||||
IoIn,
|
||||
IoOut,
|
||||
MmioRead,
|
||||
MmioWrite,
|
||||
Hlt,
|
||||
Shutdown,
|
||||
FailEntry,
|
||||
InternalError,
|
||||
Unknown,
|
||||
SystemEvent(u32, u64),
|
||||
Error(i32),
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
pub static ref EMULATE_RES: Mutex<EmulationCase> = Mutex::new(EmulationCase::Unknown);
|
||||
}
|
||||
|
||||
impl Vcpu {
|
||||
pub fn emulate(_fd: &VcpuFd) -> std::result::Result<VcpuExit<'_>, kvm_ioctls::Error> {
|
||||
let res = &*EMULATE_RES.lock().unwrap();
|
||||
match res {
|
||||
EmulationCase::IoIn => Ok(VcpuExit::IoIn(0, &mut [])),
|
||||
EmulationCase::IoOut => Ok(VcpuExit::IoOut(0, &[])),
|
||||
EmulationCase::MmioRead => Ok(VcpuExit::MmioRead(0, &mut [])),
|
||||
EmulationCase::MmioWrite => Ok(VcpuExit::MmioWrite(0, &[])),
|
||||
EmulationCase::Hlt => Ok(VcpuExit::Hlt),
|
||||
EmulationCase::Shutdown => Ok(VcpuExit::Shutdown),
|
||||
EmulationCase::FailEntry => Ok(VcpuExit::FailEntry),
|
||||
EmulationCase::InternalError => Ok(VcpuExit::InternalError),
|
||||
EmulationCase::Unknown => Ok(VcpuExit::Unknown),
|
||||
EmulationCase::SystemEvent(event_type, event_flags) => {
|
||||
Ok(VcpuExit::SystemEvent(*event_type, *event_flags))
|
||||
}
|
||||
EmulationCase::Error(e) => Err(kvm_ioctls::Error::new(*e)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
fn create_vcpu() -> (Vcpu, Receiver<VcpuStateEvent>) {
|
||||
// Call for kvm too frequently would cause error in some host kernel.
|
||||
std::thread::sleep(std::time::Duration::from_millis(5));
|
||||
|
||||
let kvm = Kvm::new().unwrap();
|
||||
let vm = Arc::new(kvm.create_vm().unwrap());
|
||||
let kvm_context = KvmContext::new(Some(kvm.as_raw_fd())).unwrap();
|
||||
let vcpu_fd = Arc::new(vm.create_vcpu(0).unwrap());
|
||||
let io_manager = IoManagerCached::new(Arc::new(ArcSwap::new(Arc::new(IoManager::new()))));
|
||||
let supported_cpuid = kvm_context
|
||||
.supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES)
|
||||
.unwrap();
|
||||
let reset_event_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap();
|
||||
let vcpu_state_event = EventFd::new(libc::EFD_NONBLOCK).unwrap();
|
||||
let (tx, rx) = channel();
|
||||
let time_stamp = TimestampUs::default();
|
||||
|
||||
let vcpu = Vcpu::new_x86_64(
|
||||
0,
|
||||
vcpu_fd,
|
||||
io_manager,
|
||||
supported_cpuid,
|
||||
reset_event_fd,
|
||||
vcpu_state_event,
|
||||
tx,
|
||||
time_stamp,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
(vcpu, rx)
|
||||
}
|
||||
|
||||
/// Checks how `run_emulation` classifies each scripted `EmulationCase`.
#[cfg(target_arch = "x86_64")]
#[test]
fn test_vcpu_run_emulation() {
    let (mut vcpu, _) = create_vcpu();

    // Store the scripted outcome in `EMULATE_RES`, run one emulation step and check the
    // result against the expected pattern.
    macro_rules! expect_outcome {
        ($case:expr, $outcome:pat) => {{
            *(EMULATE_RES.lock().unwrap()) = $case;
            let res = vcpu.run_emulation();
            assert!(matches!(res, $outcome));
        }};
    }

    // Io in
    expect_outcome!(EmulationCase::IoIn, Ok(VcpuEmulation::Handled));
    // Io out
    expect_outcome!(EmulationCase::IoOut, Ok(VcpuEmulation::Handled));
    // Mmio read
    expect_outcome!(EmulationCase::MmioRead, Ok(VcpuEmulation::Handled));
    // Mmio write
    expect_outcome!(EmulationCase::MmioWrite, Ok(VcpuEmulation::Handled));

    // KVM_EXIT_HLT signal
    expect_outcome!(EmulationCase::Hlt, Err(VcpuError::VcpuUnhandledKvmExit));
    // KVM_EXIT_SHUTDOWN signal
    expect_outcome!(
        EmulationCase::Shutdown,
        Err(VcpuError::VcpuUnhandledKvmExit)
    );
    // KVM_EXIT_FAIL_ENTRY signal
    expect_outcome!(
        EmulationCase::FailEntry,
        Err(VcpuError::VcpuUnhandledKvmExit)
    );
    // KVM_EXIT_INTERNAL_ERROR signal
    expect_outcome!(
        EmulationCase::InternalError,
        Err(VcpuError::VcpuUnhandledKvmExit)
    );

    // KVM_SYSTEM_EVENT_RESET
    expect_outcome!(
        EmulationCase::SystemEvent(KVM_SYSTEM_EVENT_RESET, 0),
        Ok(VcpuEmulation::Stopped)
    );
    // KVM_SYSTEM_EVENT_SHUTDOWN
    expect_outcome!(
        EmulationCase::SystemEvent(KVM_SYSTEM_EVENT_SHUTDOWN, 0),
        Ok(VcpuEmulation::Stopped)
    );
    // Other system event
    expect_outcome!(
        EmulationCase::SystemEvent(0, 0),
        Err(VcpuError::VcpuUnhandledKvmExit)
    );

    // Unknown exit reason
    expect_outcome!(
        EmulationCase::Unknown,
        Err(VcpuError::VcpuUnhandledKvmExit)
    );

    // Error: EAGAIN
    expect_outcome!(
        EmulationCase::Error(libc::EAGAIN),
        Ok(VcpuEmulation::Handled)
    );
    // Error: EINTR
    expect_outcome!(
        EmulationCase::Error(libc::EINTR),
        Ok(VcpuEmulation::Interrupted)
    );
    // other error
    expect_outcome!(
        EmulationCase::Error(libc::EINVAL),
        Err(VcpuError::VcpuUnhandledKvmExit)
    );
}
|
||||
|
||||
/// Checks which port writes `check_io_port_info` reports as consumed.
#[cfg(target_arch = "x86_64")]
#[test]
fn test_vcpu_check_io_port_info() {
    // Bind the receiver to a named placeholder so the state channel stays open for the
    // whole test without triggering an unused-variable warning.
    let (vcpu, _receiver) = create_vcpu();

    // boot complete signal: `check_io_port_info` only consumes 4-byte writes to
    // `MAGIC_IOPORT_DEBUG_INFO`, so this 1-byte write must be reported as not handled.
    // Update this expectation if boot-complete handling is added to the function.
    let res = vcpu
        .check_io_port_info(
            MAGIC_IOPORT_SIGNAL_GUEST_BOOT_COMPLETE,
            &[MAGIC_VALUE_SIGNAL_GUEST_BOOT_COMPLETE],
        )
        .unwrap();
    assert!(!res);

    // debug info signal: a 4-byte write is consumed.
    let res = vcpu
        .check_io_port_info(MAGIC_IOPORT_DEBUG_INFO, &[0, 0, 0, 0])
        .unwrap();
    assert!(res);

    // debug info signal with the wrong width is ignored.
    let res = vcpu
        .check_io_port_info(MAGIC_IOPORT_DEBUG_INFO, &[0, 0])
        .unwrap();
    assert!(!res);
}
|
||||
}
|
5
src/dragonball/src/vcpu/vcpu_manager.rs
Normal file
5
src/dragonball/src/vcpu/vcpu_manager.rs
Normal file
@ -0,0 +1,5 @@
|
||||
// Copyright (C) 2022 Alibaba Cloud. All rights reserved.
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
//! The implementation of vcpu manager
|
149
src/dragonball/src/vcpu/x86_64.rs
Normal file
149
src/dragonball/src/vcpu/x86_64.rs
Normal file
@ -0,0 +1,149 @@
|
||||
// Copyright (C) 2022 Alibaba Cloud. All rights reserved.
|
||||
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the THIRD-PARTY file.
|
||||
|
||||
use std::sync::mpsc::{channel, Sender};
|
||||
use std::sync::Arc;
|
||||
|
||||
use dbs_arch::cpuid::{process_cpuid, VmSpec};
|
||||
use dbs_arch::gdt::gdt_entry;
|
||||
use dbs_utils::time::TimestampUs;
|
||||
use kvm_bindings::CpuId;
|
||||
use kvm_ioctls::{VcpuFd, VmFd};
|
||||
use log::error;
|
||||
use vm_memory::{Address, GuestAddress, GuestAddressSpace};
|
||||
use vmm_sys_util::eventfd::EventFd;
|
||||
|
||||
use crate::address_space_manager::GuestAddressSpaceImpl;
|
||||
use crate::metric::{IncMetric, METRICS};
|
||||
use crate::vcpu::vcpu_impl::{Result, Vcpu, VcpuError, VcpuStateEvent};
|
||||
use crate::vcpu::VcpuConfig;
|
||||
use crate::IoManagerCached;
|
||||
|
||||
impl Vcpu {
|
||||
/// Constructs a new VCPU for `vm`.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `id` - Represents the CPU number between [0, max vcpus).
|
||||
/// * `vcpu_fd` - The kvm `VcpuFd` for the vcpu.
|
||||
/// * `io_mgr` - The io-manager used to access port-io and mmio devices.
|
||||
/// * `cpuid` - The `CpuId` listing the supported capabilities of this vcpu.
|
||||
/// * `exit_evt` - An `EventFd` that will be written into when this vcpu
|
||||
/// exits.
|
||||
/// * `vcpu_state_event` - The eventfd which can notify vmm state of some
|
||||
/// vcpu should change.
|
||||
/// * `vcpu_state_sender` - The channel to send state change message from
|
||||
/// vcpu thread to vmm thread.
|
||||
/// * `create_ts` - A timestamp used by the vcpu to calculate its lifetime.
|
||||
/// * `support_immediate_exit` - whether kvm used supports immediate_exit flag.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn new_x86_64(
|
||||
id: u8,
|
||||
vcpu_fd: Arc<VcpuFd>,
|
||||
io_mgr: IoManagerCached,
|
||||
cpuid: CpuId,
|
||||
exit_evt: EventFd,
|
||||
vcpu_state_event: EventFd,
|
||||
vcpu_state_sender: Sender<VcpuStateEvent>,
|
||||
create_ts: TimestampUs,
|
||||
support_immediate_exit: bool,
|
||||
) -> Result<Self> {
|
||||
let (event_sender, event_receiver) = channel();
|
||||
let (response_sender, response_receiver) = channel();
|
||||
// Initially the cpuid per vCPU is the one supported by this VM.
|
||||
Ok(Vcpu {
|
||||
fd: vcpu_fd,
|
||||
id,
|
||||
io_mgr,
|
||||
create_ts,
|
||||
event_receiver,
|
||||
event_sender: Some(event_sender),
|
||||
response_receiver: Some(response_receiver),
|
||||
response_sender,
|
||||
vcpu_state_event,
|
||||
vcpu_state_sender,
|
||||
exit_evt,
|
||||
support_immediate_exit,
|
||||
cpuid,
|
||||
})
|
||||
}
|
||||
|
||||
/// Configures a x86_64 specific vcpu and should be called once per vcpu.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `vm_config` - The machine configuration of this microvm needed for the CPUID configuration.
|
||||
/// * `vm_fd` - The kvm `VmFd` for the virtual machine this vcpu will get attached to.
|
||||
/// * `vm_memory` - The guest memory used by this microvm.
|
||||
/// * `kernel_start_addr` - Offset from `guest_mem` at which the kernel starts.
|
||||
/// * `pgtable_addr` - pgtable address for ap vcpu
|
||||
pub fn configure(
|
||||
&mut self,
|
||||
vcpu_config: &VcpuConfig,
|
||||
_vm_fd: &VmFd,
|
||||
vm_as: &GuestAddressSpaceImpl,
|
||||
kernel_start_addr: Option<GuestAddress>,
|
||||
_pgtable_addr: Option<GuestAddress>,
|
||||
) -> Result<()> {
|
||||
self.set_cpuid(vcpu_config)?;
|
||||
|
||||
dbs_arch::regs::setup_msrs(&self.fd).map_err(VcpuError::MSRSConfiguration)?;
|
||||
if let Some(start_addr) = kernel_start_addr {
|
||||
dbs_arch::regs::setup_regs(
|
||||
&self.fd,
|
||||
start_addr.raw_value() as u64,
|
||||
dbs_boot::layout::BOOT_STACK_POINTER,
|
||||
dbs_boot::layout::BOOT_STACK_POINTER,
|
||||
dbs_boot::layout::ZERO_PAGE_START,
|
||||
)
|
||||
.map_err(VcpuError::REGSConfiguration)?;
|
||||
dbs_arch::regs::setup_fpu(&self.fd).map_err(VcpuError::FPUConfiguration)?;
|
||||
let gdt_table: [u64; dbs_boot::layout::BOOT_GDT_MAX as usize] = [
|
||||
gdt_entry(0, 0, 0), // NULL
|
||||
gdt_entry(0xa09b, 0, 0xfffff), // CODE
|
||||
gdt_entry(0xc093, 0, 0xfffff), // DATA
|
||||
gdt_entry(0x808b, 0, 0xfffff), // TSS
|
||||
];
|
||||
let pgtable_addr =
|
||||
dbs_boot::setup_identity_mapping(&*vm_as.memory()).map_err(VcpuError::PageTable)?;
|
||||
dbs_arch::regs::setup_sregs(
|
||||
&*vm_as.memory(),
|
||||
&self.fd,
|
||||
pgtable_addr,
|
||||
&gdt_table,
|
||||
dbs_boot::layout::BOOT_GDT_OFFSET,
|
||||
dbs_boot::layout::BOOT_IDT_OFFSET,
|
||||
)
|
||||
.map_err(VcpuError::SREGSConfiguration)?;
|
||||
}
|
||||
dbs_arch::interrupts::set_lint(&self.fd).map_err(VcpuError::LocalIntConfiguration)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn set_cpuid(&mut self, vcpu_config: &VcpuConfig) -> Result<()> {
|
||||
let cpuid_vm_spec = VmSpec::new(
|
||||
self.id,
|
||||
vcpu_config.max_vcpu_count as u8,
|
||||
vcpu_config.threads_per_core,
|
||||
vcpu_config.cores_per_die,
|
||||
vcpu_config.dies_per_socket,
|
||||
vcpu_config.vpmu_feature,
|
||||
)
|
||||
.map_err(VcpuError::CpuId)?;
|
||||
process_cpuid(&mut self.cpuid, &cpuid_vm_spec).map_err(|e| {
|
||||
METRICS.vcpu.filter_cpuid.inc();
|
||||
error!("Failure in configuring CPUID for vcpu {}: {:?}", self.id, e);
|
||||
VcpuError::CpuId(e)
|
||||
})?;
|
||||
|
||||
self.fd
|
||||
.set_cpuid2(&self.cpuid)
|
||||
.map_err(VcpuError::SetSupportedCpusFailed)
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user