mirror of
				https://github.com/kata-containers/kata-containers.git
				synced 2025-10-31 09:26:52 +00:00 
			
		
		
		
	dragonball: add vcpu
Virtual CPU manager for virtual machines. Signed-off-by: Liu Jiang <gerry@linux.alibaba.com> Signed-off-by: jingshan <jingshan@linux.alibaba.com> Signed-off-by: Chao Wu <chaowu@linux.alibaba.com> Signed-off-by: wllenyj <wllenyj@linux.alibaba.com>
This commit is contained in:
		| @@ -14,6 +14,7 @@ arc-swap = "1.5.0" | |||||||
| bytes = "1.1.0" | bytes = "1.1.0" | ||||||
| dbs-address-space = "0.1.0" | dbs-address-space = "0.1.0" | ||||||
| dbs-allocator = "0.1.0" | dbs-allocator = "0.1.0" | ||||||
|  | dbs-arch = "0.1.0" | ||||||
| dbs-boot = "0.2.0" | dbs-boot = "0.2.0" | ||||||
| dbs-device = "0.1.0" | dbs-device = "0.1.0" | ||||||
| dbs-interrupt = { version = "0.1.0", features = ["kvm-irq"] } | dbs-interrupt = { version = "0.1.0", features = ["kvm-irq"] } | ||||||
|   | |||||||
| @@ -25,6 +25,8 @@ pub mod metric; | |||||||
| pub mod resource_manager; | pub mod resource_manager; | ||||||
| /// Signal handler for virtual machines. | /// Signal handler for virtual machines. | ||||||
| pub mod signal_handler; | pub mod signal_handler; | ||||||
|  | /// Virtual CPU manager for virtual machines. | ||||||
|  | pub mod vcpu; | ||||||
| /// Virtual machine manager for virtual machines. | /// Virtual machine manager for virtual machines. | ||||||
| pub mod vm; | pub mod vm; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -13,6 +13,23 @@ lazy_static! { | |||||||
|     pub static ref METRICS: DragonballMetrics = DragonballMetrics::default(); |     pub static ref METRICS: DragonballMetrics = DragonballMetrics::default(); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | /// Counters describing how the vCPUs are functioning (KVM exits and failures). | ||||||
|  | #[derive(Default, Serialize)] | ||||||
|  | pub struct VcpuMetrics { | ||||||
|  |     /// Number of KVM exits for handling input IO. | ||||||
|  |     pub exit_io_in: SharedIncMetric, | ||||||
|  |     /// Number of KVM exits for handling output IO. | ||||||
|  |     pub exit_io_out: SharedIncMetric, | ||||||
|  |     /// Number of KVM exits for handling MMIO reads. | ||||||
|  |     pub exit_mmio_read: SharedIncMetric, | ||||||
|  |     /// Number of KVM exits for handling MMIO writes. | ||||||
|  |     pub exit_mmio_write: SharedIncMetric, | ||||||
|  |     /// Number of errors during vCPU runs (one counter in the global `METRICS`, not per vCPU). | ||||||
|  |     pub failures: SharedIncMetric, | ||||||
|  |     /// Number of failures in configuring the CPUID. | ||||||
|  |     pub filter_cpuid: SharedIncMetric, | ||||||
|  | } | ||||||
|  |  | ||||||
| /// Metrics for the seccomp filtering. | /// Metrics for the seccomp filtering. | ||||||
| #[derive(Default, Serialize)] | #[derive(Default, Serialize)] | ||||||
| pub struct SeccompMetrics { | pub struct SeccompMetrics { | ||||||
| @@ -32,6 +49,8 @@ pub struct SignalMetrics { | |||||||
| /// Structure storing all metrics while enforcing serialization support on them. | /// Structure storing all metrics while enforcing serialization support on them. | ||||||
| #[derive(Default, Serialize)] | #[derive(Default, Serialize)] | ||||||
| pub struct DragonballMetrics { | pub struct DragonballMetrics { | ||||||
|  |     /// Metrics related to a vcpu's functioning. | ||||||
|  |     pub vcpu: VcpuMetrics, | ||||||
|     /// Metrics related to seccomp filtering. |     /// Metrics related to seccomp filtering. | ||||||
|     pub seccomp: SeccompMetrics, |     pub seccomp: SeccompMetrics, | ||||||
|     /// Metrics related to signals. |     /// Metrics related to signals. | ||||||
|   | |||||||
							
								
								
									
										94
									
								
								src/dragonball/src/vcpu/aarch64.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										94
									
								
								src/dragonball/src/vcpu/aarch64.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,94 @@ | |||||||
|  | // Copyright (C) 2022 Alibaba Cloud. All rights reserved. | ||||||
|  | // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||||||
|  | // SPDX-License-Identifier: Apache-2.0 | ||||||
|  | // | ||||||
|  | // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. | ||||||
|  | // Use of this source code is governed by a BSD-style license that can be | ||||||
|  | // found in the THIRD-PARTY file. | ||||||
|  |  | ||||||
|  | use std::sync::mpsc::{channel, Sender}; | ||||||
|  | use std::sync::Arc; | ||||||
|  |  | ||||||
|  | use crate::IoManagerCached; | ||||||
|  | use dbs_utils::time::TimestampUs; | ||||||
|  | use kvm_ioctls::{VcpuFd, VmFd}; | ||||||
|  | use vm_memory::GuestAddress; | ||||||
|  | use vmm_sys_util::eventfd::EventFd; | ||||||
|  |  | ||||||
|  | use crate::address_space_manager::GuestAddressSpaceImpl; | ||||||
|  | use crate::vcpu::vcpu_impl::{Result, Vcpu, VcpuStateEvent}; | ||||||
|  | use crate::vcpu::VcpuConfig; | ||||||
|  |  | ||||||
|  | #[allow(unused)] | ||||||
|  | impl Vcpu { | ||||||
|  |     /// Constructs a new aarch64 vCPU wrapper around an already created KVM vCPU fd. | ||||||
|  |     /// | ||||||
|  |     /// # Arguments | ||||||
|  |     /// | ||||||
|  |     /// * `id` - Represents the CPU number between [0, max vcpus). | ||||||
|  |     /// * `vcpu_fd` - The kvm `VcpuFd` for the vcpu. | ||||||
|  |     /// * `io_mgr` - The io-manager used to access port-io and mmio devices. | ||||||
|  |     /// * `exit_evt` - An `EventFd` that will be written into when this vcpu | ||||||
|  |     ///   exits. | ||||||
|  |     /// * `vcpu_state_event` - The eventfd used to notify the vmm that the state of | ||||||
|  |     ///   some vcpu should change. | ||||||
|  |     /// * `vcpu_state_sender` - The channel to send state change message from | ||||||
|  |     ///   vcpu thread to vmm thread. | ||||||
|  |     /// * `create_ts` - A timestamp used by the vcpu to calculate its lifetime. | ||||||
|  |     /// * `support_immediate_exit` - Whether the kvm in use supports the immediate_exit flag. | ||||||
|  |     pub fn new_aarch64( | ||||||
|  |         id: u8, | ||||||
|  |         vcpu_fd: Arc<VcpuFd>, | ||||||
|  |         io_mgr: IoManagerCached, | ||||||
|  |         exit_evt: EventFd, | ||||||
|  |         vcpu_state_event: EventFd, | ||||||
|  |         vcpu_state_sender: Sender<VcpuStateEvent>, | ||||||
|  |         create_ts: TimestampUs, | ||||||
|  |         support_immediate_exit: bool, | ||||||
|  |     ) -> Result<Self> { | ||||||
|  |         let (event_sender, event_receiver) = channel(); | ||||||
|  |         let (response_sender, response_receiver) = channel(); | ||||||
|  |  | ||||||
|  |         Ok(Vcpu { | ||||||
|  |             fd: vcpu_fd, | ||||||
|  |             id, | ||||||
|  |             io_mgr, | ||||||
|  |             create_ts, | ||||||
|  |             event_receiver, | ||||||
|  |             event_sender: Some(event_sender), | ||||||
|  |             response_receiver: Some(response_receiver), | ||||||
|  |             response_sender, | ||||||
|  |             vcpu_state_event, | ||||||
|  |             vcpu_state_sender, | ||||||
|  |             support_immediate_exit, | ||||||
|  |             mpidr: 0, | ||||||
|  |             exit_evt, | ||||||
|  |         }) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Configures an aarch64 specific vcpu. Currently a stub that always returns `Ok(())`. | ||||||
|  |     /// | ||||||
|  |     /// # Arguments | ||||||
|  |     /// | ||||||
|  |     /// * `_vcpu_config` - vCPU configuration for this vCPU (currently unused). | ||||||
|  |     /// * `vm_fd` - The kvm `VmFd` for this microvm (currently unused). | ||||||
|  |     /// * `vm_as` - The guest memory address space used by this microvm (currently unused). | ||||||
|  |     /// * `kernel_load_addr` - Guest address at which the kernel is loaded (currently unused). | ||||||
|  |     /// * `_pgtable_addr` - pgtable address for ap vcpu (not used in aarch64) | ||||||
|  |     pub fn configure( | ||||||
|  |         &mut self, | ||||||
|  |         _vcpu_config: &VcpuConfig, | ||||||
|  |         vm_fd: &VmFd, | ||||||
|  |         vm_as: &GuestAddressSpaceImpl, | ||||||
|  |         kernel_load_addr: Option<GuestAddress>, | ||||||
|  |         _pgtable_addr: Option<GuestAddress>, | ||||||
|  |     ) -> Result<()> { | ||||||
|  |         // TODO: add arm vcpu configure() function. issue: #4445 | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Returns the MPIDR value recorded in this `Vcpu` (set to 0 by `new_aarch64`). | ||||||
|  |     pub fn get_mpidr(&self) -> u64 { | ||||||
|  |         self.mpidr | ||||||
|  |     } | ||||||
|  | } | ||||||
							
								
								
									
										31
									
								
								src/dragonball/src/vcpu/mod.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										31
									
								
								src/dragonball/src/vcpu/mod.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,31 @@ | |||||||
|  | // Copyright (C) 2022 Alibaba Cloud Computing. All rights reserved. | ||||||
|  | // Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||||||
|  | // | ||||||
|  | // SPDX-License-Identifier: Apache-2.0 | ||||||
|  |  | ||||||
|  | mod sm; | ||||||
|  | pub mod vcpu_impl; | ||||||
|  |  | ||||||
|  | #[cfg(target_arch = "x86_64")] | ||||||
|  | use dbs_arch::cpuid::VpmuFeatureLevel; | ||||||
|  |  | ||||||
|  | /// Collection of vCPU configuration parameters. | ||||||
|  | pub struct VcpuConfig { | ||||||
|  |     /// Initial (boot) vCPU count. | ||||||
|  |     pub boot_vcpu_count: u8, | ||||||
|  |     /// Maximum vCPU count for hotplug. | ||||||
|  |     pub max_vcpu_count: u8, | ||||||
|  |     /// Threads per core, for CPU topology information. | ||||||
|  |     pub threads_per_core: u8, | ||||||
|  |     /// Cores per die, for CPU topology information. | ||||||
|  |     pub cores_per_die: u8, | ||||||
|  |     /// Dies per socket, for CPU topology information. | ||||||
|  |     pub dies_per_socket: u8, | ||||||
|  |     /// Number of sockets, for CPU topology information. | ||||||
|  |     pub sockets: u8, | ||||||
|  |     /// vPMU feature level. `Disabled`: the vPMU feature is off (the default). | ||||||
|  |     /// `LimitedlyEnabled`: only minimal vPMU counters are supported (cycles and instructions). | ||||||
|  |     /// `FullyEnabled`: all vPMU counters are supported. | ||||||
|  |     #[cfg(target_arch = "x86_64")] | ||||||
|  |     pub vpmu_feature: VpmuFeatureLevel, | ||||||
|  | } | ||||||
							
								
								
									
										149
									
								
								src/dragonball/src/vcpu/sm.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										149
									
								
								src/dragonball/src/vcpu/sm.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,149 @@ | |||||||
|  | // Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||||||
|  | // SPDX-License-Identifier: Apache-2.0 | ||||||
|  |  | ||||||
|  | use std::ops::Deref; | ||||||
|  |  | ||||||
|  | /// Simple abstraction of a state machine. | ||||||
|  | /// | ||||||
|  | /// `StateMachine<T>` pairs a state handler for `T` with a flag saying whether it is final. | ||||||
|  | /// | ||||||
|  | /// Each state for `T` is represented by a `StateFn<T>` which is a function that acts as | ||||||
|  | /// the state handler for that particular state of `T`. | ||||||
|  | /// | ||||||
|  | /// `StateFn<T>` returns exactly one other `StateMachine<T>` thus each state gets clearly | ||||||
|  | /// defined transitions to other states. | ||||||
|  | pub struct StateMachine<T> { | ||||||
|  |     function: StateFn<T>, | ||||||
|  |     end_state: bool, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Type representing a state handler of a `StateMachine<T>` machine. Each state handler | ||||||
|  | /// is a function from `T` that handles a specific state of `T`. | ||||||
|  | type StateFn<T> = fn(&mut T) -> StateMachine<T>; | ||||||
|  |  | ||||||
|  | impl<T> StateMachine<T> { | ||||||
|  |     /// Creates a new state wrapper. | ||||||
|  |     /// | ||||||
|  |     /// # Arguments | ||||||
|  |     /// | ||||||
|  |     /// `function` - the state handler for this state. | ||||||
|  |     /// `end_state` - whether this state is final. | ||||||
|  |     pub fn new(function: StateFn<T>, end_state: bool) -> StateMachine<T> { | ||||||
|  |         StateMachine { | ||||||
|  |             function, | ||||||
|  |             end_state, | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Creates a new state wrapper that has further possible transitions. | ||||||
|  |     /// | ||||||
|  |     /// # Arguments | ||||||
|  |     /// | ||||||
|  |     /// `function` - the state handler for this state. | ||||||
|  |     pub fn next(function: StateFn<T>) -> StateMachine<T> { | ||||||
|  |         StateMachine::new(function, false) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Creates a new state wrapper that marks the end of the machine: `run()` stops as soon | ||||||
|  |     /// as a handler returns it, so `function` is stored but never invoked by `run()`. | ||||||
|  |     /// | ||||||
|  |     /// # Arguments | ||||||
|  |     /// | ||||||
|  |     /// `function` - the handler recorded for this last state (not executed by `run()`). | ||||||
|  |     pub fn finish(function: StateFn<T>) -> StateMachine<T> { | ||||||
|  |         StateMachine::new(function, true) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Runs a state machine for `T` starting from the provided state. | ||||||
|  |     /// | ||||||
|  |     /// # Arguments | ||||||
|  |     /// | ||||||
|  |     /// `machine` - a mutable reference to the object running through the various states. | ||||||
|  |     /// `starting_state_fn` - a `fn(&mut T) -> StateMachine<T>` that should be the handler for | ||||||
|  |     ///                       the initial state. | ||||||
|  |     pub fn run(machine: &mut T, starting_state_fn: StateFn<T>) { | ||||||
|  |         // Wrap the starting handler as a non-final state so the loop runs it at least once. | ||||||
|  |         let mut sf = StateMachine::new(starting_state_fn, false); | ||||||
|  |         // While current state is not a final/end state, keep churning. | ||||||
|  |         while !sf.end_state { | ||||||
|  |             // Run the current state handler (called through `Deref`), and get the next one. | ||||||
|  |             sf = sf(machine); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Implement Deref of `StateMachine<T>` so that we can directly call its underlying state handler. | ||||||
|  | impl<T> Deref for StateMachine<T> { | ||||||
|  |     type Target = StateFn<T>; | ||||||
|  |     fn deref(&self) -> &Self::Target { | ||||||
|  |         &self.function | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[cfg(test)] | ||||||
|  | mod tests { | ||||||
|  |     use super::*; | ||||||
|  |  | ||||||
|  |     // DummyMachine with states `s1`, `s2` and `s3`. | ||||||
|  |     struct DummyMachine { | ||||||
|  |         private_data_s1: bool, | ||||||
|  |         private_data_s2: bool, | ||||||
|  |         private_data_s3: bool, | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     impl DummyMachine { | ||||||
|  |         fn new() -> Self { | ||||||
|  |             DummyMachine { | ||||||
|  |                 private_data_s1: false, | ||||||
|  |                 private_data_s2: false, | ||||||
|  |                 private_data_s3: false, | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // DummyMachine functions here. | ||||||
|  |  | ||||||
|  |         // Simple state-machine: start->s1->s2->s3->done. | ||||||
|  |         fn run(&mut self) { | ||||||
|  |             // Verify the machine has not run yet. | ||||||
|  |             assert!(!self.private_data_s1); | ||||||
|  |             assert!(!self.private_data_s2); | ||||||
|  |             assert!(!self.private_data_s3); | ||||||
|  |  | ||||||
|  |             // Run the state-machine. | ||||||
|  |             StateMachine::run(self, Self::s1); | ||||||
|  |  | ||||||
|  |             // Verify the machine went through all states. | ||||||
|  |             assert!(self.private_data_s1); | ||||||
|  |             assert!(self.private_data_s2); | ||||||
|  |             assert!(self.private_data_s3); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         fn s1(&mut self) -> StateMachine<Self> { | ||||||
|  |             // Verify private data mutates along with the states. | ||||||
|  |             assert!(!self.private_data_s1); | ||||||
|  |             self.private_data_s1 = true; | ||||||
|  |             StateMachine::next(Self::s2) | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         fn s2(&mut self) -> StateMachine<Self> { | ||||||
|  |             // Verify private data mutates along with the states. | ||||||
|  |             assert!(!self.private_data_s2); | ||||||
|  |             self.private_data_s2 = true; | ||||||
|  |             StateMachine::next(Self::s3) | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         fn s3(&mut self) -> StateMachine<Self> { | ||||||
|  |             // Verify private data mutates along with the states. | ||||||
|  |             assert!(!self.private_data_s3); | ||||||
|  |             self.private_data_s3 = true; | ||||||
|  |             // The machine ends here; `s1` is passed so that its assert would fail if it were run again. | ||||||
|  |             StateMachine::finish(Self::s1) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     #[test] | ||||||
|  |     fn test_sm() { | ||||||
|  |         let mut machine = DummyMachine::new(); | ||||||
|  |         machine.run(); | ||||||
|  |     } | ||||||
|  | } | ||||||
							
								
								
									
										955
									
								
								src/dragonball/src/vcpu/vcpu_impl.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										955
									
								
								src/dragonball/src/vcpu/vcpu_impl.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,955 @@ | |||||||
|  | // Copyright (C) 2019-2022 Alibaba Cloud. All rights reserved. | ||||||
|  | // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||||||
|  | // SPDX-License-Identifier: Apache-2.0 | ||||||
|  | // | ||||||
|  | // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. | ||||||
|  | // Use of this source code is governed by a BSD-style license that can be | ||||||
|  | // found in the THIRD-PARTY file. | ||||||
|  |  | ||||||
|  | //! The implementation for per vcpu | ||||||
|  |  | ||||||
|  | use std::cell::Cell; | ||||||
|  | use std::result; | ||||||
|  | use std::sync::atomic::{fence, Ordering}; | ||||||
|  | use std::sync::mpsc::{Receiver, Sender, TryRecvError}; | ||||||
|  | use std::sync::{Arc, Barrier}; | ||||||
|  | use std::thread; | ||||||
|  |  | ||||||
|  | use dbs_utils::time::TimestampUs; | ||||||
|  | use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN}; | ||||||
|  | use kvm_ioctls::{VcpuExit, VcpuFd}; | ||||||
|  | use libc::{c_int, c_void, siginfo_t}; | ||||||
|  | use log::{error, info, warn}; | ||||||
|  | use seccompiler::{apply_filter, BpfProgram, Error as SecError}; | ||||||
|  | use vmm_sys_util::eventfd::EventFd; | ||||||
|  | use vmm_sys_util::signal::{register_signal_handler, Killable}; | ||||||
|  |  | ||||||
|  | use super::sm::StateMachine; | ||||||
|  | use crate::metric::{IncMetric, METRICS}; | ||||||
|  | use crate::signal_handler::sigrtmin; | ||||||
|  | use crate::IoManagerCached; | ||||||
|  |  | ||||||
|  | #[cfg(target_arch = "x86_64")] | ||||||
|  | #[path = "x86_64.rs"] | ||||||
|  | mod x86_64; | ||||||
|  |  | ||||||
|  | #[cfg(target_arch = "aarch64")] | ||||||
|  | #[path = "aarch64.rs"] | ||||||
|  | mod aarch64; | ||||||
|  |  | ||||||
|  | #[cfg(target_arch = "x86_64")] | ||||||
|  | const MAGIC_IOPORT_BASE: u16 = 0xdbdb; | ||||||
|  | #[cfg(target_arch = "x86_64")] | ||||||
|  | const MAGIC_IOPORT_DEBUG_INFO: u16 = MAGIC_IOPORT_BASE; | ||||||
|  |  | ||||||
|  | /// Offset added to SIGRTMIN to obtain the signal number used to kick Vcpus. | ||||||
|  | pub const VCPU_RTSIG_OFFSET: i32 = 0; | ||||||
|  |  | ||||||
|  | #[cfg(target_arch = "x86_64")] | ||||||
|  | /// Errors associated with the wrappers over KVM ioctls (x86_64 build). | ||||||
|  | /// | ||||||
|  | /// The `#[error(...)]` strings are the `Display` output generated by `thiserror`. | ||||||
|  | #[derive(Debug, thiserror::Error)] | ||||||
|  | pub enum VcpuError { | ||||||
|  |     /// Failed to signal Vcpu. | ||||||
|  |     #[error("cannot signal the vCPU thread")] | ||||||
|  |     SignalVcpu(#[source] vmm_sys_util::errno::Error), | ||||||
|  |  | ||||||
|  |     /// Cannot open the vCPU file descriptor. | ||||||
|  |     #[error("cannot open the vCPU file descriptor")] | ||||||
|  |     VcpuFd(#[source] kvm_ioctls::Error), | ||||||
|  |  | ||||||
|  |     /// Cannot spawn a new vCPU thread. | ||||||
|  |     #[error("cannot spawn vCPU thread")] | ||||||
|  |     VcpuSpawn(#[source] std::io::Error), | ||||||
|  |  | ||||||
|  |     /// Cannot cleanly initialize vCPU TLS. | ||||||
|  |     // Message typo fixed: "fro" -> "for". | ||||||
|  |     #[error("cannot cleanly initialize TLS for vCPU")] | ||||||
|  |     VcpuTlsInit, | ||||||
|  |  | ||||||
|  |     /// Vcpu not present in TLS. | ||||||
|  |     #[error("vCPU not present in the TLS")] | ||||||
|  |     VcpuTlsNotPresent, | ||||||
|  |  | ||||||
|  |     /// Unexpected KVM_RUN exit reason. | ||||||
|  |     #[error("Unexpected KVM_RUN exit reason")] | ||||||
|  |     VcpuUnhandledKvmExit, | ||||||
|  |  | ||||||
|  |     /// Pausing the vcpus failed. | ||||||
|  |     #[error("failed to pause vcpus")] | ||||||
|  |     PauseFailed, | ||||||
|  |  | ||||||
|  |     /// A KVM ioctl failed. | ||||||
|  |     #[error("failure in issuing KVM ioctl command")] | ||||||
|  |     Kvm(#[source] kvm_ioctls::Error), | ||||||
|  |  | ||||||
|  |     /// MSR error. | ||||||
|  |     #[error("failure to deal with MSRs")] | ||||||
|  |     Msr(vmm_sys_util::fam::Error), | ||||||
|  |  | ||||||
|  |     /// A call to cpuid instruction failed on x86_64. | ||||||
|  |     #[error("failure while configuring CPUID for virtual CPU on x86_64")] | ||||||
|  |     CpuId(dbs_arch::cpuid::Error), | ||||||
|  |  | ||||||
|  |     /// Error configuring the floating point related registers on x86_64. | ||||||
|  |     #[error("failure while configuring the floating point related registers on x86_64")] | ||||||
|  |     FPUConfiguration(dbs_arch::regs::Error), | ||||||
|  |  | ||||||
|  |     /// Cannot set the local interruption due to bad configuration on x86_64. | ||||||
|  |     #[error("cannot set the local interruption due to bad configuration on x86_64")] | ||||||
|  |     LocalIntConfiguration(dbs_arch::interrupts::Error), | ||||||
|  |  | ||||||
|  |     /// Error configuring the MSR registers on x86_64. | ||||||
|  |     #[error("failure while configuring the MSR registers on x86_64")] | ||||||
|  |     MSRSConfiguration(dbs_arch::regs::Error), | ||||||
|  |  | ||||||
|  |     /// Error configuring the general purpose registers on x86_64. | ||||||
|  |     #[error("failure while configuring the general purpose registers on x86_64")] | ||||||
|  |     REGSConfiguration(dbs_arch::regs::Error), | ||||||
|  |  | ||||||
|  |     /// Error configuring the special registers on x86_64. | ||||||
|  |     #[error("failure while configuring the special registers on x86_64")] | ||||||
|  |     SREGSConfiguration(dbs_arch::regs::Error), | ||||||
|  |  | ||||||
|  |     /// Error configuring the page table on x86_64. | ||||||
|  |     #[error("failure while configuring the page table on x86_64")] | ||||||
|  |     PageTable(dbs_boot::Error), | ||||||
|  |  | ||||||
|  |     /// The call to KVM_SET_CPUID2 failed on x86_64. | ||||||
|  |     #[error("failure while calling KVM_SET_CPUID2 on x86_64")] | ||||||
|  |     SetSupportedCpusFailed(#[source] kvm_ioctls::Error), | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[cfg(target_arch = "aarch64")] | ||||||
|  | /// Errors associated with the wrappers over KVM ioctls (aarch64 build). | ||||||
|  | /// | ||||||
|  | /// The whole enum is gated on `target_arch = "aarch64"`, so the individual variants | ||||||
|  | /// need no further `cfg` attributes. | ||||||
|  | #[derive(Debug, thiserror::Error)] | ||||||
|  | pub enum VcpuError { | ||||||
|  |     /// Failed to signal Vcpu. | ||||||
|  |     #[error("cannot signal the vCPU thread")] | ||||||
|  |     SignalVcpu(#[source] vmm_sys_util::errno::Error), | ||||||
|  |  | ||||||
|  |     /// Cannot open the vCPU file descriptor. | ||||||
|  |     #[error("cannot open the vCPU file descriptor")] | ||||||
|  |     VcpuFd(#[source] kvm_ioctls::Error), | ||||||
|  |  | ||||||
|  |     /// Cannot spawn a new vCPU thread. | ||||||
|  |     #[error("cannot spawn vCPU thread")] | ||||||
|  |     VcpuSpawn(#[source] std::io::Error), | ||||||
|  |  | ||||||
|  |     /// Cannot cleanly initialize vCPU TLS. | ||||||
|  |     // Message typo fixed: "fro" -> "for". | ||||||
|  |     #[error("cannot cleanly initialize TLS for vCPU")] | ||||||
|  |     VcpuTlsInit, | ||||||
|  |  | ||||||
|  |     /// Vcpu not present in TLS. | ||||||
|  |     #[error("vCPU not present in the TLS")] | ||||||
|  |     VcpuTlsNotPresent, | ||||||
|  |  | ||||||
|  |     /// Unexpected KVM_RUN exit reason. | ||||||
|  |     #[error("Unexpected KVM_RUN exit reason")] | ||||||
|  |     VcpuUnhandledKvmExit, | ||||||
|  |  | ||||||
|  |     /// Pausing the vcpus failed. | ||||||
|  |     #[error("failed to pause vcpus")] | ||||||
|  |     PauseFailed, | ||||||
|  |  | ||||||
|  |     /// A KVM ioctl failed. | ||||||
|  |     #[error("failure in issuing KVM ioctl command")] | ||||||
|  |     Kvm(#[source] kvm_ioctls::Error), | ||||||
|  |  | ||||||
|  |     /// MSR error. | ||||||
|  |     #[error("failure to deal with MSRs")] | ||||||
|  |     Msr(vmm_sys_util::fam::Error), | ||||||
|  |  | ||||||
|  |     /// Error configuring the general purpose registers on aarch64. | ||||||
|  |     #[error("failure while configuring the general purpose registers on aarch64")] | ||||||
|  |     REGSConfiguration(dbs_arch::regs::Error), | ||||||
|  |  | ||||||
|  |     /// Error setting up the global interrupt controller on aarch64. | ||||||
|  |     #[error("failure while setting up the global interrupt controller on aarch64")] | ||||||
|  |     SetupGIC(dbs_arch::gic::Error), | ||||||
|  |  | ||||||
|  |     /// Error getting the Vcpu preferred target on aarch64. | ||||||
|  |     #[error("failure while getting the vCPU preferred target on aarch64")] | ||||||
|  |     VcpuArmPreferredTarget(kvm_ioctls::Error), | ||||||
|  |  | ||||||
|  |     /// Error doing vCPU Init on aarch64. | ||||||
|  |     #[error("failure while doing vCPU init on aarch64")] | ||||||
|  |     VcpuArmInit(kvm_ioctls::Error), | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Result for Vcpu related operations. | ||||||
|  | pub type Result<T> = result::Result<T, VcpuError>; | ||||||
|  |  | ||||||
|  | /// List of events that the Vcpu can receive. | ||||||
|  | #[derive(Debug)] | ||||||
|  | pub enum VcpuEvent { | ||||||
|  |     /// Kill the Vcpu. | ||||||
|  |     Exit, | ||||||
|  |     /// Pause the Vcpu. | ||||||
|  |     Pause, | ||||||
|  |     /// Event that should resume the Vcpu. | ||||||
|  |     Resume, | ||||||
|  |     /// Get the vcpu thread's tid (presumably answered with `VcpuResponse::Tid` - TODO confirm). | ||||||
|  |     Gettid, | ||||||
|  |  | ||||||
|  |     /// Event to revalidate the vcpu's IoManager cache. | ||||||
|  |     RevalidateCache, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// List of responses that the Vcpu reports. | ||||||
|  | pub enum VcpuResponse { | ||||||
|  |     /// Vcpu is paused. | ||||||
|  |     Paused, | ||||||
|  |     /// Vcpu is resumed. | ||||||
|  |     Resumed, | ||||||
|  |     /// Vcpu index and thread tid. | ||||||
|  |     Tid(u8, u32), | ||||||
|  |     /// Requested Vcpu operation is not allowed. | ||||||
|  |     NotAllowed, | ||||||
|  |     /// The requested action encountered an error. | ||||||
|  |     Error(VcpuError), | ||||||
|  |     /// Vcpu IoManager cache is revalidated. | ||||||
|  |     CacheRevalidated, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// List of events that the vcpu_state_sender can send. | ||||||
|  | pub enum VcpuStateEvent { | ||||||
|  |     /// Hotplug notification; the meaning of the `(bool, u32)` payload is not visible here - TODO confirm. | ||||||
|  |     Hotplug((bool, u32)), | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Wrapper over vCPU that hides the underlying interactions with the vCPU thread. | ||||||
|  | pub struct VcpuHandle { | ||||||
|  |     event_sender: Sender<VcpuEvent>, | ||||||
|  |     response_receiver: Receiver<VcpuResponse>, | ||||||
|  |     vcpu_thread: thread::JoinHandle<()>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl VcpuHandle { | ||||||
|  |     /// Sends `event` to the vCPU thread, then signals the thread; returns `SignalVcpu` if the signal fails. | ||||||
|  |     pub fn send_event(&self, event: VcpuEvent) -> Result<()> { | ||||||
|  |         // Deliberately use expect() to crash if the other thread closed this channel. | ||||||
|  |         self.event_sender | ||||||
|  |             .send(event) | ||||||
|  |             .expect("event sender channel closed on vcpu end."); | ||||||
|  |         // Kick the vCPU (signal sigrtmin() + VCPU_RTSIG_OFFSET) so it picks up the message. | ||||||
|  |         self.vcpu_thread | ||||||
|  |             .kill(sigrtmin() + VCPU_RTSIG_OFFSET) | ||||||
|  |             .map_err(VcpuError::SignalVcpu)?; | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Returns the receiving end of the channel carrying responses from the vcpu thread. | ||||||
|  |     pub fn response_receiver(&self) -> &Receiver<VcpuResponse> { | ||||||
|  |         &self.response_receiver | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     #[allow(dead_code)] | ||||||
|  |     /// Consumes the handle and joins the vcpu thread, returning the thread's join result. | ||||||
|  |     pub fn join_vcpu_thread(self) -> thread::Result<()> { | ||||||
|  |         self.vcpu_thread.join() | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Result of one round of vcpu emulation. | ||||||
|  | /// | ||||||
|  | /// NOTE(review): the code producing these values is outside this view; the variant | ||||||
|  | /// descriptions below are inferred from the names - confirm against the run loop. | ||||||
|  | #[derive(Clone, Copy, Debug, Eq, PartialEq)] | ||||||
|  | enum VcpuEmulation { | ||||||
|  |     /// Presumably: the exit was handled and the vcpu can keep running. | ||||||
|  |     Handled, | ||||||
|  |     /// Presumably: the run was interrupted (e.g. by a signal). | ||||||
|  |     Interrupted, | ||||||
|  |     /// Presumably: the vcpu has stopped. | ||||||
|  |     Stopped, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// A wrapper around creating and using a kvm-based VCPU. | ||||||
|  | pub struct Vcpu { | ||||||
|  |     // vCPU fd used by the vCPU | ||||||
|  |     fd: Arc<VcpuFd>, | ||||||
|  |     // vCPU id info | ||||||
|  |     id: u8, | ||||||
|  |     // Cached IO manager used to facilitate IO operations | ||||||
|  |     io_mgr: IoManagerCached, | ||||||
|  |     // Records the vCPU creation time stamp | ||||||
|  |     create_ts: TimestampUs, | ||||||
|  |  | ||||||
|  |     // The receiving end of events channel owned by the vcpu side. | ||||||
|  |     event_receiver: Receiver<VcpuEvent>, | ||||||
|  |     // The transmitting end of the events channel which will be given to the handler. | ||||||
|  |     event_sender: Option<Sender<VcpuEvent>>, | ||||||
|  |     // The receiving end of the responses channel which will be given to the handler. | ||||||
|  |     response_receiver: Option<Receiver<VcpuResponse>>, | ||||||
|  |     // The transmitting end of the responses channel owned by the vcpu side. | ||||||
|  |     response_sender: Sender<VcpuResponse>, | ||||||
|  |     // Event notifier for CPU hotplug. | ||||||
|  |     // Once aarch64 supports vcpu hotplug, the dead_code allowance below needs to be removed | ||||||
|  |     #[cfg_attr(target_arch = "aarch64", allow(dead_code))] | ||||||
|  |     vcpu_state_event: EventFd, | ||||||
|  |     // CPU hotplug events. | ||||||
|  |     // Once aarch64 supports vcpu hotplug, the dead_code allowance below needs to be removed | ||||||
|  |     #[cfg_attr(target_arch = "aarch64", allow(dead_code))] | ||||||
|  |     vcpu_state_sender: Sender<VcpuStateEvent>, | ||||||
|  |  | ||||||
|  |     // An `EventFd` that will be written into when this vcpu exits. | ||||||
|  |     exit_evt: EventFd, | ||||||
|  |     // Whether the kvm in use supports the immediate_exit flag. | ||||||
|  |     support_immediate_exit: bool, | ||||||
|  |  | ||||||
|  |     // CPUID information for the x86_64 CPU | ||||||
|  |     #[cfg(target_arch = "x86_64")] | ||||||
|  |     cpuid: kvm_bindings::CpuId, | ||||||
|  |  | ||||||
|  |     /// Multiprocessor affinity register (MPIDR) value recorded for aarch64 | ||||||
|  |     #[cfg(target_arch = "aarch64")] | ||||||
|  |     pub(crate) mpidr: u64, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Type of the thread-local slot holding a `Vcpu` pointer; naming it helps IDEs interpret the closures. | ||||||
|  | type VcpuCell = Cell<Option<*const Vcpu>>; | ||||||
|  |  | ||||||
|  | impl Vcpu { | ||||||
|  |     // Per-thread slot pointing at the `Vcpu` associated with the current thread, if any. | ||||||
|  |     thread_local! { | ||||||
|  |         static TLS_VCPU_PTR: VcpuCell = Cell::new(None); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Binds `self` to the calling thread by storing its address in `TLS_VCPU_PTR`. | ||||||
|  |     /// | ||||||
|  |     /// This has to succeed on a thread before `run_on_thread_local()` can find a | ||||||
|  |     /// `Vcpu` there. | ||||||
|  |     /// | ||||||
|  |     /// Returns `VcpuError::VcpuTlsInit` when the slot of this thread is already occupied. | ||||||
|  |     fn init_thread_local_data(&mut self) -> Result<()> { | ||||||
|  |         let this = self as *const Vcpu; | ||||||
|  |         Self::TLS_VCPU_PTR.with(|slot: &VcpuCell| match slot.get() { | ||||||
|  |             Some(_) => Err(VcpuError::VcpuTlsInit), | ||||||
|  |             None => { | ||||||
|  |                 slot.set(Some(this)); | ||||||
|  |                 Ok(()) | ||||||
|  |             } | ||||||
|  |         }) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Deassociates `self` from the current thread. | ||||||
|  |     /// | ||||||
|  |     /// Should be called if the current `self` had called `init_thread_local_data()` and | ||||||
|  |     /// now needs to move to a different thread. | ||||||
|  |     /// | ||||||
|  |     /// This is a best-effort cleanup: only the slot of the calling thread is inspected, | ||||||
|  |     /// so a `Vcpu` that was moved to another thread _before_ this runs cannot be | ||||||
|  |     /// cleaned up from here. | ||||||
|  |     /// | ||||||
|  |     /// # Errors | ||||||
|  |     /// | ||||||
|  |     /// Returns `VcpuError::VcpuTlsNotPresent` if the slot of the current thread is empty | ||||||
|  |     /// or holds a different `Vcpu`. | ||||||
|  |     fn reset_thread_local_data(&mut self) -> Result<()> { | ||||||
|  |         let this = self as *const Vcpu; | ||||||
|  |         Self::TLS_VCPU_PTR.with(|cell: &VcpuCell| match cell.get() { | ||||||
|  |             // Clear the slot through the `cell` already in hand; no second TLS lookup is needed. | ||||||
|  |             Some(vcpu_ptr) if vcpu_ptr == this => { | ||||||
|  |                 cell.set(None); | ||||||
|  |                 Ok(()) | ||||||
|  |             } | ||||||
|  |             _ => Err(VcpuError::VcpuTlsNotPresent), | ||||||
|  |         }) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Invokes `func` with the `Vcpu` registered for the calling thread. | ||||||
|  |     /// | ||||||
|  |     /// `init_thread_local_data()` must have been run on this thread beforehand; | ||||||
|  |     /// otherwise `VcpuError::VcpuTlsNotPresent` is returned and `func` is not called. | ||||||
|  |     /// | ||||||
|  |     /// # Safety | ||||||
|  |     /// | ||||||
|  |     /// This is marked unsafe as it allows temporary aliasing through | ||||||
|  |     /// dereferencing from pointer an already borrowed `Vcpu`. | ||||||
|  |     unsafe fn run_on_thread_local<F>(func: F) -> Result<()> | ||||||
|  |     where | ||||||
|  |         F: FnOnce(&Vcpu), | ||||||
|  |     { | ||||||
|  |         Self::TLS_VCPU_PTR.with(|slot: &VcpuCell| match slot.get() { | ||||||
|  |             None => Err(VcpuError::VcpuTlsNotPresent), | ||||||
|  |             Some(raw_vcpu) => { | ||||||
|  |                 // The slot is populated, and it is cleared again on `Vcpu::drop`, | ||||||
|  |                 // so the pointer read here is not dangling. | ||||||
|  |                 func(&*raw_vcpu); | ||||||
|  |                 Ok(()) | ||||||
|  |             } | ||||||
|  |         }) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Installs the handler for the vcpu kick signal (`sigrtmin() + VCPU_RTSIG_OFFSET`). | ||||||
|  |     /// | ||||||
|  |     /// The handler uses TLS to find the vcpu running on the signalled thread, if there | ||||||
|  |     /// is one, and sets the kvm immediate exit flag on it. | ||||||
|  |     /// | ||||||
|  |     /// # Panics | ||||||
|  |     /// | ||||||
|  |     /// Panics if the signal handler cannot be registered. | ||||||
|  |     pub fn register_kick_signal_handler() { | ||||||
|  |         extern "C" fn kick_handler(_signum: c_int, _info: *mut siginfo_t, _ctx: *mut c_void) { | ||||||
|  |             // This is safe because it's temporarily aliasing the `Vcpu` object, but we are | ||||||
|  |             // only reading `vcpu.fd` which does not change for the lifetime of the `Vcpu`. | ||||||
|  |             let _ = unsafe { | ||||||
|  |                 Vcpu::run_on_thread_local(|vcpu| { | ||||||
|  |                     vcpu.fd.set_kvm_immediate_exit(1); | ||||||
|  |                     fence(Ordering::Release); | ||||||
|  |                 }) | ||||||
|  |             }; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         let kick_signal = sigrtmin() + VCPU_RTSIG_OFFSET; | ||||||
|  |         register_signal_handler(kick_signal, kick_handler) | ||||||
|  |             .expect("Failed to register vcpu signal handler"); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Returns the id of this vCPU, i.e. the cpu index as seen by the guest OS. | ||||||
|  |     pub fn cpu_index(&self) -> u8 { | ||||||
|  |         self.id | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Consumes the vcpu, runs it on a dedicated thread named `db_vcpu<index>` and | ||||||
|  |     /// returns the `VcpuHandle` used to control it remotely. | ||||||
|  |     /// | ||||||
|  |     /// The new thread registers the vcpu in its TLS, waits on `barrier` and then | ||||||
|  |     /// enters `run()` with `seccomp_filter`. | ||||||
|  |     /// | ||||||
|  |     /// Returns `VcpuError::VcpuSpawn` if the thread cannot be spawned. | ||||||
|  |     pub fn start_threaded( | ||||||
|  |         mut self, | ||||||
|  |         seccomp_filter: BpfProgram, | ||||||
|  |         barrier: Arc<Barrier>, | ||||||
|  |     ) -> Result<VcpuHandle> { | ||||||
|  |         // The handler-side channel ends leave the vcpu before it moves to its thread. | ||||||
|  |         let event_sender = self.event_sender.take().unwrap(); | ||||||
|  |         let response_receiver = self.response_receiver.take().unwrap(); | ||||||
|  |         let thread_name = format!("db_vcpu{}", self.cpu_index()); | ||||||
|  |  | ||||||
|  |         let vcpu_main = move || { | ||||||
|  |             self.init_thread_local_data() | ||||||
|  |                 .expect("Cannot cleanly initialize vcpu TLS."); | ||||||
|  |             barrier.wait(); | ||||||
|  |             self.run(seccomp_filter); | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         thread::Builder::new() | ||||||
|  |             .name(thread_name) | ||||||
|  |             .spawn(vcpu_main) | ||||||
|  |             .map(|vcpu_thread| VcpuHandle { | ||||||
|  |                 event_sender, | ||||||
|  |                 response_receiver, | ||||||
|  |                 vcpu_thread, | ||||||
|  |             }) | ||||||
|  |             .map_err(VcpuError::VcpuSpawn) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Runs the vcpu via `VcpuFd::run`; kept as a separate function so tests can mock it. | ||||||
|  |     #[cfg(not(test))] | ||||||
|  |     pub fn emulate(fd: &VcpuFd) -> std::result::Result<VcpuExit<'_>, kvm_ioctls::Error> { | ||||||
|  |         fd.run() | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Runs the vCPU in KVM context and handles the kvm exit reason. | ||||||
|  |     /// | ||||||
|  |     /// Returns an error or an enum specifying whether emulation was handled, interrupted or stopped. | ||||||
|  |     fn run_emulation(&mut self) -> Result<VcpuEmulation> { | ||||||
|  |         match Vcpu::emulate(&self.fd) { | ||||||
|  |             Ok(run) => match run { | ||||||
|  |                 #[cfg(target_arch = "x86_64")] | ||||||
|  |                 VcpuExit::IoIn(addr, data) => { | ||||||
|  |                     let _ = self.io_mgr.pio_read(addr, data); | ||||||
|  |                     METRICS.vcpu.exit_io_in.inc(); | ||||||
|  |                     Ok(VcpuEmulation::Handled) | ||||||
|  |                 } | ||||||
|  |                 #[cfg(target_arch = "x86_64")] | ||||||
|  |                 VcpuExit::IoOut(addr, data) => { | ||||||
|  |                     if !self.check_io_port_info(addr, data)? { | ||||||
|  |                         let _ = self.io_mgr.pio_write(addr, data); | ||||||
|  |                     } | ||||||
|  |                     METRICS.vcpu.exit_io_out.inc(); | ||||||
|  |                     Ok(VcpuEmulation::Handled) | ||||||
|  |                 } | ||||||
|  |                 VcpuExit::MmioRead(addr, data) => { | ||||||
|  |                     let _ = self.io_mgr.mmio_read(addr, data); | ||||||
|  |                     METRICS.vcpu.exit_mmio_read.inc(); | ||||||
|  |                     Ok(VcpuEmulation::Handled) | ||||||
|  |                 } | ||||||
|  |                 VcpuExit::MmioWrite(addr, data) => { | ||||||
|  |                     #[cfg(target_arch = "aarch64")] | ||||||
|  |                     self.check_boot_complete_signal(addr, data); | ||||||
|  |  | ||||||
|  |                     let _ = self.io_mgr.mmio_write(addr, data); | ||||||
|  |                     METRICS.vcpu.exit_mmio_write.inc(); | ||||||
|  |                     Ok(VcpuEmulation::Handled) | ||||||
|  |                 } | ||||||
|  |                 VcpuExit::Hlt => { | ||||||
|  |                     info!("Received KVM_EXIT_HLT signal"); | ||||||
|  |                     Err(VcpuError::VcpuUnhandledKvmExit) | ||||||
|  |                 } | ||||||
|  |                 VcpuExit::Shutdown => { | ||||||
|  |                     info!("Received KVM_EXIT_SHUTDOWN signal"); | ||||||
|  |                     Err(VcpuError::VcpuUnhandledKvmExit) | ||||||
|  |                 } | ||||||
|  |                 // Documentation specifies that the kvm exits below are considered errors. | ||||||
|  |                 VcpuExit::FailEntry => { | ||||||
|  |                     METRICS.vcpu.failures.inc(); | ||||||
|  |                     error!("Received KVM_EXIT_FAIL_ENTRY signal"); | ||||||
|  |                     Err(VcpuError::VcpuUnhandledKvmExit) | ||||||
|  |                 } | ||||||
|  |                 VcpuExit::InternalError => { | ||||||
|  |                     METRICS.vcpu.failures.inc(); | ||||||
|  |                     error!("Received KVM_EXIT_INTERNAL_ERROR signal"); | ||||||
|  |                     Err(VcpuError::VcpuUnhandledKvmExit) | ||||||
|  |                 } | ||||||
|  |                 VcpuExit::SystemEvent(event_type, event_flags) => match event_type { | ||||||
|  |                     KVM_SYSTEM_EVENT_RESET | KVM_SYSTEM_EVENT_SHUTDOWN => { | ||||||
|  |                         info!( | ||||||
|  |                             "Received KVM_SYSTEM_EVENT: type: {}, event: {}", | ||||||
|  |                             event_type, event_flags | ||||||
|  |                         ); | ||||||
|  |                         Ok(VcpuEmulation::Stopped) | ||||||
|  |                     } | ||||||
|  |                     _ => { | ||||||
|  |                         METRICS.vcpu.failures.inc(); | ||||||
|  |                         error!( | ||||||
|  |                             "Received KVM_SYSTEM_EVENT signal type: {}, flag: {}", | ||||||
|  |                             event_type, event_flags | ||||||
|  |                         ); | ||||||
|  |                         Err(VcpuError::VcpuUnhandledKvmExit) | ||||||
|  |                     } | ||||||
|  |                 }, | ||||||
|  |                 r => { | ||||||
|  |                     METRICS.vcpu.failures.inc(); | ||||||
|  |                     // TODO: Are we sure we want to finish running a vcpu upon | ||||||
|  |                     // receiving a vm exit that is not necessarily an error? | ||||||
|  |                     error!("Unexpected exit reason on vcpu run: {:?}", r); | ||||||
|  |                     Err(VcpuError::VcpuUnhandledKvmExit) | ||||||
|  |                 } | ||||||
|  |             }, | ||||||
|  |             // The run call itself failed: `EAGAIN` and `EINTR` are treated as recoverable, | ||||||
|  |             // any other errno is logged and reported as an unhandled kvm exit. | ||||||
|  |             Err(ref e) => { | ||||||
|  |                 match e.errno() { | ||||||
|  |                     libc::EAGAIN => Ok(VcpuEmulation::Handled), | ||||||
|  |                     libc::EINTR => { | ||||||
|  |                         self.fd.set_kvm_immediate_exit(0); | ||||||
|  |                         // Notify that this KVM_RUN was interrupted. | ||||||
|  |                         Ok(VcpuEmulation::Interrupted) | ||||||
|  |                     } | ||||||
|  |                     _ => { | ||||||
|  |                         METRICS.vcpu.failures.inc(); | ||||||
|  |                         error!("Failure during vcpu run: {}", e); | ||||||
|  |                         #[cfg(target_arch = "x86_64")] | ||||||
|  |                         { | ||||||
|  |                             error!( | ||||||
|  |                                 "dump regs: {:?}, dump sregs: {:?}", | ||||||
|  |                                 self.fd.get_regs(), | ||||||
|  |                                 self.fd.get_sregs() | ||||||
|  |                             ); | ||||||
|  |                         } | ||||||
|  |                         Err(VcpuError::VcpuUnhandledKvmExit) | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Intercepts writes to the I/O ports that dragonball itself uses. | ||||||
|  |     /// | ||||||
|  |     /// Returns `Ok(true)` when the write was consumed here, which is the case for a | ||||||
|  |     /// 4-byte write to `MAGIC_IOPORT_DEBUG_INFO` (logged as guest kernel debug info). | ||||||
|  |     /// Returns `Ok(false)` for every other port or payload size, so the caller can | ||||||
|  |     /// forward the write to the I/O manager. | ||||||
|  |     #[cfg(target_arch = "x86_64")] | ||||||
|  |     fn check_io_port_info(&self, addr: u16, data: &[u8]) -> Result<bool> { | ||||||
|  |         if addr != MAGIC_IOPORT_DEBUG_INFO { | ||||||
|  |             return Ok(false); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // debug info signal | ||||||
|  |         // Decode by value: `data` is a byte slice with no alignment guarantee, so | ||||||
|  |         // reading it through a `*const u32` would be undefined behaviour. | ||||||
|  |         if let [b0, b1, b2, b3] = *data { | ||||||
|  |             let value = u32::from_ne_bytes([b0, b1, b2, b3]); | ||||||
|  |             warn!("KDBG: guest kernel debug info: 0x{:x}", value); | ||||||
|  |             return Ok(true); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         Ok(false) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Returns the id of the calling thread as reported by `nix::unistd::gettid()`. | ||||||
|  |     fn gettid() -> u32 { | ||||||
|  |         let tid = nix::unistd::gettid(); | ||||||
|  |         tid.as_raw() as u32 | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Asks the cached I/O manager of this vcpu to revalidate its cache. | ||||||
|  |     /// | ||||||
|  |     /// Always returns `Ok(())`. | ||||||
|  |     fn revalidate_cache(&mut self) -> Result<()> { | ||||||
|  |         let io_mgr = &mut self.io_mgr; | ||||||
|  |         io_mgr.revalidate_cache(); | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Entry point of the vCPU thread. | ||||||
|  |     /// | ||||||
|  |     /// Applies `seccomp_filter` to the current thread and then drives the vcpu state | ||||||
|  |     /// machine, starting in the `Paused` state. | ||||||
|  |     /// Note that the state of the VCPU and associated VM must be setup first for this to do | ||||||
|  |     /// anything useful. | ||||||
|  |     /// | ||||||
|  |     /// # Panics | ||||||
|  |     /// | ||||||
|  |     /// Panics if the filter cannot be applied for any reason other than being empty; | ||||||
|  |     /// use --seccomp-level=0 if skipping filters altogether is the desired behaviour. | ||||||
|  |     pub fn run(&mut self, seccomp_filter: BpfProgram) { | ||||||
|  |         match apply_filter(&seccomp_filter) { | ||||||
|  |             Ok(_) => {} | ||||||
|  |             // An empty filter is not an error, it is only reported. | ||||||
|  |             Err(SecError::EmptyFilter) => { | ||||||
|  |                 info!("vCPU thread {} use empty seccomp filters.", self.id); | ||||||
|  |             } | ||||||
|  |             Err(e) => panic!( | ||||||
|  |                 "Failed to set the requested seccomp filters on vCPU {}: Error: {}", | ||||||
|  |                 self.id, e | ||||||
|  |             ), | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         info!("vcpu {} is running", self.cpu_index()); | ||||||
|  |  | ||||||
|  |         // The state machine begins in the `Paused` state. | ||||||
|  |         StateMachine::run(self, Self::paused); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Main loop of the `Running` state. | ||||||
|  |     /// | ||||||
|  |     /// Keeps emulating until the emulation is interrupted (or after every handled exit | ||||||
|  |     /// when kvm lacks the immediate_exit flag), then processes at most one pending | ||||||
|  |     /// `VcpuEvent` and returns the next state. A stopped or failed emulation moves | ||||||
|  |     /// straight to the `WaitingExit` state. | ||||||
|  |     fn running(&mut self) -> StateMachine<Self> { | ||||||
|  |         // This loop is here just for optimizing the emulation path. | ||||||
|  |         // No point in ticking the state machine if there are no external events. | ||||||
|  |         loop { | ||||||
|  |             match self.run_emulation() { | ||||||
|  |                 // Emulation ran successfully, continue. | ||||||
|  |                 Ok(VcpuEmulation::Handled) => { | ||||||
|  |                     // We need to break here if kvm doesn't support the | ||||||
|  |                     // immediate_exit flag: the signal sent from the vmm | ||||||
|  |                     // thread may arrive while a vcpu exit is being handled, | ||||||
|  |                     // in which case the external vcpu event would not be | ||||||
|  |                     // noticed. Checking the event_receiver channel after | ||||||
|  |                     // every handled exit shrinks that window. | ||||||
|  |                     if !self.support_immediate_exit { | ||||||
|  |                         break; | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |                 // Emulation was interrupted, check external events. | ||||||
|  |                 Ok(VcpuEmulation::Interrupted) => break, | ||||||
|  |                 // Emulation was stopped due to reset or shutdown. | ||||||
|  |                 Ok(VcpuEmulation::Stopped) => return StateMachine::next(Self::waiting_exit), | ||||||
|  |                 // Emulation errors lead to vCPU exit. | ||||||
|  |                 Err(e) => { | ||||||
|  |                     error!("vcpu: {}, run_emulation failed: {:?}", self.id, e); | ||||||
|  |                     return StateMachine::next(Self::waiting_exit); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // By default don't change state. | ||||||
|  |         let mut state = StateMachine::next(Self::running); | ||||||
|  |  | ||||||
|  |         // Break this emulation loop on any transition request/external event. | ||||||
|  |         match self.event_receiver.try_recv() { | ||||||
|  |             // Running ---- Exit ----> Exited | ||||||
|  |             Ok(VcpuEvent::Exit) => { | ||||||
|  |                 // Move to 'exited' state. | ||||||
|  |                 state = StateMachine::next(Self::exited); | ||||||
|  |             } | ||||||
|  |             // Running ---- Pause ----> Paused | ||||||
|  |             Ok(VcpuEvent::Pause) => { | ||||||
|  |                 // Nothing special to do. | ||||||
|  |                 self.response_sender | ||||||
|  |                     .send(VcpuResponse::Paused) | ||||||
|  |                     .expect("failed to send pause status"); | ||||||
|  |  | ||||||
|  |                 // TODO: we should call `KVM_KVMCLOCK_CTRL` here to make sure | ||||||
|  |                 // TODO continued: the guest soft lockup watchdog does not panic on Resume. | ||||||
|  |                 //let _ = self.fd.kvmclock_ctrl(); | ||||||
|  |  | ||||||
|  |                 // Move to 'paused' state. | ||||||
|  |                 state = StateMachine::next(Self::paused); | ||||||
|  |             } | ||||||
|  |             Ok(VcpuEvent::Resume) => { | ||||||
|  |                 self.response_sender | ||||||
|  |                     .send(VcpuResponse::Resumed) | ||||||
|  |                     .expect("failed to send resume status"); | ||||||
|  |             } | ||||||
|  |             Ok(VcpuEvent::Gettid) => { | ||||||
|  |                 self.response_sender | ||||||
|  |                     .send(VcpuResponse::Tid(self.cpu_index(), Vcpu::gettid())) | ||||||
|  |                     .expect("failed to send vcpu thread tid"); | ||||||
|  |             } | ||||||
|  |             Ok(VcpuEvent::RevalidateCache) => { | ||||||
|  |                 // Report the outcome either way; a revalidation error is handed to the | ||||||
|  |                 // receiver instead of panicking right after it has been sent. | ||||||
|  |                 let response = match self.revalidate_cache() { | ||||||
|  |                     Ok(()) => VcpuResponse::CacheRevalidated, | ||||||
|  |                     Err(e) => VcpuResponse::Error(e), | ||||||
|  |                 }; | ||||||
|  |                 self.response_sender | ||||||
|  |                     .send(response) | ||||||
|  |                     .expect("failed to send cache revalidation status"); | ||||||
|  |             } | ||||||
|  |             // Unhandled exit of the other end. | ||||||
|  |             Err(TryRecvError::Disconnected) => { | ||||||
|  |                 // Move to 'exited' state. | ||||||
|  |                 state = StateMachine::next(Self::exited); | ||||||
|  |             } | ||||||
|  |             // All other events or lack thereof have no effect on current 'running' state. | ||||||
|  |             Err(TryRecvError::Empty) => (), | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         state | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Main loop of the `Paused` state. | ||||||
|  |     /// | ||||||
|  |     /// Blocks until one `VcpuEvent` arrives, answers it on the response channel where | ||||||
|  |     /// applicable and returns the next state. A closed event channel leads to the | ||||||
|  |     /// `Exited` state. | ||||||
|  |     fn paused(&mut self) -> StateMachine<Self> { | ||||||
|  |         match self.event_receiver.recv() { | ||||||
|  |             // Paused ---- Exit ----> Exited | ||||||
|  |             Ok(VcpuEvent::Exit) => { | ||||||
|  |                 // Move to 'exited' state. | ||||||
|  |                 StateMachine::next(Self::exited) | ||||||
|  |             } | ||||||
|  |             // Paused ---- Resume ----> Running | ||||||
|  |             Ok(VcpuEvent::Resume) => { | ||||||
|  |                 self.response_sender | ||||||
|  |                     .send(VcpuResponse::Resumed) | ||||||
|  |                     .expect("failed to send resume status"); | ||||||
|  |                 // Move to 'running' state. | ||||||
|  |                 StateMachine::next(Self::running) | ||||||
|  |             } | ||||||
|  |             Ok(VcpuEvent::Pause) => { | ||||||
|  |                 self.response_sender | ||||||
|  |                     .send(VcpuResponse::Paused) | ||||||
|  |                     .expect("failed to send pause status"); | ||||||
|  |                 // continue 'pause' state. | ||||||
|  |                 StateMachine::next(Self::paused) | ||||||
|  |             } | ||||||
|  |             Ok(VcpuEvent::Gettid) => { | ||||||
|  |                 self.response_sender | ||||||
|  |                     .send(VcpuResponse::Tid(self.cpu_index(), Vcpu::gettid())) | ||||||
|  |                     .expect("failed to send vcpu thread tid"); | ||||||
|  |                 StateMachine::next(Self::paused) | ||||||
|  |             } | ||||||
|  |             Ok(VcpuEvent::RevalidateCache) => { | ||||||
|  |                 // Report the outcome either way; a revalidation error is handed to the | ||||||
|  |                 // receiver instead of panicking right after it has been sent. | ||||||
|  |                 let response = match self.revalidate_cache() { | ||||||
|  |                     Ok(()) => VcpuResponse::CacheRevalidated, | ||||||
|  |                     Err(e) => VcpuResponse::Error(e), | ||||||
|  |                 }; | ||||||
|  |                 self.response_sender | ||||||
|  |                     .send(response) | ||||||
|  |                     .expect("failed to send cache revalidation status"); | ||||||
|  |  | ||||||
|  |                 StateMachine::next(Self::paused) | ||||||
|  |             } | ||||||
|  |             // Unhandled exit of the other end. | ||||||
|  |             Err(_) => { | ||||||
|  |                 // Move to 'exited' state. | ||||||
|  |                 StateMachine::next(Self::exited) | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Main loop of the `WaitingExit` state. | ||||||
|  |     /// | ||||||
|  |     /// Signals `exit_evt` so that the vmm stops the machine, then blocks for the next | ||||||
|  |     /// event. Only `VcpuEvent::Exit` or a closed channel leads to the `Exited` state; | ||||||
|  |     /// any other event is logged and this state is entered again. | ||||||
|  |     fn waiting_exit(&mut self) -> StateMachine<Self> { | ||||||
|  |         // trigger vmm to stop machine | ||||||
|  |         if let Err(e) = self.exit_evt.write(1) { | ||||||
|  |             METRICS.vcpu.failures.inc(); | ||||||
|  |             error!("Failed signaling vcpu exit event: {}", e); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         match self.event_receiver.recv() { | ||||||
|  |             Ok(VcpuEvent::Exit) => StateMachine::next(Self::exited), | ||||||
|  |             Ok(_) => { | ||||||
|  |                 error!( | ||||||
|  |                     "wrong state received in waiting exit state on vcpu {}", | ||||||
|  |                     self.id | ||||||
|  |                 ); | ||||||
|  |                 StateMachine::next(Self::waiting_exit) | ||||||
|  |             } | ||||||
|  |             Err(_) => { | ||||||
|  |                 error!( | ||||||
|  |                     "vcpu channel closed in waiting exit state on vcpu {}", | ||||||
|  |                     self.id | ||||||
|  |                 ); | ||||||
|  |                 StateMachine::next(Self::exited) | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Handler of the terminal `Exited` state; it only finishes the state machine. | ||||||
|  |     fn exited(&mut self) -> StateMachine<Self> { | ||||||
|  |         // State machine reached its end. | ||||||
|  |         StateMachine::finish(Self::exited) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl Drop for Vcpu { | ||||||
|  |     /// Removes `self` from the TLS slot of the current thread, if it is stored there. | ||||||
|  |     fn drop(&mut self) { | ||||||
|  |         // The result is discarded on purpose: an error only means that the slot of | ||||||
|  |         // this thread does not hold `self`. | ||||||
|  |         drop(self.reset_thread_local_data()); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[cfg(test)] | ||||||
|  | pub mod tests { | ||||||
|  |     use std::os::unix::io::AsRawFd; | ||||||
|  |     use std::sync::mpsc::{channel, Receiver}; | ||||||
|  |     use std::sync::Mutex; | ||||||
|  |  | ||||||
|  |     use arc_swap::ArcSwap; | ||||||
|  |     use dbs_device::device_manager::IoManager; | ||||||
|  |     use kvm_ioctls::Kvm; | ||||||
|  |     use lazy_static::lazy_static; | ||||||
|  |  | ||||||
|  |     use super::*; | ||||||
|  |     use crate::kvm_context::KvmContext; | ||||||
|  |  | ||||||
|  |     /// Result that the mocked `Vcpu::emulate()` produces on its next call. | ||||||
|  |     #[derive(Debug)] | ||||||
|  |     pub enum EmulationCase { | ||||||
|  |         /// Produce `VcpuExit::IoIn`. | ||||||
|  |         IoIn, | ||||||
|  |         /// Produce `VcpuExit::IoOut`. | ||||||
|  |         IoOut, | ||||||
|  |         /// Produce `VcpuExit::MmioRead`. | ||||||
|  |         MmioRead, | ||||||
|  |         /// Produce `VcpuExit::MmioWrite`. | ||||||
|  |         MmioWrite, | ||||||
|  |         /// Produce `VcpuExit::Hlt`. | ||||||
|  |         Hlt, | ||||||
|  |         /// Produce `VcpuExit::Shutdown`. | ||||||
|  |         Shutdown, | ||||||
|  |         /// Produce `VcpuExit::FailEntry`. | ||||||
|  |         FailEntry, | ||||||
|  |         /// Produce `VcpuExit::InternalError`. | ||||||
|  |         InternalError, | ||||||
|  |         /// Produce `VcpuExit::Unknown`. | ||||||
|  |         Unknown, | ||||||
|  |         /// Produce `VcpuExit::SystemEvent` with the given event type and flags. | ||||||
|  |         SystemEvent(u32, u64), | ||||||
|  |         /// Fail with a `kvm_ioctls::Error` built from the given errno. | ||||||
|  |         Error(i32), | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Case the mocked `Vcpu::emulate()` reports; tests overwrite it before calling `run_emulation()`. | ||||||
|  |     lazy_static! { | ||||||
|  |         pub static ref EMULATE_RES: Mutex<EmulationCase> = Mutex::new(EmulationCase::Unknown); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     impl Vcpu { | ||||||
|  |         /// Test replacement for the real `emulate()`: ignores the fd and returns the | ||||||
|  |         /// exit (or error) selected through `EMULATE_RES`. | ||||||
|  |         pub fn emulate(_fd: &VcpuFd) -> std::result::Result<VcpuExit<'_>, kvm_ioctls::Error> { | ||||||
|  |             let case = EMULATE_RES.lock().unwrap(); | ||||||
|  |             let exit = match *case { | ||||||
|  |                 EmulationCase::Error(errno) => return Err(kvm_ioctls::Error::new(errno)), | ||||||
|  |                 EmulationCase::IoIn => VcpuExit::IoIn(0, &mut []), | ||||||
|  |                 EmulationCase::IoOut => VcpuExit::IoOut(0, &[]), | ||||||
|  |                 EmulationCase::MmioRead => VcpuExit::MmioRead(0, &mut []), | ||||||
|  |                 EmulationCase::MmioWrite => VcpuExit::MmioWrite(0, &[]), | ||||||
|  |                 EmulationCase::Hlt => VcpuExit::Hlt, | ||||||
|  |                 EmulationCase::Shutdown => VcpuExit::Shutdown, | ||||||
|  |                 EmulationCase::FailEntry => VcpuExit::FailEntry, | ||||||
|  |                 EmulationCase::InternalError => VcpuExit::InternalError, | ||||||
|  |                 EmulationCase::Unknown => VcpuExit::Unknown, | ||||||
|  |                 EmulationCase::SystemEvent(event_type, event_flags) => { | ||||||
|  |                     VcpuExit::SystemEvent(event_type, event_flags) | ||||||
|  |                 } | ||||||
|  |             }; | ||||||
|  |             Ok(exit) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Builds a `Vcpu` with id 0 on a fresh KVM VM, together with the receiving end | ||||||
|  |     /// of its `VcpuStateEvent` channel. | ||||||
|  |     #[cfg(target_arch = "x86_64")] | ||||||
|  |     fn create_vcpu() -> (Vcpu, Receiver<VcpuStateEvent>) { | ||||||
|  |         // Call for kvm too frequently would cause error in some host kernel. | ||||||
|  |         std::thread::sleep(std::time::Duration::from_millis(5)); | ||||||
|  |  | ||||||
|  |         let kvm = Kvm::new().unwrap(); | ||||||
|  |         let vm_fd = Arc::new(kvm.create_vm().unwrap()); | ||||||
|  |         let kvm_ctx = KvmContext::new(Some(kvm.as_raw_fd())).unwrap(); | ||||||
|  |         let fd = Arc::new(vm_fd.create_vcpu(0).unwrap()); | ||||||
|  |         let io_mgr = IoManagerCached::new(Arc::new(ArcSwap::new(Arc::new(IoManager::new())))); | ||||||
|  |         let cpuid = kvm_ctx | ||||||
|  |             .supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES) | ||||||
|  |             .unwrap(); | ||||||
|  |         let exit_evt = EventFd::new(libc::EFD_NONBLOCK).unwrap(); | ||||||
|  |         let state_evt = EventFd::new(libc::EFD_NONBLOCK).unwrap(); | ||||||
|  |         let (state_tx, state_rx) = channel(); | ||||||
|  |         let create_ts = TimestampUs::default(); | ||||||
|  |  | ||||||
|  |         let vcpu = Vcpu::new_x86_64( | ||||||
|  |             0, fd, io_mgr, cpuid, exit_evt, state_evt, state_tx, create_ts, false, | ||||||
|  |         ) | ||||||
|  |         .unwrap(); | ||||||
|  |  | ||||||
|  |         (vcpu, state_rx) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     #[cfg(target_arch = "x86_64")] | ||||||
|  |     #[test] | ||||||
|  |     fn test_vcpu_run_emulation() { | ||||||
|  |         let (mut vcpu, _) = create_vcpu(); | ||||||
|  |  | ||||||
|  |         // Io in | ||||||
|  |         *(EMULATE_RES.lock().unwrap()) = EmulationCase::IoIn; | ||||||
|  |         let res = vcpu.run_emulation(); | ||||||
|  |         assert!(matches!(res, Ok(VcpuEmulation::Handled))); | ||||||
|  |  | ||||||
|  |         // Io out | ||||||
|  |         *(EMULATE_RES.lock().unwrap()) = EmulationCase::IoOut; | ||||||
|  |         let res = vcpu.run_emulation(); | ||||||
|  |         assert!(matches!(res, Ok(VcpuEmulation::Handled))); | ||||||
|  |  | ||||||
|  |         // Mmio read | ||||||
|  |         *(EMULATE_RES.lock().unwrap()) = EmulationCase::MmioRead; | ||||||
|  |         let res = vcpu.run_emulation(); | ||||||
|  |         assert!(matches!(res, Ok(VcpuEmulation::Handled))); | ||||||
|  |  | ||||||
|  |         // Mmio write | ||||||
|  |         *(EMULATE_RES.lock().unwrap()) = EmulationCase::MmioWrite; | ||||||
|  |         let res = vcpu.run_emulation(); | ||||||
|  |         assert!(matches!(res, Ok(VcpuEmulation::Handled))); | ||||||
|  |  | ||||||
|  |         // KVM_EXIT_HLT signal | ||||||
|  |         *(EMULATE_RES.lock().unwrap()) = EmulationCase::Hlt; | ||||||
|  |         let res = vcpu.run_emulation(); | ||||||
|  |         assert!(matches!(res, Err(VcpuError::VcpuUnhandledKvmExit))); | ||||||
|  |  | ||||||
|  |         // KVM_EXIT_SHUTDOWN signal | ||||||
|  |         *(EMULATE_RES.lock().unwrap()) = EmulationCase::Shutdown; | ||||||
|  |         let res = vcpu.run_emulation(); | ||||||
|  |         assert!(matches!(res, Err(VcpuError::VcpuUnhandledKvmExit))); | ||||||
|  |  | ||||||
|  |         // KVM_EXIT_FAIL_ENTRY signal | ||||||
|  |         *(EMULATE_RES.lock().unwrap()) = EmulationCase::FailEntry; | ||||||
|  |         let res = vcpu.run_emulation(); | ||||||
|  |         assert!(matches!(res, Err(VcpuError::VcpuUnhandledKvmExit))); | ||||||
|  |  | ||||||
|  |         // KVM_EXIT_INTERNAL_ERROR signal | ||||||
|  |         *(EMULATE_RES.lock().unwrap()) = EmulationCase::InternalError; | ||||||
|  |         let res = vcpu.run_emulation(); | ||||||
|  |         assert!(matches!(res, Err(VcpuError::VcpuUnhandledKvmExit))); | ||||||
|  |  | ||||||
|  |         // KVM_SYSTEM_EVENT_RESET | ||||||
|  |         *(EMULATE_RES.lock().unwrap()) = EmulationCase::SystemEvent(KVM_SYSTEM_EVENT_RESET, 0); | ||||||
|  |         let res = vcpu.run_emulation(); | ||||||
|  |         assert!(matches!(res, Ok(VcpuEmulation::Stopped))); | ||||||
|  |  | ||||||
|  |         // KVM_SYSTEM_EVENT_SHUTDOWN | ||||||
|  |         *(EMULATE_RES.lock().unwrap()) = EmulationCase::SystemEvent(KVM_SYSTEM_EVENT_SHUTDOWN, 0); | ||||||
|  |         let res = vcpu.run_emulation(); | ||||||
|  |         assert!(matches!(res, Ok(VcpuEmulation::Stopped))); | ||||||
|  |  | ||||||
|  |         // Other system event | ||||||
|  |         *(EMULATE_RES.lock().unwrap()) = EmulationCase::SystemEvent(0, 0); | ||||||
|  |         let res = vcpu.run_emulation(); | ||||||
|  |         assert!(matches!(res, Err(VcpuError::VcpuUnhandledKvmExit))); | ||||||
|  |  | ||||||
|  |         // Unknown exit reason | ||||||
|  |         *(EMULATE_RES.lock().unwrap()) = EmulationCase::Unknown; | ||||||
|  |         let res = vcpu.run_emulation(); | ||||||
|  |         assert!(matches!(res, Err(VcpuError::VcpuUnhandledKvmExit))); | ||||||
|  |  | ||||||
|  |         // Error: EAGAIN | ||||||
|  |         *(EMULATE_RES.lock().unwrap()) = EmulationCase::Error(libc::EAGAIN); | ||||||
|  |         let res = vcpu.run_emulation(); | ||||||
|  |         assert!(matches!(res, Ok(VcpuEmulation::Handled))); | ||||||
|  |  | ||||||
|  |         // Error: EINTR | ||||||
|  |         *(EMULATE_RES.lock().unwrap()) = EmulationCase::Error(libc::EINTR); | ||||||
|  |         let res = vcpu.run_emulation(); | ||||||
|  |         assert!(matches!(res, Ok(VcpuEmulation::Interrupted))); | ||||||
|  |  | ||||||
|  |         // other error | ||||||
|  |         *(EMULATE_RES.lock().unwrap()) = EmulationCase::Error(libc::EINVAL); | ||||||
|  |         let res = vcpu.run_emulation(); | ||||||
|  |         assert!(matches!(res, Err(VcpuError::VcpuUnhandledKvmExit))); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Checks that `check_io_port_info` returns `Ok(true)` for both magic I/O ports. | ||||||
|  |     /// | ||||||
|  |     /// Covers the guest-boot-complete port/value pair and the debug-info port with | ||||||
|  |     /// a four-byte zero payload. | ||||||
|  |     #[cfg(target_arch = "x86_64")] | ||||||
|  |     #[test] | ||||||
|  |     fn test_vcpu_check_io_port_info() { | ||||||
|  |         // The second tuple element is never read in this test. Binding it to | ||||||
|  |         // `_receiver` (rather than `_`) silences the unused-variable warning while | ||||||
|  |         // keeping the value alive until the end of the test. | ||||||
|  |         let (vcpu, _receiver) = create_vcpu(); | ||||||
|  |  | ||||||
|  |         // boot complete signal | ||||||
|  |         let res = vcpu | ||||||
|  |             .check_io_port_info( | ||||||
|  |                 MAGIC_IOPORT_SIGNAL_GUEST_BOOT_COMPLETE, | ||||||
|  |                 &[MAGIC_VALUE_SIGNAL_GUEST_BOOT_COMPLETE], | ||||||
|  |             ) | ||||||
|  |             .unwrap(); | ||||||
|  |         assert!(res); | ||||||
|  |  | ||||||
|  |         // debug info signal | ||||||
|  |         let res = vcpu | ||||||
|  |             .check_io_port_info(MAGIC_IOPORT_DEBUG_INFO, &[0, 0, 0, 0]) | ||||||
|  |             .unwrap(); | ||||||
|  |         assert!(res); | ||||||
|  |     } | ||||||
|  | } | ||||||
							
								
								
									
										5
									
								
								src/dragonball/src/vcpu/vcpu_manager.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								src/dragonball/src/vcpu/vcpu_manager.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,5 @@ | |||||||
|  | // Copyright (C) 2022 Alibaba Cloud. All rights reserved. | ||||||
|  | // | ||||||
|  | // SPDX-License-Identifier: Apache-2.0 | ||||||
|  |  | ||||||
|  | //! The implementation of the vCPU manager. | ||||||
							
								
								
									
										149
									
								
								src/dragonball/src/vcpu/x86_64.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										149
									
								
								src/dragonball/src/vcpu/x86_64.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,149 @@ | |||||||
|  | // Copyright (C) 2022 Alibaba Cloud. All rights reserved. | ||||||
|  | // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||||||
|  | // SPDX-License-Identifier: Apache-2.0 | ||||||
|  | // | ||||||
|  | // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. | ||||||
|  | // Use of this source code is governed by a BSD-style license that can be | ||||||
|  | // found in the THIRD-PARTY file. | ||||||
|  |  | ||||||
|  | use std::sync::mpsc::{channel, Sender}; | ||||||
|  | use std::sync::Arc; | ||||||
|  |  | ||||||
|  | use dbs_arch::cpuid::{process_cpuid, VmSpec}; | ||||||
|  | use dbs_arch::gdt::gdt_entry; | ||||||
|  | use dbs_utils::time::TimestampUs; | ||||||
|  | use kvm_bindings::CpuId; | ||||||
|  | use kvm_ioctls::{VcpuFd, VmFd}; | ||||||
|  | use log::error; | ||||||
|  | use vm_memory::{Address, GuestAddress, GuestAddressSpace}; | ||||||
|  | use vmm_sys_util::eventfd::EventFd; | ||||||
|  |  | ||||||
|  | use crate::address_space_manager::GuestAddressSpaceImpl; | ||||||
|  | use crate::metric::{IncMetric, METRICS}; | ||||||
|  | use crate::vcpu::vcpu_impl::{Result, Vcpu, VcpuError, VcpuStateEvent}; | ||||||
|  | use crate::vcpu::VcpuConfig; | ||||||
|  | use crate::IoManagerCached; | ||||||
|  |  | ||||||
|  | impl Vcpu { | ||||||
|  |     /// Constructs a new VCPU for `vm`. | ||||||
|  |     /// | ||||||
|  |     /// Two fresh `mpsc` channels are created here: one whose sender is stored in | ||||||
|  |     /// `event_sender` (receiver in `event_receiver`) and one whose receiver is | ||||||
|  |     /// stored in `response_receiver` (sender in `response_sender`). | ||||||
|  |     /// | ||||||
|  |     /// # Arguments | ||||||
|  |     /// | ||||||
|  |     /// * `id` - Represents the CPU number between [0, max vcpus). | ||||||
|  |     /// * `vcpu_fd` - The kvm `VcpuFd` for the vcpu. | ||||||
|  |     /// * `io_mgr` - The io-manager used to access port-io and mmio devices. | ||||||
|  |     /// * `cpuid` - The `CpuId` listing the supported capabilities of this vcpu. | ||||||
|  |     /// * `exit_evt` - An `EventFd` that will be written into when this vcpu | ||||||
|  |     ///   exits. | ||||||
|  |     /// * `vcpu_state_event` - The eventfd which can notify vmm state of some | ||||||
|  |     ///   vcpu should change. | ||||||
|  |     /// * `vcpu_state_sender` - The channel to send state change message from | ||||||
|  |     ///   vcpu thread to vmm thread. | ||||||
|  |     /// * `create_ts` - A timestamp used by the vcpu to calculate its lifetime. | ||||||
|  |     /// * `support_immediate_exit` - Whether the kvm in use supports the | ||||||
|  |     ///   `immediate_exit` flag. | ||||||
|  |     /// | ||||||
|  |     /// # Errors | ||||||
|  |     /// | ||||||
|  |     /// This constructor has no failure path at present and always returns `Ok`. | ||||||
|  |     #[allow(clippy::too_many_arguments)] | ||||||
|  |     pub fn new_x86_64( | ||||||
|  |         id: u8, | ||||||
|  |         vcpu_fd: Arc<VcpuFd>, | ||||||
|  |         io_mgr: IoManagerCached, | ||||||
|  |         cpuid: CpuId, | ||||||
|  |         exit_evt: EventFd, | ||||||
|  |         vcpu_state_event: EventFd, | ||||||
|  |         vcpu_state_sender: Sender<VcpuStateEvent>, | ||||||
|  |         create_ts: TimestampUs, | ||||||
|  |         support_immediate_exit: bool, | ||||||
|  |     ) -> Result<Self> { | ||||||
|  |         let (event_sender, event_receiver) = channel(); | ||||||
|  |         let (response_sender, response_receiver) = channel(); | ||||||
|  |         // Initially the cpuid per vCPU is the one supported by this VM. | ||||||
|  |         Ok(Vcpu { | ||||||
|  |             fd: vcpu_fd, | ||||||
|  |             id, | ||||||
|  |             io_mgr, | ||||||
|  |             create_ts, | ||||||
|  |             event_receiver, | ||||||
|  |             event_sender: Some(event_sender), | ||||||
|  |             response_receiver: Some(response_receiver), | ||||||
|  |             response_sender, | ||||||
|  |             vcpu_state_event, | ||||||
|  |             vcpu_state_sender, | ||||||
|  |             exit_evt, | ||||||
|  |             support_immediate_exit, | ||||||
|  |             cpuid, | ||||||
|  |         }) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Configures a x86_64 specific vcpu and should be called once per vcpu. | ||||||
|  |     /// | ||||||
|  |     /// CPUID, MSRs and the local interrupt configuration are always applied. | ||||||
|  |     /// General registers, FPU state, the boot GDT, the identity-mapped page | ||||||
|  |     /// tables and the special registers are only set up when | ||||||
|  |     /// `kernel_start_addr` is `Some`. | ||||||
|  |     /// | ||||||
|  |     /// # Arguments | ||||||
|  |     /// | ||||||
|  |     /// * `vcpu_config` - The vcpu configuration used to build the CPUID of this vcpu. | ||||||
|  |     /// * `_vm_fd` - The kvm `VmFd` for the virtual machine this vcpu is attached to; | ||||||
|  |     ///   not used by the x86_64 implementation. | ||||||
|  |     /// * `vm_as` - The guest address space; its memory is handed to the page table | ||||||
|  |     ///   and special register setup. | ||||||
|  |     /// * `kernel_start_addr` - Guest address at which the kernel starts. When | ||||||
|  |     ///   `None`, register, FPU, page table and special register setup is skipped. | ||||||
|  |     /// * `_pgtable_addr` - pgtable address for ap vcpu; not used here, the page | ||||||
|  |     ///   table address comes from `dbs_boot::setup_identity_mapping` instead. | ||||||
|  |     /// | ||||||
|  |     /// # Errors | ||||||
|  |     /// | ||||||
|  |     /// Returns the error of `set_cpuid`, or one of `VcpuError::MSRSConfiguration`, | ||||||
|  |     /// `VcpuError::REGSConfiguration`, `VcpuError::FPUConfiguration`, | ||||||
|  |     /// `VcpuError::PageTable`, `VcpuError::SREGSConfiguration` or | ||||||
|  |     /// `VcpuError::LocalIntConfiguration` when the corresponding step fails. | ||||||
|  |     pub fn configure( | ||||||
|  |         &mut self, | ||||||
|  |         vcpu_config: &VcpuConfig, | ||||||
|  |         _vm_fd: &VmFd, | ||||||
|  |         vm_as: &GuestAddressSpaceImpl, | ||||||
|  |         kernel_start_addr: Option<GuestAddress>, | ||||||
|  |         _pgtable_addr: Option<GuestAddress>, | ||||||
|  |     ) -> Result<()> { | ||||||
|  |         self.set_cpuid(vcpu_config)?; | ||||||
|  |  | ||||||
|  |         dbs_arch::regs::setup_msrs(&self.fd).map_err(VcpuError::MSRSConfiguration)?; | ||||||
|  |         if let Some(start_addr) = kernel_start_addr { | ||||||
|  |             // `GuestAddress::raw_value()` already yields a `u64`, so no cast is needed. | ||||||
|  |             // NOTE(review): BOOT_STACK_POINTER is passed twice, presumably as both | ||||||
|  |             // stack and frame pointer; confirm against `dbs_arch::regs::setup_regs`. | ||||||
|  |             dbs_arch::regs::setup_regs( | ||||||
|  |                 &self.fd, | ||||||
|  |                 start_addr.raw_value(), | ||||||
|  |                 dbs_boot::layout::BOOT_STACK_POINTER, | ||||||
|  |                 dbs_boot::layout::BOOT_STACK_POINTER, | ||||||
|  |                 dbs_boot::layout::ZERO_PAGE_START, | ||||||
|  |             ) | ||||||
|  |             .map_err(VcpuError::REGSConfiguration)?; | ||||||
|  |             dbs_arch::regs::setup_fpu(&self.fd).map_err(VcpuError::FPUConfiguration)?; | ||||||
|  |             let gdt_table: [u64; dbs_boot::layout::BOOT_GDT_MAX as usize] = [ | ||||||
|  |                 gdt_entry(0, 0, 0),            // NULL | ||||||
|  |                 gdt_entry(0xa09b, 0, 0xfffff), // CODE | ||||||
|  |                 gdt_entry(0xc093, 0, 0xfffff), // DATA | ||||||
|  |                 gdt_entry(0x808b, 0, 0xfffff), // TSS | ||||||
|  |             ]; | ||||||
|  |             // Fetch the guest memory handle once and reuse it, so the page table | ||||||
|  |             // setup and the special register setup operate on the same handle. | ||||||
|  |             let guest_mem = vm_as.memory(); | ||||||
|  |             let pgtable_addr = | ||||||
|  |                 dbs_boot::setup_identity_mapping(&*guest_mem).map_err(VcpuError::PageTable)?; | ||||||
|  |             dbs_arch::regs::setup_sregs( | ||||||
|  |                 &*guest_mem, | ||||||
|  |                 &self.fd, | ||||||
|  |                 pgtable_addr, | ||||||
|  |                 &gdt_table, | ||||||
|  |                 dbs_boot::layout::BOOT_GDT_OFFSET, | ||||||
|  |                 dbs_boot::layout::BOOT_IDT_OFFSET, | ||||||
|  |             ) | ||||||
|  |             .map_err(VcpuError::SREGSConfiguration)?; | ||||||
|  |         } | ||||||
|  |         dbs_arch::interrupts::set_lint(&self.fd).map_err(VcpuError::LocalIntConfiguration)?; | ||||||
|  |  | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Builds the CPUID for this vcpu and installs it through the vcpu fd. | ||||||
|  |     /// | ||||||
|  |     /// A `VmSpec` is created from this vcpu's id and the topology and vPMU fields | ||||||
|  |     /// of `vcpu_config`, `process_cpuid` is applied to `self.cpuid` in place, and | ||||||
|  |     /// the result is passed to `set_cpuid2`. | ||||||
|  |     /// | ||||||
|  |     /// # Errors | ||||||
|  |     /// | ||||||
|  |     /// Returns `VcpuError::CpuId` when creating the `VmSpec` or processing the | ||||||
|  |     /// CPUID fails (the latter also bumps the `filter_cpuid` metric and logs the | ||||||
|  |     /// failure), and `VcpuError::SetSupportedCpusFailed` when `set_cpuid2` fails. | ||||||
|  |     fn set_cpuid(&mut self, vcpu_config: &VcpuConfig) -> Result<()> { | ||||||
|  |         // NOTE(review): `as u8` would silently truncate if `max_vcpu_count` is | ||||||
|  |         // wider than `u8`; confirm the field type in `VcpuConfig`. | ||||||
|  |         let cpuid_vm_spec = VmSpec::new( | ||||||
|  |             self.id, | ||||||
|  |             vcpu_config.max_vcpu_count as u8, | ||||||
|  |             vcpu_config.threads_per_core, | ||||||
|  |             vcpu_config.cores_per_die, | ||||||
|  |             vcpu_config.dies_per_socket, | ||||||
|  |             vcpu_config.vpmu_feature, | ||||||
|  |         ) | ||||||
|  |         .map_err(VcpuError::CpuId)?; | ||||||
|  |         process_cpuid(&mut self.cpuid, &cpuid_vm_spec).map_err(|e| { | ||||||
|  |             METRICS.vcpu.filter_cpuid.inc(); | ||||||
|  |             error!("Failure in configuring CPUID for vcpu {}: {:?}", self.id, e); | ||||||
|  |             VcpuError::CpuId(e) | ||||||
|  |         })?; | ||||||
|  |  | ||||||
|  |         self.fd | ||||||
|  |             .set_cpuid2(&self.cpuid) | ||||||
|  |             .map_err(VcpuError::SetSupportedCpusFailed) | ||||||
|  |     } | ||||||
|  | } | ||||||
		Reference in New Issue
	
	Block a user