Merge pull request #4400 from openanolis/anolis/dragonball-2

runtime-rs: built-in Dragonball sandbox part II - vCPU manager
2025-08-02 08:17:01 +00:00 · 2022-06-28 20:41:36 +08:00 · 2022-06-28 20:41:36 +08:00 · badbbcd8be
commit badbbcd8be
parent 98f041ed8e 71db2dd5b8
22 changed files with 3429 additions and 36 deletions
--- a/src/dragonball/Cargo.toml
+++ b/src/dragonball/Cargo.toml
@ -14,18 +14,22 @@ arc-swap = "1.5.0"
 bytes = "1.1.0"
 dbs-address-space = "0.1.0"
 dbs-allocator = "0.1.0"
+dbs-arch = "0.1.0"
 dbs-boot = "0.2.0"
 dbs-device = "0.1.0"
 dbs-interrupt = { version = "0.1.0", features = ["kvm-irq"] }
 dbs-legacy-devices = "0.1.0"
+dbs-upcall = { version = "0.1.0", optional = true }
 dbs-utils = "0.1.0"
 dbs-virtio-devices = { version = "0.1.0", optional = true, features = ["virtio-mmio"] }
 kvm-bindings = "0.5.0"
 kvm-ioctls = "0.11.0"
+lazy_static = "1.2"
 libc = "0.2.39"
 linux-loader = "0.4.0"
 log = "0.4.14"
 nix = "0.23.1"
+seccompiler = "0.2.0"
 serde = "1.0.27"
 serde_derive = "1.0.27"
 serde_json = "1.0.9"
@ -41,13 +45,15 @@ slog-term = "2.9.0"
 slog-async = "2.7.0"

 [features]
+acpi = []
 atomic-guest-memory = []
+hotplug = ["virtio-vsock"]
 virtio-vsock = ["dbs-virtio-devices/virtio-vsock", "virtio-queue"]

 [patch.'crates-io']
-dbs-device = { git = "https://github.com/openanolis/dragonball-sandbox.git", rev = "84eee5737cc7d85f9921c94a93e6b9dc4ae24a39" }
-dbs-interrupt = { git = "https://github.com/openanolis/dragonball-sandbox.git", rev = "84eee5737cc7d85f9921c94a93e6b9dc4ae24a39" }
-dbs-legacy-devices = { git = "https://github.com/openanolis/dragonball-sandbox.git", rev = "84eee5737cc7d85f9921c94a93e6b9dc4ae24a39" }
-dbs-utils = { git = "https://github.com/openanolis/dragonball-sandbox.git", rev = "84eee5737cc7d85f9921c94a93e6b9dc4ae24a39" }
-dbs-virtio-devices = { git = "https://github.com/openanolis/dragonball-sandbox.git", rev = "84eee5737cc7d85f9921c94a93e6b9dc4ae24a39" }
-dbs-upcall = { git = "https://github.com/openanolis/dragonball-sandbox.git", rev = "84eee5737cc7d85f9921c94a93e6b9dc4ae24a39" }
+dbs-device = { git = "https://github.com/openanolis/dragonball-sandbox.git", rev = "127621db934af5ffba558e44b77afa00cdf62af6" }
+dbs-interrupt = { git = "https://github.com/openanolis/dragonball-sandbox.git", rev = "127621db934af5ffba558e44b77afa00cdf62af6" }
+dbs-legacy-devices = { git = "https://github.com/openanolis/dragonball-sandbox.git", rev = "127621db934af5ffba558e44b77afa00cdf62af6" }
+dbs-upcall = { git = "https://github.com/openanolis/dragonball-sandbox.git", rev = "127621db934af5ffba558e44b77afa00cdf62af6" }
+dbs-utils = { git = "https://github.com/openanolis/dragonball-sandbox.git", rev = "127621db934af5ffba558e44b77afa00cdf62af6" }
+dbs-virtio-devices = { git = "https://github.com/openanolis/dragonball-sandbox.git", rev = "127621db934af5ffba558e44b77afa00cdf62af6" }
--- a/src/dragonball/README.md
+++ b/src/dragonball/README.md
@ -17,7 +17,10 @@ and configuration process.
 # Documentation

 Device: [Device Document](docs/device.md)
+vCPU: [vCPU Document](docs/vcpu.md)
+API: [API Document](docs/api.md)

+Currently, the documentation is still being actively expanded.
 You could see the [official documentation](docs/) page for more details.

 # Supported Architectures
--- a/src/dragonball/docs/api.md
+++ b/src/dragonball/docs/api.md
@ -0,0 +1,7 @@
+# API
+
+We provide a rich set of APIs for the Kata runtime to interact with the `Dragonball` virtual machine manager.
+This document provides the introduction for each of them.
+
+TODO: Details will be added in the Part III PR for `Dragonball`
+
--- a/src/dragonball/docs/device.md
+++ b/src/dragonball/docs/device.md
@ -14,4 +14,7 @@ Currently we have following device manager:

 ## Device supported
 `VIRTIO-VSOCK`
+`i8042`
+`COM1`
+`COM2`

--- a/src/dragonball/docs/vcpu.md
+++ b/src/dragonball/docs/vcpu.md
@ -0,0 +1,42 @@
+# vCPU
+
+## vCPU Manager
+The vCPU manager manages all vCPU-related actions. This document dives into some of its important structure members.
+
+For now, aarch64 vCPU support is still under development, we'll introduce it when we merge `runtime-rs` to the master branch. (issue: #4445)
+
+### vCPU config
+`VcpuConfig` is used to configure guest overall CPU info.
+
+`boot_vcpu_count` is used to define the initial vCPU number.
+
+`max_vcpu_count` is used to define the maximum vCPU number, and it serves as the upper bound for the CPU hotplug feature.
+
+`thread_per_core`, `cores_per_die`, `dies_per_socket` and `socket` are used to define CPU topology.
+
+`vpmu_feature` is used to define `vPMU` feature level.
+If `vPMU` feature is `Disabled`, it means `vPMU` feature is off (by default).
+If `vPMU` feature is `LimitedlyEnabled`, it means minimal `vPMU` counters are supported (cycles and instructions).
+If `vPMU` feature is `FullyEnabled`, it means all `vPMU` counters are supported.
+
+## vCPU State
+
+There are four states for vCPU state machine: `running`, `paused`, `waiting_exit`, `exited`. There is a state machine to maintain the task flow.
+
+When the vCPU is created, it enters the `paused` state. After the vCPU resources are ready at the VMM, a `Resume` event is sent to the vCPU thread, and the vCPU state then changes to `running`.
+
+During the `running` state, VMM will catch vCPU exit and execute different logic according to the exit reason.
+
+If the VMM catches an exit reason that it cannot handle, the state will change to `waiting_exit` and the VMM will stop the virtual machine.
+When the state switches to `waiting_exit`, an exit event will be sent to the vCPU `exit_evt`. The event manager will detect the change in `exit_evt` and set the VMM `exit_evt_flag` to 1. A thread serving the VMM event loop will check `exit_evt_flag` and, if the flag is 1, stop the VMM.
+
+When the VMM is stopped / destroyed, the state will change to `exited`.
+   
+## vCPU Hot plug
+Since `Dragonball Sandbox` doesn't support virtualization of ACPI system, we use [`upcall`](https://github.com/openanolis/dragonball-sandbox/tree/main/crates/dbs-upcall) to establish a direct communication channel between `Dragonball` and Guest in order to trigger vCPU hotplug.
+
+To use `upcall`, kernel patches are needed. You can get the patches from the [`upcall`](https://github.com/openanolis/dragonball-sandbox/tree/main/crates/dbs-upcall) page, and we'll provide a ready-to-use guest kernel binary for you to try.
+
+vCPU hot plug / hot unplug range is [1, `max_vcpu_count`]. Operations not in this range will be invalid.
+
+
--- a/src/dragonball/src/api/mod.rs
+++ b/src/dragonball/src/api/mod.rs
@ -0,0 +1,6 @@
+// Copyright (C) 2019-2022 Alibaba Cloud. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+//! API related data structures to configure the vmm.
+
+pub mod v1;
--- a/src/dragonball/src/api/v1/instance_info.rs
+++ b/src/dragonball/src/api/v1/instance_info.rs
@ -0,0 +1,84 @@
+// Copyright (C) 2022 Alibaba Cloud. All rights reserved.
+// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+use serde_derive::{Deserialize, Serialize};
+
+/// The microvm state.
+///
+/// When Dragonball starts, the instance state is Uninitialized. Once the start_microvm method is
+/// called, the state goes from Uninitialized to Starting. The state is changed to Running before
+/// the start_microvm method ends. Halting and Halted are currently unsupported.
+#[derive(Copy, Clone, Debug, Deserialize, PartialEq, Serialize)]
+pub enum InstanceState {
+    /// Microvm is not initialized.
+    Uninitialized,
+    /// Microvm is starting.
+    Starting,
+    /// Microvm is running.
+    Running,
+    /// Microvm is Paused.
+    Paused,
+    /// Microvm received a halt instruction.
+    Halting,
+    /// Microvm is halted.
+    Halted,
+    /// Microvm exit instead of process exit.
+    Exited(i32),
+}
+
+/// The state of async actions
+#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
+pub enum AsyncState {
+    /// Uninitialized
+    Uninitialized,
+    /// Success
+    Success,
+    /// Failure
+    Failure,
+}
+
+/// Strongly typed structure that contains general information about the microVM.
+#[derive(Debug, Deserialize, Serialize)]
+pub struct InstanceInfo {
+    /// The ID of the microVM.
+    pub id: String,
+    /// The state of the microVM.
+    pub state: InstanceState,
+    /// The version of the VMM that runs the microVM.
+    pub vmm_version: String,
+    /// The pid of the current VMM process.
+    pub pid: u32,
+    /// The state of async actions.
+    pub async_state: AsyncState,
+    /// List of tids of vcpu threads (vcpu index, tid)
+    pub tids: Vec<(u8, u32)>,
+}
+
+impl InstanceInfo {
+    /// Create an instance info object with the given id and VMM version.
+    pub fn new(id: String, vmm_version: String) -> Self {
+        InstanceInfo {
+            id,
+            state: InstanceState::Uninitialized,
+            vmm_version,
+            pid: std::process::id(),
+            async_state: AsyncState::Uninitialized,
+            tids: Vec::new(),
+        }
+    }
+}
+
+impl Default for InstanceInfo {
+    /// Build an [`InstanceInfo`] with an empty id and this crate's package version as the VMM
+    /// version; every other field gets the same initial value as in [`InstanceInfo::new`].
+    fn default() -> Self {
+        Self::new(String::new(), env!("CARGO_PKG_VERSION").to_string())
+    }
+}
--- a/src/dragonball/src/api/v1/mod.rs
+++ b/src/dragonball/src/api/v1/mod.rs
@ -0,0 +1,7 @@
+// Copyright (C) 2019-2022 Alibaba Cloud. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+//! API Version 1 related data structures to configure the vmm.
+
+mod instance_info;
+pub use self::instance_info::{AsyncState, InstanceInfo, InstanceState};
--- a/src/dragonball/src/device_manager/mod.rs
+++ b/src/dragonball/src/device_manager/mod.rs
@ -29,6 +29,12 @@ use dbs_virtio_devices::{
    VirtioDevice,
 };

+#[cfg(all(feature = "hotplug", feature = "dbs-upcall"))]
+use dbs_upcall::{
+    DevMgrRequest, DevMgrService, MmioDevRequest, UpcallClient, UpcallClientError,
+    UpcallClientRequest, UpcallClientResponse,
+};
+
 use crate::address_space_manager::GuestAddressSpaceImpl;
 use crate::error::StartMicrovmError;
 use crate::resource_manager::ResourceManager;
@ -83,6 +89,11 @@ pub enum DeviceMgrError {
    /// Error from Virtio subsystem.
    #[error(transparent)]
    Virtio(virtio::Error),
+
+    #[cfg(all(feature = "hotplug", feature = "dbs-upcall"))]
+    /// Failed to hotplug the device.
+    #[error("failed to hotplug virtual device")]
+    HotplugDevice(#[source] UpcallClientError),
 }

 /// Specialized version of `std::result::Result` for device manager operations.
@ -188,6 +199,8 @@ pub struct DeviceOpContext {
    logger: slog::Logger,
    is_hotplug: bool,

+    #[cfg(all(feature = "hotplug", feature = "dbs-upcall"))]
+    upcall_client: Option<Arc<UpcallClient<DevMgrService>>>,
    #[cfg(feature = "dbs-virtio-devices")]
    virtio_devices: Vec<Arc<DbsMmioV2Device>>,
 }
@ -220,6 +233,8 @@ impl DeviceOpContext {
            address_space,
            logger,
            is_hotplug,
+            #[cfg(all(feature = "hotplug", feature = "dbs-upcall"))]
+            upcall_client: None,
            #[cfg(feature = "dbs-virtio-devices")]
            virtio_devices: Vec::new(),
        }
@ -236,35 +251,122 @@ impl DeviceOpContext {
        &self.logger
    }

+    #[allow(unused_variables)]
    fn generate_kernel_boot_args(&mut self, kernel_config: &mut KernelConfigInfo) -> Result<()> {
-        if !self.is_hotplug {
+        if self.is_hotplug {
            return Err(DeviceMgrError::InvalidOperation);
        }

        #[cfg(feature = "dbs-virtio-devices")]
-        let cmdline = kernel_config.kernel_cmdline_mut();
+        {
+            let cmdline = kernel_config.kernel_cmdline_mut();

-        #[cfg(feature = "dbs-virtio-devices")]
-        for device in self.virtio_devices.iter() {
-            let (mmio_base, mmio_size, irq) = DeviceManager::get_virtio_device_info(device)?;
+            for device in self.virtio_devices.iter() {
+                let (mmio_base, mmio_size, irq) = DeviceManager::get_virtio_device_info(device)?;

-            // as per doc, [virtio_mmio.]device=<size>@<baseaddr>:<irq> needs to be appended
-            // to kernel commandline for virtio mmio devices to get recognized
-            // the size parameter has to be transformed to KiB, so dividing hexadecimal value in
-            // bytes to 1024; further, the '{}' formatting rust construct will automatically
-            // transform it to decimal
-            cmdline
-                .insert(
-                    "virtio_mmio.device",
-                    &format!("{}K@0x{:08x}:{}", mmio_size / 1024, mmio_base, irq),
-                )
-                .map_err(DeviceMgrError::Cmdline)?;
+                // as per doc, [virtio_mmio.]device=<size>@<baseaddr>:<irq> needs to be appended
+                // to kernel commandline for virtio mmio devices to get recognized
+                // the size parameter has to be transformed to KiB, so dividing hexadecimal value in
+                // bytes to 1024; further, the '{}' formatting rust construct will automatically
+                // transform it to decimal
+                cmdline
+                    .insert(
+                        "virtio_mmio.device",
+                        &format!("{}K@0x{:08x}:{}", mmio_size / 1024, mmio_base, irq),
+                    )
+                    .map_err(DeviceMgrError::Cmdline)?;
+            }
        }

        Ok(())
    }
 }

+#[cfg(not(feature = "hotplug"))]
+impl DeviceOpContext {
+    pub(crate) fn insert_hotplug_mmio_device(
+        &self,
+        _dev: &Arc<dyn DeviceIo>,
+        _callback: Option<()>,
+    ) -> Result<()> {
+        Err(DeviceMgrError::InvalidOperation)
+    }
+
+    pub(crate) fn remove_hotplug_mmio_device(
+        &self,
+        _dev: &Arc<dyn DeviceIo>,
+        _callback: Option<()>,
+    ) -> Result<()> {
+        Err(DeviceMgrError::InvalidOperation)
+    }
+}
+
+#[cfg(all(feature = "hotplug", feature = "dbs-upcall"))]
+impl DeviceOpContext {
+    fn call_hotplug_device(
+        &self,
+        req: DevMgrRequest,
+        callback: Option<Box<dyn Fn(UpcallClientResponse) + Send>>,
+    ) -> Result<()> {
+        if let Some(upcall_client) = self.upcall_client.as_ref() {
+            if let Some(cb) = callback {
+                upcall_client
+                    .send_request(UpcallClientRequest::DevMgr(req), cb)
+                    .map_err(DeviceMgrError::HotplugDevice)?;
+            } else {
+                upcall_client
+                    .send_request_without_result(UpcallClientRequest::DevMgr(req))
+                    .map_err(DeviceMgrError::HotplugDevice)?;
+            }
+            Ok(())
+        } else {
+            Err(DeviceMgrError::InvalidOperation)
+        }
+    }
+
+    pub(crate) fn insert_hotplug_mmio_device(
+        &self,
+        dev: &Arc<DbsMmioV2Device>,
+        callback: Option<Box<dyn Fn(UpcallClientResponse) + Send>>,
+    ) -> Result<()> {
+        if !self.is_hotplug {
+            return Err(DeviceMgrError::InvalidOperation);
+        }
+
+        let (mmio_base, mmio_size, mmio_irq) = DeviceManager::get_virtio_device_info(dev)?;
+        let req = DevMgrRequest::AddMmioDev(MmioDevRequest {
+            mmio_base,
+            mmio_size,
+            mmio_irq,
+        });
+
+        self.call_hotplug_device(req, callback)
+    }
+
+    pub(crate) fn remove_hotplug_mmio_device(
+        &self,
+        dev: &Arc<DbsMmioV2Device>,
+        callback: Option<Box<dyn Fn(UpcallClientResponse) + Send>>,
+    ) -> Result<()> {
+        if !self.is_hotplug {
+            return Err(DeviceMgrError::InvalidOperation);
+        }
+        let (mmio_base, mmio_size, mmio_irq) = DeviceManager::get_virtio_device_info(dev)?;
+        let req = DevMgrRequest::DelMmioDev(MmioDevRequest {
+            mmio_base,
+            mmio_size,
+            mmio_irq,
+        });
+
+        self.call_hotplug_device(req, callback)
+    }
+}
+
+#[cfg(all(feature = "hotplug", feature = "acpi"))]
+impl DeviceOpContext {
+    // TODO: We will implement this when we develop ACPI virtualization
+}
+
 /// Device manager for virtual machines, which manages all device for a virtual machine.
 pub struct DeviceManager {
    io_manager: Arc<ArcSwap<IoManager>>,
@ -351,7 +453,7 @@ impl DeviceManager {
        self.set_guest_kernel_log_stream(dmesg_fifo)
            .map_err(|_| StartMicrovmError::EventFd)?;

-        slog::info!(self.logger, "init console path: {:?}", com1_sock_path);
+        info!(self.logger, "init console path: {:?}", com1_sock_path);
        if let Some(path) = com1_sock_path {
            if let Some(legacy_manager) = self.legacy_manager.as_ref() {
                let com1 = legacy_manager.get_com1_serial();
@ -387,19 +489,6 @@ impl DeviceManager {
        Ok(())
    }

-    /// Restore legacy devices
-    pub fn restore_legacy_devices(
-        &mut self,
-        dmesg_fifo: Option<Box<dyn io::Write + Send>>,
-        com1_sock_path: Option<String>,
-    ) -> std::result::Result<(), StartMicrovmError> {
-        self.set_guest_kernel_log_stream(dmesg_fifo)
-            .map_err(|_| StartMicrovmError::EventFd)?;
-        slog::info!(self.logger, "restore console path: {:?}", com1_sock_path);
-        // TODO: restore console
-        Ok(())
-    }
-
    /// Reset the console into canonical mode.
    pub fn reset_console(&self) -> Result<()> {
        self.con_manager.reset_console()
--- a/src/dragonball/src/error.rs
+++ b/src/dragonball/src/error.rs
@ -14,6 +14,37 @@ use dbs_virtio_devices::Error as VirtIoError;

 use crate::device_manager;

+/// Shorthand result type for internal VMM commands.
+pub type Result<T> = std::result::Result<T, Error>;
+
+/// Errors associated with the VMM internal logic.
+///
+/// These errors cannot be generated by direct user input, but can result from bad configuration
+/// of the host (for example if Dragonball doesn't have permissions to open the KVM fd).
+#[derive(Debug, thiserror::Error)]
+pub enum Error {
+    /// Failure occurs in issuing KVM ioctls and errors will be returned from kvm_ioctls lib.
+    #[error("failure in issuing KVM ioctl command")]
+    Kvm(#[source] kvm_ioctls::Error),
+
+    /// The host kernel reports an unsupported KVM API version.
+    #[error("unsupported KVM version {0}")]
+    KvmApiVersion(i32),
+
+    /// Cannot initialize the KVM context due to missing capabilities.
+    #[error("missing KVM capability: {0:?}")]
+    KvmCap(kvm_ioctls::Cap),
+
+    /// Cannot configure MSRs.
+    #[cfg(target_arch = "x86_64")]
+    #[error("failed to configure MSRs: {0:?}")]
+    GuestMSRs(dbs_arch::msr::Error),
+
+    /// MSR inner error.
+    #[error("MSR inner error: {0:?}")]
+    Msr(vmm_sys_util::fam::Error),
+}
+
 /// Errors associated with starting the instance.
 #[derive(Debug, thiserror::Error)]
 pub enum StartMicrovmError {
--- a/src/dragonball/src/io_manager.rs
+++ b/src/dragonball/src/io_manager.rs
@ -0,0 +1,60 @@
+// Copyright (C) 2022 Alibaba Cloud. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+use std::sync::Arc;
+
+use arc_swap::{ArcSwap, Cache};
+use dbs_device::device_manager::Error;
+use dbs_device::device_manager::IoManager;
+
+/// A specialized version of [`std::result::Result`] for IO manager related operations.
+pub type Result<T> = std::result::Result<T, Error>;
+
+/// Wrapper over IoManager to support device hotplug with [`ArcSwap`] and [`Cache`].
+#[derive(Clone)]
+pub struct IoManagerCached(pub(crate) Cache<Arc<ArcSwap<IoManager>>, Arc<IoManager>>);
+
+impl IoManagerCached {
+    /// Create a new instance of [`IoManagerCached`].
+    pub fn new(io_manager: Arc<ArcSwap<IoManager>>) -> Self {
+        IoManagerCached(Cache::new(io_manager))
+    }
+
+    #[cfg(target_arch = "x86_64")]
+    #[inline]
+    /// Read data from IO ports.
+    pub fn pio_read(&mut self, addr: u16, data: &mut [u8]) -> Result<()> {
+        self.0.load().pio_read(addr, data)
+    }
+
+    #[cfg(target_arch = "x86_64")]
+    #[inline]
+    /// Write data to IO ports.
+    pub fn pio_write(&mut self, addr: u16, data: &[u8]) -> Result<()> {
+        self.0.load().pio_write(addr, data)
+    }
+
+    #[inline]
+    /// Read data from MMIO address.
+    pub fn mmio_read(&mut self, addr: u64, data: &mut [u8]) -> Result<()> {
+        self.0.load().mmio_read(addr, data)
+    }
+
+    #[inline]
+    /// Write data to MMIO address.
+    pub fn mmio_write(&mut self, addr: u64, data: &[u8]) -> Result<()> {
+        self.0.load().mmio_write(addr, data)
+    }
+
+    #[inline]
+    /// Revalidate the inner cache
+    pub fn revalidate_cache(&mut self) {
+        let _ = self.0.load();
+    }
+
+    #[inline]
+    /// Get immutable reference to underlying [`IoManager`].
+    pub fn load(&mut self) -> &IoManager {
+        self.0.load()
+    }
+}
--- a/src/dragonball/src/kvm_context.rs
+++ b/src/dragonball/src/kvm_context.rs
@ -0,0 +1,251 @@
+// Copyright (C) 2022 Alibaba Cloud. All rights reserved.
+// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the THIRD-PARTY file.
+#![allow(dead_code)]
+use kvm_bindings::KVM_API_VERSION;
+use kvm_ioctls::{Cap, Kvm, VmFd};
+use std::os::unix::io::{FromRawFd, RawFd};
+
+use crate::error::{Error, Result};
+
+/// Describes a KVM context that gets attached to the micro VM instance.
+/// It gives access to the functionality of the KVM wrapper as long as every required
+/// KVM capability is present on the host.
+pub struct KvmContext {
+    kvm: Kvm,
+    max_memslots: usize,
+    #[cfg(target_arch = "x86_64")]
+    supported_msrs: kvm_bindings::MsrList,
+}
+
+impl KvmContext {
+    /// Create a new KVM context object, using the provided `kvm_fd` if one is presented.
+    pub fn new(kvm_fd: Option<RawFd>) -> Result<Self> {
+        let kvm = if let Some(fd) = kvm_fd {
+            // Safe because we expect kvm_fd to contain a valid fd number when is_some() == true.
+            unsafe { Kvm::from_raw_fd(fd) }
+        } else {
+            Kvm::new().map_err(Error::Kvm)?
+        };
+
+        if kvm.get_api_version() != KVM_API_VERSION as i32 {
+            return Err(Error::KvmApiVersion(kvm.get_api_version()));
+        }
+
+        Self::check_cap(&kvm, Cap::Irqchip)?;
+        Self::check_cap(&kvm, Cap::Irqfd)?;
+        Self::check_cap(&kvm, Cap::Ioeventfd)?;
+        Self::check_cap(&kvm, Cap::UserMemory)?;
+        #[cfg(target_arch = "x86_64")]
+        Self::check_cap(&kvm, Cap::SetTssAddr)?;
+
+        #[cfg(target_arch = "x86_64")]
+        let supported_msrs = dbs_arch::msr::supported_guest_msrs(&kvm).map_err(Error::GuestMSRs)?;
+        let max_memslots = kvm.get_nr_memslots();
+
+        Ok(KvmContext {
+            kvm,
+            max_memslots,
+            #[cfg(target_arch = "x86_64")]
+            supported_msrs,
+        })
+    }
+
+    /// Get underlying KVM object to access kvm-ioctls interfaces.
+    pub fn kvm(&self) -> &Kvm {
+        &self.kvm
+    }
+
+    /// Get the maximum number of memory slots reported by this KVM context.
+    pub fn max_memslots(&self) -> usize {
+        self.max_memslots
+    }
+
+    /// Create a virtual machine object.
+    pub fn create_vm(&self) -> Result<VmFd> {
+        self.kvm.create_vm().map_err(Error::Kvm)
+    }
+
+    /// Get the max vcpu count supported by kvm
+    pub fn get_max_vcpus(&self) -> usize {
+        self.kvm.get_max_vcpus()
+    }
+
+    fn check_cap(kvm: &Kvm, cap: Cap) -> std::result::Result<(), Error> {
+        if !kvm.check_extension(cap) {
+            return Err(Error::KvmCap(cap));
+        }
+        Ok(())
+    }
+}
+
+#[cfg(target_arch = "x86_64")]
+mod x86_64 {
+    use super::*;
+    use dbs_arch::msr::*;
+    use kvm_bindings::{kvm_msr_entry, CpuId, MsrList, Msrs};
+    use std::collections::HashSet;
+
+    impl KvmContext {
+        /// Get information about supported CPUID of x86 processor.
+        pub fn supported_cpuid(
+            &self,
+            max_entries_count: usize,
+        ) -> std::result::Result<CpuId, kvm_ioctls::Error> {
+            self.kvm.get_supported_cpuid(max_entries_count)
+        }
+
+        /// Get information about supported MSRs of x86 processor.
+        pub fn supported_msrs(
+            &self,
+            _max_entries_count: usize,
+        ) -> std::result::Result<MsrList, kvm_ioctls::Error> {
+            Ok(self.supported_msrs.clone())
+        }
+
+        // Manipulating MSRs is very sensitive, so please be careful when changing the code below.
+        fn build_msrs_list(kvm: &Kvm) -> Result<Msrs> {
+            let mut mset: HashSet<u32> = HashSet::new();
+            let supported_msr_list = kvm.get_msr_index_list().map_err(super::Error::Kvm)?;
+            for msr in supported_msr_list.as_slice() {
+                mset.insert(*msr);
+            }
+
+            let mut msrs = vec![
+                MSR_IA32_APICBASE,
+                MSR_IA32_SYSENTER_CS,
+                MSR_IA32_SYSENTER_ESP,
+                MSR_IA32_SYSENTER_EIP,
+                MSR_IA32_CR_PAT,
+            ];
+
+            let filters_list = vec![
+                MSR_STAR,
+                MSR_VM_HSAVE_PA,
+                MSR_TSC_AUX,
+                MSR_IA32_TSC_ADJUST,
+                MSR_IA32_TSCDEADLINE,
+                MSR_IA32_MISC_ENABLE,
+                MSR_IA32_BNDCFGS,
+                MSR_IA32_SPEC_CTRL,
+            ];
+            for msr in filters_list {
+                if mset.contains(&msr) {
+                    msrs.push(msr);
+                }
+            }
+
+            // TODO: several msrs are optional.
+
+            // TODO: MSR_IA32_FEATURE_CONTROL is skipped since our guests don't support nested-vmx, LMCE or SGX for now.
+            // msrs.push(MSR_IA32_FEATURE_CONTROL);
+
+            msrs.push(MSR_CSTAR);
+            msrs.push(MSR_KERNEL_GS_BASE);
+            msrs.push(MSR_SYSCALL_MASK);
+            msrs.push(MSR_LSTAR);
+            msrs.push(MSR_IA32_TSC);
+
+            msrs.push(MSR_KVM_SYSTEM_TIME_NEW);
+            msrs.push(MSR_KVM_WALL_CLOCK_NEW);
+
+            // FIXME: check if it's supported.
+            msrs.push(MSR_KVM_ASYNC_PF_EN);
+            msrs.push(MSR_KVM_PV_EOI_EN);
+            msrs.push(MSR_KVM_STEAL_TIME);
+
+            msrs.push(MSR_CORE_PERF_FIXED_CTR_CTRL);
+            msrs.push(MSR_CORE_PERF_GLOBAL_CTRL);
+            msrs.push(MSR_CORE_PERF_GLOBAL_STATUS);
+            msrs.push(MSR_CORE_PERF_GLOBAL_OVF_CTRL);
+
+            const MAX_FIXED_COUNTERS: u32 = 3;
+            for i in 0..MAX_FIXED_COUNTERS {
+                msrs.push(MSR_CORE_PERF_FIXED_CTR0 + i);
+            }
+
+            // FIXME: skip MCE for now.
+
+            let mtrr_msrs = vec![
+                MSR_MTRRdefType,
+                MSR_MTRRfix64K_00000,
+                MSR_MTRRfix16K_80000,
+                MSR_MTRRfix16K_A0000,
+                MSR_MTRRfix4K_C0000,
+                MSR_MTRRfix4K_C8000,
+                MSR_MTRRfix4K_D0000,
+                MSR_MTRRfix4K_D8000,
+                MSR_MTRRfix4K_E0000,
+                MSR_MTRRfix4K_E8000,
+                MSR_MTRRfix4K_F0000,
+                MSR_MTRRfix4K_F8000,
+            ];
+            for mtrr in mtrr_msrs {
+                msrs.push(mtrr);
+            }
+
+            const MSR_MTRRCAP_VCNT: u32 = 8;
+            for i in 0..MSR_MTRRCAP_VCNT {
+                msrs.push(0x200 + 2 * i);
+                msrs.push(0x200 + 2 * i + 1);
+            }
+
+            let msrs: Vec<kvm_msr_entry> = msrs
+                .iter()
+                .map(|reg| kvm_msr_entry {
+                    index: *reg,
+                    reserved: 0,
+                    data: 0,
+                })
+                .collect();
+
+            Msrs::from_entries(&msrs).map_err(super::Error::Msr)
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use kvm_ioctls::Kvm;
+    use std::fs::File;
+    use std::os::unix::fs::MetadataExt;
+    use std::os::unix::io::{AsRawFd, FromRawFd};
+
+    /// Check that a default `KvmContext` can be created and that a fresh `Kvm` handle refers to
+    /// the same device node as `/dev/kvm`.
+    #[test]
+    fn test_create_kvm_context() {
+        let c = KvmContext::new(None).unwrap();
+
+        assert!(c.max_memslots >= 32);
+
+        let kvm = Kvm::new().unwrap();
+        // SAFETY: the fd is valid because `kvm` is still alive. `kvm` keeps ownership of the fd,
+        // so the temporary `File` is wrapped in `ManuallyDrop` to avoid closing the fd twice.
+        let f = std::mem::ManuallyDrop::new(unsafe { File::from_raw_fd(kvm.as_raw_fd()) });
+        let m1 = f.metadata().unwrap();
+        let m2 = File::open("/dev/kvm").unwrap().metadata().unwrap();
+
+        assert_eq!(m1.dev(), m2.dev());
+        assert_eq!(m1.ino(), m2.ino());
+    }
+
+    #[cfg(target_arch = "x86_64")]
+    #[test]
+    fn test_get_supported_cpu_id() {
+        let c = KvmContext::new(None).unwrap();
+
+        let _ = c
+            .supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES)
+            .expect("failed to get supported CPUID");
+        assert!(c.supported_cpuid(0).is_err());
+    }
+
+    #[test]
+    fn test_create_vm() {
+        let c = KvmContext::new(None).unwrap();
+
+        let _ = c.create_vm().unwrap();
+    }
+}
--- a/src/dragonball/src/lib.rs
+++ b/src/dragonball/src/lib.rs
@ -1,4 +1,5 @@
 // Copyright (C) 2018-2022 Alibaba Cloud. All rights reserved.
+// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 // SPDX-License-Identifier: Apache-2.0

 //! Dragonball is a light-weight virtual machine manager(VMM) based on Linux Kernel-based Virtual
@ -10,13 +11,45 @@

 /// Address space manager for virtual machines.
 pub mod address_space_manager;
+/// API to handle vmm requests.
+pub mod api;
 /// Structs to maintain configuration information.
 pub mod config_manager;
 /// Device manager for virtual machines.
 pub mod device_manager;
 /// Errors related to Virtual machine manager.
 pub mod error;
+/// KVM operation context for virtual machines.
+pub mod kvm_context;
+/// Metrics system.
+pub mod metric;
 /// Resource manager for virtual machines.
 pub mod resource_manager;
+/// Signal handler for virtual machines.
+pub mod signal_handler;
+/// Virtual CPU manager for virtual machines.
+pub mod vcpu;
 /// Virtual machine manager for virtual machines.
 pub mod vm;
+
+mod io_manager;
+pub use self::io_manager::IoManagerCached;
+
+/// Success exit code.
+pub const EXIT_CODE_OK: u8 = 0;
+/// Generic error exit code.
+pub const EXIT_CODE_GENERIC_ERROR: u8 = 1;
+/// Generic exit code for an error considered not possible to occur if the program logic is sound.
+pub const EXIT_CODE_UNEXPECTED_ERROR: u8 = 2;
+/// Dragonball was shut down after intercepting a restricted system call.
+pub const EXIT_CODE_BAD_SYSCALL: u8 = 148;
+/// Dragonball was shut down after intercepting `SIGBUS`.
+pub const EXIT_CODE_SIGBUS: u8 = 149;
+/// Dragonball was shut down after intercepting `SIGSEGV`.
+pub const EXIT_CODE_SIGSEGV: u8 = 150;
+/// Invalid json passed to the Dragonball process for configuring microvm.
+pub const EXIT_CODE_INVALID_JSON: u8 = 151;
+/// Bad configuration for microvm's resources, when using a single json.
+pub const EXIT_CODE_BAD_CONFIGURATION: u8 = 152;
+/// Command line arguments parsing error.
+pub const EXIT_CODE_ARG_PARSING: u8 = 153;
--- a/src/dragonball/src/metric.rs
+++ b/src/dragonball/src/metric.rs
@ -0,0 +1,58 @@
+// Copyright (C) 2022 Alibaba Cloud. All rights reserved.
+// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+use dbs_utils::metric::SharedIncMetric;
+use lazy_static::lazy_static;
+use serde::Serialize;
+
+pub use dbs_utils::metric::IncMetric;
+
+lazy_static! {
+    /// Static instance used for handling metrics.
+    ///
+    /// Created on first access with `DragonballMetrics::default()`.
+    pub static ref METRICS: DragonballMetrics = DragonballMetrics::default();
+}
+
+/// Metrics specific to VCPUs' mode of functioning.
+///
+/// Every field is a `SharedIncMetric` counter; the struct derives `Serialize` so the whole
+/// group can be serialized together.
+#[derive(Default, Serialize)]
+pub struct VcpuMetrics {
+    /// Number of KVM exits for handling input IO.
+    pub exit_io_in: SharedIncMetric,
+    /// Number of KVM exits for handling output IO.
+    pub exit_io_out: SharedIncMetric,
+    /// Number of KVM exits for handling MMIO reads.
+    pub exit_mmio_read: SharedIncMetric,
+    /// Number of KVM exits for handling MMIO writes.
+    pub exit_mmio_write: SharedIncMetric,
+    /// Number of errors during this VCPU's run.
+    pub failures: SharedIncMetric,
+    /// Failures in configuring the CPUID.
+    pub filter_cpuid: SharedIncMetric,
+}
+
+/// Metrics for the seccomp filtering.
+#[derive(Default, Serialize)]
+pub struct SeccompMetrics {
+    /// Number of errors inside the seccomp filtering.
+    ///
+    /// Incremented by the `SIGSYS` signal handler each time it handles a trapped syscall.
+    pub num_faults: SharedIncMetric,
+}
+
+/// Metrics related to signals.
+///
+/// Both counters are incremented by the shared `SIGBUS`/`SIGSEGV` signal handler.
+#[derive(Default, Serialize)]
+pub struct SignalMetrics {
+    /// Number of times that SIGBUS was handled.
+    pub sigbus: SharedIncMetric,
+    /// Number of times that SIGSEGV was handled.
+    pub sigsegv: SharedIncMetric,
+}
+
+/// Structure storing all metrics while enforcing serialization support on them.
+///
+/// This is the type of the global `METRICS` instance; each field groups the counters of one
+/// subsystem.
+#[derive(Default, Serialize)]
+pub struct DragonballMetrics {
+    /// Metrics related to a vcpu's functioning.
+    pub vcpu: VcpuMetrics,
+    /// Metrics related to seccomp filtering.
+    pub seccomp: SeccompMetrics,
+    /// Metrics related to signals.
+    pub signals: SignalMetrics,
+}
--- a/src/dragonball/src/signal_handler.rs
+++ b/src/dragonball/src/signal_handler.rs
@ -0,0 +1,219 @@
+// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+use libc::{_exit, c_int, c_void, siginfo_t, SIGBUS, SIGSEGV, SIGSYS};
+use log::error;
+use vmm_sys_util::signal::register_signal_handler;
+
+use crate::metric::{IncMetric, METRICS};
+
+// The offset of `si_syscall` (offending syscall identifier) within the siginfo structure
+// expressed as an `(u)int*`.
+// Offset `6` for an `i32` field means that the needed information is located at `6 * sizeof(i32)`.
+// See /usr/include/linux/signal.h for the C struct definition.
+// See https://github.com/rust-lang/libc/issues/716 for why the offset is different in Rust.
+const SI_OFF_SYSCALL: isize = 6;
+
+// Value of `si_code` that the `SIGSYS` handler requires before treating the signal as a
+// seccomp violation; any other value makes the handler exit with the "unexpected error" code.
+// NOTE(review): presumably mirrors the kernel's `SYS_SECCOMP` constant -- confirm.
+const SYS_SECCOMP_CODE: i32 = 1;
+
+// Foreign functions wrapped by `sigrtmin()` and `sigrtmax()` below.
+// NOTE(review): presumably the libc entry points behind the C `SIGRTMIN`/`SIGRTMAX` macros --
+// confirm they exist in every libc this crate is built against.
+extern "C" {
+    fn __libc_current_sigrtmin() -> c_int;
+    fn __libc_current_sigrtmax() -> c_int;
+}
+
+/// Gets current sigrtmin
+///
+/// Returns whatever `__libc_current_sigrtmin()` reports. The vCPU code adds
+/// `VCPU_RTSIG_OFFSET` to this value to pick the signal used to kick vCPU threads.
+pub fn sigrtmin() -> c_int {
+    // SAFETY: the foreign function is declared as taking no arguments and returning a plain
+    // `c_int`, so no pointer or lifetime invariant is involved in the call.
+    unsafe { __libc_current_sigrtmin() }
+}
+
+/// Gets current sigrtmax
+///
+/// Returns whatever `__libc_current_sigrtmax()` reports.
+pub fn sigrtmax() -> c_int {
+    // SAFETY: the foreign function is declared as taking no arguments and returning a plain
+    // `c_int`, so no pointer or lifetime invariant is involved in the call.
+    unsafe { __libc_current_sigrtmax() }
+}
+
+/// Signal handler for `SIGSYS`.
+///
+/// Increments the `seccomp.num_faults` metric, logs an error message and terminates the process
+/// with a specific exit code (`EXIT_CODE_BAD_SYSCALL`).
+///
+/// In test builds the final `_exit` is compiled out, so the handler returns to the interrupted
+/// code instead of terminating the process.
+///
+/// # Arguments
+///
+/// * `num` - signal number the handler was invoked for.
+/// * `info` - pointer to the `siginfo_t` describing the signal.
+/// * `_unused` - third handler argument, ignored.
+extern "C" fn sigsys_handler(num: c_int, info: *mut siginfo_t, _unused: *mut c_void) {
+    // SAFETY: we're just reading some fields from a supposedly valid argument.
+    let si_signo = unsafe { (*info).si_signo };
+    // SAFETY: same as above.
+    let si_code = unsafe { (*info).si_code };
+
+    // Sanity check. The condition should never be true.
+    if num != si_signo || num != SIGSYS || si_code != SYS_SECCOMP_CODE {
+        // SAFETY: we're terminating the process anyway.
+        unsafe { _exit(i32::from(super::EXIT_CODE_UNEXPECTED_ERROR)) };
+    }
+
+    // Other signals which might do async unsafe things incompatible with the rest of this
+    // function are blocked due to the sa_mask used when registering the signal handler.
+    // SAFETY: reads the `i32` located `SI_OFF_SYSCALL` words into the same supposedly valid
+    // `siginfo_t`; see the comment on `SI_OFF_SYSCALL` for the layout assumption.
+    let syscall = unsafe { *(info as *const i32).offset(SI_OFF_SYSCALL) as usize };
+    // SIGSYS is triggered when bad syscalls are detected. num_faults is only added when SIGSYS is detected
+    // so it actually only collects the count for bad syscalls.
+    METRICS.seccomp.num_faults.inc();
+    error!(
+        "Shutting down VM after intercepting a bad syscall ({}).",
+        syscall
+    );
+
+    // SAFETY: we're terminating the process anyway. We don't actually do anything when
+    // running unit tests.
+    #[cfg(not(test))]
+    unsafe {
+        _exit(i32::from(super::EXIT_CODE_BAD_SYSCALL))
+    };
+}
+
+/// Signal handler for `SIGBUS` and `SIGSEGV`.
+///
+/// Increments the matching `signals.sigbus` / `signals.sigsegv` metric, logs an error message
+/// and terminates the process with a specific exit code (`EXIT_CODE_SIGBUS` or
+/// `EXIT_CODE_SIGSEGV`).
+///
+/// In test builds the final `_exit` is compiled out, so the handler returns instead of
+/// terminating the process.
+///
+/// # Arguments
+///
+/// * `num` - signal number the handler was invoked for.
+/// * `info` - pointer to the `siginfo_t` describing the signal.
+/// * `_unused` - third handler argument, ignored.
+extern "C" fn sigbus_sigsegv_handler(num: c_int, info: *mut siginfo_t, _unused: *mut c_void) {
+    // Safe because we're just reading some fields from a supposedly valid argument.
+    let si_signo = unsafe { (*info).si_signo };
+    let si_code = unsafe { (*info).si_code };
+
+    // Sanity check. The condition should never be true.
+    if num != si_signo || (num != SIGBUS && num != SIGSEGV) {
+        // Safe because we're terminating the process anyway.
+        unsafe { _exit(i32::from(super::EXIT_CODE_UNEXPECTED_ERROR)) };
+    }
+
+    // Other signals which might do async unsafe things incompatible with the rest of this
+    // function are blocked due to the sa_mask used when registering the signal handler.
+    match si_signo {
+        SIGBUS => METRICS.signals.sigbus.inc(),
+        SIGSEGV => METRICS.signals.sigsegv.inc(),
+        // Unreachable in practice: the sanity check above already rejected other signals.
+        _ => (),
+    }
+
+    error!(
+        "Shutting down VM after intercepting signal {}, code {}.",
+        si_signo, si_code
+    );
+
+    // Safe because we're terminating the process anyway. We don't actually do anything when
+    // running unit tests.
+    #[cfg(not(test))]
+    unsafe {
+        _exit(i32::from(match si_signo {
+            SIGBUS => super::EXIT_CODE_SIGBUS,
+            SIGSEGV => super::EXIT_CODE_SIGSEGV,
+            _ => super::EXIT_CODE_UNEXPECTED_ERROR,
+        }))
+    };
+}
+
+/// Installs the process-wide custom signal handlers.
+///
+/// `SIGSYS` is routed to `sigsys_handler`, while `SIGBUS` and `SIGSEGV` share
+/// `sigbus_sigsegv_handler`. Registration happens in that order and stops at the first
+/// failure, whose error is returned to the caller.
+pub fn register_signal_handlers() -> vmm_sys_util::errno::Result<()> {
+    // A registered handler interrupts whatever the receiving thread is doing, so the handlers
+    // installed here must restrict themselves to async-signal-safe operations.
+    register_signal_handler(SIGSYS, sigsys_handler)
+        .and_then(|()| register_signal_handler(SIGBUS, sigbus_sigsegv_handler))
+        .and_then(|()| register_signal_handler(SIGSEGV, sigbus_sigsegv_handler))
+}
+
+// Unit tests for the signal handlers. They rely on the handlers returning (instead of calling
+// `_exit`) when compiled with `cfg(test)`.
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    use libc::{cpu_set_t, syscall};
+    use std::convert::TryInto;
+    use std::{mem, process, thread};
+
+    use seccompiler::{apply_filter, BpfProgram, SeccompAction, SeccompFilter};
+
+    // Counts the CPUs present in the calling thread's affinity mask.
+    // This function is used when running unit tests, so all the unsafes are safe.
+    fn cpu_count() -> usize {
+        let mut cpuset: cpu_set_t = unsafe { mem::zeroed() };
+        unsafe {
+            libc::CPU_ZERO(&mut cpuset);
+        }
+        let ret = unsafe {
+            libc::sched_getaffinity(
+                0,
+                mem::size_of::<cpu_set_t>(),
+                &mut cpuset as *mut cpu_set_t,
+            )
+        };
+        assert_eq!(ret, 0);
+
+        let mut num = 0;
+        for i in 0..libc::CPU_SETSIZE as usize {
+            if unsafe { libc::CPU_ISSET(i, &cpuset) } {
+                num += 1;
+            }
+        }
+        num
+    }
+
+    // Installs the handlers and a seccomp filter on a child thread, then provokes SIGSYS,
+    // SIGBUS and SIGSEGV and checks that the corresponding metrics were incremented.
+    #[test]
+    fn test_signal_handler() {
+        let child = thread::spawn(move || {
+            assert!(register_signal_handlers().is_ok());
+
+            // NOTE(review): presumably the listed syscalls are allowed and everything else is
+            // trapped (`Trap` as the mismatch action) -- confirm against the seccompiler API.
+            let filter = SeccompFilter::new(
+                vec![
+                    (libc::SYS_brk, vec![]),
+                    (libc::SYS_exit, vec![]),
+                    (libc::SYS_futex, vec![]),
+                    (libc::SYS_getpid, vec![]),
+                    (libc::SYS_munmap, vec![]),
+                    (libc::SYS_kill, vec![]),
+                    (libc::SYS_rt_sigprocmask, vec![]),
+                    (libc::SYS_rt_sigreturn, vec![]),
+                    (libc::SYS_sched_getaffinity, vec![]),
+                    (libc::SYS_set_tid_address, vec![]),
+                    (libc::SYS_sigaltstack, vec![]),
+                    (libc::SYS_write, vec![]),
+                ]
+                .into_iter()
+                .collect(),
+                SeccompAction::Trap,
+                SeccompAction::Allow,
+                std::env::consts::ARCH.try_into().unwrap(),
+            )
+            .unwrap();
+
+            assert!(apply_filter(&TryInto::<BpfProgram>::try_into(filter).unwrap()).is_ok());
+            assert_eq!(METRICS.seccomp.num_faults.count(), 0);
+
+            // Call the blacklisted `SYS_mkdirat`.
+            // NOTE(review): a Rust `&str` is passed through C varargs here, so the arguments do
+            // not match `mkdirat`'s real signature; presumably harmless because the syscall is
+            // expected to be trapped before it runs -- confirm.
+            unsafe { syscall(libc::SYS_mkdirat, "/foo/bar\0") };
+
+            // Call SIGBUS signal handler.
+            assert_eq!(METRICS.signals.sigbus.count(), 0);
+            unsafe {
+                syscall(libc::SYS_kill, process::id(), SIGBUS);
+            }
+
+            // Call SIGSEGV signal handler.
+            assert_eq!(METRICS.signals.sigsegv.count(), 0);
+            unsafe {
+                syscall(libc::SYS_kill, process::id(), SIGSEGV);
+            }
+        });
+        assert!(child.join().is_ok());
+
+        // Sanity check.
+        assert!(cpu_count() > 0);
+        // Kcov somehow messes with our handler getting the SIGSYS signal when a bad syscall
+        // is caught, so the following assertion no longer holds. Ideally, we'd have a surefire
+        // way of either preventing this behaviour, or detecting for certain whether this test is
+        // run by kcov or not. The best we could do so far is to look at the perceived number of
+        // available CPUs. Kcov seems to make a single CPU available to the process running the
+        // tests, so we use this as an heuristic to decide if we check the assertion.
+        if cpu_count() > 1 {
+            // The signal handler should let the program continue during unit tests.
+            assert!(METRICS.seccomp.num_faults.count() >= 1);
+        }
+        assert!(METRICS.signals.sigbus.count() >= 1);
+        assert!(METRICS.signals.sigsegv.count() >= 1);
+    }
+}
--- a/src/dragonball/src/vcpu/aarch64.rs
+++ b/src/dragonball/src/vcpu/aarch64.rs
@ -0,0 +1,94 @@
+// Copyright (C) 2022 Alibaba Cloud. All rights reserved.
+// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the THIRD-PARTY file.
+
+use std::sync::mpsc::{channel, Sender};
+use std::sync::Arc;
+
+use crate::IoManagerCached;
+use dbs_utils::time::TimestampUs;
+use kvm_ioctls::{VcpuFd, VmFd};
+use vm_memory::GuestAddress;
+use vmm_sys_util::eventfd::EventFd;
+
+use crate::address_space_manager::GuestAddressSpaceImpl;
+use crate::vcpu::vcpu_impl::{Result, Vcpu, VcpuStateEvent};
+use crate::vcpu::VcpuConfig;
+
+// `configure()` is still a stub that ignores most of its parameters, hence the blanket allow.
+#[allow(unused)]
+impl Vcpu {
+    /// Constructs a new VCPU for `vm`.
+    ///
+    /// Creates the event and response channels internally; `mpidr` starts out as `0`.
+    /// The current implementation always returns `Ok`.
+    ///
+    /// # Arguments
+    ///
+    /// * `id` - Represents the CPU number between [0, max vcpus).
+    /// * `vcpu_fd` - The kvm `VcpuFd` for the vcpu.
+    /// * `io_mgr` - The io-manager used to access port-io and mmio devices.
+    /// * `exit_evt` - An `EventFd` that will be written into when this vcpu
+    ///   exits.
+    /// * `vcpu_state_event` - The eventfd which can notify vmm state of some
+    ///   vcpu should change.
+    /// * `vcpu_state_sender` - The channel to send state change message from
+    ///   vcpu thread to vmm thread.
+    /// * `create_ts` - A timestamp used by the vcpu to calculate its lifetime.
+    /// * `support_immediate_exit` - Whether the kvm in use supports the immediate_exit flag.
+    pub fn new_aarch64(
+        id: u8,
+        vcpu_fd: Arc<VcpuFd>,
+        io_mgr: IoManagerCached,
+        exit_evt: EventFd,
+        vcpu_state_event: EventFd,
+        vcpu_state_sender: Sender<VcpuStateEvent>,
+        create_ts: TimestampUs,
+        support_immediate_exit: bool,
+    ) -> Result<Self> {
+        // One channel carries events towards the vcpu, the other carries its responses back.
+        let (event_sender, event_receiver) = channel();
+        let (response_sender, response_receiver) = channel();
+
+        Ok(Vcpu {
+            fd: vcpu_fd,
+            id,
+            io_mgr,
+            create_ts,
+            event_receiver,
+            event_sender: Some(event_sender),
+            response_receiver: Some(response_receiver),
+            response_sender,
+            vcpu_state_event,
+            vcpu_state_sender,
+            support_immediate_exit,
+            mpidr: 0,
+            exit_evt,
+        })
+    }
+
+    /// Configures an aarch64 specific vcpu.
+    ///
+    /// Currently a placeholder: no argument is used and `Ok(())` is returned unconditionally.
+    ///
+    /// # Arguments
+    ///
+    /// * `_vcpu_config` - vCPU config for this vCPU status
+    /// * `vm_fd` - The kvm `VmFd` for this microvm.
+    /// * `vm_as` - The guest memory address space used by this microvm.
+    /// * `kernel_load_addr` - Offset from `guest_mem` at which the kernel is loaded.
+    /// * `_pgtable_addr` - pgtable address for ap vcpu (not used in aarch64)
+    pub fn configure(
+        &mut self,
+        _vcpu_config: &VcpuConfig,
+        vm_fd: &VmFd,
+        vm_as: &GuestAddressSpaceImpl,
+        kernel_load_addr: Option<GuestAddress>,
+        _pgtable_addr: Option<GuestAddress>,
+    ) -> Result<()> {
+        // TODO: add arm vcpu configure() function. issue: #4445
+        Ok(())
+    }
+
+    /// Gets the MPIDR register value.
+    ///
+    /// Returns the value stored in the `mpidr` field, which `new_aarch64` initializes to `0`.
+    pub fn get_mpidr(&self) -> u64 {
+        self.mpidr
+    }
+}
--- a/src/dragonball/src/vcpu/mod.rs
+++ b/src/dragonball/src/vcpu/mod.rs
@ -0,0 +1,32 @@
+// Copyright (C) 2022 Alibaba Cloud Computing. All rights reserved.
+// Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+mod sm;
+pub mod vcpu_impl;
+pub mod vcpu_manager;
+
+#[cfg(target_arch = "x86_64")]
+use dbs_arch::cpuid::VpmuFeatureLevel;
+
+/// vcpu config collection
+///
+/// Groups the vCPU counts and the CPU topology description; on x86_64 it additionally carries
+/// the vPMU feature level.
+pub struct VcpuConfig {
+    /// initial vcpu count
+    pub boot_vcpu_count: u8,
+    /// max vcpu count for hotplug
+    pub max_vcpu_count: u8,
+    /// threads per core for cpu topology information
+    pub threads_per_core: u8,
+    /// cores per die for cpu topology information
+    pub cores_per_die: u8,
+    /// dies per socket for cpu topology information
+    pub dies_per_socket: u8,
+    /// socket number for cpu topology information
+    pub sockets: u8,
+    /// if vpmu feature is Disabled, it means vpmu feature is off (by default)
+    /// if vpmu feature is LimitedlyEnabled, it means minimal vpmu counters are supported (cycles and instructions)
+    /// if vpmu feature is FullyEnabled, it means all vpmu counters are supported
+    ///
+    /// Only present when compiling for x86_64.
+    #[cfg(target_arch = "x86_64")]
+    pub vpmu_feature: VpmuFeatureLevel,
+}
--- a/src/dragonball/src/vcpu/sm.rs
+++ b/src/dragonball/src/vcpu/sm.rs
@ -0,0 +1,149 @@
+// Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+use std::ops::Deref;
+
+/// A minimal, function-pointer driven state machine.
+///
+/// A `StateMachine<T>` value represents one state of `T`: it pairs the handler to execute for
+/// that state (a `StateFn<T>`) with a flag telling whether the state is terminal.
+///
+/// Every handler returns the `StateMachine<T>` describing the state to enter next, so each
+/// state has exactly one, explicitly chosen, successor.
+pub struct StateMachine<T> {
+    function: StateFn<T>,
+    end_state: bool,
+}
+
+/// Signature of a state handler: it receives the object being driven and returns the state
+/// that follows.
+type StateFn<T> = fn(&mut T) -> StateMachine<T>;
+
+impl<T> StateMachine<T> {
+    /// Builds a state from its handler and its terminal flag.
+    ///
+    /// # Arguments
+    ///
+    /// `function` - the state handler for this state.
+    /// `end_state` - whether this state is final.
+    pub fn new(function: StateFn<T>, end_state: bool) -> StateMachine<T> {
+        Self {
+            function,
+            end_state,
+        }
+    }
+
+    /// Builds a non-terminal state: `run` will invoke `function` and continue with whatever
+    /// state it returns.
+    ///
+    /// # Arguments
+    ///
+    /// `function` - the state handler for this state.
+    pub fn next(function: StateFn<T>) -> StateMachine<T> {
+        Self::new(function, false)
+    }
+
+    /// Builds a terminal state. `run` stops as soon as it receives a terminal state and does
+    /// not invoke the handler stored in it.
+    ///
+    /// # Arguments
+    ///
+    /// `function` - the state handler recorded for this last state.
+    pub fn finish(function: StateFn<T>) -> StateMachine<T> {
+        Self::new(function, true)
+    }
+
+    /// Drives `machine` through its states, beginning with `starting_state_fn`.
+    ///
+    /// # Arguments
+    ///
+    /// `machine` - a mutable reference to the object running through the various states.
+    /// `starting_state_fn` - a `fn(&mut T) -> StateMachine<T>` that should be the handler for
+    ///                       the initial state.
+    pub fn run(machine: &mut T, starting_state_fn: StateFn<T>) {
+        // The initial state is never terminal, so its handler always runs once.
+        let mut current = Self::next(starting_state_fn);
+        loop {
+            if current.end_state {
+                return;
+            }
+            // Execute the handler of the current state; its result is the next state.
+            let handler = current.function;
+            current = handler(machine);
+        }
+    }
+}
+
+// Dereferencing a `StateMachine<T>` yields its handler, which lets a state be called like a
+// plain function.
+impl<T> Deref for StateMachine<T> {
+    type Target = StateFn<T>;
+    fn deref(&self) -> &StateFn<T> {
+        &self.function
+    }
+}
+
+// Unit test driving a three-state dummy machine through `StateMachine::run`.
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // DummyMachine with states `s1`, `s2` and `s3`.
+    // Each flag records that the handler of the same name has run.
+    struct DummyMachine {
+        private_data_s1: bool,
+        private_data_s2: bool,
+        private_data_s3: bool,
+    }
+
+    impl DummyMachine {
+        fn new() -> Self {
+            DummyMachine {
+                private_data_s1: false,
+                private_data_s2: false,
+                private_data_s3: false,
+            }
+        }
+
+        // DummyMachine functions here.
+
+        // Simple state-machine: start->s1->s2->s3->done.
+        fn run(&mut self) {
+            // Verify the machine has not run yet.
+            assert!(!self.private_data_s1);
+            assert!(!self.private_data_s2);
+            assert!(!self.private_data_s3);
+
+            // Run the state-machine.
+            StateMachine::run(self, Self::s1);
+
+            // Verify the machine went through all states.
+            assert!(self.private_data_s1);
+            assert!(self.private_data_s2);
+            assert!(self.private_data_s3);
+        }
+
+        fn s1(&mut self) -> StateMachine<Self> {
+            // Verify private data mutates along with the states.
+            assert!(!self.private_data_s1);
+            self.private_data_s1 = true;
+            StateMachine::next(Self::s2)
+        }
+
+        fn s2(&mut self) -> StateMachine<Self> {
+            // Verify private data mutates along with the states.
+            assert!(!self.private_data_s2);
+            self.private_data_s2 = true;
+            StateMachine::next(Self::s3)
+        }
+
+        fn s3(&mut self) -> StateMachine<Self> {
+            // Verify private data mutates along with the states.
+            assert!(!self.private_data_s3);
+            self.private_data_s3 = true;
+            // The machine ends here, adding `s1` as next state to validate this.
+            // If `s1` were invoked again, its first assertion would fail.
+            StateMachine::finish(Self::s1)
+        }
+    }
+
+    #[test]
+    fn test_sm() {
+        let mut machine = DummyMachine::new();
+        machine.run();
+    }
+}
--- a/src/dragonball/src/vcpu/vcpu_impl.rs
+++ b/src/dragonball/src/vcpu/vcpu_impl.rs
@ -0,0 +1,955 @@
+// Copyright (C) 2019-2022 Alibaba Cloud. All rights reserved.
+// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the THIRD-PARTY file.
+
+//! The implementation for per vcpu
+
+use std::cell::Cell;
+use std::result;
+use std::sync::atomic::{fence, Ordering};
+use std::sync::mpsc::{Receiver, Sender, TryRecvError};
+use std::sync::{Arc, Barrier};
+use std::thread;
+
+use dbs_utils::time::TimestampUs;
+use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN};
+use kvm_ioctls::{VcpuExit, VcpuFd};
+use libc::{c_int, c_void, siginfo_t};
+use log::{error, info};
+use seccompiler::{apply_filter, BpfProgram, Error as SecError};
+use vmm_sys_util::eventfd::EventFd;
+use vmm_sys_util::signal::{register_signal_handler, Killable};
+
+use super::sm::StateMachine;
+use crate::metric::{IncMetric, METRICS};
+use crate::signal_handler::sigrtmin;
+use crate::IoManagerCached;
+
+#[cfg(target_arch = "x86_64")]
+#[path = "x86_64.rs"]
+mod x86_64;
+
+#[cfg(target_arch = "aarch64")]
+#[path = "aarch64.rs"]
+mod aarch64;
+
+// NOTE(review): presumably the base of an I/O port range the guest writes to for debug
+// purposes; no use is visible in this part of the file -- confirm.
+#[cfg(target_arch = "x86_64")]
+const MAGIC_IOPORT_BASE: u16 = 0xdbdb;
+// Same port as `MAGIC_IOPORT_BASE`.
+#[cfg(target_arch = "x86_64")]
+const MAGIC_IOPORT_DEBUG_INFO: u16 = MAGIC_IOPORT_BASE;
+
+/// Offset added to `SIGRTMIN` to obtain the signal number used to kick Vcpus.
+///
+/// With the current value of `0` the kick signal is `SIGRTMIN` itself.
+pub const VCPU_RTSIG_OFFSET: i32 = 0;
+
+#[cfg(target_arch = "x86_64")]
+/// Errors associated with the wrappers over KVM ioctls.
+///
+/// This is the x86_64 flavour of the error type; the `Display` text of each variant comes from
+/// its `#[error(...)]` attribute.
+#[derive(Debug, thiserror::Error)]
+pub enum VcpuError {
+    /// Failed to signal Vcpu.
+    #[error("cannot signal the vCPU thread")]
+    SignalVcpu(#[source] vmm_sys_util::errno::Error),
+
+    /// Cannot open the vCPU file descriptor.
+    #[error("cannot open the vCPU file descriptor")]
+    VcpuFd(#[source] kvm_ioctls::Error),
+
+    /// Cannot spawn a new vCPU thread.
+    #[error("cannot spawn vCPU thread")]
+    VcpuSpawn(#[source] std::io::Error),
+
+    /// Cannot cleanly initialize vCPU TLS.
+    #[error("cannot cleanly initialize TLS for vCPU")]
+    VcpuTlsInit,
+
+    /// Vcpu not present in TLS.
+    #[error("vCPU not present in the TLS")]
+    VcpuTlsNotPresent,
+
+    /// Unexpected KVM_RUN exit reason
+    #[error("Unexpected KVM_RUN exit reason")]
+    VcpuUnhandledKvmExit,
+
+    /// Pause vcpu failed
+    #[error("failed to pause vcpus")]
+    PauseFailed,
+
+    /// Kvm Ioctl Error
+    #[error("failure in issuing KVM ioctl command")]
+    Kvm(#[source] kvm_ioctls::Error),
+
+    /// Msr error
+    #[error("failure to deal with MSRs")]
+    Msr(vmm_sys_util::fam::Error),
+
+    /// A call to cpuid instruction failed on x86_64.
+    #[error("failure while configuring CPUID for virtual CPU on x86_64")]
+    CpuId(dbs_arch::cpuid::Error),
+
+    /// Error configuring the floating point related registers on x86_64.
+    #[error("failure while configuring the floating point related registers on x86_64")]
+    FPUConfiguration(dbs_arch::regs::Error),
+
+    /// Cannot set the local interruption due to bad configuration on x86_64.
+    #[error("cannot set the local interruption due to bad configuration on x86_64")]
+    LocalIntConfiguration(dbs_arch::interrupts::Error),
+
+    /// Error configuring the MSR registers on x86_64.
+    #[error("failure while configuring the MSR registers on x86_64")]
+    MSRSConfiguration(dbs_arch::regs::Error),
+
+    /// Error configuring the general purpose registers on x86_64.
+    #[error("failure while configuring the general purpose registers on x86_64")]
+    REGSConfiguration(dbs_arch::regs::Error),
+
+    /// Error configuring the special registers on x86_64.
+    #[error("failure while configuring the special registers on x86_64")]
+    SREGSConfiguration(dbs_arch::regs::Error),
+
+    /// Error configuring the page table on x86_64.
+    #[error("failure while configuring the page table on x86_64")]
+    PageTable(dbs_boot::Error),
+
+    /// The call to KVM_SET_CPUID2 failed on x86_64.
+    #[error("failure while calling KVM_SET_CPUID2 on x86_64")]
+    SetSupportedCpusFailed(#[source] kvm_ioctls::Error),
+}
+
+#[cfg(target_arch = "aarch64")]
+/// Errors associated with the wrappers over KVM ioctls.
+///
+/// This is the aarch64 flavour of the error type; the `Display` text of each variant comes from
+/// its `#[error(...)]` attribute. The whole enum is gated on `target_arch = "aarch64"`, so the
+/// individual variants need no further gating.
+#[derive(Debug, thiserror::Error)]
+pub enum VcpuError {
+    /// Failed to signal Vcpu.
+    #[error("cannot signal the vCPU thread")]
+    SignalVcpu(#[source] vmm_sys_util::errno::Error),
+
+    /// Cannot open the vCPU file descriptor.
+    #[error("cannot open the vCPU file descriptor")]
+    VcpuFd(#[source] kvm_ioctls::Error),
+
+    /// Cannot spawn a new vCPU thread.
+    #[error("cannot spawn vCPU thread")]
+    VcpuSpawn(#[source] std::io::Error),
+
+    /// Cannot cleanly initialize vCPU TLS.
+    #[error("cannot cleanly initialize TLS for vCPU")]
+    VcpuTlsInit,
+
+    /// Vcpu not present in TLS.
+    #[error("vCPU not present in the TLS")]
+    VcpuTlsNotPresent,
+
+    /// Unexpected KVM_RUN exit reason
+    #[error("Unexpected KVM_RUN exit reason")]
+    VcpuUnhandledKvmExit,
+
+    /// Pause vcpu failed
+    #[error("failed to pause vcpus")]
+    PauseFailed,
+
+    /// Kvm Ioctl Error
+    #[error("failure in issuing KVM ioctl command")]
+    Kvm(#[source] kvm_ioctls::Error),
+
+    /// Msr error
+    #[error("failure to deal with MSRs")]
+    Msr(vmm_sys_util::fam::Error),
+
+    /// Error configuring the general purpose registers on aarch64.
+    #[error("failure while configuring the general purpose registers on aarch64")]
+    REGSConfiguration(dbs_arch::regs::Error),
+
+    /// Error setting up the global interrupt controller on aarch64.
+    #[error("failure while setting up the global interrupt controller on aarch64")]
+    SetupGIC(dbs_arch::gic::Error),
+
+    /// Error getting the Vcpu preferred target on aarch64.
+    #[error("failure while getting the vCPU preferred target on aarch64")]
+    VcpuArmPreferredTarget(kvm_ioctls::Error),
+
+    /// Error doing vCPU Init on aarch64.
+    #[error("failure while doing vCPU init on aarch64")]
+    VcpuArmInit(kvm_ioctls::Error),
+}
+
+/// Result for Vcpu related operations; the error type is always `VcpuError`.
+pub type Result<T> = result::Result<T, VcpuError>;
+
+/// List of events that the Vcpu can receive.
+///
+/// Events are delivered through `VcpuHandle::send_event`, which queues the event on a channel
+/// and then signals the vCPU thread so that it picks the message up.
+#[derive(Debug)]
+pub enum VcpuEvent {
+    /// Kill the Vcpu.
+    Exit,
+    /// Pause the Vcpu.
+    Pause,
+    /// Event that should resume the Vcpu.
+    Resume,
+    /// Get vcpu thread tid
+    Gettid,
+
+    /// Event to revalidate vcpu IoManager cache
+    RevalidateCache,
+}
+
+/// List of responses that the Vcpu reports.
+pub enum VcpuResponse {
+    /// Vcpu is paused.
+    Paused,
+    /// Vcpu is resumed.
+    Resumed,
+    /// Vcpu index and thread tid.
+    Tid(u8, u32),
+    /// Requested Vcpu operation is not allowed.
+    NotAllowed,
+    /// Requested action encountered an error.
+    Error(VcpuError),
+    /// Vcpu IoManager cache is revalidated
+    CacheRevalidated,
+}
+
+/// List of events that the vcpu_state_sender can send.
+///
+/// This is the message type carried by the `vcpu_state_sender` channel held by each `Vcpu`.
+pub enum VcpuStateEvent {
+    /// (result, response) for hotplug, result 0 means failure, 1 means success.
+    Hotplug((i32, u32)),
+}
+
+/// Wrapper over vCPU that hides the underlying interactions with the vCPU thread.
+pub struct VcpuHandle {
+    // Sending half of the channel used to deliver `VcpuEvent`s to the vCPU thread.
+    event_sender: Sender<VcpuEvent>,
+    // Receiving half of the channel on which `VcpuResponse`s arrive.
+    response_receiver: Receiver<VcpuResponse>,
+    // Join handle of the vCPU thread; also the target of the kick signal.
+    vcpu_thread: thread::JoinHandle<()>,
+}
+
+impl VcpuHandle {
+    /// Queues `event` for the vCPU thread and then signals that thread.
+    ///
+    /// # Errors
+    ///
+    /// Returns `VcpuError::SignalVcpu` when the signal cannot be delivered.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the vCPU side has already closed the event channel.
+    pub fn send_event(&self, event: VcpuEvent) -> Result<()> {
+        // A closed channel is treated as fatal, so crash instead of reporting an error.
+        self.event_sender
+            .send(event)
+            .expect("event sender channel closed on vcpu end.");
+
+        // Kick the vCPU thread so that it notices the queued message.
+        let kick_signal = sigrtmin() + VCPU_RTSIG_OFFSET;
+        self.vcpu_thread
+            .kill(kick_signal)
+            .map_err(VcpuError::SignalVcpu)
+    }
+
+    /// Borrows the receiving end of the channel on which the vCPU thread posts its responses.
+    pub fn response_receiver(&self) -> &Receiver<VcpuResponse> {
+        let Self {
+            response_receiver, ..
+        } = self;
+        response_receiver
+    }
+
+    #[allow(dead_code)]
+    /// Consumes the handle and waits for the vCPU thread to terminate.
+    pub fn join_vcpu_thread(self) -> thread::Result<()> {
+        self.vcpu_thread.join()
+    }
+}
+
+// Outcome of one vCPU emulation step.
+// NOTE(review): the code consuming these variants is outside this part of the file; the
+// per-variant meanings below are inferred from the names -- confirm.
+#[derive(PartialEq)]
+enum VcpuEmulation {
+    // Presumably: the exit was handled and the vCPU can keep running.
+    Handled,
+    // Presumably: the run was interrupted, e.g. by the kick signal.
+    Interrupted,
+    // Presumably: the vCPU has stopped and must not be run again.
+    Stopped,
+}
+
+/// A wrapper around creating and using a kvm-based VCPU.
+///
+/// Besides the KVM file descriptor, the struct owns both ends of the event/response channels:
+/// the halves wrapped in `Option` are the ones meant to be handed over to the handler side.
+pub struct Vcpu {
+    // vCPU fd used by the vCPU
+    fd: Arc<VcpuFd>,
+    // vCPU id info
+    id: u8,
+    // Io manager Cached for facilitating IO operations
+    io_mgr: IoManagerCached,
+    // Records vCPU create time stamp
+    create_ts: TimestampUs,
+
+    // The receiving end of events channel owned by the vcpu side.
+    event_receiver: Receiver<VcpuEvent>,
+    // The transmitting end of the events channel which will be given to the handler.
+    event_sender: Option<Sender<VcpuEvent>>,
+    // The receiving end of the responses channel which will be given to the handler.
+    response_receiver: Option<Receiver<VcpuResponse>>,
+    // The transmitting end of the responses channel owned by the vcpu side.
+    response_sender: Sender<VcpuResponse>,
+    // Event notifier for CPU hotplug.
+    // After arm adapts to hotplug vcpu, the dead code macro needs to be removed
+    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
+    vcpu_state_event: EventFd,
+    // CPU hotplug events.
+    // After arm adapts to hotplug vcpu, the dead code macro needs to be removed
+    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
+    vcpu_state_sender: Sender<VcpuStateEvent>,
+
+    // An `EventFd` that will be written into when this vcpu exits.
+    exit_evt: EventFd,
+    // Whether the kvm in use supports the immediate_exit flag.
+    support_immediate_exit: bool,
+
+    // CPUID information for the x86_64 CPU
+    #[cfg(target_arch = "x86_64")]
+    cpuid: kvm_bindings::CpuId,
+
+    /// Multiprocessor affinity register recorded for aarch64
+    #[cfg(target_arch = "aarch64")]
+    pub(crate) mpidr: u64,
+}
+
+// Using this for easier explicit type-casting to help IDEs interpret the code.
+// The cell holds `None` while no `Vcpu` is associated with the current thread.
+type VcpuCell = Cell<Option<*const Vcpu>>;
+
+impl Vcpu {
+    thread_local!(static TLS_VCPU_PTR: VcpuCell = Cell::new(None));
+
+    /// Binds `self` to the calling thread by storing its address in the thread-local slot.
+    ///
+    /// This must succeed on a thread before `run_on_thread_local()` can be used there.
+    ///
+    /// # Errors
+    ///
+    /// Returns `VcpuError::VcpuTlsInit` when the slot of the current thread already holds a
+    /// `Vcpu` pointer; the slot is left untouched in that case.
+    fn init_thread_local_data(&mut self) -> Result<()> {
+        Self::TLS_VCPU_PTR.with(|cell: &VcpuCell| match cell.get() {
+            Some(_) => Err(VcpuError::VcpuTlsInit),
+            None => {
+                cell.set(Some(self as *const Vcpu));
+                Ok(())
+            }
+        })
+    }
+
+    /// Deassociates `self` from the current thread.
+    ///
+    /// Should be called if the current `self` had called `init_thread_local_data()` and
+    /// now needs to move to a different thread.
+    ///
+    /// # Errors
+    ///
+    /// Returns `VcpuError::VcpuTlsNotPresent` if the thread-local slot is empty or holds a
+    /// pointer to a different `Vcpu`; the slot is left untouched in that case.
+    fn reset_thread_local_data(&mut self) -> Result<()> {
+        // Best-effort to clean up TLS. If the `Vcpu` was moved to another thread
+        // _before_ running this, then there is nothing we can do.
+        Self::TLS_VCPU_PTR.with(|cell: &VcpuCell| {
+            if let Some(vcpu_ptr) = cell.get() {
+                if vcpu_ptr == self as *const Vcpu {
+                    // Clear the slot through the cell we already hold instead of going
+                    // through the thread-local key a second time.
+                    cell.set(None);
+                    return Ok(());
+                }
+            }
+            Err(VcpuError::VcpuTlsNotPresent)
+        })
+    }
+
+    /// Invokes `func` with a shared reference to the `Vcpu` bound to the current thread.
+    ///
+    /// `init_thread_local_data()` must have been run on this thread beforehand.
+    ///
+    /// # Errors
+    ///
+    /// Returns `VcpuError::VcpuTlsNotPresent` if no `Vcpu` is associated with the current
+    /// thread; `func` is not called in that case.
+    ///
+    /// # Safety
+    ///
+    /// This is marked unsafe as it allows temporary aliasing through
+    /// dereferencing from pointer an already borrowed `Vcpu`.
+    unsafe fn run_on_thread_local<F>(func: F) -> Result<()>
+    where
+        F: FnOnce(&Vcpu),
+    {
+        Self::TLS_VCPU_PTR.with(|slot: &VcpuCell| {
+            let vcpu_ptr = slot.get().ok_or(VcpuError::VcpuTlsNotPresent)?;
+            // Dereferencing relies on the slot being populated only while the `Vcpu` is
+            // alive: `Vcpu::drop` clears it, so the pointer is not dangling.
+            func(&*vcpu_ptr);
+            Ok(())
+        })
+    }
+
+    /// Registers a signal handler which makes use of TLS and kvm immediate exit to
+    /// kick the vcpu running on the current thread, if there is one.
+    ///
+    /// The handler is installed for signal number `sigrtmin() + VCPU_RTSIG_OFFSET`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the signal handler cannot be registered.
+    pub fn register_kick_signal_handler() {
+        extern "C" fn handle_signal(_: c_int, _: *mut siginfo_t, _: *mut c_void) {
+            // This is safe because it's temporarily aliasing the `Vcpu` object, but we are
+            // only reading `vcpu.fd` which does not change for the lifetime of the `Vcpu`.
+            unsafe {
+                // The result is ignored on purpose: a thread without an associated `Vcpu`
+                // has nothing to kick.
+                let _ = Vcpu::run_on_thread_local(|vcpu| {
+                    vcpu.fd.set_kvm_immediate_exit(1);
+                    fence(Ordering::Release);
+                });
+            }
+        }
+
+        register_signal_handler(sigrtmin() + VCPU_RTSIG_OFFSET, handle_signal)
+            .expect("Failed to register vcpu signal handler");
+    }
+
+    /// Returns the cpu index as seen by the guest OS.
+    ///
+    /// This is the `id` the `Vcpu` was constructed with.
+    pub fn cpu_index(&self) -> u8 {
+        self.id
+    }
+
+    /// Consumes the vcpu, spawns a dedicated thread named `db_vcpu<index>` for it and
+    /// returns the `VcpuHandle` used to control the remote vcpu.
+    ///
+    /// The new thread binds the vcpu to its thread-local slot, waits on `barrier` and then
+    /// enters `run()` with `seccomp_filter`.
+    ///
+    /// # Errors
+    ///
+    /// Returns `VcpuError::VcpuSpawn` if the thread cannot be spawned.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the controller-side channel endpoints were already taken out of the vcpu.
+    /// The spawned thread panics if TLS initialisation fails.
+    pub fn start_threaded(
+        mut self,
+        seccomp_filter: BpfProgram,
+        barrier: Arc<Barrier>,
+    ) -> Result<VcpuHandle> {
+        // Pull out the controller-side endpoints before `self` moves into the thread.
+        let event_sender = self.event_sender.take().unwrap();
+        let response_receiver = self.response_receiver.take().unwrap();
+        let thread_name = format!("db_vcpu{}", self.cpu_index());
+
+        let vcpu_main = move || {
+            self.init_thread_local_data()
+                .expect("Cannot cleanly initialize vcpu TLS.");
+            barrier.wait();
+            self.run(seccomp_filter);
+        };
+
+        thread::Builder::new()
+            .name(thread_name)
+            .spawn(vcpu_main)
+            .map(|vcpu_thread| VcpuHandle {
+                event_sender,
+                response_receiver,
+                vcpu_thread,
+            })
+            .map_err(VcpuError::VcpuSpawn)
+    }
+
+    /// Extract the vcpu running logic for test mocking.
+    ///
+    /// Outside of test builds this simply forwards to `VcpuFd::run()` on `fd` and returns
+    /// its result unchanged. Test builds provide their own `emulate`.
+    #[cfg(not(test))]
+    pub fn emulate(fd: &VcpuFd) -> std::result::Result<VcpuExit<'_>, kvm_ioctls::Error> {
+        fd.run()
+    }
+
+    /// Runs the vCPU in KVM context and handles the kvm exit reason.
+    ///
+    /// Port-I/O (x86_64 only) and MMIO exits are forwarded to `self.io_mgr` and reported as
+    /// `VcpuEmulation::Handled`; errors returned by the I/O manager are deliberately ignored.
+    /// Reset and shutdown system events yield `VcpuEmulation::Stopped`. An `EINTR` from the
+    /// run call yields `VcpuEmulation::Interrupted` and `EAGAIN` is treated as `Handled`.
+    ///
+    /// # Errors
+    ///
+    /// Every other exit reason or errno is logged and mapped to
+    /// `VcpuError::VcpuUnhandledKvmExit`.
+    fn run_emulation(&mut self) -> Result<VcpuEmulation> {
+        match Vcpu::emulate(&self.fd) {
+            Ok(run) => match run {
+                #[cfg(target_arch = "x86_64")]
+                VcpuExit::IoIn(addr, data) => {
+                    let _ = self.io_mgr.pio_read(addr, data);
+                    METRICS.vcpu.exit_io_in.inc();
+                    Ok(VcpuEmulation::Handled)
+                }
+                #[cfg(target_arch = "x86_64")]
+                VcpuExit::IoOut(addr, data) => {
+                    // Writes consumed by `check_io_port_info` are not passed to the I/O manager.
+                    if !self.check_io_port_info(addr, data)? {
+                        let _ = self.io_mgr.pio_write(addr, data);
+                    }
+                    METRICS.vcpu.exit_io_out.inc();
+                    Ok(VcpuEmulation::Handled)
+                }
+                VcpuExit::MmioRead(addr, data) => {
+                    let _ = self.io_mgr.mmio_read(addr, data);
+                    METRICS.vcpu.exit_mmio_read.inc();
+                    Ok(VcpuEmulation::Handled)
+                }
+                VcpuExit::MmioWrite(addr, data) => {
+                    #[cfg(target_arch = "aarch64")]
+                    self.check_boot_complete_signal(addr, data);
+
+                    let _ = self.io_mgr.mmio_write(addr, data);
+                    METRICS.vcpu.exit_mmio_write.inc();
+                    Ok(VcpuEmulation::Handled)
+                }
+                VcpuExit::Hlt => {
+                    info!("Received KVM_EXIT_HLT signal");
+                    Err(VcpuError::VcpuUnhandledKvmExit)
+                }
+                VcpuExit::Shutdown => {
+                    info!("Received KVM_EXIT_SHUTDOWN signal");
+                    Err(VcpuError::VcpuUnhandledKvmExit)
+                }
+                // Documentation specifies that below kvm exits are considered errors.
+                VcpuExit::FailEntry => {
+                    METRICS.vcpu.failures.inc();
+                    error!("Received KVM_EXIT_FAIL_ENTRY signal");
+                    Err(VcpuError::VcpuUnhandledKvmExit)
+                }
+                VcpuExit::InternalError => {
+                    METRICS.vcpu.failures.inc();
+                    error!("Received KVM_EXIT_INTERNAL_ERROR signal");
+                    Err(VcpuError::VcpuUnhandledKvmExit)
+                }
+                VcpuExit::SystemEvent(event_type, event_flags) => match event_type {
+                    KVM_SYSTEM_EVENT_RESET | KVM_SYSTEM_EVENT_SHUTDOWN => {
+                        info!(
+                            "Received KVM_SYSTEM_EVENT: type: {}, event: {}",
+                            event_type, event_flags
+                        );
+                        Ok(VcpuEmulation::Stopped)
+                    }
+                    _ => {
+                        METRICS.vcpu.failures.inc();
+                        error!(
+                            "Received KVM_SYSTEM_EVENT signal type: {}, flag: {}",
+                            event_type, event_flags
+                        );
+                        Err(VcpuError::VcpuUnhandledKvmExit)
+                    }
+                },
+                r => {
+                    METRICS.vcpu.failures.inc();
+                    // TODO: Are we sure we want to finish running a vcpu upon
+                    // receiving a vm exit that is not necessarily an error?
+                    error!("Unexpected exit reason on vcpu run: {:?}", r);
+                    Err(VcpuError::VcpuUnhandledKvmExit)
+                }
+            },
+            // The run call itself failed; dispatch on the errno carried by the error.
+            Err(ref e) => {
+                match e.errno() {
+                    libc::EAGAIN => Ok(VcpuEmulation::Handled),
+                    libc::EINTR => {
+                        // Clear the immediate_exit flag again so that the next run call
+                        // is not cut short by it.
+                        self.fd.set_kvm_immediate_exit(0);
+                        // Notify that this KVM_RUN was interrupted.
+                        Ok(VcpuEmulation::Interrupted)
+                    }
+                    _ => {
+                        METRICS.vcpu.failures.inc();
+                        error!("Failure during vcpu run: {}", e);
+                        #[cfg(target_arch = "x86_64")]
+                        {
+                            error!(
+                                "dump regs: {:?}, dump sregs: {:?}",
+                                self.fd.get_regs(),
+                                self.fd.get_sregs()
+                            );
+                        }
+                        Err(VcpuError::VcpuUnhandledKvmExit)
+                    }
+                }
+            }
+        }
+    }
+
+    /// Checks whether a port-I/O write targets an I/O port that dragonball uses itself.
+    ///
+    /// Only `MAGIC_IOPORT_DEBUG_INFO` is recognised: a write of exactly four bytes to it is
+    /// logged as guest kernel debug information.
+    ///
+    /// Returns `Ok(true)` when the write was consumed here and `Ok(false)` when it was not
+    /// (any other port, or a debug-info write that is not four bytes wide).
+    #[cfg(target_arch = "x86_64")]
+    fn check_io_port_info(&self, addr: u16, data: &[u8]) -> Result<bool> {
+        let mut checked = false;
+
+        // debug info signal
+        if addr == MAGIC_IOPORT_DEBUG_INFO && data.len() == 4 {
+            // Assemble the value from the bytes instead of reading a `u32` through the
+            // slice pointer: a byte slice carries no 4-byte alignment guarantee.
+            let mut bytes = [0u8; 4];
+            bytes.copy_from_slice(data);
+            let data = u32::from_ne_bytes(bytes);
+            log::warn!("KDBG: guest kernel debug info: 0x{:x}", data);
+            checked = true;
+        }
+
+        Ok(checked)
+    }
+
+    /// Returns the id of the calling thread, as reported by `nix::unistd::gettid()`,
+    /// cast to `u32`.
+    fn gettid() -> u32 {
+        nix::unistd::gettid().as_raw() as u32
+    }
+
+    /// Asks the vcpu's I/O manager to revalidate its cache.
+    ///
+    /// As written this always returns `Ok(())`.
+    fn revalidate_cache(&mut self) -> Result<()> {
+        self.io_mgr.revalidate_cache();
+
+        Ok(())
+    }
+
+    /// Entry point of the vCPU thread.
+    ///
+    /// Installs `seccomp_filter` on the calling thread and then drives the vcpu state
+    /// machine, starting in the `Paused` state. The state of the VCPU and of the associated
+    /// VM must be set up beforehand for this to do anything useful.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the filter cannot be applied for any reason other than being empty.
+    pub fn run(&mut self, seccomp_filter: BpfProgram) {
+        // An empty filter is tolerated and only logged; every other failure to load the
+        // seccomp filters aborts this vCPU thread.
+        match apply_filter(&seccomp_filter) {
+            Ok(_) => {}
+            Err(SecError::EmptyFilter) => {
+                info!("vCPU thread {} use empty seccomp filters.", self.id);
+            }
+            Err(e) => panic!(
+                "Failed to set the requested seccomp filters on vCPU {}: Error: {}",
+                self.id, e
+            ),
+        }
+
+        info!("vcpu {} is running", self.cpu_index());
+
+        // The state machine begins in the `Paused` state.
+        StateMachine::run(self, Self::paused);
+    }
+
+    // This is the main loop of the `Running` state.
+    //
+    // Runs guest emulation until it is interrupted (or, without immediate_exit support,
+    // after every handled exit), then polls the event channel once without blocking and
+    // returns the next state. A stop request or an emulation error moves the vcpu to the
+    // `WaitingExit` state.
+    fn running(&mut self) -> StateMachine<Self> {
+        // This loop is here just for optimizing the emulation path.
+        // No point in ticking the state machine if there are no external events.
+        loop {
+            match self.run_emulation() {
+                // Emulation ran successfully, continue.
+                Ok(VcpuEmulation::Handled) => {
+                    // We need to break here if kvm doesn't support
+                    // immediate_exit flag. Because the signal sent from vmm
+                    // thread may occurs when handling the vcpu exit events, and
+                    // in this case the external vcpu events may not be handled
+                    // correctly, so we need to check the event_receiver channel
+                    // after handle vcpu exit events to decrease the window that
+                    // doesn't handle the vcpu external events.
+                    if !self.support_immediate_exit {
+                        break;
+                    }
+                }
+                // Emulation was interrupted, check external events.
+                Ok(VcpuEmulation::Interrupted) => break,
+                // Emulation was stopped due to reset or shutdown.
+                Ok(VcpuEmulation::Stopped) => return StateMachine::next(Self::waiting_exit),
+                // Emulation errors lead to vCPU exit.
+                Err(e) => {
+                    error!("vcpu: {}, run_emulation failed: {:?}", self.id, e);
+                    return StateMachine::next(Self::waiting_exit);
+                }
+            }
+        }
+
+        // By default don't change state.
+        let mut state = StateMachine::next(Self::running);
+
+        // Break this emulation loop on any transition request/external event.
+        match self.event_receiver.try_recv() {
+            // Running ---- Exit ----> Exited
+            Ok(VcpuEvent::Exit) => {
+                // Move to 'exited' state.
+                state = StateMachine::next(Self::exited);
+            }
+            // Running ---- Pause ----> Paused
+            Ok(VcpuEvent::Pause) => {
+                // Nothing special to do.
+                self.response_sender
+                    .send(VcpuResponse::Paused)
+                    .expect("failed to send pause status");
+
+                // TODO: we should call `KVM_KVMCLOCK_CTRL` here to make sure
+                // TODO continued: the guest soft lockup watchdog does not panic on Resume.
+                //let _ = self.fd.kvmclock_ctrl();
+
+                // Move to 'paused' state.
+                state = StateMachine::next(Self::paused);
+            }
+            Ok(VcpuEvent::Resume) => {
+                self.response_sender
+                    .send(VcpuResponse::Resumed)
+                    .expect("failed to send resume status");
+            }
+            Ok(VcpuEvent::Gettid) => {
+                self.response_sender
+                    .send(VcpuResponse::Tid(self.cpu_index(), Vcpu::gettid()))
+                    .expect("failed to send vcpu thread tid");
+            }
+            Ok(VcpuEvent::RevalidateCache) => {
+                // Report the outcome, success or failure, to the controller. Only a broken
+                // response channel is fatal; a revalidation error that was reported
+                // successfully must not bring the vcpu thread down.
+                let response = match self.revalidate_cache() {
+                    Ok(()) => VcpuResponse::CacheRevalidated,
+                    Err(e) => VcpuResponse::Error(e),
+                };
+                self.response_sender
+                    .send(response)
+                    .expect("failed to revalidate vcpu IoManager cache");
+            }
+            // Unhandled exit of the other end.
+            Err(TryRecvError::Disconnected) => {
+                // Move to 'exited' state.
+                state = StateMachine::next(Self::exited);
+            }
+            // All other events or lack thereof have no effect on current 'running' state.
+            Err(TryRecvError::Empty) => (),
+        }
+
+        state
+    }
+
+    // This is the main loop of the `Paused` state.
+    //
+    // Blocks until an event arrives on the event channel, answers it and returns the next
+    // state. A closed channel moves the vcpu to the `Exited` state.
+    fn paused(&mut self) -> StateMachine<Self> {
+        match self.event_receiver.recv() {
+            // Paused ---- Exit ----> Exited
+            Ok(VcpuEvent::Exit) => {
+                // Move to 'exited' state.
+                StateMachine::next(Self::exited)
+            }
+            // Paused ---- Resume ----> Running
+            Ok(VcpuEvent::Resume) => {
+                self.response_sender
+                    .send(VcpuResponse::Resumed)
+                    .expect("failed to send resume status");
+                // Move to 'running' state.
+                StateMachine::next(Self::running)
+            }
+            Ok(VcpuEvent::Pause) => {
+                self.response_sender
+                    .send(VcpuResponse::Paused)
+                    .expect("failed to send pause status");
+                // continue 'pause' state.
+                StateMachine::next(Self::paused)
+            }
+            Ok(VcpuEvent::Gettid) => {
+                self.response_sender
+                    .send(VcpuResponse::Tid(self.cpu_index(), Vcpu::gettid()))
+                    .expect("failed to send vcpu thread tid");
+                StateMachine::next(Self::paused)
+            }
+            Ok(VcpuEvent::RevalidateCache) => {
+                // Report the outcome, success or failure, to the controller. Only a broken
+                // response channel is fatal; a revalidation error that was reported
+                // successfully must not bring the vcpu thread down.
+                let response = match self.revalidate_cache() {
+                    Ok(()) => VcpuResponse::CacheRevalidated,
+                    Err(e) => VcpuResponse::Error(e),
+                };
+                self.response_sender
+                    .send(response)
+                    .expect("failed to revalidate vcpu IoManager cache");
+
+                StateMachine::next(Self::paused)
+            }
+            // Unhandled exit of the other end.
+            Err(_) => {
+                // Move to 'exited' state.
+                StateMachine::next(Self::exited)
+            }
+        }
+    }
+
+    // This is the main loop of the `WaitingExit` state.
+    //
+    // Signals the exit eventfd, then blocks until the controller answers. Only an `Exit`
+    // event or a closed channel leaves this state; any other event is logged and the
+    // state is entered again.
+    fn waiting_exit(&mut self) -> StateMachine<Self> {
+        // trigger vmm to stop machine
+        if let Err(e) = self.exit_evt.write(1) {
+            METRICS.vcpu.failures.inc();
+            error!("Failed signaling vcpu exit event: {}", e);
+        }
+
+        match self.event_receiver.recv() {
+            Ok(VcpuEvent::Exit) => StateMachine::next(Self::exited),
+            Ok(_) => {
+                error!(
+                    "wrong state received in waiting exit state on vcpu {}",
+                    self.id
+                );
+                StateMachine::next(Self::waiting_exit)
+            }
+            Err(_) => {
+                error!(
+                    "vcpu channel closed in waiting exit state on vcpu {}",
+                    self.id
+                );
+                StateMachine::next(Self::exited)
+            }
+        }
+    }
+
+    // This is the main loop of the `Exited` state.
+    //
+    // Terminal state: it tells the state machine to finish instead of scheduling another
+    // state.
+    fn exited(&mut self) -> StateMachine<Self> {
+        // State machine reached its end.
+        StateMachine::finish(Self::exited)
+    }
+}
+
+// Clears the thread-local `Vcpu` pointer of the dropping thread if it refers to this
+// `Vcpu`, so that the slot never outlives the object it points to.
+impl Drop for Vcpu {
+    fn drop(&mut self) {
+        // Best-effort: the error returned when this thread holds no pointer to `self`
+        // is ignored.
+        let _ = self.reset_thread_local_data();
+    }
+}
+
+#[cfg(test)]
+pub mod tests {
+    use std::os::unix::io::AsRawFd;
+    use std::sync::mpsc::{channel, Receiver};
+    use std::sync::Mutex;
+
+    use arc_swap::ArcSwap;
+    use dbs_device::device_manager::IoManager;
+    use kvm_ioctls::Kvm;
+    use lazy_static::lazy_static;
+
+    use super::*;
+    use crate::kvm_context::KvmContext;
+
+    /// Selects which result the mocked `Vcpu::emulate` returns in tests.
+    ///
+    /// The payload-free variants map to the `VcpuExit` variant of the same name.
+    /// `SystemEvent` carries the event type and flags to report, and `Error` carries the
+    /// errno used to build a `kvm_ioctls::Error`.
+    pub enum EmulationCase {
+        IoIn,
+        IoOut,
+        MmioRead,
+        MmioWrite,
+        Hlt,
+        Shutdown,
+        FailEntry,
+        InternalError,
+        Unknown,
+        SystemEvent(u32, u64),
+        Error(i32),
+    }
+
+    // Process-wide selector read by the mocked `Vcpu::emulate`; tests overwrite it before
+    // calling `run_emulation()`. It starts out as `EmulationCase::Unknown`.
+    lazy_static! {
+        pub static ref EMULATE_RES: Mutex<EmulationCase> = Mutex::new(EmulationCase::Unknown);
+    }
+
+    impl Vcpu {
+        /// Test replacement for `Vcpu::emulate`.
+        ///
+        /// It never touches `_fd`; instead it returns the exit (or error) selected by the
+        /// current value of `EMULATE_RES`. I/O and MMIO exits carry address 0 and an empty
+        /// data buffer.
+        pub fn emulate(_fd: &VcpuFd) -> std::result::Result<VcpuExit<'_>, kvm_ioctls::Error> {
+            let res = &*EMULATE_RES.lock().unwrap();
+            match res {
+                EmulationCase::IoIn => Ok(VcpuExit::IoIn(0, &mut [])),
+                EmulationCase::IoOut => Ok(VcpuExit::IoOut(0, &[])),
+                EmulationCase::MmioRead => Ok(VcpuExit::MmioRead(0, &mut [])),
+                EmulationCase::MmioWrite => Ok(VcpuExit::MmioWrite(0, &[])),
+                EmulationCase::Hlt => Ok(VcpuExit::Hlt),
+                EmulationCase::Shutdown => Ok(VcpuExit::Shutdown),
+                EmulationCase::FailEntry => Ok(VcpuExit::FailEntry),
+                EmulationCase::InternalError => Ok(VcpuExit::InternalError),
+                EmulationCase::Unknown => Ok(VcpuExit::Unknown),
+                EmulationCase::SystemEvent(event_type, event_flags) => {
+                    Ok(VcpuExit::SystemEvent(*event_type, *event_flags))
+                }
+                EmulationCase::Error(e) => Err(kvm_ioctls::Error::new(*e)),
+            }
+        }
+    }
+
+    /// Builds an x86_64 `Vcpu` with id 0 on a freshly created KVM VM for use in tests.
+    ///
+    /// Returns the vcpu together with the receiving end of its vcpu state channel. The vcpu
+    /// is created with `support_immediate_exit` set to `false`.
+    #[cfg(target_arch = "x86_64")]
+    fn create_vcpu() -> (Vcpu, Receiver<VcpuStateEvent>) {
+        // Call for kvm too frequently would cause error in some host kernel.
+        std::thread::sleep(std::time::Duration::from_millis(5));
+
+        let kvm = Kvm::new().unwrap();
+        let vm = Arc::new(kvm.create_vm().unwrap());
+        let kvm_context = KvmContext::new(Some(kvm.as_raw_fd())).unwrap();
+        let vcpu_fd = Arc::new(vm.create_vcpu(0).unwrap());
+        let io_manager = IoManagerCached::new(Arc::new(ArcSwap::new(Arc::new(IoManager::new()))));
+        let supported_cpuid = kvm_context
+            .supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES)
+            .unwrap();
+        let reset_event_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap();
+        let vcpu_state_event = EventFd::new(libc::EFD_NONBLOCK).unwrap();
+        let (tx, rx) = channel();
+        let time_stamp = TimestampUs::default();
+
+        let vcpu = Vcpu::new_x86_64(
+            0,
+            vcpu_fd,
+            io_manager,
+            supported_cpuid,
+            reset_event_fd,
+            vcpu_state_event,
+            tx,
+            time_stamp,
+            false,
+        )
+        .unwrap();
+
+        (vcpu, rx)
+    }
+
+    /// Drives `run_emulation()` through every mocked exit reason and errno and checks the
+    /// resulting `VcpuEmulation` value or error.
+    #[cfg(target_arch = "x86_64")]
+    #[test]
+    fn test_vcpu_run_emulation() {
+        let (mut vcpu, _) = create_vcpu();
+
+        // Select the mocked exit, then run a single emulation step.
+        let mut emulate_as = |case: EmulationCase| {
+            *(EMULATE_RES.lock().unwrap()) = case;
+            vcpu.run_emulation()
+        };
+
+        // Port and memory mapped I/O exits are handled.
+        assert!(matches!(
+            emulate_as(EmulationCase::IoIn),
+            Ok(VcpuEmulation::Handled)
+        ));
+        assert!(matches!(
+            emulate_as(EmulationCase::IoOut),
+            Ok(VcpuEmulation::Handled)
+        ));
+        assert!(matches!(
+            emulate_as(EmulationCase::MmioRead),
+            Ok(VcpuEmulation::Handled)
+        ));
+        assert!(matches!(
+            emulate_as(EmulationCase::MmioWrite),
+            Ok(VcpuEmulation::Handled)
+        ));
+
+        // KVM_EXIT_HLT, KVM_EXIT_SHUTDOWN, KVM_EXIT_FAIL_ENTRY and KVM_EXIT_INTERNAL_ERROR
+        // are all reported as unhandled exits.
+        assert!(matches!(
+            emulate_as(EmulationCase::Hlt),
+            Err(VcpuError::VcpuUnhandledKvmExit)
+        ));
+        assert!(matches!(
+            emulate_as(EmulationCase::Shutdown),
+            Err(VcpuError::VcpuUnhandledKvmExit)
+        ));
+        assert!(matches!(
+            emulate_as(EmulationCase::FailEntry),
+            Err(VcpuError::VcpuUnhandledKvmExit)
+        ));
+        assert!(matches!(
+            emulate_as(EmulationCase::InternalError),
+            Err(VcpuError::VcpuUnhandledKvmExit)
+        ));
+
+        // KVM_SYSTEM_EVENT_RESET and KVM_SYSTEM_EVENT_SHUTDOWN stop the vcpu.
+        assert!(matches!(
+            emulate_as(EmulationCase::SystemEvent(KVM_SYSTEM_EVENT_RESET, 0)),
+            Ok(VcpuEmulation::Stopped)
+        ));
+        assert!(matches!(
+            emulate_as(EmulationCase::SystemEvent(KVM_SYSTEM_EVENT_SHUTDOWN, 0)),
+            Ok(VcpuEmulation::Stopped)
+        ));
+
+        // Any other system event and unknown exit reasons are unhandled.
+        assert!(matches!(
+            emulate_as(EmulationCase::SystemEvent(0, 0)),
+            Err(VcpuError::VcpuUnhandledKvmExit)
+        ));
+        assert!(matches!(
+            emulate_as(EmulationCase::Unknown),
+            Err(VcpuError::VcpuUnhandledKvmExit)
+        ));
+
+        // Errors: EAGAIN is handled, EINTR interrupts, anything else is unhandled.
+        assert!(matches!(
+            emulate_as(EmulationCase::Error(libc::EAGAIN)),
+            Ok(VcpuEmulation::Handled)
+        ));
+        assert!(matches!(
+            emulate_as(EmulationCase::Error(libc::EINTR)),
+            Ok(VcpuEmulation::Interrupted)
+        ));
+        assert!(matches!(
+            emulate_as(EmulationCase::Error(libc::EINVAL)),
+            Err(VcpuError::VcpuUnhandledKvmExit)
+        ));
+    }
+
+    /// Checks which port-I/O writes `check_io_port_info()` consumes.
+    ///
+    /// Only the debug-info port is recognised by `check_io_port_info()`, so that is the
+    /// only port exercised here.
+    #[cfg(target_arch = "x86_64")]
+    #[test]
+    fn test_vcpu_check_io_port_info() {
+        // Keep the receiving end alive for the duration of the test.
+        let (vcpu, _receiver) = create_vcpu();
+
+        // debug info signal
+        let res = vcpu
+            .check_io_port_info(MAGIC_IOPORT_DEBUG_INFO, &[0, 0, 0, 0])
+            .unwrap();
+        assert!(res);
+
+        // a debug info write that is not exactly four bytes wide is not consumed
+        let res = vcpu
+            .check_io_port_info(MAGIC_IOPORT_DEBUG_INFO, &[0, 0])
+            .unwrap();
+        assert!(!res);
+    }
+}
--- a/src/dragonball/src/vcpu/vcpu_manager.rs
+++ b/src/dragonball/src/vcpu/vcpu_manager.rs
--- a/src/dragonball/src/vcpu/x86_64.rs
+++ b/src/dragonball/src/vcpu/x86_64.rs
@ -0,0 +1,149 @@
+// Copyright (C) 2022 Alibaba Cloud. All rights reserved.
+// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the THIRD-PARTY file.
+
+use std::sync::mpsc::{channel, Sender};
+use std::sync::Arc;
+
+use dbs_arch::cpuid::{process_cpuid, VmSpec};
+use dbs_arch::gdt::gdt_entry;
+use dbs_utils::time::TimestampUs;
+use kvm_bindings::CpuId;
+use kvm_ioctls::{VcpuFd, VmFd};
+use log::error;
+use vm_memory::{Address, GuestAddress, GuestAddressSpace};
+use vmm_sys_util::eventfd::EventFd;
+
+use crate::address_space_manager::GuestAddressSpaceImpl;
+use crate::metric::{IncMetric, METRICS};
+use crate::vcpu::vcpu_impl::{Result, Vcpu, VcpuError, VcpuStateEvent};
+use crate::vcpu::VcpuConfig;
+use crate::IoManagerCached;
+
+impl Vcpu {
+    /// Creates the x86_64 `Vcpu` object wrapping an already created kvm vcpu.
+    ///
+    /// Two channels are created here: one carrying events to the vcpu and one carrying its
+    /// responses back. The vcpu keeps the receiving end of the former and the sending end of
+    /// the latter; the opposite ends are stored as `Some(..)` so they can be taken out later.
+    ///
+    /// # Arguments
+    ///
+    /// * `id` - Represents the CPU number between [0, max vcpus).
+    /// * `vcpu_fd` - The kvm `VcpuFd` for the vcpu.
+    /// * `io_mgr` - The io-manager used to access port-io and mmio devices.
+    /// * `cpuid` - The `CpuId` listing the supported capabilities of this vcpu.
+    /// * `exit_evt` - An `EventFd` that will be written into when this vcpu
+    ///   exits.
+    /// * `vcpu_state_event` - The eventfd which can notify vmm state of some
+    ///   vcpu should change.
+    /// * `vcpu_state_sender` - The channel to send state change message from
+    ///   vcpu thread to vmm thread.
+    /// * `create_ts` - A timestamp used by the vcpu to calculate its lifetime.
+    /// * `support_immediate_exit` - Whether the kvm in use supports the immediate_exit flag.
+    #[allow(clippy::too_many_arguments)]
+    pub fn new_x86_64(
+        id: u8,
+        vcpu_fd: Arc<VcpuFd>,
+        io_mgr: IoManagerCached,
+        cpuid: CpuId,
+        exit_evt: EventFd,
+        vcpu_state_event: EventFd,
+        vcpu_state_sender: Sender<VcpuStateEvent>,
+        create_ts: TimestampUs,
+        support_immediate_exit: bool,
+    ) -> Result<Self> {
+        let (event_tx, event_rx) = channel();
+        let (response_tx, response_rx) = channel();
+
+        // Initially the cpuid per vCPU is the one supported by this VM.
+        let vcpu = Vcpu {
+            fd: vcpu_fd,
+            id,
+            io_mgr,
+            create_ts,
+            event_receiver: event_rx,
+            event_sender: Some(event_tx),
+            response_receiver: Some(response_rx),
+            response_sender: response_tx,
+            vcpu_state_event,
+            vcpu_state_sender,
+            exit_evt,
+            support_immediate_exit,
+            cpuid,
+        };
+
+        Ok(vcpu)
+    }
+
+    /// Configures a x86_64 specific vcpu and should be called once per vcpu.
+    ///
+    /// CPUID and MSRs are always set up. General registers, FPU, page tables and segment
+    /// registers are only set up when `kernel_start_addr` is `Some`. The local interrupt
+    /// lines are configured last.
+    ///
+    /// # Arguments
+    ///
+    /// * `vcpu_config` - The vcpu configuration of this microvm needed for the CPUID configuration.
+    /// * `_vm_fd` - The kvm `VmFd` for the virtual machine this vcpu will get attached to
+    ///   (currently unused).
+    /// * `vm_as` - The guest address space whose memory is used by this microvm.
+    /// * `kernel_start_addr` - Guest address at which the kernel starts, if this vcpu
+    ///   should be set up to boot it.
+    /// * `_pgtable_addr` - pgtable address for ap vcpu (currently unused).
+    ///
+    /// # Errors
+    ///
+    /// Returns the `VcpuError` variant matching the configuration step that failed.
+    pub fn configure(
+        &mut self,
+        vcpu_config: &VcpuConfig,
+        _vm_fd: &VmFd,
+        vm_as: &GuestAddressSpaceImpl,
+        kernel_start_addr: Option<GuestAddress>,
+        _pgtable_addr: Option<GuestAddress>,
+    ) -> Result<()> {
+        self.set_cpuid(vcpu_config)?;
+
+        dbs_arch::regs::setup_msrs(&self.fd).map_err(VcpuError::MSRSConfiguration)?;
+        if let Some(start_addr) = kernel_start_addr {
+            dbs_arch::regs::setup_regs(
+                &self.fd,
+                start_addr.raw_value() as u64,
+                dbs_boot::layout::BOOT_STACK_POINTER,
+                dbs_boot::layout::BOOT_STACK_POINTER,
+                dbs_boot::layout::ZERO_PAGE_START,
+            )
+            .map_err(VcpuError::REGSConfiguration)?;
+            dbs_arch::regs::setup_fpu(&self.fd).map_err(VcpuError::FPUConfiguration)?;
+            let gdt_table: [u64; dbs_boot::layout::BOOT_GDT_MAX as usize] = [
+                gdt_entry(0, 0, 0),            // NULL
+                gdt_entry(0xa09b, 0, 0xfffff), // CODE
+                gdt_entry(0xc093, 0, 0xfffff), // DATA
+                gdt_entry(0x808b, 0, 0xfffff), // TSS
+            ];
+            // The page table address comes from the identity mapping created here, not
+            // from `_pgtable_addr`.
+            let pgtable_addr =
+                dbs_boot::setup_identity_mapping(&*vm_as.memory()).map_err(VcpuError::PageTable)?;
+            dbs_arch::regs::setup_sregs(
+                &*vm_as.memory(),
+                &self.fd,
+                pgtable_addr,
+                &gdt_table,
+                dbs_boot::layout::BOOT_GDT_OFFSET,
+                dbs_boot::layout::BOOT_IDT_OFFSET,
+            )
+            .map_err(VcpuError::SREGSConfiguration)?;
+        }
+        dbs_arch::interrupts::set_lint(&self.fd).map_err(VcpuError::LocalIntConfiguration)?;
+
+        Ok(())
+    }
+
+    /// Adjusts the stored CPUID for this vcpu according to `vcpu_config` and installs it on
+    /// the kvm vcpu.
+    ///
+    /// # Errors
+    ///
+    /// Returns `VcpuError::CpuId` if the VM spec cannot be built or the CPUID cannot be
+    /// processed, and `VcpuError::SetSupportedCpusFailed` if kvm rejects the CPUID.
+    fn set_cpuid(&mut self, vcpu_config: &VcpuConfig) -> Result<()> {
+        let spec = VmSpec::new(
+            self.id,
+            vcpu_config.max_vcpu_count as u8,
+            vcpu_config.threads_per_core,
+            vcpu_config.cores_per_die,
+            vcpu_config.dies_per_socket,
+            vcpu_config.vpmu_feature,
+        )
+        .map_err(VcpuError::CpuId)?;
+
+        if let Err(e) = process_cpuid(&mut self.cpuid, &spec) {
+            METRICS.vcpu.filter_cpuid.inc();
+            error!("Failure in configuring CPUID for vcpu {}: {:?}", self.id, e);
+            return Err(VcpuError::CpuId(e));
+        }
+
+        let installed = self.fd.set_cpuid2(&self.cpuid);
+        installed.map_err(VcpuError::SetSupportedCpusFailed)
+    }
+}
--- a/src/dragonball/src/vm/mod.rs
+++ b/src/dragonball/src/vm/mod.rs
@ -18,3 +18,79 @@ pub struct NumaRegionInfo {
    /// vcpu ids belonging to this region
    pub vcpu_ids: Vec<u32>,
 }
+
+/// CPU topology information used to guide guest initialization.
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
+pub struct CpuTopology {
+    /// Threads per core; indicates whether hyperthreading is enabled or not.
+    pub threads_per_core: u8,
+    /// Cores per die, used to guide guest CPU topology initialization.
+    pub cores_per_die: u8,
+    /// Dies per socket, used to guide guest CPU topology initialization.
+    pub dies_per_socket: u8,
+    /// Number of sockets.
+    pub sockets: u8,
+}
+
+impl Default for CpuTopology {
+    /// Returns a topology in which every level (sockets, dies, cores, threads)
+    /// has a count of one.
+    fn default() -> Self {
+        Self {
+            sockets: 1,
+            dies_per_socket: 1,
+            cores_per_die: 1,
+            threads_per_core: 1,
+        }
+    }
+}
+
+/// Configuration information for a virtual machine instance.
+#[derive(Clone, Debug, PartialEq)]
+pub struct VmConfigInfo {
+    /// Number of vCPUs to start with (the default is 1).
+    pub vcpu_count: u8,
+    /// Maximum number of vCPUs that can be added (the default is 1).
+    pub max_vcpu_count: u8,
+    /// Whether hyperthreading is enabled (the default is `false`).
+    pub ht_enabled: bool,
+    /// CPU power management setting, kept as a string (the default is "on").
+    pub cpu_pm: String,
+    /// CPU topology information (threads, cores, dies and sockets).
+    pub cpu_topology: CpuTopology,
+    /// vPMU support level (the default is 0).
+    pub vpmu_feature: u8,
+
+    /// Memory type, either "hugetlbfs" or "shmem"; the default is "shmem".
+    pub mem_type: String,
+    /// Memory file path (the default is an empty string).
+    pub mem_file_path: String,
+    /// Memory size in MiB (the default is 128).
+    pub mem_size_mib: usize,
+    /// Number of memory bytes to reserve (the default is 0).
+    pub reserve_memory_bytes: u64,
+
+    /// Socket path; NOTE(review): presumably for the serial device, going by the field name - confirm.
+    pub serial_path: Option<String>,
+}
+
+impl Default for VmConfigInfo {
+    /// Returns the baseline configuration: one vCPU (maximum one),
+    /// hyperthreading off, CPU power management "on", an all-ones CPU
+    /// topology, vPMU level 0, 128 MiB of "shmem" memory with no backing file
+    /// path, nothing reserved, and no serial path.
+    fn default() -> Self {
+        Self {
+            // CPU settings.
+            vcpu_count: 1,
+            max_vcpu_count: 1,
+            ht_enabled: false,
+            cpu_pm: "on".to_string(),
+            // `CpuTopology::default()` is the 1/1/1/1 topology.
+            cpu_topology: CpuTopology::default(),
+            vpmu_feature: 0,
+            // Memory settings.
+            mem_type: "shmem".to_string(),
+            mem_file_path: String::new(),
+            mem_size_mib: 128,
+            reserve_memory_bytes: 0,
+            // Serial socket.
+            serial_path: None,
+        }
+    }
+}