Merge pull request #13093 from RainaYL/rainax/tdx_boot_pr

dragonball: Add steps to boot TDX VM
This commit is contained in:
Alex Lyn
2026-06-09 10:13:57 +08:00
committed by GitHub
13 changed files with 179 additions and 276 deletions

32
Cargo.lock generated
View File

@@ -1631,16 +1631,6 @@ dependencies = [
"vm-memory",
]
[[package]]
name = "dbs-tdx"
version = "0.1.0"
dependencies = [
"kvm-bindings",
"serde_json",
"thiserror 1.0.69",
"vmm-sys-util 0.15.0",
]
[[package]]
name = "dbs-upcall"
version = "0.3.0"
@@ -1890,6 +1880,7 @@ dependencies = [
"slog-async",
"slog-scope",
"slog-term",
"tdx",
"test-utils",
"thiserror 1.0.69",
"tracing",
@@ -3206,6 +3197,12 @@ dependencies = [
"libc",
]
[[package]]
name = "iocuddle"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8972d5be69940353d5347a1344cb375d9b457d6809b428b05bb1ca2fb9ce007"
[[package]]
name = "iovec"
version = "0.1.4"
@@ -7692,6 +7689,21 @@ dependencies = [
"xattr 1.6.1",
]
[[package]]
name = "tdx"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "83943e37cf46979f711ad11489c641fa058fd0fae92c122d1fc26a664e82acab"
dependencies = [
"bitflags 2.11.1",
"iocuddle",
"kvm-bindings",
"kvm-ioctls",
"libc",
"uuid 1.23.1",
"vmm-sys-util 0.15.0",
]
[[package]]
name = "tempfile"
version = "3.27.0"

View File

@@ -35,7 +35,6 @@ members = [
"src/dragonball/dbs_interrupt",
"src/dragonball/dbs_legacy_devices",
"src/dragonball/dbs_pci",
"src/dragonball/dbs_tdx",
"src/dragonball/dbs_upcall",
"src/dragonball/dbs_utils",
"src/dragonball/dbs_virtio_devices",
@@ -100,7 +99,6 @@ dbs-device = { path = "src/dragonball/dbs_device" }
dbs-interrupt = { path = "src/dragonball/dbs_interrupt" }
dbs-legacy-devices = { path = "src/dragonball/dbs_legacy_devices" }
dbs-pci = { path = "src/dragonball/dbs_pci" }
dbs-tdx = { path = "src/dragonball/dbs_tdx" }
dbs-upcall = { path = "src/dragonball/dbs_upcall" }
dbs-utils = { path = "src/dragonball/dbs_utils" }
dbs-virtio-devices = { path = "src/dragonball/dbs_virtio_devices" }
@@ -186,6 +184,7 @@ slog-stdlog = "4.0.0"
slog-term = "2.9.0"
strum = { version = "0.24.0", features = ["derive"] }
strum_macros = "0.26.2"
tdx = "0.1.1"
tempfile = "3.19.1"
thiserror = "1.0.26"
tokio = "1.46.1"

View File

@@ -53,6 +53,9 @@ vfio-bindings = { workspace = true, optional = true }
vfio-ioctls = { workspace = true, optional = true }
kata-sys-util = { path = "../libs/kata-sys-util" }
[target.'cfg(target_arch = "x86_64")'.dependencies]
tdx = { workspace = true }
[dev-dependencies]
slog-async = "2.7.0"
slog-term = "2.9.0"

View File

@@ -28,7 +28,6 @@ and configuration process.
- `dbs_device`: [`dbs_device` Document](dbs_device/README.md)
- `dbs_interrupt`: [`dbs_interrupt` Document](dbs_interrupt/README.md)
- `dbs_legacy_devices`: [`dbs_legacy_devices` Document](dbs_legacy_devices/README.md)
- `dbs_tdx`: [`dbs_tdx` Document](dbs_tdx/README.md)
- `dbs_upcall`: [`dbs_upcall` Document](dbs_upcall/README.md)
- `dbs_utils`: [`dbs_utils` Document](dbs_utils/README.md)
- `dbs_virtio_devices`: [`dbs_virtio_devices` Document](dbs_virtio_devices/README.md)

View File

@@ -1,19 +0,0 @@
[package]
name = "dbs-tdx"
version = "0.1.0"
authors = ["Alibaba Dragonball Team"]
description = "helpers and utilities to create TDX VM"
license = "Apache-2.0 AND BSD-3-Clause"
edition = "2018"
homepage = "https://github.com/openanolis/dragonball-sandbox"
repository = "https://github.com/openanolis/dragonball-sandbox"
keywords = ["dragonball", "secure-sandbox", "TDX", "confidential container"]
readme = "README.md"
[dependencies]
thiserror = "1.0"
kvm-bindings = { workspace = true, features = ["fam-wrappers"] }
vmm-sys-util = {workspace = true}
[dev-dependencies]
serde_json = "1.0.9"

View File

@@ -1,14 +0,0 @@
# dbs-tdx
This crate is a collection of modules that provides helpers and utilities to create a TDX Dragonball VM.
Currently this crate includes:
- `tdx-ioctls`
## Acknowledgement
Part of the code is derived from the [Cloud Hypervisor](https://github.com/cloud-hypervisor/cloud-hypervisor) project.
## License
This project is licensed under [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0).

View File

@@ -1,5 +0,0 @@
// Copyright (C) 2023 Alibaba Cloud. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
/// Wrappers around the TDX sub-commands issued through the `KVM_MEMORY_ENCRYPT_OP` ioctl.
/// Only compiled when the target architecture is x86_64.
#[cfg(target_arch = "x86_64")]
pub mod tdx_ioctls;

View File

@@ -1,220 +0,0 @@
// Copyright © 2019 Intel Corporation
//
// Copyright (c) 2023 Alibaba Cloud.
//
// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
//
use std::os::unix::io::RawFd;
use kvm_bindings::{CpuId, __IncompleteArrayField, KVMIO};
use thiserror::Error;
use vmm_sys_util::fam::{FamStruct, FamStructWrapper};
use vmm_sys_util::ioctl::ioctl_with_val;
use vmm_sys_util::{generate_fam_struct_impl, ioctl_iowr_nr};
/// TDX capability list: a `FamStructWrapper` around [`TdxCapabilities`], whose
/// trailing flexible array holds [`TdxCpuidConfig`] entries.
pub type TdxCaps = FamStructWrapper<TdxCapabilities>;
/// Maximum number of cpuid config entries carried by [`TdxCapabilities`].
/// Used both as the FAM capacity limit below and as the allocation size in
/// `tdx_get_caps`.
const TDX1_MAX_NR_CPUID_CONFIGS: usize = 6;
// Implements `FamStruct` for `TdxCapabilities`. Arguments, in order: the struct
// type, the entry type, the name of the flexible-array field, the type and name
// of the field that stores the entry count, and the maximum entry count.
generate_fam_struct_impl!(
TdxCapabilities,
TdxCpuidConfig,
cpuid_configs,
u32,
nr_cpuid_configs,
TDX1_MAX_NR_CPUID_CONFIGS
);
/// A single TDX cpuid config entry.
///
/// The layout is `#[repr(C)]` (six consecutive `u32` values, 24 bytes) because
/// entries of this type form the flexible array at the end of
/// `TdxCapabilities`. Every field is a plain integer, so the type is `Eq` as
/// well as `PartialEq`.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq, Eq)]
pub struct TdxCpuidConfig {
    /// CPUID leaf this entry refers to.
    pub leaf: u32,
    /// CPUID sub-leaf this entry refers to.
    pub sub_leaf: u32,
    /// EAX value associated with this leaf/sub-leaf.
    pub eax: u32,
    /// EBX value associated with this leaf/sub-leaf.
    pub ebx: u32,
    /// ECX value associated with this leaf/sub-leaf.
    pub ecx: u32,
    /// EDX value associated with this leaf/sub-leaf.
    pub edx: u32,
}
#[repr(C)]
#[derive(Default)]
/// Tdx capabilities.
///
/// Fixed-size header followed by a flexible array of [`TdxCpuidConfig`]
/// entries; `nr_cpuid_configs` holds the number of entries. The layout is
/// `#[repr(C)]` because a pointer to this structure is handed to the kernel by
/// `tdx_get_caps`.
pub struct TdxCapabilities {
/// Bits that need to be fixed to 0.
/// NOTE(review): the original comment said "cpuid bits", but the field name
/// suggests TD attribute bits — confirm against the KVM TDX ABI.
pub attrs_fixed0: u64,
/// Bits that need to be fixed to 1.
/// NOTE(review): same caveat as `attrs_fixed0` — likely attribute bits.
pub attrs_fixed1: u64,
/// xfam bits need to be fixed to 0.
pub xfam_fixed0: u64,
/// xfam bits need to be fixed to 1.
pub xfam_fixed1: u64,
/// Number of entries in `cpuid_configs`.
pub nr_cpuid_configs: u32,
/// Padding that keeps the following array 8-byte aligned after the `u32` count.
pub padding: u32,
/// Flexible array of cpuid config entries (length given by `nr_cpuid_configs`).
pub cpuid_configs: __IncompleteArrayField<TdxCpuidConfig>,
}
// Defines `KVM_MEMORY_ENCRYPT_OP()`, which yields the read/write ioctl request
// number built from the `KVMIO` type, command number 0xba and a `c_ulong` payload.
ioctl_iowr_nr!(KVM_MEMORY_ENCRYPT_OP, KVMIO, 0xba, std::os::raw::c_ulong);
/// TDX module related errors.
#[derive(Error, Debug)]
pub enum TdxIoctlError {
/// Failed to create TdxCaps
#[error("Failed to create TdxCaps")]
TdxCapabilitiesCreate,
/// Failed to get TDX Capbilities
#[error("Failed to get TDX Capbilities: {0}")]
TdxCapabilities(#[source] std::io::Error),
/// Failed to init TDX.
#[error("Failed to init TDX: {0}")]
TdxInit(#[source] std::io::Error),
/// Failed to finalize TDX.
#[error("Failed to finalize TDX: {0}")]
TdxFinalize(#[source] std::io::Error),
/// Failed to init TDX memory region.
#[error("Failed to init TDX memory region: {0}")]
TdxInitMemRegion(#[source] std::io::Error),
/// Failed to init TDX vcpu.
#[error("Failed to init TDX vcpu: {0}")]
TdxInitVcpu(#[source] std::io::Error),
}
/// TDX sub-command identifiers passed in the `command` field of the
/// `KVM_MEMORY_ENCRYPT_OP` argument block.
///
/// `#[repr(u32)]` fixes the in-memory representation; the explicit
/// discriminants are the values sent to the kernel and must not be reordered.
#[repr(u32)]
enum TdxCommand {
/// Query the TDX capabilities (used by `tdx_get_caps`).
Capabilities = 0,
/// Initialize the TD (used by `tdx_init`).
InitVm = 1,
/// Initialize a vcpu of the TD (used by `tdx_init_vcpu`).
InitVcpu = 2,
/// Initialize a memory region of the TD (used by `tdx_init_memory_region`).
InitMemRegion = 3,
/// Finalize the TD (used by `tdx_finalize`).
Finalize = 4,
}
/// Issue one TDX sub-command through the `KVM_MEMORY_ENCRYPT_OP` ioctl.
///
/// # Arguments
/// * `fd` - file descriptor the ioctl is issued on.
/// * `command` - TDX sub-command to run.
/// * `metadata` - command-specific 32-bit value, forwarded unchanged.
/// * `data` - command-specific 64-bit value, forwarded unchanged (callers in
///   this file pass 0, a plain address, or the address of a `#[repr(C)]` struct).
///
/// # Errors
/// Returns the last OS error when the ioctl returns a negative value.
fn tdx_command(
    fd: &RawFd,
    command: TdxCommand,
    metadata: u32,
    data: u64,
) -> std::result::Result<(), std::io::Error> {
    // Argument block handed to the kernel; `#[repr(C)]` keeps the field order.
    #[repr(C)]
    struct TdxIoctlCmd {
        command: TdxCommand,
        metadata: u32,
        data: u64,
    }

    let request = TdxIoctlCmd {
        command,
        metadata,
        data,
    };
    let request_addr = &request as *const TdxIoctlCmd as std::os::raw::c_ulong;

    // SAFETY: `request` lives on this stack frame until after the ioctl
    // returns, so the address passed to the kernel stays valid for the call.
    // Whatever `data` points to is the caller's responsibility.
    let rc = unsafe { ioctl_with_val(fd, KVM_MEMORY_ENCRYPT_OP(), request_addr) };

    if rc < 0 {
        Err(std::io::Error::last_os_error())
    } else {
        Ok(())
    }
}
/// Initialize the TD for the given VM by issuing the `InitVm` TDX command.
///
/// # Arguments
/// * `vm_fd` - VM file descriptor the command is issued on.
/// * `cpu_id` - CPUID table; its raw FAM-struct pointer is passed to the kernel.
/// * `max_vcpus` - value stored in the `max_vcpus` field of the init block.
///
/// # Errors
/// Returns `TdxIoctlError::TdxInit` wrapping the OS error if the ioctl fails.
pub fn tdx_init(
    vm_fd: &RawFd,
    cpu_id: &CpuId,
    max_vcpus: u32,
) -> std::result::Result<(), TdxIoctlError> {
    // Parameter block for the `InitVm` command; layout fixed by `#[repr(C)]`.
    #[repr(C)]
    struct TdxInitVm {
        max_vcpus: u32,
        tsc_khz: u32,
        attributes: u64,
        cpuid: u64,
        mrconfigid: [u64; 6],
        mrowner: [u64; 6],
        mrownerconfig: [u64; 6],
        reserved: [u64; 43],
    }

    let cpuid_addr = cpu_id.as_fam_struct_ptr() as u64;
    let init_params = TdxInitVm {
        max_vcpus,
        tsc_khz: 0,
        // No attribute bits are set; the original note names
        // TDX1_TD_ATTRIBUTE_DEBUG as a value that could go here.
        attributes: 0,
        cpuid: cpuid_addr,
        mrconfigid: [0; 6],
        mrowner: [0; 6],
        mrownerconfig: [0; 6],
        reserved: [0; 43],
    };

    // `init_params` stays alive until `tdx_command` returns, so its address is
    // valid for the duration of the ioctl.
    let init_params_addr = &init_params as *const TdxInitVm as u64;
    let outcome = tdx_command(vm_fd, TdxCommand::InitVm, 0, init_params_addr);
    outcome.map_err(TdxIoctlError::TdxInit)
}
/// Finalize the TDX setup for this VM by issuing the `Finalize` TDX command
/// with zero metadata and zero data.
///
/// # Errors
/// Returns `TdxIoctlError::TdxFinalize` wrapping the OS error if the ioctl fails.
pub fn tdx_finalize(vm_fd: &RawFd) -> std::result::Result<(), TdxIoctlError> {
    let outcome = tdx_command(vm_fd, TdxCommand::Finalize, 0, 0);
    outcome.map_err(TdxIoctlError::TdxFinalize)
}
/// Initialize a TDX memory region by issuing the `InitMemRegion` TDX command.
///
/// # Arguments
/// * `vm_fd` - VM file descriptor the command is issued on.
/// * `host_address` - host address placed in the region descriptor.
/// * `guest_address` - guest address placed in the region descriptor.
/// * `size` - region size in bytes; converted to a count of 4096-byte pages
///   with integer division, so any remainder is discarded.
/// * `measure` - forwarded as the command metadata: 1 when `true`, 0 otherwise.
///
/// # Errors
/// Returns `TdxIoctlError::TdxInitMemRegion` wrapping the OS error if the
/// ioctl fails.
pub fn tdx_init_memory_region(
    vm_fd: &RawFd,
    host_address: u64,
    guest_address: u64,
    size: u64,
    measure: bool,
) -> std::result::Result<(), TdxIoctlError> {
    // Region descriptor handed to the kernel; layout fixed by `#[repr(C)]`.
    #[repr(C)]
    struct TdxInitMemRegion {
        host_address: u64,
        guest_address: u64,
        pages: u64,
    }

    const PAGE_BYTES: u64 = 4096;

    let region = TdxInitMemRegion {
        host_address,
        guest_address,
        pages: size / PAGE_BYTES,
    };
    let measure_flag = u32::from(measure);
    // `region` outlives the call below, so its address is valid for the ioctl.
    let region_addr = &region as *const TdxInitMemRegion as u64;

    let outcome = tdx_command(vm_fd, TdxCommand::InitMemRegion, measure_flag, region_addr);
    outcome.map_err(TdxIoctlError::TdxInitMemRegion)
}
/// Initialize a TDX vcpu by issuing the `InitVcpu` TDX command.
///
/// # Arguments
/// * `vcpu_fd` - vcpu file descriptor the command is issued on.
/// * `hob_address` - forwarded unchanged as the command's data value.
///
/// # Errors
/// Returns `TdxIoctlError::TdxInitVcpu` wrapping the OS error if the ioctl fails.
pub fn tdx_init_vcpu(vcpu_fd: &RawFd, hob_address: u64) -> std::result::Result<(), TdxIoctlError> {
    let outcome = tdx_command(vcpu_fd, TdxCommand::InitVcpu, 0, hob_address);
    outcome.map_err(TdxIoctlError::TdxInitVcpu)
}
/// Query the TDX capabilities by issuing the `Capabilities` TDX command.
///
/// Allocates a `TdxCaps` with room for `TDX1_MAX_NR_CPUID_CONFIGS` cpuid
/// config entries and passes its raw pointer to the kernel to fill in.
///
/// # Errors
/// * `TdxIoctlError::TdxCapabilitiesCreate` if the wrapper cannot be created.
/// * `TdxIoctlError::TdxCapabilities` wrapping the OS error if the ioctl fails.
pub fn tdx_get_caps(kvm_fd: &RawFd) -> std::result::Result<TdxCaps, TdxIoctlError> {
    let mut caps = match TdxCaps::new(TDX1_MAX_NR_CPUID_CONFIGS) {
        Ok(caps) => caps,
        Err(_) => return Err(TdxIoctlError::TdxCapabilitiesCreate),
    };

    // `caps` is owned by this frame and outlives the ioctl below.
    let caps_addr = caps.as_mut_fam_struct_ptr() as *const _ as u64;
    tdx_command(kvm_fd, TdxCommand::Capabilities, 0, caps_addr)
        .map_err(TdxIoctlError::TdxCapabilities)?;

    Ok(caps)
}

View File

@@ -16,6 +16,9 @@ use dbs_boot::tdshim::TdvfError;
#[cfg(feature = "dbs-virtio-devices")]
use dbs_virtio_devices::Error as VirtioError;
#[cfg(target_arch = "x86_64")]
use tdx::launch::Error as TdxError;
#[cfg(feature = "host-device")]
use crate::device_manager::vfio_dev_mgr::VfioDeviceError;
use crate::{address_space_manager, device_manager, resource_manager, vcpu, vm};
@@ -83,6 +86,11 @@ pub enum Error {
/// Fail to create device manager system
#[error("failed to create device manager system: {0}")]
DeviceMgrError(#[source] device_manager::DeviceMgrError),
#[cfg(target_arch = "x86_64")]
/// TDX related error
#[error("TDX error: {0}")]
TdxError(TdxError),
}
/// Errors associated with starting the instance.
@@ -252,6 +260,15 @@ pub enum StartMicroVmError {
/// Initrd is not supported
#[error("Initrd is not supported")]
InitrdNotSupported,
#[cfg(target_arch = "x86_64")]
/// TDX related error
#[error("Tdx error: {0}")]
TdxError(TdxError),
/// Guest memory error
#[error("Guest memory error: {0}")]
GuestMemoryError(#[source] vm_memory::guest_memory::Error),
}
/// Errors associated with starting the instance.

View File

@@ -65,11 +65,16 @@ impl KvmContext {
self.max_memslots
}
/// Create a virtual machine object of the default type.
///
/// Any error reported by KVM is wrapped in `Error::Kvm`.
pub fn create_vm(&self) -> Result<VmFd> {
self.kvm.create_vm().map_err(Error::Kvm)
}
/// Create a virtual machine object with the given VM type.
///
/// `vm_type` is forwarded unchanged to KVM; any error reported by KVM is
/// wrapped in `Error::Kvm`.
pub fn create_vm_with_type(&self, vm_type: u64) -> Result<VmFd> {
self.kvm.create_vm_with_type(vm_type).map_err(Error::Kvm)
}
/// Get the max vcpu count supported by kvm
pub fn get_max_vcpus(&self) -> usize {
self.kvm.get_max_vcpus()

View File

@@ -30,6 +30,8 @@ use vm_memory::GuestAddress;
use vmm_sys_util::eventfd::EventFd;
use crate::address_space_manager::GuestAddressSpaceImpl;
#[cfg(target_arch = "x86_64")]
use crate::api::v1::ConfidentialVmType;
use crate::api::v1::InstanceInfo;
use crate::kvm_context::KvmContext;
use crate::metric::METRICS;
@@ -259,7 +261,17 @@ impl VcpuManager {
) -> Result<Arc<Mutex<Self>>> {
let support_immediate_exit = kvm_context.kvm().check_extension(Cap::ImmediateExit);
let max_vcpu_count = vm_config_info.max_vcpu_count;
#[cfg(not(target_arch = "x86_64"))]
let kvm_max_vcpu_count = kvm_context.get_max_vcpus();
#[cfg(target_arch = "x86_64")]
let kvm_max_vcpu_count =
if shared_info.read().unwrap().confidential_vm_type == Some(ConfidentialVmType::TDX) {
// For TDX VMs, max vcpu allowed from TDX module might be different from that of
// kvm context
vm_fd.check_extension_int(Cap::MaxVcpus) as usize
} else {
kvm_context.get_max_vcpus()
};
// check the max vcpu count in kvm. max_vcpu_count is u8 and kvm_context.get_max_vcpus()
// returns usize, so convert max_vcpu_count to usize instead of converting kvm max vcpu to

View File

@@ -4,6 +4,8 @@
use std::collections::HashMap;
use std::io::{self, Read, Seek, SeekFrom};
use std::ops::Deref;
#[cfg(target_arch = "x86_64")]
use std::os::unix::io::AsRawFd;
use std::os::unix::io::RawFd;
use std::sync::{Arc, Mutex, RwLock};
@@ -22,6 +24,8 @@ use seccompiler::BpfProgram;
use seccompiler::{apply_filter_all_threads, Error as SecError};
use serde_derive::{Deserialize, Serialize};
use slog::{error, info};
#[cfg(target_arch = "x86_64")]
use tdx::launch::*;
use vm_memory::{Bytes, GuestAddress, GuestAddressSpace};
use vmm_sys_util::eventfd::EventFd;
@@ -214,6 +218,11 @@ pub struct Vm {
upcall_client: Option<Arc<UpcallClient<DevMgrService>>>,
firmware_type: Option<FirmwareType>,
#[cfg(target_arch = "x86_64")]
tdx_launcher: Option<Launcher>,
#[cfg(target_arch = "x86_64")]
tdx_capabilities: Option<TdxCapabilities>,
}
impl Vm {
@@ -226,6 +235,16 @@ impl Vm {
let id = api_shared_info.read().unwrap().id.clone();
let logger = slog_scope::logger().new(slog::o!("id" => id));
let kvm = KvmContext::new(kvm_fd)?;
#[cfg(target_arch = "x86_64")]
let tdx_enabled =
api_shared_info.read().unwrap().confidential_vm_type == Some(ConfidentialVmType::TDX);
#[cfg(target_arch = "x86_64")]
let vm_fd = if tdx_enabled {
Arc::new(kvm.create_vm_with_type(KVM_X86_TDX_VM)?)
} else {
Arc::new(kvm.create_vm()?)
};
#[cfg(not(target_arch = "x86_64"))]
let vm_fd = Arc::new(kvm.create_vm()?);
let resource_manager = Arc::new(ResourceManager::new(Some(kvm.max_memslots())));
let device_manager = DeviceManager::new(
@@ -238,9 +257,7 @@ impl Vm {
.map_err(Error::DeviceMgrError)?;
#[cfg(target_arch = "x86_64")]
let firmware_type = if api_shared_info.read().unwrap().confidential_vm_type
== Some(ConfidentialVmType::TDX)
{
let firmware_type = if tdx_enabled {
Some(FirmwareType::Tdshim)
} else {
None
@@ -249,6 +266,15 @@ impl Vm {
#[cfg(not(target_arch = "x86_64"))]
let firmware_type = None;
#[cfg(target_arch = "x86_64")]
let (tdx_launcher, tdx_capabilities) = if tdx_enabled {
let mut launcher = Launcher::new(vm_fd.as_raw_fd());
let capabilities = launcher.get_capabilities().map_err(Error::TdxError)?;
(Some(launcher), Some(capabilities))
} else {
(None, None)
};
Ok(Vm {
epoll_manager,
kvm,
@@ -275,6 +301,11 @@ impl Vm {
upcall_client: None,
firmware_type,
#[cfg(target_arch = "x86_64")]
tdx_launcher,
#[cfg(target_arch = "x86_64")]
tdx_capabilities,
})
}
@@ -789,6 +820,11 @@ impl Vm {
AddressManagerError::GuestMemoryNotInitialized,
))?;
#[cfg(target_arch = "x86_64")]
if self.confidential_vm_type() == Some(ConfidentialVmType::TDX) {
self.tdx_init_vm()?;
}
self.init_vcpu_manager(
vm_as.clone(),
seccomp_filters

View File

@@ -9,6 +9,7 @@
use std::collections::HashMap;
use std::convert::TryInto;
use std::ops::Deref;
use std::os::unix::io::AsRawFd;
use dbs_acpi::*;
use dbs_address_space::{AddressSpace, AddressSpaceRegionType};
@@ -26,13 +27,14 @@ use kvm_bindings::{
use linux_loader::cmdline::Cmdline;
use linux_loader::configurator::{linux::LinuxBootConfigurator, BootConfigurator, BootParams};
use slog::info;
use tdx::launch::MemRegion;
use vm_memory::{Address, GuestAddress, GuestAddressSpace, GuestMemory};
use crate::address_space_manager::{GuestAddressSpaceImpl, GuestMemoryImpl};
use crate::api::v1::ConfidentialVmType;
use crate::error::{Error, Result, StartMicroVmError};
use crate::event_manager::EventManager;
use crate::vm::{Vm, VmError};
use crate::vm::{VcpuManagerError, Vm, VmError};
/// Configures the system and should be called once per vm before starting vcpu
/// threads.
@@ -236,6 +238,12 @@ impl Vm {
.create_vcpus(boot_vcpu_count, Some(request_ts), None, self.firmware_type)
.map_err(StartMicroVmError::Vcpu)?;
if self.confidential_vm_type() == Some(ConfidentialVmType::TDX) {
self.tdx_init_vcpus(hob_address)?;
self.tdx_init_mem_region(vm_memory.deref(), &sections)?;
self.tdx_finalize()?;
}
return Ok(());
}
@@ -514,4 +522,74 @@ impl Vm {
Ok(())
}
/// Initialize the TD through the TDX launcher, using the CPUID entries
/// supported by KVM and the TDX capabilities cached on this `Vm`.
///
/// # Errors
/// * `StartMicroVmError::Vcpu(VcpuManagerError::Kvm(..))` if querying the
///   supported CPUID fails.
/// * `StartMicroVmError::TdxError` if the launcher's `init_vm` fails.
///
/// # Panics
/// Panics if the TDX launcher or the TDX capabilities are absent. Callers are
/// expected to invoke this only for TDX VMs.
pub(super) fn tdx_init_vm(&mut self) -> std::result::Result<(), StartMicroVmError> {
    let supported_cpuid = self
        .kvm
        .supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES)
        .map_err(VcpuManagerError::Kvm)
        .map_err(StartMicroVmError::Vcpu)?;

    let launcher = self
        .tdx_launcher
        .as_mut()
        .expect("TDX launcher must be initialized for a TDX VM");
    let capabilities = self
        .tdx_capabilities
        .as_ref()
        .expect("TDX capabilities must be initialized for a TDX VM");

    launcher
        .init_vm(capabilities, supported_cpuid)
        .map_err(StartMicroVmError::TdxError)?;
    Ok(())
}
/// Register every vcpu's raw file descriptor with the TDX launcher and then
/// initialize the vcpus with the given HOB address.
///
/// # Errors
/// * `StartMicroVmError::Vcpu` if the vcpu manager is unavailable.
/// * `StartMicroVmError::TdxError` if the launcher's `init_vcpus` fails.
///
/// # Panics
/// Panics if the TDX launcher is absent. Callers are expected to invoke this
/// only for TDX VMs.
pub(super) fn tdx_init_vcpus(
    &mut self,
    hob_address: u64,
) -> std::result::Result<(), StartMicroVmError> {
    // Collect the fds first so the borrow of the vcpu manager ends before the
    // launcher is borrowed mutably.
    let vcpu_fds: Vec<_> = self
        .vcpu_manager()
        .map_err(StartMicroVmError::Vcpu)?
        .vcpus()
        .iter()
        .map(|vcpu| vcpu.vcpu_fd().as_raw_fd())
        .collect();

    let launcher = self
        .tdx_launcher
        .as_mut()
        .expect("TDX launcher must be initialized for a TDX VM");
    for fd in vcpu_fds {
        launcher.add_vcpu_fd(fd);
    }

    launcher
        .init_vcpus(hob_address)
        .map_err(StartMicroVmError::TdxError)?;
    Ok(())
}
/// Initialize one TDX memory region per TDVF section.
///
/// For each section the guest address is translated to a host address via
/// `vm_memory`, and a `MemRegion` is built from the section's address, its
/// size expressed in pages of `dbs_boot::PAGE_SIZE` (integer division, any
/// remainder is discarded), its attributes and the host address.
///
/// # Errors
/// * `StartMicroVmError::GuestMemoryError` if a section address cannot be
///   translated to a host address.
/// * `StartMicroVmError::TdxError` if the launcher's `init_mem_region` fails.
///
/// # Panics
/// Panics if the TDX launcher is absent. Callers are expected to invoke this
/// only for TDX VMs.
pub(super) fn tdx_init_mem_region(
    &mut self,
    vm_memory: &GuestMemoryImpl,
    sections: &[TdvfSection],
) -> std::result::Result<(), StartMicroVmError> {
    let launcher = self
        .tdx_launcher
        .as_mut()
        .expect("TDX launcher must be initialized for a TDX VM");

    for section in sections {
        let host_address = vm_memory
            .get_host_address(GuestAddress(section.address))
            .map_err(StartMicroVmError::GuestMemoryError)?;
        let page_count = section.size / dbs_boot::PAGE_SIZE as u64;

        let region = MemRegion::new(
            section.address,
            page_count,
            section.attributes,
            host_address as u64,
        );
        launcher
            .init_mem_region(region)
            .map_err(StartMicroVmError::TdxError)?;
    }
    Ok(())
}
/// Finalize the TD through the TDX launcher.
///
/// # Errors
/// Returns `StartMicroVmError::TdxError` if the launcher's `finalize` fails.
///
/// # Panics
/// Panics if the TDX launcher is absent. Callers are expected to invoke this
/// only for TDX VMs.
pub(super) fn tdx_finalize(&mut self) -> std::result::Result<(), StartMicroVmError> {
    let launcher = self
        .tdx_launcher
        .as_mut()
        .expect("TDX launcher must be initialized for a TDX VM");
    launcher.finalize().map_err(StartMicroVmError::TdxError)?;
    Ok(())
}
}