Merge pull request #12932 from RainaYL/rainax/tdshim_pr

dragonball: Allow guest VM to load tdshim firmware for booting
This commit is contained in:
Alex Lyn
2026-05-18 10:43:22 +08:00
committed by GitHub
22 changed files with 961 additions and 22 deletions

1
Cargo.lock generated
View File

@@ -1790,6 +1790,7 @@ dependencies = [
"arc-swap",
"bytes 1.11.1",
"crossbeam-channel",
"dbs-acpi",
"dbs-address-space",
"dbs-allocator",
"dbs-arch",

View File

@@ -13,6 +13,7 @@ edition = "2018"
anyhow = "1.0.32"
arc-swap = "1.5.0"
bytes = "1.1.0"
dbs-acpi = { workspace = true }
dbs-address-space = { workspace = true }
dbs-allocator = { workspace = true }
dbs-arch = { workspace = true }

View File

@@ -74,6 +74,7 @@ impl AddressSpaceLayout {
return false;
}
}
AddressSpaceRegionType::FirmwareMemory => {}
}
true

View File

@@ -30,6 +30,8 @@ pub enum AddressSpaceRegionType {
DeviceMemory,
/// DAX address region for virtio-fs/virtio-pmem.
DAXMemory,
/// Address region where virtual firmwares are loaded.
FirmwareMemory,
}
/// Struct to maintain configuration information about a guest address region.
@@ -273,6 +275,31 @@ impl AddressSpaceRegion {
))
}
/// Build the address space region that describes virtual firmware memory.
///
/// The region is tagged as [`AddressSpaceRegionType::FirmwareMemory`] and is
/// always described with `MAP_PRIVATE | MAP_ANONYMOUS` mmap flags.
///
/// # Arguments
/// * `base` - Base address in VM to map content
/// * `size` - Length of content to map
/// * `prot_flags` - mmap protection flags
pub fn create_firmware_region(
    base: GuestAddress,
    size: GuestUsize,
    prot_flags: i32,
) -> Result<AddressSpaceRegion, AddressSpaceError> {
    // Only anonymous private mappings are offered for firmware at the moment.
    let mmap_flags = libc::MAP_ANONYMOUS | libc::MAP_PRIVATE;

    let firmware_region = Self::build(
        AddressSpaceRegionType::FirmwareMemory,
        base,
        size,
        None,
        None,
        mmap_flags,
        prot_flags,
        false,
    );

    Ok(firmware_region)
}
/// Get type of the address space region.
pub fn region_type(&self) -> AddressSpaceRegionType {
self.ty

View File

@@ -0,0 +1,27 @@
// Copyright (c) 2026 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
#![allow(missing_docs)]
#[cfg(target_arch = "x86_64")]
/// Structs and utilities for tdshim
pub mod tdshim;
/// Resource type value: system memory.
pub const EFI_RESOURCE_SYSTEM_MEMORY: u32 = 0x00;
/// Resource type value: memory-mapped I/O.
pub const EFI_RESOURCE_MEMORY_MAPPED_IO: u32 = 0x01;
/// Resource type value: unaccepted memory.
pub const EFI_RESOURCE_MEMORY_UNACCEPTED: u32 = 0x07;

/// Resource attribute bit: present.
pub const EFI_RESOURCE_ATTRIBUTE_PRESENT: u32 = 0x0000_0001;
/// Resource attribute bit: initialized.
pub const EFI_RESOURCE_ATTRIBUTE_INITIALIZED: u32 = 0x0000_0002;
/// Resource attribute bit: tested.
pub const EFI_RESOURCE_ATTRIBUTE_TESTED: u32 = 0x0000_0004;
/// Resource attribute bit: uncacheable.
pub const EFI_RESOURCE_ATTRIBUTE_UNCACHEABLE: u32 = 0x0000_0400;
/// Firmware types.
///
/// The only variant is compiled for x86_64 targets; on other targets this
/// enum has no variants.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum FirmwareType {
    /// Tdshim firmware (x86_64 only).
    #[cfg(target_arch = "x86_64")]
    Tdshim,
}

View File

@@ -0,0 +1,403 @@
// Copyright (c) 2026 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
use super::TdvfError;
use crate::firmware::*;
use vm_memory::{ByteValued, Bytes, GuestAddress, GuestMemoryMmap};
/// HOB Type.
///
/// Stored as a 16-bit value (`#[repr(u16)]`) in the `r#type` field of
/// [`HobHeader`]. `Unused` is the `Default` value.
#[repr(u16)]
#[derive(Copy, Clone, Debug, Default)]
enum HobType {
    /// Hand Off
    Handoff = 0x1,
    /// Resource Descriptor
    ResourceDescriptor = 0x3,
    /// Guid Extension
    GuidExtension = 0x4,
    /// Unused
    #[default]
    Unused = 0xfffe,
    /// End Of HOB List
    EndOfHobList = 0xffff,
}
/// HOB header: the common prefix of every HOB structure in this file.
#[repr(C)]
#[derive(Copy, Clone, Default, Debug)]
struct HobHeader {
    // Kind of HOB that follows this header.
    r#type: HobType,
    // Length of the HOB in bytes; the constructors in this file set it to the
    // size of the whole HOB structure, header included.
    length: u16,
    // Always written as 0 by the constructors in this file.
    reserved: u32,
}
/// Hand-off information table HOB.
#[repr(C)]
#[derive(Copy, Clone, Default, Debug)]
struct HobHandoffInfoTable {
    header: HobHeader,
    version: u32,
    boot_mode: u32,
    efi_memory_top: u64,
    efi_memory_bottom: u64,
    efi_free_memory_top: u64,
    efi_free_memory_bottom: u64,
    efi_end_of_hob_list: u64,
}

impl HobHandoffInfoTable {
    /// Create a hand-off table that records `efi_end_of_hob_list`.
    ///
    /// The version is fixed to `0x9`; the boot mode and all memory range
    /// fields are zero.
    pub fn new(efi_end_of_hob_list: u64) -> Self {
        let header = HobHeader {
            r#type: HobType::Handoff,
            length: std::mem::size_of::<Self>() as u16,
            reserved: 0,
        };

        Self {
            header,
            version: 0x9,
            efi_end_of_hob_list,
            // boot_mode and the four memory range fields stay at zero.
            ..Self::default()
        }
    }
}
/// Resource descriptor HOB: describes one physical address range.
#[repr(C)]
#[derive(Copy, Clone, Default, Debug)]
struct HobResourceDescriptor {
    header: HobHeader,
    efi_guid_type: EfiGuid,
    resource_type: u32,
    resource_attribute: u32,
    physical_start: u64,
    resource_length: u64,
}

impl HobResourceDescriptor {
    /// Create a resource descriptor for the range starting at
    /// `physical_start` and spanning `resource_length`, tagged with the given
    /// resource type and attributes. The owner GUID is [`EfiGuid::resource`].
    fn new(
        resource_type: u32,
        resource_attribute: u32,
        physical_start: u64,
        resource_length: u64,
    ) -> Self {
        let header = HobHeader {
            r#type: HobType::ResourceDescriptor,
            length: std::mem::size_of::<Self>() as u16,
            reserved: 0,
        };

        Self {
            header,
            efi_guid_type: EfiGuid::resource(),
            resource_type,
            resource_attribute,
            physical_start,
            resource_length,
        }
    }
}
/// End-of-list HOB: consists of a header only.
#[repr(C)]
#[derive(Copy, Clone, Default, Debug)]
struct HobEnd {
    header: HobHeader,
}

impl HobEnd {
    /// Create the end-of-list marker.
    fn new() -> Self {
        let header = HobHeader {
            r#type: HobType::EndOfHobList,
            length: std::mem::size_of::<Self>() as u16,
            reserved: 0,
        };

        Self { header }
    }
}
/// EFI GUID, split into the usual four groups.
#[repr(C)]
#[derive(Copy, Clone, Default, Debug, PartialEq)]
struct EfiGuid {
    data1: u32,
    data2: u16,
    data3: u16,
    data4: [u8; 8],
}

impl EfiGuid {
    /// Assemble a GUID from its four groups.
    fn from_groups(data1: u32, data2: u16, data3: u16, data4: [u8; 8]) -> Self {
        Self {
            data1,
            data2,
            data3,
            data4,
        }
    }

    /// RESOURCE_HOB_GUID (all zeroes)
    fn resource() -> Self {
        Self::from_groups(0, 0, 0, [0; 8])
    }

    /// HOB_PAYLOAD_INFO_GUID
    /// 0xb96fa412, 0x461f, 0x4be3, {0x8c, 0xd, 0xad, 0x80, 0x5a, 0x49, 0x7a, 0xc0}
    fn payload() -> Self {
        Self::from_groups(
            0xb96f_a412,
            0x461f,
            0x4be3,
            [0x8c, 0xd, 0xad, 0x80, 0x5a, 0x49, 0x7a, 0xc0],
        )
    }

    /// ACPI_TABLE_HOB_GUID
    /// 0x6a0c5870, 0xd4ed, 0x44f4, {0xa1, 0x35, 0xdd, 0x23, 0x8b, 0x6f, 0xc, 0x8d}
    fn acpi() -> Self {
        Self::from_groups(
            0x6a0c_5870,
            0xd4ed,
            0x44f4,
            [0xa1, 0x35, 0xdd, 0x23, 0x8b, 0x6f, 0xc, 0x8d],
        )
    }
}
/// Payload image type.
///
/// Stored as a 32-bit value (`#[repr(u32)]`); the variants take the implicit
/// discriminants 0, 1 and 2 in declaration order.
#[repr(u32)]
#[derive(Clone, Copy, Default, Debug)]
pub enum PayloadImageType {
    /// Raw executable binary
    #[default]
    ExecutablePayload,
    /// BzImage
    BzImage,
    /// Raw vmlinux kernel in ELF
    RawVmLinux,
}
/// Description of the payload handed to the firmware.
#[repr(C)]
#[derive(Copy, Clone, Default, Debug)]
pub struct PayloadInfo {
    /// Payload image type
    pub image_type: PayloadImageType,
    /// Reserved
    pub reserved: u32,
    /// Entry point for the payload
    pub entry_point: u64,
}

impl PayloadInfo {
    /// Build a payload description from an image type and an entry point.
    ///
    /// The reserved field is always zero.
    pub fn new(image_type: PayloadImageType, entry_point: u64) -> Self {
        PayloadInfo {
            image_type,
            entry_point,
            reserved: 0,
        }
    }
}
/// GUID extension HOB that carries a [`PayloadInfo`].
#[repr(C)]
#[derive(Copy, Clone, Default, Debug)]
struct TdPayloadDescription {
    header: HobHeader,
    efi_guid_type: EfiGuid,
    payload_info: PayloadInfo,
}

impl TdPayloadDescription {
    /// Wrap `payload` into a GUID extension HOB tagged with [`EfiGuid::payload`].
    fn new(payload: PayloadInfo) -> Self {
        let header = HobHeader {
            r#type: HobType::GuidExtension,
            length: std::mem::size_of::<Self>() as u16,
            reserved: 0,
        };

        Self {
            header,
            efi_guid_type: EfiGuid::payload(),
            payload_info: payload,
        }
    }
}
/// Leading part of a GUID extension HOB that carries an ACPI table.
#[repr(C)]
#[derive(Copy, Clone, Default, Debug)]
struct AcpiDescription {
    header: HobHeader,
    efi_guid_type: EfiGuid,
}

impl AcpiDescription {
    /// Create the ACPI HOB prefix.
    ///
    /// `length` is stored verbatim in the HOB header, so the caller chooses
    /// what it covers.
    fn new(length: u16) -> Self {
        let header = HobHeader {
            r#type: HobType::GuidExtension,
            length,
            reserved: 0,
        };

        Self {
            header,
            // ACPI_TABLE_HOB_GUID
            efi_guid_type: EfiGuid::acpi(),
        }
    }
}
// SAFETY: every type below is `#[repr(C)]`, `Copy`, and built only from
// integers, byte arrays, fieldless enums and other types from this list; the
// field sizes line up so that no implicit padding is present.
// NOTE(review): `HobHeader` embeds the `HobType` enum and
// `TdPayloadDescription` embeds `PayloadImageType` (via `PayloadInfo`). Enums
// are not valid for arbitrary bit patterns, so reading these types back from
// raw bytes would presumably violate the `ByteValued` contract - confirm
// against the vm-memory documentation. Within this file they are only ever
// written to guest memory with `write_obj`.
unsafe impl ByteValued for HobHeader {}
unsafe impl ByteValued for HobHandoffInfoTable {}
unsafe impl ByteValued for HobResourceDescriptor {}
unsafe impl ByteValued for TdPayloadDescription {}
unsafe impl ByteValued for AcpiDescription {}
unsafe impl ByteValued for HobEnd {}
/// TD HOB: a write cursor used to build a HOB list in guest memory.
pub struct TdHob {
    // Guest address where the HOB list begins; the hand-off table is written
    // here when the list is finished.
    start_offset: u64,
    // Guest address where the next HOB will be written.
    current_offset: u64,
}
/// Round `v` up to the nearest multiple of 8 (values that are already a
/// multiple of 8 are returned unchanged).
fn align_hob(v: u64) -> u64 {
    const HOB_ALIGNMENT: u64 = 8;
    v.next_multiple_of(HOB_ALIGNMENT)
}
impl TdHob {
/// Update offset to align with 8 bytes
fn update_offset<T>(&mut self) {
self.current_offset = align_hob(self.current_offset + std::mem::size_of::<T>() as u64)
}
/// Add resource to HOB list
fn add_resource(
&mut self,
mem: &GuestMemoryMmap,
physical_start: u64,
resource_length: u64,
resource_type: u32,
resource_attribute: u32,
) -> Result<(), TdvfError> {
let resource_descriptor = HobResourceDescriptor::new(
resource_type,
resource_attribute,
physical_start,
resource_length,
);
mem.write_obj(resource_descriptor, GuestAddress(self.current_offset))
.map_err(TdvfError::WriteHobError)?;
self.update_offset::<HobResourceDescriptor>();
Ok(())
}
/// Start writing HOB list
pub fn start(offset: u64) -> TdHob {
// Leave a gap to place the HandoffTable at the start as it can only be filled in later
let mut hob = TdHob {
start_offset: offset,
current_offset: offset,
};
hob.update_offset::<HobHandoffInfoTable>();
hob
}
/// Finish writing HOB list
pub fn finish(&mut self, mem: &GuestMemoryMmap) -> Result<(), TdvfError> {
// Write end
let end = HobEnd::new();
mem.write_obj(end, GuestAddress(self.current_offset))
.map_err(TdvfError::WriteHobError)?;
self.update_offset::<HobEnd>();
// Write handoff, delayed as it needs end of HOB list
let efi_end_of_hob_list = self.current_offset;
let handoff = HobHandoffInfoTable::new(efi_end_of_hob_list);
mem.write_obj(handoff, GuestAddress(self.start_offset))
.map_err(TdvfError::WriteHobError)
}
/// Add memory resource
pub fn add_memory_resource(
&mut self,
mem: &GuestMemoryMmap,
physical_start: u64,
resource_length: u64,
ram: bool,
) -> Result<(), TdvfError> {
self.add_resource(
mem,
physical_start,
resource_length,
if ram {
EFI_RESOURCE_MEMORY_UNACCEPTED
} else {
EFI_RESOURCE_SYSTEM_MEMORY
},
EFI_RESOURCE_ATTRIBUTE_PRESENT
| EFI_RESOURCE_ATTRIBUTE_INITIALIZED
| EFI_RESOURCE_ATTRIBUTE_TESTED,
)
}
/// Add mmio resource
pub fn add_mmio_resource(
&mut self,
mem: &GuestMemoryMmap,
physical_start: u64,
resource_length: u64,
) -> Result<(), TdvfError> {
self.add_resource(
mem,
physical_start,
resource_length,
EFI_RESOURCE_MEMORY_MAPPED_IO,
EFI_RESOURCE_ATTRIBUTE_PRESENT
| EFI_RESOURCE_ATTRIBUTE_INITIALIZED
| EFI_RESOURCE_ATTRIBUTE_UNCACHEABLE,
)
}
/// Add payload
pub fn add_payload(
&mut self,
mem: &GuestMemoryMmap,
payload_info: PayloadInfo,
) -> Result<(), TdvfError> {
let payload = TdPayloadDescription::new(payload_info);
mem.write_obj(payload, GuestAddress(self.current_offset))
.map_err(TdvfError::WriteHobError)?;
self.update_offset::<TdPayloadDescription>();
Ok(())
}
/// Add ACPI table
pub fn add_acpi_table(
&mut self,
mem: &GuestMemoryMmap,
table_content: &[u8],
) -> Result<(), TdvfError> {
// We already know the HobGuidType size is 8 bytes multiple, but we
// need the total size to be 8 bytes multiple. That is why the ACPI
// table size must be 8 bytes multiple as well.
let length = std::mem::size_of::<AcpiDescription>() as u16
+ align_hob(table_content.len() as u64) as u16;
let hob_guid_type = AcpiDescription::new(length);
mem.write_obj(hob_guid_type, GuestAddress(self.current_offset))
.map_err(TdvfError::WriteHobError)?;
let current_offset = self.current_offset + std::mem::size_of::<AcpiDescription>() as u64;
// In case the table is quite large, let's make sure we can handle
// retrying until everything has been correctly copied.
let mut offset: usize = 0;
loop {
let bytes_written = mem
.write(
&table_content[offset..],
GuestAddress(current_offset + offset as u64),
)
.map_err(TdvfError::WriteHobError)?;
offset += bytes_written;
if offset >= table_content.len() {
break;
}
}
self.current_offset += length as u64;
Ok(())
}
}

View File

@@ -0,0 +1,35 @@
// Copyright (c) 2026 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
#![deny(missing_docs)]
use thiserror::Error;
use vm_memory::GuestMemoryError;
mod section;
pub use section::*;
mod hob;
pub use hob::*;
/// TDVF related errors
#[derive(Error, Debug)]
pub enum TdvfError {
    /// An I/O operation (seek or read) on the td_shim binary failed.
    #[error("Failed to read td_shim file: {0}")]
    TdshimFileError(#[source] std::io::Error),
    /// The TDVF descriptor failed validation; the message names the check.
    #[error("Failed to parse TDVF descriptor: {0}")]
    TdvfDescriptorError(&'static str),
    /// Writing part of the HOB list to guest memory failed.
    #[error("Failed to write HOB list: {0}")]
    WriteHobError(#[source] GuestMemoryError),
    /// Copying a TDVF section into guest memory failed.
    #[error("Failed to load TDVF section to guest memory: {0}")]
    LoadTdvfSectionError(#[source] GuestMemoryError),
}

View File

@@ -0,0 +1,146 @@
// Copyright (c) 2026 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
use super::TdvfError;
use std::fs::File;
use std::io::{Read, Seek, SeekFrom};
use vm_memory::{Bytes, GuestAddress, GuestMemoryMmap};
/// TDVF descriptor: the fixed-size header that precedes the section table.
#[repr(C, packed)]
pub struct TdvfDescriptor {
    // Must be the ASCII bytes "TDVF".
    signature: [u8; 4],
    // Total size in bytes of this descriptor plus all section entries.
    length: u32,
    // Descriptor version; only version 1 is accepted by the parser.
    version: u32,
    // Number of `TdvfSection` entries that follow the descriptor.
    num_sections: u32,
}
#[repr(C, packed)]
#[derive(Clone, Copy, Default, Debug)]
/// TDVF section
///
/// One 32-byte entry of the section table that follows the
/// [`TdvfDescriptor`]. The struct is packed, so fields must be copied out
/// rather than borrowed.
pub struct TdvfSection {
    /// Data offset: position of the section's raw data within the image file
    pub data_offset: u32,
    /// Raw data size: number of bytes to copy from the image file
    pub data_size: u32,
    /// Guest memory address the section is loaded at
    pub address: u64,
    /// Memory data size
    pub size: u64,
    /// TDVF section type
    pub r#type: TdvfSectionType,
    /// TDVF attributes
    pub attributes: u32,
}
#[repr(u32)]
#[derive(Clone, Copy, Default, Debug, PartialEq)]
/// TDVF section type
///
/// Stored as a 32-bit value. The first seven variants take the implicit
/// discriminants 0 through 6 in declaration order; `Reserved` is
/// `0xffffffff` and is the `Default` value.
pub enum TdvfSectionType {
    /// BFV section type
    Bfv,
    /// CFV section type
    Cfv,
    /// TD HOB
    TdHob,
    /// Temp memory
    TempMem,
    /// Permanent memory
    PermMem,
    /// Payload
    Payload,
    /// Payload Parameters
    PayloadParam,
    /// Reserved
    #[default]
    Reserved = 0xffffffff,
}
/// Parse TDVF sections and return a list of categorized sections
///
/// #Arguments
/// * `file` - The tdshim image file.
pub fn parse_tdvf_sections(file: &mut File) -> Result<Vec<TdvfSection>, TdvfError> {
// The 32-bit offset to the TDVF metadata is located 32 bytes from
// the end of the file.
// See "TDVF Metadata Pointer" in "TDX Virtual Firmware Design Guide
file.seek(SeekFrom::End(-0x20))
.map_err(TdvfError::TdshimFileError)?;
let mut descriptor_offset: [u8; 4] = [0; 4];
file.read_exact(&mut descriptor_offset)
.map_err(TdvfError::TdshimFileError)?;
let descriptor_offset = u32::from_le_bytes(descriptor_offset) as u64;
file.seek(SeekFrom::Start(descriptor_offset))
.map_err(TdvfError::TdshimFileError)?;
let mut descriptor: TdvfDescriptor = unsafe { std::mem::zeroed() };
// Safe as we read exactly the size of the descriptor header
file.read_exact(unsafe {
std::slice::from_raw_parts_mut(
&mut descriptor as *mut _ as *mut u8,
std::mem::size_of::<TdvfDescriptor>(),
)
})
.map_err(TdvfError::TdshimFileError)?;
if &descriptor.signature != b"TDVF" {
return Err(TdvfError::TdvfDescriptorError(
"Invalid descriptor signature",
));
}
if descriptor.length as usize
!= std::mem::size_of::<TdvfDescriptor>()
+ std::mem::size_of::<TdvfSection>() * descriptor.num_sections as usize
{
return Err(TdvfError::TdvfDescriptorError("Invalid descriptor length"));
}
if descriptor.version != 1 {
return Err(TdvfError::TdvfDescriptorError("Invalid descriptor version"));
}
let mut sections = Vec::new();
sections.resize_with(descriptor.num_sections as usize, TdvfSection::default);
// Safe as we read exactly the advertised sections
file.read_exact(unsafe {
std::slice::from_raw_parts_mut(
sections.as_mut_ptr() as *mut u8,
descriptor.num_sections as usize * std::mem::size_of::<TdvfSection>(),
)
})
.map_err(TdvfError::TdshimFileError)?;
Ok(sections)
}
/// Load a TDVF section to guest memory
///
/// Copies exactly `section.data_size` bytes, starting at
/// `section.data_offset` in the image file, to guest address
/// `section.address`.
///
/// # Arguments
/// * `file` - The tdshim image file.
/// * `section` - The metadata of target section.
/// * `mem` - Guest memory to load TDVF section to.
///
/// # Errors
/// * [`TdvfError::TdshimFileError`] if seeking in the image file fails.
/// * [`TdvfError::LoadTdvfSectionError`] if the full section could not be
///   copied, including when the file ends before `data_size` bytes were read.
pub fn load_tdvf_section(
    file: &mut File,
    section: &TdvfSection,
    mem: &GuestMemoryMmap,
) -> Result<(), TdvfError> {
    file.seek(SeekFrom::Start(u64::from(section.data_offset)))
        .map_err(TdvfError::TdshimFileError)?;

    // Use the "exact" variant: a short read must be reported as an error
    // rather than leaving the section partially loaded.
    mem.read_exact_volatile_from(
        GuestAddress(section.address),
        file,
        section.data_size as usize,
    )
    .map_err(TdvfError::LoadTdvfSectionError)
}

View File

@@ -15,6 +15,9 @@ mod aarch64;
#[cfg(target_arch = "aarch64")]
pub use aarch64::*;
mod firmware;
pub use firmware::*;
/// Specialized [std::result::Result] for boot related operations.
pub type Result<T> = std::result::Result<T, Error>;

View File

@@ -56,7 +56,7 @@ pub const IRQ_BASE: u32 = 5;
pub const IRQ_MAX: u32 = 15;
/// Address for the TSS setup.
pub const KVM_TSS_ADDRESS: u64 = 0xfffb_d000;
pub const KVM_TSS_ADDRESS: u64 = 0xfefb_d000;
/// Where BIOS/VGA magic would live on a real PC.
pub const EBDA_START: u64 = 0x9fc00;
@@ -70,6 +70,11 @@ pub const GUEST_MEM_START: u64 = 0u64;
/// Size of memory below MMIO hole.
pub const GUEST_MEM_LOW_SIZE: u64 = MMIO_LOW_START - GUEST_MEM_START;
/// Lower bound of BIOS memory: the range of `BIOS_MEM_SIZE` bytes whose last
/// byte is at `MMIO_LOW_END`.
pub const BIOS_MEM_START: u64 = MMIO_LOW_END - BIOS_MEM_SIZE + 1;
/// Size of BIOS memory (16 MiB).
pub const BIOS_MEM_SIZE: u64 = 16u64 << 20;
/// Max retry times for reading /proc/cpuinfo
const CPUINFO_READ_RETRY: u64 = 5;

View File

@@ -27,6 +27,8 @@ use dbs_address_space::{
AddressSpaceRegionType, NumaNode, NumaNodeInfo, MPOL_MF_MOVE, MPOL_PREFERRED,
};
use dbs_allocator::Constraint;
#[cfg(target_arch = "x86_64")]
use dbs_boot::layout::{BIOS_MEM_SIZE, BIOS_MEM_START};
use kvm_bindings::kvm_userspace_memory_region;
use kvm_ioctls::VmFd;
use log::{debug, error, info, warn};
@@ -164,6 +166,7 @@ pub struct AddressSpaceMgrBuilder<'a> {
mem_prealloc: bool,
dirty_page_logging: bool,
vmfd: Option<Arc<VmFd>>,
use_firmware: bool,
}
impl<'a> AddressSpaceMgrBuilder<'a> {
@@ -180,6 +183,7 @@ impl<'a> AddressSpaceMgrBuilder<'a> {
mem_prealloc: false,
dirty_page_logging: false,
vmfd: None,
use_firmware: false,
})
}
@@ -201,6 +205,11 @@ impl<'a> AddressSpaceMgrBuilder<'a> {
self.dirty_page_logging = logging;
}
/// Enable/disable firmware memory region.
///
/// Stores `firmware` in the builder's `use_firmware` flag, which is `false`
/// on a freshly created builder.
pub fn toggle_use_firmware(&mut self, firmware: bool) {
    self.use_firmware = firmware;
}
/// Set KVM [`VmFd`] handle to configure memory slots.
pub fn set_kvm_vm_fd(&mut self, vmfd: Arc<VmFd>) -> Option<Arc<VmFd>> {
let mut existing_vmfd = None;
@@ -317,17 +326,32 @@ impl AddressSpaceMgr {
}
}
#[cfg(target_arch = "x86_64")]
if param.use_firmware {
let region = Arc::new(
AddressSpaceRegion::create_firmware_region(
GuestAddress(BIOS_MEM_START),
BIOS_MEM_SIZE,
libc::PROT_READ | libc::PROT_WRITE,
)
.map_err(AddressManagerError::CreateAddressSpaceRegion)?,
);
regions.push(region);
}
// Create GuestMemory object
let mut vm_memory = GuestMemoryMmap::new();
for reg in regions.iter() {
// Allocate used guest memory addresses.
// These addresses are statically allocated, resource allocation/update should not fail.
let constraint = Constraint::new(reg.len())
.min(reg.start_addr().raw_value())
.max(reg.last_addr().raw_value());
let _key = res_mgr
.allocate_mem_address(&constraint)
.ok_or(AddressManagerError::NoAvailableMemAddress)?;
if reg.region_type() != AddressSpaceRegionType::FirmwareMemory {
// Allocate used guest memory addresses.
// These addresses are statically allocated, resource allocation/update should not fail.
let constraint = Constraint::new(reg.len())
.min(reg.start_addr().raw_value())
.max(reg.last_addr().raw_value());
let _key = res_mgr
.allocate_mem_address(&constraint)
.ok_or(AddressManagerError::NoAvailableMemAddress)?;
}
let mmap_reg = self.create_mmap_region(reg.clone())?;
vm_memory = vm_memory

View File

@@ -30,6 +30,9 @@ pub struct BootSourceConfig {
/// The boot arguments to pass to the kernel.
#[serde(skip_serializing_if = "Option::is_none")]
pub boot_args: Option<String>,
/// The path to firmware file.
#[serde(skip_serializing_if = "Option::is_none")]
pub firmware_path: Option<String>,
}
/// Errors associated with actions on `BootSourceConfig`.
@@ -49,6 +52,10 @@ pub enum BootSourceConfigError {
#[error("the kernel command line is invalid: {0}")]
InvalidKernelCommandLine(#[source] linux_loader::cmdline::Error),
/// The firmware file cannot be opened.
#[error("the firmware file cannot be opened due to invalid path or invalid permissions: {0}")]
InvalidFirmwarePath(#[source] std::io::Error),
/// The boot source cannot be updated post boot.
#[error("the update operation is not allowed after boot")]
UpdateNotAllowedPostBoot,

View File

@@ -441,7 +441,7 @@ impl VmmService {
boot_source_config: BootSourceConfig,
) -> VmmRequestResult {
use super::BootSourceConfigError::{
InvalidInitrdPath, InvalidKernelCommandLine, InvalidKernelPath,
InvalidFirmwarePath, InvalidInitrdPath, InvalidKernelCommandLine, InvalidKernelPath,
UpdateNotAllowedPostBoot,
};
use super::VmmActionError::BootSource;
@@ -468,7 +468,14 @@ impl VmmService {
.insert_str(boot_args)
.map_err(|e| BootSource(InvalidKernelCommandLine(e)))?;
let kernel_config = KernelConfigInfo::new(kernel_file, initrd_file, cmdline);
let firmware_file = match boot_source_config.firmware_path {
None => None,
Some(ref path) => {
Some(File::open(path).map_err(|e| BootSource(InvalidFirmwarePath(e)))?)
}
};
let kernel_config = KernelConfigInfo::new(kernel_file, initrd_file, cmdline, firmware_file);
vm.set_kernel_config(kernel_config);
Ok(VmmData::Empty)

View File

@@ -1737,6 +1737,7 @@ mod tests {
kernel_file,
None,
linux_loader::cmdline::Cmdline::new(0x1000).unwrap(),
None,
);
let address_space = vm.vm_address_space().cloned();

View File

@@ -11,6 +11,8 @@
#[cfg(target_arch = "aarch64")]
use dbs_arch::pmu::PmuError;
#[cfg(target_arch = "x86_64")]
use dbs_boot::tdshim::TdvfError;
#[cfg(feature = "dbs-virtio-devices")]
use dbs_virtio_devices::Error as VirtioError;
@@ -228,6 +230,28 @@ pub enum StartMicroVmError {
/// Cannot enable split irqchip
#[error("Failed to enable split irqchip: {0}")]
EnableSplitIrqchip(#[source] vmm_sys_util::errno::Error),
/// Missing firmware file
#[error("Cannot start microvm due to missing firmware file")]
MissingFirmwareFile,
#[cfg(target_arch = "x86_64")]
/// TDVF errors
#[error("TDVF error: {0}")]
TdvfError(#[source] TdvfError),
#[cfg(target_arch = "x86_64")]
/// Missing tdshim section
#[error("Missing tdshim section: {0}")]
MissingTdshimSection(&'static str),
/// Guest address space not initialized
#[error("Guest address space not initialized")]
GuestMemoryNotInitialized,
/// Initrd is not supported
#[error("Initrd is not supported")]
InitrdNotSupported,
}
/// Errors associated with starting the instance.

View File

@@ -22,6 +22,7 @@ pub mod tests {
kernel_file.into_file(),
None,
cmd_line,
None,
));
let vm_config = VmConfigInfo {

View File

@@ -11,7 +11,7 @@ use std::sync::mpsc::{channel, Sender};
use std::sync::Arc;
use dbs_arch::{regs, VpmuFeatureLevel};
use dbs_boot::get_fdt_addr;
use dbs_boot::{get_fdt_addr, FirmwareType};
use dbs_utils::time::TimestampUs;
use kvm_ioctls::{VcpuFd, VmFd};
use vm_memory::{Address, GuestAddress, GuestAddressSpace};
@@ -88,6 +88,7 @@ impl Vcpu {
vm_as: &GuestAddressSpaceImpl,
kernel_load_addr: Option<GuestAddress>,
_pgtable_addr: Option<GuestAddress>,
_firmware_type: Option<FirmwareType>,
) -> Result<()> {
let mut kvi: kvm_bindings::kvm_vcpu_init = kvm_bindings::kvm_vcpu_init::default();

View File

@@ -16,6 +16,7 @@ use std::sync::{Arc, Barrier, Mutex, RwLock};
use std::time::Duration;
use dbs_arch::VpmuFeatureLevel;
use dbs_boot::FirmwareType;
#[cfg(target_arch = "x86_64")]
use dbs_interrupt::InterruptManager;
#[cfg(all(feature = "hotplug", feature = "dbs-upcall"))]
@@ -392,7 +393,7 @@ impl VcpuManager {
} else {
self.vcpu_config.boot_vcpu_count
};
self.create_vcpus(boot_vcpu_count, Some(request_ts), Some(entry_addr))?;
self.create_vcpus(boot_vcpu_count, Some(request_ts), Some(entry_addr), None)?;
Ok(())
}
@@ -413,6 +414,7 @@ impl VcpuManager {
vcpu_count: u8,
request_ts: Option<TimestampUs>,
entry_addr: Option<GuestAddress>,
firmware_type: Option<FirmwareType>,
) -> Result<Vec<u8>> {
info!("create vcpus");
if vcpu_count > self.vcpu_config.max_vcpu_count {
@@ -422,7 +424,7 @@ impl VcpuManager {
let request_ts = request_ts.unwrap_or_default();
let mut created_cpus = Vec::new();
for cpu_id in self.calculate_available_vcpus(vcpu_count) {
self.create_vcpu(cpu_id, request_ts.clone(), entry_addr)?;
self.create_vcpu(cpu_id, request_ts.clone(), entry_addr, firmware_type)?;
created_cpus.push(cpu_id);
}
@@ -527,6 +529,7 @@ impl VcpuManager {
&mut self,
entry_addr: Option<GuestAddress>,
vcpu: &mut Vcpu,
firmware_type: Option<FirmwareType>,
) -> std::result::Result<(), VcpuError> {
vcpu.configure(
&self.vcpu_config,
@@ -534,6 +537,7 @@ impl VcpuManager {
&self.vm_as,
entry_addr,
None,
firmware_type,
)
}
@@ -542,6 +546,7 @@ impl VcpuManager {
cpu_index: u8,
request_ts: TimestampUs,
entry_addr: Option<GuestAddress>,
firmware_type: Option<FirmwareType>,
) -> Result<()> {
info!("creating vcpu {cpu_index}");
if self.vcpu_infos.get(cpu_index as usize).is_none() {
@@ -564,7 +569,7 @@ impl VcpuManager {
.unwrap()
.vcpu
.insert(cpu_index as u32, vcpu.metrics());
self.configure_single_vcpu(entry_addr, &mut vcpu)
self.configure_single_vcpu(entry_addr, &mut vcpu, firmware_type)
.map_err(VcpuManagerError::Vcpu)?;
self.vcpu_infos[cpu_index as usize].vcpu = Some(vcpu);
@@ -896,7 +901,7 @@ mod hotplug {
}
let created_vcpus = self
.create_vcpus(vcpu_count, None, None)
.create_vcpus(vcpu_count, None, None, None)
.map_err(VcpuResizeError::Vcpu)?;
let cpu_ids = self
.activate_vcpus(vcpu_count, true)
@@ -1246,11 +1251,11 @@ mod tests {
let mut vcpu_manager = vm.vcpu_manager().unwrap();
// test create vcpu more than max
let res = vcpu_manager.create_vcpus(20, None, None);
let res = vcpu_manager.create_vcpus(20, None, None, None);
assert!(matches!(res, Err(VcpuManagerError::ExpectedVcpuExceedMax)));
// test create vcpus
assert!(vcpu_manager.create_vcpus(2, None, None).is_ok());
assert!(vcpu_manager.create_vcpus(2, None, None, None).is_ok());
assert_eq!(vcpu_manager.present_vcpus_count(), 0);
assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 2);
assert_eq!(vcpu_manager.vcpus().len(), 2);

View File

@@ -11,6 +11,7 @@ use std::sync::Arc;
use dbs_arch::cpuid::{process_cpuid, VmSpec};
use dbs_arch::gdt::gdt_entry;
use dbs_boot::FirmwareType;
use dbs_interrupt::InterruptManager;
use dbs_utils::metric::IncMetric;
use dbs_utils::time::TimestampUs;
@@ -94,9 +95,15 @@ impl Vcpu {
vm_as: &GuestAddressSpaceImpl,
kernel_start_addr: Option<GuestAddress>,
_pgtable_addr: Option<GuestAddress>,
firmware_type: Option<FirmwareType>,
) -> Result<()> {
self.set_cpuid(vcpu_config)?;
// tdshim will handle the initialization of MSR, regs and sregs
if firmware_type == Some(FirmwareType::Tdshim) {
return Ok(());
}
dbs_arch::regs::setup_msrs(&self.fd).map_err(VcpuError::MSRSConfiguration)?;
if let Some(start_addr) = kernel_start_addr {
dbs_arch::regs::setup_regs(

View File

@@ -11,6 +11,8 @@ pub struct KernelConfigInfo {
initrd_file: Option<File>,
/// The commandline for guest kernel.
cmdline: linux_loader::cmdline::Cmdline,
/// The descriptor to the firmware file.
firmware_file: Option<File>,
}
impl KernelConfigInfo {
@@ -19,11 +21,13 @@ impl KernelConfigInfo {
kernel_file: File,
initrd_file: Option<File>,
cmdline: linux_loader::cmdline::Cmdline,
firmware_file: Option<File>,
) -> Self {
KernelConfigInfo {
kernel_file,
initrd_file,
cmdline,
firmware_file,
}
}
@@ -51,6 +55,16 @@ impl KernelConfigInfo {
pub fn kernel_cmdline_mut(&mut self) -> &mut linux_loader::cmdline::Cmdline {
&mut self.cmdline
}
/// Borrow the firmware file, or `None` when no firmware file is set.
pub fn firmware_file(&self) -> Option<&File> {
    Option::as_ref(&self.firmware_file)
}
/// Mutably borrow the firmware file, or `None` when no firmware file is set.
pub fn firmware_file_mut(&mut self) -> Option<&mut File> {
    Option::as_mut(&mut self.firmware_file)
}
}
#[cfg(test)]
@@ -64,7 +78,8 @@ mod tests {
let initrd = TempFile::new().unwrap();
let mut cmdline = linux_loader::cmdline::Cmdline::new(1024).unwrap();
cmdline.insert_str("ro").unwrap();
let mut info = KernelConfigInfo::new(kernel.into_file(), Some(initrd.into_file()), cmdline);
let mut info =
KernelConfigInfo::new(kernel.into_file(), Some(initrd.into_file()), cmdline, None);
assert_eq!(info.cmdline.as_cstring().unwrap().as_bytes(), b"ro");
assert!(info.initrd_file_mut().is_some());

View File

@@ -13,7 +13,7 @@ use dbs_address_space::AddressSpace;
use dbs_arch::gic::GICDevice;
#[cfg(target_arch = "aarch64")]
use dbs_arch::pmu::PmuError;
use dbs_boot::InitrdConfig;
use dbs_boot::{FirmwareType, InitrdConfig};
use dbs_utils::epoll_manager::EpollManager;
use dbs_utils::time::TimestampUs;
use kvm_ioctls::VmFd;
@@ -34,6 +34,8 @@ use crate::address_space_manager::{
AddressManagerError, AddressSpaceMgr, AddressSpaceMgrBuilder, GuestAddressSpaceImpl,
GuestMemoryImpl,
};
#[cfg(target_arch = "x86_64")]
use crate::api::v1::ConfidentialVmType;
use crate::api::v1::{InstanceInfo, InstanceState};
use crate::device_manager::console_manager::DmesgWriter;
use crate::device_manager::{DeviceManager, DeviceMgrError, DeviceOpContext};
@@ -211,6 +213,8 @@ pub struct Vm {
#[cfg(all(feature = "hotplug", feature = "dbs-upcall"))]
upcall_client: Option<Arc<UpcallClient<DevMgrService>>>,
firmware_type: Option<FirmwareType>,
}
impl Vm {
@@ -234,6 +238,18 @@ impl Vm {
)
.map_err(Error::DeviceMgrError)?;
#[cfg(target_arch = "x86_64")]
let firmware_type = if api_shared_info.read().unwrap().confidential_vm_type
== Some(ConfidentialVmType::TDX)
{
Some(FirmwareType::Tdshim)
} else {
None
};
#[cfg(not(target_arch = "x86_64"))]
let firmware_type = None;
Ok(Vm {
epoll_manager,
kvm,
@@ -258,6 +274,8 @@ impl Vm {
irqchip_handle: None,
#[cfg(all(feature = "hotplug", feature = "dbs-upcall"))]
upcall_client: None,
firmware_type,
})
}
@@ -597,6 +615,7 @@ impl Vm {
let mut address_space_param = AddressSpaceMgrBuilder::new(&mem_type, &mem_file_path)
.map_err(StartMicroVmError::AddressManagerError)?;
address_space_param.set_kvm_vm_fd(self.vm_fd.clone());
address_space_param.toggle_use_firmware(self.firmware_type.is_some());
self.address_space
.create_address_space(&self.resource_manager, &numa_regions, address_space_param)
.map_err(StartMicroVmError::AddressManagerError)?;
@@ -1072,6 +1091,7 @@ pub mod tests {
kernel_file.into_file(),
None,
cmd_line,
None,
));
vm.init_devices(epoll_mgr).unwrap();

View File

@@ -10,8 +10,12 @@ use std::collections::HashMap;
use std::convert::TryInto;
use std::ops::Deref;
use dbs_address_space::AddressSpace;
use dbs_boot::{add_e820_entry, bootparam, layout, mptable, BootParamsWrapper, InitrdConfig};
use dbs_acpi::sdt::Sdt;
use dbs_address_space::{AddressSpace, AddressSpaceRegionType};
use dbs_boot::{
add_e820_entry, bootparam, layout, mptable, tdshim::*, BootParamsWrapper, FirmwareType,
InitrdConfig,
};
use dbs_interrupt::IOAPIC_MAX_NR_REDIR_ENTRIES;
use dbs_utils::epoll_manager::EpollManager;
use dbs_utils::time::TimestampUs;
@@ -196,6 +200,41 @@ impl Vm {
}
let vm_memory = vm_as.memory();
if self.firmware_type == Some(FirmwareType::Tdshim) {
let tdshim_file = self
.kernel_config
.as_mut()
.ok_or(StartMicroVmError::MissingKernelConfig)?
.firmware_file_mut()
.ok_or(StartMicroVmError::MissingFirmwareFile)?;
let sections =
parse_tdvf_sections(tdshim_file).map_err(StartMicroVmError::TdvfError)?;
let address_space = self
.vm_address_space()
.cloned()
.ok_or(StartMicroVmError::GuestMemoryNotInitialized)?;
let mut hob_address = 0;
// TODO: Fill the empty list with ACPI table content
let acpi_tables: Vec<Sdt> = Vec::new();
self.load_kernel_with_tdshim(
&sections,
vm_memory.deref(),
address_space,
&mut hob_address,
&acpi_tables,
)?;
let boot_vcpu_count = self.vm_config.vcpu_count;
self.vcpu_manager()
.map_err(StartMicroVmError::Vcpu)?
.create_vcpus(boot_vcpu_count, Some(request_ts), None, self.firmware_type)
.map_err(StartMicroVmError::Vcpu)?;
return Ok(());
}
let kernel_loader_result = self.load_kernel(vm_memory.deref())?;
self.vcpu_manager()
.map_err(StartMicroVmError::Vcpu)?
@@ -216,6 +255,15 @@ impl Vm {
cmdline: &Cmdline,
initrd: Option<InitrdConfig>,
) -> std::result::Result<(), StartMicroVmError> {
// tdshim uses ACPI instead of mptable, and kernel boot parameters
// (including e820) would be prepared by firmware
if self.firmware_type == Some(FirmwareType::Tdshim) {
if initrd.is_some() {
return Err(StartMicroVmError::InitrdNotSupported);
}
return Ok(());
}
let cmdline_addr = GuestAddress(dbs_boot::layout::CMDLINE_START);
linux_loader::loader::load_cmdline(vm_memory, cmdline_addr, cmdline)
.map_err(StartMicroVmError::LoadCommandline)?;
@@ -328,4 +376,134 @@ impl Vm {
pub(crate) fn split_irqchip(&self) -> bool {
self.shared_info.read().unwrap().split_irqchip()
}
fn load_kernel_with_tdshim(
&mut self,
sections: &Vec<TdvfSection>,
vm_memory: &GuestMemoryImpl,
address_space: AddressSpace,
hob_address: &mut u64,
acpi_tables: &Vec<Sdt>,
) -> std::result::Result<(), StartMicroVmError> {
let mut required_sections = vec!["Bfv", "TdHob", "PayloadParam"];
for section in sections {
match section.r#type {
TdvfSectionType::Bfv => {
let tdshim_file = self
.kernel_config
.as_mut()
.ok_or(StartMicroVmError::MissingKernelConfig)?
.firmware_file_mut()
.ok_or(StartMicroVmError::MissingFirmwareFile)?;
load_tdvf_section(tdshim_file, section, vm_memory)
.map_err(StartMicroVmError::TdvfError)?;
required_sections.retain(|s| *s != "Bfv");
}
TdvfSectionType::Cfv => {
let tdshim_file = self
.kernel_config
.as_mut()
.ok_or(StartMicroVmError::MissingKernelConfig)?
.firmware_file_mut()
.ok_or(StartMicroVmError::MissingFirmwareFile)?;
load_tdvf_section(tdshim_file, section, vm_memory)
.map_err(StartMicroVmError::TdvfError)?;
}
TdvfSectionType::TdHob => {
*hob_address = section.address;
required_sections.retain(|s| *s != "TdHob");
}
TdvfSectionType::PayloadParam => {
let cmdline = self
.kernel_config
.as_mut()
.ok_or(StartMicroVmError::MissingKernelConfig)?
.kernel_cmdline();
linux_loader::loader::load_cmdline(
vm_memory,
GuestAddress(section.address),
cmdline,
)
.map_err(StartMicroVmError::LoadCommandline)?;
required_sections.retain(|s| *s != "PayloadParam");
}
_ => {}
}
}
if !required_sections.is_empty() {
return Err(StartMicroVmError::MissingTdshimSection(
required_sections[0],
));
}
let kernel_loader_result = self.load_kernel(vm_memory)?;
let payload_info = PayloadInfo::new(
PayloadImageType::RawVmLinux,
kernel_loader_result.kernel_load.0,
);
self.write_tdshim_hob_list(
*hob_address,
vm_memory,
address_space,
payload_info,
acpi_tables,
)?;
Ok(())
}
fn write_tdshim_hob_list(
&self,
hob_address: u64,
vm_memory: &GuestMemoryImpl,
address_space: AddressSpace,
payload_info: PayloadInfo,
acpi_tables: &Vec<Sdt>,
) -> std::result::Result<(), StartMicroVmError> {
let mut hob = TdHob::start(hob_address);
let mut regions = Vec::new();
address_space
.walk_regions(|region| {
match region.region_type() {
AddressSpaceRegionType::DefaultMemory => {
regions.push((region.start_addr().0, region.len(), true));
}
AddressSpaceRegionType::FirmwareMemory => {
regions.push((region.start_addr().0, region.len(), false));
}
_ => {}
}
Ok(())
})
.unwrap();
for (start, size, is_ram) in regions {
hob.add_memory_resource(vm_memory, start, size, is_ram)
.map_err(StartMicroVmError::TdvfError)?;
}
hob.add_mmio_resource(
vm_memory,
layout::MMIO_LOW_START,
layout::BIOS_MEM_START - layout::MMIO_LOW_START,
)
.map_err(StartMicroVmError::TdvfError)?;
hob.add_payload(vm_memory, payload_info)
.map_err(StartMicroVmError::TdvfError)?;
for sdt in acpi_tables {
hob.add_acpi_table(vm_memory, sdt.as_slice())
.map_err(StartMicroVmError::TdvfError)?;
}
hob.finish(vm_memory)
.map_err(StartMicroVmError::TdvfError)?;
Ok(())
}
}