diff --git a/Cargo.lock b/Cargo.lock index 41207d0ec5..79bca79908 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1790,6 +1790,7 @@ dependencies = [ "arc-swap", "bytes 1.11.1", "crossbeam-channel", + "dbs-acpi", "dbs-address-space", "dbs-allocator", "dbs-arch", diff --git a/src/dragonball/Cargo.toml b/src/dragonball/Cargo.toml index 8ef2e2137f..46d359fc6e 100644 --- a/src/dragonball/Cargo.toml +++ b/src/dragonball/Cargo.toml @@ -13,6 +13,7 @@ edition = "2018" anyhow = "1.0.32" arc-swap = "1.5.0" bytes = "1.1.0" +dbs-acpi = { workspace = true } dbs-address-space = { workspace = true } dbs-allocator = { workspace = true } dbs-arch = { workspace = true } diff --git a/src/dragonball/dbs_address_space/src/layout.rs b/src/dragonball/dbs_address_space/src/layout.rs index cd6c6bfb0a..e8aaf9a22a 100644 --- a/src/dragonball/dbs_address_space/src/layout.rs +++ b/src/dragonball/dbs_address_space/src/layout.rs @@ -74,6 +74,7 @@ impl AddressSpaceLayout { return false; } } + AddressSpaceRegionType::FirmwareMemory => {} } true diff --git a/src/dragonball/dbs_address_space/src/region.rs b/src/dragonball/dbs_address_space/src/region.rs index b729c12e82..044d6e2c77 100644 --- a/src/dragonball/dbs_address_space/src/region.rs +++ b/src/dragonball/dbs_address_space/src/region.rs @@ -30,6 +30,8 @@ pub enum AddressSpaceRegionType { DeviceMemory, /// DAX address region for virtio-fs/virtio-pmem. DAXMemory, + /// Address region where virtual firmwares are loaded. + FirmwareMemory, } /// Struct to maintain configuration information about a guest address region. @@ -273,6 +275,31 @@ impl AddressSpaceRegion { )) } + /// Create an address space region for virtual firmware. 
+ /// + /// # Arguments + /// * `base` - Base address in VM to map content + /// * `size` - Length of content to map + /// * `prot_flags` - mmap protection flags + pub fn create_firmware_region( + base: GuestAddress, + size: GuestUsize, + prot_flags: i32, + ) -> Result { + // Firmware region currently only supports anonymous mmap + let perm_flags = libc::MAP_PRIVATE | libc::MAP_ANONYMOUS; + Ok(Self::build( + AddressSpaceRegionType::FirmwareMemory, + base, + size, + None, + None, + perm_flags, + prot_flags, + false, + )) + } + /// Get type of the address space region. pub fn region_type(&self) -> AddressSpaceRegionType { self.ty diff --git a/src/dragonball/dbs_boot/src/firmware/mod.rs b/src/dragonball/dbs_boot/src/firmware/mod.rs new file mode 100644 index 0000000000..e4f9d81219 --- /dev/null +++ b/src/dragonball/dbs_boot/src/firmware/mod.rs @@ -0,0 +1,27 @@ +// Copyright (c) 2026 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +#![allow(missing_docs)] + +#[cfg(target_arch = "x86_64")] +/// Structs and utilities for tdshim +pub mod tdshim; + +pub const EFI_RESOURCE_SYSTEM_MEMORY: u32 = 0x00; +pub const EFI_RESOURCE_MEMORY_MAPPED_IO: u32 = 0x01; +pub const EFI_RESOURCE_MEMORY_UNACCEPTED: u32 = 0x07; + +pub const EFI_RESOURCE_ATTRIBUTE_PRESENT: u32 = 0x0000_0001; +pub const EFI_RESOURCE_ATTRIBUTE_INITIALIZED: u32 = 0x0000_0002; +pub const EFI_RESOURCE_ATTRIBUTE_TESTED: u32 = 0x0000_0004; +pub const EFI_RESOURCE_ATTRIBUTE_UNCACHEABLE: u32 = 0x0000_0400; + +/// Firmware types +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum FirmwareType { + /// Tdshim + #[cfg(target_arch = "x86_64")] + Tdshim, +} diff --git a/src/dragonball/dbs_boot/src/firmware/tdshim/hob.rs b/src/dragonball/dbs_boot/src/firmware/tdshim/hob.rs new file mode 100644 index 0000000000..ccf7057387 --- /dev/null +++ b/src/dragonball/dbs_boot/src/firmware/tdshim/hob.rs @@ -0,0 +1,403 @@ +// Copyright (c) 2026 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use super::TdvfError; 
+use crate::firmware::*; + +use vm_memory::{ByteValued, Bytes, GuestAddress, GuestMemoryMmap}; + +/// HOB Type +#[repr(u16)] +#[derive(Copy, Clone, Debug, Default)] +enum HobType { + /// Hand Off + Handoff = 0x1, + /// Resource Descriptor + ResourceDescriptor = 0x3, + /// Guid Extension + GuidExtension = 0x4, + /// Unused + #[default] + Unused = 0xfffe, + /// End Of HOB List + EndOfHobList = 0xffff, +} + +/// HOB header +#[repr(C)] +#[derive(Copy, Clone, Default, Debug)] +struct HobHeader { + r#type: HobType, + length: u16, + reserved: u32, +} + +/// HOB hand off info table +#[repr(C)] +#[derive(Copy, Clone, Default, Debug)] +struct HobHandoffInfoTable { + header: HobHeader, + version: u32, + boot_mode: u32, + efi_memory_top: u64, + efi_memory_bottom: u64, + efi_free_memory_top: u64, + efi_free_memory_bottom: u64, + efi_end_of_hob_list: u64, +} + +impl HobHandoffInfoTable { + pub fn new(efi_end_of_hob_list: u64) -> Self { + HobHandoffInfoTable { + header: HobHeader { + r#type: HobType::Handoff, + length: std::mem::size_of::() as u16, + reserved: 0, + }, + version: 0x9, + boot_mode: 0, + efi_memory_top: 0, + efi_memory_bottom: 0, + efi_free_memory_top: 0, + efi_free_memory_bottom: 0, + efi_end_of_hob_list, + } + } +} + +/// HOB resource descriptor +#[repr(C)] +#[derive(Copy, Clone, Default, Debug)] +struct HobResourceDescriptor { + header: HobHeader, + efi_guid_type: EfiGuid, + resource_type: u32, + resource_attribute: u32, + physical_start: u64, + resource_length: u64, +} + +impl HobResourceDescriptor { + fn new( + resource_type: u32, + resource_attribute: u32, + physical_start: u64, + resource_length: u64, + ) -> Self { + HobResourceDescriptor { + header: HobHeader { + r#type: HobType::ResourceDescriptor, + length: std::mem::size_of::() as u16, + reserved: 0, + }, + efi_guid_type: EfiGuid::resource(), + resource_type, + resource_attribute, + physical_start, + resource_length, + } + } +} + +/// HOB end +#[repr(C)] +#[derive(Copy, Clone, Default, Debug)] +struct 
/// EFI GUID, split into one 32-bit, two 16-bit and eight 8-bit groups.
#[repr(C)]
#[derive(Copy, Clone, Default, Debug, PartialEq)]
struct EfiGuid {
    data1: u32,
    data2: u16,
    data3: u16,
    data4: [u8; 8],
}

impl EfiGuid {
    /// Assemble a GUID from its four groups.
    const fn from_parts(data1: u32, data2: u16, data3: u16, data4: [u8; 8]) -> Self {
        Self {
            data1,
            data2,
            data3,
            data4,
        }
    }

    /// RESOURCE_HOB_GUID: every group is zero, i.e. the same value as `EfiGuid::default()`.
    fn resource() -> Self {
        Self::from_parts(0, 0, 0, [0; 8])
    }

    /// HOB_PAYLOAD_INFO_GUID
    /// 0xb96fa412, 0x461f, 0x4be3, {0x8c, 0xd, 0xad, 0x80, 0x5a, 0x49, 0x7a, 0xc0}
    fn payload() -> Self {
        Self::from_parts(
            0xb96f_a412,
            0x461f,
            0x4be3,
            [0x8c, 0xd, 0xad, 0x80, 0x5a, 0x49, 0x7a, 0xc0],
        )
    }

    /// ACPI_TABLE_HOB_GUID
    /// 0x6a0c5870, 0xd4ed, 0x44f4, {0xa1, 0x35, 0xdd, 0x23, 0x8b, 0x6f, 0xc, 0x8d}
    fn acpi() -> Self {
        Self::from_parts(
            0x6a0c_5870,
            0xd4ed,
            0x44f4,
            [0xa1, 0x35, 0xdd, 0x23, 0x8b, 0x6f, 0xc, 0x8d],
        )
    }
}
/// Round `v` up to the nearest multiple of 8.
///
/// Values that already sit on an 8-byte boundary are returned unchanged.
fn align_hob(v: u64) -> u64 {
    const HOB_ALIGNMENT: u64 = 8;
    v.next_multiple_of(HOB_ALIGNMENT)
}
TdvfError> { + // Write end + let end = HobEnd::new(); + mem.write_obj(end, GuestAddress(self.current_offset)) + .map_err(TdvfError::WriteHobError)?; + self.update_offset::(); + + // Write handoff, delayed as it needs end of HOB list + let efi_end_of_hob_list = self.current_offset; + let handoff = HobHandoffInfoTable::new(efi_end_of_hob_list); + mem.write_obj(handoff, GuestAddress(self.start_offset)) + .map_err(TdvfError::WriteHobError) + } + + /// Add memory resource + pub fn add_memory_resource( + &mut self, + mem: &GuestMemoryMmap, + physical_start: u64, + resource_length: u64, + ram: bool, + ) -> Result<(), TdvfError> { + self.add_resource( + mem, + physical_start, + resource_length, + if ram { + EFI_RESOURCE_MEMORY_UNACCEPTED + } else { + EFI_RESOURCE_SYSTEM_MEMORY + }, + EFI_RESOURCE_ATTRIBUTE_PRESENT + | EFI_RESOURCE_ATTRIBUTE_INITIALIZED + | EFI_RESOURCE_ATTRIBUTE_TESTED, + ) + } + + /// Add mmio resource + pub fn add_mmio_resource( + &mut self, + mem: &GuestMemoryMmap, + physical_start: u64, + resource_length: u64, + ) -> Result<(), TdvfError> { + self.add_resource( + mem, + physical_start, + resource_length, + EFI_RESOURCE_MEMORY_MAPPED_IO, + EFI_RESOURCE_ATTRIBUTE_PRESENT + | EFI_RESOURCE_ATTRIBUTE_INITIALIZED + | EFI_RESOURCE_ATTRIBUTE_UNCACHEABLE, + ) + } + + /// Add payload + pub fn add_payload( + &mut self, + mem: &GuestMemoryMmap, + payload_info: PayloadInfo, + ) -> Result<(), TdvfError> { + let payload = TdPayloadDescription::new(payload_info); + mem.write_obj(payload, GuestAddress(self.current_offset)) + .map_err(TdvfError::WriteHobError)?; + self.update_offset::(); + Ok(()) + } + + /// Add ACPI table + pub fn add_acpi_table( + &mut self, + mem: &GuestMemoryMmap, + table_content: &[u8], + ) -> Result<(), TdvfError> { + // We already know the HobGuidType size is 8 bytes multiple, but we + // need the total size to be 8 bytes multiple. That is why the ACPI + // table size must be 8 bytes multiple as well. 
+ let length = std::mem::size_of::() as u16 + + align_hob(table_content.len() as u64) as u16; + + let hob_guid_type = AcpiDescription::new(length); + + mem.write_obj(hob_guid_type, GuestAddress(self.current_offset)) + .map_err(TdvfError::WriteHobError)?; + let current_offset = self.current_offset + std::mem::size_of::() as u64; + + // In case the table is quite large, let's make sure we can handle + // retrying until everything has been correctly copied. + let mut offset: usize = 0; + loop { + let bytes_written = mem + .write( + &table_content[offset..], + GuestAddress(current_offset + offset as u64), + ) + .map_err(TdvfError::WriteHobError)?; + offset += bytes_written; + if offset >= table_content.len() { + break; + } + } + self.current_offset += length as u64; + + Ok(()) + } +} diff --git a/src/dragonball/dbs_boot/src/firmware/tdshim/mod.rs b/src/dragonball/dbs_boot/src/firmware/tdshim/mod.rs new file mode 100644 index 0000000000..7baa3d144c --- /dev/null +++ b/src/dragonball/dbs_boot/src/firmware/tdshim/mod.rs @@ -0,0 +1,35 @@ +// Copyright (c) 2026 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +#![deny(missing_docs)] + +use thiserror::Error; +use vm_memory::GuestMemoryError; + +mod section; +pub use section::*; + +mod hob; +pub use hob::*; + +/// TDVF related errors +#[derive(Error, Debug)] +pub enum TdvfError { + /// Error reading td_shim binary + #[error("Failed to read td_shim file: {0}")] + TdshimFileError(#[source] std::io::Error), + + /// Error parsing TDVF descriptor + #[error("Failed to parse TDVF descriptor: {0}")] + TdvfDescriptorError(&'static str), + + /// Error writing HOB list + #[error("Failed to write HOB list: {0}")] + WriteHobError(#[source] GuestMemoryError), + + /// Error loading section to guest memory + #[error("Failed to load TDVF section to guest memory: {0}")] + LoadTdvfSectionError(#[source] GuestMemoryError), +} diff --git a/src/dragonball/dbs_boot/src/firmware/tdshim/section.rs 
b/src/dragonball/dbs_boot/src/firmware/tdshim/section.rs new file mode 100644 index 0000000000..5a7d25906b --- /dev/null +++ b/src/dragonball/dbs_boot/src/firmware/tdshim/section.rs @@ -0,0 +1,146 @@ +// Copyright (c) 2026 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use super::TdvfError; + +use std::fs::File; +use std::io::{Read, Seek, SeekFrom}; + +use vm_memory::{Bytes, GuestAddress, GuestMemoryMmap}; + +/// TDVF descriptor +#[repr(C, packed)] +pub struct TdvfDescriptor { + signature: [u8; 4], + length: u32, + version: u32, + num_sections: u32, +} + +#[repr(C, packed)] +#[derive(Clone, Copy, Default, Debug)] +/// TDVF section +pub struct TdvfSection { + /// Data offset + pub data_offset: u32, + /// Raw data size + pub data_size: u32, + /// Guest memory address + pub address: u64, + /// Memory data size + pub size: u64, + /// TDVF section type + pub r#type: TdvfSectionType, + /// TDVF attributes + pub attributes: u32, +} + +#[repr(u32)] +#[derive(Clone, Copy, Default, Debug, PartialEq)] +/// TDVF section type +pub enum TdvfSectionType { + /// BFV section type + Bfv, + /// CFV section type + Cfv, + /// TD HOB + TdHob, + /// Temp memory + TempMem, + /// Permanent memory + PermMem, + /// Payload + Payload, + /// Payload Parameters + PayloadParam, + /// Reserved + #[default] + Reserved = 0xffffffff, +} + +/// Parse TDVF sections and return a list of categorized sections +/// +/// #Arguments +/// * `file` - The tdshim image file. +pub fn parse_tdvf_sections(file: &mut File) -> Result, TdvfError> { + // The 32-bit offset to the TDVF metadata is located 32 bytes from + // the end of the file. 
+ // See "TDVF Metadata Pointer" in "TDX Virtual Firmware Design Guide + file.seek(SeekFrom::End(-0x20)) + .map_err(TdvfError::TdshimFileError)?; + + let mut descriptor_offset: [u8; 4] = [0; 4]; + file.read_exact(&mut descriptor_offset) + .map_err(TdvfError::TdshimFileError)?; + let descriptor_offset = u32::from_le_bytes(descriptor_offset) as u64; + + file.seek(SeekFrom::Start(descriptor_offset)) + .map_err(TdvfError::TdshimFileError)?; + + let mut descriptor: TdvfDescriptor = unsafe { std::mem::zeroed() }; + // Safe as we read exactly the size of the descriptor header + file.read_exact(unsafe { + std::slice::from_raw_parts_mut( + &mut descriptor as *mut _ as *mut u8, + std::mem::size_of::(), + ) + }) + .map_err(TdvfError::TdshimFileError)?; + + if &descriptor.signature != b"TDVF" { + return Err(TdvfError::TdvfDescriptorError( + "Invalid descriptor signature", + )); + } + + if descriptor.length as usize + != std::mem::size_of::() + + std::mem::size_of::() * descriptor.num_sections as usize + { + return Err(TdvfError::TdvfDescriptorError("Invalid descriptor length")); + } + + if descriptor.version != 1 { + return Err(TdvfError::TdvfDescriptorError("Invalid descriptor version")); + } + + let mut sections = Vec::new(); + sections.resize_with(descriptor.num_sections as usize, TdvfSection::default); + + // Safe as we read exactly the advertised sections + file.read_exact(unsafe { + std::slice::from_raw_parts_mut( + sections.as_mut_ptr() as *mut u8, + descriptor.num_sections as usize * std::mem::size_of::(), + ) + }) + .map_err(TdvfError::TdshimFileError)?; + + Ok(sections) +} + +/// Load a TDVF section to guest memory +/// +/// #Arguments +/// * `file` - The tdshim image file. +/// * `section` - The metadata of target section. +/// * `mem` - Guest memory to load TDVF section to. 
+pub fn load_tdvf_section( + file: &mut File, + section: &TdvfSection, + mem: &GuestMemoryMmap, +) -> Result<(), TdvfError> { + file.seek(SeekFrom::Start(section.data_offset as u64)) + .map_err(TdvfError::TdshimFileError)?; + + mem.read_volatile_from( + GuestAddress(section.address), + file, + section.data_size as usize, + ) + .map_err(TdvfError::LoadTdvfSectionError)?; + + Ok(()) +} diff --git a/src/dragonball/dbs_boot/src/lib.rs b/src/dragonball/dbs_boot/src/lib.rs index e281b8d3ca..d88eb726d2 100644 --- a/src/dragonball/dbs_boot/src/lib.rs +++ b/src/dragonball/dbs_boot/src/lib.rs @@ -15,6 +15,9 @@ mod aarch64; #[cfg(target_arch = "aarch64")] pub use aarch64::*; +mod firmware; +pub use firmware::*; + /// Specialized [std::result::Result] for boot related operations. pub type Result = std::result::Result; diff --git a/src/dragonball/dbs_boot/src/x86_64/layout.rs b/src/dragonball/dbs_boot/src/x86_64/layout.rs index df7c7218fe..3e1207a26d 100644 --- a/src/dragonball/dbs_boot/src/x86_64/layout.rs +++ b/src/dragonball/dbs_boot/src/x86_64/layout.rs @@ -56,7 +56,7 @@ pub const IRQ_BASE: u32 = 5; pub const IRQ_MAX: u32 = 15; /// Address for the TSS setup. -pub const KVM_TSS_ADDRESS: u64 = 0xfffb_d000; +pub const KVM_TSS_ADDRESS: u64 = 0xfefb_d000; /// Where BIOS/VGA magic would live on a real PC. pub const EBDA_START: u64 = 0x9fc00; @@ -70,6 +70,11 @@ pub const GUEST_MEM_START: u64 = 0u64; /// Size of memory below MMIO hole. pub const GUEST_MEM_LOW_SIZE: u64 = MMIO_LOW_START - GUEST_MEM_START; +/// Lower bound of BIOS memory. +pub const BIOS_MEM_START: u64 = MMIO_LOW_END - BIOS_MEM_SIZE + 1; +/// Size of BIOS memory. 
+pub const BIOS_MEM_SIZE: u64 = 16u64 << 20; + /// Max retry times for reading /proc/cpuinfo const CPUINFO_READ_RETRY: u64 = 5; diff --git a/src/dragonball/src/address_space_manager.rs b/src/dragonball/src/address_space_manager.rs index 9f5eb0a650..53743368f5 100644 --- a/src/dragonball/src/address_space_manager.rs +++ b/src/dragonball/src/address_space_manager.rs @@ -27,6 +27,8 @@ use dbs_address_space::{ AddressSpaceRegionType, NumaNode, NumaNodeInfo, MPOL_MF_MOVE, MPOL_PREFERRED, }; use dbs_allocator::Constraint; +#[cfg(target_arch = "x86_64")] +use dbs_boot::layout::{BIOS_MEM_SIZE, BIOS_MEM_START}; use kvm_bindings::kvm_userspace_memory_region; use kvm_ioctls::VmFd; use log::{debug, error, info, warn}; @@ -164,6 +166,7 @@ pub struct AddressSpaceMgrBuilder<'a> { mem_prealloc: bool, dirty_page_logging: bool, vmfd: Option>, + use_firmware: bool, } impl<'a> AddressSpaceMgrBuilder<'a> { @@ -180,6 +183,7 @@ impl<'a> AddressSpaceMgrBuilder<'a> { mem_prealloc: false, dirty_page_logging: false, vmfd: None, + use_firmware: false, }) } @@ -201,6 +205,11 @@ impl<'a> AddressSpaceMgrBuilder<'a> { self.dirty_page_logging = logging; } + /// Enable/disable firmware memory region. + pub fn toggle_use_firmware(&mut self, firmware: bool) { + self.use_firmware = firmware; + } + /// Set KVM [`VmFd`] handle to configure memory slots. pub fn set_kvm_vm_fd(&mut self, vmfd: Arc) -> Option> { let mut existing_vmfd = None; @@ -317,17 +326,32 @@ impl AddressSpaceMgr { } } + #[cfg(target_arch = "x86_64")] + if param.use_firmware { + let region = Arc::new( + AddressSpaceRegion::create_firmware_region( + GuestAddress(BIOS_MEM_START), + BIOS_MEM_SIZE, + libc::PROT_READ | libc::PROT_WRITE, + ) + .map_err(AddressManagerError::CreateAddressSpaceRegion)?, + ); + regions.push(region); + } + // Create GuestMemory object let mut vm_memory = GuestMemoryMmap::new(); for reg in regions.iter() { - // Allocate used guest memory addresses. 
- // These addresses are statically allocated, resource allocation/update should not fail. - let constraint = Constraint::new(reg.len()) - .min(reg.start_addr().raw_value()) - .max(reg.last_addr().raw_value()); - let _key = res_mgr - .allocate_mem_address(&constraint) - .ok_or(AddressManagerError::NoAvailableMemAddress)?; + if reg.region_type() != AddressSpaceRegionType::FirmwareMemory { + // Allocate used guest memory addresses. + // These addresses are statically allocated, resource allocation/update should not fail. + let constraint = Constraint::new(reg.len()) + .min(reg.start_addr().raw_value()) + .max(reg.last_addr().raw_value()); + let _key = res_mgr + .allocate_mem_address(&constraint) + .ok_or(AddressManagerError::NoAvailableMemAddress)?; + } let mmap_reg = self.create_mmap_region(reg.clone())?; vm_memory = vm_memory diff --git a/src/dragonball/src/api/v1/boot_source.rs b/src/dragonball/src/api/v1/boot_source.rs index 612de04a18..bbd094bddf 100644 --- a/src/dragonball/src/api/v1/boot_source.rs +++ b/src/dragonball/src/api/v1/boot_source.rs @@ -30,6 +30,9 @@ pub struct BootSourceConfig { /// The boot arguments to pass to the kernel. #[serde(skip_serializing_if = "Option::is_none")] pub boot_args: Option, + /// The path to firmware file. + #[serde(skip_serializing_if = "Option::is_none")] + pub firmware_path: Option, } /// Errors associated with actions on `BootSourceConfig`. @@ -49,6 +52,10 @@ pub enum BootSourceConfigError { #[error("the kernel command line is invalid: {0}")] InvalidKernelCommandLine(#[source] linux_loader::cmdline::Error), + /// The firmware file cannot be opened. + #[error("the firmware file cannot be opened due to invalid path or invalid permissions: {0}")] + InvalidFirmwarePath(#[source] std::io::Error), + /// The boot source cannot be update post boot. 
#[error("the update operation is not allowed after boot")] UpdateNotAllowedPostBoot, diff --git a/src/dragonball/src/api/v1/vmm_action.rs b/src/dragonball/src/api/v1/vmm_action.rs index e4fbb1413d..43509df139 100644 --- a/src/dragonball/src/api/v1/vmm_action.rs +++ b/src/dragonball/src/api/v1/vmm_action.rs @@ -441,7 +441,7 @@ impl VmmService { boot_source_config: BootSourceConfig, ) -> VmmRequestResult { use super::BootSourceConfigError::{ - InvalidInitrdPath, InvalidKernelCommandLine, InvalidKernelPath, + InvalidFirmwarePath, InvalidInitrdPath, InvalidKernelCommandLine, InvalidKernelPath, UpdateNotAllowedPostBoot, }; use super::VmmActionError::BootSource; @@ -468,7 +468,14 @@ impl VmmService { .insert_str(boot_args) .map_err(|e| BootSource(InvalidKernelCommandLine(e)))?; - let kernel_config = KernelConfigInfo::new(kernel_file, initrd_file, cmdline); + let firmware_file = match boot_source_config.firmware_path { + None => None, + Some(ref path) => { + Some(File::open(path).map_err(|e| BootSource(InvalidFirmwarePath(e)))?) 
+ } + }; + + let kernel_config = KernelConfigInfo::new(kernel_file, initrd_file, cmdline, firmware_file); vm.set_kernel_config(kernel_config); Ok(VmmData::Empty) diff --git a/src/dragonball/src/device_manager/mod.rs b/src/dragonball/src/device_manager/mod.rs index 7ff75186ec..e5811102c1 100644 --- a/src/dragonball/src/device_manager/mod.rs +++ b/src/dragonball/src/device_manager/mod.rs @@ -1737,6 +1737,7 @@ mod tests { kernel_file, None, linux_loader::cmdline::Cmdline::new(0x1000).unwrap(), + None, ); let address_space = vm.vm_address_space().cloned(); diff --git a/src/dragonball/src/error.rs b/src/dragonball/src/error.rs index 7b7f8c7b6a..b7ab319e0a 100644 --- a/src/dragonball/src/error.rs +++ b/src/dragonball/src/error.rs @@ -11,6 +11,8 @@ #[cfg(target_arch = "aarch64")] use dbs_arch::pmu::PmuError; +#[cfg(target_arch = "x86_64")] +use dbs_boot::tdshim::TdvfError; #[cfg(feature = "dbs-virtio-devices")] use dbs_virtio_devices::Error as VirtioError; @@ -228,6 +230,28 @@ pub enum StartMicroVmError { /// Cannot enable split irqchip #[error("Failed to enable split irqchip: {0}")] EnableSplitIrqchip(#[source] vmm_sys_util::errno::Error), + + /// Missing firmware file + #[error("Cannot start microvm due to missing firmware file")] + MissingFirmwareFile, + + #[cfg(target_arch = "x86_64")] + /// TDVF errors + #[error("TDVF error: {0}")] + TdvfError(#[source] TdvfError), + + #[cfg(target_arch = "x86_64")] + /// Missing tdshim section + #[error("Missing tdshim section: {0}")] + MissingTdshimSection(&'static str), + + /// Guest address space not initialized + #[error("Guest address space not initialized")] + GuestMemoryNotInitialized, + + /// Initrd is not supported + #[error("Initrd is not supported")] + InitrdNotSupported, } /// Errors associated with starting the instance. 
diff --git a/src/dragonball/src/test_utils.rs b/src/dragonball/src/test_utils.rs index 58612cbaf3..38efe18e1a 100644 --- a/src/dragonball/src/test_utils.rs +++ b/src/dragonball/src/test_utils.rs @@ -22,6 +22,7 @@ pub mod tests { kernel_file.into_file(), None, cmd_line, + None, )); let vm_config = VmConfigInfo { diff --git a/src/dragonball/src/vcpu/aarch64.rs b/src/dragonball/src/vcpu/aarch64.rs index f811158c62..6ca3b8b0dd 100644 --- a/src/dragonball/src/vcpu/aarch64.rs +++ b/src/dragonball/src/vcpu/aarch64.rs @@ -11,7 +11,7 @@ use std::sync::mpsc::{channel, Sender}; use std::sync::Arc; use dbs_arch::{regs, VpmuFeatureLevel}; -use dbs_boot::get_fdt_addr; +use dbs_boot::{get_fdt_addr, FirmwareType}; use dbs_utils::time::TimestampUs; use kvm_ioctls::{VcpuFd, VmFd}; use vm_memory::{Address, GuestAddress, GuestAddressSpace}; @@ -88,6 +88,7 @@ impl Vcpu { vm_as: &GuestAddressSpaceImpl, kernel_load_addr: Option, _pgtable_addr: Option, + _firmware_type: Option, ) -> Result<()> { let mut kvi: kvm_bindings::kvm_vcpu_init = kvm_bindings::kvm_vcpu_init::default(); diff --git a/src/dragonball/src/vcpu/vcpu_manager.rs b/src/dragonball/src/vcpu/vcpu_manager.rs index 7d3bed2a50..6439f792cf 100644 --- a/src/dragonball/src/vcpu/vcpu_manager.rs +++ b/src/dragonball/src/vcpu/vcpu_manager.rs @@ -16,6 +16,7 @@ use std::sync::{Arc, Barrier, Mutex, RwLock}; use std::time::Duration; use dbs_arch::VpmuFeatureLevel; +use dbs_boot::FirmwareType; #[cfg(target_arch = "x86_64")] use dbs_interrupt::InterruptManager; #[cfg(all(feature = "hotplug", feature = "dbs-upcall"))] @@ -392,7 +393,7 @@ impl VcpuManager { } else { self.vcpu_config.boot_vcpu_count }; - self.create_vcpus(boot_vcpu_count, Some(request_ts), Some(entry_addr))?; + self.create_vcpus(boot_vcpu_count, Some(request_ts), Some(entry_addr), None)?; Ok(()) } @@ -413,6 +414,7 @@ impl VcpuManager { vcpu_count: u8, request_ts: Option, entry_addr: Option, + firmware_type: Option, ) -> Result> { info!("create vcpus"); if vcpu_count > 
self.vcpu_config.max_vcpu_count { @@ -422,7 +424,7 @@ impl VcpuManager { let request_ts = request_ts.unwrap_or_default(); let mut created_cpus = Vec::new(); for cpu_id in self.calculate_available_vcpus(vcpu_count) { - self.create_vcpu(cpu_id, request_ts.clone(), entry_addr)?; + self.create_vcpu(cpu_id, request_ts.clone(), entry_addr, firmware_type)?; created_cpus.push(cpu_id); } @@ -527,6 +529,7 @@ impl VcpuManager { &mut self, entry_addr: Option, vcpu: &mut Vcpu, + firmware_type: Option, ) -> std::result::Result<(), VcpuError> { vcpu.configure( &self.vcpu_config, @@ -534,6 +537,7 @@ impl VcpuManager { &self.vm_as, entry_addr, None, + firmware_type, ) } @@ -542,6 +546,7 @@ impl VcpuManager { cpu_index: u8, request_ts: TimestampUs, entry_addr: Option, + firmware_type: Option, ) -> Result<()> { info!("creating vcpu {cpu_index}"); if self.vcpu_infos.get(cpu_index as usize).is_none() { @@ -564,7 +569,7 @@ impl VcpuManager { .unwrap() .vcpu .insert(cpu_index as u32, vcpu.metrics()); - self.configure_single_vcpu(entry_addr, &mut vcpu) + self.configure_single_vcpu(entry_addr, &mut vcpu, firmware_type) .map_err(VcpuManagerError::Vcpu)?; self.vcpu_infos[cpu_index as usize].vcpu = Some(vcpu); @@ -896,7 +901,7 @@ mod hotplug { } let created_vcpus = self - .create_vcpus(vcpu_count, None, None) + .create_vcpus(vcpu_count, None, None, None) .map_err(VcpuResizeError::Vcpu)?; let cpu_ids = self .activate_vcpus(vcpu_count, true) @@ -1246,11 +1251,11 @@ mod tests { let mut vcpu_manager = vm.vcpu_manager().unwrap(); // test create vcpu more than max - let res = vcpu_manager.create_vcpus(20, None, None); + let res = vcpu_manager.create_vcpus(20, None, None, None); assert!(matches!(res, Err(VcpuManagerError::ExpectedVcpuExceedMax))); // test create vcpus - assert!(vcpu_manager.create_vcpus(2, None, None).is_ok()); + assert!(vcpu_manager.create_vcpus(2, None, None, None).is_ok()); assert_eq!(vcpu_manager.present_vcpus_count(), 0); assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 2); 
assert_eq!(vcpu_manager.vcpus().len(), 2); diff --git a/src/dragonball/src/vcpu/x86_64.rs b/src/dragonball/src/vcpu/x86_64.rs index cb090591a8..5f358057bb 100644 --- a/src/dragonball/src/vcpu/x86_64.rs +++ b/src/dragonball/src/vcpu/x86_64.rs @@ -11,6 +11,7 @@ use std::sync::Arc; use dbs_arch::cpuid::{process_cpuid, VmSpec}; use dbs_arch::gdt::gdt_entry; +use dbs_boot::FirmwareType; use dbs_interrupt::InterruptManager; use dbs_utils::metric::IncMetric; use dbs_utils::time::TimestampUs; @@ -94,9 +95,15 @@ impl Vcpu { vm_as: &GuestAddressSpaceImpl, kernel_start_addr: Option, _pgtable_addr: Option, + firmware_type: Option, ) -> Result<()> { self.set_cpuid(vcpu_config)?; + // tdshim will handle the initialization of MSR, regs and sregs + if firmware_type == Some(FirmwareType::Tdshim) { + return Ok(()); + } + dbs_arch::regs::setup_msrs(&self.fd).map_err(VcpuError::MSRSConfiguration)?; if let Some(start_addr) = kernel_start_addr { dbs_arch::regs::setup_regs( diff --git a/src/dragonball/src/vm/kernel_config.rs b/src/dragonball/src/vm/kernel_config.rs index fb51f8fc13..f0661dd634 100644 --- a/src/dragonball/src/vm/kernel_config.rs +++ b/src/dragonball/src/vm/kernel_config.rs @@ -11,6 +11,8 @@ pub struct KernelConfigInfo { initrd_file: Option, /// The commandline for guest kernel. cmdline: linux_loader::cmdline::Cmdline, + /// The descriptor to the firmware file. + firmware_file: Option, } impl KernelConfigInfo { @@ -19,11 +21,13 @@ impl KernelConfigInfo { kernel_file: File, initrd_file: Option, cmdline: linux_loader::cmdline::Cmdline, + firmware_file: Option, ) -> Self { KernelConfigInfo { kernel_file, initrd_file, cmdline, + firmware_file, } } @@ -51,6 +55,16 @@ impl KernelConfigInfo { pub fn kernel_cmdline_mut(&mut self) -> &mut linux_loader::cmdline::Cmdline { &mut self.cmdline } + + /// Get a shared reference to the firmware file. + pub fn firmware_file(&self) -> Option<&File> { + self.firmware_file.as_ref() + } + + /// Get a mutable reference to the firmware file. 
+ pub fn firmware_file_mut(&mut self) -> Option<&mut File> { + self.firmware_file.as_mut() + } } #[cfg(test)] @@ -64,7 +78,8 @@ mod tests { let initrd = TempFile::new().unwrap(); let mut cmdline = linux_loader::cmdline::Cmdline::new(1024).unwrap(); cmdline.insert_str("ro").unwrap(); - let mut info = KernelConfigInfo::new(kernel.into_file(), Some(initrd.into_file()), cmdline); + let mut info = + KernelConfigInfo::new(kernel.into_file(), Some(initrd.into_file()), cmdline, None); assert_eq!(info.cmdline.as_cstring().unwrap().as_bytes(), b"ro"); assert!(info.initrd_file_mut().is_some()); diff --git a/src/dragonball/src/vm/mod.rs b/src/dragonball/src/vm/mod.rs index f97fab32b1..006281c302 100644 --- a/src/dragonball/src/vm/mod.rs +++ b/src/dragonball/src/vm/mod.rs @@ -13,7 +13,7 @@ use dbs_address_space::AddressSpace; use dbs_arch::gic::GICDevice; #[cfg(target_arch = "aarch64")] use dbs_arch::pmu::PmuError; -use dbs_boot::InitrdConfig; +use dbs_boot::{FirmwareType, InitrdConfig}; use dbs_utils::epoll_manager::EpollManager; use dbs_utils::time::TimestampUs; use kvm_ioctls::VmFd; @@ -34,6 +34,8 @@ use crate::address_space_manager::{ AddressManagerError, AddressSpaceMgr, AddressSpaceMgrBuilder, GuestAddressSpaceImpl, GuestMemoryImpl, }; +#[cfg(target_arch = "x86_64")] +use crate::api::v1::ConfidentialVmType; use crate::api::v1::{InstanceInfo, InstanceState}; use crate::device_manager::console_manager::DmesgWriter; use crate::device_manager::{DeviceManager, DeviceMgrError, DeviceOpContext}; @@ -211,6 +213,8 @@ pub struct Vm { #[cfg(all(feature = "hotplug", feature = "dbs-upcall"))] upcall_client: Option>>, + + firmware_type: Option, } impl Vm { @@ -234,6 +238,18 @@ impl Vm { ) .map_err(Error::DeviceMgrError)?; + #[cfg(target_arch = "x86_64")] + let firmware_type = if api_shared_info.read().unwrap().confidential_vm_type + == Some(ConfidentialVmType::TDX) + { + Some(FirmwareType::Tdshim) + } else { + None + }; + + #[cfg(not(target_arch = "x86_64"))] + let firmware_type = 
None; + Ok(Vm { epoll_manager, kvm, @@ -258,6 +274,8 @@ impl Vm { irqchip_handle: None, #[cfg(all(feature = "hotplug", feature = "dbs-upcall"))] upcall_client: None, + + firmware_type, }) } @@ -597,6 +615,7 @@ impl Vm { let mut address_space_param = AddressSpaceMgrBuilder::new(&mem_type, &mem_file_path) .map_err(StartMicroVmError::AddressManagerError)?; address_space_param.set_kvm_vm_fd(self.vm_fd.clone()); + address_space_param.toggle_use_firmware(self.firmware_type.is_some()); self.address_space .create_address_space(&self.resource_manager, &numa_regions, address_space_param) .map_err(StartMicroVmError::AddressManagerError)?; @@ -1072,6 +1091,7 @@ pub mod tests { kernel_file.into_file(), None, cmd_line, + None, )); vm.init_devices(epoll_mgr).unwrap(); diff --git a/src/dragonball/src/vm/x86_64.rs b/src/dragonball/src/vm/x86_64.rs index 8a09f4ced4..cd075c047f 100644 --- a/src/dragonball/src/vm/x86_64.rs +++ b/src/dragonball/src/vm/x86_64.rs @@ -10,8 +10,12 @@ use std::collections::HashMap; use std::convert::TryInto; use std::ops::Deref; -use dbs_address_space::AddressSpace; -use dbs_boot::{add_e820_entry, bootparam, layout, mptable, BootParamsWrapper, InitrdConfig}; +use dbs_acpi::sdt::Sdt; +use dbs_address_space::{AddressSpace, AddressSpaceRegionType}; +use dbs_boot::{ + add_e820_entry, bootparam, layout, mptable, tdshim::*, BootParamsWrapper, FirmwareType, + InitrdConfig, +}; use dbs_interrupt::IOAPIC_MAX_NR_REDIR_ENTRIES; use dbs_utils::epoll_manager::EpollManager; use dbs_utils::time::TimestampUs; @@ -196,6 +200,41 @@ impl Vm { } let vm_memory = vm_as.memory(); + + if self.firmware_type == Some(FirmwareType::Tdshim) { + let tdshim_file = self + .kernel_config + .as_mut() + .ok_or(StartMicroVmError::MissingKernelConfig)? 
+ .firmware_file_mut() + .ok_or(StartMicroVmError::MissingFirmwareFile)?; + let sections = + parse_tdvf_sections(tdshim_file).map_err(StartMicroVmError::TdvfError)?; + let address_space = self + .vm_address_space() + .cloned() + .ok_or(StartMicroVmError::GuestMemoryNotInitialized)?; + let mut hob_address = 0; + // TODO: Fill the empty list with ACPI table content + let acpi_tables: Vec = Vec::new(); + + self.load_kernel_with_tdshim( + &sections, + vm_memory.deref(), + address_space, + &mut hob_address, + &acpi_tables, + )?; + + let boot_vcpu_count = self.vm_config.vcpu_count; + self.vcpu_manager() + .map_err(StartMicroVmError::Vcpu)? + .create_vcpus(boot_vcpu_count, Some(request_ts), None, self.firmware_type) + .map_err(StartMicroVmError::Vcpu)?; + + return Ok(()); + } + let kernel_loader_result = self.load_kernel(vm_memory.deref())?; self.vcpu_manager() .map_err(StartMicroVmError::Vcpu)? @@ -216,6 +255,15 @@ impl Vm { cmdline: &Cmdline, initrd: Option, ) -> std::result::Result<(), StartMicroVmError> { + // tdshim uses ACPI instead of mptable, and kernel boot parameters + // (including e820) would be prepared by firmware + if self.firmware_type == Some(FirmwareType::Tdshim) { + if initrd.is_some() { + return Err(StartMicroVmError::InitrdNotSupported); + } + return Ok(()); + } + let cmdline_addr = GuestAddress(dbs_boot::layout::CMDLINE_START); linux_loader::loader::load_cmdline(vm_memory, cmdline_addr, cmdline) .map_err(StartMicroVmError::LoadCommandline)?; @@ -328,4 +376,134 @@ impl Vm { pub(crate) fn split_irqchip(&self) -> bool { self.shared_info.read().unwrap().split_irqchip() } + + fn load_kernel_with_tdshim( + &mut self, + sections: &Vec, + vm_memory: &GuestMemoryImpl, + address_space: AddressSpace, + hob_address: &mut u64, + acpi_tables: &Vec, + ) -> std::result::Result<(), StartMicroVmError> { + let mut required_sections = vec!["Bfv", "TdHob", "PayloadParam"]; + + for section in sections { + match section.r#type { + TdvfSectionType::Bfv => { + let tdshim_file =
self + .kernel_config + .as_mut() + .ok_or(StartMicroVmError::MissingKernelConfig)? + .firmware_file_mut() + .ok_or(StartMicroVmError::MissingFirmwareFile)?; + load_tdvf_section(tdshim_file, section, vm_memory) + .map_err(StartMicroVmError::TdvfError)?; + required_sections.retain(|s| *s != "Bfv"); + } + TdvfSectionType::Cfv => { + let tdshim_file = self + .kernel_config + .as_mut() + .ok_or(StartMicroVmError::MissingKernelConfig)? + .firmware_file_mut() + .ok_or(StartMicroVmError::MissingFirmwareFile)?; + load_tdvf_section(tdshim_file, section, vm_memory) + .map_err(StartMicroVmError::TdvfError)?; + } + TdvfSectionType::TdHob => { + *hob_address = section.address; + required_sections.retain(|s| *s != "TdHob"); + } + TdvfSectionType::PayloadParam => { + let cmdline = self + .kernel_config + .as_mut() + .ok_or(StartMicroVmError::MissingKernelConfig)? + .kernel_cmdline(); + linux_loader::loader::load_cmdline( + vm_memory, + GuestAddress(section.address), + cmdline, + ) + .map_err(StartMicroVmError::LoadCommandline)?; + required_sections.retain(|s| *s != "PayloadParam"); + } + _ => {} + } + } + + if !required_sections.is_empty() { + return Err(StartMicroVmError::MissingTdshimSection( + required_sections[0], + )); + } + + let kernel_loader_result = self.load_kernel(vm_memory)?; + let payload_info = PayloadInfo::new( + PayloadImageType::RawVmLinux, + kernel_loader_result.kernel_load.0, + ); + + self.write_tdshim_hob_list( + *hob_address, + vm_memory, + address_space, + payload_info, + acpi_tables, + )?; + + Ok(()) + } + + fn write_tdshim_hob_list( + &self, + hob_address: u64, + vm_memory: &GuestMemoryImpl, + address_space: AddressSpace, + payload_info: PayloadInfo, + acpi_tables: &Vec, + ) -> std::result::Result<(), StartMicroVmError> { + let mut hob = TdHob::start(hob_address); + + let mut regions = Vec::new(); + address_space + .walk_regions(|region| { + match region.region_type() { + AddressSpaceRegionType::DefaultMemory => { + regions.push((region.start_addr().0, 
region.len(), true)); + } + AddressSpaceRegionType::FirmwareMemory => { + regions.push((region.start_addr().0, region.len(), false)); + } + _ => {} + } + Ok(()) + }) + .unwrap(); + + for (start, size, is_ram) in regions { + hob.add_memory_resource(vm_memory, start, size, is_ram) + .map_err(StartMicroVmError::TdvfError)?; + } + + hob.add_mmio_resource( + vm_memory, + layout::MMIO_LOW_START, + layout::BIOS_MEM_START - layout::MMIO_LOW_START, + ) + .map_err(StartMicroVmError::TdvfError)?; + + hob.add_payload(vm_memory, payload_info) + .map_err(StartMicroVmError::TdvfError)?; + + for sdt in acpi_tables { + hob.add_acpi_table(vm_memory, sdt.as_slice()) + .map_err(StartMicroVmError::TdvfError)?; + } + + hob.finish(vm_memory) + .map_err(StartMicroVmError::TdvfError)?; + + Ok(()) + } }