From cbe333e2e35b16bec26a5877814d515ac6341a63 Mon Sep 17 00:00:00 2001 From: Adam Jesionowski Date: Wed, 6 Jul 2022 09:50:53 -0700 Subject: [PATCH] kata/processmanager: Merge ITCM and DTCM into TCM. For now we treat the image as one big block. A follow up CL will break it into individual sections like text, model_output, etc. Change-Id: I86006fc18c940f04f4d15ec032f9196c01255170 GitOrigin-RevId: 7f36043202bca8994f62ced3338b4ba4b1ef922a --- .../MlCoordinator/MlCoordinator.camkes | 5 +- .../MlCoordinator/kata-ml-shared/src/lib.rs | 18 ++-- .../MlCoordinator/kata-vec-core/src/lib.rs | 84 +++++++---------- .../MlCoordinator/kata-vec-core/src/vc_top.rs | 92 +++++++------------ apps/system/system.camkes | 24 ++--- 5 files changed, 82 insertions(+), 141 deletions(-) diff --git a/apps/system/components/MlCoordinator/MlCoordinator.camkes b/apps/system/components/MlCoordinator/MlCoordinator.camkes index 0328fae..f06a5b8 100644 --- a/apps/system/components/MlCoordinator/MlCoordinator.camkes +++ b/apps/system/components/MlCoordinator/MlCoordinator.camkes @@ -15,9 +15,8 @@ component MlCoordinator { consumes Interrupt instruction_fault; consumes Interrupt data_fault; - dataport Buf csr; - dataport Buf(0x100000) itcm; - dataport Buf(0x1000000) dtcm; + dataport Buf CSR; + dataport Buf(0x1000000) TCM; uses LoggerInterface logger; uses MemoryInterface memory; diff --git a/apps/system/components/MlCoordinator/kata-ml-shared/src/lib.rs b/apps/system/components/MlCoordinator/kata-ml-shared/src/lib.rs index 766a314..dee3eed 100644 --- a/apps/system/components/MlCoordinator/kata-ml-shared/src/lib.rs +++ b/apps/system/components/MlCoordinator/kata-ml-shared/src/lib.rs @@ -35,7 +35,8 @@ impl ImageSizes { } } -// XXX: Out-dated and should use ImageSizes. Fix in merge. +// XXX: Out-dated and should use ImageSizes. Refactor when multiple sections +// are enabled. /// The Vector Core uses a Windowed MMU (go/sparrow-wmmu) in order to prevent /// models from interferring with each other. 
Before executing a model, /// windows to only that model's code and data are opened. @@ -45,11 +46,11 @@ pub struct Window { pub size: usize, } +// XXX: Out-dated. Refactor when multiple sections are enabled. /// When a model is loaded onto the Vector Core, the ML Coordinator needs to /// track where each window is. pub struct ModelSections { - pub instructions: Window, - pub data: Window, + pub tcm: Window, } /// The page size of the WMMU. @@ -60,13 +61,8 @@ pub const WMMU_PAGE_SIZE: usize = 0x1000; /// memory contstraints. pub const MAX_MODELS: usize = 32; -// TODO(b/214092253): ITCM size blow-up needs to be addressed. -pub const ITCM_SIZE: usize = 0x100000; -pub const ITCM_PADDR: usize = 0x32000000; -pub const DTCM_SIZE: usize = 0x1000000; -pub const DTCM_PADDR: usize = 0x34000000; - -// XXX: Only TCM variables should remain after TCM merge. Temporarily keeping -// both until further merge work is complete. +/// The size of the Vector Core's Tightly Coupled Memory (TCM). pub const TCM_SIZE: usize = 0x1000000; + +/// The address of the Vector Core's TCM, viewed from the SMC. 
pub const TCM_PADDR: usize = 0x34000000; diff --git a/apps/system/components/MlCoordinator/kata-vec-core/src/lib.rs b/apps/system/components/MlCoordinator/kata-vec-core/src/lib.rs index 1f0f39a..15da2e1 100644 --- a/apps/system/components/MlCoordinator/kata-vec-core/src/lib.rs +++ b/apps/system/components/MlCoordinator/kata-vec-core/src/lib.rs @@ -9,15 +9,14 @@ use core::mem::size_of; use core::slice; use kata_memory_interface::ObjDescBundle; use kata_ml_shared::{ModelSections, Window, WMMU_PAGE_SIZE}; -use kata_ml_shared::{ITCM_SIZE, ITCM_PADDR, DTCM_SIZE, DTCM_PADDR}; +use kata_ml_shared::{TCM_SIZE, TCM_PADDR}; use kata_proc_interface::BundleImage; use io::Read; use kata_io as io; extern "C" { - static itcm: *mut u32; - static dtcm: *mut u32; + static TCM: *mut u32; } fn round_up(a: usize, b: usize) -> usize { @@ -38,16 +37,13 @@ pub fn enable_interrupts(enable: bool) { } pub fn set_wmmu(sections: &ModelSections) { + // XXX: Support multiple sections. // The length of the window is not the size of the window, but rather // the last address of the window. This saves us a bit in hardware: // 0x400000 is 23 bits vs. 0x3FFFFF 22 bits. - vc_top::set_immu_window_offset(0, sections.instructions.addr); - vc_top::set_immu_window_length(0, sections.instructions.size - 1); - vc_top::set_immu_window_permission(0, vc_top::Permission::Read); - - vc_top::set_dmmu_window_offset(0, sections.data.addr); - vc_top::set_dmmu_window_length(0, sections.data.size - 1); - vc_top::set_dmmu_window_permission(0, vc_top::Permission::ReadAndWrite); + vc_top::set_mmu_window_offset(0, sections.tcm.addr); + vc_top::set_mmu_window_length(0, sections.tcm.size - 1); + vc_top::set_mmu_window_permission(0, vc_top::Permission::ReadWriteExecute); } pub fn run() { @@ -61,44 +57,27 @@ pub fn run() { // Loads the model into the TCM. 
pub fn load_image(frames: &ObjDescBundle) -> Result { let mut image = BundleImage::new(frames); - let mut itcm_found = false; - let mut dtcm_found = false; - // Size of windows is filled in below. - let mut iwindow = Window { - addr: ITCM_PADDR, - size: 0, - }; - let mut dwindow = Window { - addr: DTCM_PADDR, + let mut tcm_found = false; + // Size of window is filled in below. + let mut window = Window { + addr: TCM_PADDR, size: 0, }; clear_tcm(); - // NB: we require both ITCM & DTCM sections and that only one - // instance of each is present + // NB: we require a TCM section and that only one is present while let Some(section) = image.next_section() { - let slice = if section.vaddr == ITCM_PADDR { - if itcm_found { - return Err("dup ITCM"); + let slice = if section.vaddr == TCM_PADDR { + if tcm_found { + return Err("dup TCM section"); } - itcm_found = true; + tcm_found = true; - if section.fsize > ITCM_SIZE { - return Err("ITCM too big"); + if section.fsize > TCM_SIZE { + return Err("TCM section too big"); } - iwindow.size = round_up(section.msize, WMMU_PAGE_SIZE); - unsafe { slice::from_raw_parts_mut(itcm as *mut u8, ITCM_SIZE) } - } else if section.vaddr == DTCM_PADDR { - if dtcm_found { - return Err("dup DTCM"); - } - dtcm_found = true; - - if section.fsize > DTCM_SIZE { - return Err("DTCM section too big"); - } - dwindow.size = round_up(section.msize, WMMU_PAGE_SIZE); - unsafe { slice::from_raw_parts_mut(dtcm as *mut u8, DTCM_SIZE) } + window.size = round_up(section.msize, WMMU_PAGE_SIZE); + unsafe { slice::from_raw_parts_mut(TCM as *mut u8, TCM_SIZE) } } else { return Err("Unexpected section"); }; @@ -108,12 +87,11 @@ pub fn load_image(frames: &ObjDescBundle) -> Result // TODO(jesionowski): Remove when clear_tcm is fully implemented. 
slice[section.zero_range()].fill(0x00); } - if !itcm_found || !dtcm_found { + if !tcm_found { return Err("Incomplete"); } Ok(ModelSections { - instructions: iwindow, - data: dwindow, + tcm: window, }) } @@ -142,10 +120,11 @@ pub fn clear_data_fault() { vc_top::set_intr_state(intr_state); } -fn clear_section(start: u32, end: u32, is_itcm: bool) { +// TODO(jesionowski): Remove dead_code when TCM_SIZE fits into INIT_END. +#[allow(dead_code)] +fn clear_section(start: u32, end: u32) { let init_start = vc_top::InitStart::new() - .with_address(start) - .with_imem_dmem_sel(is_itcm); + .with_address(start); vc_top::set_init_start(init_start); let init_end = vc_top::InitEnd::new().with_address(end).with_valid(true); @@ -155,24 +134,23 @@ fn clear_section(start: u32, end: u32, is_itcm: bool) { } pub fn clear_tcm() { - clear_section(0, ITCM_SIZE as u32, true); - // TODO(jesionowski): Enable when DTCM_SIZE fits into INIT_END. - // clear_section(0, DTCM_SIZE as u32, false); + // TODO(jesionowski): Enable when TCM_SIZE fits into INIT_END. + // clear_section(0, TCM_SIZE as u32); } // TODO(jesionowski): Remove these when error handling is refactored. // The status will be faulty iff the interrupt line is raised, and // we won't have the fault registers on Springbok. 
-fn get_dtcm_slice() -> &'static mut [u32] { - unsafe { slice::from_raw_parts_mut(dtcm, DTCM_SIZE / size_of::()) } +fn get_tcm_slice() -> &'static mut [u32] { + unsafe { slice::from_raw_parts_mut(TCM, TCM_SIZE / size_of::()) } } pub fn get_return_code() -> u32 { const RC_OFFSET: usize = 0x3FFFEE; - get_dtcm_slice()[RC_OFFSET] + get_tcm_slice()[RC_OFFSET] } pub fn get_fault_register() -> u32 { const FAULT_OFFSET: usize = 0x3FFFEF; - get_dtcm_slice()[FAULT_OFFSET] + get_tcm_slice()[FAULT_OFFSET] } diff --git a/apps/system/components/MlCoordinator/kata-vec-core/src/vc_top.rs b/apps/system/components/MlCoordinator/kata-vec-core/src/vc_top.rs index a00cf8a..04d8b6c 100644 --- a/apps/system/components/MlCoordinator/kata-vec-core/src/vc_top.rs +++ b/apps/system/components/MlCoordinator/kata-vec-core/src/vc_top.rs @@ -6,7 +6,7 @@ use core::ptr; use modular_bitfield::prelude::*; extern "C" { - static csr: *mut [u32; 9]; + static CSR: *mut [u32; 9]; } #[bitfield] @@ -91,98 +91,97 @@ pub struct InitStatus { } pub fn get_intr_state() -> IntrState { - unsafe { IntrState::from_bytes(ptr::read_volatile(csr)[0].to_ne_bytes()) } + unsafe { IntrState::from_bytes(ptr::read_volatile(CSR)[0].to_ne_bytes()) } } pub fn set_intr_state(intr_state: IntrState) { unsafe { - (*csr)[0] = u32::from_ne_bytes(intr_state.into_bytes()); + (*CSR)[0] = u32::from_ne_bytes(intr_state.into_bytes()); } } pub fn get_intr_enable() -> IntrEnable { - unsafe { IntrEnable::from_bytes(ptr::read_volatile(csr)[1].to_ne_bytes()) } + unsafe { IntrEnable::from_bytes(ptr::read_volatile(CSR)[1].to_ne_bytes()) } } pub fn set_intr_enable(intr_enable: IntrEnable) { unsafe { - (*csr)[1] = u32::from_ne_bytes(intr_enable.into_bytes()); + (*CSR)[1] = u32::from_ne_bytes(intr_enable.into_bytes()); } } pub fn get_intr_test() -> IntrTest { - unsafe { IntrTest::from_bytes(ptr::read_volatile(csr)[2].to_ne_bytes()) } + unsafe { IntrTest::from_bytes(ptr::read_volatile(CSR)[2].to_ne_bytes()) } } pub fn set_intr_test(intr_test: 
IntrTest) { unsafe { - (*csr)[2] = u32::from_ne_bytes(intr_test.into_bytes()); + (*CSR)[2] = u32::from_ne_bytes(intr_test.into_bytes()); } } pub fn get_ctrl() -> Ctrl { - unsafe { Ctrl::from_bytes(ptr::read_volatile(csr)[3].to_ne_bytes()) } + unsafe { Ctrl::from_bytes(ptr::read_volatile(CSR)[3].to_ne_bytes()) } } pub fn set_ctrl(ctrl: Ctrl) { unsafe { - (*csr)[3] = u32::from_ne_bytes(ctrl.into_bytes()); + (*CSR)[3] = u32::from_ne_bytes(ctrl.into_bytes()); } } pub fn get_memory_bank_ctrl() -> MemoryBankCtrl { - unsafe { MemoryBankCtrl::from_bytes(ptr::read_volatile(csr)[4].to_ne_bytes()) } + unsafe { MemoryBankCtrl::from_bytes(ptr::read_volatile(CSR)[4].to_ne_bytes()) } } pub fn set_memory_bank_ctrl(memory_bank_ctrl: MemoryBankCtrl) { unsafe { - (*csr)[4] = u32::from_ne_bytes(memory_bank_ctrl.into_bytes()); + (*CSR)[4] = u32::from_ne_bytes(memory_bank_ctrl.into_bytes()); } } pub fn get_error_status() -> ErrorStatus { - unsafe { ErrorStatus::from_bytes(ptr::read_volatile(csr)[5].to_ne_bytes()) } + unsafe { ErrorStatus::from_bytes(ptr::read_volatile(CSR)[5].to_ne_bytes()) } } pub fn set_error_status(error_status: ErrorStatus) { unsafe { - (*csr)[5] = u32::from_ne_bytes(error_status.into_bytes()); + (*CSR)[5] = u32::from_ne_bytes(error_status.into_bytes()); } } pub fn get_init_start() -> InitStart { - unsafe { InitStart::from_bytes(ptr::read_volatile(csr)[6].to_ne_bytes()) } + unsafe { InitStart::from_bytes(ptr::read_volatile(CSR)[6].to_ne_bytes()) } } pub fn set_init_start(init_start: InitStart) { unsafe { - (*csr)[6] = u32::from_ne_bytes(init_start.into_bytes()); + (*CSR)[6] = u32::from_ne_bytes(init_start.into_bytes()); } } pub fn get_init_end() -> InitEnd { - unsafe { InitEnd::from_bytes(ptr::read_volatile(csr)[7].to_ne_bytes()) } + unsafe { InitEnd::from_bytes(ptr::read_volatile(CSR)[7].to_ne_bytes()) } } pub fn set_init_end(init_end: InitEnd) { unsafe { - (*csr)[7] = u32::from_ne_bytes(init_end.into_bytes()); + (*CSR)[7] = 
u32::from_ne_bytes(init_end.into_bytes()); } } pub fn get_init_status() -> InitStatus { - unsafe { InitStatus::from_bytes(ptr::read_volatile(csr)[8].to_ne_bytes()) } + unsafe { InitStatus::from_bytes(ptr::read_volatile(CSR)[8].to_ne_bytes()) } } pub fn set_init_status(init_status: InitStatus) { unsafe { - (*csr)[8] = u32::from_ne_bytes(init_status.into_bytes()); + (*CSR)[8] = u32::from_ne_bytes(init_status.into_bytes()); } } -// The WMMU registers start at 0x400 past the vector core CSRs. There are two -// blocks, one for the DMMU and one for the IMMU, each 0x400 long. Within the -// block, the registers are arranged like this: +// The WMMU registers start at 0x400 past the vector core CSRs and are 0x400 +// long. Within the block, the registers are arranged like this: // 0x0000: Window 0 Offset // 0x0004: Window 0 Length // 0x0008: Window 0 Permissions @@ -193,7 +192,6 @@ pub fn set_init_status(init_status: InitStatus) { // 0x001C: Unused // And so on. const WMMU_OFFSET: usize = 0x400; // From base CSR. -const DMMU_OFFSET: usize = 0x400; // From IMMU CSRs. 
const OFFSET_ADDR: usize = 0; const LENGTH_ADDR: usize = 4; @@ -202,64 +200,38 @@ const BYTES_PER_WINDOW: usize = 0x10; const MAX_WINDOW: usize = 0x40; -fn window_addr(window: usize, is_immu: bool) -> usize { +fn window_addr(window: usize) -> usize { assert!(window < MAX_WINDOW, "Window out of range of WMMU"); - let mut addr: usize = unsafe { WMMU_OFFSET + csr as usize }; - - if (!is_immu) { - addr += DMMU_OFFSET; - } + let mut addr: usize = unsafe { WMMU_OFFSET + CSR as usize }; addr + window * BYTES_PER_WINDOW } -fn set_window_offset(window: usize, offset: usize, is_immu: bool) { - let addr = window_addr(window, is_immu) + OFFSET_ADDR; +pub fn set_mmu_window_offset(window: usize, offset: usize) { + let addr = window_addr(window) + OFFSET_ADDR; unsafe { core::ptr::write_volatile(addr as *mut usize, offset); } } -pub fn set_immu_window_offset(window: usize, offset: usize) { - set_window_offset(window, offset, true); -} - -pub fn set_dmmu_window_offset(window: usize, offset: usize) { - set_window_offset(window, offset, false); -} - -fn set_window_length(window: usize, length: usize, is_immu: bool) { - let addr = window_addr(window, is_immu) + LENGTH_ADDR; +pub fn set_mmu_window_length(window: usize, length: usize) { + let addr = window_addr(window) + LENGTH_ADDR; unsafe { core::ptr::write_volatile(addr as *mut usize, length); } } -pub fn set_immu_window_length(window: usize, length: usize) { - set_window_length(window, length, true); -} - -pub fn set_dmmu_window_length(window: usize, length: usize) { - set_window_length(window, length, false); -} - pub enum Permission { Read = 1, Write = 2, - ReadAndWrite = 3, + ReadWrite = 3, + Execute = 4, + ReadWriteExecute = 7, } -pub fn set_window_permission(window: usize, permission: Permission, is_immu: bool) { - let addr = window_addr(window, is_immu) + PERMISSIONS_ADDR; +pub fn set_mmu_window_permission(window: usize, permission: Permission) { + let addr = window_addr(window) + PERMISSIONS_ADDR; unsafe { 
core::ptr::write_volatile(addr as *mut usize, permission as usize); } } - -pub fn set_immu_window_permission(window: usize, permission: Permission) { - set_window_permission(window, permission, true); -} - -pub fn set_dmmu_window_permission(window: usize, permission: Permission) { - set_window_permission(window, permission, false); -} diff --git a/apps/system/system.camkes b/apps/system/system.camkes index 4f51d70..3151e31 100644 --- a/apps/system/system.camkes +++ b/apps/system/system.camkes @@ -41,9 +41,9 @@ component OpenTitanTimer { component VectorCoreHw { hardware; - dataport Buf csr; - dataport Buf(0x100000) itcm; - dataport Buf(0x1000000) dtcm; + dataport Buf CSR; + // TODO(jesionowski): Export TCM_SIZE in cbindgen. + dataport Buf(0x1000000) TCM; emits Interrupt host_req; emits Interrupt finish; @@ -110,7 +110,7 @@ assembly { to uart_driver.tx_empty); // VectorCoreDriver - connection seL4HardwareMMIO vc_csr(from ml_coordinator.csr, to vctop.csr); + connection seL4HardwareMMIO vc_csr(from ml_coordinator.CSR, to vctop.CSR); connection seL4HardwareInterrupt vctop_host_req(from vctop.host_req, to ml_coordinator.host_req); connection seL4HardwareInterrupt vctop_finish(from vctop.finish, @@ -119,10 +119,8 @@ assembly { to ml_coordinator.instruction_fault); connection seL4HardwareInterrupt vctop_data_fault(from vctop.data_fault, to ml_coordinator.data_fault); - connection seL4HardwareMMIO vc_itcm(from ml_coordinator.itcm, - to vctop.itcm); - connection seL4HardwareMMIO vc_dtcm(from ml_coordinator.dtcm, - to vctop.dtcm); + connection seL4HardwareMMIO vc_tcm(from ml_coordinator.TCM, + to vctop.TCM); // TimerService @@ -210,12 +208,10 @@ assembly { uart.rx_watermark_irq_number = 2; uart.tx_empty_irq_number = 3; - vctop.csr_paddr = 0x47000000; - vctop.csr_size = 0x1000; - vctop.itcm_paddr = 0x32000000; - vctop.itcm_size = 0x100000; - vctop.dtcm_paddr = 0x34000000; - vctop.dtcm_size = 0x1000000; + vctop.CSR_paddr = 0x47000000; + vctop.CSR_size = 0x1000; + 
vctop.TCM_paddr = 0x34000000; + vctop.TCM_size = 0x1000000; vctop.host_req_irq_number = 13; // kTopMatchaPlicIrqIdVcTopHostReq @ top_matcha.h vctop.finish_irq_number = 14; // kTopMatchaPlicIrqIdVcTopFinish @ top_matcha.h vctop.instruction_fault_irq_number = 15; // kTopMatchaPlicIrqIdVcTopInstructionFault @ top_matcha.h