MlCoordinator: get models in BundleImage format from SecurityCoordinator

- remove vestiges of mapped ELF file; models now come from SecurityCoordinator using a LoadModel request in the form of a BundleImage
- change execute api to take a bundle_id & model_id
- track loaded bundle_id & model_id to handle switching between models
- load_elf -> load_image (which loads a BundleImage)
- add a client-side wrapper to hide marshalling details
- add a copyregion (VSpace hole) for processing BundleImages
- integrate with slot allocator
- setup the heap (needed for various things)

Change-Id: I50f3526bddefcdb67b742d42ca396bfefc10801f
GitOrigin-RevId: 80aa0da34b057b334b7d76932c7ad25146364569
2025-07-14 22:44:13 +00:00 · 2022-05-11 00:36:05 +00:00 · 2022-05-11 00:36:05 +00:00 · 09ed791c1e
commit 09ed791c1e
parent 43d921eb53
7 changed files with 176 additions and 76 deletions
--- a/apps/system/components/MlCoordinator/MlCoordinator.camkes
+++ b/apps/system/components/MlCoordinator/MlCoordinator.camkes
@ -11,7 +11,6 @@ component MlCoordinator {
  consumes Interrupt data_fault;

  dataport Buf csr;
-  dataport Buf(0x1000000) elf_file;
  dataport Buf(0x100000) itcm;
  dataport Buf(0x1000000) dtcm;

@ -20,4 +19,7 @@ component MlCoordinator {

  // Enable KataOS CAmkES support.
  attribute int kataos = true;
+
+  // Copyregion for loading bundle images
+  has copyregion BUNDLE_IMAGE;
 }
--- a/apps/system/components/MlCoordinator/kata-ml-coordinator/Cargo.toml
+++ b/apps/system/components/MlCoordinator/kata-ml-coordinator/Cargo.toml
@ -7,11 +7,13 @@ authors = ["Adam Jesionowski <jesionowski@google.com>"]
 edition = "2021"

 [dependencies]
+cstr_core = { version = "0.2.3", default-features = false }
 kata-os-common = { path = "../../kata-os-common" }
+kata-memory-interface = { path = "../../MemoryManager/kata-memory-interface" }
 kata-ml-interface = { path = "../kata-ml-interface" }
+kata-security-interface = { path = "../../SecurityCoordinator/kata-security-interface" }
 kata-vec-core = { path = "../kata-vec-core" }
 log = "0.4"
-xmas-elf = "0.8.0"

 [lib]
 name = "kata_ml_coordinator"
--- a/apps/system/components/MlCoordinator/kata-ml-coordinator/src/run.rs
+++ b/apps/system/components/MlCoordinator/kata-ml-coordinator/src/run.rs
@ -2,28 +2,41 @@

 // ML Coordinator Design Doc: go/sparrow-ml-doc

-use core::slice;
+extern crate alloc;
+
+use cstr_core::CStr;
+use alloc::string::String;
+use kata_os_common::allocator;
 use kata_os_common::logger::KataLogger;
+use kata_os_common::sel4_sys;
+use kata_os_common::slot_allocator;
 use kata_ml_interface::MlCoordinatorInterface;
 use kata_ml_interface::MlCoreInterface;
+use kata_security_interface::*;
 use kata_vec_core::MlCore;
 use log::{error, info, trace};

+use sel4_sys::seL4_CPtr;
+
+use slot_allocator::CSpaceSlot;
+use slot_allocator::KATA_CSPACE_SLOTS;
+
+extern "C" {
+    static SELF_CNODE_FIRST_SLOT: seL4_CPtr;
+    static SELF_CNODE_LAST_SLOT: seL4_CPtr;
+}
+
 pub struct MLCoordinator {
-    is_loaded: bool,
+    loaded_bundle: Option<String>,
+    loaded_model: Option<String>,
    is_running: bool,
    continous_mode: bool,
    ml_core: MlCore,
 }

-extern "C" {
-    static elf_file: *const u8;
-}
-// TODO(jesionowski): Get the size programatically.
-const ELF_SIZE: usize = 0x300000;
-
 pub static mut ML_COORD: MLCoordinator = MLCoordinator {
-    is_loaded: false,
+    loaded_bundle: None,
+    loaded_model: None,
    is_running: false,
    continous_mode: false,
    ml_core: MlCore {},
@ -34,6 +47,15 @@ impl MLCoordinator {
        self.ml_core.enable_interrupts(true);
    }

+    fn is_loaded(&self) -> bool {
+        self.loaded_bundle.is_some() && self.loaded_model.is_some()
+    }
+
+    fn cmp_loaded(&self, bundle_id: &str, model_id: &str) -> bool {
+        self.loaded_bundle.as_deref() == Some(bundle_id) &&
+        self.loaded_model.as_deref() == Some(model_id)
+    }
+
    fn handle_return_interrupt(&mut self) {
        extern "C" {
            fn finish_acknowledge() -> u32;
@ -46,15 +68,21 @@ impl MLCoordinator {

        if return_code != 0 {
            error!(
-                "vctop execution failed with code {}, fault pc: {:#010X}",
-                return_code, fault
+                "{}: vctop execution failed with code {}, fault pc: {:#010X}",
+                self.loaded_model.as_ref().unwrap(), return_code, fault
            );
            self.continous_mode = false;
        }

        self.is_running = false;
        if self.continous_mode {
-            self.execute();
+            // TODO(sleffler): can !is_loaded happen?
+            // XXX needs proper state machine
+            // XXX what is the threading/locking model?
+            if self.is_loaded() {
+                self.ml_core.run(); // Unhalt, start at default PC.
+                self.is_running = true;
+            }
        }

        MlCore::clear_finish();
@ -63,26 +91,46 @@ impl MLCoordinator {
 }

 impl MlCoordinatorInterface for MLCoordinator {
-    fn execute(&mut self) {
+    fn execute(&mut self, bundle_id: &str, model_id: &str) {
        if self.is_running {
+            trace!("Skip execute with {}:{} already running",
+                   self.loaded_bundle.as_ref().unwrap(),
+                   self.loaded_model.as_ref().unwrap());
            return;
        }

-        if !self.is_loaded {
-            let res = self
-                .ml_core
-                .load_elf(unsafe { slice::from_raw_parts(elf_file, ELF_SIZE) });
-            if let Err(e) = res {
-                error!("Load error: {:?}", e);
-            } else {
-                info!("Load successful.");
-                self.is_loaded = true;
+        if !self.cmp_loaded(bundle_id, model_id) {
+            // Loads |model_id| associated with |bundle_id| from the
+            // SecurityCoordinator. The data are returned as unmapped
+            // page frames in a CNode container left in |container_slot|.
+            // To load the model into the vector core the pages must be
+            // mapped into the MlCoordinator's VSpace before being copied
+            // to their destination.
+            let container_slot = CSpaceSlot::new();
+            match kata_security_load_model(bundle_id, model_id, &container_slot) {
+                Ok(model_frames) => {
+                    if let Err(e) = self.ml_core.load_image(&model_frames) {
+                        error!("Load of {}:{} failed: {:?}",
+                               bundle_id, model_id, e);
+                        // NB: may have corrupted TCM, clear loaded state
+                        self.loaded_bundle = None;
+                        self.loaded_model = None;
+                    } else {
+                        info!("Load successful.");
+                        self.loaded_bundle = Some(String::from(bundle_id));
+                        self.loaded_model = Some(String::from(model_id));
+                    }
+                }
+                Err(e) => {
+                    error!("LoadModel of bundle {} model {} failed: {:?}",
+                           bundle_id, model_id, e);
+                }
            }
        }

-        if self.is_loaded {
-            self.is_running = true;
+        if self.is_loaded() {
            self.ml_core.run(); // Unhalt, start at default PC.
+            self.is_running = true;
        }
    }

@ -96,21 +144,51 @@ pub extern "C" fn pre_init() {
    static KATA_LOGGER: KataLogger = KataLogger;
    log::set_logger(&KATA_LOGGER).unwrap();
    log::set_max_level(log::LevelFilter::Trace);
+
+    // TODO(sleffler): temp until we integrate with seL4
+    static mut HEAP_MEMORY: [u8; 4 * 1024] = [0; 4 * 1024];
+    unsafe {
+        allocator::ALLOCATOR.init(HEAP_MEMORY.as_mut_ptr() as usize, HEAP_MEMORY.len());
+        trace!(
+            "setup heap: start_addr {:p} size {}",
+            HEAP_MEMORY.as_ptr(),
+            HEAP_MEMORY.len()
+        );
+    }
+
+    unsafe {
+        KATA_CSPACE_SLOTS.init(
+            /*first_slot=*/ SELF_CNODE_FIRST_SLOT,
+            /*size=*/ SELF_CNODE_LAST_SLOT - SELF_CNODE_FIRST_SLOT
+        );
+        trace!("setup cspace slots: first slot {} free {}",
+               KATA_CSPACE_SLOTS.base_slot(),
+               KATA_CSPACE_SLOTS.free_slots());
+    }
 }

 #[no_mangle]
 pub extern "C" fn mlcoord__init() {
-    trace!("init");
    unsafe {
        ML_COORD.init();
    }
 }

-// TODO: Once multiple model support is in start by name.
 #[no_mangle]
-pub extern "C" fn mlcoord_execute() {
+pub extern "C" fn mlcoord_execute(
+    c_bundle_id: *const cstr_core::c_char,
+    c_model_id: *const cstr_core::c_char,
+) {
    unsafe {
-        ML_COORD.execute();
+        match CStr::from_ptr(c_bundle_id).to_str() {
+            Ok(bundle_id) => match CStr::from_ptr(c_model_id).to_str() {
+                Ok(model_id) => {
+                    ML_COORD.execute(bundle_id, model_id)
+                }
+                _ => error!("Invalid model_id"),
+            }
+            _ => error!("Invalid bundle_id"),
+        }
    }
 }

--- a/apps/system/components/MlCoordinator/kata-ml-interface/Cargo.toml
+++ b/apps/system/components/MlCoordinator/kata-ml-interface/Cargo.toml
@ -4,3 +4,7 @@ cargo-features = ["edition2021"]
 name = "kata-ml-interface"
 version = "0.1.0"
 edition = "2021"
+
+[dependencies]
+cstr_core = { version = "0.2.3", default-features = false }
+kata-memory-interface = { path = "../../MemoryManager/kata-memory-interface" }
--- a/apps/system/components/MlCoordinator/kata-ml-interface/src/lib.rs
+++ b/apps/system/components/MlCoordinator/kata-ml-interface/src/lib.rs
@ -1,14 +1,17 @@
 #![no_std]

+use cstr_core::CString;
+use kata_memory_interface::ObjDescBundle;
+
 pub trait MlCoordinatorInterface {
-    fn execute(&mut self);
+    fn execute(&mut self, bundle_id: &str, model_id: &str);
    fn set_continuous_mode(&mut self, mode: bool);
 }

 pub trait MlCoreInterface {
    fn enable_interrupts(&mut self, enabled: bool);
    fn run(&mut self);
-    fn load_elf(&mut self, elf_slice: &[u8]) -> Result<(), &'static str>;
+    fn load_image(&mut self, frames: &ObjDescBundle) -> Result<(), &'static str>;
    fn get_return_code() -> u32;
    fn get_fault_register() -> u32;
    fn clear_host_req();
@ -16,3 +19,21 @@ pub trait MlCoreInterface {
    fn clear_instruction_fault();
    fn clear_data_fault();
 }
+
+#[inline]
+#[allow(dead_code)]
+pub fn kata_mlcoord_execute(bundle_id: &str, model_id: &str)
+    -> Result<(),cstr_core:: NulError>
+{
+    extern "C" {
+        // NB: this assumes the MlCoordinator component is named "mlcoord".
+        fn mlcoord_execute(
+            c_bundle_id: *const cstr_core::c_char,
+            c_model_id: *const cstr_core::c_char
+        );
+    }
+    let bundle_id_cstr = CString::new(bundle_id)?;
+    let model_id_cstr = CString::new(model_id)?;
+    unsafe { mlcoord_execute(bundle_id_cstr.as_ptr(), model_id_cstr.as_ptr()) };
+    Ok(())
+}
--- a/apps/system/components/MlCoordinator/kata-vec-core/Cargo.toml
+++ b/apps/system/components/MlCoordinator/kata-vec-core/Cargo.toml
@ -6,7 +6,10 @@ version = "0.1.0"
 edition = "2021"

 [dependencies]
+kata-io = { path = "../../DebugConsole/kata-io" }
+kata-memory-interface = { path = "../../MemoryManager/kata-memory-interface" }
 kata-ml-interface = { path = "../kata-ml-interface" }
+kata-proc-interface = { path = "../../ProcessManager/kata-proc-interface" }
+kata-os-common = { path = "../../kata-os-common" }
 modular-bitfield = "0.11.2"
 log = "0.4"
-xmas-elf = { git = "https://github.com/nrc/xmas-elf" }
--- a/apps/system/components/MlCoordinator/kata-vec-core/src/lib.rs
+++ b/apps/system/components/MlCoordinator/kata-vec-core/src/lib.rs
@ -5,11 +5,14 @@

 mod vc_top;

-use core::assert;
+use core::mem::size_of;
 use core::slice;
 use kata_ml_interface::MlCoreInterface;
-use xmas_elf::program::{SegmentData, Type};
-use xmas_elf::ElfFile;
+use kata_memory_interface::ObjDescBundle;
+use kata_proc_interface::BundleImage;
+
+use kata_io as io;
+use io::Read;

 // TODO(jesionowski): Move these constants to an auto-generated file.
 // TODO(b/214092253): ITCM size blow-up needs to be addressed.
@ -21,13 +24,11 @@ const DTCM_PADDR: usize = 0x34000000;
 // TODO(jesionowski): ITCM / DTCM will eventually be merged into a single memory.
 extern "C" {
    static itcm: *mut u32;
-}
-extern "C" {
    static dtcm: *mut u32;
 }

 fn get_dtcm_slice() -> &'static mut [u32] {
-    unsafe { slice::from_raw_parts_mut(dtcm, DTCM_SIZE / 4) }
+    unsafe { slice::from_raw_parts_mut(dtcm, DTCM_SIZE / size_of::<u32>()) }
 }

 pub struct MlCore {}
@ -68,48 +69,37 @@ impl MlCoreInterface for MlCore {
        vc_top::set_ctrl(ctrl);
    }

-    fn load_elf(&mut self, elf_slice: &[u8]) -> Result<(), &'static str> {
-        let itcm_slice = unsafe { slice::from_raw_parts_mut(itcm as *mut u8, ITCM_SIZE) };
-        let dtcm_slice = unsafe { slice::from_raw_parts_mut(dtcm as *mut u8, DTCM_SIZE) };
-
-        let elf = ElfFile::new(elf_slice)?;
+    // Loads the model into the TCM.
+    fn load_image(&mut self, frames: &ObjDescBundle) -> Result<(), &'static str> {
+        let mut image = BundleImage::new(frames);
+        let mut itcm_found = false;
+        let mut dtcm_found = false;

        clear_tcm();
+        // NB: we require both ITCM & DTCM sections and that only one
+        //   instance of each is present
+        while let Some(section) = image.next_section() {
+            let slice = if section.vaddr == ITCM_PADDR {
+                if itcm_found { return Err("dup ITCM") }
+                itcm_found = true;

-        for seg in elf.program_iter() {
-            if seg.get_type()? == Type::Load {
-                let fsize = seg.file_size() as usize;
-                let msize = seg.mem_size() as usize;
+                if section.fsize > ITCM_SIZE { return Err("ITCM too big") }
+                unsafe { slice::from_raw_parts_mut(itcm as *mut u8, ITCM_SIZE) }
+            } else if section.vaddr == DTCM_PADDR {
+                if dtcm_found { return Err("dup DTCM") }
+                dtcm_found = true;

-                if seg.virtual_addr() as usize == ITCM_PADDR {
-                    assert!(
-                        fsize <= ITCM_SIZE,
-                        "Elf's ITCM section is larger than than ITCM_SIZE"
-                    );
-
-                    // Due to being Load types we are guarunteed SegmentData::Undefined as the
-                    // data type.
-                    if let SegmentData::Undefined(bytes) = seg.get_data(&elf)? {
-                        itcm_slice[..fsize].copy_from_slice(&bytes);
-                    }
-                } else if seg.virtual_addr() as usize == DTCM_PADDR {
-                    assert!(
-                        msize <= DTCM_SIZE,
-                        "Elf's DTCM section is larger than than DTCM_SIZE"
-                    );
-
-                    if let SegmentData::Undefined(bytes) = seg.get_data(&elf)? {
-                        dtcm_slice[..fsize].copy_from_slice(&bytes);
-                    }
-                    // TODO(jesionowski): Remove when clear_tcm is fully implemented.
-                    // Clear NOBITS sections.
-                    dtcm_slice[fsize..msize].fill(0x00);
-                } else {
-                    assert!(false, "Elf contains LOAD section outside TCM");
-                }
-            }
+                if section.fsize > DTCM_SIZE { return Err("DTCM section too big") }
+                unsafe { slice::from_raw_parts_mut(dtcm as *mut u8, DTCM_SIZE) }
+            } else {
+                return Err("Unexpected section");
+            };
+            image.read_exact(&mut slice[section.data_range()])
+                .map_err(|_| "section read error")?;
+            // TODO(jesionowski): Remove when clear_tcm is fully implemented.
+            slice[section.zero_range()].fill(0x00);
        }
-
+        if !itcm_found || !dtcm_found { return Err("Incomplete") }
        Ok(())
    }