From 09ed791c1ee523a63affa63f0ee24da569a01e3a Mon Sep 17 00:00:00 2001
From: Sam Leffler <sleffler@google.com>
Date: Wed, 11 May 2022 00:36:05 +0000
Subject: [PATCH] MlCoordinator: get models in BundleImage format from
 SecurityCoordinator

- remove vestiges of mapped ELF file; models now come from
  SecurityCoordinator using a LoadModel request in the form of a BundleImage
- change execute api to take a bundle_id & model_id
- track loaded bundle_id & model_id to handle switching between models
- load_elf -> load_image (which loads a BundleImage)
- add a client-side wrapper to hide marshalling details
- add a copyregion (VSpace hole) for processing BundleImages
- integrate with slot allocator
- set up the heap (needed for various things)

Change-Id: I50f3526bddefcdb67b742d42ca396bfefc10801f
GitOrigin-RevId: 80aa0da34b057b334b7d76932c7ad25146364569
---
 .../MlCoordinator/MlCoordinator.camkes        |   4 +-
 .../kata-ml-coordinator/Cargo.toml            |   4 +-
 .../kata-ml-coordinator/src/run.rs            | 134 ++++++++++++++----
 .../kata-ml-interface/Cargo.toml              |   4 +
 .../kata-ml-interface/src/lib.rs              |  25 +++-
 .../MlCoordinator/kata-vec-core/Cargo.toml    |   5 +-
 .../MlCoordinator/kata-vec-core/src/lib.rs    |  76 +++++-----
 7 files changed, 176 insertions(+), 76 deletions(-)
diff --git a/apps/system/components/MlCoordinator/MlCoordinator.camkes b/apps/system/components/MlCoordinator/MlCoordinator.camkes
index 7d69508..0823f0b 100644
--- a/apps/system/components/MlCoordinator/MlCoordinator.camkes
+++ b/apps/system/components/MlCoordinator/MlCoordinator.camkes
@@ -11,7 +11,6 @@ component MlCoordinator {
   consumes Interrupt data_fault;
 
   dataport Buf csr;
-  dataport Buf(0x1000000) elf_file;
   dataport Buf(0x100000) itcm;
   dataport Buf(0x1000000) dtcm;
 
@@ -20,4 +19,7 @@ component MlCoordinator {
 
   // Enable KataOS CAmkES support.
   attribute int kataos = true;
+
+  // Copyregion for loading bundle images
+  has copyregion BUNDLE_IMAGE;
 }
diff --git a/apps/system/components/MlCoordinator/kata-ml-coordinator/Cargo.toml b/apps/system/components/MlCoordinator/kata-ml-coordinator/Cargo.toml
index 0a41892..f53fc45 100644
--- a/apps/system/components/MlCoordinator/kata-ml-coordinator/Cargo.toml
+++ b/apps/system/components/MlCoordinator/kata-ml-coordinator/Cargo.toml
@@ -7,11 +7,13 @@ authors = ["Adam Jesionowski <jesionowski@google.com>"]
 edition = "2021"
 
 [dependencies]
+cstr_core = { version = "0.2.3", default-features = false }
 kata-os-common = { path = "../../kata-os-common" }
+kata-memory-interface = { path = "../../MemoryManager/kata-memory-interface" }
 kata-ml-interface = { path = "../kata-ml-interface" }
+kata-security-interface = { path = "../../SecurityCoordinator/kata-security-interface" }
 kata-vec-core = { path = "../kata-vec-core" }
 log = "0.4"
-xmas-elf = "0.8.0"
 
 [lib]
 name = "kata_ml_coordinator"
diff --git a/apps/system/components/MlCoordinator/kata-ml-coordinator/src/run.rs b/apps/system/components/MlCoordinator/kata-ml-coordinator/src/run.rs
index 7eeabc6..188db0c 100644
--- a/apps/system/components/MlCoordinator/kata-ml-coordinator/src/run.rs
+++ b/apps/system/components/MlCoordinator/kata-ml-coordinator/src/run.rs
@@ -2,28 +2,41 @@
 
 // ML Coordinator Design Doc: go/sparrow-ml-doc
 
-use core::slice;
+extern crate alloc;
+
+use cstr_core::CStr;
+use alloc::string::String;
+use kata_os_common::allocator;
 use kata_os_common::logger::KataLogger;
+use kata_os_common::sel4_sys;
+use kata_os_common::slot_allocator;
 use kata_ml_interface::MlCoordinatorInterface;
 use kata_ml_interface::MlCoreInterface;
+use kata_security_interface::*;
 use kata_vec_core::MlCore;
 use log::{error, info, trace};
 
+use sel4_sys::seL4_CPtr;
+
+use slot_allocator::CSpaceSlot;
+use slot_allocator::KATA_CSPACE_SLOTS;
+
+extern "C" {
+    static SELF_CNODE_FIRST_SLOT: seL4_CPtr;
+    static SELF_CNODE_LAST_SLOT: seL4_CPtr;
+}
+
 pub struct MLCoordinator {
-    is_loaded: bool,
+    loaded_bundle: Option<String>,
+    loaded_model: Option<String>,
     is_running: bool,
     continous_mode: bool,
     ml_core: MlCore,
 }
 
-extern "C" {
-    static elf_file: *const u8;
-}
-// TODO(jesionowski): Get the size programatically.
-const ELF_SIZE: usize = 0x300000;
-
 pub static mut ML_COORD: MLCoordinator = MLCoordinator {
-    is_loaded: false,
+    loaded_bundle: None,
+    loaded_model: None,
     is_running: false,
     continous_mode: false,
     ml_core: MlCore {},
@@ -34,6 +47,15 @@ impl MLCoordinator {
         self.ml_core.enable_interrupts(true);
     }
 
+    fn is_loaded(&self) -> bool {
+        self.loaded_bundle.is_some() && self.loaded_model.is_some()
+    }
+
+    fn cmp_loaded(&self, bundle_id: &str, model_id: &str) -> bool {
+        self.loaded_bundle.as_deref() == Some(bundle_id) &&
+        self.loaded_model.as_deref() == Some(model_id)
+    }
+
     fn handle_return_interrupt(&mut self) {
         extern "C" {
             fn finish_acknowledge() -> u32;
@@ -46,15 +68,21 @@ impl MLCoordinator {
 
         if return_code != 0 {
             error!(
-                "vctop execution failed with code {}, fault pc: {:#010X}",
-                return_code, fault
+                "{}: vctop execution failed with code {}, fault pc: {:#010X}",
+                self.loaded_model.as_ref().unwrap(), return_code, fault
             );
             self.continous_mode = false;
         }
 
         self.is_running = false;
         if self.continous_mode {
-            self.execute();
+            // TODO(sleffler): can !is_loaded happen?
+            // XXX needs proper state machine
+            // XXX what is the threading/locking model?
+            if self.is_loaded() {
+                self.ml_core.run(); // Unhalt, start at default PC.
+                self.is_running = true;
+            }
         }
 
         MlCore::clear_finish();
@@ -63,26 +91,46 @@ impl MLCoordinator {
 }
 
 impl MlCoordinatorInterface for MLCoordinator {
-    fn execute(&mut self) {
+    fn execute(&mut self, bundle_id: &str, model_id: &str) {
         if self.is_running {
+            trace!("Skip execute with {}:{} already running",
+                   self.loaded_bundle.as_ref().unwrap(),
+                   self.loaded_model.as_ref().unwrap());
             return;
         }
 
-        if !self.is_loaded {
-            let res = self
-                .ml_core
-                .load_elf(unsafe { slice::from_raw_parts(elf_file, ELF_SIZE) });
-            if let Err(e) = res {
-                error!("Load error: {:?}", e);
-            } else {
-                info!("Load successful.");
-                self.is_loaded = true;
+        if !self.cmp_loaded(bundle_id, model_id) {
+            // Loads |model_id| associated with |bundle_id| from the
+            // SecurityCoordinator. The data are returned as unmapped
+            // page frames in a CNode container left in |container_slot|.
+            // To load the model into the vector core the pages must be
+            // mapped into the MlCoordinator's VSpace before being copied
+            // to their destination.
+            let container_slot = CSpaceSlot::new();
+            match kata_security_load_model(bundle_id, model_id, &container_slot) {
+                Ok(model_frames) => {
+                    if let Err(e) = self.ml_core.load_image(&model_frames) {
+                        error!("Load of {}:{} failed: {:?}",
+                               bundle_id, model_id, e);
+                        // NB: may have corrupted TCM, clear loaded state
+                        self.loaded_bundle = None;
+                        self.loaded_model = None;
+                    } else {
+                        info!("Load successful.");
+                        self.loaded_bundle = Some(String::from(bundle_id));
+                        self.loaded_model = Some(String::from(model_id));
+                    }
+                }
+                Err(e) => {
+                    error!("LoadModel of bundle {} model {} failed: {:?}",
+                           bundle_id, model_id, e);
+                }
             }
         }
 
-        if self.is_loaded {
-            self.is_running = true;
+        if self.is_loaded() {
             self.ml_core.run(); // Unhalt, start at default PC.
+            self.is_running = true;
         }
     }
 
@@ -96,21 +144,51 @@ pub extern "C" fn pre_init() {
     static KATA_LOGGER: KataLogger = KataLogger;
     log::set_logger(&KATA_LOGGER).unwrap();
     log::set_max_level(log::LevelFilter::Trace);
+
+    // TODO(sleffler): temp until we integrate with seL4
+    static mut HEAP_MEMORY: [u8; 4 * 1024] = [0; 4 * 1024];
+    unsafe {
+        allocator::ALLOCATOR.init(HEAP_MEMORY.as_mut_ptr() as usize, HEAP_MEMORY.len());
+        trace!(
+            "setup heap: start_addr {:p} size {}",
+            HEAP_MEMORY.as_ptr(),
+            HEAP_MEMORY.len()
+        );
+    }
+
+    unsafe {
+        KATA_CSPACE_SLOTS.init(
+            /*first_slot=*/ SELF_CNODE_FIRST_SLOT,
+            /*size=*/ SELF_CNODE_LAST_SLOT - SELF_CNODE_FIRST_SLOT
+        );
+        trace!("setup cspace slots: first slot {} free {}",
+               KATA_CSPACE_SLOTS.base_slot(),
+               KATA_CSPACE_SLOTS.free_slots());
+    }
 }
 
 #[no_mangle]
 pub extern "C" fn mlcoord__init() {
-    trace!("init");
     unsafe {
         ML_COORD.init();
     }
 }
 
-// TODO: Once multiple model support is in start by name.
 #[no_mangle]
-pub extern "C" fn mlcoord_execute() {
+pub extern "C" fn mlcoord_execute(
+    c_bundle_id: *const cstr_core::c_char,
+    c_model_id: *const cstr_core::c_char,
+) {
     unsafe {
-        ML_COORD.execute();
+        match CStr::from_ptr(c_bundle_id).to_str() {
+            Ok(bundle_id) => match CStr::from_ptr(c_model_id).to_str() {
+                Ok(model_id) => {
+                    ML_COORD.execute(bundle_id, model_id)
+                }
+                _ => error!("Invalid model_id"),
+            }
+            _ => error!("Invalid bundle_id"),
+        }
     }
 }
 
diff --git a/apps/system/components/MlCoordinator/kata-ml-interface/Cargo.toml b/apps/system/components/MlCoordinator/kata-ml-interface/Cargo.toml
index aa4f57f..4fff725 100644
--- a/apps/system/components/MlCoordinator/kata-ml-interface/Cargo.toml
+++ b/apps/system/components/MlCoordinator/kata-ml-interface/Cargo.toml
@@ -4,3 +4,7 @@ cargo-features = ["edition2021"]
 name = "kata-ml-interface"
 version = "0.1.0"
 edition = "2021"
+
+[dependencies]
+cstr_core = { version = "0.2.3", default-features = false }
+kata-memory-interface = { path = "../../MemoryManager/kata-memory-interface" }
diff --git a/apps/system/components/MlCoordinator/kata-ml-interface/src/lib.rs b/apps/system/components/MlCoordinator/kata-ml-interface/src/lib.rs
index cc7ab25..075d8f2 100644
--- a/apps/system/components/MlCoordinator/kata-ml-interface/src/lib.rs
+++ b/apps/system/components/MlCoordinator/kata-ml-interface/src/lib.rs
@@ -1,14 +1,17 @@
 #![no_std]
 
+use cstr_core::CString;
+use kata_memory_interface::ObjDescBundle;
+
 pub trait MlCoordinatorInterface {
-    fn execute(&mut self);
+    fn execute(&mut self, bundle_id: &str, model_id: &str);
     fn set_continuous_mode(&mut self, mode: bool);
 }
 
 pub trait MlCoreInterface {
     fn enable_interrupts(&mut self, enabled: bool);
     fn run(&mut self);
-    fn load_elf(&mut self, elf_slice: &[u8]) -> Result<(), &'static str>;
+    fn load_image(&mut self, frames: &ObjDescBundle) -> Result<(), &'static str>;
     fn get_return_code() -> u32;
     fn get_fault_register() -> u32;
     fn clear_host_req();
@@ -16,3 +19,21 @@ pub trait MlCoreInterface {
     fn clear_instruction_fault();
     fn clear_data_fault();
 }
+
+#[inline]
+#[allow(dead_code)]
+pub fn kata_mlcoord_execute(bundle_id: &str, model_id: &str)
+    -> Result<(),cstr_core:: NulError>
+{
+    extern "C" {
+        // NB: this assumes the MlCoordinator component is named "mlcoord".
+        fn mlcoord_execute(
+            c_bundle_id: *const cstr_core::c_char,
+            c_model_id: *const cstr_core::c_char
+        );
+    }
+    let bundle_id_cstr = CString::new(bundle_id)?;
+    let model_id_cstr = CString::new(model_id)?;
+    unsafe { mlcoord_execute(bundle_id_cstr.as_ptr(), model_id_cstr.as_ptr()) };
+    Ok(())
+}
diff --git a/apps/system/components/MlCoordinator/kata-vec-core/Cargo.toml b/apps/system/components/MlCoordinator/kata-vec-core/Cargo.toml
index f28d6f1..0fa7e79 100644
--- a/apps/system/components/MlCoordinator/kata-vec-core/Cargo.toml
+++ b/apps/system/components/MlCoordinator/kata-vec-core/Cargo.toml
@@ -6,7 +6,10 @@ version = "0.1.0"
 edition = "2021"
 
 [dependencies]
+kata-io = { path = "../../DebugConsole/kata-io" }
+kata-memory-interface = { path = "../../MemoryManager/kata-memory-interface" }
 kata-ml-interface = { path = "../kata-ml-interface" }
+kata-proc-interface = { path = "../../ProcessManager/kata-proc-interface" }
+kata-os-common = { path = "../../kata-os-common" }
 modular-bitfield = "0.11.2"
 log = "0.4"
-xmas-elf = { git = "https://github.com/nrc/xmas-elf" }
diff --git a/apps/system/components/MlCoordinator/kata-vec-core/src/lib.rs b/apps/system/components/MlCoordinator/kata-vec-core/src/lib.rs
index 356d87c..eb9265e 100644
--- a/apps/system/components/MlCoordinator/kata-vec-core/src/lib.rs
+++ b/apps/system/components/MlCoordinator/kata-vec-core/src/lib.rs
@@ -5,11 +5,14 @@
 
 mod vc_top;
 
-use core::assert;
+use core::mem::size_of;
 use core::slice;
 use kata_ml_interface::MlCoreInterface;
-use xmas_elf::program::{SegmentData, Type};
-use xmas_elf::ElfFile;
+use kata_memory_interface::ObjDescBundle;
+use kata_proc_interface::BundleImage;
+
+use kata_io as io;
+use io::Read;
 
 // TODO(jesionowski): Move these constants to an auto-generated file.
 // TODO(b/214092253): ITCM size blow-up needs to be addressed.
@@ -21,13 +24,11 @@ const DTCM_PADDR: usize = 0x34000000;
 // TODO(jesionowski): ITCM / DTCM will eventually be merged into a single memory.
 extern "C" {
     static itcm: *mut u32;
-}
-extern "C" {
     static dtcm: *mut u32;
 }
 
 fn get_dtcm_slice() -> &'static mut [u32] {
-    unsafe { slice::from_raw_parts_mut(dtcm, DTCM_SIZE / 4) }
+    unsafe { slice::from_raw_parts_mut(dtcm, DTCM_SIZE / size_of::<u32>()) }
 }
 
 pub struct MlCore {}
@@ -68,48 +69,37 @@ impl MlCoreInterface for MlCore {
         vc_top::set_ctrl(ctrl);
     }
 
-    fn load_elf(&mut self, elf_slice: &[u8]) -> Result<(), &'static str> {
-        let itcm_slice = unsafe { slice::from_raw_parts_mut(itcm as *mut u8, ITCM_SIZE) };
-        let dtcm_slice = unsafe { slice::from_raw_parts_mut(dtcm as *mut u8, DTCM_SIZE) };
-
-        let elf = ElfFile::new(elf_slice)?;
+    // Loads the model into the TCM.
+    fn load_image(&mut self, frames: &ObjDescBundle) -> Result<(), &'static str> {
+        let mut image = BundleImage::new(frames);
+        let mut itcm_found = false;
+        let mut dtcm_found = false;
 
         clear_tcm();
+        // NB: we require both ITCM & DTCM sections and that only one
+        //   instance of each is present
+        while let Some(section) = image.next_section() {
+            let slice = if section.vaddr == ITCM_PADDR {
+                if itcm_found { return Err("dup ITCM") }
+                itcm_found = true;
 
-        for seg in elf.program_iter() {
-            if seg.get_type()? == Type::Load {
-                let fsize = seg.file_size() as usize;
-                let msize = seg.mem_size() as usize;
+                if section.fsize > ITCM_SIZE { return Err("ITCM too big") }
+                unsafe { slice::from_raw_parts_mut(itcm as *mut u8, ITCM_SIZE) }
+            } else if section.vaddr == DTCM_PADDR {
+                if dtcm_found { return Err("dup DTCM") }
+                dtcm_found = true;
 
-                if seg.virtual_addr() as usize == ITCM_PADDR {
-                    assert!(
-                        fsize <= ITCM_SIZE,
-                        "Elf's ITCM section is larger than than ITCM_SIZE"
-                    );
-
-                    // Due to being Load types we are guarunteed SegmentData::Undefined as the
-                    // data type.
-                    if let SegmentData::Undefined(bytes) = seg.get_data(&elf)? {
-                        itcm_slice[..fsize].copy_from_slice(&bytes);
-                    }
-                } else if seg.virtual_addr() as usize == DTCM_PADDR {
-                    assert!(
-                        msize <= DTCM_SIZE,
-                        "Elf's DTCM section is larger than than DTCM_SIZE"
-                    );
-
-                    if let SegmentData::Undefined(bytes) = seg.get_data(&elf)? {
-                        dtcm_slice[..fsize].copy_from_slice(&bytes);
-                    }
-                    // TODO(jesionowski): Remove when clear_tcm is fully implemented.
-                    // Clear NOBITS sections.
-                    dtcm_slice[fsize..msize].fill(0x00);
-                } else {
-                    assert!(false, "Elf contains LOAD section outside TCM");
-                }
-            }
+                if section.fsize > DTCM_SIZE { return Err("DTCM section too big") }
+                unsafe { slice::from_raw_parts_mut(dtcm as *mut u8, DTCM_SIZE) }
+            } else {
+                return Err("Unexpected section");
+            };
+            image.read_exact(&mut slice[section.data_range()])
+                .map_err(|_| "section read error")?;
+            // TODO(jesionowski): Remove when clear_tcm is fully implemented.
+            slice[section.zero_range()].fill(0x00);
         }
-
+        if !itcm_found || !dtcm_found { return Err("Incomplete") }
         Ok(())
     }