MlCoordinator: get models in BundleImage format from SecurityCoordinator

- remove vestiges of mapped ELF file; models now come from
  SecurityCoordinator using a LoadModel request in the form of a BundleImage
- change execute api to take a bundle_id & model_id
- track loaded bundle_id & model_id to handle switching between models
- load_elf -> load_image (which loads a BundleImage)
- add a client-side wrapper to hide marshalling details
- add a copyregion (VSpace hole) for processing BundleImages
- integrate with slot allocator
- setup the heap (needed for various things)

Change-Id: I50f3526bddefcdb67b742d42ca396bfefc10801f
GitOrigin-RevId: 80aa0da34b057b334b7d76932c7ad25146364569
This commit is contained in:
Sam Leffler 2022-05-11 00:36:05 +00:00
parent 43d921eb53
commit 09ed791c1e
7 changed files with 176 additions and 76 deletions

View File

@ -11,7 +11,6 @@ component MlCoordinator {
consumes Interrupt data_fault;
dataport Buf csr;
dataport Buf(0x1000000) elf_file;
dataport Buf(0x100000) itcm;
dataport Buf(0x1000000) dtcm;
@ -20,4 +19,7 @@ component MlCoordinator {
// Enable KataOS CAmkES support.
attribute int kataos = true;
// Copyregion for loading bundle images
has copyregion BUNDLE_IMAGE;
}

View File

@ -7,11 +7,13 @@ authors = ["Adam Jesionowski <jesionowski@google.com>"]
edition = "2021"
[dependencies]
cstr_core = { version = "0.2.3", default-features = false }
kata-os-common = { path = "../../kata-os-common" }
kata-memory-interface = { path = "../../MemoryManager/kata-memory-interface" }
kata-ml-interface = { path = "../kata-ml-interface" }
kata-security-interface = { path = "../../SecurityCoordinator/kata-security-interface" }
kata-vec-core = { path = "../kata-vec-core" }
log = "0.4"
xmas-elf = "0.8.0"
[lib]
name = "kata_ml_coordinator"

View File

@ -2,28 +2,41 @@
// ML Coordinator Design Doc: go/sparrow-ml-doc
use core::slice;
extern crate alloc;
use cstr_core::CStr;
use alloc::string::String;
use kata_os_common::allocator;
use kata_os_common::logger::KataLogger;
use kata_os_common::sel4_sys;
use kata_os_common::slot_allocator;
use kata_ml_interface::MlCoordinatorInterface;
use kata_ml_interface::MlCoreInterface;
use kata_security_interface::*;
use kata_vec_core::MlCore;
use log::{error, info, trace};
use sel4_sys::seL4_CPtr;
use slot_allocator::CSpaceSlot;
use slot_allocator::KATA_CSPACE_SLOTS;
extern "C" {
static SELF_CNODE_FIRST_SLOT: seL4_CPtr;
static SELF_CNODE_LAST_SLOT: seL4_CPtr;
}
pub struct MLCoordinator {
is_loaded: bool,
loaded_bundle: Option<String>,
loaded_model: Option<String>,
is_running: bool,
continous_mode: bool,
ml_core: MlCore,
}
extern "C" {
static elf_file: *const u8;
}
// TODO(jesionowski): Get the size programmatically.
const ELF_SIZE: usize = 0x300000;
pub static mut ML_COORD: MLCoordinator = MLCoordinator {
is_loaded: false,
loaded_bundle: None,
loaded_model: None,
is_running: false,
continous_mode: false,
ml_core: MlCore {},
@ -34,6 +47,15 @@ impl MLCoordinator {
self.ml_core.enable_interrupts(true);
}
// Reports whether a model is currently recorded as loaded. This is true
// only when both the bundle id and the model id are set; either one
// being None counts as "nothing loaded".
fn is_loaded(&self) -> bool {
    matches!(
        (&self.loaded_bundle, &self.loaded_model),
        (Some(_), Some(_))
    )
}
// Returns true when the recorded bundle and model ids both match the
// requested |bundle_id| and |model_id|. Returns false if either id is
// unset or differs from the request.
fn cmp_loaded(&self, bundle_id: &str, model_id: &str) -> bool {
    match (self.loaded_bundle.as_deref(), self.loaded_model.as_deref()) {
        (Some(cur_bundle), Some(cur_model)) => {
            cur_bundle == bundle_id && cur_model == model_id
        }
        _ => false,
    }
}
fn handle_return_interrupt(&mut self) {
extern "C" {
fn finish_acknowledge() -> u32;
@ -46,15 +68,21 @@ impl MLCoordinator {
if return_code != 0 {
error!(
"vctop execution failed with code {}, fault pc: {:#010X}",
return_code, fault
"{}: vctop execution failed with code {}, fault pc: {:#010X}",
self.loaded_model.as_ref().unwrap(), return_code, fault
);
self.continous_mode = false;
}
self.is_running = false;
if self.continous_mode {
self.execute();
// TODO(sleffler): can !is_loaded happen?
// XXX needs proper state machine
// XXX what is the threading/locking model?
if self.is_loaded() {
self.ml_core.run(); // Unhalt, start at default PC.
self.is_running = true;
}
}
MlCore::clear_finish();
@ -63,26 +91,46 @@ impl MLCoordinator {
}
impl MlCoordinatorInterface for MLCoordinator {
fn execute(&mut self) {
fn execute(&mut self, bundle_id: &str, model_id: &str) {
if self.is_running {
trace!("Skip execute with {}:{} already running",
self.loaded_bundle.as_ref().unwrap(),
self.loaded_model.as_ref().unwrap());
return;
}
if !self.is_loaded {
let res = self
.ml_core
.load_elf(unsafe { slice::from_raw_parts(elf_file, ELF_SIZE) });
if let Err(e) = res {
error!("Load error: {:?}", e);
} else {
info!("Load successful.");
self.is_loaded = true;
if !self.cmp_loaded(bundle_id, model_id) {
// Loads |model_id| associated with |bundle_id| from the
// SecurityCoordinator. The data are returned as unmapped
// page frames in a CNode container left in |container_slot|.
// To load the model into the vector core the pages must be
// mapped into the MlCoordinator's VSpace before being copied
// to their destination.
let container_slot = CSpaceSlot::new();
match kata_security_load_model(bundle_id, model_id, &container_slot) {
Ok(model_frames) => {
if let Err(e) = self.ml_core.load_image(&model_frames) {
error!("Load of {}:{} failed: {:?}",
bundle_id, model_id, e);
// NB: may have corrupted TCM, clear loaded state
self.loaded_bundle = None;
self.loaded_model = None;
} else {
info!("Load successful.");
self.loaded_bundle = Some(String::from(bundle_id));
self.loaded_model = Some(String::from(model_id));
}
}
Err(e) => {
error!("LoadModel of bundle {} model {} failed: {:?}",
bundle_id, model_id, e);
}
}
}
if self.is_loaded {
self.is_running = true;
if self.is_loaded() {
self.ml_core.run(); // Unhalt, start at default PC.
self.is_running = true;
}
}
@ -96,21 +144,51 @@ pub extern "C" fn pre_init() {
static KATA_LOGGER: KataLogger = KataLogger;
log::set_logger(&KATA_LOGGER).unwrap();
log::set_max_level(log::LevelFilter::Trace);
// TODO(sleffler): temp until we integrate with seL4
static mut HEAP_MEMORY: [u8; 4 * 1024] = [0; 4 * 1024];
unsafe {
allocator::ALLOCATOR.init(HEAP_MEMORY.as_mut_ptr() as usize, HEAP_MEMORY.len());
trace!(
"setup heap: start_addr {:p} size {}",
HEAP_MEMORY.as_ptr(),
HEAP_MEMORY.len()
);
}
unsafe {
KATA_CSPACE_SLOTS.init(
/*first_slot=*/ SELF_CNODE_FIRST_SLOT,
/*size=*/ SELF_CNODE_LAST_SLOT - SELF_CNODE_FIRST_SLOT
);
trace!("setup cspace slots: first slot {} free {}",
KATA_CSPACE_SLOTS.base_slot(),
KATA_CSPACE_SLOTS.free_slots());
}
}
// C-callable init entry point for the component: logs that init is
// running and then initializes the global ML_COORD instance.
#[no_mangle]
pub extern "C" fn mlcoord__init() {
    trace!("init");
    // ML_COORD is a `static mut`, so touching it requires `unsafe`.
    // NOTE(review): presumably nothing else accesses ML_COORD while init
    // runs -- confirm against the component's threading model.
    unsafe { ML_COORD.init() };
}
// TODO: Once multiple model support is in start by name.
#[no_mangle]
pub extern "C" fn mlcoord_execute() {
pub extern "C" fn mlcoord_execute(
c_bundle_id: *const cstr_core::c_char,
c_model_id: *const cstr_core::c_char,
) {
unsafe {
ML_COORD.execute();
match CStr::from_ptr(c_bundle_id).to_str() {
Ok(bundle_id) => match CStr::from_ptr(c_model_id).to_str() {
Ok(model_id) => {
ML_COORD.execute(bundle_id, model_id)
}
_ => error!("Invalid model_id"),
}
_ => error!("Invalid bundle_id"),
}
}
}

View File

@ -4,3 +4,7 @@ cargo-features = ["edition2021"]
name = "kata-ml-interface"
version = "0.1.0"
edition = "2021"
[dependencies]
cstr_core = { version = "0.2.3", default-features = false }
kata-memory-interface = { path = "../../MemoryManager/kata-memory-interface" }

View File

@ -1,14 +1,17 @@
#![no_std]
use cstr_core::CString;
use kata_memory_interface::ObjDescBundle;
pub trait MlCoordinatorInterface {
fn execute(&mut self);
fn execute(&mut self, bundle_id: &str, model_id: &str);
fn set_continuous_mode(&mut self, mode: bool);
}
pub trait MlCoreInterface {
fn enable_interrupts(&mut self, enabled: bool);
fn run(&mut self);
fn load_elf(&mut self, elf_slice: &[u8]) -> Result<(), &'static str>;
fn load_image(&mut self, frames: &ObjDescBundle) -> Result<(), &'static str>;
fn get_return_code() -> u32;
fn get_fault_register() -> u32;
fn clear_host_req();
@ -16,3 +19,21 @@ pub trait MlCoreInterface {
fn clear_instruction_fault();
fn clear_data_fault();
}
// Client-side wrapper for the MlCoordinator execute request that hides
// the C-string marshalling from callers.
//
// Both ids are converted to NUL-terminated C strings and passed to the
// component's `mlcoord_execute` entry point. A conversion failure is
// returned as `cstr_core::NulError` (bundle id is converted first) and the
// request is not issued. The entry point itself returns nothing, so Ok(())
// only means the call was made, not that the model loaded or ran.
#[inline]
#[allow(dead_code)]
pub fn kata_mlcoord_execute(
    bundle_id: &str,
    model_id: &str,
) -> Result<(), cstr_core::NulError> {
    extern "C" {
        // NB: this assumes the MlCoordinator component is named "mlcoord".
        fn mlcoord_execute(
            c_bundle_id: *const cstr_core::c_char,
            c_model_id: *const cstr_core::c_char,
        );
    }
    // Keep the owned CStrings bound to locals so the raw pointers handed
    // to the callee stay valid for the duration of the call.
    let c_bundle = CString::new(bundle_id)?;
    let c_model = CString::new(model_id)?;
    unsafe {
        mlcoord_execute(c_bundle.as_ptr(), c_model.as_ptr());
    }
    Ok(())
}

View File

@ -6,7 +6,10 @@ version = "0.1.0"
edition = "2021"
[dependencies]
kata-io = { path = "../../DebugConsole/kata-io" }
kata-memory-interface = { path = "../../MemoryManager/kata-memory-interface" }
kata-ml-interface = { path = "../kata-ml-interface" }
kata-proc-interface = { path = "../../ProcessManager/kata-proc-interface" }
kata-os-common = { path = "../../kata-os-common" }
modular-bitfield = "0.11.2"
log = "0.4"
xmas-elf = { git = "https://github.com/nrc/xmas-elf" }

View File

@ -5,11 +5,14 @@
mod vc_top;
use core::assert;
use core::mem::size_of;
use core::slice;
use kata_ml_interface::MlCoreInterface;
use xmas_elf::program::{SegmentData, Type};
use xmas_elf::ElfFile;
use kata_memory_interface::ObjDescBundle;
use kata_proc_interface::BundleImage;
use kata_io as io;
use io::Read;
// TODO(jesionowski): Move these constants to an auto-generated file.
// TODO(b/214092253): ITCM size blow-up needs to be addressed.
@ -21,13 +24,11 @@ const DTCM_PADDR: usize = 0x34000000;
// TODO(jesionowski): ITCM / DTCM will eventually be merged into a single memory.
extern "C" {
static itcm: *mut u32;
}
extern "C" {
static dtcm: *mut u32;
}
fn get_dtcm_slice() -> &'static mut [u32] {
unsafe { slice::from_raw_parts_mut(dtcm, DTCM_SIZE / 4) }
unsafe { slice::from_raw_parts_mut(dtcm, DTCM_SIZE / size_of::<u32>()) }
}
pub struct MlCore {}
@ -68,48 +69,37 @@ impl MlCoreInterface for MlCore {
vc_top::set_ctrl(ctrl);
}
fn load_elf(&mut self, elf_slice: &[u8]) -> Result<(), &'static str> {
let itcm_slice = unsafe { slice::from_raw_parts_mut(itcm as *mut u8, ITCM_SIZE) };
let dtcm_slice = unsafe { slice::from_raw_parts_mut(dtcm as *mut u8, DTCM_SIZE) };
let elf = ElfFile::new(elf_slice)?;
// Loads the model into the TCM.
fn load_image(&mut self, frames: &ObjDescBundle) -> Result<(), &'static str> {
let mut image = BundleImage::new(frames);
let mut itcm_found = false;
let mut dtcm_found = false;
clear_tcm();
// NB: we require both ITCM & DTCM sections and that only one
// instance of each is present
while let Some(section) = image.next_section() {
let slice = if section.vaddr == ITCM_PADDR {
if itcm_found { return Err("dup ITCM") }
itcm_found = true;
for seg in elf.program_iter() {
if seg.get_type()? == Type::Load {
let fsize = seg.file_size() as usize;
let msize = seg.mem_size() as usize;
if section.fsize > ITCM_SIZE { return Err("ITCM too big") }
unsafe { slice::from_raw_parts_mut(itcm as *mut u8, ITCM_SIZE) }
} else if section.vaddr == DTCM_PADDR {
if dtcm_found { return Err("dup DTCM") }
dtcm_found = true;
if seg.virtual_addr() as usize == ITCM_PADDR {
assert!(
fsize <= ITCM_SIZE,
"Elf's ITCM section is larger than than ITCM_SIZE"
);
// Due to being Load types we are guarunteed SegmentData::Undefined as the
// data type.
if let SegmentData::Undefined(bytes) = seg.get_data(&elf)? {
itcm_slice[..fsize].copy_from_slice(&bytes);
}
} else if seg.virtual_addr() as usize == DTCM_PADDR {
assert!(
msize <= DTCM_SIZE,
"Elf's DTCM section is larger than than DTCM_SIZE"
);
if let SegmentData::Undefined(bytes) = seg.get_data(&elf)? {
dtcm_slice[..fsize].copy_from_slice(&bytes);
}
// TODO(jesionowski): Remove when clear_tcm is fully implemented.
// Clear NOBITS sections.
dtcm_slice[fsize..msize].fill(0x00);
} else {
assert!(false, "Elf contains LOAD section outside TCM");
}
}
if section.fsize > DTCM_SIZE { return Err("DTCM section too big") }
unsafe { slice::from_raw_parts_mut(dtcm as *mut u8, DTCM_SIZE) }
} else {
return Err("Unexpected section");
};
image.read_exact(&mut slice[section.data_range()])
.map_err(|_| "section read error")?;
// TODO(jesionowski): Remove when clear_tcm is fully implemented.
slice[section.zero_range()].fill(0x00);
}
if !itcm_found || !dtcm_found { return Err("Incomplete") }
Ok(())
}