runtime-rs: support memory resize

Fixes: #6875

Signed-off-by: Zhongtao Hu <zhongtaohu.tim@linux.alibaba.com>
2025-08-28 12:31:04 +00:00 · 2023-05-17 16:12:35 +08:00 · 2023-05-17 16:12:35 +08:00 · 8d9fd9c067
commit 8d9fd9c067
parent 81e55c424a
22 changed files with 336 additions and 113 deletions
--- a/src/runtime-rs/Cargo.lock
+++ b/src/runtime-rs/Cargo.lock
@ -508,6 +508,7 @@ dependencies = [
 "oci",
 "persist",
 "protobuf 3.2.0",
+ "resource",
 "serde_json",
 "slog",
 "slog-scope",
@ -1761,6 +1762,7 @@ dependencies = [
 "async-trait",
 "common",
 "kata-types",
+ "resource",
 "tokio",
 ]

@ -4273,6 +4275,7 @@ dependencies = [
 "async-trait",
 "common",
 "kata-types",
+ "resource",
 "tokio",
 ]

--- a/src/runtime-rs/crates/hypervisor/src/ch/inner_hypervisor.rs
+++ b/src/runtime-rs/crates/hypervisor/src/ch/inner_hypervisor.rs
@ -756,11 +756,7 @@ impl CloudHypervisorInner {
        todo!()
    }

-    pub(crate) fn resize_memory(
-        &self,
-        _req_mem_mb: u32,
-        _curr_mem_mb: u32,
-    ) -> Result<(u32, MemoryConfig)> {
+    pub(crate) fn resize_memory(&self, _req_mem_mb: u32) -> Result<(u32, MemoryConfig)> {
        todo!()
    }
 }
--- a/src/runtime-rs/crates/hypervisor/src/ch/mod.rs
+++ b/src/runtime-rs/crates/hypervisor/src/ch/mod.rs
@ -178,13 +178,9 @@ impl Hypervisor for CloudHypervisor {
        inner.guest_memory_block_size_mb()
    }

-    async fn resize_memory(
-        &self,
-        req_mem_mb: u32,
-        curr_mem_mb: u32,
-    ) -> Result<(u32, MemoryConfig)> {
+    async fn resize_memory(&self, req_mem_mb: u32) -> Result<(u32, MemoryConfig)> {
        let inner = self.inner.read().await;
-        inner.resize_memory(req_mem_mb, curr_mem_mb)
+        inner.resize_memory(req_mem_mb)
    }
 }

--- a/src/runtime-rs/crates/hypervisor/src/dragonball/inner.rs
+++ b/src/runtime-rs/crates/hypervisor/src/dragonball/inner.rs
@ -73,6 +73,9 @@ pub struct DragonballInner {

    /// the size of memory block of guest OS
    pub(crate) guest_memory_block_size_mb: u32,
+
+    /// the hotplug memory size
+    pub(crate) mem_hotplug_size_mb: u32,
 }

 impl DragonballInner {
@ -98,6 +101,7 @@ impl DragonballInner {
            cached_block_devices: Default::default(),
            capabilities,
            guest_memory_block_size_mb: 0,
+            mem_hotplug_size_mb: 0,
        }
    }

@ -345,36 +349,14 @@ impl DragonballInner {
        Ok((old_vcpus, new_vcpus))
    }

-    // curr_mem_m size = default + hotplug
-    pub(crate) fn resize_memory(
-        &self,
-        req_mem_mb: u32,
-        curr_mem_mb: u32,
-    ) -> Result<(u32, MemoryConfig)> {
-        let mem_device_to_insert = match req_mem_mb.cmp(&curr_mem_mb) {
+    pub(crate) fn resize_memory(&mut self, req_mem_mb: u32) -> Result<(u32, MemoryConfig)> {
+        let had_mem_mb = self.config.memory_info.default_memory + self.mem_hotplug_size_mb;
+        match req_mem_mb.cmp(&had_mem_mb) {
            Ordering::Greater => {
-                // We need to insert a new memory device
-                let add_mem_mb = req_mem_mb - curr_mem_mb;
-                if self.config.memory_info.enable_virtio_mem {
-                    Some(MemDeviceConfigInfo {
-                        mem_id: format!("mem{}", curr_mem_mb),
-                        size_mib: add_mem_mb as u64,
-                        capacity_mib: add_mem_mb as u64,
-                        multi_region: false,
-                        host_numa_node_id: None,
-                        guest_numa_node_id: None,
-                        use_shared_irq: None,
-                        use_generic_irq: None,
-                    })
-                } else {
-                    None
-                }
-            }
-            Ordering::Less => {
-                // We need to insert a new balloon device to release memory
+                // reset the virtio-balloon device to size 0 before hotplugging memory
                let balloon_config = BalloonDeviceConfigInfo {
-                    balloon_id: format!("mem{}", curr_mem_mb),
-                    size_mib: (curr_mem_mb - req_mem_mb) as u64,
+                    balloon_id: "balloon0".to_owned(),
+                    size_mib: 0,
                    use_shared_irq: None,
                    use_generic_irq: None,
                    f_deflate_on_oom: false,
@ -383,17 +365,46 @@ impl DragonballInner {
                self.vmm_instance
                    .insert_balloon_device(balloon_config)
                    .context("failed to insert balloon device")?;
-                None
-            }
-            Ordering::Equal => None, // Everything is already set up
-        };

-        // If we have a memory device to insert, do it now
-        if let Some(mem_config) = mem_device_to_insert {
-            self.vmm_instance
-                .insert_mem_device(mem_config)
-                .context("failed to insert memory device")?;
+                // update the hotplug size
+                self.mem_hotplug_size_mb = req_mem_mb - self.config.memory_info.default_memory;
+
+                // insert a new memory device
+                let add_mem_mb = req_mem_mb - had_mem_mb;
+                self.vmm_instance.insert_mem_device(MemDeviceConfigInfo {
+                    mem_id: format!("mem{}", self.mem_hotplug_size_mb),
+                    size_mib: add_mem_mb as u64,
+                    capacity_mib: add_mem_mb as u64,
+                    multi_region: false,
+                    host_numa_node_id: None,
+                    guest_numa_node_id: None,
+                    use_shared_irq: None,
+                    use_generic_irq: None,
+                })?;
            }
+            Ordering::Less => {
+                // we use a single balloon device (balloon0) and resize it to release memory;
+                // this call either inserts balloon0 or, if it already exists, resizes it
+                let balloon_config = BalloonDeviceConfigInfo {
+                    balloon_id: "balloon0".to_owned(),
+                    size_mib: (had_mem_mb - req_mem_mb) as u64,
+                    use_shared_irq: None,
+                    use_generic_irq: None,
+                    f_deflate_on_oom: false,
+                    f_reporting: false,
+                };
+                self.vmm_instance
+                    .insert_balloon_device(balloon_config)
+                    .context("failed to insert balloon device")?;
+            }
+            Ordering::Equal => {
+                // Everything is already set up
+                info!(
+                    sl!(),
+                    "memory size unchanged, no need to do memory resizing"
+                );
+            }
+        };

        Ok((
            req_mem_mb,
@ -464,6 +475,7 @@ impl Persist for DragonballInner {
            cached_block_devices: hypervisor_state.cached_block_devices,
            capabilities: Capabilities::new(),
            guest_memory_block_size_mb: 0,
+            mem_hotplug_size_mb: 0,
        })
    }
 }
--- a/src/runtime-rs/crates/hypervisor/src/dragonball/mod.rs
+++ b/src/runtime-rs/crates/hypervisor/src/dragonball/mod.rs
@ -194,13 +194,9 @@ impl Hypervisor for Dragonball {
        inner.guest_memory_block_size_mb()
    }

-    async fn resize_memory(
-        &self,
-        req_mem_mb: u32,
-        curr_mem_mb: u32,
-    ) -> Result<(u32, MemoryConfig)> {
-        let inner = self.inner.read().await;
-        inner.resize_memory(req_mem_mb, curr_mem_mb)
+    async fn resize_memory(&self, req_mem_mb: u32) -> Result<(u32, MemoryConfig)> {
+        let mut inner = self.inner.write().await;
+        inner.resize_memory(req_mem_mb)
    }
 }

--- a/src/runtime-rs/crates/hypervisor/src/lib.rs
+++ b/src/runtime-rs/crates/hypervisor/src/lib.rs
@ -91,6 +91,7 @@ pub trait Hypervisor: std::fmt::Debug + Send + Sync {
    async fn save_vm(&self) -> Result<()>;
    async fn resume_vm(&self) -> Result<()>;
    async fn resize_vcpu(&self, old_vcpus: u32, new_vcpus: u32) -> Result<(u32, u32)>; // returns (old_vcpus, new_vcpus)
+    async fn resize_memory(&self, req_mem_mb: u32) -> Result<(u32, MemoryConfig)>;

    // device manager
    async fn add_device(&self, device: DeviceType) -> Result<DeviceType>;
@ -114,6 +115,4 @@ pub trait Hypervisor: std::fmt::Debug + Send + Sync {
    async fn set_capabilities(&self, flag: CapabilityBits);
    async fn set_guest_memory_block_size(&self, size: u32);
    async fn guest_memory_block_size(&self) -> u32;
-    async fn resize_memory(&self, req_mem_mb: u32, curr_mem_mb: u32)
-        -> Result<(u32, MemoryConfig)>;
 }
--- a/src/runtime-rs/crates/hypervisor/src/qemu/inner.rs
+++ b/src/runtime-rs/crates/hypervisor/src/qemu/inner.rs
@ -153,11 +153,7 @@ impl QemuInner {
        todo!()
    }

-    pub(crate) fn resize_memory(
-        &self,
-        _req_mem_mb: u32,
-        _curr_mem_mb: u32,
-    ) -> Result<(u32, MemoryConfig)> {
+    pub(crate) fn resize_memory(&self, _req_mem_mb: u32) -> Result<(u32, MemoryConfig)> {
        todo!()
    }
 }
--- a/src/runtime-rs/crates/hypervisor/src/qemu/mod.rs
+++ b/src/runtime-rs/crates/hypervisor/src/qemu/mod.rs
@ -173,12 +173,8 @@ impl Hypervisor for Qemu {
        inner.guest_memory_block_size_mb()
    }

-    async fn resize_memory(
-        &self,
-        req_mem_mb: u32,
-        curr_mem_mb: u32,
-    ) -> Result<(u32, MemoryConfig)> {
+    async fn resize_memory(&self, req_mem_mb: u32) -> Result<(u32, MemoryConfig)> {
        let inner = self.inner.read().await;
-        inner.resize_memory(req_mem_mb, curr_mem_mb)
+        inner.resize_memory(req_mem_mb)
    }
 }
--- a/src/runtime-rs/crates/resource/src/cpu_mem/cpu.rs
+++ b/src/runtime-rs/crates/resource/src/cpu_mem/cpu.rs
@ -11,7 +11,6 @@ use std::{
    sync::Arc,
 };

-use agent::{Agent, OnlineCPUMemRequest};
 use anyhow::{Context, Ok, Result};
 use hypervisor::Hypervisor;
 use kata_types::{config::TomlConfig, cpu::LinuxContainerCpuResources};
@ -52,7 +51,6 @@ impl CpuResource {
        linux_cpus: Option<&LinuxCpu>,
        op: ResourceUpdateOp,
        hypervisor: &dyn Hypervisor,
-        agent: &dyn Agent,
    ) -> Result<()> {
        self.update_container_cpu_resources(cid, linux_cpus, op)
            .await
@ -67,13 +65,13 @@ impl CpuResource {
        }

        let curr_vcpus = self
-            .do_update_cpu_resources(vcpu_required, op, hypervisor, agent)
+            .do_update_cpu_resources(vcpu_required, op, hypervisor)
            .await?;
        self.update_current_vcpu(curr_vcpus).await;
        Ok(())
    }

-    async fn current_vcpu(&self) -> u32 {
+    pub(crate) async fn current_vcpu(&self) -> u32 {
        let current_vcpu = self.current_vcpu.read().await;
        *current_vcpu
    }
@ -148,7 +146,6 @@ impl CpuResource {
        new_vcpus: u32,
        op: ResourceUpdateOp,
        hypervisor: &dyn Hypervisor,
-        agent: &dyn Agent,
    ) -> Result<u32> {
        let old_vcpus = self.current_vcpu().await;

@ -164,25 +161,11 @@ impl CpuResource {
        // the number of vcpus would not be lower than the default size
        let new_vcpus = cmp::max(new_vcpus, self.default_vcpu);

-        let (old, new) = hypervisor
+        let (_, new) = hypervisor
            .resize_vcpu(old_vcpus, new_vcpus)
            .await
            .context("resize vcpus")?;

-        if old < new {
-            let add = new - old;
-            info!(sl!(), "request to onlineCpuMem with {:?} cpus", add);
-
-            agent
-                .online_cpu_mem(OnlineCPUMemRequest {
-                    wait: false,
-                    nb_cpus: new,
-                    cpu_only: true,
-                })
-                .await
-                .context("online vcpus")?;
-        }
-
        Ok(new)
    }
 }
--- a/src/runtime-rs/crates/resource/src/cpu_mem/initial_size.rs
+++ b/src/runtime-rs/crates/resource/src/cpu_mem/initial_size.rs
@ -19,6 +19,7 @@ use kata_types::{
 struct InitialSize {
    vcpu: u32,
    mem_mb: u32,
+    orig_toml_default_mem: u32,
 }

 // generate initial resource(vcpu and memory in MiB) from spec's information
@ -66,7 +67,11 @@ impl TryFrom<&oci::Spec> for InitialSize {
            sl!(),
            "(from PodSandbox's annotation / SingleContainer's spec) initial size: vcpu={}, mem_mb={}", vcpu, mem_mb
        );
-        Ok(Self { vcpu, mem_mb })
+        Ok(Self {
+            vcpu,
+            mem_mb,
+            orig_toml_default_mem: 0,
+        })
    }
 }

@ -93,7 +98,7 @@ impl InitialSizeManager {
        })
    }

-    pub fn setup_config(&self, config: &mut TomlConfig) -> Result<()> {
+    pub fn setup_config(&mut self, config: &mut TomlConfig) -> Result<()> {
        // update this data to the hypervisor config for later use by hypervisor
        let hypervisor_name = &config.runtime.hypervisor_name;
        let hv = config
@ -104,6 +109,7 @@ impl InitialSizeManager {
        if self.resource.vcpu > 0 {
            hv.cpu_info.default_vcpus = self.resource.vcpu as i32
        }
+        self.resource.orig_toml_default_mem = hv.memory_info.default_memory;
        if self.resource.mem_mb > 0 {
            // since the memory overhead introduced by kata-agent and system components
            // will really affect the amount of memory the user can use, so we choose to
@ -114,6 +120,10 @@ impl InitialSizeManager {
        }
        Ok(())
    }
+
+    pub fn get_orig_toml_default_mem(&self) -> u32 {
+        self.resource.orig_toml_default_mem
+    }
 }

 fn get_nr_vcpu(resource: &LinuxContainerCpuResources) -> u32 {
@ -173,7 +183,11 @@ mod tests {
                    quota: None,
                    memory: None,
                },
-                result: InitialSize { vcpu: 0, mem_mb: 0 },
+                result: InitialSize {
+                    vcpu: 0,
+                    mem_mb: 0,
+                    orig_toml_default_mem: 0,
+                },
            },
            TestData {
                desc: "normal resource limit",
@ -186,6 +200,7 @@ mod tests {
                result: InitialSize {
                    vcpu: 3,
                    mem_mb: 512,
+                    orig_toml_default_mem: 0,
                },
            },
        ]
--- a/src/runtime-rs/crates/resource/src/cpu_mem/mem.rs
+++ b/src/runtime-rs/crates/resource/src/cpu_mem/mem.rs
@ -0,0 +1,170 @@
+// Copyright (c) 2019-2023 Alibaba Cloud
+// Copyright (c) 2019-2023 Ant Group
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+use std::collections::HashMap;
+use std::sync::Arc;
+
+use anyhow::{Context, Ok, Result};
+use hypervisor::Hypervisor;
+use kata_types::config::TomlConfig;
+use oci::LinuxResources;
+use tokio::sync::RwLock;
+
+use crate::cpu_mem::initial_size::InitialSizeManager;
+use crate::ResourceUpdateOp;
+
+// MIB_TO_BYTES_SHIFT is the number of bits to shift when converting between MiB and bytes
+pub const MIB_TO_BYTES_SHIFT: i32 = 20;
+
+#[derive(Default, Debug, Clone)]
+pub struct MemResource {
+    /// Current memory
+    pub(crate) current_mem: Arc<RwLock<u32>>,
+
+    /// Default memory from the hypervisor config, as recorded by `InitialSizeManager::setup_config`
+    pub(crate) orig_toml_default_mem: u32,
+
+    /// MemResource of each container
+    pub(crate) container_mem_resources: Arc<RwLock<HashMap<String, LinuxResources>>>,
+
+    /// Use guest swap
+    pub(crate) use_guest_swap: bool,
+}
+
+impl MemResource {
+    pub fn new(config: Arc<TomlConfig>, init_size_manager: InitialSizeManager) -> Result<Self> {
+        let hypervisor_name = config.runtime.hypervisor_name.clone();
+        let hypervisor_config = config
+            .hypervisor
+            .get(&hypervisor_name)
+            .context("failed to get hypervisor")?;
+
+        Ok(Self {
+            current_mem: Arc::new(RwLock::new(hypervisor_config.memory_info.default_memory)),
+            container_mem_resources: Arc::new(RwLock::new(HashMap::new())),
+            use_guest_swap: hypervisor_config.memory_info.enable_guest_swap,
+            orig_toml_default_mem: init_size_manager.get_orig_toml_default_mem(),
+        })
+    }
+
+    pub(crate) async fn update_mem_resources(
+        &self,
+        cid: &str,
+        linux_resources: Option<&LinuxResources>,
+        op: ResourceUpdateOp,
+        hypervisor: &dyn Hypervisor,
+    ) -> Result<()> {
+        self.update_container_mem_resources(cid, linux_resources, op)
+            .await
+            .context("update container memory resources")?;
+        // the unit here is MiB
+        let (mut mem_sb_mb, need_pod_swap, swap_sb_mb) = self
+            .total_mems(self.use_guest_swap)
+            .await
+            .context("failed to calculate total memory requirement for containers")?;
+        mem_sb_mb += self.orig_toml_default_mem;
+        if need_pod_swap {
+            mem_sb_mb += swap_sb_mb;
+        }
+        info!(sl!(), "calculate mem_sb_mb {}", mem_sb_mb);
+
+        let curr_mem = self
+            .do_update_mem_resource(mem_sb_mb, swap_sb_mb, hypervisor)
+            .await
+            .context("failed to update_mem_resource")?;
+
+        self.update_current_mem(curr_mem).await;
+        Ok(())
+    }
+
+    async fn update_current_mem(&self, new_mem: u32) {
+        let mut current_mem = self.current_mem.write().await;
+        *current_mem = new_mem;
+    }
+
+    async fn total_mems(&self, use_guest_swap: bool) -> Result<(u32, bool, u32)> {
+        // sb stands for sandbox
+        let mut mem_sandbox = 0;
+        let mut need_pod_swap = false;
+        let mut swap_sandbox = 0;
+
+        let resources = self.container_mem_resources.read().await;
+
+        for (_, r) in resources.iter() {
+            for l in &r.hugepage_limits {
+                mem_sandbox += l.limit;
+            }
+
+            if let Some(memory) = &r.memory {
+                // current_limit is 0 if no memory limit is set for the container
+                let current_limit = memory.limit.map_or(0, |limit| {
+                    mem_sandbox += limit as u64;
+                    info!(sl!(), "memory sb: {}, memory limit: {}", mem_sandbox, limit);
+                    limit
+                });
+
+                if let Some(swappiness) = memory.swappiness {
+                    if swappiness > 0 && use_guest_swap {
+                        if let Some(swap) = memory.swap {
+                            if swap > current_limit {
+                                swap_sandbox = swap.saturating_sub(current_limit);
+                            }
+                        }
+                        // if current_limit is 0, the container will have access to the entire memory available on the host system
+                        // so we add swap for this
+                        else if current_limit == 0 {
+                            need_pod_swap = true;
+                        } else {
+                            swap_sandbox += current_limit;
+                        }
+                    }
+                }
+            }
+        }
+
+        Ok((
+            (mem_sandbox >> MIB_TO_BYTES_SHIFT) as u32,
+            need_pod_swap,
+            (swap_sandbox >> MIB_TO_BYTES_SHIFT) as u32,
+        ))
+    }
+
+    // update container_mem_resources field
+    async fn update_container_mem_resources(
+        &self,
+        cid: &str,
+        linux_resources: Option<&LinuxResources>,
+        op: ResourceUpdateOp,
+    ) -> Result<()> {
+        if let Some(r) = linux_resources {
+            let mut resources = self.container_mem_resources.write().await;
+            match op {
+                ResourceUpdateOp::Add | ResourceUpdateOp::Update => {
+                    resources.insert(cid.to_owned(), r.clone());
+                }
+                ResourceUpdateOp::Del => {
+                    resources.remove(cid);
+                }
+            }
+        }
+        Ok(())
+    }
+
+    async fn do_update_mem_resource(
+        &self,
+        new_mem: u32,
+        _swap_sz_mb: u32,
+        hypervisor: &dyn Hypervisor,
+    ) -> Result<u32> {
+        info!(sl!(), "requesting vmm to update memory to {:?}", new_mem);
+        let (new_memory, _mem_config) = hypervisor
+            .resize_memory(new_mem)
+            .await
+            .context("resize memory")?;
+
+        Ok(new_memory)
+    }
+}
--- a/src/runtime-rs/crates/resource/src/cpu_mem/mod.rs
+++ b/src/runtime-rs/crates/resource/src/cpu_mem/mod.rs
@ -6,3 +6,4 @@

 pub mod cpu;
 pub mod initial_size;
+pub mod mem;
--- a/src/runtime-rs/crates/resource/src/manager.rs
+++ b/src/runtime-rs/crates/resource/src/manager.rs
@ -19,6 +19,7 @@ use persist::sandbox_persist::Persist;
 use tokio::sync::RwLock;
 use tracing::instrument;

+use crate::cpu_mem::initial_size::InitialSizeManager;
 use crate::network::NetworkConfig;
 use crate::resource_persist::ResourceState;
 use crate::ResourceUpdateOp;
@ -47,13 +48,15 @@ impl ResourceManager {
        agent: Arc<dyn Agent>,
        hypervisor: Arc<dyn Hypervisor>,
        toml_config: Arc<TomlConfig>,
+        init_size_manager: InitialSizeManager,
    ) -> Result<Self> {
        // Regist resource logger for later use.
        logging::register_subsystem_logger("runtimes", "resource");

        Ok(Self {
            inner: Arc::new(RwLock::new(
-                ResourceManagerInner::new(sid, agent, hypervisor, toml_config).await?,
+                ResourceManagerInner::new(sid, agent, hypervisor, toml_config, init_size_manager)
+                    .await?,
            )),
        })
    }
--- a/src/runtime-rs/crates/resource/src/manager_inner.rs
+++ b/src/runtime-rs/crates/resource/src/manager_inner.rs
@ -6,7 +6,7 @@

 use std::{sync::Arc, thread};

-use agent::{types::Device, Agent, Storage};
+use agent::{types::Device, Agent, OnlineCPUMemRequest, Storage};
 use anyhow::{anyhow, Context, Ok, Result};
 use async_trait::async_trait;
 use hypervisor::{
@ -25,7 +25,7 @@ use tokio::{runtime, sync::RwLock};

 use crate::{
    cgroups::{CgroupArgs, CgroupsResource},
-    cpu_mem::cpu::CpuResource,
+    cpu_mem::{cpu::CpuResource, initial_size::InitialSizeManager, mem::MemResource},
    manager::ManagerArgs,
    network::{self, Network, NetworkConfig},
    resource_persist::ResourceState,
@ -48,6 +48,7 @@ pub(crate) struct ResourceManagerInner {
    pub volume_resource: VolumeResource,
    pub cgroups_resource: CgroupsResource,
    pub cpu_resource: CpuResource,
+    pub mem_resource: MemResource,
 }

 impl ResourceManagerInner {
@ -56,6 +57,7 @@ impl ResourceManagerInner {
        agent: Arc<dyn Agent>,
        hypervisor: Arc<dyn Hypervisor>,
        toml_config: Arc<TomlConfig>,
+        init_size_manager: InitialSizeManager,
    ) -> Result<Self> {
        // create device manager
        let dev_manager = DeviceManager::new(hypervisor.clone())
@ -64,6 +66,7 @@ impl ResourceManagerInner {

        let cgroups_resource = CgroupsResource::new(sid, &toml_config)?;
        let cpu_resource = CpuResource::new(toml_config.clone())?;
+        let mem_resource = MemResource::new(toml_config.clone(), init_size_manager)?;
        Ok(Self {
            sid: sid.to_string(),
            toml_config,
@ -76,6 +79,7 @@ impl ResourceManagerInner {
            volume_resource: VolumeResource::new(),
            cgroups_resource,
            cpu_resource,
+            mem_resource,
        })
    }

@ -427,15 +431,23 @@ impl ResourceManagerInner {

        // if static_sandbox_resource_mgmt, we will not have to update sandbox's cpu or mem resource
        if !self.toml_config.runtime.static_sandbox_resource_mgmt {
+            // update cpu
            self.cpu_resource
-                .update_cpu_resources(
-                    cid,
-                    linux_cpus,
-                    op,
-                    self.hypervisor.as_ref(),
-                    self.agent.as_ref(),
-                )
+                .update_cpu_resources(cid, linux_cpus, op, self.hypervisor.as_ref())
                .await?;
+            // update memory
+            self.mem_resource
+                .update_mem_resources(cid, linux_resources, op, self.hypervisor.as_ref())
+                .await?;
+
+            self.agent
+                .online_cpu_mem(OnlineCPUMemRequest {
+                    wait: false,
+                    nb_cpus: self.cpu_resource.current_vcpu().await,
+                    cpu_only: false,
+                })
+                .await
+                .context("online vcpus")?;
        }

        // we should firstly update the vcpus and mems, and then update the host cgroups
@ -516,6 +528,7 @@ impl Persist for ResourceManagerInner {
            .await?,
            toml_config: Arc::new(TomlConfig::default()),
            cpu_resource: CpuResource::default(),
+            mem_resource: MemResource::default(),
        })
    }
 }
--- a/src/runtime-rs/crates/runtimes/common/Cargo.toml
+++ b/src/runtime-rs/crates/runtimes/common/Cargo.toml
@ -26,4 +26,5 @@ agent = { path = "../../agent" }
 kata-sys-util = { path = "../../../../libs/kata-sys-util" }
 kata-types = { path = "../../../../libs/kata-types" }
 oci = { path = "../../../../libs/oci" }
+resource = { path = "../../resource" }

--- a/src/runtime-rs/crates/runtimes/common/src/runtime_handler.rs
+++ b/src/runtime-rs/crates/runtimes/common/src/runtime_handler.rs
@ -6,13 +6,13 @@

 use std::sync::Arc;

+use crate::{message::Message, ContainerManager, Sandbox};
 use anyhow::Result;
 use async_trait::async_trait;
 use kata_types::config::TomlConfig;
+use resource::cpu_mem::initial_size::InitialSizeManager;
 use tokio::sync::mpsc::Sender;

-use crate::{message::Message, ContainerManager, Sandbox};
-
 #[derive(Clone)]
 pub struct RuntimeInstance {
    pub sandbox: Arc<dyn Sandbox>,
@ -38,6 +38,7 @@ pub trait RuntimeHandler: Send + Sync {
        sid: &str,
        msg_sender: Sender<Message>,
        config: Arc<TomlConfig>,
+        init_size_manager: InitialSizeManager,
    ) -> Result<RuntimeInstance>;

    fn cleanup(&self, id: &str) -> Result<()>;
--- a/src/runtime-rs/crates/runtimes/linux_container/Cargo.toml
+++ b/src/runtime-rs/crates/runtimes/linux_container/Cargo.toml
@ -11,3 +11,4 @@ tokio = { version = "1.28.1" }

 common = { path = "../common" }
 kata-types = { path = "../../../../libs/kata-types" }
+resource = { path = "../../resource" }
--- a/src/runtime-rs/crates/runtimes/linux_container/src/lib.rs
+++ b/src/runtime-rs/crates/runtimes/linux_container/src/lib.rs
@ -9,6 +9,7 @@ use anyhow::Result;
 use async_trait::async_trait;
 use common::{message::Message, RuntimeHandler, RuntimeInstance};
 use kata_types::config::TomlConfig;
+use resource::cpu_mem::initial_size::InitialSizeManager;
 use tokio::sync::mpsc::Sender;

 pub struct LinuxContainer {}
@ -32,6 +33,7 @@ impl RuntimeHandler for LinuxContainer {
        _sid: &str,
        _msg_sender: Sender<Message>,
        _config: Arc<TomlConfig>,
+        _init_size_manager: InitialSizeManager,
    ) -> Result<RuntimeInstance> {
        todo!()
    }
--- a/src/runtime-rs/crates/runtimes/src/manager.rs
+++ b/src/runtime-rs/crates/runtimes/src/manager.rs
@ -91,6 +91,7 @@ impl RuntimeHandlerManagerInner {
        network_env: SandboxNetworkEnv,
        dns: Vec<String>,
        config: Arc<TomlConfig>,
+        init_size_manager: InitialSizeManager,
    ) -> Result<()> {
        info!(sl!(), "new runtime handler {}", &config.runtime.name);
        let runtime_handler = match config.runtime.name.as_str() {
@ -105,7 +106,12 @@ impl RuntimeHandlerManagerInner {
            _ => return Err(anyhow!("Unsupported runtime: {}", &config.runtime.name)),
        };
        let runtime_instance = runtime_handler
-            .new_instance(&self.id, self.msg_sender.clone(), config.clone())
+            .new_instance(
+                &self.id,
+                self.msg_sender.clone(),
+                config.clone(),
+                init_size_manager,
+            )
            .await
            .context("new runtime instance")?;

@ -160,7 +166,21 @@ impl RuntimeHandlerManagerInner {
            }
        }

-        let config = load_config(spec, options).context("load config")?;
+        let mut config = load_config(spec, options).context("load config")?;
+
+        // Sandbox sizing information *may* be provided in two scenarios:
+        //   1. The upper layer runtime (i.e., containerd or crio) provides sandbox sizing information as an annotation
+        //      in the 'sandbox container's' spec. This would typically be a scenario where, as part of a create sandbox
+        //      request, the upper layer runtime receives this information as part of a pod, and makes it available to us
+        //      for sizing purposes.
+        //   2. If this is not a sandbox infrastructure container, but instead a standalone single container (analogous to "docker run..."),
+        //      then the container spec itself will contain appropriate sizing information for the entire sandbox (since it is
+        //      a single container).
+        let mut initial_size_manager =
+            InitialSizeManager::new(spec).context("failed to construct static resource manager")?;
+        initial_size_manager
+            .setup_config(&mut config)
+            .context("failed to setup static resource mgmt config")?;

        update_component_log_level(&config);

@ -202,8 +222,14 @@ impl RuntimeHandlerManagerInner {
            netns,
            network_created,
        };
-
-        self.init_runtime_handler(spec, state, network_env, dns, Arc::new(config))
+        self.init_runtime_handler(
+            spec,
+            state,
+            network_env,
+            dns,
+            Arc::new(config),
+            initial_size_manager,
+        )
        .await
        .context("init runtime handler")?;

@ -507,7 +533,7 @@ fn load_config(spec: &oci::Spec, option: &Option<Vec<u8>>) -> Result<TomlConfig>
    //   2. If this is not a sandbox infrastructure container, but instead a standalone single container (analogous to "docker run..."),
    //	then the container spec itself will contain appropriate sizing information for the entire sandbox (since it is
    //	a single container.
-    let initial_size_manager =
+    let mut initial_size_manager =
        InitialSizeManager::new(spec).context("failed to construct static resource manager")?;
    initial_size_manager
        .setup_config(&mut toml_config)
--- a/src/runtime-rs/crates/runtimes/virt_container/src/lib.rs
+++ b/src/runtime-rs/crates/runtimes/virt_container/src/lib.rs
@ -31,6 +31,7 @@ use hypervisor::ch::CloudHypervisor;
 #[cfg(feature = "cloud-hypervisor")]
 use kata_types::config::{hypervisor::HYPERVISOR_NAME_CH, CloudHypervisorConfig};

+use resource::cpu_mem::initial_size::InitialSizeManager;
 use resource::ResourceManager;
 use sandbox::VIRTCONTAINER;
 use tokio::sync::mpsc::Sender;
@ -77,13 +78,22 @@ impl RuntimeHandler for VirtContainer {
        sid: &str,
        msg_sender: Sender<Message>,
        config: Arc<TomlConfig>,
+        init_size_manager: InitialSizeManager,
    ) -> Result<RuntimeInstance> {
        let hypervisor = new_hypervisor(&config).await.context("new hypervisor")?;

        // get uds from hypervisor and get config from toml_config
        let agent = new_agent(&config).context("new agent")?;
-        let resource_manager =
-            Arc::new(ResourceManager::new(sid, agent.clone(), hypervisor.clone(), config).await?);
+        let resource_manager = Arc::new(
+            ResourceManager::new(
+                sid,
+                agent.clone(),
+                hypervisor.clone(),
+                config,
+                init_size_manager,
+            )
+            .await?,
+        );
        let pid = std::process::id();

        let sandbox = sandbox::VirtSandbox::new(
--- a/src/runtime-rs/crates/runtimes/wasm_container/Cargo.toml
+++ b/src/runtime-rs/crates/runtimes/wasm_container/Cargo.toml
@ -11,3 +11,4 @@ tokio = { version = "1.28.1" }

 common = { path = "../common" }
 kata-types = { path = "../../../../libs/kata-types" }
+resource = { path = "../../resource" }
--- a/src/runtime-rs/crates/runtimes/wasm_container/src/lib.rs
+++ b/src/runtime-rs/crates/runtimes/wasm_container/src/lib.rs
@ -9,6 +9,7 @@ use anyhow::Result;
 use async_trait::async_trait;
 use common::{message::Message, RuntimeHandler, RuntimeInstance};
 use kata_types::config::TomlConfig;
+use resource::cpu_mem::initial_size::InitialSizeManager;
 use tokio::sync::mpsc::Sender;
 pub struct WasmContainer {}

@ -31,6 +32,7 @@ impl RuntimeHandler for WasmContainer {
        _sid: &str,
        _msg_sender: Sender<Message>,
        _config: Arc<TomlConfig>,
+        _init_size_manager: InitialSizeManager,
    ) -> Result<RuntimeInstance> {
        todo!()
    }