runtime-rs: support memory resize

Fixes: #6875
Signed-off-by: Zhongtao Hu <zhongtaohu.tim@linux.alibaba.com>
Zhongtao Hu 2023-05-17 16:12:35 +08:00
parent 81e55c424a
commit 8d9fd9c067
22 changed files with 336 additions and 113 deletions
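
Note: at a high level, this change moves memory-size bookkeeping into the hypervisor. The trait method drops its curr_mem_mb argument, so callers only pass the requested total; each hypervisor derives the delta from its own recorded state. A minimal sketch of the new contract, with simplified stand-in types (the real MemoryConfig and trait live in the hypervisor crate):

    // sketch only: simplified stand-ins for the real kata types
    struct MemoryConfig;

    #[async_trait::async_trait]
    trait Hypervisor {
        // callers pass only the requested total size in MiB
        async fn resize_memory(&self, req_mem_mb: u32) -> anyhow::Result<(u32, MemoryConfig)>;
    }

    // what MemResource::do_update_mem_resource boils down to
    async fn update_sandbox_memory(h: &dyn Hypervisor, mem_sb_mb: u32) -> anyhow::Result<u32> {
        let (new_mem_mb, _cfg) = h.resize_memory(mem_sb_mb).await?;
        Ok(new_mem_mb)
    }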

View File

@@ -508,6 +508,7 @@ dependencies = [
  "oci",
  "persist",
  "protobuf 3.2.0",
+ "resource",
  "serde_json",
  "slog",
  "slog-scope",
@@ -1761,6 +1762,7 @@ dependencies = [
  "async-trait",
  "common",
  "kata-types",
+ "resource",
  "tokio",
 ]
@@ -4273,6 +4275,7 @@ dependencies = [
  "async-trait",
  "common",
  "kata-types",
+ "resource",
  "tokio",
 ]

View File

@@ -756,11 +756,7 @@ impl CloudHypervisorInner {
         todo!()
     }
 
-    pub(crate) fn resize_memory(
-        &self,
-        _req_mem_mb: u32,
-        _curr_mem_mb: u32,
-    ) -> Result<(u32, MemoryConfig)> {
+    pub(crate) fn resize_memory(&self, _req_mem_mb: u32) -> Result<(u32, MemoryConfig)> {
         todo!()
     }
 }

View File

@@ -178,13 +178,9 @@ impl Hypervisor for CloudHypervisor {
         inner.guest_memory_block_size_mb()
     }
 
-    async fn resize_memory(
-        &self,
-        req_mem_mb: u32,
-        curr_mem_mb: u32,
-    ) -> Result<(u32, MemoryConfig)> {
+    async fn resize_memory(&self, req_mem_mb: u32) -> Result<(u32, MemoryConfig)> {
         let inner = self.inner.read().await;
-        inner.resize_memory(req_mem_mb, curr_mem_mb)
+        inner.resize_memory(req_mem_mb)
     }
 }

View File

@@ -73,6 +73,9 @@ pub struct DragonballInner {
     /// the size of memory block of guest OS
     pub(crate) guest_memory_block_size_mb: u32,
+
+    /// the hotplug memory size
+    pub(crate) mem_hotplug_size_mb: u32,
 }
 
 impl DragonballInner {
@@ -98,6 +101,7 @@ impl DragonballInner {
             cached_block_devices: Default::default(),
             capabilities,
             guest_memory_block_size_mb: 0,
+            mem_hotplug_size_mb: 0,
         }
     }
@@ -345,36 +349,14 @@ impl DragonballInner {
         Ok((old_vcpus, new_vcpus))
     }
 
-    // curr_mem_mb size = default + hotplug
-    pub(crate) fn resize_memory(
-        &self,
-        req_mem_mb: u32,
-        curr_mem_mb: u32,
-    ) -> Result<(u32, MemoryConfig)> {
-        let mem_device_to_insert = match req_mem_mb.cmp(&curr_mem_mb) {
+    pub(crate) fn resize_memory(&mut self, req_mem_mb: u32) -> Result<(u32, MemoryConfig)> {
+        let had_mem_mb = self.config.memory_info.default_memory + self.mem_hotplug_size_mb;
+        match req_mem_mb.cmp(&had_mem_mb) {
             Ordering::Greater => {
-                // We need to insert a new memory device
-                let add_mem_mb = req_mem_mb - curr_mem_mb;
-                if self.config.memory_info.enable_virtio_mem {
-                    Some(MemDeviceConfigInfo {
-                        mem_id: format!("mem{}", curr_mem_mb),
-                        size_mib: add_mem_mb as u64,
-                        capacity_mib: add_mem_mb as u64,
-                        multi_region: false,
-                        host_numa_node_id: None,
-                        guest_numa_node_id: None,
-                        use_shared_irq: None,
-                        use_generic_irq: None,
-                    })
-                } else {
-                    None
-                }
-            }
-            Ordering::Less => {
-                // We need to insert a new balloon device to release memory
+                // clean the virtio-balloon device before hotplugging memory: resize it to 0
                 let balloon_config = BalloonDeviceConfigInfo {
-                    balloon_id: format!("mem{}", curr_mem_mb),
-                    size_mib: (curr_mem_mb - req_mem_mb) as u64,
+                    balloon_id: "balloon0".to_owned(),
+                    size_mib: 0,
                     use_shared_irq: None,
                     use_generic_irq: None,
                     f_deflate_on_oom: false,
@@ -383,17 +365,46 @@ impl DragonballInner {
                 self.vmm_instance
                     .insert_balloon_device(balloon_config)
                     .context("failed to insert balloon device")?;
-                None
-            }
-            Ordering::Equal => None, // Everything is already set up
-        };
-
-        // If we have a memory device to insert, do it now
-        if let Some(mem_config) = mem_device_to_insert {
-            self.vmm_instance
-                .insert_mem_device(mem_config)
-                .context("failed to insert memory device")?;
-        }
+
+                // update the hotplug size
+                self.mem_hotplug_size_mb = req_mem_mb - self.config.memory_info.default_memory;
+
+                // insert a new memory device
+                let add_mem_mb = req_mem_mb - had_mem_mb;
+                self.vmm_instance.insert_mem_device(MemDeviceConfigInfo {
+                    mem_id: format!("mem{}", self.mem_hotplug_size_mb),
+                    size_mib: add_mem_mb as u64,
+                    capacity_mib: add_mem_mb as u64,
+                    multi_region: false,
+                    host_numa_node_id: None,
+                    guest_numa_node_id: None,
+                    use_shared_irq: None,
+                    use_generic_irq: None,
+                })?;
+            }
+            Ordering::Less => {
+                // we only use one balloon device here and resize it to release memory;
+                // the operation below either inserts a new balloon0 device or resizes it
+                let balloon_config = BalloonDeviceConfigInfo {
+                    balloon_id: "balloon0".to_owned(),
+                    size_mib: (had_mem_mb - req_mem_mb) as u64,
+                    use_shared_irq: None,
+                    use_generic_irq: None,
+                    f_deflate_on_oom: false,
+                    f_reporting: false,
+                };
+                self.vmm_instance
+                    .insert_balloon_device(balloon_config)
+                    .context("failed to insert balloon device")?;
+            }
+            Ordering::Equal => {
+                // everything is already set up
+                info!(
+                    sl!(),
+                    "memory size unchanged, no need to do memory resizing"
+                );
+            }
+        };
 
         Ok((
             req_mem_mb,
@@ -464,6 +475,7 @@ impl Persist for DragonballInner {
             cached_block_devices: hypervisor_state.cached_block_devices,
             capabilities: Capabilities::new(),
             guest_memory_block_size_mb: 0,
+            mem_hotplug_size_mb: 0,
         })
     }
 }
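
Note: the resize policy above is easier to see stripped of VMM plumbing. Below is a hedged sketch of just the decision logic; plan_resize, ResizeAction, default_mem_mb, and hotplug_mb are illustrative names standing in for config.memory_info.default_memory and mem_hotplug_size_mb:

    use std::cmp::Ordering;

    // possible outcomes of a resize request, mirroring the match above
    enum ResizeAction {
        // deflate balloon0 to 0, then hotplug this many MiB
        HotplugAfterDeflate { add_mem_mb: u32 },
        // inflate balloon0 by this many MiB to release memory
        InflateBalloon { size_mib: u32 },
        // requested size equals current size
        Noop,
    }

    fn plan_resize(default_mem_mb: u32, hotplug_mb: u32, req_mem_mb: u32) -> ResizeAction {
        // current total = static default + previously hotplugged memory
        let had_mem_mb = default_mem_mb + hotplug_mb;
        match req_mem_mb.cmp(&had_mem_mb) {
            Ordering::Greater => ResizeAction::HotplugAfterDeflate {
                add_mem_mb: req_mem_mb - had_mem_mb,
            },
            Ordering::Less => ResizeAction::InflateBalloon {
                size_mib: had_mem_mb - req_mem_mb,
            },
            Ordering::Equal => ResizeAction::Noop,
        }
    }

Note that growing first deflates balloon0 to 0: any memory previously reclaimed by the balloon is returned before a new memory region is hotplugged, so the hotplugged delta is computed against the full had_mem_mb.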

View File

@@ -194,13 +194,9 @@ impl Hypervisor for Dragonball {
         inner.guest_memory_block_size_mb()
     }
 
-    async fn resize_memory(
-        &self,
-        req_mem_mb: u32,
-        curr_mem_mb: u32,
-    ) -> Result<(u32, MemoryConfig)> {
-        let inner = self.inner.read().await;
-        inner.resize_memory(req_mem_mb, curr_mem_mb)
+    async fn resize_memory(&self, req_mem_mb: u32) -> Result<(u32, MemoryConfig)> {
+        let mut inner = self.inner.write().await;
+        inner.resize_memory(req_mem_mb)
     }
 }

View File

@@ -91,6 +91,7 @@ pub trait Hypervisor: std::fmt::Debug + Send + Sync {
     async fn save_vm(&self) -> Result<()>;
     async fn resume_vm(&self) -> Result<()>;
     async fn resize_vcpu(&self, old_vcpus: u32, new_vcpus: u32) -> Result<(u32, u32)>; // returns (old_vcpus, new_vcpus)
+    async fn resize_memory(&self, req_mem_mb: u32) -> Result<(u32, MemoryConfig)>;
 
     // device manager
     async fn add_device(&self, device: DeviceType) -> Result<DeviceType>;
@@ -114,6 +115,4 @@ pub trait Hypervisor: std::fmt::Debug + Send + Sync {
     async fn set_capabilities(&self, flag: CapabilityBits);
     async fn set_guest_memory_block_size(&self, size: u32);
     async fn guest_memory_block_size(&self) -> u32;
-    async fn resize_memory(&self, req_mem_mb: u32, curr_mem_mb: u32)
-        -> Result<(u32, MemoryConfig)>;
 }

View File

@@ -153,11 +153,7 @@ impl QemuInner {
         todo!()
     }
 
-    pub(crate) fn resize_memory(
-        &self,
-        _req_mem_mb: u32,
-        _curr_mem_mb: u32,
-    ) -> Result<(u32, MemoryConfig)> {
+    pub(crate) fn resize_memory(&self, _req_mem_mb: u32) -> Result<(u32, MemoryConfig)> {
         todo!()
     }
 }

View File

@@ -173,12 +173,8 @@ impl Hypervisor for Qemu {
         inner.guest_memory_block_size_mb()
     }
 
-    async fn resize_memory(
-        &self,
-        req_mem_mb: u32,
-        curr_mem_mb: u32,
-    ) -> Result<(u32, MemoryConfig)> {
+    async fn resize_memory(&self, req_mem_mb: u32) -> Result<(u32, MemoryConfig)> {
         let inner = self.inner.read().await;
-        inner.resize_memory(req_mem_mb, curr_mem_mb)
+        inner.resize_memory(req_mem_mb)
     }
 }

View File

@@ -11,7 +11,6 @@ use std::{
     sync::Arc,
 };
 
-use agent::{Agent, OnlineCPUMemRequest};
 use anyhow::{Context, Ok, Result};
 use hypervisor::Hypervisor;
 use kata_types::{config::TomlConfig, cpu::LinuxContainerCpuResources};
@@ -52,7 +51,6 @@ impl CpuResource {
         linux_cpus: Option<&LinuxCpu>,
         op: ResourceUpdateOp,
         hypervisor: &dyn Hypervisor,
-        agent: &dyn Agent,
     ) -> Result<()> {
         self.update_container_cpu_resources(cid, linux_cpus, op)
             .await
@@ -67,13 +65,13 @@ impl CpuResource {
         }
 
         let curr_vcpus = self
-            .do_update_cpu_resources(vcpu_required, op, hypervisor, agent)
+            .do_update_cpu_resources(vcpu_required, op, hypervisor)
             .await?;
         self.update_current_vcpu(curr_vcpus).await;
         Ok(())
     }
 
-    async fn current_vcpu(&self) -> u32 {
+    pub(crate) async fn current_vcpu(&self) -> u32 {
         let current_vcpu = self.current_vcpu.read().await;
         *current_vcpu
     }
@@ -148,7 +146,6 @@ impl CpuResource {
         new_vcpus: u32,
         op: ResourceUpdateOp,
         hypervisor: &dyn Hypervisor,
-        agent: &dyn Agent,
     ) -> Result<u32> {
         let old_vcpus = self.current_vcpu().await;
 
@@ -164,25 +161,11 @@ impl CpuResource {
         // the number of vcpus would not be lower than the default size
         let new_vcpus = cmp::max(new_vcpus, self.default_vcpu);
 
-        let (old, new) = hypervisor
+        let (_, new) = hypervisor
             .resize_vcpu(old_vcpus, new_vcpus)
             .await
             .context("resize vcpus")?;
 
-        if old < new {
-            let add = new - old;
-            info!(sl!(), "request to onlineCpuMem with {:?} cpus", add);
-
-            agent
-                .online_cpu_mem(OnlineCPUMemRequest {
-                    wait: false,
-                    nb_cpus: new,
-                    cpu_only: true,
-                })
-                .await
-                .context("online vcpus")?;
-        }
-
         Ok(new)
     }
 }
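
Note: onlining no longer happens inside CpuResource. It moves to the ResourceManagerInner diff later in this commit, where one agent call covers both CPUs and memory. A condensed sketch of that consolidated call; the Agent trait and OnlineCPUMemRequest are the kata agent types used in the diff, and online_resources is an illustrative wrapper:

    use anyhow::Context;

    // sketch: the single post-resize onlining call, mirroring the new manager code
    async fn online_resources(agent: &dyn Agent, nb_cpus: u32) -> anyhow::Result<()> {
        agent
            .online_cpu_mem(OnlineCPUMemRequest {
                wait: false,
                nb_cpus,
                cpu_only: false, // CPUs and memory, unlike the old cpu_only: true call
            })
            .await
            .context("online vcpus")?;
        Ok(())
    }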

View File

@@ -19,6 +19,7 @@ use kata_types::{
 struct InitialSize {
     vcpu: u32,
     mem_mb: u32,
+    orig_toml_default_mem: u32,
 }
 
 // generate initial resource(vcpu and memory in MiB) from spec's information
@@ -66,7 +67,11 @@ impl TryFrom<&oci::Spec> for InitialSize {
             sl!(),
             "(from PodSandbox's annotation / SingleContainer's spec) initial size: vcpu={}, mem_mb={}", vcpu, mem_mb
         );
-        Ok(Self { vcpu, mem_mb })
+        Ok(Self {
+            vcpu,
+            mem_mb,
+            orig_toml_default_mem: 0,
+        })
     }
 }
@@ -93,7 +98,7 @@ impl InitialSizeManager {
         })
     }
 
-    pub fn setup_config(&self, config: &mut TomlConfig) -> Result<()> {
+    pub fn setup_config(&mut self, config: &mut TomlConfig) -> Result<()> {
         // update this data to the hypervisor config for later use by hypervisor
         let hypervisor_name = &config.runtime.hypervisor_name;
         let hv = config
@@ -104,6 +109,7 @@ impl InitialSizeManager {
         if self.resource.vcpu > 0 {
             hv.cpu_info.default_vcpus = self.resource.vcpu as i32
         }
+        self.resource.orig_toml_default_mem = hv.memory_info.default_memory;
         if self.resource.mem_mb > 0 {
             // since the memory overhead introduced by kata-agent and system components
             // will really affect the amount of memory the user can use, so we choose to
@@ -114,6 +120,10 @@ impl InitialSizeManager {
         }
         Ok(())
     }
+
+    pub fn get_orig_toml_default_mem(&self) -> u32 {
+        self.resource.orig_toml_default_mem
+    }
 }
 
 fn get_nr_vcpu(resource: &LinuxContainerCpuResources) -> u32 {
@@ -173,7 +183,11 @@ mod tests {
                     quota: None,
                     memory: None,
                 },
-                result: InitialSize { vcpu: 0, mem_mb: 0 },
+                result: InitialSize {
+                    vcpu: 0,
+                    mem_mb: 0,
+                    orig_toml_default_mem: 0,
+                },
             },
             TestData {
                 desc: "normal resource limit",
@@ -186,6 +200,7 @@ mod tests {
                 result: InitialSize {
                     vcpu: 3,
                     mem_mb: 512,
+                    orig_toml_default_mem: 0,
                 },
             },
         ]
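
Note: the ordering inside setup_config matters: orig_toml_default_mem is recorded from the TOML before an annotation-derived mem_mb can overwrite default_memory, so MemResource can later anchor sandbox sizing on the original baseline. A worked example with assumed numbers:

    fn main() {
        // assumed: TOML sets default_memory = 2048 MiB; a pod annotation asks for 512 MiB
        let toml_default_mem: u32 = 2048;
        let annotation_mem_mb: u32 = 512;

        // setup_config() saves the TOML value first...
        let orig_toml_default_mem = toml_default_mem;
        // ...then overrides the effective default (overhead handling omitted here)
        let effective_default_mem = annotation_mem_mb;

        // MemResource later computes the sandbox size from the original baseline:
        let container_limits_mb: u32 = 1024; // assumed sum of container memory limits
        assert_eq!(container_limits_mb + orig_toml_default_mem, 3072);
        assert_eq!(effective_default_mem, 512);
    }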

View File

@@ -0,0 +1,170 @@
+// Copyright (c) 2019-2023 Alibaba Cloud
+// Copyright (c) 2019-2023 Ant Group
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+use std::collections::HashMap;
+use std::sync::Arc;
+
+use anyhow::{Context, Ok, Result};
+use hypervisor::Hypervisor;
+use kata_types::config::TomlConfig;
+use oci::LinuxResources;
+use tokio::sync::RwLock;
+
+use crate::cpu_mem::initial_size::InitialSizeManager;
+use crate::ResourceUpdateOp;
+
+// MIB_TO_BYTES_SHIFT is the number of bits to shift to convert between MiB and bytes
+pub const MIB_TO_BYTES_SHIFT: i32 = 20;
+
+#[derive(Default, Debug, Clone)]
+pub struct MemResource {
+    /// Current memory
+    pub(crate) current_mem: Arc<RwLock<u32>>,
+
+    /// Default memory
+    pub(crate) orig_toml_default_mem: u32,
+
+    /// MemResource of each container
+    pub(crate) container_mem_resources: Arc<RwLock<HashMap<String, LinuxResources>>>,
+
+    /// Use guest swap
+    pub(crate) use_guest_swap: bool,
+}
+
+impl MemResource {
+    pub fn new(config: Arc<TomlConfig>, init_size_manager: InitialSizeManager) -> Result<Self> {
+        let hypervisor_name = config.runtime.hypervisor_name.clone();
+        let hypervisor_config = config
+            .hypervisor
+            .get(&hypervisor_name)
+            .context("failed to get hypervisor")?;
+
+        Ok(Self {
+            current_mem: Arc::new(RwLock::new(hypervisor_config.memory_info.default_memory)),
+            container_mem_resources: Arc::new(RwLock::new(HashMap::new())),
+            use_guest_swap: hypervisor_config.memory_info.enable_guest_swap,
+            orig_toml_default_mem: init_size_manager.get_orig_toml_default_mem(),
+        })
+    }
+
+    pub(crate) async fn update_mem_resources(
+        &self,
+        cid: &str,
+        linux_resources: Option<&LinuxResources>,
+        op: ResourceUpdateOp,
+        hypervisor: &dyn Hypervisor,
+    ) -> Result<()> {
+        self.update_container_mem_resources(cid, linux_resources, op)
+            .await
+            .context("update container memory resources")?;
+        // the unit here is MB
+        let (mut mem_sb_mb, need_pod_swap, swap_sb_mb) = self
+            .total_mems(self.use_guest_swap)
+            .await
+            .context("failed to calculate total memory requirement for containers")?;
+        mem_sb_mb += self.orig_toml_default_mem;
+        if need_pod_swap {
+            mem_sb_mb += swap_sb_mb;
+        }
+        info!(sl!(), "calculate mem_sb_mb {}", mem_sb_mb);
+
+        let curr_mem = self
+            .do_update_mem_resource(mem_sb_mb, swap_sb_mb, hypervisor)
+            .await
+            .context("failed to update_mem_resource")?;
+
+        self.update_current_mem(curr_mem).await;
+        Ok(())
+    }
+
+    async fn update_current_mem(&self, new_mem: u32) {
+        let mut current_mem = self.current_mem.write().await;
+        *current_mem = new_mem;
+    }
+
+    async fn total_mems(&self, use_guest_swap: bool) -> Result<(u32, bool, u32)> {
+        // sb stands for sandbox
+        let mut mem_sandbox = 0;
+        let mut need_pod_swap = false;
+        let mut swap_sandbox = 0;
+
+        let resources = self.container_mem_resources.read().await;
+
+        for (_, r) in resources.iter() {
+            for l in &r.hugepage_limits {
+                mem_sandbox += l.limit;
+            }
+
+            if let Some(memory) = &r.memory {
+                // set current_limit to 0 if the memory limit is not set for the container
+                let current_limit = memory.limit.map_or(0, |limit| {
+                    mem_sandbox += limit as u64;
+                    info!(sl!(), "memory sb: {}, memory limit: {}", mem_sandbox, limit);
+                    limit
+                });
+
+                if let Some(swappiness) = memory.swappiness {
+                    if swappiness > 0 && use_guest_swap {
+                        if let Some(swap) = memory.swap {
+                            if swap > current_limit {
+                                swap_sandbox = swap.saturating_sub(current_limit);
+                            }
+                        }
+                        // if current_limit is 0, the container will have access to the entire
+                        // memory available on the host system, so we add swap for this
+                        else if current_limit == 0 {
+                            need_pod_swap = true;
+                        } else {
+                            swap_sandbox += current_limit;
+                        }
+                    }
+                }
+            }
+        }
+
+        Ok((
+            (mem_sandbox >> MIB_TO_BYTES_SHIFT) as u32,
+            need_pod_swap,
+            (swap_sandbox >> MIB_TO_BYTES_SHIFT) as u32,
+        ))
+    }
+
+    // update the container_mem_resources field
+    async fn update_container_mem_resources(
+        &self,
+        cid: &str,
+        linux_resources: Option<&LinuxResources>,
+        op: ResourceUpdateOp,
+    ) -> Result<()> {
+        if let Some(r) = linux_resources {
+            let mut resources = self.container_mem_resources.write().await;
+            match op {
+                ResourceUpdateOp::Add | ResourceUpdateOp::Update => {
+                    resources.insert(cid.to_owned(), r.clone());
+                }
+                ResourceUpdateOp::Del => {
+                    resources.remove(cid);
+                }
+            }
+        }
+        Ok(())
+    }
+
+    async fn do_update_mem_resource(
+        &self,
+        new_mem: u32,
+        _swap_sz_mb: u32,
+        hypervisor: &dyn Hypervisor,
+    ) -> Result<u32> {
+        info!(sl!(), "requesting vmm to update memory to {:?}", new_mem);
+
+        let (new_memory, _mem_config) = hypervisor
+            .resize_memory(new_mem)
+            .await
+            .context("resize memory")?;
+
+        Ok(new_memory)
+    }
+}
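
Note: total_mems converts cgroup byte values to MiB with a right shift by MIB_TO_BYTES_SHIFT (20), i.e. a division by 1048576. A quick sanity check of that arithmetic with assumed limits:

    const MIB_TO_BYTES_SHIFT: i32 = 20;

    fn main() {
        // two containers with assumed memory limits of 512 MiB and 256 MiB, in bytes
        let limits_bytes: [u64; 2] = [512 << MIB_TO_BYTES_SHIFT, 256 << MIB_TO_BYTES_SHIFT];
        let mem_sandbox: u64 = limits_bytes.iter().sum();

        // total_mems() reports the sandbox total in MiB via the same shift
        assert_eq!((mem_sandbox >> MIB_TO_BYTES_SHIFT) as u32, 768);
    }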

View File

@@ -6,3 +6,4 @@
 pub mod cpu;
 pub mod initial_size;
+pub mod mem;

View File

@@ -19,6 +19,7 @@ use persist::sandbox_persist::Persist;
 use tokio::sync::RwLock;
 use tracing::instrument;
 
+use crate::cpu_mem::initial_size::InitialSizeManager;
 use crate::network::NetworkConfig;
 use crate::resource_persist::ResourceState;
 use crate::ResourceUpdateOp;
@@ -47,13 +48,15 @@ impl ResourceManager {
         agent: Arc<dyn Agent>,
         hypervisor: Arc<dyn Hypervisor>,
         toml_config: Arc<TomlConfig>,
+        init_size_manager: InitialSizeManager,
     ) -> Result<Self> {
         // Register the resource logger for later use.
         logging::register_subsystem_logger("runtimes", "resource");
 
         Ok(Self {
             inner: Arc::new(RwLock::new(
-                ResourceManagerInner::new(sid, agent, hypervisor, toml_config).await?,
+                ResourceManagerInner::new(sid, agent, hypervisor, toml_config, init_size_manager)
+                    .await?,
             )),
         })
     }

View File

@@ -6,7 +6,7 @@
 
 use std::{sync::Arc, thread};
 
-use agent::{types::Device, Agent, Storage};
+use agent::{types::Device, Agent, OnlineCPUMemRequest, Storage};
 use anyhow::{anyhow, Context, Ok, Result};
 use async_trait::async_trait;
 use hypervisor::{
@@ -25,7 +25,7 @@ use tokio::{runtime, sync::RwLock};
 
 use crate::{
     cgroups::{CgroupArgs, CgroupsResource},
-    cpu_mem::cpu::CpuResource,
+    cpu_mem::{cpu::CpuResource, initial_size::InitialSizeManager, mem::MemResource},
     manager::ManagerArgs,
     network::{self, Network, NetworkConfig},
     resource_persist::ResourceState,
@@ -48,6 +48,7 @@ pub(crate) struct ResourceManagerInner {
     pub volume_resource: VolumeResource,
     pub cgroups_resource: CgroupsResource,
     pub cpu_resource: CpuResource,
+    pub mem_resource: MemResource,
 }
 
 impl ResourceManagerInner {
@@ -56,6 +57,7 @@ impl ResourceManagerInner {
         agent: Arc<dyn Agent>,
         hypervisor: Arc<dyn Hypervisor>,
         toml_config: Arc<TomlConfig>,
+        init_size_manager: InitialSizeManager,
     ) -> Result<Self> {
         // create device manager
         let dev_manager = DeviceManager::new(hypervisor.clone())
@@ -64,6 +66,7 @@ impl ResourceManagerInner {
 
         let cgroups_resource = CgroupsResource::new(sid, &toml_config)?;
         let cpu_resource = CpuResource::new(toml_config.clone())?;
+        let mem_resource = MemResource::new(toml_config.clone(), init_size_manager)?;
         Ok(Self {
             sid: sid.to_string(),
             toml_config,
@@ -76,6 +79,7 @@ impl ResourceManagerInner {
             volume_resource: VolumeResource::new(),
             cgroups_resource,
             cpu_resource,
+            mem_resource,
         })
     }
@@ -427,15 +431,23 @@ impl ResourceManagerInner {
         // if static_sandbox_resource_mgmt, we will not have to update sandbox's cpu or mem resource
         if !self.toml_config.runtime.static_sandbox_resource_mgmt {
+            // update cpu
             self.cpu_resource
-                .update_cpu_resources(
-                    cid,
-                    linux_cpus,
-                    op,
-                    self.hypervisor.as_ref(),
-                    self.agent.as_ref(),
-                )
+                .update_cpu_resources(cid, linux_cpus, op, self.hypervisor.as_ref())
                 .await?;
+            // update memory
+            self.mem_resource
+                .update_mem_resources(cid, linux_resources, op, self.hypervisor.as_ref())
+                .await?;
+
+            self.agent
+                .online_cpu_mem(OnlineCPUMemRequest {
+                    wait: false,
+                    nb_cpus: self.cpu_resource.current_vcpu().await,
+                    cpu_only: false,
+                })
+                .await
+                .context("online vcpus")?;
         }
 
         // we should firstly update the vcpus and mems, and then update the host cgroups
@@ -516,6 +528,7 @@ impl Persist for ResourceManagerInner {
                 .await?,
             toml_config: Arc::new(TomlConfig::default()),
             cpu_resource: CpuResource::default(),
+            mem_resource: MemResource::default(),
         })
     }
 }

View File

@@ -26,4 +26,5 @@ agent = { path = "../../agent" }
 kata-sys-util = { path = "../../../../libs/kata-sys-util" }
 kata-types = { path = "../../../../libs/kata-types" }
 oci = { path = "../../../../libs/oci" }
+resource = { path = "../../resource" }

View File

@@ -6,13 +6,13 @@
 
 use std::sync::Arc;
 
-use crate::{message::Message, ContainerManager, Sandbox};
 use anyhow::Result;
 use async_trait::async_trait;
 use kata_types::config::TomlConfig;
+use resource::cpu_mem::initial_size::InitialSizeManager;
 use tokio::sync::mpsc::Sender;
 
+use crate::{message::Message, ContainerManager, Sandbox};
+
 #[derive(Clone)]
 pub struct RuntimeInstance {
     pub sandbox: Arc<dyn Sandbox>,
@@ -38,6 +38,7 @@ pub trait RuntimeHandler: Send + Sync {
         sid: &str,
         msg_sender: Sender<Message>,
         config: Arc<TomlConfig>,
+        init_size_manager: InitialSizeManager,
     ) -> Result<RuntimeInstance>;
 
     fn cleanup(&self, id: &str) -> Result<()>;

View File

@@ -11,3 +11,4 @@ tokio = { version = "1.28.1" }
 common = { path = "../common" }
 kata-types = { path = "../../../../libs/kata-types" }
+resource = { path = "../../resource" }

View File

@@ -9,6 +9,7 @@ use anyhow::Result;
 use async_trait::async_trait;
 use common::{message::Message, RuntimeHandler, RuntimeInstance};
 use kata_types::config::TomlConfig;
+use resource::cpu_mem::initial_size::InitialSizeManager;
 use tokio::sync::mpsc::Sender;
 
 pub struct LinuxContainer {}
@@ -32,6 +33,7 @@ impl RuntimeHandler for LinuxContainer {
         _sid: &str,
         _msg_sender: Sender<Message>,
         _config: Arc<TomlConfig>,
+        _init_size_manager: InitialSizeManager,
     ) -> Result<RuntimeInstance> {
         todo!()
     }

View File

@@ -91,6 +91,7 @@ impl RuntimeHandlerManagerInner {
         network_env: SandboxNetworkEnv,
         dns: Vec<String>,
         config: Arc<TomlConfig>,
+        init_size_manager: InitialSizeManager,
     ) -> Result<()> {
         info!(sl!(), "new runtime handler {}", &config.runtime.name);
         let runtime_handler = match config.runtime.name.as_str() {
@@ -105,7 +106,12 @@ impl RuntimeHandlerManagerInner {
             _ => return Err(anyhow!("Unsupported runtime: {}", &config.runtime.name)),
         };
         let runtime_instance = runtime_handler
-            .new_instance(&self.id, self.msg_sender.clone(), config.clone())
+            .new_instance(
+                &self.id,
+                self.msg_sender.clone(),
+                config.clone(),
+                init_size_manager,
+            )
             .await
             .context("new runtime instance")?;
@@ -160,7 +166,21 @@ impl RuntimeHandlerManagerInner {
             }
         }
 
-        let config = load_config(spec, options).context("load config")?;
+        let mut config = load_config(spec, options).context("load config")?;
+
+        // Sandbox sizing information *may* be provided in two scenarios:
+        // 1. The upper layer runtime (i.e., containerd or crio) provides sandbox sizing information as an annotation
+        //    in the 'sandbox container's' spec. This would typically be a scenario where, as part of a create-sandbox
+        //    request, the upper layer runtime receives this information as part of a pod and makes it available to us
+        //    for sizing purposes.
+        // 2. If this is not a sandbox infrastructure container, but instead a standalone single container (analogous to "docker run..."),
+        //    then the container spec itself will contain appropriate sizing information for the entire sandbox (since it is
+        //    a single container).
+        let mut initial_size_manager =
+            InitialSizeManager::new(spec).context("failed to construct static resource manager")?;
+        initial_size_manager
+            .setup_config(&mut config)
+            .context("failed to setup static resource mgmt config")?;
 
         update_component_log_level(&config);
@@ -202,8 +222,14 @@ impl RuntimeHandlerManagerInner {
             netns,
             network_created,
         };
 
-        self.init_runtime_handler(spec, state, network_env, dns, Arc::new(config))
+        self.init_runtime_handler(
+            spec,
+            state,
+            network_env,
+            dns,
+            Arc::new(config),
+            initial_size_manager,
+        )
         .await
         .context("init runtime handler")?;
@@ -507,7 +533,7 @@ fn load_config(spec: &oci::Spec, option: &Option<Vec<u8>>) -> Result<TomlConfig>
         // 2. If this is not a sandbox infrastructure container, but instead a standalone single container (analogous to "docker run..."),
         //    then the container spec itself will contain appropriate sizing information for the entire sandbox (since it is
         //    a single container).
-        let initial_size_manager =
+        let mut initial_size_manager =
             InitialSizeManager::new(spec).context("failed to construct static resource manager")?;
         initial_size_manager
             .setup_config(&mut toml_config)

View File

@@ -31,6 +31,7 @@ use hypervisor::ch::CloudHypervisor;
 #[cfg(feature = "cloud-hypervisor")]
 use kata_types::config::{hypervisor::HYPERVISOR_NAME_CH, CloudHypervisorConfig};
+use resource::cpu_mem::initial_size::InitialSizeManager;
 use resource::ResourceManager;
 use sandbox::VIRTCONTAINER;
 use tokio::sync::mpsc::Sender;
@@ -77,13 +78,22 @@ impl RuntimeHandler for VirtContainer {
         sid: &str,
         msg_sender: Sender<Message>,
         config: Arc<TomlConfig>,
+        init_size_manager: InitialSizeManager,
     ) -> Result<RuntimeInstance> {
         let hypervisor = new_hypervisor(&config).await.context("new hypervisor")?;
 
         // get uds from hypervisor and get config from toml_config
         let agent = new_agent(&config).context("new agent")?;
-        let resource_manager =
-            Arc::new(ResourceManager::new(sid, agent.clone(), hypervisor.clone(), config).await?);
+        let resource_manager = Arc::new(
+            ResourceManager::new(
+                sid,
+                agent.clone(),
+                hypervisor.clone(),
+                config,
+                init_size_manager,
+            )
+            .await?,
+        );
         let pid = std::process::id();
 
         let sandbox = sandbox::VirtSandbox::new(
View File

@@ -11,3 +11,4 @@ tokio = { version = "1.28.1" }
 common = { path = "../common" }
 kata-types = { path = "../../../../libs/kata-types" }
+resource = { path = "../../resource" }

View File

@@ -9,6 +9,7 @@ use anyhow::Result;
 use async_trait::async_trait;
 use common::{message::Message, RuntimeHandler, RuntimeInstance};
 use kata_types::config::TomlConfig;
+use resource::cpu_mem::initial_size::InitialSizeManager;
 use tokio::sync::mpsc::Sender;
 
 pub struct WasmContainer {}
@@ -31,6 +32,7 @@ impl RuntimeHandler for WasmContainer {
         _sid: &str,
         _msg_sender: Sender<Message>,
         _config: Arc<TomlConfig>,
+        _init_size_manager: InitialSizeManager,
     ) -> Result<RuntimeInstance> {
         todo!()
     }