mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-08-28 12:31:04 +00:00
runtime-rs: support memory resize
Fixes:#6875 Signed-off-by: Zhongtao Hu <zhongtaohu.tim@linux.alibaba.com>
This commit is contained in:
parent
81e55c424a
commit
8d9fd9c067
3
src/runtime-rs/Cargo.lock
generated
3
src/runtime-rs/Cargo.lock
generated
@ -508,6 +508,7 @@ dependencies = [
|
||||
"oci",
|
||||
"persist",
|
||||
"protobuf 3.2.0",
|
||||
"resource",
|
||||
"serde_json",
|
||||
"slog",
|
||||
"slog-scope",
|
||||
@ -1761,6 +1762,7 @@ dependencies = [
|
||||
"async-trait",
|
||||
"common",
|
||||
"kata-types",
|
||||
"resource",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
@ -4273,6 +4275,7 @@ dependencies = [
|
||||
"async-trait",
|
||||
"common",
|
||||
"kata-types",
|
||||
"resource",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
|
@ -756,11 +756,7 @@ impl CloudHypervisorInner {
|
||||
todo!()
|
||||
}
|
||||
|
||||
pub(crate) fn resize_memory(
|
||||
&self,
|
||||
_req_mem_mb: u32,
|
||||
_curr_mem_mb: u32,
|
||||
) -> Result<(u32, MemoryConfig)> {
|
||||
pub(crate) fn resize_memory(&self, _req_mem_mb: u32) -> Result<(u32, MemoryConfig)> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
@ -178,13 +178,9 @@ impl Hypervisor for CloudHypervisor {
|
||||
inner.guest_memory_block_size_mb()
|
||||
}
|
||||
|
||||
async fn resize_memory(
|
||||
&self,
|
||||
req_mem_mb: u32,
|
||||
curr_mem_mb: u32,
|
||||
) -> Result<(u32, MemoryConfig)> {
|
||||
async fn resize_memory(&self, req_mem_mb: u32) -> Result<(u32, MemoryConfig)> {
|
||||
let inner = self.inner.read().await;
|
||||
inner.resize_memory(req_mem_mb, curr_mem_mb)
|
||||
inner.resize_memory(req_mem_mb)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -73,6 +73,9 @@ pub struct DragonballInner {
|
||||
|
||||
/// the size of memory block of guest OS
|
||||
pub(crate) guest_memory_block_size_mb: u32,
|
||||
|
||||
/// the hotplug memory size
|
||||
pub(crate) mem_hotplug_size_mb: u32,
|
||||
}
|
||||
|
||||
impl DragonballInner {
|
||||
@ -98,6 +101,7 @@ impl DragonballInner {
|
||||
cached_block_devices: Default::default(),
|
||||
capabilities,
|
||||
guest_memory_block_size_mb: 0,
|
||||
mem_hotplug_size_mb: 0,
|
||||
}
|
||||
}
|
||||
|
||||
@ -345,36 +349,14 @@ impl DragonballInner {
|
||||
Ok((old_vcpus, new_vcpus))
|
||||
}
|
||||
|
||||
// curr_mem_m size = default + hotplug
|
||||
pub(crate) fn resize_memory(
|
||||
&self,
|
||||
req_mem_mb: u32,
|
||||
curr_mem_mb: u32,
|
||||
) -> Result<(u32, MemoryConfig)> {
|
||||
let mem_device_to_insert = match req_mem_mb.cmp(&curr_mem_mb) {
|
||||
pub(crate) fn resize_memory(&mut self, req_mem_mb: u32) -> Result<(u32, MemoryConfig)> {
|
||||
let had_mem_mb = self.config.memory_info.default_memory + self.mem_hotplug_size_mb;
|
||||
match req_mem_mb.cmp(&had_mem_mb) {
|
||||
Ordering::Greater => {
|
||||
// We need to insert a new memory device
|
||||
let add_mem_mb = req_mem_mb - curr_mem_mb;
|
||||
if self.config.memory_info.enable_virtio_mem {
|
||||
Some(MemDeviceConfigInfo {
|
||||
mem_id: format!("mem{}", curr_mem_mb),
|
||||
size_mib: add_mem_mb as u64,
|
||||
capacity_mib: add_mem_mb as u64,
|
||||
multi_region: false,
|
||||
host_numa_node_id: None,
|
||||
guest_numa_node_id: None,
|
||||
use_shared_irq: None,
|
||||
use_generic_irq: None,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
Ordering::Less => {
|
||||
// We need to insert a new balloon device to release memory
|
||||
// clean virtio-ballon device before hotplug memory, resize to 0
|
||||
let balloon_config = BalloonDeviceConfigInfo {
|
||||
balloon_id: format!("mem{}", curr_mem_mb),
|
||||
size_mib: (curr_mem_mb - req_mem_mb) as u64,
|
||||
balloon_id: "balloon0".to_owned(),
|
||||
size_mib: 0,
|
||||
use_shared_irq: None,
|
||||
use_generic_irq: None,
|
||||
f_deflate_on_oom: false,
|
||||
@ -383,17 +365,46 @@ impl DragonballInner {
|
||||
self.vmm_instance
|
||||
.insert_balloon_device(balloon_config)
|
||||
.context("failed to insert balloon device")?;
|
||||
None
|
||||
}
|
||||
Ordering::Equal => None, // Everything is already set up
|
||||
};
|
||||
|
||||
// If we have a memory device to insert, do it now
|
||||
if let Some(mem_config) = mem_device_to_insert {
|
||||
self.vmm_instance
|
||||
.insert_mem_device(mem_config)
|
||||
.context("failed to insert memory device")?;
|
||||
// update the hotplug size
|
||||
self.mem_hotplug_size_mb = req_mem_mb - self.config.memory_info.default_memory;
|
||||
|
||||
// insert a new memory device
|
||||
let add_mem_mb = req_mem_mb - had_mem_mb;
|
||||
self.vmm_instance.insert_mem_device(MemDeviceConfigInfo {
|
||||
mem_id: format!("mem{}", self.mem_hotplug_size_mb),
|
||||
size_mib: add_mem_mb as u64,
|
||||
capacity_mib: add_mem_mb as u64,
|
||||
multi_region: false,
|
||||
host_numa_node_id: None,
|
||||
guest_numa_node_id: None,
|
||||
use_shared_irq: None,
|
||||
use_generic_irq: None,
|
||||
})?;
|
||||
}
|
||||
Ordering::Less => {
|
||||
// we only use one balloon device here, and resize it to release memory
|
||||
// the operation we do here is inserting a new balloon0 device or resizing it
|
||||
let balloon_config = BalloonDeviceConfigInfo {
|
||||
balloon_id: "balloon0".to_owned(),
|
||||
size_mib: (had_mem_mb - req_mem_mb) as u64,
|
||||
use_shared_irq: None,
|
||||
use_generic_irq: None,
|
||||
f_deflate_on_oom: false,
|
||||
f_reporting: false,
|
||||
};
|
||||
self.vmm_instance
|
||||
.insert_balloon_device(balloon_config)
|
||||
.context("failed to insert balloon device")?;
|
||||
}
|
||||
Ordering::Equal => {
|
||||
// Everything is already set up
|
||||
info!(
|
||||
sl!(),
|
||||
"memory size unchanged, no need to do memory resizing"
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
Ok((
|
||||
req_mem_mb,
|
||||
@ -464,6 +475,7 @@ impl Persist for DragonballInner {
|
||||
cached_block_devices: hypervisor_state.cached_block_devices,
|
||||
capabilities: Capabilities::new(),
|
||||
guest_memory_block_size_mb: 0,
|
||||
mem_hotplug_size_mb: 0,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -194,13 +194,9 @@ impl Hypervisor for Dragonball {
|
||||
inner.guest_memory_block_size_mb()
|
||||
}
|
||||
|
||||
async fn resize_memory(
|
||||
&self,
|
||||
req_mem_mb: u32,
|
||||
curr_mem_mb: u32,
|
||||
) -> Result<(u32, MemoryConfig)> {
|
||||
let inner = self.inner.read().await;
|
||||
inner.resize_memory(req_mem_mb, curr_mem_mb)
|
||||
async fn resize_memory(&self, req_mem_mb: u32) -> Result<(u32, MemoryConfig)> {
|
||||
let mut inner = self.inner.write().await;
|
||||
inner.resize_memory(req_mem_mb)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -91,6 +91,7 @@ pub trait Hypervisor: std::fmt::Debug + Send + Sync {
|
||||
async fn save_vm(&self) -> Result<()>;
|
||||
async fn resume_vm(&self) -> Result<()>;
|
||||
async fn resize_vcpu(&self, old_vcpus: u32, new_vcpus: u32) -> Result<(u32, u32)>; // returns (old_vcpus, new_vcpus)
|
||||
async fn resize_memory(&self, req_mem_mb: u32) -> Result<(u32, MemoryConfig)>;
|
||||
|
||||
// device manager
|
||||
async fn add_device(&self, device: DeviceType) -> Result<DeviceType>;
|
||||
@ -114,6 +115,4 @@ pub trait Hypervisor: std::fmt::Debug + Send + Sync {
|
||||
async fn set_capabilities(&self, flag: CapabilityBits);
|
||||
async fn set_guest_memory_block_size(&self, size: u32);
|
||||
async fn guest_memory_block_size(&self) -> u32;
|
||||
async fn resize_memory(&self, req_mem_mb: u32, curr_mem_mb: u32)
|
||||
-> Result<(u32, MemoryConfig)>;
|
||||
}
|
||||
|
@ -153,11 +153,7 @@ impl QemuInner {
|
||||
todo!()
|
||||
}
|
||||
|
||||
pub(crate) fn resize_memory(
|
||||
&self,
|
||||
_req_mem_mb: u32,
|
||||
_curr_mem_mb: u32,
|
||||
) -> Result<(u32, MemoryConfig)> {
|
||||
pub(crate) fn resize_memory(&self, _req_mem_mb: u32) -> Result<(u32, MemoryConfig)> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
@ -173,12 +173,8 @@ impl Hypervisor for Qemu {
|
||||
inner.guest_memory_block_size_mb()
|
||||
}
|
||||
|
||||
async fn resize_memory(
|
||||
&self,
|
||||
req_mem_mb: u32,
|
||||
curr_mem_mb: u32,
|
||||
) -> Result<(u32, MemoryConfig)> {
|
||||
async fn resize_memory(&self, req_mem_mb: u32) -> Result<(u32, MemoryConfig)> {
|
||||
let inner = self.inner.read().await;
|
||||
inner.resize_memory(req_mem_mb, curr_mem_mb)
|
||||
inner.resize_memory(req_mem_mb)
|
||||
}
|
||||
}
|
||||
|
@ -11,7 +11,6 @@ use std::{
|
||||
sync::Arc,
|
||||
};
|
||||
|
||||
use agent::{Agent, OnlineCPUMemRequest};
|
||||
use anyhow::{Context, Ok, Result};
|
||||
use hypervisor::Hypervisor;
|
||||
use kata_types::{config::TomlConfig, cpu::LinuxContainerCpuResources};
|
||||
@ -52,7 +51,6 @@ impl CpuResource {
|
||||
linux_cpus: Option<&LinuxCpu>,
|
||||
op: ResourceUpdateOp,
|
||||
hypervisor: &dyn Hypervisor,
|
||||
agent: &dyn Agent,
|
||||
) -> Result<()> {
|
||||
self.update_container_cpu_resources(cid, linux_cpus, op)
|
||||
.await
|
||||
@ -67,13 +65,13 @@ impl CpuResource {
|
||||
}
|
||||
|
||||
let curr_vcpus = self
|
||||
.do_update_cpu_resources(vcpu_required, op, hypervisor, agent)
|
||||
.do_update_cpu_resources(vcpu_required, op, hypervisor)
|
||||
.await?;
|
||||
self.update_current_vcpu(curr_vcpus).await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn current_vcpu(&self) -> u32 {
|
||||
pub(crate) async fn current_vcpu(&self) -> u32 {
|
||||
let current_vcpu = self.current_vcpu.read().await;
|
||||
*current_vcpu
|
||||
}
|
||||
@ -148,7 +146,6 @@ impl CpuResource {
|
||||
new_vcpus: u32,
|
||||
op: ResourceUpdateOp,
|
||||
hypervisor: &dyn Hypervisor,
|
||||
agent: &dyn Agent,
|
||||
) -> Result<u32> {
|
||||
let old_vcpus = self.current_vcpu().await;
|
||||
|
||||
@ -164,25 +161,11 @@ impl CpuResource {
|
||||
// the number of vcpus would not be lower than the default size
|
||||
let new_vcpus = cmp::max(new_vcpus, self.default_vcpu);
|
||||
|
||||
let (old, new) = hypervisor
|
||||
let (_, new) = hypervisor
|
||||
.resize_vcpu(old_vcpus, new_vcpus)
|
||||
.await
|
||||
.context("resize vcpus")?;
|
||||
|
||||
if old < new {
|
||||
let add = new - old;
|
||||
info!(sl!(), "request to onlineCpuMem with {:?} cpus", add);
|
||||
|
||||
agent
|
||||
.online_cpu_mem(OnlineCPUMemRequest {
|
||||
wait: false,
|
||||
nb_cpus: new,
|
||||
cpu_only: true,
|
||||
})
|
||||
.await
|
||||
.context("online vcpus")?;
|
||||
}
|
||||
|
||||
Ok(new)
|
||||
}
|
||||
}
|
||||
|
@ -19,6 +19,7 @@ use kata_types::{
|
||||
struct InitialSize {
|
||||
vcpu: u32,
|
||||
mem_mb: u32,
|
||||
orig_toml_default_mem: u32,
|
||||
}
|
||||
|
||||
// generate initial resource(vcpu and memory in MiB) from spec's information
|
||||
@ -66,7 +67,11 @@ impl TryFrom<&oci::Spec> for InitialSize {
|
||||
sl!(),
|
||||
"(from PodSandbox's annotation / SingleContainer's spec) initial size: vcpu={}, mem_mb={}", vcpu, mem_mb
|
||||
);
|
||||
Ok(Self { vcpu, mem_mb })
|
||||
Ok(Self {
|
||||
vcpu,
|
||||
mem_mb,
|
||||
orig_toml_default_mem: 0,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@ -93,7 +98,7 @@ impl InitialSizeManager {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn setup_config(&self, config: &mut TomlConfig) -> Result<()> {
|
||||
pub fn setup_config(&mut self, config: &mut TomlConfig) -> Result<()> {
|
||||
// update this data to the hypervisor config for later use by hypervisor
|
||||
let hypervisor_name = &config.runtime.hypervisor_name;
|
||||
let hv = config
|
||||
@ -104,6 +109,7 @@ impl InitialSizeManager {
|
||||
if self.resource.vcpu > 0 {
|
||||
hv.cpu_info.default_vcpus = self.resource.vcpu as i32
|
||||
}
|
||||
self.resource.orig_toml_default_mem = hv.memory_info.default_memory;
|
||||
if self.resource.mem_mb > 0 {
|
||||
// since the memory overhead introduced by kata-agent and system components
|
||||
// will really affect the amount of memory the user can use, so we choose to
|
||||
@ -114,6 +120,10 @@ impl InitialSizeManager {
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn get_orig_toml_default_mem(&self) -> u32 {
|
||||
self.resource.orig_toml_default_mem
|
||||
}
|
||||
}
|
||||
|
||||
fn get_nr_vcpu(resource: &LinuxContainerCpuResources) -> u32 {
|
||||
@ -173,7 +183,11 @@ mod tests {
|
||||
quota: None,
|
||||
memory: None,
|
||||
},
|
||||
result: InitialSize { vcpu: 0, mem_mb: 0 },
|
||||
result: InitialSize {
|
||||
vcpu: 0,
|
||||
mem_mb: 0,
|
||||
orig_toml_default_mem: 0,
|
||||
},
|
||||
},
|
||||
TestData {
|
||||
desc: "normal resource limit",
|
||||
@ -186,6 +200,7 @@ mod tests {
|
||||
result: InitialSize {
|
||||
vcpu: 3,
|
||||
mem_mb: 512,
|
||||
orig_toml_default_mem: 0,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
170
src/runtime-rs/crates/resource/src/cpu_mem/mem.rs
Normal file
170
src/runtime-rs/crates/resource/src/cpu_mem/mem.rs
Normal file
@ -0,0 +1,170 @@
|
||||
// Copyright (c) 2019-2023 Alibaba Cloud
|
||||
// Copyright (c) 2019-2023 Ant Group
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::{Context, Ok, Result};
|
||||
use hypervisor::Hypervisor;
|
||||
use kata_types::config::TomlConfig;
|
||||
use oci::LinuxResources;
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
use crate::cpu_mem::initial_size::InitialSizeManager;
|
||||
use crate::ResourceUpdateOp;
|
||||
|
||||
// MIB_TO_BYTES_SHIFT is the number of bits to shift when converting between MiB and bytes (1 MiB = 2^20 bytes)
|
||||
pub const MIB_TO_BYTES_SHIFT: i32 = 20;
|
||||
|
||||
#[derive(Default, Debug, Clone)]
|
||||
pub struct MemResource {
|
||||
/// Current memory
|
||||
pub(crate) current_mem: Arc<RwLock<u32>>,
|
||||
|
||||
/// Default memory
|
||||
pub(crate) orig_toml_default_mem: u32,
|
||||
|
||||
/// MemResource of each container
|
||||
pub(crate) container_mem_resources: Arc<RwLock<HashMap<String, LinuxResources>>>,
|
||||
|
||||
/// Use guest swap
|
||||
pub(crate) use_guest_swap: bool,
|
||||
}
|
||||
|
||||
impl MemResource {
|
||||
pub fn new(config: Arc<TomlConfig>, init_size_manager: InitialSizeManager) -> Result<Self> {
|
||||
let hypervisor_name = config.runtime.hypervisor_name.clone();
|
||||
let hypervisor_config = config
|
||||
.hypervisor
|
||||
.get(&hypervisor_name)
|
||||
.context("failed to get hypervisor")?;
|
||||
|
||||
Ok(Self {
|
||||
current_mem: Arc::new(RwLock::new(hypervisor_config.memory_info.default_memory)),
|
||||
container_mem_resources: Arc::new(RwLock::new(HashMap::new())),
|
||||
use_guest_swap: hypervisor_config.memory_info.enable_guest_swap,
|
||||
orig_toml_default_mem: init_size_manager.get_orig_toml_default_mem(),
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) async fn update_mem_resources(
|
||||
&self,
|
||||
cid: &str,
|
||||
linux_resources: Option<&LinuxResources>,
|
||||
op: ResourceUpdateOp,
|
||||
hypervisor: &dyn Hypervisor,
|
||||
) -> Result<()> {
|
||||
self.update_container_mem_resources(cid, linux_resources, op)
|
||||
.await
|
||||
.context("update container memory resources")?;
|
||||
// the unit here is MB
|
||||
let (mut mem_sb_mb, need_pod_swap, swap_sb_mb) = self
|
||||
.total_mems(self.use_guest_swap)
|
||||
.await
|
||||
.context("failed to calculate total memory requirement for containers")?;
|
||||
mem_sb_mb += self.orig_toml_default_mem;
|
||||
if need_pod_swap {
|
||||
mem_sb_mb += swap_sb_mb;
|
||||
}
|
||||
info!(sl!(), "calculate mem_sb_mb {}", mem_sb_mb);
|
||||
|
||||
let curr_mem = self
|
||||
.do_update_mem_resource(mem_sb_mb, swap_sb_mb, hypervisor)
|
||||
.await
|
||||
.context("failed to update_mem_resource")?;
|
||||
|
||||
self.update_current_mem(curr_mem).await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn update_current_mem(&self, new_mem: u32) {
|
||||
let mut current_mem = self.current_mem.write().await;
|
||||
*current_mem = new_mem;
|
||||
}
|
||||
|
||||
async fn total_mems(&self, use_guest_swap: bool) -> Result<(u32, bool, u32)> {
|
||||
// sb stands for sandbox
|
||||
let mut mem_sandbox = 0;
|
||||
let mut need_pod_swap = false;
|
||||
let mut swap_sandbox = 0;
|
||||
|
||||
let resources = self.container_mem_resources.read().await;
|
||||
|
||||
for (_, r) in resources.iter() {
|
||||
for l in &r.hugepage_limits {
|
||||
mem_sandbox += l.limit;
|
||||
}
|
||||
|
||||
if let Some(memory) = &r.memory {
|
||||
// set current_limit to 0 if memory limit is not set to container
|
||||
let current_limit = memory.limit.map_or(0, |limit| {
|
||||
mem_sandbox += limit as u64;
|
||||
info!(sl!(), "memory sb: {}, memory limit: {}", mem_sandbox, limit);
|
||||
limit
|
||||
});
|
||||
|
||||
if let Some(swappiness) = memory.swappiness {
|
||||
if swappiness > 0 && use_guest_swap {
|
||||
if let Some(swap) = memory.swap {
|
||||
if swap > current_limit {
|
||||
swap_sandbox = swap.saturating_sub(current_limit);
|
||||
}
|
||||
}
|
||||
// if current_limit is 0, the container will have access to the entire memory available on the host system
|
||||
// so we add swap for this
|
||||
else if current_limit == 0 {
|
||||
need_pod_swap = true;
|
||||
} else {
|
||||
swap_sandbox += current_limit;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok((
|
||||
(mem_sandbox >> MIB_TO_BYTES_SHIFT) as u32,
|
||||
need_pod_swap,
|
||||
(swap_sandbox >> MIB_TO_BYTES_SHIFT) as u32,
|
||||
))
|
||||
}
|
||||
|
||||
// update container_cpu_resources field
|
||||
async fn update_container_mem_resources(
|
||||
&self,
|
||||
cid: &str,
|
||||
linux_resources: Option<&LinuxResources>,
|
||||
op: ResourceUpdateOp,
|
||||
) -> Result<()> {
|
||||
if let Some(r) = linux_resources {
|
||||
let mut resources = self.container_mem_resources.write().await;
|
||||
match op {
|
||||
ResourceUpdateOp::Add | ResourceUpdateOp::Update => {
|
||||
resources.insert(cid.to_owned(), r.clone());
|
||||
}
|
||||
ResourceUpdateOp::Del => {
|
||||
resources.remove(cid);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn do_update_mem_resource(
|
||||
&self,
|
||||
new_mem: u32,
|
||||
_swap_sz_mb: u32,
|
||||
hypervisor: &dyn Hypervisor,
|
||||
) -> Result<u32> {
|
||||
info!(sl!(), "requesting vmm to update memory to {:?}", new_mem);
|
||||
let (new_memory, _mem_config) = hypervisor
|
||||
.resize_memory(new_mem)
|
||||
.await
|
||||
.context("resize memory")?;
|
||||
|
||||
Ok(new_memory)
|
||||
}
|
||||
}
|
@ -6,3 +6,4 @@
|
||||
|
||||
pub mod cpu;
|
||||
pub mod initial_size;
|
||||
pub mod mem;
|
||||
|
@ -19,6 +19,7 @@ use persist::sandbox_persist::Persist;
|
||||
use tokio::sync::RwLock;
|
||||
use tracing::instrument;
|
||||
|
||||
use crate::cpu_mem::initial_size::InitialSizeManager;
|
||||
use crate::network::NetworkConfig;
|
||||
use crate::resource_persist::ResourceState;
|
||||
use crate::ResourceUpdateOp;
|
||||
@ -47,13 +48,15 @@ impl ResourceManager {
|
||||
agent: Arc<dyn Agent>,
|
||||
hypervisor: Arc<dyn Hypervisor>,
|
||||
toml_config: Arc<TomlConfig>,
|
||||
init_size_manager: InitialSizeManager,
|
||||
) -> Result<Self> {
|
||||
// Register resource logger for later use.
|
||||
logging::register_subsystem_logger("runtimes", "resource");
|
||||
|
||||
Ok(Self {
|
||||
inner: Arc::new(RwLock::new(
|
||||
ResourceManagerInner::new(sid, agent, hypervisor, toml_config).await?,
|
||||
ResourceManagerInner::new(sid, agent, hypervisor, toml_config, init_size_manager)
|
||||
.await?,
|
||||
)),
|
||||
})
|
||||
}
|
||||
|
@ -6,7 +6,7 @@
|
||||
|
||||
use std::{sync::Arc, thread};
|
||||
|
||||
use agent::{types::Device, Agent, Storage};
|
||||
use agent::{types::Device, Agent, OnlineCPUMemRequest, Storage};
|
||||
use anyhow::{anyhow, Context, Ok, Result};
|
||||
use async_trait::async_trait;
|
||||
use hypervisor::{
|
||||
@ -25,7 +25,7 @@ use tokio::{runtime, sync::RwLock};
|
||||
|
||||
use crate::{
|
||||
cgroups::{CgroupArgs, CgroupsResource},
|
||||
cpu_mem::cpu::CpuResource,
|
||||
cpu_mem::{cpu::CpuResource, initial_size::InitialSizeManager, mem::MemResource},
|
||||
manager::ManagerArgs,
|
||||
network::{self, Network, NetworkConfig},
|
||||
resource_persist::ResourceState,
|
||||
@ -48,6 +48,7 @@ pub(crate) struct ResourceManagerInner {
|
||||
pub volume_resource: VolumeResource,
|
||||
pub cgroups_resource: CgroupsResource,
|
||||
pub cpu_resource: CpuResource,
|
||||
pub mem_resource: MemResource,
|
||||
}
|
||||
|
||||
impl ResourceManagerInner {
|
||||
@ -56,6 +57,7 @@ impl ResourceManagerInner {
|
||||
agent: Arc<dyn Agent>,
|
||||
hypervisor: Arc<dyn Hypervisor>,
|
||||
toml_config: Arc<TomlConfig>,
|
||||
init_size_manager: InitialSizeManager,
|
||||
) -> Result<Self> {
|
||||
// create device manager
|
||||
let dev_manager = DeviceManager::new(hypervisor.clone())
|
||||
@ -64,6 +66,7 @@ impl ResourceManagerInner {
|
||||
|
||||
let cgroups_resource = CgroupsResource::new(sid, &toml_config)?;
|
||||
let cpu_resource = CpuResource::new(toml_config.clone())?;
|
||||
let mem_resource = MemResource::new(toml_config.clone(), init_size_manager)?;
|
||||
Ok(Self {
|
||||
sid: sid.to_string(),
|
||||
toml_config,
|
||||
@ -76,6 +79,7 @@ impl ResourceManagerInner {
|
||||
volume_resource: VolumeResource::new(),
|
||||
cgroups_resource,
|
||||
cpu_resource,
|
||||
mem_resource,
|
||||
})
|
||||
}
|
||||
|
||||
@ -427,15 +431,23 @@ impl ResourceManagerInner {
|
||||
|
||||
// if static_sandbox_resource_mgmt, we will not have to update sandbox's cpu or mem resource
|
||||
if !self.toml_config.runtime.static_sandbox_resource_mgmt {
|
||||
// update cpu
|
||||
self.cpu_resource
|
||||
.update_cpu_resources(
|
||||
cid,
|
||||
linux_cpus,
|
||||
op,
|
||||
self.hypervisor.as_ref(),
|
||||
self.agent.as_ref(),
|
||||
)
|
||||
.update_cpu_resources(cid, linux_cpus, op, self.hypervisor.as_ref())
|
||||
.await?;
|
||||
// update memory
|
||||
self.mem_resource
|
||||
.update_mem_resources(cid, linux_resources, op, self.hypervisor.as_ref())
|
||||
.await?;
|
||||
|
||||
self.agent
|
||||
.online_cpu_mem(OnlineCPUMemRequest {
|
||||
wait: false,
|
||||
nb_cpus: self.cpu_resource.current_vcpu().await,
|
||||
cpu_only: false,
|
||||
})
|
||||
.await
|
||||
.context("online vcpus")?;
|
||||
}
|
||||
|
||||
// we should firstly update the vcpus and mems, and then update the host cgroups
|
||||
@ -516,6 +528,7 @@ impl Persist for ResourceManagerInner {
|
||||
.await?,
|
||||
toml_config: Arc::new(TomlConfig::default()),
|
||||
cpu_resource: CpuResource::default(),
|
||||
mem_resource: MemResource::default(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -26,4 +26,5 @@ agent = { path = "../../agent" }
|
||||
kata-sys-util = { path = "../../../../libs/kata-sys-util" }
|
||||
kata-types = { path = "../../../../libs/kata-types" }
|
||||
oci = { path = "../../../../libs/oci" }
|
||||
resource = { path = "../../resource" }
|
||||
|
||||
|
@ -6,13 +6,13 @@
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::{message::Message, ContainerManager, Sandbox};
|
||||
use anyhow::Result;
|
||||
use async_trait::async_trait;
|
||||
use kata_types::config::TomlConfig;
|
||||
use resource::cpu_mem::initial_size::InitialSizeManager;
|
||||
use tokio::sync::mpsc::Sender;
|
||||
|
||||
use crate::{message::Message, ContainerManager, Sandbox};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct RuntimeInstance {
|
||||
pub sandbox: Arc<dyn Sandbox>,
|
||||
@ -38,6 +38,7 @@ pub trait RuntimeHandler: Send + Sync {
|
||||
sid: &str,
|
||||
msg_sender: Sender<Message>,
|
||||
config: Arc<TomlConfig>,
|
||||
init_size_manager: InitialSizeManager,
|
||||
) -> Result<RuntimeInstance>;
|
||||
|
||||
fn cleanup(&self, id: &str) -> Result<()>;
|
||||
|
@ -11,3 +11,4 @@ tokio = { version = "1.28.1" }
|
||||
|
||||
common = { path = "../common" }
|
||||
kata-types = { path = "../../../../libs/kata-types" }
|
||||
resource = { path = "../../resource" }
|
@ -9,6 +9,7 @@ use anyhow::Result;
|
||||
use async_trait::async_trait;
|
||||
use common::{message::Message, RuntimeHandler, RuntimeInstance};
|
||||
use kata_types::config::TomlConfig;
|
||||
use resource::cpu_mem::initial_size::InitialSizeManager;
|
||||
use tokio::sync::mpsc::Sender;
|
||||
|
||||
pub struct LinuxContainer {}
|
||||
@ -32,6 +33,7 @@ impl RuntimeHandler for LinuxContainer {
|
||||
_sid: &str,
|
||||
_msg_sender: Sender<Message>,
|
||||
_config: Arc<TomlConfig>,
|
||||
_init_size_manager: InitialSizeManager,
|
||||
) -> Result<RuntimeInstance> {
|
||||
todo!()
|
||||
}
|
||||
|
@ -91,6 +91,7 @@ impl RuntimeHandlerManagerInner {
|
||||
network_env: SandboxNetworkEnv,
|
||||
dns: Vec<String>,
|
||||
config: Arc<TomlConfig>,
|
||||
init_size_manager: InitialSizeManager,
|
||||
) -> Result<()> {
|
||||
info!(sl!(), "new runtime handler {}", &config.runtime.name);
|
||||
let runtime_handler = match config.runtime.name.as_str() {
|
||||
@ -105,7 +106,12 @@ impl RuntimeHandlerManagerInner {
|
||||
_ => return Err(anyhow!("Unsupported runtime: {}", &config.runtime.name)),
|
||||
};
|
||||
let runtime_instance = runtime_handler
|
||||
.new_instance(&self.id, self.msg_sender.clone(), config.clone())
|
||||
.new_instance(
|
||||
&self.id,
|
||||
self.msg_sender.clone(),
|
||||
config.clone(),
|
||||
init_size_manager,
|
||||
)
|
||||
.await
|
||||
.context("new runtime instance")?;
|
||||
|
||||
@ -160,7 +166,21 @@ impl RuntimeHandlerManagerInner {
|
||||
}
|
||||
}
|
||||
|
||||
let config = load_config(spec, options).context("load config")?;
|
||||
let mut config = load_config(spec, options).context("load config")?;
|
||||
|
||||
// Sandbox sizing information *may* be provided in two scenarios:
|
||||
// 1. The upper layer runtime (ie, containerd or crio) provide sandbox sizing information as an annotation
|
||||
// in the 'sandbox container's' spec. This would typically be a scenario where as part of a create sandbox
|
||||
// request the upper layer runtime receives this information as part of a pod, and makes it available to us
|
||||
// for sizing purposes.
|
||||
// 2. If this is not a sandbox infrastructure container, but instead a standalone single container (analogous to "docker run..."),
|
||||
// then the container spec itself will contain appropriate sizing information for the entire sandbox (since it is
|
||||
// a single container).
|
||||
let mut initial_size_manager =
|
||||
InitialSizeManager::new(spec).context("failed to construct static resource manager")?;
|
||||
initial_size_manager
|
||||
.setup_config(&mut config)
|
||||
.context("failed to setup static resource mgmt config")?;
|
||||
|
||||
update_component_log_level(&config);
|
||||
|
||||
@ -202,8 +222,14 @@ impl RuntimeHandlerManagerInner {
|
||||
netns,
|
||||
network_created,
|
||||
};
|
||||
|
||||
self.init_runtime_handler(spec, state, network_env, dns, Arc::new(config))
|
||||
self.init_runtime_handler(
|
||||
spec,
|
||||
state,
|
||||
network_env,
|
||||
dns,
|
||||
Arc::new(config),
|
||||
initial_size_manager,
|
||||
)
|
||||
.await
|
||||
.context("init runtime handler")?;
|
||||
|
||||
@ -507,7 +533,7 @@ fn load_config(spec: &oci::Spec, option: &Option<Vec<u8>>) -> Result<TomlConfig>
|
||||
// 2. If this is not a sandbox infrastructure container, but instead a standalone single container (analogous to "docker run..."),
|
||||
// then the container spec itself will contain appropriate sizing information for the entire sandbox (since it is
|
||||
// a single container).
|
||||
let initial_size_manager =
|
||||
let mut initial_size_manager =
|
||||
InitialSizeManager::new(spec).context("failed to construct static resource manager")?;
|
||||
initial_size_manager
|
||||
.setup_config(&mut toml_config)
|
||||
|
@ -31,6 +31,7 @@ use hypervisor::ch::CloudHypervisor;
|
||||
#[cfg(feature = "cloud-hypervisor")]
|
||||
use kata_types::config::{hypervisor::HYPERVISOR_NAME_CH, CloudHypervisorConfig};
|
||||
|
||||
use resource::cpu_mem::initial_size::InitialSizeManager;
|
||||
use resource::ResourceManager;
|
||||
use sandbox::VIRTCONTAINER;
|
||||
use tokio::sync::mpsc::Sender;
|
||||
@ -77,13 +78,22 @@ impl RuntimeHandler for VirtContainer {
|
||||
sid: &str,
|
||||
msg_sender: Sender<Message>,
|
||||
config: Arc<TomlConfig>,
|
||||
init_size_manager: InitialSizeManager,
|
||||
) -> Result<RuntimeInstance> {
|
||||
let hypervisor = new_hypervisor(&config).await.context("new hypervisor")?;
|
||||
|
||||
// get uds from hypervisor and get config from toml_config
|
||||
let agent = new_agent(&config).context("new agent")?;
|
||||
let resource_manager =
|
||||
Arc::new(ResourceManager::new(sid, agent.clone(), hypervisor.clone(), config).await?);
|
||||
let resource_manager = Arc::new(
|
||||
ResourceManager::new(
|
||||
sid,
|
||||
agent.clone(),
|
||||
hypervisor.clone(),
|
||||
config,
|
||||
init_size_manager,
|
||||
)
|
||||
.await?,
|
||||
);
|
||||
let pid = std::process::id();
|
||||
|
||||
let sandbox = sandbox::VirtSandbox::new(
|
||||
|
@ -11,3 +11,4 @@ tokio = { version = "1.28.1" }
|
||||
|
||||
common = { path = "../common" }
|
||||
kata-types = { path = "../../../../libs/kata-types" }
|
||||
resource = { path = "../../resource" }
|
@ -9,6 +9,7 @@ use anyhow::Result;
|
||||
use async_trait::async_trait;
|
||||
use common::{message::Message, RuntimeHandler, RuntimeInstance};
|
||||
use kata_types::config::TomlConfig;
|
||||
use resource::cpu_mem::initial_size::InitialSizeManager;
|
||||
use tokio::sync::mpsc::Sender;
|
||||
pub struct WasmContainer {}
|
||||
|
||||
@ -31,6 +32,7 @@ impl RuntimeHandler for WasmContainer {
|
||||
_sid: &str,
|
||||
_msg_sender: Sender<Message>,
|
||||
_config: Arc<TomlConfig>,
|
||||
_init_size_manager: InitialSizeManager,
|
||||
) -> Result<RuntimeInstance> {
|
||||
todo!()
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user