feat(runtime-rs): merge the update_cgroups in update_linux_resources

Updating vCPU resources and memory resources of the sandbox and
updating cgroups on the host will always happening together, and
they are all updated based on the linux resources declarations of
all the containers.

So we merge update_cgroups into the update_linux_resources, so we
can better manage the resources allocated to one pod in the host.

Fixes: #5030

Signed-off-by: Yushuo <y-shuo@linux.alibaba.com>
Signed-off-by: Ji-Xinyou <jerryji0414@outlook.com>
This commit is contained in:
Yushuo 2023-05-06 17:14:36 +08:00
parent fa6dff9f70
commit a39e1e6cd1
5 changed files with 68 additions and 49 deletions

View File

@ -26,6 +26,8 @@ use oci::LinuxResources;
use persist::sandbox_persist::Persist;
use tokio::sync::RwLock;
use crate::ResourceUpdateOp;
const OS_ERROR_NO_SUCH_PROCESS: i32 = 3;
pub struct CgroupArgs {
@ -149,29 +151,51 @@ impl CgroupsResource {
&self,
cid: &str,
linux_resources: Option<&LinuxResources>,
op: ResourceUpdateOp,
h: &dyn Hypervisor,
) -> Result<()> {
let resource = self.calc_resource(linux_resources);
let changed = self.update_resources(cid, resource).await;
let new_resources = self.calc_resource(linux_resources);
let old_resources = self.update_resources(cid, new_resources.clone(), op).await;
if !changed {
return Ok(());
}
self.do_update_cgroups(h).await
}
async fn update_resources(&self, cid: &str, new_resource: Resources) -> bool {
let mut resources = self.resources.write().await;
let old_resource = resources.insert(cid.to_owned(), new_resource.clone());
if let Some(old_resource) = old_resource {
if old_resource == new_resource {
return false;
if let Some(old_resource) = old_resources.clone() {
if old_resource == new_resources {
return Ok(());
}
}
true
match self.do_update_cgroups(h).await {
Err(e) => {
// if update failed, we should roll back the records in resources
let mut resources = self.resources.write().await;
match op {
ResourceUpdateOp::Add => {
resources.remove(cid);
}
ResourceUpdateOp::Update | ResourceUpdateOp::Del => {
if let Some(old_resource) = old_resources {
resources.insert(cid.to_owned(), old_resource.clone());
}
}
}
Err(e)
}
Ok(()) => Ok(()),
}
}
async fn update_resources(
&self,
cid: &str,
new_resource: Resources,
op: ResourceUpdateOp,
) -> Option<Resources> {
let mut resources = self.resources.write().await;
match op {
ResourceUpdateOp::Add | ResourceUpdateOp::Update => {
resources.insert(cid.to_owned(), new_resource.clone())
}
ResourceUpdateOp::Del => resources.remove(cid),
}
}
async fn do_update_cgroups(&self, h: &dyn Hypervisor) -> Result<()> {

View File

@ -32,7 +32,7 @@ pub enum ResourceConfig {
ShareFs(SharedFsInfo),
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum ResourceUpdateOp {
Add,
Del,

View File

@ -113,15 +113,6 @@ impl ResourceManager {
inner.dump().await
}
pub async fn update_cgroups(
&self,
cid: &str,
linux_resources: Option<&LinuxResources>,
) -> Result<()> {
let inner = self.inner.read().await;
inner.update_cgroups(cid, linux_resources).await
}
pub async fn update_linux_resource(
&self,
cid: &str,

View File

@ -317,17 +317,7 @@ impl ResourceManagerInner {
sb_bindmnt.cleanup_sandbox_bind_mounts()
}
}
pub async fn update_cgroups(
&self,
cid: &str,
linux_resources: Option<&LinuxResources>,
) -> Result<()> {
self.cgroups_resource
.update_cgroups(cid, linux_resources, self.hypervisor.as_ref())
.await
}
pub async fn cleanup(&self) -> Result<()> {
// clean up cgroup
self.cgroups_resource
@ -365,15 +355,24 @@ impl ResourceManagerInner {
) -> Result<()> {
let linux_cpus = || -> Option<&LinuxCpu> { linux_resources.as_ref()?.cpu.as_ref() }();
self.cpu_resource
.update_cpu_resources(
cid,
linux_cpus,
op,
self.hypervisor.as_ref(),
self.agent.as_ref(),
)
// if static_sandbox_resource_mgmt, we will not have to update sandbox's cpu or mem resource
if !self.toml_config.runtime.static_sandbox_resource_mgmt {
self.cpu_resource
.update_cpu_resources(
cid,
linux_cpus,
op,
self.hypervisor.as_ref(),
self.agent.as_ref(),
)
.await?;
}
// we should firstly update the vcpus and mems, and then update the host cgroups
self.cgroups_resource
.update_cgroups(cid, linux_resources, op, self.hypervisor.as_ref())
.await?;
Ok(())
}
}

View File

@ -19,7 +19,7 @@ use common::{
use kata_sys_util::k8s::update_ephemeral_storage_type;
use oci::{LinuxResources, Process as OCIProcess};
use resource::ResourceManager;
use resource::{ResourceManager, ResourceUpdateOp};
use tokio::sync::RwLock;
use super::{
@ -155,11 +155,12 @@ impl Container {
// update cgroups
self.resource_manager
.update_cgroups(
.update_linux_resource(
&config.container_id,
spec.linux
.as_ref()
.and_then(|linux| linux.resources.as_ref()),
ResourceUpdateOp::Add,
)
.await?;
@ -402,7 +403,11 @@ impl Container {
pub async fn update(&self, resources: &LinuxResources) -> Result<()> {
self.resource_manager
.update_cgroups(&self.config.container_id, Some(resources))
.update_linux_resource(
&self.config.container_id,
Some(resources),
ResourceUpdateOp::Update,
)
.await?;
let req = agent::UpdateContainerRequest {