feat(runtime-rs): clear cpuset in runtime side

The cpuset may declare CPU numbers that are greater than the
actual number of vcpus, which causes an error when updating
the cgroup in the guest.

This problem is difficult to solve properly, so as a temporary workaround
we clear the cpuset in the container spec before passing it to the agent.

Fixes: #5030

Signed-off-by: Yushuo <y-shuo@linux.alibaba.com>
Signed-off-by: Ji-Xinyou <jerryji0414@outlook.com>
This commit is contained in:
Yushuo 2023-05-09 10:27:11 +08:00
parent a0385e1383
commit d66f7572dd
5 changed files with 35 additions and 8 deletions

View File

@ -336,7 +336,7 @@ impl From<UpdateContainerRequest> for agent::UpdateContainerRequest {
fn from(from: UpdateContainerRequest) -> Self {
Self {
container_id: from.container_id,
resources: from_option(Some(from.resources)),
resources: from_option(from.resources),
..Default::default()
}
}

View File

@ -201,7 +201,7 @@ pub struct ListProcessesRequest {
#[derive(PartialEq, Clone, Default)]
pub struct UpdateContainerRequest {
pub container_id: String,
pub resources: oci::LinuxResources,
pub resources: Option<oci::LinuxResources>,
pub mounts: Vec<oci::Mount>,
}

View File

@ -118,7 +118,7 @@ impl ResourceManager {
cid: &str,
linux_resources: Option<&LinuxResources>,
op: ResourceUpdateOp,
) -> Result<()> {
) -> Result<Option<LinuxResources>> {
let inner = self.inner.read().await;
inner.update_linux_resource(cid, linux_resources, op).await
}

View File

@ -352,7 +352,7 @@ impl ResourceManagerInner {
cid: &str,
linux_resources: Option<&LinuxResources>,
op: ResourceUpdateOp,
) -> Result<()> {
) -> Result<Option<LinuxResources>> {
let linux_cpus = || -> Option<&LinuxCpu> { linux_resources.as_ref()?.cpu.as_ref() }();
// if static_sandbox_resource_mgmt, we will not have to update sandbox's cpu or mem resource
@ -373,7 +373,29 @@ impl ResourceManagerInner {
.update_cgroups(cid, linux_resources, op, self.hypervisor.as_ref())
.await?;
Ok(())
// update the linux resources for agent
self.agent_linux_resources(linux_resources)
}
/// Builds the copy of the linux resources that is handed to the agent.
///
/// Returns `Ok(None)` when no resources were supplied. Otherwise returns a
/// clone of `linux_resources` whose cpuset (`cpu.cpus`) has been emptied;
/// the caller's value is left untouched.
fn agent_linux_resources(
    &self,
    linux_resources: Option<&LinuxResources>,
) -> Result<Option<LinuxResources>> {
    let for_agent = linux_resources.cloned().map(|mut cleaned| {
        // Drop the cpuset before it reaches the guest: e.g. with only 5 vcpus
        // present and a cpuset of "0-2,6" in LinuxResources, the guest os
        // reports an error when creating the container.
        if let Some(cpu) = cleaned.cpu.as_mut() {
            cpu.cpus = String::new();
        }
        cleaned
    });

    Ok(for_agent)
}
}

View File

@ -156,13 +156,17 @@ impl Container {
.await?;
// update vcpus, mems and host cgroups
self.resource_manager
let resources = self
.resource_manager
.update_linux_resource(
&config.container_id,
inner.linux_resources.as_ref(),
ResourceUpdateOp::Add,
)
.await?;
if let Some(linux) = &mut spec.linux {
linux.resources = resources;
}
// create container
let r = agent::CreateContainerRequest {
@ -418,7 +422,8 @@ impl Container {
let mut inner = self.inner.write().await;
inner.linux_resources = Some(resources.clone());
// update vcpus, mems and host cgroups
self.resource_manager
let agent_resources = self
.resource_manager
.update_linux_resource(
&self.config.container_id,
Some(resources),
@ -428,7 +433,7 @@ impl Container {
let req = agent::UpdateContainerRequest {
container_id: self.container_id.container_id.clone(),
resources: resources.clone(),
resources: agent_resources,
mounts: Vec::new(),
};
self.agent