mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-06-28 16:27:50 +00:00
runtime-rs: enable container hugepage
enable the functionality of using hugepages in container Fixes: #5560 Signed-off-by: Zhongtao Hu <zhongtaohu.tim@linux.alibaba.com>
This commit is contained in:
parent
fc4a67eec3
commit
afaf17f423
8
src/libs/Cargo.lock
generated
8
src/libs/Cargo.lock
generated
@ -40,6 +40,12 @@ version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
||||
|
||||
[[package]]
|
||||
name = "base64"
|
||||
version = "0.13.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "1.2.1"
|
||||
@ -420,6 +426,8 @@ dependencies = [
|
||||
name = "kata-types"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"base64",
|
||||
"bitmask-enum",
|
||||
"byte-unit",
|
||||
"glob",
|
||||
|
@ -62,7 +62,7 @@ use crate::sl;
|
||||
/// Default permission for directories created for mountpoint.
|
||||
const MOUNT_PERM: u32 = 0o755;
|
||||
|
||||
const PROC_MOUNTS_FILE: &str = "/proc/mounts";
|
||||
pub const PROC_MOUNTS_FILE: &str = "/proc/mounts";
|
||||
const PROC_FIELDS_PER_LINE: usize = 6;
|
||||
const PROC_DEVICE_INDEX: usize = 0;
|
||||
const PROC_PATH_INDEX: usize = 1;
|
||||
|
20
src/runtime-rs/Cargo.lock
generated
20
src/runtime-rs/Cargo.lock
generated
@ -329,6 +329,16 @@ version = "3.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "415301c9de11005d4b92193c0eb7ac7adc37e5a49e0ac9bed0a42343512744b8"
|
||||
|
||||
[[package]]
|
||||
name = "byte-unit"
|
||||
version = "4.0.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "581ad4b3d627b0c09a0ccb2912148f839acaca0b93cf54cbe42b6c674e86079c"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"utf8-width",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "byteorder"
|
||||
version = "1.4.3"
|
||||
@ -1361,7 +1371,7 @@ dependencies = [
|
||||
"anyhow",
|
||||
"base64",
|
||||
"bitmask-enum",
|
||||
"byte-unit",
|
||||
"byte-unit 3.1.4",
|
||||
"glob",
|
||||
"lazy_static",
|
||||
"num_cpus",
|
||||
@ -2279,6 +2289,7 @@ dependencies = [
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
"bitflags",
|
||||
"byte-unit 4.0.17",
|
||||
"cgroups-rs",
|
||||
"futures 0.3.21",
|
||||
"hypervisor",
|
||||
@ -2299,6 +2310,7 @@ dependencies = [
|
||||
"serde_json",
|
||||
"slog",
|
||||
"slog-scope",
|
||||
"tempfile",
|
||||
"test-utils",
|
||||
"tokio",
|
||||
"uuid",
|
||||
@ -2998,6 +3010,12 @@ dependencies = [
|
||||
"percent-encoding",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "utf8-width"
|
||||
version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5190c9442dcdaf0ddd50f37420417d219ae5261bbf5db120d0f9bab996c9cba1"
|
||||
|
||||
[[package]]
|
||||
name = "uuid"
|
||||
version = "0.4.0"
|
||||
|
@ -7,11 +7,13 @@ license = "Apache-2.0"
|
||||
|
||||
[dev-dependencies]
|
||||
test-utils = { path = "../../../libs/test-utils" }
|
||||
tempfile = "3.2.0"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "^1.0"
|
||||
async-trait = "0.1.48"
|
||||
bitflags = "1.2.1"
|
||||
byte-unit = "4.0.14"
|
||||
cgroups-rs = "0.2.9"
|
||||
futures = "0.3.11"
|
||||
lazy_static = "1.4.0"
|
||||
|
@ -78,10 +78,10 @@ impl ResourceManager {
|
||||
pub async fn handler_volumes(
|
||||
&self,
|
||||
cid: &str,
|
||||
oci_mounts: &[oci::Mount],
|
||||
spec: &oci::Spec,
|
||||
) -> Result<Vec<Arc<dyn Volume>>> {
|
||||
let inner = self.inner.read().await;
|
||||
inner.handler_volumes(cid, oci_mounts).await
|
||||
inner.handler_volumes(cid, spec).await
|
||||
}
|
||||
|
||||
pub async fn dump(&self) {
|
||||
|
@ -214,10 +214,10 @@ impl ResourceManagerInner {
|
||||
pub async fn handler_volumes(
|
||||
&self,
|
||||
cid: &str,
|
||||
oci_mounts: &[oci::Mount],
|
||||
spec: &oci::Spec,
|
||||
) -> Result<Vec<Arc<dyn Volume>>> {
|
||||
self.volume_resource
|
||||
.handler_volumes(&self.share_fs, cid, oci_mounts)
|
||||
.handler_volumes(&self.share_fs, cid, spec)
|
||||
.await
|
||||
}
|
||||
|
||||
|
@ -15,6 +15,7 @@ use tokio::sync::Mutex;
|
||||
pub use utils::{do_get_guest_path, do_get_guest_share_path, get_host_rw_shared_path};
|
||||
mod virtio_fs_share_mount;
|
||||
use virtio_fs_share_mount::VirtiofsShareMount;
|
||||
pub use virtio_fs_share_mount::EPHEMERAL_PATH;
|
||||
|
||||
use std::{collections::HashMap, fmt::Debug, path::PathBuf, sync::Arc};
|
||||
|
||||
|
@ -17,7 +17,7 @@ use std::path::Path;
|
||||
|
||||
const WATCHABLE_PATH_NAME: &str = "watchable";
|
||||
const WATCHABLE_BIND_DEV_TYPE: &str = "watchable-bind";
|
||||
const EPHEMERAL_PATH: &str = "/run/kata-containers/sandbox/ephemeral";
|
||||
pub const EPHEMERAL_PATH: &str = "/run/kata-containers/sandbox/ephemeral";
|
||||
|
||||
use super::{
|
||||
utils::{self, do_get_host_path},
|
||||
|
223
src/runtime-rs/crates/resource/src/volume/hugepage.rs
Normal file
223
src/runtime-rs/crates/resource/src/volume/hugepage.rs
Normal file
@ -0,0 +1,223 @@
|
||||
// Copyright (c) 2019-2022 Alibaba Cloud
|
||||
// Copyright (c) 2019-2022 Ant Group
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
fs::File,
|
||||
io::{BufRead, BufReader},
|
||||
};
|
||||
|
||||
use crate::share_fs::EPHEMERAL_PATH;
|
||||
use agent::Storage;
|
||||
use anyhow::{anyhow, Context, Ok, Result};
|
||||
use async_trait::async_trait;
|
||||
use byte_unit::Byte;
|
||||
use hypervisor::HUGETLBFS;
|
||||
use kata_sys_util::{fs::get_base_name, mount::PROC_MOUNTS_FILE};
|
||||
use kata_types::mount::KATA_EPHEMERAL_VOLUME_TYPE;
|
||||
|
||||
use super::{Volume, BIND};
|
||||
|
||||
type PageSize = Byte;
|
||||
type Limit = u64;
|
||||
|
||||
const NODEV: &str = "nodev";
|
||||
|
||||
// container hugepage
|
||||
pub(crate) struct Hugepage {
|
||||
// storage info
|
||||
storage: Option<Storage>,
|
||||
// mount info
|
||||
mount: oci::Mount,
|
||||
}
|
||||
|
||||
// handle hugepage
|
||||
impl Hugepage {
|
||||
pub(crate) fn new(
|
||||
mount: &oci::Mount,
|
||||
hugepage_limits_map: HashMap<PageSize, Limit>,
|
||||
fs_options: Vec<String>,
|
||||
) -> Result<Self> {
|
||||
// Create mount option string
|
||||
let page_size = get_page_size(fs_options).context("failed to get page size")?;
|
||||
let option = hugepage_limits_map
|
||||
.get(&page_size)
|
||||
.map(|limit| format!("pagesize={},size={}", page_size.get_bytes(), limit))
|
||||
.context("failed to get hugepage option")?;
|
||||
let base_name = get_base_name(mount.source.clone())?
|
||||
.into_string()
|
||||
.map_err(|e| anyhow!("failed to convert to string{:?}", e))?;
|
||||
let mut mount = mount.clone();
|
||||
// Set the mount source path to a path that resides inside the VM
|
||||
mount.source = format!("{}{}{}", EPHEMERAL_PATH, "/", base_name);
|
||||
// Set the mount type to "bind"
|
||||
mount.r#type = BIND.to_string();
|
||||
|
||||
// Create a storage struct so that kata agent is able to create
|
||||
// hugetlbfs backed volume inside the VM
|
||||
let storage = Storage {
|
||||
driver: KATA_EPHEMERAL_VOLUME_TYPE.to_string(),
|
||||
source: NODEV.to_string(),
|
||||
fs_type: HUGETLBFS.to_string(),
|
||||
mount_point: mount.source.clone(),
|
||||
options: vec![option],
|
||||
..Default::default()
|
||||
};
|
||||
Ok(Self {
|
||||
storage: Some(storage),
|
||||
mount,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Volume for Hugepage {
|
||||
fn get_volume_mount(&self) -> Result<Vec<oci::Mount>> {
|
||||
Ok(vec![self.mount.clone()])
|
||||
}
|
||||
|
||||
fn get_storage(&self) -> Result<Vec<agent::Storage>> {
|
||||
let s = if let Some(s) = self.storage.as_ref() {
|
||||
vec![s.clone()]
|
||||
} else {
|
||||
vec![]
|
||||
};
|
||||
Ok(s)
|
||||
}
|
||||
|
||||
async fn cleanup(&self) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn get_huge_page_option(m: &oci::Mount) -> Result<Option<Vec<String>>> {
|
||||
if m.source.is_empty() {
|
||||
return Err(anyhow!("empty mount source"));
|
||||
}
|
||||
let file = File::open(PROC_MOUNTS_FILE).context("failed open file")?;
|
||||
let reader = BufReader::new(file);
|
||||
for line in reader.lines().flatten() {
|
||||
let items: Vec<&str> = line.split(' ').collect();
|
||||
if m.source == items[1] && items[2] == HUGETLBFS {
|
||||
let fs_options: Vec<&str> = items[3].split(',').collect();
|
||||
return Ok(Some(
|
||||
fs_options
|
||||
.iter()
|
||||
.map(|&s| s.to_string())
|
||||
.collect::<Vec<String>>(),
|
||||
));
|
||||
}
|
||||
}
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
// TODO add hugepage limit to sandbox memory once memory hotplug is enabled
|
||||
// https://github.com/kata-containers/kata-containers/issues/5880
|
||||
pub(crate) fn get_huge_page_limits_map(spec: &oci::Spec) -> Result<HashMap<PageSize, Limit>> {
|
||||
let mut hugepage_limits_map: HashMap<PageSize, Limit> = HashMap::new();
|
||||
if let Some(l) = &spec.linux {
|
||||
if let Some(r) = &l.resources {
|
||||
let hugepage_limits = r.hugepage_limits.clone();
|
||||
for hugepage_limit in hugepage_limits {
|
||||
// the pagesize send from oci spec is MB or GB, change it to Mi and Gi
|
||||
let page_size = hugepage_limit.page_size.replace('B', "i");
|
||||
let page_size = Byte::from_str(page_size)
|
||||
.context("failed to create Byte object from String")?;
|
||||
hugepage_limits_map.insert(page_size, hugepage_limit.limit);
|
||||
}
|
||||
return Ok(hugepage_limits_map);
|
||||
}
|
||||
return Ok(hugepage_limits_map);
|
||||
}
|
||||
Ok(hugepage_limits_map)
|
||||
}
|
||||
|
||||
fn get_page_size(fs_options: Vec<String>) -> Result<Byte> {
|
||||
for fs_option in fs_options {
|
||||
if fs_option.starts_with("pagesize=") {
|
||||
let page_size = fs_option
|
||||
.strip_prefix("pagesize=")
|
||||
// the parameters passed are in unit M or G, append i to be Mi and Gi
|
||||
.map(|s| format!("{}i", s))
|
||||
.context("failed to strip prefix pagesize")?;
|
||||
return Byte::from_str(page_size)
|
||||
.map_err(|_| anyhow!("failed to convert string to byte"));
|
||||
}
|
||||
}
|
||||
Err(anyhow!("failed to get page size"))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {

    use std::{collections::HashMap, fs};

    use crate::volume::hugepage::{get_page_size, HUGETLBFS, NODEV};

    use super::{get_huge_page_limits_map, get_huge_page_option};
    use byte_unit::Byte;
    use nix::mount::{mount, umount, MsFlags};
    use oci::{Linux, LinuxHugepageLimit, LinuxResources};
    use test_utils::skip_if_not_root;

    // Checks that the limits map is keyed by the parsed page size
    // ("1GB" -> 1Gi, "2MB" -> 2Mi) and carries the limit from the spec.
    #[test]
    fn test_get_huge_page_option() {
        let huge_page_limits: Vec<LinuxHugepageLimit> = ["1GB", "2MB"]
            .iter()
            .map(|format_size| LinuxHugepageLimit {
                page_size: format_size.to_string(),
                limit: 100000,
            })
            .collect();

        let spec = oci::Spec {
            linux: Some(Linux {
                resources: Some(LinuxResources {
                    hugepage_limits: huge_page_limits,
                    ..Default::default()
                }),
                ..Default::default()
            }),
            ..Default::default()
        };

        let limits = get_huge_page_limits_map(&spec);
        assert!(limits.is_ok());

        let mut expect_res = HashMap::new();
        expect_res.insert(Byte::from_str("1Gi").ok().unwrap(), 100000);
        expect_res.insert(Byte::from_str("2Mi").ok().unwrap(), 100000);
        assert_eq!(limits.unwrap(), expect_res);
    }

    // Mounts a real hugetlbfs (root only) and checks that the page size
    // read back from the mount table matches the requested one.
    #[test]
    fn test_get_huge_page_size() {
        skip_if_not_root!();
        for format_size in ["1Gi", "2Mi"] {
            let dir = tempfile::tempdir().unwrap();
            let dst = dir.path().join(format!("hugepages-{}", format_size));
            fs::create_dir_all(&dst).unwrap();
            mount(
                Some(NODEV),
                &dst,
                Some(HUGETLBFS),
                MsFlags::MS_NODEV,
                Some(format!("pagesize={}", format_size).as_str()),
            )
            .unwrap();
            let oci_mount = oci::Mount {
                source: dst.to_str().unwrap().to_string(),
                ..Default::default()
            };

            // Query the mount table while the mount exists, then tear the
            // mount down before any check that may panic, so a failing
            // expectation does not leave a hugetlbfs mount behind.
            let option = get_huge_page_option(&oci_mount);
            umount(&dst).unwrap();
            fs::remove_dir(&dst).unwrap();

            let page_size = get_page_size(option.unwrap().unwrap()).unwrap();
            assert_eq!(page_size, Byte::from_str(format_size).unwrap());
        }
    }
}
|
@ -6,17 +6,20 @@
|
||||
|
||||
mod block_volume;
|
||||
mod default_volume;
|
||||
pub mod hugepage;
|
||||
mod share_fs_volume;
|
||||
mod shm_volume;
|
||||
use async_trait::async_trait;
|
||||
|
||||
use std::{sync::Arc, vec::Vec};
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use std::{sync::Arc, vec::Vec};
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
use crate::share_fs::ShareFs;
|
||||
|
||||
use self::hugepage::{get_huge_page_limits_map, get_huge_page_option};
|
||||
|
||||
const BIND: &str = "bind";
|
||||
#[async_trait]
|
||||
pub trait Volume: Send + Sync {
|
||||
fn get_volume_mount(&self) -> Result<Vec<oci::Mount>>;
|
||||
@ -43,9 +46,11 @@ impl VolumeResource {
|
||||
&self,
|
||||
share_fs: &Option<Arc<dyn ShareFs>>,
|
||||
cid: &str,
|
||||
oci_mounts: &[oci::Mount],
|
||||
spec: &oci::Spec,
|
||||
) -> Result<Vec<Arc<dyn Volume>>> {
|
||||
let mut volumes: Vec<Arc<dyn Volume>> = vec![];
|
||||
let oci_mounts = &spec.mounts;
|
||||
// handle mounts
|
||||
for m in oci_mounts {
|
||||
let volume: Arc<dyn Volume> = if shm_volume::is_shim_volume(m) {
|
||||
let shm_size = shm_volume::DEFAULT_SHM_SIZE;
|
||||
@ -59,6 +64,17 @@ impl VolumeResource {
|
||||
.await
|
||||
.with_context(|| format!("new share fs volume {:?}", m))?,
|
||||
)
|
||||
} else if let Some(options) =
|
||||
get_huge_page_option(m).context("failed to check huge page")?
|
||||
{
|
||||
// get hugepage limits from oci
|
||||
let hugepage_limits =
|
||||
get_huge_page_limits_map(spec).context("get huge page option")?;
|
||||
// handle container hugepage
|
||||
Arc::new(
|
||||
hugepage::Hugepage::new(m, hugepage_limits, options)
|
||||
.with_context(|| format!("handle hugepages {:?}", m))?,
|
||||
)
|
||||
} else if block_volume::is_block_volume(m) {
|
||||
Arc::new(
|
||||
block_volume::BlockVolume::new(m)
|
||||
|
@ -110,7 +110,7 @@ impl Container {
|
||||
// handler volumes
|
||||
let volumes = self
|
||||
.resource_manager
|
||||
.handler_volumes(&config.container_id, &spec.mounts)
|
||||
.handler_volumes(&config.container_id, &spec)
|
||||
.await
|
||||
.context("handler volumes")?;
|
||||
let mut oci_mounts = vec![];
|
||||
@ -394,7 +394,6 @@ fn amend_spec(spec: &mut oci::Spec, disable_guest_seccomp: bool) -> Result<()> {
|
||||
resource.devices = Vec::new();
|
||||
resource.pids = None;
|
||||
resource.block_io = None;
|
||||
resource.hugepage_limits = Vec::new();
|
||||
resource.network = None;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user