mirror of
https://github.com/kata-containers/kata-containers.git
synced 2026-05-04 04:00:07 +00:00
runtime-rs: resource: discover hugetlbfs page sizes from sysfs in test
`volume::hugepage::tests::test_get_huge_page_size` was hard-coded to
exercise the round-trip through `get_huge_page_option` /
`get_page_size` for two hugetlbfs page sizes:
let format_sizes = ["1Gi", "2Mi"];
These are the sizes x86_64 Ubuntu kernels expose by default
(`/sys/kernel/mm/hugepages/hugepages-{1048576,2048}kB`), but other
architectures use different sizes:
* s390x: typically `hugepages-1048576kB` only (1 GiB; no 2 MiB pool)
-- the kernel returns `EINVAL` for the missing 2 MiB iteration:
thread 'volume::hugepage::tests::test_get_huge_page_size'
panicked at .../resource/src/volume/hugepage.rs:242:14:
called `Result::unwrap()` on an `Err` value: EINVAL
* ppc64le: page sizes vary by kernel build (e.g. 16M/16G with 64K
base pages, 2M/1G with 4K base pages), and may not match
`["1Gi", "2Mi"]` exactly. Same EINVAL on the iteration whose
size isn't a registered hstate.
The reason this never bit before is the same as the SELinux test
in the previous-but-one commit: the runtime-rs `Makefile` wrapped
`test` in an `ifeq UNSUPPORTED_ARCHS` block that turned it into
`echo ...; exit 0` on s390x/ppc64le/riscv64gc, so the test was
only ever exercised on x86_64 (and aarch64, which happens to have
the same default hugetlb page sizes). Dropping that gate is what
exposed the latent assumption.
Replace the hard-coded list with a small helper that lists the
hugetlbfs page sizes the running kernel actually exposes via
`/sys/kernel/mm/hugepages/hugepages-NkB`, rendered as binary-unit
strings (e.g. "2Mi", "1Gi") that are accepted both by the kernel's
`pagesize=...` mount option and by `byte_unit::Byte::parse_str(s,
/*allow_binary=*/ true)`. If `/sys/kernel/mm/hugepages` doesn't
exist or the directory is empty (e.g. hugetlbfs is unconfigured in
the test environment) the test simply returns -- there's nothing
meaningful to round-trip.
On x86_64 the discovered list comes out as `["1Gi", "2Mi"]` (the
same coverage as before). On s390x it becomes `["1Gi"]`, on ppc64le
whatever that kernel build supports.
Sysfs alone, however, is a necessary-but-not-sufficient signal: it
tells us the kernel registered the page size, not whether *this
process* is allowed to mount hugetlbfs. The ubuntu-24.04-s390x GHA
runner demonstrates the gap -- it exposes `hugepages-1048576kB`
via /sys but runs the build inside a user/mount namespace where
mount(2) of hugetlbfs returns EPERM even when the test is invoked
through sudo:
thread 'volume::hugepage::tests::test_get_huge_page_size'
panicked at .../resource/src/volume/hugepage.rs:292:14:
called `Result::unwrap()` on an `Err` value: EPERM
There's no portable capability bit we can sniff for that, so probe
once with the first discovered size before iterating; if the probe
mount fails, skip the test (rather than panic on something it
can't control). A real regression on a host where mount() *does*
work will still surface inside the loop below, since the per-size
mount calls there continue to assert via `.unwrap()`.
While here, feed the kernel-native shorthand (e.g. "2M", "1G")
rather than the IEC form ("2Mi", "1Gi") to mount(2). hugetlbfs
parses `pagesize=` via `memparse()`, which understands K/M/G but
not the IEC `Ki/Mi/Gi`; today the kernel happens to silently drop
the trailing `i` (memparse just stops scanning), but that leniency
is incidental. /proc/mounts in turn always renders the option back
as `pagesize=<N>{K,M,G}`, which is exactly the form
`get_page_size()` already expects -- it strips `pagesize=` and
unconditionally appends `i` before handing the result to byte_unit.
Stripping the `i` for the mount option keeps the test's input
aligned with the kernel's canonical syntax, while leaving the IEC
form intact for the `Byte::parse_str(..., /*allow_binary=*/ true)`
comparison.
Also drop the unused `Ok` re-export from
`use anyhow::{anyhow, Context, Ok, Result}`. Every existing
`Ok(...)` site in this module is the variant-constructor form, for
which the prelude's `Result::Ok` already works fine in
`anyhow::Result<T>` context (same enum, with `E = anyhow::Error`
inferred from the surrounding return type), so nothing actually
needed `anyhow::Ok` to begin with. Removing the import lets the
new helper use plain `let Ok(entries) = ... else` /
`let Ok(name) = ... else` patterns directly instead of funneling
everything through `.ok()` + `if let Some(...)` to dodge the
shadowing.
Made-with: Cursor
Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
Made-with: Cursor
This commit is contained in:
@@ -13,7 +13,7 @@ use std::{
|
||||
use super::{Volume, BIND};
|
||||
use crate::share_fs::ephemeral_path;
|
||||
use agent::Storage;
|
||||
use anyhow::{anyhow, Context, Ok, Result};
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use async_trait::async_trait;
|
||||
use byte_unit::{Byte, Unit};
|
||||
use hypervisor::{device::device_manager::DeviceManager, HUGETLBFS};
|
||||
@@ -188,6 +188,46 @@ mod tests {
|
||||
};
|
||||
use test_utils::skip_if_not_root;
|
||||
|
||||
/// List the huge page sizes the running kernel actually exposes via
|
||||
/// `/sys/kernel/mm/hugepages/hugepages-NkB`, rendered as binary-unit
|
||||
/// strings (e.g. "2Mi", "1Gi") that are accepted both by the kernel's
|
||||
/// `pagesize=...` mount option and by `byte_unit::Byte::parse_str(s,
|
||||
/// /*allow_binary=*/ true)`.
|
||||
///
|
||||
/// This test was historically hard-coded to `["1Gi", "2Mi"]`, which
|
||||
/// happens to match what x86_64 Ubuntu kernels expose by default, but
|
||||
/// other architectures use different page sizes (s390x typically
|
||||
/// exposes "1Mi", ppc64le with 64K base pages typically exposes "16Mi"
|
||||
/// and/or "16Gi", etc.). Discovering them at runtime keeps the test
|
||||
/// arch-portable.
|
||||
fn supported_hugetlbfs_page_sizes() -> Vec<String> {
|
||||
let Ok(entries) = fs::read_dir("/sys/kernel/mm/hugepages") else {
|
||||
return Vec::new();
|
||||
};
|
||||
let mut sizes = Vec::new();
|
||||
for entry in entries.flatten() {
|
||||
let Ok(name) = entry.file_name().into_string() else {
|
||||
continue;
|
||||
};
|
||||
let Some(kib) = name
|
||||
.strip_prefix("hugepages-")
|
||||
.and_then(|s| s.strip_suffix("kB"))
|
||||
.and_then(|s| s.parse::<u64>().ok())
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
let s = if kib % (1024 * 1024) == 0 {
|
||||
format!("{}Gi", kib / (1024 * 1024))
|
||||
} else if kib % 1024 == 0 {
|
||||
format!("{}Mi", kib / 1024)
|
||||
} else {
|
||||
format!("{}Ki", kib)
|
||||
};
|
||||
sizes.push(s);
|
||||
}
|
||||
sizes
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_get_huge_page_option() {
|
||||
let format_sizes = ["1GB", "2MB"];
|
||||
@@ -227,17 +267,62 @@ mod tests {
|
||||
#[test]
|
||||
fn test_get_huge_page_size() {
|
||||
skip_if_not_root!();
|
||||
let format_sizes = ["1Gi", "2Mi"];
|
||||
let format_sizes = supported_hugetlbfs_page_sizes();
|
||||
if format_sizes.is_empty() {
|
||||
// No hugetlbfs pools on this kernel (e.g. hugetlbfs is
|
||||
// unconfigured or /sys isn't mounted in the test environment);
|
||||
// nothing meaningful to round-trip.
|
||||
return;
|
||||
}
|
||||
// Probe once before iterating: some CI runners (e.g. the
|
||||
// ubuntu-24.04-s390x GHA runner) report supported huge-page sizes via
|
||||
// /sys but execute the test inside a user/mount namespace where
|
||||
// mount(2) of hugetlbfs is forbidden (EPERM) even when running as
|
||||
// root. There's no portable capability bit we can sniff for that, so
|
||||
// just try once and bail out cleanly if the kernel won't let us mount
|
||||
// hugetlbfs at all -- skipping is more honest than failing on
|
||||
// something this test can't control. A real regression on a host
|
||||
// where mount() *does* work will still surface inside the loop below.
|
||||
// Hugetlbfs's `pagesize=` mount option expects the kernel-native
|
||||
// shorthand ("2M", "1G"), not byte_unit's IEC form ("2Mi", "1Gi"):
|
||||
// it parses the value with `memparse()`, and `/proc/mounts` always
|
||||
// renders it back as `pagesize=<N>{K,M,G}` regardless of input. Pass
|
||||
// the trimmed form to mount(2) so the test doesn't rely on the
|
||||
// kernel silently ignoring the trailing `i`, and keep the IEC form
|
||||
// for the `Byte::parse_str(_, /*allow_binary=*/ true)` comparison.
|
||||
let probe_dir = tempfile::tempdir().unwrap();
|
||||
let probe_dst = probe_dir
|
||||
.path()
|
||||
.join(format!("hugepages-probe-{}", format_sizes[0]));
|
||||
fs::create_dir_all(&probe_dst).unwrap();
|
||||
let probe_kernel_size = format_sizes[0].trim_end_matches('i');
|
||||
if let Err(e) = mount(
|
||||
Some(NODEV),
|
||||
&probe_dst,
|
||||
Some(HUGETLBFS),
|
||||
MsFlags::MS_NODEV,
|
||||
Some(format!("pagesize={}", probe_kernel_size).as_str()),
|
||||
) {
|
||||
eprintln!(
|
||||
"test_get_huge_page_size: skipping, hugetlbfs mount probe failed \
|
||||
(pagesize={}): {}",
|
||||
probe_kernel_size, e
|
||||
);
|
||||
return;
|
||||
}
|
||||
umount(&probe_dst).unwrap();
|
||||
|
||||
for format_size in format_sizes {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let dst = dir.path().join(format!("hugepages-{}", format_size));
|
||||
fs::create_dir_all(&dst).unwrap();
|
||||
let kernel_size = format_size.trim_end_matches('i');
|
||||
mount(
|
||||
Some(NODEV),
|
||||
&dst,
|
||||
Some(HUGETLBFS),
|
||||
MsFlags::MS_NODEV,
|
||||
Some(format!("pagesize={}", format_size).as_str()),
|
||||
Some(format!("pagesize={}", kernel_size).as_str()),
|
||||
)
|
||||
.unwrap();
|
||||
let mut mount = oci::Mount::default();
|
||||
@@ -245,7 +330,7 @@ mod tests {
|
||||
|
||||
let option = get_huge_page_option(&mount).unwrap().unwrap();
|
||||
let page_size = get_page_size(option).unwrap();
|
||||
assert_eq!(page_size, Byte::parse_str(format_size, true).unwrap());
|
||||
assert_eq!(page_size, Byte::parse_str(&format_size, true).unwrap());
|
||||
umount(&dst).unwrap();
|
||||
fs::remove_dir(&dst).unwrap();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user