libs/sys-util: add utilities to parse NUMA information

Add utilities to parse NUMA information.

Signed-off-by: Liu Jiang <gerry@linux.alibaba.com>
Signed-off-by: Qingyuan Hou <qingyuan.hou@linux.alibaba.com>
Signed-off-by: Simon Guo <wei.guo.simon@linux.alibaba.com>
This commit is contained in:
Liu Jiang 2021-12-10 00:23:23 +08:00 committed by Fupan Li
parent 87887026f6
commit 1d5c898d7f
14 changed files with 634 additions and 40 deletions

View File

@ -9,6 +9,7 @@ extern crate slog;
pub mod cgroup;
pub mod fs;
pub mod mount;
pub mod numa;
// Convenience macro to obtain the scoped logger
#[macro_export]

View File

@ -0,0 +1,221 @@
// Copyright (c) 2021 Alibaba Cloud
//
// SPDX-License-Identifier: Apache-2.0
//
use std::collections::HashMap;
use std::fs::DirEntry;
use std::io::Read;
use std::path::PathBuf;
use kata_types::cpu::CpuSet;
use lazy_static::lazy_static;
use crate::sl;
use std::str::FromStr;
/// Errors returned by the NUMA helper functions in this module.
#[derive(thiserror::Error, Debug)]
pub enum Error {
/// The sysfs directory of the given CPU number could not be read.
#[error("Invalid CPU number {0}")]
InvalidCpu(u32),
/// A `node*` entry name whose suffix is not a valid `u32` node index.
#[error("Invalid node file name {0}")]
InvalidNodeFileName(String),
/// Listing a directory failed; carries the I/O error and the directory path.
#[error("Can not read directory {1}: {0}")]
ReadDirectory(#[source] std::io::Error, String),
/// Reading an already opened file failed; carries the file path and the I/O error.
#[error("Can not read from file {0}, {1:?}")]
ReadFile(String, #[source] std::io::Error),
/// Opening a file failed; carries the file path and the I/O error.
#[error("Can not open from file {0}, {1:?}")]
OpenFile(String, #[source] std::io::Error),
/// A CPU list string could not be parsed into a `CpuSet`.
#[error("Can not parse CPU info, {0:?}")]
ParseCpuInfo(#[from] kata_types::Error),
}
/// Specialized `Result` type whose error is this module's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
// Global paths used by unit tests: sysfs is replaced by a mocked directory tree that lives
// inside the crate, so the tests do not depend on the host's NUMA topology.
#[cfg(test)]
lazy_static! {
// Root of the mocked filesystem tree, relative to the crate's manifest directory.
static ref SYS_FS_PREFIX: PathBuf = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("test/texture");
// Mocked sysfs directory for NUMA nodes; unit tests can supply their own data here.
static ref NUMA_NODE_PATH: PathBuf = (&*SYS_FS_PREFIX).join("sys/devices/system/node");
// Mocked sysfs directory for CPU devices.
static ref NUMA_CPU_PATH: PathBuf = (&*SYS_FS_PREFIX).join("sys/devices/system/cpu");
}
// Global paths used by non-test builds: these point at the real sysfs of the host.
#[cfg(not(test))]
lazy_static! {
// sysfs directory for NUMA nodes
static ref NUMA_NODE_PATH: PathBuf = PathBuf::from("/sys/devices/system/node");
// sysfs directory for CPU devices
static ref NUMA_CPU_PATH: PathBuf = PathBuf::from("/sys/devices/system/cpu");
}
/// Name prefix of per-node entries; the remainder of the name is the node index.
const NUMA_NODE_PREFIX: &str = "node";
/// Name of the file inside a node directory that lists the CPUs belonging to that node.
const NUMA_NODE_CPU_LIST_NAME: &str = "cpulist";
/// Get the NUMA node id for a CPU.
///
/// Scans the CPU's directory (`cpu<N>` under the CPU sysfs path) for an entry named
/// `node<id>` and returns the first `<id>` that parses as a `u32`. Entries whose names are not
/// valid UTF-8, do not start with `node`, or do not end in a number are ignored.
///
/// # Errors
///
/// * [`Error::InvalidCpu`] if the CPU's directory cannot be read.
/// * [`Error::ReadDirectory`] if iterating over the directory entries fails.
pub fn get_node_id(cpu: u32) -> Result<u32> {
    let path = NUMA_CPU_PATH.join(format!("cpu{}", cpu));
    let dirs = path.read_dir().map_err(|_| Error::InvalidCpu(cpu))?;

    for d in dirs {
        let d = d.map_err(|e| Error::ReadDirectory(e, path.to_string_lossy().to_string()))?;
        let file_name = d.file_name();
        // `strip_prefix` removes exactly one leading "node". `trim_start_matches` would strip
        // repeated prefixes and accept malformed names such as "nodenode3".
        let node_id = file_name
            .to_str()
            .and_then(|name| name.strip_prefix(NUMA_NODE_PREFIX))
            .and_then(|index| index.parse::<u32>().ok());
        if let Some(id) = node_id {
            return Ok(id);
        }
    }

    // Default to node 0 on UMA systems.
    Ok(0)
}
/// Group the CPUs of a CPU list string by the NUMA node they belong to.
///
/// `cpus` is parsed with `CpuSet::from_str`; each CPU is then resolved through
/// [`get_node_id`]. Returns a `HashMap<numa_node_id, Vec<cpu_id>>`.
///
/// # Errors
///
/// Fails if the list cannot be parsed or if the node of any listed CPU cannot be determined.
pub fn get_node_map(cpus: &str) -> Result<HashMap<u32, Vec<u32>>> {
    let mut cpus_by_node: HashMap<u32, Vec<u32>> = HashMap::new();
    let cpu_set = CpuSet::from_str(cpus)?;

    for &cpu in cpu_set.iter() {
        cpus_by_node.entry(get_node_id(cpu)?).or_default().push(cpu);
    }

    Ok(cpus_by_node)
}
/// Get CPU to NUMA node mapping by reading `/sys/devices/system/node/nodex/cpulist`.
///
/// Return a HashMap<cpu id, node id>. The hashmap will be empty if NUMA is not enabled on the
/// system.
pub fn get_numa_nodes() -> Result<HashMap<u32, u32>> {
let mut numa_nodes = HashMap::new();
let numa_node_path = &*NUMA_NODE_PATH;
if !numa_node_path.exists() {
debug!(sl!(), "no numa node available on this system");
return Ok(numa_nodes);
}
let dirs = numa_node_path
.read_dir()
.map_err(|e| Error::ReadDirectory(e, numa_node_path.to_string_lossy().to_string()))?;
for d in dirs {
match d {
Err(e) => {
return Err(Error::ReadDirectory(
e,
numa_node_path.to_string_lossy().to_string(),
))
}
Ok(d) => {
if let Ok(file_name) = d.file_name().into_string() {
if file_name.starts_with(NUMA_NODE_PREFIX) {
let index_string = file_name.trim_start_matches(NUMA_NODE_PREFIX);
info!(
sl!(),
"get node dir {} node index {}", &file_name, index_string
);
match index_string.parse::<u32>() {
Ok(nid) => read_cpu_info_from_node(&d, nid, &mut numa_nodes)?,
Err(_e) => {
return Err(Error::InvalidNodeFileName(file_name.to_string()))
}
}
}
}
}
}
}
Ok(numa_nodes)
}
fn read_cpu_info_from_node(
d: &DirEntry,
node_index: u32,
numa_nodes: &mut HashMap<u32, u32>,
) -> Result<()> {
let cpu_list_path = d.path().join(NUMA_NODE_CPU_LIST_NAME);
let mut file = std::fs::File::open(&cpu_list_path)
.map_err(|e| Error::OpenFile(cpu_list_path.to_string_lossy().to_string(), e))?;
let mut cpu_list_string = String::new();
if let Err(e) = file.read_to_string(&mut cpu_list_string) {
return Err(Error::ReadFile(
cpu_list_path.to_string_lossy().to_string(),
e,
));
}
let split_cpus = CpuSet::from_str(cpu_list_string.trim())?;
info!(
sl!(),
"node {} list {:?} from {}", node_index, split_cpus, &cpu_list_string
);
for split_cpu_id in split_cpus.iter() {
numa_nodes.insert(*split_cpu_id, node_index);
}
Ok(())
}
/// Check whether every CPU in `cpus` appears in the CPU to NUMA node mapping.
///
/// Returns `Ok(true)` when all CPUs are known (including when `cpus` is empty) and
/// `Ok(false)` as soon as one of them has no associated node.
///
/// # Errors
///
/// Propagates any error from [`get_numa_nodes`].
pub fn is_valid_numa_cpu(cpus: &[u32]) -> Result<bool> {
    let cpu_to_node = get_numa_nodes()?;

    Ok(cpus.iter().all(|cpu| cpu_to_node.contains_key(cpu)))
}
// Unit tests. In test builds the sysfs paths point at a mocked directory tree inside the
// crate (see `SYS_FS_PREFIX`), so the expected values below describe that fixture.
#[cfg(test)]
mod tests {
use super::*;
// CPUs 0 and 1 resolve to node 0 and CPU 64 to node 1; looking up CPU 65 must fail.
#[test]
fn test_get_node_id() {
assert_eq!(get_node_id(0).unwrap(), 0);
assert_eq!(get_node_id(1).unwrap(), 0);
assert_eq!(get_node_id(64).unwrap(), 1);
get_node_id(65).unwrap_err();
}
// A CPU list is grouped per node; a list containing the failing CPU 65 is rejected as a whole.
#[test]
fn test_get_node_map() {
let map = get_node_map("0-1,64").unwrap();
assert_eq!(map.len(), 2);
assert_eq!(map.get(&0).unwrap().len(), 2);
assert_eq!(map.get(&1).unwrap().len(), 1);
get_node_map("0-1,64,65").unwrap_err();
}
// The full CPU to node mapping is expected to hold 65 CPUs: 0..=63 on node 0, 64 on node 1.
#[test]
fn test_get_numa_nodes() {
let map = get_numa_nodes().unwrap();
assert_eq!(map.len(), 65);
assert_eq!(*map.get(&0).unwrap(), 0);
assert_eq!(*map.get(&1).unwrap(), 0);
assert_eq!(*map.get(&63).unwrap(), 0);
assert_eq!(*map.get(&64).unwrap(), 1);
}
// A CPU list is valid only if every CPU in it has a node; CPU 65 has none.
#[test]
fn test_is_valid_numa_cpu() {
assert!(is_valid_numa_cpu(&[0]).unwrap());
assert!(is_valid_numa_cpu(&[1]).unwrap());
assert!(is_valid_numa_cpu(&[63]).unwrap());
assert!(is_valid_numa_cpu(&[64]).unwrap());
assert!(is_valid_numa_cpu(&[0, 1, 64]).unwrap());
assert!(!is_valid_numa_cpu(&[0, 1, 64, 65]).unwrap());
assert!(!is_valid_numa_cpu(&[65]).unwrap());
}
}

View File

@ -0,0 +1 @@
ffffffff,ffffffff

View File

@ -0,0 +1 @@
ffffffff,ffffffff

View File

@ -0,0 +1 @@
1,00000000,00000000

View File

@ -3,46 +3,156 @@
// SPDX-License-Identifier: Apache-2.0
//
/// A list of CPU IDs.
#[derive(Debug)]
pub struct CpuSet(Vec<u32>);
use std::convert::TryFrom;
use std::str::FromStr;
impl CpuSet {
/// Create a new instance of `CpuSet`.
pub fn new() -> Self {
CpuSet(vec![])
use oci::LinuxCpu;
/// A set of CPU ids, backed by the crate's `U32Set`.
pub type CpuSet = crate::utils::u32_set::U32Set;
/// A set of NUMA memory nodes, backed by the crate's `U32Set`.
pub type NumaNodeSet = crate::utils::u32_set::U32Set;
/// Error code for CPU related operations.
///
/// Both variants wrap the crate-level error produced while parsing the offending list.
#[derive(thiserror::Error, Debug)]
pub enum Error {
/// Invalid CPU list, e.g. the `cpus` string of an OCI `LinuxCpu` failed to parse.
#[error("Invalid CPU list: {0}")]
InvalidCpuSet(crate::Error),
/// Invalid NUMA memory node list, e.g. the `mems` string of an OCI `LinuxCpu` failed to parse.
#[error("Invalid NUMA memory node list: {0}")]
InvalidNodeSet(crate::Error),
}
/// Assigned CPU resources for a Linux container.
///
/// Usually built from an OCI `LinuxCpu` through `TryFrom`. The `Default` value has zeroed
/// numeric fields, empty sets and no calculated vCPU time.
#[derive(Default, Debug)]
pub struct LinuxContainerCpuResources {
// CPU shares; `try_from` stores 0 when the OCI spec leaves them unset.
shares: u64,
// CPU schedule period; `try_from` stores 0 when the OCI spec leaves it unset.
period: u64,
// CPU schedule quota; `try_from` stores -1 when the OCI spec leaves it unset.
quota: i64,
// CPUs parsed from the OCI `cpus` list.
cpuset: CpuSet,
// NUMA memory nodes parsed from the OCI `mems` list.
nodeset: NumaNodeSet,
// `quota * 1000 / period`, or `None` when the quota is negative or the period is zero.
calculated_vcpu_time_ms: Option<u64>,
}
impl LinuxContainerCpuResources {
/// Get the CPU shares (`0` when none were specified).
pub fn shares(&self) -> u64 {
self.shares
}
/// Add new CPUs into the set.
pub fn extend(&mut self, cpus: &[u32]) {
self.0.extend_from_slice(cpus);
self.0.sort_unstable();
self.0.dedup();
/// Get the CPU schedule period (`0` when none was specified).
pub fn period(&self) -> u64 {
self.period
}
/// Returns true if the CPU set contains elements.
pub fn is_empty(&self) -> bool {
self.0.is_empty()
/// Get the CPU schedule quota.
///
/// When built from an OCI `LinuxCpu` without a quota this is `-1`, i.e. unconstrained.
pub fn quota(&self) -> i64 {
self.quota
}
/// Get number of elements in the CPU set.
pub fn len(&self) -> usize {
self.0.len()
/// Get the set of CPUs assigned to the container.
pub fn cpuset(&self) -> &CpuSet {
&self.cpuset
}
/// Get the set of NUMA memory nodes assigned to the container.
pub fn nodeset(&self) -> &NumaNodeSet {
&self.nodeset
}
/// Get the number of vCPUs needed to fulfill the CPU resource request.
///
/// The calculated vCPU time is rounded up to the next multiple of 1000; `None` means the
/// request is unconstrained.
pub fn get_vcpus(&self) -> Option<u64> {
    let time_ms = self.calculated_vcpu_time_ms?;

    Some(time_ms.saturating_add(999) / 1000)
}
}
impl From<Vec<u32>> for CpuSet {
fn from(mut cpus: Vec<u32>) -> Self {
cpus.sort_unstable();
cpus.dedup();
CpuSet(cpus)
impl TryFrom<&LinuxCpu> for LinuxContainerCpuResources {
    type Error = Error;

    /// Convert the CPU section of an OCI spec into container CPU resources.
    ///
    /// Missing `shares` and `period` default to `0`, a missing `quota` defaults to `-1`.
    /// `cpus` and `mems` are parsed into the CPU set and the NUMA node set respectively.
    ///
    /// # Errors
    ///
    /// Returns `Error::InvalidCpuSet` or `Error::InvalidNodeSet` when the corresponding list
    /// cannot be parsed.
    // Unhandled fields: realtime_runtime, realtime_period
    fn try_from(value: &LinuxCpu) -> Result<Self, Self::Error> {
        let cpuset = CpuSet::from_str(&value.cpus).map_err(Error::InvalidCpuSet)?;
        let nodeset = NumaNodeSet::from_str(&value.mems).map_err(Error::InvalidNodeSet)?;

        let period = value.period.unwrap_or(0);
        let quota = value.quota.unwrap_or(-1);

        // A negative quota means the CPU resource request is unconstrained; in that case (and
        // when there is no period to divide by) no additional CPUs are currently assigned.
        let calculated_vcpu_time_ms = if quota < 0 || period == 0 {
            None
        } else {
            Some((quota as u64).saturating_mul(1000) / period)
        };

        Ok(Self {
            shares: value.shares.unwrap_or(0),
            period,
            quota,
            cpuset,
            nodeset,
            calculated_vcpu_time_ms,
        })
    }
}
/// Test whether two CPU sets are equal.
impl PartialEq for CpuSet {
fn eq(&self, other: &Self) -> bool {
self.0 == other.0
/// Assigned CPU resources for a Linux sandbox/pod.
///
/// Starts out empty apart from `shares` and accumulates the resources of its containers
/// through `merge`.
#[derive(Default, Debug)]
pub struct LinuxSandboxCpuResources {
// CPU shares given at construction time; not changed by `merge`.
shares: u64,
// Sum of the calculated vCPU time of all merged containers that have one.
calculated_vcpu_time_ms: u64,
// Union of the CPU sets of all merged containers.
cpuset: CpuSet,
// Union of the NUMA memory node sets of all merged containers.
nodeset: NumaNodeSet,
}
impl LinuxSandboxCpuResources {
    /// Create a new instance of `LinuxSandboxCpuResources`.
    ///
    /// Only `shares` is set; the vCPU time starts at zero and both sets start empty.
    pub fn new(shares: u64) -> Self {
        Self {
            shares,
            ..Default::default()
        }
    }

    /// Get the CPU shares.
    pub fn shares(&self) -> u64 {
        self.shares
    }

    /// Get assigned vCPU time in ms.
    pub fn calculated_vcpu_time_ms(&self) -> u64 {
        self.calculated_vcpu_time_ms
    }

    /// Get the CPU set.
    pub fn cpuset(&self) -> &CpuSet {
        &self.cpuset
    }

    /// Get the NUMA memory node set.
    pub fn nodeset(&self) -> &NumaNodeSet {
        &self.nodeset
    }

    /// Get number of vCPUs to fulfill the CPU resource request.
    ///
    /// When no vCPU time has been accumulated but a CPU set is present, this is the number of
    /// CPUs in the set. Otherwise it is the accumulated vCPU time rounded up to the next
    /// multiple of 1000.
    pub fn get_vcpus(&self) -> u64 {
        if self.calculated_vcpu_time_ms == 0 && !self.cpuset.is_empty() {
            self.cpuset.len() as u64
        } else {
            self.calculated_vcpu_time_ms.saturating_add(999) / 1000
        }
    }

    /// Merge resources assigned to a container into the sandbox/pod resources.
    ///
    /// Adds the container's calculated vCPU time (if any) and unions its CPU and NUMA node
    /// sets into the sandbox's. The sandbox `shares` are left untouched. Returns `self` so
    /// calls can be chained.
    pub fn merge(&mut self, container_resource: &LinuxContainerCpuResources) -> &mut Self {
        if let Some(v) = container_resource.calculated_vcpu_time_ms {
            // Saturate instead of overflowing, matching the arithmetic in `get_vcpus`.
            self.calculated_vcpu_time_ms = self.calculated_vcpu_time_ms.saturating_add(v);
        }
        self.cpuset.extend(&container_resource.cpuset);
        self.nodeset.extend(&container_resource.nodeset);
        self
    }
}
@ -51,20 +161,95 @@ mod tests {
use super::*;
#[test]
fn test_is_cpu_list_equal() {
let cpuset1 = CpuSet::from(vec![1, 2, 3]);
let cpuset2 = CpuSet::from(vec![3, 2, 1]);
let cpuset3 = CpuSet::from(vec![]);
let cpuset4 = CpuSet::from(vec![3, 2, 4]);
let cpuset5 = CpuSet::from(vec![1, 2, 3, 3, 2, 1]);
fn test_linux_container_cpu_resources() {
let resources = LinuxContainerCpuResources::default();
assert_eq!(cpuset1.len(), 3);
assert!(cpuset3.is_empty());
assert_eq!(cpuset5.len(), 3);
assert_eq!(resources.shares(), 0);
assert_eq!(resources.calculated_vcpu_time_ms, None);
assert!(resources.cpuset.is_empty());
assert!(resources.nodeset.is_empty());
assert!(resources.calculated_vcpu_time_ms.is_none());
assert_eq!(cpuset1, cpuset2);
assert_eq!(cpuset1, cpuset5);
assert_ne!(cpuset1, cpuset3);
assert_ne!(cpuset1, cpuset4);
let oci = oci::LinuxCpu {
shares: Some(2048),
quota: Some(1001),
period: Some(100),
realtime_runtime: None,
realtime_period: None,
cpus: "1,2,3".to_string(),
mems: "1".to_string(),
};
let resources = LinuxContainerCpuResources::try_from(&oci).unwrap();
assert_eq!(resources.shares(), 2048);
assert_eq!(resources.period(), 100);
assert_eq!(resources.quota(), 1001);
assert_eq!(resources.calculated_vcpu_time_ms, Some(10010));
assert_eq!(resources.get_vcpus().unwrap(), 11);
assert_eq!(resources.cpuset().len(), 3);
assert_eq!(resources.nodeset().len(), 1);
let oci = oci::LinuxCpu {
shares: Some(2048),
quota: None,
period: None,
realtime_runtime: None,
realtime_period: None,
cpus: "1".to_string(),
mems: "1-2".to_string(),
};
let resources = LinuxContainerCpuResources::try_from(&oci).unwrap();
assert_eq!(resources.shares(), 2048);
assert_eq!(resources.period(), 0);
assert_eq!(resources.quota(), -1);
assert_eq!(resources.calculated_vcpu_time_ms, None);
assert!(resources.get_vcpus().is_none());
assert_eq!(resources.cpuset().len(), 1);
assert_eq!(resources.nodeset().len(), 2);
}
// Exercises construction of sandbox CPU resources and merging of two containers into it.
#[test]
fn test_linux_sandbox_cpu_resources() {
// A fresh sandbox only carries its shares.
let mut sandbox = LinuxSandboxCpuResources::new(1024);
assert_eq!(sandbox.shares(), 1024);
assert_eq!(sandbox.get_vcpus(), 0);
assert_eq!(sandbox.calculated_vcpu_time_ms(), 0);
assert!(sandbox.cpuset().is_empty());
assert!(sandbox.nodeset().is_empty());
// Constrained container: 1001 * 1000 / 100 = 10010, which rounds up to 11 vCPUs.
let oci = oci::LinuxCpu {
shares: Some(2048),
quota: Some(1001),
period: Some(100),
realtime_runtime: None,
realtime_period: None,
cpus: "1,2,3".to_string(),
mems: "1".to_string(),
};
let resources = LinuxContainerCpuResources::try_from(&oci).unwrap();
sandbox.merge(&resources);
// Merging does not change the sandbox shares.
assert_eq!(sandbox.shares(), 1024);
assert_eq!(sandbox.get_vcpus(), 11);
assert_eq!(sandbox.calculated_vcpu_time_ms(), 10010);
assert_eq!(sandbox.cpuset().len(), 3);
assert_eq!(sandbox.nodeset().len(), 1);
// Unconstrained container (no quota/period): adds no vCPU time, only extends the sets.
let oci = oci::LinuxCpu {
shares: Some(2048),
quota: None,
period: None,
realtime_runtime: None,
realtime_period: None,
cpus: "1,4".to_string(),
mems: "1-2".to_string(),
};
let resources = LinuxContainerCpuResources::try_from(&oci).unwrap();
sandbox.merge(&resources);
assert_eq!(sandbox.shares(), 1024);
assert_eq!(sandbox.get_vcpus(), 11);
assert_eq!(sandbox.calculated_vcpu_time_ms(), 10010);
// CPUs {1,2,3} merged with {1,4} give 4 unique CPUs; nodes {1} merged with {1,2} give 2.
assert_eq!(sandbox.cpuset().len(), 4);
assert_eq!(sandbox.nodeset().len(), 2);
}
}

View File

@ -29,6 +29,16 @@ pub mod k8s;
/// Constants and data types related to mount point.
pub mod mount;
pub(crate) mod utils;
/// Common error codes.
#[derive(thiserror::Error, Debug)]
pub enum Error {
/// Invalid configuration list; carries the list string that failed to parse.
#[error("invalid list {0}")]
InvalidList(String),
}
/// Convenience macro to obtain the scoped logger
#[macro_export]
macro_rules! sl {

View File

@ -0,0 +1,6 @@
// Copyright (c) 2022 Alibaba Cloud
//
// SPDX-License-Identifier: Apache-2.0
//
pub(crate) mod u32_set;

View File

@ -0,0 +1,163 @@
// Copyright (c) 2022 Alibaba Cloud
//
// SPDX-License-Identifier: Apache-2.0
//
use std::ops::Deref;
use std::slice::Iter;
use std::str::FromStr;
use crate::Error;
/// A set of unique `u32` IDs.
///
/// The `U32Set` may be used to save CPUs parsed from a CPU list file or NUMA nodes parsed from
/// a NUMA node list file.
///
/// The IDs are kept in a vector that is always sorted in ascending order and free of
/// duplicates; every constructor and mutating method preserves this invariant.
#[derive(Default, Debug)]
pub struct U32Set(Vec<u32>);

impl U32Set {
    /// Create a new, empty instance of `U32Set`.
    pub fn new() -> Self {
        U32Set(Vec::new())
    }

    /// Add the `cpu` to the set; does nothing if it is already present.
    pub fn add(&mut self, cpu: u32) {
        // The vector is always sorted and unique, so a binary search finds either the existing
        // element or the insertion point. This avoids re-sorting the whole set on every add.
        if let Err(pos) = self.0.binary_search(&cpu) {
            self.0.insert(pos, cpu);
        }
    }

    /// Add new IDs into the set, ignoring those that are already present.
    pub fn extend(&mut self, cpus: &[u32]) {
        if cpus.is_empty() {
            return;
        }
        self.0.extend_from_slice(cpus);
        self.0.sort_unstable();
        self.0.dedup();
    }

    /// Returns true if the set contains no elements.
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    /// Get number of elements in the set.
    pub fn len(&self) -> usize {
        self.0.len()
    }

    /// Get an iterator over the set, yielding the IDs in ascending order.
    pub fn iter(&self) -> Iter<'_, u32> {
        self.0.iter()
    }
}
impl From<Vec<u32>> for U32Set {
    /// Build a set from an arbitrary list of IDs by sorting it and dropping duplicates.
    fn from(cpus: Vec<u32>) -> Self {
        let mut ids = cpus;
        ids.sort_unstable();
        ids.dedup();

        U32Set(ids)
    }
}
impl FromStr for U32Set {
type Err = Error;
fn from_str(cpus_str: &str) -> Result<Self, Self::Err> {
if cpus_str.is_empty() {
return Ok(U32Set::new());
}
let mut cpus = Vec::new();
for split_cpu in cpus_str.split(',') {
if !split_cpu.contains('-') {
if !split_cpu.is_empty() {
if let Ok(cpu_id) = split_cpu.parse::<u32>() {
cpus.push(cpu_id);
continue;
}
}
} else {
let fields: Vec<&str> = split_cpu.split('-').collect();
if fields.len() == 2 {
if let Ok(start) = fields[0].parse::<u32>() {
if let Ok(end) = fields[1].parse::<u32>() {
if start < end {
for cpu in start..=end {
cpus.push(cpu);
}
continue;
}
}
}
}
}
return Err(Error::InvalidList(cpus_str.to_string()));
}
Ok(U32Set::from(cpus))
}
}
impl Deref for U32Set {
    type Target = [u32];

    /// Expose the IDs as a read-only slice.
    fn deref(&self) -> &[u32] {
        self.0.as_slice()
    }
}
/// Two sets are equal when they contain exactly the same IDs.
impl PartialEq for U32Set {
    fn eq(&self, other: &Self) -> bool {
        self.0.as_slice() == other.0.as_slice()
    }
}
#[cfg(test)]
mod tests {
use super::*;
// Sets built from vectors compare equal regardless of input order or duplicates.
#[test]
fn test_is_cpuset_equal() {
let cpuset1 = U32Set::from(vec![1, 2, 3]);
let cpuset2 = U32Set::from(vec![3, 2, 1]);
let cpuset3 = U32Set::from(vec![]);
let cpuset4 = U32Set::from(vec![3, 2, 4]);
let cpuset5 = U32Set::from(vec![1, 2, 3, 3, 2, 1]);
assert_eq!(cpuset1.len(), 3);
assert!(cpuset3.is_empty());
assert_eq!(cpuset5.len(), 3);
assert_eq!(cpuset1, cpuset2);
assert_eq!(cpuset1, cpuset5);
assert_ne!(cpuset1, cpuset3);
assert_ne!(cpuset1, cpuset4);
}
// Parsing accepts single IDs and `start-end` ranges, and rejects malformed items.
#[test]
fn test_cpuset_from_str() {
// The empty string is a valid, empty set.
assert!(U32Set::from_str("").unwrap().is_empty());
let support_cpus1 = U32Set::from(vec![1, 2, 3]);
assert_eq!(support_cpus1, U32Set::from_str("1,2,3").unwrap());
assert_eq!(support_cpus1, U32Set::from_str("1-2,3").unwrap());
let support_cpus2 = U32Set::from(vec![1, 3, 4, 6, 7, 8]);
assert_eq!(support_cpus2, U32Set::from_str("1,3,4,6,7,8").unwrap());
assert_eq!(support_cpus2, U32Set::from_str("1,3-4,6-8").unwrap());
// Rejected: multiple dashes, empty items, non-integer bounds, ranges whose start is not
// strictly below the end, missing bounds, and values that overflow `u32`.
assert!(U32Set::from_str("1-2-3,3").is_err());
assert!(U32Set::from_str("1-2,,3").is_err());
assert!(U32Set::from_str("1-2.5,3").is_err());
assert!(U32Set::from_str("1-1").is_err());
assert!(U32Set::from_str("2-1").is_err());
assert!(U32Set::from_str("0,,1").is_err());
assert!(U32Set::from_str("-1").is_err());
assert!(U32Set::from_str("1-").is_err());
assert!(U32Set::from_str("-1--2").is_err());
assert!(U32Set::from_str("999999999999999999999999999999999999999999999").is_err());
}
}