Merge pull request #7531 from justxuewei/device-cgroup

agent: Restrict device access at upper node of container's cgroup
This commit is contained in:
Fupan Li 2023-11-08 22:01:48 +08:00 committed by GitHub
commit 100a73d2fd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 939 additions and 171 deletions

4
src/agent/Cargo.lock generated
View File

@ -355,9 +355,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "cgroups-rs"
version = "0.3.2"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b098e7c3a70d03c288fa0a96ccf13e770eb3d78c4cc0e1549b3c13215d5f965"
checksum = "1fb3af90c8d48ad5f432d8afb521b5b40c2a2fce46dd60e05912de51c47fba64"
dependencies = [
"libc",
"log",

View File

@ -53,7 +53,7 @@ cfg-if = "1.0.0"
prometheus = { version = "0.13.0", features = ["process"] }
procfs = "0.12.0"
anyhow = "1.0.32"
cgroups = { package = "cgroups-rs", version = "0.3.2" }
cgroups = { package = "cgroups-rs", version = "0.3.3" }
# Tracing
tracing = "0.1.26"

View File

@ -25,7 +25,7 @@ scan_fmt = "0.2.6"
regex = "1.5.6"
path-absolutize = "1.2.0"
anyhow = "1.0.32"
cgroups = { package = "cgroups-rs", version = "0.3.2" }
cgroups = { package = "cgroups-rs", version = "0.3.3" }
rlimit = "0.5.3"
cfg-if = "0.1.0"

View File

@ -18,13 +18,13 @@ use cgroups::{
DeviceResource, HugePageResource, MaxValue, NetworkPriority,
};
use crate::cgroups::Manager as CgroupManager;
use crate::cgroups::{rule_for_all_devices, Manager as CgroupManager};
use crate::container::DEFAULT_DEVICES;
use anyhow::{anyhow, Context, Result};
use libc::{self, pid_t};
use oci::{
LinuxBlockIo, LinuxCpu, LinuxDevice, LinuxDeviceCgroup, LinuxHugepageLimit, LinuxMemory,
LinuxNetwork, LinuxPids, LinuxResources,
LinuxNetwork, LinuxPids, LinuxResources, Spec,
};
use protobuf::MessageField;
@ -35,7 +35,10 @@ use protocols::agent::{
use std::any::Any;
use std::collections::HashMap;
use std::fs;
use std::path::Path;
use std::path::{Path, PathBuf};
use std::sync::{Arc, RwLock};
use super::DevicesCgroupInfo;
const GUEST_CPUS_PATH: &str = "/sys/devices/system/cpu/online";
@ -60,6 +63,10 @@ pub struct Manager {
pub cpath: String,
#[serde(skip)]
cgroup: cgroups::Cgroup,
#[serde(skip)]
pod_cgroup: Option<cgroups::Cgroup>,
#[serde(skip)]
devcg_allowed_all: bool,
}
// set_resource is used to set reources by cgroup controller.
@ -85,6 +92,7 @@ impl CgroupManager for Manager {
);
let res = &mut cgroups::Resources::default();
let pod_res = &mut cgroups::Resources::default();
// set cpuset and cpu reources
if let Some(cpu) = &r.cpu {
@ -117,10 +125,18 @@ impl CgroupManager for Manager {
}
// set devices resources
set_devices_resources(&self.cgroup, &r.devices, res);
info!(sl(), "resources after processed {:?}", res);
if !self.devcg_allowed_all {
set_devices_resources(&self.cgroup, &r.devices, res, pod_res);
}
debug!(
sl(),
"Resources after processed, pod_res = {:?}, res = {:?}", pod_res, res
);
// apply resources
if let Some(pod_cg) = self.pod_cgroup.as_ref() {
pod_cg.apply(pod_res)?;
}
self.cgroup.apply(res)?;
Ok(())
@ -179,7 +195,14 @@ impl CgroupManager for Manager {
}
fn destroy(&mut self) -> Result<()> {
let _ = self.cgroup.delete();
if let Err(err) = self.cgroup.delete() {
warn!(
sl(),
"Failed to delete cgroup {}: {}",
self.cgroup.path(),
err
);
}
Ok(())
}
@ -300,28 +323,21 @@ fn set_devices_resources(
_cg: &cgroups::Cgroup,
device_resources: &[LinuxDeviceCgroup],
res: &mut cgroups::Resources,
pod_res: &mut cgroups::Resources,
) {
info!(sl(), "cgroup manager set devices");
let mut devices = vec![];
for d in device_resources.iter() {
if let Some(dev) = linux_device_group_to_cgroup_device(d) {
devices.push(dev);
}
}
for d in DEFAULT_DEVICES.iter() {
if let Some(dev) = linux_device_to_cgroup_device(d) {
devices.push(dev);
}
}
for d in DEFAULT_ALLOWED_DEVICES.iter() {
if let Some(dev) = linux_device_group_to_cgroup_device(d) {
if rule_for_all_devices(d) {
continue;
}
if let Some(dev) = linux_device_cgroup_to_device_resource(d) {
devices.push(dev);
}
}
pod_res.devices.devices = devices.clone();
res.devices.devices = devices;
}
@ -519,28 +535,7 @@ fn build_blk_io_device_throttle_resource(
blk_io_device_throttle_resources
}
fn linux_device_to_cgroup_device(d: &LinuxDevice) -> Option<DeviceResource> {
let dev_type = match DeviceType::from_char(d.r#type.chars().next()) {
Some(t) => t,
None => return None,
};
let permissions = vec![
DevicePermissions::Read,
DevicePermissions::Write,
DevicePermissions::MkNod,
];
Some(DeviceResource {
allow: true,
devtype: dev_type,
major: d.major,
minor: d.minor,
access: permissions,
})
}
fn linux_device_group_to_cgroup_device(d: &LinuxDeviceCgroup) -> Option<DeviceResource> {
fn linux_device_cgroup_to_device_resource(d: &LinuxDeviceCgroup) -> Option<DeviceResource> {
let dev_type = match DeviceType::from_char(d.r#type.chars().next()) {
Some(t) => t,
None => return None,
@ -1001,13 +996,136 @@ pub fn get_mounts(paths: &HashMap<String, String>) -> Result<HashMap<String, Str
Ok(m)
}
#[inline]
fn new_cgroup(h: Box<dyn cgroups::Hierarchy>, path: &str) -> Result<Cgroup> {
let valid_path = path.trim_start_matches('/').to_string();
cgroups::Cgroup::new(h, valid_path.as_str()).map_err(anyhow::Error::from)
}
#[inline]
fn load_cgroup(h: Box<dyn cgroups::Hierarchy>, path: &str) -> Cgroup {
let valid_path = path.trim_start_matches('/').to_string();
cgroups::Cgroup::load(h, valid_path.as_str())
}
impl Manager {
pub fn new(cpath: &str) -> Result<Self> {
pub fn new(
cpath: &str,
spec: &Spec,
devcg_info: Option<Arc<RwLock<DevicesCgroupInfo>>>,
) -> Result<Self> {
let (paths, mounts) = Self::get_paths_and_mounts(cpath).context("Get paths and mounts")?;
// Do not expect poisoning lock
let mut devices_group_info = devcg_info.as_ref().map(|i| i.write().unwrap());
let pod_cgroup: Option<Cgroup>;
if let Some(devices_group_info) = devices_group_info.as_mut() {
// Cgroup path of parent of container
let pod_cpath = PathBuf::from(cpath)
.parent()
.unwrap_or(Path::new("/"))
.display()
.to_string();
if pod_cpath.as_str() == "/" {
// Skip setting pod cgroup for cpath due to no parent path
pod_cgroup = None
} else {
// Create a cgroup for the pod if not exists.
// Note that creating pod cgroup MUST be done before the pause
// container's cgroup created, since the upper node might have
// some excessive permissions, and children inherit upper
// node's rules. You'll feel painful to shrink upper nodes'
// permissions if the new permissions are subset of old.
pod_cgroup = Some(load_cgroup(cgroups::hierarchies::auto(), &pod_cpath));
let pod_cg = pod_cgroup.as_ref().unwrap();
let is_allowded_all = Self::has_allowed_all_devices_rule(spec);
if devices_group_info.inited {
debug!(sl(), "Devices cgroup has been initialzied.");
// Set allowed all devices to pod cgroup
if !devices_group_info.allowed_all && is_allowded_all {
info!(
sl(),
"Pod devices cgroup is changed to allowed all devices mode, devices_group_info = {:?}",
devices_group_info
);
Self::setup_allowed_all_mode(pod_cg).with_context(|| {
format!("Setup allowed all devices mode for {}", pod_cpath)
})?;
devices_group_info.allowed_all = true;
}
} else {
// This is the first container (aka pause container)
debug!(sl(), "Started to init devices cgroup");
pod_cg.create().context("Create pod cgroup")?;
if !is_allowded_all {
Self::setup_devcg_whitelist(pod_cg).with_context(|| {
format!("Setup device cgroup whitelist for {}", pod_cpath)
})?;
} else {
Self::setup_allowed_all_mode(pod_cg)
.with_context(|| format!("Setup allowed all mode for {}", pod_cpath))?;
devices_group_info.allowed_all = true;
}
devices_group_info.inited = true
}
}
} else {
pod_cgroup = None;
}
// Create a cgroup for the container.
let cg = new_cgroup(cgroups::hierarchies::auto(), cpath)?;
// The rules of container cgroup are copied from its parent, which
// contains some permissions that the container doesn't need.
// Therefore, resetting the container's devices cgroup is required.
if let Some(devices_group_info) = devices_group_info.as_ref() {
if !devices_group_info.allowed_all {
Self::setup_devcg_whitelist(&cg)
.with_context(|| format!("Setup device cgroup whitelist for {}", cpath))?;
}
}
Ok(Self {
paths,
mounts,
// rels: paths,
cpath: cpath.to_string(),
cgroup: cg,
pod_cgroup,
devcg_allowed_all: devices_group_info
.map(|info| info.allowed_all)
.unwrap_or(false),
})
}
/// Create a cgroupfs manager for systemd cgroup.
/// The device cgroup is disabled in systemd cgroup, given that it is
/// implemented by eBPF.
pub fn new_systemd(cpath: &str) -> Result<Self> {
let (paths, mounts) = Self::get_paths_and_mounts(cpath).context("Get paths and mounts")?;
let cg = new_cgroup(cgroups::hierarchies::auto(), cpath)?;
Ok(Self {
paths,
mounts,
cpath: cpath.to_string(),
pod_cgroup: None,
cgroup: cg,
devcg_allowed_all: false,
})
}
fn get_paths_and_mounts(
cpath: &str,
) -> Result<(HashMap<String, String>, HashMap<String, String>)> {
let mut m = HashMap::new();
let paths = get_paths()?;
@ -1020,21 +1138,140 @@ impl Manager {
continue;
}
let p = format!("{}/{}", mnt.unwrap(), cpath);
m.insert(key.to_string(), p);
m.insert(key.to_string(), format!("{}/{}", mnt.unwrap(), cpath));
}
let cg = new_cgroup(cgroups::hierarchies::auto(), cpath)?;
Ok(Self {
paths: m,
mounts,
// rels: paths,
cpath: cpath.to_string(),
cgroup: cg,
})
Ok((m, mounts))
}
fn setup_allowed_all_mode(cgroup: &cgroups::Cgroup) -> Result<()> {
// Insert two rules: `b *:* rwm` and `c *:* rwm`.
// The reason of not inserting `a *:* rwm` is that the Linux kernel
// will deny writing `a` to `devices.allow` once a cgroup has
// children. You can refer to
// https://www.kernel.org/doc/Documentation/cgroup-v1/devices.txt.
let res = cgroups::Resources {
devices: cgroups::DeviceResources {
devices: vec![
DeviceResource {
allow: true,
devtype: DeviceType::Block,
major: -1,
minor: -1,
access: vec![
DevicePermissions::Read,
DevicePermissions::Write,
DevicePermissions::MkNod,
],
},
DeviceResource {
allow: true,
devtype: DeviceType::Char,
major: -1,
minor: -1,
access: vec![
DevicePermissions::Read,
DevicePermissions::Write,
DevicePermissions::MkNod,
],
},
],
},
..Default::default()
};
cgroup.apply(&res)?;
Ok(())
}
/// Setup device cgroup whitelist:
/// - Deny all devices in order to cleanup device cgroup.
/// - Allow default devices and default allowed devices.
fn setup_devcg_whitelist(cgroup: &cgroups::Cgroup) -> Result<()> {
#[allow(unused_mut)]
let mut dev_res_list = vec![DeviceResource {
allow: false,
devtype: DeviceType::All,
major: -1,
minor: -1,
access: vec![
DevicePermissions::Read,
DevicePermissions::Write,
DevicePermissions::MkNod,
],
}];
// Do not append default allowed devices for simplicity while
// testing.
#[cfg(not(test))]
dev_res_list.append(&mut default_allowed_devices());
let res = cgroups::Resources {
devices: cgroups::DeviceResources {
devices: dev_res_list,
},
..Default::default()
};
cgroup.apply(&res)?;
Ok(())
}
/// Check if OCI spec contains a rule of allowed all devices.
fn has_allowed_all_devices_rule(spec: &Spec) -> bool {
let linux = match spec.linux.as_ref() {
Some(linux) => linux,
None => return false,
};
let resources = match linux.resources.as_ref() {
Some(resource) => resource,
None => return false,
};
resources
.devices
.iter()
.find(|dev| rule_for_all_devices(dev))
.map(|dev| dev.allow)
.unwrap_or_default()
}
}
/// Generate a list for allowed devices including `DEFAULT_DEVICES` and
/// `DEFAULT_ALLOWED_DEVICES`.
fn default_allowed_devices() -> Vec<DeviceResource> {
let mut dev_res_list = Vec::new();
DEFAULT_DEVICES.iter().for_each(|dev| {
if let Some(dev_res) = linux_device_to_device_resource(dev) {
dev_res_list.push(dev_res)
}
});
DEFAULT_ALLOWED_DEVICES.iter().for_each(|dev| {
if let Some(dev_res) = linux_device_cgroup_to_device_resource(dev) {
dev_res_list.push(dev_res)
}
});
dev_res_list
}
/// Convert LinuxDevice to DeviceResource.
fn linux_device_to_device_resource(d: &LinuxDevice) -> Option<DeviceResource> {
let dev_type = match DeviceType::from_char(d.r#type.chars().next()) {
Some(t) => t,
None => return None,
};
let permissions = vec![
DevicePermissions::Read,
DevicePermissions::Write,
DevicePermissions::MkNod,
];
Some(DeviceResource {
allow: true,
devtype: dev_type,
major: d.major,
minor: d.minor,
access: permissions,
})
}
// get the guest's online cpus.
@ -1085,7 +1322,20 @@ fn convert_memory_swap_to_v2_value(memory_swap: i64, memory: i64) -> Result<i64>
#[cfg(test)]
mod tests {
use super::*;
use std::collections::HashMap;
use std::process::Command;
use std::sync::{Arc, RwLock};
use cgroups::devices::{DevicePermissions, DeviceType};
use oci::{Linux, LinuxDeviceCgroup, LinuxResources, Spec};
use test_utils::skip_if_not_root;
use super::default_allowed_devices;
use crate::cgroups::fs::{
line_to_vec, lines_to_map, Manager, DEFAULT_ALLOWED_DEVICES, WILDCARD,
};
use crate::cgroups::DevicesCgroupInfo;
use crate::container::DEFAULT_DEVICES;
#[test]
fn test_line_to_vec() {
@ -1133,4 +1383,192 @@ mod tests {
);
}
}
struct MockSandbox {
devcg_info: Arc<RwLock<DevicesCgroupInfo>>,
}
impl MockSandbox {
fn new() -> Self {
Self {
devcg_info: Arc::new(RwLock::new(DevicesCgroupInfo::default())),
}
}
}
#[test]
fn test_new_fs_manager() {
skip_if_not_root!();
struct TestCase {
cpath: Vec<String>,
devices: Vec<Vec<LinuxDeviceCgroup>>,
allowed_all: Vec<bool>,
pod_devices_list: Vec<String>,
container_devices_list: Vec<String>,
}
let allow_all = LinuxDeviceCgroup {
allow: true,
r#type: String::new(),
major: Some(0),
minor: Some(0),
access: String::from("rwm"),
};
let deny_all = LinuxDeviceCgroup {
allow: false,
r#type: String::new(),
major: Some(0),
minor: Some(0),
access: String::from("rwm"),
};
let test_cases = vec![
TestCase {
cpath: vec![String::from(
"/kata-agent-fs-manager-test/449ccd81-9320-4f3e-bb67-78f84700fac9",
)],
devices: vec![vec![allow_all.clone()]],
allowed_all: vec![true],
pod_devices_list: vec![String::from("a *:* rwm\n")],
container_devices_list: vec![String::from("a *:* rwm\n")],
},
TestCase {
cpath: vec![String::from(
"/kata-agent-fs-manager-test/449ccd81-9320-4f3e-bb67-78f84700fac9",
)],
devices: vec![vec![deny_all.clone()]],
allowed_all: vec![false],
pod_devices_list: vec![String::new()],
container_devices_list: vec![String::new()],
},
TestCase {
cpath: vec![
String::from(
"/kata-agent-fs-manager-test/449ccd81-9320-4f3e-bb67-78f84700fac9",
),
String::from(
"/kata-agent-fs-manager-test/1c7affca-1f65-427c-ba92-caff1cea61f6",
),
],
devices: vec![vec![deny_all.clone()], vec![allow_all.clone()]],
allowed_all: vec![false, true],
pod_devices_list: vec![String::new(), String::from("b *:* rwm\nc *:* rwm\n")],
container_devices_list: vec![String::new(), String::from("b *:* rwm\nc *:* rwm\n")],
},
TestCase {
cpath: vec![
String::from(
"/kata-agent-fs-manager-test/449ccd81-9320-4f3e-bb67-78f84700fac9",
),
String::from(
"/kata-agent-fs-manager-test/1c7affca-1f65-427c-ba92-caff1cea61f6",
),
],
devices: vec![vec![allow_all], vec![deny_all]],
allowed_all: vec![true, true],
pod_devices_list: vec![String::from("a *:* rwm\n"), String::from("a *:* rwm\n")],
container_devices_list: vec![
String::from("a *:* rwm\n"),
String::from("a *:* rwm\n"),
],
},
];
for (round, tc) in test_cases.iter().enumerate() {
let sandbox = MockSandbox::new();
let devcg_info = sandbox.devcg_info.read().unwrap();
assert!(!devcg_info.inited);
assert!(!devcg_info.allowed_all);
drop(devcg_info);
let mut managers = Vec::with_capacity(tc.devices.len());
for cid in 0..tc.devices.len() {
let spec = Spec {
linux: Some(Linux {
resources: Some(LinuxResources {
devices: tc.devices[cid].clone(),
..Default::default()
}),
..Default::default()
}),
..Default::default()
};
managers.push(
Manager::new(&tc.cpath[cid], &spec, Some(sandbox.devcg_info.clone())).unwrap(),
);
let devcg_info = sandbox.devcg_info.read().unwrap();
assert!(devcg_info.inited);
assert_eq!(
devcg_info.allowed_all, tc.allowed_all[cid],
"Round {}, cid {} allowed all assertion failure",
round, cid
);
drop(devcg_info);
let pod_devices_list = Command::new("cat")
.arg("/sys/fs/cgroup/devices/kata-agent-fs-manager-test/devices.list")
.output()
.unwrap();
let container_devices_list = Command::new("cat")
.arg(&format!(
"/sys/fs/cgroup/devices{}/devices.list",
tc.cpath[cid]
))
.output()
.unwrap();
let pod_devices_list = String::from_utf8(pod_devices_list.stdout).unwrap();
let container_devices_list =
String::from_utf8(container_devices_list.stdout).unwrap();
assert_eq!(&pod_devices_list, &tc.pod_devices_list[cid]);
assert_eq!(&container_devices_list, &tc.container_devices_list[cid])
}
// Clean up cgroups
managers
.iter()
.for_each(|manager| manager.cgroup.delete().unwrap());
// The pod_cgroup must not be None
managers[0].pod_cgroup.as_ref().unwrap().delete().unwrap();
}
}
#[test]
fn test_default_allowed_devices() {
let allowed_devices = default_allowed_devices();
assert_eq!(
allowed_devices.len(),
DEFAULT_DEVICES.len() + DEFAULT_ALLOWED_DEVICES.len()
);
let allowed_permissions = vec![
DevicePermissions::Read,
DevicePermissions::Write,
DevicePermissions::MkNod,
];
let default_devices_0 = &allowed_devices[0];
assert!(default_devices_0.allow);
assert_eq!(default_devices_0.devtype, DeviceType::Char);
assert_eq!(default_devices_0.major, 1);
assert_eq!(default_devices_0.minor, 3);
assert!(default_devices_0
.access
.iter()
.all(|&p| allowed_permissions.iter().any(|&ap| ap == p)));
let default_allowed_devices_0 = &allowed_devices[DEFAULT_DEVICES.len()];
assert!(default_allowed_devices_0.allow);
assert_eq!(default_allowed_devices_0.devtype, DeviceType::Char);
assert_eq!(default_allowed_devices_0.major, WILDCARD);
assert_eq!(default_allowed_devices_0.minor, WILDCARD);
assert_eq!(
default_allowed_devices_0.access,
vec![DevicePermissions::MkNod]
);
}
}

View File

@ -10,10 +10,13 @@ use crate::protocols::agent::{BlkioStats, CgroupStats, CpuStats, MemoryStats, Pi
use anyhow::Result;
use cgroups::freezer::FreezerState;
use libc::{self, pid_t};
use oci::LinuxResources;
use oci::{LinuxResources, Spec};
use std::any::Any;
use std::collections::HashMap;
use std::string::String;
use std::sync::{Arc, RwLock};
use super::DevicesCgroupInfo;
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Manager {
@ -72,7 +75,11 @@ impl CgroupManager for Manager {
}
impl Manager {
pub fn new(cpath: &str) -> Result<Self> {
pub fn new(
cpath: &str,
_spec: &Spec,
_devcg_info: Option<Arc<RwLock<DevicesCgroupInfo>>>,
) -> Result<Self> {
Ok(Self {
paths: HashMap::new(),
mounts: HashMap::new(),

View File

@ -5,7 +5,7 @@
use anyhow::{anyhow, Result};
use core::fmt::Debug;
use oci::LinuxResources;
use oci::{LinuxDeviceCgroup, LinuxResources};
use protocols::agent::CgroupStats;
use std::any::Any;
@ -16,6 +16,15 @@ pub mod mock;
pub mod notifier;
pub mod systemd;
#[derive(Default, Debug)]
pub struct DevicesCgroupInfo {
/// Indicate if the pod cgroup is initialized.
inited: bool,
/// Indicate if pod's devices cgroup is in whitelist mode. Returns true
/// once one container requires `a *:* rwm` permission.
allowed_all: bool,
}
pub trait Manager {
fn apply(&self, _pid: i32) -> Result<()> {
Err(anyhow!("not supported!".to_string()))
@ -61,3 +70,20 @@ impl Debug for dyn Manager + Send + Sync {
write!(f, "{}", self.name())
}
}
/// Check if device cgroup is a rule for all devices from OCI spec.
///
/// The formats representing all devices between OCI spec and cgroups-rs
/// are different.
/// - OCI spec: major: 0, minor: 0, type: "", access: "rwm";
/// - Cgroups-rs: major: -1, minor: -1, type: "a", access: "rwm";
/// - Linux: a *:* rwm
#[inline]
fn rule_for_all_devices(dev_cgroup: &LinuxDeviceCgroup) -> bool {
dev_cgroup.major.unwrap_or(0) == 0
&& dev_cgroup.minor.unwrap_or(0) == 0
&& (dev_cgroup.r#type.as_str() == "" || dev_cgroup.r#type.as_str() == "a")
&& dev_cgroup.access.contains('r')
&& dev_cgroup.access.contains('w')
&& dev_cgroup.access.contains('m')
}

View File

@ -113,7 +113,7 @@ impl Manager {
let (parent_slice, unit_name) = cgroups_path.parse()?;
let cpath = parent_slice + "/" + &unit_name;
let fs_manager = FsManager::new(cpath.as_str())?;
let fs_manager = FsManager::new_systemd(cpath.as_str())?;
Ok(Manager {
paths: fs_manager.paths.clone(),

View File

@ -23,7 +23,7 @@ use crate::cgroups::fs::Manager as FsManager;
#[cfg(test)]
use crate::cgroups::mock::Manager as FsManager;
use crate::cgroups::systemd::manager::Manager as SystemdManager;
use crate::cgroups::Manager;
use crate::cgroups::{DevicesCgroupInfo, Manager};
#[cfg(feature = "standard-oci-runtime")]
use crate::console;
use crate::log_child;
@ -55,7 +55,7 @@ use regex::Regex;
use std::collections::HashMap;
use std::os::unix::io::FromRawFd;
use std::str::FromStr;
use std::sync::Arc;
use std::sync::{Arc, RwLock};
use slog::{info, o, Logger};
@ -136,7 +136,7 @@ lazy_static! {
m
};
// type to name hashmap, better to be in NAMESPACES
// type to name hashmap, better to be in NAMESPACES
pub static ref TYPETONAME: HashMap<&'static str, &'static str> = {
let mut m = HashMap::new();
m.insert("ipc", "ipc");
@ -1494,6 +1494,7 @@ impl LinuxContainer {
pub fn new<T: Into<String> + Display + Clone>(
id: T,
base: T,
devcg_info: Option<Arc<RwLock<DevicesCgroupInfo>>>,
config: Config,
logger: &Logger,
) -> Result<Self> {
@ -1535,19 +1536,12 @@ impl LinuxContainer {
};
let cgroup_manager: Box<dyn Manager + Send + Sync> = if config.use_systemd_cgroup {
Box::new(SystemdManager::new(cpath.as_str()).map_err(|e| {
anyhow!(format!(
"fail to create cgroup manager with path {}: {:}",
cpath, e
))
})?)
Box::new(SystemdManager::new(cpath.as_str()).context("Create systemd manager")?)
} else {
Box::new(FsManager::new(cpath.as_str()).map_err(|e| {
anyhow!(format!(
"fail to create cgroup manager with path {}: {:}",
cpath, e
))
})?)
Box::new(
FsManager::new(cpath.as_str(), spec, devcg_info)
.context("Create cgroupfs manager")?,
)
};
info!(logger, "new cgroup_manager {:?}", &cgroup_manager);
@ -1611,12 +1605,16 @@ mod tests {
use super::*;
use crate::process::Process;
use nix::unistd::Uid;
use oci::{LinuxDeviceCgroup, Root};
use std::fs;
use std::os::unix::fs::MetadataExt;
use std::os::unix::io::AsRawFd;
use std::time::UNIX_EPOCH;
use tempfile::tempdir;
use test_utils::skip_if_not_root;
const CGROUP_PARENT: &str = "kata.agent.test.k8s.io";
fn sl() -> slog::Logger {
slog_scope::logger()
}
@ -1717,13 +1715,42 @@ mod tests {
}
fn create_dummy_opts() -> CreateOpts {
let mut root = oci::Root::default();
root.path = "/tmp".to_string();
let start = SystemTime::now();
let since_the_epoch = start
.duration_since(UNIX_EPOCH)
.expect("Time went backwards");
let linux = Linux::default();
let mut spec = Spec::default();
spec.root = Some(root).into();
spec.linux = Some(linux).into();
let root = Root {
path: String::from("/tmp"),
..Default::default()
};
let linux_resources = LinuxResources {
devices: vec![LinuxDeviceCgroup {
allow: true,
r#type: String::new(),
major: None,
minor: None,
access: String::from("rwm"),
}],
..Default::default()
};
let cgroups_path = format!(
"/{}/dummycontainer{}",
CGROUP_PARENT,
since_the_epoch.as_millis()
);
let spec = Spec {
linux: Some(Linux {
cgroups_path,
resources: Some(linux_resources),
..Default::default()
}),
root: Some(root),
..Default::default()
};
CreateOpts {
cgroup_name: "".to_string(),
@ -1748,6 +1775,7 @@ mod tests {
LinuxContainer::new(
"some_id",
&dir.path().join("rootfs").to_str().unwrap(),
None,
create_dummy_opts(),
&slog_scope::logger(),
),
@ -1777,9 +1805,10 @@ mod tests {
#[test]
fn test_linuxcontainer_pause() {
let ret = new_linux_container_and_then(|mut c: LinuxContainer| {
c.cgroup_manager = Box::new(FsManager::new("").map_err(|e| {
anyhow!(format!("fail to create cgroup manager with path: {:}", e))
})?);
c.cgroup_manager =
Box::new(FsManager::new("", &Spec::default(), None).map_err(|e| {
anyhow!(format!("fail to create cgroup manager with path: {:}", e))
})?);
c.pause().map_err(|e| anyhow!(e))
});
@ -1801,9 +1830,10 @@ mod tests {
#[test]
fn test_linuxcontainer_resume() {
let ret = new_linux_container_and_then(|mut c: LinuxContainer| {
c.cgroup_manager = Box::new(FsManager::new("").map_err(|e| {
anyhow!(format!("fail to create cgroup manager with path: {:}", e))
})?);
c.cgroup_manager =
Box::new(FsManager::new("", &Spec::default(), None).map_err(|e| {
anyhow!(format!("fail to create cgroup manager with path: {:}", e))
})?);
// Change status to paused, this way we can resume it
c.status.transition(ContainerState::Paused);
c.resume().map_err(|e| anyhow!(e))

View File

@ -11,6 +11,7 @@ use std::fmt;
use std::fs;
use std::os::unix::ffi::OsStrExt;
use std::os::unix::fs::MetadataExt;
use std::os::unix::prelude::FileTypeExt;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::sync::Arc;
@ -31,7 +32,6 @@ fn sl() -> slog::Logger {
slog_scope::logger().new(o!("subsystem" => "device"))
}
const VM_ROOTFS: &str = "/";
const BLOCK: &str = "block";
pub const DRIVER_9P_TYPE: &str = "9p";
pub const DRIVER_VIRTIOFS_TYPE: &str = "virtio-fs";
@ -515,25 +515,53 @@ fn scan_scsi_bus(scsi_addr: &str) -> Result<()> {
}
#[derive(Debug, Clone)]
struct DevNumUpdate {
// the major and minor numbers for the device within the guest
pub struct DeviceInfo {
// Device type, "b" for block device and "c" for character device
cgroup_type: String,
// The major and minor numbers for the device within the guest
guest_major: i64,
guest_minor: i64,
}
impl DevNumUpdate {
fn from_vm_path<T: AsRef<Path>>(vm_path: T) -> Result<Self> {
let vm_path = vm_path.as_ref();
impl DeviceInfo {
/// Create a device info.
///
/// # Arguments
///
/// * `vm_path` - Device's vm path.
/// * `is_rdev` - If the vm_path is a device, set to true. If the
/// vm_path is a file in a device, set to false.
pub fn new(vm_path: &str, is_rdev: bool) -> Result<Self> {
let cgroup_type;
let devid;
let vm_path = PathBuf::from(vm_path);
if !vm_path.exists() {
return Err(anyhow!("VM device path {:?} doesn't exist", vm_path));
}
let devid = fs::metadata(vm_path)?.rdev();
let metadata = fs::metadata(&vm_path)?;
if is_rdev {
devid = metadata.rdev();
let file_type = metadata.file_type();
if file_type.is_block_device() {
cgroup_type = String::from("b");
} else if file_type.is_char_device() {
cgroup_type = String::from("c");
} else {
return Err(anyhow!("Unknown device {:?}'s cgroup type", vm_path));
}
} else {
devid = metadata.dev();
cgroup_type = String::from("b");
}
let guest_major = stat::major(devid) as i64;
let guest_minor = stat::minor(devid) as i64;
Ok(DevNumUpdate {
Ok(DeviceInfo {
cgroup_type,
guest_major,
guest_minor,
})
@ -544,25 +572,25 @@ impl DevNumUpdate {
// spec needed for a particular device
#[derive(Debug, Clone)]
struct DevUpdate {
num: DevNumUpdate,
info: DeviceInfo,
// an optional new path to update the device to in the "inner" container
// specification
final_path: Option<String>,
}
impl DevUpdate {
fn from_vm_path<T: AsRef<Path>>(vm_path: T, final_path: String) -> Result<Self> {
fn new(vm_path: &str, final_path: &str) -> Result<Self> {
Ok(DevUpdate {
final_path: Some(final_path),
..DevNumUpdate::from_vm_path(vm_path)?.into()
final_path: Some(final_path.to_owned()),
..DeviceInfo::new(vm_path, true)?.into()
})
}
}
impl From<DevNumUpdate> for DevUpdate {
fn from(num: DevNumUpdate) -> Self {
impl From<DeviceInfo> for DevUpdate {
fn from(info: DeviceInfo) -> Self {
DevUpdate {
num,
info,
final_path: None,
}
}
@ -596,7 +624,7 @@ fn update_spec_devices(spec: &mut Spec, mut updates: HashMap<&str, DevUpdate>) -
.linux
.as_mut()
.ok_or_else(|| anyhow!("Spec didn't contain linux field"))?;
let mut res_updates = HashMap::<(&str, i64, i64), DevNumUpdate>::with_capacity(updates.len());
let mut res_updates = HashMap::<(&str, i64, i64), DeviceInfo>::with_capacity(updates.len());
for specdev in &mut linux.devices {
if let Some(update) = updates.remove(specdev.path.as_str()) {
@ -610,13 +638,13 @@ fn update_spec_devices(spec: &mut Spec, mut updates: HashMap<&str, DevUpdate>) -
"type" => &specdev.r#type,
"host_major" => host_major,
"host_minor" => host_minor,
"guest_major" => update.num.guest_major,
"guest_minor" => update.num.guest_minor,
"guest_major" => update.info.guest_major,
"guest_minor" => update.info.guest_minor,
"final_path" => update.final_path.as_ref(),
);
specdev.major = update.num.guest_major;
specdev.minor = update.num.guest_minor;
specdev.major = update.info.guest_major;
specdev.minor = update.info.guest_minor;
if let Some(final_path) = update.final_path {
specdev.path = final_path;
}
@ -624,7 +652,7 @@ fn update_spec_devices(spec: &mut Spec, mut updates: HashMap<&str, DevUpdate>) -
if res_updates
.insert(
(specdev.r#type.as_str(), host_major, host_minor),
update.num,
update.info,
)
.is_some()
{
@ -728,7 +756,9 @@ async fn virtiommio_blk_device_handler(
.context("failed to get mmio device name")?;
}
Ok(DevNumUpdate::from_vm_path(&device.vm_path)?.into())
Ok(DeviceInfo::new(device.vm_path(), true)
.context("New device info")?
.into())
}
// device.Id should be a PCI path string
@ -740,7 +770,9 @@ async fn virtio_blk_device_handler(
let pcipath = pci::Path::from_str(&device.id)?;
let vm_path = get_virtio_blk_pci_device_name(sandbox, &pcipath).await?;
Ok(DevNumUpdate::from_vm_path(vm_path)?.into())
Ok(DeviceInfo::new(&vm_path, true)
.context("New device info")?
.into())
}
// device.id should be a CCW path string
@ -753,7 +785,7 @@ async fn virtio_blk_ccw_device_handler(
let ccw_device = ccw::Device::from_str(&device.id)?;
let vm_path = get_virtio_blk_ccw_device_name(sandbox, &ccw_device).await?;
Ok(DevNumUpdate::from_vm_path(vm_path)?.into())
Ok(DeviceInfo::new(&vm_path, true)?.into())
}
#[cfg(not(target_arch = "s390x"))]
@ -770,7 +802,9 @@ async fn virtio_scsi_device_handler(
) -> Result<SpecUpdate> {
let vm_path = get_scsi_device_name(sandbox, &device.id).await?;
Ok(DevNumUpdate::from_vm_path(vm_path)?.into())
Ok(DeviceInfo::new(&vm_path, true)
.context("New device info")?
.into())
}
#[instrument]
@ -782,7 +816,9 @@ async fn virtio_nvdimm_device_handler(
return Err(anyhow!("Invalid path for nvdimm device"));
}
Ok(DevNumUpdate::from_vm_path(&device.vm_path)?.into())
Ok(DeviceInfo::new(device.vm_path(), true)
.context("New device info")?
.into())
}
fn split_vfio_pci_option(opt: &str) -> Option<(&str, &str)> {
@ -842,7 +878,7 @@ async fn vfio_pci_device_handler(
let vm_path = get_vfio_device_name(sandbox, group).await?;
Some(DevUpdate::from_vm_path(&vm_path, vm_path.clone())?)
Some(DevUpdate::new(&vm_path, &vm_path)?)
} else {
None
};
@ -887,7 +923,7 @@ pub async fn add_devices(
let update = add_device(device, sandbox).await?;
if let Some(dev_update) = update.dev {
if dev_updates
.insert(&device.container_path, dev_update)
.insert(&device.container_path, dev_update.clone())
.is_some()
{
return Err(anyhow!(
@ -907,6 +943,10 @@ pub async fn add_devices(
));
}
}
// Update cgroup to allow all devices added to guest.
insert_devices_cgroup_rule(spec, &dev_update.info, true, "rwm")
.context("Update device cgroup")?;
}
}
@ -948,16 +988,14 @@ async fn add_device(device: &Device, sandbox: &Arc<Mutex<Sandbox>>) -> Result<Sp
}
}
// update_device_cgroup update the device cgroup for container
// to not allow access to the guest root partition. This prevents
// the container from being able to access the VM rootfs.
// Insert a devices cgroup rule to control access to device.
#[instrument]
pub fn update_device_cgroup(spec: &mut Spec) -> Result<()> {
let meta = fs::metadata(VM_ROOTFS)?;
let rdev = meta.dev();
let major = stat::major(rdev) as i64;
let minor = stat::minor(rdev) as i64;
pub fn insert_devices_cgroup_rule(
spec: &mut Spec,
dev_info: &DeviceInfo,
allow: bool,
access: &str,
) -> Result<()> {
let linux = spec
.linux
.as_mut()
@ -965,13 +1003,25 @@ pub fn update_device_cgroup(spec: &mut Spec) -> Result<()> {
let resources = linux.resources.get_or_insert(LinuxResources::default());
resources.devices.push(LinuxDeviceCgroup {
allow: false,
major: Some(major),
minor: Some(minor),
r#type: String::from("b"),
access: String::from("rw"),
});
let cgroup = LinuxDeviceCgroup {
allow,
major: Some(dev_info.guest_major),
minor: Some(dev_info.guest_minor),
r#type: dev_info.cgroup_type.clone(),
access: access.to_owned(),
};
debug!(
sl(),
"Insert a devices cgroup rule";
"linux_device_cgroup" => cgroup.allow,
"guest_major" => cgroup.major,
"guest_minor" => cgroup.minor,
"type" => cgroup.r#type.as_str(),
"access" => cgroup.access.as_str(),
);
resources.devices.push(cgroup);
Ok(())
}
@ -984,6 +1034,8 @@ mod tests {
use std::iter::FromIterator;
use tempfile::tempdir;
const VM_ROOTFS: &str = "/";
#[test]
fn test_update_device_cgroup() {
let mut spec = Spec {
@ -991,7 +1043,8 @@ mod tests {
..Default::default()
};
update_device_cgroup(&mut spec).unwrap();
let dev_info = DeviceInfo::new(VM_ROOTFS, false).unwrap();
insert_devices_cgroup_rule(&mut spec, &dev_info, false, "rw").unwrap();
let devices = spec.linux.unwrap().resources.unwrap().devices;
assert_eq!(devices.len(), 1);
@ -1011,7 +1064,7 @@ mod tests {
let mut spec = Spec::default();
// vm_path empty
let update = DevNumUpdate::from_vm_path("");
let update = DeviceInfo::new("", true);
assert!(update.is_err());
// linux is empty
@ -1021,7 +1074,7 @@ mod tests {
&mut spec,
HashMap::from_iter(vec![(
container_path,
DevNumUpdate::from_vm_path(vm_path).unwrap().into(),
DeviceInfo::new(vm_path, true).unwrap().into(),
)]),
);
assert!(res.is_err());
@ -1033,7 +1086,7 @@ mod tests {
&mut spec,
HashMap::from_iter(vec![(
container_path,
DevNumUpdate::from_vm_path(vm_path).unwrap().into(),
DeviceInfo::new(vm_path, true).unwrap().into(),
)]),
);
assert!(res.is_err());
@ -1050,7 +1103,7 @@ mod tests {
&mut spec,
HashMap::from_iter(vec![(
container_path,
DevNumUpdate::from_vm_path(vm_path).unwrap().into(),
DeviceInfo::new(vm_path, true).unwrap().into(),
)]),
);
assert!(
@ -1068,7 +1121,7 @@ mod tests {
&mut spec,
HashMap::from_iter(vec![(
container_path,
DevNumUpdate::from_vm_path(vm_path).unwrap().into(),
DeviceInfo::new(vm_path, true).unwrap().into(),
)]),
);
assert!(res.is_ok());
@ -1094,7 +1147,7 @@ mod tests {
&mut spec,
HashMap::from_iter(vec![(
container_path,
DevNumUpdate::from_vm_path(vm_path).unwrap().into(),
DeviceInfo::new(vm_path, true).unwrap().into(),
)]),
);
assert!(res.is_ok());
@ -1178,11 +1231,11 @@ mod tests {
let updates = HashMap::from_iter(vec![
(
container_path_a,
DevNumUpdate::from_vm_path(vm_path_a).unwrap().into(),
DeviceInfo::new(vm_path_a, true).unwrap().into(),
),
(
container_path_b,
DevNumUpdate::from_vm_path(vm_path_b).unwrap().into(),
DeviceInfo::new(vm_path_b, true).unwrap().into(),
),
]);
let res = update_spec_devices(&mut spec, updates);
@ -1263,7 +1316,7 @@ mod tests {
&mut spec,
HashMap::from_iter(vec![(
container_path,
DevNumUpdate::from_vm_path(vm_path).unwrap().into(),
DeviceInfo::new(vm_path, true).unwrap().into(),
)]),
);
assert!(res.is_ok());
@ -1307,7 +1360,7 @@ mod tests {
&mut spec,
HashMap::from_iter(vec![(
container_path,
DevUpdate::from_vm_path(vm_path, final_path.to_string()).unwrap(),
DevUpdate::new(vm_path, final_path).unwrap(),
)]),
);
assert!(res.is_ok());

View File

@ -52,9 +52,7 @@ use nix::sys::{stat, statfs};
use nix::unistd::{self, Pid};
use rustjail::process::ProcessOperations;
use crate::device::{
add_devices, get_virtio_blk_pci_device_name, update_device_cgroup, update_env_pci,
};
use crate::device::{add_devices, get_virtio_blk_pci_device_name, update_env_pci};
use crate::linux_abi::*;
use crate::metrics::get_metrics;
use crate::mount::baremount;
@ -238,9 +236,6 @@ impl AgentService {
update_container_namespaces(&s, &mut oci, use_sandbox_pidns)?;
// Add the root partition to the device cgroup to prevent access
update_device_cgroup(&mut oci)?;
// Append guest hooks
append_guest_hooks(&s, &mut oci)?;
@ -272,8 +267,13 @@ impl AgentService {
container_name,
};
let mut ctr: LinuxContainer =
LinuxContainer::new(cid.as_str(), CONTAINER_BASE, opts, &sl())?;
let mut ctr: LinuxContainer = LinuxContainer::new(
cid.as_str(),
CONTAINER_BASE,
Some(s.devcg_info.clone()),
opts,
&sl(),
)?;
let pipe_size = AGENT_CONFIG.container_pipe_size;
@ -1981,16 +1981,20 @@ fn load_kernel_module(module: &protocols::agent::KernelModule) -> Result<()> {
#[cfg(test)]
mod tests {
use std::time::{SystemTime, UNIX_EPOCH};
use super::*;
use crate::{namespace::Namespace, protocols::agent_ttrpc_async::AgentService as _};
use nix::mount;
use nix::sched::{unshare, CloneFlags};
use oci::{Hook, Hooks, Linux, LinuxNamespace};
use oci::{Hook, Hooks, Linux, LinuxDeviceCgroup, LinuxNamespace, LinuxResources};
use tempfile::{tempdir, TempDir};
use test_utils::{assert_result, skip_if_not_root};
use ttrpc::{r#async::TtrpcContext, MessageHeader};
use which::which;
const CGROUP_PARENT: &str = "kata.agent.test.k8s.io";
fn check_command(cmd: &str) -> bool {
which(cmd).is_ok()
}
@ -2005,13 +2009,39 @@ mod tests {
}
fn create_dummy_opts() -> CreateOpts {
let start = SystemTime::now();
let since_the_epoch = start
.duration_since(UNIX_EPOCH)
.expect("Time went backwards");
let root = Root {
path: String::from("/"),
..Default::default()
};
let linux_resources = LinuxResources {
devices: vec![LinuxDeviceCgroup {
allow: true,
r#type: String::new(),
major: None,
minor: None,
access: String::from("rwm"),
}],
..Default::default()
};
let cgroups_path = format!(
"/{}/dummycontainer{}",
CGROUP_PARENT,
since_the_epoch.as_millis()
);
let spec = Spec {
linux: Some(oci::Linux::default()),
linux: Some(Linux {
cgroups_path,
resources: Some(linux_resources),
..Default::default()
}),
root: Some(root),
..Default::default()
};
@ -2035,6 +2065,7 @@ mod tests {
LinuxContainer::new(
"some_id",
dir.path().join("rootfs").to_str().unwrap(),
None,
create_dummy_opts(),
&slog_scope::logger(),
)
@ -2141,6 +2172,8 @@ mod tests {
#[tokio::test]
async fn test_do_write_stream() {
skip_if_not_root!();
#[derive(Debug)]
struct TestData<'a> {
create_container: bool,

View File

@ -12,7 +12,7 @@ use std::os::unix::fs::PermissionsExt;
use std::path::Path;
use std::str::FromStr;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::Arc;
use std::sync::{Arc, RwLock};
use std::time::{Duration, Instant};
use std::{thread, time};
@ -26,7 +26,7 @@ use nix::sys::stat::Mode;
use oci::{Hook, Hooks};
use protocols::agent::{OnlineCPUMemRequest, SharedMount};
use regex::Regex;
use rustjail::cgroups as rustjail_cgroups;
use rustjail::cgroups::{self as rustjail_cgroups, DevicesCgroupInfo};
use rustjail::container::BaseContainer;
use rustjail::container::LinuxContainer;
use rustjail::process::Process;
@ -118,6 +118,7 @@ pub struct Sandbox {
pub event_tx: Option<Sender<String>>,
pub bind_watcher: BindWatcher,
pub pcimap: HashMap<pci::Address, pci::Address>,
pub devcg_info: Arc<RwLock<DevicesCgroupInfo>>,
}
impl Sandbox {
@ -151,6 +152,7 @@ impl Sandbox {
event_tx: Some(tx),
bind_watcher: BindWatcher::new(),
pcimap: HashMap::new(),
devcg_info: Arc::new(RwLock::new(DevicesCgroupInfo::default())),
})
}
@ -658,7 +660,7 @@ mod tests {
use crate::mount::baremount;
use anyhow::{anyhow, Error};
use nix::mount::MsFlags;
use oci::{Linux, Root, Spec};
use oci::{Linux, LinuxDeviceCgroup, LinuxResources, Root, Spec};
use rustjail::container::LinuxContainer;
use rustjail::process::Process;
use rustjail::specconv::CreateOpts;
@ -667,9 +669,12 @@ mod tests {
use std::io::prelude::*;
use std::os::unix::fs::PermissionsExt;
use std::path::Path;
use std::time::{SystemTime, UNIX_EPOCH};
use tempfile::{tempdir, Builder, TempDir};
use test_utils::skip_if_not_root;
const CGROUP_PARENT: &str = "kata.agent.test.k8s.io";
fn bind_mount(src: &str, dst: &str, logger: &Logger) -> Result<(), Error> {
let src_path = Path::new(src);
let dst_path = Path::new(dst);
@ -824,13 +829,39 @@ mod tests {
}
fn create_dummy_opts() -> CreateOpts {
let start = SystemTime::now();
let since_the_epoch = start
.duration_since(UNIX_EPOCH)
.expect("Time went backwards");
let root = Root {
path: String::from("/"),
..Default::default()
};
let linux_resources = LinuxResources {
devices: vec![LinuxDeviceCgroup {
allow: true,
r#type: String::new(),
major: None,
minor: None,
access: String::from("rwm"),
}],
..Default::default()
};
let cgroups_path = format!(
"/{}/dummycontainer{}",
CGROUP_PARENT,
since_the_epoch.as_millis()
);
let spec = Spec {
linux: Some(Linux::default()),
linux: Some(Linux {
cgroups_path,
resources: Some(linux_resources),
..Default::default()
}),
root: Some(root),
..Default::default()
};
@ -853,22 +884,24 @@ mod tests {
.map_err(|e| anyhow!(e).context("tempdir failed"))
.unwrap();
// Create a new container
(
LinuxContainer::new(
"some_id",
dir.path().join("rootfs").to_str().unwrap(),
create_dummy_opts(),
&slog_scope::logger(),
)
.unwrap(),
dir,
let container = LinuxContainer::new(
"some_id",
dir.path().join("rootfs").to_str().unwrap(),
None,
create_dummy_opts(),
&slog_scope::logger(),
)
.unwrap();
// Create a new container
(container, dir)
}
#[tokio::test]
#[serial]
async fn get_container_entry_exist() {
skip_if_not_root!();
let logger = slog::Logger::root(slog::Discard, o!());
let mut s = Sandbox::new(&logger).unwrap();
let (linux_container, _root) = create_linuxcontainer();
@ -892,6 +925,8 @@ mod tests {
#[tokio::test]
#[serial]
async fn add_and_get_container() {
skip_if_not_root!();
let logger = slog::Logger::root(slog::Discard, o!());
let mut s = Sandbox::new(&logger).unwrap();
let (linux_container, _root) = create_linuxcontainer();
@ -903,6 +938,8 @@ mod tests {
#[tokio::test]
#[serial]
async fn update_shared_pidns() {
skip_if_not_root!();
let logger = slog::Logger::root(slog::Discard, o!());
let mut s = Sandbox::new(&logger).unwrap();
let test_pid = 9999;
@ -953,6 +990,8 @@ mod tests {
#[tokio::test]
async fn test_find_container_process() {
skip_if_not_root!();
let logger = slog::Logger::root(slog::Discard, o!());
let mut s = Sandbox::new(&logger).unwrap();
let cid = "container-123";
@ -998,6 +1037,8 @@ mod tests {
#[tokio::test]
async fn test_find_process() {
skip_if_not_root!();
let logger = slog::Logger::root(slog::Discard, o!());
let test_pids = [std::i32::MIN, -1, 0, 1, std::i32::MAX];

View File

@ -292,9 +292,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "cgroups-rs"
version = "0.3.2"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b098e7c3a70d03c288fa0a96ccf13e770eb3d78c4cc0e1549b3c13215d5f965"
checksum = "1fb3af90c8d48ad5f432d8afb521b5b40c2a2fce46dd60e05912de51c47fba64"
dependencies = [
"libc",
"log",

View File

@ -361,6 +361,7 @@ pub fn create_linux_container(
.map(|s| s.to_string())
.ok_or_else(|| anyhow!("failed to convert bundle path"))?
.as_str(),
None,
config,
logger,
)?;
@ -384,6 +385,7 @@ pub fn load_linux_container(
.to_str()
.map(|s| s.to_string())
.ok_or_else(|| anyhow!("failed to convert a root path"))?,
None,
status.config.clone(),
logger,
)?;

View File

@ -430,6 +430,143 @@ function stop_containerd() {
sudo systemctl stop containerd
}
function mountLoopDevice() {
local loop_file="$1"
if [ -e "$loop_file" ]; then
sudo rm -f $loop_file
info "$loop_file was removed"
fi
sudo dd if=/dev/zero of=$loop_file bs=100M count=2
sudo mkfs.ext4 $loop_file
sudo losetup -fP $loop_file
local loinfo=$(sudo losetup -a | grep $loop_file)
local device=$(echo "$loinfo" | awk -F'[: ]' '{print $1}')
echo $device
}
function startDeviceCgroupContainers() {
local pod_yaml=${REPORT_DIR}/device-cgroup-pod.yaml
local container1_yaml=${REPORT_DIR}/device-cgroup-container1.yaml
local container2_yaml=${REPORT_DIR}/device-cgroup-container2.yaml
local image="busybox:latest"
cat > "$pod_yaml" <<EOF
metadata:
name: busybox-device-cgroup-sandbox
namespace: default
uid: busybox-device-cgroup-sandbox-uid
EOF
cat > "$container1_yaml" <<EOF
metadata:
name: busybox-device-cgroup-container1
namespace: default
uid: busybox-device-cgroup-container1-uid
image:
image: $image
command:
- top
linux: {}
devices:
- container_path: $loop_dev1_container_path
host_path: "$loop_dev1"
permissions: rwm
EOF
cat > "$container2_yaml" <<EOF
metadata:
name: busybox-device-cgroup-container2
namespace: default
uid: busybox-device-cgroup-container2-uid
image:
image: $image
command:
- top
linux: {}
devices:
- container_path: $loop_dev2_container_path
host_path: "$loop_dev2"
permissions: rwm
EOF
sudo cp "$default_containerd_config" "$default_containerd_config_backup"
sudo cp $CONTAINERD_CONFIG_FILE "$default_containerd_config"
restart_containerd_service
sudo crictl pull $image
podid=$(sudo crictl runp $pod_yaml)
cid1=$(sudo crictl create $podid $container1_yaml $pod_yaml)
cid2=$(sudo crictl create $podid $container2_yaml $pod_yaml)
sudo crictl start $cid1
sudo crictl start $cid2
}
function stopDeviceCgroupContainers() {
info "show pod $podid"
sudo crictl --timeout=20s pods --id $podid
info "stop pod $podid"
sudo crictl --timeout=20s stopp $podid
info "remove pod $podid"
sudo crictl --timeout=20s rmp $podid
sudo cp "$default_containerd_config_backup" "$default_containerd_config"
restart_containerd_service
}
function TestDeviceCgroup() {
loop_dev1=$(mountLoopDevice "/tmp/device-cgroup-1.img" | tail -n 1)
loop_dev2=$(mountLoopDevice "/tmp/device-cgroup-2.img" | tail -n 1)
info "Two loop devices, $loop_dev1 and $loop_dev2, are created."
loop_dev1_container_path="/dev/test-block-1"
loop_dev2_container_path="/dev/test-block-2"
startDeviceCgroupContainers
local dev1_ls=$(sudo crictl exec $cid1 ls -l $loop_dev1_container_path)
local dev1_no=$(echo $dev1_ls | awk '{print $5, $6}')
local dev1_major=$(echo "${dev1_no%%,*}" | tr -d ' ')
local dev1_minor=$(echo "${dev1_no##*,}" | tr -d ' ')
local dev2_ls=$(sudo crictl exec $cid2 ls -l $loop_dev2_container_path)
local dev2_no=$(echo $dev2_ls | awk '{print $5, $6}')
local dev2_major=$(echo "${dev2_no%%,*}" | tr -d ' ')
local dev2_minor=$(echo "${dev2_no##*,}" | tr -d ' ')
info "\"$dev1_major:$dev1_minor\" is for container1, and \"$dev2_major:$dev2_minor\" is for container2."
local cid1_device_cgroup=$(sudo crictl exec $cid1 cat /sys/fs/cgroup/devices/devices.list)
local cid2_device_cgroup=$(sudo crictl exec $cid2 cat /sys/fs/cgroup/devices/devices.list)
if [[ $cid1_device_cgroup != *"b $dev1_major:$dev1_minor rwm"* ]]; then
die "The device cgroup of container1 is expected to have loop dev1"
fi
info "Container1 has \"b $dev1_major:$dev1_minor rwm\"."
if [[ $cid1_device_cgroup == *"b $dev2_major:$dev2_minor rwm"* ]]; then
die "The device cgroup of container1 isn't expected to have loop dev2"
fi
info "Container1 doesn't have \"b $dev2_major:$dev2_minor rwm\"."
if [[ $cid2_device_cgroup == *"b $dev1_major:$dev1_minor rwm"* ]]; then
die "The device cgroup of container2 isn't expected to have loop dev2"
fi
info "Container2 doesn't have \"b $dev1_major:$dev1_minor rwm\"."
if [[ $cid2_device_cgroup != *"b $dev2_major:$dev2_minor rwm"* ]]; then
die "The device cgroup of container2 is expected to have loop dev2"
fi
info "Container2 has \"b $dev2_major:$dev2_minor rwm\"."
stopDeviceCgroupContainers
# Umount loop devices
sudo losetup -d $loop_dev1
sudo losetup -d $loop_dev2
info "Two loop devices, $loop_dev1 and $loop_dev2, are umounted."
}
function main() {
info "Stop crio service"
@ -494,6 +631,7 @@ function main() {
fi
TestKilledVmmCleanup
TestDeviceCgroup
popd
}