agent: Changes according to Pan's comments

- Disable device cgroup restriction while pod cgroup is not available.
- Remove balcklist-related names and change whitelist-related names to
  allowed_all.

Signed-off-by: Xuewei Niu <niuxuewei.nxw@antgroup.com>
This commit is contained in:
Xuewei Niu
2023-11-03 23:58:08 +08:00
parent 136fb76222
commit 023d8dc01e
2 changed files with 76 additions and 77 deletions

View File

@@ -18,7 +18,7 @@ use cgroups::{
DeviceResource, HugePageResource, MaxValue, NetworkPriority, DeviceResource, HugePageResource, MaxValue, NetworkPriority,
}; };
use crate::cgroups::{is_all_devices_rule, Manager as CgroupManager}; use crate::cgroups::{rule_for_all_devices, Manager as CgroupManager};
use crate::container::DEFAULT_DEVICES; use crate::container::DEFAULT_DEVICES;
use anyhow::{anyhow, Context, Result}; use anyhow::{anyhow, Context, Result};
use libc::{self, pid_t}; use libc::{self, pid_t};
@@ -66,7 +66,7 @@ pub struct Manager {
#[serde(skip)] #[serde(skip)]
pod_cgroup: Option<cgroups::Cgroup>, pod_cgroup: Option<cgroups::Cgroup>,
#[serde(skip)] #[serde(skip)]
devcg_whitelist: bool, devcg_allowed_all: bool,
} }
// set_resource is used to set reources by cgroup controller. // set_resource is used to set reources by cgroup controller.
@@ -125,7 +125,7 @@ impl CgroupManager for Manager {
} }
// set devices resources // set devices resources
if !self.devcg_whitelist { if !self.devcg_allowed_all {
set_devices_resources(&self.cgroup, &r.devices, res, pod_res); set_devices_resources(&self.cgroup, &r.devices, res, pod_res);
} }
debug!( debug!(
@@ -329,7 +329,7 @@ fn set_devices_resources(
let mut devices = vec![]; let mut devices = vec![];
for d in device_resources.iter() { for d in device_resources.iter() {
if is_all_devices_rule(d) { if rule_for_all_devices(d) {
continue; continue;
} }
if let Some(dev) = linux_device_cgroup_to_device_resource(d) { if let Some(dev) = linux_device_cgroup_to_device_resource(d) {
@@ -1024,56 +1024,57 @@ impl Manager {
// Cgroup path of parent of container // Cgroup path of parent of container
let pod_cpath = PathBuf::from(cpath) let pod_cpath = PathBuf::from(cpath)
.parent() .parent()
.context("Get parent of cgroup path")? .unwrap_or(Path::new("/"))
.display() .display()
.to_string(); .to_string();
// Create a cgroup for the pod if not exists. if pod_cpath.as_str() == "/" {
// Note that creating pod cgroup MUST be done before the pause // Skip setting pod cgroup for cpath due to no parent path
// container's cgroup created, since the upper node might have pod_cgroup = None
// some excessive permissions, and children inherit upper
// node's rules. You'll feel painful to shrink upper nodes'
// permissions if the new permissions are subset of old.
pod_cgroup = Some(load_cgroup(cgroups::hierarchies::auto(), &pod_cpath));
let pod_cg = pod_cgroup.as_ref().unwrap();
// The default is blacklist mode.
let is_whitelist_mode = Self::devcg_whitelist_rule(spec).unwrap_or(false);
if devices_group_info.inited {
// Do nothing for whitelist except for updating whitelist
// field of devices_group_info.
debug!(sl(), "Devices cgroup has been initialzied.");
// Set allowed all devices to pod cgroup
if !devices_group_info.whitelist && is_whitelist_mode {
info!(
sl(),
"Pod devices cgroup is changed to whitelist mode, devices_group_info = {:?}",
devices_group_info
);
Self::setup_whitelist_mode(pod_cg)
.with_context(|| format!("Setup whitelist mode for {}", pod_cpath))?;
devices_group_info.whitelist = true;
}
} else { } else {
// This is the first container (aka pause container) // Create a cgroup for the pod if not exists.
debug!(sl(), "Started to init devices cgroup"); // Note that creating pod cgroup MUST be done before the pause
// container's cgroup created, since the upper node might have
// some excessive permissions, and children inherit upper
// node's rules. You'll feel painful to shrink upper nodes'
// permissions if the new permissions are subset of old.
pod_cgroup = Some(load_cgroup(cgroups::hierarchies::auto(), &pod_cpath));
let pod_cg = pod_cgroup.as_ref().unwrap();
pod_cg.create().context("Create pod cgroup")?; let is_allowded_all = Self::has_allowed_all_devices_rule(spec);
if devices_group_info.inited {
debug!(sl(), "Devices cgroup has been initialzied.");
// Setup blacklist for pod cgroup // Set allowed all devices to pod cgroup
if !is_whitelist_mode { if !devices_group_info.allowed_all && is_allowded_all {
// Setup blacklist rule and allowed default devices for info!(
// blacklist. sl(),
Self::setup_blacklist_mode(pod_cg) "Pod devices cgroup is changed to allowed all devices mode, devices_group_info = {:?}",
.with_context(|| format!("Setup blacklist mode for {}", pod_cpath))?; devices_group_info
);
Self::setup_allowed_all_mode(pod_cg).with_context(|| {
format!("Setup allowed all devices mode for {}", pod_cpath)
})?;
devices_group_info.allowed_all = true;
}
} else { } else {
Self::setup_whitelist_mode(pod_cg) // This is the first container (aka pause container)
.with_context(|| format!("Setup whitelist mode for {}", pod_cpath))?; debug!(sl(), "Started to init devices cgroup");
devices_group_info.whitelist = true;
}
devices_group_info.inited = true pod_cg.create().context("Create pod cgroup")?;
if !is_allowded_all {
Self::setup_devcg_whitelist(pod_cg).with_context(|| {
format!("Setup device cgroup whitelist for {}", pod_cpath)
})?;
} else {
Self::setup_allowed_all_mode(pod_cg)
.with_context(|| format!("Setup allowed all mode for {}", pod_cpath))?;
devices_group_info.allowed_all = true;
}
devices_group_info.inited = true
}
} }
} else { } else {
pod_cgroup = None; pod_cgroup = None;
@@ -1085,9 +1086,9 @@ impl Manager {
// contains some permissions that the container doesn't need. // contains some permissions that the container doesn't need.
// Therefore, resetting the container's devices cgroup is required. // Therefore, resetting the container's devices cgroup is required.
if let Some(devices_group_info) = devices_group_info.as_ref() { if let Some(devices_group_info) = devices_group_info.as_ref() {
if !devices_group_info.whitelist { if !devices_group_info.allowed_all {
Self::setup_blacklist_mode(&cg) Self::setup_devcg_whitelist(&cg)
.with_context(|| format!("Setup blacklist mode for {}", cpath))?; .with_context(|| format!("Setup device cgroup whitelist for {}", cpath))?;
} }
} }
@@ -1098,8 +1099,8 @@ impl Manager {
cpath: cpath.to_string(), cpath: cpath.to_string(),
cgroup: cg, cgroup: cg,
pod_cgroup, pod_cgroup,
devcg_whitelist: devices_group_info devcg_allowed_all: devices_group_info
.map(|info| info.whitelist) .map(|info| info.allowed_all)
.unwrap_or(false), .unwrap_or(false),
}) })
} }
@@ -1118,7 +1119,7 @@ impl Manager {
cpath: cpath.to_string(), cpath: cpath.to_string(),
pod_cgroup: None, pod_cgroup: None,
cgroup: cg, cgroup: cg,
devcg_whitelist: false, devcg_allowed_all: false,
}) })
} }
@@ -1143,7 +1144,7 @@ impl Manager {
Ok((m, mounts)) Ok((m, mounts))
} }
fn setup_whitelist_mode(cgroup: &cgroups::Cgroup) -> Result<()> { fn setup_allowed_all_mode(cgroup: &cgroups::Cgroup) -> Result<()> {
// Insert two rules: `b *:* rwm` and `c *:* rwm`. // Insert two rules: `b *:* rwm` and `c *:* rwm`.
// The reason of not inserting `a *:* rwm` is that the Linux kernel // The reason of not inserting `a *:* rwm` is that the Linux kernel
// will deny writing `a` to `devices.allow` once a cgroup has // will deny writing `a` to `devices.allow` once a cgroup has
@@ -1183,7 +1184,10 @@ impl Manager {
Ok(()) Ok(())
} }
fn setup_blacklist_mode(cgroup: &cgroups::Cgroup) -> Result<()> { /// Setup device cgroup whitelist:
/// - Deny all devices in order to cleanup device cgroup.
/// - Allow default devices and default allowed devices.
fn setup_devcg_whitelist(cgroup: &cgroups::Cgroup) -> Result<()> {
#[allow(unused_mut)] #[allow(unused_mut)]
let mut dev_res_list = vec![DeviceResource { let mut dev_res_list = vec![DeviceResource {
allow: false, allow: false,
@@ -1212,27 +1216,22 @@ impl Manager {
Ok(()) Ok(())
} }
/// Check if OCI spec contains a whiltlist rule. /// Check if OCI spec contains a rule of allowed all devices.
/// fn has_allowed_all_devices_rule(spec: &Spec) -> bool {
/// # Returns
///
/// * `Some(true)` - Contains whitelist all rule;
/// * `Some(false)` - Contains blacklist all rule;
/// * `None` - Neither whitelist rule nor blacklist rule found.
fn devcg_whitelist_rule(spec: &Spec) -> Option<bool> {
let linux = match spec.linux.as_ref() { let linux = match spec.linux.as_ref() {
Some(linux) => linux, Some(linux) => linux,
None => return None, None => return false,
}; };
let resources = match linux.resources.as_ref() { let resources = match linux.resources.as_ref() {
Some(resource) => resource, Some(resource) => resource,
None => return None, None => return false,
}; };
resources resources
.devices .devices
.iter() .iter()
.find(|dev| is_all_devices_rule(dev)) .find(|dev| rule_for_all_devices(dev))
.map(|dev| dev.allow) .map(|dev| dev.allow)
.unwrap_or_default()
} }
} }
@@ -1404,7 +1403,7 @@ mod tests {
struct TestCase { struct TestCase {
cpath: Vec<String>, cpath: Vec<String>,
devices: Vec<Vec<LinuxDeviceCgroup>>, devices: Vec<Vec<LinuxDeviceCgroup>>,
whitelist: Vec<bool>, allowed_all: Vec<bool>,
pod_devices_list: Vec<String>, pod_devices_list: Vec<String>,
container_devices_list: Vec<String>, container_devices_list: Vec<String>,
} }
@@ -1431,7 +1430,7 @@ mod tests {
"/kata-agent-fs-manager-test/449ccd81-9320-4f3e-bb67-78f84700fac9", "/kata-agent-fs-manager-test/449ccd81-9320-4f3e-bb67-78f84700fac9",
)], )],
devices: vec![vec![allow_all.clone()]], devices: vec![vec![allow_all.clone()]],
whitelist: vec![true], allowed_all: vec![true],
pod_devices_list: vec![String::from("a *:* rwm\n")], pod_devices_list: vec![String::from("a *:* rwm\n")],
container_devices_list: vec![String::from("a *:* rwm\n")], container_devices_list: vec![String::from("a *:* rwm\n")],
}, },
@@ -1440,7 +1439,7 @@ mod tests {
"/kata-agent-fs-manager-test/449ccd81-9320-4f3e-bb67-78f84700fac9", "/kata-agent-fs-manager-test/449ccd81-9320-4f3e-bb67-78f84700fac9",
)], )],
devices: vec![vec![deny_all.clone()]], devices: vec![vec![deny_all.clone()]],
whitelist: vec![false], allowed_all: vec![false],
pod_devices_list: vec![String::new()], pod_devices_list: vec![String::new()],
container_devices_list: vec![String::new()], container_devices_list: vec![String::new()],
}, },
@@ -1454,7 +1453,7 @@ mod tests {
), ),
], ],
devices: vec![vec![deny_all.clone()], vec![allow_all.clone()]], devices: vec![vec![deny_all.clone()], vec![allow_all.clone()]],
whitelist: vec![false, true], allowed_all: vec![false, true],
pod_devices_list: vec![String::new(), String::from("b *:* rwm\nc *:* rwm\n")], pod_devices_list: vec![String::new(), String::from("b *:* rwm\nc *:* rwm\n")],
container_devices_list: vec![String::new(), String::from("b *:* rwm\nc *:* rwm\n")], container_devices_list: vec![String::new(), String::from("b *:* rwm\nc *:* rwm\n")],
}, },
@@ -1468,7 +1467,7 @@ mod tests {
), ),
], ],
devices: vec![vec![allow_all], vec![deny_all]], devices: vec![vec![allow_all], vec![deny_all]],
whitelist: vec![true, true], allowed_all: vec![true, true],
pod_devices_list: vec![String::from("a *:* rwm\n"), String::from("a *:* rwm\n")], pod_devices_list: vec![String::from("a *:* rwm\n"), String::from("a *:* rwm\n")],
container_devices_list: vec![ container_devices_list: vec![
String::from("a *:* rwm\n"), String::from("a *:* rwm\n"),
@@ -1481,7 +1480,7 @@ mod tests {
let sandbox = MockSandbox::new(); let sandbox = MockSandbox::new();
let devcg_info = sandbox.devcg_info.read().unwrap(); let devcg_info = sandbox.devcg_info.read().unwrap();
assert!(!devcg_info.inited); assert!(!devcg_info.inited);
assert!(!devcg_info.whitelist); assert!(!devcg_info.allowed_all);
drop(devcg_info); drop(devcg_info);
let mut managers = Vec::with_capacity(tc.devices.len()); let mut managers = Vec::with_capacity(tc.devices.len());
@@ -1503,8 +1502,8 @@ mod tests {
let devcg_info = sandbox.devcg_info.read().unwrap(); let devcg_info = sandbox.devcg_info.read().unwrap();
assert!(devcg_info.inited); assert!(devcg_info.inited);
assert_eq!( assert_eq!(
devcg_info.whitelist, tc.whitelist[cid], devcg_info.allowed_all, tc.allowed_all[cid],
"Round {}, cid {} whitelist assertion failure", "Round {}, cid {} allowed all assertion failure",
round, cid round, cid
); );
drop(devcg_info); drop(devcg_info);

View File

@@ -22,7 +22,7 @@ pub struct DevicesCgroupInfo {
inited: bool, inited: bool,
/// Indicate if pod's devices cgroup is in whitelist mode. Returns true /// Indicate if pod's devices cgroup is in whitelist mode. Returns true
/// once one container requires `a *:* rwm` permission. /// once one container requires `a *:* rwm` permission.
whitelist: bool, allowed_all: bool,
} }
pub trait Manager { pub trait Manager {
@@ -71,15 +71,15 @@ impl Debug for dyn Manager + Send + Sync {
} }
} }
/// Check if device cgroup is an all rule from OCI spec. /// Check if device cgroup is a rule for all devices from OCI spec.
/// ///
/// The formats representing all devices between OCI spec and cgroups-rs /// The formats representing all devices between OCI spec and cgroups-rs
/// are different. /// are different.
/// - OCI spec: major: 0, minor: 0, type: "" (All devices), access: "rwm"; /// - OCI spec: major: 0, minor: 0, type: "", access: "rwm";
/// - Cgroups-rs: major: -1, minor: -1, type: "a", access: "rwm"; /// - Cgroups-rs: major: -1, minor: -1, type: "a", access: "rwm";
/// - Linux: a *:* rwm /// - Linux: a *:* rwm
#[inline] #[inline]
fn is_all_devices_rule(dev_cgroup: &LinuxDeviceCgroup) -> bool { fn rule_for_all_devices(dev_cgroup: &LinuxDeviceCgroup) -> bool {
dev_cgroup.major.unwrap_or(0) == 0 dev_cgroup.major.unwrap_or(0) == 0
&& dev_cgroup.minor.unwrap_or(0) == 0 && dev_cgroup.minor.unwrap_or(0) == 0
&& (dev_cgroup.r#type.as_str() == "" || dev_cgroup.r#type.as_str() == "a") && (dev_cgroup.r#type.as_str() == "" || dev_cgroup.r#type.as_str() == "a")