diff --git a/src/agent/src/device.rs b/src/agent/src/device.rs index 65c6d02016..7c770955f9 100644 --- a/src/agent/src/device.rs +++ b/src/agent/src/device.rs @@ -4,6 +4,7 @@ // use libc::{c_uint, major, minor}; +use nix::sys::stat; use std::collections::HashMap; use std::fs; use std::os::unix::fs::MetadataExt; @@ -14,7 +15,7 @@ use crate::linux_abi::*; use crate::mount::{DRIVERBLKTYPE, DRIVERMMIOBLKTYPE, DRIVERNVDIMMTYPE, DRIVERSCSITYPE}; use crate::sandbox::Sandbox; use crate::{AGENT_CONFIG, GLOBAL_DEVICE_WATCHER}; -use oci::Spec; +use oci::{LinuxDeviceCgroup, LinuxResources, Spec}; use protocols::agent::Device; use rustjail::errors::*; @@ -25,6 +26,8 @@ macro_rules! sl { }; } +const VM_ROOTFS: &str = "/"; + // DeviceHandler is the type of callback to be defined to handle every type of device driver. type DeviceHandler = fn(&Device, &mut Spec, &Arc>) -> Result<()>; @@ -360,3 +363,63 @@ fn add_device(device: &Device, spec: &mut Spec, sandbox: &Arc>) - Some(dev_handler) => dev_handler(device, spec, sandbox), } } + +// update_device_cgroup update the device cgroup for container +// to not allow access to the guest root partition. This prevents +// the container from being able to access the VM rootfs. +pub fn update_device_cgroup(spec: &mut Spec) -> Result<()> { + let meta = fs::metadata(VM_ROOTFS)?; + let rdev = meta.dev(); + let major = stat::major(rdev) as i64; + let minor = stat::minor(rdev) as i64; + + let linux = match spec.linux.as_mut() { + None => { + return Err( + ErrorKind::ErrorCode("Spec didn't container linux field".to_string()).into(), + ) + } + Some(l) => l, + }; + + if linux.resources.is_none() { + linux.resources = Some(LinuxResources::default()); + } + + let resources = linux.resources.as_mut().unwrap(); + resources.devices.push(LinuxDeviceCgroup { + allow: false, + major: Some(major), + minor: Some(minor), + r#type: String::from("b"), + access: String::from("rwm"), + }); + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use oci::Linux; + + #[test] + fn test_update_device_cgroup() { + let mut spec = Spec::default(); + + spec.linux = Some(Linux::default()); + + update_device_cgroup(&mut spec).unwrap(); + + let devices = spec.linux.unwrap().resources.unwrap().devices; + assert_eq!(devices.len(), 1); + + let meta = fs::metadata(VM_ROOTFS).unwrap(); + let rdev = meta.dev(); + let major = stat::major(rdev) as i64; + let minor = stat::minor(rdev) as i64; + + assert_eq!(devices[0].major, Some(major)); + assert_eq!(devices[0].minor, Some(minor)); + } +} diff --git a/src/agent/src/main.rs b/src/agent/src/main.rs index a2f6370647..e925e6a1b7 100644 --- a/src/agent/src/main.rs +++ b/src/agent/src/main.rs @@ -31,6 +31,7 @@ use nix::fcntl::{self, OFlag}; use nix::sys::socket::{self, AddressFamily, SockAddr, SockFlag, SockType}; use nix::sys::wait::{self, WaitStatus}; use nix::unistd; +use nix::unistd::dup; use prctl::set_child_subreaper; use rustjail::errors::*; use signal_hook::{iterator::Signals, SIGCHLD}; @@ -117,6 +118,12 @@ fn main() -> Result<()> { let agentConfig = AGENT_CONFIG.clone(); if unistd::getpid() == Pid::from_raw(1) { + // dup a new file descriptor for this temporary logger writer, + // since this logger would be dropped and it's writer would + // be closed out of this code block. + let newwfd = dup(wfd)?; + let writer = unsafe { File::from_raw_fd(newwfd) }; + // Init a temporary logger used by init agent as init process // since before do the base mount, it wouldn't access "/proc/cmdline" // to get the customzied debug level. diff --git a/src/agent/src/rpc.rs b/src/agent/src/rpc.rs index 126647ff82..96298e382f 100644 --- a/src/agent/src/rpc.rs +++ b/src/agent/src/rpc.rs @@ -29,7 +29,7 @@ use nix::sys::stat; use nix::unistd::{self, Pid}; use rustjail::process::ProcessOperations; -use crate::device::{add_devices, rescan_pci_bus}; +use crate::device::{add_devices, rescan_pci_bus, update_device_cgroup}; use crate::linux_abi::*; use crate::mount::{add_storages, remove_mounts, STORAGEHANDLERLIST}; use crate::namespace::{NSTYPEIPC, NSTYPEPID, NSTYPEUTS}; @@ -121,6 +121,9 @@ impl agentService { update_container_namespaces(&s, &mut oci)?; + // Add the root partition to the device cgroup to prevent access + update_device_cgroup(&mut oci)?; + // write spec to bundle path, hooks might // read ocispec let olddir = setup_bundle(&oci)?;