From 3be50adab98a3ab8ee156301693c5218d14c3452 Mon Sep 17 00:00:00 2001 From: Manabu Sugimoto Date: Fri, 16 Jul 2021 14:50:58 +0900 Subject: [PATCH] agent: Add support for Seccomp The kata-agent supports seccomp feature based on the OCI runtime specification. This seccomp capability in the kata-agent is enabled by default. However, it is not enforced by default: users need to enable that by setting `disable_guest_seccomp` to `false` in the main configuration file. Fixes: #1476 Signed-off-by: Manabu Sugimoto --- src/agent/Cargo.lock | 38 ++++ src/agent/Cargo.toml | 3 + src/agent/Makefile | 21 +- src/agent/README.md | 1 + src/agent/rustjail/Cargo.toml | 4 + src/agent/rustjail/src/container.rs | 25 ++- src/agent/rustjail/src/lib.rs | 2 + src/agent/rustjail/src/seccomp.rs | 237 +++++++++++++++++++++++ src/agent/src/config.rs | 3 + src/agent/src/rpc.rs | 10 +- src/runtime/virtcontainers/kata_agent.go | 4 + 11 files changed, 342 insertions(+), 6 deletions(-) create mode 100644 src/agent/rustjail/src/seccomp.rs diff --git a/src/agent/Cargo.lock b/src/agent/Cargo.lock index 517088b1ec..2988b73b95 100644 --- a/src/agent/Cargo.lock +++ b/src/agent/Cargo.lock @@ -594,6 +594,24 @@ dependencies = [ "rle-decode-fast", ] +[[package]] +name = "libseccomp" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36ad71a5b66ceef3acfe6a3178b29b4da063f8bcb2c36dab666d52a7a9cfdb86" +dependencies = [ + "libc", + "libseccomp-sys", + "nix 0.17.0", + "pkg-config", +] + +[[package]] +name = "libseccomp-sys" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "539912de229a4fc16e507e8df12a394038a524a5b5b6c92045ad344472aac475" + [[package]] name = "lock_api" version = "0.4.4" @@ -763,6 +781,19 @@ dependencies = [ "void", ] +[[package]] +name = "nix" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50e4785f2c3b7589a0d0c1dd60285e1188adac4006e8abd6dd578e1567027363" +dependencies = [ + "bitflags", + "cc", + "cfg-if 0.1.10", + "libc", + "void", +] + [[package]] name = "nix" version = "0.19.1" @@ -977,6 +1008,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "pkg-config" +version = "0.3.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3831453b3449ceb48b6d9c7ad7c96d5ea673e9b470a1dc578c2ce6521230884c" + [[package]] name = "ppv-lite86" version = "0.2.10" @@ -1284,6 +1321,7 @@ dependencies = [ "inotify", "lazy_static", "libc", + "libseccomp", "nix 0.21.0", "oci", "path-absolutize", diff --git a/src/agent/Cargo.toml b/src/agent/Cargo.toml index c4864ef50c..6d0189b513 100644 --- a/src/agent/Cargo.toml +++ b/src/agent/Cargo.toml @@ -75,3 +75,6 @@ members = [ [profile.release] lto = true + +[features] +seccomp = ["rustjail/seccomp"] diff --git a/src/agent/Makefile b/src/agent/Makefile index 4806c39f46..24a98e34b8 100644 --- a/src/agent/Makefile +++ b/src/agent/Makefile @@ -27,6 +27,20 @@ COMMIT_MSG = $(if $(COMMIT),$(COMMIT),unknown) # Exported to allow cargo to see it export VERSION_COMMIT := $(if $(COMMIT),$(VERSION)-$(COMMIT),$(VERSION)) +EXTRA_RUSTFEATURES := + +##VAR SECCOMP=yes|no define if agent enables seccomp feature +SECCOMP := yes + +# Enable seccomp feature of rust build +ifeq ($(SECCOMP),yes) + override EXTRA_RUSTFEATURES += seccomp +endif + +ifneq ($(EXTRA_RUSTFEATURES),) + override EXTRA_RUSTFEATURES := --features $(EXTRA_RUSTFEATURES) +endif + include ../../utils.mk TARGET_PATH = target/$(TRIPLE)/$(BUILD_TYPE)/$(TARGET) @@ -90,15 +104,14 @@ default: $(TARGET) show-header $(TARGET): $(GENERATED_CODE) $(TARGET_PATH) $(TARGET_PATH): $(SOURCES) | show-summary - @RUSTFLAGS="$(EXTRA_RUSTFLAGS) --deny warnings" cargo build --target $(TRIPLE) --$(BUILD_TYPE) + @RUSTFLAGS="$(EXTRA_RUSTFLAGS) --deny warnings" cargo build --target $(TRIPLE) --$(BUILD_TYPE) $(EXTRA_RUSTFEATURES) $(GENERATED_FILES): %: %.in @sed $(foreach r,$(GENERATED_REPLACEMENTS),-e 's|@$r@|$($r)|g') "$<" > "$@" ##TARGET optimize: optimized build optimize: $(SOURCES) | show-summary show-header - @RUSTFLAGS="-C link-arg=-s $(EXTRA_RUSTFLAGS) --deny-warnings" cargo build --target $(TRIPLE) --$(BUILD_TYPE) - + @RUSTFLAGS="-C link-arg=-s $(EXTRA_RUSTFLAGS) --deny-warnings" cargo build --target $(TRIPLE) --$(BUILD_TYPE) $(EXTRA_RUSTFEATURES) ##TARGET clippy: run clippy linter clippy: $(GENERATED_CODE) @@ -127,7 +140,7 @@ vendor: #TARGET test: run cargo tests test: - @cargo test --all --target $(TRIPLE) -- --nocapture + @cargo test --all --target $(TRIPLE) $(EXTRA_RUSTFEATURES) -- --nocapture ##TARGET check: run test check: clippy format diff --git a/src/agent/README.md b/src/agent/README.md index cc47453bcc..08f1c1e8e8 100644 --- a/src/agent/README.md +++ b/src/agent/README.md @@ -19,6 +19,7 @@ After that, we drafted the initial code here, and any contributions are welcome. | I/O stream | :white_check_mark: | | Cgroups | :white_check_mark: | | Capabilities, `rlimit`, readonly path, masked path, users | :white_check_mark: | +| Seccomp | :white_check_mark: | | container stats (`stats_container`) | :white_check_mark: | | Hooks | :white_check_mark: | | **Agent Features & APIs** | diff --git a/src/agent/rustjail/Cargo.toml b/src/agent/rustjail/Cargo.toml index e350b2f069..497a862105 100644 --- a/src/agent/rustjail/Cargo.toml +++ b/src/agent/rustjail/Cargo.toml @@ -30,7 +30,11 @@ tokio = { version = "1.2.0", features = ["sync", "io-util", "process", "time", " futures = "0.3" async-trait = "0.1.31" inotify = "0.9.2" +libseccomp = { version = "0.1.3", optional = true } [dev-dependencies] serial_test = "0.5.0" tempfile = "3.1.0" + +[features] +seccomp = ["libseccomp"] diff --git a/src/agent/rustjail/src/container.rs b/src/agent/rustjail/src/container.rs index ac34b4976c..6e71552efa 100644 --- a/src/agent/rustjail/src/container.rs +++ b/src/agent/rustjail/src/container.rs @@ -25,6 +25,8 @@ use crate::cgroups::mock::Manager as FsManager; use crate::cgroups::Manager; use crate::log_child; use crate::process::Process; +#[cfg(feature = "seccomp")] +use crate::seccomp; use crate::specconv::CreateOpts; use crate::{mount, validator}; @@ -593,11 +595,22 @@ fn do_init_child(cwfd: RawFd) -> Result<()> { })?; } - // NoNewPeiviledges, Drop capabilities + // NoNewPrivileges if oci_process.no_new_privileges { capctl::prctl::set_no_new_privs().map_err(|_| anyhow!("cannot set no new privileges"))?; } + // Without NoNewPrivileges, we need to set seccomp + // before dropping capabilities because the calling thread + // must have the CAP_SYS_ADMIN. + #[cfg(feature = "seccomp")] + if !oci_process.no_new_privileges { + if let Some(ref scmp) = linux.seccomp { + seccomp::init_seccomp(scmp)?; + } + } + + // Drop capabilities if oci_process.capabilities.is_some() { let c = oci_process.capabilities.as_ref().unwrap(); capabilities::drop_privileges(cfd_log, c)?; @@ -669,6 +682,16 @@ fn do_init_child(cwfd: RawFd) -> Result<()> { unistd::read(fd, &mut buf)?; } + // With NoNewPrivileges, we should set seccomp as close to + // do_exec as possible in order to reduce the amount of + // system calls in the seccomp profiles. + #[cfg(feature = "seccomp")] + if oci_process.no_new_privileges { + if let Some(ref scmp) = linux.seccomp { + seccomp::init_seccomp(scmp)?; + } + } + do_exec(&args); } diff --git a/src/agent/rustjail/src/lib.rs b/src/agent/rustjail/src/lib.rs index f6b394a2ab..7535bf9901 100644 --- a/src/agent/rustjail/src/lib.rs +++ b/src/agent/rustjail/src/lib.rs @@ -34,6 +34,8 @@ pub mod container; pub mod mount; pub mod pipestream; pub mod process; +#[cfg(feature = "seccomp")] +pub mod seccomp; pub mod specconv; pub mod sync; pub mod sync_with_async; diff --git a/src/agent/rustjail/src/seccomp.rs b/src/agent/rustjail/src/seccomp.rs new file mode 100644 index 0000000000..37eb175767 --- /dev/null +++ b/src/agent/rustjail/src/seccomp.rs @@ -0,0 +1,237 @@ +// Copyright 2021 Sony Group Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +use anyhow::{anyhow, Result}; +use libseccomp::*; +use oci::{LinuxSeccomp, LinuxSeccompArg}; +use std::str::FromStr; + +fn get_filter_attr_from_flag(flag: &str) -> Result { + match flag { + "SECCOMP_FILTER_FLAG_TSYNC" => Ok(ScmpFilterAttr::CtlTsync), + "SECCOMP_FILTER_FLAG_LOG" => Ok(ScmpFilterAttr::CtlLog), + "SECCOMP_FILTER_FLAG_SPEC_ALLOW" => Ok(ScmpFilterAttr::CtlSsb), + _ => Err(anyhow!("Invalid seccomp flag")), + } +} + +// get_rule_conditions gets rule conditions for a system call from the args. +fn get_rule_conditions(args: &[LinuxSeccompArg]) -> Result> { + let mut conditions: Vec = Vec::new(); + + for arg in args { + if arg.op.is_empty() { + return Err(anyhow!("seccomp opreator is required")); + } + + let cond = ScmpArgCompare::new( + arg.index, + ScmpCompareOp::from_str(&arg.op)?, + arg.value, + Some(arg.value_two), + ); + + conditions.push(cond); + } + + Ok(conditions) +} + +// init_seccomp creates a seccomp filter and loads it for the current process +// including all the child processes. +pub fn init_seccomp(scmp: &LinuxSeccomp) -> Result<()> { + let def_action = ScmpAction::from_str(scmp.default_action.as_str(), Some(libc::EPERM as u32))?; + + // Create a new filter context + let mut filter = ScmpFilterContext::new_filter(def_action)?; + + // Add extra architectures + for arch in &scmp.architectures { + let scmp_arch = ScmpArch::from_str(arch)?; + filter.add_arch(scmp_arch)?; + } + + // Unset no new privileges bit + filter.set_no_new_privs_bit(false)?; + + // Add a rule for each system call + for syscall in &scmp.syscalls { + if syscall.names.is_empty() { + return Err(anyhow!("syscall name is required")); + } + + let action = ScmpAction::from_str(&syscall.action, Some(syscall.errno_ret))?; + if action == def_action { + continue; + } + + for name in &syscall.names { + let syscall_num = get_syscall_from_name(name, None)?; + + if syscall.args.is_empty() { + filter.add_rule(action, syscall_num, None)?; + } else { + let conditions = get_rule_conditions(&syscall.args)?; + filter.add_rule(action, syscall_num, Some(&conditions))?; + } + } + } + + // Set filter attributes for each seccomp flag + for flag in &scmp.flags { + let scmp_attr = get_filter_attr_from_flag(flag)?; + filter.set_filter_attr(scmp_attr, 1)?; + } + + // Load the filter + filter.load()?; + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::skip_if_not_root; + use libc::{dup2, process_vm_readv, EPERM}; + use std::io::Error; + use std::ptr::null; + + macro_rules! syscall_assert { + ($e1: expr, $e2: expr) => { + let mut errno: i32 = 0; + if $e1 < 0 { + errno = -Error::last_os_error().raw_os_error().unwrap(); + } + assert_eq!(errno, $e2); + }; + } + + #[test] + fn test_get_filter_attr_from_flag() { + skip_if_not_root!(); + + assert_eq!( + get_filter_attr_from_flag("SECCOMP_FILTER_FLAG_TSYNC").unwrap(), + ScmpFilterAttr::CtlTsync + ); + + assert_eq!(get_filter_attr_from_flag("ERROR").is_err(), true); + } + + #[test] + fn test_init_seccomp() { + skip_if_not_root!(); + + let data = r#"{ + "defaultAction": "SCMP_ACT_ALLOW", + "architectures": [ + ], + "flags": [ + "SECCOMP_FILTER_FLAG_LOG" + ], + "syscalls": [ + { + "names": [ + "dup2" + ], + "action": "SCMP_ACT_ERRNO" + }, + { + "names": [ + "process_vm_readv" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 111, + "args": [ + { + "index": 0, + "value": 10, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "names": [ + "process_vm_readv" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 111, + "args": [ + { + "index": 0, + "value": 20, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "names": [ + "process_vm_readv" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 222, + "args": [ + { + "index": 0, + "value": 30, + "op": "SCMP_CMP_EQ" + }, + { + "index": 2, + "value": 40, + "op": "SCMP_CMP_EQ" + } + ] + } + ] + }"#; + + let mut scmp: oci::LinuxSeccomp = serde_json::from_str(data).unwrap(); + let mut arch: Vec; + + if cfg!(target_endian = "little") { + // For little-endian architectures + arch = vec![ + "SCMP_ARCH_X86".to_string(), + "SCMP_ARCH_X32".to_string(), + "SCMP_ARCH_X86_64".to_string(), + "SCMP_ARCH_AARCH64".to_string(), + "SCMP_ARCH_ARM".to_string(), + "SCMP_ARCH_PPC64LE".to_string(), + ]; + } else { + // For big-endian architectures + arch = vec!["SCMP_ARCH_S390X".to_string()]; + } + + scmp.architectures.append(&mut arch); + + init_seccomp(&scmp).unwrap(); + + // Basic syscall with simple rule + syscall_assert!(unsafe { dup2(0, 1) }, -EPERM); + + // Syscall with permitted arguments + syscall_assert!(unsafe { process_vm_readv(1, null(), 0, null(), 0, 0) }, 0); + + // Multiple arguments with OR rules with ERRNO + syscall_assert!( + unsafe { process_vm_readv(10, null(), 0, null(), 0, 0) }, + -111 + ); + syscall_assert!( + unsafe { process_vm_readv(20, null(), 0, null(), 0, 0) }, + -111 + ); + + // Multiple arguments with AND rules with ERRNO + syscall_assert!(unsafe { process_vm_readv(30, null(), 0, null(), 0, 0) }, 0); + syscall_assert!( + unsafe { process_vm_readv(30, null(), 40, null(), 0, 0) }, + -222 + ); + } +} diff --git a/src/agent/src/config.rs b/src/agent/src/config.rs index 9dd0264d2c..bec90d401d 100644 --- a/src/agent/src/config.rs +++ b/src/agent/src/config.rs @@ -2,6 +2,7 @@ // // SPDX-License-Identifier: Apache-2.0 // +use crate::rpc; use anyhow::{bail, ensure, Context, Result}; use serde::Deserialize; use std::collections::HashSet; @@ -74,6 +75,7 @@ pub struct AgentConfig { pub unified_cgroup_hierarchy: bool, pub tracing: bool, pub endpoints: AgentEndpoints, + pub supports_seccomp: bool, } #[derive(Debug, Deserialize)] @@ -149,6 +151,7 @@ impl Default for AgentConfig { unified_cgroup_hierarchy: false, tracing: false, endpoints: Default::default(), + supports_seccomp: rpc::have_seccomp(), } } } diff --git a/src/agent/src/rpc.rs b/src/agent/src/rpc.rs index e4665ac361..c9032d4057 100644 --- a/src/agent/src/rpc.rs +++ b/src/agent/src/rpc.rs @@ -1334,11 +1334,19 @@ fn get_memory_info(block_size: bool, hotplug: bool) -> Result<(u64, bool)> { Ok((size, plug)) } +pub fn have_seccomp() -> bool { + if cfg!(feature = "seccomp") { + return true; + } + + false +} + fn get_agent_details() -> AgentDetails { let mut detail = AgentDetails::new(); detail.set_version(AGENT_VERSION.to_string()); - detail.set_supports_seccomp(false); + detail.set_supports_seccomp(have_seccomp()); detail.init_daemon = unistd::getpid() == Pid::from_raw(1); detail.device_handlers = RepeatedField::new(); diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index 57f7b0cded..b553159fba 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -1422,6 +1422,10 @@ func (k *kataAgent) createContainer(ctx context.Context, sandbox *Sandbox, c *Co sharedPidNs := k.handlePidNamespace(grpcSpec, sandbox) + if !sandbox.config.DisableGuestSeccomp && !sandbox.seccompSupported { + return nil, fmt.Errorf("Seccomp profiles are passed to the virtual machine, but the Kata agent does not support seccomp") + } + passSeccomp := !sandbox.config.DisableGuestSeccomp && sandbox.seccompSupported // We need to constrain the spec to make sure we're not