agent: Add support for Seccomp

The kata-agent supports seccomp feature based on the OCI runtime specification.
This seccomp capability in the kata-agent is enabled by default.
However, it is not enforced by default: users need to enable that by setting
`disable_guest_seccomp` to `false` in the main configuration file.

Fixes: #1476

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
This commit is contained in:
Manabu Sugimoto 2021-07-16 14:50:58 +09:00
parent 4d4a15d6ce
commit 3be50adab9
11 changed files with 342 additions and 6 deletions

38
src/agent/Cargo.lock generated
View File

@ -594,6 +594,24 @@ dependencies = [
"rle-decode-fast",
]
[[package]]
name = "libseccomp"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "36ad71a5b66ceef3acfe6a3178b29b4da063f8bcb2c36dab666d52a7a9cfdb86"
dependencies = [
"libc",
"libseccomp-sys",
"nix 0.17.0",
"pkg-config",
]
[[package]]
name = "libseccomp-sys"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "539912de229a4fc16e507e8df12a394038a524a5b5b6c92045ad344472aac475"
[[package]]
name = "lock_api"
version = "0.4.4"
@ -763,6 +781,19 @@ dependencies = [
"void",
]
[[package]]
name = "nix"
version = "0.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50e4785f2c3b7589a0d0c1dd60285e1188adac4006e8abd6dd578e1567027363"
dependencies = [
"bitflags",
"cc",
"cfg-if 0.1.10",
"libc",
"void",
]
[[package]]
name = "nix"
version = "0.19.1"
@ -977,6 +1008,12 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]]
name = "pkg-config"
version = "0.3.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3831453b3449ceb48b6d9c7ad7c96d5ea673e9b470a1dc578c2ce6521230884c"
[[package]]
name = "ppv-lite86"
version = "0.2.10"
@ -1284,6 +1321,7 @@ dependencies = [
"inotify",
"lazy_static",
"libc",
"libseccomp",
"nix 0.21.0",
"oci",
"path-absolutize",

View File

@ -75,3 +75,6 @@ members = [
[profile.release]
lto = true
[features]
seccomp = ["rustjail/seccomp"]

View File

@ -27,6 +27,20 @@ COMMIT_MSG = $(if $(COMMIT),$(COMMIT),unknown)
# Exported to allow cargo to see it
export VERSION_COMMIT := $(if $(COMMIT),$(VERSION)-$(COMMIT),$(VERSION))
EXTRA_RUSTFEATURES :=
##VAR SECCOMP=yes|no define if agent enables seccomp feature
SECCOMP := yes
# Enable seccomp feature of rust build
ifeq ($(SECCOMP),yes)
override EXTRA_RUSTFEATURES += seccomp
endif
ifneq ($(EXTRA_RUSTFEATURES),)
override EXTRA_RUSTFEATURES := --features $(EXTRA_RUSTFEATURES)
endif
include ../../utils.mk
TARGET_PATH = target/$(TRIPLE)/$(BUILD_TYPE)/$(TARGET)
@ -90,15 +104,14 @@ default: $(TARGET) show-header
$(TARGET): $(GENERATED_CODE) $(TARGET_PATH)
$(TARGET_PATH): $(SOURCES) | show-summary
@RUSTFLAGS="$(EXTRA_RUSTFLAGS) --deny warnings" cargo build --target $(TRIPLE) --$(BUILD_TYPE)
@RUSTFLAGS="$(EXTRA_RUSTFLAGS) --deny warnings" cargo build --target $(TRIPLE) --$(BUILD_TYPE) $(EXTRA_RUSTFEATURES)
$(GENERATED_FILES): %: %.in
@sed $(foreach r,$(GENERATED_REPLACEMENTS),-e 's|@$r@|$($r)|g') "$<" > "$@"
##TARGET optimize: optimized build
optimize: $(SOURCES) | show-summary show-header
@RUSTFLAGS="-C link-arg=-s $(EXTRA_RUSTFLAGS) --deny-warnings" cargo build --target $(TRIPLE) --$(BUILD_TYPE)
@RUSTFLAGS="-C link-arg=-s $(EXTRA_RUSTFLAGS) --deny-warnings" cargo build --target $(TRIPLE) --$(BUILD_TYPE) $(EXTRA_RUSTFEATURES)
##TARGET clippy: run clippy linter
clippy: $(GENERATED_CODE)
@ -127,7 +140,7 @@ vendor:
#TARGET test: run cargo tests
test:
@cargo test --all --target $(TRIPLE) -- --nocapture
@cargo test --all --target $(TRIPLE) $(EXTRA_RUSTFEATURES) -- --nocapture
##TARGET check: run test
check: clippy format

View File

@ -19,6 +19,7 @@ After that, we drafted the initial code here, and any contributions are welcome.
| I/O stream | :white_check_mark: |
| Cgroups | :white_check_mark: |
| Capabilities, `rlimit`, readonly path, masked path, users | :white_check_mark: |
| Seccomp | :white_check_mark: |
| container stats (`stats_container`) | :white_check_mark: |
| Hooks | :white_check_mark: |
| **Agent Features & APIs** |

View File

@ -30,7 +30,11 @@ tokio = { version = "1.2.0", features = ["sync", "io-util", "process", "time", "
futures = "0.3"
async-trait = "0.1.31"
inotify = "0.9.2"
libseccomp = { version = "0.1.3", optional = true }
[dev-dependencies]
serial_test = "0.5.0"
tempfile = "3.1.0"
[features]
seccomp = ["libseccomp"]

View File

@ -25,6 +25,8 @@ use crate::cgroups::mock::Manager as FsManager;
use crate::cgroups::Manager;
use crate::log_child;
use crate::process::Process;
#[cfg(feature = "seccomp")]
use crate::seccomp;
use crate::specconv::CreateOpts;
use crate::{mount, validator};
@ -593,11 +595,22 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
})?;
}
// NoNewPeiviledges, Drop capabilities
// NoNewPrivileges
if oci_process.no_new_privileges {
capctl::prctl::set_no_new_privs().map_err(|_| anyhow!("cannot set no new privileges"))?;
}
// Without NoNewPrivileges, we need to set seccomp
// before dropping capabilities because the calling thread
// must have the CAP_SYS_ADMIN.
#[cfg(feature = "seccomp")]
if !oci_process.no_new_privileges {
if let Some(ref scmp) = linux.seccomp {
seccomp::init_seccomp(scmp)?;
}
}
// Drop capabilities
if oci_process.capabilities.is_some() {
let c = oci_process.capabilities.as_ref().unwrap();
capabilities::drop_privileges(cfd_log, c)?;
@ -669,6 +682,16 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
unistd::read(fd, &mut buf)?;
}
// With NoNewPrivileges, we should set seccomp as close to
// do_exec as possible in order to reduce the amount of
// system calls in the seccomp profiles.
#[cfg(feature = "seccomp")]
if oci_process.no_new_privileges {
if let Some(ref scmp) = linux.seccomp {
seccomp::init_seccomp(scmp)?;
}
}
do_exec(&args);
}

View File

@ -34,6 +34,8 @@ pub mod container;
pub mod mount;
pub mod pipestream;
pub mod process;
#[cfg(feature = "seccomp")]
pub mod seccomp;
pub mod specconv;
pub mod sync;
pub mod sync_with_async;

View File

@ -0,0 +1,237 @@
// Copyright 2021 Sony Group Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
use anyhow::{anyhow, Result};
use libseccomp::*;
use oci::{LinuxSeccomp, LinuxSeccompArg};
use std::str::FromStr;
fn get_filter_attr_from_flag(flag: &str) -> Result<ScmpFilterAttr> {
match flag {
"SECCOMP_FILTER_FLAG_TSYNC" => Ok(ScmpFilterAttr::CtlTsync),
"SECCOMP_FILTER_FLAG_LOG" => Ok(ScmpFilterAttr::CtlLog),
"SECCOMP_FILTER_FLAG_SPEC_ALLOW" => Ok(ScmpFilterAttr::CtlSsb),
_ => Err(anyhow!("Invalid seccomp flag")),
}
}
// get_rule_conditions gets rule conditions for a system call from the args.
fn get_rule_conditions(args: &[LinuxSeccompArg]) -> Result<Vec<ScmpArgCompare>> {
let mut conditions: Vec<ScmpArgCompare> = Vec::new();
for arg in args {
if arg.op.is_empty() {
return Err(anyhow!("seccomp opreator is required"));
}
let cond = ScmpArgCompare::new(
arg.index,
ScmpCompareOp::from_str(&arg.op)?,
arg.value,
Some(arg.value_two),
);
conditions.push(cond);
}
Ok(conditions)
}
// init_seccomp creates a seccomp filter and loads it for the current process
// including all the child processes.
pub fn init_seccomp(scmp: &LinuxSeccomp) -> Result<()> {
let def_action = ScmpAction::from_str(scmp.default_action.as_str(), Some(libc::EPERM as u32))?;
// Create a new filter context
let mut filter = ScmpFilterContext::new_filter(def_action)?;
// Add extra architectures
for arch in &scmp.architectures {
let scmp_arch = ScmpArch::from_str(arch)?;
filter.add_arch(scmp_arch)?;
}
// Unset no new privileges bit
filter.set_no_new_privs_bit(false)?;
// Add a rule for each system call
for syscall in &scmp.syscalls {
if syscall.names.is_empty() {
return Err(anyhow!("syscall name is required"));
}
let action = ScmpAction::from_str(&syscall.action, Some(syscall.errno_ret))?;
if action == def_action {
continue;
}
for name in &syscall.names {
let syscall_num = get_syscall_from_name(name, None)?;
if syscall.args.is_empty() {
filter.add_rule(action, syscall_num, None)?;
} else {
let conditions = get_rule_conditions(&syscall.args)?;
filter.add_rule(action, syscall_num, Some(&conditions))?;
}
}
}
// Set filter attributes for each seccomp flag
for flag in &scmp.flags {
let scmp_attr = get_filter_attr_from_flag(flag)?;
filter.set_filter_attr(scmp_attr, 1)?;
}
// Load the filter
filter.load()?;
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
use crate::skip_if_not_root;
use libc::{dup2, process_vm_readv, EPERM};
use std::io::Error;
use std::ptr::null;
macro_rules! syscall_assert {
($e1: expr, $e2: expr) => {
let mut errno: i32 = 0;
if $e1 < 0 {
errno = -Error::last_os_error().raw_os_error().unwrap();
}
assert_eq!(errno, $e2);
};
}
#[test]
fn test_get_filter_attr_from_flag() {
skip_if_not_root!();
assert_eq!(
get_filter_attr_from_flag("SECCOMP_FILTER_FLAG_TSYNC").unwrap(),
ScmpFilterAttr::CtlTsync
);
assert_eq!(get_filter_attr_from_flag("ERROR").is_err(), true);
}
#[test]
fn test_init_seccomp() {
skip_if_not_root!();
let data = r#"{
"defaultAction": "SCMP_ACT_ALLOW",
"architectures": [
],
"flags": [
"SECCOMP_FILTER_FLAG_LOG"
],
"syscalls": [
{
"names": [
"dup2"
],
"action": "SCMP_ACT_ERRNO"
},
{
"names": [
"process_vm_readv"
],
"action": "SCMP_ACT_ERRNO",
"errnoRet": 111,
"args": [
{
"index": 0,
"value": 10,
"op": "SCMP_CMP_EQ"
}
]
},
{
"names": [
"process_vm_readv"
],
"action": "SCMP_ACT_ERRNO",
"errnoRet": 111,
"args": [
{
"index": 0,
"value": 20,
"op": "SCMP_CMP_EQ"
}
]
},
{
"names": [
"process_vm_readv"
],
"action": "SCMP_ACT_ERRNO",
"errnoRet": 222,
"args": [
{
"index": 0,
"value": 30,
"op": "SCMP_CMP_EQ"
},
{
"index": 2,
"value": 40,
"op": "SCMP_CMP_EQ"
}
]
}
]
}"#;
let mut scmp: oci::LinuxSeccomp = serde_json::from_str(data).unwrap();
let mut arch: Vec<oci::Arch>;
if cfg!(target_endian = "little") {
// For little-endian architectures
arch = vec![
"SCMP_ARCH_X86".to_string(),
"SCMP_ARCH_X32".to_string(),
"SCMP_ARCH_X86_64".to_string(),
"SCMP_ARCH_AARCH64".to_string(),
"SCMP_ARCH_ARM".to_string(),
"SCMP_ARCH_PPC64LE".to_string(),
];
} else {
// For big-endian architectures
arch = vec!["SCMP_ARCH_S390X".to_string()];
}
scmp.architectures.append(&mut arch);
init_seccomp(&scmp).unwrap();
// Basic syscall with simple rule
syscall_assert!(unsafe { dup2(0, 1) }, -EPERM);
// Syscall with permitted arguments
syscall_assert!(unsafe { process_vm_readv(1, null(), 0, null(), 0, 0) }, 0);
// Multiple arguments with OR rules with ERRNO
syscall_assert!(
unsafe { process_vm_readv(10, null(), 0, null(), 0, 0) },
-111
);
syscall_assert!(
unsafe { process_vm_readv(20, null(), 0, null(), 0, 0) },
-111
);
// Multiple arguments with AND rules with ERRNO
syscall_assert!(unsafe { process_vm_readv(30, null(), 0, null(), 0, 0) }, 0);
syscall_assert!(
unsafe { process_vm_readv(30, null(), 40, null(), 0, 0) },
-222
);
}
}

View File

@ -2,6 +2,7 @@
//
// SPDX-License-Identifier: Apache-2.0
//
use crate::rpc;
use anyhow::{bail, ensure, Context, Result};
use serde::Deserialize;
use std::collections::HashSet;
@ -74,6 +75,7 @@ pub struct AgentConfig {
pub unified_cgroup_hierarchy: bool,
pub tracing: bool,
pub endpoints: AgentEndpoints,
pub supports_seccomp: bool,
}
#[derive(Debug, Deserialize)]
@ -149,6 +151,7 @@ impl Default for AgentConfig {
unified_cgroup_hierarchy: false,
tracing: false,
endpoints: Default::default(),
supports_seccomp: rpc::have_seccomp(),
}
}
}

View File

@ -1334,11 +1334,19 @@ fn get_memory_info(block_size: bool, hotplug: bool) -> Result<(u64, bool)> {
Ok((size, plug))
}
pub fn have_seccomp() -> bool {
if cfg!(feature = "seccomp") {
return true;
}
false
}
fn get_agent_details() -> AgentDetails {
let mut detail = AgentDetails::new();
detail.set_version(AGENT_VERSION.to_string());
detail.set_supports_seccomp(false);
detail.set_supports_seccomp(have_seccomp());
detail.init_daemon = unistd::getpid() == Pid::from_raw(1);
detail.device_handlers = RepeatedField::new();

View File

@ -1422,6 +1422,10 @@ func (k *kataAgent) createContainer(ctx context.Context, sandbox *Sandbox, c *Co
sharedPidNs := k.handlePidNamespace(grpcSpec, sandbox)
if !sandbox.config.DisableGuestSeccomp && !sandbox.seccompSupported {
return nil, fmt.Errorf("Seccomp profiles are passed to the virtual machine, but the Kata agent does not support seccomp")
}
passSeccomp := !sandbox.config.DisableGuestSeccomp && sandbox.seccompSupported
// We need to constrain the spec to make sure we're not