diff --git a/docs/Developer-Guide.md b/docs/Developer-Guide.md index a57bd2d72b..ab1c05a2d7 100644 --- a/docs/Developer-Guide.md +++ b/docs/Developer-Guide.md @@ -86,6 +86,27 @@ $ sudo sed -i '/^disable_guest_seccomp/ s/true/false/' /etc/kata-containers/conf This will pass container seccomp profiles to the kata agent. +## Enable SELinux on the guest + +> **Note:** +> +> - To enable SELinux on the guest, SELinux MUST also be enabled on the host. +> - You MUST create and build a rootfs image for SELinux in advance. +> See [Create a rootfs image](#create-a-rootfs-image) and [Build a rootfs image](#build-a-rootfs-image). +> - SELinux on the guest is currently supported only with a rootfs image, so +> you cannot enable SELinux with the agent init (`AGENT_INIT=yes`) yet. + +Enable guest SELinux in Enforcing mode as follows: + +``` +$ sudo sed -i '/^disable_guest_selinux/ s/true/false/g' /etc/kata-containers/configuration.toml +``` + +The runtime will automatically add `selinux=1` to the kernel parameters and pass the `xattr` option to +`virtiofsd` when `disable_guest_selinux` is set to `false`. + +If you want to enable SELinux in Permissive mode, add `enforcing=0` to the kernel parameters. + ## Enable full debug Enable full debug as follows: @@ -256,6 +277,12 @@ If you want to build the agent without seccomp capability, you need to run the ` $ script -fec 'sudo -E AGENT_INIT=yes USE_DOCKER=true SECCOMP=no ./rootfs.sh "${distro}"' ``` +If you want to enable SELinux on the guest, you MUST choose `centos` and run the `rootfs.sh` script with `SELINUX=yes` as follows: + +``` +$ script -fec 'sudo -E GOPATH=$GOPATH USE_DOCKER=true SELINUX=yes ./rootfs.sh centos' +``` + > **Note:** > > - Check the [compatibility matrix](../tools/osbuilder/README.md#platform-distro-compatibility-matrix) before creating rootfs. 
@@ -283,6 +310,19 @@ $ script -fec 'sudo -E USE_DOCKER=true ./image_builder.sh "${ROOTFS_DIR}"' $ popd ``` +If you want to enable SELinux on the guest, you MUST run the `image_builder.sh` script with `SELINUX=yes` +to label the guest image, as follows. +To label the image on the host, SELinux MUST be enabled on the host (`selinuxfs` is mounted) +and the rootfs MUST have been created by running `rootfs.sh` with `SELINUX=yes`. + +``` +$ script -fec 'sudo -E USE_DOCKER=true SELINUX=yes ./image_builder.sh ${ROOTFS_DIR}' +``` + +Currently, `image_builder.sh` uses `chcon` as an interim solution to apply the `container_runtime_exec_t` label +to the `kata-agent`. Hence, if you run `restorecon` on the guest image after running `image_builder.sh`, +you must label the `kata-agent` as `container_runtime_exec_t` again yourself. + > **Notes:** > > - You must ensure that the *default Docker runtime* is `runc` to make use of diff --git a/src/agent/Cargo.lock b/src/agent/Cargo.lock index 4b351c5c6d..581facc756 100644 --- a/src/agent/Cargo.lock +++ b/src/agent/Cargo.lock @@ -1705,6 +1705,7 @@ dependencies = [ "tempfile", "test-utils", "tokio", + "xattr", "zbus", ] @@ -2478,6 +2479,15 @@ version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" +[[package]] +name = "xattr" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d1526bbe5aaeb5eb06885f4d987bcdfa5e23187055de9b83fe00156a821fabc" +dependencies = [ + "libc", +] + [[package]] name = "zbus" version = "2.3.2" diff --git a/src/agent/rustjail/Cargo.toml b/src/agent/rustjail/Cargo.toml index 031175e867..851721ce0a 100644 --- a/src/agent/rustjail/Cargo.toml +++ b/src/agent/rustjail/Cargo.toml @@ -35,6 +35,7 @@ inotify = "0.9.2" libseccomp = { version = "0.3.0", optional = true } zbus = "2.3.0" bit-vec= "0.6.3" +xattr = "0.2.3" [dev-dependencies] 
serial_test = "0.5.0" diff --git a/src/agent/rustjail/src/container.rs b/src/agent/rustjail/src/container.rs index 31dbeda707..88bbf105eb 100644 --- a/src/agent/rustjail/src/container.rs +++ b/src/agent/rustjail/src/container.rs @@ -30,6 +30,7 @@ use crate::log_child; use crate::process::Process; #[cfg(feature = "seccomp")] use crate::seccomp; +use crate::selinux; use crate::specconv::CreateOpts; use crate::{mount, validator}; @@ -526,6 +527,8 @@ fn do_init_child(cwfd: RawFd) -> Result<()> { } } + let selinux_enabled = selinux::is_enabled()?; + sched::unshare(to_new & !CloneFlags::CLONE_NEWUSER)?; if userns { @@ -627,6 +630,18 @@ fn do_init_child(cwfd: RawFd) -> Result<()> { capctl::prctl::set_no_new_privs().map_err(|_| anyhow!("cannot set no new privileges"))?; } + // Set SELinux label + if !oci_process.selinux_label.is_empty() { + if !selinux_enabled { + return Err(anyhow!( + "SELinux label for the process is provided but SELinux is not enabled on the running kernel" + )); + } + + log_child!(cfd_log, "Set SELinux label to the container process"); + selinux::set_exec_label(&oci_process.selinux_label)?; + } + // Log unknown seccomp system calls in advance before the log file descriptor closes. 
#[cfg(feature = "seccomp")] if let Some(ref scmp) = linux.seccomp { diff --git a/src/agent/rustjail/src/lib.rs b/src/agent/rustjail/src/lib.rs index fb51d9f395..6f96d18c2d 100644 --- a/src/agent/rustjail/src/lib.rs +++ b/src/agent/rustjail/src/lib.rs @@ -38,6 +38,7 @@ pub mod pipestream; pub mod process; #[cfg(feature = "seccomp")] pub mod seccomp; +pub mod selinux; pub mod specconv; pub mod sync; pub mod sync_with_async; diff --git a/src/agent/rustjail/src/mount.rs b/src/agent/rustjail/src/mount.rs index d3f87a8b49..a6418a3435 100644 --- a/src/agent/rustjail/src/mount.rs +++ b/src/agent/rustjail/src/mount.rs @@ -25,6 +25,7 @@ use std::fs::File; use std::io::{BufRead, BufReader}; use crate::container::DEFAULT_DEVICES; +use crate::selinux; use crate::sync::write_count; use std::string::ToString; @@ -181,6 +182,8 @@ pub fn init_rootfs( None => flags |= MsFlags::MS_SLAVE, } + let label = &linux.mount_label; + let root = spec .root .as_ref() @@ -244,7 +247,7 @@ pub fn init_rootfs( } } - mount_from(cfd_log, m, rootfs, flags, &data, "")?; + mount_from(cfd_log, m, rootfs, flags, &data, label)?; // bind mount won't change mount options, we need remount to make mount options // effective. // first check that we have non-default options required before attempting a @@ -524,7 +527,6 @@ pub fn pivot_rootfs(path: &P) -> Result<( fn rootfs_parent_mount_private(path: &str) -> Result<()> { let mount_infos = parse_mount_table(MOUNTINFO_PATH)?; - let mut max_len = 0; let mut mount_point = String::from(""); let mut options = String::from(""); @@ -767,9 +769,9 @@ fn mount_from( rootfs: &str, flags: MsFlags, data: &str, - _label: &str, + label: &str, ) -> Result<()> { - let d = String::from(data); + let mut d = String::from(data); let dest = secure_join(rootfs, &m.destination); let src = if m.r#type.as_str() == "bind" { @@ -822,6 +824,37 @@ fn mount_from( e })?; + // Set the SELinux context for the mounts + let mut use_xattr = false; + if !label.is_empty() { + if selinux::is_enabled()? 
{ + let device = Path::new(&m.source) + .file_name() + .ok_or_else(|| anyhow!("invalid device source path: {}", &m.source))? + .to_str() + .ok_or_else(|| anyhow!("failed to convert device source path: {}", &m.source))?; + + match device { + // SELinux does not support labeling of /proc or /sys + "proc" | "sysfs" => (), + // SELinux does not support mount labeling against /dev/mqueue, + // so we use setxattr instead + "mqueue" => { + use_xattr = true; + } + _ => { + log_child!(cfd_log, "add SELinux mount label to {}", dest.as_str()); + selinux::add_mount_label(&mut d, label); + } + } + } else { + log_child!( + cfd_log, + "SELinux label for the mount is provided but SELinux is not enabled on the running kernel" + ); + } + } + mount( Some(src.as_str()), dest.as_str(), @@ -834,6 +867,10 @@ fn mount_from( e })?; + if !label.is_empty() && selinux::is_enabled()? && use_xattr { + xattr::set(dest.as_str(), "security.selinux", label.as_bytes())?; + } + if flags.contains(MsFlags::MS_BIND) && flags.intersects( !(MsFlags::MS_REC diff --git a/src/agent/rustjail/src/selinux.rs b/src/agent/rustjail/src/selinux.rs new file mode 100644 index 0000000000..5a647e3cc4 --- /dev/null +++ b/src/agent/rustjail/src/selinux.rs @@ -0,0 +1,80 @@ +// Copyright 2022 Sony Group Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +use anyhow::{Context, Result}; +use nix::unistd::gettid; +use std::fs::{self, OpenOptions}; +use std::io::prelude::*; +use std::path::Path; + +pub fn is_enabled() -> Result { + let buf = fs::read_to_string("/proc/mounts")?; + let enabled = buf.contains("selinuxfs"); + + Ok(enabled) +} + +pub fn add_mount_label(data: &mut String, label: &str) { + if data.is_empty() { + let context = format!("context=\"{}\"", label); + data.push_str(&context); + } else { + let context = format!(",context=\"{}\"", label); + data.push_str(&context); + } +} + +pub fn set_exec_label(label: &str) -> Result<()> { + let mut attr_path = 
Path::new("/proc/thread-self/attr/exec").to_path_buf(); + if !attr_path.exists() { + // Fall back to the old convention + attr_path = Path::new("/proc/self/task") + .join(gettid().to_string()) + .join("attr/exec") + } + + let mut file = OpenOptions::new() + .write(true) + .truncate(true) + .open(attr_path)?; + file.write_all(label.as_bytes()) + .with_context(|| "failed to apply SELinux label")?; + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + const TEST_LABEL: &str = "system_u:system_r:unconfined_t:s0"; + + #[test] + fn test_is_enabled() { + let ret = is_enabled(); + assert!(ret.is_ok(), "Expecting Ok, Got {:?}", ret); + } + + #[test] + fn test_add_mount_label() { + let mut data = String::new(); + add_mount_label(&mut data, TEST_LABEL); + assert_eq!(data, format!("context=\"{}\"", TEST_LABEL)); + + let mut data = String::from("defaults"); + add_mount_label(&mut data, TEST_LABEL); + assert_eq!(data, format!("defaults,context=\"{}\"", TEST_LABEL)); + } + + #[test] + fn test_set_exec_label() { + let ret = set_exec_label(TEST_LABEL); + if is_enabled().unwrap() { + assert!(ret.is_ok(), "Expecting Ok, Got {:?}", ret); + } else { + assert!(ret.is_err(), "Expecting error, Got {:?}", ret); + } + } +} diff --git a/src/agent/rustjail/src/validator.rs b/src/agent/rustjail/src/validator.rs index aea0f8f063..4955fbf466 100644 --- a/src/agent/rustjail/src/validator.rs +++ b/src/agent/rustjail/src/validator.rs @@ -6,6 +6,7 @@ use crate::container::Config; use anyhow::{anyhow, Context, Result}; use oci::{Linux, LinuxIdMapping, LinuxNamespace, Spec}; +use regex::Regex; use std::collections::HashMap; use std::path::{Component, PathBuf}; @@ -86,6 +87,23 @@ fn hostname(oci: &Spec) -> Result<()> { fn security(oci: &Spec) -> Result<()> { let linux = get_linux(oci)?; + let label_pattern = r".*_u:.*_r:.*_t:s([0-9]|1[0-5]).*"; + let label_regex = Regex::new(label_pattern)?; + + if let Some(ref process) = oci.process { + if !process.selinux_label.is_empty() && 
!label_regex.is_match(&process.selinux_label) { + return Err(anyhow!( + "SELinux label for the process is invalid format: {}", + &process.selinux_label + )); + } + } + if !linux.mount_label.is_empty() && !label_regex.is_match(&linux.mount_label) { + return Err(anyhow!( + "SELinux label for the mount is invalid format: {}", + &linux.mount_label + )); + } if linux.masked_paths.is_empty() && linux.readonly_paths.is_empty() { return Ok(()); @@ -95,8 +113,6 @@ fn security(oci: &Spec) -> Result<()> { return Err(anyhow!("Linux namespace does not contain mount")); } - // don't care about selinux at present - Ok(()) } @@ -285,7 +301,7 @@ pub fn validate(conf: &Config) -> Result<()> { #[cfg(test)] mod tests { use super::*; - use oci::Mount; + use oci::{Mount, Process}; #[test] fn test_namespace() { @@ -388,6 +404,29 @@ mod tests { ]; spec.linux = Some(linux); security(&spec).unwrap(); + + // SELinux + let valid_label = "system_u:system_r:container_t:s0:c123,c456"; + let mut process = Process::default(); + process.selinux_label = valid_label.to_string(); + spec.process = Some(process); + security(&spec).unwrap(); + + let mut linux = Linux::default(); + linux.mount_label = valid_label.to_string(); + spec.linux = Some(linux); + security(&spec).unwrap(); + + let invalid_label = "system_u:system_r:container_t"; + let mut process = Process::default(); + process.selinux_label = invalid_label.to_string(); + spec.process = Some(process); + security(&spec).unwrap_err(); + + let mut linux = Linux::default(); + linux.mount_label = invalid_label.to_string(); + spec.linux = Some(linux); + security(&spec).unwrap_err(); } #[test] diff --git a/src/runtime/Makefile b/src/runtime/Makefile index 0f49badd44..852b4d5795 100644 --- a/src/runtime/Makefile +++ b/src/runtime/Makefile @@ -171,6 +171,11 @@ DEFDISABLEGUESTEMPTYDIR := false DEFAULTEXPFEATURES := [] DEFDISABLESELINUX := false + +# Default guest SELinux configuration +DEFDISABLEGUESTSELINUX := true +DEFGUESTSELINUXLABEL := 
system_u:system_r:container_t + #Default SeccomSandbox param #The same default policy is used by libvirt #More explanation on https://lists.gnu.org/archive/html/qemu-devel/2017-02/msg03348.html @@ -460,6 +465,8 @@ USER_VARS += DEFNETWORKMODEL_QEMU USER_VARS += DEFDISABLEGUESTEMPTYDIR USER_VARS += DEFDISABLEGUESTSECCOMP USER_VARS += DEFDISABLESELINUX +USER_VARS += DEFDISABLEGUESTSELINUX +USER_VARS += DEFGUESTSELINUXLABEL USER_VARS += DEFAULTEXPFEATURES USER_VARS += DEFDISABLEBLOCK USER_VARS += DEFBLOCKSTORAGEDRIVER_ACRN diff --git a/src/runtime/cmd/kata-runtime/kata-env.go b/src/runtime/cmd/kata-runtime/kata-env.go index b1421fa006..c129f8f434 100644 --- a/src/runtime/cmd/kata-runtime/kata-env.go +++ b/src/runtime/cmd/kata-runtime/kata-env.go @@ -76,6 +76,7 @@ type RuntimeConfigInfo struct { type RuntimeInfo struct { Config RuntimeConfigInfo Path string + GuestSeLinuxLabel string Experimental []exp.Feature Version RuntimeVersionInfo Debug bool @@ -186,6 +187,7 @@ func getRuntimeInfo(configFile string, config oci.RuntimeConfig) RuntimeInfo { SandboxCgroupOnly: config.SandboxCgroupOnly, Experimental: config.Experimental, DisableGuestSeccomp: config.DisableGuestSeccomp, + GuestSeLinuxLabel: config.GuestSeLinuxLabel, } } diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in index e47a1d92a0..cedf2303ad 100644 --- a/src/runtime/config/configuration-clh.toml.in +++ b/src/runtime/config/configuration-clh.toml.in @@ -38,6 +38,13 @@ image = "@IMAGEPATH@" # disable applying SELinux on the VMM process (default false) disable_selinux=@DEFDISABLESELINUX@ +# disable applying SELinux on the container process +# If set to false, the type `container_t` is applied to the container process by default. +# Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built +# with `SELINUX=yes`. +# (default: true) +disable_guest_selinux=@DEFDISABLEGUESTSELINUX@ + # Path to the firmware. 
# If you want Cloud Hypervisor to use a specific firmware, set its path below. # This is option is only used when confidential_guest is enabled. @@ -321,6 +328,14 @@ internetworking_model="@DEFNETWORKMODEL_CLH@" # (default: true) disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ +# Apply a custom SELinux security policy to the container process inside the VM. +# This is used when you want to apply a type other than the default `container_t`, +# so general users should not uncomment and apply it. +# (format: "user:role:type") +# Note: You cannot specify MCS policy with the label because the sensitivity levels and +# categories are determined automatically by high-level container runtimes such as containerd. +#guest_selinux_label="@DEFGUESTSELINUXLABEL@" + # If enabled, the runtime will create opentracing.io traces and spans. # (See https://www.jaegertracing.io/docs/getting-started). # (default: disabled) diff --git a/src/runtime/config/configuration-qemu.toml.in b/src/runtime/config/configuration-qemu.toml.in index 8330042977..f7e70a6d53 100644 --- a/src/runtime/config/configuration-qemu.toml.in +++ b/src/runtime/config/configuration-qemu.toml.in @@ -438,6 +438,14 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@ # disable applying SELinux on the VMM process (default false) disable_selinux=@DEFDISABLESELINUX@ +# disable applying SELinux on the container process +# If set to false, the type `container_t` is applied to the container process by default. +# Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built +# with `SELINUX=yes`. +# (default: true) +disable_guest_selinux=@DEFDISABLEGUESTSELINUX@ + + [factory] # VM templating support. Once enabled, new VMs are created from template # using vm cloning. 
They will share the same initial kernel, initramfs and @@ -555,6 +563,14 @@ internetworking_model="@DEFNETWORKMODEL_QEMU@" # (default: true) disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ +# Apply a custom SELinux security policy to the container process inside the VM. +# This is used when you want to apply a type other than the default `container_t`, +# so general users should not uncomment and apply it. +# (format: "user:role:type") +# Note: You cannot specify MCS policy with the label because the sensitivity levels and +# categories are determined automatically by high-level container runtimes such as containerd. +#guest_selinux_label="@DEFGUESTSELINUXLABEL@" + # If enabled, the runtime will create opentracing.io traces and spans. # (See https://www.jaegertracing.io/docs/getting-started). # (default: disabled) diff --git a/src/runtime/pkg/katautils/config-settings.go.in b/src/runtime/pkg/katautils/config-settings.go.in index 601d95612c..43dd5cc5a4 100644 --- a/src/runtime/pkg/katautils/config-settings.go.in +++ b/src/runtime/pkg/katautils/config-settings.go.in @@ -90,6 +90,7 @@ const defaultSevSnpGuest = false const defaultGuestSwap = false const defaultRootlessHypervisor = false const defaultDisableSeccomp = false +const defaultDisableGuestSeLinux = true const defaultVfioMode = "guest-kernel" const defaultLegacySerial = false diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index 3fabfe0af1..3ed3177f57 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -59,9 +59,9 @@ const ( type tomlConfig struct { Hypervisor map[string]hypervisor Agent map[string]agent - Runtime runtime Image image Factory factory + Runtime runtime } type image struct { @@ -154,6 +154,7 @@ type hypervisor struct { Rootless bool `toml:"rootless"` DisableSeccomp bool `toml:"disable_seccomp"` DisableSeLinux bool `toml:"disable_selinux"` + DisableGuestSeLinux bool `toml:"disable_guest_selinux"` LegacySerial bool 
`toml:"use_legacy_serial"` EnableVCPUsPinning bool `toml:"enable_vcpus_pinning"` } @@ -164,12 +165,13 @@ type runtime struct { JaegerUser string `toml:"jaeger_user"` JaegerPassword string `toml:"jaeger_password"` VfioMode string `toml:"vfio_mode"` + GuestSeLinuxLabel string `toml:"guest_selinux_label"` SandboxBindMounts []string `toml:"sandbox_bind_mounts"` Experimental []string `toml:"experimental"` - Debug bool `toml:"enable_debug"` Tracing bool `toml:"enable_tracing"` DisableNewNetNs bool `toml:"disable_new_netns"` DisableGuestSeccomp bool `toml:"disable_guest_seccomp"` + Debug bool `toml:"enable_debug"` SandboxCgroupOnly bool `toml:"sandbox_cgroup_only"` StaticSandboxResourceMgmt bool `toml:"static_sandbox_resource_mgmt"` EnablePprof bool `toml:"enable_pprof"` @@ -690,6 +692,7 @@ func newFirecrackerHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { TxRateLimiterMaxRate: txRateLimiterMaxRate, EnableAnnotations: h.EnableAnnotations, DisableSeLinux: h.DisableSeLinux, + DisableGuestSeLinux: true, // Guest SELinux is not supported in Firecracker }, nil } @@ -836,6 +839,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { LegacySerial: h.LegacySerial, DisableSeLinux: h.DisableSeLinux, EnableVCPUsPinning: h.EnableVCPUsPinning, + DisableGuestSeLinux: h.DisableGuestSeLinux, }, nil } @@ -902,6 +906,7 @@ func newAcrnHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { GuestHookPath: h.guestHookPath(), DisableSeLinux: h.DisableSeLinux, EnableAnnotations: h.EnableAnnotations, + DisableGuestSeLinux: true, // Guest SELinux is not supported in ACRN }, nil } @@ -1007,6 +1012,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { DisableSeccomp: h.DisableSeccomp, ConfidentialGuest: h.ConfidentialGuest, DisableSeLinux: h.DisableSeLinux, + DisableGuestSeLinux: h.DisableGuestSeLinux, NetRateLimiterBwMaxRate: h.getNetRateLimiterBwMaxRate(), NetRateLimiterBwOneTimeBurst: h.getNetRateLimiterBwOneTimeBurst(), 
NetRateLimiterOpsMaxRate: h.getNetRateLimiterOpsMaxRate(), @@ -1230,6 +1236,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig { GuestSwap: defaultGuestSwap, Rootless: defaultRootlessHypervisor, DisableSeccomp: defaultDisableSeccomp, + DisableGuestSeLinux: defaultDisableGuestSeLinux, LegacySerial: defaultLegacySerial, } } @@ -1317,7 +1324,7 @@ func LoadConfiguration(configPath string, ignoreLogging bool) (resolvedConfigPat } config.DisableGuestSeccomp = tomlConf.Runtime.DisableGuestSeccomp - + config.GuestSeLinuxLabel = tomlConf.Runtime.GuestSeLinuxLabel config.StaticSandboxResourceMgmt = tomlConf.Runtime.StaticSandboxResourceMgmt config.SandboxCgroupOnly = tomlConf.Runtime.SandboxCgroupOnly config.DisableNewNetNs = tomlConf.Runtime.DisableNewNetNs diff --git a/src/runtime/pkg/katautils/config_test.go b/src/runtime/pkg/katautils/config_test.go index 5e493b40e3..335f077fbb 100644 --- a/src/runtime/pkg/katautils/config_test.go +++ b/src/runtime/pkg/katautils/config_test.go @@ -554,6 +554,7 @@ func TestMinimalRuntimeConfig(t *testing.T) { VhostUserStorePath: defaultVhostUserStorePath, VirtioFSCache: defaultVirtioFSCacheMode, BlockDeviceAIO: defaultBlockDeviceAIO, + DisableGuestSeLinux: defaultDisableGuestSeLinux, } expectedAgentConfig := vc.KataAgentConfig{ diff --git a/src/runtime/pkg/oci/utils.go b/src/runtime/pkg/oci/utils.go index 57c2ed1a15..2cd7c10f53 100644 --- a/src/runtime/pkg/oci/utils.go +++ b/src/runtime/pkg/oci/utils.go @@ -128,6 +128,9 @@ type RuntimeConfig struct { //Determines if seccomp should be applied inside guest DisableGuestSeccomp bool + //SELinux security context applied to the container process inside guest. 
+ GuestSeLinuxLabel string + // Sandbox sizing information which, if provided, indicates the size of // the sandbox needed for the workload(s) SandboxCPUs uint32 @@ -945,6 +948,8 @@ func SandboxConfig(ocispec specs.Spec, runtime RuntimeConfig, bundlePath, cid st DisableGuestSeccomp: runtime.DisableGuestSeccomp, + GuestSeLinuxLabel: runtime.GuestSeLinuxLabel, + Experimental: runtime.Experimental, } diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index 548ce6f77d..955da7d107 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -74,6 +74,8 @@ const ( MinHypervisorMemory = 256 defaultMsize9p = 8192 + + defaultDisableGuestSeLinux = true ) var ( @@ -560,6 +562,9 @@ type HypervisorConfig struct { // Disable selinux from the hypervisor process DisableSeLinux bool + // Disable selinux from the container process + DisableGuestSeLinux bool + // Use legacy serial for the guest console LegacySerial bool diff --git a/src/runtime/virtcontainers/hypervisor_config_linux_test.go b/src/runtime/virtcontainers/hypervisor_config_linux_test.go index 609e52fd73..41cabb1c35 100644 --- a/src/runtime/virtcontainers/hypervisor_config_linux_test.go +++ b/src/runtime/virtcontainers/hypervisor_config_linux_test.go @@ -92,22 +92,24 @@ func TestHypervisorConfigValidTemplateConfig(t *testing.T) { func TestHypervisorConfigDefaults(t *testing.T) { assert := assert.New(t) hypervisorConfig := &HypervisorConfig{ - KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel), - ImagePath: fmt.Sprintf("%s/%s", testDir, testImage), - HypervisorPath: "", + KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel), + ImagePath: fmt.Sprintf("%s/%s", testDir, testImage), + HypervisorPath: "", + DisableGuestSeLinux: defaultDisableGuestSeLinux, } testHypervisorConfigValid(t, hypervisorConfig, true) hypervisorConfigDefaultsExpected := &HypervisorConfig{ - KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel), - ImagePath: 
fmt.Sprintf("%s/%s", testDir, testImage), - HypervisorPath: "", - NumVCPUs: defaultVCPUs, - MemorySize: defaultMemSzMiB, - DefaultBridges: defaultBridges, - BlockDeviceDriver: defaultBlockDriver, - DefaultMaxVCPUs: defaultMaxVCPUs, - Msize9p: defaultMsize9p, + KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel), + ImagePath: fmt.Sprintf("%s/%s", testDir, testImage), + HypervisorPath: "", + NumVCPUs: defaultVCPUs, + MemorySize: defaultMemSzMiB, + DefaultBridges: defaultBridges, + BlockDeviceDriver: defaultBlockDriver, + DefaultMaxVCPUs: defaultMaxVCPUs, + Msize9p: defaultMsize9p, + DisableGuestSeLinux: defaultDisableGuestSeLinux, } assert.Exactly(hypervisorConfig, hypervisorConfigDefaultsExpected) diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index 477be9fde4..5746759542 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -36,6 +36,7 @@ import ( "context" "github.com/gogo/protobuf/proto" "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/selinux/go-selinux" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" "google.golang.org/grpc/codes" @@ -69,6 +70,9 @@ const ( kernelParamDebugConsole = "agent.debug_console" kernelParamDebugConsoleVPort = "agent.debug_console_vport" kernelParamDebugConsoleVPortValue = "1026" + + // Default SELinux type applied to the container process inside guest + defaultSeLinuxContainerType = "container_t" ) var ( @@ -895,7 +899,7 @@ func (k *kataAgent) removeIgnoredOCIMount(spec *specs.Spec, ignoredMounts map[st return nil } -func (k *kataAgent) constrainGRPCSpec(grpcSpec *grpc.Spec, passSeccomp bool, stripVfio bool) { +func (k *kataAgent) constrainGRPCSpec(grpcSpec *grpc.Spec, passSeccomp bool, disableGuestSeLinux bool, guestSeLinuxLabel string, stripVfio bool) error { // Disable Hooks since they have been handled on the host and there is // no reason to send them to the agent. 
It would make no sense to try // to apply them on the guest. @@ -907,11 +911,34 @@ func (k *kataAgent) constrainGRPCSpec(grpcSpec *grpc.Spec, passSeccomp bool, str grpcSpec.Linux.Seccomp = nil } - // Disable SELinux inside of the virtual machine, the label will apply - // to the KVM process + // Pass SELinux label for the container process to the agent. if grpcSpec.Process.SelinuxLabel != "" { - k.Logger().Info("SELinux label from config will be applied to the hypervisor process, not the VM workload") - grpcSpec.Process.SelinuxLabel = "" + if !disableGuestSeLinux { + k.Logger().Info("SELinux label will be applied to the container process inside guest") + + var label string + if guestSeLinuxLabel != "" { + label = guestSeLinuxLabel + } else { + label = grpcSpec.Process.SelinuxLabel + } + + processContext, err := selinux.NewContext(label) + if err != nil { + return err + } + + // Change the type from KVM to container because the type passed from the high-level + // runtime is for KVM process. + if guestSeLinuxLabel == "" { + processContext["type"] = defaultSeLinuxContainerType + } + grpcSpec.Process.SelinuxLabel = processContext.Get() + } else { + k.Logger().Info("Empty SELinux label for the process and the mount because guest SELinux is disabled") + grpcSpec.Process.SelinuxLabel = "" + grpcSpec.Linux.MountLabel = "" + } } // By now only CPU constraints are supported @@ -973,6 +1000,8 @@ func (k *kataAgent) constrainGRPCSpec(grpcSpec *grpc.Spec, passSeccomp bool, str } grpcSpec.Linux.Devices = linuxDevices } + + return nil } func (k *kataAgent) handleShm(mounts []specs.Mount, sandbox *Sandbox) { @@ -1256,9 +1285,20 @@ func (k *kataAgent) createContainer(ctx context.Context, sandbox *Sandbox, c *Co passSeccomp := !sandbox.config.DisableGuestSeccomp && sandbox.seccompSupported + // Currently, guest SELinux can be enabled only when SELinux is enabled on the host side. 
+ if !sandbox.config.HypervisorConfig.DisableGuestSeLinux && !selinux.GetEnabled() { + return nil, fmt.Errorf("Guest SELinux is enabled, but SELinux is disabled on the host side") + } + if sandbox.config.HypervisorConfig.DisableGuestSeLinux && sandbox.config.GuestSeLinuxLabel != "" { + return nil, fmt.Errorf("Custom SELinux security policy is provided, but guest SELinux is disabled") + } + // We need to constrain the spec to make sure we're not // passing irrelevant information to the agent. - k.constrainGRPCSpec(grpcSpec, passSeccomp, sandbox.config.VfioMode == config.VFIOModeGuestKernel) + err = k.constrainGRPCSpec(grpcSpec, passSeccomp, sandbox.config.HypervisorConfig.DisableGuestSeLinux, sandbox.config.GuestSeLinuxLabel, sandbox.config.VfioMode == config.VFIOModeGuestKernel) + if err != nil { + return nil, err + } req := &grpc.CreateContainerRequest{ ContainerId: c.id, diff --git a/src/runtime/virtcontainers/kata_agent_test.go b/src/runtime/virtcontainers/kata_agent_test.go index 9711a5cf55..885fd8acc7 100644 --- a/src/runtime/virtcontainers/kata_agent_test.go +++ b/src/runtime/virtcontainers/kata_agent_test.go @@ -619,7 +619,7 @@ func TestConstrainGRPCSpec(t *testing.T) { } k := kataAgent{} - k.constrainGRPCSpec(g, true, true) + k.constrainGRPCSpec(g, true, true, "", true) // Check nil fields assert.Nil(g.Hooks) diff --git a/src/runtime/virtcontainers/persist.go b/src/runtime/virtcontainers/persist.go index 59c6dda15f..906ed10761 100644 --- a/src/runtime/virtcontainers/persist.go +++ b/src/runtime/virtcontainers/persist.go @@ -189,6 +189,7 @@ func (s *Sandbox) dumpConfig(ss *persistapi.SandboxState) { SystemdCgroup: sconfig.SystemdCgroup, SandboxCgroupOnly: sconfig.SandboxCgroupOnly, DisableGuestSeccomp: sconfig.DisableGuestSeccomp, + GuestSeLinuxLabel: sconfig.GuestSeLinuxLabel, } ss.Config.SandboxBindMounts = append(ss.Config.SandboxBindMounts, sconfig.SandboxBindMounts...) 
@@ -429,6 +430,7 @@ func loadSandboxConfig(id string) (*SandboxConfig, error) { SystemdCgroup: savedConf.SystemdCgroup, SandboxCgroupOnly: savedConf.SandboxCgroupOnly, DisableGuestSeccomp: savedConf.DisableGuestSeccomp, + GuestSeLinuxLabel: savedConf.GuestSeLinuxLabel, } sconfig.SandboxBindMounts = append(sconfig.SandboxBindMounts, savedConf.SandboxBindMounts...) diff --git a/src/runtime/virtcontainers/persist/api/config.go b/src/runtime/virtcontainers/persist/api/config.go index 1c16b7bd91..44ba820643 100644 --- a/src/runtime/virtcontainers/persist/api/config.go +++ b/src/runtime/virtcontainers/persist/api/config.go @@ -243,19 +243,6 @@ type ContainerConfig struct { // SandboxConfig is a sandbox configuration. // Refs: virtcontainers/sandbox.go:SandboxConfig type SandboxConfig struct { - // Information for fields not saved: - // * Annotation: this is kind of casual data, we don't need casual data in persist file, - // if you know this data needs to persist, please gives it - // a specific field - - ContainerConfigs []ContainerConfig - - // SandboxBindMounts - list of paths to mount into guest - SandboxBindMounts []string - - // Experimental enables experimental features - Experimental []string - // Cgroups specifies specific cgroup settings for the various subsystems that the container is // placed into to limit the resources the container has available Cgroups *configs.Cgroup `json:"cgroups"` @@ -265,8 +252,24 @@ type SandboxConfig struct { KataShimConfig *ShimConfig - HypervisorType string - NetworkConfig NetworkConfig + // Custom SELinux security policy to the container process inside the VM + GuestSeLinuxLabel string + + HypervisorType string + + // SandboxBindMounts - list of paths to mount into guest + SandboxBindMounts []string + + // Experimental enables experimental features + Experimental []string + + // Information for fields not saved: + // * Annotation: this is kind of casual data, we don't need casual data in persist file, + // if you know this data 
needs to persist, please gives it a specific field + ContainerConfigs []ContainerConfig + + NetworkConfig NetworkConfig + HypervisorConfig HypervisorConfig ShmSize uint64 diff --git a/src/runtime/virtcontainers/pkg/annotations/annotations.go b/src/runtime/virtcontainers/pkg/annotations/annotations.go index d785580d9b..67c81cb1f8 100644 --- a/src/runtime/virtcontainers/pkg/annotations/annotations.go +++ b/src/runtime/virtcontainers/pkg/annotations/annotations.go @@ -247,6 +247,9 @@ const ( // DisableGuestSeccomp is a sandbox annotation that determines if seccomp should be applied inside guest. DisableGuestSeccomp = kataAnnotRuntimePrefix + "disable_guest_seccomp" + // GuestSeLinuxLabel is a SELinux security policy that is applied to a container process inside guest. + GuestSeLinuxLabel = kataAnnotRuntimePrefix + "guest_selinux_label" + // SandboxCgroupOnly is a sandbox annotation that determines if kata processes are managed only in sandbox cgroup. SandboxCgroupOnly = kataAnnotRuntimePrefix + "sandbox_cgroup_only" diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index d33f02f6ed..75a6731dd1 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -181,6 +181,15 @@ func (q *qemu) kernelParameters() string { // set the maximum number of vCPUs params = append(params, Param{"nr_cpus", fmt.Sprintf("%d", q.config.DefaultMaxVCPUs)}) + // set the SELinux params in accordance with the runtime configuration, disable_guest_selinux. + if q.config.DisableGuestSeLinux { + q.Logger().Info("Set selinux=0 to kernel params because SELinux on the guest is disabled") + params = append(params, Param{"selinux", "0"}) + } else { + q.Logger().Info("Set selinux=1 to kernel params because SELinux on the guest is enabled") + params = append(params, Param{"selinux", "1"}) + } + // add the params specified by the provided config. 
As the kernel // honours the last parameter value set and since the config-provided // params are added here, they will take priority over the defaults. @@ -476,6 +485,13 @@ func (q *qemu) createVirtiofsDaemon(sharedPath string) (VirtiofsDaemon, error) { return nd, nil } + // Set the xattr option for virtiofsd daemon to enable extended attributes + // in virtiofs if SELinux on the guest side is enabled. + if !q.config.DisableGuestSeLinux { + q.Logger().Info("Set the xattr option for virtiofsd") + q.config.VirtioFSExtraArgs = append(q.config.VirtioFSExtraArgs, "-o", "xattr") + } + // default use virtiofsd return &virtiofsd{ path: q.config.VirtioFSDaemon, @@ -846,7 +862,6 @@ func (q *qemu) StartVM(ctx context.Context, timeout int) error { // the SELinux label. If these processes require privileged, we do // notwant to run them under confinement. if !q.config.DisableSeLinux { - if err := label.SetProcessLabel(q.config.SELinuxProcessLabel); err != nil { return err } diff --git a/src/runtime/virtcontainers/qemu_test.go b/src/runtime/virtcontainers/qemu_test.go index f30dd0a696..a8bc6a33db 100644 --- a/src/runtime/virtcontainers/qemu_test.go +++ b/src/runtime/virtcontainers/qemu_test.go @@ -27,15 +27,16 @@ import ( func newQemuConfig() HypervisorConfig { return HypervisorConfig{ - KernelPath: testQemuKernelPath, - InitrdPath: testQemuInitrdPath, - HypervisorPath: testQemuPath, - NumVCPUs: defaultVCPUs, - MemorySize: defaultMemSzMiB, - DefaultBridges: defaultBridges, - BlockDeviceDriver: defaultBlockDriver, - DefaultMaxVCPUs: defaultMaxVCPUs, - Msize9p: defaultMsize9p, + KernelPath: testQemuKernelPath, + InitrdPath: testQemuInitrdPath, + HypervisorPath: testQemuPath, + NumVCPUs: defaultVCPUs, + MemorySize: defaultMemSzMiB, + DefaultBridges: defaultBridges, + BlockDeviceDriver: defaultBlockDriver, + DefaultMaxVCPUs: defaultMaxVCPUs, + Msize9p: defaultMsize9p, + DisableGuestSeLinux: defaultDisableGuestSeLinux, } } @@ -58,7 +59,7 @@ func testQemuKernelParameters(t 
*testing.T, kernelParams []Param, expected strin } func TestQemuKernelParameters(t *testing.T) { - expectedOut := fmt.Sprintf("panic=1 nr_cpus=%d foo=foo bar=bar", govmm.MaxVCPUs()) + expectedOut := fmt.Sprintf("panic=1 nr_cpus=%d selinux=0 foo=foo bar=bar", govmm.MaxVCPUs()) params := []Param{ { Key: "foo", diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index 32ccc2dbb7..025537fed9 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -126,14 +126,17 @@ type SandboxResourceSizing struct { // SandboxConfig is a Sandbox configuration. type SandboxConfig struct { - // Volumes is a list of shared volumes between the host and the Sandbox. - Volumes []types.Volume + // Annotations keys must be unique strings and must be name-spaced + Annotations map[string]string - // Containers describe the list of containers within a Sandbox. - // This list can be empty and populated by adding containers - // to the Sandbox a posteriori. - //TODO: this should be a map to avoid duplicated containers - Containers []ContainerConfig + // Custom SELinux security policy to the container process inside the VM + GuestSeLinuxLabel string + + HypervisorType HypervisorType + + ID string + + Hostname string // SandboxBindMounts - list of paths to mount into guest SandboxBindMounts []string @@ -141,31 +144,29 @@ type SandboxConfig struct { // Experimental features enabled Experimental []exp.Feature - // Annotations keys must be unique strings and must be name-spaced - // with e.g. reverse domain notation (org.clearlinux.key). - Annotations map[string]string + // Containers describe the list of containers within a Sandbox. + // This list can be empty and populated by adding containers + // to the Sandbox a posteriori. 
+ // TODO: this should be a map to avoid duplicated containers + Containers []ContainerConfig - ID string - - Hostname string - - HypervisorType HypervisorType - - AgentConfig KataAgentConfig + Volumes []types.Volume NetworkConfig NetworkConfig + AgentConfig KataAgentConfig + HypervisorConfig HypervisorConfig - SandboxResources SandboxResourceSizing - - // StaticResourceMgmt indicates if the shim should rely on statically sizing the sandbox (VM) - StaticResourceMgmt bool - ShmSize uint64 + SandboxResources SandboxResourceSizing + VfioMode config.VFIOModeType + // StaticResourceMgmt indicates if the shim should rely on statically sizing the sandbox (VM) + StaticResourceMgmt bool + // SharePidNs sets all containers to share the same sandbox level pid namespace. SharePidNs bool diff --git a/tools/osbuilder/image-builder/image_builder.sh b/tools/osbuilder/image-builder/image_builder.sh index 8b65ab4e8f..75b23b1765 100755 --- a/tools/osbuilder/image-builder/image_builder.sh +++ b/tools/osbuilder/image-builder/image_builder.sh @@ -64,6 +64,8 @@ readonly -a systemd_files=( # Set a default value AGENT_INIT=${AGENT_INIT:-no} +SELINUX=${SELINUX:-no} +SELINUXFS="/sys/fs/selinux" # Align image to 128M readonly mem_boundary_mb=128 @@ -93,6 +95,10 @@ Extra environment variables: DEFAULT: not set USE_PODMAN: If set and USE_DOCKER not set, will build image in a Podman Container (requries podman) DEFAULT: not set + SELINUX: If set to "yes", the rootfs is labeled for SELinux. + Make sure that selinuxfs is mounted to /sys/fs/selinux on the host + and the rootfs is built with SELINUX=yes. 
+ DEFAULT value: "no" Following diagram shows how the resulting image will look like @@ -134,6 +140,7 @@ build_with_container() { local nsdax_bin="$9" local container_image_name="image-builder-osbuilder" local shared_files="" + local selinuxfs="" image_dir=$(readlink -f "$(dirname "${image}")") image_name=$(basename "${image}") @@ -157,6 +164,14 @@ build_with_container() { shared_files+="-v ${mke2fs_conf}:${mke2fs_conf}:ro " fi + if [ "${SELINUX}" == "yes" ]; then + if mountpoint $SELINUXFS > /dev/null; then + selinuxfs="-v ${SELINUXFS}:${SELINUXFS}" + else + die "Make sure that SELinux is enabled on the host" + fi + fi + #Make sure we use a compatible runtime to build rootfs # In case Clear Containers Runtime is installed we dont want to hit issue: #https://github.com/clearcontainers/runtime/issues/828 @@ -170,12 +185,14 @@ build_with_container() { --env BLOCK_SIZE="${block_size}" \ --env ROOT_FREE_SPACE="${root_free_space}" \ --env NSDAX_BIN="${nsdax_bin}" \ + --env SELINUX="${SELINUX}" \ --env DEBUG="${DEBUG}" \ -v /dev:/dev \ -v "${script_dir}":"/osbuilder" \ -v "${script_dir}/../scripts":"/scripts" \ -v "${rootfs}":"/rootfs" \ -v "${image_dir}":"/image" \ + ${selinuxfs} \ ${shared_files} \ ${container_image_name} \ bash "/osbuilder/${script_name}" -o "/image/${image_name}" /rootfs @@ -384,6 +401,7 @@ create_rootfs_image() { local img_size="$3" local fs_type="$4" local block_size="$5" + local agent_bin="$6" create_disk "${image}" "${img_size}" "${fs_type}" "${rootfs_start}" @@ -402,6 +420,31 @@ create_rootfs_image() { info "Copying content from rootfs to root partition" cp -a "${rootfs}"/* "${mount_dir}" + + if [ "${SELINUX}" == "yes" ]; then + if [ "${AGENT_INIT}" == "yes" ]; then + die "Guest SELinux with the agent init is not supported yet" + fi + + info "Labeling rootfs for SELinux" + selinuxfs_path="${mount_dir}${SELINUXFS}" + mkdir -p $selinuxfs_path + if mountpoint $SELINUXFS > /dev/null && \ + chroot "${mount_dir}" command -v restorecon > /dev/null; 
then + mount -t selinuxfs selinuxfs $selinuxfs_path + chroot "${mount_dir}" restorecon -RF -e ${SELINUXFS} / + # TODO: This operation will be removed after the updated container-selinux that + # includes the following commit is released. + # https://github.com/containers/container-selinux/commit/39f83cc74d50bd10ab6be4d0bdd98bc04857469f + # We use chcon as an interim solution until then. + chroot "${mount_dir}" chcon -t container_runtime_exec_t "/usr/bin/${agent_bin}" + umount $selinuxfs_path + else + die "Could not label the rootfs. Make sure that SELinux is enabled on the host \ +and the rootfs is built with SELINUX=yes" + fi + fi + sync OK "rootfs copied" @@ -529,7 +572,7 @@ main() { # consider in calculate_img_size rootfs_img_size=$((img_size - dax_header_sz)) create_rootfs_image "${rootfs}" "${image}" "${rootfs_img_size}" \ - "${fs_type}" "${block_size}" + "${fs_type}" "${block_size}" "${agent_bin}" # insert at the beginning of the image the MBR + DAX header set_dax_header "${image}" "${img_size}" "${fs_type}" "${nsdax_bin}" diff --git a/tools/osbuilder/rootfs-builder/centos/config.sh b/tools/osbuilder/rootfs-builder/centos/config.sh index 7226da047c..2123903a06 100644 --- a/tools/osbuilder/rootfs-builder/centos/config.sh +++ b/tools/osbuilder/rootfs-builder/centos/config.sh @@ -8,10 +8,15 @@ OS_VERSION=${OS_VERSION:-stream9} PACKAGES="chrony iptables" [ "$AGENT_INIT" = no ] && PACKAGES+=" systemd" [ "$SECCOMP" = yes ] && PACKAGES+=" libseccomp" +[ "$SELINUX" = yes ] && PACKAGES+=" container-selinux" # Container registry tag is different from metalink repo, e.g. 
"stream9" => "9-stream" os_repo_version="$(sed -E "s/(stream)(.+)/\2-\1/" <<< "$OS_VERSION")" METALINK="https://mirrors.centos.org/metalink?repo=centos-baseos-$os_repo_version&arch=\$basearch" +if [ "$SELINUX" == yes ]; then + # AppStream repository is required for the container-selinux package + METALINK_APPSTREAM="https://mirrors.centos.org/metalink?repo=centos-appstream-$os_repo_version&arch=\$basearch" +fi GPG_KEY_FILE=RPM-GPG-KEY-CentOS-Official GPG_KEY_URL="https://centos.org/keys/$GPG_KEY_FILE" diff --git a/tools/osbuilder/rootfs-builder/rootfs.sh b/tools/osbuilder/rootfs-builder/rootfs.sh index c69f8dfef0..43c79fd7d5 100755 --- a/tools/osbuilder/rootfs-builder/rootfs.sh +++ b/tools/osbuilder/rootfs-builder/rootfs.sh @@ -25,6 +25,7 @@ LIBC=${LIBC:-musl} # The kata agent enables seccomp feature. # However, it is not enforced by default: you need to enable that in the main configuration file. SECCOMP=${SECCOMP:-"yes"} +SELINUX=${SELINUX:-"no"} lib_file="${script_dir}/../scripts/lib.sh" source "$lib_file" @@ -142,6 +143,11 @@ ROOTFS_DIR Path to the directory that is populated with the rootfs. SECCOMP When set to "no", the kata-agent is built without seccomp capability. Default value: "yes" +SELINUX When set to "yes", build the rootfs with the required packages to + enable SELinux in the VM. + Make sure the guest kernel is compiled with SELinux enabled. + Default value: "no" + USE_DOCKER If set, build the rootfs inside a container (requires Docker). 
Default value: @@ -346,6 +352,15 @@ build_rootfs_distro() echo "Required rust version: $RUST_VERSION" + if [ "${SELINUX}" == "yes" ]; then + if [ "${AGENT_INIT}" == "yes" ]; then + die "Guest SELinux with the agent init is not supported yet" + fi + if [ "${distro}" != "centos" ]; then + die "The guest rootfs must be CentOS to enable guest SELinux" + fi + fi + if [ -z "${USE_DOCKER}" ] && [ -z "${USE_PODMAN}" ]; then info "build directly" build_rootfs ${ROOTFS_DIR} @@ -426,6 +441,7 @@ build_rootfs_distro() --env OS_VERSION="${OS_VERSION}" \ --env INSIDE_CONTAINER=1 \ --env SECCOMP="${SECCOMP}" \ + --env SELINUX="${SELINUX}" \ --env DEBUG="${DEBUG}" \ --env HOME="/root" \ -v "${repo_dir}":"/kata-containers" \ diff --git a/tools/osbuilder/scripts/lib.sh b/tools/osbuilder/scripts/lib.sh index 5ed0176771..615ff10a95 100644 --- a/tools/osbuilder/scripts/lib.sh +++ b/tools/osbuilder/scripts/lib.sh @@ -79,7 +79,23 @@ gpgcheck=1 gpgkey=file://${CONFIG_DIR}/${GPG_KEY_FILE} EOF fi - + if [ "$SELINUX" == "yes" ]; then + cat > "${DNF_CONF}" << EOF +[appstream] +name=${OS_NAME}-${OS_VERSION} upstream +releasever=${OS_VERSION} +EOF + echo "metalink=$METALINK_APPSTREAM" >> "$DNF_CONF" + if [ -n "$GPG_KEY_URL" ]; then + if [ ! 
-f "${CONFIG_DIR}/${GPG_KEY_FILE}" ]; then + curl -L "${GPG_KEY_URL}" -o "${CONFIG_DIR}/${GPG_KEY_FILE}" + fi + cat >> "${DNF_CONF}" << EOF +gpgcheck=1 +gpgkey=file://${CONFIG_DIR}/${GPG_KEY_FILE} +EOF + fi + fi } build_rootfs() diff --git a/tools/packaging/kernel/configs/fragments/common/lsm.conf b/tools/packaging/kernel/configs/fragments/common/lsm.conf new file mode 100644 index 0000000000..6dc685fca7 --- /dev/null +++ b/tools/packaging/kernel/configs/fragments/common/lsm.conf @@ -0,0 +1,12 @@ +# SELinux support: +CONFIG_AUDIT=y +CONFIG_AUDITSYSCALL=y +CONFIG_LSM_MMAP_MIN_ADDR=6553 +CONFIG_NETWORK_SECMARK=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y +CONFIG_SECURITY_SELINUX_BOOTPARAM=y +CONFIG_SECURITY_SELINUX_DEVELOP=y +CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=0 +CONFIG_SECURITY_SELINUX_SIDTAB_HASH_BITS=9 +CONFIG_SECURITY_SELINUX_SID2STR_CACHE_SIZE=256