From c617bbe70dcd8b1787eaeebba3fb1e7e33ac48cd Mon Sep 17 00:00:00 2001 From: Manabu Sugimoto Date: Sun, 7 Aug 2022 19:46:07 +0900 Subject: [PATCH] runtime: Pass SELinux policy for containers to the agent Pass SELinux policy for containers to the agent if `disable_guest_selinux` is set to `false` in the runtime configuration. The `container_t` type is applied to the container process inside the guest by default. Users can also apply a custom SELinux policy to the container process using `guest_selinux_label` in the runtime configuration. This serves as an alternative to the Kubernetes security context for SELinux, because users cannot specify the policy for Kata containers through the Kubernetes security context. To apply SELinux policy to the container, the guest rootfs must be a CentOS rootfs that is created and built with `SELINUX=yes`. Fixes: #4812 Signed-off-by: Manabu Sugimoto --- src/runtime/Makefile | 7 +++ src/runtime/cmd/kata-runtime/kata-env.go | 2 + src/runtime/config/configuration-clh.toml.in | 15 ++++++ src/runtime/config/configuration-qemu.toml.in | 16 ++++++ .../pkg/katautils/config-settings.go.in | 1 + src/runtime/pkg/katautils/config.go | 13 +++-- src/runtime/pkg/katautils/config_test.go | 1 + src/runtime/pkg/oci/utils.go | 5 ++ src/runtime/virtcontainers/hypervisor.go | 5 ++ .../hypervisor_config_linux_test.go | 26 +++++----- src/runtime/virtcontainers/kata_agent.go | 52 ++++++++++++++++--- src/runtime/virtcontainers/kata_agent_test.go | 2 +- src/runtime/virtcontainers/persist.go | 2 + .../virtcontainers/persist/api/config.go | 33 ++++++------ .../pkg/annotations/annotations.go | 3 ++ src/runtime/virtcontainers/qemu.go | 17 +++++- src/runtime/virtcontainers/qemu_test.go | 21 ++++---- src/runtime/virtcontainers/sandbox.go | 45 ++++++++-------- 18 files changed, 196 insertions(+), 70 deletions(-) diff --git a/src/runtime/Makefile b/src/runtime/Makefile index 0f49badd44..852b4d5795 100644 --- a/src/runtime/Makefile +++ b/src/runtime/Makefile @@ -171,6 +171,11 @@ 
DEFDISABLEGUESTEMPTYDIR := false DEFAULTEXPFEATURES := [] DEFDISABLESELINUX := false + +# Default guest SELinux configuration +DEFDISABLEGUESTSELINUX := true +DEFGUESTSELINUXLABEL := system_u:system_r:container_t + #Default SeccomSandbox param #The same default policy is used by libvirt #More explanation on https://lists.gnu.org/archive/html/qemu-devel/2017-02/msg03348.html @@ -460,6 +465,8 @@ USER_VARS += DEFNETWORKMODEL_QEMU USER_VARS += DEFDISABLEGUESTEMPTYDIR USER_VARS += DEFDISABLEGUESTSECCOMP USER_VARS += DEFDISABLESELINUX +USER_VARS += DEFDISABLEGUESTSELINUX +USER_VARS += DEFGUESTSELINUXLABEL USER_VARS += DEFAULTEXPFEATURES USER_VARS += DEFDISABLEBLOCK USER_VARS += DEFBLOCKSTORAGEDRIVER_ACRN diff --git a/src/runtime/cmd/kata-runtime/kata-env.go b/src/runtime/cmd/kata-runtime/kata-env.go index b1421fa006..c129f8f434 100644 --- a/src/runtime/cmd/kata-runtime/kata-env.go +++ b/src/runtime/cmd/kata-runtime/kata-env.go @@ -76,6 +76,7 @@ type RuntimeConfigInfo struct { type RuntimeInfo struct { Config RuntimeConfigInfo Path string + GuestSeLinuxLabel string Experimental []exp.Feature Version RuntimeVersionInfo Debug bool @@ -186,6 +187,7 @@ func getRuntimeInfo(configFile string, config oci.RuntimeConfig) RuntimeInfo { SandboxCgroupOnly: config.SandboxCgroupOnly, Experimental: config.Experimental, DisableGuestSeccomp: config.DisableGuestSeccomp, + GuestSeLinuxLabel: config.GuestSeLinuxLabel, } } diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in index e47a1d92a0..cedf2303ad 100644 --- a/src/runtime/config/configuration-clh.toml.in +++ b/src/runtime/config/configuration-clh.toml.in @@ -38,6 +38,13 @@ image = "@IMAGEPATH@" # disable applying SELinux on the VMM process (default false) disable_selinux=@DEFDISABLESELINUX@ +# disable applying SELinux on the container process +# If set to false, the type `container_t` is applied to the container process by default. 
+# Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built +# with `SELINUX=yes`. +# (default: true) +disable_guest_selinux=@DEFDISABLEGUESTSELINUX@ + # Path to the firmware. # If you want Cloud Hypervisor to use a specific firmware, set its path below. # This is option is only used when confidential_guest is enabled. @@ -321,6 +328,14 @@ internetworking_model="@DEFNETWORKMODEL_CLH@" # (default: true) disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ +# Apply a custom SELinux security policy to the container process inside the VM. +# This is used when you want to apply a type other than the default `container_t`, +# so general users should not uncomment and apply it. +# (format: "user:role:type") +# Note: You cannot specify MCS policy with the label because the sensitivity levels and +# categories are determined automatically by high-level container runtimes such as containerd. +#guest_selinux_label="@DEFGUESTSELINUXLABEL@" + # If enabled, the runtime will create opentracing.io traces and spans. # (See https://www.jaegertracing.io/docs/getting-started). # (default: disabled) diff --git a/src/runtime/config/configuration-qemu.toml.in b/src/runtime/config/configuration-qemu.toml.in index 8330042977..f7e70a6d53 100644 --- a/src/runtime/config/configuration-qemu.toml.in +++ b/src/runtime/config/configuration-qemu.toml.in @@ -438,6 +438,14 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@ # disable applying SELinux on the VMM process (default false) disable_selinux=@DEFDISABLESELINUX@ +# disable applying SELinux on the container process +# If set to false, the type `container_t` is applied to the container process by default. +# Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built +# with `SELINUX=yes`. +# (default: true) +disable_guest_selinux=@DEFDISABLEGUESTSELINUX@ + + [factory] # VM templating support. Once enabled, new VMs are created from template # using vm cloning. 
They will share the same initial kernel, initramfs and @@ -555,6 +563,14 @@ internetworking_model="@DEFNETWORKMODEL_QEMU@" # (default: true) disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ +# Apply a custom SELinux security policy to the container process inside the VM. +# This is used when you want to apply a type other than the default `container_t`, +# so general users should not uncomment and apply it. +# (format: "user:role:type") +# Note: You cannot specify MCS policy with the label because the sensitivity levels and +# categories are determined automatically by high-level container runtimes such as containerd. +#guest_selinux_label="@DEFGUESTSELINUXLABEL@" + # If enabled, the runtime will create opentracing.io traces and spans. # (See https://www.jaegertracing.io/docs/getting-started). # (default: disabled) diff --git a/src/runtime/pkg/katautils/config-settings.go.in b/src/runtime/pkg/katautils/config-settings.go.in index 601d95612c..43dd5cc5a4 100644 --- a/src/runtime/pkg/katautils/config-settings.go.in +++ b/src/runtime/pkg/katautils/config-settings.go.in @@ -90,6 +90,7 @@ const defaultSevSnpGuest = false const defaultGuestSwap = false const defaultRootlessHypervisor = false const defaultDisableSeccomp = false +const defaultDisableGuestSeLinux = true const defaultVfioMode = "guest-kernel" const defaultLegacySerial = false diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index 3fabfe0af1..3ed3177f57 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -59,9 +59,9 @@ const ( type tomlConfig struct { Hypervisor map[string]hypervisor Agent map[string]agent - Runtime runtime Image image Factory factory + Runtime runtime } type image struct { @@ -154,6 +154,7 @@ type hypervisor struct { Rootless bool `toml:"rootless"` DisableSeccomp bool `toml:"disable_seccomp"` DisableSeLinux bool `toml:"disable_selinux"` + DisableGuestSeLinux bool `toml:"disable_guest_selinux"` LegacySerial bool 
`toml:"use_legacy_serial"` EnableVCPUsPinning bool `toml:"enable_vcpus_pinning"` } @@ -164,12 +165,13 @@ type runtime struct { JaegerUser string `toml:"jaeger_user"` JaegerPassword string `toml:"jaeger_password"` VfioMode string `toml:"vfio_mode"` + GuestSeLinuxLabel string `toml:"guest_selinux_label"` SandboxBindMounts []string `toml:"sandbox_bind_mounts"` Experimental []string `toml:"experimental"` - Debug bool `toml:"enable_debug"` Tracing bool `toml:"enable_tracing"` DisableNewNetNs bool `toml:"disable_new_netns"` DisableGuestSeccomp bool `toml:"disable_guest_seccomp"` + Debug bool `toml:"enable_debug"` SandboxCgroupOnly bool `toml:"sandbox_cgroup_only"` StaticSandboxResourceMgmt bool `toml:"static_sandbox_resource_mgmt"` EnablePprof bool `toml:"enable_pprof"` @@ -690,6 +692,7 @@ func newFirecrackerHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { TxRateLimiterMaxRate: txRateLimiterMaxRate, EnableAnnotations: h.EnableAnnotations, DisableSeLinux: h.DisableSeLinux, + DisableGuestSeLinux: true, // Guest SELinux is not supported in Firecracker }, nil } @@ -836,6 +839,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { LegacySerial: h.LegacySerial, DisableSeLinux: h.DisableSeLinux, EnableVCPUsPinning: h.EnableVCPUsPinning, + DisableGuestSeLinux: h.DisableGuestSeLinux, }, nil } @@ -902,6 +906,7 @@ func newAcrnHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { GuestHookPath: h.guestHookPath(), DisableSeLinux: h.DisableSeLinux, EnableAnnotations: h.EnableAnnotations, + DisableGuestSeLinux: true, // Guest SELinux is not supported in ACRN }, nil } @@ -1007,6 +1012,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { DisableSeccomp: h.DisableSeccomp, ConfidentialGuest: h.ConfidentialGuest, DisableSeLinux: h.DisableSeLinux, + DisableGuestSeLinux: h.DisableGuestSeLinux, NetRateLimiterBwMaxRate: h.getNetRateLimiterBwMaxRate(), NetRateLimiterBwOneTimeBurst: h.getNetRateLimiterBwOneTimeBurst(), 
NetRateLimiterOpsMaxRate: h.getNetRateLimiterOpsMaxRate(), @@ -1230,6 +1236,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig { GuestSwap: defaultGuestSwap, Rootless: defaultRootlessHypervisor, DisableSeccomp: defaultDisableSeccomp, + DisableGuestSeLinux: defaultDisableGuestSeLinux, LegacySerial: defaultLegacySerial, } } @@ -1317,7 +1324,7 @@ func LoadConfiguration(configPath string, ignoreLogging bool) (resolvedConfigPat } config.DisableGuestSeccomp = tomlConf.Runtime.DisableGuestSeccomp - + config.GuestSeLinuxLabel = tomlConf.Runtime.GuestSeLinuxLabel config.StaticSandboxResourceMgmt = tomlConf.Runtime.StaticSandboxResourceMgmt config.SandboxCgroupOnly = tomlConf.Runtime.SandboxCgroupOnly config.DisableNewNetNs = tomlConf.Runtime.DisableNewNetNs diff --git a/src/runtime/pkg/katautils/config_test.go b/src/runtime/pkg/katautils/config_test.go index 5e493b40e3..335f077fbb 100644 --- a/src/runtime/pkg/katautils/config_test.go +++ b/src/runtime/pkg/katautils/config_test.go @@ -554,6 +554,7 @@ func TestMinimalRuntimeConfig(t *testing.T) { VhostUserStorePath: defaultVhostUserStorePath, VirtioFSCache: defaultVirtioFSCacheMode, BlockDeviceAIO: defaultBlockDeviceAIO, + DisableGuestSeLinux: defaultDisableGuestSeLinux, } expectedAgentConfig := vc.KataAgentConfig{ diff --git a/src/runtime/pkg/oci/utils.go b/src/runtime/pkg/oci/utils.go index 57c2ed1a15..2cd7c10f53 100644 --- a/src/runtime/pkg/oci/utils.go +++ b/src/runtime/pkg/oci/utils.go @@ -128,6 +128,9 @@ type RuntimeConfig struct { //Determines if seccomp should be applied inside guest DisableGuestSeccomp bool + //SELinux security context applied to the container process inside guest. 
+ GuestSeLinuxLabel string + // Sandbox sizing information which, if provided, indicates the size of // the sandbox needed for the workload(s) SandboxCPUs uint32 @@ -945,6 +948,8 @@ func SandboxConfig(ocispec specs.Spec, runtime RuntimeConfig, bundlePath, cid st DisableGuestSeccomp: runtime.DisableGuestSeccomp, + GuestSeLinuxLabel: runtime.GuestSeLinuxLabel, + Experimental: runtime.Experimental, } diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index 548ce6f77d..955da7d107 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -74,6 +74,8 @@ const ( MinHypervisorMemory = 256 defaultMsize9p = 8192 + + defaultDisableGuestSeLinux = true ) var ( @@ -560,6 +562,9 @@ type HypervisorConfig struct { // Disable selinux from the hypervisor process DisableSeLinux bool + // Disable selinux from the container process + DisableGuestSeLinux bool + // Use legacy serial for the guest console LegacySerial bool diff --git a/src/runtime/virtcontainers/hypervisor_config_linux_test.go b/src/runtime/virtcontainers/hypervisor_config_linux_test.go index 609e52fd73..41cabb1c35 100644 --- a/src/runtime/virtcontainers/hypervisor_config_linux_test.go +++ b/src/runtime/virtcontainers/hypervisor_config_linux_test.go @@ -92,22 +92,24 @@ func TestHypervisorConfigValidTemplateConfig(t *testing.T) { func TestHypervisorConfigDefaults(t *testing.T) { assert := assert.New(t) hypervisorConfig := &HypervisorConfig{ - KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel), - ImagePath: fmt.Sprintf("%s/%s", testDir, testImage), - HypervisorPath: "", + KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel), + ImagePath: fmt.Sprintf("%s/%s", testDir, testImage), + HypervisorPath: "", + DisableGuestSeLinux: defaultDisableGuestSeLinux, } testHypervisorConfigValid(t, hypervisorConfig, true) hypervisorConfigDefaultsExpected := &HypervisorConfig{ - KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel), - ImagePath: 
fmt.Sprintf("%s/%s", testDir, testImage), - HypervisorPath: "", - NumVCPUs: defaultVCPUs, - MemorySize: defaultMemSzMiB, - DefaultBridges: defaultBridges, - BlockDeviceDriver: defaultBlockDriver, - DefaultMaxVCPUs: defaultMaxVCPUs, - Msize9p: defaultMsize9p, + KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel), + ImagePath: fmt.Sprintf("%s/%s", testDir, testImage), + HypervisorPath: "", + NumVCPUs: defaultVCPUs, + MemorySize: defaultMemSzMiB, + DefaultBridges: defaultBridges, + BlockDeviceDriver: defaultBlockDriver, + DefaultMaxVCPUs: defaultMaxVCPUs, + Msize9p: defaultMsize9p, + DisableGuestSeLinux: defaultDisableGuestSeLinux, } assert.Exactly(hypervisorConfig, hypervisorConfigDefaultsExpected) diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index 477be9fde4..5746759542 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -36,6 +36,7 @@ import ( "context" "github.com/gogo/protobuf/proto" "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/selinux/go-selinux" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" "google.golang.org/grpc/codes" @@ -69,6 +70,9 @@ const ( kernelParamDebugConsole = "agent.debug_console" kernelParamDebugConsoleVPort = "agent.debug_console_vport" kernelParamDebugConsoleVPortValue = "1026" + + // Default SELinux type applied to the container process inside guest + defaultSeLinuxContainerType = "container_t" ) var ( @@ -895,7 +899,7 @@ func (k *kataAgent) removeIgnoredOCIMount(spec *specs.Spec, ignoredMounts map[st return nil } -func (k *kataAgent) constrainGRPCSpec(grpcSpec *grpc.Spec, passSeccomp bool, stripVfio bool) { +func (k *kataAgent) constrainGRPCSpec(grpcSpec *grpc.Spec, passSeccomp bool, disableGuestSeLinux bool, guestSeLinuxLabel string, stripVfio bool) error { // Disable Hooks since they have been handled on the host and there is // no reason to send them to the agent. 
It would make no sense to try // to apply them on the guest. @@ -907,11 +911,34 @@ func (k *kataAgent) constrainGRPCSpec(grpcSpec *grpc.Spec, passSeccomp bool, str grpcSpec.Linux.Seccomp = nil } - // Disable SELinux inside of the virtual machine, the label will apply - // to the KVM process + // Pass SELinux label for the container process to the agent. if grpcSpec.Process.SelinuxLabel != "" { - k.Logger().Info("SELinux label from config will be applied to the hypervisor process, not the VM workload") - grpcSpec.Process.SelinuxLabel = "" + if !disableGuestSeLinux { + k.Logger().Info("SELinux label will be applied to the container process inside guest") + + var label string + if guestSeLinuxLabel != "" { + label = guestSeLinuxLabel + } else { + label = grpcSpec.Process.SelinuxLabel + } + + processContext, err := selinux.NewContext(label) + if err != nil { + return err + } + + // Change the type from KVM to container because the type passed from the high-level + // runtime is for KVM process. + if guestSeLinuxLabel == "" { + processContext["type"] = defaultSeLinuxContainerType + } + grpcSpec.Process.SelinuxLabel = processContext.Get() + } else { + k.Logger().Info("Empty SELinux label for the process and the mount because guest SELinux is disabled") + grpcSpec.Process.SelinuxLabel = "" + grpcSpec.Linux.MountLabel = "" + } } // By now only CPU constraints are supported @@ -973,6 +1000,8 @@ func (k *kataAgent) constrainGRPCSpec(grpcSpec *grpc.Spec, passSeccomp bool, str } grpcSpec.Linux.Devices = linuxDevices } + + return nil } func (k *kataAgent) handleShm(mounts []specs.Mount, sandbox *Sandbox) { @@ -1256,9 +1285,20 @@ func (k *kataAgent) createContainer(ctx context.Context, sandbox *Sandbox, c *Co passSeccomp := !sandbox.config.DisableGuestSeccomp && sandbox.seccompSupported + // Currently, guest SELinux can be enabled only when SELinux is enabled on the host side. 
+ if !sandbox.config.HypervisorConfig.DisableGuestSeLinux && !selinux.GetEnabled() { + return nil, fmt.Errorf("Guest SELinux is enabled, but SELinux is disabled on the host side") + } + if sandbox.config.HypervisorConfig.DisableGuestSeLinux && sandbox.config.GuestSeLinuxLabel != "" { + return nil, fmt.Errorf("Custom SELinux security policy is provided, but guest SELinux is disabled") + } + // We need to constrain the spec to make sure we're not // passing irrelevant information to the agent. - k.constrainGRPCSpec(grpcSpec, passSeccomp, sandbox.config.VfioMode == config.VFIOModeGuestKernel) + err = k.constrainGRPCSpec(grpcSpec, passSeccomp, sandbox.config.HypervisorConfig.DisableGuestSeLinux, sandbox.config.GuestSeLinuxLabel, sandbox.config.VfioMode == config.VFIOModeGuestKernel) + if err != nil { + return nil, err + } req := &grpc.CreateContainerRequest{ ContainerId: c.id, diff --git a/src/runtime/virtcontainers/kata_agent_test.go b/src/runtime/virtcontainers/kata_agent_test.go index 9711a5cf55..885fd8acc7 100644 --- a/src/runtime/virtcontainers/kata_agent_test.go +++ b/src/runtime/virtcontainers/kata_agent_test.go @@ -619,7 +619,7 @@ func TestConstrainGRPCSpec(t *testing.T) { } k := kataAgent{} - k.constrainGRPCSpec(g, true, true) + k.constrainGRPCSpec(g, true, true, "", true) // Check nil fields assert.Nil(g.Hooks) diff --git a/src/runtime/virtcontainers/persist.go b/src/runtime/virtcontainers/persist.go index 59c6dda15f..906ed10761 100644 --- a/src/runtime/virtcontainers/persist.go +++ b/src/runtime/virtcontainers/persist.go @@ -189,6 +189,7 @@ func (s *Sandbox) dumpConfig(ss *persistapi.SandboxState) { SystemdCgroup: sconfig.SystemdCgroup, SandboxCgroupOnly: sconfig.SandboxCgroupOnly, DisableGuestSeccomp: sconfig.DisableGuestSeccomp, + GuestSeLinuxLabel: sconfig.GuestSeLinuxLabel, } ss.Config.SandboxBindMounts = append(ss.Config.SandboxBindMounts, sconfig.SandboxBindMounts...) 
@@ -429,6 +430,7 @@ func loadSandboxConfig(id string) (*SandboxConfig, error) { SystemdCgroup: savedConf.SystemdCgroup, SandboxCgroupOnly: savedConf.SandboxCgroupOnly, DisableGuestSeccomp: savedConf.DisableGuestSeccomp, + GuestSeLinuxLabel: savedConf.GuestSeLinuxLabel, } sconfig.SandboxBindMounts = append(sconfig.SandboxBindMounts, savedConf.SandboxBindMounts...) diff --git a/src/runtime/virtcontainers/persist/api/config.go b/src/runtime/virtcontainers/persist/api/config.go index 1c16b7bd91..44ba820643 100644 --- a/src/runtime/virtcontainers/persist/api/config.go +++ b/src/runtime/virtcontainers/persist/api/config.go @@ -243,19 +243,6 @@ type ContainerConfig struct { // SandboxConfig is a sandbox configuration. // Refs: virtcontainers/sandbox.go:SandboxConfig type SandboxConfig struct { - // Information for fields not saved: - // * Annotation: this is kind of casual data, we don't need casual data in persist file, - // if you know this data needs to persist, please gives it - // a specific field - - ContainerConfigs []ContainerConfig - - // SandboxBindMounts - list of paths to mount into guest - SandboxBindMounts []string - - // Experimental enables experimental features - Experimental []string - // Cgroups specifies specific cgroup settings for the various subsystems that the container is // placed into to limit the resources the container has available Cgroups *configs.Cgroup `json:"cgroups"` @@ -265,8 +252,24 @@ type SandboxConfig struct { KataShimConfig *ShimConfig - HypervisorType string - NetworkConfig NetworkConfig + // Custom SELinux security policy to the container process inside the VM + GuestSeLinuxLabel string + + HypervisorType string + + // SandboxBindMounts - list of paths to mount into guest + SandboxBindMounts []string + + // Experimental enables experimental features + Experimental []string + + // Information for fields not saved: + // * Annotation: this is kind of casual data, we don't need casual data in persist file, + // if you know this data 
needs to persist, please gives it a specific field + ContainerConfigs []ContainerConfig + + NetworkConfig NetworkConfig + HypervisorConfig HypervisorConfig ShmSize uint64 diff --git a/src/runtime/virtcontainers/pkg/annotations/annotations.go b/src/runtime/virtcontainers/pkg/annotations/annotations.go index d785580d9b..67c81cb1f8 100644 --- a/src/runtime/virtcontainers/pkg/annotations/annotations.go +++ b/src/runtime/virtcontainers/pkg/annotations/annotations.go @@ -247,6 +247,9 @@ const ( // DisableGuestSeccomp is a sandbox annotation that determines if seccomp should be applied inside guest. DisableGuestSeccomp = kataAnnotRuntimePrefix + "disable_guest_seccomp" + // GuestSeLinuxLabel is a SELinux security policy that is applied to a container process inside guest. + GuestSeLinuxLabel = kataAnnotRuntimePrefix + "guest_selinux_label" + // SandboxCgroupOnly is a sandbox annotation that determines if kata processes are managed only in sandbox cgroup. SandboxCgroupOnly = kataAnnotRuntimePrefix + "sandbox_cgroup_only" diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index d33f02f6ed..75a6731dd1 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -181,6 +181,15 @@ func (q *qemu) kernelParameters() string { // set the maximum number of vCPUs params = append(params, Param{"nr_cpus", fmt.Sprintf("%d", q.config.DefaultMaxVCPUs)}) + // set the SELinux params in accordance with the runtime configuration, disable_guest_selinux. + if q.config.DisableGuestSeLinux { + q.Logger().Info("Set selinux=0 to kernel params because SELinux on the guest is disabled") + params = append(params, Param{"selinux", "0"}) + } else { + q.Logger().Info("Set selinux=1 to kernel params because SELinux on the guest is enabled") + params = append(params, Param{"selinux", "1"}) + } + // add the params specified by the provided config. 
As the kernel // honours the last parameter value set and since the config-provided // params are added here, they will take priority over the defaults. @@ -476,6 +485,13 @@ func (q *qemu) createVirtiofsDaemon(sharedPath string) (VirtiofsDaemon, error) { return nd, nil } + // Set the xattr option for virtiofsd daemon to enable extended attributes + // in virtiofs if SELinux on the guest side is enabled. + if !q.config.DisableGuestSeLinux { + q.Logger().Info("Set the xattr option for virtiofsd") + q.config.VirtioFSExtraArgs = append(q.config.VirtioFSExtraArgs, "-o", "xattr") + } + // default use virtiofsd return &virtiofsd{ path: q.config.VirtioFSDaemon, @@ -846,7 +862,6 @@ func (q *qemu) StartVM(ctx context.Context, timeout int) error { // the SELinux label. If these processes require privileged, we do // notwant to run them under confinement. if !q.config.DisableSeLinux { - if err := label.SetProcessLabel(q.config.SELinuxProcessLabel); err != nil { return err } diff --git a/src/runtime/virtcontainers/qemu_test.go b/src/runtime/virtcontainers/qemu_test.go index f30dd0a696..a8bc6a33db 100644 --- a/src/runtime/virtcontainers/qemu_test.go +++ b/src/runtime/virtcontainers/qemu_test.go @@ -27,15 +27,16 @@ import ( func newQemuConfig() HypervisorConfig { return HypervisorConfig{ - KernelPath: testQemuKernelPath, - InitrdPath: testQemuInitrdPath, - HypervisorPath: testQemuPath, - NumVCPUs: defaultVCPUs, - MemorySize: defaultMemSzMiB, - DefaultBridges: defaultBridges, - BlockDeviceDriver: defaultBlockDriver, - DefaultMaxVCPUs: defaultMaxVCPUs, - Msize9p: defaultMsize9p, + KernelPath: testQemuKernelPath, + InitrdPath: testQemuInitrdPath, + HypervisorPath: testQemuPath, + NumVCPUs: defaultVCPUs, + MemorySize: defaultMemSzMiB, + DefaultBridges: defaultBridges, + BlockDeviceDriver: defaultBlockDriver, + DefaultMaxVCPUs: defaultMaxVCPUs, + Msize9p: defaultMsize9p, + DisableGuestSeLinux: defaultDisableGuestSeLinux, } } @@ -58,7 +59,7 @@ func testQemuKernelParameters(t 
*testing.T, kernelParams []Param, expected strin } func TestQemuKernelParameters(t *testing.T) { - expectedOut := fmt.Sprintf("panic=1 nr_cpus=%d foo=foo bar=bar", govmm.MaxVCPUs()) + expectedOut := fmt.Sprintf("panic=1 nr_cpus=%d selinux=0 foo=foo bar=bar", govmm.MaxVCPUs()) params := []Param{ { Key: "foo", diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index 32ccc2dbb7..025537fed9 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -126,14 +126,17 @@ type SandboxResourceSizing struct { // SandboxConfig is a Sandbox configuration. type SandboxConfig struct { - // Volumes is a list of shared volumes between the host and the Sandbox. - Volumes []types.Volume + // Annotations keys must be unique strings and must be name-spaced + Annotations map[string]string - // Containers describe the list of containers within a Sandbox. - // This list can be empty and populated by adding containers - // to the Sandbox a posteriori. - //TODO: this should be a map to avoid duplicated containers - Containers []ContainerConfig + // Custom SELinux security policy to the container process inside the VM + GuestSeLinuxLabel string + + HypervisorType HypervisorType + + ID string + + Hostname string // SandboxBindMounts - list of paths to mount into guest SandboxBindMounts []string @@ -141,31 +144,29 @@ type SandboxConfig struct { // Experimental features enabled Experimental []exp.Feature - // Annotations keys must be unique strings and must be name-spaced - // with e.g. reverse domain notation (org.clearlinux.key). - Annotations map[string]string + // Containers describe the list of containers within a Sandbox. + // This list can be empty and populated by adding containers + // to the Sandbox a posteriori. 
+ // TODO: this should be a map to avoid duplicated containers + Containers []ContainerConfig - ID string - - Hostname string - - HypervisorType HypervisorType - - AgentConfig KataAgentConfig + Volumes []types.Volume NetworkConfig NetworkConfig + AgentConfig KataAgentConfig + HypervisorConfig HypervisorConfig - SandboxResources SandboxResourceSizing - - // StaticResourceMgmt indicates if the shim should rely on statically sizing the sandbox (VM) - StaticResourceMgmt bool - ShmSize uint64 + SandboxResources SandboxResourceSizing + VfioMode config.VFIOModeType + // StaticResourceMgmt indicates if the shim should rely on statically sizing the sandbox (VM) + StaticResourceMgmt bool + // SharePidNs sets all containers to share the same sandbox level pid namespace. SharePidNs bool