runtime: Pass SELinux policy for containers to the agent

Pass SELinux policy for containers to the agent if `disable_guest_selinux`
is set to `false` in the runtime configuration. The `container_t` type
is applied to the container process inside the guest by default.
Users can also apply a custom SELinux policy to the container process using
`guest_selinux_label` in the runtime configuration. This serves as an
alternative to Kubernetes' security context for SELinux, because users
cannot specify the policy in Kata through Kubernetes' security context.
To apply an SELinux policy to the container, the guest rootfs must be a
CentOS rootfs that is created and built with `SELINUX=yes`.

Fixes: #4812

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
This commit is contained in:
Manabu Sugimoto 2022-08-07 19:46:07 +09:00
parent 9354769286
commit c617bbe70d
18 changed files with 196 additions and 70 deletions

View File

@ -171,6 +171,11 @@ DEFDISABLEGUESTEMPTYDIR := false
DEFAULTEXPFEATURES := [] DEFAULTEXPFEATURES := []
DEFDISABLESELINUX := false DEFDISABLESELINUX := false
# Default guest SELinux configuration
DEFDISABLEGUESTSELINUX := true
DEFGUESTSELINUXLABEL := system_u:system_r:container_t
#Default SeccomSandbox param #Default SeccomSandbox param
#The same default policy is used by libvirt #The same default policy is used by libvirt
#More explanation on https://lists.gnu.org/archive/html/qemu-devel/2017-02/msg03348.html #More explanation on https://lists.gnu.org/archive/html/qemu-devel/2017-02/msg03348.html
@ -460,6 +465,8 @@ USER_VARS += DEFNETWORKMODEL_QEMU
USER_VARS += DEFDISABLEGUESTEMPTYDIR USER_VARS += DEFDISABLEGUESTEMPTYDIR
USER_VARS += DEFDISABLEGUESTSECCOMP USER_VARS += DEFDISABLEGUESTSECCOMP
USER_VARS += DEFDISABLESELINUX USER_VARS += DEFDISABLESELINUX
USER_VARS += DEFDISABLEGUESTSELINUX
USER_VARS += DEFGUESTSELINUXLABEL
USER_VARS += DEFAULTEXPFEATURES USER_VARS += DEFAULTEXPFEATURES
USER_VARS += DEFDISABLEBLOCK USER_VARS += DEFDISABLEBLOCK
USER_VARS += DEFBLOCKSTORAGEDRIVER_ACRN USER_VARS += DEFBLOCKSTORAGEDRIVER_ACRN

View File

@ -76,6 +76,7 @@ type RuntimeConfigInfo struct {
type RuntimeInfo struct { type RuntimeInfo struct {
Config RuntimeConfigInfo Config RuntimeConfigInfo
Path string Path string
GuestSeLinuxLabel string
Experimental []exp.Feature Experimental []exp.Feature
Version RuntimeVersionInfo Version RuntimeVersionInfo
Debug bool Debug bool
@ -186,6 +187,7 @@ func getRuntimeInfo(configFile string, config oci.RuntimeConfig) RuntimeInfo {
SandboxCgroupOnly: config.SandboxCgroupOnly, SandboxCgroupOnly: config.SandboxCgroupOnly,
Experimental: config.Experimental, Experimental: config.Experimental,
DisableGuestSeccomp: config.DisableGuestSeccomp, DisableGuestSeccomp: config.DisableGuestSeccomp,
GuestSeLinuxLabel: config.GuestSeLinuxLabel,
} }
} }

View File

@ -38,6 +38,13 @@ image = "@IMAGEPATH@"
# disable applying SELinux on the VMM process (default false) # disable applying SELinux on the VMM process (default false)
disable_selinux=@DEFDISABLESELINUX@ disable_selinux=@DEFDISABLESELINUX@
# disable applying SELinux on the container process
# If set to false, the type `container_t` is applied to the container process by default.
# Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built
# with `SELINUX=yes`.
# (default: true)
disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
# Path to the firmware. # Path to the firmware.
# If you want Cloud Hypervisor to use a specific firmware, set its path below. # If you want Cloud Hypervisor to use a specific firmware, set its path below.
# This option is only used when confidential_guest is enabled. # This option is only used when confidential_guest is enabled.
@ -321,6 +328,14 @@ internetworking_model="@DEFNETWORKMODEL_CLH@"
# (default: true) # (default: true)
disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
# Apply a custom SELinux security policy to the container process inside the VM.
# This is used when you want to apply a type other than the default `container_t`,
# so general users should not uncomment and apply it.
# (format: "user:role:type")
# Note: You cannot specify MCS policy with the label because the sensitivity levels and
# categories are determined automatically by high-level container runtimes such as containerd.
#guest_selinux_label="@DEFGUESTSELINUXLABEL@"
# If enabled, the runtime will create opentracing.io traces and spans. # If enabled, the runtime will create opentracing.io traces and spans.
# (See https://www.jaegertracing.io/docs/getting-started). # (See https://www.jaegertracing.io/docs/getting-started).
# (default: disabled) # (default: disabled)

View File

@ -438,6 +438,14 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
# disable applying SELinux on the VMM process (default false) # disable applying SELinux on the VMM process (default false)
disable_selinux=@DEFDISABLESELINUX@ disable_selinux=@DEFDISABLESELINUX@
# disable applying SELinux on the container process
# If set to false, the type `container_t` is applied to the container process by default.
# Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built
# with `SELINUX=yes`.
# (default: true)
disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
[factory] [factory]
# VM templating support. Once enabled, new VMs are created from template # VM templating support. Once enabled, new VMs are created from template
# using vm cloning. They will share the same initial kernel, initramfs and # using vm cloning. They will share the same initial kernel, initramfs and
@ -555,6 +563,14 @@ internetworking_model="@DEFNETWORKMODEL_QEMU@"
# (default: true) # (default: true)
disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
# Apply a custom SELinux security policy to the container process inside the VM.
# This is used when you want to apply a type other than the default `container_t`,
# so general users should not uncomment and apply it.
# (format: "user:role:type")
# Note: You cannot specify MCS policy with the label because the sensitivity levels and
# categories are determined automatically by high-level container runtimes such as containerd.
#guest_selinux_label="@DEFGUESTSELINUXLABEL@"
# If enabled, the runtime will create opentracing.io traces and spans. # If enabled, the runtime will create opentracing.io traces and spans.
# (See https://www.jaegertracing.io/docs/getting-started). # (See https://www.jaegertracing.io/docs/getting-started).
# (default: disabled) # (default: disabled)

View File

@ -90,6 +90,7 @@ const defaultSevSnpGuest = false
const defaultGuestSwap = false const defaultGuestSwap = false
const defaultRootlessHypervisor = false const defaultRootlessHypervisor = false
const defaultDisableSeccomp = false const defaultDisableSeccomp = false
const defaultDisableGuestSeLinux = true
const defaultVfioMode = "guest-kernel" const defaultVfioMode = "guest-kernel"
const defaultLegacySerial = false const defaultLegacySerial = false

View File

@ -59,9 +59,9 @@ const (
type tomlConfig struct { type tomlConfig struct {
Hypervisor map[string]hypervisor Hypervisor map[string]hypervisor
Agent map[string]agent Agent map[string]agent
Runtime runtime
Image image Image image
Factory factory Factory factory
Runtime runtime
} }
type image struct { type image struct {
@ -154,6 +154,7 @@ type hypervisor struct {
Rootless bool `toml:"rootless"` Rootless bool `toml:"rootless"`
DisableSeccomp bool `toml:"disable_seccomp"` DisableSeccomp bool `toml:"disable_seccomp"`
DisableSeLinux bool `toml:"disable_selinux"` DisableSeLinux bool `toml:"disable_selinux"`
DisableGuestSeLinux bool `toml:"disable_guest_selinux"`
LegacySerial bool `toml:"use_legacy_serial"` LegacySerial bool `toml:"use_legacy_serial"`
EnableVCPUsPinning bool `toml:"enable_vcpus_pinning"` EnableVCPUsPinning bool `toml:"enable_vcpus_pinning"`
} }
@ -164,12 +165,13 @@ type runtime struct {
JaegerUser string `toml:"jaeger_user"` JaegerUser string `toml:"jaeger_user"`
JaegerPassword string `toml:"jaeger_password"` JaegerPassword string `toml:"jaeger_password"`
VfioMode string `toml:"vfio_mode"` VfioMode string `toml:"vfio_mode"`
GuestSeLinuxLabel string `toml:"guest_selinux_label"`
SandboxBindMounts []string `toml:"sandbox_bind_mounts"` SandboxBindMounts []string `toml:"sandbox_bind_mounts"`
Experimental []string `toml:"experimental"` Experimental []string `toml:"experimental"`
Debug bool `toml:"enable_debug"`
Tracing bool `toml:"enable_tracing"` Tracing bool `toml:"enable_tracing"`
DisableNewNetNs bool `toml:"disable_new_netns"` DisableNewNetNs bool `toml:"disable_new_netns"`
DisableGuestSeccomp bool `toml:"disable_guest_seccomp"` DisableGuestSeccomp bool `toml:"disable_guest_seccomp"`
Debug bool `toml:"enable_debug"`
SandboxCgroupOnly bool `toml:"sandbox_cgroup_only"` SandboxCgroupOnly bool `toml:"sandbox_cgroup_only"`
StaticSandboxResourceMgmt bool `toml:"static_sandbox_resource_mgmt"` StaticSandboxResourceMgmt bool `toml:"static_sandbox_resource_mgmt"`
EnablePprof bool `toml:"enable_pprof"` EnablePprof bool `toml:"enable_pprof"`
@ -690,6 +692,7 @@ func newFirecrackerHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
TxRateLimiterMaxRate: txRateLimiterMaxRate, TxRateLimiterMaxRate: txRateLimiterMaxRate,
EnableAnnotations: h.EnableAnnotations, EnableAnnotations: h.EnableAnnotations,
DisableSeLinux: h.DisableSeLinux, DisableSeLinux: h.DisableSeLinux,
DisableGuestSeLinux: true, // Guest SELinux is not supported in Firecracker
}, nil }, nil
} }
@ -836,6 +839,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
LegacySerial: h.LegacySerial, LegacySerial: h.LegacySerial,
DisableSeLinux: h.DisableSeLinux, DisableSeLinux: h.DisableSeLinux,
EnableVCPUsPinning: h.EnableVCPUsPinning, EnableVCPUsPinning: h.EnableVCPUsPinning,
DisableGuestSeLinux: h.DisableGuestSeLinux,
}, nil }, nil
} }
@ -902,6 +906,7 @@ func newAcrnHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
GuestHookPath: h.guestHookPath(), GuestHookPath: h.guestHookPath(),
DisableSeLinux: h.DisableSeLinux, DisableSeLinux: h.DisableSeLinux,
EnableAnnotations: h.EnableAnnotations, EnableAnnotations: h.EnableAnnotations,
DisableGuestSeLinux: true, // Guest SELinux is not supported in ACRN
}, nil }, nil
} }
@ -1007,6 +1012,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
DisableSeccomp: h.DisableSeccomp, DisableSeccomp: h.DisableSeccomp,
ConfidentialGuest: h.ConfidentialGuest, ConfidentialGuest: h.ConfidentialGuest,
DisableSeLinux: h.DisableSeLinux, DisableSeLinux: h.DisableSeLinux,
DisableGuestSeLinux: h.DisableGuestSeLinux,
NetRateLimiterBwMaxRate: h.getNetRateLimiterBwMaxRate(), NetRateLimiterBwMaxRate: h.getNetRateLimiterBwMaxRate(),
NetRateLimiterBwOneTimeBurst: h.getNetRateLimiterBwOneTimeBurst(), NetRateLimiterBwOneTimeBurst: h.getNetRateLimiterBwOneTimeBurst(),
NetRateLimiterOpsMaxRate: h.getNetRateLimiterOpsMaxRate(), NetRateLimiterOpsMaxRate: h.getNetRateLimiterOpsMaxRate(),
@ -1230,6 +1236,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig {
GuestSwap: defaultGuestSwap, GuestSwap: defaultGuestSwap,
Rootless: defaultRootlessHypervisor, Rootless: defaultRootlessHypervisor,
DisableSeccomp: defaultDisableSeccomp, DisableSeccomp: defaultDisableSeccomp,
DisableGuestSeLinux: defaultDisableGuestSeLinux,
LegacySerial: defaultLegacySerial, LegacySerial: defaultLegacySerial,
} }
} }
@ -1317,7 +1324,7 @@ func LoadConfiguration(configPath string, ignoreLogging bool) (resolvedConfigPat
} }
config.DisableGuestSeccomp = tomlConf.Runtime.DisableGuestSeccomp config.DisableGuestSeccomp = tomlConf.Runtime.DisableGuestSeccomp
config.GuestSeLinuxLabel = tomlConf.Runtime.GuestSeLinuxLabel
config.StaticSandboxResourceMgmt = tomlConf.Runtime.StaticSandboxResourceMgmt config.StaticSandboxResourceMgmt = tomlConf.Runtime.StaticSandboxResourceMgmt
config.SandboxCgroupOnly = tomlConf.Runtime.SandboxCgroupOnly config.SandboxCgroupOnly = tomlConf.Runtime.SandboxCgroupOnly
config.DisableNewNetNs = tomlConf.Runtime.DisableNewNetNs config.DisableNewNetNs = tomlConf.Runtime.DisableNewNetNs

View File

@ -554,6 +554,7 @@ func TestMinimalRuntimeConfig(t *testing.T) {
VhostUserStorePath: defaultVhostUserStorePath, VhostUserStorePath: defaultVhostUserStorePath,
VirtioFSCache: defaultVirtioFSCacheMode, VirtioFSCache: defaultVirtioFSCacheMode,
BlockDeviceAIO: defaultBlockDeviceAIO, BlockDeviceAIO: defaultBlockDeviceAIO,
DisableGuestSeLinux: defaultDisableGuestSeLinux,
} }
expectedAgentConfig := vc.KataAgentConfig{ expectedAgentConfig := vc.KataAgentConfig{

View File

@ -128,6 +128,9 @@ type RuntimeConfig struct {
//Determines if seccomp should be applied inside guest //Determines if seccomp should be applied inside guest
DisableGuestSeccomp bool DisableGuestSeccomp bool
//SELinux security context applied to the container process inside guest.
GuestSeLinuxLabel string
// Sandbox sizing information which, if provided, indicates the size of // Sandbox sizing information which, if provided, indicates the size of
// the sandbox needed for the workload(s) // the sandbox needed for the workload(s)
SandboxCPUs uint32 SandboxCPUs uint32
@ -945,6 +948,8 @@ func SandboxConfig(ocispec specs.Spec, runtime RuntimeConfig, bundlePath, cid st
DisableGuestSeccomp: runtime.DisableGuestSeccomp, DisableGuestSeccomp: runtime.DisableGuestSeccomp,
GuestSeLinuxLabel: runtime.GuestSeLinuxLabel,
Experimental: runtime.Experimental, Experimental: runtime.Experimental,
} }

View File

@ -74,6 +74,8 @@ const (
MinHypervisorMemory = 256 MinHypervisorMemory = 256
defaultMsize9p = 8192 defaultMsize9p = 8192
defaultDisableGuestSeLinux = true
) )
var ( var (
@ -560,6 +562,9 @@ type HypervisorConfig struct {
// Disable selinux from the hypervisor process // Disable selinux from the hypervisor process
DisableSeLinux bool DisableSeLinux bool
// Disable selinux from the container process
DisableGuestSeLinux bool
// Use legacy serial for the guest console // Use legacy serial for the guest console
LegacySerial bool LegacySerial bool

View File

@ -92,22 +92,24 @@ func TestHypervisorConfigValidTemplateConfig(t *testing.T) {
func TestHypervisorConfigDefaults(t *testing.T) { func TestHypervisorConfigDefaults(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
hypervisorConfig := &HypervisorConfig{ hypervisorConfig := &HypervisorConfig{
KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel), KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel),
ImagePath: fmt.Sprintf("%s/%s", testDir, testImage), ImagePath: fmt.Sprintf("%s/%s", testDir, testImage),
HypervisorPath: "", HypervisorPath: "",
DisableGuestSeLinux: defaultDisableGuestSeLinux,
} }
testHypervisorConfigValid(t, hypervisorConfig, true) testHypervisorConfigValid(t, hypervisorConfig, true)
hypervisorConfigDefaultsExpected := &HypervisorConfig{ hypervisorConfigDefaultsExpected := &HypervisorConfig{
KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel), KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel),
ImagePath: fmt.Sprintf("%s/%s", testDir, testImage), ImagePath: fmt.Sprintf("%s/%s", testDir, testImage),
HypervisorPath: "", HypervisorPath: "",
NumVCPUs: defaultVCPUs, NumVCPUs: defaultVCPUs,
MemorySize: defaultMemSzMiB, MemorySize: defaultMemSzMiB,
DefaultBridges: defaultBridges, DefaultBridges: defaultBridges,
BlockDeviceDriver: defaultBlockDriver, BlockDeviceDriver: defaultBlockDriver,
DefaultMaxVCPUs: defaultMaxVCPUs, DefaultMaxVCPUs: defaultMaxVCPUs,
Msize9p: defaultMsize9p, Msize9p: defaultMsize9p,
DisableGuestSeLinux: defaultDisableGuestSeLinux,
} }
assert.Exactly(hypervisorConfig, hypervisorConfigDefaultsExpected) assert.Exactly(hypervisorConfig, hypervisorConfigDefaultsExpected)

View File

@ -36,6 +36,7 @@ import (
"context" "context"
"github.com/gogo/protobuf/proto" "github.com/gogo/protobuf/proto"
"github.com/opencontainers/runtime-spec/specs-go" "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/selinux/go-selinux"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
"golang.org/x/sys/unix" "golang.org/x/sys/unix"
"google.golang.org/grpc/codes" "google.golang.org/grpc/codes"
@ -69,6 +70,9 @@ const (
kernelParamDebugConsole = "agent.debug_console" kernelParamDebugConsole = "agent.debug_console"
kernelParamDebugConsoleVPort = "agent.debug_console_vport" kernelParamDebugConsoleVPort = "agent.debug_console_vport"
kernelParamDebugConsoleVPortValue = "1026" kernelParamDebugConsoleVPortValue = "1026"
// Default SELinux type applied to the container process inside guest
defaultSeLinuxContainerType = "container_t"
) )
var ( var (
@ -895,7 +899,7 @@ func (k *kataAgent) removeIgnoredOCIMount(spec *specs.Spec, ignoredMounts map[st
return nil return nil
} }
func (k *kataAgent) constrainGRPCSpec(grpcSpec *grpc.Spec, passSeccomp bool, stripVfio bool) { func (k *kataAgent) constrainGRPCSpec(grpcSpec *grpc.Spec, passSeccomp bool, disableGuestSeLinux bool, guestSeLinuxLabel string, stripVfio bool) error {
// Disable Hooks since they have been handled on the host and there is // Disable Hooks since they have been handled on the host and there is
// no reason to send them to the agent. It would make no sense to try // no reason to send them to the agent. It would make no sense to try
// to apply them on the guest. // to apply them on the guest.
@ -907,11 +911,34 @@ func (k *kataAgent) constrainGRPCSpec(grpcSpec *grpc.Spec, passSeccomp bool, str
grpcSpec.Linux.Seccomp = nil grpcSpec.Linux.Seccomp = nil
} }
// Disable SELinux inside of the virtual machine, the label will apply // Pass SELinux label for the container process to the agent.
// to the KVM process
if grpcSpec.Process.SelinuxLabel != "" { if grpcSpec.Process.SelinuxLabel != "" {
k.Logger().Info("SELinux label from config will be applied to the hypervisor process, not the VM workload") if !disableGuestSeLinux {
grpcSpec.Process.SelinuxLabel = "" k.Logger().Info("SELinux label will be applied to the container process inside guest")
var label string
if guestSeLinuxLabel != "" {
label = guestSeLinuxLabel
} else {
label = grpcSpec.Process.SelinuxLabel
}
processContext, err := selinux.NewContext(label)
if err != nil {
return err
}
// Change the type from KVM to container because the type passed from the high-level
// runtime is for KVM process.
if guestSeLinuxLabel == "" {
processContext["type"] = defaultSeLinuxContainerType
}
grpcSpec.Process.SelinuxLabel = processContext.Get()
} else {
k.Logger().Info("Empty SELinux label for the process and the mount because guest SELinux is disabled")
grpcSpec.Process.SelinuxLabel = ""
grpcSpec.Linux.MountLabel = ""
}
} }
// By now only CPU constraints are supported // By now only CPU constraints are supported
@ -973,6 +1000,8 @@ func (k *kataAgent) constrainGRPCSpec(grpcSpec *grpc.Spec, passSeccomp bool, str
} }
grpcSpec.Linux.Devices = linuxDevices grpcSpec.Linux.Devices = linuxDevices
} }
return nil
} }
func (k *kataAgent) handleShm(mounts []specs.Mount, sandbox *Sandbox) { func (k *kataAgent) handleShm(mounts []specs.Mount, sandbox *Sandbox) {
@ -1256,9 +1285,20 @@ func (k *kataAgent) createContainer(ctx context.Context, sandbox *Sandbox, c *Co
passSeccomp := !sandbox.config.DisableGuestSeccomp && sandbox.seccompSupported passSeccomp := !sandbox.config.DisableGuestSeccomp && sandbox.seccompSupported
// Currently, guest SELinux can be enabled only when SELinux is enabled on the host side.
if !sandbox.config.HypervisorConfig.DisableGuestSeLinux && !selinux.GetEnabled() {
return nil, fmt.Errorf("Guest SELinux is enabled, but SELinux is disabled on the host side")
}
if sandbox.config.HypervisorConfig.DisableGuestSeLinux && sandbox.config.GuestSeLinuxLabel != "" {
return nil, fmt.Errorf("Custom SELinux security policy is provided, but guest SELinux is disabled")
}
// We need to constrain the spec to make sure we're not // We need to constrain the spec to make sure we're not
// passing irrelevant information to the agent. // passing irrelevant information to the agent.
k.constrainGRPCSpec(grpcSpec, passSeccomp, sandbox.config.VfioMode == config.VFIOModeGuestKernel) err = k.constrainGRPCSpec(grpcSpec, passSeccomp, sandbox.config.HypervisorConfig.DisableGuestSeLinux, sandbox.config.GuestSeLinuxLabel, sandbox.config.VfioMode == config.VFIOModeGuestKernel)
if err != nil {
return nil, err
}
req := &grpc.CreateContainerRequest{ req := &grpc.CreateContainerRequest{
ContainerId: c.id, ContainerId: c.id,

View File

@ -619,7 +619,7 @@ func TestConstrainGRPCSpec(t *testing.T) {
} }
k := kataAgent{} k := kataAgent{}
k.constrainGRPCSpec(g, true, true) k.constrainGRPCSpec(g, true, true, "", true)
// Check nil fields // Check nil fields
assert.Nil(g.Hooks) assert.Nil(g.Hooks)

View File

@ -189,6 +189,7 @@ func (s *Sandbox) dumpConfig(ss *persistapi.SandboxState) {
SystemdCgroup: sconfig.SystemdCgroup, SystemdCgroup: sconfig.SystemdCgroup,
SandboxCgroupOnly: sconfig.SandboxCgroupOnly, SandboxCgroupOnly: sconfig.SandboxCgroupOnly,
DisableGuestSeccomp: sconfig.DisableGuestSeccomp, DisableGuestSeccomp: sconfig.DisableGuestSeccomp,
GuestSeLinuxLabel: sconfig.GuestSeLinuxLabel,
} }
ss.Config.SandboxBindMounts = append(ss.Config.SandboxBindMounts, sconfig.SandboxBindMounts...) ss.Config.SandboxBindMounts = append(ss.Config.SandboxBindMounts, sconfig.SandboxBindMounts...)
@ -429,6 +430,7 @@ func loadSandboxConfig(id string) (*SandboxConfig, error) {
SystemdCgroup: savedConf.SystemdCgroup, SystemdCgroup: savedConf.SystemdCgroup,
SandboxCgroupOnly: savedConf.SandboxCgroupOnly, SandboxCgroupOnly: savedConf.SandboxCgroupOnly,
DisableGuestSeccomp: savedConf.DisableGuestSeccomp, DisableGuestSeccomp: savedConf.DisableGuestSeccomp,
GuestSeLinuxLabel: savedConf.GuestSeLinuxLabel,
} }
sconfig.SandboxBindMounts = append(sconfig.SandboxBindMounts, savedConf.SandboxBindMounts...) sconfig.SandboxBindMounts = append(sconfig.SandboxBindMounts, savedConf.SandboxBindMounts...)

View File

@ -243,19 +243,6 @@ type ContainerConfig struct {
// SandboxConfig is a sandbox configuration. // SandboxConfig is a sandbox configuration.
// Refs: virtcontainers/sandbox.go:SandboxConfig // Refs: virtcontainers/sandbox.go:SandboxConfig
type SandboxConfig struct { type SandboxConfig struct {
// Information for fields not saved:
// * Annotation: this is kind of casual data, we don't need casual data in persist file,
// if you know this data needs to persist, please give it
// a specific field
ContainerConfigs []ContainerConfig
// SandboxBindMounts - list of paths to mount into guest
SandboxBindMounts []string
// Experimental enables experimental features
Experimental []string
// Cgroups specifies specific cgroup settings for the various subsystems that the container is // Cgroups specifies specific cgroup settings for the various subsystems that the container is
// placed into to limit the resources the container has available // placed into to limit the resources the container has available
Cgroups *configs.Cgroup `json:"cgroups"` Cgroups *configs.Cgroup `json:"cgroups"`
@ -265,8 +252,24 @@ type SandboxConfig struct {
KataShimConfig *ShimConfig KataShimConfig *ShimConfig
HypervisorType string // Custom SELinux security policy to the container process inside the VM
NetworkConfig NetworkConfig GuestSeLinuxLabel string
HypervisorType string
// SandboxBindMounts - list of paths to mount into guest
SandboxBindMounts []string
// Experimental enables experimental features
Experimental []string
// Information for fields not saved:
// * Annotation: this is kind of casual data, we don't need casual data in persist file,
// if you know this data needs to persist, please give it a specific field
ContainerConfigs []ContainerConfig
NetworkConfig NetworkConfig
HypervisorConfig HypervisorConfig HypervisorConfig HypervisorConfig
ShmSize uint64 ShmSize uint64

View File

@ -247,6 +247,9 @@ const (
// DisableGuestSeccomp is a sandbox annotation that determines if seccomp should be applied inside guest. // DisableGuestSeccomp is a sandbox annotation that determines if seccomp should be applied inside guest.
DisableGuestSeccomp = kataAnnotRuntimePrefix + "disable_guest_seccomp" DisableGuestSeccomp = kataAnnotRuntimePrefix + "disable_guest_seccomp"
// GuestSeLinuxLabel is a SELinux security policy that is applied to a container process inside guest.
GuestSeLinuxLabel = kataAnnotRuntimePrefix + "guest_selinux_label"
// SandboxCgroupOnly is a sandbox annotation that determines if kata processes are managed only in sandbox cgroup. // SandboxCgroupOnly is a sandbox annotation that determines if kata processes are managed only in sandbox cgroup.
SandboxCgroupOnly = kataAnnotRuntimePrefix + "sandbox_cgroup_only" SandboxCgroupOnly = kataAnnotRuntimePrefix + "sandbox_cgroup_only"

View File

@ -181,6 +181,15 @@ func (q *qemu) kernelParameters() string {
// set the maximum number of vCPUs // set the maximum number of vCPUs
params = append(params, Param{"nr_cpus", fmt.Sprintf("%d", q.config.DefaultMaxVCPUs)}) params = append(params, Param{"nr_cpus", fmt.Sprintf("%d", q.config.DefaultMaxVCPUs)})
// set the SELinux params in accordance with the runtime configuration, disable_guest_selinux.
if q.config.DisableGuestSeLinux {
q.Logger().Info("Set selinux=0 to kernel params because SELinux on the guest is disabled")
params = append(params, Param{"selinux", "0"})
} else {
q.Logger().Info("Set selinux=1 to kernel params because SELinux on the guest is enabled")
params = append(params, Param{"selinux", "1"})
}
// add the params specified by the provided config. As the kernel // add the params specified by the provided config. As the kernel
// honours the last parameter value set and since the config-provided // honours the last parameter value set and since the config-provided
// params are added here, they will take priority over the defaults. // params are added here, they will take priority over the defaults.
@ -476,6 +485,13 @@ func (q *qemu) createVirtiofsDaemon(sharedPath string) (VirtiofsDaemon, error) {
return nd, nil return nd, nil
} }
// Set the xattr option for virtiofsd daemon to enable extended attributes
// in virtiofs if SELinux on the guest side is enabled.
if !q.config.DisableGuestSeLinux {
q.Logger().Info("Set the xattr option for virtiofsd")
q.config.VirtioFSExtraArgs = append(q.config.VirtioFSExtraArgs, "-o", "xattr")
}
// default use virtiofsd // default use virtiofsd
return &virtiofsd{ return &virtiofsd{
path: q.config.VirtioFSDaemon, path: q.config.VirtioFSDaemon,
@ -846,7 +862,6 @@ func (q *qemu) StartVM(ctx context.Context, timeout int) error {
// the SELinux label. If these processes require privileged, we do // the SELinux label. If these processes require privileged, we do
// not want to run them under confinement. // not want to run them under confinement.
if !q.config.DisableSeLinux { if !q.config.DisableSeLinux {
if err := label.SetProcessLabel(q.config.SELinuxProcessLabel); err != nil { if err := label.SetProcessLabel(q.config.SELinuxProcessLabel); err != nil {
return err return err
} }

View File

@ -27,15 +27,16 @@ import (
func newQemuConfig() HypervisorConfig { func newQemuConfig() HypervisorConfig {
return HypervisorConfig{ return HypervisorConfig{
KernelPath: testQemuKernelPath, KernelPath: testQemuKernelPath,
InitrdPath: testQemuInitrdPath, InitrdPath: testQemuInitrdPath,
HypervisorPath: testQemuPath, HypervisorPath: testQemuPath,
NumVCPUs: defaultVCPUs, NumVCPUs: defaultVCPUs,
MemorySize: defaultMemSzMiB, MemorySize: defaultMemSzMiB,
DefaultBridges: defaultBridges, DefaultBridges: defaultBridges,
BlockDeviceDriver: defaultBlockDriver, BlockDeviceDriver: defaultBlockDriver,
DefaultMaxVCPUs: defaultMaxVCPUs, DefaultMaxVCPUs: defaultMaxVCPUs,
Msize9p: defaultMsize9p, Msize9p: defaultMsize9p,
DisableGuestSeLinux: defaultDisableGuestSeLinux,
} }
} }
@ -58,7 +59,7 @@ func testQemuKernelParameters(t *testing.T, kernelParams []Param, expected strin
} }
func TestQemuKernelParameters(t *testing.T) { func TestQemuKernelParameters(t *testing.T) {
expectedOut := fmt.Sprintf("panic=1 nr_cpus=%d foo=foo bar=bar", govmm.MaxVCPUs()) expectedOut := fmt.Sprintf("panic=1 nr_cpus=%d selinux=0 foo=foo bar=bar", govmm.MaxVCPUs())
params := []Param{ params := []Param{
{ {
Key: "foo", Key: "foo",

View File

@ -126,14 +126,17 @@ type SandboxResourceSizing struct {
// SandboxConfig is a Sandbox configuration. // SandboxConfig is a Sandbox configuration.
type SandboxConfig struct { type SandboxConfig struct {
// Volumes is a list of shared volumes between the host and the Sandbox. // Annotations keys must be unique strings and must be name-spaced
Volumes []types.Volume Annotations map[string]string
// Containers describe the list of containers within a Sandbox. // Custom SELinux security policy to the container process inside the VM
// This list can be empty and populated by adding containers GuestSeLinuxLabel string
// to the Sandbox a posteriori.
//TODO: this should be a map to avoid duplicated containers HypervisorType HypervisorType
Containers []ContainerConfig
ID string
Hostname string
// SandboxBindMounts - list of paths to mount into guest // SandboxBindMounts - list of paths to mount into guest
SandboxBindMounts []string SandboxBindMounts []string
@ -141,31 +144,29 @@ type SandboxConfig struct {
// Experimental features enabled // Experimental features enabled
Experimental []exp.Feature Experimental []exp.Feature
// Annotations keys must be unique strings and must be name-spaced // Containers describe the list of containers within a Sandbox.
// with e.g. reverse domain notation (org.clearlinux.key). // This list can be empty and populated by adding containers
Annotations map[string]string // to the Sandbox a posteriori.
// TODO: this should be a map to avoid duplicated containers
Containers []ContainerConfig
ID string Volumes []types.Volume
Hostname string
HypervisorType HypervisorType
AgentConfig KataAgentConfig
NetworkConfig NetworkConfig NetworkConfig NetworkConfig
AgentConfig KataAgentConfig
HypervisorConfig HypervisorConfig HypervisorConfig HypervisorConfig
SandboxResources SandboxResourceSizing
// StaticResourceMgmt indicates if the shim should rely on statically sizing the sandbox (VM)
StaticResourceMgmt bool
ShmSize uint64 ShmSize uint64
SandboxResources SandboxResourceSizing
VfioMode config.VFIOModeType VfioMode config.VFIOModeType
// StaticResourceMgmt indicates if the shim should rely on statically sizing the sandbox (VM)
StaticResourceMgmt bool
// SharePidNs sets all containers to share the same sandbox level pid namespace. // SharePidNs sets all containers to share the same sandbox level pid namespace.
SharePidNs bool SharePidNs bool