diff --git a/src/runtime/Makefile b/src/runtime/Makefile index 0f49badd44..852b4d5795 100644 --- a/src/runtime/Makefile +++ b/src/runtime/Makefile @@ -171,6 +171,11 @@ DEFDISABLEGUESTEMPTYDIR := false DEFAULTEXPFEATURES := [] DEFDISABLESELINUX := false + +# Default guest SELinux configuration +DEFDISABLEGUESTSELINUX := true +DEFGUESTSELINUXLABEL := system_u:system_r:container_t + #Default SeccomSandbox param #The same default policy is used by libvirt #More explanation on https://lists.gnu.org/archive/html/qemu-devel/2017-02/msg03348.html @@ -460,6 +465,8 @@ USER_VARS += DEFNETWORKMODEL_QEMU USER_VARS += DEFDISABLEGUESTEMPTYDIR USER_VARS += DEFDISABLEGUESTSECCOMP USER_VARS += DEFDISABLESELINUX +USER_VARS += DEFDISABLEGUESTSELINUX +USER_VARS += DEFGUESTSELINUXLABEL USER_VARS += DEFAULTEXPFEATURES USER_VARS += DEFDISABLEBLOCK USER_VARS += DEFBLOCKSTORAGEDRIVER_ACRN diff --git a/src/runtime/cmd/kata-runtime/kata-env.go b/src/runtime/cmd/kata-runtime/kata-env.go index b1421fa006..c129f8f434 100644 --- a/src/runtime/cmd/kata-runtime/kata-env.go +++ b/src/runtime/cmd/kata-runtime/kata-env.go @@ -76,6 +76,7 @@ type RuntimeConfigInfo struct { type RuntimeInfo struct { Config RuntimeConfigInfo Path string + GuestSeLinuxLabel string Experimental []exp.Feature Version RuntimeVersionInfo Debug bool @@ -186,6 +187,7 @@ func getRuntimeInfo(configFile string, config oci.RuntimeConfig) RuntimeInfo { SandboxCgroupOnly: config.SandboxCgroupOnly, Experimental: config.Experimental, DisableGuestSeccomp: config.DisableGuestSeccomp, + GuestSeLinuxLabel: config.GuestSeLinuxLabel, } } diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in index e47a1d92a0..cedf2303ad 100644 --- a/src/runtime/config/configuration-clh.toml.in +++ b/src/runtime/config/configuration-clh.toml.in @@ -38,6 +38,13 @@ image = "@IMAGEPATH@" # disable applying SELinux on the VMM process (default false) disable_selinux=@DEFDISABLESELINUX@ +# disable applying SELinux on 
the container process +# If set to false, the type `container_t` is applied to the container process by default. +# Note: To enable guest SELinux, the guest rootfs must be a CentOS rootfs that is created and built +# with `SELINUX=yes`. +# (default: true) +disable_guest_selinux=@DEFDISABLEGUESTSELINUX@ + # Path to the firmware. # If you want Cloud Hypervisor to use a specific firmware, set its path below. # This is option is only used when confidential_guest is enabled. @@ -321,6 +328,14 @@ internetworking_model="@DEFNETWORKMODEL_CLH@" # (default: true) disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ +# Apply a custom SELinux security context (label) to the container process inside the VM. +# This is used when you want to apply a type other than the default `container_t`, +# so general users should not uncomment and apply it. +# (format: "user:role:type") +# Note: You cannot specify MCS policy with the label because the sensitivity levels and +# categories are determined automatically by high-level container runtimes such as containerd. +#guest_selinux_label="@DEFGUESTSELINUXLABEL@" + # If enabled, the runtime will create opentracing.io traces and spans. # (See https://www.jaegertracing.io/docs/getting-started). # (default: disabled) diff --git a/src/runtime/config/configuration-qemu.toml.in b/src/runtime/config/configuration-qemu.toml.in index 8330042977..f7e70a6d53 100644 --- a/src/runtime/config/configuration-qemu.toml.in +++ b/src/runtime/config/configuration-qemu.toml.in @@ -438,6 +438,14 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@ # disable applying SELinux on the VMM process (default false) disable_selinux=@DEFDISABLESELINUX@ +# disable applying SELinux on the container process +# If set to false, the type `container_t` is applied to the container process by default. +# Note: To enable guest SELinux, the guest rootfs must be a CentOS rootfs that is created and built +# with `SELINUX=yes`. 
+# (default: true) +disable_guest_selinux=@DEFDISABLEGUESTSELINUX@ + + [factory] # VM templating support. Once enabled, new VMs are created from template # using vm cloning. They will share the same initial kernel, initramfs and @@ -555,6 +563,14 @@ internetworking_model="@DEFNETWORKMODEL_QEMU@" # (default: true) disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ +# Apply a custom SELinux security policy to the container process inside the VM. +# This is used when you want to apply a type other than the default `container_t`, +# so general users should not uncomment and apply it. +# (format: "user:role:type") +# Note: You cannot specify MCS policy with the label because the sensitivity levels and +# categories are determined automatically by high-level container runtimes such as containerd. +#guest_selinux_label="@DEFGUESTSELINUXLABEL@" + # If enabled, the runtime will create opentracing.io traces and spans. # (See https://www.jaegertracing.io/docs/getting-started). # (default: disabled) diff --git a/src/runtime/pkg/katautils/config-settings.go.in b/src/runtime/pkg/katautils/config-settings.go.in index 601d95612c..43dd5cc5a4 100644 --- a/src/runtime/pkg/katautils/config-settings.go.in +++ b/src/runtime/pkg/katautils/config-settings.go.in @@ -90,6 +90,7 @@ const defaultSevSnpGuest = false const defaultGuestSwap = false const defaultRootlessHypervisor = false const defaultDisableSeccomp = false +const defaultDisableGuestSeLinux = true const defaultVfioMode = "guest-kernel" const defaultLegacySerial = false diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index 3fabfe0af1..3ed3177f57 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -59,9 +59,9 @@ const ( type tomlConfig struct { Hypervisor map[string]hypervisor Agent map[string]agent - Runtime runtime Image image Factory factory + Runtime runtime } type image struct { @@ -154,6 +154,7 @@ type hypervisor struct { Rootless bool 
`toml:"rootless"` DisableSeccomp bool `toml:"disable_seccomp"` DisableSeLinux bool `toml:"disable_selinux"` + DisableGuestSeLinux bool `toml:"disable_guest_selinux"` LegacySerial bool `toml:"use_legacy_serial"` EnableVCPUsPinning bool `toml:"enable_vcpus_pinning"` } @@ -164,12 +165,13 @@ type runtime struct { JaegerUser string `toml:"jaeger_user"` JaegerPassword string `toml:"jaeger_password"` VfioMode string `toml:"vfio_mode"` + GuestSeLinuxLabel string `toml:"guest_selinux_label"` SandboxBindMounts []string `toml:"sandbox_bind_mounts"` Experimental []string `toml:"experimental"` - Debug bool `toml:"enable_debug"` Tracing bool `toml:"enable_tracing"` DisableNewNetNs bool `toml:"disable_new_netns"` DisableGuestSeccomp bool `toml:"disable_guest_seccomp"` + Debug bool `toml:"enable_debug"` SandboxCgroupOnly bool `toml:"sandbox_cgroup_only"` StaticSandboxResourceMgmt bool `toml:"static_sandbox_resource_mgmt"` EnablePprof bool `toml:"enable_pprof"` @@ -690,6 +692,7 @@ func newFirecrackerHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { TxRateLimiterMaxRate: txRateLimiterMaxRate, EnableAnnotations: h.EnableAnnotations, DisableSeLinux: h.DisableSeLinux, + DisableGuestSeLinux: true, // Guest SELinux is not supported in Firecracker }, nil } @@ -836,6 +839,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { LegacySerial: h.LegacySerial, DisableSeLinux: h.DisableSeLinux, EnableVCPUsPinning: h.EnableVCPUsPinning, + DisableGuestSeLinux: h.DisableGuestSeLinux, }, nil } @@ -902,6 +906,7 @@ func newAcrnHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { GuestHookPath: h.guestHookPath(), DisableSeLinux: h.DisableSeLinux, EnableAnnotations: h.EnableAnnotations, + DisableGuestSeLinux: true, // Guest SELinux is not supported in ACRN }, nil } @@ -1007,6 +1012,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { DisableSeccomp: h.DisableSeccomp, ConfidentialGuest: h.ConfidentialGuest, DisableSeLinux: 
h.DisableSeLinux, + DisableGuestSeLinux: h.DisableGuestSeLinux, NetRateLimiterBwMaxRate: h.getNetRateLimiterBwMaxRate(), NetRateLimiterBwOneTimeBurst: h.getNetRateLimiterBwOneTimeBurst(), NetRateLimiterOpsMaxRate: h.getNetRateLimiterOpsMaxRate(), @@ -1230,6 +1236,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig { GuestSwap: defaultGuestSwap, Rootless: defaultRootlessHypervisor, DisableSeccomp: defaultDisableSeccomp, + DisableGuestSeLinux: defaultDisableGuestSeLinux, LegacySerial: defaultLegacySerial, } } @@ -1317,7 +1324,7 @@ func LoadConfiguration(configPath string, ignoreLogging bool) (resolvedConfigPat } config.DisableGuestSeccomp = tomlConf.Runtime.DisableGuestSeccomp - + config.GuestSeLinuxLabel = tomlConf.Runtime.GuestSeLinuxLabel config.StaticSandboxResourceMgmt = tomlConf.Runtime.StaticSandboxResourceMgmt config.SandboxCgroupOnly = tomlConf.Runtime.SandboxCgroupOnly config.DisableNewNetNs = tomlConf.Runtime.DisableNewNetNs diff --git a/src/runtime/pkg/katautils/config_test.go b/src/runtime/pkg/katautils/config_test.go index 5e493b40e3..335f077fbb 100644 --- a/src/runtime/pkg/katautils/config_test.go +++ b/src/runtime/pkg/katautils/config_test.go @@ -554,6 +554,7 @@ func TestMinimalRuntimeConfig(t *testing.T) { VhostUserStorePath: defaultVhostUserStorePath, VirtioFSCache: defaultVirtioFSCacheMode, BlockDeviceAIO: defaultBlockDeviceAIO, + DisableGuestSeLinux: defaultDisableGuestSeLinux, } expectedAgentConfig := vc.KataAgentConfig{ diff --git a/src/runtime/pkg/oci/utils.go b/src/runtime/pkg/oci/utils.go index 57c2ed1a15..2cd7c10f53 100644 --- a/src/runtime/pkg/oci/utils.go +++ b/src/runtime/pkg/oci/utils.go @@ -128,6 +128,9 @@ type RuntimeConfig struct { //Determines if seccomp should be applied inside guest DisableGuestSeccomp bool + //SELinux security context applied to the container process inside guest. 
+ GuestSeLinuxLabel string + // Sandbox sizing information which, if provided, indicates the size of // the sandbox needed for the workload(s) SandboxCPUs uint32 @@ -945,6 +948,8 @@ func SandboxConfig(ocispec specs.Spec, runtime RuntimeConfig, bundlePath, cid st DisableGuestSeccomp: runtime.DisableGuestSeccomp, + GuestSeLinuxLabel: runtime.GuestSeLinuxLabel, + Experimental: runtime.Experimental, } diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index 548ce6f77d..955da7d107 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -74,6 +74,8 @@ const ( MinHypervisorMemory = 256 defaultMsize9p = 8192 + + defaultDisableGuestSeLinux = true ) var ( @@ -560,6 +562,9 @@ type HypervisorConfig struct { // Disable selinux from the hypervisor process DisableSeLinux bool + // Disable selinux from the container process + DisableGuestSeLinux bool + // Use legacy serial for the guest console LegacySerial bool diff --git a/src/runtime/virtcontainers/hypervisor_config_linux_test.go b/src/runtime/virtcontainers/hypervisor_config_linux_test.go index 609e52fd73..41cabb1c35 100644 --- a/src/runtime/virtcontainers/hypervisor_config_linux_test.go +++ b/src/runtime/virtcontainers/hypervisor_config_linux_test.go @@ -92,22 +92,24 @@ func TestHypervisorConfigValidTemplateConfig(t *testing.T) { func TestHypervisorConfigDefaults(t *testing.T) { assert := assert.New(t) hypervisorConfig := &HypervisorConfig{ - KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel), - ImagePath: fmt.Sprintf("%s/%s", testDir, testImage), - HypervisorPath: "", + KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel), + ImagePath: fmt.Sprintf("%s/%s", testDir, testImage), + HypervisorPath: "", + DisableGuestSeLinux: defaultDisableGuestSeLinux, } testHypervisorConfigValid(t, hypervisorConfig, true) hypervisorConfigDefaultsExpected := &HypervisorConfig{ - KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel), - ImagePath: 
fmt.Sprintf("%s/%s", testDir, testImage), - HypervisorPath: "", - NumVCPUs: defaultVCPUs, - MemorySize: defaultMemSzMiB, - DefaultBridges: defaultBridges, - BlockDeviceDriver: defaultBlockDriver, - DefaultMaxVCPUs: defaultMaxVCPUs, - Msize9p: defaultMsize9p, + KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel), + ImagePath: fmt.Sprintf("%s/%s", testDir, testImage), + HypervisorPath: "", + NumVCPUs: defaultVCPUs, + MemorySize: defaultMemSzMiB, + DefaultBridges: defaultBridges, + BlockDeviceDriver: defaultBlockDriver, + DefaultMaxVCPUs: defaultMaxVCPUs, + Msize9p: defaultMsize9p, + DisableGuestSeLinux: defaultDisableGuestSeLinux, } assert.Exactly(hypervisorConfig, hypervisorConfigDefaultsExpected) diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index 477be9fde4..5746759542 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -36,6 +36,7 @@ import ( "context" "github.com/gogo/protobuf/proto" "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/selinux/go-selinux" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" "google.golang.org/grpc/codes" @@ -69,6 +70,9 @@ const ( kernelParamDebugConsole = "agent.debug_console" kernelParamDebugConsoleVPort = "agent.debug_console_vport" kernelParamDebugConsoleVPortValue = "1026" + + // Default SELinux type applied to the container process inside guest + defaultSeLinuxContainerType = "container_t" ) var ( @@ -895,7 +899,7 @@ func (k *kataAgent) removeIgnoredOCIMount(spec *specs.Spec, ignoredMounts map[st return nil } -func (k *kataAgent) constrainGRPCSpec(grpcSpec *grpc.Spec, passSeccomp bool, stripVfio bool) { +func (k *kataAgent) constrainGRPCSpec(grpcSpec *grpc.Spec, passSeccomp bool, disableGuestSeLinux bool, guestSeLinuxLabel string, stripVfio bool) error { // Disable Hooks since they have been handled on the host and there is // no reason to send them to the agent. 
It would make no sense to try // to apply them on the guest. @@ -907,11 +911,34 @@ func (k *kataAgent) constrainGRPCSpec(grpcSpec *grpc.Spec, passSeccomp bool, str grpcSpec.Linux.Seccomp = nil } - // Disable SELinux inside of the virtual machine, the label will apply - // to the KVM process + // Pass SELinux label for the container process to the agent. if grpcSpec.Process.SelinuxLabel != "" { - k.Logger().Info("SELinux label from config will be applied to the hypervisor process, not the VM workload") - grpcSpec.Process.SelinuxLabel = "" + if !disableGuestSeLinux { + k.Logger().Info("SELinux label will be applied to the container process inside guest") + + var label string + if guestSeLinuxLabel != "" { + label = guestSeLinuxLabel + } else { + label = grpcSpec.Process.SelinuxLabel + } + + processContext, err := selinux.NewContext(label) + if err != nil { + return err + } + + // Change the type from KVM to container because the type passed from the high-level + // runtime is for KVM process. + if guestSeLinuxLabel == "" { + processContext["type"] = defaultSeLinuxContainerType + } + grpcSpec.Process.SelinuxLabel = processContext.Get() + } else { + k.Logger().Info("Empty SELinux label for the process and the mount because guest SELinux is disabled") + grpcSpec.Process.SelinuxLabel = "" + grpcSpec.Linux.MountLabel = "" + } } // By now only CPU constraints are supported @@ -973,6 +1000,8 @@ func (k *kataAgent) constrainGRPCSpec(grpcSpec *grpc.Spec, passSeccomp bool, str } grpcSpec.Linux.Devices = linuxDevices } + + return nil } func (k *kataAgent) handleShm(mounts []specs.Mount, sandbox *Sandbox) { @@ -1256,9 +1285,20 @@ func (k *kataAgent) createContainer(ctx context.Context, sandbox *Sandbox, c *Co passSeccomp := !sandbox.config.DisableGuestSeccomp && sandbox.seccompSupported + // Currently, guest SELinux can be enabled only when SELinux is enabled on the host side. 
+ if !sandbox.config.HypervisorConfig.DisableGuestSeLinux && !selinux.GetEnabled() { + return nil, fmt.Errorf("Guest SELinux is enabled, but SELinux is disabled on the host side") + } + if sandbox.config.HypervisorConfig.DisableGuestSeLinux && sandbox.config.GuestSeLinuxLabel != "" { + return nil, fmt.Errorf("Custom SELinux security policy is provided, but guest SELinux is disabled") + } + // We need to constrain the spec to make sure we're not // passing irrelevant information to the agent. - k.constrainGRPCSpec(grpcSpec, passSeccomp, sandbox.config.VfioMode == config.VFIOModeGuestKernel) + err = k.constrainGRPCSpec(grpcSpec, passSeccomp, sandbox.config.HypervisorConfig.DisableGuestSeLinux, sandbox.config.GuestSeLinuxLabel, sandbox.config.VfioMode == config.VFIOModeGuestKernel) + if err != nil { + return nil, err + } req := &grpc.CreateContainerRequest{ ContainerId: c.id, diff --git a/src/runtime/virtcontainers/kata_agent_test.go b/src/runtime/virtcontainers/kata_agent_test.go index 9711a5cf55..885fd8acc7 100644 --- a/src/runtime/virtcontainers/kata_agent_test.go +++ b/src/runtime/virtcontainers/kata_agent_test.go @@ -619,7 +619,7 @@ func TestConstrainGRPCSpec(t *testing.T) { } k := kataAgent{} - k.constrainGRPCSpec(g, true, true) + k.constrainGRPCSpec(g, true, true, "", true) // Check nil fields assert.Nil(g.Hooks) diff --git a/src/runtime/virtcontainers/persist.go b/src/runtime/virtcontainers/persist.go index 59c6dda15f..906ed10761 100644 --- a/src/runtime/virtcontainers/persist.go +++ b/src/runtime/virtcontainers/persist.go @@ -189,6 +189,7 @@ func (s *Sandbox) dumpConfig(ss *persistapi.SandboxState) { SystemdCgroup: sconfig.SystemdCgroup, SandboxCgroupOnly: sconfig.SandboxCgroupOnly, DisableGuestSeccomp: sconfig.DisableGuestSeccomp, + GuestSeLinuxLabel: sconfig.GuestSeLinuxLabel, } ss.Config.SandboxBindMounts = append(ss.Config.SandboxBindMounts, sconfig.SandboxBindMounts...) 
@@ -429,6 +430,7 @@ func loadSandboxConfig(id string) (*SandboxConfig, error) { SystemdCgroup: savedConf.SystemdCgroup, SandboxCgroupOnly: savedConf.SandboxCgroupOnly, DisableGuestSeccomp: savedConf.DisableGuestSeccomp, + GuestSeLinuxLabel: savedConf.GuestSeLinuxLabel, } sconfig.SandboxBindMounts = append(sconfig.SandboxBindMounts, savedConf.SandboxBindMounts...) diff --git a/src/runtime/virtcontainers/persist/api/config.go b/src/runtime/virtcontainers/persist/api/config.go index 1c16b7bd91..44ba820643 100644 --- a/src/runtime/virtcontainers/persist/api/config.go +++ b/src/runtime/virtcontainers/persist/api/config.go @@ -243,19 +243,6 @@ type ContainerConfig struct { // SandboxConfig is a sandbox configuration. // Refs: virtcontainers/sandbox.go:SandboxConfig type SandboxConfig struct { - // Information for fields not saved: - // * Annotation: this is kind of casual data, we don't need casual data in persist file, - // if you know this data needs to persist, please gives it - // a specific field - - ContainerConfigs []ContainerConfig - - // SandboxBindMounts - list of paths to mount into guest - SandboxBindMounts []string - - // Experimental enables experimental features - Experimental []string - // Cgroups specifies specific cgroup settings for the various subsystems that the container is // placed into to limit the resources the container has available Cgroups *configs.Cgroup `json:"cgroups"` @@ -265,8 +252,24 @@ type SandboxConfig struct { KataShimConfig *ShimConfig - HypervisorType string - NetworkConfig NetworkConfig + // Custom SELinux security policy to the container process inside the VM + GuestSeLinuxLabel string + + HypervisorType string + + // SandboxBindMounts - list of paths to mount into guest + SandboxBindMounts []string + + // Experimental enables experimental features + Experimental []string + + // Information for fields not saved: + // * Annotation: this is kind of casual data, we don't need casual data in persist file, + // if you know this data 
needs to persist, please give it a specific field + ContainerConfigs []ContainerConfig + + NetworkConfig NetworkConfig + HypervisorConfig HypervisorConfig ShmSize uint64 diff --git a/src/runtime/virtcontainers/pkg/annotations/annotations.go b/src/runtime/virtcontainers/pkg/annotations/annotations.go index d785580d9b..67c81cb1f8 100644 --- a/src/runtime/virtcontainers/pkg/annotations/annotations.go +++ b/src/runtime/virtcontainers/pkg/annotations/annotations.go @@ -247,6 +247,9 @@ const ( // DisableGuestSeccomp is a sandbox annotation that determines if seccomp should be applied inside guest. DisableGuestSeccomp = kataAnnotRuntimePrefix + "disable_guest_seccomp" + // GuestSeLinuxLabel is a SELinux security context that is applied to a container process inside guest. + GuestSeLinuxLabel = kataAnnotRuntimePrefix + "guest_selinux_label" + // SandboxCgroupOnly is a sandbox annotation that determines if kata processes are managed only in sandbox cgroup. SandboxCgroupOnly = kataAnnotRuntimePrefix + "sandbox_cgroup_only" diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index d33f02f6ed..75a6731dd1 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -181,6 +181,15 @@ func (q *qemu) kernelParameters() string { // set the maximum number of vCPUs params = append(params, Param{"nr_cpus", fmt.Sprintf("%d", q.config.DefaultMaxVCPUs)}) + // set the SELinux params in accordance with the runtime configuration, disable_guest_selinux. + if q.config.DisableGuestSeLinux { + q.Logger().Info("Set selinux=0 to kernel params because SELinux on the guest is disabled") + params = append(params, Param{"selinux", "0"}) + } else { + q.Logger().Info("Set selinux=1 to kernel params because SELinux on the guest is enabled") + params = append(params, Param{"selinux", "1"}) + } + // add the params specified by the provided config. 
As the kernel // honours the last parameter value set and since the config-provided // params are added here, they will take priority over the defaults. @@ -476,6 +485,13 @@ func (q *qemu) createVirtiofsDaemon(sharedPath string) (VirtiofsDaemon, error) { return nd, nil } + // Set the xattr option for virtiofsd daemon to enable extended attributes + // in virtiofs if SELinux on the guest side is enabled. + if !q.config.DisableGuestSeLinux { + q.Logger().Info("Set the xattr option for virtiofsd") + q.config.VirtioFSExtraArgs = append(q.config.VirtioFSExtraArgs, "-o", "xattr") + } + // default use virtiofsd return &virtiofsd{ path: q.config.VirtioFSDaemon, @@ -846,7 +862,6 @@ func (q *qemu) StartVM(ctx context.Context, timeout int) error { // the SELinux label. If these processes require privileged, we do // notwant to run them under confinement. if !q.config.DisableSeLinux { - if err := label.SetProcessLabel(q.config.SELinuxProcessLabel); err != nil { return err } diff --git a/src/runtime/virtcontainers/qemu_test.go b/src/runtime/virtcontainers/qemu_test.go index f30dd0a696..a8bc6a33db 100644 --- a/src/runtime/virtcontainers/qemu_test.go +++ b/src/runtime/virtcontainers/qemu_test.go @@ -27,15 +27,16 @@ import ( func newQemuConfig() HypervisorConfig { return HypervisorConfig{ - KernelPath: testQemuKernelPath, - InitrdPath: testQemuInitrdPath, - HypervisorPath: testQemuPath, - NumVCPUs: defaultVCPUs, - MemorySize: defaultMemSzMiB, - DefaultBridges: defaultBridges, - BlockDeviceDriver: defaultBlockDriver, - DefaultMaxVCPUs: defaultMaxVCPUs, - Msize9p: defaultMsize9p, + KernelPath: testQemuKernelPath, + InitrdPath: testQemuInitrdPath, + HypervisorPath: testQemuPath, + NumVCPUs: defaultVCPUs, + MemorySize: defaultMemSzMiB, + DefaultBridges: defaultBridges, + BlockDeviceDriver: defaultBlockDriver, + DefaultMaxVCPUs: defaultMaxVCPUs, + Msize9p: defaultMsize9p, + DisableGuestSeLinux: defaultDisableGuestSeLinux, } } @@ -58,7 +59,7 @@ func testQemuKernelParameters(t 
*testing.T, kernelParams []Param, expected strin } func TestQemuKernelParameters(t *testing.T) { - expectedOut := fmt.Sprintf("panic=1 nr_cpus=%d foo=foo bar=bar", govmm.MaxVCPUs()) + expectedOut := fmt.Sprintf("panic=1 nr_cpus=%d selinux=0 foo=foo bar=bar", govmm.MaxVCPUs()) params := []Param{ { Key: "foo", diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index 32ccc2dbb7..025537fed9 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -126,14 +126,17 @@ type SandboxResourceSizing struct { // SandboxConfig is a Sandbox configuration. type SandboxConfig struct { - // Volumes is a list of shared volumes between the host and the Sandbox. - Volumes []types.Volume + // Annotations keys must be unique strings and must be name-spaced + Annotations map[string]string - // Containers describe the list of containers within a Sandbox. - // This list can be empty and populated by adding containers - // to the Sandbox a posteriori. - //TODO: this should be a map to avoid duplicated containers - Containers []ContainerConfig + // Custom SELinux security policy to the container process inside the VM + GuestSeLinuxLabel string + + HypervisorType HypervisorType + + ID string + + Hostname string // SandboxBindMounts - list of paths to mount into guest SandboxBindMounts []string @@ -141,31 +144,29 @@ type SandboxConfig struct { // Experimental features enabled Experimental []exp.Feature - // Annotations keys must be unique strings and must be name-spaced - // with e.g. reverse domain notation (org.clearlinux.key). - Annotations map[string]string + // Containers describe the list of containers within a Sandbox. + // This list can be empty and populated by adding containers + // to the Sandbox a posteriori. 
+ // TODO: this should be a map to avoid duplicated containers + Containers []ContainerConfig - ID string - - Hostname string - - HypervisorType HypervisorType - - AgentConfig KataAgentConfig + Volumes []types.Volume NetworkConfig NetworkConfig + AgentConfig KataAgentConfig + HypervisorConfig HypervisorConfig - SandboxResources SandboxResourceSizing - - // StaticResourceMgmt indicates if the shim should rely on statically sizing the sandbox (VM) - StaticResourceMgmt bool - ShmSize uint64 + SandboxResources SandboxResourceSizing + VfioMode config.VFIOModeType + // StaticResourceMgmt indicates if the shim should rely on statically sizing the sandbox (VM) + StaticResourceMgmt bool + // SharePidNs sets all containers to share the same sandbox level pid namespace. SharePidNs bool