From ef925d40ce7b120ad888a8487ec622a7688448c2 Mon Sep 17 00:00:00 2001 From: Liang Zhou Date: Mon, 26 Jul 2021 02:54:00 -0700 Subject: [PATCH] runtime: enable sandbox feature on qemu Enabling "-sandbox on" in qemu adds another layer of protection on the host, making the secure container more secure. The feature is disabled by default because it may introduce some performance cost, even though users can enable /proc/sys/net/core/bpf_jit_enable to reduce the impact. Fixes: #2266 Signed-off-by: Feng Wang --- src/runtime/Makefile | 6 ++ src/runtime/config/configuration-qemu.toml.in | 8 +++ src/runtime/pkg/govmm/qemu/qemu.go | 3 +- src/runtime/pkg/katautils/config.go | 2 + src/runtime/virtcontainers/hypervisor.go | 3 + src/runtime/virtcontainers/persist.go | 1 + .../virtcontainers/persist/api/config.go | 3 + src/runtime/virtcontainers/qemu.go | 65 ++++++++++++------- 8 files changed, 67 insertions(+), 24 deletions(-) diff --git a/src/runtime/Makefile b/src/runtime/Makefile index 85f9bbd8ab..8d7f5652cc 100644 --- a/src/runtime/Makefile +++ b/src/runtime/Makefile @@ -167,6 +167,11 @@ DEFDISABLEGUESTEMPTYDIR := false DEFAULTEXPFEATURES := [] DEFDISABLESELINUX := false +#Default seccomp sandbox param +#The same default policy is used by libvirt +#More explanation at https://lists.gnu.org/archive/html/qemu-devel/2017-02/msg03348.html +# Note: "elevateprivileges=deny" doesn't work with the daemonize option, so it's removed from the seccomp sandbox +DEFSECCOMPSANDBOXPARAM := on,obsolete=deny,spawn=deny,resourcecontrol=deny #Default entropy source DEFENTROPYSOURCE := /dev/urandom @@ -459,6 +464,7 @@ USER_VARS += DEFVIRTIOFSCACHE USER_VARS += DEFVIRTIOFSEXTRAARGS USER_VARS += DEFENABLEANNOTATIONS USER_VARS += DEFENABLEIOTHREADS +USER_VARS += DEFSECCOMPSANDBOXPARAM USER_VARS += DEFENABLEVHOSTUSERSTORE USER_VARS += DEFVHOSTUSERSTOREPATH USER_VARS += DEFVALIDVHOSTUSERSTOREPATHS diff --git a/src/runtime/config/configuration-qemu.toml.in 
b/src/runtime/config/configuration-qemu.toml.in index 09c219545d..702b71aadd 100644 --- a/src/runtime/config/configuration-qemu.toml.in +++ b/src/runtime/config/configuration-qemu.toml.in @@ -76,6 +76,14 @@ firmware_volume = "@FIRMWAREVOLUMEPATH@" # For example, `machine_accelerators = "nosmm,nosmbus,nosata,nopit,static-prt,nofw"` machine_accelerators="@MACHINEACCELERATORS@" +# QEMU seccomp sandbox feature +# Comma-separated list of seccomp sandbox options controlling syscall access. +# For example, `seccompsandbox = "on,obsolete=deny,spawn=deny,resourcecontrol=deny"` +# Note: "elevateprivileges=deny" doesn't work with the daemonize option, so it's removed from the seccomp sandbox +# Another note: enabling this feature may reduce performance; you may enable +# /proc/sys/net/core/bpf_jit_enable to reduce the impact. See https://man7.org/linux/man-pages/man8/bpfc.8.html +#seccompsandbox="@DEFSECCOMPSANDBOXPARAM@" + # CPU features # comma-separated list of cpu features to pass to the cpu # For example, `cpu_features = "pmu=off,vmx=off" diff --git a/src/runtime/pkg/govmm/qemu/qemu.go b/src/runtime/pkg/govmm/qemu/qemu.go index ea3f1311a8..100316dd9e 100644 --- a/src/runtime/pkg/govmm/qemu/qemu.go +++ b/src/runtime/pkg/govmm/qemu/qemu.go @@ -15,6 +15,7 @@ package qemu import ( "bytes" + "context" "fmt" "log" "os" @@ -23,8 +24,6 @@ import ( "strconv" "strings" "syscall" - - "context" ) // Machine describes the machine type qemu will emulate. 
diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index 2a62c3d6c3..368f6eedd8 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -95,6 +95,7 @@ type hypervisor struct { FileBackedMemRootDir string `toml:"file_mem_backend"` GuestHookPath string `toml:"guest_hook_path"` GuestMemoryDumpPath string `toml:"guest_memory_dump_path"` + SeccompSandbox string `toml:"seccompsandbox"` HypervisorPathList []string `toml:"valid_hypervisor_paths"` JailerPathList []string `toml:"valid_jailer_paths"` CtlPathList []string `toml:"valid_ctlpaths"` @@ -767,6 +768,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { EnableVhostUserStore: h.EnableVhostUserStore, VhostUserStorePath: h.vhostUserStorePath(), VhostUserStorePathList: h.VhostUserStorePathList, + SeccompSandbox: h.SeccompSandbox, GuestHookPath: h.guestHookPath(), RxRateLimiterMaxRate: rxRateLimiterMaxRate, TxRateLimiterMaxRate: txRateLimiterMaxRate, diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index 9de4dc0d0a..20d7d61038 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -370,6 +370,9 @@ type HypervisorConfig struct { // VhostUserStorePathList is the list of valid values for vhost-user paths VhostUserStorePathList []string + // SeccompSandbox is the qemu function which enables the seccomp feature + SeccompSandbox string + // KernelParams are additional guest kernel parameters. 
KernelParams []Param diff --git a/src/runtime/virtcontainers/persist.go b/src/runtime/virtcontainers/persist.go index bc20af21fa..199c647ae2 100644 --- a/src/runtime/virtcontainers/persist.go +++ b/src/runtime/virtcontainers/persist.go @@ -247,6 +247,7 @@ func (s *Sandbox) dumpConfig(ss *persistapi.SandboxState) { BootFromTemplate: sconfig.HypervisorConfig.BootFromTemplate, DisableVhostNet: sconfig.HypervisorConfig.DisableVhostNet, EnableVhostUserStore: sconfig.HypervisorConfig.EnableVhostUserStore, + SeccompSandbox: sconfig.HypervisorConfig.SeccompSandbox, VhostUserStorePath: sconfig.HypervisorConfig.VhostUserStorePath, VhostUserStorePathList: sconfig.HypervisorConfig.VhostUserStorePathList, GuestHookPath: sconfig.HypervisorConfig.GuestHookPath, diff --git a/src/runtime/virtcontainers/persist/api/config.go b/src/runtime/virtcontainers/persist/api/config.go index 0af8a09227..1c16b7bd91 100644 --- a/src/runtime/virtcontainers/persist/api/config.go +++ b/src/runtime/virtcontainers/persist/api/config.go @@ -80,6 +80,9 @@ type HypervisorConfig struct { // related folders, sockets and device nodes should be. 
VhostUserStorePath string + // SeccompSandbox is the qemu function which enables the seccomp feature + SeccompSandbox string + // GuestHookPath is the path within the VM that will be used for 'drop-in' hooks GuestHookPath string diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index 656548e88c..b56ffda051 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -629,30 +629,32 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi // some devices configuration may also change kernel params, make sure this is called afterwards Params: q.kernelParameters(), } + q.checkBpfEnabled() qemuConfig := govmmQemu.Config{ - Name: fmt.Sprintf("sandbox-%s", q.id), - UUID: q.state.UUID, - Path: qemuPath, - Ctx: q.qmpMonitorCh.ctx, - Uid: q.config.Uid, - Gid: q.config.Gid, - Groups: q.config.Groups, - Machine: machine, - SMP: smp, - Memory: memory, - Devices: devices, - CPUModel: cpuModel, - Kernel: kernel, - RTC: rtc, - QMPSockets: qmpSockets, - Knobs: knobs, - Incoming: incoming, - VGA: "none", - GlobalParam: "kvm-pit.lost_tick_policy=discard", - Bios: firmwarePath, - PFlash: pflash, - PidFile: filepath.Join(q.config.VMStorePath, q.id, "pid"), + Name: fmt.Sprintf("sandbox-%s", q.id), + UUID: q.state.UUID, + Path: qemuPath, + Ctx: q.qmpMonitorCh.ctx, + Uid: q.config.Uid, + Gid: q.config.Gid, + Groups: q.config.Groups, + Machine: machine, + SMP: smp, + Memory: memory, + Devices: devices, + CPUModel: cpuModel, + SeccompSandbox: q.config.SeccompSandbox, + Kernel: kernel, + RTC: rtc, + QMPSockets: qmpSockets, + Knobs: knobs, + Incoming: incoming, + VGA: "none", + GlobalParam: "kvm-pit.lost_tick_policy=discard", + Bios: firmwarePath, + PFlash: pflash, + PidFile: filepath.Join(q.config.VMStorePath, q.id, "pid"), } qemuConfig.Devices, qemuConfig.Bios, err = q.arch.appendProtectionDevice(qemuConfig.Devices, firmwarePath, firmwareVolumePath) @@ -689,6 +691,25 @@ func (q *qemu) CreateVM(ctx 
context.Context, id string, network Network, hypervi return err } +func (q *qemu) checkBpfEnabled() { + if q.config.SeccompSandbox != "" { + out, err := os.ReadFile("/proc/sys/net/core/bpf_jit_enable") + if err != nil { + q.Logger().WithError(err).Warningf("failed to get bpf_jit_enable status") + return + } + enabled, err := strconv.Atoi(string(out)) + if err != nil { + q.Logger().WithError(err).Warningf("failed to convert bpf_jit_enable status to integer") + return + } + if enabled == 0 { + q.Logger().Warningf("bpf_jit_enable is disabled. " + + "It's recommended to turn on bpf_jit_enable to reduce the performance impact of QEMU seccomp sandbox.") + } + } +} + func (q *qemu) vhostFSSocketPath(id string) (string, error) { return utils.BuildSocketPath(q.config.VMStorePath, id, vhostFSSocket) }