runtime: enable sandbox feature on qemu

Enabling "-sandbox on" in qemu adds another layer of protection
on the host, making the secure container more secure.

The option is disabled by default because this feature may introduce some
performance cost, although users can enable
/proc/sys/net/core/bpf_jit_enable to reduce the impact.

Fixes: #2266

Signed-off-by: Feng Wang <feng.wang@databricks.com>
This commit is contained in:
Liang Zhou 2021-07-26 02:54:00 -07:00 committed by Feng Wang
parent 811ac6a8ce
commit ef925d40ce
8 changed files with 67 additions and 24 deletions

View File

@ -167,6 +167,11 @@ DEFDISABLEGUESTEMPTYDIR := false
DEFAULTEXPFEATURES := []
DEFDISABLESELINUX := false
#Default SeccompSandbox param
#The same default policy is used by libvirt
#More explanation on https://lists.gnu.org/archive/html/qemu-devel/2017-02/msg03348.html
# Note: "elevateprivileges=deny" doesn't work with daemonize option, so it's removed from the seccomp sandbox
DEFSECCOMPSANDBOXPARAM := on,obsolete=deny,spawn=deny,resourcecontrol=deny
#Default entropy source
DEFENTROPYSOURCE := /dev/urandom
@ -459,6 +464,7 @@ USER_VARS += DEFVIRTIOFSCACHE
USER_VARS += DEFVIRTIOFSEXTRAARGS
USER_VARS += DEFENABLEANNOTATIONS
USER_VARS += DEFENABLEIOTHREADS
USER_VARS += DEFSECCOMPSANDBOXPARAM
USER_VARS += DEFENABLEVHOSTUSERSTORE
USER_VARS += DEFVHOSTUSERSTOREPATH
USER_VARS += DEFVALIDVHOSTUSERSTOREPATHS

View File

@ -76,6 +76,14 @@ firmware_volume = "@FIRMWAREVOLUMEPATH@"
# For example, `machine_accelerators = "nosmm,nosmbus,nosata,nopit,static-prt,nofw"`
machine_accelerators="@MACHINEACCELERATORS@"
# Qemu seccomp sandbox feature
# comma-separated list of seccomp sandbox features to control the syscall access.
# For example, `seccompsandbox= "on,obsolete=deny,spawn=deny,resourcecontrol=deny"`
# Note: "elevateprivileges=deny" doesn't work with daemonize option, so it's removed from the seccomp sandbox
# Another note: enabling this feature may reduce performance. You can enable
# /proc/sys/net/core/bpf_jit_enable to reduce the impact. See https://man7.org/linux/man-pages/man8/bpfc.8.html
#seccompsandbox="@DEFSECCOMPSANDBOXPARAM@"
# CPU features
# comma-separated list of cpu features to pass to the cpu
# For example, `cpu_features = "pmu=off,vmx=off"`

View File

@ -15,6 +15,7 @@ package qemu
import (
"bytes"
"context"
"fmt"
"log"
"os"
@ -23,8 +24,6 @@ import (
"strconv"
"strings"
"syscall"
"context"
)
// Machine describes the machine type qemu will emulate.

View File

@ -95,6 +95,7 @@ type hypervisor struct {
FileBackedMemRootDir string `toml:"file_mem_backend"`
GuestHookPath string `toml:"guest_hook_path"`
GuestMemoryDumpPath string `toml:"guest_memory_dump_path"`
SeccompSandbox string `toml:"seccompsandbox"`
HypervisorPathList []string `toml:"valid_hypervisor_paths"`
JailerPathList []string `toml:"valid_jailer_paths"`
CtlPathList []string `toml:"valid_ctlpaths"`
@ -767,6 +768,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
EnableVhostUserStore: h.EnableVhostUserStore,
VhostUserStorePath: h.vhostUserStorePath(),
VhostUserStorePathList: h.VhostUserStorePathList,
SeccompSandbox: h.SeccompSandbox,
GuestHookPath: h.guestHookPath(),
RxRateLimiterMaxRate: rxRateLimiterMaxRate,
TxRateLimiterMaxRate: txRateLimiterMaxRate,

View File

@ -370,6 +370,9 @@ type HypervisorConfig struct {
// VhostUserStorePathList is the list of valid values for vhost-user paths
VhostUserStorePathList []string
// SeccompSandbox is the qemu seccomp sandbox parameter string (for example "on,obsolete=deny,spawn=deny,resourcecontrol=deny")
SeccompSandbox string
// KernelParams are additional guest kernel parameters.
KernelParams []Param

View File

@ -247,6 +247,7 @@ func (s *Sandbox) dumpConfig(ss *persistapi.SandboxState) {
BootFromTemplate: sconfig.HypervisorConfig.BootFromTemplate,
DisableVhostNet: sconfig.HypervisorConfig.DisableVhostNet,
EnableVhostUserStore: sconfig.HypervisorConfig.EnableVhostUserStore,
SeccompSandbox: sconfig.HypervisorConfig.SeccompSandbox,
VhostUserStorePath: sconfig.HypervisorConfig.VhostUserStorePath,
VhostUserStorePathList: sconfig.HypervisorConfig.VhostUserStorePathList,
GuestHookPath: sconfig.HypervisorConfig.GuestHookPath,

View File

@ -80,6 +80,9 @@ type HypervisorConfig struct {
// related folders, sockets and device nodes should be.
VhostUserStorePath string
// SeccompSandbox is the qemu seccomp sandbox parameter string (for example "on,obsolete=deny,spawn=deny,resourcecontrol=deny")
SeccompSandbox string
// GuestHookPath is the path within the VM that will be used for 'drop-in' hooks
GuestHookPath string

View File

@ -629,6 +629,7 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi
// some devices configuration may also change kernel params, make sure this is called afterwards
Params: q.kernelParameters(),
}
q.checkBpfEnabled()
qemuConfig := govmmQemu.Config{
Name: fmt.Sprintf("sandbox-%s", q.id),
@ -643,6 +644,7 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi
Memory: memory,
Devices: devices,
CPUModel: cpuModel,
SeccompSandbox: q.config.SeccompSandbox,
Kernel: kernel,
RTC: rtc,
QMPSockets: qmpSockets,
@ -689,6 +691,25 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi
return err
}
// checkBpfEnabled logs a warning when the QEMU seccomp sandbox is configured
// (q.config.SeccompSandbox is non-empty) but the kernel BPF JIT switch at
// /proc/sys/net/core/bpf_jit_enable reads as 0.
//
// The check is advisory only: it returns nothing, and a failure to read or
// parse the sysctl file is logged as a warning and otherwise ignored.
func (q *qemu) checkBpfEnabled() {
	// No sandbox parameters configured, so there is nothing to check.
	if q.config.SeccompSandbox == "" {
		return
	}

	out, err := os.ReadFile("/proc/sys/net/core/bpf_jit_enable")
	if err != nil {
		q.Logger().WithError(err).Warningf("failed to get bpf_jit_enable status")
		return
	}

	// procfs sysctl values are newline-terminated (e.g. "1\n"). strconv.Atoi
	// rejects that input, so the raw content must not be handed to it as-is.
	// fmt.Sscan skips leading whitespace and treats newlines as space, so it
	// parses the value without needing any extra trimming.
	var enabled int
	if _, err := fmt.Sscan(string(out), &enabled); err != nil {
		q.Logger().WithError(err).Warningf("failed to convert bpf_jit_enable status to integer")
		return
	}

	if enabled == 0 {
		q.Logger().Warningf("bpf_jit_enable is disabled. " +
			"It's recommended to turn on bpf_jit_enable to reduce the performance impact of QEMU seccomp sandbox.")
	}
}
// vhostFSSocketPath returns the result of utils.BuildSocketPath for the
// configured VM store path, the given id, and the vhostFSSocket name.
// Any error from utils.BuildSocketPath is passed through unchanged.
func (q *qemu) vhostFSSocketPath(id string) (string, error) {
	storeDir := q.config.VMStorePath
	socketPath, err := utils.BuildSocketPath(storeDir, id, vhostFSSocket)
	return socketPath, err
}