mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-09-15 05:49:05 +00:00
runtime: Run QEMU using a non-root user/group
A random generated user/group is used to start QEMU VMM process. The /dev/kvm group owner is also added to the QEMU process to grant it access. Fixes #2444 Signed-off-by: Feng Wang <feng.wang@databricks.com>
This commit is contained in:
@@ -9,6 +9,7 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/persist/fs"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
@@ -136,6 +137,11 @@ func TestCreateSandboxNoopAgentSuccessful(t *testing.T) {
|
||||
}
|
||||
defer cleanUp()
|
||||
|
||||
// Pre-create the directory path to avoid panic error. Without this change, ff the test is run as a non-root user,
|
||||
// this test will fail because of permission denied error in chown syscall in the utils.MkdirAllWithInheritedOwner() method
|
||||
err := os.MkdirAll(fs.MockRunStoragePath(), DirMode)
|
||||
assert.NoError(err)
|
||||
|
||||
config := newTestSandboxConfigNoop()
|
||||
|
||||
ctx := WithNewAgentFunc(context.Background(), newMockAgent)
|
||||
|
@@ -228,6 +228,9 @@ type HypervisorConfig struct {
|
||||
// it will be used for the sandbox's kernel path instead of KernelPath.
|
||||
customAssets map[types.AssetType]*types.Asset
|
||||
|
||||
// Supplementary group IDs.
|
||||
Groups []uint32
|
||||
|
||||
// KernelPath is the guest kernel host path.
|
||||
KernelPath string
|
||||
|
||||
@@ -382,6 +385,12 @@ type HypervisorConfig struct {
|
||||
// VirtioFSCacheSize is the DAX cache size in MiB
|
||||
VirtioFSCacheSize uint32
|
||||
|
||||
// User ID.
|
||||
Uid uint32
|
||||
|
||||
// Group ID.
|
||||
Gid uint32
|
||||
|
||||
// BlockDeviceCacheSet specifies cache-related options will be set to block devices or not.
|
||||
BlockDeviceCacheSet bool
|
||||
|
||||
@@ -461,6 +470,9 @@ type HypervisorConfig struct {
|
||||
|
||||
// GuestSwap Used to enable/disable swap in the guest
|
||||
GuestSwap bool
|
||||
|
||||
// Rootless is used to enable rootless VMM process
|
||||
Rootless bool
|
||||
}
|
||||
|
||||
// vcpu mapping from vcpu number to thread number
|
||||
|
@@ -9,6 +9,7 @@ package fs
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
@@ -86,7 +87,7 @@ func (fs *FS) ToDisk(ss persistapi.SandboxState, cs map[string]persistapi.Contai
|
||||
return err
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(sandboxDir, dirMode); err != nil {
|
||||
if err := utils.MkdirAllWithInheritedOwner(sandboxDir, dirMode); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -123,7 +124,7 @@ func (fs *FS) ToDisk(ss persistapi.SandboxState, cs map[string]persistapi.Contai
|
||||
// persist container configuration data
|
||||
for cid, cstate := range fs.containerState {
|
||||
cdir := filepath.Join(sandboxDir, cid)
|
||||
if dirCreationErr = os.MkdirAll(cdir, dirMode); dirCreationErr != nil {
|
||||
if dirCreationErr = utils.MkdirAllWithInheritedOwner(cdir, dirMode); dirCreationErr != nil {
|
||||
return dirCreationErr
|
||||
}
|
||||
createdDirs = append(createdDirs, cdir)
|
||||
@@ -288,7 +289,7 @@ func (fs *FS) GlobalWrite(relativePath string, data []byte) error {
|
||||
|
||||
_, err = os.Stat(dir)
|
||||
if os.IsNotExist(err) {
|
||||
if err := os.MkdirAll(dir, dirMode); err != nil {
|
||||
if err := utils.MkdirAllWithInheritedOwner(dir, dirMode); err != nil {
|
||||
fs.Logger().WithError(err).WithField("directory", dir).Error("failed to create dir")
|
||||
return err
|
||||
}
|
||||
|
@@ -223,6 +223,9 @@ const (
|
||||
|
||||
// EnableGuestSwap is a sandbox annotation to enable swap in the guest.
|
||||
EnableGuestSwap = kataAnnotHypervisorPrefix + "enable_guest_swap"
|
||||
|
||||
// EnableRootlessHypervisor is a sandbox annotation to enable rootless hypervisor (only supported in QEMU currently).
|
||||
EnableRootlessHypervisor = kataAnnotHypervisorPrefix + "rootless"
|
||||
)
|
||||
|
||||
// Runtime related annotations
|
||||
|
@@ -622,6 +622,12 @@ func addHypervisorMemoryOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig
|
||||
return err
|
||||
}
|
||||
|
||||
if err := newAnnotationConfiguration(ocispec, vcAnnotations.EnableRootlessHypervisor).setBool(func(enableRootlessHypervisor bool) {
|
||||
sbConfig.HypervisorConfig.Rootless = enableRootlessHypervisor
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@@ -23,6 +23,7 @@ import (
|
||||
"context"
|
||||
"crypto/rand"
|
||||
"fmt"
|
||||
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
@@ -106,7 +107,7 @@ func NewNS() (ns.NetNS, error) {
|
||||
// Create the directory for mounting network namespaces
|
||||
// This needs to be a shared mountpoint in case it is mounted in to
|
||||
// other namespaces (containers)
|
||||
err = os.MkdirAll(nsRunDir, 0755)
|
||||
err = utils.MkdirAllWithInheritedOwner(nsRunDir, 0755)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@@ -11,9 +11,11 @@ import (
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/rootless"
|
||||
"io/ioutil"
|
||||
"math"
|
||||
"os"
|
||||
"os/user"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
@@ -83,6 +85,7 @@ type QemuState struct {
|
||||
}
|
||||
|
||||
// qemu is an Hypervisor interface implementation for the Linux qemu hypervisor.
|
||||
// nolint: govet
|
||||
type qemu struct {
|
||||
arch qemuArch
|
||||
|
||||
@@ -271,7 +274,7 @@ func (q *qemu) setup(ctx context.Context, id string, hypervisorConfig *Hyperviso
|
||||
|
||||
// The path might already exist, but in case of VM templating,
|
||||
// we have to create it since the sandbox has not created it yet.
|
||||
if err = os.MkdirAll(filepath.Join(q.store.RunStoragePath(), id), DirMode); err != nil {
|
||||
if err = utils.MkdirAllWithInheritedOwner(filepath.Join(q.store.RunStoragePath(), id), DirMode); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -583,6 +586,9 @@ func (q *qemu) createSandbox(ctx context.Context, id string, networkNS NetworkNa
|
||||
UUID: q.state.UUID,
|
||||
Path: qemuPath,
|
||||
Ctx: q.qmpMonitorCh.ctx,
|
||||
Uid: q.config.Uid,
|
||||
Gid: q.config.Gid,
|
||||
Groups: q.config.Groups,
|
||||
Machine: machine,
|
||||
SMP: smp,
|
||||
Memory: memory,
|
||||
@@ -775,10 +781,11 @@ func (q *qemu) startSandbox(ctx context.Context, timeout int) error {
|
||||
}()
|
||||
|
||||
vmPath := filepath.Join(q.store.RunVMStoragePath(), q.id)
|
||||
err := os.MkdirAll(vmPath, DirMode)
|
||||
err := utils.MkdirAllWithInheritedOwner(vmPath, DirMode)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
q.Logger().WithField("vm path", vmPath).Info("created vm path")
|
||||
// append logfile only on debug
|
||||
if q.config.Debug {
|
||||
q.qemuConfig.LogFile = filepath.Join(vmPath, "qemu.log")
|
||||
@@ -1007,6 +1014,27 @@ func (q *qemu) cleanupVM() error {
|
||||
}
|
||||
}
|
||||
|
||||
if rootless.IsRootless() {
|
||||
rootlessDir := os.Getenv("XDG_RUNTIME_DIR")
|
||||
if err := os.RemoveAll(rootlessDir); err != nil {
|
||||
q.Logger().WithError(err).WithField("root-path", rootlessDir).
|
||||
Warnf("failed to remove vm run-as-user root path")
|
||||
}
|
||||
|
||||
u, err := user.LookupId(strconv.Itoa(int(q.config.Uid)))
|
||||
if err != nil {
|
||||
q.Logger().WithError(err).WithField("uid", q.config.Uid).Warn("failed to find the user")
|
||||
}
|
||||
userdelPath, err := pkgUtils.FirstValidExecutable([]string{"/usr/sbin/userdel", "/sbin/userdel", "/bin/userdel"})
|
||||
if err != nil {
|
||||
q.Logger().WithError(err).WithField("user", u.Username).Warn("failed to delete the user")
|
||||
}
|
||||
_, err = pkgUtils.RunCommand([]string{userdelPath, "-f", u.Username})
|
||||
if err != nil {
|
||||
q.Logger().WithError(err).WithField("user", u.Username).Warn("failed to delete the user")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@@ -390,3 +390,85 @@ outer:
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// MkdirAllWithInheritedOwner creates a directory named path, along with any necessary parents.
|
||||
// It creates the missing directories with the ownership of the last existing parent.
|
||||
// The path needs to be absolute and the method doesn't handle symlink.
|
||||
func MkdirAllWithInheritedOwner(path string, perm os.FileMode) error {
|
||||
if len(path) == 0 {
|
||||
return fmt.Errorf("the path is empty")
|
||||
}
|
||||
|
||||
// By default, use the uid and gid of the calling process.
|
||||
var uid = os.Getuid()
|
||||
var gid = os.Getgid()
|
||||
|
||||
paths := getAllParentPaths(path)
|
||||
for _, curPath := range paths {
|
||||
info, err := os.Stat(curPath)
|
||||
|
||||
if err != nil {
|
||||
if err = os.Mkdir(curPath, perm); err != nil {
|
||||
return fmt.Errorf("mkdir call failed: %v", err.Error())
|
||||
}
|
||||
if err = syscall.Chown(curPath, uid, gid); err != nil {
|
||||
return fmt.Errorf("chown syscall failed: %v", err.Error())
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if !info.IsDir() {
|
||||
return &os.PathError{Op: "mkdir", Path: curPath, Err: syscall.ENOTDIR}
|
||||
}
|
||||
if stat, ok := info.Sys().(*syscall.Stat_t); ok {
|
||||
uid = int(stat.Uid)
|
||||
gid = int(stat.Gid)
|
||||
} else {
|
||||
return fmt.Errorf("fail to retrieve the uid and gid of path %s", curPath)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ChownToParent changes the owners of the path to the same of parent directory.
|
||||
// The path needs to be absolute and the method doesn't handle symlink.
|
||||
func ChownToParent(path string) error {
|
||||
if len(path) == 0 {
|
||||
return fmt.Errorf("the path is empty")
|
||||
}
|
||||
|
||||
if !filepath.IsAbs(path) {
|
||||
return fmt.Errorf("the path is not absolute")
|
||||
}
|
||||
|
||||
info, err := os.Stat(filepath.Dir(path))
|
||||
if err != nil {
|
||||
return fmt.Errorf("os.Stat() error on %s: %s", filepath.Dir(path), err.Error())
|
||||
}
|
||||
if stat, ok := info.Sys().(*syscall.Stat_t); ok {
|
||||
if err = syscall.Chown(path, int(stat.Uid), int(stat.Gid)); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("fail to retrieve the uid and gid of path %s", path)
|
||||
}
|
||||
|
||||
// getAllParentPaths returns all the parent directories of a path, including itself but excluding root directory "/".
|
||||
// For example, "/foo/bar/biz" returns {"/foo", "/foo/bar", "/foo/bar/biz"}
|
||||
func getAllParentPaths(path string) []string {
|
||||
if path == "/" || path == "." {
|
||||
return []string{}
|
||||
}
|
||||
|
||||
paths := []string{filepath.Clean(path)}
|
||||
cur := path
|
||||
var parent string
|
||||
for cur != "/" && cur != "." {
|
||||
parent = filepath.Dir(cur)
|
||||
paths = append([]string{parent}, paths...)
|
||||
cur = parent
|
||||
}
|
||||
// remove the "/" or "." from the return result
|
||||
return paths[1:]
|
||||
}
|
||||
|
@@ -10,6 +10,7 @@ import (
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"strings"
|
||||
@@ -469,3 +470,124 @@ func TestWaitLocalProcessInvalidPid(t *testing.T) {
|
||||
assert.Error(err, msg)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMkdirAllWithInheritedOwnerSuccessful(t *testing.T) {
|
||||
if os.Getuid() != 0 {
|
||||
t.Skip("Test disabled as requires root user")
|
||||
}
|
||||
assert := assert.New(t)
|
||||
tmpDir1, err := ioutil.TempDir("", "test")
|
||||
assert.NoError(err)
|
||||
defer os.RemoveAll(tmpDir1)
|
||||
tmpDir2, err := ioutil.TempDir("", "test")
|
||||
assert.NoError(err)
|
||||
defer os.RemoveAll(tmpDir2)
|
||||
|
||||
testCases := []struct {
|
||||
before func(rootDir string, uid, gid int)
|
||||
rootDir string
|
||||
targetDir string
|
||||
uid int
|
||||
gid int
|
||||
}{
|
||||
{
|
||||
before: func(rootDir string, uid, gid int) {
|
||||
err = syscall.Chown(rootDir, uid, gid)
|
||||
assert.NoError(err)
|
||||
},
|
||||
rootDir: tmpDir1,
|
||||
targetDir: path.Join(tmpDir1, "foo", "bar"),
|
||||
uid: 1234,
|
||||
gid: 5678,
|
||||
},
|
||||
{
|
||||
before: func(rootDir string, uid, gid int) {
|
||||
// remove the tmpDir2 so the MkdirAllWithInheritedOwner() call creates them from /tmp
|
||||
err = os.RemoveAll(tmpDir2)
|
||||
assert.NoError(err)
|
||||
},
|
||||
rootDir: tmpDir2,
|
||||
targetDir: path.Join(tmpDir2, "foo", "bar"),
|
||||
uid: 0,
|
||||
gid: 0,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
if tc.before != nil {
|
||||
tc.before(tc.rootDir, tc.uid, tc.gid)
|
||||
}
|
||||
|
||||
err := MkdirAllWithInheritedOwner(tc.targetDir, 0700)
|
||||
assert.NoError(err)
|
||||
// remove the first parent "/tmp" from the assertion as it's owned by root
|
||||
for _, p := range getAllParentPaths(tc.targetDir)[1:] {
|
||||
info, err := os.Stat(p)
|
||||
assert.NoError(err)
|
||||
assert.True(info.IsDir())
|
||||
stat, ok := info.Sys().(*syscall.Stat_t)
|
||||
assert.True(ok)
|
||||
assert.Equal(tc.uid, int(stat.Uid))
|
||||
assert.Equal(tc.gid, int(stat.Gid))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestChownToParent(t *testing.T) {
|
||||
if os.Getuid() != 0 {
|
||||
t.Skip("Test disabled as requires root user")
|
||||
}
|
||||
assert := assert.New(t)
|
||||
rootDir, err := ioutil.TempDir("", "root")
|
||||
assert.NoError(err)
|
||||
defer os.RemoveAll(rootDir)
|
||||
uid := 1234
|
||||
gid := 5678
|
||||
err = syscall.Chown(rootDir, uid, gid)
|
||||
assert.NoError(err)
|
||||
|
||||
targetDir := path.Join(rootDir, "foo")
|
||||
|
||||
err = os.MkdirAll(targetDir, 0700)
|
||||
assert.NoError(err)
|
||||
|
||||
err = ChownToParent(targetDir)
|
||||
assert.NoError(err)
|
||||
|
||||
info, err := os.Stat(targetDir)
|
||||
assert.NoError(err)
|
||||
stat, ok := info.Sys().(*syscall.Stat_t)
|
||||
assert.True(ok)
|
||||
assert.Equal(uid, int(stat.Uid))
|
||||
assert.Equal(gid, int(stat.Gid))
|
||||
}
|
||||
|
||||
func TestGetAllParentPaths(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
testCases := []struct {
|
||||
targetPath string
|
||||
parents []string
|
||||
}{
|
||||
{
|
||||
targetPath: "/",
|
||||
parents: []string{},
|
||||
},
|
||||
{
|
||||
targetPath: ".",
|
||||
parents: []string{},
|
||||
},
|
||||
{
|
||||
targetPath: "foo",
|
||||
parents: []string{"foo"},
|
||||
},
|
||||
{
|
||||
targetPath: "/tmp/bar",
|
||||
parents: []string{"/tmp", "/tmp/bar"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
assert.Equal(tc.parents, getAllParentPaths(tc.targetPath))
|
||||
}
|
||||
}
|
||||
|
@@ -79,6 +79,12 @@ func (v *virtiofsd) getSocketFD() (*os.File, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Need to change the filesystem ownership of the socket because virtiofsd runs as root while qemu can run as non-root.
|
||||
// This can be removed once virtiofsd can also run as non-root (https://github.com/kata-containers/kata-containers/issues/2542)
|
||||
if err := utils.ChownToParent(v.socketPath); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// no longer needed since fd is a dup
|
||||
defer listener.Close()
|
||||
|
||||
|
@@ -9,6 +9,7 @@ import (
|
||||
"context"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
@@ -37,7 +38,7 @@ func TestVirtiofsdStart(t *testing.T) {
|
||||
assert.NoError(err)
|
||||
defer os.RemoveAll(socketDir)
|
||||
|
||||
socketPath := socketDir + "socket.s"
|
||||
socketPath := path.Join(socketDir, "socket.s")
|
||||
|
||||
validConfig := fields{
|
||||
path: "/usr/bin/virtiofsd-path",
|
||||
|
Reference in New Issue
Block a user