Merge pull request #2432 from fengwang666/qemu-rootless

runtime: run the QEMU VMM process with a non-root user
This commit is contained in:
Fabiano Fidêncio
2021-09-21 21:37:02 +02:00
committed by GitHub
17 changed files with 505 additions and 9 deletions

View File

@@ -9,6 +9,7 @@ import (
"context"
"encoding/json"
"fmt"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/persist/fs"
"os"
"path/filepath"
"strings"
@@ -136,6 +137,11 @@ func TestCreateSandboxNoopAgentSuccessful(t *testing.T) {
}
defer cleanUp()
// Pre-create the directory path to avoid panic error. Without this change, ff the test is run as a non-root user,
// this test will fail because of permission denied error in chown syscall in the utils.MkdirAllWithInheritedOwner() method
err := os.MkdirAll(fs.MockRunStoragePath(), DirMode)
assert.NoError(err)
config := newTestSandboxConfigNoop()
ctx := WithNewAgentFunc(context.Background(), newMockAgent)

View File

@@ -228,6 +228,9 @@ type HypervisorConfig struct {
// it will be used for the sandbox's kernel path instead of KernelPath.
customAssets map[types.AssetType]*types.Asset
// Supplementary group IDs.
Groups []uint32
// KernelPath is the guest kernel host path.
KernelPath string
@@ -382,6 +385,12 @@ type HypervisorConfig struct {
// VirtioFSCacheSize is the DAX cache size in MiB
VirtioFSCacheSize uint32
// User ID.
Uid uint32
// Group ID.
Gid uint32
// BlockDeviceCacheSet specifies cache-related options will be set to block devices or not.
BlockDeviceCacheSet bool
@@ -461,6 +470,9 @@ type HypervisorConfig struct {
// GuestSwap Used to enable/disable swap in the guest
GuestSwap bool
// Rootless is used to enable rootless VMM process
Rootless bool
}
// vcpu mapping from vcpu number to thread number

View File

@@ -9,6 +9,7 @@ package fs
import (
"encoding/json"
"fmt"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
"io/ioutil"
"os"
"path/filepath"
@@ -86,7 +87,7 @@ func (fs *FS) ToDisk(ss persistapi.SandboxState, cs map[string]persistapi.Contai
return err
}
if err := os.MkdirAll(sandboxDir, dirMode); err != nil {
if err := utils.MkdirAllWithInheritedOwner(sandboxDir, dirMode); err != nil {
return err
}
@@ -123,7 +124,7 @@ func (fs *FS) ToDisk(ss persistapi.SandboxState, cs map[string]persistapi.Contai
// persist container configuration data
for cid, cstate := range fs.containerState {
cdir := filepath.Join(sandboxDir, cid)
if dirCreationErr = os.MkdirAll(cdir, dirMode); dirCreationErr != nil {
if dirCreationErr = utils.MkdirAllWithInheritedOwner(cdir, dirMode); dirCreationErr != nil {
return dirCreationErr
}
createdDirs = append(createdDirs, cdir)
@@ -288,7 +289,7 @@ func (fs *FS) GlobalWrite(relativePath string, data []byte) error {
_, err = os.Stat(dir)
if os.IsNotExist(err) {
if err := os.MkdirAll(dir, dirMode); err != nil {
if err := utils.MkdirAllWithInheritedOwner(dir, dirMode); err != nil {
fs.Logger().WithError(err).WithField("directory", dir).Error("failed to create dir")
return err
}

View File

@@ -223,6 +223,9 @@ const (
// EnableGuestSwap is a sandbox annotation to enable swap in the guest.
EnableGuestSwap = kataAnnotHypervisorPrefix + "enable_guest_swap"
// EnableRootlessHypervisor is a sandbox annotation to enable rootless hypervisor (only supported in QEMU currently).
EnableRootlessHypervisor = kataAnnotHypervisorPrefix + "rootless"
)
// Runtime related annotations

View File

@@ -622,6 +622,12 @@ func addHypervisorMemoryOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig
return err
}
if err := newAnnotationConfiguration(ocispec, vcAnnotations.EnableRootlessHypervisor).setBool(func(enableRootlessHypervisor bool) {
sbConfig.HypervisorConfig.Rootless = enableRootlessHypervisor
}); err != nil {
return err
}
return nil
}

View File

@@ -23,6 +23,7 @@ import (
"context"
"crypto/rand"
"fmt"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
"os"
"path/filepath"
"runtime"
@@ -106,7 +107,7 @@ func NewNS() (ns.NetNS, error) {
// Create the directory for mounting network namespaces
// This needs to be a shared mountpoint in case it is mounted in to
// other namespaces (containers)
err = os.MkdirAll(nsRunDir, 0755)
err = utils.MkdirAllWithInheritedOwner(nsRunDir, 0755)
if err != nil {
return nil, err
}

View File

@@ -11,9 +11,11 @@ import (
"encoding/hex"
"encoding/json"
"fmt"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/rootless"
"io/ioutil"
"math"
"os"
"os/user"
"path/filepath"
"strconv"
"strings"
@@ -83,6 +85,7 @@ type QemuState struct {
}
// qemu is an Hypervisor interface implementation for the Linux qemu hypervisor.
// nolint: govet
type qemu struct {
arch qemuArch
@@ -271,7 +274,7 @@ func (q *qemu) setup(ctx context.Context, id string, hypervisorConfig *Hyperviso
// The path might already exist, but in case of VM templating,
// we have to create it since the sandbox has not created it yet.
if err = os.MkdirAll(filepath.Join(q.store.RunStoragePath(), id), DirMode); err != nil {
if err = utils.MkdirAllWithInheritedOwner(filepath.Join(q.store.RunStoragePath(), id), DirMode); err != nil {
return err
}
}
@@ -583,6 +586,9 @@ func (q *qemu) createSandbox(ctx context.Context, id string, networkNS NetworkNa
UUID: q.state.UUID,
Path: qemuPath,
Ctx: q.qmpMonitorCh.ctx,
Uid: q.config.Uid,
Gid: q.config.Gid,
Groups: q.config.Groups,
Machine: machine,
SMP: smp,
Memory: memory,
@@ -775,10 +781,11 @@ func (q *qemu) startSandbox(ctx context.Context, timeout int) error {
}()
vmPath := filepath.Join(q.store.RunVMStoragePath(), q.id)
err := os.MkdirAll(vmPath, DirMode)
err := utils.MkdirAllWithInheritedOwner(vmPath, DirMode)
if err != nil {
return err
}
q.Logger().WithField("vm path", vmPath).Info("created vm path")
// append logfile only on debug
if q.config.Debug {
q.qemuConfig.LogFile = filepath.Join(vmPath, "qemu.log")
@@ -1007,6 +1014,27 @@ func (q *qemu) cleanupVM() error {
}
}
if rootless.IsRootless() {
rootlessDir := os.Getenv("XDG_RUNTIME_DIR")
if err := os.RemoveAll(rootlessDir); err != nil {
q.Logger().WithError(err).WithField("root-path", rootlessDir).
Warnf("failed to remove vm run-as-user root path")
}
u, err := user.LookupId(strconv.Itoa(int(q.config.Uid)))
if err != nil {
q.Logger().WithError(err).WithField("uid", q.config.Uid).Warn("failed to find the user")
}
userdelPath, err := pkgUtils.FirstValidExecutable([]string{"/usr/sbin/userdel", "/sbin/userdel", "/bin/userdel"})
if err != nil {
q.Logger().WithError(err).WithField("user", u.Username).Warn("failed to delete the user")
}
_, err = pkgUtils.RunCommand([]string{userdelPath, "-f", u.Username})
if err != nil {
q.Logger().WithError(err).WithField("user", u.Username).Warn("failed to delete the user")
}
}
return nil
}

View File

@@ -390,3 +390,85 @@ outer:
return nil
}
// MkdirAllWithInheritedOwner creates a directory named path, along with any necessary parents.
// It creates the missing directories with the ownership of the last existing parent.
// The path needs to be absolute and the method doesn't handle symlink.
func MkdirAllWithInheritedOwner(path string, perm os.FileMode) error {
if len(path) == 0 {
return fmt.Errorf("the path is empty")
}
// By default, use the uid and gid of the calling process.
var uid = os.Getuid()
var gid = os.Getgid()
paths := getAllParentPaths(path)
for _, curPath := range paths {
info, err := os.Stat(curPath)
if err != nil {
if err = os.Mkdir(curPath, perm); err != nil {
return fmt.Errorf("mkdir call failed: %v", err.Error())
}
if err = syscall.Chown(curPath, uid, gid); err != nil {
return fmt.Errorf("chown syscall failed: %v", err.Error())
}
continue
}
if !info.IsDir() {
return &os.PathError{Op: "mkdir", Path: curPath, Err: syscall.ENOTDIR}
}
if stat, ok := info.Sys().(*syscall.Stat_t); ok {
uid = int(stat.Uid)
gid = int(stat.Gid)
} else {
return fmt.Errorf("fail to retrieve the uid and gid of path %s", curPath)
}
}
return nil
}
// ChownToParent changes the owners of the path to the same of parent directory.
// The path needs to be absolute and the method doesn't handle symlink.
func ChownToParent(path string) error {
if len(path) == 0 {
return fmt.Errorf("the path is empty")
}
if !filepath.IsAbs(path) {
return fmt.Errorf("the path is not absolute")
}
info, err := os.Stat(filepath.Dir(path))
if err != nil {
return fmt.Errorf("os.Stat() error on %s: %s", filepath.Dir(path), err.Error())
}
if stat, ok := info.Sys().(*syscall.Stat_t); ok {
if err = syscall.Chown(path, int(stat.Uid), int(stat.Gid)); err != nil {
return err
}
return nil
}
return fmt.Errorf("fail to retrieve the uid and gid of path %s", path)
}
// getAllParentPaths returns all the parent directories of a path, including itself but excluding root directory "/".
// For example, "/foo/bar/biz" returns {"/foo", "/foo/bar", "/foo/bar/biz"}
func getAllParentPaths(path string) []string {
if path == "/" || path == "." {
return []string{}
}
paths := []string{filepath.Clean(path)}
cur := path
var parent string
for cur != "/" && cur != "." {
parent = filepath.Dir(cur)
paths = append([]string{parent}, paths...)
cur = parent
}
// remove the "/" or "." from the return result
return paths[1:]
}

View File

@@ -10,6 +10,7 @@ import (
"io/ioutil"
"os"
"os/exec"
"path"
"path/filepath"
"reflect"
"strings"
@@ -469,3 +470,124 @@ func TestWaitLocalProcessInvalidPid(t *testing.T) {
assert.Error(err, msg)
}
}
func TestMkdirAllWithInheritedOwnerSuccessful(t *testing.T) {
if os.Getuid() != 0 {
t.Skip("Test disabled as requires root user")
}
assert := assert.New(t)
tmpDir1, err := ioutil.TempDir("", "test")
assert.NoError(err)
defer os.RemoveAll(tmpDir1)
tmpDir2, err := ioutil.TempDir("", "test")
assert.NoError(err)
defer os.RemoveAll(tmpDir2)
testCases := []struct {
before func(rootDir string, uid, gid int)
rootDir string
targetDir string
uid int
gid int
}{
{
before: func(rootDir string, uid, gid int) {
err = syscall.Chown(rootDir, uid, gid)
assert.NoError(err)
},
rootDir: tmpDir1,
targetDir: path.Join(tmpDir1, "foo", "bar"),
uid: 1234,
gid: 5678,
},
{
before: func(rootDir string, uid, gid int) {
// remove the tmpDir2 so the MkdirAllWithInheritedOwner() call creates them from /tmp
err = os.RemoveAll(tmpDir2)
assert.NoError(err)
},
rootDir: tmpDir2,
targetDir: path.Join(tmpDir2, "foo", "bar"),
uid: 0,
gid: 0,
},
}
for _, tc := range testCases {
if tc.before != nil {
tc.before(tc.rootDir, tc.uid, tc.gid)
}
err := MkdirAllWithInheritedOwner(tc.targetDir, 0700)
assert.NoError(err)
// remove the first parent "/tmp" from the assertion as it's owned by root
for _, p := range getAllParentPaths(tc.targetDir)[1:] {
info, err := os.Stat(p)
assert.NoError(err)
assert.True(info.IsDir())
stat, ok := info.Sys().(*syscall.Stat_t)
assert.True(ok)
assert.Equal(tc.uid, int(stat.Uid))
assert.Equal(tc.gid, int(stat.Gid))
}
}
}
func TestChownToParent(t *testing.T) {
if os.Getuid() != 0 {
t.Skip("Test disabled as requires root user")
}
assert := assert.New(t)
rootDir, err := ioutil.TempDir("", "root")
assert.NoError(err)
defer os.RemoveAll(rootDir)
uid := 1234
gid := 5678
err = syscall.Chown(rootDir, uid, gid)
assert.NoError(err)
targetDir := path.Join(rootDir, "foo")
err = os.MkdirAll(targetDir, 0700)
assert.NoError(err)
err = ChownToParent(targetDir)
assert.NoError(err)
info, err := os.Stat(targetDir)
assert.NoError(err)
stat, ok := info.Sys().(*syscall.Stat_t)
assert.True(ok)
assert.Equal(uid, int(stat.Uid))
assert.Equal(gid, int(stat.Gid))
}
func TestGetAllParentPaths(t *testing.T) {
assert := assert.New(t)
testCases := []struct {
targetPath string
parents []string
}{
{
targetPath: "/",
parents: []string{},
},
{
targetPath: ".",
parents: []string{},
},
{
targetPath: "foo",
parents: []string{"foo"},
},
{
targetPath: "/tmp/bar",
parents: []string{"/tmp", "/tmp/bar"},
},
}
for _, tc := range testCases {
assert.Equal(tc.parents, getAllParentPaths(tc.targetPath))
}
}

View File

@@ -79,6 +79,12 @@ func (v *virtiofsd) getSocketFD() (*os.File, error) {
return nil, err
}
// Need to change the filesystem ownership of the socket because virtiofsd runs as root while qemu can run as non-root.
// This can be removed once virtiofsd can also run as non-root (https://github.com/kata-containers/kata-containers/issues/2542)
if err := utils.ChownToParent(v.socketPath); err != nil {
return nil, err
}
// no longer needed since fd is a dup
defer listener.Close()

View File

@@ -9,6 +9,7 @@ import (
"context"
"io/ioutil"
"os"
"path"
"strings"
"testing"
@@ -37,7 +38,7 @@ func TestVirtiofsdStart(t *testing.T) {
assert.NoError(err)
defer os.RemoveAll(socketDir)
socketPath := socketDir + "socket.s"
socketPath := path.Join(socketDir, "socket.s")
validConfig := fields{
path: "/usr/bin/virtiofsd-path",