diff --git a/src/runtime/config/configuration-qemu.toml.in b/src/runtime/config/configuration-qemu.toml.in index 9f4da58bed..89e7183618 100644 --- a/src/runtime/config/configuration-qemu.toml.in +++ b/src/runtime/config/configuration-qemu.toml.in @@ -24,6 +24,11 @@ machine_type = "@MACHINETYPE@" # Default false # confidential_guest = true +# Enable running QEMU VMM as a non-root user. +# By default QEMU VMM run as root. When this is set to true, QEMU VMM process runs as +# a non-root random user. See documentation for the limitations of this mode. +# rootless = true + # List of valid annotation names for the hypervisor # Each member of the list is a regular expression, which is the base name # of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path" diff --git a/src/runtime/pkg/containerd-shim-v2/create.go b/src/runtime/pkg/containerd-shim-v2/create.go index 7e9e20ae7c..8d25e162e2 100644 --- a/src/runtime/pkg/containerd-shim-v2/create.go +++ b/src/runtime/pkg/containerd-shim-v2/create.go @@ -10,8 +10,15 @@ package containerdshim import ( "context" "fmt" + "github.com/kata-containers/kata-containers/src/runtime/pkg/utils" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/rootless" + "math/rand" "os" + "os/user" + "path" "path/filepath" + "strconv" + "syscall" containerd_types "github.com/containerd/containerd/api/types" "github.com/containerd/containerd/mount" @@ -103,6 +110,12 @@ func create(ctx context.Context, s *service, r *taskAPI.CreateTaskRequest) (*con }() katautils.HandleFactory(ctx, vci, s.config) + rootless.SetRootless(s.config.HypervisorConfig.Rootless) + if rootless.IsRootless() { + if err := configureNonRootHypervisor(s.config); err != nil { + return nil, err + } + } // Pass service's context instead of local ctx to CreateSandbox(), since local // ctx will be canceled after this rpc service call, but the sandbox will live @@ -259,3 +272,112 @@ func doMount(mounts []*containerd_types.Mount, rootfs string) error { } return nil } + +func configureNonRootHypervisor(runtimeConfig *oci.RuntimeConfig) error { + userName, err := createVmmUser() + if err != nil { + return err + } + defer func() { + if err != nil { + removeVmmUser(userName) + } + }() + + u, err := user.Lookup(userName) + if err != nil { + return err + } + + uid, err := strconv.Atoi(u.Uid) + if err != nil { + return err + } + gid, err := strconv.Atoi(u.Gid) + if err != nil { + return err + } + runtimeConfig.HypervisorConfig.Uid = uint32(uid) + runtimeConfig.HypervisorConfig.Gid = uint32(gid) + + userTmpDir := path.Join("/run/user/", fmt.Sprint(uid)) + dir, err := os.Stat(userTmpDir) + if os.IsNotExist(err) { + if err = os.Mkdir(userTmpDir, vc.DirMode); err != nil { + return err + } + defer func() { + if err != nil { + if err = os.RemoveAll(userTmpDir); err != nil { + shimLog.WithField("userTmpDir", userTmpDir).WithError(err).Warn("failed to remove userTmpDir") + } + } + }() + if err = syscall.Chown(userTmpDir, uid, gid); err != nil { + return err + } + } + if dir != nil && !dir.IsDir() { + return fmt.Errorf("%s is expected to be a directory", userTmpDir) + } + + if err := os.Setenv("XDG_RUNTIME_DIR", userTmpDir); err != nil { + return err + } + + info, err := os.Stat("/dev/kvm") + if err != nil { + return err + } + if stat, ok := info.Sys().(*syscall.Stat_t); ok { + // Add the kvm group to the hypervisor supplemental group so that the hypervisor process can access /dev/kvm + runtimeConfig.HypervisorConfig.Groups = append(runtimeConfig.HypervisorConfig.Groups, stat.Gid) + return nil + } + return fmt.Errorf("failed to get the gid of /dev/kvm") +} + +func createVmmUser() (string, error) { + var ( + err error + userName string + ) + + useraddPath, err := utils.FirstValidExecutable([]string{"/usr/sbin/useradd", "/sbin/useradd", "/bin/useradd"}) + if err != nil { + return "", err + } + nologinPath, err := utils.FirstValidExecutable([]string{"/usr/sbin/nologin", "/sbin/nologin", "/bin/nologin"}) + if err != nil { + return "", err + } + + // Add retries to mitigate temporary errors and race conditions. For example, the user already exists + // or another instance of the runtime is also creating a user. + maxAttempt := 5 + for i := 0; i < maxAttempt; i++ { + userName = fmt.Sprintf("kata-%v", rand.Intn(100000)) + _, err = utils.RunCommand([]string{useraddPath, "-M", "-s", nologinPath, userName, "-c", "\"Kata Containers temporary hypervisor user\""}) + if err == nil { + return userName, nil + } + shimLog.WithField("attempt", i+1).WithField("username", userName). + WithError(err).Warn("failed to add user, will try again") + } + return "", fmt.Errorf("could not create VMM user: %v", err) +} + +func removeVmmUser(user string) { + userdelPath, err := utils.FirstValidExecutable([]string{"/usr/sbin/userdel", "/sbin/userdel", "/bin/userdel"}) + if err != nil { + shimLog.WithField("username", user).WithError(err).Warn("failed to remove user") + } + // Add retries to mitigate temporary errors and race conditions. + for i := 0; i < 5; i++ { + _, err := utils.RunCommand([]string{userdelPath, "-f", user}) + if err == nil { + return + } + shimLog.WithField("username", user).WithField("attempt", i+1).WithError(err).Warn("failed to remove user") + } +} diff --git a/src/runtime/pkg/katautils/config-settings.go.in b/src/runtime/pkg/katautils/config-settings.go.in index 4b92b4390b..a6805729e8 100644 --- a/src/runtime/pkg/katautils/config-settings.go.in +++ b/src/runtime/pkg/katautils/config-settings.go.in @@ -86,6 +86,7 @@ const defaultRxRateLimiterMaxRate = uint64(0) const defaultTxRateLimiterMaxRate = uint64(0) const defaultConfidentialGuest = false const defaultGuestSwap = false +const defaultRootlessHypervisor = false var defaultSGXEPCSize = int64(0) diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index 6be937726b..a412e0760a 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -105,15 +105,15 @@ type hypervisor struct { EnableAnnotations []string `toml:"enable_annotations"` RxRateLimiterMaxRate uint64 `toml:"rx_rate_limiter_max_rate"` TxRateLimiterMaxRate uint64 `toml:"tx_rate_limiter_max_rate"` + MemOffset uint64 `toml:"memory_offset"` VirtioFSCacheSize uint32 `toml:"virtio_fs_cache_size"` - NumVCPUs int32 `toml:"default_vcpus"` DefaultMaxVCPUs uint32 `toml:"default_maxvcpus"` MemorySize uint32 `toml:"default_memory"` MemSlots uint32 `toml:"memory_slots"` - MemOffset uint64 `toml:"memory_offset"` DefaultBridges uint32 `toml:"default_bridges"` Msize9p uint32 `toml:"msize_9p"` PCIeRootPort uint32 `toml:"pcie_root_port"` + NumVCPUs int32 `toml:"default_vcpus"` BlockDeviceCacheSet bool `toml:"block_device_cache_set"` BlockDeviceCacheDirect bool `toml:"block_device_cache_direct"` BlockDeviceCacheNoflush bool `toml:"block_device_cache_noflush"` @@ -134,6 +134,7 @@ type hypervisor struct { GuestMemoryDumpPaging bool `toml:"guest_memory_dump_paging"` ConfidentialGuest bool `toml:"confidential_guest"` GuestSwap bool `toml:"enable_guest_swap"` + Rootless bool `toml:"rootless"` } type runtime struct { @@ -713,6 +714,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { GuestMemoryDumpPaging: h.GuestMemoryDumpPaging, ConfidentialGuest: h.ConfidentialGuest, GuestSwap: h.GuestSwap, + Rootless: h.Rootless, }, nil } @@ -1069,6 +1071,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig { SGXEPCSize: defaultSGXEPCSize, ConfidentialGuest: defaultConfidentialGuest, GuestSwap: defaultGuestSwap, + Rootless: defaultRootlessHypervisor, } } diff --git a/src/runtime/pkg/utils/utils.go b/src/runtime/pkg/utils/utils.go index fd09bbd70e..034331e605 100644 --- a/src/runtime/pkg/utils/utils.go +++ b/src/runtime/pkg/utils/utils.go @@ -80,3 +80,22 @@ func EnsureDir(path string, mode os.FileMode) error { return nil } + +func FirstValidExecutable(paths []string) (string, error) { + for _, p := range paths { + info, err := os.Stat(p) + if err != nil { + if os.IsNotExist(err) { + continue + } + return "", err + } + mode := info.Mode() + // check whether the file is an executable + if mode&0111 == 0 { + continue + } + return p, nil + } + return "", fmt.Errorf("all the executables are invalid") +} diff --git a/src/runtime/pkg/utils/utils_test.go b/src/runtime/pkg/utils/utils_test.go index 5af0f1e0c5..1fda6577a1 100644 --- a/src/runtime/pkg/utils/utils_test.go +++ b/src/runtime/pkg/utils/utils_test.go @@ -10,6 +10,7 @@ import ( "io/ioutil" "net/http" "os" + "path" "testing" "github.com/stretchr/testify/assert" @@ -117,3 +118,80 @@ func TestEnsureDir(t *testing.T) { } } } + +func TestFirstValidExecutable(t *testing.T) { + assert := assert.New(t) + tmpdir, err := ioutil.TempDir("", "TestFirstValidPath") + assert.NoError(err) + defer os.RemoveAll(tmpdir) + + // nolint: govet + testCases := []struct { + before func() + paths []string + validPath string + isValid bool + msg string + }{ + { + before: nil, + paths: []string{"a/b/c", "c/d"}, + validPath: "", + isValid: false, + msg: "all the executables are invalid", + }, + { + before: func() { + err := os.MkdirAll(path.Join(tmpdir, "a", "b"), 0755) + assert.NoError(err) + // create a non-executable file + err = ioutil.WriteFile(path.Join(tmpdir, "a", "b", "c"), []byte("test\n"), 0644) + assert.NoError(err) + }, + paths: []string{path.Join(tmpdir, "a", "b", "c"), "c/d"}, + validPath: "", + isValid: false, + msg: "all the executables are invalid", + }, + { + before: func() { + err := os.MkdirAll(path.Join(tmpdir, "d", "e"), 0755) + assert.NoError(err) + // create an executable file + err = ioutil.WriteFile(path.Join(tmpdir, "d", "e", "f"), []byte("test\n"), 0755) + assert.NoError(err) + }, + paths: []string{path.Join(tmpdir, "d", "e", "f"), "c/d"}, + validPath: fmt.Sprintf("%s/d/e/f", tmpdir), + isValid: true, + msg: "", + }, + { + before: func() { + err := os.MkdirAll(path.Join(tmpdir, "g", "h"), 0755) + assert.NoError(err) + // create an executable file + err = ioutil.WriteFile(path.Join(tmpdir, "g", "h", "i"), []byte("test\n"), 0755) + assert.NoError(err) + }, + paths: []string{"c/d", path.Join(tmpdir, "g", "h", "i")}, + validPath: path.Join(tmpdir, "g", "h", "i"), + isValid: true, + msg: "", + }, + } + + for _, tc := range testCases { + if tc.before != nil { + tc.before() + } + path, err := FirstValidExecutable(tc.paths) + assert.Equal(tc.isValid, err == nil) + if tc.isValid { + assert.Equal(tc.validPath, path) + } else { + assert.Equal(err.Error(), tc.msg) + assert.Equal(tc.validPath, "") + } + } +} diff --git a/src/runtime/virtcontainers/api_test.go b/src/runtime/virtcontainers/api_test.go index d3999927f0..6790ce60b9 100644 --- a/src/runtime/virtcontainers/api_test.go +++ b/src/runtime/virtcontainers/api_test.go @@ -9,6 +9,7 @@ import ( "context" "encoding/json" "fmt" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/persist/fs" "os" "path/filepath" "strings" @@ -136,6 +137,11 @@ func TestCreateSandboxNoopAgentSuccessful(t *testing.T) { } defer cleanUp() + // Pre-create the directory path to avoid panic error. Without this change, ff the test is run as a non-root user, + // this test will fail because of permission denied error in chown syscall in the utils.MkdirAllWithInheritedOwner() method + err := os.MkdirAll(fs.MockRunStoragePath(), DirMode) + assert.NoError(err) + config := newTestSandboxConfigNoop() ctx := WithNewAgentFunc(context.Background(), newMockAgent) diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index bc92eb3034..c32c5d9378 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -228,6 +228,9 @@ type HypervisorConfig struct { // it will be used for the sandbox's kernel path instead of KernelPath. customAssets map[types.AssetType]*types.Asset + // Supplementary group IDs. + Groups []uint32 + // KernelPath is the guest kernel host path. KernelPath string @@ -382,6 +385,12 @@ type HypervisorConfig struct { // VirtioFSCacheSize is the DAX cache size in MiB VirtioFSCacheSize uint32 + // User ID. + Uid uint32 + + // Group ID. + Gid uint32 + // BlockDeviceCacheSet specifies cache-related options will be set to block devices or not. BlockDeviceCacheSet bool @@ -461,6 +470,9 @@ type HypervisorConfig struct { // GuestSwap Used to enable/disable swap in the guest GuestSwap bool + + // Rootless is used to enable rootless VMM process + Rootless bool } // vcpu mapping from vcpu number to thread number diff --git a/src/runtime/virtcontainers/persist/fs/fs.go b/src/runtime/virtcontainers/persist/fs/fs.go index a7570173f0..24ff5abee3 100644 --- a/src/runtime/virtcontainers/persist/fs/fs.go +++ b/src/runtime/virtcontainers/persist/fs/fs.go @@ -9,6 +9,7 @@ package fs import ( "encoding/json" "fmt" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils" "io/ioutil" "os" "path/filepath" @@ -86,7 +87,7 @@ func (fs *FS) ToDisk(ss persistapi.SandboxState, cs map[string]persistapi.Contai return err } - if err := os.MkdirAll(sandboxDir, dirMode); err != nil { + if err := utils.MkdirAllWithInheritedOwner(sandboxDir, dirMode); err != nil { return err } @@ -123,7 +124,7 @@ func (fs *FS) ToDisk(ss persistapi.SandboxState, cs map[string]persistapi.Contai // persist container configuration data for cid, cstate := range fs.containerState { cdir := filepath.Join(sandboxDir, cid) - if dirCreationErr = os.MkdirAll(cdir, dirMode); dirCreationErr != nil { + if dirCreationErr = utils.MkdirAllWithInheritedOwner(cdir, dirMode); dirCreationErr != nil { return dirCreationErr } createdDirs = append(createdDirs, cdir) @@ -288,7 +289,7 @@ func (fs *FS) GlobalWrite(relativePath string, data []byte) error { _, err = os.Stat(dir) if os.IsNotExist(err) { - if err := os.MkdirAll(dir, dirMode); err != nil { + if err := utils.MkdirAllWithInheritedOwner(dir, dirMode); err != nil { fs.Logger().WithError(err).WithField("directory", dir).Error("failed to create dir") return err } diff --git a/src/runtime/virtcontainers/pkg/annotations/annotations.go b/src/runtime/virtcontainers/pkg/annotations/annotations.go index 838b2994fd..a50cb52135 100644 --- a/src/runtime/virtcontainers/pkg/annotations/annotations.go +++ b/src/runtime/virtcontainers/pkg/annotations/annotations.go @@ -223,6 +223,9 @@ const ( // EnableGuestSwap is a sandbox annotation to enable swap in the guest. EnableGuestSwap = kataAnnotHypervisorPrefix + "enable_guest_swap" + + // EnableRootlessHypervisor is a sandbox annotation to enable rootless hypervisor (only supported in QEMU currently). + EnableRootlessHypervisor = kataAnnotHypervisorPrefix + "rootless" ) // Runtime related annotations diff --git a/src/runtime/virtcontainers/pkg/oci/utils.go b/src/runtime/virtcontainers/pkg/oci/utils.go index eebc642f22..200962f953 100644 --- a/src/runtime/virtcontainers/pkg/oci/utils.go +++ b/src/runtime/virtcontainers/pkg/oci/utils.go @@ -622,6 +622,12 @@ func addHypervisorMemoryOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig return err } + if err := newAnnotationConfiguration(ocispec, vcAnnotations.EnableRootlessHypervisor).setBool(func(enableRootlessHypervisor bool) { + sbConfig.HypervisorConfig.Rootless = enableRootlessHypervisor + }); err != nil { + return err + } + return nil } diff --git a/src/runtime/virtcontainers/pkg/rootless/rootless.go b/src/runtime/virtcontainers/pkg/rootless/rootless.go index dc69f43eb0..088f8a89da 100644 --- a/src/runtime/virtcontainers/pkg/rootless/rootless.go +++ b/src/runtime/virtcontainers/pkg/rootless/rootless.go @@ -23,6 +23,7 @@ import ( "context" "crypto/rand" "fmt" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils" "os" "path/filepath" "runtime" @@ -106,7 +107,7 @@ func NewNS() (ns.NetNS, error) { // Create the directory for mounting network namespaces // This needs to be a shared mountpoint in case it is mounted in to // other namespaces (containers) - err = os.MkdirAll(nsRunDir, 0755) + err = utils.MkdirAllWithInheritedOwner(nsRunDir, 0755) if err != nil { return nil, err } diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index e656259507..c2cee7a2b7 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -11,9 +11,11 @@ import ( "encoding/hex" "encoding/json" "fmt" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/rootless" "io/ioutil" "math" "os" + "os/user" "path/filepath" "strconv" "strings" @@ -83,6 +85,7 @@ type QemuState struct { } // qemu is an Hypervisor interface implementation for the Linux qemu hypervisor. +// nolint: govet type qemu struct { arch qemuArch @@ -271,7 +274,7 @@ func (q *qemu) setup(ctx context.Context, id string, hypervisorConfig *Hyperviso // The path might already exist, but in case of VM templating, // we have to create it since the sandbox has not created it yet. - if err = os.MkdirAll(filepath.Join(q.store.RunStoragePath(), id), DirMode); err != nil { + if err = utils.MkdirAllWithInheritedOwner(filepath.Join(q.store.RunStoragePath(), id), DirMode); err != nil { return err } } @@ -583,6 +586,9 @@ func (q *qemu) createSandbox(ctx context.Context, id string, networkNS NetworkNa UUID: q.state.UUID, Path: qemuPath, Ctx: q.qmpMonitorCh.ctx, + Uid: q.config.Uid, + Gid: q.config.Gid, + Groups: q.config.Groups, Machine: machine, SMP: smp, Memory: memory, @@ -775,10 +781,11 @@ func (q *qemu) startSandbox(ctx context.Context, timeout int) error { }() vmPath := filepath.Join(q.store.RunVMStoragePath(), q.id) - err := os.MkdirAll(vmPath, DirMode) + err := utils.MkdirAllWithInheritedOwner(vmPath, DirMode) if err != nil { return err } + q.Logger().WithField("vm path", vmPath).Info("created vm path") // append logfile only on debug if q.config.Debug { q.qemuConfig.LogFile = filepath.Join(vmPath, "qemu.log") @@ -1007,6 +1014,27 @@ func (q *qemu) cleanupVM() error { } } + if rootless.IsRootless() { + rootlessDir := os.Getenv("XDG_RUNTIME_DIR") + if err := os.RemoveAll(rootlessDir); err != nil { + q.Logger().WithError(err).WithField("root-path", rootlessDir). + Warnf("failed to remove vm run-as-user root path") + } + + u, err := user.LookupId(strconv.Itoa(int(q.config.Uid))) + if err != nil { + q.Logger().WithError(err).WithField("uid", q.config.Uid).Warn("failed to find the user") + } + userdelPath, err := pkgUtils.FirstValidExecutable([]string{"/usr/sbin/userdel", "/sbin/userdel", "/bin/userdel"}) + if err != nil { + q.Logger().WithError(err).WithField("user", u.Username).Warn("failed to delete the user") + } + _, err = pkgUtils.RunCommand([]string{userdelPath, "-f", u.Username}) + if err != nil { + q.Logger().WithError(err).WithField("user", u.Username).Warn("failed to delete the user") + } + } + return nil } diff --git a/src/runtime/virtcontainers/utils/utils.go b/src/runtime/virtcontainers/utils/utils.go index e49f55a101..a709eb18ec 100644 --- a/src/runtime/virtcontainers/utils/utils.go +++ b/src/runtime/virtcontainers/utils/utils.go @@ -390,3 +390,85 @@ outer: return nil } + +// MkdirAllWithInheritedOwner creates a directory named path, along with any necessary parents. +// It creates the missing directories with the ownership of the last existing parent. +// The path needs to be absolute and the method doesn't handle symlink. +func MkdirAllWithInheritedOwner(path string, perm os.FileMode) error { + if len(path) == 0 { + return fmt.Errorf("the path is empty") + } + + // By default, use the uid and gid of the calling process. + var uid = os.Getuid() + var gid = os.Getgid() + + paths := getAllParentPaths(path) + for _, curPath := range paths { + info, err := os.Stat(curPath) + + if err != nil { + if err = os.Mkdir(curPath, perm); err != nil { + return fmt.Errorf("mkdir call failed: %v", err.Error()) + } + if err = syscall.Chown(curPath, uid, gid); err != nil { + return fmt.Errorf("chown syscall failed: %v", err.Error()) + } + continue + } + + if !info.IsDir() { + return &os.PathError{Op: "mkdir", Path: curPath, Err: syscall.ENOTDIR} + } + if stat, ok := info.Sys().(*syscall.Stat_t); ok { + uid = int(stat.Uid) + gid = int(stat.Gid) + } else { + return fmt.Errorf("fail to retrieve the uid and gid of path %s", curPath) + } + } + return nil +} + +// ChownToParent changes the owners of the path to the same of parent directory. +// The path needs to be absolute and the method doesn't handle symlink. +func ChownToParent(path string) error { + if len(path) == 0 { + return fmt.Errorf("the path is empty") + } + + if !filepath.IsAbs(path) { + return fmt.Errorf("the path is not absolute") + } + + info, err := os.Stat(filepath.Dir(path)) + if err != nil { + return fmt.Errorf("os.Stat() error on %s: %s", filepath.Dir(path), err.Error()) + } + if stat, ok := info.Sys().(*syscall.Stat_t); ok { + if err = syscall.Chown(path, int(stat.Uid), int(stat.Gid)); err != nil { + return err + } + return nil + } + return fmt.Errorf("fail to retrieve the uid and gid of path %s", path) +} + +// getAllParentPaths returns all the parent directories of a path, including itself but excluding root directory "/". +// For example, "/foo/bar/biz" returns {"/foo", "/foo/bar", "/foo/bar/biz"} +func getAllParentPaths(path string) []string { + if path == "/" || path == "." { + return []string{} + } + + paths := []string{filepath.Clean(path)} + cur := path + var parent string + for cur != "/" && cur != "." { + parent = filepath.Dir(cur) + paths = append([]string{parent}, paths...) + cur = parent + } + // remove the "/" or "." from the return result + return paths[1:] +} diff --git a/src/runtime/virtcontainers/utils/utils_test.go b/src/runtime/virtcontainers/utils/utils_test.go index d8d6c4c870..99e533feee 100644 --- a/src/runtime/virtcontainers/utils/utils_test.go +++ b/src/runtime/virtcontainers/utils/utils_test.go @@ -10,6 +10,7 @@ import ( "io/ioutil" "os" "os/exec" + "path" "path/filepath" "reflect" "strings" @@ -469,3 +470,124 @@ func TestWaitLocalProcessInvalidPid(t *testing.T) { assert.Error(err, msg) } } + +func TestMkdirAllWithInheritedOwnerSuccessful(t *testing.T) { + if os.Getuid() != 0 { + t.Skip("Test disabled as requires root user") + } + assert := assert.New(t) + tmpDir1, err := ioutil.TempDir("", "test") + assert.NoError(err) + defer os.RemoveAll(tmpDir1) + tmpDir2, err := ioutil.TempDir("", "test") + assert.NoError(err) + defer os.RemoveAll(tmpDir2) + + testCases := []struct { + before func(rootDir string, uid, gid int) + rootDir string + targetDir string + uid int + gid int + }{ + { + before: func(rootDir string, uid, gid int) { + err = syscall.Chown(rootDir, uid, gid) + assert.NoError(err) + }, + rootDir: tmpDir1, + targetDir: path.Join(tmpDir1, "foo", "bar"), + uid: 1234, + gid: 5678, + }, + { + before: func(rootDir string, uid, gid int) { + // remove the tmpDir2 so the MkdirAllWithInheritedOwner() call creates them from /tmp + err = os.RemoveAll(tmpDir2) + assert.NoError(err) + }, + rootDir: tmpDir2, + targetDir: path.Join(tmpDir2, "foo", "bar"), + uid: 0, + gid: 0, + }, + } + + for _, tc := range testCases { + if tc.before != nil { + tc.before(tc.rootDir, tc.uid, tc.gid) + } + + err := MkdirAllWithInheritedOwner(tc.targetDir, 0700) + assert.NoError(err) + // remove the first parent "/tmp" from the assertion as it's owned by root + for _, p := range getAllParentPaths(tc.targetDir)[1:] { + info, err := os.Stat(p) + assert.NoError(err) + assert.True(info.IsDir()) + stat, ok := info.Sys().(*syscall.Stat_t) + assert.True(ok) + assert.Equal(tc.uid, int(stat.Uid)) + assert.Equal(tc.gid, int(stat.Gid)) + } + } +} + +func TestChownToParent(t *testing.T) { + if os.Getuid() != 0 { + t.Skip("Test disabled as requires root user") + } + assert := assert.New(t) + rootDir, err := ioutil.TempDir("", "root") + assert.NoError(err) + defer os.RemoveAll(rootDir) + uid := 1234 + gid := 5678 + err = syscall.Chown(rootDir, uid, gid) + assert.NoError(err) + + targetDir := path.Join(rootDir, "foo") + + err = os.MkdirAll(targetDir, 0700) + assert.NoError(err) + + err = ChownToParent(targetDir) + assert.NoError(err) + + info, err := os.Stat(targetDir) + assert.NoError(err) + stat, ok := info.Sys().(*syscall.Stat_t) + assert.True(ok) + assert.Equal(uid, int(stat.Uid)) + assert.Equal(gid, int(stat.Gid)) +} + +func TestGetAllParentPaths(t *testing.T) { + assert := assert.New(t) + + testCases := []struct { + targetPath string + parents []string + }{ + { + targetPath: "/", + parents: []string{}, + }, + { + targetPath: ".", + parents: []string{}, + }, + { + targetPath: "foo", + parents: []string{"foo"}, + }, + { + targetPath: "/tmp/bar", + parents: []string{"/tmp", "/tmp/bar"}, + }, + } + + for _, tc := range testCases { + assert.Equal(tc.parents, getAllParentPaths(tc.targetPath)) + } +} diff --git a/src/runtime/virtcontainers/virtiofsd.go b/src/runtime/virtcontainers/virtiofsd.go index c7a0043343..d35399bead 100644 --- a/src/runtime/virtcontainers/virtiofsd.go +++ b/src/runtime/virtcontainers/virtiofsd.go @@ -79,6 +79,12 @@ func (v *virtiofsd) getSocketFD() (*os.File, error) { return nil, err } + // Need to change the filesystem ownership of the socket because virtiofsd runs as root while qemu can run as non-root. + // This can be removed once virtiofsd can also run as non-root (https://github.com/kata-containers/kata-containers/issues/2542) + if err := utils.ChownToParent(v.socketPath); err != nil { + return nil, err + } + // no longer needed since fd is a dup defer listener.Close() diff --git a/src/runtime/virtcontainers/virtiofsd_test.go b/src/runtime/virtcontainers/virtiofsd_test.go index a4d81430c0..f361f5471f 100644 --- a/src/runtime/virtcontainers/virtiofsd_test.go +++ b/src/runtime/virtcontainers/virtiofsd_test.go @@ -9,6 +9,7 @@ import ( "context" "io/ioutil" "os" + "path" "strings" "testing" @@ -37,7 +38,7 @@ func TestVirtiofsdStart(t *testing.T) { assert.NoError(err) defer os.RemoveAll(socketDir) - socketPath := socketDir + "socket.s" + socketPath := path.Join(socketDir, "socket.s") validConfig := fields{ path: "/usr/bin/virtiofsd-path",