From ce2798b688e7b7b82a4b0afb354cb963098d741f Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Fri, 30 Oct 2020 14:54:49 +0800 Subject: [PATCH 1/5] runtime: readonly mounts should be readonly bindmount on the host So that we get protected at the VM boundary not just the guest kernel. Signed-off-by: Peng Tao --- src/runtime/virtcontainers/container.go | 14 ++------------ src/runtime/virtcontainers/pkg/oci/utils.go | 8 ++++++++ 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/runtime/virtcontainers/container.go b/src/runtime/virtcontainers/container.go index 719e43f266..17fa98472b 100644 --- a/src/runtime/virtcontainers/container.go +++ b/src/runtime/virtcontainers/container.go @@ -470,7 +470,7 @@ func (c *Container) shareFiles(m Mount, idx int, hostSharedDir, guestSharedDir s } else { // These mounts are created in the shared dir mountDest := filepath.Join(hostSharedDir, filename) - if err := bindMount(c.ctx, m.Source, mountDest, false, "private"); err != nil { + if err := bindMount(c.ctx, m.Source, mountDest, m.ReadOnly, "private"); err != nil { return "", false, err } // Save HostPath mount value into the mount list of the container. @@ -547,22 +547,12 @@ func (c *Container) mountSharedDirMounts(hostSharedDir, guestSharedDir string) ( continue } - // Check if mount is readonly, let the agent handle the readonly mount - // within the VM. - readonly := false - for _, flag := range m.Options { - if flag == "ro" { - readonly = true - break - } - } - sharedDirMount := Mount{ Source: guestDest, Destination: m.Destination, Type: m.Type, Options: m.Options, - ReadOnly: readonly, + ReadOnly: m.ReadOnly, } sharedDirMounts[sharedDirMount.Destination] = sharedDirMount diff --git a/src/runtime/virtcontainers/pkg/oci/utils.go b/src/runtime/virtcontainers/pkg/oci/utils.go index c417c7121a..313882d385 100644 --- a/src/runtime/virtcontainers/pkg/oci/utils.go +++ b/src/runtime/virtcontainers/pkg/oci/utils.go @@ -159,11 +159,19 @@ func cmdEnvs(spec specs.Spec, envs []types.EnvVar) []types.EnvVar { } func newMount(m specs.Mount) vc.Mount { + readonly := false + for _, flag := range m.Options { + if flag == "ro" { + readonly = true + break + } + } return vc.Mount{ Source: m.Source, Destination: m.Destination, Type: m.Type, Options: m.Options, + ReadOnly: readonly, } } From 57aa746d0d36687eeedc08cfe2a2d089aca5c6fb Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Fri, 30 Oct 2020 17:40:12 +0800 Subject: [PATCH 2/5] runtime: mount shared mountpoint readonly bindmount remount events are not propagated through mount subtrees, so we have to remount the shared dir mountpoint directly. E.g., ``` mkdir -p source dest foo source/foo mount -o bind --make-shared source dest mount -o bind foo source/foo echo bind mount rw mount | grep foo echo remount ro mount -o remount,bind,ro source/foo mount | grep foo ``` would result in: ``` bind mount rw /dev/xvda1 on /home/ubuntu/source/foo type ext4 (rw,relatime,discard,data=ordered) /dev/xvda1 on /home/ubuntu/dest/foo type ext4 (rw,relatime,discard,data=ordered) remount ro /dev/xvda1 on /home/ubuntu/source/foo type ext4 (ro,relatime,discard,data=ordered) /dev/xvda1 on /home/ubuntu/dest/foo type ext4 (rw,relatime,discard,data=ordered) ``` The reason is that bind mount creats new mount structs and attaches them to different mount subtrees. However, MS_REMOUNT only looks for existing mount structs to modify and does not try to propagate the change to mount structs in other subtrees. Fixes: #1061 Signed-off-by: Peng Tao --- src/runtime/virtcontainers/container.go | 15 +++++++++++---- src/runtime/virtcontainers/kata_agent.go | 2 +- src/runtime/virtcontainers/mount.go | 5 +++++ src/runtime/virtcontainers/sandbox_test.go | 2 +- 4 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/runtime/virtcontainers/container.go b/src/runtime/virtcontainers/container.go index 17fa98472b..6920f7a920 100644 --- a/src/runtime/virtcontainers/container.go +++ b/src/runtime/virtcontainers/container.go @@ -435,7 +435,7 @@ func (c *Container) setContainerState(state types.StateString) error { return nil } -func (c *Container) shareFiles(m Mount, idx int, hostSharedDir, guestSharedDir string) (string, bool, error) { +func (c *Container) shareFiles(m Mount, idx int, hostSharedDir, hostMountDir, guestSharedDir string) (string, bool, error) { randBytes, err := utils.GenerateRandomBytes(8) if err != nil { return "", false, err @@ -469,12 +469,19 @@ func (c *Container) shareFiles(m Mount, idx int, hostSharedDir, guestSharedDir s } } else { // These mounts are created in the shared dir - mountDest := filepath.Join(hostSharedDir, filename) + mountDest := filepath.Join(hostMountDir, filename) if err := bindMount(c.ctx, m.Source, mountDest, m.ReadOnly, "private"); err != nil { return "", false, err } // Save HostPath mount value into the mount list of the container. c.mounts[idx].HostPath = mountDest + // bindmount remount event is not propagated to mount subtrees, so we have to remount the shared dir mountpoint directly. + if m.ReadOnly { + mountDest = filepath.Join(hostSharedDir, filename) + if err := remountRo(c.ctx, mountDest); err != nil { + return "", false, err + } + } } return guestDest, false, nil @@ -485,7 +492,7 @@ func (c *Container) shareFiles(m Mount, idx int, hostSharedDir, guestSharedDir s // It also updates the container mount list with the HostPath info, and store // container mounts to the storage. This way, we will have the HostPath info // available when we will need to unmount those mounts. -func (c *Container) mountSharedDirMounts(hostSharedDir, guestSharedDir string) (sharedDirMounts map[string]Mount, ignoredMounts map[string]Mount, err error) { +func (c *Container) mountSharedDirMounts(hostSharedDir, hostMountDir, guestSharedDir string) (sharedDirMounts map[string]Mount, ignoredMounts map[string]Mount, err error) { sharedDirMounts = make(map[string]Mount) ignoredMounts = make(map[string]Mount) var devicesToDetach []string @@ -536,7 +543,7 @@ func (c *Container) mountSharedDirMounts(hostSharedDir, guestSharedDir string) ( var ignore bool var guestDest string - guestDest, ignore, err = c.shareFiles(m, idx, hostSharedDir, guestSharedDir) + guestDest, ignore, err = c.shareFiles(m, idx, hostSharedDir, hostMountDir, guestSharedDir) if err != nil { return nil, nil, err } diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index 26199c78b7..a9e1e655a7 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -1261,7 +1261,7 @@ func (k *kataAgent) createContainer(sandbox *Sandbox, c *Container) (p *Process, } // Handle container mounts - newMounts, ignoredMounts, err := c.mountSharedDirMounts(getMountPath(sandbox.id), kataGuestSharedDir()) + newMounts, ignoredMounts, err := c.mountSharedDirMounts(getSharePath(sandbox.id), getMountPath(sandbox.id), kataGuestSharedDir()) if err != nil { return nil, err } diff --git a/src/runtime/virtcontainers/mount.go b/src/runtime/virtcontainers/mount.go index 6adf0d40a9..604922dfe8 100644 --- a/src/runtime/virtcontainers/mount.go +++ b/src/runtime/virtcontainers/mount.go @@ -277,6 +277,11 @@ func remount(ctx context.Context, mountflags uintptr, src string) error { return nil } +// remount a mount point as readonly +func remountRo(ctx context.Context, src string) error { + return remount(ctx, syscall.MS_BIND|syscall.MS_RDONLY, src) +} + // bindMountContainerRootfs bind mounts a container rootfs into a 9pfs shared // directory between the guest and the host. func bindMountContainerRootfs(ctx context.Context, shareDir, cid, cRootFs string, readonly bool) error { diff --git a/src/runtime/virtcontainers/sandbox_test.go b/src/runtime/virtcontainers/sandbox_test.go index 6e6dd752c1..9a9d0c1e4a 100644 --- a/src/runtime/virtcontainers/sandbox_test.go +++ b/src/runtime/virtcontainers/sandbox_test.go @@ -1175,7 +1175,7 @@ func TestPreAddDevice(t *testing.T) { }, } - mounts, ignoreMounts, err := container.mountSharedDirMounts("", "") + mounts, ignoreMounts, err := container.mountSharedDirMounts("", "", "") assert.Nil(t, err) assert.Equal(t, len(mounts), 0, "mounts should contain nothing because it only contains a block device") From 50aa89fa05fb5878e83a52b5757b518c1b9b4c6c Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Fri, 12 Mar 2021 16:25:35 +0800 Subject: [PATCH 3/5] runtime: fix virtiofsd RO volume sharing Right now we rely heavily on mount propagation to share host files/directories to the guest. However, because virtiofsd pivots and moves itself to a separate mount namespace, the remount mount is not present in virtiofsd's mount. And it causes guest to be able to write to the host RO volume. To fix it, create a private RO mount and then move it to the host mounts dir so that it will be present readonly in the host-guest shared dir. Signed-off-by: Peng Tao --- src/runtime/virtcontainers/container.go | 36 ++++++++++++----- src/runtime/virtcontainers/kata_agent.go | 7 +++- src/runtime/virtcontainers/mount.go | 51 ++++++++++++++++++------ 3 files changed, 71 insertions(+), 23 deletions(-) diff --git a/src/runtime/virtcontainers/container.go b/src/runtime/virtcontainers/container.go index 6920f7a920..65bbfd31f9 100644 --- a/src/runtime/virtcontainers/container.go +++ b/src/runtime/virtcontainers/container.go @@ -470,18 +470,36 @@ func (c *Container) shareFiles(m Mount, idx int, hostSharedDir, hostMountDir, gu } else { // These mounts are created in the shared dir mountDest := filepath.Join(hostMountDir, filename) - if err := bindMount(c.ctx, m.Source, mountDest, m.ReadOnly, "private"); err != nil { - return "", false, err + if !m.ReadOnly { + if err := bindMount(c.ctx, m.Source, mountDest, false, "private"); err != nil { + return "", false, err + } + } else { + // For RO mounts, bindmount remount event is not propagated to mount subtrees, + // and it doesn't present in the virtiofsd standalone mount namespace either. + // So we end up a bit tricky: + // 1. make a private bind mount to the mount source + // 2. make another ro bind mount on the private mount + // 3. move the ro bind mount to mountDest + // 4. umount the private bind mount created in step 1 + privateDest := filepath.Join(getPrivatePath(c.sandboxID), filename) + if err := bindMount(c.ctx, m.Source, privateDest, false, "private"); err != nil { + return "", false, err + } + defer func() { + syscall.Unmount(privateDest, syscall.MNT_DETACH|UmountNoFollow) + }() + if err := bindMount(c.ctx, privateDest, privateDest, true, "private"); err != nil { + return "", false, err + } + if err := moveMount(c.ctx, privateDest, mountDest); err != nil { + return "", false, err + } + + syscall.Unmount(privateDest, syscall.MNT_DETACH|UmountNoFollow) } // Save HostPath mount value into the mount list of the container. c.mounts[idx].HostPath = mountDest - // bindmount remount event is not propagated to mount subtrees, so we have to remount the shared dir mountpoint directly. - if m.ReadOnly { - mountDest = filepath.Join(hostSharedDir, filename) - if err := remountRo(c.ctx, mountDest); err != nil { - return "", false, err - } - } } return guestDest, false, nil diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index a9e1e655a7..ac0cb7891e 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -149,9 +149,10 @@ var kataHostSharedDir = func() string { } // Shared path handling: -// 1. create two directories for each sandbox: +// 1. create three directories for each sandbox: // -. /run/kata-containers/shared/sandboxes/$sbx_id/mounts/, a directory to hold all host/guest shared mounts // -. /run/kata-containers/shared/sandboxes/$sbx_id/shared/, a host/guest shared directory (9pfs/virtiofs source dir) +// -. /run/kata-containers/shared/sandboxes/$sbx_id/private/, a directory to hold all temporary private mounts when creating ro mounts // // 2. /run/kata-containers/shared/sandboxes/$sbx_id/mounts/ is bind mounted readonly to /run/kata-containers/shared/sandboxes/$sbx_id/shared/, so guest cannot modify it // @@ -164,6 +165,10 @@ func getMountPath(id string) string { return filepath.Join(kataHostSharedDir(), id, "mounts") } +func getPrivatePath(id string) string { + return filepath.Join(kataHostSharedDir(), id, "private") +} + func getSandboxPath(id string) string { return filepath.Join(kataHostSharedDir(), id) } diff --git a/src/runtime/virtcontainers/mount.go b/src/runtime/virtcontainers/mount.go index 604922dfe8..8e3921ac69 100644 --- a/src/runtime/virtcontainers/mount.go +++ b/src/runtime/virtcontainers/mount.go @@ -212,6 +212,42 @@ func isDeviceMapper(major, minor int) (bool, error) { const mountPerm = os.FileMode(0755) +func evalMountPath(source, destination string) (string, string, error) { + if source == "" { + return "", "", fmt.Errorf("source must be specified") + } + if destination == "" { + return "", "", fmt.Errorf("destination must be specified") + } + + absSource, err := filepath.EvalSymlinks(source) + if err != nil { + return "", "", fmt.Errorf("Could not resolve symlink for source %v", source) + } + + if err := ensureDestinationExists(absSource, destination); err != nil { + return "", "", fmt.Errorf("Could not create destination mount point %v: %v", destination, err) + } + + return absSource, destination, nil +} + +// moveMount moves a mountpoint to another path with some bookkeeping: +// * evaluate all symlinks +// * ensure the source exists +// * recursively create the destination +func moveMount(ctx context.Context, source, destination string) error { + span, _ := trace(ctx, "moveMount") + defer span.Finish() + + source, destination, err := evalMountPath(source, destination) + if err != nil { + return err + } + + return syscall.Mount(source, destination, "move", syscall.MS_MOVE, "") +} + // bindMount bind mounts a source in to a destination. This will // do some bookkeeping: // * evaluate all symlinks @@ -222,20 +258,9 @@ func bindMount(ctx context.Context, source, destination string, readonly bool, p span, _ := trace(ctx, "bindMount") defer span.Finish() - if source == "" { - return fmt.Errorf("source must be specified") - } - if destination == "" { - return fmt.Errorf("destination must be specified") - } - - absSource, err := filepath.EvalSymlinks(source) + absSource, destination, err := evalMountPath(source, destination) if err != nil { - return fmt.Errorf("Could not resolve symlink for source %v", source) - } - - if err := ensureDestinationExists(absSource, destination); err != nil { - return fmt.Errorf("Could not create destination mount point %v: %v", destination, err) + return err } if err := syscall.Mount(absSource, destination, "bind", syscall.MS_BIND, ""); err != nil { From dfe5ef36b43ec84fc09e5dd6bd29753fa65bcc7a Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Fri, 26 Mar 2021 15:08:40 +0800 Subject: [PATCH 4/5] tools: fix missing SPDX license header As reported by the static checker. Signed-off-by: Peng Tao --- tools/packaging/static-build/firecracker/Makefile | 5 +++++ tools/packaging/static-build/qemu/Makefile | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/tools/packaging/static-build/firecracker/Makefile b/tools/packaging/static-build/firecracker/Makefile index 2978ac1bc6..1add636c91 100644 --- a/tools/packaging/static-build/firecracker/Makefile +++ b/tools/packaging/static-build/firecracker/Makefile @@ -1,3 +1,8 @@ +#!/usr/bin/env bash +# Copyright (c) 2021 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# MK_DIR :=$(shell dirname $(realpath $(lastword $(MAKEFILE_LIST)))) CONFIG_DIR := $(MK_DIR)/../../scripts/ diff --git a/tools/packaging/static-build/qemu/Makefile b/tools/packaging/static-build/qemu/Makefile index 00ab89e462..0a078bd68d 100644 --- a/tools/packaging/static-build/qemu/Makefile +++ b/tools/packaging/static-build/qemu/Makefile @@ -1,3 +1,8 @@ +#!/usr/bin/env bash +# Copyright (c) 2021 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# MK_DIR :=$(shell dirname $(realpath $(lastword $(MAKEFILE_LIST)))) CONFIG_DIR := $(MK_DIR)/../../scripts/ From 2c1b957642ff9da8f095aa6667a57f2c8349bedb Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Fri, 26 Mar 2021 15:30:09 +0800 Subject: [PATCH 5/5] runtime: remove unused functions archConvertStatFs is not called by anyone and causes static checker failures. Signed-off-by: Peng Tao --- src/runtime/cli/utils_arch_base.go | 10 ---------- src/runtime/cli/utils_s390x.go | 10 ---------- 2 files changed, 20 deletions(-) delete mode 100644 src/runtime/cli/utils_arch_base.go delete mode 100644 src/runtime/cli/utils_s390x.go diff --git a/src/runtime/cli/utils_arch_base.go b/src/runtime/cli/utils_arch_base.go deleted file mode 100644 index a0c6a5e71f..0000000000 --- a/src/runtime/cli/utils_arch_base.go +++ /dev/null @@ -1,10 +0,0 @@ -// +build !s390x -// -// SPDX-License-Identifier: Apache-2.0 -// - -package main - -func archConvertStatFs(cgroupFsType int) int64 { - return int64(cgroupFsType) -} diff --git a/src/runtime/cli/utils_s390x.go b/src/runtime/cli/utils_s390x.go deleted file mode 100644 index 6acd5627ef..0000000000 --- a/src/runtime/cli/utils_s390x.go +++ /dev/null @@ -1,10 +0,0 @@ -// Copyright (c) 2018 IBM -// -// SPDX-License-Identifier: Apache-2.0 -// - -package main - -func archConvertStatFs(cgroupFsType int) uint32 { - return uint32(cgroupFsType) -}