virtcontainers: Use FilesystemSharer for sharing the containers files

Switching to the generic FilesystemSharer brings two major improvements:

1. Remove container and sandbox specific code from kata_agent.go
2. Allow non-Linux implementations to provide ways to share
   container files and root filesystems with the Kata Linux guest.

Fixes #3622

Signed-off-by: Samuel Ortiz <s.ortiz@apple.com>
This commit is contained in:
Samuel Ortiz 2022-02-04 10:25:55 +00:00 committed by Samuel Ortiz
parent 533c1c0e86
commit 1103f5a4d4
5 changed files with 62 additions and 257 deletions

View File

@ -10,7 +10,6 @@ package virtcontainers
import (
"context"
"encoding/hex"
"fmt"
"io"
"os"
@ -29,7 +28,6 @@ import (
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
@ -423,72 +421,6 @@ func (c *Container) setContainerState(state types.StateString) error {
return nil
}
// shareFiles makes the host file of mount m (c.mounts[idx]) reachable from the
// guest. When the hypervisor supports filesystem sharing (virtiofs/9pfs), the
// file is bind mounted under the sandbox shared directory; otherwise it is
// copied into the guest through the agent.
//
// It returns the guest destination path, a boolean telling the caller to
// ignore this mount (true only for non-regular files when sharing is not
// supported), and an error.
func (c *Container) shareFiles(ctx context.Context, m Mount, idx int) (string, bool, error) {
	// Add a random suffix so two mounts with the same destination base name
	// cannot collide in the shared directory.
	randBytes, err := utils.GenerateRandomBytes(8)
	if err != nil {
		return "", false, err
	}

	filename := fmt.Sprintf("%s-%s-%s", c.id, hex.EncodeToString(randBytes), filepath.Base(m.Destination))
	guestDest := filepath.Join(kataGuestSharedDir(), filename)

	// copy file to container's rootfs if filesystem sharing is not supported, otherwise
	// bind mount it in the shared directory.
	caps := c.sandbox.hypervisor.Capabilities(ctx)
	if !caps.IsFsSharingSupported() {
		c.Logger().Debug("filesystem sharing is not supported, files will be copied")

		fileInfo, err := os.Stat(m.Source)
		if err != nil {
			return "", false, err
		}

		// Ignore the mount if this is not a regular file (excludes
		// directory, socket, device, ...) as it cannot be handled by
		// a simple copy. But this should not be treated as an error,
		// only as a limitation.
		if !fileInfo.Mode().IsRegular() {
			c.Logger().WithField("ignored-file", m.Source).Debug("Ignoring non-regular file as FS sharing not supported")
			return "", true, nil
		}

		if err := c.sandbox.agent.copyFile(ctx, m.Source, guestDest); err != nil {
			return "", false, err
		}
	} else {
		// These mounts are created in the shared dir
		mountDest := filepath.Join(getMountPath(c.sandboxID), filename)
		if !m.ReadOnly {
			if err := bindMount(c.ctx, m.Source, mountDest, false, "private"); err != nil {
				return "", false, err
			}
		} else {
			// For RO mounts, bindmount remount event is not propagated to mount subtrees,
			// and it doesn't present in the virtiofsd standalone mount namespace either.
			// So we end up a bit tricky:
			// 1. make a private ro bind mount to the mount source
			// 2. duplicate the ro mount we create in step 1 to mountDest, by making a bind mount. No need to remount with MS_RDONLY here.
			// 3. umount the private bind mount created in step 1
			privateDest := filepath.Join(getPrivatePath(c.sandboxID), filename)

			if err := bindMount(c.ctx, m.Source, privateDest, true, "private"); err != nil {
				return "", false, err
			}
			// Step 3 runs once the duplicate in step 2 exists (or failed);
			// the deferred unmount keeps the private mount short-lived.
			defer func() {
				syscall.Unmount(privateDest, syscall.MNT_DETACH|UmountNoFollow)
			}()

			if err := bindMount(c.ctx, privateDest, mountDest, false, "private"); err != nil {
				return "", false, err
			}
		}

		// Save HostPath mount value into the mount list of the container.
		c.mounts[idx].HostPath = mountDest
	}

	return guestDest, false, nil
}
// mountSharedDirMounts handles bind-mounts by bindmounting to the host shared
// directory which is mounted through virtiofs/9pfs in the VM.
// It also updates the container mount list with the HostPath info, and store
@ -503,6 +435,7 @@ func (c *Container) mountSharedDirMounts(ctx context.Context, sharedDirMounts, i
}
}
}()
for idx, m := range c.mounts {
// Skip mounting certain system paths from the source on the host side
// into the container as it does not make sense to do so.
@ -541,20 +474,18 @@ func (c *Container) mountSharedDirMounts(ctx context.Context, sharedDirMounts, i
continue
}
var ignore bool
var guestDest string
guestDest, ignore, err = c.shareFiles(ctx, m, idx)
sharedFile, err := c.sandbox.fsShare.ShareFile(ctx, c, &c.mounts[idx])
if err != nil {
return storages, err
}
// Expand the list of mounts to ignore.
if ignore {
if sharedFile == nil {
ignoredMounts[m.Source] = Mount{Source: m.Source}
continue
}
sharedDirMount := Mount{
Source: guestDest,
Source: sharedFile.guestPath,
Destination: m.Destination,
Type: m.Type,
Options: m.Options,
@ -581,11 +512,11 @@ func (c *Container) mountSharedDirMounts(ctx context.Context, sharedDirMounts, i
return storages, fmt.Errorf("unable to create watchable path: %s: %v", watchableHostPath, err)
}
watchableGuestMount := filepath.Join(kataGuestSharedDir(), "watchable", filepath.Base(guestDest))
watchableGuestMount := filepath.Join(kataGuestSharedDir(), "watchable", filepath.Base(sharedFile.guestPath))
storage := &grpc.Storage{
Driver: kataWatchableBindDevType,
Source: guestDest,
Source: sharedFile.guestPath,
Fstype: "bind",
MountPoint: watchableGuestMount,
Options: m.Options,
@ -616,7 +547,7 @@ func (c *Container) unmountHostMounts(ctx context.Context) error {
span.End()
}()
if err = syscall.Unmount(m.HostPath, syscall.MNT_DETACH|UmountNoFollow); err != nil {
if err = c.sandbox.fsShare.UnshareFile(ctx, c, &m); err != nil {
c.Logger().WithFields(logrus.Fields{
"host-path": m.HostPath,
"error": err,
@ -624,19 +555,6 @@ func (c *Container) unmountHostMounts(ctx context.Context) error {
return err
}
if m.Type == "bind" {
s, err := os.Stat(m.HostPath)
if err != nil {
return errors.Wrapf(err, "Could not stat host-path %v", m.HostPath)
}
// Remove the empty file or directory
if s.Mode().IsRegular() && s.Size() == 0 {
os.Remove(m.HostPath)
}
if s.Mode().IsDir() {
syscall.Rmdir(m.HostPath)
}
}
return nil
}
@ -867,8 +785,8 @@ func (c *Container) rollbackFailingContainerCreation(ctx context.Context) {
c.Logger().WithError(err).Error("rollback failed nydusContainerCleanup()")
}
} else {
if err := bindUnmountContainerRootfs(ctx, getMountPath(c.sandbox.id), c.id); err != nil {
c.Logger().WithError(err).Error("rollback failed bindUnmountContainerRootfs()")
if err := c.sandbox.fsShare.UnshareRootFilesystem(ctx, c); err != nil {
c.Logger().WithError(err).Error("rollback failed UnshareRootFilesystem()")
}
}
}
@ -1051,7 +969,7 @@ func (c *Container) stop(ctx context.Context, force bool) error {
return err
}
} else {
if err := bindUnmountContainerRootfs(ctx, getMountPath(c.sandbox.id), c.id); err != nil && !force {
if err := c.sandbox.fsShare.UnshareRootFilesystem(ctx, c); err != nil && !force {
return err
}
}
@ -1064,11 +982,6 @@ func (c *Container) stop(ctx context.Context, force bool) error {
return err
}
shareDir := filepath.Join(getMountPath(c.sandbox.id), c.id)
if err := syscall.Rmdir(shareDir); err != nil {
c.Logger().WithError(err).WithField("share-dir", shareDir).Warn("Could not remove container share dir")
}
// container was killed by force, container MUST change its state
// as soon as possible just in case one of below operations fail leaving
// the containers in a bad state.

View File

@ -146,7 +146,7 @@ func TestUnmountHostMountsRemoveBindHostPath(t *testing.T) {
return f.Name()
}
doUnmountCheck := func(src, dest, hostPath, nonEmptyHostpath, devPath string) {
doUnmountCheck := func(s *Sandbox, src, dest, hostPath, nonEmptyHostpath, devPath string) {
mounts := []Mount{
{
Source: src,
@ -169,8 +169,10 @@ func TestUnmountHostMountsRemoveBindHostPath(t *testing.T) {
}
c := Container{
mounts: mounts,
ctx: context.Background(),
mounts: mounts,
ctx: context.Background(),
id: "fooabr",
sandbox: s,
}
if err := bindMount(c.ctx, src, hostPath, false, "private"); err != nil {
@ -221,8 +223,21 @@ func TestUnmountHostMountsRemoveBindHostPath(t *testing.T) {
hostPath := createFakeMountDir(t, testDir, "host-path")
nonEmptyHostpath := createFakeMountDir(t, testDir, "non-empty-host-path")
devPath := createFakeMountDir(t, testDir, "dev-hostpath")
// create sandbox for mounting into
sandbox := &Sandbox{
ctx: context.Background(),
id: "foobar",
config: &SandboxConfig{},
}
fsShare, err := NewFilesystemShare(sandbox)
if err != nil {
t.Fatal(err)
}
sandbox.fsShare = fsShare
createFakeMountDir(t, nonEmptyHostpath, "nop")
doUnmountCheck(src, dest, hostPath, nonEmptyHostpath, devPath)
doUnmountCheck(sandbox, src, dest, hostPath, nonEmptyHostpath, devPath)
src = createFakeMountFile(t, testDir, "src")
dest = createFakeMountFile(t, testDir, "dest")
@ -235,7 +250,7 @@ func TestUnmountHostMountsRemoveBindHostPath(t *testing.T) {
}
f.WriteString("nop\n")
f.Close()
doUnmountCheck(src, dest, hostPath, nonEmptyHostpath, devPath)
doUnmountCheck(sandbox, src, dest, hostPath, nonEmptyHostpath, devPath)
}
func testSetupFakeRootfs(t *testing.T) (testRawFile, loopDev, mntDir string, err error) {
@ -584,8 +599,14 @@ func TestMountSharedDirMounts(t *testing.T) {
},
},
}
fsShare, err := NewFilesystemShare(sandbox)
assert.Nil(err)
sandbox.fsShare = fsShare
// setup the shared mounts:
k.setupSharedPath(k.ctx, sandbox)
err = sandbox.fsShare.Prepare(sandbox.ctx)
assert.NoError(err)
//
// Create the mounts that we'll test with

View File

@ -317,10 +317,9 @@ func (f *FilesystemShare) UnshareFile(ctx context.Context, c *Container, m *Moun
func (f *FilesystemShare) shareRootFilesystemWithNydus(ctx context.Context, c *Container) (*SharedFile, error) {
rootfsGuestPath := filepath.Join(kataGuestSharedDir(), c.id, c.rootfsSuffix)
if f.sandbox.GetHypervisorType() != string(QemuHypervisor) {
// qemu is supported first, other hypervisors will next
// https://github.com/kata-containers/kata-containers/issues/2724
return nil, errNydusdNotSupport
virtiofsDaemon, err := getVirtiofsDaemonForNydus(f.sandbox)
if err != nil {
return nil, err
}
extraOption, err := parseExtraOption(c.rootFs.Options)
if err != nil {
@ -333,9 +332,8 @@ func (f *FilesystemShare) shareRootFilesystemWithNydus(ctx context.Context, c *C
config: extraOption.Config,
}
q, _ := f.sandbox.hypervisor.(*qemu)
// mount lowerdir to guest /run/kata-containers/shared/images/<cid>/lowerdir
if err := q.virtiofsDaemon.Mount(*mountOpt); err != nil {
if err := virtiofsDaemon.Mount(*mountOpt); err != nil {
return nil, err
}
rootfs := &grpc.Storage{}

View File

@ -1158,158 +1158,18 @@ func (k *kataAgent) rollbackFailingContainerCreation(ctx context.Context, c *Con
k.Logger().WithError(err2).Error("rollback failed unmountHostMounts()")
}
if c.rootFs.Type == NydusRootFSType {
if err2 := nydusContainerCleanup(ctx, getMountPath(c.sandbox.id), c); err2 != nil {
k.Logger().WithError(err2).Error("rollback failed nydusContainerCleanup")
}
} else {
if err2 := bindUnmountContainerRootfs(ctx, getMountPath(c.sandbox.id), c.id); err2 != nil {
k.Logger().WithError(err2).Error("rollback failed bindUnmountContainerRootfs()")
}
if err2 := c.sandbox.fsShare.UnshareRootFilesystem(ctx, c); err2 != nil {
k.Logger().WithError(err2).Error("rollback failed UnshareRootfs()")
}
}
}
// getVirtiofsDaemonForNydus returns the virtiofs daemon backing the sandbox
// hypervisor for nydus usage. Only QEMU and Cloud Hypervisor are supported;
// any other hypervisor type yields errNydusdNotSupport.
func getVirtiofsDaemonForNydus(sandbox *Sandbox) (VirtiofsDaemon, error) {
	switch sandbox.GetHypervisorType() {
	case string(QemuHypervisor):
		return sandbox.hypervisor.(*qemu).virtiofsDaemon, nil
	case string(ClhHypervisor):
		return sandbox.hypervisor.(*cloudHypervisor).virtiofsDaemon, nil
	}
	return nil, errNydusdNotSupport
}
// buildContainerRootfsWithNydus builds the rootfs storage object for a
// nydus-backed container image: it asks the nydus virtiofs daemon to mount
// the image lowerdir, bind mounts the snapshotter's snapshot directory into
// the container share directory, and returns an overlayfs storage description
// (upperdir/workdir from the snapshot, lowerdir from the nydus image) for the
// agent to mount inside the guest.
func (k *kataAgent) buildContainerRootfsWithNydus(sandbox *Sandbox, c *Container, rootPathParent string) (*grpc.Storage, error) {
	virtiofsDaemon, err := getVirtiofsDaemonForNydus(sandbox)
	if err != nil {
		return nil, err
	}
	// extraOption carries the nydus source/config/snapshotdir encoded in the
	// rootfs mount options by the snapshotter.
	extraOption, err := parseExtraOption(c.rootFs.Options)
	if err != nil {
		return nil, err
	}
	mountOpt := &MountOption{
		mountpoint: rafsMountPath(c.id),
		source:     extraOption.Source,
		config:     extraOption.Config,
	}
	k.Logger().Infof("nydus option: %v", extraOption)
	// mount lowerdir to guest /run/kata-containers/shared/images/<cid>/lowerdir
	if err := virtiofsDaemon.Mount(*mountOpt); err != nil {
		return nil, err
	}
	rootfs := &grpc.Storage{}
	containerShareDir := filepath.Join(getMountPath(c.sandbox.id), c.id)

	// mkdir rootfs, guest at /run/kata-containers/shared/containers/<cid>/rootfs
	rootfsDir := filepath.Join(containerShareDir, c.rootfsSuffix)
	if err := os.MkdirAll(rootfsDir, DirMode); err != nil {
		return nil, err
	}

	// bindmount snapshot dir which snapshotter allocated
	// to guest /run/kata-containers/shared/containers/<cid>/snapshotdir
	snapshotShareDir := filepath.Join(containerShareDir, snapshotDir)
	if err := bindMount(k.ctx, extraOption.Snapshotdir, snapshotShareDir, true, "slave"); err != nil {
		return nil, err
	}

	// so rootfs = overlay(upperdir, workerdir, lowerdir)
	rootfs.MountPoint = filepath.Join(rootPathParent, c.rootfsSuffix)
	rootfs.Source = typeOverlayFS
	rootfs.Fstype = typeOverlayFS
	rootfs.Driver = kataOverlayDevType
	rootfs.Options = append(rootfs.Options, fmt.Sprintf("%s=%s", upperDir, filepath.Join(kataGuestSharedDir(), c.id, snapshotDir, "fs")))
	rootfs.Options = append(rootfs.Options, fmt.Sprintf("%s=%s", workDir, filepath.Join(kataGuestSharedDir(), c.id, snapshotDir, "work")))
	rootfs.Options = append(rootfs.Options, fmt.Sprintf("%s=%s", lowerDir, filepath.Join(kataGuestNydusImageDir(), c.id, lowerDir)))
	rootfs.Options = append(rootfs.Options, "index=off")
	k.Logger().Infof("rootfs info: %#v\n", rootfs)
	return rootfs, nil
}
// buildContainerRootfs prepares the container rootfs for the guest and, when
// the rootfs is block-device based, returns the storage object the agent must
// mount. For nydus rootfs it delegates to buildContainerRootfsWithNydus; for
// shared-fs (virtiofs/9pfs) rootfs it bind mounts the rootfs on the host side
// and returns (nil, nil) since no agent-side mount is needed.
func (k *kataAgent) buildContainerRootfs(ctx context.Context, sandbox *Sandbox, c *Container, rootPathParent string) (*grpc.Storage, error) {
	if c.rootFs.Type == NydusRootFSType {
		return k.buildContainerRootfsWithNydus(sandbox, c, rootPathParent)
	}
	if c.state.Fstype != "" && c.state.BlockDeviceID != "" {
		// The rootfs storage volume represents the container rootfs
		// mount point inside the guest.
		// It can be a block based device (when using block based container
		// overlay on the host) mount or a 9pfs one (for all other overlay
		// implementations).
		rootfs := &grpc.Storage{}

		// This is a block based device rootfs.
		device := sandbox.devManager.GetDeviceByID(c.state.BlockDeviceID)
		if device == nil {
			k.Logger().WithField("device", c.state.BlockDeviceID).Error("failed to find device by id")
			return nil, fmt.Errorf("failed to find device by id %q", c.state.BlockDeviceID)
		}

		blockDrive, ok := device.GetDeviceInfo().(*config.BlockDrive)
		if !ok || blockDrive == nil {
			k.Logger().Error("malformed block drive")
			return nil, fmt.Errorf("malformed block drive")
		}
		// Pick the guest-visible device identifier matching the configured
		// block device driver (MMIO path, CCW devno, PCI path or SCSI addr).
		switch {
		case sandbox.config.HypervisorConfig.BlockDeviceDriver == config.VirtioMmio:
			rootfs.Driver = kataMmioBlkDevType
			rootfs.Source = blockDrive.VirtPath
		case sandbox.config.HypervisorConfig.BlockDeviceDriver == config.VirtioBlockCCW:
			rootfs.Driver = kataBlkCCWDevType
			rootfs.Source = blockDrive.DevNo
		case sandbox.config.HypervisorConfig.BlockDeviceDriver == config.VirtioBlock:
			rootfs.Driver = kataBlkDevType
			rootfs.Source = blockDrive.PCIPath.String()
		case sandbox.config.HypervisorConfig.BlockDeviceDriver == config.VirtioSCSI:
			rootfs.Driver = kataSCSIDevType
			rootfs.Source = blockDrive.SCSIAddr
		default:
			return nil, fmt.Errorf("Unknown block device driver: %s", sandbox.config.HypervisorConfig.BlockDeviceDriver)
		}

		rootfs.MountPoint = rootPathParent
		rootfs.Fstype = c.state.Fstype

		// xfs needs nouuid to allow mounting clones that share a UUID.
		if c.state.Fstype == "xfs" {
			rootfs.Options = []string{"nouuid"}
		}

		// Ensure container mount destination exists
		// TODO: remove dependency on shared fs path. shared fs is just one kind of storage source.
		// we should not always use shared fs path for all kinds of storage. Instead, all storage
		// should be bind mounted to a tmpfs path for containers to use.
		if err := os.MkdirAll(filepath.Join(getMountPath(c.sandbox.id), c.id, c.rootfsSuffix), DirMode); err != nil {
			return nil, err
		}
		return rootfs, nil
	}

	// This is not a block based device rootfs. We are going to bind mount it into the shared drive
	// between the host and the guest.
	// With virtiofs/9pfs we don't need to ask the agent to mount the rootfs as the shared directory
	// (kataGuestSharedDir) is already mounted in the guest. We only need to mount the rootfs from
	// the host and it will show up in the guest.
	if err := bindMountContainerRootfs(ctx, getMountPath(sandbox.id), c.id, c.rootFs.Target, false); err != nil {
		return nil, err
	}

	return nil, nil
}
func (k *kataAgent) createContainer(ctx context.Context, sandbox *Sandbox, c *Container) (p *Process, err error) {
span, ctx := katatrace.Trace(ctx, k.Logger(), "createContainer", kataAgentTracingTags)
defer span.End()
var ctrStorages []*grpc.Storage
var ctrDevices []*grpc.Device
var rootfs *grpc.Storage
// This is the guest absolute root path for that container.
rootPathParent := filepath.Join(kataGuestSharedDir(), c.id)
rootPath := filepath.Join(rootPathParent, c.rootfsSuffix)
var sharedRootfs *SharedFile
// In case the container creation fails, the following defer statement
// takes care of rolling back actions previously performed.
@ -1320,19 +1180,19 @@ func (k *kataAgent) createContainer(ctx context.Context, sandbox *Sandbox, c *Co
}
}()
// setup rootfs -- if its block based, we'll receive a non-nil storage object representing
// Share the container rootfs -- if its block based, we'll receive a non-nil storage object representing
// the block device for the rootfs, which us utilized for mounting in the guest. This'll be handled
// already for non-block based rootfs
if rootfs, err = k.buildContainerRootfs(ctx, sandbox, c, rootPathParent); err != nil {
if sharedRootfs, err = sandbox.fsShare.ShareRootFilesystem(ctx, c); err != nil {
return nil, err
}
if rootfs != nil {
if sharedRootfs.storage != nil {
// Add rootfs to the list of container storage.
// We only need to do this for block based rootfs, as we
// want the agent to mount it into the right location
// (kataGuestSharedDir/ctrID/
ctrStorages = append(ctrStorages, rootfs)
ctrStorages = append(ctrStorages, sharedRootfs.storage)
}
ociSpec := c.GetPatchedOCISpec()
@ -1408,7 +1268,7 @@ func (k *kataAgent) createContainer(ctx context.Context, sandbox *Sandbox, c *Co
}
// We need to give the OCI spec our absolute rootfs path in the guest.
grpcSpec.Root.Path = rootPath
grpcSpec.Root.Path = sharedRootfs.guestPath
sharedPidNs := k.handlePidNamespace(grpcSpec, sandbox)

View File

@ -388,6 +388,19 @@ func bindUnmountContainerSnapshotDir(ctx context.Context, sharedDir, cID string)
return bindUnmountContainerShareDir(ctx, sharedDir, cID, snapshotDir)
}
// getVirtiofsDaemonForNydus looks up the virtiofs daemon attached to the
// sandbox hypervisor so nydus can issue mount requests against it. Supported
// hypervisors are QEMU and Cloud Hypervisor; everything else returns
// errNydusdNotSupport.
func getVirtiofsDaemonForNydus(sandbox *Sandbox) (VirtiofsDaemon, error) {
	hypervisorType := sandbox.GetHypervisorType()
	switch hypervisorType {
	case string(QemuHypervisor):
		return sandbox.hypervisor.(*qemu).virtiofsDaemon, nil
	case string(ClhHypervisor):
		return sandbox.hypervisor.(*cloudHypervisor).virtiofsDaemon, nil
	default:
		return nil, errNydusdNotSupport
	}
}
func nydusContainerCleanup(ctx context.Context, sharedDir string, c *Container) error {
sandbox := c.sandbox
virtiofsDaemon, err := getVirtiofsDaemonForNydus(sandbox)