diff --git a/src/runtime/virtcontainers/container.go b/src/runtime/virtcontainers/container.go index ee0a380791..b57f40e51b 100644 --- a/src/runtime/virtcontainers/container.go +++ b/src/runtime/virtcontainers/container.go @@ -10,7 +10,6 @@ package virtcontainers import ( "context" - "encoding/hex" "fmt" "io" "os" @@ -29,7 +28,6 @@ import ( "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils" specs "github.com/opencontainers/runtime-spec/specs-go" - "github.com/pkg/errors" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" ) @@ -423,72 +421,6 @@ func (c *Container) setContainerState(state types.StateString) error { return nil } -func (c *Container) shareFiles(ctx context.Context, m Mount, idx int) (string, bool, error) { - randBytes, err := utils.GenerateRandomBytes(8) - if err != nil { - return "", false, err - } - - filename := fmt.Sprintf("%s-%s-%s", c.id, hex.EncodeToString(randBytes), filepath.Base(m.Destination)) - guestDest := filepath.Join(kataGuestSharedDir(), filename) - - // copy file to contaier's rootfs if filesystem sharing is not supported, otherwise - // bind mount it in the shared directory. - caps := c.sandbox.hypervisor.Capabilities(ctx) - if !caps.IsFsSharingSupported() { - c.Logger().Debug("filesystem sharing is not supported, files will be copied") - - fileInfo, err := os.Stat(m.Source) - if err != nil { - return "", false, err - } - - // Ignore the mount if this is not a regular file (excludes - // directory, socket, device, ...) as it cannot be handled by - // a simple copy. But this should not be treated as an error, - // only as a limitation. - if !fileInfo.Mode().IsRegular() { - c.Logger().WithField("ignored-file", m.Source).Debug("Ignoring non-regular file as FS sharing not supported") - return "", true, nil - } - - if err := c.sandbox.agent.copyFile(ctx, m.Source, guestDest); err != nil { - return "", false, err - } - } else { - // These mounts are created in the shared dir - mountDest := filepath.Join(getMountPath(c.sandboxID), filename) - if !m.ReadOnly { - if err := bindMount(c.ctx, m.Source, mountDest, false, "private"); err != nil { - return "", false, err - } - } else { - // For RO mounts, bindmount remount event is not propagated to mount subtrees, - // and it doesn't present in the virtiofsd standalone mount namespace either. - // So we end up a bit tricky: - // 1. make a private ro bind mount to the mount source - // 2. duplicate the ro mount we create in step 1 to mountDest, by making a bind mount. No need to remount with MS_RDONLY here. - // 3. umount the private bind mount created in step 1 - privateDest := filepath.Join(getPrivatePath(c.sandboxID), filename) - - if err := bindMount(c.ctx, m.Source, privateDest, true, "private"); err != nil { - return "", false, err - } - defer func() { - syscall.Unmount(privateDest, syscall.MNT_DETACH|UmountNoFollow) - }() - - if err := bindMount(c.ctx, privateDest, mountDest, false, "private"); err != nil { - return "", false, err - } - } - // Save HostPath mount value into the mount list of the container. - c.mounts[idx].HostPath = mountDest - } - - return guestDest, false, nil -} - // mountSharedDirMounts handles bind-mounts by bindmounting to the host shared // directory which is mounted through virtiofs/9pfs in the VM. // It also updates the container mount list with the HostPath info, and store @@ -503,6 +435,7 @@ func (c *Container) mountSharedDirMounts(ctx context.Context, sharedDirMounts, i } } }() + for idx, m := range c.mounts { // Skip mounting certain system paths from the source on the host side // into the container as it does not make sense to do so. @@ -541,20 +474,18 @@ func (c *Container) mountSharedDirMounts(ctx context.Context, sharedDirMounts, i continue } - var ignore bool - var guestDest string - guestDest, ignore, err = c.shareFiles(ctx, m, idx) + sharedFile, err := c.sandbox.fsShare.ShareFile(ctx, c, &c.mounts[idx]) if err != nil { return storages, err } // Expand the list of mounts to ignore. - if ignore { + if sharedFile == nil { ignoredMounts[m.Source] = Mount{Source: m.Source} continue } sharedDirMount := Mount{ - Source: guestDest, + Source: sharedFile.guestPath, Destination: m.Destination, Type: m.Type, Options: m.Options, @@ -581,11 +512,11 @@ func (c *Container) mountSharedDirMounts(ctx context.Context, sharedDirMounts, i return storages, fmt.Errorf("unable to create watchable path: %s: %v", watchableHostPath, err) } - watchableGuestMount := filepath.Join(kataGuestSharedDir(), "watchable", filepath.Base(guestDest)) + watchableGuestMount := filepath.Join(kataGuestSharedDir(), "watchable", filepath.Base(sharedFile.guestPath)) storage := &grpc.Storage{ Driver: kataWatchableBindDevType, - Source: guestDest, + Source: sharedFile.guestPath, Fstype: "bind", MountPoint: watchableGuestMount, Options: m.Options, @@ -616,7 +547,7 @@ func (c *Container) unmountHostMounts(ctx context.Context) error { span.End() }() - if err = syscall.Unmount(m.HostPath, syscall.MNT_DETACH|UmountNoFollow); err != nil { + if err = c.sandbox.fsShare.UnshareFile(ctx, c, &m); err != nil { c.Logger().WithFields(logrus.Fields{ "host-path": m.HostPath, "error": err, @@ -624,19 +555,6 @@ func (c *Container) unmountHostMounts(ctx context.Context) error { return err } - if m.Type == "bind" { - s, err := os.Stat(m.HostPath) - if err != nil { - return errors.Wrapf(err, "Could not stat host-path %v", m.HostPath) - } - // Remove the empty file or directory - if s.Mode().IsRegular() && s.Size() == 0 { - os.Remove(m.HostPath) - } - if s.Mode().IsDir() { - syscall.Rmdir(m.HostPath) - } - } return nil } @@ -867,8 +785,8 @@ func (c *Container) rollbackFailingContainerCreation(ctx context.Context) { c.Logger().WithError(err).Error("rollback failed nydusContainerCleanup()") } } else { - if err := bindUnmountContainerRootfs(ctx, getMountPath(c.sandbox.id), c.id); err != nil { - c.Logger().WithError(err).Error("rollback failed bindUnmountContainerRootfs()") + if err := c.sandbox.fsShare.UnshareRootFilesystem(ctx, c); err != nil { + c.Logger().WithError(err).Error("rollback failed UnshareRootFilesystem()") } } } @@ -1051,7 +969,7 @@ func (c *Container) stop(ctx context.Context, force bool) error { return err } } else { - if err := bindUnmountContainerRootfs(ctx, getMountPath(c.sandbox.id), c.id); err != nil && !force { + if err := c.sandbox.fsShare.UnshareRootFilesystem(ctx, c); err != nil && !force { return err } } @@ -1064,11 +982,6 @@ func (c *Container) stop(ctx context.Context, force bool) error { return err } - shareDir := filepath.Join(getMountPath(c.sandbox.id), c.id) - if err := syscall.Rmdir(shareDir); err != nil { - c.Logger().WithError(err).WithField("share-dir", shareDir).Warn("Could not remove container share dir") - } - // container was killed by force, container MUST change its state // as soon as possible just in case one of below operations fail leaving // the containers in a bad state. diff --git a/src/runtime/virtcontainers/container_test.go b/src/runtime/virtcontainers/container_test.go index 05b8974a9e..b41fcc1089 100644 --- a/src/runtime/virtcontainers/container_test.go +++ b/src/runtime/virtcontainers/container_test.go @@ -146,7 +146,7 @@ func TestUnmountHostMountsRemoveBindHostPath(t *testing.T) { return f.Name() } - doUnmountCheck := func(src, dest, hostPath, nonEmptyHostpath, devPath string) { + doUnmountCheck := func(s *Sandbox, src, dest, hostPath, nonEmptyHostpath, devPath string) { mounts := []Mount{ { Source: src, @@ -169,8 +169,10 @@ func TestUnmountHostMountsRemoveBindHostPath(t *testing.T) { } c := Container{ - mounts: mounts, - ctx: context.Background(), + mounts: mounts, + ctx: context.Background(), + id: "fooabr", + sandbox: s, } if err := bindMount(c.ctx, src, hostPath, false, "private"); err != nil { @@ -221,8 +223,21 @@ func TestUnmountHostMountsRemoveBindHostPath(t *testing.T) { hostPath := createFakeMountDir(t, testDir, "host-path") nonEmptyHostpath := createFakeMountDir(t, testDir, "non-empty-host-path") devPath := createFakeMountDir(t, testDir, "dev-hostpath") + // create sandbox for mounting into + sandbox := &Sandbox{ + ctx: context.Background(), + id: "foobar", + config: &SandboxConfig{}, + } + + fsShare, err := NewFilesystemShare(sandbox) + if err != nil { + t.Fatal(err) + } + sandbox.fsShare = fsShare + createFakeMountDir(t, nonEmptyHostpath, "nop") - doUnmountCheck(src, dest, hostPath, nonEmptyHostpath, devPath) + doUnmountCheck(sandbox, src, dest, hostPath, nonEmptyHostpath, devPath) src = createFakeMountFile(t, testDir, "src") dest = createFakeMountFile(t, testDir, "dest") @@ -235,7 +250,7 @@ func TestUnmountHostMountsRemoveBindHostPath(t *testing.T) { } f.WriteString("nop\n") f.Close() - doUnmountCheck(src, dest, hostPath, nonEmptyHostpath, devPath) + doUnmountCheck(sandbox, src, dest, hostPath, nonEmptyHostpath, devPath) } func testSetupFakeRootfs(t *testing.T) (testRawFile, loopDev, mntDir string, err error) { @@ -584,8 +599,14 @@ func TestMountSharedDirMounts(t *testing.T) { }, }, } + + fsShare, err := NewFilesystemShare(sandbox) + assert.Nil(err) + sandbox.fsShare = fsShare + // setup the shared mounts: - k.setupSharedPath(k.ctx, sandbox) + err = sandbox.fsShare.Prepare(sandbox.ctx) + assert.NoError(err) // // Create the mounts that we'll test with diff --git a/src/runtime/virtcontainers/fs_share_linux.go b/src/runtime/virtcontainers/fs_share_linux.go index dc16a73b73..a7007826fa 100644 --- a/src/runtime/virtcontainers/fs_share_linux.go +++ b/src/runtime/virtcontainers/fs_share_linux.go @@ -317,10 +317,9 @@ func (f *FilesystemShare) UnshareFile(ctx context.Context, c *Container, m *Moun func (f *FilesystemShare) shareRootFilesystemWithNydus(ctx context.Context, c *Container) (*SharedFile, error) { rootfsGuestPath := filepath.Join(kataGuestSharedDir(), c.id, c.rootfsSuffix) - if f.sandbox.GetHypervisorType() != string(QemuHypervisor) { - // qemu is supported first, other hypervisors will next - // https://github.com/kata-containers/kata-containers/issues/2724 - return nil, errNydusdNotSupport + virtiofsDaemon, err := getVirtiofsDaemonForNydus(f.sandbox) + if err != nil { + return nil, err } extraOption, err := parseExtraOption(c.rootFs.Options) if err != nil { @@ -333,9 +332,8 @@ func (f *FilesystemShare) shareRootFilesystemWithNydus(ctx context.Context, c *C config: extraOption.Config, } - q, _ := f.sandbox.hypervisor.(*qemu) // mount lowerdir to guest /run/kata-containers/shared/images//lowerdir - if err := q.virtiofsDaemon.Mount(*mountOpt); err != nil { + if err := virtiofsDaemon.Mount(*mountOpt); err != nil { return nil, err } rootfs := &grpc.Storage{} diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index 8572f584a1..1a10543051 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -1158,158 +1158,18 @@ func (k *kataAgent) rollbackFailingContainerCreation(ctx context.Context, c *Con k.Logger().WithError(err2).Error("rollback failed unmountHostMounts()") } - if c.rootFs.Type == NydusRootFSType { - if err2 := nydusContainerCleanup(ctx, getMountPath(c.sandbox.id), c); err2 != nil { - k.Logger().WithError(err2).Error("rollback failed nydusContainerCleanup") - } - } else { - if err2 := bindUnmountContainerRootfs(ctx, getMountPath(c.sandbox.id), c.id); err2 != nil { - k.Logger().WithError(err2).Error("rollback failed bindUnmountContainerRootfs()") - } + if err2 := c.sandbox.fsShare.UnshareRootFilesystem(ctx, c); err2 != nil { + k.Logger().WithError(err2).Error("rollback failed UnshareRootfs()") } } } -func getVirtiofsDaemonForNydus(sandbox *Sandbox) (VirtiofsDaemon, error) { - var virtiofsDaemon VirtiofsDaemon - switch sandbox.GetHypervisorType() { - case string(QemuHypervisor): - virtiofsDaemon = sandbox.hypervisor.(*qemu).virtiofsDaemon - case string(ClhHypervisor): - virtiofsDaemon = sandbox.hypervisor.(*cloudHypervisor).virtiofsDaemon - default: - return nil, errNydusdNotSupport - } - return virtiofsDaemon, nil -} - -func (k *kataAgent) buildContainerRootfsWithNydus(sandbox *Sandbox, c *Container, rootPathParent string) (*grpc.Storage, error) { - virtiofsDaemon, err := getVirtiofsDaemonForNydus(sandbox) - if err != nil { - return nil, err - } - extraOption, err := parseExtraOption(c.rootFs.Options) - if err != nil { - return nil, err - } - mountOpt := &MountOption{ - mountpoint: rafsMountPath(c.id), - source: extraOption.Source, - config: extraOption.Config, - } - k.Logger().Infof("nydus option: %v", extraOption) - // mount lowerdir to guest /run/kata-containers/shared/images//lowerdir - if err := virtiofsDaemon.Mount(*mountOpt); err != nil { - return nil, err - } - rootfs := &grpc.Storage{} - containerShareDir := filepath.Join(getMountPath(c.sandbox.id), c.id) - - // mkdir rootfs, guest at /run/kata-containers/shared/containers//rootfs - rootfsDir := filepath.Join(containerShareDir, c.rootfsSuffix) - if err := os.MkdirAll(rootfsDir, DirMode); err != nil { - return nil, err - } - // bindmount snapshot dir which snapshotter allocated - // to guest /run/kata-containers/shared/containers//snapshotdir - snapshotShareDir := filepath.Join(containerShareDir, snapshotDir) - if err := bindMount(k.ctx, extraOption.Snapshotdir, snapshotShareDir, true, "slave"); err != nil { - return nil, err - } - - // so rootfs = overlay(upperdir, workerdir, lowerdir) - rootfs.MountPoint = filepath.Join(rootPathParent, c.rootfsSuffix) - rootfs.Source = typeOverlayFS - rootfs.Fstype = typeOverlayFS - rootfs.Driver = kataOverlayDevType - rootfs.Options = append(rootfs.Options, fmt.Sprintf("%s=%s", upperDir, filepath.Join(kataGuestSharedDir(), c.id, snapshotDir, "fs"))) - rootfs.Options = append(rootfs.Options, fmt.Sprintf("%s=%s", workDir, filepath.Join(kataGuestSharedDir(), c.id, snapshotDir, "work"))) - rootfs.Options = append(rootfs.Options, fmt.Sprintf("%s=%s", lowerDir, filepath.Join(kataGuestNydusImageDir(), c.id, lowerDir))) - rootfs.Options = append(rootfs.Options, "index=off") - k.Logger().Infof("rootfs info: %#v\n", rootfs) - return rootfs, nil -} - -func (k *kataAgent) buildContainerRootfs(ctx context.Context, sandbox *Sandbox, c *Container, rootPathParent string) (*grpc.Storage, error) { - if c.rootFs.Type == NydusRootFSType { - return k.buildContainerRootfsWithNydus(sandbox, c, rootPathParent) - } - if c.state.Fstype != "" && c.state.BlockDeviceID != "" { - // The rootfs storage volume represents the container rootfs - // mount point inside the guest. - // It can be a block based device (when using block based container - // overlay on the host) mount or a 9pfs one (for all other overlay - // implementations). - rootfs := &grpc.Storage{} - - // This is a block based device rootfs. - device := sandbox.devManager.GetDeviceByID(c.state.BlockDeviceID) - if device == nil { - k.Logger().WithField("device", c.state.BlockDeviceID).Error("failed to find device by id") - return nil, fmt.Errorf("failed to find device by id %q", c.state.BlockDeviceID) - } - - blockDrive, ok := device.GetDeviceInfo().(*config.BlockDrive) - if !ok || blockDrive == nil { - k.Logger().Error("malformed block drive") - return nil, fmt.Errorf("malformed block drive") - } - switch { - case sandbox.config.HypervisorConfig.BlockDeviceDriver == config.VirtioMmio: - rootfs.Driver = kataMmioBlkDevType - rootfs.Source = blockDrive.VirtPath - case sandbox.config.HypervisorConfig.BlockDeviceDriver == config.VirtioBlockCCW: - rootfs.Driver = kataBlkCCWDevType - rootfs.Source = blockDrive.DevNo - case sandbox.config.HypervisorConfig.BlockDeviceDriver == config.VirtioBlock: - rootfs.Driver = kataBlkDevType - rootfs.Source = blockDrive.PCIPath.String() - case sandbox.config.HypervisorConfig.BlockDeviceDriver == config.VirtioSCSI: - rootfs.Driver = kataSCSIDevType - rootfs.Source = blockDrive.SCSIAddr - default: - return nil, fmt.Errorf("Unknown block device driver: %s", sandbox.config.HypervisorConfig.BlockDeviceDriver) - } - - rootfs.MountPoint = rootPathParent - rootfs.Fstype = c.state.Fstype - - if c.state.Fstype == "xfs" { - rootfs.Options = []string{"nouuid"} - } - - // Ensure container mount destination exists - // TODO: remove dependency on shared fs path. shared fs is just one kind of storage source. - // we should not always use shared fs path for all kinds of storage. Instead, all storage - // should be bind mounted to a tmpfs path for containers to use. - if err := os.MkdirAll(filepath.Join(getMountPath(c.sandbox.id), c.id, c.rootfsSuffix), DirMode); err != nil { - return nil, err - } - return rootfs, nil - } - - // This is not a block based device rootfs. We are going to bind mount it into the shared drive - // between the host and the guest. - // With virtiofs/9pfs we don't need to ask the agent to mount the rootfs as the shared directory - // (kataGuestSharedDir) is already mounted in the guest. We only need to mount the rootfs from - // the host and it will show up in the guest. - if err := bindMountContainerRootfs(ctx, getMountPath(sandbox.id), c.id, c.rootFs.Target, false); err != nil { - return nil, err - } - - return nil, nil -} - func (k *kataAgent) createContainer(ctx context.Context, sandbox *Sandbox, c *Container) (p *Process, err error) { span, ctx := katatrace.Trace(ctx, k.Logger(), "createContainer", kataAgentTracingTags) defer span.End() var ctrStorages []*grpc.Storage var ctrDevices []*grpc.Device - var rootfs *grpc.Storage - - // This is the guest absolute root path for that container. - rootPathParent := filepath.Join(kataGuestSharedDir(), c.id) - rootPath := filepath.Join(rootPathParent, c.rootfsSuffix) + var sharedRootfs *SharedFile // In case the container creation fails, the following defer statement // takes care of rolling back actions previously performed. @@ -1320,19 +1180,19 @@ func (k *kataAgent) createContainer(ctx context.Context, sandbox *Sandbox, c *Co } }() - // setup rootfs -- if its block based, we'll receive a non-nil storage object representing + // Share the container rootfs -- if its block based, we'll receive a non-nil storage object representing // the block device for the rootfs, which us utilized for mounting in the guest. This'll be handled // already for non-block based rootfs - if rootfs, err = k.buildContainerRootfs(ctx, sandbox, c, rootPathParent); err != nil { + if sharedRootfs, err = sandbox.fsShare.ShareRootFilesystem(ctx, c); err != nil { return nil, err } - if rootfs != nil { + if sharedRootfs.storage != nil { // Add rootfs to the list of container storage. // We only need to do this for block based rootfs, as we // want the agent to mount it into the right location // (kataGuestSharedDir/ctrID/ - ctrStorages = append(ctrStorages, rootfs) + ctrStorages = append(ctrStorages, sharedRootfs.storage) } ociSpec := c.GetPatchedOCISpec() @@ -1408,7 +1268,7 @@ func (k *kataAgent) createContainer(ctx context.Context, sandbox *Sandbox, c *Co } // We need to give the OCI spec our absolute rootfs path in the guest. - grpcSpec.Root.Path = rootPath + grpcSpec.Root.Path = sharedRootfs.guestPath sharedPidNs := k.handlePidNamespace(grpcSpec, sandbox) diff --git a/src/runtime/virtcontainers/mount.go b/src/runtime/virtcontainers/mount.go index f782c5d09f..76e5fe3f49 100644 --- a/src/runtime/virtcontainers/mount.go +++ b/src/runtime/virtcontainers/mount.go @@ -388,6 +388,19 @@ func bindUnmountContainerSnapshotDir(ctx context.Context, sharedDir, cID string) return bindUnmountContainerShareDir(ctx, sharedDir, cID, snapshotDir) } +func getVirtiofsDaemonForNydus(sandbox *Sandbox) (VirtiofsDaemon, error) { + var virtiofsDaemon VirtiofsDaemon + switch sandbox.GetHypervisorType() { + case string(QemuHypervisor): + virtiofsDaemon = sandbox.hypervisor.(*qemu).virtiofsDaemon + case string(ClhHypervisor): + virtiofsDaemon = sandbox.hypervisor.(*cloudHypervisor).virtiofsDaemon + default: + return nil, errNydusdNotSupport + } + return virtiofsDaemon, nil +} + func nydusContainerCleanup(ctx context.Context, sharedDir string, c *Container) error { sandbox := c.sandbox virtiofsDaemon, err := getVirtiofsDaemonForNydus(sandbox)