From f63ec50ba3f816520cd149153e9f67489da62633 Mon Sep 17 00:00:00 2001 From: ChengyuZhu6 Date: Mon, 21 Apr 2025 19:11:32 +0800 Subject: [PATCH] runtime: Add EROFS snapshotter with block device support - Detection of EROFS options in container rootfs - Creation of necessary EROFS devices - Sharing of rootfs with EROFS via overlayfs Fixes: #11163 Signed-off-by: ChengyuZhu6 --- src/runtime/pkg/containerd-shim-v2/create.go | 4 ++ src/runtime/pkg/katautils/create.go | 4 +- src/runtime/virtcontainers/container.go | 23 +++++++- src/runtime/virtcontainers/fs_share_linux.go | 57 ++++++++++++++++++++ src/runtime/virtcontainers/kata_agent.go | 29 ++++++++++ 5 files changed, 114 insertions(+), 3 deletions(-) diff --git a/src/runtime/pkg/containerd-shim-v2/create.go b/src/runtime/pkg/containerd-shim-v2/create.go index ab5afd2a07..6dcb459637 100644 --- a/src/runtime/pkg/containerd-shim-v2/create.go +++ b/src/runtime/pkg/containerd-shim-v2/create.go @@ -315,6 +315,10 @@ func checkAndMount(s *service, r *taskAPI.CreateTaskRequest) (bool, error) { return false, nil } + if virtcontainers.HasErofsOptions(m.Options) { + return false, nil + } + if vc.IsNydusRootFSType(m.Type) { // if kata + nydus, do not mount return false, nil diff --git a/src/runtime/pkg/katautils/create.go b/src/runtime/pkg/katautils/create.go index 758e83a3b9..1b83adf027 100644 --- a/src/runtime/pkg/katautils/create.go +++ b/src/runtime/pkg/katautils/create.go @@ -130,7 +130,7 @@ func CreateSandbox(ctx context.Context, vci vc.VC, ociSpec specs.Spec, runtimeCo } if !rootFs.Mounted && len(sandboxConfig.Containers) == 1 { - if rootFs.Source != "" && !vc.HasOptionPrefix(rootFs.Options, vc.VirtualVolumePrefix) { + if rootFs.Source != "" && !vc.HasOptionPrefix(rootFs.Options, vc.VirtualVolumePrefix) && !vc.HasErofsOptions(rootFs.Options) { realPath, err := ResolvePath(rootFs.Source) if err != nil { return nil, vc.Process{}, err @@ -244,7 +244,7 @@ func CreateContainer(ctx context.Context, sandbox vc.VCSandbox, ociSpec specs.Sp } if !rootFs.Mounted { - if rootFs.Source != "" && !vc.IsNydusRootFSType(rootFs.Type) { + if rootFs.Source != "" && !vc.IsNydusRootFSType(rootFs.Type) && !vc.HasErofsOptions(rootFs.Options) { realPath, err := ResolvePath(rootFs.Source) if err != nil { return vc.Process{}, err diff --git a/src/runtime/virtcontainers/container.go b/src/runtime/virtcontainers/container.go index b736229f5e..b00745b55c 100644 --- a/src/runtime/virtcontainers/container.go +++ b/src/runtime/virtcontainers/container.go @@ -821,6 +821,21 @@ func (c *Container) createMounts(ctx context.Context) error { return c.createBlockDevices(ctx) } +func (c *Container) createErofsDevices() ([]config.DeviceInfo, error) { + var deviceInfos []config.DeviceInfo + if HasErofsOptions(c.rootFs.Options) { + parsedOptions := parseRootFsOptions(c.rootFs.Options) + for _, path := range parsedOptions { + di, err := c.createDeviceInfo(path+"/layer.erofs", path+"/layer.erofs", true, true) + if err != nil { + return nil, err + } + deviceInfos = append(deviceInfos, *di) + } + } + return deviceInfos, nil +} + func (c *Container) createDevices(contConfig *ContainerConfig) error { // If devices were not found in storage, create Device implementations // from the configuration. This should happen at create. @@ -831,6 +846,12 @@ func (c *Container) createDevices(contConfig *ContainerConfig) error { } deviceInfos := append(virtualVolumesDeviceInfos, contConfig.DeviceInfos...) + erofsDeviceInfos, err := c.createErofsDevices() + if err != nil { + return err + } + deviceInfos = append(erofsDeviceInfos, deviceInfos...) + // If we have a confidential guest we need to cold-plug the PCIe VFIO devices // until we have TDISP/IDE PCIe support. coldPlugVFIO := (c.sandbox.config.HypervisorConfig.ColdPlugVFIO != config.NoPort) @@ -1053,7 +1074,7 @@ func (c *Container) create(ctx context.Context) (err error) { } }() - if c.checkBlockDeviceSupport(ctx) && !IsNydusRootFSType(c.rootFs.Type) { + if c.checkBlockDeviceSupport(ctx) && !IsNydusRootFSType(c.rootFs.Type) && !HasErofsOptions(c.rootFs.Options) { // If the rootfs is backed by a block device, go ahead and hotplug it to the guest if err = c.hotplugDrive(ctx); err != nil { return diff --git a/src/runtime/virtcontainers/fs_share_linux.go b/src/runtime/virtcontainers/fs_share_linux.go index 0dc11cbed5..b6d327a158 100644 --- a/src/runtime/virtcontainers/fs_share_linux.go +++ b/src/runtime/virtcontainers/fs_share_linux.go @@ -9,6 +9,7 @@ package virtcontainers import ( "context" + b64 "encoding/base64" "encoding/hex" "fmt" "io/fs" @@ -542,6 +543,57 @@ func (f *FilesystemShare) shareRootFilesystemWithVirtualVolume(ctx context.Conte }, nil } +func (f *FilesystemShare) shareRootFilesystemWithErofs(ctx context.Context, c *Container) (*SharedFile, error) { + guestPath := filepath.Join("/run/kata-containers/", c.id, c.rootfsSuffix) + var rootFsStorages []*grpc.Storage + for i, d := range c.devices { + if strings.Contains(d.ContainerPath, "layer.erofs") { + device := c.sandbox.devManager.GetDeviceByID(d.ID) + if device == nil { + return nil, fmt.Errorf("failed to find device by id %q", d.ID) + } + vol, err := handleBlockVolume(c, device) + if err != nil { + return nil, err + } + filename := b64.URLEncoding.EncodeToString([]byte(vol.Source)) + vol.Fstype = "erofs" + vol.Options = append(vol.Options, "ro") + vol.MountPoint = filepath.Join(defaultKataGuestVirtualVolumedir, filename) + c.devices[i].ContainerPath = vol.MountPoint + rootFsStorages = append(rootFsStorages, vol) + } + } + + overlayDirDriverOption := "io.katacontainers.volume.overlayfs.create_directory" + rootfsUpperDir := filepath.Join("/run/kata-containers/", c.id, "fs") + rootfsWorkDir := filepath.Join("/run/kata-containers/", c.id, "work") + rootfs := &grpc.Storage{} + rootfs.MountPoint = guestPath + rootfs.Source = typeOverlayFS + rootfs.Fstype = typeOverlayFS + rootfs.Driver = kataOverlayDevType + rootfs.DriverOptions = append(rootfs.DriverOptions, fmt.Sprintf("%s=%s", overlayDirDriverOption, rootfsUpperDir)) + rootfs.DriverOptions = append(rootfs.DriverOptions, fmt.Sprintf("%s=%s", overlayDirDriverOption, rootfsWorkDir)) + rootfs.Options = []string{} + for _, v := range rootFsStorages { + if len(rootfs.Options) == 0 { + rootfs.Options = append(rootfs.Options, fmt.Sprintf("%s=%s", lowerDir, v.MountPoint)) + } else { + rootfs.Options[0] = (rootfs.Options[0] + fmt.Sprintf(":%s", v.MountPoint)) + } + } + rootfs.Options = append(rootfs.Options, fmt.Sprintf("%s=%s", upperDir, rootfsUpperDir)) + rootfs.Options = append(rootfs.Options, fmt.Sprintf("%s=%s", workDir, rootfsWorkDir)) + + rootFsStorages = append(rootFsStorages, rootfs) + + return &SharedFile{ + containerStorages: rootFsStorages, + guestPath: guestPath, + }, nil +} + // func (c *Container) shareRootfs(ctx context.Context) (*grpc.Storage, string, error) { func (f *FilesystemShare) ShareRootFilesystem(ctx context.Context, c *Container) (*SharedFile, error) { @@ -552,6 +604,11 @@ func (f *FilesystemShare) ShareRootFilesystem(ctx context.Context, c *Container) if IsNydusRootFSType(c.rootFs.Type) { return f.shareRootFilesystemWithNydus(ctx, c) } + + if HasErofsOptions(c.rootFs.Options) { + return f.shareRootFilesystemWithErofs(ctx, c) + } + rootfsGuestPath := filepath.Join(kataGuestSharedDir(), c.id, c.rootfsSuffix) if HasOptionPrefix(c.rootFs.Options, annotations.FileSystemLayer) { diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index 25f08d63ad..d8a936dda2 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -2692,3 +2692,32 @@ func IsNydusRootFSType(s string) bool { s = strings.TrimPrefix(s, "fuse.") return strings.HasPrefix(path.Base(s), "nydus-overlayfs") } + +// HasErofsOptions checks if any of the options contain io.containerd.snapshotter.v1.erofs path +func HasErofsOptions(options []string) bool { + for _, opt := range options { + if strings.Contains(opt, "io.containerd.snapshotter.v1.erofs") { + return true + } + } + return false +} + +func parseRootFsOptions(options []string) []string { + lowerdirs := []string{} + + for _, opt := range options { + if strings.HasPrefix(opt, "lowerdir=") { + lowerdirValue := strings.TrimPrefix(opt, "lowerdir=") + + paths := strings.Split(lowerdirValue, ":") + + for _, path := range paths { + path = strings.TrimSuffix(path, "/fs") + lowerdirs = append(lowerdirs, path) + } + } + } + + return lowerdirs +}