From e4f33ac141fbdb3fd33c70db5f3c0d5901403a1d Mon Sep 17 00:00:00 2001 From: ChengyuZhu6 Date: Mon, 4 Sep 2023 13:11:44 +0800 Subject: [PATCH 1/4] runtime: add functions to create devices in KataVirtualVolume The snapshotter will place `KataVirtualVolume` information into 'rootfs.options' and commence with the prefix 'io.katacontainers.volume='. The purpose of this commit is to transform the encapsulated KataVirtualVolume data into device information. Fixes: #8495 Signed-off-by: ChengyuZhu6 Co-authored-by: Feng Wang Co-authored-by: Samuel Ortiz Co-authored-by: Wedson Almeida Filho --- src/runtime/virtcontainers/container.go | 94 +++++++++++++++--------- src/runtime/virtcontainers/kata_agent.go | 2 + 2 files changed, 62 insertions(+), 34 deletions(-) diff --git a/src/runtime/virtcontainers/container.go b/src/runtime/virtcontainers/container.go index 1f8646a63c..db249b3e49 100644 --- a/src/runtime/virtcontainers/container.go +++ b/src/runtime/virtcontainers/container.go @@ -662,41 +662,9 @@ func (c *Container) createBlockDevices(ctx context.Context) error { } } - var stat unix.Stat_t - if err := unix.Stat(c.mounts[i].Source, &stat); err != nil { - return fmt.Errorf("stat %q failed: %v", c.mounts[i].Source, err) - } - - var di *config.DeviceInfo - var err error - // Check if mount is a block device file. If it is, the block device will be attached to the host // instead of passing this as a shared mount. 
- if stat.Mode&unix.S_IFMT == unix.S_IFBLK { - di = &config.DeviceInfo{ - HostPath: c.mounts[i].Source, - ContainerPath: c.mounts[i].Destination, - DevType: "b", - Major: int64(unix.Major(uint64(stat.Rdev))), - Minor: int64(unix.Minor(uint64(stat.Rdev))), - ReadOnly: c.mounts[i].ReadOnly, - } - } else if isBlockFile && stat.Mode&unix.S_IFMT == unix.S_IFREG { - di = &config.DeviceInfo{ - HostPath: c.mounts[i].Source, - ContainerPath: c.mounts[i].Destination, - DevType: "b", - Major: -1, - Minor: 0, - ReadOnly: c.mounts[i].ReadOnly, - } - // Check whether source can be used as a pmem device - } else if di, err = config.PmemDeviceInfo(c.mounts[i].Source, c.mounts[i].Destination); err != nil { - c.Logger().WithError(err). - WithField("mount-source", c.mounts[i].Source). - Debug("no loop device") - } - + di, err := c.createDeviceInfo(c.mounts[i].Source, c.mounts[i].Destination, c.mounts[i].ReadOnly, isBlockFile) if err == nil && di != nil { b, err := c.sandbox.devManager.NewDevice(*di) if err != nil { @@ -795,6 +763,58 @@ func newContainer(ctx context.Context, sandbox *Sandbox, contConfig *ContainerCo return c, nil } +// Create Device Information about the block device +func (c *Container) createDeviceInfo(source, destination string, readonly, isBlockFile bool) (*config.DeviceInfo, error) { + var stat unix.Stat_t + if err := unix.Stat(source, &stat); err != nil { + return nil, fmt.Errorf("stat %q failed: %v", source, err) + } + + var di *config.DeviceInfo + var err error + + if stat.Mode&unix.S_IFMT == unix.S_IFBLK { + di = &config.DeviceInfo{ + HostPath: source, + ContainerPath: destination, + DevType: "b", + Major: int64(unix.Major(uint64(stat.Rdev))), + Minor: int64(unix.Minor(uint64(stat.Rdev))), + ReadOnly: readonly, + } + } else if isBlockFile && stat.Mode&unix.S_IFMT == unix.S_IFREG { + di = &config.DeviceInfo{ + HostPath: source, + ContainerPath: destination, + DevType: "b", + Major: -1, + Minor: 0, + ReadOnly: readonly, + } + // Check whether source can be used 
as a pmem device + } else if di, err = config.PmemDeviceInfo(source, destination); err != nil { + c.Logger().WithError(err). + WithField("mount-source", source). + Debug("no loop device") + } + return di, err +} + +// call hypervisor to create device about KataVirtualVolume. +func (c *Container) createVirtualVolumeDevices() ([]config.DeviceInfo, error) { + var deviceInfos []config.DeviceInfo + for _, o := range c.rootFs.Options { + if strings.HasPrefix(o, VirtualVolumePrefix) { + virtVolume, err := types.ParseKataVirtualVolume(strings.TrimPrefix(o, VirtualVolumePrefix)) + if err != nil { + return nil, err + } + c.Logger().Infof("KataVirtualVolume volumetype = %s", virtVolume.VolumeType) + } + } + return deviceInfos, nil +} + func (c *Container) createMounts(ctx context.Context) error { // Create block devices for newly created container return c.createBlockDevices(ctx) @@ -804,7 +824,13 @@ func (c *Container) createDevices(contConfig *ContainerConfig) error { // If devices were not found in storage, create Device implementations // from the configuration. This should happen at create. var storedDevices []ContainerDevice - for _, info := range contConfig.DeviceInfos { + virtualVolumesDeviceInfos, err := c.createVirtualVolumeDevices() + if err != nil { + return err + } + deviceInfos := append(virtualVolumesDeviceInfos, contConfig.DeviceInfos...) 
+ + for _, info := range deviceInfos { dev, err := c.sandbox.devManager.NewDevice(info) if err != nil { return err diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index 892aa534b7..c17e7f080f 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -69,6 +69,8 @@ const ( NydusRootFSType = "fuse.nydus-overlayfs" + VirtualVolumePrefix = "io.katacontainers.volume=" + // enable debug console kernelParamDebugConsole = "agent.debug_console" kernelParamDebugConsoleVPort = "agent.debug_console_vport" From bd099fbda95eff462f93e20889da00db99a938da Mon Sep 17 00:00:00 2001 From: ChengyuZhu6 Date: Mon, 4 Sep 2023 13:18:23 +0800 Subject: [PATCH 2/4] runtime: extend SharedFile to support multiple storage devices To enhance the construction and administration of `KataVirtualVolume` storages, this commit expands the `SharedFile` structure to manage both rootfs storages (`containerStorages`) including `KataVirtualVolume` and other data volumes storages (`volumeStorages`). NOTE: `volumeStorages` is intended for future extensions to support Kubernetes data volumes. Currently, `KataVirtualVolume` is exclusively employed for container rootfs, hence only `containerStorages` is actively utilized. Signed-off-by: ChengyuZhu6 --- src/runtime/virtcontainers/fs_share.go | 5 +++-- src/runtime/virtcontainers/fs_share_linux.go | 17 +++++++++-------- src/runtime/virtcontainers/kata_agent.go | 13 +++++++++---- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/src/runtime/virtcontainers/fs_share.go b/src/runtime/virtcontainers/fs_share.go index b5000291cc..3df08368ea 100644 --- a/src/runtime/virtcontainers/fs_share.go +++ b/src/runtime/virtcontainers/fs_share.go @@ -21,8 +21,9 @@ var fsShareTracingTags = map[string]string{ // SharedFile represents the outcome of a host filesystem sharing // operation.
type SharedFile struct { - storage *grpc.Storage - guestPath string + containerStorages []*grpc.Storage + volumeStorages []*grpc.Storage + guestPath string } type FilesystemSharer interface { diff --git a/src/runtime/virtcontainers/fs_share_linux.go b/src/runtime/virtcontainers/fs_share_linux.go index 5bafb9e403..97e893fdda 100644 --- a/src/runtime/virtcontainers/fs_share_linux.go +++ b/src/runtime/virtcontainers/fs_share_linux.go @@ -455,13 +455,14 @@ func (f *FilesystemShare) shareRootFilesystemWithNydus(ctx context.Context, c *C f.Logger().Infof("Nydus rootfs info: %#v\n", rootfs) return &SharedFile{ - storage: rootfs, - guestPath: rootfsGuestPath, + containerStorages: []*grpc.Storage{rootfs}, + guestPath: rootfsGuestPath, }, nil } // func (c *Container) shareRootfs(ctx context.Context) (*grpc.Storage, string, error) { func (f *FilesystemShare) ShareRootFilesystem(ctx context.Context, c *Container) (*SharedFile, error) { + if c.rootFs.Type == NydusRootFSType { return f.shareRootFilesystemWithNydus(ctx, c) } @@ -470,13 +471,13 @@ func (f *FilesystemShare) ShareRootFilesystem(ctx context.Context, c *Container) if HasOptionPrefix(c.rootFs.Options, annotations.FileSystemLayer) { path := filepath.Join("/run/kata-containers", c.id, "rootfs") return &SharedFile{ - storage: &grpc.Storage{ + containerStorages: []*grpc.Storage{{ MountPoint: path, Source: "none", Fstype: c.rootFs.Type, Driver: kataOverlayDevType, Options: c.rootFs.Options, - }, + }}, guestPath: path, }, nil } @@ -541,8 +542,8 @@ func (f *FilesystemShare) ShareRootFilesystem(ctx context.Context, c *Container) } return &SharedFile{ - storage: rootfsStorage, - guestPath: rootfsGuestPath, + containerStorages: []*grpc.Storage{rootfsStorage}, + guestPath: rootfsGuestPath, }, nil } @@ -556,8 +557,8 @@ func (f *FilesystemShare) ShareRootFilesystem(ctx context.Context, c *Container) } return &SharedFile{ - storage: nil, - guestPath: rootfsGuestPath, + containerStorages: nil, + guestPath: rootfsGuestPath, }, nil } 
diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index c17e7f080f..3194ec6e84 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -1258,12 +1258,17 @@ func (k *kataAgent) createContainer(ctx context.Context, sandbox *Sandbox, c *Co return nil, err } - if sharedRootfs.storage != nil { + if sharedRootfs.containerStorages != nil { // Add rootfs to the list of container storage. - // We only need to do this for block based rootfs, as we + ctrStorages = append(ctrStorages, sharedRootfs.containerStorages...) + } + + if sharedRootfs.volumeStorages != nil { + // Add volumeStorages to the list of container storage. + // We only need to do this for KataVirtualVolume based rootfs, as we // want the agent to mount it into the right location - // (kataGuestSharedDir/ctrID/ - ctrStorages = append(ctrStorages, sharedRootfs.storage) + + ctrStorages = append(ctrStorages, sharedRootfs.volumeStorages...) } ociSpec := c.GetPatchedOCISpec() From 0b4f7c2ee7643334a2430df4fe8b36f444caa2a4 Mon Sep 17 00:00:00 2001 From: ChengyuZhu6 Date: Mon, 4 Sep 2023 13:29:16 +0800 Subject: [PATCH 3/4] runtime: redefine and add functions to handle VirtualVolume to storage 1) Extract function `handleBlockVolume` to create Storage only. 2) Add functions to handle KataVirtualVolume device and construct corresponding storages. 
Signed-off-by: ChengyuZhu6 --- src/runtime/virtcontainers/kata_agent.go | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index 3194ec6e84..e4eb2824e4 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -1543,14 +1543,11 @@ func (k *kataAgent) handleLocalStorage(mounts []specs.Mount, sandboxID string, r return localStorages, nil } -// handleDeviceBlockVolume handles volume that is block device file -// and DeviceBlock type. -func (k *kataAgent) handleDeviceBlockVolume(c *Container, m Mount, device api.Device) (*grpc.Storage, error) { +func handleBlockVolume(c *Container, device api.Device) (*grpc.Storage, error) { vol := &grpc.Storage{} blockDrive, ok := device.GetDeviceInfo().(*config.BlockDrive) if !ok || blockDrive == nil { - k.Logger().Error("malformed block drive") return nil, fmt.Errorf("malformed block drive") } switch { @@ -1575,6 +1572,22 @@ func (k *kataAgent) handleDeviceBlockVolume(c *Container, m Mount, device api.De default: return nil, fmt.Errorf("Unknown block device driver: %s", c.sandbox.config.HypervisorConfig.BlockDeviceDriver) } + return vol, nil +} + +// handleVirtualVolumeStorageObject handles KataVirtualVolume that is block device file. +func handleVirtualVolumeStorageObject(c *Container, blockDeviceId string, virtVolume *types.KataVirtualVolume) (*grpc.Storage, error) { + var vol *grpc.Storage = &grpc.Storage{} + return vol, nil +} + +// handleDeviceBlockVolume handles volume that is block device file +// and DeviceBlock type. 
+func (k *kataAgent) handleDeviceBlockVolume(c *Container, m Mount, device api.Device) (*grpc.Storage, error) { + vol, err := handleBlockVolume(c, device) + if err != nil { + return nil, err + } vol.MountPoint = m.Destination From 5318afe2738944f1418d86bcef4637f743fdace3 Mon Sep 17 00:00:00 2001 From: ChengyuZhu6 Date: Mon, 4 Sep 2023 13:34:30 +0800 Subject: [PATCH 4/4] runtime: support to create VirtualVolume rootfs storages 1) Creating storage for all `io.katacontainers.volume=` messages in rootFs.Options, and then aggregates all storages into `containerStorages`. 2) Creating storage for other data volumes and push them into `volumeStorages`. Signed-off-by: ChengyuZhu6 --- src/runtime/pkg/katautils/create.go | 2 +- src/runtime/virtcontainers/fs_share_linux.go | 48 +++++++++++++ src/runtime/virtcontainers/kata_agent.go | 75 +++++++++++--------- 3 files changed, 89 insertions(+), 36 deletions(-) diff --git a/src/runtime/pkg/katautils/create.go b/src/runtime/pkg/katautils/create.go index bd5808deba..6be910bde9 100644 --- a/src/runtime/pkg/katautils/create.go +++ b/src/runtime/pkg/katautils/create.go @@ -130,7 +130,7 @@ func CreateSandbox(ctx context.Context, vci vc.VC, ociSpec specs.Spec, runtimeCo } if !rootFs.Mounted && len(sandboxConfig.Containers) == 1 { - if rootFs.Source != "" { + if rootFs.Source != "" && !vc.HasOptionPrefix(rootFs.Options, vc.VirtualVolumePrefix) { realPath, err := ResolvePath(rootFs.Source) if err != nil { return nil, vc.Process{}, err diff --git a/src/runtime/virtcontainers/fs_share_linux.go b/src/runtime/virtcontainers/fs_share_linux.go index 97e893fdda..4191a706a1 100644 --- a/src/runtime/virtcontainers/fs_share_linux.go +++ b/src/runtime/virtcontainers/fs_share_linux.go @@ -15,6 +15,7 @@ import ( "os" "path/filepath" "regexp" + "strings" "sync" "syscall" @@ -26,6 +27,7 @@ import ( "github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace" 
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols/grpc" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils" ) @@ -460,9 +462,55 @@ func (f *FilesystemShare) shareRootFilesystemWithNydus(ctx context.Context, c *C }, nil } +// handleVirtualVolume processes all `io.katacontainers.volume=` messages in rootFs.Options, +// creating storage, and then aggregates all storages into an array. +func handleVirtualVolume(c *Container) ([]*grpc.Storage, string, error) { + var volumes []*grpc.Storage + var volumeType string + + for _, o := range c.rootFs.Options { + if strings.HasPrefix(o, VirtualVolumePrefix) { + virtVolume, err := types.ParseKataVirtualVolume(strings.TrimPrefix(o, VirtualVolumePrefix)) + if err != nil { + return nil, "", err + } + + volumeType = virtVolume.VolumeType + var vol *grpc.Storage + vol, err = handleVirtualVolumeStorageObject(c, "", virtVolume) + if err != nil { + return nil, "", err + } + + if vol != nil { + volumes = append(volumes, vol) + } + } + } + + return volumes, volumeType, nil +} + +func (f *FilesystemShare) shareRootFilesystemWithVirtualVolume(ctx context.Context, c *Container) (*SharedFile, error) { + guestPath := filepath.Join("/run/kata-containers/", c.id, c.rootfsSuffix) + rootFsStorages, _, err := handleVirtualVolume(c) + if err != nil { + return nil, err + } + + return &SharedFile{ + containerStorages: rootFsStorages, + guestPath: guestPath, + }, nil +} + // func (c *Container) shareRootfs(ctx context.Context) (*grpc.Storage, string, error) { func (f *FilesystemShare) ShareRootFilesystem(ctx context.Context, c *Container) (*SharedFile, error) { + if HasOptionPrefix(c.rootFs.Options, VirtualVolumePrefix) { + return f.shareRootFilesystemWithVirtualVolume(ctx, c) + } + if c.rootFs.Type == NydusRootFSType { 
return f.shareRootFilesystemWithNydus(ctx, c) } diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index e4eb2824e4..476c6d1779 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -83,41 +83,42 @@ const ( type customRequestTimeoutKeyType struct{} var ( - checkRequestTimeout = 30 * time.Second - defaultRequestTimeout = 60 * time.Second - remoteRequestTimeout = 300 * time.Second - customRequestTimeoutKey = customRequestTimeoutKeyType(struct{}{}) - errorMissingOCISpec = errors.New("Missing OCI specification") - defaultKataHostSharedDir = "/run/kata-containers/shared/sandboxes/" - defaultKataGuestSharedDir = "/run/kata-containers/shared/containers/" - defaultKataGuestNydusRootDir = "/run/kata-containers/shared/" - mountGuestTag = "kataShared" - defaultKataGuestSandboxDir = "/run/kata-containers/sandbox/" - type9pFs = "9p" - typeVirtioFS = "virtiofs" - typeOverlayFS = "overlay" - kata9pDevType = "9p" - kataMmioBlkDevType = "mmioblk" - kataBlkDevType = "blk" - kataBlkCCWDevType = "blk-ccw" - kataSCSIDevType = "scsi" - kataNvdimmDevType = "nvdimm" - kataVirtioFSDevType = "virtio-fs" - kataOverlayDevType = "overlayfs" - kataWatchableBindDevType = "watchable-bind" - kataVfioPciDevType = "vfio-pci" // VFIO PCI device to used as VFIO in the container - kataVfioPciGuestKernelDevType = "vfio-pci-gk" // VFIO PCI device for consumption by the guest kernel - kataVfioApDevType = "vfio-ap" - sharedDir9pOptions = []string{"trans=virtio,version=9p2000.L,cache=mmap", "nodev"} - sharedDirVirtioFSOptions = []string{} - sharedDirVirtioFSDaxOptions = "dax" - shmDir = "shm" - kataEphemeralDevType = "ephemeral" - defaultEphemeralPath = filepath.Join(defaultKataGuestSandboxDir, kataEphemeralDevType) - grpcMaxDataSize = int64(1024 * 1024) - localDirOptions = []string{"mode=0777"} - maxHostnameLen = 64 - GuestDNSFile = "/etc/resolv.conf" + checkRequestTimeout = 30 * time.Second + 
defaultRequestTimeout = 60 * time.Second + remoteRequestTimeout = 300 * time.Second + customRequestTimeoutKey = customRequestTimeoutKeyType(struct{}{}) + errorMissingOCISpec = errors.New("Missing OCI specification") + defaultKataHostSharedDir = "/run/kata-containers/shared/sandboxes/" + defaultKataGuestSharedDir = "/run/kata-containers/shared/containers/" + defaultKataGuestNydusRootDir = "/run/kata-containers/shared/" + defaultKataGuestVirtualVolumedir = "/run/kata-containers/virtual-volumes/" + mountGuestTag = "kataShared" + defaultKataGuestSandboxDir = "/run/kata-containers/sandbox/" + type9pFs = "9p" + typeVirtioFS = "virtiofs" + typeOverlayFS = "overlay" + kata9pDevType = "9p" + kataMmioBlkDevType = "mmioblk" + kataBlkDevType = "blk" + kataBlkCCWDevType = "blk-ccw" + kataSCSIDevType = "scsi" + kataNvdimmDevType = "nvdimm" + kataVirtioFSDevType = "virtio-fs" + kataOverlayDevType = "overlayfs" + kataWatchableBindDevType = "watchable-bind" + kataVfioPciDevType = "vfio-pci" // VFIO PCI device to used as VFIO in the container + kataVfioPciGuestKernelDevType = "vfio-pci-gk" // VFIO PCI device for consumption by the guest kernel + kataVfioApDevType = "vfio-ap" + sharedDir9pOptions = []string{"trans=virtio,version=9p2000.L,cache=mmap", "nodev"} + sharedDirVirtioFSOptions = []string{} + sharedDirVirtioFSDaxOptions = "dax" + shmDir = "shm" + kataEphemeralDevType = "ephemeral" + defaultEphemeralPath = filepath.Join(defaultKataGuestSandboxDir, kataEphemeralDevType) + grpcMaxDataSize = int64(1024 * 1024) + localDirOptions = []string{"mode=0777"} + maxHostnameLen = 64 + GuestDNSFile = "/etc/resolv.conf" ) const ( @@ -1200,6 +1201,10 @@ func (k *kataAgent) appendDevices(deviceList []*grpc.Device, c *Container) []*gr return nil } + if strings.HasPrefix(dev.ContainerPath, defaultKataGuestVirtualVolumedir) { + continue + } + switch device.DeviceType() { case config.DeviceBlock: kataDevice = k.appendBlockDevice(dev, device, c)