diff --git a/src/runtime/pkg/katautils/create.go b/src/runtime/pkg/katautils/create.go
index bd5808deba..6be910bde9 100644
--- a/src/runtime/pkg/katautils/create.go
+++ b/src/runtime/pkg/katautils/create.go
@@ -130,7 +130,7 @@ func CreateSandbox(ctx context.Context, vci vc.VC, ociSpec specs.Spec, runtimeCo
 	}
 
 	if !rootFs.Mounted && len(sandboxConfig.Containers) == 1 {
-		if rootFs.Source != "" {
+		if rootFs.Source != "" && !vc.HasOptionPrefix(rootFs.Options, vc.VirtualVolumePrefix) {
 			realPath, err := ResolvePath(rootFs.Source)
 			if err != nil {
 				return nil, vc.Process{}, err
diff --git a/src/runtime/virtcontainers/container.go b/src/runtime/virtcontainers/container.go
index 1f8646a63c..db249b3e49 100644
--- a/src/runtime/virtcontainers/container.go
+++ b/src/runtime/virtcontainers/container.go
@@ -662,41 +662,12 @@ func (c *Container) createBlockDevices(ctx context.Context) error {
 			}
 		}
 
-		var stat unix.Stat_t
-		if err := unix.Stat(c.mounts[i].Source, &stat); err != nil {
-			return fmt.Errorf("stat %q failed: %v", c.mounts[i].Source, err)
-		}
-
-		var di *config.DeviceInfo
-		var err error
-
 		// Check if mount is a block device file. If it is, the block device will be attached to the host
 		// instead of passing this as a shared mount.
-		if stat.Mode&unix.S_IFMT == unix.S_IFBLK {
-			di = &config.DeviceInfo{
-				HostPath:      c.mounts[i].Source,
-				ContainerPath: c.mounts[i].Destination,
-				DevType:       "b",
-				Major:         int64(unix.Major(uint64(stat.Rdev))),
-				Minor:         int64(unix.Minor(uint64(stat.Rdev))),
-				ReadOnly:      c.mounts[i].ReadOnly,
-			}
-		} else if isBlockFile && stat.Mode&unix.S_IFMT == unix.S_IFREG {
-			di = &config.DeviceInfo{
-				HostPath:      c.mounts[i].Source,
-				ContainerPath: c.mounts[i].Destination,
-				DevType:       "b",
-				Major:         -1,
-				Minor:         0,
-				ReadOnly:      c.mounts[i].ReadOnly,
-			}
-			// Check whether source can be used as a pmem device
-		} else if di, err = config.PmemDeviceInfo(c.mounts[i].Source, c.mounts[i].Destination); err != nil {
-			c.Logger().WithError(err).
-				WithField("mount-source", c.mounts[i].Source).
-				Debug("no loop device")
-		}
-
+		di, err := c.createDeviceInfo(c.mounts[i].Source, c.mounts[i].Destination, c.mounts[i].ReadOnly, isBlockFile)
+		if err != nil {
+			return err
+		}
 		if err == nil && di != nil {
 			b, err := c.sandbox.devManager.NewDevice(*di)
 			if err != nil {
@@ -795,6 +766,69 @@ func newContainer(ctx context.Context, sandbox *Sandbox, contConfig *ContainerCo
 	return c, nil
 }
 
+// createDeviceInfo stats source and returns the DeviceInfo from which a device
+// can be created for it, using destination as the path inside the container.
+//
+// A block device node keeps its real major/minor numbers. A regular file is
+// accepted only when isBlockFile is set and gets Major -1. Any other source is
+// probed with config.PmemDeviceInfo; a failed probe is not treated as an error:
+// it is logged at debug level and reported to the caller as (nil, nil).
+// A non-nil error therefore always means that source could not be stat'ed.
+func (c *Container) createDeviceInfo(source, destination string, readonly, isBlockFile bool) (*config.DeviceInfo, error) {
+	var stat unix.Stat_t
+	if err := unix.Stat(source, &stat); err != nil {
+		return nil, fmt.Errorf("stat %q failed: %w", source, err)
+	}
+
+	var di *config.DeviceInfo
+	var err error
+
+	if stat.Mode&unix.S_IFMT == unix.S_IFBLK {
+		di = &config.DeviceInfo{
+			HostPath:      source,
+			ContainerPath: destination,
+			DevType:       "b",
+			Major:         int64(unix.Major(uint64(stat.Rdev))),
+			Minor:         int64(unix.Minor(uint64(stat.Rdev))),
+			ReadOnly:      readonly,
+		}
+	} else if isBlockFile && stat.Mode&unix.S_IFMT == unix.S_IFREG {
+		di = &config.DeviceInfo{
+			HostPath:      source,
+			ContainerPath: destination,
+			DevType:       "b",
+			Major:         -1,
+			Minor:         0,
+			ReadOnly:      readonly,
+		}
+		// Check whether source can be used as a pmem device
+	} else if di, err = config.PmemDeviceInfo(source, destination); err != nil {
+		c.Logger().WithError(err).
+			WithField("mount-source", source).
+			Debug("no loop device")
+		return nil, nil
+	}
+	return di, nil
+}
+
+// createVirtualVolumeDevices parses every KataVirtualVolume option (prefixed
+// with VirtualVolumePrefix) in c.rootFs.Options and logs its volume type. No
+// DeviceInfo is built from the volumes yet, so the returned slice is always
+// empty; a non-nil error means an option could not be parsed.
+func (c *Container) createVirtualVolumeDevices() ([]config.DeviceInfo, error) {
+	var deviceInfos []config.DeviceInfo
+	for _, o := range c.rootFs.Options {
+		if strings.HasPrefix(o, VirtualVolumePrefix) {
+			virtVolume, err := types.ParseKataVirtualVolume(strings.TrimPrefix(o, VirtualVolumePrefix))
+			if err != nil {
+				return nil, err
+			}
+			c.Logger().Infof("KataVirtualVolume volumetype = %s", virtVolume.VolumeType)
+		}
+	}
+	return deviceInfos, nil
+}
+
 func (c *Container) createMounts(ctx context.Context) error {
 	// Create block devices for newly created container
 	return c.createBlockDevices(ctx)
@@ -804,7 +838,13 @@ func (c *Container) createDevices(contConfig *ContainerConfig) error {
 	// If devices were not found in storage, create Device implementations
 	// from the configuration. This should happen at create.
 	var storedDevices []ContainerDevice
-	for _, info := range contConfig.DeviceInfos {
+	virtualVolumesDeviceInfos, err := c.createVirtualVolumeDevices()
+	if err != nil {
+		return err
+	}
+	deviceInfos := append(virtualVolumesDeviceInfos, contConfig.DeviceInfos...)
+
+	for _, info := range deviceInfos {
 		dev, err := c.sandbox.devManager.NewDevice(info)
 		if err != nil {
 			return err
diff --git a/src/runtime/virtcontainers/fs_share.go b/src/runtime/virtcontainers/fs_share.go
index b5000291cc..3df08368ea 100644
--- a/src/runtime/virtcontainers/fs_share.go
+++ b/src/runtime/virtcontainers/fs_share.go
@@ -21,8 +21,9 @@ var fsShareTracingTags = map[string]string{
 // SharedFile represents the outcome of a host filesystem sharing
 // operation.
 type SharedFile struct {
-	storage   *grpc.Storage
-	guestPath string
+	containerStorages []*grpc.Storage
+	volumeStorages    []*grpc.Storage
+	guestPath         string
 }
 
 type FilesystemSharer interface {
diff --git a/src/runtime/virtcontainers/fs_share_linux.go b/src/runtime/virtcontainers/fs_share_linux.go
index 029206f1fc..e80c9e8260 100644
--- a/src/runtime/virtcontainers/fs_share_linux.go
+++ b/src/runtime/virtcontainers/fs_share_linux.go
@@ -15,6 +15,7 @@ import (
 	"os"
 	"path/filepath"
 	"regexp"
+	"strings"
 	"sync"
 	"syscall"
 
@@ -26,6 +27,7 @@ import (
 	"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace"
 	"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols/grpc"
 	"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations"
+	"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types"
 	"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
 )
 
@@ -464,13 +466,60 @@ func (f *FilesystemShare) shareRootFilesystemWithNydus(ctx context.Context, c *C
 	f.Logger().Infof("Nydus rootfs info: %#v\n", rootfs)
 
 	return &SharedFile{
-		storage:   rootfs,
-		guestPath: rootfsGuestPath,
+		containerStorages: []*grpc.Storage{rootfs},
+		guestPath:         rootfsGuestPath,
+	}, nil
+}
+
+// handleVirtualVolume parses every `io.katacontainers.volume=` option in c.rootFs.Options, builds a
+// Storage for each one, and returns those storages plus the volume type of the last option parsed.
+func handleVirtualVolume(c *Container) ([]*grpc.Storage, string, error) {
+	var storages []*grpc.Storage
+	var lastType string
+
+	for _, opt := range c.rootFs.Options {
+		if !strings.HasPrefix(opt, VirtualVolumePrefix) {
+			continue
+		}
+
+		volume, err := types.ParseKataVirtualVolume(strings.TrimPrefix(opt, VirtualVolumePrefix))
+		if err != nil {
+			return nil, "", err
+		}
+		// Each volume overwrites the type, so the last one parsed is the one returned.
+		lastType = volume.VolumeType
+
+		storage, err := handleVirtualVolumeStorageObject(c, "", volume)
+		if err != nil {
+			return nil, "", err
+		}
+		if storage != nil {
+			storages = append(storages, storage)
+		}
+	}
+	return storages, lastType, nil
+}
+
+func (f *FilesystemShare) shareRootFilesystemWithVirtualVolume(ctx context.Context, c *Container) (*SharedFile, error) {
+	guestPath := filepath.Join("/run/kata-containers/", c.id, c.rootfsSuffix)
+	rootFsStorages, _, err := handleVirtualVolume(c)
+	if err != nil {
+		return nil, err
+	}
+
+	return &SharedFile{
+		containerStorages: rootFsStorages,
+		guestPath:         guestPath,
 	}, nil
 }
 
 // func (c *Container) shareRootfs(ctx context.Context) (*grpc.Storage, string, error) {
 func (f *FilesystemShare) ShareRootFilesystem(ctx context.Context, c *Container) (*SharedFile, error) {
+
+	if HasOptionPrefix(c.rootFs.Options, VirtualVolumePrefix) {
+		return f.shareRootFilesystemWithVirtualVolume(ctx, c)
+	}
+
 	if c.rootFs.Type == NydusRootFSType {
 		return f.shareRootFilesystemWithNydus(ctx, c)
 	}
@@ -479,13 +528,13 @@ func (f *FilesystemShare) ShareRootFilesystem(ctx context.Context, c *Container)
 	if HasOptionPrefix(c.rootFs.Options, annotations.FileSystemLayer) {
 		path := filepath.Join("/run/kata-containers", c.id, "rootfs")
 		return &SharedFile{
-			storage: &grpc.Storage{
+			containerStorages: []*grpc.Storage{{
 				MountPoint: path,
 				Source:     "none",
 				Fstype:     c.rootFs.Type,
 				Driver:     kataOverlayDevType,
 				Options:    c.rootFs.Options,
-			},
+			}},
 			guestPath: path,
 		}, nil
 	}
@@ -550,8 +599,8 @@ func (f *FilesystemShare) ShareRootFilesystem(ctx context.Context, c *Container)
 		}
 
 		return &SharedFile{
-			storage:   rootfsStorage,
-			guestPath: rootfsGuestPath,
+			containerStorages: []*grpc.Storage{rootfsStorage},
+			guestPath:         rootfsGuestPath,
 		}, nil
 	}
 
@@ -565,8 +614,8 @@ func (f *FilesystemShare) ShareRootFilesystem(ctx context.Context, c *Container)
 	}
 
 	return &SharedFile{
-		storage:   nil,
-		guestPath: rootfsGuestPath,
+		containerStorages: nil,
+		guestPath:         rootfsGuestPath,
 	}, nil
 }
 
diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go
index 892aa534b7..476c6d1779 100644
--- a/src/runtime/virtcontainers/kata_agent.go
+++ b/src/runtime/virtcontainers/kata_agent.go
@@ -69,6 +69,8 @@ const (
 
 	NydusRootFSType = "fuse.nydus-overlayfs"
 
+	VirtualVolumePrefix = "io.katacontainers.volume="
+
 	// enable debug console
 	kernelParamDebugConsole      = "agent.debug_console"
 	kernelParamDebugConsoleVPort = "agent.debug_console_vport"
@@ -81,41 +83,42 @@ const (
 type customRequestTimeoutKeyType struct{}
 
 var (
-	checkRequestTimeout           = 30 * time.Second
-	defaultRequestTimeout         = 60 * time.Second
-	remoteRequestTimeout          = 300 * time.Second
-	customRequestTimeoutKey       = customRequestTimeoutKeyType(struct{}{})
-	errorMissingOCISpec           = errors.New("Missing OCI specification")
-	defaultKataHostSharedDir      = "/run/kata-containers/shared/sandboxes/"
-	defaultKataGuestSharedDir     = "/run/kata-containers/shared/containers/"
-	defaultKataGuestNydusRootDir  = "/run/kata-containers/shared/"
-	mountGuestTag                 = "kataShared"
-	defaultKataGuestSandboxDir    = "/run/kata-containers/sandbox/"
-	type9pFs                      = "9p"
-	typeVirtioFS                  = "virtiofs"
-	typeOverlayFS                 = "overlay"
-	kata9pDevType                 = "9p"
-	kataMmioBlkDevType            = "mmioblk"
-	kataBlkDevType                = "blk"
-	kataBlkCCWDevType             = "blk-ccw"
-	kataSCSIDevType               = "scsi"
-	kataNvdimmDevType             = "nvdimm"
-	kataVirtioFSDevType           = "virtio-fs"
-	kataOverlayDevType            = "overlayfs"
-	kataWatchableBindDevType      = "watchable-bind"
-	kataVfioPciDevType            = "vfio-pci"    // VFIO PCI device to used as VFIO in the container
-	kataVfioPciGuestKernelDevType = "vfio-pci-gk" // VFIO PCI device for consumption by the guest kernel
-	kataVfioApDevType             = "vfio-ap"
-	sharedDir9pOptions            = []string{"trans=virtio,version=9p2000.L,cache=mmap", "nodev"}
-	sharedDirVirtioFSOptions      = []string{}
-	sharedDirVirtioFSDaxOptions   = "dax"
-	shmDir                        = "shm"
-	kataEphemeralDevType          = "ephemeral"
-	defaultEphemeralPath          = filepath.Join(defaultKataGuestSandboxDir, kataEphemeralDevType)
-	grpcMaxDataSize               = int64(1024 * 1024)
-	localDirOptions               = []string{"mode=0777"}
-	maxHostnameLen                = 64
-	GuestDNSFile                  = "/etc/resolv.conf"
+	checkRequestTimeout              = 30 * time.Second
+	defaultRequestTimeout            = 60 * time.Second
+	remoteRequestTimeout             = 300 * time.Second
+	customRequestTimeoutKey          = customRequestTimeoutKeyType(struct{}{})
+	errorMissingOCISpec              = errors.New("Missing OCI specification")
+	defaultKataHostSharedDir         = "/run/kata-containers/shared/sandboxes/"
+	defaultKataGuestSharedDir        = "/run/kata-containers/shared/containers/"
+	defaultKataGuestNydusRootDir     = "/run/kata-containers/shared/"
+	defaultKataGuestVirtualVolumedir = "/run/kata-containers/virtual-volumes/"
+	mountGuestTag                    = "kataShared"
+	defaultKataGuestSandboxDir       = "/run/kata-containers/sandbox/"
+	type9pFs                         = "9p"
+	typeVirtioFS                     = "virtiofs"
+	typeOverlayFS                    = "overlay"
+	kata9pDevType                    = "9p"
+	kataMmioBlkDevType               = "mmioblk"
+	kataBlkDevType                   = "blk"
+	kataBlkCCWDevType                = "blk-ccw"
+	kataSCSIDevType                  = "scsi"
+	kataNvdimmDevType                = "nvdimm"
+	kataVirtioFSDevType              = "virtio-fs"
+	kataOverlayDevType               = "overlayfs"
+	kataWatchableBindDevType         = "watchable-bind"
+	kataVfioPciDevType               = "vfio-pci"    // VFIO PCI device to used as VFIO in the container
+	kataVfioPciGuestKernelDevType    = "vfio-pci-gk" // VFIO PCI device for consumption by the guest kernel
+	kataVfioApDevType                = "vfio-ap"
+	sharedDir9pOptions               = []string{"trans=virtio,version=9p2000.L,cache=mmap", "nodev"}
+	sharedDirVirtioFSOptions         = []string{}
+	sharedDirVirtioFSDaxOptions      = "dax"
+	shmDir                           = "shm"
+	kataEphemeralDevType             = "ephemeral"
+	defaultEphemeralPath             = filepath.Join(defaultKataGuestSandboxDir, kataEphemeralDevType)
+	grpcMaxDataSize                  = int64(1024 * 1024)
+	localDirOptions                  = []string{"mode=0777"}
+	maxHostnameLen                   = 64
+	GuestDNSFile                     = "/etc/resolv.conf"
 )
 
 const (
@@ -1198,6 +1201,10 @@ func (k *kataAgent) appendDevices(deviceList []*grpc.Device, c *Container) []*gr
 			return nil
 		}
 
+		if strings.HasPrefix(dev.ContainerPath, defaultKataGuestVirtualVolumedir) {
+			continue
+		}
+
 		switch device.DeviceType() {
 		case config.DeviceBlock:
 			kataDevice = k.appendBlockDevice(dev, device, c)
@@ -1256,12 +1263,17 @@ func (k *kataAgent) createContainer(ctx context.Context, sandbox *Sandbox, c *Co
 		return nil, err
 	}
 
-	if sharedRootfs.storage != nil {
+	if sharedRootfs.containerStorages != nil {
 		// Add rootfs to the list of container storage.
-		// We only need to do this for block based rootfs, as we
+		ctrStorages = append(ctrStorages, sharedRootfs.containerStorages...)
+	}
+
+	if sharedRootfs.volumeStorages != nil {
+		// Add volumeStorages to the list of container storage.
+		// We only need to do this for KataVirtualVolume based rootfs, as we
 		// want the agent to mount it into the right location
-		// (kataGuestSharedDir/ctrID/
-		ctrStorages = append(ctrStorages, sharedRootfs.storage)
+
+		ctrStorages = append(ctrStorages, sharedRootfs.volumeStorages...)
 	}
 
 	ociSpec := c.GetPatchedOCISpec()
@@ -1536,14 +1548,11 @@ func (k *kataAgent) handleLocalStorage(mounts []specs.Mount, sandboxID string, r
 	return localStorages, nil
 }
 
-// handleDeviceBlockVolume handles volume that is block device file
-// and DeviceBlock type.
-func (k *kataAgent) handleDeviceBlockVolume(c *Container, m Mount, device api.Device) (*grpc.Storage, error) {
+func handleBlockVolume(c *Container, device api.Device) (*grpc.Storage, error) {
 	vol := &grpc.Storage{}
 
 	blockDrive, ok := device.GetDeviceInfo().(*config.BlockDrive)
 	if !ok || blockDrive == nil {
-		k.Logger().Error("malformed block drive")
 		return nil, fmt.Errorf("malformed block drive")
 	}
 	switch {
@@ -1568,6 +1577,22 @@ func (k *kataAgent) handleDeviceBlockVolume(c *Container, m Mount, device api.De
 	default:
 		return nil, fmt.Errorf("Unknown block device driver: %s", c.sandbox.config.HypervisorConfig.BlockDeviceDriver)
 	}
+	return vol, nil
+}
+
+// handleVirtualVolumeStorageObject is a stub for converting a KataVirtualVolume into a
+// Storage: all three arguments are currently ignored and an empty Storage is returned.
+func handleVirtualVolumeStorageObject(c *Container, blockDeviceID string, virtVolume *types.KataVirtualVolume) (*grpc.Storage, error) {
+	return &grpc.Storage{}, nil
+}
+
+// handleDeviceBlockVolume handles volume that is block device file
+// and DeviceBlock type.
+func (k *kataAgent) handleDeviceBlockVolume(c *Container, m Mount, device api.Device) (*grpc.Storage, error) {
+	vol, err := handleBlockVolume(c, device)
+	if err != nil {
+		return nil, err
+	}
 
 	vol.MountPoint = m.Destination