diff --git a/src/runtime/virtcontainers/container.go b/src/runtime/virtcontainers/container.go
index 87e4cf45e..01acc5e54 100644
--- a/src/runtime/virtcontainers/container.go
+++ b/src/runtime/virtcontainers/container.go
@@ -488,7 +488,7 @@ func (c *Container) shareFiles(m Mount, idx int, hostSharedDir, hostMountDir, gu
 }
 
 // mountSharedDirMounts handles bind-mounts by bindmounting to the host shared
-// directory which is mounted through 9pfs in the VM.
+// directory which is mounted through virtiofs/9pfs in the VM.
 // It also updates the container mount list with the HostPath info, and store
 // container mounts to the storage. This way, we will have the HostPath info
 // available when we will need to unmount those mounts.
@@ -511,6 +511,18 @@ func (c *Container) mountSharedDirMounts(hostSharedDir, hostMountDir, guestShare
 			continue
 		}
 
+		// Check if mount is a block device file. If it is, the block device will be attached to the host
+		// instead of passing this as a shared mount:
+		if len(m.BlockDeviceID) > 0 {
+			// Attach this block device, all other devices passed in the config have been attached at this point
+			if err = c.sandbox.devManager.AttachDevice(m.BlockDeviceID, c.sandbox); err != nil {
+				return nil, nil, err
+			}
+			devicesToDetach = append(devicesToDetach, m.BlockDeviceID)
+			continue
+		}
+
+		// For non-block based mounts, we are only interested in bind mounts
 		if m.Type != "bind" {
 			continue
 		}
@@ -522,17 +534,6 @@ func (c *Container) mountSharedDirMounts(hostSharedDir, hostMountDir, guestShare
 			continue
 		}
 
-		// Check if mount is a block device file. If it is, the block device will be attached to the host
-		// instead of passing this as a shared mount.
-		if len(m.BlockDeviceID) > 0 {
-			// Attach this block device, all other devices passed in the config have been attached at this point
-			if err = c.sandbox.devManager.AttachDevice(m.BlockDeviceID, c.sandbox); err != nil {
-				return nil, nil, err
-			}
-			devicesToDetach = append(devicesToDetach, m.BlockDeviceID)
-			continue
-		}
-
 		// Ignore /dev, directories and all other device files. We handle
 		// only regular files in /dev. It does not make sense to pass the host
 		// device nodes to the guest.
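// Reviewer annotation (not part of the patch): a minimal sketch of the mount
// loop's net ordering in mountSharedDirMounts after the two hunks above. The
// point of the move is that block-backed mounts are attached and skipped
// *before* the bind check, so a device-backed mount whose Type is e.g. "ext4"
// rather than "bind" is no longer silently ignored:
//
//	for _, m := range c.mounts {
//		if len(m.BlockDeviceID) > 0 {
//			// attach to the sandbox and remember it for cleanup on unmount
//			if err = c.sandbox.devManager.AttachDevice(m.BlockDeviceID, c.sandbox); err != nil {
//				return nil, nil, err
//			}
//			devicesToDetach = append(devicesToDetach, m.BlockDeviceID)
//			continue
//		}
//		if m.Type != "bind" {
//			continue // only bind mounts are shared through virtiofs/9pfs
//		}
//		// ... existing shared-directory handling ...
//	}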
@@ -628,6 +629,9 @@ func filterDevices(c *Container, devices []ContainerDevice) (ret []ContainerDevi
 	return
 }
 
+// Add any mount-based block devices to the device manager and save the
+// device ID for the particular mount. This will occur when the mount's source
+// is a block device.
 func (c *Container) createBlockDevices() error {
 	if !c.checkBlockDeviceSupport() {
 		c.Logger().Warn("Block device not supported")
@@ -636,13 +640,18 @@ func (c *Container) createBlockDevices() error {
 
 	// iterate all mounts and create block device if it's block based.
 	for i, m := range c.mounts {
-		if len(m.BlockDeviceID) > 0 || m.Type != "bind" {
+		if len(m.BlockDeviceID) > 0 {
 			// Non-empty m.BlockDeviceID indicates there's already one device
 			// associated with the mount,so no need to create a new device for it
 			// and we only create block device for bind mount
 			continue
 		}
 
+		if m.Type != "bind" {
+			// We only handle bind mounts here
+			continue
+		}
+
 		var stat unix.Stat_t
 		if err := unix.Stat(m.Source, &stat); err != nil {
 			return fmt.Errorf("stat %q failed: %v", m.Source, err)
@@ -670,7 +679,6 @@ func (c *Container) createBlockDevices() error {
 
 		if err == nil && di != nil {
 			b, err := c.sandbox.devManager.NewDevice(*di)
-
 			if err != nil {
 				// Do not return an error, try to create
 				// devices for other mounts
@@ -722,11 +730,12 @@ func newContainer(sandbox *Sandbox, contConfig *ContainerConfig) (*Container, er
 		return nil, err
 	}
 
-	// Go to next step for first created container
+	// If any mounts are block devices, add them to the device manager
 	if err := c.createMounts(); err != nil {
 		return nil, err
 	}
 
+	// Add the container's devices to the sandbox's device manager
 	if err := c.createDevices(contConfig); err != nil {
 		return nil, err
 	}
@@ -736,11 +745,7 @@ func newContainer(sandbox *Sandbox, contConfig *ContainerConfig) (*Container, er
 
 func (c *Container) createMounts() error {
 	// Create block devices for newly created container
-	if err := c.createBlockDevices(); err != nil {
-		return err
-	}
-
-	return nil
+	return c.createBlockDevices()
 }
 
 func (c *Container) createDevices(contConfig *ContainerConfig) error {
@@ -810,6 +815,7 @@ func (c *Container) create() (err error) {
 	}()
 
 	if c.checkBlockDeviceSupport() {
+		// If the rootfs is backed by a block device, go ahead and hotplug it to the guest
 		if err = c.hotplugDrive(); err != nil {
 			return
 		}
@@ -1205,11 +1211,14 @@ func (c *Container) resume() error {
 	return c.setContainerState(types.StateRunning)
 }
 
+// hotplugDrive will attempt to hotplug the container rootfs if it is backed by a
+// block device
func (c *Container) hotplugDrive() error {
 	var dev device
 	var err error
 
-	// container rootfs is blockdevice backed and isn't mounted
+	// Check to see if the rootfs is an unmounted block device (source) or if the
+	// mount (target) is backed by a block device:
 	if !c.rootFs.Mounted {
 		dev, err = getDeviceForPath(c.rootFs.Source)
 		// there is no "rootfs" dir on block device backed rootfs
@@ -1271,6 +1280,7 @@ func (c *Container) hotplugDrive() error {
 	return c.setStateFstype(fsType)
 }
 
+// plugDevice will attach the rootfs if block device support is enabled (this is rootfs-specific)
 func (c *Container) plugDevice(devicePath string) error {
 	var stat unix.Stat_t
 	if err := unix.Stat(devicePath, &stat); err != nil {
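// Reviewer annotation (not part of the patch): createBlockDevices above decides
// whether a mount needs a device by stat'ing its source. A minimal sketch of
// that classification, assuming golang.org/x/sys/unix; the helper name
// isBlockBacked is hypothetical:
//
//	func isBlockBacked(source string) (bool, error) {
//		var stat unix.Stat_t
//		if err := unix.Stat(source, &stat); err != nil {
//			return false, fmt.Errorf("stat %q failed: %v", source, err)
//		}
//		// S_IFBLK marks a raw block device node (e.g. /dev/vdb); only such
//		// sources get a device created through devManager.NewDevice and a
//		// BlockDeviceID recorded on the mount.
//		return stat.Mode&unix.S_IFMT == unix.S_IFBLK, nil
//	}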
diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go
index 1ce2f8e74..eb64fa619 100644
--- a/src/runtime/virtcontainers/kata_agent.go
+++ b/src/runtime/virtcontainers/kata_agent.go
@@ -1180,7 +1180,6 @@ func (k *kataAgent) buildContainerRootfs(sandbox *Sandbox, c *Container, rootPat
 		}
 
 	case sandbox.config.HypervisorConfig.BlockDeviceDriver == config.VirtioSCSI:
-
 		rootfs.Driver = kataSCSIDevType
 		rootfs.Source = blockDrive.SCSIAddr
 	default:
@@ -1195,8 +1194,8 @@ func (k *kataAgent) buildContainerRootfs(sandbox *Sandbox, c *Container, rootPat
 		}
 
 		// Ensure container mount destination exists
-		// TODO: remove dependency on shared fs path. shared fs is just one kind of storage sources.
-		// we should not always use shared fs path for all kinds of storage. Stead, all storage
+		// TODO: remove dependency on shared fs path. shared fs is just one kind of storage source.
+		// we should not always use shared fs path for all kinds of storage. Instead, all storage
 		// should be bind mounted to a tmpfs path for containers to use.
 		if err := os.MkdirAll(filepath.Join(getMountPath(c.sandbox.id), c.id, c.rootfsSuffix), DirMode); err != nil {
 			return nil, err
@@ -1204,13 +1203,10 @@ func (k *kataAgent) buildContainerRootfs(sandbox *Sandbox, c *Container, rootPat
 		return rootfs, nil
 	}
 
-	// This is not a block based device rootfs.
-	// We are going to bind mount it into the 9pfs
-	// shared drive between the host and the guest.
-	// With 9pfs we don't need to ask the agent to
-	// mount the rootfs as the shared directory
-	// (kataGuestSharedDir) is already mounted in the
-	// guest. We only need to mount the rootfs from
+	// This is not a block based device rootfs. We are going to bind mount it into the shared drive
+	// between the host and the guest.
+	// With virtiofs/9pfs we don't need to ask the agent to mount the rootfs as the shared directory
+	// (kataGuestSharedDir) is already mounted in the guest. We only need to mount the rootfs from
 	// the host and it will show up in the guest.
 	if err := bindMountContainerRootfs(k.ctx, getMountPath(sandbox.id), c.id, c.rootFs.Target, false); err != nil {
 		return nil, err
@@ -1240,9 +1236,14 @@ func (k *kataAgent) createContainer(sandbox *Sandbox, c *Container) (p *Process,
 		}
 	}()
 
+	// Set up the rootfs -- if it's block based, we'll receive a non-nil storage object representing
+	// the block device for the rootfs, which is utilized for mounting in the guest. A non-block based
+	// rootfs is already handled by this point.
 	if rootfs, err = k.buildContainerRootfs(sandbox, c, rootPathParent); err != nil {
 		return nil, err
-	} else if rootfs != nil {
+	}
+
+	if rootfs != nil {
 		// Add rootfs to the list of container storage.
 		// We only need to do this for block based rootfs, as we
 		// want the agent to mount it into the right location
@@ -1291,6 +1292,7 @@ func (k *kataAgent) createContainer(sandbox *Sandbox, c *Container) (p *Process,
 	if err != nil {
 		return nil, err
 	}
+
 	if err := k.replaceOCIMountsForStorages(ociSpec, volumeStorages); err != nil {
 		return nil, err
 	}
@@ -1400,7 +1402,7 @@ func (k *kataAgent) handleLocalStorage(mounts []specs.Mount, sandboxID string, r
 
 // handleDeviceBlockVolume handles volume that is block device file
 // and DeviceBlock type.
-func (k *kataAgent) handleDeviceBlockVolume(c *Container, device api.Device) (*grpc.Storage, error) {
+func (k *kataAgent) handleDeviceBlockVolume(c *Container, m Mount, device api.Device) (*grpc.Storage, error) {
 	vol := &grpc.Storage{}
 
 	blockDrive, ok := device.GetDeviceInfo().(*config.BlockDrive)
@@ -1435,12 +1437,22 @@ func (k *kataAgent) handleDeviceBlockVolume(c *Container, device api.Device) (*g
 		return nil, fmt.Errorf("Unknown block device driver: %s", c.sandbox.config.HypervisorConfig.BlockDeviceDriver)
 	}
 
+	vol.MountPoint = m.Destination
+
+	// If no explicit FS type or options were set, use what is provided for the particular mount:
+	if vol.Fstype == "" {
+		vol.Fstype = m.Type
+	}
+	if len(vol.Options) == 0 {
+		vol.Options = m.Options
+	}
+
 	return vol, nil
 }
 
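// Reviewer annotation (not part of the patch): with the hunk above, a
// direct-assigned virtio-blk volume mounted as ext4 read-only would produce a
// Storage object along these lines (field values illustrative):
//
//	vol := &grpc.Storage{
//		Driver:     kataBlkDevType,     // set by the driver-specific switch
//		Source:     blockDrive.PCIAddr, // likewise driver-specific
//		Fstype:     "ext4",             // inherited from m.Type when unset
//		Options:    []string{"ro"},     // inherited from m.Options when empty
//		MountPoint: m.Destination,
//	}
//
// so the agent can mount the device directly at MountPoint instead of
// bind-mounting a host path through the shared directory.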
 // handleVhostUserBlkVolume handles volume that is block device file
 // and VhostUserBlk type.
-func (k *kataAgent) handleVhostUserBlkVolume(c *Container, device api.Device) (*grpc.Storage, error) {
+func (k *kataAgent) handleVhostUserBlkVolume(c *Container, m Mount, device api.Device) (*grpc.Storage, error) {
 	vol := &grpc.Storage{}
 
 	d, ok := device.GetDeviceInfo().(*config.VhostUserDeviceAttrs)
@@ -1451,6 +1463,9 @@ func (k *kataAgent) handleVhostUserBlkVolume(c *Container, device api.Device) (*
 
 	vol.Driver = kataBlkDevType
 	vol.Source = d.PCIAddr
+	vol.Fstype = "bind"
+	vol.Options = []string{"bind"}
+	vol.MountPoint = m.Destination
 
 	return vol, nil
 }
@@ -1483,9 +1498,9 @@ func (k *kataAgent) handleBlockVolumes(c *Container) ([]*grpc.Storage, error) {
 		var err error
 		switch device.DeviceType() {
 		case config.DeviceBlock:
-			vol, err = k.handleDeviceBlockVolume(c, device)
+			vol, err = k.handleDeviceBlockVolume(c, m, device)
 		case config.VhostUserBlk:
-			vol, err = k.handleVhostUserBlkVolume(c, device)
+			vol, err = k.handleVhostUserBlkVolume(c, m, device)
 		default:
 			k.Logger().Error("Unknown device type")
 			continue
@@ -1495,14 +1510,6 @@ func (k *kataAgent) handleBlockVolumes(c *Container) ([]*grpc.Storage, error) {
 			return nil, err
 		}
 
-		vol.MountPoint = m.Destination
-		if vol.Fstype == "" {
-			vol.Fstype = "bind"
-		}
-		if len(vol.Options) == 0 {
-			vol.Options = []string{"bind"}
-		}
-
 		volumeStorages = append(volumeStorages, vol)
 	}
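// Reviewer annotation (not part of the patch): the generic fallback removed
// from handleBlockVolumes used to force Fstype "bind" / Options ["bind"] onto
// every volume. With the defaults moved into the per-type handlers, the
// agent-side mount for the direct-assignment case is effectively (paths mirror
// the test below; command illustrative):
//
//	mount -t ext4 -o ro <node for vol.Source> /DeviceDirectBlock/destination
//
// while vhost-user-blk volumes keep their previous unconditional bind semantics.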
diff --git a/src/runtime/virtcontainers/kata_agent_test.go b/src/runtime/virtcontainers/kata_agent_test.go
index 5a7bcf6d3..7a7cb73fe 100644
--- a/src/runtime/virtcontainers/kata_agent_test.go
+++ b/src/runtime/virtcontainers/kata_agent_test.go
@@ -228,6 +228,7 @@ func TestHandleDeviceBlockVolume(t *testing.T) {
 
 	tests := []struct {
 		BlockDeviceDriver string
+		inputMount        Mount
 		inputDev          *drivers.BlockDevice
 		resultVol         *pb.Storage
 	}{
@@ -239,6 +240,7 @@ func TestHandleDeviceBlockVolume(t *testing.T) {
 					Format:   testBlkDriveFormat,
 				},
 			},
+			inputMount: Mount{},
 			resultVol: &pb.Storage{
 				Driver:  kataNvdimmDevType,
 				Source:  fmt.Sprintf("/dev/pmem%s", testNvdimmID),
@@ -248,18 +250,25 @@ func TestHandleDeviceBlockVolume(t *testing.T) {
 		},
 		{
 			BlockDeviceDriver: config.VirtioBlockCCW,
+			inputMount: Mount{
+				Type:    "bind",
+				Options: []string{"ro"},
+			},
 			inputDev: &drivers.BlockDevice{
 				BlockDrive: &config.BlockDrive{
 					DevNo: testDevNo,
 				},
 			},
 			resultVol: &pb.Storage{
-				Driver: kataBlkCCWDevType,
-				Source: testDevNo,
+				Driver:  kataBlkCCWDevType,
+				Source:  testDevNo,
+				Fstype:  "bind",
+				Options: []string{"ro"},
 			},
 		},
 		{
 			BlockDeviceDriver: config.VirtioBlock,
+			inputMount:        Mount{},
 			inputDev: &drivers.BlockDevice{
 				BlockDrive: &config.BlockDrive{
 					PCIAddr: testPCIAddr,
@@ -320,7 +329,7 @@ func TestHandleDeviceBlockVolume(t *testing.T) {
 			},
 		}
 
-		vol, _ := k.handleDeviceBlockVolume(c, test.inputDev)
+		vol, _ := k.handleDeviceBlockVolume(c, test.inputMount, test.inputDev)
 
 		assert.True(t, reflect.DeepEqual(vol, test.resultVol),
 			"Volume didn't match: got %+v, expecting %+v",
@@ -336,22 +345,27 @@ func TestHandleBlockVolume(t *testing.T) {
 	containers := map[string]*Container{}
 	containers[c.id] = c
 
-	// Create a VhostUserBlk device and a DeviceBlock device
+	// Create devices for VhostUserBlk, standard DeviceBlock and directly assigned block device
 	vDevID := "MockVhostUserBlk"
 	bDevID := "MockDeviceBlock"
+	dDevID := "MockDeviceBlockDirect"
 	vDestination := "/VhostUserBlk/destination"
 	bDestination := "/DeviceBlock/destination"
+	dDestination := "/DeviceDirectBlock/destination"
 	vPCIAddr := "0001:01"
 	bPCIAddr := "0002:01"
+	dPCIAddr := "0003:01"
 
 	vDev := drivers.NewVhostUserBlkDevice(&config.DeviceInfo{ID: vDevID})
 	bDev := drivers.NewBlockDevice(&config.DeviceInfo{ID: bDevID})
+	dDev := drivers.NewBlockDevice(&config.DeviceInfo{ID: dDevID})
 
 	vDev.VhostUserDeviceAttrs = &config.VhostUserDeviceAttrs{PCIAddr: vPCIAddr}
 	bDev.BlockDrive = &config.BlockDrive{PCIAddr: bPCIAddr}
+	dDev.BlockDrive = &config.BlockDrive{PCIAddr: dPCIAddr}
 
 	var devices []api.Device
-	devices = append(devices, vDev, bDev)
+	devices = append(devices, vDev, bDev, dDev)
 
 	// Create a VhostUserBlk mount and a DeviceBlock mount
 	var mounts []Mount
@@ -362,8 +376,16 @@ func TestHandleBlockVolume(t *testing.T) {
 	bMount := Mount{
 		BlockDeviceID: bDevID,
 		Destination:   bDestination,
+		Type:          "bind",
+		Options:       []string{"bind"},
 	}
-	mounts = append(mounts, vMount, bMount)
+	dMount := Mount{
+		BlockDeviceID: dDevID,
+		Destination:   dDestination,
+		Type:          "ext4",
+		Options:       []string{"ro"},
+	}
+	mounts = append(mounts, vMount, bMount, dMount)
 
 	tmpDir := "/vhost/user/dir"
 	dm := manager.NewDeviceManager(manager.VirtioBlock, true, tmpDir, devices)
@@ -398,9 +420,17 @@ func TestHandleBlockVolume(t *testing.T) {
 		Driver:     kataBlkDevType,
 		Source:     bPCIAddr,
 	}
+	dStorage := &pb.Storage{
+		MountPoint: dDestination,
+		Fstype:     "ext4",
+		Options:    []string{"ro"},
+		Driver:     kataBlkDevType,
+		Source:     dPCIAddr,
+	}
 
 	assert.Equal(t, vStorage, volumeStorages[0], "Error while handle VhostUserBlk type block volume")
 	assert.Equal(t, bStorage, volumeStorages[1], "Error while handle BlockDevice type block volume")
+	assert.Equal(t, dStorage, volumeStorages[2], "Error while handle direct BlockDevice type block volume")
 }
 
 func TestAppendDevicesEmptyContainerDeviceList(t *testing.T) {
diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go
index 06d071ffc..94e81e933 100644
--- a/src/runtime/virtcontainers/qemu.go
+++ b/src/runtime/virtcontainers/qemu.go
@@ -1233,9 +1233,9 @@ func (q *qemu) hotplugAddBlockDevice(drive *config.BlockDrive, op operation, dev
 	}
 
 	if q.config.BlockDeviceCacheSet {
-		err = q.qmpMonitorCh.qmp.ExecuteBlockdevAddWithCache(q.qmpMonitorCh.ctx, drive.File, drive.ID, q.config.BlockDeviceCacheDirect, q.config.BlockDeviceCacheNoflush)
+		err = q.qmpMonitorCh.qmp.ExecuteBlockdevAddWithCache(q.qmpMonitorCh.ctx, drive.File, drive.ID, q.config.BlockDeviceCacheDirect, q.config.BlockDeviceCacheNoflush, false)
 	} else {
-		err = q.qmpMonitorCh.qmp.ExecuteBlockdevAdd(q.qmpMonitorCh.ctx, drive.File, drive.ID)
+		err = q.qmpMonitorCh.qmp.ExecuteBlockdevAdd(q.qmpMonitorCh.ctx, drive.File, drive.ID, false)
 	}
 
 	if err != nil {
 		return err
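// Reviewer annotation (not part of the patch): the new trailing boolean on
// ExecuteBlockdevAdd/ExecuteBlockdevAddWithCache is assumed to be a read-only
// flag for the QMP blockdev-add call; both call sites pass false, e.g.:
//
//	readOnly := false // keep hotplugged drives read-write, as before
//	err = q.qmpMonitorCh.qmp.ExecuteBlockdevAdd(q.qmpMonitorCh.ctx, drive.File, drive.ID, readOnly)
//
// so existing behavior is unchanged until a caller opts into read-only drives.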
diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go
index e838d2817..f207c3da9 100644
--- a/src/runtime/virtcontainers/sandbox.go
+++ b/src/runtime/virtcontainers/sandbox.go
@@ -1098,13 +1098,13 @@ func (s *Sandbox) addContainer(c *Container) error {
 
 // This should be called only when the sandbox is already created.
 // It will add new container config to sandbox.config.Containers
 func (s *Sandbox) CreateContainer(contConfig ContainerConfig) (VCContainer, error) {
-	// Create the container.
+	// Create the container object, add devices to the sandbox's device-manager:
 	c, err := newContainer(s, &contConfig)
 	if err != nil {
 		return nil, err
 	}
 
-	// Update sandbox config.
+	// Update sandbox config to include the new container's config
 	s.config.Containers = append(s.config.Containers, contConfig)
 
 	defer func() {
@@ -1116,6 +1116,7 @@ func (s *Sandbox) CreateContainer(contConfig ContainerConfig) (VCContainer, erro
 		}
 	}()
 
+	// Create the container
 	err = c.create()
 	if err != nil {
 		return nil, err
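// Reviewer summary (not part of the patch): end to end, a direct-assigned
// volume now flows createBlockDevices -> mountSharedDirMounts (AttachDevice) ->
// handleDeviceBlockVolume. A Mount exercising the new path, mirroring
// TestHandleBlockVolume above:
//
//	m := Mount{
//		BlockDeviceID: "MockDeviceBlockDirect",
//		Destination:   "/DeviceDirectBlock/destination",
//		Type:          "ext4",
//		Options:       []string{"ro"},
//	}
//
// which yields a Storage with Driver kataBlkDevType, Fstype "ext4", Options
// ["ro"], and MountPoint m.Destination for the agent to mount inside the guest.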