diff --git a/cli/config/configuration.toml.in b/cli/config/configuration.toml.in index a8df6c58e..8efd4bd9d 100644 --- a/cli/config/configuration.toml.in +++ b/cli/config/configuration.toml.in @@ -91,8 +91,8 @@ default_memory = @DEFMEMSZ@ disable_block_device_use = @DEFDISABLEBLOCK@ # Block storage driver to be used for the hypervisor in case the container -# rootfs is backed by a block device. This is either virtio-scsi or -# virtio-blk. +# rootfs is backed by a block device. This is virtio-scsi, virtio-blk +# or nvdimm. block_device_driver = "@DEFBLOCKSTORAGEDRIVER@" # Specifies cache-related options will be set to block devices or not. diff --git a/pkg/katautils/config.go b/pkg/katautils/config.go index 446048a90..3bd697dec 100644 --- a/pkg/katautils/config.go +++ b/pkg/katautils/config.go @@ -294,7 +294,7 @@ func (h hypervisor) defaultBridges() uint32 { } func (h hypervisor) blockDeviceDriver() (string, error) { - supportedBlockDrivers := []string{config.VirtioSCSI, config.VirtioBlock, config.VirtioMmio} + supportedBlockDrivers := []string{config.VirtioSCSI, config.VirtioBlock, config.VirtioMmio, config.Nvdimm} if h.BlockDeviceDriver == "" { return defaultBlockDeviceDriver, nil diff --git a/virtcontainers/device/config/config.go b/virtcontainers/device/config/config.go index 704160920..13eaeff35 100644 --- a/virtcontainers/device/config/config.go +++ b/virtcontainers/device/config/config.go @@ -47,6 +47,9 @@ const ( // VirtioSCSI means use virtio-scsi for hotplugging drives VirtioSCSI = "virtio-scsi" + + // Nvdimm means use nvdimm for hotplugging drives + Nvdimm = "nvdimm" ) // Defining these as a variable instead of a const, to allow @@ -119,6 +122,9 @@ type BlockDrive struct { // SCSI address is in the format SCSI-Id:LUN SCSIAddr string + // NvdimmID is the nvdimm id inside the VM + NvdimmID string + // VirtPath at which the device appears inside the VM, outside of the container mount namespace VirtPath string } diff --git 
a/virtcontainers/device/drivers/block.go b/virtcontainers/device/drivers/block.go index ba0592b72..b977e30e8 100644 --- a/virtcontainers/device/drivers/block.go +++ b/virtcontainers/device/drivers/block.go @@ -78,7 +78,7 @@ func (device *BlockDevice) Attach(devReceiver api.DeviceReceiver) (err error) { } drive.SCSIAddr = scsiAddr - } else { + } else if customOptions["block-driver"] != "nvdimm" { var globalIdx int switch customOptions["block-driver"] { @@ -102,7 +102,7 @@ func (device *BlockDevice) Attach(devReceiver api.DeviceReceiver) (err error) { drive.VirtPath = filepath.Join("/dev", driveName) } - deviceLogger().WithField("device", device.DeviceInfo.HostPath).Info("Attaching block device") + deviceLogger().WithField("device", device.DeviceInfo.HostPath).WithField("VirtPath", drive.VirtPath).Infof("Attaching %s device", customOptions["block-driver"]) device.BlockDrive = drive if err = devReceiver.HotplugAddDevice(device, config.DeviceBlock); err != nil { return err diff --git a/virtcontainers/device/manager/manager.go b/virtcontainers/device/manager/manager.go index 066c30581..9086fa455 100644 --- a/virtcontainers/device/manager/manager.go +++ b/virtcontainers/device/manager/manager.go @@ -26,6 +26,8 @@ const ( VirtioBlock string = "virtio-blk" // VirtioSCSI indicates block driver is virtio-scsi based VirtioSCSI string = "virtio-scsi" + // Nvdimm indicates block driver is nvdimm based + Nvdimm string = "nvdimm" ) var ( @@ -61,6 +63,8 @@ func NewDeviceManager(blockDriver string, devices []api.Device) api.DeviceManage dm.blockDriver = VirtioMmio } else if blockDriver == VirtioBlock { dm.blockDriver = VirtioBlock + } else if blockDriver == Nvdimm { + dm.blockDriver = Nvdimm } else { dm.blockDriver = VirtioSCSI } diff --git a/virtcontainers/kata_agent.go b/virtcontainers/kata_agent.go index 18a508cc8..95c0a4b45 100644 --- a/virtcontainers/kata_agent.go +++ b/virtcontainers/kata_agent.go @@ -62,6 +62,7 @@ var ( kataMmioBlkDevType = "mmioblk" kataBlkDevType = "blk" 
kataSCSIDevType = "scsi" + kataNvdimmDevType = "nvdimm" sharedDir9pOptions = []string{"trans=virtio,version=9p2000.L,cache=mmap", "nodev"} shmDir = "shm" kataEphemeralDevType = "ephemeral" @@ -883,6 +884,9 @@ func (k *kataAgent) appendDevices(deviceList []*grpc.Device, c *Container) []*gr case config.VirtioSCSI: kataDevice.Type = kataSCSIDevType kataDevice.Id = d.SCSIAddr + case config.Nvdimm: + kataDevice.Type = kataNvdimmDevType + kataDevice.VmPath = fmt.Sprintf("/dev/pmem%s", d.NvdimmID) } deviceList = append(deviceList, kataDevice) diff --git a/virtcontainers/qemu.go b/virtcontainers/qemu.go index c02ecc547..536e76dfe 100644 --- a/virtcontainers/qemu.go +++ b/virtcontainers/qemu.go @@ -8,20 +8,22 @@ package virtcontainers import ( "context" "fmt" + govmmQemu "github.com/intel/govmm/qemu" + "github.com/kata-containers/runtime/virtcontainers/pkg/uuid" + "github.com/opentracing/opentracing-go" + "github.com/sirupsen/logrus" "math" "os" "path/filepath" "strconv" "strings" + "syscall" "time" - - govmmQemu "github.com/intel/govmm/qemu" - "github.com/kata-containers/runtime/virtcontainers/pkg/uuid" - opentracing "github.com/opentracing/opentracing-go" - "github.com/sirupsen/logrus" + "unsafe" "github.com/kata-containers/runtime/virtcontainers/device/config" "github.com/kata-containers/runtime/virtcontainers/utils" + "golang.org/x/sys/unix" ) // romFile is the file name of the ROM that can be used for virtio-pci devices. 
@@ -73,6 +75,8 @@ type qemu struct {
 	fds []*os.File
 
 	ctx context.Context
+
+	nvdimmCount int
 }
 
 const (
@@ -221,6 +225,20 @@ func (q *qemu) init(ctx context.Context, id string, hypervisorConfig *Hypervisor
 	q.config = *hypervisorConfig
 	q.arch = newQemuArch(q.config)
 
+	initrdPath, err := q.config.InitrdAssetPath()
+	if err != nil {
+		return err
+	}
+	imagePath, err := q.config.ImageAssetPath()
+	if err != nil {
+		return err
+	}
+	if initrdPath == "" && imagePath != "" {
+		q.nvdimmCount = 1
+	} else {
+		q.nvdimmCount = 0
+	}
+
 	if err = q.storage.fetchHypervisorState(q.id, &q.state); err != nil {
 		q.Logger().Debug("Creating bridges")
 		q.state.Bridges = q.arch.bridges(q.config.DefaultBridges)
@@ -727,6 +745,76 @@ func (q *qemu) removeDeviceFromBridge(ID string) error {
 	return err
 }
 
+// hotplugAddBlockDevice hot-adds drive to the running VM. With the nvdimm
+// block driver the backing file is added through ExecuteNVDIMMDeviceAdd; otherwise a
+// blockdev is added and attached through virtio-blk-pci or the SCSI controller.
+func (q *qemu) hotplugAddBlockDevice(drive *config.BlockDrive, op operation, devID string) error {
+	var err error
+
+	if q.config.BlockDeviceDriver == config.Nvdimm {
+		var blocksize int64
+		file, err := os.Open(drive.File)
+		if err != nil {
+			return err
+		}
+		// Release the descriptor on every return path. Holding file until
+		// return also keeps its fd valid for the raw ioctl below.
+		defer file.Close()
+
+		if _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, file.Fd(), unix.BLKGETSIZE64, uintptr(unsafe.Pointer(&blocksize))); errno != 0 {
+			return errno
+		}
+		if err = q.qmpMonitorCh.qmp.ExecuteNVDIMMDeviceAdd(q.qmpMonitorCh.ctx, drive.ID, drive.File, blocksize); err != nil {
+			q.Logger().WithError(err).Errorf("Failed to add NVDIMM device %s", drive.File)
+			return err
+		}
+		drive.NvdimmID = strconv.Itoa(q.nvdimmCount)
+		q.nvdimmCount++
+		return nil
+	}
+
+	if q.config.BlockDeviceCacheSet {
+		err = q.qmpMonitorCh.qmp.ExecuteBlockdevAddWithCache(q.qmpMonitorCh.ctx, drive.File, drive.ID, q.config.BlockDeviceCacheDirect, q.config.BlockDeviceCacheNoflush)
+	} else {
+		err = q.qmpMonitorCh.qmp.ExecuteBlockdevAdd(q.qmpMonitorCh.ctx, drive.File, drive.ID)
+	}
+	if err != nil {
+		return err
+	}
+
+	if q.config.BlockDeviceDriver == config.VirtioBlock {
+		driver := "virtio-blk-pci"
+		addr, bridge, err := q.addDeviceToBridge(drive.ID)
+		if err != nil {
+			return err
+		}
+
+		// PCI address is in the format bridge-addr/device-addr eg. "03/02"
+		drive.PCIAddr = fmt.Sprintf("%02x", bridge.Addr) + "/" + addr
+
+		if err = q.qmpMonitorCh.qmp.ExecutePCIDeviceAdd(q.qmpMonitorCh.ctx, drive.ID, devID, driver, addr, bridge.ID, romFile, true, q.arch.runNested()); err != nil {
+			return err
+		}
+	} else {
+		driver := "scsi-hd"
+
+		// Bus exposed by the SCSI Controller
+		bus := scsiControllerID + ".0"
+
+		// Get SCSI-id and LUN based on the order of attaching drives.
+		scsiID, lun, err := utils.GetSCSIIdLun(drive.Index)
+		if err != nil {
+			return err
+		}
+
+		if err = q.qmpMonitorCh.qmp.ExecuteSCSIDeviceAdd(q.qmpMonitorCh.ctx, drive.ID, devID, driver, bus, romFile, scsiID, lun, true, q.arch.runNested()); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
 func (q *qemu) hotplugBlockDevice(drive *config.BlockDrive, op operation) error {
 	err := q.qmpSetup()
 	if err != nil {
@@ -736,44 +824,7 @@ func (q *qemu) hotplugBlockDevice(drive *config.BlockDrive, op operation) error
 	devID := "virtio-" + drive.ID
 
 	if op == addDevice {
-		if q.config.BlockDeviceCacheSet {
-			err = q.qmpMonitorCh.qmp.ExecuteBlockdevAddWithCache(q.qmpMonitorCh.ctx, drive.File, drive.ID, q.config.BlockDeviceCacheDirect, q.config.BlockDeviceCacheNoflush)
-		} else {
-			err = q.qmpMonitorCh.qmp.ExecuteBlockdevAdd(q.qmpMonitorCh.ctx, drive.File, drive.ID)
-		}
-		if err != nil {
-			return err
-		}
-
-		if q.config.BlockDeviceDriver == config.VirtioBlock {
-			driver := "virtio-blk-pci"
-			addr, bridge, err := q.addDeviceToBridge(drive.ID)
-			if err != nil {
-				return err
-			}
-
-			// PCI address is in the format bridge-addr/device-addr eg. "03/02"
-			drive.PCIAddr = fmt.Sprintf("%02x", bridge.Addr) + "/" + addr
-
-			if err = q.qmpMonitorCh.qmp.ExecutePCIDeviceAdd(q.qmpMonitorCh.ctx, drive.ID, devID, driver, addr, bridge.ID, romFile, true, q.arch.runNested()); err != nil {
-				return err
-			}
-		} else {
-			driver := "scsi-hd"
-
-			// Bus exposed by the SCSI Controller
-			bus := scsiControllerID + ".0"
-
-			// Get SCSI-id and LUN based on the order of attaching drives.
-			scsiID, lun, err := utils.GetSCSIIdLun(drive.Index)
-			if err != nil {
-				return err
-			}
-
-			if err = q.qmpMonitorCh.qmp.ExecuteSCSIDeviceAdd(q.qmpMonitorCh.ctx, drive.ID, devID, driver, bus, romFile, scsiID, lun, true, q.arch.runNested()); err != nil {
-				return err
-			}
-		}
+		err = q.hotplugAddBlockDevice(drive, op, devID)
 	} else {
 		if q.config.BlockDeviceDriver == config.VirtioBlock {
 			if err := q.removeDeviceFromBridge(drive.ID); err != nil {
@@ -790,7 +841,7 @@ func (q *qemu) hotplugBlockDevice(drive *config.BlockDrive, op operation) error
 		}
 	}
 
-	return nil
+	return err
 }
 
 func (q *qemu) hotplugVFIODevice(device *config.VFIODev, op operation) error {