Merge pull request #962 from teawater/nvdimm

block: Add new block storage driver "nvdimm"
fli 2018-12-26 09:38:55 +08:00 committed by GitHub
commit 2b75f440f0
14 changed files with 144 additions and 55 deletions

View File

@ -82,6 +82,13 @@ default_memory = @DEFMEMSZ@
# This will determine the times that memory can be hot-added to the sandbox/VM.
#memory_slots = @DEFMEMSLOTS@
# The size in MiB will be added to the hypervisor's maximum memory.
# It is the memory address space for the NVDIMM device.
# If the block storage driver (block_device_driver) is set to "nvdimm",
# memory_offset should be set to the size of the block device.
# Default 0
#memory_offset = 0
# Disable block device from being used for a container's rootfs.
# In case of a storage driver like devicemapper where a container's
# root file system is backed by a block device, the block device is passed
@ -91,8 +98,8 @@ default_memory = @DEFMEMSZ@
disable_block_device_use = @DEFDISABLEBLOCK@
# Block storage driver to be used for the hypervisor in case the container
# rootfs is backed by a block device. This is either virtio-scsi or
# virtio-blk.
# rootfs is backed by a block device. This is virtio-scsi, virtio-blk
# or nvdimm.
block_device_driver = "@DEFBLOCKSTORAGEDRIVER@"
# Specifies whether cache-related options will be set on block devices.
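
As an illustration, the relevant settings for an nvdimm-backed rootfs could look like the following (a hedged sketch; the sizes are placeholders and must match the actual backing block device):

```toml
# Illustrative values only.
block_device_driver = "nvdimm"
# memory_offset should equal the size of the backing block device, in MiB.
memory_offset = 4096
```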

View File

@ -24,6 +24,7 @@ const defaultVCPUCount uint32 = 1
const defaultMaxVCPUCount uint32 = 0
const defaultMemSize uint32 = 2048 // MiB
const defaultMemSlots uint32 = 10
const defaultMemOffset uint32 = 0 // MiB
const defaultBridgesCount uint32 = 1
const defaultInterNetworkingModel = "macvtap"
const defaultDisableBlockDeviceUse bool = false

View File

@ -98,6 +98,7 @@ type hypervisor struct {
DefaultMaxVCPUs uint32 `toml:"default_maxvcpus"`
MemorySize uint32 `toml:"default_memory"`
MemSlots uint32 `toml:"memory_slots"`
MemOffset uint32 `toml:"memory_offset"`
DefaultBridges uint32 `toml:"default_bridges"`
Msize9p uint32 `toml:"msize_9p"`
DisableBlockDeviceUse bool `toml:"disable_block_device_use"`
@ -281,6 +282,15 @@ func (h hypervisor) defaultMemSlots() uint32 {
return slots
}
func (h hypervisor) defaultMemOffset() uint32 {
offset := h.MemOffset
if offset == 0 {
offset = defaultMemOffset
}
return offset
}
func (h hypervisor) defaultBridges() uint32 {
if h.DefaultBridges == 0 {
return defaultBridgesCount
@ -294,7 +304,7 @@ func (h hypervisor) defaultBridges() uint32 {
}
func (h hypervisor) blockDeviceDriver() (string, error) {
supportedBlockDrivers := []string{config.VirtioSCSI, config.VirtioBlock, config.VirtioMmio}
supportedBlockDrivers := []string{config.VirtioSCSI, config.VirtioBlock, config.VirtioMmio, config.Nvdimm}
if h.BlockDeviceDriver == "" {
return defaultBlockDeviceDriver, nil
@ -514,6 +524,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
DefaultMaxVCPUs: h.defaultMaxVCPUs(),
MemorySize: h.defaultMemSz(),
MemSlots: h.defaultMemSlots(),
MemOffset: h.defaultMemOffset(),
EntropySource: h.GetEntropySource(),
DefaultBridges: h.defaultBridges(),
DisableBlockDeviceUse: h.DisableBlockDeviceUse,
@ -677,6 +688,7 @@ func initConfig() (config oci.RuntimeConfig, err error) {
NumVCPUs: defaultVCPUCount,
DefaultMaxVCPUs: defaultMaxVCPUCount,
MemorySize: defaultMemSize,
MemOffset: defaultMemOffset,
DefaultBridges: defaultBridgesCount,
MemPrealloc: defaultEnableMemPrealloc,
HugePages: defaultEnableHugePages,

View File

@ -47,6 +47,9 @@ const (
// VirtioSCSI means use virtio-scsi for hotplugging drives
VirtioSCSI = "virtio-scsi"
// Nvdimm means use nvdimm for hotplugging drives
Nvdimm = "nvdimm"
)
// Defining these as a variable instead of a const, to allow
@ -119,6 +122,9 @@ type BlockDrive struct {
// SCSI address is in the format SCSI-Id:LUN
SCSIAddr string
// NvdimmID is the nvdimm id inside the VM
NvdimmID string
// VirtPath at which the device appears inside the VM, outside of the container mount namespace
VirtPath string
}

View File

@ -78,7 +78,7 @@ func (device *BlockDevice) Attach(devReceiver api.DeviceReceiver) (err error) {
}
drive.SCSIAddr = scsiAddr
} else {
} else if customOptions["block-driver"] != "nvdimm" {
var globalIdx int
switch customOptions["block-driver"] {
@ -102,7 +102,7 @@ func (device *BlockDevice) Attach(devReceiver api.DeviceReceiver) (err error) {
drive.VirtPath = filepath.Join("/dev", driveName)
}
deviceLogger().WithField("device", device.DeviceInfo.HostPath).Info("Attaching block device")
deviceLogger().WithField("device", device.DeviceInfo.HostPath).WithField("VirtPath", drive.VirtPath).Infof("Attaching %s device", customOptions["block-driver"])
device.BlockDrive = drive
if err = devReceiver.HotplugAddDevice(device, config.DeviceBlock); err != nil {
return err

View File

@ -26,6 +26,8 @@ const (
VirtioBlock string = "virtio-blk"
// VirtioSCSI indicates block driver is virtio-scsi based
VirtioSCSI string = "virtio-scsi"
// Nvdimm indicates block driver is nvdimm based
Nvdimm string = "nvdimm"
)
var (
@ -61,6 +63,8 @@ func NewDeviceManager(blockDriver string, devices []api.Device) api.DeviceManage
dm.blockDriver = VirtioMmio
} else if blockDriver == VirtioBlock {
dm.blockDriver = VirtioBlock
} else if blockDriver == Nvdimm {
dm.blockDriver = Nvdimm
} else {
dm.blockDriver = VirtioSCSI
}

View File

@ -169,6 +169,9 @@ type HypervisorConfig struct {
// MemSlots specifies default memory slots for the VM.
MemSlots uint32
// MemOffset specifies the memory space reserved for the nvdimm device.
MemOffset uint32
// KernelParams are additional guest kernel parameters.
KernelParams []Param

View File

@ -62,6 +62,7 @@ var (
kataMmioBlkDevType = "mmioblk"
kataBlkDevType = "blk"
kataSCSIDevType = "scsi"
kataNvdimmDevType = "nvdimm"
sharedDir9pOptions = []string{"trans=virtio,version=9p2000.L,cache=mmap", "nodev"}
shmDir = "shm"
kataEphemeralDevType = "ephemeral"
@ -883,6 +884,9 @@ func (k *kataAgent) appendDevices(deviceList []*grpc.Device, c *Container) []*gr
case config.VirtioSCSI:
kataDevice.Type = kataSCSIDevType
kataDevice.Id = d.SCSIAddr
case config.Nvdimm:
kataDevice.Type = kataNvdimmDevType
kataDevice.VmPath = fmt.Sprintf("/dev/pmem%s", d.NvdimmID)
}
deviceList = append(deviceList, kataDevice)
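
Inside the guest, the agent addresses an nvdimm-backed drive through its pmem device node, as in the VmPath assignment above. A minimal sketch of that derivation (the ID value is illustrative; when the VM boots from an image rather than an initrd, the runtime starts its nvdimm counter at 1, so the first hotplugged drive appears as /dev/pmem1):

```go
package main

import "fmt"

// guestPmemPath mirrors the VmPath assignment in appendDevices: the guest
// device node is /dev/pmem<NvdimmID>.
func guestPmemPath(nvdimmID string) string {
	return fmt.Sprintf("/dev/pmem%s", nvdimmID)
}

func main() {
	// With a guest image occupying the first nvdimm slot, the first
	// hotplugged drive gets ID "1".
	fmt.Println(guestPmemPath("1")) // prints /dev/pmem1
}
```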

View File

@ -8,20 +8,22 @@ package virtcontainers
import (
"context"
"fmt"
govmmQemu "github.com/intel/govmm/qemu"
"github.com/kata-containers/runtime/virtcontainers/pkg/uuid"
"github.com/opentracing/opentracing-go"
"github.com/sirupsen/logrus"
"math"
"os"
"path/filepath"
"strconv"
"strings"
"syscall"
"time"
govmmQemu "github.com/intel/govmm/qemu"
"github.com/kata-containers/runtime/virtcontainers/pkg/uuid"
opentracing "github.com/opentracing/opentracing-go"
"github.com/sirupsen/logrus"
"unsafe"
"github.com/kata-containers/runtime/virtcontainers/device/config"
"github.com/kata-containers/runtime/virtcontainers/utils"
"golang.org/x/sys/unix"
)
// romFile is the file name of the ROM that can be used for virtio-pci devices.
@ -73,6 +75,8 @@ type qemu struct {
fds []*os.File
ctx context.Context
nvdimmCount int
}
const (
@ -221,6 +225,20 @@ func (q *qemu) init(ctx context.Context, id string, hypervisorConfig *Hypervisor
q.config = *hypervisorConfig
q.arch = newQemuArch(q.config)
initrdPath, err := q.config.InitrdAssetPath()
if err != nil {
return err
}
imagePath, err := q.config.ImageAssetPath()
if err != nil {
return err
}
if initrdPath == "" && imagePath != "" {
q.nvdimmCount = 1
} else {
q.nvdimmCount = 0
}
if err = q.storage.fetchHypervisorState(q.id, &q.state); err != nil {
q.Logger().Debug("Creating bridges")
q.state.Bridges = q.arch.bridges(q.config.DefaultBridges)
@ -727,6 +745,69 @@ func (q *qemu) removeDeviceFromBridge(ID string) error {
return err
}
func (q *qemu) hotplugAddBlockDevice(drive *config.BlockDrive, op operation, devID string) error {
var err error
if q.config.BlockDeviceDriver == config.Nvdimm {
var blocksize int64
file, err := os.Open(drive.File)
if err != nil {
return err
}
if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, file.Fd(), unix.BLKGETSIZE64, uintptr(unsafe.Pointer(&blocksize))); err != 0 {
return err
}
if err = q.qmpMonitorCh.qmp.ExecuteNVDIMMDeviceAdd(q.qmpMonitorCh.ctx, drive.ID, drive.File, blocksize); err != nil {
q.Logger().WithError(err).Errorf("Failed to add NVDIMM device %s", drive.File)
return err
}
drive.NvdimmID = strconv.Itoa(q.nvdimmCount)
q.nvdimmCount++
return nil
}
if q.config.BlockDeviceCacheSet {
err = q.qmpMonitorCh.qmp.ExecuteBlockdevAddWithCache(q.qmpMonitorCh.ctx, drive.File, drive.ID, q.config.BlockDeviceCacheDirect, q.config.BlockDeviceCacheNoflush)
} else {
err = q.qmpMonitorCh.qmp.ExecuteBlockdevAdd(q.qmpMonitorCh.ctx, drive.File, drive.ID)
}
if err != nil {
return err
}
if q.config.BlockDeviceDriver == config.VirtioBlock {
driver := "virtio-blk-pci"
addr, bridge, err := q.addDeviceToBridge(drive.ID)
if err != nil {
return err
}
// PCI address is in the format bridge-addr/device-addr eg. "03/02"
drive.PCIAddr = fmt.Sprintf("%02x", bridge.Addr) + "/" + addr
if err = q.qmpMonitorCh.qmp.ExecutePCIDeviceAdd(q.qmpMonitorCh.ctx, drive.ID, devID, driver, addr, bridge.ID, romFile, true, q.arch.runNested()); err != nil {
return err
}
} else {
driver := "scsi-hd"
// Bus exposed by the SCSI Controller
bus := scsiControllerID + ".0"
// Get SCSI-id and LUN based on the order of attaching drives.
scsiID, lun, err := utils.GetSCSIIdLun(drive.Index)
if err != nil {
return err
}
if err = q.qmpMonitorCh.qmp.ExecuteSCSIDeviceAdd(q.qmpMonitorCh.ctx, drive.ID, devID, driver, bus, romFile, scsiID, lun, true, q.arch.runNested()); err != nil {
return err
}
}
return nil
}
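
For reference, the size query used above can be exercised on its own. This is a minimal standalone sketch (the device path is illustrative) of reading a block device's size via the BLKGETSIZE64 ioctl from golang.org/x/sys/unix, matching the call made before ExecuteNVDIMMDeviceAdd:

```go
package main

import (
	"fmt"
	"os"
	"unsafe"

	"golang.org/x/sys/unix"
)

// blockDeviceSize returns the size in bytes of a block device, using the
// same BLKGETSIZE64 ioctl that hotplugAddBlockDevice issues before adding
// the NVDIMM device.
func blockDeviceSize(path string) (int64, error) {
	f, err := os.Open(path)
	if err != nil {
		return 0, err
	}
	defer f.Close()

	var size int64
	// The ioctl writes the device size (in bytes) into the int64 we point at.
	if _, _, errno := unix.Syscall(unix.SYS_IOCTL, f.Fd(), unix.BLKGETSIZE64, uintptr(unsafe.Pointer(&size))); errno != 0 {
		return 0, errno
	}
	return size, nil
}

func main() {
	size, err := blockDeviceSize("/dev/loop0") // hypothetical device path
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Printf("%d bytes\n", size)
}
```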
func (q *qemu) hotplugBlockDevice(drive *config.BlockDrive, op operation) error {
err := q.qmpSetup()
if err != nil {
@ -736,44 +817,7 @@ func (q *qemu) hotplugBlockDevice(drive *config.BlockDrive, op operation) error
devID := "virtio-" + drive.ID
if op == addDevice {
if q.config.BlockDeviceCacheSet {
err = q.qmpMonitorCh.qmp.ExecuteBlockdevAddWithCache(q.qmpMonitorCh.ctx, drive.File, drive.ID, q.config.BlockDeviceCacheDirect, q.config.BlockDeviceCacheNoflush)
} else {
err = q.qmpMonitorCh.qmp.ExecuteBlockdevAdd(q.qmpMonitorCh.ctx, drive.File, drive.ID)
}
if err != nil {
return err
}
if q.config.BlockDeviceDriver == config.VirtioBlock {
driver := "virtio-blk-pci"
addr, bridge, err := q.addDeviceToBridge(drive.ID)
if err != nil {
return err
}
// PCI address is in the format bridge-addr/device-addr eg. "03/02"
drive.PCIAddr = fmt.Sprintf("%02x", bridge.Addr) + "/" + addr
if err = q.qmpMonitorCh.qmp.ExecutePCIDeviceAdd(q.qmpMonitorCh.ctx, drive.ID, devID, driver, addr, bridge.ID, romFile, true, q.arch.runNested()); err != nil {
return err
}
} else {
driver := "scsi-hd"
// Bus exposed by the SCSI Controller
bus := scsiControllerID + ".0"
// Get SCSI-id and LUN based on the order of attaching drives.
scsiID, lun, err := utils.GetSCSIIdLun(drive.Index)
if err != nil {
return err
}
if err = q.qmpMonitorCh.qmp.ExecuteSCSIDeviceAdd(q.qmpMonitorCh.ctx, drive.ID, devID, driver, bus, romFile, scsiID, lun, true, q.arch.runNested()); err != nil {
return err
}
}
err = q.hotplugAddBlockDevice(drive, op, devID)
} else {
if q.config.BlockDeviceDriver == config.VirtioBlock {
if err := q.removeDeviceFromBridge(drive.ID); err != nil {
@ -790,7 +834,7 @@ func (q *qemu) hotplugBlockDevice(drive *config.BlockDrive, op operation) error
}
}
return nil
return err
}
func (q *qemu) hotplugVFIODevice(device *config.VFIODev, op operation) error {
@ -1391,12 +1435,11 @@ func genericBridges(number uint32, machineType string) []Bridge {
return bridges
}
func genericMemoryTopology(memoryMb, hostMemoryMb uint64, slots uint8) govmmQemu.Memory {
// NVDIMM device needs memory space 1024MB
func genericMemoryTopology(memoryMb, hostMemoryMb uint64, slots uint8, memoryOffset uint32) govmmQemu.Memory {
// The image NVDIMM device needs 1024MB of memory space
// See https://github.com/clearcontainers/runtime/issues/380
memoryOffset := 1024
memoryOffset += 1024
// add 1G memory space for nvdimm device (vm guest image)
memMax := fmt.Sprintf("%dM", hostMemoryMb+uint64(memoryOffset))
mem := fmt.Sprintf("%dM", memoryMb)
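
To make the resulting maximum-memory value concrete, here is a small hedged sketch of the computation above (the numbers are illustrative): the hotplug ceiling is host memory plus the configured memory_offset, plus the 1024 MiB reserved for the guest image's nvdimm device:

```go
package main

import "fmt"

// memMaxString mirrors the max-memory computation in genericMemoryTopology:
// host memory + memory_offset + 1024 MiB for the image NVDIMM device.
func memMaxString(hostMemoryMb uint64, memoryOffset uint32) string {
	offset := uint64(memoryOffset) + 1024
	return fmt.Sprintf("%dM", hostMemoryMb+offset)
}

func main() {
	// Illustrative: 32 GiB of host memory, memory_offset = 4096 MiB
	// (the size of the nvdimm-backed block device).
	fmt.Println(memMaxString(32768, 4096)) // prints 37888M
}
```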

View File

@ -87,6 +87,7 @@ func newQemuArch(config HypervisorConfig) qemuArch {
q := &qemuAmd64{
qemuArchBase{
machineType: machineType,
memoryOffset: config.MemOffset,
qemuPaths: qemuPaths,
supportedQemuMachines: supportedQemuMachines,
kernelParamsNonDebug: kernelParamsNonDebug,
@ -96,6 +97,7 @@ func newQemuArch(config HypervisorConfig) qemuArch {
}
q.handleImagePath(config)
return q
}
@ -126,7 +128,7 @@ func (q *qemuAmd64) cpuModel() string {
}
func (q *qemuAmd64) memoryTopology(memoryMb, hostMemoryMb uint64, slots uint8) govmmQemu.Memory {
return genericMemoryTopology(memoryMb, hostMemoryMb, slots)
return genericMemoryTopology(memoryMb, hostMemoryMb, slots, q.memoryOffset)
}
func (q *qemuAmd64) appendImage(devices []govmmQemu.Device, path string) ([]govmmQemu.Device, error) {

View File

@ -103,6 +103,7 @@ type qemuArch interface {
type qemuArchBase struct {
machineType string
memoryOffset uint32
nestedRun bool
vhost bool
networkIndex int

View File

@ -136,6 +136,7 @@ func newQemuArch(config HypervisorConfig) qemuArch {
q := &qemuArm64{
qemuArchBase{
machineType: machineType,
memoryOffset: config.MemOffset,
qemuPaths: qemuPaths,
supportedQemuMachines: supportedQemuMachines,
kernelParamsNonDebug: kernelParamsNonDebug,

View File

@ -74,6 +74,7 @@ func newQemuArch(config HypervisorConfig) qemuArch {
q := &qemuPPC64le{
qemuArchBase{
machineType: machineType,
memoryOffset: config.MemOffset,
qemuPaths: qemuPaths,
supportedQemuMachines: supportedQemuMachines,
kernelParamsNonDebug: kernelParamsNonDebug,
@ -83,6 +84,9 @@ func newQemuArch(config HypervisorConfig) qemuArch {
}
q.handleImagePath(config)
q.memoryOffset = config.MemOffset
return q
}
@ -121,7 +125,7 @@ func (q *qemuPPC64le) memoryTopology(memoryMb, hostMemoryMb uint64, slots uint8)
hostMemoryMb = defaultMemMaxPPC64le
}
return genericMemoryTopology(memoryMb, hostMemoryMb, slots)
return genericMemoryTopology(memoryMb, hostMemoryMb, slots, q.memoryOffset)
}
func (q *qemuPPC64le) appendImage(devices []govmmQemu.Device, path string) ([]govmmQemu.Device, error) {

View File

@ -61,6 +61,7 @@ func newQemuArch(config HypervisorConfig) qemuArch {
q := &qemuS390x{
qemuArchBase{
machineType: machineType,
memoryOffset: config.MemOffset,
qemuPaths: qemuPaths,
supportedQemuMachines: supportedQemuMachines,
kernelParamsNonDebug: kernelParamsNonDebug,