mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-07-31 07:19:06 +00:00
Merge pull request #962 from teawater/nvdimm
block: Add new block storage driver "nvdimm"
This commit is contained in:
commit
2b75f440f0
@ -82,6 +82,13 @@ default_memory = @DEFMEMSZ@
|
||||
# This will determine how many times memory can be hotadded to the sandbox/VM.
|
||||
#memory_slots = @DEFMEMSLOTS@
|
||||
|
||||
# The size in MiB will be added to the hypervisor's maximum memory.
|
||||
# It is the memory address space for the NVDIMM device.
|
||||
# If the block storage driver (block_device_driver) is set to "nvdimm",
|
||||
# memory_offset should be set to the size of the block device.
|
||||
# Default 0
|
||||
#memory_offset = 0
|
||||
|
||||
# Disable block device from being used for a container's rootfs.
|
||||
# In case of a storage driver like devicemapper where a container's
|
||||
# root file system is backed by a block device, the block device is passed
|
||||
@ -91,8 +98,8 @@ default_memory = @DEFMEMSZ@
|
||||
disable_block_device_use = @DEFDISABLEBLOCK@
|
||||
|
||||
# Block storage driver to be used for the hypervisor in case the container
|
||||
# rootfs is backed by a block device. This is either virtio-scsi or
|
||||
# virtio-blk.
|
||||
# rootfs is backed by a block device. This is virtio-scsi, virtio-blk
|
||||
# or nvdimm.
|
||||
block_device_driver = "@DEFBLOCKSTORAGEDRIVER@"
|
||||
|
||||
# Specifies cache-related options will be set to block devices or not.
|
||||
|
@ -24,6 +24,7 @@ const defaultVCPUCount uint32 = 1
|
||||
const defaultMaxVCPUCount uint32 = 0
|
||||
const defaultMemSize uint32 = 2048 // MiB
|
||||
const defaultMemSlots uint32 = 10
|
||||
const defaultMemOffset uint32 = 0 // MiB
|
||||
const defaultBridgesCount uint32 = 1
|
||||
const defaultInterNetworkingModel = "macvtap"
|
||||
const defaultDisableBlockDeviceUse bool = false
|
||||
|
@ -98,6 +98,7 @@ type hypervisor struct {
|
||||
DefaultMaxVCPUs uint32 `toml:"default_maxvcpus"`
|
||||
MemorySize uint32 `toml:"default_memory"`
|
||||
MemSlots uint32 `toml:"memory_slots"`
|
||||
MemOffset uint32 `toml:"memory_offset"`
|
||||
DefaultBridges uint32 `toml:"default_bridges"`
|
||||
Msize9p uint32 `toml:"msize_9p"`
|
||||
DisableBlockDeviceUse bool `toml:"disable_block_device_use"`
|
||||
@ -281,6 +282,15 @@ func (h hypervisor) defaultMemSlots() uint32 {
|
||||
return slots
|
||||
}
|
||||
|
||||
func (h hypervisor) defaultMemOffset() uint32 {
|
||||
offset := h.MemOffset
|
||||
if offset == 0 {
|
||||
offset = defaultMemOffset
|
||||
}
|
||||
|
||||
return offset
|
||||
}
|
||||
|
||||
func (h hypervisor) defaultBridges() uint32 {
|
||||
if h.DefaultBridges == 0 {
|
||||
return defaultBridgesCount
|
||||
@ -294,7 +304,7 @@ func (h hypervisor) defaultBridges() uint32 {
|
||||
}
|
||||
|
||||
func (h hypervisor) blockDeviceDriver() (string, error) {
|
||||
supportedBlockDrivers := []string{config.VirtioSCSI, config.VirtioBlock, config.VirtioMmio}
|
||||
supportedBlockDrivers := []string{config.VirtioSCSI, config.VirtioBlock, config.VirtioMmio, config.Nvdimm}
|
||||
|
||||
if h.BlockDeviceDriver == "" {
|
||||
return defaultBlockDeviceDriver, nil
|
||||
@ -514,6 +524,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
|
||||
DefaultMaxVCPUs: h.defaultMaxVCPUs(),
|
||||
MemorySize: h.defaultMemSz(),
|
||||
MemSlots: h.defaultMemSlots(),
|
||||
MemOffset: h.defaultMemOffset(),
|
||||
EntropySource: h.GetEntropySource(),
|
||||
DefaultBridges: h.defaultBridges(),
|
||||
DisableBlockDeviceUse: h.DisableBlockDeviceUse,
|
||||
@ -677,6 +688,7 @@ func initConfig() (config oci.RuntimeConfig, err error) {
|
||||
NumVCPUs: defaultVCPUCount,
|
||||
DefaultMaxVCPUs: defaultMaxVCPUCount,
|
||||
MemorySize: defaultMemSize,
|
||||
MemOffset: defaultMemOffset,
|
||||
DefaultBridges: defaultBridgesCount,
|
||||
MemPrealloc: defaultEnableMemPrealloc,
|
||||
HugePages: defaultEnableHugePages,
|
||||
|
@ -47,6 +47,9 @@ const (
|
||||
|
||||
// VirtioSCSI means use virtio-scsi for hotplugging drives
|
||||
VirtioSCSI = "virtio-scsi"
|
||||
|
||||
// Nvdimm means use nvdimm for hotplugging drives
|
||||
Nvdimm = "nvdimm"
|
||||
)
|
||||
|
||||
// Defining these as a variable instead of a const, to allow
|
||||
@ -119,6 +122,9 @@ type BlockDrive struct {
|
||||
// SCSI address is in the format SCSI-Id:LUN
|
||||
SCSIAddr string
|
||||
|
||||
// NvdimmID is the nvdimm id inside the VM
|
||||
NvdimmID string
|
||||
|
||||
// VirtPath at which the device appears inside the VM, outside of the container mount namespace
|
||||
VirtPath string
|
||||
}
|
||||
|
@ -78,7 +78,7 @@ func (device *BlockDevice) Attach(devReceiver api.DeviceReceiver) (err error) {
|
||||
}
|
||||
|
||||
drive.SCSIAddr = scsiAddr
|
||||
} else {
|
||||
} else if customOptions["block-driver"] != "nvdimm" {
|
||||
var globalIdx int
|
||||
|
||||
switch customOptions["block-driver"] {
|
||||
@ -102,7 +102,7 @@ func (device *BlockDevice) Attach(devReceiver api.DeviceReceiver) (err error) {
|
||||
drive.VirtPath = filepath.Join("/dev", driveName)
|
||||
}
|
||||
|
||||
deviceLogger().WithField("device", device.DeviceInfo.HostPath).Info("Attaching block device")
|
||||
deviceLogger().WithField("device", device.DeviceInfo.HostPath).WithField("VirtPath", drive.VirtPath).Infof("Attaching %s device", customOptions["block-driver"])
|
||||
device.BlockDrive = drive
|
||||
if err = devReceiver.HotplugAddDevice(device, config.DeviceBlock); err != nil {
|
||||
return err
|
||||
|
@ -26,6 +26,8 @@ const (
|
||||
VirtioBlock string = "virtio-blk"
|
||||
// VirtioSCSI indicates block driver is virtio-scsi based
|
||||
VirtioSCSI string = "virtio-scsi"
|
||||
// Nvdimm indicates block driver is nvdimm based
|
||||
Nvdimm string = "nvdimm"
|
||||
)
|
||||
|
||||
var (
|
||||
@ -61,6 +63,8 @@ func NewDeviceManager(blockDriver string, devices []api.Device) api.DeviceManage
|
||||
dm.blockDriver = VirtioMmio
|
||||
} else if blockDriver == VirtioBlock {
|
||||
dm.blockDriver = VirtioBlock
|
||||
} else if blockDriver == Nvdimm {
|
||||
dm.blockDriver = Nvdimm
|
||||
} else {
|
||||
dm.blockDriver = VirtioSCSI
|
||||
}
|
||||
|
@ -169,6 +169,9 @@ type HypervisorConfig struct {
|
||||
// MemSlots specifies default memory slots for the VM.
|
||||
MemSlots uint32
|
||||
|
||||
// MemOffset specifies memory space for nvdimm device
|
||||
MemOffset uint32
|
||||
|
||||
// KernelParams are additional guest kernel parameters.
|
||||
KernelParams []Param
|
||||
|
||||
|
@ -62,6 +62,7 @@ var (
|
||||
kataMmioBlkDevType = "mmioblk"
|
||||
kataBlkDevType = "blk"
|
||||
kataSCSIDevType = "scsi"
|
||||
kataNvdimmDevType = "nvdimm"
|
||||
sharedDir9pOptions = []string{"trans=virtio,version=9p2000.L,cache=mmap", "nodev"}
|
||||
shmDir = "shm"
|
||||
kataEphemeralDevType = "ephemeral"
|
||||
@ -883,6 +884,9 @@ func (k *kataAgent) appendDevices(deviceList []*grpc.Device, c *Container) []*gr
|
||||
case config.VirtioSCSI:
|
||||
kataDevice.Type = kataSCSIDevType
|
||||
kataDevice.Id = d.SCSIAddr
|
||||
case config.Nvdimm:
|
||||
kataDevice.Type = kataNvdimmDevType
|
||||
kataDevice.VmPath = fmt.Sprintf("/dev/pmem%s", d.NvdimmID)
|
||||
}
|
||||
|
||||
deviceList = append(deviceList, kataDevice)
|
||||
|
@ -8,20 +8,22 @@ package virtcontainers
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
govmmQemu "github.com/intel/govmm/qemu"
|
||||
"github.com/kata-containers/runtime/virtcontainers/pkg/uuid"
|
||||
"github.com/opentracing/opentracing-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
govmmQemu "github.com/intel/govmm/qemu"
|
||||
"github.com/kata-containers/runtime/virtcontainers/pkg/uuid"
|
||||
opentracing "github.com/opentracing/opentracing-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
"unsafe"
|
||||
|
||||
"github.com/kata-containers/runtime/virtcontainers/device/config"
|
||||
"github.com/kata-containers/runtime/virtcontainers/utils"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// romFile is the file name of the ROM that can be used for virtio-pci devices.
|
||||
@ -73,6 +75,8 @@ type qemu struct {
|
||||
fds []*os.File
|
||||
|
||||
ctx context.Context
|
||||
|
||||
nvdimmCount int
|
||||
}
|
||||
|
||||
const (
|
||||
@ -221,6 +225,20 @@ func (q *qemu) init(ctx context.Context, id string, hypervisorConfig *Hypervisor
|
||||
q.config = *hypervisorConfig
|
||||
q.arch = newQemuArch(q.config)
|
||||
|
||||
initrdPath, err := q.config.InitrdAssetPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
imagePath, err := q.config.ImageAssetPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if initrdPath == "" && imagePath != "" {
|
||||
q.nvdimmCount = 1
|
||||
} else {
|
||||
q.nvdimmCount = 0
|
||||
}
|
||||
|
||||
if err = q.storage.fetchHypervisorState(q.id, &q.state); err != nil {
|
||||
q.Logger().Debug("Creating bridges")
|
||||
q.state.Bridges = q.arch.bridges(q.config.DefaultBridges)
|
||||
@ -727,6 +745,69 @@ func (q *qemu) removeDeviceFromBridge(ID string) error {
|
||||
return err
|
||||
}
|
||||
|
||||
func (q *qemu) hotplugAddBlockDevice(drive *config.BlockDrive, op operation, devID string) error {
|
||||
var err error
|
||||
|
||||
if q.config.BlockDeviceDriver == config.Nvdimm {
|
||||
var blocksize int64
|
||||
file, err := os.Open(drive.File)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, file.Fd(), unix.BLKGETSIZE64, uintptr(unsafe.Pointer(&blocksize))); err != 0 {
|
||||
return err
|
||||
}
|
||||
if err = q.qmpMonitorCh.qmp.ExecuteNVDIMMDeviceAdd(q.qmpMonitorCh.ctx, drive.ID, drive.File, blocksize); err != nil {
|
||||
q.Logger().WithError(err).Errorf("Failed to add NVDIMM device %s", drive.File)
|
||||
return err
|
||||
}
|
||||
drive.NvdimmID = strconv.Itoa(q.nvdimmCount)
|
||||
q.nvdimmCount++
|
||||
return nil
|
||||
}
|
||||
|
||||
if q.config.BlockDeviceCacheSet {
|
||||
err = q.qmpMonitorCh.qmp.ExecuteBlockdevAddWithCache(q.qmpMonitorCh.ctx, drive.File, drive.ID, q.config.BlockDeviceCacheDirect, q.config.BlockDeviceCacheNoflush)
|
||||
} else {
|
||||
err = q.qmpMonitorCh.qmp.ExecuteBlockdevAdd(q.qmpMonitorCh.ctx, drive.File, drive.ID)
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if q.config.BlockDeviceDriver == config.VirtioBlock {
|
||||
driver := "virtio-blk-pci"
|
||||
addr, bridge, err := q.addDeviceToBridge(drive.ID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// PCI address is in the format bridge-addr/device-addr eg. "03/02"
|
||||
drive.PCIAddr = fmt.Sprintf("%02x", bridge.Addr) + "/" + addr
|
||||
|
||||
if err = q.qmpMonitorCh.qmp.ExecutePCIDeviceAdd(q.qmpMonitorCh.ctx, drive.ID, devID, driver, addr, bridge.ID, romFile, true, q.arch.runNested()); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
driver := "scsi-hd"
|
||||
|
||||
// Bus exposed by the SCSI Controller
|
||||
bus := scsiControllerID + ".0"
|
||||
|
||||
// Get SCSI-id and LUN based on the order of attaching drives.
|
||||
scsiID, lun, err := utils.GetSCSIIdLun(drive.Index)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err = q.qmpMonitorCh.qmp.ExecuteSCSIDeviceAdd(q.qmpMonitorCh.ctx, drive.ID, devID, driver, bus, romFile, scsiID, lun, true, q.arch.runNested()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (q *qemu) hotplugBlockDevice(drive *config.BlockDrive, op operation) error {
|
||||
err := q.qmpSetup()
|
||||
if err != nil {
|
||||
@ -736,44 +817,7 @@ func (q *qemu) hotplugBlockDevice(drive *config.BlockDrive, op operation) error
|
||||
devID := "virtio-" + drive.ID
|
||||
|
||||
if op == addDevice {
|
||||
if q.config.BlockDeviceCacheSet {
|
||||
err = q.qmpMonitorCh.qmp.ExecuteBlockdevAddWithCache(q.qmpMonitorCh.ctx, drive.File, drive.ID, q.config.BlockDeviceCacheDirect, q.config.BlockDeviceCacheNoflush)
|
||||
} else {
|
||||
err = q.qmpMonitorCh.qmp.ExecuteBlockdevAdd(q.qmpMonitorCh.ctx, drive.File, drive.ID)
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if q.config.BlockDeviceDriver == config.VirtioBlock {
|
||||
driver := "virtio-blk-pci"
|
||||
addr, bridge, err := q.addDeviceToBridge(drive.ID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// PCI address is in the format bridge-addr/device-addr eg. "03/02"
|
||||
drive.PCIAddr = fmt.Sprintf("%02x", bridge.Addr) + "/" + addr
|
||||
|
||||
if err = q.qmpMonitorCh.qmp.ExecutePCIDeviceAdd(q.qmpMonitorCh.ctx, drive.ID, devID, driver, addr, bridge.ID, romFile, true, q.arch.runNested()); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
driver := "scsi-hd"
|
||||
|
||||
// Bus exposed by the SCSI Controller
|
||||
bus := scsiControllerID + ".0"
|
||||
|
||||
// Get SCSI-id and LUN based on the order of attaching drives.
|
||||
scsiID, lun, err := utils.GetSCSIIdLun(drive.Index)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err = q.qmpMonitorCh.qmp.ExecuteSCSIDeviceAdd(q.qmpMonitorCh.ctx, drive.ID, devID, driver, bus, romFile, scsiID, lun, true, q.arch.runNested()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
err = q.hotplugAddBlockDevice(drive, op, devID)
|
||||
} else {
|
||||
if q.config.BlockDeviceDriver == config.VirtioBlock {
|
||||
if err := q.removeDeviceFromBridge(drive.ID); err != nil {
|
||||
@ -790,7 +834,7 @@ func (q *qemu) hotplugBlockDevice(drive *config.BlockDrive, op operation) error
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
return err
|
||||
}
|
||||
|
||||
func (q *qemu) hotplugVFIODevice(device *config.VFIODev, op operation) error {
|
||||
@ -1391,12 +1435,11 @@ func genericBridges(number uint32, machineType string) []Bridge {
|
||||
return bridges
|
||||
}
|
||||
|
||||
func genericMemoryTopology(memoryMb, hostMemoryMb uint64, slots uint8) govmmQemu.Memory {
|
||||
// NVDIMM device needs memory space 1024MB
|
||||
func genericMemoryTopology(memoryMb, hostMemoryMb uint64, slots uint8, memoryOffset uint32) govmmQemu.Memory {
|
||||
// image NVDIMM device needs memory space 1024MB
|
||||
// See https://github.com/clearcontainers/runtime/issues/380
|
||||
memoryOffset := 1024
|
||||
memoryOffset += 1024
|
||||
|
||||
// add 1G memory space for nvdimm device (vm guest image)
|
||||
memMax := fmt.Sprintf("%dM", hostMemoryMb+uint64(memoryOffset))
|
||||
|
||||
mem := fmt.Sprintf("%dM", memoryMb)
|
||||
|
@ -87,6 +87,7 @@ func newQemuArch(config HypervisorConfig) qemuArch {
|
||||
q := &qemuAmd64{
|
||||
qemuArchBase{
|
||||
machineType: machineType,
|
||||
memoryOffset: config.MemOffset,
|
||||
qemuPaths: qemuPaths,
|
||||
supportedQemuMachines: supportedQemuMachines,
|
||||
kernelParamsNonDebug: kernelParamsNonDebug,
|
||||
@ -96,6 +97,7 @@ func newQemuArch(config HypervisorConfig) qemuArch {
|
||||
}
|
||||
|
||||
q.handleImagePath(config)
|
||||
|
||||
return q
|
||||
}
|
||||
|
||||
@ -126,7 +128,7 @@ func (q *qemuAmd64) cpuModel() string {
|
||||
}
|
||||
|
||||
func (q *qemuAmd64) memoryTopology(memoryMb, hostMemoryMb uint64, slots uint8) govmmQemu.Memory {
|
||||
return genericMemoryTopology(memoryMb, hostMemoryMb, slots)
|
||||
return genericMemoryTopology(memoryMb, hostMemoryMb, slots, q.memoryOffset)
|
||||
}
|
||||
|
||||
func (q *qemuAmd64) appendImage(devices []govmmQemu.Device, path string) ([]govmmQemu.Device, error) {
|
||||
|
@ -103,6 +103,7 @@ type qemuArch interface {
|
||||
|
||||
type qemuArchBase struct {
|
||||
machineType string
|
||||
memoryOffset uint32
|
||||
nestedRun bool
|
||||
vhost bool
|
||||
networkIndex int
|
||||
|
@ -136,6 +136,7 @@ func newQemuArch(config HypervisorConfig) qemuArch {
|
||||
q := &qemuArm64{
|
||||
qemuArchBase{
|
||||
machineType: machineType,
|
||||
memoryOffset: config.MemOffset,
|
||||
qemuPaths: qemuPaths,
|
||||
supportedQemuMachines: supportedQemuMachines,
|
||||
kernelParamsNonDebug: kernelParamsNonDebug,
|
||||
|
@ -74,6 +74,7 @@ func newQemuArch(config HypervisorConfig) qemuArch {
|
||||
q := &qemuPPC64le{
|
||||
qemuArchBase{
|
||||
machineType: machineType,
|
||||
memoryOffset: config.MemOffset,
|
||||
qemuPaths: qemuPaths,
|
||||
supportedQemuMachines: supportedQemuMachines,
|
||||
kernelParamsNonDebug: kernelParamsNonDebug,
|
||||
@ -83,6 +84,9 @@ func newQemuArch(config HypervisorConfig) qemuArch {
|
||||
}
|
||||
|
||||
q.handleImagePath(config)
|
||||
|
||||
q.memoryOffset = config.MemOffset
|
||||
|
||||
return q
|
||||
}
|
||||
|
||||
@ -121,7 +125,7 @@ func (q *qemuPPC64le) memoryTopology(memoryMb, hostMemoryMb uint64, slots uint8)
|
||||
hostMemoryMb = defaultMemMaxPPC64le
|
||||
}
|
||||
|
||||
return genericMemoryTopology(memoryMb, hostMemoryMb, slots)
|
||||
return genericMemoryTopology(memoryMb, hostMemoryMb, slots, q.memoryOffset)
|
||||
}
|
||||
|
||||
func (q *qemuPPC64le) appendImage(devices []govmmQemu.Device, path string) ([]govmmQemu.Device, error) {
|
||||
|
@ -61,6 +61,7 @@ func newQemuArch(config HypervisorConfig) qemuArch {
|
||||
q := &qemuS390x{
|
||||
qemuArchBase{
|
||||
machineType: machineType,
|
||||
memoryOffset: config.MemOffset,
|
||||
qemuPaths: qemuPaths,
|
||||
supportedQemuMachines: supportedQemuMachines,
|
||||
kernelParamsNonDebug: kernelParamsNonDebug,
|
||||
|
Loading…
Reference in New Issue
Block a user