diff --git a/Makefile b/Makefile index ed2ede316a..5bb16f7cff 100644 --- a/Makefile +++ b/Makefile @@ -181,6 +181,8 @@ DEFVIRTIOFSEXTRAARGS := [] DEFENABLEIOTHREADS := false DEFENABLEMEMPREALLOC := false DEFENABLEHUGEPAGES := false +DEFENABLEVHOSTUSERSTORE := false +DEFVHOSTUSERSTOREPATH := $(PKGRUNDIR)/vhost-user DEFENABLESWAP := false DEFENABLEDEBUG := false DEFDISABLENESTINGCHECKS := false @@ -440,6 +442,8 @@ USER_VARS += DEFVIRTIOFSEXTRAARGS USER_VARS += DEFENABLEIOTHREADS USER_VARS += DEFENABLEMEMPREALLOC USER_VARS += DEFENABLEHUGEPAGES +USER_VARS += DEFENABLEVHOSTUSERSTORE +USER_VARS += DEFVHOSTUSERSTOREPATH USER_VARS += DEFENABLESWAP USER_VARS += DEFENABLEDEBUG USER_VARS += DEFDISABLENESTINGCHECKS @@ -622,6 +626,8 @@ $(GENERATED_FILES): %: %.in $(MAKEFILE_LIST) VERSION .git-commit -e "s|@DEFENABLEIOTHREADS@|$(DEFENABLEIOTHREADS)|g" \ -e "s|@DEFENABLEMEMPREALLOC@|$(DEFENABLEMEMPREALLOC)|g" \ -e "s|@DEFENABLEHUGEPAGES@|$(DEFENABLEHUGEPAGES)|g" \ + -e "s|@DEFENABLEVHOSTUSERSTORE@|$(DEFENABLEVHOSTUSERSTORE)|g" \ + -e "s|@DEFVHOSTUSERSTOREPATH@|$(DEFVHOSTUSERSTOREPATH)|g" \ -e "s|@DEFENABLEMSWAP@|$(DEFENABLESWAP)|g" \ -e "s|@DEFENABLEDEBUG@|$(DEFENABLEDEBUG)|g" \ -e "s|@DEFDISABLENESTINGCHECKS@|$(DEFDISABLENESTINGCHECKS)|g" \ diff --git a/cli/config/configuration-qemu-virtiofs.toml.in b/cli/config/configuration-qemu-virtiofs.toml.in index a38e0a0d19..46a5ff0cab 100644 --- a/cli/config/configuration-qemu-virtiofs.toml.in +++ b/cli/config/configuration-qemu-virtiofs.toml.in @@ -172,6 +172,17 @@ enable_iothreads = @DEFENABLEIOTHREADS@ # result in memory pre allocation #enable_hugepages = true +# Enable vhost-user storage device, default false +# Enabling this will result in some Linux reserved block type +# major range 240-254 being chosen to represent vhost-user devices. +enable_vhost_user_store = @DEFENABLEVHOSTUSERSTORE@ + +# The base directory specifically used for vhost-user devices. 
+# Its sub-path "block" is used for block devices; "block/sockets" is +# where we expect vhost-user sockets to live; "block/devices" is where +# simulated block device nodes for vhost-user devices live. +vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@" + # Enable file based guest memory support. The default is an empty string which # will disable this feature. In the case of virtio-fs, this is enabled # automatically and '/dev/shm' is used as the backing folder. diff --git a/cli/config/configuration-qemu.toml.in b/cli/config/configuration-qemu.toml.in index 20e387008c..46ce7d9b53 100644 --- a/cli/config/configuration-qemu.toml.in +++ b/cli/config/configuration-qemu.toml.in @@ -179,6 +179,17 @@ enable_iothreads = @DEFENABLEIOTHREADS@ # result in memory pre allocation #enable_hugepages = true +# Enable vhost-user storage device, default false +# Enabling this will result in some Linux reserved block type +# major range 240-254 being chosen to represent vhost-user devices. +enable_vhost_user_store = @DEFENABLEVHOSTUSERSTORE@ + +# The base directory specifically used for vhost-user devices. +# Its sub-path "block" is used for block devices; "block/sockets" is +# where we expect vhost-user sockets to live; "block/devices" is where +# simulated block device nodes for vhost-user devices live. +vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@" + # Enable file based guest memory support. The default is an empty string which # will disable this feature. In the case of virtio-fs, this is enabled # automatically and '/dev/shm' is used as the backing folder. 
diff --git a/pkg/katautils/config-settings.go b/pkg/katautils/config-settings.go index c46f36c15f..37464b93da 100644 --- a/pkg/katautils/config-settings.go +++ b/pkg/katautils/config-settings.go @@ -49,6 +49,7 @@ const defaultEntropySource = "/dev/urandom" const defaultGuestHookPath string = "" const defaultVirtioFSCacheMode = "none" const defaultDisableImageNvdimm = false +const defaultVhostUserStorePath string = "/var/run/kata-containers/vhost-user/" const defaultTemplatePath string = "/run/vc/vm/template" const defaultVMCacheEndpoint string = "/var/run/kata-containers/cache.sock" diff --git a/pkg/katautils/config.go b/pkg/katautils/config.go index bf20a33206..b7b3ccd40d 100644 --- a/pkg/katautils/config.go +++ b/pkg/katautils/config.go @@ -104,6 +104,8 @@ type hypervisor struct { BlockDeviceCacheSet bool `toml:"block_device_cache_set"` BlockDeviceCacheDirect bool `toml:"block_device_cache_direct"` BlockDeviceCacheNoflush bool `toml:"block_device_cache_noflush"` + EnableVhostUserStore bool `toml:"enable_vhost_user_store"` + VhostUserStorePath string `toml:"vhost_user_store_path"` NumVCPUs int32 `toml:"default_vcpus"` DefaultMaxVCPUs uint32 `toml:"default_maxvcpus"` MemorySize uint32 `toml:"default_memory"` @@ -404,6 +406,13 @@ func (h hypervisor) guestHookPath() string { return h.GuestHookPath } +func (h hypervisor) vhostUserStorePath() string { + if h.VhostUserStorePath == "" { + return defaultVhostUserStorePath + } + return h.VhostUserStorePath +} + func (h hypervisor) getInitrdAndImage() (initrd string, image string, err error) { initrd, errInitrd := h.initrd() @@ -651,6 +660,8 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { HotplugVFIOOnRootBus: h.HotplugVFIOOnRootBus, PCIeRootPort: h.PCIeRootPort, DisableVhostNet: h.DisableVhostNet, + EnableVhostUserStore: h.EnableVhostUserStore, + VhostUserStorePath: h.vhostUserStorePath(), GuestHookPath: h.guestHookPath(), }, nil } @@ -1078,6 +1089,7 @@ func GetDefaultHypervisorConfig() 
vc.HypervisorConfig { HotplugVFIOOnRootBus: defaultHotplugVFIOOnRootBus, PCIeRootPort: defaultPCIeRootPort, GuestHookPath: defaultGuestHookPath, + VhostUserStorePath: defaultVhostUserStorePath, VirtioFSCache: defaultVirtioFSCacheMode, DisableImageNvdimm: defaultDisableImageNvdimm, } diff --git a/pkg/katautils/config_test.go b/pkg/katautils/config_test.go index eb15e5f3e7..221a4b5551 100644 --- a/pkg/katautils/config_test.go +++ b/pkg/katautils/config_test.go @@ -165,6 +165,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf MemSlots: defaultMemSlots, EntropySource: defaultEntropySource, GuestHookPath: defaultGuestHookPath, + VhostUserStorePath: defaultVhostUserStorePath, SharedFS: sharedFS, VirtioFSDaemon: "/path/to/virtiofsd", VirtioFSCache: defaultVirtioFSCacheMode, @@ -628,6 +629,7 @@ func TestMinimalRuntimeConfig(t *testing.T) { BlockDeviceDriver: defaultBlockDeviceDriver, Msize9p: defaultMsize9p, GuestHookPath: defaultGuestHookPath, + VhostUserStorePath: defaultVhostUserStorePath, VirtioFSCache: defaultVirtioFSCacheMode, } @@ -1198,6 +1200,21 @@ func TestHypervisorDefaultsGuestHookPath(t *testing.T) { assert.Equal(guestHookPath, testGuestHookPath, "custom guest hook path wrong") } +func TestHypervisorDefaultsVhostUserStorePath(t *testing.T) { + assert := assert.New(t) + + h := hypervisor{} + vhostUserStorePath := h.vhostUserStorePath() + assert.Equal(vhostUserStorePath, defaultVhostUserStorePath, "default vhost-user store path wrong") + + testVhostUserStorePath := "/test/vhost/user/store/path" + h = hypervisor{ + VhostUserStorePath: testVhostUserStorePath, + } + vhostUserStorePath = h.vhostUserStorePath() + assert.Equal(vhostUserStorePath, testVhostUserStorePath, "custom vhost-user store path wrong") +} + func TestProxyDefaults(t *testing.T) { assert := assert.New(t) diff --git a/virtcontainers/container_test.go b/virtcontainers/container_test.go index b11e83aff3..7eaa245962 100644 --- a/virtcontainers/container_test.go +++ 
b/virtcontainers/container_test.go @@ -87,7 +87,7 @@ func TestContainerRemoveDrive(t *testing.T) { sandbox := &Sandbox{ ctx: context.Background(), id: "sandbox", - devManager: manager.NewDeviceManager(manager.VirtioSCSI, nil), + devManager: manager.NewDeviceManager(manager.VirtioSCSI, false, "", nil), config: &SandboxConfig{}, } @@ -306,7 +306,7 @@ func TestContainerAddDriveDir(t *testing.T) { sandbox := &Sandbox{ ctx: context.Background(), id: testSandboxID, - devManager: manager.NewDeviceManager(manager.VirtioSCSI, nil), + devManager: manager.NewDeviceManager(manager.VirtioSCSI, false, "", nil), hypervisor: &mockHypervisor{}, agent: &noopAgent{}, config: &SandboxConfig{ diff --git a/virtcontainers/device/api/interface.go b/virtcontainers/device/api/interface.go index bb1c30d2cb..2ecc6e56fd 100644 --- a/virtcontainers/device/api/interface.go +++ b/virtcontainers/device/api/interface.go @@ -58,8 +58,9 @@ type Device interface { // GetDeviceInfo returns device specific data used for hotplugging by hypervisor // Caller could cast the return value to device specific struct - // e.g. Block device returns *config.BlockDrive and - // vfio device returns []*config.VFIODev + // e.g. Block device returns *config.BlockDrive, + // vfio device returns []*config.VFIODev, + // VhostUser device returns []*config.VhostUserDeviceAttrs GetDeviceInfo() interface{} // GetAttachCount returns how many times the device has been attached diff --git a/virtcontainers/device/config/config.go b/virtcontainers/device/config/config.go index 88d844c752..57b4f0a7b9 100644 --- a/virtcontainers/device/config/config.go +++ b/virtcontainers/device/config/config.go @@ -8,11 +8,13 @@ package config import ( "fmt" + "io/ioutil" "os" "path/filepath" "strconv" "github.com/go-ini/ini" + "golang.org/x/sys/unix" ) // DeviceType indicates device type @@ -66,6 +68,14 @@ const ( VirtioFS = "virtio-fs" ) +const ( + // The OCI spec requires the major-minor number to be provided for a + // device. 
We have chosen the below major numbers to represent + // vhost-user devices. + VhostUserBlkMajor = 241 + VhostUserSCSIMajor = 242 +) + // Defining these as a variable instead of a const, to allow // overriding this in the tests. @@ -223,15 +233,26 @@ type VhostUserDeviceAttrs struct { Tag string CacheSize uint32 Cache string + + // PCIAddr is the PCI address used to identify the slot at which the drive is attached. + // It is only meaningful for vhost user block devices + PCIAddr string + + // Block index of the device if assigned + Index int } // GetHostPathFunc is function pointer used to mock GetHostPath in tests. var GetHostPathFunc = GetHostPath +// GetVhostUserNodeStatFunc is function pointer used to mock GetVhostUserNodeStat +// in tests. Through this function, user can get device type information. +var GetVhostUserNodeStatFunc = GetVhostUserNodeStat + // GetHostPath is used to fetch the host path for the device. // The path passed in the spec refers to the path that should appear inside the container. // We need to find the actual device path on the host based on the major-minor numbers of the device. -func GetHostPath(devInfo DeviceInfo) (string, error) { +func GetHostPath(devInfo DeviceInfo, vhostUserStoreEnabled bool, vhostUserStorePath string) (string, error) { if devInfo.ContainerPath == "" { return "", fmt.Errorf("Empty path provided for device") } @@ -249,6 +270,12 @@ func GetHostPath(devInfo DeviceInfo) (string, error) { return "", nil } + // Filter out vhost-user storage devices by device Major numbers. 
+ if vhostUserStoreEnabled && devInfo.DevType == "b" && + (devInfo.Major == VhostUserSCSIMajor || devInfo.Major == VhostUserBlkMajor) { + return getVhostUserHostPath(devInfo, vhostUserStorePath) + } + format := strconv.FormatInt(devInfo.Major, 10) + ":" + strconv.FormatInt(devInfo.Minor, 10) sysDevPath := filepath.Join(SysDevPrefix, pathComp, format, "uevent") @@ -278,3 +305,58 @@ func GetHostPath(devInfo DeviceInfo) (string, error) { return filepath.Join("/dev", devName.String()), nil } + +// getVhostUserHostPath is used to fetch host path for the vhost-user device. +// For vhost-user block device like vhost-user-blk or vhost-user-scsi, its +// socket should be under directory vhostUserStorePath + "/block/sockets/"; +// its corresponding device node should be under directory +// vhostUserStorePath + "/block/devices/" +func getVhostUserHostPath(devInfo DeviceInfo, vhostUserStorePath string) (string, error) { + vhostUserDevNodePath := filepath.Join(vhostUserStorePath, "/block/devices/") + vhostUserSockPath := filepath.Join(vhostUserStorePath, "/block/sockets/") + + sockFileName, err := getVhostUserDevName(vhostUserDevNodePath, + uint32(devInfo.Major), uint32(devInfo.Minor)) + if err != nil { + return "", err + } + + // Locate socket path of vhost-user device + sockFilePath := filepath.Join(vhostUserSockPath, sockFileName) + if _, err = os.Stat(sockFilePath); os.IsNotExist(err) { + return "", err + } + + return sockFilePath, nil +} + +func GetVhostUserNodeStat(devNodePath string, devNodeStat *unix.Stat_t) (err error) { + return unix.Stat(devNodePath, devNodeStat) +} + +// Filter out name of the device node whose device type is Major:Minor from directory +func getVhostUserDevName(dirname string, majorNum, minorNum uint32) (string, error) { + files, err := ioutil.ReadDir(dirname) + if err != nil { + return "", err + } + + for _, file := range files { + var devStat unix.Stat_t + + devFilePath := filepath.Join(dirname, file.Name()) + err = GetVhostUserNodeStatFunc(devFilePath, &devStat) + if err != nil { + 
return "", err + } + + devMajor := unix.Major(devStat.Rdev) + devMinor := unix.Minor(devStat.Rdev) + if devMajor == majorNum && devMinor == minorNum { + return file.Name(), nil + } + } + + return "", fmt.Errorf("Required device node (%d:%d) doesn't exist under directory %s", + majorNum, minorNum, dirname) +} diff --git a/virtcontainers/device/drivers/generic.go b/virtcontainers/device/drivers/generic.go index 32d91d4624..53e710a5d7 100644 --- a/virtcontainers/device/drivers/generic.go +++ b/virtcontainers/device/drivers/generic.go @@ -14,7 +14,7 @@ import ( persistapi "github.com/kata-containers/runtime/virtcontainers/persist/api" ) -// GenericDevice refers to a device that is neither a VFIO device or block device. +// GenericDevice refers to a device that is neither a VFIO device, block device or VhostUserDevice. type GenericDevice struct { ID string DeviceInfo *config.DeviceInfo diff --git a/virtcontainers/device/drivers/vhost_user_blk.go b/virtcontainers/device/drivers/vhost_user_blk.go index b1bb61d554..cdc33cf8ac 100644 --- a/virtcontainers/device/drivers/vhost_user_blk.go +++ b/virtcontainers/device/drivers/vhost_user_blk.go @@ -7,18 +7,27 @@ package drivers import ( - "encoding/hex" - "github.com/kata-containers/runtime/virtcontainers/device/api" "github.com/kata-containers/runtime/virtcontainers/device/config" persistapi "github.com/kata-containers/runtime/virtcontainers/persist/api" "github.com/kata-containers/runtime/virtcontainers/utils" + "github.com/sirupsen/logrus" ) // VhostUserBlkDevice is a block vhost-user based device type VhostUserBlkDevice struct { *GenericDevice - config.VhostUserDeviceAttrs + VhostUserDeviceAttrs *config.VhostUserDeviceAttrs +} + +// NewVhostUserBlkDevice creates a new vhost-user block device based on DeviceInfo +func NewVhostUserBlkDevice(devInfo *config.DeviceInfo) *VhostUserBlkDevice { + return &VhostUserBlkDevice{ + GenericDevice: &GenericDevice{ + ID: devInfo.ID, + DeviceInfo: devInfo, + }, + } } // @@ -35,30 +44,103 @@ 
func (device *VhostUserBlkDevice) Attach(devReceiver api.DeviceReceiver) (err er if skip { return nil } + + // From the explanation of function attach in block.go, block index of + // a general block device is utilized for some situation. + // Since vhost-user-blk uses "vd" prefix in Linux kernel, not "sd", + // sandbox block index should be updated only if sandbox default block + // driver is "virtio-blk"/"virtio-blk-ccw"/"virtio-mmio" which uses + // "vd" prefix in Linux kernel. + index := -1 + updateBlockIndex := isVirtioBlkBlockDriver(device.DeviceInfo.DriverOptions) + if updateBlockIndex { + index, err = devReceiver.GetAndSetSandboxBlockIndex() + } + defer func() { if err != nil { + if updateBlockIndex { + devReceiver.UnsetSandboxBlockIndex(index) + } device.bumpAttachCount(false) } }() - // generate a unique ID to be used for hypervisor commandline fields - randBytes, err := utils.GenerateRandomBytes(8) if err != nil { return err } - id := hex.EncodeToString(randBytes) - device.DevID = id - device.Type = device.DeviceType() + vAttrs := &config.VhostUserDeviceAttrs{ + DevID: utils.MakeNameID("blk", device.DeviceInfo.ID, maxDevIDSize), + SocketPath: device.DeviceInfo.HostPath, + Type: config.VhostUserBlk, + Index: index, + } - return devReceiver.AppendDevice(device) + deviceLogger().WithFields(logrus.Fields{ + "device": device.DeviceInfo.HostPath, + "SocketPath": vAttrs.SocketPath, + "Type": config.VhostUserBlk, + "Index": index, + }).Info("Attaching device") + + device.VhostUserDeviceAttrs = vAttrs + if err = devReceiver.HotplugAddDevice(device, config.VhostUserBlk); err != nil { + return err + } + + return nil +} + +func isVirtioBlkBlockDriver(customOptions map[string]string) bool { + var blockDriverOption string + + if customOptions == nil { + // User has not chosen a specific block device type + // Default to SCSI + blockDriverOption = "virtio-scsi" + } else { + blockDriverOption = customOptions["block-driver"] + } + + if blockDriverOption == "virtio-blk" 
|| + blockDriverOption == "virtio-blk-ccw" || + blockDriverOption == "virtio-mmio" { + return true + } + + return false } // Detach is standard interface of api.Device, it's used to remove device from some // DeviceReceiver func (device *VhostUserBlkDevice) Detach(devReceiver api.DeviceReceiver) error { - _, err := device.bumpAttachCount(false) - return err + skip, err := device.bumpAttachCount(false) + if err != nil { + return err + } + if skip { + return nil + } + + defer func() { + if err != nil { + device.bumpAttachCount(true) + } else { + updateBlockIndex := isVirtioBlkBlockDriver(device.DeviceInfo.DriverOptions) + if updateBlockIndex { + devReceiver.UnsetSandboxBlockIndex(device.VhostUserDeviceAttrs.Index) + } + } + }() + + deviceLogger().WithField("device", device.DeviceInfo.HostPath).Info("Unplugging vhost-user-blk device") + + if err = devReceiver.HotplugRemoveDevice(device, config.VhostUserBlk); err != nil { + deviceLogger().WithError(err).Error("Failed to unplug vhost-user-blk device") + return err + } + return nil } // DeviceType is standard interface of api.Device, it returns device type @@ -68,19 +150,23 @@ func (device *VhostUserBlkDevice) DeviceType() config.DeviceType { // GetDeviceInfo returns device information used for creating func (device *VhostUserBlkDevice) GetDeviceInfo() interface{} { - device.Type = device.DeviceType() - return &device.VhostUserDeviceAttrs + return device.VhostUserDeviceAttrs } // Save converts Device to DeviceState func (device *VhostUserBlkDevice) Save() persistapi.DeviceState { ds := device.GenericDevice.Save() ds.Type = string(device.DeviceType()) - ds.VhostUserDev = &persistapi.VhostUserDeviceAttrs{ - DevID: device.DevID, - SocketPath: device.SocketPath, - Type: string(device.Type), - MacAddress: device.MacAddress, + + vAttr := device.VhostUserDeviceAttrs + if vAttr != nil { + ds.VhostUserDev = &persistapi.VhostUserDeviceAttrs{ + DevID: vAttr.DevID, + SocketPath: vAttr.SocketPath, + Type: string(vAttr.Type), + 
PCIAddr: vAttr.PCIAddr, + Index: vAttr.Index, + } } return ds } @@ -95,11 +181,12 @@ func (device *VhostUserBlkDevice) Load(ds persistapi.DeviceState) { return } - device.VhostUserDeviceAttrs = config.VhostUserDeviceAttrs{ + device.VhostUserDeviceAttrs = &config.VhostUserDeviceAttrs{ DevID: dev.DevID, SocketPath: dev.SocketPath, Type: config.DeviceType(dev.Type), - MacAddress: dev.MacAddress, + PCIAddr: dev.PCIAddr, + Index: dev.Index, } } diff --git a/virtcontainers/device/manager/manager.go b/virtcontainers/device/manager/manager.go index db1e8ee48a..531cbd1ddb 100644 --- a/virtcontainers/device/manager/manager.go +++ b/virtcontainers/device/manager/manager.go @@ -47,7 +47,9 @@ var ( ) type deviceManager struct { - blockDriver string + blockDriver string + vhostUserStoreEnabled bool + vhostUserStorePath string devices map[string]api.Device sync.RWMutex @@ -58,9 +60,11 @@ func deviceLogger() *logrus.Entry { } // NewDeviceManager creates a deviceManager object behaved as api.DeviceManager -func NewDeviceManager(blockDriver string, devices []api.Device) api.DeviceManager { +func NewDeviceManager(blockDriver string, vhostUserStoreEnabled bool, vhostUserStorePath string, devices []api.Device) api.DeviceManager { dm := &deviceManager{ - devices: make(map[string]api.Device), + vhostUserStoreEnabled: vhostUserStoreEnabled, + vhostUserStorePath: vhostUserStorePath, + devices: make(map[string]api.Device), } if blockDriver == VirtioMmio { dm.blockDriver = VirtioMmio @@ -94,7 +98,7 @@ func (dm *deviceManager) findDeviceByMajorMinor(major, minor int64) api.Device { // createDevice creates one device based on DeviceInfo func (dm *deviceManager) createDevice(devInfo config.DeviceInfo) (dev api.Device, err error) { - path, err := config.GetHostPathFunc(devInfo) + path, err := config.GetHostPathFunc(devInfo, dm.vhostUserStoreEnabled, dm.vhostUserStorePath) if err != nil { return nil, err } @@ -117,6 +121,12 @@ func (dm *deviceManager) createDevice(devInfo config.DeviceInfo) (dev 
api.Device } if isVFIO(path) { return drivers.NewVFIODevice(&devInfo), nil + } else if isVhostUserBlk(devInfo) { + if devInfo.DriverOptions == nil { + devInfo.DriverOptions = make(map[string]string) + } + devInfo.DriverOptions["block-driver"] = dm.blockDriver + return drivers.NewVhostUserBlkDevice(&devInfo), nil } else if isBlock(devInfo) { if devInfo.DriverOptions == nil { devInfo.DriverOptions = make(map[string]string) diff --git a/virtcontainers/device/manager/manager_test.go b/virtcontainers/device/manager/manager_test.go index b99dd992b4..664b502802 100644 --- a/virtcontainers/device/manager/manager_test.go +++ b/virtcontainers/device/manager/manager_test.go @@ -7,17 +7,20 @@ package manager import ( + "fmt" "io/ioutil" "os" "path/filepath" "strconv" "testing" - "github.com/stretchr/testify/assert" - + ktu "github.com/kata-containers/runtime/pkg/katatestutils" "github.com/kata-containers/runtime/virtcontainers/device/api" "github.com/kata-containers/runtime/virtcontainers/device/config" "github.com/kata-containers/runtime/virtcontainers/device/drivers" + "github.com/stretchr/testify/assert" + + "golang.org/x/sys/unix" ) const fileMode0640 = os.FileMode(0640) @@ -202,8 +205,85 @@ func TestAttachBlockDevice(t *testing.T) { assert.Nil(t, err) } +func TestAttachVhostUserBlkDevice(t *testing.T) { + rootEnabled := true + tc := ktu.NewTestConstraint(false) + if tc.NotValid(ktu.NeedRoot()) { + rootEnabled = false + } + + tmpDir, err := ioutil.TempDir("", "") + dm := &deviceManager{ + blockDriver: VirtioBlock, + devices: make(map[string]api.Device), + vhostUserStoreEnabled: true, + vhostUserStorePath: tmpDir, + } + assert.Nil(t, err) + os.RemoveAll(tmpDir) + + vhostUserDevNodePath := filepath.Join(tmpDir, "/block/devices/") + vhostUserSockPath := filepath.Join(tmpDir, "/block/sockets/") + deviceNodePath := filepath.Join(vhostUserDevNodePath, "vhostblk0") + deviceSockPath := filepath.Join(vhostUserSockPath, "vhostblk0") + + err = os.MkdirAll(vhostUserDevNodePath, 
dirMode) + assert.Nil(t, err) + err = os.MkdirAll(vhostUserSockPath, dirMode) + assert.Nil(t, err) + _, err = os.Create(deviceSockPath) + assert.Nil(t, err) + + // mknod requires root privilege, call mock function for non-root to + // get VhostUserBlk device type. + if rootEnabled == true { + err = unix.Mknod(deviceNodePath, unix.S_IFBLK, int(unix.Mkdev(config.VhostUserBlkMajor, 0))) + assert.Nil(t, err) + } else { + savedFunc := config.GetVhostUserNodeStatFunc + + _, err = os.Create(deviceNodePath) + assert.Nil(t, err) + + config.GetVhostUserNodeStatFunc = func(devNodePath string, + devNodeStat *unix.Stat_t) error { + if deviceNodePath != devNodePath { + return fmt.Errorf("mock GetVhostUserNodeStatFunc error") + } + + devNodeStat.Rdev = unix.Mkdev(config.VhostUserBlkMajor, 0) + return nil + } + + defer func() { + config.GetVhostUserNodeStatFunc = savedFunc + }() + } + + path := "/dev/vda" + deviceInfo := config.DeviceInfo{ + HostPath: deviceNodePath, + ContainerPath: path, + DevType: "b", + Major: config.VhostUserBlkMajor, + Minor: 0, + } + + devReceiver := &api.MockDeviceReceiver{} + device, err := dm.NewDevice(deviceInfo) + assert.Nil(t, err) + _, ok := device.(*drivers.VhostUserBlkDevice) + assert.True(t, ok) + + err = device.Attach(devReceiver) + assert.Nil(t, err) + + err = device.Detach(devReceiver) + assert.Nil(t, err) +} + func TestAttachDetachDevice(t *testing.T) { - dm := NewDeviceManager(VirtioSCSI, nil) + dm := NewDeviceManager(VirtioSCSI, false, "", nil) path := "/dev/hda" deviceInfo := config.DeviceInfo{ diff --git a/virtcontainers/device/manager/utils.go b/virtcontainers/device/manager/utils.go index 2f26bc245d..5f703b1b13 100644 --- a/virtcontainers/device/manager/utils.go +++ b/virtcontainers/device/manager/utils.go @@ -132,3 +132,13 @@ func isLargeBarSpace(resourcePath string) (bool, error) { return false, nil } + +// isVhostUserBlk checks if the device is a VhostUserBlk device. 
+func isVhostUserBlk(devInfo config.DeviceInfo) bool { + return devInfo.Major == config.VhostUserBlkMajor +} + +// isVhostUserSCSI checks if the device is a VhostUserSCSI device. +func isVhostUserSCSI(devInfo config.DeviceInfo) bool { + return devInfo.Major == config.VhostUserSCSIMajor +} diff --git a/virtcontainers/device/manager/utils_test.go b/virtcontainers/device/manager/utils_test.go index 984338ee4a..33c650b622 100644 --- a/virtcontainers/device/manager/utils_test.go +++ b/virtcontainers/device/manager/utils_test.go @@ -53,3 +53,39 @@ func TestIsBlock(t *testing.T) { assert.Equal(t, d.expected, isBlock) } } + +func TestIsVhostUserBlk(t *testing.T) { + type testData struct { + major int64 + expected bool + } + + data := []testData{ + {config.VhostUserBlkMajor, true}, + {config.VhostUserSCSIMajor, false}, + {240, false}, + } + + for _, d := range data { + isVhostUserBlk := isVhostUserBlk(config.DeviceInfo{Major: d.major}) + assert.Equal(t, d.expected, isVhostUserBlk) + } +} + +func TestIsVhostUserSCSI(t *testing.T) { + type testData struct { + major int64 + expected bool + } + + data := []testData{ + {config.VhostUserBlkMajor, false}, + {config.VhostUserSCSIMajor, true}, + {240, false}, + } + + for _, d := range data { + isVhostUserSCSI := isVhostUserSCSI(config.DeviceInfo{Major: d.major}) + assert.Equal(t, d.expected, isVhostUserSCSI) + } +} diff --git a/virtcontainers/hypervisor.go b/virtcontainers/hypervisor.go index 8b6a6cb8ac..ffa8f0fbbf 100644 --- a/virtcontainers/hypervisor.go +++ b/virtcontainers/hypervisor.go @@ -393,6 +393,13 @@ type HypervisorConfig struct { // DisableVhostNet is used to indicate if host supports vhost_net DisableVhostNet bool + // EnableVhostUserStore is used to indicate if host supports vhost-user-blk/scsi + EnableVhostUserStore bool + + // VhostUserStorePath is the directory path where vhost-user devices + // related folders, sockets and device nodes should be. 
+ VhostUserStorePath string + // GuestHookPath is the path within the VM that will be used for 'drop-in' hooks GuestHookPath string diff --git a/virtcontainers/kata_agent.go b/virtcontainers/kata_agent.go index 243415fee9..987fb6354e 100644 --- a/virtcontainers/kata_agent.go +++ b/virtcontainers/kata_agent.go @@ -22,6 +22,7 @@ import ( aTypes "github.com/kata-containers/agent/pkg/types" kataclient "github.com/kata-containers/agent/protocols/client" "github.com/kata-containers/agent/protocols/grpc" + "github.com/kata-containers/runtime/virtcontainers/device/api" "github.com/kata-containers/runtime/virtcontainers/device/config" persistapi "github.com/kata-containers/runtime/virtcontainers/persist/api" vcAnnotations "github.com/kata-containers/runtime/virtcontainers/pkg/annotations" @@ -1089,7 +1090,62 @@ func (k *kataAgent) handleShm(grpcSpec *grpc.Spec, sandbox *Sandbox) { } } +func (k *kataAgent) appendBlockDevice(dev ContainerDevice, c *Container) *grpc.Device { + device := c.sandbox.devManager.GetDeviceByID(dev.ID) + + d, ok := device.GetDeviceInfo().(*config.BlockDrive) + if !ok || d == nil { + k.Logger().WithField("device", device).Error("malformed block drive") + return nil + } + + kataDevice := &grpc.Device{ + ContainerPath: dev.ContainerPath, + } + + switch c.sandbox.config.HypervisorConfig.BlockDeviceDriver { + case config.VirtioMmio: + kataDevice.Type = kataMmioBlkDevType + kataDevice.Id = d.VirtPath + kataDevice.VmPath = d.VirtPath + case config.VirtioBlockCCW: + kataDevice.Type = kataBlkCCWDevType + kataDevice.Id = d.DevNo + case config.VirtioBlock: + kataDevice.Type = kataBlkDevType + kataDevice.Id = d.PCIAddr + case config.VirtioSCSI: + kataDevice.Type = kataSCSIDevType + kataDevice.Id = d.SCSIAddr + case config.Nvdimm: + kataDevice.Type = kataNvdimmDevType + kataDevice.VmPath = fmt.Sprintf("/dev/pmem%s", d.NvdimmID) + } + + return kataDevice +} + +func (k *kataAgent) appendVhostUserBlkDevice(dev ContainerDevice, c *Container) *grpc.Device { + device 
:= c.sandbox.devManager.GetDeviceByID(dev.ID) + + d, ok := device.GetDeviceInfo().(*config.VhostUserDeviceAttrs) + if !ok || d == nil { + k.Logger().WithField("device", device).Error("malformed vhost-user-blk drive") + return nil + } + + kataDevice := &grpc.Device{ + ContainerPath: dev.ContainerPath, + Type: kataBlkDevType, + Id: d.PCIAddr, + } + + return kataDevice +} + func (k *kataAgent) appendDevices(deviceList []*grpc.Device, c *Container) []*grpc.Device { + var kataDevice *grpc.Device + for _, dev := range c.devices { device := c.sandbox.devManager.GetDeviceByID(dev.ID) if device == nil { @@ -1097,39 +1153,17 @@ func (k *kataAgent) appendDevices(deviceList []*grpc.Device, c *Container) []*gr return nil } - if device.DeviceType() != config.DeviceBlock { + switch device.DeviceType() { + case config.DeviceBlock: + kataDevice = k.appendBlockDevice(dev, c) + case config.VhostUserBlk: + kataDevice = k.appendVhostUserBlkDevice(dev, c) + } + + if kataDevice == nil { continue } - d, ok := device.GetDeviceInfo().(*config.BlockDrive) - if !ok || d == nil { - k.Logger().WithField("device", device).Error("malformed block drive") - continue - } - - kataDevice := &grpc.Device{ - ContainerPath: dev.ContainerPath, - } - - switch c.sandbox.config.HypervisorConfig.BlockDeviceDriver { - case config.VirtioMmio: - kataDevice.Type = kataMmioBlkDevType - kataDevice.Id = d.VirtPath - kataDevice.VmPath = d.VirtPath - case config.VirtioBlockCCW: - kataDevice.Type = kataBlkCCWDevType - kataDevice.Id = d.DevNo - case config.VirtioBlock: - kataDevice.Type = kataBlkDevType - kataDevice.Id = d.PCIAddr - case config.VirtioSCSI: - kataDevice.Type = kataSCSIDevType - kataDevice.Id = d.SCSIAddr - case config.Nvdimm: - kataDevice.Type = kataNvdimmDevType - kataDevice.VmPath = fmt.Sprintf("/dev/pmem%s", d.NvdimmID) - } - deviceList = append(deviceList, kataDevice) } @@ -1416,6 +1450,53 @@ func (k *kataAgent) handleLocalStorage(mounts []specs.Mount, sandboxID string, r return localStorages } +// 
handleDeviceBlockVolume handles volume that is block device file +// and DeviceBlock type. +func (k *kataAgent) handleDeviceBlockVolume(c *Container, device api.Device) (*grpc.Storage, error) { + vol := &grpc.Storage{} + + blockDrive, ok := device.GetDeviceInfo().(*config.BlockDrive) + if !ok || blockDrive == nil { + k.Logger().Error("malformed block drive") + return nil, fmt.Errorf("malformed block drive") + } + switch { + case c.sandbox.config.HypervisorConfig.BlockDeviceDriver == config.VirtioBlockCCW: + vol.Driver = kataBlkCCWDevType + vol.Source = blockDrive.DevNo + case c.sandbox.config.HypervisorConfig.BlockDeviceDriver == config.VirtioBlock: + vol.Driver = kataBlkDevType + vol.Source = blockDrive.PCIAddr + case c.sandbox.config.HypervisorConfig.BlockDeviceDriver == config.VirtioMmio: + vol.Driver = kataMmioBlkDevType + vol.Source = blockDrive.VirtPath + case c.sandbox.config.HypervisorConfig.BlockDeviceDriver == config.VirtioSCSI: + vol.Driver = kataSCSIDevType + vol.Source = blockDrive.SCSIAddr + default: + return nil, fmt.Errorf("Unknown block device driver: %s", c.sandbox.config.HypervisorConfig.BlockDeviceDriver) + } + + return vol, nil +} + +// handleVhostUserBlkVolume handles volume that is block device file +// and VhostUserBlk type. +func (k *kataAgent) handleVhostUserBlkVolume(c *Container, device api.Device) (*grpc.Storage, error) { + vol := &grpc.Storage{} + + d, ok := device.GetDeviceInfo().(*config.VhostUserDeviceAttrs) + if !ok || d == nil { + k.Logger().Error("malformed vhost-user blk drive") + return nil, fmt.Errorf("malformed vhost-user blk drive") + } + + vol.Driver = kataBlkDevType + vol.Source = d.PCIAddr + + return vol, nil +} + // handleBlockVolumes handles volumes that are block devices files // by passing the block devices as Storage to the agent. 
func (k *kataAgent) handleBlockVolumes(c *Container) ([]*grpc.Storage, error) { @@ -1433,33 +1514,27 @@ func (k *kataAgent) handleBlockVolumes(c *Container) ([]*grpc.Storage, error) { // device is detached with detachDevices() for a container. c.devices = append(c.devices, ContainerDevice{ID: id, ContainerPath: m.Destination}) - vol := &grpc.Storage{} + var vol *grpc.Storage device := c.sandbox.devManager.GetDeviceByID(id) if device == nil { k.Logger().WithField("device", id).Error("failed to find device by id") return nil, fmt.Errorf("Failed to find device by id (id=%s)", id) } - blockDrive, ok := device.GetDeviceInfo().(*config.BlockDrive) - if !ok || blockDrive == nil { - k.Logger().Error("malformed block drive") + + var err error + switch device.DeviceType() { + case config.DeviceBlock: + vol, err = k.handleDeviceBlockVolume(c, device) + case config.VhostUserBlk: + vol, err = k.handleVhostUserBlkVolume(c, device) + default: + k.Logger().Error("Unknown device type") continue } - switch { - case c.sandbox.config.HypervisorConfig.BlockDeviceDriver == config.VirtioBlockCCW: - vol.Driver = kataBlkCCWDevType - vol.Source = blockDrive.DevNo - case c.sandbox.config.HypervisorConfig.BlockDeviceDriver == config.VirtioBlock: - vol.Driver = kataBlkDevType - vol.Source = blockDrive.PCIAddr - case c.sandbox.config.HypervisorConfig.BlockDeviceDriver == config.VirtioMmio: - vol.Driver = kataMmioBlkDevType - vol.Source = blockDrive.VirtPath - case c.sandbox.config.HypervisorConfig.BlockDeviceDriver == config.VirtioSCSI: - vol.Driver = kataSCSIDevType - vol.Source = blockDrive.SCSIAddr - default: - return nil, fmt.Errorf("Unknown block device driver: %s", c.sandbox.config.HypervisorConfig.BlockDeviceDriver) + + if vol == nil || err != nil { + return nil, err } vol.MountPoint = m.Destination diff --git a/virtcontainers/kata_agent_test.go b/virtcontainers/kata_agent_test.go index f13e3277de..5195b73a0e 100644 --- a/virtcontainers/kata_agent_test.go +++ 
b/virtcontainers/kata_agent_test.go @@ -391,6 +391,82 @@ func TestHandleLocalStorage(t *testing.T) { assert.Equal(t, localMountPoint, expected) } +func TestHandleBlockVolume(t *testing.T) { + k := kataAgent{} + + c := &Container{ + id: "100", + } + containers := map[string]*Container{} + containers[c.id] = c + + // Create a VhostUserBlk device and a DeviceBlock device + vDevID := "MockVhostUserBlk" + bDevID := "MockDeviceBlock" + vDestination := "/VhostUserBlk/destination" + bDestination := "/DeviceBlock/destination" + vPCIAddr := "0001:01" + bPCIAddr := "0002:01" + + vDev := drivers.NewVhostUserBlkDevice(&config.DeviceInfo{ID: vDevID}) + bDev := drivers.NewBlockDevice(&config.DeviceInfo{ID: bDevID}) + + vDev.VhostUserDeviceAttrs = &config.VhostUserDeviceAttrs{PCIAddr: vPCIAddr} + bDev.BlockDrive = &config.BlockDrive{PCIAddr: bPCIAddr} + + var devices []api.Device + devices = append(devices, vDev, bDev) + + // Create a VhostUserBlk mount and a DeviceBlock mount + var mounts []Mount + vMount := Mount{ + BlockDeviceID: vDevID, + Destination: vDestination, + } + bMount := Mount{ + BlockDeviceID: bDevID, + Destination: bDestination, + } + mounts = append(mounts, vMount, bMount) + + tmpDir := "/vhost/user/dir" + dm := manager.NewDeviceManager(manager.VirtioBlock, true, tmpDir, devices) + + sConfig := SandboxConfig{} + sConfig.HypervisorConfig.BlockDeviceDriver = manager.VirtioBlock + sandbox := Sandbox{ + id: "100", + containers: containers, + hypervisor: &mockHypervisor{}, + devManager: dm, + ctx: context.Background(), + config: &sConfig, + } + containers[c.id].sandbox = &sandbox + containers[c.id].mounts = mounts + + volumeStorages, err := k.handleBlockVolumes(c) + assert.Nil(t, err, "Error while handling block volumes") + + vStorage := &pb.Storage{ + MountPoint: vDestination, + Fstype: "bind", + Options: []string{"bind"}, + Driver: kataBlkDevType, + Source: vPCIAddr, + } + bStorage := &pb.Storage{ + MountPoint: bDestination, + Fstype: "bind", + Options: 
[]string{"bind"}, + Driver: kataBlkDevType, + Source: bPCIAddr, + } + + assert.Equal(t, vStorage, volumeStorages[0], "Error while handle VhostUserBlk type block volume") + assert.Equal(t, bStorage, volumeStorages[1], "Error while handle BlockDevice type block volume") +} + func TestAppendDevicesEmptyContainerDeviceList(t *testing.T) { k := kataAgent{} @@ -400,7 +476,7 @@ func TestAppendDevicesEmptyContainerDeviceList(t *testing.T) { c := &Container{ sandbox: &Sandbox{ - devManager: manager.NewDeviceManager("virtio-scsi", nil), + devManager: manager.NewDeviceManager("virtio-scsi", false, "", nil), }, devices: ctrDevices, } @@ -433,7 +509,55 @@ func TestAppendDevices(t *testing.T) { c := &Container{ sandbox: &Sandbox{ - devManager: manager.NewDeviceManager("virtio-blk", ctrDevices), + devManager: manager.NewDeviceManager("virtio-blk", false, "", ctrDevices), + config: sandboxConfig, + }, + } + c.devices = append(c.devices, ContainerDevice{ + ID: id, + ContainerPath: testBlockDeviceCtrPath, + }) + + devList := []*pb.Device{} + expected := []*pb.Device{ + { + Type: kataBlkDevType, + ContainerPath: testBlockDeviceCtrPath, + Id: testPCIAddr, + }, + } + updatedDevList := k.appendDevices(devList, c) + assert.True(t, reflect.DeepEqual(updatedDevList, expected), + "Device lists didn't match: got %+v, expecting %+v", + updatedDevList, expected) +} + +func TestAppendVhostUserBlkDevices(t *testing.T) { + k := kataAgent{} + + id := "test-append-vhost-user-blk" + ctrDevices := []api.Device{ + &drivers.VhostUserBlkDevice{ + GenericDevice: &drivers.GenericDevice{ + ID: id, + }, + VhostUserDeviceAttrs: &config.VhostUserDeviceAttrs{ + Type: config.VhostUserBlk, + PCIAddr: testPCIAddr, + }, + }, + } + + sandboxConfig := &SandboxConfig{ + HypervisorConfig: HypervisorConfig{ + BlockDeviceDriver: config.VirtioBlock, + }, + } + + testVhostUserStorePath := "/test/vhost/user/store/path" + c := &Container{ + sandbox: &Sandbox{ + devManager: manager.NewDeviceManager("virtio-blk", true, 
testVhostUserStorePath, ctrDevices), config: sandboxConfig, }, } diff --git a/virtcontainers/persist.go b/virtcontainers/persist.go index 2793fe1d52..d96a3890ea 100644 --- a/virtcontainers/persist.go +++ b/virtcontainers/persist.go @@ -252,6 +252,8 @@ func (s *Sandbox) dumpConfig(ss *persistapi.SandboxState) { BootToBeTemplate: sconfig.HypervisorConfig.BootToBeTemplate, BootFromTemplate: sconfig.HypervisorConfig.BootFromTemplate, DisableVhostNet: sconfig.HypervisorConfig.DisableVhostNet, + EnableVhostUserStore: sconfig.HypervisorConfig.EnableVhostUserStore, + VhostUserStorePath: sconfig.HypervisorConfig.VhostUserStorePath, GuestHookPath: sconfig.HypervisorConfig.GuestHookPath, VMid: sconfig.HypervisorConfig.VMid, } @@ -539,6 +541,8 @@ func loadSandboxConfig(id string) (*SandboxConfig, error) { BootToBeTemplate: hconf.BootToBeTemplate, BootFromTemplate: hconf.BootFromTemplate, DisableVhostNet: hconf.DisableVhostNet, + EnableVhostUserStore: hconf.EnableVhostUserStore, + VhostUserStorePath: hconf.VhostUserStorePath, GuestHookPath: hconf.GuestHookPath, VMid: hconf.VMid, } diff --git a/virtcontainers/persist/api/config.go b/virtcontainers/persist/api/config.go index 7912587057..34a5fd0fbf 100644 --- a/virtcontainers/persist/api/config.go +++ b/virtcontainers/persist/api/config.go @@ -166,6 +166,13 @@ type HypervisorConfig struct { // DisableVhostNet is used to indicate if host supports vhost_net DisableVhostNet bool + // EnableVhostUserStore is used to indicate if host supports vhost-user-blk/scsi + EnableVhostUserStore bool + + // VhostUserStorePath is the directory path where vhost-user devices + // related folders, sockets and device nodes should be. 
+ VhostUserStorePath string + // GuestHookPath is the path within the VM that will be used for 'drop-in' hooks GuestHookPath string diff --git a/virtcontainers/persist/api/device.go b/virtcontainers/persist/api/device.go index e82276ccf9..67226542a9 100644 --- a/virtcontainers/persist/api/device.go +++ b/virtcontainers/persist/api/device.go @@ -66,6 +66,13 @@ type VhostUserDeviceAttrs struct { // MacAddress is only meaningful for vhost user net device MacAddress string + + // PCIAddr is the PCI address used to identify the slot at which the drive is attached. + // It is only meaningful for vhost user block devices + PCIAddr string + + // Block index of the device if assigned + Index int } // DeviceState is sandbox level resource which represents host devices diff --git a/virtcontainers/persist_test.go b/virtcontainers/persist_test.go index db6d029e76..52e38ce523 100644 --- a/virtcontainers/persist_test.go +++ b/virtcontainers/persist_test.go @@ -31,7 +31,7 @@ func TestSandboxRestore(t *testing.T) { sandbox := Sandbox{ id: "test-exp", containers: container, - devManager: manager.NewDeviceManager(manager.VirtioSCSI, nil), + devManager: manager.NewDeviceManager(manager.VirtioSCSI, false, "", nil), hypervisor: &mockHypervisor{}, ctx: context.Background(), config: &sconfig, diff --git a/virtcontainers/pkg/annotations/annotations.go b/virtcontainers/pkg/annotations/annotations.go index f18e3ffed5..10ce7833e2 100644 --- a/virtcontainers/pkg/annotations/annotations.go +++ b/virtcontainers/pkg/annotations/annotations.go @@ -84,6 +84,13 @@ const ( // DisableVhostNet is a sandbox annotation to specify if vhost-net is not available on the host. 
DisableVhostNet = kataAnnotHypervisorPrefix + "disable_vhost_net" + // EnableVhostUserStore is a sandbox annotation to specify if vhost-user-blk/scsi is available on the host + EnableVhostUserStore = kataAnnotHypervisorPrefix + "enable_vhost_user_store" + + // VhostUserStorePath is a sandbox annotation to specify the directory path where vhost-user devices + // related folders, sockets and device nodes should be. + VhostUserStorePath = kataAnnotHypervisorPrefix + "vhost_user_store_path" + // GuestHookPath is a sandbox annotation to specify the path within the VM that will be used for 'drop-in' hooks. GuestHookPath = kataAnnotHypervisorPrefix + "guest_hook_path" diff --git a/virtcontainers/pkg/oci/utils_test.go b/virtcontainers/pkg/oci/utils_test.go index c53ca2458d..30771da608 100644 --- a/virtcontainers/pkg/oci/utils_test.go +++ b/virtcontainers/pkg/oci/utils_test.go @@ -60,7 +60,8 @@ func TestMinimalSandboxConfig(t *testing.T) { savedFunc := config.GetHostPathFunc // Simply assign container path to host path for device. - config.GetHostPathFunc = func(devInfo config.DeviceInfo) (string, error) { + config.GetHostPathFunc = func(devInfo config.DeviceInfo, vhostUserStoreEnabled bool, + vhostUserStorePath string) (string, error) { return devInfo.ContainerPath, nil } diff --git a/virtcontainers/qemu.go b/virtcontainers/qemu.go index 002b208294..ab9c4c2c9f 100644 --- a/virtcontainers/qemu.go +++ b/virtcontainers/qemu.go @@ -522,6 +522,15 @@ func (q *qemu) createSandbox(ctx context.Context, id string, networkNS NetworkNa } } + // Vhost-user-blk/scsi process which can improve performance, like SPDK, + // requires shared-on hugepage to work with Qemu. + if q.config.EnableVhostUserStore { + if !q.config.HugePages { + return errors.New("Vhost-user-blk/scsi is enabled without HugePages.
This configuration will not work") + } + knobs.MemShared = true + } + rtc := govmmQemu.RTC{ Base: "utc", DriftFix: "slew", @@ -678,7 +687,7 @@ func (q *qemu) setupVirtiofsd() (err error) { return err } -func (q *qemu) getMemArgs() (bool, string, string) { +func (q *qemu) getMemArgs() (bool, string, string, error) { share := false target := "" memoryBack := "memory-backend-ram" @@ -689,15 +698,24 @@ func (q *qemu) getMemArgs() (bool, string, string) { target = "/dev/hugepages" memoryBack = "memory-backend-file" share = true - } else if q.config.SharedFS == config.VirtioFS || q.config.FileBackedMemRootDir != "" { - target = q.qemuConfig.Memory.Path - memoryBack = "memory-backend-file" + } else { + if q.config.EnableVhostUserStore { + // Vhost-user-blk/scsi process which can improve performance, like SPDK, + // requires shared-on hugepage to work with Qemu. + return share, target, "", fmt.Errorf("Vhost-user-blk/scsi requires hugepage memory") + } + + if q.config.SharedFS == config.VirtioFS || q.config.FileBackedMemRootDir != "" { + target = q.qemuConfig.Memory.Path + memoryBack = "memory-backend-file" + } } + if q.qemuConfig.Knobs.MemShared { share = true } - return share, target, memoryBack + return share, target, memoryBack, nil } func (q *qemu) setupVirtioMem() error { @@ -708,7 +726,11 @@ func (q *qemu) setupVirtioMem() error { // 1024 is size for nvdimm sizeMB := int(maxMem) - int(q.config.MemorySize) - share, target, memoryBack := q.getMemArgs() + share, target, memoryBack, err := q.getMemArgs() + if err != nil { + return err + } + err = q.qmpSetup() if err != nil { return err @@ -1123,6 +1145,40 @@ func (q *qemu) hotplugAddBlockDevice(drive *config.BlockDrive, op operation, dev return nil } +func (q *qemu) hotplugAddVhostUserBlkDevice(vAttr *config.VhostUserDeviceAttrs, op operation, devID string) (err error) { + err = q.qmpMonitorCh.qmp.ExecuteCharDevUnixSocketAdd(q.qmpMonitorCh.ctx, vAttr.DevID, vAttr.SocketPath, false, false) + if err != nil { + return err + 
} + + defer func() { + if err != nil { + q.qmpMonitorCh.qmp.ExecuteChardevDel(q.qmpMonitorCh.ctx, vAttr.DevID) + } + }() + + driver := "vhost-user-blk-pci" + addr, bridge, err := q.arch.addDeviceToBridge(vAttr.DevID, types.PCI) + if err != nil { + return err + } + + defer func() { + if err != nil { + q.arch.removeDeviceFromBridge(vAttr.DevID) + } + }() + + // PCI address is in the format bridge-addr/device-addr eg. "03/02" + vAttr.PCIAddr = fmt.Sprintf("%02x", bridge.Addr) + "/" + addr + + if err = q.qmpMonitorCh.qmp.ExecutePCIVhostUserDevAdd(q.qmpMonitorCh.ctx, driver, devID, vAttr.DevID, addr, bridge.ID); err != nil { + return err + } + + return nil +} + func (q *qemu) hotplugBlockDevice(drive *config.BlockDrive, op operation) error { err := q.qmpSetup() if err != nil { @@ -1152,6 +1208,38 @@ func (q *qemu) hotplugBlockDevice(drive *config.BlockDrive, op operation) error return err } +func (q *qemu) hotplugVhostUserDevice(vAttr *config.VhostUserDeviceAttrs, op operation) error { + err := q.qmpSetup() + if err != nil { + return err + } + + devID := "virtio-" + vAttr.DevID + + if op == addDevice { + switch vAttr.Type { + case config.VhostUserBlk: + return q.hotplugAddVhostUserBlkDevice(vAttr, op, devID) + default: + return fmt.Errorf("Incorrect vhost-user device type found") + } + } else { + if err := q.arch.removeDeviceFromBridge(vAttr.DevID); err != nil { + return err + } + + if err := q.qmpMonitorCh.qmp.ExecuteDeviceDel(q.qmpMonitorCh.ctx, devID); err != nil { + return err + } + + if err := q.qmpMonitorCh.qmp.ExecuteChardevDel(q.qmpMonitorCh.ctx, vAttr.DevID); err != nil { + return err + } + } + + return nil +} + func (q *qemu) hotplugVFIODevice(device *config.VFIODev, op operation) (err error) { err = q.qmpSetup() if err != nil { @@ -1344,6 +1432,9 @@ func (q *qemu) hotplugDevice(devInfo interface{}, devType deviceType, op operati case netDev: device := devInfo.(Endpoint) return nil, q.hotplugNetDevice(device, op) + case vhostuserDev: + vAttr := 
devInfo.(*config.VhostUserDeviceAttrs) + return nil, q.hotplugVhostUserDevice(vAttr, op) default: return nil, fmt.Errorf("cannot hotplug device: unsupported device type '%v'", devType) } @@ -1551,7 +1642,11 @@ func (q *qemu) hotplugAddMemory(memDev *memoryDevice) (int, error) { memDev.slot = maxSlot + 1 } - share, target, memoryBack := q.getMemArgs() + share, target, memoryBack, err := q.getMemArgs() + if err != nil { + return 0, err + } + err = q.qmpMonitorCh.qmp.ExecHotplugMemory(q.qmpMonitorCh.ctx, memoryBack, "mem"+strconv.Itoa(memDev.slot), target, memDev.sizeMB, share) if err != nil { q.Logger().WithError(err).Error("hotplug memory") diff --git a/virtcontainers/qemu_test.go b/virtcontainers/qemu_test.go index 5c6eec7576..f490d5d7de 100644 --- a/virtcontainers/qemu_test.go +++ b/virtcontainers/qemu_test.go @@ -18,6 +18,7 @@ import ( "github.com/kata-containers/runtime/virtcontainers/device/config" "github.com/kata-containers/runtime/virtcontainers/persist" "github.com/kata-containers/runtime/virtcontainers/types" + "github.com/kata-containers/runtime/virtcontainers/utils" "github.com/pkg/errors" "github.com/stretchr/testify/assert" ) @@ -221,6 +222,27 @@ func TestQemuAddDeviceFsDev(t *testing.T) { testQemuAddDevice(t, volume, fsDev, expectedOut) } +func TestQemuAddDeviceVhostUserBlk(t *testing.T) { + socketPath := "/test/socket/path" + devID := "testDevID" + + expectedOut := []govmmQemu.Device{ + govmmQemu.VhostUserDevice{ + SocketPath: socketPath, + CharDevID: utils.MakeNameID("char", devID, maxDevIDSize), + VhostUserType: govmmQemu.VhostUserBlk, + }, + } + + vDevice := config.VhostUserDeviceAttrs{ + DevID: devID, + SocketPath: socketPath, + Type: config.VhostUserBlk, + } + + testQemuAddDevice(t, vDevice, vhostuserDev, expectedOut) +} + func TestQemuAddDeviceSerialPortDev(t *testing.T) { deviceID := "channelTest" id := "charchTest" @@ -465,6 +487,33 @@ func TestQemuFileBackedMem(t *testing.T) { assert.Equal(q.qemuConfig.Knobs.FileBackedMem, false) 
assert.Equal(q.qemuConfig.Knobs.MemShared, false) assert.Equal(q.qemuConfig.Memory.Path, "") + + // Check setting vhost-user storage with Hugepages + sandbox, err = createQemuSandboxConfig() + assert.NoError(err) + + q = &qemu{ + store: sandbox.newStore, + } + sandbox.config.HypervisorConfig.EnableVhostUserStore = true + sandbox.config.HypervisorConfig.HugePages = true + err = q.createSandbox(context.Background(), sandbox.id, NetworkNamespace{}, &sandbox.config.HypervisorConfig, false) + assert.NoError(err) + assert.Equal(q.qemuConfig.Knobs.MemShared, true) + + // Check failure for vhost-user storage + sandbox, err = createQemuSandboxConfig() + assert.NoError(err) + + q = &qemu{ + store: sandbox.newStore, + } + sandbox.config.HypervisorConfig.EnableVhostUserStore = true + sandbox.config.HypervisorConfig.HugePages = false + err = q.createSandbox(context.Background(), sandbox.id, NetworkNamespace{}, &sandbox.config.HypervisorConfig, false) + + expectErr = errors.New("Vhost-user-blk/scsi is enabled without HugePages. This configuration will not work") + assert.Equal(expectErr.Error(), err.Error()) } func createQemuSandboxConfig() (*Sandbox, error) { diff --git a/virtcontainers/sandbox.go b/virtcontainers/sandbox.go index 911eca997b..09de215002 100644 --- a/virtcontainers/sandbox.go +++ b/virtcontainers/sandbox.go @@ -575,7 +575,9 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor if err != nil { s.Logger().WithError(err).WithField("sandboxid", s.id).Warning("load sandbox devices failed") } - s.devManager = deviceManager.NewDeviceManager(sandboxConfig.HypervisorConfig.BlockDeviceDriver, devices) + s.devManager = deviceManager.NewDeviceManager(sandboxConfig.HypervisorConfig.BlockDeviceDriver, + sandboxConfig.HypervisorConfig.EnableVhostUserStore, + sandboxConfig.HypervisorConfig.VhostUserStorePath, devices) // Load sandbox state. The hypervisor.createSandbox call, may need to access statei. 
state, err := s.store.LoadState() @@ -587,7 +589,9 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor return nil, err } } else { - s.devManager = deviceManager.NewDeviceManager(sandboxConfig.HypervisorConfig.BlockDeviceDriver, nil) + s.devManager = deviceManager.NewDeviceManager(sandboxConfig.HypervisorConfig.BlockDeviceDriver, + sandboxConfig.HypervisorConfig.EnableVhostUserStore, + sandboxConfig.HypervisorConfig.VhostUserStorePath, nil) // Ignore the error. Restore can fail for a new sandbox if err := s.Restore(); err != nil { @@ -1658,6 +1662,13 @@ func (s *Sandbox) HotplugAddDevice(device api.Device, devType config.DeviceType) } _, err := s.hypervisor.hotplugAddDevice(blockDevice.BlockDrive, blockDev) return err + case config.VhostUserBlk: + vhostUserBlkDevice, ok := device.(*drivers.VhostUserBlkDevice) + if !ok { + return fmt.Errorf("device type mismatch, expect device type to be %s", devType) + } + _, err := s.hypervisor.hotplugAddDevice(vhostUserBlkDevice.VhostUserDeviceAttrs, vhostuserDev) + return err case config.DeviceGeneric: // TODO: what? return nil @@ -1695,6 +1706,13 @@ func (s *Sandbox) HotplugRemoveDevice(device api.Device, devType config.DeviceTy } _, err := s.hypervisor.hotplugRemoveDevice(blockDrive, blockDev) return err + case config.VhostUserBlk: + vhostUserDeviceAttrs, ok := device.GetDeviceInfo().(*config.VhostUserDeviceAttrs) + if !ok { + return fmt.Errorf("device type mismatch, expect device type to be %s", devType) + } + _, err := s.hypervisor.hotplugRemoveDevice(vhostUserDeviceAttrs, vhostuserDev) + return err case config.DeviceGeneric: // TODO: what? 
return nil diff --git a/virtcontainers/sandbox_test.go b/virtcontainers/sandbox_test.go index 3f25bf2c39..7dd9e150f8 100644 --- a/virtcontainers/sandbox_test.go +++ b/virtcontainers/sandbox_test.go @@ -17,6 +17,7 @@ import ( "syscall" "testing" + ktu "github.com/kata-containers/runtime/pkg/katatestutils" "github.com/kata-containers/runtime/virtcontainers/device/config" "github.com/kata-containers/runtime/virtcontainers/device/drivers" "github.com/kata-containers/runtime/virtcontainers/device/manager" @@ -29,6 +30,9 @@ import ( "golang.org/x/sys/unix" ) +// dirMode is the permission bits used for creating a directory +const dirMode = os.FileMode(0750) | os.ModeDir + func newHypervisorConfig(kernelParams []Param, hParams []Param) HypervisorConfig { return HypervisorConfig{ KernelPath: filepath.Join(testDir, testKernel), @@ -696,7 +700,7 @@ func TestSandboxAttachDevicesVFIO(t *testing.T) { config.SysIOMMUPath = savedIOMMUPath }() - dm := manager.NewDeviceManager(manager.VirtioSCSI, nil) + dm := manager.NewDeviceManager(manager.VirtioSCSI, false, "", nil) path := filepath.Join(vfioPath, testFDIOGroup) deviceInfo := config.DeviceInfo{ HostPath: path, @@ -737,6 +741,101 @@ func TestSandboxAttachDevicesVFIO(t *testing.T) { assert.Nil(t, err, "Error while detaching devices %s", err) } +func TestSandboxAttachDevicesVhostUserBlk(t *testing.T) { + rootEnabled := true + tc := ktu.NewTestConstraint(false) + if tc.NotValid(ktu.NeedRoot()) { + rootEnabled = false + } + + tmpDir, err := ioutil.TempDir("", "") + assert.Nil(t, err) + os.RemoveAll(tmpDir) + dm := manager.NewDeviceManager(manager.VirtioSCSI, true, tmpDir, nil) + + vhostUserDevNodePath := filepath.Join(tmpDir, "/block/devices/") + vhostUserSockPath := filepath.Join(tmpDir, "/block/sockets/") + deviceNodePath := filepath.Join(vhostUserDevNodePath, "vhostblk0") + deviceSockPath := filepath.Join(vhostUserSockPath, "vhostblk0") + + err = os.MkdirAll(vhostUserDevNodePath, dirMode) + assert.Nil(t, err) + err = 
os.MkdirAll(vhostUserSockPath, dirMode) + assert.Nil(t, err) + _, err = os.Create(deviceSockPath) + assert.Nil(t, err) + + // mknod requires root privilege, call mock function for non-root to + // get VhostUserBlk device type. + if rootEnabled == true { + err = unix.Mknod(deviceNodePath, unix.S_IFBLK, int(unix.Mkdev(config.VhostUserBlkMajor, 0))) + assert.Nil(t, err) + } else { + savedFunc := config.GetVhostUserNodeStatFunc + + _, err = os.Create(deviceNodePath) + assert.Nil(t, err) + + config.GetVhostUserNodeStatFunc = func(devNodePath string, + devNodeStat *unix.Stat_t) error { + if deviceNodePath != devNodePath { + return fmt.Errorf("mock GetVhostUserNodeStatFunc error") + } + + devNodeStat.Rdev = unix.Mkdev(config.VhostUserBlkMajor, 0) + return nil + } + + defer func() { + config.GetVhostUserNodeStatFunc = savedFunc + }() + } + + path := "/dev/vda" + deviceInfo := config.DeviceInfo{ + HostPath: deviceNodePath, + ContainerPath: path, + DevType: "b", + Major: config.VhostUserBlkMajor, + Minor: 0, + } + + device, err := dm.NewDevice(deviceInfo) + assert.Nil(t, err) + _, ok := device.(*drivers.VhostUserBlkDevice) + assert.True(t, ok) + + c := &Container{ + id: "100", + devices: []ContainerDevice{ + { + ID: device.DeviceID(), + ContainerPath: path, + }, + }, + } + + containers := map[string]*Container{} + containers[c.id] = c + + sandbox := Sandbox{ + id: "100", + containers: containers, + hypervisor: &mockHypervisor{}, + devManager: dm, + ctx: context.Background(), + config: &SandboxConfig{}, + } + + containers[c.id].sandbox = &sandbox + + err = containers[c.id].attachDevices(c.devices) + assert.Nil(t, err, "Error while attaching vhost-user-blk devices %s", err) + + err = containers[c.id].detachDevices() + assert.Nil(t, err, "Error while detaching vhost-user-blk devices %s", err) +} + var assetContent = []byte("FakeAsset fake asset FAKE ASSET") var assetContentHash = 
"92549f8d2018a95a294d28a65e795ed7d1a9d150009a28cea108ae10101178676f04ab82a6950d0099e4924f9c5e41dcba8ece56b75fc8b4e0a7492cb2a8c880" var assetContentWrongHash = "92549f8d2018a95a294d28a65e795ed7d1a9d150009a28cea108ae10101178676f04ab82a6950d0099e4924f9c5e41dcba8ece56b75fc8b4e0a7492cb2a8c881" @@ -1174,7 +1273,7 @@ func TestAttachBlockDevice(t *testing.T) { DevType: "b", } - dm := manager.NewDeviceManager(config.VirtioBlock, nil) + dm := manager.NewDeviceManager(config.VirtioBlock, false, "", nil) device, err := dm.NewDevice(deviceInfo) assert.Nil(t, err) _, ok := device.(*drivers.BlockDevice) @@ -1230,7 +1329,7 @@ func TestPreAddDevice(t *testing.T) { HypervisorConfig: hConfig, } - dm := manager.NewDeviceManager(config.VirtioBlock, nil) + dm := manager.NewDeviceManager(config.VirtioBlock, false, "", nil) // create a sandbox first sandbox := &Sandbox{ id: testSandboxID,