Merge pull request #4798 from amshinde/use-iouring-qemu

Use iouring for qemu block devices
This commit is contained in:
Archana Shinde 2022-08-26 04:00:24 +05:30 committed by GitHub
commit 7d52934ec1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 131 additions and 24 deletions

View File

@ -253,6 +253,7 @@ ifneq (,$(QEMUCMD))
# qemu-specific options (all should be suffixed by "_QEMU")
DEFBLOCKSTORAGEDRIVER_QEMU := virtio-scsi
DEFBLOCKDEVICEAIO_QEMU := io_uring
DEFNETWORKMODEL_QEMU := tcfilter
KERNELTYPE = uncompressed
KERNELNAME = $(call MAKE_KERNEL_NAME,$(KERNELTYPE))
@ -458,6 +459,7 @@ USER_VARS += DEFDISABLEBLOCK
USER_VARS += DEFBLOCKSTORAGEDRIVER_ACRN
USER_VARS += DEFBLOCKSTORAGEDRIVER_FC
USER_VARS += DEFBLOCKSTORAGEDRIVER_QEMU
USER_VARS += DEFBLOCKDEVICEAIO_QEMU
USER_VARS += DEFSHAREDFS_CLH_VIRTIOFS
USER_VARS += DEFSHAREDFS_QEMU_VIRTIOFS
USER_VARS += DEFVIRTIOFSDAEMON

View File

@ -208,6 +208,20 @@ virtio_fs_cache = "@DEFVIRTIOFSCACHE@"
# or nvdimm.
block_device_driver = "@DEFBLOCKSTORAGEDRIVER_QEMU@"
# aio is the I/O mechanism used by qemu
# Options:
#
# - threads
# Pthread based disk I/O.
#
# - native
# Native Linux I/O.
#
# - io_uring
# Linux io_uring API. This provides the fastest I/O operations on Linux. It requires
# kernel > 5.1 and qemu >= 5.0.
block_device_aio = "@DEFBLOCKDEVICEAIO_QEMU@"
# Specifies whether cache-related options will be set for block devices.
# Default false
#block_device_cache_set = true

View File

@ -61,6 +61,17 @@ const (
Nvdimm = "nvdimm"
)
// Supported asynchronous I/O mechanisms for block devices.
const (
// AIOThreads is the pthread asynchronous I/O implementation.
AIOThreads = "threads"
// AIONative is the native Linux AIO implementation.
AIONative = "native"
// AIOIOUring is the Linux io_uring I/O implementation.
AIOIOUring = "io_uring"
)
const (
// Virtio9P means use virtio-9p for the shared file system
Virtio9P = "virtio-9p"

View File

@ -1140,8 +1140,11 @@ const (
// Threads is the pthread asynchronous I/O implementation.
Threads BlockDeviceAIO = "threads"
// Native is the pthread asynchronous I/O implementation.
// Native is the native Linux AIO implementation.
Native BlockDeviceAIO = "native"
// IOUring is the Linux io_uring I/O implementation.
IOUring BlockDeviceAIO = "io_uring"
)
const (

View File

@ -771,30 +771,28 @@ func (q *QMP) ExecuteQuit(ctx context.Context) error {
return q.executeCommand(ctx, "quit", nil, nil)
}
func (q *QMP) blockdevAddBaseArgs(driver, device, blockdevID string, ro bool) (map[string]interface{}, map[string]interface{}) {
var args map[string]interface{}
func (q *QMP) blockdevAddBaseArgs(driver string, blockDevice *BlockDevice) map[string]interface{} {
blockdevArgs := map[string]interface{}{
"driver": "raw",
"read-only": ro,
"read-only": blockDevice.ReadOnly,
"file": map[string]interface{}{
"driver": driver,
"filename": device,
"filename": blockDevice.File,
"aio": string(blockDevice.AIO),
},
}
blockdevArgs["node-name"] = blockdevID
args = blockdevArgs
blockdevArgs["node-name"] = blockDevice.ID
return args, blockdevArgs
return blockdevArgs
}
// ExecuteBlockdevAdd sends a blockdev-add to the QEMU instance. blockDevice
// describes the device to add: its File field is the path of the device,
// e.g., /dev/rbd0, and its ID field is an identifier used to name the device.
// As this identifier will be passed directly to QMP, it must obey QMP's
// naming rules, e.g., it must start with a letter.
func (q *QMP) ExecuteBlockdevAdd(ctx context.Context, device, blockdevID string, ro bool) error {
args, _ := q.blockdevAddBaseArgs("host_device", device, blockdevID, ro)
func (q *QMP) ExecuteBlockdevAdd(ctx context.Context, blockDevice *BlockDevice) error {
args := q.blockdevAddBaseArgs("host_device", blockDevice)
return q.executeCommand(ctx, "blockdev-add", args, nil)
}
@ -806,29 +804,29 @@ func (q *QMP) ExecuteBlockdevAdd(ctx context.Context, device, blockdevID string,
// direct denotes whether use of O_DIRECT (bypass the host page cache)
// is enabled. noFlush denotes whether flush requests for the device are
// ignored.
func (q *QMP) ExecuteBlockdevAddWithCache(ctx context.Context, device, blockdevID string, direct, noFlush, ro bool) error {
args, blockdevArgs := q.blockdevAddBaseArgs("host_device", device, blockdevID, ro)
func (q *QMP) ExecuteBlockdevAddWithCache(ctx context.Context, blockDevice *BlockDevice, direct, noFlush bool) error {
blockdevArgs := q.blockdevAddBaseArgs("host_device", blockDevice)
blockdevArgs["cache"] = map[string]interface{}{
"direct": direct,
"no-flush": noFlush,
}
return q.executeCommand(ctx, "blockdev-add", args, nil)
return q.executeCommand(ctx, "blockdev-add", blockdevArgs, nil)
}
// ExecuteBlockdevAddWithDriverCache takes one more parameter, driver,
// than ExecuteBlockdevAddWithCache.
// The driver parameter sets the driver of the block device.
func (q *QMP) ExecuteBlockdevAddWithDriverCache(ctx context.Context, driver, device, blockdevID string, direct, noFlush, ro bool) error {
args, blockdevArgs := q.blockdevAddBaseArgs(driver, device, blockdevID, ro)
func (q *QMP) ExecuteBlockdevAddWithDriverCache(ctx context.Context, driver string, blockDevice *BlockDevice, direct, noFlush bool) error {
blockdevArgs := q.blockdevAddBaseArgs(driver, blockDevice)
blockdevArgs["cache"] = map[string]interface{}{
"direct": direct,
"no-flush": noFlush,
}
return q.executeCommand(ctx, "blockdev-add", args, nil)
return q.executeCommand(ctx, "blockdev-add", blockdevArgs, nil)
}
// ExecuteDeviceAdd adds the guest portion of a device to a QEMU instance

View File

@ -400,8 +400,13 @@ func TestQMPBlockdevAdd(t *testing.T) {
cfg := QMPConfig{Logger: qmpTestLogger{}}
q := startQMPLoop(buf, cfg, connectedCh, disconnectedCh)
q.version = checkVersion(t, connectedCh)
err := q.ExecuteBlockdevAdd(context.Background(), "/dev/rbd0",
fmt.Sprintf("drive_%s", volumeUUID), false)
dev := BlockDevice{
ID: fmt.Sprintf("drive_%s", volumeUUID),
File: "/dev/rbd0",
ReadOnly: false,
AIO: Native,
}
err := q.ExecuteBlockdevAdd(context.Background(), &dev)
if err != nil {
t.Fatalf("Unexpected error %v", err)
}
@ -424,8 +429,13 @@ func TestQMPBlockdevAddWithCache(t *testing.T) {
cfg := QMPConfig{Logger: qmpTestLogger{}}
q := startQMPLoop(buf, cfg, connectedCh, disconnectedCh)
q.version = checkVersion(t, connectedCh)
err := q.ExecuteBlockdevAddWithCache(context.Background(), "/dev/rbd0",
fmt.Sprintf("drive_%s", volumeUUID), true, true, false)
dev := BlockDevice{
ID: fmt.Sprintf("drive_%s", volumeUUID),
File: "/dev/rbd0",
ReadOnly: false,
AIO: Native,
}
err := q.ExecuteBlockdevAddWithCache(context.Background(), &dev, true, true)
if err != nil {
t.Fatalf("Unexpected error %v", err)
}

View File

@ -216,6 +216,7 @@ type RuntimeConfigOptions struct {
ShimPath string
LogPath string
BlockDeviceDriver string
BlockDeviceAIO string
SharedFS string
VirtioFSDaemon string
JaegerEndpoint string
@ -305,6 +306,7 @@ func MakeRuntimeConfigFileData(config RuntimeConfigOptions) string {
path = "` + config.HypervisorPath + `"
kernel = "` + config.KernelPath + `"
block_device_driver = "` + config.BlockDeviceDriver + `"
block_device_aio = "` + config.BlockDeviceAIO + `"
kernel_params = "` + config.KernelParams + `"
image = "` + config.ImagePath + `"
machine_type = "` + config.MachineType + `"

View File

@ -63,6 +63,7 @@ const defaultBridgesCount uint32 = 1
const defaultInterNetworkingModel = "tcfilter"
const defaultDisableBlockDeviceUse bool = false
const defaultBlockDeviceDriver = "virtio-scsi"
const defaultBlockDeviceAIO string = "io_uring"
const defaultBlockDeviceCacheSet bool = false
const defaultBlockDeviceCacheDirect bool = false
const defaultBlockDeviceCacheNoflush bool = false

View File

@ -100,6 +100,7 @@ type hypervisor struct {
GuestHookPath string `toml:"guest_hook_path"`
GuestMemoryDumpPath string `toml:"guest_memory_dump_path"`
SeccompSandbox string `toml:"seccompsandbox"`
BlockDeviceAIO string `toml:"block_device_aio"`
HypervisorPathList []string `toml:"valid_hypervisor_paths"`
JailerPathList []string `toml:"valid_jailer_paths"`
CtlPathList []string `toml:"valid_ctlpaths"`
@ -469,6 +470,22 @@ func (h hypervisor) blockDeviceDriver() (string, error) {
return "", fmt.Errorf("Invalid hypervisor block storage driver %v specified (supported drivers: %v)", h.BlockDeviceDriver, supportedBlockDrivers)
}
// blockDeviceAIO returns the block device I/O mechanism configured for the
// hypervisor. An unset (empty) value resolves to defaultBlockDeviceAIO. A value
// other than config.AIOIOUring, config.AIONative or config.AIOThreads is
// rejected with an error that lists the supported mechanisms.
func (h hypervisor) blockDeviceAIO() (string, error) {
	requested := h.BlockDeviceAIO
	if requested == "" {
		// Nothing configured: fall back to the built-in default.
		return defaultBlockDeviceAIO, nil
	}

	supportedBlockAIO := []string{config.AIOIOUring, config.AIONative, config.AIOThreads}
	for idx := range supportedBlockAIO {
		if supportedBlockAIO[idx] != requested {
			continue
		}
		return requested, nil
	}

	return "", fmt.Errorf("Invalid hypervisor block storage I/O mechanism %v specified (supported AIO: %v)", requested, supportedBlockAIO)
}
func (h hypervisor) sharedFS() (string, error) {
supportedSharedFS := []string{config.Virtio9P, config.VirtioFS, config.VirtioFSNydus}
@ -728,6 +745,11 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
return vc.HypervisorConfig{}, err
}
blockAIO, err := h.blockDeviceAIO()
if err != nil {
return vc.HypervisorConfig{}, err
}
sharedFS, err := h.sharedFS()
if err != nil {
return vc.HypervisorConfig{}, err
@ -784,6 +806,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
Debug: h.Debug,
DisableNestingChecks: h.DisableNestingChecks,
BlockDeviceDriver: blockDriver,
BlockDeviceAIO: blockAIO,
BlockDeviceCacheSet: h.BlockDeviceCacheSet,
BlockDeviceCacheDirect: h.BlockDeviceCacheDirect,
BlockDeviceCacheNoflush: h.BlockDeviceCacheNoflush,
@ -1182,6 +1205,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig {
Debug: defaultEnableDebug,
DisableNestingChecks: defaultDisableNestingChecks,
BlockDeviceDriver: defaultBlockDeviceDriver,
BlockDeviceAIO: defaultBlockDeviceAIO,
BlockDeviceCacheSet: defaultBlockDeviceCacheSet,
BlockDeviceCacheDirect: defaultBlockDeviceCacheDirect,
BlockDeviceCacheNoflush: defaultBlockDeviceCacheNoflush,

View File

@ -79,6 +79,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
machineType := "machineType"
disableBlockDevice := true
blockDeviceDriver := "virtio-scsi"
blockDeviceAIO := "io_uring"
enableIOThreads := true
hotplugVFIOOnRootBus := true
pcieRootPort := uint32(2)
@ -99,6 +100,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
DefaultGuestHookPath: defaultGuestHookPath,
DisableBlock: disableBlockDevice,
BlockDeviceDriver: blockDeviceDriver,
BlockDeviceAIO: blockDeviceAIO,
EnableIOThreads: enableIOThreads,
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
PCIeRootPort: pcieRootPort,
@ -159,6 +161,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
DefaultMaxMemorySize: maxMemory,
DisableBlockDeviceUse: disableBlockDevice,
BlockDeviceDriver: defaultBlockDeviceDriver,
BlockDeviceAIO: defaultBlockDeviceAIO,
DefaultBridges: defaultBridgesCount,
EnableIOThreads: enableIOThreads,
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
@ -550,6 +553,7 @@ func TestMinimalRuntimeConfig(t *testing.T) {
GuestHookPath: defaultGuestHookPath,
VhostUserStorePath: defaultVhostUserStorePath,
VirtioFSCache: defaultVirtioFSCacheMode,
BlockDeviceAIO: defaultBlockDeviceAIO,
}
expectedAgentConfig := vc.KataAgentConfig{
@ -593,6 +597,7 @@ func TestNewQemuHypervisorConfig(t *testing.T) {
hotplugVFIOOnRootBus := true
pcieRootPort := uint32(2)
orgVHostVSockDevicePath := utils.VHostVSockDevicePath
blockDeviceAIO := "io_uring"
defer func() {
utils.VHostVSockDevicePath = orgVHostVSockDevicePath
}()
@ -614,6 +619,7 @@ func TestNewQemuHypervisorConfig(t *testing.T) {
TxRateLimiterMaxRate: txRateLimiterMaxRate,
SharedFS: "virtio-fs",
VirtioFSDaemon: filepath.Join(dir, "virtiofsd"),
BlockDeviceAIO: blockDeviceAIO,
}
files := []string{hypervisorPath, kernelPath, imagePath}
@ -674,6 +680,11 @@ func TestNewQemuHypervisorConfig(t *testing.T) {
if config.TxRateLimiterMaxRate != txRateLimiterMaxRate {
t.Errorf("Expected value for tx rate limiter %v, got %v", txRateLimiterMaxRate, config.TxRateLimiterMaxRate)
}
if config.BlockDeviceAIO != blockDeviceAIO {
t.Errorf("Expected value for BlockDeviceAIO %v, got %v", blockDeviceAIO, config.BlockDeviceAIO)
}
}
func TestNewFirecrackerHypervisorConfig(t *testing.T) {

View File

@ -683,6 +683,22 @@ func addHypervisorBlockOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig)
}
}
if value, ok := ocispec.Annotations[vcAnnotations.BlockDeviceAIO]; ok {
supportedAIO := []string{config.AIONative, config.AIOThreads, config.AIOIOUring}
valid := false
for _, b := range supportedAIO {
if b == value {
sbConfig.HypervisorConfig.BlockDeviceAIO = value
valid = true
}
}
if !valid {
return fmt.Errorf("Invalid AIO mechanism %v specified in annotation (supported IO mechanism : %v)", value, supportedAIO)
}
}
if err := newAnnotationConfiguration(ocispec, vcAnnotations.DisableBlockDeviceUse).setBool(func(disableBlockDeviceUse bool) {
sbConfig.HypervisorConfig.DisableBlockDeviceUse = disableBlockDeviceUse
}); err != nil {

View File

@ -642,6 +642,7 @@ func TestAddHypervisorAnnotations(t *testing.T) {
ocispec.Annotations[vcAnnotations.HugePages] = "true"
ocispec.Annotations[vcAnnotations.IOMMU] = "true"
ocispec.Annotations[vcAnnotations.BlockDeviceDriver] = "virtio-scsi"
ocispec.Annotations[vcAnnotations.BlockDeviceAIO] = "io_uring"
ocispec.Annotations[vcAnnotations.DisableBlockDeviceUse] = "true"
ocispec.Annotations[vcAnnotations.EnableIOThreads] = "true"
ocispec.Annotations[vcAnnotations.BlockDeviceCacheSet] = "true"
@ -679,6 +680,7 @@ func TestAddHypervisorAnnotations(t *testing.T) {
assert.Equal(config.HypervisorConfig.HugePages, true)
assert.Equal(config.HypervisorConfig.IOMMU, true)
assert.Equal(config.HypervisorConfig.BlockDeviceDriver, "virtio-scsi")
assert.Equal(config.HypervisorConfig.BlockDeviceAIO, "io_uring")
assert.Equal(config.HypervisorConfig.DisableBlockDeviceUse, true)
assert.Equal(config.HypervisorConfig.EnableIOThreads, true)
assert.Equal(config.HypervisorConfig.BlockDeviceCacheSet, true)

View File

@ -377,6 +377,9 @@ type HypervisorConfig struct {
// SeccompSandbox is the qemu function which enables the seccomp feature
SeccompSandbox string
// BlockDeviceAIO specifies the I/O API to be used.
BlockDeviceAIO string
// KernelParams are additional guest kernel parameters.
KernelParams []Param

View File

@ -203,6 +203,9 @@ const (
// BlockDeviceDriver specifies the driver to be used for block device either VirtioSCSI or VirtioBlock
BlockDeviceDriver = kataAnnotHypervisorPrefix + "block_device_driver"
// BlockDeviceAIO specifies I/O mechanism to be used with VirtioBlock for qemu
BlockDeviceAIO = kataAnnotHypervisorPrefix + "block_device_aio"
// DisableBlockDeviceUse is a sandbox annotation that disallows a block device from being used.
DisableBlockDeviceUse = kataAnnotHypervisorPrefix + "disable_block_device_use"

View File

@ -1292,12 +1292,19 @@ func (q *qemu) hotplugAddBlockDevice(ctx context.Context, drive *config.BlockDri
return nil
}
qblkDevice := govmmQemu.BlockDevice{
ID: drive.ID,
File: drive.File,
ReadOnly: drive.ReadOnly,
AIO: govmmQemu.BlockDeviceAIO(q.config.BlockDeviceAIO),
}
if drive.Swap {
err = q.qmpMonitorCh.qmp.ExecuteBlockdevAddWithDriverCache(q.qmpMonitorCh.ctx, "file", drive.File, drive.ID, false, false, false)
err = q.qmpMonitorCh.qmp.ExecuteBlockdevAddWithDriverCache(q.qmpMonitorCh.ctx, "file", &qblkDevice, false, false)
} else if q.config.BlockDeviceCacheSet {
err = q.qmpMonitorCh.qmp.ExecuteBlockdevAddWithCache(q.qmpMonitorCh.ctx, drive.File, drive.ID, q.config.BlockDeviceCacheDirect, q.config.BlockDeviceCacheNoflush, drive.ReadOnly)
err = q.qmpMonitorCh.qmp.ExecuteBlockdevAddWithCache(q.qmpMonitorCh.ctx, &qblkDevice, q.config.BlockDeviceCacheDirect, q.config.BlockDeviceCacheNoflush)
} else {
err = q.qmpMonitorCh.qmp.ExecuteBlockdevAdd(q.qmpMonitorCh.ctx, drive.File, drive.ID, drive.ReadOnly)
err = q.qmpMonitorCh.qmp.ExecuteBlockdevAdd(q.qmpMonitorCh.ctx, &qblkDevice)
}
if err != nil {
return err