diff --git a/src/runtime/Makefile b/src/runtime/Makefile index fa07b87deb..b05106318b 100644 --- a/src/runtime/Makefile +++ b/src/runtime/Makefile @@ -253,6 +253,7 @@ ifneq (,$(QEMUCMD)) # qemu-specific options (all should be suffixed by "_QEMU") DEFBLOCKSTORAGEDRIVER_QEMU := virtio-scsi + DEFBLOCKDEVICEAIO_QEMU := io_uring DEFNETWORKMODEL_QEMU := tcfilter KERNELTYPE = uncompressed KERNELNAME = $(call MAKE_KERNEL_NAME,$(KERNELTYPE)) @@ -458,6 +459,7 @@ USER_VARS += DEFDISABLEBLOCK USER_VARS += DEFBLOCKSTORAGEDRIVER_ACRN USER_VARS += DEFBLOCKSTORAGEDRIVER_FC USER_VARS += DEFBLOCKSTORAGEDRIVER_QEMU +USER_VARS += DEFBLOCKDEVICEAIO_QEMU USER_VARS += DEFSHAREDFS_CLH_VIRTIOFS USER_VARS += DEFSHAREDFS_QEMU_VIRTIOFS USER_VARS += DEFVIRTIOFSDAEMON diff --git a/src/runtime/config/configuration-qemu.toml.in b/src/runtime/config/configuration-qemu.toml.in index 3ec44c8b6e..d0a711dcfe 100644 --- a/src/runtime/config/configuration-qemu.toml.in +++ b/src/runtime/config/configuration-qemu.toml.in @@ -208,6 +208,20 @@ virtio_fs_cache = "@DEFVIRTIOFSCACHE@" # or nvdimm. block_device_driver = "@DEFBLOCKSTORAGEDRIVER_QEMU@" +# aio is the I/O mechanism used by qemu +# Options: +# +# - threads +# Pthread based disk I/O. +# +# - native +# Native Linux I/O. +# +# - io_uring +# Linux io_uring API. This provides the fastest I/O operations on Linux; requires kernel >= 5.1 and +# qemu >= 5.0. +block_device_aio = "@DEFBLOCKDEVICEAIO_QEMU@" + # Specifies cache-related options will be set to block devices or not. # Default false #block_device_cache_set = true diff --git a/src/runtime/pkg/device/config/config.go b/src/runtime/pkg/device/config/config.go index 61f9236b9c..a11c52f75c 100644 --- a/src/runtime/pkg/device/config/config.go +++ b/src/runtime/pkg/device/config/config.go @@ -61,6 +61,17 @@ const ( Nvdimm = "nvdimm" ) +const ( + // AIOThreads is the pthread asynchronous I/O implementation. 
+ AIOThreads = "threads" + + // AIONative is the native Linux AIO implementation + AIONative = "native" + + // AIOIOUring is the Linux io_uring I/O implementation + AIOIOUring = "io_uring" +) + const ( // Virtio9P means use virtio-9p for the shared file system Virtio9P = "virtio-9p" diff --git a/src/runtime/pkg/katatestutils/utils.go b/src/runtime/pkg/katatestutils/utils.go index 5676f4451c..2aa0754cac 100644 --- a/src/runtime/pkg/katatestutils/utils.go +++ b/src/runtime/pkg/katatestutils/utils.go @@ -216,6 +216,7 @@ type RuntimeConfigOptions struct { ShimPath string LogPath string BlockDeviceDriver string + BlockDeviceAIO string SharedFS string VirtioFSDaemon string JaegerEndpoint string @@ -305,6 +306,7 @@ func MakeRuntimeConfigFileData(config RuntimeConfigOptions) string { path = "` + config.HypervisorPath + `" kernel = "` + config.KernelPath + `" block_device_driver = "` + config.BlockDeviceDriver + `" + block_device_aio = "` + config.BlockDeviceAIO + `" kernel_params = "` + config.KernelParams + `" image = "` + config.ImagePath + `" machine_type = "` + config.MachineType + `" diff --git a/src/runtime/pkg/katautils/config-settings.go.in b/src/runtime/pkg/katautils/config-settings.go.in index 2aad22bd85..37dbfee45a 100644 --- a/src/runtime/pkg/katautils/config-settings.go.in +++ b/src/runtime/pkg/katautils/config-settings.go.in @@ -63,6 +63,7 @@ const defaultBridgesCount uint32 = 1 const defaultInterNetworkingModel = "tcfilter" const defaultDisableBlockDeviceUse bool = false const defaultBlockDeviceDriver = "virtio-scsi" +const defaultBlockDeviceAIO string = "io_uring" const defaultBlockDeviceCacheSet bool = false const defaultBlockDeviceCacheDirect bool = false const defaultBlockDeviceCacheNoflush bool = false diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index 0903c8ea9e..6147e2738c 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -99,6 +99,7 @@ type hypervisor struct { 
GuestHookPath string `toml:"guest_hook_path"` GuestMemoryDumpPath string `toml:"guest_memory_dump_path"` SeccompSandbox string `toml:"seccompsandbox"` + BlockDeviceAIO string `toml:"block_device_aio"` HypervisorPathList []string `toml:"valid_hypervisor_paths"` JailerPathList []string `toml:"valid_jailer_paths"` CtlPathList []string `toml:"valid_ctlpaths"` @@ -468,6 +469,22 @@ func (h hypervisor) blockDeviceDriver() (string, error) { return "", fmt.Errorf("Invalid hypervisor block storage driver %v specified (supported drivers: %v)", h.BlockDeviceDriver, supportedBlockDrivers) } +func (h hypervisor) blockDeviceAIO() (string, error) { + supportedBlockAIO := []string{config.AIOIOUring, config.AIONative, config.AIOThreads} + + if h.BlockDeviceAIO == "" { + return defaultBlockDeviceAIO, nil + } + + for _, b := range supportedBlockAIO { + if b == h.BlockDeviceAIO { + return h.BlockDeviceAIO, nil + } + } + + return "", fmt.Errorf("Invalid hypervisor block storage I/O mechanism %v specified (supported AIO: %v)", h.BlockDeviceAIO, supportedBlockAIO) +} + func (h hypervisor) sharedFS() (string, error) { supportedSharedFS := []string{config.Virtio9P, config.VirtioFS, config.VirtioFSNydus} @@ -727,6 +744,11 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { return vc.HypervisorConfig{}, err } + blockAIO, err := h.blockDeviceAIO() + if err != nil { + return vc.HypervisorConfig{}, err + } + sharedFS, err := h.sharedFS() if err != nil { return vc.HypervisorConfig{}, err @@ -783,6 +805,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { Debug: h.Debug, DisableNestingChecks: h.DisableNestingChecks, BlockDeviceDriver: blockDriver, + BlockDeviceAIO: blockAIO, BlockDeviceCacheSet: h.BlockDeviceCacheSet, BlockDeviceCacheDirect: h.BlockDeviceCacheDirect, BlockDeviceCacheNoflush: h.BlockDeviceCacheNoflush, @@ -1154,6 +1177,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig { Debug: defaultEnableDebug, DisableNestingChecks: 
defaultDisableNestingChecks, BlockDeviceDriver: defaultBlockDeviceDriver, + BlockDeviceAIO: defaultBlockDeviceAIO, BlockDeviceCacheSet: defaultBlockDeviceCacheSet, BlockDeviceCacheDirect: defaultBlockDeviceCacheDirect, BlockDeviceCacheNoflush: defaultBlockDeviceCacheNoflush, diff --git a/src/runtime/pkg/katautils/config_test.go b/src/runtime/pkg/katautils/config_test.go index 86619ab13d..5e493b40e3 100644 --- a/src/runtime/pkg/katautils/config_test.go +++ b/src/runtime/pkg/katautils/config_test.go @@ -79,6 +79,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf machineType := "machineType" disableBlockDevice := true blockDeviceDriver := "virtio-scsi" + blockDeviceAIO := "io_uring" enableIOThreads := true hotplugVFIOOnRootBus := true pcieRootPort := uint32(2) @@ -99,6 +100,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf DefaultGuestHookPath: defaultGuestHookPath, DisableBlock: disableBlockDevice, BlockDeviceDriver: blockDeviceDriver, + BlockDeviceAIO: blockDeviceAIO, EnableIOThreads: enableIOThreads, HotplugVFIOOnRootBus: hotplugVFIOOnRootBus, PCIeRootPort: pcieRootPort, @@ -159,6 +161,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf DefaultMaxMemorySize: maxMemory, DisableBlockDeviceUse: disableBlockDevice, BlockDeviceDriver: defaultBlockDeviceDriver, + BlockDeviceAIO: defaultBlockDeviceAIO, DefaultBridges: defaultBridgesCount, EnableIOThreads: enableIOThreads, HotplugVFIOOnRootBus: hotplugVFIOOnRootBus, @@ -550,6 +553,7 @@ func TestMinimalRuntimeConfig(t *testing.T) { GuestHookPath: defaultGuestHookPath, VhostUserStorePath: defaultVhostUserStorePath, VirtioFSCache: defaultVirtioFSCacheMode, + BlockDeviceAIO: defaultBlockDeviceAIO, } expectedAgentConfig := vc.KataAgentConfig{ @@ -593,6 +597,7 @@ func TestNewQemuHypervisorConfig(t *testing.T) { hotplugVFIOOnRootBus := true pcieRootPort := uint32(2) orgVHostVSockDevicePath := utils.VHostVSockDevicePath + 
blockDeviceAIO := "io_uring" defer func() { utils.VHostVSockDevicePath = orgVHostVSockDevicePath }() @@ -614,6 +619,7 @@ func TestNewQemuHypervisorConfig(t *testing.T) { TxRateLimiterMaxRate: txRateLimiterMaxRate, SharedFS: "virtio-fs", VirtioFSDaemon: filepath.Join(dir, "virtiofsd"), + BlockDeviceAIO: blockDeviceAIO, } files := []string{hypervisorPath, kernelPath, imagePath} @@ -674,6 +680,11 @@ func TestNewQemuHypervisorConfig(t *testing.T) { if config.TxRateLimiterMaxRate != txRateLimiterMaxRate { t.Errorf("Expected value for tx rate limiter %v, got %v", txRateLimiterMaxRate, config.TxRateLimiterMaxRate) } + + if config.BlockDeviceAIO != blockDeviceAIO { + t.Errorf("Expected value for BlockDeviceAIO %v, got %v", blockDeviceAIO, config.BlockDeviceAIO) + } + } func TestNewFirecrackerHypervisorConfig(t *testing.T) { diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index 48eda09778..6f549454ed 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -371,6 +371,9 @@ type HypervisorConfig struct { // SeccompSandbox is the qemu function which enables the seccomp feature SeccompSandbox string + // BlockDeviceAIO specifies the I/O API to be used. + BlockDeviceAIO string + // KernelParams are additional guest kernel parameters. KernelParams []Param