From 977056492de61c74f7933cebed8d57a0d24f89e3 Mon Sep 17 00:00:00 2001 From: "zhangchen.kidd" Date: Thu, 24 Jul 2025 15:50:02 +0800 Subject: [PATCH 1/7] runtime: Introduce independent IOThreads framework Introduce an independent IOThreads framework for Kata Containers. What is indep_iothreads: This new feature introduces a way to pre-allocate IOThreads for the QEMU hypervisor (other hypervisors may be able to support it too). Independent IOThreads enable IO to be processed in separate threads instead of the QEMU main loop, which generally improves the performance of each module that uses them. Why indep_iothreads is needed: In the Kata Containers implementation, many devices are based on the hotplug mechanism. The real workload container may not share the same lifecycle as the VM, so it may be necessary to hotplug/unplug new disks or other devices without destroying the VM. We can therefore keep the IOThreads with the VM as an IOThread pool (some devices need multiple iothreads for performance, like virtio-blk vq-mapping), and the hotplugged devices can attach to/detach from an IOThread according to business needs. At the same time, QEMU also supports "x-blockdev-set-iothread" to change iothreads (but it requires stopping the VM for data safety). Many QEMU devices currently support iothreads: virtio-blk, virtio-scsi, virtio-balloon, monitor, colo-compare, etc. How it works: Add a new item named "indep_iothreads" to the hypervisor struct and the toml configuration. The default value is 0, and the original "enable_iothreads" option is reused as the switch. If "indep_iothreads" != 0 and "enable_iothreads" = true, it will add the qmp object -iothread indepIOThreadsPrefix_No at VM startup. The first user is virtio-blk, which attaches to indep_iothread_0 by default when iothreads are enabled for virtio-blk. 
Thanks Chen Signed-off-by: zhangchen.kidd --- src/runtime/Makefile | 2 + src/runtime/pkg/katatestutils/utils.go | 72 ++++++++++--------- .../pkg/katautils/config-settings.go.in | 1 + src/runtime/pkg/katautils/config.go | 13 ++++ src/runtime/pkg/oci/utils.go | 10 +++ .../documentation/api/1.0/api.md | 4 ++ src/runtime/virtcontainers/hypervisor.go | 3 + src/runtime/virtcontainers/persist.go | 2 + .../virtcontainers/persist/api/config.go | 4 ++ .../pkg/annotations/annotations.go | 5 ++ 10 files changed, 81 insertions(+), 35 deletions(-) diff --git a/src/runtime/Makefile b/src/runtime/Makefile index 940338c8e6..192093c74b 100644 --- a/src/runtime/Makefile +++ b/src/runtime/Makefile @@ -260,6 +260,7 @@ DEFVIRTIOFSQUEUESIZE ?= 1024 # Make sure you quote args. DEFVIRTIOFSEXTRAARGS ?= [\"--thread-pool-size=1\", \"--announce-submounts\"] DEFENABLEIOTHREADS := false +DEFINDEPIOTHREADS := 0 DEFENABLEVHOSTUSERSTORE := false DEFVHOSTUSERSTOREPATH := $(PKGRUNDIR)/vhost-user DEFVALIDVHOSTUSERSTOREPATHS := [\"$(DEFVHOSTUSERSTOREPATH)\"] @@ -731,6 +732,7 @@ USER_VARS += DEFVIRTIOFSEXTRAARGS USER_VARS += DEFENABLEANNOTATIONS USER_VARS += DEFENABLEANNOTATIONSTEE USER_VARS += DEFENABLEIOTHREADS +USER_VARS += DEFINDEPIOTHREADS USER_VARS += DEFSECCOMPSANDBOXPARAM USER_VARS += DEFENABLEVHOSTUSERSTORE USER_VARS += DEFVHOSTUSERSTOREPATH diff --git a/src/runtime/pkg/katatestutils/utils.go b/src/runtime/pkg/katatestutils/utils.go index c855ae9fbe..fd189dfd33 100644 --- a/src/runtime/pkg/katatestutils/utils.go +++ b/src/runtime/pkg/katatestutils/utils.go @@ -207,41 +207,42 @@ const ( ) type RuntimeConfigOptions struct { - Hypervisor string - HypervisorPath string - DefaultGuestHookPath string - KernelPath string - ImagePath string - RootfsType string - KernelParams string - MachineType string - LogPath string - BlockDeviceDriver string - BlockDeviceAIO string - SharedFS string - VirtioFSDaemon string - JaegerEndpoint string - JaegerUser string - JaegerPassword string - PFlash []string - 
HotPlugVFIO config.PCIePort - ColdPlugVFIO config.PCIePort - PCIeRootPort uint32 - PCIeSwitchPort uint32 - DefaultVCPUCount uint32 - DefaultMaxVCPUCount uint32 - DefaultMemSize uint32 - DefaultMaxMemorySize uint64 - DefaultMsize9p uint32 - DisableBlock bool - EnableIOThreads bool - DisableNewNetNs bool - HypervisorDebug bool - RuntimeDebug bool - RuntimeTrace bool - AgentDebug bool - AgentTrace bool - EnablePprof bool + Hypervisor string + HypervisorPath string + DefaultGuestHookPath string + KernelPath string + ImagePath string + RootfsType string + KernelParams string + MachineType string + LogPath string + BlockDeviceDriver string + BlockDeviceAIO string + SharedFS string + VirtioFSDaemon string + JaegerEndpoint string + JaegerUser string + JaegerPassword string + PFlash []string + HotPlugVFIO config.PCIePort + ColdPlugVFIO config.PCIePort + PCIeRootPort uint32 + PCIeSwitchPort uint32 + DefaultVCPUCount uint32 + DefaultMaxVCPUCount uint32 + DefaultMemSize uint32 + DefaultMaxMemorySize uint64 + DefaultMsize9p uint32 + DefaultIndepIOThreads uint32 + DisableBlock bool + EnableIOThreads bool + DisableNewNetNs bool + HypervisorDebug bool + RuntimeDebug bool + RuntimeTrace bool + AgentDebug bool + AgentTrace bool + EnablePprof bool } // ContainerIDTestDataType is a type used to test Container and Sandbox ID's. 
@@ -318,6 +319,7 @@ func MakeRuntimeConfigFileData(config RuntimeConfigOptions) string { default_memory = ` + strconv.FormatUint(uint64(config.DefaultMemSize), 10) + ` disable_block_device_use = ` + strconv.FormatBool(config.DisableBlock) + ` enable_iothreads = ` + strconv.FormatBool(config.EnableIOThreads) + ` + indep_iothreads = ` + strconv.FormatUint(uint64(config.DefaultIndepIOThreads), 10) + ` cold_plug_vfio = "` + config.ColdPlugVFIO.String() + `" hot_plug_vfio = "` + config.HotPlugVFIO.String() + `" pcie_root_port = ` + strconv.FormatUint(uint64(config.PCIeRootPort), 10) + ` diff --git a/src/runtime/pkg/katautils/config-settings.go.in b/src/runtime/pkg/katautils/config-settings.go.in index d7680dd1bd..cecce93f18 100644 --- a/src/runtime/pkg/katautils/config-settings.go.in +++ b/src/runtime/pkg/katautils/config-settings.go.in @@ -75,6 +75,7 @@ const defaultBlockDeviceCacheSet bool = false const defaultBlockDeviceCacheDirect bool = false const defaultBlockDeviceCacheNoflush bool = false const defaultEnableIOThreads bool = false +const defaultIndepIOThreads uint32 = 0 const defaultEnableMemPrealloc bool = false const defaultEnableReclaimGuestFreedMemory bool = false const defaultEnableHugePages bool = false diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index f15d945ca9..35a592680b 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -155,6 +155,7 @@ type hypervisor struct { Debug bool `toml:"enable_debug"` DisableNestingChecks bool `toml:"disable_nesting_checks"` EnableIOThreads bool `toml:"enable_iothreads"` + IndepIOThreads uint32 `toml:"indep_iothreads"` DisableImageNvdimm bool `toml:"disable_image_nvdimm"` HotPlugVFIO config.PCIePort `toml:"hot_plug_vfio"` ColdPlugVFIO config.PCIePort `toml:"cold_plug_vfio"` @@ -614,6 +615,14 @@ func (h hypervisor) msize9p() uint32 { return h.Msize9p } +func (h hypervisor) indepiothreads() uint32 { + if h.IndepIOThreads == 0 { + return 
defaultIndepIOThreads + } + + return h.IndepIOThreads +} + func (h hypervisor) guestHookPath() string { if h.GuestHookPath == "" { return defaultGuestHookPath @@ -810,6 +819,7 @@ func newFirecrackerHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { DisableNestingChecks: h.DisableNestingChecks, BlockDeviceDriver: blockDriver, EnableIOThreads: h.EnableIOThreads, + IndepIOThreads: h.indepiothreads(), DisableVhostNet: true, // vhost-net backend is not supported in Firecracker GuestHookPath: h.guestHookPath(), RxRateLimiterMaxRate: rxRateLimiterMaxRate, @@ -964,6 +974,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { BlockDeviceCacheDirect: h.BlockDeviceCacheDirect, BlockDeviceCacheNoflush: h.BlockDeviceCacheNoflush, EnableIOThreads: h.EnableIOThreads, + IndepIOThreads: h.indepiothreads(), Msize9p: h.msize9p(), DisableImageNvdimm: h.DisableImageNvdimm, HotPlugVFIO: h.hotPlugVFIO(), @@ -1094,6 +1105,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { BlockDeviceCacheSet: h.BlockDeviceCacheSet, BlockDeviceCacheDirect: h.BlockDeviceCacheDirect, EnableIOThreads: h.EnableIOThreads, + IndepIOThreads: h.indepiothreads(), Msize9p: h.msize9p(), DisableImageNvdimm: h.DisableImageNvdimm, ColdPlugVFIO: h.coldPlugVFIO(), @@ -1452,6 +1464,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig { BlockDeviceCacheDirect: defaultBlockDeviceCacheDirect, BlockDeviceCacheNoflush: defaultBlockDeviceCacheNoflush, EnableIOThreads: defaultEnableIOThreads, + IndepIOThreads: defaultIndepIOThreads, Msize9p: defaultMsize9p, ColdPlugVFIO: defaultColdPlugVFIO, HotPlugVFIO: defaultHotPlugVFIO, diff --git a/src/runtime/pkg/oci/utils.go b/src/runtime/pkg/oci/utils.go index d49aabd988..ee5272a53c 100644 --- a/src/runtime/pkg/oci/utils.go +++ b/src/runtime/pkg/oci/utils.go @@ -840,6 +840,16 @@ func addHypervisorBlockOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) return err } + if err := newAnnotationConfiguration(ocispec, 
vcAnnotations.IndepIOThreads).setUintWithCheck(func(indepiothreads uint64) error { + if indepiothreads > uint64(^uint32(0)) { // reject values the uint32 conversion below would silently truncate + return fmt.Errorf("Error parsing annotation for indepiothreads, please specify a numeric value that fits in 32 bits") + } + sbConfig.HypervisorConfig.IndepIOThreads = uint32(indepiothreads) + return nil + }); err != nil { + return err + } + if err := newAnnotationConfiguration(ocispec, vcAnnotations.BlockDeviceCacheSet).setBool(func(blockDeviceCacheSet bool) { sbConfig.HypervisorConfig.BlockDeviceCacheSet = blockDeviceCacheSet }); err != nil { diff --git a/src/runtime/virtcontainers/documentation/api/1.0/api.md b/src/runtime/virtcontainers/documentation/api/1.0/api.md index ad336d359b..953c837af7 100644 --- a/src/runtime/virtcontainers/documentation/api/1.0/api.md +++ b/src/runtime/virtcontainers/documentation/api/1.0/api.md @@ -249,6 +249,10 @@ type HypervisorConfig struct { // Supported currently for virtio-scsi driver. EnableIOThreads bool + // Independent IOThreads enables IO to be processed in a separate thread, it is + // for QEMU hotplug device attach to iothread, like virtio-blk. + IndepIOThreads uint32 + // Debug changes the default hypervisor and kernel parameters to // enable debug output where available. Debug bool diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index 22423ab122..e7e294e4c8 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -605,6 +605,9 @@ type HypervisorConfig struct { // Supported currently for virtio-scsi driver. EnableIOThreads bool + // Independent IOThreads enables IO to be processed in a separate thread. + IndepIOThreads uint32 + // Debug changes the default hypervisor and kernel parameters to // enable debug output where available. 
Debug bool diff --git a/src/runtime/virtcontainers/persist.go b/src/runtime/virtcontainers/persist.go index 79731b7998..a3cb0bc859 100644 --- a/src/runtime/virtcontainers/persist.go +++ b/src/runtime/virtcontainers/persist.go @@ -235,6 +235,7 @@ func (s *Sandbox) dumpConfig(ss *persistapi.SandboxState) { BlockDeviceCacheNoflush: sconfig.HypervisorConfig.BlockDeviceCacheNoflush, DisableBlockDeviceUse: sconfig.HypervisorConfig.DisableBlockDeviceUse, EnableIOThreads: sconfig.HypervisorConfig.EnableIOThreads, + IndepIOThreads: sconfig.HypervisorConfig.IndepIOThreads, Debug: sconfig.HypervisorConfig.Debug, MemPrealloc: sconfig.HypervisorConfig.MemPrealloc, HugePages: sconfig.HypervisorConfig.HugePages, @@ -473,6 +474,7 @@ func loadSandboxConfig(id string) (*SandboxConfig, error) { BlockDeviceCacheNoflush: hconf.BlockDeviceCacheNoflush, DisableBlockDeviceUse: hconf.DisableBlockDeviceUse, EnableIOThreads: hconf.EnableIOThreads, + IndepIOThreads: hconf.IndepIOThreads, Debug: hconf.Debug, MemPrealloc: hconf.MemPrealloc, HugePages: hconf.HugePages, diff --git a/src/runtime/virtcontainers/persist/api/config.go b/src/runtime/virtcontainers/persist/api/config.go index 194456554f..e9284d30d8 100644 --- a/src/runtime/virtcontainers/persist/api/config.go +++ b/src/runtime/virtcontainers/persist/api/config.go @@ -164,6 +164,10 @@ type HypervisorConfig struct { // Supported currently for virtio-scsi driver. EnableIOThreads bool + // Independent IOThreads enables IO to be processed in a separate thread, it is + // for QEMU hotplug device attach to iothread, like virtio-blk. + IndepIOThreads uint32 + // Debug changes the default hypervisor and kernel parameters to // enable debug output where available. 
Debug bool diff --git a/src/runtime/virtcontainers/pkg/annotations/annotations.go b/src/runtime/virtcontainers/pkg/annotations/annotations.go index 03b9e9b70c..d32a14ed1a 100644 --- a/src/runtime/virtcontainers/pkg/annotations/annotations.go +++ b/src/runtime/virtcontainers/pkg/annotations/annotations.go @@ -221,6 +221,11 @@ const ( // Supported currently for virtio-scsi driver. EnableIOThreads = kataAnnotHypervisorPrefix + "enable_iothreads" + // Independent IOThreads enables IO to be processed in a separate thread, it is + // for QEMU hotplug device attach to iothread, like virtio-blk. + IndepIOThreads = kataAnnotHypervisorPrefix + "indep_iothreads" + + // BlockDeviceCacheSet is a sandbox annotation that specifies cache-related options will be set to block devices or not. BlockDeviceCacheSet = kataAnnotHypervisorPrefix + "block_device_cache_set" From da213b839555afd8341604e66982c9c8b386016d Mon Sep 17 00:00:00 2001 From: "zhangchen.kidd" Date: Thu, 24 Jul 2025 15:57:19 +0800 Subject: [PATCH 2/7] runtime: qemu: Add indep_iothreads for QEMU hypervisor toml Add the indep_iothreads option to the QEMU-related configuration toml files. The default value is 0. 
Signed-off-by: zhangchen.kidd --- src/runtime/config/configuration-qemu-coco-dev.toml.in | 8 ++++++-- .../config/configuration-qemu-nvidia-gpu-snp.toml.in | 8 ++++++-- .../config/configuration-qemu-nvidia-gpu-tdx.toml.in | 8 ++++++-- src/runtime/config/configuration-qemu-nvidia-gpu.toml.in | 8 ++++++-- src/runtime/config/configuration-qemu-se.toml.in | 8 ++++++-- src/runtime/config/configuration-qemu-snp.toml.in | 8 ++++++-- src/runtime/config/configuration-qemu-tdx.toml.in | 8 ++++++-- src/runtime/config/configuration-qemu.toml.in | 8 ++++++-- 8 files changed, 48 insertions(+), 16 deletions(-) diff --git a/src/runtime/config/configuration-qemu-coco-dev.toml.in b/src/runtime/config/configuration-qemu-coco-dev.toml.in index 79de271708..e70f0d8a75 100644 --- a/src/runtime/config/configuration-qemu-coco-dev.toml.in +++ b/src/runtime/config/configuration-qemu-coco-dev.toml.in @@ -259,11 +259,15 @@ block_device_aio = "@DEFBLOCKDEVICEAIO_QEMU@" #block_device_cache_noflush = true # Enable iothreads (data-plane) to be used. This causes IO to be -# handled in a separate IO thread. This is currently only implemented -# for SCSI. +# handled in a separate IO thread. This is currently implemented +# for virtio-scsi and virtio-blk. # enable_iothreads = @DEFENABLEIOTHREADS@ +# Independent IOThreads enables IO to be processed in a separate thread, it is +# for QEMU hotplug device attach to iothread, like virtio-blk. 
+indep_iothreads = @DEFINDEPIOTHREADS@ + # Enable pre allocation of VM RAM, default false # Enabling this will result in lower container density # as all of the memory will be allocated and locked diff --git a/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in b/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in index f56a287d9e..dcda636b17 100644 --- a/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in +++ b/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in @@ -268,11 +268,15 @@ block_device_aio = "@DEFBLOCKDEVICEAIO_QEMU@" #block_device_cache_noflush = true # Enable iothreads (data-plane) to be used. This causes IO to be -# handled in a separate IO thread. This is currently only implemented -# for SCSI. +# handled in a separate IO thread. This is currently implemented +# for virtio-scsi and virtio-blk. # enable_iothreads = @DEFENABLEIOTHREADS@ +# Independent IOThreads enables IO to be processed in a separate thread, it is +# for QEMU hotplug device attach to iothread, like virtio-blk. +indep_iothreads = @DEFINDEPIOTHREADS@ + # Enable pre allocation of VM RAM, default false # Enabling this will result in lower container density # as all of the memory will be allocated and locked diff --git a/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in b/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in index 123b2a2be7..2bbd0f9f85 100644 --- a/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in +++ b/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in @@ -252,11 +252,15 @@ block_device_aio = "@DEFBLOCKDEVICEAIO_QEMU@" #block_device_cache_noflush = true # Enable iothreads (data-plane) to be used. This causes IO to be -# handled in a separate IO thread. This is currently only implemented -# for SCSI. +# handled in a separate IO thread. This is currently implemented +# for virtio-scsi and virtio-blk. 
# enable_iothreads = @DEFENABLEIOTHREADS@ +# Independent IOThreads enables IO to be processed in a separate thread, it is +# for QEMU hotplug device attach to iothread, like virtio-blk. +indep_iothreads = @DEFINDEPIOTHREADS@ + # Enable pre allocation of VM RAM, default false # Enabling this will result in lower container density # as all of the memory will be allocated and locked diff --git a/src/runtime/config/configuration-qemu-nvidia-gpu.toml.in b/src/runtime/config/configuration-qemu-nvidia-gpu.toml.in index 8f92987f96..d0460f3f5a 100644 --- a/src/runtime/config/configuration-qemu-nvidia-gpu.toml.in +++ b/src/runtime/config/configuration-qemu-nvidia-gpu.toml.in @@ -257,11 +257,15 @@ block_device_aio = "@DEFBLOCKDEVICEAIO_QEMU@" #block_device_cache_noflush = true # Enable iothreads (data-plane) to be used. This causes IO to be -# handled in a separate IO thread. This is currently only implemented -# for SCSI. +# handled in a separate IO thread. This is currently implemented +# for virtio-scsi and virtio-blk. # enable_iothreads = @DEFENABLEIOTHREADS@ +# Independent IOThreads enables IO to be processed in a separate thread, it is +# for QEMU hotplug device attach to iothread, like virtio-blk. +indep_iothreads = @DEFINDEPIOTHREADS@ + # Enable pre allocation of VM RAM, default false # Enabling this will result in lower container density # as all of the memory will be allocated and locked diff --git a/src/runtime/config/configuration-qemu-se.toml.in b/src/runtime/config/configuration-qemu-se.toml.in index 9f3c258dc8..8342099133 100644 --- a/src/runtime/config/configuration-qemu-se.toml.in +++ b/src/runtime/config/configuration-qemu-se.toml.in @@ -243,11 +243,15 @@ block_device_aio = "@DEFBLOCKDEVICEAIO_QEMU@" #block_device_cache_noflush = true # Enable iothreads (data-plane) to be used. This causes IO to be -# handled in a separate IO thread. This is currently only implemented -# for SCSI. +# handled in a separate IO thread. 
This is currently implemented +# for virtio-scsi and virtio-blk. # enable_iothreads = @DEFENABLEIOTHREADS@ +# Independent IOThreads enables IO to be processed in a separate thread, it is +# for QEMU hotplug device attach to iothread, like virtio-blk. +indep_iothreads = @DEFINDEPIOTHREADS@ + # Enable pre allocation of VM RAM, default false # Enabling this will result in lower container density # as all of the memory will be allocated and locked diff --git a/src/runtime/config/configuration-qemu-snp.toml.in b/src/runtime/config/configuration-qemu-snp.toml.in index 0c2a55021a..efc4c63316 100644 --- a/src/runtime/config/configuration-qemu-snp.toml.in +++ b/src/runtime/config/configuration-qemu-snp.toml.in @@ -268,11 +268,15 @@ block_device_aio = "@DEFBLOCKDEVICEAIO_QEMU@" #block_device_cache_noflush = true # Enable iothreads (data-plane) to be used. This causes IO to be -# handled in a separate IO thread. This is currently only implemented -# for SCSI. +# handled in a separate IO thread. This is currently implemented +# for virtio-scsi and virtio-blk. # enable_iothreads = @DEFENABLEIOTHREADS@ +# Independent IOThreads enables IO to be processed in a separate thread, it is +# for QEMU hotplug device attach to iothread, like virtio-blk. +indep_iothreads = @DEFINDEPIOTHREADS@ + # Enable pre allocation of VM RAM, default false # Enabling this will result in lower container density # as all of the memory will be allocated and locked diff --git a/src/runtime/config/configuration-qemu-tdx.toml.in b/src/runtime/config/configuration-qemu-tdx.toml.in index 1c810e8cd6..91f40f7f0a 100644 --- a/src/runtime/config/configuration-qemu-tdx.toml.in +++ b/src/runtime/config/configuration-qemu-tdx.toml.in @@ -253,11 +253,15 @@ block_device_aio = "@DEFBLOCKDEVICEAIO_QEMU@" #block_device_cache_noflush = true # Enable iothreads (data-plane) to be used. This causes IO to be -# handled in a separate IO thread. This is currently only implemented -# for SCSI. +# handled in a separate IO thread. 
This is currently implemented +# for virtio-scsi and virtio-blk. # enable_iothreads = @DEFENABLEIOTHREADS@ +# Independent IOThreads enables IO to be processed in a separate thread, it is +# for QEMU hotplug device attach to iothread, like virtio-blk. +indep_iothreads = @DEFINDEPIOTHREADS@ + # Enable pre allocation of VM RAM, default false # Enabling this will result in lower container density # as all of the memory will be allocated and locked diff --git a/src/runtime/config/configuration-qemu.toml.in b/src/runtime/config/configuration-qemu.toml.in index 4afb3291ed..44c6579b9c 100644 --- a/src/runtime/config/configuration-qemu.toml.in +++ b/src/runtime/config/configuration-qemu.toml.in @@ -258,11 +258,15 @@ block_device_aio = "@DEFBLOCKDEVICEAIO_QEMU@" #block_device_cache_noflush = true # Enable iothreads (data-plane) to be used. This causes IO to be -# handled in a separate IO thread. This is currently only implemented -# for SCSI. +# handled in a separate IO thread. This is currently implemented +# for virtio-scsi and virtio-blk. # enable_iothreads = @DEFENABLEIOTHREADS@ +# Independent IOThreads enables IO to be processed in a separate thread, it is +# for QEMU hotplug device attach to iothread, like virtio-blk. +indep_iothreads = @DEFINDEPIOTHREADS@ + # Enable pre allocation of VM RAM, default false # Enabling this will result in lower container density # as all of the memory will be allocated and locked From a5e8b32b34d5b446b0de434d3ec8932ead8dbb9d Mon Sep 17 00:00:00 2001 From: "zhangchen.kidd" Date: Tue, 29 Jul 2025 12:02:07 +0800 Subject: [PATCH 3/7] runtime: qemu: Add comments for virtio-scsi iothread args For current implementation, just virtio-scsi use this iothread path. 
Signed-off-by: zhangchen.kidd --- src/runtime/virtcontainers/qemu.go | 1 + 1 file changed, 1 insertion(+) diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index 0de579c870..7e7f747fd1 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -734,6 +734,7 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi return err } + // Note: Only virtio-SCSI device driver use this ioThread args. devices, ioThread, kernel, err := q.buildDevices(ctx, kernelPath) if err != nil { return err From 9175668e8e05514a71b280a43e0db751100c05ed Mon Sep 17 00:00:00 2001 From: "zhangchen.kidd" Date: Tue, 29 Jul 2025 15:30:29 +0800 Subject: [PATCH 4/7] runtime: qemu: introduce setup iothread function Move the handling of the original virtio-scsi iothread and the new independent iothreads into a dedicated method. Independent iothreads are only created when enable_iothreads is set. Signed-off-by: zhangchen.kidd --- src/runtime/virtcontainers/qemu.go | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index 7e7f747fd1..7e20128d53 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -143,6 +143,8 @@ const ( qemuStopSandboxTimeoutSecs = 15 qomPathPrefix = "/machine/peripheral/" + + indepIOThreadsPrefix = "indep_iothread" ) // agnostic list of kernel parameters @@ -513,6 +515,26 @@ func (q *qemu) setupFileBackedMem(knobs *govmmQemu.Knobs, memory *govmmQemu.Memo memory.Path = target } +// setupIoThread returns the IOThreads for the QEMU config: the optional virtio-scsi ioThread, then IndepIOThreads independent ones with IDs "<indepIOThreadsPrefix>_<index>" when EnableIOThreads is set. +func (q *qemu) setupIoThread(ioThread *govmmQemu.IOThread) []govmmQemu.IOThread { + var threads []govmmQemu.IOThread + + // Add virtio-scsi IOThreads for QEMU + if ioThread != nil { + threads = append(threads, *ioThread) + } + + // Add Independent IOThreads for QEMU; enable_iothreads is the switch, matching the hotplug path + if q.config.EnableIOThreads && q.config.IndepIOThreads > 0 { + for i := uint32(0); i < q.config.IndepIOThreads; i++ { + id := fmt.Sprintf("%s_%d", indepIOThreadsPrefix, 
i) + threads = append(threads, govmmQemu.IOThread{ID: id}) + } + } + + return threads +} + func (q *qemu) setConfig(config *HypervisorConfig) error { q.config = *config @@ -803,9 +825,9 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi return err } - if ioThread != nil { - qemuConfig.IOThreads = []govmmQemu.IOThread{*ioThread} - } + // Setup iothread for devices. + qemuConfig.IOThreads = q.setupIoThread(ioThread) + // Add RNG device to hypervisor // Skip for s390x (as CPACF is used) or when Confidential Guest is enabled if machine.Type != QemuCCWVirtio && !q.config.ConfidentialGuest { From 1d273895b8f56d5808a89dbda82664fb698e3e56 Mon Sep 17 00:00:00 2001 From: "zhangchen.kidd" Date: Mon, 16 Jun 2025 21:01:02 +0800 Subject: [PATCH 5/7] runtime: qemu: qmp: Add iothread args for QMP ExecutePCIDeviceAdd Qemu already support the device_add with iothread args. Make KATA have ability to hotplug PCI device with IOThreads. Currently, just support QEMU as the hypervisor, not sure it works for stratovirt. Signed-off-by: zhangchen.kidd --- src/runtime/pkg/govmm/qemu/qmp.go | 6 +++++- src/runtime/pkg/govmm/qemu/qmp_test.go | 2 +- src/runtime/virtcontainers/qemu.go | 2 +- src/runtime/virtcontainers/stratovirt.go | 2 +- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/runtime/pkg/govmm/qemu/qmp.go b/src/runtime/pkg/govmm/qemu/qmp.go index 4eafb66767..d98a5bc866 100644 --- a/src/runtime/pkg/govmm/qemu/qmp.go +++ b/src/runtime/pkg/govmm/qemu/qmp.go @@ -1102,7 +1102,7 @@ func (q *QMP) ExecuteDeviceDel(ctx context.Context, devID string) error { // disableModern indicates if virtio version 1.0 should be replaced by the // former version 0.9, as there is a KVM bug that occurs when using virtio // 1.0 in nested environments. 
-func (q *QMP) ExecutePCIDeviceAdd(ctx context.Context, blockdevID, devID, driver, addr, bus, romfile string, queues int, shared, disableModern bool) error { +func (q *QMP) ExecutePCIDeviceAdd(ctx context.Context, blockdevID, devID, driver, addr, bus, romfile string, queues int, shared, disableModern bool, iothreadID string) error { args := map[string]interface{}{ "id": devID, "driver": driver, @@ -1129,6 +1129,10 @@ func (q *QMP) ExecutePCIDeviceAdd(ctx context.Context, blockdevID, devID, driver } } + if iothreadID != "" { + args["iothread"] = iothreadID + } + return q.executeCommand(ctx, "device_add", args, nil) } diff --git a/src/runtime/pkg/govmm/qemu/qmp_test.go b/src/runtime/pkg/govmm/qemu/qmp_test.go index 7aeb907e48..83442ee5e0 100644 --- a/src/runtime/pkg/govmm/qemu/qmp_test.go +++ b/src/runtime/pkg/govmm/qemu/qmp_test.go @@ -1066,7 +1066,7 @@ func TestQMPPCIDeviceAdd(t *testing.T) { blockdevID := fmt.Sprintf("drive_%s", volumeUUID) devID := fmt.Sprintf("device_%s", volumeUUID) err := q.ExecutePCIDeviceAdd(context.Background(), blockdevID, devID, - "virtio-blk-pci", "0x1", "", "", 1, true, false) + "virtio-blk-pci", "0x1", "", "", 1, true, false, "") if err != nil { t.Fatalf("Unexpected error %v", err) } diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index 7e20128d53..045306be0e 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -1744,7 +1744,7 @@ func (q *qemu) hotplugAddBlockDevice(ctx context.Context, drive *config.BlockDri queues := int(q.config.NumVCPUs()) - if err = q.qmpMonitorCh.qmp.ExecutePCIDeviceAdd(q.qmpMonitorCh.ctx, drive.ID, devID, driver, addr, bridge.ID, romFile, queues, true, defaultDisableModern); err != nil { + if err = q.qmpMonitorCh.qmp.ExecutePCIDeviceAdd(q.qmpMonitorCh.ctx, drive.ID, devID, driver, addr, bridge.ID, romFile, queues, true, defaultDisableModern, ""); err != nil { return err } case q.config.BlockDeviceDriver == config.VirtioBlockCCW: diff 
--git a/src/runtime/virtcontainers/stratovirt.go b/src/runtime/virtcontainers/stratovirt.go index 9bfab6bf44..5010e00ff4 100644 --- a/src/runtime/virtcontainers/stratovirt.go +++ b/src/runtime/virtcontainers/stratovirt.go @@ -900,7 +900,7 @@ func (s *stratovirt) hotplugBlk(ctx context.Context, drive *config.BlockDrive, o } devAddr := fmt.Sprintf("%d", slot) - if err := s.qmpMonitorCh.qmp.ExecutePCIDeviceAdd(s.qmpMonitorCh.ctx, drive.ID, drive.ID, driver, devAddr, "", "", 0, false, false); err != nil { + if err := s.qmpMonitorCh.qmp.ExecutePCIDeviceAdd(s.qmpMonitorCh.ctx, drive.ID, drive.ID, driver, devAddr, "", "", 0, false, false, ""); err != nil { return err } case RemoveDevice: From 6e677794edaa7a6f10c6b6fee98dc29f56a4d8a8 Mon Sep 17 00:00:00 2001 From: "zhangchen.kidd" Date: Mon, 16 Jun 2025 21:03:36 +0800 Subject: [PATCH 6/7] runtime: qemu: Add Independent IOThread support for virtio-blk Make hotplug virtio-blk device attach to Independent IOThread 0 as default when enabled the EnableIOThreads and IndepIOThreads. Signed-off-by: zhangchen.kidd --- src/runtime/virtcontainers/qemu.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index 045306be0e..6a732bc212 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -1744,7 +1744,13 @@ func (q *qemu) hotplugAddBlockDevice(ctx context.Context, drive *config.BlockDri queues := int(q.config.NumVCPUs()) - if err = q.qmpMonitorCh.qmp.ExecutePCIDeviceAdd(q.qmpMonitorCh.ctx, drive.ID, devID, driver, addr, bridge.ID, romFile, queues, true, defaultDisableModern, ""); err != nil { + // Make Independent IOThread 0 as the virtio-blk default. 
+ var iothreadID string + if q.config.EnableIOThreads && q.config.IndepIOThreads > 0 { + iothreadID = fmt.Sprintf("%s_%d", indepIOThreadsPrefix, 0) + } + + if err = q.qmpMonitorCh.qmp.ExecutePCIDeviceAdd(q.qmpMonitorCh.ctx, drive.ID, devID, driver, addr, bridge.ID, romFile, queues, true, defaultDisableModern, iothreadID); err != nil { return err } case q.config.BlockDeviceDriver == config.VirtioBlockCCW: From e29e65c103336f85f2b52d9e666135a2c60399c1 Mon Sep 17 00:00:00 2001 From: "zhangchen.kidd" Date: Tue, 17 Jun 2025 11:06:22 +0800 Subject: [PATCH 7/7] runtime: documentation: Add virtio-blk support iothread comments in docs Signed-off-by: zhangchen.kidd --- src/runtime/virtcontainers/documentation/api/1.0/api.md | 2 +- src/runtime/virtcontainers/hypervisor.go | 2 +- src/runtime/virtcontainers/persist/api/config.go | 2 +- src/runtime/virtcontainers/pkg/annotations/annotations.go | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/runtime/virtcontainers/documentation/api/1.0/api.md b/src/runtime/virtcontainers/documentation/api/1.0/api.md index 953c837af7..016060df54 100644 --- a/src/runtime/virtcontainers/documentation/api/1.0/api.md +++ b/src/runtime/virtcontainers/documentation/api/1.0/api.md @@ -246,7 +246,7 @@ type HypervisorConfig struct { DisableBlockDeviceUse bool // EnableIOThreads enables IO to be processed in a separate thread. - // Supported currently for virtio-scsi driver. + // Supported currently for virtio-scsi and virtio-blk(depend on IndepIOThreads) driver. EnableIOThreads bool // Independent IOThreads enables IO to be processed in a separate thread, it is diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index e7e294e4c8..0bf68d18df 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -602,7 +602,7 @@ type HypervisorConfig struct { DisableBlockDeviceUse bool // EnableIOThreads enables IO to be processed in a separate thread. 
- // Supported currently for virtio-scsi driver. + // Supported currently for virtio-scsi driver and virtio-blk(depend on IndepIOThreads) driver. EnableIOThreads bool // Independent IOThreads enables IO to be processed in a separate thread. diff --git a/src/runtime/virtcontainers/persist/api/config.go b/src/runtime/virtcontainers/persist/api/config.go index e9284d30d8..08aebfa9d4 100644 --- a/src/runtime/virtcontainers/persist/api/config.go +++ b/src/runtime/virtcontainers/persist/api/config.go @@ -161,7 +161,7 @@ type HypervisorConfig struct { DisableBlockDeviceUse bool // EnableIOThreads enables IO to be processed in a separate thread. - // Supported currently for virtio-scsi driver. + // Supported currently for virtio-scsi and virtio-blk(depend on IndepIOThreads) driver. EnableIOThreads bool // Independent IOThreads enables IO to be processed in a separate thread, it is diff --git a/src/runtime/virtcontainers/pkg/annotations/annotations.go b/src/runtime/virtcontainers/pkg/annotations/annotations.go index d32a14ed1a..2ba0e047a9 100644 --- a/src/runtime/virtcontainers/pkg/annotations/annotations.go +++ b/src/runtime/virtcontainers/pkg/annotations/annotations.go @@ -218,7 +218,7 @@ const ( DisableBlockDeviceUse = kataAnnotHypervisorPrefix + "disable_block_device_use" // EnableIOThreads is a sandbox annotation to enable IO to be processed in a separate thread. - // Supported currently for virtio-scsi driver. + // Supported currently for virtio-scsi and virtio-blk(depend on IndepIOThreads) driver. EnableIOThreads = kataAnnotHypervisorPrefix + "enable_iothreads" // Independent IOThreads enables IO to be processed in a separate thread, it is