runtime: Introduce independent IOThreads framework

Introduce an independent IOThreads framework for Kata Containers.

What is indep_iothreads:
This new feature introduces a way to pre-allocate IOThreads
for the QEMU hypervisor (other hypervisors may be able to support it too).
Independent IOThreads enable IO to be processed in a separate thread.
This generally improves the performance of each module by keeping it from
running in the QEMU main loop.

Why indep_iothreads is needed:
In the Kata Containers implementation, many devices are based on the hotplug
mechanism. The real workload container may not share the same
lifecycle as the VM. It may be necessary to hotplug/unplug new disks
or other devices without destroying the VM. So we can keep the
IOThreads with the VM as an IOThread pool (some devices need multiple iothreads
for performance, like virtio-blk vq-mapping), and the hotplugged devices
can attach to/detach from the IOThreads according to business needs.
At the same time, QEMU also supports "x-blockdev-set-iothread"
to change iothreads (but it requires stopping the VM for data safety).
Current QEMU has many devices that support iothreads: virtio-blk,
virtio-scsi, virtio-balloon, monitor, colo-compare, etc.

How it works:
Add a new item named "indep_iothreads" to the hypervisor struct in the toml.
The default value is 0, and it reuses the original "enable_iothreads" as
the switch. If "indep_iothreads" != 0 and "enable_iothreads" = true,
it will add the qmp object -iothread indepIOThreadsPrefix_No at VM startup.
The first user is virtio-blk, which will attach to indep_iothread_0
by default when iothreads are enabled for virtio-blk.

Thanks
Chen

Signed-off-by: zhangchen.kidd <zhangchen.kidd@jd.com>
This commit is contained in:
zhangchen.kidd 2025-07-24 15:50:02 +08:00
parent 9379a18c8a
commit 977056492d
10 changed files with 81 additions and 35 deletions

View File

@ -260,6 +260,7 @@ DEFVIRTIOFSQUEUESIZE ?= 1024
# Make sure you quote args. # Make sure you quote args.
DEFVIRTIOFSEXTRAARGS ?= [\"--thread-pool-size=1\", \"--announce-submounts\"] DEFVIRTIOFSEXTRAARGS ?= [\"--thread-pool-size=1\", \"--announce-submounts\"]
DEFENABLEIOTHREADS := false DEFENABLEIOTHREADS := false
DEFINDEPIOTHREADS := 0
DEFENABLEVHOSTUSERSTORE := false DEFENABLEVHOSTUSERSTORE := false
DEFVHOSTUSERSTOREPATH := $(PKGRUNDIR)/vhost-user DEFVHOSTUSERSTOREPATH := $(PKGRUNDIR)/vhost-user
DEFVALIDVHOSTUSERSTOREPATHS := [\"$(DEFVHOSTUSERSTOREPATH)\"] DEFVALIDVHOSTUSERSTOREPATHS := [\"$(DEFVHOSTUSERSTOREPATH)\"]
@ -731,6 +732,7 @@ USER_VARS += DEFVIRTIOFSEXTRAARGS
USER_VARS += DEFENABLEANNOTATIONS USER_VARS += DEFENABLEANNOTATIONS
USER_VARS += DEFENABLEANNOTATIONSTEE USER_VARS += DEFENABLEANNOTATIONSTEE
USER_VARS += DEFENABLEIOTHREADS USER_VARS += DEFENABLEIOTHREADS
USER_VARS += DEFINDEPIOTHREADS
USER_VARS += DEFSECCOMPSANDBOXPARAM USER_VARS += DEFSECCOMPSANDBOXPARAM
USER_VARS += DEFENABLEVHOSTUSERSTORE USER_VARS += DEFENABLEVHOSTUSERSTORE
USER_VARS += DEFVHOSTUSERSTOREPATH USER_VARS += DEFVHOSTUSERSTOREPATH

View File

@ -233,6 +233,7 @@ type RuntimeConfigOptions struct {
DefaultMemSize uint32 DefaultMemSize uint32
DefaultMaxMemorySize uint64 DefaultMaxMemorySize uint64
DefaultMsize9p uint32 DefaultMsize9p uint32
DefaultIndepIOThreads uint32
DisableBlock bool DisableBlock bool
EnableIOThreads bool EnableIOThreads bool
DisableNewNetNs bool DisableNewNetNs bool
@ -318,6 +319,7 @@ func MakeRuntimeConfigFileData(config RuntimeConfigOptions) string {
default_memory = ` + strconv.FormatUint(uint64(config.DefaultMemSize), 10) + ` default_memory = ` + strconv.FormatUint(uint64(config.DefaultMemSize), 10) + `
disable_block_device_use = ` + strconv.FormatBool(config.DisableBlock) + ` disable_block_device_use = ` + strconv.FormatBool(config.DisableBlock) + `
enable_iothreads = ` + strconv.FormatBool(config.EnableIOThreads) + ` enable_iothreads = ` + strconv.FormatBool(config.EnableIOThreads) + `
indep_iothreads = ` + strconv.FormatUint(uint64(config.DefaultIndepIOThreads), 10) + `
cold_plug_vfio = "` + config.ColdPlugVFIO.String() + `" cold_plug_vfio = "` + config.ColdPlugVFIO.String() + `"
hot_plug_vfio = "` + config.HotPlugVFIO.String() + `" hot_plug_vfio = "` + config.HotPlugVFIO.String() + `"
pcie_root_port = ` + strconv.FormatUint(uint64(config.PCIeRootPort), 10) + ` pcie_root_port = ` + strconv.FormatUint(uint64(config.PCIeRootPort), 10) + `

View File

@ -75,6 +75,7 @@ const defaultBlockDeviceCacheSet bool = false
const defaultBlockDeviceCacheDirect bool = false const defaultBlockDeviceCacheDirect bool = false
const defaultBlockDeviceCacheNoflush bool = false const defaultBlockDeviceCacheNoflush bool = false
const defaultEnableIOThreads bool = false const defaultEnableIOThreads bool = false
const defaultIndepIOThreads uint32 = 0
const defaultEnableMemPrealloc bool = false const defaultEnableMemPrealloc bool = false
const defaultEnableReclaimGuestFreedMemory bool = false const defaultEnableReclaimGuestFreedMemory bool = false
const defaultEnableHugePages bool = false const defaultEnableHugePages bool = false

View File

@ -155,6 +155,7 @@ type hypervisor struct {
Debug bool `toml:"enable_debug"` Debug bool `toml:"enable_debug"`
DisableNestingChecks bool `toml:"disable_nesting_checks"` DisableNestingChecks bool `toml:"disable_nesting_checks"`
EnableIOThreads bool `toml:"enable_iothreads"` EnableIOThreads bool `toml:"enable_iothreads"`
IndepIOThreads uint32 `toml:"indep_iothreads"`
DisableImageNvdimm bool `toml:"disable_image_nvdimm"` DisableImageNvdimm bool `toml:"disable_image_nvdimm"`
HotPlugVFIO config.PCIePort `toml:"hot_plug_vfio"` HotPlugVFIO config.PCIePort `toml:"hot_plug_vfio"`
ColdPlugVFIO config.PCIePort `toml:"cold_plug_vfio"` ColdPlugVFIO config.PCIePort `toml:"cold_plug_vfio"`
@ -614,6 +615,14 @@ func (h hypervisor) msize9p() uint32 {
return h.Msize9p return h.Msize9p
} }
// indepiothreads returns the number of independent IOThreads set in the
// hypervisor configuration, or defaultIndepIOThreads when the configured
// value is zero.
func (h hypervisor) indepiothreads() uint32 {
	if count := h.IndepIOThreads; count != 0 {
		return count
	}

	return defaultIndepIOThreads
}
func (h hypervisor) guestHookPath() string { func (h hypervisor) guestHookPath() string {
if h.GuestHookPath == "" { if h.GuestHookPath == "" {
return defaultGuestHookPath return defaultGuestHookPath
@ -810,6 +819,7 @@ func newFirecrackerHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
DisableNestingChecks: h.DisableNestingChecks, DisableNestingChecks: h.DisableNestingChecks,
BlockDeviceDriver: blockDriver, BlockDeviceDriver: blockDriver,
EnableIOThreads: h.EnableIOThreads, EnableIOThreads: h.EnableIOThreads,
IndepIOThreads: h.indepiothreads(),
DisableVhostNet: true, // vhost-net backend is not supported in Firecracker DisableVhostNet: true, // vhost-net backend is not supported in Firecracker
GuestHookPath: h.guestHookPath(), GuestHookPath: h.guestHookPath(),
RxRateLimiterMaxRate: rxRateLimiterMaxRate, RxRateLimiterMaxRate: rxRateLimiterMaxRate,
@ -964,6 +974,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
BlockDeviceCacheDirect: h.BlockDeviceCacheDirect, BlockDeviceCacheDirect: h.BlockDeviceCacheDirect,
BlockDeviceCacheNoflush: h.BlockDeviceCacheNoflush, BlockDeviceCacheNoflush: h.BlockDeviceCacheNoflush,
EnableIOThreads: h.EnableIOThreads, EnableIOThreads: h.EnableIOThreads,
IndepIOThreads: h.indepiothreads(),
Msize9p: h.msize9p(), Msize9p: h.msize9p(),
DisableImageNvdimm: h.DisableImageNvdimm, DisableImageNvdimm: h.DisableImageNvdimm,
HotPlugVFIO: h.hotPlugVFIO(), HotPlugVFIO: h.hotPlugVFIO(),
@ -1094,6 +1105,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
BlockDeviceCacheSet: h.BlockDeviceCacheSet, BlockDeviceCacheSet: h.BlockDeviceCacheSet,
BlockDeviceCacheDirect: h.BlockDeviceCacheDirect, BlockDeviceCacheDirect: h.BlockDeviceCacheDirect,
EnableIOThreads: h.EnableIOThreads, EnableIOThreads: h.EnableIOThreads,
IndepIOThreads: h.indepiothreads(),
Msize9p: h.msize9p(), Msize9p: h.msize9p(),
DisableImageNvdimm: h.DisableImageNvdimm, DisableImageNvdimm: h.DisableImageNvdimm,
ColdPlugVFIO: h.coldPlugVFIO(), ColdPlugVFIO: h.coldPlugVFIO(),
@ -1452,6 +1464,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig {
BlockDeviceCacheDirect: defaultBlockDeviceCacheDirect, BlockDeviceCacheDirect: defaultBlockDeviceCacheDirect,
BlockDeviceCacheNoflush: defaultBlockDeviceCacheNoflush, BlockDeviceCacheNoflush: defaultBlockDeviceCacheNoflush,
EnableIOThreads: defaultEnableIOThreads, EnableIOThreads: defaultEnableIOThreads,
IndepIOThreads: defaultIndepIOThreads,
Msize9p: defaultMsize9p, Msize9p: defaultMsize9p,
ColdPlugVFIO: defaultColdPlugVFIO, ColdPlugVFIO: defaultColdPlugVFIO,
HotPlugVFIO: defaultHotPlugVFIO, HotPlugVFIO: defaultHotPlugVFIO,

View File

@ -840,6 +840,16 @@ func addHypervisorBlockOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig)
return err return err
} }
// Override IndepIOThreads from the sandbox annotation. The callback receives
// an unsigned value, so it cannot be negative and no sign check is needed
// here (a `< 0` comparison on a uint64 is never true).
if err := newAnnotationConfiguration(ocispec, vcAnnotations.IndepIOThreads).setUintWithCheck(func(indepiothreads uint64) error {
	sbConfig.HypervisorConfig.IndepIOThreads = uint32(indepiothreads)
	return nil
}); err != nil {
	return err
}
if err := newAnnotationConfiguration(ocispec, vcAnnotations.BlockDeviceCacheSet).setBool(func(blockDeviceCacheSet bool) { if err := newAnnotationConfiguration(ocispec, vcAnnotations.BlockDeviceCacheSet).setBool(func(blockDeviceCacheSet bool) {
sbConfig.HypervisorConfig.BlockDeviceCacheSet = blockDeviceCacheSet sbConfig.HypervisorConfig.BlockDeviceCacheSet = blockDeviceCacheSet
}); err != nil { }); err != nil {

View File

@ -249,6 +249,10 @@ type HypervisorConfig struct {
// Supported currently for virtio-scsi driver. // Supported currently for virtio-scsi driver.
EnableIOThreads bool EnableIOThreads bool
// Independent IOThreads enables IO to be processed in a separate thread, it is
// for QEMU hotplug device attach to iothread, like virtio-blk.
IndepIOThreads uint32
// Debug changes the default hypervisor and kernel parameters to // Debug changes the default hypervisor and kernel parameters to
// enable debug output where available. // enable debug output where available.
Debug bool Debug bool

View File

@ -605,6 +605,9 @@ type HypervisorConfig struct {
// Supported currently for virtio-scsi driver. // Supported currently for virtio-scsi driver.
EnableIOThreads bool EnableIOThreads bool
// Independent IOThreads enables IO to be processed in a separate thread.
IndepIOThreads uint32
// Debug changes the default hypervisor and kernel parameters to // Debug changes the default hypervisor and kernel parameters to
// enable debug output where available. // enable debug output where available.
Debug bool Debug bool

View File

@ -235,6 +235,7 @@ func (s *Sandbox) dumpConfig(ss *persistapi.SandboxState) {
BlockDeviceCacheNoflush: sconfig.HypervisorConfig.BlockDeviceCacheNoflush, BlockDeviceCacheNoflush: sconfig.HypervisorConfig.BlockDeviceCacheNoflush,
DisableBlockDeviceUse: sconfig.HypervisorConfig.DisableBlockDeviceUse, DisableBlockDeviceUse: sconfig.HypervisorConfig.DisableBlockDeviceUse,
EnableIOThreads: sconfig.HypervisorConfig.EnableIOThreads, EnableIOThreads: sconfig.HypervisorConfig.EnableIOThreads,
IndepIOThreads: sconfig.HypervisorConfig.IndepIOThreads,
Debug: sconfig.HypervisorConfig.Debug, Debug: sconfig.HypervisorConfig.Debug,
MemPrealloc: sconfig.HypervisorConfig.MemPrealloc, MemPrealloc: sconfig.HypervisorConfig.MemPrealloc,
HugePages: sconfig.HypervisorConfig.HugePages, HugePages: sconfig.HypervisorConfig.HugePages,
@ -473,6 +474,7 @@ func loadSandboxConfig(id string) (*SandboxConfig, error) {
BlockDeviceCacheNoflush: hconf.BlockDeviceCacheNoflush, BlockDeviceCacheNoflush: hconf.BlockDeviceCacheNoflush,
DisableBlockDeviceUse: hconf.DisableBlockDeviceUse, DisableBlockDeviceUse: hconf.DisableBlockDeviceUse,
EnableIOThreads: hconf.EnableIOThreads, EnableIOThreads: hconf.EnableIOThreads,
IndepIOThreads: hconf.IndepIOThreads,
Debug: hconf.Debug, Debug: hconf.Debug,
MemPrealloc: hconf.MemPrealloc, MemPrealloc: hconf.MemPrealloc,
HugePages: hconf.HugePages, HugePages: hconf.HugePages,

View File

@ -164,6 +164,10 @@ type HypervisorConfig struct {
// Supported currently for virtio-scsi driver. // Supported currently for virtio-scsi driver.
EnableIOThreads bool EnableIOThreads bool
// Independent IOThreads enables IO to be processed in a separate thread, it is
// for QEMU hotplug device attach to iothread, like virtio-blk.
IndepIOThreads uint32
// Debug changes the default hypervisor and kernel parameters to // Debug changes the default hypervisor and kernel parameters to
// enable debug output where available. // enable debug output where available.
Debug bool Debug bool

View File

@ -221,6 +221,11 @@ const (
// Supported currently for virtio-scsi driver. // Supported currently for virtio-scsi driver.
EnableIOThreads = kataAnnotHypervisorPrefix + "enable_iothreads" EnableIOThreads = kataAnnotHypervisorPrefix + "enable_iothreads"
// Independent IOThreads enables IO to be processed in a separate thread, it is
// for QEMU hotplug device attach to iothread, like virtio-blk.
IndepIOThreads = kataAnnotHypervisorPrefix + "indep_iothreads"
// BlockDeviceCacheSet is a sandbox annotation that specifies cache-related options will be set to block devices or not. // BlockDeviceCacheSet is a sandbox annotation that specifies cache-related options will be set to block devices or not.
BlockDeviceCacheSet = kataAnnotHypervisorPrefix + "block_device_cache_set" BlockDeviceCacheSet = kataAnnotHypervisorPrefix + "block_device_cache_set"