runtime: Introduce independent IOThreads framework

Introduce independent IOThread framework for Kata container.

What is the indep_iothreads:
This new feature introduce a way to pre-alloc IOThreads
for QEMU hypervisor (maybe other hypervisor can support too).
Independent IOThreads enables IO to be processed in a separate thread.
To generally improve the performance of each module, avoid them
running in the QEMU main loop.

Why need indep_iothreads:
In Kata container implementation, many devices based on hotplug
mechanism. The real workload container may not sync the same
lifecycle with the VM. It may require to hotplug/unplug new disks
or other devices without destroying the VM. So we can keep the
IOThread with the VM as a IOThread pool(some devices need multi iothreads
for performance like virtio-blk vq-mapping), the hotplug devices
can attach/detach with the IOThread according to business needs.
At the same time, QEMU also support the "x-blockdev-set-iothread"
to change iothreads(but it need stop VM for data secure).
Current QEMU have many devices support iothread, virtio-blk,
virtio-scsi, virtio-balloon, monitor, colo-compare...etc...

How it works:
Add new item in hypervisor struct named "indep_iothreads" in toml.
The default value is 0, it reused the original "enable_iothreads" as
the switch. If the "indep_iothreads" != 0 and "enable_iothreads" = true
it will add qmp object -iothread indepIOThreadsPrefix_No when VM startup.
The first user is the virtio-blk, it will attach the indep_iothread_0
as default when enable iothread for virtio-blk.

Thanks
Chen

Signed-off-by: zhangchen.kidd <zhangchen.kidd@jd.com>
This commit is contained in:
zhangchen.kidd 2025-07-24 15:50:02 +08:00
parent 9379a18c8a
commit 977056492d
10 changed files with 81 additions and 35 deletions

View File

@ -260,6 +260,7 @@ DEFVIRTIOFSQUEUESIZE ?= 1024
# Make sure you quote args. # Make sure you quote args.
DEFVIRTIOFSEXTRAARGS ?= [\"--thread-pool-size=1\", \"--announce-submounts\"] DEFVIRTIOFSEXTRAARGS ?= [\"--thread-pool-size=1\", \"--announce-submounts\"]
DEFENABLEIOTHREADS := false DEFENABLEIOTHREADS := false
DEFINDEPIOTHREADS := 0
DEFENABLEVHOSTUSERSTORE := false DEFENABLEVHOSTUSERSTORE := false
DEFVHOSTUSERSTOREPATH := $(PKGRUNDIR)/vhost-user DEFVHOSTUSERSTOREPATH := $(PKGRUNDIR)/vhost-user
DEFVALIDVHOSTUSERSTOREPATHS := [\"$(DEFVHOSTUSERSTOREPATH)\"] DEFVALIDVHOSTUSERSTOREPATHS := [\"$(DEFVHOSTUSERSTOREPATH)\"]
@ -731,6 +732,7 @@ USER_VARS += DEFVIRTIOFSEXTRAARGS
USER_VARS += DEFENABLEANNOTATIONS USER_VARS += DEFENABLEANNOTATIONS
USER_VARS += DEFENABLEANNOTATIONSTEE USER_VARS += DEFENABLEANNOTATIONSTEE
USER_VARS += DEFENABLEIOTHREADS USER_VARS += DEFENABLEIOTHREADS
USER_VARS += DEFINDEPIOTHREADS
USER_VARS += DEFSECCOMPSANDBOXPARAM USER_VARS += DEFSECCOMPSANDBOXPARAM
USER_VARS += DEFENABLEVHOSTUSERSTORE USER_VARS += DEFENABLEVHOSTUSERSTORE
USER_VARS += DEFVHOSTUSERSTOREPATH USER_VARS += DEFVHOSTUSERSTOREPATH

View File

@ -207,41 +207,42 @@ const (
) )
type RuntimeConfigOptions struct { type RuntimeConfigOptions struct {
Hypervisor string Hypervisor string
HypervisorPath string HypervisorPath string
DefaultGuestHookPath string DefaultGuestHookPath string
KernelPath string KernelPath string
ImagePath string ImagePath string
RootfsType string RootfsType string
KernelParams string KernelParams string
MachineType string MachineType string
LogPath string LogPath string
BlockDeviceDriver string BlockDeviceDriver string
BlockDeviceAIO string BlockDeviceAIO string
SharedFS string SharedFS string
VirtioFSDaemon string VirtioFSDaemon string
JaegerEndpoint string JaegerEndpoint string
JaegerUser string JaegerUser string
JaegerPassword string JaegerPassword string
PFlash []string PFlash []string
HotPlugVFIO config.PCIePort HotPlugVFIO config.PCIePort
ColdPlugVFIO config.PCIePort ColdPlugVFIO config.PCIePort
PCIeRootPort uint32 PCIeRootPort uint32
PCIeSwitchPort uint32 PCIeSwitchPort uint32
DefaultVCPUCount uint32 DefaultVCPUCount uint32
DefaultMaxVCPUCount uint32 DefaultMaxVCPUCount uint32
DefaultMemSize uint32 DefaultMemSize uint32
DefaultMaxMemorySize uint64 DefaultMaxMemorySize uint64
DefaultMsize9p uint32 DefaultMsize9p uint32
DisableBlock bool DefaultIndepIOThreads uint32
EnableIOThreads bool DisableBlock bool
DisableNewNetNs bool EnableIOThreads bool
HypervisorDebug bool DisableNewNetNs bool
RuntimeDebug bool HypervisorDebug bool
RuntimeTrace bool RuntimeDebug bool
AgentDebug bool RuntimeTrace bool
AgentTrace bool AgentDebug bool
EnablePprof bool AgentTrace bool
EnablePprof bool
} }
// ContainerIDTestDataType is a type used to test Container and Sandbox ID's. // ContainerIDTestDataType is a type used to test Container and Sandbox ID's.
@ -318,6 +319,7 @@ func MakeRuntimeConfigFileData(config RuntimeConfigOptions) string {
default_memory = ` + strconv.FormatUint(uint64(config.DefaultMemSize), 10) + ` default_memory = ` + strconv.FormatUint(uint64(config.DefaultMemSize), 10) + `
disable_block_device_use = ` + strconv.FormatBool(config.DisableBlock) + ` disable_block_device_use = ` + strconv.FormatBool(config.DisableBlock) + `
enable_iothreads = ` + strconv.FormatBool(config.EnableIOThreads) + ` enable_iothreads = ` + strconv.FormatBool(config.EnableIOThreads) + `
indep_iothreads = ` + strconv.FormatUint(uint64(config.DefaultIndepIOThreads), 10) + `
cold_plug_vfio = "` + config.ColdPlugVFIO.String() + `" cold_plug_vfio = "` + config.ColdPlugVFIO.String() + `"
hot_plug_vfio = "` + config.HotPlugVFIO.String() + `" hot_plug_vfio = "` + config.HotPlugVFIO.String() + `"
pcie_root_port = ` + strconv.FormatUint(uint64(config.PCIeRootPort), 10) + ` pcie_root_port = ` + strconv.FormatUint(uint64(config.PCIeRootPort), 10) + `

View File

@ -75,6 +75,7 @@ const defaultBlockDeviceCacheSet bool = false
const defaultBlockDeviceCacheDirect bool = false const defaultBlockDeviceCacheDirect bool = false
const defaultBlockDeviceCacheNoflush bool = false const defaultBlockDeviceCacheNoflush bool = false
const defaultEnableIOThreads bool = false const defaultEnableIOThreads bool = false
const defaultIndepIOThreads uint32 = 0
const defaultEnableMemPrealloc bool = false const defaultEnableMemPrealloc bool = false
const defaultEnableReclaimGuestFreedMemory bool = false const defaultEnableReclaimGuestFreedMemory bool = false
const defaultEnableHugePages bool = false const defaultEnableHugePages bool = false

View File

@ -155,6 +155,7 @@ type hypervisor struct {
Debug bool `toml:"enable_debug"` Debug bool `toml:"enable_debug"`
DisableNestingChecks bool `toml:"disable_nesting_checks"` DisableNestingChecks bool `toml:"disable_nesting_checks"`
EnableIOThreads bool `toml:"enable_iothreads"` EnableIOThreads bool `toml:"enable_iothreads"`
IndepIOThreads uint32 `toml:"indep_iothreads"`
DisableImageNvdimm bool `toml:"disable_image_nvdimm"` DisableImageNvdimm bool `toml:"disable_image_nvdimm"`
HotPlugVFIO config.PCIePort `toml:"hot_plug_vfio"` HotPlugVFIO config.PCIePort `toml:"hot_plug_vfio"`
ColdPlugVFIO config.PCIePort `toml:"cold_plug_vfio"` ColdPlugVFIO config.PCIePort `toml:"cold_plug_vfio"`
@ -614,6 +615,14 @@ func (h hypervisor) msize9p() uint32 {
return h.Msize9p return h.Msize9p
} }
func (h hypervisor) indepiothreads() uint32 {
if h.IndepIOThreads == 0 {
return defaultIndepIOThreads
}
return h.IndepIOThreads
}
func (h hypervisor) guestHookPath() string { func (h hypervisor) guestHookPath() string {
if h.GuestHookPath == "" { if h.GuestHookPath == "" {
return defaultGuestHookPath return defaultGuestHookPath
@ -810,6 +819,7 @@ func newFirecrackerHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
DisableNestingChecks: h.DisableNestingChecks, DisableNestingChecks: h.DisableNestingChecks,
BlockDeviceDriver: blockDriver, BlockDeviceDriver: blockDriver,
EnableIOThreads: h.EnableIOThreads, EnableIOThreads: h.EnableIOThreads,
IndepIOThreads: h.indepiothreads(),
DisableVhostNet: true, // vhost-net backend is not supported in Firecracker DisableVhostNet: true, // vhost-net backend is not supported in Firecracker
GuestHookPath: h.guestHookPath(), GuestHookPath: h.guestHookPath(),
RxRateLimiterMaxRate: rxRateLimiterMaxRate, RxRateLimiterMaxRate: rxRateLimiterMaxRate,
@ -964,6 +974,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
BlockDeviceCacheDirect: h.BlockDeviceCacheDirect, BlockDeviceCacheDirect: h.BlockDeviceCacheDirect,
BlockDeviceCacheNoflush: h.BlockDeviceCacheNoflush, BlockDeviceCacheNoflush: h.BlockDeviceCacheNoflush,
EnableIOThreads: h.EnableIOThreads, EnableIOThreads: h.EnableIOThreads,
IndepIOThreads: h.indepiothreads(),
Msize9p: h.msize9p(), Msize9p: h.msize9p(),
DisableImageNvdimm: h.DisableImageNvdimm, DisableImageNvdimm: h.DisableImageNvdimm,
HotPlugVFIO: h.hotPlugVFIO(), HotPlugVFIO: h.hotPlugVFIO(),
@ -1094,6 +1105,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
BlockDeviceCacheSet: h.BlockDeviceCacheSet, BlockDeviceCacheSet: h.BlockDeviceCacheSet,
BlockDeviceCacheDirect: h.BlockDeviceCacheDirect, BlockDeviceCacheDirect: h.BlockDeviceCacheDirect,
EnableIOThreads: h.EnableIOThreads, EnableIOThreads: h.EnableIOThreads,
IndepIOThreads: h.indepiothreads(),
Msize9p: h.msize9p(), Msize9p: h.msize9p(),
DisableImageNvdimm: h.DisableImageNvdimm, DisableImageNvdimm: h.DisableImageNvdimm,
ColdPlugVFIO: h.coldPlugVFIO(), ColdPlugVFIO: h.coldPlugVFIO(),
@ -1452,6 +1464,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig {
BlockDeviceCacheDirect: defaultBlockDeviceCacheDirect, BlockDeviceCacheDirect: defaultBlockDeviceCacheDirect,
BlockDeviceCacheNoflush: defaultBlockDeviceCacheNoflush, BlockDeviceCacheNoflush: defaultBlockDeviceCacheNoflush,
EnableIOThreads: defaultEnableIOThreads, EnableIOThreads: defaultEnableIOThreads,
IndepIOThreads: defaultIndepIOThreads,
Msize9p: defaultMsize9p, Msize9p: defaultMsize9p,
ColdPlugVFIO: defaultColdPlugVFIO, ColdPlugVFIO: defaultColdPlugVFIO,
HotPlugVFIO: defaultHotPlugVFIO, HotPlugVFIO: defaultHotPlugVFIO,

View File

@ -840,6 +840,16 @@ func addHypervisorBlockOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig)
return err return err
} }
if err := newAnnotationConfiguration(ocispec, vcAnnotations.IndepIOThreads).setUintWithCheck(func(indepiothreads uint64) error {
if indepiothreads < 0 {
return fmt.Errorf("Error parsing annotation for indepiothreads, please specify positive numeric value")
}
sbConfig.HypervisorConfig.IndepIOThreads = uint32(indepiothreads)
return nil
}); err != nil {
return err
}
if err := newAnnotationConfiguration(ocispec, vcAnnotations.BlockDeviceCacheSet).setBool(func(blockDeviceCacheSet bool) { if err := newAnnotationConfiguration(ocispec, vcAnnotations.BlockDeviceCacheSet).setBool(func(blockDeviceCacheSet bool) {
sbConfig.HypervisorConfig.BlockDeviceCacheSet = blockDeviceCacheSet sbConfig.HypervisorConfig.BlockDeviceCacheSet = blockDeviceCacheSet
}); err != nil { }); err != nil {

View File

@ -249,6 +249,10 @@ type HypervisorConfig struct {
// Supported currently for virtio-scsi driver. // Supported currently for virtio-scsi driver.
EnableIOThreads bool EnableIOThreads bool
// Independent IOThreads enables IO to be processed in a separate thread, it is
// for QEMU hotplug device attach to iothread, like virtio-blk.
IndepIOThreads uint32
// Debug changes the default hypervisor and kernel parameters to // Debug changes the default hypervisor and kernel parameters to
// enable debug output where available. // enable debug output where available.
Debug bool Debug bool

View File

@ -605,6 +605,9 @@ type HypervisorConfig struct {
// Supported currently for virtio-scsi driver. // Supported currently for virtio-scsi driver.
EnableIOThreads bool EnableIOThreads bool
// Independent IOThreads enables IO to be processed in a separate thread.
IndepIOThreads uint32
// Debug changes the default hypervisor and kernel parameters to // Debug changes the default hypervisor and kernel parameters to
// enable debug output where available. // enable debug output where available.
Debug bool Debug bool

View File

@ -235,6 +235,7 @@ func (s *Sandbox) dumpConfig(ss *persistapi.SandboxState) {
BlockDeviceCacheNoflush: sconfig.HypervisorConfig.BlockDeviceCacheNoflush, BlockDeviceCacheNoflush: sconfig.HypervisorConfig.BlockDeviceCacheNoflush,
DisableBlockDeviceUse: sconfig.HypervisorConfig.DisableBlockDeviceUse, DisableBlockDeviceUse: sconfig.HypervisorConfig.DisableBlockDeviceUse,
EnableIOThreads: sconfig.HypervisorConfig.EnableIOThreads, EnableIOThreads: sconfig.HypervisorConfig.EnableIOThreads,
IndepIOThreads: sconfig.HypervisorConfig.IndepIOThreads,
Debug: sconfig.HypervisorConfig.Debug, Debug: sconfig.HypervisorConfig.Debug,
MemPrealloc: sconfig.HypervisorConfig.MemPrealloc, MemPrealloc: sconfig.HypervisorConfig.MemPrealloc,
HugePages: sconfig.HypervisorConfig.HugePages, HugePages: sconfig.HypervisorConfig.HugePages,
@ -473,6 +474,7 @@ func loadSandboxConfig(id string) (*SandboxConfig, error) {
BlockDeviceCacheNoflush: hconf.BlockDeviceCacheNoflush, BlockDeviceCacheNoflush: hconf.BlockDeviceCacheNoflush,
DisableBlockDeviceUse: hconf.DisableBlockDeviceUse, DisableBlockDeviceUse: hconf.DisableBlockDeviceUse,
EnableIOThreads: hconf.EnableIOThreads, EnableIOThreads: hconf.EnableIOThreads,
IndepIOThreads: hconf.IndepIOThreads,
Debug: hconf.Debug, Debug: hconf.Debug,
MemPrealloc: hconf.MemPrealloc, MemPrealloc: hconf.MemPrealloc,
HugePages: hconf.HugePages, HugePages: hconf.HugePages,

View File

@ -164,6 +164,10 @@ type HypervisorConfig struct {
// Supported currently for virtio-scsi driver. // Supported currently for virtio-scsi driver.
EnableIOThreads bool EnableIOThreads bool
// Independent IOThreads enables IO to be processed in a separate thread, it is
// for QEMU hotplug device attach to iothread, like virtio-blk.
IndepIOThreads uint32
// Debug changes the default hypervisor and kernel parameters to // Debug changes the default hypervisor and kernel parameters to
// enable debug output where available. // enable debug output where available.
Debug bool Debug bool

View File

@ -221,6 +221,11 @@ const (
// Supported currently for virtio-scsi driver. // Supported currently for virtio-scsi driver.
EnableIOThreads = kataAnnotHypervisorPrefix + "enable_iothreads" EnableIOThreads = kataAnnotHypervisorPrefix + "enable_iothreads"
// Independent IOThreads enables IO to be processed in a separate thread, it is
// for QEMU hotplug device attach to iothread, like virtio-blk.
IndepIOThreads = kataAnnotHypervisorPrefix + "indep_iothreads"
// BlockDeviceCacheSet is a sandbox annotation that specifies cache-related options will be set to block devices or not. // BlockDeviceCacheSet is a sandbox annotation that specifies cache-related options will be set to block devices or not.
BlockDeviceCacheSet = kataAnnotHypervisorPrefix + "block_device_cache_set" BlockDeviceCacheSet = kataAnnotHypervisorPrefix + "block_device_cache_set"