runtime: Introduce independent IOThreads framework

Introduce an independent IOThreads framework for Kata Containers.

What is indep_iothreads:
This new feature introduces a way to pre-allocate IOThreads
for the QEMU hypervisor (other hypervisors may be able to support it too).
Independent IOThreads enable IO to be processed in separate threads.
This generally improves the performance of each module by keeping its
IO processing out of the QEMU main loop.

Why indep_iothreads is needed:
In the Kata Containers implementation, many devices rely on the hotplug
mechanism. The real workload container may not share the same
lifecycle as the VM. It may need to hotplug/unplug new disks
or other devices without destroying the VM. So we can keep the
IOThreads with the VM as an IOThread pool (some devices need multiple
iothreads for performance, like virtio-blk vq-mapping), and hotplugged
devices can attach to/detach from the IOThreads according to business needs.
In addition, QEMU supports the "x-blockdev-set-iothread" command
to change iothreads (but the VM must be stopped first for data safety).
Many QEMU devices currently support iothreads: virtio-blk,
virtio-scsi, virtio-balloon, monitor, colo-compare, etc.

How it works:
Add a new item named "indep_iothreads" to the hypervisor struct in the toml config.
The default value is 0, and it reuses the existing "enable_iothreads" as
the switch. If "indep_iothreads" != 0 and "enable_iothreads" = true,
it will add the QMP object -iothread indepIOThreadsPrefix_No at VM startup.
The first user is virtio-blk, which attaches to indep_iothread_0
by default when iothreads are enabled for virtio-blk.

Thanks
Chen

Signed-off-by: zhangchen.kidd <zhangchen.kidd@jd.com>
This commit is contained in:
zhangchen.kidd 2025-07-24 15:50:02 +08:00
parent 9379a18c8a
commit 977056492d
10 changed files with 81 additions and 35 deletions

View File

@ -260,6 +260,7 @@ DEFVIRTIOFSQUEUESIZE ?= 1024
# Make sure you quote args.
DEFVIRTIOFSEXTRAARGS ?= [\"--thread-pool-size=1\", \"--announce-submounts\"]
DEFENABLEIOTHREADS := false
DEFINDEPIOTHREADS := 0
DEFENABLEVHOSTUSERSTORE := false
DEFVHOSTUSERSTOREPATH := $(PKGRUNDIR)/vhost-user
DEFVALIDVHOSTUSERSTOREPATHS := [\"$(DEFVHOSTUSERSTOREPATH)\"]
@ -731,6 +732,7 @@ USER_VARS += DEFVIRTIOFSEXTRAARGS
USER_VARS += DEFENABLEANNOTATIONS
USER_VARS += DEFENABLEANNOTATIONSTEE
USER_VARS += DEFENABLEIOTHREADS
USER_VARS += DEFINDEPIOTHREADS
USER_VARS += DEFSECCOMPSANDBOXPARAM
USER_VARS += DEFENABLEVHOSTUSERSTORE
USER_VARS += DEFVHOSTUSERSTOREPATH

View File

@ -207,41 +207,42 @@ const (
)
type RuntimeConfigOptions struct {
Hypervisor string
HypervisorPath string
DefaultGuestHookPath string
KernelPath string
ImagePath string
RootfsType string
KernelParams string
MachineType string
LogPath string
BlockDeviceDriver string
BlockDeviceAIO string
SharedFS string
VirtioFSDaemon string
JaegerEndpoint string
JaegerUser string
JaegerPassword string
PFlash []string
HotPlugVFIO config.PCIePort
ColdPlugVFIO config.PCIePort
PCIeRootPort uint32
PCIeSwitchPort uint32
DefaultVCPUCount uint32
DefaultMaxVCPUCount uint32
DefaultMemSize uint32
DefaultMaxMemorySize uint64
DefaultMsize9p uint32
DisableBlock bool
EnableIOThreads bool
DisableNewNetNs bool
HypervisorDebug bool
RuntimeDebug bool
RuntimeTrace bool
AgentDebug bool
AgentTrace bool
EnablePprof bool
Hypervisor string
HypervisorPath string
DefaultGuestHookPath string
KernelPath string
ImagePath string
RootfsType string
KernelParams string
MachineType string
LogPath string
BlockDeviceDriver string
BlockDeviceAIO string
SharedFS string
VirtioFSDaemon string
JaegerEndpoint string
JaegerUser string
JaegerPassword string
PFlash []string
HotPlugVFIO config.PCIePort
ColdPlugVFIO config.PCIePort
PCIeRootPort uint32
PCIeSwitchPort uint32
DefaultVCPUCount uint32
DefaultMaxVCPUCount uint32
DefaultMemSize uint32
DefaultMaxMemorySize uint64
DefaultMsize9p uint32
DefaultIndepIOThreads uint32
DisableBlock bool
EnableIOThreads bool
DisableNewNetNs bool
HypervisorDebug bool
RuntimeDebug bool
RuntimeTrace bool
AgentDebug bool
AgentTrace bool
EnablePprof bool
}
// ContainerIDTestDataType is a type used to test Container and Sandbox ID's.
@ -318,6 +319,7 @@ func MakeRuntimeConfigFileData(config RuntimeConfigOptions) string {
default_memory = ` + strconv.FormatUint(uint64(config.DefaultMemSize), 10) + `
disable_block_device_use = ` + strconv.FormatBool(config.DisableBlock) + `
enable_iothreads = ` + strconv.FormatBool(config.EnableIOThreads) + `
indep_iothreads = ` + strconv.FormatUint(uint64(config.DefaultIndepIOThreads), 10) + `
cold_plug_vfio = "` + config.ColdPlugVFIO.String() + `"
hot_plug_vfio = "` + config.HotPlugVFIO.String() + `"
pcie_root_port = ` + strconv.FormatUint(uint64(config.PCIeRootPort), 10) + `

View File

@ -75,6 +75,7 @@ const defaultBlockDeviceCacheSet bool = false
const defaultBlockDeviceCacheDirect bool = false
const defaultBlockDeviceCacheNoflush bool = false
const defaultEnableIOThreads bool = false
const defaultIndepIOThreads uint32 = 0
const defaultEnableMemPrealloc bool = false
const defaultEnableReclaimGuestFreedMemory bool = false
const defaultEnableHugePages bool = false

View File

@ -155,6 +155,7 @@ type hypervisor struct {
Debug bool `toml:"enable_debug"`
DisableNestingChecks bool `toml:"disable_nesting_checks"`
EnableIOThreads bool `toml:"enable_iothreads"`
IndepIOThreads uint32 `toml:"indep_iothreads"`
DisableImageNvdimm bool `toml:"disable_image_nvdimm"`
HotPlugVFIO config.PCIePort `toml:"hot_plug_vfio"`
ColdPlugVFIO config.PCIePort `toml:"cold_plug_vfio"`
@ -614,6 +615,14 @@ func (h hypervisor) msize9p() uint32 {
return h.Msize9p
}
// indepiothreads returns the configured number of independent IOThreads,
// falling back to defaultIndepIOThreads when the setting is left at zero.
func (h hypervisor) indepiothreads() uint32 {
	if configured := h.IndepIOThreads; configured != 0 {
		return configured
	}

	return defaultIndepIOThreads
}
func (h hypervisor) guestHookPath() string {
if h.GuestHookPath == "" {
return defaultGuestHookPath
@ -810,6 +819,7 @@ func newFirecrackerHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
DisableNestingChecks: h.DisableNestingChecks,
BlockDeviceDriver: blockDriver,
EnableIOThreads: h.EnableIOThreads,
IndepIOThreads: h.indepiothreads(),
DisableVhostNet: true, // vhost-net backend is not supported in Firecracker
GuestHookPath: h.guestHookPath(),
RxRateLimiterMaxRate: rxRateLimiterMaxRate,
@ -964,6 +974,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
BlockDeviceCacheDirect: h.BlockDeviceCacheDirect,
BlockDeviceCacheNoflush: h.BlockDeviceCacheNoflush,
EnableIOThreads: h.EnableIOThreads,
IndepIOThreads: h.indepiothreads(),
Msize9p: h.msize9p(),
DisableImageNvdimm: h.DisableImageNvdimm,
HotPlugVFIO: h.hotPlugVFIO(),
@ -1094,6 +1105,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
BlockDeviceCacheSet: h.BlockDeviceCacheSet,
BlockDeviceCacheDirect: h.BlockDeviceCacheDirect,
EnableIOThreads: h.EnableIOThreads,
IndepIOThreads: h.indepiothreads(),
Msize9p: h.msize9p(),
DisableImageNvdimm: h.DisableImageNvdimm,
ColdPlugVFIO: h.coldPlugVFIO(),
@ -1452,6 +1464,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig {
BlockDeviceCacheDirect: defaultBlockDeviceCacheDirect,
BlockDeviceCacheNoflush: defaultBlockDeviceCacheNoflush,
EnableIOThreads: defaultEnableIOThreads,
IndepIOThreads: defaultIndepIOThreads,
Msize9p: defaultMsize9p,
ColdPlugVFIO: defaultColdPlugVFIO,
HotPlugVFIO: defaultHotPlugVFIO,

View File

@ -840,6 +840,16 @@ func addHypervisorBlockOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig)
return err
}
if err := newAnnotationConfiguration(ocispec, vcAnnotations.IndepIOThreads).setUintWithCheck(func(indepiothreads uint64) error {
if indepiothreads < 0 {
return fmt.Errorf("Error parsing annotation for indepiothreads, please specify positive numeric value")
}
sbConfig.HypervisorConfig.IndepIOThreads = uint32(indepiothreads)
return nil
}); err != nil {
return err
}
if err := newAnnotationConfiguration(ocispec, vcAnnotations.BlockDeviceCacheSet).setBool(func(blockDeviceCacheSet bool) {
sbConfig.HypervisorConfig.BlockDeviceCacheSet = blockDeviceCacheSet
}); err != nil {

View File

@ -249,6 +249,10 @@ type HypervisorConfig struct {
// Supported currently for virtio-scsi driver.
EnableIOThreads bool
// IndepIOThreads is the number of independent IOThreads to pre-allocate, so that
// hotplugged QEMU devices such as virtio-blk can attach to an iothread.
IndepIOThreads uint32
// Debug changes the default hypervisor and kernel parameters to
// enable debug output where available.
Debug bool

View File

@ -605,6 +605,9 @@ type HypervisorConfig struct {
// Supported currently for virtio-scsi driver.
EnableIOThreads bool
// IndepIOThreads is the number of independent IOThreads; they allow IO to be processed in separate threads.
IndepIOThreads uint32
// Debug changes the default hypervisor and kernel parameters to
// enable debug output where available.
Debug bool

View File

@ -235,6 +235,7 @@ func (s *Sandbox) dumpConfig(ss *persistapi.SandboxState) {
BlockDeviceCacheNoflush: sconfig.HypervisorConfig.BlockDeviceCacheNoflush,
DisableBlockDeviceUse: sconfig.HypervisorConfig.DisableBlockDeviceUse,
EnableIOThreads: sconfig.HypervisorConfig.EnableIOThreads,
IndepIOThreads: sconfig.HypervisorConfig.IndepIOThreads,
Debug: sconfig.HypervisorConfig.Debug,
MemPrealloc: sconfig.HypervisorConfig.MemPrealloc,
HugePages: sconfig.HypervisorConfig.HugePages,
@ -473,6 +474,7 @@ func loadSandboxConfig(id string) (*SandboxConfig, error) {
BlockDeviceCacheNoflush: hconf.BlockDeviceCacheNoflush,
DisableBlockDeviceUse: hconf.DisableBlockDeviceUse,
EnableIOThreads: hconf.EnableIOThreads,
IndepIOThreads: hconf.IndepIOThreads,
Debug: hconf.Debug,
MemPrealloc: hconf.MemPrealloc,
HugePages: hconf.HugePages,

View File

@ -164,6 +164,10 @@ type HypervisorConfig struct {
// Supported currently for virtio-scsi driver.
EnableIOThreads bool
// IndepIOThreads is the number of independent IOThreads to pre-allocate, so that
// hotplugged QEMU devices such as virtio-blk can attach to an iothread.
IndepIOThreads uint32
// Debug changes the default hypervisor and kernel parameters to
// enable debug output where available.
Debug bool

View File

@ -221,6 +221,11 @@ const (
// Supported currently for virtio-scsi driver.
EnableIOThreads = kataAnnotHypervisorPrefix + "enable_iothreads"
// IndepIOThreads is a sandbox annotation that specifies the number of independent IOThreads,
// which hotplugged QEMU devices such as virtio-blk can attach to.
IndepIOThreads = kataAnnotHypervisorPrefix + "indep_iothreads"
// BlockDeviceCacheSet is a sandbox annotation that specifies cache-related options will be set to block devices or not.
BlockDeviceCacheSet = kataAnnotHypervisorPrefix + "block_device_cache_set"