Merge pull request #6045 from egernst/fix-6044

Address issues with the initial vCPU pinning functionality
2025-09-26 11:13:40 +00:00 · 2023-01-13 11:06:42 -08:00
parent 9c6e90fd55 6ee550e9a5
commit f82918f872
10 changed files with 62 additions and 38 deletions
--- a/src/runtime/config/configuration-qemu.toml.in
+++ b/src/runtime/config/configuration-qemu.toml.in
@@ -96,11 +96,6 @@ machine_accelerators="@MACHINEACCELERATORS@"
 # For example, `cpu_features = "pmu=off,vmx=off"
 cpu_features="@CPUFEATURES@"
 # vCPUs pinning settings
 # if enabled, each vCPU thread will be scheduled to a fixed CPU
 # qualified condition: num(vCPU threads) == num(CPUs in sandbox's CPUSet)
 # enable_vcpus_pinning = false
 # Default number of vCPUs per SB/VM:
 # unspecified or 0                --> will be set to @DEFVCPUS@
 # < 0                             --> will be set to the actual number of physical cores
@@ -563,6 +558,11 @@ internetworking_model="@DEFNETWORKMODEL_QEMU@"
 # (default: true)
 disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # vCPUs pinning settings
 # if enabled, each vCPU thread will be scheduled to a fixed CPU
 # qualified condition: num(vCPU threads) == num(CPUs in sandbox's CPUSet)
 # enable_vcpus_pinning = false
 # Apply a custom SELinux security policy to the container process inside the VM.
 # This is used when you want to apply a type other than the default `container_t`,
 # so general users should not uncomment and apply it.
--- a/src/runtime/pkg/katautils/config.go
+++ b/src/runtime/pkg/katautils/config.go
@@ -156,7 +156,6 @@ type hypervisor struct {
 	DisableSeLinux                 bool     `toml:"disable_selinux"`
 	DisableGuestSeLinux            bool     `toml:"disable_guest_selinux"`
 	LegacySerial                   bool     `toml:"use_legacy_serial"`
 	EnableVCPUsPinning             bool     `toml:"enable_vcpus_pinning"`
 }
 type runtime struct {
@@ -171,6 +170,7 @@ type runtime struct {
 	Tracing                   bool     `toml:"enable_tracing"`
 	DisableNewNetNs           bool     `toml:"disable_new_netns"`
 	DisableGuestSeccomp       bool     `toml:"disable_guest_seccomp"`
 	EnableVCPUsPinning        bool     `toml:"enable_vcpus_pinning"`
 	Debug                     bool     `toml:"enable_debug"`
 	SandboxCgroupOnly         bool     `toml:"sandbox_cgroup_only"`
 	StaticSandboxResourceMgmt bool     `toml:"static_sandbox_resource_mgmt"`
@@ -838,7 +838,6 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
 		Rootless:                h.Rootless,
 		LegacySerial:            h.LegacySerial,
 		DisableSeLinux:          h.DisableSeLinux,
 		EnableVCPUsPinning:      h.EnableVCPUsPinning,
 		DisableGuestSeLinux:     h.DisableGuestSeLinux,
 	}, nil
 }
@@ -1324,6 +1323,7 @@ func LoadConfiguration(configPath string, ignoreLogging bool) (resolvedConfigPat
 	}
 	config.DisableGuestSeccomp = tomlConf.Runtime.DisableGuestSeccomp
 	config.EnableVCPUsPinning = tomlConf.Runtime.EnableVCPUsPinning
 	config.GuestSeLinuxLabel = tomlConf.Runtime.GuestSeLinuxLabel
 	config.StaticSandboxResourceMgmt = tomlConf.Runtime.StaticSandboxResourceMgmt
 	config.SandboxCgroupOnly = tomlConf.Runtime.SandboxCgroupOnly
--- a/src/runtime/pkg/oci/utils.go
+++ b/src/runtime/pkg/oci/utils.go
@@ -128,6 +128,9 @@ type RuntimeConfig struct {
 	//Determines if seccomp should be applied inside guest
 	DisableGuestSeccomp bool
 	// EnableVCPUsPinning controls whether each vCPU thread should be scheduled to a fixed CPU
 	EnableVCPUsPinning bool
 	//SELinux security context applied to the container process inside guest.
 	GuestSeLinuxLabel string
@@ -442,7 +445,7 @@ func addHypervisorConfigOverrides(ocispec specs.Spec, config *vc.SandboxConfig,
 		return err
 	}
-	if err := addHypervisporNetworkOverrides(ocispec, config); err != nil {
+	if err := addHypervisorNetworkOverrides(ocispec, config); err != nil {
 		return err
 	}
@@ -654,12 +657,6 @@ func addHypervisorCPUOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) e
 		return err
 	}
 	if err := newAnnotationConfiguration(ocispec, vcAnnotations.EnableVCPUsPinning).setBool(func(enableVCPUsPinning bool) {
 		sbConfig.HypervisorConfig.EnableVCPUsPinning = enableVCPUsPinning
 	}); err != nil {
 		return err
 	}
 	return newAnnotationConfiguration(ocispec, vcAnnotations.DefaultMaxVCPUs).setUintWithCheck(func(maxVCPUs uint64) error {
 		max := uint32(maxVCPUs)
@@ -792,7 +789,7 @@ func addHypervisorVirtioFsOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConf
 	})
 }
-func addHypervisporNetworkOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) error {
+func addHypervisorNetworkOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) error {
 	if value, ok := ocispec.Annotations[vcAnnotations.CPUFeatures]; ok {
 		if value != "" {
 			sbConfig.HypervisorConfig.CPUFeatures = value
@@ -830,6 +827,12 @@ func addRuntimeConfigOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig, r
 		return err
 	}
 	if err := newAnnotationConfiguration(ocispec, vcAnnotations.EnableVCPUsPinning).setBool(func(enableVCPUsPinning bool) {
 		sbConfig.EnableVCPUsPinning = enableVCPUsPinning
 	}); err != nil {
 		return err
 	}
 	if value, ok := ocispec.Annotations[vcAnnotations.Experimental]; ok {
 		features := strings.Split(value, " ")
 		sbConfig.Experimental = []exp.Feature{}
--- a/src/runtime/pkg/resourcecontrol/utils_darwin.go
+++ b/src/runtime/pkg/resourcecontrol/utils_darwin.go
@@ -0,0 +1,10 @@
 // Copyright (c) 2023 Apple Inc.
 //
 // SPDX-License-Identifier: Apache-2.0
 //
 package resourcecontrol
 // SetThreadAffinity is the Darwin stand-in for the Linux implementation of
 // the same name. It performs no work: both arguments are ignored and the
 // call always reports success.
 func SetThreadAffinity(threadID int, cpuSetSlice []int) error {
 	// Nothing is done with threadID or cpuSetSlice in this build.
 	return nil
 }
--- a/src/runtime/pkg/resourcecontrol/utils_linux.go
+++ b/src/runtime/pkg/resourcecontrol/utils_linux.go
@@ -15,6 +15,7 @@ import (
 	systemdDbus "github.com/coreos/go-systemd/v22/dbus"
 	"github.com/godbus/dbus/v5"
 	"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
 	"golang.org/x/sys/unix"
 )
 // DefaultResourceControllerID runtime-determined location in the cgroups hierarchy.
@@ -141,3 +142,17 @@ func getSliceAndUnit(cgroupPath string) (string, string, error) {
 	return "", "", fmt.Errorf("Path: %s is not valid systemd's cgroups path", cgroupPath)
 }
 // SetThreadAffinity restricts the thread identified by threadID to the CPUs
 // whose IDs are listed in cpuSetSlice.
 //
 // Every element of cpuSetSlice is treated as a CPU ID and added to the
 // affinity mask that is handed to sched_setaffinity. An error is returned
 // when the underlying system call fails.
 func SetThreadAffinity(threadID int, cpuSetSlice []int) error {
 	unixCPUSet := unix.CPUSet{}

 	// Range over the values, not the indices: the slice holds CPU IDs, and
 	// using the index would always select CPUs 0..len-1 regardless of the
 	// IDs actually requested.
 	for _, cpuID := range cpuSetSlice {
 		unixCPUSet.Set(cpuID)
 	}

 	if err := unix.SchedSetaffinity(threadID, &unixCPUSet); err != nil {
 		return fmt.Errorf("failed to set vcpu thread %d affinity to cpu %d: %v", threadID, cpuSetSlice, err)
 	}

 	return nil
 }
--- a/src/runtime/virtcontainers/hypervisor.go
+++ b/src/runtime/virtcontainers/hypervisor.go
@@ -575,9 +575,6 @@ type HypervisorConfig struct {
 	// Use legacy serial for the guest console
 	LegacySerial bool
 	// EnableVCPUsPinning controls whether each vCPU thread should be scheduled to a fixed CPU
 	EnableVCPUsPinning bool
 }
 // vcpu mapping from vcpu number to thread number
--- a/src/runtime/virtcontainers/persist.go
+++ b/src/runtime/virtcontainers/persist.go
@@ -189,6 +189,7 @@ func (s *Sandbox) dumpConfig(ss *persistapi.SandboxState) {
 		SystemdCgroup:       sconfig.SystemdCgroup,
 		SandboxCgroupOnly:   sconfig.SandboxCgroupOnly,
 		DisableGuestSeccomp: sconfig.DisableGuestSeccomp,
 		EnableVCPUsPinning:  sconfig.EnableVCPUsPinning,
 		GuestSeLinuxLabel:   sconfig.GuestSeLinuxLabel,
 	}
@@ -430,6 +431,7 @@ func loadSandboxConfig(id string) (*SandboxConfig, error) {
 		SystemdCgroup:       savedConf.SystemdCgroup,
 		SandboxCgroupOnly:   savedConf.SandboxCgroupOnly,
 		DisableGuestSeccomp: savedConf.DisableGuestSeccomp,
 		EnableVCPUsPinning:  savedConf.EnableVCPUsPinning,
 		GuestSeLinuxLabel:   savedConf.GuestSeLinuxLabel,
 	}
 	sconfig.SandboxBindMounts = append(sconfig.SandboxBindMounts, savedConf.SandboxBindMounts...)
--- a/src/runtime/virtcontainers/persist/api/config.go
+++ b/src/runtime/virtcontainers/persist/api/config.go
@@ -288,4 +288,7 @@ type SandboxConfig struct {
 	SandboxCgroupOnly bool
 	DisableGuestSeccomp bool
 	// EnableVCPUsPinning controls whether each vCPU thread should be scheduled to a fixed CPU
 	EnableVCPUsPinning bool
 }
--- a/src/runtime/virtcontainers/pkg/annotations/annotations.go
+++ b/src/runtime/virtcontainers/pkg/annotations/annotations.go
@@ -143,9 +143,6 @@ const (
 	// DefaultMaxVCPUs is a sandbox annotation that specifies the maximum number of vCPUs allocated for the VM by the hypervisor.
 	DefaultMaxVCPUs = kataAnnotHypervisorPrefix + "default_max_vcpus"
 	// EnableVCPUsPinning is a sandbox annotation that controls bundling between vCPU threads and CPUs
 	EnableVCPUsPinning = kataAnnotationsPrefix + "enable_vcpus_pinning"
 	//
 	//	Memory related annotations
 	//
@@ -253,6 +250,9 @@ const (
 	// SandboxCgroupOnly is a sandbox annotation that determines if kata processes are managed only in sandbox cgroup.
 	SandboxCgroupOnly = kataAnnotRuntimePrefix + "sandbox_cgroup_only"
 	// EnableVCPUsPinning is a sandbox annotation that controls whether each vCPU thread is pinned to a fixed CPU
 	EnableVCPUsPinning = kataAnnotationsPrefix + "enable_vcpus_pinning"
 	// EnablePprof is a sandbox annotation that determines if pprof enabled.
 	EnablePprof = kataAnnotRuntimePrefix + "enable_pprof"
--- a/src/runtime/virtcontainers/sandbox.go
+++ b/src/runtime/virtcontainers/sandbox.go
@@ -44,7 +44,6 @@ import (
 	"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/rootless"
 	"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types"
 	"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
 	"golang.org/x/sys/unix"
 )
 // sandboxTracingTags defines tags for the trace span
@@ -179,7 +178,11 @@ type SandboxConfig struct {
 	// SandboxCgroupOnly enables cgroup only at podlevel in the host
 	SandboxCgroupOnly bool
 	// DisableGuestSeccomp disable seccomp within the guest
 	DisableGuestSeccomp bool
 	// EnableVCPUsPinning controls whether each vCPU thread should be scheduled to a fixed CPU
 	EnableVCPUsPinning bool
 }
 // valid checks that the sandbox configuration is valid.
@@ -2509,9 +2512,9 @@ func (s *Sandbox) fetchContainers(ctx context.Context) error {
 // is then pinned to one fixed CPU in CPUSet.
 func (s *Sandbox) checkVCPUsPinning(ctx context.Context) error {
 	if s.config == nil {
-		return fmt.Errorf("no hypervisor config found")
+		return fmt.Errorf("no sandbox config found")
 	}
-	if !s.config.HypervisorConfig.EnableVCPUsPinning {
+	if !s.config.EnableVCPUsPinning {
 		return nil
 	}
@@ -2540,19 +2543,14 @@ func (s *Sandbox) checkVCPUsPinning(ctx context.Context) error {
 		}
 		return nil
 	}
-
+	// if equal, we can use vCPU thread pinning
-	// if equal, we can now start vCPU threads pinning
+	for i, tid := range vCPUThreadsMap.vcpus {
-	i := 0
+		if err := resCtrl.SetThreadAffinity(tid, cpuSetSlice[i:i+1]); err != nil {
 	for _, tid := range vCPUThreadsMap.vcpus {
 		unixCPUSet := unix.CPUSet{}
 		unixCPUSet.Set(cpuSetSlice[i])
 		if err := unix.SchedSetaffinity(tid, &unixCPUSet); err != nil {
 			if err := s.resetVCPUsPinning(ctx, vCPUThreadsMap, cpuSetSlice); err != nil {
 				return err
 			}
 			return fmt.Errorf("failed to set vcpu thread %d affinity to cpu %d: %v", tid, cpuSetSlice[i], err)
 		}
 		i++
 	}
 	s.isVCPUsPinningOn = true
 	return nil
@@ -2560,13 +2558,9 @@ func (s *Sandbox) checkVCPUsPinning(ctx context.Context) error {
 // resetVCPUsPinning cancels current pinning and restores default random vCPU threads scheduling
 func (s *Sandbox) resetVCPUsPinning(ctx context.Context, vCPUThreadsMap VcpuThreadIDs, cpuSetSlice []int) error {
 	unixCPUSet := unix.CPUSet{}
 	for cpuId := range cpuSetSlice {
 		unixCPUSet.Set(cpuId)
 	}
 	for _, tid := range vCPUThreadsMap.vcpus {
-		if err := unix.SchedSetaffinity(tid, &unixCPUSet); err != nil {
+		if err := resCtrl.SetThreadAffinity(tid, cpuSetSlice); err != nil {
-			return fmt.Errorf("failed to reset vcpu thread %d affinity to default mode: %v", tid, err)
+			return fmt.Errorf("failed to reset vcpu thread %d affinity: %v", tid, err)
 		}
 	}
 	return nil