From f137048be37e0296444cac6b091c9a8ba848b428 Mon Sep 17 00:00:00 2001 From: Eric Ernst Date: Wed, 11 Jan 2023 16:05:18 -0800 Subject: [PATCH 1/3] resource-control: add helper function for setting CPU affinity Let's abstract the CPU affinity Fixes: #6044 Signed-off-by: Eric Ernst --- src/runtime/pkg/resourcecontrol/utils_darwin.go | 11 +++++++++++ src/runtime/pkg/resourcecontrol/utils_linux.go | 17 +++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 src/runtime/pkg/resourcecontrol/utils_darwin.go diff --git a/src/runtime/pkg/resourcecontrol/utils_darwin.go b/src/runtime/pkg/resourcecontrol/utils_darwin.go new file mode 100644 index 0000000000..86c50ae783 --- /dev/null +++ b/src/runtime/pkg/resourcecontrol/utils_darwin.go @@ -0,0 +1,11 @@ +// Copyright (c) 2023 Apple Inc. +// +// SPDX-License-Identifier: Apache-2.0 +// + +package resourcecontrol + +// SetThreadAffinity is the Darwin stub: it performs no pinning and always returns nil. +func SetThreadAffinity(threadID int, cpuSetSlice []int) error { + return nil +} diff --git a/src/runtime/pkg/resourcecontrol/utils_linux.go b/src/runtime/pkg/resourcecontrol/utils_linux.go index a3e7ef7105..04c2e1944b 100644 --- a/src/runtime/pkg/resourcecontrol/utils_linux.go +++ b/src/runtime/pkg/resourcecontrol/utils_linux.go @@ -15,6 +15,7 @@ import ( systemdDbus "github.com/coreos/go-systemd/v22/dbus" "github.com/godbus/dbus/v5" "github.com/opencontainers/runc/libcontainer/cgroups/systemd" + "golang.org/x/sys/unix" ) // DefaultResourceControllerID runtime-determined location in the cgroups hierarchy. 
@@ -141,3 +142,19 @@ func getSliceAndUnit(cgroupPath string) (string, string, error) { return "", "", fmt.Errorf("Path: %s is not valid systemd's cgroups path", cgroupPath) } + +// SetThreadAffinity restricts the thread threadID to the CPUs whose IDs are listed in +// cpuSetSlice, returning an error if the sched_setaffinity call fails. +func SetThreadAffinity(threadID int, cpuSetSlice []int) error { + unixCPUSet := unix.CPUSet{} + + for _, cpuID := range cpuSetSlice { + unixCPUSet.Set(cpuID) + } + + if err := unix.SchedSetaffinity(threadID, &unixCPUSet); err != nil { + return fmt.Errorf("failed to set vcpu thread %d affinity to cpu %d: %v", threadID, cpuSetSlice, err) + } + + return nil +} From e3d3b72fa2a24921543503fd06580c9ce38f646c Mon Sep 17 00:00:00 2001 From: Eric Ernst Date: Wed, 11 Jan 2023 16:05:18 -0800 Subject: [PATCH 2/3] virtcontainers: use resource control for setting CPU affinity Let's abstract the CPU affinity, instead of calling linux only code from sandbox. Fixes: #6044 Signed-off-by: Eric Ernst --- src/runtime/virtcontainers/sandbox.go | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index 9f87cc2ffe..6dcd0e847a 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -44,7 +44,6 @@ import ( "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/rootless" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils" - "golang.org/x/sys/unix" ) // sandboxTracingTags defines tags for the trace span @@ -2540,19 +2539,14 @@ func (s *Sandbox) checkVCPUsPinning(ctx context.Context) error { } return nil } - - // if equal, we can now start vCPU threads pinning - i := 0 - for _, tid := range vCPUThreadsMap.vcpus { - unixCPUSet := unix.CPUSet{} - unixCPUSet.Set(cpuSetSlice[i]) - if err := unix.SchedSetaffinity(tid, &unixCPUSet); err != nil { + // if equal, we can use vCPU thread pinning + for i, tid := range vCPUThreadsMap.vcpus { + if err := 
resCtrl.SetThreadAffinity(tid, cpuSetSlice[i:i+1]); err != nil { if err := s.resetVCPUsPinning(ctx, vCPUThreadsMap, cpuSetSlice); err != nil { return err } return fmt.Errorf("failed to set vcpu thread %d affinity to cpu %d: %v", tid, cpuSetSlice[i], err) } - i++ } s.isVCPUsPinningOn = true return nil @@ -2560,13 +2554,9 @@ func (s *Sandbox) checkVCPUsPinning(ctx context.Context) error { // resetVCPUsPinning cancels current pinning and restores default random vCPU threads scheduling func (s *Sandbox) resetVCPUsPinning(ctx context.Context, vCPUThreadsMap VcpuThreadIDs, cpuSetSlice []int) error { - unixCPUSet := unix.CPUSet{} - for cpuId := range cpuSetSlice { - unixCPUSet.Set(cpuId) - } for _, tid := range vCPUThreadsMap.vcpus { - if err := unix.SchedSetaffinity(tid, &unixCPUSet); err != nil { - return fmt.Errorf("failed to reset vcpu thread %d affinity to default mode: %v", tid, err) + if err := resCtrl.SetThreadAffinity(tid, cpuSetSlice); err != nil { + return fmt.Errorf("failed to reset vcpu thread %d affinity: %v", tid, err) } } return nil From 6ee550e9a5e100c9d6e532c0d8af221a4da0267d Mon Sep 17 00:00:00 2001 From: Eric Ernst Date: Wed, 11 Jan 2023 16:25:44 -0800 Subject: [PATCH 3/3] runtime: vCPUs pinning is sandbox specific, not hypervisor While at it, make sure we persist this and fix a misc typo. 
Signed-off-by: Eric Ernst --- src/runtime/config/configuration-qemu.toml.in | 10 +++++----- src/runtime/pkg/katautils/config.go | 4 ++-- src/runtime/pkg/oci/utils.go | 19 +++++++++++-------- src/runtime/virtcontainers/hypervisor.go | 3 --- src/runtime/virtcontainers/persist.go | 2 ++ .../virtcontainers/persist/api/config.go | 3 +++ .../pkg/annotations/annotations.go | 6 +++--- src/runtime/virtcontainers/sandbox.go | 8 ++++++-- 8 files changed, 32 insertions(+), 23 deletions(-) diff --git a/src/runtime/config/configuration-qemu.toml.in b/src/runtime/config/configuration-qemu.toml.in index 26b7e2f087..0394d01246 100644 --- a/src/runtime/config/configuration-qemu.toml.in +++ b/src/runtime/config/configuration-qemu.toml.in @@ -96,11 +96,6 @@ machine_accelerators="@MACHINEACCELERATORS@" # For example, `cpu_features = "pmu=off,vmx=off" cpu_features="@CPUFEATURES@" -# vCPUs pinning settings -# if enabled, each vCPU thread will be scheduled to a fixed CPU -# qualified condition: num(vCPU threads) == num(CPUs in sandbox's CPUSet) -# enable_vcpus_pinning = false - # Default number of vCPUs per SB/VM: # unspecified or 0 --> will be set to @DEFVCPUS@ # < 0 --> will be set to the actual number of physical cores @@ -563,6 +558,11 @@ internetworking_model="@DEFNETWORKMODEL_QEMU@" # (default: true) disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ +# vCPUs pinning settings +# if enabled, each vCPU thread will be scheduled to a fixed CPU +# qualified condition: num(vCPU threads) == num(CPUs in sandbox's CPUSet) +# enable_vcpus_pinning = false + # Apply a custom SELinux security policy to the container process inside the VM. # This is used when you want to apply a type other than the default `container_t`, # so general users should not uncomment and apply it. 
diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index 3ed3177f57..7a1f57ac41 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -156,7 +156,6 @@ type hypervisor struct { DisableSeLinux bool `toml:"disable_selinux"` DisableGuestSeLinux bool `toml:"disable_guest_selinux"` LegacySerial bool `toml:"use_legacy_serial"` - EnableVCPUsPinning bool `toml:"enable_vcpus_pinning"` } type runtime struct { @@ -171,6 +170,7 @@ type runtime struct { Tracing bool `toml:"enable_tracing"` DisableNewNetNs bool `toml:"disable_new_netns"` DisableGuestSeccomp bool `toml:"disable_guest_seccomp"` + EnableVCPUsPinning bool `toml:"enable_vcpus_pinning"` Debug bool `toml:"enable_debug"` SandboxCgroupOnly bool `toml:"sandbox_cgroup_only"` StaticSandboxResourceMgmt bool `toml:"static_sandbox_resource_mgmt"` @@ -838,7 +838,6 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { Rootless: h.Rootless, LegacySerial: h.LegacySerial, DisableSeLinux: h.DisableSeLinux, - EnableVCPUsPinning: h.EnableVCPUsPinning, DisableGuestSeLinux: h.DisableGuestSeLinux, }, nil } @@ -1324,6 +1323,7 @@ func LoadConfiguration(configPath string, ignoreLogging bool) (resolvedConfigPat } config.DisableGuestSeccomp = tomlConf.Runtime.DisableGuestSeccomp + config.EnableVCPUsPinning = tomlConf.Runtime.EnableVCPUsPinning config.GuestSeLinuxLabel = tomlConf.Runtime.GuestSeLinuxLabel config.StaticSandboxResourceMgmt = tomlConf.Runtime.StaticSandboxResourceMgmt config.SandboxCgroupOnly = tomlConf.Runtime.SandboxCgroupOnly diff --git a/src/runtime/pkg/oci/utils.go b/src/runtime/pkg/oci/utils.go index 2cd7c10f53..c56bc298b8 100644 --- a/src/runtime/pkg/oci/utils.go +++ b/src/runtime/pkg/oci/utils.go @@ -128,6 +128,9 @@ type RuntimeConfig struct { //Determines if seccomp should be applied inside guest DisableGuestSeccomp bool + // EnableVCPUsPinning controls whether each vCPU thread should be scheduled to a fixed CPU + 
EnableVCPUsPinning bool + //SELinux security context applied to the container process inside guest. GuestSeLinuxLabel string @@ -442,7 +445,7 @@ func addHypervisorConfigOverrides(ocispec specs.Spec, config *vc.SandboxConfig, return err } - if err := addHypervisporNetworkOverrides(ocispec, config); err != nil { + if err := addHypervisorNetworkOverrides(ocispec, config); err != nil { return err } @@ -654,12 +657,6 @@ func addHypervisorCPUOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) e return err } - if err := newAnnotationConfiguration(ocispec, vcAnnotations.EnableVCPUsPinning).setBool(func(enableVCPUsPinning bool) { - sbConfig.HypervisorConfig.EnableVCPUsPinning = enableVCPUsPinning - }); err != nil { - return err - } - return newAnnotationConfiguration(ocispec, vcAnnotations.DefaultMaxVCPUs).setUintWithCheck(func(maxVCPUs uint64) error { max := uint32(maxVCPUs) @@ -792,7 +789,7 @@ func addHypervisorVirtioFsOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConf }) } -func addHypervisporNetworkOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) error { +func addHypervisorNetworkOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) error { if value, ok := ocispec.Annotations[vcAnnotations.CPUFeatures]; ok { if value != "" { sbConfig.HypervisorConfig.CPUFeatures = value @@ -830,6 +827,12 @@ func addRuntimeConfigOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig, r return err } + if err := newAnnotationConfiguration(ocispec, vcAnnotations.EnableVCPUsPinning).setBool(func(enableVCPUsPinning bool) { + sbConfig.EnableVCPUsPinning = enableVCPUsPinning + }); err != nil { + return err + } + if value, ok := ocispec.Annotations[vcAnnotations.Experimental]; ok { features := strings.Split(value, " ") sbConfig.Experimental = []exp.Feature{} diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index eb6ed3fd36..bb1b8e90e7 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ 
b/src/runtime/virtcontainers/hypervisor.go @@ -575,9 +575,6 @@ type HypervisorConfig struct { // Use legacy serial for the guest console LegacySerial bool - - // EnableVCPUsPinning controls whether each vCPU thread should be scheduled to a fixed CPU - EnableVCPUsPinning bool } // vcpu mapping from vcpu number to thread number diff --git a/src/runtime/virtcontainers/persist.go b/src/runtime/virtcontainers/persist.go index 906ed10761..18c83e2515 100644 --- a/src/runtime/virtcontainers/persist.go +++ b/src/runtime/virtcontainers/persist.go @@ -189,6 +189,7 @@ func (s *Sandbox) dumpConfig(ss *persistapi.SandboxState) { SystemdCgroup: sconfig.SystemdCgroup, SandboxCgroupOnly: sconfig.SandboxCgroupOnly, DisableGuestSeccomp: sconfig.DisableGuestSeccomp, + EnableVCPUsPinning: sconfig.EnableVCPUsPinning, GuestSeLinuxLabel: sconfig.GuestSeLinuxLabel, } @@ -430,6 +431,7 @@ func loadSandboxConfig(id string) (*SandboxConfig, error) { SystemdCgroup: savedConf.SystemdCgroup, SandboxCgroupOnly: savedConf.SandboxCgroupOnly, DisableGuestSeccomp: savedConf.DisableGuestSeccomp, + EnableVCPUsPinning: savedConf.EnableVCPUsPinning, GuestSeLinuxLabel: savedConf.GuestSeLinuxLabel, } sconfig.SandboxBindMounts = append(sconfig.SandboxBindMounts, savedConf.SandboxBindMounts...) 
diff --git a/src/runtime/virtcontainers/persist/api/config.go b/src/runtime/virtcontainers/persist/api/config.go index 9b5af5668d..5bef012194 100644 --- a/src/runtime/virtcontainers/persist/api/config.go +++ b/src/runtime/virtcontainers/persist/api/config.go @@ -288,4 +288,7 @@ type SandboxConfig struct { SandboxCgroupOnly bool DisableGuestSeccomp bool + + // EnableVCPUsPinning controls whether each vCPU thread should be scheduled to a fixed CPU + EnableVCPUsPinning bool } diff --git a/src/runtime/virtcontainers/pkg/annotations/annotations.go b/src/runtime/virtcontainers/pkg/annotations/annotations.go index 5d36926c5e..a94878f930 100644 --- a/src/runtime/virtcontainers/pkg/annotations/annotations.go +++ b/src/runtime/virtcontainers/pkg/annotations/annotations.go @@ -143,9 +143,6 @@ const ( // DefaultVCPUs is a sandbox annotation that specifies the maximum number of vCPUs allocated for the VM by the hypervisor. DefaultMaxVCPUs = kataAnnotHypervisorPrefix + "default_max_vcpus" - // EnableVCPUsPinning is a sandbox annotation that controls bundling between vCPU threads and CPUs - EnableVCPUsPinning = kataAnnotationsPrefix + "enable_vcpus_pinning" - // // Memory related annotations // @@ -253,6 +250,9 @@ const ( // SandboxCgroupOnly is a sandbox annotation that determines if kata processes are managed only in sandbox cgroup. SandboxCgroupOnly = kataAnnotRuntimePrefix + "sandbox_cgroup_only" + // EnableVCPUsPinning is a sandbox annotation that controls bundling between vCPU threads and CPUs + EnableVCPUsPinning = kataAnnotationsPrefix + "enable_vcpus_pinning" + // EnablePprof is a sandbox annotation that determines if pprof enabled. 
EnablePprof = kataAnnotRuntimePrefix + "enable_pprof" diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index 6dcd0e847a..e93868c1b6 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -178,7 +178,11 @@ type SandboxConfig struct { // SandboxCgroupOnly enables cgroup only at podlevel in the host SandboxCgroupOnly bool + // DisableGuestSeccomp disable seccomp within the guest DisableGuestSeccomp bool + + // EnableVCPUsPinning controls whether each vCPU thread should be scheduled to a fixed CPU + EnableVCPUsPinning bool } // valid checks that the sandbox configuration is valid. @@ -2508,9 +2512,9 @@ func (s *Sandbox) fetchContainers(ctx context.Context) error { // is then pinned to one fixed CPU in CPUSet. func (s *Sandbox) checkVCPUsPinning(ctx context.Context) error { if s.config == nil { - return fmt.Errorf("no hypervisor config found") + return fmt.Errorf("no sandbox config found") } - if !s.config.HypervisorConfig.EnableVCPUsPinning { + if !s.config.EnableVCPUsPinning { return nil }