Merge pull request #6045 from egernst/fix-6044

Address issues with the initial vCPU pinning functionality
This commit is contained in:
Eric Ernst
2023-01-13 11:06:42 -08:00
committed by GitHub
10 changed files with 62 additions and 38 deletions

View File

@@ -96,11 +96,6 @@ machine_accelerators="@MACHINEACCELERATORS@"
# For example, `cpu_features = "pmu=off,vmx=off" # For example, `cpu_features = "pmu=off,vmx=off"
cpu_features="@CPUFEATURES@" cpu_features="@CPUFEATURES@"
# vCPUs pinning settings
# if enabled, each vCPU thread will be scheduled to a fixed CPU
# qualified condition: num(vCPU threads) == num(CPUs in sandbox's CPUSet)
# enable_vcpus_pinning = false
# Default number of vCPUs per SB/VM: # Default number of vCPUs per SB/VM:
# unspecified or 0 --> will be set to @DEFVCPUS@ # unspecified or 0 --> will be set to @DEFVCPUS@
# < 0 --> will be set to the actual number of physical cores # < 0 --> will be set to the actual number of physical cores
@@ -563,6 +558,11 @@ internetworking_model="@DEFNETWORKMODEL_QEMU@"
# (default: true) # (default: true)
disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
# vCPUs pinning settings
# if enabled, each vCPU thread will be scheduled to a fixed CPU
# qualified condition: num(vCPU threads) == num(CPUs in sandbox's CPUSet)
# enable_vcpus_pinning = false
# Apply a custom SELinux security policy to the container process inside the VM. # Apply a custom SELinux security policy to the container process inside the VM.
# This is used when you want to apply a type other than the default `container_t`, # This is used when you want to apply a type other than the default `container_t`,
# so general users should not uncomment and apply it. # so general users should not uncomment and apply it.

View File

@@ -156,7 +156,6 @@ type hypervisor struct {
DisableSeLinux bool `toml:"disable_selinux"` DisableSeLinux bool `toml:"disable_selinux"`
DisableGuestSeLinux bool `toml:"disable_guest_selinux"` DisableGuestSeLinux bool `toml:"disable_guest_selinux"`
LegacySerial bool `toml:"use_legacy_serial"` LegacySerial bool `toml:"use_legacy_serial"`
EnableVCPUsPinning bool `toml:"enable_vcpus_pinning"`
} }
type runtime struct { type runtime struct {
@@ -171,6 +170,7 @@ type runtime struct {
Tracing bool `toml:"enable_tracing"` Tracing bool `toml:"enable_tracing"`
DisableNewNetNs bool `toml:"disable_new_netns"` DisableNewNetNs bool `toml:"disable_new_netns"`
DisableGuestSeccomp bool `toml:"disable_guest_seccomp"` DisableGuestSeccomp bool `toml:"disable_guest_seccomp"`
EnableVCPUsPinning bool `toml:"enable_vcpus_pinning"`
Debug bool `toml:"enable_debug"` Debug bool `toml:"enable_debug"`
SandboxCgroupOnly bool `toml:"sandbox_cgroup_only"` SandboxCgroupOnly bool `toml:"sandbox_cgroup_only"`
StaticSandboxResourceMgmt bool `toml:"static_sandbox_resource_mgmt"` StaticSandboxResourceMgmt bool `toml:"static_sandbox_resource_mgmt"`
@@ -838,7 +838,6 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
Rootless: h.Rootless, Rootless: h.Rootless,
LegacySerial: h.LegacySerial, LegacySerial: h.LegacySerial,
DisableSeLinux: h.DisableSeLinux, DisableSeLinux: h.DisableSeLinux,
EnableVCPUsPinning: h.EnableVCPUsPinning,
DisableGuestSeLinux: h.DisableGuestSeLinux, DisableGuestSeLinux: h.DisableGuestSeLinux,
}, nil }, nil
} }
@@ -1324,6 +1323,7 @@ func LoadConfiguration(configPath string, ignoreLogging bool) (resolvedConfigPat
} }
config.DisableGuestSeccomp = tomlConf.Runtime.DisableGuestSeccomp config.DisableGuestSeccomp = tomlConf.Runtime.DisableGuestSeccomp
config.EnableVCPUsPinning = tomlConf.Runtime.EnableVCPUsPinning
config.GuestSeLinuxLabel = tomlConf.Runtime.GuestSeLinuxLabel config.GuestSeLinuxLabel = tomlConf.Runtime.GuestSeLinuxLabel
config.StaticSandboxResourceMgmt = tomlConf.Runtime.StaticSandboxResourceMgmt config.StaticSandboxResourceMgmt = tomlConf.Runtime.StaticSandboxResourceMgmt
config.SandboxCgroupOnly = tomlConf.Runtime.SandboxCgroupOnly config.SandboxCgroupOnly = tomlConf.Runtime.SandboxCgroupOnly

View File

@@ -128,6 +128,9 @@ type RuntimeConfig struct {
//Determines if seccomp should be applied inside guest //Determines if seccomp should be applied inside guest
DisableGuestSeccomp bool DisableGuestSeccomp bool
// EnableVCPUsPinning controls whether each vCPU thread should be scheduled to a fixed CPU
EnableVCPUsPinning bool
//SELinux security context applied to the container process inside guest. //SELinux security context applied to the container process inside guest.
GuestSeLinuxLabel string GuestSeLinuxLabel string
@@ -442,7 +445,7 @@ func addHypervisorConfigOverrides(ocispec specs.Spec, config *vc.SandboxConfig,
return err return err
} }
if err := addHypervisporNetworkOverrides(ocispec, config); err != nil { if err := addHypervisorNetworkOverrides(ocispec, config); err != nil {
return err return err
} }
@@ -654,12 +657,6 @@ func addHypervisorCPUOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) e
return err return err
} }
if err := newAnnotationConfiguration(ocispec, vcAnnotations.EnableVCPUsPinning).setBool(func(enableVCPUsPinning bool) {
sbConfig.HypervisorConfig.EnableVCPUsPinning = enableVCPUsPinning
}); err != nil {
return err
}
return newAnnotationConfiguration(ocispec, vcAnnotations.DefaultMaxVCPUs).setUintWithCheck(func(maxVCPUs uint64) error { return newAnnotationConfiguration(ocispec, vcAnnotations.DefaultMaxVCPUs).setUintWithCheck(func(maxVCPUs uint64) error {
max := uint32(maxVCPUs) max := uint32(maxVCPUs)
@@ -792,7 +789,7 @@ func addHypervisorVirtioFsOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConf
}) })
} }
func addHypervisporNetworkOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) error { func addHypervisorNetworkOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) error {
if value, ok := ocispec.Annotations[vcAnnotations.CPUFeatures]; ok { if value, ok := ocispec.Annotations[vcAnnotations.CPUFeatures]; ok {
if value != "" { if value != "" {
sbConfig.HypervisorConfig.CPUFeatures = value sbConfig.HypervisorConfig.CPUFeatures = value
@@ -830,6 +827,12 @@ func addRuntimeConfigOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig, r
return err return err
} }
if err := newAnnotationConfiguration(ocispec, vcAnnotations.EnableVCPUsPinning).setBool(func(enableVCPUsPinning bool) {
sbConfig.EnableVCPUsPinning = enableVCPUsPinning
}); err != nil {
return err
}
if value, ok := ocispec.Annotations[vcAnnotations.Experimental]; ok { if value, ok := ocispec.Annotations[vcAnnotations.Experimental]; ok {
features := strings.Split(value, " ") features := strings.Split(value, " ")
sbConfig.Experimental = []exp.Feature{} sbConfig.Experimental = []exp.Feature{}

View File

@@ -0,0 +1,10 @@
// Copyright (c) 2023 Apple Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
package resourcecontrol
// SetThreadAffinity is a no-op in this build: it ignores both threadID and
// cpuSetSlice and always returns nil, so callers can invoke it
// unconditionally.
//
// NOTE(review): presumably this is the stub for platforms that have no
// thread-affinity syscall available — confirm against this file's build
// constraints.
func SetThreadAffinity(threadID int, cpuSetSlice []int) error {
	return nil
}

View File

@@ -15,6 +15,7 @@ import (
systemdDbus "github.com/coreos/go-systemd/v22/dbus" systemdDbus "github.com/coreos/go-systemd/v22/dbus"
"github.com/godbus/dbus/v5" "github.com/godbus/dbus/v5"
"github.com/opencontainers/runc/libcontainer/cgroups/systemd" "github.com/opencontainers/runc/libcontainer/cgroups/systemd"
"golang.org/x/sys/unix"
) )
// DefaultResourceControllerID runtime-determined location in the cgroups hierarchy. // DefaultResourceControllerID runtime-determined location in the cgroups hierarchy.
@@ -141,3 +142,17 @@ func getSliceAndUnit(cgroupPath string) (string, string, error) {
return "", "", fmt.Errorf("Path: %s is not valid systemd's cgroups path", cgroupPath) return "", "", fmt.Errorf("Path: %s is not valid systemd's cgroups path", cgroupPath)
} }
// SetThreadAffinity restricts the thread identified by threadID to the CPUs
// whose IDs are listed in cpuSetSlice.
//
// It builds a unix.CPUSet containing every ID in cpuSetSlice and applies it
// with unix.SchedSetaffinity. On failure the underlying error is wrapped and
// returned; on success it returns nil.
func SetThreadAffinity(threadID int, cpuSetSlice []int) error {
	unixCPUSet := unix.CPUSet{}
	// Iterate over the slice VALUES: the elements are the CPU IDs, while the
	// indices are unrelated to them. Ranging over the index alone would build
	// the mask {0..len-1} and, for a one-element slice, always select CPU 0.
	for _, cpuID := range cpuSetSlice {
		unixCPUSet.Set(cpuID)
	}

	if err := unix.SchedSetaffinity(threadID, &unixCPUSet); err != nil {
		return fmt.Errorf("failed to set vcpu thread %d affinity to cpu %d: %w", threadID, cpuSetSlice, err)
	}

	return nil
}

View File

@@ -575,9 +575,6 @@ type HypervisorConfig struct {
// Use legacy serial for the guest console // Use legacy serial for the guest console
LegacySerial bool LegacySerial bool
// EnableVCPUsPinning controls whether each vCPU thread should be scheduled to a fixed CPU
EnableVCPUsPinning bool
} }
// vcpu mapping from vcpu number to thread number // vcpu mapping from vcpu number to thread number

View File

@@ -189,6 +189,7 @@ func (s *Sandbox) dumpConfig(ss *persistapi.SandboxState) {
SystemdCgroup: sconfig.SystemdCgroup, SystemdCgroup: sconfig.SystemdCgroup,
SandboxCgroupOnly: sconfig.SandboxCgroupOnly, SandboxCgroupOnly: sconfig.SandboxCgroupOnly,
DisableGuestSeccomp: sconfig.DisableGuestSeccomp, DisableGuestSeccomp: sconfig.DisableGuestSeccomp,
EnableVCPUsPinning: sconfig.EnableVCPUsPinning,
GuestSeLinuxLabel: sconfig.GuestSeLinuxLabel, GuestSeLinuxLabel: sconfig.GuestSeLinuxLabel,
} }
@@ -430,6 +431,7 @@ func loadSandboxConfig(id string) (*SandboxConfig, error) {
SystemdCgroup: savedConf.SystemdCgroup, SystemdCgroup: savedConf.SystemdCgroup,
SandboxCgroupOnly: savedConf.SandboxCgroupOnly, SandboxCgroupOnly: savedConf.SandboxCgroupOnly,
DisableGuestSeccomp: savedConf.DisableGuestSeccomp, DisableGuestSeccomp: savedConf.DisableGuestSeccomp,
EnableVCPUsPinning: savedConf.EnableVCPUsPinning,
GuestSeLinuxLabel: savedConf.GuestSeLinuxLabel, GuestSeLinuxLabel: savedConf.GuestSeLinuxLabel,
} }
sconfig.SandboxBindMounts = append(sconfig.SandboxBindMounts, savedConf.SandboxBindMounts...) sconfig.SandboxBindMounts = append(sconfig.SandboxBindMounts, savedConf.SandboxBindMounts...)

View File

@@ -288,4 +288,7 @@ type SandboxConfig struct {
SandboxCgroupOnly bool SandboxCgroupOnly bool
DisableGuestSeccomp bool DisableGuestSeccomp bool
// EnableVCPUsPinning controls whether each vCPU thread should be scheduled to a fixed CPU
EnableVCPUsPinning bool
} }

View File

@@ -143,9 +143,6 @@ const (
// DefaultVCPUs is a sandbox annotation that specifies the maximum number of vCPUs allocated for the VM by the hypervisor. // DefaultVCPUs is a sandbox annotation that specifies the maximum number of vCPUs allocated for the VM by the hypervisor.
DefaultMaxVCPUs = kataAnnotHypervisorPrefix + "default_max_vcpus" DefaultMaxVCPUs = kataAnnotHypervisorPrefix + "default_max_vcpus"
// EnableVCPUsPinning is a sandbox annotation that controls bundling between vCPU threads and CPUs
EnableVCPUsPinning = kataAnnotationsPrefix + "enable_vcpus_pinning"
// //
// Memory related annotations // Memory related annotations
// //
@@ -253,6 +250,9 @@ const (
// SandboxCgroupOnly is a sandbox annotation that determines if kata processes are managed only in sandbox cgroup. // SandboxCgroupOnly is a sandbox annotation that determines if kata processes are managed only in sandbox cgroup.
SandboxCgroupOnly = kataAnnotRuntimePrefix + "sandbox_cgroup_only" SandboxCgroupOnly = kataAnnotRuntimePrefix + "sandbox_cgroup_only"
// EnableVCPUsPinning is a sandbox annotation that controls whether vCPU threads are pinned to fixed CPUs
EnableVCPUsPinning = kataAnnotationsPrefix + "enable_vcpus_pinning"
// EnablePprof is a sandbox annotation that determines if pprof enabled. // EnablePprof is a sandbox annotation that determines if pprof enabled.
EnablePprof = kataAnnotRuntimePrefix + "enable_pprof" EnablePprof = kataAnnotRuntimePrefix + "enable_pprof"

View File

@@ -44,7 +44,6 @@ import (
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/rootless" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/rootless"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
"golang.org/x/sys/unix"
) )
// sandboxTracingTags defines tags for the trace span // sandboxTracingTags defines tags for the trace span
@@ -179,7 +178,11 @@ type SandboxConfig struct {
// SandboxCgroupOnly enables cgroup only at podlevel in the host // SandboxCgroupOnly enables cgroup only at podlevel in the host
SandboxCgroupOnly bool SandboxCgroupOnly bool
// DisableGuestSeccomp disables seccomp within the guest
DisableGuestSeccomp bool DisableGuestSeccomp bool
// EnableVCPUsPinning controls whether each vCPU thread should be scheduled to a fixed CPU
EnableVCPUsPinning bool
} }
// valid checks that the sandbox configuration is valid. // valid checks that the sandbox configuration is valid.
@@ -2509,9 +2512,9 @@ func (s *Sandbox) fetchContainers(ctx context.Context) error {
// is then pinned to one fixed CPU in CPUSet. // is then pinned to one fixed CPU in CPUSet.
func (s *Sandbox) checkVCPUsPinning(ctx context.Context) error { func (s *Sandbox) checkVCPUsPinning(ctx context.Context) error {
if s.config == nil { if s.config == nil {
return fmt.Errorf("no hypervisor config found") return fmt.Errorf("no sandbox config found")
} }
if !s.config.HypervisorConfig.EnableVCPUsPinning { if !s.config.EnableVCPUsPinning {
return nil return nil
} }
@@ -2540,19 +2543,14 @@ func (s *Sandbox) checkVCPUsPinning(ctx context.Context) error {
} }
return nil return nil
} }
// if equal, we can use vCPU thread pinning
// if equal, we can now start vCPU threads pinning for i, tid := range vCPUThreadsMap.vcpus {
i := 0 if err := resCtrl.SetThreadAffinity(tid, cpuSetSlice[i:i+1]); err != nil {
for _, tid := range vCPUThreadsMap.vcpus {
unixCPUSet := unix.CPUSet{}
unixCPUSet.Set(cpuSetSlice[i])
if err := unix.SchedSetaffinity(tid, &unixCPUSet); err != nil {
if err := s.resetVCPUsPinning(ctx, vCPUThreadsMap, cpuSetSlice); err != nil { if err := s.resetVCPUsPinning(ctx, vCPUThreadsMap, cpuSetSlice); err != nil {
return err return err
} }
return fmt.Errorf("failed to set vcpu thread %d affinity to cpu %d: %v", tid, cpuSetSlice[i], err) return fmt.Errorf("failed to set vcpu thread %d affinity to cpu %d: %v", tid, cpuSetSlice[i], err)
} }
i++
} }
s.isVCPUsPinningOn = true s.isVCPUsPinningOn = true
return nil return nil
@@ -2560,13 +2558,9 @@ func (s *Sandbox) checkVCPUsPinning(ctx context.Context) error {
// resetVCPUsPinning cancels current pinning and restores default random vCPU threads scheduling // resetVCPUsPinning cancels current pinning and restores default random vCPU threads scheduling
func (s *Sandbox) resetVCPUsPinning(ctx context.Context, vCPUThreadsMap VcpuThreadIDs, cpuSetSlice []int) error { func (s *Sandbox) resetVCPUsPinning(ctx context.Context, vCPUThreadsMap VcpuThreadIDs, cpuSetSlice []int) error {
unixCPUSet := unix.CPUSet{}
for cpuId := range cpuSetSlice {
unixCPUSet.Set(cpuId)
}
for _, tid := range vCPUThreadsMap.vcpus { for _, tid := range vCPUThreadsMap.vcpus {
if err := unix.SchedSetaffinity(tid, &unixCPUSet); err != nil { if err := resCtrl.SetThreadAffinity(tid, cpuSetSlice); err != nil {
return fmt.Errorf("failed to reset vcpu thread %d affinity to default mode: %v", tid, err) return fmt.Errorf("failed to reset vcpu thread %d affinity: %v", tid, err)
} }
} }
return nil return nil