shim: add support for core scheduling

In Linux 5.14 (and hopefully some backports), core scheduling allows processes
to be co-scheduled within the same domain on SMT-enabled systems.

The containerd implementation sets the core sched domain when launching a shim.
This provides a clean way for each shim (container/pod) to be in its own domain,
and for any additional containers (v2 pods), as well as any exec'd process added
to the container, to be launched within that same domain.

kernel docs: https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/core-scheduling.html

For Kata specifically, we will look for the SCHED_CORE environment variable
to be set to indicate that we should create a new core scheduling domain.

This is equivalent to the containerd shim's PR: e48bbe8394

Fixes: #4309

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
Signed-off-by: Michael Crosby <michael@thepasture.io>
This commit is contained in:
Michael Crosby 2022-05-24 11:23:34 -07:00 committed by Eric Ernst
parent 8a2b82ff51
commit 22b6a94a84
2 changed files with 48 additions and 0 deletions

View File

@ -10,6 +10,7 @@ import (
"io"
"os"
sysexec "os/exec"
goruntime "runtime"
"sync"
"syscall"
"time"
@ -31,6 +32,7 @@ import (
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace"
"github.com/kata-containers/kata-containers/src/runtime/pkg/oci"
"github.com/kata-containers/kata-containers/src/runtime/pkg/utils"
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/compatoci"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types"
@ -234,9 +236,19 @@ func (s *service) StartShim(ctx context.Context, opts cdshim.StartOpts) (_ strin
cmd.ExtraFiles = append(cmd.ExtraFiles, f)
goruntime.LockOSThread()
if os.Getenv("SCHED_CORE") != "" {
if err := utils.Create(utils.ProcessGroup); err != nil {
return "", errors.Wrap(err, "enable sched core support")
}
}
if err := cmd.Start(); err != nil {
return "", err
}
goruntime.UnlockOSThread()
defer func() {
if retErr != nil {
cmd.Process.Kill()

View File

@ -0,0 +1,36 @@
// Copyright (c) 2022 Apple Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
package utils
import (
"golang.org/x/sys/unix"
)
// PidType is the type of provided pid value and how it should be treated.
// Its value is passed as the pid-type argument of the PR_SCHED_CORE prctl
// by Create and ShareFrom.
type PidType int

const (
	// Raw numeric pid-type values handed to the kernel.
	// NOTE(review): these are expected to match the kernel's PIDTYPE_PID,
	// PIDTYPE_TGID and PIDTYPE_PGID scopes — confirm against the kernel
	// core-scheduling documentation.
	pidTypePid            = 0
	pidTypeThreadGroupId  = 1
	pidTypeProcessGroupId = 2

	// Pid affects the current pid
	Pid PidType = pidTypePid
	// ThreadGroup affects all threads in the group
	ThreadGroup PidType = pidTypeThreadGroupId
	// ProcessGroup affects all processes in the group
	ProcessGroup PidType = pidTypeProcessGroupId
)
// Create asks the kernel for a new core scheduling domain by issuing the
// PR_SCHED_CORE prctl with the PR_SCHED_CORE_CREATE command.
//
// t is forwarded as the pid-type argument. The pid argument is always 0
// (NOTE(review): presumably meaning the calling task — confirm against the
// kernel core-scheduling documentation). Any error reported by prctl is
// returned unchanged.
func Create(t PidType) error {
	const targetPid = 0 // pid argument handed to prctl
	scope := uintptr(t)
	return unix.Prctl(unix.PR_SCHED_CORE, unix.PR_SCHED_CORE_CREATE, targetPid, scope, 0)
}
// ShareFrom shares the sched core domain from the provided pid by issuing the
// PR_SCHED_CORE prctl with the PR_SCHED_CORE_SHARE_FROM command.
//
// pid is forwarded as the pid argument and t as the pid-type argument. Any
// error reported by prctl is returned unchanged.
func ShareFrom(pid uint64, t PidType) error {
	source, scope := uintptr(pid), uintptr(t)
	return unix.Prctl(unix.PR_SCHED_CORE, unix.PR_SCHED_CORE_SHARE_FROM, source, scope, 0)
}