From 22b6a94a848b1e06ba5145bf7777b15d124d259f Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Tue, 24 May 2022 11:23:34 -0700 Subject: [PATCH] shim: add support for core scheduling In linux 5.14 and hopefully some backports, core scheduling allows processes to be co scheduled within the same domain on SMT enabled systems. Containerd impl sets the core sched domain when launching a shim. This allows a clean way for each shim(container/pod) to be in its own domain and any additional containers, (v2 pods) be launched with the same domain as well as any exec'd process added to the container. kernel docs: https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/core-scheduling.html For Kata specifically, we will look for SCHED_CORE environment variable to be set to indicate we should create a new schedule core domain. This is equivalent to the containerd shim's PR: https://github.com/containerd/containerd/commit/e48bbe83949a43dedd3e2727452259f99dd81635 Fixes: #4309 Signed-off-by: Eric Ernst Signed-off-by: Michael Crosby --- src/runtime/pkg/containerd-shim-v2/service.go | 12 +++++++ src/runtime/pkg/utils/schedcore.go | 36 +++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 src/runtime/pkg/utils/schedcore.go diff --git a/src/runtime/pkg/containerd-shim-v2/service.go b/src/runtime/pkg/containerd-shim-v2/service.go index 72f3f14a04..27ebe19268 100644 --- a/src/runtime/pkg/containerd-shim-v2/service.go +++ b/src/runtime/pkg/containerd-shim-v2/service.go @@ -10,6 +10,7 @@ import ( "io" "os" sysexec "os/exec" + goruntime "runtime" "sync" "syscall" "time" @@ -31,6 +32,7 @@ import ( "github.com/kata-containers/kata-containers/src/runtime/pkg/katautils" "github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace" "github.com/kata-containers/kata-containers/src/runtime/pkg/oci" + "github.com/kata-containers/kata-containers/src/runtime/pkg/utils" vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers" 
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/compatoci" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" @@ -234,9 +236,19 @@ func (s *service) StartShim(ctx context.Context, opts cdshim.StartOpts) (_ strin cmd.ExtraFiles = append(cmd.ExtraFiles, f) + goruntime.LockOSThread() + if os.Getenv("SCHED_CORE") != "" { + if err := utils.Create(utils.ProcessGroup); err != nil { + return "", errors.Wrap(err, "enable sched core support") + } + } + if err := cmd.Start(); err != nil { return "", err } + + goruntime.UnlockOSThread() + defer func() { if retErr != nil { cmd.Process.Kill() diff --git a/src/runtime/pkg/utils/schedcore.go b/src/runtime/pkg/utils/schedcore.go new file mode 100644 index 0000000000..e5084bfd9a --- /dev/null +++ b/src/runtime/pkg/utils/schedcore.go @@ -0,0 +1,36 @@ +// Copyright (c) 2022 Apple Inc. +// +// SPDX-License-Identifier: Apache-2.0 +// + +package utils + +import ( + "golang.org/x/sys/unix" +) + +// PidType is the type of provided pid value and how it should be treated +type PidType int + +const ( + pidTypePid = 0 + pidTypeThreadGroupId = 1 + pidTypeProcessGroupId = 2 + + // Pid affects the current pid + Pid PidType = pidTypePid + // ThreadGroup affects all threads in the group + ThreadGroup PidType = pidTypeThreadGroupId + // ProcessGroup affects all processes in the group + ProcessGroup PidType = pidTypeProcessGroupId +) + +// Create a new sched core domain +func Create(t PidType) error { + return unix.Prctl(unix.PR_SCHED_CORE, unix.PR_SCHED_CORE_CREATE, 0, uintptr(t), 0) +} + +// ShareFrom shares the sched core domain from the provided pid +func ShareFrom(pid uint64, t PidType) error { + return unix.Prctl(unix.PR_SCHED_CORE, unix.PR_SCHED_CORE_SHARE_FROM, uintptr(pid), uintptr(t), 0) +}