Merge pull request #4310 from egernst/core-sched

shim: add support for core scheduling
This commit is contained in:
Eric Ernst 2022-06-08 17:42:45 +02:00 committed by GitHub
commit 4ebf9d38b9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 61 additions and 1 deletions

View File

@ -12,7 +12,7 @@ Kata Containers design documents:
- [Metrics(Kata 2.0)](kata-2-0-metrics.md)
- [Design for Kata Containers `Lazyload` ability with `nydus`](kata-nydus-design.md)
- [Design for direct-assigned volume](direct-blk-device-assignment.md)
- [Design for core-scheduling](core-scheduling.md)
---
- [Design proposals](proposals)

View File

@ -0,0 +1,12 @@
# Core scheduling
Core scheduling is a Linux kernel feature that allows only trusted tasks to run concurrently on
CPUs sharing compute resources (for example, hyper-threads on a core).
Containerd versions >= 1.6.4 leverage this to treat all of the processes associated with a
given pod or container to be a single group of trusted tasks. To indicate this should be carried
out, containerd sets the `SCHED_CORE` environment variable for each shim it spawns. When this is
set, the Kata Containers shim implementation uses the `prctl` syscall to create a new core scheduling
domain for the shim process itself as well as future VMM processes it will start.
For more details on the core scheduling feature, see the [Linux documentation](https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/core-scheduling.html).

View File

@ -10,6 +10,7 @@ import (
"io"
"os"
sysexec "os/exec"
goruntime "runtime"
"sync"
"syscall"
"time"
@ -31,6 +32,7 @@ import (
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace"
"github.com/kata-containers/kata-containers/src/runtime/pkg/oci"
"github.com/kata-containers/kata-containers/src/runtime/pkg/utils"
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/compatoci"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types"
@ -234,9 +236,19 @@ func (s *service) StartShim(ctx context.Context, opts cdshim.StartOpts) (_ strin
cmd.ExtraFiles = append(cmd.ExtraFiles, f)
goruntime.LockOSThread()
if os.Getenv("SCHED_CORE") != "" {
if err := utils.Create(utils.ProcessGroup); err != nil {
return "", errors.Wrap(err, "enable sched core support")
}
}
if err := cmd.Start(); err != nil {
return "", err
}
goruntime.UnlockOSThread()
defer func() {
if retErr != nil {
cmd.Process.Kill()

View File

@ -0,0 +1,36 @@
// Copyright (c) 2022 Apple Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
package utils
import (
"golang.org/x/sys/unix"
)
// PidType is the type of provided pid value and how it should be treated
type PidType int
const (
pidTypePid = 0
pidTypeThreadGroupId = 1
pidTypeProcessGroupId = 2
// Pid affects the current pid
Pid PidType = pidTypePid
// ThreadGroup affects all threads in the group
ThreadGroup PidType = pidTypeThreadGroupId
// ProcessGroup affects all processes in the group
ProcessGroup PidType = pidTypeProcessGroupId
)
// Create a new sched core domain
func Create(t PidType) error {
return unix.Prctl(unix.PR_SCHED_CORE, unix.PR_SCHED_CORE_CREATE, 0, uintptr(t), 0)
}
// ShareFrom shares the sched core domain from the provided pid
func ShareFrom(pid uint64, t PidType) error {
return unix.Prctl(unix.PR_SCHED_CORE, unix.PR_SCHED_CORE_SHARE_FROM, uintptr(pid), uintptr(t), 0)
}