mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-08-04 09:40:25 +00:00
Merge pull request #4310 from egernst/core-sched
shim: add support for core scheduling
This commit is contained in:
commit
4ebf9d38b9
@ -12,7 +12,7 @@ Kata Containers design documents:
|
|||||||
- [Metrics(Kata 2.0)](kata-2-0-metrics.md)
|
- [Metrics(Kata 2.0)](kata-2-0-metrics.md)
|
||||||
- [Design for Kata Containers `Lazyload` ability with `nydus`](kata-nydus-design.md)
|
- [Design for Kata Containers `Lazyload` ability with `nydus`](kata-nydus-design.md)
|
||||||
- [Design for direct-assigned volume](direct-blk-device-assignment.md)
|
- [Design for direct-assigned volume](direct-blk-device-assignment.md)
|
||||||
|
- [Design for core-scheduling](core-scheduling.md)
|
||||||
---
|
---
|
||||||
|
|
||||||
- [Design proposals](proposals)
|
- [Design proposals](proposals)
|
||||||
|
12
docs/design/core-scheduling.md
Normal file
12
docs/design/core-scheduling.md
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
# Core scheduling
|
||||||
|
|
||||||
|
Core scheduling is a Linux kernel feature that allows only trusted tasks to run concurrently on
|
||||||
|
CPUs sharing compute resources (for example, hyper-threads on a core).
|
||||||
|
|
||||||
|
Containerd versions >= 1.6.4 leverage this to treat all of the processes associated with a
|
||||||
|
given pod or container as a single group of trusted tasks. To indicate this should be carried
|
||||||
|
out, containerd sets the `SCHED_CORE` environment variable for each shim it spawns. When this is
|
||||||
|
set, the Kata Containers shim implementation uses the `prctl` syscall to create a new core scheduling
|
||||||
|
domain for the shim process itself as well as future VMM processes it will start.
|
||||||
|
|
||||||
|
For more details on the core scheduling feature, see the [Linux documentation](https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/core-scheduling.html).
|
@ -10,6 +10,7 @@ import (
|
|||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
sysexec "os/exec"
|
sysexec "os/exec"
|
||||||
|
goruntime "runtime"
|
||||||
"sync"
|
"sync"
|
||||||
"syscall"
|
"syscall"
|
||||||
"time"
|
"time"
|
||||||
@ -31,6 +32,7 @@ import (
|
|||||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
|
||||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace"
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace"
|
||||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/oci"
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/oci"
|
||||||
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/utils"
|
||||||
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
|
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
|
||||||
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/compatoci"
|
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/compatoci"
|
||||||
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types"
|
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types"
|
||||||
@ -234,9 +236,19 @@ func (s *service) StartShim(ctx context.Context, opts cdshim.StartOpts) (_ strin
|
|||||||
|
|
||||||
cmd.ExtraFiles = append(cmd.ExtraFiles, f)
|
cmd.ExtraFiles = append(cmd.ExtraFiles, f)
|
||||||
|
|
||||||
|
goruntime.LockOSThread()
|
||||||
|
if os.Getenv("SCHED_CORE") != "" {
|
||||||
|
if err := utils.Create(utils.ProcessGroup); err != nil {
|
||||||
|
return "", errors.Wrap(err, "enable sched core support")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if err := cmd.Start(); err != nil {
|
if err := cmd.Start(); err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
goruntime.UnlockOSThread()
|
||||||
|
|
||||||
defer func() {
|
defer func() {
|
||||||
if retErr != nil {
|
if retErr != nil {
|
||||||
cmd.Process.Kill()
|
cmd.Process.Kill()
|
||||||
|
36
src/runtime/pkg/utils/schedcore.go
Normal file
36
src/runtime/pkg/utils/schedcore.go
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
// Copyright (c) 2022 Apple Inc.
|
||||||
|
//
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
package utils
|
||||||
|
|
||||||
|
import (
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
)
|
||||||
|
|
||||||
|
// PidType selects the scope that a core scheduling operation applies to.
// Create and ShareFrom convert it to uintptr and pass it unchanged as the
// fourth argument of the PR_SCHED_CORE prctl call.
|
||||||
|
type PidType int
|
||||||
|
|
||||||
|
const (
|
||||||
|
// Raw numeric scope values backing the exported PidType constants below.
// NOTE(review): these presumably mirror the kernel's pid-type numbering
// (thread / thread group / process group) - confirm against the kernel docs.
pidTypePid = 0
|
||||||
|
pidTypeThreadGroupId = 1
|
||||||
|
pidTypeProcessGroupId = 2
|
||||||
|
|
||||||
|
// Pid limits the operation to the single task identified by the pid.
|
||||||
|
Pid PidType = pidTypePid
|
||||||
|
// ThreadGroup applies the operation to all threads in the pid's thread group.
|
||||||
|
ThreadGroup PidType = pidTypeThreadGroupId
|
||||||
|
// ProcessGroup applies the operation to all processes in the pid's process group.
|
||||||
|
ProcessGroup PidType = pidTypeProcessGroupId
|
||||||
|
)
|
||||||
|
|
||||||
|
// Create creates a new core scheduling domain by issuing
// prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE) with a pid argument of 0 and t as
// the scope. It returns whatever error the prctl call reports, or nil.
//
// NOTE(review): a pid of 0 presumably targets the calling task, so callers
// likely need to stay on the same OS thread until any child that should
// inherit the domain has been started - confirm against the kernel docs.
|
||||||
|
func Create(t PidType) error {
|
||||||
|
// Argument order after the command: pid (0), scope (t), and a final unused 0.
return unix.Prctl(unix.PR_SCHED_CORE, unix.PR_SCHED_CORE_CREATE, 0, uintptr(t), 0)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ShareFrom shares the sched core domain from the provided pid by issuing
// prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_FROM) with pid as the source and t
// as the scope. It returns whatever error the prctl call reports, or nil.
|
||||||
|
func ShareFrom(pid uint64, t PidType) error {
|
||||||
|
// pid is narrowed from uint64 to uintptr for the syscall argument.
// Argument order after the command: pid, scope (t), and a final unused 0.
return unix.Prctl(unix.PR_SCHED_CORE, unix.PR_SCHED_CORE_SHARE_FROM, uintptr(pid), uintptr(t), 0)
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user