cgroups: add host cgroup support

Fixes #344

Add host cgroup support for kata.

This commits only adds cpu.cfs_period and cpu.cfs_quota support.

It will create 3-level hierarchy, take "cpu" cgroup as an example:

```
/sys/fs/cgroup
|---cpu
   |---kata
      |---<sandbox-id>
         |--vcpu
      |---<sandbox-id>
```

* `vc` cgroup is common parent for all kata-container sandbox, it won't be removed
after sandbox removed. This cgroup has no limitation.
* `<sandbox-id>` cgroup is the layer for each sandbox, it contains all other qemu
threads except for vcpu threads. In future, we can consider putting all shim
processes and proxy process here. This cgroup has no limitation yet.
* `vcpu` cgroup contains vcpu threads from qemu. Currently cpu quota and period
constraint applies to this cgroup.

Signed-off-by: Wei Zhang <zhangwei555@huawei.com>
Signed-off-by: Jingxiao Lu <lujingxiao@huawei.com>
This commit is contained in:
Wei Zhang 2018-09-13 20:10:29 +08:00
parent 523d49c076
commit 34fe3b9d6d
13 changed files with 468 additions and 586 deletions

View File

@ -10,9 +10,7 @@ import (
"context" "context"
"errors" "errors"
"fmt" "fmt"
"io/ioutil"
"os" "os"
"path/filepath"
"strings" "strings"
vc "github.com/kata-containers/runtime/virtcontainers" vc "github.com/kata-containers/runtime/virtcontainers"
@ -177,26 +175,6 @@ func create(ctx context.Context, containerID, bundlePath, console, pidFilePath s
} }
} }
// config.json provides a cgroups path that has to be used to create "tasks"
// and "cgroups.procs" files. Those files have to be filled with a PID, which
// is shim's in our case. This is mandatory to make sure there is no one
// else (like Docker) trying to create those files on our behalf. We want to
// know those files location so that we can remove them when delete is called.
cgroupsPathList, err := processCgroupsPath(ctx, ociSpec, containerType.IsSandbox())
if err != nil {
return err
}
// cgroupsDirPath is CgroupsPath fetch from OCI spec
var cgroupsDirPath string
if ociSpec.Linux != nil {
cgroupsDirPath = ociSpec.Linux.CgroupsPath
}
if err := createCgroupsFiles(ctx, containerID, cgroupsDirPath, cgroupsPathList, process.Pid); err != nil {
return err
}
// Creation of PID file has to be the last thing done in the create // Creation of PID file has to be the last thing done in the create
// because containerd considers the create complete after this file // because containerd considers the create complete after this file
// is created. // is created.
@ -379,52 +357,6 @@ func createContainer(ctx context.Context, ociSpec oci.CompatOCISpec, containerID
return c.Process(), nil return c.Process(), nil
} }
func createCgroupsFiles(ctx context.Context, containerID string, cgroupsDirPath string, cgroupsPathList []string, pid int) error {
span, _ := trace(ctx, "createCgroupsFiles")
defer span.Finish()
if len(cgroupsPathList) == 0 {
kataLog.WithField("pid", pid).Info("Cgroups files not created because cgroupsPath was empty")
return nil
}
for _, cgroupsPath := range cgroupsPathList {
if err := os.MkdirAll(cgroupsPath, cgroupsDirMode); err != nil {
return err
}
if strings.Contains(cgroupsPath, "cpu") && cgroupsDirPath != "" {
parent := strings.TrimSuffix(cgroupsPath, cgroupsDirPath)
copyParentCPUSet(cgroupsPath, parent)
}
tasksFilePath := filepath.Join(cgroupsPath, cgroupsTasksFile)
procsFilePath := filepath.Join(cgroupsPath, cgroupsProcsFile)
pidStr := fmt.Sprintf("%d", pid)
for _, path := range []string{tasksFilePath, procsFilePath} {
f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE, cgroupsFileMode)
if err != nil {
return err
}
defer f.Close()
n, err := f.WriteString(pidStr)
if err != nil {
return err
}
if n < len(pidStr) {
return fmt.Errorf("Could not write pid to %q: only %d bytes written out of %d",
path, n, len(pidStr))
}
}
}
return nil
}
func createPIDFile(ctx context.Context, pidFilePath string, pid int) error { func createPIDFile(ctx context.Context, pidFilePath string, pid int) error {
span, _ := trace(ctx, "createPIDFile") span, _ := trace(ctx, "createPIDFile")
defer span.Finish() defer span.Finish()
@ -457,48 +389,3 @@ func createPIDFile(ctx context.Context, pidFilePath string, pid int) error {
return nil return nil
} }
// copyParentCPUSet copies the cpuset.cpus and cpuset.mems from the parent
// directory to the current directory if the file's contents are 0
func copyParentCPUSet(current, parent string) error {
currentCpus, currentMems, err := getCPUSet(current)
if err != nil {
return err
}
parentCpus, parentMems, err := getCPUSet(parent)
if err != nil {
return err
}
if len(parentCpus) < 1 || len(parentMems) < 1 {
return nil
}
var cgroupsFileMode = os.FileMode(0600)
if isEmptyString(currentCpus) {
if err := writeFile(filepath.Join(current, "cpuset.cpus"), string(parentCpus), cgroupsFileMode); err != nil {
return err
}
}
if isEmptyString(currentMems) {
if err := writeFile(filepath.Join(current, "cpuset.mems"), string(parentMems), cgroupsFileMode); err != nil {
return err
}
}
return nil
}
func getCPUSet(parent string) (cpus []byte, mems []byte, err error) {
if cpus, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.cpus")); err != nil {
return
}
if mems, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.mems")); err != nil {
return
}
return cpus, mems, nil
}

View File

@ -36,22 +36,6 @@ const (
var testStrPID = fmt.Sprintf("%d", testPID) var testStrPID = fmt.Sprintf("%d", testPID)
func mockCPUSetContent(contents map[string]string) error {
for filePath, data := range contents {
if err := writeFile(filePath, data, testFileMode); err != nil {
return err
}
}
return nil
}
func testCreateCgroupsFilesSuccessful(t *testing.T, cgroupsDirPath string, cgroupsPathList []string, pid int) {
if err := createCgroupsFiles(context.Background(), "foo", cgroupsDirPath, cgroupsPathList, pid); err != nil {
t.Fatalf("This test should succeed (cgroupsPath %q, pid %d): %s", cgroupsPathList, pid, err)
}
}
// return the value of the *last* param with the specified key // return the value of the *last* param with the specified key
func findLastParam(key string, params []vc.Param) (string, error) { func findLastParam(key string, params []vc.Param) (string, error) {
if key == "" { if key == "" {
@ -74,62 +58,6 @@ func findLastParam(key string, params []vc.Param) (string, error) {
return "", fmt.Errorf("no param called %q found", name) return "", fmt.Errorf("no param called %q found", name)
} }
func TestCgroupsFilesEmptyCgroupsPathSuccessful(t *testing.T) {
testCreateCgroupsFilesSuccessful(t, "", []string{}, testPID)
}
func TestCreateCgroupsFilesFailToWriteFile(t *testing.T) {
if os.Geteuid() == 0 {
// The os.FileMode(0000) trick doesn't work for root.
t.Skip(testDisabledNeedNonRoot)
}
assert := assert.New(t)
tmpdir, err := ioutil.TempDir("", "")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
// create the file as a directory to force an error
file := filepath.Join(tmpdir, "cgroups-file")
err = os.MkdirAll(file, os.FileMode(0000))
assert.NoError(err)
files := []string{file}
err = createCgroupsFiles(context.Background(), "foo", "cgroups-file", files, testPID)
assert.Error(err)
}
func TestCgroupsFilesNonEmptyCgroupsPathSuccessful(t *testing.T) {
cgroupsPath, err := ioutil.TempDir(testDir, "cgroups-path-")
if err != nil {
t.Fatalf("Could not create temporary cgroups directory: %s", err)
}
testCreateCgroupsFilesSuccessful(t, "cgroups-path-", []string{cgroupsPath}, testPID)
defer os.RemoveAll(cgroupsPath)
tasksPath := filepath.Join(cgroupsPath, cgroupsTasksFile)
procsPath := filepath.Join(cgroupsPath, cgroupsProcsFile)
for _, path := range []string{tasksPath, procsPath} {
if _, err := os.Stat(path); err != nil {
t.Fatalf("Path %q should have been created: %s", path, err)
}
fileBytes, err := ioutil.ReadFile(path)
if err != nil {
t.Fatalf("Could not read %q previously created: %s", path, err)
}
if string(fileBytes) != testStrPID {
t.Fatalf("PID %s read from %q different from expected PID %s", string(fileBytes), path, testStrPID)
}
}
}
func TestCreatePIDFileSuccessful(t *testing.T) { func TestCreatePIDFileSuccessful(t *testing.T) {
pidDirPath, err := ioutil.TempDir(testDir, "pid-path-") pidDirPath, err := ioutil.TempDir(testDir, "pid-path-")
if err != nil { if err != nil {
@ -1087,56 +1015,6 @@ func TestCreateCreateContainer(t *testing.T) {
} }
} }
func TestCopyParentCPUSetFail(t *testing.T) {
assert := assert.New(t)
cgroupsPath, err := ioutil.TempDir(testDir, "cgroups-path-")
assert.NoError(err)
defer os.RemoveAll(cgroupsPath)
err = copyParentCPUSet(cgroupsPath, testDir)
assert.Error(err)
}
func TestCopyParentCPUSetSuccessful(t *testing.T) {
assert := assert.New(t)
cgroupsPath, err := ioutil.TempDir(testDir, "cgroups-path-")
assert.NoError(err)
defer os.RemoveAll(cgroupsPath)
cgroupsSrcPath := filepath.Join(cgroupsPath, "src")
err = os.Mkdir(cgroupsSrcPath, testDirMode)
assert.NoError(err)
err = mockCPUSetContent(map[string]string{
filepath.Join(cgroupsSrcPath, "cpuset.cpus"): "0-1",
filepath.Join(cgroupsSrcPath, "cpuset.mems"): "0-1",
})
assert.NoError(err)
cgroupsDstPath := filepath.Join(cgroupsPath, "dst")
err = os.Mkdir(cgroupsDstPath, testDirMode)
assert.NoError(err)
fd, err := os.Create(filepath.Join(cgroupsDstPath, "cpuset.cpus"))
assert.NoError(err)
fd.Close()
fd, err = os.Create(filepath.Join(cgroupsDstPath, "cpuset.mems"))
assert.NoError(err)
fd.Close()
err = copyParentCPUSet(cgroupsDstPath, cgroupsSrcPath)
assert.NoError(err)
currentCpus, currentMems, err := getCPUSet(cgroupsDstPath)
assert.NoError(err)
assert.False(isEmptyString(currentCpus))
assert.False(isEmptyString(currentMems))
}
func TestSetKernelParams(t *testing.T) { func TestSetKernelParams(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)

View File

@ -123,19 +123,7 @@ func delete(ctx context.Context, containerID string, force bool) error {
return err return err
} }
// In order to prevent any file descriptor leak related to cgroups files return delContainerIDMapping(ctx, containerID)
// that have been previously created, we have to remove them before this
// function returns.
cgroupsPathList, err := processCgroupsPath(ctx, ociSpec, containerType.IsSandbox())
if err != nil {
return err
}
if err := delContainerIDMapping(ctx, containerID); err != nil {
return err
}
return removeCgroupsPath(ctx, containerID, cgroupsPathList)
} }
func deleteSandbox(ctx context.Context, sandboxID string) error { func deleteSandbox(ctx context.Context, sandboxID string) error {

View File

@ -8,7 +8,6 @@ package main
import ( import (
"bufio" "bufio"
"context" "context"
"errors"
"fmt" "fmt"
"io/ioutil" "io/ioutil"
"net" "net"
@ -20,18 +19,11 @@ import (
"github.com/containernetworking/plugins/pkg/ns" "github.com/containernetworking/plugins/pkg/ns"
vc "github.com/kata-containers/runtime/virtcontainers" vc "github.com/kata-containers/runtime/virtcontainers"
"github.com/kata-containers/runtime/virtcontainers/pkg/oci"
"github.com/opencontainers/runc/libcontainer/utils" "github.com/opencontainers/runc/libcontainer/utils"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
) )
// Contants related to cgroup memory directory // Contants related to cgroup memory directory
const ( const (
cgroupsTasksFile = "tasks"
cgroupsProcsFile = "cgroup.procs"
cgroupsDirMode = os.FileMode(0750)
cgroupsFileMode = os.FileMode(0640)
ctrsMappingDirMode = os.FileMode(0750) ctrsMappingDirMode = os.FileMode(0750)
// Filesystem type corresponding to CGROUP_SUPER_MAGIC as listed // Filesystem type corresponding to CGROUP_SUPER_MAGIC as listed
@ -39,8 +31,6 @@ const (
cgroupFsType = 0x27e0eb cgroupFsType = 0x27e0eb
) )
var errNeedLinuxResource = errors.New("Linux resource cannot be empty")
var cgroupsDirPath string var cgroupsDirPath string
var procMountInfo = "/proc/self/mountinfo" var procMountInfo = "/proc/self/mountinfo"
@ -125,128 +115,6 @@ func validCreateParams(ctx context.Context, containerID, bundlePath string) (str
return resolved, nil return resolved, nil
} }
// processCgroupsPath process the cgroups path as expected from the
// OCI runtime specification. It returns a list of complete paths
// that should be created and used for every specified resource.
func processCgroupsPath(ctx context.Context, ociSpec oci.CompatOCISpec, isSandbox bool) ([]string, error) {
span, _ := trace(ctx, "processCgroupsPath")
defer span.Finish()
var cgroupsPathList []string
if ociSpec.Linux.CgroupsPath == "" {
return []string{}, nil
}
if ociSpec.Linux.Resources == nil {
return []string{}, nil
}
if ociSpec.Linux.Resources.Memory != nil {
memCgroupsPath, err := processCgroupsPathForResource(ociSpec, "memory", isSandbox)
if err != nil {
return []string{}, err
}
if memCgroupsPath != "" {
cgroupsPathList = append(cgroupsPathList, memCgroupsPath)
}
}
if ociSpec.Linux.Resources.CPU != nil {
cpuCgroupsPath, err := processCgroupsPathForResource(ociSpec, "cpu", isSandbox)
if err != nil {
return []string{}, err
}
if cpuCgroupsPath != "" {
cgroupsPathList = append(cgroupsPathList, cpuCgroupsPath)
}
}
if ociSpec.Linux.Resources.Pids != nil {
pidsCgroupsPath, err := processCgroupsPathForResource(ociSpec, "pids", isSandbox)
if err != nil {
return []string{}, err
}
if pidsCgroupsPath != "" {
cgroupsPathList = append(cgroupsPathList, pidsCgroupsPath)
}
}
if ociSpec.Linux.Resources.BlockIO != nil {
blkIOCgroupsPath, err := processCgroupsPathForResource(ociSpec, "blkio", isSandbox)
if err != nil {
return []string{}, err
}
if blkIOCgroupsPath != "" {
cgroupsPathList = append(cgroupsPathList, blkIOCgroupsPath)
}
}
return cgroupsPathList, nil
}
func processCgroupsPathForResource(ociSpec oci.CompatOCISpec, resource string, isSandbox bool) (string, error) {
if resource == "" {
return "", errNeedLinuxResource
}
var err error
cgroupsDirPath, err = getCgroupsDirPath(procMountInfo)
if err != nil {
return "", fmt.Errorf("get CgroupsDirPath error: %s", err)
}
// Relative cgroups path provided.
if filepath.IsAbs(ociSpec.Linux.CgroupsPath) == false {
return filepath.Join(cgroupsDirPath, resource, ociSpec.Linux.CgroupsPath), nil
}
// Absolute cgroups path provided.
var cgroupMount specs.Mount
cgroupMountFound := false
for _, mount := range ociSpec.Mounts {
if mount.Type == "cgroup" {
cgroupMount = mount
cgroupMountFound = true
break
}
}
if !cgroupMountFound {
// According to the OCI spec, an absolute path should be
// interpreted as relative to the system cgroup mount point
// when there is no cgroup mount point.
return filepath.Join(cgroupsDirPath, resource, ociSpec.Linux.CgroupsPath), nil
}
if cgroupMount.Destination == "" {
return "", fmt.Errorf("cgroupsPath is absolute, cgroup mount destination cannot be empty")
}
cgroupPath := filepath.Join(cgroupMount.Destination, resource)
// It is not an error to have this cgroup not mounted. It is usually
// due to an old kernel version with missing support for specific
// cgroups.
fields := logrus.Fields{
"path": cgroupPath,
"type": "cgroup",
}
if !isCgroupMounted(cgroupPath) {
kataLog.WithFields(fields).Info("path not mounted")
return "", nil
}
kataLog.WithFields(fields).Info("path mounted")
return filepath.Join(cgroupPath, ociSpec.Linux.CgroupsPath), nil
}
func isCgroupMounted(cgroupPath string) bool { func isCgroupMounted(cgroupPath string) bool {
var statFs syscall.Statfs_t var statFs syscall.Statfs_t

View File

@ -13,17 +13,13 @@ import (
"net" "net"
"os" "os"
"path/filepath" "path/filepath"
"reflect"
"syscall"
"testing" "testing"
"time" "time"
vc "github.com/kata-containers/runtime/virtcontainers" vc "github.com/kata-containers/runtime/virtcontainers"
vcAnnotations "github.com/kata-containers/runtime/virtcontainers/pkg/annotations" vcAnnotations "github.com/kata-containers/runtime/virtcontainers/pkg/annotations"
"github.com/kata-containers/runtime/virtcontainers/pkg/oci"
"github.com/kata-containers/runtime/virtcontainers/pkg/vcmock" "github.com/kata-containers/runtime/virtcontainers/pkg/vcmock"
"github.com/opencontainers/runc/libcontainer/utils" "github.com/opencontainers/runc/libcontainer/utils"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
) )
@ -32,38 +28,6 @@ var (
consoleSocketPathTest = "console-socket-test" consoleSocketPathTest = "console-socket-test"
) )
type cgroupTestDataType struct {
resource string
linuxSpec *specs.LinuxResources
}
var cgroupTestData = []cgroupTestDataType{
{
"memory",
&specs.LinuxResources{
Memory: &specs.LinuxMemory{},
},
},
{
"cpu",
&specs.LinuxResources{
CPU: &specs.LinuxCPU{},
},
},
{
"pids",
&specs.LinuxResources{
Pids: &specs.LinuxPids{},
},
},
{
"blkio",
&specs.LinuxResources{
BlockIO: &specs.LinuxBlockIO{},
},
},
}
func TestGetContainerInfoContainerIDEmptyFailure(t *testing.T) { func TestGetContainerInfoContainerIDEmptyFailure(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
status, _, err := getContainerInfo(context.Background(), "") status, _, err := getContainerInfo(context.Background(), "")
@ -181,141 +145,6 @@ func TestValidCreateParamsBundleIsAFile(t *testing.T) {
assert.False(vcmock.IsMockError(err)) assert.False(vcmock.IsMockError(err))
} }
func testProcessCgroupsPath(t *testing.T, ociSpec oci.CompatOCISpec, expected []string) {
assert := assert.New(t)
result, err := processCgroupsPath(context.Background(), ociSpec, true)
assert.NoError(err)
if reflect.DeepEqual(result, expected) == false {
assert.FailNow("DeepEqual failed", "Result path %q should match the expected one %q", result, expected)
}
}
func TestProcessCgroupsPathEmptyPathSuccessful(t *testing.T) {
ociSpec := oci.CompatOCISpec{}
ociSpec.Linux = &specs.Linux{
CgroupsPath: "",
}
testProcessCgroupsPath(t, ociSpec, []string{})
}
func TestProcessCgroupsPathEmptyResources(t *testing.T) {
ociSpec := oci.CompatOCISpec{}
ociSpec.Linux = &specs.Linux{
CgroupsPath: "foo",
}
testProcessCgroupsPath(t, ociSpec, []string{})
}
func TestProcessCgroupsPathRelativePathSuccessful(t *testing.T) {
relativeCgroupsPath := "relative/cgroups/path"
cgroupsDirPath = "/foo/runtime/base"
ociSpec := oci.CompatOCISpec{}
ociSpec.Linux = &specs.Linux{
CgroupsPath: relativeCgroupsPath,
}
for _, d := range cgroupTestData {
ociSpec.Linux.Resources = d.linuxSpec
p := filepath.Join(cgroupsDirPath, d.resource, relativeCgroupsPath)
testProcessCgroupsPath(t, ociSpec, []string{p})
}
}
func TestProcessCgroupsPathAbsoluteNoCgroupMountSuccessful(t *testing.T) {
absoluteCgroupsPath := "/absolute/cgroups/path"
cgroupsDirPath = "/foo/runtime/base"
ociSpec := oci.CompatOCISpec{}
ociSpec.Linux = &specs.Linux{
CgroupsPath: absoluteCgroupsPath,
}
for _, d := range cgroupTestData {
ociSpec.Linux.Resources = d.linuxSpec
p := filepath.Join(cgroupsDirPath, d.resource, absoluteCgroupsPath)
testProcessCgroupsPath(t, ociSpec, []string{p})
}
}
func TestProcessCgroupsPathAbsoluteNoCgroupMountDestinationFailure(t *testing.T) {
assert := assert.New(t)
absoluteCgroupsPath := "/absolute/cgroups/path"
ociSpec := oci.CompatOCISpec{}
ociSpec.Mounts = []specs.Mount{
{
Type: "cgroup",
},
}
ociSpec.Linux = &specs.Linux{
CgroupsPath: absoluteCgroupsPath,
}
for _, d := range cgroupTestData {
ociSpec.Linux.Resources = d.linuxSpec
for _, isSandbox := range []bool{true, false} {
_, err := processCgroupsPath(context.Background(), ociSpec, isSandbox)
assert.Error(err, "This test should fail because no cgroup mount destination provided")
}
}
}
func TestProcessCgroupsPathAbsoluteSuccessful(t *testing.T) {
assert := assert.New(t)
if os.Geteuid() != 0 {
t.Skip(testDisabledNeedRoot)
}
memoryResource := "memory"
absoluteCgroupsPath := "/cgroup/mount/destination"
cgroupMountDest, err := ioutil.TempDir("", "cgroup-memory-")
assert.NoError(err)
defer os.RemoveAll(cgroupMountDest)
resourceMountPath := filepath.Join(cgroupMountDest, memoryResource)
err = os.MkdirAll(resourceMountPath, cgroupsDirMode)
assert.NoError(err)
err = syscall.Mount("go-test", resourceMountPath, "cgroup", 0, memoryResource)
assert.NoError(err)
defer syscall.Unmount(resourceMountPath, 0)
ociSpec := oci.CompatOCISpec{}
ociSpec.Linux = &specs.Linux{
Resources: &specs.LinuxResources{
Memory: &specs.LinuxMemory{},
},
CgroupsPath: absoluteCgroupsPath,
}
ociSpec.Mounts = []specs.Mount{
{
Type: "cgroup",
Destination: cgroupMountDest,
},
}
testProcessCgroupsPath(t, ociSpec, []string{filepath.Join(resourceMountPath, absoluteCgroupsPath)})
}
func TestSetupConsoleExistingConsolePathSuccessful(t *testing.T) { func TestSetupConsoleExistingConsolePathSuccessful(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
console, err := setupConsole(consolePathTest, "") console, err := setupConsole(consolePathTest, "")
@ -436,31 +265,6 @@ func TestIsCgroupMounted(t *testing.T) {
assert.True(isCgroupMounted(memoryCgroupPath), "%s is a cgroup", memoryCgroupPath) assert.True(isCgroupMounted(memoryCgroupPath), "%s is a cgroup", memoryCgroupPath)
} }
func TestProcessCgroupsPathForResource(t *testing.T) {
assert := assert.New(t)
tmpdir, err := ioutil.TempDir("", "")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
bundlePath := filepath.Join(tmpdir, "bundle")
err = makeOCIBundle(bundlePath)
assert.NoError(err)
ociConfigFile := filepath.Join(bundlePath, specConfig)
assert.True(fileExists(ociConfigFile))
spec, err := readOCIConfigFile(ociConfigFile)
assert.NoError(err)
for _, isSandbox := range []bool{true, false} {
_, err := processCgroupsPathForResource(spec, "", isSandbox)
assert.Error(err)
assert.False(vcmock.IsMockError(err))
}
}
func TestGetCgroupsDirPath(t *testing.T) { func TestGetCgroupsDirPath(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)

View File

@ -6,7 +6,6 @@
package main package main
import ( import (
"bytes"
"fmt" "fmt"
"io/ioutil" "io/ioutil"
"os" "os"
@ -227,11 +226,6 @@ func writeFile(filePath string, data string, fileMode os.FileMode) error {
return nil return nil
} }
// isEmptyString return if string is empty
func isEmptyString(b []byte) bool {
return len(bytes.Trim(b, "\n")) == 0
}
// fileSize returns the number of bytes in the specified file // fileSize returns the number of bytes in the specified file
func fileSize(file string) (int64, error) { func fileSize(file string) (int64, error) {
st := syscall.Stat_t{} st := syscall.Stat_t{}

View File

@ -126,6 +126,11 @@ func createSandboxFromConfig(ctx context.Context, sandboxConfig SandboxConfig, f
return nil, err return nil, err
} }
// Setup host cgroups
if err := s.setupCgroups(); err != nil {
return nil, err
}
return s, nil return s, nil
} }

191
virtcontainers/cgroups.go Normal file
View File

@ -0,0 +1,191 @@
// Copyright (c) 2018 Huawei Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
package virtcontainers
import (
"encoding/json"
"fmt"
"github.com/containerd/cgroups"
"github.com/kata-containers/runtime/virtcontainers/pkg/annotations"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
const (
vcpuGroupName = "vcpu"
defaultCgroupParent = "/kata"
)
type sandboxCgroups struct {
commonParent cgroups.Cgroup
sandboxSub cgroups.Cgroup
vcpuSub cgroups.Cgroup
}
func (s *Sandbox) newCgroups() error {
// New will still succeed when cgroup exists
// create common parent for all kata-containers
// e.g. /sys/fs/cgroup/cpu/vc
parent, err := cgroups.New(cgroups.V1,
cgroups.StaticPath(defaultCgroupParent), &specs.LinuxResources{})
if err != nil {
return fmt.Errorf("failed to create cgroup for %q", defaultCgroupParent)
}
// create sub-cgroup for each sandbox
// e.g. /sys/fs/cgroup/cpu/vc/<sandbox>
sandboxSub, err := parent.New(s.id, &specs.LinuxResources{})
if err != nil {
return fmt.Errorf("failed to create cgroup for %s/%s", defaultCgroupParent, s.id)
}
// create sub-cgroup for vcpu threads
vcpuSub, err := sandboxSub.New(vcpuGroupName, &specs.LinuxResources{})
if err != nil {
return fmt.Errorf("failed to create cgroup for %s/%s/%s", defaultCgroupParent, s.id, vcpuGroupName)
}
s.cgroup = &sandboxCgroups{
commonParent: parent,
sandboxSub: sandboxSub,
vcpuSub: vcpuSub,
}
return nil
}
func (s *Sandbox) destroyCgroups() error {
if s.cgroup == nil {
s.Logger().Warningf("cgroup is not initialized, no need to destroy")
return nil
}
// first move all processes in subgroup to parent in case live process blocks
// deletion of cgroup
if err := s.cgroup.sandboxSub.MoveTo(s.cgroup.commonParent); err != nil {
return fmt.Errorf("failed to clear cgroup processes")
}
return s.cgroup.sandboxSub.Delete()
}
func (s *Sandbox) setupCgroups() error {
if s.cgroup == nil {
return fmt.Errorf("failed to setup uninitialized cgroup for sandbox")
}
resource, err := s.mergeSpecResource()
if err != nil {
return err
}
if err := s.applyCPUCgroup(resource); err != nil {
return err
}
return nil
}
func (s *Sandbox) applyCPUCgroup(rc *specs.LinuxResources) error {
if s.cgroup == nil {
return fmt.Errorf("failed to setup uninitialized cgroup for sandbox")
}
// apply cpu constraint to vcpu cgroup
if err := s.cgroup.vcpuSub.Update(rc); err != nil {
return err
}
// when new container joins, new CPU could be hotplugged, so we
// have to query fresh vcpu info from hypervisor for every time.
tids, err := s.hypervisor.getThreadIDs()
if err != nil || tids == nil {
return fmt.Errorf("failed to get thread ids from hypervisor: %v", err)
}
// use Add() to add vcpu thread to s.cgroup, it will write thread id to
// `cgroup.procs` which will move all threads in qemu process to this cgroup
// immediately as default behaviour.
if len(tids.vcpus) > 0 {
if err := s.cgroup.sandboxSub.Add(cgroups.Process{
Pid: tids.vcpus[0],
}); err != nil {
return err
}
}
for _, i := range tids.vcpus {
if i <= 0 {
continue
}
// In contrast, AddTask will write thread id to `tasks`
// After this, vcpu threads are in "vcpu" sub-cgroup, other threads in
// qemu will be left in parent cgroup untouched.
if err := s.cgroup.vcpuSub.AddTask(cgroups.Process{
Pid: i,
}); err != nil {
return err
}
}
return nil
}
func (s *Sandbox) mergeSpecResource() (*specs.LinuxResources, error) {
if s.config == nil {
return nil, fmt.Errorf("sandbox config is nil")
}
resource := &specs.LinuxResources{
CPU: &specs.LinuxCPU{},
}
for _, c := range s.config.Containers {
config, ok := c.Annotations[annotations.ConfigJSONKey]
if !ok {
s.Logger().WithField("container", c.ID).Warningf("failed to find config from container annotations")
continue
}
var spec specs.Spec
if err := json.Unmarshal([]byte(config), &spec); err != nil {
return nil, err
}
// TODO: how to handle empty/unlimited resource?
// maybe we should add a default CPU/Memory delta when no
// resource limit is given. -- @WeiZhang555
if spec.Linux == nil || spec.Linux.Resources == nil {
continue
}
// calculate cpu quota and period
s.mergeCPUResource(resource, spec.Linux.Resources)
}
return resource, nil
}
func (s *Sandbox) mergeCPUResource(orig, rc *specs.LinuxResources) {
if orig.CPU == nil {
orig.CPU = &specs.LinuxCPU{}
}
if rc.CPU != nil && rc.CPU.Quota != nil && rc.CPU.Period != nil &&
*rc.CPU.Quota > 0 && *rc.CPU.Period > 0 {
if orig.CPU.Period == nil {
orig.CPU.Period = rc.CPU.Period
orig.CPU.Quota = rc.CPU.Quota
} else {
// this is an example to show how it works:
// container A and `orig` has quota: 5000 and period 10000
// here comes container B with quota 40 and period 100,
// so use previous period 10000 as a baseline, container B
// has proportional resource of quota 4000 and period 10000, calculated as
// delta := 40 / 100 * 10000 = 4000
// and final `*orig.CPU.Quota` = 5000 + 4000 = 9000
delta := float64(*rc.CPU.Quota) / float64(*rc.CPU.Period) * float64(*orig.CPU.Period)
*orig.CPU.Quota += int64(delta)
}
}
}

View File

@ -0,0 +1,211 @@
// Copyright (c) 2018 Huawei Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
package virtcontainers
import (
"bufio"
"encoding/json"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"reflect"
"strings"
"testing"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/stretchr/testify/assert"
"github.com/kata-containers/runtime/virtcontainers/pkg/annotations"
)
func getCgroupDestination(subsystem string) (string, error) {
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return "", err
}
defer f.Close()
s := bufio.NewScanner(f)
for s.Scan() {
if err := s.Err(); err != nil {
return "", err
}
fields := strings.Fields(s.Text())
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
if opt == subsystem {
return fields[4], nil
}
}
}
return "", fmt.Errorf("failed to find cgroup mountpoint for %q", subsystem)
}
func TestMergeSpecResource(t *testing.T) {
s := &Sandbox{
config: &SandboxConfig{
Containers: []ContainerConfig{
{
ID: "containerA",
Annotations: make(map[string]string),
},
{
ID: "containerA",
Annotations: make(map[string]string),
},
},
},
}
contA := s.config.Containers[0]
contB := s.config.Containers[1]
getIntP := func(x int64) *int64 { return &x }
getUintP := func(x uint64) *uint64 { return &x }
type testData struct {
first *specs.LinuxResources
second *specs.LinuxResources
expected *specs.LinuxResources
}
for _, testdata := range []testData{
{
nil,
nil,
&specs.LinuxResources{CPU: &specs.LinuxCPU{}},
},
{
nil,
&specs.LinuxResources{},
&specs.LinuxResources{CPU: &specs.LinuxCPU{}},
},
{
&specs.LinuxResources{CPU: &specs.LinuxCPU{Quota: getIntP(0), Period: getUintP(100000)}},
&specs.LinuxResources{CPU: &specs.LinuxCPU{Quota: getIntP(20000), Period: getUintP(100000)}},
&specs.LinuxResources{CPU: &specs.LinuxCPU{Quota: getIntP(20000), Period: getUintP(100000)}},
},
{
&specs.LinuxResources{CPU: &specs.LinuxCPU{Quota: getIntP(10000), Period: getUintP(0)}},
&specs.LinuxResources{CPU: &specs.LinuxCPU{Quota: getIntP(20000), Period: getUintP(100000)}},
&specs.LinuxResources{CPU: &specs.LinuxCPU{Quota: getIntP(20000), Period: getUintP(100000)}},
},
{
&specs.LinuxResources{CPU: &specs.LinuxCPU{Quota: getIntP(1000), Period: getUintP(2000)}},
&specs.LinuxResources{CPU: &specs.LinuxCPU{Quota: getIntP(20000), Period: getUintP(100000)}},
&specs.LinuxResources{CPU: &specs.LinuxCPU{Quota: getIntP(1400), Period: getUintP(2000)}},
},
} {
data, err := json.Marshal(&specs.Spec{
Linux: &specs.Linux{
Resources: testdata.first,
},
})
assert.Nil(t, err)
contA.Annotations[annotations.ConfigJSONKey] = string(data)
data, err = json.Marshal(&specs.Spec{
Linux: &specs.Linux{
Resources: testdata.second,
},
})
assert.Nil(t, err)
contB.Annotations[annotations.ConfigJSONKey] = string(data)
rc, err := s.mergeSpecResource()
assert.Nil(t, err)
assert.True(t, reflect.DeepEqual(rc, testdata.expected), "should be equal, got: %#v, expected: %#v", rc, testdata.expected)
}
}
func TestSetupCgroups(t *testing.T) {
if os.Geteuid() != 0 {
t.Skip("Test disabled as requires root privileges")
}
s := &Sandbox{
id: "test-sandbox",
hypervisor: &mockHypervisor{},
config: &SandboxConfig{
Containers: []ContainerConfig{
{
ID: "containerA",
Annotations: make(map[string]string),
},
{
ID: "containerA",
Annotations: make(map[string]string),
},
},
},
}
contA := s.config.Containers[0]
contB := s.config.Containers[1]
getIntP := func(x int64) *int64 { return &x }
getUintP := func(x uint64) *uint64 { return &x }
data, err := json.Marshal(&specs.Spec{
Linux: &specs.Linux{
Resources: &specs.LinuxResources{
CPU: &specs.LinuxCPU{
Quota: getIntP(5000),
Period: getUintP(10000),
},
},
},
})
assert.Nil(t, err)
contA.Annotations[annotations.ConfigJSONKey] = string(data)
data, err = json.Marshal(&specs.Spec{
Linux: &specs.Linux{
Resources: &specs.LinuxResources{
CPU: &specs.LinuxCPU{
Quota: getIntP(10000),
Period: getUintP(40000),
},
},
},
})
assert.Nil(t, err)
contB.Annotations[annotations.ConfigJSONKey] = string(data)
err = s.newCgroups()
assert.Nil(t, err, "failed to create cgroups")
defer s.destroyCgroups()
// test if function works without error
err = s.setupCgroups()
assert.Nil(t, err, "setup host cgroup failed")
// test if the quota and period value are written into cgroup files
cpu, err := getCgroupDestination("cpu")
assert.Nil(t, err, "failed to get cpu cgroup path")
assert.NotEqual(t, "", cpu, "cpu cgroup value can't be empty")
parentDir := filepath.Join(cpu, defaultCgroupParent, "test-sandbox", "vcpu")
quotaFile := filepath.Join(parentDir, "cpu.cfs_quota_us")
periodFile := filepath.Join(parentDir, "cpu.cfs_period_us")
expectedQuota := "7500\n"
expectedPeriod := "10000\n"
fquota, err := os.Open(quotaFile)
assert.Nil(t, err, "open file %q failed", quotaFile)
defer fquota.Close()
data, err = ioutil.ReadAll(fquota)
assert.Nil(t, err)
assert.Equal(t, expectedQuota, string(data), "failed to get expected cfs_quota")
fperiod, err := os.Open(periodFile)
assert.Nil(t, err, "open file %q failed", periodFile)
defer fperiod.Close()
data, err = ioutil.ReadAll(fperiod)
assert.Nil(t, err)
assert.Equal(t, expectedPeriod, string(data), "failed to get expected cfs_period")
}

View File

@ -252,6 +252,10 @@ type HypervisorConfig struct {
DisableVhostNet bool DisableVhostNet bool
} }
type threadIDs struct {
vcpus []int
}
func (conf *HypervisorConfig) checkTemplateConfig() error { func (conf *HypervisorConfig) checkTemplateConfig() error {
if conf.BootToBeTemplate && conf.BootFromTemplate { if conf.BootToBeTemplate && conf.BootFromTemplate {
return fmt.Errorf("Cannot set both 'to be' and 'from' vm tempate") return fmt.Errorf("Cannot set both 'to be' and 'from' vm tempate")
@ -571,4 +575,5 @@ type hypervisor interface {
disconnect() disconnect()
capabilities() capabilities capabilities() capabilities
hypervisorConfig() HypervisorConfig hypervisorConfig() HypervisorConfig
getThreadIDs() (*threadIDs, error)
} }

View File

@ -5,7 +5,10 @@
package virtcontainers package virtcontainers
import "context" import (
"context"
"os"
)
type mockHypervisor struct { type mockHypervisor struct {
} }
@ -86,3 +89,8 @@ func (m *mockHypervisor) getSandboxConsole(sandboxID string) (string, error) {
func (m *mockHypervisor) disconnect() { func (m *mockHypervisor) disconnect() {
} }
func (m *mockHypervisor) getThreadIDs() (*threadIDs, error) {
vcpus := []int{os.Getpid()}
return &threadIDs{vcpus}, nil
}

View File

@ -1293,3 +1293,27 @@ func genericMemoryTopology(memoryMb, hostMemoryMb uint64, slots uint8) govmmQemu
return memory return memory
} }
func (q *qemu) getThreadIDs() (*threadIDs, error) {
span, _ := q.trace("getThreadIDs")
defer span.Finish()
err := q.qmpSetup()
if err != nil {
return nil, err
}
cpuInfos, err := q.qmpMonitorCh.qmp.ExecQueryCpus(q.qmpMonitorCh.ctx)
if err != nil {
q.Logger().WithError(err).Error("failed to query cpu infos")
return nil, err
}
var tid threadIDs
for _, i := range cpuInfos {
if i.ThreadID > 0 {
tid.vcpus = append(tid.vcpus, i.ThreadID)
}
}
return &tid, nil
}

View File

@ -30,9 +30,11 @@ import (
"github.com/vishvananda/netlink" "github.com/vishvananda/netlink"
) )
// vmStartTimeout represents the time in seconds a sandbox can wait before const (
// to consider the VM starting operation failed. // vmStartTimeout represents the time in seconds a sandbox can wait before
const vmStartTimeout = 10 // to consider the VM starting operation failed.
vmStartTimeout = 10
)
// stateString is a string representing a sandbox state. // stateString is a string representing a sandbox state.
type stateString string type stateString string
@ -491,6 +493,8 @@ type Sandbox struct {
stateful bool stateful bool
ctx context.Context ctx context.Context
cgroup *sandboxCgroups
} }
// ID returns the sandbox identifier string. // ID returns the sandbox identifier string.
@ -861,6 +865,11 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
return nil, err return nil, err
} }
// create new cgroup for sandbox
if err := s.newCgroups(); err != nil {
return nil, err
}
return s, nil return s, nil
} }
@ -978,6 +987,12 @@ func (s *Sandbox) Delete() error {
} }
} }
// destroy sandbox cgroup
if err := s.destroyCgroups(); err != nil {
// continue the removal process even cgroup failed to destroy
s.Logger().WithError(err).Error("failed to destroy cgroup")
}
globalSandboxList.removeSandbox(s.id) globalSandboxList.removeSandbox(s.id)
if s.monitor != nil { if s.monitor != nil {
@ -1283,6 +1298,10 @@ func (s *Sandbox) CreateContainer(contConfig ContainerConfig) (VCContainer, erro
return nil, err return nil, err
} }
// Setup host cgroups for new container
if err := s.setupCgroups(); err != nil {
return nil, err
}
return c, nil return c, nil
} }