Files
kata-containers/cli/create.go
James O. D. Hunt 3a1bbd0271 tracing: Add initial opentracing support
Add initial support for opentracing by using the `jaeger` package.
Since opentracing uses the `context` package, add a `context.Context`
as the first parameter to all the functions that we might want to
trace. Trace "spans" (trace points) are then added by extracting the
trace details from the specified context parameter.

Notes:

- Although the tracer is created in `main()`, the "root span"
  (aka the first trace point) is not added until `beforeSubcommands()`.

  This is by design and is a compromise: by delaying the creation of the
  root span, the spans become much more readable since using the web-based
  JaegerUI, you will see traces like this:

  ```
  kata-runtime: kata-runtime create
  ------------  -------------------
       ^                ^
       |                |
  Trace name        First span name
                    (which clearly shows the CLI command that was run)
  ```

  Creating the span earlier means it is necessary to expand 'n' spans in
  the UI before you get to see the name of the CLI command that was run.
  In adding support, this became very tedious, hence my design decision to
  defer the creation of the root span until after signal handling has been
  set up and after CLI options have been parsed, but still very early in
  the code path.

  - At this stage, the tracing stops at the `virtcontainers` call
  boundary.

- Tracing is "always on" as there doesn't appear to be a way to toggle
  it. However, it resolves to a "nop" unless the tracer can talk to a
  jaeger agent.

Note that this commit required a bit of rework to `beforeSubcommands()`
to reduce the cyclomatic complexity.

Fixes #557.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2018-08-10 16:13:48 +01:00

463 lines
12 KiB
Go

// Copyright (c) 2014,2015,2016 Docker, Inc.
// Copyright (c) 2017 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
package main
import (
"context"
"errors"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strings"
vc "github.com/kata-containers/runtime/virtcontainers"
vf "github.com/kata-containers/runtime/virtcontainers/factory"
"github.com/kata-containers/runtime/virtcontainers/pkg/oci"
opentracing "github.com/opentracing/opentracing-go"
"github.com/urfave/cli"
)
// createCLICommand describes the "create" subcommand: its help text, the
// flags it accepts and the action that runs it. The action converts the CLI
// context into a context.Context, fetches the runtime configuration stored in
// the application metadata, sets up the console and then calls create().
var createCLICommand = cli.Command{
	Name:  "create",
	Usage: "Create a container",
	ArgsUsage: `<container-id>
<container-id> is your name for the instance of the container that you
are starting. The name you provide for the container instance must be unique
on your host.`,
	Description: `The create command creates an instance of a container for a bundle. The
bundle is a directory with a specification file named "` + specConfig + `" and a
root filesystem.
The specification file includes an args parameter. The args parameter is
used to specify command(s) that get run when the container is started.
To change the command(s) that get executed on start, edit the args
parameter of the spec.`,
	// All string flags default to the empty string.
	Flags: []cli.Flag{
		cli.StringFlag{
			Name:  "bundle, b",
			Usage: "path to the root of the bundle directory, defaults to the current directory",
		},
		cli.StringFlag{
			Name:  "console",
			Usage: "path to a pseudo terminal",
		},
		cli.StringFlag{
			Name:  "console-socket",
			Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal",
		},
		cli.StringFlag{
			Name:  "pid-file",
			Usage: "specify the file to write the process id to",
		},
		cli.BoolFlag{
			Name:  "no-pivot",
			Usage: "warning: this flag is meaningless to kata-runtime, just defined in order to be compatible with docker in ramdisk",
		},
	},
	Action: func(c *cli.Context) error {
		ctx, err := cliContextToContext(c)
		if err != nil {
			return err
		}

		// The runtime configuration is expected to have been stored in
		// the application metadata under the "runtimeConfig" key.
		runtimeConfig, ok := c.App.Metadata["runtimeConfig"].(oci.RuntimeConfig)
		if !ok {
			return errors.New("invalid runtime config")
		}

		consolePath, err := setupConsole(c.String("console"), c.String("console-socket"))
		if err != nil {
			return err
		}

		var (
			containerID = c.Args().First()
			bundlePath  = c.String("bundle")
			pidFilePath = c.String("pid-file")
		)

		// The command line always requests a detached create.
		return create(ctx, containerID, bundlePath, consolePath, pidFilePath, true, runtimeConfig)
	},
}
// getKernelParamsFunc is the function setKernelParams calls to obtain the
// default kernel parameters. It is a variable, initialised to
// getKernelParams, so that tests can replace it with their own implementation.
var getKernelParamsFunc = getKernelParams
// create handles the "create" operation for the container containerID whose
// bundle is located at bundlePath.
//
// It validates the parameters, parses the OCI configuration of the bundle,
// optionally installs a VM factory, creates either a sandbox or a container
// (depending on the container type found in the spec) and finally creates the
// cgroups files and the PID file for the resulting process.
//
// console is passed on to the sandbox/container creation, detach takes part in
// deciding whether output is disabled, and pidFilePath is where the PID of the
// created process gets written. The first error encountered is returned.
func create(ctx context.Context, containerID, bundlePath, console, pidFilePath string, detach bool,
	runtimeConfig oci.RuntimeConfig) error {
	span, ctx := opentracing.StartSpanFromContext(ctx, "create")
	defer span.Finish()

	// From now on, log entries and the span carry the container ID.
	kataLog = kataLog.WithField("container", containerID)
	setExternalLoggers(kataLog)
	span.SetTag("container", containerID)

	// Checks the MUST and MUST NOT from OCI runtime specification
	bundlePath, err := validCreateParams(containerID, bundlePath)
	if err != nil {
		return err
	}

	ociSpec, err := oci.ParseConfigJSON(bundlePath)
	if err != nil {
		return err
	}

	containerType, err := ociSpec.ContainerType()
	if err != nil {
		return err
	}

	if runtimeConfig.FactoryConfig.Template {
		factoryConfig := vf.Config{
			Template: true,
			VMConfig: vc.VMConfig{
				HypervisorType:   runtimeConfig.HypervisorType,
				HypervisorConfig: runtimeConfig.HypervisorConfig,
				AgentType:        runtimeConfig.AgentType,
				AgentConfig:      runtimeConfig.AgentConfig,
			},
		}
		kataLog.WithField("factory", factoryConfig).Info("load vm factory")

		// Loading is attempted first; creating a new factory is the
		// fallback. Factory failures are only logged, they never abort
		// the create.
		factory, factoryErr := vf.NewFactory(factoryConfig, true)
		if factoryErr != nil {
			kataLog.WithError(factoryErr).Warn("load vm factory failed, about to create new one")
			factory, factoryErr = vf.NewFactory(factoryConfig, false)
		}
		if factoryErr != nil {
			kataLog.WithError(factoryErr).Warn("create vm factory failed")
		} else {
			vci.SetFactory(factory)
		}
	}

	disableOutput := noNeedForOutput(detach, ociSpec.Process.Terminal)

	// For any other container type, process keeps its zero value and err
	// is still nil at this point.
	var process vc.Process
	switch containerType {
	case vc.PodSandbox:
		process, err = createSandbox(ctx, ociSpec, runtimeConfig, containerID, bundlePath, console, disableOutput)
	case vc.PodContainer:
		process, err = createContainer(ctx, ociSpec, containerID, bundlePath, console, disableOutput)
	}
	if err != nil {
		return err
	}

	// config.json provides a cgroups path that has to be used to create "tasks"
	// and "cgroups.procs" files. Those files have to be filled with a PID, which
	// is shim's in our case. This is mandatory to make sure there is no one
	// else (like Docker) trying to create those files on our behalf. We want to
	// know those files location so that we can remove them when delete is called.
	cgroupsPathList, err := processCgroupsPath(ctx, ociSpec, containerType.IsSandbox())
	if err != nil {
		return err
	}

	// cgroupsDirPath is the CgroupsPath taken from the OCI spec; it stays
	// empty when the spec has no Linux section.
	cgroupsDirPath := ""
	if linux := ociSpec.Linux; linux != nil {
		cgroupsDirPath = linux.CgroupsPath
	}

	if err = createCgroupsFiles(ctx, containerID, cgroupsDirPath, cgroupsPathList, process.Pid); err != nil {
		return err
	}

	// Creation of PID file has to be the last thing done in the create
	// because containerd considers the create complete after this file
	// is created.
	return createPIDFile(ctx, pidFilePath, process.Pid)
}
// systemdKernelParam is the list of kernel parameters that getKernelParams
// returns when systemd is needed: the init binary, the systemd unit to run
// and the two systemd-networkd units that are masked.
var systemdKernelParam = []vc.Param{
	{Key: "init", Value: "/usr/lib/systemd/systemd"},
	{Key: "systemd.unit", Value: systemdUnitName},
	{Key: "systemd.mask", Value: "systemd-networkd.service"},
	{Key: "systemd.mask", Value: "systemd-networkd.socket"},
}
// getKernelParams returns the default kernel parameters: a copy of
// systemdKernelParam when needSystemd is true, an empty (non-nil) list
// otherwise.
func getKernelParams(needSystemd bool) []vc.Param {
	if !needSystemd {
		return []vc.Param{}
	}

	return append([]vc.Param{}, systemdKernelParam...)
}
// needSystemd reports whether the hypervisor configuration has an image path
// set.
func needSystemd(config vc.HypervisorConfig) bool {
	return len(config.ImagePath) > 0
}
// setKernelParams rebuilds the kernel parameter list of runtimeConfig: the
// list is emptied, the default parameters are added first and the parameters
// that were present in the configuration are re-added afterwards, so that the
// user-specified values take priority.
//
// The first error returned by AddKernelParam is returned as is. containerID
// is not used by this function.
func setKernelParams(containerID string, runtimeConfig *oci.RuntimeConfig) error {
	defaults := getKernelParamsFunc(needSystemd(runtimeConfig.HypervisorConfig))

	if runtimeConfig.HypervisorConfig.Debug {
		serialized := strings.Join(vc.SerializeParams(defaults, "="), " ")
		kataLog.WithField("default-kernel-parameters", serialized).Debug()
	}

	// Remember what the configuration file asked for, then start over
	// from an empty list.
	fromConfig := runtimeConfig.HypervisorConfig.KernelParams
	runtimeConfig.HypervisorConfig.KernelParams = []vc.Param{}

	// Order matters: defaults first, user-specified values last.
	for _, group := range [][]vc.Param{defaults, fromConfig} {
		for _, param := range group {
			if err := runtimeConfig.AddKernelParam(param); err != nil {
				return err
			}
		}
	}

	return nil
}
// createSandbox creates a new sandbox from the OCI spec and the runtime
// configuration and returns the process of its single container.
//
// The kernel parameters of runtimeConfig are set up first, then the sandbox
// configuration is built and the sandbox created. The sandbox ID is added to
// the logger and to the tracing span, and the container ID to sandbox ID
// mapping is recorded. A sandbox that does not hold exactly one container is
// reported as an error.
func createSandbox(ctx context.Context, ociSpec oci.CompatOCISpec, runtimeConfig oci.RuntimeConfig,
	containerID, bundlePath, console string, disableOutput bool) (vc.Process, error) {
	span, ctx := opentracing.StartSpanFromContext(ctx, "createSandbox")
	defer span.Finish()

	// Zero value returned alongside every error.
	var noProcess vc.Process

	if err := setKernelParams(containerID, &runtimeConfig); err != nil {
		return noProcess, err
	}

	sandboxConfig, err := oci.SandboxConfig(ociSpec, runtimeConfig, bundlePath, containerID, console, disableOutput)
	if err != nil {
		return noProcess, err
	}

	sandbox, err := vci.CreateSandbox(sandboxConfig)
	if err != nil {
		return noProcess, err
	}

	sandboxID := sandbox.ID()
	kataLog = kataLog.WithField("sandbox", sandboxID)
	setExternalLoggers(kataLog)
	span.SetTag("sandbox", sandboxID)

	containers := sandbox.GetAllContainers()
	if count := len(containers); count != 1 {
		return noProcess, fmt.Errorf("BUG: Container list from sandbox is wrong, expecting only one container, found %d containers", count)
	}

	if err = addContainerIDMapping(ctx, containerID, sandbox.ID()); err != nil {
		return noProcess, err
	}

	return containers[0].Process(), nil
}
// setEphemeralStorageType marks as 'ephemeral' every mount of the spec whose
// source path is provisioned by k8s for ephemeral storage (as decided by
// IsEphemeralStorage), and returns the spec.
//
// For a given pod the ephemeral volume is created only once, backed by tmpfs
// inside the VM; successive containers of the same pod reuse the already
// existing volume.
func setEphemeralStorageType(ociSpec oci.CompatOCISpec) oci.CompatOCISpec {
	for i := range ociSpec.Mounts {
		if !IsEphemeralStorage(ociSpec.Mounts[i].Source) {
			continue
		}
		ociSpec.Mounts[i].Type = "ephemeral"
	}

	return ociSpec
}
// createContainer creates a container inside the sandbox named by the OCI
// spec and returns the container's process.
//
// Ephemeral storage mounts are flagged in the spec before the container
// configuration is built. The sandbox ID is added to the logger and to the
// tracing span, and the container ID to sandbox ID mapping is recorded once
// the container has been created.
func createContainer(ctx context.Context, ociSpec oci.CompatOCISpec, containerID, bundlePath,
	console string, disableOutput bool) (vc.Process, error) {
	span, ctx := opentracing.StartSpanFromContext(ctx, "createContainer")
	defer span.Finish()

	// Zero value returned alongside every error.
	var noProcess vc.Process

	ociSpec = setEphemeralStorageType(ociSpec)

	contConfig, err := oci.ContainerConfig(ociSpec, bundlePath, containerID, console, disableOutput)
	if err != nil {
		return noProcess, err
	}

	sandboxID, err := ociSpec.SandboxID()
	if err != nil {
		return noProcess, err
	}

	kataLog = kataLog.WithField("sandbox", sandboxID)
	setExternalLoggers(kataLog)
	span.SetTag("sandbox", sandboxID)

	_, container, err := vci.CreateContainer(sandboxID, contConfig)
	if err != nil {
		return noProcess, err
	}

	if err = addContainerIDMapping(ctx, containerID, sandboxID); err != nil {
		return noProcess, err
	}

	return container.Process(), nil
}
// createCgroupsFiles creates every directory listed in cgroupsPathList and
// writes pid into the tasks and procs files (cgroupsTasksFile and
// cgroupsProcsFile) of each of them.
//
// For a path containing "cpu", and when cgroupsDirPath is not empty, the
// cpuset values of the parent directory (the path with the cgroupsDirPath
// suffix removed) are first copied into the new directory through
// copyParentCPUSet. That copy is best-effort: a failure is written to the
// debug log and does not abort the creation.
//
// Nothing is created and nil is returned when cgroupsPathList is empty.
// Otherwise the first error met while creating a directory, or while opening,
// writing or closing a file, is returned. containerID is currently unused.
func createCgroupsFiles(ctx context.Context, containerID string, cgroupsDirPath string, cgroupsPathList []string, pid int) error {
	span, _ := opentracing.StartSpanFromContext(ctx, "createCgroupsFiles")
	defer span.Finish()

	if len(cgroupsPathList) == 0 {
		kataLog.WithField("pid", pid).Info("Cgroups files not created because cgroupsPath was empty")
		return nil
	}

	pidStr := fmt.Sprintf("%d", pid)

	// writePID writes pidStr to the file at path. The file is closed before
	// writePID returns, so no descriptor stays open across loop iterations,
	// and a close failure is reported when the write itself succeeded.
	writePID := func(path string) (err error) {
		var f *os.File
		if f, err = os.OpenFile(path, os.O_RDWR|os.O_CREATE, cgroupsFileMode); err != nil {
			return err
		}
		defer func() {
			if closeErr := f.Close(); closeErr != nil && err == nil {
				err = closeErr
			}
		}()

		var n int
		if n, err = f.WriteString(pidStr); err != nil {
			return err
		}
		if n < len(pidStr) {
			return fmt.Errorf("Could not write pid to %q: only %d bytes written out of %d",
				path, n, len(pidStr))
		}

		return nil
	}

	for _, cgroupsPath := range cgroupsPathList {
		if err := os.MkdirAll(cgroupsPath, cgroupsDirMode); err != nil {
			return err
		}

		if strings.Contains(cgroupsPath, "cpu") && cgroupsDirPath != "" {
			parent := strings.TrimSuffix(cgroupsPath, cgroupsDirPath)
			// Best-effort: keep going on failure, but leave a trace
			// instead of dropping the error silently.
			if err := copyParentCPUSet(cgroupsPath, parent); err != nil {
				kataLog.WithError(err).WithField("cgroups-path", cgroupsPath).Debug("Could not copy cpuset values from parent")
			}
		}

		for _, name := range []string{cgroupsTasksFile, cgroupsProcsFile} {
			if err := writePID(filepath.Join(cgroupsPath, name)); err != nil {
				return err
			}
		}
	}

	return nil
}
// createPIDFile writes pid, in decimal, to a freshly created file at
// pidFilePath.
//
// An empty pidFilePath is not an error: the PID file is optional and nothing
// is done in that case. Whatever already exists at pidFilePath is removed
// (os.RemoveAll) before the file is created. Errors from removing, creating,
// writing or closing the file are returned.
func createPIDFile(ctx context.Context, pidFilePath string, pid int) error {
	span, _ := opentracing.StartSpanFromContext(ctx, "createPIDFile")
	defer span.Finish()

	if pidFilePath == "" {
		// runtime should not fail since pid file is optional
		return nil
	}

	if err := os.RemoveAll(pidFilePath); err != nil {
		return err
	}

	f, err := os.Create(pidFilePath)
	if err != nil {
		return err
	}

	pidStr := fmt.Sprintf("%d", pid)

	// Close explicitly rather than through defer so that a close failure
	// on the file we just wrote is not lost.
	n, writeErr := f.WriteString(pidStr)
	closeErr := f.Close()

	if writeErr != nil {
		return writeErr
	}
	if n < len(pidStr) {
		return fmt.Errorf("Could not write pid to '%s': only %d bytes written out of %d", pidFilePath, n, len(pidStr))
	}

	return closeErr
}
// copyParentCPUSet copies cpuset.cpus and cpuset.mems from the parent
// directory into the current directory, each one only when the corresponding
// file in current is empty (as decided by isEmptyString).
//
// Nothing is copied when either of the parent's values has no content.
// Errors from reading the cpuset files or from writing them are returned.
func copyParentCPUSet(current, parent string) error {
	currentCpus, currentMems, err := getCPUSet(current)
	if err != nil {
		return err
	}

	parentCpus, parentMems, err := getCPUSet(parent)
	if err != nil {
		return err
	}

	if len(parentCpus) == 0 || len(parentMems) == 0 {
		return nil
	}

	// Mode passed to writeFile for the cpuset files written below.
	cpusetFileMode := os.FileMode(0600)

	candidates := []struct {
		name    string
		current []byte
		parent  []byte
	}{
		{"cpuset.cpus", currentCpus, parentCpus},
		{"cpuset.mems", currentMems, parentMems},
	}

	for _, candidate := range candidates {
		if !isEmptyString(candidate.current) {
			continue
		}
		if err := writeFile(filepath.Join(current, candidate.name), string(candidate.parent), cpusetFileMode); err != nil {
			return err
		}
	}

	return nil
}
// getCPUSet reads the "cpuset.cpus" and "cpuset.mems" files located in the
// directory parent and returns their raw contents. cpuset.cpus is read first;
// the first read that fails stops the function and its error is returned.
func getCPUSet(parent string) (cpus []byte, mems []byte, err error) {
	cpus, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.cpus"))
	if err != nil {
		return cpus, mems, err
	}

	mems, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.mems"))
	return cpus, mems, err
}