kata-containers/virtcontainers/container.go
Samuel Ortiz 24eff72d82 virtcontainers: Initial import
This is a virtcontainers 1.0.8 import into Kata Containers runtime.

virtcontainers is a Go library designed to manage hardware virtualized
pods and containers. It is the core Clear Containers framework and will
become the core Kata Containers framework, as discussed at
https://github.com/kata-containers/runtime/issues/33

Some more pointers:

virtcontainers README, including some design and architecture notes:
https://github.com/containers/virtcontainers/blob/master/README.md

virtcontainers 1.0 API:
https://github.com/containers/virtcontainers/blob/master/documentation/api/1.0/api.md

Fixes #40

Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
2018-03-13 00:49:46 +01:00


//
// Copyright (c) 2016 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package virtcontainers

import (
"encoding/hex"
"fmt"
"os"
"path/filepath"
"syscall"
"time"

"github.com/sirupsen/logrus"
)
// Process gathers data related to a container process.
type Process struct {
// Token is the process execution context ID. It must be
// unique per pod.
// Token is used to manipulate processes for containers
// that have not started yet, and later identify them
// uniquely within a pod.
Token string
// Pid is the process ID as seen by the host software
// stack, e.g. CRI-O, containerd. This is typically the
// shim PID.
Pid int
StartTime time.Time
}
// ContainerStatus describes a container status.
type ContainerStatus struct {
ID string
State State
PID int
StartTime time.Time
RootFs string
// Annotations allow clients to store arbitrary values,
// for example to add additional status values required
// to support particular specifications.
Annotations map[string]string
}
// ContainerResources describes container resources
type ContainerResources struct {
// CPUQuota specifies the total amount of CPU time, in microseconds per
// CPUPeriod, that the container is guaranteed to have access to
CPUQuota int64
// CPUPeriod specifies the CPU CFS scheduler period of time in microseconds
CPUPeriod uint64
// CPUShares specifies container's weight vs. other containers
CPUShares uint64
}
// ContainerConfig describes one container runtime configuration.
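//
// A minimal sketch of a valid configuration (illustrative values only, and
// assuming Cmd carries the command arguments in an Args field): only a
// non-empty ID is strictly required, see valid() below.
//
//	cfg := ContainerConfig{
//		ID:     "container-1",
//		RootFs: "/path/to/rootfs",
//		Cmd:    Cmd{Args: []string{"/bin/sh"}},
//	}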
type ContainerConfig struct {
ID string
// RootFs is the container workload image on the host.
RootFs string
// ReadonlyRootfs indicates if the rootfs should be mounted read-only
ReadonlyRootfs bool
// Cmd specifies the command to run on a container
Cmd Cmd
// Annotations allow clients to store arbitrary values,
// for example to add additional status values required
// to support particular specifications.
Annotations map[string]string
Mounts []Mount
// Device configuration for devices that must be available within the container.
DeviceInfos []DeviceInfo
// Resources describes the container resource constraints
Resources ContainerResources
}
// valid checks that the container configuration is valid.
func (c *ContainerConfig) valid() bool {
if c == nil {
return false
}
if c.ID == "" {
return false
}
return true
}
// SystemMountsInfo describes additional information for system mounts that the agent
// needs to handle
type SystemMountsInfo struct {
// Indicates if /dev has been passed as a bind mount for the host /dev
BindMountDev bool
// Size of /dev/shm assigned on the host.
DevShmSize uint
}
// Container describes a single container within a pod and its runtime environment.
// A Container can be created, deleted, started, stopped, listed, entered, paused and restored.
type Container struct {
id string
podID string
rootFs string
config *ContainerConfig
pod *Pod
runPath string
configPath string
containerPath string
state State
process Process
mounts []Mount
devices []Device
systemMountsInfo SystemMountsInfo
}
// ID returns the container identifier string.
func (c *Container) ID() string {
return c.id
}
// Logger returns a logrus logger appropriate for logging Container messages
func (c *Container) Logger() *logrus.Entry {
return virtLog.WithFields(logrus.Fields{
"subsystem": "container",
"container-id": c.id,
"pod-id": c.podID,
})
}
// Pod returns the pod handler related to this container.
func (c *Container) Pod() VCPod {
return c.pod
}
// Process returns the container process.
func (c *Container) Process() Process {
return c.process
}
// GetToken returns the token related to this container's process.
func (c *Container) GetToken() string {
return c.process.Token
}
// GetPid returns the pid related to this container's process.
func (c *Container) GetPid() int {
return c.process.Pid
}
// SetPid sets and stores the given pid as the pid of container's process.
func (c *Container) SetPid(pid int) error {
c.process.Pid = pid
return c.storeProcess()
}
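// setStateBlockIndex sets the block device index in the container state and
// stores the updated state on disk.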
func (c *Container) setStateBlockIndex(index int) error {
c.state.BlockIndex = index
err := c.pod.storage.storeContainerResource(c.pod.id, c.id, stateFileType, c.state)
if err != nil {
return err
}
return nil
}
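// setStateFstype sets the rootfs filesystem type in the container state and
// stores the updated state on disk.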
func (c *Container) setStateFstype(fstype string) error {
c.state.Fstype = fstype
err := c.pod.storage.storeContainerResource(c.pod.id, c.id, stateFileType, c.state)
if err != nil {
return err
}
return nil
}
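// setStateHotpluggedDrive records in the container state whether the rootfs
// drive was hotplugged, and stores the updated state on disk.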
func (c *Container) setStateHotpluggedDrive(hotplugged bool) error {
c.state.HotpluggedDrive = hotplugged
err := c.pod.storage.storeContainerResource(c.pod.id, c.id, stateFileType, c.state)
if err != nil {
return err
}
return nil
}
// GetAnnotations returns container's annotations
func (c *Container) GetAnnotations() map[string]string {
return c.config.Annotations
}
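// storeProcess persists the container process through the pod storage.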
func (c *Container) storeProcess() error {
return c.pod.storage.storeContainerProcess(c.podID, c.id, c.process)
}
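// fetchProcess retrieves the container process from the pod storage.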
func (c *Container) fetchProcess() (Process, error) {
return c.pod.storage.fetchContainerProcess(c.podID, c.id)
}
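// storeMounts persists the container mounts through the pod storage.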
func (c *Container) storeMounts() error {
return c.pod.storage.storeContainerMounts(c.podID, c.id, c.mounts)
}
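// fetchMounts retrieves the container mounts from the pod storage.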
func (c *Container) fetchMounts() ([]Mount, error) {
return c.pod.storage.fetchContainerMounts(c.podID, c.id)
}
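// storeDevices persists the container devices through the pod storage.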
func (c *Container) storeDevices() error {
return c.pod.storage.storeContainerDevices(c.podID, c.id, c.devices)
}
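// fetchDevices retrieves the container devices from the pod storage.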
func (c *Container) fetchDevices() ([]Device, error) {
return c.pod.storage.fetchContainerDevices(c.podID, c.id)
}
// storeContainer stores a container config.
func (c *Container) storeContainer() error {
fs := filesystem{}
err := fs.storeContainerResource(c.pod.id, c.id, configFileType, *(c.config))
if err != nil {
return err
}
return nil
}
// setContainerState sets both the in-memory and on-disk state of the
// container.
func (c *Container) setContainerState(state stateString) error {
if state == "" {
return errNeedState
}
// update in-memory state
c.state.State = state
// update on-disk state
err := c.pod.storage.storeContainerResource(c.pod.id, c.id, stateFileType, c.state)
if err != nil {
return err
}
return nil
}
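// createContainersDirs creates the container run and config directories on the
// host. If the config directory cannot be created, the container resources
// already stored on disk are deleted.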
func (c *Container) createContainersDirs() error {
err := os.MkdirAll(c.runPath, dirMode)
if err != nil {
return err
}
err = os.MkdirAll(c.configPath, dirMode)
if err != nil {
c.pod.storage.deleteContainerResources(c.podID, c.id, nil)
return err
}
return nil
}
// mountSharedDirMounts handles bind mounts by bind mounting them into the host
// shared directory, which is mounted through 9pfs in the VM.
// It also updates the container mount list with the HostPath info, and stores
// the container mounts to storage. This way, the HostPath info is available
// when those mounts need to be unmounted later.
func (c *Container) mountSharedDirMounts(hostSharedDir, guestSharedDir string) ([]Mount, error) {
var sharedDirMounts []Mount
for idx, m := range c.mounts {
if isSystemMount(m.Destination) || m.Type != "bind" {
continue
}
randBytes, err := generateRandomBytes(8)
if err != nil {
return nil, err
}
// These mounts are created in the shared dir
filename := fmt.Sprintf("%s-%s-%s", c.id, hex.EncodeToString(randBytes), filepath.Base(m.Destination))
mountDest := filepath.Join(hostSharedDir, c.pod.id, filename)
if err := bindMount(m.Source, mountDest, false); err != nil {
return nil, err
}
// Save HostPath mount value into the mount list of the container.
c.mounts[idx].HostPath = mountDest
// Check if the mount is read-only; if so, the agent will handle the
// read-only mount within the VM.
readonly := false
for _, flag := range m.Options {
if flag == "ro" {
readonly = true
}
}
sharedDirMount := Mount{
Source: filepath.Join(guestSharedDir, filename),
Destination: m.Destination,
Type: m.Type,
Options: m.Options,
ReadOnly: readonly,
}
sharedDirMounts = append(sharedDirMounts, sharedDirMount)
}
if err := c.storeMounts(); err != nil {
return nil, err
}
return sharedDirMounts, nil
}
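// unmountHostMounts unmounts from the host every container mount that has a
// HostPath set, i.e. every mount previously bind mounted into the host shared
// directory.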
func (c *Container) unmountHostMounts() error {
for _, m := range c.mounts {
if m.HostPath != "" {
if err := syscall.Unmount(m.HostPath, 0); err != nil {
c.Logger().WithFields(logrus.Fields{
"host-path": m.HostPath,
"error": err,
}).Warn("Could not umount")
return err
}
}
}
return nil
}
// newContainer creates a Container structure from a pod and a container configuration.
func newContainer(pod *Pod, contConfig ContainerConfig) (*Container, error) {
if !contConfig.valid() {
return &Container{}, fmt.Errorf("Invalid container configuration")
}
c := &Container{
id: contConfig.ID,
podID: pod.id,
rootFs: contConfig.RootFs,
config: &contConfig,
pod: pod,
runPath: filepath.Join(runStoragePath, pod.id, contConfig.ID),
configPath: filepath.Join(configStoragePath, pod.id, contConfig.ID),
containerPath: filepath.Join(pod.id, contConfig.ID),
state: State{},
process: Process{},
mounts: contConfig.Mounts,
}
state, err := c.pod.storage.fetchContainerState(c.podID, c.id)
if err == nil {
c.state = state
}
process, err := c.pod.storage.fetchContainerProcess(c.podID, c.id)
if err == nil {
c.process = process
}
mounts, err := c.fetchMounts()
if err == nil {
c.mounts = mounts
}
// Devices will be found in storage after the create stage has completed.
// We fetch devices from storage at all other stages.
storedDevices, err := c.fetchDevices()
if err == nil {
c.devices = storedDevices
} else {
// If devices were not found in storage, create Device implementations
// from the configuration. This should happen at create.
devices, err := newDevices(contConfig.DeviceInfos)
if err != nil {
return &Container{}, err
}
c.devices = devices
}
return c, nil
}
// createContainer creates and starts a container inside a Pod. It must only be
// called for a new container that is not yet known to the pod.
func createContainer(pod *Pod, contConfig ContainerConfig) (*Container, error) {
if pod == nil {
return nil, errNeedPod
}
c, err := newContainer(pod, contConfig)
if err != nil {
return nil, err
}
if err := c.createContainersDirs(); err != nil {
return nil, err
}
if !c.pod.config.HypervisorConfig.DisableBlockDeviceUse {
agentCaps := c.pod.agent.capabilities()
hypervisorCaps := c.pod.hypervisor.capabilities()
if agentCaps.isBlockDeviceSupported() && hypervisorCaps.isBlockDeviceHotplugSupported() {
if err := c.hotplugDrive(); err != nil {
return nil, err
}
}
}
// Attach devices
if err := c.attachDevices(); err != nil {
return nil, err
}
if err := c.addResources(); err != nil {
return nil, err
}
// Deduce additional system mount info that should be handled by the agent
// inside the VM
c.getSystemMountInfo()
if err := c.storeDevices(); err != nil {
return nil, err
}
process, err := pod.agent.createContainer(c.pod, c)
if err != nil {
return nil, err
}
c.process = *process
// Store the container process returned by the agent.
if err := c.storeProcess(); err != nil {
return nil, err
}
if err := c.setContainerState(StateReady); err != nil {
return nil, err
}
return c, nil
}
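// delete removes the container from its pod and deletes the container
// resources from storage. The container must be ready or stopped.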
func (c *Container) delete() error {
if c.state.State != StateReady &&
c.state.State != StateStopped {
return fmt.Errorf("Container not ready or stopped, impossible to delete")
}
// Remove the container from pod structure
if err := c.pod.removeContainer(c.id); err != nil {
return err
}
return c.pod.storage.deleteContainerResources(c.podID, c.id, nil)
}
// checkPodRunning checks that the pod this container belongs to is running.
//
// cmd specifies the operation (or verb) being attempted and is only used to
// make the returned error as descriptive as possible.
func (c *Container) checkPodRunning(cmd string) error {
if cmd == "" {
return fmt.Errorf("Cmd cannot be empty")
}
if c.pod.state.State != StateRunning {
return fmt.Errorf("Pod not running, impossible to %s the container", cmd)
}
return nil
}
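// getSystemMountInfo deduces system mount information that the agent needs to
// handle, such as whether /dev is bind mounted from the host /dev.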
func (c *Container) getSystemMountInfo() {
// check if /dev needs to be bind mounted from host /dev
c.systemMountsInfo.BindMountDev = false
for _, m := range c.mounts {
if m.Source == "/dev" && m.Destination == "/dev" && m.Type == "bind" {
c.systemMountsInfo.BindMountDev = true
}
}
// TODO Deduce /dev/shm size. See https://github.com/clearcontainers/runtime/issues/138
}
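// start moves the container to the running state by asking the agent to start
// the container workload. The pod must be running and the container must be
// ready or stopped.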
func (c *Container) start() error {
if err := c.checkPodRunning("start"); err != nil {
return err
}
if c.state.State != StateReady &&
c.state.State != StateStopped {
return fmt.Errorf("Container not ready or stopped, impossible to start")
}
if err := c.state.validTransition(c.state.State, StateRunning); err != nil {
return err
}
if err := c.pod.agent.startContainer(*(c.pod), c); err != nil {
c.Logger().WithError(err).Error("Failed to start container")
if err := c.stop(); err != nil {
c.Logger().WithError(err).Warn("Failed to stop container")
}
return err
}
return c.setContainerState(StateRunning)
}
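// stop stops the container process and its shim, asks the agent to stop the
// container, releases its resources and moves the container to the stopped
// state. Stopping an already stopped container is a no-op.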
func (c *Container) stop() error {
// In case the container status has been updated implicitly because
// the container process has terminated, it is possible that
// someone tries to stop the container, and we don't want to issue an
// error in that case. This should be a no-op.
//
// This has to be handled before the transition validation since this
// is an exception.
if c.state.State == StateStopped {
c.Logger().Info("Container already stopped")
return nil
}
if c.pod.state.State != StateReady && c.pod.state.State != StateRunning {
return fmt.Errorf("Pod not ready or running, impossible to stop the container")
}
if err := c.state.validTransition(c.state.State, StateStopped); err != nil {
return err
}
defer func() {
// If the shim is still running, something went wrong.
// Make sure we stop the shim process.
if running, _ := isShimRunning(c.process.Pid); running {
l := c.Logger()
l.Error("Failed to stop container so stopping dangling shim")
if err := stopShim(c.process.Pid); err != nil {
l.WithError(err).Warn("failed to stop shim")
}
}
}()
// Here we expect that stop() has been called because the container
// process returned or because it received a signal. In case of a
// signal, we want to give it some time to end the container process.
// However, if the signal didn't reach its goal, the caller still
// expects this container to be stopped. That is why we should not
// return an error, but instead try to kill it forcefully.
if err := waitForShim(c.process.Pid); err != nil {
// Force the container to be killed.
if err := c.pod.agent.killContainer(*(c.pod), *c, syscall.SIGKILL, true); err != nil {
return err
}
// Wait for the end of the container process. We expect this call
// to succeed. Indeed, we have already given the container a second
// chance by trying to kill it with SIGKILL, so there is no reason
// to go further if we get an error.
if err := waitForShim(c.process.Pid); err != nil {
return err
}
}
if err := c.pod.agent.stopContainer(*(c.pod), *c); err != nil {
return err
}
if err := c.removeResources(); err != nil {
return err
}
if err := c.detachDevices(); err != nil {
return err
}
if err := c.removeDrive(); err != nil {
return err
}
return c.setContainerState(StateStopped)
}
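// enter executes an additional command inside an already running container and
// returns the corresponding process.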
func (c *Container) enter(cmd Cmd) (*Process, error) {
if err := c.checkPodRunning("enter"); err != nil {
return nil, err
}
if c.state.State != StateRunning {
return nil, fmt.Errorf("Container not running, impossible to enter")
}
process, err := c.pod.agent.exec(c.pod, *c, cmd)
if err != nil {
return nil, err
}
return process, nil
}
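// kill sends the given signal to the container process, or to all of the
// container processes when all is true. Both the pod and the container must be
// ready or running.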
func (c *Container) kill(signal syscall.Signal, all bool) error {
if c.pod.state.State != StateReady && c.pod.state.State != StateRunning {
return fmt.Errorf("Pod not ready or running, impossible to signal the container")
}
if c.state.State != StateReady && c.state.State != StateRunning {
return fmt.Errorf("Container not ready or running, impossible to signal the container")
}
return c.pod.agent.killContainer(*(c.pod), *c, signal, all)
}
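// processList returns the list of processes running inside the container. The
// pod and the container must both be running.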
func (c *Container) processList(options ProcessListOptions) (ProcessList, error) {
if err := c.checkPodRunning("ps"); err != nil {
return nil, err
}
if c.state.State != StateRunning {
return nil, fmt.Errorf("Container not running, impossible to list processes")
}
return c.pod.agent.processListContainer(*(c.pod), *c, options)
}
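// hotplugDrive checks whether the container rootfs is backed by a device
// mapper block device and, if it is, hotplugs the corresponding drive into the
// VM and records the drive index and filesystem type in the container state.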
func (c *Container) hotplugDrive() error {
dev, err := getDeviceForPath(c.rootFs)
if err == errMountPointNotFound {
return nil
}
if err != nil {
return err
}
c.Logger().WithFields(logrus.Fields{
"device-major": dev.major,
"device-minor": dev.minor,
"mount-point": dev.mountPoint,
}).Info("device details")
isDM, err := checkStorageDriver(dev.major, dev.minor)
if err != nil {
return err
}
if !isDM {
return nil
}
// If this is a device mapper device, fetch the full path of the underlying device
devicePath, fsType, err := getDevicePathAndFsType(dev.mountPoint)
if err != nil {
return err
}
c.Logger().WithFields(logrus.Fields{
"device-path": devicePath,
"fs-type": fsType,
}).Info("Block device detected")
driveIndex, err := c.pod.getAndSetPodBlockIndex()
if err != nil {
return err
}
// Add drive with id as container id
devID := makeNameID("drive", c.id)
drive := Drive{
File: devicePath,
Format: "raw",
ID: devID,
Index: driveIndex,
}
if err := c.pod.hypervisor.hotplugAddDevice(drive, blockDev); err != nil {
return err
}
if err := c.setStateHotpluggedDrive(true); err != nil {
return err
}
if err := c.setStateBlockIndex(driveIndex); err != nil {
return err
}
return c.setStateFstype(fsType)
}
// isDriveUsed checks if a drive has been used for container rootfs
func (c *Container) isDriveUsed() bool {
return c.state.Fstype != ""
}
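// removeDrive unplugs from the VM the rootfs block device that was hotplugged
// when the container was created, if any.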
func (c *Container) removeDrive() (err error) {
if c.isDriveUsed() && c.state.HotpluggedDrive {
c.Logger().Info("unplugging block device")
devID := makeNameID("drive", c.id)
drive := Drive{
ID: devID,
}
l := c.Logger().WithField("device-id", devID)
l.Info("Unplugging block device")
if err := c.pod.hypervisor.hotplugRemoveDevice(drive, blockDev); err != nil {
l.WithError(err).Info("Failed to unplug block device")
return err
}
}
return nil
}
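// attachDevices attaches all the devices configured for this container to the
// pod's hypervisor.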
func (c *Container) attachDevices() error {
for _, device := range c.devices {
if err := device.attach(c.pod.hypervisor, c); err != nil {
return err
}
}
return nil
}
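// detachDevices detaches all the devices of this container from the pod's
// hypervisor.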
func (c *Container) detachDevices() error {
for _, device := range c.devices {
if err := device.detach(c.pod.hypervisor); err != nil {
return err
}
}
return nil
}
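// addResources hot-adds to the VM the vCPUs derived from the container CPU
// constraints, i.e. from the CPUQuota/CPUPeriod ratio (for example, a quota of
// 200000us over a 100000us period would presumably map to 2 vCPUs, depending
// on how ConstraintsToVCPUs rounds the ratio).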
func (c *Container) addResources() error {
//TODO add support for memory, Issue: https://github.com/containers/virtcontainers/issues/578
if c.config == nil {
return nil
}
vCPUs := ConstraintsToVCPUs(c.config.Resources.CPUQuota, c.config.Resources.CPUPeriod)
if vCPUs != 0 {
virtLog.Debugf("hot adding %d vCPUs", vCPUs)
if err := c.pod.hypervisor.hotplugAddDevice(uint32(vCPUs), cpuDev); err != nil {
return err
}
}
return nil
}
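// removeResources hot-removes from the VM the vCPUs that were derived from the
// container CPU constraints.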
func (c *Container) removeResources() error {
//TODO add support for memory, Issue: https://github.com/containers/virtcontainers/issues/578
if c.config == nil {
return nil
}
vCPUs := ConstraintsToVCPUs(c.config.Resources.CPUQuota, c.config.Resources.CPUPeriod)
if vCPUs != 0 {
virtLog.Debugf("hot removing %d vCPUs", vCPUs)
if err := c.pod.hypervisor.hotplugRemoveDevice(uint32(vCPUs), cpuDev); err != nil {
return err
}
}
return nil
}