dockershim: Implement sandbox methods

This commit is contained in:
Yu-Ju Hong 2016-07-25 16:25:36 -07:00
parent 68632db799
commit 5960d87d21
8 changed files with 434 additions and 26 deletions

View File

@ -21,6 +21,7 @@ import (
"strings"
dockertypes "github.com/docker/engine-api/types"
runtimeApi "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime"
)
@ -89,3 +90,25 @@ func toRuntimeAPIContainerState(state string) runtimeApi.ContainerState {
return runtimeApi.ContainerState_UNKNOWN
}
}
func toRuntimeAPISandboxState(state string) runtimeApi.PodSandBoxState {
// Parse the state string in dockertypes.Container. This could break when
// we upgrade docker.
switch {
case strings.HasPrefix(state, statusRunningPrefix):
return runtimeApi.PodSandBoxState_READY
default:
return runtimeApi.PodSandBoxState_NOTREADY
}
}
func toRuntimeAPISandbox(c *dockertypes.Container) *runtimeApi.PodSandbox {
state := toRuntimeAPISandboxState(c.Status)
return &runtimeApi.PodSandbox{
Id: &c.ID,
Name: &c.Names[0],
State: &state,
CreatedAt: &c.Created, // TODO: Why do we need CreateAt timestamp for sandboxes?
Labels: c.Labels, // TODO: Need to disthinguish annotaions and labels.
}
}

View File

@ -35,15 +35,17 @@ func (ds *dockerService) ListContainers(filter *runtimeApi.ContainerFilter) ([]*
opts := dockertypes.ContainerListOptions{All: true}
opts.Filter = dockerfilters.NewArgs()
f := newDockerFilter(&opts.Filter)
if filter != nil {
if filter.Name != nil {
opts.Filter.Add("name", filter.GetName())
f.Add("name", filter.GetName())
}
if filter.Id != nil {
opts.Filter.Add("id", filter.GetId())
f.Add("id", filter.GetId())
}
if filter.State != nil {
opts.Filter.Add("status", toDockerContainerStatus(filter.GetState()))
f.Add("status", toDockerContainerStatus(filter.GetState()))
}
if filter.PodSandboxId != nil {
// TODO: implement this after sandbox functions are implemented.
@ -51,9 +53,11 @@ func (ds *dockerService) ListContainers(filter *runtimeApi.ContainerFilter) ([]*
if filter.LabelSelector != nil {
for k, v := range filter.LabelSelector {
opts.Filter.Add("label", fmt.Sprintf("%s=%s", k, v))
f.AddLabel(k, v)
}
}
// Filter out sandbox containers.
f.AddLabel(containerTypeLabelKey, containerTypeLabelContainer)
}
containers, err := ds.client.ListContainers(opts)
if err != nil {
@ -81,14 +85,9 @@ func (ds *dockerService) CreateContainer(podSandboxID string, config *runtimeApi
// Merge annotations and labels because docker supports only labels.
// TODO: add a prefix to annotations so that we can distinguish labels and
// annotations when reading back them from the docker container.
// TODO: should we apply docker-specific labels?
labels := config.GetLabels()
for k, v := range config.GetAnnotations() {
if _, ok := labels[k]; !ok {
// Only write to labels if the key doesn't exist.
labels[k] = v
}
}
labels := makeLabels(config.GetLabels(), config.GetAnnotations())
// Apply a the container type label.
labels[containerTypeLabelKey] = containerTypeLabelContainer
image := ""
if iSpec := config.GetImage(); iSpec != nil {
@ -163,10 +162,7 @@ func (ds *dockerService) CreateContainer(podSandboxID string, config *runtimeApi
// Note: ShmSize is handled in kube_docker_client.go
}
// TODO: Seccomp support. Need to figure out how to pass seccomp options
// through the runtime API (annotations?).See dockerManager.getSecurityOpts()
// for the details. Always set the default seccomp profile for now.
hc.SecurityOpt = []string{fmt.Sprintf("%s=%s", "seccomp", defaultSeccompProfile)}
hc.SecurityOpt = []string{getSeccompOpts()}
// TODO: Add or drop capabilities.
createConfig.HostConfig = hc

View File

@ -19,33 +19,243 @@ package dockershim
import (
"fmt"
dockertypes "github.com/docker/engine-api/types"
dockercontainer "github.com/docker/engine-api/types/container"
dockerfilters "github.com/docker/engine-api/types/filters"
runtimeApi "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime"
)
const (
defaultSandboxImage = "gcr.io/google_containers/pause-amd64:3.0"
// Various default sandbox resources requests/limits.
defaultSandboxCPUshares int64 = 2
defaultSandboxOOMScore int = -999
// Termination grace period
defaultSandboxGracePeriod int = 10
)
// CreatePodSandbox creates a pod-level sandbox.
// The definition of PodSandbox is at https://github.com/kubernetes/kubernetes/pull/25899
// For docker, PodSandbox is implemented by a container holding the network
// namespace for the pod.
// Note: docker doesn't use LogDirectory (yet).
func (ds *dockerService) CreatePodSandbox(config *runtimeApi.PodSandboxConfig) (string, error) {
return "", fmt.Errorf("Not implemented")
// Step 1: Pull the image for the sandbox.
// TODO: How should we handle pulling custom pod infra container image
// (with credentials)?
image := defaultSandboxImage
if err := ds.client.PullImage(image, dockertypes.AuthConfig{}, dockertypes.ImagePullOptions{}); err != nil {
return "", fmt.Errorf("unable to pull image for the sandbox container: %v", err)
}
// Step 2: Create the sandbox container.
createConfig := makeSandboxDockerConfig(config, image)
createResp, err := ds.client.CreateContainer(*createConfig)
if err != nil || createResp == nil {
return "", fmt.Errorf("failed to create a sandbox for pod %q: %v", config.GetName(), err)
}
// Step 3: Start the sandbox container.
// Assume kubelet's garbage collector would remove the sandbox later, if
// startContainer failed.
err = ds.StartContainer(createResp.ID)
return createResp.ID, err
}
// StopPodSandbox stops the sandbox. If there are any running containers in the
// sandbox, they should be force terminated.
func (ds *dockerService) StopPodSandbox(podSandboxID string) error {
return fmt.Errorf("Not implemented")
return ds.client.StopContainer(podSandboxID, defaultSandboxGracePeriod)
// TODO: Stop all running containers in the sandbox.
}
// DeletePodSandbox deletes the sandbox. If there are running containers in the
// sandbox, they should be forcibly deleted.
func (ds *dockerService) DeletePodSandbox(podSandboxID string) error {
return fmt.Errorf("Not implemented")
return ds.client.RemoveContainer(podSandboxID, dockertypes.ContainerRemoveOptions{RemoveVolumes: true})
// TODO: remove all containers in the sandbox.
}
// PodSandboxStatus returns the Status of the PodSandbox.
// PodSandboxStatus returns the status of the PodSandbox.
func (ds *dockerService) PodSandboxStatus(podSandboxID string) (*runtimeApi.PodSandboxStatus, error) {
return nil, fmt.Errorf("Not implemented")
// Inspect the container.
r, err := ds.client.InspectContainer(podSandboxID)
if err != nil {
return nil, err
}
// Parse the timstamps.
createdAt, _, _, err := getContainerTimestamps(r)
if err != nil {
return nil, fmt.Errorf("failed to parse timestamp for container %q: %v", podSandboxID, err)
}
ct := createdAt.Unix()
// Translate container to sandbox state.
state := runtimeApi.PodSandBoxState_NOTREADY
if r.State.Running {
state = runtimeApi.PodSandBoxState_READY
}
// TODO: We can't really get the IP address from the network plugin, which
// is handled by kubelet as of now. Should we amend the interface? How is
// this handled in the new remote runtime integration?
// See DockerManager.determineContainerIP() for more details.
// For now, just assume that there is no network plugin.
// Related issue: https://github.com/kubernetes/kubernetes/issues/28667
var IP string
if r.NetworkSettings != nil {
IP = r.NetworkSettings.IPAddress
// Fall back to IPv6 address if no IPv4 address is present
if IP == "" {
IP = r.NetworkSettings.GlobalIPv6Address
}
}
network := &runtimeApi.PodSandboxNetworkStatus{Ip: &IP}
netNS := getNetworkNamespace(r)
return &runtimeApi.PodSandboxStatus{
Id: &r.ID,
Name: &r.Name,
State: &state,
CreatedAt: &ct,
// TODO: We write annotations as labels on the docker containers. All
// these annotations will be read back as labels. Need to fix this.
// Also filter out labels only relevant to this shim.
Labels: r.Config.Labels,
Network: network,
Linux: &runtimeApi.LinuxPodSandboxStatus{Namespaces: &runtimeApi.Namespace{Network: &netNS}},
}, nil
}
// ListPodSandbox returns a list of Sandbox.
func (ds *dockerService) ListPodSandbox(filter *runtimeApi.PodSandboxFilter) ([]*runtimeApi.PodSandbox, error) {
return nil, fmt.Errorf("Not implemented")
// By default, list all containers whether they are running or not.
opts := dockertypes.ContainerListOptions{All: true}
filterOutReadySandboxes := false
opts.Filter = dockerfilters.NewArgs()
f := newDockerFilter(&opts.Filter)
if filter != nil {
if filter.Name != nil {
f.Add("name", filter.GetName())
}
if filter.Id != nil {
f.Add("id", filter.GetId())
}
if filter.State != nil {
if filter.GetState() == runtimeApi.PodSandBoxState_READY {
// Only list running containers.
opts.All = false
} else {
// runtimeApi.PodSandBoxState_NOTREADY can mean the
// container is in any of the non-running state (e.g., created,
// exited). We can't tell docker to filter out running
// containers directly, so we'll need to filter them out
// ourselves after getting the results.
filterOutReadySandboxes = true
}
}
if filter.LabelSelector != nil {
for k, v := range filter.LabelSelector {
f.AddLabel(k, v)
}
}
// Filter out sandbox containers.
f.AddLabel(containerTypeLabelKey, containerTypeLabelSandbox)
}
containers, err := ds.client.ListContainers(opts)
if err != nil {
return nil, err
}
// Convert docker containers to runtime api sandboxes.
result := []*runtimeApi.PodSandbox{}
for _, c := range containers {
s := toRuntimeAPISandbox(&c)
if filterOutReadySandboxes && s.GetState() == runtimeApi.PodSandBoxState_READY {
continue
}
result = append(result, s)
}
return result, nil
}
func makeSandboxDockerConfig(c *runtimeApi.PodSandboxConfig, image string) *dockertypes.ContainerCreateConfig {
// Merge annotations and labels because docker supports only labels.
labels := makeLabels(c.GetLabels(), c.GetAnnotations())
// Apply a label to distinguish sandboxes from regular containers.
labels[containerTypeLabelKey] = containerTypeLabelSandbox
hc := &dockercontainer.HostConfig{}
createConfig := &dockertypes.ContainerCreateConfig{
Name: c.GetName(),
Config: &dockercontainer.Config{
Hostname: c.GetHostname(),
// TODO: Handle environment variables.
Image: image,
Labels: labels,
},
HostConfig: hc,
}
// Apply linux-specific options.
if lc := c.GetLinux(); lc != nil {
// Apply Cgroup options.
// TODO: Check if this works with per-pod cgroups.
hc.CgroupParent = lc.GetCgroupParent()
// Apply namespace options.
hc.NetworkMode, hc.UTSMode, hc.PidMode = "", "", ""
nsOpts := lc.GetNamespaceOptions()
if nsOpts != nil {
if nsOpts.GetHostNetwork() {
hc.NetworkMode = namespaceModeHost
} else {
// Assume kubelet uses either the cni or the kubenet plugin.
// TODO: support docker networking.
hc.NetworkMode = "none"
}
if nsOpts.GetHostIpc() {
hc.IpcMode = namespaceModeHost
}
if nsOpts.GetHostPid() {
hc.PidMode = namespaceModeHost
}
}
}
// Set port mappings.
exposedPorts, portBindings := makePortsAndBindings(c.GetPortMappings())
createConfig.Config.ExposedPorts = exposedPorts
hc.PortBindings = portBindings
// Set DNS options.
if dnsOpts := c.GetDnsOptions(); dnsOpts != nil {
hc.DNS = dnsOpts.GetServers()
hc.DNSSearch = dnsOpts.GetSearches()
}
// Apply resource options.
setSandboxResources(c.GetResources(), hc)
// Set security options.
hc.SecurityOpt = []string{getSeccompOpts()}
return createConfig
}
func setSandboxResources(_ *runtimeApi.PodSandboxResources, hc *dockercontainer.HostConfig) {
// Ignore the resource requests and limits for now and just use the docker
// defaults.
// TODO: apply resource limits based on the configuration.
hc.Resources = dockercontainer.Resources{
MemorySwap: -1, // Always disable memory swap.
CPUShares: defaultSandboxCPUshares,
// Use docker's default cpu quota/period.
}
hc.OomScoreAdj = defaultSandboxOOMScore
}

View File

@ -0,0 +1,58 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package dockershim
import (
"testing"
dockertypes "github.com/docker/engine-api/types"
runtimeApi "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime"
)
func TestCreateSandbox(t *testing.T) {
ds, fakeDocker := newTestDockerSevice()
name := "FOO"
// We don't really want to test k8s name format and parsing,
// but FakeDockerClient parses the name internally during AssertCreated().
// TODO: fix this.
fullName := "k8s_" + name + ".abcde_foo_new_12345678_0"
config := &runtimeApi.PodSandboxConfig{Name: &fullName}
id, err := ds.CreatePodSandbox(config)
if err != nil {
t.Errorf("Unexpected error: %v", err)
}
if err := fakeDocker.AssertCreated([]string{name}); err != nil {
t.Errorf("%v", err)
}
if err := fakeDocker.AssertStarted([]string{id}); err != nil {
t.Errorf("%v", err)
}
// List running containers and verify that there is one (and only one)
// running container that we just created.
containers, err := fakeDocker.ListContainers(dockertypes.ContainerListOptions{All: false})
if err != nil {
t.Errorf("Unexpected error: %v", err)
}
if len(containers) != 1 {
t.Errorf("More than one running containers: %+v", containers)
}
if containers[0].ID != id {
t.Errorf("Expected id %q, got %v", id, containers[0].ID)
}
}

View File

@ -36,7 +36,17 @@ const (
// '{{.HostConfig.NetworkMode}}'.
namespaceModeHost = "host"
dockerNetNSFmt = "/proc/%v/ns/net"
defaultSeccompProfile = "unconfined"
// Internal docker labels used to identify whether a container is a sandbox
// or a regular container.
// TODO: This is not backward compatible with older containers. We will
// need to add filtering based on names.
containerTypeLabelKey = "io.kubernetes.docker.type"
containerTypeLabelSandbox = "podsandbox"
containerTypeLabelContainer = "container"
)
func NewDockerSevice(client dockertools.DockerInterface) DockerLegacyService {

View File

@ -20,8 +20,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/dockertools"
)
func newFakeDockerSevice() *dockerService {
return &dockerService{
client: dockertools.NewFakeDockerClient(),
}
func newTestDockerSevice() (*dockerService, *dockertools.FakeDockerClient) {
c := dockertools.NewFakeDockerClient()
return &dockerService{client: c}, c
}

View File

@ -18,8 +18,15 @@ package dockershim
import (
"fmt"
"strconv"
"strings"
dockertypes "github.com/docker/engine-api/types"
dockerfilters "github.com/docker/engine-api/types/filters"
dockerapiversion "github.com/docker/engine-api/types/versions"
dockernat "github.com/docker/go-connections/nat"
"github.com/golang/glog"
runtimeApi "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime"
)
@ -50,6 +57,23 @@ func generateEnvList(envs []*runtimeApi.KeyValue) (result []string) {
return
}
// Merge annotations and labels because docker supports only labels.
// TODO: Need to be able to distinguish annotations from labels; otherwise, we
// couldn't restore the information when reading the labels back from docker.
func makeLabels(labels, annotations map[string]string) map[string]string {
merged := make(map[string]string)
for k, v := range labels {
merged[k] = v
}
for k, v := range annotations {
if _, ok := merged[k]; !ok {
// Don't overwrite the key if it already exists.
merged[k] = v
}
}
return merged
}
// generateMountBindings converts the mount list to a list of strings that
// can be understood by docker.
// Each element in the string is in the form of:
@ -76,3 +100,78 @@ func generateMountBindings(mounts []*runtimeApi.Mount) (result []string) {
}
return
}
func makePortsAndBindings(pm []*runtimeApi.PortMapping) (map[dockernat.Port]struct{}, map[dockernat.Port][]dockernat.PortBinding) {
exposedPorts := map[dockernat.Port]struct{}{}
portBindings := map[dockernat.Port][]dockernat.PortBinding{}
for _, port := range pm {
exteriorPort := port.GetHostPort()
if exteriorPort == 0 {
// No need to do port binding when HostPort is not specified
continue
}
interiorPort := port.GetContainerPort()
// Some of this port stuff is under-documented voodoo.
// See http://stackoverflow.com/questions/20428302/binding-a-port-to-a-host-interface-using-the-rest-api
var protocol string
switch strings.ToUpper(string(port.GetProtocol())) {
case "UDP":
protocol = "/udp"
case "TCP":
protocol = "/tcp"
default:
glog.Warningf("Unknown protocol %q: defaulting to TCP", port.Protocol)
protocol = "/tcp"
}
dockerPort := dockernat.Port(strconv.Itoa(int(interiorPort)) + protocol)
exposedPorts[dockerPort] = struct{}{}
hostBinding := dockernat.PortBinding{
HostPort: strconv.Itoa(int(exteriorPort)),
HostIP: port.GetHostIp(),
}
// Allow multiple host ports bind to same docker port
if existedBindings, ok := portBindings[dockerPort]; ok {
// If a docker port already map to a host port, just append the host ports
portBindings[dockerPort] = append(existedBindings, hostBinding)
} else {
// Otherwise, it's fresh new port binding
portBindings[dockerPort] = []dockernat.PortBinding{
hostBinding,
}
}
}
return exposedPorts, portBindings
}
// TODO: Seccomp support. Need to figure out how to pass seccomp options
// through the runtime API (annotations?).See dockerManager.getSecurityOpts()
// for the details. Always set the default seccomp profile for now.
// Also need to support syntax for different docker versions.
func getSeccompOpts() string {
return fmt.Sprintf("%s=%s", "seccomp", defaultSeccompProfile)
}
func getNetworkNamespace(c *dockertypes.ContainerJSON) string {
return fmt.Sprintf(dockerNetNSFmt, c.State.Pid)
}
// dockerFilter wraps around dockerfilters.Args and provides methods to modify
// the filter easily.
type dockerFilter struct {
f *dockerfilters.Args
}
func newDockerFilter(args *dockerfilters.Args) *dockerFilter {
return &dockerFilter{f: args}
}
func (f *dockerFilter) Add(key, value string) {
f.Add(key, value)
}
func (f *dockerFilter) AddLabel(key, value string) {
f.Add("label", fmt.Sprintf("%s=%s", key, value))
}

View File

@ -51,6 +51,7 @@ type FakeDockerClient struct {
// Created, Stopped and Removed all container docker ID
Created []string
Started []string
Stopped []string
Removed []string
VersionInfo dockertypes.Version
@ -229,6 +230,17 @@ func (f *FakeDockerClient) AssertCreated(created []string) error {
return nil
}
func (f *FakeDockerClient) AssertStarted(started []string) error {
f.Lock()
defer f.Unlock()
sort.StringSlice(started).Sort()
sort.StringSlice(f.Started).Sort()
if !reflect.DeepEqual(started, f.Started) {
return fmt.Errorf("expected %#v, got %#v", started, f.Started)
}
return nil
}
func (f *FakeDockerClient) AssertStopped(stopped []string) error {
f.Lock()
defer f.Unlock()
@ -339,6 +351,7 @@ func (f *FakeDockerClient) StartContainer(id string) error {
if err := f.popError("start"); err != nil {
return err
}
f.Started = append(f.Started, id)
container, ok := f.ContainerMap[id]
if !ok {
container = convertFakeContainer(&FakeContainer{ID: id, Name: id})